/*
 * gcdat.c - NILFS shadow DAT inode for GC
 *
 * Copyright (C) 2005-2007 Nippon Telegraph and Telephone Corporation.
 *
 * This file is part of NILFS.
 *
 * NILFS is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * NILFS is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with NILFS; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 *
 * gcdat.c,v 1.4 2007-07-24 06:03:15 kihara Exp
 *
 * Written by Seiji Kihara <kihara@osrg.net>, Amagai Yoshiji <amagai@osrg.net>,
 *            and Ryusuke Konishi <ryusuke@osrg.net>.
 *
 */

#include <linux/buffer_head.h>
#include "nilfs.h"
#include "page.h"
#include "mdt.h"

/*
 * Size of the on-stack page pointer arrays in this file, i.e. the maximum
 * number of pages requested from one radix-tree gang lookup.
 */
#define	GCDAT_N_PAGEVEC	16

/*
 * nilfs_gcdat_copy_page -- duplicate the data and state flags of a page
 * @src: page to copy from
 * @dst: page to copy to; must differ from @src and not be under writeback
 *
 * Copies PAGE_SIZE bytes from @src to @dst and then brings the uptodate
 * and mappedtodisk flags of @dst in line with those of @src.  The dirty
 * flag is not touched here.
 */
static void nilfs_gcdat_copy_page(struct page *src, struct page *dst)
{
	void *from, *to;

	BUG_ON(src == dst);

	from = kmap_atomic(src, KM_USER0);
	to = kmap_atomic(dst, KM_USER1);
	memcpy(to, from, PAGE_SIZE);
	kunmap_atomic(to, KM_USER1);
	kunmap_atomic(from, KM_USER0);

	/* page flags: uptodate, dirty, writeback, mappedtodisk */
	BUG_ON(PageWriteback(dst));

	/* only modify a flag of dst when it disagrees with src */
	if (PageUptodate(src)) {
		if (!PageUptodate(dst))
			SetPageUptodate(dst);
	} else if (PageUptodate(dst)) {
		ClearPageUptodate(dst);
	}

	if (PageMappedToDisk(src)) {
		if (!PageMappedToDisk(dst))
			SetPageMappedToDisk(dst);
	} else if (PageMappedToDisk(dst)) {
		ClearPageMappedToDisk(dst);
	}
}

/*
 * nilfs_gcdat_copy_buffer_page -- copy a page together with its buffer heads
 * @src: source page; must have buffers (a missing page lock is only logged)
 * @dst: destination page; must be locked; buffers are allocated if absent
 * @dty: non-zero to carry the dirty and prepare-dirty buffer bits over
 *
 * Note: This function is for both data pages and btnode pages.
 *	 The page dirty flag should be treated by the caller.
 *	 The page must not be under i/o.
 */
void nilfs_gcdat_copy_buffer_page(struct page *src, struct page *dst, int dty)
{
	/* buffer state bits that survive the copy; all others are dropped */
	const unsigned long keep =
		(1 << BH_Uptodate) | (1 << BH_Mapped) |
		(!!dty << BH_Dirty) | (!!dty << BH_Prepare_Dirty) |
		(1 << BH_NILFS_Allocated) | (1 << BH_NILFS_Node);
	struct buffer_head *sbh, *dbh, *dhead;

	/* BUG_ON(!PageLocked(src)); */
	if (!PageLocked(src))
		printk(KERN_ERR "%s: page %p not locked.\n", __FUNCTION__, src);
	BUG_ON(!PageLocked(dst));
	BUG_ON(!page_has_buffers(src));

	sbh = page_buffers(src);
	if (!page_has_buffers(dst)) {
		/* give dst buffers of the same size as those of src */
		dhead = alloc_page_buffers(dst, sbh->b_size, 1);
		/* XXX -ENOMEM? what can we do? */
		BUG_ON(!dhead);
		nilfs_link_buffers(dst, dhead);
	}

	/* walk both buffer rings in step until the dst ring wraps around */
	dhead = page_buffers(dst);
	dbh = dhead;
	do {
		dbh->b_state = sbh->b_state & keep;
		dbh->b_blocknr = sbh->b_blocknr;
		/* dbh->b_size = sbh->b_size; */
		dbh->b_bdev = sbh->b_bdev;
		sbh = sbh->b_this_page;
		dbh = dbh->b_this_page;
	} while (dbh != dhead);

	nilfs_gcdat_copy_page(src, dst);
}

/*
 * nilfs_gcdat_copy_dirty_data -- replicate the dirty pages of a mapping
 * @src: mapping whose pages tagged dirty are copied
 * @dst: mapping that receives the copies at the same page indices
 *
 * Looks up dirty-tagged pages of @src in batches of GCDAT_N_PAGEVEC.  Each
 * one is copied (with buffers when it has them) into a page grabbed from
 * @dst, and the destination page is marked dirty.  Failure to get a
 * destination page is fatal (BUG).
 */
static void nilfs_gcdat_copy_dirty_data(struct address_space *src,
					struct address_space *dst)
{
	struct page *batch[GCDAT_N_PAGEVEC];
	pgoff_t next = 0;
	unsigned int n;
	int i;

	for (;;) {
		/* pin the next batch while the tree is locked */
		READ_LOCK_IRQ(&src->tree_lock);
		n = radix_tree_gang_lookup_tag(&src->page_tree,
					       (void **)batch, next,
					       GCDAT_N_PAGEVEC,
					       PAGECACHE_TAG_DIRTY);
		for (i = 0; i < n; i++)
			page_cache_get(batch[i]);
		READ_UNLOCK_IRQ(&src->tree_lock);

		if (n == 0)
			return;
		/* the following lookup starts after the last page found */
		next = batch[n - 1]->index + 1;

		for (i = 0; i < n; i++) {
			struct page *spage = batch[i];
			struct page *dpage;

			lock_page(spage);
			BUG_ON(!PageDirty(spage));
			dpage = grab_cache_page(dst, spage->index);
			if (!dpage) {
				/* XXX what can I do against ENOMEM? */
				printk(KERN_ERR "%s: cannot alloc memory\n",
				       __FUNCTION__);
				BUG();
			}
			if (likely(page_has_buffers(spage)))
				nilfs_gcdat_copy_buffer_page(spage, dpage, 1);
			else
				nilfs_gcdat_copy_page(spage, dpage);
			__set_page_dirty_nobuffers(dpage);
			unlock_page(dpage);
			page_cache_release(dpage);
			unlock_page(spage);
			page_cache_release(spage);
		}
	}
}

/*
 * nilfs_init_gcdat_inode -- prepare the GC DAT inode from the DAT inode
 * @nilfs: nilfs object providing both inodes (ns_dat and ns_gc_dat)
 *
 * Clears the GC DAT inode first, then copies the NILFS inode flags and
 * state of the DAT inode (adding the NILFS_I_GCINODE bit to the state),
 * resets i_cno, and hands the bmap, the dirty data pages and the btnode
 * caches to their respective copy routines.
 */
void nilfs_init_gcdat_inode(struct the_nilfs *nilfs)
{
	struct inode *orig = nilfs->ns_dat;
	struct inode *shadow = nilfs->ns_gc_dat;
	struct nilfs_inode_info *orig_ii = NILFS_I(orig);
	struct nilfs_inode_info *shadow_ii = NILFS_I(shadow);

	nilfs_clear_gcdat_inode(nilfs);

	shadow_ii->i_flags = orig_ii->i_flags;
	shadow_ii->i_state = orig_ii->i_state | (1 << NILFS_I_GCINODE);
	shadow_ii->i_cno = 0;

	nilfs_bmap_init_gcdat(shadow_ii->i_bmap, orig_ii->i_bmap);
	nilfs_gcdat_copy_dirty_data(orig->i_mapping, shadow->i_mapping);
	nilfs_btnode_copy_dirty_pages(&orig_ii->i_btnode_cache,
				      &shadow_ii->i_btnode_cache);
}

/*
 * __nilfs_clear_page_buffers_dirty -- drop the dirty state of a page
 * @page: page to clean
 *
 * Clears the dirty bit of every buffer attached to @page (if any) and then
 * calls __nilfs_clear_page_dirty() on the page itself.
 */
static inline void __nilfs_clear_page_buffers_dirty(struct page *page)
{
	if (page_has_buffers(page)) {
		struct buffer_head *first = page_buffers(page);
		struct buffer_head *cur = first;

		/* the buffers of a page form a ring linked by b_this_page */
		do {
			clear_buffer_dirty(cur);
			cur = cur->b_this_page;
		} while (cur != first);
	}
	__nilfs_clear_page_dirty(page);
}

/*
 * __nilfs_gcdat_clear_dirty_data -- invalidate the dirty pages of a mapping
 * @mapping: mapping whose dirty-tagged pages are processed
 *
 * For every page tagged dirty in @mapping: the dirty state of the page and
 * its buffers is cleared, the page loses its uptodate flag, and each of its
 * buffers loses the uptodate and mapped bits.  Pages are processed in
 * batches of GCDAT_N_PAGEVEC.
 */
static void __nilfs_gcdat_clear_dirty_data(struct address_space *mapping)
{
	struct page *batch[GCDAT_N_PAGEVEC];
	pgoff_t next = 0;
	unsigned int n;
	int i;

	for (;;) {
		/* pin the next batch while the tree is locked */
		READ_LOCK_IRQ(&mapping->tree_lock);
		n = radix_tree_gang_lookup_tag(&mapping->page_tree,
					       (void **)batch, next,
					       GCDAT_N_PAGEVEC,
					       PAGECACHE_TAG_DIRTY);
		for (i = 0; i < n; i++)
			page_cache_get(batch[i]);
		READ_UNLOCK_IRQ(&mapping->tree_lock);

		if (n == 0)
			return;
		/* the following lookup starts after the last page found */
		next = batch[n - 1]->index + 1;

		for (i = 0; i < n; i++) {
			struct page *page = batch[i];

			lock_page(page);
			__nilfs_clear_page_buffers_dirty(page);
			ClearPageUptodate(page);
			if (page_has_buffers(page)) {
				struct buffer_head *first = page_buffers(page);
				struct buffer_head *bh = first;

				do {
					lock_buffer(bh);
					clear_buffer_uptodate(bh);
					clear_buffer_mapped(bh);
					unlock_buffer(bh);
					bh = bh->b_this_page;
				} while (bh != first);
			}
			unlock_page(page);
			page_cache_release(page);
		}
	}
}

/*
 * __nilfs_gcdat_copy_mapping -- bring the pages of one mapping into another
 * @gmapping: source mapping (all of its pages are visited, dirty or not)
 * @mapping: destination mapping
 *
 * Pages of @gmapping are looked up in batches of GCDAT_N_PAGEVEC.  For each
 * page, if @mapping already holds a page at the same index, the contents
 * (and buffer state, when both pages have buffers) are copied into it;
 * the destination page must not be dirty.  Otherwise the page itself is
 * removed from the radix tree of @gmapping and inserted into that of
 * @mapping.
 *
 * NOTE(review): a failing radix_tree_insert() is not reported; the page is
 * simply detached from both mappings and one reference is dropped.
 * NOTE(review): "index + 1" would wrap to 0 for a page at the maximum
 * index and restart the scan -- presumably unreachable for this file;
 * confirm.
 */
static void __nilfs_gcdat_copy_mapping(struct address_space *gmapping,
				       struct address_space *mapping)
{
	struct page *pages[GCDAT_N_PAGEVEC];
	unsigned int nr_pages;
	pgoff_t index = 0;
	int i;

repeat:
	/* take a reference on each page of the batch under the tree lock */
	READ_LOCK_IRQ(&gmapping->tree_lock);
	nr_pages = radix_tree_gang_lookup(&gmapping->page_tree, (void **)pages,
					  index, GCDAT_N_PAGEVEC);
	for (i = 0; i < nr_pages; i++)
		page_cache_get(pages[i]);
	READ_UNLOCK_IRQ(&gmapping->tree_lock);
	if (nr_pages == 0)
		goto out;
	/* note: mdt dirty flags should be cleared by segctor. */
	/* the next lookup resumes right after the last page of this batch */
	index = pages[nr_pages - 1]->index + 1;

	for (i = 0; i < nr_pages; i++) {
		struct page *page = pages[i], *dpage;
		pgoff_t offset = page->index;

		lock_page(page);
		dpage = find_lock_page(mapping, offset);
		if (dpage) {
			/* destination already caches this index: overwrite it */
			/* XXX skip if identical */
			BUG_ON(PageDirty(dpage));
			if (page_has_buffers(page) && page_has_buffers(dpage))
				nilfs_gcdat_copy_buffer_page(page, dpage, 0);
			else
				nilfs_gcdat_copy_page(page, dpage);
			unlock_page(dpage);
			page_cache_release(dpage);
#if 1 /* 0 for debug, withdrawn pages only in gcdat cache */
		} else {
			int err;

			/* move page from gcdat to dat cache */
			WRITE_LOCK_IRQ(&gmapping->tree_lock);
			radix_tree_delete(&gmapping->page_tree, offset);
			gmapping->nrpages--;
			WRITE_UNLOCK_IRQ(&gmapping->tree_lock);
			WRITE_LOCK_IRQ(&mapping->tree_lock);
			err = radix_tree_insert(&mapping->page_tree, offset,
						page);
			if (err < 0) {
				/*
				 * The page is now in neither tree: detach it
				 * and drop one reference in addition to the
				 * lookup reference released below.
				 */
				page->mapping = NULL;
				page_cache_release(page);
				goto skip_unlock;
			}
			page->mapping = mapping;
			mapping->nrpages++;
			/* make the dirty tag of the new slot match the page */
			if (PageDirty(page))
				radix_tree_tag_set(&mapping->page_tree, offset,
						   PAGECACHE_TAG_DIRTY);
			else
				radix_tree_tag_clear(&mapping->page_tree,
						     offset,
						     PAGECACHE_TAG_DIRTY);
		skip_unlock:
			WRITE_UNLOCK_IRQ(&mapping->tree_lock);
#endif
		}
		/* release the page lock and the reference from the lookup */
		unlock_page(page);
		page_cache_release(page);
	}
	goto repeat;
out:
	return;
}

/*
 * nilfs_commit_gcdat_data -- replace cached DAT data with GC DAT data
 * @mapping: page cache of the original DAT
 * @gmapping: page cache of the GC DAT
 *
 * First invalidates the pages still tagged dirty in @mapping, then copies
 * or moves every page of @gmapping into @mapping.
 */
static void nilfs_commit_gcdat_data(struct address_space *mapping,
				    struct address_space *gmapping)
{
	__nilfs_gcdat_clear_dirty_data(mapping);
	__nilfs_gcdat_copy_mapping(gmapping, mapping);

#if 0 /* clean cache for original DAT, for debug */
	{
		unsigned int left;

		truncate_inode_pages(mapping, 0); /* XXX for debug */
#ifdef CONFIG_NILFS_DEBUG
		READ_LOCK_IRQ(&mapping->tree_lock);
		left = mapping->nrpages;
		READ_UNLOCK_IRQ(&mapping->tree_lock);
		if (left)
			page_debug(3,
				   "%d pages remains after truncate orig DAT.\n",
				   left);
#endif
	}
#endif /* end clean cache for original DAT, for debug */
}

/*
 * nilfs_clear_gcdat_data -- empty the page cache of the GC DAT
 * @gmapping: page cache of the GC DAT
 *
 * Truncates all pages of @gmapping.  With CONFIG_NILFS_DEBUG, finding any
 * page still tagged dirty beforehand is treated as a bug.
 */
static void nilfs_clear_gcdat_data(struct address_space *gmapping)
{
#ifdef CONFIG_NILFS_DEBUG
	struct page *found[GCDAT_N_PAGEVEC];
	unsigned int nr_dirty;

	/* check and clean GC DAT cache */
	READ_LOCK_IRQ(&gmapping->tree_lock);
	nr_dirty = radix_tree_gang_lookup_tag(&gmapping->page_tree,
					      (void **)found, 0,
					      GCDAT_N_PAGEVEC,
					      PAGECACHE_TAG_DIRTY);
	READ_UNLOCK_IRQ(&gmapping->tree_lock);
	if (nr_dirty != 0) {
		page_debug(1, "%d dirty pages remains in GC DAT.\n", nr_dirty);
		BUG();
	}
#endif
	truncate_inode_pages(gmapping, 0);
}

/*
 * nilfs_commit_gcdat_inode -- write the GC DAT inode state back to the DAT
 * @nilfs: nilfs object providing both inodes (ns_dat and ns_gc_dat)
 *
 * With the mi_sem of the DAT held for writing, copies the NILFS inode flags
 * and state of the GC DAT inode to the DAT inode (with the NILFS_I_GCINODE
 * bit removed from the state) and commits the bmap, the data pages and the
 * btnode cache through their respective routines.
 */
void nilfs_commit_gcdat_inode(struct the_nilfs *nilfs)
{
	struct inode *orig = nilfs->ns_dat;
	struct inode *shadow = nilfs->ns_gc_dat;
	struct nilfs_inode_info *orig_ii = NILFS_I(orig);
	struct nilfs_inode_info *shadow_ii = NILFS_I(shadow);

	down_write(&NILFS_MDT(orig)->mi_sem);

	orig_ii->i_flags = shadow_ii->i_flags;
	orig_ii->i_state = shadow_ii->i_state & ~(1 << NILFS_I_GCINODE);

	nilfs_bmap_commit_gcdat(shadow_ii->i_bmap, orig_ii->i_bmap);
	nilfs_commit_gcdat_data(orig->i_mapping, shadow->i_mapping);
	nilfs_btnode_commit_gcdat_cache(&orig_ii->i_btnode_cache,
					&shadow_ii->i_btnode_cache);

	up_write(&NILFS_MDT(orig)->mi_sem);
}

/*
 * nilfs_clear_gcdat_inode -- reset the GC DAT inode and drop its caches
 * @nilfs: nilfs object providing the GC DAT inode (ns_gc_dat)
 *
 * Zeroes the NILFS inode flags, sets the inode state to I_CLEAR, empties
 * the data page cache, runs nilfs_check_radix_tree() on it, and clears the
 * btnode cache.
 */
void nilfs_clear_gcdat_inode(struct the_nilfs *nilfs)
{
	struct inode *shadow = nilfs->ns_gc_dat;
	struct nilfs_inode_info *shadow_ii = NILFS_I(shadow);

	shadow_ii->i_flags = 0;
	shadow->i_state = I_CLEAR;

	nilfs_clear_gcdat_data(shadow->i_mapping);
	nilfs_check_radix_tree(__FUNCTION__, shadow->i_mapping,
			       shadow->i_blkbits);

	nilfs_btnode_clear_gcdat_cache(&shadow_ii->i_btnode_cache);
}

/*
 * nilfs_gcdat_read_block -- get an up-to-date buffer for a data block
 * @inode: inode whose page cache is used (the GC DAT inode, going by the
 *         function name)
 * @block: block offset within the file
 * @out_bh: where the resulting buffer head is stored; must not be NULL
 *
 * Derived from nilfs_mdt_read_block().
 * Just for DATA blocks; b-tree blocks cannot be handled by this function.
 *
 * If the page of @inode covering @block has no buffers yet, the page cache
 * of the original DAT (ns_dat) is consulted first.  A page found there that
 * has buffers is copied, together with its buffer state, into the page of
 * @inode.  A page found there without buffers is not used, because
 * nilfs_gcdat_copy_buffer_page() BUGs on such a source; the block is then
 * obtained exactly as on a cache miss.  A buffer that is still not
 * up to date is mapped through the bmap when necessary and read with
 * nilfs_bread_slow().
 *
 * Return: 0 on success with *@out_bh set (the buffer appears to carry a
 * reference that the caller must release -- confirm against
 * nilfs_get_page_block()); -ENOMEM if the page or its buffer could not be
 * obtained; the error code of nilfs_bmap_lookup() if the lookup failed;
 * -EIO if reading the block failed.
 */
int nilfs_gcdat_read_block(struct inode *inode, nilfs_blkoff_t block,
			   struct buffer_head **out_bh)
{
	struct buffer_head *bh;
	struct nilfs_inode_info *ii = NILFS_I(inode);
	unsigned long blknum = 0;
	int err = -ENOMEM;
	struct page *page, *opage;
	pgoff_t index = block >> (PAGE_CACHE_SHIFT - inode->i_blkbits);

	mdt_debug(3, "called (blkoff=%llu)\n", (unsigned long long)block);
	page = grab_cache_page(inode->i_mapping, index);
	if (!page)
		goto failed;
	if (page_has_buffers(page)) /* XXX check condition */
		goto found;
	/* check original cache */
	opage = find_lock_page(NILFS_I_NILFS(inode)->ns_dat->i_mapping, index);
	if (opage) {
		mdt_debug(3, "hit orig cache.\n");
		BUG_ON(PageDirty(opage));
		/*
		 * Only a page with buffers can be handed to
		 * nilfs_gcdat_copy_buffer_page(); otherwise ignore the
		 * cached page and fall through to the lookup/read path.
		 */
		if (page_has_buffers(opage))
			nilfs_gcdat_copy_buffer_page(opage, page, 0);
		unlock_page(opage);
		page_cache_release(opage);
	}
found:
	bh = nilfs_get_page_block(page, block, index, inode->i_blkbits);
	if (unlikely(!bh))
		goto failed_unlock;	/* err is still -ENOMEM */
	if (buffer_uptodate(bh))
		goto out;
	if (!buffer_mapped(bh)) {
		/* translate the file block offset into a disk block number */
		err = nilfs_bmap_lookup(ii->i_bmap,
					(unsigned long)block, &blknum);
		mdt_debug(3, "lookup: blkoff=%llu -> blocknr=%lu "
			  "(err=%d, ino=%lu)\n",
			  (unsigned long long)block, blknum, err,
			  inode->i_ino);
		if (unlikely(err))
			goto failed_bh;

		bh->b_bdev = NILFS_I_NILFS(inode)->ns_bdev;
		bh->b_blocknr = blknum;
		set_buffer_mapped(bh);
	}
	err = -EIO;
	mdt_debug(2, "reading: blocknr=%llu (blkoff=%llu)\n",
		  (unsigned long long)bh->b_blocknr,
		  (unsigned long long)block);
	/*
	 * NOTE(review): on failure bh is not released here; presumably
	 * nilfs_bread_slow() drops it before returning NULL -- confirm.
	 */
	bh = nilfs_bread_slow(bh);
	if (unlikely(!bh))
		goto failed_unlock;
out:
	err = 0;
	BUG_ON(out_bh == NULL);
	*out_bh = bh;
failed_unlock:
	unlock_page(page);
	page_cache_release(page);
failed:
	mdt_debug(3, "done (err=%d)\n", err);
	return err;
failed_bh:
	brelse(bh);
	goto failed_unlock;
}

/* Local Variables:		*/
/* eval: (c-set-style "linux")	*/
/* End:				*/
