/*
 * segment.c - NILFS segment constructor.
 *
 * Copyright (C) 2005-2007 Nippon Telegraph and Telephone Corporation.
 *
 * This file is part of NILFS.
 *
 * NILFS is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * NILFS is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with NILFS; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 *
 * segment.c,v 1.175 2007-07-23 11:25:58 ryusuke Exp
 *
 * Written by Ryusuke Konishi <ryusuke@osrg.net>
 *
 */

#include <linux/pagemap.h>
#include <linux/buffer_head.h>
#include <linux/writeback.h>
#include <linux/bio.h>
#include <linux/completion.h>
#include <linux/blkdev.h>
#include "kern_feature.h"
#if NEED_FREEZER_H
#include <linux/freezer.h>
#else
#include <linux/suspend.h>
#endif
#include "nilfs.h"
#include "btnode.h"
#include "page.h"
#include "segment.h"
#include "sufile.h"
#include "cpfile.h"
#include "ifile.h"


/* 
 * Segment constructor
 */
/*
 * Sizes of the on-stack batch arrays used by the lookup routines in this
 * file: pages[] in lookup_dirty_data_buffers()/lookup_dirty_node_buffers(),
 * bhs[] in lookup_dirty_bmap_buffers() and ivec[] in nilfs_dispose_list().
 */
#define SC_N_PAGEVEC	16   /* Size of locally allocated page vector */
#define SC_N_BHVEC	16   /* Size of locally allocated buffer head vector */
#define SC_N_INODEVEC	16   /* Size of locally allocated inode vector */

/* Construction mode (values start at 1) */
enum {
	/*
	 * Flush current dirty data blocks and make partial segments
	 * without the super root and the inode file.
	 */
	SC_FLUSH_DATA = 1,
	/*
	 * Flush current dirty data blocks and the inode file; make
	 * partial segments without a checkpoint.
	 */
	SC_FLUSH_IFILE = 2,
	/* Make a logical segment having a super root. */
	SC_LSEG_SR = 3,
	/*
	 * Flush data blocks of a given file and make a logical segment
	 * without the super root.
	 */
	SC_LSEG_DSYNC = 4,
};

/*
 * Macros to associate a freezing buffer with its copy
 *
 * An original buffer and its frozen copy are tied together through their
 * b_assoc_buffers list heads; see freeze_blocks() for the coupling and
 * peel_off_original_blocks() for the uncoupling.
 */
/* True if @bh is linked on b_assoc_buffers and the first buffer of its page
   has the nilfs_allocated flag set. */
#define __buffer_frozen_copy(bh)  \
   (!list_empty(&(bh)->b_assoc_buffers) && buffer_nilfs_allocated(page_buffers((bh)->b_page)))
/* True if @bh is currently linked on a b_assoc_buffers list. */
#define __buffer_frozen_orig(bh)  (!list_empty(&(bh)->b_assoc_buffers))
/* Unlink @bh from its b_assoc_buffers list and reinitialize its list head. */
#define __uncouple_frozen_buffer(bh)  do { list_del_init(&(bh)->b_assoc_buffers); } while (0)
/* Link @copy right after @orig.  Both must be uncoupled on entry; a buffer
   that is already coupled triggers BUG(). */
#define __couple_frozen_buffer(orig, copy)  \
   do { \
	BUG_ON(!list_empty(&(orig)->b_assoc_buffers) ||  \
	       !list_empty(&(copy)->b_assoc_buffers));  \
	list_add(&(copy)->b_assoc_buffers, &(orig)->b_assoc_buffers);  \
   } while (0)

/*
 * Construction stages (main stage).
 *
 * The numeric values are used as bit positions by the SC_STAGE_* macros,
 * so they are spelled out explicitly.
 */
enum {
	SC_MAIN_INIT   = 0,
	SC_MAIN_GC     = 1,
	SC_MAIN_FILE   = 2,
	SC_MAIN_SKETCH = 3,
	SC_MAIN_IFILE  = 4,
	SC_MAIN_CPFILE = 5,
	SC_MAIN_SUFILE = 6,
	SC_MAIN_DAT    = 7,
	SC_MAIN_SR     = 8,
	SC_MAIN_DONE   = 9,
	SC_MAIN_DSYNC  = 10,
};

/* Construction sub-stages within a main stage */
enum {
	SC_SUB_DATA = 0,
	SC_SUB_NODE = 1,
};

#define SC_STAGE_INIT(stage)  \
	do { \
	     (stage)->main = (stage)->sub = 0; \
	 } while(0)
#define SC_STAGE_CLEAR_HISTORY(stage)  \
	do { \
	     (stage)->started = (stage)->done = 0; \
	} while(0)
#define SC_STAGE_NEXT(stage)  \
	do { \
	     (stage)->done |= (1 << (stage)->main++); \
	     (stage)->started |= (1 << (stage)->main); \
	} while(0)
#define SC_STAGE_SKIP_TO(stage, s)  \
	do { \
	     (stage)->done |= (1 << (stage)->main); \
	     (stage)->started |= (1 << ((stage)->main = (s))); \
	} while(0)

#define SC_STAGE_STARTED(stage, s) ((stage)->started & (1 << (s)))
#define SC_STAGE_DONE(stage, s)    ((stage)->done & (1 << (s)))

/*
 * Definitions for collecting or writing segment summary
 */
/* Callback invoked by the lookup_dirty_*_buffers() walkers below for each
   buffer they select; a non-zero return value is treated as an error. */
typedef int (*collect_proc_t)(struct nilfs_sc_info *, struct buffer_head *, struct inode *);
/* Callback that stores one block information entry into the segment summary
   at the byte position given by the second argument and advances it. */
typedef void (*write_binfo_proc_t)(struct nilfs_sc_info *, unsigned long *, union nilfs_binfo *);

/* Walkers that enumerate dirty data, btree node and bmap buffers of an inode
   and hand each of them to the given collect_proc_t. */
static int lookup_dirty_data_buffers(struct nilfs_sc_info *, struct inode *, collect_proc_t);
static int lookup_dirty_node_buffers(struct nilfs_sc_info *, struct inode *, collect_proc_t);
static int lookup_dirty_bmap_buffers(struct nilfs_sc_info *, struct inode *, collect_proc_t);

/*
 * Per-file-type callback table for the segment constructor.  Instances in
 * this file are sc_file_ops, sc_dat_ops and sc_dsync_ops.
 */
struct nilfs_sc_operations {
	collect_proc_t  collect_data;	/* collect a dirty data buffer */
	collect_proc_t  collect_node;	/* handle a dirty btree node buffer */
	collect_proc_t  collect_bmap;	/* collect a dirty bmap buffer */
	write_binfo_proc_t   write_data_binfo;	/* write binfo of a data block */
	write_binfo_proc_t   write_node_binfo;	/* write binfo of a node block */
#ifdef CONFIG_NILFS_DEBUG
	/* Debug-only formatters of a binfo entry */
	print_binfo_proc_t   print_data_binfo;
	print_binfo_proc_t   print_node_binfo;
#endif
};

/*
 * Other definitions
 */
static void nilfs_segctor_start_timer(struct nilfs_sc_info *);
static void nilfs_dispose_list(struct nilfs_sb_info *, struct list_head *, int);

/*
 * Comparisons of 32-bit counters based on the sign of their difference
 * taken as a signed 32-bit value.  typecheck() enforces that both arguments
 * are of type __u32.
 * NOTE(review): the signed subtraction presumably relies on the kernel being
 * built with wrapping signed arithmetic -- confirm for the target compiler.
 */
#define nilfs_cnt32_gt(a,b)   \
        (typecheck(__u32, a) && typecheck(__u32, b) && \
         ((__s32)(b) - (__s32)(a) < 0))
#define nilfs_cnt32_ge(a,b)   \
        (typecheck(__u32, a) && typecheck(__u32, b) && \
         ((__s32)(a) - (__s32)(b) >= 0))
#define nilfs_cnt32_lt(a,b)   nilfs_cnt32_gt(b,a)
#define nilfs_cnt32_le(a,b)   nilfs_cnt32_ge(b,a)

/* Neighbouring nilfs_segment_info entries on the list linked through ->list.
   No end-of-list check is performed here; callers must do that themselves. */
#define NEXT_SEGMENT_INFO(seginfo)  list_entry((seginfo)->list.next, struct nilfs_segment_info, list)
#define PREV_SEGMENT_INFO(seginfo)  list_entry((seginfo)->list.prev, struct nilfs_segment_info, list)

/*
 * Transaction
 *
 * No exclusion control is needed within a single task, because all file
 * operations, including those issued by the same task, are serialized
 * through inode->i_mutex (i_sem).
 */
/* Slab cache for dynamically allocated struct nilfs_transaction_info */
static struct kmem_cache *nilfs_transaction_cachep;

/**
 * nilfs_init_transaction_cache - create a cache for nilfs_transaction_info
 *
 * nilfs_init_transaction_cache() creates a slab cache for the struct
 * nilfs_transaction_info. 
 *
 * Return Value: On success, it returns 0. On error, one of the following
 * negative error code is returned.
 * 
 * %-ENOMEM - Insufficient memory available.
 */
int nilfs_init_transaction_cache(void)
{
	nilfs_transaction_cachep =
		kmem_cache_create("nilfs2_transaction_cache",
				  sizeof(struct nilfs_transaction_info),
				  0, SLAB_RECLAIM_ACCOUNT,
#if NEED_SLAB_DESTRUCTOR_ARG
				  NULL, NULL);
#else
				  NULL);
#endif
  
	return ((nilfs_transaction_cachep == NULL) ? -ENOMEM : 0);
}

/**
 * nilfs_destroy_transaction_cache - destroy the cache for nilfs_transaction_info
 *
 * nilfs_destroy_transaction_cache() frees the slab cache for the struct
 * nilfs_transaction_info, i.e. the cache created by
 * nilfs_init_transaction_cache().
 */
void nilfs_destroy_transaction_cache(void)
{
	kmem_cache_destroy(nilfs_transaction_cachep);
}

/*
 * nilfs_prepare_segment_lock - hook a transaction info onto the current task
 * @ti: caller-provided transaction info, or NULL to allocate one from the
 *      slab cache
 *
 * Return Value: 0 if a new (outermost) transaction info was installed in
 * current->journal_info, a positive nesting count if the task already had a
 * NILFS transaction info (only its counter is incremented), or -ENOMEM if
 * the dynamic allocation failed.
 */
static int
nilfs_prepare_segment_lock(struct nilfs_transaction_info *ti)
{
	struct nilfs_transaction_info *cur_ti = current->journal_info;
	void *foreign_info = NULL;

	/* Nested call: just bump the reference count */
	if (cur_ti && cur_ti->ti_magic == NILFS_TI_MAGIC) {
		seg_debug(3, "increment transaction refcnt (ti=%p, cnt=%d)\n",
			  cur_ti, cur_ti->ti_count);
		return ++cur_ti->ti_count;
	}

	if (cur_ti) {
		/*
		 * The journal_info field is occupied by another filesystem.
		 * Save it here; it is restored in nilfs_transaction_end().
		 * This should never happen, though.
		 */
		printk(KERN_WARNING
		       "NILFS warning: journal info from a different FS\n");
		foreign_info = current->journal_info;
	}

	if (ti) {
		ti->ti_flags = 0;
	} else {
		ti = kmem_cache_alloc(nilfs_transaction_cachep, GFP_NOFS);
		if (!ti)
			return -ENOMEM;
		ti->ti_flags = NILFS_TI_DYNAMIC_ALLOC;
	}

	ti->ti_magic = NILFS_TI_MAGIC;
	ti->ti_save = foreign_info;
	ti->ti_count = 0;
	current->journal_info = ti;
	return 0;
}

/**
 * nilfs_transaction_begin - start indivisible file operations.
 * @sb: super block
 * @ti: nilfs_transaction_info
 * @vacancy_check: flags for vacancy rate checks
 *
 * nilfs_transaction_begin() acquires the read semaphore that excludes
 * segment construction as needed. It is used in pairs with
 * nilfs_transaction_end(); the region enclosed by the two calls excludes
 * segment construction.  Such regions can be nested; the read semaphore is
 * acquired and released only in the outermost call, to avoid multiple read
 * locking by the same task.
 * @ti specifies a nilfs_transaction_info on local memory.  In the outermost
 * call it is initialized and hooked onto the current task.  Passing NULL as
 * @ti makes the nilfs_transaction_info be allocated dynamically.
 * If @vacancy_check is non-zero, the call fails when the disk is full (and
 * the task lacks CAP_SYS_RESOURCE), and waits while the filesystem is
 * write-locked.
 *
 * Return Value: On success, 0 is returned. On error, one of the following
 * negative error codes is returned.
 *
 * %-ENOMEM - Insufficient memory available.
 *
 * %-ERESTARTSYS - Interrupted
 *
 * %-ENOSPC - No space left on device
 */
int nilfs_transaction_begin(struct super_block *sb,
			    struct nilfs_transaction_info *ti, int vacancy_check)
{
	struct nilfs_sb_info *sbi = NILFS_SB(sb);
	struct the_nilfs *nilfs;
	int ret;

	ret = nilfs_prepare_segment_lock(ti);
	if (unlikely(ret < 0))
		return ret;
	if (ret != 0)
		return 0;	/* nested call; the semaphore is already held */

	nilfs = sbi->s_nilfs;
	for (;;) {
		seg_debug(3, "task %p locking segment semaphore\n", current);
		down_read(&nilfs->ns_segctor_sem);
		if (!vacancy_check)
			break;

		if (unlikely(nilfs_disk_full(nilfs)) && !capable(CAP_SYS_RESOURCE)) {
			up_read(&nilfs->ns_segctor_sem);
			ret = -ENOSPC;
			goto failed;
		}
		if (likely(!nilfs_write_locked(nilfs)))
			break;

		/* Write-locked: drop the semaphore, wait, then try again */
		up_read(&nilfs->ns_segctor_sem);
		ret = nilfs_wait_on_write_interruptible(nilfs);
		if (unlikely(ret))
			goto failed;
	}
	seg_debug(3, "locked\n");
	return 0;

 failed:
	/* Undo nilfs_prepare_segment_lock() */
	ti = current->journal_info;
	current->journal_info = ti->ti_save;
	if (ti->ti_flags & NILFS_TI_DYNAMIC_ALLOC)
		kmem_cache_free(nilfs_transaction_cachep, ti);
	return ret;
}

/**
 * nilfs_transaction_end - end indivisible file operations.
 * @sb: super block
 * @commit: commit flag (0 for no change)
 *
 * nilfs_transaction_end() releases the read semaphore acquired by
 * nilfs_transaction_begin().  The release is done only in the outermost
 * call; nested calls merely decrement the nesting counter.  A non-zero
 * @commit is remembered in the transaction info, and the segment
 * constructor timer is started when a committed transaction ends.  If the
 * nilfs_transaction_info was allocated dynamically, it is given back to
 * the slab cache.
 *
 * Return Value: 0 for nested calls.  For the outermost call, the result of
 * nilfs_construct_segment() if NILFS_TI_SYNC is set on the transaction,
 * otherwise 0.
 */
int nilfs_transaction_end(struct super_block *sb, int commit)
{
	struct nilfs_transaction_info *ti = current->journal_info;
	struct nilfs_sb_info *sbi;
	int err;

	if (unlikely(!ti || ti->ti_magic != NILFS_TI_MAGIC)) {
		seg_debug(1, "missing nilfs_transaction_begin()\n");
		BUG();
	}

	if (commit)
		ti->ti_flags |= NILFS_TI_COMMIT;

	if (ti->ti_count > 0) {
		/* Leaving a nested region */
		ti->ti_count--;
		seg_debug(3, "decremented transaction refcnt (ti=%p, cnt=%d)\n",
			  ti, ti->ti_count);
		return 0;
	}

	sbi = NILFS_SB(sb);
	if (ti->ti_flags & NILFS_TI_COMMIT)
		nilfs_segctor_start_timer(NILFS_SC(sbi));

	up_read(&sbi->s_nilfs->ns_segctor_sem);
	seg_debug(3, "task %p unlocked segment semaphore\n", current);
	current->journal_info = ti->ti_save;

	/* Synchronous construction runs after the semaphore is dropped */
	err = (ti->ti_flags & NILFS_TI_SYNC) ? nilfs_construct_segment(sb) : 0;

	if (ti->ti_flags & NILFS_TI_DYNAMIC_ALLOC)
		kmem_cache_free(nilfs_transaction_cachep, ti);
	return err;
}

/*
 * nilfs_segctor_lock - take the segment semaphore for writing
 * @sbi: nilfs_sb_info
 * @ti: transaction info to install on the current task (must not be NULL)
 * @gcflag: if non-zero, the transaction is flagged with NILFS_TI_GC
 *
 * The current task must not have a journal_info installed yet.
 */
static void nilfs_segctor_lock(struct nilfs_sb_info *sbi,
			       struct nilfs_transaction_info *ti, int gcflag)
{
	struct nilfs_transaction_info *cur_ti = current->journal_info;

	BUG_ON(cur_ti);
	BUG_ON(!ti);

	INIT_LIST_HEAD(&ti->ti_garbage);
	ti->ti_magic = NILFS_TI_MAGIC;
	ti->ti_save = cur_ti;
	ti->ti_count = 0;
	if (gcflag)
		ti->ti_flags = NILFS_TI_GC;
	else
		ti->ti_flags = 0;
	current->journal_info = ti;

	seg_debug(3, "task %p locking segment semaphore\n", current);
	down_write(&sbi->s_nilfs->ns_segctor_sem);
	seg_debug(3, "locked\n");
}

/*
 * nilfs_segctor_unlock - release the write side of the segment semaphore
 * @sbi: nilfs_sb_info
 *
 * Counterpart of nilfs_segctor_lock().  After the semaphore is released and
 * the saved journal_info is restored, inodes queued on the transaction's
 * garbage list are disposed of.
 */
static void nilfs_segctor_unlock(struct nilfs_sb_info *sbi)
{
	struct nilfs_transaction_info *ti = current->journal_info;

	if (unlikely(!ti || ti->ti_magic != NILFS_TI_MAGIC)) {
		seg_debug(1, "missing nilfs_segctor_lock()\n");
		BUG();
	}
	BUG_ON(ti->ti_count > 0);

	up_write(&sbi->s_nilfs->ns_segctor_sem);
	seg_debug(3, "task %p unlocked segment semaphore\n", current);
	current->journal_info = ti->ti_save;

	if (list_empty(&ti->ti_garbage))
		return;
	nilfs_dispose_list(sbi, &ti->ti_garbage, 0);
}

/*
 * nilfs_set_file_dirty - mark an inode dirty and queue it on s_dirty_files
 * @sbi: nilfs_sb_info
 * @inode: inode to be marked
 *
 * Nothing is done if the inode already had NILFS_I_DIRTY set or if it is
 * the sketch file.  Otherwise, unless the inode is already queued or busy,
 * it is moved to the tail of sbi->s_dirty_files; a reference is taken with
 * igrab() when the inode was not on any dirty list before.
 *
 * Return Value: 0 on success, or -EINVAL if the inode could not be grabbed.
 */
int nilfs_set_file_dirty(struct nilfs_sb_info *sbi, struct inode *inode)
{
	struct nilfs_inode_info *ii = NILFS_I(inode);
	int ret = 0;

	if (test_and_set_bit(NILFS_I_DIRTY, &ii->i_state) ||
	    unlikely(inode->i_ino == NILFS_SKETCH_INO))
		return 0;

	spin_lock(&sbi->s_inode_lock);

	/* Because this routine may race with nilfs_dispose_list(),
	   NILFS_I_QUEUED has to be checked here, too. */
	if (test_bit(NILFS_I_QUEUED, &ii->i_state) ||
	    test_bit(NILFS_I_BUSY, &ii->i_state))
		goto out_unlock;

	if (list_empty(&ii->i_dirty) && igrab(inode) == NULL) {
		/* This will happen when somebody is freeing this inode. */
		nilfs_warning(sbi->s_super, __FUNCTION__,
			      "cannot get inode (ino=%lu)\n",
			      inode->i_ino);
		/* NILFS_I_DIRTY may remain set for the inode being freed */
		ret = -EINVAL;
		goto out_unlock;
	}

	list_del(&ii->i_dirty);
	list_add_tail(&ii->i_dirty, &sbi->s_dirty_files);
	inode_debug(3, "registered dirty file (ino=%lu)\n", inode->i_ino);
	set_bit(NILFS_I_QUEUED, &ii->i_state);

 out_unlock:
	spin_unlock(&sbi->s_inode_lock);
	return ret;
}

/**
 * nilfs_commit_dirty_file - register a file to the dirty file list
 * @inode: inode of the file to be registered.
 *
 * The enclosing transaction is always ended with the commit flag set, even
 * when registering the inode fails.
 *
 * Return Value: On success, 0 is returned. On error, one of the following
 * negative error codes is returned.  An error from registering the inode
 * takes precedence over one from nilfs_transaction_end().
 *
 * %-EINVAL - cannot grab the inode (This may happen when somebody is
 * freeing the inode) or specified inode has an invalid inode number.
 *
 * %-EIO - I/O error
 */
int nilfs_commit_dirty_file(struct inode *inode)
{
	struct nilfs_sb_info *sbi;
	int err, err2;

	if (is_bad_inode(inode)) {
		inode_debug(1, "tried to commit bad_inode. ignored.\n");
		nilfs_dump_stack(NILFS_VERBOSE_INODE, 2);
		err = -EIO;
	} else {
		sbi = NILFS_SB(inode->i_sb);

#ifdef CONFIG_NILFS_DEBUG
		if (unlikely(inode->i_state & I_FREEING))
			nilfs_warning(inode->i_sb, __FUNCTION__,
				      "trying to mark deleting file dirty.\n");
#endif
		err = nilfs_set_file_dirty(sbi, inode);
	}

	err2 = nilfs_transaction_end(inode->i_sb, 1);
	return err ? err : err2;
}

/*
 * nilfs_mark_inode_dirty - write back an inode into its inode block
 * @inode: inode to be reflected
 *
 * Loads the inode block of @inode, updates it under the buffer lock, and
 * marks both the buffer and the inode file dirty.
 *
 * Return Value: 0 on success, or the error from nilfs_load_inode_block().
 */
static int nilfs_mark_inode_dirty(struct inode *inode)
{
	struct nilfs_sb_info *sbi = NILFS_SB(inode->i_sb);
	struct buffer_head *ibh;
	int err;

	err = nilfs_load_inode_block(sbi, inode, &ibh);
	if (unlikely(err)) {
		nilfs_warning(inode->i_sb, __FUNCTION__,
			      "failed to reget inode block.\n");
		return err;
	}

	lock_buffer(ibh);
	nilfs_update_inode(inode, ibh);
	unlock_buffer(ibh);

	nilfs_mdt_mark_buffer_dirty(ibh);
	nilfs_mdt_mark_dirty(sbi->s_ifile);
	brelse(ibh);
	return 0;
}

/**
 * nilfs_dirty_inode - reflect changes on given inode to an inode block.
 * @inode: inode of the file to be registered.
 *
 * nilfs_dirty_inode() loads the inode block containing the specified
 * @inode and copies data from the nilfs_inode to the corresponding inode
 * entry in the inode block. This operation is excluded from the segment
 * construction. This function can be called both as a single operation
 * and as a part of indivisible file operations.  Bad inodes are ignored,
 * and nothing is copied for the sketch file.
 */
void nilfs_dirty_inode(struct inode *inode)
{
	struct nilfs_transaction_info ti;
	struct super_block *sb;

	inode_debug(3, "called (ino=%lu)\n", inode->i_ino);
	if (is_bad_inode(inode)) {
		inode_debug(1, "tried to make bad_inode dirty. ignored.\n");
		nilfs_dump_stack(NILFS_VERBOSE_INODE, 2);
		return;
	}

	sb = inode->i_sb;
	/* With a local @ti and no vacancy check, this cannot fail */
	nilfs_transaction_begin(sb, &ti, 0);
	if (likely(inode->i_ino != NILFS_SKETCH_INO))
		nilfs_mark_inode_dirty(inode);
	nilfs_transaction_end(sb, 1); /* never fails */
	inode_debug(3, "done\n");
}

/*
 * freeze_blocks - replace buffers in a segment buffer with frozen copies
 * @from: segment buffer pointer of the first buffer to be frozen
 * @nblocks: number of consecutive buffers to freeze
 *
 * Copies of the buffers are obtained from nilfs_segbuf_copy_page_buffers().
 * Each original is coupled with its copy, and the copy takes the original's
 * slot in the segment buffer.
 *
 * Return Value: 0 on success, or -ENOMEM if the copies could not be made.
 */
static int freeze_blocks(nilfs_segbuf_ptr_t *from, int nblocks)
{
	struct buffer_head *bh, *bufs, *bh_org;
	struct nilfs_segment_buffer *segbuf = NILFS_SEGBUF(from);
	nilfs_segbuf_ptr_t tmp_ptr;

	seg_debug(3, "freezing blocks (from=%p, nblocks=%d)\n", from->pbh, nblocks);
	bufs = nilfs_segbuf_copy_page_buffers(from, nblocks);
	if (unlikely(!bufs))
		return -ENOMEM;

	/* Remember the current segbuf position; it is restored before return */
	nilfs_segbuf_read_ptr(segbuf, &tmp_ptr);
	nilfs_segbuf_set_ptr(segbuf, from);
	bh = bufs;
	do {
		bh_org = nilfs_segbuf_read_bh(segbuf);
		__couple_frozen_buffer(bh_org, bh);

		/* This reference on the original is dropped by __brelse() in
		   peel_off_original_blocks() */
		get_bh(bh_org);
		nilfs_segbuf_set_bh(segbuf, bh);
		nilfs_segbuf_move_next(segbuf);
		bh = bh->b_this_page;
	} while (--nblocks > 0);

	/* NOTE(review): presumably the page of the copies comes back locked
	   and with an extra reference from the copy helper -- confirm */
	nilfs_set_page_writeback(bufs->b_page);
	unlock_page(bufs->b_page);
	page_cache_release(bufs->b_page);
	nilfs_segbuf_set_ptr(segbuf, &tmp_ptr);
	return 0;
}

/**
 * peel_off_original_blocks - clear original blocks linked to given frozen buffers
 * @bh_copy: head of copied blocks
 * @err: whether bio was successfully completed or not (0 on success)
 *
 * Starting from the original buffer coupled with @bh_copy, every coupled
 * original on the same page is uncoupled and released.  On success the
 * originals are also marked up to date and clean.  The page of the
 * originals is locked during the operation, and its writeback state is
 * ended at the end.
 */
static void peel_off_original_blocks(struct buffer_head *bh_copy, int err)
{
	struct buffer_head *bh, *prev;
	struct page *page;

	/* The original is the list neighbour of the copy
	   (see __couple_frozen_buffer()) */
	bh = list_entry(bh_copy->b_assoc_buffers.next,
			struct buffer_head, b_assoc_buffers);
	page = bh->b_page;
	seg_debug(3, "clearing blocks (bhs[0]=%p, page=%p, err=%d)\n",
		  bh, page, err);
	lock_page(page);
	do {
		if (!err) {
			set_buffer_uptodate(bh);
			clear_buffer_dirty(bh);
			clear_buffer_nilfs_volatile(bh);
		}
		__uncouple_frozen_buffer(bh);
		prev = bh;
		bh = bh->b_this_page;
		/* Drop the reference taken in freeze_blocks() */
		__brelse(prev);
	} while (__buffer_frozen_orig(bh)); /* stop at the first uncoupled buffer */

	if (!err) {
		if (nilfs_page_buffers_clean(page)) {
			nilfs_clear_page_to_be_frozen(page);
			__nilfs_clear_page_dirty(page);
		}
		ClearPageError(page);
	} else if (err < 0)
		SetPageError(page);
	/* A positive @err leaves the page error flag untouched */

	unlock_page(page);
	end_page_writeback(page);
}

/*
 * Segment buffers and segment list
 */
/*
 * alloc_segment_info - allocate and initialize a nilfs_segment_info
 * @nbuffers: number of buffers passed to nilfs_segbuf_init()
 *
 * The structure is zero-filled and its list head and segment buffer are
 * initialized.
 *
 * Return Value: the new nilfs_segment_info, or NULL on failure.
 */
static inline struct nilfs_segment_info *alloc_segment_info(unsigned nbuffers)
{
	struct nilfs_segment_info *seginfo;

	seginfo = kmalloc(sizeof(*seginfo), GFP_NOFS);
	if (unlikely(seginfo == NULL))
		return NULL;

	memset(seginfo, 0, sizeof(*seginfo));
	INIT_LIST_HEAD(&seginfo->list);

	if (unlikely(nilfs_segbuf_init(&seginfo->segbuf, nbuffers))) {
		kfree(seginfo);
		return NULL;
	}
	return seginfo;
}

/*
 * free_segment_info - counterpart of alloc_segment_info(); destroys the
 * embedded segment buffer and frees @seginfo.  @seginfo must not be NULL.
 */
static inline void free_segment_info(struct nilfs_segment_info *seginfo)
{
	nilfs_segbuf_destroy(&seginfo->segbuf);
	kfree(seginfo);
}


/*
 * alloc_from_header - get a region in the summary header of the current segment
 * @sci: nilfs_sc_info
 * @pos: byte position in the header; advanced past the returned region
 * @bytes: size of the region
 *
 * A region never straddles a block boundary: if it does not fit in the rest
 * of the block that *@pos points into, it is placed at the beginning of the
 * next header block.  Running beyond the header blocks of the current
 * segment (seginfo->nheader) is a BUG.
 *
 * Return Value: pointer to the region inside the header buffer.
 */
static void *
alloc_from_header(struct nilfs_sc_info *sci, unsigned long *pos, unsigned bytes)
{
	struct super_block *sb = sci->sc_super;
	struct nilfs_segment_info *seginfo = sci->sc_curseg;
	unsigned blk = *pos >> sb->s_blocksize_bits;
	unsigned off = (*pos & (sb->s_blocksize - 1));

	if (unlikely(off + bytes > sb->s_blocksize)) {
		/* Does not fit; skip to the head of the next block */
		blk++;
		off = 0;
		*pos = blk << sb->s_blocksize_bits;
	}
	if (unlikely(blk >= seginfo->nheader)) {
		seg_debug(1, "reaches end of the header\n");
		BUG();
	}
	*pos += bytes;

	nilfs_segbuf_seek(&seginfo->segbuf, NILFS_SEGBUF_SEEK_FROM_TAIL, blk);
	return nilfs_segbuf_read_bh(&seginfo->segbuf)->b_data + off;
}

static int extend_segsum_buffer(struct nilfs_sc_info *sci,
				struct nilfs_segment_info *seginfo)
{
	struct buffer_head *bh;

	bh = sb_getblk(sci->sc_super, seginfo->pseg_start + seginfo->nheader);
	if(unlikely(!bh))
		return -ENOMEM;

	nilfs_set_page_writeback(bh->b_page);

	nilfs_segbuf_seek(&seginfo->segbuf, NILFS_SEGBUF_SEEK_FROM_TAIL, seginfo->nheader);
	nilfs_segbuf_set_bh(&seginfo->segbuf, bh);
	brelse(bh);
	seginfo->nheader++;
	seginfo->sum.nblocks++;
	seginfo->sum.nsumblk++;
	return 0;
}

/*
 * add_file_buffer - append a file block buffer to the payload of a segment
 * @sci: nilfs_sc_info (not used here)
 * @seginfo: destination segment
 * @bh: buffer to be added
 *
 * The page of @bh is put under writeback and @bh is registered in the
 * segment buffer while the page lock is held; then the payload and block
 * counters are updated.
 */
static void
add_file_buffer(struct nilfs_sc_info *sci, struct nilfs_segment_info *seginfo,
		struct buffer_head *bh)
{
	struct page *pg = bh->b_page;

	lock_page(pg);
	nilfs_set_page_writeback(pg);
	nilfs_segbuf_seek(&seginfo->segbuf, NILFS_SEGBUF_SEEK_FROM_HEAD,
			  seginfo->npayload);
	nilfs_segbuf_set_bh(&seginfo->segbuf, bh);
	unlock_page(pg);

	seginfo->sum.nfileblk++;
	seginfo->sum.nblocks++;
	seginfo->npayload++;
}

/*
 * init_segment_info - prepare a segment for collecting blocks
 * @sci: nilfs_sc_info
 * @seginfo: segment to be initialized
 *
 * Resets the summary counters, allocates the first summary header block,
 * and resets the finfo/binfo cursors of @sci to just past the segment
 * summary structure.  nheader and npayload are not reset here.
 *
 * Return Value: 0 on success, or the error from extend_segsum_buffer().
 */
static int init_segment_info(struct nilfs_sc_info *sci,
			     struct nilfs_segment_info *seginfo)
{
	int err;

	seginfo->sum.nsumblk = 0;
	seginfo->sum.nblocks = 0;

	/* The first header block; this bumps nblocks and nsumblk */
	err = extend_segsum_buffer(sci, seginfo);
	if (unlikely(err))
		return err;

	seginfo->sum.flags =
		test_bit(NILFS_SC_GC_COPY, &sci->sc_flags) ? NILFS_SS_GC : 0;
	seginfo->sum.sumbytes = sizeof(struct nilfs_segment_summary);
	seginfo->sum.nfileblk = 0;
	seginfo->sum.nfinfo = 0;
	seginfo->sum.ctime = sci->sc_seg_ctime;

	sci->sc_datablk_cnt = 0;
	sci->sc_blk_cnt = 0;
	sci->sc_finfo_offset = seginfo->sum.sumbytes;
	sci->sc_binfo_offset = sci->sc_finfo_offset;
	return 0;
}

static int feed_segment(struct nilfs_sc_info *sci)
{
	sci->sc_nblk_this_inc += sci->sc_curseg->sum.nblocks;
	sci->sc_curseg = NEXT_SEGMENT_INFO(sci->sc_curseg);
	if (sci->sc_curseg == &sci->sc_segments)
		return -E2BIG; /* The current segment is filled up (internal code) */
	
	seg_debug(3, "go on to the next full segment\n");
	return init_segment_info(sci, sci->sc_curseg);
}

static int add_super_root_buffer(struct nilfs_sc_info *sci)
{
	struct nilfs_segment_info *seginfo = sci->sc_curseg;
	struct buffer_head *bh;
	int err;

	if (seginfo->sum.nblocks >= seginfo->rest_blocks) {
		err = feed_segment(sci);
		if (err)
			return err;
		seginfo = sci->sc_curseg;
	}

	bh = sb_getblk(sci->sc_super, seginfo->pseg_start + seginfo->sum.nblocks);
	if(unlikely(!bh))
		return -ENOMEM;

	nilfs_set_page_writeback(bh->b_page);

	nilfs_segbuf_seek(&seginfo->segbuf, NILFS_SEGBUF_SEEK_FROM_HEAD, seginfo->npayload);
	nilfs_segbuf_set_bh(&seginfo->segbuf, bh);
	brelse(bh);
	nilfs_segbuf_read_ptr(&seginfo->segbuf, &sci->sc_super_root);

	seginfo->npayload++;
	seginfo->sum.nblocks++;
	seginfo->sum.flags |= NILFS_SS_SR;
	return 0;
}

/*
 * Functions for making segment summaries and payloads
 */
/*
 * segsum_block_required - test whether another summary block is needed
 * @sci: nilfs_sc_info
 * @pos: current byte position in the summary header
 * @binfo_size: size of the binfo entry to be added
 *
 * Return Value: non-zero if the binfo (plus a finfo, when no block of the
 * current file has been added yet) does not fit in the rest of the block
 * that @pos points into; a @pos on a block boundary counts as a full block.
 */
static inline int
segsum_block_required(struct nilfs_sc_info *sci, unsigned long pos,
		      unsigned binfo_size)
{
	struct super_block *sb = sci->sc_super;
	/* The sizes of finfo and binfo are small enough compared with the
	   block size */
	unsigned long used = ((pos - 1) & (sb->s_blocksize - 1)) + 1;
	unsigned long needed = binfo_size;

	if (!sci->sc_blk_cnt)
		needed += sizeof(struct nilfs_finfo);

	return used + needed > sb->s_blocksize;
}

/*
 * begin_finfo - open a new finfo record in the current segment summary
 *
 * Counts the record and reserves room for a struct nilfs_finfo starting at
 * sc_finfo_offset, which leaves sc_binfo_offset just past the reserved
 * space.  The record itself is filled in later by end_finfo().  @inode is
 * not used here.
 */
static void begin_finfo(struct nilfs_sc_info *sci, struct inode *inode)
{
	sci->sc_curseg->sum.nfinfo++;
	sci->sc_binfo_offset = sci->sc_finfo_offset;
	alloc_from_header(sci, &sci->sc_binfo_offset, sizeof(struct nilfs_finfo));
}

static void end_finfo(struct nilfs_sc_info *sci, struct inode *inode)
{
	struct nilfs_finfo *finfo;
	unsigned long offset = sci->sc_finfo_offset;
	struct nilfs_inode_info *ii;

	if (sci->sc_blk_cnt == 0)
		return;

	ii = NILFS_I(inode);
	finfo = alloc_from_header(sci, &offset, sizeof(*finfo));
	finfo->fi_ino = cpu_to_le64(inode->i_ino);
	finfo->fi_nblocks = cpu_to_le32(sci->sc_blk_cnt);
	finfo->fi_ndatablk = cpu_to_le32(sci->sc_datablk_cnt);
	finfo->fi_cno = cpu_to_le64(ii->i_cno);

	sci->sc_curseg->sum.sumbytes = sci->sc_finfo_offset = sci->sc_binfo_offset;
	sci->sc_blk_cnt = sci->sc_datablk_cnt = 0;
}

/*
 * add_file_block - add a block of a file to the current segment
 * @sci: nilfs_sc_info
 * @bh: buffer of the block
 * @inode: inode that the block belongs to
 * @binfo_size: size of the binfo entry for the block
 *
 * While the block (plus an additional summary block, if one is required)
 * does not fit in the current segment, the finfo of the file is closed and
 * the next segment is tried.  Then room for the binfo is reserved in the
 * summary and the buffer is appended to the payload.
 *
 * Return Value: 0 on success, or the error from feed_segment() or
 * extend_segsum_buffer().
 */
static int add_file_block(struct nilfs_sc_info *sci, struct buffer_head *bh,
			  struct inode *inode, unsigned binfo_size)
{
	struct nilfs_segment_info *seginfo;
	int required, err;

	for (;;) {
		seginfo = sci->sc_curseg;
		required = segsum_block_required(sci, sci->sc_binfo_offset,
						 binfo_size);
		if (seginfo->sum.nblocks + required + 1 <= seginfo->rest_blocks)
			break;

		/* No room left; go on to the next segment */
		end_finfo(sci, inode);
		err = feed_segment(sci);
		if (err)
			return err;
	}

	if (unlikely(required)) {
		err = extend_segsum_buffer(sci, seginfo);
		if (unlikely(err))
			return err;
	}
	if (sci->sc_blk_cnt == 0)
		begin_finfo(sci, inode);

	alloc_from_header(sci, &sci->sc_binfo_offset, binfo_size);
	/* Substitution to vblocknr is delayed until update_blocknr() */
	add_file_buffer(sci, seginfo, bh);
	sci->sc_blk_cnt++;
	return 0;
}

/*
 * Callback functions that enumerate, mark, and collect dirty blocks
 */
/*
 * collect_file_data - collect_data callback for file data blocks
 *
 * Propagates the dirty state through the bmap of @inode, then adds @bh to
 * the current segment with a struct nilfs_binfo_v sized binfo and counts it
 * as a data block.  Only dirty buffers get here; clean ones are filtered
 * out by lookup_dirty_data_buffers().
 */
static int collect_file_data(struct nilfs_sc_info *sci,
			     struct buffer_head *bh,
			     struct inode *inode)
{
	int err = nilfs_bmap_propagate(NILFS_I(inode)->i_bmap, bh);

	if (unlikely(err < 0))
		return nilfs_handle_bmap_error(err, __FUNCTION__, inode, sci->sc_super);

	err = add_file_block(sci, bh, inode, sizeof(struct nilfs_binfo_v));
	if (err)
		return err;

	sci->sc_datablk_cnt++;
	return 0;
}

/*
 * collect_file_node - collect_node callback for btree node blocks
 *
 * Only propagates the dirty state through the bmap of @inode; the buffer is
 * not added to the segment here.  lookup_dirty_node_buffers() passes only
 * buffers that are dirty and not flagged prepare_dirty.
 */
static int collect_file_node(struct nilfs_sc_info *sci,
			     struct buffer_head *bh,
			     struct inode *inode)
{
	int ret = nilfs_bmap_propagate(NILFS_I(inode)->i_bmap, bh);

	if (likely(ret >= 0))
		return 0;
	return nilfs_handle_bmap_error(ret, __FUNCTION__, inode, sci->sc_super);
}

/*
 * collect_file_bmap - collect_bmap callback for files
 *
 * Adds the dirty bmap buffer @bh to the current segment with a binfo
 * holding a single __le64.
 */
static int collect_file_bmap(struct nilfs_sc_info *sci,
			     struct buffer_head *bh,
			     struct inode *inode)
{
	const unsigned binfo_size = sizeof(__le64);

	BUG_ON(!buffer_dirty(bh));
	return add_file_block(sci, bh, inode, binfo_size);
}

/*
 * write_file_data_binfo - store the binfo of a file data block
 *
 * Copies binfo->bi_v into the summary header at *@pos and advances *@pos.
 */
static void write_file_data_binfo(struct nilfs_sc_info *sci, unsigned long *pos,
				  union nilfs_binfo *binfo)
{
	struct nilfs_binfo_v *dst;

	dst = alloc_from_header(sci, pos, sizeof(*dst));
	*dst = binfo->bi_v;
}

/*
 * write_file_node_binfo - store the binfo of a file node block
 *
 * Only the virtual block number is stored in the summary header at *@pos;
 * *@pos is advanced past it.
 */
static void write_file_node_binfo(struct nilfs_sc_info *sci, unsigned long *pos,
				  union nilfs_binfo *binfo)
{
	__le64 *dst;

	dst = alloc_from_header(sci, pos, sizeof(*dst));
	*dst = binfo->bi_v.bi_vblocknr;
}

#ifdef CONFIG_NILFS_DEBUG
/* Format the binfo of a file data block into @buf; returns snprintf()'s result. */
static int print_file_data_binfo(char *buf, int size, union nilfs_binfo *binfo)
{
	unsigned long long vblocknr = le64_to_cpu(binfo->bi_v.bi_vblocknr);
	unsigned long long blkoff = le64_to_cpu(binfo->bi_v.bi_blkoff);

	return snprintf(buf, size, "file data(vblocknr=%llu, blkoff=%llu)",
			vblocknr, blkoff);
}

/* Format the binfo of a file node block into @buf; returns snprintf()'s result. */
static int print_file_node_binfo(char *buf, int size, union nilfs_binfo *binfo)
{
	unsigned long long vblocknr = le64_to_cpu(binfo->bi_v.bi_vblocknr);

	return snprintf(buf, size, "file node(vblocknr=%llu)", vblocknr);
}
#endif

/*
 * Callback table built from the file_* collectors and writers above:
 * data blocks carry a struct nilfs_binfo_v, node/bmap blocks a single
 * virtual block number.
 */
struct nilfs_sc_operations sc_file_ops = {
	.collect_data = collect_file_data,
	.collect_node = collect_file_node,
	.collect_bmap = collect_file_bmap,
	.write_data_binfo = write_file_data_binfo,
	.write_node_binfo = write_file_node_binfo,
#ifdef CONFIG_NILFS_DEBUG
	.print_data_binfo = print_file_data_binfo,
	.print_node_binfo = print_file_node_binfo,
#endif
};

/*
 * collect_dat_data - collect_data callback for DAT data blocks
 *
 * Same as collect_file_data() except that the binfo reserved for the block
 * is a single __le64 (see write_dat_data_binfo()).
 */
static int collect_dat_data(struct nilfs_sc_info *sci,
			    struct buffer_head *bh,
			    struct inode *inode)
{
	int err;

#ifdef CONFIG_NILFS_DEBUG
	/* Clean buffers are excluded by lookup_dirty_data_buffers() */
	BUG_ON(!buffer_dirty(bh));
#endif
	err = nilfs_bmap_propagate(NILFS_I(inode)->i_bmap, bh);
	if (unlikely(err < 0))
		return nilfs_handle_bmap_error(err, __FUNCTION__, inode, sci->sc_super);

	err = add_file_block(sci, bh, inode, sizeof(__le64));
	if (err)
		return err;

	sci->sc_datablk_cnt++;
	return 0;
}

/*
 * collect_dat_bmap - collect_bmap callback for the DAT
 *
 * Adds the dirty bmap buffer @bh to the current segment with a
 * struct nilfs_binfo_dat sized binfo (see write_dat_node_binfo()).
 */
static int collect_dat_bmap(struct nilfs_sc_info *sci,
			    struct buffer_head *bh,
			    struct inode *inode)
{
	const unsigned binfo_size = sizeof(struct nilfs_binfo_dat);

	BUG_ON(!buffer_dirty(bh));
	return add_file_block(sci, bh, inode, binfo_size);
}

/*
 * write_dat_data_binfo - store the binfo of a DAT data block
 *
 * Only the block offset is stored in the summary header at *@pos; *@pos is
 * advanced past it.
 */
static void write_dat_data_binfo(struct nilfs_sc_info *sci, unsigned long *pos,
				 union nilfs_binfo *binfo)
{
	__le64 *dst;

	dst = alloc_from_header(sci, pos, sizeof(*dst));
	*dst = binfo->bi_dat.bi_blkoff;
}

/*
 * write_dat_node_binfo - store the binfo of a DAT node block
 *
 * Copies binfo->bi_dat into the summary header at *@pos and advances *@pos.
 */
static void write_dat_node_binfo(struct nilfs_sc_info *sci, unsigned long *pos,
				 union nilfs_binfo *binfo)
{
	struct nilfs_binfo_dat *dst;

	dst = alloc_from_header(sci, pos, sizeof(*dst));
	*dst = binfo->bi_dat;
}

#ifdef CONFIG_NILFS_DEBUG
/* Format the binfo of a DAT data block into @buf; returns snprintf()'s result. */
static int print_dat_data_binfo(char *buf, int size, union nilfs_binfo *binfo)
{
	unsigned long long blkoff = le64_to_cpu(binfo->bi_dat.bi_blkoff);

	return snprintf(buf, size, "dat data(blkoff=%llu)", blkoff);
}

/* Format the binfo of a DAT node block into @buf; returns snprintf()'s result. */
static int print_dat_node_binfo(char *buf, int size, union nilfs_binfo *binfo)
{
	unsigned long long blkoff = le64_to_cpu(binfo->bi_dat.bi_blkoff);
	int level = (int)binfo->bi_dat.bi_level;

	return snprintf(buf, size, "dat node(blkoff=%llu, level=%d)",
			blkoff, level);
}
#endif

/*
 * Callback table for the DAT.  It shares collect_file_node() with
 * sc_file_ops but uses DAT-specific binfo layouts: a bare block offset for
 * data blocks and a struct nilfs_binfo_dat for node/bmap blocks.
 */
struct nilfs_sc_operations sc_dat_ops = {
	.collect_data = collect_dat_data,
	.collect_node = collect_file_node,
	.collect_bmap = collect_dat_bmap,
	.write_data_binfo = write_dat_data_binfo,
	.write_node_binfo = write_dat_node_binfo,
#ifdef CONFIG_NILFS_DEBUG
	.print_data_binfo = print_dat_data_binfo,
	.print_node_binfo = print_dat_node_binfo,
#endif
};

#ifdef CONFIG_NILFS_DEBUG
/* Format the binfo of a dsync data block into @buf; returns snprintf()'s result. */
static int print_dsync_data_binfo(char *buf, int size, union nilfs_binfo *binfo)
{
	unsigned long long vblocknr = le64_to_cpu(binfo->bi_v.bi_vblocknr);
	unsigned long long blkoff = le64_to_cpu(binfo->bi_v.bi_blkoff);

	return snprintf(buf, size, "dsync data(vblocknr=%llu, blkoff=%llu)",
			vblocknr, blkoff);
}

/* sc_dsync_ops has no node callbacks, so a node binfo is reported as a bug. */
static int print_dsync_node_binfo(char *buf, int size, union nilfs_binfo *binfo)
{
	return snprintf(buf, size, "dsync node(<BUG>)");
}
#endif

/*
 * Callback table for data-only collection: file data blocks are handled as
 * in sc_file_ops, while the node and bmap callbacks are left NULL.
 * NOTE(review): presumably used for the SC_LSEG_DSYNC mode -- confirm
 * against the users of this table.
 */
struct nilfs_sc_operations sc_dsync_ops = {
	.collect_data = collect_file_data,
	.collect_node = NULL,
	.collect_bmap = NULL,
	.write_data_binfo = write_file_data_binfo,
	.write_node_binfo = NULL,
#ifdef CONFIG_NILFS_DEBUG
	.print_data_binfo = print_dsync_data_binfo,
	.print_node_binfo = print_dsync_node_binfo,
#endif
};

static int
nilfs_prepare_data_page(struct inode *inode, struct page *page)
{
	int err = 0;

	lock_page(page);
	if (!page_has_buffers(page)) {
		seg_debug(3, "page has no buffer heads. allocating.. "
			  "(page=%p)\n", page);
		create_empty_buffers(page, 1 << inode->i_blkbits, 0);
	}
	if (!PageMappedToDisk(page)) {
		struct buffer_head *bh, *head;
		sector_t blkoff = page->index >> (PAGE_SHIFT - inode->i_blkbits);

		int non_mapped = 0;

		bh = head = page_buffers(page);
		do {
			if (!buffer_mapped(bh)) {
				if (!buffer_dirty(bh)) {
					non_mapped++;
					continue;
				}
				err = nilfs_get_block(inode, blkoff, bh, 1);
				if (unlikely(err)) {
					seg_debug(2, "nilfs_get_block() failed (err=%d)\n",
						  err);
					goto out_unlock;
				}
			}
		} while (blkoff++, (bh = bh->b_this_page) != head);
		if (!non_mapped)
			SetPageMappedToDisk(page);
	}

 out_unlock:
	unlock_page(page);
	return err;
}

/*
 * lookup_dirty_data_buffers - hand dirty data buffers of an inode to a callback
 * @sci: nilfs_sc_info
 * @inode: inode whose page cache is scanned
 * @proc: callback invoked for each dirty buffer
 *
 * Dirty-tagged pages of the inode's mapping are looked up in batches of
 * SC_N_PAGEVEC, and @proc is called for every dirty buffer on them.
 *
 * Return Value: 0 when all dirty pages were processed, or the first error
 * returned by nilfs_prepare_data_page() or @proc.
 */
static int
lookup_dirty_data_buffers(struct nilfs_sc_info *sci, struct inode *inode,
			  collect_proc_t proc)
{
	struct address_space *mapping = inode->i_mapping;
	struct page *pages[SC_N_PAGEVEC];
	unsigned int nr_page;
	pgoff_t index = 0;
	int i, n;
	int err = 0, can_copy = !S_ISDIR(inode->i_mode);
	
	seg_debug(3, "called (ino=%lu)\n", inode->i_ino);
 repeat:
	/* Pick up the next batch of dirty pages and take a reference on each */
	READ_LOCK_IRQ(&mapping->tree_lock);
	nr_page = radix_tree_gang_lookup_tag(&mapping->page_tree,
					     (void **)pages,
					     index,
					     SC_N_PAGEVEC,
					     PAGECACHE_TAG_DIRTY);
	for (i = 0; i < nr_page; i++)
		page_cache_get(pages[i]);
	READ_UNLOCK_IRQ(&mapping->tree_lock);

	if (nr_page == 0) {
		seg_debug(3, "done (ino=%lu)\n", inode->i_ino);
		return 0;
	}
	/* The next lookup starts right after the last page found */
	index = pages[nr_page - 1]->index + 1;

	for (i = 0; i < nr_page; i++) {
		struct buffer_head *bh, *head;
		struct page *page = pages[i];

		/* After an error, the remaining pages are only released */
		if (err)
			goto skip_page;

		if (mapping->host) {
			err = nilfs_prepare_data_page(inode, page);
			if (unlikely(err))
				goto skip_page;
		}

		bh = head = page_buffers(page);
		n = 0;	/* number of buffers collected from this page */
		do {
			if (buffer_dirty(bh)) {
				get_bh(bh);
				err = (*proc)(sci, bh, inode);
				put_bh(bh);
				if (unlikely(err)) {
					if (!n || err != -E2BIG)
						goto skip_page;
					break;
					/* partial blocks of mmapped
					   page should be copied */
				}
				n++;
			}
			bh = bh->b_this_page;
		} while (bh != head);

		if (can_copy) {
			/* Decide whether to copy buffer or not.
			   This must be done after a writeback flag
			   is set on the page */
			if (page_mapped(page)) {
				nilfs_set_page_to_be_frozen(page);
				ClearPageChecked(page);
			} else if (PageChecked(page)) {
				nilfs_set_page_to_be_frozen(page);
			}
		}
	skip_page:
		page_cache_release(page);
	}
	if (!err)
		goto repeat;

	seg_debug(3, "failed (err=%d, ino=%lu)\n", err, inode->i_ino);
	return err;
}

/*
 * lookup_dirty_node_buffers - feed the dirty node buffers of a file to @proc
 * @sci: segment constructor info, passed through to @proc
 * @inode: inode whose btnode cache is scanned
 * @proc: collector callback invoked for every gathered buffer
 *
 * Works in two phases: first every buffer that is dirty but not
 * "prepare dirty" is pinned and queued on a private list, then the list
 * is drained through @proc.  Once @proc fails, the remaining buffers are
 * only unlinked and released.  Returns 0 or the first error from @proc.
 */
static int
lookup_dirty_node_buffers(struct nilfs_sc_info *sci, struct inode *inode,
			  collect_proc_t proc)
{
	struct nilfs_inode_info *ii = NILFS_I(inode);
	struct page *pvec[SC_N_PAGEVEC];
	struct buffer_head *bh, *next;
	LIST_HEAD(collected);
	pgoff_t start = 0;
	unsigned int found, k;
	int err = 0;

	seg_debug(3, "called (ino=%lu)\n", inode->i_ino);

	/* Phase 1: gather the candidate buffers, one page batch at a time. */
	while ((found = nilfs_btnode_gang_lookup_tag(&ii->i_btnode_cache,
						     pvec,
						     start,
						     SC_N_PAGEVEC,
						     PAGECACHE_TAG_DIRTY)) != 0) {
		start = pvec[found - 1]->index + 1;

		for (k = 0; k < found; k++) {
			struct buffer_head *first = page_buffers(pvec[k]);

			bh = first;
			do {
				if (buffer_dirty(bh) &&
				    !buffer_prepare_dirty(bh)) {
					get_bh(bh);
					list_add_tail(&bh->b_assoc_buffers,
						      &collected);
				}
				bh = bh->b_this_page;
			} while (bh != first);
			page_cache_release(pvec[k]);
		}
	}

	/* Phase 2: hand the buffers over and drop the references taken above. */
	list_for_each_entry_safe(bh, next, &collected, b_assoc_buffers) {
		if (likely(!err))
			err = (*proc)(sci, bh, inode);

		list_del_init(&bh->b_assoc_buffers);
		brelse(bh);
	}
	seg_debug(3, "done (err=%d, ino=%lu)\n", err, inode->i_ino);
	return err;
}

/*
 * lookup_dirty_bmap_buffers - feed the dirty bmap buffers of a file to @proc
 * @sci: segment constructor info, passed through to @proc
 * @inode: inode whose bmap is queried
 * @proc: collector callback invoked for every buffer returned
 *
 * Buffers are fetched in batches of up to SC_N_BHVEC between the
 * lookup begin/end calls.  Every fetched buffer is released with brelse(),
 * also after @proc has failed.  Returns 0 or the first error from @proc.
 */
static int
lookup_dirty_bmap_buffers(struct nilfs_sc_info *sci, struct inode *inode,
			  collect_proc_t proc)
{
	struct nilfs_inode_info *ii = NILFS_I(inode);
	struct buffer_head *batch[SC_N_BHVEC];
	unsigned found, k;
	int err = 0;

	nilfs_bmap_lookup_dirty_buffers_begin(ii->i_bmap);
	/* err starts out as 0, so at least one lookup is always attempted */
	while (likely(!err)) {
		found = nilfs_bmap_lookup_dirty_buffers(ii->i_bmap, batch, SC_N_BHVEC);
		if (unlikely(!found))
			break;

		for (k = 0; k < found; k++) {
			if (likely(!err))
				err = (*proc)(sci, batch[k], inode);
			brelse(batch[k]);
		}
	}
	nilfs_bmap_lookup_dirty_buffers_end(ii->i_bmap);
	return err;
}

/*
 * nilfs_dispose_list - empty a list of inodes and iput() them
 * @sbi: nilfs_sb_info providing s_inode_lock and s_dirty_files
 * @head: list of nilfs_inode_info objects linked through i_dirty
 * @force: if nonzero, dispose of every inode and release any i_bh it
 *         still holds; nothing is re-queued
 *
 * Without @force, an inode that has NILFS_I_DIRTY set is flagged
 * NILFS_I_QUEUED and moved to sbi->s_dirty_files instead of being put.
 * iput() is called outside s_inode_lock, in batches of at most
 * SC_N_INODEVEC inodes.
 */
static void
nilfs_dispose_list(struct nilfs_sb_info *sbi, struct list_head *head,
		   int force)
{
	struct nilfs_inode_info *ii, *n;
	struct nilfs_inode_info *ivec[SC_N_INODEVEC], **pii;
	unsigned nv = 0;

	while (!list_empty(head)) {
		spin_lock(&sbi->s_inode_lock);
		list_for_each_entry_safe(ii, n, head, i_dirty) {
			seg_debug(3, "deleting file (ino=%lu) from a list\n",
				  ii->vfs_inode.i_ino);
			list_del_init(&ii->i_dirty);
			if (force) {
				if (unlikely(ii->i_bh)) {
					brelse(ii->i_bh);
					ii->i_bh = NULL;
				}
			} else if (test_bit(NILFS_I_DIRTY, &ii->i_state)) {
				/* dirtied again: hand it back to the dirty file list */
				set_bit(NILFS_I_QUEUED, &ii->i_state);
				list_add_tail(&ii->i_dirty, &sbi->s_dirty_files);
				continue;
			}
			ivec[nv++] = ii;
			/* vector full: leave the lock and put this batch first */
			if (nv == SC_N_INODEVEC)
				break;
		}
		spin_unlock(&sbi->s_inode_lock);

		/* nv counts back down to 0 here, ready for the next batch */
		for (pii = ivec; nv > 0; pii++, nv--)
			iput(&(*pii)->vfs_inode);
	}
}

/*
 * nilfs_clear_mdt_dirty - clear the dirty state of metadata files whose
 * collection stage has been completed
 * @sbi: nilfs_sb_info (provides the ifile and the_nilfs)
 * @sci: segment constructor info holding the current stage
 * @mode: construction mode; nothing is done for SC_LSEG_DSYNC
 */
static void nilfs_clear_mdt_dirty(struct nilfs_sb_info *sbi,
				  struct nilfs_sc_info *sci, int mode)
{
	struct the_nilfs *nilfs;

	if (mode == SC_LSEG_DSYNC)
		return;

	nilfs = sbi->s_nilfs;

	if (SC_STAGE_DONE(&sci->sc_stage, SC_MAIN_IFILE))
		nilfs_mdt_clear_dirty(sbi->s_ifile);

	if (SC_STAGE_DONE(&sci->sc_stage, SC_MAIN_CPFILE))
		nilfs_mdt_clear_dirty(nilfs->ns_cpfile);

	if (SC_STAGE_DONE(&sci->sc_stage, SC_MAIN_SUFILE))
		nilfs_mdt_clear_dirty(nilfs->ns_sufile);

	if (SC_STAGE_DONE(&sci->sc_stage, SC_MAIN_DAT))
		nilfs_mdt_clear_dirty(nilfs_dat_inode(nilfs));
}

/*
 * nilfs_test_mdt_dirty - count the dirty metadata files
 * @sbi: nilfs_sb_info (provides the ifile and the_nilfs)
 * @sci: segment constructor info (not used here)
 *
 * The ifile, cpfile and sufile are always tested.  The DAT is tested only
 * when one of those is dirty or nilfs_doing_gc() is true.  Returns the
 * number of files found dirty (0 when none).
 */
static int nilfs_test_mdt_dirty(struct nilfs_sb_info *sbi, struct nilfs_sc_info *sci)
{
	struct the_nilfs *nilfs = sbi->s_nilfs;
	int ndirty = 0;

	if (nilfs_mdt_test_dirty(sbi->s_ifile))
		ndirty++;
	if (nilfs_mdt_test_dirty(nilfs->ns_cpfile))
		ndirty++;
	if (nilfs_mdt_test_dirty(nilfs->ns_sufile))
		ndirty++;

	if (!ndirty && !nilfs_doing_gc())
		return 0;

	if (nilfs_mdt_test_dirty(nilfs_dat_inode(nilfs)))
		ndirty++;
	return ndirty;
}

/*
 * nilfs_sc_clean - test whether the constructor has nothing to write
 * @sci: segment constructor info
 *
 * Returns 1 if there are no dirty files, NILFS_SC_DIRTY is clear, no
 * cleaning segments are queued and, when NILFS_SC_GC_COPY is set, the GC
 * inode list is empty; returns 0 otherwise.
 */
static int nilfs_sc_clean(struct nilfs_sc_info *sci)
{
	if (!list_empty(&sci->sc_dirty_files))
		return 0;
	if (test_bit(NILFS_SC_DIRTY, &sci->sc_flags))
		return 0;
	if (!list_empty(&sci->sc_cleaning_segments))
		return 0;
	if (test_bit(NILFS_SC_GC_COPY, &sci->sc_flags) &&
	    !list_empty(&sci->sc_gc_inodes))
		return 0;
	return 1;
}

/*
 * nilfs_confirm_construction - decide whether construction can be skipped
 * @sbi: nilfs_sb_info (provides s_dirty_files and s_inode_lock)
 * @sci: segment constructor info
 *
 * Sets NILFS_SC_DIRTY first if any metadata file is dirty.  Returns 1 when
 * sbi->s_dirty_files is empty and the constructor is clean (checked under
 * s_inode_lock), 0 otherwise.
 */
static int
nilfs_confirm_construction(struct nilfs_sb_info *sbi, struct nilfs_sc_info *sci)
{
	int skip;

	if (nilfs_test_mdt_dirty(sbi, sci))
		set_bit(NILFS_SC_DIRTY, &sci->sc_flags);

	spin_lock(&sbi->s_inode_lock);
	skip = list_empty(&sbi->s_dirty_files) && nilfs_sc_clean(sci);
	if (skip) {
		seg_debug(2, "Skipped construction (no changes)\n");
	}
	spin_unlock(&sbi->s_inode_lock);

	return skip;
}

/*
 * nilfs_reconfirm_construction - re-check whether anything is left to write
 * @sbi: nilfs_sb_info
 * @sci: segment constructor info
 *
 * Sets NILFS_SC_DIRTY first if any metadata file is dirty.  Returns 1 if
 * the constructor is clean (construction can be aborted), 0 otherwise.
 * Unlike nilfs_confirm_construction(), sbi->s_dirty_files is not examined
 * and s_inode_lock is not taken.
 */
static inline int
nilfs_reconfirm_construction(struct nilfs_sb_info *sbi, struct nilfs_sc_info *sci)
{
	if (nilfs_test_mdt_dirty(sbi, sci))
		set_bit(NILFS_SC_DIRTY, &sci->sc_flags);

	if (!nilfs_sc_clean(sci))
		return 0;

	seg_debug(2, "Aborted construction (no changes found in "
		  "reconfirmation)\n");
	return 1;
}

/*
 * create_checkpoint - get the checkpoint entry for ns_cno and dirty it
 * @sci: segment constructor info (not used here)
 * @sbi: nilfs_sb_info (provides the_nilfs)
 *
 * The checkpoint is requested with the third argument of
 * nilfs_cpfile_get_checkpoint() set to 1.  Its buffer and the cpfile are
 * marked dirty so that the checkpoint block is collected even if the entry
 * was not newly created.  Returns 0 or the error from the cpfile lookup;
 * -EINVAL and -ENOENT are treated as bugs.
 */
static inline int
create_checkpoint(struct nilfs_sc_info *sci, struct nilfs_sb_info *sbi)
{
	struct the_nilfs *nilfs = sbi->s_nilfs;
	struct nilfs_checkpoint *cp;
	struct buffer_head *cp_bh;
	int err;

	/* XXX: this interface will be changed */
	err = nilfs_cpfile_get_checkpoint(nilfs->ns_cpfile, nilfs->ns_cno, 1,
					  &cp, &cp_bh);
	if (unlikely(err)) {
		BUG_ON(err == -EINVAL || err == -ENOENT);
		return err;
	}

	/*
	 * This duplicates what cpfile does, but it is needed so that the
	 * checkpoint gets collected even when it was not newly created.
	 */
	nilfs_mdt_mark_buffer_dirty(cp_bh);
	nilfs_mdt_mark_dirty(nilfs->ns_cpfile);
	nilfs_cpfile_put_checkpoint(nilfs->ns_cpfile, nilfs->ns_cno, cp_bh);
	return 0;
}

/*
 * fill_in_checkpoint - write the current state into the checkpoint entry
 * @sci: segment constructor info (block increments, creation time,
 *       sketch inode)
 * @sbi: nilfs_sb_info (inode/block counters, ifile)
 *
 * Looks up the existing checkpoint for nilfs->ns_cno and fills in its
 * counters, creation time, checkpoint number, sketch flag and the ifile
 * inode.  Returns 0 or the error from the cpfile lookup; -EINVAL and
 * -ENOENT are treated as bugs.
 */
static int fill_in_checkpoint(struct nilfs_sc_info *sci, struct nilfs_sb_info *sbi)
{
	struct the_nilfs *nilfs = sbi->s_nilfs;
	struct nilfs_checkpoint *cp;
	struct buffer_head *cp_bh;
	int err;

	seg_debug(3, "called\n");
	err = nilfs_cpfile_get_checkpoint(nilfs->ns_cpfile, nilfs->ns_cno, 0,
					  &cp, &cp_bh);
	if (unlikely(err)) {
		BUG_ON(err == -EINVAL || err == -ENOENT);
		seg_debug(2, "failed (err=%d)\n", err);
		return err;
	}

	/* XXX: should the snapshot list reset be moved to cpfile.c? */
	cp->cp_snapshot_list.ssl_next = 0;
	cp->cp_snapshot_list.ssl_prev = 0;

	cp->cp_inodes_count = cpu_to_le64(atomic_read(&sbi->s_inodes_count));
	cp->cp_blocks_count = cpu_to_le64(atomic_read(&sbi->s_blocks_count));
	cp->cp_nblk_inc = cpu_to_le64(sci->sc_nblk_inc + sci->sc_nblk_this_inc);
	cp->cp_create = cpu_to_le64(sci->sc_seg_ctime);
	cp->cp_cno = cpu_to_le64(nilfs->ns_cno);

	/* flag the checkpoint when a non-empty sketch file exists */
	if (sci->sc_sketch_inode && i_size_read(sci->sc_sketch_inode) > 0)
		nilfs_checkpoint_set_sketch(cp);

	nilfs_write_inode_common(sbi->s_ifile, &cp->cp_ifile_inode, 1);
	nilfs_cpfile_put_checkpoint(nilfs->ns_cpfile, nilfs->ns_cno, cp_bh);
	seg_debug(3, "done\n");
	return 0;
}

/*
 * do_fill_in_file_bmap - write the bmap of one inode into its on-disk inode
 * @ii: nilfs inode info
 * @sbi: nilfs_sb_info (provides the ifile)
 *
 * Does nothing unless NILFS_I_BMAP is set in ii->i_state.  In that case
 * ii->i_bh must be set (BUG otherwise).
 */
static void do_fill_in_file_bmap(struct nilfs_inode_info *ii, struct nilfs_sb_info *sbi)
{
	struct nilfs_inode *raw;
	struct buffer_head *bh;

	if (!test_bit(NILFS_I_BMAP, &ii->i_state))
		return;

	bh = ii->i_bh;
	BUG_ON(!bh);

	raw = nilfs_ifile_map_inode(sbi->s_ifile, ii->vfs_inode.i_ino, bh);
	nilfs_bmap_write(ii->i_bmap, raw);
	nilfs_print_bmap_direct_pointers(&ii->vfs_inode, raw);
	nilfs_ifile_unmap_inode(sbi->s_ifile, ii->vfs_inode.i_ino, bh);
}

static void fill_in_file_bmap(struct nilfs_sc_info *sci, struct nilfs_sb_info *sbi)
{
	struct nilfs_inode_info *ii;

	seg_debug(3, "called\n");
	list_for_each_entry(ii, &sci->sc_dirty_files, i_dirty) {
		do_fill_in_file_bmap(ii, sbi);
		set_bit(NILFS_I_COLLECTED, &ii->i_state);
	}
	if (sci->sc_sketch_inode) {
		ii = NILFS_I(sci->sc_sketch_inode);
		if (test_bit(NILFS_I_DIRTY, &ii->i_state))
			do_fill_in_file_bmap(ii, sbi);
	}
	seg_debug(3, "done\n");
}

/*
 * CRC calculation routines
 */
/*
 * fill_in_segsum_crc - compute and store the checksum of a segment summary
 * @sci: segment constructor info (not used here)
 * @seed: CRC seed
 * @seginfo: segment whose summary is checksummed
 *
 * The CRC covers seginfo->sum.sumbytes bytes spread over the
 * seginfo->nheader header buffers, excluding the ss_datasum and ss_sumsum
 * fields at the start of the first one.  The result is stored in ss_sumsum.
 */
static void fill_in_segsum_crc(struct nilfs_sc_info *sci, u32 seed,
			       struct nilfs_segment_info *seginfo)
{
	struct nilfs_segment_summary *raw_sum;
	struct buffer_head *bh;
	unsigned long remain = seginfo->sum.sumbytes;
	unsigned long chunk, skip;
	int left;
	u32 crc;

	/* the first summary block sits at the tail of the segment buffer */
	nilfs_segbuf_seek(&seginfo->segbuf, NILFS_SEGBUF_SEEK_FROM_TAIL, 0);
	bh = nilfs_segbuf_read_bh(&seginfo->segbuf);
	raw_sum = (struct nilfs_segment_summary *)bh->b_data;

	/* the two checksum fields themselves are not covered */
	skip = sizeof(raw_sum->ss_datasum) + sizeof(raw_sum->ss_sumsum);
	chunk = min_t(unsigned long, remain, bh->b_size);
	crc = nilfs_crc32(seed, (unsigned char *)raw_sum + skip, chunk - skip);

	/* remaining header blocks, limited to what is left of sumbytes */
	left = seginfo->nheader;
	for (left--; left > 0; left--) {
		nilfs_segbuf_move_prev(&seginfo->segbuf);
		bh = nilfs_segbuf_read_bh(&seginfo->segbuf);
		remain -= chunk;
		chunk = min_t(unsigned long, remain, bh->b_size);
		crc = nilfs_crc32(crc, bh->b_data, chunk);
	}
	raw_sum->ss_sumsum = cpu_to_le32(crc);
}

/*
 * fill_in_super_root_crc - compute and store the super root checksum
 * @sci: segment constructor info (provides the super root buffer)
 * @seed: CRC seed
 *
 * The CRC covers NILFS_SR_BYTES bytes of the super root block except the
 * leading sr_sum field, into which the result is stored.
 */
static void fill_in_super_root_crc(struct nilfs_sc_info *sci, u32 seed)
{
	struct buffer_head *sr_bh = NILFS_SEGBUF_PTR_BH(&sci->sc_super_root);
	struct nilfs_super_root *sr;
	unsigned char *payload;
	u32 crc;

	BUG_ON(NILFS_SR_BYTES > sr_bh->b_size);

	sr = (struct nilfs_super_root *)sr_bh->b_data;
	payload = (unsigned char *)sr + sizeof(sr->sr_sum);
	crc = nilfs_crc32(seed, payload, NILFS_SR_BYTES - sizeof(sr->sr_sum));
	sr->sr_sum = cpu_to_le32(crc);
}

/*
 * fill_in_data_crc - compute and store the data checksum of a segment
 * @sci: segment constructor info (not used here)
 * @seed: CRC seed
 * @seginfo: segment to checksum
 *
 * The CRC covers every header block in full (minus the ss_datasum field
 * at the start of the first one) followed by every payload block.  Payload
 * pages are mapped with kmap_atomic() while they are read.  The result is
 * stored in ss_datasum of the first summary block.
 */
static void fill_in_data_crc(struct nilfs_sc_info *sci, u32 seed,
			     struct nilfs_segment_info *seginfo)
{
	struct nilfs_segment_summary *raw_sum;
	struct buffer_head *bh;
	void *vaddr;
	int left;
	u32 crc;

	/* header blocks: start at the tail and walk backwards */
	nilfs_segbuf_seek(&seginfo->segbuf, NILFS_SEGBUF_SEEK_FROM_TAIL, 0);
	bh = nilfs_segbuf_read_bh(&seginfo->segbuf);
	raw_sum = (struct nilfs_segment_summary *)bh->b_data;

	crc = nilfs_crc32(seed,
			  (unsigned char *)raw_sum + sizeof(raw_sum->ss_datasum),
			  bh->b_size - sizeof(raw_sum->ss_datasum));

	left = seginfo->nheader;
	for (left--; left > 0; left--) {
		nilfs_segbuf_move_prev(&seginfo->segbuf);
		bh = nilfs_segbuf_read_bh(&seginfo->segbuf);
		crc = nilfs_crc32(crc, bh->b_data, bh->b_size);
	}

	/* payload blocks: start at the head and walk forwards */
	nilfs_segbuf_seek(&seginfo->segbuf, NILFS_SEGBUF_SEEK_FROM_HEAD, 0);
	for (left = seginfo->npayload; left > 0; left--) {
		bh = nilfs_segbuf_read_bh(&seginfo->segbuf);
		vaddr = kmap_atomic(bh->b_page, KM_USER0);
		crc = nilfs_crc32(crc, vaddr + bh_offset(bh), bh->b_size);
		kunmap_atomic(vaddr, KM_USER0);
		nilfs_segbuf_move_next(&seginfo->segbuf);
	}
	raw_sum->ss_datasum = cpu_to_le32(crc);
}

/*
 * fill_in_checksums - fill in all checksums of the segments being built
 * @sci: segment constructor info
 * @seed: CRC seed
 * @has_sr: nonzero if a super root is included
 *
 * The super root checksum (if any) is computed first, then the summary and
 * data checksums of every segment on the sci->sc_segments ring.
 */
static void fill_in_checksums(struct nilfs_sc_info *sci, u32 seed, int has_sr)
{
	struct nilfs_segment_info *first = &sci->sc_segments;
	struct nilfs_segment_info *cur = first;

	seg_debug(3, "called\n");

	if (has_sr)
		fill_in_super_root_crc(sci, seed);

	for (;;) {
		fill_in_segsum_crc(sci, seed, cur);
		fill_in_data_crc(sci, seed, cur);

		cur = NEXT_SEGMENT_INFO(cur);
		if (cur == first)
			break;
	}

	seg_debug(3, "done\n");
}

/*
 * fill_in_super_root - fill in the super root block
 * @sci: segment constructor info (super root buffer, flags, creation time)
 * @nilfs: the_nilfs (provides the DAT, cpfile and sufile inodes)
 *
 * sr_nongc_ctime keeps nilfs->ns_nongc_ctime while NILFS_SC_GC_COPY is
 * set and takes sci->sc_seg_ctime otherwise.  The DAT, cpfile and sufile
 * inodes are written into the block at their respective offsets.
 */
static void
fill_in_super_root(struct nilfs_sc_info *sci, struct the_nilfs *nilfs)
{
	struct buffer_head *sr_bh = NILFS_SEGBUF_PTR_BH(&sci->sc_super_root);
	struct nilfs_super_root *sr = (struct nilfs_super_root *)sr_bh->b_data;
	int gc_copy = test_bit(NILFS_SC_GC_COPY, &sci->sc_flags);

	sr->sr_bytes = cpu_to_le16(NILFS_SR_BYTES);
	sr->sr_pad = 0;
	sr->sr_nongc_ctime = cpu_to_le64(gc_copy ?
					 nilfs->ns_nongc_ctime : sci->sc_seg_ctime);

	nilfs_mdt_write_inode_direct(nilfs_dat_inode(nilfs), sr_bh,
				     NILFS_SR_DAT_OFFSET(nilfs));
	nilfs_mdt_write_inode_direct(nilfs->ns_cpfile, sr_bh,
				     NILFS_SR_CPFILE_OFFSET(nilfs));
	nilfs_mdt_write_inode_direct(nilfs->ns_sufile, sr_bh,
				     NILFS_SR_SUFILE_OFFSET(nilfs));
}

/*
 * fill_in_segsum - fill in the on-disk segment summary header
 * @seginfo: segment whose in-memory summary (seginfo->sum) is written out
 *
 * The header lives in the buffer at the tail of the segment buffer.  All
 * multi-byte fields are stored little-endian.  The checksum fields are
 * not touched here.
 */
static void fill_in_segsum(struct nilfs_segment_info *seginfo)
{
	struct buffer_head *bh;
	struct nilfs_segment_summary *ss;

	nilfs_segbuf_seek(&seginfo->segbuf, NILFS_SEGBUF_SEEK_FROM_TAIL, 0);
	bh = nilfs_segbuf_read_bh(&seginfo->segbuf);
	ss = (struct nilfs_segment_summary *)bh->b_data;

	/* fixed part */
	ss->ss_magic = cpu_to_le32(NILFS_SEGSUM_MAGIC);
	ss->ss_bytes = cpu_to_le16(sizeof(struct nilfs_segment_summary));

	/* copied from the in-memory summary */
	ss->ss_flags = cpu_to_le16(seginfo->sum.flags);
	ss->ss_seq = cpu_to_le64(seginfo->sum.seg_seq);
	ss->ss_create = cpu_to_le64(seginfo->sum.ctime);
	ss->ss_next = cpu_to_le64(seginfo->sum.next);
	ss->ss_nblocks = cpu_to_le32(seginfo->sum.nblocks);
	ss->ss_nfinfo = cpu_to_le32(seginfo->sum.nfinfo);
	ss->ss_sumbytes = cpu_to_le32(seginfo->sum.sumbytes);

	ss->ss_pad = 0;
}

/*
 * nilfs_redirty_inodes - cancel the collected state of inodes on a list
 * @head: list of nilfs_inode_info objects linked through i_dirty
 *
 * Clears NILFS_I_COLLECTED on every inode of the list; inodes that had the
 * flag set are reported through seg_debug().
 */
static void nilfs_redirty_inodes(struct list_head *head)
{
	struct nilfs_inode_info *ii;

	list_for_each_entry(ii, head, i_dirty) {
		if (!test_and_clear_bit(NILFS_I_COLLECTED, &ii->i_state))
			continue;

		seg_debug(3, "redirty inode (ino=%lu)\n",
			  ii->vfs_inode.i_ino);
	}
}

/*
 * nilfs_drop_collected_inodes - finalize the inodes that were collected
 * @head: list of nilfs_inode_info objects linked through i_dirty
 *
 * For every inode that has NILFS_I_COLLECTED set, the flag is cleared
 * together with NILFS_I_INODE_DIRTY, and NILFS_I_UPDATED is set.  Inodes
 * without the flag are left untouched.
 */
static void nilfs_drop_collected_inodes(struct list_head *head)
{
	struct nilfs_inode_info *ii;

	list_for_each_entry(ii, head, i_dirty) {
		if (test_and_clear_bit(NILFS_I_COLLECTED, &ii->i_state)) {
			clear_bit(NILFS_I_INODE_DIRTY, &ii->i_state);
			seg_debug(3, "dropping collected inode (ino=%lu)\n",
				  ii->vfs_inode.i_ino);
			set_bit(NILFS_I_UPDATED, &ii->i_state);
		}
	}
}

/*
 * nilfs_cancel_free_cleaning_segments - undo the freeing of cleaning segments
 * @sci: segment constructor info (provides sc_cleaning_segments)
 * @nilfs: the_nilfs (provides the sufile)
 *
 * Walks the list from its head and cancels the free state on the sufile
 * for each entry flagged NILFS_SLH_FREED, stopping at the first entry
 * without the flag.  A failure of nilfs_sufile_cancel_free() is fatal
 * (BUG).
 */
static void
nilfs_cancel_free_cleaning_segments(struct nilfs_sc_info *sci, struct the_nilfs *nilfs)
{
	struct list_head *head = &sci->sc_cleaning_segments;
	struct nilfs_segment_list_head *slh;
	int err;

	list_for_each_entry(slh, head, list) {
		/* entries are freed in list order, so the rest is untouched */
		if (!(slh->flags & NILFS_SLH_FREED))
			break;

		err = nilfs_sufile_cancel_free(nilfs->ns_sufile, slh->segnum);
		if (likely(!err)) {
			seg_debug(2, "reallocate segment (segnum=%lu) on sufile\n", slh->segnum);
			slh->flags &= ~NILFS_SLH_FREED;
			continue;
		}

		seg_debug(1, "nilfs_sufile_cancel_free() failed (err=%d, segnum=%lu)\n",
			  err, slh->segnum);
		nilfs_print_segment_list("cleaning_segments", head, nilfs);
		BUG();
	}
}

/*
 * nilfs_prepare_free_cleaning_segments - free the cleaning segments on sufile
 * @sci: segment constructor info (provides sc_cleaning_segments)
 * @nilfs: the_nilfs (provides the sufile)
 *
 * Each entry of the list is freed with nilfs_sufile_free() and then
 * flagged NILFS_SLH_FREED.  Processing stops at the first failure, leaving
 * the earlier entries flagged.  Returns 0 or that error.
 */
static int
nilfs_prepare_free_cleaning_segments(struct nilfs_sc_info *sci, struct the_nilfs *nilfs)
{
	struct nilfs_segment_list_head *slh;
	int err = 0;

	list_for_each_entry(slh, &sci->sc_cleaning_segments, list) {
		err = nilfs_sufile_free(nilfs->ns_sufile, slh->segnum);
		if (unlikely(err))
			break;

		seg_debug(2, "free segment (segnum=%lu) on sufile\n", slh->segnum);
		slh->flags |= NILFS_SLH_FREED;
	}
	return err;
}

/*
 * nilfs_commit_free_cleaning_segments - dispose of the cleaning segment list
 * @sci: segment constructor info (provides sc_cleaning_segments)
 */
static inline void
nilfs_commit_free_cleaning_segments(struct nilfs_sc_info *sci)
{
	struct list_head *segments = &sci->sc_cleaning_segments;

	nilfs_dispose_segment_list(segments);
}

/*
 * collect_file_blocks - collect the dirty blocks of one file
 * @sci: segment constructor info (holds the resumable sub-stage)
 * @inode: file to collect
 * @sc_ops: collector callbacks for data, node and bmap blocks
 *
 * Data buffers are scanned only while the sub-stage is SC_SUB_DATA; the
 * sub-stage is then advanced.  Node and bmap buffers follow.  When all
 * three scans succeed the finfo is closed with end_finfo() and the
 * sub-stage is reset to SC_SUB_DATA.  On error the sub-stage is left as
 * it is and the error is returned.
 */
static int
collect_file_blocks(struct nilfs_sc_info *sci, struct inode *inode,
		    struct nilfs_sc_operations *sc_ops)
{
	int err = 0;

	seg_debug(3, "called (ino=%lu, main_stage=%d)\n",
		  inode->i_ino, sci->sc_stage.main);

	if (sci->sc_stage.sub == SC_SUB_DATA) {
		err = lookup_dirty_data_buffers(sci, inode, sc_ops->collect_data);
		if (likely(!err))
			sci->sc_stage.sub++;
	}

	/* from here on sci->sc_stage.sub == SC_SUB_NODE unless err is set */
	if (likely(!err))
		err = lookup_dirty_node_buffers(sci, inode, sc_ops->collect_node);

	if (likely(!err))
		err = lookup_dirty_bmap_buffers(sci, inode, sc_ops->collect_bmap);

	if (likely(!err)) {
		end_finfo(sci, inode);
		sci->sc_stage.sub = SC_SUB_DATA;
	}

	seg_debug(3, "done (err=%d, sub-stage=%d)\n", err, sci->sc_stage.sub);
	return err;
}

/*
 * collect_segment_blocks - run the block collection state machine
 * @sci: segment constructor info; sci->sc_stage holds the current position
 * @mode: construction mode (SC_FLUSH_DATA, SC_FLUSH_IFILE, SC_LSEG_SR,
 *        SC_LSEG_DSYNC, ...)
 *
 * The switch is entered at sci->sc_stage.main and every case deliberately
 * falls through into the next stage.  When a stage fails, the function
 * returns with the stage (and, for the inode lists, the list position)
 * kept in sci->sc_stage, so a later call can continue from there.
 *
 * Returns 0 when the stage reached SC_MAIN_DONE (or was skipped to it), or
 * the error of the failing stage.  -E2BIG is only logged as "SEG FEED"
 * here; presumably it means the current segment is full and the caller
 * should feed a new one - confirm against the caller.
 */
static int collect_segment_blocks(struct nilfs_sc_info *sci, int mode)
{
	struct nilfs_sb_info *sbi = NILFS_SB(sci->sc_super);
	struct the_nilfs *nilfs = sbi->s_nilfs;
	struct list_head *head;
	struct nilfs_inode_info *ii;
	int err = 0;

 start:
	switch (sci->sc_stage.main) {
	case SC_MAIN_INIT:
		/*
		 * Pre-processes before first segment construction are
		 * inserted here.
		 */
		if (!test_bit(NILFS_SC_UNCLOSED, &sci->sc_flags)) {
			sci->sc_nblk_inc = 0;
			sci->sc_curseg->sum.flags = NILFS_SS_LOGBGN;
			if (mode == SC_LSEG_DSYNC) {
				seg_debug(2, "** DSYNC BEGIN\n");
				SC_STAGE_SKIP_TO(&sci->sc_stage, SC_MAIN_DSYNC);
				goto start;
			}
			seg_debug(2, "** LSEG BEGIN\n");
		} else
			seg_debug(2, "** LSEG RESUME\n");

		sci->sc_stage.dirty_file_ptr = NULL;
		sci->sc_stage.gc_inode_ptr = NULL;
		SC_STAGE_NEXT(&sci->sc_stage);
		/* fall through */
	case SC_MAIN_GC:
		seg_debug(3, "** GC INODE STAGE\n");
		if (test_bit(NILFS_SC_GC_COPY, &sci->sc_flags)) {
			head = &sci->sc_gc_inodes;
			/* resume after the saved entry, or start at the head */
			ii = list_prepare_entry(sci->sc_stage.gc_inode_ptr, head, i_dirty);
			list_for_each_entry_continue(ii, head, i_dirty) {
				err = collect_file_blocks(sci, &ii->vfs_inode, &sc_file_ops);
				if (unlikely(err)) {
					/*
					 * Save the predecessor so that the
					 * failing inode is visited again on
					 * resume.
					 */
					sci->sc_stage.gc_inode_ptr = 
						list_entry(ii->i_dirty.prev,
							   struct nilfs_inode_info,
							   i_dirty);
					goto break_or_fail;
				}
				set_bit(NILFS_I_COLLECTED, &ii->i_state);
			}
			sci->sc_stage.gc_inode_ptr = NULL;
		}
		SC_STAGE_NEXT(&sci->sc_stage);
		/* fall through */
	case SC_MAIN_FILE:
		seg_debug(3, "** FILE STAGE\n");
		head = &sci->sc_dirty_files;
		ii = list_prepare_entry(sci->sc_stage.dirty_file_ptr, head, i_dirty);
		list_for_each_entry_continue(ii, head, i_dirty) {
			clear_bit(NILFS_I_DIRTY, &ii->i_state);

			err = collect_file_blocks(sci, &ii->vfs_inode, &sc_file_ops);
			if (unlikely(err)) {
				/* same resume scheme as in the GC stage */
				sci->sc_stage.dirty_file_ptr = 
					list_entry(ii->i_dirty.prev,
						   struct nilfs_inode_info,
						   i_dirty);
				goto break_or_fail;
			}
			/* sci->sc_stage.dirty_file_ptr = NILFS_I(inode); *//* XXX: required ? */
		}
		sci->sc_stage.dirty_file_ptr = NULL;
		if (mode == SC_FLUSH_DATA) {
			/* data-only flush: stop here, without a LOGEND flag */
			SC_STAGE_SKIP_TO(&sci->sc_stage, SC_MAIN_DONE);
			seg_debug(2, "** LSEG CONTINUED\n");
			return 0;
		}
		SC_STAGE_NEXT(&sci->sc_stage);
		/* fall through */
	case SC_MAIN_SKETCH:
		seg_debug(3, "** SKETCH FILE STAGE\n");
		if (mode == SC_LSEG_SR && sci->sc_sketch_inode) {
			ii = NILFS_I(sci->sc_sketch_inode);
			if (test_bit(NILFS_I_DIRTY, &ii->i_state)) {
				/* stamp the sketch file with the segment creation time */
				sci->sc_sketch_inode->i_mtime.tv_sec
					= sci->sc_sketch_inode->i_ctime.tv_sec = sci->sc_seg_ctime;
				err = nilfs_mark_inode_dirty(sci->sc_sketch_inode);
				if (unlikely(err))
					goto break_or_fail;
			}
			err = collect_file_blocks(sci, sci->sc_sketch_inode, &sc_file_ops);
			if (unlikely(err))
				goto break_or_fail;
		}
		SC_STAGE_NEXT(&sci->sc_stage);
		/* fall through */
	case SC_MAIN_IFILE:
		seg_debug(3, "** IFILE STAGE\n");
		err = collect_file_blocks(sci, sbi->s_ifile, &sc_file_ops);
		if (unlikely(err))
			break;
		if (mode == SC_FLUSH_IFILE) {
			/* flush without checkpoint: stop here, without a LOGEND flag */
			SC_STAGE_SKIP_TO(&sci->sc_stage, SC_MAIN_DONE);
			seg_debug(2, "** LSEG CONTINUED\n");
			return 0;
		}
		SC_STAGE_NEXT(&sci->sc_stage);
		/*
		 * Creating a checkpoint.
		 * NOTE(review): the stage was already advanced above, so a
		 * failure of create_checkpoint() leaves the stage at the one
		 * following SC_MAIN_IFILE - confirm this is intended.
		 */
		err = create_checkpoint(sci, sbi);
		if (unlikely(err))
			break;
		/* fall through */
	case SC_MAIN_CPFILE:
		seg_debug(3, "** CP STAGE\n");
		err = collect_file_blocks(sci, nilfs->ns_cpfile, &sc_file_ops);
		if (unlikely(err))
			break;
		SC_STAGE_NEXT(&sci->sc_stage);
		/* fall through */
	case SC_MAIN_SUFILE:
		seg_debug(3, "** SUFILE STAGE\n");
		/*
		 * NOTE(review): this runs again whenever the stage is
		 * re-entered at SC_MAIN_SUFILE; verify how that interacts
		 * with entries already flagged NILFS_SLH_FREED.
		 */
		err = nilfs_prepare_free_cleaning_segments(sci, nilfs);
		if (unlikely(err))
			break;
		err = collect_file_blocks(sci, nilfs->ns_sufile, &sc_file_ops);
		if (unlikely(err))
			break;
		SC_STAGE_NEXT(&sci->sc_stage);
		/* fall through */
	case SC_MAIN_DAT:
		seg_debug(3, "** DAT STAGE\n");
		err = collect_file_blocks(sci, nilfs_dat_inode(nilfs), &sc_dat_ops);
		if (unlikely(err))
			break;
		SC_STAGE_NEXT(&sci->sc_stage);
		/* fall through */
	case SC_MAIN_SR:
		seg_debug(3, "** SR STAGE\n");
		if (mode == SC_LSEG_SR) {
			/* Appending a super root */
			err = add_super_root_buffer(sci);
			if (unlikely(err))
				break;
			seg_debug(3, "add a super root block\n");
		}
		SC_STAGE_NEXT(&sci->sc_stage);
		/* fall through */
	case SC_MAIN_DONE:
		/*
		 * Post processes after final segment construction
		 * can be inserted here.
		 */
		sci->sc_curseg->sum.flags |= NILFS_SS_LOGEND;
		seg_debug(2, "** LSEG END\n");
		return 0;
	case SC_MAIN_DSYNC:
		/* only reached through SC_STAGE_SKIP_TO() in SC_MAIN_INIT */
		sci->sc_curseg->sum.flags |= NILFS_SS_SYNDT;
		ii = sci->sc_stage.dirty_file_ptr;
		/*
		 * NOTE(review): err is still 0 on this break, so 0 is
		 * returned without advancing the stage or setting
		 * NILFS_SS_LOGEND - confirm this is intended.
		 */
		if (!test_bit(NILFS_I_BUSY, &ii->i_state))
			break;
		err = lookup_dirty_data_buffers(sci, &ii->vfs_inode, collect_file_data);
		if (unlikely(err))
			break;
		end_finfo(sci, &ii->vfs_inode);
		sci->sc_stage.dirty_file_ptr = NULL;
		sci->sc_curseg->sum.flags |= NILFS_SS_LOGEND;
		SC_STAGE_SKIP_TO(&sci->sc_stage, SC_MAIN_DONE);
		seg_debug(2, "** DSYNC END\n");
		return 0;
	default:
		BUG();
	}
 break_or_fail:
	if (unlikely(err)) {
		if (err == -E2BIG)
			seg_debug(2, "** SEG FEED(stage=%d)\n", sci->sc_stage.main);
		else
			seg_debug(2, "** ERROR(err=%d, stage=%d)\n", err, sci->sc_stage.main);
	}
	return err;
}

/**
 * follow_up_check - check whether the current segment is worth writing
 * @sci: nilfs_sc_info
 * @has_sr: nonzero if the current segment includes a super root
 * @mode: construction mode
 *
 * A simplex segment whose payload consists of nothing, or of the super
 * root block only, is rejected.  In that case NILFS_SC_DIRTY is cleared
 * unless @mode is SC_LSEG_DSYNC.
 *
 * Returns 1 if the construction should be aborted, 0 otherwise.
 */
static inline int follow_up_check(struct nilfs_sc_info *sci, int has_sr, int mode)
{
	struct nilfs_segment_info *cur = sci->sc_curseg;

	if (!NILFS_SEG_SIMPLEX(&cur->sum))
		return 0;

	/* number of payload blocks versus the super root (if any) */
	if (cur->sum.nblocks - cur->sum.nsumblk > !!has_sr)
		return 0;

	if (mode != SC_LSEG_DSYNC)
		clear_bit(NILFS_SC_DIRTY, &sci->sc_flags);
	seg_debug(2, "Aborted construction (no blocks were collected)\n");
	return 1;
}

/*
 * update_payload_blocknr - assign disk block numbers to the payload blocks
 * @sci: segment constructor info
 * @seginfo: segment whose payload buffers are processed
 * @mode: construction mode; SC_LSEG_DSYNC selects sc_dsync_ops
 *
 * Walks the payload buffers from the head of the segment buffer.  Block
 * numbers are consecutive and start right behind the summary blocks
 * (pseg_start + sum.nsumblk).  For each buffer nilfs_bmap_assign() is
 * called and the resulting binfo is written to the summary area through
 * the data or node writer of the selected operation set; the first
 * fi_ndatablk blocks of every finfo are treated as data blocks, the rest
 * as node blocks.
 *
 * Whenever the walk moves on to another page, and once more at the end,
 * freeze_blocks() is called for the page just left if it is marked to be
 * frozen.
 *
 * Returns 0 on success, the result of nilfs_handle_bmap_error() if the
 * bmap assignment fails, or the error from freeze_blocks().
 */
static int update_payload_blocknr(struct nilfs_sc_info *sci,
				  struct nilfs_segment_info *seginfo, int mode)
{
	struct page *prev_page = NULL;
	struct nilfs_finfo *finfo = NULL;
	struct inode *inode = NULL;
	sector_t blocknr = seginfo->pseg_start + seginfo->sum.nsumblk;
	unsigned long nfinfo = seginfo->sum.nfinfo;
	unsigned long pos = sizeof(struct nilfs_segment_summary);
	unsigned long nblocks = 0, ndatablk = 0;
	struct nilfs_sc_operations *sc_op = NULL;
	struct buffer_head *bh;
	nilfs_segbuf_ptr_t sbh_ptr = NILFS_SEGBUF_NULL, bh_ptr;
	union nilfs_binfo binfo;
	ino_t ino = 0;
	int nr_page_blocks = 0;
	int rest, err = 0;

	seg_debug(3, "called\n");
	if (!nfinfo)
		goto out;

	nilfs_segbuf_seek(&seginfo->segbuf, NILFS_SEGBUF_SEEK_FROM_HEAD, 0);
	rest = seginfo->npayload;
	while (rest-- > 0) {
		bh = nilfs_segbuf_read_bh(&seginfo->segbuf);
		nilfs_segbuf_read_ptr(&seginfo->segbuf, &bh_ptr);
		if (!finfo) {
			/* First block of a file: read its finfo header. */
			finfo = alloc_from_header(sci, &pos, sizeof(*finfo));
			ino = le64_to_cpu(finfo->fi_ino);
			nblocks = le32_to_cpu(finfo->fi_nblocks);
			ndatablk = le32_to_cpu(finfo->fi_ndatablk);
			nilfs_print_finfo(blocknr, ino, nblocks, ndatablk);

			/* mappings without a host inode are resolved via NILFS_AS_I() */
			inode = bh->b_page->mapping->host;
			if (!inode)
				inode = NILFS_AS_I(bh->b_page->mapping);

			if (mode == SC_LSEG_DSYNC)
				sc_op = &sc_dsync_ops;
			else if (ino == NILFS_DAT_INO)
				sc_op = &sc_dat_ops;
			else /* file blocks */
				sc_op = &sc_file_ops;
		}
		err = nilfs_bmap_assign(NILFS_I(inode)->i_bmap, bh, blocknr, &binfo);
		if (unlikely(err))
			goto failed_bmap;

		/*
		 * The binfo union is laid out by the writer that is used, so
		 * the debug printer has to be the matching one.
		 */
		if (ndatablk > 0) {
			sc_op->write_data_binfo(sci, &pos, &binfo);
			nilfs_print_binfo(blocknr, &binfo, sc_op->print_data_binfo);
		} else {
			sc_op->write_node_binfo(sci, &pos, &binfo);
			nilfs_print_binfo(blocknr, &binfo, sc_op->print_node_binfo);
		}

		/* go back to this buffer's position in the segment buffer */
		nilfs_segbuf_set_ptr(&seginfo->segbuf, &bh_ptr);
		if (prev_page != bh->b_page) {
			/* Page boundary: finish the page we have just left. */
			if (prev_page && nilfs_page_to_be_frozen(prev_page)) {
				err = freeze_blocks(&sbh_ptr, nr_page_blocks);
				if (unlikely(err))
					goto failed;
			}
			/* remember where the run of the new page starts */
			nilfs_segbuf_read_ptr(&seginfo->segbuf, &sbh_ptr);
			nr_page_blocks = 0;
		}
		prev_page = bh->b_page;
		blocknr++;
		nr_page_blocks++;
		if (--nblocks == 0) {
			/* last block of this file; the next one has a new finfo */
			finfo = NULL;
			if (--nfinfo == 0) {
				nilfs_segbuf_move_next(&seginfo->segbuf);
				break;
			}
		} else if (ndatablk > 0)
			ndatablk--;

		nilfs_segbuf_move_next(&seginfo->segbuf);
	}
	/* the last page has no successor that would trigger the check above */
	if (prev_page && nilfs_page_to_be_frozen(prev_page)) {
		err = freeze_blocks(&sbh_ptr, nr_page_blocks);
		if (unlikely(err))
			goto failed;
	}
 out:
	seg_debug(3, "done\n");
	return 0;
 failed_bmap:
	err = nilfs_handle_bmap_error(err, __FUNCTION__, inode, sci->sc_super);
 failed:
	seg_debug(2, "failed\n");
	return err;
}

/*
 * abort_segment_io - unlock the buffers of an aborted segment write
 * @sci: segment constructor info
 *
 * For every segment on the sci->sc_segments ring, seginfo->submit_blocks
 * buffers are unlocked.  Up to sum.nsumblk of them are reached from the
 * tail side of the segment buffer; any excess is reached from the head
 * side.  (The write path submits header buffers from the tail and payload
 * buffers from the head.)
 *
 * Afterwards NILFS_I_COLLECTED is cleared again on the inodes of the dirty
 * file list and/or the GC inode list, see the comment below.
 */
static void abort_segment_io(struct nilfs_sc_info *sci)
{
	struct nilfs_segment_info *seginfo = &sci->sc_segments;

	do {
		struct nilfs_segment_buffer *segbuf = &seginfo->segbuf;
		struct buffer_head *bh;
		int rest = seginfo->submit_blocks;

		seg_debug(2, "aborting segment (nblk_submit=%u)\n", seginfo->submit_blocks);
		if (seginfo->submit_blocks > seginfo->sum.nsumblk) {
			/* blocks beyond the summary count: walk back from the head side */
			rest -= seginfo->sum.nsumblk;
			nilfs_segbuf_seek(segbuf, NILFS_SEGBUF_SEEK_FROM_HEAD, rest);
			while (rest-- > 0) {
				nilfs_segbuf_move_prev(segbuf);
				bh = nilfs_segbuf_read_bh(segbuf);
				unlock_buffer(bh);
			}
			rest = seginfo->sum.nsumblk;
		}
		/* summary blocks (or all submitted ones if fewer): tail side */
		nilfs_segbuf_seek(segbuf, NILFS_SEGBUF_SEEK_FROM_TAIL, rest);
		while (rest-- > 0) {
			nilfs_segbuf_move_next(segbuf);
			bh = nilfs_segbuf_read_bh(segbuf);
			unlock_buffer(bh);
		}
		seginfo = NEXT_SEGMENT_INFO(seginfo);
	} while (seginfo != &sci->sc_segments);

	/*
	 * When started the ifile stage, dirty inodes come into the collected
	 * state.  If the current partial segment includes regular files,
	 * the collected state must be canceled to rewrite bmap roots of the files
	 * in the next construction.
	 */
	if (SC_STAGE_DONE(&sci->sc_stage, SC_MAIN_FILE) &&
	    SC_STAGE_STARTED(&sci->sc_stage, SC_MAIN_IFILE))
		nilfs_redirty_inodes(&sci->sc_dirty_files);

	if (test_bit(NILFS_SC_GC_COPY, &sci->sc_flags))
		nilfs_redirty_inodes(&sci->sc_gc_inodes);
}

/*
 * complete_segment_io - finalize the state after the segments were written
 * @sci: segment constructor info
 * @update_sr: nonzero if a super root was written
 *
 * For every segment on the sci->sc_segments ring, all header and payload
 * buffers are marked uptodate, cleaned and unlocked, and the
 * NILFS_SC_UNCLOSED flag is updated from the LOGBGN/LOGEND flags of
 * non-simplex segments.  Then the collected inodes are dropped, the
 * written-block counter is advanced, and the position of the last segment
 * on the ring is recorded in @nilfs as the point from which the next
 * construction starts.
 */
static void complete_segment_io(struct nilfs_sc_info *sci, int update_sr)
{
	struct nilfs_segment_info *seginfo = &sci->sc_segments;
	struct nilfs_sb_info *sbi = NILFS_SB(sci->sc_super);
	struct the_nilfs *nilfs = sbi->s_nilfs;

	do {
		struct nilfs_segment_buffer *segbuf = &seginfo->segbuf;
		struct buffer_head *bh;
		int rest;

		seg_debug(3, "completing segment (flags=0x%x)\n", seginfo->sum.flags);

		/* header buffers: from the tail, walking backwards */
		nilfs_segbuf_seek(segbuf, NILFS_SEGBUF_SEEK_FROM_TAIL, 0);
		rest = seginfo->nheader;
		while (rest-- > 0) {
			bh = nilfs_segbuf_read_bh(segbuf);
			set_buffer_uptodate(bh);
			clear_buffer_dirty(bh);
			unlock_buffer(bh);
			nilfs_segbuf_move_prev(segbuf);
		}

		/* payload buffers: from the head, walking forwards */
		nilfs_segbuf_seek(segbuf, NILFS_SEGBUF_SEEK_FROM_HEAD, 0);
		rest = seginfo->npayload;
		while (rest-- > 0) {
			bh = nilfs_segbuf_read_bh(segbuf);
			set_buffer_uptodate(bh);
			clear_buffer_dirty(bh);
			clear_buffer_nilfs_volatile(bh);
			unlock_buffer(bh);
			nilfs_segbuf_move_next(segbuf);
		}

		if (!NILFS_SEG_SIMPLEX(&seginfo->sum)) {
			if (NILFS_SEG_LOGBGN(&seginfo->sum)) {
				set_bit(NILFS_SC_UNCLOSED, &sci->sc_flags);
				/* remember when the unclosed logical segment began */
				sci->sc_lseg_stime = jiffies;
				seg_debug(3, "set UNCLOSED flag\n");
			}
			if (NILFS_SEG_LOGEND(&seginfo->sum)) {
				clear_bit(NILFS_SC_UNCLOSED, &sci->sc_flags);
				seg_debug(3, "cleared UNCLOSED flag\n");
			}
		}
		seginfo = NEXT_SEGMENT_INFO(seginfo);
	} while (seginfo != &sci->sc_segments);
	
	nilfs_drop_collected_inodes(&sci->sc_dirty_files);

	if (test_bit(NILFS_SC_GC_COPY, &sci->sc_flags)) {
		nilfs_drop_collected_inodes(&sci->sc_gc_inodes);
		if (update_sr)
			nilfs_commit_gcdat_inode(nilfs);
	} else {
		/* non-GC write: record its time and wake up the cleaner wait queue */
		nilfs->ns_nongc_ctime = sci->sc_seg_ctime;
		set_nilfs_cond_nongc_write(nilfs);
		wake_up(&nilfs->ns_cleanerd_wq);
	}

	sci->sc_nblk_inc += sci->sc_nblk_this_inc;

	/* from here on seginfo is the last segment on the ring */
	seginfo = PREV_SEGMENT_INFO(&sci->sc_segments);
	if (update_sr) {
		nilfs_set_last_segment(nilfs, seginfo->pseg_start, seginfo->sum.seg_seq,
				       nilfs->ns_cno);

		clear_bit(NILFS_SC_DIRTY, &sci->sc_flags);
		set_bit(NILFS_SC_SUPER_ROOT, &sci->sc_flags);
		seg_debug(2, "completed a segment having a super root "
			  "(seq=%llu, start=%llu, cno=%llu)\n",
			  (unsigned long long)seginfo->sum.seg_seq,
			  (unsigned long long)seginfo->pseg_start,
			  (unsigned long long)nilfs->ns_cno);
	} else
		clear_bit(NILFS_SC_SUPER_ROOT, &sci->sc_flags);

	/* the next partial segment starts right behind the one just written */
	nilfs->ns_segnum = seginfo->segnum;
	nilfs->ns_nextnum = seginfo->nextnum;
	nilfs->ns_pseg_offset = seginfo->pseg_start - seginfo->fseg_start + seginfo->sum.nblocks;
	nilfs->ns_seg_seq = seginfo->sum.seg_seq;
	nilfs->ns_ctime = sci->sc_seg_ctime;
}

/*
 * write_segment - submit the buffers of one partial segment
 * @sci: segment constructor info
 * @seginfo: segment whose header and payload buffers are submitted
 * @wi: write context, set up by nilfs_segbuf_prepare_write() in the caller
 *
 * The seginfo->nheader summary buffers are committed first, walking
 * backwards from the tail of the segment buffer, followed by the
 * seginfo->npayload payload buffers walking forwards from the head.  Each
 * buffer is locked after it has been committed successfully.  A bio still
 * pending in @wi afterwards is submitted explicitly.
 *
 * With NILFS_SR_BARRIER defined, the last payload buffer of a segment that
 * has a super root is held back and submitted on its own, with
 * BIO_RW_BARRIER if the BARRIER mount option is set.  If that submission
 * returns -EOPNOTSUPP, the option is cleared and the super root is
 * submitted again without the barrier flag.
 *
 * Returns 0 on success.  On failure wi->err is incremented and the error
 * code is returned.
 */
static int write_segment(struct nilfs_sc_info *sci,
			 struct nilfs_segment_info *seginfo,
			 struct nilfs_segbuf_write_info *wi)
{
#ifdef NILFS_SR_BARRIER
	struct nilfs_sb_info *sbi = NILFS_SB(sci->sc_super);
	int has_sr = NILFS_SEG_HAS_SR(&seginfo->sum);
#endif
	struct nilfs_segment_buffer *segbuf = &seginfo->segbuf;
	struct buffer_head *bh;
	int rest, res, rw = WRITE;

	seg_debug(3, "submitting summary blocks\n");
	nilfs_segbuf_seek(segbuf, NILFS_SEGBUF_SEEK_FROM_TAIL, 0);
	rest = seginfo->nheader;
	while (rest-- > 0) {
		res = nilfs_segbuf_commit_bh(segbuf, wi, rw);
		if (unlikely(res))
			goto failed_bio;
		bh = nilfs_segbuf_read_bh(segbuf);
		lock_buffer(bh);
		nilfs_segbuf_move_prev(segbuf);
	}

	seg_debug(3, "submitting normal blocks (index=%d)\n", wi->end);
	nilfs_segbuf_seek(segbuf, NILFS_SEGBUF_SEEK_FROM_HEAD, 0);
	rest = seginfo->npayload;
#ifdef NILFS_SR_BARRIER
	/* the super root is the last payload block; it is sent separately */
	if (has_sr)
		rest--;
#endif

	while (rest-- > 0) {
		res = nilfs_segbuf_commit_bh(segbuf, wi, rw);
		if (unlikely(res))
			goto failed_bio;
		bh = nilfs_segbuf_read_bh(segbuf);
		lock_buffer(bh);
		nilfs_segbuf_move_next(segbuf);
	}

	if (wi->bio) {
		/*
		 * Last BIO is always sent through the following
		 * submission.
		 */
#ifdef NILFS_SR_BARRIER
		if (!has_sr)
#endif
			rw |= (1 << BIO_RW_SYNC);
		res = nilfs_segbuf_write(wi, rw);
		if (unlikely(res))
			goto failed_bio;
	}

#ifdef NILFS_SR_BARRIER
	if (has_sr) {
		seg_debug(3, "submitting super root block (index=%d)\n", wi->end);

		rw |= (1 << BIO_RW_SYNC);
		if (nilfs_test_opt(sbi, BARRIER))
			rw |= (1 << BIO_RW_BARRIER);
	retry_sr:
		BUG_ON(wi->bio != NULL);
		wi->nr_vecs = 1;
		res = nilfs_segbuf_commit_bh(segbuf, wi, 0);
		if (unlikely(res))
			goto failed_bio;
		/* keep the super root buffer at hand for the retry path */
		bh = nilfs_segbuf_read_bh(segbuf);
		lock_buffer(bh);

		res = nilfs_segbuf_write(wi, rw);
		if (res == -EOPNOTSUPP && (rw & (1 << BIO_RW_BARRIER))) {
			nilfs_warning(sci->sc_super, __FUNCTION__,
				      "barrier-based sync failed. "
				      "disabling barriers\n");
			nilfs_clear_opt(sbi, BARRIER);
			/* undo the commit and the lock before trying again */
			wi->end--;
			unlock_buffer(bh);
			rw &= ~(1 << BIO_RW_BARRIER);
			goto retry_sr;
		}
		if (unlikely(res))
			goto failed_bio;
	}
#endif
	seg_debug(2, "submitted a segment (pseg_start=%llu)\n",
		  (unsigned long long)seginfo->pseg_start);
	return 0;

 failed_bio:
	seg_debug(1, "Failed to write. getting back the state of segment "
		  "(pseg_start=%llu)\n", (unsigned long long)seginfo->pseg_start);
	atomic_inc(&wi->err);
	return res;
}

/*
 * write_segments - write out every segment on the sc_segments ring
 * @sci: segment constructor info
 * @nilfs: the_nilfs (provides the backing device info)
 *
 * Each segment is submitted with write_segment() and then waited for with
 * nilfs_segbuf_wait(); the wait is performed even if the submission
 * failed, and the submission error takes precedence.  The number of
 * submitted blocks is recorded in seginfo->submit_blocks.  On the first
 * failure the walk stops, io_error is set on that segment if the error is
 * -EIO, and the error is returned.  Returns 0 if all segments were
 * written.
 */
static int write_segments(struct nilfs_sc_info *sci, struct the_nilfs *nilfs)
{
	struct nilfs_segment_info *first = &sci->sc_segments;
	struct nilfs_segment_info *cur = first;
	struct nilfs_segbuf_write_info wi;
	int err, res;

	wi.sb = sci->sc_super;
	wi.bdi = nilfs->ns_bdi;
	wi.private = sci;
	wi.end_io = NULL;

	for (;;) {
		nilfs_segbuf_prepare_write(&cur->segbuf, &wi, cur->pseg_start,
					   cur->sum.nblocks);
		err = write_segment(sci, cur, &wi);

		/* always wait for whatever has been submitted so far */
		res = nilfs_segbuf_wait(&wi);
		if (likely(!err))
			err = res;

		cur->submit_blocks = wi.end;
		if (unlikely(err))
			break;

		cur = NEXT_SEGMENT_INFO(cur);
		if (cur == first)
			return 0;
	}

	if (err == -EIO)
		cur->io_error = 1;
	return err;
}

/*
 * finish_current_segment - queue the current full segment as used
 * @sci: segment constructor info (provides sc_used_active_segments)
 * @nilfs: the_nilfs (provides ns_segnum and the sufile)
 *
 * Allocates a segment list head for nilfs->ns_segnum, opens it, appends
 * it to sci->sc_used_active_segments with NILFS_SLH_COMMIT set, marks its
 * segment usage buffer and the sufile dirty, and closes it again.
 *
 * Returns 0 on success, -ENOMEM if the allocation fails, or the error
 * from nilfs_open_segment_list_head() (the list head is freed then).
 */
static int finish_current_segment(struct nilfs_sc_info *sci,
				  struct the_nilfs *nilfs)
{
	struct nilfs_segment_list_head *slh;
	int err;

	slh = nilfs_alloc_segment_list_head(nilfs->ns_segnum);
	if (unlikely(!slh))
		return -ENOMEM;

	err = nilfs_open_segment_list_head(slh, nilfs);
	if (unlikely(err))
		goto out_free;

	list_add_tail(&slh->list, &sci->sc_used_active_segments);
	slh->flags |= NILFS_SLH_COMMIT;

	nilfs_mdt_mark_buffer_dirty(slh->bh_su);
	nilfs_mdt_mark_dirty(nilfs->ns_sufile);

	nilfs_close_segment_list_head(slh, nilfs);
	return 0;

 out_free:
	nilfs_free_segment_list_head(slh);
	return err;
}

/*
 * nilfs_pin_segusage - mark the segment usage block of a segment dirty
 * @nilfs: the_nilfs (provides the sufile)
 * @segnum: segment number
 *
 * Gets the segment usage entry of @segnum, marks its buffer and the sufile
 * dirty, and puts the entry back.  Returns 0 or the error from
 * nilfs_sufile_get_segment_usage().
 */
static int nilfs_pin_segusage(struct the_nilfs *nilfs, nilfs_segnum_t segnum)
{
	struct nilfs_segment_usage *su;
	struct buffer_head *su_bh;
	int err;

	err = nilfs_sufile_get_segment_usage(nilfs->ns_sufile, segnum, &su, &su_bh);
	if (likely(!err)) {
		nilfs_mdt_mark_buffer_dirty(su_bh);
		nilfs_mdt_mark_dirty(nilfs->ns_sufile);
		nilfs_sufile_put_segment_usage(nilfs->ns_sufile, segnum, su_bh);
	}
	return err;
}

/*
 * begin_segments - set up the first segment info for a construction round
 * @sci: segment constructor info
 * @nilfs: the_nilfs object holding the current write position
 *
 * Resets the counters of sci->sc_segments and maps it onto the current full
 * segment (nilfs->ns_segnum) starting at offset nilfs->ns_pseg_offset.  If
 * fewer than NILFS_PSEG_MIN_BLOCKS blocks remain there, the current segment
 * is finished and writing moves to the head of nilfs->ns_nextnum.  Then
 * nilfs_pin_segusage() is called for the chosen segment and the number of
 * the following segment is determined.
 *
 * Return Value: 0 on success, or the error returned by
 * finish_current_segment(), nilfs_pin_segusage() or nilfs_sufile_alloc().
 */
static int begin_segments(struct nilfs_sc_info *sci, struct the_nilfs *nilfs)
{
	struct nilfs_segment_info *seginfo = &sci->sc_segments;
	int err;
	
	seginfo->nheader = seginfo->npayload = 0;
	seginfo->io_error = 0;
	seginfo->submit_blocks = 0;

	/* Map the segment info onto the rest of the current full segment */
	seginfo->segnum = nilfs->ns_segnum;
	nilfs_get_segment_range(nilfs, seginfo->segnum, &seginfo->fseg_start, &seginfo->fseg_end);
	seginfo->pseg_start = seginfo->fseg_start + nilfs->ns_pseg_offset;
	/* fseg_end is inclusive, hence the "+ 1" */
	seginfo->rest_blocks = (seginfo->fseg_end + 1) - seginfo->pseg_start;

	if (seginfo->rest_blocks < NILFS_PSEG_MIN_BLOCKS) {
		/* Not enough room left: finish this segment and move on */
		err = finish_current_segment(sci, nilfs);
		if (unlikely(err))
			return err;

		seginfo->segnum = nilfs->ns_segnum = nilfs->ns_nextnum;
		nilfs->ns_pseg_offset = 0;
		nilfs->ns_seg_seq++;
		nilfs_get_segment_range(nilfs, seginfo->segnum, &seginfo->fseg_start, &seginfo->fseg_end);
		seginfo->pseg_start = seginfo->fseg_start;
		seginfo->rest_blocks = (seginfo->fseg_end + 1) - seginfo->pseg_start;
	}

	err = nilfs_pin_segusage(nilfs, seginfo->segnum);
	if (unlikely(err))
		return err;

	if (nilfs->ns_segnum == nilfs->ns_nextnum) {
		/* Start from the head of a new full segment */
		err = nilfs_sufile_alloc(nilfs->ns_sufile, &seginfo->nextnum);
		if (unlikely(err))
			return err;
	} else
		/* Still inside the same full segment; keep the known successor */
		seginfo->nextnum = nilfs->ns_nextnum;

	seginfo->sum.seg_seq = nilfs->ns_seg_seq;
	seginfo->sum.next = nilfs_get_segment_start_blocknr(nilfs, seginfo->nextnum);

	return 0;
}

/*
 * extend_segments - append additional full segments to the segment list
 * @sci: segment constructor info
 * @nilfs: the_nilfs object
 * @nadd: number of segment infos to add
 *
 * For each new segment info, a successor segment is reserved with
 * nilfs_sufile_alloc() and the segment itself (the successor of the previous
 * segment info) is pinned with nilfs_pin_segusage().  The new infos are
 * collected on a private list and spliced onto the tail of the segment list
 * only when all of them were set up successfully.
 *
 * On failure everything done by this call is undone: every successor
 * segment reserved here, including the one belonging to the segment info
 * that failed half way through, is given back with nilfs_sufile_free().
 *
 * Return Value: 0 on success, -ENOMEM if a segment info cannot be
 * allocated, or the error returned by nilfs_sufile_alloc() or
 * nilfs_pin_segusage().
 */
static int extend_segments(struct nilfs_sc_info *sci, struct the_nilfs *nilfs, int nadd)
{
	struct nilfs_segment_info *seginfo, *prev, *n;
	unsigned nbuffers = NILFS_SEGBUF_SIZE(&sci->sc_segments.segbuf);
	LIST_HEAD(list);
	int err, ret;

	prev = PREV_SEGMENT_INFO(&sci->sc_segments);

	while (nadd-- > 0) {
		/* extend segment info */
		err = -ENOMEM;
		seginfo = alloc_segment_info(nbuffers);
		if (unlikely(!seginfo))
			goto failed;

		/* allocate new full segments */
		err = nilfs_sufile_alloc(nilfs->ns_sufile, &seginfo->nextnum);
		if (unlikely(err))
			goto failed_seginfo;

		seginfo->segnum = prev->nextnum;
		nilfs_get_segment_range(nilfs, seginfo->segnum, &seginfo->fseg_start, &seginfo->fseg_end);
		err = nilfs_pin_segusage(nilfs, seginfo->segnum);
		if (unlikely(err))
			/* nextnum is already reserved; it must be released */
			goto failed_nextnum;

		seginfo->pseg_start = seginfo->fseg_start;
		seginfo->rest_blocks = seginfo->fseg_end - seginfo->fseg_start + 1;
		seginfo->sum.seg_seq = prev->sum.seg_seq + 1;
		seginfo->sum.next = nilfs_get_segment_start_blocknr(nilfs, seginfo->nextnum);
		list_add_tail(&seginfo->list, &list);
		prev = seginfo;
	}
	list_splice(&list, sci->sc_segments.list.prev);
	return 0;

 failed_nextnum:
	ret = nilfs_sufile_free(nilfs->ns_sufile, seginfo->nextnum);
	BUG_ON(ret);
 failed_seginfo:
	free_segment_info(seginfo);
 failed:
	/* Roll back the segment infos that were completed before the failure */
	list_for_each_entry_safe(seginfo, n, &list, list) {
		ret = nilfs_sufile_free(nilfs->ns_sufile, seginfo->nextnum);
		BUG_ON(ret);
		list_del(&seginfo->list);
		free_segment_info(seginfo);
	}
	return err;
}

/*
 * free_extended_segments - drop every segment info linked to sc_segments
 * @sci: segment constructor info
 *
 * Unlinks and frees all segment infos chained on sci->sc_segments.list.
 * The embedded head sci->sc_segments itself is left in place.
 */
static void free_extended_segments(struct nilfs_sc_info *sci)
{
	struct nilfs_segment_info *ext, *tmp;

	list_for_each_entry_safe(ext, tmp, &sci->sc_segments.list, list) {
		list_del(&ext->list);
		free_segment_info(ext);
	}
}

/*
 * nilfs_end_page_io - finish writeback on a page of a segment
 * @page: page to finish (may be NULL)
 * @err: negative on I/O failure, zero on success, positive to end the
 *       writeback without touching the error or dirty state
 */
static void nilfs_end_page_io(struct page *page, int err)
{
	/*
	 * For split b-tree node pages, this function may be called twice.
	 * The 2nd and later calls are ignored by the writeback check.
	 * This scheme is not safe for data pages, but data pages are never
	 * split on a segment buffer.
	 */
	if (!page)
		return;
	if (!PageWriteback(page))
		return;

	lock_page(page);
	if (err < 0) {
		SetPageError(page);
	} else if (err == 0) {
		int clean_bits = nilfs_page_buffers_clean(page);

		if (clean_bits != 0)
			nilfs_clear_page_dirty(page, clean_bits);
		ClearPageError(page);
	}
	unlock_page(page);
	nilfs_end_page_writeback(page);
}

/*
 * release_segment_buffers - release the buffers held by all segment buffers
 * @sci: segment constructor info
 * @err: result code passed on to nilfs_end_page_io() and
 *       peel_off_original_blocks() for the payload blocks
 *
 * For every segment info on the list, the nheader buffers are released
 * while walking backward from the tail of the segment buffer, and the
 * npayload buffers are released while walking forward from its head.
 * Both counters are reset to zero afterwards.
 */
static void release_segment_buffers(struct nilfs_sc_info *sci, int err)
{
	struct nilfs_segment_info *seginfo = &sci->sc_segments;

	do {
		struct page *page, *prev_page = NULL;
		struct nilfs_segment_buffer *segbuf = &seginfo->segbuf;
		struct buffer_head *bh;
		int rest;

		/* Header buffers: walk backward from the tail */
		nilfs_segbuf_seek(segbuf, NILFS_SEGBUF_SEEK_FROM_TAIL, 0);
		rest = seginfo->nheader;
		while (rest-- > 0) {
			bh = nilfs_segbuf_read_bh(segbuf);
			nilfs_end_page_writeback(bh->b_page);
			nilfs_segbuf_release_bh(segbuf);
			nilfs_segbuf_move_prev(segbuf);
		}

		/*
		 * We assume that the buffers which belong to the same page
		 * continue over the buffer head array.
		 * Under this assumption, the last BHs of pages is identifiable
		 * by the discontinuity of bh->b_page (page != prev_page).
		 *
		 * For B-tree node blocks, however, this assumption is not
		 * guaranteed.  The cleanup code of B-tree node pages needs
		 * special care.
		 */
		nilfs_segbuf_seek(segbuf, NILFS_SEGBUF_SEEK_FROM_HEAD, 0);
		rest = seginfo->npayload;
		while (rest-- > 0) {
			bh = nilfs_segbuf_read_bh(segbuf);
			page = bh->b_page;
			if (__buffer_frozen_copy(bh))
				peel_off_original_blocks(bh, err);
			if (page != prev_page) {
				/* Page changed: finish the previous one (no-op for NULL) */
				nilfs_end_page_io(prev_page, err);
				prev_page = page;
			}
			nilfs_segbuf_release_bh(segbuf);
			nilfs_segbuf_move_next(segbuf);
		}
		/* Finish the last page seen in the payload walk */
		nilfs_end_page_io(prev_page, err);

		seginfo->npayload = seginfo->nheader = 0;

		seginfo = NEXT_SEGMENT_INFO(seginfo);
	} while (seginfo != &sci->sc_segments);
}

/*
 * end_segments - clean up after one round of segment construction
 * @sci: segment constructor info
 * @nilfs: the_nilfs object
 * @err: zero on success; any non-zero value triggers the rollback below
 *
 * On error, the successor segments reserved for the segment infos are
 * given back with nilfs_sufile_free(), the first segment info that has
 * io_error set is handled (cases 1a, 1b and 2 below), and
 * nilfs_cancel_free_cleaning_segments() is called.  In every case the
 * segment buffers are released and the extended segment infos are freed.
 */
static void end_segments(struct nilfs_sc_info *sci, struct the_nilfs *nilfs, int err)
{
	if (unlikely(err)) {
		struct nilfs_segment_info *seginfo = &sci->sc_segments;
		int ret, done = 0;

		/* Free the successor of the first segment only if it differs
		   from the successor already recorded in the_nilfs */
		if (nilfs->ns_nextnum != seginfo->nextnum) {
			ret = nilfs_sufile_free(nilfs->ns_sufile, seginfo->nextnum);
			BUG_ON(ret);
		}
		if (seginfo->io_error) {
			/* Case 1: The first segment failed */
			if (seginfo->pseg_start != seginfo->fseg_start)
				/* Case 1a:  Partial segment appended into an existing segment */
				nilfs_terminate_segment(nilfs, seginfo->fseg_start, seginfo->fseg_end);
			else /* Case 1b:  New full segment */
				set_nilfs_discontinued(nilfs);
			done++;
		}
		seginfo = NEXT_SEGMENT_INFO(seginfo);

		/* Extended segments: always free their successors; only the
		   first I/O failure found (if not already handled) is examined */
		while (seginfo != &sci->sc_segments) {
			ret = nilfs_sufile_free(nilfs->ns_sufile, seginfo->nextnum);
			BUG_ON(ret);
			if (!done && seginfo->io_error) {
				if (seginfo->segnum != nilfs->ns_nextnum)
					/* Case 2: extended segment (!= next) failed */
					nilfs_sufile_set_error(nilfs->ns_sufile, seginfo->segnum);
				done++;
			}
			seginfo = NEXT_SEGMENT_INFO(seginfo);
		}

		nilfs_cancel_free_cleaning_segments(sci, nilfs);
	}
	release_segment_buffers(sci, err);
	free_extended_segments(sci);
}

/*
 * nilfs_update_segusage - record block count and mtime of written segments
 * @sci: segment constructor info
 * @nilfs: the_nilfs object
 *
 * For every segment info on the list, su_lastmod is set to
 * sci->sc_seg_ctime and su_nblocks to the number of blocks from the head of
 * the full segment up to the end of the partial segment being written.
 */
static void nilfs_update_segusage(struct nilfs_sc_info *sci, struct the_nilfs *nilfs)
{
	struct nilfs_segment_info *cur = &sci->sc_segments;

	seg_debug(3, "called\n");
	do {
		struct nilfs_segment_usage *su;
		struct buffer_head *su_bh;
		unsigned long nblocks;
		int ret;

		ret = nilfs_sufile_get_segment_usage(nilfs->ns_sufile, cur->segnum, &su, &su_bh);
		BUG_ON(ret); /* succeed because bh_su is dirty */

		nblocks = (cur->sum.nblocks + (cur->pseg_start - cur->fseg_start));
		su->su_lastmod = cpu_to_le64(sci->sc_seg_ctime);
		su->su_nblocks = cpu_to_le32(nblocks);
		nilfs_sufile_put_segment_usage(nilfs->ns_sufile, cur->segnum, su_bh);

		cur = NEXT_SEGMENT_INFO(cur);
	} while (cur != &sci->sc_segments);
	seg_debug(3, "done\n");
}

/*
 * nilfs_cancel_segusage - revert the block counts set for a failed write
 * @sci: segment constructor info
 * @nilfs: the_nilfs object
 *
 * The first segment gets its su_nblocks reset to nilfs->ns_pseg_offset;
 * every following segment on the list gets su_nblocks reset to zero.
 */
static void nilfs_cancel_segusage(struct nilfs_sc_info *sci, struct the_nilfs *nilfs)
{
	struct nilfs_segment_info *head = &sci->sc_segments;
	struct nilfs_segment_info *cur;
	struct nilfs_segment_usage *su;
	struct buffer_head *su_bh;
	int ret;

	/* First segment: fall back to the offset known before this write */
	ret = nilfs_sufile_get_segment_usage(nilfs->ns_sufile, head->segnum, &su, &su_bh);
	BUG_ON(ret); /* succeed because bh_su is dirty */
	su->su_nblocks = cpu_to_le32(nilfs->ns_pseg_offset);
	nilfs_sufile_put_segment_usage(nilfs->ns_sufile, head->segnum, su_bh);

	/* Remaining segments: nothing was there before */
	for (cur = NEXT_SEGMENT_INFO(head); cur != head;
	     cur = NEXT_SEGMENT_INFO(cur)) {
		ret = nilfs_sufile_get_segment_usage(nilfs->ns_sufile, cur->segnum, &su, &su_bh);
		BUG_ON(ret); /* succeed because bh_su is dirty */
		su->su_nblocks = 0;
		nilfs_sufile_put_segment_usage(nilfs->ns_sufile, cur->segnum, su_bh);
	}
}

/*
 * truncate_extra_segments - drop segment infos following the current one
 * @sci: segment constructor info
 * @nilfs: the_nilfs object
 *
 * Every segment info after sci->sc_curseg is unlinked, the successor
 * segment reserved for it is returned with nilfs_sufile_free(), and the
 * segment info is freed.
 */
static void truncate_extra_segments(struct nilfs_sc_info *sci, struct the_nilfs *nilfs)
{
	struct nilfs_segment_info *victim, *next;
	int ret;

	victim = NEXT_SEGMENT_INFO(sci->sc_curseg);
	while (victim != &sci->sc_segments) {
		/* Fetch the successor before the entry goes away */
		next = NEXT_SEGMENT_INFO(victim);

		list_del(&victim->list);
		ret = nilfs_sufile_free(nilfs->ns_sufile, victim->nextnum);
		BUG_ON(ret);
		free_segment_info(victim);

		victim = next;
	}
}

static int collect_segments(struct nilfs_sc_info *sci, struct the_nilfs *nilfs,
			    int mode)
{
	struct nilfs_collection_stage prev_stage = sci->sc_stage;
	int err;

	/* Collection retry loop */
	for (;;) {
		sci->sc_nblk_this_inc = 0;
		sci->sc_curseg = &sci->sc_segments;
		err = init_segment_info(sci, sci->sc_curseg);
		if (unlikely(err))
			goto failed;

		err = collect_segment_blocks(sci, mode);
		sci->sc_nblk_this_inc += sci->sc_curseg->sum.nblocks;
		if (!err)
			break;

		if (unlikely(err != -E2BIG))
			goto failed;

		/* The current segment is filled up */
		if (mode == SC_LSEG_DSYNC || sci->sc_stage.main < SC_MAIN_CPFILE)
			break;

		nilfs_cancel_free_cleaning_segments(sci, nilfs);
		release_segment_buffers(sci, 1);

		err = extend_segments(sci, nilfs, 1);
		if (unlikely(err))
			return err;

		seg_debug(2, "Segment buffer extended. Retrying collection\n");
		sci->sc_stage = prev_stage;
	}

	truncate_extra_segments(sci, nilfs);
	return 0;

 failed:
	return err;
}

/*
 * assign_segments - assign disk block numbers for every segment
 * @sci: segment constructor info
 * @mode: construction mode
 *
 * Walks the segment list with sci->sc_curseg as cursor; for each segment
 * the payload block numbers are updated and the segment summary is filled
 * in.
 *
 * Return Value: 0 on success, or the error returned by
 * update_payload_blocknr().
 */
static int assign_segments(struct nilfs_sc_info *sci, int mode)
{
	struct nilfs_segment_info *seginfo;
	int ret;

	sci->sc_curseg = &sci->sc_segments;

	for (;;) {
		seginfo = sci->sc_curseg;
		nilfs_print_seginfo(seginfo);

		ret = update_payload_blocknr(sci, seginfo, mode);
		if (unlikely(ret))
			return ret;

		fill_in_segsum(seginfo);

		sci->sc_curseg = list_entry(sci->sc_curseg->list.next,
					    struct nilfs_segment_info, list);
		if (sci->sc_curseg == &sci->sc_segments)
			break;
	}

	return 0;
}

/*
 * nilfs_check_in_files - move dirty inodes to the segment constructor
 * @sci: segment constructor info (receives the inodes on sc_dirty_files)
 * @sbi: nilfs_sb_info owning the s_dirty_files list
 *
 * Every inode on sbi->s_dirty_files gets its i_cno set to the current
 * checkpoint number, has NILFS_I_QUEUED cleared and NILFS_I_BUSY set, and is
 * moved to the tail of sci->sc_dirty_files.  An inode that has no i_bh yet
 * first gets its inode block looked up and dirtied; s_inode_lock is dropped
 * for that lookup, so the scan restarts from the head of the list afterwards.
 *
 * Return Value: 0 on success, or the error returned by
 * nilfs_ifile_get_inode_block().  s_inode_lock is not held on return in
 * either case.
 */
static int
nilfs_check_in_files(struct nilfs_sc_info *sci, struct nilfs_sb_info *sbi)
{
	struct nilfs_inode_info *ii, *n;
	nilfs_cno_t cno = sbi->s_nilfs->ns_cno;

	spin_lock(&sbi->s_inode_lock);
 retry:
	list_for_each_entry_safe(ii, n, &sbi->s_dirty_files, i_dirty) {
		if (!ii->i_bh) {
			struct buffer_head *ibh;
			int err;

			/* The lookup is done without the spinlock held */
			spin_unlock(&sbi->s_inode_lock);
			err = nilfs_ifile_get_inode_block(sbi->s_ifile, ii->vfs_inode.i_ino, &ibh);
			if (unlikely(err)) {
				nilfs_warning(sbi->s_super, __FUNCTION__,
					      "failed to get inode block.\n");
				return err;
			}
			nilfs_mdt_mark_buffer_dirty(ibh);
			nilfs_mdt_mark_dirty(sbi->s_ifile);
			spin_lock(&sbi->s_inode_lock);
			/* i_bh may have been set while the lock was dropped;
			   in that case the extra reference is released */
			if (likely(!ii->i_bh))
				ii->i_bh = ibh;
			else
				brelse(ibh);
			/* The list may have changed meanwhile; rescan it */
			goto retry;
		}
		ii->i_cno = cno;

		seg_debug(3, "check in file (ino=%lu)\n", ii->vfs_inode.i_ino);
		clear_bit(NILFS_I_QUEUED, &ii->i_state);
		set_bit(NILFS_I_BUSY, &ii->i_state);
		list_del(&ii->i_dirty);
		list_add_tail(&ii->i_dirty, &sci->sc_dirty_files);
	}
	spin_unlock(&sbi->s_inode_lock);

	NILFS_I(sbi->s_ifile)->i_cno = cno;

	return 0;
}

/*
 * nilfs_check_out_files - release inodes that were written out cleanly
 * @sci: segment constructor info holding the sc_dirty_files list
 * @sbi: nilfs_sb_info
 *
 * An inode whose NILFS_I_UPDATED bit was set (it is cleared here) and which
 * is not NILFS_I_DIRTY is checked out: NILFS_I_BUSY is cleared, its i_bh is
 * released and the inode is moved to the ti_garbage list of the current
 * transaction.  All other inodes stay on the list and only get their i_cno
 * refreshed.
 */
static void
nilfs_check_out_files(struct nilfs_sc_info *sci, struct nilfs_sb_info *sbi)
{
	struct nilfs_transaction_info *ti = current->journal_info;
	struct nilfs_inode_info *ii, *tmp;
	nilfs_cno_t cno = sbi->s_nilfs->ns_cno;

	spin_lock(&sbi->s_inode_lock);
	list_for_each_entry_safe(ii, tmp, &sci->sc_dirty_files, i_dirty) {
		int updated = test_and_clear_bit(NILFS_I_UPDATED, &ii->i_state);

		if (updated && !test_bit(NILFS_I_DIRTY, &ii->i_state)) {
			seg_debug(3, "check out file (ino=%lu)\n", ii->vfs_inode.i_ino);
			clear_bit(NILFS_I_BUSY, &ii->i_state);
			brelse(ii->i_bh);
			ii->i_bh = NULL;
			list_del(&ii->i_dirty);
			list_add_tail(&ii->i_dirty, &ti->ti_garbage);
		} else {
			/*
			 * The current checkpoint number (=nilfs->ns_cno) is
			 * changed between check-in and check-out only if the
			 * super root is written out.  So, we can update i_cno
			 * for the inodes that remain in the dirty list.
			 */
			ii->i_cno = cno;
		}
	}
	spin_unlock(&sbi->s_inode_lock);
}

/*
 * nilfs_do_prepare_used_active_segments - list the used-up full segments
 * @sci: segment constructor info
 *
 * Adds a segment list head for every segment info except the last one to
 * sci->sc_used_active_segments.  The last segment is excluded because it is
 * the current segment.
 *
 * Return Value: 0 on success, -ENOMEM if a list head cannot be allocated
 * (entries added before the failure stay on the list).
 */
static int nilfs_do_prepare_used_active_segments(struct nilfs_sc_info *sci)
{
	struct nilfs_segment_info *cur = &sci->sc_segments;
	struct nilfs_segment_info *next;
	struct nilfs_segment_list_head *slh;

	while ((next = NEXT_SEGMENT_INFO(cur)) != &sci->sc_segments) {
		slh = nilfs_alloc_segment_list_head(cur->segnum);
		if (unlikely(!slh))
			return -ENOMEM;

		list_add_tail(&slh->list, &sci->sc_used_active_segments);
		cur = next;
	}
	return 0;
}

/*
 * nilfs_do_cancel_used_active_segments - drop entries of the used list
 * @sci: segment constructor info
 * @force: non-zero to drop every entry; zero to keep the entries that
 *         have NILFS_SLH_COMMIT set
 */
static void
nilfs_do_cancel_used_active_segments(struct nilfs_sc_info *sci, int force)
{
	struct nilfs_segment_list_head *slh, *tmp;

	list_for_each_entry_safe(slh, tmp, &sci->sc_used_active_segments, list) {
		/* Committed entries survive unless the caller forces removal */
		if (!force && (slh->flags & NILFS_SLH_COMMIT))
			continue;

		list_del_init(&slh->list);
		seg_debug(3, "remove (segnum=%lu, commit=%d, bh_su=%p)\n",
			  slh->segnum, slh->flags & NILFS_SLH_COMMIT, slh->bh_su);
		nilfs_free_segment_list_head(slh);
	}
}

/*
 * nilfs_do_commit_used_active_segments - mark all used entries committed
 * @sci: segment constructor info
 *
 * Sets NILFS_SLH_COMMIT on every entry of sci->sc_used_active_segments.
 */
static void
nilfs_do_commit_used_active_segments(struct nilfs_sc_info *sci)
{
	struct list_head *used = &sci->sc_used_active_segments;
	struct nilfs_segment_list_head *slh;

	list_for_each_entry(slh, used, list) {
		seg_debug(3, "commit used active segment (segnum=%lu)\n", slh->segnum);
		slh->flags |= NILFS_SLH_COMMIT;
	}
}

/*
 * nilfs_do_cancel_used_segments - undo nilfs_prepare_used_segments()
 * @sci: segment constructor info
 * @nilfs: the_nilfs object
 *
 * For every entry on sci->sc_used_active_segments the active flag is set
 * again on its segment usage and the entry is closed; entries without
 * NILFS_SLH_COMMIT are removed and freed.  Afterwards, under ns_sem, the
 * volatile-active flag is set on every entry of nilfs->ns_used_segments.
 *
 * NOTE(review): slh->raw_su is used for every entry on the list.  When this
 * is reached from the failure path of nilfs_prepare_used_segments(), the
 * entries behind the one that failed to open were not opened by that loop --
 * confirm that raw_su is valid for them.
 */
static void
nilfs_do_cancel_used_segments(struct nilfs_sc_info *sci, struct the_nilfs *nilfs)
{
	struct list_head *head;
	struct nilfs_segment_list_head *slh, *n;

	head = &sci->sc_used_active_segments;
	list_for_each_entry_safe(slh, n, head, list) {
		nilfs_segment_usage_set_active(slh->raw_su);
		nilfs_close_segment_list_head(slh, nilfs);
		/* Uncommitted entries belong to this round only; drop them */
		if (!(slh->flags & NILFS_SLH_COMMIT)) {
			list_del_init(&slh->list);
			seg_debug(3, "remove (segnum=%lu, commit=%d, bh_su=%p)\n",
				  slh->segnum, slh->flags & NILFS_SLH_COMMIT, slh->bh_su);
			nilfs_free_segment_list_head(slh);
		}
	}

	down_write(&nilfs->ns_sem);
	head = &nilfs->ns_used_segments;
	list_for_each_entry(slh, head, list) {
		seg_debug(3, "set volatile active on used segment (segnum=%lu)\n",
			  slh->segnum);
		nilfs_segment_usage_set_volatile_active(slh->raw_su);
	}
	up_write(&nilfs->ns_sem);
}

/*
 * nilfs_prepare_used_segments - prepare segment usage updates for a write
 * @sci: segment constructor info
 * @nilfs: the_nilfs object
 * @has_sr: non-zero if the logical segment being built has a super root
 *
 * Lists the used-up full segments on sci->sc_used_active_segments.  If
 * @has_sr is set, each listed entry is opened and its active flag cleared,
 * and the volatile-active flag is cleared on every entry of
 * nilfs->ns_used_segments under ns_sem.
 *
 * Return Value: 0 on success, -ENOMEM from the listing step, or the error
 * returned by nilfs_open_segment_list_head().  The changes made so far are
 * cancelled before an error is returned.
 */
static int
nilfs_prepare_used_segments(struct nilfs_sc_info *sci, struct the_nilfs *nilfs,
			    int has_sr)
{
	struct list_head *head;
	struct nilfs_segment_list_head *slh;
	int err;

	err = nilfs_do_prepare_used_active_segments(sci);
	if (unlikely(err)) {
		/* Drop only the uncommitted entries (force == 0) */
		nilfs_do_cancel_used_active_segments(sci, 0);
		return err;
	}
	if (has_sr) {
		head = &sci->sc_used_active_segments;
		list_for_each_entry(slh, head, list) {
			err = nilfs_open_segment_list_head(slh, nilfs);
			if (unlikely(err))
				goto failed;
			nilfs_segment_usage_clear_active(slh->raw_su);
			BUG_ON(!buffer_dirty(slh->bh_su));
			seg_debug(3, "clear active on used segment (segnum=%lu)\n",
				  slh->segnum);
		}

		down_write(&nilfs->ns_sem);
		head = &nilfs->ns_used_segments;
		list_for_each_entry(slh, head, list) {
			seg_debug(3, "clear volatile active on used segment (segnum=%lu)\n",
				  slh->segnum);
			nilfs_segment_usage_clear_volatile_active(slh->raw_su);
		}
		up_write(&nilfs->ns_sem);
	}
	return 0;
 failed:
	nilfs_do_cancel_used_segments(sci, nilfs);
	return err;
}

/*
 * nilfs_do_commit_used_segments - hand the used segments over to the_nilfs
 * @sci: segment constructor info
 * @nilfs: the_nilfs object
 *
 * Moves all entries of sci->sc_used_active_segments to the tail of
 * nilfs->ns_used_segments (leaving the source list empty) and sets the
 * volatile-active flag on every entry of the resulting list.
 */
static void
nilfs_do_commit_used_segments(struct nilfs_sc_info *sci, struct the_nilfs *nilfs)
{
	struct nilfs_segment_list_head *used;

	list_splice_init(&sci->sc_used_active_segments, nilfs->ns_used_segments.prev);

	list_for_each_entry(used, &nilfs->ns_used_segments, list) {
		seg_debug(3, "set volatile active on used segment (segnum=%lu)\n",
			  used->segnum);
		nilfs_segment_usage_set_volatile_active(used->raw_su);
	}
}

/*
 * nilfs_cancel_used_segments - cancel what nilfs_prepare_used_segments() did
 * @sci: segment constructor info
 * @nilfs: the_nilfs object
 * @has_sr: the same value that was given to nilfs_prepare_used_segments()
 */
static void
nilfs_cancel_used_segments(struct nilfs_sc_info *sci, struct the_nilfs *nilfs,
			   int has_sr)
{
	if (!has_sr) {
		/* Only the list was built; drop its uncommitted entries */
		nilfs_do_cancel_used_active_segments(sci, 0);
		return;
	}
	nilfs_do_cancel_used_segments(sci, nilfs);
}

/*
 * nilfs_do_construct_segment - build and write out segments
 * @sbi: nilfs_sb_info
 * @sci: segment constructor info
 * @mode: construction mode (one of the SC_* construction modes)
 *
 * Checks in the dirty files, then repeats the following until the
 * collection stage reaches SC_MAIN_DONE: set up the segments, collect
 * blocks, assign block numbers, prepare the segment usage updates, fill in
 * metadata and checksums, write the segments and commit the result.
 * The dirty files are checked out again before returning, on success as
 * well as on failure.
 *
 * Return Value: 0 on success (including the cases where the construction
 * turned out to be unnecessary or produced an empty segment), or the error
 * code of the step that failed.
 */
static int 
nilfs_do_construct_segment(struct nilfs_sb_info *sbi, struct nilfs_sc_info *sci, int mode)
{
	struct the_nilfs *nilfs = sbi->s_nilfs;
	int err, has_sr = 0;

	SC_STAGE_INIT(&sci->sc_stage);

	err = nilfs_check_in_files(sci, sbi);
	if (unlikely(err))
		goto out;

	do {
		SC_STAGE_CLEAR_HISTORY(&sci->sc_stage);

		/* Re-check needs of construction */
		if (sci->sc_stage.main == SC_MAIN_INIT &&
		    nilfs_reconfirm_construction(sbi, sci))
				goto out;

		/* Set next segment */
		err = begin_segments(sci, nilfs);
		if (unlikely(err))
			goto out;

		/* Update time stamp */
		sci->sc_seg_ctime = get_seconds();

		err = collect_segments(sci, nilfs, mode);
		if (unlikely(err))
			goto failed;

		has_sr = NILFS_SEG_HAS_SR(&sci->sc_curseg->sum);
		if (sci->sc_stage.main == SC_MAIN_DONE &&
		    follow_up_check(sci, has_sr, mode))
			goto out_empty_segment;

		err = assign_segments(sci, mode);
		if (unlikely(err))
			goto failed;

		err = nilfs_prepare_used_segments(sci, nilfs, has_sr);
		if (unlikely(err))
			goto failed;

		if (mode != SC_LSEG_DSYNC) {
			if (SC_STAGE_STARTED(&sci->sc_stage, SC_MAIN_IFILE))
				fill_in_file_bmap(sci, sbi);
			if (has_sr) {
				err = fill_in_checkpoint(sci, sbi);
				if (unlikely(err))
					goto failed_to_make_up;

				fill_in_super_root(sci, nilfs);
			}
		}
		nilfs_update_segusage(sci, nilfs);

		/* Checksum calculations */
		fill_in_checksums(sci, nilfs->ns_crc_seed, has_sr);

		/* Write partial segments */
		err = write_segments(sci, nilfs);
		if (unlikely(err))
			goto failed_to_write;

		complete_segment_io(sci, has_sr);

		/* Commit segments */
		if (has_sr) {
			down_write(&nilfs->ns_sem);
			nilfs_update_last_segment(sbi, 1);
			nilfs_do_commit_used_segments(sci, nilfs);
			up_write(&nilfs->ns_sem);
			nilfs_commit_free_cleaning_segments(sci);
		} else
			nilfs_do_commit_used_active_segments(sci);

		nilfs_clear_mdt_dirty(sbi, sci, mode);
		end_segments(sci, nilfs, 0);

	} while (sci->sc_stage.main != SC_MAIN_DONE);

	seg_debug(2, "submitted all segments\n");

	/* Clearing sketch data */
	if (has_sr && sci->sc_sketch_inode) {
		if (i_size_read(sci->sc_sketch_inode) == 0)
			clear_bit(NILFS_I_DIRTY, &NILFS_I(sci->sc_sketch_inode)->i_state);
		i_size_write(sci->sc_sketch_inode, 0);
	}
 out:
	nilfs_check_out_files(sci, sbi);
	return err;

	/* The write failed: revert segment usage and used-segment state too */
 failed_to_write:
	nilfs_cancel_segusage(sci, nilfs);
	nilfs_cancel_used_segments(sci, nilfs, has_sr);
	abort_segment_io(sci);
	end_segments(sci, nilfs, err);
	goto out;

	/* Failed after nilfs_prepare_used_segments() succeeded */
 failed_to_make_up:
	nilfs_cancel_used_segments(sci, nilfs, has_sr);

 failed:
	end_segments(sci, nilfs, err);
	goto out;

	/* Nothing to write: roll back with a positive code; err stays as is */
 out_empty_segment:
	end_segments(sci, nilfs, 1);
	goto out;
}

/**
 * nilfs_segctor_start_timer - set timer of background write
 * @sci: nilfs_sc_info
 *
 * If the timer has already been set, it ignores the new request.
 * This function MUST be called within a section locking the segment semaphore.
 */
static void nilfs_segctor_start_timer(struct nilfs_sc_info *sci)
{
	if (unlikely(!sci))
		return;

	spin_lock(&sci->sc_state_lock);
	/* No timer installed, or a commit is already scheduled */
	if (!sci->sc_timer || (sci->sc_state & NILFS_SEGCTOR_COMMIT))
		goto out_unlock;

	sci->sc_timer->expires = jiffies + sci->sc_interval;
	add_timer(sci->sc_timer);
	sci->sc_state |= NILFS_SEGCTOR_COMMIT;

 out_unlock:
	spin_unlock(&sci->sc_state_lock);
}

/*
 * nilfs_segctor_do_flush - raise a flush request flag and wake the daemon
 * @sci: segment constructor info
 * @flag: flush request bit(s) to set in sc_state
 *
 * Does nothing while NILFS_SEGCTOR_INIT is set or if any bit of @flag is
 * already set in sc_state.  Callers in this file invoke it with
 * sc_state_lock held.
 */
static void nilfs_segctor_do_flush(struct nilfs_sc_info *sci, unsigned long flag)
{
	if ((sci->sc_state & NILFS_SEGCTOR_INIT) || (sci->sc_state & flag))
		return;

	sci->sc_state |= flag;
	wake_up(&sci->sc_wait_daemon);
}

/**
 * nilfs_flush_segment - trigger a segment construction for resource control
 * @sbi: nilfs_sb_info
 * @ino: inode number of the file to be flushed out.
 *
 * Inode numbers at or above ns_first_ino request a data flush and
 * NILFS_IFILE_INO requests an inode file flush; any other inode number is
 * ignored.
 */
void nilfs_flush_segment(struct nilfs_sb_info *sbi, ino_t ino)
{
	struct nilfs_sc_info *sci = NILFS_SC(sbi);
	int is_user_file;
	unsigned long flag;

	if (!sci) {
		nilfs_warning(sbi->s_super, __FUNCTION__,
			      "Tried to flush destructed FS.\n");
		nilfs_dump_stack(NILFS_VERBOSE_SEGMENT, 1);
		return;
	}

	is_user_file = (ino >= sbi->s_nilfs->ns_first_ino);
	if (!is_user_file && ino != NILFS_IFILE_INO)
		return;
	flag = is_user_file ? NILFS_SEGCTOR_FLUSH_DATA : NILFS_SEGCTOR_FLUSH_IFILE;

	spin_lock(&sci->sc_state_lock);
	seg_debug(2, "kick segment constructor (inode number=%lu)\n", ino);
	nilfs_segctor_do_flush(sci, flag);
	spin_unlock(&sci->sc_state_lock);
}

/*
 * nilfs_segctor_add_dirty - account newly dirtied blocks
 * @sci: segment constructor info (must not be NULL)
 * @delta: amount to add to sc_nr_dirty
 *
 * Requests a data flush once sc_nr_dirty exceeds sc_watermark.
 */
void nilfs_segctor_add_dirty(struct nilfs_sc_info *sci, unsigned delta)
{
	int over_watermark;

	BUG_ON(!sci);

	spin_lock(&sci->sc_state_lock);
	sci->sc_nr_dirty += delta;
	over_watermark = (sci->sc_nr_dirty > sci->sc_watermark);
	if (over_watermark)
		nilfs_segctor_do_flush(sci, NILFS_SEGCTOR_FLUSH_DATA);
	spin_unlock(&sci->sc_state_lock);
}

/*
 * nilfs_segctor_add_segments_to_be_freed - register segments for cleaning
 * @sci: segment constructor info
 * @segnum: array of segment numbers
 * @nsegs: number of elements in @segnum
 *
 * Builds a segment list head for every given segment and checks that the
 * flags of its segment usage are exactly the dirty flag.  Every offending
 * segment is reported; the remaining segments are still checked.  Only if
 * all segments pass, the entries are appended to sci->sc_cleaning_segments.
 *
 * Return Value: 0 on success, -ENOMEM if a list head cannot be allocated,
 * the error returned by nilfs_open_segment_list_head(), or -EINVAL if a
 * segment is in an unexpected state.  Nothing is registered on failure.
 */
int nilfs_segctor_add_segments_to_be_freed(struct nilfs_sc_info *sci,
					   nilfs_segnum_t *segnum, size_t nsegs)
{
	struct the_nilfs *nilfs = NILFS_SB(sci->sc_super)->s_nilfs;
	struct nilfs_segment_list_head *slh;
	LIST_HEAD(list);
	const char *flag_name;
	size_t i;
	int err, err2 = 0;

	for (i = 0; i < nsegs; i++) {
		slh = nilfs_alloc_segment_list_head(segnum[i]);
		if (unlikely(!slh)) {
			err = -ENOMEM;
			goto failed;
		}
		/* Queue first so the failure path disposes of it as well */
		list_add_tail(&slh->list, &list);

		err = nilfs_open_segment_list_head(slh, nilfs);
		if (unlikely(err))
			goto failed;

		if (unlikely(le32_to_cpu(slh->raw_su->su_flags) != (1UL << NILFS_SEGMENT_USAGE_DIRTY))) {
			/* Pick a name for the diagnostic message */
			if (nilfs_segment_usage_clean(slh->raw_su))
				flag_name = "clean";
			else if (nilfs_segment_usage_active(slh->raw_su))
				flag_name = "active";
			else if (nilfs_segment_usage_volatile_active(slh->raw_su))
				flag_name = "volatile active";
			else if (!nilfs_segment_usage_dirty(slh->raw_su))
				flag_name = "non-dirty";
			else
				flag_name = "erroneous";

			printk(KERN_ERR "NILFS: %s segment is requested to be cleaned (segnum=%lu)\n",
			       flag_name, slh->segnum);
			/* Remember the failure but keep checking the rest */
			err2 = -EINVAL;
		}
		nilfs_close_segment_list_head(slh, nilfs);
	}

	if (unlikely(err2)) {
		err = err2;
		goto failed;
	}

	list_splice(&list, sci->sc_cleaning_segments.prev);
	return 0;

 failed:
	nilfs_dispose_segment_list(&list);
	return err;
}

/*
 * nilfs_segctor_clear_segments_to_be_freed - forget the segments to clean
 * @sci: segment constructor info
 *
 * Disposes of every entry on sci->sc_cleaning_segments.
 */
void nilfs_segctor_clear_segments_to_be_freed(struct nilfs_sc_info *sci)
{
	nilfs_dispose_segment_list(&sci->sc_cleaning_segments);
}

/*
 * Wait record of one caller of nilfs_segctor_do_request(); it is queued on
 * sci->sc_wait_request and completed by nilfs_segctor_do_wakeup().
 */
struct nilfs_segctor_wait_request {
	wait_queue_t	wq;	/* entry linked into sc_wait_request */
	__u32		seq;	/* sequence number taken from sc_seq_request */
	int		err;	/* result stored by nilfs_segctor_do_wakeup() */
	atomic_t	done;	/* set to 1 once the request is completed */
};

/*
 * nilfs_segctor_do_request - request a segment construction and wait for it
 * @sci: segment constructor info
 *
 * Takes a new request sequence number, wakes up the waiter(s) on
 * sc_wait_daemon and sleeps interruptibly until nilfs_segctor_do_wakeup()
 * marks the request as done or a signal becomes pending.
 *
 * Return Value: the result stored in the wait request on completion,
 * %-EROFS if NILFS_SEGCTOR_INIT is set in sc_state, or %-ERESTARTSYS if a
 * signal is pending before the request completes.
 */
static int nilfs_segctor_do_request(struct nilfs_sc_info *sci)
{
	struct nilfs_segctor_wait_request wait_req;
	int err = 0;

	spin_lock(&sci->sc_state_lock);
	if (sci->sc_state & NILFS_SEGCTOR_INIT) {
		spin_unlock(&sci->sc_state_lock);
		return -EROFS;
	}
	init_wait(&wait_req.wq);
	wait_req.err = 0;
	atomic_set(&wait_req.done, 0);
	wait_req.seq = ++sci->sc_seq_request;
	spin_unlock(&sci->sc_state_lock);

	seg_debug(3, "start task=%p seq=%d\n", current, wait_req.seq);
	init_waitqueue_entry(&wait_req.wq, current);
	add_wait_queue(&sci->sc_wait_request, &wait_req.wq);
	/* Go to sleep state before kicking the daemon so that a completion
	   arriving right after the wake-up cannot be missed */
	set_current_state(TASK_INTERRUPTIBLE);
	wake_up(&sci->sc_wait_daemon);

	for (;;) {
		if (atomic_read(&wait_req.done)) {
			err = wait_req.err;
			break;
		}
		if (signal_pending(current)) {
			err = -ERESTARTSYS;
			break;
		}
		schedule();
		/*
		 * schedule() returns in TASK_RUNNING.  Re-arm the sleep state
		 * before testing the condition again; otherwise a wakeup that
		 * finds the request still pending would turn this loop into
		 * a busy wait.
		 */
		set_current_state(TASK_INTERRUPTIBLE);
	}
	/* Restores TASK_RUNNING and removes the entry from the queue */
	finish_wait(&sci->sc_wait_request, &wait_req.wq);
	seg_debug(3, "done task=%p seq=%d err=%d\n", current, wait_req.seq, err);

	return err;
}

static void nilfs_segctor_do_wakeup(struct nilfs_sc_info *sci, int err)
{
	struct nilfs_segctor_wait_request *wrq, *n;
	unsigned long flags;

	spin_lock_irqsave(&sci->sc_wait_request.lock, flags);
	list_for_each_entry_safe(wrq, n, &sci->sc_wait_request.task_list, wq.task_list) {
		if (!atomic_read(&wrq->done) && nilfs_cnt32_ge(sci->sc_seq_done, wrq->seq)) {
			wrq->err = err;
			atomic_set(&wrq->done, 1);
		}
		if (atomic_read(&wrq->done)) {
			seg_debug(3, "wakeup task=%p seq=%d\n", WAIT_QUEUE_TASK(&wrq->wq), wrq->seq);
			wrq->wq.func(&wrq->wq, TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, 0, NULL);
		}
	}
	spin_unlock_irqrestore(&sci->sc_wait_request.lock, flags);
}

/**
 * nilfs_construct_segment - construct a logical segment
 * @sb: super block
 *
 * Return Value: On success, 0 is returned. On errors, one of the following
 * negative error code is returned.
 *
 * %-EROFS - Read only filesystem.
 *
 * %-EIO - I/O error
 *
 * %-ENOSPC - No space left on device (only in a panic state).
 *
 * %-ERESTARTSYS - Interrupted.
 *
 * %-ENOMEM - Insufficient memory available.
 */
int nilfs_construct_segment(struct super_block *sb)
{
	struct nilfs_sb_info *sbi = NILFS_SB(sb);
	struct nilfs_sc_info *sci = NILFS_SC(sbi);
	struct nilfs_transaction_info *ti;

	if (!sci) {
		seg_debug(1, "Skipped construction (read only)\n");
		return -EROFS;
	}

	/* A call inside transactions causes a deadlock. */
	ti = current->journal_info;
	BUG_ON(ti && ti->ti_magic == NILFS_TI_MAGIC);

	return nilfs_segctor_do_request(sci);
}

/**
 * nilfs_construct_dsync_segment - construct a data-only logical segment
 * @sb: super block
 * @inode: the inode whose data blocks should be written out
 *
 * Falls back to an ordinary construction request when the inode itself is
 * dirty, the STRICT_ORDER option is set, a logical segment is unclosed or
 * the filesystem is discontinued.  Returns immediately with 0 if the inode
 * is neither queued nor busy.
 *
 * Return Value: On success, 0 is returned. On errors, one of the following
 * negative error code is returned.
 *
 * %-EROFS - Read only filesystem.
 *
 * %-EIO - I/O error
 *
 * %-ENOSPC - No space left on device (only in a panic state).
 *
 * %-ERESTARTSYS - Interrupted.
 *
 * %-ENOMEM - Insufficient memory available.
 */
int nilfs_construct_dsync_segment(struct super_block *sb,
				  struct inode *inode)
{
	struct nilfs_sb_info *sbi = NILFS_SB(sb);
	struct nilfs_sc_info *sci = NILFS_SC(sbi);
	struct nilfs_inode_info *ii;
	struct nilfs_transaction_info ti;
	int err = 0;

	if (!sci) {
		seg_debug(1, "Skipped construction (read only)\n");
		return -EROFS;
	}
 retry:
	nilfs_segctor_lock(sbi, &ti, 0);

	/* Write-locked: drop the lock, wait for the unlock, start over */
	if (unlikely(nilfs_write_locked(sbi->s_nilfs))) {
		nilfs_segctor_unlock(sbi);
		err = nilfs_wait_on_write_interruptible(sbi->s_nilfs);
		if (unlikely(err))
			return err;
		goto retry;
	}

	ii = NILFS_I(inode);
	/* Cases that are handed over to an ordinary construction request */
	if (test_bit(NILFS_I_INODE_DIRTY, &ii->i_state) || nilfs_test_opt(sbi, STRICT_ORDER) ||
	    test_bit(NILFS_SC_UNCLOSED, &sci->sc_flags) || nilfs_discontinued(sbi->s_nilfs)) {
		nilfs_segctor_unlock(sbi);
		err = nilfs_segctor_do_request(sci);
		return err;
	}

	spin_lock(&sbi->s_inode_lock);
	/* Neither queued nor busy: nothing to do for this inode */
	if (!test_bit(NILFS_I_QUEUED, &ii->i_state) &&
	    !test_bit(NILFS_I_BUSY, &ii->i_state)) {
		spin_unlock(&sbi->s_inode_lock);
		nilfs_segctor_unlock(sbi);
		return 0;
	}
	spin_unlock(&sbi->s_inode_lock);
	sci->sc_stage.dirty_file_ptr = ii;
	sci->sc_seg_ctime = sbi->s_nilfs->ns_ctime;

	seg_debug(2, "begin (mode=0x%x)\n", SC_LSEG_DSYNC);
	err = nilfs_do_construct_segment(sbi, sci, SC_LSEG_DSYNC);
	seg_debug(2, "end (stage=%d)\n", sci->sc_stage.main);

	nilfs_segctor_unlock(sbi);
	if (unlikely(err == -EAGAIN)) {
		seg_debug(1, "retry construction due to watermark control\n");
		goto retry;
	}
	return err;
}

/*
 * Parameters and results of one construction attempt, passed between
 * nilfs_segctor_accept(), nilfs_segctor_construct() and
 * nilfs_segctor_notify().
 */
struct nilfs_segctor_req {
	int mode;	/* construction mode (one of the SC_* modes) */
	__u32 seq_accepted;	/* sc_seq_request sampled by nilfs_segctor_accept() */
	int sc_err;  /* construction failure */
	int sb_err;  /* super block writeback failure */
};

/*
 * nilfs_segctor_accept - start serving the pending construction requests
 * @sci: segment constructor info (must not be NULL)
 * @req: request record to initialize
 *
 * Clears both error fields of @req, records the latest request sequence
 * number in it and deactivates the construction timer if one is installed.
 */
static void nilfs_segctor_accept(struct nilfs_sc_info *sci, struct nilfs_segctor_req *req)
{
	BUG_ON(!sci);

	req->sc_err = 0;
	req->sb_err = 0;

	spin_lock(&sci->sc_state_lock);
	req->seq_accepted = sci->sc_seq_request;
	spin_unlock(&sci->sc_state_lock);

	if (!sci->sc_timer)
		return;
	del_timer_sync(sci->sc_timer);
}

/*
 * nilfs_segctor_notify - publish the outcome of a construction attempt
 * @sci: segment constructor info
 * @req: request record filled in by the construction
 *
 * Resets the dirty counter after a successful construction, clears the
 * request flags that the attempt served, and, for an SC_LSEG_SR attempt
 * that did not end with -EAGAIN, advances sc_seq_done and wakes the waiters
 * with the construction error or, failing that, the super block error.
 */
static void nilfs_segctor_notify(struct nilfs_sc_info *sci, struct nilfs_segctor_req *req)
{
	int result;

	spin_lock(&sci->sc_state_lock);
	if (likely(!req->sc_err))
		sci->sc_nr_dirty = 0;

	/* Clear requests (even when the construction failed) */
	if (req->mode != SC_FLUSH_DATA)
		sci->sc_state &= ~(NILFS_SEGCTOR_COMMIT | NILFS_SEGCTOR_FLUSH);
	else
		sci->sc_state &= ~(NILFS_SEGCTOR_COMMIT | NILFS_SEGCTOR_FLUSH_DATA);

	if (req->mode != SC_LSEG_SR || req->sc_err == -EAGAIN)
		goto out_unlock;

	seg_debug(3, "complete requests from seq=%d to seq=%d\n",
		  sci->sc_seq_done + 1, req->seq_accepted);
	sci->sc_seq_done = req->seq_accepted;
	result = req->sc_err ? req->sc_err : req->sb_err;
	nilfs_segctor_do_wakeup(sci, result);

 out_unlock:
	spin_unlock(&sci->sc_state_lock);
}

/*
 * nilfs_segctor_construct - run one construction attempt
 * @sci: segment constructor info
 * @req: request record; mode is read (and forced to SC_LSEG_SR when the
 *       filesystem is discontinued), sc_err and sb_err receive the results
 *
 * The segment is constructed unless nilfs_confirm_construction() returns
 * non-zero.  If no error occurred, NILFS_SC_SUPER_ROOT is set and the
 * filesystem is discontinued, the super block is written back as well.
 *
 * Return Value: 0 or the error of the segment construction.  A super block
 * writeback failure is reported through req->sb_err only.
 */
static int nilfs_segctor_construct(struct nilfs_sc_info *sci, struct nilfs_segctor_req *req)
{
	struct nilfs_sb_info *sbi = NILFS_SB(sci->sc_super);
	struct the_nilfs *nilfs = sbi->s_nilfs;
	int err = 0;

	sci->sc_seg_ctime = nilfs->ns_ctime;

	if (nilfs_discontinued(nilfs))
		req->mode = SC_LSEG_SR;

	if (!nilfs_confirm_construction(sbi, sci)) {
		seg_debug(2, "begin (mode=0x%x)\n", req->mode);
		err = nilfs_do_construct_segment(sbi, sci, req->mode);
		req->sc_err = err;
		seg_debug(2, "end (stage=%d)\n", sci->sc_stage.main);
	}
	if (unlikely(err))
		return err;

	if (test_bit(NILFS_SC_SUPER_ROOT, &sci->sc_flags) && nilfs_discontinued(nilfs)) {
		down_write(&nilfs->ns_sem);
		req->sb_err = nilfs_writeback_super(sbi);
		up_write(&nilfs->ns_sem);
	}
	return 0;
}

/*
 * construction_timeout - timer callback of the construction timer
 * @data: the task to wake up, cast to unsigned long
 */
static void construction_timeout(unsigned long data)
{
	wake_up_process((struct task_struct *)data);
}

/*
 * nilfs_dispose_gcinode_list - dispose of the GC inodes that were written
 * @nilfs: the_nilfs object
 * @head: list of GC inodes (linked through i_dirty)
 *
 * Inodes that have NILFS_I_UPDATED set (the bit is cleared here) are
 * unhashed, removed from @head and cleared with nilfs_clear_gcinode().
 * They are processed in batches of up to SC_N_INODEVEC so that
 * nilfs_clear_gcinode() is called without ns_gc_inode_lock held.
 *
 * Inodes without NILFS_I_UPDATED are left on @head.  The function returns
 * as soon as a pass over the list finds nothing to dispose of; looping
 * until the list is empty would never terminate in that situation.
 */
static void
nilfs_dispose_gcinode_list(struct the_nilfs *nilfs, struct list_head *head)
{
	struct nilfs_inode_info *ii, *n;
	struct nilfs_inode_info *ivec[SC_N_INODEVEC], **pii;
	unsigned nv = 0;

	while (!list_empty(head)) {
		spin_lock(&nilfs->ns_gc_inode_lock); /* XXX: to be removed? */
		list_for_each_entry_safe(ii, n, head, i_dirty) {
			seg_debug(3, "removing gc_inode (ino=%lu)\n",
				  ii->vfs_inode.i_ino);
			if (!test_and_clear_bit(NILFS_I_UPDATED, &ii->i_state))
				continue;

			hlist_del_init(&ii->vfs_inode.i_hash);
			list_del_init(&ii->i_dirty);
			ivec[nv++] = ii;
			if (nv == SC_N_INODEVEC)
				break;
		}
		spin_unlock(&nilfs->ns_gc_inode_lock);

		/* Only inodes that are not updated remain; stop here */
		if (nv == 0)
			break;

		for (pii = ivec; nv > 0; pii++, nv--)
			nilfs_clear_gcinode(&(*pii)->vfs_inode);
	}
}

/*
 * nilfs_clean_segments - write out the blocks collected for cleaning
 * @sb: super block
 * @arg: argument handed on to nilfs_ioctl_prepare_clean_segments()
 *
 * With the segment constructor locked, the cleaning request is prepared,
 * the GC inodes of the_nilfs are moved to sci->sc_gc_inodes and a logical
 * segment with a super root (SC_LSEG_SR) is constructed with
 * NILFS_SC_GC_COPY set.  A failed construction is retried after sleeping
 * for sci->sc_interval, without any retry limit.
 *
 * Return Value: 0 on success, -EROFS if there is no segment constructor,
 * or the error returned by nilfs_ioctl_prepare_clean_segments().
 *
 * NOTE(review): when the preparation fails, nilfs_clear_gcdat_inode() is
 * not called although nilfs_init_gcdat_inode() was -- confirm that this is
 * intended.
 */
int nilfs_clean_segments(struct super_block *sb, unsigned long arg)
{
	struct nilfs_sb_info *sbi = NILFS_SB(sb);
	struct nilfs_sc_info *sci = NILFS_SC(sbi);
	struct the_nilfs *nilfs = sbi->s_nilfs;
	struct nilfs_transaction_info ti;
	struct nilfs_segctor_req req = { .mode = SC_LSEG_SR };
	int err;

	if (unlikely(!sci))
		return -EROFS;

	nilfs_segctor_lock(sbi, &ti, 1);

	nilfs_init_gcdat_inode(nilfs);
	err = nilfs_ioctl_prepare_clean_segments(nilfs, arg);
	if (unlikely(err))
		goto out_unlock;

	spin_lock(&nilfs->ns_gc_inode_lock); /* XXX: should be removed? */
	list_splice_init(&nilfs->ns_gc_inodes, sci->sc_gc_inodes.prev);
	spin_unlock(&nilfs->ns_gc_inode_lock);

	set_bit(NILFS_SC_GC_COPY, &sci->sc_flags);

	/* Retry until the construction succeeds */
	for (;;) {
		nilfs_segctor_accept(sci, &req);
		err = nilfs_segctor_construct(sci, &req);
		nilfs_dispose_gcinode_list(nilfs, &sci->sc_gc_inodes);
		nilfs_segctor_notify(sci, &req);

		if (likely(!err))
			break;

		nilfs_warning(sb, __FUNCTION__,
			      "segment construction failed. (err=%d)", err);
		/* Back off for one construction interval before retrying */
		set_current_state(TASK_INTERRUPTIBLE);
		schedule_timeout(sci->sc_interval);
	}

	clear_bit(NILFS_SC_GC_COPY, &sci->sc_flags);
	nilfs_clear_gcdat_inode(nilfs);

 out_unlock:
	nilfs_segctor_unlock(sbi);
	return err;
}

/*
 * nilfs_segctor_thread_construct - one construction run of the daemon
 * @sci: segment constructor info
 * @mode: construction mode
 *
 * If the filesystem is write-locked, the function only waits for the lock
 * to go away and returns; neither a construction nor a notification is
 * done in that case.
 */
static void nilfs_segctor_thread_construct(struct nilfs_sc_info *sci, int mode)
{
	struct nilfs_sb_info *sbi = NILFS_SB(sci->sc_super);
	struct nilfs_transaction_info ti;
	struct nilfs_segctor_req req = { .mode = mode };

	nilfs_segctor_lock(sbi, &ti, 0);

	nilfs_segctor_accept(sci, &req);

	if (unlikely(nilfs_write_locked(sbi->s_nilfs))) {
		nilfs_segctor_unlock(sbi);
		nilfs_wait_on_write(sbi->s_nilfs);
		return; /* Cleaner takes over the job */
	}

	nilfs_segctor_construct(sci, &req);
	nilfs_segctor_notify(sci, &req);

	/*
	 * An unclosed segment has to be retried.  This is done through
	 * sc_timer: its timeout invokes a complete construction, which
	 * closes the current logical segment.
	 */
	if (test_bit(NILFS_SC_UNCLOSED, &sci->sc_flags))
		nilfs_segctor_start_timer(sci);

	nilfs_segctor_unlock(sbi);
}

/**
 * nilfs_segctor_thread - main loop of the segment constructor thread.
 * @arg: pointer to a struct nilfs_sc_info.
 *
 * nilfs_segctor_thread() initializes a timer and serves as a daemon
 * to execute segment constructions.
 *
 * The timer is a local variable of this function and is published through
 * sci->sc_timer; it is removed with del_timer_sync() and sci->sc_timer is
 * reset to NULL before the function returns.  sci->sc_state_lock is held
 * while the request sequence and state flags are examined, and is dropped
 * around nilfs_segctor_thread_construct(), refrigerator() and schedule().
 *
 * Return Value: 0, once NILFS_SEGCTOR_QUIT has been seen in sci->sc_state.
 */
static int nilfs_segctor_thread(void *arg)
{
	struct nilfs_sc_info *sci = (struct nilfs_sc_info *)arg;
	struct timer_list timer;
	int timeout = 0;	/* nonzero: woke up because sc_timer expired */

	daemonize("segctord");

	/* The timer callback receives this task as its data argument. */
	init_timer(&timer);
	timer.data = (unsigned long)current;
	timer.function = construction_timeout;
	sci->sc_timer = &timer;

	/* start sync. */
	sci->sc_task = current;
	wake_up(&sci->sc_wait_task); /* for nilfs_segctor_start_thread() */
	printk(KERN_INFO
	       "segctord starting. Construction interval = %lu seconds, "
	       "CP frequency < %lu seconds\n",
	       sci->sc_interval / HZ, sci->sc_mjcp_freq / HZ);

	spin_lock(&sci->sc_state_lock);
 loop:
	/*
	 * Keep constructing, one request per iteration, until neither a
	 * timeout, a pending sequence number, nor a flush request is left.
	 * sc_state_lock is held at the top of every iteration.
	 */
	for (;;) {
		int mode;
  
		if (sci->sc_state & NILFS_SEGCTOR_QUIT)
			goto end_thread;

		seg_debug(2, "sequence: req=%u, done=%u, state=%lx\n",
			  sci->sc_seq_request, sci->sc_seq_done, sci->sc_state);

		if (timeout || sci->sc_seq_request != sci->sc_seq_done) {
			/* Timer expiry or an unserved request */
			mode = SC_LSEG_SR;
		} else if (sci->sc_state & NILFS_SEGCTOR_FLUSH) {
			/*
			 * Flush request: a plain flush is used when no
			 * segment is left unclosed, or when sc_mjcp_freq
			 * has not yet elapsed since sc_lseg_stime;
			 * otherwise a segment with a super root is made.
			 */
			if (!test_bit(NILFS_SC_UNCLOSED, &sci->sc_flags) ||
			    time_before(jiffies, sci->sc_lseg_stime + sci->sc_mjcp_freq))
				mode = (sci->sc_state & NILFS_SEGCTOR_FLUSH_IFILE) ?
					SC_FLUSH_IFILE : SC_FLUSH_DATA;
			else
				mode = SC_LSEG_SR;
		} else
			break;	/* nothing to do; go to sleep below */

		/* Construction runs without sc_state_lock. */
		spin_unlock(&sci->sc_state_lock);
		nilfs_segctor_thread_construct(sci, mode);
		spin_lock(&sci->sc_state_lock);
		timeout = 0;
  	}


	/* Freeze request: enter the refrigerator with the lock released. */
#if NEED_REFRIGERATOR_ARGS
	if (current->flags & PF_FREEZE) {
#else
	if (freezing(current)) {
#endif
		seg_debug(2, "suspending segctord\n");
		spin_unlock(&sci->sc_state_lock);
#if NEED_REFRIGERATOR_ARGS
		refrigerator(PF_FREEZE);
#else
		refrigerator();
#endif
		spin_lock(&sci->sc_state_lock);
	} else {
		DEFINE_WAIT(wait);
		int should_sleep = 1;

		/*
		 * Queue on sc_wait_daemon before re-checking the wake-up
		 * conditions, so a wake_up() issued after the checks still
		 * makes schedule() return.
		 */
		prepare_to_wait(&sci->sc_wait_daemon, &wait, TASK_INTERRUPTIBLE);

		if (sci->sc_seq_request != sci->sc_seq_done)
			should_sleep = 0;
		else if (sci->sc_state & NILFS_SEGCTOR_FLUSH)
			should_sleep = 0;
		else if (sci->sc_state & NILFS_SEGCTOR_COMMIT)
			/* Sleep only while sc_timer has not expired yet */
			should_sleep = time_before(jiffies, sci->sc_timer->expires);

		if (should_sleep) {
			spin_unlock(&sci->sc_state_lock);
			schedule();
			spin_lock(&sci->sc_state_lock);
		}
		finish_wait(&sci->sc_wait_daemon, &wait);
		/* Record whether the commit timer has run out by now. */
		timeout = ((sci->sc_state & NILFS_SEGCTOR_COMMIT) &&
			   time_after_eq(jiffies, sci->sc_timer->expires));
	}
	seg_debug(2, "woke %s\n", timeout ? "(timeout)" : "");
	goto loop;

 end_thread:
	spin_unlock(&sci->sc_state_lock);
	/* The timer is a local of this function; remove it before returning. */
	del_timer_sync(sci->sc_timer);
	sci->sc_timer = NULL;

	/* end sync. */
	sci->sc_task = NULL;
	wake_up(&sci->sc_wait_task); /* for nilfs_segctor_kill_thread() */
	seg_debug(1, "segctord exiting.\n");
	return 0;
}

/*
 * nilfs_segctor_start_thread - spawn segctord and wait until it is running
 * @sci: nilfs_sc_info
 *
 * Blocks until the new thread has stored itself in sci->sc_task.
 *
 * NOTE(review): the return value of kernel_thread() is not checked.  If
 * thread creation fails, sc_task is never set and the wait_event() below
 * will not finish.  Reporting the failure needs an error return here and
 * in the callers -- TODO confirm and fix together.
 */
static void nilfs_segctor_start_thread(struct nilfs_sc_info *sci)
{
	const unsigned long clone_flags = CLONE_VM | CLONE_FS | CLONE_FILES;

	kernel_thread(nilfs_segctor_thread, sci, clone_flags);
	wait_event(sci->sc_wait_task, sci->sc_task != NULL);
}

/*
 * nilfs_segctor_kill_thread - ask segctord to quit and wait for its exit
 * @sci: nilfs_sc_info
 *
 * Sets NILFS_SEGCTOR_QUIT and keeps waking the daemon until sc_task has
 * been cleared.  The function releases and re-takes sci->sc_state_lock
 * around the wait, so it is entered and left with that lock held.
 */
static void nilfs_segctor_kill_thread(struct nilfs_sc_info *sci)
{
	sci->sc_state |= NILFS_SEGCTOR_QUIT;

	for (;;) {
		if (!sci->sc_task)
			break;

		wake_up(&sci->sc_wait_daemon);

		/* Let the daemon take the lock and notice the quit flag */
		spin_unlock(&sci->sc_state_lock);
		wait_event(sci->sc_wait_task, sci->sc_task == NULL);
		spin_lock(&sci->sc_state_lock);
	}
}

/*
 * nilfs_segctor_init - finish setting up a constructor and start segctord
 * @sci: nilfs_sc_info
 * @ri: recovery information, or NULL
 *
 * Looks up the sketch inode and resets its size, clears the INIT state,
 * resets the request counters, takes over the used-segment list of @ri
 * when one is given, and finally starts the segctord thread.
 */
static inline void nilfs_segctor_init(struct nilfs_sc_info *sci,
				      struct nilfs_recovery_info *ri)
{
	struct inode *sketch = iget(sci->sc_super, NILFS_SKETCH_INO);

	sci->sc_sketch_inode = sketch;
	if (sketch)
		i_size_write(sketch, 0);

	sci->sc_state &= ~NILFS_SEGCTOR_INIT;
	sci->sc_seq_done = sci->sc_seq_request;
	sci->sc_nr_dirty = 0;

	/* Append the segments recorded in @ri to the tail of our list */
	if (ri)
		list_splice_init(&ri->ri_used_segments,
				 sci->sc_used_active_segments.prev);

	nilfs_segctor_start_thread(sci);
}

/*
 * Setup & clean-up functions
 */
/*
 * nilfs_segctor_new - allocate and pre-initialize a segment constructor
 * @sbi: nilfs_sb_info
 * @max_blocks: passed to nilfs_segbuf_init() for the embedded segment buffer
 *
 * The returned structure is zero-filled, has its wait queues, lock and
 * lists initialized, and is left in the NILFS_SEGCTOR_INIT state.  The
 * interval and watermark come from @sbi when set there, and from the
 * compiled-in defaults otherwise.
 *
 * Return Value: the new nilfs_sc_info, or NULL when the allocation or the
 * segment buffer initialization fails.
 */
static struct nilfs_sc_info *
nilfs_segctor_new(struct nilfs_sb_info *sbi, unsigned int max_blocks)
{
	struct nilfs_sc_info *sci = kmalloc(sizeof(*sci), GFP_KERNEL);

	if (!sci)
		return NULL;
	memset(sci, 0, sizeof(*sci));

	if (nilfs_segbuf_init(&sci->sc_segments.segbuf, max_blocks)) {
		kfree(sci);
		return NULL;
	}

	sci->sc_super = sbi->s_super;
	sci->sc_curseg = &sci->sc_segments;
	sci->sc_state = NILFS_SEGCTOR_INIT;

	/* Synchronization primitives */
	spin_lock_init(&sci->sc_state_lock);
	init_waitqueue_head(&sci->sc_wait_request);
	init_waitqueue_head(&sci->sc_wait_daemon);
	init_waitqueue_head(&sci->sc_wait_task);

	/* Lists */
	INIT_LIST_HEAD(&sci->sc_dirty_files);
	INIT_LIST_HEAD(&sci->sc_segments.list);
	INIT_LIST_HEAD(&sci->sc_gc_inodes);
	INIT_LIST_HEAD(&sci->sc_used_active_segments);
	INIT_LIST_HEAD(&sci->sc_cleaning_segments);

	/* Tunables: a nonzero value in @sbi overrides the default */
	sci->sc_interval = sbi->s_interval ?
		sbi->s_interval : HZ * NILFS_SC_DEFAULT_TIMEOUT;
	sci->sc_watermark = sbi->s_watermark ?
		sbi->s_watermark : NILFS_SC_DEFAULT_WATERMARK;
	sci->sc_mjcp_freq = HZ * NILFS_SC_DEFAULT_SR_FREQ;

	return sci;
}

/**
 * nilfs_segctor_destroy - destroy the segment constructor.
 * @sbi: nilfs_sb_info
 * @sci: nilfs_sc_info
 *
 * nilfs_segctor_destroy() kills the segctord thread and frees
 * the nilfs_sc_info struct.
 * Caller must hold the segment semaphore.
 */
static void nilfs_segctor_destroy(struct nilfs_sb_info *sbi, struct nilfs_sc_info *sci)
{
	int flag;

	spin_lock(&sci->sc_state_lock);
	flag = (sci->sc_state & NILFS_SEGCTOR_INIT);
	spin_unlock(&sci->sc_state_lock);
	if (flag)
		/* segctord is not running */
		goto destroy;

	up_write(&sbi->s_nilfs->ns_segctor_sem);

	spin_lock(&sci->sc_state_lock);
	nilfs_segctor_kill_thread(sci);
	flag = ((sci->sc_state & (NILFS_SEGCTOR_COMMIT | NILFS_SEGCTOR_FLUSH)) ||
		sci->sc_seq_request != sci->sc_seq_done);
	spin_unlock(&sci->sc_state_lock);

	if (flag || nilfs_confirm_construction(sbi, sci)) {
		int ret, retrycount = NILFS_SC_CLEANUP_RETRY;

		/* The segctord thread was stopped and its timer was removed.
		   But some tasks remain. */
		do {
			struct nilfs_transaction_info ti;
			struct nilfs_segctor_req req = { .mode = SC_LSEG_SR };

			nilfs_segctor_lock(sbi, &ti, 0);
			if (unlikely(nilfs_disk_full(sbi->s_nilfs))) {
				nilfs_warning(sbi->s_super, __FUNCTION__,
					      "closing writes on disk full condition.\n");
			} else if (unlikely(nilfs_write_locked(sbi->s_nilfs))) {
				nilfs_warning(sbi->s_super, __FUNCTION__,
					      "normal writes suspended by cleaner (ignored)\n");
			}
			nilfs_segctor_accept(sci, &req);
			ret = nilfs_segctor_construct(sci, &req);
			nilfs_segctor_notify(sci, &req);
			nilfs_segctor_unlock(sbi);

		} while (ret && retrycount-- > 0);
	}
	if (!list_empty(&sci->sc_dirty_files)) {
		nilfs_warning(sbi->s_super, __FUNCTION__, 
			      "holding dirty file(s) after the last construction\n");
		nilfs_dispose_list(sbi, &sci->sc_dirty_files, 1);
	}
	if (!list_empty(&sci->sc_used_active_segments)) {
		seg_debug(1, "disposing uncommitted active segment(s)\n");
		nilfs_do_cancel_used_active_segments(sci, 1);
	}
	if (!list_empty(&sci->sc_cleaning_segments)) {
		seg_debug(1, "disposing uncommitted segments to be freed\n");
		nilfs_segctor_clear_segments_to_be_freed(sci);
	}

	if (sci->sc_sketch_inode) {
		iput(sci->sc_sketch_inode);
		sci->sc_sketch_inode = NULL;
	}
	down_write(&sbi->s_nilfs->ns_segctor_sem);

 destroy:
	free_extended_segments(sci);
	nilfs_segbuf_destroy(&sci->sc_segments.segbuf);
	kfree(sci);
}

/**
 * nilfs_attach_segment_constructor - attach a segment constructor
 * @sbi: nilfs_sb_info
 * @ri: nilfs_recovery_info
 *
 * nilfs_attach_segment_constructor() allocates a struct nilfs_sc_info,
 * initilizes it, and starts the segment constructor.
 *
 * Return Value: On success, 0 is returned. On error, one of the following
 * negative error code is returned.
 *
 * %-ENOMEM - Insufficient memory available.
 */
int nilfs_attach_segment_constructor(struct nilfs_sb_info *sbi,
				     struct nilfs_recovery_info *ri)
{
	struct the_nilfs *nilfs = sbi->s_nilfs;

	/* Each field of nilfs_segctor is zero-cleared in the initialization of
	   super block info */
	sbi->s_sc_info = nilfs_segctor_new(sbi, sbi->s_nilfs->ns_blocks_per_segment);
	if (!sbi->s_sc_info)
		return -ENOMEM;

	nilfs_attach_writer(nilfs, sbi);
	nilfs_segctor_init(NILFS_SC(sbi), ri);
	return 0;
}

/**
 * nilfs_detach_segment_constructor - destroy the segment constructor
 * @sbi: nilfs_sb_info
 *
 * nilfs_detach_segment_constructor() destroys the segment constructor
 * attached to @sbi, if any (releasing the write lock first when it is
 * held), then empties the dirty file list of @sbi and detaches @sbi as
 * a writer.  The segment semaphore is held for writing throughout the
 * destruction and the list hand-over.
 */
void nilfs_detach_segment_constructor(struct nilfs_sb_info *sbi)
{
	struct the_nilfs *nilfs = sbi->s_nilfs;
	struct nilfs_sc_info *sci;
	LIST_HEAD(garbage_list);

	down_write(&nilfs->ns_segctor_sem);

	sci = NILFS_SC(sbi);
	if (sci) {
		if (unlikely(nilfs_write_locked(nilfs)))
			nilfs_unlock_write(nilfs);

		nilfs_segctor_destroy(sbi, sci);
		sbi->s_sc_info = NULL;
	}

	/*
	 * Any file still on the dirty list is moved to a private list
	 * under s_inode_lock and disposed of after the locks are dropped.
	 */
	spin_lock(&sbi->s_inode_lock);
	if (!list_empty(&sbi->s_dirty_files)) {
		list_splice_init(&sbi->s_dirty_files, &garbage_list);
		nilfs_warning(sbi->s_super, __FUNCTION__,
			      "Non empty dirty list after the last "
			      "segment construction\n");
	}
	spin_unlock(&sbi->s_inode_lock);

	up_write(&nilfs->ns_segctor_sem);

	nilfs_dispose_list(sbi, &garbage_list, 1);
	nilfs_detach_writer(nilfs, sbi);
}

/* Local Variables:		*/
/* eval: (c-set-style "linux")	*/
/* End:				*/
