/*
 *  linux/fs/ext2/inode.c
 *
 * Copyright (C) 1992, 1993, 1994, 1995
 * Remy Card (card@masi.ibp.fr)
 * Laboratoire MASI - Institut Blaise Pascal
 * Universite Pierre et Marie Curie (Paris VI)
 *
 *  from
 *
 *  linux/fs/minix/inode.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 *
 *  Goal-directed block allocation by Stephen Tweedie
 * 	(sct@dcs.ed.ac.uk), 1993, 1998
 *  Big-endian to little-endian byte-swapping/bitmaps by
 *        David S. Miller (davem@caip.rutgers.edu), 1995
 *  64-bit file support on 64-bit platforms by Jakub Jelinek
 * 	(jj@sunsite.ms.mff.cuni.cz)
 *
 *  Assorted race fixes, rewrite of ext2_get_block() by Al Viro, 2000
 */

/*
 *  Copyright (C) 1995  Antoine Dumesnil de Maricourt (dumesnil@etca.fr) 
 *  	(transparent compression code)
 */

/*
 *  Copyright (C) 2001 Alcatel Business Systems - R&D Illkirch FRANCE
 *
 *  	Transparent compression code for 2.4 kernel.
 *
 *  Denis Richard (denis.richard@sxb.bsf.alcatel.fr)
 *  Pierre Peiffer (pierre.peiffer@sxb.bsf.alcatel.fr)
 *
 *  Adapted from patch e2compr-0.4.39-patch-2.2.18 .
 */

#include <linux/config.h>
#include <linux/fs.h>
#include "debug.h"
#include <linux/ext2_fs.h>
#include <linux/ext2_fs_c.h>
#include <linux/kmod.h>
#include <linux/locks.h>
#include <linux/smp_lock.h>
#include <linux/sched.h>
#include <linux/highuid.h>
#include <linux/quotaops.h>
#include <linux/module.h>

MODULE_AUTHOR("Remy Card and others");
MODULE_DESCRIPTION("Second Extended Filesystem");
MODULE_LICENSE("GPL");


static int ext2_update_inode(struct inode * inode, int do_sync);

/*
 * Called at each iput()
 */
void ext2_put_inode (struct inode * inode)
{
#ifdef CONFIG_EXT2_COMPRESS
	/*
	 * Only regular files that still have links and carry
	 * EXT2_CLEANUP_FL get ext2_cleanup_compressed_inode() run on them.
	 */
	int wants_cleanup = 0;

	if (S_ISREG (inode->i_mode) && inode->i_nlink)
		wants_cleanup =
			(inode->u.ext2_i.i_compr_flags & EXT2_CLEANUP_FL) != 0;

	if (wants_cleanup) {
# ifdef EXT2_COMPR_REPORT_PUT
		printk(KERN_DEBUG
		       "put_inode: pid=%d, i_ino=%ld, "
		       "compr_flags=0x%x, i_count=%d\n",
		       current->pid, inode->i_ino,
		       inode->u.ext2_i.i_compr_flags,
		       inode->i_count);
# endif
		/* The result is deliberately ignored. */
		(void) ext2_cleanup_compressed_inode (inode);
	}
#endif

	/* Unconditionally give back whatever is still preallocated. */
	ext2_discard_prealloc (inode);
}

/*
 * Called at the last iput() if i_nlink is zero.
 */
void ext2_delete_inode (struct inode * inode)
{
	int keep_on_disk;

	lock_kernel();

	/* Bad inodes and the two reserved ACL inodes are never freed on disk. */
	keep_on_disk = is_bad_inode(inode) ||
		       inode->i_ino == EXT2_ACL_IDX_INO ||
		       inode->i_ino == EXT2_ACL_DATA_INO;

	if (!keep_on_disk) {
		/* Stamp the deletion time and push the inode out first ... */
		inode->u.ext2_i.i_dtime	= CURRENT_TIME;
		mark_inode_dirty(inode);
		ext2_update_inode(inode, IS_SYNC(inode));

		/* ... then release its blocks (if any) and the inode itself. */
		inode->i_size = 0;
		if (inode->i_blocks)
			ext2_truncate (inode);
		ext2_free_inode (inode);
	}

	unlock_kernel();

	/* We must guarantee clearing of inode... */
	if (keep_on_disk)
		clear_inode(inode);
}

/*
 * Hand back any blocks still recorded in the inode's preallocation window.
 * Compiles to an empty function when EXT2_PREALLOCATE is not defined.
 */
void ext2_discard_prealloc (struct inode * inode)
{
#ifdef EXT2_PREALLOCATE
	lock_kernel();
	/* Writer: ->i_prealloc* */
	if (inode->u.ext2_i.i_prealloc_count != 0) {
		unsigned long first = inode->u.ext2_i.i_prealloc_block;
		unsigned short count = inode->u.ext2_i.i_prealloc_count;

		/* Forget the window before actually freeing the blocks. */
		inode->u.ext2_i.i_prealloc_block = 0;
		inode->u.ext2_i.i_prealloc_count = 0;
		/* Writer: end */
		ext2_free_blocks (inode, first, count);
	}
	unlock_kernel();
#endif
}

/*
 * Allocate one block for @inode, preferably at @goal.  With
 * EXT2_PREALLOCATE the inode's preallocation window is consumed when @goal
 * is its first block or the block just before it; otherwise the window is
 * discarded and ext2_new_block() is asked for a fresh block (regular files
 * also let it refill the window).  Returns the block number; the error
 * value is reported by ext2_new_block() through @err.
 */
static int ext2_alloc_block (struct inode * inode, unsigned long goal, int *err)
{
#ifdef EXT2FS_DEBUG
	static unsigned long alloc_hits = 0, alloc_attempts = 0;
#endif
	unsigned long result;
#ifdef EXT2_PREALLOCATE
	int hit = 0;

	/* Writer: ->i_prealloc* */
	if (inode->u.ext2_i.i_prealloc_count) {
		unsigned long window = inode->u.ext2_i.i_prealloc_block;

		hit = (goal == window || goal + 1 == window);
	}

	if (hit) {
		/* Take the first block of the window and shrink it by one. */
		result = inode->u.ext2_i.i_prealloc_block;
		inode->u.ext2_i.i_prealloc_block++;
		inode->u.ext2_i.i_prealloc_count--;
		/* Writer: end */
		ext2_debug ("preallocation hit (%lu/%lu).\n",
			    ++alloc_hits, ++alloc_attempts);
	} else {
		ext2_discard_prealloc (inode);
		ext2_debug ("preallocation miss (%lu/%lu).\n",
			    alloc_hits, ++alloc_attempts);
		if (!S_ISREG(inode->i_mode))
			result = ext2_new_block (inode, goal, 0, 0, err);
		else
			result = ext2_new_block (inode, goal, 
				 &inode->u.ext2_i.i_prealloc_count,
				 &inode->u.ext2_i.i_prealloc_block, err);
	}
#else
	result = ext2_new_block (inode, goal, 0, 0, err);
#endif
	return result;
}

/*
 * One link of a block-pointer chain: where a block number was read from,
 * what was found there at the time, and the buffer hosting that location.
 * Filled by add_chain() and re-checked by verify_chain().
 */
typedef struct {
	u32	*p;	/* address the block number was read from */
	u32	key;	/* value that *p held when it was recorded */
	struct buffer_head *bh;	/* buffer passed to add_chain() for this link (NULL for the inode's own i_data) */
} Indirect;

/*
 * Record one link in *@p: the pointer location @v, the value currently
 * stored there, and the buffer @bh hosting it.
 */
static inline void add_chain(Indirect *p, struct buffer_head *bh, u32 *v)
{
	p->p = v;
	p->bh = bh;
	p->key = *v;
}

/*
 * Return 1 if every link in [@from, @to] still holds the value recorded in
 * its ->key, 0 as soon as one differs.  An empty range yields 1.
 */
static inline int verify_chain(Indirect *from, Indirect *to)
{
	Indirect *cur;

	for (cur = from; cur <= to; cur++) {
		if (cur->key != *cur->p)
			return 0;
	}
	return 1;
}

/**
 *	ext2_block_to_path - parse the block number into array of offsets
 *	@inode: inode in question (we are only interested in its superblock)
 *	@i_block: block number to be parsed
 *	@offsets: array to store the offsets in
 *
 *	To store the locations of file's data ext2 uses a data structure common
 *	for UNIX filesystems - tree of pointers anchored in the inode, with
 *	data blocks at leaves and indirect blocks in intermediate nodes.
 *	This function translates the block number into path in that tree -
 *	return value is the path length and @offsets[n] is the offset of
 *	pointer to (n+1)th node in the nth one. If @i_block is out of range
 *	(negative or too large) warning is printed and zero returned.
 *
 *	Note: function doesn't find node addresses, so no IO is needed. All
 *	we need to know is the capacity of indirect blocks (taken from the
 *	inode->i_sb).
 */

/*
 * Portability note: the last comparison (check that we fit into triple
 * indirect block) is spelled differently, because otherwise on an
 * architecture with 32-bit longs and 8Kb pages we might get into trouble
 * if our filesystem had 8Kb blocks. We might use long long, but that would
 * kill us on x86. Oh, well, at least the sign propagation does not matter -
 * i_block would have to be negative in the very beginning, so we would not
 * get there at all.
 */

static int ext2_block_to_path(struct inode *inode, long i_block, int offsets[4])
{
	int ptrs = EXT2_ADDR_PER_BLOCK(inode->i_sb);
	int ptrs_bits = EXT2_ADDR_PER_BLOCK_BITS(inode->i_sb);
	const long direct_blocks = EXT2_NDIR_BLOCKS,
		indirect_blocks = ptrs,
		double_blocks = (1 << (ptrs_bits * 2));
	int n = 0;

	if (i_block < 0) {
		ext2_warning (inode->i_sb, "ext2_block_to_path", "block < 0");
		return n;
	}

	/* Direct pointer held in the inode. */
	if (i_block < direct_blocks) {
		offsets[n++] = i_block;
		return n;
	}

	/* Single indirect. */
	i_block -= direct_blocks;
	if (i_block < indirect_blocks) {
		offsets[n++] = EXT2_IND_BLOCK;
		offsets[n++] = i_block;
		return n;
	}

	/* Double indirect. */
	i_block -= indirect_blocks;
	if (i_block < double_blocks) {
		offsets[n++] = EXT2_DIND_BLOCK;
		offsets[n++] = i_block >> ptrs_bits;
		offsets[n++] = i_block & (ptrs - 1);
		return n;
	}

	/*
	 * Triple indirect.  The range check is done on the top-level index
	 * rather than against a precomputed block count (see the
	 * portability note above).
	 */
	i_block -= double_blocks;
	if ((i_block >> (ptrs_bits * 2)) < ptrs) {
		offsets[n++] = EXT2_TIND_BLOCK;
		offsets[n++] = i_block >> (ptrs_bits * 2);
		offsets[n++] = (i_block >> ptrs_bits) & (ptrs - 1);
		offsets[n++] = i_block & (ptrs - 1);
		return n;
	}

	ext2_warning (inode->i_sb, "ext2_block_to_path", "block > big");
	return n;
}

/**
 *	ext2_get_branch - read the chain of indirect blocks leading to data
 *	@inode: inode in question
 *	@depth: depth of the chain (1 - direct pointer, etc.)
 *	@offsets: offsets of pointers in inode/indirect blocks
 *	@chain: place to store the result
 *	@err: here we store the error value
 *
 *	Function fills the array of triples <key, p, bh> and returns %NULL
 *	if everything went OK or the pointer to the last filled triple
 *	(incomplete one) otherwise. Upon the return chain[i].key contains
 *	the number of (i+1)-th block in the chain (as it is stored in memory,
 *	i.e. little-endian 32-bit), chain[i].p contains the address of that
 *	number (it points into struct inode for i==0 and into the bh->b_data
 *	for i>0) and chain[i].bh points to the buffer_head of i-th indirect
 *	block for i>0 and NULL for i==0. In other words, it holds the block
 *	numbers of the chain, addresses they were taken from (and where we can
 *	verify that chain did not change) and buffer_heads hosting these
 *	numbers.
 *
 *	Function stops when it stumbles upon zero pointer (absent block)
 *		(pointer to last triple returned, *@err == 0)
 *	or when it gets an IO error reading an indirect block
 *		(ditto, *@err == -EIO)
 *	or when it notices that chain had been changed while it was reading
 *		(ditto, *@err == -EAGAIN; the buffer that had just been read
 *		is released here, since it never becomes part of the chain)
 *	or when it reads all @depth-1 indirect blocks successfully and finds
 *	the whole chain, all way to the data (returns %NULL, *err == 0).
 *
 *	The buffer_heads stored in chain[1..] are left held; releasing them
 *	is up to the caller.
 */
static Indirect *ext2_get_branch(struct inode *inode,
				 int depth,
				 int *offsets,
				 Indirect chain[4],
				 int *err)
{
	kdev_t dev = inode->i_dev;
	int size = inode->i_sb->s_blocksize;
	Indirect *p = chain;
	struct buffer_head *bh;

	*err = 0;
	/* i_data is not going away, no lock needed */
	add_chain (chain, NULL, inode->u.ext2_i.i_data + *offsets);
	if (HOLE_BLKADDR(p->key))
		goto no_block;
	while (--depth) {
		bh = bread(dev, le32_to_cpu(p->key), size);
		if (!bh)
			goto failure;
		/* Reader: pointers */
		if (!verify_chain(chain, p))
			goto changed;
		add_chain(++p, bh, (u32*)bh->b_data + *++offsets);
		/* Reader: end */
		if (HOLE_BLKADDR(p->key))
			goto no_block;
	}
	return NULL;

changed:
	/*
	 * bh was obtained from bread() but has not been stored in chain[],
	 * so the caller has no way to release it.  Drop our reference here,
	 * otherwise it leaks every time the chain changes under us.
	 */
	brelse(bh);
	*err = -EAGAIN;
	goto no_block;
failure:
	*err = -EIO;
no_block:
	return p;
}

/**
 *	ext2_find_near - find a place for allocation with sufficient locality
 *	@inode: owner
 *	@ind: descriptor of indirect block.
 *
 *	This function returns the preferred place for block allocation.
 *	It is used when heuristic for sequential allocation fails.
 *	Rules are:
 *	  + if there is a block to the left of our position - allocate near it.
 *	  + if pointer will live in indirect block - allocate near that block.
 *	  + if pointer will live in inode - allocate in the same cylinder group.
 *	Caller must make sure that @ind is valid and will stay that way.
 */

static inline unsigned long ext2_find_near(struct inode *inode, Indirect *ind)
{
	struct buffer_head *host = ind->bh;
	u32 *base;
	u32 *slot;

	/* The pointer array is either the indirect block or the inode's i_data. */
	if (host)
		base = (u32*) host->b_data;
	else
		base = inode->u.ext2_i.i_data;

	/* Walk left from our slot looking for the nearest non-hole pointer. */
	slot = ind->p;
	while (--slot >= base) {
		if (!HOLE_BLKADDR(*slot))
			return le32_to_cpu(*slot);
	}

	/* Nothing to the left: fall back to the indirect block's own location. */
	if (host)
		return host->b_blocknr;

	/*
	 * It is going to be referred to from the inode itself? OK, just put
	 * it into the same cylinder group then.
	 */
	return (inode->u.ext2_i.i_block_group * 
		EXT2_BLOCKS_PER_GROUP(inode->i_sb)) +
	       le32_to_cpu(inode->i_sb->u.ext2_sb.s_es->s_first_data_block);
}

/**
 *	ext2_find_goal - find a preferred place for allocation.
 *	@inode: owner
 *	@block:  block we want
 *	@chain:  chain of indirect blocks
 *	@partial: pointer to the last triple within a chain
 *	@goal:	place to store the result.
 *
 *	Normally this function finds the preferred place for block allocation,
 *	stores it in *@goal and returns zero. If the branch had been changed
 *	under us we return -EAGAIN and leave *@goal untouched.
 *
 *	The previous contents of *@goal are never used, so the caller does
 *	not have to initialise it.
 */

static inline int ext2_find_goal(struct inode *inode,
				 long block,
				 Indirect chain[4],
				 Indirect *partial,
				 unsigned long *goal)
{
	/* Writer: ->i_next_alloc* */
	if (block == inode->u.ext2_i.i_next_alloc_block + 1) {
		inode->u.ext2_i.i_next_alloc_block++;
		inode->u.ext2_i.i_next_alloc_goal++;
	} 
	/* Writer: end */
	/* Reader: pointers, ->i_next_alloc* */
	if (verify_chain(chain, partial)) {
		/*
		 * Start from "no goal".  The test below reads *goal, and the
		 * caller hands us an uninitialised variable; without this
		 * reset stack garbage could be taken for a valid goal and
		 * the locality fallback skipped.
		 */
		*goal = 0;
		/*
		 * try the heuristic for sequential allocation,
		 * failing that at least try to get decent locality.
		 */
		if (block == inode->u.ext2_i.i_next_alloc_block)
			*goal = inode->u.ext2_i.i_next_alloc_goal;
		if (!*goal)
			*goal = ext2_find_near(inode, partial);
		return 0;
	}
	/* Reader: end */
	return -EAGAIN;
}

/**
 *	ext2_alloc_branch - allocate and set up a chain of blocks.
 *	@inode: owner
 *	@num: depth of the chain (number of blocks to allocate)
 *	@offsets: offsets (in the blocks) to store the pointers to next.
 *	@branch: place to store the chain in.
 *
 *	This function allocates @num blocks, zeroes out all but the last one,
 *	links them into chain and (if we are synchronous) writes them to disk.
 *	In other words, it prepares a branch that can be spliced onto the
 *	inode. It stores the information about that chain in the branch[], in
 *	the same format as ext2_get_branch() would do. We are calling it after
 *	we had read the existing part of chain and partial points to the last
 *	triple of that (one with zero ->key). Upon the exit we have the same
 *	picture as after the successful ext2_get_block(), except that in one
 *	place chain is disconnected - *branch->p is still zero (we did not
 *	set the last link), but branch->key contains the number that should
 *	be placed into *branch->p to fill that gap.
 *
 *	If allocation fails we free all blocks we've allocated (and forget
 *	their buffer_heads) and return the error value from the failed
 *	ext2_alloc_block() (normally -ENOSPC). Otherwise we set the chain
 *	as described above and return 0.
 */

static int ext2_alloc_branch(struct inode *inode,
			     int num,
			     unsigned long goal,
			     int *offsets,
			     Indirect *branch)
{
	int blocksize = inode->i_sb->s_blocksize;
	int n = 0;
	int err;
	int i;
	int prev = ext2_alloc_block(inode, goal, &err);

	/* The head of the branch; only its key is filled in here. */
	branch[0].key = cpu_to_le32(prev);

	if (prev) {
		n = 1;
		while (n < num) {
			struct buffer_head *bh;
			/* Allocate the next block, aiming right at its parent */
			int blk = ext2_alloc_block(inode, prev, &err);

			if (!blk)
				break;
			branch[n].key = cpu_to_le32(blk);

			/*
			 * Get buffer_head for parent block, zero it out and
			 * set the pointer to new one, then send parent to
			 * disk.
			 */
			bh = getblk(inode->i_dev, prev, blocksize);
			if (!buffer_uptodate(bh))
				wait_on_buffer(bh);
			memset(bh->b_data, 0, blocksize);
			branch[n].bh = bh;
			branch[n].p = (u32*) bh->b_data + offsets[n];
			*branch[n].p = branch[n].key;
			mark_buffer_uptodate(bh, 1);
			mark_buffer_dirty_inode(bh, inode);
			if (IS_SYNC(inode) || inode->u.ext2_i.i_osync) {
				ll_rw_block (WRITE, 1, &bh);
				wait_on_buffer (bh);
			}

			prev = blk;
			n++;
		}
	}

	if (n != num) {
		/* Allocation failed, free what we already allocated */
		for (i = 1; i < n; i++)
			bforget(branch[i].bh);
		for (i = 0; i < n; i++)
			ext2_free_blocks(inode, le32_to_cpu(branch[i].key), 1);
		return err;
	}
	return 0;
}

/**
 *	ext2_splice_branch - splice the allocated branch onto inode.
 *	@inode: owner
 *	@block: (logical) number of block we are adding
 *	@chain: chain of indirect blocks (with a missing link - see
 *		ext2_alloc_branch)
 *	@where: location of missing link
 *	@num:   number of blocks we are adding
 *
 *	This function verifies that chain (up to the missing link) had not
 *	changed, fills the missing link and does all housekeeping needed in
 *	inode (->i_blocks, etc.). In case of success we end up with the full
 *	chain to new block and return 0. Otherwise (== chain had been changed)
 *	we free the new blocks (forgetting their buffer_heads, indeed) and
 *	return -EAGAIN.
 */

static inline int ext2_splice_branch(struct inode *inode,
				     long block,
				     Indirect chain[4],
				     Indirect *where,
				     int num)
{
	int i;

	/* Verify that place we are splicing to is still there and vacant */

	/* Writer: pointers, ->i_next_alloc* */
	if (!verify_chain(chain, where-1) || !HOLE_BLKADDR(*where->p)) {
		/* Writer: end */
		/*
		 * Somebody changed the path under us: throw the freshly
		 * built branch away and let the caller start over.
		 */
		for (i = 1; i < num; i++)
			bforget(where[i].bh);
		for (i = 0; i < num; i++)
			ext2_free_blocks(inode, le32_to_cpu(where[i].key), 1);
		return -EAGAIN;
	}

	/* That's it: fill the missing link and update the allocation hints */

	*where->p = where->key;
	inode->u.ext2_i.i_next_alloc_block = block;
	inode->u.ext2_i.i_next_alloc_goal = le32_to_cpu(where[num-1].key);

	/* Writer: end */

	/* We are done with atomic stuff, now do the rest of housekeeping */

	inode->i_ctime = CURRENT_TIME;

	/* had we spliced it onto indirect block? */
	if (where->bh) {
		mark_buffer_dirty_inode(where->bh, inode);
		if (IS_SYNC(inode) || inode->u.ext2_i.i_osync) {
			ll_rw_block (WRITE, 1, &where->bh);
			wait_on_buffer(where->bh);
		}
	}

	if (!(IS_SYNC(inode) || inode->u.ext2_i.i_osync))
		mark_inode_dirty(inode);
	else
		ext2_sync_inode (inode);

	return 0;
}

/*
 * Allocation strategy is simple: if we have to allocate something, we will
 * have to go the whole way to leaf. So let's do it before attaching anything
 * to tree, set linkage between the newborn blocks, write them if sync is
 * required, recheck the path, free and repeat if check fails, otherwise
 * set the last missing link (that will protect us from any truncate-generated
 * removals - all blocks on the path are immune now) and possibly force the
 * write on the parent block.
 * That has a nice additional property: no special recovery from the failed
 * allocations is needed - we simply release blocks and do not touch anything
 * reachable from inode.
 */
/*
 * Map logical block @iblock of @inode into @bh_result (b_dev, b_blocknr,
 * BH_Mapped).  When @create is non-zero a missing block, together with any
 * missing indirect blocks on the way, is allocated and BH_New is set as
 * well.  Returns 0 or a negative error value; a plain lookup that hits a
 * hole returns 0 without touching @bh_result.
 */
int ext2_get_block(struct inode *inode, long iblock, struct buffer_head *bh_result, int create)
{
	int err = -EIO;
	int offsets[4];
	Indirect chain[4];
	Indirect *partial;
	unsigned long goal;	/* not initialised here; only ext2_find_goal() stores into it */
	int left;
	int depth = ext2_block_to_path(inode, iblock, offsets);

	/* Block number out of range: fail with the initial -EIO. */
	if (depth == 0)
		goto out;

	lock_kernel();
reread:
	partial = ext2_get_branch(inode, depth, offsets, chain, &err);
	/* Simplest case - block found, no allocation needed */
	if (!partial) {
got_it:
		/* Also entered from below once a new branch has been spliced in. */
		bh_result->b_dev = inode->i_dev;
		bh_result->b_blocknr = le32_to_cpu(chain[depth-1].key);
		bh_result->b_state |= (1UL << BH_Mapped);
		/* Clean up and exit */
		partial = chain+depth-1; /* the whole chain */
		goto cleanup;
	}

	/* Next simple case - plain lookup or failed read of indirect block */
	if (!create || err == -EIO) {
cleanup:
		/* Release every indirect buffer held in chain[1..partial]. */
		while (partial > chain) {
			brelse(partial->bh);
			partial--;
		}
		unlock_kernel();
out:
		return err;
	}

	/*
	 * Indirect block might be removed by truncate while we were
	 * reading it. Handling of that case (forget what we've got and
	 * reread) is taken out of the main path.
	 */
	if (err == -EAGAIN)
		goto changed;

	if (ext2_find_goal(inode, iblock, chain, partial, &goal) < 0)
		goto changed;

	/* Number of chain entries still missing, from partial down to the leaf. */
	left = (chain + depth) - partial;
	err = ext2_alloc_branch(inode, left, goal,
					offsets+(partial-chain), partial);
	if (err)
		goto cleanup;

	if (ext2_splice_branch(inode, iblock, chain, partial, left) < 0)
		goto changed;

	bh_result->b_state |= (1UL << BH_New);
	goto got_it;

changed:
	/* Drop the buffers we hold with bforget() and retry from scratch. */
	while (partial > chain) {
		bforget(partial->bh);
		partial--;
	}
	goto reread;
}

#ifdef CONFIG_EXT2_COMPRESS
/*
 *	Readpage method that will take care of decompression.
 */
/* effic: I (pjm) think that at present, reading a 32KB cluster 4KB at
   a time does `decompress 4KB' for the first 4KB, then `decompress
   8KB' for the second, and so on.  See if we can provide the page
   cache with all the pages in a cluster.  The problem is, we don't
   want to erase anything that hasn't been written to disk, so we can't
   just call update_vm_cache().  The plan at present is to remember
   what the contents of ext2_rd_wa.u come from, and don't bother
   decompressing anything if the working area already contains the
   right data.  However, this is only a win where adjacent calls to
   ext2_decompress_blocks() request the same cluster.  We could force
   that by copying some code from generic_file_read() (but check for
   deadlocks before doing anything like that), but instead I'm taking
   the more passive approach of hoping for the best. */
static int ext2_readpage(struct file *file, struct page *page)
{
	struct inode *inode = page->mapping->host;
	struct page *pg[EXT2_MAX_CLUSTER_PAGES], *epg[EXT2_MAX_CLUSTER_PAGES];
	u32 cluster0, max_cluster;
	int i, blockOfCluster, blocksToDo, npg;
	/* Number of filesystem blocks per page. */
	const int inc = PAGE_SIZE >> inode->i_sb->s_blocksize_bits;

	/* For directories, fall out through default routine */
        if (S_ISDIR(inode->i_mode))
	  return block_read_full_page(page,ext2_get_block);

	/* The semaphore prevents us trying to compress and decompress
	   the cluster at the same time, or compress a cluster in the
	   middle of reading it (thinking it to be uncompressed).

	   You may not like the fact that we hold the semaphore across
	   readpage (given that it isn't held without e2compr compiled
	   in), but it does guarantee that we won't compress the
	   cluster during readpage.  (OTOH, it's unlikely, if not
	   impossible, for someone to ,compress a cluster and rewrite
	   the blocks` before the readpage completes.) */
	/* This procedure used to have `#ifndef EXT2_LOCK_BUFFERS'
	   around all the semaphore stuff, and unlocked each buffer
	   before brelsing them ifdef EXT2_LOCK_BUFFERS.  I (pjm,
	   1998-01-20) have removed that because (a) EXT2_LOCK_BUFFERS
	   isn't #defined anywhere, and doesn't appear outside of this
	   function, and (b) I haven't looked at what effect locking
	   the buffers has.  You may like to reintroduce the idea of
	   buffer locking to this function if you're more familiar
	   with buffer locking than I, and believe that the full i_sem
	   isn't necessary to protect from races (people seeing raw
	   compressed data) between readpage and ext2_file_write(),
	   ext2_compress_cluster() and ext2_truncate(). */
	/* The page lock is dropped before taking i_sem and is only
	   re-taken with lock_page() further down, on the paths that
	   actually go on to fill the page. */
	UnlockPage(page);
	down(&inode->i_sem);
	assert (atomic_read(&inode->i_sem.count) <= 0); /* i.e. sem down */

	/* No compressed blocks flagged on this inode, or compression
	   disabled: read the page the ordinary way. */
	if (!(inode->u.ext2_i.i_flags & EXT2_COMPRBLK_FL)
	    || ext2_compression_disabled(inode))
		goto readpage_uncompressed;

	/* Work out which clusters the page touches (cluster0 through
	   max_cluster) and where in the first cluster it starts. */
	{
		register u32 blockOfFile
			= (page->index << PAGE_CACHE_SHIFT) >> inode->i_sb->s_blocksize_bits;

		blocksToDo = PAGE_SIZE >> inode->i_sb->s_blocksize_bits;
		cluster0 = ext2_block_to_cluster(inode, blockOfFile);
		max_cluster = ext2_block_to_cluster
			(inode, blockOfFile + blocksToDo - 1);
		blockOfCluster
			= blockOfFile - ext2_cluster_block0(inode, cluster0);
	}

	/* Check if any part of the requested area contains part of a
	   compressed cluster.  If not, we can use default ext2_readpage().

	   (Note that we don't have to worry about a cluster becoming
	   compressed in the meantime, because we have the semaphore.)

	   A page can cover up to 9 clusters.  (The maximum can only
	   occur with 32KB pages, 4KB clusters, and a non-page-aligned
	   offset.  Thanks go to Kurt Fitzner for reporting that
	   page offsets needn't be aligned; see generic_file_mmap().)  */

	/* */
	{
	  int isCmp[(PAGE_SIZE >> 12) + 1];
	  u8 *dst;
	  unsigned clu_ix;

	  assert (max_cluster - cluster0 < sizeof(isCmp)/sizeof(*isCmp));
	  for (clu_ix = 0; cluster0 + clu_ix <= max_cluster; clu_ix++) {
		  isCmp[clu_ix] = ext2_cluster_is_compressed_fn (inode, cluster0 + clu_ix);
		  /* NOTE(review): on this path the page has not been
		     re-locked and get_page() has not been called, yet
		     io_error clears PG_locked and decrements page->count.
		     Confirm that the reference count and lock state stay
		     balanced here. */
		  if (isCmp[clu_ix] < 0)
			  goto io_error;
	  }
	  for (clu_ix = 0; cluster0 + clu_ix <= max_cluster; clu_ix++)
		  if (isCmp[clu_ix] > 0)
			  goto readpage_compressed;
	  /* fall through */
   readpage_uncompressed:
	  /* Nothing compressed under this page: re-take the page lock and
	     use the generic block reader. */
	  {
		  int rc=0;
		  
		  lock_page(page);

		  /* Did somebody else fill it already? */
		  if (Page_Uptodate(page))
		    UnlockPage(page);
		  else
		    rc = block_read_full_page(page,ext2_get_block);
		  up(&inode->i_sem);
		  return rc;
	  }

   readpage_compressed:
	  /* NOTE(review): same concern as above - io_error is reached
	     here before lock_page()/get_page(). */
	  if (ext2_rd_wa == NULL)
		  goto io_error;

	  /* Copied from block_read_full_page */
/* 	  if (!PageLocked(page)) */
/* 	    PAGE_BUG(page); */
	  lock_page(page);
	  if (Page_Uptodate(page)) {
	    UnlockPage(page);
	    up(&inode->i_sem);
	    return(0);
	  }
	  /* Extra reference on the page; it is dropped again by the
	     atomic_dec(&page->count) at the exits below. */
 	  get_page(page);

	  ClearPageUptodate(page);
	  ClearPageError(page);

 	  dst = (u8 *) page_address(page);
	  /* Handle each cluster under the page in turn. */
	  for (clu_ix = 0; cluster0 + clu_ix <= max_cluster; clu_ix++) {
		  struct buffer_head *bh[EXT2_MAX_CLUSTER_BLOCKS];
		  int nbh, blocksThisClu;

/* 		  clear_bit(PG_locked, &page->flags); */
		  npg = ext2_cluster_npages(inode, cluster0 + clu_ix);
		  nbh =  ext2_get_cluster_pages(inode, cluster0 + clu_ix, pg, page, 0);
		  if (nbh <= 0) {
		    /* epg[] has not been filled for this cluster yet;
		       clear it so the cleanup loop at `out' does not
		       look at stale entries. */
		    for (i = 0; i < EXT2_MAX_CLUSTER_PAGES; i++)
		      epg[i] = NULL;
		    goto out;
		  }
		  nbh =  ext2_get_cluster_extra_pages(inode, cluster0 + clu_ix, pg, epg);
		  if (nbh <= 0)
		    goto out;
		  nbh = ext2_get_cluster_blocks(inode, cluster0 + clu_ix, bh, pg, epg, 0);
		  if (nbh <= 0)
		    goto out;
		  /* How many blocks (including holes) we need from this cluster. */
		  {
			  blocksThisClu = (ext2_cluster_nblocks(inode, cluster0 + clu_ix)
					   - blockOfCluster);
			  if (blocksThisClu > blocksToDo)
				  blocksThisClu = blocksToDo;
		  }

		  if (isCmp[clu_ix]) {
			  u8 const *src;
			  int n, nbytes_wanted;
			  struct ext2_cluster_head *head;
			  unsigned meth;
# ifdef CONFIG_KMOD
			  unsigned alg;
# endif

			  /* The cluster head is read from the start of the
			     cluster's first buffer. */
			  head = (struct ext2_cluster_head *) bh[0]->b_data;

			  /* jmr 1998-10-28 Hope this is the last time I'm moving this code.
			   * Module loading must be done _before_ we lock wa, just thik what
			   * can happen if we reallocate wa when somebody else uses it...
			   */
			  meth = head->method; /* only a byte, so no swabbing needed. */
			  if (meth >= EXT2_N_METHODS) {
				  ext2_warning(inode->i_sb,
					     "illegal method id",
					     "inode = %lu, id = %u",
					     inode->i_ino, meth);
				  goto out;
			  }
# ifdef CONFIG_KMOD
			  alg = ext2_method_table[meth].alg;
			  if (!ext2_algorithm_table[alg].avail) {
			  	  char str[32];

				  sprintf(str, "ext2-compr-%d", alg);
				  request_module(str);
			  }
# endif /* CONFIG_KMOD */

			  ext2_lock_rd_wa_uninterruptible();

			  /* Calculate nbytes_wanted. */
			  {
				  unsigned nblk_wanted, i;

				  /* We want to decompress the whole cluster	*/
				  nblk_wanted = ext2_cluster_nblocks(inode, cluster0 + clu_ix);
				  /* Subtract every block whose bit is set in
				     the holemap. */
				  for (i = nblk_wanted; i != 0;)
					  if (((--i >> 3) < head->holemap_nbytes)
					      && (head->holemap[i >> 3] & (1 << (i & 7))))
						  --nblk_wanted;
				  nbytes_wanted = (nblk_wanted
						   << inode->i_sb->s_blocksize_bits);
			  }

			  /* Decompress. */
			  n = ext2_decompress_blocks(inode, bh, nbh, nbytes_wanted);
			  if (n < 0) {
				  assert (nbh >= 0);
				  ext2_unlock_rd_wa();
				  goto out;
			  }
			  /* Record which cluster the working area now holds. */
			  if ((nbytes_wanted >= le32_to_cpu(head->ulen))
			      && (n >= nbytes_wanted)) {
				  assert (ext2_rd_wa_ucontents.ino == ~0ul);
				  ext2_rd_wa_ucontents.ino = inode->i_ino;
				  ext2_rd_wa_ucontents.dev = inode->i_dev;
				  ext2_rd_wa_ucontents.cluster = cluster0 + clu_ix;
			  }

# ifdef EXT2_COMPR_REPORT_VERBOSE
			  if (inode->u.ext2_i.i_flags & EXT2_COMPR_FL)
				  printk(KERN_DEBUG "ext2: mmap %04x:%lu: blocksToDo=%d, blockOfCluster=%d, blocksThisClu=%d, clu_nblocks=%d\n",
					 inode->i_dev,
					 inode->i_ino,
					 blocksToDo,
					 blockOfCluster,
					 blocksThisClu,
					 ext2_cluster_nblocks(inode, cluster0 + clu_ix));
# endif

			  /* Copy the decompressed data out of the working
			     area into the cluster's pages, walking both the
			     pages and the blocks from last to first. */
			  {
			    unsigned i;
			    int ipg;

			    i = ext2_cluster_nblocks(inode, cluster0 + clu_ix) - 1;
			    blockOfCluster = 0;
			    assert(n > 0);
			    src = ext2_rd_wa->u + nbytes_wanted - inode->i_sb->s_blocksize;
			    trace_e2c("ext2_readpage: copy data inc=%d blocksThisClu=%d, n=%d\n", inc, blocksThisClu, n);
			    for (ipg = npg - 1; ipg >= 0; ipg--) {
			      /* No page in this slot: step over its share of
				 the blocks and of the source data. */
			      if (pg[ipg] == NULL) {
				i -= inc;
				src -= PAGE_SIZE;
				continue;
			      }
			      /* The page holding the last byte of the file
				 is only handled up to that byte's block. */
			      if (((inode->i_size-1) >> PAGE_SHIFT) ==  pg[ipg]->index)
				{
				  n = ((inode->i_size-1) & (PAGE_SIZE -1)) >> inode->i_sb->s_blocksize_bits;
				  i -= ((blocksThisClu-1) - n);
				  src -= ((blocksThisClu-1) - n) << inode->i_sb->s_blocksize_bits;
				}
			      else
				n = blocksThisClu - 1;
			      if (Page_Uptodate(pg[ipg]))
				{
				  /* Page already up to date: leave its
				     contents alone, only advance src past
				     the non-hole blocks. */
				  for (;n >= 0;n--, i--) {
				    if (((i >> 3) >= head->holemap_nbytes)
					|| !(head->holemap[i >> 3] & (1 << (i & 7)))) {
				      src -= inode->i_sb->s_blocksize;
				    }
				  }
				} else {
				  dst = (u8 *) page_address(pg[ipg]) + (n << inode->i_sb->s_blocksize_bits);
				  for (;
				       n >= 0;
				       n--, i--, dst -= inode->i_sb->s_blocksize) {
				    if (atomic_set_buffer_clean(bh[i])) { /* Clear BH_Dirty */
				      refile_buffer(bh[i]);
				    }
				    /* Data block: copy from the working
				       area.  Hole: zero-fill. */
				    if (((i >> 3) >= head->holemap_nbytes)
					|| !(head->holemap[i >> 3] & (1 << (i & 7)))) {
				      memcpy(dst, src, inode->i_sb->s_blocksize);
				      src -= inode->i_sb->s_blocksize;
				    } else {
				      memset (dst, 0, inode->i_sb->s_blocksize);
				    }
/* 				    clear_bit(BH_Uptodate, &bh[i]->b_state); */
				  }
				  SetPageUptodate(pg[ipg]);
				}
			    }
			  }
			  ext2_unlock_rd_wa();
		  } else {
			  /* Uncompressed cluster.  Just copy the data.  */
			  int n;

# ifdef EXT2_COMPR_REPORT_VERBOSE
			  /* NOTE(review): `blocks' is not declared anywhere
			     in this function; this printk presumably does
			     not compile when EXT2_COMPR_REPORT_VERBOSE is
			     defined - confirm. */
			  if (inode->u.ext2_i.i_flags & EXT2_COMPR_FL)
				  printk(KERN_DEBUG
					 "ext2: mmap %lu: blocksToDo = %d, off = %d, blocks = %d\n",
					 inode->i_ino, blocksToDo, blockOfCluster, blocks);
# endif

			  /* Buffers that are missing or beyond nbh are
			     treated as holes and zero-filled. */
			  for (n = 0;
			       n < blocksThisClu;
			       n++, dst += inode->i_sb->s_blocksize) {
				  if ((blockOfCluster + n < nbh)
				      && (bh[blockOfCluster + n] != NULL))
					  memcpy(dst,
						 bh[blockOfCluster + n]->b_data,
						 inode->i_sb->s_blocksize);
				  else
					  memset(dst, 0, inode->i_sb->s_blocksize);
			  }
			  blockOfCluster = 0;
		  }

		  blocksToDo -= blocksThisClu;

		  /* Unlock and release the cluster's pages, except the one
		     we were asked to read, plus any extra pages. */
		  for (i = 0; i < npg; i++)
		    {
		      if (pg[i] == NULL)
			break;
		      if (pg[i] == page)
			continue;
		      UnlockPage(pg[i]);
		      page_cache_release(pg[i]);
		      if (epg[i] != NULL) 
			{
 			  try_to_free_buffers(epg[i], 1);
			  UnlockPage(epg[i]);
 			  assert(page_count(epg[i]) == 1);
			  page_cache_release(epg[i]);		
			}
		    }
	  }
	}

	/* Success: mark the page up to date, unlock it, drop the extra
	   reference taken with get_page() and wake any waiters. */
	SetPageUptodate(page);
	clear_bit(PG_locked, &page->flags);
	atomic_dec(&page->count);
	wake_up(&page->wait);
	up(&inode->i_sem);
	return 0;

 out:
	/* Failure inside the cluster loop: same page release as at the
	   end of a loop iteration, then fall into io_error. */
	for (i = 0; i < npg; i++)
	  {
	    if (pg[i] == NULL)
	      break;
	    if (pg[i] == page)
	      continue;
	    UnlockPage(pg[i]);
	    page_cache_release(pg[i]);
	    if (epg[i] != NULL) 
	      {
 		try_to_free_buffers(epg[i], 1);
		UnlockPage(epg[i]);
 		assert(page_count(epg[i]) == 1);
		page_cache_release(epg[i]);		
	      }
	  }

 io_error:
	/* Flag the page as being in error, unlock it, drop a reference
	   and wake any waiters. */
	set_bit(PG_error, &page->flags);
	clear_bit(PG_locked, &page->flags);
	atomic_dec(&page->count);
	wake_up(&page->wait);
	up(&inode->i_sem);
	return -EIO; /* it is tested in do_generic_file_read(), ...	*/
}
#endif /* CONFIG_EXT2_COMPRESS */

/*
 * Writepage method.  Without CONFIG_EXT2_COMPRESS this is just
 * block_write_full_page().  With it, every compressed cluster the page
 * touches is passed to ext2_decompress_cluster() first, and the page is
 * then written through block_write_full_page() as well.
 */
static int ext2_writepage(struct page *page)
{
#ifdef CONFIG_EXT2_COMPRESS
	struct inode *inode = page->mapping->host;
	u32 cluster0, max_cluster;
	int blocksToDo;

	/* Drop the page lock before (possibly) sleeping on i_sem; it is
	   re-taken with lock_page() before block_write_full_page(). */
  	UnlockPage(page);
	/* i_sem is taken here only when EXT2_OSYNC_INODE is not set; every
	   up() below is guarded by the same test. */
	if (!(inode->u.ext2_i.i_compr_flags & EXT2_OSYNC_INODE)) {
	  /* trace_e2c("ext2_writepage: inode"); */
	  down(&inode->i_sem);
	  /* trace_e2c(" down\n"); */
	}
	/* No compressed blocks flagged on this inode, or compression
	   disabled: plain write. */
	if (!(inode->u.ext2_i.i_flags & EXT2_COMPRBLK_FL)
	    || ext2_compression_disabled(inode)) {
	  if (!(inode->u.ext2_i.i_compr_flags & EXT2_OSYNC_INODE)) {
	    /* trace_e2c("ext2_writepage: inode up 1\n"); */
	    up(&inode->i_sem);
	  }
	  lock_page(page);
	  return block_write_full_page(page,ext2_get_block);
	}
	/* Work out the range of clusters (cluster0..max_cluster) that the
	   page touches. */
	{
		register u32 blockOfFile
			= (page->index << PAGE_CACHE_SHIFT) >> inode->i_sb->s_blocksize_bits;

		blocksToDo = PAGE_SIZE >> inode->i_sb->s_blocksize_bits;
		cluster0 = ext2_block_to_cluster(inode, blockOfFile);
		max_cluster = ext2_block_to_cluster
			(inode, blockOfFile + blocksToDo - 1);
	}

	/* Check if any part of the requested area contains part of a
	   compressed cluster.  Such clusters are decompressed first; in
	   either case the page is then written with the default
	   block_write_full_page().

	   (Note that we don't have to worry about a cluster becoming
	   compressed in the meantime, because we have the semaphore.)
	   NOTE(review): the semaphore is not taken above when
	   EXT2_OSYNC_INODE is set - presumably the caller holds it in
	   that case; confirm.

	   A page can cover up to 9 clusters.  (The maximum can only
	   occur with 32KB pages, 4KB clusters, and a non-page-aligned
	   offset.  Thanks go to Kurt Fitzner for reporting that
	   page offsets needn't be aligned; see generic_file_mmap().)  */

	/* */
	{
	  int isCmp[(PAGE_SIZE >> 12) + 1];
	  unsigned clu_ix;

	  assert (max_cluster - cluster0 < sizeof(isCmp)/sizeof(*isCmp));
	  for (clu_ix = 0; cluster0 + clu_ix <= max_cluster; clu_ix++) {
		  isCmp[clu_ix] = ext2_cluster_is_compressed_fn (inode, cluster0 + clu_ix);
		  /* A negative result fails the write with -EIO; the page
		     stays unlocked, as left by UnlockPage() above. */
		  if (isCmp[clu_ix] < 0) {
		    if (!(inode->u.ext2_i.i_compr_flags & EXT2_OSYNC_INODE)) {
		      /* trace_e2c("ext2_writepage: inode up 2\n"); */
		      up(&inode->i_sem);
		    }
		    return -EIO;
		  }
	  }
	  /* NOTE(review): the return value of ext2_decompress_cluster()
	     is ignored here - confirm that this is intended. */
	  for (clu_ix = 0; cluster0 + clu_ix <= max_cluster; clu_ix++)
		  if (isCmp[clu_ix] > 0) 
			  ext2_decompress_cluster(inode, cluster0 + clu_ix);
	  if (!(inode->u.ext2_i.i_compr_flags & EXT2_OSYNC_INODE)) {
	    /* trace_e2c("ext2_writepage: inode up 3\n"); */
	    up(&inode->i_sem);
	  }
	  lock_page(page);
	  /* fall through */
	}
#endif /* CONFIG_EXT2_COMPRESS */
	return block_write_full_page(page,ext2_get_block);
}
#ifndef CONFIG_EXT2_COMPRESS
/*
 * readpage for builds without compression support: the page is handed
 * to block_read_full_page() with ext2_get_block() as the block mapper.
 * @file is not used.
 */
static int ext2_readpage(struct file *file, struct page *page)
{
	int status;

	status = block_read_full_page(page, ext2_get_block);
	return status;
}
#endif /* not CONFIG_EXT2_COMPRESS */
/*
 * prepare_write hook: forwards the byte range [from, to) of @page to
 * block_prepare_write(), with ext2_get_block() as the block mapper.
 * @file is not used.
 */
static int ext2_prepare_write(struct file *file, struct page *page, unsigned from, unsigned to)
{
	int status;

	status = block_prepare_write(page, from, to, ext2_get_block);
	return status;
}
/*
 * Raw block lookup through generic_block_bmap() and ext2_get_block().
 *
 * With CONFIG_EXT2_COMPRESS this is only the helper ext2_do_bmap();
 * the bmap entry point proper is defined separately.  Without
 * compression support this function is itself ext2_bmap().
 */
#ifdef CONFIG_EXT2_COMPRESS
static int ext2_do_bmap(struct address_space *mapping, long block)
#else
static int ext2_bmap(struct address_space *mapping, long block)
#endif
{
	int mapped;

	mapped = generic_block_bmap(mapping, block, ext2_get_block);
	return mapped;
}

#ifdef CONFIG_EXT2_COMPRESS
/*
 * bmap entry point for compression-enabled builds.
 *
 * Never hands EXT2_COMPRESSED_BLKADDR back to the caller; 0 is
 * returned in its place:
 *  - if the inode has EXT2_COMPRBLK_FL set and EXT2_NOCOMPR_FL clear,
 *    and the cluster holding @block is reported compressed (a warning
 *    is logged) or the compressed-ness check fails, the result is 0;
 *  - otherwise the block is looked up with ext2_do_bmap(); if that
 *    yields EXT2_COMPRESSED_BLKADDR the result is 0, and an error is
 *    logged when the filesystem lacks the compression feature bit or
 *    the inode is not a regular file.
 */
static int ext2_bmap(struct address_space *mapping, long block)
{
	struct inode *inode = mapping->host;
	int result;

	if ((inode->u.ext2_i.i_flags
	     & (EXT2_COMPRBLK_FL | EXT2_NOCOMPR_FL))
	    == EXT2_COMPRBLK_FL) {
		int status = ext2_cluster_is_compressed_fn
			(inode, ext2_block_to_cluster(inode, block));

		if (status > 0)
			ext2_warning (inode->i_sb, "ext2_bmap",
				      "compressed cluster, inode %lu", 
				      inode->i_ino);
		/* Compressed (> 0) and error (< 0) both map to "no block". */
		if (status != 0)
			return 0;
	}

	result = ext2_do_bmap(mapping, block);
	if (result == EXT2_COMPRESSED_BLKADDR) {
		/* The marker is only legitimate on a compression-enabled
		   filesystem and for regular files; complain otherwise. */
		if (!(inode->i_sb->u.ext2_sb.s_es->s_feature_incompat
		      & cpu_to_le32(EXT2_FEATURE_INCOMPAT_COMPRESSION)))
			ext2_error(inode->i_sb, "ext2_bmap",
				   "compressed_blkaddr (ino %lu, blk %ld) "
				   "on non-compressed fs",
				   inode->i_ino, block);
		if (!S_ISREG(inode->i_mode))
			ext2_error(inode->i_sb, "ext2_bmap",
				   "compressed_blkaddr for non-regular file "
				   "(ino %lu, blk %ld)",
				   inode->i_ino, block);
		result = 0;
	}
	return result;
}
#endif /* CONFIG_EXT2_COMPRESS */

/*
 * direct_IO hook: forwards the request unchanged to generic_direct_IO(),
 * with ext2_get_block() as the block mapper.
 */
static int ext2_direct_IO(int rw, struct inode * inode, struct kiobuf * iobuf, unsigned long blocknr, int blocksize)
{
	int status;

	status = generic_direct_IO(rw, inode, iobuf, blocknr, blocksize,
				   ext2_get_block);
	return status;
}
/*
 * Address-space operations installed by ext2_read_inode() on regular
 * files, directories and non-fast symlinks.
 */
struct address_space_operations ext2_aops = {
	.readpage	= ext2_readpage,
	.writepage	= ext2_writepage,
	.sync_page	= block_sync_page,
	.prepare_write	= ext2_prepare_write,
	.commit_write	= generic_commit_write,
	.bmap		= ext2_bmap,
	.direct_IO	= ext2_direct_IO,
};

/*
 * Return 1 if every 32-bit word in [p, q) is zero (an empty range
 * counts as all-zero), 0 as soon as a non-zero word is found.
 *
 * This would arguably be better as a library routine (first non-zero
 * word search, or memcmp against the zero page, whichever suits the
 * architecture).
 */
static inline int all_zeroes(u32 *p, u32 *q)
{
	u32 *cur;

	for (cur = p; cur < q; cur++) {
		if (*cur != 0)
			return 0;
	}
	return 1;
}

/**
 *	ext2_find_shared - find the indirect blocks for partial truncation.
 *	@inode:	  inode in question
 *	@depth:	  depth of the affected branch
 *	@offsets: offsets of pointers in that branch (see ext2_block_to_path)
 *	@chain:	  place to store the pointers to partial indirect blocks
 *	@top:	  place to store the (detached) top of branch
 *
 *	This is a helper function used by ext2_truncate().
 *
 *	When we do truncate() we may have to clean the ends of several indirect
 *	blocks but leave the blocks themselves alive. A block is partially
 *	truncated if some data below the new i_size is referred to from it (and
 *	it is on the path to the first completely truncated data block, indeed).
 *	We have to free the top of that path along with everything to the right
 *	of the path. Since no allocation past the truncation point is possible
 *	until ext2_truncate() finishes, we may safely do the latter, but the top
 *	of the branch may require special attention - pageout below the
 *	truncation point might try to populate it.
 *
 *	We atomically detach the top of branch from the tree, store the block
 *	number of its root in *@top, pointers to buffer_heads of partially
 *	truncated blocks - in @chain[].bh and pointers to their last elements
 *	that should not be removed - in @chain[].p. Return value is the pointer
 *	to the last filled element of @chain.
 *
 *	The work left to the caller is the actual freeing of subtrees:
 *		a) free the subtree starting from *@top
 *		b) free the subtrees whose roots are stored in
 *			(@chain[i].p+1 .. end of @chain[i].bh->b_data)
 *		c) free the subtrees growing from the inode past the @chain[0].p
 *			(no partially truncated stuff there).
 */

static Indirect *ext2_find_shared(struct inode *inode,
				int depth,
				int offsets[4],
				Indirect chain[4],
				u32 *top)
{
	Indirect *partial, *p;
	int k, err;

	/* Default: nothing detached. */
	*top = 0;
	/* Ignore trailing zero offsets: shorten the path to depth k so it
	   ends at the deepest level with a non-zero offset (or at level 1). */
	for (k = depth; k > 1 && !offsets[k-1]; k--)
		;
	partial = ext2_get_branch(inode, k, offsets, chain, &err);
	/* Writer: pointers */
	/* NOTE(review): a NULL result presumably means the whole k-level
	   branch was read; in that case work from its last element. */
	if (!partial)
		partial = chain + k-1;
	/*
	 * If the branch acquired continuation since we've looked at it -
	 * fine, it should all survive and (new) top doesn't belong to us.
	 */
	if (!partial->key && *partial->p)
		/* Writer: end */
		goto no_top;
	/* Walk back up while everything to the left of the path in the
	   current indirect block is zero: such a block holds nothing that
	   must survive. */
	for (p=partial; p>chain && all_zeroes((u32*)p->bh->b_data,p->p); p--)
		;
	/*
	 * OK, we've found the last block that must survive. The rest of our
	 * branch should be detached before unlocking. However, if that rest
	 * of branch is all ours and does not grow immediately from the inode
	 * it's easier to cheat and just decrement partial->p.
	 */
	if (p == chain + k - 1 && p > chain) {
		p->p--;
	} else {
		/* Detach: hand the (still little-endian) pointer to the
		   caller and clear the slot it came from. */
		*top = *p->p;
		*p->p = 0;
	}
	/* Writer: end */

	/* Drop the buffers below the surviving block; they belong to the
	   detached part of the branch. */
	while(partial > p)
	{
		brelse(partial->bh);
		partial--;
	}
no_top:
	return partial;
}

/**
 *	ext2_free_data - free a list of data blocks
 *	@inode:	inode we are dealing with
 *	@p:	array of block numbers
 *	@q:	points immediately past the end of array
 *
 *	Every non-zero slot in [@p, @q) is cleared and the block it named
 *	(stored as little-endian 32-bit) is released.  Physically contiguous
 *	blocks are gathered into a single run so that ext2_free_blocks() is
 *	called once per run; the inode is marked dirty before each call.
 *	With CONFIG_EXT2_COMPRESS, slots holding EXT2_COMPRESSED_BLKADDR
 *	are cleared but nothing is freed for them.
 */
static inline void ext2_free_data(struct inode *inode, u32 *p, u32 *q)
{
	unsigned long run_start = 0;
	unsigned long run_len = 0;
	u32 *slot;

	for (slot = p; slot < q; slot++) {
		unsigned long blk = le32_to_cpu(*slot);

		if (!blk)
			continue;
		*slot = 0;
#ifdef CONFIG_EXT2_COMPRESS
		/* Marker value, not a real block: nothing to release. */
		if (blk == EXT2_COMPRESSED_BLKADDR)
			continue;
#endif
		/* Extends the pending run? */
		if (run_len != 0 && run_start == blk - run_len) {
			run_len++;
			continue;
		}
		/* Not contiguous: flush the pending run (if any) ... */
		if (run_len != 0) {
			mark_inode_dirty(inode);
			ext2_free_blocks (inode, run_start, run_len);
		}
		/* ... and start a new one at this block. */
		run_start = blk;
		run_len = 1;
	}
	if (run_len > 0) {
		mark_inode_dirty(inode);
		ext2_free_blocks (inode, run_start, run_len);
	}
}

/**
 *	ext2_free_branches - free an array of branches
 *	@inode:	inode we are dealing with
 *	@p:	array of block numbers
 *	@q:	pointer immediately past the end of array
 *	@depth:	depth of the branches to free
 *
 *	With @depth == 0 the array holds data block numbers and is passed
 *	to ext2_free_data().  Otherwise every non-zero slot names an
 *	indirect block: the slot is cleared, the block is read, its
 *	contents are freed recursively one level down, and finally the
 *	indirect block itself is released.  Block numbers are stored as
 *	little-endian 32-bit.
 */
static void ext2_free_branches(struct inode *inode, u32 *p, u32 *q, int depth)
{
	struct buffer_head *ind_bh;
	unsigned long blk;
	int ptrs_per_block;
	u32 *slot;

	if (depth == 0) {
		ext2_free_data(inode, p, q);
		return;
	}
	depth--;
	ptrs_per_block = EXT2_ADDR_PER_BLOCK(inode->i_sb);

	for (slot = p; slot < q; slot++) {
		blk = le32_to_cpu(*slot);
		if (!blk)
			continue;
		*slot = 0;
#ifdef CONFIG_EXT2_COMPRESS
		/* Marker value, not a real block: nothing below it. */
		if (blk == EXT2_COMPRESSED_BLKADDR)
			continue;
#endif
		ind_bh = bread (inode->i_dev, blk, inode->i_sb->s_blocksize);
		if (!ind_bh) {
			/*
			 * A read failure?  Report it; the slot has already
			 * been cleared (should be rare).
			 */
			ext2_error(inode->i_sb, "ext2_free_branches",
				"Read failure, inode=%ld, block=%ld",
				inode->i_ino, blk);
			continue;
		}
		ext2_free_branches(inode,
				   (u32*)ind_bh->b_data,
				   (u32*)ind_bh->b_data + ptrs_per_block,
				   depth);
		bforget(ind_bh);
		ext2_free_blocks(inode, blk, 1);
		mark_inode_dirty(inode);
	}
}

/* pjm 1998-01-14: As far as I can tell, "I don't do any locking" is
   no longer correct, as i_sem is downed for all write() and
   truncate() stuff except where it doesn't matter (e.g. new inode). */

#ifdef CONFIG_EXT2_COMPRESS
/*
 * fix_compression - prepare a compressed inode for truncation.
 *
 * If the new i_size falls inside a cluster (not on a cluster boundary),
 * compression is enabled for the inode and that cluster is currently
 * compressed, the cluster is decompressed and the inode is flagged
 * (EXT2_DIRTY_FL, EXT2_CLEANUP_FL) so that it gets recompressed later.
 * Otherwise ext2_update_comprblk() is called, so that EXT2_COMPRBLK_FL
 * can be dropped when no compressed clusters remain; that is not
 * essential for safety, merely friendlier.  It is only done on this
 * path because the cleanup function handles it in the other case.
 *
 * Notes carried over from earlier authors:
 *
 *  - If EXT2_ECOMPR_FL is set things can go rather badly.  That can
 *    only happen if access permission was obtained before the flag was
 *    raised, and should not matter much unless the end point of the
 *    truncation is a compressed cluster with a compression error.
 *
 *  - (Antoine) The complexity of the truncate code comes from not
 *    wanting to free blocks that are still referenced.  It does not
 *    guarantee that a concurrent read terminates properly: reading
 *    while somebody truncates is undefined (old data if the blocks
 *    were obtained before, plenty of zeros otherwise).
 *
 *  - (translated from the original French note) If clusters can be
 *    compressed concurrently we would also have to stop when the
 *    cluster is compressed and holds no more data than i_size allows;
 *    otherwise we could keep decompressing a cluster somebody else is
 *    compressing at the same time.  pjm 1998-01-14: this concern can
 *    probably be dropped, since nobody can be compressing during
 *    truncate() while i_sem is down.
 *
 * todo: Provide error trapping in readiness for when i_op->truncate
 * allows a return code.  The results of ext2_cluster_is_compressed()
 * and ext2_decompress_cluster() are not checked for errors
 * (ext2_truncate() silently ignores errors anyway).
 */
static void fix_compression (struct inode * inode)
{
	int tail_is_compressed = 0;

	assert (inode->u.ext2_i.i_flags & EXT2_COMPRBLK_FL);
	/* Either the caller holds i_sem, or the inode is unlinked and
	   unreferenced. */
	assert ((atomic_read(&inode->i_sem.count) < 1)
		|| ((inode->i_nlink == 0)
		    && (atomic_read(&inode->i_count) == 0)));

	if (!ext2_offset_is_clu_boundary(inode, inode->i_size)
	    && ext2_compression_enabled(inode)) {
		if (ext2_cluster_is_compressed_fn
			(inode, ext2_offset_to_cluster (inode, inode->i_size))
		    > 0)
			tail_is_compressed = 1;
	}

	if (!tail_is_compressed) {
		ext2_update_comprblk(inode);
		return;
	}

	ext2_decompress_cluster(inode, ext2_offset_to_cluster(inode, inode->i_size));

	/* Organise for the cluster to be recompressed later. */
	assert (inode->u.ext2_i.i_flags & EXT2_COMPR_FL);
	inode->u.ext2_i.i_flags       |= EXT2_DIRTY_FL;
	inode->u.ext2_i.i_compr_flags |= EXT2_CLEANUP_FL;
	mark_inode_dirty(inode);
}
#endif

/*
 * ext2_truncate - release every block of @inode past its current i_size.
 *
 * Does nothing for inodes that are not regular files, directories or
 * symlinks, or that are append-only or immutable.  Otherwise:
 *   1. preallocated blocks are discarded;
 *   2. (CONFIG_EXT2_COMPRESS) if the inode has compressed clusters, the
 *      cluster straddling i_size is dealt with by fix_compression() and
 *      the page cache past i_size is dropped;
 *   3. the partial last page is handled by block_truncate_page();
 *   4. direct blocks, the partially shared indirect branch and then the
 *      remaining whole indirect subtrees are freed;
 *   5. mtime/ctime are updated and the inode is written synchronously
 *      (IS_SYNC) or marked dirty.
 *
 * Errors are not reported: the function returns void.
 */
void ext2_truncate (struct inode * inode)
{
	u32 *i_data = inode->u.ext2_i.i_data;
	int addr_per_block = EXT2_ADDR_PER_BLOCK(inode->i_sb);
	int offsets[4];
	Indirect chain[4];
	Indirect *partial;
	/* Raw (little-endian) block number.  Must be u32 because its
	   address is passed where a u32 * is expected. */
	u32 nr = 0;
	int n;
	long iblock;
	unsigned blocksize;

	if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
	    S_ISLNK(inode->i_mode)))
		return;
	if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
		return;

	ext2_discard_prealloc(inode);
#ifdef CONFIG_EXT2_COMPRESS
	/* If the new size is in the middle of a compressed cluster,
	   then we decompress it, and set things up to be recompressed
	   later.

	   todo: It isn't very nice to get ENOSPC on truncate.  We
	   can't completely remove the possibility (unless the
	   compression algorithms obey the rule `shorter input never
	   gives longer output') but we could greatly reduce the
	   possibility, e.g. by moving the fix_compression() function
	   to compress.c, and have it decompress and immediately
	   recompress the cluster, without allocating blocks for the
	   full decompressed data. */
	if (inode->u.ext2_i.i_flags & EXT2_COMPRBLK_FL) {
	  	trace_e2c("ext2_truncate: ino=%ld sz=%d\n", inode->i_ino, (int)inode->i_size);
		fix_compression (inode);
		truncate_inode_pages(inode->i_mapping, inode->i_size);
	}
#endif

	/* First file block that lies entirely past the new size. */
	blocksize = inode->i_sb->s_blocksize;
	iblock = (inode->i_size + blocksize-1)
					>> EXT2_BLOCK_SIZE_BITS(inode->i_sb);

	block_truncate_page(inode->i_mapping, inode->i_size, ext2_get_block);

	n = ext2_block_to_path(inode, iblock, offsets);
	if (n == 0)
		return;

	if (n == 1) {
		/* Truncation point is among the direct blocks. */
		ext2_free_data(inode, i_data+offsets[0],
					i_data + EXT2_NDIR_BLOCKS);
		goto do_indirects;
	}

	partial = ext2_find_shared(inode, n, offsets, chain, &nr);
	/* Kill the top of shared branch (already detached) */
	if (nr) {
		/* The cleared pointer lived either in the inode itself or
		   in an indirect block; dirty whichever it was. */
		if (partial == chain)
			mark_inode_dirty(inode);
		else
			mark_buffer_dirty_inode(partial->bh, inode);
		ext2_free_branches(inode, &nr, &nr+1, (chain+n-1) - partial);
	}
	/* Clear the ends of indirect blocks on the shared branch */
	while (partial > chain) {
		ext2_free_branches(inode,
				   partial->p + 1,
				   (u32*)partial->bh->b_data + addr_per_block,
				   (chain+n-1) - partial);
		mark_buffer_dirty_inode(partial->bh, inode);
		if (IS_SYNC(inode)) {
			ll_rw_block (WRITE, 1, &partial->bh);
			wait_on_buffer (partial->bh);
		}
		brelse (partial->bh);
		partial--;
	}
do_indirects:
	/* Kill the remaining (whole) subtrees */
	switch (offsets[0]) {
		default:
			nr = i_data[EXT2_IND_BLOCK];
			if (nr) {
				i_data[EXT2_IND_BLOCK] = 0;
				mark_inode_dirty(inode);
				ext2_free_branches(inode, &nr, &nr+1, 1);
			}
			/* fall through */
		case EXT2_IND_BLOCK:
			nr = i_data[EXT2_DIND_BLOCK];
			if (nr) {
				i_data[EXT2_DIND_BLOCK] = 0;
				mark_inode_dirty(inode);
				ext2_free_branches(inode, &nr, &nr+1, 2);
			}
			/* fall through */
		case EXT2_DIND_BLOCK:
			nr = i_data[EXT2_TIND_BLOCK];
			if (nr) {
				i_data[EXT2_TIND_BLOCK] = 0;
				mark_inode_dirty(inode);
				ext2_free_branches(inode, &nr, &nr+1, 3);
			}
			/* fall through */
		case EXT2_TIND_BLOCK:
			;
	}
	inode->i_mtime = inode->i_ctime = CURRENT_TIME;
	if (IS_SYNC(inode))
		ext2_sync_inode (inode);
	else
		mark_inode_dirty(inode);
}

/*
 * ext2_read_inode - fill in the in-core @inode from its on-disk copy.
 *
 * The inode number is validated, the inode-table block holding it is
 * located through the group descriptor and read, and the fields are
 * copied into @inode, converting from little-endian (the block
 * pointers in i_data are copied without byte-swapping).  Operation
 * tables are then chosen from the file type and the VFS-level flags
 * are derived from the ext2 flags.
 *
 * On any failure (bad inode number, missing descriptor, read error,
 * deleted inode) the inode is turned into a bad inode with
 * make_bad_inode().
 */
void ext2_read_inode (struct inode * inode)
{
	struct buffer_head * bh;
	struct ext2_inode * raw_inode;
	unsigned long block_group;
	unsigned long group_desc;
	unsigned long desc;
	unsigned long block;
	unsigned long offset;
	struct ext2_group_desc * gdp;

	/* Valid numbers: the root and ACL inodes, or anything from the
	   first non-reserved inode up to the filesystem's inode count. */
	if ((inode->i_ino != EXT2_ROOT_INO && inode->i_ino != EXT2_ACL_IDX_INO &&
	     inode->i_ino != EXT2_ACL_DATA_INO &&
	     inode->i_ino < EXT2_FIRST_INO(inode->i_sb)) ||
	    inode->i_ino > le32_to_cpu(inode->i_sb->u.ext2_sb.s_es->s_inodes_count)) {
		ext2_error (inode->i_sb, "ext2_read_inode",
			    "bad inode number: %lu", inode->i_ino);
		goto bad_inode;
	}
	block_group = (inode->i_ino - 1) / EXT2_INODES_PER_GROUP(inode->i_sb);
	if (block_group >= inode->i_sb->u.ext2_sb.s_groups_count) {
		ext2_error (inode->i_sb, "ext2_read_inode",
			    "group >= groups count");
		goto bad_inode;
	}
	/* group_desc: which descriptor block; desc: index within it. */
	group_desc = block_group >> EXT2_DESC_PER_BLOCK_BITS(inode->i_sb);
	desc = block_group & (EXT2_DESC_PER_BLOCK(inode->i_sb) - 1);
	bh = inode->i_sb->u.ext2_sb.s_group_desc[group_desc];
	if (!bh) {
		ext2_error (inode->i_sb, "ext2_read_inode",
			    "Descriptor not loaded");
		goto bad_inode;
	}

	gdp = (struct ext2_group_desc *) bh->b_data;
	/*
	 * Figure out the offset within the block group inode table
	 */
	offset = ((inode->i_ino - 1) % EXT2_INODES_PER_GROUP(inode->i_sb)) *
		EXT2_INODE_SIZE(inode->i_sb);
	block = le32_to_cpu(gdp[desc].bg_inode_table) +
		(offset >> EXT2_BLOCK_SIZE_BITS(inode->i_sb));
	/* From here on bh is the inode-table block and must be released
	   with brelse() on every path that leaves the function. */
	if (!(bh = bread (inode->i_dev, block, inode->i_sb->s_blocksize))) {
		ext2_error (inode->i_sb, "ext2_read_inode",
			    "unable to read inode block - "
			    "inode=%lu, block=%lu", inode->i_ino, block);
		goto bad_inode;
	}
	/* Reduce offset to the position inside that block. */
	offset &= (EXT2_BLOCK_SIZE(inode->i_sb) - 1);
	raw_inode = (struct ext2_inode *) (bh->b_data + offset);

	inode->i_mode = le16_to_cpu(raw_inode->i_mode);
	inode->i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low);
	inode->i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low);
	/* Merge in the upper 16 bits unless mounted with NO_UID32. */
	if(!(test_opt (inode->i_sb, NO_UID32))) {
		inode->i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16;
		inode->i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16;
	}
	inode->i_nlink = le16_to_cpu(raw_inode->i_links_count);
	/* Low 32 bits only; the high half is added further down for
	   regular files. */
	inode->i_size = le32_to_cpu(raw_inode->i_size);
	inode->i_atime = le32_to_cpu(raw_inode->i_atime);
	inode->i_ctime = le32_to_cpu(raw_inode->i_ctime);
	inode->i_mtime = le32_to_cpu(raw_inode->i_mtime);
	inode->u.ext2_i.i_dtime = le32_to_cpu(raw_inode->i_dtime);
	/* We now have enough fields to check if the inode was active or not.
	 * This is needed because nfsd might try to access dead inodes.
	 * The test is the same one that e2fsck uses.
	 * NeilBrown 1999oct15
	 */
	if (inode->i_nlink == 0 && (inode->i_mode == 0 || inode->u.ext2_i.i_dtime)) {
		/* this inode is deleted */
		brelse (bh);
		goto bad_inode;
	}
	inode->i_blksize = PAGE_SIZE;	/* This is the optimal IO size (for stat), not the fs block size */
	inode->i_blocks = le32_to_cpu(raw_inode->i_blocks);
	inode->i_version = ++event;

#ifdef CONFIG_EXT2_COMPRESS
	/* The mask 0x807fffff drops bits 23-30 of the on-disk flags word;
	   below, bits 23-25 are decoded as log2 of the cluster size in
	   blocks and bits 26-30 as the compression method. */
	inode->u.ext2_i.i_flags = 0x807fffff & le32_to_cpu(raw_inode->i_flags);
	inode->u.ext2_i.i_compr_flags = 0;
	if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode)) {
		if (S_ISDIR(inode->i_mode))
			inode->u.ext2_i.i_flags &= ~(EXT2_COMPRBLK_FL | EXT2_DIRTY_FL);
		/* The above shouldn't be necessary unless someone's
		   been playing with EXT2_IOC_SETFLAGS on a non-e2compr
		   kernel, or the inode has been scribbled on. */
		if (inode->u.ext2_i.i_flags & (EXT2_COMPR_FL | EXT2_COMPRBLK_FL)) {
			inode->u.ext2_i.i_compr_method
				= (le32_to_cpu(raw_inode->i_flags) >> 26) & 0x1f;
			inode->u.ext2_i.i_log2_clu_nblocks
				= (le32_to_cpu(raw_inode->i_flags) >> 23) & 0x7;
			/* Only cluster sizes of 2^2 .. 2^5 blocks are
			   accepted as stored. */
			if ((inode->u.ext2_i.i_log2_clu_nblocks < 2)
			    || (inode->u.ext2_i.i_log2_clu_nblocks > 5)) {
				if ((inode->u.ext2_i.i_log2_clu_nblocks == 0)
				    && !(inode->u.ext2_i.i_flags & EXT2_COMPRBLK_FL)) {
					/* The EXT2_COMPR_FL flag was
					   raised under a kernel
					   without e2compr support. */
					if (S_ISREG(inode->i_mode))
						inode->u.ext2_i.i_flags |= EXT2_DIRTY_FL;
					/* Todo: once we're sure the kernel can
					   handle [log2_]clu_nblocks==0, get rid
					   of the next statement. */
					inode->u.ext2_i.i_log2_clu_nblocks
						= EXT2_DEFAULT_LOG2_CLU_NBLOCKS;
				} else {
					/* Any other out-of-range value is
					   treated as corruption. */
					inode->u.ext2_i.i_flags |= EXT2_ECOMPR_FL;
					ext2_error(inode->i_sb, "ext2_read_inode",
						   "inode %lu is corrupted: "
						   "log2_clu_nblocks=%u",
						   inode->i_ino,
						   inode->u.ext2_i.i_log2_clu_nblocks);
				}
			}
		} else {
			/* Not compressed: use the default parameters. */
			inode->u.ext2_i.i_compr_method
				= EXT2_DEFAULT_COMPR_METHOD;
			inode->u.ext2_i.i_log2_clu_nblocks
				= EXT2_DEFAULT_LOG2_CLU_NBLOCKS;
		}
		/* Clamp the cluster size so that a cluster never exceeds
		   2^EXT2_LOG2_MAX_CLUSTER_BYTES bytes. */
		if (inode->u.ext2_i.i_log2_clu_nblocks
		    > (EXT2_LOG2_MAX_CLUSTER_BYTES
		       - inode->i_sb->s_blocksize_bits))
			inode->u.ext2_i.i_log2_clu_nblocks 
				= (EXT2_LOG2_MAX_CLUSTER_BYTES
				   - inode->i_sb->s_blocksize_bits);
		inode->u.ext2_i.i_clu_nblocks
			= 1 << inode->u.ext2_i.i_log2_clu_nblocks;
		/* A dirty inode is scheduled for cleanup. */
		if (inode->u.ext2_i.i_flags & EXT2_DIRTY_FL)
			inode->u.ext2_i.i_compr_flags = EXT2_CLEANUP_FL;
	}
#else /* !CONFIG_EXT2_COMPRESS */
	inode->u.ext2_i.i_flags = le32_to_cpu(raw_inode->i_flags);
#endif

	inode->u.ext2_i.i_faddr = le32_to_cpu(raw_inode->i_faddr);
	inode->u.ext2_i.i_frag_no = raw_inode->i_frag;
	inode->u.ext2_i.i_frag_size = raw_inode->i_fsize;
	inode->u.ext2_i.i_file_acl = le32_to_cpu(raw_inode->i_file_acl);
	/* Regular files take the upper 32 bits of the size from
	   i_size_high; other inodes load i_dir_acl instead. */
	if (S_ISREG(inode->i_mode))
		inode->i_size |= ((__u64)le32_to_cpu(raw_inode->i_size_high)) << 32;
	else
		inode->u.ext2_i.i_dir_acl = le32_to_cpu(raw_inode->i_dir_acl);
	inode->i_generation = le32_to_cpu(raw_inode->i_generation);
	inode->u.ext2_i.i_block_group = block_group;

	/*
	 * NOTE! The in-memory inode i_data array is in little-endian order
	 * even on big-endian machines: we do NOT byteswap the block numbers!
	 */
	for (block = 0; block < EXT2_N_BLOCKS; block++)
		inode->u.ext2_i.i_data[block] = raw_inode->i_block[block];

	/* Pick the operation tables according to the file type. */
	if (inode->i_ino == EXT2_ACL_IDX_INO ||
	    inode->i_ino == EXT2_ACL_DATA_INO)
		/* Nothing to do */ ;
	else if (S_ISREG(inode->i_mode)) {
		inode->i_op = &ext2_file_inode_operations;
		inode->i_fop = &ext2_file_operations;
		inode->i_mapping->a_ops = &ext2_aops;
	} else if (S_ISDIR(inode->i_mode)) {
		inode->i_op = &ext2_dir_inode_operations;
		inode->i_fop = &ext2_dir_operations;
		inode->i_mapping->a_ops = &ext2_aops;
	} else if (S_ISLNK(inode->i_mode)) {
		/* A symlink with no blocks allocated is a "fast" symlink. */
		if (!inode->i_blocks)
			inode->i_op = &ext2_fast_symlink_inode_operations;
		else {
			inode->i_op = &page_symlink_inode_operations;
			inode->i_mapping->a_ops = &ext2_aops;
		}
	} else 
		/* Special file: i_block[0] is passed on as the device number. */
		init_special_inode(inode, inode->i_mode,
				   le32_to_cpu(raw_inode->i_block[0]));
	brelse (bh);
	/* Mirror the ext2 attribute flags into the VFS flags. */
	inode->i_attr_flags = 0;
	if (inode->u.ext2_i.i_flags & EXT2_SYNC_FL) {
		inode->i_attr_flags |= ATTR_FLAG_SYNCRONOUS;
		inode->i_flags |= S_SYNC;
	}
	if (inode->u.ext2_i.i_flags & EXT2_APPEND_FL) {
		inode->i_attr_flags |= ATTR_FLAG_APPEND;
		inode->i_flags |= S_APPEND;
	}
	if (inode->u.ext2_i.i_flags & EXT2_IMMUTABLE_FL) {
		inode->i_attr_flags |= ATTR_FLAG_IMMUTABLE;
		inode->i_flags |= S_IMMUTABLE;
	}
	if (inode->u.ext2_i.i_flags & EXT2_NOATIME_FL) {
		inode->i_attr_flags |= ATTR_FLAG_NOATIME;
		inode->i_flags |= S_NOATIME;
	}
	return;
	
bad_inode:
	make_bad_inode(inode);
	return;
}

/*
 * ext2_update_inode - copy the in-core @inode to its on-disk slot.
 * @inode:	inode to write back
 * @do_sync:	if non-zero, write the inode-table block out immediately
 *		and wait for the I/O to complete
 *
 * The inode-table block is located through the group descriptor and
 * read, the in-core fields are stored into the raw inode (converted to
 * little-endian; the block pointers in i_data are copied as they are),
 * and the buffer is marked dirty.
 *
 * Returns 0 on success, or -EIO if the inode number is invalid, the
 * group descriptor is missing, the inode block cannot be read, or the
 * synchronous write fails.
 */
static int ext2_update_inode(struct inode * inode, int do_sync)
{
	struct buffer_head * bh;
	struct ext2_inode * raw_inode;
	unsigned long block_group;
	unsigned long group_desc;
	unsigned long desc;
	unsigned long block;
	unsigned long offset;
	int err = 0;
	struct ext2_group_desc * gdp;

	if ((inode->i_ino != EXT2_ROOT_INO &&
	     inode->i_ino < EXT2_FIRST_INO(inode->i_sb)) ||
	    inode->i_ino > le32_to_cpu(inode->i_sb->u.ext2_sb.s_es->s_inodes_count)) {
		ext2_error (inode->i_sb, "ext2_write_inode",
			    "bad inode number: %lu", inode->i_ino);
		return -EIO;
	}
	block_group = (inode->i_ino - 1) / EXT2_INODES_PER_GROUP(inode->i_sb);
	if (block_group >= inode->i_sb->u.ext2_sb.s_groups_count) {
		ext2_error (inode->i_sb, "ext2_write_inode",
			    "group >= groups count");
		return -EIO;
	}
	/* group_desc: which descriptor block; desc: index within it. */
	group_desc = block_group >> EXT2_DESC_PER_BLOCK_BITS(inode->i_sb);
	desc = block_group & (EXT2_DESC_PER_BLOCK(inode->i_sb) - 1);
	bh = inode->i_sb->u.ext2_sb.s_group_desc[group_desc];
	if (!bh) {
		ext2_error (inode->i_sb, "ext2_write_inode",
			    "Descriptor not loaded");
		return -EIO;
	}
	gdp = (struct ext2_group_desc *) bh->b_data;
	/*
	 * Figure out the offset within the block group inode table
	 */
	offset = ((inode->i_ino - 1) % EXT2_INODES_PER_GROUP(inode->i_sb)) *
		EXT2_INODE_SIZE(inode->i_sb);
	block = le32_to_cpu(gdp[desc].bg_inode_table) +
		(offset >> EXT2_BLOCK_SIZE_BITS(inode->i_sb));
	/* From here on bh is the inode-table block; it is released with
	   brelse() before returning. */
	if (!(bh = bread (inode->i_dev, block, inode->i_sb->s_blocksize))) {
		ext2_error (inode->i_sb, "ext2_write_inode",
			    "unable to read inode block - "
			    "inode=%lu, block=%lu", inode->i_ino, block);
		return -EIO;
	}
	offset &= EXT2_BLOCK_SIZE(inode->i_sb) - 1;
	raw_inode = (struct ext2_inode *) (bh->b_data + offset);

	raw_inode->i_mode = cpu_to_le16(inode->i_mode);
	if(!(test_opt(inode->i_sb, NO_UID32))) {
		raw_inode->i_uid_low = cpu_to_le16(low_16_bits(inode->i_uid));
		raw_inode->i_gid_low = cpu_to_le16(low_16_bits(inode->i_gid));
/*
 * Fix up interoperability with old kernels. Otherwise, old inodes get
 * re-used with the upper 16 bits of the uid/gid intact
 */
		if(!inode->u.ext2_i.i_dtime) {
			raw_inode->i_uid_high = cpu_to_le16(high_16_bits(inode->i_uid));
			raw_inode->i_gid_high = cpu_to_le16(high_16_bits(inode->i_gid));
		} else {
			raw_inode->i_uid_high = 0;
			raw_inode->i_gid_high = 0;
		}
	} else {
		raw_inode->i_uid_low = cpu_to_le16(fs_high2lowuid(inode->i_uid));
		raw_inode->i_gid_low = cpu_to_le16(fs_high2lowgid(inode->i_gid));
		raw_inode->i_uid_high = 0;
		raw_inode->i_gid_high = 0;
	}
	raw_inode->i_links_count = cpu_to_le16(inode->i_nlink);
	raw_inode->i_size = cpu_to_le32(inode->i_size);
	raw_inode->i_atime = cpu_to_le32(inode->i_atime);
	raw_inode->i_ctime = cpu_to_le32(inode->i_ctime);
	raw_inode->i_mtime = cpu_to_le32(inode->i_mtime);
	raw_inode->i_blocks = cpu_to_le32(inode->i_blocks);
	raw_inode->i_dtime = cpu_to_le32(inode->u.ext2_i.i_dtime);
#ifdef CONFIG_EXT2_COMPRESS
	if ((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode))
	    && (inode->u.ext2_i.i_flags & (EXT2_COMPR_FL | EXT2_COMPRBLK_FL))) {
		if ((inode->u.ext2_i.i_log2_clu_nblocks < 2)
		    || (inode->u.ext2_i.i_log2_clu_nblocks > 5)) {
			inode->u.ext2_i.i_flags |= EXT2_ECOMPR_FL;
			ext2_error (inode->i_sb, "ext2_write_inode",
				"inode %lu is corrupted: log2_clu_nblocks=%u",
				inode->i_ino, inode->u.ext2_i.i_log2_clu_nblocks);
		}
		assert (inode->u.ext2_i.i_clu_nblocks == (1 << inode->u.ext2_i.i_log2_clu_nblocks));
		assert (inode->u.ext2_i.i_compr_method < 0x20);
		/* Pack the method into bits 26-30 and log2 of the cluster
		   size into bits 23-25 of the on-disk flags word. */
		raw_inode->i_flags = cpu_to_le32
			((inode->u.ext2_i.i_flags & 0x807fffff)
			 | (inode->u.ext2_i.i_compr_method << 26)
			 | (inode->u.ext2_i.i_log2_clu_nblocks << 23));
	} else
		/* Not a compressed inode: store the flags without any
		   compression state.  EXT2_IMMUTABLE_FL is deliberately
		   NOT in the clear mask.  It has nothing to do with
		   compression, and clearing it here would silently drop
		   the immutable attribute from disk on every write-back. */
		raw_inode->i_flags = cpu_to_le32
			(inode->u.ext2_i.i_flags 
			 & 0x807fffff /* no compr meth/size */
			 & ~(EXT2_COMPR_FL | EXT2_COMPRBLK_FL | EXT2_ECOMPR_FL | EXT2_NOCOMPR_FL));
#else
	raw_inode->i_flags = cpu_to_le32(inode->u.ext2_i.i_flags);
#endif
	raw_inode->i_faddr = cpu_to_le32(inode->u.ext2_i.i_faddr);
	raw_inode->i_frag = inode->u.ext2_i.i_frag_no;
	raw_inode->i_fsize = inode->u.ext2_i.i_frag_size;
	raw_inode->i_file_acl = cpu_to_le32(inode->u.ext2_i.i_file_acl);
	/* Directories store i_dir_acl; everything else stores the upper
	   32 bits of the size through i_size_high. */
	if (S_ISDIR(inode->i_mode))
		raw_inode->i_dir_acl = cpu_to_le32(inode->u.ext2_i.i_dir_acl);
	else {
		raw_inode->i_size_high = cpu_to_le32(inode->i_size >> 32);
		if (inode->i_size > 0x7fffffffULL) {
			struct super_block *sb = inode->i_sb;
			if (!EXT2_HAS_RO_COMPAT_FEATURE(sb,
					EXT2_FEATURE_RO_COMPAT_LARGE_FILE) ||
			    EXT2_SB(sb)->s_es->s_rev_level ==
					cpu_to_le32(EXT2_GOOD_OLD_REV)) {
			       /* If this is the first large file
				* created, add a flag to the superblock.
				*/
				lock_kernel();
				ext2_update_dynamic_rev(sb);
				EXT2_SET_RO_COMPAT_FEATURE(sb,
					EXT2_FEATURE_RO_COMPAT_LARGE_FILE);
				unlock_kernel();
				ext2_write_super(sb);
			}
		}
	}
	
	raw_inode->i_generation = cpu_to_le32(inode->i_generation);
	/* Device nodes keep their device number in i_block[0]; all other
	   inodes copy the (already little-endian) block pointers. */
	if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
		raw_inode->i_block[0] = cpu_to_le32(kdev_t_to_nr(inode->i_rdev));
	else for (block = 0; block < EXT2_N_BLOCKS; block++)
		raw_inode->i_block[block] = inode->u.ext2_i.i_data[block];
	mark_buffer_dirty(bh);
	if (do_sync) {
		ll_rw_block (WRITE, 1, &bh);
		wait_on_buffer (bh);
		if (buffer_req(bh) && !buffer_uptodate(bh)) {
			printk ("IO error syncing ext2 inode ["
				"%s:%08lx]\n",
				bdevname(inode->i_dev), inode->i_ino);
			err = -EIO;
		}
	}
	brelse (bh);
	return err;
}

/*
 * write_inode entry point: writes @inode back through
 * ext2_update_inode() while holding the big kernel lock.  @wait is
 * passed on as the "synchronous" flag.  The result of the update is
 * discarded, since this function returns void.
 */
void ext2_write_inode (struct inode * inode, int wait)
{
	lock_kernel();
	(void) ext2_update_inode(inode, wait);
	unlock_kernel();
}

/*
 * Write @inode back synchronously: ext2_update_inode() is called with
 * its do_sync argument set, and its result (0 or a negative errno) is
 * returned to the caller.
 */
int ext2_sync_inode (struct inode *inode)
{
	int err;

	err = ext2_update_inode(inode, 1);
	return err;
}
