/*
 *  linux/fs/ext2/file.c
 *
 * Copyright (C) 1992, 1993, 1994, 1995
 * Remy Card (card@masi.ibp.fr)
 * Laboratoire MASI - Institut Blaise Pascal
 * Universite Pierre et Marie Curie (Paris VI)
 *
 *  from
 *
 *  linux/fs/minix/file.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 *
 *  ext2 fs regular file handling primitives
 *
 *  64-bit file support on 64-bit platforms by Jakub Jelinek
 * 	(jj@sunsite.ms.mff.cuni.cz)
 */

/*
 *  Copyright (C) 2001 Alcatel Business Systems - R&D Illkirch FRANCE
 *
 *  	Transparent compression code for 2.4 kernel.
 *
 *  Denis Richard (denis.richard@sxb.bsf.alcatel.fr)
 *  Pierre Peiffer (pierre.peiffer@sxb.bsf.alcatel.fr)
 *
 *  Adapted from patch e2compr-0.4.39-patch-2.2.18 .
 */

#include <linux/fs.h>
#include <linux/ext2_fs.h>
#include <linux/sched.h>
#include "debug.h"
#include <linux/ext2_fs_c.h>
#include <linux/locks.h>
#include <asm/uaccess.h>
#include <linux/config.h>
#ifdef  CONFIG_EXT2_COMPRESS
#include <linux/kmod.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <linux/pagemap.h>
#endif

/*
 * Release method for ext2 regular files.
 *
 * This is not the counterpart of every open: open gets called at every
 * open, whereas release gets called only when /all/ the files are closed.
 * (pjm 1998-01-09: note that this is different from `when no process has
 * the inode open'.)
 *
 * With CONFIG_EXT2_COMPRESS, a regular file that still has links and has
 * EXT2_CLEANUP_FL set in i_compr_flags is handed to
 * ext2_cleanup_compressed_inode() here.  If the file was opened for
 * writing, its preallocated blocks are discarded.
 *
 * Always returns 0.
 */
static int ext2_release_file (struct inode * inode, struct file * filp)
{
#ifdef CONFIG_EXT2_COMPRESS
	int cleanup_pending;

	/*
	 * Now's as good a time as any to clean up wrt compression.
	 * Previously (before 2.1.4x) we waited until ext2_put_inode(),
	 * but now the dcache sometimes delays that call until umount
	 * time.
	 */
	cleanup_pending = S_ISREG (inode->i_mode)
			  && inode->i_nlink
			  && (inode->u.ext2_i.i_compr_flags & EXT2_CLEANUP_FL);

	if (cleanup_pending) {
# ifdef EXT2_COMPR_REPORT_PUT
		printk(KERN_DEBUG
		       "ext2_release_file: pid=%d, i_ino=%lu, i_count=%d\n",
		       current->pid, inode->i_ino, inode->i_count);
# endif
		/*
		 * todo: See how the return code of ext2_release_file()
		 * is used, and decide whether it might be appropriate
		 * to pass any errors to caller.  For now the result is
		 * deliberately dropped.
		 */
		(void) ext2_cleanup_compressed_inode (inode);
	}
#endif
	/* Only writers can have preallocated blocks to give back. */
	if (!(filp->f_mode & FMODE_WRITE))
		return 0;

	ext2_discard_prealloc (inode);
	return 0;
}

#ifdef CONFIG_EXT2_COMPRESS
/*
 * Per-page bookkeeping used by ext2_file_write() while it handles one
 * compression cluster.  One entry exists for every page of the cluster,
 * whether or not the current write touches that page.
 */
struct page_cluster {
  struct page *	page;		/* page cache page for this slot; NULL until grabbed */
  loff_t	pos;		/* file position where this page's share of the write starts (0 if untouched) */
  unsigned	bytes;		/* number of bytes to copy into this page (0 if untouched) */
  unsigned long	offset;		/* offset inside the page at which the copy starts */
  unsigned char in_range;	/* 0: page not touched by the write; otherwise see PAGE_* below */
  const char *	buf;		/* user-space source for this page's data (NULL if untouched) */
};

/* Values stored in page_cluster.in_range for pages touched by the write. */
#define PAGE_IN_RANGE	1	/* page is part of the write but not (yet) kmap'ed */
#define PAGE_KMAPPED	2	/* page is part of the write and has been kmap'ed; needs kunmap */

/*
 * Write to a file through the page cache. 
 *
 * We currently put everything into the page cache prior to writing it.
 * This is not a problem when writing full pages. With partial pages,
 * however, we first have to read the data into the cache, then
 * dirty the page, and finally schedule it for writing. Alternatively, we
 * could write-through just the portion of data that would go into that
 * page, but that would kill performance for applications that write data
 * line by line, and it's prone to race conditions.
 *
 * Note that this routine doesn't try to keep track of dirty pages. Each
 * file system has to do this all by itself, unfortunately.
 *							okir@monad.swb.de
 *
 * e2compr variant:
 *
 * Files that have neither EXT2_COMPR_FL nor EXT2_COMPRBLK_FL set are passed
 * straight to generic_file_write().  For the others the write is processed
 * one compression cluster at a time: every page of the cluster is grabbed
 * (locked) in the page cache, a compressed cluster is decompressed first,
 * and then the user data is copied into the pages that the write touches.
 *
 * @file:  file being written
 * @buf:   user-space source buffer
 * @count: number of bytes requested
 * @ppos:  file position; updated to the position after the last byte
 *         written once the copy loop has been entered
 *
 * Returns the number of bytes written if any were written, otherwise 0 or
 * a negative error code.  i_sem is held for the whole operation.
 */
ssize_t
ext2_file_write(struct file *file,const char *buf,size_t count,loff_t *ppos)
{
	struct address_space *mapping = file->f_dentry->d_inode->i_mapping;
	struct inode	*inode = mapping->host;
	unsigned long	limit = current->rlim[RLIMIT_FSIZE].rlim_cur;
	loff_t		pos;
	struct page     *cached_page;
	struct page_cluster pageClu[EXT2_MAX_CLUSTER_PAGES];
	unsigned long	written;
	long		status;
	int		err, i;
	int             pagesPerCluster=0; /* number of pages per cluster */
	unsigned long   last_index;           /* last page index */
	u32 		comprblk_mask=0;
	const char * curbuf = buf;
	int	osync_already;

	if (!(inode->u.ext2_i.i_flags & (EXT2_COMPR_FL|EXT2_COMPRBLK_FL)))
		/* file not compressed: fall in the default file_write */
		return generic_file_write(file, buf, count, ppos);

	if ((ssize_t) count < 0)
		return -EINVAL;

	if (!access_ok(VERIFY_READ, buf, count))
		return -EFAULT;

	cached_page = NULL;

	down(&inode->i_sem);

	pos = *ppos;
	err = -EINVAL;
	if (pos < 0)
		goto out;

	/* Report (and clear) an error left pending on the file. */
	err = file->f_error;
	if (err) {
		file->f_error = 0;
		goto out;
	}

	written = 0;

	/* FIXME: this is for backwards compatibility with 2.4 */
	if (!S_ISBLK(inode->i_mode) && file->f_flags & O_APPEND)
		pos = inode->i_size;

	/*
	 * Check whether we've reached the file size limit.
	 */
	err = -EFBIG;

	if (limit != RLIM_INFINITY) {
		if (pos >= limit) {
			send_sig(SIGXFSZ, current, 0);
			goto out;
		}
		if (pos > 0xFFFFFFFFULL || count > limit - (u32)pos) {
			/* send_sig(SIGXFSZ, current, 0); */
			count = limit - (u32)pos;
		}
	}

	/*
	 *	LFS rule 
	 */
	if ( pos + count > MAX_NON_LFS && !(file->f_flags&O_LARGEFILE)) {
		if (pos >= MAX_NON_LFS) {
			send_sig(SIGXFSZ, current, 0);
			goto out;
		}
		if (count > MAX_NON_LFS - (u32)pos) {
			/* send_sig(SIGXFSZ, current, 0); */
			count = MAX_NON_LFS - (u32)pos;
		}
	}

	/*
	 *	Are we about to exceed the fs block limit ?
	 *
	 *	If we have written data it becomes a short write
	 *	If we have exceeded without writing data we send
	 *	a signal and give them an EFBIG.
	 *
	 *	Linus frestrict idea will clean these up nicely..
	 */

	if (!S_ISBLK(inode->i_mode)) {
		if (pos >= inode->i_sb->s_maxbytes)
		{
			if (count || pos > inode->i_sb->s_maxbytes) {
				send_sig(SIGXFSZ, current, 0);
				err = -EFBIG;
				goto out;
			}
			/* zero-length writes at ->s_maxbytes are OK */
		}

		if (pos + count > inode->i_sb->s_maxbytes)
			count = inode->i_sb->s_maxbytes - pos;
	} else {
		if (is_read_only(inode->i_rdev)) {
			err = -EPERM;
			goto out;
		}
		if (pos >= inode->i_size) {
			if (count || pos > inode->i_size) {
				err = -ENOSPC;
				goto out;
			}
		}

		if (pos + count > inode->i_size)
			count = inode->i_size - pos;
	}

	err = 0;
	if (count == 0)
		goto out;

	status  = 0;

	/* O_DIRECT is refused on this (compressed-file) path. */
	if (file->f_flags & O_DIRECT) {
		err = -EINVAL;
		goto out;
	}

	/*
	 *	We must still check for EXT2_ECOMPR_FL, as it may have been
	 *	set after we got the write permission to this file.
	 */
	if ((inode->u.ext2_i.i_flags
	     & (EXT2_ECOMPR_FL | EXT2_NOCOMPR_FL))
	    == (EXT2_ECOMPR_FL | 0)) {
		err = -EXT2_ECOMPR;
		goto out;
	}

	remove_suid(inode);
	inode->i_ctime = inode->i_mtime = CURRENT_TIME;
	mark_inode_dirty_sync(inode);

	/* last_index is only consumed by the trace message below. */
	if ((pos+count) > inode->i_size)
		last_index = (pos+count-1) >> PAGE_CACHE_SHIFT ;
	else
		last_index = (inode->i_size-1) >> PAGE_CACHE_SHIFT ;

	/*
	 * comprblk_mask is all ones exactly when EXT2_COMPRBLK_FL is
	 * currently set in i_flags; otherwise only that one bit is clear.
	 */
	comprblk_mask = inode->u.ext2_i.i_flags | ~EXT2_COMPRBLK_FL;
# ifdef EXT2_COMPRESS_WHEN_CLU
	inode->u.ext2_i.i_flags |= EXT2_COMPRBLK_FL;
# endif

	do {
		unsigned long index, clusters_page_index0;
		/*
		 * Must be signed: the helpers below report failure with a
		 * negative value, which an unsigned variable would turn
		 * into "cluster is compressed".
		 */
		long cluster_compressed = 0;
		loff_t curpos = pos;
		size_t curcount = count;
		char *kaddr;
		u32  cluster=0;

		/* Page cache index of the first page touched this round. */
		index = pos >> PAGE_CACHE_SHIFT;

		/* Compute number of pages per cluster on *this* cluster */
		/* Every cluster have the same size except the first one */
		cluster = ext2_page_to_cluster(inode, index);
		pagesPerCluster = ext2_cluster_npages(inode, cluster);
		/* We bring all the pages needed to store the whole cluster */
		clusters_page_index0 = ext2_cluster_page0(inode, cluster);
		assert((pagesPerCluster > 0) && (pagesPerCluster <= EXT2_MAX_CLUSTER_PAGES));
		trace_e2c("ext2_file_write: cluster=%d, pagesPerCluster=%d, clusters_page_index0=%ld\n", cluster, pagesPerCluster, clusters_page_index0);

		status = -ENOMEM;	/* we'll assign it later anyway */

		/*
		 * For each page of the cluster, work out in advance what
		 * the copy step will need (offset, length, user pointer).
		 * The routines (prepare_write, ...) must first be called on
		 * each page, then the data is decompressed, and only then
		 * is the rest validated.
		 */
		for (i=0; i<pagesPerCluster; i++) {
			pageClu[i].page = NULL;
			if (((clusters_page_index0 + i) < index) ||
			    ((clusters_page_index0 + i) > ((pos+count-1) >> PAGE_CACHE_SHIFT))) {
				/* page of the cluster not touched by this write */
				pageClu[i].offset	= 0;
				pageClu[i].bytes	= 0;
				pageClu[i].in_range	= 0;
				pageClu[i].pos		= 0;
				pageClu[i].buf		= NULL;
			} else {
				/* we are inside the range */
				pageClu[i].offset	= (curpos & (PAGE_CACHE_SIZE -1)); /* Within page */
				pageClu[i].bytes	= PAGE_CACHE_SIZE - pageClu[i].offset;
				pageClu[i].in_range	= PAGE_IN_RANGE;
				pageClu[i].pos		= curpos;
				pageClu[i].buf		= curbuf;
				if (pageClu[i].bytes > curcount)
					pageClu[i].bytes = curcount;
				curpos 		+= pageClu[i].bytes;
				curcount	-= pageClu[i].bytes;
				curbuf		+= pageClu[i].bytes;
			}
		}

		trace_e2c("ext2_file_write: [pos=%d count=%d size=%d last_index=%ld]\n", (int)pos, count, (int)inode->i_size, last_index);

		/* We decompress the cluster if needed, and write
		   the data as normal.  The cluster will be
		   compressed again when the inode is cleaned up. */
		if ((comprblk_mask == ~(u32)0)
		    && !(inode->u.ext2_i.i_flags
			 & EXT2_NOCOMPR_FL)) {
			/* assert (block == pos >> inode->i_sb->s_blocksize_bits); */

			cluster_compressed = ext2_cluster_is_compressed_fn(inode, cluster);

			if (cluster_compressed < 0) {
				/*
				 * The final "written ? written : status"
				 * returns this error only if nothing has
				 * been written so far.
				 */
				status = cluster_compressed;
				break;
			}
		}

		if (cluster_compressed > 0) {
			/* Here, decompression take place  */
			cluster_compressed = ext2_decompress_cluster(inode, cluster);
			if (cluster_compressed < 0) {
				/* Never write plain data over a cluster that failed to decompress. */
				status = cluster_compressed;
				break;
			}
		}

		/* Cluster is not compressed	*/
		for (i = 0; i < pagesPerCluster; i++) {
			/*
			 * Bring in the user page that we will copy from _first_.
			 * Otherwise there's a nasty deadlock on copying from the
			 * same page as we're writing to, without it being marked
			 * up-to-date.
			 */
			if (pageClu[i].in_range) {
				volatile unsigned char dummy;
				__get_user(dummy, pageClu[i].buf);
				__get_user(dummy, pageClu[i].buf+pageClu[i].bytes-1);
			}
			pageClu[i].page = __grab_cache_page(mapping, clusters_page_index0+i, &cached_page);

			if (!pageClu[i].page) {
				/*
				 * Pages 0..i-1 are still locked and referenced.
				 * The common cleanup below walks the array up to
				 * the first NULL page, so it releases exactly
				 * those (including the kunmap they may need).
				 * Releasing them here as well would unlock and
				 * drop every page twice.
				 */
				status = -ENOMEM;
				break;
			}

			/* We have exclusive IO access to the page.. */
			if (!PageLocked(pageClu[i].page)) {
				PAGE_BUG(pageClu[i].page);
			}
			if (pageClu[i].in_range) {
				status = mapping->a_ops->prepare_write(file, pageClu[i].page, pageClu[i].offset,
								       pageClu[i].offset+pageClu[i].bytes);
				if (status) {
					goto unlock;
				}

				kaddr = kmap(pageClu[i].page);
				/* Tell the cleanup loop that this page needs kunmap. */
				pageClu[i].in_range = PAGE_KMAPPED;

				status = __copy_from_user(kaddr+pageClu[i].offset, pageClu[i].buf, pageClu[i].bytes);
				flush_dcache_page(pageClu[i].page);
				/* NOTE(review): the result of commit_write is not checked here. */
				mapping->a_ops->commit_write(file, pageClu[i].page, pageClu[i].offset,
							     pageClu[i].offset+pageClu[i].bytes);
				if (status) {
					/* some bytes could not be copied from user space */
					status = -EFAULT;
					goto unlock;
				}

				status = pageClu[i].bytes;

				written += status;
				count -= status;
				pos += status;
				buf += status;
			}
		}

unlock:
		/* Mark them unlocked again and drop the page.. */
		for (i=0; (i<pagesPerCluster) && (pageClu[i].page != NULL); i++) {
			if (pageClu[i].in_range == PAGE_KMAPPED) {
				kunmap(pageClu[i].page);
				SetPageReferenced(pageClu[i].page);
			}
			UnlockPage(pageClu[i].page);
			page_cache_release(pageClu[i].page);
		}

		if (status < 0)
			break;

#ifdef EXT2_COMPRESS_WHEN_CLU
		assert (inode->u.ext2_i.i_flags & EXT2_COMPRBLK_FL);
		if ((inode->u.ext2_i.i_flags & EXT2_COMPR_FL)
		    && (ext2_offset_is_clu_boundary(inode, curpos))
		    && (ext2_wr_wa != NULL)) {

			if ((mapping->i_mmap == NULL) && (mapping->i_mmap_shared == NULL)) {
				/* Pierre Peiffer: For file mapped (via mmap, I mean),
				   compression will occure when releasing the file.
				   We must, in this case, avoid the pages (possibly
				   mapped by a process) to be compressed under them */
				int error;
				error = ext2_compress_cluster(inode, cluster);
				/* Actually, raising write_error may be a
				   mistake.  For example,
				   ext2_cleanup_compressed_cluster() doesn't
				   usually return any errors to user.  todo:
				   Have a look at ext2_compress_cluster, and
				   check whether its errors are such that they
				   should be returned to user.  Some of the
				   will be, of course, but it might be
				   possible for it to return without
				   change. */
				if (error > 0)
					comprblk_mask = ~(u32)0;
			} else {
				trace_e2c("ext2_file_write: (dev. %s): ino=%ld, cluster=%d: file mapped, does not compress cluster\n", bdevname(inode->i_sb->s_dev), inode->i_ino, cluster);
				inode->u.ext2_i.i_flags |= EXT2_DIRTY_FL;
				inode->u.ext2_i.i_compr_flags |= EXT2_CLEANUP_FL;
			}
		}
#endif

	} while (count);
	*ppos = pos;

	if (cached_page)
		page_cache_release(cached_page);

	/* For now, when the user asks for O_SYNC, we'll actually
	 * provide O_DSYNC. */
	if (status >= 0) {
		if ((file->f_flags & O_SYNC) || IS_SYNC(inode)) {
			/*
			 * Set EXT2_OSYNC_INODE around the sync, and clear it
			 * afterwards only if it was not already set on entry.
			 */
			if (inode->u.ext2_i.i_compr_flags & EXT2_OSYNC_INODE) {
				osync_already = 1;
			} else {
				osync_already = 0;
				inode->u.ext2_i.i_compr_flags |= EXT2_OSYNC_INODE;
			}
			status = generic_osync_inode(inode, OSYNC_METADATA|OSYNC_DATA);
			if (osync_already == 0) {
				inode->u.ext2_i.i_compr_flags &= ~EXT2_OSYNC_INODE;
			}
		}
	}

	/* A partial write wins over a late error. */
	err = written ? written : status;

# ifdef EXT2_COMPRESS_WHEN_CLU
	assert (inode->u.ext2_i.i_flags & EXT2_COMPRBLK_FL);
	inode->u.ext2_i.i_flags &= comprblk_mask;
	if ((inode->u.ext2_i.i_flags & EXT2_COMPR_FL)
	    && (!ext2_offset_is_clu_boundary(inode, pos)
		|| (ext2_wr_wa == NULL))) {
		inode->u.ext2_i.i_flags |= EXT2_DIRTY_FL;
		inode->u.ext2_i.i_compr_flags |= EXT2_CLEANUP_FL;
	}
# else
	/* Leave the (re)compression to the inode cleanup. */
	if (inode->u.ext2_i.i_flags & EXT2_COMPR_FL) {
		inode->u.ext2_i.i_flags |= EXT2_DIRTY_FL;
		inode->u.ext2_i.i_compr_flags |= EXT2_CLEANUP_FL;
	}
# endif
 out:
	up(&inode->i_sem);
	return err;
}

/*
 * Called when an inode is about to be open.
 *
 * Two kinds of open are refused:
 *  - O_DIRECT on a file that has EXT2_COMPR_FL or EXT2_COMPRBLK_FL set
 *    (-EINVAL);
 *  - a file larger than MAX_NON_LFS when the caller did not pass
 *    O_LARGEFILE (-EFBIG).  This disallows opening large files on 32bit
 *    systems without O_LARGEFILE; on 64bit systems the flag is forced on
 *    in sys_open.
 *
 * Returns 0 when the open may proceed.
 */
static int ext2_file_open(struct inode * inode, struct file * filp)
{
	const int direct_io = (filp->f_flags & O_DIRECT) != 0;
	const int large_ok = (filp->f_flags & O_LARGEFILE) != 0;

	if (direct_io
	    && (inode->u.ext2_i.i_flags & (EXT2_COMPR_FL|EXT2_COMPRBLK_FL)) != 0)
		return -EINVAL;

	if (inode->i_size > MAX_NON_LFS && !large_ok)
		return -EFBIG;

	return 0;
}
#endif /* CONFIG_EXT2_COMPRESS*/

/*
 * File operations for ext2 regular files.
 *
 * We have mostly NULL's here: the current defaults are ok for the ext2
 * filesystem.  Only the write and open methods depend on whether
 * transparent compression is configured; the entries are labeled, so
 * their order in the initializer is irrelevant.
 */
struct file_operations ext2_file_operations = {
#ifdef CONFIG_EXT2_COMPRESS
	write:		ext2_file_write,
	open:		ext2_file_open,
#else
	write:		generic_file_write,
	open:		generic_file_open,
#endif /* CONFIG_EXT2_COMPRESS*/
	llseek:		generic_file_llseek,
	read:		generic_file_read,
	ioctl:		ext2_ioctl,
	mmap:		generic_file_mmap,
	release:	ext2_release_file,
	fsync:		ext2_sync_file,
};

/*
 * Inode operations for ext2 regular files: only truncate is provided,
 * every other method is left unset.
 */
struct inode_operations ext2_file_inode_operations = {
	truncate:	ext2_truncate,
};
