#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/string.h>
#include <linux/slab.h>
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/skbuff.h>
#include <linux/in.h>
#include <linux/netdevice.h>
#include <linux/ip.h>
#include <linux/tcp.h>

#include <asm/uaccess.h>
#include <asm/mconfig.h>
#include <linux/urlfilter.h>

// Taken by fill_record() and sys_get_urllog() around every access to the
// URL-log lists below.
static spinlock_t urllog_list_lock = SPIN_LOCK_UNLOCKED;

// Records that are currently unused. init_urllog() puts every element of
// urllog[] on this list; fill_record() takes records from its head.
static LIST_HEAD(urllog_free);

// urllog_active is an ordered list of the records in use.
// The most recently referenced record is moved to the list tail, so the
// list head is always the oldest record (the one fill_record() recycles
// when urllog_free is empty).
static LIST_HEAD(urllog_active);

// Static backing storage for all URL-log records.
static URLLOG urllog[URLLOG_MAX];	
// Hash buckets, indexed by get_url_hash_value(srcip); records are chained
// through their hash_list member.
static struct list_head urllog_hash_list[URLLOG_HASH_SIZE];

// Map a value (callers pass a source IP address) to a bucket index of
// urllog_hash_list: XOR the value shifted right by 16 bits with its low
// 16 bits, then reduce the result modulo URLLOG_HASH_SIZE.
static inline int get_url_hash_value(unsigned long val)
{
	unsigned long	shifted = val >> 16;
	unsigned long	low_bits = val & 0xffff;

	return (shifted ^ low_bits) % URLLOG_HASH_SIZE;
}

// Copy one line of text from [firstp, endp) into url->target.
//
// Copying stops at the first CR or LF, at endp, or after
// URL_TARGET_SIZE - 1 characters, whichever comes first. The result is
// always NUL-terminated.
//
// url    - record whose target field is overwritten
//          (assumes target holds at least URL_TARGET_SIZE bytes -- TODO confirm
//          against the URLLOG declaration)
// firstp - first byte to copy
// endp   - one past the last readable byte
static inline void copy_url_target(URLLOG *url, char *firstp, char *endp)
{
	int	i;

	// The bounds test must come before the dereference; otherwise the byte
	// at endp (outside the buffer) is read when the line is not terminated.
	for (i = 0; i < URL_TARGET_SIZE - 1 && firstp < endp; i++) {
		if ( *firstp == 0x0d || *firstp == 0x0a )
			break;
		url->target[i] = *firstp++;
	}
	url->target[i] = 0;
}

// Record the URL text in [firstp, endp) as the latest access made by srcip.
//
// There is at most one record per source address. If srcip already has a
// record, its target and timestamp are refreshed and it is moved to the tail
// of urllog_active. Otherwise a record is taken from urllog_free or, when
// that list is empty, the oldest active record (head of urllog_active) is
// recycled.
//
// srcip  - source address used as the lookup key
// firstp - first byte of the URL text
// endp   - one past the last readable byte of the URL text
void fill_record(unsigned long srcip, char *firstp, char *endp)
{
	int	log_index;
	URLLOG	*url, *newurl;
	struct list_head *p;

	spin_lock_irq(&urllog_list_lock);

	log_index = get_url_hash_value(srcip);
	list_for_each(p, &urllog_hash_list[log_index]) {
		url = list_entry(p, URLLOG, hash_list);
		if ( srcip != url->srcip )
			continue;

		// Existing record: the target is being replaced, so the
		// timestamp has to follow it, otherwise the log would report
		// the new URL with the time of an older request.
		url->time = jiffies;
		copy_url_target(url, firstp, endp);
		list_del(&url->list);
		list_add_tail(&url->list, &urllog_active);
		goto out;
	}

	if ( !list_empty(&urllog_free) ) {
		// get free urllog node
		newurl = list_entry(urllog_free.next, URLLOG, list);
		list_del(&newurl->list);
	} else if ( !list_empty(&urllog_active) ) {
		// Recycle the oldest node in urllog_active. Every active node
		// was also linked into its hash chain when it was added, so it
		// can be unlinked directly without searching the bucket.
		newurl = list_entry(urllog_active.next, URLLOG, list);
		list_del(&newurl->list);
		list_del(&newurl->hash_list);
	} else {
		// Both lists are empty (e.g. init_urllog() has not run yet).
		// Taking ".next" of an empty list would yield the list head
		// itself, not a record, so there is nothing to store into.
		goto out;
	}

	newurl->time = jiffies;
	newurl->srcip = srcip;
	copy_url_target(newurl, firstp, endp);

	list_add_tail(&newurl->list, &urllog_active);
	list_add(&newurl->hash_list, &urllog_hash_list[log_index]);

out:
	spin_unlock_irq(&urllog_list_lock);
}

// Taken by verify_pattern() and submit_url_records() around every access to
// the filter configuration below.
static spinlock_t url_filter_lock = SPIN_LOCK_UNLOCKED;
// Non-zero when filtering is enabled; verify_pattern() allows everything
// when this is zero.
static int	url_filter_enable;
// Number of valid entries in suffix_domain[] / fully_qualified_domain[].
static int	nr_suffix_domain, nr_fully_qualified_domain;
// Cached strlen() of the corresponding rule strings.
static int	suffix_domain_len[MAX_URL_ACCESS_RULES];
static int	fully_qualified_domain_len[MAX_URL_ACCESS_RULES];
// Rules that begin with '.'; a host is dropped when it ends with one of them.
static char	suffix_domain[MAX_URL_ACCESS_RULES][URL_NAME_SIZE];
// All other rules; a host is dropped when it equals one of them exactly.
static char	fully_qualified_domain[MAX_URL_ACCESS_RULES][URL_NAME_SIZE];

// Convert the ASCII letters 'A'..'Z' of a NUL-terminated string to lower
// case in place. All other characters are left untouched.
static void lower_case(char *str)
{
	char	*cursor = str;

	while ( *cursor != 0 ) {
		char	ch = *cursor;

		// Setting bit 0x20 turns an ASCII capital into its lower-case form.
		if ( ch >= 'A' && ch <= 'Z' )
			*cursor = ch | 0x20;
		cursor++;
	}
}

// Check the host named in [firstp, endp) against the configured URL rules.
//
// An optional leading "http://" is skipped, then the host name is copied
// into domain_name (lower-cased, NUL-terminated, truncated to
// URL_NAME_SIZE - 1 characters). The host name ends at '/', ':', CR, LF or
// endp. Stopping at ':' strips an optional port, so "Host: name:8080" is
// matched against the rules like "Host: name".
//
// domain_name - caller-supplied scratch buffer of at least URL_NAME_SIZE
//               bytes; it is left untouched when filtering is disabled
// firstp      - first byte of the header value
// endp        - one past the last readable byte
//
// if the requested domain name is allowed, return 0;
// otherwise, return -1
static int verify_pattern(char *domain_name, char *firstp, char *endp)
{
	int	i, j, k, len;
	int	ret = 0;

	spin_lock_irq(&url_filter_lock);

	if ( !url_filter_enable )
		goto out;

	// Only look for the scheme prefix when that many bytes are available,
	// so the comparison cannot run past endp.
	if ( endp - firstp >= 7 && strncmp(firstp, "http://", 7) == 0 )
		firstp += 7;

	// Bounds are tested before the byte is read.
	for (len = 0; len < URL_NAME_SIZE - 1 && firstp < endp; len++) {
		char	c = *firstp;

		if ( c == '/' || c == ':' || c == 0x0d || c == 0x0a )
			break;
		domain_name[len] = c;
		firstp++;
	}
	domain_name[len] = 0;
	lower_case(domain_name);

	// Exact-match rules. The cached length is a cheap pre-filter.
	for (i = 0; i < nr_fully_qualified_domain; i++) {
		if ( len != fully_qualified_domain_len[i] )
			continue;

		if ( strcmp(domain_name, fully_qualified_domain[i]) == 0 ) {
			ret = -1;
			goto out;
		}
	}

	// Suffix rules: compare backwards from the end of both strings.
	// j runs below zero only when the whole rule matched the tail of
	// the host name.
	for (i = 0; i < nr_suffix_domain; i++) {
		char	*psuffix = suffix_domain[i];

		for (j = suffix_domain_len[i] - 1, k = len - 1; j >= 0 && k >= 0; j--, k--) {
			if ( psuffix[j] != domain_name[k] )
				break;
		}

		if ( j < 0 ) {
			ret = -1;
			goto out;
		}
	}

out:
	spin_unlock_irq(&url_filter_lock);
	return ret;
}

void submit_url_records(REG_URL_ACCESS_CONF *url_conf)
{
	int	i;

	spin_lock_irq(&url_filter_lock);

	url_filter_enable = url_conf->enable;
	if ( !url_filter_enable )
		goto done;

	nr_suffix_domain = nr_fully_qualified_domain = 0;

	for (i = 0; i < MAX_URL_ACCESS_RULES; i++) {
		if ( url_conf->url[i][0] == 0 )
			continue;

		if ( url_conf->url[i][0] == '.' ) {
			strcpy(suffix_domain[nr_suffix_domain], url_conf->url[i]);
			lower_case(suffix_domain[nr_suffix_domain]);
			suffix_domain_len[nr_suffix_domain] = strlen(suffix_domain[nr_suffix_domain]);
			nr_suffix_domain++;
		} else {
			strcpy(fully_qualified_domain[nr_fully_qualified_domain], url_conf->url[i]);
			lower_case(fully_qualified_domain[nr_fully_qualified_domain]);
			fully_qualified_domain_len[nr_fully_qualified_domain] = strlen(fully_qualified_domain[nr_fully_qualified_domain]);
			nr_fully_qualified_domain++;
		}
	}

done:
	spin_unlock_irq(&url_filter_lock);
}

// Inspect one packet and apply URL logging/filtering to HTTP requests.
//
// Only packets whose device name starts with "eth1", that carry TCP to
// destination port 80 and have both ACK and PSH set are examined. The first
// line of the payload (the request line) is skipped; the following header
// lines are scanned until the first "Referer:" or "Host:" header, which is
// checked with verify_pattern() and, if allowed, recorded with
// fill_record().
//
// Returns 0 when the packet is not examined or is allowed, and the negative
// value from verify_pattern() when the request matches a filter rule.
//
// NOTE(review): the payload is read directly behind the TCP header, which
// assumes the whole IP datagram is present in linear skb data -- confirm
// against the call site.
int process_url_access(struct sk_buff *skb)
{
	struct iphdr	*iph;
	struct tcphdr	*tcph;
	unsigned int	tot_len, hdr_len;
	struct net_device *dev;
	char		*firstp, *endp;
	char		domain_name[URL_NAME_SIZE];
	int		ret;

	dev = skb->dev;
	if ( strncmp(dev->name, "eth1", 4) != 0 )
		return 0;

	iph = skb->nh.iph;
	if ( iph->protocol != IPPROTO_TCP )	// constant defined in linux/in.h
		return 0;

	tcph = (struct tcphdr *) (((void *) iph) + iph->ihl * 4);
	if ( tcph->dest != htons(80) || !tcph->ack || !tcph->psh )
		return 0;

	// tot_len is a network-byte-order field; convert it before doing
	// arithmetic, and reject packets whose headers cover the whole
	// datagram so the unsigned payload length cannot wrap around.
	tot_len = ntohs(iph->tot_len);
	hdr_len = iph->ihl * 4 + tcph->doff * 4;
	if ( tot_len <= hdr_len )
		return 0;

	firstp = (char *) (((void *) tcph) + tcph->doff * 4);
	endp = firstp + (tot_len - hdr_len);

	while ( firstp < endp ) {
		int hit;

		// skip the rest of the current line (bounds checked before
		// each byte is read)
		while ( firstp < endp && *firstp != 0x0a )
			firstp++;

		if ( firstp >= endp )
			break;

		// step over the LF
		firstp++;

		// need at least 9 bytes so the header-name comparisons below
		// stay inside the payload
		if ( endp - firstp < 9 )
			break;

		// an empty line ends the header section
		if ( *firstp == 0x0d && *(firstp+1) == 0x0a )
			break;

		hit = 0;
		if ( strncmp(firstp, "Referer:", 8) == 0 ) {
			firstp += 8;
			hit = 1;
		} else if ( strncmp(firstp, "Host:", 5) == 0 ) {
			firstp += 5;
			hit = 1;
		}

		if ( hit ) {
			// skip leading spaces
			while ( firstp < endp && *firstp == ' ' )
				firstp++;

			ret = verify_pattern(domain_name, firstp, endp);
			if ( ret < 0 )
				return ret;

			fill_record(iph->saddr, firstp, endp);
			break;
		}
	}

	return 0;
}


// System call: copy the active URL-log records to user space.
//
// size        - user pointer; receives the number of records written
// urllog_info - user buffer; the caller must provide room for URLLOG_MAX
//               entries, since the number of records is not known in advance
//
// Records are returned in urllog_active order (oldest first).
//
// Returns 0 on success, -ENOMEM if the temporary buffer cannot be
// allocated, or -EFAULT if a user pointer is not writable.
asmlinkage int sys_get_urllog(int *size, URLLOG_INFO *urllog_info)
{
	int	count = 0;
	int	ret = 0;
	struct list_head *p;
	URLLOG		*url;
	URLLOG_INFO	*snapshot;

	// User memory must not be touched while the spinlock is held with
	// interrupts off, and must be written through copy_to_user()/put_user()
	// rather than dereferenced directly. So the list is first copied into
	// a kernel buffer under the lock and handed to user space afterwards.
	snapshot = kmalloc(URLLOG_MAX * sizeof(*snapshot), GFP_KERNEL);
	if ( !snapshot )
		return -ENOMEM;

	// Zero the buffer so that bytes behind each target string do not leak
	// stale kernel memory to user space.
	memset(snapshot, 0, URLLOG_MAX * sizeof(*snapshot));

	spin_lock_irq(&urllog_list_lock);
	list_for_each(p, &urllog_active) {
		// the active list is built from urllog[], so this is only a
		// safety net against overrunning the snapshot buffer
		if ( count >= URLLOG_MAX )
			break;

		url = list_entry(p, URLLOG, list);
		snapshot[count].time = url->time;
		snapshot[count].srcip = url->srcip;
		strcpy(snapshot[count].target, url->target);
		count++;
	}
	spin_unlock_irq(&urllog_list_lock);

	if ( count > 0 && copy_to_user(urllog_info, snapshot, count * sizeof(*snapshot)) )
		ret = -EFAULT;
	else if ( put_user(count, size) )
		ret = -EFAULT;

	kfree(snapshot);
	return ret;
}

// Boot-time setup of the URL log: put every record of urllog[] on the free
// list and initialise all hash bucket heads. Always returns 0.
int __init init_urllog(void)
{
	int	slot, bucket;

	printk(KERN_INFO "URL log initialization\n");

	// The list_head members inside each URLLOG need no prior
	// initialisation: adding an entry to a list sets both its "prev" and
	// "next" pointers.
	for (slot = 0; slot < URLLOG_MAX; slot++)
		list_add_tail(&urllog[slot].list, &urllog_free);

	// The bucket heads, in contrast, must start out as empty lists.
	for (bucket = 0; bucket < URLLOG_HASH_SIZE; bucket++)
		INIT_LIST_HEAD(&urllog_hash_list[bucket]);

#if 0
	url_filter_enable = 1;
	nr_suffix_domain = 1;
	nr_fully_qualified_domain = 1;
	strcpy(suffix_domain[0], ".com.tw");
	strcpy(fully_qualified_domain[0], "www.yahoo.com");
	suffix_domain_len[0] = strlen(suffix_domain[0]);
	fully_qualified_domain_len[0] = strlen(fully_qualified_domain[0]);
#endif

	return 0;
}

__initcall(init_urllog);
