/*
 * premier3.c - TOSHIBA Premier3 IEEE1394-Link driver
 * Copyright (C) 2000-2001 Toshiba Corporation
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software Foundation,
 * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 * $Id: premier3.c,v 1.1.1.1 2004/04/07 08:36:53 louistsai Exp $
 */

#include <linux/config.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/interrupt.h>
#include <linux/wait.h>
#include <linux/errno.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/fs.h>
#include <linux/poll.h>
#include <linux/proc_fs.h>
#include <linux/pci.h>
#include <asm/byteorder.h>
#include <asm/atomic.h>
#include <asm/io.h>
#include <asm/uaccess.h>
#include <asm/bootinfo.h>

#include "ieee1394.h"
#include "ieee1394_types.h"
#include "hosts.h"
#include "ieee1394_core.h"

#include <asm/dma.h>
#include <asm/cpu.h>
#include <asm/bootinfo.h>
#include <asm/toshiba-boards/dma.h>

/* Name under which the driver identifies itself. */
#define PREMIER3_DRIVER_NAME	"premier3"
/* Number of entries in the static cards[] table. */
#define MAX_PREMIER3_CARDS	1

/*
 * Isochronous receive buffer geometry.  In the visible part of this file
 * these are only referenced by the disabled ("#if 0") iso_rcv member of
 * struct premier3.
 */
#define NUM_ISORCV_PORT		4
#define MAX_ISORCV_SIZE		2048	/* 400Mbps */
#define ISORCV_PER_PAGE		(PAGE_SIZE / MAX_ISORCV_SIZE)
#define ISORCV_PAGES		(NUM_ISORCV_PORT / ISORCV_PER_PAGE)

/*
 * Compile-time switches:
 *  PR3_DEBUG_LOG               - keep a ring of transmitted/received quadlets
 *                                (pr3_dbglog) and build premier3_dump_log().
 *  PR3_HANDLE_PHYSICAL_REQUEST - answer read/write requests to system memory
 *                                inside the driver.
 *  PR3_USE_DMAC                - use the TXx927 DMA controller for the data
 *                                phase of those requests; only meaningful
 *                                together with PR3_HANDLE_PHYSICAL_REQUEST.
 */
#undef PR3_DEBUG_LOG
#define PR3_HANDLE_PHYSICAL_REQUEST
#ifdef PR3_HANDLE_PHYSICAL_REQUEST
#define PR3_USE_DMAC
#endif

/*
 *  PREMIER3_CONFIG_ROM_MINIMAL  - select the one-quadlet config ROM image.
 *  PR3_CLEAR_RXINT_AFTER_RXDATA - acknowledge the ARXDATA interrupt only
 *                                 after the packet data has been read.
 *  PR3_BUG_WORKAROUND           - re-check the receive FIFO after a physical
 *                                 request in case an ARXDATA interrupt was
 *                                 lost.
 */
#undef PREMIER3_CONFIG_ROM_MINIMAL
#undef PR3_CLEAR_RXINT_AFTER_RXDATA
#define PR3_BUG_WORKAROUND

/*
 * NOTE(review): not referenced in the visible part of the file; presumably
 * the register base address and interrupt line set up at probe time.
 */
static unsigned long premier3_base;
static int premier3_irq;

#ifdef PR3_DEBUG_LOG
#define PR3_LOG_MAX_IDX 1024
#define PR3_LOG_MAX_OFS 16
static unsigned long pr3_dbglog[PR3_LOG_MAX_IDX][PR3_LOG_MAX_OFS];
static int pr3_dbglog_idx = 0;
static int pr3_dbglog_ofs = 0;

/*
 * Print every slot of the debug log ring, starting with the slot that
 * follows pr3_dbglog_idx.
 *
 * A slot whose first word has any of bits 31..22 set is tagged "R" and is
 * printed as its leading words, an ellipsis if words were omitted, and
 * always its last word.  Any other slot is tagged "T" and is printed as at
 * least four leading words.  In both cases trailing zero words are trimmed,
 * keeping up to two words past the last non-zero one.
 */
void premier3_dump_log(void)
{
	const int last = PR3_LOG_MAX_OFS - 1;
	int slot = pr3_dbglog_idx;
	int line;

	for (line = 0; line < PR3_LOG_MAX_IDX; line++) {
		const unsigned long *entry;
		int count, k;

		slot = (slot + 1) % PR3_LOG_MAX_IDX;
		entry = pr3_dbglog[slot];

		printk("%d:", line);
		if (entry[0] & 0xffc00000) {
			printk("R ");
			/* highest non-zero word, not counting the last one */
			count = last - 1;
			while (count > 0 && !entry[count])
				count--;
			/* show two more words, but never the last one here */
			count += 2;
			if (count > last)
				count = last;
			for (k = 0; k < count; k++)
				printk(" %08lx", entry[k]);
			if (count < last)
				printk(" ........");
			printk(" %08lx", entry[last]);
		} else {
			printk("T ");
			/* highest non-zero word of the whole slot */
			count = last;
			while (count > 0 && !entry[count])
				count--;
			count += 2;
			if (count > PR3_LOG_MAX_OFS)
				count = PR3_LOG_MAX_OFS;
			if (count < 4)
				count = 4;
			for (k = 0; k < count; k++)
				printk(" %08lx", entry[k]);
		}
		printk("\n");
	}
}
#endif
#ifdef PR3_HANDLE_PHYSICAL_REQUEST
/* handle read/write request to system memory without involving highlevel */
/*
 * One physical (direct system memory) request that the driver answers
 * itself.  Allocated in handle_physical_request().
 */
struct premier3_phys_req {
	struct premier3_phys_req *next;	/* dma queue */
	quadlet_t req_header[4];	/* copy of the incoming request header */
	quadlet_t resp_header[4];	/* response header built by the driver */
	int resp_header_size;		/* bytes of resp_header to send (12 or 16) */
	int resp_data_size;		/* bytes of block data following the header */
	int speed_code;			/* taken from the quadlet that trails the request */
	unsigned long dma_addr;		/* physical address of the memory being accessed */
	int dma_length;			/* block transfer length in bytes, 0 for quadlet requests */
	int dma_outgoing;		/* 1: memory -> transmit FIFO, 0: receive FIFO -> memory */
};
struct premier3_packet {
	struct premier3_packet *xnext;
	int physical;
	union {
		struct premier3_phys_req *phys;
		struct hpsb_packet* normal;
	} u;
};
#ifdef PR3_USE_DMAC
static int premier3_min_dma_size = 32;
static int premier3_dmach = -1;
#endif
#endif /* PR3_HANDLE_PHYSICAL_REQUEST */

/* Per-card driver state. */
struct premier3 {
	int id; /* sequential card number */

	/* protects ... NOTE(review): no user visible in this part of the file */
	spinlock_t lock;

	/* remapped memory spaces */
	void *registers;
	/* address of the register block as seen by the DMA controller */
	dma_addr_t registers_dma;
	int irq;
	/* offset of the interrupt mask register in use (passed to reg_clear_bits) */
	int intmaskreg;

	/* buffer for csr config rom */
	quadlet_t *csr_config_rom; 

	/* IEEE-1394 part follows */
	struct hpsb_host *host;

	/* serializes accesses to the PHY control register */
	spinlock_t phy_reg_lock;

	/* receive buffer; handle_selfid() parses self-ID packets out of it */
	quadlet_t *rcv_page;
	dma_addr_t rcv_page_dma;

	/* asynchronous transmit queue; head is the packet being sent */
	struct premier3_send_data {
#ifdef PR3_HANDLE_PHYSICAL_REQUEST
		struct premier3_packet *queue, *queue_last;
#else
		struct hpsb_packet *queue, *queue_last;
#endif
		spinlock_t queue_lock;
	} async /* , iso_send */;

#ifdef PR3_HANDLE_PHYSICAL_REQUEST
	/* physical requests waiting for (or performing) their data transfer */
	struct {
		struct premier3_phys_req *queue, *queue_last;
		spinlock_t queue_lock;
	} dma;
	int dmairq;
	/* DMA controller channel; the DMA paths are only taken when it is >= 0 */
	int dmach;
#if defined(PR3_USE_DMAC) && defined(__LITTLE_ENDIAN)
	/* temporary buffer for endian conversion */
	quadlet_t *dma_page;
	dma_addr_t dma_page_dma;
	/* non-zero: let the DMAC reverse bytes instead of swapping in software */
	int do_reverse_dma;
#endif
#endif
#if 0	/* NOT IMPLEMENTED YET */
	struct {
		void *page[ISORCV_PAGES];
		dma_addr_t page_dma[ISORCV_PAGES];
		struct tq_struct tq;
		spinlock_t lock;
	} iso_rcv;
#endif
};

/*
 * Register read and write helper functions.
 */
/* Store the 32-bit value `data` into the chip register at byte `offset`. */
inline static void reg_write(const struct premier3 *pr3, int offset, u32 data)
{
	unsigned long reg_addr = (unsigned long)pr3->registers + offset;

	writel(data, reg_addr);
}

/* Fetch the 32-bit contents of the chip register at byte `offset`. */
inline static u32 reg_read(const struct premier3 *pr3, int offset)
{
	unsigned long reg_addr = (unsigned long)pr3->registers + offset;

	return readl(reg_addr);
}

/* Read-modify-write: turn on every bit of `mask` in the register at `offset`. */
inline static void reg_set_bits(const struct premier3 *pr3, int offset,
				u32 mask)
{
	u32 value = reg_read(pr3, offset);

	value |= mask;
	reg_write(pr3, offset, value);
}

/* Read-modify-write: turn off every bit of `mask` in the register at `offset`. */
inline static void reg_clear_bits(const struct premier3 *pr3, int offset,
				  u32 mask)
{
	u32 value = reg_read(pr3, offset);

	value &= ~mask;
	reg_write(pr3, offset, value);
}

/*
 * Template image of the configuration ROM.
 *
 * With PREMIER3_CONFIG_ROM_MINIMAL the image is a single quadlet; otherwise
 * it holds a bus info block, a root directory and a node unique ID leaf.
 * The CRC halves and the vendor/chip ID quadlets are zero in this table.
 * NOTE(review): presumably they are filled in elsewhere before the ROM is
 * exposed - confirm against the code that copies it to csr_config_rom.
 */
static quadlet_t premier3_csr_rom[] = {
#ifdef PREMIER3_CONFIG_ROM_MINIMAL
	0x01000000, /* vendor ID */
#else
/* bus info block     offset (hex) */
	0x04040000, /* info/CRC length, CRC */
	0x31333934, /* 1394 magic number */
	0xd0648000, /* misc. settings (no Iso support, max_rec:8 (512bytes)) */
	0x00000000, /* vendor ID, chip ID high */
	0x00000000, /* chip ID low */
/* root directory */
	0x00030000, /* directory length, CRC */
	0x03000000, /* vendor ID */
	0x0c000200, /* FIXME: node capabilities */
	0x8d000001, /* offset to unique ID */
/* node unique ID leaf */
	0x00020000, /* CRC length, CRC */
	0x00000000, /* vendor ID, chip ID high */
	0x00000000, /* chip ID low */
#endif
};


/* chip register definitions follow */

/*
 * Byte offsets of the link chip registers, relative to premier3.registers.
 * Use them with reg_read()/reg_write().
 */
#define PR3_VERSION	0x00
#define PR3_NODE	0x04	/* Node register */
#define PR3_CTL		0x08	/* Control register */
#define PR3_RESET	0x0c	/* Reset register */
#define PR3_PKTCTL	0x10	/* Packet Control register */
#define PR3_STAT	0x14	/* Status register */
#define PR3_PHYCTL	0x18	/* PHY Control register */
#define PR3_ERRTX	0x1c
#define PR3_CYCLE	0x20	/* Cycle Timer register */
#define PR3_ISOPORT1	0x24
#define PR3_ISOPORT2	0x28
#define PR3_ATFDATA	0x2c	/* transmit FIFO data port (written quadlet by quadlet) */
#define PR3_ARFDATA	0x30	/* receive FIFO data port (read quadlet by quadlet) */
#define PR3_BUFCTL	0x34	/* Buffer Status & Control register */
#define PR3_ISOTXHD	0x38
#define PR3_INT		0x3c	/* Interrupt register; writing a 1 bit clears it */
#define PR3_INTMASK1	0x40	/* interrupt mask; a set bit disables the source */
#define PR3_INTMASK2	0x44	/* second interrupt mask, same layout */
#define PR3_TCODECTL	0x48
#define PR3_ROMADDR	0x4c	/* ROM Region address register */
#define PR3_ADDR	0x50	/* Region address register */
#define PR3_PING	0x54	/* Ping Timer register */
#define PR3_TXRETRY	0x58	/* Transmit Retry register */

/*
 * Bit definitions for the Node, Control, Reset, Packet Control, Status and
 * PHY Control registers.
 *
 * Flags that occupy bit 31 are built from an unsigned constant: "1<<31"
 * shifts into the sign bit of an int, which is undefined behaviour in C.
 * FOO(x) macros mask their argument to the field width and shift it into
 * place; FOO_BITS / FOO_SHIFT give the field width and position.
 */

/* Node register fields */
#define PR3_NODE_IDVALID	(1U<<31)
#define PR3_NODE_ROOT		(1<<24)
#define PR3_NODE_BUS_BITS	10
#define PR3_NODE_BUS_SHIFT	6
#define PR3_NODE_BUS(bus)	\
	(((bus) & ((1<<PR3_NODE_BUS_BITS)-1))<<PR3_NODE_BUS_SHIFT)
#define PR3_NODE_BUS_MASK	\
	(((1<<PR3_NODE_BUS_BITS)-1)<<PR3_NODE_BUS_SHIFT)
#define PR3_NODE_NODE_BITS	6
#define PR3_NODE_NODE_SHIFT	0
#define PR3_NODE_NODE(node)	\
	(((node) & ((1<<PR3_NODE_NODE_BITS)-1))<<PR3_NODE_NODE_SHIFT)
#define PR3_NODE_NODE_MASK	\
	(((1<<PR3_NODE_NODE_BITS)-1)<<PR3_NODE_NODE_SHIFT)

/* Control register fields */
#define PR3_CTL_ACKPND		(1<<26)
#define PR3_CTL_PHYPAR		(1<<25)
#define PR3_CTL_ISOAUTO		(1<<24)
#define PR3_CTL_CYCSRC		(1<<18)
#define PR3_CTL_CYCMST		(1<<17)
#define PR3_CTL_CYCTEN		(1<<16)
#define PR3_CTL_IRP1EN		(1<<15)
#define PR3_CTL_IRP2EN		(1<<14)
#define PR3_CTL_IRP3EN		(1<<13)
#define PR3_CTL_IRP4EN		(1<<12)
#define PR3_CTL_INTEN		(1<<7)
#define PR3_CTL_LPSON		(1<<2)
#define PR3_CTL_RXEN		(1<<1)
#define PR3_CTL_TXEN		(1<<0)

/* Reset register fields */
#define PR3_RESET_ARF		(1<<17)
#define PR3_RESET_ATF		(1<<16)
#define PR3_RESET_RX		(1<<1)
#define PR3_RESET_TX		(1<<0)
#define PR3_RESET_ALL		(PR3_RESET_ARF|PR3_RESET_ATF|PR3_RESET_RX|PR3_RESET_TX)

/* Packet Control register fields */
#define PR3_PKTCTL_ACKCNF	(1<<17)
#define PR3_PKTCTL_ACKTARDY	(1<<16)
#define PR3_PKTCTL_BSYCNT_BITS	3
#define PR3_PKTCTL_BSYCNT_SHIFT	8
#define PR3_PKTCTL_BSYCNT(cnt)	\
	(((cnt) & ((1<<PR3_PKTCTL_BSYCNT_BITS)-1))<<PR3_PKTCTL_BSYCNT_SHIFT)
#define PR3_PKTCTL_RXPHY	(1<<6)
#define PR3_PKTCTL_RXSELID	(1<<5)
#define PR3_PKTCTL_ENSNOOP	(1<<4)

/* Status register fields */
#define PR3_STAT_ATACK_BITS	4
#define PR3_STAT_ATACK_SHIFT	8

/* PHY Control register fields */
#define PR3_PHYCTL_RD	(1U<<31)
#define PR3_PHYCTL_WR	(1<<30)
#define PR3_PHYCTL_RGAD_BITS	4
#define PR3_PHYCTL_RGAD_SHIFT	24
#define PR3_PHYCTL_RGAD(ad)	\
	(((ad) & ((1<<PR3_PHYCTL_RGAD_BITS)-1))<<PR3_PHYCTL_RGAD_SHIFT)
#define PR3_PHYCTL_RGDATA_BITS	8
#define PR3_PHYCTL_RGDATA_SHIFT	16
#define PR3_PHYCTL_RGDATA(data)	\
	(((data) & ((1<<PR3_PHYCTL_RGDATA_BITS)-1))<<PR3_PHYCTL_RGDATA_SHIFT)
#define PR3_PHYCTL_RXAD_BITS	4
#define PR3_PHYCTL_RXAD_SHIFT	8
#define PR3_PHYCTL_RXDATA_BITS	8
#define PR3_PHYCTL_RXDATA_SHIFT	0

/*
 * Bit definitions for the Cycle Timer, Buffer Status & Control, Interrupt,
 * ROM Region address, Region address, Ping Timer and Transmit Retry
 * registers.
 *
 * Flags in bit 31, and field macros whose result can reach bit 31, are
 * built from unsigned constants: shifting a signed int into its sign bit
 * is undefined behaviour in C.
 */

/* Cycle Timer register fields */
#define PR3_CYCLE_SECONDS_BITS	7
#define PR3_CYCLE_SECONDS_SHIFT	25
#define PR3_CYCLE_SECONDS(s)	\
	(((s) & ((1U<<PR3_CYCLE_SECONDS_BITS)-1))<<PR3_CYCLE_SECONDS_SHIFT)
#define PR3_CYCLE_COUNT_BITS	13
#define PR3_CYCLE_COUNT_SHIFT	12
#define PR3_CYCLE_COUNT(c)	\
	(((c) & ((1<<PR3_CYCLE_COUNT_BITS)-1))<<PR3_CYCLE_COUNT_SHIFT)
#define PR3_CYCLE_OFFSET_BITS	12
#define PR3_CYCLE_OFFSET_SHIFT	0
#define PR3_CYCLE_OFFSET(o)	\
	(((o) & ((1<<PR3_CYCLE_OFFSET_BITS)-1))<<PR3_CYCLE_OFFSET_SHIFT)

/* Buffer Status & Control register fields */
#define PR3_BUFCTL_SELDMA	(1U<<31)
#define PR3_BUFCTL_ATGO		(1<<25)
#define PR3_BUFCTL_DREQEN	(1<<24)
#define PR3_BUFCTL_NODATTX	(1<<16)
#define PR3_BUFCTL_ARFFUL	(1<<3)
#define PR3_BUFCTL_ARFEPT	(1<<2)
#define PR3_BUFCTL_ATFFUL	(1<<1)
#define PR3_BUFCTL_ATFEPT	(1<<0)

/* Interrupt register fields */
#define PR3_INT_PHYPER		(1U<<31)
#define PR3_INT_PHYINT		(1<<30)
#define PR3_INT_DTLOST		(1<<29)
#define PR3_INT_RTOVER		(1<<27)
#define PR3_INT_LINKON		(1<<26)
#define PR3_INT_CMDRESET	(1<<25)
#define PR3_INT_ATRDY		(1<<24)
#define PR3_INT_TXCONFLCT	(1<<23)
#define PR3_INT_TXTARDY		(1<<22)
#define PR3_INT_ARXDATA		(1<<21)
#define PR3_INT_RPYPKT		(1<<20)
#define PR3_INT_ACKMIS		(1<<19)
#define PR3_INT_CONFERR		(1<<18)
#define PR3_INT_ARBGP		(1<<17)
#define PR3_INT_PHYRST		(1<<14)
#define PR3_INT_PHYRGRX		(1<<12)
#define PR3_INT_ACKERR		(1<<11)
#define PR3_INT_TCODEERR	(1<<10)
#define PR3_INT_HDRERR		(1<<9)
#define PR3_INT_SNTREJ		(1<<8)
#define PR3_INT_CYCLESEC	(1<<7)
#define PR3_INT_CYCLEST		(1<<6)
#define PR3_INT_CYCLEDONE	(1<<5)
#define PR3_INT_CYCLELOST	(1<<3)
#define PR3_INT_CYCLETOUT	(1<<2)

/* ROM Region address register fields */
#define PR3_ROMADDR_LO_BITS	16
#define PR3_ROMADDR_LO_SHIFT	16
#define PR3_ROMADDR_LO(ad)	\
	(((ad) & ((1U<<PR3_ROMADDR_LO_BITS)-1))<<PR3_ROMADDR_LO_SHIFT)
#define PR3_ROMADDR_HI_BITS	16
#define PR3_ROMADDR_HI_SHIFT	0
#define PR3_ROMADDR_HI(ad)	\
	(((ad) & ((1<<PR3_ROMADDR_HI_BITS)-1))<<PR3_ROMADDR_HI_SHIFT)

/* Region address register fields */
#define PR3_ADDR_LO_BITS	16
#define PR3_ADDR_LO_SHIFT	16
#define PR3_ADDR_LO(ad)	\
	(((ad) & ((1U<<PR3_ADDR_LO_BITS)-1))<<PR3_ADDR_LO_SHIFT)
#define PR3_ADDR_HI_BITS	16
#define PR3_ADDR_HI_SHIFT	0
#define PR3_ADDR_HI(ad)	\
	(((ad) & ((1<<PR3_ADDR_HI_BITS)-1))<<PR3_ADDR_HI_SHIFT)

/* Ping Timer register fields */
#define PR3_PING_BITS	8
#define PR3_PING_SHIFT	0

/* Transmit Retry register fields */
#define PR3_TXRETRY_STP	(1U<<31)
#define PR3_TXRETRY_LM_BITS	4
#define PR3_TXRETRY_LM_SHIFT	16
#define PR3_TXRETRY_LM(cnt)	\
	(((cnt) & ((1<<PR3_TXRETRY_LM_BITS)-1))<<PR3_TXRETRY_LM_SHIFT)
#define PR3_TXRETRY_SLM_BITS	3
#define PR3_TXRETRY_SLM_SHIFT	13
#define PR3_TXRETRY_SLM(sec)	\
	(((sec) & ((1<<PR3_TXRETRY_SLM_BITS)-1))<<PR3_TXRETRY_SLM_SHIFT)
#define PR3_TXRETRY_CLM_BITS	13
#define PR3_TXRETRY_CLM_SHIFT	0
#define PR3_TXRETRY_CLM(cyc)	\
	(((cyc) & ((1<<PR3_TXRETRY_CLM_BITS)-1))<<PR3_TXRETRY_CLM_SHIFT)



/*
 * Logging helpers.  `level` is a KERN_* string literal and `fmt` must be a
 * string literal too, because both are pasted together with the prefix and
 * a trailing newline at compile time.
 */
/* print general (card independent) information */
#define PRINT_G(level, fmt, args...) printk(level "premier3: " fmt "\n" , ## args)
/* print card specific information */
#define PRINT(level, card, fmt, args...) printk(level "premier3:%d: " fmt "\n" , card , ## args)

/*
 * Debug variants: identical to the macros above when
 * CONFIG_IEEE1394_VERBOSEDEBUG is set, otherwise they expand to nothing
 * (their arguments are not evaluated).
 */
#ifdef CONFIG_IEEE1394_VERBOSEDEBUG
#define PRINT_GD(level, fmt, args...) printk(level "premier3: " fmt "\n" , ## args)
#define PRINTD(level, card, fmt, args...) printk(level "premier3:%d: " fmt "\n" , card , ## args)
#else
#define PRINT_GD(level, fmt, args...) do {} while (0)
#define PRINTD(level, card, fmt, args...) do {} while (0)
#endif

/* Static table of card state; premier3_detect() binds one host per entry in use. */
static struct premier3 cards[MAX_PREMIER3_CARDS];
/* Number of entries of cards[] that premier3_detect() walks. */
static int num_of_cards = 0;

/* Defined outside the visible part of this file. */
static int add_card(void);
static void remove_card(struct premier3 *pr3);
static int init_driver(void);

/***********************************
 * IEEE-1394 functionality section *
 ***********************************/


static int get_phy_reg(struct premier3 *pr3, int addr)
{
	int retval = 0;
	int i = 0;

	unsigned long flags;

	if (addr > 15) {
		PRINT(KERN_ERR, pr3->id, __FUNCTION__
		      ": PHY register address %d out of range", addr);
		return -1;
	}

	spin_lock_irqsave(&pr3->phy_reg_lock, flags);

	/*
	 * PR3_PHYCTL_RD bit is cleared BEFORE completion.
	 * Poll PR3_INT_PHYRGRX bit instead of PR3_PHYCTL_RD bit.
	 */
	reg_write(pr3, PR3_INT, PR3_INT_PHYRGRX);
	reg_write(pr3, PR3_PHYCTL, PR3_PHYCTL_RD | PR3_PHYCTL_RGAD(addr));
	do {
		if (i > 10000) {
			PRINT(KERN_ERR, pr3->id, __FUNCTION__ 
			      ": runaway loop, aborting");
			retval = -1;
			break;
		}
		i++;
	} while ((reg_read(pr3, PR3_INT) & PR3_INT_PHYRGRX) == 0);
	if (retval != -1)
		retval = reg_read(pr3, PR3_PHYCTL);

	reg_write(pr3, PR3_INT, PR3_INT_PHYRGRX);
	spin_unlock_irqrestore(&pr3->phy_reg_lock, flags);

	if (retval != -1) {
		if (((retval >> PR3_PHYCTL_RXAD_SHIFT) &
		     ((1<<PR3_PHYCTL_RXAD_BITS)-1))
		    != addr) {
			PRINT(KERN_ERR, pr3->id, __FUNCTION__ 
			      ": PhyRgAd!=PhyRxAd (%x,%x)",
			      addr,
			      retval);
			return -1;
		}

		return (retval >> PR3_PHYCTL_RXDATA_SHIFT) &
			((1<<PR3_PHYCTL_RXDATA_BITS)-1);
	}
	return -1;
}

/*
 * set_phy_reg - write one PHY register through the PHY Control register.
 * @pr3:  card to talk to
 * @addr: PHY register address, 0..15
 * @val:  value to write, 0..0xff
 *
 * Busy-waits (bounded to about 10000 polls) with phy_reg_lock held and
 * interrupts disabled until the chip clears PR3_PHYCTL_WR.
 *
 * Returns -1 if an argument is out of range or the poll loop times out;
 * otherwise the last value read from the PHY Control register.
 */
static int set_phy_reg(struct premier3 *pr3, int addr, int val)
{
	int retval = 0;
	unsigned long flags;
	int i = 0;

	/* negative values would otherwise be masked into valid ones */
	if (addr < 0 || addr > 15) {
		PRINT(KERN_ERR, pr3->id, __FUNCTION__
		      ": PHY register address %d out of range", addr);
		return -1;
	}

	if (val < 0 || val > 0xff) {
		PRINT(KERN_ERR, pr3->id, __FUNCTION__
		      ": PHY register value %d out of range", val);
		return -1;
	}

	spin_lock_irqsave(&pr3->phy_reg_lock, flags);

	reg_write(pr3, PR3_PHYCTL, PR3_PHYCTL_WR | PR3_PHYCTL_RGAD(addr)
		  | PR3_PHYCTL_RGDATA(val));
	do {
		retval = reg_read(pr3, PR3_PHYCTL);

		if (i > 10000) {
			PRINT(KERN_ERR, pr3->id, __FUNCTION__ 
			      ": runaway loop, aborting");
			retval = -1;
			break;
		}
		i++;
	} while (retval & PR3_PHYCTL_WR);

	spin_unlock_irqrestore(&pr3->phy_reg_lock, flags);

	return retval;
}

#if 0 /* not needed at this time */
/*
 * Select PHY register page `page` (0..7) by rewriting the top three bits of
 * PHY register 7.  Returns 0 on success, -1 if `page` is too large or
 * register 7 could not be read.  The result of the write is not checked.
 */
static int sel_phy_reg_page(struct premier3 *pr3, int page)
{
	int current_sel;

	if (page > 7) {
		PRINT(KERN_ERR, pr3->id, __FUNCTION__
		      ": PHY page %d out of range", page);
		return -1;
	}

	current_sel = get_phy_reg(pr3, 7);
	if (current_sel == -1)
		return -1;

	/* keep the low five bits, replace the page field */
	set_phy_reg(pr3, 7, (current_sel & 0x1f) | (page << 5));
	return 0;
}

/*
 * Select PHY port `port` (0..15) by rewriting the low four bits of PHY
 * register 7.  Returns 0 on success, -1 if `port` is too large or
 * register 7 could not be read.  The result of the write is not checked.
 */
static int sel_phy_reg_port(struct premier3 *pr3, int port)
{
	int current_sel;

	if (port > 15) {
		PRINT(KERN_ERR, pr3->id, __FUNCTION__
		      ": PHY port %d out of range", port);
		return -1;
	}

	current_sel = get_phy_reg(pr3, 7);
	if (current_sel == -1)
		return -1;

	/* keep the high four bits, replace the port field */
	set_phy_reg(pr3, 7, (current_sel & 0xf0) | port);
	return 0;
}


/*
 * generate_own_selfid - synthesize this node's self-ID quadlet from the PHY
 * registers.
 * @pr3:  card whose PHY is queried
 * @host: unused
 *
 * Only the first three ports are encoded (one 2-bit field each at bits
 * 7:6, 5:4 and 3:2); PHYs reporting more ports are truncated to three.
 *
 * Returns the self-ID quadlet in big-endian byte order.
 */
static quadlet_t generate_own_selfid(struct premier3 *pr3,
				     struct hpsb_host *host)
{
	quadlet_t lsid;
	unsigned char phyreg[8];
	int nports;
	int i;

	for (i = 0; i < 7; i++) {
		phyreg[i] = get_phy_reg(pr3, i);
	}

	/* This code doesn't support more than 3 ports on the PHY. */

	lsid = 0x80400000 | ((phyreg[0] & 0xfc) << 22);
	lsid |= (phyreg[1] & 0x3f) << 16; /* gap count */
	if ((phyreg[2] & 0xe0) == 0xe0) {
		/* IEEE1394.a PHY */
		if ((phyreg[3] >> 5) > 2)
			lsid |= 0x2 << 14;/* > 400M.  What can I do? */
		else
			lsid |= (phyreg[3] & 0x60) << (14 - 5); /* max speed */
		lsid |= (phyreg[4] & 0x40) << (11 - 6); /* contender */
		lsid |= (phyreg[4] & 0x7) << 8;	/* power class */

		nports = phyreg[2] & 0x1f;
		if (nports > 3)
			nports = 3;	/* shift below would go negative */

		sel_phy_reg_page(pr3, 0);	/* page 0 */
		for (i = 0; i < nports; i++) { /* ports */
			int portreg;

			sel_phy_reg_port(pr3, i);	/* port N */
			/*
			 * Register 8 is kept in its own variable: phyreg[]
			 * has only 8 elements, so phyreg[8] is out of bounds.
			 */
			portreg = get_phy_reg(pr3, 8);
			if (portreg & 0x4) {
				lsid |= (((portreg & 0x8) | 0x10) >> 3)
					<< (6 - i*2);
			} else {
				lsid |= 1 << (6 - i*2);
			}
		}
	} else {
		/* IEEE1394-1995 PHY */
		lsid |= (phyreg[2] & 0xc0) << 8; /* max speed */
		lsid |= (phyreg[6] & 0x01) << 11; /* contender (phy dependent) */
		lsid |= (phyreg[6] & 0x10) >> 3; /* initiated reset */

		nports = phyreg[2] & 0xf;
		if (nports > 3)
			nports = 3;	/* phyreg[3 + i] must stay within registers 3..5 */

		for (i = 0; i < nports; i++) { /* ports */
			if (phyreg[3 + i] & 0x4) {
				lsid |= (((phyreg[3 + i] & 0x8) | 0x10) >> 3)
					<< (6 - i*2);
			} else {
				lsid |= 1 << (6 - i*2);
			}
		}
	}

	PRINT(KERN_DEBUG, pr3->id, "generated own selfid 0x%x", lsid);
	cpu_to_be32s(&lsid);
	return lsid;
}
#endif /* unneeded functions */

/*
 * handle_selfid - parse a received self-ID packet and report it to the core.
 * @pr3:  card that received the packet (data is in pr3->rcv_page)
 * @host: ieee1394 host to notify
 * @size: packet size in bytes, including the leading header quadlet
 *
 * The payload after the header is a sequence of quadlet pairs, each self-ID
 * followed by its bitwise inverse.  Consistent pairs are passed to
 * hpsb_selfid_received(); afterwards hpsb_selfid_complete() is called with
 * this node's physical ID and root status read from the Node register.
 *
 * @size is unsigned, so all arithmetic on it is guarded against wrapping:
 * a packet shorter than its header or with an odd trailing quadlet must not
 * turn into a huge remaining length.
 */
static void handle_selfid(struct premier3 *pr3, struct hpsb_host *host, size_t size)
{
	quadlet_t *q = pr3->rcv_page;
	int phyid, isroot;
	size_t nquads;
	size_t i;

	/* skip header */
	q++;
	size = (size >= 4) ? size - 4 : 0;

	/* payload arrives big-endian; convert in place */
	nquads = size / 4;
	for (i = 0; i < nquads; i++)
		be32_to_cpus(&q[i]);

	phyid = (reg_read(pr3, PR3_NODE) >> PR3_NODE_NODE_SHIFT) &
		((1<<PR3_NODE_NODE_BITS)-1);
	isroot = reg_read(pr3, PR3_NODE) & PR3_NODE_ROOT;

	PRINT(KERN_INFO, pr3->id, "SelfID process finished (phyid %d, %s)",
	      phyid, (isroot ? "root" : "not root"));

	if (phyid != 0x3f && size == 0) {
		/* unconnected state? */
#if 0
		/* we need our own self-id packet */
		lsid = generate_own_selfid(pr3, host);
		hpsb_selfid_received(host, lsid);
#endif
	}

	/* only complete (self-ID, inverse) pairs are examined */
	while (size >= 8) {
#if 0
		struct selfid *sid = (struct selfid *)q;

		if (phyid != 0x3f && !sid->extended &&
		    (sid->phy_id == (phyid + 1))) {
			/* insert our own self-id here */
			hpsb_selfid_received(host, lsid);
		}
#endif

		if (q[0] == ~q[1]) {
			hpsb_selfid_received(host, q[0]);
			if (((q[0]&0x3f000000)>>24)==phyid) {
				PRINT(KERN_INFO, pr3->id, 
				      "This node self-id is 0x%08x", q[0]);
			}
		} else {
			PRINT(KERN_INFO, pr3->id,
			      "inconsistent selfid 0x%x/0x%x", q[0], q[1]);
		}
		q += 2;
		size -= 8;
	}

	if (size != 0) {
		PRINT(KERN_INFO, pr3->id,
		      "ignoring %d trailing selfid bytes", (int)size);
	}

	PRINT(KERN_INFO, pr3->id, "calling self-id complete");

	hpsb_selfid_complete(host, phyid, isroot);
}

#ifdef PR3_HANDLE_PHYSICAL_REQUEST
static void complete_phys_req_dma(struct premier3 *pr3, struct premier3_phys_req *preq);
static void complete_phys_resp_dma(struct premier3 *pr3, struct premier3_phys_req *preq);

#ifdef PR3_USE_DMAC
#ifdef __LITTLE_ENDIAN
/*
 * Copy `length` bytes (rounded down to whole 32-bit words) from physical
 * address `from` to physical address `to`, converting each word with
 * be32_to_cpu().  `from` and `to` may be the same address for an in-place
 * swap.  Both buffers are accessed through KSEG0; the source range is
 * written back/invalidated before the copy and the destination range after.
 */
void copy_and_swap_buf(unsigned long from, unsigned long to, int length)
{
	unsigned long *src = (unsigned long *)KSEG0ADDR(from);
	unsigned long *dst = (unsigned long *)KSEG0ADDR(to);
	int nwords = length / 4;
	int n;

	dma_cache_wback_inv((unsigned long)src, length);

	for (n = 0; n < nwords; n++)
		dst[n] = be32_to_cpu(src[n]);

	dma_cache_wback_inv((unsigned long)dst, length);
}
#endif

static void start_next_dma(struct premier3 *pr3);

/*
 * Interrupt handler for the DMA channel used for physical requests.
 * @irq:             unused
 * @dev_id:          the struct premier3 the channel belongs to
 * @regs_are_unused: unused
 *
 * On normal completion the request at the head of pr3->dma.queue is
 * removed, the next queued transfer (if any) is started, and the finished
 * request is handed to complete_phys_resp_dma() or complete_phys_req_dma()
 * depending on its direction.
 *
 * NOTE(review): on the error path the failed request stays at the head of
 * the queue and no transfer is restarted, so later requests appear to wait
 * forever - confirm whether recovery happens elsewhere.
 * NOTE(review): the head of the queue is dereferenced without a NULL check.
 */
static void premier3_dma_completion_handler(int irq, void *dev_id,
					    struct pt_regs *regs_are_unused)
{
	struct premier3 *pr3 = (struct premier3 *)dev_id;
	unsigned long status = get_txx927_dma_status(pr3->dmach);
	unsigned long flags;

	if (status & TXx927_DMA_STATUS_CHNACT)
		return; /* still active */

	if (status & TXx927_DMA_STATUS_NTRNFC) {
		/* normal completion */
		struct premier3_phys_req *preq;

		spin_lock_irqsave(&pr3->dma.queue_lock, flags);
		preq = pr3->dma.queue;
#ifdef __LITTLE_ENDIAN
		/* received data landed byte-swapped; fix it up in place */
		if (!preq->dma_outgoing && !pr3->do_reverse_dma)
			copy_and_swap_buf(preq->dma_addr, preq->dma_addr,
					  preq->dma_length);
#endif
		/* dequeue and keep the channel busy before completing preq */
		pr3->dma.queue = preq->next;
		if (pr3->dma.queue) {
			start_next_dma(pr3);
		}
		spin_unlock_irqrestore(&pr3->dma.queue_lock, flags);
		if (preq->dma_outgoing) {
			complete_phys_resp_dma(pr3, preq);
		} else {
			complete_phys_req_dma(pr3, preq);
		}
	} else {
		PRINT(KERN_ERR, pr3->id, "DMA error (CSR:%lx)", status);
	}
	/* clear status */
	clear_txx927_dma_status(pr3->dmach);
}

/*
 * Start the data transfer for the request at the head of pr3->dma.queue.
 *
 * This must be called with the respective queue_lock held, and with a
 * non-empty queue.
 *
 * Transfers larger than premier3_min_dma_size are programmed into the DMA
 * controller and finish in premier3_dma_completion_handler().  Smaller ones
 * are copied by the CPU right here; the request is then dequeued, the next
 * one is started recursively and the finished one is completed.
 *
 * NOTE(review): the CPU-copy path takes pr3->dma.queue_lock again although
 * every visible caller already holds it.  That is harmless only where
 * spinlocks compile to nothing (uniprocessor builds); on SMP it would
 * self-deadlock.
 */
static void start_next_dma(struct premier3 *pr3)
{
	struct premier3_phys_req *preq = pr3->dma.queue;
	quadlet_t q;
	int i;
	unsigned long *mem;
	unsigned long flags;

	/* use DMAC only for large data */
	if (preq->dma_length > premier3_min_dma_size) {
		unsigned long regaddr = pr3->registers_dma;
		unsigned int dma_mode = TXx927_DMA_MODE_DUAL_1W;
#ifdef __LITTLE_ENDIAN
		if (pr3->do_reverse_dma)
			dma_mode |= TXx927_DMA_CCR_RVBYTE;
#endif
		(void)reg_read(pr3, PR3_VERSION);	/* flush WB */
		init_txx927_dma(pr3->dmach);
		set_txx927_dma_mode(pr3->dmach, dma_mode);
		if (preq->dma_outgoing) {
			/* memory -> transmit FIFO */
			unsigned long saddr = preq->dma_addr;
#ifdef __LITTLE_ENDIAN
			if (!pr3->do_reverse_dma) {
				/* swap into the bounce page and send from there */
				saddr = pr3->dma_page_dma;
				copy_and_swap_buf(preq->dma_addr,
						  pr3->dma_page_dma,
						  preq->dma_length);
			}
#endif
			set_txx927_dma_addr(pr3->dmach,
					    saddr, regaddr + PR3_ATFDATA);
			set_txx927_dma_inc(pr3->dmach, 4, 0);
		} else {
			/* receive FIFO -> memory */
			set_txx927_dma_addr(pr3->dmach,
					    regaddr + PR3_ARFDATA, preq->dma_addr);
			set_txx927_dma_inc(pr3->dmach, 0, 4);
		}
		set_txx927_dma_count(pr3->dmach, preq->dma_length & ~3);
		enable_txx927_dma(pr3->dmach);
		return;
	}

	/* small transfer: copy by CPU through the uncached KSEG1 window */
	mem = (unsigned long *)KSEG1ADDR(preq->dma_addr);
	if (preq->dma_outgoing) {
		for (i = 0; i < preq->dma_length / 4; i++) {
			q = *mem++;
			reg_write(pr3, PR3_ATFDATA, be32_to_cpu(q));
		}
	} else {
		for (i = 0; i < preq->dma_length / 4; i++) {
			q = reg_read(pr3, PR3_ARFDATA);
			*mem++ = cpu_to_be32(q);
		}
	}
	spin_lock_irqsave(&pr3->dma.queue_lock, flags);
	preq = pr3->dma.queue;
	pr3->dma.queue = preq->next;
	if (pr3->dma.queue) {
		start_next_dma(pr3);
	}
	spin_unlock_irqrestore(&pr3->dma.queue_lock, flags);
	if (preq->dma_outgoing)
		complete_phys_resp_dma(pr3, preq);
	else
		complete_phys_req_dma(pr3, preq);
}
#endif /* PR3_USE_DMAC */

static void send_next(struct premier3 *pr3);
static void do_arx(struct premier3 *pr3);
static void complete_phys_resp_dma(struct premier3 *pr3, struct premier3_phys_req *preq)
{
#ifdef PR3_DEBUG_LOG
	pr3_dbglog_idx = (pr3_dbglog_idx + 1) % PR3_LOG_MAX_IDX;
	pr3_dbglog_ofs = 0;
#endif
	/* kick transmitter */
	reg_write(pr3, PR3_BUFCTL,
		  (reg_read(pr3, PR3_BUFCTL) & (PR3_BUFCTL_SELDMA |
						PR3_BUFCTL_DREQEN |
						PR3_BUFCTL_NODATTX)) |
		  PR3_BUFCTL_ATGO);
}

/*
 * Move the data block of a READB physical response into the transmit FIFO.
 *
 * With a DMA channel available the request is appended to pr3->dma.queue
 * (and started at once if the queue was empty).  Otherwise the block is
 * copied by the CPU and the transmitter is kicked immediately.
 */
static void start_phys_resp_dma(struct premier3 *pr3, struct premier3_phys_req *preq)
{
	quadlet_t word;
	int n;
	unsigned long *src;
#ifdef PR3_USE_DMAC
	if (pr3->dmach >= 0) {
		unsigned long irqflags;

		spin_lock_irqsave(&pr3->dma.queue_lock, irqflags);
		if (pr3->dma.queue != NULL) {
			/* channel busy: just append */
			pr3->dma.queue_last->next = preq;
			pr3->dma.queue_last = preq;
		} else {
			pr3->dma.queue = preq;
			pr3->dma.queue_last = preq;
			start_next_dma(pr3);
		}
		spin_unlock_irqrestore(&pr3->dma.queue_lock, irqflags);
		return;
	}
#endif
	src = (unsigned long *)KSEG0ADDR(preq->dma_addr);
	dma_cache_wback_inv((unsigned long)src, preq->dma_length);
	for (n = 0; n < preq->dma_length / 4; n++) {
		word = src[n];
#ifdef PR3_DEBUG_LOG
		if (pr3_dbglog_ofs < PR3_LOG_MAX_OFS)
			pr3_dbglog[pr3_dbglog_idx][pr3_dbglog_ofs++] = be32_to_cpu(word);
#endif
		reg_write(pr3, PR3_ATFDATA, be32_to_cpu(word));
	}
	complete_phys_resp_dma(pr3, preq);
}

/*
 * complete_phys_req_dma - finish the receive side of a physical request and
 * queue its response.
 * @pr3:  card the request arrived on
 * @preq: request whose data phase (if any) is complete
 *
 * Reads the quadlet that trails the request in the receive FIFO to obtain
 * the speed code, re-enables the ARXDATA interrupt, fills in the remaining
 * response header fields and appends the response to the asynchronous
 * transmit queue, starting transmission if the queue was idle.
 *
 * Ownership: when the response is queued, @preq is referenced by the queue
 * entry.  When no response is queued (broadcast write, or no memory for the
 * queue entry) nothing else refers to @preq, so it is freed here.
 */
static void complete_phys_req_dma(struct premier3 *pr3, struct premier3_phys_req *preq)
{
	int tcode = (preq->resp_header[0] >> 4) & 0xf;
	struct premier3_send_data *d = &pr3->async;
	struct premier3_packet *ppacket;
	unsigned long flags;
	quadlet_t q;

	/* spd/AckSent */
	q = reg_read(pr3, PR3_ARFDATA);
#ifdef PR3_DEBUG_LOG
	if (pr3_dbglog_ofs < PR3_LOG_MAX_OFS)
		pr3_dbglog[pr3_dbglog_idx][pr3_dbglog_ofs++] = q;
	pr3_dbglog_idx = (pr3_dbglog_idx + 1) % PR3_LOG_MAX_IDX;
	pr3_dbglog_ofs = 0;
#endif
	preq->speed_code = (q >> 16) & 3;

#ifdef PR3_CLEAR_RXINT_AFTER_RXDATA
	/* clear ARXDATA interrupt */
	reg_write(pr3, PR3_INT, PR3_INT_ARXDATA);
#endif
	/* re-enable ARXDATA interrupt */
	reg_clear_bits(pr3, pr3->intmaskreg, PR3_INT_ARXDATA);

	switch (tcode) {
	case TCODE_READQ_RESPONSE:
		/* the quadlet read from memory travels in the header */
		preq->resp_header[3] =
			*(unsigned long *)KSEG1ADDR(preq->dma_addr);
		be32_to_cpus(&preq->resp_header[3]);
		preq->resp_header_size = 16;
		preq->resp_data_size = 0;
		break;
	case TCODE_READB_RESPONSE:
		preq->resp_header[3] = preq->dma_length << 16;
		preq->resp_header_size = 16;
		/* start DMA in send_next */
		preq->resp_data_size = preq->dma_length;
		break;
	case TCODE_WRITE_RESPONSE:
		if (((preq->req_header[0] >> 16) & NODE_MASK) == NODE_MASK) {
			/* broadcast write. do not reply. */
			kfree(preq);
			return;
		}
		/* fall through */
	default:
		preq->resp_header[2] = 0;
		preq->resp_header_size = 12;
		preq->resp_data_size = 0;
		break;
	}

	ppacket = kmalloc(sizeof(*ppacket), GFP_ATOMIC);
	if (ppacket == NULL) {
		PRINT(KERN_ERR, pr3->id, "no memory for physical request");
		kfree(preq);
		return;
	}
	memset(ppacket, 0, sizeof(*ppacket));
	ppacket->physical = 1;
	ppacket->u.phys = preq;

	spin_lock_irqsave(&d->queue_lock, flags);

	if (d->queue == NULL) {
		d->queue = ppacket;
		d->queue_last = ppacket;
		send_next(pr3);
	} else {
		d->queue_last->xnext = ppacket;
		d->queue_last = ppacket;
	}

	spin_unlock_irqrestore(&d->queue_lock, flags);

#ifdef PR3_BUG_WORKAROUND
	/*
	 * If the receive FIFO still holds a packet but no ARXDATA interrupt
	 * is pending, the interrupt was lost: process the packet by hand.
	 */
	save_and_cli(flags);
	/* check NoPkt bit */
	if ((reg_read(pr3, PR3_BUFCTL) & 0x10) == 0 &&
	    (reg_read(pr3, PR3_INT) & PR3_INT_ARXDATA) == 0) {
		PRINTD(KERN_INFO, pr3->id, "AFXDATA interrupt lost.");
		do_arx(pr3);
	}
	restore_flags(flags);
#endif
}

/*
 * Move the data block of a WRITEB physical request from the receive FIFO
 * into system memory.
 *
 * With a DMA channel available the request is appended to pr3->dma.queue
 * (and started at once if the queue was empty).  Otherwise the block is
 * copied by the CPU and the request is completed immediately.
 */
static void start_phys_req_dma(struct premier3 *pr3, struct premier3_phys_req *preq)
{
	quadlet_t word;
	int n;
	unsigned long *dst;
#ifdef PR3_USE_DMAC
	if (pr3->dmach >= 0) {
		unsigned long irqflags;

		spin_lock_irqsave(&pr3->dma.queue_lock, irqflags);
		if (pr3->dma.queue != NULL) {
			/* channel busy: just append */
			pr3->dma.queue_last->next = preq;
			pr3->dma.queue_last = preq;
		} else {
			pr3->dma.queue = preq;
			pr3->dma.queue_last = preq;
			start_next_dma(pr3);
		}
		spin_unlock_irqrestore(&pr3->dma.queue_lock, irqflags);
		return;
	}
#endif
	dst = (unsigned long *)KSEG0ADDR(preq->dma_addr);
	for (n = 0; n < preq->dma_length / 4; n++) {
		word = reg_read(pr3, PR3_ARFDATA);
#ifdef PR3_DEBUG_LOG
		if (pr3_dbglog_ofs < PR3_LOG_MAX_OFS - 1)
			pr3_dbglog[pr3_dbglog_idx][pr3_dbglog_ofs++] = word;
#endif
		dst[n] = cpu_to_be32(word);
	}
	dma_cache_wback_inv((unsigned long)dst, preq->dma_length);
	complete_phys_req_dma(pr3, preq);
}

/*
 * handle_physical_request - try to serve a request directly from/to system
 * memory without passing it to the ieee1394 core.
 * @pr3:       card the request arrived on
 * @data:      request header quadlets in CPU byte order
 * @data_size: size of @data in bytes
 *
 * Handles quadlet/block reads and writes whose target range lies below
 * high_memory and is quadlet aligned.  Block data is transferred later by
 * start_phys_req_dma() (writes) or when the response is sent (reads).
 *
 * Returns 1 if the request was taken over by the driver, 0 if the caller
 * must handle it (unsupported tcode, address out of range, unaligned
 * request, or out of memory).
 */
static int handle_physical_request(struct premier3 *pr3, quadlet_t *data, int data_size)
{
	struct hpsb_host *host = pr3->host;
	struct premier3_phys_req *preq;
	nodeid_t nodeid = data[1] >> 16;
	char tlabel = (data[0] >> 10) & 0x3f;
	int req_tcode = (data[0] >> 4) & 0xf;
	u64 addr = (((u64)(data[1] & 0xffff)) << 32) | data[2];
	unsigned long paddr;
	int length, dma_length = 0, dma_outgoing = 0;
	int tcode;
	int nquads;
	int i;

	switch (req_tcode) {
	case TCODE_READQ:
		tcode = TCODE_READQ_RESPONSE;
		length = 4;
		break;
	case TCODE_READB:
		tcode = TCODE_READB_RESPONSE;
		dma_length = length = data[3] >> 16;
		dma_outgoing = 1;
		break;
	case TCODE_WRITEQ:
		tcode = TCODE_WRITE_RESPONSE;
		length = 4;
		break;
	case TCODE_WRITEB:
		tcode = TCODE_WRITE_RESPONSE;
		dma_length = length = data[3] >> 16;
		dma_outgoing = 0;
		break;
	default:
		return 0;
	}

	if (addr + length > __pa(high_memory))
		return 0;
	paddr = (unsigned long)(addr & 0xffffffffULL);
	if ((dma_length & 3) != 0 || (paddr & 3) != 0) {
		PRINT(KERN_ERR, pr3->id, "unaligned physical request");
		return 0;
	}

	preq = kmalloc(sizeof(*preq), GFP_ATOMIC);
	if (preq == NULL) {
		PRINT(KERN_ERR, pr3->id, "no memory for physical request");
		return 0;
	}
	memset(preq, 0, sizeof(*preq));

	/* never copy more quadlets than req_header[] can hold */
	nquads = data_size / 4;
	if (nquads > (int)(sizeof(preq->req_header) / sizeof(preq->req_header[0])))
		nquads = sizeof(preq->req_header) / sizeof(preq->req_header[0]);
	for (i = 0; i < nquads; i++)
		preq->req_header[i] = data[i];

	/*
	 * Node IDs use all 16 bits; widen to quadlet_t before shifting so the
	 * shift cannot overflow a signed int.
	 */
	preq->resp_header[0] = ((quadlet_t)nodeid << 16) | (tlabel << 10) |
		(1 << 8) | (tcode << 4);
	preq->resp_header[1] = ((quadlet_t)host->node_id << 16) |
		(RCODE_COMPLETE << 12);
	preq->resp_header[2] = 0;
	preq->dma_addr = paddr;
	preq->dma_length = dma_length;
	preq->dma_outgoing = dma_outgoing;

	switch (req_tcode) {
	case TCODE_WRITEQ:
		/* quadlet payload travels in the header; store it uncached */
		*(unsigned long*)KSEG1ADDR(paddr) = data[3];
		break;
	case TCODE_WRITEB:
		/* completion happens once the block has been received */
		start_phys_req_dma(pr3, preq);
		return 1;
	}
	complete_phys_req_dma(pr3, preq);

	return 1;
}
#endif /* PR3_HANDLE_PHYSICAL_REQUEST */

/*
 * Push the packet at the head of pr3->async.queue into the transmit FIFO
 * and start transmission.
 *
 * This must be called with the respective queue_lock held, and with a
 * non-empty queue.  The packet is not removed from the queue here.
 *
 * The chip expects the first two header quadlets rearranged: first the
 * speed code combined with the low half of header quadlet 0, then the
 * destination ID (high half of quadlet 0) combined with the low half of
 * quadlet 1.  The remaining header quadlets follow unchanged, then the
 * data block converted with be32_to_cpu().
 *
 * NOTE(review): both "PHY request" checks compare a value masked with 0x0f
 * against 0xe0, which can never be equal, so they never trigger.  If they
 * did, the packet would stay at the head of the queue.
 */
static void send_next(struct premier3 *pr3)
{
	struct hpsb_packet *packet;
	quadlet_t *q;
	size_t size;

	if (reg_read(pr3, PR3_BUFCTL) & PR3_BUFCTL_ATGO) {
		PRINT(KERN_ERR, pr3->id, "transmitter busy");
		return;	/* busy */
	}

#ifdef PR3_HANDLE_PHYSICAL_REQUEST
	if (pr3->async.queue->physical) {
		/* driver-generated response to a physical request */
		struct premier3_phys_req *preq = pr3->async.queue->u.phys;
		q = preq->resp_header;
		if (((*q >> 4) & 0x0f) == 0xe0) {
			PRINT(KERN_ERR, pr3->id, "PHY request send is not supported.");
			return;
		}
		/* 1st quadlet */
		reg_write(pr3, PR3_ATFDATA,
			  (preq->speed_code << 16) | (q[0] & 0x0000ffff));
		/* 2nd quadlet (contains destination ID) */
		reg_write(pr3, PR3_ATFDATA,
			  (q[0] & 0xffff0000) | (q[1] & 0x0000ffff));
#ifdef PR3_DEBUG_LOG
		memset(pr3_dbglog[pr3_dbglog_idx], 0, PR3_LOG_MAX_OFS * 4);
		pr3_dbglog[pr3_dbglog_idx][0] = (preq->speed_code << 16) | (q[0] & 0x0000ffff);
		pr3_dbglog[pr3_dbglog_idx][1] = (q[0] & 0xffff0000) | (q[1] & 0x0000ffff);
		pr3_dbglog_ofs = 2;
#endif
		size = 4 * 2;
		q += 2;
		/* rest of header */
		for (; size < preq->resp_header_size; size += 4) {
#ifdef PR3_DEBUG_LOG
			pr3_dbglog[pr3_dbglog_idx][pr3_dbglog_ofs++] = *q;
#endif
			reg_write(pr3, PR3_ATFDATA, *q++);
		}
		/* block data (if any) is moved first; the transmitter is kicked afterwards */
		if (preq->resp_data_size) {
			start_phys_resp_dma(pr3, preq);
		} else {
			complete_phys_resp_dma(pr3, preq);
		}
		return;
	}
	packet = pr3->async.queue->u.normal;
#else
	packet = pr3->async.queue;
#endif

	/* ordinary packet handed down by the ieee1394 core */
	q = packet->header;
	if (((*q >> 4) & 0x0f) == 0xe0) {
		PRINT(KERN_ERR, pr3->id, "PHY request send is not supported.");
		return;
	}
	/* 1st quadlet */
	reg_write(pr3, PR3_ATFDATA,
		  (packet->speed_code << 16) | (q[0] & 0x0000ffff));
	/* 2nd quadlet (contains destination ID) */
	reg_write(pr3, PR3_ATFDATA,
		  (q[0] & 0xffff0000) | (q[1] & 0x0000ffff));
#ifdef PR3_DEBUG_LOG
	memset(pr3_dbglog[pr3_dbglog_idx], 0, PR3_LOG_MAX_OFS * 4);
	pr3_dbglog[pr3_dbglog_idx][0] = (packet->speed_code << 16) | (q[0] & 0x0000ffff);
	pr3_dbglog[pr3_dbglog_idx][1] = (q[0] & 0xffff0000) | (q[1] & 0x0000ffff);
	pr3_dbglog_ofs = 2;
#endif
	size = 4 * 2;
	q += 2;
	/* rest of header */
	for (; size < packet->header_size; size += 4) {
#ifdef PR3_DEBUG_LOG
		pr3_dbglog[pr3_dbglog_idx][pr3_dbglog_ofs++] = *q;
#endif
		reg_write(pr3, PR3_ATFDATA, *q++);
	}

	/* block data */
	/* non-DMA version */
	q = packet->data;
	for (size = 0; size < packet->data_size; size += 4) {
#ifdef PR3_DEBUG_LOG
		if (pr3_dbglog_ofs < PR3_LOG_MAX_OFS)
			pr3_dbglog[pr3_dbglog_idx][pr3_dbglog_ofs++] = be32_to_cpu(*q);
#endif
		reg_write(pr3, PR3_ATFDATA, be32_to_cpu(*q));
		q++;
	}

#ifdef PR3_DEBUG_LOG
	pr3_dbglog_idx = (pr3_dbglog_idx + 1) % PR3_LOG_MAX_IDX;
	pr3_dbglog_ofs = 0;
#endif
	/* kick transmitter */
	reg_write(pr3, PR3_BUFCTL,
		  (reg_read(pr3, PR3_BUFCTL) & (PR3_BUFCTL_SELDMA |
						PR3_BUFCTL_DREQEN |
						PR3_BUFCTL_NODATTX)) |
		  PR3_BUFCTL_ATGO);
}

/*
 * Host template "detect" hook: run driver initialization, then obtain one
 * hpsb_host per detected card and link host and card to each other.
 *
 * Returns the number of hosts set up.  This is less than num_of_cards if
 * hpsb_get_host() fails part way through.
 */
static int premier3_detect(struct hpsb_host_template *tmpl)
{
	int idx;

	init_driver();

	for (idx = 0; idx < num_of_cards; idx++) {
		struct hpsb_host *new_host = hpsb_get_host(tmpl, 0);

		/* simply don't init more after out of mem */
		if (new_host == NULL)
			return idx;

		new_host->hostdata = &cards[idx];
		cards[idx].host = new_host;
	}

	return num_of_cards;
}

/*
 * Host-template initialize hook.
 *
 * Resets the driver's software transmit (and, when physical requests are
 * handled, DMA) queues and their locks, then programs the link
 * controller: bus number, cycle timer/master, interrupt masks, packet
 * acceptance, and finally Rx/Tx enable.
 *
 * Always returns 1.
 */
static int premier3_initialize(struct hpsb_host *host)
{
	struct premier3 *pr3 = host->hostdata;

	/* start with empty queues and freshly initialized locks */
	pr3->async.queue = NULL;
	spin_lock_init(&pr3->async.queue_lock);
#ifdef PR3_HANDLE_PHYSICAL_REQUEST
	pr3->dma.queue = NULL;
	spin_lock_init(&pr3->dma.queue_lock);
#endif
	spin_lock_init(&pr3->phy_reg_lock);

	/* Set the bus number */
	reg_set_bits(pr3, PR3_NODE, PR3_NODE_BUS(0x3ff));

	/* Enable cycle timer and cycle master */
	reg_set_bits(pr3, PR3_CTL, PR3_CTL_CYCMST | PR3_CTL_CYCTEN);

	/* mask every source in both mask registers before unmasking a subset */
	reg_write(pr3, PR3_INTMASK1, 0xffffffff);
	reg_write(pr3, PR3_INTMASK2, 0xffffffff);
	/* Clear interrupt registers */
	reg_write(pr3, PR3_INT, 0xffffffff);

	/*
	 * enable interrupts: a source is enabled by clearing its bit in the
	 * mask register selected for this card (pr3->intmaskreg).  Sources
	 * inside "#if 0" stay masked.
	 */
	reg_clear_bits(pr3, pr3->intmaskreg,
		       PR3_INT_PHYPER
		       | PR3_INT_PHYINT
		       | PR3_INT_DTLOST
		       | PR3_INT_RTOVER
#if 0
		       | PR3_INT_LINKON
#endif
		       | PR3_INT_CMDRESET
		       | PR3_INT_ATRDY
#if 0
		       | PR3_INT_TXCONFLCT
		       | PR3_INT_TXTARDY
#endif
		       | PR3_INT_ARXDATA
		       | PR3_INT_RPYPKT
		       | PR3_INT_ACKMIS
		       | PR3_INT_CONFERR
#if 0
		       | PR3_INT_ARBGP
#endif
		       | PR3_INT_PHYRST
		       | PR3_INT_PHYRGRX
		       | PR3_INT_ACKERR
		       | PR3_INT_TCODEERR
		       | PR3_INT_HDRERR
		       | PR3_INT_SNTREJ
#if 0
		       | PR3_INT_CYCLESEC
		       | PR3_INT_CYCLEST
		       | PR3_INT_CYCLEDONE
		       | PR3_INT_CYCLELOST
		       | PR3_INT_CYCLETOUT
#endif
		);
	/* global interrupt enable, after the per-source masks are set up */
	reg_set_bits(pr3, PR3_CTL, PR3_CTL_INTEN);

#if 1
	/* Accept both Self-ID packets and other PHY packets */
	reg_set_bits(pr3, PR3_PKTCTL, PR3_PKTCTL_RXSELID | PR3_PKTCTL_RXPHY);
#else
	/* Don't accept phy packets (but accept Self-ID packet) */
	reg_set_bits(pr3, PR3_PKTCTL, PR3_PKTCTL_RXSELID);
#endif

	/* Enable Rx/Tx */
	reg_set_bits(pr3, PR3_CTL, PR3_CTL_RXEN | PR3_CTL_TXEN);

	return 1;
}

/*
 * Host-template release hook: tear down the card attached to @host.
 * A NULL @host is silently ignored.
 */
static void premier3_release(struct hpsb_host *host)
{
	struct premier3 *card;

	if (host == NULL)
		return;

	card = host->hostdata;
	remove_card(card);
}

/*
 * Host-template transmit hook: queue @packet for asynchronous transmission.
 *
 * Only hpsb_async and hpsb_raw packets are accepted, and header plus data
 * must not exceed 1024 bytes.  When physical requests are handled by the
 * driver, the packet is wrapped in a freshly allocated premier3_packet
 * before being queued.  If the queue was empty, transmission is started
 * immediately through send_next().
 *
 * Returns 1 when the packet was queued, 0 on any failure (oversized
 * packet, unsupported type, or allocation failure).
 */
static int premier3_transmit(struct hpsb_host *host, struct hpsb_packet *packet)
{
	struct premier3 *pr3 = host->hostdata;
	struct premier3_send_data *sendq;
	unsigned long irqstate;
#ifdef PR3_HANDLE_PHYSICAL_REQUEST
	struct premier3_packet *entry;
#else
	struct hpsb_packet *entry;
#endif

	if (packet->data_size + packet->header_size > 1024) {
		PRINT(KERN_ERR, pr3->id, "transmit packet data too big (%d)",
		      packet->data_size);
		return 0;
	}

	/* iso transmission (hpsb_iso) is not implemented yet */
	if (packet->type != hpsb_async && packet->type != hpsb_raw) {
		PRINT(KERN_ERR, pr3->id, "invalid packet type %d",
		      packet->type);
		return 0;
	}
	sendq = &pr3->async;

#ifdef PR3_HANDLE_PHYSICAL_REQUEST
	/* wrap the packet so it can share the queue with physical responses */
	entry = kmalloc(sizeof(*entry), GFP_ATOMIC);
	if (entry == NULL)
		return 0;
	memset(entry, 0, sizeof(*entry));
	entry->physical = 0;
	entry->u.normal = packet;
#else
	entry = packet;
#endif

	packet->xnext = NULL;

	/* the quadlet payload lives in header[3]; convert it in place */
	if (packet->tcode == TCODE_WRITEQ
	    || packet->tcode == TCODE_READQ_RESPONSE)
		be32_to_cpus(&packet->header[3]);

	spin_lock_irqsave(&sendq->queue_lock, irqstate);

	if (sendq->queue != NULL) {
		/* transmitter busy: append behind the current tail */
		sendq->queue_last->xnext = entry;
		sendq->queue_last = entry;
	} else {
		/* queue idle: this entry becomes head and is sent right away */
		sendq->queue = entry;
		sendq->queue_last = entry;
		send_next(pr3);
	}

	spin_unlock_irqrestore(&sendq->queue_lock, irqstate);

	return 1;
}

/*
 * Host-template device control hook.
 *
 * Dispatches on @cmd; @arg is command specific.  Returns 0 on success and
 * -1 on failure or for unsupported/unknown commands, except for
 * GET_CYCLE_COUNTER which returns the value read from PR3_CYCLE.
 */
static int premier3_devctl(struct hpsb_host *host, enum devctl_cmd cmd, int arg)
{
	struct premier3 *pr3 = host->hostdata;
	int retval = 0;
#ifdef PR3_HANDLE_PHYSICAL_REQUEST
	struct premier3_packet *packet, *lastpacket;
	struct premier3_phys_req *preq, *lastpreq;
#else
	struct hpsb_packet *packet, *lastpacket;
#endif
	unsigned long flags;

	switch (cmd) {
	case RESET_BUS:
		/*
		 * FIXME: this flag might be necessary in some case
		 */
		/* host->attempt_root = 1; */
		/* read PHY register 1; -1 is treated as "no bus" */
		retval = get_phy_reg(pr3, 1);
		if (retval == -1) {
			PRINT(KERN_INFO, pr3->id, "bus not connected.");
			retval = -1;
			break;
		}
		PRINT(KERN_INFO, pr3->id, "resetting bus on request%s",
		      (host->attempt_root ? " and attempting to become root"
		       : ""));

		/*
		 * Keep the low six bits and set 0x40 (plus 0x80 when trying
		 * to become root) before writing the register back.
		 * NOTE(review): presumably the PHY's bus-reset and
		 * root-hold-off bits -- confirm against the PHY datasheet.
		 */
		retval &= 0x3f;
		retval |= host->attempt_root ? 0xc0 : 0x40;
		set_phy_reg(pr3, 1, retval);
		retval = 0;
		break;

	case GET_CYCLE_COUNTER:
		retval = reg_read(pr3, PR3_CYCLE);
		break;
		
	case SET_CYCLE_COUNTER:
		reg_write(pr3, PR3_CYCLE, arg);
		break;

	case SET_BUS_ID:
		/* replace only the bus field of the node register */
		reg_write(pr3, PR3_NODE, 
			  PR3_NODE_BUS(arg) |
			  (reg_read(pr3, PR3_NODE) & ~PR3_NODE_BUS_MASK));
		break;
		
	case ACT_CYCLE_MASTER:
		if (arg) {
			/* check if we are root and other nodes are present */
			u32 nodeId = reg_read(pr3, PR3_NODE);
			if ((nodeId & PR3_NODE_ROOT) &&
			    (nodeId & PR3_NODE_NODE_MASK)) {
				/*
				 * enable cycleTimer, cycleMaster
				 */
				PRINTD(KERN_DEBUG, pr3->id, "Cycle master enabled");
				reg_set_bits(pr3, PR3_CTL,
					     PR3_CTL_CYCMST | PR3_CTL_CYCTEN);
			}
		} else {
			/* disable cycleTimer, cycleMaster, cycleSource */
			reg_clear_bits(pr3, PR3_CTL,
				       PR3_CTL_CYCMST | PR3_CTL_CYCTEN | PR3_CTL_CYCSRC);
		}
		break;

	case CANCEL_REQUESTS:
		/*
		 * Detach the whole transmit queue under its lock; the
		 * entries are completed/freed below without the lock held.
		 */
		spin_lock_irqsave(&pr3->async.queue_lock, flags);

		packet = pr3->async.queue;
		pr3->async.queue = NULL;

		spin_unlock_irqrestore(&pr3->async.queue_lock, flags);

#ifdef PR3_HANDLE_PHYSICAL_REQUEST
		/* same for the physical-request DMA queue */
		spin_lock_irqsave(&pr3->dma.queue_lock, flags);
		preq = pr3->dma.queue;
		pr3->dma.queue = NULL;
		spin_unlock_irqrestore(&pr3->dma.queue_lock, flags);
#endif

#if 0
		/* stop Tx Retry */
		reg_set_bits(pr3, PR3_TXRETRY, PR3_TXRETRY_STP);
		while (reg_read(pr3, PR3_TXRETRY) & PR3_TXRETRY_STP)
			;
		/* clear Tx Complete/Abort Interrupts */
		reg_write(pr3, PR3_INT,
			  PR3_INT_ATRDY | PR3_INT_ACKERR |
			  PR3_INT_RTOVER | PR3_INT_ACKMIS |
			  PR3_INT_DTLOST | PR3_INT_TCODEERR);
#endif

#ifdef PR3_HANDLE_PHYSICAL_REQUEST
		/*
		 * Free the detached DMA requests.  Requests flagged
		 * dma_outgoing are not freed in this loop.
		 */
		while (preq != NULL) {
			lastpreq = preq;
			preq = preq->next;
			/* incoming only */
			if (!lastpreq->dma_outgoing)
				kfree(lastpreq);
		}
#endif

		/* complete every detached transmit entry as aborted */
		while (packet != NULL) {
			lastpacket = packet;
			packet = packet->xnext;
#ifdef PR3_HANDLE_PHYSICAL_REQUEST
			if (lastpacket->physical) {
				/* this is outgoing preq */
				kfree(lastpacket->u.phys);
			} else {
				hpsb_packet_sent(host, lastpacket->u.normal,
						 ACKX_ABORTED);
			}
			/* the wrapper itself was allocated by the driver */
			kfree(lastpacket);
#else
			hpsb_packet_sent(host, lastpacket, ACKX_ABORTED);
#endif
		}

		break;

	case MODIFY_USAGE:
		/* non-zero arg takes a module reference, zero drops one */
		if (arg) {
			MOD_INC_USE_COUNT;
		} else {
			MOD_DEC_USE_COUNT;
		}
		break;

	case ISO_LISTEN_CHANNEL:
	case ISO_UNLISTEN_CHANNEL:
		PRINT(KERN_ERR, pr3->id, "devctl cmd %d not implemented yet",
		      cmd);
		retval = -1;
		break;

	default:
		PRINT(KERN_ERR, pr3->id, "unknown devctl command %d", cmd);
		retval = -1;
	}

	return retval;
}


/***************************************
 * IEEE-1394 functionality section END *
 ***************************************/


/********************************************************
 * Global stuff (interrupt handler, init/shutdown code) *
 ********************************************************/

/*
 * Read one packet from the Async Rx FIFO into pr3->rcv_page and dispatch it.
 *
 * The first quadlet carries the tcode, which selects the header length and
 * whether block data follows.  PHY packets (tcode 0x0e) are handled
 * entirely here (Self-ID packets go to handle_selfid()).  All other
 * packets are first offered to handle_physical_request() when physical
 * requests are handled by the driver, and otherwise passed to the
 * highlevel via hpsb_packet_received() with the trailing spd/AckSent
 * quadlet cut off.
 *
 * With PR3_BUG_WORKAROUND, after a packet has been delivered the FIFO is
 * checked again and, if another packet is present without a pending
 * ARXDATA interrupt, reception is restarted from the top.  All per-packet
 * state is re-initialized at that point so the next packet is stored at
 * the start of rcv_page and parsed from scratch.
 */
static void do_arx(struct premier3 *pr3)
{
	struct hpsb_host *host = pr3->host;
	quadlet_t *q;
	int size;
	int tcode;
	int header_size;
	int have_block_data;

	if (reg_read(pr3, PR3_BUFCTL) & PR3_BUFCTL_ARFEPT) {
		PRINT(KERN_ERR, pr3->id, "Async Rx FIFO empty.");
#ifdef PR3_CLEAR_RXINT_AFTER_RXDATA
		/* clear ARXDATA interrupt */
		reg_write(pr3, PR3_INT, PR3_INT_ARXDATA);
#endif
		return;
	}

#ifdef PR3_BUG_WORKAROUND
 do_rx_again:
#endif
	/*
	 * Per-packet state.  This must be set up here rather than in the
	 * declarations: on the retry path the previous packet has left q
	 * pointing past its data, size at its length and have_block_data
	 * possibly set, which would make the next packet land behind the
	 * old one and be parsed with the wrong lengths.
	 */
	q = pr3->rcv_page;
	size = 0;
	header_size = 4;
	have_block_data = 0;

	/* read 1st quadlet to determine packet format */
	*q = reg_read(pr3, PR3_ARFDATA);
#ifdef PR3_DEBUG_LOG
	memset(pr3_dbglog[pr3_dbglog_idx], 0, PR3_LOG_MAX_OFS * 4);
	pr3_dbglog[pr3_dbglog_idx][pr3_dbglog_ofs++] = *q;
#endif
	tcode = (*q >> 4) & 0x0f;
	q++;
	size += 4;

	switch (tcode) {
	case TCODE_READQ:
	case TCODE_WRITE_RESPONSE:
		/* nodata packet */
		header_size = 4 * 3;
		break;
	case TCODE_READB:
	case TCODE_WRITEQ:
	case TCODE_READQ_RESPONSE:
		/* quadlet data packet */
		header_size = 4 * 4;
		break;
	case TCODE_WRITEB:
	case TCODE_LOCK_REQUEST:
	case TCODE_READB_RESPONSE:
	case TCODE_LOCK_RESPONSE:
		/* block data packet */
		header_size = 4 * 4;
		have_block_data = 1;
		break;
	case 0x0e:
		/* PHY packet */
		*q++ = reg_read(pr3, PR3_ARFDATA);
		size += 4;
		/* logical reversed data */
		*q++ = reg_read(pr3, PR3_ARFDATA);
		size += 4;
		/*
		 * Keep reading quadlet pairs until a quadlet with only its
		 * low four bits set shows up; that one is the trailing ack.
		 * NOTE(review): the loop is not bounded by the size of
		 * rcv_page and relies on the hardware delivering the ack.
		 */
		for (;;) {
			*q++ = reg_read(pr3, PR3_ARFDATA);
			size += 4;
			if ((pr3->rcv_page[size / 4 - 1] & 0xfffffff0) == 0)
				break;	/* ack */
				/* logical reversed data */
			*q++ = reg_read(pr3, PR3_ARFDATA);
			size += 4;
		}
#ifdef PR3_DEBUG_LOG
		pr3_dbglog_idx = (pr3_dbglog_idx + 1) % PR3_LOG_MAX_IDX;
		pr3_dbglog_ofs = 0;
#endif
		if (pr3->rcv_page[size / 4 - 1] != 1) {
			PRINT(KERN_ERR, pr3->id, "PHY packet parity error.");
		} else {
			if ((pr3->rcv_page[1] & 0xc0000000) == 0x80000000) {
				if (reg_read(pr3, PR3_NODE) & PR3_NODE_IDVALID)
					handle_selfid(pr3, host, size - 4);
				else
					PRINT(KERN_ERR, pr3->id,
					      "SelfID process finished but "
					      "NodeID not valid");
			} else {
				PRINT(KERN_INFO, pr3->id,
				      "PHY packet %08x received.",
				      pr3->rcv_page[1]);
			}
		}
#ifdef PR3_CLEAR_RXINT_AFTER_RXDATA
		/* clear ARXDATA interrupt */
		reg_write(pr3, PR3_INT, PR3_INT_ARXDATA);
#endif
		return;
	default:
		PRINT(KERN_ERR, pr3->id, "Unknown TCODE (%d).", tcode);
#ifdef PR3_DEBUG_LOG
		pr3_dbglog_idx = (pr3_dbglog_idx + 1) % PR3_LOG_MAX_IDX;
		pr3_dbglog_ofs = 0;
#endif
#ifdef PR3_CLEAR_RXINT_AFTER_RXDATA
		/* clear ARXDATA interrupt */
		reg_write(pr3, PR3_INT, PR3_INT_ARXDATA);
#endif
		return;
	}

	/* rest of the header */
	while (size < header_size) {
		*q = reg_read(pr3, PR3_ARFDATA);
#ifdef PR3_DEBUG_LOG
		pr3_dbglog[pr3_dbglog_idx][pr3_dbglog_ofs++] = *q;
#endif
		q++;
		size += 4;
	}
#ifdef PR3_HANDLE_PHYSICAL_REQUEST
	/* mask ARXDATA interrupt */
	reg_set_bits(pr3, pr3->intmaskreg, PR3_INT_ARXDATA);
	/* a non-zero return means the request was taken over; stop here */
	if (handle_physical_request(pr3, pr3->rcv_page, header_size))
		return;
#endif
	if (have_block_data) {
		/* data length is the upper half of the 4th header quadlet */
		int dlen = (pr3->rcv_page[3] >> 16) & 0xffff;
		dlen = (dlen + 3) & ~3;	/* alignment */
		if (header_size + dlen + 4 > 1024) {
			PRINT(KERN_ERR, pr3->id,
			      "too big data length %d", dlen);
			dlen = 0;
		}
		/* non-DMA version */
		while (size < header_size + dlen) {
			*q = reg_read(pr3, PR3_ARFDATA);
#ifdef PR3_DEBUG_LOG
			if (pr3_dbglog_ofs < PR3_LOG_MAX_OFS - 1)
				pr3_dbglog[pr3_dbglog_idx][pr3_dbglog_ofs++] = *q;
#endif
			/* highlevel assumes block data is BE */
			cpu_to_be32s(q);
			q++;
			size += 4;
		}
	}
	/* spd/AckSent */
	*q = reg_read(pr3, PR3_ARFDATA);
#ifdef PR3_DEBUG_LOG
	if (pr3_dbglog_ofs < PR3_LOG_MAX_OFS)
		pr3_dbglog[pr3_dbglog_idx][pr3_dbglog_ofs++] = *q;
	pr3_dbglog_idx = (pr3_dbglog_idx + 1) % PR3_LOG_MAX_IDX;
	pr3_dbglog_ofs = 0;
#endif
	q++;
	size += 4;
#ifdef PR3_CLEAR_RXINT_AFTER_RXDATA
	/* clear ARXDATA interrupt */
	reg_write(pr3, PR3_INT, PR3_INT_ARXDATA);
#endif
#ifdef PR3_HANDLE_PHYSICAL_REQUEST
	/* re-enable ARXDATA interrupt */
	reg_clear_bits(pr3, pr3->intmaskreg, PR3_INT_ARXDATA);
#endif

	if (tcode == TCODE_WRITEQ || tcode == TCODE_READQ_RESPONSE) {
		/* swap a data quadlet */
		/* highlevel assumes quadlet data is BE */
		cpu_to_be32s(&pr3->rcv_page[3]);
	}

	PRINTD(KERN_DEBUG, pr3->id,
	       "received packet size %d, spd/ackSent 0x%x",
	       size, pr3->rcv_page[size / 4 - 1]);

	size -= 4;	/* cut off a last quadlet */
	hpsb_packet_received(host, pr3->rcv_page, size, 0);
#ifdef PR3_BUG_WORKAROUND
	/*
	 * check NoPkt bit: another packet is waiting in the FIFO but no
	 * ARXDATA interrupt is pending for it, so fetch it right now.
	 */
	if ((reg_read(pr3, PR3_BUFCTL) & 0x10) == 0 &&
	    (reg_read(pr3, PR3_INT) & PR3_INT_ARXDATA) == 0) {
		PRINTD(KERN_INFO, pr3->id, "AFXDATA interrupt lost.");
		goto do_rx_again;
	}
#endif
}

static void premier3_irq_handler(int irq, void *dev_id,
				 struct pt_regs *regs_are_unused)
{
	struct premier3 *pr3 = (struct premier3 *)dev_id;
	struct hpsb_host *host = pr3->host;
	u32 intstat;

	intstat = reg_read(pr3, PR3_INT);
#ifdef PR3_CLEAR_RXINT_AFTER_RXDATA
	/* do not clear ARXDATA interrupt here. */
	reg_write(pr3, PR3_INT, intstat & ~PR3_INT_ARXDATA);
#else
	reg_write(pr3, PR3_INT, intstat);
#endif

	intstat &= ~reg_read(pr3, pr3->intmaskreg);
	PRINTD(KERN_DEBUG, pr3->id, "interrupt: 0x%08x", intstat);

	if (intstat & PR3_INT_PHYPER) {
		PRINT(KERN_INFO, pr3->id, "PHY parity error interrupt");
	}
	if (intstat & PR3_INT_RTOVER) {
		PRINT(KERN_INFO, pr3->id, "Tx Retry over interrupt");
	}
	if (intstat & PR3_INT_ACKERR) {
		PRINT(KERN_INFO, pr3->id, "Ack miss interrupt");
	}
	if (intstat & PR3_INT_DTLOST) {
		PRINT(KERN_INFO, pr3->id, "Data lost interrupt");
	}
	if (intstat & PR3_INT_CMDRESET) {
		PRINT(KERN_INFO, pr3->id, "Command reset interrupt");
	}
	if (intstat & PR3_INT_TCODEERR) {
		PRINT(KERN_INFO, pr3->id, "TCODE error interrupt");
	}
	if (intstat & PR3_INT_HDRERR) {
		PRINT(KERN_INFO, pr3->id, "Header CRC error interrupt");
	}
	if (intstat & PR3_INT_SNTREJ) {
		PRINT(KERN_INFO, pr3->id, "Sent reject interrupt");
	}
	if (intstat & PR3_INT_RPYPKT) {
		PRINT(KERN_INFO, pr3->id, "PHY reply interrupt");
	}
#if 0
	if (intstat & PR3_INT_CYCLELOST) {
		PRINT(KERN_INFO, pr3->id, "Cycle lost interrupt");
	}
	if (intstat & PR3_INT_CYCLETOUT) {
		PRINT(KERN_INFO, pr3->id, "Cycle start timeout interrupt");
	}
#endif
	if (intstat & PR3_INT_ACKMIS) {
		PRINT(KERN_INFO, pr3->id, "Ack miss interrupt");
	}
	if (intstat & PR3_INT_RTOVER) {
		PRINT(KERN_INFO, pr3->id, "Retry Timeover interrupt");
	}
	if (intstat & PR3_INT_DTLOST) {
		PRINT(KERN_INFO, pr3->id, "Data Lost interrupt");
	}
	if (intstat & PR3_INT_TCODEERR) {
		PRINT(KERN_INFO, pr3->id, "Tcode error interrupt");
	}
	if (intstat & PR3_INT_ACKERR) {
		PRINT(KERN_INFO, pr3->id, "Ack error interrupt");
	}

	if (intstat & PR3_INT_PHYRST) {
		PRINT(KERN_INFO, pr3->id, "bus reset interrupt");
		if (!host->in_bus_reset) {
			hpsb_bus_reset(host);
		}
	}

	if (intstat & (PR3_INT_ATRDY | PR3_INT_ACKERR |
		       PR3_INT_RTOVER | PR3_INT_ACKMIS |
		       PR3_INT_DTLOST | PR3_INT_TCODEERR)) {	/* Async. Tx done */
		int ack;
#ifdef PR3_HANDLE_PHYSICAL_REQUEST
		struct premier3_packet *packet;
#else
		struct hpsb_packet *packet;
#endif
		spin_lock(&pr3->async.queue_lock);

		if (intstat & (PR3_INT_ATRDY | PR3_INT_ACKERR))
			ack = (reg_read(pr3, PR3_STAT) >> PR3_STAT_ATACK_SHIFT) &
				((1<<PR3_STAT_ATACK_BITS)-1);
		else if (intstat & (PR3_INT_RTOVER | PR3_INT_ACKMIS))
			ack = ACKX_TIMEOUT;
		else
			ack = ACKX_SEND_ERROR;
		packet = pr3->async.queue;
		pr3->async.queue = packet->xnext;

		if (pr3->async.queue != NULL) {
			send_next(pr3);
		}

		spin_unlock(&pr3->async.queue_lock);
#ifdef PR3_HANDLE_PHYSICAL_REQUEST
		if (packet->physical)
			kfree(packet->u.phys);
		else
			hpsb_packet_sent(host, packet->u.normal, ack);
		kfree(packet);
#else
		hpsb_packet_sent(host, packet, ack);
#endif
	}

	if (intstat & PR3_INT_PHYRGRX) {
		if (!host->in_bus_reset) {
			PRINT(KERN_INFO, pr3->id,
			      "phy reg received without reset");
		}
	}
	if (intstat & PR3_INT_ARXDATA) {	/* Async. Rx data available */
		do_arx(pr3);
	}
}

#ifndef PREMIER3_CONFIG_ROM_MINIMAL
/*
 * Compute the 16-bit CRC used in the configuration ROM over @length
 * quadlets starting at @data.
 *
 * Each quadlet is consumed four bits at a time, most significant nibble
 * first.  The running value is kept in an unsigned variable and trimmed
 * to 16 bits after every step, so no shift ever overflows; a @length of
 * zero or less yields 0.
 *
 * Returns the CRC in the low 16 bits of the result.
 */
static u32 crc16(unsigned *data, int length)
{
	unsigned int crc = 0;
	int shift;

	for (; length > 0; length--, data++) {
		for (shift = 28; shift >= 0; shift -= 4) {
			unsigned int nibble =
				((crc >> 12) ^ (*data >> shift)) & 0xf;

			crc = ((crc << 4) ^ (nibble << 12) ^
			       (nibble << 5) ^ nibble) & 0xffff;
		}
	}

	return crc;
}
#endif

/*
 * Fill in the variable parts of the premier3_csr_rom template (vendor ID,
 * chip ID and the CRCs that cover them) and store a big-endian copy of
 * the whole ROM image in pr3->csr_config_rom.
 *
 * With PREMIER3_CONFIG_ROM_MINIMAL only the vendor ID is merged into the
 * first quadlet.
 */
static void init_config_rom(struct premier3 *pr3)
{
	unsigned int vendor_id = 0x00600A;	/* FIXME: SORD */
	int idx;
#ifndef PREMIER3_CONFIG_ROM_MINIMAL
	unsigned long long chip_id = 0x0000000badULL;	/* FIXME */
#endif

#ifdef PREMIER3_CONFIG_ROM_MINIMAL
	premier3_csr_rom[0] |= vendor_id;
#else
	/* bus info block: IDs first, then the CRC over quadlets 1..4 */
	premier3_csr_rom[3] = (vendor_id << 8) | (chip_id >> 32);
	premier3_csr_rom[4] = (unsigned long)chip_id;
	premier3_csr_rom[0] = 0x04040000 | crc16(&premier3_csr_rom[1], 4);

	/* root directory: header at [5], CRC over the 3 quadlets after it */
	premier3_csr_rom[6] |= vendor_id;
	premier3_csr_rom[5] = 0x00030000 | crc16(&premier3_csr_rom[6], 3);

	/* node unique ID leaf: same ID pair, CRC over the 2 quadlets */
	premier3_csr_rom[10] = premier3_csr_rom[3];
	premier3_csr_rom[11] = premier3_csr_rom[4];
	premier3_csr_rom[9] = 0x00020000 | crc16(&premier3_csr_rom[10], 2);
#endif

	/* publish the image in bus (big-endian) byte order */
	idx = 0;
	while (idx < sizeof(premier3_csr_rom) / 4) {
		PRINTD(KERN_DEBUG, pr3->id,
		       "ConfigROM[%02x]; %08x", idx, premier3_csr_rom[idx]);
		pr3->csr_config_rom[idx] = cpu_to_be32(premier3_csr_rom[idx]);
		idx++;
	}
}

/*
 * Probe and set up the (single) Premier3 controller.
 *
 * Takes the next free slot in cards[], checks that the register window
 * and IRQ supplied by the platform are usable and that the chip answers,
 * allocates the config ROM copy and receive/DMA buffers, resets the
 * controller, and requests the interrupt line(s) and optional DMA
 * channel.
 *
 * Returns 0 on success and 1 on failure.  Every failure after the slot
 * has been taken goes through FAIL()/FAIL_DEBUG(), which log the message,
 * give the slot back (num_of_cards--), call remove_card() and return 1.
 */
static int add_card()
{
#define FAIL(fmt, args...) do { \
	PRINT_G(KERN_ERR, fmt , ## args); \
	num_of_cards--; \
	remove_card(pr3); \
	return 1; \
	} while (0)
#define FAIL_DEBUG(fmt, args...) do { \
	PRINT_G(KERN_DEBUG, fmt , ## args); \
	num_of_cards--; \
	remove_card(pr3); \
	return 1; \
	} while (0)

	struct premier3 *pr3; /* shortcut to currently handled device */
	unsigned int version;
	int irq = 0;
	int dmairq = 0, dmach = -1;
	unsigned long irqflags = SA_SHIRQ;

	if (num_of_cards == MAX_PREMIER3_CARDS) {
		PRINT_G(KERN_WARNING, "cannot handle more than %d cards.  "
			"Adjust MAX_PREMIER3_CARDS in premier3.c.",
			MAX_PREMIER3_CARDS);
		return 1;
	}

	pr3 = &cards[num_of_cards++];
	pr3->id = num_of_cards-1;

	/* mark every resource as "not acquired" so remove_card() can tell */
	pr3->registers = NULL;
	pr3->irq = 0;
	pr3->dmach = -1;
	pr3->dmairq = 0;
	/* register base and IRQ come from file-scope platform settings */
	pr3->registers = (void *)premier3_base;
	irq = premier3_irq;
	pr3->intmaskreg = PR3_INTMASK1;
#if defined(PR3_USE_DMAC) && defined(__LITTLE_ENDIAN)
	pr3->do_reverse_dma = 0;
	/* TX4927 DMAC can not reverse 32bit data... :-< */
	if (mips_cpu.cputype == CPU_TX3927)
		pr3->do_reverse_dma = 1;
#endif
	if (pr3->registers == NULL || irq == 0) {
		FAIL_DEBUG("not supported");
	}

	/* presence check: version register pattern, then a write/read-back */
	version = reg_read(pr3, PR3_VERSION);
	if (version & 0xff00ff00) {
		FAIL_DEBUG("Premier#3 not found.");
	}
	reg_write(pr3, PR3_ADDR, 0xffff0000);	/* initial value */
	if (reg_read(pr3, PR3_ADDR) != 0xffff0000) {
		FAIL_DEBUG("Premier#3 not found.");
	}

	PRINT(KERN_INFO, pr3->id, "Premier#3 ver %d rev %d",
	      (version >> 16) & 0xf, version & 0xf);

	/* csr_config rom allocation */
	pr3->csr_config_rom = kmalloc(sizeof(premier3_csr_rom), GFP_KERNEL);
	if (pr3->csr_config_rom == NULL) {
		FAIL("failed to allocate buffer config rom");
	}

	/* bus address of the register window and a one-page receive buffer */
	pr3->registers_dma = pci_map_single(NULL, pr3->registers, 0x80, PCI_DMA_BIDIRECTIONAL);
	pr3->rcv_page = pci_alloc_consistent(NULL, PAGE_SIZE, &pr3->rcv_page_dma);
	if (pr3->rcv_page == NULL) {
		FAIL("failed to allocate receive buffer");
	}
	memset(pr3->rcv_page, 0, PAGE_SIZE);

#ifdef PR3_USE_DMAC
	/* a DMA channel is optional; without an IRQ for it, run without DMA */
	dmach = premier3_dmach;
	if (dmach >= 0) {
		dmairq = get_txx927_dma_irqno(dmach);
		if (dmairq < 0) {
			PRINT(KERN_WARNING, pr3->id, "failed to get irqno for DMA %d",
			      dmach);
			dmach = -1;
			dmairq = 0;
		}
	}
#ifdef __LITTLE_ENDIAN
	pr3->dma_page = pci_alloc_consistent(NULL, PAGE_SIZE, &pr3->dma_page_dma);
	if (pr3->dma_page == NULL) {
		FAIL("failed to allocate DMA buffer");
	}
#endif
#endif

#if 0	/* NOT IMPLEMENTED YET */
	for (i = 0; i < ISORCV_PAGES; i++) {
		pr3->iso_rcv.page[i] =
			pci_alloc_consistent(NULL, PAGE_SIZE,
					     &pr3->iso_rcv.page_dma[i]);
		if (pr3->iso_rcv.page[i] == NULL) {
			FAIL("failed to allocate iso receive buffers");
		}
		memset(pr3->iso_rcv.page[i], 0, PAGE_SIZE);
	}
#endif

	/* reset the controller and bring it to a quiet base configuration */
	reg_write(pr3, PR3_RESET, PR3_RESET_ALL);
	reg_write(pr3, PR3_CTL, PR3_CTL_ACKPND | PR3_CTL_LPSON);
	reg_write(pr3, PR3_PKTCTL, 0);	/* does not accept all phy packets */

	/* when a DMA channel is in use, both handlers get SA_INTERRUPT too */
	if (dmach >= 0)
		irqflags |= SA_INTERRUPT;
	if (!request_irq(irq, premier3_irq_handler, irqflags,
			 PREMIER3_DRIVER_NAME, pr3)) {
		PRINT(KERN_INFO, pr3->id, "allocated interrupt %d", irq);
		pr3->irq = irq;
	} else {
		FAIL("failed to allocate shared interrupt %d", irq);
	}
#ifdef PR3_HANDLE_PHYSICAL_REQUEST
#ifdef PR3_USE_DMAC
	if (dmach >= 0) {
		if (request_dma(dmach, "premier3")) {
			FAIL("failed to allocate DMA %d", dmach);
		}
		/* record each resource only once it is really owned */
		pr3->dmach = dmach;
		if (request_irq(dmairq, premier3_dma_completion_handler,
				irqflags, "premier3 DMA", pr3)) {
			FAIL("failed to allocate shared interrupt %d", dmairq);
		}
		PRINT(KERN_INFO, pr3->id, "allocated interrupt %d for DMA(%d)", dmairq, dmach);
		pr3->dmairq = dmairq;
	} else {
		pr3->dmairq = 0;
		pr3->dmach = -1;
		PRINT(KERN_INFO, pr3->id, "non-DMA mode");
	}
#endif
#endif

	/* all allocations successful - simple init stuff follows */

	pr3->lock = SPIN_LOCK_UNLOCKED;

	pr3->async.queue_lock = SPIN_LOCK_UNLOCKED;

#ifdef PR3_HANDLE_PHYSICAL_REQUEST
	pr3->dma.queue_lock = SPIN_LOCK_UNLOCKED;
#endif

#if 0	/* NOT IMPLEMENTED YET */
	pr3->iso_rcv.tq.routine = (void (*)(void*))iso_rcv_bh;
	pr3->iso_rcv.tq.data = pr3;
	pr3->iso_rcv.lock = SPIN_LOCK_UNLOCKED;
	pr3->iso_send.queue_lock = SPIN_LOCK_UNLOCKED;
#endif

	init_config_rom(pr3);

	return 0;
	/* NOTE(review): FAIL_DEBUG stays defined past this point */
#undef FAIL
}

#ifdef CONFIG_PROC_FS

/*
 * SR: append one formatted line showing three registers to the report.
 * Expects a struct premier3 *pr3 and a char *p write cursor in scope and
 * advances p by the number of characters written.
 */
#define SR(fmt, reg0, reg1, reg2)\
p += sprintf(p,fmt,reg_read(pr3, reg0),\
	       reg_read(pr3, reg1),reg_read(pr3, reg2));

/*
 * Format a human-readable status report for the first card (cards[0])
 * into @buf: node/bus IDs, hardware version, the highlevel host state
 * and a dump of the controller registers.
 *
 * Returns the number of characters written.  The output is not bounded
 * against the size of @buf.  The pre-2.3 variant receives the usual
 * read_proc arguments but only uses @buf.
 */
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,3,0)
static int premier3_get_status(char *buf)
#else
int premier3_get_info(char *buf, char **start, off_t fpos, 
		      int length, int *eof, void *data)
#endif
{
	struct premier3 *pr3=&cards[0];
	struct hpsb_host *host=pr3->host;
	char *p=buf;

	p += sprintf(p,"IEEE-1394 Premier3 Driver status report:\n");
	p += sprintf(p,"  bus number: 0x%x Node ID: 0x%x\n", 
		     (reg_read(pr3, PR3_NODE) & PR3_NODE_BUS_MASK) >> PR3_NODE_BUS_SHIFT, 
		     (reg_read(pr3, PR3_NODE) & PR3_NODE_NODE_MASK) >> PR3_NODE_NODE_SHIFT);
	p += sprintf(p,"  hardware version %d.%d\n\n", 
		     (reg_read(pr3, PR3_VERSION) & 0xFF0000) >>16, 
		     reg_read(pr3, PR3_VERSION) & 0xFF);
	/* state kept by the highlevel in struct hpsb_host */
	p += sprintf(p,"\n### Host data ###\n");
	p += sprintf(p,"node_count: %8d  ",host->node_count);
	p += sprintf(p,"node_id   : %08X\n",host->node_id);
	p += sprintf(p,"irm_id    : %08X  ",host->irm_id);
	p += sprintf(p,"busmgr_id : %08X\n",host->busmgr_id);
	p += sprintf(p,"%s %s %s\n",
		     host->initialized ? "initialized" : "",
		     host->in_bus_reset ? "in_bus_reset" : "",
		     host->attempt_root ? "attempt_root" : "");
	p += sprintf(p,"%s %s %s %s\n",
		     host->is_root ? "root" : "",
		     host->is_cycmst ? "cycle_master" : "",
		     host->is_irm ? "iso_res_mgr" : "",
		     host->is_busmgr ? "bus_mgr" : "");

	/* ----- Register Dump ----- */
	p += sprintf(p,"\n### HC Register dump ###\n");
	SR("Version     : %08x  Node        : %08x  Control     : %08x\n",
	   PR3_VERSION, PR3_NODE, PR3_CTL);
	SR("Reset       : %08x  PktControl  : %08x  Status      : %08x\n",
	   PR3_RESET, PR3_PKTCTL, PR3_STAT);
	SR("CycleTimer  : %08x  IsoPort1    : %08x  IsoPort2    : %08x\n",
	   PR3_CYCLE, PR3_ISOPORT1, PR3_ISOPORT2);
	SR("BufStatCtl  : %08x  IsoTxHeader : %08x  IntStat     : %08x\n",
	   PR3_BUFCTL, PR3_ISOTXHD, PR3_INT);
	SR("IntMask1    : %08x  IntMask2    : %08x  TcodeCtl    : %08x\n",
	   PR3_INTMASK1, PR3_INTMASK2, PR3_TCODECTL);
	SR("ROMregion   : %08x  ADDRregion  : %08x  PingTimer   : %08x\n",
	   PR3_ROMADDR, PR3_ADDR, PR3_PING);
	p += sprintf(p, "TxRetry     : %08x\n",
		     reg_read(pr3, PR3_TXRETRY));

	return  p - buf;
}

#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,3,0)
/*
 * read_proc handler for /proc/premier3.
 *
 * Generates the complete report into @page, then hands back the window
 * [@off, @off + @count) of it: *@start is pointed at the requested offset
 * and *@eof is set once the window reaches the end of the report.
 *
 * Returns the number of bytes available in the window (never negative).
 */
static int premier3_read_proc(char *page, char **start, off_t off,
			      int count, int *eof, void *data)
{
	int avail = premier3_get_status(page);

	if (avail <= off + count)
		*eof = 1;

	*start = page + off;

	/* bytes left after the offset, clamped to the caller's window */
	avail -= off;
	if (avail > count)
		avail = count;
	if (avail < 0)
		avail = 0;

	return avail;
}
#endif /* LINUX_VERSION_CODE */
#endif /* CONFIG_PROC_FS */

/*
 * Shut down a card and release everything add_card() may have acquired.
 *
 * Does nothing when the card has no register window.  Otherwise the
 * controller's interrupts are disabled, the DMA interrupt/channel and the
 * main interrupt are released if they were acquired, Rx/Tx and the cycle
 * master are stopped, the chip is reset, buffers are freed, and the whole
 * structure is cleared.  Also used on add_card()'s failure paths, so each
 * resource is checked before it is released.
 * NOTE(review): pci_unmap_single() runs unconditionally, i.e. also on
 * failure paths taken before the register window was mapped.
 */
static void remove_card(struct premier3 *pr3)
{
	if (pr3->registers == NULL)
		return;
	/* disable all interrupts */
	reg_clear_bits(pr3, PR3_CTL, PR3_CTL_INTEN);
	reg_write(pr3, PR3_INTMASK1, 0xffffffff);
	reg_write(pr3, PR3_INTMASK2, 0xffffffff);

#ifdef PR3_HANDLE_PHYSICAL_REQUEST
#ifdef PR3_USE_DMAC
	/* DMA completion interrupt and channel, if they were acquired */
	if (pr3->dmairq)
		free_irq(pr3->dmairq, pr3);
	if (pr3->dmach >= 0)
		free_dma(pr3->dmach);
#endif
#endif

	/* Free the IRQ */
	if (pr3->irq)
		free_irq(pr3->irq, pr3);

	/* stop Tx/Rx, CycleMaster */
	reg_clear_bits(pr3, PR3_CTL, PR3_CTL_RXEN | PR3_CTL_TXEN | PR3_CTL_CYCMST);

	reg_write(pr3, PR3_RESET, PR3_RESET_ALL);

	pci_unmap_single(NULL, pr3->registers_dma, 0x80, PCI_DMA_BIDIRECTIONAL);
#if 0	/* NOT IMPLEMENTED YET */
	for (i = 0; i < ISORCV_PAGES; i++) {
		if (pr3->iso_rcv.page[i]) {
			pci_free_consistent(NULL, PAGE_SIZE,
					    pr3->iso_rcv.page[i],
					    pr3->iso_rcv.page_dma[i]);
		}
	}
#endif
	if (pr3->rcv_page) {
		pci_free_consistent(NULL, PAGE_SIZE, pr3->rcv_page,
				    pr3->rcv_page_dma);
	}
#if defined(PR3_USE_DMAC) && defined(__LITTLE_ENDIAN)
	if (pr3->dma_page) {
		pci_free_consistent(NULL, PAGE_SIZE, pr3->dma_page,
				    pr3->dma_page_dma);
	}
#endif

	/* Free config rom */
	if (pr3->csr_config_rom)
		kfree(pr3->csr_config_rom);

	/* leave the slot zeroed so that it can be reused */
	memset(pr3, 0, sizeof(struct premier3));
}

/*
 * One-time driver setup: probe the controller through add_card() and,
 * when procfs is configured, create the /proc/premier3 entry.
 *
 * Returns 0 on success or when a card has already been set up by an
 * earlier call, and -ENXIO when no usable card is found.
 */
static int init_driver()
{
	if (num_of_cards) {
		/*
		 * The function name is passed as an argument instead of
		 * being pasted onto the format string: it is not a string
		 * literal for every compiler version.
		 */
		PRINT_G(KERN_DEBUG, "%s called again", __PRETTY_FUNCTION__);
		return 0;
	}

	PRINT_G(KERN_DEBUG, "looking for Premier3 cards");

	if (add_card() != 0) {
		PRINT_G(KERN_DEBUG, "no operable Premier3 cards found");
		return -ENXIO;
	}

#ifdef CONFIG_PROC_FS
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,3,0)
	create_proc_read_entry("premier3", 0, NULL, premier3_read_proc, NULL);
#else
	{
		struct proc_dir_entry *ent =
			create_proc_entry("premier3", 0, 0);
		if (ent)
			ent->read_proc = premier3_get_info;
	}
#endif
#endif

	return 0;
}

/*
 * Host-template get_rom hook: point *@ptr at this card's configuration
 * ROM image and return the image size in bytes.
 */
static size_t get_premier3_rom(struct hpsb_host *host, const quadlet_t **ptr)
{
	struct premier3 *card = host->hostdata;

	*ptr = card->csr_config_rom;

	return sizeof(premier3_csr_rom);
}

struct hpsb_host_template *get_premier3_template(void)
{
	static struct hpsb_host_template tmpl = {
		name:		  "premier3",
		detect_hosts:	  premier3_detect,
		initialize_host:  premier3_initialize,
		release_host:	  premier3_release,
		get_rom:	  get_premier3_rom,
		transmit_packet:  premier3_transmit,
		devctl:		  premier3_devctl
	};

	return &tmpl;
}

/* Module metadata */
MODULE_AUTHOR("Atsushi Nemoto <nemoto@toshiba-tops.co.jp>");
MODULE_DESCRIPTION("driver for TOSHIBA Premier3 IEEE-1394 controller");
MODULE_SUPPORTED_DEVICE("premier3");

/*
 * Module exit: unregister the host template from the ieee1394 core and
 * remove the /proc/premier3 entry.
 */
static void __exit premier3_cleanup(void)
{
	struct hpsb_host_template *tmpl = get_premier3_template();

	hpsb_unregister_lowlevel(tmpl);
	remove_proc_entry("premier3", NULL);
	PRINT_G(KERN_INFO, "removed " PREMIER3_DRIVER_NAME " module");
}

/*
 * Module init: register the host template with the ieee1394 core.
 * Returns 0 on success and -ENXIO when registration fails.
 */
static int __init premier3_init(void)
{
	if (hpsb_register_lowlevel(get_premier3_template()) == 0)
		return 0;

	PRINT_G(KERN_ERR, "registering failed");
	return -ENXIO;
}

/* Hook the functions above up as the module's entry and exit points. */
module_init(premier3_init);
module_exit(premier3_cleanup);

#ifndef MODULE
/*
 * Parse the "premier3=" kernel command line option, a comma separated
 * list of settings.  The only setting recognized is "dma:<n>" (and only
 * when the DMA controller is used); everything else is skipped.
 *
 * Always returns 0.
 */
static int __init premier3_setup(char *str)
{
	char *opt;

	for (opt = str; opt != NULL; ) {
#ifdef PR3_USE_DMAC
		if (strncmp(opt, "dma:", 4) == 0) {
			premier3_dmach = simple_strtol(opt + 4, NULL, 0);
			/*
			 * for backward compatibility: a small value is taken
			 * as an index relative to the first TXx927 channel
			 */
			if (premier3_dmach >= 0 &&
			    premier3_dmach < MAX_TXX927_DMA_CHANNELS)
				premier3_dmach += TXX927_DMA_CHANNEL_START;
		}
#endif
		/* move on to the setting after the next comma, if any */
		opt = strchr(opt, ',');
		if (opt != NULL)
			opt++;
	}

	return 0;
}
__setup("premier3=", premier3_setup);

/*
 * Record the register base address and interrupt number of the
 * controller for the later probe in add_card().  Always returns 0.
 */
int __init early_premier3_setup(unsigned long base, int irq)
{
	premier3_irq = irq;
	premier3_base = base;

	return 0;
}
#endif /* !MODULE */
