File-copy from v4.4.100

This is the result of 'cp' from a linux-stable tree with the 'v4.4.100'
tag checked out (commit 26d6298789e695c9f627ce49a7bbd2286405798a) on
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git

Please refer to that tree for all history prior to this point.

Change-Id: I8a9ee2aea93cd29c52c847d0ce33091a73ae6afe
diff --git a/arch/cris/arch-v32/Kconfig b/arch/cris/arch-v32/Kconfig
new file mode 100644
index 0000000..17dbe03
--- /dev/null
+++ b/arch/cris/arch-v32/Kconfig
@@ -0,0 +1,210 @@
+if ETRAX_ARCH_V32
+
+source arch/cris/arch-v32/mach-fs/Kconfig
+source arch/cris/arch-v32/mach-a3/Kconfig
+
+source drivers/cpufreq/Kconfig
+
+config ETRAX_DRAM_VIRTUAL_BASE
+	hex
+	depends on ETRAX_ARCH_V32
+	default "c0000000"
+
+choice
+	prompt "Kernel GDB port"
+	depends on ETRAX_KGDB
+	default ETRAX_KGDB_PORT0
+	help
+	  Choose a serial port for kernel debugging.  NOTE: This port should
+	  not be enabled under Drivers for built-in interfaces (as it has its
+	  own initialization code) and should not be the same as the debug port.
+
+config ETRAX_KGDB_PORT4
+	bool "Serial-4"
+	depends on ETRAX_SERIAL_PORTS = 5
+	help
+	  Use serial port 4 for kernel debugging.
+
+endchoice
+
+config ETRAX_MEM_GRP1_CONFIG
+	hex "MEM_GRP1_CONFIG"
+	depends on ETRAX_ARCH_V32
+	default "4044a"
+	help
+	  Waitstates for flash. The default value is suitable for the
+	  standard flashes used in Axis products (120 ns).
+
+config ETRAX_MEM_GRP2_CONFIG
+	hex "MEM_GRP2_CONFIG"
+	depends on ETRAX_ARCH_V32
+	default "0"
+	help
+	  Waitstates for SRAM. 0 is a good choice for most Axis products.
+
+config ETRAX_MEM_GRP3_CONFIG
+	hex "MEM_GRP3_CONFIG"
+	depends on ETRAX_ARCH_V32
+	default "0"
+	help
+	  Waitstates for CSP0-3. 0 is a good choice for most Axis products.
+	  It may need to be changed if external devices such as extra
+	  register-mapped LEDs are used.
+
+config ETRAX_MEM_GRP4_CONFIG
+	hex "MEM_GRP4_CONFIG"
+	depends on ETRAX_ARCH_V32
+	default "0"
+	help
+	  Waitstates for CSP4-6. 0 is a good choice for most Axis products.
+
+config ETRAX_SDRAM_GRP0_CONFIG
+	hex "SDRAM_GRP0_CONFIG"
+	depends on ETRAX_ARCH_V32
+	default "336"
+	help
+	  SDRAM configuration for group 0. The value depends on the
+	  hardware configuration. The default value is suitable
+	  for 32 MB organized as two 16 bits chips (e.g. Axis
+	  part number 18550) connected as one 32 bit device (i.e. in
+	  the same group).
+
+config ETRAX_SDRAM_GRP1_CONFIG
+	hex "SDRAM_GRP1_CONFIG"
+	depends on ETRAX_ARCH_V32
+	default "0"
+	help
+	  SDRAM configuration for group 1. The default value is 0
+	  because group 1 is not used in the default configuration,
+	  described in the help for SDRAM_GRP0_CONFIG.
+
+config ETRAX_SDRAM_TIMING
+	hex "SDRAM_TIMING"
+	depends on ETRAX_ARCH_V32
+	default "104a"
+	help
+	  SDRAM timing parameters. The default value is OK for
+	  most hardware, but large SDRAMs may require a faster
+	  refresh (a.k.a. 8K refresh). The default value implies
+	  a 100 MHz clock and SDR mode.
+
+config ETRAX_SDRAM_COMMAND
+	hex "SDRAM_COMMAND"
+	depends on ETRAX_ARCH_V32
+	default "0"
+	help
+	  SDRAM command. Should be 0 unless you really know what
+	  you are doing (may be != 0 for unusual address line
+	  mappings such as in an MCM).
+
+config ETRAX_DEF_GIO_PA_OE
+	hex "GIO_PA_OE"
+	depends on ETRAX_ARCH_V32
+	default "1c"
+	help
+	  Configures the direction of general port A bits.  1 is out, 0 is in.
+	  This is often totally different depending on the product used.
+	  There are some guidelines though - if you know that only LEDs are
+	  connected to port PA, then they are usually connected to bits 2-4
+	  and you can therefore use 1c.  On other boards which don't have the
+	  LEDs at the general ports, these bits are used for all kinds of
+	  stuff.  If you don't know what to use, it is always safe to put all
+	  as inputs, although floating inputs aren't good.
+
+config ETRAX_DEF_GIO_PA_OUT
+	hex "GIO_PA_OUT"
+	depends on ETRAX_ARCH_V32
+	default "00"
+	help
+	  Configures the initial data for the general port A bits.  Most
+	  products should use 00 here.
+
+config ETRAX_DEF_GIO_PB_OE
+	hex "GIO_PB_OE"
+	depends on ETRAX_ARCH_V32
+	default "00000"
+	help
+	  Configures the direction of general port B bits.  1 is out, 0 is in.
+	  This is often totally different depending on the product used.
+	  There are some guidelines though - if you know that only LEDs are
+	  connected to this port, then they are usually connected to bits 2-4
+	  and you can therefore use 1c.  On other boards which don't have the
+	  LEDs at the general ports, these bits are used for all kinds of
+	  stuff.  If you don't know what to use, it is always safe to put all
+	  as inputs, although floating inputs aren't good.
+
+config ETRAX_DEF_GIO_PB_OUT
+	hex "GIO_PB_OUT"
+	depends on ETRAX_ARCH_V32
+	default "00000"
+	help
+	  Configures the initial data for the general port B bits.  Most
+	  products should use 00000 here.
+
+config ETRAX_DEF_GIO_PC_OE
+	hex "GIO_PC_OE"
+	depends on ETRAX_ARCH_V32
+	default "00000"
+	help
+	  Configures the direction of general port C bits.  1 is out, 0 is in.
+	  This is often totally different depending on the product used.
+	  There are some guidelines though - if you know that only LEDs are
+	  connected to this port, then they are usually connected to bits 2-4
+	  and you can therefore use 1c.  On other boards which don't have the
+	  LEDs at the general ports, these bits are used for all kinds of
+	  stuff.  If you don't know what to use, it is always safe to put all
+	  as inputs, although floating inputs aren't good.
+
+config ETRAX_DEF_GIO_PC_OUT
+	hex "GIO_PC_OUT"
+	depends on ETRAX_ARCH_V32
+	default "00000"
+	help
+	  Configures the initial data for the general port C bits.  Most
+	  products should use 00000 here.
+
+config ETRAX_DEF_GIO_PD_OE
+	hex "GIO_PD_OE"
+	depends on ETRAX_ARCH_V32
+	default "00000"
+	help
+	  Configures the direction of general port D bits.  1 is out, 0 is in.
+	  This is often totally different depending on the product used.
+	  There are some guidelines though - if you know that only LEDs are
+	  connected to this port, then they are usually connected to bits 2-4
+	  and you can therefore use 1c.  On other boards which don't have the
+	  LEDs at the general ports, these bits are used for all kinds of
+	  stuff.  If you don't know what to use, it is always safe to put all
+	  as inputs, although floating inputs aren't good.
+
+config ETRAX_DEF_GIO_PD_OUT
+	hex "GIO_PD_OUT"
+	depends on ETRAX_ARCH_V32
+	default "00000"
+	help
+	  Configures the initial data for the general port D bits.  Most
+	  products should use 00000 here.
+
+config ETRAX_DEF_GIO_PE_OE
+	hex "GIO_PE_OE"
+	depends on ETRAX_ARCH_V32
+	default "00000"
+	help
+	  Configures the direction of general port E bits.  1 is out, 0 is in.
+	  This is often totally different depending on the product used.
+	  There are some guidelines though - if you know that only LEDs are
+	  connected to this port, then they are usually connected to bits 2-4
+	  and you can therefore use 1c.  On other boards which don't have the
+	  LEDs at the general ports, these bits are used for all kinds of
+	  stuff.  If you don't know what to use, it is always safe to put all
+	  as inputs, although floating inputs aren't good.
+
+config ETRAX_DEF_GIO_PE_OUT
+	hex "GIO_PE_OUT"
+	depends on ETRAX_ARCH_V32
+	default "00000"
+	help
+	  Configures the initial data for the general port E bits.  Most
+	  products should use 00000 here.
+
+endif
diff --git a/arch/cris/arch-v32/drivers/Kconfig b/arch/cris/arch-v32/drivers/Kconfig
new file mode 100644
index 0000000..2735eb7
--- /dev/null
+++ b/arch/cris/arch-v32/drivers/Kconfig
@@ -0,0 +1,263 @@
+if ETRAX_ARCH_V32
+
+config ETRAX_ETHERNET
+	bool "Ethernet support"
+	depends on ETRAX_ARCH_V32 && NETDEVICES
+	select MII
+	help
+	  This option enables the ETRAX FS built-in 10/100Mbit Ethernet
+	  controller.
+
+config ETRAX_NO_PHY
+	bool "PHY not present"
+	depends on ETRAX_ETHERNET
+	help
+	  This option disables all MDIO communication with an Ethernet
+	  transceiver connected to the MII interface. This option shall
+	  typically be enabled if the MII interface is connected to a
+	  switch. This option should normally be disabled. If enabled,
+	  speed and duplex will be locked to 100 Mbit and full duplex.
+
+config ETRAXFS_SERIAL
+	bool "Serial-port support"
+	depends on ETRAX_ARCH_V32
+	select SERIAL_CORE
+	select SERIAL_CORE_CONSOLE
+	help
+	  Enables the ETRAX FS serial driver for ser0 (ttyS0).
+	  You probably want this enabled.
+
+config ETRAX_RS485
+	bool "RS-485 support"
+	depends on ETRAXFS_SERIAL
+	help
+	  Enables support for RS-485 serial communication.
+
+config ETRAX_RS485_DISABLE_RECEIVER
+	bool "Disable serial receiver"
+	depends on ETRAX_RS485
+	help
+	  It is necessary to disable the serial receiver to avoid serial
+	  loopback.  Not all products are able to do this in software only.
+
+config ETRAX_SERIAL_PORT0
+	bool "Serial port 0 enabled"
+	depends on ETRAXFS_SERIAL
+	help
+	  Enables the ETRAX FS serial driver for ser0 (ttyS0).
+	  Normally you want this on. You can control which DMA channels to use
+	  if you do not need DMA for something else.
+	  ser0 can use dma4 or dma6 for output and dma5 or dma7 for input.
+
+config ETRAX_SERIAL_PORT1
+	bool "Serial port 1 enabled"
+	depends on ETRAXFS_SERIAL
+	help
+	  Enables the ETRAX FS serial driver for ser1 (ttyS1).
+
+config ETRAX_SERIAL_PORT2
+	bool "Serial port 2 enabled"
+	depends on ETRAXFS_SERIAL
+	help
+	  Enables the ETRAX FS serial driver for ser2 (ttyS2).
+
+config ETRAX_SERIAL_PORT3
+	bool "Serial port 3 enabled"
+	depends on ETRAXFS_SERIAL
+	help
+	  Enables the ETRAX FS serial driver for ser3 (ttyS3).
+
+config ETRAX_SYNCHRONOUS_SERIAL
+	bool "Synchronous serial-port support"
+	depends on ETRAX_ARCH_V32
+	help
+	  Enables the ETRAX FS synchronous serial driver.
+
+config ETRAX_SYNCHRONOUS_SERIAL_PORT0
+	bool "Synchronous serial port 0 enabled"
+	depends on ETRAX_SYNCHRONOUS_SERIAL
+	help
+	  Enables synchronous serial port 0.
+
+config ETRAX_SYNCHRONOUS_SERIAL0_DMA
+	bool "Enable DMA on synchronous serial port 0"
+	depends on ETRAX_SYNCHRONOUS_SERIAL_PORT0
+	help
+	  A synchronous serial port can run in manual or DMA mode.
+	  Selecting this option will make it run in DMA mode.
+
+config ETRAX_SYNCHRONOUS_SERIAL_PORT1
+	bool "Synchronous serial port 1 enabled"
+	depends on ETRAX_SYNCHRONOUS_SERIAL && ETRAXFS
+	help
+	  Enables synchronous serial port 1.
+
+config ETRAX_SYNCHRONOUS_SERIAL1_DMA
+	bool "Enable DMA on synchronous serial port 1"
+	depends on ETRAX_SYNCHRONOUS_SERIAL_PORT1
+	help
+	  A synchronous serial port can run in manual or DMA mode.
+	  Selecting this option will make it run in DMA mode.
+
+config ETRAX_AXISFLASHMAP
+	bool "Axis flash-map support"
+	depends on ETRAX_ARCH_V32
+	select MTD
+	select MTD_CFI
+	select MTD_CFI_AMDSTD
+	select MTD_JEDECPROBE
+	select MTD_BLOCK
+	select MTD_COMPLEX_MAPPINGS
+	select MTD_MTDRAM
+	help
+	  This option enables MTD mapping of flash devices.  Needed to use
+	  flash memories.  If unsure, say Y.
+
+config ETRAX_AXISFLASHMAP_MTD0WHOLE
+	bool "MTD0 is whole boot flash device"
+	depends on ETRAX_AXISFLASHMAP
+	help
+	  When this option is not set, mtd0 refers to the first partition
+	  on the boot flash device. When set, mtd0 refers to the whole
+	  device, with mtd1 referring to the first partition etc.
+
+config ETRAX_PTABLE_SECTOR
+	int "Byte-offset of partition table sector"
+	depends on ETRAX_AXISFLASHMAP
+	default "65536"
+	help
+	  Byte-offset of the partition table in the first flash chip.
+	  The default value is 64kB and should not be changed unless
+	  you know exactly what you are doing. The only valid reason
+	  for changing this is when the flash block size is bigger
+	  than 64kB (e.g. when using two parallel 16 bit flashes).
+
+config ETRAX_NANDFLASH
+	bool "NAND flash support"
+	depends on ETRAX_ARCH_V32
+	select MTD_NAND
+	select MTD_NAND_IDS
+	help
+	  This option enables MTD mapping of NAND flash devices.  Needed to use
+	  NAND flash memories.  If unsure, say Y.
+
+config ETRAX_NANDBOOT
+	bool "Boot from NAND flash"
+	depends on ETRAX_NANDFLASH
+	help
+	  This option enables booting from NAND flash devices.
+	  Say Y if your boot code, kernel and root file system are in
+	  NAND flash. Say N if they are in NOR flash.
+
+config ETRAX_CARDBUS
+	bool "Cardbus support"
+	depends on ETRAX_ARCH_V32
+	help
+	  Enables the ETRAX CardBus driver.
+
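+# CardBus is PCI-based, so enabling the CardBus driver pulls in the PCI core.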
+config PCI
+	bool
+	depends on ETRAX_CARDBUS
+	default y
+	select HAVE_GENERIC_DMA_COHERENT
+
+config ETRAX_IOP_FW_LOAD
+	tristate "IO-processor hotplug firmware loading support"
+	depends on ETRAX_ARCH_V32
+	select FW_LOADER
+	help
+	  Enables IO-processor hotplug firmware loading support.
+
+config ETRAX_STREAMCOPROC
+	tristate "Stream co-processor driver enabled"
+	depends on ETRAX_ARCH_V32
+	help
+	  This option enables a driver for the stream co-processor
+	  for cryptographic operations.
+
+config ETRAX_MMC_IOP
+	tristate "MMC/SD host driver using IO-processor"
+	depends on ETRAX_ARCH_V32 && MMC
+	help
+	  This option enables the SD/MMC host controller interface.
+	  The host controller is implemented using the built-in
+	  IO-processor. Only the SPU is used in this implementation.
+
+config ETRAX_SPI_MMC
+# Make this one of several "choices" (possible simultaneously but
+# suggested uniquely) when an IOP driver emerges for "real" MMC/SD
+# protocol support.
+	tristate
+	depends on !ETRAX_MMC_IOP
+	default MMC
+	select SPI
+	select MMC_SPI
+
+# While the board info is MMC_SPI only, the drivers are written to be
+# independent of MMC_SPI, so we'll keep SPI non-dependent on the
+# MMC_SPI config choices (well, except for a single depends-on-line
+# for the board-info file until a separate non-MMC SPI board file
+# emerges).
+# FIXME: When that happens, we'll need to be able to ask for and
+# configure non-MMC SPI ports together with MMC_SPI ports (if multiple
+# SPI ports are enabled).
+
+config SPI_ETRAX_SSER
+	tristate
+	depends on SPI_MASTER && ETRAX_ARCH_V32
+	select SPI_BITBANG
+	help
+	  This enables using a synchronous serial (sser) port as an
+	  SPI master controller on Axis ETRAX FS and later.  The
+	  driver can be configured to use any sser port.
+
+config SPI_ETRAX_GPIO
+	tristate
+	depends on SPI_MASTER && ETRAX_ARCH_V32
+	select SPI_BITBANG
+	help
+	  This enables using GPIO pins as an SPI master controller
+	  on Axis ETRAX FS and later.  The driver can be configured to
+	  use any GPIO pins.
+
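+# The SPI port options below default on in cascade: sser0 is preferred,
+# and sser1 and the GPIO variant only default on when the earlier ones
+# are disabled.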
+config ETRAX_SPI_SSER0
+	tristate "SPI using synchronous serial port 0 (sser0)"
+	depends on ETRAX_SPI_MMC
+	default m if MMC_SPI=m
+	default y if MMC_SPI=y
+	default y if MMC_SPI=n
+	select SPI_ETRAX_SSER
+	help
+	  Say Y for an MMC/SD socket connected to synchronous serial port 0,
+	  or for devices using the SPI protocol on that port.  Say m if you
+	  want to build it as a module, which will be named spi_crisv32_sser.
+	  (You need to select MMC separately.)
+
+config ETRAX_SPI_SSER1
+	tristate "SPI using synchronous serial port 1 (sser1)"
+	depends on ETRAX_SPI_MMC
+	default m if MMC_SPI=m && ETRAX_SPI_SSER0=n
+	default y if MMC_SPI=y && ETRAX_SPI_SSER0=n
+	default y if MMC_SPI=n && ETRAX_SPI_SSER0=n
+	select SPI_ETRAX_SSER
+	help
+	  Say Y for an MMC/SD socket connected to synchronous serial port 1,
+	  or for devices using the SPI protocol on that port.  Say m if you
+	  want to build it as a module, which will be named spi_crisv32_sser.
+	  (You need to select MMC separately.)
+
+config ETRAX_SPI_GPIO
+	tristate "Bitbanged SPI using gpio pins"
+	depends on ETRAX_SPI_MMC
+	select SPI_ETRAX_GPIO
+	default m if MMC_SPI=m && ETRAX_SPI_SSER0=n && ETRAX_SPI_SSER1=n
+	default y if MMC_SPI=y && ETRAX_SPI_SSER0=n && ETRAX_SPI_SSER1=n
+	default y if MMC_SPI=n && ETRAX_SPI_SSER0=n && ETRAX_SPI_SSER1=n
+	help
+	  Say Y for an MMC/SD socket connected to general I/O pins (but not
+	  a complete synchronous serial port), or for devices using the SPI
+	  protocol on general I/O pins.  This is slow and will slow down the
+	  system.
+	  Say m to build it as a module, which will be called spi_crisv32_gpio.
+	  (You need to select MMC separately.)
+
+endif
diff --git a/arch/cris/arch-v32/drivers/Makefile b/arch/cris/arch-v32/drivers/Makefile
new file mode 100644
index 0000000..b5a75fd
--- /dev/null
+++ b/arch/cris/arch-v32/drivers/Makefile
@@ -0,0 +1,11 @@
+#
+# Makefile for Etrax-specific drivers
+#
+
+obj-$(CONFIG_ETRAX_STREAMCOPROC)        += cryptocop.o
+obj-$(CONFIG_ETRAX_AXISFLASHMAP)        += axisflashmap.o
+obj-$(CONFIG_ETRAXFS)                   += mach-fs/
+obj-$(CONFIG_CRIS_MACH_ARTPEC3)         += mach-a3/
+obj-$(CONFIG_ETRAX_IOP_FW_LOAD)         += iop_fw_load.o
+obj-$(CONFIG_ETRAX_SYNCHRONOUS_SERIAL)	+= sync_serial.o
+obj-$(CONFIG_PCI)			+= pci/
diff --git a/arch/cris/arch-v32/drivers/axisflashmap.c b/arch/cris/arch-v32/drivers/axisflashmap.c
new file mode 100644
index 0000000..c6309a1
--- /dev/null
+++ b/arch/cris/arch-v32/drivers/axisflashmap.c
@@ -0,0 +1,592 @@
+/*
+ * Physical mapping layer for MTD using the Axis partition-table format
+ *
+ * Copyright (c) 2001-2007 Axis Communications AB
+ *
+ * This file is under the GPL.
+ *
+ * The first partition is always sector 0, regardless of whether we find a
+ * partition table or not. At the start of the next sector there can be a
+ * partition table that tells us what other partitions to define. If there
+ * isn't one, we use a default partition split defined below.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+
+#include <linux/mtd/concat.h>
+#include <linux/mtd/map.h>
+#include <linux/mtd/mtd.h>
+#include <linux/mtd/mtdram.h>
+#include <linux/mtd/partitions.h>
+
+#include <asm/axisflashmap.h>
+#include <asm/mmu.h>
+
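+/* Each flash chip select decodes a 64 MB window. */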
+#define MEM_CSE0_SIZE (0x04000000)
+#define MEM_CSE1_SIZE (0x04000000)
+
+#define FLASH_UNCACHED_ADDR  KSEG_E
+#define FLASH_CACHED_ADDR    KSEG_F
+
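+/* Size of the scratch buffer used when reading the partition table. */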
+#define PAGESIZE (512)
+
+#if CONFIG_ETRAX_FLASH_BUSWIDTH==1
+#define flash_data __u8
+#elif CONFIG_ETRAX_FLASH_BUSWIDTH==2
+#define flash_data __u16
+#elif CONFIG_ETRAX_FLASH_BUSWIDTH==4
+#define flash_data __u32
+#endif
+
+/* From head.S */
+extern unsigned long romfs_in_flash; /* 1 when romfs_start, _length in flash */
+extern unsigned long romfs_start, romfs_length;
+extern unsigned long nand_boot; /* 1 when booted from nand flash */
+
+struct partition_name {
+	char name[6];
+};
+
+/* The master mtd for the entire flash. */
+struct mtd_info* axisflash_mtd = NULL;
+
+/* Map driver functions. */
+
+static map_word flash_read(struct map_info *map, unsigned long ofs)
+{
+	map_word tmp;
+	tmp.x[0] = *(flash_data *)(map->map_priv_1 + ofs);
+	return tmp;
+}
+
+static void flash_copy_from(struct map_info *map, void *to,
+			    unsigned long from, ssize_t len)
+{
+	memcpy(to, (void *)(map->map_priv_1 + from), len);
+}
+
+static void flash_write(struct map_info *map, map_word d, unsigned long adr)
+{
+	*(flash_data *)(map->map_priv_1 + adr) = (flash_data)d.x[0];
+}
+
+/*
+ * The map for chip select e0.
+ *
+ * We run into tricky coherence situations if we mix cached with uncached
+ * accesses, so we only use the uncached version here.
+ *
+ * The size field is the total size where the flash chips may be mapped on the
+ * chip select. MTD probes should find all devices there and it does not matter
+ * if there are unmapped gaps or aliases (mirrors of flash devices). The MTD
+ * probes will ignore them.
+ *
+ * The start address in map_priv_1 is in virtual memory so we cannot use
+ * MEM_CSE0_START but must rely on FLASH_UNCACHED_ADDR being the start
+ * address of cse0.
+ */
+static struct map_info map_cse0 = {
+	.name = "cse0",
+	.size = MEM_CSE0_SIZE,
+	.bankwidth = CONFIG_ETRAX_FLASH_BUSWIDTH,
+	.read = flash_read,
+	.copy_from = flash_copy_from,
+	.write = flash_write,
+	.map_priv_1 = FLASH_UNCACHED_ADDR
+};
+
+/*
+ * The map for chip select e1.
+ *
+ * If there was a gap between cse0 and cse1, map_priv_1 would get the wrong
+ * address, but there isn't.
+ */
+static struct map_info map_cse1 = {
+	.name = "cse1",
+	.size = MEM_CSE1_SIZE,
+	.bankwidth = CONFIG_ETRAX_FLASH_BUSWIDTH,
+	.read = flash_read,
+	.copy_from = flash_copy_from,
+	.write = flash_write,
+	.map_priv_1 = FLASH_UNCACHED_ADDR + MEM_CSE0_SIZE
+};
+
+#define MAX_PARTITIONS			7
+#ifdef CONFIG_ETRAX_NANDBOOT
+#define NUM_DEFAULT_PARTITIONS		4
+#define DEFAULT_ROOTFS_PARTITION_NO	2
+#define DEFAULT_MEDIA_SIZE              0x2000000 /* 32 megs */
+#else
+#define NUM_DEFAULT_PARTITIONS		3
+#define DEFAULT_ROOTFS_PARTITION_NO	(-1)
+#define DEFAULT_MEDIA_SIZE              0x800000 /* 8 megs */
+#endif
+
+#if (MAX_PARTITIONS < NUM_DEFAULT_PARTITIONS)
+#error MAX_PARTITIONS must be >= NUM_DEFAULT_PARTITIONS
+#endif
+
+/* Initialize the ones normally used. */
+static struct mtd_partition axis_partitions[MAX_PARTITIONS] = {
+	{
+		.name = "part0",
+		.size = CONFIG_ETRAX_PTABLE_SECTOR,
+		.offset = 0
+	},
+	{
+		.name = "part1",
+		.size = 0,
+		.offset = 0
+	},
+	{
+		.name = "part2",
+		.size = 0,
+		.offset = 0
+	},
+	{
+		.name = "part3",
+		.size = 0,
+		.offset = 0
+	},
+	{
+		.name = "part4",
+		.size = 0,
+		.offset = 0
+	},
+	{
+		.name = "part5",
+		.size = 0,
+		.offset = 0
+	},
+	{
+		.name = "part6",
+		.size = 0,
+		.offset = 0
+	},
+};
+
+
+/* If no partition table was found, we use this default set.
+ * Default flash size is 8 MB (NOR). CONFIG_ETRAX_PTABLE_SECTOR is most
+ * likely the size of one flash block, and the "filesystem" partition needs
+ * to be >= 5 blocks to be able to use JFFS.
+ */
+static struct mtd_partition axis_default_partitions[NUM_DEFAULT_PARTITIONS] = {
+	{
+		.name = "boot firmware",
+		.size = CONFIG_ETRAX_PTABLE_SECTOR,
+		.offset = 0
+	},
+	{
+		.name = "kernel",
+		.size = 10 * CONFIG_ETRAX_PTABLE_SECTOR,
+		.offset = CONFIG_ETRAX_PTABLE_SECTOR
+	},
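+/* Partitions after the boot firmware (1 sector) and kernel (10 sectors)
+ * start 11 sectors in; with NANDBOOT a 10-sector rootfs partition is
+ * added first and FILESYSTEM_SECTOR is redefined to 21 below. */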
+#define FILESYSTEM_SECTOR (11 * CONFIG_ETRAX_PTABLE_SECTOR)
+#ifdef CONFIG_ETRAX_NANDBOOT
+	{
+		.name = "rootfs",
+		.size = 10 * CONFIG_ETRAX_PTABLE_SECTOR,
+		.offset = FILESYSTEM_SECTOR
+	},
+#undef FILESYSTEM_SECTOR
+#define FILESYSTEM_SECTOR (21 * CONFIG_ETRAX_PTABLE_SECTOR)
+#endif
+	{
+		.name = "rwfs",
+		.size = DEFAULT_MEDIA_SIZE - FILESYSTEM_SECTOR,
+		.offset = FILESYSTEM_SECTOR
+	}
+};
+
+#ifdef CONFIG_ETRAX_AXISFLASHMAP_MTD0WHOLE
+/* Main flash device */
+static struct mtd_partition main_partition = {
+	.name = "main",
+	.size = 0,
+	.offset = 0
+};
+#endif
+
+/* Auxiliary partition if we find another flash */
+static struct mtd_partition aux_partition = {
+	.name = "aux",
+	.size = 0,
+	.offset = 0
+};
+
+/*
+ * Probe a chip select for CFI-compatible or JEDEC-compatible (AMD) flash
+ * chips, in that order (the CFI probe is tried first below).
+ */
+static struct mtd_info *probe_cs(struct map_info *map_cs)
+{
+	struct mtd_info *mtd_cs = NULL;
+
+	printk(KERN_INFO
+	       "%s: Probing a 0x%08lx bytes large window at 0x%08lx.\n",
+	       map_cs->name, map_cs->size, map_cs->map_priv_1);
+
+#ifdef CONFIG_MTD_CFI
+	mtd_cs = do_map_probe("cfi_probe", map_cs);
+#endif
+#ifdef CONFIG_MTD_JEDECPROBE
+	if (!mtd_cs)
+		mtd_cs = do_map_probe("jedec_probe", map_cs);
+#endif
+
+	return mtd_cs;
+}
+
+/*
+ * Probe each chip select individually for flash chips. If there are chips on
+ * both cse0 and cse1, the mtd_info structs will be concatenated to one struct
+ * so that MTD partitions can cross chip boundaries.
+ *
+ * The only known restriction on how you can mount your chips is that each
+ * chip select must hold similar flash chips. But you need external hardware
+ * to do that anyway and you can put totally different chips on cse0 and cse1
+ * so it isn't really much of a restriction.
+ */
+extern struct mtd_info* __init crisv32_nand_flash_probe (void);
+static struct mtd_info *flash_probe(void)
+{
+	struct mtd_info *mtd_cse0;
+	struct mtd_info *mtd_cse1;
+	struct mtd_info *mtd_total;
+	struct mtd_info *mtds[2];
+	int count = 0;
+
+	if ((mtd_cse0 = probe_cs(&map_cse0)) != NULL)
+		mtds[count++] = mtd_cse0;
+	if ((mtd_cse1 = probe_cs(&map_cse1)) != NULL)
+		mtds[count++] = mtd_cse1;
+
+	if (!mtd_cse0 && !mtd_cse1) {
+		/* No chip found. */
+		return NULL;
+	}
+
+	if (count > 1) {
+		/* Since the concatenation layer adds a small overhead we
+		 * could try to figure out if the chips in cse0 and cse1 are
+		 * identical and reprobe the whole cse0+cse1 window. But since
+		 * flash chips are slow, the overhead is relatively small.
+		 * So we use the MTD concatenation layer instead of further
+		 * complicating the probing procedure.
+		 */
+		mtd_total = mtd_concat_create(mtds, count, "cse0+cse1");
+		if (!mtd_total) {
+			printk(KERN_ERR "%s and %s: Concatenation failed!\n",
+				map_cse0.name, map_cse1.name);
+
+			/* The best we can do now is to only use what we found
+			 * at cse0. */
+			mtd_total = mtd_cse0;
+			map_destroy(mtd_cse1);
+		}
+	} else
+		mtd_total = mtd_cse0 ? mtd_cse0 : mtd_cse1;
+
+	return mtd_total;
+}
+
+/*
+ * Probe the flash chip(s) and, if probing succeeds, read the partition table
+ * and register the partitions with MTD.
+ */
+static int __init init_axis_flash(void)
+{
+	struct mtd_info *main_mtd;
+	struct mtd_info *aux_mtd = NULL;
+	int err = 0;
+	int pidx = 0;
+	struct partitiontable_head *ptable_head = NULL;
+	struct partitiontable_entry *ptable;
+	int ptable_ok = 0;
+	static char page[PAGESIZE];
+	size_t len;
+	int ram_rootfs_partition = -1; /* -1 => no RAM rootfs partition */
+	int part;
+	struct mtd_partition *partition;
+
+	/* We need a root fs. If it resides in RAM, we need to use an
+	 * MTDRAM device, so it must be enabled in the kernel config,
+	 * but its size must be configured as 0 so as not to conflict
+	 * with our usage.
+	 */
+#if !defined(CONFIG_MTD_MTDRAM) || (CONFIG_MTDRAM_TOTAL_SIZE != 0) || (CONFIG_MTDRAM_ABS_POS != 0)
+	if (!romfs_in_flash && !nand_boot) {
+		printk(KERN_EMERG "axisflashmap: Cannot create an MTD RAM "
+		       "device; configure CONFIG_MTD_MTDRAM with size = 0!\n");
+		panic("This kernel cannot boot from RAM!\n");
+	}
+#endif
+
+	main_mtd = flash_probe();
+	if (main_mtd)
+		printk(KERN_INFO "%s: 0x%08llx bytes of NOR flash memory.\n",
+		       main_mtd->name, main_mtd->size);
+
+#ifdef CONFIG_ETRAX_NANDFLASH
+	aux_mtd = crisv32_nand_flash_probe();
+	if (aux_mtd)
+		printk(KERN_INFO "%s: 0x%08llx bytes of NAND flash memory.\n",
+			aux_mtd->name, aux_mtd->size);
+
+#ifdef CONFIG_ETRAX_NANDBOOT
+	{
+		struct mtd_info *tmp_mtd;
+
+		printk(KERN_INFO "axisflashmap: Set to boot from NAND flash, "
+		       "making NAND flash primary device.\n");
+		tmp_mtd = main_mtd;
+		main_mtd = aux_mtd;
+		aux_mtd = tmp_mtd;
+	}
+#endif /* CONFIG_ETRAX_NANDBOOT */
+#endif /* CONFIG_ETRAX_NANDFLASH */
+
+	if (!main_mtd && !aux_mtd) {
+		/* There's no reason to use this module if no flash chip can
+		 * be identified. Make sure that's understood.
+		 */
+		printk(KERN_INFO "axisflashmap: Found no flash chip.\n");
+	}
+
+#if 0 /* Dump flash memory so we can see what is going on */
+	if (main_mtd) {
+		int sectoraddr;
+		for (sectoraddr = 0; sectoraddr < 2*65536+4096;
+				sectoraddr += PAGESIZE) {
+			main_mtd->read(main_mtd, sectoraddr, PAGESIZE, &len,
+				page);
+			printk(KERN_INFO
+			       "Sector at %d (length %d):\n",
+			       sectoraddr, len);
+			print_hex_dump(KERN_INFO, "", DUMP_PREFIX_NONE, 16, 1, page, PAGESIZE, false);
+		}
+	}
+#endif
+
+	if (main_mtd) {
+		loff_t ptable_sector = CONFIG_ETRAX_PTABLE_SECTOR;
+		main_mtd->owner = THIS_MODULE;
+		axisflash_mtd = main_mtd;
+
+		/* First partition (rescue) is always set to the default. */
+		pidx++;
+#ifdef CONFIG_ETRAX_NANDBOOT
+		/* We know where the partition table should be located;
+		 * it will be in the first good block after that.
+		 */
+		int blockstat;
+		do {
+			blockstat = mtd_block_isbad(main_mtd, ptable_sector);
+			if (blockstat < 0)
+				ptable_sector = 0; /* read error */
+			else if (blockstat)
+				ptable_sector += main_mtd->erasesize;
+		} while (blockstat && ptable_sector);
+#endif
+		if (ptable_sector) {
+			mtd_read(main_mtd, ptable_sector, PAGESIZE, &len,
+				 page);
+			ptable_head = &((struct partitiontable *) page)->head;
+		}
+
+#if 0 /* Dump partition table so we can see what is going on */
+		printk(KERN_INFO
+		       "axisflashmap: flash read %d bytes at 0x%08x, data: %8ph\n",
+		       len, CONFIG_ETRAX_PTABLE_SECTOR, page);
+		printk(KERN_INFO
+		       "axisflashmap: partition table offset %d, data: %8ph\n",
+		       PARTITION_TABLE_OFFSET, page + PARTITION_TABLE_OFFSET);
+#endif
+	}
+
+	if (ptable_head && (ptable_head->magic == PARTITION_TABLE_MAGIC)
+	    && (ptable_head->size <
+		(MAX_PARTITIONS * sizeof(struct partitiontable_entry) +
+		PARTITIONTABLE_END_MARKER_SIZE))
+	    && (*(unsigned long*)((void*)ptable_head + sizeof(*ptable_head) +
+				  ptable_head->size -
+				  PARTITIONTABLE_END_MARKER_SIZE)
+		== PARTITIONTABLE_END_MARKER)) {
+		/* Looks like the start, a sane length and the end of a
+		 * partition table; let's check the checksum etc.
+		 */
+		struct partitiontable_entry *max_addr =
+			(struct partitiontable_entry *)
+			((unsigned long)ptable_head + sizeof(*ptable_head) +
+			 ptable_head->size);
+		unsigned long offset = CONFIG_ETRAX_PTABLE_SECTOR;
+		unsigned char *p;
+		unsigned long csum = 0;
+
+		ptable = (struct partitiontable_entry *)
+			((unsigned long)ptable_head + sizeof(*ptable_head));
+
+		/* Let's be paranoid and check the checksum. */
+		p = (unsigned char*) ptable;
+
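+		/* Byte-wise sum, unrolled four bytes per iteration. */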
+		while (p <= (unsigned char*)max_addr) {
+			csum += *p++;
+			csum += *p++;
+			csum += *p++;
+			csum += *p++;
+		}
+		ptable_ok = (csum == ptable_head->checksum);
+
+		/* Read the entries and use/show the info.  */
+		printk(KERN_INFO "axisflashmap: "
+		       "Found a%s partition table at 0x%p-0x%p.\n",
+		       (ptable_ok ? " valid" : "n invalid"), ptable_head,
+		       max_addr);
+
+		/* We have found a working bootblock.  Now read the
+		 * partition table.  Scan the table.  It ends with 0xffffffff.
+		 */
+		while (ptable_ok
+		       && ptable->offset != PARTITIONTABLE_END_MARKER
+		       && ptable < max_addr
+		       && pidx < MAX_PARTITIONS - 1) {
+
+			axis_partitions[pidx].offset = offset + ptable->offset;
+#ifdef CONFIG_ETRAX_NANDFLASH
+			if (main_mtd->type == MTD_NANDFLASH) {
+				axis_partitions[pidx].size =
+					(((ptable+1)->offset ==
+					  PARTITIONTABLE_END_MARKER) ?
+					  main_mtd->size :
+					  ((ptable+1)->offset + offset)) -
+					(ptable->offset + offset);
+
+			} else
+#endif /* CONFIG_ETRAX_NANDFLASH */
+				axis_partitions[pidx].size = ptable->size;
+#ifdef CONFIG_ETRAX_NANDBOOT
+			/* Save the partition number of the JFFS2 RO partition.
+			 * Needed if RAM booting or if the root file system is in RAM.
+			 */
+			if (!nand_boot &&
+			    ram_rootfs_partition < 0 && /* not already set */
+			    ptable->type == PARTITION_TYPE_JFFS2 &&
+			    (ptable->flags & PARTITION_FLAGS_READONLY_MASK) ==
+				PARTITION_FLAGS_READONLY)
+				ram_rootfs_partition = pidx;
+#endif /* CONFIG_ETRAX_NANDBOOT */
+			pidx++;
+			ptable++;
+		}
+	}
+
+	/* Decide whether to use default partition table. */
+	/* Only use default table if we actually have a device (main_mtd) */
+
+	partition = &axis_partitions[0];
+	if (main_mtd && !ptable_ok) {
+		memcpy(axis_partitions, axis_default_partitions,
+		       sizeof(axis_default_partitions));
+		pidx = NUM_DEFAULT_PARTITIONS;
+		ram_rootfs_partition = DEFAULT_ROOTFS_PARTITION_NO;
+	}
+
+	/* Add artificial partitions for rootfs if necessary */
+	if (romfs_in_flash) {
+		/* rootfs is in directly accessible flash memory = NOR flash.
+		   Add an overlapping device for the rootfs partition. */
+		printk(KERN_INFO "axisflashmap: Adding partition for "
+		       "overlapping root file system image\n");
+		axis_partitions[pidx].size = romfs_length;
+		axis_partitions[pidx].offset = romfs_start - FLASH_CACHED_ADDR;
+		axis_partitions[pidx].name = "romfs";
+		axis_partitions[pidx].mask_flags |= MTD_WRITEABLE;
+		ram_rootfs_partition = -1;
+		pidx++;
+	} else if (romfs_length && !nand_boot) {
+		/* romfs exists in memory, but not in flash, so must be in RAM.
+		 * Configure an MTDRAM partition. */
+		if (ram_rootfs_partition < 0) {
+			/* None set yet, put it at the end */
+			ram_rootfs_partition = pidx;
+			pidx++;
+		}
+		printk(KERN_INFO "axisflashmap: Adding partition for "
+		       "root file system image in RAM\n");
+		axis_partitions[ram_rootfs_partition].size = romfs_length;
+		axis_partitions[ram_rootfs_partition].offset = romfs_start;
+		axis_partitions[ram_rootfs_partition].name = "romfs";
+		axis_partitions[ram_rootfs_partition].mask_flags |=
+			MTD_WRITEABLE;
+	}
+
+#ifdef CONFIG_ETRAX_AXISFLASHMAP_MTD0WHOLE
+	if (main_mtd) {
+		main_partition.size = main_mtd->size;
+		err = mtd_device_register(main_mtd, &main_partition, 1);
+		if (err)
+			panic("axisflashmap: Could not initialize "
+			      "partition for whole main mtd device!\n");
+	}
+#endif
+
+	/* Now, register all partitions with mtd.
+	 * We do this one at a time so we can slip in an MTDRAM device
+	 * in the proper place if required. */
+
+	for (part = 0; part < pidx; part++) {
+		if (part == ram_rootfs_partition) {
+			/* add MTDRAM partition here */
+			struct mtd_info *mtd_ram;
+
+			mtd_ram = kmalloc(sizeof(struct mtd_info), GFP_KERNEL);
+			if (!mtd_ram)
+				panic("axisflashmap: Couldn't allocate memory "
+				      "for mtd_info!\n");
+			printk(KERN_INFO "axisflashmap: Adding RAM partition "
+			       "for rootfs image.\n");
+			err = mtdram_init_device(mtd_ram,
+						 (void *)(u_int32_t)partition[part].offset,
+						 partition[part].size,
+						 partition[part].name);
+			if (err)
+				panic("axisflashmap: Could not initialize "
+				      "MTD RAM device!\n");
+			/* JFFS2 likes to have an erasesize. Keep potential
+			 * JFFS2 rootfs happy by providing one. Since image
+			 * was most likely created for main mtd, use that
+			 * erasesize, if available. Otherwise, make a guess. */
+			mtd_ram->erasesize = (main_mtd ? main_mtd->erasesize :
+				CONFIG_ETRAX_PTABLE_SECTOR);
+		} else {
+			err = mtd_device_register(main_mtd, &partition[part],
+						  1);
+			if (err)
+				panic("axisflashmap: Could not add mtd "
+					"partition %d\n", part);
+		}
+	}
+
+	if (aux_mtd) {
+		aux_partition.size = aux_mtd->size;
+		err = mtd_device_register(aux_mtd, &aux_partition, 1);
+		if (err)
+			panic("axisflashmap: Could not initialize "
+			      "aux mtd device!\n");
+
+	}
+
+	return err;
+}
+
+/* This adds the above to the kernel's init-call chain. */
+module_init(init_axis_flash);
+
+EXPORT_SYMBOL(axisflash_mtd);
diff --git a/arch/cris/arch-v32/drivers/cryptocop.c b/arch/cris/arch-v32/drivers/cryptocop.c
new file mode 100644
index 0000000..877da19
--- /dev/null
+++ b/arch/cris/arch-v32/drivers/cryptocop.c
@@ -0,0 +1,3536 @@
+/*
+ * Stream co-processor driver for the ETRAX FS
+ *
+ *    Copyright (C) 2003-2007  Axis Communications AB
+ */
+
+#include <linux/init.h>
+#include <linux/sched.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/spinlock.h>
+#include <linux/stddef.h>
+
+#include <asm/uaccess.h>
+#include <asm/io.h>
+#include <linux/atomic.h>
+
+#include <linux/list.h>
+#include <linux/interrupt.h>
+
+#include <asm/signal.h>
+#include <asm/irq.h>
+
+#include <dma.h>
+#include <hwregs/dma.h>
+#include <hwregs/reg_map.h>
+#include <hwregs/reg_rdwr.h>
+#include <hwregs/intr_vect_defs.h>
+
+#include <hwregs/strcop.h>
+#include <hwregs/strcop_defs.h>
+#include <cryptocop.h>
+
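+/* The stream co-processor is served by different DMA channel pairs
+ * depending on the chip: DMA 8/9 on ETRAX FS, DMA 2/3 otherwise. */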
+#ifdef CONFIG_ETRAXFS
+#define IN_DMA 9
+#define OUT_DMA 8
+#define IN_DMA_INST regi_dma9
+#define OUT_DMA_INST regi_dma8
+#define DMA_IRQ DMA9_INTR_VECT
+#else
+#define IN_DMA 3
+#define OUT_DMA 2
+#define IN_DMA_INST regi_dma3
+#define OUT_DMA_INST regi_dma2
+#define DMA_IRQ DMA3_INTR_VECT
+#endif
+
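+/* Slack added to descriptor allocations so that the DMA descriptor itself
+ * can be rounded up to a 32-byte boundary (see alloc_cdesc()). */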
+#define DESCR_ALLOC_PAD  (31)
+
+struct cryptocop_dma_desc {
+	char *free_buf; /* If non-null will be kfreed in free_cdesc() */
+	dma_descr_data *dma_descr;
+
+	unsigned char dma_descr_buf[sizeof(dma_descr_data) + DESCR_ALLOC_PAD];
+
+	unsigned int from_pool:1; /* If 1 'allocated' from the descriptor pool. */
+	struct cryptocop_dma_desc *next;
+};
+
+
+struct cryptocop_int_operation{
+	void                        *alloc_ptr;
+	cryptocop_session_id        sid;
+
+	dma_descr_context           ctx_out;
+	dma_descr_context           ctx_in;
+
+	/* DMA descriptors allocated by driver. */
+	struct cryptocop_dma_desc   *cdesc_out;
+	struct cryptocop_dma_desc   *cdesc_in;
+
+	/* Strcop config to use. */
+	cryptocop_3des_mode         tdes_mode;
+	cryptocop_csum_type         csum_mode;
+
+	/* DMA descrs provided by consumer. */
+	dma_descr_data              *ddesc_out;
+	dma_descr_data              *ddesc_in;
+};
+
+
+struct cryptocop_tfrm_ctx {
+	cryptocop_tfrm_id tid;
+	unsigned int blocklength;
+
+	unsigned int start_ix;
+
+	struct cryptocop_tfrm_cfg *tcfg;
+	struct cryptocop_transform_ctx *tctx;
+
+	unsigned char previous_src;
+	unsigned char current_src;
+
+	/* Values to use in metadata out. */
+	unsigned char hash_conf;
+	unsigned char hash_mode;
+	unsigned char ciph_conf;
+	unsigned char cbcmode;
+	unsigned char decrypt;
+
+	unsigned int requires_padding:1;
+	unsigned int strict_block_length:1;
+	unsigned int active:1;
+	unsigned int done:1;
+	size_t consumed;
+	size_t produced;
+
+	/* Pad (input) descriptors to put in the DMA out list when the transform
+	 * output is put on the DMA in list. */
+	struct cryptocop_dma_desc *pad_descs;
+
+	struct cryptocop_tfrm_ctx *prev_src;
+	struct cryptocop_tfrm_ctx *curr_src;
+
+	/* Mapping to HW. */
+	unsigned char unit_no;
+};
+
+
+struct cryptocop_private{
+	cryptocop_session_id sid;
+	struct cryptocop_private *next;
+};
+
+/* Session list. */
+
+struct cryptocop_transform_ctx{
+	struct cryptocop_transform_init init;
+	unsigned char dec_key[CRYPTOCOP_MAX_KEY_LENGTH];
+	unsigned int dec_key_set:1;
+
+	struct cryptocop_transform_ctx *next;
+};
+
+
+struct cryptocop_session{
+	cryptocop_session_id sid;
+
+	struct cryptocop_transform_ctx *tfrm_ctx;
+
+	struct cryptocop_session *next;
+};
+
+/* Priority levels for jobs sent to the cryptocop.  Checksum operations from
+   the kernel have the highest priority since TCP/IP stack processing must not
+   be a bottleneck. */
+typedef enum {
+	cryptocop_prio_kernel_csum = 0,
+	cryptocop_prio_kernel = 1,
+	cryptocop_prio_user = 2,
+	cryptocop_prio_no_prios = 3
+} cryptocop_queue_priority;
+
+struct cryptocop_prio_queue{
+	struct list_head jobs;
+	cryptocop_queue_priority prio;
+};
+
+struct cryptocop_prio_job{
+	struct list_head node;
+	cryptocop_queue_priority prio;
+
+	struct cryptocop_operation *oper;
+	struct cryptocop_int_operation *iop;
+};
+
+struct ioctl_job_cb_ctx {
+	unsigned int processed:1;
+};
+
+
+static struct cryptocop_session *cryptocop_sessions = NULL;
+spinlock_t cryptocop_sessions_lock;
+
+/* Next Session ID to assign. */
+static cryptocop_session_id next_sid = 1;
+
+/* Pad for checksum. */
+static const char csum_zero_pad[1] = {0x00};
+
+/* Trash buffer for mem2mem operations. */
+#define MEM2MEM_DISCARD_BUF_LENGTH  (512)
+static unsigned char mem2mem_discard_buf[MEM2MEM_DISCARD_BUF_LENGTH];
+
+/* Descriptor pool. */
+/* FIXME Tweak this value. */
+#define CRYPTOCOP_DESCRIPTOR_POOL_SIZE   (100)
+static struct cryptocop_dma_desc descr_pool[CRYPTOCOP_DESCRIPTOR_POOL_SIZE];
+static struct cryptocop_dma_desc *descr_pool_free_list;
+static int descr_pool_no_free;
+static spinlock_t descr_pool_lock;
+
+/* Lock that stops the cryptocop from starting to process a new operation.
+   The holder of this lock MUST call cryptocop_start_job() after unlocking it. */
+spinlock_t cryptocop_process_lock;
+
+static struct cryptocop_prio_queue cryptocop_job_queues[cryptocop_prio_no_prios];
+static spinlock_t cryptocop_job_queue_lock;
+static struct cryptocop_prio_job *cryptocop_running_job = NULL;
+static spinlock_t running_job_lock;
+
+/* The interrupt handler appends completed jobs to this list. The scheduled
+ * tasklet removes them upon sending the response to the crypto consumer. */
+static struct list_head cryptocop_completed_jobs;
+static spinlock_t cryptocop_completed_jobs_lock;
+
+DECLARE_WAIT_QUEUE_HEAD(cryptocop_ioc_process_wq);
+
+
+/** Local functions. **/
+
+static int cryptocop_open(struct inode *, struct file *);
+
+static int cryptocop_release(struct inode *, struct file *);
+
+static long cryptocop_ioctl(struct file *file,
+			   unsigned int cmd, unsigned long arg);
+
+static void cryptocop_start_job(void);
+
+static int cryptocop_job_queue_insert(cryptocop_queue_priority prio, struct cryptocop_operation *operation);
+static int cryptocop_job_setup(struct cryptocop_prio_job **pj, struct cryptocop_operation *operation);
+
+static int cryptocop_job_queue_init(void);
+static void cryptocop_job_queue_close(void);
+
+static int create_md5_pad(int alloc_flag, unsigned long long hashed_length, char **pad, size_t *pad_length);
+
+static int create_sha1_pad(int alloc_flag, unsigned long long hashed_length, char **pad, size_t *pad_length);
+
+static int transform_ok(struct cryptocop_transform_init *tinit);
+
+static struct cryptocop_session *get_session(cryptocop_session_id sid);
+
+static struct cryptocop_transform_ctx *get_transform_ctx(struct cryptocop_session *sess, cryptocop_tfrm_id tid);
+
+static void delete_internal_operation(struct cryptocop_int_operation *iop);
+
+static void get_aes_decrypt_key(unsigned char *dec_key, const unsigned  char *key, unsigned int keylength);
+
+static int init_stream_coprocessor(void);
+
+static void __exit exit_stream_coprocessor(void);
+
+/*#define LDEBUG*/
+#ifdef LDEBUG
+#define DEBUG(s) s
+#define DEBUG_API(s) s
+static void print_cryptocop_operation(struct cryptocop_operation *cop);
+static void print_dma_descriptors(struct cryptocop_int_operation *iop);
+static void print_strcop_crypto_op(struct strcop_crypto_op *cop);
+static void print_lock_status(void);
+static void print_user_dma_lists(struct cryptocop_dma_list_operation *dma_op);
+#define assert(s) do { if (!(s)) panic(#s); } while (0)
+#else
+#define DEBUG(s)
+#define DEBUG_API(s)
+#define assert(s)
+#endif
+
+
+/* Transform constants. */
+#define DES_BLOCK_LENGTH   (8)
+#define AES_BLOCK_LENGTH   (16)
+#define MD5_BLOCK_LENGTH   (64)
+#define SHA1_BLOCK_LENGTH  (64)
+#define CSUM_BLOCK_LENGTH  (2)
+#define MD5_STATE_LENGTH   (16)
+#define SHA1_STATE_LENGTH  (20)
+
+/* The device number. */
+#define CRYPTOCOP_MAJOR    (254)
+#define CRYPTOCOP_MINOR    (0)
+
+
+
+const struct file_operations cryptocop_fops = {
+	.owner		= THIS_MODULE,
+	.open		= cryptocop_open,
+	.release	= cryptocop_release,
+	.unlocked_ioctl = cryptocop_ioctl,
+	.llseek		= noop_llseek,
+};
+
+
+static void free_cdesc(struct cryptocop_dma_desc *cdesc)
+{
+	DEBUG(printk("free_cdesc: cdesc 0x%p, from_pool=%d\n", cdesc, cdesc->from_pool));
+	kfree(cdesc->free_buf);
+
+	if (cdesc->from_pool) {
+		unsigned long int flags;
+		spin_lock_irqsave(&descr_pool_lock, flags);
+		cdesc->next = descr_pool_free_list;
+		descr_pool_free_list = cdesc;
+		++descr_pool_no_free;
+		spin_unlock_irqrestore(&descr_pool_lock, flags);
+	} else {
+		kfree(cdesc);
+	}
+}
+
+
+static struct cryptocop_dma_desc *alloc_cdesc(int alloc_flag)
+{
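+	/* Atomic callers cannot sleep, so serve them from the preallocated
+	 * descriptor pool. */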
+	int use_pool = (alloc_flag & GFP_ATOMIC) ? 1 : 0;
+	struct cryptocop_dma_desc *cdesc;
+
+	if (use_pool) {
+		unsigned long int flags;
+		spin_lock_irqsave(&descr_pool_lock, flags);
+		if (!descr_pool_free_list) {
+			spin_unlock_irqrestore(&descr_pool_lock, flags);
+			DEBUG_API(printk("alloc_cdesc: pool is empty\n"));
+			return NULL;
+		}
+		cdesc = descr_pool_free_list;
+		descr_pool_free_list = descr_pool_free_list->next;
+		--descr_pool_no_free;
+		spin_unlock_irqrestore(&descr_pool_lock, flags);
+		cdesc->from_pool = 1;
+	} else {
+		cdesc = kmalloc(sizeof(struct cryptocop_dma_desc), alloc_flag);
+		if (!cdesc) {
+			DEBUG_API(printk("alloc_cdesc: kmalloc\n"));
+			return NULL;
+		}
+		cdesc->from_pool = 0;
+	}
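+	/* Round the embedded descriptor buffer up to the next 32-byte boundary;
+	 * DESCR_ALLOC_PAD guarantees the aligned descriptor still fits within
+	 * dma_descr_buf. */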
+	cdesc->dma_descr = (dma_descr_data*)(((unsigned long int)cdesc + offsetof(struct cryptocop_dma_desc, dma_descr_buf) + DESCR_ALLOC_PAD) & ~0x0000001F);
+
+	cdesc->next = NULL;
+
+	cdesc->free_buf = NULL;
+	cdesc->dma_descr->out_eop = 0;
+	cdesc->dma_descr->in_eop = 0;
+	cdesc->dma_descr->intr = 0;
+	cdesc->dma_descr->eol = 0;
+	cdesc->dma_descr->wait = 0;
+	cdesc->dma_descr->buf = NULL;
+	cdesc->dma_descr->after = NULL;
+
+	DEBUG_API(printk("alloc_cdesc: return 0x%p, cdesc->dma_descr=0x%p, from_pool=%d\n", cdesc, cdesc->dma_descr, cdesc->from_pool));
+	return cdesc;
+}
+
+
+static void setup_descr_chain(struct cryptocop_dma_desc *cd)
+{
+	DEBUG(printk("setup_descr_chain: entering\n"));
+	while (cd) {
+		if (cd->next) {
+			cd->dma_descr->next = (dma_descr_data*)virt_to_phys(cd->next->dma_descr);
+		} else {
+			cd->dma_descr->next = NULL;
+		}
+		cd = cd->next;
+	}
+	DEBUG(printk("setup_descr_chain: exit\n"));
+}
+
+
+/* Create a pad descriptor for the transform.
+ * Return -1 for error, 0 if pad created. */
+static int create_pad_descriptor(struct cryptocop_tfrm_ctx *tc, struct cryptocop_dma_desc **pad_desc, int alloc_flag)
+{
+	struct cryptocop_dma_desc        *cdesc = NULL;
+	int                              error = 0;
+	struct strcop_meta_out           mo = {
+		.ciphsel = src_none,
+		.hashsel = src_none,
+		.csumsel = src_none
+	};
+	char                             *pad;
+	size_t                           plen;
+
+	DEBUG(printk("create_pad_descriptor: start.\n"));
+	/* Setup pad descriptor. */
+
+	DEBUG(printk("create_pad_descriptor: setting up padding.\n"));
+	cdesc = alloc_cdesc(alloc_flag);
+	if (!cdesc){
+		DEBUG_API(printk("create_pad_descriptor: alloc pad desc\n"));
+		goto error_cleanup;
+	}
+	switch (tc->unit_no) {
+	case src_md5:
+		error = create_md5_pad(alloc_flag, tc->consumed, &pad, &plen);
+		if (error){
+			DEBUG_API(printk("create_pad_descriptor: create_md5_pad_failed\n"));
+			goto error_cleanup;
+		}
+		cdesc->free_buf = pad;
+		mo.hashsel = src_dma;
+		mo.hashconf = tc->hash_conf;
+		mo.hashmode = tc->hash_mode;
+		break;
+	case src_sha1:
+		error = create_sha1_pad(alloc_flag, tc->consumed, &pad, &plen);
+		if (error){
+			DEBUG_API(printk("create_pad_descriptor: create_sha1_pad_failed\n"));
+			goto error_cleanup;
+		}
+		cdesc->free_buf = pad;
+		mo.hashsel = src_dma;
+		mo.hashconf = tc->hash_conf;
+		mo.hashmode = tc->hash_mode;
+		break;
+	case src_csum:
+		if (tc->consumed % tc->blocklength){
+			pad = (char*)csum_zero_pad;
+			plen = 1;
+		} else {
+			pad = (char*)cdesc; /* Use any pointer. */
+			plen = 0;
+		}
+		mo.csumsel = src_dma;
+		break;
+	}
+	cdesc->dma_descr->wait = 1;
+	cdesc->dma_descr->out_eop = 1; /* Since this is a pad, output is pushed.  EOP is OK here since the padded unit is the only one active. */
+	cdesc->dma_descr->buf = (char*)virt_to_phys((char*)pad);
+	cdesc->dma_descr->after = cdesc->dma_descr->buf + plen;
+
+	cdesc->dma_descr->md = REG_TYPE_CONV(unsigned short int, struct strcop_meta_out, mo);
+	*pad_desc = cdesc;
+
+	return 0;
+
+ error_cleanup:
+	if (cdesc) free_cdesc(cdesc);
+	return -1;
+}
+
+
+static int setup_key_dl_desc(struct cryptocop_tfrm_ctx *tc, struct cryptocop_dma_desc **kd, int alloc_flag)
+{
+	struct cryptocop_dma_desc  *key_desc = alloc_cdesc(alloc_flag);
+	struct strcop_meta_out     mo = {0};
+
+	DEBUG(printk("setup_key_dl_desc\n"));
+
+	if (!key_desc) {
+		DEBUG_API(printk("setup_key_dl_desc: failed descriptor allocation.\n"));
+		return -ENOMEM;
+	}
+
+	/* Download key. */
+	if ((tc->tctx->init.alg == cryptocop_alg_aes) && (tc->tcfg->flags & CRYPTOCOP_DECRYPT)) {
+		/* Precook the AES decrypt key. */
+		if (!tc->tctx->dec_key_set){
+			get_aes_decrypt_key(tc->tctx->dec_key, tc->tctx->init.key, tc->tctx->init.keylen);
+			tc->tctx->dec_key_set = 1;
+		}
+		key_desc->dma_descr->buf = (char*)virt_to_phys(tc->tctx->dec_key);
+		key_desc->dma_descr->after = key_desc->dma_descr->buf + tc->tctx->init.keylen/8;
+	} else {
+		key_desc->dma_descr->buf = (char*)virt_to_phys(tc->tctx->init.key);
+		key_desc->dma_descr->after = key_desc->dma_descr->buf + tc->tctx->init.keylen/8;
+	}
+	/* Setup metadata. */
+	mo.dlkey = 1;
+	switch (tc->tctx->init.keylen) {
+	case 64:
+		mo.decrypt = 0;
+		mo.hashmode = 0;
+		break;
+	case 128:
+		mo.decrypt = 0;
+		mo.hashmode = 1;
+		break;
+	case 192:
+		mo.decrypt = 1;
+		mo.hashmode = 0;
+		break;
+	case 256:
+		mo.decrypt = 1;
+		mo.hashmode = 1;
+		break;
+	default:
+		break;
+	}
+	mo.ciphsel = mo.hashsel = mo.csumsel = src_none;
+	key_desc->dma_descr->md = REG_TYPE_CONV(unsigned short int, struct strcop_meta_out, mo);
+
+	key_desc->dma_descr->out_eop = 1;
+	key_desc->dma_descr->wait = 1;
+	key_desc->dma_descr->intr = 0;
+
+	*kd = key_desc;
+	return 0;
+}
+
+static int setup_cipher_iv_desc(struct cryptocop_tfrm_ctx *tc, struct cryptocop_dma_desc **id, int alloc_flag)
+{
+	struct cryptocop_dma_desc  *iv_desc = alloc_cdesc(alloc_flag);
+	struct strcop_meta_out     mo = {0};
+
+	DEBUG(printk("setup_cipher_iv_desc\n"));
+
+	if (!iv_desc) {
+		DEBUG_API(printk("setup_cipher_iv_desc: failed CBC IV descriptor allocation.\n"));
+		return -ENOMEM;
+	}
+	/* Download IV. */
+	iv_desc->dma_descr->buf = (char*)virt_to_phys(tc->tcfg->iv);
+	iv_desc->dma_descr->after = iv_desc->dma_descr->buf + tc->blocklength;
+
+	/* Setup metadata. */
+	mo.hashsel = mo.csumsel = src_none;
+	mo.ciphsel = src_dma;
+	mo.ciphconf = tc->ciph_conf;
+	mo.cbcmode = tc->cbcmode;
+
+	iv_desc->dma_descr->md = REG_TYPE_CONV(unsigned short int, struct strcop_meta_out, mo);
+
+	iv_desc->dma_descr->out_eop = 0;
+	iv_desc->dma_descr->wait = 1;
+	iv_desc->dma_descr->intr = 0;
+
+	*id = iv_desc;
+	return 0;
+}
+
+/* Map the output length of the transform to the operation output, starting at the inject index. */
+static int create_input_descriptors(struct cryptocop_operation *operation, struct cryptocop_tfrm_ctx *tc, struct cryptocop_dma_desc **id, int alloc_flag)
+{
+	int                        err = 0;
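+	/* Dummy list head; the assembled list is returned via head.next. */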
+	struct cryptocop_dma_desc  head = {0};
+	struct cryptocop_dma_desc  *outdesc = &head;
+	size_t                     iov_offset = 0;
+	size_t                     out_ix = 0;
+	int                        outiov_ix = 0;
+	struct strcop_meta_in      mi = {0};
+
+	size_t                     out_length = tc->produced;
+	int                        rem_length;
+	int                        dlength;
+
+	assert(out_length != 0);
+	if (((tc->produced + tc->tcfg->inject_ix) > operation->tfrm_op.outlen) || (tc->produced && (operation->tfrm_op.outlen == 0))) {
+		DEBUG_API(printk("create_input_descriptors: operation outdata too small\n"));
+		return -EINVAL;
+	}
+	/* Traverse the out iovec until the result inject index is reached. */
+	while ((outiov_ix < operation->tfrm_op.outcount) && ((out_ix + operation->tfrm_op.outdata[outiov_ix].iov_len) <= tc->tcfg->inject_ix)){
+		out_ix += operation->tfrm_op.outdata[outiov_ix].iov_len;
+		outiov_ix++;
+	}
+	if (outiov_ix >= operation->tfrm_op.outcount){
+		DEBUG_API(printk("create_input_descriptors: operation outdata too small\n"));
+		return -EINVAL;
+	}
+	iov_offset = tc->tcfg->inject_ix - out_ix;
+	mi.dmasel = tc->unit_no;
+
+	/* Setup the output descriptors. */
+	while ((out_length > 0) && (outiov_ix < operation->tfrm_op.outcount)) {
+		outdesc->next = alloc_cdesc(alloc_flag);
+		if (!outdesc->next) {
+			DEBUG_API(printk("create_input_descriptors: alloc_cdesc\n"));
+			err = -ENOMEM;
+			goto error_cleanup;
+		}
+		outdesc = outdesc->next;
+		rem_length = operation->tfrm_op.outdata[outiov_ix].iov_len - iov_offset;
+		dlength = (out_length < rem_length) ? out_length : rem_length;
+
+		DEBUG(printk("create_input_descriptors:\n"
+			     "outiov_ix=%d, rem_length=%d, dlength=%d\n"
+			     "iov_offset=%d, outdata[outiov_ix].iov_len=%d\n"
+			     "outcount=%d, outiov_ix=%d\n",
+			     outiov_ix, rem_length, dlength, iov_offset, operation->tfrm_op.outdata[outiov_ix].iov_len, operation->tfrm_op.outcount, outiov_ix));
+
+		outdesc->dma_descr->buf = (char*)virt_to_phys(operation->tfrm_op.outdata[outiov_ix].iov_base + iov_offset);
+		outdesc->dma_descr->after = outdesc->dma_descr->buf + dlength;
+		outdesc->dma_descr->md = REG_TYPE_CONV(unsigned short int, struct strcop_meta_in, mi);
+
+		out_length -= dlength;
+		iov_offset += dlength;
+		if (iov_offset >= operation->tfrm_op.outdata[outiov_ix].iov_len) {
+			iov_offset = 0;
+			++outiov_ix;
+		}
+	}
+	if (out_length > 0){
+		DEBUG_API(printk("create_input_descriptors: not enough room for output, %d remained\n", out_length));
+		err = -EINVAL;
+		goto error_cleanup;
+	}
+	/* Set sync in last descriptor. */
+	mi.sync = 1;
+	outdesc->dma_descr->md = REG_TYPE_CONV(unsigned short int, struct strcop_meta_in, mi);
+
+	*id = head.next;
+	return 0;
+
+ error_cleanup:
+	while (head.next) {
+		outdesc = head.next->next;
+		free_cdesc(head.next);
+		head.next = outdesc;
+	}
+	return err;
+}
+
+
+static int create_output_descriptors(struct cryptocop_operation *operation, int *iniov_ix, int *iniov_offset, size_t desc_len, struct cryptocop_dma_desc **current_out_cdesc, struct strcop_meta_out *meta_out, int alloc_flag)
+{
+	while (desc_len != 0) {
+		struct cryptocop_dma_desc  *cdesc;
+		int                        rem_length = operation->tfrm_op.indata[*iniov_ix].iov_len - *iniov_offset;
+		int                        dlength = (desc_len < rem_length) ? desc_len : rem_length;
+
+		cdesc = alloc_cdesc(alloc_flag);
+		if (!cdesc) {
+			DEBUG_API(printk("create_output_descriptors: alloc_cdesc\n"));
+			return -ENOMEM;
+		}
+		(*current_out_cdesc)->next = cdesc;
+		(*current_out_cdesc) = cdesc;
+
+		cdesc->free_buf = NULL;
+
+		cdesc->dma_descr->buf = (char*)virt_to_phys(operation->tfrm_op.indata[*iniov_ix].iov_base + *iniov_offset);
+		cdesc->dma_descr->after = cdesc->dma_descr->buf + dlength;
+
+		assert(desc_len >= dlength);
+		desc_len -= dlength;
+		*iniov_offset += dlength;
+		if (*iniov_offset >= operation->tfrm_op.indata[*iniov_ix].iov_len) {
+			*iniov_offset = 0;
+			++(*iniov_ix);
+			if ((*iniov_ix >= operation->tfrm_op.incount) && (desc_len > 0)) {
+				DEBUG_API(printk("create_output_descriptors: not enough indata in operation."));
+				return  -EINVAL;
+			}
+		}
+		cdesc->dma_descr->md = REG_TYPE_CONV(unsigned short int, struct strcop_meta_out, (*meta_out));
+	} /* while (desc_len != 0) */
+	/* Last DMA descriptor gets a 'wait' bit to signal expected change in metadata. */
+	(*current_out_cdesc)->dma_descr->wait = 1; /* This will set extraneous WAIT in some situations, e.g. when padding hashes and checksums. */
+
+	return 0;
+}
+
+
+static int append_input_descriptors(struct cryptocop_operation *operation, struct cryptocop_dma_desc **current_in_cdesc, struct cryptocop_dma_desc **current_out_cdesc, struct cryptocop_tfrm_ctx *tc, int alloc_flag)
+{
+	DEBUG(printk("append_input_descriptors, tc=0x%p, unit_no=%d\n", tc, tc->unit_no));
+	if (tc->tcfg) {
+		int                        failed = 0;
+		struct cryptocop_dma_desc  *idescs = NULL;
+		DEBUG(printk("append_input_descriptors: pushing output, consumed %d produced %d bytes.\n", tc->consumed, tc->produced));
+		if (tc->pad_descs) {
+			DEBUG(printk("append_input_descriptors: append pad descriptors to DMA out list.\n"));
+			while (tc->pad_descs) {
+				DEBUG(printk("append descriptor 0x%p\n", tc->pad_descs));
+				(*current_out_cdesc)->next = tc->pad_descs;
+				tc->pad_descs = tc->pad_descs->next;
+				(*current_out_cdesc) = (*current_out_cdesc)->next;
+			}
+		}
+
+		/* Setup and append output descriptors to DMA in list. */
+		if (tc->unit_no == src_dma){
+			/* mem2mem.  Setup DMA in descriptors to discard all input prior to the requested mem2mem data. */
+			struct strcop_meta_in mi = {.sync = 0, .dmasel = src_dma};
+			unsigned int start_ix = tc->start_ix;
+			while (start_ix){
+				unsigned int desclen = start_ix < MEM2MEM_DISCARD_BUF_LENGTH ? start_ix : MEM2MEM_DISCARD_BUF_LENGTH;
+				(*current_in_cdesc)->next = alloc_cdesc(alloc_flag);
+				if (!(*current_in_cdesc)->next){
+					DEBUG_API(printk("append_input_descriptors: alloc_cdesc mem2mem discard failed\n"));
+					return -ENOMEM;
+				}
+				(*current_in_cdesc) = (*current_in_cdesc)->next;
+				(*current_in_cdesc)->dma_descr->buf = (char*)virt_to_phys(mem2mem_discard_buf);
+				(*current_in_cdesc)->dma_descr->after = (*current_in_cdesc)->dma_descr->buf + desclen;
+				(*current_in_cdesc)->dma_descr->md = REG_TYPE_CONV(unsigned short int, struct strcop_meta_in, mi);
+				start_ix -= desclen;
+			}
+			mi.sync = 1;
+			(*current_in_cdesc)->dma_descr->md = REG_TYPE_CONV(unsigned short int, struct strcop_meta_in, mi);
+		}
+
+		failed = create_input_descriptors(operation, tc, &idescs, alloc_flag);
+		if (failed){
+			DEBUG_API(printk("append_input_descriptors: output descriptor setup failed\n"));
+			return failed;
+		}
+		DEBUG(printk("append_input_descriptors: append output descriptors to DMA in list.\n"));
+		while (idescs) {
+			DEBUG(printk("append descriptor 0x%p\n", idescs));
+			(*current_in_cdesc)->next = idescs;
+			idescs = idescs->next;
+			(*current_in_cdesc) = (*current_in_cdesc)->next;
+		}
+	}
+	return 0;
+}
+
+
+
+static int cryptocop_setup_dma_list(struct cryptocop_operation *operation, struct cryptocop_int_operation **int_op, int alloc_flag)
+{
+	struct cryptocop_session *sess;
+	struct cryptocop_transform_ctx *tctx;
+
+	struct cryptocop_tfrm_ctx digest_ctx = {
+		.previous_src = src_none,
+		.current_src = src_none,
+		.start_ix = 0,
+		.requires_padding = 1,
+		.strict_block_length = 0,
+		.hash_conf = 0,
+		.hash_mode = 0,
+		.ciph_conf = 0,
+		.cbcmode = 0,
+		.decrypt = 0,
+		.consumed = 0,
+		.produced = 0,
+		.pad_descs = NULL,
+		.active = 0,
+		.done = 0,
+		.prev_src = NULL,
+		.curr_src = NULL,
+		.tcfg = NULL};
+	struct cryptocop_tfrm_ctx cipher_ctx = {
+		.previous_src = src_none,
+		.current_src = src_none,
+		.start_ix = 0,
+		.requires_padding = 0,
+		.strict_block_length = 1,
+		.hash_conf = 0,
+		.hash_mode = 0,
+		.ciph_conf = 0,
+		.cbcmode = 0,
+		.decrypt = 0,
+		.consumed = 0,
+		.produced = 0,
+		.pad_descs = NULL,
+		.active = 0,
+		.done = 0,
+		.prev_src = NULL,
+		.curr_src = NULL,
+		.tcfg = NULL};
+	struct cryptocop_tfrm_ctx csum_ctx = {
+		.previous_src = src_none,
+		.current_src = src_none,
+		.start_ix = 0,
+		.blocklength = 2,
+		.requires_padding = 1,
+		.strict_block_length = 0,
+		.hash_conf = 0,
+		.hash_mode = 0,
+		.ciph_conf = 0,
+		.cbcmode = 0,
+		.decrypt = 0,
+		.consumed = 0,
+		.produced = 0,
+		.pad_descs = NULL,
+		.active = 0,
+		.done = 0,
+		.tcfg = NULL,
+		.prev_src = NULL,
+		.curr_src = NULL,
+		.unit_no = src_csum};
+	struct cryptocop_tfrm_cfg *tcfg = operation->tfrm_op.tfrm_cfg;
+
+	unsigned int indata_ix = 0;
+
+	/* iovec accounting. */
+	int iniov_ix = 0;
+	int iniov_offset = 0;
+
+	/* Operation descriptor cfg traversal pointer. */
+	struct cryptocop_desc *odsc;
+
+	int failed = 0;
+	/* List heads for allocated descriptors. */
+	struct cryptocop_dma_desc out_cdesc_head = {0};
+	struct cryptocop_dma_desc in_cdesc_head = {0};
+
+	struct cryptocop_dma_desc *current_out_cdesc = &out_cdesc_head;
+	struct cryptocop_dma_desc *current_in_cdesc = &in_cdesc_head;
+
+	struct cryptocop_tfrm_ctx *output_tc = NULL;
+	void                      *iop_alloc_ptr;
+
+	assert(operation != NULL);
+	assert(int_op != NULL);
+
+	DEBUG(printk("cryptocop_setup_dma_list: start\n"));
+	DEBUG(print_cryptocop_operation(operation));
+
+	sess = get_session(operation->sid);
+	if (!sess) {
+		DEBUG_API(printk("cryptocop_setup_dma_list: no session found for operation.\n"));
+		failed = -EINVAL;
+		goto error_cleanup;
+	}
+	iop_alloc_ptr = kmalloc(DESCR_ALLOC_PAD + sizeof(struct cryptocop_int_operation), alloc_flag);
+	if (!iop_alloc_ptr) {
+		DEBUG_API(printk("cryptocop_setup_dma_list:  kmalloc cryptocop_int_operation\n"));
+		failed = -ENOMEM;
+		goto error_cleanup;
+	}
+	(*int_op) = (struct cryptocop_int_operation*)(((unsigned long int)(iop_alloc_ptr + DESCR_ALLOC_PAD + offsetof(struct cryptocop_int_operation, ctx_out)) & ~0x0000001F) - offsetof(struct cryptocop_int_operation, ctx_out));
+	DEBUG(memset((*int_op), 0xff, sizeof(struct cryptocop_int_operation)));
+	(*int_op)->alloc_ptr = iop_alloc_ptr;
+	DEBUG(printk("cryptocop_setup_dma_list: *int_op=0x%p, alloc_ptr=0x%p\n", *int_op, (*int_op)->alloc_ptr));
+
+	(*int_op)->sid = operation->sid;
+	(*int_op)->cdesc_out = NULL;
+	(*int_op)->cdesc_in = NULL;
+	(*int_op)->tdes_mode = cryptocop_3des_ede;
+	(*int_op)->csum_mode = cryptocop_csum_le;
+	(*int_op)->ddesc_out = NULL;
+	(*int_op)->ddesc_in = NULL;
+
+	/* Scan operation->tfrm_op.tfrm_cfg for bad configuration and set up the local contexts. */
+	if (!tcfg) {
+		DEBUG_API(printk("cryptocop_setup_dma_list: no configured transforms in operation.\n"));
+		failed = -EINVAL;
+		goto error_cleanup;
+	}
+	while (tcfg) {
+		tctx = get_transform_ctx(sess, tcfg->tid);
+		if (!tctx) {
+			DEBUG_API(printk("cryptocop_setup_dma_list: no transform id %d in session.\n", tcfg->tid));
+			failed = -EINVAL;
+			goto error_cleanup;
+		}
+		if (tcfg->inject_ix > operation->tfrm_op.outlen){
+			DEBUG_API(printk("cryptocop_setup_dma_list: transform id %d inject_ix (%d) > operation->tfrm_op.outlen(%d)", tcfg->tid, tcfg->inject_ix, operation->tfrm_op.outlen));
+			failed = -EINVAL;
+			goto error_cleanup;
+		}
+		switch (tctx->init.alg){
+		case cryptocop_alg_mem2mem:
+			if (cipher_ctx.tcfg != NULL){
+				DEBUG_API(printk("cryptocop_setup_dma_list: multiple ciphers in operation.\n"));
+				failed = -EINVAL;
+				goto error_cleanup;
+			}
+			/* mem2mem is handled as a NULL cipher. */
+			cipher_ctx.cbcmode = 0;
+			cipher_ctx.decrypt = 0;
+			cipher_ctx.blocklength = 1;
+			cipher_ctx.ciph_conf = 0;
+			cipher_ctx.unit_no = src_dma;
+			cipher_ctx.tcfg = tcfg;
+			cipher_ctx.tctx = tctx;
+			break;
+		case cryptocop_alg_des:
+		case cryptocop_alg_3des:
+		case cryptocop_alg_aes:
+			/* cipher */
+			if (cipher_ctx.tcfg != NULL){
+				DEBUG_API(printk("cryptocop_setup_dma_list: multiple ciphers in operation.\n"));
+				failed = -EINVAL;
+				goto error_cleanup;
+			}
+			cipher_ctx.tcfg = tcfg;
+			cipher_ctx.tctx = tctx;
+			if (cipher_ctx.tcfg->flags & CRYPTOCOP_DECRYPT){
+				cipher_ctx.decrypt = 1;
+			}
+			switch (tctx->init.cipher_mode) {
+			case cryptocop_cipher_mode_ecb:
+				cipher_ctx.cbcmode = 0;
+				break;
+			case cryptocop_cipher_mode_cbc:
+				cipher_ctx.cbcmode = 1;
+				break;
+			default:
+				DEBUG_API(printk("cryptocop_setup_dma_list: cipher_ctx, bad cipher mode==%d\n", tctx->init.cipher_mode));
+				failed = -EINVAL;
+				goto error_cleanup;
+			}
+			DEBUG(printk("cryptocop_setup_dma_list: cipher_ctx, set CBC mode==%d\n", cipher_ctx.cbcmode));
+			switch (tctx->init.alg){
+			case cryptocop_alg_des:
+				cipher_ctx.ciph_conf = 0;
+				cipher_ctx.unit_no = src_des;
+				cipher_ctx.blocklength = DES_BLOCK_LENGTH;
+				break;
+			case cryptocop_alg_3des:
+				cipher_ctx.ciph_conf = 1;
+				cipher_ctx.unit_no = src_des;
+				cipher_ctx.blocklength = DES_BLOCK_LENGTH;
+				break;
+			case cryptocop_alg_aes:
+				cipher_ctx.ciph_conf = 2;
+				cipher_ctx.unit_no = src_aes;
+				cipher_ctx.blocklength = AES_BLOCK_LENGTH;
+				break;
+			default:
+				panic("cryptocop_setup_dma_list: impossible algorithm %d\n", tctx->init.alg);
+			}
+			(*int_op)->tdes_mode = tctx->init.tdes_mode;
+			break;
+		case cryptocop_alg_md5:
+		case cryptocop_alg_sha1:
+			/* digest */
+			if (digest_ctx.tcfg != NULL){
+				DEBUG_API(printk("cryptocop_setup_dma_list: multiple digests in operation.\n"));
+				failed = -EINVAL;
+				goto error_cleanup;
+			}
+			digest_ctx.tcfg = tcfg;
+			digest_ctx.tctx = tctx;
+			digest_ctx.hash_mode = 0; /* Don't use explicit IV in this API. */
+			switch (tctx->init.alg){
+			case cryptocop_alg_md5:
+				digest_ctx.blocklength = MD5_BLOCK_LENGTH;
+				digest_ctx.unit_no = src_md5;
+				digest_ctx.hash_conf = 1; /* 1 => MD-5 */
+				break;
+			case cryptocop_alg_sha1:
+				digest_ctx.blocklength = SHA1_BLOCK_LENGTH;
+				digest_ctx.unit_no = src_sha1;
+				digest_ctx.hash_conf = 0; /* 0 => SHA-1 */
+				break;
+			default:
+				panic("cryptocop_setup_dma_list: impossible digest algorithm\n");
+			}
+			break;
+		case cryptocop_alg_csum:
+			/* digest */
+			if (csum_ctx.tcfg != NULL){
+				DEBUG_API(printk("cryptocop_setup_dma_list: multiple checksums in operation.\n"));
+				failed = -EINVAL;
+				goto error_cleanup;
+			}
+			(*int_op)->csum_mode = tctx->init.csum_mode;
+			csum_ctx.tcfg = tcfg;
+			csum_ctx.tctx = tctx;
+			break;
+		default:
+			/* no algorithm. */
+			DEBUG_API(printk("cryptocop_setup_dma_list: invalid algorithm %d specified in tfrm %d.\n", tctx->init.alg, tcfg->tid));
+			failed = -EINVAL;
+			goto error_cleanup;
+		}
+		tcfg = tcfg->next;
+	}
+	/* Download key if a cipher is used. */
+	if (cipher_ctx.tcfg && (cipher_ctx.tctx->init.alg != cryptocop_alg_mem2mem)){
+		struct cryptocop_dma_desc  *key_desc = NULL;
+
+		failed = setup_key_dl_desc(&cipher_ctx, &key_desc, alloc_flag);
+		if (failed) {
+			DEBUG_API(printk("cryptocop_setup_dma_list: setup key dl\n"));
+			goto error_cleanup;
+		}
+		current_out_cdesc->next = key_desc;
+		current_out_cdesc = key_desc;
+		indata_ix += (unsigned int)(key_desc->dma_descr->after - key_desc->dma_descr->buf);
+
+		/* Download explicit IV if a cipher is used and CBC mode and explicit IV selected. */
+		if ((cipher_ctx.tctx->init.cipher_mode == cryptocop_cipher_mode_cbc) && (cipher_ctx.tcfg->flags & CRYPTOCOP_EXPLICIT_IV)) {
+			struct cryptocop_dma_desc  *iv_desc = NULL;
+
+			DEBUG(printk("cryptocop_setup_dma_list: setup cipher CBC IV descriptor.\n"));
+
+			failed = setup_cipher_iv_desc(&cipher_ctx, &iv_desc, alloc_flag);
+			if (failed) {
+				DEBUG_API(printk("cryptocop_setup_dma_list: CBC IV descriptor.\n"));
+				goto error_cleanup;
+			}
+			current_out_cdesc->next = iv_desc;
+			current_out_cdesc = iv_desc;
+			indata_ix += (unsigned int)(iv_desc->dma_descr->after - iv_desc->dma_descr->buf);
+		}
+	}
+
+	/* Process descriptors. */
+	odsc = operation->tfrm_op.desc;
+	while (odsc) {
+		struct cryptocop_desc_cfg   *dcfg = odsc->cfg;
+		struct strcop_meta_out      meta_out = {0};
+		size_t                      desc_len = odsc->length;
+		int                         active_count, eop_needed_count;
+
+		output_tc = NULL;
+
+		DEBUG(printk("cryptocop_setup_dma_list: parsing an operation descriptor\n"));
+
+		while (dcfg) {
+			struct cryptocop_tfrm_ctx  *tc = NULL;
+
+			DEBUG(printk("cryptocop_setup_dma_list: parsing an operation descriptor configuration.\n"));
+			/* Get the local context for the transform and mark it as the output unit if it produces output. */
+			if (digest_ctx.tcfg && (digest_ctx.tcfg->tid == dcfg->tid)){
+				tc = &digest_ctx;
+			} else if (cipher_ctx.tcfg && (cipher_ctx.tcfg->tid == dcfg->tid)){
+				tc = &cipher_ctx;
+			} else if (csum_ctx.tcfg && (csum_ctx.tcfg->tid == dcfg->tid)){
+				tc = &csum_ctx;
+			}
+			if (!tc) {
+				DEBUG_API(printk("cryptocop_setup_dma_list: invalid transform %d specified in descriptor.\n", dcfg->tid));
+				failed = -EINVAL;
+				goto error_cleanup;
+			}
+			if (tc->done) {
+				DEBUG_API(printk("cryptocop_setup_dma_list: completed transform %d reused.\n", dcfg->tid));
+				failed = -EINVAL;
+				goto error_cleanup;
+			}
+			if (!tc->active) {
+				tc->start_ix = indata_ix;
+				tc->active = 1;
+			}
+
+			tc->previous_src = tc->current_src;
+			tc->prev_src = tc->curr_src;
+			/* Map source unit id to DMA source config. */
+			switch (dcfg->src){
+			case cryptocop_source_dma:
+				tc->current_src = src_dma;
+				break;
+			case cryptocop_source_des:
+				tc->current_src = src_des;
+				break;
+			case cryptocop_source_3des:
+				tc->current_src = src_des;
+				break;
+			case cryptocop_source_aes:
+				tc->current_src = src_aes;
+				break;
+			case cryptocop_source_md5:
+			case cryptocop_source_sha1:
+			case cryptocop_source_csum:
+			case cryptocop_source_none:
+			default:
+				/* We do not allow using accumulating style units (SHA-1, MD5, checksum) as sources to other units.
+				 */
+				DEBUG_API(printk("cryptocop_setup_dma_list: bad unit source configured %d.\n", dcfg->src));
+				failed = -EINVAL;
+				goto error_cleanup;
+			}
+			if (tc->current_src != src_dma) {
+				/* Find the unit we are sourcing from. */
+				if (digest_ctx.unit_no == tc->current_src){
+					tc->curr_src = &digest_ctx;
+				} else if (cipher_ctx.unit_no == tc->current_src){
+					tc->curr_src = &cipher_ctx;
+				} else if (csum_ctx.unit_no == tc->current_src){
+					tc->curr_src = &csum_ctx;
+				}
+				if ((tc->curr_src == tc) && (tc->unit_no != src_dma)){
+					DEBUG_API(printk("cryptocop_setup_dma_list: unit %d configured to source from itself.\n", tc->unit_no));
+					failed = -EINVAL;
+					goto error_cleanup;
+				}
+			} else {
+				tc->curr_src = NULL;
+			}
+
+			/* Detect source switch. */
+			DEBUG(printk("cryptocop_setup_dma_list: tc->active=%d tc->unit_no=%d tc->current_src=%d tc->previous_src=%d, tc->curr_src=0x%p, tc->prev_srv=0x%p\n", tc->active, tc->unit_no, tc->current_src, tc->previous_src, tc->curr_src, tc->prev_src));
+			if (tc->active && (tc->current_src != tc->previous_src)) {
+				/* Only allow source switch when both the old source unit and the new one have
+				 * no pending data to process (i.e. the consumed length must be a multiple of the
+				 * transform blocklength). */
+				/* Note: if the src == NULL we are actually sourcing from DMA out. */
+				if (((tc->prev_src != NULL) && (tc->prev_src->consumed % tc->prev_src->blocklength)) ||
+				    ((tc->curr_src != NULL) && (tc->curr_src->consumed % tc->curr_src->blocklength)))
+				{
+					DEBUG_API(printk("cryptocop_setup_dma_list: can only disconnect from or connect to a unit on a multiple of the blocklength, old: cons=%d, prod=%d, block=%d, new: cons=%d prod=%d, block=%d.\n", tc->prev_src ? tc->prev_src->consumed : INT_MIN, tc->prev_src ? tc->prev_src->produced : INT_MIN, tc->prev_src ? tc->prev_src->blocklength : INT_MIN, tc->curr_src ? tc->curr_src->consumed : INT_MIN, tc->curr_src ? tc->curr_src->produced : INT_MIN, tc->curr_src ? tc->curr_src->blocklength : INT_MIN));
+					failed = -EINVAL;
+					goto error_cleanup;
+				}
+			}
+			/* Detect unit deactivation. */
+			if (dcfg->last) {
+				/* Length check of this is handled below. */
+				tc->done = 1;
+			}
+			dcfg = dcfg->next;
+		} /* while (dcfg) */
+		DEBUG(printk("cryptocop_setup_dma_list: parsing operation descriptor configuration complete.\n"));
+
+		if (cipher_ctx.active && (cipher_ctx.curr_src != NULL) && !cipher_ctx.curr_src->active){
+			DEBUG_API(printk("cryptocop_setup_dma_list: cipher source from inactive unit %d\n", cipher_ctx.curr_src->unit_no));
+			failed = -EINVAL;
+			goto error_cleanup;
+		}
+		if (digest_ctx.active && (digest_ctx.curr_src != NULL) && !digest_ctx.curr_src->active){
+			DEBUG_API(printk("cryptocop_setup_dma_list: digest source from inactive unit %d\n", digest_ctx.curr_src->unit_no));
+			failed = -EINVAL;
+			goto error_cleanup;
+		}
+		if (csum_ctx.active && (csum_ctx.curr_src != NULL) && !csum_ctx.curr_src->active){
+			DEBUG_API(printk("cryptocop_setup_dma_list: cipher source from inactive unit %d\n", csum_ctx.curr_src->unit_no));
+			failed = -EINVAL;
+			goto error_cleanup;
+		}
+
+		/* Update consumed and produced lengths.
+
+		   The consumed length accounting here is actually cheating.  If a unit sources from DMA (or any
+		   other unit that processes data in blocks of one octet) it is correct, but if it sources from a
+		   block processing unit, i.e. a cipher, it will be temporarily incorrect at times.  However,
+		   since the HW only allows changing source to or from a block processing unit at points where that
+		   unit has processed an exact multiple of its block length, the end result will be correct.
+		   Beware that if the source change restriction changes, this code will need to be (much) reworked.
+		*/
+		DEBUG(printk("cryptocop_setup_dma_list: desc->length=%d, desc_len=%d.\n", odsc->length, desc_len));
+
+		if (csum_ctx.active) {
+			csum_ctx.consumed += desc_len;
+			if (csum_ctx.done) {
+				csum_ctx.produced = 2;
+			}
+			DEBUG(printk("cryptocop_setup_dma_list: csum_ctx producing: consumed=%d, produced=%d, blocklength=%d.\n", csum_ctx.consumed, csum_ctx.produced, csum_ctx.blocklength));
+		}
+		if (digest_ctx.active) {
+			digest_ctx.consumed += desc_len;
+			if (digest_ctx.done) {
+				if (digest_ctx.unit_no == src_md5) {
+					digest_ctx.produced = MD5_STATE_LENGTH;
+				} else {
+					digest_ctx.produced = SHA1_STATE_LENGTH;
+				}
+			}
+			DEBUG(printk("cryptocop_setup_dma_list: digest_ctx producing: consumed=%d, produced=%d, blocklength=%d.\n", digest_ctx.consumed, digest_ctx.produced, digest_ctx.blocklength));
+		}
+		if (cipher_ctx.active) {
+			/* Ciphers are allowed only to source from DMA out.  That is filtered above. */
+			assert(cipher_ctx.current_src == src_dma);
+			cipher_ctx.consumed += desc_len;
+			cipher_ctx.produced = cipher_ctx.blocklength * (cipher_ctx.consumed / cipher_ctx.blocklength);
+			if (cipher_ctx.cbcmode && !(cipher_ctx.tcfg->flags & CRYPTOCOP_EXPLICIT_IV) && cipher_ctx.produced){
+				cipher_ctx.produced -= cipher_ctx.blocklength; /* Compensate for CBC iv. */
+			}
+			DEBUG(printk("cryptocop_setup_dma_list: cipher_ctx producing: consumed=%d, produced=%d, blocklength=%d.\n", cipher_ctx.consumed, cipher_ctx.produced, cipher_ctx.blocklength));
+		}
+
+		/* Setup the DMA out descriptors. */
+		/* Configure the metadata. */
+		active_count = 0;
+		eop_needed_count = 0;
+		if (cipher_ctx.active) {
+			++active_count;
+			if (cipher_ctx.unit_no == src_dma){
+				/* mem2mem */
+				meta_out.ciphsel = src_none;
+			} else {
+				meta_out.ciphsel = cipher_ctx.current_src;
+			}
+			meta_out.ciphconf = cipher_ctx.ciph_conf;
+			meta_out.cbcmode = cipher_ctx.cbcmode;
+			meta_out.decrypt = cipher_ctx.decrypt;
+			DEBUG(printk("set ciphsel=%d ciphconf=%d cbcmode=%d decrypt=%d\n", meta_out.ciphsel, meta_out.ciphconf, meta_out.cbcmode, meta_out.decrypt));
+			if (cipher_ctx.done) ++eop_needed_count;
+		} else {
+			meta_out.ciphsel = src_none;
+		}
+
+		if (digest_ctx.active) {
+			++active_count;
+			meta_out.hashsel = digest_ctx.current_src;
+			meta_out.hashconf = digest_ctx.hash_conf;
+			meta_out.hashmode = 0; /* Explicit mode is not used here. */
+			DEBUG(printk("set hashsel=%d hashconf=%d hashmode=%d\n", meta_out.hashsel, meta_out.hashconf, meta_out.hashmode));
+			if (digest_ctx.done) {
+				assert(digest_ctx.pad_descs == NULL);
+				failed = create_pad_descriptor(&digest_ctx, &digest_ctx.pad_descs, alloc_flag);
+				if (failed) {
+					DEBUG_API(printk("cryptocop_setup_dma_list: failed digest pad creation.\n"));
+					goto error_cleanup;
+				}
+			}
+		} else {
+			meta_out.hashsel = src_none;
+		}
+
+		if (csum_ctx.active) {
+			++active_count;
+			meta_out.csumsel = csum_ctx.current_src;
+			if (csum_ctx.done) {
+				assert(csum_ctx.pad_descs == NULL);
+				failed = create_pad_descriptor(&csum_ctx, &csum_ctx.pad_descs, alloc_flag);
+				if (failed) {
+					DEBUG_API(printk("cryptocop_setup_dma_list: failed csum pad creation.\n"));
+					goto error_cleanup;
+				}
+			}
+		} else {
+			meta_out.csumsel = src_none;
+		}
+		DEBUG(printk("cryptocop_setup_dma_list: %d eop needed, %d active units\n", eop_needed_count, active_count));
+		/* Setup DMA out descriptors for the indata. */
+		failed = create_output_descriptors(operation, &iniov_ix, &iniov_offset, desc_len, &current_out_cdesc, &meta_out, alloc_flag);
+		if (failed) {
+			DEBUG_API(printk("cryptocop_setup_dma_list: create_output_descriptors %d\n", failed));
+			goto error_cleanup;
+		}
+		/* Set up the out EOP.  Active units that are not done here cannot get an EOP,
+		 * so we must set up a zero length descriptor to DMA to signal EOP only to the done units.
+		 * If there is a pad descriptor, the padded unit will get its EOP from that descriptor.
+		 */
+		assert(active_count >= eop_needed_count);
+		assert((eop_needed_count == 0) || (eop_needed_count == 1));
+		if (eop_needed_count) {
+			/* This means that the bulk operation (cipher/m2m) is terminated. */
+			if (active_count > 1) {
+				/* Use zero length EOP descriptor. */
+				struct cryptocop_dma_desc *ed = alloc_cdesc(alloc_flag);
+				struct strcop_meta_out    ed_mo = {0};
+				if (!ed) {
+					DEBUG_API(printk("cryptocop_setup_dma_list: alloc EOP descriptor for cipher\n"));
+					failed = -ENOMEM;
+					goto error_cleanup;
+				}
+
+				assert(cipher_ctx.active && cipher_ctx.done);
+
+				if (cipher_ctx.unit_no == src_dma){
+					/* mem2mem */
+					ed_mo.ciphsel = src_none;
+				} else {
+					ed_mo.ciphsel = cipher_ctx.current_src;
+				}
+				ed_mo.ciphconf = cipher_ctx.ciph_conf;
+				ed_mo.cbcmode = cipher_ctx.cbcmode;
+				ed_mo.decrypt = cipher_ctx.decrypt;
+
+				ed->free_buf = NULL;
+				ed->dma_descr->wait = 1;
+				ed->dma_descr->out_eop = 1;
+
+				ed->dma_descr->buf = (char*)virt_to_phys(&ed); /* Use any valid physical address for zero length descriptor. */
+				ed->dma_descr->after = ed->dma_descr->buf;
+				ed->dma_descr->md = REG_TYPE_CONV(unsigned short int, struct strcop_meta_out, ed_mo);
+				current_out_cdesc->next = ed;
+				current_out_cdesc = ed;
+			} else {
+				/* Set EOP in the current out descriptor since the only active module is
+				 * the one needing the EOP. */
+
+				current_out_cdesc->dma_descr->out_eop = 1;
+			}
+		}
+
+		if (cipher_ctx.done && cipher_ctx.active) cipher_ctx.active = 0;
+		if (digest_ctx.done && digest_ctx.active) digest_ctx.active = 0;
+		if (csum_ctx.done && csum_ctx.active) csum_ctx.active = 0;
+		indata_ix += odsc->length;
+		odsc = odsc->next;
+	} /* while (odsc) */ /* Process descriptors. */
+	DEBUG(printk("cryptocop_setup_dma_list: done parsing operation descriptors\n"));
+	if (cipher_ctx.tcfg && (cipher_ctx.active || !cipher_ctx.done)){
+		DEBUG_API(printk("cryptocop_setup_dma_list: cipher operation not terminated.\n"));
+		failed = -EINVAL;
+		goto error_cleanup;
+	}
+	if (digest_ctx.tcfg && (digest_ctx.active || !digest_ctx.done)){
+		DEBUG_API(printk("cryptocop_setup_dma_list: digest operation not terminated.\n"));
+		failed = -EINVAL;
+		goto error_cleanup;
+	}
+	if (csum_ctx.tcfg && (csum_ctx.active || !csum_ctx.done)){
+		DEBUG_API(printk("cryptocop_setup_dma_list: csum operation not terminated.\n"));
+		failed = -EINVAL;
+		goto error_cleanup;
+	}
+
+	failed = append_input_descriptors(operation, &current_in_cdesc, &current_out_cdesc, &cipher_ctx, alloc_flag);
+	if (failed){
+		DEBUG_API(printk("cryptocop_setup_dma_list: append_input_descriptors cipher_ctx %d\n", failed));
+		goto error_cleanup;
+	}
+	failed = append_input_descriptors(operation, &current_in_cdesc, &current_out_cdesc, &digest_ctx, alloc_flag);
+	if (failed){
+		DEBUG_API(printk("cryptocop_setup_dma_list: append_input_descriptors cipher_ctx %d\n", failed));
+		goto error_cleanup;
+	}
+	failed = append_input_descriptors(operation, &current_in_cdesc, &current_out_cdesc, &csum_ctx, alloc_flag);
+	if (failed){
+		DEBUG_API(printk("cryptocop_setup_dma_list: append_input_descriptors cipher_ctx %d\n", failed));
+		goto error_cleanup;
+	}
+
+	DEBUG(printk("cryptocop_setup_dma_list: int_op=0x%p, *int_op=0x%p\n", int_op, *int_op));
+	(*int_op)->cdesc_out = out_cdesc_head.next;
+	(*int_op)->cdesc_in = in_cdesc_head.next;
+	DEBUG(printk("cryptocop_setup_dma_list: out_cdesc_head=0x%p in_cdesc_head=0x%p\n", (*int_op)->cdesc_out, (*int_op)->cdesc_in));
+
+	setup_descr_chain(out_cdesc_head.next);
+	setup_descr_chain(in_cdesc_head.next);
+
+	/* Last but not least: mark the last DMA in descriptor for INTR and EOL, and the
+	 * last DMA out descriptor for EOL.
+	 */
+	current_in_cdesc->dma_descr->intr = 1;
+	current_in_cdesc->dma_descr->eol = 1;
+	current_out_cdesc->dma_descr->eol = 1;
+
+	/* Setup DMA contexts. */
+	(*int_op)->ctx_out.next = NULL;
+	(*int_op)->ctx_out.eol = 1;
+	(*int_op)->ctx_out.intr = 0;
+	(*int_op)->ctx_out.store_mode = 0;
+	(*int_op)->ctx_out.en = 0;
+	(*int_op)->ctx_out.dis = 0;
+	(*int_op)->ctx_out.md0 = 0;
+	(*int_op)->ctx_out.md1 = 0;
+	(*int_op)->ctx_out.md2 = 0;
+	(*int_op)->ctx_out.md3 = 0;
+	(*int_op)->ctx_out.md4 = 0;
+	(*int_op)->ctx_out.saved_data = (dma_descr_data*)virt_to_phys((*int_op)->cdesc_out->dma_descr);
+	(*int_op)->ctx_out.saved_data_buf = (*int_op)->cdesc_out->dma_descr->buf; /* Already physical address. */
+
+	(*int_op)->ctx_in.next = NULL;
+	(*int_op)->ctx_in.eol = 1;
+	(*int_op)->ctx_in.intr = 0;
+	(*int_op)->ctx_in.store_mode = 0;
+	(*int_op)->ctx_in.en = 0;
+	(*int_op)->ctx_in.dis = 0;
+	(*int_op)->ctx_in.md0 = 0;
+	(*int_op)->ctx_in.md1 = 0;
+	(*int_op)->ctx_in.md2 = 0;
+	(*int_op)->ctx_in.md3 = 0;
+	(*int_op)->ctx_in.md4 = 0;
+
+	(*int_op)->ctx_in.saved_data = (dma_descr_data*)virt_to_phys((*int_op)->cdesc_in->dma_descr);
+	(*int_op)->ctx_in.saved_data_buf = (*int_op)->cdesc_in->dma_descr->buf; /* Already physical address. */
+
+	DEBUG(printk("cryptocop_setup_dma_list: done\n"));
+	return 0;
+
+error_cleanup:
+	{
+		/* Free all allocated resources. */
+		struct cryptocop_dma_desc *tmp_cdesc;
+		while (digest_ctx.pad_descs){
+			tmp_cdesc = digest_ctx.pad_descs->next;
+			free_cdesc(digest_ctx.pad_descs);
+			digest_ctx.pad_descs = tmp_cdesc;
+		}
+		while (csum_ctx.pad_descs){
+			tmp_cdesc = csum_ctx.pad_descs->next;
+			free_cdesc(csum_ctx.pad_descs);
+			csum_ctx.pad_descs = tmp_cdesc;
+		}
+		assert(cipher_ctx.pad_descs == NULL); /* The ciphers are never padded. */
+
+		if (*int_op != NULL) delete_internal_operation(*int_op);
+	}
+	DEBUG_API(printk("cryptocop_setup_dma_list: done with error %d\n", failed));
+	return failed;
+}
+
+
+static void delete_internal_operation(struct cryptocop_int_operation *iop)
+{
+	void                      *ptr = iop->alloc_ptr;
+	struct cryptocop_dma_desc *cd = iop->cdesc_out;
+	struct cryptocop_dma_desc *next;
+
+	DEBUG(printk("delete_internal_operation: iop=0x%p, alloc_ptr=0x%p\n", iop, ptr));
+
+	while (cd) {
+		next = cd->next;
+		free_cdesc(cd);
+		cd = next;
+	}
+	cd = iop->cdesc_in;
+	while (cd) {
+		next = cd->next;
+		free_cdesc(cd);
+		cd = next;
+	}
+	kfree(ptr);
+}
+
+#define MD5_MIN_PAD_LENGTH (9)
+#define MD5_PAD_LENGTH_FIELD_LENGTH (8)
+
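+/* Example pad layout (illustrative, not from the original source): for
+ * hashed_length == 3, padlen is 64 - 3 = 61 bytes: p[0] == 0x80, p[1..52]
+ * are zero and the 64 bit bit count (3 * 8 == 0x18) is stored little-endian
+ * in p[53..60].  When padlen would be less than MD5_MIN_PAD_LENGTH, a full
+ * extra 64 byte block is appended instead.
+ */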
+static int create_md5_pad(int alloc_flag, unsigned long long hashed_length, char **pad, size_t *pad_length)
+{
+	size_t                  padlen = MD5_BLOCK_LENGTH - (hashed_length % MD5_BLOCK_LENGTH);
+	unsigned char           *p;
+	int                     i;
+	unsigned long long int  bit_length = hashed_length << 3;
+
+	if (padlen < MD5_MIN_PAD_LENGTH) padlen += MD5_BLOCK_LENGTH;
+
+	p = kzalloc(padlen, alloc_flag);
+	if (!p) return -ENOMEM;
+
+	*p = 0x80;
+
+	DEBUG(printk("create_md5_pad: hashed_length=%lld bits == %lld bytes\n", bit_length, hashed_length));
+
+	i = padlen - MD5_PAD_LENGTH_FIELD_LENGTH;
+	while (bit_length != 0){
+		p[i++] = bit_length % 0x100;
+		bit_length >>= 8;
+	}
+
+	*pad = (char*)p;
+	*pad_length = padlen;
+
+	return 0;
+}
+
+#define SHA1_MIN_PAD_LENGTH (9)
+#define SHA1_PAD_LENGTH_FIELD_LENGTH (8)
+
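+/* Example pad layout (illustrative, not from the original source): the
+ * lengths are as for MD5 (a 61 byte pad for hashed_length == 3), but the
+ * 64 bit bit count is stored big-endian, so 0x18 lands in the last byte
+ * p[60] rather than in p[53].
+ */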
+static int create_sha1_pad(int alloc_flag, unsigned long long hashed_length, char **pad, size_t *pad_length)
+{
+	size_t                  padlen = SHA1_BLOCK_LENGTH - (hashed_length % SHA1_BLOCK_LENGTH);
+	unsigned char           *p;
+	int                     i;
+	unsigned long long int  bit_length = hashed_length << 3;
+
+	if (padlen < SHA1_MIN_PAD_LENGTH) padlen += SHA1_BLOCK_LENGTH;
+
+	p = kzalloc(padlen, alloc_flag);
+	if (!p) return -ENOMEM;
+
+	*p = 0x80;
+
+	DEBUG(printk("create_sha1_pad: hashed_length=%lld bits == %lld bytes\n", bit_length, hashed_length));
+
+	i = padlen - 1;
+	while (bit_length != 0){
+		p[i--] = bit_length % 0x100;
+		bit_length >>= 8;
+	}
+
+	*pad = (char*)p;
+	*pad_length = padlen;
+
+	return 0;
+}
+
+
+static int transform_ok(struct cryptocop_transform_init *tinit)
+{
+	switch (tinit->alg){
+	case cryptocop_alg_csum:
+		switch (tinit->csum_mode){
+		case cryptocop_csum_le:
+		case cryptocop_csum_be:
+			break;
+		default:
+			DEBUG_API(printk("transform_ok: Bad mode set for csum transform\n"));
+			return -EINVAL;
+		}
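+		/* fall through - the checksum transform must also have keylen 0. */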
+	case cryptocop_alg_mem2mem:
+	case cryptocop_alg_md5:
+	case cryptocop_alg_sha1:
+		if (tinit->keylen != 0) {
+			DEBUG_API(printk("transform_ok: non-zero keylength, %d, for a digest/csum algorithm\n", tinit->keylen));
+			return -EINVAL; /* This check is a bit strict. */
+		}
+		break;
+	case cryptocop_alg_des:
+		if (tinit->keylen != 64) {
+			DEBUG_API(printk("transform_ok: keylen %d invalid for DES\n", tinit->keylen));
+			return -EINVAL;
+		}
+		break;
+	case cryptocop_alg_3des:
+		if (tinit->keylen != 192) {
+			DEBUG_API(printk("transform_ok: keylen %d invalid for 3DES\n", tinit->keylen));
+			return -EINVAL;
+		}
+		break;
+	case cryptocop_alg_aes:
+		if (tinit->keylen != 128 && tinit->keylen != 192 && tinit->keylen != 256) {
+			DEBUG_API(printk("transform_ok: keylen %d invalid for AES\n", tinit->keylen));
+			return -EINVAL;
+		}
+		break;
+	case cryptocop_no_alg:
+	default:
+		DEBUG_API(printk("transform_ok: no such algorithm %d\n", tinit->alg));
+		return -EINVAL;
+	}
+
+	switch (tinit->alg){
+	case cryptocop_alg_des:
+	case cryptocop_alg_3des:
+	case cryptocop_alg_aes:
+		if (tinit->cipher_mode != cryptocop_cipher_mode_ecb && tinit->cipher_mode != cryptocop_cipher_mode_cbc) return -EINVAL;
+	default:
+		 break;
+	}
+	return 0;
+}
+
+
+int cryptocop_new_session(cryptocop_session_id *sid, struct cryptocop_transform_init *tinit, int alloc_flag)
+{
+	struct cryptocop_session         *sess;
+	struct cryptocop_transform_init  *tfrm_in = tinit;
+	struct cryptocop_transform_init  *tmp_in;
+	int                              no_tfrms = 0;
+	int                              i;
+	unsigned long int                flags;
+
+	init_stream_coprocessor(); /* For safety if we are called early */
+
+	while (tfrm_in){
+		int err;
+		++no_tfrms;
+		if ((err = transform_ok(tfrm_in))) {
+			DEBUG_API(printk("cryptocop_new_session, bad transform\n"));
+			return err;
+		}
+		tfrm_in = tfrm_in->next;
+	}
+	if (0 == no_tfrms) {
+		DEBUG_API(printk("cryptocop_new_session, no transforms specified\n"));
+		return -EINVAL;
+	}
+
+	sess = kmalloc(sizeof(struct cryptocop_session), alloc_flag);
+	if (!sess){
+		DEBUG_API(printk("cryptocop_new_session, kmalloc cryptocop_session\n"));
+		return -ENOMEM;
+	}
+
+	sess->tfrm_ctx = kmalloc(no_tfrms * sizeof(struct cryptocop_transform_ctx), alloc_flag);
+	if (!sess->tfrm_ctx) {
+		DEBUG_API(printk("cryptocop_new_session, kmalloc cryptocop_transform_ctx\n"));
+		kfree(sess);
+		return -ENOMEM;
+	}
+
+	tfrm_in = tinit;
+	for (i = 0; i < no_tfrms; i++){
+		tmp_in = tfrm_in->next;
+		while (tmp_in){
+			if (tmp_in->tid == tfrm_in->tid) {
+				DEBUG_API(printk("cryptocop_new_session, duplicate transform ids\n"));
+				kfree(sess->tfrm_ctx);
+				kfree(sess);
+				return -EINVAL;
+			}
+			tmp_in = tmp_in->next;
+		}
+		memcpy(&sess->tfrm_ctx[i].init, tfrm_in, sizeof(struct cryptocop_transform_init));
+		sess->tfrm_ctx[i].dec_key_set = 0;
+		sess->tfrm_ctx[i].next = &sess->tfrm_ctx[i] + 1;
+
+		tfrm_in = tfrm_in->next;
+	}
+	sess->tfrm_ctx[i-1].next = NULL;
+
+	spin_lock_irqsave(&cryptocop_sessions_lock, flags);
+	sess->sid = next_sid;
+	next_sid++;
+	/* TODO If we are really paranoid we should do a duplicate check to handle sid wraparound.
+	 *      OTOH 2^64 is a really large number of sessions. */
+	if (next_sid == 0) next_sid = 1;
+
+	/* Prepend to session list. */
+	sess->next = cryptocop_sessions;
+	cryptocop_sessions = sess;
+	spin_unlock_irqrestore(&cryptocop_sessions_lock, flags);
+	*sid = sess->sid;
+	return 0;
+}
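+
+/* Usage sketch (illustrative only; "des_key" is a hypothetical 8 byte
+ * buffer, the field names are those used elsewhere in this file):
+ *
+ *	struct cryptocop_transform_init tinit = {0};
+ *	cryptocop_session_id sid;
+ *
+ *	tinit.tid = 1;
+ *	tinit.alg = cryptocop_alg_des;
+ *	tinit.cipher_mode = cryptocop_cipher_mode_cbc;
+ *	tinit.keylen = 64;  (keylen is given in bits, cf. transform_ok())
+ *	memcpy(tinit.key, des_key, 8);
+ *	tinit.next = NULL;
+ *
+ *	if (cryptocop_new_session(&sid, &tinit, GFP_KERNEL) != 0)
+ *		(handle error)
+ */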
+
+
+int cryptocop_free_session(cryptocop_session_id sid)
+{
+	struct cryptocop_transform_ctx    *tc;
+	struct cryptocop_session          *sess = NULL;
+	struct cryptocop_session          *psess = NULL;
+	unsigned long int                 flags;
+	int                               i;
+	LIST_HEAD(remove_list);
+	struct list_head                  *node, *tmp;
+	struct cryptocop_prio_job         *pj;
+
+	DEBUG(printk("cryptocop_free_session: sid=%lld\n", sid));
+
+	spin_lock_irqsave(&cryptocop_sessions_lock, flags);
+	sess = cryptocop_sessions;
+	while (sess && sess->sid != sid){
+		psess = sess;
+		sess = sess->next;
+	}
+	if (sess){
+		if (psess){
+			psess->next = sess->next;
+		} else {
+			cryptocop_sessions = sess->next;
+		}
+	}
+	spin_unlock_irqrestore(&cryptocop_sessions_lock, flags);
+
+	if (!sess) return -EINVAL;
+
+	/* Remove queued jobs. */
+	spin_lock_irqsave(&cryptocop_job_queue_lock, flags);
+
+	for (i = 0; i < cryptocop_prio_no_prios; i++){
+		if (!list_empty(&(cryptocop_job_queues[i].jobs))){
+			list_for_each_safe(node, tmp, &(cryptocop_job_queues[i].jobs)) {
+				pj = list_entry(node, struct cryptocop_prio_job, node);
+				if (pj->oper->sid == sid) {
+					list_move_tail(node, &remove_list);
+				}
+			}
+		}
+	}
+	spin_unlock_irqrestore(&cryptocop_job_queue_lock, flags);
+
+	list_for_each_safe(node, tmp, &remove_list) {
+		list_del(node);
+		pj = list_entry(node, struct cryptocop_prio_job, node);
+		pj->oper->operation_status = -EAGAIN;  /* EAGAIN is not ideal for a terminated job/session, but it is the best choice I know of. */
+		DEBUG(printk("cryptocop_free_session: pj=0x%p, pj->oper=0x%p, pj->iop=0x%p\n", pj, pj->oper, pj->iop));
+		pj->oper->cb(pj->oper, pj->oper->cb_data);
+		delete_internal_operation(pj->iop);
+		kfree(pj);
+	}
+
+	tc = sess->tfrm_ctx;
+	/* Erase keying data. */
+	while (tc){
+		DEBUG(printk("cryptocop_free_session: memset keys, tfrm id=%d\n", tc->init.tid));
+		memset(tc->init.key, 0xff, CRYPTOCOP_MAX_KEY_LENGTH);
+		memset(tc->dec_key, 0xff, CRYPTOCOP_MAX_KEY_LENGTH);
+		tc = tc->next;
+	}
+	kfree(sess->tfrm_ctx);
+	kfree(sess);
+
+	return 0;
+}
+
+static struct cryptocop_session *get_session(cryptocop_session_id sid)
+{
+	struct cryptocop_session    *sess;
+	unsigned long int           flags;
+
+	spin_lock_irqsave(&cryptocop_sessions_lock, flags);
+	sess = cryptocop_sessions;
+	while (sess && (sess->sid != sid)){
+		sess = sess->next;
+	}
+	spin_unlock_irqrestore(&cryptocop_sessions_lock, flags);
+
+	return sess;
+}
+
+static struct cryptocop_transform_ctx *get_transform_ctx(struct cryptocop_session *sess, cryptocop_tfrm_id tid)
+{
+	struct cryptocop_transform_ctx *tc = sess->tfrm_ctx;
+
+	DEBUG(printk("get_transform_ctx, sess=0x%p, tid=%d\n", sess, tid));
+	assert(sess != NULL);
+	while (tc && tc->init.tid != tid){
+		DEBUG(printk("tc=0x%p, tc->next=0x%p\n", tc, tc->next));
+		tc = tc->next;
+	}
+	DEBUG(printk("get_transform_ctx, returning tc=0x%p\n", tc));
+	return tc;
+}
+
+
+
+/* The AES s-transform matrix (s-box). */
+static const u8 aes_sbox[256] = {
+	99,  124, 119, 123, 242, 107, 111, 197, 48,  1,   103, 43,  254, 215, 171, 118,
+	202, 130, 201, 125, 250, 89,  71,  240, 173, 212, 162, 175, 156, 164, 114, 192,
+	183, 253, 147, 38,  54,  63,  247, 204, 52,  165, 229, 241, 113, 216, 49,  21,
+	4,   199, 35,  195, 24,  150, 5,   154, 7,   18,  128, 226, 235, 39,  178, 117,
+	9,   131, 44,  26,  27,  110, 90,  160, 82,  59,  214, 179, 41,  227, 47,  132,
+	83,  209, 0,   237, 32,  252, 177, 91,  106, 203, 190, 57,  74,  76,  88,  207,
+	208, 239, 170, 251, 67,  77,  51,  133, 69,  249, 2,   127, 80,  60,  159, 168,
+	81,  163, 64,  143, 146, 157, 56,  245, 188, 182, 218, 33,  16,  255, 243, 210,
+	205, 12,  19,  236, 95,  151, 68,  23,  196, 167, 126, 61,  100, 93,  25,  115,
+	96,  129, 79,  220, 34,  42,  144, 136, 70,  238, 184, 20,  222, 94,  11,  219,
+	224, 50,  58,  10,  73,  6,   36,  92,  194, 211, 172, 98,  145, 149, 228, 121,
+	231, 200, 55,  109, 141, 213, 78,  169, 108, 86,  244, 234, 101, 122, 174, 8,
+	186, 120, 37,  46,  28,  166, 180, 198, 232, 221, 116, 31,  75,  189, 139, 138,
+	112, 62,  181, 102, 72,  3,   246, 14,  97,  53,  87,  185, 134, 193, 29,  158,
+	225, 248, 152, 17,  105, 217, 142, 148, 155, 30,  135, 233, 206, 85,  40,  223,
+	140, 161, 137, 13,  191, 230, 66,  104, 65,  153, 45,  15,  176, 84,  187, 22
+};
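+/* (This is the standard FIPS-197 s-box, written here in decimal:
+ * 99 == 0x63, 124 == 0x7c, ..., 22 == 0x16.) */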
+
+/* AES has a 32 bit word round constant for each round in the
+ * key schedule.  round_constant[i] is really Rcon[i+1] in FIPS-197.
+ */
+static u32 round_constant[11] = {
+	0x01000000, 0x02000000, 0x04000000, 0x08000000,
+	0x10000000, 0x20000000, 0x40000000, 0x80000000,
+	0x1B000000, 0x36000000, 0x6C000000
+};
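+/* (Each constant is x**i in GF(2**8), reduced modulo the AES polynomial
+ * 0x11b and placed in the top octet; doubling 0x80 wraps to 0x1b, giving
+ * the 0x1b000000, 0x36000000 and 0x6c000000 entries.) */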
+
+/* Apply the s-box to each of the four octets in w. */
+static u32 aes_ks_subword(const u32 w)
+{
+	u8 bytes[4];
+
+	*(u32*)(&bytes[0]) = w;
+	bytes[0] = aes_sbox[bytes[0]];
+	bytes[1] = aes_sbox[bytes[1]];
+	bytes[2] = aes_sbox[bytes[2]];
+	bytes[3] = aes_sbox[bytes[3]];
+	return *(u32*)(&bytes[0]);
+}
+
+/* The encrypt (forward) Rijndael key schedule algorithm pseudo code:
+ * (Note that AES words are 32 bits long)
+ *
+ * KeyExpansion(byte key[4*Nk], word w[Nb*(Nr+1)], Nk){
+ * word temp
+ * i = 0
+ * while (i < Nk) {
+ *   w[i] = word(key[4*i, 4*i + 1, 4*i + 2, 4*i + 3])
+ *   i = i + 1
+ * }
+ * i = Nk
+ *
+ * while (i < (Nb * (Nr + 1))) {
+ *   temp = w[i - 1]
+ *   if ((i mod Nk) == 0) {
+ *     temp = SubWord(RotWord(temp)) xor Rcon[i/Nk]
+ *   }
+ *   else if ((Nk > 6) && ((i mod Nk) == 4)) {
+ *     temp = SubWord(temp)
+ *   }
+ *   w[i] = w[i - Nk] xor temp
+ *   i = i + 1
+ * }
+ * RotWord(t) does an 8 bit cyclic shift left on a 32 bit word.
+ * SubWord(t) applies the AES s-box individually to each octet
+ * in a 32 bit word.
+ *
+ * For AES Nk can have the values 4, 6, and 8 (corresponding to
+ * values for Nr of 10, 12, and 14).  Nb is always 4.
+ *
+ * To construct w[i], w[i - 1] and w[i - Nk] must be
+ * available.  Consequently we must keep a state of the last Nk words
+ * to be able to create the last round keys.
+ */
+static void get_aes_decrypt_key(unsigned char *dec_key, const unsigned  char *key, unsigned int keylength)
+{
+	u32 temp;
+	u32 w_ring[8]; /* nk is max 8, use elements 0..(nk - 1) as a ringbuffer */
+	u8  w_last_ix;
+	int i;
+	u8  nr, nk;
+
+	switch (keylength){
+	case 128:
+		nk = 4;
+		nr = 10;
+		break;
+	case 192:
+		nk = 6;
+		nr = 12;
+		break;
+	case 256:
+		nk = 8;
+		nr = 14;
+		break;
+	default:
+		panic("stream co-processor: bad aes key length in get_aes_decrypt_key\n");
+	}
+
+	/* Need to do host byte order correction here since the key is byte oriented
+	 * and the key expansion algorithm is word (u32) oriented. */
+	for (i = 0; i < nk; i+=1) {
+		w_ring[i] = be32_to_cpu(*(u32*)&key[4*i]);
+	}
+
+	i = (int)nk;
+	w_last_ix = i - 1;
+	while (i < (4 * (nr + 2))) {
+		temp = w_ring[w_last_ix];
+		if (!(i % nk)) {
+			/* RotWord(temp) */
+			temp = (temp << 8) | (temp >> 24);
+			temp = aes_ks_subword(temp);
+			temp ^= round_constant[i/nk - 1];
+		} else if ((nk > 6) && ((i % nk) == 4)) {
+			temp = aes_ks_subword(temp);
+		}
+		w_last_ix = (w_last_ix + 1) % nk; /* This is the same as (i-Nk) mod Nk */
+		temp ^= w_ring[w_last_ix];
+		w_ring[w_last_ix] = temp;
+
+		/* We need the round keys for round Nr+1 and Nr+2 (round key
+		 * Nr+2 is the round key beyond the last one used when
+		 * encrypting).  Rounds are numbered starting from 0, Nr=10
+		 * implies 11 rounds are used in encryption/decryption.
+		 */
+		if (i >= (4 * nr)) {
+			/* Need to do host byte order correction here, the key
+			 * is byte oriented. */
+			*(u32*)dec_key = cpu_to_be32(temp);
+			dec_key += 4;
+		}
+		++i;
+	}
+}
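+
+/* Illustrative note: for every supported key length the loop above emits
+ * the eight words with indices 4*Nr .. 4*(Nr + 2) - 1, i.e. 32 bytes of
+ * round key material (the last encryption round key and the one beyond it,
+ * cf. the comment inside the loop), so dec_key must hold 32 bytes.
+ */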
+
+
+/**** Job/operation management. ****/
+
+int cryptocop_job_queue_insert_csum(struct cryptocop_operation *operation)
+{
+	return cryptocop_job_queue_insert(cryptocop_prio_kernel_csum, operation);
+}
+
+int cryptocop_job_queue_insert_crypto(struct cryptocop_operation *operation)
+{
+	return cryptocop_job_queue_insert(cryptocop_prio_kernel, operation);
+}
+
+int cryptocop_job_queue_insert_user_job(struct cryptocop_operation *operation)
+{
+	return cryptocop_job_queue_insert(cryptocop_prio_user, operation);
+}
+
+static int cryptocop_job_queue_insert(cryptocop_queue_priority prio, struct cryptocop_operation *operation)
+{
+	int                           ret;
+	struct cryptocop_prio_job     *pj = NULL;
+	unsigned long int             flags;
+
+	DEBUG(printk("cryptocop_job_queue_insert(%d, 0x%p)\n", prio, operation));
+
+	if (!operation || !operation->cb){
+		DEBUG_API(printk("cryptocop_job_queue_insert oper=0x%p, NULL operation or callback\n", operation));
+		return -EINVAL;
+	}
+
+	if ((ret = cryptocop_job_setup(&pj, operation)) != 0){
+		DEBUG_API(printk("cryptocop_job_queue_insert: job setup failed\n"));
+		return ret;
+	}
+	assert(pj != NULL);
+
+	spin_lock_irqsave(&cryptocop_job_queue_lock, flags);
+	list_add_tail(&pj->node, &cryptocop_job_queues[prio].jobs);
+	spin_unlock_irqrestore(&cryptocop_job_queue_lock, flags);
+
+	/* Make sure a job is running */
+	cryptocop_start_job();
+	return 0;
+}
+
+static void cryptocop_do_tasklet(unsigned long unused);
+DECLARE_TASKLET(cryptocop_tasklet, cryptocop_do_tasklet, 0);
+
+static void cryptocop_do_tasklet(unsigned long unused)
+{
+	struct list_head             *node;
+	struct cryptocop_prio_job    *pj = NULL;
+	unsigned long                flags;
+
+	DEBUG(printk("cryptocop_do_tasklet: entering\n"));
+
+	do {
+		spin_lock_irqsave(&cryptocop_completed_jobs_lock, flags);
+		if (!list_empty(&cryptocop_completed_jobs)){
+			node = cryptocop_completed_jobs.next;
+			list_del(node);
+			pj = list_entry(node, struct cryptocop_prio_job, node);
+		} else {
+			pj = NULL;
+		}
+		spin_unlock_irqrestore(&cryptocop_completed_jobs_lock, flags);
+		if (pj) {
+			assert(pj->oper != NULL);
+
+			/* Notify consumer of operation completeness. */
+			DEBUG(printk("cryptocop_do_tasklet: callback 0x%p, data 0x%p\n", pj->oper->cb, pj->oper->cb_data));
+
+			pj->oper->operation_status = 0; /* Job is completed. */
+			pj->oper->cb(pj->oper, pj->oper->cb_data);
+			delete_internal_operation(pj->iop);
+			kfree(pj);
+		}
+	} while (pj != NULL);
+
+	DEBUG(printk("cryptocop_do_tasklet: exiting\n"));
+}
+
+static irqreturn_t
+dma_done_interrupt(int irq, void *dev_id)
+{
+	struct cryptocop_prio_job *done_job;
+	reg_dma_rw_ack_intr ack_intr = {
+		.data = 1,
+	};
+
+	REG_WR(dma, IN_DMA_INST, rw_ack_intr, ack_intr);
+
+	DEBUG(printk("cryptocop DMA done\n"));
+
+	spin_lock(&running_job_lock);
+	if (cryptocop_running_job == NULL){
+		printk("stream co-processor got interrupt when not busy\n");
+		spin_unlock(&running_job_lock);
+		return IRQ_HANDLED;
+	}
+	done_job = cryptocop_running_job;
+	cryptocop_running_job = NULL;
+	spin_unlock(&running_job_lock);
+
+	/* Start processing a job. */
+	if (!spin_trylock(&cryptocop_process_lock)){
+		DEBUG(printk("cryptocop irq handler, not starting a job\n"));
+	} else {
+		cryptocop_start_job();
+		spin_unlock(&cryptocop_process_lock);
+	}
+
+	done_job->oper->operation_status = 0; /* Job is completed. */
+	if (done_job->oper->fast_callback){
+		/* This operation wants callback from interrupt. */
+		done_job->oper->cb(done_job->oper, done_job->oper->cb_data);
+		delete_internal_operation(done_job->iop);
+		kfree(done_job);
+	} else {
+		spin_lock(&cryptocop_completed_jobs_lock);
+		list_add_tail(&(done_job->node), &cryptocop_completed_jobs);
+		spin_unlock(&cryptocop_completed_jobs_lock);
+		tasklet_schedule(&cryptocop_tasklet);
+	}
+
+	DEBUG(printk("cryptocop leave irq handler\n"));
+	return IRQ_HANDLED;
+}
+
+
+/* Setup interrupts and DMA channels. */
+static int init_cryptocop(void)
+{
+	unsigned long          flags;
+	reg_dma_rw_cfg         dma_cfg = {.en = 1};
+	reg_dma_rw_intr_mask   intr_mask_in = {.data = regk_dma_yes}; /* Only want descriptor interrupts from the DMA in channel. */
+	reg_dma_rw_ack_intr    ack_intr = { .data = 1, .in_eop = 1 };
+	reg_strcop_rw_cfg      strcop_cfg = {
+		.ipend = regk_strcop_little,
+		.td1 = regk_strcop_e,
+		.td2 = regk_strcop_d,
+		.td3 = regk_strcop_e,
+		.ignore_sync = 0,
+		.en = 1
+	};
+
+	if (request_irq(DMA_IRQ, dma_done_interrupt, 0,
+			"stream co-processor DMA", NULL))
+		panic("request_irq stream co-processor irq dma9");
+
+	(void)crisv32_request_dma(OUT_DMA, "strcop", DMA_PANIC_ON_ERROR,
+		0, dma_strp);
+	(void)crisv32_request_dma(IN_DMA, "strcop", DMA_PANIC_ON_ERROR,
+		0, dma_strp);
+
+	local_irq_save(flags);
+
+	/* Reset and enable the cryptocop. */
+	strcop_cfg.en = 0;
+	REG_WR(strcop, regi_strcop, rw_cfg, strcop_cfg);
+	strcop_cfg.en = 1;
+	REG_WR(strcop, regi_strcop, rw_cfg, strcop_cfg);
+
+	/* Enable DMAs. */
+	REG_WR(dma, IN_DMA_INST, rw_cfg, dma_cfg); /* input DMA */
+	REG_WR(dma, OUT_DMA_INST, rw_cfg, dma_cfg); /* output DMA */
+
+	/* Set up wordsize = 4 for DMAs. */
+	DMA_WR_CMD(OUT_DMA_INST, regk_dma_set_w_size4);
+	DMA_WR_CMD(IN_DMA_INST, regk_dma_set_w_size4);
+
+	/* Enable interrupts. */
+	REG_WR(dma, IN_DMA_INST, rw_intr_mask, intr_mask_in);
+
+	/* Clear intr ack. */
+	REG_WR(dma, IN_DMA_INST, rw_ack_intr, ack_intr);
+
+	local_irq_restore(flags);
+
+	return 0;
+}
+
+/* Free used cryptocop hw resources (interrupt and DMA channels). */
+static void release_cryptocop(void)
+{
+	unsigned long          flags;
+	reg_dma_rw_cfg         dma_cfg = {.en = 0};
+	reg_dma_rw_intr_mask   intr_mask_in = {0};
+	reg_dma_rw_ack_intr    ack_intr = { .data = 1, .in_eop = 1 };
+
+	local_irq_save(flags);
+
+	/* Clear intr ack. */
+	REG_WR(dma, IN_DMA_INST, rw_ack_intr, ack_intr);
+
+	/* Disable DMAs. */
+	REG_WR(dma, IN_DMA_INST, rw_cfg, dma_cfg); /* input DMA */
+	REG_WR(dma, OUT_DMA_INST, rw_cfg, dma_cfg); /* output DMA */
+
+	/* Disable interrupts. */
+	REG_WR(dma, IN_DMA_INST, rw_intr_mask, intr_mask_in);
+
+	local_irq_restore(flags);
+
+	free_irq(DMA_IRQ, NULL);
+
+	(void)crisv32_free_dma(OUT_DMA);
+	(void)crisv32_free_dma(IN_DMA);
+}
+
+
+/* Init job queue. */
+static int cryptocop_job_queue_init(void)
+{
+	int i;
+
+	INIT_LIST_HEAD(&cryptocop_completed_jobs);
+
+	for (i = 0; i < cryptocop_prio_no_prios; i++){
+		cryptocop_job_queues[i].prio = (cryptocop_queue_priority)i;
+		INIT_LIST_HEAD(&cryptocop_job_queues[i].jobs);
+	}
+	return 0;
+}
+
+
+static void cryptocop_job_queue_close(void)
+{
+	struct list_head               *node, *tmp;
+	struct cryptocop_prio_job      *pj = NULL;
+	unsigned long int              process_flags, flags;
+	int                            i;
+
+	/* FIXME: This is as yet untested code. */
+
+	/* Stop strcop from getting an operation to process while we are closing the
+	   module. */
+	spin_lock_irqsave(&cryptocop_process_lock, process_flags);
+
+	/* Empty the job queue. */
+	for (i = 0; i < cryptocop_prio_no_prios; i++){
+		if (!list_empty(&(cryptocop_job_queues[i].jobs))){
+			list_for_each_safe(node, tmp, &(cryptocop_job_queues[i].jobs)) {
+				pj = list_entry(node, struct cryptocop_prio_job, node);
+				list_del(node);
+
+				/* Call callback to notify consumer of job removal. */
+				DEBUG(printk("cryptocop_job_queue_close: callback 0x%p, data 0x%p\n", pj->oper->cb, pj->oper->cb_data));
+				pj->oper->operation_status = -EINTR; /* Job is terminated without completion. */
+				pj->oper->cb(pj->oper, pj->oper->cb_data);
+
+				delete_internal_operation(pj->iop);
+				kfree(pj);
+			}
+		}
+	}
+	spin_unlock_irqrestore(&cryptocop_process_lock, process_flags);
+
+	/* Remove the running job, if any. */
+	spin_lock_irqsave(&running_job_lock, flags);
+	if (cryptocop_running_job){
+		reg_strcop_rw_cfg rw_cfg;
+		reg_dma_rw_cfg    dma_out_cfg, dma_in_cfg;
+
+		/* Stop DMA. */
+		dma_out_cfg = REG_RD(dma, OUT_DMA_INST, rw_cfg);
+		dma_out_cfg.en = regk_dma_no;
+		REG_WR(dma, OUT_DMA_INST, rw_cfg, dma_out_cfg);
+
+		dma_in_cfg = REG_RD(dma, IN_DMA_INST, rw_cfg);
+		dma_in_cfg.en = regk_dma_no;
+		REG_WR(dma, IN_DMA_INST, rw_cfg, dma_in_cfg);
+
+		/* Disable the cryptocop. */
+		rw_cfg = REG_RD(strcop, regi_strcop, rw_cfg);
+		rw_cfg.en = 0;
+		REG_WR(strcop, regi_strcop, rw_cfg, rw_cfg);
+
+		pj = cryptocop_running_job;
+		cryptocop_running_job = NULL;
+
+		/* Call callback to notify consumer of job removal. */
+		DEBUG(printk("cryptocop_job_queue_close: callback 0x%p, data 0x%p\n", pj->oper->cb, pj->oper->cb_data));
+		pj->oper->operation_status = -EINTR; /* Job is terminated without completion. */
+		pj->oper->cb(pj->oper, pj->oper->cb_data);
+
+		delete_internal_operation(pj->iop);
+		kfree(pj);
+	}
+	spin_unlock_irqrestore(&running_job_lock, flags);
+
+	/* Remove completed jobs, if any. */
+	spin_lock_irqsave(&cryptocop_completed_jobs_lock, flags);
+
+	list_for_each_safe(node, tmp, &cryptocop_completed_jobs) {
+		pj = list_entry(node, struct cryptocop_prio_job, node);
+		list_del(node);
+		/* Call callback to notify consumer of job removal. */
+		DEBUG(printk("cryptocop_job_queue_close: callback 0x%p, data 0x%p\n", pj->oper->cb, pj->oper->cb_data));
+		pj->oper->operation_status = -EINTR; /* Job is terminated without completion. */
+		pj->oper->cb(pj->oper, pj->oper->cb_data);
+
+		delete_internal_operation(pj->iop);
+		kfree(pj);
+	}
+	spin_unlock_irqrestore(&cryptocop_completed_jobs_lock, flags);
+}
+
+
+static void cryptocop_start_job(void)
+{
+	int                          i;
+	struct cryptocop_prio_job    *pj;
+	unsigned long int            flags;
+	unsigned long int            running_job_flags;
+	reg_strcop_rw_cfg            rw_cfg = {.en = 1, .ignore_sync = 0};
+
+	DEBUG(printk("cryptocop_start_job: entering\n"));
+
+	spin_lock_irqsave(&running_job_lock, running_job_flags);
+	if (cryptocop_running_job != NULL){
+		/* Already running. */
+		DEBUG(printk("cryptocop_start_job: already running, exit\n"));
+		spin_unlock_irqrestore(&running_job_lock, running_job_flags);
+		return;
+	}
+	spin_lock_irqsave(&cryptocop_job_queue_lock, flags);
+
+	/* Check the queues in priority order. */
+	for (i = cryptocop_prio_kernel_csum; (i < cryptocop_prio_no_prios) && list_empty(&cryptocop_job_queues[i].jobs); i++);
+	if (i == cryptocop_prio_no_prios) {
+		spin_unlock_irqrestore(&cryptocop_job_queue_lock, flags);
+		spin_unlock_irqrestore(&running_job_lock, running_job_flags);
+		DEBUG(printk("cryptocop_start_job: no jobs to run\n"));
+		return; /* No jobs to run */
+	}
+	DEBUG(printk("starting job for prio %d\n", i));
+
+	/* TODO: Do not starve lower priority jobs.  Let in a lower
+	 * prio job for every N-th processed higher prio job or some
+	 * other scheduling policy.  This could reasonably be
+	 * tweakable since the optimal balance would depend on the
+	 * type of load on the system. */
+
+	/* Pull the DMA lists from the job and start the DMA client. */
+	pj = list_entry(cryptocop_job_queues[i].jobs.next, struct cryptocop_prio_job, node);
+	list_del(&pj->node);
+	spin_unlock_irqrestore(&cryptocop_job_queue_lock, flags);
+	cryptocop_running_job = pj;
+
+	/* Set config register (3DES and CSUM modes). */
+	switch (pj->iop->tdes_mode){
+	case cryptocop_3des_eee:
+		rw_cfg.td1 = regk_strcop_e;
+		rw_cfg.td2 = regk_strcop_e;
+		rw_cfg.td3 = regk_strcop_e;
+		break;
+	case cryptocop_3des_eed:
+		rw_cfg.td1 = regk_strcop_e;
+		rw_cfg.td2 = regk_strcop_e;
+		rw_cfg.td3 = regk_strcop_d;
+		break;
+	case cryptocop_3des_ede:
+		rw_cfg.td1 = regk_strcop_e;
+		rw_cfg.td2 = regk_strcop_d;
+		rw_cfg.td3 = regk_strcop_e;
+		break;
+	case cryptocop_3des_edd:
+		rw_cfg.td1 = regk_strcop_e;
+		rw_cfg.td2 = regk_strcop_d;
+		rw_cfg.td3 = regk_strcop_d;
+		break;
+	case cryptocop_3des_dee:
+		rw_cfg.td1 = regk_strcop_d;
+		rw_cfg.td2 = regk_strcop_e;
+		rw_cfg.td3 = regk_strcop_e;
+		break;
+	case cryptocop_3des_ded:
+		rw_cfg.td1 = regk_strcop_d;
+		rw_cfg.td2 = regk_strcop_e;
+		rw_cfg.td3 = regk_strcop_d;
+		break;
+	case cryptocop_3des_dde:
+		rw_cfg.td1 = regk_strcop_d;
+		rw_cfg.td2 = regk_strcop_d;
+		rw_cfg.td3 = regk_strcop_e;
+		break;
+	case cryptocop_3des_ddd:
+		rw_cfg.td1 = regk_strcop_d;
+		rw_cfg.td2 = regk_strcop_d;
+		rw_cfg.td3 = regk_strcop_d;
+		break;
+	default:
+		DEBUG(printk("cryptocop_setup_dma_list: bad 3DES mode\n"));
+	}
+	switch (pj->iop->csum_mode){
+	case cryptocop_csum_le:
+		rw_cfg.ipend = regk_strcop_little;
+		break;
+	case cryptocop_csum_be:
+		rw_cfg.ipend = regk_strcop_big;
+		break;
+	default:
+		DEBUG(printk("cryptocop_setup_dma_list: bad checksum mode\n"));
+	}
+	REG_WR(strcop, regi_strcop, rw_cfg, rw_cfg);
+
+	DEBUG(printk("cryptocop_start_job: starting DMA, new cryptocop_running_job=0x%p\n"
+		     "ctx_in: 0x%p, phys: 0x%p\n"
+		     "ctx_out: 0x%p, phys: 0x%p\n",
+		     pj,
+		     &pj->iop->ctx_in, (char*)virt_to_phys(&pj->iop->ctx_in),
+		     &pj->iop->ctx_out, (char*)virt_to_phys(&pj->iop->ctx_out)));
+
+	/* Start input DMA. */
+	flush_dma_context(&pj->iop->ctx_in);
+	DMA_START_CONTEXT(IN_DMA_INST, virt_to_phys(&pj->iop->ctx_in));
+
+	/* Start output DMA. */
+	DMA_START_CONTEXT(OUT_DMA_INST, virt_to_phys(&pj->iop->ctx_out));
+
+	spin_unlock_irqrestore(&running_job_lock, running_job_flags);
+	DEBUG(printk("cryptocop_start_job: exiting\n"));
+}
+
+
+static int cryptocop_job_setup(struct cryptocop_prio_job **pj, struct cryptocop_operation *operation)
+{
+	int  err;
+	int  alloc_flag = operation->in_interrupt ? GFP_ATOMIC : GFP_KERNEL;
+	void *iop_alloc_ptr = NULL;
+
+	*pj = kmalloc(sizeof (struct cryptocop_prio_job), alloc_flag);
+	if (!*pj) return -ENOMEM;
+
+	DEBUG(printk("cryptocop_job_setup: operation=0x%p\n", operation));
+
+	(*pj)->oper = operation;
+	DEBUG(printk("cryptocop_job_setup, cb=0x%p cb_data=0x%p\n",  (*pj)->oper->cb, (*pj)->oper->cb_data));
+
+	if (operation->use_dmalists) {
+		DEBUG(print_user_dma_lists(&operation->list_op));
+		if (!operation->list_op.inlist || !operation->list_op.outlist || !operation->list_op.out_data_buf || !operation->list_op.in_data_buf){
+			DEBUG_API(printk("cryptocop_job_setup: bad indata (use_dmalists)\n"));
+			kfree(*pj);
+			return -EINVAL;
+		}
+		iop_alloc_ptr = kmalloc(DESCR_ALLOC_PAD + sizeof(struct cryptocop_int_operation), alloc_flag);
+		if (!iop_alloc_ptr) {
+			DEBUG_API(printk("cryptocop_job_setup: kmalloc cryptocop_int_operation\n"));
+			kfree(*pj);
+			return -ENOMEM;
+		}
+		(*pj)->iop = (struct cryptocop_int_operation*)(((unsigned long int)(iop_alloc_ptr + DESCR_ALLOC_PAD + offsetof(struct cryptocop_int_operation, ctx_out)) & ~0x0000001F) - offsetof(struct cryptocop_int_operation, ctx_out));
+		DEBUG(memset((*pj)->iop, 0xff, sizeof(struct cryptocop_int_operation)));
+		(*pj)->iop->alloc_ptr = iop_alloc_ptr;
+		(*pj)->iop->sid = operation->sid;
+		(*pj)->iop->cdesc_out = NULL;
+		(*pj)->iop->cdesc_in = NULL;
+		(*pj)->iop->tdes_mode = operation->list_op.tdes_mode;
+		(*pj)->iop->csum_mode = operation->list_op.csum_mode;
+		(*pj)->iop->ddesc_out = operation->list_op.outlist;
+		(*pj)->iop->ddesc_in = operation->list_op.inlist;
+
+		/* Setup DMA contexts. */
+		(*pj)->iop->ctx_out.next = NULL;
+		(*pj)->iop->ctx_out.eol = 1;
+		(*pj)->iop->ctx_out.saved_data = operation->list_op.outlist;
+		(*pj)->iop->ctx_out.saved_data_buf = operation->list_op.out_data_buf;
+
+		(*pj)->iop->ctx_in.next = NULL;
+		(*pj)->iop->ctx_in.eol = 1;
+		(*pj)->iop->ctx_in.saved_data = operation->list_op.inlist;
+		(*pj)->iop->ctx_in.saved_data_buf = operation->list_op.in_data_buf;
+	} else {
+		if ((err = cryptocop_setup_dma_list(operation, &(*pj)->iop, alloc_flag))) {
+			DEBUG_API(printk("cryptocop_job_setup: cryptocop_setup_dma_list failed %d\n", err));
+			kfree(*pj);
+			return err;
+		}
+	}
+	DEBUG(print_dma_descriptors((*pj)->iop));
+
+	DEBUG(printk("cryptocop_job_setup, DMA list setup successful\n"));
+
+	return 0;
+}
+
+static int cryptocop_open(struct inode *inode, struct file *filp)
+{
+	int p = iminor(inode);
+
+	if (p != CRYPTOCOP_MINOR) return -EINVAL;
+
+	filp->private_data = NULL;
+	return 0;
+}
+
+
+static int cryptocop_release(struct inode *inode, struct file *filp)
+{
+	struct cryptocop_private *dev = filp->private_data;
+	struct cryptocop_private *dev_next;
+
+	while (dev){
+		dev_next = dev->next;
+		if (dev->sid != CRYPTOCOP_SESSION_ID_NONE) {
+			(void)cryptocop_free_session(dev->sid);
+		}
+		kfree(dev);
+		dev = dev_next;
+	}
+
+	return 0;
+}
+
+
+static int cryptocop_ioctl_close_session(struct inode *inode, struct file *filp,
+					 unsigned int cmd, unsigned long arg)
+{
+	struct cryptocop_private  *dev = filp->private_data;
+	struct cryptocop_private  *prev_dev = NULL;
+	struct strcop_session_op  *sess_op = (struct strcop_session_op *)arg;
+	struct strcop_session_op  sop;
+	int                       err;
+
+	DEBUG(printk("cryptocop_ioctl_close_session\n"));
+
+	if (!access_ok(VERIFY_READ, sess_op, sizeof(struct strcop_session_op)))
+		return -EFAULT;
+	err = copy_from_user(&sop, sess_op, sizeof(struct strcop_session_op));
+	if (err) return -EFAULT;
+
+	while (dev && (dev->sid != sop.ses_id)) {
+		prev_dev = dev;
+		dev = dev->next;
+	}
+	if (dev){
+		if (prev_dev){
+			prev_dev->next = dev->next;
+		} else {
+			filp->private_data = dev->next;
+		}
+		err = cryptocop_free_session(dev->sid);
+		if (err) return -EFAULT;
+	} else {
+		DEBUG_API(printk("cryptocop_ioctl_close_session: session %lld not found\n", sop.ses_id));
+		return -EINVAL;
+	}
+	return 0;
+}
+
+
+static void ioctl_process_job_callback(struct cryptocop_operation *op, void*cb_data)
+{
+	struct ioctl_job_cb_ctx *jc = (struct ioctl_job_cb_ctx *)cb_data;
+
+	DEBUG(printk("ioctl_process_job_callback: op=0x%p, cb_data=0x%p\n", op, cb_data));
+
+	jc->processed = 1;
+	wake_up(&cryptocop_ioc_process_wq);
+}
+
+
+#define CRYPTOCOP_IOCTL_CIPHER_TID  (1)
+#define CRYPTOCOP_IOCTL_DIGEST_TID  (2)
+#define CRYPTOCOP_IOCTL_CSUM_TID    (3)
+
+static size_t first_cfg_change_ix(struct strcop_crypto_op *crp_op)
+{
+	size_t ch_ix = 0;
+
+	if (crp_op->do_cipher) ch_ix = crp_op->cipher_start;
+	if (crp_op->do_digest && (crp_op->digest_start < ch_ix)) ch_ix = crp_op->digest_start;
+	if (crp_op->do_csum && (crp_op->csum_start < ch_ix)) ch_ix = crp_op->csum_start;
+
+	DEBUG(printk("first_cfg_change_ix: ix=%d\n", ch_ix));
+	return ch_ix;
+}
+
+
+static size_t next_cfg_change_ix(struct strcop_crypto_op *crp_op, size_t ix)
+{
+	size_t ch_ix = INT_MAX;
+	size_t tmp_ix = 0;
+
+	if (crp_op->do_cipher && ((crp_op->cipher_start + crp_op->cipher_len) > ix)){
+		if (crp_op->cipher_start > ix) {
+			ch_ix = crp_op->cipher_start;
+		} else {
+			ch_ix = crp_op->cipher_start + crp_op->cipher_len;
+		}
+	}
+	if (crp_op->do_digest && ((crp_op->digest_start + crp_op->digest_len) > ix)){
+		if (crp_op->digest_start > ix) {
+			tmp_ix = crp_op->digest_start;
+		} else {
+			tmp_ix = crp_op->digest_start + crp_op->digest_len;
+		}
+		if (tmp_ix < ch_ix) ch_ix = tmp_ix;
+	}
+	if (crp_op->do_csum && ((crp_op->csum_start + crp_op->csum_len) > ix)){
+		if (crp_op->csum_start > ix) {
+			tmp_ix = crp_op->csum_start;
+		} else {
+			tmp_ix = crp_op->csum_start + crp_op->csum_len;
+		}
+		if (tmp_ix < ch_ix) ch_ix = tmp_ix;
+	}
+	if (ch_ix == INT_MAX) ch_ix = ix;
+	DEBUG(printk("next_cfg_change_ix prev ix=%d, next ix=%d\n", ix, ch_ix));
+	return ch_ix;
+}
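+
+/* Illustrative walk-through (hypothetical values, not driver code):
+ * assume an operation with cipher over [0, 16) and digest over [8, 24).
+ * The configuration change indices are then 0, 8, 16 and 24:
+ *
+ *   prev_ix = first_cfg_change_ix(&op);     // 0  (cipher starts)
+ *   next_ix = next_cfg_change_ix(&op, 0);   // 8  (digest starts)
+ *   next_ix = next_cfg_change_ix(&op, 8);   // 16 (cipher ends)
+ *   next_ix = next_cfg_change_ix(&op, 16);  // 24 (digest ends)
+ *
+ * Each [prev_ix, next_ix) interval becomes one descriptor carrying the
+ * transforms active on that interval; this is how the loop in
+ * cryptocop_ioctl_process() below builds its descriptor list. */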
+
+
+/* Map map_length bytes from the pages starting at *pageix and *pageoffset
+ * into iovecs starting at *iovix.  Note the inverted return convention:
+ * -1 means success, 0 means failure (out of iovecs or pages). */
+static int map_pages_to_iovec(struct iovec *iov, int iovlen, int *iovix, struct page **pages, int nopages, int *pageix, int *pageoffset, int map_length )
+{
+	int tmplen;
+
+	assert(iov != NULL);
+	assert(iovix != NULL);
+	assert(pages != NULL);
+	assert(pageix != NULL);
+	assert(pageoffset != NULL);
+
+	DEBUG(printk("map_pages_to_iovec, map_length=%d, iovlen=%d, *iovix=%d, nopages=%d, *pageix=%d, *pageoffset=%d\n", map_length, iovlen, *iovix, nopages, *pageix, *pageoffset));
+
+	while (map_length > 0){
+		DEBUG(printk("map_pages_to_iovec, map_length=%d, iovlen=%d, *iovix=%d, nopages=%d, *pageix=%d, *pageoffset=%d\n", map_length, iovlen, *iovix, nopages, *pageix, *pageoffset));
+		if (*iovix >= iovlen){
+			DEBUG_API(printk("map_page_to_iovec: *iovix=%d >= iovlen=%d\n", *iovix, iovlen));
+			return 0;
+		}
+		if (*pageix >= nopages){
+			DEBUG_API(printk("map_page_to_iovec: *pageix=%d >= nopages=%d\n", *pageix, nopages));
+			return 0;
+		}
+		iov[*iovix].iov_base = (unsigned char*)page_address(pages[*pageix]) + *pageoffset;
+		tmplen = PAGE_SIZE - *pageoffset;
+		if (tmplen < map_length){
+			(*pageoffset) = 0;
+			(*pageix)++;
+		} else {
+			tmplen = map_length;
+			(*pageoffset) += map_length;
+		}
+		DEBUG(printk("mapping %d bytes from page %d (or %d) to iovec %d\n", tmplen, *pageix, *pageix-1, *iovix));
+		iov[*iovix].iov_len = tmplen;
+		map_length -= tmplen;
+		(*iovix)++;
+	}
+	DEBUG(printk("map_page_to_iovec, exit, *iovix=%d\n", *iovix));
+	return -1;
+}
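+
+/* Usage sketch (hypothetical values, assuming 4 KiB pages): mapping 5000
+ * bytes starting at offset 100 into a pinned page array consumes two
+ * iovecs, 3996 bytes from page 0 and 1004 bytes from page 1:
+ *
+ *   int iovix = 0, pageix = 0, pageoffset = 100;
+ *   if (!map_pages_to_iovec(iov, iovlen, &iovix, pages, nopages,
+ *                           &pageix, &pageoffset, 5000))
+ *           return -EINVAL;  // ran out of iovecs or pages
+ */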
+
+
+
+static int cryptocop_ioctl_process(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg)
+{
+	int                             i;
+	struct cryptocop_private        *dev = filp->private_data;
+	struct strcop_crypto_op         *crp_oper = (struct strcop_crypto_op *)arg;
+	struct strcop_crypto_op         oper = {0};
+	int                             err = 0;
+	struct cryptocop_operation      *cop = NULL;
+
+	struct ioctl_job_cb_ctx         *jc = NULL;
+
+	struct page                     **inpages = NULL;
+	struct page                     **outpages = NULL;
+	int                             noinpages = 0;
+	int                             nooutpages = 0;
+
+	struct cryptocop_desc           descs[5]; /* Max 5 descriptors are needed, there are three transforms that
+						   * can get connected/disconnected on different places in the indata. */
+	struct cryptocop_desc_cfg       dcfgs[5*3];
+	int                             desc_ix = 0;
+	int                             dcfg_ix = 0;
+	struct cryptocop_tfrm_cfg       ciph_tcfg = {0};
+	struct cryptocop_tfrm_cfg       digest_tcfg = {0};
+	struct cryptocop_tfrm_cfg       csum_tcfg = {0};
+
+	unsigned char                   *digest_result = NULL;
+	int                             digest_length = 0;
+	int                             cblocklen = 0;
+	unsigned char                   csum_result[CSUM_BLOCK_LENGTH];
+	struct cryptocop_session        *sess;
+
+	int    iovlen = 0;
+	int    iovix = 0;
+	int    pageix = 0;
+	int    pageoffset = 0;
+
+	size_t prev_ix = 0;
+	size_t next_ix;
+
+	int    cipher_active, digest_active, csum_active;
+	int    end_digest, end_csum;
+	int    digest_done = 0;
+	int    cipher_done = 0;
+	int    csum_done = 0;
+
+	DEBUG(printk("cryptocop_ioctl_process\n"));
+
+	if (!access_ok(VERIFY_WRITE, crp_oper, sizeof(struct strcop_crypto_op))){
+		DEBUG_API(printk("cryptocop_ioctl_process: !access_ok crp_oper!\n"));
+		return -EFAULT;
+	}
+	if (copy_from_user(&oper, crp_oper, sizeof(struct strcop_crypto_op))) {
+		DEBUG_API(printk("cryptocop_ioctl_process: copy_from_user\n"));
+		return -EFAULT;
+	}
+	DEBUG(print_strcop_crypto_op(&oper));
+
+	while (dev && dev->sid != oper.ses_id) dev = dev->next;
+	if (!dev){
+		DEBUG_API(printk("cryptocop_ioctl_process: session %lld not found\n", oper.ses_id));
+		return -EINVAL;
+	}
+
+	/* Check that the user buffers don't wrap around the address space. */
+	if (((oper.indata + oper.inlen) < oper.indata) || ((oper.cipher_outdata + oper.cipher_outlen) < oper.cipher_outdata)){
+		DEBUG_API(printk("cryptocop_ioctl_process: user buffers wrapped around, bad user!\n"));
+		return -EINVAL;
+	}
+
+	if (!access_ok(VERIFY_WRITE, oper.cipher_outdata, oper.cipher_outlen)){
+		DEBUG_API(printk("cryptocop_ioctl_process: !access_ok out data!\n"));
+		return -EFAULT;
+	}
+	if (!access_ok(VERIFY_READ, oper.indata, oper.inlen)){
+		DEBUG_API(printk("cryptocop_ioctl_process: !access_ok in data!\n"));
+		return -EFAULT;
+	}
+
+	cop = kmalloc(sizeof(struct cryptocop_operation), GFP_KERNEL);
+	if (!cop) {
+		DEBUG_API(printk("cryptocop_ioctl_process: kmalloc\n"));
+		return -ENOMEM;
+	}
+	jc = kmalloc(sizeof(struct ioctl_job_cb_ctx), GFP_KERNEL);
+	if (!jc) {
+		DEBUG_API(printk("cryptocop_ioctl_process: kmalloc\n"));
+		err = -ENOMEM;
+		goto error_cleanup;
+	}
+	jc->processed = 0;
+
+	cop->cb_data = jc;
+	cop->cb = ioctl_process_job_callback;
+	cop->operation_status = 0;
+	cop->use_dmalists = 0;
+	cop->in_interrupt = 0;
+	cop->fast_callback = 0;
+	cop->tfrm_op.tfrm_cfg = NULL;
+	cop->tfrm_op.desc = NULL;
+	cop->tfrm_op.indata = NULL;
+	cop->tfrm_op.incount = 0;
+	cop->tfrm_op.inlen = 0;
+	cop->tfrm_op.outdata = NULL;
+	cop->tfrm_op.outcount = 0;
+	cop->tfrm_op.outlen = 0;
+
+	sess = get_session(oper.ses_id);
+	if (!sess){
+		DEBUG_API(printk("cryptocop_ioctl_process: bad session id.\n"));
+		kfree(cop);
+		kfree(jc);
+		return -EINVAL;
+	}
+
+	if (oper.do_cipher) {
+		unsigned int                    cipher_outlen = 0;
+		struct cryptocop_transform_ctx  *tc = get_transform_ctx(sess, CRYPTOCOP_IOCTL_CIPHER_TID);
+		if (!tc) {
+			DEBUG_API(printk("cryptocop_ioctl_process: no cipher transform in session.\n"));
+			err = -EINVAL;
+			goto error_cleanup;
+		}
+		ciph_tcfg.tid = CRYPTOCOP_IOCTL_CIPHER_TID;
+		ciph_tcfg.inject_ix = 0;
+		ciph_tcfg.flags = 0;
+		if ((oper.cipher_start < 0) || (oper.cipher_len <= 0) || (oper.cipher_start > oper.inlen) || ((oper.cipher_start + oper.cipher_len) > oper.inlen)){
+			DEBUG_API(printk("cryptocop_ioctl_process: bad cipher length\n"));
+			kfree(cop);
+			kfree(jc);
+			return -EINVAL;
+		}
+		cblocklen = tc->init.alg == cryptocop_alg_aes ? AES_BLOCK_LENGTH : DES_BLOCK_LENGTH;
+		if (oper.cipher_len % cblocklen) {
+			kfree(cop);
+			kfree(jc);
+			DEBUG_API(printk("cryptocop_ioctl_process: cipher inlength not multiple of block length.\n"));
+			return -EINVAL;
+		}
+		cipher_outlen = oper.cipher_len;
+		if (tc->init.cipher_mode == cryptocop_cipher_mode_cbc){
+			if (oper.cipher_explicit) {
+				ciph_tcfg.flags |= CRYPTOCOP_EXPLICIT_IV;
+				memcpy(ciph_tcfg.iv, oper.cipher_iv, cblocklen);
+			} else {
+				cipher_outlen = oper.cipher_len - cblocklen;
+			}
+		} else {
+			if (oper.cipher_explicit){
+				kfree(cop);
+				kfree(jc);
+				DEBUG_API(printk("cryptocop_ioctl_process: explicit_iv when not CBC mode\n"));
+				return -EINVAL;
+			}
+		}
+		if (oper.cipher_outlen != cipher_outlen) {
+			kfree(cop);
+			kfree(jc);
+			DEBUG_API(printk("cryptocop_ioctl_process: cipher_outlen incorrect, should be %d not %d.\n", cipher_outlen, oper.cipher_outlen));
+			return -EINVAL;
+		}
+
+		if (oper.decrypt){
+			ciph_tcfg.flags |= CRYPTOCOP_DECRYPT;
+		} else {
+			ciph_tcfg.flags |= CRYPTOCOP_ENCRYPT;
+		}
+		ciph_tcfg.next = cop->tfrm_op.tfrm_cfg;
+		cop->tfrm_op.tfrm_cfg = &ciph_tcfg;
+	}
+	if (oper.do_digest){
+		struct cryptocop_transform_ctx *tc = get_transform_ctx(sess, CRYPTOCOP_IOCTL_DIGEST_TID);
+		if (!tc) {
+			DEBUG_API(printk("cryptocop_ioctl_process: no digest transform in session.\n"));
+			err = -EINVAL;
+			goto error_cleanup;
+		}
+		digest_length = tc->init.alg == cryptocop_alg_md5 ? 16 : 20;
+		digest_result = kmalloc(digest_length, GFP_KERNEL);
+		if (!digest_result) {
+			DEBUG_API(printk("cryptocop_ioctl_process: kmalloc digest_result\n"));
+			err = -EINVAL;
+			goto error_cleanup;
+		}
+		DEBUG(memset(digest_result, 0xff, digest_length));
+
+		digest_tcfg.tid = CRYPTOCOP_IOCTL_DIGEST_TID;
+		digest_tcfg.inject_ix = 0;
+		ciph_tcfg.inject_ix += digest_length;
+		if ((oper.digest_start < 0) || (oper.digest_len <= 0) || (oper.digest_start > oper.inlen) || ((oper.digest_start + oper.digest_len) > oper.inlen)){
+			DEBUG_API(printk("cryptocop_ioctl_process: bad digest length\n"));
+			err = -EINVAL;
+			goto error_cleanup;
+		}
+
+		digest_tcfg.next = cop->tfrm_op.tfrm_cfg;
+		cop->tfrm_op.tfrm_cfg = &digest_tcfg;
+	}
+	if (oper.do_csum){
+		csum_tcfg.tid = CRYPTOCOP_IOCTL_CSUM_TID;
+		csum_tcfg.inject_ix = digest_length;
+		ciph_tcfg.inject_ix += 2;
+
+		if ((oper.csum_start < 0) || (oper.csum_len <= 0) || (oper.csum_start > oper.inlen) || ((oper.csum_start + oper.csum_len) > oper.inlen)){
+			DEBUG_API(printk("cryptocop_ioctl_process: bad csum length\n"));
+			kfree(cop);
+			kfree(jc);
+			return -EINVAL;
+		}
+
+		csum_tcfg.next = cop->tfrm_op.tfrm_cfg;
+		cop->tfrm_op.tfrm_cfg = &csum_tcfg;
+	}
+
+	prev_ix = first_cfg_change_ix(&oper);
+	if (prev_ix > oper.inlen) {
+		DEBUG_API(printk("cryptocop_ioctl_process: length mismatch\n"));
+		nooutpages = noinpages = 0;
+		err = -EINVAL;
+		goto error_cleanup;
+	}
+	DEBUG(printk("cryptocop_ioctl_process: inlen=%d, cipher_outlen=%d\n", oper.inlen, oper.cipher_outlen));
+
+	/* Map user pages for in and out data of the operation. */
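+	/* The page-count expression below is the usual "round a byte range
+	 * up to whole pages" computation: (offset within the first page +
+	 * length - 1 + (PAGE_SIZE - 1)) >> PAGE_SHIFT, where PAGE_SIZE - 1
+	 * == ~PAGE_MASK.  For example, with 4 KiB pages, 100 bytes at page
+	 * offset 4090 give ((4090 + 100 - 1) + 4095) >> 12 == 2 pages. */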
+	noinpages = (((unsigned long int)(oper.indata + prev_ix) & ~PAGE_MASK) + oper.inlen - 1 - prev_ix + ~PAGE_MASK) >> PAGE_SHIFT;
+	DEBUG(printk("cryptocop_ioctl_process: noinpages=%d\n", noinpages));
+	inpages = kmalloc(noinpages * sizeof(struct page*), GFP_KERNEL);
+	if (!inpages){
+		DEBUG_API(printk("cryptocop_ioctl_process: kmalloc inpages\n"));
+		nooutpages = noinpages = 0;
+		err = -ENOMEM;
+		goto error_cleanup;
+	}
+	if (oper.do_cipher){
+		nooutpages = (((unsigned long int)oper.cipher_outdata & ~PAGE_MASK) + oper.cipher_outlen - 1 + ~PAGE_MASK) >> PAGE_SHIFT;
+		DEBUG(printk("cryptocop_ioctl_process: nooutpages=%d\n", nooutpages));
+		outpages = kmalloc(nooutpages * sizeof(struct page*), GFP_KERNEL);
+		if (!outpages){
+			DEBUG_API(printk("cryptocop_ioctl_process: kmalloc outpages\n"));
+			nooutpages = noinpages = 0;
+			err = -ENOMEM;
+			goto error_cleanup;
+		}
+	}
+
+	/* Acquire the mm page semaphore. */
+	down_read(&current->mm->mmap_sem);
+
+	err = get_user_pages(current,
+			     current->mm,
+			     (unsigned long int)(oper.indata + prev_ix),
+			     noinpages,
+			     0,  /* read access only for in data */
+			     0, /* no force */
+			     inpages,
+			     NULL);
+
+	if (err < 0) {
+		up_read(&current->mm->mmap_sem);
+		nooutpages = noinpages = 0;
+		DEBUG_API(printk("cryptocop_ioctl_process: get_user_pages indata\n"));
+		goto error_cleanup;
+	}
+	noinpages = err;
+	if (oper.do_cipher){
+		err = get_user_pages(current,
+				     current->mm,
+				     (unsigned long int)oper.cipher_outdata,
+				     nooutpages,
+				     1, /* write access for out data */
+				     0, /* no force */
+				     outpages,
+				     NULL);
+		up_read(&current->mm->mmap_sem);
+		if (err < 0) {
+			nooutpages = 0;
+			DEBUG_API(printk("cryptocop_ioctl_process: get_user_pages outdata\n"));
+			goto error_cleanup;
+		}
+		nooutpages = err;
+	} else {
+		up_read(&current->mm->mmap_sem);
+	}
+
+	/* Allocate 6 extra outdata iovecs beyond nooutpages, to make room for
+	 * the inserted buffers that hold digest and csum output and for splits
+	 * when units are (dis-)connected. */
+	cop->tfrm_op.indata = kmalloc((noinpages) * sizeof(struct iovec), GFP_KERNEL);
+	cop->tfrm_op.outdata = kmalloc((6 + nooutpages) * sizeof(struct iovec), GFP_KERNEL);
+	if (!cop->tfrm_op.indata || !cop->tfrm_op.outdata) {
+		DEBUG_API(printk("cryptocop_ioctl_process: kmalloc iovecs\n"));
+		err = -ENOMEM;
+		goto error_cleanup;
+	}
+
+	cop->tfrm_op.inlen = oper.inlen - prev_ix;
+	cop->tfrm_op.outlen = 0;
+	if (oper.do_cipher) cop->tfrm_op.outlen += oper.cipher_outlen;
+	if (oper.do_digest) cop->tfrm_op.outlen += digest_length;
+	if (oper.do_csum) cop->tfrm_op.outlen += 2;
+
+	/* Setup the in iovecs. */
+	cop->tfrm_op.incount = noinpages;
+	if (noinpages > 1){
+		size_t tmplen = cop->tfrm_op.inlen;
+
+		cop->tfrm_op.indata[0].iov_len = PAGE_SIZE - ((unsigned long int)(oper.indata + prev_ix) & ~PAGE_MASK);
+		cop->tfrm_op.indata[0].iov_base = (unsigned char*)page_address(inpages[0]) + ((unsigned long int)(oper.indata + prev_ix) & ~PAGE_MASK);
+		tmplen -= cop->tfrm_op.indata[0].iov_len;
+		for (i = 1; i<noinpages; i++){
+			cop->tfrm_op.indata[i].iov_len = tmplen < PAGE_SIZE ? tmplen : PAGE_SIZE;
+			cop->tfrm_op.indata[i].iov_base = (unsigned char*)page_address(inpages[i]);
+			tmplen -= PAGE_SIZE;
+		}
+	} else {
+		cop->tfrm_op.indata[0].iov_len = oper.inlen - prev_ix;
+		cop->tfrm_op.indata[0].iov_base = (unsigned char*)page_address(inpages[0]) + ((unsigned long int)(oper.indata + prev_ix) & ~PAGE_MASK);
+	}
+
+	iovlen = nooutpages + 6;
+	pageoffset = oper.do_cipher ? ((unsigned long int)oper.cipher_outdata & ~PAGE_MASK) : 0;
+
+	next_ix = next_cfg_change_ix(&oper, prev_ix);
+	if (prev_ix == next_ix){
+		DEBUG_API(printk("cryptocop_ioctl_process: length configuration broken.\n"));
+		err = -EINVAL;  /* This should be impossible barring bugs. */
+		goto error_cleanup;
+	}
+	while (prev_ix != next_ix){
+		end_digest = end_csum = cipher_active = digest_active = csum_active = 0;
+		descs[desc_ix].cfg = NULL;
+		descs[desc_ix].length = next_ix - prev_ix;
+
+		if (oper.do_cipher && (oper.cipher_start < next_ix) && (prev_ix < (oper.cipher_start + oper.cipher_len))) {
+			dcfgs[dcfg_ix].tid = CRYPTOCOP_IOCTL_CIPHER_TID;
+			dcfgs[dcfg_ix].src = cryptocop_source_dma;
+			cipher_active = 1;
+
+			if (next_ix == (oper.cipher_start + oper.cipher_len)){
+				cipher_done = 1;
+				dcfgs[dcfg_ix].last = 1;
+			} else {
+				dcfgs[dcfg_ix].last = 0;
+			}
+			dcfgs[dcfg_ix].next = descs[desc_ix].cfg;
+			descs[desc_ix].cfg = &dcfgs[dcfg_ix];
+			++dcfg_ix;
+		}
+		if (oper.do_digest && (oper.digest_start < next_ix) && (prev_ix < (oper.digest_start + oper.digest_len))) {
+			digest_active = 1;
+			dcfgs[dcfg_ix].tid = CRYPTOCOP_IOCTL_DIGEST_TID;
+			dcfgs[dcfg_ix].src = cryptocop_source_dma;
+			if (next_ix == (oper.digest_start + oper.digest_len)){
+				assert(!digest_done);
+				digest_done = 1;
+				dcfgs[dcfg_ix].last = 1;
+			} else {
+				dcfgs[dcfg_ix].last = 0;
+			}
+			dcfgs[dcfg_ix].next = descs[desc_ix].cfg;
+			descs[desc_ix].cfg = &dcfgs[dcfg_ix];
+			++dcfg_ix;
+		}
+		if (oper.do_csum && (oper.csum_start < next_ix) && (prev_ix < (oper.csum_start + oper.csum_len))){
+			csum_active = 1;
+			dcfgs[dcfg_ix].tid = CRYPTOCOP_IOCTL_CSUM_TID;
+			dcfgs[dcfg_ix].src = cryptocop_source_dma;
+			if (next_ix == (oper.csum_start + oper.csum_len)){
+				csum_done = 1;
+				dcfgs[dcfg_ix].last = 1;
+			} else {
+				dcfgs[dcfg_ix].last = 0;
+			}
+			dcfgs[dcfg_ix].next = descs[desc_ix].cfg;
+			descs[desc_ix].cfg = &dcfgs[dcfg_ix];
+			++dcfg_ix;
+		}
+		if (!descs[desc_ix].cfg){
+			DEBUG_API(printk("cryptocop_ioctl_process: data segment %d (%d to %d) had no active transforms\n", desc_ix, prev_ix, next_ix));
+			err = -EINVAL;
+			goto error_cleanup;
+		}
+		descs[desc_ix].next = &(descs[desc_ix]) + 1;
+		++desc_ix;
+		prev_ix = next_ix;
+		next_ix = next_cfg_change_ix(&oper, prev_ix);
+	}
+	if (desc_ix > 0){
+		descs[desc_ix-1].next = NULL;
+	} else {
+		descs[0].next = NULL;
+	}
+	if (oper.do_digest) {
+		DEBUG(printk("cryptocop_ioctl_process: mapping %d byte digest output to iovec %d\n", digest_length, iovix));
+		/* Add outdata iovec, length == <length of type of digest> */
+		cop->tfrm_op.outdata[iovix].iov_base = digest_result;
+		cop->tfrm_op.outdata[iovix].iov_len = digest_length;
+		++iovix;
+	}
+	if (oper.do_csum) {
+		/* Add outdata iovec, length == 2, the length of the csum. */
+		DEBUG(printk("cryptocop_ioctl_process: mapping 2 byte csum output to iovec %d\n", iovix));
+		cop->tfrm_op.outdata[iovix].iov_base = csum_result;
+		cop->tfrm_op.outdata[iovix].iov_len = 2;
+		++iovix;
+	}
+	if (oper.do_cipher) {
+		if (!map_pages_to_iovec(cop->tfrm_op.outdata, iovlen, &iovix, outpages, nooutpages, &pageix, &pageoffset, oper.cipher_outlen)){
+			DEBUG_API(printk("cryptocop_ioctl_process: failed to map pages to iovec.\n"));
+			err = -ENOSYS; /* This should be impossible barring bugs. */
+			goto error_cleanup;
+		}
+	}
+	DEBUG(printk("cryptocop_ioctl_process: setting cop->tfrm_op.outcount %d\n", iovix));
+	cop->tfrm_op.outcount = iovix;
+	assert(iovix <= (nooutpages + 6));
+
+	cop->sid = oper.ses_id;
+	cop->tfrm_op.desc = &descs[0];
+
+	DEBUG(printk("cryptocop_ioctl_process: inserting job, cb_data=0x%p\n", cop->cb_data));
+
+	if ((err = cryptocop_job_queue_insert_user_job(cop)) != 0) {
+		DEBUG_API(printk("cryptocop_ioctl_process: insert job %d\n", err));
+		err = -EINVAL;
+		goto error_cleanup;
+	}
+
+	DEBUG(printk("cryptocop_ioctl_process: begin wait for result\n"));
+
+	wait_event(cryptocop_ioc_process_wq, (jc->processed != 0));
+	DEBUG(printk("cryptocop_ioctl_process: end wait for result\n"));
+	if (!jc->processed){
+		printk(KERN_WARNING "cryptocop_ioctl_process: job not processed at completion\n");
+		err = -EIO;
+		goto error_cleanup;
+	}
+
+	/* Job processing done.  The cipher output is already in place in the job, so no post-processing of outdata is needed. */
+	DEBUG(printk("cryptocop_ioctl_process: operation_status = %d\n", cop->operation_status));
+	if (cop->operation_status == 0){
+		if (oper.do_digest){
+			DEBUG(printk("cryptocop_ioctl_process: copy %d bytes digest to user\n", digest_length));
+			err = copy_to_user((unsigned char*)crp_oper + offsetof(struct strcop_crypto_op, digest), digest_result, digest_length);
+			if (0 != err){
+				DEBUG_API(printk("cryptocop_ioctl_process: copy_to_user, digest length %d, err %d\n", digest_length, err));
+				err = -EFAULT;
+				goto error_cleanup;
+			}
+		}
+		if (oper.do_csum){
+			DEBUG(printk("cryptocop_ioctl_process: copy 2 bytes checksum to user\n"));
+			err = copy_to_user((unsigned char*)crp_oper + offsetof(struct strcop_crypto_op, csum), csum_result, 2);
+			if (0 != err){
+				DEBUG_API(printk("cryptocop_ioctl_process: copy_to_user, csum, err %d\n", err));
+				err = -EFAULT;
+				goto error_cleanup;
+			}
+		}
+		err = 0;
+	} else {
+		DEBUG(printk("cryptocop_ioctl_process: returning err = operation_status = %d\n", cop->operation_status));
+		err = cop->operation_status;
+	}
+
+ error_cleanup:
+	/* Release references on the mapped user pages. */
+	for (i = 0; i < noinpages; i++){
+		put_page(inpages[i]);
+	}
+	for (i = 0; i < nooutpages; i++){
+		int spdl_err;
+		/* Mark output pages dirty. */
+		spdl_err = set_page_dirty_lock(outpages[i]);
+		DEBUG(if (spdl_err < 0)printk("cryptocop_ioctl_process: set_page_dirty_lock returned %d\n", spdl_err));
+	}
+	for (i = 0; i < nooutpages; i++){
+		put_page(outpages[i]);
+	}
+
+	kfree(digest_result);
+	kfree(inpages);
+	kfree(outpages);
+	if (cop){
+		kfree(cop->tfrm_op.indata);
+		kfree(cop->tfrm_op.outdata);
+		kfree(cop);
+	}
+	kfree(jc);
+
+	DEBUG(print_lock_status());
+
+	return err;
+}
+
+
+static int cryptocop_ioctl_create_session(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg)
+{
+	cryptocop_session_id             sid;
+	int                              err;
+	struct cryptocop_private         *dev;
+	struct strcop_session_op         *sess_op = (struct strcop_session_op *)arg;
+	struct strcop_session_op         sop;
+	struct cryptocop_transform_init  *tis = NULL;
+	struct cryptocop_transform_init  ti_cipher = {0};
+	struct cryptocop_transform_init  ti_digest = {0};
+	struct cryptocop_transform_init  ti_csum = {0};
+
+	if (!access_ok(VERIFY_WRITE, sess_op, sizeof(struct strcop_session_op)))
+		return -EFAULT;
+	err = copy_from_user(&sop, sess_op, sizeof(struct strcop_session_op));
+	if (err) return -EFAULT;
+	if (sop.cipher != cryptocop_cipher_none) {
+		if (!access_ok(VERIFY_READ, sop.key, sop.keylen)) return -EFAULT;
+	}
+	DEBUG(printk("cryptocop_ioctl_create_session, sess_op:\n"));
+
+	DEBUG(printk("\tcipher:%d\n"
+		     "\tcipher_mode:%d\n"
+		     "\tdigest:%d\n"
+		     "\tcsum:%d\n",
+		     (int)sop.cipher,
+		     (int)sop.cmode,
+		     (int)sop.digest,
+		     (int)sop.csum));
+
+	if (sop.cipher != cryptocop_cipher_none){
+		/* Init the cipher. */
+		switch (sop.cipher){
+		case cryptocop_cipher_des:
+			ti_cipher.alg = cryptocop_alg_des;
+			break;
+		case cryptocop_cipher_3des:
+			ti_cipher.alg = cryptocop_alg_3des;
+			break;
+		case cryptocop_cipher_aes:
+			ti_cipher.alg = cryptocop_alg_aes;
+			break;
+		default:
+			DEBUG_API(printk("create session, bad cipher algorithm %d\n", sop.cipher));
+			return -EINVAL;
+		};
+		DEBUG(printk("setting cipher transform %d\n", ti_cipher.alg));
+		if (copy_from_user(ti_cipher.key, sop.key, sop.keylen/8))
+			return -EFAULT;
+		ti_cipher.keylen = sop.keylen;
+		switch (sop.cmode){
+		case cryptocop_cipher_mode_cbc:
+		case cryptocop_cipher_mode_ecb:
+			ti_cipher.cipher_mode = sop.cmode;
+			break;
+		default:
+			DEBUG_API(printk("create session, bad cipher mode %d\n", sop.cmode));
+			return -EINVAL;
+		}
+		DEBUG(printk("cryptocop_ioctl_create_session: setting CBC mode %d\n", ti_cipher.cipher_mode));
+		switch (sop.des3_mode){
+		case cryptocop_3des_eee:
+		case cryptocop_3des_eed:
+		case cryptocop_3des_ede:
+		case cryptocop_3des_edd:
+		case cryptocop_3des_dee:
+		case cryptocop_3des_ded:
+		case cryptocop_3des_dde:
+		case cryptocop_3des_ddd:
+			ti_cipher.tdes_mode = sop.des3_mode;
+			break;
+		default:
+			DEBUG_API(printk("create session, bad 3DES mode %d\n", sop.des3_mode));
+			return -EINVAL;
+		}
+		ti_cipher.tid = CRYPTOCOP_IOCTL_CIPHER_TID;
+		ti_cipher.next = tis;
+		tis = &ti_cipher;
+	} /* if (sop.cipher != cryptocop_cipher_none) */
+	if (sop.digest != cryptocop_digest_none){
+		DEBUG(printk("setting digest transform\n"));
+		switch (sop.digest){
+		case cryptocop_digest_md5:
+			ti_digest.alg = cryptocop_alg_md5;
+			break;
+		case cryptocop_digest_sha1:
+			ti_digest.alg = cryptocop_alg_sha1;
+			break;
+		default:
+			DEBUG_API(printk("create session, bad digest algorithm %d\n", sop.digest));
+			return -EINVAL;
+		}
+		ti_digest.tid = CRYPTOCOP_IOCTL_DIGEST_TID;
+		ti_digest.next = tis;
+		tis = &ti_digest;
+	} /* if (sop.digest != cryptocop_digest_none) */
+	if (sop.csum != cryptocop_csum_none){
+		DEBUG(printk("setting csum transform\n"));
+		switch (sop.csum){
+		case cryptocop_csum_le:
+		case cryptocop_csum_be:
+			ti_csum.csum_mode = sop.csum;
+			break;
+		default:
+			DEBUG_API(printk("create session, bad checksum algorithm %d\n", sop.csum));
+			return -EINVAL;
+		}
+		ti_csum.alg = cryptocop_alg_csum;
+		ti_csum.tid = CRYPTOCOP_IOCTL_CSUM_TID;
+		ti_csum.next = tis;
+		tis = &ti_csum;
+	} /* (sop.csum != cryptocop_csum_none) */
+	dev = kmalloc(sizeof(struct cryptocop_private), GFP_KERNEL);
+	if (!dev){
+		DEBUG_API(printk("create session, alloc dev\n"));
+		return -ENOMEM;
+	}
+
+	err = cryptocop_new_session(&sid, tis, GFP_KERNEL);
+	DEBUG({ if (err) printk("create session, cryptocop_new_session %d\n", err);});
+
+	if (err) {
+		kfree(dev);
+		return err;
+	}
+	sess_op->ses_id = sid;
+	dev->sid = sid;
+	dev->next = filp->private_data;
+	filp->private_data = dev;
+
+	return 0;
+}
+
+static long cryptocop_ioctl_unlocked(struct inode *inode,
+	struct file *filp, unsigned int cmd, unsigned long arg)
+{
+	int err = 0;
+	if (_IOC_TYPE(cmd) != ETRAXCRYPTOCOP_IOCTYPE) {
+		DEBUG_API(printk("cryptocop_ioctl: wrong type\n"));
+		return -ENOTTY;
+	}
+	if (_IOC_NR(cmd) > CRYPTOCOP_IO_MAXNR){
+		return -ENOTTY;
+	}
+	/* Access check of the argument.  Some commands, e.g. create session
+	   and process op, need additional checks; those are handled in the
+	   command handling functions.  Note that _IOC_READ means user space
+	   reads the buffer (the kernel writes it), hence VERIFY_WRITE, and
+	   vice versa. */
+	if (_IOC_DIR(cmd) & _IOC_READ)
+		err = !access_ok(VERIFY_WRITE, (void *)arg, _IOC_SIZE(cmd));
+	else if (_IOC_DIR(cmd) & _IOC_WRITE)
+		err = !access_ok(VERIFY_READ, (void *)arg, _IOC_SIZE(cmd));
+	if (err) return -EFAULT;
+
+	switch (cmd) {
+	case CRYPTOCOP_IO_CREATE_SESSION:
+		return cryptocop_ioctl_create_session(inode, filp, cmd, arg);
+	case CRYPTOCOP_IO_CLOSE_SESSION:
+		return cryptocop_ioctl_close_session(inode, filp, cmd, arg);
+	case CRYPTOCOP_IO_PROCESS_OP:
+		return cryptocop_ioctl_process(inode, filp, cmd, arg);
+	default:
+		DEBUG_API(printk("cryptocop_ioctl: unknown command\n"));
+		return -ENOTTY;
+	}
+	return 0;
+}
+
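+/* The split between cryptocop_ioctl_unlocked() above and cryptocop_ioctl()
+ * below is the usual pattern left over from big-kernel-lock removal: a
+ * single driver mutex serializes all ioctl handling, and the locked
+ * wrapper is presumably what cryptocop_fops uses as its ioctl entry
+ * point. */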
+static long
+cryptocop_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
+{
+	long ret;
+
+	mutex_lock(&cryptocop_mutex);
+	ret = cryptocop_ioctl_unlocked(file_inode(filp), filp, cmd, arg);
+	mutex_unlock(&cryptocop_mutex);
+
+	return ret;
+}
+
+
+#ifdef LDEBUG
+static void print_dma_descriptors(struct cryptocop_int_operation *iop)
+{
+	struct cryptocop_dma_desc *cdesc_out = iop->cdesc_out;
+	struct cryptocop_dma_desc *cdesc_in = iop->cdesc_in;
+	int                       i;
+
+	printk("print_dma_descriptors start\n");
+
+	printk("iop:\n");
+	printk("\tsid: 0x%lld\n", iop->sid);
+
+	printk("\tcdesc_out: 0x%p\n", iop->cdesc_out);
+	printk("\tcdesc_in: 0x%p\n", iop->cdesc_in);
+	printk("\tddesc_out: 0x%p\n", iop->ddesc_out);
+	printk("\tddesc_in: 0x%p\n", iop->ddesc_in);
+
+	printk("\niop->ctx_out: 0x%p phys: 0x%p\n", &iop->ctx_out, (char*)virt_to_phys(&iop->ctx_out));
+	printk("\tnext: 0x%p\n"
+	       "\tsaved_data: 0x%p\n"
+	       "\tsaved_data_buf: 0x%p\n",
+	       iop->ctx_out.next,
+	       iop->ctx_out.saved_data,
+	       iop->ctx_out.saved_data_buf);
+
+	printk("\niop->ctx_in: 0x%p phys: 0x%p\n", &iop->ctx_in, (char*)virt_to_phys(&iop->ctx_in));
+	printk("\tnext: 0x%p\n"
+	       "\tsaved_data: 0x%p\n"
+	       "\tsaved_data_buf: 0x%p\n",
+	       iop->ctx_in.next,
+	       iop->ctx_in.saved_data,
+	       iop->ctx_in.saved_data_buf);
+
+	i = 0;
+	while (cdesc_out) {
+		dma_descr_data *td;
+		printk("cdesc_out %d, desc=0x%p\n", i, cdesc_out->dma_descr);
+		printk("\n\tvirt_to_phys(desc): 0x%p\n", (char*)virt_to_phys(cdesc_out->dma_descr));
+		td = cdesc_out->dma_descr;
+		printk("\n\tbuf: 0x%p\n"
+		       "\tafter: 0x%p\n"
+		       "\tmd: 0x%04x\n"
+		       "\tnext: 0x%p\n",
+		       td->buf,
+		       td->after,
+		       td->md,
+		       td->next);
+		printk("flags:\n"
+		       "\twait:\t%d\n"
+		       "\teol:\t%d\n"
+		       "\touteop:\t%d\n"
+		       "\tineop:\t%d\n"
+		       "\tintr:\t%d\n",
+		       td->wait,
+		       td->eol,
+		       td->out_eop,
+		       td->in_eop,
+		       td->intr);
+		cdesc_out = cdesc_out->next;
+		i++;
+	}
+	i = 0;
+	while (cdesc_in) {
+		dma_descr_data *td;
+		printk("cdesc_in %d, desc=0x%p\n", i, cdesc_in->dma_descr);
+		printk("\n\tvirt_to_phys(desc): 0x%p\n", (char*)virt_to_phys(cdesc_in->dma_descr));
+		td = cdesc_in->dma_descr;
+		printk("\n\tbuf: 0x%p\n"
+		       "\tafter: 0x%p\n"
+		       "\tmd: 0x%04x\n"
+		       "\tnext: 0x%p\n",
+		       td->buf,
+		       td->after,
+		       td->md,
+		       td->next);
+		printk("flags:\n"
+		       "\twait:\t%d\n"
+		       "\teol:\t%d\n"
+		       "\touteop:\t%d\n"
+		       "\tineop:\t%d\n"
+		       "\tintr:\t%d\n",
+		       td->wait,
+		       td->eol,
+		       td->out_eop,
+		       td->in_eop,
+		       td->intr);
+		cdesc_in = cdesc_in->next;
+		i++;
+	}
+
+	printk("print_dma_descriptors end\n");
+}
+
+
+static void print_strcop_crypto_op(struct strcop_crypto_op *cop)
+{
+	printk("print_strcop_crypto_op, 0x%p\n", cop);
+
+	/* Indata. */
+	printk("indata=0x%p\n"
+	       "inlen=%d\n"
+	       "do_cipher=%d\n"
+	       "decrypt=%d\n"
+	       "cipher_explicit=%d\n"
+	       "cipher_start=%d\n"
+	       "cipher_len=%d\n"
+	       "outdata=0x%p\n"
+	       "outlen=%d\n",
+	       cop->indata,
+	       cop->inlen,
+	       cop->do_cipher,
+	       cop->decrypt,
+	       cop->cipher_explicit,
+	       cop->cipher_start,
+	       cop->cipher_len,
+	       cop->cipher_outdata,
+	       cop->cipher_outlen);
+
+	printk("do_digest=%d\n"
+	       "digest_start=%d\n"
+	       "digest_len=%d\n",
+	       cop->do_digest,
+	       cop->digest_start,
+	       cop->digest_len);
+
+	printk("do_csum=%d\n"
+	       "csum_start=%d\n"
+	       "csum_len=%d\n",
+	       cop->do_csum,
+	       cop->csum_start,
+	       cop->csum_len);
+}
+
+static void print_cryptocop_operation(struct cryptocop_operation *cop)
+{
+	struct cryptocop_desc      *d;
+	struct cryptocop_tfrm_cfg  *tc;
+	struct cryptocop_desc_cfg  *dc;
+	int                        i;
+
+	printk("print_cryptocop_operation, cop=0x%p\n\n", cop);
+	printk("sid: %lld\n", cop->sid);
+	printk("operation_status=%d\n"
+	       "use_dmalists=%d\n"
+	       "in_interrupt=%d\n"
+	       "fast_callback=%d\n",
+	       cop->operation_status,
+	       cop->use_dmalists,
+	       cop->in_interrupt,
+	       cop->fast_callback);
+
+	if (cop->use_dmalists){
+		print_user_dma_lists(&cop->list_op);
+	} else {
+		printk("cop->tfrm_op\n"
+		       "tfrm_cfg=0x%p\n"
+		       "desc=0x%p\n"
+		       "indata=0x%p\n"
+		       "incount=%d\n"
+		       "inlen=%d\n"
+		       "outdata=0x%p\n"
+		       "outcount=%d\n"
+		       "outlen=%d\n\n",
+		       cop->tfrm_op.tfrm_cfg,
+		       cop->tfrm_op.desc,
+		       cop->tfrm_op.indata,
+		       cop->tfrm_op.incount,
+		       cop->tfrm_op.inlen,
+		       cop->tfrm_op.outdata,
+		       cop->tfrm_op.outcount,
+		       cop->tfrm_op.outlen);
+
+		tc = cop->tfrm_op.tfrm_cfg;
+		while (tc){
+			printk("tfrm_cfg, 0x%p\n"
+			       "tid=%d\n"
+			       "flags=%d\n"
+			       "inject_ix=%d\n"
+			       "next=0x%p\n",
+			       tc,
+			       tc->tid,
+			       tc->flags,
+			       tc->inject_ix,
+			       tc->next);
+			tc = tc->next;
+		}
+		d = cop->tfrm_op.desc;
+		while (d){
+			printk("\n======================desc, 0x%p\n"
+			       "length=%d\n"
+			       "cfg=0x%p\n"
+			       "next=0x%p\n",
+			       d,
+			       d->length,
+			       d->cfg,
+			       d->next);
+			dc = d->cfg;
+			while (dc){
+				printk("=========desc_cfg, 0x%p\n"
+				       "tid=%d\n"
+				       "src=%d\n"
+				       "last=%d\n"
+				       "next=0x%p\n",
+				       dc,
+				       dc->tid,
+				       dc->src,
+				       dc->last,
+				       dc->next);
+				dc = dc->next;
+			}
+			d = d->next;
+		}
+		printk("\n====iniov\n");
+		for (i = 0; i < cop->tfrm_op.incount; i++){
+			printk("indata[%d]\n"
+			       "base=0x%p\n"
+			       "len=%d\n",
+			       i,
+			       cop->tfrm_op.indata[i].iov_base,
+			       cop->tfrm_op.indata[i].iov_len);
+		}
+		printk("\n====outiov\n");
+		for (i = 0; i < cop->tfrm_op.outcount; i++){
+			printk("outdata[%d]\n"
+			       "base=0x%p\n"
+			       "len=%d\n",
+			       i,
+			       cop->tfrm_op.outdata[i].iov_base,
+			       cop->tfrm_op.outdata[i].iov_len);
+		}
+	}
+	printk("------------end print_cryptocop_operation\n");
+}
+
+
+static void print_user_dma_lists(struct cryptocop_dma_list_operation *dma_op)
+{
+	dma_descr_data *dd;
+	int i;
+
+	printk("print_user_dma_lists, dma_op=0x%p\n", dma_op);
+
+	printk("out_data_buf = 0x%p, phys_to_virt(out_data_buf) = 0x%p\n", dma_op->out_data_buf, phys_to_virt((unsigned long int)dma_op->out_data_buf));
+	printk("in_data_buf = 0x%p, phys_to_virt(in_data_buf) = 0x%p\n", dma_op->in_data_buf, phys_to_virt((unsigned long int)dma_op->in_data_buf));
+
+	printk("##############outlist\n");
+	dd = phys_to_virt((unsigned long int)dma_op->outlist);
+	i = 0;
+	while (dd != NULL) {
+		printk("#%d phys_to_virt(desc) 0x%p\n", i, dd);
+		printk("\n\tbuf: 0x%p\n"
+		       "\tafter: 0x%p\n"
+		       "\tmd: 0x%04x\n"
+		       "\tnext: 0x%p\n",
+		       dd->buf,
+		       dd->after,
+		       dd->md,
+		       dd->next);
+		printk("flags:\n"
+		       "\twait:\t%d\n"
+		       "\teol:\t%d\n"
+		       "\touteop:\t%d\n"
+		       "\tineop:\t%d\n"
+		       "\tintr:\t%d\n",
+		       dd->wait,
+		       dd->eol,
+		       dd->out_eop,
+		       dd->in_eop,
+		       dd->intr);
+		if (dd->eol)
+			dd = NULL;
+		else
+			dd = phys_to_virt((unsigned long int)dd->next);
+		++i;
+	}
+
+	printk("##############inlist\n");
+	dd = phys_to_virt((unsigned long int)dma_op->inlist);
+	i = 0;
+	while (dd != NULL) {
+		printk("#%d phys_to_virt(desc) 0x%p\n", i, dd);
+		printk("\n\tbuf: 0x%p\n"
+		       "\tafter: 0x%p\n"
+		       "\tmd: 0x%04x\n"
+		       "\tnext: 0x%p\n",
+		       dd->buf,
+		       dd->after,
+		       dd->md,
+		       dd->next);
+		printk("flags:\n"
+		       "\twait:\t%d\n"
+		       "\teol:\t%d\n"
+		       "\touteop:\t%d\n"
+		       "\tineop:\t%d\n"
+		       "\tintr:\t%d\n",
+		       dd->wait,
+		       dd->eol,
+		       dd->out_eop,
+		       dd->in_eop,
+		       dd->intr);
+		if (dd->eol)
+			dd = NULL;
+		else
+			dd = phys_to_virt((unsigned long int)dd->next);
+		++i;
+	}
+}
+
+
+static void print_lock_status(void)
+{
+	printk("**********************print_lock_status\n");
+	printk("cryptocop_completed_jobs_lock %d\n", spin_is_locked(&cryptocop_completed_jobs_lock));
+	printk("cryptocop_job_queue_lock %d\n", spin_is_locked(&cryptocop_job_queue_lock));
+	printk("descr_pool_lock %d\n", spin_is_locked(&descr_pool_lock));
+	printk("cryptocop_sessions_lock %d\n", spin_is_locked(cryptocop_sessions_lock));
+	printk("running_job_lock %d\n", spin_is_locked(running_job_lock));
+	printk("cryptocop_process_lock %d\n", spin_is_locked(cryptocop_process_lock));
+}
+#endif /* LDEBUG */
+
+
+static const char cryptocop_name[] = "ETRAX FS stream co-processor";
+
+static int init_stream_coprocessor(void)
+{
+	int err;
+	int i;
+	static int initialized = 0;
+
+	if (initialized)
+		return 0;
+
+	initialized = 1;
+
+	printk("ETRAX FS stream co-processor driver v0.01, (c) 2003 Axis Communications AB\n");
+
+	err = register_chrdev(CRYPTOCOP_MAJOR, cryptocop_name, &cryptocop_fops);
+	if (err < 0) {
+		printk(KERN_ERR "stream co-processor: could not get major number.\n");
+		return err;
+	}
+
+	err = init_cryptocop();
+	if (err) {
+		(void)unregister_chrdev(CRYPTOCOP_MAJOR, cryptocop_name);
+		return err;
+	}
+	err = cryptocop_job_queue_init();
+	if (err) {
+		release_cryptocop();
+		(void)unregister_chrdev(CRYPTOCOP_MAJOR, cryptocop_name);
+		return err;
+	}
+	/* Init the descriptor pool. */
+	for (i = 0; i < CRYPTOCOP_DESCRIPTOR_POOL_SIZE - 1; i++) {
+		descr_pool[i].from_pool = 1;
+		descr_pool[i].next = &descr_pool[i + 1];
+	}
+	descr_pool[i].from_pool = 1;
+	descr_pool[i].next = NULL;
+	descr_pool_free_list = &descr_pool[0];
+	descr_pool_no_free = CRYPTOCOP_DESCRIPTOR_POOL_SIZE;
+
+	spin_lock_init(&cryptocop_completed_jobs_lock);
+	spin_lock_init(&cryptocop_job_queue_lock);
+	spin_lock_init(&descr_pool_lock);
+	spin_lock_init(&cryptocop_sessions_lock);
+	spin_lock_init(&running_job_lock);
+	spin_lock_init(&cryptocop_process_lock);
+
+	cryptocop_sessions = NULL;
+	next_sid = 1;
+
+	cryptocop_running_job = NULL;
+
+	printk("stream co-processor: init done.\n");
+	return 0;
+}
+
+static void __exit exit_stream_coprocessor(void)
+{
+	release_cryptocop();
+	cryptocop_job_queue_close();
+}
+
+module_init(init_stream_coprocessor);
+module_exit(exit_stream_coprocessor);
+
diff --git a/arch/cris/arch-v32/drivers/iop_fw_load.c b/arch/cris/arch-v32/drivers/iop_fw_load.c
new file mode 100644
index 0000000..2f8ea0f
--- /dev/null
+++ b/arch/cris/arch-v32/drivers/iop_fw_load.c
@@ -0,0 +1,230 @@
+/*
+ * Firmware loader for ETRAX FS IO-Processor
+ *
+ * Copyright (C) 2004  Axis Communications AB
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/device.h>
+#include <linux/firmware.h>
+
+#include <hwregs/reg_rdwr.h>
+#include <hwregs/reg_map.h>
+#include <hwregs/iop/iop_reg_space.h>
+#include <hwregs/iop/iop_mpu_macros.h>
+#include <hwregs/iop/iop_mpu_defs.h>
+#include <hwregs/iop/iop_spu_defs.h>
+#include <hwregs/iop/iop_sw_cpu_defs.h>
+
+#define IOP_TIMEOUT 100
+
+#error "This driver is broken with regard to its driver core usage."
+#error "Please contact <greg@kroah.com> for details on how to fix it properly."
+
+static struct device iop_spu_device[2] = {
+	{ .init_name =     "iop-spu0", },
+	{ .init_name =     "iop-spu1", },
+};
+
+static struct device iop_mpu_device = {
+	.init_name =       "iop-mpu",
+};
+
+static int wait_mpu_idle(void)
+{
+	reg_iop_mpu_r_stat mpu_stat;
+	unsigned int timeout = IOP_TIMEOUT;
+
+	do {
+		mpu_stat = REG_RD(iop_mpu, regi_iop_mpu, r_stat);
+	} while (mpu_stat.instr_reg_busy == regk_iop_mpu_yes && --timeout > 0);
+	if (timeout == 0) {
+		printk(KERN_ERR "Timeout waiting for MPU to be idle\n");
+		return -EBUSY;
+	}
+	return 0;
+}
+
+int iop_fw_load_spu(const unsigned char *fw_name, unsigned int spu_inst)
+{
+	reg_iop_sw_cpu_rw_mc_ctrl mc_ctrl = {
+		.wr_spu0_mem =    regk_iop_sw_cpu_no,
+		.wr_spu1_mem =    regk_iop_sw_cpu_no,
+		.size =           4,
+		.cmd =            regk_iop_sw_cpu_reg_copy,
+		.keep_owner =     regk_iop_sw_cpu_yes
+	};
+	reg_iop_spu_rw_ctrl spu_ctrl = {
+		.en  =            regk_iop_spu_no,
+		.fsm =            regk_iop_spu_no,
+	};
+	reg_iop_sw_cpu_r_mc_stat mc_stat;
+	const struct firmware *fw_entry;
+	u32 *data;
+	unsigned int timeout;
+	int retval, i;
+
+	if (spu_inst > 1)
+		return -ENODEV;
+
+	/* get firmware */
+	retval = request_firmware(&fw_entry,
+				  fw_name,
+				  &iop_spu_device[spu_inst]);
+	if (retval != 0) {
+		printk(KERN_ERR
+		       "iop_load_spu: Failed to load firmware \"%s\"\n",
+		       fw_name);
+		return retval;
+	}
+	data = (u32 *) fw_entry->data;
+
+	/* acquire ownership of memory controller */
+	switch (spu_inst) {
+	case 0:
+		mc_ctrl.wr_spu0_mem = regk_iop_sw_cpu_yes;
+		REG_WR(iop_spu, regi_iop_spu0, rw_ctrl, spu_ctrl);
+		break;
+	case 1:
+		mc_ctrl.wr_spu1_mem = regk_iop_sw_cpu_yes;
+		REG_WR(iop_spu, regi_iop_spu1, rw_ctrl, spu_ctrl);
+		break;
+	}
+	timeout = IOP_TIMEOUT;
+	do {
+		REG_WR(iop_sw_cpu, regi_iop_sw_cpu, rw_mc_ctrl, mc_ctrl);
+		mc_stat = REG_RD(iop_sw_cpu, regi_iop_sw_cpu, r_mc_stat);
+	} while (mc_stat.owned_by_cpu == regk_iop_sw_cpu_no && --timeout > 0);
+	if (timeout == 0) {
+		printk(KERN_ERR "Timeout waiting to acquire MC\n");
+		retval = -EBUSY;
+		goto out;
+	}
+
+	/* write to SPU memory */
+	for (i = 0; i < (fw_entry->size/4); i++) {
+		switch (spu_inst) {
+		case 0:
+			REG_WR_INT(iop_spu, regi_iop_spu0, rw_seq_pc, (i*4));
+			break;
+		case 1:
+			REG_WR_INT(iop_spu, regi_iop_spu1, rw_seq_pc, (i*4));
+			break;
+		}
+		REG_WR_INT(iop_sw_cpu, regi_iop_sw_cpu, rw_mc_data, *data);
+		data++;
+	}
+
+	/* release ownership of memory controller */
+	(void) REG_RD(iop_sw_cpu, regi_iop_sw_cpu, rs_mc_data);
+
+ out:
+	release_firmware(fw_entry);
+	return retval;
+}
+
+int iop_fw_load_mpu(unsigned char *fw_name)
+{
+	const unsigned int start_addr = 0;
+	reg_iop_mpu_rw_ctrl mpu_ctrl;
+	const struct firmware *fw_entry;
+	u32 *data;
+	int retval, i;
+
+	/* get firmware */
+	retval = request_firmware(&fw_entry, fw_name, &iop_mpu_device);
+	if (retval != 0) {
+		printk(KERN_ERR
+		       "iop_load_spu: Failed to load firmware \"%s\"\n",
+		       fw_name);
+		return retval;
+	}
+	data = (u32 *) fw_entry->data;
+
+	/* disable MPU */
+	mpu_ctrl.en = regk_iop_mpu_no;
+	REG_WR(iop_mpu, regi_iop_mpu, rw_ctrl, mpu_ctrl);
+	/* put start address in R0 */
+	REG_WR_VECT(iop_mpu, regi_iop_mpu, rw_r, 0, start_addr);
+	/* write to memory by executing 'SWX i, 4, R0' for each word */
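+	/* (Presumably each SWX stores the current immediate to the address
+	 * in R0 and advances R0 by 4, so the loop below writes consecutive
+	 * words starting at start_addr.) */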
+	if ((retval = wait_mpu_idle()) != 0)
+		goto out;
+	REG_WR(iop_mpu, regi_iop_mpu, rw_instr, MPU_SWX_IIR_INSTR(0, 4, 0));
+	for (i = 0; i < (fw_entry->size / 4); i++) {
+		REG_WR_INT(iop_mpu, regi_iop_mpu, rw_immediate, *data);
+		if ((retval = wait_mpu_idle()) != 0)
+			goto out;
+		data++;
+	}
+
+ out:
+	release_firmware(fw_entry);
+	return retval;
+}
+
+int iop_start_mpu(unsigned int start_addr)
+{
+	reg_iop_mpu_rw_ctrl mpu_ctrl = { .en = regk_iop_mpu_yes };
+	int retval;
+
+	/* halt the MPU */
+	if ((retval = wait_mpu_idle()) != 0)
+		goto out;
+	REG_WR(iop_mpu, regi_iop_mpu, rw_instr, MPU_HALT());
+	if ((retval = wait_mpu_idle()) != 0)
+		goto out;
+	/* set PC and wait for it to bite */
+	if ((retval = wait_mpu_idle()) != 0)
+		goto out;
+	REG_WR_INT(iop_mpu, regi_iop_mpu, rw_instr, MPU_BA_I(start_addr));
+	if ((retval = wait_mpu_idle()) != 0)
+		goto out;
+	/* make sure the MPU starts executing with interrupts disabled */
+	REG_WR(iop_mpu, regi_iop_mpu, rw_instr, MPU_DI());
+	if ((retval = wait_mpu_idle()) != 0)
+		goto out;
+	/* enable MPU */
+	REG_WR(iop_mpu, regi_iop_mpu, rw_ctrl, mpu_ctrl);
+ out:
+	return retval;
+}
+
+static int __init iop_fw_load_init(void)
+{
+#if 0
+	/*
+	 * Static struct devices cannot be added directly to sysfs; doing so
+	 * bypasses the driver model infrastructure.  To fix this properly,
+	 * please use the platform_bus to register these devices so that the
+	 * firmware infrastructure can be used correctly.
+	 */
+	device_initialize(&iop_spu_device[0]);
+	kobject_set_name(&iop_spu_device[0].kobj, "iop-spu0");
+	kobject_add(&iop_spu_device[0].kobj);
+	device_initialize(&iop_spu_device[1]);
+	kobject_set_name(&iop_spu_device[1].kobj, "iop-spu1");
+	kobject_add(&iop_spu_device[1].kobj);
+	device_initialize(&iop_mpu_device);
+	kobject_set_name(&iop_mpu_device.kobj, "iop-mpu");
+	kobject_add(&iop_mpu_device.kobj);
+#endif
+	return 0;
+}
+
+static void __exit iop_fw_load_exit(void)
+{
+}
+
+module_init(iop_fw_load_init);
+module_exit(iop_fw_load_exit);
+
+MODULE_DESCRIPTION("ETRAX FS IO-Processor Firmware Loader");
+MODULE_LICENSE("GPL");
+
+EXPORT_SYMBOL(iop_fw_load_spu);
+EXPORT_SYMBOL(iop_fw_load_mpu);
+EXPORT_SYMBOL(iop_start_mpu);
diff --git a/arch/cris/arch-v32/drivers/mach-a3/Makefile b/arch/cris/arch-v32/drivers/mach-a3/Makefile
new file mode 100644
index 0000000..59028d0
--- /dev/null
+++ b/arch/cris/arch-v32/drivers/mach-a3/Makefile
@@ -0,0 +1,5 @@
+#
+# Makefile for Etrax-specific drivers
+#
+
+obj-$(CONFIG_ETRAX_NANDFLASH)   += nandflash.o
diff --git a/arch/cris/arch-v32/drivers/mach-a3/nandflash.c b/arch/cris/arch-v32/drivers/mach-a3/nandflash.c
new file mode 100644
index 0000000..7fb5212
--- /dev/null
+++ b/arch/cris/arch-v32/drivers/mach-a3/nandflash.c
@@ -0,0 +1,180 @@
+/*
+ *  arch/cris/arch-v32/drivers/nandflash.c
+ *
+ *  Copyright (c) 2007
+ *
+ *  Derived from drivers/mtd/nand/spia.c
+ *	  Copyright (C) 2000 Steven J. Hill (sjhill@realitydiluted.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/slab.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/mtd/mtd.h>
+#include <linux/mtd/nand.h>
+#include <linux/mtd/partitions.h>
+#include <arch/memmap.h>
+#include <hwregs/reg_map.h>
+#include <hwregs/reg_rdwr.h>
+#include <hwregs/pio_defs.h>
+#include <pinmux.h>
+#include <asm/io.h>
+
+#define MANUAL_ALE_CLE_CONTROL 1
+
+#define regf_ALE	a0
+#define regf_CLE	a1
+#define regf_NCE	ce0_n
+
+#define CLE_BIT 10
+#define ALE_BIT 11
+#define CE_BIT 12
+
+struct mtd_info_wrapper {
+	struct mtd_info info;
+	struct nand_chip chip;
+};
+
+/* Bitmask for control pins */
+#define PIN_BITMASK ((1 << CE_BIT) | (1 << CLE_BIT) | (1 << ALE_BIT))
+
+static struct mtd_info *crisv32_mtd;
+/*
+ *	hardware specific access to control-lines
+ */
+static void crisv32_hwcontrol(struct mtd_info *mtd, int cmd,
+			      unsigned int ctrl)
+{
+	unsigned long flags;
+	reg_pio_rw_dout dout;
+	struct nand_chip *this = mtd->priv;
+
+	local_irq_save(flags);
+
+	/* control bits change */
+	if (ctrl & NAND_CTRL_CHANGE) {
+		dout = REG_RD(pio, regi_pio, rw_dout);
+		dout.regf_NCE = (ctrl & NAND_NCE) ? 0 : 1;
+
+#if !MANUAL_ALE_CLE_CONTROL
+		if (ctrl & NAND_ALE) {
+			/* A0 = ALE high */
+			this->IO_ADDR_W = (void __iomem *)REG_ADDR(pio,
+				regi_pio, rw_io_access1);
+		} else if (ctrl & NAND_CLE) {
+			/* A1 = CLE high */
+			this->IO_ADDR_W = (void __iomem *)REG_ADDR(pio,
+				regi_pio, rw_io_access2);
+		} else {
+			/* A1 = CLE and A0 = ALE low */
+			this->IO_ADDR_W = (void __iomem *)REG_ADDR(pio,
+				regi_pio, rw_io_access0);
+		}
+#else
+
+		dout.regf_CLE = (ctrl & NAND_CLE) ? 1 : 0;
+		dout.regf_ALE = (ctrl & NAND_ALE) ? 1 : 0;
+#endif
+		REG_WR(pio, regi_pio, rw_dout, dout);
+	}
+
+	/* command to chip */
+	if (cmd != NAND_CMD_NONE)
+		writeb(cmd, this->IO_ADDR_W);
+
+	local_irq_restore(flags);
+}
+
+/*
+ * read device ready pin
+ */
+static int crisv32_device_ready(struct mtd_info *mtd)
+{
+	reg_pio_r_din din = REG_RD(pio, regi_pio, r_din);
+	return din.rdy;
+}
+
+/*
+ * Main initialization routine
+ */
+struct mtd_info *__init crisv32_nand_flash_probe(void)
+{
+	void __iomem *read_cs;
+	void __iomem *write_cs;
+
+	struct mtd_info_wrapper *wrapper;
+	struct nand_chip *this;
+	int err = 0;
+
+	reg_pio_rw_man_ctrl man_ctrl = {
+		.regf_NCE = regk_pio_yes,
+#if MANUAL_ALE_CLE_CONTROL
+		.regf_ALE = regk_pio_yes,
+		.regf_CLE = regk_pio_yes
+#endif
+	};
+	reg_pio_rw_oe oe = {
+		.regf_NCE = regk_pio_yes,
+#if MANUAL_ALE_CLE_CONTROL
+		.regf_ALE = regk_pio_yes,
+		.regf_CLE = regk_pio_yes
+#endif
+	};
+	reg_pio_rw_dout dout = { .regf_NCE = 1 };
+
+	/* Allocate pio pins to pio */
+	crisv32_pinmux_alloc_fixed(pinmux_pio);
+	/* Set up CE, ALE, CLE (ce0_n, a0, a1) for manual control and output */
+	REG_WR(pio, regi_pio, rw_man_ctrl, man_ctrl);
+	REG_WR(pio, regi_pio, rw_dout, dout);
+	REG_WR(pio, regi_pio, rw_oe, oe);
+
+	/* Allocate memory for MTD device structure and private data */
+	wrapper = kzalloc(sizeof(struct mtd_info_wrapper), GFP_KERNEL);
+	if (!wrapper) {
+		printk(KERN_ERR "Unable to allocate CRISv32 NAND MTD "
+			"device structure.\n");
+		err = -ENOMEM;
+		return NULL;
+	}
+
+	read_cs = write_cs = (void __iomem *)REG_ADDR(pio, regi_pio,
+		rw_io_access0);
+
+	/* Get pointer to private data */
+	this = &wrapper->chip;
+	crisv32_mtd = &wrapper->info;
+
+	/* Link the private data with the MTD structure */
+	crisv32_mtd->priv = this;
+
+	/* Set address of NAND IO lines */
+	this->IO_ADDR_R = read_cs;
+	this->IO_ADDR_W = write_cs;
+	this->cmd_ctrl = crisv32_hwcontrol;
+	this->dev_ready = crisv32_device_ready;
+	/* 20 us command delay time */
+	this->chip_delay = 20;
+	this->ecc.mode = NAND_ECC_SOFT;
+
+	/* Enable the following for a flash based bad block table */
+	/* this->bbt_options = NAND_BBT_USE_FLASH; */
+
+	/* Scan to find existence of the device */
+	if (nand_scan(crisv32_mtd, 1)) {
+		err = -ENXIO;
+		goto out_mtd;
+	}
+
+	return crisv32_mtd;
+
+out_mtd:
+	kfree(wrapper);
+	return NULL;
+}
+
diff --git a/arch/cris/arch-v32/drivers/mach-fs/Makefile b/arch/cris/arch-v32/drivers/mach-fs/Makefile
new file mode 100644
index 0000000..59028d0
--- /dev/null
+++ b/arch/cris/arch-v32/drivers/mach-fs/Makefile
@@ -0,0 +1,5 @@
+#
+# Makefile for Etrax-specific drivers
+#
+
+obj-$(CONFIG_ETRAX_NANDFLASH)   += nandflash.o
diff --git a/arch/cris/arch-v32/drivers/mach-fs/nandflash.c b/arch/cris/arch-v32/drivers/mach-fs/nandflash.c
new file mode 100644
index 0000000..e032384
--- /dev/null
+++ b/arch/cris/arch-v32/drivers/mach-fs/nandflash.c
@@ -0,0 +1,174 @@
+/*
+ *  arch/cris/arch-v32/drivers/mach-fs/nandflash.c
+ *
+ *  Copyright (c) 2004
+ *
+ *  Derived from drivers/mtd/nand/spia.c
+ *	  Copyright (C) 2000 Steven J. Hill (sjhill@realitydiluted.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/slab.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/mtd/mtd.h>
+#include <linux/mtd/nand.h>
+#include <linux/mtd/partitions.h>
+#include <arch/memmap.h>
+#include <hwregs/reg_map.h>
+#include <hwregs/reg_rdwr.h>
+#include <hwregs/gio_defs.h>
+#include <hwregs/bif_core_defs.h>
+#include <asm/io.h>
+
+#define CE_BIT 4
+#define CLE_BIT 5
+#define ALE_BIT 6
+#define BY_BIT 7
+
+struct mtd_info_wrapper {
+	struct mtd_info info;
+	struct nand_chip chip;
+};
+
+/* Bitmask for control pins */
+#define PIN_BITMASK ((1 << CE_BIT) | (1 << CLE_BIT) | (1 << ALE_BIT))
+
+/* Bitmask for mtd nand control bits */
+#define CTRL_BITMASK (NAND_NCE | NAND_CLE | NAND_ALE)
+
+
+static struct mtd_info *crisv32_mtd;
+/*
+ *	hardware specific access to control-lines
+ */
+static void crisv32_hwcontrol(struct mtd_info *mtd, int cmd,
+			      unsigned int ctrl)
+{
+	unsigned long flags;
+	reg_gio_rw_pa_dout dout;
+	struct nand_chip *this = mtd->priv;
+
+	local_irq_save(flags);
+
+	/* control bits change */
+	if (ctrl & NAND_CTRL_CHANGE) {
+		dout = REG_RD(gio, regi_gio, rw_pa_dout);
+		dout.data &= ~PIN_BITMASK;
+
+#if (CE_BIT == 4 && NAND_NCE == 1 &&  \
+     CLE_BIT == 5 && NAND_CLE == 2 && \
+     ALE_BIT == 6 && NAND_ALE == 4)
+		/* Pins in same order as control bits, but shifted.
+		 * Optimize for this case; works for 2.6.18 */
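+		/* Worked example: ctrl = NAND_NCE | NAND_CLE == 0x3; XOR
+		 * with NAND_NCE flips the chip-enable bit (the CE pin is
+		 * active low), giving 0x2, which shifted by CE_BIT sets
+		 * CLE_BIT (bit 5) and leaves CE_BIT (bit 4) clear -- CE
+		 * asserted and CLE high, matching the slow path below. */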
+		dout.data |= ((ctrl & CTRL_BITMASK) ^ NAND_NCE) << CE_BIT;
+#else
+		/* the slow way */
+		if (!(ctrl & NAND_NCE))
+			dout.data |= (1 << CE_BIT);
+		if (ctrl & NAND_CLE)
+			dout.data |= (1 << CLE_BIT);
+		if (ctrl & NAND_ALE)
+			dout.data |= (1 << ALE_BIT);
+#endif
+		REG_WR(gio, regi_gio, rw_pa_dout, dout);
+	}
+
+	/* command to chip */
+	if (cmd != NAND_CMD_NONE)
+		writeb(cmd, this->IO_ADDR_W);
+
+	local_irq_restore(flags);
+}
+
+/*
+ * read device ready pin
+ */
+static int crisv32_device_ready(struct mtd_info *mtd)
+{
+	reg_gio_r_pa_din din = REG_RD(gio, regi_gio, r_pa_din);
+	return ((din.data & (1 << BY_BIT)) >> BY_BIT);
+}
+
+/*
+ * Main initialization routine
+ */
+struct mtd_info *__init crisv32_nand_flash_probe(void)
+{
+	void __iomem *read_cs;
+	void __iomem *write_cs;
+
+	reg_bif_core_rw_grp3_cfg bif_cfg = REG_RD(bif_core, regi_bif_core,
+		rw_grp3_cfg);
+	reg_gio_rw_pa_oe pa_oe = REG_RD(gio, regi_gio, rw_pa_oe);
+	struct mtd_info_wrapper *wrapper;
+	struct nand_chip *this;
+	int err = 0;
+
+	/* Allocate memory for MTD device structure and private data */
+	wrapper = kzalloc(sizeof(struct mtd_info_wrapper), GFP_KERNEL);
+	if (!wrapper) {
+		printk(KERN_ERR "Unable to allocate CRISv32 NAND MTD "
+			"device structure.\n");
+		err = -ENOMEM;
+		return NULL;
+	}
+
+	read_cs = ioremap(MEM_CSP0_START | MEM_NON_CACHEABLE, 8192);
+	write_cs = ioremap(MEM_CSP1_START | MEM_NON_CACHEABLE, 8192);
+
+	if (!read_cs || !write_cs) {
+		printk(KERN_ERR "CRISv32 NAND ioremap failed\n");
+		err = -EIO;
+		goto out_mtd;
+	}
+
+	/* Get pointer to private data */
+	this = &wrapper->chip;
+	crisv32_mtd = &wrapper->info;
+
+	pa_oe.oe |= 1 << CE_BIT;
+	pa_oe.oe |= 1 << ALE_BIT;
+	pa_oe.oe |= 1 << CLE_BIT;
+	pa_oe.oe &= ~(1 << BY_BIT);
+	REG_WR(gio, regi_gio, rw_pa_oe, pa_oe);
+
+	bif_cfg.gated_csp0 = regk_bif_core_rd;
+	bif_cfg.gated_csp1 = regk_bif_core_wr;
+	REG_WR(bif_core, regi_bif_core, rw_grp3_cfg, bif_cfg);
+
+	/* Link the private data with the MTD structure */
+	crisv32_mtd->priv = this;
+
+	/* Set address of NAND IO lines */
+	this->IO_ADDR_R = read_cs;
+	this->IO_ADDR_W = write_cs;
+	this->cmd_ctrl = crisv32_hwcontrol;
+	this->dev_ready = crisv32_device_ready;
+	/* 20 us command delay time */
+	this->chip_delay = 20;
+	this->ecc.mode = NAND_ECC_SOFT;
+
+	/* Enable the following for a flash based bad block table */
+	/* this->bbt_options = NAND_BBT_USE_FLASH; */
+
+	/* Scan to find existence of the device */
+	if (nand_scan(crisv32_mtd, 1)) {
+		err = -ENXIO;
+		goto out_ior;
+	}
+
+	return crisv32_mtd;
+
+out_ior:
+	iounmap((void *)read_cs);
+	iounmap((void *)write_cs);
+out_mtd:
+	kfree(wrapper);
+	return NULL;
+}
+
diff --git a/arch/cris/arch-v32/drivers/pci/Makefile b/arch/cris/arch-v32/drivers/pci/Makefile
new file mode 100644
index 0000000..bff7482
--- /dev/null
+++ b/arch/cris/arch-v32/drivers/pci/Makefile
@@ -0,0 +1,5 @@
+#
+# Makefile for Etrax cardbus driver
+#
+
+obj-$(CONFIG_ETRAX_CARDBUS)        += bios.o dma.o
diff --git a/arch/cris/arch-v32/drivers/pci/bios.c b/arch/cris/arch-v32/drivers/pci/bios.c
new file mode 100644
index 0000000..64a5fb9
--- /dev/null
+++ b/arch/cris/arch-v32/drivers/pci/bios.c
@@ -0,0 +1,99 @@
+#include <linux/pci.h>
+#include <linux/kernel.h>
+#include <arch/hwregs/intr_vect.h>
+
+void pcibios_fixup_bus(struct pci_bus *b)
+{
+}
+
+void pcibios_set_master(struct pci_dev *dev)
+{
+	u8 lat;
+	pci_read_config_byte(dev, PCI_LATENCY_TIMER, &lat);
+	printk(KERN_DEBUG "PCI: Setting latency timer of device %s to %d\n", pci_name(dev), lat);
+	pci_write_config_byte(dev, PCI_LATENCY_TIMER, lat);
+}
+
+int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
+			enum pci_mmap_state mmap_state, int write_combine)
+{
+	unsigned long prot;
+
+	/* Leave vm_pgoff as-is, the PCI space address is the physical
+	 * address on this platform.
+	 */
+	prot = pgprot_val(vma->vm_page_prot);
+	vma->vm_page_prot = __pgprot(prot);
+
+	/* Write-combine setting is ignored; it is changed via the mtrr
+	 * interfaces on this platform.
+	 */
+	if (remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
+			     vma->vm_end - vma->vm_start,
+			     vma->vm_page_prot))
+		return -EAGAIN;
+
+	return 0;
+}
+
+resource_size_t
+pcibios_align_resource(void *data, const struct resource *res,
+		       resource_size_t size, resource_size_t align)
+{
+	resource_size_t start = res->start;
+
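+	/* This appears to mirror the x86 BIOS behaviour: I/O resources
+	 * falling in the ISA alias range (bit 8 or 9 set, i.e. 0x100-0x3ff
+	 * repeating every 1 KiB) are pushed up to the next 1 KiB boundary. */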
+	if ((res->flags & IORESOURCE_IO) && (start & 0x300))
+		start = (start + 0x3ff) & ~0x3ff;
+
+	return start;
+}
+
+int pcibios_enable_resources(struct pci_dev *dev, int mask)
+{
+	u16 cmd, old_cmd;
+	int idx;
+	struct resource *r;
+
+	pci_read_config_word(dev, PCI_COMMAND, &cmd);
+	old_cmd = cmd;
+	for (idx = 0; idx < 6; idx++) {
+		/* Only set up the requested stuff */
+		if (!(mask & (1<<idx)))
+			continue;
+
+		r = &dev->resource[idx];
+		if (!r->start && r->end) {
+			printk(KERN_ERR "PCI: Device %s not available because of resource collisions\n", pci_name(dev));
+			return -EINVAL;
+		}
+		if (r->flags & IORESOURCE_IO)
+			cmd |= PCI_COMMAND_IO;
+		if (r->flags & IORESOURCE_MEM)
+			cmd |= PCI_COMMAND_MEMORY;
+	}
+	if (dev->resource[PCI_ROM_RESOURCE].start)
+		cmd |= PCI_COMMAND_MEMORY;
+	if (cmd != old_cmd) {
+		printk("PCI: Enabling device %s (%04x -> %04x)\n", pci_name(dev), old_cmd, cmd);
+		pci_write_config_word(dev, PCI_COMMAND, cmd);
+	}
+	return 0;
+}
+
+int pcibios_enable_irq(struct pci_dev *dev)
+{
+	dev->irq = EXT_INTR_VECT;
+	return 0;
+}
+
+int pcibios_enable_device(struct pci_dev *dev, int mask)
+{
+	int err;
+
+	if ((err = pcibios_enable_resources(dev, mask)) < 0)
+		return err;
+
+	if (!dev->msi_enabled)
+		pcibios_enable_irq(dev);
+	return 0;
+}
diff --git a/arch/cris/arch-v32/drivers/pci/dma.c b/arch/cris/arch-v32/drivers/pci/dma.c
new file mode 100644
index 0000000..ee55578
--- /dev/null
+++ b/arch/cris/arch-v32/drivers/pci/dma.c
@@ -0,0 +1,50 @@
+/*
+ * Dynamic DMA mapping support.
+ *
+ * On cris there is no hardware dynamic DMA address translation,
+ * so consistent alloc/free are merely page allocation/freeing.
+ * The rest of the dynamic DMA mapping interface is implemented
+ * in asm/pci.h.
+ *
+ * Borrowed from i386.
+ */
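+
+/* Usage sketch (hypothetical caller, not part of this file): allocate a
+ * coherent buffer, hand the bus address to the device, free when done:
+ *
+ *   dma_addr_t handle;
+ *   void *buf = dma_alloc_coherent(dev, 4096, &handle, GFP_KERNEL);
+ *   if (!buf)
+ *           return -ENOMEM;
+ *   // program the device with 'handle'; the CPU uses 'buf'
+ *   dma_free_coherent(dev, 4096, buf, handle);
+ *
+ * As the code below shows, on this platform the bus address is simply
+ * virt_to_phys() of the kernel virtual address. */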
+
+#include <linux/types.h>
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <linux/pci.h>
+#include <linux/gfp.h>
+#include <asm/io.h>
+
+void *dma_alloc_coherent(struct device *dev, size_t size,
+			   dma_addr_t *dma_handle, gfp_t gfp)
+{
+	void *ret;
+	int order = get_order(size);
+	/* ignore region specifiers */
+	gfp &= ~(__GFP_DMA | __GFP_HIGHMEM);
+
+	if (dma_alloc_from_coherent(dev, size, dma_handle, &ret))
+		return ret;
+
+	if (dev == NULL || (dev->coherent_dma_mask < 0xffffffff))
+		gfp |= GFP_DMA;
+
+	ret = (void *)__get_free_pages(gfp, order);
+
+	if (ret != NULL) {
+		memset(ret, 0, size);
+		*dma_handle = virt_to_phys(ret);
+	}
+	return ret;
+}
+
+void dma_free_coherent(struct device *dev, size_t size,
+			 void *vaddr, dma_addr_t dma_handle)
+{
+	int order = get_order(size);
+
+	if (!dma_release_from_coherent(dev, order, vaddr))
+		free_pages((unsigned long)vaddr, order);
+}
+
diff --git a/arch/cris/arch-v32/drivers/sync_serial.c b/arch/cris/arch-v32/drivers/sync_serial.c
new file mode 100644
index 0000000..e989cee
--- /dev/null
+++ b/arch/cris/arch-v32/drivers/sync_serial.c
@@ -0,0 +1,1709 @@
+/*
+ * Simple synchronous serial port driver for ETRAX FS and ARTPEC-3.
+ *
+ * Copyright (c) 2005, 2008 Axis Communications AB
+ * Author: Mikael Starvik
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/major.h>
+#include <linux/sched.h>
+#include <linux/mutex.h>
+#include <linux/interrupt.h>
+#include <linux/poll.h>
+#include <linux/fs.h>
+#include <linux/cdev.h>
+#include <linux/device.h>
+#include <linux/wait.h>
+
+#include <asm/io.h>
+#include <mach/dma.h>
+#include <pinmux.h>
+#include <hwregs/reg_rdwr.h>
+#include <hwregs/sser_defs.h>
+#include <hwregs/timer_defs.h>
+#include <hwregs/dma_defs.h>
+#include <hwregs/dma.h>
+#include <hwregs/intr_vect_defs.h>
+#include <hwregs/intr_vect.h>
+#include <hwregs/reg_map.h>
+#include <asm/sync_serial.h>
+
+
+/* The receiver is a bit tricky because of the continuous stream of data.*/
+/*                                                                       */
+/* Three DMA descriptors are linked together. Each DMA descriptor is     */
+/* responsible for port->bufchunk of a common buffer.                    */
+/*                                                                       */
+/* +---------------------------------------------+                       */
+/* |   +----------+   +----------+   +----------+ |                      */
+/* +-> | Descr[0] |-->| Descr[1] |-->| Descr[2] |-+                      */
+/*     +----------+   +----------+   +----------+                        */
+/*         |            |              |                                 */
+/*         v            v              v                                 */
+/*   +-------------------------------------+                             */
+/*   |        BUFFER                       |                             */
+/*   +-------------------------------------+                             */
+/*      |<- data_avail ->|                                               */
+/*    readp          writep                                              */
+/*                                                                       */
+/* If the application keeps up the pace readp will be right after writep.*/
+/* If the application can't keep the pace we have to throw away data.    */
+/* The idea is that readp should be done with the data pointed out by   */
+/* Descr[i] when the DMA has filled in Descr[i+1]. Otherwise we discard */
+/* the rest of the data pointed out by Descr[i] and set readp to the    */
+/* start of Descr[i+1].                                                 */
+
+/* IN_BUFFER_SIZE should be a multiple of 6 to make sure that 24 bit */
+/* words can be handled */
+#define IN_DESCR_SIZE SSP_INPUT_CHUNK_SIZE
+#define NBR_IN_DESCR (8*6)
+#define IN_BUFFER_SIZE (IN_DESCR_SIZE * NBR_IN_DESCR)
+
+#define NBR_OUT_DESCR 8
+#define OUT_BUFFER_SIZE (1024 * NBR_OUT_DESCR)
+
+#define DEFAULT_FRAME_RATE 0
+#define DEFAULT_WORD_RATE 7
+
+/* To be removed when we move to pure udev. */
+#define SYNC_SERIAL_MAJOR 125
+
+/* NOTE: Enabling some debug will likely cause overrun or underrun,
+ * especially if manual mode is used.
+ */
+#define DEBUG(x)
+#define DEBUGREAD(x)
+#define DEBUGWRITE(x)
+#define DEBUGPOLL(x)
+#define DEBUGRXINT(x)
+#define DEBUGTXINT(x)
+#define DEBUGTRDMA(x)
+#define DEBUGOUTBUF(x)
+
+enum syncser_irq_setup {
+	no_irq_setup = 0,
+	dma_irq_setup = 1,
+	manual_irq_setup = 2,
+};
+
+struct sync_port {
+	unsigned long regi_sser;
+	unsigned long regi_dmain;
+	unsigned long regi_dmaout;
+
+	/* Interrupt vectors. */
+	unsigned long dma_in_intr_vect; /* Used for DMA in. */
+	unsigned long dma_out_intr_vect; /* Used for DMA out. */
+	unsigned long syncser_intr_vect; /* Used when no DMA. */
+
+	/* DMA number for in and out. */
+	unsigned int dma_in_nbr;
+	unsigned int dma_out_nbr;
+
+	/* DMA owner. */
+	enum dma_owner req_dma;
+
+	char started; /* 1 if port has been started */
+	char port_nbr; /* Port 0 or 1 */
+	char busy; /* 1 if port is busy */
+
+	char enabled;  /* 1 if port is enabled */
+	char use_dma;  /* 1 if port uses dma */
+	char tr_running;
+
+	enum syncser_irq_setup init_irqs;
+	int output;
+	int input;
+
+	/* Next byte to be read by application */
+	unsigned char *readp;
+	/* Next byte to be written by etrax */
+	unsigned char *writep;
+
+	unsigned int in_buffer_size;
+	unsigned int in_buffer_len;
+	unsigned int inbufchunk;
+	/* Data buffers for in and output. */
+	unsigned char out_buffer[OUT_BUFFER_SIZE] __aligned(32);
+	unsigned char in_buffer[IN_BUFFER_SIZE] __aligned(32);
+	unsigned char flip[IN_BUFFER_SIZE] __aligned(32);
+	struct timespec timestamp[NBR_IN_DESCR];
+	struct dma_descr_data *next_rx_desc;
+	struct dma_descr_data *prev_rx_desc;
+
+	struct timeval last_timestamp;
+	int read_ts_idx;
+	int write_ts_idx;
+
+	/* Pointer to the first available descriptor in the ring,
+	 * unless active_tr_descr == catch_tr_descr and a dma
+	 * transfer is active */
+	struct dma_descr_data *active_tr_descr;
+
+	/* Pointer to the first allocated descriptor in the ring */
+	struct dma_descr_data *catch_tr_descr;
+
+	/* Pointer to the descriptor with the current end-of-list */
+	struct dma_descr_data *prev_tr_descr;
+	int full;
+
+	/* Pointer to the first byte being read by DMA
+	 * or current position in out_buffer if not using DMA. */
+	unsigned char *out_rd_ptr;
+
+	/* Number of bytes currently locked for being read by DMA */
+	int out_buf_count;
+
+	dma_descr_context in_context __aligned(32);
+	dma_descr_context out_context __aligned(32);
+	dma_descr_data in_descr[NBR_IN_DESCR] __aligned(16);
+	dma_descr_data out_descr[NBR_OUT_DESCR] __aligned(16);
+
+	wait_queue_head_t out_wait_q;
+	wait_queue_head_t in_wait_q;
+
+	spinlock_t lock;
+};
+
+static DEFINE_MUTEX(sync_serial_mutex);
+static int etrax_sync_serial_init(void);
+static void initialize_port(int portnbr);
+static inline int sync_data_avail(struct sync_port *port);
+
+static int sync_serial_open(struct inode *, struct file *);
+static int sync_serial_release(struct inode *, struct file *);
+static unsigned int sync_serial_poll(struct file *filp, poll_table *wait);
+
+static long sync_serial_ioctl(struct file *file,
+			      unsigned int cmd, unsigned long arg);
+static int sync_serial_ioctl_unlocked(struct file *file,
+				      unsigned int cmd, unsigned long arg);
+static ssize_t sync_serial_write(struct file *file, const char __user *buf,
+				 size_t count, loff_t *ppos);
+static ssize_t sync_serial_read(struct file *file, char __user *buf,
+				size_t count, loff_t *ppos);
+
+#if ((defined(CONFIG_ETRAX_SYNCHRONOUS_SERIAL_PORT0) && \
+	defined(CONFIG_ETRAX_SYNCHRONOUS_SERIAL0_DMA)) || \
+	(defined(CONFIG_ETRAX_SYNCHRONOUS_SERIAL_PORT1) && \
+	defined(CONFIG_ETRAX_SYNCHRONOUS_SERIAL1_DMA)))
+#define SYNC_SER_DMA
+#else
+#define SYNC_SER_MANUAL
+#endif
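+
+/* SYNC_SER_DMA is defined when at least one configured port uses DMA;
+ * SYNC_SER_MANUAL only when none does. In a mixed configuration the
+ * manual paths are compiled out and opening the non-DMA port hits the
+ * panic in sync_serial_open().
+ */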
+
+#ifdef SYNC_SER_DMA
+static void start_dma_out(struct sync_port *port, const char *data, int count);
+static void start_dma_in(struct sync_port *port);
+static irqreturn_t tr_interrupt(int irq, void *dev_id);
+static irqreturn_t rx_interrupt(int irq, void *dev_id);
+#endif
+#ifdef SYNC_SER_MANUAL
+static void send_word(struct sync_port *port);
+static irqreturn_t manual_interrupt(int irq, void *dev_id);
+#endif
+
+#define artpec_pinmux_alloc_fixed crisv32_pinmux_alloc_fixed
+#define artpec_request_dma crisv32_request_dma
+#define artpec_free_dma crisv32_free_dma
+
+#ifdef CONFIG_ETRAXFS
+/* ETRAX FS */
+#define DMA_OUT_NBR0		SYNC_SER0_TX_DMA_NBR
+#define DMA_IN_NBR0		SYNC_SER0_RX_DMA_NBR
+#define DMA_OUT_NBR1		SYNC_SER1_TX_DMA_NBR
+#define DMA_IN_NBR1		SYNC_SER1_RX_DMA_NBR
+#define PINMUX_SSER0		pinmux_sser0
+#define PINMUX_SSER1		pinmux_sser1
+#define SYNCSER_INST0		regi_sser0
+#define SYNCSER_INST1		regi_sser1
+#define SYNCSER_INTR_VECT0	SSER0_INTR_VECT
+#define SYNCSER_INTR_VECT1	SSER1_INTR_VECT
+#define OUT_DMA_INST0		regi_dma4
+#define IN_DMA_INST0		regi_dma5
+#define DMA_OUT_INTR_VECT0	DMA4_INTR_VECT
+#define DMA_OUT_INTR_VECT1	DMA7_INTR_VECT
+#define DMA_IN_INTR_VECT0	DMA5_INTR_VECT
+#define DMA_IN_INTR_VECT1	DMA6_INTR_VECT
+#define REQ_DMA_SYNCSER0	dma_sser0
+#define REQ_DMA_SYNCSER1	dma_sser1
+#if defined(CONFIG_ETRAX_SYNCHRONOUS_SERIAL1_DMA)
+#define PORT1_DMA 1
+#else
+#define PORT1_DMA 0
+#endif
+#elif defined(CONFIG_CRIS_MACH_ARTPEC3)
+/* ARTPEC-3 */
+#define DMA_OUT_NBR0		SYNC_SER_TX_DMA_NBR
+#define DMA_IN_NBR0		SYNC_SER_RX_DMA_NBR
+#define PINMUX_SSER0		pinmux_sser
+#define SYNCSER_INST0		regi_sser
+#define SYNCSER_INTR_VECT0	SSER_INTR_VECT
+#define OUT_DMA_INST0		regi_dma6
+#define IN_DMA_INST0		regi_dma7
+#define DMA_OUT_INTR_VECT0	DMA6_INTR_VECT
+#define DMA_IN_INTR_VECT0	DMA7_INTR_VECT
+#define REQ_DMA_SYNCSER0	dma_sser
+#define REQ_DMA_SYNCSER1	dma_sser
+#endif
+
+#if defined(CONFIG_ETRAX_SYNCHRONOUS_SERIAL0_DMA)
+#define PORT0_DMA 1
+#else
+#define PORT0_DMA 0
+#endif
+
+/* The ports */
+static struct sync_port ports[] = {
+	{
+		.regi_sser		= SYNCSER_INST0,
+		.regi_dmaout		= OUT_DMA_INST0,
+		.regi_dmain		= IN_DMA_INST0,
+		.use_dma		= PORT0_DMA,
+		.dma_in_intr_vect	= DMA_IN_INTR_VECT0,
+		.dma_out_intr_vect	= DMA_OUT_INTR_VECT0,
+		.dma_in_nbr		= DMA_IN_NBR0,
+		.dma_out_nbr		= DMA_OUT_NBR0,
+		.req_dma		= REQ_DMA_SYNCSER0,
+		.syncser_intr_vect	= SYNCSER_INTR_VECT0,
+	},
+#ifdef CONFIG_ETRAXFS
+	{
+		.regi_sser		= SYNCSER_INST1,
+		.regi_dmaout		= regi_dma6,
+		.regi_dmain		= regi_dma7,
+		.use_dma		= PORT1_DMA,
+		.dma_in_intr_vect	= DMA_IN_INTR_VECT1,
+		.dma_out_intr_vect	= DMA_OUT_INTR_VECT1,
+		.dma_in_nbr		= DMA_IN_NBR1,
+		.dma_out_nbr		= DMA_OUT_NBR1,
+		.req_dma		= REQ_DMA_SYNCSER1,
+		.syncser_intr_vect	= SYNCSER_INTR_VECT1,
+	},
+#endif
+};
+
+#define NBR_PORTS ARRAY_SIZE(ports)
+
+static const struct file_operations syncser_fops = {
+	.owner		= THIS_MODULE,
+	.write		= sync_serial_write,
+	.read		= sync_serial_read,
+	.poll		= sync_serial_poll,
+	.unlocked_ioctl	= sync_serial_ioctl,
+	.open		= sync_serial_open,
+	.release	= sync_serial_release,
+	.llseek		= noop_llseek,
+};
+
+static dev_t syncser_first;
+static int minor_count = NBR_PORTS;
+#define SYNCSER_NAME "syncser"
+static struct cdev *syncser_cdev;
+static struct class *syncser_class;
+
+static void sync_serial_start_port(struct sync_port *port)
+{
+	reg_sser_rw_cfg cfg = REG_RD(sser, port->regi_sser, rw_cfg);
+	reg_sser_rw_tr_cfg tr_cfg =
+		REG_RD(sser, port->regi_sser, rw_tr_cfg);
+	reg_sser_rw_rec_cfg rec_cfg =
+		REG_RD(sser, port->regi_sser, rw_rec_cfg);
+	cfg.en = regk_sser_yes;
+	tr_cfg.tr_en = regk_sser_yes;
+	rec_cfg.rec_en = regk_sser_yes;
+	REG_WR(sser, port->regi_sser, rw_cfg, cfg);
+	REG_WR(sser, port->regi_sser, rw_tr_cfg, tr_cfg);
+	REG_WR(sser, port->regi_sser, rw_rec_cfg, rec_cfg);
+	port->started = 1;
+}
+
+static void __init initialize_port(int portnbr)
+{
+	struct sync_port *port = &ports[portnbr];
+	reg_sser_rw_cfg cfg = { 0 };
+	reg_sser_rw_frm_cfg frm_cfg = { 0 };
+	reg_sser_rw_tr_cfg tr_cfg = { 0 };
+	reg_sser_rw_rec_cfg rec_cfg = { 0 };
+
+	DEBUG(pr_info("Init sync serial port %d\n", portnbr));
+
+	port->port_nbr = portnbr;
+	port->init_irqs = no_irq_setup;
+
+	port->out_rd_ptr = port->out_buffer;
+	port->out_buf_count = 0;
+
+	port->output = 1;
+	port->input = 0;
+
+	port->readp = port->flip;
+	port->writep = port->flip;
+	port->in_buffer_size = IN_BUFFER_SIZE;
+	port->in_buffer_len = 0;
+	port->inbufchunk = IN_DESCR_SIZE;
+
+	port->read_ts_idx = 0;
+	port->write_ts_idx = 0;
+
+	init_waitqueue_head(&port->out_wait_q);
+	init_waitqueue_head(&port->in_wait_q);
+
+	spin_lock_init(&port->lock);
+
+	cfg.out_clk_src = regk_sser_intern_clk;
+	cfg.out_clk_pol = regk_sser_pos;
+	cfg.clk_od_mode = regk_sser_no;
+	cfg.clk_dir = regk_sser_out;
+	cfg.gate_clk = regk_sser_no;
+	cfg.base_freq = regk_sser_f29_493;
+	cfg.clk_div = 256;
+	REG_WR(sser, port->regi_sser, rw_cfg, cfg);
+
+	frm_cfg.wordrate = DEFAULT_WORD_RATE;
+	frm_cfg.type = regk_sser_edge;
+	frm_cfg.frame_pin_dir = regk_sser_out;
+	frm_cfg.frame_pin_use = regk_sser_frm;
+	frm_cfg.status_pin_dir = regk_sser_in;
+	frm_cfg.status_pin_use = regk_sser_hold;
+	frm_cfg.out_on = regk_sser_tr;
+	frm_cfg.tr_delay = 1;
+	REG_WR(sser, port->regi_sser, rw_frm_cfg, frm_cfg);
+
+	tr_cfg.urun_stop = regk_sser_no;
+	tr_cfg.sample_size = 7;
+	tr_cfg.sh_dir = regk_sser_msbfirst;
+	tr_cfg.use_dma = port->use_dma ? regk_sser_yes : regk_sser_no;
+#if 0
+	tr_cfg.rate_ctrl = regk_sser_bulk;
+	tr_cfg.data_pin_use = regk_sser_dout;
+#else
+	tr_cfg.rate_ctrl = regk_sser_iso;
+	tr_cfg.data_pin_use = regk_sser_dout;
+#endif
+	tr_cfg.bulk_wspace = 1;
+	REG_WR(sser, port->regi_sser, rw_tr_cfg, tr_cfg);
+
+	rec_cfg.sample_size = 7;
+	rec_cfg.sh_dir = regk_sser_msbfirst;
+	rec_cfg.use_dma = port->use_dma ? regk_sser_yes : regk_sser_no;
+	rec_cfg.fifo_thr = regk_sser_inf;
+	REG_WR(sser, port->regi_sser, rw_rec_cfg, rec_cfg);
+
+#ifdef SYNC_SER_DMA
+	{
+		int i;
+		/* Setup the descriptor ring for dma out/transmit. */
+		for (i = 0; i < NBR_OUT_DESCR; i++) {
+			dma_descr_data *descr = &port->out_descr[i];
+			descr->wait = 0;
+			descr->intr = 1;
+			descr->eol = 0;
+			descr->out_eop = 0;
+			descr->next = (dma_descr_data *)
+				virt_to_phys(&port->out_descr[i + 1]);
+		}
+	}
+
+	/* Create a ring from the list. */
+	port->out_descr[NBR_OUT_DESCR-1].next =
+		(dma_descr_data *)virt_to_phys(&port->out_descr[0]);
+
+	/* Setup context for traversing the ring. */
+	port->active_tr_descr = &port->out_descr[0];
+	port->prev_tr_descr = &port->out_descr[NBR_OUT_DESCR-1];
+	port->catch_tr_descr = &port->out_descr[0];
+#endif
+}
+
+static inline int sync_data_avail(struct sync_port *port)
+{
+	return port->in_buffer_len;
+}
+
+static int sync_serial_open(struct inode *inode, struct file *file)
+{
+	int ret = 0;
+	int dev = iminor(inode);
+	struct sync_port *port;
+#ifdef SYNC_SER_DMA
+	reg_dma_rw_cfg cfg = { .en = regk_dma_yes };
+	reg_dma_rw_intr_mask intr_mask = { .data = regk_dma_yes };
+#endif
+
+	DEBUG(pr_debug("Open sync serial port %d\n", dev));
+
+	if (dev < 0 || dev >= NBR_PORTS || !ports[dev].enabled) {
+		DEBUG(pr_info("Invalid minor %d\n", dev));
+		return -ENODEV;
+	}
+	port = &ports[dev];
+	/* Allow opening this device twice (assuming one reader and one writer) */
+	if (port->busy == 2) {
+		DEBUG(pr_info("syncser%d is busy\n", dev));
+		return -EBUSY;
+	}
+
+	mutex_lock(&sync_serial_mutex);
+
+	/* Clear any stale data left in the flip buffer */
+	port->readp = port->writep = port->flip;
+	port->in_buffer_len = 0;
+	port->read_ts_idx = 0;
+	port->write_ts_idx = 0;
+
+	if (port->init_irqs != no_irq_setup) {
+		/* IRQs are already set up; only the first open does the
+		 * initialization below.
+		 */
+		port->busy++;
+		mutex_unlock(&sync_serial_mutex);
+		return 0;
+	}
+	if (port->use_dma) {
+#ifdef SYNC_SER_DMA
+		const char *tmp;
+		DEBUG(pr_info("Using DMA for syncser%d\n", dev));
+
+		tmp = dev == 0 ? "syncser0 tx" : "syncser1 tx";
+		if (request_irq(port->dma_out_intr_vect, tr_interrupt, 0,
+				tmp, port)) {
+			pr_err("Can't alloc syncser%d TX IRQ", dev);
+			ret = -EBUSY;
+			goto unlock_and_exit;
+		}
+		if (artpec_request_dma(port->dma_out_nbr, tmp,
+				DMA_VERBOSE_ON_ERROR, 0, port->req_dma)) {
+			free_irq(port->dma_out_intr_vect, port);
+			pr_err("Can't alloc syncser%d TX DMA", dev);
+			ret = -EBUSY;
+			goto unlock_and_exit;
+		}
+		tmp = dev == 0 ? "syncser0 rx" : "syncser1 rx";
+		if (request_irq(port->dma_in_intr_vect, rx_interrupt, 0,
+				tmp, port)) {
+			artpec_free_dma(port->dma_out_nbr);
+			free_irq(port->dma_out_intr_vect, port);
+			pr_err("Can't alloc syncser%d RX IRQ", dev);
+			ret = -EBUSY;
+			goto unlock_and_exit;
+		}
+		if (artpec_request_dma(port->dma_in_nbr, tmp,
+				DMA_VERBOSE_ON_ERROR, 0, port->req_dma)) {
+			artpec_free_dma(port->dma_out_nbr);
+			free_irq(port->dma_out_intr_vect, port);
+			free_irq(port->dma_in_intr_vect, port);
+			pr_err("Can't alloc syncser%d RX DMA", dev);
+			ret = -EBUSY;
+			goto unlock_and_exit;
+		}
+		/* Enable DMAs */
+		REG_WR(dma, port->regi_dmain, rw_cfg, cfg);
+		REG_WR(dma, port->regi_dmaout, rw_cfg, cfg);
+		/* Enable DMA IRQs */
+		REG_WR(dma, port->regi_dmain, rw_intr_mask, intr_mask);
+		REG_WR(dma, port->regi_dmaout, rw_intr_mask, intr_mask);
+		/* Set up wordsize = 1 for DMAs. */
+		DMA_WR_CMD(port->regi_dmain, regk_dma_set_w_size1);
+		DMA_WR_CMD(port->regi_dmaout, regk_dma_set_w_size1);
+
+		start_dma_in(port);
+		port->init_irqs = dma_irq_setup;
+#endif
+	} else { /* !port->use_dma */
+#ifdef SYNC_SER_MANUAL
+		const char *tmp = dev == 0 ? "syncser0 manual irq" :
+					     "syncser1 manual irq";
+		if (request_irq(port->syncser_intr_vect, manual_interrupt,
+				0, tmp, port)) {
+			pr_err("Can't alloc syncser%d manual irq",
+				dev);
+			ret = -EBUSY;
+			goto unlock_and_exit;
+		}
+		port->init_irqs = manual_irq_setup;
+#else
+		panic("sync_serial: Manual mode not supported\n");
+#endif /* SYNC_SER_MANUAL */
+	}
+	port->busy++;
+	ret = 0;
+
+unlock_and_exit:
+	mutex_unlock(&sync_serial_mutex);
+	return ret;
+}
+
+static int sync_serial_release(struct inode *inode, struct file *file)
+{
+	int dev = iminor(inode);
+	struct sync_port *port;
+
+	if (dev < 0 || dev >= NBR_PORTS || !ports[dev].enabled) {
+		DEBUG(pr_info("Invalid minor %d\n", dev));
+		return -ENODEV;
+	}
+	port = &ports[dev];
+	if (port->busy)
+		port->busy--;
+	if (!port->busy)
+		/* XXX */;
+	return 0;
+}
+
+static unsigned int sync_serial_poll(struct file *file, poll_table *wait)
+{
+	int dev = iminor(file_inode(file));
+	unsigned int mask = 0;
+	struct sync_port *port;
+	DEBUGPOLL(
+	static unsigned int prev_mask;
+	);
+
+	port = &ports[dev];
+
+	if (!port->started)
+		sync_serial_start_port(port);
+
+	poll_wait(file, &port->out_wait_q, wait);
+	poll_wait(file, &port->in_wait_q, wait);
+
+	/* No active transfer, descriptors are available */
+	if (port->output && !port->tr_running)
+		mask |= POLLOUT | POLLWRNORM;
+
+	/* Descriptor and buffer space available. */
+	if (port->output &&
+	    port->active_tr_descr != port->catch_tr_descr &&
+	    port->out_buf_count < OUT_BUFFER_SIZE)
+		mask |= POLLOUT | POLLWRNORM;
+
+	/* At least an inbufchunk of data */
+	if (port->input && sync_data_avail(port) >= port->inbufchunk)
+		mask |= POLLIN | POLLRDNORM;
+
+	DEBUGPOLL(
+	if (mask != prev_mask) {
+		pr_info("sync_serial_poll: mask 0x%08X %s %s\n",
+			mask,
+			mask & POLLOUT ? "POLLOUT" : "",
+			mask & POLLIN ? "POLLIN" : "");
+		prev_mask = mask;
+	}
+	);
+	return mask;
+}
+
+static ssize_t __sync_serial_read(struct file *file,
+				  char __user *buf,
+				  size_t count,
+				  loff_t *ppos,
+				  struct timespec *ts)
+{
+	unsigned long flags;
+	int dev = MINOR(file_inode(file)->i_rdev);
+	int avail;
+	struct sync_port *port;
+	unsigned char *start;
+	unsigned char *end;
+
+	if (dev < 0 || dev >= NBR_PORTS || !ports[dev].enabled) {
+		DEBUG(pr_info("Invalid minor %d\n", dev));
+		return -ENODEV;
+	}
+	port = &ports[dev];
+
+	if (!port->started)
+		sync_serial_start_port(port);
+
+	/* Calculate number of available bytes */
+	/* Save pointers to avoid that they are modified by interrupt */
+	spin_lock_irqsave(&port->lock, flags);
+	start = port->readp;
+	end = port->writep;
+	spin_unlock_irqrestore(&port->lock, flags);
+
+	while ((start == end) && !port->in_buffer_len) {
+		if (file->f_flags & O_NONBLOCK)
+			return -EAGAIN;
+
+		wait_event_interruptible(port->in_wait_q,
+					 (port->readp != port->writep) ||
+					 port->full);
+
+		if (signal_pending(current))
+			return -EINTR;
+
+		spin_lock_irqsave(&port->lock, flags);
+		start = port->readp;
+		end = port->writep;
+		spin_unlock_irqrestore(&port->lock, flags);
+	}
+
+	DEBUGREAD(pr_info("R%d c %d ri %u wi %u /%u\n",
+			  dev, count,
+			  start - port->flip, end - port->flip,
+			  port->in_buffer_size));
+
+	/* Lazy read, never return wrapped data. */
+	if (end > start)
+		avail = end - start;
+	else
+		avail = port->flip + port->in_buffer_size - start;
+
+	count = count > avail ? avail : count;
+	if (copy_to_user(buf, start, count))
+		return -EFAULT;
+
+	/* If timestamp requested, find timestamp of first returned byte
+	 * and copy it.
+	 * N.B: Applications that request timestamps MUST read data in
+	 * chunks that are multiples of IN_DESCR_SIZE.
+	 * Otherwise the timestamps will not be aligned to the data read.
+	 */
+	if (ts != NULL) {
+		int idx = port->read_ts_idx;
+		memcpy(ts, &port->timestamp[idx], sizeof(struct timespec));
+		port->read_ts_idx += count / IN_DESCR_SIZE;
+		if (port->read_ts_idx >= NBR_IN_DESCR)
+			port->read_ts_idx = 0;
+	}
+
+	spin_lock_irqsave(&port->lock, flags);
+	port->readp += count;
+	/* Check for wrap */
+	if (port->readp >= port->flip + port->in_buffer_size)
+		port->readp = port->flip;
+	port->in_buffer_len -= count;
+	port->full = 0;
+	spin_unlock_irqrestore(&port->lock, flags);
+
+	DEBUGREAD(pr_info("r %d\n", count));
+
+	return count;
+}
+
+static ssize_t sync_serial_input(struct file *file, unsigned long arg)
+{
+	struct ssp_request req;
+	int count;
+	int ret;
+
+	/* Copy the request structure from user-mode. */
+	ret = copy_from_user(&req, (struct ssp_request __user *)arg,
+		sizeof(struct ssp_request));
+
+	if (ret) {
+		DEBUG(pr_info("sync_serial_input copy from user failed\n"));
+		return -EFAULT;
+	}
+
+	/* To get the timestamps aligned, make sure that 'len'
+	 * is a multiple of IN_DESCR_SIZE.
+	 */
+	if ((req.len % IN_DESCR_SIZE) != 0) {
+		DEBUG(pr_info("sync_serial: req.len %x, IN_DESCR_SIZE %x\n",
+			      req.len, IN_DESCR_SIZE));
+		return -EINVAL;
+	}
+
+	/* Do the actual read. */
+	/* Note that req.buf is actually a pointer to user space. */
+	count = __sync_serial_read(file, req.buf, req.len,
+				   NULL, &req.ts);
+
+	if (count < 0) {
+		DEBUG(pr_info("sync_serial_input read failed\n"));
+		return count;
+	}
+
+	/* Copy the request back to user-mode. */
+	ret = copy_to_user((struct ssp_request __user *)arg, &req,
+		sizeof(struct ssp_request));
+
+	if (ret) {
+		DEBUG(pr_info("syncser input copy2user failed\n"));
+		return -EFAULT;
+	}
+
+	/* Return the number of bytes read. */
+	return count;
+}
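+
+/* For illustration only -- a rough user-space sketch of the SSP_INPUT
+ * call handled above; the device node name is an assumption and error
+ * handling is omitted. req.len must be a multiple of IN_DESCR_SIZE so
+ * that the returned timestamp lines up with the first byte read:
+ *
+ *	struct ssp_request req;
+ *	static char buf[4 * SSP_INPUT_CHUNK_SIZE];
+ *	int fd = open("/dev/syncser0", O_RDONLY);
+ *
+ *	req.buf = buf;
+ *	req.len = sizeof(buf);
+ *	if (ioctl(fd, SSP_INPUT, &req) > 0)
+ *		... buf holds the data, req.ts its timestamp ...
+ */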
+
+
+static int sync_serial_ioctl_unlocked(struct file *file,
+				      unsigned int cmd, unsigned long arg)
+{
+	int return_val = 0;
+	int dma_w_size = regk_dma_set_w_size1;
+	int dev = iminor(file_inode(file));
+	struct sync_port *port;
+	reg_sser_rw_tr_cfg tr_cfg;
+	reg_sser_rw_rec_cfg rec_cfg;
+	reg_sser_rw_frm_cfg frm_cfg;
+	reg_sser_rw_cfg gen_cfg;
+	reg_sser_rw_intr_mask intr_mask;
+
+	if (dev < 0 || dev >= NBR_PORTS || !ports[dev].enabled) {
+		DEBUG(pr_info("Invalid minor %d\n", dev));
+		return -ENODEV;
+	}
+
+	if (cmd == SSP_INPUT)
+		return sync_serial_input(file, arg);
+
+	port = &ports[dev];
+	spin_lock_irq(&port->lock);
+
+	tr_cfg = REG_RD(sser, port->regi_sser, rw_tr_cfg);
+	rec_cfg = REG_RD(sser, port->regi_sser, rw_rec_cfg);
+	frm_cfg = REG_RD(sser, port->regi_sser, rw_frm_cfg);
+	gen_cfg = REG_RD(sser, port->regi_sser, rw_cfg);
+	intr_mask = REG_RD(sser, port->regi_sser, rw_intr_mask);
+
+	switch (cmd) {
+	case SSP_SPEED:
+		if (GET_SPEED(arg) == CODEC) {
+			unsigned int freq;
+
+			gen_cfg.base_freq = regk_sser_f32;
+
+			/* Clock divider will internally be
+			 * gen_cfg.clk_div + 1.
+			 */
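+			/* Worked example: with the 32 MHz base,
+			 * FREQ_256kHz gives clk_div = 125 - 1, and
+			 * 32 MHz / 125 = 256 kHz.
+			 */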
+
+			freq = GET_FREQ(arg);
+			switch (freq) {
+			case FREQ_32kHz:
+			case FREQ_64kHz:
+			case FREQ_128kHz:
+			case FREQ_256kHz:
+				gen_cfg.clk_div = 125 *
+					(1 << (freq - FREQ_256kHz)) - 1;
+				break;
+			case FREQ_512kHz:
+				gen_cfg.clk_div = 62;
+				break;
+			case FREQ_1MHz:
+			case FREQ_2MHz:
+			case FREQ_4MHz:
+				gen_cfg.clk_div = 8 * (1 << freq) - 1;
+				break;
+			}
+		} else if (GET_SPEED(arg) == CODEC_f32768) {
+			gen_cfg.base_freq = regk_sser_f32_768;
+			switch (GET_FREQ(arg)) {
+			case FREQ_4096kHz:
+				gen_cfg.clk_div = 7;
+				break;
+			default:
+				spin_unlock_irq(&port->lock);
+				return -EINVAL;
+			}
+		} else {
+			gen_cfg.base_freq = regk_sser_f29_493;
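+			/* For the standard rates below, the divisor is
+			 * 29.493 MHz / (bitrate * 8) - 1, i.e. eight base
+			 * clocks per bit.
+			 */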
+			switch (GET_SPEED(arg)) {
+			case SSP150:
+				gen_cfg.clk_div = 29493000 / (150 * 8) - 1;
+				break;
+			case SSP300:
+				gen_cfg.clk_div = 29493000 / (300 * 8) - 1;
+				break;
+			case SSP600:
+				gen_cfg.clk_div = 29493000 / (600 * 8) - 1;
+				break;
+			case SSP1200:
+				gen_cfg.clk_div = 29493000 / (1200 * 8) - 1;
+				break;
+			case SSP2400:
+				gen_cfg.clk_div = 29493000 / (2400 * 8) - 1;
+				break;
+			case SSP4800:
+				gen_cfg.clk_div = 29493000 / (4800 * 8) - 1;
+				break;
+			case SSP9600:
+				gen_cfg.clk_div = 29493000 / (9600 * 8) - 1;
+				break;
+			case SSP19200:
+				gen_cfg.clk_div = 29493000 / (19200 * 8) - 1;
+				break;
+			case SSP28800:
+				gen_cfg.clk_div = 29493000 / (28800 * 8) - 1;
+				break;
+			case SSP57600:
+				gen_cfg.clk_div = 29493000 / (57600 * 8) - 1;
+				break;
+			case SSP115200:
+				gen_cfg.clk_div = 29493000 / (115200 * 8) - 1;
+				break;
+			case SSP230400:
+				gen_cfg.clk_div = 29493000 / (230400 * 8) - 1;
+				break;
+			case SSP460800:
+				gen_cfg.clk_div = 29493000 / (460800 * 8) - 1;
+				break;
+			case SSP921600:
+				gen_cfg.clk_div = 29493000 / (921600 * 8) - 1;
+				break;
+			case SSP3125000:
+				gen_cfg.base_freq = regk_sser_f100;
+				gen_cfg.clk_div = 100000000 / (3125000 * 8) - 1;
+				break;
+
+			}
+		}
+		frm_cfg.wordrate = GET_WORD_RATE(arg);
+
+		break;
+	case SSP_MODE:
+		switch (arg) {
+		case MASTER_OUTPUT:
+			port->output = 1;
+			port->input = 0;
+			frm_cfg.out_on = regk_sser_tr;
+			frm_cfg.frame_pin_dir = regk_sser_out;
+			gen_cfg.clk_dir = regk_sser_out;
+			break;
+		case SLAVE_OUTPUT:
+			port->output = 1;
+			port->input = 0;
+			frm_cfg.frame_pin_dir = regk_sser_in;
+			gen_cfg.clk_dir = regk_sser_in;
+			break;
+		case MASTER_INPUT:
+			port->output = 0;
+			port->input = 1;
+			frm_cfg.frame_pin_dir = regk_sser_out;
+			frm_cfg.out_on = regk_sser_intern_tb;
+			gen_cfg.clk_dir = regk_sser_out;
+			break;
+		case SLAVE_INPUT:
+			port->output = 0;
+			port->input = 1;
+			frm_cfg.frame_pin_dir = regk_sser_in;
+			gen_cfg.clk_dir = regk_sser_in;
+			break;
+		case MASTER_BIDIR:
+			port->output = 1;
+			port->input = 1;
+			frm_cfg.frame_pin_dir = regk_sser_out;
+			frm_cfg.out_on = regk_sser_intern_tb;
+			gen_cfg.clk_dir = regk_sser_out;
+			break;
+		case SLAVE_BIDIR:
+			port->output = 1;
+			port->input = 1;
+			frm_cfg.frame_pin_dir = regk_sser_in;
+			gen_cfg.clk_dir = regk_sser_in;
+			break;
+		default:
+			spin_unlock_irq(&port->lock);
+			return -EINVAL;
+		}
+		if (!port->use_dma || arg == MASTER_OUTPUT ||
+				arg == SLAVE_OUTPUT)
+			intr_mask.rdav = regk_sser_yes;
+		break;
+	case SSP_FRAME_SYNC:
+		if (arg & NORMAL_SYNC) {
+			frm_cfg.rec_delay = 1;
+			frm_cfg.tr_delay = 1;
+		} else if (arg & EARLY_SYNC)
+			frm_cfg.rec_delay = frm_cfg.tr_delay = 0;
+		else if (arg & LATE_SYNC) {
+			frm_cfg.tr_delay = 2;
+			frm_cfg.rec_delay = 2;
+		} else if (arg & SECOND_WORD_SYNC) {
+			frm_cfg.rec_delay = 7;
+			frm_cfg.tr_delay = 1;
+		}
+
+		tr_cfg.bulk_wspace = frm_cfg.tr_delay;
+		frm_cfg.early_wend = regk_sser_yes;
+		if (arg & BIT_SYNC)
+			frm_cfg.type = regk_sser_edge;
+		else if (arg & WORD_SYNC)
+			frm_cfg.type = regk_sser_level;
+		else if (arg & EXTENDED_SYNC)
+			frm_cfg.early_wend = regk_sser_no;
+
+		if (arg & SYNC_ON)
+			frm_cfg.frame_pin_use = regk_sser_frm;
+		else if (arg & SYNC_OFF)
+			frm_cfg.frame_pin_use = regk_sser_gio0;
+
+		dma_w_size = regk_dma_set_w_size2;
+		if (arg & WORD_SIZE_8) {
+			rec_cfg.sample_size = tr_cfg.sample_size = 7;
+			dma_w_size = regk_dma_set_w_size1;
+		} else if (arg & WORD_SIZE_12)
+			rec_cfg.sample_size = tr_cfg.sample_size = 11;
+		else if (arg & WORD_SIZE_16)
+			rec_cfg.sample_size = tr_cfg.sample_size = 15;
+		else if (arg & WORD_SIZE_24)
+			rec_cfg.sample_size = tr_cfg.sample_size = 23;
+		else if (arg & WORD_SIZE_32)
+			rec_cfg.sample_size = tr_cfg.sample_size = 31;
+
+		if (arg & BIT_ORDER_MSB)
+			rec_cfg.sh_dir = tr_cfg.sh_dir = regk_sser_msbfirst;
+		else if (arg & BIT_ORDER_LSB)
+			rec_cfg.sh_dir = tr_cfg.sh_dir = regk_sser_lsbfirst;
+
+		if (arg & FLOW_CONTROL_ENABLE) {
+			frm_cfg.status_pin_use = regk_sser_frm;
+			rec_cfg.fifo_thr = regk_sser_thr16;
+		} else if (arg & FLOW_CONTROL_DISABLE) {
+			frm_cfg.status_pin_use = regk_sser_gio0;
+			rec_cfg.fifo_thr = regk_sser_inf;
+		}
+
+		if (arg & CLOCK_NOT_GATED)
+			gen_cfg.gate_clk = regk_sser_no;
+		else if (arg & CLOCK_GATED)
+			gen_cfg.gate_clk = regk_sser_yes;
+
+		break;
+	case SSP_IPOLARITY:
+		/* NOTE!! negedge is considered NORMAL */
+		if (arg & CLOCK_NORMAL)
+			rec_cfg.clk_pol = regk_sser_neg;
+		else if (arg & CLOCK_INVERT)
+			rec_cfg.clk_pol = regk_sser_pos;
+
+		if (arg & FRAME_NORMAL)
+			frm_cfg.level = regk_sser_pos_hi;
+		else if (arg & FRAME_INVERT)
+			frm_cfg.level = regk_sser_neg_lo;
+
+		if (arg & STATUS_NORMAL)
+			gen_cfg.hold_pol = regk_sser_pos;
+		else if (arg & STATUS_INVERT)
+			gen_cfg.hold_pol = regk_sser_neg;
+		break;
+	case SSP_OPOLARITY:
+		if (arg & CLOCK_NORMAL)
+			gen_cfg.out_clk_pol = regk_sser_pos;
+		else if (arg & CLOCK_INVERT)
+			gen_cfg.out_clk_pol = regk_sser_neg;
+
+		if (arg & FRAME_NORMAL)
+			frm_cfg.level = regk_sser_pos_hi;
+		else if (arg & FRAME_INVERT)
+			frm_cfg.level = regk_sser_neg_lo;
+
+		if (arg & STATUS_NORMAL)
+			gen_cfg.hold_pol = regk_sser_pos;
+		else if (arg & STATUS_INVERT)
+			gen_cfg.hold_pol = regk_sser_neg;
+		break;
+	case SSP_SPI:
+		rec_cfg.fifo_thr = regk_sser_inf;
+		rec_cfg.sh_dir = tr_cfg.sh_dir = regk_sser_msbfirst;
+		rec_cfg.sample_size = tr_cfg.sample_size = 7;
+		frm_cfg.frame_pin_use = regk_sser_frm;
+		frm_cfg.type = regk_sser_level;
+		frm_cfg.tr_delay = 1;
+		frm_cfg.level = regk_sser_neg_lo;
+		if (arg & SPI_SLAVE) {
+			rec_cfg.clk_pol = regk_sser_neg;
+			gen_cfg.clk_dir = regk_sser_in;
+			port->input = 1;
+			port->output = 0;
+		} else {
+			gen_cfg.out_clk_pol = regk_sser_pos;
+			port->input = 0;
+			port->output = 1;
+			gen_cfg.clk_dir = regk_sser_out;
+		}
+		break;
+	case SSP_INBUFCHUNK:
+		break;
+	default:
+		return_val = -ENOTTY;
+	}
+
+
+	if (port->started) {
+		rec_cfg.rec_en = port->input;
+		gen_cfg.en = (port->output | port->input);
+	}
+
+	REG_WR(sser, port->regi_sser, rw_tr_cfg, tr_cfg);
+	REG_WR(sser, port->regi_sser, rw_rec_cfg, rec_cfg);
+	REG_WR(sser, port->regi_sser, rw_frm_cfg, frm_cfg);
+	REG_WR(sser, port->regi_sser, rw_intr_mask, intr_mask);
+	REG_WR(sser, port->regi_sser, rw_cfg, gen_cfg);
+
+
+	if (cmd == SSP_FRAME_SYNC && (arg & (WORD_SIZE_8 | WORD_SIZE_12 |
+			WORD_SIZE_16 | WORD_SIZE_24 | WORD_SIZE_32))) {
+		int en = gen_cfg.en;
+		gen_cfg.en = 0;
+		REG_WR(sser, port->regi_sser, rw_cfg, gen_cfg);
+		/* ##### Should the DMA be stopped before we change the
+		 * DMA word size? */
+		DMA_WR_CMD(port->regi_dmain, dma_w_size);
+		DMA_WR_CMD(port->regi_dmaout, dma_w_size);
+		gen_cfg.en = en;
+		REG_WR(sser, port->regi_sser, rw_cfg, gen_cfg);
+	}
+
+	spin_unlock_irq(&port->lock);
+	return return_val;
+}
+
+static long sync_serial_ioctl(struct file *file,
+		unsigned int cmd, unsigned long arg)
+{
+	long ret;
+
+	mutex_lock(&sync_serial_mutex);
+	ret = sync_serial_ioctl_unlocked(file, cmd, arg);
+	mutex_unlock(&sync_serial_mutex);
+
+	return ret;
+}
+
+/* NOTE: sync_serial_write does not support concurrency */
+static ssize_t sync_serial_write(struct file *file, const char __user *buf,
+				 size_t count, loff_t *ppos)
+{
+	int dev = iminor(file_inode(file));
+	DECLARE_WAITQUEUE(wait, current);
+	struct sync_port *port;
+	int trunc_count;
+	unsigned long flags;
+	int bytes_free;
+	int out_buf_count;
+
+	unsigned char *rd_ptr;       /* First allocated byte in the buffer */
+	unsigned char *wr_ptr;       /* First free byte in the buffer */
+	unsigned char *buf_stop_ptr; /* Last byte + 1 */
+
+	if (dev < 0 || dev >= NBR_PORTS || !ports[dev].enabled) {
+		DEBUG(pr_info("Invalid minor %d\n", dev));
+		return -ENODEV;
+	}
+	port = &ports[dev];
+
+	/* |<-         OUT_BUFFER_SIZE                          ->|
+	 *           |<- out_buf_count ->|
+	 *                               |<- trunc_count ->| ...->|
+	 *  ______________________________________________________
+	 * |  free   |   data            | free                   |
+	 * |_________|___________________|________________________|
+	 *           ^ rd_ptr            ^ wr_ptr
+	 */
+	DEBUGWRITE(pr_info("W d%d c %u a: %p c: %p\n",
+			   port->port_nbr, count, port->active_tr_descr,
+			   port->catch_tr_descr));
+
+	/* Read variables that may be updated by interrupts */
+	spin_lock_irqsave(&port->lock, flags);
+	rd_ptr = port->out_rd_ptr;
+	out_buf_count = port->out_buf_count;
+	spin_unlock_irqrestore(&port->lock, flags);
+
+	/* Check if resources are available */
+	if (port->tr_running &&
+	    ((port->use_dma && port->active_tr_descr == port->catch_tr_descr) ||
+	     out_buf_count >= OUT_BUFFER_SIZE)) {
+		DEBUGWRITE(pr_info("sser%d full\n", dev));
+		return -EAGAIN;
+	}
+
+	buf_stop_ptr = port->out_buffer + OUT_BUFFER_SIZE;
+
+	/* Determine pointer to the first free byte, before copying. */
+	wr_ptr = rd_ptr + out_buf_count;
+	if (wr_ptr >= buf_stop_ptr)
+		wr_ptr -= OUT_BUFFER_SIZE;
+
+	/* If we would wrap the ring buffer, let the user-space program
+	 * handle it by truncating the data. This could be more elegant;
+	 * small buffer fragments may occur.
+	 */
+	bytes_free = OUT_BUFFER_SIZE - out_buf_count;
+	if (wr_ptr + bytes_free > buf_stop_ptr)
+		bytes_free = buf_stop_ptr - wr_ptr;
+	trunc_count = (count < bytes_free) ? count : bytes_free;
+
+	if (copy_from_user(wr_ptr, buf, trunc_count))
+		return -EFAULT;
+
+	DEBUGOUTBUF(pr_info("%-4d + %-4d = %-4d     %p %p %p\n",
+			    out_buf_count, trunc_count,
+			    port->out_buf_count, port->out_buffer,
+			    wr_ptr, buf_stop_ptr));
+
+	/* Make sure transmitter/receiver is running */
+	if (!port->started) {
+		reg_sser_rw_cfg cfg = REG_RD(sser, port->regi_sser, rw_cfg);
+		reg_sser_rw_rec_cfg rec_cfg =
+			REG_RD(sser, port->regi_sser, rw_rec_cfg);
+		cfg.en = regk_sser_yes;
+		rec_cfg.rec_en = port->input;
+		REG_WR(sser, port->regi_sser, rw_cfg, cfg);
+		REG_WR(sser, port->regi_sser, rw_rec_cfg, rec_cfg);
+		port->started = 1;
+	}
+
+	/* Setup wait if blocking */
+	if (!(file->f_flags & O_NONBLOCK)) {
+		add_wait_queue(&port->out_wait_q, &wait);
+		set_current_state(TASK_INTERRUPTIBLE);
+	}
+
+	spin_lock_irqsave(&port->lock, flags);
+	port->out_buf_count += trunc_count;
+	if (port->use_dma) {
+#ifdef SYNC_SER_DMA
+		start_dma_out(port, wr_ptr, trunc_count);
+#endif
+	} else if (!port->tr_running) {
+#ifdef SYNC_SER_MANUAL
+		reg_sser_rw_intr_mask intr_mask;
+		intr_mask = REG_RD(sser, port->regi_sser, rw_intr_mask);
+		/* Start sender by writing data */
+		send_word(port);
+		/* and enable transmitter ready IRQ */
+		intr_mask.trdy = 1;
+		REG_WR(sser, port->regi_sser, rw_intr_mask, intr_mask);
+#endif
+	}
+	spin_unlock_irqrestore(&port->lock, flags);
+
+	/* Exit if non blocking */
+	if (file->f_flags & O_NONBLOCK) {
+		DEBUGWRITE(pr_info("w d%d c %u  %08x\n",
+				   port->port_nbr, trunc_count,
+				   REG_RD_INT(dma, port->regi_dmaout, r_intr)));
+		return trunc_count;
+	}
+
+	schedule();
+	remove_wait_queue(&port->out_wait_q, &wait);
+
+	if (signal_pending(current))
+		return -EINTR;
+
+	DEBUGWRITE(pr_info("w d%d c %u\n", port->port_nbr, trunc_count));
+	return trunc_count;
+}
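+
+/* For illustration only -- since the write above truncates at the ring
+ * buffer wrap, a user-space writer should loop on short writes; a
+ * minimal sketch (fd, p and count are assumed to be set up, and error
+ * handling beyond the bail-out is omitted):
+ *
+ *	while (count > 0) {
+ *		ssize_t n = write(fd, p, count);
+ *
+ *		if (n <= 0)
+ *			break;	(check errno for EAGAIN/EINTR here)
+ *		p += n;
+ *		count -= n;
+ *	}
+ */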
+
+static ssize_t sync_serial_read(struct file *file, char __user *buf,
+				size_t count, loff_t *ppos)
+{
+	return __sync_serial_read(file, buf, count, ppos, NULL);
+}
+
+#ifdef SYNC_SER_MANUAL
+static void send_word(struct sync_port *port)
+{
+	reg_sser_rw_tr_cfg tr_cfg = REG_RD(sser, port->regi_sser, rw_tr_cfg);
+	reg_sser_rw_tr_data tr_data =  {0};
+
+	switch (tr_cfg.sample_size) {
+	case 7: /* 8-bit words; sample_size holds bits - 1 */
+		port->out_buf_count--;
+		tr_data.data = *port->out_rd_ptr++;
+		REG_WR(sser, port->regi_sser, rw_tr_data, tr_data);
+		if (port->out_rd_ptr >= port->out_buffer + OUT_BUFFER_SIZE)
+			port->out_rd_ptr = port->out_buffer;
+		break;
+	case 11:
+	{
+		int data = (*port->out_rd_ptr++) << 8;
+		data |= *port->out_rd_ptr++;
+		port->out_buf_count -= 2;
+		tr_data.data = data;
+		REG_WR(sser, port->regi_sser, rw_tr_data, tr_data);
+		if (port->out_rd_ptr >= port->out_buffer + OUT_BUFFER_SIZE)
+			port->out_rd_ptr = port->out_buffer;
+		break;
+	}
+	case 15:
+		port->out_buf_count -= 2;
+		tr_data.data = *(unsigned short *)port->out_rd_ptr;
+		REG_WR(sser, port->regi_sser, rw_tr_data, tr_data);
+		port->out_rd_ptr += 2;
+		if (port->out_rd_ptr >= port->out_buffer + OUT_BUFFER_SIZE)
+			port->out_rd_ptr = port->out_buffer;
+		break;
+	case 23:
+		port->out_buf_count -= 3;
+		tr_data.data = *(unsigned short *)port->out_rd_ptr;
+		REG_WR(sser, port->regi_sser, rw_tr_data, tr_data);
+		port->out_rd_ptr += 2;
+		tr_data.data = *port->out_rd_ptr++;
+		REG_WR(sser, port->regi_sser, rw_tr_data, tr_data);
+		if (port->out_rd_ptr >= port->out_buffer + OUT_BUFFER_SIZE)
+			port->out_rd_ptr = port->out_buffer;
+		break;
+	case 31:
+		port->out_buf_count -= 4;
+		tr_data.data = *(unsigned short *)port->out_rd_ptr;
+		REG_WR(sser, port->regi_sser, rw_tr_data, tr_data);
+		port->out_rd_ptr += 2;
+		tr_data.data = *(unsigned short *)port->out_rd_ptr;
+		REG_WR(sser, port->regi_sser, rw_tr_data, tr_data);
+		port->out_rd_ptr += 2;
+		if (port->out_rd_ptr >= port->out_buffer + OUT_BUFFER_SIZE)
+			port->out_rd_ptr = port->out_buffer;
+		break;
+	}
+}
+#endif
+
+#ifdef SYNC_SER_DMA
+static void start_dma_out(struct sync_port *port, const char *data, int count)
+{
+	port->active_tr_descr->buf = (char *)virt_to_phys((char *)data);
+	port->active_tr_descr->after = port->active_tr_descr->buf + count;
+	port->active_tr_descr->intr = 1;
+
+	port->active_tr_descr->eol = 1;
+	port->prev_tr_descr->eol = 0;
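+	/* Moving the end-of-list mark like this appends the new descriptor
+	 * to the list the DMA may still be working on; the old EOL is
+	 * cleared only after the new one is set.
+	 */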
+
+	DEBUGTRDMA(pr_info("Inserting eolr:%p eol@:%p\n",
+		port->prev_tr_descr, port->active_tr_descr));
+	port->prev_tr_descr = port->active_tr_descr;
+	port->active_tr_descr = phys_to_virt((int)port->active_tr_descr->next);
+
+	if (!port->tr_running) {
+		reg_sser_rw_tr_cfg tr_cfg = REG_RD(sser, port->regi_sser,
+			rw_tr_cfg);
+
+		port->out_context.next = NULL;
+		port->out_context.saved_data =
+			(dma_descr_data *)virt_to_phys(port->prev_tr_descr);
+		port->out_context.saved_data_buf = port->prev_tr_descr->buf;
+
+		DMA_START_CONTEXT(port->regi_dmaout,
+			virt_to_phys((char *)&port->out_context));
+
+		tr_cfg.tr_en = regk_sser_yes;
+		REG_WR(sser, port->regi_sser, rw_tr_cfg, tr_cfg);
+		DEBUGTRDMA(pr_info("dma s\n"););
+	} else {
+		DMA_CONTINUE_DATA(port->regi_dmaout);
+		DEBUGTRDMA(pr_info("dma c\n"););
+	}
+
+	port->tr_running = 1;
+}
+
+static void start_dma_in(struct sync_port *port)
+{
+	int i;
+	char *buf;
+	unsigned long flags;
+	spin_lock_irqsave(&port->lock, flags);
+	port->writep = port->flip;
+	spin_unlock_irqrestore(&port->lock, flags);
+
+	buf = (char *)virt_to_phys(port->in_buffer);
+	for (i = 0; i < NBR_IN_DESCR; i++) {
+		port->in_descr[i].buf = buf;
+		port->in_descr[i].after = buf + port->inbufchunk;
+		port->in_descr[i].intr = 1;
+		port->in_descr[i].next =
+			(dma_descr_data *)virt_to_phys(&port->in_descr[i+1]);
+		buf += port->inbufchunk;
+	}
+	/* Link the last descriptor to the first */
+	port->in_descr[i-1].next =
+		(dma_descr_data *)virt_to_phys(&port->in_descr[0]);
+	port->in_descr[i-1].eol = regk_sser_yes;
+	port->next_rx_desc = &port->in_descr[0];
+	port->prev_rx_desc = &port->in_descr[NBR_IN_DESCR - 1];
+	port->in_context.saved_data =
+		(dma_descr_data *)virt_to_phys(&port->in_descr[0]);
+	port->in_context.saved_data_buf = port->in_descr[0].buf;
+	DMA_START_CONTEXT(port->regi_dmain, virt_to_phys(&port->in_context));
+}
+
+static irqreturn_t tr_interrupt(int irq, void *dev_id)
+{
+	reg_dma_r_masked_intr masked;
+	reg_dma_rw_ack_intr ack_intr = { .data = regk_dma_yes };
+	reg_dma_rw_stat stat;
+	int i;
+	int found = 0;
+	int stop_sser = 0;
+
+	for (i = 0; i < NBR_PORTS; i++) {
+		struct sync_port *port = &ports[i];
+		if (!port->enabled || !port->use_dma)
+			continue;
+
+		/* IRQ active for the port? */
+		masked = REG_RD(dma, port->regi_dmaout, r_masked_intr);
+		if (!masked.data)
+			continue;
+
+		found = 1;
+
+		/* Check if we should stop the DMA transfer */
+		stat = REG_RD(dma, port->regi_dmaout, rw_stat);
+		if (stat.list_state == regk_dma_data_at_eol)
+			stop_sser = 1;
+
+		/* Clear IRQ */
+		REG_WR(dma, port->regi_dmaout, rw_ack_intr, ack_intr);
+
+		if (!stop_sser) {
+			/* The DMA has completed a descriptor, EOL was not
+			 * encountered, so step relevant descriptor and
+			 * datapointers forward. */
+			int sent;
+			sent = port->catch_tr_descr->after -
+				port->catch_tr_descr->buf;
+			DEBUGTXINT(pr_info("%-4d - %-4d = %-4d\t"
+					   "in descr %p (ac: %p)\n",
+					   port->out_buf_count, sent,
+					   port->out_buf_count - sent,
+					   port->catch_tr_descr,
+					   port->active_tr_descr););
+			port->out_buf_count -= sent;
+			port->catch_tr_descr =
+				phys_to_virt((int) port->catch_tr_descr->next);
+			port->out_rd_ptr =
+				phys_to_virt((int) port->catch_tr_descr->buf);
+		} else {
+			reg_sser_rw_tr_cfg tr_cfg;
+			int j, sent;
+			/* EOL handler.
+			 * Note that if an EOL was encountered during the irq
+			 * locked section of sync_serial_write() the DMA will be
+			 * restarted and the eol flag will be cleared.
+			 * The remaining descriptors will be traversed by
+			 * the descriptor interrupts as usual.
+			 */
+			j = 0;
+			while (!port->catch_tr_descr->eol) {
+				sent = port->catch_tr_descr->after -
+					port->catch_tr_descr->buf;
+				DEBUGOUTBUF(pr_info(
+					"traversing descr %p -%d (%d)\n",
+					port->catch_tr_descr,
+					sent,
+					port->out_buf_count));
+				port->out_buf_count -= sent;
+				port->catch_tr_descr = phys_to_virt(
+					(int)port->catch_tr_descr->next);
+				j++;
+				if (j >= NBR_OUT_DESCR) {
+					/* TODO: Reset and recover */
+					panic("sync_serial: missing eol");
+				}
+			}
+			sent = port->catch_tr_descr->after -
+				port->catch_tr_descr->buf;
+			DEBUGOUTBUF(pr_info("eol at descr %p -%d (%d)\n",
+				port->catch_tr_descr,
+				sent,
+				port->out_buf_count));
+
+			port->out_buf_count -= sent;
+
+			/* Update read pointer to first free byte, we
+			 * may already be writing data there. */
+			port->out_rd_ptr =
+				phys_to_virt((int) port->catch_tr_descr->after);
+			if (port->out_rd_ptr > port->out_buffer +
+					OUT_BUFFER_SIZE)
+				port->out_rd_ptr = port->out_buffer;
+
+			tr_cfg = REG_RD(sser, port->regi_sser, rw_tr_cfg);
+			DEBUGTXINT(pr_info(
+				"tr_int DMA stop %d, set catch @ %p\n",
+				port->out_buf_count,
+				port->active_tr_descr));
+			if (port->out_buf_count != 0)
+				pr_err("sync_ser: buf not empty after eol\n");
+			port->catch_tr_descr = port->active_tr_descr;
+			port->tr_running = 0;
+			tr_cfg.tr_en = regk_sser_no;
+			REG_WR(sser, port->regi_sser, rw_tr_cfg, tr_cfg);
+		}
+		/* wake up the waiting process */
+		wake_up_interruptible(&port->out_wait_q);
+	}
+	return IRQ_RETVAL(found);
+} /* tr_interrupt */
+
+
+static inline void handle_rx_packet(struct sync_port *port)
+{
+	int idx;
+	reg_dma_rw_ack_intr ack_intr = { .data = regk_dma_yes };
+	unsigned long flags;
+
+	DEBUGRXINT(pr_info("!"));
+	spin_lock_irqsave(&port->lock, flags);
+
+	/* If we overrun, the user experience is poor no matter whether we
+	 * drop new or old data. It is much easier to get it right when
+	 * dropping new data, so let's do that.
+	 */
+	if ((port->writep + port->inbufchunk <=
+	     port->flip + port->in_buffer_size) &&
+	    (port->in_buffer_len + port->inbufchunk < IN_BUFFER_SIZE)) {
+		memcpy(port->writep,
+		       phys_to_virt((unsigned)port->next_rx_desc->buf),
+		       port->inbufchunk);
+		port->writep += port->inbufchunk;
+		if (port->writep >= port->flip + port->in_buffer_size)
+			port->writep = port->flip;
+
+		/* Timestamp the new data chunk. */
+		if (port->write_ts_idx == NBR_IN_DESCR)
+			port->write_ts_idx = 0;
+		idx = port->write_ts_idx++;
+		ktime_get_ts(&port->timestamp[idx]);
+		port->in_buffer_len += port->inbufchunk;
+	}
+	spin_unlock_irqrestore(&port->lock, flags);
+
+	port->next_rx_desc->eol = 1;
+	port->prev_rx_desc->eol = 0;
+	/* Cache bug workaround */
+	flush_dma_descr(port->prev_rx_desc, 0);
+	port->prev_rx_desc = port->next_rx_desc;
+	port->next_rx_desc = phys_to_virt((unsigned)port->next_rx_desc->next);
+	/* Cache bug workaround */
+	flush_dma_descr(port->prev_rx_desc, 1);
+	/* wake up the waiting process */
+	wake_up_interruptible(&port->in_wait_q);
+	DMA_CONTINUE(port->regi_dmain);
+	REG_WR(dma, port->regi_dmain, rw_ack_intr, ack_intr);
+
+}
+
+static irqreturn_t rx_interrupt(int irq, void *dev_id)
+{
+	reg_dma_r_masked_intr masked;
+
+	int i;
+	int found = 0;
+
+	DEBUG(pr_info("rx_interrupt\n"));
+
+	for (i = 0; i < NBR_PORTS; i++) {
+		struct sync_port *port = &ports[i];
+
+		if (!port->enabled || !port->use_dma)
+			continue;
+
+		masked = REG_RD(dma, port->regi_dmain, r_masked_intr);
+
+		if (!masked.data)
+			continue;
+
+		/* Descriptor interrupt */
+		found = 1;
+		while (REG_RD(dma, port->regi_dmain, rw_data) !=
+				virt_to_phys(port->next_rx_desc))
+			handle_rx_packet(port);
+	}
+	return IRQ_RETVAL(found);
+} /* rx_interrupt */
+#endif /* SYNC_SER_DMA */
+
+#ifdef SYNC_SER_MANUAL
+static irqreturn_t manual_interrupt(int irq, void *dev_id)
+{
+	unsigned long flags;
+	int i;
+	int found = 0;
+	reg_sser_r_masked_intr masked;
+
+	for (i = 0; i < NBR_PORTS; i++) {
+		struct sync_port *port = &ports[i];
+
+		if (!port->enabled || port->use_dma)
+			continue;
+
+		masked = REG_RD(sser, port->regi_sser, r_masked_intr);
+		/* Data received? */
+		if (masked.rdav) {
+			reg_sser_rw_rec_cfg rec_cfg =
+				REG_RD(sser, port->regi_sser, rw_rec_cfg);
+			reg_sser_r_rec_data data = REG_RD(sser,
+				port->regi_sser, r_rec_data);
+			found = 1;
+			/* Read data */
+			spin_lock_irqsave(&port->lock, flags);
+			switch (rec_cfg.sample_size) {
+			case 7: /* 8-bit words; sample_size holds bits - 1 */
+				*port->writep++ = data.data & 0xff;
+				break;
+			case 11:
+				/* Stored as the high byte followed by the
+				 * low nibble. */
+				*port->writep = (data.data & 0x0ff0) >> 4;
+				*(port->writep + 1) = data.data & 0x0f;
+				port->writep += 2;
+				break;
+			case 15:
+				*(unsigned short *)port->writep = data.data;
+				port->writep += 2;
+				break;
+			case 23:
+				*(unsigned int *)port->writep = data.data;
+				port->writep += 3;
+				break;
+			case 31:
+				*(unsigned int *)port->writep = data.data;
+				port->writep += 4;
+				break;
+			}
+
+			/* Wrap? */
+			if (port->writep >= port->flip + port->in_buffer_size)
+				port->writep = port->flip;
+			if (port->writep == port->readp) {
+				/* Receive buf overrun, discard oldest data */
+				port->readp++;
+				/* Wrap? */
+				if (port->readp >= port->flip +
+						port->in_buffer_size)
+					port->readp = port->flip;
+			}
+			spin_unlock_irqrestore(&port->lock, flags);
+			if (sync_data_avail(port) >= port->inbufchunk)
+				/* Wake up application */
+				wake_up_interruptible(&port->in_wait_q);
+		}
+
+		/* Transmitter ready? */
+		if (masked.trdy) {
+			found = 1;
+			/* More data to send */
+			if (port->out_buf_count > 0)
+				send_word(port);
+			else {
+				/* Transmission finished */
+				reg_sser_rw_intr_mask intr_mask;
+				intr_mask = REG_RD(sser, port->regi_sser,
+					rw_intr_mask);
+				intr_mask.trdy = 0;
+				REG_WR(sser, port->regi_sser,
+					rw_intr_mask, intr_mask);
+				/* Wake up application */
+				wake_up_interruptible(&port->out_wait_q);
+			}
+		}
+	}
+	return IRQ_RETVAL(found);
+}
+#endif
+
+static int __init etrax_sync_serial_init(void)
+{
+#if 1
+	/* This code will be removed when we move to udev for all devices. */
+	syncser_first = MKDEV(SYNC_SERIAL_MAJOR, 0);
+	if (register_chrdev_region(syncser_first, minor_count, SYNCSER_NAME)) {
+		pr_err("Failed to register major %d\n", SYNC_SERIAL_MAJOR);
+		return -1;
+	}
+#else
+	/* Allocate dynamic major number. */
+	if (alloc_chrdev_region(&syncser_first, 0, minor_count, SYNCSER_NAME)) {
+		pr_err("Failed to allocate character device region\n");
+		return -1;
+	}
+#endif
+	syncser_cdev = cdev_alloc();
+	if (!syncser_cdev) {
+		pr_err("Failed to allocate cdev for syncser\n");
+		unregister_chrdev_region(syncser_first, minor_count);
+		return -1;
+	}
+	cdev_init(syncser_cdev, &syncser_fops);
+
+	/* Create a sysfs class for syncser */
+	syncser_class = class_create(THIS_MODULE, "syncser_class");
+
+	/* Initialize Ports */
+#if defined(CONFIG_ETRAX_SYNCHRONOUS_SERIAL_PORT0)
+	if (artpec_pinmux_alloc_fixed(PINMUX_SSER0)) {
+		pr_warn("Unable to alloc pins for synchronous serial port 0\n");
+		class_destroy(syncser_class);
+		unregister_chrdev_region(syncser_first, minor_count);
+		return -EIO;
+	}
+	initialize_port(0);
+	ports[0].enabled = 1;
+	/* Register with sysfs so udev can pick it up. */
+	device_create(syncser_class, NULL, syncser_first, NULL,
+		      "%s%d", SYNCSER_NAME, 0);
+#endif
+
+#if defined(CONFIG_ETRAXFS) && defined(CONFIG_ETRAX_SYNCHRONOUS_SERIAL_PORT1)
+	if (artpec_pinmux_alloc_fixed(PINMUX_SSER1)) {
+		pr_warn("Unable to alloc pins for synchronous serial port 1\n");
+		unregister_chrdev_region(syncser_first, minor_count);
+		class_destroy(syncser_class);
+		return -EIO;
+	}
+	initialize_port(1);
+	ports[1].enabled = 1;
+	/* Register with sysfs so udev can pick it up. */
+	device_create(syncser_class, NULL, syncser_first + 1, NULL,
+		      "%s%d", SYNCSER_NAME, 1);
+#endif
+
+	/* Add it to system */
+	if (cdev_add(syncser_cdev, syncser_first, minor_count) < 0) {
+		pr_err("Failed to add syncser as char device\n");
+		device_destroy(syncser_class, syncser_first);
+		class_destroy(syncser_class);
+		cdev_del(syncser_cdev);
+		unregister_chrdev_region(syncser_first, minor_count);
+		return -1;
+	}
+
+
+	pr_info("ARTPEC synchronous serial port (%s: %d, %d)\n",
+		SYNCSER_NAME, MAJOR(syncser_first), MINOR(syncser_first));
+
+	return 0;
+}
+
+static void __exit etrax_sync_serial_exit(void)
+{
+	int i;
+
+	for (i = 0; i < minor_count; i++)
+		device_destroy(syncser_class, syncser_first + i);
+	class_destroy(syncser_class);
+
+	if (syncser_cdev) {
+		cdev_del(syncser_cdev);
+		unregister_chrdev_region(syncser_first, minor_count);
+	}
+	for (i = 0; i < NBR_PORTS; i++) {
+		struct sync_port *port = &ports[i];
+		if (port->init_irqs == dma_irq_setup) {
+			/* Free dma irqs and dma channels. */
+#ifdef SYNC_SER_DMA
+			artpec_free_dma(port->dma_in_nbr);
+			artpec_free_dma(port->dma_out_nbr);
+			free_irq(port->dma_out_intr_vect, port);
+			free_irq(port->dma_in_intr_vect, port);
+#endif
+		} else if (port->init_irqs == manual_irq_setup) {
+			/* Free manual irq. */
+			free_irq(port->syncser_intr_vect, port);
+		}
+	}
+
+	pr_info("ARTPEC synchronous serial port unregistered\n");
+}
+
+module_init(etrax_sync_serial_init);
+module_exit(etrax_sync_serial_exit);
+
+MODULE_LICENSE("GPL");
+
diff --git a/arch/cris/arch-v32/kernel/Makefile b/arch/cris/arch-v32/kernel/Makefile
new file mode 100644
index 0000000..d9fc617
--- /dev/null
+++ b/arch/cris/arch-v32/kernel/Makefile
@@ -0,0 +1,17 @@
+#
+# Makefile for the linux kernel.
+#
+
+extra-y	:= head.o
+
+
+obj-y   := entry.o traps.o irq.o debugport.o \
+	   process.o ptrace.o setup.o signal.o time.o \
+	   cache.o cacheflush.o
+
+obj-$(CONFIG_ETRAX_KGDB) += kgdb.o kgdb_asm.o
+obj-$(CONFIG_ETRAX_FAST_TIMER) += fasttimer.o
+obj-$(CONFIG_MODULES)    += crisksyms.o
+
+clean:
+
diff --git a/arch/cris/arch-v32/kernel/cache.c b/arch/cris/arch-v32/kernel/cache.c
new file mode 100644
index 0000000..f38433b
--- /dev/null
+++ b/arch/cris/arch-v32/kernel/cache.c
@@ -0,0 +1,33 @@
+#include <linux/module.h>
+#include <asm/io.h>
+#include <arch/cache.h>
+#include <arch/hwregs/dma.h>
+
+/* This file is used to workaround a cache bug, Guinness TR 106. */
+
+inline void flush_dma_descr(struct dma_descr_data *descr, int flush_buf)
+{
+	/* Flush descriptor to make sure we get correct in_eop and after. */
+	asm volatile ("ftagd [%0]" :: "r" (descr));
+	/* Flush buffer pointed out by descriptor. */
+	if (flush_buf)
+		cris_flush_cache_range(phys_to_virt((unsigned)descr->buf),
+				(unsigned)(descr->after - descr->buf));
+}
+EXPORT_SYMBOL(flush_dma_descr);
+
+void flush_dma_list(struct dma_descr_data *descr)
+{
+	while (1) {
+		flush_dma_descr(descr, 1);
+		if (descr->eol)
+			break;
+		descr = phys_to_virt((unsigned)descr->next);
+	}
+}
+EXPORT_SYMBOL(flush_dma_list);
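+
+/* Typical use (sketch only): flush a chain the DMA has written before
+ * the CPU inspects it, e.g.
+ *
+ *	flush_dma_list(first_rx_descr);
+ *
+ * where "first_rx_descr" is a hypothetical pointer to the head of the
+ * chain.
+ */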
+
+/* From cacheflush.S */
+EXPORT_SYMBOL(cris_flush_cache);
+/* From cacheflush.S */
+EXPORT_SYMBOL(cris_flush_cache_range);
diff --git a/arch/cris/arch-v32/kernel/cacheflush.S b/arch/cris/arch-v32/kernel/cacheflush.S
new file mode 100644
index 0000000..6fc3d95
--- /dev/null
+++ b/arch/cris/arch-v32/kernel/cacheflush.S
@@ -0,0 +1,99 @@
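+/* Flush the data cache with ftagd, one 32-byte line per step (the line
+ * size implied by the addq 32 strides below). Per the CRIS calling
+ * convention the first two arguments arrive in $r10 and $r11, so for
+ * cris_flush_cache_range $r10 is the start address and $r11 the length
+ * in bytes; ranges of 1 KB and more are meant to take the unrolled
+ * cris_flush_1KB loop. Note the branch delay slots: the instruction
+ * after each branch still executes.
+ */
+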
+	.global cris_flush_cache_range
+	.type	cris_flush_cache_range, @function
+cris_flush_cache_range:
+	move.d 1024, $r12
+	cmp.d $r11, $r12
+	bhi cris_flush_1KB
+	nop
+	add.d $r10, $r11
+	ftagd [$r10]
+cris_flush_last:
+	addq 32, $r10
+	cmp.d $r11, $r10
+	blt cris_flush_last
+	ftagd [$r10]
+	ret
+	nop
+cris_flush_1KB:
+	ftagd [$r10]
+	addq 32, $r10
+	ftagd [$r10]
+	addq 32, $r10
+	ftagd [$r10]
+	addq 32, $r10
+	ftagd [$r10]
+	addq 32, $r10
+	ftagd [$r10]
+	addq 32, $r10
+	ftagd [$r10]
+	addq 32, $r10
+	ftagd [$r10]
+	addq 32, $r10
+	ftagd [$r10]
+	addq 32, $r10
+	ftagd [$r10]
+	addq 32, $r10
+	ftagd [$r10]
+	addq 32, $r10
+	ftagd [$r10]
+	addq 32, $r10
+	ftagd [$r10]
+	addq 32, $r10
+	ftagd [$r10]
+	addq 32, $r10
+	ftagd [$r10]
+	addq 32, $r10
+	ftagd [$r10]
+	addq 32, $r10
+	ftagd [$r10]
+	addq 32, $r10
+	ftagd [$r10]
+	addq 32, $r10
+	ftagd [$r10]
+	addq 32, $r10
+	ftagd [$r10]
+	addq 32, $r10
+	ftagd [$r10]
+	addq 32, $r10
+	ftagd [$r10]
+	addq 32, $r10
+	ftagd [$r10]
+	addq 32, $r10
+	ftagd [$r10]
+	addq 32, $r10
+	ftagd [$r10]
+	addq 32, $r10
+	ftagd [$r10]
+	addq 32, $r10
+	ftagd [$r10]
+	addq 32, $r10
+	ftagd [$r10]
+	addq 32, $r10
+	ftagd [$r10]
+	addq 32, $r10
+	ftagd [$r10]
+	addq 32, $r10
+	ftagd [$r10]
+	addq 32, $r10
+	ftagd [$r10]
+	addq 32, $r10
+	ftagd [$r10]
+	addq 32, $r10
+	ba cris_flush_cache_range
+	sub.d $r12, $r11
+	.size	cris_flush_cache_range, . - cris_flush_cache_range
+
+	.global cris_flush_cache
+	.type	cris_flush_cache, @function
+cris_flush_cache:
+	moveq 0, $r10
+cris_flush_line:
+	move.d 16*1024, $r11
+	addq 16, $r10
+	cmp.d $r10, $r11
+	blt cris_flush_line
+	fidxd [$r10]
+	ret
+	nop
+	.size	cris_flush_cache, . - cris_flush_cache
+
diff --git a/arch/cris/arch-v32/kernel/crisksyms.c b/arch/cris/arch-v32/kernel/crisksyms.c
new file mode 100644
index 0000000..b056635
--- /dev/null
+++ b/arch/cris/arch-v32/kernel/crisksyms.c
@@ -0,0 +1,25 @@
+#include <linux/module.h>
+#include <linux/irq.h>
+#include <arch/dma.h>
+#include <arch/intmem.h>
+#include <mach/pinmux.h>
+
+/* Functions for allocating DMA channels */
+EXPORT_SYMBOL(crisv32_request_dma);
+EXPORT_SYMBOL(crisv32_free_dma);
+
+/* Functions for handling internal RAM */
+EXPORT_SYMBOL(crisv32_intmem_alloc);
+EXPORT_SYMBOL(crisv32_intmem_free);
+EXPORT_SYMBOL(crisv32_intmem_phys_to_virt);
+EXPORT_SYMBOL(crisv32_intmem_virt_to_phys);
+
+/* Functions for handling pinmux */
+EXPORT_SYMBOL(crisv32_pinmux_alloc);
+EXPORT_SYMBOL(crisv32_pinmux_alloc_fixed);
+EXPORT_SYMBOL(crisv32_pinmux_dealloc);
+EXPORT_SYMBOL(crisv32_pinmux_dealloc_fixed);
+
+/* Functions masking/unmasking interrupts */
+EXPORT_SYMBOL(crisv32_mask_irq);
+EXPORT_SYMBOL(crisv32_unmask_irq);
diff --git a/arch/cris/arch-v32/kernel/debugport.c b/arch/cris/arch-v32/kernel/debugport.c
new file mode 100644
index 0000000..d2f3f9c
--- /dev/null
+++ b/arch/cris/arch-v32/kernel/debugport.c
@@ -0,0 +1,231 @@
+/*
+ * Copyright (C) 2003, Axis Communications AB.
+ */
+
+#include <linux/console.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/string.h>
+#include <hwregs/reg_rdwr.h>
+#include <hwregs/reg_map.h>
+#include <hwregs/ser_defs.h>
+#include <hwregs/dma_defs.h>
+#include <mach/pinmux.h>
+
+struct dbg_port
+{
+	unsigned char nbr;
+	unsigned long instance;
+	unsigned int started;
+	unsigned long baudrate;
+	unsigned char parity;
+	unsigned int bits;
+};
+
+struct dbg_port ports[] =
+{
+	{ 0, regi_ser0, 0, 115200, 'N', 8 },
+	{ 1, regi_ser1, 0, 115200, 'N', 8 },
+	{ 2, regi_ser2, 0, 115200, 'N', 8 },
+	{ 3, regi_ser3, 0, 115200, 'N', 8 },
+#if CONFIG_ETRAX_SERIAL_PORTS == 5
+	{ 4, regi_ser4, 0, 115200, 'N', 8 },
+#endif
+};
+
+static struct dbg_port *port =
+#if defined(CONFIG_ETRAX_DEBUG_PORT0)
+	&ports[0];
+#elif defined(CONFIG_ETRAX_DEBUG_PORT1)
+	&ports[1];
+#elif defined(CONFIG_ETRAX_DEBUG_PORT2)
+	&ports[2];
+#elif defined(CONFIG_ETRAX_DEBUG_PORT3)
+	&ports[3];
+#else
+	NULL;
+#endif
+
+#ifdef CONFIG_ETRAX_KGDB
+static struct dbg_port *kgdb_port =
+#if defined(CONFIG_ETRAX_KGDB_PORT0)
+	&ports[0];
+#elif defined(CONFIG_ETRAX_KGDB_PORT1)
+	&ports[1];
+#elif defined(CONFIG_ETRAX_KGDB_PORT2)
+	&ports[2];
+#elif defined(CONFIG_ETRAX_KGDB_PORT3)
+	&ports[3];
+#elif defined(CONFIG_ETRAX_KGDB_PORT4)
+	&ports[4];
+#else
+	NULL;
+#endif
+#endif
+
+static void start_port(struct dbg_port *p)
+{
+	/* Set up serial port registers */
+	reg_ser_rw_tr_ctrl tr_ctrl = {0};
+	reg_ser_rw_tr_dma_en tr_dma_en = {0};
+
+	reg_ser_rw_rec_ctrl rec_ctrl = {0};
+	reg_ser_rw_tr_baud_div tr_baud_div = {0};
+	reg_ser_rw_rec_baud_div rec_baud_div = {0};
+
+	if (!p || p->started)
+		return;
+
+	p->started = 1;
+
+	if (p->nbr == 1)
+		crisv32_pinmux_alloc_fixed(pinmux_ser1);
+	else if (p->nbr == 2)
+		crisv32_pinmux_alloc_fixed(pinmux_ser2);
+	else if (p->nbr == 3)
+		crisv32_pinmux_alloc_fixed(pinmux_ser3);
+#if CONFIG_ETRAX_SERIAL_PORTS == 5
+	else if (p->nbr == 4)
+		crisv32_pinmux_alloc_fixed(pinmux_ser4);
+#endif
+
+	tr_ctrl.base_freq = rec_ctrl.base_freq = regk_ser_f29_493;
+	tr_dma_en.en = rec_ctrl.dma_mode = regk_ser_no;
+	/* Divide the 29.493 MHz base clock by 8 times the baudrate;
+	 * e.g. 115200 baud gives 29493000 / 115200 / 8 = 32. */
+	tr_baud_div.div = rec_baud_div.div = 29493000 / p->baudrate / 8;
+	tr_ctrl.en = rec_ctrl.en = 1;
+
+	if (p->parity == 'O') {
+		tr_ctrl.par_en = regk_ser_yes;
+		tr_ctrl.par = regk_ser_odd;
+		rec_ctrl.par_en = regk_ser_yes;
+		rec_ctrl.par = regk_ser_odd;
+	} else if (p->parity == 'E') {
+		tr_ctrl.par_en = regk_ser_yes;
+		tr_ctrl.par = regk_ser_even;
+		rec_ctrl.par_en = regk_ser_yes;
+		rec_ctrl.par = regk_ser_even;
+	}
+
+	if (p->bits == 7) {
+		tr_ctrl.data_bits = regk_ser_bits7;
+		rec_ctrl.data_bits = regk_ser_bits7;
+	}
+
+	REG_WR (ser, p->instance, rw_tr_baud_div, tr_baud_div);
+	REG_WR (ser, p->instance, rw_rec_baud_div, rec_baud_div);
+	REG_WR (ser, p->instance, rw_tr_dma_en, tr_dma_en);
+	REG_WR (ser, p->instance, rw_tr_ctrl, tr_ctrl);
+	REG_WR (ser, p->instance, rw_rec_ctrl, rec_ctrl);
+}
+
+#ifdef CONFIG_ETRAX_KGDB
+/* Use polling to get a single character from the kernel debug port */
+int getDebugChar(void)
+{
+	reg_ser_rs_stat_din stat;
+	reg_ser_rw_ack_intr ack_intr = { 0 };
+
+	do {
+		stat = REG_RD(ser, kgdb_port->instance, rs_stat_din);
+	} while (!stat.dav);
+
+	/* Ack the data_avail interrupt. */
+	ack_intr.dav = 1;
+	REG_WR(ser, kgdb_port->instance, rw_ack_intr, ack_intr);
+
+	return stat.data;
+}
+
+/* Use polling to put a single character to the kernel debug port */
+void putDebugChar(int val)
+{
+	reg_ser_r_stat_din stat;
+	do {
+		stat = REG_RD(ser, kgdb_port->instance, r_stat_din);
+	} while (!stat.tr_rdy);
+	REG_WR_INT(ser, kgdb_port->instance, rw_dout, val);
+}
+#endif /* CONFIG_ETRAX_KGDB */
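+
+/* The getDebugChar()/putDebugChar() pair above are the polled I/O
+ * hooks the kgdb stub expects; busy-waiting is acceptable since they
+ * run while the rest of the kernel is stopped in the debugger. */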
+
+static void __init early_putch(int c)
+{
+	reg_ser_r_stat_din stat;
+	/* Wait until transmitter is ready and send. */
+	do
+		stat = REG_RD(ser, port->instance, r_stat_din);
+	while (!stat.tr_rdy);
+	REG_WR_INT(ser, port->instance, rw_dout, c);
+}
+
+static void __init
+early_console_write(struct console *con, const char *s, unsigned n)
+{
+	extern void reset_watchdog(void);
+	int i;
+
+	/* Send data. */
+	for (i = 0; i < n; i++) {
+		/* TODO: the '\n' -> '\n\r' translation should be done at the
+		   receiver. Remove it when the serial driver removes it.   */
+		if (s[i] == '\n')
+			early_putch('\r');
+		early_putch(s[i]);
+		reset_watchdog();
+	}
+}
+
+static struct console early_console_dev __initdata = {
+	.name   = "early",
+	.write  = early_console_write,
+	.flags  = CON_PRINTBUFFER | CON_BOOT,
+	.index  = -1
+};
+
+/* Register console for printk's, etc. */
+int __init init_etrax_debug(void)
+{
+	start_port(port);
+
+	/* Register an early console only if a debug port was chosen;
+	 * early_putch() dereferences the selected port. */
+	if (port)
+		register_console(&early_console_dev);
+
+#ifdef CONFIG_ETRAX_KGDB
+	start_port(kgdb_port);
+#endif /* CONFIG_ETRAX_KGDB */
+	return 0;
+}
diff --git a/arch/cris/arch-v32/kernel/entry.S b/arch/cris/arch-v32/kernel/entry.S
new file mode 100644
index 0000000..b17a209
--- /dev/null
+++ b/arch/cris/arch-v32/kernel/entry.S
@@ -0,0 +1,908 @@
+/*
+ *  Copyright (C) 2000-2003 Axis Communications AB
+ *
+ *  Authors:	Bjorn Wesen (bjornw@axis.com)
+ *              Tobias Anderberg (tobiasa@axis.com), CRISv32 port.
+ *
+ * Code for the system-call and fault low-level handling routines.
+ *
+ * NOTE: This code handles signal-recognition, which happens every time
+ * after a timer-interrupt and after each system call.
+ *
+ * Stack layout in 'ret_from_system_call':
+ *	ptrace needs to have all regs on the stack.
+ *	if the order here is changed, it needs to be
+ *	updated in fork.c:copy_process, signal.c:do_signal,
+ *	ptrace.c and ptrace.h
+ *
+ */
+
+#include <linux/linkage.h>
+#include <linux/sys.h>
+#include <asm/unistd.h>
+#include <asm/errno.h>
+#include <asm/thread_info.h>
+#include <asm/asm-offsets.h>
+
+#include <hwregs/asm/reg_map_asm.h>
+#include <hwregs/asm/intr_vect_defs_asm.h>
+
+	;; Exported functions.
+	.globl system_call
+	.globl ret_from_intr
+	.globl ret_from_fork
+	.globl ret_from_kernel_thread
+	.globl resume
+	.globl multiple_interrupt
+	.globl nmi_interrupt
+	.globl spurious_interrupt
+	.globl do_sigtrap
+	.globl gdb_handle_exception
+	.globl sys_call_table
+
+	; Check if preemptive kernel scheduling should be done.
+#ifdef CONFIG_PREEMPT
+_resume_kernel:
+	di
+	; Load current task struct.
+	movs.w	-8192, $r0		; THREAD_SIZE = 8192
+	and.d	$sp, $r0
+
+	addoq	+TI_preempt_count, $r0, $acr
+	move.d	[$acr], $r10		; Preemption disabled?
+	bne	_Rexit
+	nop
+
+_need_resched:
+	addoq	+TI_flags, $r0, $acr
+	move.d	[$acr], $r10
+	btstq	TIF_NEED_RESCHED, $r10	; Check if need_resched is set.
+	bpl	_Rexit
+	nop
+
+	; Do preemptive kernel scheduling.
+	jsr	preempt_schedule_irq
+	nop
+
+	; Load new task struct.
+	movs.w	-8192, $r0		; THREAD_SIZE = 8192.
+	and.d	$sp, $r0
+
+	; One more time with new task.
+	ba	_need_resched
+	nop
+#else
+#define _resume_kernel _Rexit
+#endif
+
+	; Called at exit from fork. schedule_tail must be called to drop
+	; spinlock if CONFIG_PREEMPT.
+	.type	ret_from_fork,@function
+ret_from_fork:
+	jsr schedule_tail
+	nop
+	ba  ret_from_sys_call
+	nop
+	.size	ret_from_fork, . - ret_from_fork
+
+	.type	ret_from_kernel_thread,@function
+ret_from_kernel_thread:
+	jsr schedule_tail
+	nop
+	move.d	$r2, $r10
+	jsr	$r1
+	nop
+	moveq	0, $r9			; no syscall restarts, TYVM...
+	ba  ret_from_sys_call
+	nop
+	.size	ret_from_kernel_thread, . - ret_from_kernel_thread
+
+	.type	ret_from_intr,@function
+ret_from_intr:
+	moveq	0, $r9			; not a syscall
+
+	;; Check for resched if preemptive kernel, or if we're going back to
+	;; user-mode. This test matches the user_regs(regs) macro. Don't simply
+	;; test CCS since that doesn't necessarily reflect what mode we'll
+	;; return into.
+	addoq	+PT_ccs, $sp, $acr
+	move.d	[$acr], $r0
+	btstq	16, $r0			; User-mode flag.
+	bpl	_resume_kernel
+	.size	ret_from_intr, . - ret_from_intr + 2	; +2 includes the dslot.
+
+	; Note that di below is in delay slot.
+	.type	_resume_userspace,@function
+_resume_userspace:
+	di			; So need_resched and sigpending don't change.
+
+	movs.w	-8192, $r0		; THREAD_SIZE == 8192
+	and.d	$sp, $r0
+
+	addoq	+TI_flags, $r0, $acr	; current->work
+	move.d	[$acr], $r10
+	and.d	_TIF_WORK_MASK, $r10	; Work to be done on return?
+	bne	_work_pending
+	nop
+	ba	_Rexit
+	nop
+	.size	_resume_userspace, . - _resume_userspace
+
+	;; The system_call is called by a BREAK instruction, which looks pretty
+	;; much like any other exception.
+	;;
+	;; System calls can't be made from interrupts but we still stack ERP
+	;; to have a complete stack frame.
+	;;
+	;; In r9 we have the wanted syscall number. Arguments come in r10,r11,r12,
+	;; r13,mof,srp
+	;;
+	;; This function looks on the _surface_ like spaghetti programming, but it's
+	;; really designed so that the fast path does not force cache-loading of
+	;; unused instructions. Only the uncommon cases cause the outlined code
+	;; to run.
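+	;;
+	;; Illustrative only (not part of this file's code): given the
+	;; conventions above, a user-space _exit(0) would be entered
+	;; roughly as
+	;;	moveq	__NR_exit, $r9	; syscall number in R9
+	;;	moveq	0, $r10		; first argument in R10
+	;;	break	13		; trap into system_call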
+
+	.type	system_call,@function
+system_call:
+	;; Stack-frame similar to the irq heads, which is reversed in
+	;; ret_from_sys_call.
+
+	sub.d	92, $sp		; Skip EDA.
+	movem	$r13, [$sp]
+	move.d	$sp, $r8
+	addq	14*4, $r8
+	move.d	$acr, $r0
+	move	$srs, $r1
+	move	$mof, $r2
+	move	$spc, $r3
+	move	$ccs, $r4
+	move	$srp, $r5
+	move	$erp, $r6
+	move.d	$r9, $r7	; Store syscall number in EXS
+	subq	4, $sp
+	movem	$r7, [$r8]
+	ei			; Enable interrupts while processing syscalls.
+	move.d	$r10, [$sp]
+
+	; Set S-bit when kernel debugging to keep hardware breakpoints active.
+#ifdef CONFIG_ETRAX_KGDB
+	move $ccs, $r0
+	or.d (1<<9), $r0
+	move $r0, $ccs
+#endif
+
+	movs.w	-ENOSYS, $r0
+	addoq	+PT_r10, $sp, $acr
+	move.d	$r0, [$acr]
+
+	;; Check if this process is syscall-traced.
+	movs.w	-8192, $r0	; THREAD_SIZE == 8192
+	and.d	$sp, $r0
+
+	addoq	+TI_flags, $r0, $acr
+	move.d	[$acr], $r0
+	btstq	TIF_SYSCALL_TRACE, $r0
+	bmi	_syscall_trace_entry
+	nop
+
+_syscall_traced:
+	;; Check for sanity in the requested syscall number.
+	cmpu.w	NR_syscalls, $r9
+	bhs	ret_from_sys_call
+	lslq	2, $r9		;  Multiply by 4, in the delay slot.
+
+	;; The location on the stack for the register structure is passed as a
+	;; seventh argument. Some system calls need this.
+	move.d  $sp, $r0
+	subq	4, $sp
+	move.d	$r0, [$sp]
+
+	;; The registers carrying parameters (R10-R13) are intact. The optional
+	;; fifth and sixth parameters are in MOF and SRP respectively. Put them
+	;; back on the stack.
+	subq	4, $sp
+	move	$srp, [$sp]
+	subq	4, $sp
+	move	$mof, [$sp]
+
+	;; Actually do the system call.
+	addo.d	+sys_call_table, $r9, $acr
+	move.d	[$acr], $acr
+	jsr	$acr
+	nop
+
+	addq	3*4, $sp		; Pop the mof, srp and regs parameters.
+	addoq	+PT_r10, $sp, $acr
+	move.d	$r10, [$acr]		; Save the return value.
+
+	moveq	1, $r9			; "Parameter" to ret_from_sys_call to
+					; show it was a sys call.
+
+	;; Fall through into ret_from_sys_call to return.
+
+ret_from_sys_call:
+	;; R9 is a parameter:
+	;;  >= 1 from syscall
+	;;     0 from irq
+
+	;; Get the current task-struct pointer.
+	movs.w	-8192, $r0	; THREAD_SIZE == 8192
+	and.d	$sp, $r0
+
+	di		; Make sure need_resched and sigpending don't change.
+
+	addoq	+TI_flags, $r0, $acr
+	move.d	[$acr], $r1
+	and.d	_TIF_ALLWORK_MASK, $r1
+	bne	_syscall_exit_work
+	nop
+	.size	system_call, . - system_call
+
+	.type	_Rexit,@function
+_Rexit:
+#if defined(CONFIG_TRACE_IRQFLAGS)
+	addoq	+PT_ccs, $sp, $acr
+	move.d	[$acr], $r0
+	btstq	15, $r0		; I1
+	bpl	1f
+	nop
+	jsr	trace_hardirqs_on
+	nop
+1:
+#endif
+
+	;; This epilogue MUST match the prologues in multiple_interrupt, irq.h
+	;; and ptregs.h.
+	addq	4, $sp		; Skip orig_r10.
+	movem	[$sp+], $r13	; Registers R0-R13.
+	move.d	[$sp+], $acr
+	move	[$sp], $srs
+	addq	4, $sp
+	move	[$sp+], $mof
+	move	[$sp+], $spc
+	move	[$sp+], $ccs
+	move	[$sp+], $srp
+	move	[$sp+], $erp
+	addq    8, $sp		; Skip EXS, EDA.
+	jump	$erp
+	rfe			; Restore condition code stack in delay-slot.
+	.size	_Rexit, . - _Rexit
+
+	;; We get here after doing a syscall if extra work might need to be
+	;; done; perform syscall exit tracing if needed.
+
+	.type	_syscall_exit_work,@function
+_syscall_exit_work:
+	;; R0 contains current at this point and irq's are disabled.
+
+	addoq	+TI_flags, $r0, $acr
+	move.d	[$acr], $r1
+	btstq	TIF_SYSCALL_TRACE, $r1
+	bpl	_work_pending
+	nop
+	ei
+	move.d	$r9, $r1		; Preserve R9.
+	jsr	do_syscall_trace
+	nop
+	move.d	$r1, $r9
+	ba	_resume_userspace
+	nop
+	.size	_syscall_exit_work, . - _syscall_exit_work
+
+	.type	_work_pending,@function
+_work_pending:
+	addoq	+TI_flags, $r0, $acr
+	move.d	[$acr], $r12		; The thread_info_flags parameter.
+	move.d	$sp, $r11		; The regs param.
+	jsr	do_work_pending
+	move.d	$r9, $r10		; The syscall/irq param.
+
+	ba _Rexit
+	nop
+	.size	_work_pending, . - _work_pending
+
+	;; We get here as a sidetrack when we've entered a syscall with the
+	;; trace-bit set. We need to call do_syscall_trace and then continue
+	;; with the call.
+
+_syscall_trace_entry:
+	;; PT_r10 in the frame contains -ENOSYS as required, at this point.
+
+	jsr	do_syscall_trace
+	nop
+
+	;; Now re-enter the syscall code to do the syscall itself. We need to
+	;; restore R9 here to contain the wanted syscall, and the other
+	;; parameter-bearing registers.
+	addoq	+PT_r9, $sp, $acr
+	move.d	[$acr], $r9
+	addoq	+PT_orig_r10, $sp, $acr
+	move.d	[$acr], $r10		; PT_r10 is already -ENOSYS.
+	addoq	+PT_r11, $sp, $acr
+	move.d	[$acr], $r11
+	addoq	+PT_r12, $sp, $acr
+	move.d	[$acr], $r12
+	addoq	+PT_r13, $sp, $acr
+	move.d	[$acr], $r13
+	addoq	+PT_mof, $sp, $acr
+	move	[$acr], $mof
+	addoq	+PT_srp, $sp, $acr
+	move	[$acr], $srp
+
+	ba	_syscall_traced
+	nop
+
+	;; Resume performs the actual task-switching, by switching stack
+	;; pointers. Input arguments are:
+	;;
+	;; R10 = prev
+	;; R11 = next
+	;; R12 = thread offset in task struct.
+	;;
+	;; Returns old current in R10.
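+	;;
+	;; Illustrative sketch: the C side reaches this through the
+	;; switch_to() macro, conceptually
+	;;	last = resume(prev, next, offsetof(struct task_struct, thread));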
+
+	.type	resume,@function
+resume:
+	subq	4, $sp			; Make space for srp.
+
+	add.d	$r12, $r10		; R10 = current tasks tss.
+	addoq	+THREAD_ccs, $r10, $acr
+	move	$srp, [$sp]		; Keep old/new PC on the stack.
+	move	$ccs, [$acr]		; Save IRQ enable state.
+	di
+
+	addoq	+THREAD_usp, $r10, $acr
+	subq	10*4, $sp		; Make room for R9.
+	move	$usp, [$acr]		; Save user-mode stackpointer.
+
+	;; See copy_thread for the reason why register R9 is saved.
+	movem	$r9, [$sp]		; Save non-scratch registers and R9.
+
+	addoq	+THREAD_ksp, $r10, $acr
+	move.d	$sp, $r10		; Return last running task in R10.
+	move.d	$sp, [$acr]		; Save kernel SP for old task.
+
+	and.d   -8192, $r10		; Get thread_info from stackpointer.
+	addoq	+TI_task, $r10, $acr
+	add.d	$r12, $r11		; Find the new tasks tss.
+	move.d	[$acr], $r10		; Get task.
+	addoq	+THREAD_ksp, $r11, $acr
+	move.d	[$acr], $sp		; Switch to new stackframe.
+	addoq	+THREAD_usp, $r11, $acr
+	movem	[$sp+], $r9		; Restore non-scratch registers and R9.
+
+	move	[$acr], $usp		; Restore user-mode stackpointer.
+
+	addoq	+THREAD_ccs, $r11, $acr
+	move.d	[$sp+], $r11
+	jump	$r11			; Restore PC.
+	move	[$acr], $ccs		; Restore IRQ enable status.
+	.size	resume, . - resume
+
+nmi_interrupt:
+
+;; If we receive a watchdog interrupt while it is not expected, then set
+;; up a canonical frame and dump register contents before dying.
+
+	;; This prologue MUST match the one in irq.h and the struct in ptregs.h!
+	subq	12, $sp		;  Skip EXS, EDA.
+	move	$nrp, [$sp]
+	subq	4, $sp
+	move	$srp, [$sp]
+	subq	4, $sp
+	move	$ccs, [$sp]
+	subq	4, $sp
+	move	$spc, [$sp]
+	subq	4, $sp
+	move	$mof, [$sp]
+	subq	4, $sp
+	move	$srs, [$sp]
+	subq	4, $sp
+	move.d	$acr, [$sp]
+	subq	14*4, $sp		; Make room for R0-R13.
+	movem	$r13, [$sp]		; Push R0-R13.
+	subq	4, $sp
+	move.d	$r10, [$sp]		; Push orig_r10.
+	move.d  REG_ADDR(intr_vect, regi_irq, r_nmi), $r0
+	move.d  [$r0], $r0
+	btstq	REG_BIT(intr_vect, r_nmi, watchdog), $r0
+	bpl     1f
+	nop
+	jsr	handle_watchdog_bite	; In time.c.
+	move.d	$sp, $r10		; Pointer to registers
+1:	btstq	REG_BIT(intr_vect, r_nmi, ext), $r0
+	bpl     1f
+	nop
+	jsr	handle_nmi
+	move.d	$sp, $r10		; Pointer to registers
+1:	addq    4, $sp			; Skip orig_r10
+	movem   [$sp+], $r13
+	move.d  [$sp+], $acr
+	move    [$sp], $srs
+	addq    4, $sp
+	move    [$sp+], $mof
+	move    [$sp+], $spc
+	move    [$sp+], $ccs
+	move	[$sp+], $srp
+	move	[$sp+], $nrp
+	addq    8, $sp			; Skip EXS, EDA.
+	jump    $nrp
+	rfn
+
+	.comm	cause_of_death, 4	;; Don't declare this anywhere.
+
+spurious_interrupt:
+	di
+	jump hard_reset_now
+	nop
+
+	;; This handles the case when multiple interrupts arrive at the same
+	;; time. Jump to the first set interrupt bit in a priority fashion. The
+	;; hardware will call the unserved interrupts after the handler
+	;; finishes.
+	.type	multiple_interrupt, @function
+multiple_interrupt:
+	;; This prologue MUST match the one in irq.h and the struct in ptregs.h!
+	subq	12, $sp		; Skip EXS, EDA.
+	move	$erp, [$sp]
+	subq	4, $sp
+	move	$srp, [$sp]
+	subq	4, $sp
+	move	$ccs, [$sp]
+	subq	4, $sp
+	move	$spc, [$sp]
+	subq	4, $sp
+	move	$mof, [$sp]
+	subq	4, $sp
+	move	$srs, [$sp]
+	subq	4, $sp
+	move.d	$acr, [$sp]
+	subq	14*4, $sp	; Make room for R0-R13.
+	movem	$r13, [$sp]	; Push R0-R13.
+	subq	4, $sp
+	move.d	$r10, [$sp]	; Push orig_r10.
+
+; Set S-bit when kernel debugging to keep hardware breakpoints active.
+#ifdef CONFIG_ETRAX_KGDB
+	move $ccs, $r0
+	or.d (1<<9), $r0
+	move $r0, $ccs
+#endif
+
+	jsr	crisv32_do_multiple
+	move.d	$sp, $r10
+	jump    ret_from_intr
+	nop
+	.size	multiple_interrupt, . - multiple_interrupt
+
+do_sigtrap:
+	;; Sigtraps the process that executed the BREAK instruction. Creates a
+	;; frame that Rexit expects.
+	subq	4, $sp
+	move    $eda, [$sp]
+	subq    4, $sp
+	move    $exs, [$sp]
+	subq    4, $sp
+	move	$erp, [$sp]
+	subq	4, $sp
+	move	$srp, [$sp]
+	subq	4, $sp
+	move	$ccs, [$sp]
+	subq	4, $sp
+	move	$spc, [$sp]
+	subq	4, $sp
+	move	$mof, [$sp]
+	subq	4, $sp
+	move	$srs, [$sp]
+	subq	4, $sp
+	move.d	$acr, [$sp]
+	di				; Need to disable irq's at this point.
+	subq	14*4, $sp		; Make room for r0-r13.
+	movem	$r13, [$sp]		; Push the r0-r13 registers.
+	subq	4, $sp
+	move.d	$r10, [$sp]		; Push orig_r10.
+
+	movs.w	-8192, $r9		; THREAD_SIZE == 8192
+	and.d	$sp, $r9
+
+	;; thread_info as first parameter
+	move.d  $r9, $r10
+	moveq	5, $r11			; SIGTRAP as second argument.
+	jsr	ugdb_trap_user
+	nop
+	jump	ret_from_intr		; Use the return routine for interrupts.
+	nop
+
+gdb_handle_exception:
+	subq	4, $sp
+	move.d	$r0, [$sp]
+#ifdef CONFIG_ETRAX_KGDB
+	move	$ccs, $r0		; U-flag not affected by previous insns.
+	btstq	16, $r0			; Test the U-flag.
+	bmi	_ugdb_handle_exception	; Go to user mode debugging.
+	nop				; Empty delay-slot (cannot pop R0 here).
+	ba	kgdb_handle_exception	; Go to kernel debugging.
+	move.d	[$sp+], $r0		; Restore R0 in delay slot.
+#endif
+
+_ugdb_handle_exception:
+	ba	do_sigtrap		; SIGTRAP the offending process.
+	move.d	[$sp+], $r0		; Restore R0 in delay slot.
+
+	.data
+
+	.section .rodata,"a"
+sys_call_table:
+	.long sys_restart_syscall	; 0 - old "setup()" system call, used
+					; for restarting.
+	.long sys_exit
+	.long sys_fork
+	.long sys_read
+	.long sys_write
+	.long sys_open		/* 5 */
+	.long sys_close
+	.long sys_waitpid
+	.long sys_creat
+	.long sys_link
+	.long sys_unlink	/* 10 */
+	.long sys_execve
+	.long sys_chdir
+	.long sys_time
+	.long sys_mknod
+	.long sys_chmod		/* 15 */
+	.long sys_lchown16
+	.long sys_ni_syscall	/* old break syscall holder */
+	.long sys_stat
+	.long sys_lseek
+	.long sys_getpid	/* 20 */
+	.long sys_mount
+	.long sys_oldumount
+	.long sys_setuid16
+	.long sys_getuid16
+	.long sys_stime		/* 25 */
+	.long sys_ptrace
+	.long sys_alarm
+	.long sys_fstat
+	.long sys_pause
+	.long sys_utime		/* 30 */
+	.long sys_ni_syscall	/* old stty syscall holder */
+	.long sys_ni_syscall	/* old gtty syscall holder */
+	.long sys_access
+	.long sys_nice
+	.long sys_ni_syscall	/* 35  old ftime syscall holder */
+	.long sys_sync
+	.long sys_kill
+	.long sys_rename
+	.long sys_mkdir
+	.long sys_rmdir		/* 40 */
+	.long sys_dup
+	.long sys_pipe
+	.long sys_times
+	.long sys_ni_syscall	/* old prof syscall holder */
+	.long sys_brk		/* 45 */
+	.long sys_setgid16
+	.long sys_getgid16
+	.long sys_signal
+	.long sys_geteuid16
+	.long sys_getegid16	/* 50 */
+	.long sys_acct
+	.long sys_umount	/* recycled never used phys() */
+	.long sys_ni_syscall	/* old lock syscall holder */
+	.long sys_ioctl
+	.long sys_fcntl		/* 55 */
+	.long sys_ni_syscall	/* old mpx syscall holder */
+	.long sys_setpgid
+	.long sys_ni_syscall	/* old ulimit syscall holder */
+	.long sys_ni_syscall	/* old sys_olduname holder */
+	.long sys_umask		/* 60 */
+	.long sys_chroot
+	.long sys_ustat
+	.long sys_dup2
+	.long sys_getppid
+	.long sys_getpgrp	/* 65 */
+	.long sys_setsid
+	.long sys_sigaction
+	.long sys_sgetmask
+	.long sys_ssetmask
+	.long sys_setreuid16	/* 70 */
+	.long sys_setregid16
+	.long sys_sigsuspend
+	.long sys_sigpending
+	.long sys_sethostname
+	.long sys_setrlimit	/* 75 */
+	.long sys_old_getrlimit
+	.long sys_getrusage
+	.long sys_gettimeofday
+	.long sys_settimeofday
+	.long sys_getgroups16	/* 80 */
+	.long sys_setgroups16
+	.long sys_select	/* was old_select in Linux/E100 */
+	.long sys_symlink
+	.long sys_lstat
+	.long sys_readlink	/* 85 */
+	.long sys_uselib
+	.long sys_swapon
+	.long sys_reboot
+	.long sys_old_readdir
+	.long sys_old_mmap	/* 90 */
+	.long sys_munmap
+	.long sys_truncate
+	.long sys_ftruncate
+	.long sys_fchmod
+	.long sys_fchown16	/* 95 */
+	.long sys_getpriority
+	.long sys_setpriority
+	.long sys_ni_syscall	/* old profil syscall holder */
+	.long sys_statfs
+	.long sys_fstatfs	/* 100 */
+	.long sys_ni_syscall	/* sys_ioperm in i386 */
+	.long sys_socketcall
+	.long sys_syslog
+	.long sys_setitimer
+	.long sys_getitimer	/* 105 */
+	.long sys_newstat
+	.long sys_newlstat
+	.long sys_newfstat
+	.long sys_ni_syscall	/* old sys_uname holder */
+	.long sys_ni_syscall	/* sys_iopl in i386 */
+	.long sys_vhangup
+	.long sys_ni_syscall	/* old "idle" system call */
+	.long sys_ni_syscall	/* vm86old in i386 */
+	.long sys_wait4
+	.long sys_swapoff	/* 115 */
+	.long sys_sysinfo
+	.long sys_ipc
+	.long sys_fsync
+	.long sys_sigreturn
+	.long sys_clone		/* 120 */
+	.long sys_setdomainname
+	.long sys_newuname
+	.long sys_ni_syscall	/* sys_modify_ldt */
+	.long sys_adjtimex
+	.long sys_mprotect	/* 125 */
+	.long sys_sigprocmask
+	.long sys_ni_syscall	/* old "create_module" */
+	.long sys_init_module
+	.long sys_delete_module
+	.long sys_ni_syscall	/* 130:	old "get_kernel_syms" */
+	.long sys_quotactl
+	.long sys_getpgid
+	.long sys_fchdir
+	.long sys_bdflush
+	.long sys_sysfs		/* 135 */
+	.long sys_personality
+	.long sys_ni_syscall	/* for afs_syscall */
+	.long sys_setfsuid16
+	.long sys_setfsgid16
+	.long sys_llseek	/* 140 */
+	.long sys_getdents
+	.long sys_select
+	.long sys_flock
+	.long sys_msync
+	.long sys_readv		/* 145 */
+	.long sys_writev
+	.long sys_getsid
+	.long sys_fdatasync
+	.long sys_sysctl
+	.long sys_mlock		/* 150 */
+	.long sys_munlock
+	.long sys_mlockall
+	.long sys_munlockall
+	.long sys_sched_setparam
+	.long sys_sched_getparam	/* 155 */
+	.long sys_sched_setscheduler
+	.long sys_sched_getscheduler
+	.long sys_sched_yield
+	.long sys_sched_get_priority_max
+	.long sys_sched_get_priority_min	/* 160 */
+	.long sys_sched_rr_get_interval
+	.long sys_nanosleep
+	.long sys_mremap
+	.long sys_setresuid16
+	.long sys_getresuid16	/* 165 */
+	.long sys_ni_syscall	/* sys_vm86 */
+	.long sys_ni_syscall	/* Old sys_query_module */
+	.long sys_poll
+	.long sys_ni_syscall	/* Old nfsservctl */
+	.long sys_setresgid16	/* 170 */
+	.long sys_getresgid16
+	.long sys_prctl
+	.long sys_rt_sigreturn
+	.long sys_rt_sigaction
+	.long sys_rt_sigprocmask	/* 175 */
+	.long sys_rt_sigpending
+	.long sys_rt_sigtimedwait
+	.long sys_rt_sigqueueinfo
+	.long sys_rt_sigsuspend
+	.long sys_pread64	/* 180 */
+	.long sys_pwrite64
+	.long sys_chown16
+	.long sys_getcwd
+	.long sys_capget
+	.long sys_capset	/* 185 */
+	.long sys_sigaltstack
+	.long sys_sendfile
+	.long sys_ni_syscall	/* streams1 */
+	.long sys_ni_syscall	/* streams2 */
+	.long sys_vfork		/* 190 */
+	.long sys_getrlimit
+	.long sys_mmap2
+	.long sys_truncate64
+	.long sys_ftruncate64
+	.long sys_stat64	/* 195 */
+	.long sys_lstat64
+	.long sys_fstat64
+	.long sys_lchown
+	.long sys_getuid
+	.long sys_getgid	/* 200 */
+	.long sys_geteuid
+	.long sys_getegid
+	.long sys_setreuid
+	.long sys_setregid
+	.long sys_getgroups	/* 205 */
+	.long sys_setgroups
+	.long sys_fchown
+	.long sys_setresuid
+	.long sys_getresuid
+	.long sys_setresgid	/* 210 */
+	.long sys_getresgid
+	.long sys_chown
+	.long sys_setuid
+	.long sys_setgid
+	.long sys_setfsuid	/* 215 */
+	.long sys_setfsgid
+	.long sys_pivot_root
+	.long sys_mincore
+	.long sys_madvise
+	.long sys_getdents64	/* 220 */
+	.long sys_fcntl64
+	.long sys_ni_syscall	/* reserved for TUX */
+	.long sys_ni_syscall
+	.long sys_gettid
+	.long sys_readahead	/* 225 */
+	.long sys_setxattr
+	.long sys_lsetxattr
+	.long sys_fsetxattr
+	.long sys_getxattr
+	.long sys_lgetxattr	/* 230 */
+	.long sys_fgetxattr
+	.long sys_listxattr
+	.long sys_llistxattr
+	.long sys_flistxattr
+	.long sys_removexattr	/* 235 */
+	.long sys_lremovexattr
+	.long sys_fremovexattr
+	.long sys_tkill
+	.long sys_sendfile64
+	.long sys_futex		/* 240 */
+	.long sys_sched_setaffinity
+	.long sys_sched_getaffinity
+	.long sys_ni_syscall	/* sys_set_thread_area */
+	.long sys_ni_syscall	/* sys_get_thread_area */
+	.long sys_io_setup	/* 245 */
+	.long sys_io_destroy
+	.long sys_io_getevents
+	.long sys_io_submit
+	.long sys_io_cancel
+	.long sys_fadvise64	/* 250 */
+	.long sys_ni_syscall
+	.long sys_exit_group
+	.long sys_lookup_dcookie
+	.long sys_epoll_create
+	.long sys_epoll_ctl	/* 255 */
+	.long sys_epoll_wait
+	.long sys_remap_file_pages
+	.long sys_set_tid_address
+	.long sys_timer_create
+	.long sys_timer_settime		/* 260 */
+	.long sys_timer_gettime
+	.long sys_timer_getoverrun
+	.long sys_timer_delete
+	.long sys_clock_settime
+	.long sys_clock_gettime		/* 265 */
+	.long sys_clock_getres
+	.long sys_clock_nanosleep
+	.long sys_statfs64
+	.long sys_fstatfs64
+	.long sys_tgkill	/* 270 */
+	.long sys_utimes
+	.long sys_fadvise64_64
+	.long sys_ni_syscall	/* sys_vserver */
+	.long sys_ni_syscall	/* sys_mbind */
+	.long sys_ni_syscall	/* 275 sys_get_mempolicy */
+	.long sys_ni_syscall	/* sys_set_mempolicy */
+	.long sys_mq_open
+	.long sys_mq_unlink
+	.long sys_mq_timedsend
+	.long sys_mq_timedreceive	/* 280 */
+	.long sys_mq_notify
+	.long sys_mq_getsetattr
+	.long sys_ni_syscall		/* reserved for kexec */
+	.long sys_waitid
+	.long sys_ni_syscall		/* 285 */ /* available */
+	.long sys_add_key
+	.long sys_request_key
+	.long sys_keyctl
+	.long sys_ioprio_set
+	.long sys_ioprio_get		/* 290 */
+	.long sys_inotify_init
+	.long sys_inotify_add_watch
+	.long sys_inotify_rm_watch
+	.long sys_migrate_pages
+	.long sys_openat		/* 295 */
+	.long sys_mkdirat
+	.long sys_mknodat
+	.long sys_fchownat
+	.long sys_futimesat
+	.long sys_fstatat64		/* 300 */
+	.long sys_unlinkat
+	.long sys_renameat
+	.long sys_linkat
+	.long sys_symlinkat
+	.long sys_readlinkat		/* 305 */
+	.long sys_fchmodat
+	.long sys_faccessat
+	.long sys_pselect6
+	.long sys_ppoll
+	.long sys_unshare		/* 310 */
+	.long sys_set_robust_list
+	.long sys_get_robust_list
+	.long sys_splice
+	.long sys_sync_file_range
+	.long sys_tee			/* 315 */
+	.long sys_vmsplice
+	.long sys_move_pages
+	.long sys_getcpu
+	.long sys_epoll_pwait
+	.long sys_utimensat		/* 320 */
+	.long sys_signalfd
+	.long sys_timerfd_create
+	.long sys_eventfd
+	.long sys_fallocate
+	.long sys_timerfd_settime       /* 325 */
+	.long sys_timerfd_gettime
+	.long sys_signalfd4
+	.long sys_eventfd2
+	.long sys_epoll_create1
+	.long sys_dup3			/* 330 */
+	.long sys_pipe2
+	.long sys_inotify_init1
+	.long sys_preadv
+	.long sys_pwritev
+	.long sys_setns			/* 335 */
+	.long sys_name_to_handle_at
+	.long sys_open_by_handle_at
+	.long sys_rt_tgsigqueueinfo
+	.long sys_perf_event_open
+	.long sys_recvmmsg		/* 340 */
+	.long sys_accept4
+	.long sys_fanotify_init
+	.long sys_fanotify_mark
+	.long sys_prlimit64
+	.long sys_clock_adjtime		/* 345 */
+	.long sys_syncfs
+	.long sys_sendmmsg
+	.long sys_process_vm_readv
+	.long sys_process_vm_writev
+	.long sys_kcmp			/* 350 */
+	.long sys_finit_module
+	.long sys_sched_setattr
+	.long sys_sched_getattr
+	.long sys_renameat2
+	.long sys_seccomp		/* 355 */
+	.long sys_getrandom
+	.long sys_memfd_create
+	.long sys_bpf
+	.long sys_execveat
+
+	/*
+	 * NOTE!! This doesn't have to be exact - we just have
+	 * to make sure we have _enough_ of the "sys_ni_syscall"
+	 * entries. Don't panic if you notice that this hasn't
+	 * been shrunk every time we add a new system call.
+	 */
+
+	.rept NR_syscalls - (.-sys_call_table) / 4
+		.long sys_ni_syscall
+	.endr
+
diff --git a/arch/cris/arch-v32/kernel/fasttimer.c b/arch/cris/arch-v32/kernel/fasttimer.c
new file mode 100644
index 0000000..5c84dbb
--- /dev/null
+++ b/arch/cris/arch-v32/kernel/fasttimer.c
@@ -0,0 +1,793 @@
+/*
+ * linux/arch/cris/kernel/fasttimer.c
+ *
+ * Fast timers for ETRAX FS
+ *
+ * Copyright (C) 2000-2006 Axis Communications AB, Lund, Sweden
+ */
+
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/param.h>
+#include <linux/string.h>
+#include <linux/vmalloc.h>
+#include <linux/interrupt.h>
+#include <linux/time.h>
+#include <linux/delay.h>
+
+#include <asm/irq.h>
+
+#include <hwregs/reg_map.h>
+#include <hwregs/reg_rdwr.h>
+#include <hwregs/timer_defs.h>
+#include <asm/fasttimer.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+
+/*
+ * timer0 runs at 100 MHz and generates the jiffies timer ticks at
+ * 100 or 1000 HZ.
+ * fasttimer provides an API for timers that expire "between" the
+ * jiffies, giving microsecond resolution (the underlying r_time
+ * counter ticks every 10 ns).
+ * fasttimer uses the reg_timer_rw_trig register to get an interrupt
+ * when r_time reaches a certain value.
+ */
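+
+/*
+ * Usage sketch (illustrative only, not compiled in): start a one-shot
+ * timer whose callback runs in interrupt context.  This assumes only
+ * the start_one_shot_timer() API defined further down in this file;
+ * the names example_timer/my_timeout/my_start are made up here.
+ */
+#if 0
+static struct fast_timer example_timer;
+
+static void my_timeout(unsigned long data)
+{
+	/* Called in interrupt context when the timer expires. */
+	printk(KERN_DEBUG "fast timer expired, data=%lu\n", data);
+}
+
+static void my_start(void)
+{
+	/* Fire my_timeout(42) in roughly 500 microseconds. */
+	start_one_shot_timer(&example_timer, my_timeout, 42, 500, "example");
+}
+#endif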
+
+
+#define DEBUG_LOG_INCLUDED
+#define FAST_TIMER_LOG
+/* #define FAST_TIMER_TEST */
+
+#define FAST_TIMER_SANITY_CHECKS
+
+#ifdef FAST_TIMER_SANITY_CHECKS
+static int sanity_failed;
+#endif
+
+#define D1(x)
+#define D2(x)
+#define DP(x)
+
+static unsigned int fast_timer_running;
+static unsigned int fast_timers_added;
+static unsigned int fast_timers_started;
+static unsigned int fast_timers_expired;
+static unsigned int fast_timers_deleted;
+static unsigned int fast_timer_is_init;
+static unsigned int fast_timer_ints;
+
+struct fast_timer *fast_timer_list = NULL;
+
+#ifdef DEBUG_LOG_INCLUDED
+#define DEBUG_LOG_MAX 128
+static const char * debug_log_string[DEBUG_LOG_MAX];
+static unsigned long debug_log_value[DEBUG_LOG_MAX];
+static unsigned int debug_log_cnt;
+static unsigned int debug_log_cnt_wrapped;
+
+#define DEBUG_LOG(string, value) \
+{ \
+  unsigned long log_flags; \
+  local_irq_save(log_flags); \
+  debug_log_string[debug_log_cnt] = (string); \
+  debug_log_value[debug_log_cnt] = (unsigned long)(value); \
+  if (++debug_log_cnt >= DEBUG_LOG_MAX) \
+  { \
+    debug_log_cnt = debug_log_cnt % DEBUG_LOG_MAX; \
+    debug_log_cnt_wrapped = 1; \
+  } \
+  local_irq_restore(log_flags); \
+}
+#else
+#define DEBUG_LOG(string, value)
+#endif
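+
+/* Usage sketch (illustrative): DEBUG_LOG("trig %i\n", fast_timer_ints);
+ * entries land in a DEBUG_LOG_MAX-deep ring buffer that is dumped by
+ * the /proc/fasttimer handler below. */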
+
+
+#define NUM_TIMER_STATS 16
+#ifdef FAST_TIMER_LOG
+struct fast_timer timer_added_log[NUM_TIMER_STATS];
+struct fast_timer timer_started_log[NUM_TIMER_STATS];
+struct fast_timer timer_expired_log[NUM_TIMER_STATS];
+#endif
+
+int timer_div_settings[NUM_TIMER_STATS];
+int timer_delay_settings[NUM_TIMER_STATS];
+
+struct work_struct fast_work;
+
+static void
+timer_trig_handler(struct work_struct *work);
+
+
+
+/* Not true gettimeofday, only checks the jiffies (uptime) + useconds */
+inline void do_gettimeofday_fast(struct fasttime_t *tv)
+{
+	tv->tv_jiff = jiffies;
+	tv->tv_usec = GET_JIFFIES_USEC();
+}
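+
+/* Note: tv_usec is a microsecond offset relative to tv_jiff; for the
+ * current time it is the offset within the ongoing jiffy. */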
+
+inline int fasttime_cmp(struct fasttime_t *t0, struct fasttime_t *t1)
+{
+	/* Compare jiffies. Takes care of wrapping */
+	if (time_before(t0->tv_jiff, t1->tv_jiff))
+		return -1;
+	else if (time_after(t0->tv_jiff, t1->tv_jiff))
+		return 1;
+
+	/* Compare us */
+	if (t0->tv_usec < t1->tv_usec)
+		return -1;
+	else if (t0->tv_usec > t1->tv_usec)
+		return 1;
+	return 0;
+}
+
+/* Called with ints off */
+inline void start_timer_trig(unsigned long delay_us)
+{
+  reg_timer_rw_ack_intr ack_intr = { 0 };
+  reg_timer_rw_intr_mask intr_mask;
+  reg_timer_rw_trig trig;
+  reg_timer_rw_trig_cfg trig_cfg = { 0 };
+	reg_timer_r_time r_time0;
+	reg_timer_r_time r_time1;
+	unsigned char trig_wrap;
+	unsigned char time_wrap;
+
+	r_time0 = REG_RD(timer, regi_timer0, r_time);
+
+  D1(printk("start_timer_trig : %d us freq: %i div: %i\n",
+            delay_us, freq_index, div));
+  /* Clear trig irq */
+	intr_mask = REG_RD(timer, regi_timer0, rw_intr_mask);
+  intr_mask.trig = 0;
+	REG_WR(timer, regi_timer0, rw_intr_mask, intr_mask);
+
+	/* Set timer values and check if trigger wraps. */
+	/* r_time is 100 MHz (10 ns resolution), i.e. 100 ticks per us. */
+	trig_wrap = (trig = r_time0 + delay_us*(1000/10)) < r_time0;
+
+  timer_div_settings[fast_timers_started % NUM_TIMER_STATS] = trig;
+  timer_delay_settings[fast_timers_started % NUM_TIMER_STATS] = delay_us;
+
+  /* Ack interrupt */
+  ack_intr.trig = 1;
+	REG_WR(timer, regi_timer0, rw_ack_intr, ack_intr);
+
+  /* Start timer */
+	REG_WR(timer, regi_timer0, rw_trig, trig);
+  trig_cfg.tmr = regk_timer_time;
+	REG_WR(timer, regi_timer0, rw_trig_cfg, trig_cfg);
+
+  /* Check if we have already passed the trig time */
+	r_time1 = REG_RD(timer, regi_timer0, r_time);
+	time_wrap = r_time1 < r_time0;
+
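+	/* The trig point is still ahead of us if it wrapped past 2^32
+	 * while r_time did not, or if r_time simply has not reached it
+	 * yet. */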
+	if ((trig_wrap && !time_wrap) || (r_time1 < trig)) {
+    /* No, Enable trig irq */
+		intr_mask = REG_RD(timer, regi_timer0, rw_intr_mask);
+    intr_mask.trig = 1;
+		REG_WR(timer, regi_timer0, rw_intr_mask, intr_mask);
+    fast_timers_started++;
+    fast_timer_running = 1;
+	} else {
+    /* We have passed the time, disable trig point, ack intr */
+    trig_cfg.tmr = regk_timer_off;
+		REG_WR(timer, regi_timer0, rw_trig_cfg, trig_cfg);
+		REG_WR(timer, regi_timer0, rw_ack_intr, ack_intr);
+		/* call the int routine */
+		INIT_WORK(&fast_work, timer_trig_handler);
+		schedule_work(&fast_work);
+  }
+
+}
+
+/* In version 1.4 this function takes 27 - 50 us */
+void start_one_shot_timer(struct fast_timer *t,
+                          fast_timer_function_type *function,
+                          unsigned long data,
+                          unsigned long delay_us,
+                          const char *name)
+{
+  unsigned long flags;
+  struct fast_timer *tmp;
+
+  D1(printk("sft %s %d us\n", name, delay_us));
+
+  local_irq_save(flags);
+
+  do_gettimeofday_fast(&t->tv_set);
+  tmp = fast_timer_list;
+
+#ifdef FAST_TIMER_SANITY_CHECKS
+	/* Check so this is not in the list already... */
+	while (tmp != NULL) {
+		if (tmp == t) {
+			printk(KERN_DEBUG
+				"timer name: %s data: 0x%08lX already "
+				"in list!\n", name, data);
+			sanity_failed++;
+			goto done;
+		} else
+			tmp = tmp->next;
+	}
+	tmp = fast_timer_list;
+#endif
+
+  t->delay_us = delay_us;
+  t->function = function;
+  t->data = data;
+  t->name = name;
+
+	t->tv_expires.tv_usec = t->tv_set.tv_usec + delay_us % 1000000;
+	/* Whole seconds of delay contribute HZ jiffies each. */
+	t->tv_expires.tv_jiff = t->tv_set.tv_jiff + delay_us / 1000000 * HZ;
+	if (t->tv_expires.tv_usec > 1000000) {
+		t->tv_expires.tv_usec -= 1000000;
+		t->tv_expires.tv_jiff += HZ;
+	}
+#ifdef FAST_TIMER_LOG
+  timer_added_log[fast_timers_added % NUM_TIMER_STATS] = *t;
+#endif
+  fast_timers_added++;
+
+  /* Check if this should timeout before anything else */
+  if (tmp == NULL || fasttime_cmp(&t->tv_expires, &tmp->tv_expires) < 0) {
+    /* Put first in list and modify the timer value */
+    t->prev = NULL;
+    t->next = fast_timer_list;
+    if (fast_timer_list)
+      fast_timer_list->prev = t;
+    fast_timer_list = t;
+#ifdef FAST_TIMER_LOG
+    timer_started_log[fast_timers_started % NUM_TIMER_STATS] = *t;
+#endif
+    start_timer_trig(delay_us);
+  } else {
+    /* Put in correct place in list */
+    while (tmp->next &&
+        fasttime_cmp(&t->tv_expires, &tmp->next->tv_expires) > 0)
+      tmp = tmp->next;
+    /* Insert t after tmp */
+    t->prev = tmp;
+    t->next = tmp->next;
+    if (tmp->next)
+    {
+      tmp->next->prev = t;
+    }
+    tmp->next = t;
+  }
+
+  D2(printk("start_one_shot_timer: %d us done\n", delay_us));
+
+done:
+  local_irq_restore(flags);
+} /* start_one_shot_timer */
+
+static inline int fast_timer_pending (const struct fast_timer * t)
+{
+  return (t->next != NULL) || (t->prev != NULL) || (t == fast_timer_list);
+}
+
+static inline int detach_fast_timer (struct fast_timer *t)
+{
+  struct fast_timer *next, *prev;
+  if (!fast_timer_pending(t))
+    return 0;
+  next = t->next;
+  prev = t->prev;
+  if (next)
+    next->prev = prev;
+  if (prev)
+    prev->next = next;
+  else
+    fast_timer_list = next;
+  fast_timers_deleted++;
+  return 1;
+}
+
+int del_fast_timer(struct fast_timer * t)
+{
+  unsigned long flags;
+  int ret;
+
+  local_irq_save(flags);
+  ret = detach_fast_timer(t);
+  t->next = t->prev = NULL;
+  local_irq_restore(flags);
+  return ret;
+} /* del_fast_timer */
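+
+/* Note: del_fast_timer() may be called from interrupt context, which
+ * is why the trig handler below keeps interrupts disabled whenever it
+ * holds a reference into the timer list. */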
+
+
+/* Interrupt routines or functions called in interrupt context */
+
+/* Timer interrupt handler for trig interrupts */
+
+static irqreturn_t
+timer_trig_interrupt(int irq, void *dev_id)
+{
+  reg_timer_r_masked_intr masked_intr;
+  /* Check if the timer interrupt is for us (a trig int) */
+	masked_intr = REG_RD(timer, regi_timer0, r_masked_intr);
+  if (!masked_intr.trig)
+    return IRQ_NONE;
+	timer_trig_handler(NULL);
+  return IRQ_HANDLED;
+}
+
+static void timer_trig_handler(struct work_struct *work)
+{
+  reg_timer_rw_ack_intr ack_intr = { 0 };
+  reg_timer_rw_intr_mask intr_mask;
+  reg_timer_rw_trig_cfg trig_cfg = { 0 };
+  struct fast_timer *t;
+  fast_timer_function_type *f;
+  unsigned long d;
+  unsigned long flags;
+
+	/* We keep interrupts disabled not only when we modify the
+	 * fast timer list, but any time we hold a reference to a
+	 * timer in the list, since del_fast_timer may be called
+	 * from (another) interrupt context.  Thus, the only time
+	 * when interrupts are enabled is when calling the timer
+	 * callback function.
+	 */
+  local_irq_save(flags);
+
+  /* Clear timer trig interrupt */
+	intr_mask = REG_RD(timer, regi_timer0, rw_intr_mask);
+  intr_mask.trig = 0;
+  REG_WR(timer, regi_timer0, rw_intr_mask, intr_mask);
+
+  /* First stop timer, then ack interrupt */
+  /* Stop timer */
+  trig_cfg.tmr = regk_timer_off;
+	REG_WR(timer, regi_timer0, rw_trig_cfg, trig_cfg);
+
+  /* Ack interrupt */
+  ack_intr.trig = 1;
+	REG_WR(timer, regi_timer0, rw_ack_intr, ack_intr);
+
+  fast_timer_running = 0;
+  fast_timer_ints++;
+
+  t = fast_timer_list;
+	while (t) {
+		struct fasttime_t tv;
+
+    /* Has it really expired? */
+    do_gettimeofday_fast(&tv);
+		D1(printk(KERN_DEBUG
+			"t: %is %06ius\n", tv.tv_jiff, tv.tv_usec));
+
+		if (fasttime_cmp(&t->tv_expires, &tv) <= 0) {
+      /* Yes it has expired */
+#ifdef FAST_TIMER_LOG
+      timer_expired_log[fast_timers_expired % NUM_TIMER_STATS] = *t;
+#endif
+      fast_timers_expired++;
+
+      /* Remove this timer before call, since it may reuse the timer */
+      if (t->prev)
+        t->prev->next = t->next;
+      else
+        fast_timer_list = t->next;
+      if (t->next)
+        t->next->prev = t->prev;
+      t->prev = NULL;
+      t->next = NULL;
+
+			/* Save function callback data before enabling
+			 * interrupts, since the timer may be removed and we
+			 * don't know how it was allocated (e.g. ->function
+			 * and ->data may become overwritten after deletion
+			 * if the timer was stack-allocated).
+			 */
+			f = t->function;
+			d = t->data;
+
+			if (f != NULL) {
+				/* Run the callback function with interrupts
+				 * enabled. */
+				local_irq_restore(flags);
+				f(d);
+				local_irq_save(flags);
+			} else
+        DEBUG_LOG("!trimertrig %i function==NULL!\n", fast_timer_ints);
+		} else {
+      /* Timer is too early, let's set it again using the normal routines */
+      D1(printk(".\n"));
+    }
+
+		t = fast_timer_list;
+		if (t != NULL) {
+      /* Start next timer.. */
+			long us = 0;
+			struct fasttime_t tv;
+
+      do_gettimeofday_fast(&tv);
+
+			/* time_after_eq takes care of wrapping */
+			if (time_after_eq(t->tv_expires.tv_jiff, tv.tv_jiff))
+				us = ((t->tv_expires.tv_jiff - tv.tv_jiff) *
+					1000000 / HZ + t->tv_expires.tv_usec -
+					tv.tv_usec);
+
+			if (us > 0) {
+				if (!fast_timer_running) {
+#ifdef FAST_TIMER_LOG
+          timer_started_log[fast_timers_started % NUM_TIMER_STATS] = *t;
+#endif
+          start_timer_trig(us);
+        }
+        break;
+			} else {
+        /* Timer already expired, let's handle it better late than never.
+         * The normal loop handles it
+         */
+        D1(printk("e! %d\n", us));
+      }
+    }
+  }
+
+	local_irq_restore(flags);
+
+	if (!t)
+    D1(printk("ttrig stop!\n"));
+}
+
+static void wake_up_func(unsigned long data)
+{
+  wait_queue_head_t  *sleep_wait_p = (wait_queue_head_t*)data;
+  wake_up(sleep_wait_p);
+}
+
+
+/* Useful API */
+
+void schedule_usleep(unsigned long us)
+{
+  struct fast_timer t;
+  wait_queue_head_t sleep_wait;
+  init_waitqueue_head(&sleep_wait);
+
+  D1(printk("schedule_usleep(%d)\n", us));
+  start_one_shot_timer(&t, wake_up_func, (unsigned long)&sleep_wait, us,
+                       "usleep");
+	/* Uninterruptible sleep on the fast timer. (The condition is
+	 * somewhat redundant since the timer is what wakes us up.) */
+	wait_event(sleep_wait, !fast_timer_pending(&t));
+
+  D1(printk("done schedule_usleep(%d)\n", us));
+}
+
+#ifdef CONFIG_PROC_FS
+/* This value is very much based on testing */
+#define BIG_BUF_SIZE (500 + NUM_TIMER_STATS * 300)
+
+static int proc_fasttimer_show(struct seq_file *m, void *v)
+{
+	unsigned long flags;
+	int i = 0;
+	int num_to_show;
+	struct fasttime_t tv;
+	struct fast_timer *t, *nextt;
+
+	do_gettimeofday_fast(&tv);
+
+	seq_printf(m, "Fast timers added:     %i\n", fast_timers_added);
+	seq_printf(m, "Fast timers started:   %i\n", fast_timers_started);
+	seq_printf(m, "Fast timer interrupts: %i\n", fast_timer_ints);
+	seq_printf(m, "Fast timers expired:   %i\n", fast_timers_expired);
+	seq_printf(m, "Fast timers deleted:   %i\n", fast_timers_deleted);
+	seq_printf(m, "Fast timer running:    %s\n",
+		   fast_timer_running ? "yes" : "no");
+	seq_printf(m, "Current time:          %lu.%06lu\n",
+		   (unsigned long)tv.tv_jiff,
+		   (unsigned long)tv.tv_usec);
+#ifdef FAST_TIMER_SANITY_CHECKS
+	seq_printf(m, "Sanity failed:         %i\n", sanity_failed);
+#endif
+	seq_putc(m, '\n');
+
+#ifdef DEBUG_LOG_INCLUDED
+	{
+		int end_i = debug_log_cnt;
+		i = 0;
+
+		if (debug_log_cnt_wrapped)
+			i = debug_log_cnt;
+
+		while ((i != end_i || debug_log_cnt_wrapped)) {
+			seq_printf(m, debug_log_string[i], debug_log_value[i]);
+			if (seq_has_overflowed(m))
+				return 0;
+			i = (i+1) % DEBUG_LOG_MAX;
+		}
+	}
+	seq_putc(m, '\n');
+#endif
+
+	num_to_show = (fast_timers_started < NUM_TIMER_STATS ? fast_timers_started:
+		       NUM_TIMER_STATS);
+	seq_printf(m, "Timers started: %i\n", fast_timers_started);
+	for (i = 0; i < num_to_show; i++) {
+		int cur = (fast_timers_started - i - 1) % NUM_TIMER_STATS;
+
+#if 1 /* ndef FAST_TIMER_LOG */
+		seq_printf(m, "div: %i delay: %i\n",
+			   timer_div_settings[cur],
+			   timer_delay_settings[cur]);
+#endif
+#ifdef FAST_TIMER_LOG
+		t = &timer_started_log[cur];
+		seq_printf(m, "%-14s s: %6lu.%06lu e: %6lu.%06lu d: %6li us data: 0x%08lX\n",
+			   t->name,
+			   (unsigned long)t->tv_set.tv_jiff,
+			   (unsigned long)t->tv_set.tv_usec,
+			   (unsigned long)t->tv_expires.tv_jiff,
+			   (unsigned long)t->tv_expires.tv_usec,
+			   t->delay_us,
+			   t->data);
+		if (seq_has_overflowed(m))
+			return 0;
+#endif
+	}
+	seq_putc(m, '\n');
+
+#ifdef FAST_TIMER_LOG
+	num_to_show = (fast_timers_added < NUM_TIMER_STATS ? fast_timers_added:
+		       NUM_TIMER_STATS);
+	seq_printf(m, "Timers added: %i\n", fast_timers_added);
+	for (i = 0; i < num_to_show; i++) {
+		t = &timer_added_log[(fast_timers_added - i - 1) % NUM_TIMER_STATS];
+		seq_printf(m, "%-14s s: %6lu.%06lu e: %6lu.%06lu d: %6li us data: 0x%08lX\n",
+			   t->name,
+			   (unsigned long)t->tv_set.tv_jiff,
+			   (unsigned long)t->tv_set.tv_usec,
+			   (unsigned long)t->tv_expires.tv_jiff,
+			   (unsigned long)t->tv_expires.tv_usec,
+			   t->delay_us,
+			   t->data);
+		if (seq_has_overflowed(m))
+			return 0;
+	}
+	seq_putc(m, '\n');
+
+	num_to_show = (fast_timers_expired < NUM_TIMER_STATS ? fast_timers_expired:
+		       NUM_TIMER_STATS);
+	seq_printf(m, "Timers expired: %i\n", fast_timers_expired);
+	for (i = 0; i < num_to_show; i++){
+		t = &timer_expired_log[(fast_timers_expired - i - 1) % NUM_TIMER_STATS];
+		seq_printf(m, "%-14s s: %6lu.%06lu e: %6lu.%06lu d: %6li us data: 0x%08lX\n",
+			   t->name,
+			   (unsigned long)t->tv_set.tv_jiff,
+			   (unsigned long)t->tv_set.tv_usec,
+			   (unsigned long)t->tv_expires.tv_jiff,
+			   (unsigned long)t->tv_expires.tv_usec,
+			   t->delay_us,
+			   t->data);
+		if (seq_has_overflowed(m))
+			return 0;
+	}
+	seq_putc(m, '\n');
+#endif
+
+	seq_puts(m, "Active timers:\n");
+	local_irq_save(flags);
+	t = fast_timer_list;
+	while (t != NULL){
+		nextt = t->next;
+		local_irq_restore(flags);
+		seq_printf(m, "%-14s s: %6lu.%06lu e: %6lu.%06lu d: %6li us data: 0x%08lX\n",
+			   t->name,
+			   (unsigned long)t->tv_set.tv_jiff,
+			   (unsigned long)t->tv_set.tv_usec,
+			   (unsigned long)t->tv_expires.tv_jiff,
+			   (unsigned long)t->tv_expires.tv_usec,
+			   t->delay_us,
+			   t->data);
+		if (seq_has_overflowed(m))
+			return 0;
+		local_irq_save(flags);
+		if (t->next != nextt)
+			printk("timer removed!\n");
+		t = nextt;
+	}
+	local_irq_restore(flags);
+	return 0;
+}
+
+static int proc_fasttimer_open(struct inode *inode, struct file *file)
+{
+	return single_open_size(file, proc_fasttimer_show, PDE_DATA(inode), BIG_BUF_SIZE);
+}
+
+static const struct file_operations proc_fasttimer_fops = {
+	.open		= proc_fasttimer_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+#endif /* PROC_FS */
+
+#ifdef FAST_TIMER_TEST
+static volatile unsigned long i = 0;
+static volatile int num_test_timeout = 0;
+static struct fast_timer tr[10];
+static int exp_num[10];
+
+static struct fasttime_t tv_exp[100];
+
+static void test_timeout(unsigned long data)
+{
+  do_gettimeofday_fast(&tv_exp[data]);
+  exp_num[data] = num_test_timeout;
+
+  num_test_timeout++;
+}
+
+static void test_timeout1(unsigned long data)
+{
+  do_gettimeofday_fast(&tv_exp[data]);
+  exp_num[data] = num_test_timeout;
+  if (data < 7)
+  {
+    start_one_shot_timer(&tr[i], test_timeout1, i, 1000, "timeout1");
+    i++;
+  }
+  num_test_timeout++;
+}
+
+DP(
+static char buf0[2000];
+static char buf1[2000];
+static char buf2[2000];
+static char buf3[2000];
+static char buf4[2000];
+);
+
+static char buf5[6000];
+static int j_u[1000];
+
+static void fast_timer_test(void)
+{
+  int prev_num;
+  int j;
+
+	struct fasttime_t tv, tv0, tv1, tv2;
+
+  printk("fast_timer_test() start\n");
+  do_gettimeofday_fast(&tv);
+
+  for (j = 0; j < 1000; j++)
+  {
+    j_u[j] = GET_JIFFIES_USEC();
+  }
+  for (j = 0; j < 100; j++)
+  {
+    do_gettimeofday_fast(&tv_exp[j]);
+  }
+  printk(KERN_DEBUG "fast_timer_test() %is %06i\n", tv.tv_jiff, tv.tv_usec);
+
+  for (j = 0; j < 1000; j++)
+  {
+    printk(KERN_DEBUG "%i %i %i %i %i\n",
+      j_u[j], j_u[j+1], j_u[j+2], j_u[j+3], j_u[j+4]);
+    j += 4;
+  }
+  for (j = 0; j < 100; j++)
+  {
+    printk(KERN_DEBUG "%i.%i %i.%i %i.%i %i.%i %i.%i\n",
+			tv_exp[j].tv_jiff, tv_exp[j].tv_usec,
+			tv_exp[j+1].tv_jiff, tv_exp[j+1].tv_usec,
+			tv_exp[j+2].tv_jiff, tv_exp[j+2].tv_usec,
+			tv_exp[j+3].tv_jiff, tv_exp[j+3].tv_usec,
+			tv_exp[j+4].tv_jiff, tv_exp[j+4].tv_usec);
+    j += 4;
+  }
+  do_gettimeofday_fast(&tv0);
+  start_one_shot_timer(&tr[i], test_timeout, i, 50000, "test0");
+  DP(proc_fasttimer_read(buf0, NULL, 0, 0, 0));
+  i++;
+  start_one_shot_timer(&tr[i], test_timeout, i, 70000, "test1");
+  DP(proc_fasttimer_read(buf1, NULL, 0, 0, 0));
+  i++;
+  start_one_shot_timer(&tr[i], test_timeout, i, 40000, "test2");
+  DP(proc_fasttimer_read(buf2, NULL, 0, 0, 0));
+  i++;
+  start_one_shot_timer(&tr[i], test_timeout, i, 60000, "test3");
+  DP(proc_fasttimer_read(buf3, NULL, 0, 0, 0));
+  i++;
+  start_one_shot_timer(&tr[i], test_timeout1, i, 55000, "test4xx");
+  DP(proc_fasttimer_read(buf4, NULL, 0, 0, 0));
+  i++;
+  do_gettimeofday_fast(&tv1);
+
+  proc_fasttimer_read(buf5, NULL, 0, 0, 0);
+
+  prev_num = num_test_timeout;
+  while (num_test_timeout < i)
+  {
+    if (num_test_timeout != prev_num)
+      prev_num = num_test_timeout;
+  }
+  do_gettimeofday_fast(&tv2);
+	printk(KERN_INFO "Timers started    %is %06i\n",
+		tv0.tv_jiff, tv0.tv_usec);
+	printk(KERN_INFO "Timers started at %is %06i\n",
+		tv1.tv_jiff, tv1.tv_usec);
+	printk(KERN_INFO "Timers done       %is %06i\n",
+		tv2.tv_jiff, tv2.tv_usec);
+  DP(printk("buf0:\n");
+     printk(buf0);
+     printk("buf1:\n");
+     printk(buf1);
+     printk("buf2:\n");
+     printk(buf2);
+     printk("buf3:\n");
+     printk(buf3);
+     printk("buf4:\n");
+     printk(buf4);
+  );
+  printk("buf5:\n");
+  printk(buf5);
+
+  printk("timers set:\n");
+  for(j = 0; j<i; j++)
+  {
+    struct fast_timer *t = &tr[j];
+    printk("%-10s set: %6is %06ius exp: %6is %06ius "
+           "data: 0x%08X func: 0x%08X\n",
+           t->name,
+			t->tv_set.tv_jiff,
+           t->tv_set.tv_usec,
+			t->tv_expires.tv_jiff,
+           t->tv_expires.tv_usec,
+           t->data,
+           t->function
+           );
+
+    printk("           del: %6ius     did exp: %6is %06ius as #%i error: %6li\n",
+           t->delay_us,
+			tv_exp[j].tv_jiff,
+           tv_exp[j].tv_usec,
+           exp_num[j],
+			(tv_exp[j].tv_jiff - t->tv_expires.tv_jiff) *
+				1000000 + tv_exp[j].tv_usec -
+				t->tv_expires.tv_usec);
+  }
+  proc_fasttimer_read(buf5, NULL, 0, 0, 0);
+  printk("buf5 after all done:\n");
+  printk(buf5);
+  printk("fast_timer_test() done\n");
+}
+#endif
+
+
+int fast_timer_init(void)
+{
+  /* For some reason, request_irq() hangs when called from time_init() */
+  if (!fast_timer_is_init)
+  {
+    printk("fast_timer_init()\n");
+
+#ifdef CONFIG_PROC_FS
+    proc_create("fasttimer", 0, NULL, &proc_fasttimer_fops);
+#endif /* PROC_FS */
+		if (request_irq(TIMER0_INTR_VECT, timer_trig_interrupt,
+				IRQF_SHARED,
+				"fast timer int", &fast_timer_list))
+			printk(KERN_ERR "err: fasttimer irq\n");
+    fast_timer_is_init = 1;
+#ifdef FAST_TIMER_TEST
+    printk("do test\n");
+    fast_timer_test();
+#endif
+  }
+	return 0;
+}
+__initcall(fast_timer_init);
diff --git a/arch/cris/arch-v32/kernel/head.S b/arch/cris/arch-v32/kernel/head.S
new file mode 100644
index 0000000..ea63668
--- /dev/null
+++ b/arch/cris/arch-v32/kernel/head.S
@@ -0,0 +1,436 @@
+/*
+ * CRISv32 kernel startup code.
+ *
+ * Copyright (C) 2003, Axis Communications AB
+ */
+
+#define ASSEMBLER_MACROS_ONLY
+
+/*
+ * The macros found in mmu_defs_asm.h use the ## concatenation operator, so
+ * -traditional must not be used when assembling this file.
+ */
+#include <arch/memmap.h>
+#include <hwregs/reg_rdwr.h>
+#include <hwregs/intr_vect.h>
+#include <hwregs/asm/mmu_defs_asm.h>
+#include <hwregs/asm/reg_map_asm.h>
+#include <mach/startup.inc>
+
+#define CRAMFS_MAGIC 0x28cd3d45
+#define JHEAD_MAGIC 0x1FF528A6
+#define JHEAD_SIZE 8
+#define RAM_INIT_MAGIC 0x56902387
+#define COMMAND_LINE_MAGIC 0x87109563
+#define NAND_BOOT_MAGIC 0x9a9db001
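+
+	;; RAM_INIT_MAGIC, COMMAND_LINE_MAGIC and NAND_BOOT_MAGIC are passed
+	;; in registers by the boot loader (checked against R8, R10 and R12
+	;; below); CRAMFS_MAGIC and JHEAD_MAGIC identify piggybacked root
+	;; file system images in memory.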
+
+	;; NOTE: R8 and R9 carry information from the decompressor (if the
+	;; kernel was compressed). They must not be used in the code below
+	;; until they are read!
+
+	;; Exported symbols.
+	.global etrax_irv
+	.global romfs_start
+	.global romfs_length
+	.global romfs_in_flash
+	.global nand_boot
+	.global swapper_pg_dir
+
+	.text
+tstart:
+	;; This is the entry point of the kernel. The CPU is currently in
+	;; supervisor mode.
+	;;
+	;; Execution starts at 0x00000000 when booting from flash, or at
+	;; 0x40004000 when the kernel has been loaded into DRAM.
+	;;
+	di
+
+	START_CLOCKS
+
+	SETUP_WAIT_STATES
+
+	GIO_INIT
+
+	;; Setup and enable the MMU. Use same configuration for both the data
+	;; and the instruction MMU.
+	;;
+	;; Note: 3 cycles are needed for a bank-select to take effect.
+	;; Further, bank 1 is the instruction MMU, bank 2 is the data MMU.
+
+#ifdef CONFIG_CRIS_MACH_ARTPEC3
+	move.d	REG_FIELD(mmu, rw_mm_kbase_hi, base_e, 8)	\
+		| REG_FIELD(mmu, rw_mm_kbase_hi, base_c, 4)	\
+		| REG_FIELD(mmu, rw_mm_kbase_hi, base_d, 5)     \
+		| REG_FIELD(mmu, rw_mm_kbase_hi, base_b, 0xb), $r0
+#else
+	move.d	REG_FIELD(mmu, rw_mm_kbase_hi, base_e, 8)	\
+		| REG_FIELD(mmu, rw_mm_kbase_hi, base_c, 4)	\
+		| REG_FIELD(mmu, rw_mm_kbase_hi, base_b, 0xb), $r0
+#endif
+
+	;; Temporary map of 0x40 -> 0x40 and 0x00 -> 0x00.
+	move.d	REG_FIELD(mmu, rw_mm_kbase_lo, base_4, 4)  \
+		| REG_FIELD(mmu, rw_mm_kbase_lo, base_0, 0), $r1
+
+	;; Enable certain page protections and setup linear mapping
+	;; for f,e,c,b,4,0.
+
+	;; ARTPEC-3:
+	;; c,d used for linear kernel mapping, up to 512 MB
+	;; e used for vmalloc
+	;; f unused, but page mapped to get page faults
+
+	;; ETRAX FS:
+	;; c used for linear kernel mapping, up to 256 MB
+	;; d used for vmalloc
+	;; e,f used for memory-mapped NOR flash
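+	;;
+	;; Illustrative example, assuming the usual ETRAX memory map with
+	;; DRAM at physical 0x40000000: each kbase nibble gives a 256 MB
+	;; physical base for its segment, so base_c = 4 maps the linear
+	;; kernel segment at virtual 0xc0000000 onto that DRAM.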
+
+#ifdef CONFIG_CRIS_MACH_ARTPEC3
+	move.d	REG_STATE(mmu, rw_mm_cfg, we, on)		\
+		| REG_STATE(mmu, rw_mm_cfg, acc, on)		\
+		| REG_STATE(mmu, rw_mm_cfg, ex, on)		\
+		| REG_STATE(mmu, rw_mm_cfg, inv, on)            \
+		| REG_STATE(mmu, rw_mm_cfg, seg_f, page)	\
+		| REG_STATE(mmu, rw_mm_cfg, seg_e, page)	\
+		| REG_STATE(mmu, rw_mm_cfg, seg_d, linear)      \
+		| REG_STATE(mmu, rw_mm_cfg, seg_c, linear)	\
+		| REG_STATE(mmu, rw_mm_cfg, seg_b, linear)	\
+		| REG_STATE(mmu, rw_mm_cfg, seg_a, page)	\
+		| REG_STATE(mmu, rw_mm_cfg, seg_9, page)	\
+		| REG_STATE(mmu, rw_mm_cfg, seg_8, page)	\
+		| REG_STATE(mmu, rw_mm_cfg, seg_7, page)	\
+		| REG_STATE(mmu, rw_mm_cfg, seg_6, page)	\
+		| REG_STATE(mmu, rw_mm_cfg, seg_5, page)	\
+		| REG_STATE(mmu, rw_mm_cfg, seg_4, linear)	\
+		| REG_STATE(mmu, rw_mm_cfg, seg_3, page)	\
+		| REG_STATE(mmu, rw_mm_cfg, seg_2, page)	\
+		| REG_STATE(mmu, rw_mm_cfg, seg_1, page)	\
+		| REG_STATE(mmu, rw_mm_cfg, seg_0, linear), $r2
+#else
+	move.d	REG_STATE(mmu, rw_mm_cfg, we, on)		\
+		| REG_STATE(mmu, rw_mm_cfg, acc, on)		\
+		| REG_STATE(mmu, rw_mm_cfg, ex, on)		\
+		| REG_STATE(mmu, rw_mm_cfg, inv, on)		\
+		| REG_STATE(mmu, rw_mm_cfg, seg_f, linear)	\
+		| REG_STATE(mmu, rw_mm_cfg, seg_e, linear)	\
+		| REG_STATE(mmu, rw_mm_cfg, seg_d, page)	\
+		| REG_STATE(mmu, rw_mm_cfg, seg_c, linear)	\
+		| REG_STATE(mmu, rw_mm_cfg, seg_b, linear)	\
+		| REG_STATE(mmu, rw_mm_cfg, seg_a, page)	\
+		| REG_STATE(mmu, rw_mm_cfg, seg_9, page)	\
+		| REG_STATE(mmu, rw_mm_cfg, seg_8, page)	\
+		| REG_STATE(mmu, rw_mm_cfg, seg_7, page)	\
+		| REG_STATE(mmu, rw_mm_cfg, seg_6, page)	\
+		| REG_STATE(mmu, rw_mm_cfg, seg_5, page)	\
+		| REG_STATE(mmu, rw_mm_cfg, seg_4, linear)	\
+		| REG_STATE(mmu, rw_mm_cfg, seg_3, page)	\
+		| REG_STATE(mmu, rw_mm_cfg, seg_2, page)	\
+		| REG_STATE(mmu, rw_mm_cfg, seg_1, page)	\
+		| REG_STATE(mmu, rw_mm_cfg, seg_0, linear), $r2
+#endif
+
+	;; Update instruction MMU.
+	move	1, $srs
+	nop
+	nop
+	nop
+	move	$r0, $s2	; kbase_hi.
+	move	$r1, $s1	; kbase_lo.
+	move	$r2, $s0	; mm_cfg, virtual memory configuration.
+
+	;; Update data MMU.
+	move	2, $srs
+	nop
+	nop
+	nop
+	move	$r0, $s2	; kbase_hi.
+	move	$r1, $s1	; kbase_lo
+	move	$r2, $s0	; mm_cfg, virtual memory configuration.
+
+	;; Enable data and instruction MMU.
+	move	0, $srs
+	moveq	0xf, $r0	;  IMMU, DMMU, DCache, Icache on
+	nop
+	nop
+	nop
+	move	$r0, $s0
+	nop
+	nop
+	nop
+
+	; Check if starting from DRAM (network->RAM boot or unpacked
+	; compressed kernel), or directly from flash.
+	lapcq	., $r0
+	and.d	0x7fffffff, $r0 ; Mask off the non-cache bit.
+	cmp.d	0x10000, $r0	; Arbitrary, something above this code.
+	blo	_inflash0
+	nop
+
+	jump	_inram		; Jump to cached RAM.
+	nop
+
+	;; Jumpgate.
+_inflash0:
+	jump _inflash
+	nop
+
+	;; Put the following in a section so that storage for it can be
+	;; reclaimed after init is finished.
+	.section ".init.text", "ax"
+
+_inflash:
+
+	;; Initialize DRAM.
+	cmp.d	RAM_INIT_MAGIC, $r8 ; Already initialized?
+	beq	_dram_initialized
+	nop
+
+#if defined CONFIG_ETRAXFS
+#include "../mach-fs/dram_init.S"
+#elif defined CONFIG_CRIS_MACH_ARTPEC3
+#include "../mach-a3/dram_init.S"
+#else
+#error Only ETRAXFS and ARTPEC-3 supported!
+#endif
+
+
+_dram_initialized:
+	;; Copy the text and data sections to DRAM. This depends on the
+	;; variables used below being correctly set up by the linker script.
+	;; The calculated value stored in R4 is used below.
+	;; Leave the cramfs file system (piggybacked after the kernel) in flash.
+	moveq	0, $r0		; Source.
+	move.d	text_start, $r1	; Destination.
+	move.d	__vmlinux_end, $r2
+	move.d	$r2, $r4
+	sub.d	$r1, $r4
+1:	move.w	[$r0+], $r3
+	move.w	$r3, [$r1+]
+	cmp.d	$r2, $r1
+	blo	1b
+	nop
+
+	;; Check for cramfs.
+	moveq	0, $r0
+	move.d	romfs_length, $r1
+	move.d	$r0, [$r1]
+	move.d	[$r4], $r0	; cramfs_super.magic
+	cmp.d	CRAMFS_MAGIC, $r0
+	bne 1f
+	nop
+
+	;; Set length and start of cramfs, set romfs_in_flash flag
+	addoq	+4, $r4, $acr
+	move.d	[$acr], $r0
+	move.d	romfs_length, $r1
+	move.d	$r0, [$r1]
+	add.d	0xf0000000, $r4	; Add cached flash start in virtual memory.
+	move.d	romfs_start, $r1
+	move.d	$r4, [$r1]
+1:	moveq	1, $r0
+	move.d	romfs_in_flash, $r1
+	move.d	$r0, [$r1]
+
+	jump	_start_it	; Jump to cached code.
+	nop
+
+_inram:
+	;; Check if booting from NAND flash; if so, set appropriate flags
+	;; and move on.
+	cmp.d	NAND_BOOT_MAGIC, $r12
+	bne	move_cramfs	; not nand, jump
+	moveq	1, $r0
+	move.d	nand_boot, $r1	; tell axisflashmap we're booting from NAND
+	move.d	$r0, [$r1]
+	moveq	0, $r0		; tell axisflashmap romfs is not in
+	move.d	romfs_in_flash, $r1 ; (directly accessed) flash
+	move.d	$r0, [$r1]
+	jump	_start_it	; continue with boot
+	nop
+
+move_cramfs:
+	;; kernel is in DRAM.
+	;; Must figure out if there is a piggybacked rootfs image or not.
+	;; Set romfs_length to 0 => no rootfs image available by default.
+	moveq	0, $r0
+	move.d	romfs_length, $r1
+	move.d	$r0, [$r1]
+
+	;; The kernel could have been unpacked to DRAM by the loader, but
+	;; the cramfs image could still be in the flash immediately
+	;; following the compressed kernel image. The loader passes the address
+	;; of the byte succeeding the last compressed byte in the flash in
+	;; register R9 when starting the kernel.
+	cmp.d	0x0ffffff8, $r9
+	bhs	_no_romfs_in_flash ; R9 points outside the flash area.
+	nop
+	;; cramfs rootfs might be in flash. Check for it.
+	move.d	[$r9], $r0	; cramfs_super.magic
+	cmp.d	CRAMFS_MAGIC, $r0
+	bne	_no_romfs_in_flash
+	nop
+
+	;; Found cramfs in flash. Set address and size, and the romfs_in_flash flag.
+	addoq	+4, $r9, $acr
+	move.d	[$acr], $r0
+	move.d	romfs_length, $r1
+	move.d	$r0, [$r1]
+	add.d	0xf0000000, $r9	; Add cached flash start in virtual memory.
+	move.d	romfs_start, $r1
+	move.d	$r9, [$r1]
+	moveq	1, $r0
+	move.d	romfs_in_flash, $r1
+	move.d	$r0, [$r1]
+
+	jump	_start_it	; Jump to cached code.
+	nop
+
+_no_romfs_in_flash:
+	;; No romfs in flash, so look for cramfs, or jffs2 with jhead,
+	;; after the kernel in RAM, as is the case with network->RAM boot.
+	;; For cramfs, the partition starts with magic and length.
+	;; For jffs2, a jhead is prepended, which contains magic and length.
+	;; The jhead is not part of the jffs2 partition, however.
+	move.d	__bss_start, $r0
+	move.d	[$r0], $r1
+	cmp.d	CRAMFS_MAGIC, $r1 ; cramfs magic?
+	beq	2f		  ; yes, jump
+	nop
+	cmp.d	JHEAD_MAGIC, $r1 ; jffs2 (jhead) magic?
+	bne	4f		; no, skip copy
+	nop
+	addq	4, $r0		; location of jffs2 size
+	move.d	[$r0+], $r2	; fetch jffs2 size -> r2
+				; r0 now points to start of jffs2
+	ba	3f
+	nop
+2:
+	addoq	+4, $r0, $acr	; location of cramfs size
+	move.d	[$acr], $r2	; fetch cramfs size -> r2
+				; r0 still points to start of cramfs
+3:
+	;; Now, move the root fs to after the kernel's BSS.
+
+	move.d	_end, $r1	; start of cramfs -> r1
+	move.d	romfs_start, $r3
+	move.d	$r1, [$r3]	; store at romfs_start (for axisflashmap)
+	move.d	romfs_length, $r3
+	move.d	$r2, [$r3]	; store size at romfs_length
+
+	add.d	$r2, $r0	; copy from end and downwards
+	add.d	$r2, $r1
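+	;; The destination (_end and up) overlaps the source, so a forward
+	;; copy could clobber source data before it is read; copying from the
+	;; top and downwards is always safe.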
+
+	lsrq	1, $r2		; Size is in bytes, we copy words.
+	addq    1, $r2
+1:
+	move.w	[$r0], $r3
+	move.w	$r3, [$r1]
+	subq	2, $r0
+	subq	2, $r1
+	subq	1, $r2
+	bne	1b
+	nop
+
+4:
+	;; Root fs move done (or skipped if no image was found).
+	;; Clear the romfs_in_flash flag, as we now know romfs is in DRAM.
+	;; Also clear the nand_boot flag; if we got here, we know we've not
+	;; booted from NAND flash.
+	moveq	0, $r0
+	move.d	romfs_in_flash, $r1
+	move.d	$r0, [$r1]
+	moveq	0, $r0
+	move.d	nand_boot, $r1
+	move.d	$r0, [$r1]
+
+	jump	_start_it	; Jump to cached code.
+	nop
+
+_start_it:
+
+	;; Check if kernel command line is supplied
+	cmp.d	COMMAND_LINE_MAGIC, $r10
+	bne	no_command_line
+	nop
+
+	move.d	256, $r13
+	move.d  cris_command_line, $r10
+	or.d	0x80000000, $r11 ; Make it virtual
+1:
+	move.b  [$r11+], $r1
+	move.b  $r1, [$r10+]
+	subq	1, $r13
+	bne	1b
+	nop
+
+no_command_line:
+
+	;; The kernel stack contains a task structure for each task. Thus
+	;; the initial kernel stack is in the same page as the init_task,
+	;; but starts at the top of the page, i.e. + 8192 bytes.
+	move.d	init_thread_union + 8192, $sp
+	move.d	ebp_start, $r0	; Defined in linker-script.
+	move	$r0, $ebp
+	move.d	etrax_irv, $r1	; Set the exception base register and pointer.
+	move.d	$r0, [$r1]
+
+	;; Clear the BSS region from __bss_start to _end.
+	move.d	__bss_start, $r0
+	move.d	_end, $r1
+1:	clear.d	[$r0+]
+	cmp.d	$r1, $r0
+	blo 1b
+	nop
+
+	; Initialize registers to increase determinism
+	move.d __bss_start, $r0
+	movem [$r0], $r13
+
+#ifdef CONFIG_ETRAX_L2CACHE
+	jsr	l2cache_init
+	nop
+#endif
+
+	jump	start_kernel	; Jump to start_kernel() in init/main.c.
+	nop
+
+	.data
+etrax_irv:
+	.dword 0
+
+; Variables for communication with the Axis flash map driver (axisflashmap),
+; and for setting up memory in arch/cris/kernel/setup.c .
+
+; romfs_start is set to the start of the root file system, if it exists
+; in directly accessible memory (i.e. NOR Flash when booting from Flash,
+; or RAM when booting directly from a network-downloaded RAM image)
+romfs_start:
+	.dword 0
+
+; romfs_length is set to the size of the root file system image, if it exists
+; in directly accessible memory (see romfs_start). Otherwise it is set to 0.
+romfs_length:
+	.dword 0
+
+; romfs_in_flash is set to 1 if the root file system resides in directly
+; accessible flash memory (i.e. NOR flash). It is set to 0 for RAM boot
+; or NAND flash boot.
+romfs_in_flash:
+	.dword 0
+
+; nand_boot is set to 1 when the kernel has been booted from NAND flash
+nand_boot:
+	.dword 0
+
+swapper_pg_dir = 0xc0002000
+
+	.section ".init.data", "aw"
+
+#if defined CONFIG_ETRAXFS
+#include "../mach-fs/hw_settings.S"
+#elif defined CONFIG_CRIS_MACH_ARTPEC3
+#include "../mach-a3/hw_settings.S"
+#else
+#error Only ETRAXFS and ARTPEC-3 supported!
+#endif
diff --git a/arch/cris/arch-v32/kernel/irq.c b/arch/cris/arch-v32/kernel/irq.c
new file mode 100644
index 0000000..6de8db6
--- /dev/null
+++ b/arch/cris/arch-v32/kernel/irq.c
@@ -0,0 +1,520 @@
+/*
+ * Copyright (C) 2003, Axis Communications AB.
+ */
+
+#include <asm/irq.h>
+#include <linux/irq.h>
+#include <linux/interrupt.h>
+#include <linux/smp.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/profile.h>
+#include <linux/of.h>
+#include <linux/of_irq.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/threads.h>
+#include <linux/spinlock.h>
+#include <linux/kernel_stat.h>
+#include <hwregs/reg_map.h>
+#include <hwregs/reg_rdwr.h>
+#include <hwregs/intr_vect.h>
+#include <hwregs/intr_vect_defs.h>
+
+#define CPU_FIXED -1
+
+/* IRQ masks (refer to comment for crisv32_do_multiple) */
+#if TIMER0_INTR_VECT - FIRST_IRQ < 32
+#define TIMER_MASK (1 << (TIMER0_INTR_VECT - FIRST_IRQ))
+#undef TIMER_VECT1
+#else
+#define TIMER_MASK (1 << (TIMER0_INTR_VECT - FIRST_IRQ - 32))
+#define TIMER_VECT1
+#endif
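+/* Worked example, assuming the ETRAX FS vector values seen in the
+ * BUILD_TIMER_IRQ() calls below: with TIMER0_INTR_VECT 0x4b and FIRST_IRQ
+ * 0x31 the offset is 0x1a = 26 < 32, so TIMER_MASK is bit 26 of the first
+ * mask register and TIMER_VECT1 stays undefined. */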
+#ifdef CONFIG_ETRAX_KGDB
+#if defined(CONFIG_ETRAX_KGDB_PORT0)
+#define IGNOREMASK (1 << (SER0_INTR_VECT - FIRST_IRQ))
+#elif defined(CONFIG_ETRAX_KGDB_PORT1)
+#define IGNOREMASK (1 << (SER1_INTR_VECT - FIRST_IRQ))
+#elif defined(CONFIG_ETRAX_KGDB_PORT2)
+#define IGNOREMASK (1 << (SER2_INTR_VECT - FIRST_IRQ))
+#elif defined(CONFIG_ETRAX_KGDB_PORT3)
+#define IGNOREMASK (1 << (SER3_INTR_VECT - FIRST_IRQ))
+#endif
+#endif
+
+DEFINE_SPINLOCK(irq_lock);
+
+struct cris_irq_allocation
+{
+  int cpu; /* The CPU to which the IRQ is currently allocated. */
+  cpumask_t mask; /* The CPUs to which the IRQ may be allocated. */
+};
+
+struct cris_irq_allocation irq_allocations[NR_REAL_IRQS] =
+  { [0 ... NR_REAL_IRQS - 1] = {0, CPU_MASK_ALL} };
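+/* (The "[0 ... N]" form is GCC's designated-range initializer: every IRQ
+ * starts out allocated to CPU 0 and may be moved to any CPU.) */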
+
+static unsigned long irq_regs[NR_CPUS] =
+{
+  regi_irq,
+};
+
+#if NR_REAL_IRQS > 32
+#define NBR_REGS 2
+#else
+#define NBR_REGS 1
+#endif
+
+unsigned long cpu_irq_counters[NR_CPUS];
+unsigned long irq_counters[NR_REAL_IRQS];
+
+/* From irq.c. */
+extern void weird_irq(void);
+
+/* From entry.S. */
+extern void system_call(void);
+extern void nmi_interrupt(void);
+extern void multiple_interrupt(void);
+extern void gdb_handle_exception(void);
+extern void i_mmu_refill(void);
+extern void i_mmu_invalid(void);
+extern void i_mmu_access(void);
+extern void i_mmu_execute(void);
+extern void d_mmu_refill(void);
+extern void d_mmu_invalid(void);
+extern void d_mmu_access(void);
+extern void d_mmu_write(void);
+
+/* From kgdb.c. */
+extern void kgdb_init(void);
+extern void breakpoint(void);
+
+/* From traps.c.  */
+extern void breakh_BUG(void);
+
+/*
+ * Build the IRQ handler stubs using macros from irq.h.
+ */
+#ifdef CONFIG_CRIS_MACH_ARTPEC3
+BUILD_TIMER_IRQ(0x31, 0)
+#else
+BUILD_IRQ(0x31)
+#endif
+BUILD_IRQ(0x32)
+BUILD_IRQ(0x33)
+BUILD_IRQ(0x34)
+BUILD_IRQ(0x35)
+BUILD_IRQ(0x36)
+BUILD_IRQ(0x37)
+BUILD_IRQ(0x38)
+BUILD_IRQ(0x39)
+BUILD_IRQ(0x3a)
+BUILD_IRQ(0x3b)
+BUILD_IRQ(0x3c)
+BUILD_IRQ(0x3d)
+BUILD_IRQ(0x3e)
+BUILD_IRQ(0x3f)
+BUILD_IRQ(0x40)
+BUILD_IRQ(0x41)
+BUILD_IRQ(0x42)
+BUILD_IRQ(0x43)
+BUILD_IRQ(0x44)
+BUILD_IRQ(0x45)
+BUILD_IRQ(0x46)
+BUILD_IRQ(0x47)
+BUILD_IRQ(0x48)
+BUILD_IRQ(0x49)
+BUILD_IRQ(0x4a)
+#ifdef CONFIG_ETRAXFS
+BUILD_TIMER_IRQ(0x4b, 0)
+#else
+BUILD_IRQ(0x4b)
+#endif
+BUILD_IRQ(0x4c)
+BUILD_IRQ(0x4d)
+BUILD_IRQ(0x4e)
+BUILD_IRQ(0x4f)
+BUILD_IRQ(0x50)
+#if MACH_IRQS > 32
+BUILD_IRQ(0x51)
+BUILD_IRQ(0x52)
+BUILD_IRQ(0x53)
+BUILD_IRQ(0x54)
+BUILD_IRQ(0x55)
+BUILD_IRQ(0x56)
+BUILD_IRQ(0x57)
+BUILD_IRQ(0x58)
+BUILD_IRQ(0x59)
+BUILD_IRQ(0x5a)
+BUILD_IRQ(0x5b)
+BUILD_IRQ(0x5c)
+BUILD_IRQ(0x5d)
+BUILD_IRQ(0x5e)
+BUILD_IRQ(0x5f)
+BUILD_IRQ(0x60)
+BUILD_IRQ(0x61)
+BUILD_IRQ(0x62)
+BUILD_IRQ(0x63)
+BUILD_IRQ(0x64)
+BUILD_IRQ(0x65)
+BUILD_IRQ(0x66)
+BUILD_IRQ(0x67)
+BUILD_IRQ(0x68)
+BUILD_IRQ(0x69)
+BUILD_IRQ(0x6a)
+BUILD_IRQ(0x6b)
+BUILD_IRQ(0x6c)
+BUILD_IRQ(0x6d)
+BUILD_IRQ(0x6e)
+BUILD_IRQ(0x6f)
+BUILD_IRQ(0x70)
+#endif
+
+/* Pointers to the low-level handlers. */
+static void (*interrupt[MACH_IRQS])(void) = {
+	IRQ0x31_interrupt, IRQ0x32_interrupt, IRQ0x33_interrupt,
+	IRQ0x34_interrupt, IRQ0x35_interrupt, IRQ0x36_interrupt,
+	IRQ0x37_interrupt, IRQ0x38_interrupt, IRQ0x39_interrupt,
+	IRQ0x3a_interrupt, IRQ0x3b_interrupt, IRQ0x3c_interrupt,
+	IRQ0x3d_interrupt, IRQ0x3e_interrupt, IRQ0x3f_interrupt,
+	IRQ0x40_interrupt, IRQ0x41_interrupt, IRQ0x42_interrupt,
+	IRQ0x43_interrupt, IRQ0x44_interrupt, IRQ0x45_interrupt,
+	IRQ0x46_interrupt, IRQ0x47_interrupt, IRQ0x48_interrupt,
+	IRQ0x49_interrupt, IRQ0x4a_interrupt, IRQ0x4b_interrupt,
+	IRQ0x4c_interrupt, IRQ0x4d_interrupt, IRQ0x4e_interrupt,
+	IRQ0x4f_interrupt, IRQ0x50_interrupt,
+#if MACH_IRQS > 32
+	IRQ0x51_interrupt, IRQ0x52_interrupt, IRQ0x53_interrupt,
+	IRQ0x54_interrupt, IRQ0x55_interrupt, IRQ0x56_interrupt,
+	IRQ0x57_interrupt, IRQ0x58_interrupt, IRQ0x59_interrupt,
+	IRQ0x5a_interrupt, IRQ0x5b_interrupt, IRQ0x5c_interrupt,
+	IRQ0x5d_interrupt, IRQ0x5e_interrupt, IRQ0x5f_interrupt,
+	IRQ0x60_interrupt, IRQ0x61_interrupt, IRQ0x62_interrupt,
+	IRQ0x63_interrupt, IRQ0x64_interrupt, IRQ0x65_interrupt,
+	IRQ0x66_interrupt, IRQ0x67_interrupt, IRQ0x68_interrupt,
+	IRQ0x69_interrupt, IRQ0x6a_interrupt, IRQ0x6b_interrupt,
+	IRQ0x6c_interrupt, IRQ0x6d_interrupt, IRQ0x6e_interrupt,
+	IRQ0x6f_interrupt, IRQ0x70_interrupt,
+#endif
+};
+
+void
+block_irq(int irq, int cpu)
+{
+	int intr_mask;
+	unsigned long flags;
+
+	spin_lock_irqsave(&irq_lock, flags);
+	/* Remember, 1 let thru, 0 block. */
+	if (irq - FIRST_IRQ < 32) {
+		intr_mask = REG_RD_INT_VECT(intr_vect, irq_regs[cpu],
+			rw_mask, 0);
+		intr_mask &= ~(1 << (irq - FIRST_IRQ));
+		REG_WR_INT_VECT(intr_vect, irq_regs[cpu], rw_mask,
+			0, intr_mask);
+	} else {
+		intr_mask = REG_RD_INT_VECT(intr_vect, irq_regs[cpu],
+			rw_mask, 1);
+		intr_mask &= ~(1 << (irq - FIRST_IRQ - 32));
+		REG_WR_INT_VECT(intr_vect, irq_regs[cpu], rw_mask,
+			1, intr_mask);
+	}
+	spin_unlock_irqrestore(&irq_lock, flags);
+}
+
+void
+unblock_irq(int irq, int cpu)
+{
+	int intr_mask;
+	unsigned long flags;
+
+	spin_lock_irqsave(&irq_lock, flags);
+	/* Remember, 1 let thru, 0 block. */
+	if (irq - FIRST_IRQ < 32) {
+		intr_mask = REG_RD_INT_VECT(intr_vect, irq_regs[cpu],
+			rw_mask, 0);
+		intr_mask |= (1 << (irq - FIRST_IRQ));
+		REG_WR_INT_VECT(intr_vect, irq_regs[cpu], rw_mask,
+			0, intr_mask);
+	} else {
+		intr_mask = REG_RD_INT_VECT(intr_vect, irq_regs[cpu],
+			rw_mask, 1);
+		intr_mask |= (1 << (irq - FIRST_IRQ - 32));
+		REG_WR_INT_VECT(intr_vect, irq_regs[cpu], rw_mask,
+			1, intr_mask);
+	}
+	spin_unlock_irqrestore(&irq_lock, flags);
+}
+
+/* Find out which CPU the irq should be allocated to. */
+static int irq_cpu(int irq)
+{
+	int cpu;
+	unsigned long flags;
+
+	spin_lock_irqsave(&irq_lock, flags);
+	cpu = irq_allocations[irq - FIRST_IRQ].cpu;
+
+	/* Fixed interrupts stay on the local CPU. */
+	if (cpu == CPU_FIXED) {
+		spin_unlock_irqrestore(&irq_lock, flags);
+		return smp_processor_id();
+	}
+
+	/* Let the interrupt stay if possible */
+	if (cpumask_test_cpu(cpu, &irq_allocations[irq - FIRST_IRQ].mask))
+		goto out;
+
+	/* IRQ must be moved to another CPU. */
+	cpu = cpumask_first(&irq_allocations[irq - FIRST_IRQ].mask);
+	irq_allocations[irq - FIRST_IRQ].cpu = cpu;
+out:
+	spin_unlock_irqrestore(&irq_lock, flags);
+	return cpu;
+}
+
+void crisv32_mask_irq(int irq)
+{
+	int cpu;
+
+	for (cpu = 0; cpu < NR_CPUS; cpu++)
+		block_irq(irq, cpu);
+}
+
+void crisv32_unmask_irq(int irq)
+{
+	unblock_irq(irq, irq_cpu(irq));
+}
+
+
+static void enable_crisv32_irq(struct irq_data *data)
+{
+	crisv32_unmask_irq(data->irq);
+}
+
+static void disable_crisv32_irq(struct irq_data *data)
+{
+	crisv32_mask_irq(data->irq);
+}
+
+static int set_affinity_crisv32_irq(struct irq_data *data,
+				    const struct cpumask *dest, bool force)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&irq_lock, flags);
+	irq_allocations[data->irq - FIRST_IRQ].mask = *dest;
+	spin_unlock_irqrestore(&irq_lock, flags);
+	return 0;
+}
+
+static struct irq_chip crisv32_irq_type = {
+	.name			= "CRISv32",
+	.irq_shutdown		= disable_crisv32_irq,
+	.irq_enable		= enable_crisv32_irq,
+	.irq_disable		= disable_crisv32_irq,
+	.irq_set_affinity	= set_affinity_crisv32_irq,
+};
+
+void
+set_exception_vector(int n, irqvectptr addr)
+{
+	etrax_irv->v[n] = (irqvectptr) addr;
+}
+
+extern void do_IRQ(int irq, struct pt_regs * regs);
+
+void
+crisv32_do_IRQ(int irq, int block, struct pt_regs* regs)
+{
+	/* Interrupts that may not be moved to another CPU may
+	 * skip blocking. This is currently only valid for the
+	 * timer IRQ and the IPI and is used for the timer
+	 * interrupt to avoid watchdog starvation.
+	 */
+	if (!block) {
+		do_IRQ(irq, regs);
+		return;
+	}
+
+	block_irq(irq, smp_processor_id());
+	do_IRQ(irq, regs);
+
+	unblock_irq(irq, irq_cpu(irq));
+}
+
+/* If multiple interrupts occur simultaneously we get a multiple
+ * interrupt from the CPU and software has to sort out which
+ * interrupts happened. There are two special cases here:
+ *
+ * 1. Timer interrupts may never be blocked because of the
+ *    watchdog (refer to the comment in include/asm/arch/irq.h)
+ * 2. GDB serial port IRQs are unhandled here and will be handled
+ *    as a single IRQ when they strike again, because the GDB
+ *    stub wants to save the registers in its own fashion.
+ */
+void
+crisv32_do_multiple(struct pt_regs* regs)
+{
+	int cpu;
+	int mask;
+	int masked[NBR_REGS];
+	int bit;
+	int i;
+
+	cpu = smp_processor_id();
+
+	/* An extra irq_enter here to prevent softIRQs from running after
+	 * each do_IRQ. This will decrease the interrupt latency.
+	 */
+	irq_enter();
+
+	for (i = 0; i < NBR_REGS; i++) {
+		/* Find out which IRQs happened. */
+		masked[i] = REG_RD_INT_VECT(intr_vect, irq_regs[cpu],
+			r_masked_vect, i);
+
+		/* Calculate new IRQ mask with these IRQs disabled. */
+		mask = REG_RD_INT_VECT(intr_vect, irq_regs[cpu], rw_mask, i);
+		mask &= ~masked[i];
+
+	/* Timer IRQ is never masked */
+#ifdef TIMER_VECT1
+		if ((i == 1) && (masked[0] & TIMER_MASK))
+			mask |= TIMER_MASK;
+#else
+		if ((i == 0) && (masked[0] & TIMER_MASK))
+			mask |= TIMER_MASK;
+#endif
+		/* Block all the IRQs */
+		REG_WR_INT_VECT(intr_vect, irq_regs[cpu], rw_mask, i, mask);
+
+	/* Check for timer IRQ and handle it specially. */
+#ifdef TIMER_VECT1
+		if ((i == 1) && (masked[i] & TIMER_MASK)) {
+			masked[i] &= ~TIMER_MASK;
+			do_IRQ(TIMER0_INTR_VECT, regs);
+		}
+#else
+		if ((i == 0) && (masked[i] & TIMER_MASK)) {
+			masked[i] &= ~TIMER_MASK;
+			do_IRQ(TIMER0_INTR_VECT, regs);
+		}
+#endif
+	}
+
+#ifdef IGNOREMASK
+	/* Remove IRQs that can't be handled as multiple. */
+	masked[0] &= ~IGNOREMASK;
+#endif
+
+	/* Handle the rest of the IRQs. */
+	for (i = 0; i < NBR_REGS; i++) {
+		for (bit = 0; bit < 32; bit++) {
+			if (masked[i] & (1 << bit))
+				do_IRQ(bit + FIRST_IRQ + i*32, regs);
+		}
+	}
+
+	/* Unblock all the IRQs. */
+	for (i = 0; i < NBR_REGS; i++) {
+		mask = REG_RD_INT_VECT(intr_vect, irq_regs[cpu], rw_mask, i);
+		mask |= masked[i];
+		REG_WR_INT_VECT(intr_vect, irq_regs[cpu], rw_mask, i, mask);
+	}
+
+	/* This irq_exit() will trigger the soft IRQs. */
+	irq_exit();
+}
+
+static int crisv32_irq_map(struct irq_domain *h, unsigned int virq,
+			   irq_hw_number_t hw_irq_num)
+{
+	irq_set_chip_and_handler(virq, &crisv32_irq_type, handle_simple_irq);
+
+	return 0;
+}
+
+static struct irq_domain_ops crisv32_irq_ops = {
+	.map	= crisv32_irq_map,
+	.xlate	= irq_domain_xlate_onecell,
+};
+
+/*
+ * This is called by start_kernel. It fixes the IRQ masks and sets up the
+ * interrupt vector table to point to weird_irq by default.
+ */
+void __init
+init_IRQ(void)
+{
+	int i;
+	int j;
+	reg_intr_vect_rw_mask vect_mask = {0};
+	struct device_node *np;
+	struct irq_domain *domain;
+
+	/* Clear all interrupt masks. */
+	for (i = 0; i < NBR_REGS; i++)
+		REG_WR_VECT(intr_vect, regi_irq, rw_mask, i, vect_mask);
+
+	for (i = 0; i < 256; i++)
+		etrax_irv->v[i] = weird_irq;
+
+	np = of_find_compatible_node(NULL, NULL, "axis,crisv32-intc");
+	domain = irq_domain_add_legacy(np, NBR_INTR_VECT - FIRST_IRQ,
+				       FIRST_IRQ, FIRST_IRQ,
+				       &crisv32_irq_ops, NULL);
+	BUG_ON(!domain);
+	irq_set_default_host(domain);
+	of_node_put(np);
+
+	for (i = FIRST_IRQ, j = 0; j < NBR_INTR_VECT; i++, j++) {
+		set_exception_vector(i, interrupt[j]);
+	}
+
+	/* Mark Timer and IPI IRQs as CPU local */
+	irq_allocations[TIMER0_INTR_VECT - FIRST_IRQ].cpu = CPU_FIXED;
+	irq_set_status_flags(TIMER0_INTR_VECT, IRQ_PER_CPU);
+	irq_allocations[IPI_INTR_VECT - FIRST_IRQ].cpu = CPU_FIXED;
+	irq_set_status_flags(IPI_INTR_VECT, IRQ_PER_CPU);
+
+	set_exception_vector(0x00, nmi_interrupt);
+	set_exception_vector(0x30, multiple_interrupt);
+
+	/* Set up handler for various MMU bus faults. */
+	set_exception_vector(0x04, i_mmu_refill);
+	set_exception_vector(0x05, i_mmu_invalid);
+	set_exception_vector(0x06, i_mmu_access);
+	set_exception_vector(0x07, i_mmu_execute);
+	set_exception_vector(0x08, d_mmu_refill);
+	set_exception_vector(0x09, d_mmu_invalid);
+	set_exception_vector(0x0a, d_mmu_access);
+	set_exception_vector(0x0b, d_mmu_write);
+
+#ifdef CONFIG_BUG
+	/* Break 14 handler, used to implement cheap BUG().  */
+	set_exception_vector(0x1e, breakh_BUG);
+#endif
+
+	/* The system-call trap is reached by "break 13". */
+	set_exception_vector(0x1d, system_call);
+
+	/* Exception handlers for debugging, both user-mode and kernel-mode. */
+
+	/* Break 8. */
+	set_exception_vector(0x18, gdb_handle_exception);
+	/* Hardware single step. */
+	set_exception_vector(0x3, gdb_handle_exception);
+	/* Hardware breakpoint. */
+	set_exception_vector(0xc, gdb_handle_exception);
+
+#ifdef CONFIG_ETRAX_KGDB
+	kgdb_init();
+	/* Everything is set up; now trap the kernel. */
+	breakpoint();
+#endif
+}
+
diff --git a/arch/cris/arch-v32/kernel/kgdb.c b/arch/cris/arch-v32/kernel/kgdb.c
new file mode 100644
index 0000000..e0fdea7
--- /dev/null
+++ b/arch/cris/arch-v32/kernel/kgdb.c
@@ -0,0 +1,1592 @@
+/*
+ *  arch/cris/arch-v32/kernel/kgdb.c
+ *
+ *  CRIS v32 version by Orjan Friberg, Axis Communications AB.
+ *
+ *  S390 version
+ *    Copyright (C) 1999 IBM Deutschland Entwicklung GmbH, IBM Corporation
+ *    Author(s): Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com),
+ *
+ *  Originally written by Glenn Engel, Lake Stevens Instrument Division
+ *
+ *  Contributed by HP Systems
+ *
+ *  Modified for SPARC by Stu Grossman, Cygnus Support.
+ *
+ *  Modified for Linux/MIPS (and MIPS in general) by Andreas Busse
+ *  Send complaints, suggestions etc. to <andy@waldorf-gmbh.de>
+ *
+ *  Copyright (C) 1995 Andreas Busse
+ */
+
+/* FIXME: Check the documentation. */
+
+/*
+ *  kgdb usage notes:
+ *  -----------------
+ *
+ * If you select CONFIG_ETRAX_KGDB in the configuration, the kernel will be
+ * built with different gcc flags: "-g" is added to get debug info, and
+ * "-fomit-frame-pointer" is omitted to make debugging easier. Since the
+ * resulting kernel will be quite big (approx. > 7 MB), it will be stripped
+ * before compression. Such a kernel will behave just as usual, except if
+ * given a "debug=<device>" command line option. (Only serial devices are
+ * allowed for <device>, i.e. no printers or the like; possible values are
+ * machine dependent and are the same as for the usual debug device, the one
+ * for logging kernel messages.) If that option is given and the device can be
+ * initialized, the kernel will connect to the remote gdb in trap_init(). The
+ * serial parameters are fixed to 8N1 and 115200 bps, for ease of
+ * implementation.
+ *
+ * To start a debugging session, start gdb with the debugging kernel
+ * image (the one with the symbols, vmlinux.debug) named on the command line.
+ * This file will be used by gdb to get symbol and debugging info about the
+ * kernel. Next, select remote debug mode by
+ *    target remote <device>
+ * where <device> is the name of the serial device over which the debugged
+ * machine is connected. Maybe you have to adjust the baud rate by
+ *    set remotebaud <rate>
+ * or also other parameters with stty:
+ *    shell stty ... </dev/...
+ * If the kernel to be debugged has already booted, it has been waiting for
+ * gdb and now connects, and you'll see a breakpoint being reported. If the
+ * kernel isn't running yet, start it now. The order of gdb and the kernel
+ * doesn't matter.
+ * Another thing worth knowing about in the getting-started phase is how to
+ * debug the remote protocol itself. This is activated with
+ *    set remotedebug 1
+ * gdb will then print out each packet sent or received. You'll also get some
+ * messages about the gdb stub on the console of the debugged machine.
+ *
+ * If all that works, you can use lots of the usual debugging techniques on
+ * the kernel, e.g. inspecting and changing variables/memory, setting
+ * breakpoints, single stepping and so on. It's also possible to interrupt the
+ * debugged kernel by pressing C-c in gdb. Have fun! :-)
+ *
+ * The gdb stub is entered (and thus the remote gdb gets control) in the
+ * following situations:
+ *
+ *  - If breakpoint() is called. This is just after kgdb initialization, or if
+ *    a breakpoint() call has been put somewhere into the kernel source.
+ *    (Breakpoints can of course also be set the usual way in gdb.)
+ *    In eLinux, we call breakpoint() in init/main.c after IRQ initialization.
+ *
+ *  - If there is a kernel exception, i.e. bad_super_trap() or die_if_kernel()
+ *    are entered. All the CPU exceptions are mapped to (more or less..., see
+ *    the hard_trap_info array below) appropriate signals, which are reported
+ *    to gdb. die_if_kernel() is usually called after some kind of access
+ *    error and thus is reported as SIGSEGV.
+ *
+ *  - When panic() is called. This is reported as SIGABRT.
+ *
+ *  - If C-c is received over the serial line, which is treated as
+ *    SIGINT.
+ *
+ * Of course, all these signals are just faked for gdb, since there is no
+ * signal concept as such for the kernel. It also isn't possible --obviously--
+ * to set signal handlers from inside gdb, or restart the kernel with a
+ * signal.
+ *
+ * Current limitations:
+ *
+ *  - While the kernel is stopped, interrupts are disabled for safety reasons
+ *    (i.e., variables not changing magically or the like). But this also
+ *    means that the clock isn't running anymore, and that interrupts from the
+ *    hardware may get lost/not be served in time. This can cause some device
+ *    errors...
+ *
+ *  - When single-stepping, only one instruction of the current thread is
+ *    executed, but interrupts are allowed for that time and will be serviced
+ *    if pending. Be prepared for that.
+ *
+ *  - All debugging happens in kernel virtual address space. There's no way to
+ *    access physical memory not mapped in kernel space, or to access user
+ *    space. A way to work around this is using get_user_long & Co. in gdb
+ *    expressions, but only for the current process.
+ *
+ *  - Interrupting the kernel only works if interrupts are currently allowed,
+ *    and the interrupt of the serial line isn't blocked by some other means
+ *    (IPL too high, disabled, ...)
+ *
+ *  - The gdb stub is currently not reentrant, i.e. errors that happen therein
+ *    (e.g. accessing invalid memory) may not be caught correctly. This could
+ *    be removed in future by introducing a stack of struct registers.
+ *
+ */
+
+/*
+ *  To enable debugger support, two things need to happen.  One, a
+ *  call to kgdb_init() is necessary in order to allow any breakpoints
+ *  or error conditions to be properly intercepted and reported to gdb.
+ *  Two, a breakpoint needs to be generated to begin communication.  This
+ *  is most easily accomplished by a call to breakpoint().
+ *
+ *    The following gdb commands are supported:
+ *
+ * command          function                               Return value
+ *
+ *    g             return the value of the CPU registers  hex data or ENN
+ *    G             set the value of the CPU registers     OK or ENN
+ *
+ *    mAA..AA,LLLL  Read LLLL bytes at address AA..AA      hex data or ENN
+ *    MAA..AA,LLLL: Write LLLL bytes at address AA..AA     OK or ENN
+ *
+ *    c             Resume at current address              SNN   ( signal NN)
+ *    cAA..AA       Continue at address AA..AA             SNN
+ *
+ *    s             Step one instruction                   SNN
+ *    sAA..AA       Step one instruction from AA..AA       SNN
+ *
+ *    k             kill
+ *
+ *    ?             What was the last sigval ?             SNN   (signal NN)
+ *
+ *    bBB..BB	    Set baud rate to BB..BB		   OK or BNN, then sets
+ *							   baud rate
+ *
+ * All commands and responses are sent with a packet which includes a
+ * checksum.  A packet consists of
+ *
+ * $<packet info>#<checksum>.
+ *
+ * where
+ * <packet info> :: <characters representing the command or response>
+ * <checksum>    :: <two hex digits computed as modulo 256 sum of <packet info>>
+ *
+ * When a packet is received, it is first acknowledged with either '+' or '-'.
+ * '+' indicates a successful transfer.  '-' indicates a failed transfer.
+ *
+ * Example:
+ *
+ * Host:                  Reply:
+ * $m0,10#2a               +$00010203040506070809101112131415#42
+ *
+ */
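+/*
+ * A minimal sketch (not part of the stub) of how the checksum in the example
+ * above comes about: a modulo-256 sum of the payload bytes, sent as two hex
+ * digits.  For "m0,10": 0x6d + 0x30 + 0x2c + 0x31 + 0x30 = 0x12a, and
+ * 0x12a % 256 = 0x2a, hence "$m0,10#2a".
+ *
+ *	static unsigned char packet_checksum(const char *payload)
+ *	{
+ *		unsigned char sum = 0;
+ *
+ *		while (*payload)
+ *			sum += *payload++;
+ *		return sum;
+ *	}
+ */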
+
+
+#include <linux/string.h>
+#include <linux/signal.h>
+#include <linux/kernel.h>
+#include <linux/delay.h>
+#include <linux/linkage.h>
+#include <linux/reboot.h>
+
+#include <asm/setup.h>
+#include <asm/ptrace.h>
+
+#include <asm/irq.h>
+#include <hwregs/reg_map.h>
+#include <hwregs/reg_rdwr.h>
+#include <hwregs/intr_vect_defs.h>
+#include <hwregs/ser_defs.h>
+
+/* From entry.S. */
+extern void gdb_handle_exception(void);
+/* From kgdb_asm.S. */
+extern void kgdb_handle_exception(void);
+
+static int kgdb_started = 0;
+
+/********************************* Register image ****************************/
+
+typedef
+struct register_image
+{
+	                      /* Offset */
+	unsigned int   r0;    /* 0x00 */
+	unsigned int   r1;    /* 0x04 */
+	unsigned int   r2;    /* 0x08 */
+	unsigned int   r3;    /* 0x0C */
+	unsigned int   r4;    /* 0x10 */
+	unsigned int   r5;    /* 0x14 */
+	unsigned int   r6;    /* 0x18 */
+	unsigned int   r7;    /* 0x1C */
+	unsigned int   r8;    /* 0x20; Frame pointer (if any) */
+	unsigned int   r9;    /* 0x24 */
+	unsigned int   r10;   /* 0x28 */
+	unsigned int   r11;   /* 0x2C */
+	unsigned int   r12;   /* 0x30 */
+	unsigned int   r13;   /* 0x34 */
+	unsigned int   sp;    /* 0x38; R14, Stack pointer */
+	unsigned int   acr;   /* 0x3C; R15, Address calculation register. */
+
+	unsigned char  bz;    /* 0x40; P0, 8-bit zero register */
+	unsigned char  vr;    /* 0x41; P1, Version register (8-bit) */
+	unsigned int   pid;   /* 0x42; P2, Process ID */
+	unsigned char  srs;   /* 0x46; P3, Support register select (8-bit) */
+	unsigned short wz;    /* 0x47; P4, 16-bit zero register */
+	unsigned int   exs;   /* 0x49; P5, Exception status */
+	unsigned int   eda;   /* 0x4D; P6, Exception data address */
+	unsigned int   mof;   /* 0x51; P7, Multiply overflow register */
+	unsigned int   dz;    /* 0x55; P8, 32-bit zero register */
+	unsigned int   ebp;   /* 0x59; P9, Exception base pointer */
+	unsigned int   erp;   /* 0x5D; P10, Exception return pointer. Contains the PC we are interested in. */
+	unsigned int   srp;   /* 0x61; P11, Subroutine return pointer */
+	unsigned int   nrp;   /* 0x65; P12, NMI return pointer */
+	unsigned int   ccs;   /* 0x69; P13, Condition code stack */
+	unsigned int   usp;   /* 0x6D; P14, User mode stack pointer */
+	unsigned int   spc;   /* 0x71; P15, Single step PC */
+	unsigned int   pc;    /* 0x75; Pseudo register (for the most part set to ERP). */
+
+} registers;
+
+typedef
+struct bp_register_image
+{
+	/* Support register bank 0. */
+	unsigned int   s0_0;
+	unsigned int   s1_0;
+	unsigned int   s2_0;
+	unsigned int   s3_0;
+	unsigned int   s4_0;
+	unsigned int   s5_0;
+	unsigned int   s6_0;
+	unsigned int   s7_0;
+	unsigned int   s8_0;
+	unsigned int   s9_0;
+	unsigned int   s10_0;
+	unsigned int   s11_0;
+	unsigned int   s12_0;
+	unsigned int   s13_0;
+	unsigned int   s14_0;
+	unsigned int   s15_0;
+
+	/* Support register bank 1. */
+	unsigned int   s0_1;
+	unsigned int   s1_1;
+	unsigned int   s2_1;
+	unsigned int   s3_1;
+	unsigned int   s4_1;
+	unsigned int   s5_1;
+	unsigned int   s6_1;
+	unsigned int   s7_1;
+	unsigned int   s8_1;
+	unsigned int   s9_1;
+	unsigned int   s10_1;
+	unsigned int   s11_1;
+	unsigned int   s12_1;
+	unsigned int   s13_1;
+	unsigned int   s14_1;
+	unsigned int   s15_1;
+
+	/* Support register bank 2. */
+	unsigned int   s0_2;
+	unsigned int   s1_2;
+	unsigned int   s2_2;
+	unsigned int   s3_2;
+	unsigned int   s4_2;
+	unsigned int   s5_2;
+	unsigned int   s6_2;
+	unsigned int   s7_2;
+	unsigned int   s8_2;
+	unsigned int   s9_2;
+	unsigned int   s10_2;
+	unsigned int   s11_2;
+	unsigned int   s12_2;
+	unsigned int   s13_2;
+	unsigned int   s14_2;
+	unsigned int   s15_2;
+
+	/* Support register bank 3. */
+	unsigned int   s0_3; /* BP_CTRL */
+	unsigned int   s1_3; /* BP_I0_START */
+	unsigned int   s2_3; /* BP_I0_END */
+	unsigned int   s3_3; /* BP_D0_START */
+	unsigned int   s4_3; /* BP_D0_END */
+	unsigned int   s5_3; /* BP_D1_START */
+	unsigned int   s6_3; /* BP_D1_END */
+	unsigned int   s7_3; /* BP_D2_START */
+	unsigned int   s8_3; /* BP_D2_END */
+	unsigned int   s9_3; /* BP_D3_START */
+	unsigned int   s10_3; /* BP_D3_END */
+	unsigned int   s11_3; /* BP_D4_START */
+	unsigned int   s12_3; /* BP_D4_END */
+	unsigned int   s13_3; /* BP_D5_START */
+	unsigned int   s14_3; /* BP_D5_END */
+	unsigned int   s15_3; /* BP_RESERVED */
+
+} support_registers;
+
+enum register_name
+{
+	R0,  R1,  R2,  R3,
+	R4,  R5,  R6,  R7,
+	R8,  R9,  R10, R11,
+	R12, R13, SP,  ACR,
+
+	BZ,  VR,  PID, SRS,
+	WZ,  EXS, EDA, MOF,
+	DZ,  EBP, ERP, SRP,
+	NRP, CCS, USP, SPC,
+	PC,
+
+	S0,  S1,  S2,  S3,
+	S4,  S5,  S6,  S7,
+	S8,  S9,  S10, S11,
+	S12, S13, S14, S15
+
+};
+
+/* The register sizes of the registers in register_name. An unimplemented register
+   is designated by size 0 in this array. */
+static int register_size[] =
+{
+	4, 4, 4, 4,
+	4, 4, 4, 4,
+	4, 4, 4, 4,
+	4, 4, 4, 4,
+
+	1, 1, 4, 1,
+	2, 4, 4, 4,
+	4, 4, 4, 4,
+	4, 4, 4, 4,
+
+	4,
+
+	4, 4, 4, 4,
+	4, 4, 4, 4,
+	4, 4, 4, 4,
+	4, 4, 4
+
+};
+
+/* Contains the register image of the kernel.
+   (Global so that they can be reached from assembler code.) */
+registers reg;
+support_registers sreg;
+
+/************** Prototypes for local library functions ***********************/
+
+/* Copy of strcpy from libc. */
+static char *gdb_cris_strcpy(char *s1, const char *s2);
+
+/* Copy of strlen from libc. */
+static int gdb_cris_strlen(const char *s);
+
+/* Copy of memchr from libc. */
+static void *gdb_cris_memchr(const void *s, int c, int n);
+
+/* Copy of strtol from libc. Only supports base 16. */
+static int gdb_cris_strtol(const char *s, char **endptr, int base);
+
+/********************** Prototypes for local functions. **********************/
+
+/* Write a value to a specified register regno in the register image
+   of the current thread. */
+static int write_register(int regno, char *val);
+
+/* Read a value from a specified register in the register image. Returns the
+   status of the read operation. The register value is returned in valptr. */
+static int read_register(char regno, unsigned int *valptr);
+
+/* Serial port, reads one character. ETRAX 100 specific. From debugport.c. */
+int getDebugChar(void);
+
+/* Serial port, writes one character. ETRAX 100 specific. From debugport.c. */
+void putDebugChar(int val);
+
+/* Convert the memory pointed to by mem into hexadecimal representation.
+   Put the result in buf, and return a pointer to the last character
+   in buf (null). */
+static char *mem2hex(char *buf, unsigned char *mem, int count);
+
+/* Put the content of the array, in binary representation, pointed to by buf
+   into memory pointed to by mem, and return a pointer to
+   the character after the last byte written. */
+static unsigned char *bin2mem(unsigned char *mem, unsigned char *buf, int count);
+
+/* Await the sequence $<data>#<checksum> and store <data> in the array buffer
+   returned. */
+static void getpacket(char *buffer);
+
+/* Send $<data>#<checksum> from the <data> in the array buffer. */
+static void putpacket(char *buffer);
+
+/* Build and send a response packet in order to inform the host the
+   stub is stopped. */
+static void stub_is_stopped(int sigval);
+
+/* All expected commands are sent from remote.c. Send a response according
+   to the description in remote.c. Not static since it needs to be reached
+   from assembler code. */
+void handle_exception(int sigval);
+
+/* Performs a complete re-start from scratch. ETRAX specific. */
+static void kill_restart(void);
+
+/******************** Prototypes for global functions. ***********************/
+
+/* The string str is prepended with the GDB printout token and sent. */
+void putDebugString(const unsigned char *str, int len);
+
+/* A static breakpoint to be used at startup. */
+void breakpoint(void);
+
+/* Avoid warning as the internal_stack is not used in the C-code. */
+#define USEDVAR(name)    { if (name) { ; } }
+#define USEDFUN(name) { void (*pf)(void) = (void *)name; USEDVAR(pf) }
+
+/********************************** Packet I/O ******************************/
+/* BUFMAX defines the maximum number of characters in
+   inbound/outbound buffers */
+/* FIXME: How do we know it's enough? */
+#define BUFMAX 512
+
+/* Run-length encoding maximum length. Send 64 at most. */
+#define RUNLENMAX 64
+
+/* The inbound/outbound buffers used in packet I/O */
+static char input_buffer[BUFMAX];
+static char output_buffer[BUFMAX];
+
+/* Error and warning messages. */
+enum error_type
+{
+	SUCCESS, E01, E02, E03, E04, E05, E06, E07, E08
+};
+
+static char *error_message[] =
+{
+	"",
+	"E01 Set current or general thread - H[c,g] - internal error.",
+	"E02 Change register content - P - cannot change read-only register.",
+	"E03 Thread is not alive.", /* T, not used. */
+	"E04 The command is not supported - [s,C,S,!,R,d,r] - internal error.",
+	"E05 Change register content - P - the register is not implemented..",
+	"E06 Change memory content - M - internal error.",
+	"E07 Change register content - P - the register is not stored on the stack",
+	"E08 Invalid parameter"
+};
+
+/********************************** Breakpoint *******************************/
+/* Use an internal stack in the breakpoint and interrupt response routines.
+   FIXME: How do we know the size of this stack is enough?
+   Global so it can be reached from assembler code. */
+#define INTERNAL_STACK_SIZE 1024
+char internal_stack[INTERNAL_STACK_SIZE];
+
+/* Due to the breakpoint return pointer, a state variable is needed to keep
+   track of whether it is a static (compiled) or dynamic (gdb-invoked)
+   breakpoint to be handled. A static breakpoint uses the content of register
+   ERP as it is, whereas a dynamic breakpoint requires subtracting 2 from ERP
+   so that the instruction replaced by the break is re-executed on return.
+   The first breakpoint is static; all following are assumed to be dynamic. */
+static int dynamic_bp = 0;
+
+/********************************* String library ****************************/
+/* Single-step over library functions creates trap loops. */
+
+/* Copy char s2[] to s1[]. */
+static char*
+gdb_cris_strcpy(char *s1, const char *s2)
+{
+	char *s = s1;
+
+	for (s = s1; (*s++ = *s2++) != '\0'; )
+		;
+	return s1;
+}
+
+/* Find length of s[]. */
+static int
+gdb_cris_strlen(const char *s)
+{
+	const char *sc;
+
+	for (sc = s; *sc != '\0'; sc++)
+		;
+	return (sc - s);
+}
+
+/* Find first occurrence of c in s[n]. */
+static void*
+gdb_cris_memchr(const void *s, int c, int n)
+{
+	const unsigned char uc = c;
+	const unsigned char *su;
+
+	for (su = s; 0 < n; ++su, --n)
+		if (*su == uc)
+			return (void *)su;
+	return NULL;
+}
+/******************************* Standard library ****************************/
+/* Single-step over library functions creates trap loops. */
+/* Convert string to long. */
+static int
+gdb_cris_strtol(const char *s, char **endptr, int base)
+{
+	char *s1;
+	char *sd;
+	int x = 0;
+
+	for (s1 = (char*)s; (sd = gdb_cris_memchr(hex_asc, *s1, base)) != NULL; ++s1)
+		x = x * base + (sd - hex_asc);
+
+	if (endptr) {
+		/* Unconverted suffix is stored in endptr unless endptr is NULL. */
+		*endptr = s1;
+	}
+
+	return x;
+}
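+/* Example: gdb_cris_strtol("1f=11223344", &suffix, 16) returns 0x1f and
+   leaves suffix pointing at "=11223344" - exactly how the 'P' packet handler
+   below splits a register number from its value.  Only lowercase hex digits
+   are recognized, since hex_asc is lowercase. */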
+
+/********************************* Register image ****************************/
+
+/* Write a value to a specified register in the register image of the current
+   thread. Returns status code SUCCESS, E02, E05 or E08. */
+static int
+write_register(int regno, char *val)
+{
+	int status = SUCCESS;
+
+	if (regno >= R0 && regno <= ACR) {
+		/* Consecutive 32-bit registers. */
+		if (hex2bin((unsigned char *)&reg.r0 + (regno - R0) * sizeof(unsigned int),
+			    val, sizeof(unsigned int)))
+			status = E08;
+
+	} else if (regno == BZ || regno == VR || regno == WZ || regno == DZ) {
+		/* Read-only registers. */
+		status = E02;
+
+	} else if (regno == PID) {
+		/* 32-bit register. (Even though we already checked SRS and WZ, we cannot
+		   combine this with the EXS - SPC write since SRS and WZ have different sizes.) */
+		if (hex2bin((unsigned char *)&reg.pid, val, sizeof(unsigned int)))
+			status = E08;
+
+	} else if (regno == SRS) {
+		/* 8-bit register. */
+		if (hex2bin((unsigned char *)&reg.srs, val, sizeof(unsigned char)))
+			status = E08;
+
+	} else if (regno >= EXS && regno <= SPC) {
+		/* Consecutive 32-bit registers. */
+		if (hex2bin((unsigned char *)&reg.exs + (regno - EXS) * sizeof(unsigned int),
+			    val, sizeof(unsigned int)))
+			status = E08;
+
+	} else if (regno == PC) {
+		/* Pseudo-register. Treat as read-only. */
+		status = E02;
+
+	} else if (regno >= S0 && regno <= S15) {
+		/* 32-bit registers. */
+		if (hex2bin((unsigned char *)&sreg.s0_0 + (reg.srs * 16 * sizeof(unsigned int)) + (regno - S0) * sizeof(unsigned int),
+			   val, sizeof(unsigned int)))
+			status = E08;
+	} else {
+		/* Non-existing register. */
+		status = E05;
+	}
+	return status;
+}
+
+/* Read a value from a specified register in the register image. Returns the
+   status of the read operation; the register value is returned in valptr. */
+static int
+read_register(char regno, unsigned int *valptr)
+{
+	int status = SUCCESS;
+
+	/* We read the zero registers from the register struct (instead of just returning 0)
+	   to catch errors. */
+
+	if (regno >= R0 && regno <= ACR) {
+		/* Consecutive 32-bit registers. */
+		*valptr = *(unsigned int *)((char *)&reg.r0 + (regno - R0) * sizeof(unsigned int));
+
+	} else if (regno == BZ || regno == VR) {
+		/* Consecutive 8-bit registers. */
+		*valptr = (unsigned int)(*(unsigned char *)
+                                         ((char *)&reg.bz + (regno - BZ) * sizeof(char)));
+
+	} else if (regno == PID) {
+		/* 32-bit register. */
+		*valptr =  *(unsigned int *)((char *)&reg.pid);
+
+	} else if (regno == SRS) {
+		/* 8-bit register. */
+		*valptr = (unsigned int)(*(unsigned char *)((char *)&reg.srs));
+
+	} else if (regno == WZ) {
+		/* 16-bit register. */
+		*valptr = (unsigned int)(*(unsigned short *)(char *)&reg.wz);
+
+	} else if (regno >= EXS && regno <= PC) {
+		/* Consecutive 32-bit registers. */
+		*valptr = *(unsigned int *)((char *)&reg.exs + (regno - EXS) * sizeof(unsigned int));
+
+	} else if (regno >= S0 && regno <= S15) {
+		/* Consecutive 32-bit registers, located elsewhere. */
+		*valptr = *(unsigned int *)((char *)&sreg.s0_0 + (reg.srs * 16 * sizeof(unsigned int)) + (regno - S0) * sizeof(unsigned int));
+
+	} else {
+		/* Non-existing register. */
+		status = E05;
+	}
+	return status;
+
+}
+
+/********************************** Packet I/O ******************************/
+/* Convert the memory pointed to by mem into hexadecimal representation.
+   Put the result in buf, and return a pointer to the last character
+   in buf (null). */
+
+static char *
+mem2hex(char *buf, unsigned char *mem, int count)
+{
+	int i;
+	int ch;
+
+	if (mem == NULL) {
+		/* Invalid address, caught by 'm' packet handler. */
+		for (i = 0; i < count; i++) {
+			*buf++ = '0';
+			*buf++ = '0';
+		}
+	} else {
+		/* Valid mem address. */
+		for (i = 0; i < count; i++) {
+			ch = *mem++;
+			buf = hex_byte_pack(buf, ch);
+		}
+	}
+	/* Terminate properly. */
+	*buf = '\0';
+	return buf;
+}
+
+/* Same as mem2hex, but puts it in network byte order. */
+static char *
+mem2hex_nbo(char *buf, unsigned char *mem, int count)
+{
+	int i;
+	int ch;
+
+	mem += count - 1;
+	for (i = 0; i < count; i++) {
+		ch = *mem--;
+		buf = hex_byte_pack(buf, ch);
+	}
+
+	/* Terminate properly. */
+	*buf = '\0';
+	return buf;
+}
+
+/* Put the content of the array, in binary representation, pointed to by buf
+   into memory pointed to by mem, and return a pointer to the character after
+   the last byte written.
+   Gdb will escape $, #, and the escape char (0x7d). */
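+/* Example: a literal '#' (0x23) arrives on the wire as 0x7d 0x03; the loop
+   below skips the 0x7d and adds 0x20 to the following byte, recovering 0x23.
+   (GDB's encoder XORs with 0x20; adding is equivalent here because none of
+   the escaped values 0x03, 0x04 and 0x5d have bit 5 set.) */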
+static unsigned char*
+bin2mem(unsigned char *mem, unsigned char *buf, int count)
+{
+	int i;
+	unsigned char *next;
+	for (i = 0; i < count; i++) {
+		/* Check for any escaped characters. Be paranoid and
+		   only unescape chars that should be escaped. */
+		if (*buf == 0x7d) {
+			next = buf + 1;
+			if (*next == 0x3 || *next == 0x4 || *next == 0x5D) {
+				/* Escaped '#', '$' or '}' (the escape char). */
+				buf++;
+				*buf += 0x20;
+			}
+		}
+		*mem++ = *buf++;
+	}
+	return mem;
+}
+
+/* Await the sequence $<data>#<checksum> and store <data> in the array buffer
+   returned. */
+static void
+getpacket(char *buffer)
+{
+	unsigned char checksum;
+	unsigned char xmitcsum;
+	int i;
+	int count;
+	char ch;
+
+	do {
+		while ((ch = getDebugChar()) != '$')
+			/* Wait for the start character $ and ignore all other characters */;
+		checksum = 0;
+		xmitcsum = -1;
+		count = 0;
+		/* Read until a # or the end of the buffer is reached */
+		while (count < BUFMAX) {
+			ch = getDebugChar();
+			if (ch == '#')
+				break;
+			checksum = checksum + ch;
+			buffer[count] = ch;
+			count = count + 1;
+		}
+
+		if (count >= BUFMAX)
+			continue;
+
+		buffer[count] = 0;
+
+		if (ch == '#') {
+			xmitcsum = hex_to_bin(getDebugChar()) << 4;
+			xmitcsum += hex_to_bin(getDebugChar());
+			if (checksum != xmitcsum) {
+				/* Wrong checksum */
+				putDebugChar('-');
+			} else {
+				/* Correct checksum */
+				putDebugChar('+');
+				/* If sequence characters are received, reply with them */
+				if (buffer[2] == ':') {
+					putDebugChar(buffer[0]);
+					putDebugChar(buffer[1]);
+					/* Remove the sequence characters from the buffer */
+					count = gdb_cris_strlen(buffer);
+					for (i = 3; i <= count; i++)
+						buffer[i - 3] = buffer[i];
+				}
+			}
+		}
+	} while (checksum != xmitcsum);
+}
+
+/* Send $<data>#<checksum> from the <data> in the array buffer. */
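+/* Run-length encoding: a run of n (4 <= n <= RUNLENMAX) identical characters
+   is sent as the character itself, '*', and the printable byte n + ' ' - 4.
+   GDB decodes that byte c as c - 29 further repeats, so e.g. six zeros
+   "000000" go out as '0' '*' 0x22, and 0x22 - 29 = 5 repeats plus the
+   explicit '0' gives six characters back. */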
+
+static void
+putpacket(char *buffer)
+{
+	int checksum;
+	int runlen;
+	int encode;
+
+	do {
+		char *src = buffer;
+		putDebugChar('$');
+		checksum = 0;
+		while (*src) {
+			/* Do run length encoding */
+			putDebugChar(*src);
+			checksum += *src;
+			runlen = 0;
+			while (runlen < RUNLENMAX && *src == src[runlen]) {
+				runlen++;
+			}
+			if (runlen > 3) {
+				/* Got a useful amount */
+				putDebugChar ('*');
+				checksum += '*';
+				encode = runlen + ' ' - 4;
+				putDebugChar(encode);
+				checksum += encode;
+				src += runlen;
+			} else {
+				src++;
+			}
+		}
+		putDebugChar('#');
+		putDebugChar(hex_asc_hi(checksum));
+		putDebugChar(hex_asc_lo(checksum));
+	} while (kgdb_started && (getDebugChar() != '+'));
+}
+
+/* The string str is prepended with the GDB printout token and sent. Required
+   in traditional implementations. */
+void
+putDebugString(const unsigned char *str, int len)
+{
+	/* Move SPC forward if we are single-stepping. */
+	asm("spchere:");
+	asm("move $spc, $r10");
+	asm("cmp.d spchere, $r10");
+	asm("bne nosstep");
+	asm("nop");
+	asm("move.d spccont, $r10");
+	asm("move $r10, $spc");
+	asm("nosstep:");
+
+	output_buffer[0] = 'O';
+	mem2hex(&output_buffer[1], (unsigned char *)str, len);
+	putpacket(output_buffer);
+
+	asm("spccont:");
+}
+
+/********************************** Handle exceptions ************************/
+/* Build and send a response packet in order to inform the host the
+   stub is stopped. TAAn...:r...;n...:r...;n...:r...;
+                    AA = signal number
+                    n... = register number (hex)
+                    r... = register contents
+                    n... = `thread'
+                    r... = thread process ID.  This is a hex integer.
+                    n... = other string not starting with valid hex digit.
+                    gdb should ignore this n,r pair and go on to the next.
+                    This way we can extend the protocol. */
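+/* For example, a SIGTRAP (5) stop with the registers sent below comes out
+   roughly as "T0520:<pc>;08:<r8>;0e:<sp>;1a:<erp>;", where 0x20, 0x08, 0x0e
+   and 0x1a are the positions of PC, R8, SP and ERP in the register_name
+   enum above. */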
+static void
+stub_is_stopped(int sigval)
+{
+	char *ptr = output_buffer;
+	unsigned int reg_cont;
+
+	/* Send trap type (converted to signal) */
+
+	*ptr++ = 'T';
+	ptr = hex_byte_pack(ptr, sigval);
+
+	if (((reg.exs & 0xff00) >> 8) == 0xc) {
+
+		/* Some kind of hardware watchpoint triggered. Find which one
+		   and determine its type (read/write/access).  */
+		int S, bp, trig_bits = 0, rw_bits = 0;
+		int trig_mask = 0;
+		unsigned int *bp_d_regs = &sreg.s3_3;
+		/* In a lot of cases, the stopped data address will simply be EDA.
+		   In some cases, we adjust it to match the watched data range.
+		   (We don't want to change the actual EDA though). */
+		unsigned int stopped_data_address;
+		/* The S field of EXS. */
+		S = (reg.exs & 0xffff0000) >> 16;
+
+		if (S & 1) {
+			/* Instruction watchpoint. */
+			/* FIXME: Check against, and possibly adjust reported EDA. */
+		} else {
+			/* Data watchpoint.  Find the one that triggered. */
+			for (bp = 0; bp < 6; bp++) {
+
+				/* Dx_RD, Dx_WR in the S field of EXS for this BP. */
+				int bitpos_trig = 1 + bp * 2;
+				/* Dx_BPRD, Dx_BPWR in BP_CTRL for this BP. */
+				int bitpos_config = 2 + bp * 4;
+
+				/* Get read/write trig bits for this BP. */
+				trig_bits = (S & (3 << bitpos_trig)) >> bitpos_trig;
+
+				/* Read/write config bits for this BP. */
+				rw_bits = (sreg.s0_3 & (3 << bitpos_config)) >> bitpos_config;
+				if (trig_bits) {
+					/* Sanity check: the BP shouldn't trigger for accesses
+					   that it isn't configured for. */
+					if ((rw_bits == 0x1 && trig_bits != 0x1) ||
+					    (rw_bits == 0x2 && trig_bits != 0x2))
+						panic("Invalid r/w triggering for this BP");
+
+					/* Mark this BP as triggered for future reference. */
+					trig_mask |= (1 << bp);
+
+					if (reg.eda >= bp_d_regs[bp * 2] &&
+					    reg.eda <= bp_d_regs[bp * 2 + 1]) {
+						/* EDA within range for this BP; it must be the one
+						   we're looking for. */
+						stopped_data_address = reg.eda;
+						break;
+					}
+				}
+			}
+			if (bp < 6) {
+				/* Found a triggered BP with EDA within its configured data range. */
+			} else if (trig_mask) {
+				/* Something triggered, but EDA doesn't match any BP's range. */
+				for (bp = 0; bp < 6; bp++) {
+					/* Dx_BPRD, Dx_BPWR in BP_CTRL for this BP. */
+					int bitpos_config = 2 + bp * 4;
+
+					/* Read/write config bits for this BP (needed later). */
+					rw_bits = (sreg.s0_3 & (3 << bitpos_config)) >> bitpos_config;
+
+					if (trig_mask & (1 << bp)) {
+						/* EDA within 31 bytes of the configured start address? */
+						if (reg.eda + 31 >= bp_d_regs[bp * 2]) {
+							/* Changing the reported address to match
+							   the start address of the first applicable BP. */
+							stopped_data_address = bp_d_regs[bp * 2];
+							break;
+						} else {
+							/* We continue since we might find another useful BP. */
+							printk(KERN_WARNING "EDA doesn't match triggered BP's range\n");
+						}
+					}
+				}
+			}
+
+			/* No match yet? */
+			BUG_ON(bp >= 6);
+			/* Note that we report the type according to what the BP is configured
+			   for (otherwise we'd never report an 'awatch'), not according to how
+			   it triggered. We did check that the triggered bits match what the BP
+			   is configured for, though. */
+			if (rw_bits == 0x1) {
+				/* read */
+				strncpy(ptr, "rwatch", 6);
+				ptr += 6;
+			} else if (rw_bits == 0x2) {
+				/* write */
+				strncpy(ptr, "watch", 5);
+				ptr += 5;
+			} else if (rw_bits == 0x3) {
+				/* access */
+				strncpy(ptr, "awatch", 6);
+				ptr += 6;
+			} else {
+				panic("Invalid r/w bits for this BP.");
+			}
+
+			*ptr++ = ':';
+			/* Note that we don't read_register(EDA, ...) */
+			ptr = mem2hex_nbo(ptr, (unsigned char *)&stopped_data_address, register_size[EDA]);
+			*ptr++ = ';';
+		}
+	}
+	/* Send PC, frame pointer and stack pointer (plus ERP, below). */
+	read_register(PC, &reg_cont);
+	ptr = hex_byte_pack(ptr, PC);
+	*ptr++ = ':';
+	ptr = mem2hex(ptr, (unsigned char *)&reg_cont, register_size[PC]);
+	*ptr++ = ';';
+
+	read_register(R8, &reg_cont);
+	ptr = hex_byte_pack(ptr, R8);
+	*ptr++ = ':';
+	ptr = mem2hex(ptr, (unsigned char *)&reg_cont, register_size[R8]);
+	*ptr++ = ';';
+
+	read_register(SP, &reg_cont);
+	ptr = hex_byte_pack(ptr, SP);
+	*ptr++ = ':';
+	ptr = mem2hex(ptr, (unsigned char *)&reg_cont, register_size[SP]);
+	*ptr++ = ';';
+
+	/* Send ERP as well; this will save us an entire register fetch in some cases. */
+	read_register(ERP, &reg_cont);
+	ptr = hex_byte_pack(ptr, ERP);
+	*ptr++ = ':';
+	ptr = mem2hex(ptr, (unsigned char *)&reg_cont, register_size[ERP]);
+	*ptr++ = ';';
+
+	/* null-terminate and send it off */
+	*ptr = 0;
+	putpacket(output_buffer);
+}
+
+/* Returns the size of an instruction that has a delay slot. */
+
+int insn_size(unsigned long pc)
+{
+	unsigned short opcode = *(unsigned short *)pc;
+	int size = 0;
+
+	switch ((opcode & 0x0f00) >> 8) {
+	case 0x0:
+	case 0x9:
+	case 0xb:
+		size = 2;
+		break;
+	case 0xe:
+	case 0xf:
+		size = 6;
+		break;
+	case 0xd:
+		/* Could be 4 or 6; check more bits. */
+		if ((opcode & 0xff) == 0xff)
+			size = 4;
+		else
+			size = 6;
+		break;
+	default:
+		panic("Couldn't find size of opcode 0x%x at 0x%lx\n", opcode, pc);
+	}
+
+	return size;
+}
+
+void register_fixup(int sigval)
+{
+	/* Compensate for ACR push at the beginning of exception handler. */
+	reg.sp += 4;
+
+	/* Standard case. */
+	reg.pc = reg.erp;
+	if (reg.erp & 0x1) {
+		/* Delay slot bit set.  Report as stopped on proper instruction.  */
+		if (reg.spc) {
+			/* Rely on SPC if set. */
+			reg.pc = reg.spc;
+		} else {
+			/* Calculate the PC from the size of the instruction
+			   that the delay slot we're in belongs to. */
+			reg.pc += insn_size(reg.erp & ~1) - 1;
+		}
+	}
+
+	if ((reg.exs & 0x3) == 0x0) {
+		/* Bits 1 - 0 indicate the type of memory operation performed
+		   by the interrupted instruction. 0 means no memory operation,
+		   and EDA is undefined in that case. We zero it to avoid confusion. */
+		reg.eda = 0;
+	}
+
+	if (sigval == SIGTRAP) {
+		/* Break 8, single step or hardware breakpoint exception. */
+
+		/* Check IDX field of EXS. */
+		if (((reg.exs & 0xff00) >> 8) == 0x18) {
+
+			/* Break 8. */
+
+			/* Static (compiled) breakpoints must return to the next instruction
+			   in order to avoid infinite loops (default value of ERP). Dynamic
+			   (gdb-invoked) breakpoints must subtract the size of the break
+			   instruction from ERP so that the instruction that was originally in
+			   the break instruction's place will be run when we return from the
+			   exception. */
+			if (!dynamic_bp) {
+				/* Assuming that all breakpoints are dynamic from now on. */
+				dynamic_bp = 1;
+			} else {
+
+				/* Only if not in a delay slot. */
+				if (!(reg.erp & 0x1)) {
+					reg.erp -= 2;
+					reg.pc -= 2;
+				}
+			}
+
+		} else if (((reg.exs & 0xff00) >> 8) == 0x3) {
+			/* Single step. */
+			/* Don't fiddle with S1. */
+
+		} else if (((reg.exs & 0xff00) >> 8) == 0xc) {
+
+			/* Hardware watchpoint exception. */
+
+			/* SPC has been updated so that we will get a single step exception
+			   when we return, but we don't want that. */
+			reg.spc = 0;
+
+			/* Don't fiddle with S1. */
+		}
+
+	} else if (sigval == SIGINT) {
+		/* Nothing special. */
+	}
+}
+
+static void insert_watchpoint(char type, int addr, int len)
+{
+	/* Breakpoint/watchpoint types (GDB terminology):
+	   0 = memory breakpoint for instructions
+	   (not supported; done via memory write instead)
+	   1 = hardware breakpoint for instructions (supported)
+	   2 = write watchpoint (supported)
+	   3 = read watchpoint (supported)
+	   4 = access watchpoint (supported) */
+
+	if (type < '1' || type > '4') {
+		output_buffer[0] = 0;
+		return;
+	}
+
+	/* Read watchpoints are set as access watchpoints, because of GDB's
+	   inability to deal with pure read watchpoints. */
+	if (type == '3')
+		type = '4';
+
+	if (type == '1') {
+		/* Hardware (instruction) breakpoint. */
+		/* Bit 0 in BP_CTRL holds the configuration for I0. */
+		if (sreg.s0_3 & 0x1) {
+			/* Already in use. */
+			gdb_cris_strcpy(output_buffer, error_message[E04]);
+			return;
+		}
+		/* Configure. */
+		sreg.s1_3 = addr;
+		sreg.s2_3 = (addr + len - 1);
+		sreg.s0_3 |= 1;
+	} else {
+		int bp;
+		unsigned int *bp_d_regs = &sreg.s3_3;
+
+		/* The watchpoint allocation scheme is the simplest possible.
+		   For example, if a region is watched for read and
+		   a write watch is requested, a new watchpoint will
+		   be used. Also, if a watch is requested for a region
+		   that is already covered by one or more existing
+		   watchpoints, a new watchpoint will be used. */
+
+		/* First, find a free data watchpoint. */
+		for (bp = 0; bp < 6; bp++) {
+			/* Each data watchpoint's control registers occupy 2 bits
+			   (hence the 3), starting at bit 2 for D0 (hence the 2)
+			   with 4 bits between for each watchpoint (yes, the 4). */
+			if (!(sreg.s0_3 & (0x3 << (2 + (bp * 4))))) {
+				break;
+			}
+		}
+
+		if (bp > 5) {
+			/* We're out of watchpoints. */
+			gdb_cris_strcpy(output_buffer, error_message[E04]);
+			return;
+		}
+
+		/* Configure the control register first. */
+		if (type == '3' || type == '4') {
+			/* Trigger on read. */
+			sreg.s0_3 |= (1 << (2 + bp * 4));
+		}
+		if (type == '2' || type == '4') {
+			/* Trigger on write. */
+			sreg.s0_3 |= (2 << (2 + bp * 4));
+		}
+
+		/* Ugly pointer arithmetic to configure the watched range. */
+		bp_d_regs[bp * 2] = addr;
+		bp_d_regs[bp * 2 + 1] = (addr + len - 1);
+	}
+
+	/* Set the S1 flag to enable watchpoints. */
+	reg.ccs |= (1 << (S_CCS_BITNR + CCS_SHIFT));
+	gdb_cris_strcpy(output_buffer, "OK");
+}
+
+static void remove_watchpoint(char type, int addr, int len)
+{
+	/* Breakpoint/watchpoint types:
+	   0 = memory breakpoint for instructions
+	   (not supported; done via memory write instead)
+	   1 = hardware breakpoint for instructions (supported)
+	   2 = write watchpoint (supported)
+	   3 = read watchpoint (supported)
+	   4 = access watchpoint (supported) */
+	if (type < '1' || type > '4') {
+		output_buffer[0] = 0;
+		return;
+	}
+
+	/* Read watchpoints are set as access watchpoints, because of GDB's
+	   inability to deal with pure read watchpoints. */
+	if (type == '3')
+		type = '4';
+
+	if (type == '1') {
+		/* Hardware breakpoint. */
+		/* Bit 0 in BP_CTRL holds the configuration for I0. */
+		if (!(sreg.s0_3 & 0x1)) {
+			/* Not in use. */
+			gdb_cris_strcpy(output_buffer, error_message[E04]);
+			return;
+		}
+		/* Deconfigure. */
+		sreg.s1_3 = 0;
+		sreg.s2_3 = 0;
+		sreg.s0_3 &= ~1;
+	} else {
+		int bp;
+		unsigned int *bp_d_regs = &sreg.s3_3;
+		/* Try to find a watchpoint that is configured for the
+		   specified range, then check that read/write also matches. */
+
+		/* Ugly pointer arithmetic, since we cannot rely on a
+		   simple switch on addr: several watchpoints may share the
+		   same start address, for example. */
+
+		for (bp = 0; bp < 6; bp++) {
+			if (bp_d_regs[bp * 2] == addr &&
+			    bp_d_regs[bp * 2 + 1] == (addr + len - 1)) {
+				/* Matching range. */
+				int bitpos = 2 + bp * 4;
+				int rw_bits;
+
+				/* Read/write bits for this BP. */
+				rw_bits = (sreg.s0_3 & (0x3 << bitpos)) >> bitpos;
+
+				if ((type == '3' && rw_bits == 0x1) ||
+				    (type == '2' && rw_bits == 0x2) ||
+				    (type == '4' && rw_bits == 0x3)) {
+					/* Read/write matched. */
+					break;
+				}
+			}
+		}
+
+		if (bp > 5) {
+			/* No watchpoint matched. */
+			gdb_cris_strcpy(output_buffer, error_message[E04]);
+			return;
+		}
+
+		/* Found a matching watchpoint. Now, deconfigure it by
+		   both disabling read/write in bp_ctrl and zeroing its
+		   start/end addresses. */
+		sreg.s0_3 &= ~(3 << (2 + (bp * 4)));
+		bp_d_regs[bp * 2] = 0;
+		bp_d_regs[bp * 2 + 1] = 0;
+	}
+
+	/* Note that we don't clear the S1 flag here. It's done when continuing.  */
+	gdb_cris_strcpy(output_buffer, "OK");
+}
+
+/* All expected commands are sent from remote.c. Send a response according
+   to the description in remote.c. */
+void
+handle_exception(int sigval)
+{
+	/* Avoid 'defined but not used' warnings. */
+
+	USEDFUN(handle_exception);
+	USEDVAR(internal_stack[0]);
+
+	register_fixup(sigval);
+
+	/* Send response. */
+	stub_is_stopped(sigval);
+
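+	/* Process packets from GDB until a continue or step command hands
+	   control back to the debugged code. */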
+	for (;;) {
+		output_buffer[0] = '\0';
+		getpacket(input_buffer);
+		switch (input_buffer[0]) {
+			case 'g':
+				/* Read registers: g
+				   Success: Each byte of register data is described by two hex digits.
+				   Registers are in the internal order for GDB, and the bytes
+				   in a register are in the same order the machine uses.
+				   Failure: void. */
+			{
+				char *buf;
+				/* General and special registers. */
+				buf = mem2hex(output_buffer, (char *)&reg, sizeof(registers));
+				/* Support registers. */
+				/* -1 because of the null termination that mem2hex adds. */
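+				/* Only the 16 support registers of the bank
+				   currently selected by SRS are appended. */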
+				mem2hex(buf,
+					(char *)&sreg + (reg.srs * 16 * sizeof(unsigned int)),
+					16 * sizeof(unsigned int));
+				break;
+			}
+			case 'G':
+				/* Write registers. GXX..XX
+				   Each byte of register data is described by two hex digits.
+				   Success: OK
+				   Failure: E08. */
+				/* General and special registers. */
+				if (hex2bin((char *)&reg, &input_buffer[1], sizeof(registers)))
+					gdb_cris_strcpy(output_buffer, error_message[E08]);
+				/* Support registers. */
+				else if (hex2bin((char *)&sreg + (reg.srs * 16 * sizeof(unsigned int)),
+					&input_buffer[1] + sizeof(registers),
+					16 * sizeof(unsigned int)))
+					gdb_cris_strcpy(output_buffer, error_message[E08]);
+				else
+					gdb_cris_strcpy(output_buffer, "OK");
+				break;
+
+			case 'P':
+				/* Write register. Pn...=r...
+				   Write register n..., hex value without 0x, with value r...,
+				   which contains a hex value without 0x and two hex digits
+				   for each byte in the register (target byte order). P1f=11223344 means
+				   set register 31 to 44332211.
+				   Success: OK
+				   Failure: E02, E05 */
+				{
+					char *suffix;
+					int regno = gdb_cris_strtol(&input_buffer[1], &suffix, 16);
+					int status;
+
+					status = write_register(regno, suffix+1);
+
+					switch (status) {
+						case E02:
+							/* Do not support read-only registers. */
+							gdb_cris_strcpy(output_buffer, error_message[E02]);
+							break;
+						case E05:
+							/* Do not support non-existing registers. */
+							gdb_cris_strcpy(output_buffer, error_message[E05]);
+							break;
+						case E08:
+							/* Invalid parameter. */
+							gdb_cris_strcpy(output_buffer, error_message[E08]);
+							break;
+						default:
+							/* Valid register number. */
+							gdb_cris_strcpy(output_buffer, "OK");
+							break;
+					}
+				}
+				break;
+
+			case 'm':
+				/* Read from memory. mAA..AA,LLLL
+				   AA..AA is the address and LLLL is the length.
+				   Success: XX..XX is the memory content.  Can be fewer bytes than
+				   requested if only part of the data may be read. m6000120a,6c means
+				   retrieve 108 bytes from base address 6000120a.
+				   Failure: void. */
+				{
+					char *suffix;
+					unsigned char *addr = (unsigned char *)
+						gdb_cris_strtol(&input_buffer[1], &suffix, 16);
+					int len = gdb_cris_strtol(suffix+1, 0, 16);
+
+					/* Bogus read (i.e. outside the kernel's
+					   segment)? */
+					if (!((unsigned int)addr >= 0xc0000000 &&
+					      (unsigned int)addr < 0xd0000000))
+						addr = NULL;
+
+					mem2hex(output_buffer, addr, len);
+				}
+				break;
+
+			case 'X':
+				/* Write to memory. XAA..AA,LLLL:XX..XX
+				   AA..AA is the start address, LLLL is the number of bytes, and
+				   XX..XX is the binary data.
+				   Success: OK
+				   Failure: void. */
+			case 'M':
+				/* Write to memory. MAA..AA,LLLL:XX..XX
+				   AA..AA is the start address, LLLL is the number of bytes, and
+				   XX..XX is the hexadecimal data.
+				   Success: OK
+				   Failure: E08. */
+				{
+					char *lenptr;
+					char *dataptr;
+					unsigned char *addr = (unsigned char *)gdb_cris_strtol(&input_buffer[1],
+										      &lenptr, 16);
+					int len = gdb_cris_strtol(lenptr+1, &dataptr, 16);
+					if (*lenptr == ',' && *dataptr == ':') {
+						if (input_buffer[0] == 'M') {
+							if (hex2bin(addr, dataptr + 1, len))
+								gdb_cris_strcpy(output_buffer, error_message[E08]);
+							else
+								gdb_cris_strcpy(output_buffer, "OK");
+						} else /* X */ {
+							bin2mem(addr, dataptr + 1, len);
+							gdb_cris_strcpy(output_buffer, "OK");
+						}
+					} else {
+						gdb_cris_strcpy(output_buffer, error_message[E06]);
+					}
+				}
+				break;
+
+			case 'c':
+				/* Continue execution. cAA..AA
+				   AA..AA is the address where execution is resumed. If AA..AA is
+				   omitted, resume at the present address.
+				   Success: return to the executing thread.
+				   Failure: will never know. */
+
+				if (input_buffer[1] != '\0') {
+					/* FIXME: Doesn't handle address argument. */
+					gdb_cris_strcpy(output_buffer, error_message[E04]);
+					break;
+				}
+
+				/* Before continuing, make sure everything is set up correctly. */
+
+				/* Set the SPC to some unlikely value.  */
+				reg.spc = 0;
+				/* Set the S1 flag to 0 unless some watchpoint is enabled (since setting
+				   S1 to 0 would also disable watchpoints). (Note that bits 26-31 in BP_CTRL
+				   are reserved, so don't check against those). */
+				if ((sreg.s0_3 & 0x3fff) == 0) {
+					reg.ccs &= ~(1 << (S_CCS_BITNR + CCS_SHIFT));
+				}
+
+				return;
+
+			case 's':
+				/* Step. sAA..AA
+				   AA..AA is the address where execution is resumed. If AA..AA is
+				   omitted, resume at the present address. Success: return to the
+				   executing thread. Failure: will never know. */
+
+				if (input_buffer[1] != '\0') {
+					/* FIXME: Doesn't handle address argument. */
+					gdb_cris_strcpy(output_buffer, error_message[E04]);
+					break;
+				}
+
+				/* Set the SPC to PC, which is where we'll return
+				   (deduced previously). */
+				reg.spc = reg.pc;
+
+				/* Set the S1 (first stacked, not current) flag, which will
+				   kick into action when we rfe. */
+				reg.ccs |= (1 << (S_CCS_BITNR + CCS_SHIFT));
+				return;
+
+			case 'Z':
+				/* Insert breakpoint or watchpoint, Ztype,addr,length.
+				   Remote protocol says: A remote target shall return an empty string
+				   for an unrecognized breakpoint or watchpoint packet type. */
+				{
+					char *lenptr;
+					char *dataptr;
+					int addr = gdb_cris_strtol(&input_buffer[3], &lenptr, 16);
+					int len = gdb_cris_strtol(lenptr + 1, &dataptr, 16);
+					char type = input_buffer[1];
+
+					insert_watchpoint(type, addr, len);
+					break;
+				}
+
+			case 'z':
+				/* Remove breakpoint or watchpoint, ztype,addr,length.
+				   Remote protocol says: A remote target shall return an empty string
+				   for an unrecognized breakpoint or watchpoint packet type. */
+				{
+					char *lenptr;
+					char *dataptr;
+					int addr = gdb_cris_strtol(&input_buffer[3], &lenptr, 16);
+					int len = gdb_cris_strtol(lenptr + 1, &dataptr, 16);
+					char type = input_buffer[1];
+
+					remove_watchpoint(type, addr, len);
+					break;
+				}
+
+			case '?':
+				/* The last signal which caused a stop. ?
+				   Success: SAA, where AA is the signal number.
+				   Failure: void. */
+				output_buffer[0] = 'S';
+				output_buffer[1] = hex_asc_hi(sigval);
+				output_buffer[2] = hex_asc_lo(sigval);
+				output_buffer[3] = 0;
+				break;
+
+			case 'D':
+				/* Detach from host. D
+				   Success: OK, and return to the executing thread.
+				   Failure: will never know */
+				putpacket("OK");
+				return;
+
+			case 'k':
+			case 'r':
+				/* kill request or reset request.
+				   Success: restart of target.
+				   Failure: will never know. */
+				kill_restart();
+				break;
+
+			case 'C':
+			case 'S':
+			case '!':
+			case 'R':
+			case 'd':
+				/* Continue with signal sig. Csig;AA..AA
+				   Step with signal sig. Ssig;AA..AA
+				   Use the extended remote protocol. !
+				   Restart the target system. R0
+				   Toggle debug flag. d
+				   Search backwards. tAA:PP,MM
+				   Not supported: E04 */
+
+				/* FIXME: What's the difference between not supported
+				   and ignored (below)? */
+				gdb_cris_strcpy(output_buffer, error_message[E04]);
+				break;
+
+			default:
+				/* The stub should ignore other requests and send an empty
+				   response ($#<checksum>). This way we can extend the protocol and GDB
+				   can tell whether the stub it is talking to uses the old or the new. */
+				output_buffer[0] = 0;
+				break;
+		}
+		putpacket(output_buffer);
+	}
+}
+
+void
+kgdb_init(void)
+{
+	reg_intr_vect_rw_mask intr_mask;
+	reg_ser_rw_intr_mask ser_intr_mask;
+
+	/* Configure the kgdb serial port. */
+#if defined(CONFIG_ETRAX_KGDB_PORT0)
+	/* Note: no shortcut registered (not handled by multiple_interrupt).
+	   See entry.S.  */
+	set_exception_vector(SER0_INTR_VECT, kgdb_handle_exception);
+	/* Enable the ser irq in the global config. */
+	intr_mask = REG_RD(intr_vect, regi_irq, rw_mask);
+	intr_mask.ser0 = 1;
+	REG_WR(intr_vect, regi_irq, rw_mask, intr_mask);
+
+	ser_intr_mask = REG_RD(ser, regi_ser0, rw_intr_mask);
+	ser_intr_mask.dav = regk_ser_yes;
+	REG_WR(ser, regi_ser0, rw_intr_mask, ser_intr_mask);
+#elif defined(CONFIG_ETRAX_KGDB_PORT1)
+	/* Note: no shortcut registered (not handled by multiple_interrupt).
+	   See entry.S.  */
+	set_exception_vector(SER1_INTR_VECT, kgdb_handle_exception);
+	/* Enable the ser irq in the global config. */
+	intr_mask = REG_RD(intr_vect, regi_irq, rw_mask);
+	intr_mask.ser1 = 1;
+	REG_WR(intr_vect, regi_irq, rw_mask, intr_mask);
+
+	ser_intr_mask = REG_RD(ser, regi_ser1, rw_intr_mask);
+	ser_intr_mask.dav = regk_ser_yes;
+	REG_WR(ser, regi_ser1, rw_intr_mask, ser_intr_mask);
+#elif defined(CONFIG_ETRAX_KGDB_PORT2)
+	/* Note: no shortcut registered (not handled by multiple_interrupt).
+	   See entry.S.  */
+	set_exception_vector(SER2_INTR_VECT, kgdb_handle_exception);
+	/* Enable the ser irq in the global config. */
+	intr_mask = REG_RD(intr_vect, regi_irq, rw_mask);
+	intr_mask.ser2 = 1;
+	REG_WR(intr_vect, regi_irq, rw_mask, intr_mask);
+
+	ser_intr_mask = REG_RD(ser, regi_ser2, rw_intr_mask);
+	ser_intr_mask.dav = regk_ser_yes;
+	REG_WR(ser, regi_ser2, rw_intr_mask, ser_intr_mask);
+#elif defined(CONFIG_ETRAX_KGDB_PORT3)
+	/* Note: no shortcut registered (not handled by multiple_interrupt).
+	   See entry.S.  */
+	set_exception_vector(SER3_INTR_VECT, kgdb_handle_exception);
+	/* Enable the ser irq in the global config. */
+	intr_mask = REG_RD(intr_vect, regi_irq, rw_mask);
+	intr_mask.ser3 = 1;
+	REG_WR(intr_vect, regi_irq, rw_mask, intr_mask);
+
+	ser_intr_mask = REG_RD(ser, regi_ser3, rw_intr_mask);
+	ser_intr_mask.dav = regk_ser_yes;
+	REG_WR(ser, regi_ser3, rw_intr_mask, ser_intr_mask);
+#endif
+}
+
+/* Performs a complete restart from scratch. */
+static void
+kill_restart(void)
+{
+	machine_restart("");
+}
+
+/* Use this static breakpoint in the start-up only. */
+
+void
+breakpoint(void)
+{
+	kgdb_started = 1;
+	dynamic_bp = 0;     /* This is a static, not a dynamic breakpoint. */
+	__asm__ volatile ("break 8"); /* Jump to kgdb_handle_breakpoint. */
+}
+
+/****************************** End of file **********************************/
diff --git a/arch/cris/arch-v32/kernel/kgdb_asm.S b/arch/cris/arch-v32/kernel/kgdb_asm.S
new file mode 100644
index 0000000..f3a4760
--- /dev/null
+++ b/arch/cris/arch-v32/kernel/kgdb_asm.S
@@ -0,0 +1,551 @@
+/*
+ *  Copyright (C) 2004 Axis Communications AB
+ *
+ * Code for handling break 8, hardware breakpoint, single step, and serial
+ * port exceptions for kernel debugging purposes.
+ */
+
+#include <hwregs/intr_vect.h>
+
+	;; Exported functions.
+	.globl kgdb_handle_exception
+
+kgdb_handle_exception:
+
+;; Create a register image of the caller.
+;;
+;; First of all, save the ACR on the stack since we need it for address calculations.
+;; We put it into the register struct later.
+
+  subq     4, $sp
+  move.d   $acr, [$sp]
+
+;; Now we are free to use ACR all we want.
+;; If we were running this handler with interrupts on, we would have to be careful
+;; to save and restore CCS manually, but since we aren't, we treat it like every other
+;; register.
+
+  move.d   reg,  $acr
+  move.d   $r0,  [$acr]        ; Save R0 (start of register struct)
+  addq     4,    $acr
+  move.d   $r1,  [$acr]        ; Save R1
+  addq     4,    $acr
+  move.d   $r2,  [$acr]        ; Save R2
+  addq     4,    $acr
+  move.d   $r3,  [$acr]        ; Save R3
+  addq     4,    $acr
+  move.d   $r4,  [$acr]        ; Save R4
+  addq     4,    $acr
+  move.d   $r5,  [$acr]        ; Save R5
+  addq     4,    $acr
+  move.d   $r6,  [$acr]        ; Save R6
+  addq     4,    $acr
+  move.d   $r7,  [$acr]        ; Save R7
+  addq     4,    $acr
+  move.d   $r8,  [$acr]        ; Save R8
+  addq     4,    $acr
+  move.d   $r9,  [$acr]        ; Save R9
+  addq     4,    $acr
+  move.d   $r10, [$acr]        ; Save R10
+  addq     4,    $acr
+  move.d   $r11, [$acr]        ; Save R11
+  addq     4,    $acr
+  move.d   $r12, [$acr]        ; Save R12
+  addq     4,    $acr
+  move.d   $r13, [$acr]        ; Save R13
+  addq     4,    $acr
+  move.d   $sp,  [$acr]        ; Save SP (R14)
+  addq     4,    $acr
+
+  ;; The ACR register is already saved on the stack, so pop it from there.
+  move.d   [$sp],$r0
+  move.d   $r0,  [$acr]
+  addq     4,    $acr
+
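+;; Store the remaining special registers (BZ, VR, PID, SRS, WZ, EXS, EDA,
+;; MOF, DZ, EBP, ERP, SRP, NRP, CCS, USP, SPC) in the order GDB expects;
+;; the 'g' packet later sends this register image verbatim.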
+  move     $bz,  [$acr]
+  addq     1,    $acr
+  move     $vr,  [$acr]
+  addq     1,    $acr
+  move     $pid, [$acr]
+  addq     4,    $acr
+  move     $srs, [$acr]
+  addq     1,    $acr
+  move     $wz,  [$acr]
+  addq     2,    $acr
+  move     $exs, [$acr]
+  addq     4,    $acr
+  move     $eda, [$acr]
+  addq     4,    $acr
+  move     $mof, [$acr]
+  addq     4,    $acr
+  move     $dz,  [$acr]
+  addq     4,    $acr
+  move     $ebp, [$acr]
+  addq     4,    $acr
+  move     $erp, [$acr]
+  addq     4,    $acr
+  move     $srp, [$acr]
+  addq     4,    $acr
+  move     $nrp, [$acr]
+  addq     4,    $acr
+  move     $ccs, [$acr]
+  addq     4,    $acr
+  move     $usp, [$acr]
+  addq     4,    $acr
+  move     $spc, [$acr]
+  addq     4,     $acr
+
+;; Skip the pseudo-PC.
+  addq     4,     $acr
+
+;; Save the support registers in bank 0 - 3.
+  clear.d $r1 ; Bank counter
+  move.d  sreg, $acr
+
+;; Bank 0
+  move    $r1,  $srs
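+;; The nops give the SRS write time to take effect before the support
+;; registers are accessed (the same pattern is used at every bank switch).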
+  nop
+  nop
+  nop
+  move    $s0,   $r0
+  move.d  $r0,   [$acr]
+  addq    4,     $acr
+  move    $s1,   $r0
+  move.d  $r0,   [$acr]
+  addq    4,     $acr
+  move    $s2,   $r0
+  move.d  $r0,   [$acr]
+  addq    4,     $acr
+  move    $s3,   $r0
+  move.d  $r0,   [$acr]
+  addq    4,     $acr
+  move    $s4,   $r0
+  move.d  $r0,   [$acr]
+  addq    4,     $acr
+  move    $s5,   $r0
+  move.d  $r0,   [$acr]
+  addq    4,     $acr
+  move    $s6,   $r0
+  move.d  $r0,   [$acr]
+  addq    4,     $acr
+  move    $s7,   $r0
+  move.d  $r0,   [$acr]
+  addq    4,     $acr
+  move    $s8,   $r0
+  move.d  $r0,   [$acr]
+  addq    4,     $acr
+  move    $s9,   $r0
+  move.d  $r0,   [$acr]
+  addq    4,     $acr
+  move    $s10,  $r0
+  move.d  $r0,   [$acr]
+  addq    4,     $acr
+  move    $s11,  $r0
+  move.d  $r0,   [$acr]
+  addq    4,     $acr
+  move    $s12,  $r0
+  move.d  $r0,   [$acr]
+  addq    4,     $acr
+
+  ;; Nothing in S13 - S15, bank 0
+  clear.d [$acr]
+  addq    4,     $acr
+  clear.d [$acr]
+  addq    4,     $acr
+  clear.d [$acr]
+  addq    4,     $acr
+
+;; Bank 1 and bank 2 have the same layout, hence the loop.
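+;; ($r1 runs from 1 to 2 here; the loop exits once it reaches 3.)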
+  addq    1, $r1
+1:
+  move    $r1,  $srs
+  nop
+  nop
+  nop
+  move    $s0,   $r0
+  move.d  $r0,   [$acr]
+  addq    4,     $acr
+  move    $s1,   $r0
+  move.d  $r0,   [$acr]
+  addq    4,     $acr
+  move    $s2,   $r0
+  move.d  $r0,   [$acr]
+  addq    4,     $acr
+  move    $s3,   $r0
+  move.d  $r0,   [$acr]
+  addq    4,     $acr
+  move    $s4,   $r0
+  move.d  $r0,   [$acr]
+  addq    4,     $acr
+  move    $s5,   $r0
+  move.d  $r0,   [$acr]
+  addq    4,     $acr
+  move    $s6,   $r0
+  move.d  $r0,   [$acr]
+  addq    4,     $acr
+
+  ;; Nothing in S7 - S15, bank 1 and 2
+  clear.d [$acr]
+  addq    4,     $acr
+  clear.d [$acr]
+  addq    4,     $acr
+  clear.d [$acr]
+  addq    4,     $acr
+  clear.d [$acr]
+  addq    4,     $acr
+  clear.d [$acr]
+  addq    4,     $acr
+  clear.d [$acr]
+  addq    4,     $acr
+  clear.d [$acr]
+  addq    4,     $acr
+  clear.d [$acr]
+  addq    4,     $acr
+  clear.d [$acr]
+  addq    4,     $acr
+
+  addq 1, $r1
+  cmpq 3, $r1
+  bne 1b
+  nop
+
+;; Bank 3
+  move    $r1,  $srs
+  nop
+  nop
+  nop
+  move    $s0,   $r0
+  move.d  $r0,   [$acr]
+  addq    4,     $acr
+  move    $s1,   $r0
+  move.d  $r0,   [$acr]
+  addq    4,     $acr
+  move    $s2,   $r0
+  move.d  $r0,   [$acr]
+  addq    4,     $acr
+  move    $s3,   $r0
+  move.d  $r0,   [$acr]
+  addq    4,     $acr
+  move    $s4,   $r0
+  move.d  $r0,   [$acr]
+  addq    4,     $acr
+  move    $s5,   $r0
+  move.d  $r0,   [$acr]
+  addq    4,     $acr
+  move    $s6,   $r0
+  move.d  $r0,   [$acr]
+  addq    4,     $acr
+  move    $s7,   $r0
+  move.d  $r0,   [$acr]
+  addq    4,     $acr
+  move    $s8,   $r0
+  move.d  $r0,   [$acr]
+  addq    4,     $acr
+  move    $s9,   $r0
+  move.d  $r0,   [$acr]
+  addq    4,     $acr
+  move    $s10,  $r0
+  move.d  $r0,   [$acr]
+  addq    4,     $acr
+  move    $s11,  $r0
+  move.d  $r0,   [$acr]
+  addq    4,     $acr
+  move    $s12,  $r0
+  move.d  $r0,   [$acr]
+  addq    4,     $acr
+  move    $s13,  $r0
+  move.d  $r0,   [$acr]
+  addq    4,     $acr
+  move    $s14,  $r0
+  move.d  $r0,   [$acr]
+  addq    4,     $acr
+;; Nothing in S15, bank 3
+  clear.d [$acr]
+  addq    4,     $acr
+
+;; Check what got us here: get IDX field of EXS.
+  move $exs,    $r10
+  and.d 0xff00, $r10
+  lsrq 8,       $r10
+#if defined(CONFIG_ETRAX_KGDB_PORT0)
+  cmp.d SER0_INTR_VECT,   $r10 ; IRQ for serial port 0
+  beq sigint
+  nop
+#elif defined(CONFIG_ETRAX_KGDB_PORT1)
+  cmp.d SER1_INTR_VECT,   $r10 ; IRQ for serial port 1
+  beq sigint
+  nop
+#elif defined(CONFIG_ETRAX_KGDB_PORT2)
+  cmp.d SER2_INTR_VECT,   $r10 ; IRQ for serial port 2
+  beq sigint
+  nop
+#elif defined(CONFIG_ETRAX_KGDB_PORT3)
+  cmp.d SER3_INTR_VECT,   $r10 ; IRQ for serial port 3
+  beq sigint
+  nop
+#endif
+;; Multiple interrupt must be due to serial break.
+  cmp.d 0x30,   $r10 ; Multiple interrupt
+  beq sigint
+  nop
+;; Neither of those? Then it's a sigtrap.
+  ba handle_comm
+  moveq 5, $r10      ; Set SIGTRAP (delay slot)
+
+sigint:
+  ;; Serial interrupt; get character
+  jsr getDebugChar
+  nop                ; Delay slot
+  cmp.b 3, $r10      ; \003 (Ctrl-C)?
+  bne return         ; No, get out of here
+  nop
+  moveq 2, $r10      ; Set SIGINT
+
+;;
+;; Handle the communication
+;;
+handle_comm:
+  move.d   internal_stack+1020, $sp ; Point SP at the top of the internal stack
+  jsr      handle_exception         ; Interactive routine
+  nop
+
+;;
+;; Return to the caller
+;;
+return:
+
+;; First of all, write the support registers.
+  clear.d $r1 ; Bank counter
+  move.d  sreg, $acr
+
+;; Bank 0
+  move    $r1,  $srs
+  nop
+  nop
+  nop
+  move.d  [$acr], $r0
+  move    $r0,    $s0
+  addq    4,      $acr
+  move.d  [$acr], $r0
+  move    $r0,    $s1
+  addq    4,      $acr
+  move.d  [$acr], $r0
+  move    $r0,    $s2
+  addq    4,      $acr
+  move.d  [$acr], $r0
+  move    $r0,    $s3
+  addq    4,      $acr
+  move.d  [$acr], $r0
+  move    $r0,    $s4
+  addq    4,      $acr
+  move.d  [$acr], $r0
+  move    $r0,    $s5
+  addq    4,      $acr
+
+;; Nothing in S6 - S7, bank 0.
+  addq    4,      $acr
+  addq    4,      $acr
+
+  move.d  [$acr], $r0
+  move    $r0,    $s8
+  addq    4,      $acr
+  move.d  [$acr], $r0
+  move    $r0,    $s9
+  addq    4,      $acr
+  move.d  [$acr], $r0
+  move    $r0,    $s10
+  addq    4,      $acr
+  move.d  [$acr], $r0
+  move    $r0,    $s11
+  addq    4,      $acr
+  move.d  [$acr], $r0
+  move    $r0,    $s12
+  addq    4,      $acr
+
+;; Nothing in S13 - S15, bank 0
+  addq    4,      $acr
+  addq    4,      $acr
+  addq    4,      $acr
+
+;; Bank 1 and bank 2 have the same layout, hence the loop.
+  addq    1, $r1
+2:
+  move    $r1,  $srs
+  nop
+  nop
+  nop
+  move.d  [$acr], $r0
+  move    $r0,    $s0
+  addq    4,      $acr
+  move.d  [$acr], $r0
+  move    $r0,    $s1
+  addq    4,      $acr
+  move.d  [$acr], $r0
+  move    $r0,    $s2
+  addq    4,      $acr
+
+;; S3 (MM_CAUSE) is read-only.
+  addq    4,      $acr
+
+  move.d  [$acr], $r0
+  move    $r0,    $s4
+  addq    4,      $acr
+
+;; FIXME: Actually write S5/S6? (Affects MM_CAUSE.)
+  addq    4,      $acr
+  addq    4,      $acr
+
+;; Nothing in S7 - S15, bank 1 and 2
+  addq    4,      $acr
+  addq    4,      $acr
+  addq    4,      $acr
+  addq    4,      $acr
+  addq    4,      $acr
+  addq    4,      $acr
+  addq    4,      $acr
+  addq    4,      $acr
+  addq    4,      $acr
+
+  addq 1, $r1
+  cmpq 3, $r1
+  bne 2b
+  nop
+
+;; Bank 3
+  move    $r1,  $srs
+  nop
+  nop
+  nop
+  move.d  [$acr], $r0
+  move    $r0,    $s0
+  addq    4,      $acr
+  move.d  [$acr], $r0
+  move    $r0,    $s1
+  addq    4,      $acr
+  move.d  [$acr], $r0
+  move    $r0,    $s2
+  addq    4,      $acr
+  move.d  [$acr], $r0
+  move    $r0,    $s3
+  addq    4,      $acr
+  move.d  [$acr], $r0
+  move    $r0,    $s4
+  addq    4,      $acr
+  move.d  [$acr], $r0
+  move    $r0,    $s5
+  addq    4,      $acr
+  move.d  [$acr], $r0
+  move    $r0,    $s6
+  addq    4,      $acr
+  move.d  [$acr], $r0
+  move    $r0,    $s7
+  addq    4,      $acr
+  move.d  [$acr], $r0
+  move    $r0,    $s8
+  addq    4,      $acr
+  move.d  [$acr], $r0
+  move    $r0,    $s9
+  addq    4,      $acr
+  move.d  [$acr], $r0
+  move    $r0,    $s10
+  addq    4,      $acr
+  move.d  [$acr], $r0
+  move    $r0,    $s11
+  addq    4,      $acr
+  move.d  [$acr], $r0
+  move    $r0,    $s12
+  addq    4,      $acr
+  move.d  [$acr], $r0
+  move    $r0,    $s13
+  addq    4,      $acr
+  move.d  [$acr], $r0
+  move    $r0,    $s14
+  addq    4,      $acr
+
+;; Nothing in S15, bank 3
+  addq    4,      $acr
+
+;; Now, move on to the regular register restoration process.
+
+   move.d  reg,    $acr   ; Reset ACR to point at the beginning of the register image
+   move.d  [$acr], $r0    ; Restore R0
+   addq    4,      $acr
+   move.d  [$acr], $r1    ; Restore R1
+   addq    4,      $acr
+   move.d  [$acr], $r2    ; Restore R2
+   addq    4,      $acr
+   move.d  [$acr], $r3    ; Restore R3
+   addq    4,      $acr
+   move.d  [$acr], $r4    ; Restore R4
+   addq    4,      $acr
+   move.d  [$acr], $r5    ; Restore R5
+   addq    4,      $acr
+   move.d  [$acr], $r6    ; Restore R6
+   addq    4,      $acr
+   move.d  [$acr], $r7    ; Restore R7
+   addq    4,      $acr
+   move.d  [$acr], $r8    ; Restore R8
+   addq    4,      $acr
+   move.d  [$acr], $r9    ; Restore R9
+   addq    4,      $acr
+   move.d  [$acr], $r10   ; Restore R10
+   addq    4,      $acr
+   move.d  [$acr], $r11   ; Restore R11
+   addq    4,      $acr
+   move.d  [$acr], $r12   ; Restore R12
+   addq    4,      $acr
+   move.d  [$acr], $r13   ; Restore R13
+
+;;
+;; We restore all registers, even though some of them probably haven't changed.
+;;
+
+   addq    4,      $acr
+   move.d  [$acr], $sp    ; Restore SP (R14)
+
+   ;; ACR cannot be restored just yet.
+   addq    8,      $acr
+
+   ;; Skip BZ, VR.
+   addq    2,      $acr
+
+   move    [$acr], $pid   ; Restore PID
+   addq    4,      $acr
+   move    [$acr], $srs   ; Restore SRS
+   nop
+   nop
+   nop
+   addq    1,      $acr
+
+   ;; Skip WZ.
+   addq    2,      $acr
+
+   move    [$acr], $exs    ; Restore EXS.
+   addq    4,      $acr
+   move    [$acr], $eda    ; Restore EDA.
+   addq    4,      $acr
+   move    [$acr], $mof    ; Restore MOF.
+
+   ;; Skip DZ.
+   addq    8,      $acr
+
+   move    [$acr], $ebp    ; Restore EBP.
+   addq    4,      $acr
+   move    [$acr], $erp    ; Restore ERP.
+   addq    4,      $acr
+   move    [$acr], $srp    ; Restore SRP.
+   addq    4,      $acr
+   move    [$acr], $nrp    ; Restore NRP.
+   addq    4,      $acr
+   move    [$acr], $ccs    ; Restore CCS like an ordinary register.
+   addq    4,      $acr
+   move    [$acr], $usp    ; Restore USP
+   addq    4,      $acr
+   move    [$acr], $spc    ; Restore SPC
+                           ; No restoration of pseudo-PC of course.
+
+   move.d  reg,    $acr    ; Reset ACR to point at the beginning of the register image
+   add.d   15*4,   $acr
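+   ;; (ACR was saved as the 16th doubleword, right after R0-R13 and SP.)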
+   move.d  [$acr], $acr    ; Finally, restore ACR.
+   rete                    ; Same as jump ERP
+   rfe                     ; Shifts CCS
diff --git a/arch/cris/arch-v32/kernel/process.c b/arch/cris/arch-v32/kernel/process.c
new file mode 100644
index 0000000..c7ce784
--- /dev/null
+++ b/arch/cris/arch-v32/kernel/process.c
@@ -0,0 +1,184 @@
+/*
+ *  Copyright (C) 2000-2003  Axis Communications AB
+ *
+ *  Authors:   Bjorn Wesen (bjornw@axis.com)
+ *             Mikael Starvik (starvik@axis.com)
+ *             Tobias Anderberg (tobiasa@axis.com), CRISv32 port.
+ *
+ * This file handles the architecture-dependent parts of process handling.
+ */
+
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/err.h>
+#include <linux/fs.h>
+#include <hwregs/reg_rdwr.h>
+#include <hwregs/reg_map.h>
+#include <hwregs/timer_defs.h>
+#include <hwregs/intr_vect_defs.h>
+#include <linux/ptrace.h>
+
+extern void stop_watchdog(void);
+
+/* We use this if we don't have any better idle routine. */
+void default_idle(void)
+{
+	local_irq_enable();
+	/* Halt until exception. */
+	__asm__ volatile("halt");
+}
+
+/*
+ * Free current thread data structures etc..
+ */
+
+extern void deconfigure_bp(long pid);
+void exit_thread(void)
+{
+	deconfigure_bp(current->pid);
+}
+
+/*
+ * If the watchdog is enabled, disable interrupts and enter an infinite loop.
+ * The watchdog will reset the CPU after 0.1s. If the watchdog isn't enabled
+ * then enable it and wait.
+ */
+extern void arch_enable_nmi(void);
+
+void
+hard_reset_now(void)
+{
+	/*
+	 * Don't declare this variable elsewhere.  We don't want any other
+	 * code to know about it than the watchdog handler in entry.S and
+	 * this code, implementing hard reset through the watchdog.
+	 */
+#if defined(CONFIG_ETRAX_WATCHDOG)
+	extern int cause_of_death;
+#endif
+
+	printk("*** HARD RESET ***\n");
+	local_irq_disable();
+
+#if defined(CONFIG_ETRAX_WATCHDOG)
+	cause_of_death = 0xbedead;
+#else
+{
+	reg_timer_rw_wd_ctrl wd_ctrl = {0};
+
+	stop_watchdog();
+
+	wd_ctrl.key = 16;	/* Arbitrary key. */
+	wd_ctrl.cnt = 1;	/* Minimum time. */
+	wd_ctrl.cmd = regk_timer_start;
+
+        arch_enable_nmi();
+	REG_WR(timer, regi_timer0, rw_wd_ctrl, wd_ctrl);
+}
+#endif
+
+	while (1)
+		; /* Wait for reset. */
+}
+
+/*
+ * Return saved PC of a blocked thread.
+ */
+unsigned long thread_saved_pc(struct task_struct *t)
+{
+	return task_pt_regs(t)->erp;
+}
+
+/*
+ * Set up the child's kernel stack with a pt_regs and a switch_stack on it.
+ * They will be unwound during _resume and _ret_from_sys_call when the new
+ * thread is scheduled.
+ *
+ * Also setup the thread switching structure which is used to keep
+ * thread-specific data during _resumes.
+ */
+
+extern asmlinkage void ret_from_fork(void);
+extern asmlinkage void ret_from_kernel_thread(void);
+
+int
+copy_thread(unsigned long clone_flags, unsigned long usp,
+	unsigned long arg, struct task_struct *p)
+{
+	struct pt_regs *childregs = task_pt_regs(p);
+	struct switch_stack *swstack = ((struct switch_stack *) childregs) - 1;
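+	/* The switch_stack sits immediately below the pt_regs at the top of
+	   the child's kernel stack. */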
+
+	/*
+	 * Put the pt_regs structure at the end of the new kernel stack page and
+	 * fix it up. Note: the task_struct doubles as the kernel stack for the
+	 * task.
+	 */
+	if (unlikely(p->flags & PF_KTHREAD)) {
+		memset(swstack, 0,
+			sizeof(struct switch_stack) + sizeof(struct pt_regs));
+		swstack->r1 = usp;
+		swstack->r2 = arg;
+		childregs->ccs = 1 << (I_CCS_BITNR + CCS_SHIFT);
+		swstack->return_ip = (unsigned long) ret_from_kernel_thread;
+		p->thread.ksp = (unsigned long) swstack;
+		p->thread.usp = 0;
+		return 0;
+	}
+	*childregs = *current_pt_regs();	/* Struct copy of pt_regs. */
+	childregs->r10 = 0;	/* Child returns 0 after a fork/clone. */
+
+	/* Set a new TLS?
+	 * The TLS is in $mof because it is the 5th argument to sys_clone.
+	 */
+	if (p->mm && (clone_flags & CLONE_SETTLS)) {
+		task_thread_info(p)->tls = childregs->mof;
+	}
+
+	/* Put the switch stack right below the pt_regs. */
+
+	/* Parameter to ret_from_sys_call. 0 is don't restart the syscall. */
+	swstack->r9 = 0;
+
+	/*
+	 * We want to return into ret_from_sys_call after the _resume.
+	 * ret_from_fork will call ret_from_sys_call.
+	 */
+	swstack->return_ip = (unsigned long) ret_from_fork;
+
+	/* Set the user-mode and kernel-mode stack pointers; if no usp was
+	   given, inherit the parent's current one. */
+	p->thread.usp = usp ?: rdusp();
+	p->thread.ksp = (unsigned long) swstack;
+
+	return 0;
+}
+
+unsigned long
+get_wchan(struct task_struct *p)
+{
+	/* TODO */
+	return 0;
+}
+#undef last_sched
+#undef first_sched
+
+void show_regs(struct pt_regs * regs)
+{
+	unsigned long usp = rdusp();
+
+	show_regs_print_info(KERN_DEFAULT);
+
+	printk("ERP: %08lx SRP: %08lx  CCS: %08lx USP: %08lx MOF: %08lx\n",
+		regs->erp, regs->srp, regs->ccs, usp, regs->mof);
+
+	printk(" r0: %08lx  r1: %08lx   r2: %08lx  r3: %08lx\n",
+		regs->r0, regs->r1, regs->r2, regs->r3);
+
+	printk(" r4: %08lx  r5: %08lx   r6: %08lx  r7: %08lx\n",
+		regs->r4, regs->r5, regs->r6, regs->r7);
+
+	printk(" r8: %08lx  r9: %08lx  r10: %08lx r11: %08lx\n",
+		regs->r8, regs->r9, regs->r10, regs->r11);
+
+	printk("r12: %08lx r13: %08lx oR10: %08lx\n",
+		regs->r12, regs->r13, regs->orig_r10);
+}
diff --git a/arch/cris/arch-v32/kernel/ptrace.c b/arch/cris/arch-v32/kernel/ptrace.c
new file mode 100644
index 0000000..f085229
--- /dev/null
+++ b/arch/cris/arch-v32/kernel/ptrace.c
@@ -0,0 +1,490 @@
+/*
+ * Copyright (C) 2000-2007, Axis Communications AB.
+ */
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/errno.h>
+#include <linux/ptrace.h>
+#include <linux/user.h>
+#include <linux/signal.h>
+#include <linux/security.h>
+
+#include <asm/uaccess.h>
+#include <asm/page.h>
+#include <asm/pgtable.h>
+#include <asm/processor.h>
+#include <arch/hwregs/supp_reg.h>
+
+/*
+ * Determines which bits in CCS the user has access to.
+ * 1 = access, 0 = no access.
+ */
+#define CCS_MASK 0x00087c00     /* SXNZVC */
+
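+/* The S flag (single-step/hardware breakpoint enable) as seen in the
+   user-visible CCS. */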
+#define SBIT_USER (1 << (S_CCS_BITNR + CCS_SHIFT))
+
+static int put_debugreg(long pid, unsigned int regno, long data);
+static long get_debugreg(long pid, unsigned int regno);
+static unsigned long get_pseudo_pc(struct task_struct *child);
+void deconfigure_bp(long pid);
+
+extern unsigned long cris_signal_return_page;
+
+/*
+ * Get contents of register REGNO in task TASK.
+ */
+long get_reg(struct task_struct *task, unsigned int regno)
+{
+	/* USP is a special case: it's not in the pt_regs struct but
+	 * in the task's thread struct.
+	 */
+	unsigned long ret;
+
+	if (regno <= PT_EDA)
+		ret = ((unsigned long *)task_pt_regs(task))[regno];
+	else if (regno == PT_USP)
+		ret = task->thread.usp;
+	else if (regno == PT_PPC)
+		ret = get_pseudo_pc(task);
+	else if (regno <= PT_MAX)
+		ret = get_debugreg(task->pid, regno);
+	else
+		ret = 0;
+
+	return ret;
+}
+
+/*
+ * Write contents of register REGNO in task TASK.
+ */
+int put_reg(struct task_struct *task, unsigned int regno, unsigned long data)
+{
+	if (regno <= PT_EDA)
+		((unsigned long *)task_pt_regs(task))[regno] = data;
+	else if (regno == PT_USP)
+		task->thread.usp = data;
+	else if (regno == PT_PPC) {
+		/* Write pseudo-PC to ERP only if changed. */
+		if (data != get_pseudo_pc(task))
+			task_pt_regs(task)->erp = data;
+	} else if (regno <= PT_MAX)
+		return put_debugreg(task->pid, regno, data);
+	else
+		return -1;
+	return 0;
+}
+
+void user_enable_single_step(struct task_struct *child)
+{
+	unsigned long tmp;
+
+	/*
+	 * Set up SPC if not set already (in which case we have no other
+	 * choice but to trust it).
+	 */
+	if (!get_reg(child, PT_SPC)) {
+		/* In case we're stopped in a delay slot: bit 0 of ERP is the
+		   delay-slot flag, so mask it off to get the instruction
+		   address. */
+		tmp = get_reg(child, PT_ERP) & ~1;
+		put_reg(child, PT_SPC, tmp);
+	}
+	tmp = get_reg(child, PT_CCS) | SBIT_USER;
+	put_reg(child, PT_CCS, tmp);
+}
+
+void user_disable_single_step(struct task_struct *child)
+{
+	put_reg(child, PT_SPC, 0);
+
+	if (!get_debugreg(child->pid, PT_BP_CTRL)) {
+		unsigned long tmp;
+		/* If no h/w bp configured, disable S bit. */
+		tmp = get_reg(child, PT_CCS) & ~SBIT_USER;
+		put_reg(child, PT_CCS, tmp);
+	}
+}
+
+/*
+ * Called by kernel/ptrace.c when detaching.
+ *
+ * Make sure the single step bit is not set.
+ */
+void
+ptrace_disable(struct task_struct *child)
+{
+	/* Deconfigure SPC and S-bit. */
+	user_disable_single_step(child);
+	put_reg(child, PT_SPC, 0);
+
+	/* Deconfigure any watchpoints associated with the child. */
+	deconfigure_bp(child->pid);
+}
+
+
+long arch_ptrace(struct task_struct *child, long request,
+		 unsigned long addr, unsigned long data)
+{
+	int ret;
+	unsigned int regno = addr >> 2;
+	unsigned long __user *datap = (unsigned long __user *)data;
+
+	switch (request) {
+		/* Read word at location address. */
+		case PTRACE_PEEKTEXT:
+		case PTRACE_PEEKDATA: {
+			unsigned long tmp;
+			int copied;
+
+			ret = -EIO;
+
+			/* The signal trampoline page is outside the normal user-addressable
+			 * space but still accessible. This is a hack to make it possible to
+			 * access the signal handler code in GDB.
+			 */
+			if ((addr & PAGE_MASK) == cris_signal_return_page) {
+				/* The trampoline page is globally mapped, no page table to traverse. */
+				tmp = *(unsigned long*)addr;
+			} else {
+				copied = access_process_vm(child, addr, &tmp, sizeof(tmp), 0);
+
+				if (copied != sizeof(tmp))
+					break;
+			}
+
+			ret = put_user(tmp, datap);
+			break;
+		}
+
+		/* Read the word at location address in the USER area. */
+		case PTRACE_PEEKUSR: {
+			unsigned long tmp;
+
+			ret = -EIO;
+			if ((addr & 3) || regno > PT_MAX)
+				break;
+
+			tmp = get_reg(child, regno);
+			ret = put_user(tmp, datap);
+			break;
+		}
+
+		/* Write the word at location address. */
+		case PTRACE_POKETEXT:
+		case PTRACE_POKEDATA:
+			ret = generic_ptrace_pokedata(child, addr, data);
+			break;
+
+		/* Write the word at location address in the USER area. */
+		case PTRACE_POKEUSR:
+			ret = -EIO;
+			if ((addr & 3) || regno > PT_MAX)
+				break;
+
+			if (regno == PT_CCS) {
+				/* Don't allow the tracing process to change
+				 * things like interrupt enable, the kernel/user
+				 * bit, DMA enables, etc.
+				 */
+				data &= CCS_MASK;
+				data |= get_reg(child, PT_CCS) & ~CCS_MASK;
+			}
+			if (put_reg(child, regno, data))
+				break;
+			ret = 0;
+			break;
+
+		/* Get all GP registers from the child. */
+		case PTRACE_GETREGS: {
+			int i;
+			unsigned long tmp;
+
+			for (i = 0; i <= PT_MAX; i++) {
+				tmp = get_reg(child, i);
+
+				if (put_user(tmp, datap)) {
+					ret = -EFAULT;
+					goto out_tsk;
+				}
+
+				datap++;
+			}
+
+			ret = 0;
+			break;
+		}
+
+		/* Set all GP registers in the child. */
+		case PTRACE_SETREGS: {
+			int i;
+			unsigned long tmp;
+
+			for (i = 0; i <= PT_MAX; i++) {
+				if (get_user(tmp, datap)) {
+					ret = -EFAULT;
+					goto out_tsk;
+				}
+
+				if (i == PT_CCS) {
+					tmp &= CCS_MASK;
+					tmp |= get_reg(child, PT_CCS) & ~CCS_MASK;
+				}
+
+				put_reg(child, i, tmp);
+				datap++;
+			}
+
+			ret = 0;
+			break;
+		}
+
+		default:
+			ret = ptrace_request(child, request, addr, data);
+			break;
+	}
+
+out_tsk:
+	return ret;
+}
+
+void do_syscall_trace(void)
+{
+	if (!test_thread_flag(TIF_SYSCALL_TRACE))
+		return;
+
+	if (!(current->ptrace & PT_PTRACED))
+		return;
+
+	/* the 0x80 provides a way for the tracing parent to distinguish
+	   between a syscall stop and SIGTRAP delivery */
+	ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD)
+				 ? 0x80 : 0));
+
+	/*
+	 * This isn't the same as continuing with a signal, but it will do for
+	 * normal use.
+	 */
+	if (current->exit_code) {
+		send_sig(current->exit_code, current, 1);
+		current->exit_code = 0;
+	}
+}
+
+/* Returns the size of an instruction that has a delay slot. */
+
+static int insn_size(struct task_struct *child, unsigned long pc)
+{
+	unsigned long opcode;
+	int copied;
+	int opsize = 0;
+
+	/* Read the opcode at pc (do what PTRACE_PEEKTEXT would do). */
+	copied = access_process_vm(child, pc, &opcode, sizeof(opcode), 0);
+	if (copied != sizeof(opcode))
+		return 0;
+
+	switch ((opcode & 0x0f00) >> 8) {
+	case 0x0:
+	case 0x9:
+	case 0xb:
+		opsize = 2;
+		break;
+	case 0xe:
+	case 0xf:
+		opsize = 6;
+		break;
+	case 0xd:
+		/* Could be 4 or 6; check more bits. */
+		if ((opcode & 0xff) == 0xff)
+			opsize = 4;
+		else
+			opsize = 6;
+		break;
+	default:
+		panic("ERROR: Couldn't find size of opcode 0x%lx at 0x%lx\n",
+		      opcode, pc);
+	}
+
+	return opsize;
+}
+
+static unsigned long get_pseudo_pc(struct task_struct *child)
+{
+	/* Default value for PC is ERP. */
+	unsigned long pc = get_reg(child, PT_ERP);
+
+	if (pc & 0x1) {
+		unsigned long spc = get_reg(child, PT_SPC);
+		/* Delay slot bit set. Report as stopped on proper
+		   instruction. */
+		if (spc) {
+			/* Rely on SPC if set. FIXME: We might want to check
+			   that EXS indicates we stopped due to a single-step
+			   exception. */
+			pc = spc;
+		} else {
+			/* Calculate the PC from the size of the instruction
+			   that the delay slot we're in belongs to. PC still
+			   carries the delay-slot bit, so the -1 cancels it:
+			   the result is (pc & ~1) + insn size. */
+			pc += insn_size(child, pc & ~1) - 1;
+		}
+	}
+	return pc;
+}
+
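+/* PID of the process currently owning the hardware breakpoint/watchpoint
+   registers, or 0 when they are free. */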
+static long bp_owner = 0;
+
+/* Reachable from exit_thread in signal.c, so not static. */
+void deconfigure_bp(long pid)
+{
+	int bp;
+
+	/* Only deconfigure if the pid is the owner. */
+	if (bp_owner != pid)
+		return;
+
+	for (bp = 0; bp < 6; bp++) {
+		unsigned long tmp;
+		/* Deconfigure start and end address (also gets rid of ownership). */
+		put_debugreg(pid, PT_BP + 3 + (bp * 2), 0);
+		put_debugreg(pid, PT_BP + 4 + (bp * 2), 0);
+
+		/* Deconfigure relevant bits in control register. */
+		tmp = get_debugreg(pid, PT_BP_CTRL) & ~(3 << (2 + (bp * 4)));
+		put_debugreg(pid, PT_BP_CTRL, tmp);
+	}
+	/* No owner now. */
+	bp_owner = 0;
+}
+
+static int put_debugreg(long pid, unsigned int regno, long data)
+{
+	int ret = 0;
+	register int old_srs;
+
+#ifdef CONFIG_ETRAX_KGDB
+	/* Ignore write, but pretend it was ok if value is 0
+	   (we don't want POKEUSR/SETREGS failing unnecessarily). */
+	return (data == 0) ? ret : -1;
+#endif
+
+	/* Simple owner management. */
+	if (!bp_owner)
+		bp_owner = pid;
+	else if (bp_owner != pid) {
+		/* Ignore write, but pretend it was ok if value is 0
+		   (we don't want POKEUSR/SETREGS failing unnecessarily). */
+		return (data == 0) ? ret : -1;
+	}
+
+	/* Remember old SRS. */
+	SPEC_REG_RD(SPEC_REG_SRS, old_srs);
+	/* Switch to BP bank. */
+	SUPP_BANK_SEL(BANK_BP);
+
+	switch (regno - PT_BP) {
+	case 0:
+		SUPP_REG_WR(0, data); break;
+	case 1:
+	case 2:
+		if (data)
+			ret = -1;
+		break;
+	case 3:
+		SUPP_REG_WR(3, data); break;
+	case 4:
+		SUPP_REG_WR(4, data); break;
+	case 5:
+		SUPP_REG_WR(5, data); break;
+	case 6:
+		SUPP_REG_WR(6, data); break;
+	case 7:
+		SUPP_REG_WR(7, data); break;
+	case 8:
+		SUPP_REG_WR(8, data); break;
+	case 9:
+		SUPP_REG_WR(9, data); break;
+	case 10:
+		SUPP_REG_WR(10, data); break;
+	case 11:
+		SUPP_REG_WR(11, data); break;
+	case 12:
+		SUPP_REG_WR(12, data); break;
+	case 13:
+		SUPP_REG_WR(13, data); break;
+	case 14:
+		SUPP_REG_WR(14, data); break;
+	default:
+		ret = -1;
+		break;
+	}
+
+	/* Restore SRS. */
+	SPEC_REG_WR(SPEC_REG_SRS, old_srs);
+	/* Just for show. */
+	NOP();
+	NOP();
+	NOP();
+
+	return ret;
+}
+
+static long get_debugreg(long pid, unsigned int regno)
+{
+	register int old_srs;
+	register long data;
+
+	if (pid != bp_owner) {
+		return 0;
+	}
+
+	/* Remember old SRS. */
+	SPEC_REG_RD(SPEC_REG_SRS, old_srs);
+	/* Switch to BP bank. */
+	SUPP_BANK_SEL(BANK_BP);
+
+	switch (regno - PT_BP) {
+	case 0:
+		SUPP_REG_RD(0, data); break;
+	case 1:
+	case 2:
+		/* error return value? */
+		data = 0;
+		break;
+	case 3:
+		SUPP_REG_RD(3, data); break;
+	case 4:
+		SUPP_REG_RD(4, data); break;
+	case 5:
+		SUPP_REG_RD(5, data); break;
+	case 6:
+		SUPP_REG_RD(6, data); break;
+	case 7:
+		SUPP_REG_RD(7, data); break;
+	case 8:
+		SUPP_REG_RD(8, data); break;
+	case 9:
+		SUPP_REG_RD(9, data); break;
+	case 10:
+		SUPP_REG_RD(10, data); break;
+	case 11:
+		SUPP_REG_RD(11, data); break;
+	case 12:
+		SUPP_REG_RD(12, data); break;
+	case 13:
+		SUPP_REG_RD(13, data); break;
+	case 14:
+		SUPP_REG_RD(14, data); break;
+	default:
+		/* error return value? */
+		data = 0;
+	}
+
+	/* Restore SRS. */
+	SPEC_REG_WR(SPEC_REG_SRS, old_srs);
+	/* Just for show. */
+	NOP();
+	NOP();
+	NOP();
+
+	return data;
+}
diff --git a/arch/cris/arch-v32/kernel/setup.c b/arch/cris/arch-v32/kernel/setup.c
new file mode 100644
index 0000000..fe50287
--- /dev/null
+++ b/arch/cris/arch-v32/kernel/setup.c
@@ -0,0 +1,162 @@
+/*
+ * Display CPU info in /proc/cpuinfo.
+ *
+ * Copyright (C) 2003, Axis Communications AB.
+ */
+
+#include <linux/seq_file.h>
+#include <linux/proc_fs.h>
+#include <linux/delay.h>
+#include <linux/param.h>
+
+#include <linux/i2c.h>
+#include <linux/platform_device.h>
+
+#ifdef CONFIG_PROC_FS
+
+#define HAS_FPU         0x0001
+#define HAS_MMU         0x0002
+#define HAS_ETHERNET100 0x0004
+#define HAS_TOKENRING   0x0008
+#define HAS_SCSI        0x0010
+#define HAS_ATA         0x0020
+#define HAS_USB         0x0040
+#define HAS_IRQ_BUG     0x0080
+#define HAS_MMU_BUG     0x0100
+
+struct cpu_info {
+	char *cpu_model;
+	unsigned short rev;
+	unsigned short cache_size;
+	unsigned short flags;
+};
+
+/* Some of these models are here for historical reasons only. */
+static struct cpu_info cpinfo[] = {
+	{"ETRAX 1", 0, 0, 0},
+	{"ETRAX 2", 1, 0, 0},
+	{"ETRAX 3", 2, 0, 0},
+	{"ETRAX 4", 3, 0, 0},
+	{"Simulator", 7, 8, HAS_ETHERNET100 | HAS_SCSI | HAS_ATA},
+	{"ETRAX 100", 8, 8, HAS_ETHERNET100 | HAS_SCSI | HAS_ATA | HAS_IRQ_BUG},
+	{"ETRAX 100", 9, 8, HAS_ETHERNET100 | HAS_SCSI | HAS_ATA},
+
+	{"ETRAX 100LX", 10, 8, HAS_ETHERNET100 | HAS_SCSI | HAS_ATA | HAS_USB
+			     | HAS_MMU | HAS_MMU_BUG},
+
+	{"ETRAX 100LX v2", 11, 8, HAS_ETHERNET100 | HAS_SCSI | HAS_ATA | HAS_USB
+			        | HAS_MMU},
+#ifdef CONFIG_ETRAXFS
+	{"ETRAX FS", 32, 32, HAS_ETHERNET100 | HAS_ATA | HAS_MMU},
+#else
+	{"ARTPEC-3", 32, 32, HAS_ETHERNET100 | HAS_MMU},
+#endif
+	{"Unknown", 0, 0, 0}
+};
+
+int show_cpuinfo(struct seq_file *m, void *v)
+{
+	int i;
+	int cpu = (int)v - 1;
+	unsigned long revision;
+	struct cpu_info *info;
+
+	info = &cpinfo[ARRAY_SIZE(cpinfo) - 1];
+
+	revision = rdvr();
+
+	for (i = 0; i < ARRAY_SIZE(cpinfo); i++) {
+		if (cpinfo[i].rev == revision) {
+			info = &cpinfo[i];
+			break;
+		}
+	}
+
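+	/* The BogoMIPS value below is loops_per_jiffy * HZ / 500000,
+	   printed with two decimal places. */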
+	seq_printf(m,
+		   "processor\t: %d\n"
+		   "cpu\t\t: CRIS\n"
+		   "cpu revision\t: %lu\n"
+		   "cpu model\t: %s\n"
+		   "cache size\t: %d KB\n"
+		   "fpu\t\t: %s\n"
+		   "mmu\t\t: %s\n"
+		   "mmu DMA bug\t: %s\n"
+		   "ethernet\t: %s Mbps\n"
+		   "token ring\t: %s\n"
+		   "scsi\t\t: %s\n"
+		   "ata\t\t: %s\n"
+		   "usb\t\t: %s\n"
+		   "bogomips\t: %lu.%02lu\n\n",
+
+		   cpu,
+		   revision,
+		   info->cpu_model,
+		   info->cache_size,
+		   info->flags & HAS_FPU ? "yes" : "no",
+		   info->flags & HAS_MMU ? "yes" : "no",
+		   info->flags & HAS_MMU_BUG ? "yes" : "no",
+		   info->flags & HAS_ETHERNET100 ? "10/100" : "10",
+		   info->flags & HAS_TOKENRING ? "4/16 Mbps" : "no",
+		   info->flags & HAS_SCSI ? "yes" : "no",
+		   info->flags & HAS_ATA ? "yes" : "no",
+		   info->flags & HAS_USB ? "yes" : "no",
+		   (loops_per_jiffy * HZ + 500) / 500000,
+		   ((loops_per_jiffy * HZ + 500) / 5000) % 100);
+
+	return 0;
+}
+
+#endif /* CONFIG_PROC_FS */
+
+void show_etrax_copyright(void)
+{
+#ifdef CONFIG_ETRAXFS
+	printk(KERN_INFO "Linux/CRISv32 port on ETRAX FS "
+		"(C) 2003, 2004 Axis Communications AB\n");
+#else
+	printk(KERN_INFO "Linux/CRISv32 port on ARTPEC-3 "
+		"(C) 2003-2009 Axis Communications AB\n");
+#endif
+}
+
+static struct i2c_board_info __initdata i2c_info[] = {
+	{I2C_BOARD_INFO("camblock", 0x43)},
+	{I2C_BOARD_INFO("tmp100", 0x48)},
+	{I2C_BOARD_INFO("tmp100", 0x4A)},
+	{I2C_BOARD_INFO("tmp100", 0x4C)},
+	{I2C_BOARD_INFO("tmp100", 0x4D)},
+	{I2C_BOARD_INFO("tmp100", 0x4E)},
+#ifdef CONFIG_RTC_DRV_PCF8563
+	{I2C_BOARD_INFO("pcf8563", 0x51)},
+#endif
+	{I2C_BOARD_INFO("pca9536", 0x41)},
+	{I2C_BOARD_INFO("fnp300", 0x40)},
+	{I2C_BOARD_INFO("fnp300", 0x42)},
+	{I2C_BOARD_INFO("adc101", 0x54)},
+};
+
+static struct i2c_board_info __initdata i2c_info2[] = {
+	{I2C_BOARD_INFO("camblock", 0x43)},
+	{I2C_BOARD_INFO("tmp100", 0x48)},
+	{I2C_BOARD_INFO("tmp100", 0x4A)},
+	{I2C_BOARD_INFO("tmp100", 0x4C)},
+	{I2C_BOARD_INFO("tmp100", 0x4D)},
+	{I2C_BOARD_INFO("tmp100", 0x4E)},
+	{I2C_BOARD_INFO("pca9536", 0x41)},
+	{I2C_BOARD_INFO("fnp300", 0x40)},
+	{I2C_BOARD_INFO("fnp300", 0x42)},
+	{I2C_BOARD_INFO("adc101", 0x54)},
+};
+
+static struct i2c_board_info __initdata i2c_info3[] = {
+	{I2C_BOARD_INFO("adc101", 0x54)},
+};
+
+static int __init etrax_init(void)
+{
+	i2c_register_board_info(0, i2c_info, ARRAY_SIZE(i2c_info));
+	i2c_register_board_info(1, i2c_info2, ARRAY_SIZE(i2c_info2));
+	i2c_register_board_info(2, i2c_info3, ARRAY_SIZE(i2c_info3));
+	return 0;
+}
+arch_initcall(etrax_init);
diff --git a/arch/cris/arch-v32/kernel/signal.c b/arch/cris/arch-v32/kernel/signal.c
new file mode 100644
index 0000000..150d1d7
--- /dev/null
+++ b/arch/cris/arch-v32/kernel/signal.c
@@ -0,0 +1,539 @@
+/*
+ * Copyright (C) 2003, Axis Communications AB.
+ */
+
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/kernel.h>
+#include <linux/signal.h>
+#include <linux/errno.h>
+#include <linux/wait.h>
+#include <linux/ptrace.h>
+#include <linux/unistd.h>
+#include <linux/stddef.h>
+#include <linux/syscalls.h>
+#include <linux/vmalloc.h>
+
+#include <asm/io.h>
+#include <asm/processor.h>
+#include <asm/ucontext.h>
+#include <asm/uaccess.h>
+#include <arch/hwregs/cpu_vect.h>
+
+extern unsigned long cris_signal_return_page;
+
+/*
+ * A syscall in CRIS is really a "break 13" instruction, which is 2
+ * bytes. The registers are manipulated so that upon return the instruction
+ * will be executed again.
+ *
+ * This relies on the saved PC (ERP) pointing to the instruction after the break call.
+ */
+#define RESTART_CRIS_SYS(regs) regs->r10 = regs->orig_r10; regs->erp -= 2;
+
+/* Signal frames. */
+struct signal_frame {
+	struct sigcontext sc;
+	unsigned long extramask[_NSIG_WORDS - 1];
+	unsigned char retcode[8];	/* Trampoline code. */
+};
+
+struct rt_signal_frame {
+	struct siginfo *pinfo;
+	void *puc;
+	struct siginfo info;
+	struct ucontext uc;
+	unsigned char retcode[8];	/* Trampoline code. */
+};
+
+void do_signal(int restart, struct pt_regs *regs);
+void keep_debug_flags(unsigned long oldccs, unsigned long oldspc,
+		      struct pt_regs *regs);
+
+static int
+restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc)
+{
+	unsigned int err = 0;
+	unsigned long old_usp;
+
+        /* Always make any pending restarted system calls return -EINTR */
+	current->restart_block.fn = do_no_restart_syscall;
+
+	/*
+	 * Restore the registers from &sc->regs. sc is already checked
+	 * for VERIFY_READ since the signal_frame was previously
+	 * checked in sys_sigreturn().
+	 */
+	if (__copy_from_user(regs, sc, sizeof(struct pt_regs)))
+		goto badframe;
+
+	/* Make sure that the user-mode flag is set. */
+	regs->ccs |= (1 << (U_CCS_BITNR + CCS_SHIFT));
+
+	/* Don't perform syscall restarting */
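+	/* (do_signal() treats the frame as a syscall only when
+	   (int)regs->exs >= 0.) */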
+	regs->exs = -1;
+
+	/* Restore the old USP. */
+	err |= __get_user(old_usp, &sc->usp);
+	wrusp(old_usp);
+
+	return err;
+
+badframe:
+	return 1;
+}
+
+asmlinkage int sys_sigreturn(void)
+{
+	struct pt_regs *regs = current_pt_regs();
+	sigset_t set;
+	struct signal_frame __user *frame;
+	unsigned long oldspc = regs->spc;
+	unsigned long oldccs = regs->ccs;
+
+	frame = (struct signal_frame *) rdusp();
+
+	/*
+	 * Since the signal is stacked on a dword boundary, the frame
+	 * should be dword aligned here as well. If it's not, then the
+	 * user is trying some funny business.
+	 */
+	if (((long)frame) & 3)
+		goto badframe;
+
+	if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
+		goto badframe;
+
+	if (__get_user(set.sig[0], &frame->sc.oldmask) ||
+	    (_NSIG_WORDS > 1 && __copy_from_user(&set.sig[1],
+						 frame->extramask,
+						 sizeof(frame->extramask))))
+		goto badframe;
+
+	set_current_blocked(&set);
+
+	if (restore_sigcontext(regs, &frame->sc))
+		goto badframe;
+
+	keep_debug_flags(oldccs, oldspc, regs);
+
+	return regs->r10;
+
+badframe:
+	force_sig(SIGSEGV, current);
+	return 0;
+}
+
+asmlinkage int sys_rt_sigreturn(void)
+{
+	struct pt_regs *regs = current_pt_regs();
+	sigset_t set;
+	struct rt_signal_frame __user *frame;
+	unsigned long oldspc = regs->spc;
+	unsigned long oldccs = regs->ccs;
+
+	frame = (struct rt_signal_frame *) rdusp();
+
+	/*
+	 * Since the signal is stacked on a dword boundary, the frame
+	 * should be dword aligned here as well. If it's not, then the
+	 * user is trying some funny business.
+	 */
+	if (((long)frame) & 3)
+		goto badframe;
+
+	if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
+		goto badframe;
+
+	if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
+		goto badframe;
+
+	set_current_blocked(&set);
+
+	if (restore_sigcontext(regs, &frame->uc.uc_mcontext))
+		goto badframe;
+
+	if (restore_altstack(&frame->uc.uc_stack))
+		goto badframe;
+
+	keep_debug_flags(oldccs, oldspc, regs);
+
+	return regs->r10;
+
+badframe:
+	force_sig(SIGSEGV, current);
+	return 0;
+}
+
+/* Setup a signal frame. */
+static int
+setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs,
+		 unsigned long mask)
+{
+	int err;
+	unsigned long usp;
+
+	err = 0;
+	usp = rdusp();
+
+	/*
+	 * Copy the registers. They are located first in sc, so it's
+	 * possible to use sc directly.
+	 */
+	err |= __copy_to_user(sc, regs, sizeof(struct pt_regs));
+
+	err |= __put_user(mask, &sc->oldmask);
+	err |= __put_user(usp, &sc->usp);
+
+	return err;
+}
+
+/* Figure out where to put the new signal frame - usually on the stack. */
+static inline void __user *
+get_sigframe(struct ksignal *ksig, size_t frame_size)
+{
+	unsigned long sp = sigsp(rdusp(), ksig);
+
+	/* Make sure the frame is dword-aligned. */
+	sp &= ~3;
+
+	return (void __user *)(sp - frame_size);
+}
+
+/* Grab and setup a signal frame.
+ *
+ * Basically a lot of state-info is stacked, and arranged for the
+ * user-mode program to return to the kernel using either a trampoline
+ * which performs the syscall sigreturn(), or a provided user-mode
+ * trampoline.
+ */
+static int
+setup_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs)
+{
+	int err;
+	unsigned long return_ip;
+	struct signal_frame __user *frame;
+
+	err = 0;
+	frame = get_sigframe(ksig, sizeof(*frame));
+
+	if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
+		return -EFAULT;
+
+	err |= setup_sigcontext(&frame->sc, regs, set->sig[0]);
+
+	if (err)
+		return -EFAULT;
+
+	if (_NSIG_WORDS > 1) {
+		err |= __copy_to_user(frame->extramask, &set->sig[1],
+				      sizeof(frame->extramask));
+	}
+
+	if (err)
+		return -EFAULT;
+
+	/*
+	 * Set up to return from user-space. If provided, use a stub
+	 * already located in user-space.
+	 */
+	if (ksig->ka.sa.sa_flags & SA_RESTORER) {
+		return_ip = (unsigned long)ksig->ka.sa.sa_restorer;
+	} else {
+		/* Trampoline - the desired return ip is in the signal return page. */
+		return_ip = cris_signal_return_page;
+
+		/*
+		 * This is movu.w __NR_sigreturn, r9; break 13;
+		 *
+		 * WE DO NOT USE IT ANY MORE! It's only left here for historical
+		 * reasons and because gdb uses it as a signature to notice
+		 * signal handler stack frames.
+		 */
+		err |= __put_user(0x9c5f,         (short __user*)(frame->retcode+0));
+		err |= __put_user(__NR_sigreturn, (short __user*)(frame->retcode+2));
+		err |= __put_user(0xe93d,         (short __user*)(frame->retcode+4));
+	}
+
+	if (err)
+		return -EFAULT;
+
+	/*
+	 * Set up registers for the signal handler:
+	 * ERP is where the code enters now (the handler),
+	 * SRP is where the code enters later (the return trampoline),
+	 * R10 carries the first argument, signo.
+	 */
+	regs->erp = (unsigned long) ksig->ka.sa.sa_handler;
+	regs->srp = return_ip;
+	regs->r10 = ksig->sig;
+
+	/* Actually move the USP to reflect the stacked frame. */
+	wrusp((unsigned long)frame);
+
+	return 0;
+}
+
+static int
+setup_rt_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs)
+{
+	int err;
+	unsigned long return_ip;
+	struct rt_signal_frame __user *frame;
+
+	err = 0;
+	frame = get_sigframe(ksig, sizeof(*frame));
+
+	if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
+		return -EFAULT;
+
+	err |= __put_user(&frame->info, &frame->pinfo);
+	err |= __put_user(&frame->uc, &frame->puc);
+	err |= copy_siginfo_to_user(&frame->info, &ksig->info);
+
+	if (err)
+		return -EFAULT;
+
+	/* Clear all the bits of the ucontext we don't use.  */
+	err |= __clear_user(&frame->uc, offsetof(struct ucontext, uc_mcontext));
+	err |= setup_sigcontext(&frame->uc.uc_mcontext, regs, set->sig[0]);
+	err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
+	err |= __save_altstack(&frame->uc.uc_stack, rdusp());
+
+	if (err)
+		return -EFAULT;
+
+	/*
+	 * Set up to return from user-space. If provided, use a stub
+	 * already located in user-space.
+	 */
+	if (ksig->ka.sa.sa_flags & SA_RESTORER) {
+		return_ip = (unsigned long) ksig->ka.sa.sa_restorer;
+	} else {
+		/* Trampoline - the desired return ip is in the signal return page. */
+		return_ip = cris_signal_return_page + 6;
+
+		/*
+		 * This is movu.w __NR_rt_sigreturn, r9; break 13;
+		 *
+		 * WE DO NOT USE IT ANY MORE! It's only left here for historical
+		 * reasons and because gdb uses it as a signature to notice
+		 * signal handler stack frames.
+		 */
+		err |= __put_user(0x9c5f, (short __user*)(frame->retcode+0));
+
+		err |= __put_user(__NR_rt_sigreturn,
+				  (short __user*)(frame->retcode+2));
+
+		err |= __put_user(0xe93d, (short __user*)(frame->retcode+4));
+	}
+
+	if (err)
+		return -EFAULT;
+
+	/*
+	 * Set up registers for signal handler.
+	 *
+	 * Where the code enters now.
+	 * Where the code enters later.
+	 * First argument is signo.
+	 * Second argument is (siginfo_t *).
+	 * Third argument is unused.
+	 */
+	regs->erp = (unsigned long) ksig->ka.sa.sa_handler;
+	regs->srp = return_ip;
+	regs->r10 = ksig->sig;
+	regs->r11 = (unsigned long) &frame->info;
+	regs->r12 = 0;
+
+	/* Actually move the usp to reflect the stacked frame. */
+	wrusp((unsigned long)frame);
+
+	return 0;
+}
+
+/* Invoke a signal handler to, well, handle the signal. */
+static inline void
+handle_signal(int canrestart, struct ksignal *ksig, struct pt_regs *regs)
+{
+	sigset_t *oldset = sigmask_to_save();
+	int ret;
+
+	/* Check if this got called from a system call. */
+	if (canrestart) {
+		/* If so, check system call restarting. */
+		switch (regs->r10) {
+			case -ERESTART_RESTARTBLOCK:
+			case -ERESTARTNOHAND:
+				/*
+				 * This means that the syscall should
+				 * only be restarted if there was no
+				 * handler for the signal, and since
+				 * this point isn't reached unless
+				 * there is a handler, there's no need
+				 * to restart.
+				 */
+				regs->r10 = -EINTR;
+				break;
+
+			case -ERESTARTSYS:
+				/*
+				 * This means restart the syscall if
+				 * there is no handler, or the handler
+				 * was registered with SA_RESTART.
+				 */
+				if (!(ksig->ka.sa.sa_flags & SA_RESTART)) {
+					regs->r10 = -EINTR;
+					break;
+				}
+
+				/* Fall through. */
+
+			case -ERESTARTNOINTR:
+				/*
+				 * This means that the syscall should
+				 * be called again after the signal
+				 * handler returns.
+				 */
+				RESTART_CRIS_SYS(regs);
+				break;
+		}
+	}
+
+	/* Set up the stack frame. */
+	if (ksig->ka.sa.sa_flags & SA_SIGINFO)
+		ret = setup_rt_frame(ksig, oldset, regs);
+	else
+		ret = setup_frame(ksig, oldset, regs);
+
+	signal_setup_done(ret, ksig, 0);
+}
+
+/*
+ * Note that 'init' is a special process: it doesn't get signals it doesn't
+ * want to handle. Thus you cannot kill init with SIGKILL, not even by
+ * mistake.
+ *
+ * Also note that the regs structure given here as an argument, is the latest
+ * pushed pt_regs. It may or may not be the same as the first pushed registers
+ * when the initial usermode->kernelmode transition took place. Therefore
+ * we can use user_mode(regs) to see if we came directly from kernel or user
+ * mode below.
+ */
+void
+do_signal(int canrestart, struct pt_regs *regs)
+{
+	struct ksignal ksig;
+
+	canrestart = canrestart && ((int)regs->exs >= 0);
+
+	/*
+	 * We want the common case to go fast. This point may be reached
+	 * from kernel mode; if so, just return without doing anything.
+	 */
+	if (!user_mode(regs))
+		return;
+
+	if (get_signal(&ksig)) {
+		/* Whee!  Actually deliver the signal.  */
+		handle_signal(canrestart, &ksig, regs);
+		return;
+	}
+
+	/* Got here from a system call? */
+	if (canrestart) {
+		/* Restart the system call - no handlers present. */
+		if (regs->r10 == -ERESTARTNOHAND ||
+		    regs->r10 == -ERESTARTSYS ||
+		    regs->r10 == -ERESTARTNOINTR) {
+			RESTART_CRIS_SYS(regs);
+		}
+
+		if (regs->r10 == -ERESTART_RESTARTBLOCK) {
+			regs->r9 = __NR_restart_syscall;
+			regs->erp -= 2;
+		}
+	}
+
+	/*
+	 * If there's no signal to deliver, just put the saved sigmask back.
+	 */
+	restore_saved_sigmask();
+}
+
+asmlinkage void
+ugdb_trap_user(struct thread_info *ti, int sig)
+{
+	if (((user_regs(ti)->exs & 0xff00) >> 8) != SINGLE_STEP_INTR_VECT) {
+		/* Zero single-step PC if the reason we stopped wasn't a single
+		   step exception. This is to avoid relying on it when it isn't
+		   reliable. */
+		user_regs(ti)->spc = 0;
+	}
+	/* FIXME: Filter out false h/w breakpoint hits (i.e. EDA
+	   not within any configured h/w breakpoint range). Synchronize with
+	   what already exists for kernel debugging.  */
+	if (((user_regs(ti)->exs & 0xff00) >> 8) == BREAK_8_INTR_VECT) {
+		/* Break 8: subtract 2 from ERP unless in a delay slot. */
+		if (!(user_regs(ti)->erp & 0x1))
+			user_regs(ti)->erp -= 2;
+	}
+	sys_kill(ti->task->pid, sig);
+}
+
+void
+keep_debug_flags(unsigned long oldccs, unsigned long oldspc,
+		 struct pt_regs *regs)
+{
+	if (oldccs & (1 << Q_CCS_BITNR)) {
+		/* Pending single step due to single-stepping the break 13
+		   in the signal trampoline: keep the Q flag. */
+		regs->ccs |= (1 << Q_CCS_BITNR);
+		/* S flag should be set - complain if it's not. */
+		if (!(oldccs & (1 << (S_CCS_BITNR + CCS_SHIFT)))) {
+			printk(KERN_WARNING "Q flag but no S flag?\n");
+		}
+		regs->ccs |= (1 << (S_CCS_BITNR + CCS_SHIFT));
+		/* Assume the SPC is valid and interesting. */
+		regs->spc = oldspc;
+
+	} else if (oldccs & (1 << (S_CCS_BITNR + CCS_SHIFT))) {
+		/* If a h/w bp was set in the signal handler we need
+		   to keep the S flag. */
+		regs->ccs |= (1 << (S_CCS_BITNR + CCS_SHIFT));
+		/* Don't keep the old SPC though; if we got here due to
+		   a single-step, the Q flag should have been set. */
+	} else if (regs->spc) {
+		/* If we were single-stepping *before* the signal was taken,
+		   we don't want to restore that state now, because GDB will
+		   have forgotten all about it. */
+		regs->spc = 0;
+		regs->ccs &= ~(1 << (S_CCS_BITNR + CCS_SHIFT));
+	}
+}
+
+/* Set up the trampolines on the signal return page. */
+int __init
+cris_init_signal(void)
+{
+	u16 *data = kmalloc(PAGE_SIZE, GFP_KERNEL);
+
+	if (!data)
+		return -ENOMEM;
+
+	/* This is movu.w __NR_sigreturn, r9; break 13; */
+	data[0] = 0x9c5f;
+	data[1] = __NR_sigreturn;
+	data[2] = 0xe93d;
+	/* This is movu.w __NR_rt_sigreturn, r9; break 13; */
+	data[3] = 0x9c5f;
+	data[4] = __NR_rt_sigreturn;
+	data[5] = 0xe93d;
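+
+	/*
+	 * Layout note: the sigreturn trampoline starts at byte offset 0 and
+	 * the rt_sigreturn one at byte offset 6 (three u16 words), matching
+	 * cris_signal_return_page and cris_signal_return_page + 6 used by
+	 * setup_frame() and setup_rt_frame() above.
+	 */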
+
+	/* Map to userspace with appropriate permissions (no write access...) */
+	cris_signal_return_page = (unsigned long)
+		__ioremap_prot(virt_to_phys(data), PAGE_SIZE,
+			       PAGE_SIGNAL_TRAMPOLINE);
+
+	return 0;
+}
+
+__initcall(cris_init_signal);
diff --git a/arch/cris/arch-v32/kernel/time.c b/arch/cris/arch-v32/kernel/time.c
new file mode 100644
index 0000000..d2a8440
--- /dev/null
+++ b/arch/cris/arch-v32/kernel/time.c
@@ -0,0 +1,344 @@
+/*
+ *  linux/arch/cris/arch-v32/kernel/time.c
+ *
+ *  Copyright (C) 2003-2010 Axis Communications AB
+ *
+ */
+
+#include <linux/timex.h>
+#include <linux/time.h>
+#include <linux/clocksource.h>
+#include <linux/clockchips.h>
+#include <linux/interrupt.h>
+#include <linux/swap.h>
+#include <linux/sched.h>
+#include <linux/init.h>
+#include <linux/threads.h>
+#include <linux/cpufreq.h>
+#include <linux/sched_clock.h>
+#include <linux/mm.h>
+#include <asm/types.h>
+#include <asm/signal.h>
+#include <asm/io.h>
+#include <asm/delay.h>
+#include <asm/irq.h>
+#include <asm/irq_regs.h>
+
+#include <hwregs/reg_map.h>
+#include <hwregs/reg_rdwr.h>
+#include <hwregs/timer_defs.h>
+#include <hwregs/intr_vect_defs.h>
+#ifdef CONFIG_CRIS_MACH_ARTPEC3
+#include <hwregs/clkgen_defs.h>
+#endif
+
+/* Watchdog defines */
+#define ETRAX_WD_KEY_MASK	0x7F /* key is 7 bit */
+#define ETRAX_WD_HZ		763 /* watchdog counts at 763 Hz */
+/* Number of 763 Hz ticks before the watchdog bites */
+#define ETRAX_WD_CNT		((2*ETRAX_WD_HZ)/HZ + 1)
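+/*
+ * Worked example, assuming HZ=100: ETRAX_WD_CNT is then (2*763)/100 + 1
+ * = 16 counts, so the watchdog bites roughly 16/763 s ~= 21 ms after the
+ * last reset_watchdog().
+ */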
+
+#define CRISV32_TIMER_FREQ	(100000000lu)
+
+unsigned long timer_regs[NR_CPUS] =
+{
+	regi_timer0,
+};
+
+extern int set_rtc_mmss(unsigned long nowtime);
+
+#ifdef CONFIG_CPU_FREQ
+static int cris_time_freq_notifier(struct notifier_block *nb,
+				   unsigned long val, void *data);
+
+static struct notifier_block cris_time_freq_notifier_block = {
+	.notifier_call = cris_time_freq_notifier,
+};
+#endif
+
+unsigned long get_ns_in_jiffie(void)
+{
+	reg_timer_r_tmr0_data data;
+	unsigned long ns;
+
+	data = REG_RD(timer, regi_timer0, r_tmr0_data);
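+	/* The timer counts down from TIMER0_DIV at 100 MHz, so each
+	   elapsed count corresponds to 10 ns. */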
+	ns = (TIMER0_DIV - data) * 10;
+	return ns;
+}
+
+/* From timer MDS describing the hardware watchdog:
+ * 4.3.1 Watchdog Operation
+ * The watchdog timer is an 8-bit timer with a configurable start value.
+ * Once started the watchdog counts downwards with a frequency of 763 Hz
+ * (100/131072 MHz). When the watchdog counts down to 1, it generates an
+ * NMI (Non Maskable Interrupt), and when it counts down to 0, it resets the
+ * chip.
+ */
+/* This gives us 1.3 ms to do something useful when the NMI comes */
+
+/* Right now, starting the watchdog is the same as resetting it */
+#define start_watchdog reset_watchdog
+
+#if defined(CONFIG_ETRAX_WATCHDOG)
+static short int watchdog_key = 42;  /* arbitrary 7 bit number */
+#endif
+
+/* Number of pages to consider "out of memory". It is normal that the memory
+ * is used though, so set this really low. */
+#define WATCHDOG_MIN_FREE_PAGES 8
+
+#if defined(CONFIG_ETRAX_WATCHDOG_NICE_DOGGY)
+/* for reliable NICE_DOGGY behaviour */
+static int bite_in_progress;
+#endif
+
+void reset_watchdog(void)
+{
+#if defined(CONFIG_ETRAX_WATCHDOG)
+	reg_timer_rw_wd_ctrl wd_ctrl = { 0 };
+
+#if defined(CONFIG_ETRAX_WATCHDOG_NICE_DOGGY)
+	if (unlikely(bite_in_progress))
+		return;
+#endif
+	/* Only keep watchdog happy as long as we have memory left! */
+	if (nr_free_pages() > WATCHDOG_MIN_FREE_PAGES) {
+		/* Reset the watchdog with the inverse of the old key */
+		/* Invert key, which is 7 bits */
+		watchdog_key ^= ETRAX_WD_KEY_MASK;
+		wd_ctrl.cnt = ETRAX_WD_CNT;
+		wd_ctrl.cmd = regk_timer_start;
+		wd_ctrl.key = watchdog_key;
+		REG_WR(timer, regi_timer0, rw_wd_ctrl, wd_ctrl);
+	}
+#endif
+}
+
+/* stop the watchdog - we still need the correct key */
+
+void stop_watchdog(void)
+{
+#if defined(CONFIG_ETRAX_WATCHDOG)
+	reg_timer_rw_wd_ctrl wd_ctrl = { 0 };
+	watchdog_key ^= ETRAX_WD_KEY_MASK; /* invert key, which is 7 bits */
+	wd_ctrl.cnt = ETRAX_WD_CNT;
+	wd_ctrl.cmd = regk_timer_stop;
+	wd_ctrl.key = watchdog_key;
+	REG_WR(timer, regi_timer0, rw_wd_ctrl, wd_ctrl);
+#endif
+}
+
+extern void show_registers(struct pt_regs *regs);
+
+void handle_watchdog_bite(struct pt_regs *regs)
+{
+#if defined(CONFIG_ETRAX_WATCHDOG)
+	extern int cause_of_death;
+
+	nmi_enter();
+	oops_in_progress = 1;
+#if defined(CONFIG_ETRAX_WATCHDOG_NICE_DOGGY)
+	bite_in_progress = 1;
+#endif
+	printk(KERN_WARNING "Watchdog bite\n");
+
+	/* Check if forced restart or unexpected watchdog */
+	if (cause_of_death == 0xbedead) {
+#ifdef CONFIG_CRIS_MACH_ARTPEC3
+		/* There is a bug in Artpec-3 (voodoo TR 78) that requires
+		 * us to go to lower frequency for the reset to be reliable
+		 */
+		reg_clkgen_rw_clk_ctrl ctrl =
+			REG_RD(clkgen, regi_clkgen, rw_clk_ctrl);
+		ctrl.pll = 0;
+		REG_WR(clkgen, regi_clkgen, rw_clk_ctrl, ctrl);
+#endif
+		while (1);
+	}
+
+	/* Unexpected watchdog, stop the watchdog and dump registers. */
+	stop_watchdog();
+	printk(KERN_WARNING "Oops: bitten by watchdog\n");
+	show_registers(regs);
+	oops_in_progress = 0;
+	printk("\n"); /* Flush mtdoops.  */
+#ifndef CONFIG_ETRAX_WATCHDOG_NICE_DOGGY
+	reset_watchdog();
+#endif
+	while (1) /* nothing */;
+#endif
+}
+
+extern void cris_profile_sample(struct pt_regs *regs);
+static void __iomem *timer_base;
+
+static int crisv32_clkevt_switch_state(struct clock_event_device *dev)
+{
+	reg_timer_rw_tmr0_ctrl ctrl = {
+		.op = regk_timer_hold,
+		.freq = regk_timer_f100,
+	};
+
+	REG_WR(timer, timer_base, rw_tmr0_ctrl, ctrl);
+	return 0;
+}
+
+static int crisv32_clkevt_next_event(unsigned long evt,
+				     struct clock_event_device *dev)
+{
+	reg_timer_rw_tmr0_ctrl ctrl = {
+		.op = regk_timer_ld,
+		.freq = regk_timer_f100,
+	};
+
+	REG_WR(timer, timer_base, rw_tmr0_div, evt);
+	REG_WR(timer, timer_base, rw_tmr0_ctrl, ctrl);
+
+	ctrl.op = regk_timer_run;
+	REG_WR(timer, timer_base, rw_tmr0_ctrl, ctrl);
+
+	return 0;
+}
+
+static irqreturn_t crisv32_timer_interrupt(int irq, void *dev_id)
+{
+	struct clock_event_device *evt = dev_id;
+	reg_timer_rw_tmr0_ctrl ctrl = {
+		.op = regk_timer_hold,
+		.freq = regk_timer_f100,
+	};
+	reg_timer_rw_ack_intr ack = { .tmr0 = 1 };
+	reg_timer_r_masked_intr intr;
+
+	intr = REG_RD(timer, timer_base, r_masked_intr);
+	if (!intr.tmr0)
+		return IRQ_NONE;
+
+	REG_WR(timer, timer_base, rw_tmr0_ctrl, ctrl);
+	REG_WR(timer, timer_base, rw_ack_intr, ack);
+
+	reset_watchdog();
+#ifdef CONFIG_SYSTEM_PROFILER
+	cris_profile_sample(get_irq_regs());
+#endif
+
+	evt->event_handler(evt);
+
+	return IRQ_HANDLED;
+}
+
+static struct clock_event_device crisv32_clockevent = {
+	.name = "crisv32-timer",
+	.rating = 300,
+	.features = CLOCK_EVT_FEAT_ONESHOT,
+	.set_state_oneshot = crisv32_clkevt_switch_state,
+	.set_state_shutdown = crisv32_clkevt_switch_state,
+	.tick_resume = crisv32_clkevt_switch_state,
+	.set_next_event = crisv32_clkevt_next_event,
+};
+
+/* Timer is IRQF_SHARED so drivers can add stuff to the timer irq chain. */
+static struct irqaction irq_timer = {
+	.handler = crisv32_timer_interrupt,
+	.flags = IRQF_TIMER | IRQF_SHARED,
+	.name = "crisv32-timer",
+	.dev_id = &crisv32_clockevent,
+};
+
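+/*
+ * r_time is a free-running 32-bit counter running at 100 MHz, so it wraps
+ * after about 42.9 seconds; registering it as a 32-bit sched_clock source
+ * lets the core handle the wrap-around.
+ */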
+static u64 notrace crisv32_timer_sched_clock(void)
+{
+	return REG_RD(timer, timer_base, r_time);
+}
+
+static void __init crisv32_timer_init(void)
+{
+	reg_timer_rw_intr_mask timer_intr_mask;
+	reg_timer_rw_tmr0_ctrl ctrl = {
+		.op = regk_timer_hold,
+		.freq = regk_timer_f100,
+	};
+
+	REG_WR(timer, timer_base, rw_tmr0_ctrl, ctrl);
+
+	timer_intr_mask = REG_RD(timer, timer_base, rw_intr_mask);
+	timer_intr_mask.tmr0 = 1;
+	REG_WR(timer, timer_base, rw_intr_mask, timer_intr_mask);
+}
+
+void __init time_init(void)
+{
+	int irq;
+	int ret;
+
+	/* Probe for the RTC and read it if it exists.
+	 * Before the RTC can be probed the loops_per_usec variable needs
+	 * to be initialized to make usleep work. A better value for
+	 * loops_per_usec is calculated by the kernel later once the
+	 * clock has started.
+	 */
+	loops_per_usec = 50;
+
+	irq = TIMER0_INTR_VECT;
+	timer_base = (void __iomem *) regi_timer0;
+
+	crisv32_timer_init();
+
+	sched_clock_register(crisv32_timer_sched_clock, 32,
+			     CRISV32_TIMER_FREQ);
+
+	clocksource_mmio_init(timer_base + REG_RD_ADDR_timer_r_time,
+			      "crisv32-timer", CRISV32_TIMER_FREQ,
+			      300, 32, clocksource_mmio_readl_up);
+
+	crisv32_clockevent.cpumask = cpu_possible_mask;
+	crisv32_clockevent.irq = irq;
+
+	ret = setup_irq(irq, &irq_timer);
+	if (ret)
+		pr_warn("failed to set up irq %d\n", irq);
+
+	clockevents_config_and_register(&crisv32_clockevent,
+					CRISV32_TIMER_FREQ,
+					2, 0xffffffff);
+
+	/* Enable watchdog if we should use one. */
+
+#if defined(CONFIG_ETRAX_WATCHDOG)
+	printk(KERN_INFO "Enabling watchdog...\n");
+	start_watchdog();
+
+	/* If we use the hardware watchdog, we want to trap it as an NMI
+	 * and dump registers before it resets us.  For this to happen, we
+	 * must set the "m" NMI enable flag (which once set, is unset only
+	 * when an NMI is taken). */
+	{
+		unsigned long flags;
+		local_save_flags(flags);
+		flags |= (1<<30); /* NMI M flag is at bit 30 */
+		local_irq_restore(flags);
+	}
+#endif
+
+#ifdef CONFIG_CPU_FREQ
+	cpufreq_register_notifier(&cris_time_freq_notifier_block,
+				  CPUFREQ_TRANSITION_NOTIFIER);
+#endif
+}
+
+#ifdef CONFIG_CPU_FREQ
+static int cris_time_freq_notifier(struct notifier_block *nb,
+				   unsigned long val, void *data)
+{
+	struct cpufreq_freqs *freqs = data;
+
+	if (val == CPUFREQ_POSTCHANGE) {
+		/* Renamed from "data" to avoid shadowing the argument. */
+		reg_timer_r_tmr0_data tmr_data;
+		reg_timer_rw_tmr0_div div = (freqs->new * 500) / HZ;
+
+		do {
+			tmr_data = REG_RD(timer, timer_regs[freqs->cpu],
+					  r_tmr0_data);
+		} while (tmr_data > 20);
+		REG_WR(timer, timer_regs[freqs->cpu], rw_tmr0_div, div);
+	}
+	return 0;
+}
+#endif
diff --git a/arch/cris/arch-v32/kernel/traps.c b/arch/cris/arch-v32/kernel/traps.c
new file mode 100644
index 0000000..8bbe09c
--- /dev/null
+++ b/arch/cris/arch-v32/kernel/traps.c
@@ -0,0 +1,193 @@
+/*
+ * Copyright (C) 2003-2006, Axis Communications AB.
+ */
+
+#include <linux/ptrace.h>
+#include <linux/module.h>
+#include <asm/uaccess.h>
+#include <hwregs/supp_reg.h>
+#include <hwregs/intr_vect_defs.h>
+#include <asm/irq.h>
+
+void show_registers(struct pt_regs *regs)
+{
+	/*
+	 * It's possible to use either the USP register or current->thread.usp.
+	 * USP might not correspond to the current process in all cases where
+	 * this function is called, and current->thread.usp isn't up to date
+	 * for the current process. Experience shows that using USP is the
+	 * way to go.
+	 */
+	unsigned long usp = rdusp();
+	unsigned long d_mmu_cause;
+	unsigned long i_mmu_cause;
+
+	printk("CPU: %d\n", smp_processor_id());
+
+	printk("ERP: %08lx SRP: %08lx  CCS: %08lx USP: %08lx MOF: %08lx\n",
+	       regs->erp, regs->srp, regs->ccs, usp, regs->mof);
+
+	printk(" r0: %08lx  r1: %08lx   r2: %08lx  r3: %08lx\n",
+	       regs->r0, regs->r1, regs->r2, regs->r3);
+
+	printk(" r4: %08lx  r5: %08lx   r6: %08lx  r7: %08lx\n",
+	       regs->r4, regs->r5, regs->r6, regs->r7);
+
+	printk(" r8: %08lx  r9: %08lx  r10: %08lx r11: %08lx\n",
+	       regs->r8, regs->r9, regs->r10, regs->r11);
+
+	printk("r12: %08lx r13: %08lx oR10: %08lx acr: %08lx\n",
+	       regs->r12, regs->r13, regs->orig_r10, regs->acr);
+
+	printk(" sp: %08lx\n", (unsigned long)regs);
+
+	SUPP_BANK_SEL(BANK_IM);
+	SUPP_REG_RD(RW_MM_CAUSE, i_mmu_cause);
+
+	SUPP_BANK_SEL(BANK_DM);
+	SUPP_REG_RD(RW_MM_CAUSE, d_mmu_cause);
+
+	printk("       Data MMU Cause: %08lx\n", d_mmu_cause);
+	printk("Instruction MMU Cause: %08lx\n", i_mmu_cause);
+
+	printk("Process %s (pid: %d, stackpage=%08lx)\n",
+	       current->comm, current->pid, (unsigned long)current);
+
+	/*
+	 * When in-kernel, we also print out the stack and code at the
+	 * time of the fault.
+	 */
+	if (!user_mode(regs)) {
+		int i;
+
+		show_stack(NULL, (unsigned long *)usp);
+
+		/*
+		 * If the previous stack-dump wasn't a kernel one, dump the
+		 * kernel stack now.
+		 */
+		if (usp != 0)
+			show_stack(NULL, NULL);
+
+		printk("\nCode: ");
+
+		if (regs->erp < PAGE_OFFSET)
+			goto bad_value;
+
+		/*
+		 * Quite often the value at regs->erp doesn't point to the
+		 * interesting instruction, which often is the previous
+		 * instruction. So dump at an offset large enough that the
+		 * instruction decoding should be in sync at the interesting
+		 * point, but small enough to fit on a row. The regs->erp
+		 * location is pointed out in a ksymoops-friendly way by
+		 * wrapping the byte for that address in parentheses.
+		 */
+		for (i = -12; i < 12; i++) {
+			unsigned char c;
+
+			if (__get_user(c, &((unsigned char *)regs->erp)[i])) {
+bad_value:
+				printk(" Bad IP value.");
+				break;
+			}
+
+			if (i == 0)
+				printk("(%02x) ", c);
+			else
+				printk("%02x ", c);
+		}
+		printk("\n");
+	}
+}
+
+void arch_enable_nmi(void)
+{
+	unsigned long flags;
+
+	local_save_flags(flags);
+	flags |= (1 << 30); /* NMI M flag is at bit 30 */
+	local_irq_restore(flags);
+}
+
+extern void (*nmi_handler)(struct pt_regs *);
+void handle_nmi(struct pt_regs *regs)
+{
+#ifdef CONFIG_ETRAXFS
+	reg_intr_vect_r_nmi r;
+#endif
+
+	if (nmi_handler)
+		nmi_handler(regs);
+
+#ifdef CONFIG_ETRAXFS
+	/* Wait until nmi is no longer active. */
+	do {
+		r = REG_RD(intr_vect, regi_irq, r_nmi);
+	} while (r.ext == regk_intr_vect_on);
+#endif
+}
+
+
+#ifdef CONFIG_BUG
+extern void die_if_kernel(const char *str, struct pt_regs *regs, long err);
+
+/* Copy of the regs at BUG() time.  */
+struct pt_regs BUG_regs;
+
+void do_BUG(char *file, unsigned int line)
+{
+	printk("kernel BUG at %s:%d!\n", file, line);
+	die_if_kernel("Oops", &BUG_regs, 0);
+}
+EXPORT_SYMBOL(do_BUG);
+
+void fixup_BUG(struct pt_regs *regs)
+{
+	BUG_regs = *regs;
+
+#ifdef CONFIG_DEBUG_BUGVERBOSE
+	/*
+	 * Fixup the BUG arguments through exception handlers.
+	 */
+	{
+		const struct exception_table_entry *fixup;
+
+		/*
+		 * ERP points at the "break 14" + 2, compensate for the 2
+		 * bytes.
+		 */
+		fixup = search_exception_tables(instruction_pointer(regs) - 2);
+		if (fixup) {
+			/* Adjust the instruction pointer in the stackframe. */
+			instruction_pointer(regs) = fixup->fixup;
+			arch_fixup(regs);
+		}
+	}
+#else
+	/* Don't try to look up the filename + line, just dump regs.  */
+	do_BUG("unknown", 0);
+#endif
+}
+
+/*
+ * Break 14 handler. Save regs and jump into the fixup_BUG.
+ */
+__asm__  ( ".text\n\t"
+	   ".global breakh_BUG\n\t"
+	   "breakh_BUG:\n\t"
+	   SAVE_ALL
+	   KGDB_FIXUP
+	   "move.d $sp, $r10\n\t"
+	   "jsr fixup_BUG\n\t"
+	   "nop\n\t"
+	   "jump ret_from_intr\n\t"
+	   "nop\n\t");
+
+
+#ifdef CONFIG_DEBUG_BUGVERBOSE
+void
+handle_BUG(struct pt_regs *regs)
+{
+}
+#endif
+#endif
diff --git a/arch/cris/arch-v32/lib/Makefile b/arch/cris/arch-v32/lib/Makefile
new file mode 100644
index 0000000..e91cf02
--- /dev/null
+++ b/arch/cris/arch-v32/lib/Makefile
@@ -0,0 +1,7 @@
+#
+# Makefile for Etrax-specific library files.
+#
+
+lib-y  = checksum.o checksumcopy.o string.o usercopy.o memset.o \
+	csumcpfruser.o delay.o strcmp.o
+
diff --git a/arch/cris/arch-v32/lib/checksum.S b/arch/cris/arch-v32/lib/checksum.S
new file mode 100644
index 0000000..4a72a94
--- /dev/null
+++ b/arch/cris/arch-v32/lib/checksum.S
@@ -0,0 +1,88 @@
+/*
+ * A fast checksum routine using movem
+ * Copyright (c) 1998-2007 Axis Communications AB
+ *
+ * csum_partial(const unsigned char * buff, int len, unsigned int sum)
+ */
+
+	.globl	csum_partial
+	.type   csum_partial,@function
+csum_partial:
+
+	;; r10 - src
+	;; r11 - length
+	;; r12 - checksum
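+	;;
+	;; The partial sum is accumulated 32 bits at a time with
+	;; end-around carry (addc), then folded to 16 bits before the
+	;; word and byte tail loops.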
+
+	;; Optimized for large packets
+	subq	10*4, $r11
+	blt	_word_loop
+	move.d	$r11, $acr
+
+	subq	9*4,$sp
+	clearf	c
+	movem	$r8,[$sp]
+
+	;; do a movem checksum
+
+_mloop:	movem	[$r10+],$r9	; read 10 longwords
+	;; Loop count without touching the c flag.
+	addoq	-10*4, $acr, $acr
+	;; perform dword checksumming on the 10 longwords
+
+	addc	$r0,$r12
+	addc	$r1,$r12
+	addc	$r2,$r12
+	addc	$r3,$r12
+	addc	$r4,$r12
+	addc	$r5,$r12
+	addc	$r6,$r12
+	addc	$r7,$r12
+	addc	$r8,$r12
+	addc	$r9,$r12
+
+	;; test $acr without trashing carry.
+	move.d	$acr, $acr
+	bpl	_mloop
+	;; r11 <= acr  is not really needed in the mloop, just using the dslot
+	;; to prepare for what is needed after mloop.
+	move.d	$acr, $r11
+
+	;; fold the last carry into r12
+	addc	0, $r12
+	movem	[$sp+],$r8	; restore regs
+
+_word_loop:
+	addq	10*4,$r11	; compensate for last loop underflowing length
+
+	moveq	-1,$r9		; put 0xffff in r9, faster than move.d 0xffff,r9
+	lsrq	16,$r9
+
+	move.d	$r12,$r13
+	lsrq	16,$r13		; r13 = checksum >> 16
+	and.d	$r9,$r12	; checksum = checksum & 0xffff
+
+_no_fold:
+	subq	2,$r11
+	blt	_no_words
+	add.d	$r13,$r12	; checksum += r13
+
+	;; checksum the rest of the words
+_wloop:	subq	2,$r11
+	bge	_wloop
+	addu.w	[$r10+],$r12
+
+_no_words:
+	addq	2,$r11
+	;; see if we have one odd byte more
+	bne	_do_byte
+	nop
+	ret
+	move.d	$r12,$r10
+
+_do_byte:
+	;; copy and checksum the last byte
+	addu.b	[$r10],$r12
+	ret
+	move.d	$r12,$r10
+
+	.size   csum_partial, .-csum_partial
diff --git a/arch/cris/arch-v32/lib/checksumcopy.S b/arch/cris/arch-v32/lib/checksumcopy.S
new file mode 100644
index 0000000..54e209f
--- /dev/null
+++ b/arch/cris/arch-v32/lib/checksumcopy.S
@@ -0,0 +1,94 @@
+/*
+ * A fast checksum+copy routine using movem
+ * Copyright (c) 1998-2007 Axis Communications AB
+ *
+ * Authors:	Bjorn Wesen
+ *
+ * csum_partial_copy_nocheck(const char *src, char *dst,
+ *		             int len, unsigned int sum)
+ */
+
+	.globl	csum_partial_copy_nocheck
+	.type   csum_partial_copy_nocheck,@function
+csum_partial_copy_nocheck:
+
+	;; r10 - src
+	;; r11 - dst
+	;; r12 - length
+	;; r13 - checksum
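+	;;
+	;; Same structure as checksum.S, with a store added in each loop.
+	;; The numeric local labels (1, 2, 3) mark the user-space reads so
+	;; that csumcpfruser.S can attach exception-table entries to them.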
+
+	;; Optimized for large packets
+	subq	10*4, $r12
+	blt	_word_loop
+	move.d	$r12, $acr
+
+	subq	9*4,$sp
+	clearf	c
+	movem	$r8,[$sp]
+
+	;; do a movem copy and checksum
+1:	;; A failing userspace access (the read) will have this as PC.
+_mloop:	movem	[$r10+],$r9	; read 10 longwords
+	addoq	-10*4, $acr, $acr ; loop counter in latency cycle
+	movem	$r9,[$r11+]	; write 10 longwords
+
+	;; perform dword checksumming on the 10 longwords
+	addc	$r0,$r13
+	addc	$r1,$r13
+	addc	$r2,$r13
+	addc	$r3,$r13
+	addc	$r4,$r13
+	addc	$r5,$r13
+	addc	$r6,$r13
+	addc	$r7,$r13
+	addc	$r8,$r13
+	addc	$r9,$r13
+
+	;; test $acr, without trashing carry.
+	move.d	$acr, $acr
+	bpl	_mloop
+	;; r12 <= acr  is needed after mloop and in the exception handlers.
+	move.d	$acr, $r12
+
+	;; fold the last carry into r13
+	addc	0, $r13
+	movem	[$sp+],$r8	; restore regs
+
+_word_loop:
+	addq	10*4,$r12	; compensate for last loop underflowing length
+
+	;; fold 32-bit checksum into a 16-bit checksum, to avoid carries below.
+	;; r9 can be used as a temporary.
+	move.d	$r13,$r9
+	lsrq	16,$r9		; r9 = checksum >> 16
+	and.d	0xffff,$r13	; checksum = checksum & 0xffff
+
+	subq	2, $r12
+	blt	_no_words
+	add.d	$r9,$r13	; checksum += r9
+
+	;; copy and checksum the rest of the words
+2:	;; A failing userspace access for the read below will have this as PC.
+_wloop:	move.w	[$r10+],$r9
+	addu.w	$r9,$r13
+	subq	2,$r12
+	bge	_wloop
+	move.w	$r9,[$r11+]
+
+_no_words:
+	addq	2,$r12
+	bne	_do_byte
+	nop
+	ret
+	move.d	$r13,$r10
+
+_do_byte:
+	;; copy and checksum the last byte
+3:	;; A failing userspace access for the read below will have this as PC.
+	move.b	[$r10],$r9
+	addu.b	$r9,$r13
+	move.b	$r9,[$r11]
+	ret
+	move.d	$r13,$r10
+
+	.size   csum_partial_copy_nocheck, . - csum_partial_copy_nocheck
diff --git a/arch/cris/arch-v32/lib/csumcpfruser.S b/arch/cris/arch-v32/lib/csumcpfruser.S
new file mode 100644
index 0000000..600ec16
--- /dev/null
+++ b/arch/cris/arch-v32/lib/csumcpfruser.S
@@ -0,0 +1,69 @@
+/*
+ * Add-on to transform csum_partial_copy_nocheck in checksumcopy.S into
+ * csum_partial_copy_from_user by adding exception records.
+ *
+ * Copyright (C) 2001, 2003 Axis Communications AB.
+ *
+ * Author: Hans-Peter Nilsson.
+ */
+
+#include <asm/errno.h>
+
+/* Same function body, but a different name.  If we just added exception
+   records to _csum_partial_copy_nocheck and made it generic, we wouldn't
+   know a user fault from a kernel fault and we would have overhead in
+   each kernel caller for the error-pointer argument.
+
+   unsigned int csum_partial_copy_from_user
+     (const char *src, char *dst, int len, unsigned int sum, int *errptr);
+
+   Note that the errptr argument is only set if we encounter an error.
+   It is conveniently located on the stack, so the normal function body
+   does not have to handle it.  */
+
+#define csum_partial_copy_nocheck csum_partial_copy_from_user
+
+/* There are local labels numbered 1, 2 and 3 present to mark the
+   different from-user accesses.  */
+#include "checksumcopy.S"
+
+	.section .fixup,"ax"
+
+;; Here from the movem loop; restore stack.
+4:
+	movem	[$sp+],$r8
+;; r12 is already decremented.  Add back chunk_size-2.
+	addq	40-2,$r12
+
+;; Here from the word loop; r12 is off by 2; add it back.
+5:
+	addq	2,$r12
+
+;; Here from a failing single byte.
+6:
+
+;; Signal in *errptr that we had a failing access.
+	move.d	[$sp],$acr
+	moveq	-EFAULT,$r9
+	subq	4,$sp
+	move.d	$r9,[$acr]
+
+;; Clear the rest of the destination area using memset.  Preserve the
+;; checksum for the readable bytes.
+	move.d	$r13,[$sp]
+	subq	4,$sp
+	move.d	$r11,$r10
+	move	$srp,[$sp]
+	jsr	memset
+	clear.d	$r11
+
+	move	[$sp+],$srp
+	ret
+	move.d	[$sp+],$r10
+
+	.previous
+	.section __ex_table,"a"
+	.dword 1b,4b
+	.dword 2b,5b
+	.dword 3b,6b
+	.previous
diff --git a/arch/cris/arch-v32/lib/delay.c b/arch/cris/arch-v32/lib/delay.c
new file mode 100644
index 0000000..39f1ac9
--- /dev/null
+++ b/arch/cris/arch-v32/lib/delay.c
@@ -0,0 +1,28 @@
+/*
+ * Precise Delay Loops for ETRAX FS
+ *
+ * Copyright (C) 2006 Axis Communications AB.
+ *
+ */
+
+#include <hwregs/reg_map.h>
+#include <hwregs/reg_rdwr.h>
+#include <hwregs/timer_defs.h>
+#include <linux/types.h>
+#include <linux/delay.h>
+#include <linux/module.h>
+
+/*
+ * On ETRAX FS, we can check the free-running read-only 100MHz timer
+ * getting 32-bit 10ns precision, theoretically good for 42.94967295
+ * seconds.  Unsigned arithmetic and careful expression handles
+ * wrapping.
+ */
+
+void cris_delay10ns(u32 n10ns)
+{
+	u32 t0 = REG_RD(timer, regi_timer0, r_time);
+	while (REG_RD(timer, regi_timer0, r_time) - t0 < n10ns)
+		;
+}
+EXPORT_SYMBOL(cris_delay10ns);
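+
+/*
+ * Usage sketch (illustration only): since the argument is in units of
+ * 10 ns, cris_delay10ns(100) busy-waits for roughly 1 us.
+ */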
diff --git a/arch/cris/arch-v32/lib/memset.c b/arch/cris/arch-v32/lib/memset.c
new file mode 100644
index 0000000..c94ea9b
--- /dev/null
+++ b/arch/cris/arch-v32/lib/memset.c
@@ -0,0 +1,259 @@
+/* A memset for CRIS.
+   Copyright (C) 1999-2005 Axis Communications.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   1. Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+
+   2. Neither the name of Axis Communications nor the names of its
+      contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY AXIS COMMUNICATIONS AND ITS CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL AXIS
+   COMMUNICATIONS OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+   INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+   (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+   SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+   HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+   STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+   IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.  */
+
+/* FIXME: This file should really only be used for reference, as the
+   result depends somewhat on gcc generating what we expect rather
+   than what we describe.  An assembly file should be used instead.  */
+
+/* Note the multiple occurrences of the expression "12*4", including in
+   the asm.  It is hard to get it into the asm in a good way.  Thus it is
+   better to expose the problem everywhere: no macro.  */
+
+/* Assuming one cycle per dword written or read (ok, not really true; the
+   world is not ideal), and one cycle per instruction, then 43+3*(n/48-1)
+   <= 24+24*(n/48-1) so n >= 45.7; n >= 0.9; we win on the first full
+   48-byte block to set.  */
+
+#define MEMSET_BY_BLOCK_THRESHOLD (1 * 48)
+
+/* No name ambiguities in this file, so registers can safely be written
+   without the "$" prefix.  */
+__asm__ (".syntax no_register_prefix");
+
+void *memset(void *pdst, int c, unsigned int plen)
+{
+  /* Now we want the parameters in special registers.  Make sure the
+     compiler does something usable with this.  */
+
+  register char *return_dst __asm__ ("r10") = pdst;
+  register int n __asm__ ("r12") = plen;
+  register int lc __asm__ ("r11") = c;
+
+  /* Most apps use memset sanely.  Memsetting about 3..4 bytes or less gets
+     penalized here compared to the generic implementation.  */
+
+  /* This is fragile performance-wise at best.  Check with newer GCC
+     releases whether they compile cascaded "x |= x << 8" to sane code.  */
+  __asm__("movu.b %0,r13						\n\
+	   lslq 8,r13							\n\
+	   move.b %0,r13						\n\
+	   move.d r13,%0						\n\
+	   lslq 16,r13							\n\
+	   or.d r13,%0"
+	  : "=r" (lc)		/* Outputs.  */
+	  : "0" (lc)		/* Inputs.  */
+	  : "r13");		/* Trash.  */
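+
+  /* The asm above splats the byte into all four byte lanes; roughly,
+     in plain C: lc &= 0xff; lc |= lc << 8; lc |= lc << 16;  */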
+
+  {
+    register char *dst __asm__ ("r13") = pdst;
+
+    if (((unsigned long) pdst & 3) != 0
+	/* Oops! n = 0 must be a valid call, regardless of alignment.  */
+	&& n >= 3)
+      {
+	if ((unsigned long) dst & 1)
+	  {
+	    *dst = (char) lc;
+	    n--;
+	    dst++;
+	  }
+
+	if ((unsigned long) dst & 2)
+	  {
+	    *(short *) dst = lc;
+	    n -= 2;
+	    dst += 2;
+	  }
+      }
+
+    /* Decide which setting method to use.  */
+    if (n >= MEMSET_BY_BLOCK_THRESHOLD)
+      {
+	/* It is not optimal to tell the compiler about clobbering any
+	   registers; that will move the saving/restoring of those registers
+	   to the function prologue/epilogue, and make non-block sizes
+	   suboptimal.  */
+	__asm__ volatile
+	  ("\
+	   ;; GCC does promise correct register allocations, but let's	\n\
+	   ;; make sure it keeps its promises.				\n\
+	   .ifnc %0-%1-%4,$r13-$r12-$r11				\n\
+	   .error \"GCC reg alloc bug: %0-%1-%4 != $r13-$r12-$r11\"	\n\
+	   .endif							\n\
+									\n\
+	   ;; Save the registers we'll clobber in the movem process	\n\
+	   ;; on the stack.  Don't mention them to gcc, it will only be	\n\
+	   ;; upset.							\n\
+	   subq	   11*4,sp						\n\
+	   movem   r10,[sp]						\n\
+									\n\
+	   move.d  r11,r0						\n\
+	   move.d  r11,r1						\n\
+	   move.d  r11,r2						\n\
+	   move.d  r11,r3						\n\
+	   move.d  r11,r4						\n\
+	   move.d  r11,r5						\n\
+	   move.d  r11,r6						\n\
+	   move.d  r11,r7						\n\
+	   move.d  r11,r8						\n\
+	   move.d  r11,r9						\n\
+	   move.d  r11,r10						\n\
+									\n\
+	   ;; Now we've got this:					\n\
+	   ;; r13 - dst							\n\
+	   ;; r12 - n							\n\
+									\n\
+	   ;; Update n for the first loop				\n\
+	   subq	   12*4,r12						\n\
+0:									\n\
+"
+#ifdef __arch_common_v10_v32
+	   /* Cater to branch offset difference between v32 and v10.  We
+	      assume the branch below has an 8-bit offset.  */
+"	   setf\n"
+#endif
+"	   subq	  12*4,r12						\n\
+	   bge	   0b							\n\
+	   movem	r11,[r13+]					\n\
+									\n\
+	   ;; Compensate for last loop underflowing n.			\n\
+	   addq	  12*4,r12						\n\
+									\n\
+	   ;; Restore registers from stack.				\n\
+	   movem [sp+],r10"
+
+	   /* Outputs.	*/
+	   : "=r" (dst), "=r" (n)
+
+	   /* Inputs.  */
+	   : "0" (dst), "1" (n), "r" (lc));
+      }
+
+    /* An ad-hoc unroll, used for the remaining 16..4*12-1 bytes. */
+    while (n >= 16)
+      {
+	*(long *) dst = lc; dst += 4;
+	*(long *) dst = lc; dst += 4;
+	*(long *) dst = lc; dst += 4;
+	*(long *) dst = lc; dst += 4;
+	n -= 16;
+      }
+
+    switch (n)
+      {
+      case 0:
+        break;
+
+      case 1:
+        *dst = (char) lc;
+        break;
+
+      case 2:
+        *(short *) dst = (short) lc;
+        break;
+
+      case 3:
+        *(short *) dst = (short) lc; dst += 2;
+        *dst = (char) lc;
+        break;
+
+      case 4:
+        *(long *) dst = lc;
+        break;
+
+      case 5:
+        *(long *) dst = lc; dst += 4;
+        *dst = (char) lc;
+        break;
+
+      case 6:
+        *(long *) dst = lc; dst += 4;
+        *(short *) dst = (short) lc;
+        break;
+
+      case 7:
+        *(long *) dst = lc; dst += 4;
+        *(short *) dst = (short) lc; dst += 2;
+        *dst = (char) lc;
+        break;
+
+      case 8:
+        *(long *) dst = lc; dst += 4;
+        *(long *) dst = lc;
+        break;
+
+      case 9:
+        *(long *) dst = lc; dst += 4;
+        *(long *) dst = lc; dst += 4;
+        *dst = (char) lc;
+        break;
+
+      case 10:
+        *(long *) dst = lc; dst += 4;
+        *(long *) dst = lc; dst += 4;
+        *(short *) dst = (short) lc;
+        break;
+
+      case 11:
+        *(long *) dst = lc; dst += 4;
+        *(long *) dst = lc; dst += 4;
+        *(short *) dst = (short) lc; dst += 2;
+        *dst = (char) lc;
+        break;
+
+      case 12:
+        *(long *) dst = lc; dst += 4;
+        *(long *) dst = lc; dst += 4;
+        *(long *) dst = lc;
+        break;
+
+      case 13:
+        *(long *) dst = lc; dst += 4;
+        *(long *) dst = lc; dst += 4;
+        *(long *) dst = lc; dst += 4;
+        *dst = (char) lc;
+        break;
+
+      case 14:
+        *(long *) dst = lc; dst += 4;
+        *(long *) dst = lc; dst += 4;
+        *(long *) dst = lc; dst += 4;
+        *(short *) dst = (short) lc;
+        break;
+
+      case 15:
+        *(long *) dst = lc; dst += 4;
+        *(long *) dst = lc; dst += 4;
+        *(long *) dst = lc; dst += 4;
+        *(short *) dst = (short) lc; dst += 2;
+        *dst = (char) lc;
+        break;
+      }
+  }
+
+  return return_dst;
+}
diff --git a/arch/cris/arch-v32/lib/strcmp.S b/arch/cris/arch-v32/lib/strcmp.S
new file mode 100644
index 0000000..8f7a1ee
--- /dev/null
+++ b/arch/cris/arch-v32/lib/strcmp.S
@@ -0,0 +1,21 @@
+; strcmp.S -- CRISv32 version.
+; Copyright (C) 2008 AXIS Communications AB
+; Written by Edgar E. Iglesias
+;
+; This source code is licensed under the GNU General Public License,
+; Version 2.  See the file COPYING for more details.
+
+	.global	strcmp
+	.type	strcmp,@function
+strcmp:
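+	;; r10/r11 - the two strings; r10 doubles as the return value.
+	;; r12 - byte difference; r13 - set when the terminating NUL is seen.
+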
+1:
+	move.b	[$r10+], $r12
+	seq	$r13
+	sub.b	[$r11+], $r12
+	or.b	$r12, $r13
+	beq	1b
+	nop
+
+	ret
+	movs.b	$r12, $r10
+	.size	strcmp, . - strcmp
diff --git a/arch/cris/arch-v32/lib/string.c b/arch/cris/arch-v32/lib/string.c
new file mode 100644
index 0000000..c7bd6eb
--- /dev/null
+++ b/arch/cris/arch-v32/lib/string.c
@@ -0,0 +1,236 @@
+/* A memcpy for CRIS.
+   Copyright (C) 1994-2005 Axis Communications.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   1. Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+
+   2. Neither the name of Axis Communications nor the names of its
+      contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY AXIS COMMUNICATIONS AND ITS CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL AXIS
+   COMMUNICATIONS OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+   INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+   (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+   SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+   HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+   STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+   IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.  */
+
+/* FIXME: This file should really only be used for reference, as the
+   result depends somewhat on gcc generating what we expect rather
+   than what we describe.  An assembly file should be used instead.  */
+
+#include <stddef.h>
+
+/* Break even between movem and move16 is really at 38.7 * 2, but
+   modulo 44, so up to the next multiple of 44, we use ordinary code.  */
+#define MEMCPY_BY_BLOCK_THRESHOLD (44 * 2)
+
+/* No name ambiguities in this file, so registers can safely be written
+   without the "$" prefix.  */
+__asm__ (".syntax no_register_prefix");
+
+void *
+memcpy(void *pdst, const void *psrc, size_t pn)
+{
+  /* Now we want the parameters put in special registers.
+     Make sure the compiler is able to make something useful of this.
+     As it is now: r10 -> r13; r11 -> r11 (nop); r12 -> r12 (nop).
+
+     If gcc was all right, it really would need no temporaries, and no
+     stack space to save stuff on.  */
+
+  register void *return_dst __asm__ ("r10") = pdst;
+  register unsigned char *dst __asm__ ("r13") = pdst;
+  register unsigned const char *src __asm__ ("r11") = psrc;
+  register int n __asm__ ("r12") = pn;
+
+  /* When src is aligned but not dst, this costs a few needless extra
+     cycles.  I believe it would take as many to check that the
+     re-alignment was unnecessary.  */
+  if (((unsigned long) dst & 3) != 0
+      /* Don't align if we wouldn't copy more than a few bytes; so we
+	 don't have to check further for overflows.  */
+      && n >= 3)
+  {
+    if ((unsigned long) dst & 1)
+      {
+	n--;
+	*dst = *src;
+	src++;
+	dst++;
+      }
+
+    if ((unsigned long) dst & 2)
+      {
+	n -= 2;
+	*(short *) dst = *(short *) src;
+	src += 2;
+	dst += 2;
+      }
+  }
+
+  /* Decide which copying method to use.  */
+  if (n >= MEMCPY_BY_BLOCK_THRESHOLD)
+    {
+      /* It is not optimal to tell the compiler about clobbering any
+	 registers; that will move the saving/restoring of those registers
+	 to the function prologue/epilogue, and make non-movem sizes
+	 suboptimal.  */
+      __asm__ volatile
+	("\
+	 ;; GCC does promise correct register allocations, but let's	\n\
+	 ;; make sure it keeps its promises.				\n\
+	 .ifnc %0-%1-%2,$r13-$r11-$r12					\n\
+	 .error \"GCC reg alloc bug: %0-%1-%2 != $r13-$r11-$r12\"	\n\
+	 .endif								\n\
+									\n\
+	 ;; Save the registers we'll use in the movem process		\n\
+	 ;; on the stack.						\n\
+	 subq	11*4,sp							\n\
+	 movem	r10,[sp]						\n\
+									\n\
+	 ;; Now we've got this:						\n\
+	 ;; r11 - src							\n\
+	 ;; r13 - dst							\n\
+	 ;; r12 - n							\n\
+									\n\
+	 ;; Update n for the first loop.				\n\
+	 subq	 44,r12							\n\
+0:									\n\
+"
+#ifdef __arch_common_v10_v32
+	 /* Cater to branch offset difference between v32 and v10.  We
+	    assume the branch below has an 8-bit offset.  */
+"	 setf\n"
+#endif
+"	 movem	[r11+],r10						\n\
+	 subq	44,r12							\n\
+	 bge	 0b							\n\
+	 movem	r10,[r13+]						\n\
+									\n\
+	 ;; Compensate for last loop underflowing n.			\n\
+	 addq	44,r12							\n\
+									\n\
+	 ;; Restore registers from stack.				\n\
+	 movem [sp+],r10"
+
+	 /* Outputs.  */
+	 : "=r" (dst), "=r" (src), "=r" (n)
+
+	 /* Inputs.  */
+	 : "0" (dst), "1" (src), "2" (n));
+    }
+
+  while (n >= 16)
+    {
+      *(long *) dst = *(long *) src; dst += 4; src += 4;
+      *(long *) dst = *(long *) src; dst += 4; src += 4;
+      *(long *) dst = *(long *) src; dst += 4; src += 4;
+      *(long *) dst = *(long *) src; dst += 4; src += 4;
+
+      n -= 16;
+    }
+
+  switch (n)
+    {
+    case 0:
+      break;
+
+    case 1:
+      *dst = *src;
+      break;
+
+    case 2:
+      *(short *) dst = *(short *) src;
+      break;
+
+    case 3:
+      *(short *) dst = *(short *) src; dst += 2; src += 2;
+      *dst = *src;
+      break;
+
+    case 4:
+      *(long *) dst = *(long *) src;
+      break;
+
+    case 5:
+      *(long *) dst = *(long *) src; dst += 4; src += 4;
+      *dst = *src;
+      break;
+
+    case 6:
+      *(long *) dst = *(long *) src; dst += 4; src += 4;
+      *(short *) dst = *(short *) src;
+      break;
+
+    case 7:
+      *(long *) dst = *(long *) src; dst += 4; src += 4;
+      *(short *) dst = *(short *) src; dst += 2; src += 2;
+      *dst = *src;
+      break;
+
+    case 8:
+      *(long *) dst = *(long *) src; dst += 4; src += 4;
+      *(long *) dst = *(long *) src;
+      break;
+
+    case 9:
+      *(long *) dst = *(long *) src; dst += 4; src += 4;
+      *(long *) dst = *(long *) src; dst += 4; src += 4;
+      *dst = *src;
+      break;
+
+    case 10:
+      *(long *) dst = *(long *) src; dst += 4; src += 4;
+      *(long *) dst = *(long *) src; dst += 4; src += 4;
+      *(short *) dst = *(short *) src;
+      break;
+
+    case 11:
+      *(long *) dst = *(long *) src; dst += 4; src += 4;
+      *(long *) dst = *(long *) src; dst += 4; src += 4;
+      *(short *) dst = *(short *) src; dst += 2; src += 2;
+      *dst = *src;
+      break;
+
+    case 12:
+      *(long *) dst = *(long *) src; dst += 4; src += 4;
+      *(long *) dst = *(long *) src; dst += 4; src += 4;
+      *(long *) dst = *(long *) src;
+      break;
+
+    case 13:
+      *(long *) dst = *(long *) src; dst += 4; src += 4;
+      *(long *) dst = *(long *) src; dst += 4; src += 4;
+      *(long *) dst = *(long *) src; dst += 4; src += 4;
+      *dst = *src;
+      break;
+
+    case 14:
+      *(long *) dst = *(long *) src; dst += 4; src += 4;
+      *(long *) dst = *(long *) src; dst += 4; src += 4;
+      *(long *) dst = *(long *) src; dst += 4; src += 4;
+      *(short *) dst = *(short *) src;
+      break;
+
+    case 15:
+      *(long *) dst = *(long *) src; dst += 4; src += 4;
+      *(long *) dst = *(long *) src; dst += 4; src += 4;
+      *(long *) dst = *(long *) src; dst += 4; src += 4;
+      *(short *) dst = *(short *) src; dst += 2; src += 2;
+      *dst = *src;
+      break;
+    }
+
+  return return_dst;
+}
diff --git a/arch/cris/arch-v32/lib/usercopy.c b/arch/cris/arch-v32/lib/usercopy.c
new file mode 100644
index 0000000..f0f335d
--- /dev/null
+++ b/arch/cris/arch-v32/lib/usercopy.c
@@ -0,0 +1,469 @@
+/*
+ * User address space access functions.
+ * The non-inlined parts of asm-cris/uaccess.h are here.
+ *
+ * Copyright (C) 2000, 2003 Axis Communications AB.
+ *
+ * Written by Hans-Peter Nilsson.
+ * Pieces used from memcpy, originally by Kenny Ranerup a long time ago.
+ */
+
+#include <asm/uaccess.h>
+
+/* The asm statements have been tweaked (within the domain of correctness)
+   to give satisfactory results for "gcc version 3.2.1 Axis release
+   R53/1.53-v32".
+
+   Check regularly...
+
+   Note that for CRISv32, the PC saved at a bus-fault is the address
+   *at* the faulting instruction, with a special case for instructions
+   in delay slots: then it's the address of the branch.  Note also that
+   in contrast to v10, a postincrement in the instruction is *not*
+   performed at a bus-fault; the register is seen having the original
+   value in fault handlers.  */
+
+
+/* Copy to userspace.  This is based on the memcpy used for
+   kernel-to-kernel copying; see "string.c".  */
+
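+/* Returns 0 on success; on a fault, the number of bytes that could not
+   be copied, following the usual kernel copy_to_user contract.  */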
+unsigned long __copy_user(void __user *pdst, const void *psrc, unsigned long pn)
+{
+  /* We want the parameters put in special registers.
+     Make sure the compiler is able to make something useful of this.
+     As it is now: r10 -> r13; r11 -> r11 (nop); r12 -> r12 (nop).
+
+     FIXME: Comment for old gcc version.  Check.
+     If gcc was alright, it really would need no temporaries, and no
+     stack space to save stuff on. */
+
+  register char *dst __asm__ ("r13") = pdst;
+  register const char *src __asm__ ("r11") = psrc;
+  register int n __asm__ ("r12") = pn;
+  register int retn __asm__ ("r10") = 0;
+
+
+  /* When src is aligned but not dst, this costs a few needless extra
+     cycles.  I believe it would take as many to check that the
+     re-alignment was unnecessary.  */
+  if (((unsigned long) dst & 3) != 0
+      /* Don't align if we wouldn't copy more than a few bytes; so we
+	 don't have to check further for overflows.  */
+      && n >= 3)
+  {
+    if ((unsigned long) dst & 1)
+    {
+      __asm_copy_to_user_1 (dst, src, retn);
+      n--;
+    }
+
+    if ((unsigned long) dst & 2)
+    {
+      __asm_copy_to_user_2 (dst, src, retn);
+      n -= 2;
+    }
+  }
+
+  /* Movem is dirt cheap.  The overhead is low enough to always use the
+     minimum possible block size as the threshold.  */
+  if (n >= 44)
+  {
+    /* For large copies we use 'movem'.  */
+
+    /* It is not optimal to tell the compiler about clobbering any
+       registers; that will move the saving/restoring of those registers
+       to the function prologue/epilogue, and make non-movem sizes
+       suboptimal.  */
+    __asm__ volatile ("\
+        ;; Check that the register asm declaration got right.		\n\
+        ;; The GCC manual explicitly says TRT will happen.		\n\
+	.ifnc %0%1%2%3,$r13$r11$r12$r10					\n\
+	.err								\n\
+	.endif								\n\
+									\n\
+	;; Save the registers we'll use in the movem process		\n\
+	;; on the stack.						\n\
+	subq	11*4,$sp						\n\
+	movem	$r10,[$sp]						\n\
+									\n\
+	;; Now we've got this:						\n\
+	;; r11 - src							\n\
+	;; r13 - dst							\n\
+	;; r12 - n							\n\
+									\n\
+	;; Update n for the first loop					\n\
+	subq	44,$r12							\n\
+0:									\n\
+	movem	[$r11+],$r10						\n\
+	subq   44,$r12							\n\
+1:	bge	0b							\n\
+	movem	$r10,[$r13+]						\n\
+3:									\n\
+	addq   44,$r12  ;; compensate for last loop underflowing n	\n\
+									\n\
+	;; Restore registers from stack					\n\
+	movem [$sp+],$r10						\n\
+2:									\n\
+	.section .fixup,\"ax\"						\n\
+4:									\n\
+; When failing on any of the 1..44 bytes in a chunk, we adjust back the	\n\
+; source pointer and just drop through	to the by-16 and by-4 loops to	\n\
+; get the correct number of failing bytes.  This necessarily means a	\n\
+; few extra exceptions, but invalid user pointers shouldn't happen in	\n\
+; time-critical code anyway.						\n\
+	jump 3b								\n\
+	subq 44,$r11							\n\
+									\n\
+	.previous							\n\
+	.section __ex_table,\"a\"					\n\
+	.dword 1b,4b							\n\
+	.previous"
+
+     /* Outputs */ : "=r" (dst), "=r" (src), "=r" (n), "=r" (retn)
+     /* Inputs */ : "0" (dst), "1" (src), "2" (n), "3" (retn));
+
+  }
+
+  while (n >= 16)
+  {
+    __asm_copy_to_user_16 (dst, src, retn);
+    n -= 16;
+  }
+
+  /* Having a separate by-four loop cuts down on cache footprint.
+     FIXME:  Test with and without; increasing switch to be 0..15.  */
+  while (n >= 4)
+  {
+    __asm_copy_to_user_4 (dst, src, retn);
+    n -= 4;
+  }
+
+  switch (n)
+  {
+    case 0:
+      break;
+    case 1:
+      __asm_copy_to_user_1 (dst, src, retn);
+      break;
+    case 2:
+      __asm_copy_to_user_2 (dst, src, retn);
+      break;
+    case 3:
+      __asm_copy_to_user_3 (dst, src, retn);
+      break;
+  }
+
+  return retn;
+}
+EXPORT_SYMBOL(__copy_user);
+
+/* Copy from user to kernel, zeroing the bytes that were inaccessible in
+   userland.  The return-value is the number of bytes that were
+   inaccessible.  */
+unsigned long __copy_user_zeroing(void *pdst, const void __user *psrc,
+				  unsigned long pn)
+{
+  /* We want the parameters put in special registers.
+     Make sure the compiler is able to make something useful of this.
+     As it is now: r10 -> r13; r11 -> r11 (nop); r12 -> r12 (nop).
+
+     FIXME: Comment for old gcc version.  Check.
+     If gcc was alright, it really would need no temporaries, and no
+     stack space to save stuff on.  */
+
+  register char *dst __asm__ ("r13") = pdst;
+  register const char *src __asm__ ("r11") = psrc;
+  register int n __asm__ ("r12") = pn;
+  register int retn __asm__ ("r10") = 0;
+
+  /* The best reason to align src is that we then know that a read-fault
+     was for aligned bytes; there's no 1..3 remaining good bytes to
+     pickle.  */
+  if (((unsigned long) src & 3) != 0)
+  {
+    if (((unsigned long) src & 1) && n != 0)
+    {
+      __asm_copy_from_user_1 (dst, src, retn);
+      n--;
+    }
+
+    if (((unsigned long) src & 2) && n >= 2)
+    {
+      __asm_copy_from_user_2 (dst, src, retn);
+      n -= 2;
+    }
+
+    /* We only need one check after the unalignment-adjustments, because
+       if both adjustments were done, either both or neither reference
+       had an exception.  */
+    if (retn != 0)
+      goto copy_exception_bytes;
+  }
+
+  /* Movem is dirt cheap.  The overhead is low enough to always use the
+     minimum possible block size as the threshold.  */
+  if (n >= 44)
+  {
+    /* It is not optimal to tell the compiler about clobbering any
+       registers; that will move the saving/restoring of those registers
+       to the function prologue/epilogue, and make non-movem sizes
+       suboptimal.  */
+    __asm__ volatile ("\
+	.ifnc %0%1%2%3,$r13$r11$r12$r10					\n\
+	.err								\n\
+	.endif								\n\
+									\n\
+	;; Save the registers we'll use in the movem process		\n\
+	;; on the stack.						\n\
+	subq	11*4,$sp						\n\
+	movem	$r10,[$sp]						\n\
+									\n\
+	;; Now we've got this:						\n\
+	;; r11 - src							\n\
+	;; r13 - dst							\n\
+	;; r12 - n							\n\
+									\n\
+	;; Update n for the first loop					\n\
+	subq	44,$r12							\n\
+0:									\n\
+	movem	[$r11+],$r10						\n\
+									\n\
+	subq   44,$r12							\n\
+	bge	0b							\n\
+	movem	$r10,[$r13+]						\n\
+									\n\
+4:									\n\
+	addq   44,$r12  ;; compensate for last loop underflowing n	\n\
+									\n\
+	;; Restore registers from stack					\n\
+	movem [$sp+],$r10						\n\
+	.section .fixup,\"ax\"						\n\
+									\n\
+;; Do not jump back into the loop if we fail.  For some uses, we get a	\n\
+;; page fault somewhere on the line.  Without checking for page limits,	\n\
+;; we don't know where, but we need to copy accurately and keep an	\n\
+;; accurate count; not just clear the whole line.  To do that, we fall	\n\
+;; down in the code below, proceeding with smaller amounts.  It should	\n\
+;; be kept in mind that we have to cater to code like what at one time	\n\
+;; was in fs/super.c:							\n\
+;;  i = size - copy_from_user((void *)page, data, size);		\n\
+;; which would cause repeated faults while clearing the remainder of	\n\
+;; the SIZE bytes at PAGE after the first fault.			\n\
+;; A caveat here is that we must not fall through from a failing page	\n\
+;; to a valid page.							\n\
+									\n\
+3:									\n\
+	jump	4b ;; Fall through, pretending the fault didn't happen.	\n\
+	nop								\n\
+									\n\
+	.previous							\n\
+	.section __ex_table,\"a\"					\n\
+	.dword 0b,3b							\n\
+	.previous"
+
+     /* Outputs */ : "=r" (dst), "=r" (src), "=r" (n), "=r" (retn)
+     /* Inputs */ : "0" (dst), "1" (src), "2" (n), "3" (retn));
+  }
+
+  /* Either we directly start copying here, using dword copying in a loop,
+     or we copy as much as possible with 'movem' and then the last block
+     (<44 bytes) is copied here.  This will work since 'movem' will have
+     updated src, dst and n.  (Except with failing src.)
+
+     Since we want to keep src accurate, we can't use
+     __asm_copy_from_user_N with N != (1, 2, 4); it updates dst and
+     retn, but not src (by design; its value is ignored elsewhere).  */
+
+  while (n >= 4)
+  {
+    __asm_copy_from_user_4 (dst, src, retn);
+    n -= 4;
+
+    if (retn)
+      goto copy_exception_bytes;
+  }
+
+  /* If we get here, there were no memory read faults.  */
+  switch (n)
+  {
+    /* These copies are at least "naturally aligned" (so we don't have
+       to check each byte), due to the src alignment code before the
+       movem loop.  The *_3 case *will* get the correct count for retn.  */
+    case 0:
+      /* This case deliberately left in (if you have doubts check the
+	 generated assembly code).  */
+      break;
+    case 1:
+      __asm_copy_from_user_1 (dst, src, retn);
+      break;
+    case 2:
+      __asm_copy_from_user_2 (dst, src, retn);
+      break;
+    case 3:
+      __asm_copy_from_user_3 (dst, src, retn);
+      break;
+  }
+
+  /* If we get here, retn correctly reflects the number of failing
+     bytes.  */
+  return retn;
+
+copy_exception_bytes:
+  /* We already have "retn" bytes cleared, and need to clear the
+     remaining "n" bytes.  A non-optimized simple byte-for-byte in-line
+     memset is preferred here, since this isn't speed-critical code and
+     we'd rather have this a leaf-function than calling memset.  */
+  {
+    char *endp;
+    for (endp = dst + n; dst < endp; dst++)
+      *dst = 0;
+  }
+
+  return retn + n;
+}
+EXPORT_SYMBOL(__copy_user_zeroing);
+
+/* Zero userspace.  */
+unsigned long __do_clear_user(void __user *pto, unsigned long pn)
+{
+  /* We want the parameters put in special registers.
+     Make sure the compiler is able to make something useful of this.
+      As it is now: r10 -> r13; r11 -> r11 (nop); r12 -> r12 (nop).
+
+     FIXME: Comment for old gcc version.  Check.
+     If gcc was alright, it really would need no temporaries, and no
+     stack space to save stuff on. */
+
+  register char *dst __asm__ ("r13") = pto;
+  register int n __asm__ ("r12") = pn;
+  register int retn __asm__ ("r10") = 0;
+
+
+  if (((unsigned long) dst & 3) != 0
+     /* Don't align if we wouldn't copy more than a few bytes.  */
+      && n >= 3)
+  {
+    if ((unsigned long) dst & 1)
+    {
+      __asm_clear_1 (dst, retn);
+      n--;
+    }
+
+    if ((unsigned long) dst & 2)
+    {
+      __asm_clear_2 (dst, retn);
+      n -= 2;
+    }
+  }
+
+  /* Decide which clearing method to use.
+     FIXME: This number is from the "ordinary" kernel memset.  */
+  if (n >= 48)
+  {
+    /* For large clears we use 'movem' */
+
+    /* It is not optimal to tell the compiler about clobbering any
+       call-saved registers; that will move the saving/restoring of
+       those registers to the function prologue/epilogue, and make
+       non-movem sizes suboptimal.
+
+       This method is not foolproof; it assumes that the "asm reg"
+       declarations at the beginning of the function really are used
+       here (beware: they may be moved to temporary registers).
+       This way, we do not have to save/move the registers around into
+       temporaries; we can safely use them straight away.
+
+      If you want to check that the allocation was right, then
+      check the equalities in the first comment.  It should say
+      something like "r13=r13, r12=r12, r10=r10". */
+    __asm__ volatile ("\
+	.ifnc %0%1%2,$r13$r12$r10					\n\
+	.err								\n\
+	.endif								\n\
+									\n\
+	;; Save the registers we'll clobber in the movem process	\n\
+	;; on the stack.  Don't mention them to gcc, it will only be	\n\
+	;; upset.							\n\
+	subq	11*4,$sp						\n\
+	movem	$r10,[$sp]						\n\
+									\n\
+	clear.d $r0							\n\
+	clear.d $r1							\n\
+	clear.d $r2							\n\
+	clear.d $r3							\n\
+	clear.d $r4							\n\
+	clear.d $r5							\n\
+	clear.d $r6							\n\
+	clear.d $r7							\n\
+	clear.d $r8							\n\
+	clear.d $r9							\n\
+	clear.d $r10							\n\
+	clear.d $r11							\n\
+									\n\
+	;; Now we've got this:						\n\
+	;; r13 - dst							\n\
+	;; r12 - n							\n\
+									\n\
+	;; Update n for the first loop					\n\
+	subq	12*4,$r12						\n\
+0:									\n\
+	subq   12*4,$r12						\n\
+1:									\n\
+	bge	0b							\n\
+	movem	$r11,[$r13+]						\n\
+									\n\
+	addq   12*4,$r12 ;; compensate for last loop underflowing n	\n\
+									\n\
+	;; Restore registers from stack					\n\
+	movem [$sp+],$r10						\n\
+2:									\n\
+	.section .fixup,\"ax\"						\n\
+3:									\n\
+	movem [$sp],$r10						\n\
+	addq 12*4,$r10							\n\
+	addq 12*4,$r13							\n\
+	movem $r10,[$sp]						\n\
+	jump 0b								\n\
+	clear.d $r10							\n\
+									\n\
+	.previous							\n\
+	.section __ex_table,\"a\"					\n\
+	.dword 1b,3b							\n\
+	.previous"
+
+     /* Outputs */ : "=r" (dst), "=r" (n), "=r" (retn)
+     /* Inputs */ : "0" (dst), "1" (n), "2" (retn)
+     /* Clobber */ : "r11");
+  }
+
+  while (n >= 16)
+  {
+    __asm_clear_16 (dst, retn);
+    n -= 16;
+  }
+
+  /* Having a separate by-four loop cuts down on cache footprint.
+     FIXME: Test with and without; increase the switch to cover 0..15.  */
+  while (n >= 4)
+  {
+    __asm_clear_4 (dst, retn);
+    n -= 4;
+  }
+
+  switch (n)
+  {
+    case 0:
+      break;
+    case 1:
+      __asm_clear_1 (dst, retn);
+      break;
+    case 2:
+      __asm_clear_2 (dst, retn);
+      break;
+    case 3:
+      __asm_clear_3 (dst, retn);
+      break;
+  }
+
+  return retn;
+}
+EXPORT_SYMBOL(__do_clear_user);
diff --git a/arch/cris/arch-v32/mach-a3/Kconfig b/arch/cris/arch-v32/mach-a3/Kconfig
new file mode 100644
index 0000000..8754727
--- /dev/null
+++ b/arch/cris/arch-v32/mach-a3/Kconfig
@@ -0,0 +1,110 @@
+if CRIS_MACH_ARTPEC3
+
+menu "Artpec-3 options"
+       depends on CRIS_MACH_ARTPEC3
+
+config ETRAX_DRAM_VIRTUAL_BASE
+	hex
+	default "c0000000"
+
+config ETRAX_L2CACHE
+       bool
+       default y
+
+config ETRAX_SERIAL_PORTS
+       int
+       default 5
+
+config ETRAX_DDR2_MRS
+	hex "DDR2 MRS"
+	default "0"
+
+config ETRAX_DDR2_TIMING
+	hex "DDR2 SDRAM timing"
+	default "0"
+	help
+	  SDRAM timing parameters.
+
+config ETRAX_DDR2_CONFIG
+	hex "DDR2 config"
+	default "0"
+
+config ETRAX_DDR2_LATENCY
+	hex "DDR2 latency"
+	default "0"
+
+config ETRAX_PIO_CE0_CFG
+       hex "PIO CE0 configuration"
+       default "0"
+
+config ETRAX_PIO_CE1_CFG
+       hex "PIO CE1 configuration"
+       default "0"
+
+config ETRAX_PIO_CE2_CFG
+       hex "PIO CE2 configuration"
+       default "0"
+
+config ETRAX_DEF_GIO_PA_OE
+	hex "GIO_PA_OE"
+	default "00000000"
+	help
+	  Configures the direction of general port A bits.  1 is out, 0 is in.
+	  This is often totally different depending on the product used.
+	  There are some guidelines though - if you know that only LEDs are
+	  connected to port PA, then they are usually connected to bits 2-4
+	  and you can therefore use 1c.  On other boards which don't have the
+	  LEDs at the general ports, these bits are used for all kinds of
+	  stuff.  If you don't know what to use, it is always safe to put all
+	  as inputs, although floating inputs aren't good.
+
+config ETRAX_DEF_GIO_PA_OUT
+	hex "GIO_PA_OUT"
+	default "00000000"
+	help
+	  Configures the initial data for the general port A bits.  Most
+	  products should use 00000000 here.
+
+config ETRAX_DEF_GIO_PB_OE
+	hex "GIO_PB_OE"
+	default "000000000"
+	help
+	  Configures the direction of general port B bits.  1 is out, 0 is in.
+	  This is often totally different depending on the product used.
+	  There are some guidelines though - if you know that only LEDs are
+	  connected to port PB, then they are usually connected to bits 2-4
+	  and you can therefore use 1c.  On other boards which don't have the
+	  LEDs at the general ports, these bits are used for all kinds of
+	  stuff.  If you don't know what to use, it is always safe to put all
+	  as inputs, although floating inputs aren't good.
+
+config ETRAX_DEF_GIO_PB_OUT
+	hex "GIO_PB_OUT"
+	default "000000000"
+	help
+	  Configures the initial data for the general port B bits.  Most
+	  products should use 00000000 here.
+
+config ETRAX_DEF_GIO_PC_OE
+	hex "GIO_PC_OE"
+	default "00000"
+	help
+	  Configures the direction of general port C bits.  1 is out, 0 is in.
+	  This is often totally different depending on the product used.
+	  There are some guidelines though - if you know that only LEDs are
+	  connected to port PC, then they are usually connected to bits 2-4
+	  and you can therefore use 1c.  On other boards which don't have the
+	  LEDs at the general ports, these bits are used for all kinds of
+	  stuff.  If you don't know what to use, it is always safe to put all
+	  as inputs, although floating inputs aren't good.
+
+config ETRAX_DEF_GIO_PC_OUT
+	hex "GIO_PC_OUT"
+	default "00000"
+	help
+	  Configures the initial data for the general port C bits.  Most
+	  products should use 00000 here.
+
+endmenu
+
+endif
diff --git a/arch/cris/arch-v32/mach-a3/Makefile b/arch/cris/arch-v32/mach-a3/Makefile
new file mode 100644
index 0000000..0cc6eeb
--- /dev/null
+++ b/arch/cris/arch-v32/mach-a3/Makefile
@@ -0,0 +1,8 @@
+#
+# Makefile for the linux kernel.
+#
+
+obj-y   := dma.o pinmux.o arbiter.o
+
+clean:
+
diff --git a/arch/cris/arch-v32/mach-a3/arbiter.c b/arch/cris/arch-v32/mach-a3/arbiter.c
new file mode 100644
index 0000000..ab5c421
--- /dev/null
+++ b/arch/cris/arch-v32/mach-a3/arbiter.c
@@ -0,0 +1,634 @@
+/*
+ * Memory arbiter functions. Allocates bandwidth through the
+ * arbiter and sets up arbiter breakpoints.
+ *
+ * The algorithm first assigns slots to the clients that have
+ * specified bandwidth (e.g. ethernet) and then divides the remaining
+ * slots among all the active clients.
+ *
+ * Copyright (c) 2004-2007 Axis Communications AB.
+ *
+ * The artpec-3 has two arbiters. The memory hierarchy looks like this:
+ *
+ *
+ * CPU DMAs
+ *  |   |
+ *  |   |
+ * --------------    ------------------
+ * | foo arbiter|----| Internal memory|
+ * --------------    ------------------
+ *      |
+ * --------------
+ * | L2 cache   |
+ * --------------
+ *             |
+ * h264 etc    |
+ *    |        |
+ *    |        |
+ * --------------
+ * | bar arbiter|
+ * --------------
+ *       |
+ * ---------
+ * | SDRAM |
+ * ---------
+ *
+ */
+
+#include <hwregs/reg_map.h>
+#include <hwregs/reg_rdwr.h>
+#include <hwregs/marb_foo_defs.h>
+#include <hwregs/marb_bar_defs.h>
+#include <arbiter.h>
+#include <hwregs/intr_vect.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/signal.h>
+#include <linux/errno.h>
+#include <linux/spinlock.h>
+#include <asm/io.h>
+#include <asm/irq_regs.h>
+
+#define D(x)
+
+struct crisv32_watch_entry {
+  unsigned long instance;
+  watch_callback *cb;
+  unsigned long start;
+  unsigned long end;
+  int used;
+};
+
+#define NUMBER_OF_BP 4
+#define SDRAM_BANDWIDTH 400000000
+#define INTMEM_BANDWIDTH 400000000
+#define NBR_OF_SLOTS 64
+#define NBR_OF_REGIONS 2
+#define NBR_OF_CLIENTS 15
+#define ARBITERS 2
+#define UNASSIGNED 100
+
+struct arbiter {
+  unsigned long instance;
+  int nbr_regions;
+  int nbr_clients;
+  int requested_slots[NBR_OF_REGIONS][NBR_OF_CLIENTS];
+  int active_clients[NBR_OF_REGIONS][NBR_OF_CLIENTS];
+};
+
+static struct crisv32_watch_entry watches[ARBITERS][NUMBER_OF_BP] =
+{
+  {
+  {regi_marb_foo_bp0},
+  {regi_marb_foo_bp1},
+  {regi_marb_foo_bp2},
+  {regi_marb_foo_bp3}
+  },
+  {
+  {regi_marb_bar_bp0},
+  {regi_marb_bar_bp1},
+  {regi_marb_bar_bp2},
+  {regi_marb_bar_bp3}
+  }
+};
+
+struct arbiter arbiters[ARBITERS] =
+{
+  { /* L2 cache arbiter */
+    .instance = regi_marb_foo,
+    .nbr_regions = 2,
+    .nbr_clients = 15
+  },
+  { /* DDR2 arbiter */
+    .instance = regi_marb_bar,
+    .nbr_regions = 1,
+    .nbr_clients = 9
+  }
+};
+
+static int max_bandwidth[NBR_OF_REGIONS] = {SDRAM_BANDWIDTH, INTMEM_BANDWIDTH};
+
+DEFINE_SPINLOCK(arbiter_lock);
+
+static irqreturn_t
+crisv32_foo_arbiter_irq(int irq, void *dev_id);
+static irqreturn_t
+crisv32_bar_arbiter_irq(int irq, void *dev_id);
+
+/*
+ * "I'm the arbiter, I know the score.
+ *  From square one I'll be watching all 64."
+ * (memory arbiter slots, that is)
+ *
+ *  Or in other words:
+ * Program the memory arbiter slots for "region" according to what's
+ * in requested_slots[] and active_clients[], while minimizing
+ * latency. A caller may pass a non-zero positive amount for
+ * "unused_slots", which must then be the unallocated, remaining
+ * number of slots, free to hand out to any client.
+ */
+
+static void crisv32_arbiter_config(int arbiter, int region, int unused_slots)
+{
+	int slot;
+	int client;
+	int interval = 0;
+
+	/*
+	 * This vector corresponds to the hardware arbiter slots (see
+	 * the hardware documentation for semantics). We initialize
+	 * each slot with a suitable sentinel value outside the valid
+	 * range {0 .. NBR_OF_CLIENTS - 1} and replace them with
+	 * client indexes. Then it's fed to the hardware.
+	 */
+	s8 val[NBR_OF_SLOTS];
+
+	for (slot = 0; slot < NBR_OF_SLOTS; slot++)
+	    val[slot] = -1;
+
+	for (client = 0; client < arbiters[arbiter].nbr_clients; client++) {
+	    int pos;
+	    /* Allocate the requested non-zero number of slots, but
+	     * also give clients with zero-requests one slot each
+	     * while stocks last. We do the latter here, in client
+	     * order. This makes sure zero-request clients are the
+	     * first to get to any spare slots, else those slots
+	     * could, when bandwidth is allocated close to the limit,
+	     * all be allocated to low-index non-zero-request clients
+	     * in the default-fill loop below. Another positive but
+	     * secondary effect is a somewhat better spread of the
+	     * zero-bandwidth clients in the vector, avoiding some of
+	     * the latency that could otherwise be caused by the
+	     * partitioning of non-zero-bandwidth clients at low
+	     * indexes and zero-bandwidth clients at high
+	     * indexes. (Note that this spreading can only affect the
+	     * unallocated bandwidth.)  All the above only matters for
+	     * memory-intensive situations, of course.
+	     */
+	    if (!arbiters[arbiter].requested_slots[region][client]) {
+		/*
+		 * Skip inactive clients. Also skip zero-slot
+		 * allocations in this pass when there are no known
+		 * free slots.
+		 */
+		if (!arbiters[arbiter].active_clients[region][client] ||
+				unused_slots <= 0)
+			continue;
+
+		unused_slots--;
+
+		/* Only allocate one slot for this client. */
+		interval = NBR_OF_SLOTS;
+	    } else
+		interval = NBR_OF_SLOTS /
+			arbiters[arbiter].requested_slots[region][client];
+
+	    pos = 0;
+	    while (pos < NBR_OF_SLOTS) {
+		if (val[pos] >= 0)
+		   pos++;
+		else {
+			val[pos] = client;
+			pos += interval;
+		}
+	    }
+	}
+
+	client = 0;
+	for (slot = 0; slot < NBR_OF_SLOTS; slot++) {
+		/*
+		 * Allocate remaining slots in round-robin
+		 * client-number order for active clients. For this
+		 * pass, we ignore requested bandwidth and previous
+		 * allocations.
+		 */
+		if (val[slot] < 0) {
+			int first = client;
+			while (!arbiters[arbiter].active_clients[region][client]) {
+				client = (client + 1) %
+					arbiters[arbiter].nbr_clients;
+				if (client == first)
+				   break;
+			}
+			val[slot] = client;
+			client = (client + 1) % arbiters[arbiter].nbr_clients;
+		}
+		if (arbiter == 0) {
+			if (region == EXT_REGION)
+				REG_WR_INT_VECT(marb_foo, regi_marb_foo,
+					rw_l2_slots, slot, val[slot]);
+			else if (region == INT_REGION)
+				REG_WR_INT_VECT(marb_foo, regi_marb_foo,
+					rw_intm_slots, slot, val[slot]);
+		} else {
+			REG_WR_INT_VECT(marb_bar, regi_marb_bar,
+				rw_ddr2_slots, slot, val[slot]);
+		}
+	}
+}
+
+extern char _stext, _etext;
+
+static void crisv32_arbiter_init(void)
+{
+	static int initialized;
+
+	if (initialized)
+		return;
+
+	initialized = 1;
+
+	/*
+	 * CPU caches are always set to active, but with zero
+	 * bandwidth allocated. It should be ok to allocate zero
+	 * bandwidth for the caches, because DMA for other channels
+	 * will supposedly finish, once their programmed amount is
+	 * done, and then the caches will get access according to the
+	 * "fixed scheme" for unclaimed slots. Though, if for some
+	 * use-case somewhere, there's a maximum CPU latency for
+	 * e.g. some interrupt, we have to start allocating specific
+	 * bandwidth for the CPU caches too.
+	 */
+	arbiters[0].active_clients[EXT_REGION][11] = 1;
+	arbiters[0].active_clients[EXT_REGION][12] = 1;
+	crisv32_arbiter_config(0, EXT_REGION, 0);
+	crisv32_arbiter_config(0, INT_REGION, 0);
+	crisv32_arbiter_config(1, EXT_REGION, 0);
+
+	/* dev_id carries the arbiter index expected by the handlers. */
+	if (request_irq(MEMARB_FOO_INTR_VECT, crisv32_foo_arbiter_irq,
+			0, "arbiter", (void *) 0))
+		printk(KERN_ERR "Couldn't allocate arbiter IRQ\n");
+
+	if (request_irq(MEMARB_BAR_INTR_VECT, crisv32_bar_arbiter_irq,
+			0, "arbiter", (void *) 1))
+		printk(KERN_ERR "Couldn't allocate arbiter IRQ\n");
+
+#ifndef CONFIG_ETRAX_KGDB
+	/* Global watch for writes to kernel text segment. */
+	crisv32_arbiter_watch(virt_to_phys(&_stext), &_etext - &_stext,
+		MARB_CLIENTS(arbiter_all_clients, arbiter_bar_all_clients),
+			      arbiter_all_write, NULL);
+#endif
+
+	/* Set up max burst sizes by default */
+	REG_WR_INT(marb_bar, regi_marb_bar, rw_h264_rd_burst, 3);
+	REG_WR_INT(marb_bar, regi_marb_bar, rw_h264_wr_burst, 3);
+	REG_WR_INT(marb_bar, regi_marb_bar, rw_ccd_burst, 3);
+	REG_WR_INT(marb_bar, regi_marb_bar, rw_vin_wr_burst, 3);
+	REG_WR_INT(marb_bar, regi_marb_bar, rw_vin_rd_burst, 3);
+	REG_WR_INT(marb_bar, regi_marb_bar, rw_sclr_rd_burst, 3);
+	REG_WR_INT(marb_bar, regi_marb_bar, rw_vout_burst, 3);
+	REG_WR_INT(marb_bar, regi_marb_bar, rw_sclr_fifo_burst, 3);
+	REG_WR_INT(marb_bar, regi_marb_bar, rw_l2cache_burst, 3);
+}
+
+int crisv32_arbiter_allocate_bandwidth(int client, int region,
+				      unsigned long bandwidth)
+{
+	int i;
+	int total_assigned = 0;
+	int total_clients = 0;
+	int req;
+	int arbiter = 0;
+
+	crisv32_arbiter_init();
+
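+	/* Clients of the second (bar) arbiter are encoded in the
+	   high 16 bits of the client argument (cf. MARB_CLIENTS). */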
+	if (client & 0xffff0000) {
+		arbiter = 1;
+		client >>= 16;
+	}
+
+	for (i = 0; i < arbiters[arbiter].nbr_clients; i++) {
+		total_assigned += arbiters[arbiter].requested_slots[region][i];
+		total_clients += arbiters[arbiter].active_clients[region][i];
+	}
+
+	/* Avoid division by 0 for 0-bandwidth requests. */
+	req = bandwidth == 0
+		? 0 : NBR_OF_SLOTS / (max_bandwidth[region] / bandwidth);
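+	/* For example, a request for one quarter of max_bandwidth
+	   yields NBR_OF_SLOTS / 4 = 16 of the 64 slots. */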
+
+	/*
+	 * We make sure that there are enough slots only for non-zero
+	 * requests. Requesting 0 bandwidth *may* allocate slots,
+	 * though if all bandwidth is allocated, such a client won't
+	 * get any and will have to rely on getting memory access
+	 * according to the fixed scheme that's the default when one
+	 * of the slot-allocated clients doesn't claim their slot.
+	 */
+	if (total_assigned + req > NBR_OF_SLOTS)
+	   return -ENOMEM;
+
+	arbiters[arbiter].active_clients[region][client] = 1;
+	arbiters[arbiter].requested_slots[region][client] = req;
+	crisv32_arbiter_config(arbiter, region, NBR_OF_SLOTS - total_assigned);
+
+	/* Propagate allocation from foo to bar */
+	if (arbiter == 0)
+		crisv32_arbiter_allocate_bandwidth(8 << 16,
+			EXT_REGION, bandwidth);
+	return 0;
+}
+
+/*
+ * Main entry for bandwidth deallocation.
+ *
+ * Strictly speaking, for a somewhat constant set of clients where
+ * each client gets a constant bandwidth and is just enabled or
+ * disabled (somewhat dynamically), no action is necessary here to
+ * avoid starvation for non-zero-allocation clients, as the allocated
+ * slots will just be unused. However, handing out those unused slots
+ * to active clients avoids needless latency if the "fixed scheme"
+ * would give unclaimed slots to an eager low-index client.
+ */
+
+void crisv32_arbiter_deallocate_bandwidth(int client, int region)
+{
+	int i;
+	int total_assigned = 0;
+	int arbiter = 0;
+
+	if (client & 0xffff0000)
+		arbiter = 1;
+
+	arbiters[arbiter].requested_slots[region][client] = 0;
+	arbiters[arbiter].active_clients[region][client] = 0;
+
+	for (i = 0; i < arbiters[arbiter].nbr_clients; i++)
+		total_assigned += arbiters[arbiter].requested_slots[region][i];
+
+	crisv32_arbiter_config(arbiter, region, NBR_OF_SLOTS - total_assigned);
+}
+
+int crisv32_arbiter_watch(unsigned long start, unsigned long size,
+			  unsigned long clients, unsigned long accesses,
+			  watch_callback *cb)
+{
+	int i;
+	int arbiter;
+	int used[2] = { 0, 0 };
+	int ret = 0;
+
+	crisv32_arbiter_init();
+
+	if (start > 0x80000000) {
+		printk(KERN_ERR "Arbiter: %lX doesn't look like a "
+			"physical address", start);
+		return -EFAULT;
+	}
+
+	spin_lock(&arbiter_lock);
+
+	if (clients & 0xffff)
+		used[0] = 1;
+	if (clients & 0xffff0000)
+		used[1] = 1;
+
+	for (arbiter = 0; arbiter < ARBITERS; arbiter++) {
+		if (!used[arbiter])
+			continue;
+
+		for (i = 0; i < NUMBER_OF_BP; i++) {
+			if (!watches[arbiter][i].used) {
+				unsigned intr_mask;
+				if (arbiter)
+					intr_mask = REG_RD_INT(marb_bar,
+						regi_marb_bar, rw_intr_mask);
+				else
+					intr_mask = REG_RD_INT(marb_foo,
+						regi_marb_foo, rw_intr_mask);
+
+				watches[arbiter][i].used = 1;
+				watches[arbiter][i].start = start;
+				watches[arbiter][i].end = start + size;
+				watches[arbiter][i].cb = cb;
+
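+				/* Encode the watch id: (breakpoint
+				   index + 1) shifted left by 8 for
+				   arbiter 0 and by 9 for arbiter 1;
+				   crisv32_arbiter_unwatch() decodes
+				   this again. */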
+				ret |= (i + 1) << (arbiter + 8);
+				if (arbiter) {
+					REG_WR_INT(marb_bar_bp,
+						watches[arbiter][i].instance,
+						rw_first_addr,
+						watches[arbiter][i].start);
+					REG_WR_INT(marb_bar_bp,
+						watches[arbiter][i].instance,
+						rw_last_addr,
+						watches[arbiter][i].end);
+					REG_WR_INT(marb_bar_bp,
+						watches[arbiter][i].instance,
+						rw_op, accesses);
+					REG_WR_INT(marb_bar_bp,
+						watches[arbiter][i].instance,
+						rw_clients,
+						clients & 0xffff);
+				} else {
+					REG_WR_INT(marb_foo_bp,
+						watches[arbiter][i].instance,
+						rw_first_addr,
+						watches[arbiter][i].start);
+					REG_WR_INT(marb_foo_bp,
+						watches[arbiter][i].instance,
+						rw_last_addr,
+						watches[arbiter][i].end);
+					REG_WR_INT(marb_foo_bp,
+						watches[arbiter][i].instance,
+						rw_op, accesses);
+					REG_WR_INT(marb_foo_bp,
+						watches[arbiter][i].instance,
+						rw_clients, clients >> 16);
+				}
+
+				if (i == 0)
+					intr_mask |= 1;
+				else if (i == 1)
+					intr_mask |= 2;
+				else if (i == 2)
+					intr_mask |= 4;
+				else if (i == 3)
+					intr_mask |= 8;
+
+				if (arbiter)
+					REG_WR_INT(marb_bar, regi_marb_bar,
+						rw_intr_mask, intr_mask);
+				else
+					REG_WR_INT(marb_foo, regi_marb_foo,
+						rw_intr_mask, intr_mask);
+
+				break;
+			}
+		}
+	}
+	spin_unlock(&arbiter_lock);
+	if (ret)
+		return ret;
+	else
+		return -ENOMEM;
+}
+
+int crisv32_arbiter_unwatch(int id)
+{
+	int arbiter;
+	int intr_mask;
+
+	crisv32_arbiter_init();
+
+	spin_lock(&arbiter_lock);
+
+	for (arbiter = 0; arbiter < ARBITERS; arbiter++) {
+		int id2;
+
+		if (arbiter)
+			intr_mask = REG_RD_INT(marb_bar, regi_marb_bar,
+				rw_intr_mask);
+		else
+			intr_mask = REG_RD_INT(marb_foo, regi_marb_foo,
+				rw_intr_mask);
+
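+		/* Recover this arbiter's breakpoint number from the
+		   encoded watch id; zero means no watch on it. */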
+		id2 = (id & (0xff << (arbiter + 8))) >> (arbiter + 8);
+		if (id2 == 0)
+			continue;
+		id2--;
+		if ((id2 >= NUMBER_OF_BP) || (!watches[arbiter][id2].used)) {
+			spin_unlock(&arbiter_lock);
+			return -EINVAL;
+		}
+
+		memset(&watches[arbiter][id2], 0,
+			sizeof(struct crisv32_watch_entry));
+
+		if (id2 == 0)
+			intr_mask &= ~1;
+		else if (id2 == 1)
+			intr_mask &= ~2;
+		else if (id2 == 2)
+			intr_mask &= ~4;
+		else if (id2 == 3)
+			intr_mask &= ~8;
+
+		if (arbiter)
+			REG_WR_INT(marb_bar, regi_marb_bar, rw_intr_mask,
+				intr_mask);
+		else
+			REG_WR_INT(marb_foo, regi_marb_foo, rw_intr_mask,
+				intr_mask);
+	}
+
+	spin_unlock(&arbiter_lock);
+	return 0;
+}
+
+extern void show_registers(struct pt_regs *regs);
+
+
+static irqreturn_t
+crisv32_foo_arbiter_irq(int irq, void *dev_id)
+{
+	reg_marb_foo_r_masked_intr masked_intr =
+		REG_RD(marb_foo, regi_marb_foo, r_masked_intr);
+	reg_marb_foo_bp_r_brk_clients r_clients;
+	reg_marb_foo_bp_r_brk_addr r_addr;
+	reg_marb_foo_bp_r_brk_op r_op;
+	reg_marb_foo_bp_r_brk_first_client r_first;
+	reg_marb_foo_bp_r_brk_size r_size;
+	reg_marb_foo_bp_rw_ack ack = {0};
+	reg_marb_foo_rw_ack_intr ack_intr = {
+		.bp0 = 1, .bp1 = 1, .bp2 = 1, .bp3 = 1
+	};
+	struct crisv32_watch_entry *watch;
+	unsigned arbiter = (unsigned)dev_id;
+
+	masked_intr = REG_RD(marb_foo, regi_marb_foo, r_masked_intr);
+
+	if (masked_intr.bp0)
+		watch = &watches[arbiter][0];
+	else if (masked_intr.bp1)
+		watch = &watches[arbiter][1];
+	else if (masked_intr.bp2)
+		watch = &watches[arbiter][2];
+	else if (masked_intr.bp3)
+		watch = &watches[arbiter][3];
+	else
+		return IRQ_NONE;
+
+	/* Retrieve all useful information and print it. */
+	r_clients = REG_RD(marb_foo_bp, watch->instance, r_brk_clients);
+	r_addr = REG_RD(marb_foo_bp, watch->instance, r_brk_addr);
+	r_op = REG_RD(marb_foo_bp, watch->instance, r_brk_op);
+	r_first = REG_RD(marb_foo_bp, watch->instance, r_brk_first_client);
+	r_size = REG_RD(marb_foo_bp, watch->instance, r_brk_size);
+
+	printk(KERN_DEBUG "Arbiter IRQ\n");
+	printk(KERN_DEBUG "Clients %X addr %X op %X first %X size %X\n",
+	       REG_TYPE_CONV(int, reg_marb_foo_bp_r_brk_clients, r_clients),
+	       REG_TYPE_CONV(int, reg_marb_foo_bp_r_brk_addr, r_addr),
+	       REG_TYPE_CONV(int, reg_marb_foo_bp_r_brk_op, r_op),
+	       REG_TYPE_CONV(int, reg_marb_foo_bp_r_brk_first_client, r_first),
+	       REG_TYPE_CONV(int, reg_marb_foo_bp_r_brk_size, r_size));
+
+	REG_WR(marb_foo_bp, watch->instance, rw_ack, ack);
+	REG_WR(marb_foo, regi_marb_foo, rw_ack_intr, ack_intr);
+
+	printk(KERN_DEBUG "IRQ occurred at %X\n", (unsigned)get_irq_regs());
+
+	if (watch->cb)
+		watch->cb();
+
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t
+crisv32_bar_arbiter_irq(int irq, void *dev_id)
+{
+	reg_marb_bar_r_masked_intr masked_intr =
+		REG_RD(marb_bar, regi_marb_bar, r_masked_intr);
+	reg_marb_bar_bp_r_brk_clients r_clients;
+	reg_marb_bar_bp_r_brk_addr r_addr;
+	reg_marb_bar_bp_r_brk_op r_op;
+	reg_marb_bar_bp_r_brk_first_client r_first;
+	reg_marb_bar_bp_r_brk_size r_size;
+	reg_marb_bar_bp_rw_ack ack = {0};
+	reg_marb_bar_rw_ack_intr ack_intr = {
+		.bp0 = 1, .bp1 = 1, .bp2 = 1, .bp3 = 1
+	};
+	struct crisv32_watch_entry *watch;
+	unsigned arbiter = (unsigned)dev_id;
+
+	masked_intr = REG_RD(marb_bar, regi_marb_bar, r_masked_intr);
+
+	if (masked_intr.bp0)
+		watch = &watches[arbiter][0];
+	else if (masked_intr.bp1)
+		watch = &watches[arbiter][1];
+	else if (masked_intr.bp2)
+		watch = &watches[arbiter][2];
+	else if (masked_intr.bp3)
+		watch = &watches[arbiter][3];
+	else
+		return IRQ_NONE;
+
+	/* Retrieve all useful information and print it. */
+	r_clients = REG_RD(marb_bar_bp, watch->instance, r_brk_clients);
+	r_addr = REG_RD(marb_bar_bp, watch->instance, r_brk_addr);
+	r_op = REG_RD(marb_bar_bp, watch->instance, r_brk_op);
+	r_first = REG_RD(marb_bar_bp, watch->instance, r_brk_first_client);
+	r_size = REG_RD(marb_bar_bp, watch->instance, r_brk_size);
+
+	printk(KERN_DEBUG "Arbiter IRQ\n");
+	printk(KERN_DEBUG "Clients %X addr %X op %X first %X size %X\n",
+	       REG_TYPE_CONV(int, reg_marb_bar_bp_r_brk_clients, r_clients),
+	       REG_TYPE_CONV(int, reg_marb_bar_bp_r_brk_addr, r_addr),
+	       REG_TYPE_CONV(int, reg_marb_bar_bp_r_brk_op, r_op),
+	       REG_TYPE_CONV(int, reg_marb_bar_bp_r_brk_first_client, r_first),
+	       REG_TYPE_CONV(int, reg_marb_bar_bp_r_brk_size, r_size));
+
+	REG_WR(marb_bar_bp, watch->instance, rw_ack, ack);
+	REG_WR(marb_bar, regi_marb_bar, rw_ack_intr, ack_intr);
+
+	printk(KERN_DEBUG "IRQ occurred at %X\n", (unsigned)get_irq_regs()->erp);
+
+	if (watch->cb)
+		watch->cb();
+
+	return IRQ_HANDLED;
+}
+
diff --git a/arch/cris/arch-v32/mach-a3/dma.c b/arch/cris/arch-v32/mach-a3/dma.c
new file mode 100644
index 0000000..47c64bf
--- /dev/null
+++ b/arch/cris/arch-v32/mach-a3/dma.c
@@ -0,0 +1,184 @@
+/* Wrapper for DMA channel allocator that starts clocks etc */
+
+#include <linux/kernel.h>
+#include <linux/spinlock.h>
+#include <mach/dma.h>
+#include <hwregs/reg_map.h>
+#include <hwregs/reg_rdwr.h>
+#include <hwregs/marb_defs.h>
+#include <hwregs/clkgen_defs.h>
+#include <hwregs/strmux_defs.h>
+#include <linux/errno.h>
+#include <arbiter.h>
+
+static char used_dma_channels[MAX_DMA_CHANNELS];
+static const char *used_dma_channels_users[MAX_DMA_CHANNELS];
+
+static DEFINE_SPINLOCK(dma_lock);
+
+int crisv32_request_dma(unsigned int dmanr, const char *device_id,
+	unsigned options, unsigned int bandwidth, enum dma_owner owner)
+{
+	unsigned long flags;
+	reg_clkgen_rw_clk_ctrl clk_ctrl;
+	reg_strmux_rw_cfg strmux_cfg;
+
+	if (crisv32_arbiter_allocate_bandwidth(dmanr,
+			options & DMA_INT_MEM ? INT_REGION : EXT_REGION,
+			bandwidth))
+		return -ENOMEM;
+
+	spin_lock_irqsave(&dma_lock, flags);
+
+	if (used_dma_channels[dmanr]) {
+		spin_unlock_irqrestore(&dma_lock, flags);
+		if (options & DMA_VERBOSE_ON_ERROR)
+			printk(KERN_ERR "Failed to request DMA %i for %s, "
+				"already allocated by %s\n",
+				dmanr,
+				device_id,
+				used_dma_channels_users[dmanr]);
+
+		if (options & DMA_PANIC_ON_ERROR)
+			panic("request_dma error!");
+		return -EBUSY;
+	}
+	clk_ctrl = REG_RD(clkgen, regi_clkgen, rw_clk_ctrl);
+	strmux_cfg = REG_RD(strmux, regi_strmux, rw_cfg);
+
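+	/* Each pair of DMA channels shares a clock-gate bit in
+	   clk_ctrl; enable the one covering the requested channel. */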
+	switch (dmanr) {
+	case 0:
+	case 1:
+		clk_ctrl.dma0_1_eth = 1;
+		break;
+	case 2:
+	case 3:
+		clk_ctrl.dma2_3_strcop = 1;
+		break;
+	case 4:
+	case 5:
+		clk_ctrl.dma4_5_iop = 1;
+		break;
+	case 6:
+	case 7:
+		clk_ctrl.sser_ser_dma6_7 = 1;
+		break;
+	case 9:
+	case 11:
+		clk_ctrl.dma9_11 = 1;
+		break;
+#if MAX_DMA_CHANNELS-1 != 11
+#error Check dma.c
+#endif
+	default:
+		spin_unlock_irqrestore(&dma_lock, flags);
+		if (options & DMA_VERBOSE_ON_ERROR)
+			printk(KERN_ERR "Failed to request DMA %i for %s, "
+				"only 0-%i valid)\n",
+				dmanr, device_id, MAX_DMA_CHANNELS-1);
+
+		if (options & DMA_PANIC_ON_ERROR)
+			panic("request_dma error!");
+		return -EINVAL;
+	}
+
+	switch (owner) {
+	case dma_eth:
+		if (dmanr == 0)
+			strmux_cfg.dma0 = regk_strmux_eth;
+		else if (dmanr == 1)
+			strmux_cfg.dma1 = regk_strmux_eth;
+		else
+			panic("Invalid DMA channel for eth\n");
+		break;
+	case dma_ser0:
+		if (dmanr == 0)
+			strmux_cfg.dma0 = regk_strmux_ser0;
+		else if (dmanr == 1)
+			strmux_cfg.dma1 = regk_strmux_ser0;
+		else
+			panic("Invalid DMA channel for ser0\n");
+		break;
+	case dma_ser3:
+		if (dmanr == 2)
+			strmux_cfg.dma2 = regk_strmux_ser3;
+		else if (dmanr == 3)
+			strmux_cfg.dma3 = regk_strmux_ser3;
+		else
+			panic("Invalid DMA channel for ser3\n");
+		break;
+	case dma_strp:
+		if (dmanr == 2)
+			strmux_cfg.dma2 = regk_strmux_strcop;
+		else if (dmanr == 3)
+			strmux_cfg.dma3 = regk_strmux_strcop;
+		else
+			panic("Invalid DMA channel for strp\n");
+		break;
+	case dma_ser1:
+		if (dmanr == 4)
+			strmux_cfg.dma4 = regk_strmux_ser1;
+		else if (dmanr == 5)
+			strmux_cfg.dma5 = regk_strmux_ser1;
+		else
+			panic("Invalid DMA channel for ser1\n");
+		break;
+	case dma_iop:
+		if (dmanr == 4)
+			strmux_cfg.dma4 = regk_strmux_iop;
+		else if (dmanr == 5)
+			strmux_cfg.dma5 = regk_strmux_iop;
+		else
+			panic("Invalid DMA channel for iop\n");
+		break;
+	case dma_ser2:
+		if (dmanr == 6)
+			strmux_cfg.dma6 = regk_strmux_ser2;
+		else if (dmanr == 7)
+			strmux_cfg.dma7 = regk_strmux_ser2;
+		else
+			panic("Invalid DMA channel for ser2\n");
+		break;
+	case dma_sser:
+		if (dmanr == 6)
+			strmux_cfg.dma6 = regk_strmux_sser;
+		else if (dmanr == 7)
+			strmux_cfg.dma7 = regk_strmux_sser;
+		else
+			panic("Invalid DMA channel for sser\n");
+		break;
+	case dma_ser4:
+		if (dmanr == 9)
+			strmux_cfg.dma9 = regk_strmux_ser4;
+		else
+			panic("Invalid DMA channel for ser4\n");
+		break;
+	case dma_jpeg:
+		if (dmanr == 9)
+			strmux_cfg.dma9 = regk_strmux_jpeg;
+		else
+			panic("Invalid DMA channel for JPEG\n");
+		break;
+	case dma_h264:
+		if (dmanr == 11)
+			strmux_cfg.dma11 = regk_strmux_h264;
+		else
+			panic("Invalid DMA channel for H264\n");
+		break;
+	}
+
+	used_dma_channels[dmanr] = 1;
+	used_dma_channels_users[dmanr] = device_id;
+	REG_WR(clkgen, regi_clkgen, rw_clk_ctrl, clk_ctrl);
+	REG_WR(strmux, regi_strmux, rw_cfg, strmux_cfg);
+	spin_unlock_irqrestore(&dma_lock, flags);
+	return 0;
+}
+
+void crisv32_free_dma(unsigned int dmanr)
+{
+	spin_lock(&dma_lock);
+	used_dma_channels[dmanr] = 0;
+	spin_unlock(&dma_lock);
+}
diff --git a/arch/cris/arch-v32/mach-a3/dram_init.S b/arch/cris/arch-v32/mach-a3/dram_init.S
new file mode 100644
index 0000000..ec8648b
--- /dev/null
+++ b/arch/cris/arch-v32/mach-a3/dram_init.S
@@ -0,0 +1,118 @@
+/*
+ * DDR SDRAM initialization - alter with care
+ * This file is intended to be included from other assembler files
+ *
+ * Note: This file may not modify r8 or r9 because they are used to
+ * carry information from the decompressor to the kernel
+ *
+ * Copyright (C) 2005-2007 Axis Communications AB
+ *
+ * Authors:  Mikael Starvik <starvik@axis.com>
+ */
+
+/* Just to be certain the config file is included, we include it here
+ * explicitly instead of depending on it being included in the file that
+ * uses this code.
+ */
+
+#include <hwregs/asm/reg_map_asm.h>
+#include <hwregs/asm/ddr2_defs_asm.h>
+
+	;; WARNING! The registers r8 and r9 are used as parameters carrying
+	;; information from the decompressor (if the kernel was compressed).
+	;; They should not be used in the code below.
+
+	;; Refer to ddr2 MDS for initialization sequence
+
+	; 2. Wait 200us
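+	; (busy-wait idiom: subq executes in the delay slot of bne and
+	; sets the flags tested on the next turn of the loop)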
+	move.d   10000, $r2
+1:	bne      1b
+	subq     1, $r2
+
+	; Start clock
+	move.d   REG_ADDR(ddr2, regi_ddr2_ctrl, rw_phy_cfg), $r0
+	move.d   REG_STATE(ddr2, rw_phy_cfg, en, yes), $r1
+	move.d   $r1, [$r0]
+
+	; 2. Wait 200us
+	move.d   10000, $r2
+1:	bne      1b
+	subq     1, $r2
+
+	; Reset phy and start calibration
+	move.d   REG_ADDR(ddr2, regi_ddr2_ctrl, rw_phy_ctrl), $r0
+	move.d   REG_STATE(ddr2, rw_phy_ctrl, rst, yes) | \
+		 REG_STATE(ddr2, rw_phy_ctrl, cal_rst, yes), $r1
+	move.d   $r1, [$r0]
+	move.d	 REG_STATE(ddr2, rw_phy_ctrl, cal_start, yes), $r1
+	move.d   $r1, [$r0]
+
+	; 2. Wait 200us
+	move.d   10000, $r2
+1:	bne      1b
+	subq     1, $r2
+
+	; Issue commands
+	move.d   REG_ADDR(ddr2, regi_ddr2_ctrl, rw_ctrl), $r0
+	move.d   sdram_commands_start, $r2
+command_loop:
+	movu.b  [$r2+], $r1
+	movu.w  [$r2+], $r3
+do_cmd:
+	lslq     16, $r1
+	or.d     $r3, $r1
+	move.d   $r1, [$r0]
+	; 2. Wait 200us
+	move.d   10000, $r4
+1:	bne      1b
+	subq     1, $r4
+	cmp.d    sdram_commands_end, $r2
+	blo      command_loop
+	nop
+
+	; Set timing
+	move.d   REG_ADDR(ddr2, regi_ddr2_ctrl, rw_timing), $r0
+	move.d   CONFIG_ETRAX_DDR2_TIMING, $r1
+	move.d   $r1, [$r0]
+
+	; Set latency
+	move.d   REG_ADDR(ddr2, regi_ddr2_ctrl, rw_latency), $r0
+	move.d   CONFIG_ETRAX_DDR2_LATENCY, $r1
+	move.d   $r1, [$r0]
+
+	; Set configuration
+	move.d   REG_ADDR(ddr2, regi_ddr2_ctrl, rw_cfg), $r0
+	move.d   CONFIG_ETRAX_DDR2_CONFIG, $r1
+	move.d   $r1, [$r0]
+
+	ba after_sdram_commands
+	nop
+
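+	; Table of (command byte, 16-bit operand word) pairs, consumed
+	; pairwise by command_loop above.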
+sdram_commands_start:
+	.byte regk_ddr2_deselect
+	.word 0
+	.byte regk_ddr2_pre
+	.word regk_ddr2_pre_all
+	.byte regk_ddr2_emrs2
+	.word 0
+	.byte regk_ddr2_emrs3
+	.word 0
+	.byte regk_ddr2_emrs
+	.word regk_ddr2_dll_en
+	.byte regk_ddr2_mrs
+	.word regk_ddr2_dll_rst
+	.byte regk_ddr2_pre
+	.word regk_ddr2_pre_all
+	.byte regk_ddr2_ref
+	.word 0
+	.byte regk_ddr2_ref
+	.word 0
+	.byte regk_ddr2_mrs
+	.word CONFIG_ETRAX_DDR2_MRS & 0xffff
+	.byte regk_ddr2_emrs
+	.word regk_ddr2_ocd_default | regk_ddr2_dll_en
+	.byte regk_ddr2_emrs
+	.word regk_ddr2_ocd_exit | regk_ddr2_dll_en | (CONFIG_ETRAX_DDR2_MRS >> 16)
+sdram_commands_end:
+	.align 1
+after_sdram_commands:
diff --git a/arch/cris/arch-v32/mach-a3/hw_settings.S b/arch/cris/arch-v32/mach-a3/hw_settings.S
new file mode 100644
index 0000000..0145725
--- /dev/null
+++ b/arch/cris/arch-v32/mach-a3/hw_settings.S
@@ -0,0 +1,53 @@
+/*
+ * This table is used by some tools to extract hardware parameters.
+ * The table should be included in the kernel and the decompressor.
+ * Don't forget to update the tools if you change this table.
+ *
+ * Copyright (C) 2001-2007 Axis Communications AB
+ *
+ * Authors:  Mikael Starvik <starvik@axis.com>
+ */
+
+#include <hwregs/asm/reg_map_asm.h>
+#include <hwregs/asm/ddr2_defs_asm.h>
+#include <hwregs/asm/gio_defs_asm.h>
+
+	.ascii "HW_PARAM_MAGIC" ; Magic number
+	.dword 0xc0004000	; Kernel start address
+
+	; Debug port
+#ifdef CONFIG_ETRAX_DEBUG_PORT0
+	.dword 0
+#elif defined(CONFIG_ETRAX_DEBUG_PORT1)
+	.dword 1
+#elif defined(CONFIG_ETRAX_DEBUG_PORT2)
+	.dword 2
+#elif defined(CONFIG_ETRAX_DEBUG_PORT3)
+	.dword 3
+#else
+	.dword 4 ; No debug
+#endif
+
+	; Register values
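+	; Mostly (register address, value) pairs; CONFIG_ETRAX_DDR2_MRS
+	; is stored as a bare value, and a zero dword terminates the list.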
+	.dword REG_ADDR(ddr2, regi_ddr2_ctrl, rw_cfg)
+	.dword CONFIG_ETRAX_DDR2_CONFIG
+	.dword REG_ADDR(ddr2, regi_ddr2_ctrl, rw_latency)
+	.dword CONFIG_ETRAX_DDR2_LATENCY
+	.dword REG_ADDR(ddr2, regi_ddr2_ctrl, rw_timing)
+	.dword CONFIG_ETRAX_DDR2_TIMING
+	.dword CONFIG_ETRAX_DDR2_MRS
+
+	.dword REG_ADDR(gio, regi_gio, rw_pa_dout)
+	.dword CONFIG_ETRAX_DEF_GIO_PA_OUT
+	.dword REG_ADDR(gio, regi_gio, rw_pa_oe)
+	.dword CONFIG_ETRAX_DEF_GIO_PA_OE
+	.dword REG_ADDR(gio, regi_gio, rw_pb_dout)
+	.dword CONFIG_ETRAX_DEF_GIO_PB_OUT
+	.dword REG_ADDR(gio, regi_gio, rw_pb_oe)
+	.dword CONFIG_ETRAX_DEF_GIO_PB_OE
+	.dword REG_ADDR(gio, regi_gio, rw_pc_dout)
+	.dword CONFIG_ETRAX_DEF_GIO_PC_OUT
+	.dword REG_ADDR(gio, regi_gio, rw_pc_oe)
+	.dword CONFIG_ETRAX_DEF_GIO_PC_OE
+
+	.dword 0 ; No more register values
diff --git a/arch/cris/arch-v32/mach-a3/pinmux.c b/arch/cris/arch-v32/mach-a3/pinmux.c
new file mode 100644
index 0000000..591f775
--- /dev/null
+++ b/arch/cris/arch-v32/mach-a3/pinmux.c
@@ -0,0 +1,388 @@
+/*
+ * Allocator for I/O pins. All pins are allocated to GPIO at bootup.
+ * Unassigned pins and GPIO pins can be allocated to a fixed interface
+ * or the I/O processor instead.
+ *
+ * Copyright (c) 2005-2007 Axis Communications AB.
+ */
+
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/spinlock.h>
+#include <hwregs/reg_map.h>
+#include <hwregs/reg_rdwr.h>
+#include <pinmux.h>
+#include <hwregs/pinmux_defs.h>
+#include <hwregs/clkgen_defs.h>
+
+#undef DEBUG
+
+#define PINS 80
+#define PORT_PINS 32
+#define PORTS 3
+
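+/* Current mode of each pin, indexed as port * PORT_PINS + pin. */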
+static char pins[PINS];
+static DEFINE_SPINLOCK(pinmux_lock);
+
+static void crisv32_pinmux_set(int port);
+
+int
+crisv32_pinmux_init(void)
+{
+	static int initialized;
+
+	if (!initialized) {
+		initialized = 1;
+		REG_WR_INT(pinmux, regi_pinmux, rw_hwprot, 0);
+		crisv32_pinmux_alloc(PORT_A, 0, 31, pinmux_gpio);
+		crisv32_pinmux_alloc(PORT_B, 0, 31, pinmux_gpio);
+		crisv32_pinmux_alloc(PORT_C, 0, 15, pinmux_gpio);
+	}
+
+	return 0;
+}
+
+int
+crisv32_pinmux_alloc(int port, int first_pin, int last_pin, enum pin_mode mode)
+{
+	int i;
+	unsigned long flags;
+
+	crisv32_pinmux_init();
+
+	if (port >= PORTS)
+		return -EINVAL;
+
+	spin_lock_irqsave(&pinmux_lock, flags);
+
+	for (i = first_pin; i <= last_pin; i++) {
+		if ((pins[port * PORT_PINS + i] != pinmux_none) &&
+		    (pins[port * PORT_PINS + i] != pinmux_gpio) &&
+		    (pins[port * PORT_PINS + i] != mode)) {
+			spin_unlock_irqrestore(&pinmux_lock, flags);
+#ifdef DEBUG
+			panic("Pinmux alloc failed!\n");
+#endif
+			return -EPERM;
+		}
+	}
+
+	for (i = first_pin; i <= last_pin; i++)
+		pins[port * PORT_PINS + i] = mode;
+
+	crisv32_pinmux_set(port);
+
+	spin_unlock_irqrestore(&pinmux_lock, flags);
+
+	return 0;
+}
+
+int
+crisv32_pinmux_alloc_fixed(enum fixed_function function)
+{
+	int ret = -EINVAL;
+	char saved[sizeof pins];
+	unsigned long flags;
+	reg_pinmux_rw_hwprot hwprot;
+	reg_clkgen_rw_clk_ctrl clk_ctrl;
+
+	spin_lock_irqsave(&pinmux_lock, flags);
+
+	/* Save internal data for recovery */
+	memcpy(saved, pins, sizeof pins);
+
+	crisv32_pinmux_init(); /* must be done before we read rw_hwprot */
+
+	hwprot = REG_RD(pinmux, regi_pinmux, rw_hwprot);
+	clk_ctrl = REG_RD(clkgen, regi_clkgen, rw_clk_ctrl);
+
+	switch (function) {
+	case pinmux_eth:
+		clk_ctrl.eth = regk_clkgen_yes;
+		clk_ctrl.dma0_1_eth = regk_clkgen_yes;
+		ret = crisv32_pinmux_alloc(PORT_B, 8, 23, pinmux_fixed);
+		ret |= crisv32_pinmux_alloc(PORT_B, 24, 25, pinmux_fixed);
+		hwprot.eth = hwprot.eth_mdio = regk_pinmux_yes;
+		break;
+	case pinmux_geth:
+		ret = crisv32_pinmux_alloc(PORT_B, 0, 7, pinmux_fixed);
+		hwprot.geth = regk_pinmux_yes;
+		break;
+	case pinmux_tg_cmos:
+		clk_ctrl.ccd_tg_100 = clk_ctrl.ccd_tg_200 = regk_clkgen_yes;
+		ret = crisv32_pinmux_alloc(PORT_B, 27, 29, pinmux_fixed);
+		hwprot.tg_clk = regk_pinmux_yes;
+		break;
+	case pinmux_tg_ccd:
+		clk_ctrl.ccd_tg_100 = clk_ctrl.ccd_tg_200 = regk_clkgen_yes;
+		ret = crisv32_pinmux_alloc(PORT_B, 27, 31, pinmux_fixed);
+		ret |= crisv32_pinmux_alloc(PORT_C, 0, 15, pinmux_fixed);
+		hwprot.tg = hwprot.tg_clk = regk_pinmux_yes;
+		break;
+	case pinmux_vout:
+		clk_ctrl.strdma0_2_video = regk_clkgen_yes;
+		ret = crisv32_pinmux_alloc(PORT_A, 8, 18, pinmux_fixed);
+		hwprot.vout = hwprot.vout_sync = regk_pinmux_yes;
+		break;
+	case pinmux_ser1:
+		clk_ctrl.sser_ser_dma6_7 = regk_clkgen_yes;
+		ret = crisv32_pinmux_alloc(PORT_A, 24, 25, pinmux_fixed);
+		hwprot.ser1 = regk_pinmux_yes;
+		break;
+	case pinmux_ser2:
+		clk_ctrl.sser_ser_dma6_7 = regk_clkgen_yes;
+		ret = crisv32_pinmux_alloc(PORT_A, 26, 27, pinmux_fixed);
+		hwprot.ser2 = regk_pinmux_yes;
+		break;
+	case pinmux_ser3:
+		clk_ctrl.sser_ser_dma6_7 = regk_clkgen_yes;
+		ret = crisv32_pinmux_alloc(PORT_A, 28, 29, pinmux_fixed);
+		hwprot.ser3 = regk_pinmux_yes;
+		break;
+	case pinmux_ser4:
+		clk_ctrl.sser_ser_dma6_7 = regk_clkgen_yes;
+		ret = crisv32_pinmux_alloc(PORT_A, 30, 31, pinmux_fixed);
+		hwprot.ser4 = regk_pinmux_yes;
+		break;
+	case pinmux_sser:
+		clk_ctrl.sser_ser_dma6_7 = regk_clkgen_yes;
+		ret = crisv32_pinmux_alloc(PORT_A, 19, 23, pinmux_fixed);
+		hwprot.sser = regk_pinmux_yes;
+		break;
+	case pinmux_pio:
+		hwprot.pio = regk_pinmux_yes;
+		ret = 0;
+		break;
+	case pinmux_pwm0:
+		ret = crisv32_pinmux_alloc(PORT_A, 30, 30, pinmux_fixed);
+		hwprot.pwm0 = regk_pinmux_yes;
+		break;
+	case pinmux_pwm1:
+		ret = crisv32_pinmux_alloc(PORT_A, 31, 31, pinmux_fixed);
+		hwprot.pwm1 = regk_pinmux_yes;
+		break;
+	case pinmux_pwm2:
+		ret = crisv32_pinmux_alloc(PORT_B, 26, 26, pinmux_fixed);
+		hwprot.pwm2 = regk_pinmux_yes;
+		break;
+	case pinmux_i2c0:
+		ret = crisv32_pinmux_alloc(PORT_A, 0, 1, pinmux_fixed);
+		hwprot.i2c0 = regk_pinmux_yes;
+		break;
+	case pinmux_i2c1:
+		ret = crisv32_pinmux_alloc(PORT_A, 2, 3, pinmux_fixed);
+		hwprot.i2c1 = regk_pinmux_yes;
+		break;
+	case pinmux_i2c1_3wire:
+		ret = crisv32_pinmux_alloc(PORT_A, 2, 3, pinmux_fixed);
+		ret |= crisv32_pinmux_alloc(PORT_A, 7, 7, pinmux_fixed);
+		hwprot.i2c1 = hwprot.i2c1_sen = regk_pinmux_yes;
+		break;
+	case pinmux_i2c1_sda1:
+		ret = crisv32_pinmux_alloc(PORT_A, 2, 4, pinmux_fixed);
+		hwprot.i2c1 = hwprot.i2c1_sda1 = regk_pinmux_yes;
+		break;
+	case pinmux_i2c1_sda2:
+		ret = crisv32_pinmux_alloc(PORT_A, 2, 3, pinmux_fixed);
+		ret |= crisv32_pinmux_alloc(PORT_A, 5, 5, pinmux_fixed);
+		hwprot.i2c1 = hwprot.i2c1_sda2 = regk_pinmux_yes;
+		break;
+	case pinmux_i2c1_sda3:
+		ret = crisv32_pinmux_alloc(PORT_A, 2, 3, pinmux_fixed);
+		ret |= crisv32_pinmux_alloc(PORT_A, 6, 6, pinmux_fixed);
+		hwprot.i2c1 = hwprot.i2c1_sda3 = regk_pinmux_yes;
+		break;
+	default:
+		ret = -EINVAL;
+		break;
+	}
+
+	if (!ret) {
+		REG_WR(pinmux, regi_pinmux, rw_hwprot, hwprot);
+		REG_WR(clkgen, regi_clkgen, rw_clk_ctrl, clk_ctrl);
+	} else
+		memcpy(pins, saved, sizeof pins);
+
+	spin_unlock_irqrestore(&pinmux_lock, flags);
+
+	return ret;
+}
+
+void
+crisv32_pinmux_set(int port)
+{
+	int i;
+	int gpio_val = 0;
+	int iop_val = 0;
+	int pin = port * PORT_PINS;
+
+	for (i = 0; (i < PORT_PINS) && (pin < PINS); i++, pin++) {
+		if (pins[pin] == pinmux_gpio)
+			gpio_val |= (1 << i);
+		else if (pins[pin] == pinmux_iop)
+			iop_val |= (1 << i);
+	}
+
+	REG_WRITE(int, regi_pinmux + REG_RD_ADDR_pinmux_rw_gio_pa + 4 * port,
+		gpio_val);
+	REG_WRITE(int, regi_pinmux + REG_RD_ADDR_pinmux_rw_iop_pa + 4 * port,
+		iop_val);
+
+#ifdef DEBUG
+	crisv32_pinmux_dump();
+#endif
+}
+
+int
+crisv32_pinmux_dealloc(int port, int first_pin, int last_pin)
+{
+	int i;
+	unsigned long flags;
+
+	crisv32_pinmux_init();
+
+	if (port >= PORTS || port < 0)
+		return -EINVAL;
+
+	spin_lock_irqsave(&pinmux_lock, flags);
+
+	for (i = first_pin; i <= last_pin; i++)
+		pins[port * PORT_PINS + i] = pinmux_none;
+
+	crisv32_pinmux_set(port);
+	spin_unlock_irqrestore(&pinmux_lock, flags);
+
+	return 0;
+}
+
+int
+crisv32_pinmux_dealloc_fixed(enum fixed_function function)
+{
+	int ret = -EINVAL;
+	char saved[sizeof pins];
+	unsigned long flags;
+	reg_pinmux_rw_hwprot hwprot;
+
+	spin_lock_irqsave(&pinmux_lock, flags);
+
+	/* Save internal data for recovery */
+	memcpy(saved, pins, sizeof pins);
+
+	crisv32_pinmux_init(); /* must be done before we read rw_hwprot */
+
+	hwprot = REG_RD(pinmux, regi_pinmux, rw_hwprot);
+
+	switch (function) {
+	case pinmux_eth:
+		ret = crisv32_pinmux_dealloc(PORT_B, 8, 23);
+		ret |= crisv32_pinmux_dealloc(PORT_B, 24, 25);
+		ret |= crisv32_pinmux_dealloc(PORT_B, 0, 7);
+		hwprot.eth = hwprot.eth_mdio = hwprot.geth = regk_pinmux_no;
+		break;
+	case pinmux_tg_cmos:
+		ret = crisv32_pinmux_dealloc(PORT_B, 27, 29);
+		hwprot.tg_clk = regk_pinmux_no;
+		break;
+	case pinmux_tg_ccd:
+		ret = crisv32_pinmux_dealloc(PORT_B, 27, 31);
+		ret |= crisv32_pinmux_dealloc(PORT_C, 0, 15);
+		hwprot.tg = hwprot.tg_clk = regk_pinmux_no;
+		break;
+	case pinmux_vout:
+		ret = crisv32_pinmux_dealloc(PORT_A, 8, 18);
+		hwprot.vout = hwprot.vout_sync = regk_pinmux_no;
+		break;
+	case pinmux_ser1:
+		ret = crisv32_pinmux_dealloc(PORT_A, 24, 25);
+		hwprot.ser1 = regk_pinmux_no;
+		break;
+	case pinmux_ser2:
+		ret = crisv32_pinmux_dealloc(PORT_A, 26, 27);
+		hwprot.ser2 = regk_pinmux_no;
+		break;
+	case pinmux_ser3:
+		ret = crisv32_pinmux_dealloc(PORT_A, 28, 29);
+		hwprot.ser3 = regk_pinmux_no;
+		break;
+	case pinmux_ser4:
+		ret = crisv32_pinmux_dealloc(PORT_A, 30, 31);
+		hwprot.ser4 = regk_pinmux_no;
+		break;
+	case pinmux_sser:
+		ret = crisv32_pinmux_dealloc(PORT_A, 19, 23);
+		hwprot.sser = regk_pinmux_no;
+		break;
+	case pinmux_pwm0:
+		ret = crisv32_pinmux_dealloc(PORT_A, 30, 30);
+		hwprot.pwm0 = regk_pinmux_no;
+		break;
+	case pinmux_pwm1:
+		ret = crisv32_pinmux_dealloc(PORT_A, 31, 31);
+		hwprot.pwm1 = regk_pinmux_no;
+		break;
+	case pinmux_pwm2:
+		ret = crisv32_pinmux_dealloc(PORT_B, 26, 26);
+		hwprot.pwm2 = regk_pinmux_no;
+		break;
+	case pinmux_i2c0:
+		ret = crisv32_pinmux_dealloc(PORT_A, 0, 1);
+		hwprot.i2c0 = regk_pinmux_no;
+		break;
+	case pinmux_i2c1:
+		ret = crisv32_pinmux_dealloc(PORT_A, 2, 3);
+		hwprot.i2c1 = regk_pinmux_no;
+		break;
+	case pinmux_i2c1_3wire:
+		ret = crisv32_pinmux_dealloc(PORT_A, 2, 3);
+		ret |= crisv32_pinmux_dealloc(PORT_A, 7, 7);
+		hwprot.i2c1 = hwprot.i2c1_sen = regk_pinmux_no;
+		break;
+	case pinmux_i2c1_sda1:
+		ret = crisv32_pinmux_dealloc(PORT_A, 2, 4);
+		hwprot.i2c1_sda1 = regk_pinmux_no;
+		break;
+	case pinmux_i2c1_sda2:
+		ret = crisv32_pinmux_dealloc(PORT_A, 2, 3);
+		ret |= crisv32_pinmux_dealloc(PORT_A, 5, 5);
+		hwprot.i2c1_sda2 = regk_pinmux_no;
+		break;
+	case pinmux_i2c1_sda3:
+		ret = crisv32_pinmux_dealloc(PORT_A, 2, 3);
+		ret |= crisv32_pinmux_dealloc(PORT_A, 6, 6);
+		hwprot.i2c1_sda3 = regk_pinmux_no;
+		break;
+	default:
+		ret = -EINVAL;
+		break;
+	}
+
+	if (!ret)
+		REG_WR(pinmux, regi_pinmux, rw_hwprot, hwprot);
+	else
+		memcpy(pins, saved, sizeof pins);
+
+	spin_unlock_irqrestore(&pinmux_lock, flags);
+
+	return ret;
+}
+
+void
+crisv32_pinmux_dump(void)
+{
+	int i, j;
+	int pin = 0;
+
+	crisv32_pinmux_init();
+
+	for (i = 0; i < PORTS; i++) {
+		printk(KERN_DEBUG "Port %c\n", 'A'+i);
+		for (j = 0; (j < PORT_PINS) && (pin < PINS); j++, pin++)
+			printk(KERN_DEBUG
+				"  Pin %d = %d\n", j, pins[i * PORT_PINS + j]);
+	}
+}
+
+__initcall(crisv32_pinmux_init);
diff --git a/arch/cris/arch-v32/mach-fs/Kconfig b/arch/cris/arch-v32/mach-fs/Kconfig
new file mode 100644
index 0000000..7d1ab97
--- /dev/null
+++ b/arch/cris/arch-v32/mach-fs/Kconfig
@@ -0,0 +1,197 @@
+if ETRAXFS
+
+menu "ETRAX FS options"
+       depends on ETRAXFS
+
+config ETRAX_DRAM_VIRTUAL_BASE
+	hex
+	depends on ETRAX_ARCH_V32
+	default "c0000000"
+
+config ETRAX_SERIAL_PORTS
+       int
+       default 4
+
+config ETRAX_MEM_GRP1_CONFIG
+	hex "MEM_GRP1_CONFIG"
+	depends on ETRAX_ARCH_V32
+	default "4044a"
+	help
+	  Waitstates for flash. The default value is suitable for the
+	  standard flashes used in Axis products (120 ns).
+
+config ETRAX_MEM_GRP2_CONFIG
+	hex "MEM_GRP2_CONFIG"
+	depends on ETRAX_ARCH_V32
+	default "0"
+	help
+	  Waitstates for SRAM. 0 is a good choice for most Axis products.
+
+config ETRAX_MEM_GRP3_CONFIG
+	hex "MEM_GRP3_CONFIG"
+	depends on ETRAX_ARCH_V32
+	default "0"
+	help
+	  Waitstates for CSP0-3. 0 is a good choice for most Axis products.
+	  It may need to be changed if external devices such as extra
+	  register-mapped LEDs are used.
+
+config ETRAX_MEM_GRP4_CONFIG
+	hex "MEM_GRP4_CONFIG"
+	depends on ETRAX_ARCH_V32
+	default "0"
+	help
+	  Waitstates for CSP4-6. 0 is a good choice for most Axis products.
+
+config ETRAX_SDRAM_GRP0_CONFIG
+	hex "SDRAM_GRP0_CONFIG"
+	depends on ETRAX_ARCH_V32
+	default "336"
+	help
+	  SDRAM configuration for group 0. The value depends on the
+	  hardware configuration. The default value is suitable
+	  for 32 MB organized as two 16 bits chips (e.g. Axis
+	  part number 18550) connected as one 32 bit device (i.e. in
+	  the same group).
+
+config ETRAX_SDRAM_GRP1_CONFIG
+	hex "SDRAM_GRP1_CONFIG"
+	depends on ETRAX_ARCH_V32
+	default "0"
+	help
+	  SDRAM configuration for group 1. The default value is 0
+	  because group 1 is not used in the default configuration,
+	  described in the help for SDRAM_GRP0_CONFIG.
+
+config ETRAX_SDRAM_TIMING
+	hex "SDRAM_TIMING"
+	depends on ETRAX_ARCH_V32
+	default "104a"
+	help
+	  SDRAM timing parameters. The default value is OK for
+	  most hardware, but large SDRAMs may require a faster
+	  refresh (a.k.a. 8K refresh). The default value implies a
+	  100 MHz clock and SDR mode.
+
+config ETRAX_SDRAM_COMMAND
+	hex "SDRAM_COMMAND"
+	depends on ETRAX_ARCH_V32
+	default "0"
+	help
+	  SDRAM command. Should be 0 unless you really know what
+	  you are doing (it may be != 0 for unusual address line
+	  mappings such as in an MCM).
+
+config ETRAX_DEF_GIO_PA_OE
+	hex "GIO_PA_OE"
+	depends on ETRAX_ARCH_V32
+	default "1c"
+	help
+	  Configures the direction of general port A bits.  1 is out, 0 is in.
+	  This is often totally different depending on the product used.
+	  There are some guidelines though - if you know that only LEDs are
+	  connected to port PA, then they are usually connected to bits 2-4
+	  and you can therefore use 1c.  On other boards which don't have the
+	  LEDs at the general ports, these bits are used for all kinds of
+	  stuff.  If you don't know what to use, it is always safe to put all
+	  as inputs, although floating inputs aren't good.
+
+config ETRAX_DEF_GIO_PA_OUT
+	hex "GIO_PA_OUT"
+	depends on ETRAX_ARCH_V32
+	default "00"
+	help
+	  Configures the initial data for the general port A bits.  Most
+	  products should use 00 here.
+
+config ETRAX_DEF_GIO_PB_OE
+	hex "GIO_PB_OE"
+	depends on ETRAX_ARCH_V32
+	default "00000"
+	help
+	  Configures the direction of general port B bits.  1 is out, 0 is in.
+	  This is often totally different depending on the product used.
+	  There are some guidelines though - if you know that only LEDs are
+	  connected to port PB, then they are usually connected to bits 2-4
+	  and you can therefore use 1c.  On other boards which don't have the
+	  LEDs at the general ports, these bits are used for all kinds of
+	  stuff.  If you don't know what to use, it is always safe to put all
+	  as inputs, although floating inputs aren't good.
+
+config ETRAX_DEF_GIO_PB_OUT
+	hex "GIO_PB_OUT"
+	depends on ETRAX_ARCH_V32
+	default "00000"
+	help
+	  Configures the initial data for the general port B bits.  Most
+	  products should use 00000 here.
+
+config ETRAX_DEF_GIO_PC_OE
+	hex "GIO_PC_OE"
+	depends on ETRAX_ARCH_V32
+	default "00000"
+	help
+	  Configures the direction of general port C bits.  1 is out, 0 is in.
+	  This is often totally different depending on the product used.
+	  There are some guidelines though - if you know that only LEDs are
+	  connected to port PC, then they are usually connected to bits 2-4
+	  and you can therefore use 1c.  On other boards which don't have the
+	  LEDs at the general ports, these bits are used for all kinds of
+	  stuff.  If you don't know what to use, it is always safe to put all
+	  as inputs, although floating inputs aren't good.
+
+config ETRAX_DEF_GIO_PC_OUT
+	hex "GIO_PC_OUT"
+	depends on ETRAX_ARCH_V32
+	default "00000"
+	help
+	  Configures the initial data for the general port C bits.  Most
+	  products should use 00000 here.
+
+config ETRAX_DEF_GIO_PD_OE
+	hex "GIO_PD_OE"
+	depends on ETRAX_ARCH_V32
+	default "00000"
+	help
+	  Configures the direction of general port D bits.  1 is out, 0 is in.
+	  This is often totally different depending on the product used.
+	  There are some guidelines though - if you know that only LEDs are
+	  connected to port PD, then they are usually connected to bits 2-4
+	  and you can therefore use 1c.  On other boards which don't have the
+	  LEDs at the general ports, these bits are used for all kinds of
+	  stuff.  If you don't know what to use, it is always safe to put all
+	  as inputs, although floating inputs aren't good.
+
+config ETRAX_DEF_GIO_PD_OUT
+	hex "GIO_PD_OUT"
+	depends on ETRAX_ARCH_V32
+	default "00000"
+	help
+	  Configures the initial data for the general port D bits.  Most
+	  products should use 00000 here.
+
+config ETRAX_DEF_GIO_PE_OE
+	hex "GIO_PE_OE"
+	depends on ETRAX_ARCH_V32
+	default "00000"
+	help
+	  Configures the direction of general port E bits.  1 is out, 0 is in.
+	  This is often totally different depending on the product used.
+	  There are some guidelines though - if you know that only LEDs are
+	  connected to port PE, then they are usually connected to bits 2-4
+	  and you can therefore use 1c.  On other boards which don't have the
+	  LEDs at the general ports, these bits are used for all kinds of
+	  stuff.  If you don't know what to use, it is always safe to put all
+	  as inputs, although floating inputs aren't good.
+
+config ETRAX_DEF_GIO_PE_OUT
+	hex "GIO_PE_OUT"
+	depends on ETRAX_ARCH_V32
+	default "00000"
+	help
+	  Configures the initial data for the general port E bits.  Most
+	  products should use 00000 here.
+
+endmenu
+
+endif
diff --git a/arch/cris/arch-v32/mach-fs/Makefile b/arch/cris/arch-v32/mach-fs/Makefile
new file mode 100644
index 0000000..0cc6eeb
--- /dev/null
+++ b/arch/cris/arch-v32/mach-fs/Makefile
@@ -0,0 +1,8 @@
+#
+# Makefile for the linux kernel.
+#
+
+obj-y   := dma.o pinmux.o arbiter.o
+
+clean:
+
diff --git a/arch/cris/arch-v32/mach-fs/arbiter.c b/arch/cris/arch-v32/mach-fs/arbiter.c
new file mode 100644
index 0000000..c97f4d8
--- /dev/null
+++ b/arch/cris/arch-v32/mach-fs/arbiter.c
@@ -0,0 +1,404 @@
+/*
+ * Memory arbiter functions. Allocates bandwidth through the
+ * arbiter and sets up arbiter breakpoints.
+ *
+ * The algorithm first assigns slots to the clients that have
+ * specified bandwidth (e.g. ethernet) and then divides the remaining
+ * slots among all the active clients.
+ *
+ * Copyright (c) 2004-2007 Axis Communications AB.
+ */
+
+#include <hwregs/reg_map.h>
+#include <hwregs/reg_rdwr.h>
+#include <hwregs/marb_defs.h>
+#include <arbiter.h>
+#include <hwregs/intr_vect.h>
+#include <linux/interrupt.h>
+#include <linux/signal.h>
+#include <linux/errno.h>
+#include <linux/spinlock.h>
+#include <asm/io.h>
+#include <asm/irq_regs.h>
+
+struct crisv32_watch_entry {
+	unsigned long instance;
+	watch_callback *cb;
+	unsigned long start;
+	unsigned long end;
+	int used;
+};
+
+#define NUMBER_OF_BP 4
+#define NBR_OF_CLIENTS 14
+#define NBR_OF_SLOTS 64
+#define SDRAM_BANDWIDTH 100000000	/* Rough expected value; only used to scale slot requests */
+#define INTMEM_BANDWIDTH 400000000
+#define NBR_OF_REGIONS 2
+
+static struct crisv32_watch_entry watches[NUMBER_OF_BP] = {
+	{regi_marb_bp0},
+	{regi_marb_bp1},
+	{regi_marb_bp2},
+	{regi_marb_bp3}
+};
+
+static u8 requested_slots[NBR_OF_REGIONS][NBR_OF_CLIENTS];
+static u8 active_clients[NBR_OF_REGIONS][NBR_OF_CLIENTS];
+static int max_bandwidth[NBR_OF_REGIONS] =
+    { SDRAM_BANDWIDTH, INTMEM_BANDWIDTH };
+
+DEFINE_SPINLOCK(arbiter_lock);
+
+static irqreturn_t crisv32_arbiter_irq(int irq, void *dev_id);
+
+/*
+ * "I'm the arbiter, I know the score.
+ *  From square one I'll be watching all 64."
+ * (memory arbiter slots, that is)
+ *
+ *  Or in other words:
+ * Program the memory arbiter slots for "region" according to what's
+ * in requested_slots[] and active_clients[], while minimizing
+ * latency. A caller may pass a non-zero positive amount for
+ * "unused_slots", which must then be the unallocated, remaining
+ * number of slots, free to hand out to any client.
+ */
+
+static void crisv32_arbiter_config(int region, int unused_slots)
+{
+	int slot;
+	int client;
+	int interval = 0;
+
+	/*
+	 * This vector corresponds to the hardware arbiter slots (see
+	 * the hardware documentation for semantics). We initialize
+	 * each slot with a suitable sentinel value outside the valid
+	 * range {0 .. NBR_OF_CLIENTS - 1} and replace them with
+	 * client indexes. Then it's fed to the hardware.
+	 */
+	s8 val[NBR_OF_SLOTS];
+
+	for (slot = 0; slot < NBR_OF_SLOTS; slot++)
+		val[slot] = -1;
+
+	for (client = 0; client < NBR_OF_CLIENTS; client++) {
+		int pos;
+		/* Allocate the requested non-zero number of slots, but
+		 * also give clients with zero-requests one slot each
+		 * while stocks last. We do the latter here, in client
+		 * order. This makes sure zero-request clients are the
+		 * first to get to any spare slots, else those slots
+		 * could, when bandwidth is allocated close to the limit,
+		 * all be allocated to low-index non-zero-request clients
+		 * in the default-fill loop below. Another positive but
+		 * secondary effect is a somewhat better spread of the
+		 * zero-bandwidth clients in the vector, avoiding some of
+		 * the latency that could otherwise be caused by the
+		 * partitioning of non-zero-bandwidth clients at low
+		 * indexes and zero-bandwidth clients at high
+		 * indexes. (Note that this spreading can only affect the
+		 * unallocated bandwidth.)  All the above only matters for
+		 * memory-intensive situations, of course.
+		 */
+		if (!requested_slots[region][client]) {
+			/*
+			 * Skip inactive clients. Also skip zero-slot
+			 * allocations in this pass when there are no known
+			 * free slots.
+			 */
+			if (!active_clients[region][client]
+			    || unused_slots <= 0)
+				continue;
+
+			unused_slots--;
+
+			/* Only allocate one slot for this client. */
+			interval = NBR_OF_SLOTS;
+		} else
+			interval =
+			    NBR_OF_SLOTS / requested_slots[region][client];
+
+		pos = 0;
+		while (pos < NBR_OF_SLOTS) {
+			if (val[pos] >= 0)
+				pos++;
+			else {
+				val[pos] = client;
+				pos += interval;
+			}
+		}
+	}
+
+	client = 0;
+	for (slot = 0; slot < NBR_OF_SLOTS; slot++) {
+		/*
+		 * Allocate remaining slots in round-robin
+		 * client-number order for active clients. For this
+		 * pass, we ignore requested bandwidth and previous
+		 * allocations.
+		 */
+		if (val[slot] < 0) {
+			int first = client;
+			while (!active_clients[region][client]) {
+				client = (client + 1) % NBR_OF_CLIENTS;
+				if (client == first)
+					break;
+			}
+			val[slot] = client;
+			client = (client + 1) % NBR_OF_CLIENTS;
+		}
+		if (region == EXT_REGION)
+			REG_WR_INT_VECT(marb, regi_marb, rw_ext_slots, slot,
+					val[slot]);
+		else if (region == INT_REGION)
+			REG_WR_INT_VECT(marb, regi_marb, rw_int_slots, slot,
+					val[slot]);
+	}
+}
+
+extern char _stext, _etext;
+
+static void crisv32_arbiter_init(void)
+{
+	static int initialized;
+
+	if (initialized)
+		return;
+
+	initialized = 1;
+
+	/*
+	 * CPU caches are always set to active, but with zero
+	 * bandwidth allocated. It should be ok to allocate zero
+	 * bandwidth for the caches, because DMA for other channels
+	 * will supposedly finish, once their programmed amount is
+	 * done, and then the caches will get access according to the
+	 * "fixed scheme" for unclaimed slots. Though, if for some
+	 * use-case somewhere, there's a maximum CPU latency for
+	 * e.g. some interrupt, we have to start allocating specific
+	 * bandwidth for the CPU caches too.
+	 */
+	active_clients[EXT_REGION][10] = active_clients[EXT_REGION][11] = 1;
+	crisv32_arbiter_config(EXT_REGION, 0);
+	crisv32_arbiter_config(INT_REGION, 0);
+
+	if (request_irq(MEMARB_INTR_VECT, crisv32_arbiter_irq, 0,
+			"arbiter", NULL))
+		printk(KERN_ERR "Couldn't allocate arbiter IRQ\n");
+
+#ifndef CONFIG_ETRAX_KGDB
+	/* Global watch for writes to kernel text segment. */
+	crisv32_arbiter_watch(virt_to_phys(&_stext), &_etext - &_stext,
+			      arbiter_all_clients, arbiter_all_write, NULL);
+#endif
+}
+
+/* Main entry for bandwidth allocation. */
+
+int crisv32_arbiter_allocate_bandwidth(int client, int region,
+				       unsigned long bandwidth)
+{
+	int i;
+	int total_assigned = 0;
+	int total_clients = 0;
+	int req;
+
+	crisv32_arbiter_init();
+
+	for (i = 0; i < NBR_OF_CLIENTS; i++) {
+		total_assigned += requested_slots[region][i];
+		total_clients += active_clients[region][i];
+	}
+
+	/* Avoid division by 0 for 0-bandwidth requests. */
+	req = bandwidth == 0
+	    ? 0 : NBR_OF_SLOTS / (max_bandwidth[region] / bandwidth);
+
+	/*
+	 * We make sure that there are enough slots only for non-zero
+	 * requests. Requesting 0 bandwidth *may* allocate slots,
+	 * though if all bandwidth is allocated, such a client won't
+	 * get any and will have to rely on getting memory access
+	 * according to the fixed scheme that's the default when one
+	 * of the slot-allocated clients doesn't claim their slot.
+	 */
+	if (total_assigned + req > NBR_OF_SLOTS)
+		return -ENOMEM;
+
+	active_clients[region][client] = 1;
+	requested_slots[region][client] = req;
+	crisv32_arbiter_config(region, NBR_OF_SLOTS - total_assigned);
+
+	return 0;
+}
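+
+/*
+ * Example (illustrative, with made-up numbers): if max_bandwidth[region]
+ * were 100 units and a client asked for 25, then
+ *
+ *	req = NBR_OF_SLOTS / (100 / 25) = NBR_OF_SLOTS / 4,
+ *
+ * i.e. a quarter of the slots. Both divisions are integer divisions,
+ * so asking for 26 units gives NBR_OF_SLOTS / (100 / 26) =
+ * NBR_OF_SLOTS / 3; in effect a request is rounded up to the next
+ * exact divisor of the maximum bandwidth.
+ */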
+
+/*
+ * Main entry for bandwidth deallocation.
+ *
+ * Strictly speaking, for a somewhat constant set of clients where
+ * each client gets a constant bandwidth and is just enabled or
+ * disabled (somewhat dynamically), no action is necessary here to
+ * avoid starvation for non-zero-allocation clients, as the allocated
+ * slots will just be unused. However, handing out those unused slots
+ * to active clients avoids needless latency if the "fixed scheme"
+ * would give unclaimed slots to an eager low-index client.
+ */
+
+void crisv32_arbiter_deallocate_bandwidth(int client, int region)
+{
+	int i;
+	int total_assigned = 0;
+
+	requested_slots[region][client] = 0;
+	active_clients[region][client] = 0;
+
+	for (i = 0; i < NBR_OF_CLIENTS; i++)
+		total_assigned += requested_slots[region][i];
+
+	crisv32_arbiter_config(region, NBR_OF_SLOTS - total_assigned);
+}
+
+int crisv32_arbiter_watch(unsigned long start, unsigned long size,
+			  unsigned long clients, unsigned long accesses,
+			  watch_callback *cb)
+{
+	int i;
+
+	crisv32_arbiter_init();
+
+	if (start > 0x80000000) {
+		printk(KERN_ERR "Arbiter: %lX doesn't look like a "
+			"physical address", start);
+		return -EFAULT;
+	}
+
+	spin_lock(&arbiter_lock);
+
+	for (i = 0; i < NUMBER_OF_BP; i++) {
+		if (!watches[i].used) {
+			reg_marb_rw_intr_mask intr_mask =
+			    REG_RD(marb, regi_marb, rw_intr_mask);
+
+			watches[i].used = 1;
+			watches[i].start = start;
+			watches[i].end = start + size;
+			watches[i].cb = cb;
+
+			REG_WR_INT(marb_bp, watches[i].instance, rw_first_addr,
+				   watches[i].start);
+			REG_WR_INT(marb_bp, watches[i].instance, rw_last_addr,
+				   watches[i].end);
+			REG_WR_INT(marb_bp, watches[i].instance, rw_op,
+				   accesses);
+			REG_WR_INT(marb_bp, watches[i].instance, rw_clients,
+				   clients);
+
+			if (i == 0)
+				intr_mask.bp0 = regk_marb_yes;
+			else if (i == 1)
+				intr_mask.bp1 = regk_marb_yes;
+			else if (i == 2)
+				intr_mask.bp2 = regk_marb_yes;
+			else if (i == 3)
+				intr_mask.bp3 = regk_marb_yes;
+
+			REG_WR(marb, regi_marb, rw_intr_mask, intr_mask);
+			spin_unlock(&arbiter_lock);
+
+			return i;
+		}
+	}
+	spin_unlock(&arbiter_lock);
+	return -ENOMEM;
+}
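+
+/*
+ * Example (illustrative): a driver could trap stray writes to one of
+ * its buffers, much like the kernel-text watch set up in
+ * crisv32_arbiter_init(). The buffer and callback names here are
+ * hypothetical; the callback is assumed to take no arguments, matching
+ * the watch->cb() call in crisv32_arbiter_irq() below.
+ *
+ *	static void my_buf_written(void)
+ *	{
+ *		printk(KERN_DEBUG "arbiter: write hit my_buf\n");
+ *	}
+ *
+ *	int id = crisv32_arbiter_watch(virt_to_phys(my_buf), MY_BUF_SIZE,
+ *				       arbiter_all_clients,
+ *				       arbiter_all_write, my_buf_written);
+ *
+ * A non-negative return value is the breakpoint id, to be passed to
+ * crisv32_arbiter_unwatch() when the watch is no longer needed.
+ */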
+
+int crisv32_arbiter_unwatch(int id)
+{
+	reg_marb_rw_intr_mask intr_mask = REG_RD(marb, regi_marb, rw_intr_mask);
+
+	crisv32_arbiter_init();
+
+	spin_lock(&arbiter_lock);
+
+	if ((id < 0) || (id >= NUMBER_OF_BP) || (!watches[id].used)) {
+		spin_unlock(&arbiter_lock);
+		return -EINVAL;
+	}
+
+	memset(&watches[id], 0, sizeof(struct crisv32_watch_entry));
+
+	if (id == 0)
+		intr_mask.bp0 = regk_marb_no;
+	else if (id == 1)
+		intr_mask.bp1 = regk_marb_no;
+	else if (id == 2)
+		intr_mask.bp2 = regk_marb_no;
+	else if (id == 3)
+		intr_mask.bp3 = regk_marb_no;
+
+	REG_WR(marb, regi_marb, rw_intr_mask, intr_mask);
+
+	spin_unlock(&arbiter_lock);
+	return 0;
+}
+
+extern void show_registers(struct pt_regs *regs);
+
+static irqreturn_t crisv32_arbiter_irq(int irq, void *dev_id)
+{
+	reg_marb_r_masked_intr masked_intr =
+	    REG_RD(marb, regi_marb, r_masked_intr);
+	reg_marb_bp_r_brk_clients r_clients;
+	reg_marb_bp_r_brk_addr r_addr;
+	reg_marb_bp_r_brk_op r_op;
+	reg_marb_bp_r_brk_first_client r_first;
+	reg_marb_bp_r_brk_size r_size;
+	reg_marb_bp_rw_ack ack = { 0 };
+	reg_marb_rw_ack_intr ack_intr = {
+		.bp0 = 1, .bp1 = 1, .bp2 = 1, .bp3 = 1
+	};
+	struct crisv32_watch_entry *watch;
+
+	if (masked_intr.bp0) {
+		watch = &watches[0];
+		ack_intr.bp0 = regk_marb_yes;
+	} else if (masked_intr.bp1) {
+		watch = &watches[1];
+		ack_intr.bp1 = regk_marb_yes;
+	} else if (masked_intr.bp2) {
+		watch = &watches[2];
+		ack_intr.bp2 = regk_marb_yes;
+	} else if (masked_intr.bp3) {
+		watch = &watches[3];
+		ack_intr.bp3 = regk_marb_yes;
+	} else {
+		return IRQ_NONE;
+	}
+
+	/* Retrieve all useful information and print it. */
+	r_clients = REG_RD(marb_bp, watch->instance, r_brk_clients);
+	r_addr = REG_RD(marb_bp, watch->instance, r_brk_addr);
+	r_op = REG_RD(marb_bp, watch->instance, r_brk_op);
+	r_first = REG_RD(marb_bp, watch->instance, r_brk_first_client);
+	r_size = REG_RD(marb_bp, watch->instance, r_brk_size);
+
+	printk(KERN_INFO "Arbiter IRQ\n");
+	printk(KERN_INFO "Clients %X addr %X op %X first %X size %X\n",
+	       REG_TYPE_CONV(int, reg_marb_bp_r_brk_clients, r_clients),
+	       REG_TYPE_CONV(int, reg_marb_bp_r_brk_addr, r_addr),
+	       REG_TYPE_CONV(int, reg_marb_bp_r_brk_op, r_op),
+	       REG_TYPE_CONV(int, reg_marb_bp_r_brk_first_client, r_first),
+	       REG_TYPE_CONV(int, reg_marb_bp_r_brk_size, r_size));
+
+	REG_WR(marb_bp, watch->instance, rw_ack, ack);
+	REG_WR(marb, regi_marb, rw_ack_intr, ack_intr);
+
+	printk(KERN_INFO "IRQ occurred at %lX\n", get_irq_regs()->erp);
+
+	if (watch->cb)
+		watch->cb();
+
+	return IRQ_HANDLED;
+}
diff --git a/arch/cris/arch-v32/mach-fs/dma.c b/arch/cris/arch-v32/mach-fs/dma.c
new file mode 100644
index 0000000..fc6416a
--- /dev/null
+++ b/arch/cris/arch-v32/mach-fs/dma.c
@@ -0,0 +1,229 @@
+/* Wrapper for DMA channel allocator that starts clocks etc */
+
+#include <linux/kernel.h>
+#include <linux/spinlock.h>
+#include <asm/dma.h>
+#include <hwregs/reg_map.h>
+#include <hwregs/reg_rdwr.h>
+#include <hwregs/marb_defs.h>
+#include <hwregs/config_defs.h>
+#include <hwregs/strmux_defs.h>
+#include <linux/errno.h>
+#include <mach/arbiter.h>
+
+static char used_dma_channels[MAX_DMA_CHANNELS];
+static const char *used_dma_channels_users[MAX_DMA_CHANNELS];
+
+static DEFINE_SPINLOCK(dma_lock);
+
+int crisv32_request_dma(unsigned int dmanr, const char *device_id,
+			unsigned options, unsigned int bandwidth,
+			enum dma_owner owner)
+{
+	unsigned long flags;
+	reg_config_rw_clk_ctrl clk_ctrl;
+	reg_strmux_rw_cfg strmux_cfg;
+
+	if (crisv32_arbiter_allocate_bandwidth(dmanr,
+					       options & DMA_INT_MEM ?
+					       INT_REGION : EXT_REGION,
+					       bandwidth))
+		return -ENOMEM;
+
+	spin_lock_irqsave(&dma_lock, flags);
+
+	if (used_dma_channels[dmanr]) {
+		spin_unlock_irqrestore(&dma_lock, flags);
+		if (options & DMA_VERBOSE_ON_ERROR) {
+			printk(KERN_ERR "Failed to request DMA %i for %s, "
+				"already allocated by %s\n",
+				dmanr,
+				device_id,
+				used_dma_channels_users[dmanr]);
+		}
+		if (options & DMA_PANIC_ON_ERROR)
+			panic("request_dma error!");
+		return -EBUSY;
+	}
+	clk_ctrl = REG_RD(config, regi_config, rw_clk_ctrl);
+	strmux_cfg = REG_RD(strmux, regi_strmux, rw_cfg);
+
+	switch (dmanr) {
+	case 0:
+	case 1:
+		clk_ctrl.dma01_eth0 = 1;
+		break;
+	case 2:
+	case 3:
+		clk_ctrl.dma23 = 1;
+		break;
+	case 4:
+	case 5:
+		clk_ctrl.dma45 = 1;
+		break;
+	case 6:
+	case 7:
+		clk_ctrl.dma67 = 1;
+		break;
+	case 8:
+	case 9:
+		clk_ctrl.dma89_strcop = 1;
+		break;
+#if MAX_DMA_CHANNELS-1 != 9
+#error Check dma.c
+#endif
+	default:
+		spin_unlock_irqrestore(&dma_lock, flags);
+		if (options & DMA_VERBOSE_ON_ERROR) {
+			printk(KERN_ERR "Failed to request DMA %i for %s, "
+				"only 0-%i valid)\n",
+				dmanr, device_id, MAX_DMA_CHANNELS - 1);
+		}
+
+		if (options & DMA_PANIC_ON_ERROR)
+			panic("request_dma error!");
+		return -EINVAL;
+	}
+
+	switch (owner) {
+	case dma_eth0:
+		if (dmanr == 0)
+			strmux_cfg.dma0 = regk_strmux_eth0;
+		else if (dmanr == 1)
+			strmux_cfg.dma1 = regk_strmux_eth0;
+		else
+			panic("Invalid DMA channel for eth0\n");
+		break;
+	case dma_eth1:
+		if (dmanr == 6)
+			strmux_cfg.dma6 = regk_strmux_eth1;
+		else if (dmanr == 7)
+			strmux_cfg.dma7 = regk_strmux_eth1;
+		else
+			panic("Invalid DMA channel for eth1\n");
+		break;
+	case dma_iop0:
+		if (dmanr == 2)
+			strmux_cfg.dma2 = regk_strmux_iop0;
+		else if (dmanr == 3)
+			strmux_cfg.dma3 = regk_strmux_iop0;
+		else
+			panic("Invalid DMA channel for iop0\n");
+		break;
+	case dma_iop1:
+		if (dmanr == 4)
+			strmux_cfg.dma4 = regk_strmux_iop1;
+		else if (dmanr == 5)
+			strmux_cfg.dma5 = regk_strmux_iop1;
+		else
+			panic("Invalid DMA channel for iop1\n");
+		break;
+	case dma_ser0:
+		if (dmanr == 6)
+			strmux_cfg.dma6 = regk_strmux_ser0;
+		else if (dmanr == 7)
+			strmux_cfg.dma7 = regk_strmux_ser0;
+		else
+			panic("Invalid DMA channel for ser0\n");
+		break;
+	case dma_ser1:
+		if (dmanr == 4)
+			strmux_cfg.dma4 = regk_strmux_ser1;
+		else if (dmanr == 5)
+			strmux_cfg.dma5 = regk_strmux_ser1;
+		else
+			panic("Invalid DMA channel for ser1\n");
+		break;
+	case dma_ser2:
+		if (dmanr == 2)
+			strmux_cfg.dma2 = regk_strmux_ser2;
+		else if (dmanr == 3)
+			strmux_cfg.dma3 = regk_strmux_ser2;
+		else
+			panic("Invalid DMA channel for ser2\n");
+		break;
+	case dma_ser3:
+		if (dmanr == 8)
+			strmux_cfg.dma8 = regk_strmux_ser3;
+		else if (dmanr == 9)
+			strmux_cfg.dma9 = regk_strmux_ser3;
+		else
+			panic("Invalid DMA channel for ser3\n");
+		break;
+	case dma_sser0:
+		if (dmanr == 4)
+			strmux_cfg.dma4 = regk_strmux_sser0;
+		else if (dmanr == 5)
+			strmux_cfg.dma5 = regk_strmux_sser0;
+		else
+			panic("Invalid DMA channel for sser0\n");
+		break;
+	case dma_sser1:
+		if (dmanr == 6)
+			strmux_cfg.dma6 = regk_strmux_sser1;
+		else if (dmanr == 7)
+			strmux_cfg.dma7 = regk_strmux_sser1;
+		else
+			panic("Invalid DMA channel for sser1\n");
+		break;
+	case dma_ata:
+		if (dmanr == 2)
+			strmux_cfg.dma2 = regk_strmux_ata;
+		else if (dmanr == 3)
+			strmux_cfg.dma3 = regk_strmux_ata;
+		else
+			panic("Invalid DMA channel for ata\n");
+		break;
+	case dma_strp:
+		if (dmanr == 8)
+			strmux_cfg.dma8 = regk_strmux_strcop;
+		else if (dmanr == 9)
+			strmux_cfg.dma9 = regk_strmux_strcop;
+		else
+			panic("Invalid DMA channel for strp\n");
+		break;
+	case dma_ext0:
+		if (dmanr == 6)
+			strmux_cfg.dma6 = regk_strmux_ext0;
+		else
+			panic("Invalid DMA channel for ext0\n");
+		break;
+	case dma_ext1:
+		if (dmanr == 7)
+			strmux_cfg.dma7 = regk_strmux_ext1;
+		else
+			panic("Invalid DMA channel for ext1\n");
+		break;
+	case dma_ext2:
+		if (dmanr == 2)
+			strmux_cfg.dma2 = regk_strmux_ext2;
+		else if (dmanr == 8)
+			strmux_cfg.dma8 = regk_strmux_ext2;
+		else
+			panic("Invalid DMA channel for ext2\n");
+		break;
+	case dma_ext3:
+		if (dmanr == 3)
+			strmux_cfg.dma3 = regk_strmux_ext3;
+		else if (dmanr == 9)
+			strmux_cfg.dma9 = regk_strmux_ext3;
+		else
+			panic("Invalid DMA channel for ext3\n");
+		break;
+	}
+
+	used_dma_channels[dmanr] = 1;
+	used_dma_channels_users[dmanr] = device_id;
+	REG_WR(config, regi_config, rw_clk_ctrl, clk_ctrl);
+	REG_WR(strmux, regi_strmux, rw_cfg, strmux_cfg);
+	spin_unlock_irqrestore(&dma_lock, flags);
+	return 0;
+}
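+
+/*
+ * Example (illustrative): a synchronous-serial driver could claim
+ * channel 4 for sser0 like this (the device_id string is made up):
+ *
+ *	int ret = crisv32_request_dma(4, "sync serial",
+ *				      DMA_VERBOSE_ON_ERROR, 0, dma_sser0);
+ *	if (ret)
+ *		return ret;
+ *	...
+ *	crisv32_free_dma(4);
+ *
+ * A bandwidth of 0 still marks the channel as an active arbiter
+ * client, so it only competes for slots left over by non-zero
+ * requests (see crisv32_arbiter_allocate_bandwidth()).
+ */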
+
+void crisv32_free_dma(unsigned int dmanr)
+{
+	spin_lock(&dma_lock);
+	used_dma_channels[dmanr] = 0;
+	spin_unlock(&dma_lock);
+}
diff --git a/arch/cris/arch-v32/mach-fs/dram_init.S b/arch/cris/arch-v32/mach-fs/dram_init.S
new file mode 100644
index 0000000..6fbad33
--- /dev/null
+++ b/arch/cris/arch-v32/mach-fs/dram_init.S
@@ -0,0 +1,116 @@
+/*
+ * DRAM/SDRAM initialization - alter with care
+ * This file is intended to be included from other assembler files
+ *
+ * Note: This file may not modify r8 or r9 because they are used to
+ * carry information from the decompressor to the kernel
+ *
+ * Copyright (C) 2000-2007 Axis Communications AB
+ *
+ * Authors:  Mikael Starvik <starvik@axis.com>
+ */
+
+/* Just to be certain the config file is included, we include it here
+ * explicitly instead of depending on it being included in the file that
+ * uses this code.
+ */
+
+#include <hwregs/asm/reg_map_asm.h>
+#include <hwregs/asm/bif_core_defs_asm.h>
+
+	;; WARNING! The registers r8 and r9 are used as parameters carrying
+	;; information from the decompressor (if the kernel was compressed).
+	;; They should not be used in the code below.
+
+	; Refer to BIF MDS for a description of SDRAM initialization
+
+	; Bank configuration
+	move.d   REG_ADDR(bif_core, regi_bif_core, rw_sdram_cfg_grp0), $r0
+	move.d   CONFIG_ETRAX_SDRAM_GRP0_CONFIG, $r1
+	move.d   $r1, [$r0]
+	move.d   REG_ADDR(bif_core, regi_bif_core, rw_sdram_cfg_grp1), $r0
+	move.d   CONFIG_ETRAX_SDRAM_GRP1_CONFIG, $r1
+	move.d   $r1, [$r0]
+
+	; Calculate value of mrs_data
+	; CAS latency = 2 && bus_width = 32 => 0x40
+	; CAS latency = 3 && bus_width = 32 => 0x60
+	; CAS latency = 2 && bus_width = 16 => 0x20
+	; CAS latency = 3 && bus_width = 16 => 0x30
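+	;
+	; Example (illustrative): the CAS latency is the low three bits of
+	; CONFIG_ETRAX_SDRAM_TIMING, masked out below. A timing word whose
+	; low bits equal 2 keeps the 32-bit CL=2 default of 0x40; any other
+	; value (i.e. CL=3) selects 0x60. The 16-bit variants are then
+	; derived at _bw_check from the DRAM width bit (bit 9) of
+	; CONFIG_ETRAX_SDRAM_GRP0_CONFIG.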
+
+	; Check if value is already supplied in kernel config
+	move.d   CONFIG_ETRAX_SDRAM_COMMAND, $r2
+	bne	 _set_timing
+	nop
+
+	move.d   0x40, $r4       ; Assume 32 bits and CAS latency = 2
+	move.d   CONFIG_ETRAX_SDRAM_TIMING, $r1
+	and.d    0x07, $r1       ; Get CAS latency
+	cmpq	 2, $r1		 ; CL = 2 ?
+	beq	 _bw_check
+	nop
+	move.d   0x60, $r4
+
+_bw_check:
+	; Assume that group 0 width is equal to group 1. This assumption
+	; is wrong for group-1-only hardware (such as the grand old
+	; StorPoint+).
+	move.d   CONFIG_ETRAX_SDRAM_GRP0_CONFIG, $r1
+	and.d    0x200, $r1	; DRAM width is bit 9
+	beq      _set_timing
+	lslq	 2, $r4		;  mrs_data starts at bit 2
+	lsrq     1, $r4		;  16 bits. Shift down value.
+
+	; Set timing parameters (refresh off to avoid Guinness TR 83)
+_set_timing:
+	move.d   CONFIG_ETRAX_SDRAM_TIMING, $r1
+	and.d    ~(3 << reg_bif_core_rw_sdram_timing___ref___lsb), $r1
+	move.d   REG_ADDR(bif_core, regi_bif_core, rw_sdram_timing), $r0
+	move.d   $r1, [$r0]
+
+	; Issue NOP command
+	move.d REG_ADDR(bif_core, regi_bif_core, rw_sdram_cmd), $r5
+	moveq regk_bif_core_nop, $r1
+	move.d $r1, [$r5]
+
+	; Wait 200us
+	move.d   10000, $r2
+1:	bne      1b
+	subq     1, $r2
+
+	; Issue initialization command sequence
+	lapc     _sdram_commands_start, $r2
+	lapc     _sdram_commands_end,  $r3
+1:	clear.d  $r6
+	move.b   [$r2+], $r6	; Load command
+	or.d     $r4, $r6	; Add calculated mrs
+	move.d   $r6, [$r5]	; Write rw_sdram_cmd
+	; Wait 80 ns between each command
+	move.d	 4000, $r7
+2:	bne	 2b
+	subq	 1, $r7
+	cmp.d    $r2, $r3	; Last command?
+	bne      1b
+	nop
+
+	; Start refresh
+	move.d   CONFIG_ETRAX_SDRAM_TIMING, $r1
+	move.d   REG_ADDR(bif_core, regi_bif_core, rw_sdram_timing), $r0
+	move.d   $r1, [$r0]
+
+	; Initialization finished
+	ba       _sdram_commands_end
+	nop
+
+_sdram_commands_start:
+	.byte   regk_bif_core_pre ; Precharge
+	.byte   regk_bif_core_ref ; refresh
+	.byte   regk_bif_core_ref ; refresh
+	.byte   regk_bif_core_ref ; refresh
+	.byte   regk_bif_core_ref ; refresh
+	.byte   regk_bif_core_ref ; refresh
+	.byte   regk_bif_core_ref ; refresh
+	.byte   regk_bif_core_ref ; refresh
+	.byte   regk_bif_core_ref ; refresh
+	.byte   regk_bif_core_mrs ; mrs
+_sdram_commands_end:
diff --git a/arch/cris/arch-v32/mach-fs/hw_settings.S b/arch/cris/arch-v32/mach-fs/hw_settings.S
new file mode 100644
index 0000000..8bde93c
--- /dev/null
+++ b/arch/cris/arch-v32/mach-fs/hw_settings.S
@@ -0,0 +1,70 @@
+/*
+ * This table is used by some tools to extract hardware parameters.
+ * The table should be included in the kernel and the decompressor.
+ * Don't forget to update the tools if you change this table.
+ *
+ * Copyright (C) 2001-2007 Axis Communications AB
+ *
+ * Authors:  Mikael Starvik <starvik@axis.com>
+ */
+
+#include <hwregs/asm/reg_map_asm.h>
+#include <hwregs/asm/bif_core_defs_asm.h>
+#include <hwregs/asm/gio_defs_asm.h>
+
+	.ascii "HW_PARAM_MAGIC" ; Magic number
+	.dword 0xc0004000	; Kernel start address
+
+	; Debug port
+#ifdef CONFIG_ETRAX_DEBUG_PORT0
+	.dword 0
+#elif defined(CONFIG_ETRAX_DEBUG_PORT1)
+	.dword 1
+#elif defined(CONFIG_ETRAX_DEBUG_PORT2)
+	.dword 2
+#elif defined(CONFIG_ETRAX_DEBUG_PORT3)
+	.dword 3
+#else
+	.dword 4 ; No debug
+#endif
+
+	; Register values
+	.dword REG_ADDR(bif_core, regi_bif_core, rw_grp1_cfg)
+	.dword CONFIG_ETRAX_MEM_GRP1_CONFIG
+	.dword REG_ADDR(bif_core, regi_bif_core, rw_grp2_cfg)
+	.dword CONFIG_ETRAX_MEM_GRP2_CONFIG
+	.dword REG_ADDR(bif_core, regi_bif_core, rw_grp3_cfg)
+	.dword CONFIG_ETRAX_MEM_GRP3_CONFIG
+	.dword REG_ADDR(bif_core, regi_bif_core, rw_grp4_cfg)
+	.dword CONFIG_ETRAX_MEM_GRP4_CONFIG
+	.dword REG_ADDR(bif_core, regi_bif_core, rw_sdram_cfg_grp0)
+	.dword CONFIG_ETRAX_SDRAM_GRP0_CONFIG
+	.dword REG_ADDR(bif_core, regi_bif_core, rw_sdram_cfg_grp1)
+	.dword CONFIG_ETRAX_SDRAM_GRP1_CONFIG
+	.dword REG_ADDR(bif_core, regi_bif_core, rw_sdram_timing)
+	.dword CONFIG_ETRAX_SDRAM_TIMING
+	.dword REG_ADDR(bif_core, regi_bif_core, rw_sdram_cmd)
+	.dword CONFIG_ETRAX_SDRAM_COMMAND
+
+	.dword REG_ADDR(gio, regi_gio, rw_pa_dout)
+	.dword CONFIG_ETRAX_DEF_GIO_PA_OUT
+	.dword REG_ADDR(gio, regi_gio, rw_pa_oe)
+	.dword CONFIG_ETRAX_DEF_GIO_PA_OE
+	.dword REG_ADDR(gio, regi_gio, rw_pb_dout)
+	.dword CONFIG_ETRAX_DEF_GIO_PB_OUT
+	.dword REG_ADDR(gio, regi_gio, rw_pb_oe)
+	.dword CONFIG_ETRAX_DEF_GIO_PB_OE
+	.dword REG_ADDR(gio, regi_gio, rw_pc_dout)
+	.dword CONFIG_ETRAX_DEF_GIO_PC_OUT
+	.dword REG_ADDR(gio, regi_gio, rw_pc_oe)
+	.dword CONFIG_ETRAX_DEF_GIO_PC_OE
+	.dword REG_ADDR(gio, regi_gio, rw_pd_dout)
+	.dword CONFIG_ETRAX_DEF_GIO_PD_OUT
+	.dword REG_ADDR(gio, regi_gio, rw_pd_oe)
+	.dword CONFIG_ETRAX_DEF_GIO_PD_OE
+	.dword REG_ADDR(gio, regi_gio, rw_pe_dout)
+	.dword CONFIG_ETRAX_DEF_GIO_PE_OUT
+	.dword REG_ADDR(gio, regi_gio, rw_pe_oe)
+	.dword CONFIG_ETRAX_DEF_GIO_PE_OE
+
+	.dword 0 ; No more register values
diff --git a/arch/cris/arch-v32/mach-fs/pinmux.c b/arch/cris/arch-v32/mach-fs/pinmux.c
new file mode 100644
index 0000000..d8a3a3c
--- /dev/null
+++ b/arch/cris/arch-v32/mach-fs/pinmux.c
@@ -0,0 +1,327 @@
+/*
+ * Allocator for I/O pins. All pins are allocated to GPIO at bootup.
+ * Unassigned pins and GPIO pins can be allocated to a fixed interface
+ * or the I/O processor instead.
+ *
+ * Copyright (c) 2004-2007 Axis Communications AB.
+ */
+
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/spinlock.h>
+#include <hwregs/reg_map.h>
+#include <hwregs/reg_rdwr.h>
+#include <pinmux.h>
+#include <hwregs/pinmux_defs.h>
+
+#undef DEBUG
+
+#define PORT_PINS 18
+#define PORTS 4
+
+static char pins[PORTS][PORT_PINS];
+static DEFINE_SPINLOCK(pinmux_lock);
+
+static void crisv32_pinmux_set(int port);
+
+static int __crisv32_pinmux_alloc(int port, int first_pin, int last_pin,
+				 enum pin_mode mode)
+{
+	int i;
+
+	for (i = first_pin; i <= last_pin; i++) {
+		if ((pins[port][i] != pinmux_none)
+		    && (pins[port][i] != pinmux_gpio)
+		    && (pins[port][i] != mode)) {
+#ifdef DEBUG
+			panic("Pinmux alloc failed!\n");
+#endif
+			return -EPERM;
+		}
+	}
+
+	for (i = first_pin; i <= last_pin; i++)
+		pins[port][i] = mode;
+
+	crisv32_pinmux_set(port);
+
+	return 0;
+}
+
+static int crisv32_pinmux_init(void)
+{
+	static int initialized;
+
+	if (!initialized) {
+		reg_pinmux_rw_pa pa = REG_RD(pinmux, regi_pinmux, rw_pa);
+		initialized = 1;
+		REG_WR_INT(pinmux, regi_pinmux, rw_hwprot, 0);
+		pa.pa0 = pa.pa1 = pa.pa2 = pa.pa3 =
+		    pa.pa4 = pa.pa5 = pa.pa6 = pa.pa7 = regk_pinmux_yes;
+		REG_WR(pinmux, regi_pinmux, rw_pa, pa);
+		__crisv32_pinmux_alloc(PORT_B, 0, PORT_PINS - 1, pinmux_gpio);
+		__crisv32_pinmux_alloc(PORT_C, 0, PORT_PINS - 1, pinmux_gpio);
+		__crisv32_pinmux_alloc(PORT_D, 0, PORT_PINS - 1, pinmux_gpio);
+		__crisv32_pinmux_alloc(PORT_E, 0, PORT_PINS - 1, pinmux_gpio);
+	}
+
+	return 0;
+}
+
+int crisv32_pinmux_alloc(int port, int first_pin, int last_pin,
+			 enum pin_mode mode)
+{
+	unsigned long flags;
+	int ret;
+
+	crisv32_pinmux_init();
+
+	if (port >= PORTS || port < 0)
+		return -EINVAL;
+
+	spin_lock_irqsave(&pinmux_lock, flags);
+
+	ret = __crisv32_pinmux_alloc(port, first_pin, last_pin, mode);
+
+	spin_unlock_irqrestore(&pinmux_lock, flags);
+
+	return ret;
+}
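+
+/*
+ * Example (illustrative): hand pins 4..7 of port C to the I/O
+ * processor and release them again when done. -EPERM means at least
+ * one of the pins is already owned by another mode:
+ *
+ *	int ret = crisv32_pinmux_alloc(PORT_C, 4, 7, pinmux_iop);
+ *	if (ret)
+ *		return ret;
+ *	...
+ *	crisv32_pinmux_dealloc(PORT_C, 4, 7);
+ */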
+
+int crisv32_pinmux_alloc_fixed(enum fixed_function function)
+{
+	int ret = -EINVAL;
+	char saved[sizeof pins];
+	unsigned long flags;
+	reg_pinmux_rw_hwprot hwprot;
+
+	spin_lock_irqsave(&pinmux_lock, flags);
+
+	/* Save internal data for recovery */
+	memcpy(saved, pins, sizeof pins);
+
+	crisv32_pinmux_init();	/* Must be done before we read rw_hwprot */
+
+	hwprot = REG_RD(pinmux, regi_pinmux, rw_hwprot);
+
+	switch (function) {
+	case pinmux_ser1:
+		ret = __crisv32_pinmux_alloc(PORT_C, 4, 7, pinmux_fixed);
+		hwprot.ser1 = regk_pinmux_yes;
+		break;
+	case pinmux_ser2:
+		ret = __crisv32_pinmux_alloc(PORT_C, 8, 11, pinmux_fixed);
+		hwprot.ser2 = regk_pinmux_yes;
+		break;
+	case pinmux_ser3:
+		ret = __crisv32_pinmux_alloc(PORT_C, 12, 15, pinmux_fixed);
+		hwprot.ser3 = regk_pinmux_yes;
+		break;
+	case pinmux_sser0:
+		ret = __crisv32_pinmux_alloc(PORT_C, 0, 3, pinmux_fixed);
+		ret |= __crisv32_pinmux_alloc(PORT_C, 16, 16, pinmux_fixed);
+		hwprot.sser0 = regk_pinmux_yes;
+		break;
+	case pinmux_sser1:
+		ret = __crisv32_pinmux_alloc(PORT_D, 0, 4, pinmux_fixed);
+		hwprot.sser1 = regk_pinmux_yes;
+		break;
+	case pinmux_ata0:
+		ret = __crisv32_pinmux_alloc(PORT_D, 5, 7, pinmux_fixed);
+		ret |= __crisv32_pinmux_alloc(PORT_D, 15, 17, pinmux_fixed);
+		hwprot.ata0 = regk_pinmux_yes;
+		break;
+	case pinmux_ata1:
+		ret = __crisv32_pinmux_alloc(PORT_D, 0, 4, pinmux_fixed);
+		ret |= __crisv32_pinmux_alloc(PORT_E, 17, 17, pinmux_fixed);
+		hwprot.ata1 = regk_pinmux_yes;
+		break;
+	case pinmux_ata2:
+		ret = __crisv32_pinmux_alloc(PORT_C, 11, 15, pinmux_fixed);
+		ret |= __crisv32_pinmux_alloc(PORT_E, 3, 3, pinmux_fixed);
+		hwprot.ata2 = regk_pinmux_yes;
+		break;
+	case pinmux_ata3:
+		ret = __crisv32_pinmux_alloc(PORT_C, 8, 10, pinmux_fixed);
+		ret |= __crisv32_pinmux_alloc(PORT_C, 0, 2, pinmux_fixed);
+		hwprot.ata3 = regk_pinmux_yes;
+		break;
+	case pinmux_ata:
+		ret = __crisv32_pinmux_alloc(PORT_B, 0, 15, pinmux_fixed);
+		ret |= __crisv32_pinmux_alloc(PORT_D, 8, 15, pinmux_fixed);
+		hwprot.ata = regk_pinmux_yes;
+		break;
+	case pinmux_eth1:
+		ret = __crisv32_pinmux_alloc(PORT_E, 0, 17, pinmux_fixed);
+		hwprot.eth1 = regk_pinmux_yes;
+		hwprot.eth1_mgm = regk_pinmux_yes;
+		break;
+	case pinmux_timer:
+		ret = __crisv32_pinmux_alloc(PORT_C, 16, 16, pinmux_fixed);
+		hwprot.timer = regk_pinmux_yes;
+		spin_unlock_irqrestore(&pinmux_lock, flags);
+		return ret;
+	}
+
+	if (!ret)
+		REG_WR(pinmux, regi_pinmux, rw_hwprot, hwprot);
+	else
+		memcpy(pins, saved, sizeof pins);
+
+	spin_unlock_irqrestore(&pinmux_lock, flags);
+
+	return ret;
+}
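+
+/*
+ * Example (illustrative): a serial driver claims the fixed ser2 pin
+ * group. On success the matching bit in rw_hwprot has been set as
+ * well; on failure the pin bookkeeping is rolled back as above:
+ *
+ *	int ret = crisv32_pinmux_alloc_fixed(pinmux_ser2);
+ *	if (ret)
+ *		return ret;
+ */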
+
+void crisv32_pinmux_set(int port)
+{
+	int i;
+	int gpio_val = 0;
+	int iop_val = 0;
+
+	for (i = 0; i < PORT_PINS; i++) {
+		if (pins[port][i] == pinmux_gpio)
+			gpio_val |= (1 << i);
+		else if (pins[port][i] == pinmux_iop)
+			iop_val |= (1 << i);
+	}
+
+	REG_WRITE(int, regi_pinmux + REG_RD_ADDR_pinmux_rw_pb_gio + 8 * port,
+		  gpio_val);
+	REG_WRITE(int, regi_pinmux + REG_RD_ADDR_pinmux_rw_pb_iop + 8 * port,
+		  iop_val);
+
+#ifdef DEBUG
+	crisv32_pinmux_dump();
+#endif
+}
+
+static int __crisv32_pinmux_dealloc(int port, int first_pin, int last_pin)
+{
+	int i;
+
+	for (i = first_pin; i <= last_pin; i++)
+		pins[port][i] = pinmux_none;
+
+	crisv32_pinmux_set(port);
+	return 0;
+}
+
+int crisv32_pinmux_dealloc(int port, int first_pin, int last_pin)
+{
+	unsigned long flags;
+
+	crisv32_pinmux_init();
+
+	if (port >= PORTS || port < 0)
+		return -EINVAL;
+
+	spin_lock_irqsave(&pinmux_lock, flags);
+	__crisv32_pinmux_dealloc(port, first_pin, last_pin);
+	spin_unlock_irqrestore(&pinmux_lock, flags);
+
+	return 0;
+}
+
+int crisv32_pinmux_dealloc_fixed(enum fixed_function function)
+{
+	int ret = -EINVAL;
+	char saved[sizeof pins];
+	unsigned long flags;
+	reg_pinmux_rw_hwprot hwprot;
+
+	spin_lock_irqsave(&pinmux_lock, flags);
+
+	/* Save internal data for recovery */
+	memcpy(saved, pins, sizeof pins);
+
+	crisv32_pinmux_init();	/* Must be done before we read rw_hwprot */
+
+	hwprot = REG_RD(pinmux, regi_pinmux, rw_hwprot);
+
+	switch (function) {
+	case pinmux_ser1:
+		ret = __crisv32_pinmux_dealloc(PORT_C, 4, 7);
+		hwprot.ser1 = regk_pinmux_no;
+		break;
+	case pinmux_ser2:
+		ret = __crisv32_pinmux_dealloc(PORT_C, 8, 11);
+		hwprot.ser2 = regk_pinmux_no;
+		break;
+	case pinmux_ser3:
+		ret = __crisv32_pinmux_dealloc(PORT_C, 12, 15);
+		hwprot.ser3 = regk_pinmux_no;
+		break;
+	case pinmux_sser0:
+		ret = __crisv32_pinmux_dealloc(PORT_C, 0, 3);
+		ret |= __crisv32_pinmux_dealloc(PORT_C, 16, 16);
+		hwprot.sser0 = regk_pinmux_no;
+		break;
+	case pinmux_sser1:
+		ret = __crisv32_pinmux_dealloc(PORT_D, 0, 4);
+		hwprot.sser1 = regk_pinmux_no;
+		break;
+	case pinmux_ata0:
+		ret = __crisv32_pinmux_dealloc(PORT_D, 5, 7);
+		ret |= __crisv32_pinmux_dealloc(PORT_D, 15, 17);
+		hwprot.ata0 = regk_pinmux_no;
+		break;
+	case pinmux_ata1:
+		ret = __crisv32_pinmux_dealloc(PORT_D, 0, 4);
+		ret |= __crisv32_pinmux_dealloc(PORT_E, 17, 17);
+		hwprot.ata1 = regk_pinmux_no;
+		break;
+	case pinmux_ata2:
+		ret = __crisv32_pinmux_dealloc(PORT_C, 11, 15);
+		ret |= __crisv32_pinmux_dealloc(PORT_E, 3, 3);
+		hwprot.ata2 = regk_pinmux_no;
+		break;
+	case pinmux_ata3:
+		ret = __crisv32_pinmux_dealloc(PORT_C, 8, 10);
+		ret |= __crisv32_pinmux_dealloc(PORT_C, 0, 2);
+		hwprot.ata3 = regk_pinmux_no;
+		break;
+	case pinmux_ata:
+		ret = __crisv32_pinmux_dealloc(PORT_B, 0, 15);
+		ret |= __crisv32_pinmux_dealloc(PORT_D, 8, 15);
+		hwprot.ata = regk_pinmux_no;
+		break;
+	case pinmux_eth1:
+		ret = __crisv32_pinmux_dealloc(PORT_E, 0, 17);
+		hwprot.eth1 = regk_pinmux_no;
+		hwprot.eth1_mgm = regk_pinmux_no;
+		break;
+	case pinmux_timer:
+		ret = __crisv32_pinmux_dealloc(PORT_C, 16, 16);
+		hwprot.timer = regk_pinmux_no;
+		spin_unlock_irqrestore(&pinmux_lock, flags);
+		return ret;
+	}
+
+	if (!ret)
+		REG_WR(pinmux, regi_pinmux, rw_hwprot, hwprot);
+	else
+		memcpy(pins, saved, sizeof pins);
+
+	spin_unlock_irqrestore(&pinmux_lock, flags);
+
+	return ret;
+}
+
+#ifdef DEBUG
+static void crisv32_pinmux_dump(void)
+{
+	int i, j;
+
+	crisv32_pinmux_init();
+
+	for (i = 0; i < PORTS; i++) {
+		printk(KERN_DEBUG "Port %c\n", 'B' + i);
+		for (j = 0; j < PORT_PINS; j++)
+			printk(KERN_DEBUG "  Pin %d = %d\n", j, pins[i][j]);
+	}
+}
+#endif
+__initcall(crisv32_pinmux_init);
diff --git a/arch/cris/arch-v32/mm/Makefile b/arch/cris/arch-v32/mm/Makefile
new file mode 100644
index 0000000..0b801f2
--- /dev/null
+++ b/arch/cris/arch-v32/mm/Makefile
@@ -0,0 +1,4 @@
+# Makefile for the Linux/cris parts of the memory manager.
+
+obj-y	 += mmu.o init.o tlb.o intmem.o
+obj-$(CONFIG_ETRAX_L2CACHE)    += l2cache.o
diff --git a/arch/cris/arch-v32/mm/init.c b/arch/cris/arch-v32/mm/init.c
new file mode 100644
index 0000000..f5438ca
--- /dev/null
+++ b/arch/cris/arch-v32/mm/init.c
@@ -0,0 +1,162 @@
+/*
+ * Set up paging and the MMU.
+ *
+ * Copyright (C) 2000-2003, Axis Communications AB.
+ *
+ * Authors:   Bjorn Wesen <bjornw@axis.com>
+ *            Tobias Anderberg <tobiasa@axis.com>, CRISv32 port.
+ */
+#include <linux/mmzone.h>
+#include <linux/init.h>
+#include <linux/bootmem.h>
+#include <linux/mm.h>
+#include <asm/pgtable.h>
+#include <asm/page.h>
+#include <asm/types.h>
+#include <asm/mmu.h>
+#include <asm/io.h>
+#include <asm/mmu_context.h>
+#include <arch/hwregs/asm/mmu_defs_asm.h>
+#include <arch/hwregs/supp_reg.h>
+
+extern void tlb_init(void);
+
+/*
+ * The kernel is already mapped with a linear mapping at kseg_c, so there is
+ * no need to map it with a page table. However, head.S also temporarily
+ * mapped it at kseg_4, so the ksegs are set up again here. Also clear the
+ * TLB and do various other paging setup.
+ */
+void __init cris_mmu_init(void)
+{
+	unsigned long mmu_config;
+	unsigned long mmu_kbase_hi;
+	unsigned long mmu_kbase_lo;
+	unsigned short mmu_page_id;
+
+	/*
+	 * Make sure the current pgd table points to something sane, even if it
+	 * is most probably not used until the next switch_mm.
+	 */
+	per_cpu(current_pgd, smp_processor_id()) = init_mm.pgd;
+
+	/* Initialise the TLB. Function found in tlb.c. */
+	tlb_init();
+
+	/*
+	 * Enable exceptions and initialize the kernel segments.
+	 * See head.S for differences between ARTPEC-3 and ETRAX FS.
+	 */
+	mmu_config = ( REG_STATE(mmu, rw_mm_cfg, we, on)        |
+		       REG_STATE(mmu, rw_mm_cfg, acc, on)       |
+		       REG_STATE(mmu, rw_mm_cfg, ex, on)        |
+		       REG_STATE(mmu, rw_mm_cfg, inv, on)       |
+#ifdef CONFIG_CRIS_MACH_ARTPEC3
+		       REG_STATE(mmu, rw_mm_cfg, seg_f, page)   |
+		       REG_STATE(mmu, rw_mm_cfg, seg_e, page)   |
+		       REG_STATE(mmu, rw_mm_cfg, seg_d, linear) |
+#else
+		       REG_STATE(mmu, rw_mm_cfg, seg_f, linear) |
+		       REG_STATE(mmu, rw_mm_cfg, seg_e, linear) |
+		       REG_STATE(mmu, rw_mm_cfg, seg_d, page)   |
+#endif
+		       REG_STATE(mmu, rw_mm_cfg, seg_c, linear) |
+		       REG_STATE(mmu, rw_mm_cfg, seg_b, linear) |
+                       REG_STATE(mmu, rw_mm_cfg, seg_a, page)   |
+		       REG_STATE(mmu, rw_mm_cfg, seg_9, page)   |
+		       REG_STATE(mmu, rw_mm_cfg, seg_8, page)   |
+		       REG_STATE(mmu, rw_mm_cfg, seg_7, page)   |
+		       REG_STATE(mmu, rw_mm_cfg, seg_6, page)   |
+		       REG_STATE(mmu, rw_mm_cfg, seg_5, page)   |
+		       REG_STATE(mmu, rw_mm_cfg, seg_4, page)   |
+		       REG_STATE(mmu, rw_mm_cfg, seg_3, page)   |
+		       REG_STATE(mmu, rw_mm_cfg, seg_2, page)   |
+		       REG_STATE(mmu, rw_mm_cfg, seg_1, page)   |
+		       REG_STATE(mmu, rw_mm_cfg, seg_0, page));
+
+	/* See head.S for differences between ARTPEC-3 and ETRAX FS. */
+	mmu_kbase_hi = ( REG_FIELD(mmu, rw_mm_kbase_hi, base_f, 0x0) |
+#ifdef CONFIG_CRIS_MACH_ARTPEC3
+			 REG_FIELD(mmu, rw_mm_kbase_hi, base_e, 0x0) |
+			 REG_FIELD(mmu, rw_mm_kbase_hi, base_d, 0x5) |
+#else
+			 REG_FIELD(mmu, rw_mm_kbase_hi, base_e, 0x8) |
+			 REG_FIELD(mmu, rw_mm_kbase_hi, base_d, 0x0) |
+#endif
+                         REG_FIELD(mmu, rw_mm_kbase_hi, base_c, 0x4) |
+			 REG_FIELD(mmu, rw_mm_kbase_hi, base_b, 0xb) |
+			 REG_FIELD(mmu, rw_mm_kbase_hi, base_a, 0x0) |
+			 REG_FIELD(mmu, rw_mm_kbase_hi, base_9, 0x0) |
+			 REG_FIELD(mmu, rw_mm_kbase_hi, base_8, 0x0));
+
+	mmu_kbase_lo = ( REG_FIELD(mmu, rw_mm_kbase_lo, base_7, 0x0) |
+			 REG_FIELD(mmu, rw_mm_kbase_lo, base_6, 0x0) |
+			 REG_FIELD(mmu, rw_mm_kbase_lo, base_5, 0x0) |
+			 REG_FIELD(mmu, rw_mm_kbase_lo, base_4, 0x0) |
+			 REG_FIELD(mmu, rw_mm_kbase_lo, base_3, 0x0) |
+			 REG_FIELD(mmu, rw_mm_kbase_lo, base_2, 0x0) |
+			 REG_FIELD(mmu, rw_mm_kbase_lo, base_1, 0x0) |
+			 REG_FIELD(mmu, rw_mm_kbase_lo, base_0, 0x0));
+
+	mmu_page_id = REG_FIELD(mmu, rw_mm_tlb_hi, pid, 0);
+
+	/* Update the instruction MMU. */
+	SUPP_BANK_SEL(BANK_IM);
+	SUPP_REG_WR(RW_MM_CFG, mmu_config);
+	SUPP_REG_WR(RW_MM_KBASE_HI, mmu_kbase_hi);
+	SUPP_REG_WR(RW_MM_KBASE_LO, mmu_kbase_lo);
+	SUPP_REG_WR(RW_MM_TLB_HI, mmu_page_id);
+
+	/* Update the data MMU. */
+	SUPP_BANK_SEL(BANK_DM);
+	SUPP_REG_WR(RW_MM_CFG, mmu_config);
+	SUPP_REG_WR(RW_MM_KBASE_HI, mmu_kbase_hi);
+	SUPP_REG_WR(RW_MM_KBASE_LO, mmu_kbase_lo);
+	SUPP_REG_WR(RW_MM_TLB_HI, mmu_page_id);
+
+	SPEC_REG_WR(SPEC_REG_PID, 0);
+
+	/*
+	 * The MMU has been enabled ever since head.S but just to make it
+	 * totally obvious enable it here as well.
+	 */
+	SUPP_BANK_SEL(BANK_GC);
+	SUPP_REG_WR(RW_GC_CFG, 0xf); /* IMMU, DMMU, ICache, DCache on */
+}
+
+void __init paging_init(void)
+{
+	int i;
+	unsigned long zones_size[MAX_NR_ZONES];
+
+	printk("Setting up paging and the MMU.\n");
+
+	/* Clear out the init_mm.pgd that will contain the kernel's mappings. */
+	for(i = 0; i < PTRS_PER_PGD; i++)
+		swapper_pg_dir[i] = __pgd(0);
+
+	cris_mmu_init();
+
+	/*
+	 * Initialize the bad page table and bad page to point to a couple of
+	 * allocated pages.
+	 */
+	empty_zero_page = (unsigned long) alloc_bootmem_pages(PAGE_SIZE);
+	memset((void *) empty_zero_page, 0, PAGE_SIZE);
+
+	/* All pages are DMA'able in Etrax, so put all in the DMA'able zone. */
+	zones_size[0] = ((unsigned long) high_memory - PAGE_OFFSET) >> PAGE_SHIFT;
+
+	for (i = 1; i < MAX_NR_ZONES; i++)
+		zones_size[i] = 0;
+
+	/*
+	 * Use free_area_init_node instead of free_area_init, because it is
+	 * designed for systems where the DRAM starts at an address
+	 * substantially higher than 0, like us (we start at PAGE_OFFSET). This
+	 * saves space in the mem_map page array.
+	 */
+	free_area_init_node(0, zones_size, PAGE_OFFSET >> PAGE_SHIFT, 0);
+
+	mem_map = contig_page_data.node_mem_map;
+}
diff --git a/arch/cris/arch-v32/mm/intmem.c b/arch/cris/arch-v32/mm/intmem.c
new file mode 100644
index 0000000..9ef5609
--- /dev/null
+++ b/arch/cris/arch-v32/mm/intmem.c
@@ -0,0 +1,149 @@
+/*
+ * Simple allocator for internal RAM in ETRAX FS
+ *
+ * Copyright (c) 2004 Axis Communications AB.
+ */
+
+#include <linux/list.h>
+#include <linux/slab.h>
+#include <asm/io.h>
+#include <memmap.h>
+
+#define STATUS_FREE 0
+#define STATUS_ALLOCATED 1
+
+#ifdef CONFIG_ETRAX_L2CACHE
+#define RESERVED_SIZE 66*1024
+#else
+#define RESERVED_SIZE 0
+#endif
+
+struct intmem_allocation {
+	struct list_head entry;
+	unsigned int size;
+	unsigned offset;
+	char status;
+};
+
+
+static struct list_head intmem_allocations;
+static void* intmem_virtual;
+
+static void crisv32_intmem_init(void)
+{
+	static int initiated = 0;
+	if (!initiated) {
+		struct intmem_allocation* alloc;
+		alloc = kmalloc(sizeof *alloc, GFP_KERNEL);
+		INIT_LIST_HEAD(&intmem_allocations);
+		intmem_virtual = ioremap(MEM_INTMEM_START + RESERVED_SIZE,
+					 MEM_INTMEM_SIZE - RESERVED_SIZE);
+		initiated = 1;
+		alloc->size = MEM_INTMEM_SIZE - RESERVED_SIZE;
+		alloc->offset = 0;
+		alloc->status = STATUS_FREE;
+		list_add_tail(&alloc->entry, &intmem_allocations);
+	}
+}
+
+void* crisv32_intmem_alloc(unsigned size, unsigned align)
+{
+	struct intmem_allocation* allocation;
+	struct intmem_allocation* tmp;
+	void* ret = NULL;
+
+	preempt_disable();
+	crisv32_intmem_init();
+
+	list_for_each_entry_safe(allocation, tmp, &intmem_allocations, entry) {
+		int alignment = allocation->offset % align;
+		alignment = alignment ? align - alignment : alignment;
+
+		if (allocation->status == STATUS_FREE &&
+		    allocation->size >= size + alignment) {
+			if (allocation->size > size + alignment) {
+				struct intmem_allocation* alloc;
+				alloc = kmalloc(sizeof *alloc, GFP_ATOMIC);
+				alloc->status = STATUS_FREE;
+				alloc->size = allocation->size - size -
+					alignment;
+				alloc->offset = allocation->offset + size +
+					alignment;
+				list_add(&alloc->entry, &allocation->entry);
+
+				if (alignment) {
+					struct intmem_allocation *tmp;
+					tmp = kmalloc(sizeof *tmp, GFP_ATOMIC);
+					tmp->offset = allocation->offset;
+					tmp->size = alignment;
+					tmp->status = STATUS_FREE;
+					allocation->offset += alignment;
+					list_add_tail(&tmp->entry,
+						&allocation->entry);
+				}
+			}
+			allocation->status = STATUS_ALLOCATED;
+			allocation->size = size;
+			ret = (void *)((char *)intmem_virtual +
+					allocation->offset);
+		}
+	}
+	preempt_enable();
+	return ret;
+}
+
+void crisv32_intmem_free(void* addr)
+{
+	struct intmem_allocation* allocation;
+	struct intmem_allocation* tmp;
+
+	if (addr == NULL)
+		return;
+
+	preempt_disable();
+	crisv32_intmem_init();
+
+	list_for_each_entry_safe(allocation, tmp, &intmem_allocations, entry) {
+		if (allocation->offset == (int)(addr - intmem_virtual)) {
+			struct intmem_allocation *prev =
+			  list_entry(allocation->entry.prev,
+			             struct intmem_allocation, entry);
+			struct intmem_allocation *next =
+			  list_entry(allocation->entry.next,
+				     struct intmem_allocation, entry);
+
+			allocation->status = STATUS_FREE;
+			/* Join with prev and/or next if also free */
+			if ((prev != &intmem_allocations) &&
+					(prev->status == STATUS_FREE)) {
+				prev->size += allocation->size;
+				list_del(&allocation->entry);
+				kfree(allocation);
+				allocation = prev;
+			}
+			if ((next != &intmem_allocations) &&
+					(next->status == STATUS_FREE)) {
+				allocation->size += next->size;
+				list_del(&next->entry);
+				kfree(next);
+			}
+			preempt_enable();
+			return;
+		}
+	}
+	preempt_enable();
+}
+
+void* crisv32_intmem_phys_to_virt(unsigned long addr)
+{
+	return (void *)(addr - (MEM_INTMEM_START + RESERVED_SIZE) +
+		(unsigned long)intmem_virtual);
+}
+
+unsigned long crisv32_intmem_virt_to_phys(void* addr)
+{
+	return (unsigned long)((unsigned long )addr -
+		(unsigned long)intmem_virtual + MEM_INTMEM_START +
+		RESERVED_SIZE);
+}
+device_initcall(crisv32_intmem_init);
+
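+/*
+ * Example (illustrative): carve out a 256-byte, 32-byte-aligned chunk
+ * of internal RAM and hand its physical address to a hardware unit.
+ * The my_hw_set_desc_base() helper is hypothetical:
+ *
+ *	void *desc = crisv32_intmem_alloc(256, 32);
+ *	if (!desc)
+ *		return -ENOMEM;
+ *	my_hw_set_desc_base(crisv32_intmem_virt_to_phys(desc));
+ *	...
+ *	crisv32_intmem_free(desc);
+ */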
diff --git a/arch/cris/arch-v32/mm/l2cache.c b/arch/cris/arch-v32/mm/l2cache.c
new file mode 100644
index 0000000..332ff10
--- /dev/null
+++ b/arch/cris/arch-v32/mm/l2cache.c
@@ -0,0 +1,29 @@
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <memmap.h>
+#include <hwregs/reg_map.h>
+#include <hwregs/reg_rdwr.h>
+#include <hwregs/l2cache_defs.h>
+#include <asm/io.h>
+
+#define L2CACHE_SIZE 64
+
+int __init l2cache_init(void)
+{
+	reg_l2cache_rw_ctrl ctrl = {0};
+	reg_l2cache_rw_cfg cfg = {.en = regk_l2cache_yes};
+
+	ctrl.csize = L2CACHE_SIZE;
+	ctrl.cbase = L2CACHE_SIZE / 4 + (L2CACHE_SIZE % 4 ? 1 : 0);
+	REG_WR(l2cache, regi_l2cache, rw_ctrl, ctrl);
+
+	/* Flush the tag memory */
+	memset((void *)(MEM_INTMEM_START | MEM_NON_CACHEABLE), 0, 2*1024);
+
+	/* Enable the cache */
+	REG_WR(l2cache, regi_l2cache, rw_cfg, cfg);
+
+	return 0;
+}
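+
+/*
+ * Illustrative arithmetic: with L2CACHE_SIZE == 64 as above, this
+ * programs csize = 64 and cbase = 64/4 + 0 = 16; the rounding term
+ * only matters for sizes that are not a multiple of 4.
+ */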
+
diff --git a/arch/cris/arch-v32/mm/mmu.S b/arch/cris/arch-v32/mm/mmu.S
new file mode 100644
index 0000000..c098104
--- /dev/null
+++ b/arch/cris/arch-v32/mm/mmu.S
@@ -0,0 +1,210 @@
+; WARNING : The refill handler has been modified, see below !!!
+
+/*
+ *  Copyright (C) 2003 Axis Communications AB
+ *
+ *  Authors:	Mikael Starvik (starvik@axis.com)
+ *
+ * Code for the fault low-level handling routines.
+ *
+ */
+
+#include <asm/page.h>
+#include <asm/pgtable.h>
+
+; Save all registers. Must save in the same order as struct pt_regs.
+.macro SAVE_ALL
+	subq	12, $sp
+	move	$erp, [$sp]
+	subq	4, $sp
+	move	$srp, [$sp]
+	subq	4, $sp
+	move	$ccs, [$sp]
+	subq	4, $sp
+	move	$spc, [$sp]
+	subq	4, $sp
+	move	$mof, [$sp]
+	subq	4, $sp
+	move	$srs, [$sp]
+	subq	4, $sp
+	move.d	$acr, [$sp]
+	subq	14*4, $sp
+	movem	$r13, [$sp]
+	subq	4, $sp
+	move.d	$r10, [$sp]
+.endm
+
+; Bus fault handler. Extracts relevant information and calls mm subsystem
+; to handle the fault.
+.macro	MMU_BUS_FAULT_HANDLER handler, mmu, we, ex
+	.globl	\handler
+	.type   \handler,"function"
+\handler:
+	SAVE_ALL
+	move	\mmu, $srs	; Select MMU support register bank
+	move.d  $sp, $r11	; regs
+	moveq	1, $r12		; protection fault
+	moveq   \we, $r13	; write exception?
+	orq	\ex << 1, $r13	; execute?
+	move    $s3, $r10	; rw_mm_cause
+	and.d	~8191, $r10	; Get faulting page start address
+
+	jsr	do_page_fault
+	nop
+	ba	ret_from_intr
+	nop
+	.size   \handler, . - \handler
+.endm
+
+; Refill handler. Three cases may occur:
+;   1. PMD and PTE exists in mm subsystem but not in TLB
+;   2. PMD exists but not PTE
+;   3. PMD doesn't exist
+; The code below handles case 1 and calls the mm subsystem for case 2 and 3.
+; Do not touch this code without very good reasons and extensive testing.
+; Note that the code is optimized to minimize stalls (makes the code harder
+; to read).
+;
+; WARNING !!!
+; Modified by Mikael Asker 060725: added a workaround for strange TLB
+; behavior. If the same PTE is present in more than one set, the TLB
+; doesn't recognize it and we get stuck in a loop of refill exceptions.
+; The workaround detects such loops and exits them by flushing
+; the TLB contents. The problem and workaround were verified
+; in VCS by Mikael Starvik.
+;
+; Each page is 8 KB. Each PMD holds 8192/4 PTEs (each PTE is 4 bytes) so each
+; PMD holds 16 MB of virtual memory.
+;   Bits  0-12 : Offset within a page
+;   Bits 13-23 : PTE offset within a PMD
+;   Bits 24-31 : PMD offset within the PGD
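+;
+; Example (illustrative): for the virtual address 0x12345678,
+;   PMD index   = 0x12345678 >> 24           = 0x12
+;   PTE index   = (0x12345678 >> 13) & 0x7ff = 0x1a2
+;   page offset = 0x12345678 & 0x1fff        = 0x1678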
+
+.macro MMU_REFILL_HANDLER handler, mmu
+	.data
+1:	.dword	0		; refill_count
+                                ;   == 0 <=> last_refill_cause is invalid
+2:	.dword	0		; last_refill_cause
+	.text
+	.globl \handler
+	.type   \handler, "function"
+\handler:
+	subq	4, $sp
+; (The pipeline stalls for one cycle; $sp used as address in the next cycle.)
+	move	$srs, [$sp]
+	subq	4, $sp
+	move	\mmu, $srs	; Select MMU support register bank
+	move.d	$acr, [$sp]
+	subq	12, $sp
+	move.d	1b, $acr        ; Point to refill_count
+	movem	$r2, [$sp]
+
+	test.d	[$acr]	        ; refill_count == 0 ?
+	beq	5f		;   yes, last_refill_cause is invalid
+        move.d	$acr, $r1
+
+	; last_refill_cause is valid, investigate cause
+        addq    4, $r1          ; Point to last_refill_cause
+	move	$s3, $r0	; Get rw_mm_cause
+	move.d	[$r1], $r2	; Get last_refill_cause
+	cmp.d	$r0, $r2	; rw_mm_cause == last_refill_cause ?
+	beq	6f		;   yes, increment count
+	moveq	1, $r2
+
+        ; rw_mm_cause != last_refill_cause
+	move.d	$r2, [$acr]	; refill_count = 1
+	move.d	$r0, [$r1]	; last_refill_cause = rw_mm_cause
+
+3:	; Probably not in a loop, continue normal processing
+	move.d  current_pgd, $acr ; PGD
+	; Look up PMD in PGD
+	lsrq	24, $r0	; Get PMD index into PGD (bit 24-31)
+	move.d  [$acr], $acr	; PGD for the current process
+	addi	$r0.d, $acr, $acr
+	move	$s3, $r0	; rw_mm_cause
+	move.d  [$acr], $acr	; Get PMD
+	beq	8f
+	; Look up PTE in PMD
+	lsrq	PAGE_SHIFT, $r0
+	and.w	PAGE_MASK, $acr	; Remove PMD flags
+	and.d	0x7ff, $r0	; Get PTE index into PMD (bit 13-23)
+	addi    $r0.d, $acr, $acr
+	move.d  [$acr], $acr	; Get PTE
+	beq	9f
+	movem	[$sp], $r2	; Restore r0-r2 in delay slot
+	addq	12, $sp
+	; Store in TLB
+	move	$acr, $s5
+4:	; Return
+	move.d	[$sp+], $acr
+	move	[$sp], $srs
+	addq	4, $sp
+	rete
+	rfe
+
+5:      ; last_refill_cause is invalid
+	moveq	1, $r2
+        addq    4, $r1          ; Point to last_refill_cause
+	move.d	$r2, [$acr]	; refill_count = 1
+	move	$s3, $r0	; Get rw_mm_cause
+        ba      3b		; Continue normal processing
+	move.d	$r0,[$r1]	; last_refill_cause = rw_mm_cause
+
+6:      ; rw_mm_cause == last_refill_cause
+        move.d  [$acr], $r2     ; Get refill_count
+	cmpq	4, $r2		; refill_count > 4 ?
+	bhi	7f		;   yes
+	addq	1, $r2	        ; refill_count++
+	ba	3b		; Continue normal processing
+	move.d	$r2, [$acr]
+
+7:	; refill_count > 4, error
+	move.d	$acr, $r0       ; Save pointer to refill_count
+	clear.d	[$r0]		; refill_count = 0
+
+	;; rewind the short stack
+	movem	[$sp], $r2	; Restore r0-r2
+	addq	12, $sp
+	move.d	[$sp+], $acr
+	move	[$sp], $srs
+	addq	4, $sp
+	;; Keep it simple (slow), save all the regs.
+	SAVE_ALL
+	jsr	__flush_tlb_all
+	nop
+	ba	ret_from_intr	; Return
+	nop
+
+8:	; PMD missing, let the mm subsystem fix it up.
+	movem	[$sp], $r2	; Restore r0-r2
+9:      ; PTE missing, let the mm subsystem fix it up.
+	addq	12, $sp
+	move.d	[$sp+], $acr
+	move	[$sp], $srs
+	addq	4, $sp
+	SAVE_ALL
+	move    \mmu, $srs
+	move.d	$sp, $r11	; regs
+	clear.d	$r12		; Not a protection fault
+	move.w  PAGE_MASK, $acr
+	move    $s3, $r10	; rw_mm_cause
+	btstq   9, $r10		; Check if write access
+	smi     $r13
+	and.w	PAGE_MASK, $r10	; Get VPN (virtual address)
+	jsr	do_page_fault
+	and.w   $acr, $r10
+	; Return
+	ba	ret_from_intr
+	nop
+	.size   \handler, . - \handler
+.endm
+
+	; These are the MMU refill and bus fault handlers.
+
+MMU_REFILL_HANDLER i_mmu_refill, 1
+MMU_BUS_FAULT_HANDLER i_mmu_invalid, 1, 0, 0
+MMU_BUS_FAULT_HANDLER i_mmu_access,  1, 0, 0
+MMU_BUS_FAULT_HANDLER i_mmu_execute, 1, 0, 1
+MMU_REFILL_HANDLER d_mmu_refill,  2
+MMU_BUS_FAULT_HANDLER d_mmu_invalid, 2, 0, 0
+MMU_BUS_FAULT_HANDLER d_mmu_access,  2, 0, 0
+MMU_BUS_FAULT_HANDLER d_mmu_write,   2, 1, 0
diff --git a/arch/cris/arch-v32/mm/tlb.c b/arch/cris/arch-v32/mm/tlb.c
new file mode 100644
index 0000000..c030d02
--- /dev/null
+++ b/arch/cris/arch-v32/mm/tlb.c
@@ -0,0 +1,207 @@
+/*
+ * Low level TLB handling.
+ *
+ * Copyright (C) 2000-2003, Axis Communications AB.
+ *
+ * Authors:   Bjorn Wesen <bjornw@axis.com>
+ *            Tobias Anderberg <tobiasa@axis.com>, CRISv32 port.
+ */
+
+#include <asm/tlb.h>
+#include <asm/mmu_context.h>
+#include <arch/hwregs/asm/mmu_defs_asm.h>
+#include <arch/hwregs/supp_reg.h>
+
+#define UPDATE_TLB_SEL_IDX(val)					\
+do {								\
+	unsigned long tlb_sel;					\
+								\
+	tlb_sel = REG_FIELD(mmu, rw_mm_tlb_sel, idx, val);	\
+	SUPP_REG_WR(RW_MM_TLB_SEL, tlb_sel);			\
+} while(0)
+
+#define UPDATE_TLB_HILO(tlb_hi, tlb_lo)		\
+do {						\
+	SUPP_REG_WR(RW_MM_TLB_HI, tlb_hi);	\
+	SUPP_REG_WR(RW_MM_TLB_LO, tlb_lo);	\
+} while(0)
+
+/*
+ * The TLB can host up to 256 different mm contexts at the same time. The
+ * running context is found in the PID register. Each TLB entry contains a
+ * page_id that has to match the PID register to give a hit. page_id_map
+ * keeps track of which mm is assigned to which page_id, making sure it's
+ * known when to invalidate TLB entries.
+ *
+ * The last page_id is never used for a running context; it serves as an
+ * invalid page_id so that it's possible to make TLB entries that will
+ * never match.
+ *
+ * Note: the flushes need to be atomic, otherwise an interrupt handler that
+ * uses vmalloc'ed memory might cause a TLB load in the middle of a flush.
+ */
+
+/* Flush all TLB entries. */
+void
+__flush_tlb_all(void)
+{
+	int i;
+	int mmu;
+	unsigned long flags;
+	unsigned long mmu_tlb_hi;
+	unsigned long mmu_tlb_sel;
+
+	/*
+	 * Mask with 0xf so similar TLB entries aren't written in the same 4-way
+	 * entry group.
+	 */
+	local_irq_save(flags);
+
+	for (mmu = 1; mmu <= 2; mmu++) {
+		SUPP_BANK_SEL(mmu); /* Select the MMU */
+		for (i = 0; i < NUM_TLB_ENTRIES; i++) {
+			/* Store invalid entry */
+			mmu_tlb_sel = REG_FIELD(mmu, rw_mm_tlb_sel, idx, i);
+
+			mmu_tlb_hi = (REG_FIELD(mmu, rw_mm_tlb_hi, pid, INVALID_PAGEID)
+				    | REG_FIELD(mmu, rw_mm_tlb_hi, vpn, i & 0xf));
+
+			SUPP_REG_WR(RW_MM_TLB_SEL, mmu_tlb_sel);
+			SUPP_REG_WR(RW_MM_TLB_HI, mmu_tlb_hi);
+			SUPP_REG_WR(RW_MM_TLB_LO, 0);
+		}
+	}
+
+	local_irq_restore(flags);
+}
+
+/* Flush an entire user address space. */
+void
+__flush_tlb_mm(struct mm_struct *mm)
+{
+	int i;
+	int mmu;
+	unsigned long flags;
+	unsigned long page_id;
+	unsigned long tlb_hi;
+	unsigned long mmu_tlb_hi;
+
+	page_id = mm->context.page_id;
+
+	if (page_id == NO_CONTEXT)
+		return;
+
+	/* Mark the TLB entries that match the page_id as invalid. */
+	local_irq_save(flags);
+
+	for (mmu = 1; mmu <= 2; mmu++) {
+		SUPP_BANK_SEL(mmu);
+		for (i = 0; i < NUM_TLB_ENTRIES; i++) {
+			UPDATE_TLB_SEL_IDX(i);
+
+			/* Get the page_id */
+			SUPP_REG_RD(RW_MM_TLB_HI, tlb_hi);
+
+			/* Check if the page_id match. */
+			if ((tlb_hi & 0xff) == page_id) {
+				mmu_tlb_hi = (REG_FIELD(mmu, rw_mm_tlb_hi, pid,
+				                        INVALID_PAGEID)
+				            | REG_FIELD(mmu, rw_mm_tlb_hi, vpn,
+				                        i & 0xf));
+
+				UPDATE_TLB_HILO(mmu_tlb_hi, 0);
+			}
+		}
+	}
+
+	local_irq_restore(flags);
+}
+
+/* Invalidate a single page. */
+void
+__flush_tlb_page(struct vm_area_struct *vma, unsigned long addr)
+{
+	int i;
+	int mmu;
+	unsigned long page_id;
+	unsigned long flags;
+	unsigned long tlb_hi;
+	unsigned long mmu_tlb_hi;
+
+	page_id = vma->vm_mm->context.page_id;
+
+	if (page_id == NO_CONTEXT)
+		return;
+
+	addr &= PAGE_MASK;
+
+	/*
+	 * Invalidate those TLB entries that match both the mm context and the
+	 * requested virtual address.
+	 */
+	local_irq_save(flags);
+
+	for (mmu = 1; mmu <= 2; mmu++) {
+		SUPP_BANK_SEL(mmu);
+		for (i = 0; i < NUM_TLB_ENTRIES; i++) {
+			UPDATE_TLB_SEL_IDX(i);
+			SUPP_REG_RD(RW_MM_TLB_HI, tlb_hi);
+
+			/* Check if page_id and address matches */
+			if (((tlb_hi & 0xff) == page_id) &&
+			    ((tlb_hi & PAGE_MASK) == addr)) {
+				mmu_tlb_hi = REG_FIELD(mmu, rw_mm_tlb_hi, pid,
+				                       INVALID_PAGEID) | addr;
+
+				UPDATE_TLB_HILO(mmu_tlb_hi, 0);
+			}
+		}
+	}
+
+	local_irq_restore(flags);
+}
+
+/*
+ * Initialize the context related info for a new mm_struct
+ * instance.
+ */
+
+int
+init_new_context(struct task_struct *tsk, struct mm_struct *mm)
+{
+	mm->context.page_id = NO_CONTEXT;
+	return 0;
+}
+
+static DEFINE_SPINLOCK(mmu_context_lock);
+
+/* Called in schedule() just before actually doing the switch_to. */
+void
+switch_mm(struct mm_struct *prev, struct mm_struct *next,
+	  struct task_struct *tsk)
+{
+	if (prev != next) {
+		int cpu = smp_processor_id();
+
+		/* Make sure there is a MMU context. */
+		spin_lock(&mmu_context_lock);
+		get_mmu_context(next);
+		cpumask_set_cpu(cpu, mm_cpumask(next));
+		spin_unlock(&mmu_context_lock);
+
+		/*
+		 * Remember the pgd for the fault handlers. Keep a separate
+		 * copy of it because current and active_mm might be invalid
+		 * at points where there's still a need to dereference the pgd.
+		 */
+		per_cpu(current_pgd, cpu) = next->pgd;
+
+		/* Switch context in the MMU. */
+		if (tsk && task_thread_info(tsk)) {
+			SPEC_REG_WR(SPEC_REG_PID, next->context.page_id |
+				task_thread_info(tsk)->tls);
+		} else {
+			SPEC_REG_WR(SPEC_REG_PID, next->context.page_id);
+		}
+	}
+}
+
diff --git a/arch/cris/arch-v32/output_arch.ld b/arch/cris/arch-v32/output_arch.ld
new file mode 100644
index 0000000..d60a57d
--- /dev/null
+++ b/arch/cris/arch-v32/output_arch.ld
@@ -0,0 +1,2 @@
+/* At the time of this writing, there's no equivalent ld option. */
+OUTPUT_ARCH (crisv32)