File-copy from v4.4.100
This is the result of 'cp' from a linux-stable tree with the 'v4.4.100'
tag checked out (commit 26d6298789e695c9f627ce49a7bbd2286405798a) on
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
Please refer to that tree for all history prior to this point.
Change-Id: I8a9ee2aea93cd29c52c847d0ce33091a73ae6afe
diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig
new file mode 100644
index 0000000..4bf7d50
--- /dev/null
+++ b/drivers/misc/Kconfig
@@ -0,0 +1,540 @@
+#
+# Misc strange devices
+#
+
+menu "Misc devices"
+
+config SENSORS_LIS3LV02D
+ tristate
+ depends on INPUT
+ select INPUT_POLLDEV
+ default n
+
+config AD525X_DPOT
+ tristate "Analog Devices Digital Potentiometers"
+ depends on (I2C || SPI) && SYSFS
+ help
+ If you say yes here, you get support for the Analog Devices
+ AD5258, AD5259, AD5251, AD5252, AD5253, AD5254, AD5255
+ AD5160, AD5161, AD5162, AD5165, AD5200, AD5201, AD5203,
+ AD5204, AD5206, AD5207, AD5231, AD5232, AD5233, AD5235,
+ AD5260, AD5262, AD5263, AD5290, AD5291, AD5292, AD5293,
+ AD7376, AD8400, AD8402, AD8403, ADN2850, AD5241, AD5242,
+ AD5243, AD5245, AD5246, AD5247, AD5248, AD5280, AD5282,
+ ADN2860, AD5273, AD5171, AD5170, AD5172, AD5173, AD5270,
+ AD5271, AD5272, AD5274
+ digital potentiometer chips.
+
+ See Documentation/misc-devices/ad525x_dpot.txt for the
+ userspace interface.
+
+ This driver can also be built as a module. If so, the module
+ will be called ad525x_dpot.
+
+config AD525X_DPOT_I2C
+ tristate "support I2C bus connection"
+ depends on AD525X_DPOT && I2C
+ help
+ Say Y here if you have a digital potentiometers hooked to an I2C bus.
+
+ To compile this driver as a module, choose M here: the
+ module will be called ad525x_dpot-i2c.
+
+config AD525X_DPOT_SPI
+ tristate "support SPI bus connection"
+ depends on AD525X_DPOT && SPI_MASTER
+ help
+ Say Y here if you have a digital potentiometers hooked to an SPI bus.
+
+ If unsure, say N (but it's safe to say "Y").
+
+ To compile this driver as a module, choose M here: the
+ module will be called ad525x_dpot-spi.
+
+config ATMEL_TCLIB
+ bool "Atmel AT32/AT91 Timer/Counter Library"
+ depends on (AVR32 || ARCH_AT91)
+ help
+ Select this if you want a library to allocate the Timer/Counter
+ blocks found on many Atmel processors. This facilitates using
+ these blocks by different drivers despite processor differences.
+
+config ATMEL_TCB_CLKSRC
+ bool "TC Block Clocksource"
+ depends on ATMEL_TCLIB
+ default y
+ help
+ Select this to get a high precision clocksource based on a
+ TC block with a 5+ MHz base clock rate. Two timer channels
+ are combined to make a single 32-bit timer.
+
+ When GENERIC_CLOCKEVENTS is defined, the third timer channel
+ may be used as a clock event device supporting oneshot mode
+ (delays of up to two seconds) based on the 32 KiHz clock.
+
+config ATMEL_TCB_CLKSRC_BLOCK
+ int
+ depends on ATMEL_TCB_CLKSRC
+ prompt "TC Block" if CPU_AT32AP700X
+ default 0
+ range 0 1
+ help
+ Some chips provide more than one TC block, so you have the
+ choice of which one to use for the clock framework. The other
+ TC can be used for other purposes, such as PWM generation and
+ interval timing.
+
+config DUMMY_IRQ
+ tristate "Dummy IRQ handler"
+ default n
+ ---help---
+ This module accepts a single 'irq' parameter, which it should register for.
+ The sole purpose of this module is to help with debugging of systems on
+ which spurious IRQs would happen on disabled IRQ vector.
+
+config IBM_ASM
+ tristate "Device driver for IBM RSA service processor"
+ depends on X86 && PCI && INPUT
+ ---help---
+ This option enables device driver support for in-band access to the
+ IBM RSA (Condor) service processor in eServer xSeries systems.
+ The ibmasm device driver allows user space application to access
+ ASM (Advanced Systems Management) functions on the service
+ processor. The driver is meant to be used in conjunction with
+ a user space API.
+ The ibmasm driver also enables the OS to use the UART on the
+ service processor board as a regular serial port. To make use of
+ this feature serial driver support (CONFIG_SERIAL_8250) must be
+ enabled.
+
+ WARNING: This software may not be supported or function
+ correctly on your IBM server. Please consult the IBM ServerProven
+ website <http://www-03.ibm.com/systems/info/x86servers/serverproven/compat/us/>
+ for information on the specific driver level and support statement
+ for your IBM server.
+
+config PHANTOM
+ tristate "Sensable PHANToM (PCI)"
+ depends on PCI
+ help
+ Say Y here if you want to build a driver for Sensable PHANToM device.
+
+ This driver is only for PCI PHANToMs.
+
+ If you choose to build module, its name will be phantom. If unsure,
+ say N here.
+
+config INTEL_MID_PTI
+ tristate "Parallel Trace Interface for MIPI P1149.7 cJTAG standard"
+ depends on PCI && TTY && (X86_INTEL_MID || COMPILE_TEST)
+ default n
+ help
+ The PTI (Parallel Trace Interface) driver directs
+ trace data routed from various parts in the system out
+ through an Intel Penwell PTI port and out of the mobile
+ device for analysis with a debugging tool (Lauterbach or Fido).
+
+ You should select this driver if the target kernel is meant for
+ an Intel Atom (non-netbook) mobile device containing a MIPI
+ P1149.7 standard implementation.
+
+config SGI_IOC4
+ tristate "SGI IOC4 Base IO support"
+ depends on PCI
+ ---help---
+ This option enables basic support for the IOC4 chip on certain
+ SGI IO controller cards (IO9, IO10, and PCI-RT). This option
+ does not enable any specific functions on such a card, but provides
+ necessary infrastructure for other drivers to utilize.
+
+ If you have an SGI Altix with an IOC4-based card say Y.
+ Otherwise say N.
+
+config TIFM_CORE
+ tristate "TI Flash Media interface support"
+ depends on PCI
+ help
+ If you want support for Texas Instruments(R) Flash Media adapters
+ you should select this option and then also choose an appropriate
+ host adapter, such as 'TI Flash Media PCI74xx/PCI76xx host adapter
+ support', if you have a TI PCI74xx compatible card reader, for
+ example.
+ You will also have to select some flash card format drivers. MMC/SD
+ cards are supported via 'MMC/SD Card support: TI Flash Media MMC/SD
+ Interface support (MMC_TIFM_SD)'.
+
+ To compile this driver as a module, choose M here: the module will
+ be called tifm_core.
+
+config TIFM_7XX1
+ tristate "TI Flash Media PCI74xx/PCI76xx host adapter support"
+ depends on PCI && TIFM_CORE
+ default TIFM_CORE
+ help
+ This option enables support for Texas Instruments(R) PCI74xx and
+ PCI76xx families of Flash Media adapters, found in many laptops.
+ To make actual use of the device, you will have to select some
+ flash card format drivers, as outlined in the TIFM_CORE Help.
+
+ To compile this driver as a module, choose M here: the module will
+ be called tifm_7xx1.
+
+config ICS932S401
+ tristate "Integrated Circuits ICS932S401"
+ depends on I2C
+ help
+ If you say yes here you get support for the Integrated Circuits
+ ICS932S401 clock control chips.
+
+ This driver can also be built as a module. If so, the module
+ will be called ics932s401.
+
+config ATMEL_SSC
+ tristate "Device driver for Atmel SSC peripheral"
+ depends on HAS_IOMEM && (AVR32 || ARCH_AT91 || COMPILE_TEST)
+ ---help---
+ This option enables device driver support for Atmel Synchronized
+ Serial Communication peripheral (SSC).
+
+ The SSC peripheral supports a wide variety of serial frame based
+ communications, i.e. I2S, SPI, etc.
+
+ If unsure, say N.
+
+config ENCLOSURE_SERVICES
+ tristate "Enclosure Services"
+ default n
+ help
+ Provides support for intelligent enclosures (bays which
+ contain storage devices). You also need either a host
+ driver (SCSI/ATA) which supports enclosures
+ or a SCSI enclosure device (SES) to use these services.
+
+config SGI_XP
+ tristate "Support communication between SGI SSIs"
+ depends on NET
+ depends on (IA64_GENERIC || IA64_SGI_SN2 || IA64_SGI_UV || X86_UV) && SMP
+ select IA64_UNCACHED_ALLOCATOR if IA64_GENERIC || IA64_SGI_SN2
+ select GENERIC_ALLOCATOR if IA64_GENERIC || IA64_SGI_SN2
+ select SGI_GRU if X86_64 && SMP
+ ---help---
+ An SGI machine can be divided into multiple Single System
+ Images which act independently of each other and have
+ hardware based memory protection from the others. Enabling
+ this feature will allow for direct communication between SSIs
+ based on a network adapter and DMA messaging.
+
+config CS5535_MFGPT
+ tristate "CS5535/CS5536 Geode Multi-Function General Purpose Timer (MFGPT) support"
+ depends on MFD_CS5535
+ default n
+ help
+ This driver provides access to MFGPT functionality for other
+ drivers that need timers. MFGPTs are available in the CS5535 and
+ CS5536 companion chips that are found in AMD Geode and several
+ other platforms. They have a better resolution and max interval
+ than the generic PIT, and are suitable for use as high-res timers.
+ You probably don't want to enable this manually; other drivers that
+ make use of it should enable it.
+
+config CS5535_MFGPT_DEFAULT_IRQ
+ int
+ depends on CS5535_MFGPT
+ default 7
+ help
+ MFGPTs on the CS5535 require an interrupt. The selected IRQ
+ can be overridden as a module option as well as by driver that
+ use the cs5535_mfgpt_ API; however, different architectures might
+ want to use a different IRQ by default. This is here for
+ architectures to set as necessary.
+
+config CS5535_CLOCK_EVENT_SRC
+ tristate "CS5535/CS5536 high-res timer (MFGPT) events"
+ depends on GENERIC_CLOCKEVENTS && CS5535_MFGPT
+ help
+ This driver provides a clock event source based on the MFGPT
+ timer(s) in the CS5535 and CS5536 companion chips.
+ MFGPTs have a better resolution and max interval than the
+ generic PIT, and are suitable for use as high-res timers.
+
+config HP_ILO
+ tristate "Channel interface driver for the HP iLO processor"
+ depends on PCI
+ default n
+ help
+ The channel interface driver allows applications to communicate
+ with iLO management processors present on HP ProLiant servers.
+ Upon loading, the driver creates /dev/hpilo/dXccbN files, which
+ can be used to gather data from the management processor, via
+ read and write system calls.
+
+ To compile this driver as a module, choose M here: the
+ module will be called hpilo.
+
+config QCOM_COINCELL
+ tristate "Qualcomm coincell charger support"
+ depends on MFD_SPMI_PMIC || COMPILE_TEST
+ help
+ This driver supports the coincell block found inside of
+ Qualcomm PMICs. The coincell charger provides a means to
+ charge a coincell battery or backup capacitor which is used
+ to maintain PMIC register and RTC state in the absence of
+ external power.
+
+config SGI_GRU
+ tristate "SGI GRU driver"
+ depends on X86_UV && SMP
+ default n
+ select MMU_NOTIFIER
+ ---help---
+ The GRU is a hardware resource located in the system chipset. The GRU
+ contains memory that can be mmapped into the user address space. This memory is
+ used to communicate with the GRU to perform functions such as load/store,
+ scatter/gather, bcopy, AMOs, etc. The GRU is directly accessed by user
+ instructions using user virtual addresses. GRU instructions (ex., bcopy) use
+ user virtual addresses for operands.
+
+ If you are not running on a SGI UV system, say N.
+
+config SGI_GRU_DEBUG
+ bool "SGI GRU driver debug"
+ depends on SGI_GRU
+ default n
+ ---help---
+ This option enables additional debugging code for the SGI GRU driver.
+ If you are unsure, say N.
+
+config APDS9802ALS
+ tristate "Medfield Avago APDS9802 ALS Sensor module"
+ depends on I2C
+ help
+ If you say yes here you get support for the ALS APDS9802 ambient
+ light sensor.
+
+ This driver can also be built as a module. If so, the module
+ will be called apds9802als.
+
+config ISL29003
+ tristate "Intersil ISL29003 ambient light sensor"
+ depends on I2C && SYSFS
+ help
+ If you say yes here you get support for the Intersil ISL29003
+ ambient light sensor.
+
+ This driver can also be built as a module. If so, the module
+ will be called isl29003.
+
+config ISL29020
+ tristate "Intersil ISL29020 ambient light sensor"
+ depends on I2C
+ help
+ If you say yes here you get support for the Intersil ISL29020
+ ambient light sensor.
+
+ This driver can also be built as a module. If so, the module
+ will be called isl29020.
+
+config SENSORS_TSL2550
+ tristate "Taos TSL2550 ambient light sensor"
+ depends on I2C && SYSFS
+ help
+ If you say yes here you get support for the Taos TSL2550
+ ambient light sensor.
+
+ This driver can also be built as a module. If so, the module
+ will be called tsl2550.
+
+config SENSORS_BH1780
+ tristate "ROHM BH1780GLI ambient light sensor"
+ depends on I2C && SYSFS
+ help
+ If you say yes here you get support for the ROHM BH1780GLI
+ ambient light sensor.
+
+ This driver can also be built as a module. If so, the module
+ will be called bh1780gli.
+
+config SENSORS_BH1770
+ tristate "BH1770GLC / SFH7770 combined ALS - Proximity sensor"
+ depends on I2C
+ ---help---
+ Say Y here if you want to build a driver for BH1770GLC (ROHM) or
+ SFH7770 (Osram) combined ambient light and proximity sensor chip.
+
+ To compile this driver as a module, choose M here: the
+ module will be called bh1770glc. If unsure, say N here.
+
+config SENSORS_APDS990X
+ tristate "APDS990X combined als and proximity sensors"
+ depends on I2C
+ default n
+ ---help---
+ Say Y here if you want to build a driver for Avago APDS990x
+ combined ambient light and proximity sensor chip.
+
+ To compile this driver as a module, choose M here: the
+ module will be called apds990x. If unsure, say N here.
+
+config HMC6352
+ tristate "Honeywell HMC6352 compass"
+ depends on I2C
+ help
+ This driver provides support for the Honeywell HMC6352 compass,
+ providing configuration and heading data via sysfs.
+
+config DS1682
+ tristate "Dallas DS1682 Total Elapsed Time Recorder with Alarm"
+ depends on I2C
+ help
+ If you say yes here you get support for Dallas Semiconductor
+ DS1682 Total Elapsed Time Recorder.
+
+ This driver can also be built as a module. If so, the module
+ will be called ds1682.
+
+config SPEAR13XX_PCIE_GADGET
+ bool "PCIe gadget support for SPEAr13XX platform"
+ depends on ARCH_SPEAR13XX && BROKEN
+ default n
+ help
+ This option enables gadget support for PCIe controller. If
+ board file defines any controller as PCIe endpoint then a sysfs
+ entry will be created for that controller. User can use these
+ sysfs node to configure PCIe EP as per his requirements.
+
+config TI_DAC7512
+ tristate "Texas Instruments DAC7512"
+ depends on SPI && SYSFS
+ help
+ If you say yes here you get support for the Texas Instruments
+ DAC7512 16-bit digital-to-analog converter.
+
+ This driver can also be built as a module. If so, the module
+ will be called ti_dac7512.
+
+config VMWARE_BALLOON
+ tristate "VMware Balloon Driver"
+ depends on VMWARE_VMCI && X86 && HYPERVISOR_GUEST
+ help
+ This is VMware physical memory management driver which acts
+ like a "balloon" that can be inflated to reclaim physical pages
+ by reserving them in the guest and invalidating them in the
+ monitor, freeing up the underlying machine pages so they can
+ be allocated to other guests. The balloon can also be deflated
+ to allow the guest to use more physical memory.
+
+ If unsure, say N.
+
+ To compile this driver as a module, choose M here: the
+ module will be called vmw_balloon.
+
+config ARM_CHARLCD
+ bool "ARM Ltd. Character LCD Driver"
+ depends on PLAT_VERSATILE
+ help
+ This is a driver for the character LCD found on the ARM Ltd.
+ Versatile and RealView Platform Baseboards. It doesn't do
+ very much more than display the text "ARM Linux" on the first
+ line and the Linux version on the second line, but that's
+ still useful.
+
+config BMP085
+ tristate
+ depends on SYSFS
+
+config BMP085_I2C
+ tristate "BMP085 digital pressure sensor on I2C"
+ select BMP085
+ select REGMAP_I2C
+ depends on I2C && SYSFS
+ help
+ Say Y here if you want to support Bosch Sensortec's digital pressure
+ sensor hooked to an I2C bus.
+
+ To compile this driver as a module, choose M here: the
+ module will be called bmp085-i2c.
+
+config BMP085_SPI
+ tristate "BMP085 digital pressure sensor on SPI"
+ select BMP085
+ select REGMAP_SPI
+ depends on SPI_MASTER && SYSFS
+ help
+ Say Y here if you want to support Bosch Sensortec's digital pressure
+ sensor hooked to an SPI bus.
+
+ To compile this driver as a module, choose M here: the
+ module will be called bmp085-spi.
+
+config PCH_PHUB
+ tristate "Intel EG20T PCH/LAPIS Semicon IOH(ML7213/ML7223/ML7831) PHUB"
+ select GENERIC_NET_UTILS
+ depends on PCI && (X86_32 || COMPILE_TEST)
+ help
+ This driver is for PCH(Platform controller Hub) PHUB(Packet Hub) of
+ Intel Topcliff which is an IOH(Input/Output Hub) for x86 embedded
+ processor. The Topcliff has MAC address and Option ROM data in SROM.
+ This driver can access MAC address and Option ROM data in SROM.
+
+ This driver also can be used for LAPIS Semiconductor's IOH,
+ ML7213/ML7223/ML7831.
+ ML7213 which is for IVI(In-Vehicle Infotainment) use.
+ ML7223 IOH is for MP(Media Phone) use.
+ ML7831 IOH is for general purpose use.
+ ML7213/ML7223/ML7831 is companion chip for Intel Atom E6xx series.
+ ML7213/ML7223/ML7831 is completely compatible for Intel EG20T PCH.
+
+ To compile this driver as a module, choose M here: the module will
+ be called pch_phub.
+
+config USB_SWITCH_FSA9480
+ tristate "FSA9480 USB Switch"
+ depends on I2C
+ help
+ The FSA9480 is a USB port accessory detector and switch.
+ The FSA9480 is fully controlled using I2C and enables USB data,
+ stereo and mono audio, video, microphone and UART data to use
+ a common connector port.
+
+config LATTICE_ECP3_CONFIG
+ tristate "Lattice ECP3 FPGA bitstream configuration via SPI"
+ depends on SPI && SYSFS
+ select FW_LOADER
+ default n
+ help
+ This option enables support for bitstream configuration (programming
+ or loading) of the Lattice ECP3 FPGA family via SPI.
+
+ If unsure, say N.
+
+config SRAM
+ bool "Generic on-chip SRAM driver"
+ depends on HAS_IOMEM
+ select GENERIC_ALLOCATOR
+ help
+ This driver allows you to declare a memory region to be managed by
+ the genalloc API. It is supposed to be used for small on-chip SRAM
+ areas found on many SoCs.
+
+config VEXPRESS_SYSCFG
+ bool "Versatile Express System Configuration driver"
+ depends on VEXPRESS_CONFIG
+ default y
+ help
+ ARM Ltd. Versatile Express uses specialised platform configuration
+ bus. System Configuration interface is one of the possible means
+ of generating transactions on this bus.
+
+source "drivers/misc/c2port/Kconfig"
+source "drivers/misc/eeprom/Kconfig"
+source "drivers/misc/cb710/Kconfig"
+source "drivers/misc/ti-st/Kconfig"
+source "drivers/misc/lis3lv02d/Kconfig"
+source "drivers/misc/altera-stapl/Kconfig"
+source "drivers/misc/mei/Kconfig"
+source "drivers/misc/vmw_vmci/Kconfig"
+source "drivers/misc/mic/Kconfig"
+source "drivers/misc/genwqe/Kconfig"
+source "drivers/misc/echo/Kconfig"
+source "drivers/misc/cxl/Kconfig"
+endmenu
diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile
new file mode 100644
index 0000000..537d7f3
--- /dev/null
+++ b/drivers/misc/Makefile
@@ -0,0 +1,58 @@
+#
+# Makefile for misc devices that really don't fit anywhere else.
+#
+
+obj-$(CONFIG_IBM_ASM) += ibmasm/
+obj-$(CONFIG_AD525X_DPOT) += ad525x_dpot.o
+obj-$(CONFIG_AD525X_DPOT_I2C) += ad525x_dpot-i2c.o
+obj-$(CONFIG_AD525X_DPOT_SPI) += ad525x_dpot-spi.o
+obj-$(CONFIG_INTEL_MID_PTI) += pti.o
+obj-$(CONFIG_ATMEL_SSC) += atmel-ssc.o
+obj-$(CONFIG_ATMEL_TCLIB) += atmel_tclib.o
+obj-$(CONFIG_BMP085) += bmp085.o
+obj-$(CONFIG_BMP085_I2C) += bmp085-i2c.o
+obj-$(CONFIG_BMP085_SPI) += bmp085-spi.o
+obj-$(CONFIG_DUMMY_IRQ) += dummy-irq.o
+obj-$(CONFIG_ICS932S401) += ics932s401.o
+obj-$(CONFIG_LKDTM) += lkdtm.o
+obj-$(CONFIG_TIFM_CORE) += tifm_core.o
+obj-$(CONFIG_TIFM_7XX1) += tifm_7xx1.o
+obj-$(CONFIG_PHANTOM) += phantom.o
+obj-$(CONFIG_QCOM_COINCELL) += qcom-coincell.o
+obj-$(CONFIG_SENSORS_BH1780) += bh1780gli.o
+obj-$(CONFIG_SENSORS_BH1770) += bh1770glc.o
+obj-$(CONFIG_SENSORS_APDS990X) += apds990x.o
+obj-$(CONFIG_SGI_IOC4) += ioc4.o
+obj-$(CONFIG_ENCLOSURE_SERVICES) += enclosure.o
+obj-$(CONFIG_KGDB_TESTS) += kgdbts.o
+obj-$(CONFIG_SGI_XP) += sgi-xp/
+obj-$(CONFIG_SGI_GRU) += sgi-gru/
+obj-$(CONFIG_CS5535_MFGPT) += cs5535-mfgpt.o
+obj-$(CONFIG_HP_ILO) += hpilo.o
+obj-$(CONFIG_APDS9802ALS) += apds9802als.o
+obj-$(CONFIG_ISL29003) += isl29003.o
+obj-$(CONFIG_ISL29020) += isl29020.o
+obj-$(CONFIG_SENSORS_TSL2550) += tsl2550.o
+obj-$(CONFIG_DS1682) += ds1682.o
+obj-$(CONFIG_TI_DAC7512) += ti_dac7512.o
+obj-$(CONFIG_C2PORT) += c2port/
+obj-$(CONFIG_HMC6352) += hmc6352.o
+obj-y += eeprom/
+obj-y += cb710/
+obj-$(CONFIG_SPEAR13XX_PCIE_GADGET) += spear13xx_pcie_gadget.o
+obj-$(CONFIG_VMWARE_BALLOON) += vmw_balloon.o
+obj-$(CONFIG_ARM_CHARLCD) += arm-charlcd.o
+obj-$(CONFIG_PCH_PHUB) += pch_phub.o
+obj-y += ti-st/
+obj-y += lis3lv02d/
+obj-$(CONFIG_USB_SWITCH_FSA9480) += fsa9480.o
+obj-$(CONFIG_ALTERA_STAPL) +=altera-stapl/
+obj-$(CONFIG_INTEL_MEI) += mei/
+obj-$(CONFIG_VMWARE_VMCI) += vmw_vmci/
+obj-$(CONFIG_LATTICE_ECP3_CONFIG) += lattice-ecp3-config.o
+obj-$(CONFIG_SRAM) += sram.o
+obj-y += mic/
+obj-$(CONFIG_GENWQE) += genwqe/
+obj-$(CONFIG_ECHO) += echo/
+obj-$(CONFIG_VEXPRESS_SYSCFG) += vexpress-syscfg.o
+obj-$(CONFIG_CXL_BASE) += cxl/
diff --git a/drivers/misc/ad525x_dpot-i2c.c b/drivers/misc/ad525x_dpot-i2c.c
new file mode 100644
index 0000000..4f83200
--- /dev/null
+++ b/drivers/misc/ad525x_dpot-i2c.c
@@ -0,0 +1,119 @@
+/*
+ * Driver for the Analog Devices digital potentiometers (I2C bus)
+ *
+ * Copyright (C) 2010-2011 Michael Hennerich, Analog Devices Inc.
+ *
+ * Licensed under the GPL-2 or later.
+ */
+
+#include <linux/i2c.h>
+#include <linux/module.h>
+
+#include "ad525x_dpot.h"
+
+/* I2C bus functions */
+static int write_d8(void *client, u8 val)
+{
+ return i2c_smbus_write_byte(client, val);
+}
+
+static int write_r8d8(void *client, u8 reg, u8 val)
+{
+ return i2c_smbus_write_byte_data(client, reg, val);
+}
+
+static int write_r8d16(void *client, u8 reg, u16 val)
+{
+ return i2c_smbus_write_word_data(client, reg, val);
+}
+
+static int read_d8(void *client)
+{
+ return i2c_smbus_read_byte(client);
+}
+
+static int read_r8d8(void *client, u8 reg)
+{
+ return i2c_smbus_read_byte_data(client, reg);
+}
+
+static int read_r8d16(void *client, u8 reg)
+{
+ return i2c_smbus_read_word_data(client, reg);
+}
+
+static const struct ad_dpot_bus_ops bops = {
+ .read_d8 = read_d8,
+ .read_r8d8 = read_r8d8,
+ .read_r8d16 = read_r8d16,
+ .write_d8 = write_d8,
+ .write_r8d8 = write_r8d8,
+ .write_r8d16 = write_r8d16,
+};
+
+static int ad_dpot_i2c_probe(struct i2c_client *client,
+ const struct i2c_device_id *id)
+{
+ struct ad_dpot_bus_data bdata = {
+ .client = client,
+ .bops = &bops,
+ };
+
+ if (!i2c_check_functionality(client->adapter,
+ I2C_FUNC_SMBUS_WORD_DATA)) {
+ dev_err(&client->dev, "SMBUS Word Data not Supported\n");
+ return -EIO;
+ }
+
+ return ad_dpot_probe(&client->dev, &bdata, id->driver_data, id->name);
+}
+
+static int ad_dpot_i2c_remove(struct i2c_client *client)
+{
+ return ad_dpot_remove(&client->dev);
+}
+
+static const struct i2c_device_id ad_dpot_id[] = {
+ {"ad5258", AD5258_ID},
+ {"ad5259", AD5259_ID},
+ {"ad5251", AD5251_ID},
+ {"ad5252", AD5252_ID},
+ {"ad5253", AD5253_ID},
+ {"ad5254", AD5254_ID},
+ {"ad5255", AD5255_ID},
+ {"ad5241", AD5241_ID},
+ {"ad5242", AD5242_ID},
+ {"ad5243", AD5243_ID},
+ {"ad5245", AD5245_ID},
+ {"ad5246", AD5246_ID},
+ {"ad5247", AD5247_ID},
+ {"ad5248", AD5248_ID},
+ {"ad5280", AD5280_ID},
+ {"ad5282", AD5282_ID},
+ {"adn2860", ADN2860_ID},
+ {"ad5273", AD5273_ID},
+ {"ad5161", AD5161_ID},
+ {"ad5171", AD5171_ID},
+ {"ad5170", AD5170_ID},
+ {"ad5172", AD5172_ID},
+ {"ad5173", AD5173_ID},
+ {"ad5272", AD5272_ID},
+ {"ad5274", AD5274_ID},
+ {}
+};
+MODULE_DEVICE_TABLE(i2c, ad_dpot_id);
+
+static struct i2c_driver ad_dpot_i2c_driver = {
+ .driver = {
+ .name = "ad_dpot",
+ },
+ .probe = ad_dpot_i2c_probe,
+ .remove = ad_dpot_i2c_remove,
+ .id_table = ad_dpot_id,
+};
+
+module_i2c_driver(ad_dpot_i2c_driver);
+
+MODULE_AUTHOR("Michael Hennerich <hennerich@blackfin.uclinux.org>");
+MODULE_DESCRIPTION("digital potentiometer I2C bus driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/misc/ad525x_dpot-spi.c b/drivers/misc/ad525x_dpot-spi.c
new file mode 100644
index 0000000..39a7f51
--- /dev/null
+++ b/drivers/misc/ad525x_dpot-spi.c
@@ -0,0 +1,146 @@
+/*
+ * Driver for the Analog Devices digital potentiometers (SPI bus)
+ *
+ * Copyright (C) 2010-2011 Michael Hennerich, Analog Devices Inc.
+ *
+ * Licensed under the GPL-2 or later.
+ */
+
+#include <linux/spi/spi.h>
+#include <linux/module.h>
+
+#include "ad525x_dpot.h"
+
+/* SPI bus functions */
+static int write8(void *client, u8 val)
+{
+ u8 data = val;
+
+ return spi_write(client, &data, 1);
+}
+
+static int write16(void *client, u8 reg, u8 val)
+{
+ u8 data[2] = {reg, val};
+
+ return spi_write(client, data, 2);
+}
+
+static int write24(void *client, u8 reg, u16 val)
+{
+ u8 data[3] = {reg, val >> 8, val};
+
+ return spi_write(client, data, 3);
+}
+
+static int read8(void *client)
+{
+ int ret;
+ u8 data;
+
+ ret = spi_read(client, &data, 1);
+ if (ret < 0)
+ return ret;
+
+ return data;
+}
+
+static int read16(void *client, u8 reg)
+{
+ int ret;
+ u8 buf_rx[2];
+
+ write16(client, reg, 0);
+ ret = spi_read(client, buf_rx, 2);
+ if (ret < 0)
+ return ret;
+
+ return (buf_rx[0] << 8) | buf_rx[1];
+}
+
+static int read24(void *client, u8 reg)
+{
+ int ret;
+ u8 buf_rx[3];
+
+ write24(client, reg, 0);
+ ret = spi_read(client, buf_rx, 3);
+ if (ret < 0)
+ return ret;
+
+ return (buf_rx[1] << 8) | buf_rx[2];
+}
+
+static const struct ad_dpot_bus_ops bops = {
+ .read_d8 = read8,
+ .read_r8d8 = read16,
+ .read_r8d16 = read24,
+ .write_d8 = write8,
+ .write_r8d8 = write16,
+ .write_r8d16 = write24,
+};
+static int ad_dpot_spi_probe(struct spi_device *spi)
+{
+ struct ad_dpot_bus_data bdata = {
+ .client = spi,
+ .bops = &bops,
+ };
+
+ return ad_dpot_probe(&spi->dev, &bdata,
+ spi_get_device_id(spi)->driver_data,
+ spi_get_device_id(spi)->name);
+}
+
+static int ad_dpot_spi_remove(struct spi_device *spi)
+{
+ return ad_dpot_remove(&spi->dev);
+}
+
+static const struct spi_device_id ad_dpot_spi_id[] = {
+ {"ad5160", AD5160_ID},
+ {"ad5161", AD5161_ID},
+ {"ad5162", AD5162_ID},
+ {"ad5165", AD5165_ID},
+ {"ad5200", AD5200_ID},
+ {"ad5201", AD5201_ID},
+ {"ad5203", AD5203_ID},
+ {"ad5204", AD5204_ID},
+ {"ad5206", AD5206_ID},
+ {"ad5207", AD5207_ID},
+ {"ad5231", AD5231_ID},
+ {"ad5232", AD5232_ID},
+ {"ad5233", AD5233_ID},
+ {"ad5235", AD5235_ID},
+ {"ad5260", AD5260_ID},
+ {"ad5262", AD5262_ID},
+ {"ad5263", AD5263_ID},
+ {"ad5290", AD5290_ID},
+ {"ad5291", AD5291_ID},
+ {"ad5292", AD5292_ID},
+ {"ad5293", AD5293_ID},
+ {"ad7376", AD7376_ID},
+ {"ad8400", AD8400_ID},
+ {"ad8402", AD8402_ID},
+ {"ad8403", AD8403_ID},
+ {"adn2850", ADN2850_ID},
+ {"ad5270", AD5270_ID},
+ {"ad5271", AD5271_ID},
+ {}
+};
+MODULE_DEVICE_TABLE(spi, ad_dpot_spi_id);
+
+static struct spi_driver ad_dpot_spi_driver = {
+ .driver = {
+ .name = "ad_dpot",
+ },
+ .probe = ad_dpot_spi_probe,
+ .remove = ad_dpot_spi_remove,
+ .id_table = ad_dpot_spi_id,
+};
+
+module_spi_driver(ad_dpot_spi_driver);
+
+MODULE_AUTHOR("Michael Hennerich <hennerich@blackfin.uclinux.org>");
+MODULE_DESCRIPTION("digital potentiometer SPI bus driver");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("spi:ad_dpot");
diff --git a/drivers/misc/ad525x_dpot.c b/drivers/misc/ad525x_dpot.c
new file mode 100644
index 0000000..f1a0b99
--- /dev/null
+++ b/drivers/misc/ad525x_dpot.c
@@ -0,0 +1,765 @@
+/*
+ * ad525x_dpot: Driver for the Analog Devices digital potentiometers
+ * Copyright (c) 2009-2010 Analog Devices, Inc.
+ * Author: Michael Hennerich <hennerich@blackfin.uclinux.org>
+ *
+ * DEVID #Wipers #Positions Resistor Options (kOhm)
+ * AD5258 1 64 1, 10, 50, 100
+ * AD5259 1 256 5, 10, 50, 100
+ * AD5251 2 64 1, 10, 50, 100
+ * AD5252 2 256 1, 10, 50, 100
+ * AD5255 3 512 25, 250
+ * AD5253 4 64 1, 10, 50, 100
+ * AD5254 4 256 1, 10, 50, 100
+ * AD5160 1 256 5, 10, 50, 100
+ * AD5161 1 256 5, 10, 50, 100
+ * AD5162 2 256 2.5, 10, 50, 100
+ * AD5165 1 256 100
+ * AD5200 1 256 10, 50
+ * AD5201 1 33 10, 50
+ * AD5203 4 64 10, 100
+ * AD5204 4 256 10, 50, 100
+ * AD5206 6 256 10, 50, 100
+ * AD5207 2 256 10, 50, 100
+ * AD5231 1 1024 10, 50, 100
+ * AD5232 2 256 10, 50, 100
+ * AD5233 4 64 10, 50, 100
+ * AD5235 2 1024 25, 250
+ * AD5260 1 256 20, 50, 200
+ * AD5262 2 256 20, 50, 200
+ * AD5263 4 256 20, 50, 200
+ * AD5290 1 256 10, 50, 100
+ * AD5291 1 256 20, 50, 100 (20-TP)
+ * AD5292 1 1024 20, 50, 100 (20-TP)
+ * AD5293 1 1024 20, 50, 100
+ * AD7376 1 128 10, 50, 100, 1M
+ * AD8400 1 256 1, 10, 50, 100
+ * AD8402 2 256 1, 10, 50, 100
+ * AD8403 4 256 1, 10, 50, 100
+ * ADN2850 3 512 25, 250
+ * AD5241 1 256 10, 100, 1M
+ * AD5246 1 128 5, 10, 50, 100
+ * AD5247 1 128 5, 10, 50, 100
+ * AD5245 1 256 5, 10, 50, 100
+ * AD5243 2 256 2.5, 10, 50, 100
+ * AD5248 2 256 2.5, 10, 50, 100
+ * AD5242 2 256 20, 50, 200
+ * AD5280 1 256 20, 50, 200
+ * AD5282 2 256 20, 50, 200
+ * ADN2860 3 512 25, 250
+ * AD5273 1 64 1, 10, 50, 100 (OTP)
+ * AD5171 1 64 5, 10, 50, 100 (OTP)
+ * AD5170 1 256 2.5, 10, 50, 100 (OTP)
+ * AD5172 2 256 2.5, 10, 50, 100 (OTP)
+ * AD5173 2 256 2.5, 10, 50, 100 (OTP)
+ * AD5270 1 1024 20, 50, 100 (50-TP)
+ * AD5271 1 256 20, 50, 100 (50-TP)
+ * AD5272 1 1024 20, 50, 100 (50-TP)
+ * AD5274 1 256 20, 50, 100 (50-TP)
+ *
+ * See Documentation/misc-devices/ad525x_dpot.txt for more info.
+ *
+ * derived from ad5258.c
+ * Copyright (c) 2009 Cyber Switching, Inc.
+ * Author: Chris Verges <chrisv@cyberswitching.com>
+ *
+ * derived from ad5252.c
+ * Copyright (c) 2006-2011 Michael Hennerich <hennerich@blackfin.uclinux.org>
+ *
+ * Licensed under the GPL-2 or later.
+ */
+
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/kernel.h>
+#include <linux/delay.h>
+#include <linux/slab.h>
+
+#include "ad525x_dpot.h"
+
+/*
+ * Client data (each client gets its own)
+ */
+
+struct dpot_data {
+ struct ad_dpot_bus_data bdata;
+ struct mutex update_lock;
+ unsigned rdac_mask;
+ unsigned max_pos;
+ unsigned long devid;
+ unsigned uid;
+ unsigned feat;
+ unsigned wipers;
+ u16 rdac_cache[MAX_RDACS];
+ DECLARE_BITMAP(otp_en_mask, MAX_RDACS);
+};
+
+static inline int dpot_read_d8(struct dpot_data *dpot)
+{
+ return dpot->bdata.bops->read_d8(dpot->bdata.client);
+}
+
+static inline int dpot_read_r8d8(struct dpot_data *dpot, u8 reg)
+{
+ return dpot->bdata.bops->read_r8d8(dpot->bdata.client, reg);
+}
+
+static inline int dpot_read_r8d16(struct dpot_data *dpot, u8 reg)
+{
+ return dpot->bdata.bops->read_r8d16(dpot->bdata.client, reg);
+}
+
+static inline int dpot_write_d8(struct dpot_data *dpot, u8 val)
+{
+ return dpot->bdata.bops->write_d8(dpot->bdata.client, val);
+}
+
+static inline int dpot_write_r8d8(struct dpot_data *dpot, u8 reg, u16 val)
+{
+ return dpot->bdata.bops->write_r8d8(dpot->bdata.client, reg, val);
+}
+
+static inline int dpot_write_r8d16(struct dpot_data *dpot, u8 reg, u16 val)
+{
+ return dpot->bdata.bops->write_r8d16(dpot->bdata.client, reg, val);
+}
+
+static s32 dpot_read_spi(struct dpot_data *dpot, u8 reg)
+{
+ unsigned ctrl = 0;
+ int value;
+
+ if (!(reg & (DPOT_ADDR_EEPROM | DPOT_ADDR_CMD))) {
+
+ if (dpot->feat & F_RDACS_WONLY)
+ return dpot->rdac_cache[reg & DPOT_RDAC_MASK];
+ if (dpot->uid == DPOT_UID(AD5291_ID) ||
+ dpot->uid == DPOT_UID(AD5292_ID) ||
+ dpot->uid == DPOT_UID(AD5293_ID)) {
+
+ value = dpot_read_r8d8(dpot,
+ DPOT_AD5291_READ_RDAC << 2);
+
+ if (dpot->uid == DPOT_UID(AD5291_ID))
+ value = value >> 2;
+
+ return value;
+ } else if (dpot->uid == DPOT_UID(AD5270_ID) ||
+ dpot->uid == DPOT_UID(AD5271_ID)) {
+
+ value = dpot_read_r8d8(dpot,
+ DPOT_AD5270_1_2_4_READ_RDAC << 2);
+
+ if (value < 0)
+ return value;
+
+ if (dpot->uid == DPOT_UID(AD5271_ID))
+ value = value >> 2;
+
+ return value;
+ }
+
+ ctrl = DPOT_SPI_READ_RDAC;
+ } else if (reg & DPOT_ADDR_EEPROM) {
+ ctrl = DPOT_SPI_READ_EEPROM;
+ }
+
+ if (dpot->feat & F_SPI_16BIT)
+ return dpot_read_r8d8(dpot, ctrl);
+ else if (dpot->feat & F_SPI_24BIT)
+ return dpot_read_r8d16(dpot, ctrl);
+
+ return -EFAULT;
+}
+
+static s32 dpot_read_i2c(struct dpot_data *dpot, u8 reg)
+{
+ int value;
+ unsigned ctrl = 0;
+
+ switch (dpot->uid) {
+ case DPOT_UID(AD5246_ID):
+ case DPOT_UID(AD5247_ID):
+ return dpot_read_d8(dpot);
+ case DPOT_UID(AD5245_ID):
+ case DPOT_UID(AD5241_ID):
+ case DPOT_UID(AD5242_ID):
+ case DPOT_UID(AD5243_ID):
+ case DPOT_UID(AD5248_ID):
+ case DPOT_UID(AD5280_ID):
+ case DPOT_UID(AD5282_ID):
+ ctrl = ((reg & DPOT_RDAC_MASK) == DPOT_RDAC0) ?
+ 0 : DPOT_AD5282_RDAC_AB;
+ return dpot_read_r8d8(dpot, ctrl);
+ case DPOT_UID(AD5170_ID):
+ case DPOT_UID(AD5171_ID):
+ case DPOT_UID(AD5273_ID):
+ return dpot_read_d8(dpot);
+ case DPOT_UID(AD5172_ID):
+ case DPOT_UID(AD5173_ID):
+ ctrl = ((reg & DPOT_RDAC_MASK) == DPOT_RDAC0) ?
+ 0 : DPOT_AD5172_3_A0;
+ return dpot_read_r8d8(dpot, ctrl);
+ case DPOT_UID(AD5272_ID):
+ case DPOT_UID(AD5274_ID):
+ dpot_write_r8d8(dpot,
+ (DPOT_AD5270_1_2_4_READ_RDAC << 2), 0);
+
+ value = dpot_read_r8d16(dpot,
+ DPOT_AD5270_1_2_4_RDAC << 2);
+
+ if (value < 0)
+ return value;
+ /*
+ * AD5272/AD5274 returns high byte first, however
+ * underling smbus expects low byte first.
+ */
+ value = swab16(value);
+
+ if (dpot->uid == DPOT_UID(AD5274_ID))
+ value = value >> 2;
+ return value;
+ default:
+ if ((reg & DPOT_REG_TOL) || (dpot->max_pos > 256))
+ return dpot_read_r8d16(dpot, (reg & 0xF8) |
+ ((reg & 0x7) << 1));
+ else
+ return dpot_read_r8d8(dpot, reg);
+ }
+}
+
+static s32 dpot_read(struct dpot_data *dpot, u8 reg)
+{
+ if (dpot->feat & F_SPI)
+ return dpot_read_spi(dpot, reg);
+ else
+ return dpot_read_i2c(dpot, reg);
+}
+
+static s32 dpot_write_spi(struct dpot_data *dpot, u8 reg, u16 value)
+{
+ unsigned val = 0;
+
+ if (!(reg & (DPOT_ADDR_EEPROM | DPOT_ADDR_CMD | DPOT_ADDR_OTP))) {
+ if (dpot->feat & F_RDACS_WONLY)
+ dpot->rdac_cache[reg & DPOT_RDAC_MASK] = value;
+
+ if (dpot->feat & F_AD_APPDATA) {
+ if (dpot->feat & F_SPI_8BIT) {
+ val = ((reg & DPOT_RDAC_MASK) <<
+ DPOT_MAX_POS(dpot->devid)) |
+ value;
+ return dpot_write_d8(dpot, val);
+ } else if (dpot->feat & F_SPI_16BIT) {
+ val = ((reg & DPOT_RDAC_MASK) <<
+ DPOT_MAX_POS(dpot->devid)) |
+ value;
+ return dpot_write_r8d8(dpot, val >> 8,
+ val & 0xFF);
+ } else
+ BUG();
+ } else {
+ if (dpot->uid == DPOT_UID(AD5291_ID) ||
+ dpot->uid == DPOT_UID(AD5292_ID) ||
+ dpot->uid == DPOT_UID(AD5293_ID)) {
+
+ dpot_write_r8d8(dpot, DPOT_AD5291_CTRLREG << 2,
+ DPOT_AD5291_UNLOCK_CMD);
+
+ if (dpot->uid == DPOT_UID(AD5291_ID))
+ value = value << 2;
+
+ return dpot_write_r8d8(dpot,
+ (DPOT_AD5291_RDAC << 2) |
+ (value >> 8), value & 0xFF);
+ } else if (dpot->uid == DPOT_UID(AD5270_ID) ||
+ dpot->uid == DPOT_UID(AD5271_ID)) {
+ dpot_write_r8d8(dpot,
+ DPOT_AD5270_1_2_4_CTRLREG << 2,
+ DPOT_AD5270_1_2_4_UNLOCK_CMD);
+
+ if (dpot->uid == DPOT_UID(AD5271_ID))
+ value = value << 2;
+
+ return dpot_write_r8d8(dpot,
+ (DPOT_AD5270_1_2_4_RDAC << 2) |
+ (value >> 8), value & 0xFF);
+ }
+ val = DPOT_SPI_RDAC | (reg & DPOT_RDAC_MASK);
+ }
+ } else if (reg & DPOT_ADDR_EEPROM) {
+ val = DPOT_SPI_EEPROM | (reg & DPOT_RDAC_MASK);
+ } else if (reg & DPOT_ADDR_CMD) {
+ switch (reg) {
+ case DPOT_DEC_ALL_6DB:
+ val = DPOT_SPI_DEC_ALL_6DB;
+ break;
+ case DPOT_INC_ALL_6DB:
+ val = DPOT_SPI_INC_ALL_6DB;
+ break;
+ case DPOT_DEC_ALL:
+ val = DPOT_SPI_DEC_ALL;
+ break;
+ case DPOT_INC_ALL:
+ val = DPOT_SPI_INC_ALL;
+ break;
+ }
+ } else if (reg & DPOT_ADDR_OTP) {
+ if (dpot->uid == DPOT_UID(AD5291_ID) ||
+ dpot->uid == DPOT_UID(AD5292_ID)) {
+ return dpot_write_r8d8(dpot,
+ DPOT_AD5291_STORE_XTPM << 2, 0);
+ } else if (dpot->uid == DPOT_UID(AD5270_ID) ||
+ dpot->uid == DPOT_UID(AD5271_ID)) {
+ return dpot_write_r8d8(dpot,
+ DPOT_AD5270_1_2_4_STORE_XTPM << 2, 0);
+ }
+ } else
+ BUG();
+
+ if (dpot->feat & F_SPI_16BIT)
+ return dpot_write_r8d8(dpot, val, value);
+ else if (dpot->feat & F_SPI_24BIT)
+ return dpot_write_r8d16(dpot, val, value);
+
+ return -EFAULT;
+}
+
+static s32 dpot_write_i2c(struct dpot_data *dpot, u8 reg, u16 value)
+{
+ /* Only write the instruction byte for certain commands */
+ unsigned tmp = 0, ctrl = 0;
+
+ switch (dpot->uid) {
+ case DPOT_UID(AD5246_ID):
+ case DPOT_UID(AD5247_ID):
+ return dpot_write_d8(dpot, value);
+
+ case DPOT_UID(AD5245_ID):
+ case DPOT_UID(AD5241_ID):
+ case DPOT_UID(AD5242_ID):
+ case DPOT_UID(AD5243_ID):
+ case DPOT_UID(AD5248_ID):
+ case DPOT_UID(AD5280_ID):
+ case DPOT_UID(AD5282_ID):
+ ctrl = ((reg & DPOT_RDAC_MASK) == DPOT_RDAC0) ?
+ 0 : DPOT_AD5282_RDAC_AB;
+ return dpot_write_r8d8(dpot, ctrl, value);
+ case DPOT_UID(AD5171_ID):
+ case DPOT_UID(AD5273_ID):
+ if (reg & DPOT_ADDR_OTP) {
+ tmp = dpot_read_d8(dpot);
+ if (tmp >> 6) /* Ready to Program? */
+ return -EFAULT;
+ ctrl = DPOT_AD5273_FUSE;
+ }
+ return dpot_write_r8d8(dpot, ctrl, value);
+ case DPOT_UID(AD5172_ID):
+ case DPOT_UID(AD5173_ID):
+ ctrl = ((reg & DPOT_RDAC_MASK) == DPOT_RDAC0) ?
+ 0 : DPOT_AD5172_3_A0;
+ if (reg & DPOT_ADDR_OTP) {
+ tmp = dpot_read_r8d16(dpot, ctrl);
+ if (tmp >> 14) /* Ready to Program? */
+ return -EFAULT;
+ ctrl |= DPOT_AD5170_2_3_FUSE;
+ }
+ return dpot_write_r8d8(dpot, ctrl, value);
+ case DPOT_UID(AD5170_ID):
+ if (reg & DPOT_ADDR_OTP) {
+ tmp = dpot_read_r8d16(dpot, tmp);
+ if (tmp >> 14) /* Ready to Program? */
+ return -EFAULT;
+ ctrl = DPOT_AD5170_2_3_FUSE;
+ }
+ return dpot_write_r8d8(dpot, ctrl, value);
+ case DPOT_UID(AD5272_ID):
+ case DPOT_UID(AD5274_ID):
+ dpot_write_r8d8(dpot, DPOT_AD5270_1_2_4_CTRLREG << 2,
+ DPOT_AD5270_1_2_4_UNLOCK_CMD);
+
+ if (reg & DPOT_ADDR_OTP)
+ return dpot_write_r8d8(dpot,
+ DPOT_AD5270_1_2_4_STORE_XTPM << 2, 0);
+
+ if (dpot->uid == DPOT_UID(AD5274_ID))
+ value = value << 2;
+
+ return dpot_write_r8d8(dpot, (DPOT_AD5270_1_2_4_RDAC << 2) |
+ (value >> 8), value & 0xFF);
+ default:
+ if (reg & DPOT_ADDR_CMD)
+ return dpot_write_d8(dpot, reg);
+
+ if (dpot->max_pos > 256)
+ return dpot_write_r8d16(dpot, (reg & 0xF8) |
+ ((reg & 0x7) << 1), value);
+ else
+ /* All other registers require instruction + data bytes */
+ return dpot_write_r8d8(dpot, reg, value);
+ }
+}
+
+static s32 dpot_write(struct dpot_data *dpot, u8 reg, u16 value)
+{
+ if (dpot->feat & F_SPI)
+ return dpot_write_spi(dpot, reg, value);
+ else
+ return dpot_write_i2c(dpot, reg, value);
+}
+
+/* sysfs functions */
+
+static ssize_t sysfs_show_reg(struct device *dev,
+ struct device_attribute *attr,
+ char *buf, u32 reg)
+{
+ struct dpot_data *data = dev_get_drvdata(dev);
+ s32 value;
+
+ if (reg & DPOT_ADDR_OTP_EN)
+ return sprintf(buf, "%s\n",
+ test_bit(DPOT_RDAC_MASK & reg, data->otp_en_mask) ?
+ "enabled" : "disabled");
+
+
+ mutex_lock(&data->update_lock);
+ value = dpot_read(data, reg);
+ mutex_unlock(&data->update_lock);
+
+ if (value < 0)
+ return -EINVAL;
+ /*
+ * Let someone else deal with converting this ...
+ * the tolerance is a two-byte value where the MSB
+ * is a sign + integer value, and the LSB is a
+ * decimal value. See page 18 of the AD5258
+ * datasheet (Rev. A) for more details.
+ */
+
+ if (reg & DPOT_REG_TOL)
+ return sprintf(buf, "0x%04x\n", value & 0xFFFF);
+ else
+ return sprintf(buf, "%u\n", value & data->rdac_mask);
+}
+
+static ssize_t sysfs_set_reg(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count, u32 reg)
+{
+ struct dpot_data *data = dev_get_drvdata(dev);
+ unsigned long value;
+ int err;
+
+ if (reg & DPOT_ADDR_OTP_EN) {
+ if (!strncmp(buf, "enabled", sizeof("enabled")))
+ set_bit(DPOT_RDAC_MASK & reg, data->otp_en_mask);
+ else
+ clear_bit(DPOT_RDAC_MASK & reg, data->otp_en_mask);
+
+ return count;
+ }
+
+ if ((reg & DPOT_ADDR_OTP) &&
+ !test_bit(DPOT_RDAC_MASK & reg, data->otp_en_mask))
+ return -EPERM;
+
+ err = kstrtoul(buf, 10, &value);
+ if (err)
+ return err;
+
+ if (value > data->rdac_mask)
+ value = data->rdac_mask;
+
+ mutex_lock(&data->update_lock);
+ dpot_write(data, reg, value);
+ if (reg & DPOT_ADDR_EEPROM)
+ msleep(26); /* Sleep while the EEPROM updates */
+ else if (reg & DPOT_ADDR_OTP)
+ msleep(400); /* Sleep while the OTP updates */
+ mutex_unlock(&data->update_lock);
+
+ return count;
+}
+
+static ssize_t sysfs_do_cmd(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count, u32 reg)
+{
+ struct dpot_data *data = dev_get_drvdata(dev);
+
+ mutex_lock(&data->update_lock);
+ dpot_write(data, reg, 0);
+ mutex_unlock(&data->update_lock);
+
+ return count;
+}
+
+/* ------------------------------------------------------------------------- */
+
+#define DPOT_DEVICE_SHOW(_name, _reg) static ssize_t \
+show_##_name(struct device *dev, \
+ struct device_attribute *attr, char *buf) \
+{ \
+ return sysfs_show_reg(dev, attr, buf, _reg); \
+}
+
+#define DPOT_DEVICE_SET(_name, _reg) static ssize_t \
+set_##_name(struct device *dev, \
+ struct device_attribute *attr, \
+ const char *buf, size_t count) \
+{ \
+ return sysfs_set_reg(dev, attr, buf, count, _reg); \
+}
+
+#define DPOT_DEVICE_SHOW_SET(name, reg) \
+DPOT_DEVICE_SHOW(name, reg) \
+DPOT_DEVICE_SET(name, reg) \
+static DEVICE_ATTR(name, S_IWUSR | S_IRUGO, show_##name, set_##name);
+
+#define DPOT_DEVICE_SHOW_ONLY(name, reg) \
+DPOT_DEVICE_SHOW(name, reg) \
+static DEVICE_ATTR(name, S_IWUSR | S_IRUGO, show_##name, NULL);
+
+DPOT_DEVICE_SHOW_SET(rdac0, DPOT_ADDR_RDAC | DPOT_RDAC0);
+DPOT_DEVICE_SHOW_SET(eeprom0, DPOT_ADDR_EEPROM | DPOT_RDAC0);
+DPOT_DEVICE_SHOW_ONLY(tolerance0, DPOT_ADDR_EEPROM | DPOT_TOL_RDAC0);
+DPOT_DEVICE_SHOW_SET(otp0, DPOT_ADDR_OTP | DPOT_RDAC0);
+DPOT_DEVICE_SHOW_SET(otp0en, DPOT_ADDR_OTP_EN | DPOT_RDAC0);
+
+DPOT_DEVICE_SHOW_SET(rdac1, DPOT_ADDR_RDAC | DPOT_RDAC1);
+DPOT_DEVICE_SHOW_SET(eeprom1, DPOT_ADDR_EEPROM | DPOT_RDAC1);
+DPOT_DEVICE_SHOW_ONLY(tolerance1, DPOT_ADDR_EEPROM | DPOT_TOL_RDAC1);
+DPOT_DEVICE_SHOW_SET(otp1, DPOT_ADDR_OTP | DPOT_RDAC1);
+DPOT_DEVICE_SHOW_SET(otp1en, DPOT_ADDR_OTP_EN | DPOT_RDAC1);
+
+DPOT_DEVICE_SHOW_SET(rdac2, DPOT_ADDR_RDAC | DPOT_RDAC2);
+DPOT_DEVICE_SHOW_SET(eeprom2, DPOT_ADDR_EEPROM | DPOT_RDAC2);
+DPOT_DEVICE_SHOW_ONLY(tolerance2, DPOT_ADDR_EEPROM | DPOT_TOL_RDAC2);
+DPOT_DEVICE_SHOW_SET(otp2, DPOT_ADDR_OTP | DPOT_RDAC2);
+DPOT_DEVICE_SHOW_SET(otp2en, DPOT_ADDR_OTP_EN | DPOT_RDAC2);
+
+DPOT_DEVICE_SHOW_SET(rdac3, DPOT_ADDR_RDAC | DPOT_RDAC3);
+DPOT_DEVICE_SHOW_SET(eeprom3, DPOT_ADDR_EEPROM | DPOT_RDAC3);
+DPOT_DEVICE_SHOW_ONLY(tolerance3, DPOT_ADDR_EEPROM | DPOT_TOL_RDAC3);
+DPOT_DEVICE_SHOW_SET(otp3, DPOT_ADDR_OTP | DPOT_RDAC3);
+DPOT_DEVICE_SHOW_SET(otp3en, DPOT_ADDR_OTP_EN | DPOT_RDAC3);
+
+DPOT_DEVICE_SHOW_SET(rdac4, DPOT_ADDR_RDAC | DPOT_RDAC4);
+DPOT_DEVICE_SHOW_SET(eeprom4, DPOT_ADDR_EEPROM | DPOT_RDAC4);
+DPOT_DEVICE_SHOW_ONLY(tolerance4, DPOT_ADDR_EEPROM | DPOT_TOL_RDAC4);
+DPOT_DEVICE_SHOW_SET(otp4, DPOT_ADDR_OTP | DPOT_RDAC4);
+DPOT_DEVICE_SHOW_SET(otp4en, DPOT_ADDR_OTP_EN | DPOT_RDAC4);
+
+DPOT_DEVICE_SHOW_SET(rdac5, DPOT_ADDR_RDAC | DPOT_RDAC5);
+DPOT_DEVICE_SHOW_SET(eeprom5, DPOT_ADDR_EEPROM | DPOT_RDAC5);
+DPOT_DEVICE_SHOW_ONLY(tolerance5, DPOT_ADDR_EEPROM | DPOT_TOL_RDAC5);
+DPOT_DEVICE_SHOW_SET(otp5, DPOT_ADDR_OTP | DPOT_RDAC5);
+DPOT_DEVICE_SHOW_SET(otp5en, DPOT_ADDR_OTP_EN | DPOT_RDAC5);
+
+static const struct attribute *dpot_attrib_wipers[] = {
+ &dev_attr_rdac0.attr,
+ &dev_attr_rdac1.attr,
+ &dev_attr_rdac2.attr,
+ &dev_attr_rdac3.attr,
+ &dev_attr_rdac4.attr,
+ &dev_attr_rdac5.attr,
+ NULL
+};
+
+static const struct attribute *dpot_attrib_eeprom[] = {
+ &dev_attr_eeprom0.attr,
+ &dev_attr_eeprom1.attr,
+ &dev_attr_eeprom2.attr,
+ &dev_attr_eeprom3.attr,
+ &dev_attr_eeprom4.attr,
+ &dev_attr_eeprom5.attr,
+ NULL
+};
+
+static const struct attribute *dpot_attrib_otp[] = {
+ &dev_attr_otp0.attr,
+ &dev_attr_otp1.attr,
+ &dev_attr_otp2.attr,
+ &dev_attr_otp3.attr,
+ &dev_attr_otp4.attr,
+ &dev_attr_otp5.attr,
+ NULL
+};
+
+static const struct attribute *dpot_attrib_otp_en[] = {
+ &dev_attr_otp0en.attr,
+ &dev_attr_otp1en.attr,
+ &dev_attr_otp2en.attr,
+ &dev_attr_otp3en.attr,
+ &dev_attr_otp4en.attr,
+ &dev_attr_otp5en.attr,
+ NULL
+};
+
+static const struct attribute *dpot_attrib_tolerance[] = {
+ &dev_attr_tolerance0.attr,
+ &dev_attr_tolerance1.attr,
+ &dev_attr_tolerance2.attr,
+ &dev_attr_tolerance3.attr,
+ &dev_attr_tolerance4.attr,
+ &dev_attr_tolerance5.attr,
+ NULL
+};
+
+/* ------------------------------------------------------------------------- */
+
+#define DPOT_DEVICE_DO_CMD(_name, _cmd) static ssize_t \
+set_##_name(struct device *dev, \
+ struct device_attribute *attr, \
+ const char *buf, size_t count) \
+{ \
+ return sysfs_do_cmd(dev, attr, buf, count, _cmd); \
+} \
+static DEVICE_ATTR(_name, S_IWUSR | S_IRUGO, NULL, set_##_name);
+
+DPOT_DEVICE_DO_CMD(inc_all, DPOT_INC_ALL);
+DPOT_DEVICE_DO_CMD(dec_all, DPOT_DEC_ALL);
+DPOT_DEVICE_DO_CMD(inc_all_6db, DPOT_INC_ALL_6DB);
+DPOT_DEVICE_DO_CMD(dec_all_6db, DPOT_DEC_ALL_6DB);
+
+static struct attribute *ad525x_attributes_commands[] = {
+ &dev_attr_inc_all.attr,
+ &dev_attr_dec_all.attr,
+ &dev_attr_inc_all_6db.attr,
+ &dev_attr_dec_all_6db.attr,
+ NULL
+};
+
+static const struct attribute_group ad525x_group_commands = {
+ .attrs = ad525x_attributes_commands,
+};
+
+static int ad_dpot_add_files(struct device *dev,
+ unsigned features, unsigned rdac)
+{
+ int err = sysfs_create_file(&dev->kobj,
+ dpot_attrib_wipers[rdac]);
+ if (features & F_CMD_EEP)
+ err |= sysfs_create_file(&dev->kobj,
+ dpot_attrib_eeprom[rdac]);
+ if (features & F_CMD_TOL)
+ err |= sysfs_create_file(&dev->kobj,
+ dpot_attrib_tolerance[rdac]);
+ if (features & F_CMD_OTP) {
+ err |= sysfs_create_file(&dev->kobj,
+ dpot_attrib_otp_en[rdac]);
+ err |= sysfs_create_file(&dev->kobj,
+ dpot_attrib_otp[rdac]);
+ }
+
+ if (err)
+ dev_err(dev, "failed to register sysfs hooks for RDAC%d\n",
+ rdac);
+
+ return err;
+}
+
+static inline void ad_dpot_remove_files(struct device *dev,
+ unsigned features, unsigned rdac)
+{
+ sysfs_remove_file(&dev->kobj,
+ dpot_attrib_wipers[rdac]);
+ if (features & F_CMD_EEP)
+ sysfs_remove_file(&dev->kobj,
+ dpot_attrib_eeprom[rdac]);
+ if (features & F_CMD_TOL)
+ sysfs_remove_file(&dev->kobj,
+ dpot_attrib_tolerance[rdac]);
+ if (features & F_CMD_OTP) {
+ sysfs_remove_file(&dev->kobj,
+ dpot_attrib_otp_en[rdac]);
+ sysfs_remove_file(&dev->kobj,
+ dpot_attrib_otp[rdac]);
+ }
+}
+
+int ad_dpot_probe(struct device *dev,
+ struct ad_dpot_bus_data *bdata, unsigned long devid,
+ const char *name)
+{
+
+ struct dpot_data *data;
+ int i, err = 0;
+
+ data = kzalloc(sizeof(struct dpot_data), GFP_KERNEL);
+ if (!data) {
+ err = -ENOMEM;
+ goto exit;
+ }
+
+ dev_set_drvdata(dev, data);
+ mutex_init(&data->update_lock);
+
+ data->bdata = *bdata;
+ data->devid = devid;
+
+ data->max_pos = 1 << DPOT_MAX_POS(devid);
+ data->rdac_mask = data->max_pos - 1;
+ data->feat = DPOT_FEAT(devid);
+ data->uid = DPOT_UID(devid);
+ data->wipers = DPOT_WIPERS(devid);
+
+ for (i = DPOT_RDAC0; i < MAX_RDACS; i++)
+ if (data->wipers & (1 << i)) {
+ err = ad_dpot_add_files(dev, data->feat, i);
+ if (err)
+ goto exit_remove_files;
+ /* power-up midscale */
+ if (data->feat & F_RDACS_WONLY)
+ data->rdac_cache[i] = data->max_pos / 2;
+ }
+
+ if (data->feat & F_CMD_INC)
+ err = sysfs_create_group(&dev->kobj, &ad525x_group_commands);
+
+ if (err) {
+ dev_err(dev, "failed to register sysfs hooks\n");
+ goto exit_free;
+ }
+
+ dev_info(dev, "%s %d-Position Digital Potentiometer registered\n",
+ name, data->max_pos);
+
+ return 0;
+
+exit_remove_files:
+ for (i = DPOT_RDAC0; i < MAX_RDACS; i++)
+ if (data->wipers & (1 << i))
+ ad_dpot_remove_files(dev, data->feat, i);
+
+exit_free:
+ kfree(data);
+ dev_set_drvdata(dev, NULL);
+exit:
+ dev_err(dev, "failed to create client for %s ID 0x%lX\n",
+ name, devid);
+ return err;
+}
+EXPORT_SYMBOL(ad_dpot_probe);
+
+int ad_dpot_remove(struct device *dev)
+{
+ struct dpot_data *data = dev_get_drvdata(dev);
+ int i;
+
+ for (i = DPOT_RDAC0; i < MAX_RDACS; i++)
+ if (data->wipers & (1 << i))
+ ad_dpot_remove_files(dev, data->feat, i);
+
+ kfree(data);
+
+ return 0;
+}
+EXPORT_SYMBOL(ad_dpot_remove);
+
+
+MODULE_AUTHOR("Chris Verges <chrisv@cyberswitching.com>, "
+ "Michael Hennerich <hennerich@blackfin.uclinux.org>");
+MODULE_DESCRIPTION("Digital potentiometer driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/misc/ad525x_dpot.h b/drivers/misc/ad525x_dpot.h
new file mode 100644
index 0000000..6bd1eba
--- /dev/null
+++ b/drivers/misc/ad525x_dpot.h
@@ -0,0 +1,215 @@
+/*
+ * Driver for the Analog Devices digital potentiometers
+ *
+ * Copyright (C) 2010 Michael Hennerich, Analog Devices Inc.
+ *
+ * Licensed under the GPL-2 or later.
+ */
+
+#ifndef _AD_DPOT_H_
+#define _AD_DPOT_H_
+
+#include <linux/types.h>
+
+#define DPOT_CONF(features, wipers, max_pos, uid) \
+ (((features) << 18) | (((wipers) & 0xFF) << 10) | \
+ ((max_pos & 0xF) << 6) | (uid & 0x3F))
+
+#define DPOT_UID(conf) (conf & 0x3F)
+#define DPOT_MAX_POS(conf) ((conf >> 6) & 0xF)
+#define DPOT_WIPERS(conf) ((conf >> 10) & 0xFF)
+#define DPOT_FEAT(conf) (conf >> 18)
+
+#define BRDAC0 (1 << 0)
+#define BRDAC1 (1 << 1)
+#define BRDAC2 (1 << 2)
+#define BRDAC3 (1 << 3)
+#define BRDAC4 (1 << 4)
+#define BRDAC5 (1 << 5)
+#define MAX_RDACS 6
+
+#define F_CMD_INC (1 << 0) /* Features INC/DEC ALL, 6dB */
+#define F_CMD_EEP (1 << 1) /* Features EEPROM */
+#define F_CMD_OTP (1 << 2) /* Features OTP */
+#define F_CMD_TOL (1 << 3) /* RDACS feature Tolerance REG */
+#define F_RDACS_RW (1 << 4) /* RDACS are Read/Write */
+#define F_RDACS_WONLY (1 << 5) /* RDACS are Write only */
+#define F_AD_APPDATA (1 << 6) /* RDAC Address append to data */
+#define F_SPI_8BIT (1 << 7) /* All SPI XFERS are 8-bit */
+#define F_SPI_16BIT (1 << 8) /* All SPI XFERS are 16-bit */
+#define F_SPI_24BIT (1 << 9) /* All SPI XFERS are 24-bit */
+
+#define F_RDACS_RW_TOL (F_RDACS_RW | F_CMD_EEP | F_CMD_TOL)
+#define F_RDACS_RW_EEP (F_RDACS_RW | F_CMD_EEP)
+#define F_SPI (F_SPI_8BIT | F_SPI_16BIT | F_SPI_24BIT)
+
+enum dpot_devid {
+ AD5258_ID = DPOT_CONF(F_RDACS_RW_TOL, BRDAC0, 6, 0), /* I2C */
+ AD5259_ID = DPOT_CONF(F_RDACS_RW_TOL, BRDAC0, 8, 1),
+ AD5251_ID = DPOT_CONF(F_RDACS_RW_TOL | F_CMD_INC,
+ BRDAC1 | BRDAC3, 6, 2),
+ AD5252_ID = DPOT_CONF(F_RDACS_RW_TOL | F_CMD_INC,
+ BRDAC1 | BRDAC3, 8, 3),
+ AD5253_ID = DPOT_CONF(F_RDACS_RW_TOL | F_CMD_INC,
+ BRDAC0 | BRDAC1 | BRDAC2 | BRDAC3, 6, 4),
+ AD5254_ID = DPOT_CONF(F_RDACS_RW_TOL | F_CMD_INC,
+ BRDAC0 | BRDAC1 | BRDAC2 | BRDAC3, 8, 5),
+ AD5255_ID = DPOT_CONF(F_RDACS_RW_TOL | F_CMD_INC,
+ BRDAC0 | BRDAC1 | BRDAC2, 9, 6),
+ AD5160_ID = DPOT_CONF(F_RDACS_WONLY | F_AD_APPDATA | F_SPI_8BIT,
+ BRDAC0, 8, 7), /* SPI */
+ AD5161_ID = DPOT_CONF(F_RDACS_WONLY | F_AD_APPDATA | F_SPI_8BIT,
+ BRDAC0, 8, 8),
+ AD5162_ID = DPOT_CONF(F_RDACS_WONLY | F_AD_APPDATA | F_SPI_16BIT,
+ BRDAC0 | BRDAC1, 8, 9),
+ AD5165_ID = DPOT_CONF(F_RDACS_WONLY | F_AD_APPDATA | F_SPI_8BIT,
+ BRDAC0, 8, 10),
+ AD5200_ID = DPOT_CONF(F_RDACS_WONLY | F_AD_APPDATA | F_SPI_8BIT,
+ BRDAC0, 8, 11),
+ AD5201_ID = DPOT_CONF(F_RDACS_WONLY | F_AD_APPDATA | F_SPI_8BIT,
+ BRDAC0, 5, 12),
+ AD5203_ID = DPOT_CONF(F_RDACS_WONLY | F_AD_APPDATA | F_SPI_8BIT,
+ BRDAC0 | BRDAC1 | BRDAC2 | BRDAC3, 6, 13),
+ AD5204_ID = DPOT_CONF(F_RDACS_WONLY | F_AD_APPDATA | F_SPI_16BIT,
+ BRDAC0 | BRDAC1 | BRDAC2 | BRDAC3, 8, 14),
+ AD5206_ID = DPOT_CONF(F_RDACS_WONLY | F_AD_APPDATA | F_SPI_16BIT,
+ BRDAC0 | BRDAC1 | BRDAC2 | BRDAC3 | BRDAC4 | BRDAC5,
+ 8, 15),
+ AD5207_ID = DPOT_CONF(F_RDACS_WONLY | F_AD_APPDATA | F_SPI_16BIT,
+ BRDAC0 | BRDAC1, 8, 16),
+ AD5231_ID = DPOT_CONF(F_RDACS_RW_EEP | F_CMD_INC | F_SPI_24BIT,
+ BRDAC0, 10, 17),
+ AD5232_ID = DPOT_CONF(F_RDACS_RW_EEP | F_CMD_INC | F_SPI_16BIT,
+ BRDAC0 | BRDAC1, 8, 18),
+ AD5233_ID = DPOT_CONF(F_RDACS_RW_EEP | F_CMD_INC | F_SPI_16BIT,
+ BRDAC0 | BRDAC1 | BRDAC2 | BRDAC3, 6, 19),
+ AD5235_ID = DPOT_CONF(F_RDACS_RW_EEP | F_CMD_INC | F_SPI_24BIT,
+ BRDAC0 | BRDAC1, 10, 20),
+ AD5260_ID = DPOT_CONF(F_RDACS_WONLY | F_AD_APPDATA | F_SPI_8BIT,
+ BRDAC0, 8, 21),
+ AD5262_ID = DPOT_CONF(F_RDACS_WONLY | F_AD_APPDATA | F_SPI_16BIT,
+ BRDAC0 | BRDAC1, 8, 22),
+ AD5263_ID = DPOT_CONF(F_RDACS_WONLY | F_AD_APPDATA | F_SPI_16BIT,
+ BRDAC0 | BRDAC1 | BRDAC2 | BRDAC3, 8, 23),
+ AD5290_ID = DPOT_CONF(F_RDACS_WONLY | F_AD_APPDATA | F_SPI_8BIT,
+ BRDAC0, 8, 24),
+ AD5291_ID = DPOT_CONF(F_RDACS_RW | F_SPI_16BIT | F_CMD_OTP,
+ BRDAC0, 8, 25),
+ AD5292_ID = DPOT_CONF(F_RDACS_RW | F_SPI_16BIT | F_CMD_OTP,
+ BRDAC0, 10, 26),
+ AD5293_ID = DPOT_CONF(F_RDACS_RW | F_SPI_16BIT, BRDAC0, 10, 27),
+ AD7376_ID = DPOT_CONF(F_RDACS_WONLY | F_AD_APPDATA | F_SPI_8BIT,
+ BRDAC0, 7, 28),
+ AD8400_ID = DPOT_CONF(F_RDACS_WONLY | F_AD_APPDATA | F_SPI_16BIT,
+ BRDAC0, 8, 29),
+ AD8402_ID = DPOT_CONF(F_RDACS_WONLY | F_AD_APPDATA | F_SPI_16BIT,
+ BRDAC0 | BRDAC1, 8, 30),
+ AD8403_ID = DPOT_CONF(F_RDACS_WONLY | F_AD_APPDATA | F_SPI_16BIT,
+ BRDAC0 | BRDAC1 | BRDAC2, 8, 31),
+ ADN2850_ID = DPOT_CONF(F_RDACS_RW_EEP | F_CMD_INC | F_SPI_24BIT,
+ BRDAC0 | BRDAC1, 10, 32),
+ AD5241_ID = DPOT_CONF(F_RDACS_RW, BRDAC0, 8, 33),
+ AD5242_ID = DPOT_CONF(F_RDACS_RW, BRDAC0 | BRDAC1, 8, 34),
+ AD5243_ID = DPOT_CONF(F_RDACS_RW, BRDAC0 | BRDAC1, 8, 35),
+ AD5245_ID = DPOT_CONF(F_RDACS_RW, BRDAC0, 8, 36),
+ AD5246_ID = DPOT_CONF(F_RDACS_RW, BRDAC0, 7, 37),
+ AD5247_ID = DPOT_CONF(F_RDACS_RW, BRDAC0, 7, 38),
+ AD5248_ID = DPOT_CONF(F_RDACS_RW, BRDAC0 | BRDAC1, 8, 39),
+ AD5280_ID = DPOT_CONF(F_RDACS_RW, BRDAC0, 8, 40),
+ AD5282_ID = DPOT_CONF(F_RDACS_RW, BRDAC0 | BRDAC1, 8, 41),
+ ADN2860_ID = DPOT_CONF(F_RDACS_RW_TOL | F_CMD_INC,
+ BRDAC0 | BRDAC1 | BRDAC2, 9, 42),
+ AD5273_ID = DPOT_CONF(F_RDACS_RW | F_CMD_OTP, BRDAC0, 6, 43),
+ AD5171_ID = DPOT_CONF(F_RDACS_RW | F_CMD_OTP, BRDAC0, 6, 44),
+ AD5170_ID = DPOT_CONF(F_RDACS_RW | F_CMD_OTP, BRDAC0, 8, 45),
+ AD5172_ID = DPOT_CONF(F_RDACS_RW | F_CMD_OTP, BRDAC0 | BRDAC1, 8, 46),
+ AD5173_ID = DPOT_CONF(F_RDACS_RW | F_CMD_OTP, BRDAC0 | BRDAC1, 8, 47),
+ AD5270_ID = DPOT_CONF(F_RDACS_RW | F_CMD_OTP | F_SPI_16BIT,
+ BRDAC0, 10, 48),
+ AD5271_ID = DPOT_CONF(F_RDACS_RW | F_CMD_OTP | F_SPI_16BIT,
+ BRDAC0, 8, 49),
+ AD5272_ID = DPOT_CONF(F_RDACS_RW | F_CMD_OTP, BRDAC0, 10, 50),
+ AD5274_ID = DPOT_CONF(F_RDACS_RW | F_CMD_OTP, BRDAC0, 8, 51),
+};
+
+#define DPOT_RDAC0 0
+#define DPOT_RDAC1 1
+#define DPOT_RDAC2 2
+#define DPOT_RDAC3 3
+#define DPOT_RDAC4 4
+#define DPOT_RDAC5 5
+
+#define DPOT_RDAC_MASK 0x1F
+
+#define DPOT_REG_TOL 0x18
+#define DPOT_TOL_RDAC0 (DPOT_REG_TOL | DPOT_RDAC0)
+#define DPOT_TOL_RDAC1 (DPOT_REG_TOL | DPOT_RDAC1)
+#define DPOT_TOL_RDAC2 (DPOT_REG_TOL | DPOT_RDAC2)
+#define DPOT_TOL_RDAC3 (DPOT_REG_TOL | DPOT_RDAC3)
+#define DPOT_TOL_RDAC4 (DPOT_REG_TOL | DPOT_RDAC4)
+#define DPOT_TOL_RDAC5 (DPOT_REG_TOL | DPOT_RDAC5)
+
+/* RDAC-to-EEPROM Interface Commands */
+#define DPOT_ADDR_RDAC (0x0 << 5)
+#define DPOT_ADDR_EEPROM (0x1 << 5)
+#define DPOT_ADDR_OTP (0x1 << 6)
+#define DPOT_ADDR_CMD (0x1 << 7)
+#define DPOT_ADDR_OTP_EN (0x1 << 9)
+
+#define DPOT_DEC_ALL_6DB (DPOT_ADDR_CMD | (0x4 << 3))
+#define DPOT_INC_ALL_6DB (DPOT_ADDR_CMD | (0x9 << 3))
+#define DPOT_DEC_ALL (DPOT_ADDR_CMD | (0x6 << 3))
+#define DPOT_INC_ALL (DPOT_ADDR_CMD | (0xB << 3))
+
+#define DPOT_SPI_RDAC 0xB0
+#define DPOT_SPI_EEPROM 0x30
+#define DPOT_SPI_READ_RDAC 0xA0
+#define DPOT_SPI_READ_EEPROM 0x90
+#define DPOT_SPI_DEC_ALL_6DB 0x50
+#define DPOT_SPI_INC_ALL_6DB 0xD0
+#define DPOT_SPI_DEC_ALL 0x70
+#define DPOT_SPI_INC_ALL 0xF0
+
+/* AD5291/2/3 use special commands */
+#define DPOT_AD5291_RDAC 0x01
+#define DPOT_AD5291_READ_RDAC 0x02
+#define DPOT_AD5291_STORE_XTPM 0x03
+#define DPOT_AD5291_CTRLREG 0x06
+#define DPOT_AD5291_UNLOCK_CMD 0x03
+
+/* AD5270/1/2/4 use special commands */
+#define DPOT_AD5270_1_2_4_RDAC 0x01
+#define DPOT_AD5270_1_2_4_READ_RDAC 0x02
+#define DPOT_AD5270_1_2_4_STORE_XTPM 0x03
+#define DPOT_AD5270_1_2_4_CTRLREG 0x07
+#define DPOT_AD5270_1_2_4_UNLOCK_CMD 0x03
+
+#define DPOT_AD5282_RDAC_AB 0x80
+
+#define DPOT_AD5273_FUSE 0x80
+#define DPOT_AD5170_2_3_FUSE 0x20
+#define DPOT_AD5170_2_3_OW 0x08
+#define DPOT_AD5172_3_A0 0x08
+#define DPOT_AD5170_2FUSE 0x80
+
+struct dpot_data;
+
+struct ad_dpot_bus_ops {
+ int (*read_d8) (void *client);
+ int (*read_r8d8) (void *client, u8 reg);
+ int (*read_r8d16) (void *client, u8 reg);
+ int (*write_d8) (void *client, u8 val);
+ int (*write_r8d8) (void *client, u8 reg, u8 val);
+ int (*write_r8d16) (void *client, u8 reg, u16 val);
+};
+
+struct ad_dpot_bus_data {
+ void *client;
+ const struct ad_dpot_bus_ops *bops;
+};
+
+int ad_dpot_probe(struct device *dev, struct ad_dpot_bus_data *bdata,
+ unsigned long devid, const char *name);
+int ad_dpot_remove(struct device *dev);
+
+#endif
diff --git a/drivers/misc/altera-stapl/Kconfig b/drivers/misc/altera-stapl/Kconfig
new file mode 100644
index 0000000..7f01d8e
--- /dev/null
+++ b/drivers/misc/altera-stapl/Kconfig
@@ -0,0 +1,8 @@
+comment "Altera FPGA firmware download module"
+
+config ALTERA_STAPL
+ tristate "Altera FPGA firmware download module"
+ depends on I2C
+ default n
+ help
+ An Altera FPGA module. Say Y when you want to support this tool.
diff --git a/drivers/misc/altera-stapl/Makefile b/drivers/misc/altera-stapl/Makefile
new file mode 100644
index 0000000..055f61e
--- /dev/null
+++ b/drivers/misc/altera-stapl/Makefile
@@ -0,0 +1,3 @@
+altera-stapl-objs = altera-lpt.o altera-jtag.o altera-comp.o altera.o
+
+obj-$(CONFIG_ALTERA_STAPL) += altera-stapl.o
diff --git a/drivers/misc/altera-stapl/altera-comp.c b/drivers/misc/altera-stapl/altera-comp.c
new file mode 100644
index 0000000..49b103b
--- /dev/null
+++ b/drivers/misc/altera-stapl/altera-comp.c
@@ -0,0 +1,142 @@
+/*
+ * altera-comp.c
+ *
+ * altera FPGA driver
+ *
+ * Copyright (C) Altera Corporation 1998-2001
+ * Copyright (C) 2010 NetUP Inc.
+ * Copyright (C) 2010 Igor M. Liplianin <liplianin@netup.ru>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/kernel.h>
+#include "altera-exprt.h"
+
+#define SHORT_BITS 16
+#define CHAR_BITS 8
+#define DATA_BLOB_LENGTH 3
+#define MATCH_DATA_LENGTH 8192
+#define ALTERA_REQUEST_SIZE 1024
+#define ALTERA_BUFFER_SIZE (MATCH_DATA_LENGTH + ALTERA_REQUEST_SIZE)
+
+static u32 altera_bits_req(u32 n)
+{
+ u32 result = SHORT_BITS;
+
+ if (n == 0)
+ result = 1;
+ else {
+ /* Look for the highest non-zero bit position */
+ while ((n & (1 << (SHORT_BITS - 1))) == 0) {
+ n <<= 1;
+ --result;
+ }
+ }
+
+ return result;
+}
+
+static u32 altera_read_packed(u8 *buffer, u32 bits, u32 *bits_avail,
+ u32 *in_index)
+{
+ u32 result = 0;
+ u32 shift = 0;
+ u32 databyte = 0;
+
+ while (bits > 0) {
+ databyte = buffer[*in_index];
+ result |= (((databyte >> (CHAR_BITS - *bits_avail))
+ & (0xff >> (CHAR_BITS - *bits_avail))) << shift);
+
+ if (bits <= *bits_avail) {
+ result &= (0xffff >> (SHORT_BITS - (bits + shift)));
+ *bits_avail -= bits;
+ bits = 0;
+ } else {
+ ++(*in_index);
+ shift += *bits_avail;
+ bits -= *bits_avail;
+ *bits_avail = CHAR_BITS;
+ }
+ }
+
+ return result;
+}
+
+u32 altera_shrink(u8 *in, u32 in_length, u8 *out, u32 out_length, s32 version)
+{
+ u32 i, j, data_length = 0L;
+ u32 offset, length;
+ u32 match_data_length = MATCH_DATA_LENGTH;
+ u32 bits_avail = CHAR_BITS;
+ u32 in_index = 0L;
+
+ if (version > 0)
+ --match_data_length;
+
+ for (i = 0; i < out_length; ++i)
+ out[i] = 0;
+
+ /* Read number of bytes in data. */
+ for (i = 0; i < sizeof(in_length); ++i) {
+ data_length = data_length | (
+ altera_read_packed(in,
+ CHAR_BITS,
+ &bits_avail,
+ &in_index) << (i * CHAR_BITS));
+ }
+
+ if (data_length > out_length) {
+ data_length = 0L;
+ return data_length;
+ }
+
+ i = 0;
+ while (i < data_length) {
+ /* A 0 bit indicates literal data. */
+ if (altera_read_packed(in, 1, &bits_avail,
+ &in_index) == 0) {
+ for (j = 0; j < DATA_BLOB_LENGTH; ++j) {
+ if (i < data_length) {
+ out[i] = (u8)altera_read_packed(in,
+ CHAR_BITS,
+ &bits_avail,
+ &in_index);
+ i++;
+ }
+ }
+ } else {
+ /* A 1 bit indicates offset/length to follow. */
+ offset = altera_read_packed(in, altera_bits_req((s16)
+ (i > match_data_length ?
+ match_data_length : i)),
+ &bits_avail,
+ &in_index);
+ length = altera_read_packed(in, CHAR_BITS,
+ &bits_avail,
+ &in_index);
+ for (j = 0; j < length; ++j) {
+ if (i < data_length) {
+ out[i] = out[i - offset];
+ i++;
+ }
+ }
+ }
+ }
+
+ return data_length;
+}
diff --git a/drivers/misc/altera-stapl/altera-exprt.h b/drivers/misc/altera-stapl/altera-exprt.h
new file mode 100644
index 0000000..39c38d8
--- /dev/null
+++ b/drivers/misc/altera-stapl/altera-exprt.h
@@ -0,0 +1,33 @@
+/*
+ * altera-exprt.h
+ *
+ * altera FPGA driver
+ *
+ * Copyright (C) Altera Corporation 1998-2001
+ * Copyright (C) 2010 NetUP Inc.
+ * Copyright (C) 2010 Igor M. Liplianin <liplianin@netup.ru>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef ALTERA_EXPRT_H
+#define ALTERA_EXPRT_H
+
+
+u32 altera_shrink(u8 *in, u32 in_length, u8 *out, u32 out_length, s32 version);
+int netup_jtag_io_lpt(void *device, int tms, int tdi, int read_tdo);
+
+#endif /* ALTERA_EXPRT_H */
diff --git a/drivers/misc/altera-stapl/altera-jtag.c b/drivers/misc/altera-stapl/altera-jtag.c
new file mode 100644
index 0000000..f4bf200
--- /dev/null
+++ b/drivers/misc/altera-stapl/altera-jtag.c
@@ -0,0 +1,1021 @@
+/*
+ * altera-jtag.c
+ *
+ * altera FPGA driver
+ *
+ * Copyright (C) Altera Corporation 1998-2001
+ * Copyright (C) 2010 NetUP Inc.
+ * Copyright (C) 2010 Igor M. Liplianin <liplianin@netup.ru>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/delay.h>
+#include <linux/firmware.h>
+#include <linux/slab.h>
+#include <misc/altera.h>
+#include "altera-exprt.h"
+#include "altera-jtag.h"
+
+#define alt_jtag_io(a, b, c)\
+ astate->config->jtag_io(astate->config->dev, a, b, c);
+
+#define alt_malloc(a) kzalloc(a, GFP_KERNEL);
+
+/*
+ * This structure shows, for each JTAG state, which state is reached after
+ * a single TCK clock cycle with TMS high or TMS low, respectively. This
+ * describes all possible state transitions in the JTAG state machine.
+ */
+struct altera_jtag_machine {
+ enum altera_jtag_state tms_high;
+ enum altera_jtag_state tms_low;
+};
+
+static const struct altera_jtag_machine altera_transitions[] = {
+ /* RESET */ { RESET, IDLE },
+ /* IDLE */ { DRSELECT, IDLE },
+ /* DRSELECT */ { IRSELECT, DRCAPTURE },
+ /* DRCAPTURE */ { DREXIT1, DRSHIFT },
+ /* DRSHIFT */ { DREXIT1, DRSHIFT },
+ /* DREXIT1 */ { DRUPDATE, DRPAUSE },
+ /* DRPAUSE */ { DREXIT2, DRPAUSE },
+ /* DREXIT2 */ { DRUPDATE, DRSHIFT },
+ /* DRUPDATE */ { DRSELECT, IDLE },
+ /* IRSELECT */ { RESET, IRCAPTURE },
+ /* IRCAPTURE */ { IREXIT1, IRSHIFT },
+ /* IRSHIFT */ { IREXIT1, IRSHIFT },
+ /* IREXIT1 */ { IRUPDATE, IRPAUSE },
+ /* IRPAUSE */ { IREXIT2, IRPAUSE },
+ /* IREXIT2 */ { IRUPDATE, IRSHIFT },
+ /* IRUPDATE */ { DRSELECT, IDLE }
+};
+
+/*
+ * This table contains the TMS value to be used to take the NEXT STEP on
+ * the path to the desired state. The array index is the current state,
+ * and the bit position is the desired endstate. To find out which state
+ * is used as the intermediate state, look up the TMS value in the
+ * altera_transitions[] table.
+ */
+static const u16 altera_jtag_path_map[16] = {
+ /* RST RTI SDRS CDR SDR E1DR PDR E2DR */
+ 0x0001, 0xFFFD, 0xFE01, 0xFFE7, 0xFFEF, 0xFF0F, 0xFFBF, 0xFFFF,
+ /* UDR SIRS CIR SIR E1IR PIR E2IR UIR */
+ 0xFEFD, 0x0001, 0xF3FF, 0xF7FF, 0x87FF, 0xDFFF, 0xFFFF, 0x7FFD
+};
+
+/* Flag bits for alt_jtag_io() function */
+#define TMS_HIGH 1
+#define TMS_LOW 0
+#define TDI_HIGH 1
+#define TDI_LOW 0
+#define READ_TDO 1
+#define IGNORE_TDO 0
+
+int altera_jinit(struct altera_state *astate)
+{
+ struct altera_jtag *js = &astate->js;
+
+ /* initial JTAG state is unknown */
+ js->jtag_state = ILLEGAL_JTAG_STATE;
+
+ /* initialize to default state */
+ js->drstop_state = IDLE;
+ js->irstop_state = IDLE;
+ js->dr_pre = 0;
+ js->dr_post = 0;
+ js->ir_pre = 0;
+ js->ir_post = 0;
+ js->dr_length = 0;
+ js->ir_length = 0;
+
+ js->dr_pre_data = NULL;
+ js->dr_post_data = NULL;
+ js->ir_pre_data = NULL;
+ js->ir_post_data = NULL;
+ js->dr_buffer = NULL;
+ js->ir_buffer = NULL;
+
+ return 0;
+}
+
+int altera_set_drstop(struct altera_jtag *js, enum altera_jtag_state state)
+{
+ js->drstop_state = state;
+
+ return 0;
+}
+
+int altera_set_irstop(struct altera_jtag *js, enum altera_jtag_state state)
+{
+ js->irstop_state = state;
+
+ return 0;
+}
+
+int altera_set_dr_pre(struct altera_jtag *js,
+ u32 count, u32 start_index,
+ u8 *preamble_data)
+{
+ int status = 0;
+ u32 i;
+ u32 j;
+
+ if (count > js->dr_pre) {
+ kfree(js->dr_pre_data);
+ js->dr_pre_data = (u8 *)alt_malloc((count + 7) >> 3);
+ if (js->dr_pre_data == NULL)
+ status = -ENOMEM;
+ else
+ js->dr_pre = count;
+ } else
+ js->dr_pre = count;
+
+ if (status == 0) {
+ for (i = 0; i < count; ++i) {
+ j = i + start_index;
+
+ if (preamble_data == NULL)
+ js->dr_pre_data[i >> 3] |= (1 << (i & 7));
+ else {
+ if (preamble_data[j >> 3] & (1 << (j & 7)))
+ js->dr_pre_data[i >> 3] |=
+ (1 << (i & 7));
+ else
+ js->dr_pre_data[i >> 3] &=
+ ~(u32)(1 << (i & 7));
+
+ }
+ }
+ }
+
+ return status;
+}
+
+int altera_set_ir_pre(struct altera_jtag *js, u32 count, u32 start_index,
+ u8 *preamble_data)
+{
+ int status = 0;
+ u32 i;
+ u32 j;
+
+ if (count > js->ir_pre) {
+ kfree(js->ir_pre_data);
+ js->ir_pre_data = (u8 *)alt_malloc((count + 7) >> 3);
+ if (js->ir_pre_data == NULL)
+ status = -ENOMEM;
+ else
+ js->ir_pre = count;
+
+ } else
+ js->ir_pre = count;
+
+ if (status == 0) {
+ for (i = 0; i < count; ++i) {
+ j = i + start_index;
+ if (preamble_data == NULL)
+ js->ir_pre_data[i >> 3] |= (1 << (i & 7));
+ else {
+ if (preamble_data[j >> 3] & (1 << (j & 7)))
+ js->ir_pre_data[i >> 3] |=
+ (1 << (i & 7));
+ else
+ js->ir_pre_data[i >> 3] &=
+ ~(u32)(1 << (i & 7));
+
+ }
+ }
+ }
+
+ return status;
+}
+
+int altera_set_dr_post(struct altera_jtag *js, u32 count, u32 start_index,
+ u8 *postamble_data)
+{
+ int status = 0;
+ u32 i;
+ u32 j;
+
+ if (count > js->dr_post) {
+ kfree(js->dr_post_data);
+ js->dr_post_data = (u8 *)alt_malloc((count + 7) >> 3);
+
+ if (js->dr_post_data == NULL)
+ status = -ENOMEM;
+ else
+ js->dr_post = count;
+
+ } else
+ js->dr_post = count;
+
+ if (status == 0) {
+ for (i = 0; i < count; ++i) {
+ j = i + start_index;
+
+ if (postamble_data == NULL)
+ js->dr_post_data[i >> 3] |= (1 << (i & 7));
+ else {
+ if (postamble_data[j >> 3] & (1 << (j & 7)))
+ js->dr_post_data[i >> 3] |=
+ (1 << (i & 7));
+ else
+ js->dr_post_data[i >> 3] &=
+ ~(u32)(1 << (i & 7));
+
+ }
+ }
+ }
+
+ return status;
+}
+
+int altera_set_ir_post(struct altera_jtag *js, u32 count, u32 start_index,
+ u8 *postamble_data)
+{
+ int status = 0;
+ u32 i;
+ u32 j;
+
+ if (count > js->ir_post) {
+ kfree(js->ir_post_data);
+ js->ir_post_data = (u8 *)alt_malloc((count + 7) >> 3);
+ if (js->ir_post_data == NULL)
+ status = -ENOMEM;
+ else
+ js->ir_post = count;
+
+ } else
+ js->ir_post = count;
+
+ if (status != 0)
+ return status;
+
+ for (i = 0; i < count; ++i) {
+ j = i + start_index;
+
+ if (postamble_data == NULL)
+ js->ir_post_data[i >> 3] |= (1 << (i & 7));
+ else {
+ if (postamble_data[j >> 3] & (1 << (j & 7)))
+ js->ir_post_data[i >> 3] |= (1 << (i & 7));
+ else
+ js->ir_post_data[i >> 3] &=
+ ~(u32)(1 << (i & 7));
+
+ }
+ }
+
+ return status;
+}
+
+static void altera_jreset_idle(struct altera_state *astate)
+{
+ struct altera_jtag *js = &astate->js;
+ int i;
+ /* Go to Test Logic Reset (no matter what the starting state may be) */
+ for (i = 0; i < 5; ++i)
+ alt_jtag_io(TMS_HIGH, TDI_LOW, IGNORE_TDO);
+
+ /* Now step to Run Test / Idle */
+ alt_jtag_io(TMS_LOW, TDI_LOW, IGNORE_TDO);
+ js->jtag_state = IDLE;
+}
+
+int altera_goto_jstate(struct altera_state *astate,
+ enum altera_jtag_state state)
+{
+ struct altera_jtag *js = &astate->js;
+ int tms;
+ int count = 0;
+ int status = 0;
+
+ if (js->jtag_state == ILLEGAL_JTAG_STATE)
+ /* initialize JTAG chain to known state */
+ altera_jreset_idle(astate);
+
+ if (js->jtag_state == state) {
+ /*
+ * We are already in the desired state.
+ * If it is a stable state, loop here.
+ * Otherwise do nothing (no clock cycles).
+ */
+ if ((state == IDLE) || (state == DRSHIFT) ||
+ (state == DRPAUSE) || (state == IRSHIFT) ||
+ (state == IRPAUSE)) {
+ alt_jtag_io(TMS_LOW, TDI_LOW, IGNORE_TDO);
+ } else if (state == RESET)
+ alt_jtag_io(TMS_HIGH, TDI_LOW, IGNORE_TDO);
+
+ } else {
+ while ((js->jtag_state != state) && (count < 9)) {
+ /* Get TMS value to take a step toward desired state */
+ tms = (altera_jtag_path_map[js->jtag_state] &
+ (1 << state))
+ ? TMS_HIGH : TMS_LOW;
+
+ /* Take a step */
+ alt_jtag_io(tms, TDI_LOW, IGNORE_TDO);
+
+ if (tms)
+ js->jtag_state =
+ altera_transitions[js->jtag_state].tms_high;
+ else
+ js->jtag_state =
+ altera_transitions[js->jtag_state].tms_low;
+
+ ++count;
+ }
+ }
+
+ if (js->jtag_state != state)
+ status = -EREMOTEIO;
+
+ return status;
+}
+
+int altera_wait_cycles(struct altera_state *astate,
+ s32 cycles,
+ enum altera_jtag_state wait_state)
+{
+ struct altera_jtag *js = &astate->js;
+ int tms;
+ s32 count;
+ int status = 0;
+
+ if (js->jtag_state != wait_state)
+ status = altera_goto_jstate(astate, wait_state);
+
+ if (status == 0) {
+ /*
+ * Set TMS high to loop in RESET state
+ * Set TMS low to loop in any other stable state
+ */
+ tms = (wait_state == RESET) ? TMS_HIGH : TMS_LOW;
+
+ for (count = 0L; count < cycles; count++)
+ alt_jtag_io(tms, TDI_LOW, IGNORE_TDO);
+
+ }
+
+ return status;
+}
+
+int altera_wait_msecs(struct altera_state *astate,
+ s32 microseconds, enum altera_jtag_state wait_state)
+/*
+ * Causes JTAG hardware to sit in the specified stable
+ * state for the specified duration of real time. If
+ * no JTAG operations have been performed yet, then only
+ * a delay is performed. This permits the WAIT USECS
+ * statement to be used in VECTOR programs without causing
+ * any JTAG operations.
+ * Returns 0 for success, else appropriate error code.
+ */
+{
+ struct altera_jtag *js = &astate->js;
+ int status = 0;
+
+ if ((js->jtag_state != ILLEGAL_JTAG_STATE) &&
+ (js->jtag_state != wait_state))
+ status = altera_goto_jstate(astate, wait_state);
+
+ if (status == 0)
+ /* Wait for specified time interval */
+ udelay(microseconds);
+
+ return status;
+}
+
+static void altera_concatenate_data(u8 *buffer,
+ u8 *preamble_data,
+ u32 preamble_count,
+ u8 *target_data,
+ u32 start_index,
+ u32 target_count,
+ u8 *postamble_data,
+ u32 postamble_count)
+/*
+ * Copies preamble data, target data, and postamble data
+ * into one buffer for IR or DR scans.
+ */
+{
+ u32 i, j, k;
+
+ for (i = 0L; i < preamble_count; ++i) {
+ if (preamble_data[i >> 3L] & (1L << (i & 7L)))
+ buffer[i >> 3L] |= (1L << (i & 7L));
+ else
+ buffer[i >> 3L] &= ~(u32)(1L << (i & 7L));
+
+ }
+
+ j = start_index;
+ k = preamble_count + target_count;
+ for (; i < k; ++i, ++j) {
+ if (target_data[j >> 3L] & (1L << (j & 7L)))
+ buffer[i >> 3L] |= (1L << (i & 7L));
+ else
+ buffer[i >> 3L] &= ~(u32)(1L << (i & 7L));
+
+ }
+
+ j = 0L;
+ k = preamble_count + target_count + postamble_count;
+ for (; i < k; ++i, ++j) {
+ if (postamble_data[j >> 3L] & (1L << (j & 7L)))
+ buffer[i >> 3L] |= (1L << (i & 7L));
+ else
+ buffer[i >> 3L] &= ~(u32)(1L << (i & 7L));
+
+ }
+}
+
+static int alt_jtag_drscan(struct altera_state *astate,
+ int start_state,
+ int count,
+ u8 *tdi,
+ u8 *tdo)
+{
+ int i = 0;
+ int tdo_bit = 0;
+ int status = 1;
+
+ /* First go to DRSHIFT state */
+ switch (start_state) {
+ case 0: /* IDLE */
+ alt_jtag_io(1, 0, 0); /* DRSELECT */
+ alt_jtag_io(0, 0, 0); /* DRCAPTURE */
+ alt_jtag_io(0, 0, 0); /* DRSHIFT */
+ break;
+
+ case 1: /* DRPAUSE */
+ alt_jtag_io(1, 0, 0); /* DREXIT2 */
+ alt_jtag_io(1, 0, 0); /* DRUPDATE */
+ alt_jtag_io(1, 0, 0); /* DRSELECT */
+ alt_jtag_io(0, 0, 0); /* DRCAPTURE */
+ alt_jtag_io(0, 0, 0); /* DRSHIFT */
+ break;
+
+ case 2: /* IRPAUSE */
+ alt_jtag_io(1, 0, 0); /* IREXIT2 */
+ alt_jtag_io(1, 0, 0); /* IRUPDATE */
+ alt_jtag_io(1, 0, 0); /* DRSELECT */
+ alt_jtag_io(0, 0, 0); /* DRCAPTURE */
+ alt_jtag_io(0, 0, 0); /* DRSHIFT */
+ break;
+
+ default:
+ status = 0;
+ }
+
+ if (status) {
+ /* loop in the SHIFT-DR state */
+ for (i = 0; i < count; i++) {
+ tdo_bit = alt_jtag_io(
+ (i == count - 1),
+ tdi[i >> 3] & (1 << (i & 7)),
+ (tdo != NULL));
+
+ if (tdo != NULL) {
+ if (tdo_bit)
+ tdo[i >> 3] |= (1 << (i & 7));
+ else
+ tdo[i >> 3] &= ~(u32)(1 << (i & 7));
+
+ }
+ }
+
+ alt_jtag_io(0, 0, 0); /* DRPAUSE */
+ }
+
+ return status;
+}
+
+static int alt_jtag_irscan(struct altera_state *astate,
+ int start_state,
+ int count,
+ u8 *tdi,
+ u8 *tdo)
+{
+ int i = 0;
+ int tdo_bit = 0;
+ int status = 1;
+
+ /* First go to IRSHIFT state */
+ switch (start_state) {
+ case 0: /* IDLE */
+ alt_jtag_io(1, 0, 0); /* DRSELECT */
+ alt_jtag_io(1, 0, 0); /* IRSELECT */
+ alt_jtag_io(0, 0, 0); /* IRCAPTURE */
+ alt_jtag_io(0, 0, 0); /* IRSHIFT */
+ break;
+
+ case 1: /* DRPAUSE */
+ alt_jtag_io(1, 0, 0); /* DREXIT2 */
+ alt_jtag_io(1, 0, 0); /* DRUPDATE */
+ alt_jtag_io(1, 0, 0); /* DRSELECT */
+ alt_jtag_io(1, 0, 0); /* IRSELECT */
+ alt_jtag_io(0, 0, 0); /* IRCAPTURE */
+ alt_jtag_io(0, 0, 0); /* IRSHIFT */
+ break;
+
+ case 2: /* IRPAUSE */
+ alt_jtag_io(1, 0, 0); /* IREXIT2 */
+ alt_jtag_io(1, 0, 0); /* IRUPDATE */
+ alt_jtag_io(1, 0, 0); /* DRSELECT */
+ alt_jtag_io(1, 0, 0); /* IRSELECT */
+ alt_jtag_io(0, 0, 0); /* IRCAPTURE */
+ alt_jtag_io(0, 0, 0); /* IRSHIFT */
+ break;
+
+ default:
+ status = 0;
+ }
+
+ if (status) {
+ /* loop in the SHIFT-IR state */
+ for (i = 0; i < count; i++) {
+ tdo_bit = alt_jtag_io(
+ (i == count - 1),
+ tdi[i >> 3] & (1 << (i & 7)),
+ (tdo != NULL));
+ if (tdo != NULL) {
+ if (tdo_bit)
+ tdo[i >> 3] |= (1 << (i & 7));
+ else
+ tdo[i >> 3] &= ~(u32)(1 << (i & 7));
+
+ }
+ }
+
+ alt_jtag_io(0, 0, 0); /* IRPAUSE */
+ }
+
+ return status;
+}
+
+static void altera_extract_target_data(u8 *buffer,
+ u8 *target_data,
+ u32 start_index,
+ u32 preamble_count,
+ u32 target_count)
+/*
+ * Copies target data from scan buffer, filtering out
+ * preamble and postamble data.
+ */
+{
+ u32 i;
+ u32 j;
+ u32 k;
+
+ j = preamble_count;
+ k = start_index + target_count;
+ for (i = start_index; i < k; ++i, ++j) {
+ if (buffer[j >> 3] & (1 << (j & 7)))
+ target_data[i >> 3] |= (1 << (i & 7));
+ else
+ target_data[i >> 3] &= ~(u32)(1 << (i & 7));
+
+ }
+}
+
+int altera_irscan(struct altera_state *astate,
+ u32 count,
+ u8 *tdi_data,
+ u32 start_index)
+/* Shifts data into instruction register */
+{
+ struct altera_jtag *js = &astate->js;
+ int start_code = 0;
+ u32 alloc_chars = 0;
+ u32 shift_count = js->ir_pre + count + js->ir_post;
+ int status = 0;
+ enum altera_jtag_state start_state = ILLEGAL_JTAG_STATE;
+
+ switch (js->jtag_state) {
+ case ILLEGAL_JTAG_STATE:
+ case RESET:
+ case IDLE:
+ start_code = 0;
+ start_state = IDLE;
+ break;
+
+ case DRSELECT:
+ case DRCAPTURE:
+ case DRSHIFT:
+ case DREXIT1:
+ case DRPAUSE:
+ case DREXIT2:
+ case DRUPDATE:
+ start_code = 1;
+ start_state = DRPAUSE;
+ break;
+
+ case IRSELECT:
+ case IRCAPTURE:
+ case IRSHIFT:
+ case IREXIT1:
+ case IRPAUSE:
+ case IREXIT2:
+ case IRUPDATE:
+ start_code = 2;
+ start_state = IRPAUSE;
+ break;
+
+ default:
+ status = -EREMOTEIO;
+ break;
+ }
+
+ if (status == 0)
+ if (js->jtag_state != start_state)
+ status = altera_goto_jstate(astate, start_state);
+
+ if (status == 0) {
+ if (shift_count > js->ir_length) {
+ alloc_chars = (shift_count + 7) >> 3;
+ kfree(js->ir_buffer);
+ js->ir_buffer = (u8 *)alt_malloc(alloc_chars);
+ if (js->ir_buffer == NULL)
+ status = -ENOMEM;
+ else
+ js->ir_length = alloc_chars * 8;
+
+ }
+ }
+
+ if (status == 0) {
+ /*
+ * Copy preamble data, IR data,
+ * and postamble data into a buffer
+ */
+ altera_concatenate_data(js->ir_buffer,
+ js->ir_pre_data,
+ js->ir_pre,
+ tdi_data,
+ start_index,
+ count,
+ js->ir_post_data,
+ js->ir_post);
+ /* Do the IRSCAN */
+ alt_jtag_irscan(astate,
+ start_code,
+ shift_count,
+ js->ir_buffer,
+ NULL);
+
+ /* alt_jtag_irscan() always ends in IRPAUSE state */
+ js->jtag_state = IRPAUSE;
+ }
+
+ if (status == 0)
+ if (js->irstop_state != IRPAUSE)
+ status = altera_goto_jstate(astate, js->irstop_state);
+
+
+ return status;
+}
+
+int altera_swap_ir(struct altera_state *astate,
+ u32 count,
+ u8 *in_data,
+ u32 in_index,
+ u8 *out_data,
+ u32 out_index)
+/* Shifts data into instruction register, capturing output data */
+{
+ struct altera_jtag *js = &astate->js;
+ int start_code = 0;
+ u32 alloc_chars = 0;
+ u32 shift_count = js->ir_pre + count + js->ir_post;
+ int status = 0;
+ enum altera_jtag_state start_state = ILLEGAL_JTAG_STATE;
+
+ switch (js->jtag_state) {
+ case ILLEGAL_JTAG_STATE:
+ case RESET:
+ case IDLE:
+ start_code = 0;
+ start_state = IDLE;
+ break;
+
+ case DRSELECT:
+ case DRCAPTURE:
+ case DRSHIFT:
+ case DREXIT1:
+ case DRPAUSE:
+ case DREXIT2:
+ case DRUPDATE:
+ start_code = 1;
+ start_state = DRPAUSE;
+ break;
+
+ case IRSELECT:
+ case IRCAPTURE:
+ case IRSHIFT:
+ case IREXIT1:
+ case IRPAUSE:
+ case IREXIT2:
+ case IRUPDATE:
+ start_code = 2;
+ start_state = IRPAUSE;
+ break;
+
+ default:
+ status = -EREMOTEIO;
+ break;
+ }
+
+ if (status == 0)
+ if (js->jtag_state != start_state)
+ status = altera_goto_jstate(astate, start_state);
+
+ if (status == 0) {
+ if (shift_count > js->ir_length) {
+ alloc_chars = (shift_count + 7) >> 3;
+ kfree(js->ir_buffer);
+ js->ir_buffer = (u8 *)alt_malloc(alloc_chars);
+ if (js->ir_buffer == NULL)
+ status = -ENOMEM;
+ else
+ js->ir_length = alloc_chars * 8;
+
+ }
+ }
+
+ if (status == 0) {
+ /*
+ * Copy preamble data, IR data,
+ * and postamble data into a buffer
+ */
+ altera_concatenate_data(js->ir_buffer,
+ js->ir_pre_data,
+ js->ir_pre,
+ in_data,
+ in_index,
+ count,
+ js->ir_post_data,
+ js->ir_post);
+
+ /* Do the IRSCAN */
+ alt_jtag_irscan(astate,
+ start_code,
+ shift_count,
+ js->ir_buffer,
+ js->ir_buffer);
+
+ /* alt_jtag_irscan() always ends in IRPAUSE state */
+ js->jtag_state = IRPAUSE;
+ }
+
+ if (status == 0)
+ if (js->irstop_state != IRPAUSE)
+ status = altera_goto_jstate(astate, js->irstop_state);
+
+
+ if (status == 0)
+ /* Now extract the returned data from the buffer */
+ altera_extract_target_data(js->ir_buffer,
+ out_data, out_index,
+ js->ir_pre, count);
+
+ return status;
+}
+
+int altera_drscan(struct altera_state *astate,
+ u32 count,
+ u8 *tdi_data,
+ u32 start_index)
+/* Shifts data into data register (ignoring output data) */
+{
+ struct altera_jtag *js = &astate->js;
+ int start_code = 0;
+ u32 alloc_chars = 0;
+ u32 shift_count = js->dr_pre + count + js->dr_post;
+ int status = 0;
+ enum altera_jtag_state start_state = ILLEGAL_JTAG_STATE;
+
+ switch (js->jtag_state) {
+ case ILLEGAL_JTAG_STATE:
+ case RESET:
+ case IDLE:
+ start_code = 0;
+ start_state = IDLE;
+ break;
+
+ case DRSELECT:
+ case DRCAPTURE:
+ case DRSHIFT:
+ case DREXIT1:
+ case DRPAUSE:
+ case DREXIT2:
+ case DRUPDATE:
+ start_code = 1;
+ start_state = DRPAUSE;
+ break;
+
+ case IRSELECT:
+ case IRCAPTURE:
+ case IRSHIFT:
+ case IREXIT1:
+ case IRPAUSE:
+ case IREXIT2:
+ case IRUPDATE:
+ start_code = 2;
+ start_state = IRPAUSE;
+ break;
+
+ default:
+ status = -EREMOTEIO;
+ break;
+ }
+
+ if (status == 0)
+ if (js->jtag_state != start_state)
+ status = altera_goto_jstate(astate, start_state);
+
+ if (status == 0) {
+ if (shift_count > js->dr_length) {
+ alloc_chars = (shift_count + 7) >> 3;
+ kfree(js->dr_buffer);
+ js->dr_buffer = (u8 *)alt_malloc(alloc_chars);
+ if (js->dr_buffer == NULL)
+ status = -ENOMEM;
+ else
+ js->dr_length = alloc_chars * 8;
+
+ }
+ }
+
+ if (status == 0) {
+ /*
+ * Copy preamble data, DR data,
+ * and postamble data into a buffer
+ */
+ altera_concatenate_data(js->dr_buffer,
+ js->dr_pre_data,
+ js->dr_pre,
+ tdi_data,
+ start_index,
+ count,
+ js->dr_post_data,
+ js->dr_post);
+ /* Do the DRSCAN */
+ alt_jtag_drscan(astate, start_code, shift_count,
+ js->dr_buffer, NULL);
+ /* alt_jtag_drscan() always ends in DRPAUSE state */
+ js->jtag_state = DRPAUSE;
+ }
+
+ if (status == 0)
+ if (js->drstop_state != DRPAUSE)
+ status = altera_goto_jstate(astate, js->drstop_state);
+
+ return status;
+}
+
+int altera_swap_dr(struct altera_state *astate, u32 count,
+ u8 *in_data, u32 in_index,
+ u8 *out_data, u32 out_index)
+/* Shifts data into data register, capturing output data */
+{
+ struct altera_jtag *js = &astate->js;
+ int start_code = 0;
+ u32 alloc_chars = 0;
+ u32 shift_count = js->dr_pre + count + js->dr_post;
+ int status = 0;
+ enum altera_jtag_state start_state = ILLEGAL_JTAG_STATE;
+
+ switch (js->jtag_state) {
+ case ILLEGAL_JTAG_STATE:
+ case RESET:
+ case IDLE:
+ start_code = 0;
+ start_state = IDLE;
+ break;
+
+ case DRSELECT:
+ case DRCAPTURE:
+ case DRSHIFT:
+ case DREXIT1:
+ case DRPAUSE:
+ case DREXIT2:
+ case DRUPDATE:
+ start_code = 1;
+ start_state = DRPAUSE;
+ break;
+
+ case IRSELECT:
+ case IRCAPTURE:
+ case IRSHIFT:
+ case IREXIT1:
+ case IRPAUSE:
+ case IREXIT2:
+ case IRUPDATE:
+ start_code = 2;
+ start_state = IRPAUSE;
+ break;
+
+ default:
+ status = -EREMOTEIO;
+ break;
+ }
+
+ if (status == 0)
+ if (js->jtag_state != start_state)
+ status = altera_goto_jstate(astate, start_state);
+
+ if (status == 0) {
+ if (shift_count > js->dr_length) {
+ alloc_chars = (shift_count + 7) >> 3;
+ kfree(js->dr_buffer);
+ js->dr_buffer = (u8 *)alt_malloc(alloc_chars);
+
+ if (js->dr_buffer == NULL)
+ status = -ENOMEM;
+ else
+ js->dr_length = alloc_chars * 8;
+
+ }
+ }
+
+ if (status == 0) {
+ /*
+ * Copy preamble data, DR data,
+ * and postamble data into a buffer
+ */
+ altera_concatenate_data(js->dr_buffer,
+ js->dr_pre_data,
+ js->dr_pre,
+ in_data,
+ in_index,
+ count,
+ js->dr_post_data,
+ js->dr_post);
+
+ /* Do the DRSCAN */
+ alt_jtag_drscan(astate,
+ start_code,
+ shift_count,
+ js->dr_buffer,
+ js->dr_buffer);
+
+ /* alt_jtag_drscan() always ends in DRPAUSE state */
+ js->jtag_state = DRPAUSE;
+ }
+
+ if (status == 0)
+ if (js->drstop_state != DRPAUSE)
+ status = altera_goto_jstate(astate, js->drstop_state);
+
+ if (status == 0)
+ /* Now extract the returned data from the buffer */
+ altera_extract_target_data(js->dr_buffer,
+ out_data,
+ out_index,
+ js->dr_pre,
+ count);
+
+ return status;
+}
+
+void altera_free_buffers(struct altera_state *astate)
+{
+ struct altera_jtag *js = &astate->js;
+ /* If the JTAG interface was used, reset it to TLR */
+ if (js->jtag_state != ILLEGAL_JTAG_STATE)
+ altera_jreset_idle(astate);
+
+ kfree(js->dr_pre_data);
+ js->dr_pre_data = NULL;
+
+ kfree(js->dr_post_data);
+ js->dr_post_data = NULL;
+
+ kfree(js->dr_buffer);
+ js->dr_buffer = NULL;
+
+ kfree(js->ir_pre_data);
+ js->ir_pre_data = NULL;
+
+ kfree(js->ir_post_data);
+ js->ir_post_data = NULL;
+
+ kfree(js->ir_buffer);
+ js->ir_buffer = NULL;
+}
diff --git a/drivers/misc/altera-stapl/altera-jtag.h b/drivers/misc/altera-stapl/altera-jtag.h
new file mode 100644
index 0000000..2f97e36
--- /dev/null
+++ b/drivers/misc/altera-stapl/altera-jtag.h
@@ -0,0 +1,113 @@
+/*
+ * altera-jtag.h
+ *
+ * altera FPGA driver
+ *
+ * Copyright (C) Altera Corporation 1998-2001
+ * Copyright (C) 2010 NetUP Inc.
+ * Copyright (C) 2010 Igor M. Liplianin <liplianin@netup.ru>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef ALTERA_JTAG_H
+#define ALTERA_JTAG_H
+
+/* Function Prototypes */
+enum altera_jtag_state {
+ ILLEGAL_JTAG_STATE = -1,
+ RESET = 0,
+ IDLE = 1,
+ DRSELECT = 2,
+ DRCAPTURE = 3,
+ DRSHIFT = 4,
+ DREXIT1 = 5,
+ DRPAUSE = 6,
+ DREXIT2 = 7,
+ DRUPDATE = 8,
+ IRSELECT = 9,
+ IRCAPTURE = 10,
+ IRSHIFT = 11,
+ IREXIT1 = 12,
+ IRPAUSE = 13,
+ IREXIT2 = 14,
+ IRUPDATE = 15
+
+};
+
+struct altera_jtag {
+ /* Global variable to store the current JTAG state */
+ enum altera_jtag_state jtag_state;
+
+ /* Store current stop-state for DR and IR scan commands */
+ enum altera_jtag_state drstop_state;
+ enum altera_jtag_state irstop_state;
+
+ /* Store current padding values */
+ u32 dr_pre;
+ u32 dr_post;
+ u32 ir_pre;
+ u32 ir_post;
+ u32 dr_length;
+ u32 ir_length;
+ u8 *dr_pre_data;
+ u8 *dr_post_data;
+ u8 *ir_pre_data;
+ u8 *ir_post_data;
+ u8 *dr_buffer;
+ u8 *ir_buffer;
+};
+
+#define ALTERA_STACK_SIZE 128
+#define ALTERA_MESSAGE_LENGTH 1024
+
+struct altera_state {
+ struct altera_config *config;
+ struct altera_jtag js;
+ char msg_buff[ALTERA_MESSAGE_LENGTH + 1];
+ long stack[ALTERA_STACK_SIZE];
+};
+
+int altera_jinit(struct altera_state *astate);
+int altera_set_drstop(struct altera_jtag *js, enum altera_jtag_state state);
+int altera_set_irstop(struct altera_jtag *js, enum altera_jtag_state state);
+int altera_set_dr_pre(struct altera_jtag *js, u32 count, u32 start_index,
+ u8 *preamble_data);
+int altera_set_ir_pre(struct altera_jtag *js, u32 count, u32 start_index,
+ u8 *preamble_data);
+int altera_set_dr_post(struct altera_jtag *js, u32 count, u32 start_index,
+ u8 *postamble_data);
+int altera_set_ir_post(struct altera_jtag *js, u32 count, u32 start_index,
+ u8 *postamble_data);
+int altera_goto_jstate(struct altera_state *astate,
+ enum altera_jtag_state state);
+int altera_wait_cycles(struct altera_state *astate, s32 cycles,
+ enum altera_jtag_state wait_state);
+int altera_wait_msecs(struct altera_state *astate, s32 microseconds,
+ enum altera_jtag_state wait_state);
+int altera_irscan(struct altera_state *astate, u32 count,
+ u8 *tdi_data, u32 start_index);
+int altera_swap_ir(struct altera_state *astate,
+ u32 count, u8 *in_data,
+ u32 in_index, u8 *out_data,
+ u32 out_index);
+int altera_drscan(struct altera_state *astate, u32 count,
+ u8 *tdi_data, u32 start_index);
+int altera_swap_dr(struct altera_state *astate, u32 count,
+ u8 *in_data, u32 in_index,
+ u8 *out_data, u32 out_index);
+void altera_free_buffers(struct altera_state *astate);
+#endif /* ALTERA_JTAG_H */
diff --git a/drivers/misc/altera-stapl/altera-lpt.c b/drivers/misc/altera-stapl/altera-lpt.c
new file mode 100644
index 0000000..91456a0
--- /dev/null
+++ b/drivers/misc/altera-stapl/altera-lpt.c
@@ -0,0 +1,70 @@
+/*
+ * altera-lpt.c
+ *
+ * altera FPGA driver
+ *
+ * Copyright (C) Altera Corporation 1998-2001
+ * Copyright (C) 2010 NetUP Inc.
+ * Copyright (C) 2010 Abylay Ospan <aospan@netup.ru>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include "altera-exprt.h"
+
+static int lpt_hardware_initialized;
+
+static void byteblaster_write(int port, int data)
+{
+ outb((u8)data, (u16)(port + 0x378));
+};
+
+static int byteblaster_read(int port)
+{
+ int data = 0;
+ data = inb((u16)(port + 0x378));
+ return data & 0xff;
+};
+
+int netup_jtag_io_lpt(void *device, int tms, int tdi, int read_tdo)
+{
+ int data = 0;
+ int tdo = 0;
+ int initial_lpt_ctrl = 0;
+
+ if (!lpt_hardware_initialized) {
+ initial_lpt_ctrl = byteblaster_read(2);
+ byteblaster_write(2, (initial_lpt_ctrl | 0x02) & 0xdf);
+ lpt_hardware_initialized = 1;
+ }
+
+ data = ((tdi ? 0x40 : 0) | (tms ? 0x02 : 0));
+
+ byteblaster_write(0, data);
+
+ if (read_tdo) {
+ tdo = byteblaster_read(1);
+ tdo = ((tdo & 0x80) ? 0 : 1);
+ }
+
+ byteblaster_write(0, data | 0x01);
+
+ byteblaster_write(0, data);
+
+ return tdo;
+}
diff --git a/drivers/misc/altera-stapl/altera.c b/drivers/misc/altera-stapl/altera.c
new file mode 100644
index 0000000..f53e217
--- /dev/null
+++ b/drivers/misc/altera-stapl/altera.c
@@ -0,0 +1,2537 @@
+/*
+ * altera.c
+ *
+ * altera FPGA driver
+ *
+ * Copyright (C) Altera Corporation 1998-2001
+ * Copyright (C) 2010,2011 NetUP Inc.
+ * Copyright (C) 2010,2011 Igor M. Liplianin <liplianin@netup.ru>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <asm/unaligned.h>
+#include <linux/ctype.h>
+#include <linux/string.h>
+#include <linux/firmware.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <misc/altera.h>
+#include "altera-exprt.h"
+#include "altera-jtag.h"
+
+static int debug = 1;
+module_param(debug, int, 0644);
+MODULE_PARM_DESC(debug, "enable debugging information");
+
+MODULE_DESCRIPTION("altera FPGA kernel module");
+MODULE_AUTHOR("Igor M. Liplianin <liplianin@netup.ru>");
+MODULE_LICENSE("GPL");
+
+#define dprintk(args...) \
+ if (debug) { \
+ printk(KERN_DEBUG args); \
+ }
+
+enum altera_fpga_opcode {
+ OP_NOP = 0,
+ OP_DUP,
+ OP_SWP,
+ OP_ADD,
+ OP_SUB,
+ OP_MULT,
+ OP_DIV,
+ OP_MOD,
+ OP_SHL,
+ OP_SHR,
+ OP_NOT,
+ OP_AND,
+ OP_OR,
+ OP_XOR,
+ OP_INV,
+ OP_GT,
+ OP_LT,
+ OP_RET,
+ OP_CMPS,
+ OP_PINT,
+ OP_PRNT,
+ OP_DSS,
+ OP_DSSC,
+ OP_ISS,
+ OP_ISSC,
+ OP_DPR = 0x1c,
+ OP_DPRL,
+ OP_DPO,
+ OP_DPOL,
+ OP_IPR,
+ OP_IPRL,
+ OP_IPO,
+ OP_IPOL,
+ OP_PCHR,
+ OP_EXIT,
+ OP_EQU,
+ OP_POPT,
+ OP_ABS = 0x2c,
+ OP_BCH0,
+ OP_PSH0 = 0x2f,
+ OP_PSHL = 0x40,
+ OP_PSHV,
+ OP_JMP,
+ OP_CALL,
+ OP_NEXT,
+ OP_PSTR,
+ OP_SINT = 0x47,
+ OP_ST,
+ OP_ISTP,
+ OP_DSTP,
+ OP_SWPN,
+ OP_DUPN,
+ OP_POPV,
+ OP_POPE,
+ OP_POPA,
+ OP_JMPZ,
+ OP_DS,
+ OP_IS,
+ OP_DPRA,
+ OP_DPOA,
+ OP_IPRA,
+ OP_IPOA,
+ OP_EXPT,
+ OP_PSHE,
+ OP_PSHA,
+ OP_DYNA,
+ OP_EXPV = 0x5c,
+ OP_COPY = 0x80,
+ OP_REVA,
+ OP_DSC,
+ OP_ISC,
+ OP_WAIT,
+ OP_VS,
+ OP_CMPA = 0xc0,
+ OP_VSC,
+};
+
+struct altera_procinfo {
+ char *name;
+ u8 attrs;
+ struct altera_procinfo *next;
+};
+
+/* This function checks if enough parameters are available on the stack. */
+static int altera_check_stack(int stack_ptr, int count, int *status)
+{
+ if (stack_ptr < count) {
+ *status = -EOVERFLOW;
+ return 0;
+ }
+
+ return 1;
+}
+
+static void altera_export_int(char *key, s32 value)
+{
+ dprintk("Export: key = \"%s\", value = %d\n", key, value);
+}
+
+#define HEX_LINE_CHARS 72
+#define HEX_LINE_BITS (HEX_LINE_CHARS * 4)
+
+static void altera_export_bool_array(char *key, u8 *data, s32 count)
+{
+ char string[HEX_LINE_CHARS + 1];
+ s32 i, offset;
+ u32 size, line, lines, linebits, value, j, k;
+
+ if (count > HEX_LINE_BITS) {
+ dprintk("Export: key = \"%s\", %d bits, value = HEX\n",
+ key, count);
+ lines = (count + (HEX_LINE_BITS - 1)) / HEX_LINE_BITS;
+
+ for (line = 0; line < lines; ++line) {
+ if (line < (lines - 1)) {
+ linebits = HEX_LINE_BITS;
+ size = HEX_LINE_CHARS;
+ offset = count - ((line + 1) * HEX_LINE_BITS);
+ } else {
+ linebits =
+ count - ((lines - 1) * HEX_LINE_BITS);
+ size = (linebits + 3) / 4;
+ offset = 0L;
+ }
+
+ string[size] = '\0';
+ j = size - 1;
+ value = 0;
+
+ for (k = 0; k < linebits; ++k) {
+ i = k + offset;
+ if (data[i >> 3] & (1 << (i & 7)))
+ value |= (1 << (i & 3));
+ if ((i & 3) == 3) {
+ sprintf(&string[j], "%1x", value);
+ value = 0;
+ --j;
+ }
+ }
+ if ((k & 3) > 0)
+ sprintf(&string[j], "%1x", value);
+
+ dprintk("%s\n", string);
+ }
+
+ } else {
+ size = (count + 3) / 4;
+ string[size] = '\0';
+ j = size - 1;
+ value = 0;
+
+ for (i = 0; i < count; ++i) {
+ if (data[i >> 3] & (1 << (i & 7)))
+ value |= (1 << (i & 3));
+ if ((i & 3) == 3) {
+ sprintf(&string[j], "%1x", value);
+ value = 0;
+ --j;
+ }
+ }
+ if ((i & 3) > 0)
+ sprintf(&string[j], "%1x", value);
+
+ dprintk("Export: key = \"%s\", %d bits, value = HEX %s\n",
+ key, count, string);
+ }
+}
+
+static int altera_execute(struct altera_state *astate,
+ u8 *p,
+ s32 program_size,
+ s32 *error_address,
+ int *exit_code,
+ int *format_version)
+{
+ struct altera_config *aconf = astate->config;
+ char *msg_buff = astate->msg_buff;
+ long *stack = astate->stack;
+ int status = 0;
+ u32 first_word = 0L;
+ u32 action_table = 0L;
+ u32 proc_table = 0L;
+ u32 str_table = 0L;
+ u32 sym_table = 0L;
+ u32 data_sect = 0L;
+ u32 code_sect = 0L;
+ u32 debug_sect = 0L;
+ u32 action_count = 0L;
+ u32 proc_count = 0L;
+ u32 sym_count = 0L;
+ long *vars = NULL;
+ s32 *var_size = NULL;
+ char *attrs = NULL;
+ u8 *proc_attributes = NULL;
+ u32 pc;
+ u32 opcode_address;
+ u32 args[3];
+ u32 opcode;
+ u32 name_id;
+ u8 charbuf[4];
+ long long_tmp;
+ u32 variable_id;
+ u8 *charptr_tmp;
+ u8 *charptr_tmp2;
+ long *longptr_tmp;
+ int version = 0;
+ int delta = 0;
+ int stack_ptr = 0;
+ u32 arg_count;
+ int done = 0;
+ int bad_opcode = 0;
+ u32 count;
+ u32 index;
+ u32 index2;
+ s32 long_count;
+ s32 long_idx;
+ s32 long_idx2;
+ u32 i;
+ u32 j;
+ u32 uncomp_size;
+ u32 offset;
+ u32 value;
+ int current_proc = 0;
+ int reverse;
+
+ char *name;
+
+ dprintk("%s\n", __func__);
+
+ /* Read header information */
+ if (program_size > 52L) {
+ first_word = get_unaligned_be32(&p[0]);
+ version = (first_word & 1L);
+ *format_version = version + 1;
+ delta = version * 8;
+
+ action_table = get_unaligned_be32(&p[4]);
+ proc_table = get_unaligned_be32(&p[8]);
+ str_table = get_unaligned_be32(&p[4 + delta]);
+ sym_table = get_unaligned_be32(&p[16 + delta]);
+ data_sect = get_unaligned_be32(&p[20 + delta]);
+ code_sect = get_unaligned_be32(&p[24 + delta]);
+ debug_sect = get_unaligned_be32(&p[28 + delta]);
+ action_count = get_unaligned_be32(&p[40 + delta]);
+ proc_count = get_unaligned_be32(&p[44 + delta]);
+ sym_count = get_unaligned_be32(&p[48 + (2 * delta)]);
+ }
+
+ if ((first_word != 0x4A414D00L) && (first_word != 0x4A414D01L)) {
+ done = 1;
+ status = -EIO;
+ goto exit_done;
+ }
+
+ if (sym_count <= 0)
+ goto exit_done;
+
+ vars = kzalloc(sym_count * sizeof(long), GFP_KERNEL);
+
+ if (vars == NULL)
+ status = -ENOMEM;
+
+ if (status == 0) {
+ var_size = kzalloc(sym_count * sizeof(s32), GFP_KERNEL);
+
+ if (var_size == NULL)
+ status = -ENOMEM;
+ }
+
+ if (status == 0) {
+ attrs = kzalloc(sym_count, GFP_KERNEL);
+
+ if (attrs == NULL)
+ status = -ENOMEM;
+ }
+
+ if ((status == 0) && (version > 0)) {
+ proc_attributes = kzalloc(proc_count, GFP_KERNEL);
+
+ if (proc_attributes == NULL)
+ status = -ENOMEM;
+ }
+
+ if (status != 0)
+ goto exit_done;
+
+ delta = version * 2;
+
+ for (i = 0; i < sym_count; ++i) {
+ offset = (sym_table + ((11 + delta) * i));
+
+ value = get_unaligned_be32(&p[offset + 3 + delta]);
+
+ attrs[i] = p[offset];
+
+ /*
+ * use bit 7 of attribute byte to indicate that
+ * this buffer was dynamically allocated
+ * and should be freed later
+ */
+ attrs[i] &= 0x7f;
+
+ var_size[i] = get_unaligned_be32(&p[offset + 7 + delta]);
+
+ /*
+ * Attribute bits:
+ * bit 0: 0 = read-only, 1 = read-write
+ * bit 1: 0 = not compressed, 1 = compressed
+ * bit 2: 0 = not initialized, 1 = initialized
+ * bit 3: 0 = scalar, 1 = array
+ * bit 4: 0 = Boolean, 1 = integer
+ * bit 5: 0 = declared variable,
+ * 1 = compiler created temporary variable
+ */
+
+ if ((attrs[i] & 0x0c) == 0x04)
+ /* initialized scalar variable */
+ vars[i] = value;
+ else if ((attrs[i] & 0x1e) == 0x0e) {
+ /* initialized compressed Boolean array */
+ uncomp_size = get_unaligned_le32(&p[data_sect + value]);
+
+ /* allocate a buffer for the uncompressed data */
+ vars[i] = (long)kzalloc(uncomp_size, GFP_KERNEL);
+ if (vars[i] == 0L)
+ status = -ENOMEM;
+ else {
+ /* set flag so buffer will be freed later */
+ attrs[i] |= 0x80;
+
+ /* uncompress the data */
+ if (altera_shrink(&p[data_sect + value],
+ var_size[i],
+ (u8 *)vars[i],
+ uncomp_size,
+ version) != uncomp_size)
+ /* decompression failed */
+ status = -EIO;
+ else
+ var_size[i] = uncomp_size * 8L;
+
+ }
+ } else if ((attrs[i] & 0x1e) == 0x0c) {
+ /* initialized Boolean array */
+ vars[i] = value + data_sect + (long)p;
+ } else if ((attrs[i] & 0x1c) == 0x1c) {
+ /* initialized integer array */
+ vars[i] = value + data_sect;
+ } else if ((attrs[i] & 0x0c) == 0x08) {
+ /* uninitialized array */
+
+ /* flag attrs so that memory is freed */
+ attrs[i] |= 0x80;
+
+ if (var_size[i] > 0) {
+ u32 size;
+
+ if (attrs[i] & 0x10)
+ /* integer array */
+ size = (var_size[i] * sizeof(s32));
+ else
+ /* Boolean array */
+ size = ((var_size[i] + 7L) / 8L);
+
+ vars[i] = (long)kzalloc(size, GFP_KERNEL);
+
+ if (vars[i] == 0) {
+ status = -ENOMEM;
+ } else {
+ /* zero out memory */
+ for (j = 0; j < size; ++j)
+ ((u8 *)(vars[i]))[j] = 0;
+
+ }
+ } else
+ vars[i] = 0;
+
+ } else
+ vars[i] = 0;
+
+ }
+
+exit_done:
+ if (status != 0)
+ done = 1;
+
+ altera_jinit(astate);
+
+ pc = code_sect;
+ msg_buff[0] = '\0';
+
+ /*
+ * For JBC version 2, we will execute the procedures corresponding to
+ * the selected ACTION
+ */
+ if (version > 0) {
+ if (aconf->action == NULL) {
+ status = -EINVAL;
+ done = 1;
+ } else {
+ int action_found = 0;
+ for (i = 0; (i < action_count) && !action_found; ++i) {
+ name_id = get_unaligned_be32(&p[action_table +
+ (12 * i)]);
+
+ name = &p[str_table + name_id];
+
+ if (strncasecmp(aconf->action, name, strlen(name)) == 0) {
+ action_found = 1;
+ current_proc =
+ get_unaligned_be32(&p[action_table +
+ (12 * i) + 8]);
+ }
+ }
+
+ if (!action_found) {
+ status = -EINVAL;
+ done = 1;
+ }
+ }
+
+ if (status == 0) {
+ int first_time = 1;
+ i = current_proc;
+ while ((i != 0) || first_time) {
+ first_time = 0;
+ /* check procedure attribute byte */
+ proc_attributes[i] =
+ (p[proc_table +
+ (13 * i) + 8] &
+ 0x03);
+
+ /*
+ * BIT0 - OPTIONAL
+ * BIT1 - RECOMMENDED
+ * BIT6 - FORCED OFF
+ * BIT7 - FORCED ON
+ */
+
+ i = get_unaligned_be32(&p[proc_table +
+ (13 * i) + 4]);
+ }
+
+ /*
+ * Set current_proc to the first procedure
+ * to be executed
+ */
+ i = current_proc;
+ while ((i != 0) &&
+ ((proc_attributes[i] == 1) ||
+ ((proc_attributes[i] & 0xc0) == 0x40))) {
+ i = get_unaligned_be32(&p[proc_table +
+ (13 * i) + 4]);
+ }
+
+ if ((i != 0) || ((i == 0) && (current_proc == 0) &&
+ ((proc_attributes[0] != 1) &&
+ ((proc_attributes[0] & 0xc0) != 0x40)))) {
+ current_proc = i;
+ pc = code_sect +
+ get_unaligned_be32(&p[proc_table +
+ (13 * i) + 9]);
+ if ((pc < code_sect) || (pc >= debug_sect))
+ status = -ERANGE;
+ } else
+ /* there are no procedures to execute! */
+ done = 1;
+
+ }
+ }
+
+ msg_buff[0] = '\0';
+
+ while (!done) {
+ opcode = (p[pc] & 0xff);
+ opcode_address = pc;
+ ++pc;
+
+ if (debug > 1)
+ printk("opcode: %02x\n", opcode);
+
+ arg_count = (opcode >> 6) & 3;
+ for (i = 0; i < arg_count; ++i) {
+ args[i] = get_unaligned_be32(&p[pc]);
+ pc += 4;
+ }
+
+ switch (opcode) {
+ case OP_NOP:
+ break;
+ case OP_DUP:
+ if (altera_check_stack(stack_ptr, 1, &status)) {
+ stack[stack_ptr] = stack[stack_ptr - 1];
+ ++stack_ptr;
+ }
+ break;
+ case OP_SWP:
+ if (altera_check_stack(stack_ptr, 2, &status)) {
+ long_tmp = stack[stack_ptr - 2];
+ stack[stack_ptr - 2] = stack[stack_ptr - 1];
+ stack[stack_ptr - 1] = long_tmp;
+ }
+ break;
+ case OP_ADD:
+ if (altera_check_stack(stack_ptr, 2, &status)) {
+ --stack_ptr;
+ stack[stack_ptr - 1] += stack[stack_ptr];
+ }
+ break;
+ case OP_SUB:
+ if (altera_check_stack(stack_ptr, 2, &status)) {
+ --stack_ptr;
+ stack[stack_ptr - 1] -= stack[stack_ptr];
+ }
+ break;
+ case OP_MULT:
+ if (altera_check_stack(stack_ptr, 2, &status)) {
+ --stack_ptr;
+ stack[stack_ptr - 1] *= stack[stack_ptr];
+ }
+ break;
+ case OP_DIV:
+ if (altera_check_stack(stack_ptr, 2, &status)) {
+ --stack_ptr;
+ stack[stack_ptr - 1] /= stack[stack_ptr];
+ }
+ break;
+ case OP_MOD:
+ if (altera_check_stack(stack_ptr, 2, &status)) {
+ --stack_ptr;
+ stack[stack_ptr - 1] %= stack[stack_ptr];
+ }
+ break;
+ case OP_SHL:
+ if (altera_check_stack(stack_ptr, 2, &status)) {
+ --stack_ptr;
+ stack[stack_ptr - 1] <<= stack[stack_ptr];
+ }
+ break;
+ case OP_SHR:
+ if (altera_check_stack(stack_ptr, 2, &status)) {
+ --stack_ptr;
+ stack[stack_ptr - 1] >>= stack[stack_ptr];
+ }
+ break;
+ case OP_NOT:
+ if (altera_check_stack(stack_ptr, 1, &status))
+ stack[stack_ptr - 1] ^= (-1L);
+
+ break;
+ case OP_AND:
+ if (altera_check_stack(stack_ptr, 2, &status)) {
+ --stack_ptr;
+ stack[stack_ptr - 1] &= stack[stack_ptr];
+ }
+ break;
+ case OP_OR:
+ if (altera_check_stack(stack_ptr, 2, &status)) {
+ --stack_ptr;
+ stack[stack_ptr - 1] |= stack[stack_ptr];
+ }
+ break;
+ case OP_XOR:
+ if (altera_check_stack(stack_ptr, 2, &status)) {
+ --stack_ptr;
+ stack[stack_ptr - 1] ^= stack[stack_ptr];
+ }
+ break;
+ case OP_INV:
+ if (!altera_check_stack(stack_ptr, 1, &status))
+ break;
+ stack[stack_ptr - 1] = stack[stack_ptr - 1] ? 0L : 1L;
+ break;
+ case OP_GT:
+ if (!altera_check_stack(stack_ptr, 2, &status))
+ break;
+ --stack_ptr;
+ stack[stack_ptr - 1] =
+ (stack[stack_ptr - 1] > stack[stack_ptr]) ?
+ 1L : 0L;
+
+ break;
+ case OP_LT:
+ if (!altera_check_stack(stack_ptr, 2, &status))
+ break;
+ --stack_ptr;
+ stack[stack_ptr - 1] =
+ (stack[stack_ptr - 1] < stack[stack_ptr]) ?
+ 1L : 0L;
+
+ break;
+ case OP_RET:
+ if ((version > 0) && (stack_ptr == 0)) {
+ /*
+ * We completed one of the main procedures
+ * of an ACTION.
+ * Find the next procedure
+ * to be executed and jump to it.
+ * If there are no more procedures, then EXIT.
+ */
+ i = get_unaligned_be32(&p[proc_table +
+ (13 * current_proc) + 4]);
+ while ((i != 0) &&
+ ((proc_attributes[i] == 1) ||
+ ((proc_attributes[i] & 0xc0) == 0x40)))
+ i = get_unaligned_be32(&p[proc_table +
+ (13 * i) + 4]);
+
+ if (i == 0) {
+ /* no procedures to execute! */
+ done = 1;
+ *exit_code = 0; /* success */
+ } else {
+ current_proc = i;
+ pc = code_sect + get_unaligned_be32(
+ &p[proc_table +
+ (13 * i) + 9]);
+ if ((pc < code_sect) ||
+ (pc >= debug_sect))
+ status = -ERANGE;
+ }
+
+ } else
+ if (altera_check_stack(stack_ptr, 1, &status)) {
+ pc = stack[--stack_ptr] + code_sect;
+ if ((pc <= code_sect) ||
+ (pc >= debug_sect))
+ status = -ERANGE;
+
+ }
+
+ break;
+ case OP_CMPS:
+ /*
+ * Array short compare
+ * ...stack 0 is source 1 value
+ * ...stack 1 is source 2 value
+ * ...stack 2 is mask value
+ * ...stack 3 is count
+ */
+ if (altera_check_stack(stack_ptr, 4, &status)) {
+ s32 a = stack[--stack_ptr];
+ s32 b = stack[--stack_ptr];
+ long_tmp = stack[--stack_ptr];
+ count = stack[stack_ptr - 1];
+
+ if ((count < 1) || (count > 32))
+ status = -ERANGE;
+ else {
+ long_tmp &= ((-1L) >> (32 - count));
+
+ stack[stack_ptr - 1] =
+ ((a & long_tmp) == (b & long_tmp))
+ ? 1L : 0L;
+ }
+ }
+ break;
+ case OP_PINT:
+ /*
+ * PRINT add integer
+ * ...stack 0 is integer value
+ */
+ if (!altera_check_stack(stack_ptr, 1, &status))
+ break;
+ sprintf(&msg_buff[strlen(msg_buff)],
+ "%ld", stack[--stack_ptr]);
+ break;
+ case OP_PRNT:
+ /* PRINT finish */
+ if (debug)
+ printk(msg_buff, "\n");
+
+ msg_buff[0] = '\0';
+ break;
+ case OP_DSS:
+ /*
+ * DRSCAN short
+ * ...stack 0 is scan data
+ * ...stack 1 is count
+ */
+ if (!altera_check_stack(stack_ptr, 2, &status))
+ break;
+ long_tmp = stack[--stack_ptr];
+ count = stack[--stack_ptr];
+ put_unaligned_le32(long_tmp, &charbuf[0]);
+ status = altera_drscan(astate, count, charbuf, 0);
+ break;
+ case OP_DSSC:
+ /*
+ * DRSCAN short with capture
+ * ...stack 0 is scan data
+ * ...stack 1 is count
+ */
+ if (!altera_check_stack(stack_ptr, 2, &status))
+ break;
+ long_tmp = stack[--stack_ptr];
+ count = stack[stack_ptr - 1];
+ put_unaligned_le32(long_tmp, &charbuf[0]);
+ status = altera_swap_dr(astate, count, charbuf,
+ 0, charbuf, 0);
+ stack[stack_ptr - 1] = get_unaligned_le32(&charbuf[0]);
+ break;
+ case OP_ISS:
+ /*
+ * IRSCAN short
+ * ...stack 0 is scan data
+ * ...stack 1 is count
+ */
+ if (!altera_check_stack(stack_ptr, 2, &status))
+ break;
+ long_tmp = stack[--stack_ptr];
+ count = stack[--stack_ptr];
+ put_unaligned_le32(long_tmp, &charbuf[0]);
+ status = altera_irscan(astate, count, charbuf, 0);
+ break;
+ case OP_ISSC:
+ /*
+ * IRSCAN short with capture
+ * ...stack 0 is scan data
+ * ...stack 1 is count
+ */
+ if (!altera_check_stack(stack_ptr, 2, &status))
+ break;
+ long_tmp = stack[--stack_ptr];
+ count = stack[stack_ptr - 1];
+ put_unaligned_le32(long_tmp, &charbuf[0]);
+ status = altera_swap_ir(astate, count, charbuf,
+ 0, charbuf, 0);
+ stack[stack_ptr - 1] = get_unaligned_le32(&charbuf[0]);
+ break;
+ case OP_DPR:
+ if (!altera_check_stack(stack_ptr, 1, &status))
+ break;
+ count = stack[--stack_ptr];
+ status = altera_set_dr_pre(&astate->js, count, 0, NULL);
+ break;
+ case OP_DPRL:
+ /*
+ * DRPRE with literal data
+ * ...stack 0 is count
+ * ...stack 1 is literal data
+ */
+ if (!altera_check_stack(stack_ptr, 2, &status))
+ break;
+ count = stack[--stack_ptr];
+ long_tmp = stack[--stack_ptr];
+ put_unaligned_le32(long_tmp, &charbuf[0]);
+ status = altera_set_dr_pre(&astate->js, count, 0,
+ charbuf);
+ break;
+ case OP_DPO:
+ /*
+ * DRPOST
+ * ...stack 0 is count
+ */
+ if (altera_check_stack(stack_ptr, 1, &status)) {
+ count = stack[--stack_ptr];
+ status = altera_set_dr_post(&astate->js, count,
+ 0, NULL);
+ }
+ break;
+ case OP_DPOL:
+ /*
+ * DRPOST with literal data
+ * ...stack 0 is count
+ * ...stack 1 is literal data
+ */
+ if (!altera_check_stack(stack_ptr, 2, &status))
+ break;
+ count = stack[--stack_ptr];
+ long_tmp = stack[--stack_ptr];
+ put_unaligned_le32(long_tmp, &charbuf[0]);
+ status = altera_set_dr_post(&astate->js, count, 0,
+ charbuf);
+ break;
+ case OP_IPR:
+ if (altera_check_stack(stack_ptr, 1, &status)) {
+ count = stack[--stack_ptr];
+ status = altera_set_ir_pre(&astate->js, count,
+ 0, NULL);
+ }
+ break;
+ case OP_IPRL:
+ /*
+ * IRPRE with literal data
+ * ...stack 0 is count
+ * ...stack 1 is literal data
+ */
+ if (altera_check_stack(stack_ptr, 2, &status)) {
+ count = stack[--stack_ptr];
+ long_tmp = stack[--stack_ptr];
+ put_unaligned_le32(long_tmp, &charbuf[0]);
+ status = altera_set_ir_pre(&astate->js, count,
+ 0, charbuf);
+ }
+ break;
+ case OP_IPO:
+ /*
+ * IRPOST
+ * ...stack 0 is count
+ */
+ if (altera_check_stack(stack_ptr, 1, &status)) {
+ count = stack[--stack_ptr];
+ status = altera_set_ir_post(&astate->js, count,
+ 0, NULL);
+ }
+ break;
+ case OP_IPOL:
+ /*
+ * IRPOST with literal data
+ * ...stack 0 is count
+ * ...stack 1 is literal data
+ */
+ if (!altera_check_stack(stack_ptr, 2, &status))
+ break;
+ count = stack[--stack_ptr];
+ long_tmp = stack[--stack_ptr];
+ put_unaligned_le32(long_tmp, &charbuf[0]);
+ status = altera_set_ir_post(&astate->js, count, 0,
+ charbuf);
+ break;
+ case OP_PCHR:
+ if (altera_check_stack(stack_ptr, 1, &status)) {
+ u8 ch;
+ count = strlen(msg_buff);
+ ch = (char) stack[--stack_ptr];
+ if ((ch < 1) || (ch > 127)) {
+ /*
+ * character code out of range
+ * instead of flagging an error,
+ * force the value to 127
+ */
+ ch = 127;
+ }
+ msg_buff[count] = ch;
+ msg_buff[count + 1] = '\0';
+ }
+ break;
+ case OP_EXIT:
+ if (altera_check_stack(stack_ptr, 1, &status))
+ *exit_code = stack[--stack_ptr];
+
+ done = 1;
+ break;
+ case OP_EQU:
+ if (!altera_check_stack(stack_ptr, 2, &status))
+ break;
+ --stack_ptr;
+ stack[stack_ptr - 1] =
+ (stack[stack_ptr - 1] == stack[stack_ptr]) ?
+ 1L : 0L;
+ break;
+ case OP_POPT:
+ if (altera_check_stack(stack_ptr, 1, &status))
+ --stack_ptr;
+
+ break;
+ case OP_ABS:
+ if (!altera_check_stack(stack_ptr, 1, &status))
+ break;
+ if (stack[stack_ptr - 1] < 0)
+ stack[stack_ptr - 1] = 0 - stack[stack_ptr - 1];
+
+ break;
+ case OP_BCH0:
+ /*
+ * Batch operation 0
+ * SWP
+ * SWPN 7
+ * SWP
+ * SWPN 6
+ * DUPN 8
+ * SWPN 2
+ * SWP
+ * DUPN 6
+ * DUPN 6
+ */
+
+ /* SWP */
+ if (altera_check_stack(stack_ptr, 2, &status)) {
+ long_tmp = stack[stack_ptr - 2];
+ stack[stack_ptr - 2] = stack[stack_ptr - 1];
+ stack[stack_ptr - 1] = long_tmp;
+ }
+
+ /* SWPN 7 */
+ index = 7 + 1;
+ if (altera_check_stack(stack_ptr, index, &status)) {
+ long_tmp = stack[stack_ptr - index];
+ stack[stack_ptr - index] = stack[stack_ptr - 1];
+ stack[stack_ptr - 1] = long_tmp;
+ }
+
+ /* SWP */
+ if (altera_check_stack(stack_ptr, 2, &status)) {
+ long_tmp = stack[stack_ptr - 2];
+ stack[stack_ptr - 2] = stack[stack_ptr - 1];
+ stack[stack_ptr - 1] = long_tmp;
+ }
+
+ /* SWPN 6 */
+ index = 6 + 1;
+ if (altera_check_stack(stack_ptr, index, &status)) {
+ long_tmp = stack[stack_ptr - index];
+ stack[stack_ptr - index] = stack[stack_ptr - 1];
+ stack[stack_ptr - 1] = long_tmp;
+ }
+
+ /* DUPN 8 */
+ index = 8 + 1;
+ if (altera_check_stack(stack_ptr, index, &status)) {
+ stack[stack_ptr] = stack[stack_ptr - index];
+ ++stack_ptr;
+ }
+
+ /* SWPN 2 */
+ index = 2 + 1;
+ if (altera_check_stack(stack_ptr, index, &status)) {
+ long_tmp = stack[stack_ptr - index];
+ stack[stack_ptr - index] = stack[stack_ptr - 1];
+ stack[stack_ptr - 1] = long_tmp;
+ }
+
+ /* SWP */
+ if (altera_check_stack(stack_ptr, 2, &status)) {
+ long_tmp = stack[stack_ptr - 2];
+ stack[stack_ptr - 2] = stack[stack_ptr - 1];
+ stack[stack_ptr - 1] = long_tmp;
+ }
+
+ /* DUPN 6 */
+ index = 6 + 1;
+ if (altera_check_stack(stack_ptr, index, &status)) {
+ stack[stack_ptr] = stack[stack_ptr - index];
+ ++stack_ptr;
+ }
+
+ /* DUPN 6 */
+ index = 6 + 1;
+ if (altera_check_stack(stack_ptr, index, &status)) {
+ stack[stack_ptr] = stack[stack_ptr - index];
+ ++stack_ptr;
+ }
+ break;
+ case OP_PSH0:
+ stack[stack_ptr++] = 0;
+ break;
+ case OP_PSHL:
+ stack[stack_ptr++] = (s32) args[0];
+ break;
+ case OP_PSHV:
+ stack[stack_ptr++] = vars[args[0]];
+ break;
+ case OP_JMP:
+ pc = args[0] + code_sect;
+ if ((pc < code_sect) || (pc >= debug_sect))
+ status = -ERANGE;
+ break;
+ case OP_CALL:
+ stack[stack_ptr++] = pc;
+ pc = args[0] + code_sect;
+ if ((pc < code_sect) || (pc >= debug_sect))
+ status = -ERANGE;
+ break;
+ case OP_NEXT:
+ /*
+ * Process FOR / NEXT loop
+ * ...argument 0 is variable ID
+ * ...stack 0 is step value
+ * ...stack 1 is end value
+ * ...stack 2 is top address
+ */
+ if (altera_check_stack(stack_ptr, 3, &status)) {
+ s32 step = stack[stack_ptr - 1];
+ s32 end = stack[stack_ptr - 2];
+ s32 top = stack[stack_ptr - 3];
+ s32 iterator = vars[args[0]];
+ int break_out = 0;
+
+ if (step < 0) {
+ if (iterator <= end)
+ break_out = 1;
+ } else if (iterator >= end)
+ break_out = 1;
+
+ if (break_out) {
+ stack_ptr -= 3;
+ } else {
+ vars[args[0]] = iterator + step;
+ pc = top + code_sect;
+ if ((pc < code_sect) ||
+ (pc >= debug_sect))
+ status = -ERANGE;
+ }
+ }
+ break;
+ case OP_PSTR:
+ /*
+ * PRINT add string
+ * ...argument 0 is string ID
+ */
+ count = strlen(msg_buff);
+ strlcpy(&msg_buff[count],
+ &p[str_table + args[0]],
+ ALTERA_MESSAGE_LENGTH - count);
+ break;
+ case OP_SINT:
+ /*
+ * STATE intermediate state
+ * ...argument 0 is state code
+ */
+ status = altera_goto_jstate(astate, args[0]);
+ break;
+ case OP_ST:
+ /*
+ * STATE final state
+ * ...argument 0 is state code
+ */
+ status = altera_goto_jstate(astate, args[0]);
+ break;
+ case OP_ISTP:
+ /*
+ * IRSTOP state
+ * ...argument 0 is state code
+ */
+ status = altera_set_irstop(&astate->js, args[0]);
+ break;
+ case OP_DSTP:
+ /*
+ * DRSTOP state
+ * ...argument 0 is state code
+ */
+ status = altera_set_drstop(&astate->js, args[0]);
+ break;
+
+ case OP_SWPN:
+ /*
+ * Exchange top with Nth stack value
+ * ...argument 0 is 0-based stack entry
+ * to swap with top element
+ */
+ index = (args[0]) + 1;
+ if (altera_check_stack(stack_ptr, index, &status)) {
+ long_tmp = stack[stack_ptr - index];
+ stack[stack_ptr - index] = stack[stack_ptr - 1];
+ stack[stack_ptr - 1] = long_tmp;
+ }
+ break;
+ case OP_DUPN:
+ /*
+ * Duplicate Nth stack value
+ * ...argument 0 is 0-based stack entry to duplicate
+ */
+ index = (args[0]) + 1;
+ if (altera_check_stack(stack_ptr, index, &status)) {
+ stack[stack_ptr] = stack[stack_ptr - index];
+ ++stack_ptr;
+ }
+ break;
+ case OP_POPV:
+ /*
+ * Pop stack into scalar variable
+ * ...argument 0 is variable ID
+ * ...stack 0 is value
+ */
+ if (altera_check_stack(stack_ptr, 1, &status))
+ vars[args[0]] = stack[--stack_ptr];
+
+ break;
+ case OP_POPE:
+ /*
+ * Pop stack into integer array element
+ * ...argument 0 is variable ID
+ * ...stack 0 is array index
+ * ...stack 1 is value
+ */
+ if (!altera_check_stack(stack_ptr, 2, &status))
+ break;
+ variable_id = args[0];
+
+ /*
+ * If variable is read-only,
+ * convert to writable array
+ */
+ if ((version > 0) &&
+ ((attrs[variable_id] & 0x9c) == 0x1c)) {
+ /* Allocate a writable buffer for this array */
+ count = var_size[variable_id];
+ long_tmp = vars[variable_id];
+ longptr_tmp = kzalloc(count * sizeof(long),
+ GFP_KERNEL);
+ vars[variable_id] = (long)longptr_tmp;
+
+ if (vars[variable_id] == 0) {
+ status = -ENOMEM;
+ break;
+ }
+
+ /* copy previous contents into buffer */
+ for (i = 0; i < count; ++i) {
+ longptr_tmp[i] =
+ get_unaligned_be32(&p[long_tmp]);
+ long_tmp += sizeof(long);
+ }
+
+ /*
+ * set bit 7 - buffer was
+ * dynamically allocated
+ */
+ attrs[variable_id] |= 0x80;
+
+ /* clear bit 2 - variable is writable */
+ attrs[variable_id] &= ~0x04;
+ attrs[variable_id] |= 0x01;
+
+ }
+
+ /* check that variable is a writable integer array */
+ if ((attrs[variable_id] & 0x1c) != 0x18)
+ status = -ERANGE;
+ else {
+ longptr_tmp = (long *)vars[variable_id];
+
+ /* pop the array index */
+ index = stack[--stack_ptr];
+
+ /* pop the value and store it into the array */
+ longptr_tmp[index] = stack[--stack_ptr];
+ }
+
+ break;
+ case OP_POPA:
+ /*
+ * Pop stack into Boolean array
+ * ...argument 0 is variable ID
+ * ...stack 0 is count
+ * ...stack 1 is array index
+ * ...stack 2 is value
+ */
+ if (!altera_check_stack(stack_ptr, 3, &status))
+ break;
+ variable_id = args[0];
+
+ /*
+ * If variable is read-only,
+ * convert to writable array
+ */
+ if ((version > 0) &&
+ ((attrs[variable_id] & 0x9c) == 0x0c)) {
+ /* Allocate a writable buffer for this array */
+ long_tmp =
+ (var_size[variable_id] + 7L) >> 3L;
+ charptr_tmp2 = (u8 *)vars[variable_id];
+ charptr_tmp =
+ kzalloc(long_tmp, GFP_KERNEL);
+ vars[variable_id] = (long)charptr_tmp;
+
+ if (vars[variable_id] == 0) {
+ status = -ENOMEM;
+ break;
+ }
+
+ /* zero the buffer */
+ for (long_idx = 0L;
+ long_idx < long_tmp;
+ ++long_idx) {
+ charptr_tmp[long_idx] = 0;
+ }
+
+ /* copy previous contents into buffer */
+ for (long_idx = 0L;
+ long_idx < var_size[variable_id];
+ ++long_idx) {
+ long_idx2 = long_idx;
+
+ if (charptr_tmp2[long_idx2 >> 3] &
+ (1 << (long_idx2 & 7))) {
+ charptr_tmp[long_idx >> 3] |=
+ (1 << (long_idx & 7));
+ }
+ }
+
+ /*
+ * set bit 7 - buffer was
+ * dynamically allocated
+ */
+ attrs[variable_id] |= 0x80;
+
+ /* clear bit 2 - variable is writable */
+ attrs[variable_id] &= ~0x04;
+ attrs[variable_id] |= 0x01;
+
+ }
+
+ /*
+ * check that variable is
+ * a writable Boolean array
+ */
+ if ((attrs[variable_id] & 0x1c) != 0x08) {
+ status = -ERANGE;
+ break;
+ }
+
+ charptr_tmp = (u8 *)vars[variable_id];
+
+ /* pop the count (number of bits to copy) */
+ long_count = stack[--stack_ptr];
+
+ /* pop the array index */
+ long_idx = stack[--stack_ptr];
+
+ reverse = 0;
+
+ if (version > 0) {
+ /*
+ * stack 0 = array right index
+ * stack 1 = array left index
+ */
+
+ if (long_idx > long_count) {
+ reverse = 1;
+ long_tmp = long_count;
+ long_count = 1 + long_idx -
+ long_count;
+ long_idx = long_tmp;
+
+ /* reverse POPA is not supported */
+ status = -ERANGE;
+ break;
+ } else
+ long_count = 1 + long_count -
+ long_idx;
+
+ }
+
+ /* pop the data */
+ long_tmp = stack[--stack_ptr];
+
+ if (long_count < 1) {
+ status = -ERANGE;
+ break;
+ }
+
+ for (i = 0; i < long_count; ++i) {
+ if (long_tmp & (1L << (s32) i))
+ charptr_tmp[long_idx >> 3L] |=
+ (1L << (long_idx & 7L));
+ else
+ charptr_tmp[long_idx >> 3L] &=
+ ~(1L << (long_idx & 7L));
+
+ ++long_idx;
+ }
+
+ break;
+ case OP_JMPZ:
+ /*
+ * Pop stack and branch if zero
+ * ...argument 0 is address
+ * ...stack 0 is condition value
+ */
+ if (altera_check_stack(stack_ptr, 1, &status)) {
+ if (stack[--stack_ptr] == 0) {
+ pc = args[0] + code_sect;
+ if ((pc < code_sect) ||
+ (pc >= debug_sect))
+ status = -ERANGE;
+ }
+ }
+ break;
+ case OP_DS:
+ case OP_IS:
+ /*
+ * DRSCAN
+ * IRSCAN
+ * ...argument 0 is scan data variable ID
+ * ...stack 0 is array index
+ * ...stack 1 is count
+ */
+ if (!altera_check_stack(stack_ptr, 2, &status))
+ break;
+ long_idx = stack[--stack_ptr];
+ long_count = stack[--stack_ptr];
+ reverse = 0;
+ if (version > 0) {
+ /*
+ * stack 0 = array right index
+ * stack 1 = array left index
+ * stack 2 = count
+ */
+ long_tmp = long_count;
+ long_count = stack[--stack_ptr];
+
+ if (long_idx > long_tmp) {
+ reverse = 1;
+ long_idx = long_tmp;
+ }
+ }
+
+ charptr_tmp = (u8 *)vars[args[0]];
+
+ if (reverse) {
+ /*
+ * allocate a buffer
+ * and reverse the data order
+ */
+ charptr_tmp2 = charptr_tmp;
+ charptr_tmp = kzalloc((long_count >> 3) + 1,
+ GFP_KERNEL);
+ if (charptr_tmp == NULL) {
+ status = -ENOMEM;
+ break;
+ }
+
+ long_tmp = long_idx + long_count - 1;
+ long_idx2 = 0;
+ while (long_idx2 < long_count) {
+ if (charptr_tmp2[long_tmp >> 3] &
+ (1 << (long_tmp & 7)))
+ charptr_tmp[long_idx2 >> 3] |=
+ (1 << (long_idx2 & 7));
+ else
+ charptr_tmp[long_idx2 >> 3] &=
+ ~(1 << (long_idx2 & 7));
+
+ --long_tmp;
+ ++long_idx2;
+ }
+ }
+
+ if (opcode == 0x51) /* DS */
+ status = altera_drscan(astate, long_count,
+ charptr_tmp, long_idx);
+ else /* IS */
+ status = altera_irscan(astate, long_count,
+ charptr_tmp, long_idx);
+
+ if (reverse)
+ kfree(charptr_tmp);
+
+ break;
+ case OP_DPRA:
+ /*
+ * DRPRE with array data
+ * ...argument 0 is variable ID
+ * ...stack 0 is array index
+ * ...stack 1 is count
+ */
+ if (!altera_check_stack(stack_ptr, 2, &status))
+ break;
+ index = stack[--stack_ptr];
+ count = stack[--stack_ptr];
+
+ if (version > 0)
+ /*
+ * stack 0 = array right index
+ * stack 1 = array left index
+ */
+ count = 1 + count - index;
+
+ charptr_tmp = (u8 *)vars[args[0]];
+ status = altera_set_dr_pre(&astate->js, count, index,
+ charptr_tmp);
+ break;
+ case OP_DPOA:
+ /*
+ * DRPOST with array data
+ * ...argument 0 is variable ID
+ * ...stack 0 is array index
+ * ...stack 1 is count
+ */
+ if (!altera_check_stack(stack_ptr, 2, &status))
+ break;
+ index = stack[--stack_ptr];
+ count = stack[--stack_ptr];
+
+ if (version > 0)
+ /*
+ * stack 0 = array right index
+ * stack 1 = array left index
+ */
+ count = 1 + count - index;
+
+ charptr_tmp = (u8 *)vars[args[0]];
+ status = altera_set_dr_post(&astate->js, count, index,
+ charptr_tmp);
+ break;
+ case OP_IPRA:
+ /*
+ * IRPRE with array data
+ * ...argument 0 is variable ID
+ * ...stack 0 is array index
+ * ...stack 1 is count
+ */
+ if (!altera_check_stack(stack_ptr, 2, &status))
+ break;
+ index = stack[--stack_ptr];
+ count = stack[--stack_ptr];
+
+ if (version > 0)
+ /*
+ * stack 0 = array right index
+ * stack 1 = array left index
+ */
+ count = 1 + count - index;
+
+ charptr_tmp = (u8 *)vars[args[0]];
+ status = altera_set_ir_pre(&astate->js, count, index,
+ charptr_tmp);
+
+ break;
+ case OP_IPOA:
+ /*
+ * IRPOST with array data
+ * ...argument 0 is variable ID
+ * ...stack 0 is array index
+ * ...stack 1 is count
+ */
+ if (!altera_check_stack(stack_ptr, 2, &status))
+ break;
+ index = stack[--stack_ptr];
+ count = stack[--stack_ptr];
+
+ if (version > 0)
+ /*
+ * stack 0 = array right index
+ * stack 1 = array left index
+ */
+ count = 1 + count - index;
+
+ charptr_tmp = (u8 *)vars[args[0]];
+ status = altera_set_ir_post(&astate->js, count, index,
+ charptr_tmp);
+
+ break;
+ case OP_EXPT:
+ /*
+ * EXPORT
+ * ...argument 0 is string ID
+ * ...stack 0 is integer expression
+ */
+ if (altera_check_stack(stack_ptr, 1, &status)) {
+ name = &p[str_table + args[0]];
+ long_tmp = stack[--stack_ptr];
+ altera_export_int(name, long_tmp);
+ }
+ break;
+ case OP_PSHE:
+ /*
+ * Push integer array element
+ * ...argument 0 is variable ID
+ * ...stack 0 is array index
+ */
+ if (!altera_check_stack(stack_ptr, 1, &status))
+ break;
+ variable_id = args[0];
+ index = stack[stack_ptr - 1];
+
+ /* check variable type */
+ if ((attrs[variable_id] & 0x1f) == 0x19) {
+ /* writable integer array */
+ longptr_tmp = (long *)vars[variable_id];
+ stack[stack_ptr - 1] = longptr_tmp[index];
+ } else if ((attrs[variable_id] & 0x1f) == 0x1c) {
+ /* read-only integer array */
+ long_tmp = vars[variable_id] +
+ (index * sizeof(long));
+ stack[stack_ptr - 1] =
+ get_unaligned_be32(&p[long_tmp]);
+ } else
+ status = -ERANGE;
+
+ break;
+ case OP_PSHA:
+ /*
+ * Push Boolean array
+ * ...argument 0 is variable ID
+ * ...stack 0 is count
+ * ...stack 1 is array index
+ */
+ if (!altera_check_stack(stack_ptr, 2, &status))
+ break;
+ variable_id = args[0];
+
+ /* check that variable is a Boolean array */
+ if ((attrs[variable_id] & 0x18) != 0x08) {
+ status = -ERANGE;
+ break;
+ }
+
+ charptr_tmp = (u8 *)vars[variable_id];
+
+ /* pop the count (number of bits to copy) */
+ count = stack[--stack_ptr];
+
+ /* pop the array index */
+ index = stack[stack_ptr - 1];
+
+ if (version > 0)
+ /*
+ * stack 0 = array right index
+ * stack 1 = array left index
+ */
+ count = 1 + count - index;
+
+ if ((count < 1) || (count > 32)) {
+ status = -ERANGE;
+ break;
+ }
+
+ long_tmp = 0L;
+
+ for (i = 0; i < count; ++i)
+ if (charptr_tmp[(i + index) >> 3] &
+ (1 << ((i + index) & 7)))
+ long_tmp |= (1L << i);
+
+ stack[stack_ptr - 1] = long_tmp;
+
+ break;
+ case OP_DYNA:
+ /*
+ * Dynamically change size of array
+ * ...argument 0 is variable ID
+ * ...stack 0 is new size
+ */
+ if (!altera_check_stack(stack_ptr, 1, &status))
+ break;
+ variable_id = args[0];
+ long_tmp = stack[--stack_ptr];
+
+ if (long_tmp > var_size[variable_id]) {
+ var_size[variable_id] = long_tmp;
+
+ if (attrs[variable_id] & 0x10)
+ /* allocate integer array */
+ long_tmp *= sizeof(long);
+ else
+ /* allocate Boolean array */
+ long_tmp = (long_tmp + 7) >> 3;
+
+ /*
+ * If the buffer was previously allocated,
+ * free it
+ */
+ if (attrs[variable_id] & 0x80) {
+ kfree((void *)vars[variable_id]);
+ vars[variable_id] = 0;
+ }
+
+ /*
+ * Allocate a new buffer
+ * of the requested size
+ */
+ vars[variable_id] = (long)
+ kzalloc(long_tmp, GFP_KERNEL);
+
+ if (vars[variable_id] == 0) {
+ status = -ENOMEM;
+ break;
+ }
+
+ /*
+ * Set the attribute bit to indicate that
+ * this buffer was dynamically allocated and
+ * should be freed later
+ */
+ attrs[variable_id] |= 0x80;
+
+ /* zero out memory */
+ count = ((var_size[variable_id] + 7L) /
+ 8L);
+ charptr_tmp = (u8 *)(vars[variable_id]);
+ for (index = 0; index < count; ++index)
+ charptr_tmp[index] = 0;
+
+ }
+
+ break;
+ case OP_EXPV:
+ /*
+ * Export Boolean array
+ * ...argument 0 is string ID
+ * ...stack 0 is variable ID
+ * ...stack 1 is array right index
+ * ...stack 2 is array left index
+ */
+ if (!altera_check_stack(stack_ptr, 3, &status))
+ break;
+ if (version == 0) {
+ /* EXPV is not supported in JBC 1.0 */
+ bad_opcode = 1;
+ break;
+ }
+ name = &p[str_table + args[0]];
+ variable_id = stack[--stack_ptr];
+ long_idx = stack[--stack_ptr];/* right indx */
+ long_idx2 = stack[--stack_ptr];/* left indx */
+
+ if (long_idx > long_idx2) {
+ /* reverse indices not supported */
+ status = -ERANGE;
+ break;
+ }
+
+ long_count = 1 + long_idx2 - long_idx;
+
+ charptr_tmp = (u8 *)vars[variable_id];
+ charptr_tmp2 = NULL;
+
+ if ((long_idx & 7L) != 0) {
+ s32 k = long_idx;
+ charptr_tmp2 =
+ kzalloc(((long_count + 7L) / 8L),
+ GFP_KERNEL);
+ if (charptr_tmp2 == NULL) {
+ status = -ENOMEM;
+ break;
+ }
+
+ for (i = 0; i < long_count; ++i) {
+ if (charptr_tmp[k >> 3] &
+ (1 << (k & 7)))
+ charptr_tmp2[i >> 3] |=
+ (1 << (i & 7));
+ else
+ charptr_tmp2[i >> 3] &=
+ ~(1 << (i & 7));
+
+ ++k;
+ }
+ charptr_tmp = charptr_tmp2;
+
+ } else if (long_idx != 0)
+ charptr_tmp = &charptr_tmp[long_idx >> 3];
+
+ altera_export_bool_array(name, charptr_tmp,
+ long_count);
+
+ /* free allocated buffer */
+ if ((long_idx & 7L) != 0)
+ kfree(charptr_tmp2);
+
+ break;
+ case OP_COPY: {
+ /*
+ * Array copy
+ * ...argument 0 is dest ID
+ * ...argument 1 is source ID
+ * ...stack 0 is count
+ * ...stack 1 is dest index
+ * ...stack 2 is source index
+ */
+ s32 copy_count;
+ s32 copy_index;
+ s32 copy_index2;
+ s32 destleft;
+ s32 src_count;
+ s32 dest_count;
+ int src_reverse = 0;
+ int dest_reverse = 0;
+
+ if (!altera_check_stack(stack_ptr, 3, &status))
+ break;
+
+ copy_count = stack[--stack_ptr];
+ copy_index = stack[--stack_ptr];
+ copy_index2 = stack[--stack_ptr];
+ reverse = 0;
+
+ if (version > 0) {
+ /*
+ * stack 0 = source right index
+ * stack 1 = source left index
+ * stack 2 = destination right index
+ * stack 3 = destination left index
+ */
+ destleft = stack[--stack_ptr];
+
+ if (copy_count > copy_index) {
+ src_reverse = 1;
+ reverse = 1;
+ src_count = 1 + copy_count - copy_index;
+ /* copy_index = source start index */
+ } else {
+ src_count = 1 + copy_index - copy_count;
+ /* source start index */
+ copy_index = copy_count;
+ }
+
+ if (copy_index2 > destleft) {
+ dest_reverse = 1;
+ reverse = !reverse;
+ dest_count = 1 + copy_index2 - destleft;
+ /* destination start index */
+ copy_index2 = destleft;
+ } else
+ dest_count = 1 + destleft - copy_index2;
+
+ copy_count = (src_count < dest_count) ?
+ src_count : dest_count;
+
+ if ((src_reverse || dest_reverse) &&
+ (src_count != dest_count))
+ /*
+ * If either the source or destination
+ * is reversed, we can't tolerate
+ * a length mismatch, because we
+ * "left justify" arrays when copying.
+ * This won't work correctly
+ * with reversed arrays.
+ */
+ status = -ERANGE;
+
+ }
+
+ count = copy_count;
+ index = copy_index;
+ index2 = copy_index2;
+
+ /*
+ * If destination is a read-only array,
+ * allocate a buffer and convert it to a writable array
+ */
+ variable_id = args[1];
+ if ((version > 0) &&
+ ((attrs[variable_id] & 0x9c) == 0x0c)) {
+ /* Allocate a writable buffer for this array */
+ long_tmp =
+ (var_size[variable_id] + 7L) >> 3L;
+ charptr_tmp2 = (u8 *)vars[variable_id];
+ charptr_tmp =
+ kzalloc(long_tmp, GFP_KERNEL);
+ vars[variable_id] = (long)charptr_tmp;
+
+ if (vars[variable_id] == 0) {
+ status = -ENOMEM;
+ break;
+ }
+
+ /* zero the buffer */
+ for (long_idx = 0L; long_idx < long_tmp;
+ ++long_idx)
+ charptr_tmp[long_idx] = 0;
+
+ /* copy previous contents into buffer */
+ for (long_idx = 0L;
+ long_idx < var_size[variable_id];
+ ++long_idx) {
+ long_idx2 = long_idx;
+
+ if (charptr_tmp2[long_idx2 >> 3] &
+ (1 << (long_idx2 & 7)))
+ charptr_tmp[long_idx >> 3] |=
+ (1 << (long_idx & 7));
+
+ }
+
+ /*
+ set bit 7 - buffer was dynamically allocated */
+ attrs[variable_id] |= 0x80;
+
+ /* clear bit 2 - variable is writable */
+ attrs[variable_id] &= ~0x04;
+ attrs[variable_id] |= 0x01;
+ }
+
+ charptr_tmp = (u8 *)vars[args[1]];
+ charptr_tmp2 = (u8 *)vars[args[0]];
+
+ /* check if destination is a writable Boolean array */
+ if ((attrs[args[1]] & 0x1c) != 0x08) {
+ status = -ERANGE;
+ break;
+ }
+
+ if (count < 1) {
+ status = -ERANGE;
+ break;
+ }
+
+ if (reverse)
+ index2 += (count - 1);
+
+ for (i = 0; i < count; ++i) {
+ if (charptr_tmp2[index >> 3] &
+ (1 << (index & 7)))
+ charptr_tmp[index2 >> 3] |=
+ (1 << (index2 & 7));
+ else
+ charptr_tmp[index2 >> 3] &=
+ ~(1 << (index2 & 7));
+
+ ++index;
+ if (reverse)
+ --index2;
+ else
+ ++index2;
+ }
+
+ break;
+ }
+ case OP_DSC:
+ case OP_ISC: {
+ /*
+ * DRSCAN with capture
+ * IRSCAN with capture
+ * ...argument 0 is scan data variable ID
+ * ...argument 1 is capture variable ID
+ * ...stack 0 is capture index
+ * ...stack 1 is scan data index
+ * ...stack 2 is count
+ */
+ s32 scan_right, scan_left;
+ s32 capture_count = 0;
+ s32 scan_count = 0;
+ s32 capture_index;
+ s32 scan_index;
+
+ if (!altera_check_stack(stack_ptr, 3, &status))
+ break;
+
+ capture_index = stack[--stack_ptr];
+ scan_index = stack[--stack_ptr];
+
+ if (version > 0) {
+ /*
+ * stack 0 = capture right index
+ * stack 1 = capture left index
+ * stack 2 = scan right index
+ * stack 3 = scan left index
+ * stack 4 = count
+ */
+ scan_right = stack[--stack_ptr];
+ scan_left = stack[--stack_ptr];
+ capture_count = 1 + scan_index - capture_index;
+ scan_count = 1 + scan_left - scan_right;
+ scan_index = scan_right;
+ }
+
+ long_count = stack[--stack_ptr];
+ /*
+ * If capture array is read-only, allocate a buffer
+ * and convert it to a writable array
+ */
+ variable_id = args[1];
+ if ((version > 0) &&
+ ((attrs[variable_id] & 0x9c) == 0x0c)) {
+ /* Allocate a writable buffer for this array */
+ long_tmp =
+ (var_size[variable_id] + 7L) >> 3L;
+ charptr_tmp2 = (u8 *)vars[variable_id];
+ charptr_tmp =
+ kzalloc(long_tmp, GFP_KERNEL);
+ vars[variable_id] = (long)charptr_tmp;
+
+ if (vars[variable_id] == 0) {
+ status = -ENOMEM;
+ break;
+ }
+
+ /* zero the buffer */
+ for (long_idx = 0L; long_idx < long_tmp;
+ ++long_idx)
+ charptr_tmp[long_idx] = 0;
+
+ /* copy previous contents into buffer */
+ for (long_idx = 0L;
+ long_idx < var_size[variable_id];
+ ++long_idx) {
+ long_idx2 = long_idx;
+
+ if (charptr_tmp2[long_idx2 >> 3] &
+ (1 << (long_idx2 & 7)))
+ charptr_tmp[long_idx >> 3] |=
+ (1 << (long_idx & 7));
+
+ }
+
+ /*
+ * set bit 7 - buffer was
+ * dynamically allocated
+ */
+ attrs[variable_id] |= 0x80;
+
+ /* clear bit 2 - variable is writable */
+ attrs[variable_id] &= ~0x04;
+ attrs[variable_id] |= 0x01;
+
+ }
+
+ charptr_tmp = (u8 *)vars[args[0]];
+ charptr_tmp2 = (u8 *)vars[args[1]];
+
+ if ((version > 0) &&
+ ((long_count > capture_count) ||
+ (long_count > scan_count))) {
+ status = -ERANGE;
+ break;
+ }
+
+ /*
+ * check that capture array
+ * is a writable Boolean array
+ */
+ if ((attrs[args[1]] & 0x1c) != 0x08) {
+ status = -ERANGE;
+ break;
+ }
+
+ if (status == 0) {
+ if (opcode == 0x82) /* DSC */
+ status = altera_swap_dr(astate,
+ long_count,
+ charptr_tmp,
+ scan_index,
+ charptr_tmp2,
+ capture_index);
+ else /* ISC */
+ status = altera_swap_ir(astate,
+ long_count,
+ charptr_tmp,
+ scan_index,
+ charptr_tmp2,
+ capture_index);
+
+ }
+
+ break;
+ }
+ case OP_WAIT:
+ /*
+ * WAIT
+ * ...argument 0 is wait state
+ * ...argument 1 is end state
+ * ...stack 0 is cycles
+ * ...stack 1 is microseconds
+ */
+ if (!altera_check_stack(stack_ptr, 2, &status))
+ break;
+ long_tmp = stack[--stack_ptr];
+
+ if (long_tmp != 0L)
+ status = altera_wait_cycles(astate, long_tmp,
+ args[0]);
+
+ long_tmp = stack[--stack_ptr];
+
+ if ((status == 0) && (long_tmp != 0L))
+ status = altera_wait_msecs(astate,
+ long_tmp,
+ args[0]);
+
+ if ((status == 0) && (args[1] != args[0]))
+ status = altera_goto_jstate(astate,
+ args[1]);
+
+ if (version > 0) {
+ --stack_ptr; /* throw away MAX cycles */
+ --stack_ptr; /* throw away MAX microseconds */
+ }
+ break;
+ case OP_CMPA: {
+ /*
+ * Array compare
+ * ...argument 0 is source 1 ID
+ * ...argument 1 is source 2 ID
+ * ...argument 2 is mask ID
+ * ...stack 0 is source 1 index
+ * ...stack 1 is source 2 index
+ * ...stack 2 is mask index
+ * ...stack 3 is count
+ */
+ s32 a, b;
+ u8 *source1 = (u8 *)vars[args[0]];
+ u8 *source2 = (u8 *)vars[args[1]];
+ u8 *mask = (u8 *)vars[args[2]];
+ u32 index1;
+ u32 index2;
+ u32 mask_index;
+
+ if (!altera_check_stack(stack_ptr, 4, &status))
+ break;
+
+ index1 = stack[--stack_ptr];
+ index2 = stack[--stack_ptr];
+ mask_index = stack[--stack_ptr];
+ long_count = stack[--stack_ptr];
+
+ if (version > 0) {
+ /*
+ * stack 0 = source 1 right index
+ * stack 1 = source 1 left index
+ * stack 2 = source 2 right index
+ * stack 3 = source 2 left index
+ * stack 4 = mask right index
+ * stack 5 = mask left index
+ */
+ s32 mask_right = stack[--stack_ptr];
+ s32 mask_left = stack[--stack_ptr];
+ /* source 1 count */
+ a = 1 + index2 - index1;
+ /* source 2 count */
+ b = 1 + long_count - mask_index;
+ a = (a < b) ? a : b;
+ /* mask count */
+ b = 1 + mask_left - mask_right;
+ a = (a < b) ? a : b;
+ /* source 2 start index */
+ index2 = mask_index;
+ /* mask start index */
+ mask_index = mask_right;
+ long_count = a;
+ }
+
+ long_tmp = 1L;
+
+ if (long_count < 1)
+ status = -ERANGE;
+ else {
+ count = long_count;
+
+ for (i = 0; i < count; ++i) {
+ if (mask[mask_index >> 3] &
+ (1 << (mask_index & 7))) {
+ a = source1[index1 >> 3] &
+ (1 << (index1 & 7))
+ ? 1 : 0;
+ b = source2[index2 >> 3] &
+ (1 << (index2 & 7))
+ ? 1 : 0;
+
+ if (a != b) /* failure */
+ long_tmp = 0L;
+ }
+ ++index1;
+ ++index2;
+ ++mask_index;
+ }
+ }
+
+ stack[stack_ptr++] = long_tmp;
+
+ break;
+ }
+ default:
+ /* Unrecognized opcode -- ERROR! */
+ bad_opcode = 1;
+ break;
+ }
+
+ if (bad_opcode)
+ status = -ENOSYS;
+
+ if ((stack_ptr < 0) || (stack_ptr >= ALTERA_STACK_SIZE))
+ status = -EOVERFLOW;
+
+ if (status != 0) {
+ done = 1;
+ *error_address = (s32)(opcode_address - code_sect);
+ }
+ }
+
+ altera_free_buffers(astate);
+
+ /* Free all dynamically allocated arrays */
+ if ((attrs != NULL) && (vars != NULL))
+ for (i = 0; i < sym_count; ++i)
+ if (attrs[i] & 0x80)
+ kfree((void *)vars[i]);
+
+ kfree(vars);
+ kfree(var_size);
+ kfree(attrs);
+ kfree(proc_attributes);
+
+ return status;
+}
+
+static int altera_get_note(u8 *p, s32 program_size,
+ s32 *offset, char *key, char *value, int length)
+/*
+ * Gets key and value of NOTE fields in the JBC file.
+ * Can be called in two modes: if offset pointer is NULL,
+ * then the function searches for note fields which match
+ * the key string provided. If offset is not NULL, then
+ * the function finds the next note field of any key,
+ * starting at the offset specified by the offset pointer.
+ * Returns 0 for success, else appropriate error code
+ */
+{
+ int status = -ENODATA;
+ u32 note_strings = 0L;
+ u32 note_table = 0L;
+ u32 note_count = 0L;
+ u32 first_word = 0L;
+ int version = 0;
+ int delta = 0;
+ char *key_ptr;
+ char *value_ptr;
+ int i;
+
+ /* Read header information */
+ if (program_size > 52L) {
+ first_word = get_unaligned_be32(&p[0]);
+ version = (first_word & 1L);
+ delta = version * 8;
+
+ note_strings = get_unaligned_be32(&p[8 + delta]);
+ note_table = get_unaligned_be32(&p[12 + delta]);
+ note_count = get_unaligned_be32(&p[44 + (2 * delta)]);
+ }
+
+ if ((first_word != 0x4A414D00L) && (first_word != 0x4A414D01L))
+ return -EIO;
+
+ if (note_count <= 0L)
+ return status;
+
+ if (offset == NULL) {
+ /*
+ * We will search for the first note with a specific key,
+ * and return only the value
+ */
+ for (i = 0; (i < note_count) &&
+ (status != 0); ++i) {
+ key_ptr = &p[note_strings +
+ get_unaligned_be32(
+ &p[note_table + (8 * i)])];
+ if ((strncasecmp(key, key_ptr, strlen(key_ptr)) == 0) &&
+ (key != NULL)) {
+ status = 0;
+
+ value_ptr = &p[note_strings +
+ get_unaligned_be32(
+ &p[note_table + (8 * i) + 4])];
+
+ if (value != NULL)
+ strlcpy(value, value_ptr, length);
+
+ }
+ }
+ } else {
+ /*
+ * We will search for the next note, regardless of the key,
+ * and return both the value and the key
+ */
+
+ i = *offset;
+
+ if ((i >= 0) && (i < note_count)) {
+ status = 0;
+
+ if (key != NULL)
+ strlcpy(key, &p[note_strings +
+ get_unaligned_be32(
+ &p[note_table + (8 * i)])],
+ length);
+
+ if (value != NULL)
+ strlcpy(value, &p[note_strings +
+ get_unaligned_be32(
+ &p[note_table + (8 * i) + 4])],
+ length);
+
+ *offset = i + 1;
+ }
+ }
+
+ return status;
+}
+
+static int altera_check_crc(u8 *p, s32 program_size)
+{
+ int status = 0;
+ u16 local_expected = 0,
+ local_actual = 0,
+ shift_reg = 0xffff;
+ int bit, feedback;
+ u8 databyte;
+ u32 i;
+ u32 crc_section = 0L;
+ u32 first_word = 0L;
+ int version = 0;
+ int delta = 0;
+
+ if (program_size > 52L) {
+ first_word = get_unaligned_be32(&p[0]);
+ version = (first_word & 1L);
+ delta = version * 8;
+
+ crc_section = get_unaligned_be32(&p[32 + delta]);
+ }
+
+ if ((first_word != 0x4A414D00L) && (first_word != 0x4A414D01L))
+ status = -EIO;
+
+ if (crc_section >= program_size)
+ status = -EIO;
+
+ if (status == 0) {
+ local_expected = (u16)get_unaligned_be16(&p[crc_section]);
+
+ for (i = 0; i < crc_section; ++i) {
+ databyte = p[i];
+ for (bit = 0; bit < 8; bit++) {
+ feedback = (databyte ^ shift_reg) & 0x01;
+ shift_reg >>= 1;
+ if (feedback)
+ shift_reg ^= 0x8408;
+
+ databyte >>= 1;
+ }
+ }
+
+ local_actual = (u16)~shift_reg;
+
+ if (local_expected != local_actual)
+ status = -EILSEQ;
+
+ }
+
+ if (debug || status) {
+ switch (status) {
+ case 0:
+ printk(KERN_INFO "%s: CRC matched: %04x\n", __func__,
+ local_actual);
+ break;
+ case -EILSEQ:
+ printk(KERN_ERR "%s: CRC mismatch: expected %04x, "
+ "actual %04x\n", __func__, local_expected,
+ local_actual);
+ break;
+ case -ENODATA:
+ printk(KERN_ERR "%s: expected CRC not found, "
+ "actual CRC = %04x\n", __func__,
+ local_actual);
+ break;
+ case -EIO:
+ printk(KERN_ERR "%s: error: format isn't "
+ "recognized.\n", __func__);
+ break;
+ default:
+ printk(KERN_ERR "%s: CRC function returned error "
+ "code %d\n", __func__, status);
+ break;
+ }
+ }
+
+ return status;
+}
+
+static int altera_get_file_info(u8 *p,
+ s32 program_size,
+ int *format_version,
+ int *action_count,
+ int *procedure_count)
+{
+ int status = -EIO;
+ u32 first_word = 0;
+ int version = 0;
+
+ if (program_size <= 52L)
+ return status;
+
+ first_word = get_unaligned_be32(&p[0]);
+
+ if ((first_word == 0x4A414D00L) || (first_word == 0x4A414D01L)) {
+ status = 0;
+
+ version = (first_word & 1L);
+ *format_version = version + 1;
+
+ if (version > 0) {
+ *action_count = get_unaligned_be32(&p[48]);
+ *procedure_count = get_unaligned_be32(&p[52]);
+ }
+ }
+
+ return status;
+}
+
+static int altera_get_act_info(u8 *p,
+ s32 program_size,
+ int index,
+ char **name,
+ char **description,
+ struct altera_procinfo **proc_list)
+{
+ int status = -EIO;
+ struct altera_procinfo *procptr = NULL;
+ struct altera_procinfo *tmpptr = NULL;
+ u32 first_word = 0L;
+ u32 action_table = 0L;
+ u32 proc_table = 0L;
+ u32 str_table = 0L;
+ u32 note_strings = 0L;
+ u32 action_count = 0L;
+ u32 proc_count = 0L;
+ u32 act_name_id = 0L;
+ u32 act_desc_id = 0L;
+ u32 act_proc_id = 0L;
+ u32 act_proc_name = 0L;
+ u8 act_proc_attribute = 0;
+
+ if (program_size <= 52L)
+ return status;
+ /* Read header information */
+ first_word = get_unaligned_be32(&p[0]);
+
+ if (first_word != 0x4A414D01L)
+ return status;
+
+ action_table = get_unaligned_be32(&p[4]);
+ proc_table = get_unaligned_be32(&p[8]);
+ str_table = get_unaligned_be32(&p[12]);
+ note_strings = get_unaligned_be32(&p[16]);
+ action_count = get_unaligned_be32(&p[48]);
+ proc_count = get_unaligned_be32(&p[52]);
+
+ if (index >= action_count)
+ return status;
+
+ act_name_id = get_unaligned_be32(&p[action_table + (12 * index)]);
+ act_desc_id = get_unaligned_be32(&p[action_table + (12 * index) + 4]);
+ act_proc_id = get_unaligned_be32(&p[action_table + (12 * index) + 8]);
+
+ *name = &p[str_table + act_name_id];
+
+ if (act_desc_id < (note_strings - str_table))
+ *description = &p[str_table + act_desc_id];
+
+ do {
+ act_proc_name = get_unaligned_be32(
+ &p[proc_table + (13 * act_proc_id)]);
+ act_proc_attribute =
+ (p[proc_table + (13 * act_proc_id) + 8] & 0x03);
+
+ procptr =
+ kzalloc(sizeof(struct altera_procinfo),
+ GFP_KERNEL);
+
+ if (procptr == NULL)
+ status = -ENOMEM;
+ else {
+ procptr->name = &p[str_table + act_proc_name];
+ procptr->attrs = act_proc_attribute;
+ procptr->next = NULL;
+
+ /* add record to end of linked list */
+ if (*proc_list == NULL)
+ *proc_list = procptr;
+ else {
+ tmpptr = *proc_list;
+ while (tmpptr->next != NULL)
+ tmpptr = tmpptr->next;
+ tmpptr->next = procptr;
+ }
+ }
+
+ act_proc_id = get_unaligned_be32(
+ &p[proc_table + (13 * act_proc_id) + 4]);
+ } while ((act_proc_id != 0) && (act_proc_id < proc_count));
+
+ return status;
+}
+
+int altera_init(struct altera_config *config, const struct firmware *fw)
+{
+ struct altera_state *astate = NULL;
+ struct altera_procinfo *proc_list = NULL;
+ struct altera_procinfo *procptr = NULL;
+ char *key = NULL;
+ char *value = NULL;
+ char *action_name = NULL;
+ char *description = NULL;
+ int exec_result = 0;
+ int exit_code = 0;
+ int format_version = 0;
+ int action_count = 0;
+ int procedure_count = 0;
+ int index = 0;
+ s32 offset = 0L;
+ s32 error_address = 0L;
+ int retval = 0;
+
+ key = kzalloc(33, GFP_KERNEL);
+ if (!key) {
+ retval = -ENOMEM;
+ goto out;
+ }
+ value = kzalloc(257, GFP_KERNEL);
+ if (!value) {
+ retval = -ENOMEM;
+ goto free_key;
+ }
+ astate = kzalloc(sizeof(struct altera_state), GFP_KERNEL);
+ if (!astate) {
+ retval = -ENOMEM;
+ goto free_value;
+ }
+
+ astate->config = config;
+ if (!astate->config->jtag_io) {
+ dprintk("%s: using byteblaster!\n", __func__);
+ astate->config->jtag_io = netup_jtag_io_lpt;
+ }
+
+ altera_check_crc((u8 *)fw->data, fw->size);
+
+ if (debug) {
+ altera_get_file_info((u8 *)fw->data, fw->size, &format_version,
+ &action_count, &procedure_count);
+ printk(KERN_INFO "%s: File format is %s ByteCode format\n",
+ __func__, (format_version == 2) ? "Jam STAPL" :
+ "pre-standardized Jam 1.1");
+ while (altera_get_note((u8 *)fw->data, fw->size,
+ &offset, key, value, 256) == 0)
+ printk(KERN_INFO "%s: NOTE \"%s\" = \"%s\"\n",
+ __func__, key, value);
+ }
+
+ if (debug && (format_version == 2) && (action_count > 0)) {
+ printk(KERN_INFO "%s: Actions available:\n", __func__);
+ for (index = 0; index < action_count; ++index) {
+ altera_get_act_info((u8 *)fw->data, fw->size,
+ index, &action_name,
+ &description,
+ &proc_list);
+
+ if (description == NULL)
+ printk(KERN_INFO "%s: %s\n",
+ __func__,
+ action_name);
+ else
+ printk(KERN_INFO "%s: %s \"%s\"\n",
+ __func__,
+ action_name,
+ description);
+
+ procptr = proc_list;
+ while (procptr != NULL) {
+ if (procptr->attrs != 0)
+ printk(KERN_INFO "%s: %s (%s)\n",
+ __func__,
+ procptr->name,
+ (procptr->attrs == 1) ?
+ "optional" : "recommended");
+
+ proc_list = procptr->next;
+ kfree(procptr);
+ procptr = proc_list;
+ }
+ }
+
+ printk(KERN_INFO "\n");
+ }
+
+ exec_result = altera_execute(astate, (u8 *)fw->data, fw->size,
+ &error_address, &exit_code, &format_version);
+
+ if (exit_code)
+ exec_result = -EREMOTEIO;
+
+ if ((format_version == 2) && (exec_result == -EINVAL)) {
+ if (astate->config->action == NULL)
+ printk(KERN_ERR "%s: error: no action specified for "
+ "Jam STAPL file.\nprogram terminated.\n",
+ __func__);
+ else
+ printk(KERN_ERR "%s: error: action \"%s\""
+ " is not supported "
+ "for this Jam STAPL file.\n"
+ "Program terminated.\n", __func__,
+ astate->config->action);
+
+ } else if (exec_result)
+ printk(KERN_ERR "%s: error %d\n", __func__, exec_result);
+
+ kfree(astate);
+free_value:
+ kfree(value);
+free_key:
+ kfree(key);
+out:
+ return retval;
+}
+EXPORT_SYMBOL(altera_init);
diff --git a/drivers/misc/apds9802als.c b/drivers/misc/apds9802als.c
new file mode 100644
index 0000000..c6cc3dc
--- /dev/null
+++ b/drivers/misc/apds9802als.c
@@ -0,0 +1,322 @@
+/*
+ * apds9802als.c - apds9802 ALS Driver
+ *
+ * Copyright (C) 2009 Intel Corp
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/i2c.h>
+#include <linux/err.h>
+#include <linux/delay.h>
+#include <linux/mutex.h>
+#include <linux/sysfs.h>
+#include <linux/pm_runtime.h>
+
+#define ALS_MIN_RANGE_VAL 1
+#define ALS_MAX_RANGE_VAL 2
+#define POWER_STA_ENABLE 1
+#define POWER_STA_DISABLE 0
+
+#define DRIVER_NAME "apds9802als"
+
+struct als_data {
+ struct mutex mutex;
+};
+
+static ssize_t als_sensing_range_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct i2c_client *client = to_i2c_client(dev);
+ int val;
+
+ val = i2c_smbus_read_byte_data(client, 0x81);
+ if (val < 0)
+ return val;
+ if (val & 1)
+ return sprintf(buf, "4095\n");
+ else
+ return sprintf(buf, "65535\n");
+}
+
+static int als_wait_for_data_ready(struct device *dev)
+{
+ struct i2c_client *client = to_i2c_client(dev);
+ int ret;
+ int retry = 10;
+
+ do {
+ msleep(30);
+ ret = i2c_smbus_read_byte_data(client, 0x86);
+ } while (!(ret & 0x80) && retry--);
+
+ if (retry < 0) {
+ dev_warn(dev, "timeout waiting for data ready\n");
+ return -ETIMEDOUT;
+ }
+
+ return 0;
+}
+
+static ssize_t als_lux0_input_data_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct i2c_client *client = to_i2c_client(dev);
+ struct als_data *data = i2c_get_clientdata(client);
+ int ret_val;
+ int temp;
+
+ /* Protect against parallel reads */
+ pm_runtime_get_sync(dev);
+ mutex_lock(&data->mutex);
+
+ /* clear EOC interrupt status */
+ i2c_smbus_write_byte(client, 0x40);
+ /* start measurement */
+ temp = i2c_smbus_read_byte_data(client, 0x81);
+ i2c_smbus_write_byte_data(client, 0x81, temp | 0x08);
+
+ ret_val = als_wait_for_data_ready(dev);
+ if (ret_val < 0)
+ goto failed;
+
+ temp = i2c_smbus_read_byte_data(client, 0x8C); /* LSB data */
+ if (temp < 0) {
+ ret_val = temp;
+ goto failed;
+ }
+ ret_val = i2c_smbus_read_byte_data(client, 0x8D); /* MSB data */
+ if (ret_val < 0)
+ goto failed;
+
+ mutex_unlock(&data->mutex);
+ pm_runtime_put_sync(dev);
+
+ temp = (ret_val << 8) | temp;
+ return sprintf(buf, "%d\n", temp);
+failed:
+ mutex_unlock(&data->mutex);
+ pm_runtime_put_sync(dev);
+ return ret_val;
+}
+
+static ssize_t als_sensing_range_store(struct device *dev,
+ struct device_attribute *attr, const char *buf, size_t count)
+{
+ struct i2c_client *client = to_i2c_client(dev);
+ struct als_data *data = i2c_get_clientdata(client);
+ int ret_val;
+ unsigned long val;
+
+ ret_val = kstrtoul(buf, 10, &val);
+ if (ret_val)
+ return ret_val;
+
+ if (val < 4096)
+ val = 1;
+ else if (val < 65536)
+ val = 2;
+ else
+ return -ERANGE;
+
+ pm_runtime_get_sync(dev);
+
+ /* Make sure nobody else reads/modifies/writes 0x81 while we
+ are active */
+ mutex_lock(&data->mutex);
+
+ ret_val = i2c_smbus_read_byte_data(client, 0x81);
+ if (ret_val < 0)
+ goto fail;
+
+ /* Reset the bits before setting them */
+ ret_val = ret_val & 0xFA;
+
+ if (val == 1) /* Setting detection range up to 4k LUX */
+ ret_val = (ret_val | 0x01);
+ else /* Setting detection range up to 64k LUX*/
+ ret_val = (ret_val | 0x00);
+
+ ret_val = i2c_smbus_write_byte_data(client, 0x81, ret_val);
+
+ if (ret_val >= 0) {
+ /* All OK */
+ mutex_unlock(&data->mutex);
+ pm_runtime_put_sync(dev);
+ return count;
+ }
+fail:
+ mutex_unlock(&data->mutex);
+ pm_runtime_put_sync(dev);
+ return ret_val;
+}
+
+static int als_set_power_state(struct i2c_client *client, bool on_off)
+{
+ int ret_val;
+ struct als_data *data = i2c_get_clientdata(client);
+
+ mutex_lock(&data->mutex);
+ ret_val = i2c_smbus_read_byte_data(client, 0x80);
+ if (ret_val < 0)
+ goto fail;
+ if (on_off)
+ ret_val = ret_val | 0x01;
+ else
+ ret_val = ret_val & 0xFE;
+ ret_val = i2c_smbus_write_byte_data(client, 0x80, ret_val);
+fail:
+ mutex_unlock(&data->mutex);
+ return ret_val;
+}
+
+static DEVICE_ATTR(lux0_sensor_range, S_IRUGO | S_IWUSR,
+ als_sensing_range_show, als_sensing_range_store);
+static DEVICE_ATTR(lux0_input, S_IRUGO, als_lux0_input_data_show, NULL);
+
+static struct attribute *mid_att_als[] = {
+ &dev_attr_lux0_sensor_range.attr,
+ &dev_attr_lux0_input.attr,
+ NULL
+};
+
+static struct attribute_group m_als_gr = {
+ .name = "apds9802als",
+ .attrs = mid_att_als
+};
+
+static int als_set_default_config(struct i2c_client *client)
+{
+ int ret_val;
+ /* Write the command and then switch on */
+ ret_val = i2c_smbus_write_byte_data(client, 0x80, 0x01);
+ if (ret_val < 0) {
+ dev_err(&client->dev, "failed default switch on write\n");
+ return ret_val;
+ }
+ /* detection range: 1~64K Lux, maunal measurement */
+ ret_val = i2c_smbus_write_byte_data(client, 0x81, 0x08);
+ if (ret_val < 0)
+ dev_err(&client->dev, "failed default LUX on write\n");
+
+ /* We always get 0 for the 1st measurement after system power on,
+ * so make sure it is finished before user asks for data.
+ */
+ als_wait_for_data_ready(&client->dev);
+
+ return ret_val;
+}
+
+static int apds9802als_probe(struct i2c_client *client,
+ const struct i2c_device_id *id)
+{
+ int res;
+ struct als_data *data;
+
+ data = kzalloc(sizeof(struct als_data), GFP_KERNEL);
+ if (data == NULL) {
+ dev_err(&client->dev, "Memory allocation failed\n");
+ return -ENOMEM;
+ }
+ i2c_set_clientdata(client, data);
+ res = sysfs_create_group(&client->dev.kobj, &m_als_gr);
+ if (res) {
+ dev_err(&client->dev, "device create file failed\n");
+ goto als_error1;
+ }
+ dev_info(&client->dev, "ALS chip found\n");
+ als_set_default_config(client);
+ mutex_init(&data->mutex);
+
+ pm_runtime_set_active(&client->dev);
+ pm_runtime_enable(&client->dev);
+
+ return res;
+als_error1:
+ kfree(data);
+ return res;
+}
+
+static int apds9802als_remove(struct i2c_client *client)
+{
+ struct als_data *data = i2c_get_clientdata(client);
+
+ pm_runtime_get_sync(&client->dev);
+
+ als_set_power_state(client, false);
+ sysfs_remove_group(&client->dev.kobj, &m_als_gr);
+
+ pm_runtime_disable(&client->dev);
+ pm_runtime_set_suspended(&client->dev);
+ pm_runtime_put_noidle(&client->dev);
+
+ kfree(data);
+ return 0;
+}
+
+#ifdef CONFIG_PM
+
+static int apds9802als_suspend(struct device *dev)
+{
+ struct i2c_client *client = to_i2c_client(dev);
+
+ als_set_power_state(client, false);
+ return 0;
+}
+
+static int apds9802als_resume(struct device *dev)
+{
+ struct i2c_client *client = to_i2c_client(dev);
+
+ als_set_power_state(client, true);
+ return 0;
+}
+
+static UNIVERSAL_DEV_PM_OPS(apds9802als_pm_ops, apds9802als_suspend,
+ apds9802als_resume, NULL);
+
+#define APDS9802ALS_PM_OPS (&apds9802als_pm_ops)
+
+#else /* CONFIG_PM */
+#define APDS9802ALS_PM_OPS NULL
+#endif /* CONFIG_PM */
+
+static struct i2c_device_id apds9802als_id[] = {
+ { DRIVER_NAME, 0 },
+ { }
+};
+
+MODULE_DEVICE_TABLE(i2c, apds9802als_id);
+
+static struct i2c_driver apds9802als_driver = {
+ .driver = {
+ .name = DRIVER_NAME,
+ .pm = APDS9802ALS_PM_OPS,
+ },
+ .probe = apds9802als_probe,
+ .remove = apds9802als_remove,
+ .id_table = apds9802als_id,
+};
+
+module_i2c_driver(apds9802als_driver);
+
+MODULE_AUTHOR("Anantha Narayanan <Anantha.Narayanan@intel.com");
+MODULE_DESCRIPTION("Avago apds9802als ALS Driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/misc/apds990x.c b/drivers/misc/apds990x.c
new file mode 100644
index 0000000..a3e789b
--- /dev/null
+++ b/drivers/misc/apds990x.c
@@ -0,0 +1,1289 @@
+/*
+ * This file is part of the APDS990x sensor driver.
+ * Chip is combined proximity and ambient light sensor.
+ *
+ * Copyright (C) 2010 Nokia Corporation and/or its subsidiary(-ies).
+ *
+ * Contact: Samu Onkalo <samu.p.onkalo@nokia.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+ * 02110-1301 USA
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/i2c.h>
+#include <linux/interrupt.h>
+#include <linux/mutex.h>
+#include <linux/regulator/consumer.h>
+#include <linux/pm_runtime.h>
+#include <linux/delay.h>
+#include <linux/wait.h>
+#include <linux/slab.h>
+#include <linux/i2c/apds990x.h>
+
+/* Register map */
+#define APDS990X_ENABLE 0x00 /* Enable of states and interrupts */
+#define APDS990X_ATIME 0x01 /* ALS ADC time */
+#define APDS990X_PTIME 0x02 /* Proximity ADC time */
+#define APDS990X_WTIME 0x03 /* Wait time */
+#define APDS990X_AILTL 0x04 /* ALS interrupt low threshold low byte */
+#define APDS990X_AILTH 0x05 /* ALS interrupt low threshold hi byte */
+#define APDS990X_AIHTL 0x06 /* ALS interrupt hi threshold low byte */
+#define APDS990X_AIHTH 0x07 /* ALS interrupt hi threshold hi byte */
+#define APDS990X_PILTL 0x08 /* Proximity interrupt low threshold low byte */
+#define APDS990X_PILTH 0x09 /* Proximity interrupt low threshold hi byte */
+#define APDS990X_PIHTL 0x0a /* Proximity interrupt hi threshold low byte */
+#define APDS990X_PIHTH 0x0b /* Proximity interrupt hi threshold hi byte */
+#define APDS990X_PERS 0x0c /* Interrupt persistence filters */
+#define APDS990X_CONFIG 0x0d /* Configuration */
+#define APDS990X_PPCOUNT 0x0e /* Proximity pulse count */
+#define APDS990X_CONTROL 0x0f /* Gain control register */
+#define APDS990X_REV 0x11 /* Revision Number */
+#define APDS990X_ID 0x12 /* Device ID */
+#define APDS990X_STATUS 0x13 /* Device status */
+#define APDS990X_CDATAL 0x14 /* Clear ADC low data register */
+#define APDS990X_CDATAH 0x15 /* Clear ADC high data register */
+#define APDS990X_IRDATAL 0x16 /* IR ADC low data register */
+#define APDS990X_IRDATAH 0x17 /* IR ADC high data register */
+#define APDS990X_PDATAL 0x18 /* Proximity ADC low data register */
+#define APDS990X_PDATAH 0x19 /* Proximity ADC high data register */
+
+/* Control */
+#define APDS990X_MAX_AGAIN 3
+
+/* Enable register */
+#define APDS990X_EN_PIEN (0x1 << 5)
+#define APDS990X_EN_AIEN (0x1 << 4)
+#define APDS990X_EN_WEN (0x1 << 3)
+#define APDS990X_EN_PEN (0x1 << 2)
+#define APDS990X_EN_AEN (0x1 << 1)
+#define APDS990X_EN_PON (0x1 << 0)
+#define APDS990X_EN_DISABLE_ALL 0
+
+/* Status register */
+#define APDS990X_ST_PINT (0x1 << 5)
+#define APDS990X_ST_AINT (0x1 << 4)
+
+/* I2C access types */
+#define APDS990x_CMD_TYPE_MASK (0x03 << 5)
+#define APDS990x_CMD_TYPE_RB (0x00 << 5) /* Repeated byte */
+#define APDS990x_CMD_TYPE_INC (0x01 << 5) /* Auto increment */
+#define APDS990x_CMD_TYPE_SPE (0x03 << 5) /* Special function */
+
+#define APDS990x_ADDR_SHIFT 0
+#define APDS990x_CMD 0x80
+
+/* Interrupt ack commands */
+#define APDS990X_INT_ACK_ALS 0x6
+#define APDS990X_INT_ACK_PS 0x5
+#define APDS990X_INT_ACK_BOTH 0x7
+
+/* ptime */
+#define APDS990X_PTIME_DEFAULT 0xff /* Recommended conversion time 2.7ms*/
+
+/* wtime */
+#define APDS990X_WTIME_DEFAULT 0xee /* ~50ms wait time */
+
+#define APDS990X_TIME_TO_ADC 1024 /* One timetick as ADC count value */
+
+/* Persistence */
+#define APDS990X_APERS_SHIFT 0
+#define APDS990X_PPERS_SHIFT 4
+
+/* Supported ID:s */
+#define APDS990X_ID_0 0x0
+#define APDS990X_ID_4 0x4
+#define APDS990X_ID_29 0x29
+
+/* pgain and pdiode settings */
+#define APDS_PGAIN_1X 0x0
+#define APDS_PDIODE_IR 0x2
+
+#define APDS990X_LUX_OUTPUT_SCALE 10
+
+/* Reverse chip factors for threshold calculation */
+struct reverse_factors {
+ u32 afactor;
+ int cf1;
+ int irf1;
+ int cf2;
+ int irf2;
+};
+
+struct apds990x_chip {
+ struct apds990x_platform_data *pdata;
+ struct i2c_client *client;
+ struct mutex mutex; /* avoid parallel access */
+ struct regulator_bulk_data regs[2];
+ wait_queue_head_t wait;
+
+ int prox_en;
+ bool prox_continuous_mode;
+ bool lux_wait_fresh_res;
+
+ /* Chip parameters */
+ struct apds990x_chip_factors cf;
+ struct reverse_factors rcf;
+ u16 atime; /* als integration time */
+ u16 arate; /* als reporting rate */
+ u16 a_max_result; /* Max possible ADC value with current atime */
+ u8 again_meas; /* Gain used in last measurement */
+ u8 again_next; /* Next calculated gain */
+ u8 pgain;
+ u8 pdiode;
+ u8 pdrive;
+ u8 lux_persistence;
+ u8 prox_persistence;
+
+ u32 lux_raw;
+ u32 lux;
+ u16 lux_clear;
+ u16 lux_ir;
+ u16 lux_calib;
+ u32 lux_thres_hi;
+ u32 lux_thres_lo;
+
+ u32 prox_thres;
+ u16 prox_data;
+ u16 prox_calib;
+
+ char chipname[10];
+ u8 revision;
+};
+
+#define APDS_CALIB_SCALER 8192
+#define APDS_LUX_NEUTRAL_CALIB_VALUE (1 * APDS_CALIB_SCALER)
+#define APDS_PROX_NEUTRAL_CALIB_VALUE (1 * APDS_CALIB_SCALER)
+
+#define APDS_PROX_DEF_THRES 600
+#define APDS_PROX_HYSTERESIS 50
+#define APDS_LUX_DEF_THRES_HI 101
+#define APDS_LUX_DEF_THRES_LO 100
+#define APDS_DEFAULT_PROX_PERS 1
+
+#define APDS_TIMEOUT 2000
+#define APDS_STARTUP_DELAY 25000 /* us */
+#define APDS_RANGE 65535
+#define APDS_PROX_RANGE 1023
+#define APDS_LUX_GAIN_LO_LIMIT 100
+#define APDS_LUX_GAIN_LO_LIMIT_STRICT 25
+
+#define TIMESTEP 87 /* 2.7ms is about 87 / 32 */
+#define TIME_STEP_SCALER 32
+
+#define APDS_LUX_AVERAGING_TIME 50 /* tolerates 50/60Hz ripple */
+#define APDS_LUX_DEFAULT_RATE 200
+
+static const u8 again[] = {1, 8, 16, 120}; /* ALS gain steps */
+static const u8 ir_currents[] = {100, 50, 25, 12}; /* IRled currents in mA */
+
+/* Following two tables must match i.e 10Hz rate means 1 as persistence value */
+static const u16 arates_hz[] = {10, 5, 2, 1};
+static const u8 apersis[] = {1, 2, 4, 5};
+
+/* Regulators */
+static const char reg_vcc[] = "Vdd";
+static const char reg_vled[] = "Vled";
+
+static int apds990x_read_byte(struct apds990x_chip *chip, u8 reg, u8 *data)
+{
+ struct i2c_client *client = chip->client;
+ s32 ret;
+
+ reg &= ~APDS990x_CMD_TYPE_MASK;
+ reg |= APDS990x_CMD | APDS990x_CMD_TYPE_RB;
+
+ ret = i2c_smbus_read_byte_data(client, reg);
+ *data = ret;
+ return (int)ret;
+}
+
+static int apds990x_read_word(struct apds990x_chip *chip, u8 reg, u16 *data)
+{
+ struct i2c_client *client = chip->client;
+ s32 ret;
+
+ reg &= ~APDS990x_CMD_TYPE_MASK;
+ reg |= APDS990x_CMD | APDS990x_CMD_TYPE_INC;
+
+ ret = i2c_smbus_read_word_data(client, reg);
+ *data = ret;
+ return (int)ret;
+}
+
+static int apds990x_write_byte(struct apds990x_chip *chip, u8 reg, u8 data)
+{
+ struct i2c_client *client = chip->client;
+ s32 ret;
+
+ reg &= ~APDS990x_CMD_TYPE_MASK;
+ reg |= APDS990x_CMD | APDS990x_CMD_TYPE_RB;
+
+ ret = i2c_smbus_write_byte_data(client, reg, data);
+ return (int)ret;
+}
+
+static int apds990x_write_word(struct apds990x_chip *chip, u8 reg, u16 data)
+{
+ struct i2c_client *client = chip->client;
+ s32 ret;
+
+ reg &= ~APDS990x_CMD_TYPE_MASK;
+ reg |= APDS990x_CMD | APDS990x_CMD_TYPE_INC;
+
+ ret = i2c_smbus_write_word_data(client, reg, data);
+ return (int)ret;
+}
+
+static int apds990x_mode_on(struct apds990x_chip *chip)
+{
+ /* ALS is mandatory, proximity optional */
+ u8 reg = APDS990X_EN_AIEN | APDS990X_EN_PON | APDS990X_EN_AEN |
+ APDS990X_EN_WEN;
+
+ if (chip->prox_en)
+ reg |= APDS990X_EN_PIEN | APDS990X_EN_PEN;
+
+ return apds990x_write_byte(chip, APDS990X_ENABLE, reg);
+}
+
+static u16 apds990x_lux_to_threshold(struct apds990x_chip *chip, u32 lux)
+{
+ u32 thres;
+ u32 cpl;
+ u32 ir;
+
+ if (lux == 0)
+ return 0;
+ else if (lux == APDS_RANGE)
+ return APDS_RANGE;
+
+ /*
+ * Reported LUX value is a combination of the IR and CLEAR channel
+ * values. However, interrupt threshold is only for clear channel.
+ * This function approximates needed HW threshold value for a given
+ * LUX value in the current lightning type.
+ * IR level compared to visible light varies heavily depending on the
+ * source of the light
+ *
+ * Calculate threshold value for the next measurement period.
+ * Math: threshold = lux * cpl where
+ * cpl = atime * again / (glass_attenuation * device_factor)
+ * (count-per-lux)
+ *
+ * First remove calibration. Division by four is to avoid overflow
+ */
+ lux = lux * (APDS_CALIB_SCALER / 4) / (chip->lux_calib / 4);
+
+ /* Multiplication by 64 is to increase accuracy */
+ cpl = ((u32)chip->atime * (u32)again[chip->again_next] *
+ APDS_PARAM_SCALE * 64) / (chip->cf.ga * chip->cf.df);
+
+ thres = lux * cpl / 64;
+ /*
+ * Convert IR light from the latest result to match with
+ * new gain step. This helps to adapt with the current
+ * source of light.
+ */
+ ir = (u32)chip->lux_ir * (u32)again[chip->again_next] /
+ (u32)again[chip->again_meas];
+
+ /*
+ * Compensate count with IR light impact
+ * IAC1 > IAC2 (see apds990x_get_lux for formulas)
+ */
+ if (chip->lux_clear * APDS_PARAM_SCALE >=
+ chip->rcf.afactor * chip->lux_ir)
+ thres = (chip->rcf.cf1 * thres + chip->rcf.irf1 * ir) /
+ APDS_PARAM_SCALE;
+ else
+ thres = (chip->rcf.cf2 * thres + chip->rcf.irf2 * ir) /
+ APDS_PARAM_SCALE;
+
+ if (thres >= chip->a_max_result)
+ thres = chip->a_max_result - 1;
+ return thres;
+}
+
+static inline int apds990x_set_atime(struct apds990x_chip *chip, u32 time_ms)
+{
+ u8 reg_value;
+
+ chip->atime = time_ms;
+ /* Formula is specified in the data sheet */
+ reg_value = 256 - ((time_ms * TIME_STEP_SCALER) / TIMESTEP);
+ /* Calculate max ADC value for given integration time */
+ chip->a_max_result = (u16)(256 - reg_value) * APDS990X_TIME_TO_ADC;
+ return apds990x_write_byte(chip, APDS990X_ATIME, reg_value);
+}
+
+/* Called always with mutex locked */
+static int apds990x_refresh_pthres(struct apds990x_chip *chip, int data)
+{
+ int ret, lo, hi;
+
+ /* If the chip is not in use, don't try to access it */
+ if (pm_runtime_suspended(&chip->client->dev))
+ return 0;
+
+ if (data < chip->prox_thres) {
+ lo = 0;
+ hi = chip->prox_thres;
+ } else {
+ lo = chip->prox_thres - APDS_PROX_HYSTERESIS;
+ if (chip->prox_continuous_mode)
+ hi = chip->prox_thres;
+ else
+ hi = APDS_RANGE;
+ }
+
+ ret = apds990x_write_word(chip, APDS990X_PILTL, lo);
+ ret |= apds990x_write_word(chip, APDS990X_PIHTL, hi);
+ return ret;
+}
+
+/* Called always with mutex locked */
+static int apds990x_refresh_athres(struct apds990x_chip *chip)
+{
+ int ret;
+ /* If the chip is not in use, don't try to access it */
+ if (pm_runtime_suspended(&chip->client->dev))
+ return 0;
+
+ ret = apds990x_write_word(chip, APDS990X_AILTL,
+ apds990x_lux_to_threshold(chip, chip->lux_thres_lo));
+ ret |= apds990x_write_word(chip, APDS990X_AIHTL,
+ apds990x_lux_to_threshold(chip, chip->lux_thres_hi));
+
+ return ret;
+}
+
+/* Called always with mutex locked */
+static void apds990x_force_a_refresh(struct apds990x_chip *chip)
+{
+ /* This will force ALS interrupt after the next measurement. */
+ apds990x_write_word(chip, APDS990X_AILTL, APDS_LUX_DEF_THRES_LO);
+ apds990x_write_word(chip, APDS990X_AIHTL, APDS_LUX_DEF_THRES_HI);
+}
+
+/* Called always with mutex locked */
+static void apds990x_force_p_refresh(struct apds990x_chip *chip)
+{
+ /* This will force proximity interrupt after the next measurement. */
+ apds990x_write_word(chip, APDS990X_PILTL, APDS_PROX_DEF_THRES - 1);
+ apds990x_write_word(chip, APDS990X_PIHTL, APDS_PROX_DEF_THRES);
+}
+
+/* Called always with mutex locked */
+static int apds990x_calc_again(struct apds990x_chip *chip)
+{
+ int curr_again = chip->again_meas;
+ int next_again = chip->again_meas;
+ int ret = 0;
+
+ /* Calculate suitable als gain */
+ if (chip->lux_clear == chip->a_max_result)
+ next_again -= 2; /* ALS saturated. Decrease gain by 2 steps */
+ else if (chip->lux_clear > chip->a_max_result / 2)
+ next_again--;
+ else if (chip->lux_clear < APDS_LUX_GAIN_LO_LIMIT_STRICT)
+ next_again += 2; /* Too dark. Increase gain by 2 steps */
+ else if (chip->lux_clear < APDS_LUX_GAIN_LO_LIMIT)
+ next_again++;
+
+ /* Limit gain to available range */
+ if (next_again < 0)
+ next_again = 0;
+ else if (next_again > APDS990X_MAX_AGAIN)
+ next_again = APDS990X_MAX_AGAIN;
+
+ /* Let's check can we trust the measured result */
+ if (chip->lux_clear == chip->a_max_result)
+ /* Result can be totally garbage due to saturation */
+ ret = -ERANGE;
+ else if (next_again != curr_again &&
+ chip->lux_clear < APDS_LUX_GAIN_LO_LIMIT_STRICT)
+ /*
+ * Gain is changed and measurement result is very small.
+ * Result can be totally garbage due to underflow
+ */
+ ret = -ERANGE;
+
+ chip->again_next = next_again;
+ apds990x_write_byte(chip, APDS990X_CONTROL,
+ (chip->pdrive << 6) |
+ (chip->pdiode << 4) |
+ (chip->pgain << 2) |
+ (chip->again_next << 0));
+
+ /*
+ * Error means bad result -> re-measurement is needed. The forced
+ * refresh uses fastest possible persistence setting to get result
+ * as soon as possible.
+ */
+ if (ret < 0)
+ apds990x_force_a_refresh(chip);
+ else
+ apds990x_refresh_athres(chip);
+
+ return ret;
+}
+
+/* Called always with mutex locked */
+static int apds990x_get_lux(struct apds990x_chip *chip, int clear, int ir)
+{
+ int iac, iac1, iac2; /* IR adjusted counts */
+ u32 lpc; /* Lux per count */
+
+ /* Formulas:
+ * iac1 = CF1 * CLEAR_CH - IRF1 * IR_CH
+ * iac2 = CF2 * CLEAR_CH - IRF2 * IR_CH
+ */
+ iac1 = (chip->cf.cf1 * clear - chip->cf.irf1 * ir) / APDS_PARAM_SCALE;
+ iac2 = (chip->cf.cf2 * clear - chip->cf.irf2 * ir) / APDS_PARAM_SCALE;
+
+ iac = max(iac1, iac2);
+ iac = max(iac, 0);
+
+ lpc = APDS990X_LUX_OUTPUT_SCALE * (chip->cf.df * chip->cf.ga) /
+ (u32)(again[chip->again_meas] * (u32)chip->atime);
+
+ return (iac * lpc) / APDS_PARAM_SCALE;
+}
+
+static int apds990x_ack_int(struct apds990x_chip *chip, u8 mode)
+{
+ struct i2c_client *client = chip->client;
+ s32 ret;
+ u8 reg = APDS990x_CMD | APDS990x_CMD_TYPE_SPE;
+
+ switch (mode & (APDS990X_ST_AINT | APDS990X_ST_PINT)) {
+ case APDS990X_ST_AINT:
+ reg |= APDS990X_INT_ACK_ALS;
+ break;
+ case APDS990X_ST_PINT:
+ reg |= APDS990X_INT_ACK_PS;
+ break;
+ default:
+ reg |= APDS990X_INT_ACK_BOTH;
+ break;
+ }
+
+ ret = i2c_smbus_read_byte_data(client, reg);
+ return (int)ret;
+}
+
+static irqreturn_t apds990x_irq(int irq, void *data)
+{
+ struct apds990x_chip *chip = data;
+ u8 status;
+
+ apds990x_read_byte(chip, APDS990X_STATUS, &status);
+ apds990x_ack_int(chip, status);
+
+ mutex_lock(&chip->mutex);
+ if (!pm_runtime_suspended(&chip->client->dev)) {
+ if (status & APDS990X_ST_AINT) {
+ apds990x_read_word(chip, APDS990X_CDATAL,
+ &chip->lux_clear);
+ apds990x_read_word(chip, APDS990X_IRDATAL,
+ &chip->lux_ir);
+ /* Store used gain for calculations */
+ chip->again_meas = chip->again_next;
+
+ chip->lux_raw = apds990x_get_lux(chip,
+ chip->lux_clear,
+ chip->lux_ir);
+
+ if (apds990x_calc_again(chip) == 0) {
+ /* Result is valid */
+ chip->lux = chip->lux_raw;
+ chip->lux_wait_fresh_res = false;
+ wake_up(&chip->wait);
+ sysfs_notify(&chip->client->dev.kobj,
+ NULL, "lux0_input");
+ }
+ }
+
+ if ((status & APDS990X_ST_PINT) && chip->prox_en) {
+ u16 clr_ch;
+
+ apds990x_read_word(chip, APDS990X_CDATAL, &clr_ch);
+ /*
+ * If ALS channel is saturated at min gain,
+ * proximity gives false posivite values.
+ * Just ignore them.
+ */
+ if (chip->again_meas == 0 &&
+ clr_ch == chip->a_max_result)
+ chip->prox_data = 0;
+ else
+ apds990x_read_word(chip,
+ APDS990X_PDATAL,
+ &chip->prox_data);
+
+ apds990x_refresh_pthres(chip, chip->prox_data);
+ if (chip->prox_data < chip->prox_thres)
+ chip->prox_data = 0;
+ else if (!chip->prox_continuous_mode)
+ chip->prox_data = APDS_PROX_RANGE;
+ sysfs_notify(&chip->client->dev.kobj,
+ NULL, "prox0_raw");
+ }
+ }
+ mutex_unlock(&chip->mutex);
+ return IRQ_HANDLED;
+}
+
+static int apds990x_configure(struct apds990x_chip *chip)
+{
+ /* It is recommended to use disabled mode during these operations */
+ apds990x_write_byte(chip, APDS990X_ENABLE, APDS990X_EN_DISABLE_ALL);
+
+ /* conversion and wait times for different state machince states */
+ apds990x_write_byte(chip, APDS990X_PTIME, APDS990X_PTIME_DEFAULT);
+ apds990x_write_byte(chip, APDS990X_WTIME, APDS990X_WTIME_DEFAULT);
+ apds990x_set_atime(chip, APDS_LUX_AVERAGING_TIME);
+
+ apds990x_write_byte(chip, APDS990X_CONFIG, 0);
+
+ /* Persistence levels */
+ apds990x_write_byte(chip, APDS990X_PERS,
+ (chip->lux_persistence << APDS990X_APERS_SHIFT) |
+ (chip->prox_persistence << APDS990X_PPERS_SHIFT));
+
+ apds990x_write_byte(chip, APDS990X_PPCOUNT, chip->pdata->ppcount);
+
+ /* Start with relatively small gain */
+ chip->again_meas = 1;
+ chip->again_next = 1;
+ apds990x_write_byte(chip, APDS990X_CONTROL,
+ (chip->pdrive << 6) |
+ (chip->pdiode << 4) |
+ (chip->pgain << 2) |
+ (chip->again_next << 0));
+ return 0;
+}
+
+static int apds990x_detect(struct apds990x_chip *chip)
+{
+ struct i2c_client *client = chip->client;
+ int ret;
+ u8 id;
+
+ ret = apds990x_read_byte(chip, APDS990X_ID, &id);
+ if (ret < 0) {
+ dev_err(&client->dev, "ID read failed\n");
+ return ret;
+ }
+
+ ret = apds990x_read_byte(chip, APDS990X_REV, &chip->revision);
+ if (ret < 0) {
+ dev_err(&client->dev, "REV read failed\n");
+ return ret;
+ }
+
+ switch (id) {
+ case APDS990X_ID_0:
+ case APDS990X_ID_4:
+ case APDS990X_ID_29:
+ snprintf(chip->chipname, sizeof(chip->chipname), "APDS-990x");
+ break;
+ default:
+ ret = -ENODEV;
+ break;
+ }
+ return ret;
+}
+
+#ifdef CONFIG_PM
+static int apds990x_chip_on(struct apds990x_chip *chip)
+{
+ int err = regulator_bulk_enable(ARRAY_SIZE(chip->regs),
+ chip->regs);
+ if (err < 0)
+ return err;
+
+ usleep_range(APDS_STARTUP_DELAY, 2 * APDS_STARTUP_DELAY);
+
+ /* Refresh all configs in case of regulators were off */
+ chip->prox_data = 0;
+ apds990x_configure(chip);
+ apds990x_mode_on(chip);
+ return 0;
+}
+#endif
+
+static int apds990x_chip_off(struct apds990x_chip *chip)
+{
+ apds990x_write_byte(chip, APDS990X_ENABLE, APDS990X_EN_DISABLE_ALL);
+ regulator_bulk_disable(ARRAY_SIZE(chip->regs), chip->regs);
+ return 0;
+}
+
+static ssize_t apds990x_lux_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct apds990x_chip *chip = dev_get_drvdata(dev);
+ ssize_t ret;
+ u32 result;
+ long timeout;
+
+ if (pm_runtime_suspended(dev))
+ return -EIO;
+
+ timeout = wait_event_interruptible_timeout(chip->wait,
+ !chip->lux_wait_fresh_res,
+ msecs_to_jiffies(APDS_TIMEOUT));
+ if (!timeout)
+ return -EIO;
+
+ mutex_lock(&chip->mutex);
+ result = (chip->lux * chip->lux_calib) / APDS_CALIB_SCALER;
+ if (result > (APDS_RANGE * APDS990X_LUX_OUTPUT_SCALE))
+ result = APDS_RANGE * APDS990X_LUX_OUTPUT_SCALE;
+
+ ret = sprintf(buf, "%d.%d\n",
+ result / APDS990X_LUX_OUTPUT_SCALE,
+ result % APDS990X_LUX_OUTPUT_SCALE);
+ mutex_unlock(&chip->mutex);
+ return ret;
+}
+
+static DEVICE_ATTR(lux0_input, S_IRUGO, apds990x_lux_show, NULL);
+
+static ssize_t apds990x_lux_range_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ return sprintf(buf, "%u\n", APDS_RANGE);
+}
+
+static DEVICE_ATTR(lux0_sensor_range, S_IRUGO, apds990x_lux_range_show, NULL);
+
+static ssize_t apds990x_lux_calib_format_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ return sprintf(buf, "%u\n", APDS_CALIB_SCALER);
+}
+
+static DEVICE_ATTR(lux0_calibscale_default, S_IRUGO,
+ apds990x_lux_calib_format_show, NULL);
+
+static ssize_t apds990x_lux_calib_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct apds990x_chip *chip = dev_get_drvdata(dev);
+
+ return sprintf(buf, "%u\n", chip->lux_calib);
+}
+
+static ssize_t apds990x_lux_calib_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t len)
+{
+ struct apds990x_chip *chip = dev_get_drvdata(dev);
+ unsigned long value;
+ int ret;
+
+ ret = kstrtoul(buf, 0, &value);
+ if (ret)
+ return ret;
+
+ chip->lux_calib = value;
+
+ return len;
+}
+
+static DEVICE_ATTR(lux0_calibscale, S_IRUGO | S_IWUSR, apds990x_lux_calib_show,
+ apds990x_lux_calib_store);
+
+static ssize_t apds990x_rate_avail(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ int i;
+ int pos = 0;
+ for (i = 0; i < ARRAY_SIZE(arates_hz); i++)
+ pos += sprintf(buf + pos, "%d ", arates_hz[i]);
+ sprintf(buf + pos - 1, "\n");
+ return pos;
+}
+
+static ssize_t apds990x_rate_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct apds990x_chip *chip = dev_get_drvdata(dev);
+ return sprintf(buf, "%d\n", chip->arate);
+}
+
+static int apds990x_set_arate(struct apds990x_chip *chip, int rate)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(arates_hz); i++)
+ if (rate >= arates_hz[i])
+ break;
+
+ if (i == ARRAY_SIZE(arates_hz))
+ return -EINVAL;
+
+ /* Pick up corresponding persistence value */
+ chip->lux_persistence = apersis[i];
+ chip->arate = arates_hz[i];
+
+ /* If the chip is not in use, don't try to access it */
+ if (pm_runtime_suspended(&chip->client->dev))
+ return 0;
+
+ /* Persistence levels */
+ return apds990x_write_byte(chip, APDS990X_PERS,
+ (chip->lux_persistence << APDS990X_APERS_SHIFT) |
+ (chip->prox_persistence << APDS990X_PPERS_SHIFT));
+}
+
+static ssize_t apds990x_rate_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t len)
+{
+ struct apds990x_chip *chip = dev_get_drvdata(dev);
+ unsigned long value;
+ int ret;
+
+ ret = kstrtoul(buf, 0, &value);
+ if (ret)
+ return ret;
+
+ mutex_lock(&chip->mutex);
+ ret = apds990x_set_arate(chip, value);
+ mutex_unlock(&chip->mutex);
+
+ if (ret < 0)
+ return ret;
+ return len;
+}
+
+static DEVICE_ATTR(lux0_rate_avail, S_IRUGO, apds990x_rate_avail, NULL);
+
+static DEVICE_ATTR(lux0_rate, S_IRUGO | S_IWUSR, apds990x_rate_show,
+ apds990x_rate_store);
+
+static ssize_t apds990x_prox_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ ssize_t ret;
+ struct apds990x_chip *chip = dev_get_drvdata(dev);
+ if (pm_runtime_suspended(dev) || !chip->prox_en)
+ return -EIO;
+
+ mutex_lock(&chip->mutex);
+ ret = sprintf(buf, "%d\n", chip->prox_data);
+ mutex_unlock(&chip->mutex);
+ return ret;
+}
+
+static DEVICE_ATTR(prox0_raw, S_IRUGO, apds990x_prox_show, NULL);
+
+static ssize_t apds990x_prox_range_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ return sprintf(buf, "%u\n", APDS_PROX_RANGE);
+}
+
+static DEVICE_ATTR(prox0_sensor_range, S_IRUGO, apds990x_prox_range_show, NULL);
+
+static ssize_t apds990x_prox_enable_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct apds990x_chip *chip = dev_get_drvdata(dev);
+ return sprintf(buf, "%d\n", chip->prox_en);
+}
+
+static ssize_t apds990x_prox_enable_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t len)
+{
+ struct apds990x_chip *chip = dev_get_drvdata(dev);
+ unsigned long value;
+ int ret;
+
+ ret = kstrtoul(buf, 0, &value);
+ if (ret)
+ return ret;
+
+ mutex_lock(&chip->mutex);
+
+ if (!chip->prox_en)
+ chip->prox_data = 0;
+
+ if (value)
+ chip->prox_en++;
+ else if (chip->prox_en > 0)
+ chip->prox_en--;
+
+ if (!pm_runtime_suspended(dev))
+ apds990x_mode_on(chip);
+ mutex_unlock(&chip->mutex);
+ return len;
+}
+
+static DEVICE_ATTR(prox0_raw_en, S_IRUGO | S_IWUSR, apds990x_prox_enable_show,
+ apds990x_prox_enable_store);
+
+static const char reporting_modes[][9] = {"trigger", "periodic"};
+
+static ssize_t apds990x_prox_reporting_mode_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct apds990x_chip *chip = dev_get_drvdata(dev);
+ return sprintf(buf, "%s\n",
+ reporting_modes[!!chip->prox_continuous_mode]);
+}
+
+static ssize_t apds990x_prox_reporting_mode_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t len)
+{
+ struct apds990x_chip *chip = dev_get_drvdata(dev);
+
+ if (sysfs_streq(buf, reporting_modes[0]))
+ chip->prox_continuous_mode = 0;
+ else if (sysfs_streq(buf, reporting_modes[1]))
+ chip->prox_continuous_mode = 1;
+ else
+ return -EINVAL;
+ return len;
+}
+
+static DEVICE_ATTR(prox0_reporting_mode, S_IRUGO | S_IWUSR,
+ apds990x_prox_reporting_mode_show,
+ apds990x_prox_reporting_mode_store);
+
+static ssize_t apds990x_prox_reporting_avail_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ return sprintf(buf, "%s %s\n", reporting_modes[0], reporting_modes[1]);
+}
+
+static DEVICE_ATTR(prox0_reporting_mode_avail, S_IRUGO | S_IWUSR,
+ apds990x_prox_reporting_avail_show, NULL);
+
+
+static ssize_t apds990x_lux_thresh_above_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct apds990x_chip *chip = dev_get_drvdata(dev);
+ return sprintf(buf, "%d\n", chip->lux_thres_hi);
+}
+
+static ssize_t apds990x_lux_thresh_below_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct apds990x_chip *chip = dev_get_drvdata(dev);
+ return sprintf(buf, "%d\n", chip->lux_thres_lo);
+}
+
+static ssize_t apds990x_set_lux_thresh(struct apds990x_chip *chip, u32 *target,
+ const char *buf)
+{
+ unsigned long thresh;
+ int ret;
+
+ ret = kstrtoul(buf, 0, &thresh);
+ if (ret)
+ return ret;
+
+ if (thresh > APDS_RANGE)
+ return -EINVAL;
+
+ mutex_lock(&chip->mutex);
+ *target = thresh;
+ /*
+ * Don't update values in HW if we are still waiting for
+ * first interrupt to come after device handle open call.
+ */
+ if (!chip->lux_wait_fresh_res)
+ apds990x_refresh_athres(chip);
+ mutex_unlock(&chip->mutex);
+ return ret;
+
+}
+
+static ssize_t apds990x_lux_thresh_above_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t len)
+{
+ struct apds990x_chip *chip = dev_get_drvdata(dev);
+ int ret = apds990x_set_lux_thresh(chip, &chip->lux_thres_hi, buf);
+ if (ret < 0)
+ return ret;
+ return len;
+}
+
+static ssize_t apds990x_lux_thresh_below_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t len)
+{
+ struct apds990x_chip *chip = dev_get_drvdata(dev);
+ int ret = apds990x_set_lux_thresh(chip, &chip->lux_thres_lo, buf);
+ if (ret < 0)
+ return ret;
+ return len;
+}
+
+static DEVICE_ATTR(lux0_thresh_above_value, S_IRUGO | S_IWUSR,
+ apds990x_lux_thresh_above_show,
+ apds990x_lux_thresh_above_store);
+
+static DEVICE_ATTR(lux0_thresh_below_value, S_IRUGO | S_IWUSR,
+ apds990x_lux_thresh_below_show,
+ apds990x_lux_thresh_below_store);
+
+static ssize_t apds990x_prox_threshold_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct apds990x_chip *chip = dev_get_drvdata(dev);
+ return sprintf(buf, "%d\n", chip->prox_thres);
+}
+
+static ssize_t apds990x_prox_threshold_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t len)
+{
+ struct apds990x_chip *chip = dev_get_drvdata(dev);
+ unsigned long value;
+ int ret;
+
+ ret = kstrtoul(buf, 0, &value);
+ if (ret)
+ return ret;
+
+ if ((value > APDS_RANGE) || (value == 0) ||
+ (value < APDS_PROX_HYSTERESIS))
+ return -EINVAL;
+
+ mutex_lock(&chip->mutex);
+ chip->prox_thres = value;
+
+ apds990x_force_p_refresh(chip);
+ mutex_unlock(&chip->mutex);
+ return len;
+}
+
+static DEVICE_ATTR(prox0_thresh_above_value, S_IRUGO | S_IWUSR,
+ apds990x_prox_threshold_show,
+ apds990x_prox_threshold_store);
+
+static ssize_t apds990x_power_state_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ return sprintf(buf, "%d\n", !pm_runtime_suspended(dev));
+ return 0;
+}
+
+static ssize_t apds990x_power_state_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t len)
+{
+ struct apds990x_chip *chip = dev_get_drvdata(dev);
+ unsigned long value;
+ int ret;
+
+ ret = kstrtoul(buf, 0, &value);
+ if (ret)
+ return ret;
+
+ if (value) {
+ pm_runtime_get_sync(dev);
+ mutex_lock(&chip->mutex);
+ chip->lux_wait_fresh_res = true;
+ apds990x_force_a_refresh(chip);
+ apds990x_force_p_refresh(chip);
+ mutex_unlock(&chip->mutex);
+ } else {
+ if (!pm_runtime_suspended(dev))
+ pm_runtime_put(dev);
+ }
+ return len;
+}
+
+static DEVICE_ATTR(power_state, S_IRUGO | S_IWUSR,
+ apds990x_power_state_show,
+ apds990x_power_state_store);
+
+static ssize_t apds990x_chip_id_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct apds990x_chip *chip = dev_get_drvdata(dev);
+ return sprintf(buf, "%s %d\n", chip->chipname, chip->revision);
+}
+
+static DEVICE_ATTR(chip_id, S_IRUGO, apds990x_chip_id_show, NULL);
+
+static struct attribute *sysfs_attrs_ctrl[] = {
+ &dev_attr_lux0_calibscale.attr,
+ &dev_attr_lux0_calibscale_default.attr,
+ &dev_attr_lux0_input.attr,
+ &dev_attr_lux0_sensor_range.attr,
+ &dev_attr_lux0_rate.attr,
+ &dev_attr_lux0_rate_avail.attr,
+ &dev_attr_lux0_thresh_above_value.attr,
+ &dev_attr_lux0_thresh_below_value.attr,
+ &dev_attr_prox0_raw_en.attr,
+ &dev_attr_prox0_raw.attr,
+ &dev_attr_prox0_sensor_range.attr,
+ &dev_attr_prox0_thresh_above_value.attr,
+ &dev_attr_prox0_reporting_mode.attr,
+ &dev_attr_prox0_reporting_mode_avail.attr,
+ &dev_attr_chip_id.attr,
+ &dev_attr_power_state.attr,
+ NULL
+};
+
+static struct attribute_group apds990x_attribute_group[] = {
+ {.attrs = sysfs_attrs_ctrl },
+};
+
+static int apds990x_probe(struct i2c_client *client,
+ const struct i2c_device_id *id)
+{
+ struct apds990x_chip *chip;
+ int err;
+
+ chip = kzalloc(sizeof *chip, GFP_KERNEL);
+ if (!chip)
+ return -ENOMEM;
+
+ i2c_set_clientdata(client, chip);
+ chip->client = client;
+
+ init_waitqueue_head(&chip->wait);
+ mutex_init(&chip->mutex);
+ chip->pdata = client->dev.platform_data;
+
+ if (chip->pdata == NULL) {
+ dev_err(&client->dev, "platform data is mandatory\n");
+ err = -EINVAL;
+ goto fail1;
+ }
+
+ if (chip->pdata->cf.ga == 0) {
+ /* set uncovered sensor default parameters */
+ chip->cf.ga = 1966; /* 0.48 * APDS_PARAM_SCALE */
+ chip->cf.cf1 = 4096; /* 1.00 * APDS_PARAM_SCALE */
+ chip->cf.irf1 = 9134; /* 2.23 * APDS_PARAM_SCALE */
+ chip->cf.cf2 = 2867; /* 0.70 * APDS_PARAM_SCALE */
+ chip->cf.irf2 = 5816; /* 1.42 * APDS_PARAM_SCALE */
+ chip->cf.df = 52;
+ } else {
+ chip->cf = chip->pdata->cf;
+ }
+
+ /* precalculate inverse chip factors for threshold control */
+ chip->rcf.afactor =
+ (chip->cf.irf1 - chip->cf.irf2) * APDS_PARAM_SCALE /
+ (chip->cf.cf1 - chip->cf.cf2);
+ chip->rcf.cf1 = APDS_PARAM_SCALE * APDS_PARAM_SCALE /
+ chip->cf.cf1;
+ chip->rcf.irf1 = chip->cf.irf1 * APDS_PARAM_SCALE /
+ chip->cf.cf1;
+ chip->rcf.cf2 = APDS_PARAM_SCALE * APDS_PARAM_SCALE /
+ chip->cf.cf2;
+ chip->rcf.irf2 = chip->cf.irf2 * APDS_PARAM_SCALE /
+ chip->cf.cf2;
+
+ /* Set something to start with */
+ chip->lux_thres_hi = APDS_LUX_DEF_THRES_HI;
+ chip->lux_thres_lo = APDS_LUX_DEF_THRES_LO;
+ chip->lux_calib = APDS_LUX_NEUTRAL_CALIB_VALUE;
+
+ chip->prox_thres = APDS_PROX_DEF_THRES;
+ chip->pdrive = chip->pdata->pdrive;
+ chip->pdiode = APDS_PDIODE_IR;
+ chip->pgain = APDS_PGAIN_1X;
+ chip->prox_calib = APDS_PROX_NEUTRAL_CALIB_VALUE;
+ chip->prox_persistence = APDS_DEFAULT_PROX_PERS;
+ chip->prox_continuous_mode = false;
+
+ chip->regs[0].supply = reg_vcc;
+ chip->regs[1].supply = reg_vled;
+
+ err = regulator_bulk_get(&client->dev,
+ ARRAY_SIZE(chip->regs), chip->regs);
+ if (err < 0) {
+ dev_err(&client->dev, "Cannot get regulators\n");
+ goto fail1;
+ }
+
+ err = regulator_bulk_enable(ARRAY_SIZE(chip->regs), chip->regs);
+ if (err < 0) {
+ dev_err(&client->dev, "Cannot enable regulators\n");
+ goto fail2;
+ }
+
+ usleep_range(APDS_STARTUP_DELAY, 2 * APDS_STARTUP_DELAY);
+
+ err = apds990x_detect(chip);
+ if (err < 0) {
+ dev_err(&client->dev, "APDS990X not found\n");
+ goto fail3;
+ }
+
+ pm_runtime_set_active(&client->dev);
+
+ apds990x_configure(chip);
+ apds990x_set_arate(chip, APDS_LUX_DEFAULT_RATE);
+ apds990x_mode_on(chip);
+
+ pm_runtime_enable(&client->dev);
+
+ if (chip->pdata->setup_resources) {
+ err = chip->pdata->setup_resources();
+ if (err) {
+ err = -EINVAL;
+ goto fail3;
+ }
+ }
+
+ err = sysfs_create_group(&chip->client->dev.kobj,
+ apds990x_attribute_group);
+ if (err < 0) {
+ dev_err(&chip->client->dev, "Sysfs registration failed\n");
+ goto fail4;
+ }
+
+ err = request_threaded_irq(client->irq, NULL,
+ apds990x_irq,
+ IRQF_TRIGGER_FALLING | IRQF_TRIGGER_LOW |
+ IRQF_ONESHOT,
+ "apds990x", chip);
+ if (err) {
+ dev_err(&client->dev, "could not get IRQ %d\n",
+ client->irq);
+ goto fail5;
+ }
+ return err;
+fail5:
+ sysfs_remove_group(&chip->client->dev.kobj,
+ &apds990x_attribute_group[0]);
+fail4:
+ if (chip->pdata && chip->pdata->release_resources)
+ chip->pdata->release_resources();
+fail3:
+ regulator_bulk_disable(ARRAY_SIZE(chip->regs), chip->regs);
+fail2:
+ regulator_bulk_free(ARRAY_SIZE(chip->regs), chip->regs);
+fail1:
+ kfree(chip);
+ return err;
+}
+
+static int apds990x_remove(struct i2c_client *client)
+{
+ struct apds990x_chip *chip = i2c_get_clientdata(client);
+
+ free_irq(client->irq, chip);
+ sysfs_remove_group(&chip->client->dev.kobj,
+ apds990x_attribute_group);
+
+ if (chip->pdata && chip->pdata->release_resources)
+ chip->pdata->release_resources();
+
+ if (!pm_runtime_suspended(&client->dev))
+ apds990x_chip_off(chip);
+
+ pm_runtime_disable(&client->dev);
+ pm_runtime_set_suspended(&client->dev);
+
+ regulator_bulk_free(ARRAY_SIZE(chip->regs), chip->regs);
+
+ kfree(chip);
+ return 0;
+}
+
+#ifdef CONFIG_PM_SLEEP
+static int apds990x_suspend(struct device *dev)
+{
+ struct i2c_client *client = container_of(dev, struct i2c_client, dev);
+ struct apds990x_chip *chip = i2c_get_clientdata(client);
+
+ apds990x_chip_off(chip);
+ return 0;
+}
+
+static int apds990x_resume(struct device *dev)
+{
+ struct i2c_client *client = container_of(dev, struct i2c_client, dev);
+ struct apds990x_chip *chip = i2c_get_clientdata(client);
+
+ /*
+ * If we were enabled at suspend time, it is expected
+ * everything works nice and smoothly. Chip_on is enough
+ */
+ apds990x_chip_on(chip);
+
+ return 0;
+}
+#endif
+
+#ifdef CONFIG_PM
+static int apds990x_runtime_suspend(struct device *dev)
+{
+ struct i2c_client *client = container_of(dev, struct i2c_client, dev);
+ struct apds990x_chip *chip = i2c_get_clientdata(client);
+
+ apds990x_chip_off(chip);
+ return 0;
+}
+
+static int apds990x_runtime_resume(struct device *dev)
+{
+ struct i2c_client *client = container_of(dev, struct i2c_client, dev);
+ struct apds990x_chip *chip = i2c_get_clientdata(client);
+
+ apds990x_chip_on(chip);
+ return 0;
+}
+
+#endif
+
+static const struct i2c_device_id apds990x_id[] = {
+ {"apds990x", 0 },
+ {}
+};
+
+MODULE_DEVICE_TABLE(i2c, apds990x_id);
+
+static const struct dev_pm_ops apds990x_pm_ops = {
+ SET_SYSTEM_SLEEP_PM_OPS(apds990x_suspend, apds990x_resume)
+ SET_RUNTIME_PM_OPS(apds990x_runtime_suspend,
+ apds990x_runtime_resume,
+ NULL)
+};
+
+static struct i2c_driver apds990x_driver = {
+ .driver = {
+ .name = "apds990x",
+ .pm = &apds990x_pm_ops,
+ },
+ .probe = apds990x_probe,
+ .remove = apds990x_remove,
+ .id_table = apds990x_id,
+};
+
+module_i2c_driver(apds990x_driver);
+
+MODULE_DESCRIPTION("APDS990X combined ALS and proximity sensor");
+MODULE_AUTHOR("Samu Onkalo, Nokia Corporation");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/misc/arm-charlcd.c b/drivers/misc/arm-charlcd.c
new file mode 100644
index 0000000..c65b5ea
--- /dev/null
+++ b/drivers/misc/arm-charlcd.c
@@ -0,0 +1,388 @@
+/*
+ * Driver for the on-board character LCD found on some ARM reference boards
+ * This is basically an Hitachi HD44780 LCD with a custom IP block to drive it
+ * http://en.wikipedia.org/wiki/HD44780_Character_LCD
+ * Currently it will just display the text "ARM Linux" and the linux version
+ *
+ * License terms: GNU General Public License (GPL) version 2
+ * Author: Linus Walleij <triad@df.lth.se>
+ */
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/interrupt.h>
+#include <linux/platform_device.h>
+#include <linux/of.h>
+#include <linux/completion.h>
+#include <linux/delay.h>
+#include <linux/io.h>
+#include <linux/slab.h>
+#include <linux/workqueue.h>
+#include <generated/utsrelease.h>
+
+#define DRIVERNAME "arm-charlcd"
+#define CHARLCD_TIMEOUT (msecs_to_jiffies(1000))
+
+/* Offsets to registers */
+#define CHAR_COM 0x00U
+#define CHAR_DAT 0x04U
+#define CHAR_RD 0x08U
+#define CHAR_RAW 0x0CU
+#define CHAR_MASK 0x10U
+#define CHAR_STAT 0x14U
+
+#define CHAR_RAW_CLEAR 0x00000000U
+#define CHAR_RAW_VALID 0x00000100U
+
+/* Hitachi HD44780 display commands */
+#define HD_CLEAR 0x01U
+#define HD_HOME 0x02U
+#define HD_ENTRYMODE 0x04U
+#define HD_ENTRYMODE_INCREMENT 0x02U
+#define HD_ENTRYMODE_SHIFT 0x01U
+#define HD_DISPCTRL 0x08U
+#define HD_DISPCTRL_ON 0x04U
+#define HD_DISPCTRL_CURSOR_ON 0x02U
+#define HD_DISPCTRL_CURSOR_BLINK 0x01U
+#define HD_CRSR_SHIFT 0x10U
+#define HD_CRSR_SHIFT_DISPLAY 0x08U
+#define HD_CRSR_SHIFT_DISPLAY_RIGHT 0x04U
+#define HD_FUNCSET 0x20U
+#define HD_FUNCSET_8BIT 0x10U
+#define HD_FUNCSET_2_LINES 0x08U
+#define HD_FUNCSET_FONT_5X10 0x04U
+#define HD_SET_CGRAM 0x40U
+#define HD_SET_DDRAM 0x80U
+#define HD_BUSY_FLAG 0x80U
+
+/**
+ * @dev: a pointer back to containing device
+ * @phybase: the offset to the controller in physical memory
+ * @physize: the size of the physical page
+ * @virtbase: the offset to the controller in virtual memory
+ * @irq: reserved interrupt number
+ * @complete: completion structure for the last LCD command
+ */
+struct charlcd {
+ struct device *dev;
+ u32 phybase;
+ u32 physize;
+ void __iomem *virtbase;
+ int irq;
+ struct completion complete;
+ struct delayed_work init_work;
+};
+
+static irqreturn_t charlcd_interrupt(int irq, void *data)
+{
+ struct charlcd *lcd = data;
+ u8 status;
+
+ status = readl(lcd->virtbase + CHAR_STAT) & 0x01;
+ /* Clear IRQ */
+ writel(CHAR_RAW_CLEAR, lcd->virtbase + CHAR_RAW);
+ if (status)
+ complete(&lcd->complete);
+ else
+ dev_info(lcd->dev, "Spurious IRQ (%02x)\n", status);
+ return IRQ_HANDLED;
+}
+
+
+static void charlcd_wait_complete_irq(struct charlcd *lcd)
+{
+ int ret;
+
+ ret = wait_for_completion_interruptible_timeout(&lcd->complete,
+ CHARLCD_TIMEOUT);
+ /* Disable IRQ after completion */
+ writel(0x00, lcd->virtbase + CHAR_MASK);
+
+ if (ret < 0) {
+ dev_err(lcd->dev,
+ "wait_for_completion_interruptible_timeout() "
+ "returned %d waiting for ready\n", ret);
+ return;
+ }
+
+ if (ret == 0) {
+ dev_err(lcd->dev, "charlcd controller timed out "
+ "waiting for ready\n");
+ return;
+ }
+}
+
+static u8 charlcd_4bit_read_char(struct charlcd *lcd)
+{
+ u8 data;
+ u32 val;
+ int i;
+
+ /* If we can, use an IRQ to wait for the data, else poll */
+ if (lcd->irq >= 0)
+ charlcd_wait_complete_irq(lcd);
+ else {
+ i = 0;
+ val = 0;
+ while (!(val & CHAR_RAW_VALID) && i < 10) {
+ udelay(100);
+ val = readl(lcd->virtbase + CHAR_RAW);
+ i++;
+ }
+
+ writel(CHAR_RAW_CLEAR, lcd->virtbase + CHAR_RAW);
+ }
+ msleep(1);
+
+ /* Read the 4 high bits of the data */
+ data = readl(lcd->virtbase + CHAR_RD) & 0xf0;
+
+ /*
+ * The second read for the low bits does not trigger an IRQ
+ * so in this case we have to poll for the 4 lower bits
+ */
+ i = 0;
+ val = 0;
+ while (!(val & CHAR_RAW_VALID) && i < 10) {
+ udelay(100);
+ val = readl(lcd->virtbase + CHAR_RAW);
+ i++;
+ }
+ writel(CHAR_RAW_CLEAR, lcd->virtbase + CHAR_RAW);
+ msleep(1);
+
+ /* Read the 4 low bits of the data */
+ data |= (readl(lcd->virtbase + CHAR_RD) >> 4) & 0x0f;
+
+ return data;
+}
+
+static bool charlcd_4bit_read_bf(struct charlcd *lcd)
+{
+ if (lcd->irq >= 0) {
+ /*
+ * If we'll use IRQs to wait for the busyflag, clear any
+ * pending flag and enable IRQ
+ */
+ writel(CHAR_RAW_CLEAR, lcd->virtbase + CHAR_RAW);
+ init_completion(&lcd->complete);
+ writel(0x01, lcd->virtbase + CHAR_MASK);
+ }
+ readl(lcd->virtbase + CHAR_COM);
+ return charlcd_4bit_read_char(lcd) & HD_BUSY_FLAG ? true : false;
+}
+
+static void charlcd_4bit_wait_busy(struct charlcd *lcd)
+{
+ int retries = 50;
+
+ udelay(100);
+ while (charlcd_4bit_read_bf(lcd) && retries)
+ retries--;
+ if (!retries)
+ dev_err(lcd->dev, "timeout waiting for busyflag\n");
+}
+
+static void charlcd_4bit_command(struct charlcd *lcd, u8 cmd)
+{
+ u32 cmdlo = (cmd << 4) & 0xf0;
+ u32 cmdhi = (cmd & 0xf0);
+
+ writel(cmdhi, lcd->virtbase + CHAR_COM);
+ udelay(10);
+ writel(cmdlo, lcd->virtbase + CHAR_COM);
+ charlcd_4bit_wait_busy(lcd);
+}
+
+static void charlcd_4bit_char(struct charlcd *lcd, u8 ch)
+{
+ u32 chlo = (ch << 4) & 0xf0;
+ u32 chhi = (ch & 0xf0);
+
+ writel(chhi, lcd->virtbase + CHAR_DAT);
+ udelay(10);
+ writel(chlo, lcd->virtbase + CHAR_DAT);
+ charlcd_4bit_wait_busy(lcd);
+}
+
+static void charlcd_4bit_print(struct charlcd *lcd, int line, const char *str)
+{
+ u8 offset;
+ int i;
+
+ /*
+ * We support line 0, 1
+ * Line 1 runs from 0x00..0x27
+ * Line 2 runs from 0x28..0x4f
+ */
+ if (line == 0)
+ offset = 0;
+ else if (line == 1)
+ offset = 0x28;
+ else
+ return;
+
+ /* Set offset */
+ charlcd_4bit_command(lcd, HD_SET_DDRAM | offset);
+
+ /* Send string */
+ for (i = 0; i < strlen(str) && i < 0x28; i++)
+ charlcd_4bit_char(lcd, str[i]);
+}
+
+static void charlcd_4bit_init(struct charlcd *lcd)
+{
+ /* These commands cannot be checked with the busy flag */
+ writel(HD_FUNCSET | HD_FUNCSET_8BIT, lcd->virtbase + CHAR_COM);
+ msleep(5);
+ writel(HD_FUNCSET | HD_FUNCSET_8BIT, lcd->virtbase + CHAR_COM);
+ udelay(100);
+ writel(HD_FUNCSET | HD_FUNCSET_8BIT, lcd->virtbase + CHAR_COM);
+ udelay(100);
+ /* Go to 4bit mode */
+ writel(HD_FUNCSET, lcd->virtbase + CHAR_COM);
+ udelay(100);
+ /*
+ * 4bit mode, 2 lines, 5x8 font, after this the number of lines
+ * and the font cannot be changed until the next initialization sequence
+ */
+ charlcd_4bit_command(lcd, HD_FUNCSET | HD_FUNCSET_2_LINES);
+ charlcd_4bit_command(lcd, HD_DISPCTRL | HD_DISPCTRL_ON);
+ charlcd_4bit_command(lcd, HD_ENTRYMODE | HD_ENTRYMODE_INCREMENT);
+ charlcd_4bit_command(lcd, HD_CLEAR);
+ charlcd_4bit_command(lcd, HD_HOME);
+ /* Put something useful in the display */
+ charlcd_4bit_print(lcd, 0, "ARM Linux");
+ charlcd_4bit_print(lcd, 1, UTS_RELEASE);
+}
+
+static void charlcd_init_work(struct work_struct *work)
+{
+ struct charlcd *lcd =
+ container_of(work, struct charlcd, init_work.work);
+
+ charlcd_4bit_init(lcd);
+}
+
+static int __init charlcd_probe(struct platform_device *pdev)
+{
+ int ret;
+ struct charlcd *lcd;
+ struct resource *res;
+
+ lcd = kzalloc(sizeof(struct charlcd), GFP_KERNEL);
+ if (!lcd)
+ return -ENOMEM;
+
+ lcd->dev = &pdev->dev;
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ if (!res) {
+ ret = -ENOENT;
+ goto out_no_resource;
+ }
+ lcd->phybase = res->start;
+ lcd->physize = resource_size(res);
+
+ if (request_mem_region(lcd->phybase, lcd->physize,
+ DRIVERNAME) == NULL) {
+ ret = -EBUSY;
+ goto out_no_memregion;
+ }
+
+ lcd->virtbase = ioremap(lcd->phybase, lcd->physize);
+ if (!lcd->virtbase) {
+ ret = -ENOMEM;
+ goto out_no_memregion;
+ }
+
+ lcd->irq = platform_get_irq(pdev, 0);
+ /* If no IRQ is supplied, we'll survive without it */
+ if (lcd->irq >= 0) {
+ if (request_irq(lcd->irq, charlcd_interrupt, 0,
+ DRIVERNAME, lcd)) {
+ ret = -EIO;
+ goto out_no_irq;
+ }
+ }
+
+ platform_set_drvdata(pdev, lcd);
+
+ /*
+ * Initialize the display in a delayed work, because
+ * it is VERY slow and would slow down the boot of the system.
+ */
+ INIT_DELAYED_WORK(&lcd->init_work, charlcd_init_work);
+ schedule_delayed_work(&lcd->init_work, 0);
+
+ dev_info(&pdev->dev, "initialized ARM character LCD at %08x\n",
+ lcd->phybase);
+
+ return 0;
+
+out_no_irq:
+ iounmap(lcd->virtbase);
+out_no_memregion:
+ release_mem_region(lcd->phybase, SZ_4K);
+out_no_resource:
+ kfree(lcd);
+ return ret;
+}
+
+static int __exit charlcd_remove(struct platform_device *pdev)
+{
+ struct charlcd *lcd = platform_get_drvdata(pdev);
+
+ if (lcd) {
+ free_irq(lcd->irq, lcd);
+ iounmap(lcd->virtbase);
+ release_mem_region(lcd->phybase, lcd->physize);
+ kfree(lcd);
+ }
+
+ return 0;
+}
+
+static int charlcd_suspend(struct device *dev)
+{
+ struct platform_device *pdev = to_platform_device(dev);
+ struct charlcd *lcd = platform_get_drvdata(pdev);
+
+ /* Power the display off */
+ charlcd_4bit_command(lcd, HD_DISPCTRL);
+ return 0;
+}
+
+static int charlcd_resume(struct device *dev)
+{
+ struct platform_device *pdev = to_platform_device(dev);
+ struct charlcd *lcd = platform_get_drvdata(pdev);
+
+ /* Turn the display back on */
+ charlcd_4bit_command(lcd, HD_DISPCTRL | HD_DISPCTRL_ON);
+ return 0;
+}
+
+static const struct dev_pm_ops charlcd_pm_ops = {
+ .suspend = charlcd_suspend,
+ .resume = charlcd_resume,
+};
+
+static const struct of_device_id charlcd_match[] = {
+ { .compatible = "arm,versatile-lcd", },
+ {}
+};
+
+static struct platform_driver charlcd_driver = {
+ .driver = {
+ .name = DRIVERNAME,
+ .pm = &charlcd_pm_ops,
+ .of_match_table = of_match_ptr(charlcd_match),
+ },
+ .remove = __exit_p(charlcd_remove),
+};
+
+module_platform_driver_probe(charlcd_driver, charlcd_probe);
+
+MODULE_AUTHOR("Linus Walleij <triad@df.lth.se>");
+MODULE_DESCRIPTION("ARM Character LCD Driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/misc/atmel-ssc.c b/drivers/misc/atmel-ssc.c
new file mode 100644
index 0000000..e11a0bd
--- /dev/null
+++ b/drivers/misc/atmel-ssc.c
@@ -0,0 +1,234 @@
+/*
+ * Atmel SSC driver
+ *
+ * Copyright (C) 2007 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/platform_device.h>
+#include <linux/list.h>
+#include <linux/clk.h>
+#include <linux/err.h>
+#include <linux/io.h>
+#include <linux/spinlock.h>
+#include <linux/atmel-ssc.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+
+#include <linux/of.h>
+
+/* Serialize access to ssc_list and user count */
+static DEFINE_SPINLOCK(user_lock);
+static LIST_HEAD(ssc_list);
+
+struct ssc_device *ssc_request(unsigned int ssc_num)
+{
+ int ssc_valid = 0;
+ struct ssc_device *ssc;
+
+ spin_lock(&user_lock);
+ list_for_each_entry(ssc, &ssc_list, list) {
+ if (ssc->pdev->dev.of_node) {
+ if (of_alias_get_id(ssc->pdev->dev.of_node, "ssc")
+ == ssc_num) {
+ ssc_valid = 1;
+ break;
+ }
+ } else if (ssc->pdev->id == ssc_num) {
+ ssc_valid = 1;
+ break;
+ }
+ }
+
+ if (!ssc_valid) {
+ spin_unlock(&user_lock);
+ pr_err("ssc: ssc%d platform device is missing\n", ssc_num);
+ return ERR_PTR(-ENODEV);
+ }
+
+ if (ssc->user) {
+ spin_unlock(&user_lock);
+ dev_dbg(&ssc->pdev->dev, "module busy\n");
+ return ERR_PTR(-EBUSY);
+ }
+ ssc->user++;
+ spin_unlock(&user_lock);
+
+ clk_prepare(ssc->clk);
+
+ return ssc;
+}
+EXPORT_SYMBOL(ssc_request);
+
+void ssc_free(struct ssc_device *ssc)
+{
+ bool disable_clk = true;
+
+ spin_lock(&user_lock);
+ if (ssc->user)
+ ssc->user--;
+ else {
+ disable_clk = false;
+ dev_dbg(&ssc->pdev->dev, "device already free\n");
+ }
+ spin_unlock(&user_lock);
+
+ if (disable_clk)
+ clk_unprepare(ssc->clk);
+}
+EXPORT_SYMBOL(ssc_free);
+
+static struct atmel_ssc_platform_data at91rm9200_config = {
+ .use_dma = 0,
+ .has_fslen_ext = 0,
+};
+
+static struct atmel_ssc_platform_data at91sam9rl_config = {
+ .use_dma = 0,
+ .has_fslen_ext = 1,
+};
+
+static struct atmel_ssc_platform_data at91sam9g45_config = {
+ .use_dma = 1,
+ .has_fslen_ext = 1,
+};
+
+static const struct platform_device_id atmel_ssc_devtypes[] = {
+ {
+ .name = "at91rm9200_ssc",
+ .driver_data = (unsigned long) &at91rm9200_config,
+ }, {
+ .name = "at91sam9rl_ssc",
+ .driver_data = (unsigned long) &at91sam9rl_config,
+ }, {
+ .name = "at91sam9g45_ssc",
+ .driver_data = (unsigned long) &at91sam9g45_config,
+ }, {
+ /* sentinel */
+ }
+};
+
+#ifdef CONFIG_OF
+static const struct of_device_id atmel_ssc_dt_ids[] = {
+ {
+ .compatible = "atmel,at91rm9200-ssc",
+ .data = &at91rm9200_config,
+ }, {
+ .compatible = "atmel,at91sam9rl-ssc",
+ .data = &at91sam9rl_config,
+ }, {
+ .compatible = "atmel,at91sam9g45-ssc",
+ .data = &at91sam9g45_config,
+ }, {
+ /* sentinel */
+ }
+};
+MODULE_DEVICE_TABLE(of, atmel_ssc_dt_ids);
+#endif
+
+static inline const struct atmel_ssc_platform_data * __init
+ atmel_ssc_get_driver_data(struct platform_device *pdev)
+{
+ if (pdev->dev.of_node) {
+ const struct of_device_id *match;
+ match = of_match_node(atmel_ssc_dt_ids, pdev->dev.of_node);
+ if (match == NULL)
+ return NULL;
+ return match->data;
+ }
+
+ return (struct atmel_ssc_platform_data *)
+ platform_get_device_id(pdev)->driver_data;
+}
+
+static int ssc_probe(struct platform_device *pdev)
+{
+ struct resource *regs;
+ struct ssc_device *ssc;
+ const struct atmel_ssc_platform_data *plat_dat;
+
+ ssc = devm_kzalloc(&pdev->dev, sizeof(struct ssc_device), GFP_KERNEL);
+ if (!ssc) {
+ dev_dbg(&pdev->dev, "out of memory\n");
+ return -ENOMEM;
+ }
+
+ ssc->pdev = pdev;
+
+ plat_dat = atmel_ssc_get_driver_data(pdev);
+ if (!plat_dat)
+ return -ENODEV;
+ ssc->pdata = (struct atmel_ssc_platform_data *)plat_dat;
+
+ if (pdev->dev.of_node) {
+ struct device_node *np = pdev->dev.of_node;
+ ssc->clk_from_rk_pin =
+ of_property_read_bool(np, "atmel,clk-from-rk-pin");
+ }
+
+ regs = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ ssc->regs = devm_ioremap_resource(&pdev->dev, regs);
+ if (IS_ERR(ssc->regs))
+ return PTR_ERR(ssc->regs);
+
+ ssc->phybase = regs->start;
+
+ ssc->clk = devm_clk_get(&pdev->dev, "pclk");
+ if (IS_ERR(ssc->clk)) {
+ dev_dbg(&pdev->dev, "no pclk clock defined\n");
+ return -ENXIO;
+ }
+
+ /* disable all interrupts */
+ clk_prepare_enable(ssc->clk);
+ ssc_writel(ssc->regs, IDR, -1);
+ ssc_readl(ssc->regs, SR);
+ clk_disable_unprepare(ssc->clk);
+
+ ssc->irq = platform_get_irq(pdev, 0);
+ if (!ssc->irq) {
+ dev_dbg(&pdev->dev, "could not get irq\n");
+ return -ENXIO;
+ }
+
+ spin_lock(&user_lock);
+ list_add_tail(&ssc->list, &ssc_list);
+ spin_unlock(&user_lock);
+
+ platform_set_drvdata(pdev, ssc);
+
+ dev_info(&pdev->dev, "Atmel SSC device at 0x%p (irq %d)\n",
+ ssc->regs, ssc->irq);
+
+ return 0;
+}
+
+static int ssc_remove(struct platform_device *pdev)
+{
+ struct ssc_device *ssc = platform_get_drvdata(pdev);
+
+ spin_lock(&user_lock);
+ list_del(&ssc->list);
+ spin_unlock(&user_lock);
+
+ return 0;
+}
+
+static struct platform_driver ssc_driver = {
+ .driver = {
+ .name = "ssc",
+ .of_match_table = of_match_ptr(atmel_ssc_dt_ids),
+ },
+ .id_table = atmel_ssc_devtypes,
+ .probe = ssc_probe,
+ .remove = ssc_remove,
+};
+module_platform_driver(ssc_driver);
+
+MODULE_AUTHOR("Hans-Christian Egtvedt <hcegtvedt@atmel.com>");
+MODULE_DESCRIPTION("SSC driver for Atmel AVR32 and AT91");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:ssc");
diff --git a/drivers/misc/atmel_tclib.c b/drivers/misc/atmel_tclib.c
new file mode 100644
index 0000000..ac24a4b
--- /dev/null
+++ b/drivers/misc/atmel_tclib.c
@@ -0,0 +1,198 @@
+#include <linux/atmel_tc.h>
+#include <linux/clk.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/ioport.h>
+#include <linux/kernel.h>
+#include <linux/platform_device.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/export.h>
+#include <linux/of.h>
+
+/*
+ * This is a thin library to solve the problem of how to portably allocate
+ * one of the TC blocks. For simplicity, it doesn't currently expect to
+ * share individual timers between different drivers.
+ */
+
+#if defined(CONFIG_AVR32)
+/* AVR32 has these divide PBB */
+const u8 atmel_tc_divisors[5] = { 0, 4, 8, 16, 32, };
+EXPORT_SYMBOL(atmel_tc_divisors);
+
+#elif defined(CONFIG_ARCH_AT91)
+/* AT91 has these divide MCK */
+const u8 atmel_tc_divisors[5] = { 2, 8, 32, 128, 0, };
+EXPORT_SYMBOL(atmel_tc_divisors);
+
+#endif
+
+static DEFINE_SPINLOCK(tc_list_lock);
+static LIST_HEAD(tc_list);
+
+/**
+ * atmel_tc_alloc - allocate a specified TC block
+ * @block: which block to allocate
+ *
+ * Caller allocates a block. If it is available, a pointer to a
+ * pre-initialized struct atmel_tc is returned. The caller can access
+ * the registers directly through the "regs" field.
+ */
+struct atmel_tc *atmel_tc_alloc(unsigned block)
+{
+ struct atmel_tc *tc;
+ struct platform_device *pdev = NULL;
+
+ spin_lock(&tc_list_lock);
+ list_for_each_entry(tc, &tc_list, node) {
+ if (tc->allocated)
+ continue;
+
+ if ((tc->pdev->dev.of_node && tc->id == block) ||
+ (tc->pdev->id == block)) {
+ pdev = tc->pdev;
+ tc->allocated = true;
+ break;
+ }
+ }
+ spin_unlock(&tc_list_lock);
+
+ return pdev ? tc : NULL;
+}
+EXPORT_SYMBOL_GPL(atmel_tc_alloc);
+
+/**
+ * atmel_tc_free - release a specified TC block
+ * @tc: Timer/counter block that was returned by atmel_tc_alloc()
+ *
+ * This reverses the effect of atmel_tc_alloc(), invalidating the resource
+ * returned by that routine and making the TC available to other drivers.
+ */
+void atmel_tc_free(struct atmel_tc *tc)
+{
+ spin_lock(&tc_list_lock);
+ if (tc->allocated)
+ tc->allocated = false;
+ spin_unlock(&tc_list_lock);
+}
+EXPORT_SYMBOL_GPL(atmel_tc_free);
+
+#if defined(CONFIG_OF)
+static struct atmel_tcb_config tcb_rm9200_config = {
+ .counter_width = 16,
+};
+
+static struct atmel_tcb_config tcb_sam9x5_config = {
+ .counter_width = 32,
+};
+
+static const struct of_device_id atmel_tcb_dt_ids[] = {
+ {
+ .compatible = "atmel,at91rm9200-tcb",
+ .data = &tcb_rm9200_config,
+ }, {
+ .compatible = "atmel,at91sam9x5-tcb",
+ .data = &tcb_sam9x5_config,
+ }, {
+ /* sentinel */
+ }
+};
+
+MODULE_DEVICE_TABLE(of, atmel_tcb_dt_ids);
+#endif
+
+static int __init tc_probe(struct platform_device *pdev)
+{
+ struct atmel_tc *tc;
+ struct clk *clk;
+ int irq;
+ struct resource *r;
+ unsigned int i;
+
+ irq = platform_get_irq(pdev, 0);
+ if (irq < 0)
+ return -EINVAL;
+
+ tc = devm_kzalloc(&pdev->dev, sizeof(struct atmel_tc), GFP_KERNEL);
+ if (!tc)
+ return -ENOMEM;
+
+ tc->pdev = pdev;
+
+ clk = devm_clk_get(&pdev->dev, "t0_clk");
+ if (IS_ERR(clk))
+ return PTR_ERR(clk);
+
+ tc->slow_clk = devm_clk_get(&pdev->dev, "slow_clk");
+ if (IS_ERR(tc->slow_clk))
+ return PTR_ERR(tc->slow_clk);
+
+ r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ tc->regs = devm_ioremap_resource(&pdev->dev, r);
+ if (IS_ERR(tc->regs))
+ return PTR_ERR(tc->regs);
+
+ /* Now take SoC information if available */
+ if (pdev->dev.of_node) {
+ const struct of_device_id *match;
+ match = of_match_node(atmel_tcb_dt_ids, pdev->dev.of_node);
+ if (match)
+ tc->tcb_config = match->data;
+
+ tc->id = of_alias_get_id(tc->pdev->dev.of_node, "tcb");
+ } else {
+ tc->id = pdev->id;
+ }
+
+ tc->clk[0] = clk;
+ tc->clk[1] = devm_clk_get(&pdev->dev, "t1_clk");
+ if (IS_ERR(tc->clk[1]))
+ tc->clk[1] = clk;
+ tc->clk[2] = devm_clk_get(&pdev->dev, "t2_clk");
+ if (IS_ERR(tc->clk[2]))
+ tc->clk[2] = clk;
+
+ tc->irq[0] = irq;
+ tc->irq[1] = platform_get_irq(pdev, 1);
+ if (tc->irq[1] < 0)
+ tc->irq[1] = irq;
+ tc->irq[2] = platform_get_irq(pdev, 2);
+ if (tc->irq[2] < 0)
+ tc->irq[2] = irq;
+
+ for (i = 0; i < 3; i++)
+ writel(ATMEL_TC_ALL_IRQ, tc->regs + ATMEL_TC_REG(i, IDR));
+
+ spin_lock(&tc_list_lock);
+ list_add_tail(&tc->node, &tc_list);
+ spin_unlock(&tc_list_lock);
+
+ platform_set_drvdata(pdev, tc);
+
+ return 0;
+}
+
+static void tc_shutdown(struct platform_device *pdev)
+{
+ int i;
+ struct atmel_tc *tc = platform_get_drvdata(pdev);
+
+ for (i = 0; i < 3; i++)
+ writel(ATMEL_TC_ALL_IRQ, tc->regs + ATMEL_TC_REG(i, IDR));
+}
+
+static struct platform_driver tc_driver = {
+ .driver = {
+ .name = "atmel_tcb",
+ .of_match_table = of_match_ptr(atmel_tcb_dt_ids),
+ },
+ .shutdown = tc_shutdown,
+};
+
+static int __init tc_init(void)
+{
+ return platform_driver_probe(&tc_driver, tc_probe);
+}
+arch_initcall(tc_init);
diff --git a/drivers/misc/bh1770glc.c b/drivers/misc/bh1770glc.c
new file mode 100644
index 0000000..753d7ec
--- /dev/null
+++ b/drivers/misc/bh1770glc.c
@@ -0,0 +1,1410 @@
+/*
+ * This file is part of the ROHM BH1770GLC / OSRAM SFH7770 sensor driver.
+ * Chip is combined proximity and ambient light sensor.
+ *
+ * Copyright (C) 2010 Nokia Corporation and/or its subsidiary(-ies).
+ *
+ * Contact: Samu Onkalo <samu.p.onkalo@nokia.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+ * 02110-1301 USA
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/i2c.h>
+#include <linux/interrupt.h>
+#include <linux/mutex.h>
+#include <linux/i2c/bh1770glc.h>
+#include <linux/regulator/consumer.h>
+#include <linux/pm_runtime.h>
+#include <linux/workqueue.h>
+#include <linux/delay.h>
+#include <linux/wait.h>
+#include <linux/slab.h>
+
+#define BH1770_ALS_CONTROL 0x80 /* ALS operation mode control */
+#define BH1770_PS_CONTROL 0x81 /* PS operation mode control */
+#define BH1770_I_LED 0x82 /* active LED and LED1, LED2 current */
+#define BH1770_I_LED3 0x83 /* LED3 current setting */
+#define BH1770_ALS_PS_MEAS 0x84 /* Forced mode trigger */
+#define BH1770_PS_MEAS_RATE 0x85 /* PS meas. rate at stand alone mode */
+#define BH1770_ALS_MEAS_RATE 0x86 /* ALS meas. rate at stand alone mode */
+#define BH1770_PART_ID 0x8a /* Part number and revision ID */
+#define BH1770_MANUFACT_ID 0x8b /* Manufacturerer ID */
+#define BH1770_ALS_DATA_0 0x8c /* ALS DATA low byte */
+#define BH1770_ALS_DATA_1 0x8d /* ALS DATA high byte */
+#define BH1770_ALS_PS_STATUS 0x8e /* Measurement data and int status */
+#define BH1770_PS_DATA_LED1 0x8f /* PS data from LED1 */
+#define BH1770_PS_DATA_LED2 0x90 /* PS data from LED2 */
+#define BH1770_PS_DATA_LED3 0x91 /* PS data from LED3 */
+#define BH1770_INTERRUPT 0x92 /* Interrupt setting */
+#define BH1770_PS_TH_LED1 0x93 /* PS interrupt threshold for LED1 */
+#define BH1770_PS_TH_LED2 0x94 /* PS interrupt threshold for LED2 */
+#define BH1770_PS_TH_LED3 0x95 /* PS interrupt threshold for LED3 */
+#define BH1770_ALS_TH_UP_0 0x96 /* ALS upper threshold low byte */
+#define BH1770_ALS_TH_UP_1 0x97 /* ALS upper threshold high byte */
+#define BH1770_ALS_TH_LOW_0 0x98 /* ALS lower threshold low byte */
+#define BH1770_ALS_TH_LOW_1 0x99 /* ALS lower threshold high byte */
+
+/* MANUFACT_ID */
+#define BH1770_MANUFACT_ROHM 0x01
+#define BH1770_MANUFACT_OSRAM 0x03
+
+/* PART_ID */
+#define BH1770_PART 0x90
+#define BH1770_PART_MASK 0xf0
+#define BH1770_REV_MASK 0x0f
+#define BH1770_REV_SHIFT 0
+#define BH1770_REV_0 0x00
+#define BH1770_REV_1 0x01
+
+/* Operating modes for both */
+#define BH1770_STANDBY 0x00
+#define BH1770_FORCED 0x02
+#define BH1770_STANDALONE 0x03
+#define BH1770_SWRESET (0x01 << 2)
+
+#define BH1770_PS_TRIG_MEAS (1 << 0)
+#define BH1770_ALS_TRIG_MEAS (1 << 1)
+
+/* Interrupt control */
+#define BH1770_INT_OUTPUT_MODE (1 << 3) /* 0 = latched */
+#define BH1770_INT_POLARITY (1 << 2) /* 1 = active high */
+#define BH1770_INT_ALS_ENA (1 << 1)
+#define BH1770_INT_PS_ENA (1 << 0)
+
+/* Interrupt status */
+#define BH1770_INT_LED1_DATA (1 << 0)
+#define BH1770_INT_LED1_INT (1 << 1)
+#define BH1770_INT_LED2_DATA (1 << 2)
+#define BH1770_INT_LED2_INT (1 << 3)
+#define BH1770_INT_LED3_DATA (1 << 4)
+#define BH1770_INT_LED3_INT (1 << 5)
+#define BH1770_INT_LEDS_INT ((1 << 1) | (1 << 3) | (1 << 5))
+#define BH1770_INT_ALS_DATA (1 << 6)
+#define BH1770_INT_ALS_INT (1 << 7)
+
+/* Led channels */
+#define BH1770_LED1 0x00
+
+#define BH1770_DISABLE 0
+#define BH1770_ENABLE 1
+#define BH1770_PROX_CHANNELS 1
+
+#define BH1770_LUX_DEFAULT_RATE 1 /* Index to lux rate table */
+#define BH1770_PROX_DEFAULT_RATE 1 /* Direct HW value =~ 50Hz */
+#define BH1770_PROX_DEF_RATE_THRESH 6 /* Direct HW value =~ 5 Hz */
+#define BH1770_STARTUP_DELAY 50
+#define BH1770_RESET_TIME 10
+#define BH1770_TIMEOUT 2100 /* Timeout in 2.1 seconds */
+
+#define BH1770_LUX_RANGE 65535
+#define BH1770_PROX_RANGE 255
+#define BH1770_COEF_SCALER 1024
+#define BH1770_CALIB_SCALER 8192
+#define BH1770_LUX_NEUTRAL_CALIB_VALUE (1 * BH1770_CALIB_SCALER)
+#define BH1770_LUX_DEF_THRES 1000
+#define BH1770_PROX_DEF_THRES 70
+#define BH1770_PROX_DEF_ABS_THRES 100
+#define BH1770_DEFAULT_PERSISTENCE 10
+#define BH1770_PROX_MAX_PERSISTENCE 50
+#define BH1770_LUX_GA_SCALE 16384
+#define BH1770_LUX_CF_SCALE 2048 /* CF ChipFactor */
+#define BH1770_NEUTRAL_CF BH1770_LUX_CF_SCALE
+#define BH1770_LUX_CORR_SCALE 4096
+
+#define PROX_ABOVE_THRESHOLD 1
+#define PROX_BELOW_THRESHOLD 0
+
+#define PROX_IGNORE_LUX_LIMIT 500
+
+struct bh1770_chip {
+ struct bh1770_platform_data *pdata;
+ char chipname[10];
+ u8 revision;
+ struct i2c_client *client;
+ struct regulator_bulk_data regs[2];
+ struct mutex mutex; /* avoid parallel access */
+ wait_queue_head_t wait;
+
+ bool int_mode_prox;
+ bool int_mode_lux;
+ struct delayed_work prox_work;
+ u32 lux_cf; /* Chip specific factor */
+ u32 lux_ga;
+ u32 lux_calib;
+ int lux_rate_index;
+ u32 lux_corr;
+ u16 lux_data_raw;
+ u16 lux_threshold_hi;
+ u16 lux_threshold_lo;
+ u16 lux_thres_hi_onchip;
+ u16 lux_thres_lo_onchip;
+ bool lux_wait_result;
+
+ int prox_enable_count;
+ u16 prox_coef;
+ u16 prox_const;
+ int prox_rate;
+ int prox_rate_threshold;
+ u8 prox_persistence;
+ u8 prox_persistence_counter;
+ u8 prox_data;
+ u8 prox_threshold;
+ u8 prox_threshold_hw;
+ bool prox_force_update;
+ u8 prox_abs_thres;
+ u8 prox_led;
+};
+
+static const char reg_vcc[] = "Vcc";
+static const char reg_vleds[] = "Vleds";
+
+/*
+ * Supported stand alone rates in ms from chip data sheet
+ * {10, 20, 30, 40, 70, 100, 200, 500, 1000, 2000};
+ */
+static const s16 prox_rates_hz[] = {100, 50, 33, 25, 14, 10, 5, 2};
+static const s16 prox_rates_ms[] = {10, 20, 30, 40, 70, 100, 200, 500};
+
+/* Supported IR-led currents in mA */
+static const u8 prox_curr_ma[] = {5, 10, 20, 50, 100, 150, 200};
+
+/*
+ * Supported stand alone rates in ms from chip data sheet
+ * {100, 200, 500, 1000, 2000};
+ */
+static const s16 lux_rates_hz[] = {10, 5, 2, 1, 0};
+
+/*
+ * interrupt control functions are called while keeping chip->mutex
+ * excluding module probe / remove
+ */
+static inline int bh1770_lux_interrupt_control(struct bh1770_chip *chip,
+ int lux)
+{
+ chip->int_mode_lux = lux;
+ /* Set interrupt modes, interrupt active low, latched */
+ return i2c_smbus_write_byte_data(chip->client,
+ BH1770_INTERRUPT,
+ (lux << 1) | chip->int_mode_prox);
+}
+
+static inline int bh1770_prox_interrupt_control(struct bh1770_chip *chip,
+ int ps)
+{
+ chip->int_mode_prox = ps;
+ return i2c_smbus_write_byte_data(chip->client,
+ BH1770_INTERRUPT,
+ (chip->int_mode_lux << 1) | (ps << 0));
+}
+
+/* chip->mutex is always kept here */
+static int bh1770_lux_rate(struct bh1770_chip *chip, int rate_index)
+{
+ /* sysfs may call this when the chip is powered off */
+ if (pm_runtime_suspended(&chip->client->dev))
+ return 0;
+
+ /* Proper proximity response needs fastest lux rate (100ms) */
+ if (chip->prox_enable_count)
+ rate_index = 0;
+
+ return i2c_smbus_write_byte_data(chip->client,
+ BH1770_ALS_MEAS_RATE,
+ rate_index);
+}
+
+static int bh1770_prox_rate(struct bh1770_chip *chip, int mode)
+{
+ int rate;
+
+ rate = (mode == PROX_ABOVE_THRESHOLD) ?
+ chip->prox_rate_threshold : chip->prox_rate;
+
+ return i2c_smbus_write_byte_data(chip->client,
+ BH1770_PS_MEAS_RATE,
+ rate);
+}
+
+/* InfraredLED is controlled by the chip during proximity scanning */
+static inline int bh1770_led_cfg(struct bh1770_chip *chip)
+{
+ /* LED cfg, current for leds 1 and 2 */
+ return i2c_smbus_write_byte_data(chip->client,
+ BH1770_I_LED,
+ (BH1770_LED1 << 6) |
+ (BH1770_LED_5mA << 3) |
+ chip->prox_led);
+}
+
+/*
+ * Following two functions converts raw ps values from HW to normalized
+ * values. Purpose is to compensate differences between different sensor
+ * versions and variants so that result means about the same between
+ * versions.
+ */
+static inline u8 bh1770_psraw_to_adjusted(struct bh1770_chip *chip, u8 psraw)
+{
+ u16 adjusted;
+ adjusted = (u16)(((u32)(psraw + chip->prox_const) * chip->prox_coef) /
+ BH1770_COEF_SCALER);
+ if (adjusted > BH1770_PROX_RANGE)
+ adjusted = BH1770_PROX_RANGE;
+ return adjusted;
+}
+
+static inline u8 bh1770_psadjusted_to_raw(struct bh1770_chip *chip, u8 ps)
+{
+ u16 raw;
+
+ raw = (((u32)ps * BH1770_COEF_SCALER) / chip->prox_coef);
+ if (raw > chip->prox_const)
+ raw = raw - chip->prox_const;
+ else
+ raw = 0;
+ return raw;
+}
+
+/*
+ * Following two functions converts raw lux values from HW to normalized
+ * values. Purpose is to compensate differences between different sensor
+ * versions and variants so that result means about the same between
+ * versions. Chip->mutex is kept when this is called.
+ */
+static int bh1770_prox_set_threshold(struct bh1770_chip *chip)
+{
+ u8 tmp = 0;
+
+ /* sysfs may call this when the chip is powered off */
+ if (pm_runtime_suspended(&chip->client->dev))
+ return 0;
+
+ tmp = bh1770_psadjusted_to_raw(chip, chip->prox_threshold);
+ chip->prox_threshold_hw = tmp;
+
+ return i2c_smbus_write_byte_data(chip->client, BH1770_PS_TH_LED1,
+ tmp);
+}
+
+static inline u16 bh1770_lux_raw_to_adjusted(struct bh1770_chip *chip, u16 raw)
+{
+ u32 lux;
+ lux = ((u32)raw * chip->lux_corr) / BH1770_LUX_CORR_SCALE;
+ return min(lux, (u32)BH1770_LUX_RANGE);
+}
+
+static inline u16 bh1770_lux_adjusted_to_raw(struct bh1770_chip *chip,
+ u16 adjusted)
+{
+ return (u32)adjusted * BH1770_LUX_CORR_SCALE / chip->lux_corr;
+}
+
+/* chip->mutex is kept when this is called */
+static int bh1770_lux_update_thresholds(struct bh1770_chip *chip,
+ u16 threshold_hi, u16 threshold_lo)
+{
+ u8 data[4];
+ int ret;
+
+ /* sysfs may call this when the chip is powered off */
+ if (pm_runtime_suspended(&chip->client->dev))
+ return 0;
+
+ /*
+ * Compensate threshold values with the correction factors if not
+ * set to minimum or maximum.
+ * Min & max values disables interrupts.
+ */
+ if (threshold_hi != BH1770_LUX_RANGE && threshold_hi != 0)
+ threshold_hi = bh1770_lux_adjusted_to_raw(chip, threshold_hi);
+
+ if (threshold_lo != BH1770_LUX_RANGE && threshold_lo != 0)
+ threshold_lo = bh1770_lux_adjusted_to_raw(chip, threshold_lo);
+
+ if (chip->lux_thres_hi_onchip == threshold_hi &&
+ chip->lux_thres_lo_onchip == threshold_lo)
+ return 0;
+
+ chip->lux_thres_hi_onchip = threshold_hi;
+ chip->lux_thres_lo_onchip = threshold_lo;
+
+ data[0] = threshold_hi;
+ data[1] = threshold_hi >> 8;
+ data[2] = threshold_lo;
+ data[3] = threshold_lo >> 8;
+
+ ret = i2c_smbus_write_i2c_block_data(chip->client,
+ BH1770_ALS_TH_UP_0,
+ ARRAY_SIZE(data),
+ data);
+ return ret;
+}
+
+static int bh1770_lux_get_result(struct bh1770_chip *chip)
+{
+ u16 data;
+ int ret;
+
+ ret = i2c_smbus_read_byte_data(chip->client, BH1770_ALS_DATA_0);
+ if (ret < 0)
+ return ret;
+
+ data = ret & 0xff;
+ ret = i2c_smbus_read_byte_data(chip->client, BH1770_ALS_DATA_1);
+ if (ret < 0)
+ return ret;
+
+ chip->lux_data_raw = data | ((ret & 0xff) << 8);
+
+ return 0;
+}
+
+/* Calculate correction value which contains chip and device specific parts */
+static u32 bh1770_get_corr_value(struct bh1770_chip *chip)
+{
+ u32 tmp;
+ /* Impact of glass attenuation correction */
+ tmp = (BH1770_LUX_CORR_SCALE * chip->lux_ga) / BH1770_LUX_GA_SCALE;
+ /* Impact of chip factor correction */
+ tmp = (tmp * chip->lux_cf) / BH1770_LUX_CF_SCALE;
+ /* Impact of Device specific calibration correction */
+ tmp = (tmp * chip->lux_calib) / BH1770_CALIB_SCALER;
+ return tmp;
+}
+
+static int bh1770_lux_read_result(struct bh1770_chip *chip)
+{
+ bh1770_lux_get_result(chip);
+ return bh1770_lux_raw_to_adjusted(chip, chip->lux_data_raw);
+}
+
+/*
+ * Chip on / off functions are called while keeping mutex except probe
+ * or remove phase
+ */
+static int bh1770_chip_on(struct bh1770_chip *chip)
+{
+ int ret = regulator_bulk_enable(ARRAY_SIZE(chip->regs),
+ chip->regs);
+ if (ret < 0)
+ return ret;
+
+ usleep_range(BH1770_STARTUP_DELAY, BH1770_STARTUP_DELAY * 2);
+
+ /* Reset the chip */
+ i2c_smbus_write_byte_data(chip->client, BH1770_ALS_CONTROL,
+ BH1770_SWRESET);
+ usleep_range(BH1770_RESET_TIME, BH1770_RESET_TIME * 2);
+
+ /*
+ * ALS is started always since proximity needs als results
+ * for realibility estimation.
+ * Let's assume dark until the first ALS measurement is ready.
+ */
+ chip->lux_data_raw = 0;
+ chip->prox_data = 0;
+ ret = i2c_smbus_write_byte_data(chip->client,
+ BH1770_ALS_CONTROL, BH1770_STANDALONE);
+
+ /* Assume reset defaults */
+ chip->lux_thres_hi_onchip = BH1770_LUX_RANGE;
+ chip->lux_thres_lo_onchip = 0;
+
+ return ret;
+}
+
+static void bh1770_chip_off(struct bh1770_chip *chip)
+{
+ i2c_smbus_write_byte_data(chip->client,
+ BH1770_INTERRUPT, BH1770_DISABLE);
+ i2c_smbus_write_byte_data(chip->client,
+ BH1770_ALS_CONTROL, BH1770_STANDBY);
+ i2c_smbus_write_byte_data(chip->client,
+ BH1770_PS_CONTROL, BH1770_STANDBY);
+ regulator_bulk_disable(ARRAY_SIZE(chip->regs), chip->regs);
+}
+
+/* chip->mutex is kept when this is called */
+static int bh1770_prox_mode_control(struct bh1770_chip *chip)
+{
+ if (chip->prox_enable_count) {
+ chip->prox_force_update = true; /* Force immediate update */
+
+ bh1770_lux_rate(chip, chip->lux_rate_index);
+ bh1770_prox_set_threshold(chip);
+ bh1770_led_cfg(chip);
+ bh1770_prox_rate(chip, PROX_BELOW_THRESHOLD);
+ bh1770_prox_interrupt_control(chip, BH1770_ENABLE);
+ i2c_smbus_write_byte_data(chip->client,
+ BH1770_PS_CONTROL, BH1770_STANDALONE);
+ } else {
+ chip->prox_data = 0;
+ bh1770_lux_rate(chip, chip->lux_rate_index);
+ bh1770_prox_interrupt_control(chip, BH1770_DISABLE);
+ i2c_smbus_write_byte_data(chip->client,
+ BH1770_PS_CONTROL, BH1770_STANDBY);
+ }
+ return 0;
+}
+
+/* chip->mutex is kept when this is called */
+static int bh1770_prox_read_result(struct bh1770_chip *chip)
+{
+ int ret;
+ bool above;
+ u8 mode;
+
+ ret = i2c_smbus_read_byte_data(chip->client, BH1770_PS_DATA_LED1);
+ if (ret < 0)
+ goto out;
+
+ if (ret > chip->prox_threshold_hw)
+ above = true;
+ else
+ above = false;
+
+ /*
+ * when ALS levels goes above limit, proximity result may be
+ * false proximity. Thus ignore the result. With real proximity
+ * there is a shadow causing low als levels.
+ */
+ if (chip->lux_data_raw > PROX_IGNORE_LUX_LIMIT)
+ ret = 0;
+
+ chip->prox_data = bh1770_psraw_to_adjusted(chip, ret);
+
+ /* Strong proximity level or force mode requires immediate response */
+ if (chip->prox_data >= chip->prox_abs_thres ||
+ chip->prox_force_update)
+ chip->prox_persistence_counter = chip->prox_persistence;
+
+ chip->prox_force_update = false;
+
+ /* Persistence filttering to reduce false proximity events */
+ if (likely(above)) {
+ if (chip->prox_persistence_counter < chip->prox_persistence) {
+ chip->prox_persistence_counter++;
+ ret = -ENODATA;
+ } else {
+ mode = PROX_ABOVE_THRESHOLD;
+ ret = 0;
+ }
+ } else {
+ chip->prox_persistence_counter = 0;
+ mode = PROX_BELOW_THRESHOLD;
+ chip->prox_data = 0;
+ ret = 0;
+ }
+
+ /* Set proximity detection rate based on above or below value */
+ if (ret == 0) {
+ bh1770_prox_rate(chip, mode);
+ sysfs_notify(&chip->client->dev.kobj, NULL, "prox0_raw");
+ }
+out:
+ return ret;
+}
+
+static int bh1770_detect(struct bh1770_chip *chip)
+{
+ struct i2c_client *client = chip->client;
+ s32 ret;
+ u8 manu, part;
+
+ ret = i2c_smbus_read_byte_data(client, BH1770_MANUFACT_ID);
+ if (ret < 0)
+ goto error;
+ manu = (u8)ret;
+
+ ret = i2c_smbus_read_byte_data(client, BH1770_PART_ID);
+ if (ret < 0)
+ goto error;
+ part = (u8)ret;
+
+ chip->revision = (part & BH1770_REV_MASK) >> BH1770_REV_SHIFT;
+ chip->prox_coef = BH1770_COEF_SCALER;
+ chip->prox_const = 0;
+ chip->lux_cf = BH1770_NEUTRAL_CF;
+
+ if ((manu == BH1770_MANUFACT_ROHM) &&
+ ((part & BH1770_PART_MASK) == BH1770_PART)) {
+ snprintf(chip->chipname, sizeof(chip->chipname), "BH1770GLC");
+ return 0;
+ }
+
+ if ((manu == BH1770_MANUFACT_OSRAM) &&
+ ((part & BH1770_PART_MASK) == BH1770_PART)) {
+ snprintf(chip->chipname, sizeof(chip->chipname), "SFH7770");
+ /* Values selected by comparing different versions */
+ chip->prox_coef = 819; /* 0.8 * BH1770_COEF_SCALER */
+ chip->prox_const = 40;
+ return 0;
+ }
+
+ ret = -ENODEV;
+error:
+ dev_dbg(&client->dev, "BH1770 or SFH7770 not found\n");
+
+ return ret;
+}
+
+/*
+ * This work is re-scheduled at every proximity interrupt.
+ * If this work is running, it means that there hasn't been any
+ * proximity interrupt in time. Situation is handled as no-proximity.
+ * It would be nice to have low-threshold interrupt or interrupt
+ * when measurement and hi-threshold are both 0. But neither of those exists.
+ * This is a workaroud for missing HW feature.
+ */
+
+static void bh1770_prox_work(struct work_struct *work)
+{
+ struct bh1770_chip *chip =
+ container_of(work, struct bh1770_chip, prox_work.work);
+
+ mutex_lock(&chip->mutex);
+ bh1770_prox_read_result(chip);
+ mutex_unlock(&chip->mutex);
+}
+
+/* This is threaded irq handler */
+static irqreturn_t bh1770_irq(int irq, void *data)
+{
+ struct bh1770_chip *chip = data;
+ int status;
+ int rate = 0;
+
+ mutex_lock(&chip->mutex);
+ status = i2c_smbus_read_byte_data(chip->client, BH1770_ALS_PS_STATUS);
+
+ /* Acknowledge interrupt by reading this register */
+ i2c_smbus_read_byte_data(chip->client, BH1770_INTERRUPT);
+
+ /*
+ * Check if there is fresh data available for als.
+ * If this is the very first data, update thresholds after that.
+ */
+ if (status & BH1770_INT_ALS_DATA) {
+ bh1770_lux_get_result(chip);
+ if (unlikely(chip->lux_wait_result)) {
+ chip->lux_wait_result = false;
+ wake_up(&chip->wait);
+ bh1770_lux_update_thresholds(chip,
+ chip->lux_threshold_hi,
+ chip->lux_threshold_lo);
+ }
+ }
+
+ /* Disable interrupt logic to guarantee acknowledgement */
+ i2c_smbus_write_byte_data(chip->client, BH1770_INTERRUPT,
+ (0 << 1) | (0 << 0));
+
+ if ((status & BH1770_INT_ALS_INT))
+ sysfs_notify(&chip->client->dev.kobj, NULL, "lux0_input");
+
+ if (chip->int_mode_prox && (status & BH1770_INT_LEDS_INT)) {
+ rate = prox_rates_ms[chip->prox_rate_threshold];
+ bh1770_prox_read_result(chip);
+ }
+
+ /* Re-enable interrupt logic */
+ i2c_smbus_write_byte_data(chip->client, BH1770_INTERRUPT,
+ (chip->int_mode_lux << 1) |
+ (chip->int_mode_prox << 0));
+ mutex_unlock(&chip->mutex);
+
+ /*
+ * Can't cancel work while keeping mutex since the work uses the
+ * same mutex.
+ */
+ if (rate) {
+ /*
+ * Simulate missing no-proximity interrupt 50ms after the
+ * next expected interrupt time.
+ */
+ cancel_delayed_work_sync(&chip->prox_work);
+ schedule_delayed_work(&chip->prox_work,
+ msecs_to_jiffies(rate + 50));
+ }
+ return IRQ_HANDLED;
+}
+
+static ssize_t bh1770_power_state_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct bh1770_chip *chip = dev_get_drvdata(dev);
+ unsigned long value;
+ ssize_t ret;
+
+ ret = kstrtoul(buf, 0, &value);
+ if (ret)
+ return ret;
+
+ mutex_lock(&chip->mutex);
+ if (value) {
+ pm_runtime_get_sync(dev);
+
+ ret = bh1770_lux_rate(chip, chip->lux_rate_index);
+ if (ret < 0) {
+ pm_runtime_put(dev);
+ goto leave;
+ }
+
+ ret = bh1770_lux_interrupt_control(chip, BH1770_ENABLE);
+ if (ret < 0) {
+ pm_runtime_put(dev);
+ goto leave;
+ }
+
+ /* This causes interrupt after the next measurement cycle */
+ bh1770_lux_update_thresholds(chip, BH1770_LUX_DEF_THRES,
+ BH1770_LUX_DEF_THRES);
+ /* Inform that we are waiting for a result from ALS */
+ chip->lux_wait_result = true;
+ bh1770_prox_mode_control(chip);
+ } else if (!pm_runtime_suspended(dev)) {
+ pm_runtime_put(dev);
+ }
+ ret = count;
+leave:
+ mutex_unlock(&chip->mutex);
+ return ret;
+}
+
+static ssize_t bh1770_power_state_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ return sprintf(buf, "%d\n", !pm_runtime_suspended(dev));
+}
+
+static ssize_t bh1770_lux_result_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct bh1770_chip *chip = dev_get_drvdata(dev);
+ ssize_t ret;
+ long timeout;
+
+ if (pm_runtime_suspended(dev))
+ return -EIO; /* Chip is not enabled at all */
+
+ timeout = wait_event_interruptible_timeout(chip->wait,
+ !chip->lux_wait_result,
+ msecs_to_jiffies(BH1770_TIMEOUT));
+ if (!timeout)
+ return -EIO;
+
+ mutex_lock(&chip->mutex);
+ ret = sprintf(buf, "%d\n", bh1770_lux_read_result(chip));
+ mutex_unlock(&chip->mutex);
+
+ return ret;
+}
+
+static ssize_t bh1770_lux_range_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ return sprintf(buf, "%d\n", BH1770_LUX_RANGE);
+}
+
+static ssize_t bh1770_prox_enable_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct bh1770_chip *chip = dev_get_drvdata(dev);
+ unsigned long value;
+ int ret;
+
+ ret = kstrtoul(buf, 0, &value);
+ if (ret)
+ return ret;
+
+ mutex_lock(&chip->mutex);
+ /* Assume no proximity. Sensor will tell real state soon */
+ if (!chip->prox_enable_count)
+ chip->prox_data = 0;
+
+ if (value)
+ chip->prox_enable_count++;
+ else if (chip->prox_enable_count > 0)
+ chip->prox_enable_count--;
+ else
+ goto leave;
+
+ /* Run control only when chip is powered on */
+ if (!pm_runtime_suspended(dev))
+ bh1770_prox_mode_control(chip);
+leave:
+ mutex_unlock(&chip->mutex);
+ return count;
+}
+
+static ssize_t bh1770_prox_enable_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct bh1770_chip *chip = dev_get_drvdata(dev);
+ ssize_t len;
+
+ mutex_lock(&chip->mutex);
+ len = sprintf(buf, "%d\n", chip->prox_enable_count);
+ mutex_unlock(&chip->mutex);
+ return len;
+}
+
+static ssize_t bh1770_prox_result_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct bh1770_chip *chip = dev_get_drvdata(dev);
+ ssize_t ret;
+
+ mutex_lock(&chip->mutex);
+ if (chip->prox_enable_count && !pm_runtime_suspended(dev))
+ ret = sprintf(buf, "%d\n", chip->prox_data);
+ else
+ ret = -EIO;
+ mutex_unlock(&chip->mutex);
+ return ret;
+}
+
+static ssize_t bh1770_prox_range_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ return sprintf(buf, "%d\n", BH1770_PROX_RANGE);
+}
+
+static ssize_t bh1770_get_prox_rate_avail(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ int i;
+ int pos = 0;
+ for (i = 0; i < ARRAY_SIZE(prox_rates_hz); i++)
+ pos += sprintf(buf + pos, "%d ", prox_rates_hz[i]);
+ sprintf(buf + pos - 1, "\n");
+ return pos;
+}
+
+static ssize_t bh1770_get_prox_rate_above(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct bh1770_chip *chip = dev_get_drvdata(dev);
+ return sprintf(buf, "%d\n", prox_rates_hz[chip->prox_rate_threshold]);
+}
+
+static ssize_t bh1770_get_prox_rate_below(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct bh1770_chip *chip = dev_get_drvdata(dev);
+ return sprintf(buf, "%d\n", prox_rates_hz[chip->prox_rate]);
+}
+
+static int bh1770_prox_rate_validate(int rate)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(prox_rates_hz) - 1; i++)
+ if (rate >= prox_rates_hz[i])
+ break;
+ return i;
+}
+
+static ssize_t bh1770_set_prox_rate_above(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct bh1770_chip *chip = dev_get_drvdata(dev);
+ unsigned long value;
+ int ret;
+
+ ret = kstrtoul(buf, 0, &value);
+ if (ret)
+ return ret;
+
+ mutex_lock(&chip->mutex);
+ chip->prox_rate_threshold = bh1770_prox_rate_validate(value);
+ mutex_unlock(&chip->mutex);
+ return count;
+}
+
+static ssize_t bh1770_set_prox_rate_below(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct bh1770_chip *chip = dev_get_drvdata(dev);
+ unsigned long value;
+ int ret;
+
+ ret = kstrtoul(buf, 0, &value);
+ if (ret)
+ return ret;
+
+ mutex_lock(&chip->mutex);
+ chip->prox_rate = bh1770_prox_rate_validate(value);
+ mutex_unlock(&chip->mutex);
+ return count;
+}
+
+static ssize_t bh1770_get_prox_thres(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct bh1770_chip *chip = dev_get_drvdata(dev);
+ return sprintf(buf, "%d\n", chip->prox_threshold);
+}
+
+static ssize_t bh1770_set_prox_thres(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct bh1770_chip *chip = dev_get_drvdata(dev);
+ unsigned long value;
+ int ret;
+
+ ret = kstrtoul(buf, 0, &value);
+ if (ret)
+ return ret;
+
+ if (value > BH1770_PROX_RANGE)
+ return -EINVAL;
+
+ mutex_lock(&chip->mutex);
+ chip->prox_threshold = value;
+ ret = bh1770_prox_set_threshold(chip);
+ mutex_unlock(&chip->mutex);
+ if (ret < 0)
+ return ret;
+ return count;
+}
+
+static ssize_t bh1770_prox_persistence_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct bh1770_chip *chip = dev_get_drvdata(dev);
+
+ return sprintf(buf, "%u\n", chip->prox_persistence);
+}
+
+static ssize_t bh1770_prox_persistence_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t len)
+{
+ struct bh1770_chip *chip = dev_get_drvdata(dev);
+ unsigned long value;
+ int ret;
+
+ ret = kstrtoul(buf, 0, &value);
+ if (ret)
+ return ret;
+
+ if (value > BH1770_PROX_MAX_PERSISTENCE)
+ return -EINVAL;
+
+ chip->prox_persistence = value;
+
+ return len;
+}
+
+static ssize_t bh1770_prox_abs_thres_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct bh1770_chip *chip = dev_get_drvdata(dev);
+ return sprintf(buf, "%u\n", chip->prox_abs_thres);
+}
+
+static ssize_t bh1770_prox_abs_thres_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t len)
+{
+ struct bh1770_chip *chip = dev_get_drvdata(dev);
+ unsigned long value;
+ int ret;
+
+ ret = kstrtoul(buf, 0, &value);
+ if (ret)
+ return ret;
+
+ if (value > BH1770_PROX_RANGE)
+ return -EINVAL;
+
+ chip->prox_abs_thres = value;
+
+ return len;
+}
+
+static ssize_t bh1770_chip_id_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct bh1770_chip *chip = dev_get_drvdata(dev);
+ return sprintf(buf, "%s rev %d\n", chip->chipname, chip->revision);
+}
+
+static ssize_t bh1770_lux_calib_default_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ return sprintf(buf, "%u\n", BH1770_CALIB_SCALER);
+}
+
+static ssize_t bh1770_lux_calib_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct bh1770_chip *chip = dev_get_drvdata(dev);
+ ssize_t len;
+
+ mutex_lock(&chip->mutex);
+ len = sprintf(buf, "%u\n", chip->lux_calib);
+ mutex_unlock(&chip->mutex);
+ return len;
+}
+
+static ssize_t bh1770_lux_calib_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t len)
+{
+ struct bh1770_chip *chip = dev_get_drvdata(dev);
+ unsigned long value;
+ u32 old_calib;
+ u32 new_corr;
+ int ret;
+
+ ret = kstrtoul(buf, 0, &value);
+ if (ret)
+ return ret;
+
+ mutex_lock(&chip->mutex);
+ old_calib = chip->lux_calib;
+ chip->lux_calib = value;
+ new_corr = bh1770_get_corr_value(chip);
+ if (new_corr == 0) {
+ chip->lux_calib = old_calib;
+ mutex_unlock(&chip->mutex);
+ return -EINVAL;
+ }
+ chip->lux_corr = new_corr;
+ /* Refresh thresholds on HW after changing correction value */
+ bh1770_lux_update_thresholds(chip, chip->lux_threshold_hi,
+ chip->lux_threshold_lo);
+
+ mutex_unlock(&chip->mutex);
+
+ return len;
+}
+
+static ssize_t bh1770_get_lux_rate_avail(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ int i;
+ int pos = 0;
+ for (i = 0; i < ARRAY_SIZE(lux_rates_hz); i++)
+ pos += sprintf(buf + pos, "%d ", lux_rates_hz[i]);
+ sprintf(buf + pos - 1, "\n");
+ return pos;
+}
+
+static ssize_t bh1770_get_lux_rate(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct bh1770_chip *chip = dev_get_drvdata(dev);
+ return sprintf(buf, "%d\n", lux_rates_hz[chip->lux_rate_index]);
+}
+
+static ssize_t bh1770_set_lux_rate(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct bh1770_chip *chip = dev_get_drvdata(dev);
+ unsigned long rate_hz;
+ int ret, i;
+
+ ret = kstrtoul(buf, 0, &rate_hz);
+ if (ret)
+ return ret;
+
+ for (i = 0; i < ARRAY_SIZE(lux_rates_hz) - 1; i++)
+ if (rate_hz >= lux_rates_hz[i])
+ break;
+
+ mutex_lock(&chip->mutex);
+ chip->lux_rate_index = i;
+ ret = bh1770_lux_rate(chip, i);
+ mutex_unlock(&chip->mutex);
+
+ if (ret < 0)
+ return ret;
+
+ return count;
+}
+
+static ssize_t bh1770_get_lux_thresh_above(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct bh1770_chip *chip = dev_get_drvdata(dev);
+ return sprintf(buf, "%d\n", chip->lux_threshold_hi);
+}
+
+static ssize_t bh1770_get_lux_thresh_below(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct bh1770_chip *chip = dev_get_drvdata(dev);
+ return sprintf(buf, "%d\n", chip->lux_threshold_lo);
+}
+
+static ssize_t bh1770_set_lux_thresh(struct bh1770_chip *chip, u16 *target,
+ const char *buf)
+{
+ unsigned long thresh;
+ int ret;
+
+ ret = kstrtoul(buf, 0, &thresh);
+ if (ret)
+ return ret;
+
+ if (thresh > BH1770_LUX_RANGE)
+ return -EINVAL;
+
+ mutex_lock(&chip->mutex);
+ *target = thresh;
+ /*
+ * Don't update values in HW if we are still waiting for
+ * first interrupt to come after device handle open call.
+ */
+ if (!chip->lux_wait_result)
+ ret = bh1770_lux_update_thresholds(chip,
+ chip->lux_threshold_hi,
+ chip->lux_threshold_lo);
+ mutex_unlock(&chip->mutex);
+ return ret;
+
+}
+
+static ssize_t bh1770_set_lux_thresh_above(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t len)
+{
+ struct bh1770_chip *chip = dev_get_drvdata(dev);
+ int ret = bh1770_set_lux_thresh(chip, &chip->lux_threshold_hi, buf);
+ if (ret < 0)
+ return ret;
+ return len;
+}
+
+static ssize_t bh1770_set_lux_thresh_below(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t len)
+{
+ struct bh1770_chip *chip = dev_get_drvdata(dev);
+ int ret = bh1770_set_lux_thresh(chip, &chip->lux_threshold_lo, buf);
+ if (ret < 0)
+ return ret;
+ return len;
+}
+
+static DEVICE_ATTR(prox0_raw_en, S_IRUGO | S_IWUSR, bh1770_prox_enable_show,
+ bh1770_prox_enable_store);
+static DEVICE_ATTR(prox0_thresh_above1_value, S_IRUGO | S_IWUSR,
+ bh1770_prox_abs_thres_show,
+ bh1770_prox_abs_thres_store);
+static DEVICE_ATTR(prox0_thresh_above0_value, S_IRUGO | S_IWUSR,
+ bh1770_get_prox_thres,
+ bh1770_set_prox_thres);
+static DEVICE_ATTR(prox0_raw, S_IRUGO, bh1770_prox_result_show, NULL);
+static DEVICE_ATTR(prox0_sensor_range, S_IRUGO, bh1770_prox_range_show, NULL);
+static DEVICE_ATTR(prox0_thresh_above_count, S_IRUGO | S_IWUSR,
+ bh1770_prox_persistence_show,
+ bh1770_prox_persistence_store);
+static DEVICE_ATTR(prox0_rate_above, S_IRUGO | S_IWUSR,
+ bh1770_get_prox_rate_above,
+ bh1770_set_prox_rate_above);
+static DEVICE_ATTR(prox0_rate_below, S_IRUGO | S_IWUSR,
+ bh1770_get_prox_rate_below,
+ bh1770_set_prox_rate_below);
+static DEVICE_ATTR(prox0_rate_avail, S_IRUGO, bh1770_get_prox_rate_avail, NULL);
+
+static DEVICE_ATTR(lux0_calibscale, S_IRUGO | S_IWUSR, bh1770_lux_calib_show,
+ bh1770_lux_calib_store);
+static DEVICE_ATTR(lux0_calibscale_default, S_IRUGO,
+ bh1770_lux_calib_default_show,
+ NULL);
+static DEVICE_ATTR(lux0_input, S_IRUGO, bh1770_lux_result_show, NULL);
+static DEVICE_ATTR(lux0_sensor_range, S_IRUGO, bh1770_lux_range_show, NULL);
+static DEVICE_ATTR(lux0_rate, S_IRUGO | S_IWUSR, bh1770_get_lux_rate,
+ bh1770_set_lux_rate);
+static DEVICE_ATTR(lux0_rate_avail, S_IRUGO, bh1770_get_lux_rate_avail, NULL);
+static DEVICE_ATTR(lux0_thresh_above_value, S_IRUGO | S_IWUSR,
+ bh1770_get_lux_thresh_above,
+ bh1770_set_lux_thresh_above);
+static DEVICE_ATTR(lux0_thresh_below_value, S_IRUGO | S_IWUSR,
+ bh1770_get_lux_thresh_below,
+ bh1770_set_lux_thresh_below);
+static DEVICE_ATTR(chip_id, S_IRUGO, bh1770_chip_id_show, NULL);
+static DEVICE_ATTR(power_state, S_IRUGO | S_IWUSR, bh1770_power_state_show,
+ bh1770_power_state_store);
+
+
+static struct attribute *sysfs_attrs[] = {
+ &dev_attr_lux0_calibscale.attr,
+ &dev_attr_lux0_calibscale_default.attr,
+ &dev_attr_lux0_input.attr,
+ &dev_attr_lux0_sensor_range.attr,
+ &dev_attr_lux0_rate.attr,
+ &dev_attr_lux0_rate_avail.attr,
+ &dev_attr_lux0_thresh_above_value.attr,
+ &dev_attr_lux0_thresh_below_value.attr,
+ &dev_attr_prox0_raw.attr,
+ &dev_attr_prox0_sensor_range.attr,
+ &dev_attr_prox0_raw_en.attr,
+ &dev_attr_prox0_thresh_above_count.attr,
+ &dev_attr_prox0_rate_above.attr,
+ &dev_attr_prox0_rate_below.attr,
+ &dev_attr_prox0_rate_avail.attr,
+ &dev_attr_prox0_thresh_above0_value.attr,
+ &dev_attr_prox0_thresh_above1_value.attr,
+ &dev_attr_chip_id.attr,
+ &dev_attr_power_state.attr,
+ NULL
+};
+
+static struct attribute_group bh1770_attribute_group = {
+ .attrs = sysfs_attrs
+};
+
+static int bh1770_probe(struct i2c_client *client,
+ const struct i2c_device_id *id)
+{
+ struct bh1770_chip *chip;
+ int err;
+
+ chip = devm_kzalloc(&client->dev, sizeof *chip, GFP_KERNEL);
+ if (!chip)
+ return -ENOMEM;
+
+ i2c_set_clientdata(client, chip);
+ chip->client = client;
+
+ mutex_init(&chip->mutex);
+ init_waitqueue_head(&chip->wait);
+ INIT_DELAYED_WORK(&chip->prox_work, bh1770_prox_work);
+
+ if (client->dev.platform_data == NULL) {
+ dev_err(&client->dev, "platform data is mandatory\n");
+ return -EINVAL;
+ }
+
+ chip->pdata = client->dev.platform_data;
+ chip->lux_calib = BH1770_LUX_NEUTRAL_CALIB_VALUE;
+ chip->lux_rate_index = BH1770_LUX_DEFAULT_RATE;
+ chip->lux_threshold_lo = BH1770_LUX_DEF_THRES;
+ chip->lux_threshold_hi = BH1770_LUX_DEF_THRES;
+
+ if (chip->pdata->glass_attenuation == 0)
+ chip->lux_ga = BH1770_NEUTRAL_GA;
+ else
+ chip->lux_ga = chip->pdata->glass_attenuation;
+
+ chip->prox_threshold = BH1770_PROX_DEF_THRES;
+ chip->prox_led = chip->pdata->led_def_curr;
+ chip->prox_abs_thres = BH1770_PROX_DEF_ABS_THRES;
+ chip->prox_persistence = BH1770_DEFAULT_PERSISTENCE;
+ chip->prox_rate_threshold = BH1770_PROX_DEF_RATE_THRESH;
+ chip->prox_rate = BH1770_PROX_DEFAULT_RATE;
+ chip->prox_data = 0;
+
+ chip->regs[0].supply = reg_vcc;
+ chip->regs[1].supply = reg_vleds;
+
+ err = devm_regulator_bulk_get(&client->dev,
+ ARRAY_SIZE(chip->regs), chip->regs);
+ if (err < 0) {
+ dev_err(&client->dev, "Cannot get regulators\n");
+ return err;
+ }
+
+ err = regulator_bulk_enable(ARRAY_SIZE(chip->regs),
+ chip->regs);
+ if (err < 0) {
+ dev_err(&client->dev, "Cannot enable regulators\n");
+ return err;
+ }
+
+ usleep_range(BH1770_STARTUP_DELAY, BH1770_STARTUP_DELAY * 2);
+ err = bh1770_detect(chip);
+ if (err < 0)
+ goto fail0;
+
+ /* Start chip */
+ bh1770_chip_on(chip);
+ pm_runtime_set_active(&client->dev);
+ pm_runtime_enable(&client->dev);
+
+ chip->lux_corr = bh1770_get_corr_value(chip);
+ if (chip->lux_corr == 0) {
+ dev_err(&client->dev, "Improper correction values\n");
+ err = -EINVAL;
+ goto fail0;
+ }
+
+ if (chip->pdata->setup_resources) {
+ err = chip->pdata->setup_resources();
+ if (err) {
+ err = -EINVAL;
+ goto fail0;
+ }
+ }
+
+ err = sysfs_create_group(&chip->client->dev.kobj,
+ &bh1770_attribute_group);
+ if (err < 0) {
+ dev_err(&chip->client->dev, "Sysfs registration failed\n");
+ goto fail1;
+ }
+
+ /*
+ * Chip needs level triggered interrupt to work. However,
+ * level triggering doesn't work always correctly with power
+ * management. Select both
+ */
+ err = request_threaded_irq(client->irq, NULL,
+ bh1770_irq,
+ IRQF_TRIGGER_FALLING | IRQF_ONESHOT |
+ IRQF_TRIGGER_LOW,
+ "bh1770", chip);
+ if (err) {
+ dev_err(&client->dev, "could not get IRQ %d\n",
+ client->irq);
+ goto fail2;
+ }
+ regulator_bulk_disable(ARRAY_SIZE(chip->regs), chip->regs);
+ return err;
+fail2:
+ sysfs_remove_group(&chip->client->dev.kobj,
+ &bh1770_attribute_group);
+fail1:
+ if (chip->pdata->release_resources)
+ chip->pdata->release_resources();
+fail0:
+ regulator_bulk_disable(ARRAY_SIZE(chip->regs), chip->regs);
+ return err;
+}
+
+static int bh1770_remove(struct i2c_client *client)
+{
+ struct bh1770_chip *chip = i2c_get_clientdata(client);
+
+ free_irq(client->irq, chip);
+
+ sysfs_remove_group(&chip->client->dev.kobj,
+ &bh1770_attribute_group);
+
+ if (chip->pdata->release_resources)
+ chip->pdata->release_resources();
+
+ cancel_delayed_work_sync(&chip->prox_work);
+
+ if (!pm_runtime_suspended(&client->dev))
+ bh1770_chip_off(chip);
+
+ pm_runtime_disable(&client->dev);
+ pm_runtime_set_suspended(&client->dev);
+
+ return 0;
+}
+
+#ifdef CONFIG_PM_SLEEP
+static int bh1770_suspend(struct device *dev)
+{
+ struct i2c_client *client = container_of(dev, struct i2c_client, dev);
+ struct bh1770_chip *chip = i2c_get_clientdata(client);
+
+ bh1770_chip_off(chip);
+
+ return 0;
+}
+
+static int bh1770_resume(struct device *dev)
+{
+ struct i2c_client *client = container_of(dev, struct i2c_client, dev);
+ struct bh1770_chip *chip = i2c_get_clientdata(client);
+ int ret = 0;
+
+ bh1770_chip_on(chip);
+
+ if (!pm_runtime_suspended(dev)) {
+ /*
+ * If we were enabled at suspend time, it is expected
+ * everything works nice and smoothly
+ */
+ ret = bh1770_lux_rate(chip, chip->lux_rate_index);
+ ret |= bh1770_lux_interrupt_control(chip, BH1770_ENABLE);
+
+ /* This causes interrupt after the next measurement cycle */
+ bh1770_lux_update_thresholds(chip, BH1770_LUX_DEF_THRES,
+ BH1770_LUX_DEF_THRES);
+ /* Inform that we are waiting for a result from ALS */
+ chip->lux_wait_result = true;
+ bh1770_prox_mode_control(chip);
+ }
+ return ret;
+}
+#endif
+
+#ifdef CONFIG_PM
+static int bh1770_runtime_suspend(struct device *dev)
+{
+ struct i2c_client *client = container_of(dev, struct i2c_client, dev);
+ struct bh1770_chip *chip = i2c_get_clientdata(client);
+
+ bh1770_chip_off(chip);
+
+ return 0;
+}
+
+static int bh1770_runtime_resume(struct device *dev)
+{
+ struct i2c_client *client = container_of(dev, struct i2c_client, dev);
+ struct bh1770_chip *chip = i2c_get_clientdata(client);
+
+ bh1770_chip_on(chip);
+
+ return 0;
+}
+#endif
+
+static const struct i2c_device_id bh1770_id[] = {
+ {"bh1770glc", 0 },
+ {"sfh7770", 0 },
+ {}
+};
+
+MODULE_DEVICE_TABLE(i2c, bh1770_id);
+
+static const struct dev_pm_ops bh1770_pm_ops = {
+ SET_SYSTEM_SLEEP_PM_OPS(bh1770_suspend, bh1770_resume)
+ SET_RUNTIME_PM_OPS(bh1770_runtime_suspend, bh1770_runtime_resume, NULL)
+};
+
+static struct i2c_driver bh1770_driver = {
+ .driver = {
+ .name = "bh1770glc",
+ .pm = &bh1770_pm_ops,
+ },
+ .probe = bh1770_probe,
+ .remove = bh1770_remove,
+ .id_table = bh1770_id,
+};
+
+module_i2c_driver(bh1770_driver);
+
+MODULE_DESCRIPTION("BH1770GLC / SFH7770 combined ALS and proximity sensor");
+MODULE_AUTHOR("Samu Onkalo, Nokia Corporation");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/misc/bh1780gli.c b/drivers/misc/bh1780gli.c
new file mode 100644
index 0000000..7f90ce5
--- /dev/null
+++ b/drivers/misc/bh1780gli.c
@@ -0,0 +1,259 @@
+/*
+ * bh1780gli.c
+ * ROHM Ambient Light Sensor Driver
+ *
+ * Copyright (C) 2010 Texas Instruments
+ * Author: Hemanth V <hemanthv@ti.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+#include <linux/i2c.h>
+#include <linux/slab.h>
+#include <linux/mutex.h>
+#include <linux/platform_device.h>
+#include <linux/delay.h>
+#include <linux/module.h>
+#include <linux/of.h>
+
+#define BH1780_REG_CONTROL 0x80
+#define BH1780_REG_PARTID 0x8A
+#define BH1780_REG_MANFID 0x8B
+#define BH1780_REG_DLOW 0x8C
+#define BH1780_REG_DHIGH 0x8D
+
+#define BH1780_REVMASK (0xf)
+#define BH1780_POWMASK (0x3)
+#define BH1780_POFF (0x0)
+#define BH1780_PON (0x3)
+
+/* power on settling time in ms */
+#define BH1780_PON_DELAY 2
+
+struct bh1780_data {
+ struct i2c_client *client;
+ int power_state;
+ /* lock for sysfs operations */
+ struct mutex lock;
+};
+
+static int bh1780_write(struct bh1780_data *ddata, u8 reg, u8 val, char *msg)
+{
+ int ret = i2c_smbus_write_byte_data(ddata->client, reg, val);
+ if (ret < 0)
+ dev_err(&ddata->client->dev,
+ "i2c_smbus_write_byte_data failed error %d Register (%s)\n",
+ ret, msg);
+ return ret;
+}
+
+static int bh1780_read(struct bh1780_data *ddata, u8 reg, char *msg)
+{
+ int ret = i2c_smbus_read_byte_data(ddata->client, reg);
+ if (ret < 0)
+ dev_err(&ddata->client->dev,
+ "i2c_smbus_read_byte_data failed error %d Register (%s)\n",
+ ret, msg);
+ return ret;
+}
+
+static ssize_t bh1780_show_lux(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct platform_device *pdev = to_platform_device(dev);
+ struct bh1780_data *ddata = platform_get_drvdata(pdev);
+ int lsb, msb;
+
+ lsb = bh1780_read(ddata, BH1780_REG_DLOW, "DLOW");
+ if (lsb < 0)
+ return lsb;
+
+ msb = bh1780_read(ddata, BH1780_REG_DHIGH, "DHIGH");
+ if (msb < 0)
+ return msb;
+
+ return sprintf(buf, "%d\n", (msb << 8) | lsb);
+}
+
+static ssize_t bh1780_show_power_state(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct platform_device *pdev = to_platform_device(dev);
+ struct bh1780_data *ddata = platform_get_drvdata(pdev);
+ int state;
+
+ state = bh1780_read(ddata, BH1780_REG_CONTROL, "CONTROL");
+ if (state < 0)
+ return state;
+
+ return sprintf(buf, "%d\n", state & BH1780_POWMASK);
+}
+
+static ssize_t bh1780_store_power_state(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct platform_device *pdev = to_platform_device(dev);
+ struct bh1780_data *ddata = platform_get_drvdata(pdev);
+ unsigned long val;
+ int error;
+
+ error = kstrtoul(buf, 0, &val);
+ if (error)
+ return error;
+
+ if (val < BH1780_POFF || val > BH1780_PON)
+ return -EINVAL;
+
+ mutex_lock(&ddata->lock);
+
+ error = bh1780_write(ddata, BH1780_REG_CONTROL, val, "CONTROL");
+ if (error < 0) {
+ mutex_unlock(&ddata->lock);
+ return error;
+ }
+
+ msleep(BH1780_PON_DELAY);
+ ddata->power_state = val;
+ mutex_unlock(&ddata->lock);
+
+ return count;
+}
+
+static DEVICE_ATTR(lux, S_IRUGO, bh1780_show_lux, NULL);
+
+static DEVICE_ATTR(power_state, S_IWUSR | S_IRUGO,
+ bh1780_show_power_state, bh1780_store_power_state);
+
+static struct attribute *bh1780_attributes[] = {
+ &dev_attr_power_state.attr,
+ &dev_attr_lux.attr,
+ NULL
+};
+
+static const struct attribute_group bh1780_attr_group = {
+ .attrs = bh1780_attributes,
+};
+
+static int bh1780_probe(struct i2c_client *client,
+ const struct i2c_device_id *id)
+{
+ int ret;
+ struct bh1780_data *ddata;
+ struct i2c_adapter *adapter = to_i2c_adapter(client->dev.parent);
+
+ if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE))
+ return -EIO;
+
+ ddata = devm_kzalloc(&client->dev, sizeof(struct bh1780_data),
+ GFP_KERNEL);
+ if (ddata == NULL)
+ return -ENOMEM;
+
+ ddata->client = client;
+ i2c_set_clientdata(client, ddata);
+
+ ret = bh1780_read(ddata, BH1780_REG_PARTID, "PART ID");
+ if (ret < 0)
+ return ret;
+
+ dev_info(&client->dev, "Ambient Light Sensor, Rev : %d\n",
+ (ret & BH1780_REVMASK));
+
+ mutex_init(&ddata->lock);
+
+ return sysfs_create_group(&client->dev.kobj, &bh1780_attr_group);
+}
+
+static int bh1780_remove(struct i2c_client *client)
+{
+ sysfs_remove_group(&client->dev.kobj, &bh1780_attr_group);
+
+ return 0;
+}
+
+#ifdef CONFIG_PM_SLEEP
+static int bh1780_suspend(struct device *dev)
+{
+ struct bh1780_data *ddata;
+ int state, ret;
+ struct i2c_client *client = to_i2c_client(dev);
+
+ ddata = i2c_get_clientdata(client);
+ state = bh1780_read(ddata, BH1780_REG_CONTROL, "CONTROL");
+ if (state < 0)
+ return state;
+
+ ddata->power_state = state & BH1780_POWMASK;
+
+ ret = bh1780_write(ddata, BH1780_REG_CONTROL, BH1780_POFF,
+ "CONTROL");
+
+ if (ret < 0)
+ return ret;
+
+ return 0;
+}
+
+static int bh1780_resume(struct device *dev)
+{
+ struct bh1780_data *ddata;
+ int state, ret;
+ struct i2c_client *client = to_i2c_client(dev);
+
+ ddata = i2c_get_clientdata(client);
+ state = ddata->power_state;
+ ret = bh1780_write(ddata, BH1780_REG_CONTROL, state,
+ "CONTROL");
+
+ if (ret < 0)
+ return ret;
+
+ return 0;
+}
+#endif /* CONFIG_PM_SLEEP */
+
+static SIMPLE_DEV_PM_OPS(bh1780_pm, bh1780_suspend, bh1780_resume);
+
+static const struct i2c_device_id bh1780_id[] = {
+ { "bh1780", 0 },
+ { },
+};
+
+MODULE_DEVICE_TABLE(i2c, bh1780_id);
+
+#ifdef CONFIG_OF
+static const struct of_device_id of_bh1780_match[] = {
+ { .compatible = "rohm,bh1780gli", },
+ {},
+};
+
+MODULE_DEVICE_TABLE(of, of_bh1780_match);
+#endif
+
+static struct i2c_driver bh1780_driver = {
+ .probe = bh1780_probe,
+ .remove = bh1780_remove,
+ .id_table = bh1780_id,
+ .driver = {
+ .name = "bh1780",
+ .pm = &bh1780_pm,
+ .of_match_table = of_match_ptr(of_bh1780_match),
+ },
+};
+
+module_i2c_driver(bh1780_driver);
+
+MODULE_DESCRIPTION("BH1780GLI Ambient Light Sensor Driver");
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Hemanth V <hemanthv@ti.com>");
diff --git a/drivers/misc/bmp085-i2c.c b/drivers/misc/bmp085-i2c.c
new file mode 100644
index 0000000..f35c218
--- /dev/null
+++ b/drivers/misc/bmp085-i2c.c
@@ -0,0 +1,83 @@
+/*
+ * Copyright (c) 2012 Bosch Sensortec GmbH
+ * Copyright (c) 2012 Unixphere AB
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/module.h>
+#include <linux/i2c.h>
+#include <linux/err.h>
+#include "bmp085.h"
+
+#define BMP085_I2C_ADDRESS 0x77
+
+static const unsigned short normal_i2c[] = { BMP085_I2C_ADDRESS,
+ I2C_CLIENT_END };
+
+static int bmp085_i2c_detect(struct i2c_client *client,
+ struct i2c_board_info *info)
+{
+ if (client->addr != BMP085_I2C_ADDRESS)
+ return -ENODEV;
+
+ return bmp085_detect(&client->dev);
+}
+
+static int bmp085_i2c_probe(struct i2c_client *client,
+ const struct i2c_device_id *id)
+{
+ int err;
+ struct regmap *regmap = devm_regmap_init_i2c(client,
+ &bmp085_regmap_config);
+
+ if (IS_ERR(regmap)) {
+ err = PTR_ERR(regmap);
+ dev_err(&client->dev, "Failed to init regmap: %d\n", err);
+ return err;
+ }
+
+ return bmp085_probe(&client->dev, regmap, client->irq);
+}
+
+static int bmp085_i2c_remove(struct i2c_client *client)
+{
+ return bmp085_remove(&client->dev);
+}
+
+static const struct i2c_device_id bmp085_id[] = {
+ { BMP085_NAME, 0 },
+ { "bmp180", 0 },
+ { }
+};
+MODULE_DEVICE_TABLE(i2c, bmp085_id);
+
+static struct i2c_driver bmp085_i2c_driver = {
+ .driver = {
+ .name = BMP085_NAME,
+ },
+ .id_table = bmp085_id,
+ .probe = bmp085_i2c_probe,
+ .remove = bmp085_i2c_remove,
+
+ .detect = bmp085_i2c_detect,
+ .address_list = normal_i2c
+};
+
+module_i2c_driver(bmp085_i2c_driver);
+
+MODULE_AUTHOR("Eric Andersson <eric.andersson@unixphere.com>");
+MODULE_DESCRIPTION("BMP085 I2C bus driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/misc/bmp085-spi.c b/drivers/misc/bmp085-spi.c
new file mode 100644
index 0000000..17ecbf9
--- /dev/null
+++ b/drivers/misc/bmp085-spi.c
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2012 Bosch Sensortec GmbH
+ * Copyright (c) 2012 Unixphere AB
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/module.h>
+#include <linux/spi/spi.h>
+#include <linux/err.h>
+#include "bmp085.h"
+
+static int bmp085_spi_probe(struct spi_device *client)
+{
+ int err;
+ struct regmap *regmap;
+
+ client->bits_per_word = 8;
+ err = spi_setup(client);
+ if (err < 0) {
+ dev_err(&client->dev, "spi_setup failed!\n");
+ return err;
+ }
+
+ regmap = devm_regmap_init_spi(client, &bmp085_regmap_config);
+ if (IS_ERR(regmap)) {
+ err = PTR_ERR(regmap);
+ dev_err(&client->dev, "Failed to init regmap: %d\n", err);
+ return err;
+ }
+
+ return bmp085_probe(&client->dev, regmap, client->irq);
+}
+
+static int bmp085_spi_remove(struct spi_device *client)
+{
+ return bmp085_remove(&client->dev);
+}
+
+static const struct of_device_id bmp085_of_match[] = {
+ { .compatible = "bosch,bmp085", },
+ { },
+};
+MODULE_DEVICE_TABLE(of, bmp085_of_match);
+
+static const struct spi_device_id bmp085_id[] = {
+ { "bmp180", 0 },
+ { "bmp181", 0 },
+ { }
+};
+MODULE_DEVICE_TABLE(spi, bmp085_id);
+
+static struct spi_driver bmp085_spi_driver = {
+ .driver = {
+ .name = BMP085_NAME,
+ .of_match_table = bmp085_of_match
+ },
+ .id_table = bmp085_id,
+ .probe = bmp085_spi_probe,
+ .remove = bmp085_spi_remove
+};
+
+module_spi_driver(bmp085_spi_driver);
+
+MODULE_AUTHOR("Eric Andersson <eric.andersson@unixphere.com>");
+MODULE_DESCRIPTION("BMP085 SPI bus driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/misc/bmp085.c b/drivers/misc/bmp085.c
new file mode 100644
index 0000000..9b313f7
--- /dev/null
+++ b/drivers/misc/bmp085.c
@@ -0,0 +1,506 @@
+/* Copyright (c) 2010 Christoph Mair <christoph.mair@gmail.com>
+ * Copyright (c) 2012 Bosch Sensortec GmbH
+ * Copyright (c) 2012 Unixphere AB
+ *
+ * This driver supports the bmp085 and bmp18x digital barometric pressure
+ * and temperature sensors from Bosch Sensortec. The datasheets
+ * are available from their website:
+ * http://www.bosch-sensortec.com/content/language1/downloads/BST-BMP085-DS000-05.pdf
+ * http://www.bosch-sensortec.com/content/language1/downloads/BST-BMP180-DS000-07.pdf
+ *
+ * A pressure measurement is issued by reading from pressure0_input.
+ * The return value ranges from 30000 to 110000 pascal with a resulution
+ * of 1 pascal (0.01 millibar) which enables measurements from 9000m above
+ * to 500m below sea level.
+ *
+ * The temperature can be read from temp0_input. Values range from
+ * -400 to 850 representing the ambient temperature in degree celsius
+ * multiplied by 10.The resolution is 0.1 celsius.
+ *
+ * Because ambient pressure is temperature dependent, a temperature
+ * measurement will be executed automatically even if the user is reading
+ * from pressure0_input. This happens if the last temperature measurement
+ * has been executed more then one second ago.
+ *
+ * To decrease RMS noise from pressure measurements, the bmp085 can
+ * autonomously calculate the average of up to eight samples. This is
+ * set up by writing to the oversampling sysfs file. Accepted values
+ * are 0, 1, 2 and 3. 2^x when x is the value written to this file
+ * specifies the number of samples used to calculate the ambient pressure.
+ * RMS noise is specified with six pascal (without averaging) and decreases
+ * down to 3 pascal when using an oversampling setting of 3.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/slab.h>
+#include <linux/of.h>
+#include "bmp085.h"
+#include <linux/interrupt.h>
+#include <linux/completion.h>
+#include <linux/gpio.h>
+
+#define BMP085_CHIP_ID 0x55
+#define BMP085_CALIBRATION_DATA_START 0xAA
+#define BMP085_CALIBRATION_DATA_LENGTH 11 /* 16 bit values */
+#define BMP085_CHIP_ID_REG 0xD0
+#define BMP085_CTRL_REG 0xF4
+#define BMP085_TEMP_MEASUREMENT 0x2E
+#define BMP085_PRESSURE_MEASUREMENT 0x34
+#define BMP085_CONVERSION_REGISTER_MSB 0xF6
+#define BMP085_CONVERSION_REGISTER_LSB 0xF7
+#define BMP085_CONVERSION_REGISTER_XLSB 0xF8
+#define BMP085_TEMP_CONVERSION_TIME 5
+
+struct bmp085_calibration_data {
+ s16 AC1, AC2, AC3;
+ u16 AC4, AC5, AC6;
+ s16 B1, B2;
+ s16 MB, MC, MD;
+};
+
+struct bmp085_data {
+ struct device *dev;
+ struct regmap *regmap;
+ struct mutex lock;
+ struct bmp085_calibration_data calibration;
+ u8 oversampling_setting;
+ u32 raw_temperature;
+ u32 raw_pressure;
+ u32 temp_measurement_period;
+ unsigned long last_temp_measurement;
+ u8 chip_id;
+ s32 b6; /* calculated temperature correction coefficient */
+ int irq;
+ struct completion done;
+};
+
+static irqreturn_t bmp085_eoc_isr(int irq, void *devid)
+{
+ struct bmp085_data *data = devid;
+
+ complete(&data->done);
+
+ return IRQ_HANDLED;
+}
+
+static s32 bmp085_read_calibration_data(struct bmp085_data *data)
+{
+ u16 tmp[BMP085_CALIBRATION_DATA_LENGTH];
+ struct bmp085_calibration_data *cali = &(data->calibration);
+ s32 status = regmap_bulk_read(data->regmap,
+ BMP085_CALIBRATION_DATA_START, (u8 *)tmp,
+ (BMP085_CALIBRATION_DATA_LENGTH << 1));
+ if (status < 0)
+ return status;
+
+ cali->AC1 = be16_to_cpu(tmp[0]);
+ cali->AC2 = be16_to_cpu(tmp[1]);
+ cali->AC3 = be16_to_cpu(tmp[2]);
+ cali->AC4 = be16_to_cpu(tmp[3]);
+ cali->AC5 = be16_to_cpu(tmp[4]);
+ cali->AC6 = be16_to_cpu(tmp[5]);
+ cali->B1 = be16_to_cpu(tmp[6]);
+ cali->B2 = be16_to_cpu(tmp[7]);
+ cali->MB = be16_to_cpu(tmp[8]);
+ cali->MC = be16_to_cpu(tmp[9]);
+ cali->MD = be16_to_cpu(tmp[10]);
+ return 0;
+}
+
+static s32 bmp085_update_raw_temperature(struct bmp085_data *data)
+{
+ u16 tmp;
+ s32 status;
+
+ mutex_lock(&data->lock);
+
+ init_completion(&data->done);
+
+ status = regmap_write(data->regmap, BMP085_CTRL_REG,
+ BMP085_TEMP_MEASUREMENT);
+ if (status < 0) {
+ dev_err(data->dev,
+ "Error while requesting temperature measurement.\n");
+ goto exit;
+ }
+ wait_for_completion_timeout(&data->done, 1 + msecs_to_jiffies(
+ BMP085_TEMP_CONVERSION_TIME));
+
+ status = regmap_bulk_read(data->regmap, BMP085_CONVERSION_REGISTER_MSB,
+ &tmp, sizeof(tmp));
+ if (status < 0) {
+ dev_err(data->dev,
+ "Error while reading temperature measurement result\n");
+ goto exit;
+ }
+ data->raw_temperature = be16_to_cpu(tmp);
+ data->last_temp_measurement = jiffies;
+ status = 0; /* everything ok, return 0 */
+
+exit:
+ mutex_unlock(&data->lock);
+ return status;
+}
+
+static s32 bmp085_update_raw_pressure(struct bmp085_data *data)
+{
+ u32 tmp = 0;
+ s32 status;
+
+ mutex_lock(&data->lock);
+
+ init_completion(&data->done);
+
+ status = regmap_write(data->regmap, BMP085_CTRL_REG,
+ BMP085_PRESSURE_MEASUREMENT +
+ (data->oversampling_setting << 6));
+ if (status < 0) {
+ dev_err(data->dev,
+ "Error while requesting pressure measurement.\n");
+ goto exit;
+ }
+
+ /* wait for the end of conversion */
+ wait_for_completion_timeout(&data->done, 1 + msecs_to_jiffies(
+ 2+(3 << data->oversampling_setting)));
+ /* copy data into a u32 (4 bytes), but skip the first byte. */
+ status = regmap_bulk_read(data->regmap, BMP085_CONVERSION_REGISTER_MSB,
+ ((u8 *)&tmp)+1, 3);
+ if (status < 0) {
+ dev_err(data->dev,
+ "Error while reading pressure measurement results\n");
+ goto exit;
+ }
+ data->raw_pressure = be32_to_cpu((tmp));
+ data->raw_pressure >>= (8-data->oversampling_setting);
+ status = 0; /* everything ok, return 0 */
+
+exit:
+ mutex_unlock(&data->lock);
+ return status;
+}
+
+/*
+ * This function starts the temperature measurement and returns the value
+ * in tenth of a degree celsius.
+ */
+static s32 bmp085_get_temperature(struct bmp085_data *data, int *temperature)
+{
+ struct bmp085_calibration_data *cali = &data->calibration;
+ long x1, x2;
+ int status;
+
+ status = bmp085_update_raw_temperature(data);
+ if (status < 0)
+ goto exit;
+
+ x1 = ((data->raw_temperature - cali->AC6) * cali->AC5) >> 15;
+ x2 = (cali->MC << 11) / (x1 + cali->MD);
+ data->b6 = x1 + x2 - 4000;
+ /* if NULL just update b6. Used for pressure only measurements */
+ if (temperature != NULL)
+ *temperature = (x1+x2+8) >> 4;
+
+exit:
+ return status;
+}
+
+/*
+ * This function starts the pressure measurement and returns the value
+ * in millibar. Since the pressure depends on the ambient temperature,
+ * a temperature measurement is executed according to the given temperature
+ * measurement period (default is 1 sec boundary). This period could vary
+ * and needs to be adjusted according to the sensor environment, i.e. if big
+ * temperature variations then the temperature needs to be read out often.
+ */
+static s32 bmp085_get_pressure(struct bmp085_data *data, int *pressure)
+{
+ struct bmp085_calibration_data *cali = &data->calibration;
+ s32 x1, x2, x3, b3;
+ u32 b4, b7;
+ s32 p;
+ int status;
+
+ /* alt least every second force an update of the ambient temperature */
+ if ((data->last_temp_measurement == 0) ||
+ time_is_before_jiffies(data->last_temp_measurement + 1*HZ)) {
+ status = bmp085_get_temperature(data, NULL);
+ if (status < 0)
+ return status;
+ }
+
+ status = bmp085_update_raw_pressure(data);
+ if (status < 0)
+ return status;
+
+ x1 = (data->b6 * data->b6) >> 12;
+ x1 *= cali->B2;
+ x1 >>= 11;
+
+ x2 = cali->AC2 * data->b6;
+ x2 >>= 11;
+
+ x3 = x1 + x2;
+
+ b3 = (((((s32)cali->AC1) * 4 + x3) << data->oversampling_setting) + 2);
+ b3 >>= 2;
+
+ x1 = (cali->AC3 * data->b6) >> 13;
+ x2 = (cali->B1 * ((data->b6 * data->b6) >> 12)) >> 16;
+ x3 = (x1 + x2 + 2) >> 2;
+ b4 = (cali->AC4 * (u32)(x3 + 32768)) >> 15;
+
+ b7 = ((u32)data->raw_pressure - b3) *
+ (50000 >> data->oversampling_setting);
+ p = ((b7 < 0x80000000) ? ((b7 << 1) / b4) : ((b7 / b4) * 2));
+
+ x1 = p >> 8;
+ x1 *= x1;
+ x1 = (x1 * 3038) >> 16;
+ x2 = (-7357 * p) >> 16;
+ p += (x1 + x2 + 3791) >> 4;
+
+ *pressure = p;
+
+ return 0;
+}
+
+/*
+ * This function sets the chip-internal oversampling. Valid values are 0..3.
+ * The chip will use 2^oversampling samples for internal averaging.
+ * This influences the measurement time and the accuracy; larger values
+ * increase both. The datasheet gives an overview on how measurement time,
+ * accuracy and noise correlate.
+ */
+static void bmp085_set_oversampling(struct bmp085_data *data,
+ unsigned char oversampling)
+{
+ if (oversampling > 3)
+ oversampling = 3;
+ data->oversampling_setting = oversampling;
+}
+
+/*
+ * Returns the currently selected oversampling. Range: 0..3
+ */
+static unsigned char bmp085_get_oversampling(struct bmp085_data *data)
+{
+ return data->oversampling_setting;
+}
+
+/* sysfs callbacks */
+static ssize_t set_oversampling(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct bmp085_data *data = dev_get_drvdata(dev);
+ unsigned long oversampling;
+ int err = kstrtoul(buf, 10, &oversampling);
+
+ if (err == 0) {
+ mutex_lock(&data->lock);
+ bmp085_set_oversampling(data, oversampling);
+ mutex_unlock(&data->lock);
+ return count;
+ }
+
+ return err;
+}
+
+static ssize_t show_oversampling(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct bmp085_data *data = dev_get_drvdata(dev);
+
+ return sprintf(buf, "%u\n", bmp085_get_oversampling(data));
+}
+static DEVICE_ATTR(oversampling, S_IWUSR | S_IRUGO,
+ show_oversampling, set_oversampling);
+
+
+static ssize_t show_temperature(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ int temperature;
+ int status;
+ struct bmp085_data *data = dev_get_drvdata(dev);
+
+ status = bmp085_get_temperature(data, &temperature);
+ if (status < 0)
+ return status;
+ else
+ return sprintf(buf, "%d\n", temperature);
+}
+static DEVICE_ATTR(temp0_input, S_IRUGO, show_temperature, NULL);
+
+
+static ssize_t show_pressure(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ int pressure;
+ int status;
+ struct bmp085_data *data = dev_get_drvdata(dev);
+
+ status = bmp085_get_pressure(data, &pressure);
+ if (status < 0)
+ return status;
+ else
+ return sprintf(buf, "%d\n", pressure);
+}
+static DEVICE_ATTR(pressure0_input, S_IRUGO, show_pressure, NULL);
+
+
+static struct attribute *bmp085_attributes[] = {
+ &dev_attr_temp0_input.attr,
+ &dev_attr_pressure0_input.attr,
+ &dev_attr_oversampling.attr,
+ NULL
+};
+
+static const struct attribute_group bmp085_attr_group = {
+ .attrs = bmp085_attributes,
+};
+
+int bmp085_detect(struct device *dev)
+{
+ struct bmp085_data *data = dev_get_drvdata(dev);
+ unsigned int id;
+ int ret;
+
+ ret = regmap_read(data->regmap, BMP085_CHIP_ID_REG, &id);
+ if (ret < 0)
+ return ret;
+
+ if (id != data->chip_id)
+ return -ENODEV;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(bmp085_detect);
+
+static void bmp085_get_of_properties(struct bmp085_data *data)
+{
+#ifdef CONFIG_OF
+ struct device_node *np = data->dev->of_node;
+ u32 prop;
+
+ if (!np)
+ return;
+
+ if (!of_property_read_u32(np, "chip-id", &prop))
+ data->chip_id = prop & 0xff;
+
+ if (!of_property_read_u32(np, "temp-measurement-period", &prop))
+ data->temp_measurement_period = (prop/100)*HZ;
+
+ if (!of_property_read_u32(np, "default-oversampling", &prop))
+ data->oversampling_setting = prop & 0xff;
+#endif
+}
+
+static int bmp085_init_client(struct bmp085_data *data)
+{
+ int status = bmp085_read_calibration_data(data);
+
+ if (status < 0)
+ return status;
+
+ /* default settings */
+ data->chip_id = BMP085_CHIP_ID;
+ data->last_temp_measurement = 0;
+ data->temp_measurement_period = 1*HZ;
+ data->oversampling_setting = 3;
+
+ bmp085_get_of_properties(data);
+
+ mutex_init(&data->lock);
+
+ return 0;
+}
+
+struct regmap_config bmp085_regmap_config = {
+ .reg_bits = 8,
+ .val_bits = 8
+};
+EXPORT_SYMBOL_GPL(bmp085_regmap_config);
+
+int bmp085_probe(struct device *dev, struct regmap *regmap, int irq)
+{
+ struct bmp085_data *data;
+ int err = 0;
+
+ data = kzalloc(sizeof(struct bmp085_data), GFP_KERNEL);
+ if (!data) {
+ err = -ENOMEM;
+ goto exit;
+ }
+
+ dev_set_drvdata(dev, data);
+ data->dev = dev;
+ data->regmap = regmap;
+ data->irq = irq;
+
+ if (data->irq > 0) {
+ err = devm_request_irq(dev, data->irq, bmp085_eoc_isr,
+ IRQF_TRIGGER_RISING, "bmp085",
+ data);
+ if (err < 0)
+ goto exit_free;
+ }
+
+ /* Initialize the BMP085 chip */
+ err = bmp085_init_client(data);
+ if (err < 0)
+ goto exit_free;
+
+ err = bmp085_detect(dev);
+ if (err < 0) {
+ dev_err(dev, "%s: chip_id failed!\n", BMP085_NAME);
+ goto exit_free;
+ }
+
+ /* Register sysfs hooks */
+ err = sysfs_create_group(&dev->kobj, &bmp085_attr_group);
+ if (err)
+ goto exit_free;
+
+ dev_info(dev, "Successfully initialized %s!\n", BMP085_NAME);
+
+ return 0;
+
+exit_free:
+ kfree(data);
+exit:
+ return err;
+}
+EXPORT_SYMBOL_GPL(bmp085_probe);
+
+int bmp085_remove(struct device *dev)
+{
+ struct bmp085_data *data = dev_get_drvdata(dev);
+
+ sysfs_remove_group(&data->dev->kobj, &bmp085_attr_group);
+ kfree(data);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(bmp085_remove);
+
+MODULE_AUTHOR("Christoph Mair <christoph.mair@gmail.com>");
+MODULE_DESCRIPTION("BMP085 driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/misc/bmp085.h b/drivers/misc/bmp085.h
new file mode 100644
index 0000000..8b8e3b1
--- /dev/null
+++ b/drivers/misc/bmp085.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2012 Bosch Sensortec GmbH
+ * Copyright (c) 2012 Unixphere AB
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef _BMP085_H
+#define _BMP085_H
+
+#include <linux/regmap.h>
+
+#define BMP085_NAME "bmp085"
+
+extern struct regmap_config bmp085_regmap_config;
+
+int bmp085_probe(struct device *dev, struct regmap *regmap, int irq);
+int bmp085_remove(struct device *dev);
+int bmp085_detect(struct device *dev);
+
+#endif
diff --git a/drivers/misc/c2port/Kconfig b/drivers/misc/c2port/Kconfig
new file mode 100644
index 0000000..0dd690e
--- /dev/null
+++ b/drivers/misc/c2port/Kconfig
@@ -0,0 +1,34 @@
+#
+# C2 port devices
+#
+
+menuconfig C2PORT
+ tristate "Silicon Labs C2 port support"
+ default n
+ help
+ This option enables support for Silicon Labs C2 port used to
+ program Silicon micro controller chips (and other 8051 compatible).
+
+ If your board have no such micro controllers you don't need this
+ interface at all.
+
+ To compile this driver as a module, choose M here: the module will
+ be called c2port_core. Note that you also need a client module
+ usually called c2port-*.
+
+ If you are not sure, say N here.
+
+if C2PORT
+
+config C2PORT_DURAMAR_2150
+ tristate "C2 port support for Eurotech's Duramar 2150"
+ depends on X86
+ default n
+ help
+ This option enables C2 support for the Eurotech's Duramar 2150
+ on board micro controller.
+
+ To compile this driver as a module, choose M here: the module will
+ be called c2port-duramar2150.
+
+endif # C2PORT
diff --git a/drivers/misc/c2port/Makefile b/drivers/misc/c2port/Makefile
new file mode 100644
index 0000000..3b2cf43
--- /dev/null
+++ b/drivers/misc/c2port/Makefile
@@ -0,0 +1,3 @@
+obj-$(CONFIG_C2PORT) += core.o
+
+obj-$(CONFIG_C2PORT_DURAMAR_2150) += c2port-duramar2150.o
diff --git a/drivers/misc/c2port/c2port-duramar2150.c b/drivers/misc/c2port/c2port-duramar2150.c
new file mode 100644
index 0000000..3dc61ea
--- /dev/null
+++ b/drivers/misc/c2port/c2port-duramar2150.c
@@ -0,0 +1,159 @@
+/*
+ * Silicon Labs C2 port Linux support for Eurotech Duramar 2150
+ *
+ * Copyright (c) 2008 Rodolfo Giometti <giometti@linux.it>
+ * Copyright (c) 2008 Eurotech S.p.A. <info@eurotech.it>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation
+ */
+
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/delay.h>
+#include <linux/io.h>
+#include <linux/ioport.h>
+#include <linux/c2port.h>
+
+#define DATA_PORT 0x325
+#define DIR_PORT 0x326
+#define C2D (1 << 0)
+#define C2CK (1 << 1)
+
+static DEFINE_MUTEX(update_lock);
+
+/*
+ * C2 port operations
+ */
+
+static void duramar2150_c2port_access(struct c2port_device *dev, int status)
+{
+ u8 v;
+
+ mutex_lock(&update_lock);
+
+ v = inb(DIR_PORT);
+
+ /* 0 = input, 1 = output */
+ if (status)
+ outb(v | (C2D | C2CK), DIR_PORT);
+ else
+ /* When access is "off" is important that both lines are set
+ * as inputs or hi-impedance */
+ outb(v & ~(C2D | C2CK), DIR_PORT);
+
+ mutex_unlock(&update_lock);
+}
+
+static void duramar2150_c2port_c2d_dir(struct c2port_device *dev, int dir)
+{
+ u8 v;
+
+ mutex_lock(&update_lock);
+
+ v = inb(DIR_PORT);
+
+ if (dir)
+ outb(v & ~C2D, DIR_PORT);
+ else
+ outb(v | C2D, DIR_PORT);
+
+ mutex_unlock(&update_lock);
+}
+
+static int duramar2150_c2port_c2d_get(struct c2port_device *dev)
+{
+ return inb(DATA_PORT) & C2D;
+}
+
+static void duramar2150_c2port_c2d_set(struct c2port_device *dev, int status)
+{
+ u8 v;
+
+ mutex_lock(&update_lock);
+
+ v = inb(DATA_PORT);
+
+ if (status)
+ outb(v | C2D, DATA_PORT);
+ else
+ outb(v & ~C2D, DATA_PORT);
+
+ mutex_unlock(&update_lock);
+}
+
+static void duramar2150_c2port_c2ck_set(struct c2port_device *dev, int status)
+{
+ u8 v;
+
+ mutex_lock(&update_lock);
+
+ v = inb(DATA_PORT);
+
+ if (status)
+ outb(v | C2CK, DATA_PORT);
+ else
+ outb(v & ~C2CK, DATA_PORT);
+
+ mutex_unlock(&update_lock);
+}
+
+static struct c2port_ops duramar2150_c2port_ops = {
+ .block_size = 512, /* bytes */
+ .blocks_num = 30, /* total flash size: 15360 bytes */
+
+ .access = duramar2150_c2port_access,
+ .c2d_dir = duramar2150_c2port_c2d_dir,
+ .c2d_get = duramar2150_c2port_c2d_get,
+ .c2d_set = duramar2150_c2port_c2d_set,
+ .c2ck_set = duramar2150_c2port_c2ck_set,
+};
+
+static struct c2port_device *duramar2150_c2port_dev;
+
+/*
+ * Module stuff
+ */
+
+static int __init duramar2150_c2port_init(void)
+{
+ struct resource *res;
+ int ret = 0;
+
+ res = request_region(0x325, 2, "c2port");
+ if (!res)
+ return -EBUSY;
+
+ duramar2150_c2port_dev = c2port_device_register("uc",
+ &duramar2150_c2port_ops, NULL);
+ if (IS_ERR(duramar2150_c2port_dev)) {
+ ret = PTR_ERR(duramar2150_c2port_dev);
+ goto free_region;
+ }
+
+ return 0;
+
+free_region:
+ release_region(0x325, 2);
+ return ret;
+}
+
+static void __exit duramar2150_c2port_exit(void)
+{
+ /* Setup the GPIOs as input by default (access = 0) */
+ duramar2150_c2port_access(duramar2150_c2port_dev, 0);
+
+ c2port_device_unregister(duramar2150_c2port_dev);
+
+ release_region(0x325, 2);
+}
+
+module_init(duramar2150_c2port_init);
+module_exit(duramar2150_c2port_exit);
+
+MODULE_AUTHOR("Rodolfo Giometti <giometti@linux.it>");
+MODULE_DESCRIPTION("Silicon Labs C2 port Linux support for Duramar 2150");
+MODULE_LICENSE("GPL");
diff --git a/drivers/misc/c2port/core.c b/drivers/misc/c2port/core.c
new file mode 100644
index 0000000..cc8645b
--- /dev/null
+++ b/drivers/misc/c2port/core.c
@@ -0,0 +1,1009 @@
+/*
+ * Silicon Labs C2 port core Linux support
+ *
+ * Copyright (c) 2007 Rodolfo Giometti <giometti@linux.it>
+ * Copyright (c) 2007 Eurotech S.p.A. <info@eurotech.it>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/device.h>
+#include <linux/errno.h>
+#include <linux/err.h>
+#include <linux/kernel.h>
+#include <linux/kmemcheck.h>
+#include <linux/ctype.h>
+#include <linux/delay.h>
+#include <linux/idr.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+
+#include <linux/c2port.h>
+
+#define DRIVER_NAME "c2port"
+#define DRIVER_VERSION "0.51.0"
+
+static DEFINE_SPINLOCK(c2port_idr_lock);
+static DEFINE_IDR(c2port_idr);
+
+/*
+ * Local variables
+ */
+
+static struct class *c2port_class;
+
+/*
+ * C2 registers & commands defines
+ */
+
+/* C2 registers */
+#define C2PORT_DEVICEID 0x00
+#define C2PORT_REVID 0x01
+#define C2PORT_FPCTL 0x02
+#define C2PORT_FPDAT 0xB4
+
+/* C2 interface commands */
+#define C2PORT_GET_VERSION 0x01
+#define C2PORT_DEVICE_ERASE 0x03
+#define C2PORT_BLOCK_READ 0x06
+#define C2PORT_BLOCK_WRITE 0x07
+#define C2PORT_PAGE_ERASE 0x08
+
+/* C2 status return codes */
+#define C2PORT_INVALID_COMMAND 0x00
+#define C2PORT_COMMAND_FAILED 0x02
+#define C2PORT_COMMAND_OK 0x0d
+
+/*
+ * C2 port low level signal managements
+ */
+
+static void c2port_reset(struct c2port_device *dev)
+{
+ struct c2port_ops *ops = dev->ops;
+
+ /* To reset the device we have to keep clock line low for at least
+ * 20us.
+ */
+ local_irq_disable();
+ ops->c2ck_set(dev, 0);
+ udelay(25);
+ ops->c2ck_set(dev, 1);
+ local_irq_enable();
+
+ udelay(1);
+}
+
+static void c2port_strobe_ck(struct c2port_device *dev)
+{
+ struct c2port_ops *ops = dev->ops;
+
+ /* During hi-low-hi transition we disable local IRQs to avoid
+ * interructions since C2 port specification says that it must be
+ * shorter than 5us, otherwise the microcontroller may consider
+ * it as a reset signal!
+ */
+ local_irq_disable();
+ ops->c2ck_set(dev, 0);
+ udelay(1);
+ ops->c2ck_set(dev, 1);
+ local_irq_enable();
+
+ udelay(1);
+}
+
+/*
+ * C2 port basic functions
+ */
+
+static void c2port_write_ar(struct c2port_device *dev, u8 addr)
+{
+ struct c2port_ops *ops = dev->ops;
+ int i;
+
+ /* START field */
+ c2port_strobe_ck(dev);
+
+ /* INS field (11b, LSB first) */
+ ops->c2d_dir(dev, 0);
+ ops->c2d_set(dev, 1);
+ c2port_strobe_ck(dev);
+ ops->c2d_set(dev, 1);
+ c2port_strobe_ck(dev);
+
+ /* ADDRESS field */
+ for (i = 0; i < 8; i++) {
+ ops->c2d_set(dev, addr & 0x01);
+ c2port_strobe_ck(dev);
+
+ addr >>= 1;
+ }
+
+ /* STOP field */
+ ops->c2d_dir(dev, 1);
+ c2port_strobe_ck(dev);
+}
+
+static int c2port_read_ar(struct c2port_device *dev, u8 *addr)
+{
+ struct c2port_ops *ops = dev->ops;
+ int i;
+
+ /* START field */
+ c2port_strobe_ck(dev);
+
+ /* INS field (10b, LSB first) */
+ ops->c2d_dir(dev, 0);
+ ops->c2d_set(dev, 0);
+ c2port_strobe_ck(dev);
+ ops->c2d_set(dev, 1);
+ c2port_strobe_ck(dev);
+
+ /* ADDRESS field */
+ ops->c2d_dir(dev, 1);
+ *addr = 0;
+ for (i = 0; i < 8; i++) {
+ *addr >>= 1; /* shift in 8-bit ADDRESS field LSB first */
+
+ c2port_strobe_ck(dev);
+ if (ops->c2d_get(dev))
+ *addr |= 0x80;
+ }
+
+ /* STOP field */
+ c2port_strobe_ck(dev);
+
+ return 0;
+}
+
+static int c2port_write_dr(struct c2port_device *dev, u8 data)
+{
+ struct c2port_ops *ops = dev->ops;
+ int timeout, i;
+
+ /* START field */
+ c2port_strobe_ck(dev);
+
+ /* INS field (01b, LSB first) */
+ ops->c2d_dir(dev, 0);
+ ops->c2d_set(dev, 1);
+ c2port_strobe_ck(dev);
+ ops->c2d_set(dev, 0);
+ c2port_strobe_ck(dev);
+
+ /* LENGTH field (00b, LSB first -> 1 byte) */
+ ops->c2d_set(dev, 0);
+ c2port_strobe_ck(dev);
+ ops->c2d_set(dev, 0);
+ c2port_strobe_ck(dev);
+
+ /* DATA field */
+ for (i = 0; i < 8; i++) {
+ ops->c2d_set(dev, data & 0x01);
+ c2port_strobe_ck(dev);
+
+ data >>= 1;
+ }
+
+ /* WAIT field */
+ ops->c2d_dir(dev, 1);
+ timeout = 20;
+ do {
+ c2port_strobe_ck(dev);
+ if (ops->c2d_get(dev))
+ break;
+
+ udelay(1);
+ } while (--timeout > 0);
+ if (timeout == 0)
+ return -EIO;
+
+ /* STOP field */
+ c2port_strobe_ck(dev);
+
+ return 0;
+}
+
+static int c2port_read_dr(struct c2port_device *dev, u8 *data)
+{
+ struct c2port_ops *ops = dev->ops;
+ int timeout, i;
+
+ /* START field */
+ c2port_strobe_ck(dev);
+
+ /* INS field (00b, LSB first) */
+ ops->c2d_dir(dev, 0);
+ ops->c2d_set(dev, 0);
+ c2port_strobe_ck(dev);
+ ops->c2d_set(dev, 0);
+ c2port_strobe_ck(dev);
+
+ /* LENGTH field (00b, LSB first -> 1 byte) */
+ ops->c2d_set(dev, 0);
+ c2port_strobe_ck(dev);
+ ops->c2d_set(dev, 0);
+ c2port_strobe_ck(dev);
+
+ /* WAIT field */
+ ops->c2d_dir(dev, 1);
+ timeout = 20;
+ do {
+ c2port_strobe_ck(dev);
+ if (ops->c2d_get(dev))
+ break;
+
+ udelay(1);
+ } while (--timeout > 0);
+ if (timeout == 0)
+ return -EIO;
+
+ /* DATA field */
+ *data = 0;
+ for (i = 0; i < 8; i++) {
+ *data >>= 1; /* shift in 8-bit DATA field LSB first */
+
+ c2port_strobe_ck(dev);
+ if (ops->c2d_get(dev))
+ *data |= 0x80;
+ }
+
+ /* STOP field */
+ c2port_strobe_ck(dev);
+
+ return 0;
+}
+
+static int c2port_poll_in_busy(struct c2port_device *dev)
+{
+ u8 addr;
+ int ret, timeout = 20;
+
+ do {
+ ret = (c2port_read_ar(dev, &addr));
+ if (ret < 0)
+ return -EIO;
+
+ if (!(addr & 0x02))
+ break;
+
+ udelay(1);
+ } while (--timeout > 0);
+ if (timeout == 0)
+ return -EIO;
+
+ return 0;
+}
+
+static int c2port_poll_out_ready(struct c2port_device *dev)
+{
+ u8 addr;
+ int ret, timeout = 10000; /* erase flash needs long time... */
+
+ do {
+ ret = (c2port_read_ar(dev, &addr));
+ if (ret < 0)
+ return -EIO;
+
+ if (addr & 0x01)
+ break;
+
+ udelay(1);
+ } while (--timeout > 0);
+ if (timeout == 0)
+ return -EIO;
+
+ return 0;
+}
+
+/*
+ * sysfs methods
+ */
+
+static ssize_t c2port_show_name(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct c2port_device *c2dev = dev_get_drvdata(dev);
+
+ return sprintf(buf, "%s\n", c2dev->name);
+}
+static DEVICE_ATTR(name, 0444, c2port_show_name, NULL);
+
+static ssize_t c2port_show_flash_blocks_num(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct c2port_device *c2dev = dev_get_drvdata(dev);
+ struct c2port_ops *ops = c2dev->ops;
+
+ return sprintf(buf, "%d\n", ops->blocks_num);
+}
+static DEVICE_ATTR(flash_blocks_num, 0444, c2port_show_flash_blocks_num, NULL);
+
+static ssize_t c2port_show_flash_block_size(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct c2port_device *c2dev = dev_get_drvdata(dev);
+ struct c2port_ops *ops = c2dev->ops;
+
+ return sprintf(buf, "%d\n", ops->block_size);
+}
+static DEVICE_ATTR(flash_block_size, 0444, c2port_show_flash_block_size, NULL);
+
+static ssize_t c2port_show_flash_size(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct c2port_device *c2dev = dev_get_drvdata(dev);
+ struct c2port_ops *ops = c2dev->ops;
+
+ return sprintf(buf, "%d\n", ops->blocks_num * ops->block_size);
+}
+static DEVICE_ATTR(flash_size, 0444, c2port_show_flash_size, NULL);
+
+static ssize_t access_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct c2port_device *c2dev = dev_get_drvdata(dev);
+
+ return sprintf(buf, "%d\n", c2dev->access);
+}
+
+static ssize_t access_store(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct c2port_device *c2dev = dev_get_drvdata(dev);
+ struct c2port_ops *ops = c2dev->ops;
+ int status, ret;
+
+ ret = sscanf(buf, "%d", &status);
+ if (ret != 1)
+ return -EINVAL;
+
+ mutex_lock(&c2dev->mutex);
+
+ c2dev->access = !!status;
+
+ /* If access is "on" clock should be HIGH _before_ setting the line
+ * as output and data line should be set as INPUT anyway */
+ if (c2dev->access)
+ ops->c2ck_set(c2dev, 1);
+ ops->access(c2dev, c2dev->access);
+ if (c2dev->access)
+ ops->c2d_dir(c2dev, 1);
+
+ mutex_unlock(&c2dev->mutex);
+
+ return count;
+}
+static DEVICE_ATTR_RW(access);
+
+static ssize_t c2port_store_reset(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct c2port_device *c2dev = dev_get_drvdata(dev);
+
+ /* Check the device access status */
+ if (!c2dev->access)
+ return -EBUSY;
+
+ mutex_lock(&c2dev->mutex);
+
+ c2port_reset(c2dev);
+ c2dev->flash_access = 0;
+
+ mutex_unlock(&c2dev->mutex);
+
+ return count;
+}
+static DEVICE_ATTR(reset, 0200, NULL, c2port_store_reset);
+
+static ssize_t __c2port_show_dev_id(struct c2port_device *dev, char *buf)
+{
+ u8 data;
+ int ret;
+
+ /* Select DEVICEID register for C2 data register accesses */
+ c2port_write_ar(dev, C2PORT_DEVICEID);
+
+ /* Read and return the device ID register */
+ ret = c2port_read_dr(dev, &data);
+ if (ret < 0)
+ return ret;
+
+ return sprintf(buf, "%d\n", data);
+}
+
+static ssize_t c2port_show_dev_id(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct c2port_device *c2dev = dev_get_drvdata(dev);
+ ssize_t ret;
+
+ /* Check the device access status */
+ if (!c2dev->access)
+ return -EBUSY;
+
+ mutex_lock(&c2dev->mutex);
+ ret = __c2port_show_dev_id(c2dev, buf);
+ mutex_unlock(&c2dev->mutex);
+
+ if (ret < 0)
+ dev_err(dev, "cannot read from %s\n", c2dev->name);
+
+ return ret;
+}
+static DEVICE_ATTR(dev_id, 0444, c2port_show_dev_id, NULL);
+
+static ssize_t __c2port_show_rev_id(struct c2port_device *dev, char *buf)
+{
+ u8 data;
+ int ret;
+
+ /* Select REVID register for C2 data register accesses */
+ c2port_write_ar(dev, C2PORT_REVID);
+
+ /* Read and return the revision ID register */
+ ret = c2port_read_dr(dev, &data);
+ if (ret < 0)
+ return ret;
+
+ return sprintf(buf, "%d\n", data);
+}
+
+static ssize_t c2port_show_rev_id(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct c2port_device *c2dev = dev_get_drvdata(dev);
+ ssize_t ret;
+
+ /* Check the device access status */
+ if (!c2dev->access)
+ return -EBUSY;
+
+ mutex_lock(&c2dev->mutex);
+ ret = __c2port_show_rev_id(c2dev, buf);
+ mutex_unlock(&c2dev->mutex);
+
+ if (ret < 0)
+ dev_err(c2dev->dev, "cannot read from %s\n", c2dev->name);
+
+ return ret;
+}
+static DEVICE_ATTR(rev_id, 0444, c2port_show_rev_id, NULL);
+
+static ssize_t c2port_show_flash_access(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct c2port_device *c2dev = dev_get_drvdata(dev);
+
+ return sprintf(buf, "%d\n", c2dev->flash_access);
+}
+
+static ssize_t __c2port_store_flash_access(struct c2port_device *dev,
+ int status)
+{
+ int ret;
+
+ /* Check the device access status */
+ if (!dev->access)
+ return -EBUSY;
+
+ dev->flash_access = !!status;
+
+ /* If flash_access is off we have nothing to do... */
+ if (dev->flash_access == 0)
+ return 0;
+
+ /* Target the C2 flash programming control register for C2 data
+ * register access */
+ c2port_write_ar(dev, C2PORT_FPCTL);
+
+ /* Write the first keycode to enable C2 Flash programming */
+ ret = c2port_write_dr(dev, 0x02);
+ if (ret < 0)
+ return ret;
+
+ /* Write the second keycode to enable C2 Flash programming */
+ ret = c2port_write_dr(dev, 0x01);
+ if (ret < 0)
+ return ret;
+
+ /* Delay for at least 20ms to ensure the target is ready for
+ * C2 flash programming */
+ mdelay(25);
+
+ return 0;
+}
+
+static ssize_t c2port_store_flash_access(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct c2port_device *c2dev = dev_get_drvdata(dev);
+ int status;
+ ssize_t ret;
+
+ ret = sscanf(buf, "%d", &status);
+ if (ret != 1)
+ return -EINVAL;
+
+ mutex_lock(&c2dev->mutex);
+ ret = __c2port_store_flash_access(c2dev, status);
+ mutex_unlock(&c2dev->mutex);
+
+ if (ret < 0) {
+ dev_err(c2dev->dev, "cannot enable %s flash programming\n",
+ c2dev->name);
+ return ret;
+ }
+
+ return count;
+}
+static DEVICE_ATTR(flash_access, 0644, c2port_show_flash_access,
+ c2port_store_flash_access);
+
+static ssize_t __c2port_write_flash_erase(struct c2port_device *dev)
+{
+ u8 status;
+ int ret;
+
+ /* Target the C2 flash programming data register for C2 data register
+ * access.
+ */
+ c2port_write_ar(dev, C2PORT_FPDAT);
+
+ /* Send device erase command */
+ c2port_write_dr(dev, C2PORT_DEVICE_ERASE);
+
+ /* Wait for input acknowledge */
+ ret = c2port_poll_in_busy(dev);
+ if (ret < 0)
+ return ret;
+
+ /* Should check status before starting FLASH access sequence */
+
+ /* Wait for status information */
+ ret = c2port_poll_out_ready(dev);
+ if (ret < 0)
+ return ret;
+
+ /* Read flash programming interface status */
+ ret = c2port_read_dr(dev, &status);
+ if (ret < 0)
+ return ret;
+ if (status != C2PORT_COMMAND_OK)
+ return -EBUSY;
+
+ /* Send a three-byte arming sequence to enable the device erase.
+ * If the sequence is not received correctly, the command will be
+ * ignored.
+ * Sequence is: 0xde, 0xad, 0xa5.
+ */
+ c2port_write_dr(dev, 0xde);
+ ret = c2port_poll_in_busy(dev);
+ if (ret < 0)
+ return ret;
+ c2port_write_dr(dev, 0xad);
+ ret = c2port_poll_in_busy(dev);
+ if (ret < 0)
+ return ret;
+ c2port_write_dr(dev, 0xa5);
+ ret = c2port_poll_in_busy(dev);
+ if (ret < 0)
+ return ret;
+
+ ret = c2port_poll_out_ready(dev);
+ if (ret < 0)
+ return ret;
+
+ return 0;
+}
+
+static ssize_t c2port_store_flash_erase(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct c2port_device *c2dev = dev_get_drvdata(dev);
+ int ret;
+
+ /* Check the device and flash access status */
+ if (!c2dev->access || !c2dev->flash_access)
+ return -EBUSY;
+
+ mutex_lock(&c2dev->mutex);
+ ret = __c2port_write_flash_erase(c2dev);
+ mutex_unlock(&c2dev->mutex);
+
+ if (ret < 0) {
+ dev_err(c2dev->dev, "cannot erase %s flash\n", c2dev->name);
+ return ret;
+ }
+
+ return count;
+}
+static DEVICE_ATTR(flash_erase, 0200, NULL, c2port_store_flash_erase);
+
+static ssize_t __c2port_read_flash_data(struct c2port_device *dev,
+ char *buffer, loff_t offset, size_t count)
+{
+ struct c2port_ops *ops = dev->ops;
+ u8 status, nread = 128;
+ int i, ret;
+
+ /* Check for flash end */
+ if (offset >= ops->block_size * ops->blocks_num)
+ return 0;
+
+ if (ops->block_size * ops->blocks_num - offset < nread)
+ nread = ops->block_size * ops->blocks_num - offset;
+ if (count < nread)
+ nread = count;
+ if (nread == 0)
+ return nread;
+
+ /* Target the C2 flash programming data register for C2 data register
+ * access */
+ c2port_write_ar(dev, C2PORT_FPDAT);
+
+ /* Send flash block read command */
+ c2port_write_dr(dev, C2PORT_BLOCK_READ);
+
+ /* Wait for input acknowledge */
+ ret = c2port_poll_in_busy(dev);
+ if (ret < 0)
+ return ret;
+
+ /* Should check status before starting FLASH access sequence */
+
+ /* Wait for status information */
+ ret = c2port_poll_out_ready(dev);
+ if (ret < 0)
+ return ret;
+
+ /* Read flash programming interface status */
+ ret = c2port_read_dr(dev, &status);
+ if (ret < 0)
+ return ret;
+ if (status != C2PORT_COMMAND_OK)
+ return -EBUSY;
+
+ /* Send address high byte */
+ c2port_write_dr(dev, offset >> 8);
+ ret = c2port_poll_in_busy(dev);
+ if (ret < 0)
+ return ret;
+
+ /* Send address low byte */
+ c2port_write_dr(dev, offset & 0x00ff);
+ ret = c2port_poll_in_busy(dev);
+ if (ret < 0)
+ return ret;
+
+ /* Send address block size */
+ c2port_write_dr(dev, nread);
+ ret = c2port_poll_in_busy(dev);
+ if (ret < 0)
+ return ret;
+
+ /* Should check status before reading FLASH block */
+
+ /* Wait for status information */
+ ret = c2port_poll_out_ready(dev);
+ if (ret < 0)
+ return ret;
+
+ /* Read flash programming interface status */
+ ret = c2port_read_dr(dev, &status);
+ if (ret < 0)
+ return ret;
+ if (status != C2PORT_COMMAND_OK)
+ return -EBUSY;
+
+ /* Read flash block */
+ for (i = 0; i < nread; i++) {
+ ret = c2port_poll_out_ready(dev);
+ if (ret < 0)
+ return ret;
+
+ ret = c2port_read_dr(dev, buffer+i);
+ if (ret < 0)
+ return ret;
+ }
+
+ return nread;
+}
+
+static ssize_t c2port_read_flash_data(struct file *filp, struct kobject *kobj,
+ struct bin_attribute *attr,
+ char *buffer, loff_t offset, size_t count)
+{
+ struct c2port_device *c2dev =
+ dev_get_drvdata(container_of(kobj,
+ struct device, kobj));
+ ssize_t ret;
+
+ /* Check the device and flash access status */
+ if (!c2dev->access || !c2dev->flash_access)
+ return -EBUSY;
+
+ mutex_lock(&c2dev->mutex);
+ ret = __c2port_read_flash_data(c2dev, buffer, offset, count);
+ mutex_unlock(&c2dev->mutex);
+
+ if (ret < 0)
+ dev_err(c2dev->dev, "cannot read %s flash\n", c2dev->name);
+
+ return ret;
+}
+
+static ssize_t __c2port_write_flash_data(struct c2port_device *dev,
+ char *buffer, loff_t offset, size_t count)
+{
+ struct c2port_ops *ops = dev->ops;
+ u8 status, nwrite = 128;
+ int i, ret;
+
+ if (nwrite > count)
+ nwrite = count;
+ if (ops->block_size * ops->blocks_num - offset < nwrite)
+ nwrite = ops->block_size * ops->blocks_num - offset;
+
+ /* Check for flash end */
+ if (offset >= ops->block_size * ops->blocks_num)
+ return -EINVAL;
+
+ /* Target the C2 flash programming data register for C2 data register
+ * access */
+ c2port_write_ar(dev, C2PORT_FPDAT);
+
+ /* Send flash block write command */
+ c2port_write_dr(dev, C2PORT_BLOCK_WRITE);
+
+ /* Wait for input acknowledge */
+ ret = c2port_poll_in_busy(dev);
+ if (ret < 0)
+ return ret;
+
+ /* Should check status before starting FLASH access sequence */
+
+ /* Wait for status information */
+ ret = c2port_poll_out_ready(dev);
+ if (ret < 0)
+ return ret;
+
+ /* Read flash programming interface status */
+ ret = c2port_read_dr(dev, &status);
+ if (ret < 0)
+ return ret;
+ if (status != C2PORT_COMMAND_OK)
+ return -EBUSY;
+
+ /* Send address high byte */
+ c2port_write_dr(dev, offset >> 8);
+ ret = c2port_poll_in_busy(dev);
+ if (ret < 0)
+ return ret;
+
+ /* Send address low byte */
+ c2port_write_dr(dev, offset & 0x00ff);
+ ret = c2port_poll_in_busy(dev);
+ if (ret < 0)
+ return ret;
+
+ /* Send address block size */
+ c2port_write_dr(dev, nwrite);
+ ret = c2port_poll_in_busy(dev);
+ if (ret < 0)
+ return ret;
+
+ /* Should check status before writing FLASH block */
+
+ /* Wait for status information */
+ ret = c2port_poll_out_ready(dev);
+ if (ret < 0)
+ return ret;
+
+ /* Read flash programming interface status */
+ ret = c2port_read_dr(dev, &status);
+ if (ret < 0)
+ return ret;
+ if (status != C2PORT_COMMAND_OK)
+ return -EBUSY;
+
+ /* Write flash block */
+ for (i = 0; i < nwrite; i++) {
+ ret = c2port_write_dr(dev, *(buffer+i));
+ if (ret < 0)
+ return ret;
+
+ ret = c2port_poll_in_busy(dev);
+ if (ret < 0)
+ return ret;
+
+ }
+
+ /* Wait for last flash write to complete */
+ ret = c2port_poll_out_ready(dev);
+ if (ret < 0)
+ return ret;
+
+ return nwrite;
+}
+
+static ssize_t c2port_write_flash_data(struct file *filp, struct kobject *kobj,
+ struct bin_attribute *attr,
+ char *buffer, loff_t offset, size_t count)
+{
+ struct c2port_device *c2dev =
+ dev_get_drvdata(container_of(kobj,
+ struct device, kobj));
+ int ret;
+
+ /* Check the device access status */
+ if (!c2dev->access || !c2dev->flash_access)
+ return -EBUSY;
+
+ mutex_lock(&c2dev->mutex);
+ ret = __c2port_write_flash_data(c2dev, buffer, offset, count);
+ mutex_unlock(&c2dev->mutex);
+
+ if (ret < 0)
+ dev_err(c2dev->dev, "cannot write %s flash\n", c2dev->name);
+
+ return ret;
+}
+/* size is computed at run-time */
+static BIN_ATTR(flash_data, 0644, c2port_read_flash_data,
+ c2port_write_flash_data, 0);
+
+/*
+ * Class attributes
+ */
+static struct attribute *c2port_attrs[] = {
+ &dev_attr_name.attr,
+ &dev_attr_flash_blocks_num.attr,
+ &dev_attr_flash_block_size.attr,
+ &dev_attr_flash_size.attr,
+ &dev_attr_access.attr,
+ &dev_attr_reset.attr,
+ &dev_attr_dev_id.attr,
+ &dev_attr_rev_id.attr,
+ &dev_attr_flash_access.attr,
+ &dev_attr_flash_erase.attr,
+ NULL,
+};
+
+static struct bin_attribute *c2port_bin_attrs[] = {
+ &bin_attr_flash_data,
+ NULL,
+};
+
+static const struct attribute_group c2port_group = {
+ .attrs = c2port_attrs,
+ .bin_attrs = c2port_bin_attrs,
+};
+
+static const struct attribute_group *c2port_groups[] = {
+ &c2port_group,
+ NULL,
+};
+
+/*
+ * Exported functions
+ */
+
+struct c2port_device *c2port_device_register(char *name,
+ struct c2port_ops *ops, void *devdata)
+{
+ struct c2port_device *c2dev;
+ int ret;
+
+ if (unlikely(!ops) || unlikely(!ops->access) || \
+ unlikely(!ops->c2d_dir) || unlikely(!ops->c2ck_set) || \
+ unlikely(!ops->c2d_get) || unlikely(!ops->c2d_set))
+ return ERR_PTR(-EINVAL);
+
+ c2dev = kmalloc(sizeof(struct c2port_device), GFP_KERNEL);
+ kmemcheck_annotate_bitfield(c2dev, flags);
+ if (unlikely(!c2dev))
+ return ERR_PTR(-ENOMEM);
+
+ idr_preload(GFP_KERNEL);
+ spin_lock_irq(&c2port_idr_lock);
+ ret = idr_alloc(&c2port_idr, c2dev, 0, 0, GFP_NOWAIT);
+ spin_unlock_irq(&c2port_idr_lock);
+ idr_preload_end();
+
+ if (ret < 0)
+ goto error_idr_alloc;
+ c2dev->id = ret;
+
+ bin_attr_flash_data.size = ops->blocks_num * ops->block_size;
+
+ c2dev->dev = device_create(c2port_class, NULL, 0, c2dev,
+ "c2port%d", c2dev->id);
+ if (IS_ERR(c2dev->dev)) {
+ ret = PTR_ERR(c2dev->dev);
+ goto error_device_create;
+ }
+ dev_set_drvdata(c2dev->dev, c2dev);
+
+ strncpy(c2dev->name, name, C2PORT_NAME_LEN);
+ c2dev->ops = ops;
+ mutex_init(&c2dev->mutex);
+
+ /* By default C2 port access is off */
+ c2dev->access = c2dev->flash_access = 0;
+ ops->access(c2dev, 0);
+
+ dev_info(c2dev->dev, "C2 port %s added\n", name);
+ dev_info(c2dev->dev, "%s flash has %d blocks x %d bytes "
+ "(%d bytes total)\n",
+ name, ops->blocks_num, ops->block_size,
+ ops->blocks_num * ops->block_size);
+
+ return c2dev;
+
+error_device_create:
+ spin_lock_irq(&c2port_idr_lock);
+ idr_remove(&c2port_idr, c2dev->id);
+ spin_unlock_irq(&c2port_idr_lock);
+
+error_idr_alloc:
+ kfree(c2dev);
+
+ return ERR_PTR(ret);
+}
+EXPORT_SYMBOL(c2port_device_register);
+
+void c2port_device_unregister(struct c2port_device *c2dev)
+{
+ if (!c2dev)
+ return;
+
+ dev_info(c2dev->dev, "C2 port %s removed\n", c2dev->name);
+
+ spin_lock_irq(&c2port_idr_lock);
+ idr_remove(&c2port_idr, c2dev->id);
+ spin_unlock_irq(&c2port_idr_lock);
+
+ device_destroy(c2port_class, c2dev->id);
+
+ kfree(c2dev);
+}
+EXPORT_SYMBOL(c2port_device_unregister);
+
+/*
+ * Module stuff
+ */
+
+static int __init c2port_init(void)
+{
+ printk(KERN_INFO "Silicon Labs C2 port support v. " DRIVER_VERSION
+ " - (C) 2007 Rodolfo Giometti\n");
+
+ c2port_class = class_create(THIS_MODULE, "c2port");
+ if (IS_ERR(c2port_class)) {
+ printk(KERN_ERR "c2port: failed to allocate class\n");
+ return PTR_ERR(c2port_class);
+ }
+ c2port_class->dev_groups = c2port_groups;
+
+ return 0;
+}
+
+static void __exit c2port_exit(void)
+{
+ class_destroy(c2port_class);
+}
+
+module_init(c2port_init);
+module_exit(c2port_exit);
+
+MODULE_AUTHOR("Rodolfo Giometti <giometti@linux.it>");
+MODULE_DESCRIPTION("Silicon Labs C2 port support v. " DRIVER_VERSION);
+MODULE_LICENSE("GPL");
diff --git a/drivers/misc/cb710/Kconfig b/drivers/misc/cb710/Kconfig
new file mode 100644
index 0000000..22429b8
--- /dev/null
+++ b/drivers/misc/cb710/Kconfig
@@ -0,0 +1,25 @@
+config CB710_CORE
+ tristate "ENE CB710/720 Flash memory card reader support"
+ depends on PCI
+ help
+ This option enables support for PCI ENE CB710/720 Flash memory card
+ reader found in some laptops (ie. some versions of HP Compaq nx9500).
+
+ You will also have to select some flash card format drivers (MMC/SD,
+ MemoryStick).
+
+ This driver can also be built as a module. If so, the module
+ will be called cb710.
+
+config CB710_DEBUG
+ bool "Enable driver debugging"
+ depends on CB710_CORE != n
+ default n
+ help
+ This is an option for use by developers; most people should
+ say N here. This adds a lot of debugging output to dmesg.
+
+config CB710_DEBUG_ASSUMPTIONS
+ bool
+ depends on CB710_CORE != n
+ default y
diff --git a/drivers/misc/cb710/Makefile b/drivers/misc/cb710/Makefile
new file mode 100644
index 0000000..467c8e9
--- /dev/null
+++ b/drivers/misc/cb710/Makefile
@@ -0,0 +1,6 @@
+ccflags-$(CONFIG_CB710_DEBUG) := -DDEBUG
+
+obj-$(CONFIG_CB710_CORE) += cb710.o
+
+cb710-y := core.o sgbuf2.o
+cb710-$(CONFIG_CB710_DEBUG) += debug.o
diff --git a/drivers/misc/cb710/core.c b/drivers/misc/cb710/core.c
new file mode 100644
index 0000000..fb397e7
--- /dev/null
+++ b/drivers/misc/cb710/core.c
@@ -0,0 +1,359 @@
+/*
+ * cb710/core.c
+ *
+ * Copyright by Michał Mirosław, 2008-2009
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/spinlock.h>
+#include <linux/idr.h>
+#include <linux/cb710.h>
+#include <linux/gfp.h>
+
+static DEFINE_IDA(cb710_ida);
+static DEFINE_SPINLOCK(cb710_ida_lock);
+
+void cb710_pci_update_config_reg(struct pci_dev *pdev,
+ int reg, uint32_t mask, uint32_t xor)
+{
+ u32 rval;
+
+ pci_read_config_dword(pdev, reg, &rval);
+ rval = (rval & mask) ^ xor;
+ pci_write_config_dword(pdev, reg, rval);
+}
+EXPORT_SYMBOL_GPL(cb710_pci_update_config_reg);
+
+/* Some magic writes based on Windows driver init code */
+static int cb710_pci_configure(struct pci_dev *pdev)
+{
+ unsigned int devfn = PCI_DEVFN(PCI_SLOT(pdev->devfn), 0);
+ struct pci_dev *pdev0;
+ u32 val;
+
+ cb710_pci_update_config_reg(pdev, 0x48,
+ ~0x000000FF, 0x0000003F);
+
+ pci_read_config_dword(pdev, 0x48, &val);
+ if (val & 0x80000000)
+ return 0;
+
+ pdev0 = pci_get_slot(pdev->bus, devfn);
+ if (!pdev0)
+ return -ENODEV;
+
+ if (pdev0->vendor == PCI_VENDOR_ID_ENE
+ && pdev0->device == PCI_DEVICE_ID_ENE_720) {
+ cb710_pci_update_config_reg(pdev0, 0x8C,
+ ~0x00F00000, 0x00100000);
+ cb710_pci_update_config_reg(pdev0, 0xB0,
+ ~0x08000000, 0x08000000);
+ }
+
+ cb710_pci_update_config_reg(pdev0, 0x8C,
+ ~0x00000F00, 0x00000200);
+ cb710_pci_update_config_reg(pdev0, 0x90,
+ ~0x00060000, 0x00040000);
+
+ pci_dev_put(pdev0);
+
+ return 0;
+}
+
+static irqreturn_t cb710_irq_handler(int irq, void *data)
+{
+ struct cb710_chip *chip = data;
+ struct cb710_slot *slot = &chip->slot[0];
+ irqreturn_t handled = IRQ_NONE;
+ unsigned nr;
+
+ spin_lock(&chip->irq_lock); /* incl. smp_rmb() */
+
+ for (nr = chip->slots; nr; ++slot, --nr) {
+ cb710_irq_handler_t handler_func = slot->irq_handler;
+ if (handler_func && handler_func(slot))
+ handled = IRQ_HANDLED;
+ }
+
+ spin_unlock(&chip->irq_lock);
+
+ return handled;
+}
+
+static void cb710_release_slot(struct device *dev)
+{
+#ifdef CONFIG_CB710_DEBUG_ASSUMPTIONS
+ struct cb710_slot *slot = cb710_pdev_to_slot(to_platform_device(dev));
+ struct cb710_chip *chip = cb710_slot_to_chip(slot);
+
+ /* slot struct can be freed now */
+ atomic_dec(&chip->slot_refs_count);
+#endif
+}
+
+static int cb710_register_slot(struct cb710_chip *chip,
+ unsigned slot_mask, unsigned io_offset, const char *name)
+{
+ int nr = chip->slots;
+ struct cb710_slot *slot = &chip->slot[nr];
+ int err;
+
+ dev_dbg(cb710_chip_dev(chip),
+ "register: %s.%d; slot %d; mask %d; IO offset: 0x%02X\n",
+ name, chip->platform_id, nr, slot_mask, io_offset);
+
+ /* slot->irq_handler == NULL here; this needs to be
+ * seen before platform_device_register() */
+ ++chip->slots;
+ smp_wmb();
+
+ slot->iobase = chip->iobase + io_offset;
+ slot->pdev.name = name;
+ slot->pdev.id = chip->platform_id;
+ slot->pdev.dev.parent = &chip->pdev->dev;
+ slot->pdev.dev.release = cb710_release_slot;
+
+ err = platform_device_register(&slot->pdev);
+
+#ifdef CONFIG_CB710_DEBUG_ASSUMPTIONS
+ atomic_inc(&chip->slot_refs_count);
+#endif
+
+ if (err) {
+ /* device_initialize() called from platform_device_register()
+ * wants this on error path */
+ platform_device_put(&slot->pdev);
+
+ /* slot->irq_handler == NULL here anyway, so no lock needed */
+ --chip->slots;
+ return err;
+ }
+
+ chip->slot_mask |= slot_mask;
+
+ return 0;
+}
+
+static void cb710_unregister_slot(struct cb710_chip *chip,
+ unsigned slot_mask)
+{
+ int nr = chip->slots - 1;
+
+ if (!(chip->slot_mask & slot_mask))
+ return;
+
+ platform_device_unregister(&chip->slot[nr].pdev);
+
+ /* complementary to spin_unlock() in cb710_set_irq_handler() */
+ smp_rmb();
+ BUG_ON(chip->slot[nr].irq_handler != NULL);
+
+ /* slot->irq_handler == NULL here, so no lock needed */
+ --chip->slots;
+ chip->slot_mask &= ~slot_mask;
+}
+
+void cb710_set_irq_handler(struct cb710_slot *slot,
+ cb710_irq_handler_t handler)
+{
+ struct cb710_chip *chip = cb710_slot_to_chip(slot);
+ unsigned long flags;
+
+ spin_lock_irqsave(&chip->irq_lock, flags);
+ slot->irq_handler = handler;
+ spin_unlock_irqrestore(&chip->irq_lock, flags);
+}
+EXPORT_SYMBOL_GPL(cb710_set_irq_handler);
+
+#ifdef CONFIG_PM
+
+static int cb710_suspend(struct pci_dev *pdev, pm_message_t state)
+{
+ struct cb710_chip *chip = pci_get_drvdata(pdev);
+
+ devm_free_irq(&pdev->dev, pdev->irq, chip);
+ pci_save_state(pdev);
+ pci_disable_device(pdev);
+ if (state.event & PM_EVENT_SLEEP)
+ pci_set_power_state(pdev, PCI_D3hot);
+ return 0;
+}
+
+static int cb710_resume(struct pci_dev *pdev)
+{
+ struct cb710_chip *chip = pci_get_drvdata(pdev);
+ int err;
+
+ pci_set_power_state(pdev, PCI_D0);
+ pci_restore_state(pdev);
+ err = pcim_enable_device(pdev);
+ if (err)
+ return err;
+
+ return devm_request_irq(&pdev->dev, pdev->irq,
+ cb710_irq_handler, IRQF_SHARED, KBUILD_MODNAME, chip);
+}
+
+#endif /* CONFIG_PM */
+
+static int cb710_probe(struct pci_dev *pdev,
+ const struct pci_device_id *ent)
+{
+ struct cb710_chip *chip;
+ unsigned long flags;
+ u32 val;
+ int err;
+ int n = 0;
+
+ err = cb710_pci_configure(pdev);
+ if (err)
+ return err;
+
+ /* this is actually magic... */
+ pci_read_config_dword(pdev, 0x48, &val);
+ if (!(val & 0x80000000)) {
+ pci_write_config_dword(pdev, 0x48, val|0x71000000);
+ pci_read_config_dword(pdev, 0x48, &val);
+ }
+
+ dev_dbg(&pdev->dev, "PCI config[0x48] = 0x%08X\n", val);
+ if (!(val & 0x70000000))
+ return -ENODEV;
+ val = (val >> 28) & 7;
+ if (val & CB710_SLOT_MMC)
+ ++n;
+ if (val & CB710_SLOT_MS)
+ ++n;
+ if (val & CB710_SLOT_SM)
+ ++n;
+
+ chip = devm_kzalloc(&pdev->dev,
+ sizeof(*chip) + n * sizeof(*chip->slot), GFP_KERNEL);
+ if (!chip)
+ return -ENOMEM;
+
+ err = pcim_enable_device(pdev);
+ if (err)
+ return err;
+
+ err = pcim_iomap_regions(pdev, 0x0001, KBUILD_MODNAME);
+ if (err)
+ return err;
+
+ spin_lock_init(&chip->irq_lock);
+ chip->pdev = pdev;
+ chip->iobase = pcim_iomap_table(pdev)[0];
+
+ pci_set_drvdata(pdev, chip);
+
+ err = devm_request_irq(&pdev->dev, pdev->irq,
+ cb710_irq_handler, IRQF_SHARED, KBUILD_MODNAME, chip);
+ if (err)
+ return err;
+
+ do {
+ if (!ida_pre_get(&cb710_ida, GFP_KERNEL))
+ return -ENOMEM;
+
+ spin_lock_irqsave(&cb710_ida_lock, flags);
+ err = ida_get_new(&cb710_ida, &chip->platform_id);
+ spin_unlock_irqrestore(&cb710_ida_lock, flags);
+
+ if (err && err != -EAGAIN)
+ return err;
+ } while (err);
+
+
+ dev_info(&pdev->dev, "id %d, IO 0x%p, IRQ %d\n",
+ chip->platform_id, chip->iobase, pdev->irq);
+
+ if (val & CB710_SLOT_MMC) { /* MMC/SD slot */
+ err = cb710_register_slot(chip,
+ CB710_SLOT_MMC, 0x00, "cb710-mmc");
+ if (err)
+ return err;
+ }
+
+ if (val & CB710_SLOT_MS) { /* MemoryStick slot */
+ err = cb710_register_slot(chip,
+ CB710_SLOT_MS, 0x40, "cb710-ms");
+ if (err)
+ goto unreg_mmc;
+ }
+
+ if (val & CB710_SLOT_SM) { /* SmartMedia slot */
+ err = cb710_register_slot(chip,
+ CB710_SLOT_SM, 0x60, "cb710-sm");
+ if (err)
+ goto unreg_ms;
+ }
+
+ return 0;
+unreg_ms:
+ cb710_unregister_slot(chip, CB710_SLOT_MS);
+unreg_mmc:
+ cb710_unregister_slot(chip, CB710_SLOT_MMC);
+
+#ifdef CONFIG_CB710_DEBUG_ASSUMPTIONS
+ BUG_ON(atomic_read(&chip->slot_refs_count) != 0);
+#endif
+ return err;
+}
+
+static void cb710_remove_one(struct pci_dev *pdev)
+{
+ struct cb710_chip *chip = pci_get_drvdata(pdev);
+ unsigned long flags;
+
+ cb710_unregister_slot(chip, CB710_SLOT_SM);
+ cb710_unregister_slot(chip, CB710_SLOT_MS);
+ cb710_unregister_slot(chip, CB710_SLOT_MMC);
+#ifdef CONFIG_CB710_DEBUG_ASSUMPTIONS
+ BUG_ON(atomic_read(&chip->slot_refs_count) != 0);
+#endif
+
+ spin_lock_irqsave(&cb710_ida_lock, flags);
+ ida_remove(&cb710_ida, chip->platform_id);
+ spin_unlock_irqrestore(&cb710_ida_lock, flags);
+}
+
+static const struct pci_device_id cb710_pci_tbl[] = {
+ { PCI_VENDOR_ID_ENE, PCI_DEVICE_ID_ENE_CB710_FLASH,
+ PCI_ANY_ID, PCI_ANY_ID, },
+ { 0, }
+};
+
+static struct pci_driver cb710_driver = {
+ .name = KBUILD_MODNAME,
+ .id_table = cb710_pci_tbl,
+ .probe = cb710_probe,
+ .remove = cb710_remove_one,
+#ifdef CONFIG_PM
+ .suspend = cb710_suspend,
+ .resume = cb710_resume,
+#endif
+};
+
+static int __init cb710_init_module(void)
+{
+ return pci_register_driver(&cb710_driver);
+}
+
+static void __exit cb710_cleanup_module(void)
+{
+ pci_unregister_driver(&cb710_driver);
+ ida_destroy(&cb710_ida);
+}
+
+module_init(cb710_init_module);
+module_exit(cb710_cleanup_module);
+
+MODULE_AUTHOR("Michał Mirosław <mirq-linux@rere.qmqm.pl>");
+MODULE_DESCRIPTION("ENE CB710 memory card reader driver");
+MODULE_LICENSE("GPL");
+MODULE_DEVICE_TABLE(pci, cb710_pci_tbl);
diff --git a/drivers/misc/cb710/debug.c b/drivers/misc/cb710/debug.c
new file mode 100644
index 0000000..fcb3b8e
--- /dev/null
+++ b/drivers/misc/cb710/debug.c
@@ -0,0 +1,118 @@
+/*
+ * cb710/debug.c
+ *
+ * Copyright by Michał Mirosław, 2008-2009
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/cb710.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+
+#define CB710_REG_COUNT 0x80
+
+static const u16 allow[CB710_REG_COUNT/16] = {
+ 0xFFF0, 0xFFFF, 0xFFFF, 0xFFFF,
+ 0xFFF0, 0xFFFF, 0xFFFF, 0xFFFF,
+};
+static const char *const prefix[ARRAY_SIZE(allow)] = {
+ "MMC", "MMC", "MMC", "MMC",
+ "MS?", "MS?", "SM?", "SM?"
+};
+
+static inline int allow_reg_read(unsigned block, unsigned offset, unsigned bits)
+{
+ unsigned mask = (1 << bits/8) - 1;
+ offset *= bits/8;
+ return ((allow[block] >> offset) & mask) == mask;
+}
+
+#define CB710_READ_REGS_TEMPLATE(t) \
+static void cb710_read_regs_##t(void __iomem *iobase, \
+ u##t *reg, unsigned select) \
+{ \
+ unsigned i, j; \
+ \
+ for (i = 0; i < ARRAY_SIZE(allow); ++i, reg += 16/(t/8)) { \
+ if (!(select & (1 << i))) \
+ continue; \
+ \
+ for (j = 0; j < 0x10/(t/8); ++j) { \
+ if (!allow_reg_read(i, j, t)) \
+ continue; \
+ reg[j] = ioread##t(iobase \
+ + (i << 4) + (j * (t/8))); \
+ } \
+ } \
+}
+
+static const char cb710_regf_8[] = "%02X";
+static const char cb710_regf_16[] = "%04X";
+static const char cb710_regf_32[] = "%08X";
+static const char cb710_xes[] = "xxxxxxxx";
+
+#define CB710_DUMP_REGS_TEMPLATE(t) \
+static void cb710_dump_regs_##t(struct device *dev, \
+ const u##t *reg, unsigned select) \
+{ \
+ const char *const xp = &cb710_xes[8 - t/4]; \
+ const char *const format = cb710_regf_##t; \
+ \
+ char msg[100], *p; \
+ unsigned i, j; \
+ \
+ for (i = 0; i < ARRAY_SIZE(allow); ++i, reg += 16/(t/8)) { \
+ if (!(select & (1 << i))) \
+ continue; \
+ p = msg; \
+ for (j = 0; j < 0x10/(t/8); ++j) { \
+ *p++ = ' '; \
+ if (j == 8/(t/8)) \
+ *p++ = ' '; \
+ if (allow_reg_read(i, j, t)) \
+ p += sprintf(p, format, reg[j]); \
+ else \
+ p += sprintf(p, "%s", xp); \
+ } \
+ dev_dbg(dev, "%s 0x%02X %s\n", prefix[i], i << 4, msg); \
+ } \
+}
+
+#define CB710_READ_AND_DUMP_REGS_TEMPLATE(t) \
+static void cb710_read_and_dump_regs_##t(struct cb710_chip *chip, \
+ unsigned select) \
+{ \
+ u##t regs[CB710_REG_COUNT/sizeof(u##t)]; \
+ \
+ memset(®s, 0, sizeof(regs)); \
+ cb710_read_regs_##t(chip->iobase, regs, select); \
+ cb710_dump_regs_##t(cb710_chip_dev(chip), regs, select); \
+}
+
+#define CB710_REG_ACCESS_TEMPLATES(t) \
+ CB710_READ_REGS_TEMPLATE(t) \
+ CB710_DUMP_REGS_TEMPLATE(t) \
+ CB710_READ_AND_DUMP_REGS_TEMPLATE(t)
+
+CB710_REG_ACCESS_TEMPLATES(8)
+CB710_REG_ACCESS_TEMPLATES(16)
+CB710_REG_ACCESS_TEMPLATES(32)
+
+void cb710_dump_regs(struct cb710_chip *chip, unsigned select)
+{
+ if (!(select & CB710_DUMP_REGS_MASK))
+ select = CB710_DUMP_REGS_ALL;
+ if (!(select & CB710_DUMP_ACCESS_MASK))
+ select |= CB710_DUMP_ACCESS_8;
+
+ if (select & CB710_DUMP_ACCESS_32)
+ cb710_read_and_dump_regs_32(chip, select);
+ if (select & CB710_DUMP_ACCESS_16)
+ cb710_read_and_dump_regs_16(chip, select);
+ if (select & CB710_DUMP_ACCESS_8)
+ cb710_read_and_dump_regs_8(chip, select);
+}
+EXPORT_SYMBOL_GPL(cb710_dump_regs);
+
diff --git a/drivers/misc/cb710/sgbuf2.c b/drivers/misc/cb710/sgbuf2.c
new file mode 100644
index 0000000..2a40d0e
--- /dev/null
+++ b/drivers/misc/cb710/sgbuf2.c
@@ -0,0 +1,146 @@
+/*
+ * cb710/sgbuf2.c
+ *
+ * Copyright by Michał Mirosław, 2008-2009
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/cb710.h>
+
+static bool sg_dwiter_next(struct sg_mapping_iter *miter)
+{
+ if (sg_miter_next(miter)) {
+ miter->consumed = 0;
+ return true;
+ } else
+ return false;
+}
+
+static bool sg_dwiter_is_at_end(struct sg_mapping_iter *miter)
+{
+ return miter->length == miter->consumed && !sg_dwiter_next(miter);
+}
+
+static uint32_t sg_dwiter_read_buffer(struct sg_mapping_iter *miter)
+{
+ size_t len, left = 4;
+ uint32_t data;
+ void *addr = &data;
+
+ do {
+ len = min(miter->length - miter->consumed, left);
+ memcpy(addr, miter->addr + miter->consumed, len);
+ miter->consumed += len;
+ left -= len;
+ if (!left)
+ return data;
+ addr += len;
+ } while (sg_dwiter_next(miter));
+
+ memset(addr, 0, left);
+ return data;
+}
+
+static inline bool needs_unaligned_copy(const void *ptr)
+{
+#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+ return false;
+#else
+ return ((ptr - NULL) & 3) != 0;
+#endif
+}
+
+static bool sg_dwiter_get_next_block(struct sg_mapping_iter *miter, uint32_t **ptr)
+{
+ size_t len;
+
+ if (sg_dwiter_is_at_end(miter))
+ return true;
+
+ len = miter->length - miter->consumed;
+
+ if (likely(len >= 4 && !needs_unaligned_copy(
+ miter->addr + miter->consumed))) {
+ *ptr = miter->addr + miter->consumed;
+ miter->consumed += 4;
+ return true;
+ }
+
+ return false;
+}
+
+/**
+ * cb710_sg_dwiter_read_next_block() - get next 32-bit word from sg buffer
+ * @miter: sg mapping iterator used for reading
+ *
+ * Description:
+ * Returns 32-bit word starting at byte pointed to by @miter@
+ * handling any alignment issues. Bytes past the buffer's end
+ * are not accessed (read) but are returned as zeroes. @miter@
+ * is advanced by 4 bytes or to the end of buffer whichever is
+ * closer.
+ *
+ * Context:
+ * Same requirements as in sg_miter_next().
+ *
+ * Returns:
+ * 32-bit word just read.
+ */
+uint32_t cb710_sg_dwiter_read_next_block(struct sg_mapping_iter *miter)
+{
+ uint32_t *ptr = NULL;
+
+ if (likely(sg_dwiter_get_next_block(miter, &ptr)))
+ return ptr ? *ptr : 0;
+
+ return sg_dwiter_read_buffer(miter);
+}
+EXPORT_SYMBOL_GPL(cb710_sg_dwiter_read_next_block);
+
+static void sg_dwiter_write_slow(struct sg_mapping_iter *miter, uint32_t data)
+{
+ size_t len, left = 4;
+ void *addr = &data;
+
+ do {
+ len = min(miter->length - miter->consumed, left);
+ memcpy(miter->addr, addr, len);
+ miter->consumed += len;
+ left -= len;
+ if (!left)
+ return;
+ addr += len;
+ } while (sg_dwiter_next(miter));
+}
+
+/**
+ * cb710_sg_dwiter_write_next_block() - write next 32-bit word to sg buffer
+ * @miter: sg mapping iterator used for writing
+ *
+ * Description:
+ * Writes 32-bit word starting at byte pointed to by @miter@
+ * handling any alignment issues. Bytes which would be written
+ * past the buffer's end are silently discarded. @miter@ is
+ * advanced by 4 bytes or to the end of buffer whichever is closer.
+ *
+ * Context:
+ * Same requirements as in sg_miter_next().
+ */
+void cb710_sg_dwiter_write_next_block(struct sg_mapping_iter *miter, uint32_t data)
+{
+ uint32_t *ptr = NULL;
+
+ if (likely(sg_dwiter_get_next_block(miter, &ptr))) {
+ if (ptr)
+ *ptr = data;
+ else
+ return;
+ } else
+ sg_dwiter_write_slow(miter, data);
+}
+EXPORT_SYMBOL_GPL(cb710_sg_dwiter_write_next_block);
+
diff --git a/drivers/misc/cs5535-mfgpt.c b/drivers/misc/cs5535-mfgpt.c
new file mode 100644
index 0000000..347f08f
--- /dev/null
+++ b/drivers/misc/cs5535-mfgpt.c
@@ -0,0 +1,383 @@
+/*
+ * Driver for the CS5535/CS5536 Multi-Function General Purpose Timers (MFGPT)
+ *
+ * Copyright (C) 2006, Advanced Micro Devices, Inc.
+ * Copyright (C) 2007 Andres Salomon <dilinger@debian.org>
+ * Copyright (C) 2009 Andres Salomon <dilinger@collabora.co.uk>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public License
+ * as published by the Free Software Foundation.
+ *
+ * The MFGPTs are documented in AMD Geode CS5536 Companion Device Data Book.
+ */
+
+#include <linux/kernel.h>
+#include <linux/spinlock.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/cs5535.h>
+#include <linux/slab.h>
+
+#define DRV_NAME "cs5535-mfgpt"
+
+static int mfgpt_reset_timers;
+module_param_named(mfgptfix, mfgpt_reset_timers, int, 0644);
+MODULE_PARM_DESC(mfgptfix, "Try to reset the MFGPT timers during init; "
+ "required by some broken BIOSes (ie, TinyBIOS < 0.99) or kexec "
+ "(1 = reset the MFGPT using an undocumented bit, "
+ "2 = perform a soft reset by unconfiguring all timers); "
+ "use what works best for you.");
+
+struct cs5535_mfgpt_timer {
+ struct cs5535_mfgpt_chip *chip;
+ int nr;
+};
+
+static struct cs5535_mfgpt_chip {
+ DECLARE_BITMAP(avail, MFGPT_MAX_TIMERS);
+ resource_size_t base;
+
+ struct platform_device *pdev;
+ spinlock_t lock;
+ int initialized;
+} cs5535_mfgpt_chip;
+
+int cs5535_mfgpt_toggle_event(struct cs5535_mfgpt_timer *timer, int cmp,
+ int event, int enable)
+{
+ uint32_t msr, mask, value, dummy;
+ int shift = (cmp == MFGPT_CMP1) ? 0 : 8;
+
+ if (!timer) {
+ WARN_ON(1);
+ return -EIO;
+ }
+
+ /*
+ * The register maps for these are described in sections 6.17.1.x of
+ * the AMD Geode CS5536 Companion Device Data Book.
+ */
+ switch (event) {
+ case MFGPT_EVENT_RESET:
+ /*
+ * XXX: According to the docs, we cannot reset timers above
+ * 6; that is, resets for 7 and 8 will be ignored. Is this
+ * a problem? -dilinger
+ */
+ msr = MSR_MFGPT_NR;
+ mask = 1 << (timer->nr + 24);
+ break;
+
+ case MFGPT_EVENT_NMI:
+ msr = MSR_MFGPT_NR;
+ mask = 1 << (timer->nr + shift);
+ break;
+
+ case MFGPT_EVENT_IRQ:
+ msr = MSR_MFGPT_IRQ;
+ mask = 1 << (timer->nr + shift);
+ break;
+
+ default:
+ return -EIO;
+ }
+
+ rdmsr(msr, value, dummy);
+
+ if (enable)
+ value |= mask;
+ else
+ value &= ~mask;
+
+ wrmsr(msr, value, dummy);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(cs5535_mfgpt_toggle_event);
+
+int cs5535_mfgpt_set_irq(struct cs5535_mfgpt_timer *timer, int cmp, int *irq,
+ int enable)
+{
+ uint32_t zsel, lpc, dummy;
+ int shift;
+
+ if (!timer) {
+ WARN_ON(1);
+ return -EIO;
+ }
+
+ /*
+ * Unfortunately, MFGPTs come in pairs sharing their IRQ lines. If VSA
+ * is using the same CMP of the timer's Siamese twin, the IRQ is set to
+ * 2, and we mustn't use nor change it.
+ * XXX: Likewise, 2 Linux drivers might clash if the 2nd overwrites the
+ * IRQ of the 1st. This can only happen if forcing an IRQ, calling this
+ * with *irq==0 is safe. Currently there _are_ no 2 drivers.
+ */
+ rdmsr(MSR_PIC_ZSEL_LOW, zsel, dummy);
+ shift = ((cmp == MFGPT_CMP1 ? 0 : 4) + timer->nr % 4) * 4;
+ if (((zsel >> shift) & 0xF) == 2)
+ return -EIO;
+
+ /* Choose IRQ: if none supplied, keep IRQ already set or use default */
+ if (!*irq)
+ *irq = (zsel >> shift) & 0xF;
+ if (!*irq)
+ *irq = CONFIG_CS5535_MFGPT_DEFAULT_IRQ;
+
+ /* Can't use IRQ if it's 0 (=disabled), 2, or routed to LPC */
+ if (*irq < 1 || *irq == 2 || *irq > 15)
+ return -EIO;
+ rdmsr(MSR_PIC_IRQM_LPC, lpc, dummy);
+ if (lpc & (1 << *irq))
+ return -EIO;
+
+ /* All chosen and checked - go for it */
+ if (cs5535_mfgpt_toggle_event(timer, cmp, MFGPT_EVENT_IRQ, enable))
+ return -EIO;
+ if (enable) {
+ zsel = (zsel & ~(0xF << shift)) | (*irq << shift);
+ wrmsr(MSR_PIC_ZSEL_LOW, zsel, dummy);
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(cs5535_mfgpt_set_irq);
+
+struct cs5535_mfgpt_timer *cs5535_mfgpt_alloc_timer(int timer_nr, int domain)
+{
+ struct cs5535_mfgpt_chip *mfgpt = &cs5535_mfgpt_chip;
+ struct cs5535_mfgpt_timer *timer = NULL;
+ unsigned long flags;
+ int max;
+
+ if (!mfgpt->initialized)
+ goto done;
+
+ /* only allocate timers from the working domain if requested */
+ if (domain == MFGPT_DOMAIN_WORKING)
+ max = 6;
+ else
+ max = MFGPT_MAX_TIMERS;
+
+ if (timer_nr >= max) {
+ /* programmer error. silly programmers! */
+ WARN_ON(1);
+ goto done;
+ }
+
+ spin_lock_irqsave(&mfgpt->lock, flags);
+ if (timer_nr < 0) {
+ unsigned long t;
+
+ /* try to find any available timer */
+ t = find_first_bit(mfgpt->avail, max);
+ /* set timer_nr to -1 if no timers available */
+ timer_nr = t < max ? (int) t : -1;
+ } else {
+ /* check if the requested timer's available */
+ if (!test_bit(timer_nr, mfgpt->avail))
+ timer_nr = -1;
+ }
+
+ if (timer_nr >= 0)
+ /* if timer_nr is not -1, it's an available timer */
+ __clear_bit(timer_nr, mfgpt->avail);
+ spin_unlock_irqrestore(&mfgpt->lock, flags);
+
+ if (timer_nr < 0)
+ goto done;
+
+ timer = kmalloc(sizeof(*timer), GFP_KERNEL);
+ if (!timer) {
+ /* aw hell */
+ spin_lock_irqsave(&mfgpt->lock, flags);
+ __set_bit(timer_nr, mfgpt->avail);
+ spin_unlock_irqrestore(&mfgpt->lock, flags);
+ goto done;
+ }
+ timer->chip = mfgpt;
+ timer->nr = timer_nr;
+ dev_info(&mfgpt->pdev->dev, "registered timer %d\n", timer_nr);
+
+done:
+ return timer;
+}
+EXPORT_SYMBOL_GPL(cs5535_mfgpt_alloc_timer);
+
+/*
+ * XXX: This frees the timer memory, but never resets the actual hardware
+ * timer. The old geode_mfgpt code did this; it would be good to figure
+ * out a way to actually release the hardware timer. See comments below.
+ */
+void cs5535_mfgpt_free_timer(struct cs5535_mfgpt_timer *timer)
+{
+ unsigned long flags;
+ uint16_t val;
+
+ /* timer can be made available again only if never set up */
+ val = cs5535_mfgpt_read(timer, MFGPT_REG_SETUP);
+ if (!(val & MFGPT_SETUP_SETUP)) {
+ spin_lock_irqsave(&timer->chip->lock, flags);
+ __set_bit(timer->nr, timer->chip->avail);
+ spin_unlock_irqrestore(&timer->chip->lock, flags);
+ }
+
+ kfree(timer);
+}
+EXPORT_SYMBOL_GPL(cs5535_mfgpt_free_timer);
+
+uint16_t cs5535_mfgpt_read(struct cs5535_mfgpt_timer *timer, uint16_t reg)
+{
+ return inw(timer->chip->base + reg + (timer->nr * 8));
+}
+EXPORT_SYMBOL_GPL(cs5535_mfgpt_read);
+
+void cs5535_mfgpt_write(struct cs5535_mfgpt_timer *timer, uint16_t reg,
+ uint16_t value)
+{
+ outw(value, timer->chip->base + reg + (timer->nr * 8));
+}
+EXPORT_SYMBOL_GPL(cs5535_mfgpt_write);
+
+/*
+ * This is a sledgehammer that resets all MFGPT timers. This is required by
+ * some broken BIOSes which leave the system in an unstable state
+ * (TinyBIOS 0.98, for example; fixed in 0.99). It's uncertain as to
+ * whether or not this secret MSR can be used to release individual timers.
+ * Jordan tells me that he and Mitch once played w/ it, but it's unclear
+ * what the results of that were (and they experienced some instability).
+ */
+static void reset_all_timers(void)
+{
+ uint32_t val, dummy;
+
+ /* The following undocumented bit resets the MFGPT timers */
+ val = 0xFF; dummy = 0;
+ wrmsr(MSR_MFGPT_SETUP, val, dummy);
+}
+
+/*
+ * This is another sledgehammer to reset all MFGPT timers.
+ * Instead of using the undocumented bit method it clears
+ * IRQ, NMI and RESET settings.
+ */
+static void soft_reset(void)
+{
+ int i;
+ struct cs5535_mfgpt_timer t;
+
+ for (i = 0; i < MFGPT_MAX_TIMERS; i++) {
+ t.nr = i;
+
+ cs5535_mfgpt_toggle_event(&t, MFGPT_CMP1, MFGPT_EVENT_RESET, 0);
+ cs5535_mfgpt_toggle_event(&t, MFGPT_CMP2, MFGPT_EVENT_RESET, 0);
+ cs5535_mfgpt_toggle_event(&t, MFGPT_CMP1, MFGPT_EVENT_NMI, 0);
+ cs5535_mfgpt_toggle_event(&t, MFGPT_CMP2, MFGPT_EVENT_NMI, 0);
+ cs5535_mfgpt_toggle_event(&t, MFGPT_CMP1, MFGPT_EVENT_IRQ, 0);
+ cs5535_mfgpt_toggle_event(&t, MFGPT_CMP2, MFGPT_EVENT_IRQ, 0);
+ }
+}
+
+/*
+ * Check whether any MFGPTs are available for the kernel to use. In most
+ * cases, firmware that uses AMD's VSA code will claim all timers during
+ * bootup; we certainly don't want to take them if they're already in use.
+ * In other cases (such as with VSAless OpenFirmware), the system firmware
+ * leaves timers available for us to use.
+ */
+static int scan_timers(struct cs5535_mfgpt_chip *mfgpt)
+{
+ struct cs5535_mfgpt_timer timer = { .chip = mfgpt };
+ unsigned long flags;
+ int timers = 0;
+ uint16_t val;
+ int i;
+
+ /* bios workaround */
+ if (mfgpt_reset_timers == 1)
+ reset_all_timers();
+ else if (mfgpt_reset_timers == 2)
+ soft_reset();
+
+ /* just to be safe, protect this section w/ lock */
+ spin_lock_irqsave(&mfgpt->lock, flags);
+ for (i = 0; i < MFGPT_MAX_TIMERS; i++) {
+ timer.nr = i;
+ val = cs5535_mfgpt_read(&timer, MFGPT_REG_SETUP);
+ if (!(val & MFGPT_SETUP_SETUP) || mfgpt_reset_timers == 2) {
+ __set_bit(i, mfgpt->avail);
+ timers++;
+ }
+ }
+ spin_unlock_irqrestore(&mfgpt->lock, flags);
+
+ return timers;
+}
+
+static int cs5535_mfgpt_probe(struct platform_device *pdev)
+{
+ struct resource *res;
+ int err = -EIO, t;
+
+ if (mfgpt_reset_timers < 0 || mfgpt_reset_timers > 2) {
+ dev_err(&pdev->dev, "Bad mfgpt_reset_timers value: %i\n",
+ mfgpt_reset_timers);
+ goto done;
+ }
+
+ /* There are two ways to get the MFGPT base address; one is by
+ * fetching it from MSR_LBAR_MFGPT, the other is by reading the
+ * PCI BAR info. The latter method is easier (especially across
+ * different architectures), so we'll stick with that for now. If
+ * it turns out to be unreliable in the face of crappy BIOSes, we
+ * can always go back to using MSRs.. */
+
+ res = platform_get_resource(pdev, IORESOURCE_IO, 0);
+ if (!res) {
+ dev_err(&pdev->dev, "can't fetch device resource info\n");
+ goto done;
+ }
+
+ if (!request_region(res->start, resource_size(res), pdev->name)) {
+ dev_err(&pdev->dev, "can't request region\n");
+ goto done;
+ }
+
+ /* set up the driver-specific struct */
+ cs5535_mfgpt_chip.base = res->start;
+ cs5535_mfgpt_chip.pdev = pdev;
+ spin_lock_init(&cs5535_mfgpt_chip.lock);
+
+ dev_info(&pdev->dev, "reserved resource region %pR\n", res);
+
+ /* detect the available timers */
+ t = scan_timers(&cs5535_mfgpt_chip);
+ dev_info(&pdev->dev, "%d MFGPT timers available\n", t);
+ cs5535_mfgpt_chip.initialized = 1;
+ return 0;
+
+done:
+ return err;
+}
+
+static struct platform_driver cs5535_mfgpt_driver = {
+ .driver = {
+ .name = DRV_NAME,
+ },
+ .probe = cs5535_mfgpt_probe,
+};
+
+
+static int __init cs5535_mfgpt_init(void)
+{
+ return platform_driver_register(&cs5535_mfgpt_driver);
+}
+
+module_init(cs5535_mfgpt_init);
+
+MODULE_AUTHOR("Andres Salomon <dilinger@queued.net>");
+MODULE_DESCRIPTION("CS5535/CS5536 MFGPT timer driver");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:" DRV_NAME);
diff --git a/drivers/misc/cxl/Kconfig b/drivers/misc/cxl/Kconfig
new file mode 100644
index 0000000..8756d06
--- /dev/null
+++ b/drivers/misc/cxl/Kconfig
@@ -0,0 +1,35 @@
+#
+# IBM Coherent Accelerator (CXL) compatible devices
+#
+
+config CXL_BASE
+ bool
+ default n
+ select PPC_COPRO_BASE
+
+config CXL_KERNEL_API
+ bool
+ default n
+
+config CXL_EEH
+ bool
+ default n
+
+config CXL
+ tristate "Support for IBM Coherent Accelerators (CXL)"
+ depends on PPC_POWERNV && PCI_MSI && EEH
+ select CXL_BASE
+ select CXL_KERNEL_API
+ select CXL_EEH
+ default m
+ help
+ Select this option to enable driver support for IBM Coherent
+ Accelerators (CXL). CXL is otherwise known as Coherent Accelerator
+ Processor Interface (CAPI). CAPI allows accelerators in FPGAs to be
+ coherently attached to a CPU via an MMU. This driver enables
+ userspace programs to access these accelerators via /dev/cxl/afuM.N
+ devices.
+
+ CAPI adapters are found in POWER8 based systems.
+
+ If unsure, say N.
diff --git a/drivers/misc/cxl/Makefile b/drivers/misc/cxl/Makefile
new file mode 100644
index 0000000..ab6f392
--- /dev/null
+++ b/drivers/misc/cxl/Makefile
@@ -0,0 +1,10 @@
+ccflags-y := -Werror $(call cc-disable-warning, unused-const-variable)
+
+cxl-y += main.o file.o irq.o fault.o native.o
+cxl-y += context.o sysfs.o debugfs.o pci.o trace.o
+cxl-y += vphb.o api.o
+obj-$(CONFIG_CXL) += cxl.o
+obj-$(CONFIG_CXL_BASE) += base.o
+
+# For tracepoints to include our trace.h from tracepoint infrastructure:
+CFLAGS_trace.o := -I$(src)
diff --git a/drivers/misc/cxl/api.c b/drivers/misc/cxl/api.c
new file mode 100644
index 0000000..690eb1a
--- /dev/null
+++ b/drivers/misc/cxl/api.c
@@ -0,0 +1,369 @@
+/*
+ * Copyright 2014 IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/pci.h>
+#include <linux/slab.h>
+#include <linux/anon_inodes.h>
+#include <linux/file.h>
+#include <misc/cxl.h>
+#include <linux/fs.h>
+
+#include "cxl.h"
+
+struct cxl_context *cxl_dev_context_init(struct pci_dev *dev)
+{
+ struct address_space *mapping;
+ struct cxl_afu *afu;
+ struct cxl_context *ctx;
+ int rc;
+
+ afu = cxl_pci_to_afu(dev);
+
+ ctx = cxl_context_alloc();
+ if (IS_ERR(ctx)) {
+ rc = PTR_ERR(ctx);
+ goto err_dev;
+ }
+
+ ctx->kernelapi = true;
+
+ /*
+ * Make our own address space since we won't have one from the
+ * filesystem like the user api has, and even if we do associate a file
+ * with this context we don't want to use the global anonymous inode's
+ * address space as that can invalidate unrelated users:
+ */
+ mapping = kmalloc(sizeof(struct address_space), GFP_KERNEL);
+ if (!mapping) {
+ rc = -ENOMEM;
+ goto err_ctx;
+ }
+ address_space_init_once(mapping);
+
+ /* Make it a slave context. We can promote it later? */
+ rc = cxl_context_init(ctx, afu, false, mapping);
+ if (rc)
+ goto err_mapping;
+
+ cxl_assign_psn_space(ctx);
+
+ return ctx;
+
+err_mapping:
+ kfree(mapping);
+err_ctx:
+ kfree(ctx);
+err_dev:
+ return ERR_PTR(rc);
+}
+EXPORT_SYMBOL_GPL(cxl_dev_context_init);
+
+struct cxl_context *cxl_get_context(struct pci_dev *dev)
+{
+ return dev->dev.archdata.cxl_ctx;
+}
+EXPORT_SYMBOL_GPL(cxl_get_context);
+
+struct device *cxl_get_phys_dev(struct pci_dev *dev)
+{
+ struct cxl_afu *afu;
+
+ afu = cxl_pci_to_afu(dev);
+
+ return afu->adapter->dev.parent;
+}
+EXPORT_SYMBOL_GPL(cxl_get_phys_dev);
+
+int cxl_release_context(struct cxl_context *ctx)
+{
+ if (ctx->status >= STARTED)
+ return -EBUSY;
+
+ cxl_context_free(ctx);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(cxl_release_context);
+
+int cxl_allocate_afu_irqs(struct cxl_context *ctx, int num)
+{
+ if (num == 0)
+ num = ctx->afu->pp_irqs;
+ return afu_allocate_irqs(ctx, num);
+}
+EXPORT_SYMBOL_GPL(cxl_allocate_afu_irqs);
+
+void cxl_free_afu_irqs(struct cxl_context *ctx)
+{
+ afu_irq_name_free(ctx);
+ cxl_release_irq_ranges(&ctx->irqs, ctx->afu->adapter);
+}
+EXPORT_SYMBOL_GPL(cxl_free_afu_irqs);
+
+static irq_hw_number_t cxl_find_afu_irq(struct cxl_context *ctx, int num)
+{
+ __u16 range;
+ int r;
+
+ WARN_ON(num == 0);
+
+ for (r = 0; r < CXL_IRQ_RANGES; r++) {
+ range = ctx->irqs.range[r];
+ if (num < range) {
+ return ctx->irqs.offset[r] + num;
+ }
+ num -= range;
+ }
+ return 0;
+}
+
+int cxl_map_afu_irq(struct cxl_context *ctx, int num,
+ irq_handler_t handler, void *cookie, char *name)
+{
+ irq_hw_number_t hwirq;
+
+ /*
+ * Find interrupt we are to register.
+ */
+ hwirq = cxl_find_afu_irq(ctx, num);
+ if (!hwirq)
+ return -ENOENT;
+
+ return cxl_map_irq(ctx->afu->adapter, hwirq, handler, cookie, name);
+}
+EXPORT_SYMBOL_GPL(cxl_map_afu_irq);
+
+void cxl_unmap_afu_irq(struct cxl_context *ctx, int num, void *cookie)
+{
+ irq_hw_number_t hwirq;
+ unsigned int virq;
+
+ hwirq = cxl_find_afu_irq(ctx, num);
+ if (!hwirq)
+ return;
+
+ virq = irq_find_mapping(NULL, hwirq);
+ if (virq)
+ cxl_unmap_irq(virq, cookie);
+}
+EXPORT_SYMBOL_GPL(cxl_unmap_afu_irq);
+
+/*
+ * Start a context
+ * Code here similar to afu_ioctl_start_work().
+ */
+int cxl_start_context(struct cxl_context *ctx, u64 wed,
+ struct task_struct *task)
+{
+ int rc = 0;
+ bool kernel = true;
+
+ pr_devel("%s: pe: %i\n", __func__, ctx->pe);
+
+ mutex_lock(&ctx->status_mutex);
+ if (ctx->status == STARTED)
+ goto out; /* already started */
+
+ if (task) {
+ ctx->pid = get_task_pid(task, PIDTYPE_PID);
+ ctx->glpid = get_task_pid(task->group_leader, PIDTYPE_PID);
+ kernel = false;
+ }
+
+ /*
+ * Increment driver use count. Enables global TLBIs for hash
+ * and callbacks to handle the segment table
+ */
+ cxl_ctx_get();
+
+ if ((rc = cxl_attach_process(ctx, kernel, wed , 0))) {
+ put_pid(ctx->pid);
+ cxl_ctx_put();
+ goto out;
+ }
+
+ ctx->status = STARTED;
+out:
+ mutex_unlock(&ctx->status_mutex);
+ return rc;
+}
+EXPORT_SYMBOL_GPL(cxl_start_context);
+
+int cxl_process_element(struct cxl_context *ctx)
+{
+ return ctx->pe;
+}
+EXPORT_SYMBOL_GPL(cxl_process_element);
+
+/* Stop a context. Returns 0 on success, otherwise -Errno */
+int cxl_stop_context(struct cxl_context *ctx)
+{
+ return __detach_context(ctx);
+}
+EXPORT_SYMBOL_GPL(cxl_stop_context);
+
+void cxl_set_master(struct cxl_context *ctx)
+{
+ ctx->master = true;
+ cxl_assign_psn_space(ctx);
+}
+EXPORT_SYMBOL_GPL(cxl_set_master);
+
+/* wrappers around afu_* file ops which are EXPORTED */
+int cxl_fd_open(struct inode *inode, struct file *file)
+{
+ return afu_open(inode, file);
+}
+EXPORT_SYMBOL_GPL(cxl_fd_open);
+int cxl_fd_release(struct inode *inode, struct file *file)
+{
+ return afu_release(inode, file);
+}
+EXPORT_SYMBOL_GPL(cxl_fd_release);
+long cxl_fd_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+ return afu_ioctl(file, cmd, arg);
+}
+EXPORT_SYMBOL_GPL(cxl_fd_ioctl);
+int cxl_fd_mmap(struct file *file, struct vm_area_struct *vm)
+{
+ return afu_mmap(file, vm);
+}
+EXPORT_SYMBOL_GPL(cxl_fd_mmap);
+unsigned int cxl_fd_poll(struct file *file, struct poll_table_struct *poll)
+{
+ return afu_poll(file, poll);
+}
+EXPORT_SYMBOL_GPL(cxl_fd_poll);
+ssize_t cxl_fd_read(struct file *file, char __user *buf, size_t count,
+ loff_t *off)
+{
+ return afu_read(file, buf, count, off);
+}
+EXPORT_SYMBOL_GPL(cxl_fd_read);
+
+#define PATCH_FOPS(NAME) if (!fops->NAME) fops->NAME = afu_fops.NAME
+
+/* Get a struct file and fd for a context and attach the ops */
+struct file *cxl_get_fd(struct cxl_context *ctx, struct file_operations *fops,
+ int *fd)
+{
+ struct file *file;
+ int rc, flags, fdtmp;
+
+ flags = O_RDWR | O_CLOEXEC;
+
+ /* This code is similar to anon_inode_getfd() */
+ rc = get_unused_fd_flags(flags);
+ if (rc < 0)
+ return ERR_PTR(rc);
+ fdtmp = rc;
+
+ /*
+ * Patch the file ops. Needs to be careful that this is rentrant safe.
+ */
+ if (fops) {
+ PATCH_FOPS(open);
+ PATCH_FOPS(poll);
+ PATCH_FOPS(read);
+ PATCH_FOPS(release);
+ PATCH_FOPS(unlocked_ioctl);
+ PATCH_FOPS(compat_ioctl);
+ PATCH_FOPS(mmap);
+ } else /* use default ops */
+ fops = (struct file_operations *)&afu_fops;
+
+ file = anon_inode_getfile("cxl", fops, ctx, flags);
+ if (IS_ERR(file))
+ goto err_fd;
+
+ file->f_mapping = ctx->mapping;
+
+ *fd = fdtmp;
+ return file;
+
+err_fd:
+ put_unused_fd(fdtmp);
+ return NULL;
+}
+EXPORT_SYMBOL_GPL(cxl_get_fd);
+
+struct cxl_context *cxl_fops_get_context(struct file *file)
+{
+ return file->private_data;
+}
+EXPORT_SYMBOL_GPL(cxl_fops_get_context);
+
+int cxl_start_work(struct cxl_context *ctx,
+ struct cxl_ioctl_start_work *work)
+{
+ int rc;
+
+ /* code taken from afu_ioctl_start_work */
+ if (!(work->flags & CXL_START_WORK_NUM_IRQS))
+ work->num_interrupts = ctx->afu->pp_irqs;
+ else if ((work->num_interrupts < ctx->afu->pp_irqs) ||
+ (work->num_interrupts > ctx->afu->irqs_max)) {
+ return -EINVAL;
+ }
+
+ rc = afu_register_irqs(ctx, work->num_interrupts);
+ if (rc)
+ return rc;
+
+ rc = cxl_start_context(ctx, work->work_element_descriptor, current);
+ if (rc < 0) {
+ afu_release_irqs(ctx, ctx);
+ return rc;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(cxl_start_work);
+
+void __iomem *cxl_psa_map(struct cxl_context *ctx)
+{
+ struct cxl_afu *afu = ctx->afu;
+ int rc;
+
+ rc = cxl_afu_check_and_enable(afu);
+ if (rc)
+ return NULL;
+
+ pr_devel("%s: psn_phys%llx size:%llx\n",
+ __func__, afu->psn_phys, afu->adapter->ps_size);
+ return ioremap(ctx->psn_phys, ctx->psn_size);
+}
+EXPORT_SYMBOL_GPL(cxl_psa_map);
+
+void cxl_psa_unmap(void __iomem *addr)
+{
+ iounmap(addr);
+}
+EXPORT_SYMBOL_GPL(cxl_psa_unmap);
+
+int cxl_afu_reset(struct cxl_context *ctx)
+{
+ struct cxl_afu *afu = ctx->afu;
+ int rc;
+
+ rc = __cxl_afu_reset(afu);
+ if (rc)
+ return rc;
+
+ return cxl_afu_check_and_enable(afu);
+}
+EXPORT_SYMBOL_GPL(cxl_afu_reset);
+
+void cxl_perst_reloads_same_image(struct cxl_afu *afu,
+ bool perst_reloads_same_image)
+{
+ afu->adapter->perst_same_image = perst_reloads_same_image;
+}
+EXPORT_SYMBOL_GPL(cxl_perst_reloads_same_image);
diff --git a/drivers/misc/cxl/base.c b/drivers/misc/cxl/base.c
new file mode 100644
index 0000000..a9f0dd3
--- /dev/null
+++ b/drivers/misc/cxl/base.c
@@ -0,0 +1,86 @@
+/*
+ * Copyright 2014 IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/rcupdate.h>
+#include <asm/errno.h>
+#include <misc/cxl-base.h>
+#include "cxl.h"
+
+/* protected by rcu */
+static struct cxl_calls *cxl_calls;
+
+atomic_t cxl_use_count = ATOMIC_INIT(0);
+EXPORT_SYMBOL(cxl_use_count);
+
+#ifdef CONFIG_CXL_MODULE
+
+static inline struct cxl_calls *cxl_calls_get(void)
+{
+ struct cxl_calls *calls = NULL;
+
+ rcu_read_lock();
+ calls = rcu_dereference(cxl_calls);
+ if (calls && !try_module_get(calls->owner))
+ calls = NULL;
+ rcu_read_unlock();
+
+ return calls;
+}
+
+static inline void cxl_calls_put(struct cxl_calls *calls)
+{
+ BUG_ON(calls != cxl_calls);
+
+ /* we don't need to rcu this, as we hold a reference to the module */
+ module_put(cxl_calls->owner);
+}
+
+#else /* !defined CONFIG_CXL_MODULE */
+
+static inline struct cxl_calls *cxl_calls_get(void)
+{
+ return cxl_calls;
+}
+
+static inline void cxl_calls_put(struct cxl_calls *calls) { }
+
+#endif /* CONFIG_CXL_MODULE */
+
+void cxl_slbia(struct mm_struct *mm)
+{
+ struct cxl_calls *calls;
+
+ calls = cxl_calls_get();
+ if (!calls)
+ return;
+
+ if (cxl_ctx_in_use())
+ calls->cxl_slbia(mm);
+
+ cxl_calls_put(calls);
+}
+
+int register_cxl_calls(struct cxl_calls *calls)
+{
+ if (cxl_calls)
+ return -EBUSY;
+
+ rcu_assign_pointer(cxl_calls, calls);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(register_cxl_calls);
+
+void unregister_cxl_calls(struct cxl_calls *calls)
+{
+ BUG_ON(cxl_calls->owner != calls->owner);
+ RCU_INIT_POINTER(cxl_calls, NULL);
+ synchronize_rcu();
+}
+EXPORT_SYMBOL_GPL(unregister_cxl_calls);
diff --git a/drivers/misc/cxl/context.c b/drivers/misc/cxl/context.c
new file mode 100644
index 0000000..262b88e
--- /dev/null
+++ b/drivers/misc/cxl/context.c
@@ -0,0 +1,303 @@
+/*
+ * Copyright 2014 IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/bitmap.h>
+#include <linux/sched.h>
+#include <linux/pid.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/debugfs.h>
+#include <linux/slab.h>
+#include <linux/idr.h>
+#include <asm/cputable.h>
+#include <asm/current.h>
+#include <asm/copro.h>
+
+#include "cxl.h"
+
+/*
+ * Allocates space for a CXL context.
+ */
+struct cxl_context *cxl_context_alloc(void)
+{
+ return kzalloc(sizeof(struct cxl_context), GFP_KERNEL);
+}
+
+/*
+ * Initialises a CXL context.
+ */
+int cxl_context_init(struct cxl_context *ctx, struct cxl_afu *afu, bool master,
+ struct address_space *mapping)
+{
+ int i;
+
+ spin_lock_init(&ctx->sste_lock);
+ ctx->afu = afu;
+ ctx->master = master;
+ ctx->pid = ctx->glpid = NULL; /* Set in start work ioctl */
+ mutex_init(&ctx->mapping_lock);
+ ctx->mapping = mapping;
+
+ /*
+ * Allocate the segment table before we put it in the IDR so that we
+ * can always access it when dereferenced from IDR. For the same
+ * reason, the segment table is only destroyed after the context is
+ * removed from the IDR. Access to this in the IOCTL is protected by
+ * Linux filesytem symantics (can't IOCTL until open is complete).
+ */
+ i = cxl_alloc_sst(ctx);
+ if (i)
+ return i;
+
+ INIT_WORK(&ctx->fault_work, cxl_handle_fault);
+
+ init_waitqueue_head(&ctx->wq);
+ spin_lock_init(&ctx->lock);
+
+ ctx->irq_bitmap = NULL;
+ ctx->pending_irq = false;
+ ctx->pending_fault = false;
+ ctx->pending_afu_err = false;
+
+ /*
+ * When we have to destroy all contexts in cxl_context_detach_all() we
+ * end up with afu_release_irqs() called from inside a
+ * idr_for_each_entry(). Hence we need to make sure that anything
+ * dereferenced from this IDR is ok before we allocate the IDR here.
+ * This clears out the IRQ ranges to ensure this.
+ */
+ for (i = 0; i < CXL_IRQ_RANGES; i++)
+ ctx->irqs.range[i] = 0;
+
+ mutex_init(&ctx->status_mutex);
+
+ ctx->status = OPENED;
+
+ /*
+ * Allocating IDR! We better make sure everything's setup that
+ * dereferences from it.
+ */
+ mutex_lock(&afu->contexts_lock);
+ idr_preload(GFP_KERNEL);
+ i = idr_alloc(&ctx->afu->contexts_idr, ctx, 0,
+ ctx->afu->num_procs, GFP_NOWAIT);
+ idr_preload_end();
+ mutex_unlock(&afu->contexts_lock);
+ if (i < 0)
+ return i;
+
+ ctx->pe = i;
+ ctx->elem = &ctx->afu->spa[i];
+ ctx->pe_inserted = false;
+
+ /*
+ * take a ref on the afu so that it stays alive at-least till
+ * this context is reclaimed inside reclaim_ctx.
+ */
+ cxl_afu_get(afu);
+ return 0;
+}
+
+static int cxl_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+ struct cxl_context *ctx = vma->vm_file->private_data;
+ unsigned long address = (unsigned long)vmf->virtual_address;
+ u64 area, offset;
+
+ offset = vmf->pgoff << PAGE_SHIFT;
+
+ pr_devel("%s: pe: %i address: 0x%lx offset: 0x%llx\n",
+ __func__, ctx->pe, address, offset);
+
+ if (ctx->afu->current_mode == CXL_MODE_DEDICATED) {
+ area = ctx->afu->psn_phys;
+ if (offset >= ctx->afu->adapter->ps_size)
+ return VM_FAULT_SIGBUS;
+ } else {
+ area = ctx->psn_phys;
+ if (offset >= ctx->psn_size)
+ return VM_FAULT_SIGBUS;
+ }
+
+ mutex_lock(&ctx->status_mutex);
+
+ if (ctx->status != STARTED) {
+ mutex_unlock(&ctx->status_mutex);
+ pr_devel("%s: Context not started, failing problem state access\n", __func__);
+ if (ctx->mmio_err_ff) {
+ if (!ctx->ff_page) {
+ ctx->ff_page = alloc_page(GFP_USER);
+ if (!ctx->ff_page)
+ return VM_FAULT_OOM;
+ memset(page_address(ctx->ff_page), 0xff, PAGE_SIZE);
+ }
+ get_page(ctx->ff_page);
+ vmf->page = ctx->ff_page;
+ vma->vm_page_prot = pgprot_cached(vma->vm_page_prot);
+ return 0;
+ }
+ return VM_FAULT_SIGBUS;
+ }
+
+ vm_insert_pfn(vma, address, (area + offset) >> PAGE_SHIFT);
+
+ mutex_unlock(&ctx->status_mutex);
+
+ return VM_FAULT_NOPAGE;
+}
+
+static const struct vm_operations_struct cxl_mmap_vmops = {
+ .fault = cxl_mmap_fault,
+};
+
+/*
+ * Map a per-context mmio space into the given vma.
+ */
+int cxl_context_iomap(struct cxl_context *ctx, struct vm_area_struct *vma)
+{
+ u64 start = vma->vm_pgoff << PAGE_SHIFT;
+ u64 len = vma->vm_end - vma->vm_start;
+
+ if (ctx->afu->current_mode == CXL_MODE_DEDICATED) {
+ if (start + len > ctx->afu->adapter->ps_size)
+ return -EINVAL;
+ } else {
+ if (start + len > ctx->psn_size)
+ return -EINVAL;
+ }
+
+ if (ctx->afu->current_mode != CXL_MODE_DEDICATED) {
+ /* make sure there is a valid per process space for this AFU */
+ if ((ctx->master && !ctx->afu->psa) || (!ctx->afu->pp_psa)) {
+ pr_devel("AFU doesn't support mmio space\n");
+ return -EINVAL;
+ }
+
+ /* Can't mmap until the AFU is enabled */
+ if (!ctx->afu->enabled)
+ return -EBUSY;
+ }
+
+ pr_devel("%s: mmio physical: %llx pe: %i master:%i\n", __func__,
+ ctx->psn_phys, ctx->pe , ctx->master);
+
+ vma->vm_flags |= VM_IO | VM_PFNMAP;
+ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+ vma->vm_ops = &cxl_mmap_vmops;
+ return 0;
+}
+
+/*
+ * Detach a context from the hardware. This disables interrupts and doesn't
+ * return until all outstanding interrupts for this context have completed. The
+ * hardware should no longer access *ctx after this has returned.
+ */
+int __detach_context(struct cxl_context *ctx)
+{
+ enum cxl_context_status status;
+
+ mutex_lock(&ctx->status_mutex);
+ status = ctx->status;
+ ctx->status = CLOSED;
+ mutex_unlock(&ctx->status_mutex);
+ if (status != STARTED)
+ return -EBUSY;
+
+ /* Only warn if we detached while the link was OK.
+ * If detach fails when hw is down, we don't care.
+ */
+ WARN_ON(cxl_detach_process(ctx) &&
+ cxl_adapter_link_ok(ctx->afu->adapter));
+ flush_work(&ctx->fault_work); /* Only needed for dedicated process */
+
+ /* release the reference to the group leader and mm handling pid */
+ put_pid(ctx->pid);
+ put_pid(ctx->glpid);
+
+ cxl_ctx_put();
+ return 0;
+}
+
+/*
+ * Detach the given context from the AFU. This doesn't actually
+ * free the context but it should stop the context running in hardware
+ * (ie. prevent this context from generating any further interrupts
+ * so that it can be freed).
+ */
+void cxl_context_detach(struct cxl_context *ctx)
+{
+ int rc;
+
+ rc = __detach_context(ctx);
+ if (rc)
+ return;
+
+ afu_release_irqs(ctx, ctx);
+ wake_up_all(&ctx->wq);
+}
+
+/*
+ * Detach all contexts on the given AFU.
+ */
+void cxl_context_detach_all(struct cxl_afu *afu)
+{
+ struct cxl_context *ctx;
+ int tmp;
+
+ mutex_lock(&afu->contexts_lock);
+ idr_for_each_entry(&afu->contexts_idr, ctx, tmp) {
+ /*
+ * Anything done in here needs to be setup before the IDR is
+ * created and torn down after the IDR removed
+ */
+ cxl_context_detach(ctx);
+
+ /*
+ * We are force detaching - remove any active PSA mappings so
+ * userspace cannot interfere with the card if it comes back.
+ * Easiest way to exercise this is to unbind and rebind the
+ * driver via sysfs while it is in use.
+ */
+ mutex_lock(&ctx->mapping_lock);
+ if (ctx->mapping)
+ unmap_mapping_range(ctx->mapping, 0, 0, 1);
+ mutex_unlock(&ctx->mapping_lock);
+ }
+ mutex_unlock(&afu->contexts_lock);
+}
+
+static void reclaim_ctx(struct rcu_head *rcu)
+{
+ struct cxl_context *ctx = container_of(rcu, struct cxl_context, rcu);
+
+ free_page((u64)ctx->sstp);
+ if (ctx->ff_page)
+ __free_page(ctx->ff_page);
+ ctx->sstp = NULL;
+ if (ctx->kernelapi)
+ kfree(ctx->mapping);
+
+ if (ctx->irq_bitmap)
+ kfree(ctx->irq_bitmap);
+
+ /* Drop ref to the afu device taken during cxl_context_init */
+ cxl_afu_put(ctx->afu);
+
+ kfree(ctx);
+}
+
+void cxl_context_free(struct cxl_context *ctx)
+{
+ mutex_lock(&ctx->afu->contexts_lock);
+ idr_remove(&ctx->afu->contexts_idr, ctx->pe);
+ mutex_unlock(&ctx->afu->contexts_lock);
+ call_rcu(&ctx->rcu, reclaim_ctx);
+}
diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h
new file mode 100644
index 0000000..a521bc7
--- /dev/null
+++ b/drivers/misc/cxl/cxl.h
@@ -0,0 +1,767 @@
+/*
+ * Copyright 2014 IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _CXL_H_
+#define _CXL_H_
+
+#include <linux/interrupt.h>
+#include <linux/semaphore.h>
+#include <linux/device.h>
+#include <linux/types.h>
+#include <linux/cdev.h>
+#include <linux/pid.h>
+#include <linux/io.h>
+#include <linux/pci.h>
+#include <linux/fs.h>
+#include <asm/cputable.h>
+#include <asm/mmu.h>
+#include <asm/reg.h>
+#include <misc/cxl-base.h>
+
+#include <uapi/misc/cxl.h>
+
+extern uint cxl_verbose;
+
+#define CXL_TIMEOUT 5
+
+/*
+ * Bump version each time a user API change is made, whether it is
+ * backwards compatible ot not.
+ */
+#define CXL_API_VERSION 2
+#define CXL_API_VERSION_COMPATIBLE 1
+
+/*
+ * Opaque types to avoid accidentally passing registers for the wrong MMIO
+ *
+ * At the end of the day, I'm not married to using typedef here, but it might
+ * (and has!) help avoid bugs like mixing up CXL_PSL_CtxTime and
+ * CXL_PSL_CtxTime_An, or calling cxl_p1n_write instead of cxl_p1_write.
+ *
+ * I'm quite happy if these are changed back to #defines before upstreaming, it
+ * should be little more than a regexp search+replace operation in this file.
+ */
+typedef struct {
+ const int x;
+} cxl_p1_reg_t;
+typedef struct {
+ const int x;
+} cxl_p1n_reg_t;
+typedef struct {
+ const int x;
+} cxl_p2n_reg_t;
+#define cxl_reg_off(reg) \
+ (reg.x)
+
+/* Memory maps. Ref CXL Appendix A */
+
+/* PSL Privilege 1 Memory Map */
+/* Configuration and Control area */
+static const cxl_p1_reg_t CXL_PSL_CtxTime = {0x0000};
+static const cxl_p1_reg_t CXL_PSL_ErrIVTE = {0x0008};
+static const cxl_p1_reg_t CXL_PSL_KEY1 = {0x0010};
+static const cxl_p1_reg_t CXL_PSL_KEY2 = {0x0018};
+static const cxl_p1_reg_t CXL_PSL_Control = {0x0020};
+/* Downloading */
+static const cxl_p1_reg_t CXL_PSL_DLCNTL = {0x0060};
+static const cxl_p1_reg_t CXL_PSL_DLADDR = {0x0068};
+
+/* PSL Lookaside Buffer Management Area */
+static const cxl_p1_reg_t CXL_PSL_LBISEL = {0x0080};
+static const cxl_p1_reg_t CXL_PSL_SLBIE = {0x0088};
+static const cxl_p1_reg_t CXL_PSL_SLBIA = {0x0090};
+static const cxl_p1_reg_t CXL_PSL_TLBIE = {0x00A0};
+static const cxl_p1_reg_t CXL_PSL_TLBIA = {0x00A8};
+static const cxl_p1_reg_t CXL_PSL_AFUSEL = {0x00B0};
+
+/* 0x00C0:7EFF Implementation dependent area */
+static const cxl_p1_reg_t CXL_PSL_FIR1 = {0x0100};
+static const cxl_p1_reg_t CXL_PSL_FIR2 = {0x0108};
+static const cxl_p1_reg_t CXL_PSL_Timebase = {0x0110};
+static const cxl_p1_reg_t CXL_PSL_VERSION = {0x0118};
+static const cxl_p1_reg_t CXL_PSL_RESLCKTO = {0x0128};
+static const cxl_p1_reg_t CXL_PSL_TB_CTLSTAT = {0x0140};
+static const cxl_p1_reg_t CXL_PSL_FIR_CNTL = {0x0148};
+static const cxl_p1_reg_t CXL_PSL_DSNDCTL = {0x0150};
+static const cxl_p1_reg_t CXL_PSL_SNWRALLOC = {0x0158};
+static const cxl_p1_reg_t CXL_PSL_TRACE = {0x0170};
+/* 0x7F00:7FFF Reserved PCIe MSI-X Pending Bit Array area */
+/* 0x8000:FFFF Reserved PCIe MSI-X Table Area */
+
+/* PSL Slice Privilege 1 Memory Map */
+/* Configuration Area */
+static const cxl_p1n_reg_t CXL_PSL_SR_An = {0x00};
+static const cxl_p1n_reg_t CXL_PSL_LPID_An = {0x08};
+static const cxl_p1n_reg_t CXL_PSL_AMBAR_An = {0x10};
+static const cxl_p1n_reg_t CXL_PSL_SPOffset_An = {0x18};
+static const cxl_p1n_reg_t CXL_PSL_ID_An = {0x20};
+static const cxl_p1n_reg_t CXL_PSL_SERR_An = {0x28};
+/* Memory Management and Lookaside Buffer Management */
+static const cxl_p1n_reg_t CXL_PSL_SDR_An = {0x30};
+static const cxl_p1n_reg_t CXL_PSL_AMOR_An = {0x38};
+/* Pointer Area */
+static const cxl_p1n_reg_t CXL_HAURP_An = {0x80};
+static const cxl_p1n_reg_t CXL_PSL_SPAP_An = {0x88};
+static const cxl_p1n_reg_t CXL_PSL_LLCMD_An = {0x90};
+/* Control Area */
+static const cxl_p1n_reg_t CXL_PSL_SCNTL_An = {0xA0};
+static const cxl_p1n_reg_t CXL_PSL_CtxTime_An = {0xA8};
+static const cxl_p1n_reg_t CXL_PSL_IVTE_Offset_An = {0xB0};
+static const cxl_p1n_reg_t CXL_PSL_IVTE_Limit_An = {0xB8};
+/* 0xC0:FF Implementation Dependent Area */
+static const cxl_p1n_reg_t CXL_PSL_FIR_SLICE_An = {0xC0};
+static const cxl_p1n_reg_t CXL_AFU_DEBUG_An = {0xC8};
+static const cxl_p1n_reg_t CXL_PSL_APCALLOC_A = {0xD0};
+static const cxl_p1n_reg_t CXL_PSL_COALLOC_A = {0xD8};
+static const cxl_p1n_reg_t CXL_PSL_RXCTL_A = {0xE0};
+static const cxl_p1n_reg_t CXL_PSL_SLICE_TRACE = {0xE8};
+
+/* PSL Slice Privilege 2 Memory Map */
+/* Configuration and Control Area */
+static const cxl_p2n_reg_t CXL_PSL_PID_TID_An = {0x000};
+static const cxl_p2n_reg_t CXL_CSRP_An = {0x008};
+static const cxl_p2n_reg_t CXL_AURP0_An = {0x010};
+static const cxl_p2n_reg_t CXL_AURP1_An = {0x018};
+static const cxl_p2n_reg_t CXL_SSTP0_An = {0x020};
+static const cxl_p2n_reg_t CXL_SSTP1_An = {0x028};
+static const cxl_p2n_reg_t CXL_PSL_AMR_An = {0x030};
+/* Segment Lookaside Buffer Management */
+static const cxl_p2n_reg_t CXL_SLBIE_An = {0x040};
+static const cxl_p2n_reg_t CXL_SLBIA_An = {0x048};
+static const cxl_p2n_reg_t CXL_SLBI_Select_An = {0x050};
+/* Interrupt Registers */
+static const cxl_p2n_reg_t CXL_PSL_DSISR_An = {0x060};
+static const cxl_p2n_reg_t CXL_PSL_DAR_An = {0x068};
+static const cxl_p2n_reg_t CXL_PSL_DSR_An = {0x070};
+static const cxl_p2n_reg_t CXL_PSL_TFC_An = {0x078};
+static const cxl_p2n_reg_t CXL_PSL_PEHandle_An = {0x080};
+static const cxl_p2n_reg_t CXL_PSL_ErrStat_An = {0x088};
+/* AFU Registers */
+static const cxl_p2n_reg_t CXL_AFU_Cntl_An = {0x090};
+static const cxl_p2n_reg_t CXL_AFU_ERR_An = {0x098};
+/* Work Element Descriptor */
+static const cxl_p2n_reg_t CXL_PSL_WED_An = {0x0A0};
+/* 0x0C0:FFF Implementation Dependent Area */
+
+#define CXL_PSL_SPAP_Addr 0x0ffffffffffff000ULL
+#define CXL_PSL_SPAP_Size 0x0000000000000ff0ULL
+#define CXL_PSL_SPAP_Size_Shift 4
+#define CXL_PSL_SPAP_V 0x0000000000000001ULL
+
+/****** CXL_PSL_Control ****************************************************/
+#define CXL_PSL_Control_tb 0x0000000000000001ULL
+
+/****** CXL_PSL_DLCNTL *****************************************************/
+#define CXL_PSL_DLCNTL_D (0x1ull << (63-28))
+#define CXL_PSL_DLCNTL_C (0x1ull << (63-29))
+#define CXL_PSL_DLCNTL_E (0x1ull << (63-30))
+#define CXL_PSL_DLCNTL_S (0x1ull << (63-31))
+#define CXL_PSL_DLCNTL_CE (CXL_PSL_DLCNTL_C | CXL_PSL_DLCNTL_E)
+#define CXL_PSL_DLCNTL_DCES (CXL_PSL_DLCNTL_D | CXL_PSL_DLCNTL_CE | CXL_PSL_DLCNTL_S)
+
+/****** CXL_PSL_SR_An ******************************************************/
+#define CXL_PSL_SR_An_SF MSR_SF /* 64bit */
+#define CXL_PSL_SR_An_TA (1ull << (63-1)) /* Tags active, GA1: 0 */
+#define CXL_PSL_SR_An_HV MSR_HV /* Hypervisor, GA1: 0 */
+#define CXL_PSL_SR_An_PR MSR_PR /* Problem state, GA1: 1 */
+#define CXL_PSL_SR_An_ISL (1ull << (63-53)) /* Ignore Segment Large Page */
+#define CXL_PSL_SR_An_TC (1ull << (63-54)) /* Page Table secondary hash */
+#define CXL_PSL_SR_An_US (1ull << (63-56)) /* User state, GA1: X */
+#define CXL_PSL_SR_An_SC (1ull << (63-58)) /* Segment Table secondary hash */
+#define CXL_PSL_SR_An_R MSR_DR /* Relocate, GA1: 1 */
+#define CXL_PSL_SR_An_MP (1ull << (63-62)) /* Master Process */
+#define CXL_PSL_SR_An_LE (1ull << (63-63)) /* Little Endian */
+
+/****** CXL_PSL_LLCMD_An ****************************************************/
+#define CXL_LLCMD_TERMINATE 0x0001000000000000ULL
+#define CXL_LLCMD_REMOVE 0x0002000000000000ULL
+#define CXL_LLCMD_SUSPEND 0x0003000000000000ULL
+#define CXL_LLCMD_RESUME 0x0004000000000000ULL
+#define CXL_LLCMD_ADD 0x0005000000000000ULL
+#define CXL_LLCMD_UPDATE 0x0006000000000000ULL
+#define CXL_LLCMD_HANDLE_MASK 0x000000000000ffffULL
+
+/****** CXL_PSL_ID_An ****************************************************/
+#define CXL_PSL_ID_An_F (1ull << (63-31))
+#define CXL_PSL_ID_An_L (1ull << (63-30))
+
+/****** CXL_PSL_SCNTL_An ****************************************************/
+#define CXL_PSL_SCNTL_An_CR (0x1ull << (63-15))
+/* Programming Modes: */
+#define CXL_PSL_SCNTL_An_PM_MASK (0xffffull << (63-31))
+#define CXL_PSL_SCNTL_An_PM_Shared (0x0000ull << (63-31))
+#define CXL_PSL_SCNTL_An_PM_OS (0x0001ull << (63-31))
+#define CXL_PSL_SCNTL_An_PM_Process (0x0002ull << (63-31))
+#define CXL_PSL_SCNTL_An_PM_AFU (0x0004ull << (63-31))
+#define CXL_PSL_SCNTL_An_PM_AFU_PBT (0x0104ull << (63-31))
+/* Purge Status (ro) */
+#define CXL_PSL_SCNTL_An_Ps_MASK (0x3ull << (63-39))
+#define CXL_PSL_SCNTL_An_Ps_Pending (0x1ull << (63-39))
+#define CXL_PSL_SCNTL_An_Ps_Complete (0x3ull << (63-39))
+/* Purge */
+#define CXL_PSL_SCNTL_An_Pc (0x1ull << (63-48))
+/* Suspend Status (ro) */
+#define CXL_PSL_SCNTL_An_Ss_MASK (0x3ull << (63-55))
+#define CXL_PSL_SCNTL_An_Ss_Pending (0x1ull << (63-55))
+#define CXL_PSL_SCNTL_An_Ss_Complete (0x3ull << (63-55))
+/* Suspend Control */
+#define CXL_PSL_SCNTL_An_Sc (0x1ull << (63-63))
+
+/* AFU Slice Enable Status (ro) */
+#define CXL_AFU_Cntl_An_ES_MASK (0x7ull << (63-2))
+#define CXL_AFU_Cntl_An_ES_Disabled (0x0ull << (63-2))
+#define CXL_AFU_Cntl_An_ES_Enabled (0x4ull << (63-2))
+/* AFU Slice Enable */
+#define CXL_AFU_Cntl_An_E (0x1ull << (63-3))
+/* AFU Slice Reset status (ro) */
+#define CXL_AFU_Cntl_An_RS_MASK (0x3ull << (63-5))
+#define CXL_AFU_Cntl_An_RS_Pending (0x1ull << (63-5))
+#define CXL_AFU_Cntl_An_RS_Complete (0x2ull << (63-5))
+/* AFU Slice Reset */
+#define CXL_AFU_Cntl_An_RA (0x1ull << (63-7))
+
+/****** CXL_SSTP0/1_An ******************************************************/
+/* These top bits are for the segment that CONTAINS the segment table */
+#define CXL_SSTP0_An_B_SHIFT SLB_VSID_SSIZE_SHIFT
+#define CXL_SSTP0_An_KS (1ull << (63-2))
+#define CXL_SSTP0_An_KP (1ull << (63-3))
+#define CXL_SSTP0_An_N (1ull << (63-4))
+#define CXL_SSTP0_An_L (1ull << (63-5))
+#define CXL_SSTP0_An_C (1ull << (63-6))
+#define CXL_SSTP0_An_TA (1ull << (63-7))
+#define CXL_SSTP0_An_LP_SHIFT (63-9) /* 2 Bits */
+/* And finally, the virtual address & size of the segment table: */
+#define CXL_SSTP0_An_SegTableSize_SHIFT (63-31) /* 12 Bits */
+#define CXL_SSTP0_An_SegTableSize_MASK \
+ (((1ull << 12) - 1) << CXL_SSTP0_An_SegTableSize_SHIFT)
+#define CXL_SSTP0_An_STVA_U_MASK ((1ull << (63-49))-1)
+#define CXL_SSTP1_An_STVA_L_MASK (~((1ull << (63-55))-1))
+#define CXL_SSTP1_An_V (1ull << (63-63))
+
+/****** CXL_PSL_SLBIE_[An] **************************************************/
+/* write: */
+#define CXL_SLBIE_C PPC_BIT(36) /* Class */
+#define CXL_SLBIE_SS PPC_BITMASK(37, 38) /* Segment Size */
+#define CXL_SLBIE_SS_SHIFT PPC_BITLSHIFT(38)
+#define CXL_SLBIE_TA PPC_BIT(38) /* Tags Active */
+/* read: */
+#define CXL_SLBIE_MAX PPC_BITMASK(24, 31)
+#define CXL_SLBIE_PENDING PPC_BITMASK(56, 63)
+
+/****** Common to all CXL_TLBIA/SLBIA_[An] **********************************/
+#define CXL_TLB_SLB_P (1ull) /* Pending (read) */
+
+/****** Common to all CXL_TLB/SLB_IA/IE_[An] registers **********************/
+#define CXL_TLB_SLB_IQ_ALL (0ull) /* Inv qualifier */
+#define CXL_TLB_SLB_IQ_LPID (1ull) /* Inv qualifier */
+#define CXL_TLB_SLB_IQ_LPIDPID (3ull) /* Inv qualifier */
+
+/****** CXL_PSL_AFUSEL ******************************************************/
+#define CXL_PSL_AFUSEL_A (1ull << (63-55)) /* Adapter wide invalidates affect all AFUs */
+
+/****** CXL_PSL_DSISR_An ****************************************************/
+#define CXL_PSL_DSISR_An_DS (1ull << (63-0)) /* Segment not found */
+#define CXL_PSL_DSISR_An_DM (1ull << (63-1)) /* PTE not found (See also: M) or protection fault */
+#define CXL_PSL_DSISR_An_ST (1ull << (63-2)) /* Segment Table PTE not found */
+#define CXL_PSL_DSISR_An_UR (1ull << (63-3)) /* AURP PTE not found */
+#define CXL_PSL_DSISR_TRANS (CXL_PSL_DSISR_An_DS | CXL_PSL_DSISR_An_DM | CXL_PSL_DSISR_An_ST | CXL_PSL_DSISR_An_UR)
+#define CXL_PSL_DSISR_An_PE (1ull << (63-4)) /* PSL Error (implementation specific) */
+#define CXL_PSL_DSISR_An_AE (1ull << (63-5)) /* AFU Error */
+#define CXL_PSL_DSISR_An_OC (1ull << (63-6)) /* OS Context Warning */
+/* NOTE: Bits 32:63 are undefined if DSISR[DS] = 1 */
+#define CXL_PSL_DSISR_An_M DSISR_NOHPTE /* PTE not found */
+#define CXL_PSL_DSISR_An_P DSISR_PROTFAULT /* Storage protection violation */
+#define CXL_PSL_DSISR_An_A (1ull << (63-37)) /* AFU lock access to write through or cache inhibited storage */
+#define CXL_PSL_DSISR_An_S DSISR_ISSTORE /* Access was afu_wr or afu_zero */
+#define CXL_PSL_DSISR_An_K DSISR_KEYFAULT /* Access not permitted by virtual page class key protection */
+
+/****** CXL_PSL_TFC_An ******************************************************/
+#define CXL_PSL_TFC_An_A (1ull << (63-28)) /* Acknowledge non-translation fault */
+#define CXL_PSL_TFC_An_C (1ull << (63-29)) /* Continue (abort transaction) */
+#define CXL_PSL_TFC_An_AE (1ull << (63-30)) /* Restart PSL with address error */
+#define CXL_PSL_TFC_An_R (1ull << (63-31)) /* Restart PSL transaction */
+
+/* cxl_process_element->software_status */
+#define CXL_PE_SOFTWARE_STATE_V (1ul << (31 - 0)) /* Valid */
+#define CXL_PE_SOFTWARE_STATE_C (1ul << (31 - 29)) /* Complete */
+#define CXL_PE_SOFTWARE_STATE_S (1ul << (31 - 30)) /* Suspend */
+#define CXL_PE_SOFTWARE_STATE_T (1ul << (31 - 31)) /* Terminate */
+
+/****** CXL_PSL_RXCTL_An (Implementation Specific) **************************
+ * Controls AFU Hang Pulse, which sets the timeout for the AFU to respond to
+ * the PSL for any response (except MMIO). Timeouts will occur between 1x to 2x
+ * of the hang pulse frequency.
+ */
+#define CXL_PSL_RXCTL_AFUHP_4S 0x7000000000000000ULL
+
+/* SPA->sw_command_status */
+#define CXL_SPA_SW_CMD_MASK 0xffff000000000000ULL
+#define CXL_SPA_SW_CMD_TERMINATE 0x0001000000000000ULL
+#define CXL_SPA_SW_CMD_REMOVE 0x0002000000000000ULL
+#define CXL_SPA_SW_CMD_SUSPEND 0x0003000000000000ULL
+#define CXL_SPA_SW_CMD_RESUME 0x0004000000000000ULL
+#define CXL_SPA_SW_CMD_ADD 0x0005000000000000ULL
+#define CXL_SPA_SW_CMD_UPDATE 0x0006000000000000ULL
+#define CXL_SPA_SW_STATE_MASK 0x0000ffff00000000ULL
+#define CXL_SPA_SW_STATE_TERMINATED 0x0000000100000000ULL
+#define CXL_SPA_SW_STATE_REMOVED 0x0000000200000000ULL
+#define CXL_SPA_SW_STATE_SUSPENDED 0x0000000300000000ULL
+#define CXL_SPA_SW_STATE_RESUMED 0x0000000400000000ULL
+#define CXL_SPA_SW_STATE_ADDED 0x0000000500000000ULL
+#define CXL_SPA_SW_STATE_UPDATED 0x0000000600000000ULL
+#define CXL_SPA_SW_PSL_ID_MASK 0x00000000ffff0000ULL
+#define CXL_SPA_SW_LINK_MASK 0x000000000000ffffULL
+
+#define CXL_MAX_SLICES 4
+#define MAX_AFU_MMIO_REGS 3
+
+#define CXL_MODE_TIME_SLICED 0x4
+#define CXL_SUPPORTED_MODES (CXL_MODE_DEDICATED | CXL_MODE_DIRECTED)
+
+enum cxl_context_status {
+ CLOSED,
+ OPENED,
+ STARTED
+};
+
+enum prefault_modes {
+ CXL_PREFAULT_NONE,
+ CXL_PREFAULT_WED,
+ CXL_PREFAULT_ALL,
+};
+
+struct cxl_sste {
+ __be64 esid_data;
+ __be64 vsid_data;
+};
+
+#define to_cxl_adapter(d) container_of(d, struct cxl, dev)
+#define to_cxl_afu(d) container_of(d, struct cxl_afu, dev)
+
+struct cxl_afu {
+ irq_hw_number_t psl_hwirq;
+ irq_hw_number_t serr_hwirq;
+ char *err_irq_name;
+ char *psl_irq_name;
+ unsigned int serr_virq;
+ void __iomem *p1n_mmio;
+ void __iomem *p2n_mmio;
+ phys_addr_t psn_phys;
+ u64 pp_offset;
+ u64 pp_size;
+ void __iomem *afu_desc_mmio;
+ struct cxl *adapter;
+ struct device dev;
+ struct cdev afu_cdev_s, afu_cdev_m, afu_cdev_d;
+ struct device *chardev_s, *chardev_m, *chardev_d;
+ struct idr contexts_idr;
+ struct dentry *debugfs;
+ struct mutex contexts_lock;
+ struct mutex spa_mutex;
+ spinlock_t afu_cntl_lock;
+
+ /* AFU error buffer fields and bin attribute for sysfs */
+ u64 eb_len, eb_offset;
+ struct bin_attribute attr_eb;
+
+ /*
+ * Only the first part of the SPA is used for the process element
+ * linked list. The only other part that software needs to worry about
+ * is sw_command_status, which we store a separate pointer to.
+ * Everything else in the SPA is only used by hardware
+ */
+ struct cxl_process_element *spa;
+ __be64 *sw_command_status;
+ unsigned int spa_size;
+ int spa_order;
+ int spa_max_procs;
+ unsigned int psl_virq;
+
+ /* pointer to the vphb */
+ struct pci_controller *phb;
+
+ int pp_irqs;
+ int irqs_max;
+ int num_procs;
+ int max_procs_virtualised;
+ int slice;
+ int modes_supported;
+ int current_mode;
+ int crs_num;
+ u64 crs_len;
+ u64 crs_offset;
+ struct list_head crs;
+ enum prefault_modes prefault_mode;
+ bool psa;
+ bool pp_psa;
+ bool enabled;
+};
+
+/* AFU refcount management */
+static inline struct cxl_afu *cxl_afu_get(struct cxl_afu *afu)
+{
+
+ return (get_device(&afu->dev) == NULL) ? NULL : afu;
+}
+
+static inline void cxl_afu_put(struct cxl_afu *afu)
+{
+ put_device(&afu->dev);
+}
+
+
+struct cxl_irq_name {
+ struct list_head list;
+ char *name;
+};
+
+/*
+ * This is a cxl context. If the PSL is in dedicated mode, there will be one
+ * of these per AFU. If in AFU directed there can be lots of these.
+ */
+struct cxl_context {
+ struct cxl_afu *afu;
+
+ /* Problem state MMIO */
+ phys_addr_t psn_phys;
+ u64 psn_size;
+
+ /* Used to unmap any mmaps when force detaching */
+ struct address_space *mapping;
+ struct mutex mapping_lock;
+ struct page *ff_page;
+ bool mmio_err_ff;
+ bool kernelapi;
+
+ spinlock_t sste_lock; /* Protects segment table entries */
+ struct cxl_sste *sstp;
+ u64 sstp0, sstp1;
+ unsigned int sst_size, sst_lru;
+
+ wait_queue_head_t wq;
+ /* pid of the group leader associated with the pid */
+ struct pid *glpid;
+ /* use mm context associated with this pid for ds faults */
+ struct pid *pid;
+ spinlock_t lock; /* Protects pending_irq_mask, pending_fault and fault_addr */
+ /* Only used in PR mode */
+ u64 process_token;
+
+ unsigned long *irq_bitmap; /* Accessed from IRQ context */
+ struct cxl_irq_ranges irqs;
+ struct list_head irq_names;
+ u64 fault_addr;
+ u64 fault_dsisr;
+ u64 afu_err;
+
+ /*
+ * This status and it's lock pretects start and detach context
+ * from racing. It also prevents detach from racing with
+ * itself
+ */
+ enum cxl_context_status status;
+ struct mutex status_mutex;
+
+
+ /* XXX: Is it possible to need multiple work items at once? */
+ struct work_struct fault_work;
+ u64 dsisr;
+ u64 dar;
+
+ struct cxl_process_element *elem;
+
+ int pe; /* process element handle */
+ u32 irq_count;
+ bool pe_inserted;
+ bool master;
+ bool kernel;
+ bool pending_irq;
+ bool pending_fault;
+ bool pending_afu_err;
+
+ struct rcu_head rcu;
+};
+
+struct cxl {
+ void __iomem *p1_mmio;
+ void __iomem *p2_mmio;
+ irq_hw_number_t err_hwirq;
+ unsigned int err_virq;
+ spinlock_t afu_list_lock;
+ struct cxl_afu *afu[CXL_MAX_SLICES];
+ struct device dev;
+ struct dentry *trace;
+ struct dentry *psl_err_chk;
+ struct dentry *debugfs;
+ char *irq_name;
+ struct bin_attribute cxl_attr;
+ int adapter_num;
+ int user_irqs;
+ u64 afu_desc_off;
+ u64 afu_desc_size;
+ u64 ps_off;
+ u64 ps_size;
+ u16 psl_rev;
+ u16 base_image;
+ u8 vsec_status;
+ u8 caia_major;
+ u8 caia_minor;
+ u8 slices;
+ bool user_image_loaded;
+ bool perst_loads_image;
+ bool perst_select_user;
+ bool perst_same_image;
+};
+
+int cxl_alloc_one_irq(struct cxl *adapter);
+void cxl_release_one_irq(struct cxl *adapter, int hwirq);
+int cxl_alloc_irq_ranges(struct cxl_irq_ranges *irqs, struct cxl *adapter, unsigned int num);
+void cxl_release_irq_ranges(struct cxl_irq_ranges *irqs, struct cxl *adapter);
+int cxl_setup_irq(struct cxl *adapter, unsigned int hwirq, unsigned int virq);
+int cxl_update_image_control(struct cxl *adapter);
+int cxl_reset(struct cxl *adapter);
+
+/* common == phyp + powernv */
+struct cxl_process_element_common {
+ __be32 tid;
+ __be32 pid;
+ __be64 csrp;
+ __be64 aurp0;
+ __be64 aurp1;
+ __be64 sstp0;
+ __be64 sstp1;
+ __be64 amr;
+ u8 reserved3[4];
+ __be64 wed;
+} __packed;
+
+/* just powernv */
+struct cxl_process_element {
+ __be64 sr;
+ __be64 SPOffset;
+ __be64 sdr;
+ __be64 haurp;
+ __be32 ctxtime;
+ __be16 ivte_offsets[4];
+ __be16 ivte_ranges[4];
+ __be32 lpid;
+ struct cxl_process_element_common common;
+ __be32 software_state;
+} __packed;
+
+static inline bool cxl_adapter_link_ok(struct cxl *cxl)
+{
+ struct pci_dev *pdev;
+
+ pdev = to_pci_dev(cxl->dev.parent);
+ return !pci_channel_offline(pdev);
+}
+
+static inline void __iomem *_cxl_p1_addr(struct cxl *cxl, cxl_p1_reg_t reg)
+{
+ WARN_ON(!cpu_has_feature(CPU_FTR_HVMODE));
+ return cxl->p1_mmio + cxl_reg_off(reg);
+}
+
+static inline void cxl_p1_write(struct cxl *cxl, cxl_p1_reg_t reg, u64 val)
+{
+ if (likely(cxl_adapter_link_ok(cxl)))
+ out_be64(_cxl_p1_addr(cxl, reg), val);
+}
+
+static inline u64 cxl_p1_read(struct cxl *cxl, cxl_p1_reg_t reg)
+{
+ if (likely(cxl_adapter_link_ok(cxl)))
+ return in_be64(_cxl_p1_addr(cxl, reg));
+ else
+ return ~0ULL;
+}
+
+static inline void __iomem *_cxl_p1n_addr(struct cxl_afu *afu, cxl_p1n_reg_t reg)
+{
+ WARN_ON(!cpu_has_feature(CPU_FTR_HVMODE));
+ return afu->p1n_mmio + cxl_reg_off(reg);
+}
+
+static inline void cxl_p1n_write(struct cxl_afu *afu, cxl_p1n_reg_t reg, u64 val)
+{
+ if (likely(cxl_adapter_link_ok(afu->adapter)))
+ out_be64(_cxl_p1n_addr(afu, reg), val);
+}
+
+static inline u64 cxl_p1n_read(struct cxl_afu *afu, cxl_p1n_reg_t reg)
+{
+ if (likely(cxl_adapter_link_ok(afu->adapter)))
+ return in_be64(_cxl_p1n_addr(afu, reg));
+ else
+ return ~0ULL;
+}
+
+static inline void __iomem *_cxl_p2n_addr(struct cxl_afu *afu, cxl_p2n_reg_t reg)
+{
+ return afu->p2n_mmio + cxl_reg_off(reg);
+}
+
+static inline void cxl_p2n_write(struct cxl_afu *afu, cxl_p2n_reg_t reg, u64 val)
+{
+ if (likely(cxl_adapter_link_ok(afu->adapter)))
+ out_be64(_cxl_p2n_addr(afu, reg), val);
+}
+
+static inline u64 cxl_p2n_read(struct cxl_afu *afu, cxl_p2n_reg_t reg)
+{
+ if (likely(cxl_adapter_link_ok(afu->adapter)))
+ return in_be64(_cxl_p2n_addr(afu, reg));
+ else
+ return ~0ULL;
+}
+
+static inline u64 cxl_afu_cr_read64(struct cxl_afu *afu, int cr, u64 off)
+{
+ if (likely(cxl_adapter_link_ok(afu->adapter)))
+ return in_le64((afu)->afu_desc_mmio + (afu)->crs_offset +
+ ((cr) * (afu)->crs_len) + (off));
+ else
+ return ~0ULL;
+}
+
+static inline u32 cxl_afu_cr_read32(struct cxl_afu *afu, int cr, u64 off)
+{
+ if (likely(cxl_adapter_link_ok(afu->adapter)))
+ return in_le32((afu)->afu_desc_mmio + (afu)->crs_offset +
+ ((cr) * (afu)->crs_len) + (off));
+ else
+ return 0xffffffff;
+}
+u16 cxl_afu_cr_read16(struct cxl_afu *afu, int cr, u64 off);
+u8 cxl_afu_cr_read8(struct cxl_afu *afu, int cr, u64 off);
+
+ssize_t cxl_afu_read_err_buffer(struct cxl_afu *afu, char *buf,
+ loff_t off, size_t count);
+
+
+struct cxl_calls {
+ void (*cxl_slbia)(struct mm_struct *mm);
+ struct module *owner;
+};
+int register_cxl_calls(struct cxl_calls *calls);
+void unregister_cxl_calls(struct cxl_calls *calls);
+
+int cxl_alloc_adapter_nr(struct cxl *adapter);
+void cxl_remove_adapter_nr(struct cxl *adapter);
+
+int cxl_alloc_spa(struct cxl_afu *afu);
+void cxl_release_spa(struct cxl_afu *afu);
+
+int cxl_file_init(void);
+void cxl_file_exit(void);
+int cxl_register_adapter(struct cxl *adapter);
+int cxl_register_afu(struct cxl_afu *afu);
+int cxl_chardev_d_afu_add(struct cxl_afu *afu);
+int cxl_chardev_m_afu_add(struct cxl_afu *afu);
+int cxl_chardev_s_afu_add(struct cxl_afu *afu);
+void cxl_chardev_afu_remove(struct cxl_afu *afu);
+
+void cxl_context_detach_all(struct cxl_afu *afu);
+void cxl_context_free(struct cxl_context *ctx);
+void cxl_context_detach(struct cxl_context *ctx);
+
+int cxl_sysfs_adapter_add(struct cxl *adapter);
+void cxl_sysfs_adapter_remove(struct cxl *adapter);
+int cxl_sysfs_afu_add(struct cxl_afu *afu);
+void cxl_sysfs_afu_remove(struct cxl_afu *afu);
+int cxl_sysfs_afu_m_add(struct cxl_afu *afu);
+void cxl_sysfs_afu_m_remove(struct cxl_afu *afu);
+
+int cxl_afu_activate_mode(struct cxl_afu *afu, int mode);
+int _cxl_afu_deactivate_mode(struct cxl_afu *afu, int mode);
+int cxl_afu_deactivate_mode(struct cxl_afu *afu);
+int cxl_afu_select_best_mode(struct cxl_afu *afu);
+
+int cxl_register_psl_irq(struct cxl_afu *afu);
+void cxl_release_psl_irq(struct cxl_afu *afu);
+int cxl_register_psl_err_irq(struct cxl *adapter);
+void cxl_release_psl_err_irq(struct cxl *adapter);
+int cxl_register_serr_irq(struct cxl_afu *afu);
+void cxl_release_serr_irq(struct cxl_afu *afu);
+int afu_register_irqs(struct cxl_context *ctx, u32 count);
+void afu_release_irqs(struct cxl_context *ctx, void *cookie);
+void afu_irq_name_free(struct cxl_context *ctx);
+irqreturn_t cxl_slice_irq_err(int irq, void *data);
+
+int cxl_debugfs_init(void);
+void cxl_debugfs_exit(void);
+int cxl_debugfs_adapter_add(struct cxl *adapter);
+void cxl_debugfs_adapter_remove(struct cxl *adapter);
+int cxl_debugfs_afu_add(struct cxl_afu *afu);
+void cxl_debugfs_afu_remove(struct cxl_afu *afu);
+
+void cxl_handle_fault(struct work_struct *work);
+void cxl_prefault(struct cxl_context *ctx, u64 wed);
+
+struct cxl *get_cxl_adapter(int num);
+int cxl_alloc_sst(struct cxl_context *ctx);
+
+void init_cxl_native(void);
+
+struct cxl_context *cxl_context_alloc(void);
+int cxl_context_init(struct cxl_context *ctx, struct cxl_afu *afu, bool master,
+ struct address_space *mapping);
+void cxl_context_free(struct cxl_context *ctx);
+int cxl_context_iomap(struct cxl_context *ctx, struct vm_area_struct *vma);
+unsigned int cxl_map_irq(struct cxl *adapter, irq_hw_number_t hwirq,
+ irq_handler_t handler, void *cookie, const char *name);
+void cxl_unmap_irq(unsigned int virq, void *cookie);
+int __detach_context(struct cxl_context *ctx);
+
+/* This matches the layout of the H_COLLECT_CA_INT_INFO retbuf */
+struct cxl_irq_info {
+ u64 dsisr;
+ u64 dar;
+ u64 dsr;
+ u32 pid;
+ u32 tid;
+ u64 afu_err;
+ u64 errstat;
+ u64 padding[3]; /* to match the expected retbuf size for plpar_hcall9 */
+};
+
+void cxl_assign_psn_space(struct cxl_context *ctx);
+int cxl_attach_process(struct cxl_context *ctx, bool kernel, u64 wed,
+ u64 amr);
+int cxl_detach_process(struct cxl_context *ctx);
+
+int cxl_get_irq(struct cxl_afu *afu, struct cxl_irq_info *info);
+int cxl_ack_irq(struct cxl_context *ctx, u64 tfc, u64 psl_reset_mask);
+
+int cxl_check_error(struct cxl_afu *afu);
+int cxl_afu_slbia(struct cxl_afu *afu);
+int cxl_tlb_slb_invalidate(struct cxl *adapter);
+int cxl_afu_disable(struct cxl_afu *afu);
+int __cxl_afu_reset(struct cxl_afu *afu);
+int cxl_afu_check_and_enable(struct cxl_afu *afu);
+int cxl_psl_purge(struct cxl_afu *afu);
+
+void cxl_stop_trace(struct cxl *cxl);
+int cxl_pci_vphb_add(struct cxl_afu *afu);
+void cxl_pci_vphb_reconfigure(struct cxl_afu *afu);
+void cxl_pci_vphb_remove(struct cxl_afu *afu);
+
+extern struct pci_driver cxl_pci_driver;
+int afu_allocate_irqs(struct cxl_context *ctx, u32 count);
+
+int afu_open(struct inode *inode, struct file *file);
+int afu_release(struct inode *inode, struct file *file);
+long afu_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
+int afu_mmap(struct file *file, struct vm_area_struct *vm);
+unsigned int afu_poll(struct file *file, struct poll_table_struct *poll);
+ssize_t afu_read(struct file *file, char __user *buf, size_t count, loff_t *off);
+extern const struct file_operations afu_fops;
+
+#endif
diff --git a/drivers/misc/cxl/debugfs.c b/drivers/misc/cxl/debugfs.c
new file mode 100644
index 0000000..18df6f4
--- /dev/null
+++ b/drivers/misc/cxl/debugfs.c
@@ -0,0 +1,132 @@
+/*
+ * Copyright 2014 IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/debugfs.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+
+#include "cxl.h"
+
+static struct dentry *cxl_debugfs;
+
+void cxl_stop_trace(struct cxl *adapter)
+{
+ int slice;
+
+ /* Stop the trace */
+ cxl_p1_write(adapter, CXL_PSL_TRACE, 0x8000000000000017LL);
+
+ /* Stop the slice traces */
+ spin_lock(&adapter->afu_list_lock);
+ for (slice = 0; slice < adapter->slices; slice++) {
+ if (adapter->afu[slice])
+ cxl_p1n_write(adapter->afu[slice], CXL_PSL_SLICE_TRACE, 0x8000000000000000LL);
+ }
+ spin_unlock(&adapter->afu_list_lock);
+}
+
+/* Helpers to export CXL mmaped IO registers via debugfs */
+static int debugfs_io_u64_get(void *data, u64 *val)
+{
+ *val = in_be64((u64 __iomem *)data);
+ return 0;
+}
+
+static int debugfs_io_u64_set(void *data, u64 val)
+{
+ out_be64((u64 __iomem *)data, val);
+ return 0;
+}
+DEFINE_SIMPLE_ATTRIBUTE(fops_io_x64, debugfs_io_u64_get, debugfs_io_u64_set, "0x%016llx\n");
+
+static struct dentry *debugfs_create_io_x64(const char *name, umode_t mode,
+ struct dentry *parent, u64 __iomem *value)
+{
+ return debugfs_create_file(name, mode, parent, (void __force *)value, &fops_io_x64);
+}
+
+int cxl_debugfs_adapter_add(struct cxl *adapter)
+{
+ struct dentry *dir;
+ char buf[32];
+
+ if (!cxl_debugfs)
+ return -ENODEV;
+
+ snprintf(buf, 32, "card%i", adapter->adapter_num);
+ dir = debugfs_create_dir(buf, cxl_debugfs);
+ if (IS_ERR(dir))
+ return PTR_ERR(dir);
+ adapter->debugfs = dir;
+
+ debugfs_create_io_x64("fir1", S_IRUSR, dir, _cxl_p1_addr(adapter, CXL_PSL_FIR1));
+ debugfs_create_io_x64("fir2", S_IRUSR, dir, _cxl_p1_addr(adapter, CXL_PSL_FIR2));
+ debugfs_create_io_x64("fir_cntl", S_IRUSR, dir, _cxl_p1_addr(adapter, CXL_PSL_FIR_CNTL));
+ debugfs_create_io_x64("err_ivte", S_IRUSR, dir, _cxl_p1_addr(adapter, CXL_PSL_ErrIVTE));
+
+ debugfs_create_io_x64("trace", S_IRUSR | S_IWUSR, dir, _cxl_p1_addr(adapter, CXL_PSL_TRACE));
+
+ return 0;
+}
+
+void cxl_debugfs_adapter_remove(struct cxl *adapter)
+{
+ debugfs_remove_recursive(adapter->debugfs);
+}
+
+int cxl_debugfs_afu_add(struct cxl_afu *afu)
+{
+ struct dentry *dir;
+ char buf[32];
+
+ if (!afu->adapter->debugfs)
+ return -ENODEV;
+
+ snprintf(buf, 32, "psl%i.%i", afu->adapter->adapter_num, afu->slice);
+ dir = debugfs_create_dir(buf, afu->adapter->debugfs);
+ if (IS_ERR(dir))
+ return PTR_ERR(dir);
+ afu->debugfs = dir;
+
+ debugfs_create_io_x64("fir", S_IRUSR, dir, _cxl_p1n_addr(afu, CXL_PSL_FIR_SLICE_An));
+ debugfs_create_io_x64("serr", S_IRUSR, dir, _cxl_p1n_addr(afu, CXL_PSL_SERR_An));
+ debugfs_create_io_x64("afu_debug", S_IRUSR, dir, _cxl_p1n_addr(afu, CXL_AFU_DEBUG_An));
+ debugfs_create_io_x64("sr", S_IRUSR, dir, _cxl_p1n_addr(afu, CXL_PSL_SR_An));
+
+ debugfs_create_io_x64("dsisr", S_IRUSR, dir, _cxl_p2n_addr(afu, CXL_PSL_DSISR_An));
+ debugfs_create_io_x64("dar", S_IRUSR, dir, _cxl_p2n_addr(afu, CXL_PSL_DAR_An));
+ debugfs_create_io_x64("sstp0", S_IRUSR, dir, _cxl_p2n_addr(afu, CXL_SSTP0_An));
+ debugfs_create_io_x64("sstp1", S_IRUSR, dir, _cxl_p2n_addr(afu, CXL_SSTP1_An));
+ debugfs_create_io_x64("err_status", S_IRUSR, dir, _cxl_p2n_addr(afu, CXL_PSL_ErrStat_An));
+
+ debugfs_create_io_x64("trace", S_IRUSR | S_IWUSR, dir, _cxl_p1n_addr(afu, CXL_PSL_SLICE_TRACE));
+
+ return 0;
+}
+
+void cxl_debugfs_afu_remove(struct cxl_afu *afu)
+{
+ debugfs_remove_recursive(afu->debugfs);
+}
+
+int __init cxl_debugfs_init(void)
+{
+ struct dentry *ent;
+ ent = debugfs_create_dir("cxl", NULL);
+ if (IS_ERR(ent))
+ return PTR_ERR(ent);
+ cxl_debugfs = ent;
+
+ return 0;
+}
+
+void cxl_debugfs_exit(void)
+{
+ debugfs_remove_recursive(cxl_debugfs);
+}
diff --git a/drivers/misc/cxl/fault.c b/drivers/misc/cxl/fault.c
new file mode 100644
index 0000000..81c3f75
--- /dev/null
+++ b/drivers/misc/cxl/fault.c
@@ -0,0 +1,375 @@
+/*
+ * Copyright 2014 IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/workqueue.h>
+#include <linux/sched.h>
+#include <linux/pid.h>
+#include <linux/mm.h>
+#include <linux/moduleparam.h>
+
+#undef MODULE_PARAM_PREFIX
+#define MODULE_PARAM_PREFIX "cxl" "."
+#include <asm/current.h>
+#include <asm/copro.h>
+#include <asm/mmu.h>
+
+#include "cxl.h"
+#include "trace.h"
+
+static bool sste_matches(struct cxl_sste *sste, struct copro_slb *slb)
+{
+ return ((sste->vsid_data == cpu_to_be64(slb->vsid)) &&
+ (sste->esid_data == cpu_to_be64(slb->esid)));
+}
+
+/*
+ * This finds a free SSTE for the given SLB, or returns NULL if it's already in
+ * the segment table.
+ */
+static struct cxl_sste* find_free_sste(struct cxl_context *ctx,
+ struct copro_slb *slb)
+{
+ struct cxl_sste *primary, *sste, *ret = NULL;
+ unsigned int mask = (ctx->sst_size >> 7) - 1; /* SSTP0[SegTableSize] */
+ unsigned int entry;
+ unsigned int hash;
+
+ if (slb->vsid & SLB_VSID_B_1T)
+ hash = (slb->esid >> SID_SHIFT_1T) & mask;
+ else /* 256M */
+ hash = (slb->esid >> SID_SHIFT) & mask;
+
+ primary = ctx->sstp + (hash << 3);
+
+ for (entry = 0, sste = primary; entry < 8; entry++, sste++) {
+ if (!ret && !(be64_to_cpu(sste->esid_data) & SLB_ESID_V))
+ ret = sste;
+ if (sste_matches(sste, slb))
+ return NULL;
+ }
+ if (ret)
+ return ret;
+
+ /* Nothing free, select an entry to cast out */
+ ret = primary + ctx->sst_lru;
+ ctx->sst_lru = (ctx->sst_lru + 1) & 0x7;
+
+ return ret;
+}
+
+static void cxl_load_segment(struct cxl_context *ctx, struct copro_slb *slb)
+{
+ /* mask is the group index, we search primary and secondary here. */
+ struct cxl_sste *sste;
+ unsigned long flags;
+
+ spin_lock_irqsave(&ctx->sste_lock, flags);
+ sste = find_free_sste(ctx, slb);
+ if (!sste)
+ goto out_unlock;
+
+ pr_devel("CXL Populating SST[%li]: %#llx %#llx\n",
+ sste - ctx->sstp, slb->vsid, slb->esid);
+ trace_cxl_ste_write(ctx, sste - ctx->sstp, slb->esid, slb->vsid);
+
+ sste->vsid_data = cpu_to_be64(slb->vsid);
+ sste->esid_data = cpu_to_be64(slb->esid);
+out_unlock:
+ spin_unlock_irqrestore(&ctx->sste_lock, flags);
+}
+
+static int cxl_fault_segment(struct cxl_context *ctx, struct mm_struct *mm,
+ u64 ea)
+{
+ struct copro_slb slb = {0,0};
+ int rc;
+
+ if (!(rc = copro_calculate_slb(mm, ea, &slb))) {
+ cxl_load_segment(ctx, &slb);
+ }
+
+ return rc;
+}
+
+static void cxl_ack_ae(struct cxl_context *ctx)
+{
+ unsigned long flags;
+
+ cxl_ack_irq(ctx, CXL_PSL_TFC_An_AE, 0);
+
+ spin_lock_irqsave(&ctx->lock, flags);
+ ctx->pending_fault = true;
+ ctx->fault_addr = ctx->dar;
+ ctx->fault_dsisr = ctx->dsisr;
+ spin_unlock_irqrestore(&ctx->lock, flags);
+
+ wake_up_all(&ctx->wq);
+}
+
+static int cxl_handle_segment_miss(struct cxl_context *ctx,
+ struct mm_struct *mm, u64 ea)
+{
+ int rc;
+
+ pr_devel("CXL interrupt: Segment fault pe: %i ea: %#llx\n", ctx->pe, ea);
+ trace_cxl_ste_miss(ctx, ea);
+
+ if ((rc = cxl_fault_segment(ctx, mm, ea)))
+ cxl_ack_ae(ctx);
+ else {
+
+ mb(); /* Order seg table write to TFC MMIO write */
+ cxl_ack_irq(ctx, CXL_PSL_TFC_An_R, 0);
+ }
+
+ return IRQ_HANDLED;
+}
+
+static void cxl_handle_page_fault(struct cxl_context *ctx,
+ struct mm_struct *mm, u64 dsisr, u64 dar)
+{
+ unsigned flt = 0;
+ int result;
+ unsigned long access, flags, inv_flags = 0;
+
+ trace_cxl_pte_miss(ctx, dsisr, dar);
+
+ if ((result = copro_handle_mm_fault(mm, dar, dsisr, &flt))) {
+ pr_devel("copro_handle_mm_fault failed: %#x\n", result);
+ return cxl_ack_ae(ctx);
+ }
+
+ /*
+ * update_mmu_cache() will not have loaded the hash since current->trap
+ * is not a 0x400 or 0x300, so just call hash_page_mm() here.
+ */
+ access = _PAGE_PRESENT;
+ if (dsisr & CXL_PSL_DSISR_An_S)
+ access |= _PAGE_RW;
+ if ((!ctx->kernel) || ~(dar & (1ULL << 63)))
+ access |= _PAGE_USER;
+
+ if (dsisr & DSISR_NOHPTE)
+ inv_flags |= HPTE_NOHPTE_UPDATE;
+
+ local_irq_save(flags);
+ hash_page_mm(mm, dar, access, 0x300, inv_flags);
+ local_irq_restore(flags);
+
+ pr_devel("Page fault successfully handled for pe: %i!\n", ctx->pe);
+ cxl_ack_irq(ctx, CXL_PSL_TFC_An_R, 0);
+}
+
+/*
+ * Returns the mm_struct corresponding to the context ctx via ctx->pid
+ * In case the task has exited we use the task group leader accessible
+ * via ctx->glpid to find the next task in the thread group that has a
+ * valid mm_struct associated with it. If a task with valid mm_struct
+ * is found the ctx->pid is updated to use the task struct for subsequent
+ * translations. In case no valid mm_struct is found in the task group to
+ * service the fault a NULL is returned.
+ */
+static struct mm_struct *get_mem_context(struct cxl_context *ctx)
+{
+ struct task_struct *task = NULL;
+ struct mm_struct *mm = NULL;
+ struct pid *old_pid = ctx->pid;
+
+ if (old_pid == NULL) {
+ pr_warn("%s: Invalid context for pe=%d\n",
+ __func__, ctx->pe);
+ return NULL;
+ }
+
+ task = get_pid_task(old_pid, PIDTYPE_PID);
+
+ /*
+ * pid_alive may look racy but this saves us from costly
+ * get_task_mm when the task is a zombie. In worst case
+ * we may think a task is alive, which is about to die
+ * but get_task_mm will return NULL.
+ */
+ if (task != NULL && pid_alive(task))
+ mm = get_task_mm(task);
+
+ /* release the task struct that was taken earlier */
+ if (task)
+ put_task_struct(task);
+ else
+ pr_devel("%s: Context owning pid=%i for pe=%i dead\n",
+ __func__, pid_nr(old_pid), ctx->pe);
+
+ /*
+ * If we couldn't find the mm context then use the group
+ * leader to iterate over the task group and find a task
+ * that gives us mm_struct.
+ */
+ if (unlikely(mm == NULL && ctx->glpid != NULL)) {
+
+ rcu_read_lock();
+ task = pid_task(ctx->glpid, PIDTYPE_PID);
+ if (task)
+ do {
+ mm = get_task_mm(task);
+ if (mm) {
+ ctx->pid = get_task_pid(task,
+ PIDTYPE_PID);
+ break;
+ }
+ task = next_thread(task);
+ } while (task && !thread_group_leader(task));
+ rcu_read_unlock();
+
+ /* check if we switched pid */
+ if (ctx->pid != old_pid) {
+ if (mm)
+ pr_devel("%s:pe=%i switch pid %i->%i\n",
+ __func__, ctx->pe, pid_nr(old_pid),
+ pid_nr(ctx->pid));
+ else
+ pr_devel("%s:Cannot find mm for pid=%i\n",
+ __func__, pid_nr(old_pid));
+
+ /* drop the reference to older pid */
+ put_pid(old_pid);
+ }
+ }
+
+ return mm;
+}
+
+
+
+void cxl_handle_fault(struct work_struct *fault_work)
+{
+ struct cxl_context *ctx =
+ container_of(fault_work, struct cxl_context, fault_work);
+ u64 dsisr = ctx->dsisr;
+ u64 dar = ctx->dar;
+ struct mm_struct *mm = NULL;
+
+ if (cxl_p2n_read(ctx->afu, CXL_PSL_DSISR_An) != dsisr ||
+ cxl_p2n_read(ctx->afu, CXL_PSL_DAR_An) != dar ||
+ cxl_p2n_read(ctx->afu, CXL_PSL_PEHandle_An) != ctx->pe) {
+ /* Most likely explanation is harmless - a dedicated process
+ * has detached and these were cleared by the PSL purge, but
+ * warn about it just in case */
+ dev_notice(&ctx->afu->dev, "cxl_handle_fault: Translation fault regs changed\n");
+ return;
+ }
+
+ /* Early return if the context is being / has been detached */
+ if (ctx->status == CLOSED) {
+ cxl_ack_ae(ctx);
+ return;
+ }
+
+ pr_devel("CXL BOTTOM HALF handling fault for afu pe: %i. "
+ "DSISR: %#llx DAR: %#llx\n", ctx->pe, dsisr, dar);
+
+ if (!ctx->kernel) {
+
+ mm = get_mem_context(ctx);
+ /* indicates all the thread in task group have exited */
+ if (mm == NULL) {
+ pr_devel("%s: unable to get mm for pe=%d pid=%i\n",
+ __func__, ctx->pe, pid_nr(ctx->pid));
+ cxl_ack_ae(ctx);
+ return;
+ } else {
+ pr_devel("Handling page fault for pe=%d pid=%i\n",
+ ctx->pe, pid_nr(ctx->pid));
+ }
+ }
+
+ if (dsisr & CXL_PSL_DSISR_An_DS)
+ cxl_handle_segment_miss(ctx, mm, dar);
+ else if (dsisr & CXL_PSL_DSISR_An_DM)
+ cxl_handle_page_fault(ctx, mm, dsisr, dar);
+ else
+ WARN(1, "cxl_handle_fault has nothing to handle\n");
+
+ if (mm)
+ mmput(mm);
+}
+
+static void cxl_prefault_one(struct cxl_context *ctx, u64 ea)
+{
+ struct mm_struct *mm;
+
+ mm = get_mem_context(ctx);
+ if (mm == NULL) {
+ pr_devel("cxl_prefault_one unable to get mm %i\n",
+ pid_nr(ctx->pid));
+ return;
+ }
+
+ cxl_fault_segment(ctx, mm, ea);
+
+ mmput(mm);
+}
+
+static u64 next_segment(u64 ea, u64 vsid)
+{
+ if (vsid & SLB_VSID_B_1T)
+ ea |= (1ULL << 40) - 1;
+ else
+ ea |= (1ULL << 28) - 1;
+
+ return ea + 1;
+}
+
+static void cxl_prefault_vma(struct cxl_context *ctx)
+{
+ u64 ea, last_esid = 0;
+ struct copro_slb slb;
+ struct vm_area_struct *vma;
+ int rc;
+ struct mm_struct *mm;
+
+ mm = get_mem_context(ctx);
+ if (mm == NULL) {
+ pr_devel("cxl_prefault_vm unable to get mm %i\n",
+ pid_nr(ctx->pid));
+ return;
+ }
+
+ down_read(&mm->mmap_sem);
+ for (vma = mm->mmap; vma; vma = vma->vm_next) {
+ for (ea = vma->vm_start; ea < vma->vm_end;
+ ea = next_segment(ea, slb.vsid)) {
+ rc = copro_calculate_slb(mm, ea, &slb);
+ if (rc)
+ continue;
+
+ if (last_esid == slb.esid)
+ continue;
+
+ cxl_load_segment(ctx, &slb);
+ last_esid = slb.esid;
+ }
+ }
+ up_read(&mm->mmap_sem);
+
+ mmput(mm);
+}
+
+void cxl_prefault(struct cxl_context *ctx, u64 wed)
+{
+ switch (ctx->afu->prefault_mode) {
+ case CXL_PREFAULT_WED:
+ cxl_prefault_one(ctx, wed);
+ break;
+ case CXL_PREFAULT_ALL:
+ cxl_prefault_vma(ctx);
+ break;
+ default:
+ break;
+ }
+}
diff --git a/drivers/misc/cxl/file.c b/drivers/misc/cxl/file.c
new file mode 100644
index 0000000..013558f
--- /dev/null
+++ b/drivers/misc/cxl/file.c
@@ -0,0 +1,593 @@
+/*
+ * Copyright 2014 IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/spinlock.h>
+#include <linux/module.h>
+#include <linux/export.h>
+#include <linux/kernel.h>
+#include <linux/bitmap.h>
+#include <linux/sched.h>
+#include <linux/poll.h>
+#include <linux/pid.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <asm/cputable.h>
+#include <asm/current.h>
+#include <asm/copro.h>
+
+#include "cxl.h"
+#include "trace.h"
+
+#define CXL_NUM_MINORS 256 /* Total to reserve */
+#define CXL_DEV_MINORS 13 /* 1 control + 4 AFUs * 3 (dedicated/master/shared) */
+
+#define CXL_CARD_MINOR(adapter) (adapter->adapter_num * CXL_DEV_MINORS)
+#define CXL_AFU_MINOR_D(afu) (CXL_CARD_MINOR(afu->adapter) + 1 + (3 * afu->slice))
+#define CXL_AFU_MINOR_M(afu) (CXL_AFU_MINOR_D(afu) + 1)
+#define CXL_AFU_MINOR_S(afu) (CXL_AFU_MINOR_D(afu) + 2)
+#define CXL_AFU_MKDEV_D(afu) MKDEV(MAJOR(cxl_dev), CXL_AFU_MINOR_D(afu))
+#define CXL_AFU_MKDEV_M(afu) MKDEV(MAJOR(cxl_dev), CXL_AFU_MINOR_M(afu))
+#define CXL_AFU_MKDEV_S(afu) MKDEV(MAJOR(cxl_dev), CXL_AFU_MINOR_S(afu))
+
+#define CXL_DEVT_ADAPTER(dev) (MINOR(dev) / CXL_DEV_MINORS)
+#define CXL_DEVT_AFU(dev) ((MINOR(dev) % CXL_DEV_MINORS - 1) / 3)
+
+#define CXL_DEVT_IS_CARD(dev) (MINOR(dev) % CXL_DEV_MINORS == 0)
+
+static dev_t cxl_dev;
+
+static struct class *cxl_class;
+
+static int __afu_open(struct inode *inode, struct file *file, bool master)
+{
+ struct cxl *adapter;
+ struct cxl_afu *afu;
+ struct cxl_context *ctx;
+ int adapter_num = CXL_DEVT_ADAPTER(inode->i_rdev);
+ int slice = CXL_DEVT_AFU(inode->i_rdev);
+ int rc = -ENODEV;
+
+ pr_devel("afu_open afu%i.%i\n", slice, adapter_num);
+
+ if (!(adapter = get_cxl_adapter(adapter_num)))
+ return -ENODEV;
+
+ if (slice > adapter->slices)
+ goto err_put_adapter;
+
+ spin_lock(&adapter->afu_list_lock);
+ if (!(afu = adapter->afu[slice])) {
+ spin_unlock(&adapter->afu_list_lock);
+ goto err_put_adapter;
+ }
+
+ /*
+ * taking a ref to the afu so that it doesn't go away
+ * for rest of the function. This ref is released before
+ * we return.
+ */
+ cxl_afu_get(afu);
+ spin_unlock(&adapter->afu_list_lock);
+
+ if (!afu->current_mode)
+ goto err_put_afu;
+
+ if (!cxl_adapter_link_ok(adapter)) {
+ rc = -EIO;
+ goto err_put_afu;
+ }
+
+ if (!(ctx = cxl_context_alloc())) {
+ rc = -ENOMEM;
+ goto err_put_afu;
+ }
+
+ if ((rc = cxl_context_init(ctx, afu, master, inode->i_mapping)))
+ goto err_put_afu;
+
+ pr_devel("afu_open pe: %i\n", ctx->pe);
+ file->private_data = ctx;
+
+ /* indicate success */
+ rc = 0;
+
+err_put_afu:
+ /* release the ref taken earlier */
+ cxl_afu_put(afu);
+err_put_adapter:
+ put_device(&adapter->dev);
+ return rc;
+}
+
+int afu_open(struct inode *inode, struct file *file)
+{
+ return __afu_open(inode, file, false);
+}
+
+static int afu_master_open(struct inode *inode, struct file *file)
+{
+ return __afu_open(inode, file, true);
+}
+
+int afu_release(struct inode *inode, struct file *file)
+{
+ struct cxl_context *ctx = file->private_data;
+
+ pr_devel("%s: closing cxl file descriptor. pe: %i\n",
+ __func__, ctx->pe);
+ cxl_context_detach(ctx);
+
+
+ /*
+ * Delete the context's mapping pointer, unless it's created by the
+ * kernel API, in which case leave it so it can be freed by reclaim_ctx()
+ */
+ if (!ctx->kernelapi) {
+ mutex_lock(&ctx->mapping_lock);
+ ctx->mapping = NULL;
+ mutex_unlock(&ctx->mapping_lock);
+ }
+
+ /*
+ * At this this point all bottom halfs have finished and we should be
+ * getting no more IRQs from the hardware for this context. Once it's
+ * removed from the IDR (and RCU synchronised) it's safe to free the
+ * sstp and context.
+ */
+ cxl_context_free(ctx);
+
+ return 0;
+}
+
+static long afu_ioctl_start_work(struct cxl_context *ctx,
+ struct cxl_ioctl_start_work __user *uwork)
+{
+ struct cxl_ioctl_start_work work;
+ u64 amr = 0;
+ int rc;
+
+ pr_devel("%s: pe: %i\n", __func__, ctx->pe);
+
+ /* Do this outside the status_mutex to avoid a circular dependency with
+ * the locking in cxl_mmap_fault() */
+ if (copy_from_user(&work, uwork, sizeof(work)))
+ return -EFAULT;
+
+ mutex_lock(&ctx->status_mutex);
+ if (ctx->status != OPENED) {
+ rc = -EIO;
+ goto out;
+ }
+
+ /*
+ * if any of the reserved fields are set or any of the unused
+ * flags are set it's invalid
+ */
+ if (work.reserved1 || work.reserved2 || work.reserved3 ||
+ work.reserved4 || work.reserved5 || work.reserved6 ||
+ (work.flags & ~CXL_START_WORK_ALL)) {
+ rc = -EINVAL;
+ goto out;
+ }
+
+ if (!(work.flags & CXL_START_WORK_NUM_IRQS))
+ work.num_interrupts = ctx->afu->pp_irqs;
+ else if ((work.num_interrupts < ctx->afu->pp_irqs) ||
+ (work.num_interrupts > ctx->afu->irqs_max)) {
+ rc = -EINVAL;
+ goto out;
+ }
+ if ((rc = afu_register_irqs(ctx, work.num_interrupts)))
+ goto out;
+
+ if (work.flags & CXL_START_WORK_AMR)
+ amr = work.amr & mfspr(SPRN_UAMOR);
+
+ ctx->mmio_err_ff = !!(work.flags & CXL_START_WORK_ERR_FF);
+
+ /*
+ * We grab the PID here and not in the file open to allow for the case
+ * where a process (master, some daemon, etc) has opened the chardev on
+ * behalf of another process, so the AFU's mm gets bound to the process
+ * that performs this ioctl and not the process that opened the file.
+ * Also we grab the PID of the group leader so that if the task that
+ * has performed the attach operation exits the mm context of the
+ * process is still accessible.
+ */
+ ctx->pid = get_task_pid(current, PIDTYPE_PID);
+ ctx->glpid = get_task_pid(current->group_leader, PIDTYPE_PID);
+
+ /*
+ * Increment driver use count. Enables global TLBIs for hash
+ * and callbacks to handle the segment table
+ */
+ cxl_ctx_get();
+
+ trace_cxl_attach(ctx, work.work_element_descriptor, work.num_interrupts, amr);
+
+ if ((rc = cxl_attach_process(ctx, false, work.work_element_descriptor,
+ amr))) {
+ afu_release_irqs(ctx, ctx);
+ cxl_ctx_put();
+ goto out;
+ }
+
+ ctx->status = STARTED;
+ rc = 0;
+out:
+ mutex_unlock(&ctx->status_mutex);
+ return rc;
+}
+static long afu_ioctl_process_element(struct cxl_context *ctx,
+ int __user *upe)
+{
+ pr_devel("%s: pe: %i\n", __func__, ctx->pe);
+
+ if (copy_to_user(upe, &ctx->pe, sizeof(__u32)))
+ return -EFAULT;
+
+ return 0;
+}
+
+static long afu_ioctl_get_afu_id(struct cxl_context *ctx,
+ struct cxl_afu_id __user *upafuid)
+{
+ struct cxl_afu_id afuid = { 0 };
+
+ afuid.card_id = ctx->afu->adapter->adapter_num;
+ afuid.afu_offset = ctx->afu->slice;
+ afuid.afu_mode = ctx->afu->current_mode;
+
+ /* set the flag bit in case the afu is a slave */
+ if (ctx->afu->current_mode == CXL_MODE_DIRECTED && !ctx->master)
+ afuid.flags |= CXL_AFUID_FLAG_SLAVE;
+
+ if (copy_to_user(upafuid, &afuid, sizeof(afuid)))
+ return -EFAULT;
+
+ return 0;
+}
+
+long afu_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+ struct cxl_context *ctx = file->private_data;
+
+ if (ctx->status == CLOSED)
+ return -EIO;
+
+ if (!cxl_adapter_link_ok(ctx->afu->adapter))
+ return -EIO;
+
+ pr_devel("afu_ioctl\n");
+ switch (cmd) {
+ case CXL_IOCTL_START_WORK:
+ return afu_ioctl_start_work(ctx, (struct cxl_ioctl_start_work __user *)arg);
+ case CXL_IOCTL_GET_PROCESS_ELEMENT:
+ return afu_ioctl_process_element(ctx, (__u32 __user *)arg);
+ case CXL_IOCTL_GET_AFU_ID:
+ return afu_ioctl_get_afu_id(ctx, (struct cxl_afu_id __user *)
+ arg);
+ }
+ return -EINVAL;
+}
+
+static long afu_compat_ioctl(struct file *file, unsigned int cmd,
+ unsigned long arg)
+{
+ return afu_ioctl(file, cmd, arg);
+}
+
+int afu_mmap(struct file *file, struct vm_area_struct *vm)
+{
+ struct cxl_context *ctx = file->private_data;
+
+ /* AFU must be started before we can MMIO */
+ if (ctx->status != STARTED)
+ return -EIO;
+
+ if (!cxl_adapter_link_ok(ctx->afu->adapter))
+ return -EIO;
+
+ return cxl_context_iomap(ctx, vm);
+}
+
+unsigned int afu_poll(struct file *file, struct poll_table_struct *poll)
+{
+ struct cxl_context *ctx = file->private_data;
+ int mask = 0;
+ unsigned long flags;
+
+
+ poll_wait(file, &ctx->wq, poll);
+
+ pr_devel("afu_poll wait done pe: %i\n", ctx->pe);
+
+ spin_lock_irqsave(&ctx->lock, flags);
+ if (ctx->pending_irq || ctx->pending_fault ||
+ ctx->pending_afu_err)
+ mask |= POLLIN | POLLRDNORM;
+ else if (ctx->status == CLOSED)
+ /* Only error on closed when there are no futher events pending
+ */
+ mask |= POLLERR;
+ spin_unlock_irqrestore(&ctx->lock, flags);
+
+ pr_devel("afu_poll pe: %i returning %#x\n", ctx->pe, mask);
+
+ return mask;
+}
+
+static inline int ctx_event_pending(struct cxl_context *ctx)
+{
+ return (ctx->pending_irq || ctx->pending_fault ||
+ ctx->pending_afu_err || (ctx->status == CLOSED));
+}
+
+ssize_t afu_read(struct file *file, char __user *buf, size_t count,
+ loff_t *off)
+{
+ struct cxl_context *ctx = file->private_data;
+ struct cxl_event event;
+ unsigned long flags;
+ int rc;
+ DEFINE_WAIT(wait);
+
+ if (!cxl_adapter_link_ok(ctx->afu->adapter))
+ return -EIO;
+
+ if (count < CXL_READ_MIN_SIZE)
+ return -EINVAL;
+
+ spin_lock_irqsave(&ctx->lock, flags);
+
+ for (;;) {
+ prepare_to_wait(&ctx->wq, &wait, TASK_INTERRUPTIBLE);
+ if (ctx_event_pending(ctx))
+ break;
+
+ if (!cxl_adapter_link_ok(ctx->afu->adapter)) {
+ rc = -EIO;
+ goto out;
+ }
+
+ if (file->f_flags & O_NONBLOCK) {
+ rc = -EAGAIN;
+ goto out;
+ }
+
+ if (signal_pending(current)) {
+ rc = -ERESTARTSYS;
+ goto out;
+ }
+
+ spin_unlock_irqrestore(&ctx->lock, flags);
+ pr_devel("afu_read going to sleep...\n");
+ schedule();
+ pr_devel("afu_read woken up\n");
+ spin_lock_irqsave(&ctx->lock, flags);
+ }
+
+ finish_wait(&ctx->wq, &wait);
+
+ memset(&event, 0, sizeof(event));
+ event.header.process_element = ctx->pe;
+ event.header.size = sizeof(struct cxl_event_header);
+ if (ctx->pending_irq) {
+ pr_devel("afu_read delivering AFU interrupt\n");
+ event.header.size += sizeof(struct cxl_event_afu_interrupt);
+ event.header.type = CXL_EVENT_AFU_INTERRUPT;
+ event.irq.irq = find_first_bit(ctx->irq_bitmap, ctx->irq_count) + 1;
+ clear_bit(event.irq.irq - 1, ctx->irq_bitmap);
+ if (bitmap_empty(ctx->irq_bitmap, ctx->irq_count))
+ ctx->pending_irq = false;
+ } else if (ctx->pending_fault) {
+ pr_devel("afu_read delivering data storage fault\n");
+ event.header.size += sizeof(struct cxl_event_data_storage);
+ event.header.type = CXL_EVENT_DATA_STORAGE;
+ event.fault.addr = ctx->fault_addr;
+ event.fault.dsisr = ctx->fault_dsisr;
+ ctx->pending_fault = false;
+ } else if (ctx->pending_afu_err) {
+ pr_devel("afu_read delivering afu error\n");
+ event.header.size += sizeof(struct cxl_event_afu_error);
+ event.header.type = CXL_EVENT_AFU_ERROR;
+ event.afu_error.error = ctx->afu_err;
+ ctx->pending_afu_err = false;
+ } else if (ctx->status == CLOSED) {
+ pr_devel("afu_read fatal error\n");
+ spin_unlock_irqrestore(&ctx->lock, flags);
+ return -EIO;
+ } else
+ WARN(1, "afu_read must be buggy\n");
+
+ spin_unlock_irqrestore(&ctx->lock, flags);
+
+ if (copy_to_user(buf, &event, event.header.size))
+ return -EFAULT;
+ return event.header.size;
+
+out:
+ finish_wait(&ctx->wq, &wait);
+ spin_unlock_irqrestore(&ctx->lock, flags);
+ return rc;
+}
+
+/*
+ * Note: if this is updated, we need to update api.c to patch the new ones in
+ * too
+ */
+const struct file_operations afu_fops = {
+ .owner = THIS_MODULE,
+ .open = afu_open,
+ .poll = afu_poll,
+ .read = afu_read,
+ .release = afu_release,
+ .unlocked_ioctl = afu_ioctl,
+ .compat_ioctl = afu_compat_ioctl,
+ .mmap = afu_mmap,
+};
+
+static const struct file_operations afu_master_fops = {
+ .owner = THIS_MODULE,
+ .open = afu_master_open,
+ .poll = afu_poll,
+ .read = afu_read,
+ .release = afu_release,
+ .unlocked_ioctl = afu_ioctl,
+ .compat_ioctl = afu_compat_ioctl,
+ .mmap = afu_mmap,
+};
+
+
+static char *cxl_devnode(struct device *dev, umode_t *mode)
+{
+ if (CXL_DEVT_IS_CARD(dev->devt)) {
+ /*
+ * These minor numbers will eventually be used to program the
+ * PSL and AFUs once we have dynamic reprogramming support
+ */
+ return NULL;
+ }
+ return kasprintf(GFP_KERNEL, "cxl/%s", dev_name(dev));
+}
+
+extern struct class *cxl_class;
+
+static int cxl_add_chardev(struct cxl_afu *afu, dev_t devt, struct cdev *cdev,
+ struct device **chardev, char *postfix, char *desc,
+ const struct file_operations *fops)
+{
+ struct device *dev;
+ int rc;
+
+ cdev_init(cdev, fops);
+ if ((rc = cdev_add(cdev, devt, 1))) {
+ dev_err(&afu->dev, "Unable to add %s chardev: %i\n", desc, rc);
+ return rc;
+ }
+
+ dev = device_create(cxl_class, &afu->dev, devt, afu,
+ "afu%i.%i%s", afu->adapter->adapter_num, afu->slice, postfix);
+ if (IS_ERR(dev)) {
+ dev_err(&afu->dev, "Unable to create %s chardev in sysfs: %i\n", desc, rc);
+ rc = PTR_ERR(dev);
+ goto err;
+ }
+
+ *chardev = dev;
+
+ return 0;
+err:
+ cdev_del(cdev);
+ return rc;
+}
+
+int cxl_chardev_d_afu_add(struct cxl_afu *afu)
+{
+ return cxl_add_chardev(afu, CXL_AFU_MKDEV_D(afu), &afu->afu_cdev_d,
+ &afu->chardev_d, "d", "dedicated",
+ &afu_master_fops); /* Uses master fops */
+}
+
+int cxl_chardev_m_afu_add(struct cxl_afu *afu)
+{
+ return cxl_add_chardev(afu, CXL_AFU_MKDEV_M(afu), &afu->afu_cdev_m,
+ &afu->chardev_m, "m", "master",
+ &afu_master_fops);
+}
+
+int cxl_chardev_s_afu_add(struct cxl_afu *afu)
+{
+ return cxl_add_chardev(afu, CXL_AFU_MKDEV_S(afu), &afu->afu_cdev_s,
+ &afu->chardev_s, "s", "shared",
+ &afu_fops);
+}
+
+void cxl_chardev_afu_remove(struct cxl_afu *afu)
+{
+ if (afu->chardev_d) {
+ cdev_del(&afu->afu_cdev_d);
+ device_unregister(afu->chardev_d);
+ afu->chardev_d = NULL;
+ }
+ if (afu->chardev_m) {
+ cdev_del(&afu->afu_cdev_m);
+ device_unregister(afu->chardev_m);
+ afu->chardev_m = NULL;
+ }
+ if (afu->chardev_s) {
+ cdev_del(&afu->afu_cdev_s);
+ device_unregister(afu->chardev_s);
+ afu->chardev_s = NULL;
+ }
+}
+
+int cxl_register_afu(struct cxl_afu *afu)
+{
+ afu->dev.class = cxl_class;
+
+ return device_register(&afu->dev);
+}
+
+int cxl_register_adapter(struct cxl *adapter)
+{
+ adapter->dev.class = cxl_class;
+
+ /*
+ * Future: When we support dynamically reprogramming the PSL & AFU we
+ * will expose the interface to do that via a chardev:
+ * adapter->dev.devt = CXL_CARD_MKDEV(adapter);
+ */
+
+ return device_register(&adapter->dev);
+}
+
+int __init cxl_file_init(void)
+{
+ int rc;
+
+ /*
+ * If these change we really need to update API. Either change some
+ * flags or update API version number CXL_API_VERSION.
+ */
+ BUILD_BUG_ON(CXL_API_VERSION != 2);
+ BUILD_BUG_ON(sizeof(struct cxl_ioctl_start_work) != 64);
+ BUILD_BUG_ON(sizeof(struct cxl_event_header) != 8);
+ BUILD_BUG_ON(sizeof(struct cxl_event_afu_interrupt) != 8);
+ BUILD_BUG_ON(sizeof(struct cxl_event_data_storage) != 32);
+ BUILD_BUG_ON(sizeof(struct cxl_event_afu_error) != 16);
+
+ if ((rc = alloc_chrdev_region(&cxl_dev, 0, CXL_NUM_MINORS, "cxl"))) {
+ pr_err("Unable to allocate CXL major number: %i\n", rc);
+ return rc;
+ }
+
+ pr_devel("CXL device allocated, MAJOR %i\n", MAJOR(cxl_dev));
+
+ cxl_class = class_create(THIS_MODULE, "cxl");
+ if (IS_ERR(cxl_class)) {
+ pr_err("Unable to create CXL class\n");
+ rc = PTR_ERR(cxl_class);
+ goto err;
+ }
+ cxl_class->devnode = cxl_devnode;
+
+ return 0;
+
+err:
+ unregister_chrdev_region(cxl_dev, CXL_NUM_MINORS);
+ return rc;
+}
+
+void cxl_file_exit(void)
+{
+ unregister_chrdev_region(cxl_dev, CXL_NUM_MINORS);
+ class_destroy(cxl_class);
+}
diff --git a/drivers/misc/cxl/irq.c b/drivers/misc/cxl/irq.c
new file mode 100644
index 0000000..efbb694
--- /dev/null
+++ b/drivers/misc/cxl/irq.c
@@ -0,0 +1,527 @@
+/*
+ * Copyright 2014 IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/interrupt.h>
+#include <linux/workqueue.h>
+#include <linux/sched.h>
+#include <linux/wait.h>
+#include <linux/slab.h>
+#include <linux/pid.h>
+#include <asm/cputable.h>
+#include <misc/cxl-base.h>
+
+#include "cxl.h"
+#include "trace.h"
+
+/* XXX: This is implementation specific */
+static irqreturn_t handle_psl_slice_error(struct cxl_context *ctx, u64 dsisr, u64 errstat)
+{
+ u64 fir1, fir2, fir_slice, serr, afu_debug;
+
+ fir1 = cxl_p1_read(ctx->afu->adapter, CXL_PSL_FIR1);
+ fir2 = cxl_p1_read(ctx->afu->adapter, CXL_PSL_FIR2);
+ fir_slice = cxl_p1n_read(ctx->afu, CXL_PSL_FIR_SLICE_An);
+ serr = cxl_p1n_read(ctx->afu, CXL_PSL_SERR_An);
+ afu_debug = cxl_p1n_read(ctx->afu, CXL_AFU_DEBUG_An);
+
+ dev_crit(&ctx->afu->dev, "PSL ERROR STATUS: 0x%016llx\n", errstat);
+ dev_crit(&ctx->afu->dev, "PSL_FIR1: 0x%016llx\n", fir1);
+ dev_crit(&ctx->afu->dev, "PSL_FIR2: 0x%016llx\n", fir2);
+ dev_crit(&ctx->afu->dev, "PSL_SERR_An: 0x%016llx\n", serr);
+ dev_crit(&ctx->afu->dev, "PSL_FIR_SLICE_An: 0x%016llx\n", fir_slice);
+ dev_crit(&ctx->afu->dev, "CXL_PSL_AFU_DEBUG_An: 0x%016llx\n", afu_debug);
+
+ dev_crit(&ctx->afu->dev, "STOPPING CXL TRACE\n");
+ cxl_stop_trace(ctx->afu->adapter);
+
+ return cxl_ack_irq(ctx, 0, errstat);
+}
+
+irqreturn_t cxl_slice_irq_err(int irq, void *data)
+{
+ struct cxl_afu *afu = data;
+ u64 fir_slice, errstat, serr, afu_debug;
+
+ WARN(irq, "CXL SLICE ERROR interrupt %i\n", irq);
+
+ serr = cxl_p1n_read(afu, CXL_PSL_SERR_An);
+ fir_slice = cxl_p1n_read(afu, CXL_PSL_FIR_SLICE_An);
+ errstat = cxl_p2n_read(afu, CXL_PSL_ErrStat_An);
+ afu_debug = cxl_p1n_read(afu, CXL_AFU_DEBUG_An);
+ dev_crit(&afu->dev, "PSL_SERR_An: 0x%016llx\n", serr);
+ dev_crit(&afu->dev, "PSL_FIR_SLICE_An: 0x%016llx\n", fir_slice);
+ dev_crit(&afu->dev, "CXL_PSL_ErrStat_An: 0x%016llx\n", errstat);
+ dev_crit(&afu->dev, "CXL_PSL_AFU_DEBUG_An: 0x%016llx\n", afu_debug);
+
+ cxl_p1n_write(afu, CXL_PSL_SERR_An, serr);
+
+ return IRQ_HANDLED;
+}
+
+static irqreturn_t cxl_irq_err(int irq, void *data)
+{
+ struct cxl *adapter = data;
+ u64 fir1, fir2, err_ivte;
+
+ WARN(1, "CXL ERROR interrupt %i\n", irq);
+
+ err_ivte = cxl_p1_read(adapter, CXL_PSL_ErrIVTE);
+ dev_crit(&adapter->dev, "PSL_ErrIVTE: 0x%016llx\n", err_ivte);
+
+ dev_crit(&adapter->dev, "STOPPING CXL TRACE\n");
+ cxl_stop_trace(adapter);
+
+ fir1 = cxl_p1_read(adapter, CXL_PSL_FIR1);
+ fir2 = cxl_p1_read(adapter, CXL_PSL_FIR2);
+
+ dev_crit(&adapter->dev, "PSL_FIR1: 0x%016llx\nPSL_FIR2: 0x%016llx\n", fir1, fir2);
+
+ return IRQ_HANDLED;
+}
+
+static irqreturn_t schedule_cxl_fault(struct cxl_context *ctx, u64 dsisr, u64 dar)
+{
+ ctx->dsisr = dsisr;
+ ctx->dar = dar;
+ schedule_work(&ctx->fault_work);
+ return IRQ_HANDLED;
+}
+
+static irqreturn_t cxl_irq(int irq, void *data, struct cxl_irq_info *irq_info)
+{
+ struct cxl_context *ctx = data;
+ u64 dsisr, dar;
+
+ dsisr = irq_info->dsisr;
+ dar = irq_info->dar;
+
+ trace_cxl_psl_irq(ctx, irq, dsisr, dar);
+
+ pr_devel("CXL interrupt %i for afu pe: %i DSISR: %#llx DAR: %#llx\n", irq, ctx->pe, dsisr, dar);
+
+ if (dsisr & CXL_PSL_DSISR_An_DS) {
+ /*
+ * We don't inherently need to sleep to handle this, but we do
+ * need to get a ref to the task's mm, which we can't do from
+ * irq context without the potential for a deadlock since it
+ * takes the task_lock. An alternate option would be to keep a
+ * reference to the task's mm the entire time it has cxl open,
+ * but to do that we need to solve the issue where we hold a
+ * ref to the mm, but the mm can hold a ref to the fd after an
+ * mmap preventing anything from being cleaned up.
+ */
+ pr_devel("Scheduling segment miss handling for later pe: %i\n", ctx->pe);
+ return schedule_cxl_fault(ctx, dsisr, dar);
+ }
+
+ if (dsisr & CXL_PSL_DSISR_An_M)
+ pr_devel("CXL interrupt: PTE not found\n");
+ if (dsisr & CXL_PSL_DSISR_An_P)
+ pr_devel("CXL interrupt: Storage protection violation\n");
+ if (dsisr & CXL_PSL_DSISR_An_A)
+ pr_devel("CXL interrupt: AFU lock access to write through or cache inhibited storage\n");
+ if (dsisr & CXL_PSL_DSISR_An_S)
+ pr_devel("CXL interrupt: Access was afu_wr or afu_zero\n");
+ if (dsisr & CXL_PSL_DSISR_An_K)
+ pr_devel("CXL interrupt: Access not permitted by virtual page class key protection\n");
+
+ if (dsisr & CXL_PSL_DSISR_An_DM) {
+ /*
+ * In some cases we might be able to handle the fault
+ * immediately if hash_page would succeed, but we still need
+ * the task's mm, which as above we can't get without a lock
+ */
+ pr_devel("Scheduling page fault handling for later pe: %i\n", ctx->pe);
+ return schedule_cxl_fault(ctx, dsisr, dar);
+ }
+ if (dsisr & CXL_PSL_DSISR_An_ST)
+ WARN(1, "CXL interrupt: Segment Table PTE not found\n");
+ if (dsisr & CXL_PSL_DSISR_An_UR)
+ pr_devel("CXL interrupt: AURP PTE not found\n");
+ if (dsisr & CXL_PSL_DSISR_An_PE)
+ return handle_psl_slice_error(ctx, dsisr, irq_info->errstat);
+ if (dsisr & CXL_PSL_DSISR_An_AE) {
+ pr_devel("CXL interrupt: AFU Error 0x%016llx\n", irq_info->afu_err);
+
+ if (ctx->pending_afu_err) {
+ /*
+ * This shouldn't happen - the PSL treats these errors
+ * as fatal and will have reset the AFU, so there's not
+ * much point buffering multiple AFU errors.
+ * OTOH if we DO ever see a storm of these come in it's
+ * probably best that we log them somewhere:
+ */
+ dev_err_ratelimited(&ctx->afu->dev, "CXL AFU Error "
+ "undelivered to pe %i: 0x%016llx\n",
+ ctx->pe, irq_info->afu_err);
+ } else {
+ spin_lock(&ctx->lock);
+ ctx->afu_err = irq_info->afu_err;
+ ctx->pending_afu_err = 1;
+ spin_unlock(&ctx->lock);
+
+ wake_up_all(&ctx->wq);
+ }
+
+ cxl_ack_irq(ctx, CXL_PSL_TFC_An_A, 0);
+ return IRQ_HANDLED;
+ }
+ if (dsisr & CXL_PSL_DSISR_An_OC)
+ pr_devel("CXL interrupt: OS Context Warning\n");
+
+ WARN(1, "Unhandled CXL PSL IRQ\n");
+ return IRQ_HANDLED;
+}
+
+static irqreturn_t fail_psl_irq(struct cxl_afu *afu, struct cxl_irq_info *irq_info)
+{
+ if (irq_info->dsisr & CXL_PSL_DSISR_TRANS)
+ cxl_p2n_write(afu, CXL_PSL_TFC_An, CXL_PSL_TFC_An_AE);
+ else
+ cxl_p2n_write(afu, CXL_PSL_TFC_An, CXL_PSL_TFC_An_A);
+
+ return IRQ_HANDLED;
+}
+
+static irqreturn_t cxl_irq_multiplexed(int irq, void *data)
+{
+ struct cxl_afu *afu = data;
+ struct cxl_context *ctx;
+ struct cxl_irq_info irq_info;
+ int ph = cxl_p2n_read(afu, CXL_PSL_PEHandle_An) & 0xffff;
+ int ret;
+
+ if ((ret = cxl_get_irq(afu, &irq_info))) {
+ WARN(1, "Unable to get CXL IRQ Info: %i\n", ret);
+ return fail_psl_irq(afu, &irq_info);
+ }
+
+ rcu_read_lock();
+ ctx = idr_find(&afu->contexts_idr, ph);
+ if (ctx) {
+ ret = cxl_irq(irq, ctx, &irq_info);
+ rcu_read_unlock();
+ return ret;
+ }
+ rcu_read_unlock();
+
+ WARN(1, "Unable to demultiplex CXL PSL IRQ for PE %i DSISR %016llx DAR"
+ " %016llx\n(Possible AFU HW issue - was a term/remove acked"
+ " with outstanding transactions?)\n", ph, irq_info.dsisr,
+ irq_info.dar);
+ return fail_psl_irq(afu, &irq_info);
+}
+
+static irqreturn_t cxl_irq_afu(int irq, void *data)
+{
+ struct cxl_context *ctx = data;
+ irq_hw_number_t hwirq = irqd_to_hwirq(irq_get_irq_data(irq));
+ int irq_off, afu_irq = 1;
+ __u16 range;
+ int r;
+
+ for (r = 1; r < CXL_IRQ_RANGES; r++) {
+ irq_off = hwirq - ctx->irqs.offset[r];
+ range = ctx->irqs.range[r];
+ if (irq_off >= 0 && irq_off < range) {
+ afu_irq += irq_off;
+ break;
+ }
+ afu_irq += range;
+ }
+ if (unlikely(r >= CXL_IRQ_RANGES)) {
+ WARN(1, "Recieved AFU IRQ out of range for pe %i (virq %i hwirq %lx)\n",
+ ctx->pe, irq, hwirq);
+ return IRQ_HANDLED;
+ }
+
+ trace_cxl_afu_irq(ctx, afu_irq, irq, hwirq);
+ pr_devel("Received AFU interrupt %i for pe: %i (virq %i hwirq %lx)\n",
+ afu_irq, ctx->pe, irq, hwirq);
+
+ if (unlikely(!ctx->irq_bitmap)) {
+ WARN(1, "Recieved AFU IRQ for context with no IRQ bitmap\n");
+ return IRQ_HANDLED;
+ }
+ spin_lock(&ctx->lock);
+ set_bit(afu_irq - 1, ctx->irq_bitmap);
+ ctx->pending_irq = true;
+ spin_unlock(&ctx->lock);
+
+ wake_up_all(&ctx->wq);
+
+ return IRQ_HANDLED;
+}
+
+unsigned int cxl_map_irq(struct cxl *adapter, irq_hw_number_t hwirq,
+ irq_handler_t handler, void *cookie, const char *name)
+{
+ unsigned int virq;
+ int result;
+
+ /* IRQ Domain? */
+ virq = irq_create_mapping(NULL, hwirq);
+ if (!virq) {
+ dev_warn(&adapter->dev, "cxl_map_irq: irq_create_mapping failed\n");
+ return 0;
+ }
+
+ cxl_setup_irq(adapter, hwirq, virq);
+
+ pr_devel("hwirq %#lx mapped to virq %u\n", hwirq, virq);
+
+ result = request_irq(virq, handler, 0, name, cookie);
+ if (result) {
+ dev_warn(&adapter->dev, "cxl_map_irq: request_irq failed: %i\n", result);
+ return 0;
+ }
+
+ return virq;
+}
+
+void cxl_unmap_irq(unsigned int virq, void *cookie)
+{
+ free_irq(virq, cookie);
+}
+
+static int cxl_register_one_irq(struct cxl *adapter,
+ irq_handler_t handler,
+ void *cookie,
+ irq_hw_number_t *dest_hwirq,
+ unsigned int *dest_virq,
+ const char *name)
+{
+ int hwirq, virq;
+
+ if ((hwirq = cxl_alloc_one_irq(adapter)) < 0)
+ return hwirq;
+
+ if (!(virq = cxl_map_irq(adapter, hwirq, handler, cookie, name)))
+ goto err;
+
+ *dest_hwirq = hwirq;
+ *dest_virq = virq;
+
+ return 0;
+
+err:
+ cxl_release_one_irq(adapter, hwirq);
+ return -ENOMEM;
+}
+
+int cxl_register_psl_err_irq(struct cxl *adapter)
+{
+ int rc;
+
+ adapter->irq_name = kasprintf(GFP_KERNEL, "cxl-%s-err",
+ dev_name(&adapter->dev));
+ if (!adapter->irq_name)
+ return -ENOMEM;
+
+ if ((rc = cxl_register_one_irq(adapter, cxl_irq_err, adapter,
+ &adapter->err_hwirq,
+ &adapter->err_virq,
+ adapter->irq_name))) {
+ kfree(adapter->irq_name);
+ adapter->irq_name = NULL;
+ return rc;
+ }
+
+ cxl_p1_write(adapter, CXL_PSL_ErrIVTE, adapter->err_hwirq & 0xffff);
+
+ return 0;
+}
+
+void cxl_release_psl_err_irq(struct cxl *adapter)
+{
+ if (adapter->err_virq != irq_find_mapping(NULL, adapter->err_hwirq))
+ return;
+
+ cxl_p1_write(adapter, CXL_PSL_ErrIVTE, 0x0000000000000000);
+ cxl_unmap_irq(adapter->err_virq, adapter);
+ cxl_release_one_irq(adapter, adapter->err_hwirq);
+ kfree(adapter->irq_name);
+}
+
+int cxl_register_serr_irq(struct cxl_afu *afu)
+{
+ u64 serr;
+ int rc;
+
+ afu->err_irq_name = kasprintf(GFP_KERNEL, "cxl-%s-err",
+ dev_name(&afu->dev));
+ if (!afu->err_irq_name)
+ return -ENOMEM;
+
+ if ((rc = cxl_register_one_irq(afu->adapter, cxl_slice_irq_err, afu,
+ &afu->serr_hwirq,
+ &afu->serr_virq, afu->err_irq_name))) {
+ kfree(afu->err_irq_name);
+ afu->err_irq_name = NULL;
+ return rc;
+ }
+
+ serr = cxl_p1n_read(afu, CXL_PSL_SERR_An);
+ serr = (serr & 0x00ffffffffff0000ULL) | (afu->serr_hwirq & 0xffff);
+ cxl_p1n_write(afu, CXL_PSL_SERR_An, serr);
+
+ return 0;
+}
+
+void cxl_release_serr_irq(struct cxl_afu *afu)
+{
+ if (afu->serr_virq != irq_find_mapping(NULL, afu->serr_hwirq))
+ return;
+
+ cxl_p1n_write(afu, CXL_PSL_SERR_An, 0x0000000000000000);
+ cxl_unmap_irq(afu->serr_virq, afu);
+ cxl_release_one_irq(afu->adapter, afu->serr_hwirq);
+ kfree(afu->err_irq_name);
+}
+
+int cxl_register_psl_irq(struct cxl_afu *afu)
+{
+ int rc;
+
+ afu->psl_irq_name = kasprintf(GFP_KERNEL, "cxl-%s",
+ dev_name(&afu->dev));
+ if (!afu->psl_irq_name)
+ return -ENOMEM;
+
+ if ((rc = cxl_register_one_irq(afu->adapter, cxl_irq_multiplexed, afu,
+ &afu->psl_hwirq, &afu->psl_virq,
+ afu->psl_irq_name))) {
+ kfree(afu->psl_irq_name);
+ afu->psl_irq_name = NULL;
+ }
+ return rc;
+}
+
+void cxl_release_psl_irq(struct cxl_afu *afu)
+{
+ if (afu->psl_virq != irq_find_mapping(NULL, afu->psl_hwirq))
+ return;
+
+ cxl_unmap_irq(afu->psl_virq, afu);
+ cxl_release_one_irq(afu->adapter, afu->psl_hwirq);
+ kfree(afu->psl_irq_name);
+}
+
+void afu_irq_name_free(struct cxl_context *ctx)
+{
+ struct cxl_irq_name *irq_name, *tmp;
+
+ list_for_each_entry_safe(irq_name, tmp, &ctx->irq_names, list) {
+ kfree(irq_name->name);
+ list_del(&irq_name->list);
+ kfree(irq_name);
+ }
+}
+
+int afu_allocate_irqs(struct cxl_context *ctx, u32 count)
+{
+ int rc, r, i, j = 1;
+ struct cxl_irq_name *irq_name;
+
+ /* Initialize the list head to hold irq names */
+ INIT_LIST_HEAD(&ctx->irq_names);
+
+ if ((rc = cxl_alloc_irq_ranges(&ctx->irqs, ctx->afu->adapter, count)))
+ return rc;
+
+ /* Multiplexed PSL Interrupt */
+ ctx->irqs.offset[0] = ctx->afu->psl_hwirq;
+ ctx->irqs.range[0] = 1;
+
+ ctx->irq_count = count;
+ ctx->irq_bitmap = kcalloc(BITS_TO_LONGS(count),
+ sizeof(*ctx->irq_bitmap), GFP_KERNEL);
+ if (!ctx->irq_bitmap)
+ goto out;
+
+ /*
+ * Allocate names first. If any fail, bail out before allocating
+ * actual hardware IRQs.
+ */
+ for (r = 1; r < CXL_IRQ_RANGES; r++) {
+ for (i = 0; i < ctx->irqs.range[r]; i++) {
+ irq_name = kmalloc(sizeof(struct cxl_irq_name),
+ GFP_KERNEL);
+ if (!irq_name)
+ goto out;
+ irq_name->name = kasprintf(GFP_KERNEL, "cxl-%s-pe%i-%i",
+ dev_name(&ctx->afu->dev),
+ ctx->pe, j);
+ if (!irq_name->name) {
+ kfree(irq_name);
+ goto out;
+ }
+ /* Add to tail so next look get the correct order */
+ list_add_tail(&irq_name->list, &ctx->irq_names);
+ j++;
+ }
+ }
+ return 0;
+
+out:
+ cxl_release_irq_ranges(&ctx->irqs, ctx->afu->adapter);
+ afu_irq_name_free(ctx);
+ return -ENOMEM;
+}
+
+static void afu_register_hwirqs(struct cxl_context *ctx)
+{
+ irq_hw_number_t hwirq;
+ struct cxl_irq_name *irq_name;
+ int r,i;
+
+ /* We've allocated all memory now, so let's do the irq allocations */
+ irq_name = list_first_entry(&ctx->irq_names, struct cxl_irq_name, list);
+ for (r = 1; r < CXL_IRQ_RANGES; r++) {
+ hwirq = ctx->irqs.offset[r];
+ for (i = 0; i < ctx->irqs.range[r]; hwirq++, i++) {
+ cxl_map_irq(ctx->afu->adapter, hwirq,
+ cxl_irq_afu, ctx, irq_name->name);
+ irq_name = list_next_entry(irq_name, list);
+ }
+ }
+}
+
+int afu_register_irqs(struct cxl_context *ctx, u32 count)
+{
+ int rc;
+
+ rc = afu_allocate_irqs(ctx, count);
+ if (rc)
+ return rc;
+
+ afu_register_hwirqs(ctx);
+ return 0;
+ }
+
+void afu_release_irqs(struct cxl_context *ctx, void *cookie)
+{
+ irq_hw_number_t hwirq;
+ unsigned int virq;
+ int r, i;
+
+ for (r = 1; r < CXL_IRQ_RANGES; r++) {
+ hwirq = ctx->irqs.offset[r];
+ for (i = 0; i < ctx->irqs.range[r]; hwirq++, i++) {
+ virq = irq_find_mapping(NULL, hwirq);
+ if (virq)
+ cxl_unmap_irq(virq, cookie);
+ }
+ }
+
+ afu_irq_name_free(ctx);
+ cxl_release_irq_ranges(&ctx->irqs, ctx->afu->adapter);
+
+ ctx->irq_count = 0;
+}
diff --git a/drivers/misc/cxl/main.c b/drivers/misc/cxl/main.c
new file mode 100644
index 0000000..9fde75e
--- /dev/null
+++ b/drivers/misc/cxl/main.c
@@ -0,0 +1,233 @@
+/*
+ * Copyright 2014 IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/spinlock.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/mutex.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/mm.h>
+#include <linux/of.h>
+#include <linux/slab.h>
+#include <linux/idr.h>
+#include <linux/pci.h>
+#include <asm/cputable.h>
+#include <misc/cxl-base.h>
+
+#include "cxl.h"
+#include "trace.h"
+
+static DEFINE_SPINLOCK(adapter_idr_lock);
+static DEFINE_IDR(cxl_adapter_idr);
+
+uint cxl_verbose;
+module_param_named(verbose, cxl_verbose, uint, 0600);
+MODULE_PARM_DESC(verbose, "Enable verbose dmesg output");
+
+static inline void _cxl_slbia(struct cxl_context *ctx, struct mm_struct *mm)
+{
+ struct task_struct *task;
+ unsigned long flags;
+ if (!(task = get_pid_task(ctx->pid, PIDTYPE_PID))) {
+ pr_devel("%s unable to get task %i\n",
+ __func__, pid_nr(ctx->pid));
+ return;
+ }
+
+ if (task->mm != mm)
+ goto out_put;
+
+ pr_devel("%s matched mm - card: %i afu: %i pe: %i\n", __func__,
+ ctx->afu->adapter->adapter_num, ctx->afu->slice, ctx->pe);
+
+ spin_lock_irqsave(&ctx->sste_lock, flags);
+ trace_cxl_slbia(ctx);
+ memset(ctx->sstp, 0, ctx->sst_size);
+ spin_unlock_irqrestore(&ctx->sste_lock, flags);
+ mb();
+ cxl_afu_slbia(ctx->afu);
+out_put:
+ put_task_struct(task);
+}
+
+static inline void cxl_slbia_core(struct mm_struct *mm)
+{
+ struct cxl *adapter;
+ struct cxl_afu *afu;
+ struct cxl_context *ctx;
+ int card, slice, id;
+
+ pr_devel("%s called\n", __func__);
+
+ spin_lock(&adapter_idr_lock);
+ idr_for_each_entry(&cxl_adapter_idr, adapter, card) {
+ /* XXX: Make this lookup faster with link from mm to ctx */
+ spin_lock(&adapter->afu_list_lock);
+ for (slice = 0; slice < adapter->slices; slice++) {
+ afu = adapter->afu[slice];
+ if (!afu || !afu->enabled)
+ continue;
+ rcu_read_lock();
+ idr_for_each_entry(&afu->contexts_idr, ctx, id)
+ _cxl_slbia(ctx, mm);
+ rcu_read_unlock();
+ }
+ spin_unlock(&adapter->afu_list_lock);
+ }
+ spin_unlock(&adapter_idr_lock);
+}
+
+static struct cxl_calls cxl_calls = {
+ .cxl_slbia = cxl_slbia_core,
+ .owner = THIS_MODULE,
+};
+
+int cxl_alloc_sst(struct cxl_context *ctx)
+{
+ unsigned long vsid;
+ u64 ea_mask, size, sstp0, sstp1;
+
+ sstp0 = 0;
+ sstp1 = 0;
+
+ ctx->sst_size = PAGE_SIZE;
+ ctx->sst_lru = 0;
+ ctx->sstp = (struct cxl_sste *)get_zeroed_page(GFP_KERNEL);
+ if (!ctx->sstp) {
+ pr_err("cxl_alloc_sst: Unable to allocate segment table\n");
+ return -ENOMEM;
+ }
+ pr_devel("SSTP allocated at 0x%p\n", ctx->sstp);
+
+ vsid = get_kernel_vsid((u64)ctx->sstp, mmu_kernel_ssize) << 12;
+
+ sstp0 |= (u64)mmu_kernel_ssize << CXL_SSTP0_An_B_SHIFT;
+ sstp0 |= (SLB_VSID_KERNEL | mmu_psize_defs[mmu_linear_psize].sllp) << 50;
+
+ size = (((u64)ctx->sst_size >> 8) - 1) << CXL_SSTP0_An_SegTableSize_SHIFT;
+ if (unlikely(size & ~CXL_SSTP0_An_SegTableSize_MASK)) {
+ WARN(1, "Impossible segment table size\n");
+ return -EINVAL;
+ }
+ sstp0 |= size;
+
+ if (mmu_kernel_ssize == MMU_SEGSIZE_256M)
+ ea_mask = 0xfffff00ULL;
+ else
+ ea_mask = 0xffffffff00ULL;
+
+ sstp0 |= vsid >> (50-14); /* Top 14 bits of VSID */
+ sstp1 |= (vsid << (64-(50-14))) & ~ea_mask;
+ sstp1 |= (u64)ctx->sstp & ea_mask;
+ sstp1 |= CXL_SSTP1_An_V;
+
+ pr_devel("Looked up %#llx: slbfee. %#llx (ssize: %x, vsid: %#lx), copied to SSTP0: %#llx, SSTP1: %#llx\n",
+ (u64)ctx->sstp, (u64)ctx->sstp & ESID_MASK, mmu_kernel_ssize, vsid, sstp0, sstp1);
+
+ /* Store calculated sstp hardware points for use later */
+ ctx->sstp0 = sstp0;
+ ctx->sstp1 = sstp1;
+
+ return 0;
+}
+
+/* Find a CXL adapter by it's number and increase it's refcount */
+struct cxl *get_cxl_adapter(int num)
+{
+ struct cxl *adapter;
+
+ spin_lock(&adapter_idr_lock);
+ if ((adapter = idr_find(&cxl_adapter_idr, num)))
+ get_device(&adapter->dev);
+ spin_unlock(&adapter_idr_lock);
+
+ return adapter;
+}
+
+int cxl_alloc_adapter_nr(struct cxl *adapter)
+{
+ int i;
+
+ idr_preload(GFP_KERNEL);
+ spin_lock(&adapter_idr_lock);
+ i = idr_alloc(&cxl_adapter_idr, adapter, 0, 0, GFP_NOWAIT);
+ spin_unlock(&adapter_idr_lock);
+ idr_preload_end();
+ if (i < 0)
+ return i;
+
+ adapter->adapter_num = i;
+
+ return 0;
+}
+
+void cxl_remove_adapter_nr(struct cxl *adapter)
+{
+ idr_remove(&cxl_adapter_idr, adapter->adapter_num);
+}
+
+int cxl_afu_select_best_mode(struct cxl_afu *afu)
+{
+ if (afu->modes_supported & CXL_MODE_DIRECTED)
+ return cxl_afu_activate_mode(afu, CXL_MODE_DIRECTED);
+
+ if (afu->modes_supported & CXL_MODE_DEDICATED)
+ return cxl_afu_activate_mode(afu, CXL_MODE_DEDICATED);
+
+ dev_warn(&afu->dev, "No supported programming modes available\n");
+ /* We don't fail this so the user can inspect sysfs */
+ return 0;
+}
+
+static int __init init_cxl(void)
+{
+ int rc = 0;
+
+ if (!cpu_has_feature(CPU_FTR_HVMODE))
+ return -EPERM;
+
+ if ((rc = cxl_file_init()))
+ return rc;
+
+ cxl_debugfs_init();
+
+ if ((rc = register_cxl_calls(&cxl_calls)))
+ goto err;
+
+ if ((rc = pci_register_driver(&cxl_pci_driver)))
+ goto err1;
+
+ return 0;
+err1:
+ unregister_cxl_calls(&cxl_calls);
+err:
+ cxl_debugfs_exit();
+ cxl_file_exit();
+
+ return rc;
+}
+
+static void exit_cxl(void)
+{
+ pci_unregister_driver(&cxl_pci_driver);
+
+ cxl_debugfs_exit();
+ cxl_file_exit();
+ unregister_cxl_calls(&cxl_calls);
+ idr_destroy(&cxl_adapter_idr);
+}
+
+module_init(init_cxl);
+module_exit(exit_cxl);
+
+MODULE_DESCRIPTION("IBM Coherent Accelerator");
+MODULE_AUTHOR("Ian Munsie <imunsie@au1.ibm.com>");
+MODULE_LICENSE("GPL");
diff --git a/drivers/misc/cxl/native.c b/drivers/misc/cxl/native.c
new file mode 100644
index 0000000..f409097
--- /dev/null
+++ b/drivers/misc/cxl/native.c
@@ -0,0 +1,786 @@
+/*
+ * Copyright 2014 IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/spinlock.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/sched.h>
+#include <linux/mutex.h>
+#include <linux/mm.h>
+#include <linux/uaccess.h>
+#include <asm/synch.h>
+#include <misc/cxl-base.h>
+
+#include "cxl.h"
+#include "trace.h"
+
+static int afu_control(struct cxl_afu *afu, u64 command,
+ u64 result, u64 mask, bool enabled)
+{
+ u64 AFU_Cntl = cxl_p2n_read(afu, CXL_AFU_Cntl_An);
+ unsigned long timeout = jiffies + (HZ * CXL_TIMEOUT);
+ int rc = 0;
+
+ spin_lock(&afu->afu_cntl_lock);
+ pr_devel("AFU command starting: %llx\n", command);
+
+ trace_cxl_afu_ctrl(afu, command);
+
+ cxl_p2n_write(afu, CXL_AFU_Cntl_An, AFU_Cntl | command);
+
+ AFU_Cntl = cxl_p2n_read(afu, CXL_AFU_Cntl_An);
+ while ((AFU_Cntl & mask) != result) {
+ if (time_after_eq(jiffies, timeout)) {
+ dev_warn(&afu->dev, "WARNING: AFU control timed out!\n");
+ rc = -EBUSY;
+ goto out;
+ }
+
+ if (!cxl_adapter_link_ok(afu->adapter)) {
+ afu->enabled = enabled;
+ rc = -EIO;
+ goto out;
+ }
+
+ pr_devel_ratelimited("AFU control... (0x%016llx)\n",
+ AFU_Cntl | command);
+ cpu_relax();
+ AFU_Cntl = cxl_p2n_read(afu, CXL_AFU_Cntl_An);
+ };
+ pr_devel("AFU command complete: %llx\n", command);
+ afu->enabled = enabled;
+out:
+ trace_cxl_afu_ctrl_done(afu, command, rc);
+ spin_unlock(&afu->afu_cntl_lock);
+
+ return rc;
+}
+
+static int afu_enable(struct cxl_afu *afu)
+{
+ pr_devel("AFU enable request\n");
+
+ return afu_control(afu, CXL_AFU_Cntl_An_E,
+ CXL_AFU_Cntl_An_ES_Enabled,
+ CXL_AFU_Cntl_An_ES_MASK, true);
+}
+
+int cxl_afu_disable(struct cxl_afu *afu)
+{
+ pr_devel("AFU disable request\n");
+
+ return afu_control(afu, 0, CXL_AFU_Cntl_An_ES_Disabled,
+ CXL_AFU_Cntl_An_ES_MASK, false);
+}
+
+/* This will disable as well as reset */
+int __cxl_afu_reset(struct cxl_afu *afu)
+{
+ pr_devel("AFU reset request\n");
+
+ return afu_control(afu, CXL_AFU_Cntl_An_RA,
+ CXL_AFU_Cntl_An_RS_Complete | CXL_AFU_Cntl_An_ES_Disabled,
+ CXL_AFU_Cntl_An_RS_MASK | CXL_AFU_Cntl_An_ES_MASK,
+ false);
+}
+
+int cxl_afu_check_and_enable(struct cxl_afu *afu)
+{
+ if (!cxl_adapter_link_ok(afu->adapter)) {
+ WARN(1, "Refusing to enable afu while link down!\n");
+ return -EIO;
+ }
+ if (afu->enabled)
+ return 0;
+ return afu_enable(afu);
+}
+
+int cxl_psl_purge(struct cxl_afu *afu)
+{
+ u64 PSL_CNTL = cxl_p1n_read(afu, CXL_PSL_SCNTL_An);
+ u64 AFU_Cntl = cxl_p2n_read(afu, CXL_AFU_Cntl_An);
+ u64 dsisr, dar;
+ u64 start, end;
+ unsigned long timeout = jiffies + (HZ * CXL_TIMEOUT);
+ int rc = 0;
+
+ trace_cxl_psl_ctrl(afu, CXL_PSL_SCNTL_An_Pc);
+
+ pr_devel("PSL purge request\n");
+
+ if (!cxl_adapter_link_ok(afu->adapter)) {
+ dev_warn(&afu->dev, "PSL Purge called with link down, ignoring\n");
+ rc = -EIO;
+ goto out;
+ }
+
+ if ((AFU_Cntl & CXL_AFU_Cntl_An_ES_MASK) != CXL_AFU_Cntl_An_ES_Disabled) {
+ WARN(1, "psl_purge request while AFU not disabled!\n");
+ cxl_afu_disable(afu);
+ }
+
+ cxl_p1n_write(afu, CXL_PSL_SCNTL_An,
+ PSL_CNTL | CXL_PSL_SCNTL_An_Pc);
+ start = local_clock();
+ PSL_CNTL = cxl_p1n_read(afu, CXL_PSL_SCNTL_An);
+ while ((PSL_CNTL & CXL_PSL_SCNTL_An_Ps_MASK)
+ == CXL_PSL_SCNTL_An_Ps_Pending) {
+ if (time_after_eq(jiffies, timeout)) {
+ dev_warn(&afu->dev, "WARNING: PSL Purge timed out!\n");
+ rc = -EBUSY;
+ goto out;
+ }
+ if (!cxl_adapter_link_ok(afu->adapter)) {
+ rc = -EIO;
+ goto out;
+ }
+
+ dsisr = cxl_p2n_read(afu, CXL_PSL_DSISR_An);
+ pr_devel_ratelimited("PSL purging... PSL_CNTL: 0x%016llx PSL_DSISR: 0x%016llx\n", PSL_CNTL, dsisr);
+ if (dsisr & CXL_PSL_DSISR_TRANS) {
+ dar = cxl_p2n_read(afu, CXL_PSL_DAR_An);
+ dev_notice(&afu->dev, "PSL purge terminating pending translation, DSISR: 0x%016llx, DAR: 0x%016llx\n", dsisr, dar);
+ cxl_p2n_write(afu, CXL_PSL_TFC_An, CXL_PSL_TFC_An_AE);
+ } else if (dsisr) {
+ dev_notice(&afu->dev, "PSL purge acknowledging pending non-translation fault, DSISR: 0x%016llx\n", dsisr);
+ cxl_p2n_write(afu, CXL_PSL_TFC_An, CXL_PSL_TFC_An_A);
+ } else {
+ cpu_relax();
+ }
+ PSL_CNTL = cxl_p1n_read(afu, CXL_PSL_SCNTL_An);
+ };
+ end = local_clock();
+ pr_devel("PSL purged in %lld ns\n", end - start);
+
+ cxl_p1n_write(afu, CXL_PSL_SCNTL_An,
+ PSL_CNTL & ~CXL_PSL_SCNTL_An_Pc);
+out:
+ trace_cxl_psl_ctrl_done(afu, CXL_PSL_SCNTL_An_Pc, rc);
+ return rc;
+}
+
+static int spa_max_procs(int spa_size)
+{
+ /*
+ * From the CAIA:
+ * end_of_SPA_area = SPA_Base + ((n+4) * 128) + (( ((n*8) + 127) >> 7) * 128) + 255
+ * Most of that junk is really just an overly-complicated way of saying
+ * the last 256 bytes are __aligned(128), so it's really:
+ * end_of_SPA_area = end_of_PSL_queue_area + __aligned(128) 255
+ * and
+ * end_of_PSL_queue_area = SPA_Base + ((n+4) * 128) + (n*8) - 1
+ * so
+ * sizeof(SPA) = ((n+4) * 128) + (n*8) + __aligned(128) 256
+ * Ignore the alignment (which is safe in this case as long as we are
+ * careful with our rounding) and solve for n:
+ */
+ return ((spa_size / 8) - 96) / 17;
+}
+
+int cxl_alloc_spa(struct cxl_afu *afu)
+{
+ /* Work out how many pages to allocate */
+ afu->spa_order = 0;
+ do {
+ afu->spa_order++;
+ afu->spa_size = (1 << afu->spa_order) * PAGE_SIZE;
+ afu->spa_max_procs = spa_max_procs(afu->spa_size);
+ } while (afu->spa_max_procs < afu->num_procs);
+
+ WARN_ON(afu->spa_size > 0x100000); /* Max size supported by the hardware */
+
+ if (!(afu->spa = (struct cxl_process_element *)
+ __get_free_pages(GFP_KERNEL | __GFP_ZERO, afu->spa_order))) {
+ pr_err("cxl_alloc_spa: Unable to allocate scheduled process area\n");
+ return -ENOMEM;
+ }
+ pr_devel("spa pages: %i afu->spa_max_procs: %i afu->num_procs: %i\n",
+ 1<<afu->spa_order, afu->spa_max_procs, afu->num_procs);
+
+ return 0;
+}
+
+static void attach_spa(struct cxl_afu *afu)
+{
+ u64 spap;
+
+ afu->sw_command_status = (__be64 *)((char *)afu->spa +
+ ((afu->spa_max_procs + 3) * 128));
+
+ spap = virt_to_phys(afu->spa) & CXL_PSL_SPAP_Addr;
+ spap |= ((afu->spa_size >> (12 - CXL_PSL_SPAP_Size_Shift)) - 1) & CXL_PSL_SPAP_Size;
+ spap |= CXL_PSL_SPAP_V;
+ pr_devel("cxl: SPA allocated at 0x%p. Max processes: %i, sw_command_status: 0x%p CXL_PSL_SPAP_An=0x%016llx\n", afu->spa, afu->spa_max_procs, afu->sw_command_status, spap);
+ cxl_p1n_write(afu, CXL_PSL_SPAP_An, spap);
+}
+
+static inline void detach_spa(struct cxl_afu *afu)
+{
+ cxl_p1n_write(afu, CXL_PSL_SPAP_An, 0);
+}
+
+void cxl_release_spa(struct cxl_afu *afu)
+{
+ if (afu->spa) {
+ free_pages((unsigned long) afu->spa, afu->spa_order);
+ afu->spa = NULL;
+ }
+}
+
+int cxl_tlb_slb_invalidate(struct cxl *adapter)
+{
+ unsigned long timeout = jiffies + (HZ * CXL_TIMEOUT);
+
+ pr_devel("CXL adapter wide TLBIA & SLBIA\n");
+
+ cxl_p1_write(adapter, CXL_PSL_AFUSEL, CXL_PSL_AFUSEL_A);
+
+ cxl_p1_write(adapter, CXL_PSL_TLBIA, CXL_TLB_SLB_IQ_ALL);
+ while (cxl_p1_read(adapter, CXL_PSL_TLBIA) & CXL_TLB_SLB_P) {
+ if (time_after_eq(jiffies, timeout)) {
+ dev_warn(&adapter->dev, "WARNING: CXL adapter wide TLBIA timed out!\n");
+ return -EBUSY;
+ }
+ if (!cxl_adapter_link_ok(adapter))
+ return -EIO;
+ cpu_relax();
+ }
+
+ cxl_p1_write(adapter, CXL_PSL_SLBIA, CXL_TLB_SLB_IQ_ALL);
+ while (cxl_p1_read(adapter, CXL_PSL_SLBIA) & CXL_TLB_SLB_P) {
+ if (time_after_eq(jiffies, timeout)) {
+ dev_warn(&adapter->dev, "WARNING: CXL adapter wide SLBIA timed out!\n");
+ return -EBUSY;
+ }
+ if (!cxl_adapter_link_ok(adapter))
+ return -EIO;
+ cpu_relax();
+ }
+ return 0;
+}
+
+int cxl_afu_slbia(struct cxl_afu *afu)
+{
+ unsigned long timeout = jiffies + (HZ * CXL_TIMEOUT);
+
+ pr_devel("cxl_afu_slbia issuing SLBIA command\n");
+ cxl_p2n_write(afu, CXL_SLBIA_An, CXL_TLB_SLB_IQ_ALL);
+ while (cxl_p2n_read(afu, CXL_SLBIA_An) & CXL_TLB_SLB_P) {
+ if (time_after_eq(jiffies, timeout)) {
+ dev_warn(&afu->dev, "WARNING: CXL AFU SLBIA timed out!\n");
+ return -EBUSY;
+ }
+ /* If the adapter has gone down, we can assume that we
+ * will PERST it and that will invalidate everything.
+ */
+ if (!cxl_adapter_link_ok(afu->adapter))
+ return -EIO;
+ cpu_relax();
+ }
+ return 0;
+}
+
+static int cxl_write_sstp(struct cxl_afu *afu, u64 sstp0, u64 sstp1)
+{
+ int rc;
+
+ /* 1. Disable SSTP by writing 0 to SSTP1[V] */
+ cxl_p2n_write(afu, CXL_SSTP1_An, 0);
+
+ /* 2. Invalidate all SLB entries */
+ if ((rc = cxl_afu_slbia(afu)))
+ return rc;
+
+ /* 3. Set SSTP0_An */
+ cxl_p2n_write(afu, CXL_SSTP0_An, sstp0);
+
+ /* 4. Set SSTP1_An */
+ cxl_p2n_write(afu, CXL_SSTP1_An, sstp1);
+
+ return 0;
+}
+
+/* Using per slice version may improve performance here. (ie. SLBIA_An) */
+static void slb_invalid(struct cxl_context *ctx)
+{
+ struct cxl *adapter = ctx->afu->adapter;
+ u64 slbia;
+
+ WARN_ON(!mutex_is_locked(&ctx->afu->spa_mutex));
+
+ cxl_p1_write(adapter, CXL_PSL_LBISEL,
+ ((u64)be32_to_cpu(ctx->elem->common.pid) << 32) |
+ be32_to_cpu(ctx->elem->lpid));
+ cxl_p1_write(adapter, CXL_PSL_SLBIA, CXL_TLB_SLB_IQ_LPIDPID);
+
+ while (1) {
+ if (!cxl_adapter_link_ok(adapter))
+ break;
+ slbia = cxl_p1_read(adapter, CXL_PSL_SLBIA);
+ if (!(slbia & CXL_TLB_SLB_P))
+ break;
+ cpu_relax();
+ }
+}
+
+static int do_process_element_cmd(struct cxl_context *ctx,
+ u64 cmd, u64 pe_state)
+{
+ u64 state;
+ unsigned long timeout = jiffies + (HZ * CXL_TIMEOUT);
+ int rc = 0;
+
+ trace_cxl_llcmd(ctx, cmd);
+
+ WARN_ON(!ctx->afu->enabled);
+
+ ctx->elem->software_state = cpu_to_be32(pe_state);
+ smp_wmb();
+ *(ctx->afu->sw_command_status) = cpu_to_be64(cmd | 0 | ctx->pe);
+ smp_mb();
+ cxl_p1n_write(ctx->afu, CXL_PSL_LLCMD_An, cmd | ctx->pe);
+ while (1) {
+ if (time_after_eq(jiffies, timeout)) {
+ dev_warn(&ctx->afu->dev, "WARNING: Process Element Command timed out!\n");
+ rc = -EBUSY;
+ goto out;
+ }
+ if (!cxl_adapter_link_ok(ctx->afu->adapter)) {
+ dev_warn(&ctx->afu->dev, "WARNING: Device link down, aborting Process Element Command!\n");
+ rc = -EIO;
+ goto out;
+ }
+ state = be64_to_cpup(ctx->afu->sw_command_status);
+ if (state == ~0ULL) {
+ pr_err("cxl: Error adding process element to AFU\n");
+ rc = -1;
+ goto out;
+ }
+ if ((state & (CXL_SPA_SW_CMD_MASK | CXL_SPA_SW_STATE_MASK | CXL_SPA_SW_LINK_MASK)) ==
+ (cmd | (cmd >> 16) | ctx->pe))
+ break;
+ /*
+ * The command won't finish in the PSL if there are
+ * outstanding DSIs. Hence we need to yield here in
+ * case there are outstanding DSIs that we need to
+ * service. Tuning possiblity: we could wait for a
+ * while before sched
+ */
+ schedule();
+
+ }
+out:
+ trace_cxl_llcmd_done(ctx, cmd, rc);
+ return rc;
+}
+
+static int add_process_element(struct cxl_context *ctx)
+{
+ int rc = 0;
+
+ mutex_lock(&ctx->afu->spa_mutex);
+ pr_devel("%s Adding pe: %i started\n", __func__, ctx->pe);
+ if (!(rc = do_process_element_cmd(ctx, CXL_SPA_SW_CMD_ADD, CXL_PE_SOFTWARE_STATE_V)))
+ ctx->pe_inserted = true;
+ pr_devel("%s Adding pe: %i finished\n", __func__, ctx->pe);
+ mutex_unlock(&ctx->afu->spa_mutex);
+ return rc;
+}
+
+static int terminate_process_element(struct cxl_context *ctx)
+{
+ int rc = 0;
+
+ /* fast path terminate if it's already invalid */
+ if (!(ctx->elem->software_state & cpu_to_be32(CXL_PE_SOFTWARE_STATE_V)))
+ return rc;
+
+ mutex_lock(&ctx->afu->spa_mutex);
+ pr_devel("%s Terminate pe: %i started\n", __func__, ctx->pe);
+ /* We could be asked to terminate when the hw is down. That
+ * should always succeed: it's not running if the hw has gone
+ * away and is being reset.
+ */
+ if (cxl_adapter_link_ok(ctx->afu->adapter))
+ rc = do_process_element_cmd(ctx, CXL_SPA_SW_CMD_TERMINATE,
+ CXL_PE_SOFTWARE_STATE_V | CXL_PE_SOFTWARE_STATE_T);
+ ctx->elem->software_state = 0; /* Remove Valid bit */
+ pr_devel("%s Terminate pe: %i finished\n", __func__, ctx->pe);
+ mutex_unlock(&ctx->afu->spa_mutex);
+ return rc;
+}
+
+static int remove_process_element(struct cxl_context *ctx)
+{
+ int rc = 0;
+
+ mutex_lock(&ctx->afu->spa_mutex);
+ pr_devel("%s Remove pe: %i started\n", __func__, ctx->pe);
+
+ /* We could be asked to remove when the hw is down. Again, if
+ * the hw is down, the PE is gone, so we succeed.
+ */
+ if (cxl_adapter_link_ok(ctx->afu->adapter))
+ rc = do_process_element_cmd(ctx, CXL_SPA_SW_CMD_REMOVE, 0);
+
+ if (!rc)
+ ctx->pe_inserted = false;
+ slb_invalid(ctx);
+ pr_devel("%s Remove pe: %i finished\n", __func__, ctx->pe);
+ mutex_unlock(&ctx->afu->spa_mutex);
+
+ return rc;
+}
+
+
+void cxl_assign_psn_space(struct cxl_context *ctx)
+{
+ if (!ctx->afu->pp_size || ctx->master) {
+ ctx->psn_phys = ctx->afu->psn_phys;
+ ctx->psn_size = ctx->afu->adapter->ps_size;
+ } else {
+ ctx->psn_phys = ctx->afu->psn_phys +
+ (ctx->afu->pp_offset + ctx->afu->pp_size * ctx->pe);
+ ctx->psn_size = ctx->afu->pp_size;
+ }
+}
+
+static int activate_afu_directed(struct cxl_afu *afu)
+{
+ int rc;
+
+ dev_info(&afu->dev, "Activating AFU directed mode\n");
+
+ afu->num_procs = afu->max_procs_virtualised;
+ if (afu->spa == NULL) {
+ if (cxl_alloc_spa(afu))
+ return -ENOMEM;
+ }
+ attach_spa(afu);
+
+ cxl_p1n_write(afu, CXL_PSL_SCNTL_An, CXL_PSL_SCNTL_An_PM_AFU);
+ cxl_p1n_write(afu, CXL_PSL_AMOR_An, 0xFFFFFFFFFFFFFFFFULL);
+ cxl_p1n_write(afu, CXL_PSL_ID_An, CXL_PSL_ID_An_F | CXL_PSL_ID_An_L);
+
+ afu->current_mode = CXL_MODE_DIRECTED;
+
+ if ((rc = cxl_chardev_m_afu_add(afu)))
+ return rc;
+
+ if ((rc = cxl_sysfs_afu_m_add(afu)))
+ goto err;
+
+ if ((rc = cxl_chardev_s_afu_add(afu)))
+ goto err1;
+
+ return 0;
+err1:
+ cxl_sysfs_afu_m_remove(afu);
+err:
+ cxl_chardev_afu_remove(afu);
+ return rc;
+}
+
+#ifdef CONFIG_CPU_LITTLE_ENDIAN
+#define set_endian(sr) ((sr) |= CXL_PSL_SR_An_LE)
+#else
+#define set_endian(sr) ((sr) &= ~(CXL_PSL_SR_An_LE))
+#endif
+
+static u64 calculate_sr(struct cxl_context *ctx)
+{
+ u64 sr = 0;
+
+ set_endian(sr);
+ if (ctx->master)
+ sr |= CXL_PSL_SR_An_MP;
+ if (mfspr(SPRN_LPCR) & LPCR_TC)
+ sr |= CXL_PSL_SR_An_TC;
+ if (ctx->kernel) {
+ sr |= CXL_PSL_SR_An_R | (mfmsr() & MSR_SF);
+ sr |= CXL_PSL_SR_An_HV;
+ } else {
+ sr |= CXL_PSL_SR_An_PR | CXL_PSL_SR_An_R;
+ sr &= ~(CXL_PSL_SR_An_HV);
+ if (!test_tsk_thread_flag(current, TIF_32BIT))
+ sr |= CXL_PSL_SR_An_SF;
+ }
+ return sr;
+}
+
+static int attach_afu_directed(struct cxl_context *ctx, u64 wed, u64 amr)
+{
+ u32 pid;
+ int r, result;
+
+ cxl_assign_psn_space(ctx);
+
+ ctx->elem->ctxtime = 0; /* disable */
+ ctx->elem->lpid = cpu_to_be32(mfspr(SPRN_LPID));
+ ctx->elem->haurp = 0; /* disable */
+ ctx->elem->sdr = cpu_to_be64(mfspr(SPRN_SDR1));
+
+ pid = current->pid;
+ if (ctx->kernel)
+ pid = 0;
+ ctx->elem->common.tid = 0;
+ ctx->elem->common.pid = cpu_to_be32(pid);
+
+ ctx->elem->sr = cpu_to_be64(calculate_sr(ctx));
+
+ ctx->elem->common.csrp = 0; /* disable */
+ ctx->elem->common.aurp0 = 0; /* disable */
+ ctx->elem->common.aurp1 = 0; /* disable */
+
+ cxl_prefault(ctx, wed);
+
+ ctx->elem->common.sstp0 = cpu_to_be64(ctx->sstp0);
+ ctx->elem->common.sstp1 = cpu_to_be64(ctx->sstp1);
+
+ for (r = 0; r < CXL_IRQ_RANGES; r++) {
+ ctx->elem->ivte_offsets[r] = cpu_to_be16(ctx->irqs.offset[r]);
+ ctx->elem->ivte_ranges[r] = cpu_to_be16(ctx->irqs.range[r]);
+ }
+
+ ctx->elem->common.amr = cpu_to_be64(amr);
+ ctx->elem->common.wed = cpu_to_be64(wed);
+
+ /* first guy needs to enable */
+ if ((result = cxl_afu_check_and_enable(ctx->afu)))
+ return result;
+
+ return add_process_element(ctx);
+}
+
+static int deactivate_afu_directed(struct cxl_afu *afu)
+{
+ dev_info(&afu->dev, "Deactivating AFU directed mode\n");
+
+ afu->current_mode = 0;
+ afu->num_procs = 0;
+
+ cxl_sysfs_afu_m_remove(afu);
+ cxl_chardev_afu_remove(afu);
+
+ __cxl_afu_reset(afu);
+ cxl_afu_disable(afu);
+ cxl_psl_purge(afu);
+
+ return 0;
+}
+
+static int activate_dedicated_process(struct cxl_afu *afu)
+{
+ dev_info(&afu->dev, "Activating dedicated process mode\n");
+
+ cxl_p1n_write(afu, CXL_PSL_SCNTL_An, CXL_PSL_SCNTL_An_PM_Process);
+
+ cxl_p1n_write(afu, CXL_PSL_CtxTime_An, 0); /* disable */
+ cxl_p1n_write(afu, CXL_PSL_SPAP_An, 0); /* disable */
+ cxl_p1n_write(afu, CXL_PSL_AMOR_An, 0xFFFFFFFFFFFFFFFFULL);
+ cxl_p1n_write(afu, CXL_PSL_LPID_An, mfspr(SPRN_LPID));
+ cxl_p1n_write(afu, CXL_HAURP_An, 0); /* disable */
+ cxl_p1n_write(afu, CXL_PSL_SDR_An, mfspr(SPRN_SDR1));
+
+ cxl_p2n_write(afu, CXL_CSRP_An, 0); /* disable */
+ cxl_p2n_write(afu, CXL_AURP0_An, 0); /* disable */
+ cxl_p2n_write(afu, CXL_AURP1_An, 0); /* disable */
+
+ afu->current_mode = CXL_MODE_DEDICATED;
+ afu->num_procs = 1;
+
+ return cxl_chardev_d_afu_add(afu);
+}
+
+static int attach_dedicated(struct cxl_context *ctx, u64 wed, u64 amr)
+{
+ struct cxl_afu *afu = ctx->afu;
+ u64 pid;
+ int rc;
+
+ pid = (u64)current->pid << 32;
+ if (ctx->kernel)
+ pid = 0;
+ cxl_p2n_write(afu, CXL_PSL_PID_TID_An, pid);
+
+ cxl_p1n_write(afu, CXL_PSL_SR_An, calculate_sr(ctx));
+
+ if ((rc = cxl_write_sstp(afu, ctx->sstp0, ctx->sstp1)))
+ return rc;
+
+ cxl_prefault(ctx, wed);
+
+ cxl_p1n_write(afu, CXL_PSL_IVTE_Offset_An,
+ (((u64)ctx->irqs.offset[0] & 0xffff) << 48) |
+ (((u64)ctx->irqs.offset[1] & 0xffff) << 32) |
+ (((u64)ctx->irqs.offset[2] & 0xffff) << 16) |
+ ((u64)ctx->irqs.offset[3] & 0xffff));
+ cxl_p1n_write(afu, CXL_PSL_IVTE_Limit_An, (u64)
+ (((u64)ctx->irqs.range[0] & 0xffff) << 48) |
+ (((u64)ctx->irqs.range[1] & 0xffff) << 32) |
+ (((u64)ctx->irqs.range[2] & 0xffff) << 16) |
+ ((u64)ctx->irqs.range[3] & 0xffff));
+
+ cxl_p2n_write(afu, CXL_PSL_AMR_An, amr);
+
+ /* master only context for dedicated */
+ cxl_assign_psn_space(ctx);
+
+ if ((rc = __cxl_afu_reset(afu)))
+ return rc;
+
+ cxl_p2n_write(afu, CXL_PSL_WED_An, wed);
+
+ return afu_enable(afu);
+}
+
+static int deactivate_dedicated_process(struct cxl_afu *afu)
+{
+ dev_info(&afu->dev, "Deactivating dedicated process mode\n");
+
+ afu->current_mode = 0;
+ afu->num_procs = 0;
+
+ cxl_chardev_afu_remove(afu);
+
+ return 0;
+}
+
+int _cxl_afu_deactivate_mode(struct cxl_afu *afu, int mode)
+{
+ if (mode == CXL_MODE_DIRECTED)
+ return deactivate_afu_directed(afu);
+ if (mode == CXL_MODE_DEDICATED)
+ return deactivate_dedicated_process(afu);
+ return 0;
+}
+
+int cxl_afu_deactivate_mode(struct cxl_afu *afu)
+{
+ return _cxl_afu_deactivate_mode(afu, afu->current_mode);
+}
+
+int cxl_afu_activate_mode(struct cxl_afu *afu, int mode)
+{
+ if (!mode)
+ return 0;
+ if (!(mode & afu->modes_supported))
+ return -EINVAL;
+
+ if (!cxl_adapter_link_ok(afu->adapter)) {
+ WARN(1, "Device link is down, refusing to activate!\n");
+ return -EIO;
+ }
+
+ if (mode == CXL_MODE_DIRECTED)
+ return activate_afu_directed(afu);
+ if (mode == CXL_MODE_DEDICATED)
+ return activate_dedicated_process(afu);
+
+ return -EINVAL;
+}
+
+int cxl_attach_process(struct cxl_context *ctx, bool kernel, u64 wed, u64 amr)
+{
+ if (!cxl_adapter_link_ok(ctx->afu->adapter)) {
+ WARN(1, "Device link is down, refusing to attach process!\n");
+ return -EIO;
+ }
+
+ ctx->kernel = kernel;
+ if (ctx->afu->current_mode == CXL_MODE_DIRECTED)
+ return attach_afu_directed(ctx, wed, amr);
+
+ if (ctx->afu->current_mode == CXL_MODE_DEDICATED)
+ return attach_dedicated(ctx, wed, amr);
+
+ return -EINVAL;
+}
+
+static inline int detach_process_native_dedicated(struct cxl_context *ctx)
+{
+ __cxl_afu_reset(ctx->afu);
+ cxl_afu_disable(ctx->afu);
+ cxl_psl_purge(ctx->afu);
+ return 0;
+}
+
+static inline int detach_process_native_afu_directed(struct cxl_context *ctx)
+{
+ if (!ctx->pe_inserted)
+ return 0;
+ if (terminate_process_element(ctx))
+ return -1;
+ if (remove_process_element(ctx))
+ return -1;
+
+ return 0;
+}
+
+int cxl_detach_process(struct cxl_context *ctx)
+{
+ trace_cxl_detach(ctx);
+
+ if (ctx->afu->current_mode == CXL_MODE_DEDICATED)
+ return detach_process_native_dedicated(ctx);
+
+ return detach_process_native_afu_directed(ctx);
+}
+
+int cxl_get_irq(struct cxl_afu *afu, struct cxl_irq_info *info)
+{
+ u64 pidtid;
+
+ /* If the adapter has gone away, we can't get any meaningful
+ * information.
+ */
+ if (!cxl_adapter_link_ok(afu->adapter))
+ return -EIO;
+
+ info->dsisr = cxl_p2n_read(afu, CXL_PSL_DSISR_An);
+ info->dar = cxl_p2n_read(afu, CXL_PSL_DAR_An);
+ info->dsr = cxl_p2n_read(afu, CXL_PSL_DSR_An);
+ pidtid = cxl_p2n_read(afu, CXL_PSL_PID_TID_An);
+ info->pid = pidtid >> 32;
+ info->tid = pidtid & 0xffffffff;
+ info->afu_err = cxl_p2n_read(afu, CXL_AFU_ERR_An);
+ info->errstat = cxl_p2n_read(afu, CXL_PSL_ErrStat_An);
+
+ return 0;
+}
+
+static void recover_psl_err(struct cxl_afu *afu, u64 errstat)
+{
+ u64 dsisr;
+
+ pr_devel("RECOVERING FROM PSL ERROR... (0x%016llx)\n", errstat);
+
+ /* Clear PSL_DSISR[PE] */
+ dsisr = cxl_p2n_read(afu, CXL_PSL_DSISR_An);
+ cxl_p2n_write(afu, CXL_PSL_DSISR_An, dsisr & ~CXL_PSL_DSISR_An_PE);
+
+ /* Write 1s to clear error status bits */
+ cxl_p2n_write(afu, CXL_PSL_ErrStat_An, errstat);
+}
+
+int cxl_ack_irq(struct cxl_context *ctx, u64 tfc, u64 psl_reset_mask)
+{
+ trace_cxl_psl_irq_ack(ctx, tfc);
+ if (tfc)
+ cxl_p2n_write(ctx->afu, CXL_PSL_TFC_An, tfc);
+ if (psl_reset_mask)
+ recover_psl_err(ctx->afu, psl_reset_mask);
+
+ return 0;
+}
+
+int cxl_check_error(struct cxl_afu *afu)
+{
+ return (cxl_p1n_read(afu, CXL_PSL_SCNTL_An) == ~0ULL);
+}
diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c
new file mode 100644
index 0000000..0c6c17a
--- /dev/null
+++ b/drivers/misc/cxl/pci.c
@@ -0,0 +1,1580 @@
+/*
+ * Copyright 2014 IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/pci_regs.h>
+#include <linux/pci_ids.h>
+#include <linux/device.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/sort.h>
+#include <linux/pci.h>
+#include <linux/of.h>
+#include <linux/delay.h>
+#include <asm/opal.h>
+#include <asm/msi_bitmap.h>
+#include <asm/pci-bridge.h> /* for struct pci_controller */
+#include <asm/pnv-pci.h>
+#include <asm/io.h>
+
+#include "cxl.h"
+#include <misc/cxl.h>
+
+
+#define CXL_PCI_VSEC_ID 0x1280
+#define CXL_VSEC_MIN_SIZE 0x80
+
+#define CXL_READ_VSEC_LENGTH(dev, vsec, dest) \
+ { \
+ pci_read_config_word(dev, vsec + 0x6, dest); \
+ *dest >>= 4; \
+ }
+#define CXL_READ_VSEC_NAFUS(dev, vsec, dest) \
+ pci_read_config_byte(dev, vsec + 0x8, dest)
+
+#define CXL_READ_VSEC_STATUS(dev, vsec, dest) \
+ pci_read_config_byte(dev, vsec + 0x9, dest)
+#define CXL_STATUS_SECOND_PORT 0x80
+#define CXL_STATUS_MSI_X_FULL 0x40
+#define CXL_STATUS_MSI_X_SINGLE 0x20
+#define CXL_STATUS_FLASH_RW 0x08
+#define CXL_STATUS_FLASH_RO 0x04
+#define CXL_STATUS_LOADABLE_AFU 0x02
+#define CXL_STATUS_LOADABLE_PSL 0x01
+/* If we see these features we won't try to use the card */
+#define CXL_UNSUPPORTED_FEATURES \
+ (CXL_STATUS_MSI_X_FULL | CXL_STATUS_MSI_X_SINGLE)
+
+#define CXL_READ_VSEC_MODE_CONTROL(dev, vsec, dest) \
+ pci_read_config_byte(dev, vsec + 0xa, dest)
+#define CXL_WRITE_VSEC_MODE_CONTROL(dev, vsec, val) \
+ pci_write_config_byte(dev, vsec + 0xa, val)
+#define CXL_VSEC_PROTOCOL_MASK 0xe0
+#define CXL_VSEC_PROTOCOL_1024TB 0x80
+#define CXL_VSEC_PROTOCOL_512TB 0x40
+#define CXL_VSEC_PROTOCOL_256TB 0x20 /* Power 8 uses this */
+#define CXL_VSEC_PROTOCOL_ENABLE 0x01
+
+#define CXL_READ_VSEC_PSL_REVISION(dev, vsec, dest) \
+ pci_read_config_word(dev, vsec + 0xc, dest)
+#define CXL_READ_VSEC_CAIA_MINOR(dev, vsec, dest) \
+ pci_read_config_byte(dev, vsec + 0xe, dest)
+#define CXL_READ_VSEC_CAIA_MAJOR(dev, vsec, dest) \
+ pci_read_config_byte(dev, vsec + 0xf, dest)
+#define CXL_READ_VSEC_BASE_IMAGE(dev, vsec, dest) \
+ pci_read_config_word(dev, vsec + 0x10, dest)
+
+#define CXL_READ_VSEC_IMAGE_STATE(dev, vsec, dest) \
+ pci_read_config_byte(dev, vsec + 0x13, dest)
+#define CXL_WRITE_VSEC_IMAGE_STATE(dev, vsec, val) \
+ pci_write_config_byte(dev, vsec + 0x13, val)
+#define CXL_VSEC_USER_IMAGE_LOADED 0x80 /* RO */
+#define CXL_VSEC_PERST_LOADS_IMAGE 0x20 /* RW */
+#define CXL_VSEC_PERST_SELECT_USER 0x10 /* RW */
+
+#define CXL_READ_VSEC_AFU_DESC_OFF(dev, vsec, dest) \
+ pci_read_config_dword(dev, vsec + 0x20, dest)
+#define CXL_READ_VSEC_AFU_DESC_SIZE(dev, vsec, dest) \
+ pci_read_config_dword(dev, vsec + 0x24, dest)
+#define CXL_READ_VSEC_PS_OFF(dev, vsec, dest) \
+ pci_read_config_dword(dev, vsec + 0x28, dest)
+#define CXL_READ_VSEC_PS_SIZE(dev, vsec, dest) \
+ pci_read_config_dword(dev, vsec + 0x2c, dest)
+
+
+/* This works a little different than the p1/p2 register accesses to make it
+ * easier to pull out individual fields */
+#define AFUD_READ(afu, off) in_be64(afu->afu_desc_mmio + off)
+#define AFUD_READ_LE(afu, off) in_le64(afu->afu_desc_mmio + off)
+#define EXTRACT_PPC_BIT(val, bit) (!!(val & PPC_BIT(bit)))
+#define EXTRACT_PPC_BITS(val, bs, be) ((val & PPC_BITMASK(bs, be)) >> PPC_BITLSHIFT(be))
+
+#define AFUD_READ_INFO(afu) AFUD_READ(afu, 0x0)
+#define AFUD_NUM_INTS_PER_PROC(val) EXTRACT_PPC_BITS(val, 0, 15)
+#define AFUD_NUM_PROCS(val) EXTRACT_PPC_BITS(val, 16, 31)
+#define AFUD_NUM_CRS(val) EXTRACT_PPC_BITS(val, 32, 47)
+#define AFUD_MULTIMODE(val) EXTRACT_PPC_BIT(val, 48)
+#define AFUD_PUSH_BLOCK_TRANSFER(val) EXTRACT_PPC_BIT(val, 55)
+#define AFUD_DEDICATED_PROCESS(val) EXTRACT_PPC_BIT(val, 59)
+#define AFUD_AFU_DIRECTED(val) EXTRACT_PPC_BIT(val, 61)
+#define AFUD_TIME_SLICED(val) EXTRACT_PPC_BIT(val, 63)
+#define AFUD_READ_CR(afu) AFUD_READ(afu, 0x20)
+#define AFUD_CR_LEN(val) EXTRACT_PPC_BITS(val, 8, 63)
+#define AFUD_READ_CR_OFF(afu) AFUD_READ(afu, 0x28)
+#define AFUD_READ_PPPSA(afu) AFUD_READ(afu, 0x30)
+#define AFUD_PPPSA_PP(val) EXTRACT_PPC_BIT(val, 6)
+#define AFUD_PPPSA_PSA(val) EXTRACT_PPC_BIT(val, 7)
+#define AFUD_PPPSA_LEN(val) EXTRACT_PPC_BITS(val, 8, 63)
+#define AFUD_READ_PPPSA_OFF(afu) AFUD_READ(afu, 0x38)
+#define AFUD_READ_EB(afu) AFUD_READ(afu, 0x40)
+#define AFUD_EB_LEN(val) EXTRACT_PPC_BITS(val, 8, 63)
+#define AFUD_READ_EB_OFF(afu) AFUD_READ(afu, 0x48)
+
+u16 cxl_afu_cr_read16(struct cxl_afu *afu, int cr, u64 off)
+{
+ u64 aligned_off = off & ~0x3L;
+ u32 val;
+
+ val = cxl_afu_cr_read32(afu, cr, aligned_off);
+ return (val >> ((off & 0x2) * 8)) & 0xffff;
+}
+
+u8 cxl_afu_cr_read8(struct cxl_afu *afu, int cr, u64 off)
+{
+ u64 aligned_off = off & ~0x3L;
+ u32 val;
+
+ val = cxl_afu_cr_read32(afu, cr, aligned_off);
+ return (val >> ((off & 0x3) * 8)) & 0xff;
+}
+
+static const struct pci_device_id cxl_pci_tbl[] = {
+ { PCI_DEVICE(PCI_VENDOR_ID_IBM, 0x0477), },
+ { PCI_DEVICE(PCI_VENDOR_ID_IBM, 0x044b), },
+ { PCI_DEVICE(PCI_VENDOR_ID_IBM, 0x04cf), },
+ { PCI_DEVICE(PCI_VENDOR_ID_IBM, 0x0601), },
+ { PCI_DEVICE_CLASS(0x120000, ~0), },
+
+ { }
+};
+MODULE_DEVICE_TABLE(pci, cxl_pci_tbl);
+
+
+/*
+ * Mostly using these wrappers to avoid confusion:
+ * priv 1 is BAR2, while priv 2 is BAR0
+ */
+static inline resource_size_t p1_base(struct pci_dev *dev)
+{
+ return pci_resource_start(dev, 2);
+}
+
+static inline resource_size_t p1_size(struct pci_dev *dev)
+{
+ return pci_resource_len(dev, 2);
+}
+
+static inline resource_size_t p2_base(struct pci_dev *dev)
+{
+ return pci_resource_start(dev, 0);
+}
+
+static inline resource_size_t p2_size(struct pci_dev *dev)
+{
+ return pci_resource_len(dev, 0);
+}
+
+static int find_cxl_vsec(struct pci_dev *dev)
+{
+ int vsec = 0;
+ u16 val;
+
+ while ((vsec = pci_find_next_ext_capability(dev, vsec, PCI_EXT_CAP_ID_VNDR))) {
+ pci_read_config_word(dev, vsec + 0x4, &val);
+ if (val == CXL_PCI_VSEC_ID)
+ return vsec;
+ }
+ return 0;
+
+}
+
+static void dump_cxl_config_space(struct pci_dev *dev)
+{
+ int vsec;
+ u32 val;
+
+ dev_info(&dev->dev, "dump_cxl_config_space\n");
+
+ pci_read_config_dword(dev, PCI_BASE_ADDRESS_0, &val);
+ dev_info(&dev->dev, "BAR0: %#.8x\n", val);
+ pci_read_config_dword(dev, PCI_BASE_ADDRESS_1, &val);
+ dev_info(&dev->dev, "BAR1: %#.8x\n", val);
+ pci_read_config_dword(dev, PCI_BASE_ADDRESS_2, &val);
+ dev_info(&dev->dev, "BAR2: %#.8x\n", val);
+ pci_read_config_dword(dev, PCI_BASE_ADDRESS_3, &val);
+ dev_info(&dev->dev, "BAR3: %#.8x\n", val);
+ pci_read_config_dword(dev, PCI_BASE_ADDRESS_4, &val);
+ dev_info(&dev->dev, "BAR4: %#.8x\n", val);
+ pci_read_config_dword(dev, PCI_BASE_ADDRESS_5, &val);
+ dev_info(&dev->dev, "BAR5: %#.8x\n", val);
+
+ dev_info(&dev->dev, "p1 regs: %#llx, len: %#llx\n",
+ p1_base(dev), p1_size(dev));
+ dev_info(&dev->dev, "p2 regs: %#llx, len: %#llx\n",
+ p2_base(dev), p2_size(dev));
+ dev_info(&dev->dev, "BAR 4/5: %#llx, len: %#llx\n",
+ pci_resource_start(dev, 4), pci_resource_len(dev, 4));
+
+ if (!(vsec = find_cxl_vsec(dev)))
+ return;
+
+#define show_reg(name, what) \
+ dev_info(&dev->dev, "cxl vsec: %30s: %#x\n", name, what)
+
+ pci_read_config_dword(dev, vsec + 0x0, &val);
+ show_reg("Cap ID", (val >> 0) & 0xffff);
+ show_reg("Cap Ver", (val >> 16) & 0xf);
+ show_reg("Next Cap Ptr", (val >> 20) & 0xfff);
+ pci_read_config_dword(dev, vsec + 0x4, &val);
+ show_reg("VSEC ID", (val >> 0) & 0xffff);
+ show_reg("VSEC Rev", (val >> 16) & 0xf);
+ show_reg("VSEC Length", (val >> 20) & 0xfff);
+ pci_read_config_dword(dev, vsec + 0x8, &val);
+ show_reg("Num AFUs", (val >> 0) & 0xff);
+ show_reg("Status", (val >> 8) & 0xff);
+ show_reg("Mode Control", (val >> 16) & 0xff);
+ show_reg("Reserved", (val >> 24) & 0xff);
+ pci_read_config_dword(dev, vsec + 0xc, &val);
+ show_reg("PSL Rev", (val >> 0) & 0xffff);
+ show_reg("CAIA Ver", (val >> 16) & 0xffff);
+ pci_read_config_dword(dev, vsec + 0x10, &val);
+ show_reg("Base Image Rev", (val >> 0) & 0xffff);
+ show_reg("Reserved", (val >> 16) & 0x0fff);
+ show_reg("Image Control", (val >> 28) & 0x3);
+ show_reg("Reserved", (val >> 30) & 0x1);
+ show_reg("Image Loaded", (val >> 31) & 0x1);
+
+ pci_read_config_dword(dev, vsec + 0x14, &val);
+ show_reg("Reserved", val);
+ pci_read_config_dword(dev, vsec + 0x18, &val);
+ show_reg("Reserved", val);
+ pci_read_config_dword(dev, vsec + 0x1c, &val);
+ show_reg("Reserved", val);
+
+ pci_read_config_dword(dev, vsec + 0x20, &val);
+ show_reg("AFU Descriptor Offset", val);
+ pci_read_config_dword(dev, vsec + 0x24, &val);
+ show_reg("AFU Descriptor Size", val);
+ pci_read_config_dword(dev, vsec + 0x28, &val);
+ show_reg("Problem State Offset", val);
+ pci_read_config_dword(dev, vsec + 0x2c, &val);
+ show_reg("Problem State Size", val);
+
+ pci_read_config_dword(dev, vsec + 0x30, &val);
+ show_reg("Reserved", val);
+ pci_read_config_dword(dev, vsec + 0x34, &val);
+ show_reg("Reserved", val);
+ pci_read_config_dword(dev, vsec + 0x38, &val);
+ show_reg("Reserved", val);
+ pci_read_config_dword(dev, vsec + 0x3c, &val);
+ show_reg("Reserved", val);
+
+ pci_read_config_dword(dev, vsec + 0x40, &val);
+ show_reg("PSL Programming Port", val);
+ pci_read_config_dword(dev, vsec + 0x44, &val);
+ show_reg("PSL Programming Control", val);
+
+ pci_read_config_dword(dev, vsec + 0x48, &val);
+ show_reg("Reserved", val);
+ pci_read_config_dword(dev, vsec + 0x4c, &val);
+ show_reg("Reserved", val);
+
+ pci_read_config_dword(dev, vsec + 0x50, &val);
+ show_reg("Flash Address Register", val);
+ pci_read_config_dword(dev, vsec + 0x54, &val);
+ show_reg("Flash Size Register", val);
+ pci_read_config_dword(dev, vsec + 0x58, &val);
+ show_reg("Flash Status/Control Register", val);
+ pci_read_config_dword(dev, vsec + 0x58, &val);
+ show_reg("Flash Data Port", val);
+
+#undef show_reg
+}
+
+static void dump_afu_descriptor(struct cxl_afu *afu)
+{
+ u64 val, afu_cr_num, afu_cr_off, afu_cr_len;
+ int i;
+
+#define show_reg(name, what) \
+ dev_info(&afu->dev, "afu desc: %30s: %#llx\n", name, what)
+
+ val = AFUD_READ_INFO(afu);
+ show_reg("num_ints_per_process", AFUD_NUM_INTS_PER_PROC(val));
+ show_reg("num_of_processes", AFUD_NUM_PROCS(val));
+ show_reg("num_of_afu_CRs", AFUD_NUM_CRS(val));
+ show_reg("req_prog_mode", val & 0xffffULL);
+ afu_cr_num = AFUD_NUM_CRS(val);
+
+ val = AFUD_READ(afu, 0x8);
+ show_reg("Reserved", val);
+ val = AFUD_READ(afu, 0x10);
+ show_reg("Reserved", val);
+ val = AFUD_READ(afu, 0x18);
+ show_reg("Reserved", val);
+
+ val = AFUD_READ_CR(afu);
+ show_reg("Reserved", (val >> (63-7)) & 0xff);
+ show_reg("AFU_CR_len", AFUD_CR_LEN(val));
+ afu_cr_len = AFUD_CR_LEN(val) * 256;
+
+ val = AFUD_READ_CR_OFF(afu);
+ afu_cr_off = val;
+ show_reg("AFU_CR_offset", val);
+
+ val = AFUD_READ_PPPSA(afu);
+ show_reg("PerProcessPSA_control", (val >> (63-7)) & 0xff);
+ show_reg("PerProcessPSA Length", AFUD_PPPSA_LEN(val));
+
+ val = AFUD_READ_PPPSA_OFF(afu);
+ show_reg("PerProcessPSA_offset", val);
+
+ val = AFUD_READ_EB(afu);
+ show_reg("Reserved", (val >> (63-7)) & 0xff);
+ show_reg("AFU_EB_len", AFUD_EB_LEN(val));
+
+ val = AFUD_READ_EB_OFF(afu);
+ show_reg("AFU_EB_offset", val);
+
+ for (i = 0; i < afu_cr_num; i++) {
+ val = AFUD_READ_LE(afu, afu_cr_off + i * afu_cr_len);
+ show_reg("CR Vendor", val & 0xffff);
+ show_reg("CR Device", (val >> 16) & 0xffff);
+ }
+#undef show_reg
+}
+
+static int init_implementation_adapter_regs(struct cxl *adapter, struct pci_dev *dev)
+{
+ struct device_node *np;
+ const __be32 *prop;
+ u64 psl_dsnctl;
+ u64 chipid;
+
+ if (!(np = pnv_pci_get_phb_node(dev)))
+ return -ENODEV;
+
+ while (np && !(prop = of_get_property(np, "ibm,chip-id", NULL)))
+ np = of_get_next_parent(np);
+ if (!np)
+ return -ENODEV;
+ chipid = be32_to_cpup(prop);
+ of_node_put(np);
+
+ /* Tell PSL where to route data to */
+ psl_dsnctl = 0x02E8900002000000ULL | (chipid << (63-5));
+ cxl_p1_write(adapter, CXL_PSL_DSNDCTL, psl_dsnctl);
+ cxl_p1_write(adapter, CXL_PSL_RESLCKTO, 0x20000000200ULL);
+ /* snoop write mask */
+ cxl_p1_write(adapter, CXL_PSL_SNWRALLOC, 0x00000000FFFFFFFFULL);
+ /* set fir_accum */
+ cxl_p1_write(adapter, CXL_PSL_FIR_CNTL, 0x0800000000000000ULL);
+ /* for debugging with trace arrays */
+ cxl_p1_write(adapter, CXL_PSL_TRACE, 0x0000FF7C00000000ULL);
+
+ return 0;
+}
+
+#define TBSYNC_CNT(n) (((u64)n & 0x7) << (63-6))
+#define _2048_250MHZ_CYCLES 1
+
+static int cxl_setup_psl_timebase(struct cxl *adapter, struct pci_dev *dev)
+{
+ u64 psl_tb;
+ int delta;
+ unsigned int retry = 0;
+ struct device_node *np;
+
+ if (!(np = pnv_pci_get_phb_node(dev)))
+ return -ENODEV;
+
+ /* Do not fail when CAPP timebase sync is not supported by OPAL */
+ of_node_get(np);
+ if (! of_get_property(np, "ibm,capp-timebase-sync", NULL)) {
+ of_node_put(np);
+ pr_err("PSL: Timebase sync: OPAL support missing\n");
+ return 0;
+ }
+ of_node_put(np);
+
+ /*
+ * Setup PSL Timebase Control and Status register
+ * with the recommended Timebase Sync Count value
+ */
+ cxl_p1_write(adapter, CXL_PSL_TB_CTLSTAT,
+ TBSYNC_CNT(2 * _2048_250MHZ_CYCLES));
+
+ /* Enable PSL Timebase */
+ cxl_p1_write(adapter, CXL_PSL_Control, 0x0000000000000000);
+ cxl_p1_write(adapter, CXL_PSL_Control, CXL_PSL_Control_tb);
+
+ /* Wait until CORE TB and PSL TB difference <= 16usecs */
+ do {
+ msleep(1);
+ if (retry++ > 5) {
+ pr_err("PSL: Timebase sync: giving up!\n");
+ return -EIO;
+ }
+ psl_tb = cxl_p1_read(adapter, CXL_PSL_Timebase);
+ delta = mftb() - psl_tb;
+ if (delta < 0)
+ delta = -delta;
+ } while (tb_to_ns(delta) > 16000);
+
+ return 0;
+}
+
+static int init_implementation_afu_regs(struct cxl_afu *afu)
+{
+ /* read/write masks for this slice */
+ cxl_p1n_write(afu, CXL_PSL_APCALLOC_A, 0xFFFFFFFEFEFEFEFEULL);
+ /* APC read/write masks for this slice */
+ cxl_p1n_write(afu, CXL_PSL_COALLOC_A, 0xFF000000FEFEFEFEULL);
+ /* for debugging with trace arrays */
+ cxl_p1n_write(afu, CXL_PSL_SLICE_TRACE, 0x0000FFFF00000000ULL);
+ cxl_p1n_write(afu, CXL_PSL_RXCTL_A, CXL_PSL_RXCTL_AFUHP_4S);
+
+ return 0;
+}
+
+int cxl_setup_irq(struct cxl *adapter, unsigned int hwirq,
+ unsigned int virq)
+{
+ struct pci_dev *dev = to_pci_dev(adapter->dev.parent);
+
+ return pnv_cxl_ioda_msi_setup(dev, hwirq, virq);
+}
+
+int cxl_update_image_control(struct cxl *adapter)
+{
+ struct pci_dev *dev = to_pci_dev(adapter->dev.parent);
+ int rc;
+ int vsec;
+ u8 image_state;
+
+ if (!(vsec = find_cxl_vsec(dev))) {
+ dev_err(&dev->dev, "ABORTING: CXL VSEC not found!\n");
+ return -ENODEV;
+ }
+
+ if ((rc = CXL_READ_VSEC_IMAGE_STATE(dev, vsec, &image_state))) {
+ dev_err(&dev->dev, "failed to read image state: %i\n", rc);
+ return rc;
+ }
+
+ if (adapter->perst_loads_image)
+ image_state |= CXL_VSEC_PERST_LOADS_IMAGE;
+ else
+ image_state &= ~CXL_VSEC_PERST_LOADS_IMAGE;
+
+ if (adapter->perst_select_user)
+ image_state |= CXL_VSEC_PERST_SELECT_USER;
+ else
+ image_state &= ~CXL_VSEC_PERST_SELECT_USER;
+
+ if ((rc = CXL_WRITE_VSEC_IMAGE_STATE(dev, vsec, image_state))) {
+ dev_err(&dev->dev, "failed to update image control: %i\n", rc);
+ return rc;
+ }
+
+ return 0;
+}
+
+int cxl_alloc_one_irq(struct cxl *adapter)
+{
+ struct pci_dev *dev = to_pci_dev(adapter->dev.parent);
+
+ return pnv_cxl_alloc_hwirqs(dev, 1);
+}
+
+void cxl_release_one_irq(struct cxl *adapter, int hwirq)
+{
+ struct pci_dev *dev = to_pci_dev(adapter->dev.parent);
+
+ return pnv_cxl_release_hwirqs(dev, hwirq, 1);
+}
+
+int cxl_alloc_irq_ranges(struct cxl_irq_ranges *irqs, struct cxl *adapter, unsigned int num)
+{
+ struct pci_dev *dev = to_pci_dev(adapter->dev.parent);
+
+ return pnv_cxl_alloc_hwirq_ranges(irqs, dev, num);
+}
+
+void cxl_release_irq_ranges(struct cxl_irq_ranges *irqs, struct cxl *adapter)
+{
+ struct pci_dev *dev = to_pci_dev(adapter->dev.parent);
+
+ pnv_cxl_release_hwirq_ranges(irqs, dev);
+}
+
+static int setup_cxl_bars(struct pci_dev *dev)
+{
+ /* Safety check in case we get backported to < 3.17 without M64 */
+ if ((p1_base(dev) < 0x100000000ULL) ||
+ (p2_base(dev) < 0x100000000ULL)) {
+ dev_err(&dev->dev, "ABORTING: M32 BAR assignment incompatible with CXL\n");
+ return -ENODEV;
+ }
+
+ /*
+ * BAR 4/5 has a special meaning for CXL and must be programmed with a
+ * special value corresponding to the CXL protocol address range.
+ * For POWER 8 that means bits 48:49 must be set to 10
+ */
+ pci_write_config_dword(dev, PCI_BASE_ADDRESS_4, 0x00000000);
+ pci_write_config_dword(dev, PCI_BASE_ADDRESS_5, 0x00020000);
+
+ return 0;
+}
+
+/* pciex node: ibm,opal-m64-window = <0x3d058 0x0 0x3d058 0x0 0x8 0x0>; */
+static int switch_card_to_cxl(struct pci_dev *dev)
+{
+ int vsec;
+ u8 val;
+ int rc;
+
+ dev_info(&dev->dev, "switch card to CXL\n");
+
+ if (!(vsec = find_cxl_vsec(dev))) {
+ dev_err(&dev->dev, "ABORTING: CXL VSEC not found!\n");
+ return -ENODEV;
+ }
+
+ if ((rc = CXL_READ_VSEC_MODE_CONTROL(dev, vsec, &val))) {
+ dev_err(&dev->dev, "failed to read current mode control: %i", rc);
+ return rc;
+ }
+ val &= ~CXL_VSEC_PROTOCOL_MASK;
+ val |= CXL_VSEC_PROTOCOL_256TB | CXL_VSEC_PROTOCOL_ENABLE;
+ if ((rc = CXL_WRITE_VSEC_MODE_CONTROL(dev, vsec, val))) {
+ dev_err(&dev->dev, "failed to enable CXL protocol: %i", rc);
+ return rc;
+ }
+ /*
+ * The CAIA spec (v0.12 11.6 Bi-modal Device Support) states
+ * we must wait 100ms after this mode switch before touching
+ * PCIe config space.
+ */
+ msleep(100);
+
+ return 0;
+}
+
+static int cxl_map_slice_regs(struct cxl_afu *afu, struct cxl *adapter, struct pci_dev *dev)
+{
+ u64 p1n_base, p2n_base, afu_desc;
+ const u64 p1n_size = 0x100;
+ const u64 p2n_size = 0x1000;
+
+ p1n_base = p1_base(dev) + 0x10000 + (afu->slice * p1n_size);
+ p2n_base = p2_base(dev) + (afu->slice * p2n_size);
+ afu->psn_phys = p2_base(dev) + (adapter->ps_off + (afu->slice * adapter->ps_size));
+ afu_desc = p2_base(dev) + adapter->afu_desc_off + (afu->slice * adapter->afu_desc_size);
+
+ if (!(afu->p1n_mmio = ioremap(p1n_base, p1n_size)))
+ goto err;
+ if (!(afu->p2n_mmio = ioremap(p2n_base, p2n_size)))
+ goto err1;
+ if (afu_desc) {
+ if (!(afu->afu_desc_mmio = ioremap(afu_desc, adapter->afu_desc_size)))
+ goto err2;
+ }
+
+ return 0;
+err2:
+ iounmap(afu->p2n_mmio);
+err1:
+ iounmap(afu->p1n_mmio);
+err:
+ dev_err(&afu->dev, "Error mapping AFU MMIO regions\n");
+ return -ENOMEM;
+}
+
+static void cxl_unmap_slice_regs(struct cxl_afu *afu)
+{
+ if (afu->p2n_mmio) {
+ iounmap(afu->p2n_mmio);
+ afu->p2n_mmio = NULL;
+ }
+ if (afu->p1n_mmio) {
+ iounmap(afu->p1n_mmio);
+ afu->p1n_mmio = NULL;
+ }
+ if (afu->afu_desc_mmio) {
+ iounmap(afu->afu_desc_mmio);
+ afu->afu_desc_mmio = NULL;
+ }
+}
+
+static void cxl_release_afu(struct device *dev)
+{
+ struct cxl_afu *afu = to_cxl_afu(dev);
+
+ pr_devel("cxl_release_afu\n");
+
+ idr_destroy(&afu->contexts_idr);
+ cxl_release_spa(afu);
+
+ kfree(afu);
+}
+
+static struct cxl_afu *cxl_alloc_afu(struct cxl *adapter, int slice)
+{
+ struct cxl_afu *afu;
+
+ if (!(afu = kzalloc(sizeof(struct cxl_afu), GFP_KERNEL)))
+ return NULL;
+
+ afu->adapter = adapter;
+ afu->dev.parent = &adapter->dev;
+ afu->dev.release = cxl_release_afu;
+ afu->slice = slice;
+ idr_init(&afu->contexts_idr);
+ mutex_init(&afu->contexts_lock);
+ spin_lock_init(&afu->afu_cntl_lock);
+ mutex_init(&afu->spa_mutex);
+
+ afu->prefault_mode = CXL_PREFAULT_NONE;
+ afu->irqs_max = afu->adapter->user_irqs;
+
+ return afu;
+}
+
+/* Expects AFU struct to have recently been zeroed out */
+static int cxl_read_afu_descriptor(struct cxl_afu *afu)
+{
+ u64 val;
+
+ val = AFUD_READ_INFO(afu);
+ afu->pp_irqs = AFUD_NUM_INTS_PER_PROC(val);
+ afu->max_procs_virtualised = AFUD_NUM_PROCS(val);
+ afu->crs_num = AFUD_NUM_CRS(val);
+
+ if (AFUD_AFU_DIRECTED(val))
+ afu->modes_supported |= CXL_MODE_DIRECTED;
+ if (AFUD_DEDICATED_PROCESS(val))
+ afu->modes_supported |= CXL_MODE_DEDICATED;
+ if (AFUD_TIME_SLICED(val))
+ afu->modes_supported |= CXL_MODE_TIME_SLICED;
+
+ val = AFUD_READ_PPPSA(afu);
+ afu->pp_size = AFUD_PPPSA_LEN(val) * 4096;
+ afu->psa = AFUD_PPPSA_PSA(val);
+ if ((afu->pp_psa = AFUD_PPPSA_PP(val)))
+ afu->pp_offset = AFUD_READ_PPPSA_OFF(afu);
+
+ val = AFUD_READ_CR(afu);
+ afu->crs_len = AFUD_CR_LEN(val) * 256;
+ afu->crs_offset = AFUD_READ_CR_OFF(afu);
+
+
+ /* eb_len is in multiple of 4K */
+ afu->eb_len = AFUD_EB_LEN(AFUD_READ_EB(afu)) * 4096;
+ afu->eb_offset = AFUD_READ_EB_OFF(afu);
+
+ /* eb_off is 4K aligned so lower 12 bits are always zero */
+ if (EXTRACT_PPC_BITS(afu->eb_offset, 0, 11) != 0) {
+ dev_warn(&afu->dev,
+ "Invalid AFU error buffer offset %Lx\n",
+ afu->eb_offset);
+ dev_info(&afu->dev,
+ "Ignoring AFU error buffer in the descriptor\n");
+ /* indicate that no afu buffer exists */
+ afu->eb_len = 0;
+ }
+
+ return 0;
+}
+
+static int cxl_afu_descriptor_looks_ok(struct cxl_afu *afu)
+{
+ int i;
+
+ if (afu->psa && afu->adapter->ps_size <
+ (afu->pp_offset + afu->pp_size*afu->max_procs_virtualised)) {
+ dev_err(&afu->dev, "per-process PSA can't fit inside the PSA!\n");
+ return -ENODEV;
+ }
+
+ if (afu->pp_psa && (afu->pp_size < PAGE_SIZE))
+ dev_warn(&afu->dev, "AFU uses < PAGE_SIZE per-process PSA!");
+
+ for (i = 0; i < afu->crs_num; i++) {
+ if ((cxl_afu_cr_read32(afu, i, 0) == 0)) {
+ dev_err(&afu->dev, "ABORTING: AFU configuration record %i is invalid\n", i);
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
+
+static int sanitise_afu_regs(struct cxl_afu *afu)
+{
+ u64 reg;
+
+ /*
+ * Clear out any regs that contain either an IVTE or address or may be
+ * waiting on an acknowledgement to try to be a bit safer as we bring
+ * it online
+ */
+ reg = cxl_p2n_read(afu, CXL_AFU_Cntl_An);
+ if ((reg & CXL_AFU_Cntl_An_ES_MASK) != CXL_AFU_Cntl_An_ES_Disabled) {
+ dev_warn(&afu->dev, "WARNING: AFU was not disabled: %#016llx\n", reg);
+ if (__cxl_afu_reset(afu))
+ return -EIO;
+ if (cxl_afu_disable(afu))
+ return -EIO;
+ if (cxl_psl_purge(afu))
+ return -EIO;
+ }
+ cxl_p1n_write(afu, CXL_PSL_SPAP_An, 0x0000000000000000);
+ cxl_p1n_write(afu, CXL_PSL_IVTE_Limit_An, 0x0000000000000000);
+ cxl_p1n_write(afu, CXL_PSL_IVTE_Offset_An, 0x0000000000000000);
+ cxl_p1n_write(afu, CXL_PSL_AMBAR_An, 0x0000000000000000);
+ cxl_p1n_write(afu, CXL_PSL_SPOffset_An, 0x0000000000000000);
+ cxl_p1n_write(afu, CXL_HAURP_An, 0x0000000000000000);
+ cxl_p2n_write(afu, CXL_CSRP_An, 0x0000000000000000);
+ cxl_p2n_write(afu, CXL_AURP1_An, 0x0000000000000000);
+ cxl_p2n_write(afu, CXL_AURP0_An, 0x0000000000000000);
+ cxl_p2n_write(afu, CXL_SSTP1_An, 0x0000000000000000);
+ cxl_p2n_write(afu, CXL_SSTP0_An, 0x0000000000000000);
+ reg = cxl_p2n_read(afu, CXL_PSL_DSISR_An);
+ if (reg) {
+ dev_warn(&afu->dev, "AFU had pending DSISR: %#016llx\n", reg);
+ if (reg & CXL_PSL_DSISR_TRANS)
+ cxl_p2n_write(afu, CXL_PSL_TFC_An, CXL_PSL_TFC_An_AE);
+ else
+ cxl_p2n_write(afu, CXL_PSL_TFC_An, CXL_PSL_TFC_An_A);
+ }
+ reg = cxl_p1n_read(afu, CXL_PSL_SERR_An);
+ if (reg) {
+ if (reg & ~0xffff)
+ dev_warn(&afu->dev, "AFU had pending SERR: %#016llx\n", reg);
+ cxl_p1n_write(afu, CXL_PSL_SERR_An, reg & ~0xffff);
+ }
+ reg = cxl_p2n_read(afu, CXL_PSL_ErrStat_An);
+ if (reg) {
+ dev_warn(&afu->dev, "AFU had pending error status: %#016llx\n", reg);
+ cxl_p2n_write(afu, CXL_PSL_ErrStat_An, reg);
+ }
+
+ return 0;
+}
+
+#define ERR_BUFF_MAX_COPY_SIZE PAGE_SIZE
+/*
+ * afu_eb_read:
+ * Called from sysfs and reads the afu error info buffer. The h/w only supports
+ * 4/8 bytes aligned access. So in case the requested offset/count arent 8 byte
+ * aligned the function uses a bounce buffer which can be max PAGE_SIZE.
+ */
+ssize_t cxl_afu_read_err_buffer(struct cxl_afu *afu, char *buf,
+ loff_t off, size_t count)
+{
+ loff_t aligned_start, aligned_end;
+ size_t aligned_length;
+ void *tbuf;
+ const void __iomem *ebuf = afu->afu_desc_mmio + afu->eb_offset;
+
+ if (count == 0 || off < 0 || (size_t)off >= afu->eb_len)
+ return 0;
+
+ /* calculate aligned read window */
+ count = min((size_t)(afu->eb_len - off), count);
+ aligned_start = round_down(off, 8);
+ aligned_end = round_up(off + count, 8);
+ aligned_length = aligned_end - aligned_start;
+
+ /* max we can copy in one read is PAGE_SIZE */
+ if (aligned_length > ERR_BUFF_MAX_COPY_SIZE) {
+ aligned_length = ERR_BUFF_MAX_COPY_SIZE;
+ count = ERR_BUFF_MAX_COPY_SIZE - (off & 0x7);
+ }
+
+ /* use bounce buffer for copy */
+ tbuf = (void *)__get_free_page(GFP_TEMPORARY);
+ if (!tbuf)
+ return -ENOMEM;
+
+ /* perform aligned read from the mmio region */
+ memcpy_fromio(tbuf, ebuf + aligned_start, aligned_length);
+ memcpy(buf, tbuf + (off & 0x7), count);
+
+ free_page((unsigned long)tbuf);
+
+ return count;
+}
+
+static int cxl_configure_afu(struct cxl_afu *afu, struct cxl *adapter, struct pci_dev *dev)
+{
+ int rc;
+
+ if ((rc = cxl_map_slice_regs(afu, adapter, dev)))
+ return rc;
+
+ if ((rc = sanitise_afu_regs(afu)))
+ goto err1;
+
+ /* We need to reset the AFU before we can read the AFU descriptor */
+ if ((rc = __cxl_afu_reset(afu)))
+ goto err1;
+
+ if (cxl_verbose)
+ dump_afu_descriptor(afu);
+
+ if ((rc = cxl_read_afu_descriptor(afu)))
+ goto err1;
+
+ if ((rc = cxl_afu_descriptor_looks_ok(afu)))
+ goto err1;
+
+ if ((rc = init_implementation_afu_regs(afu)))
+ goto err1;
+
+ if ((rc = cxl_register_serr_irq(afu)))
+ goto err1;
+
+ if ((rc = cxl_register_psl_irq(afu)))
+ goto err2;
+
+ return 0;
+
+err2:
+ cxl_release_serr_irq(afu);
+err1:
+ cxl_unmap_slice_regs(afu);
+ return rc;
+}
+
+static void cxl_deconfigure_afu(struct cxl_afu *afu)
+{
+ cxl_release_psl_irq(afu);
+ cxl_release_serr_irq(afu);
+ cxl_unmap_slice_regs(afu);
+}
+
+static int cxl_init_afu(struct cxl *adapter, int slice, struct pci_dev *dev)
+{
+ struct cxl_afu *afu;
+ int rc;
+
+ afu = cxl_alloc_afu(adapter, slice);
+ if (!afu)
+ return -ENOMEM;
+
+ rc = dev_set_name(&afu->dev, "afu%i.%i", adapter->adapter_num, slice);
+ if (rc)
+ goto err_free;
+
+ rc = cxl_configure_afu(afu, adapter, dev);
+ if (rc)
+ goto err_free;
+
+ /* Don't care if this fails */
+ cxl_debugfs_afu_add(afu);
+
+ /*
+ * After we call this function we must not free the afu directly, even
+ * if it returns an error!
+ */
+ if ((rc = cxl_register_afu(afu)))
+ goto err_put1;
+
+ if ((rc = cxl_sysfs_afu_add(afu)))
+ goto err_put1;
+
+ adapter->afu[afu->slice] = afu;
+
+ if ((rc = cxl_pci_vphb_add(afu)))
+ dev_info(&afu->dev, "Can't register vPHB\n");
+
+ return 0;
+
+err_put1:
+ cxl_deconfigure_afu(afu);
+ cxl_debugfs_afu_remove(afu);
+ device_unregister(&afu->dev);
+ return rc;
+
+err_free:
+ kfree(afu);
+ return rc;
+
+}
+
+static void cxl_remove_afu(struct cxl_afu *afu)
+{
+ pr_devel("cxl_remove_afu\n");
+
+ if (!afu)
+ return;
+
+ cxl_sysfs_afu_remove(afu);
+ cxl_debugfs_afu_remove(afu);
+
+ spin_lock(&afu->adapter->afu_list_lock);
+ afu->adapter->afu[afu->slice] = NULL;
+ spin_unlock(&afu->adapter->afu_list_lock);
+
+ cxl_context_detach_all(afu);
+ cxl_afu_deactivate_mode(afu);
+
+ cxl_deconfigure_afu(afu);
+ device_unregister(&afu->dev);
+}
+
+int cxl_reset(struct cxl *adapter)
+{
+ struct pci_dev *dev = to_pci_dev(adapter->dev.parent);
+ int rc;
+
+ if (adapter->perst_same_image) {
+ dev_warn(&dev->dev,
+ "cxl: refusing to reset/reflash when perst_reloads_same_image is set.\n");
+ return -EINVAL;
+ }
+
+ dev_info(&dev->dev, "CXL reset\n");
+
+ /* pcie_warm_reset requests a fundamental pci reset which includes a
+ * PERST assert/deassert. PERST triggers a loading of the image
+ * if "user" or "factory" is selected in sysfs */
+ if ((rc = pci_set_pcie_reset_state(dev, pcie_warm_reset))) {
+ dev_err(&dev->dev, "cxl: pcie_warm_reset failed\n");
+ return rc;
+ }
+
+ return rc;
+}
+
+static int cxl_map_adapter_regs(struct cxl *adapter, struct pci_dev *dev)
+{
+ if (pci_request_region(dev, 2, "priv 2 regs"))
+ goto err1;
+ if (pci_request_region(dev, 0, "priv 1 regs"))
+ goto err2;
+
+ pr_devel("cxl_map_adapter_regs: p1: %#016llx %#llx, p2: %#016llx %#llx",
+ p1_base(dev), p1_size(dev), p2_base(dev), p2_size(dev));
+
+ if (!(adapter->p1_mmio = ioremap(p1_base(dev), p1_size(dev))))
+ goto err3;
+
+ if (!(adapter->p2_mmio = ioremap(p2_base(dev), p2_size(dev))))
+ goto err4;
+
+ return 0;
+
+err4:
+ iounmap(adapter->p1_mmio);
+ adapter->p1_mmio = NULL;
+err3:
+ pci_release_region(dev, 0);
+err2:
+ pci_release_region(dev, 2);
+err1:
+ return -ENOMEM;
+}
+
+static void cxl_unmap_adapter_regs(struct cxl *adapter)
+{
+ if (adapter->p1_mmio) {
+ iounmap(adapter->p1_mmio);
+ adapter->p1_mmio = NULL;
+ pci_release_region(to_pci_dev(adapter->dev.parent), 2);
+ }
+ if (adapter->p2_mmio) {
+ iounmap(adapter->p2_mmio);
+ adapter->p2_mmio = NULL;
+ pci_release_region(to_pci_dev(adapter->dev.parent), 0);
+ }
+}
+
+static int cxl_read_vsec(struct cxl *adapter, struct pci_dev *dev)
+{
+ int vsec;
+ u32 afu_desc_off, afu_desc_size;
+ u32 ps_off, ps_size;
+ u16 vseclen;
+ u8 image_state;
+
+ if (!(vsec = find_cxl_vsec(dev))) {
+ dev_err(&dev->dev, "ABORTING: CXL VSEC not found!\n");
+ return -ENODEV;
+ }
+
+ CXL_READ_VSEC_LENGTH(dev, vsec, &vseclen);
+ if (vseclen < CXL_VSEC_MIN_SIZE) {
+ dev_err(&dev->dev, "ABORTING: CXL VSEC too short\n");
+ return -EINVAL;
+ }
+
+ CXL_READ_VSEC_STATUS(dev, vsec, &adapter->vsec_status);
+ CXL_READ_VSEC_PSL_REVISION(dev, vsec, &adapter->psl_rev);
+ CXL_READ_VSEC_CAIA_MAJOR(dev, vsec, &adapter->caia_major);
+ CXL_READ_VSEC_CAIA_MINOR(dev, vsec, &adapter->caia_minor);
+ CXL_READ_VSEC_BASE_IMAGE(dev, vsec, &adapter->base_image);
+ CXL_READ_VSEC_IMAGE_STATE(dev, vsec, &image_state);
+ adapter->user_image_loaded = !!(image_state & CXL_VSEC_USER_IMAGE_LOADED);
+ adapter->perst_select_user = !!(image_state & CXL_VSEC_USER_IMAGE_LOADED);
+
+ CXL_READ_VSEC_NAFUS(dev, vsec, &adapter->slices);
+ CXL_READ_VSEC_AFU_DESC_OFF(dev, vsec, &afu_desc_off);
+ CXL_READ_VSEC_AFU_DESC_SIZE(dev, vsec, &afu_desc_size);
+ CXL_READ_VSEC_PS_OFF(dev, vsec, &ps_off);
+ CXL_READ_VSEC_PS_SIZE(dev, vsec, &ps_size);
+
+ /* Convert everything to bytes, because there is NO WAY I'd look at the
+ * code a month later and forget what units these are in ;-) */
+ adapter->ps_off = ps_off * 64 * 1024;
+ adapter->ps_size = ps_size * 64 * 1024;
+ adapter->afu_desc_off = afu_desc_off * 64 * 1024;
+ adapter->afu_desc_size = afu_desc_size *64 * 1024;
+
+ /* Total IRQs - 1 PSL ERROR - #AFU*(1 slice error + 1 DSI) */
+ adapter->user_irqs = pnv_cxl_get_irq_count(dev) - 1 - 2*adapter->slices;
+
+ return 0;
+}
+
+/*
+ * Workaround a PCIe Host Bridge defect on some cards, that can cause
+ * malformed Transaction Layer Packet (TLP) errors to be erroneously
+ * reported. Mask this error in the Uncorrectable Error Mask Register.
+ *
+ * The upper nibble of the PSL revision is used to distinguish between
+ * different cards. The affected ones have it set to 0.
+ */
+static void cxl_fixup_malformed_tlp(struct cxl *adapter, struct pci_dev *dev)
+{
+ int aer;
+ u32 data;
+
+ if (adapter->psl_rev & 0xf000)
+ return;
+ if (!(aer = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR)))
+ return;
+ pci_read_config_dword(dev, aer + PCI_ERR_UNCOR_MASK, &data);
+ if (data & PCI_ERR_UNC_MALF_TLP)
+ if (data & PCI_ERR_UNC_INTN)
+ return;
+ data |= PCI_ERR_UNC_MALF_TLP;
+ data |= PCI_ERR_UNC_INTN;
+ pci_write_config_dword(dev, aer + PCI_ERR_UNCOR_MASK, data);
+}
+
+static int cxl_vsec_looks_ok(struct cxl *adapter, struct pci_dev *dev)
+{
+ if (adapter->vsec_status & CXL_STATUS_SECOND_PORT)
+ return -EBUSY;
+
+ if (adapter->vsec_status & CXL_UNSUPPORTED_FEATURES) {
+ dev_err(&dev->dev, "ABORTING: CXL requires unsupported features\n");
+ return -EINVAL;
+ }
+
+ if (!adapter->slices) {
+ /* Once we support dynamic reprogramming we can use the card if
+ * it supports loadable AFUs */
+ dev_err(&dev->dev, "ABORTING: Device has no AFUs\n");
+ return -EINVAL;
+ }
+
+ if (!adapter->afu_desc_off || !adapter->afu_desc_size) {
+ dev_err(&dev->dev, "ABORTING: VSEC shows no AFU descriptors\n");
+ return -EINVAL;
+ }
+
+ if (adapter->ps_size > p2_size(dev) - adapter->ps_off) {
+ dev_err(&dev->dev, "ABORTING: Problem state size larger than "
+ "available in BAR2: 0x%llx > 0x%llx\n",
+ adapter->ps_size, p2_size(dev) - adapter->ps_off);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static void cxl_release_adapter(struct device *dev)
+{
+ struct cxl *adapter = to_cxl_adapter(dev);
+
+ pr_devel("cxl_release_adapter\n");
+
+ cxl_remove_adapter_nr(adapter);
+
+ kfree(adapter);
+}
+
+static struct cxl *cxl_alloc_adapter(void)
+{
+ struct cxl *adapter;
+
+ if (!(adapter = kzalloc(sizeof(struct cxl), GFP_KERNEL)))
+ return NULL;
+
+ spin_lock_init(&adapter->afu_list_lock);
+
+ if (cxl_alloc_adapter_nr(adapter))
+ goto err1;
+
+ if (dev_set_name(&adapter->dev, "card%i", adapter->adapter_num))
+ goto err2;
+
+ return adapter;
+
+err2:
+ cxl_remove_adapter_nr(adapter);
+err1:
+ kfree(adapter);
+ return NULL;
+}
+
+#define CXL_PSL_ErrIVTE_tberror (0x1ull << (63-31))
+
+static int sanitise_adapter_regs(struct cxl *adapter)
+{
+ /* Clear PSL tberror bit by writing 1 to it */
+ cxl_p1_write(adapter, CXL_PSL_ErrIVTE, CXL_PSL_ErrIVTE_tberror);
+ return cxl_tlb_slb_invalidate(adapter);
+}
+
+/* This should contain *only* operations that can safely be done in
+ * both creation and recovery.
+ */
+static int cxl_configure_adapter(struct cxl *adapter, struct pci_dev *dev)
+{
+ int rc;
+
+ adapter->dev.parent = &dev->dev;
+ adapter->dev.release = cxl_release_adapter;
+ pci_set_drvdata(dev, adapter);
+
+ rc = pci_enable_device(dev);
+ if (rc) {
+ dev_err(&dev->dev, "pci_enable_device failed: %i\n", rc);
+ return rc;
+ }
+
+ if ((rc = cxl_read_vsec(adapter, dev)))
+ return rc;
+
+ if ((rc = cxl_vsec_looks_ok(adapter, dev)))
+ return rc;
+
+ cxl_fixup_malformed_tlp(adapter, dev);
+
+ if ((rc = setup_cxl_bars(dev)))
+ return rc;
+
+ if ((rc = switch_card_to_cxl(dev)))
+ return rc;
+
+ if ((rc = cxl_update_image_control(adapter)))
+ return rc;
+
+ if ((rc = cxl_map_adapter_regs(adapter, dev)))
+ return rc;
+
+ if ((rc = sanitise_adapter_regs(adapter)))
+ goto err;
+
+ if ((rc = init_implementation_adapter_regs(adapter, dev)))
+ goto err;
+
+ if ((rc = pnv_phb_to_cxl_mode(dev, OPAL_PHB_CAPI_MODE_CAPI)))
+ goto err;
+
+ /* If recovery happened, the last step is to turn on snooping.
+ * In the non-recovery case this has no effect */
+ if ((rc = pnv_phb_to_cxl_mode(dev, OPAL_PHB_CAPI_MODE_SNOOP_ON)))
+ goto err;
+
+ if ((rc = cxl_setup_psl_timebase(adapter, dev)))
+ goto err;
+
+ if ((rc = cxl_register_psl_err_irq(adapter)))
+ goto err;
+
+ return 0;
+
+err:
+ cxl_unmap_adapter_regs(adapter);
+ return rc;
+
+}
+
+static void cxl_deconfigure_adapter(struct cxl *adapter)
+{
+ struct pci_dev *pdev = to_pci_dev(adapter->dev.parent);
+
+ cxl_release_psl_err_irq(adapter);
+ cxl_unmap_adapter_regs(adapter);
+
+ pci_disable_device(pdev);
+}
+
+static struct cxl *cxl_init_adapter(struct pci_dev *dev)
+{
+ struct cxl *adapter;
+ int rc;
+
+ adapter = cxl_alloc_adapter();
+ if (!adapter)
+ return ERR_PTR(-ENOMEM);
+
+ /* Set defaults for parameters which need to persist over
+ * configure/reconfigure
+ */
+ adapter->perst_loads_image = true;
+ adapter->perst_same_image = false;
+
+ rc = cxl_configure_adapter(adapter, dev);
+ if (rc) {
+ pci_disable_device(dev);
+ cxl_release_adapter(&adapter->dev);
+ return ERR_PTR(rc);
+ }
+
+ /* Don't care if this one fails: */
+ cxl_debugfs_adapter_add(adapter);
+
+ /*
+ * After we call this function we must not free the adapter directly,
+ * even if it returns an error!
+ */
+ if ((rc = cxl_register_adapter(adapter)))
+ goto err_put1;
+
+ if ((rc = cxl_sysfs_adapter_add(adapter)))
+ goto err_put1;
+
+ return adapter;
+
+err_put1:
+ /* This should mirror cxl_remove_adapter, except without the
+ * sysfs parts
+ */
+ cxl_debugfs_adapter_remove(adapter);
+ cxl_deconfigure_adapter(adapter);
+ device_unregister(&adapter->dev);
+ return ERR_PTR(rc);
+}
+
+static void cxl_remove_adapter(struct cxl *adapter)
+{
+ pr_devel("cxl_remove_adapter\n");
+
+ cxl_sysfs_adapter_remove(adapter);
+ cxl_debugfs_adapter_remove(adapter);
+
+ cxl_deconfigure_adapter(adapter);
+
+ device_unregister(&adapter->dev);
+}
+
+static int cxl_probe(struct pci_dev *dev, const struct pci_device_id *id)
+{
+ struct cxl *adapter;
+ int slice;
+ int rc;
+
+ if (cxl_verbose)
+ dump_cxl_config_space(dev);
+
+ adapter = cxl_init_adapter(dev);
+ if (IS_ERR(adapter)) {
+ dev_err(&dev->dev, "cxl_init_adapter failed: %li\n", PTR_ERR(adapter));
+ return PTR_ERR(adapter);
+ }
+
+ for (slice = 0; slice < adapter->slices; slice++) {
+ if ((rc = cxl_init_afu(adapter, slice, dev))) {
+ dev_err(&dev->dev, "AFU %i failed to initialise: %i\n", slice, rc);
+ continue;
+ }
+
+ rc = cxl_afu_select_best_mode(adapter->afu[slice]);
+ if (rc)
+ dev_err(&dev->dev, "AFU %i failed to start: %i\n", slice, rc);
+ }
+
+ return 0;
+}
+
+static void cxl_remove(struct pci_dev *dev)
+{
+ struct cxl *adapter = pci_get_drvdata(dev);
+ struct cxl_afu *afu;
+ int i;
+
+ /*
+ * Lock to prevent someone grabbing a ref through the adapter list as
+ * we are removing it
+ */
+ for (i = 0; i < adapter->slices; i++) {
+ afu = adapter->afu[i];
+ cxl_pci_vphb_remove(afu);
+ cxl_remove_afu(afu);
+ }
+ cxl_remove_adapter(adapter);
+}
+
+static pci_ers_result_t cxl_vphb_error_detected(struct cxl_afu *afu,
+ pci_channel_state_t state)
+{
+ struct pci_dev *afu_dev;
+ pci_ers_result_t result = PCI_ERS_RESULT_NEED_RESET;
+ pci_ers_result_t afu_result = PCI_ERS_RESULT_NEED_RESET;
+
+ /* There should only be one entry, but go through the list
+ * anyway
+ */
+ list_for_each_entry(afu_dev, &afu->phb->bus->devices, bus_list) {
+ if (!afu_dev->driver)
+ continue;
+
+ afu_dev->error_state = state;
+
+ if (afu_dev->driver->err_handler)
+ afu_result = afu_dev->driver->err_handler->error_detected(afu_dev,
+ state);
+ /* Disconnect trumps all, NONE trumps NEED_RESET */
+ if (afu_result == PCI_ERS_RESULT_DISCONNECT)
+ result = PCI_ERS_RESULT_DISCONNECT;
+ else if ((afu_result == PCI_ERS_RESULT_NONE) &&
+ (result == PCI_ERS_RESULT_NEED_RESET))
+ result = PCI_ERS_RESULT_NONE;
+ }
+ return result;
+}
+
+static pci_ers_result_t cxl_pci_error_detected(struct pci_dev *pdev,
+ pci_channel_state_t state)
+{
+ struct cxl *adapter = pci_get_drvdata(pdev);
+ struct cxl_afu *afu;
+ pci_ers_result_t result = PCI_ERS_RESULT_NEED_RESET;
+ int i;
+
+ /* At this point, we could still have an interrupt pending.
+ * Let's try to get them out of the way before they do
+ * anything we don't like.
+ */
+ schedule();
+
+ /* If we're permanently dead, give up. */
+ if (state == pci_channel_io_perm_failure) {
+ /* Tell the AFU drivers; but we don't care what they
+ * say, we're going away.
+ */
+ for (i = 0; i < adapter->slices; i++) {
+ afu = adapter->afu[i];
+ cxl_vphb_error_detected(afu, state);
+ }
+ return PCI_ERS_RESULT_DISCONNECT;
+ }
+
+ /* Are we reflashing?
+ *
+ * If we reflash, we could come back as something entirely
+ * different, including a non-CAPI card. As such, by default
+ * we don't participate in the process. We'll be unbound and
+ * the slot re-probed. (TODO: check EEH doesn't blindly rebind
+ * us!)
+ *
+ * However, this isn't the entire story: for reliablity
+ * reasons, we usually want to reflash the FPGA on PERST in
+ * order to get back to a more reliable known-good state.
+ *
+ * This causes us a bit of a problem: if we reflash we can't
+ * trust that we'll come back the same - we could have a new
+ * image and been PERSTed in order to load that
+ * image. However, most of the time we actually *will* come
+ * back the same - for example a regular EEH event.
+ *
+ * Therefore, we allow the user to assert that the image is
+ * indeed the same and that we should continue on into EEH
+ * anyway.
+ */
+ if (adapter->perst_loads_image && !adapter->perst_same_image) {
+ /* TODO take the PHB out of CXL mode */
+ dev_info(&pdev->dev, "reflashing, so opting out of EEH!\n");
+ return PCI_ERS_RESULT_NONE;
+ }
+
+ /*
+ * At this point, we want to try to recover. We'll always
+ * need a complete slot reset: we don't trust any other reset.
+ *
+ * Now, we go through each AFU:
+ * - We send the driver, if bound, an error_detected callback.
+ * We expect it to clean up, but it can also tell us to give
+ * up and permanently detach the card. To simplify things, if
+ * any bound AFU driver doesn't support EEH, we give up on EEH.
+ *
+ * - We detach all contexts associated with the AFU. This
+ * does not free them, but puts them into a CLOSED state
+ * which causes any the associated files to return useful
+ * errors to userland. It also unmaps, but does not free,
+ * any IRQs.
+ *
+ * - We clean up our side: releasing and unmapping resources we hold
+ * so we can wire them up again when the hardware comes back up.
+ *
+ * Driver authors should note:
+ *
+ * - Any contexts you create in your kernel driver (except
+ * those associated with anonymous file descriptors) are
+ * your responsibility to free and recreate. Likewise with
+ * any attached resources.
+ *
+ * - We will take responsibility for re-initialising the
+ * device context (the one set up for you in
+ * cxl_pci_enable_device_hook and accessed through
+ * cxl_get_context). If you've attached IRQs or other
+ * resources to it, they remains yours to free.
+ *
+ * You can call the same functions to release resources as you
+ * normally would: we make sure that these functions continue
+ * to work when the hardware is down.
+ *
+ * Two examples:
+ *
+ * 1) If you normally free all your resources at the end of
+ * each request, or if you use anonymous FDs, your
+ * error_detected callback can simply set a flag to tell
+ * your driver not to start any new calls. You can then
+ * clear the flag in the resume callback.
+ *
+ * 2) If you normally allocate your resources on startup:
+ * * Set a flag in error_detected as above.
+ * * Let CXL detach your contexts.
+ * * In slot_reset, free the old resources and allocate new ones.
+ * * In resume, clear the flag to allow things to start.
+ */
+ for (i = 0; i < adapter->slices; i++) {
+ afu = adapter->afu[i];
+
+ result = cxl_vphb_error_detected(afu, state);
+
+ /* Only continue if everyone agrees on NEED_RESET */
+ if (result != PCI_ERS_RESULT_NEED_RESET)
+ return result;
+
+ cxl_context_detach_all(afu);
+ cxl_afu_deactivate_mode(afu);
+ cxl_deconfigure_afu(afu);
+ }
+ cxl_deconfigure_adapter(adapter);
+
+ return result;
+}
+
+static pci_ers_result_t cxl_pci_slot_reset(struct pci_dev *pdev)
+{
+ struct cxl *adapter = pci_get_drvdata(pdev);
+ struct cxl_afu *afu;
+ struct cxl_context *ctx;
+ struct pci_dev *afu_dev;
+ pci_ers_result_t afu_result = PCI_ERS_RESULT_RECOVERED;
+ pci_ers_result_t result = PCI_ERS_RESULT_RECOVERED;
+ int i;
+
+ if (cxl_configure_adapter(adapter, pdev))
+ goto err;
+
+ for (i = 0; i < adapter->slices; i++) {
+ afu = adapter->afu[i];
+
+ if (cxl_configure_afu(afu, adapter, pdev))
+ goto err;
+
+ if (cxl_afu_select_best_mode(afu))
+ goto err;
+
+ cxl_pci_vphb_reconfigure(afu);
+
+ list_for_each_entry(afu_dev, &afu->phb->bus->devices, bus_list) {
+ /* Reset the device context.
+ * TODO: make this less disruptive
+ */
+ ctx = cxl_get_context(afu_dev);
+
+ if (ctx && cxl_release_context(ctx))
+ goto err;
+
+ ctx = cxl_dev_context_init(afu_dev);
+ if (!ctx)
+ goto err;
+
+ afu_dev->dev.archdata.cxl_ctx = ctx;
+
+ if (cxl_afu_check_and_enable(afu))
+ goto err;
+
+ afu_dev->error_state = pci_channel_io_normal;
+
+ /* If there's a driver attached, allow it to
+ * chime in on recovery. Drivers should check
+ * if everything has come back OK, but
+ * shouldn't start new work until we call
+ * their resume function.
+ */
+ if (!afu_dev->driver)
+ continue;
+
+ if (afu_dev->driver->err_handler &&
+ afu_dev->driver->err_handler->slot_reset)
+ afu_result = afu_dev->driver->err_handler->slot_reset(afu_dev);
+
+ if (afu_result == PCI_ERS_RESULT_DISCONNECT)
+ result = PCI_ERS_RESULT_DISCONNECT;
+ }
+ }
+ return result;
+
+err:
+ /* All the bits that happen in both error_detected and cxl_remove
+ * should be idempotent, so we don't need to worry about leaving a mix
+ * of unconfigured and reconfigured resources.
+ */
+ dev_err(&pdev->dev, "EEH recovery failed. Asking to be disconnected.\n");
+ return PCI_ERS_RESULT_DISCONNECT;
+}
+
+static void cxl_pci_resume(struct pci_dev *pdev)
+{
+ struct cxl *adapter = pci_get_drvdata(pdev);
+ struct cxl_afu *afu;
+ struct pci_dev *afu_dev;
+ int i;
+
+ /* Everything is back now. Drivers should restart work now.
+ * This is not the place to be checking if everything came back up
+ * properly, because there's no return value: do that in slot_reset.
+ */
+ for (i = 0; i < adapter->slices; i++) {
+ afu = adapter->afu[i];
+
+ list_for_each_entry(afu_dev, &afu->phb->bus->devices, bus_list) {
+ if (afu_dev->driver && afu_dev->driver->err_handler &&
+ afu_dev->driver->err_handler->resume)
+ afu_dev->driver->err_handler->resume(afu_dev);
+ }
+ }
+}
+
+static const struct pci_error_handlers cxl_err_handler = {
+ .error_detected = cxl_pci_error_detected,
+ .slot_reset = cxl_pci_slot_reset,
+ .resume = cxl_pci_resume,
+};
+
+struct pci_driver cxl_pci_driver = {
+ .name = "cxl-pci",
+ .id_table = cxl_pci_tbl,
+ .probe = cxl_probe,
+ .remove = cxl_remove,
+ .shutdown = cxl_remove,
+ .err_handler = &cxl_err_handler,
+};
diff --git a/drivers/misc/cxl/sysfs.c b/drivers/misc/cxl/sysfs.c
new file mode 100644
index 0000000..02006f7
--- /dev/null
+++ b/drivers/misc/cxl/sysfs.c
@@ -0,0 +1,657 @@
+/*
+ * Copyright 2014 IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/device.h>
+#include <linux/sysfs.h>
+#include <linux/pci_regs.h>
+
+#include "cxl.h"
+
+#define to_afu_chardev_m(d) dev_get_drvdata(d)
+
+/********* Adapter attributes **********************************************/
+
+static ssize_t caia_version_show(struct device *device,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct cxl *adapter = to_cxl_adapter(device);
+
+ return scnprintf(buf, PAGE_SIZE, "%i.%i\n", adapter->caia_major,
+ adapter->caia_minor);
+}
+
+static ssize_t psl_revision_show(struct device *device,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct cxl *adapter = to_cxl_adapter(device);
+
+ return scnprintf(buf, PAGE_SIZE, "%i\n", adapter->psl_rev);
+}
+
+static ssize_t base_image_show(struct device *device,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct cxl *adapter = to_cxl_adapter(device);
+
+ return scnprintf(buf, PAGE_SIZE, "%i\n", adapter->base_image);
+}
+
+static ssize_t image_loaded_show(struct device *device,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct cxl *adapter = to_cxl_adapter(device);
+
+ if (adapter->user_image_loaded)
+ return scnprintf(buf, PAGE_SIZE, "user\n");
+ return scnprintf(buf, PAGE_SIZE, "factory\n");
+}
+
+static ssize_t reset_adapter_store(struct device *device,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct cxl *adapter = to_cxl_adapter(device);
+ int rc;
+ int val;
+
+ rc = sscanf(buf, "%i", &val);
+ if ((rc != 1) || (val != 1))
+ return -EINVAL;
+
+ if ((rc = cxl_reset(adapter)))
+ return rc;
+ return count;
+}
+
+static ssize_t load_image_on_perst_show(struct device *device,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct cxl *adapter = to_cxl_adapter(device);
+
+ if (!adapter->perst_loads_image)
+ return scnprintf(buf, PAGE_SIZE, "none\n");
+
+ if (adapter->perst_select_user)
+ return scnprintf(buf, PAGE_SIZE, "user\n");
+ return scnprintf(buf, PAGE_SIZE, "factory\n");
+}
+
+static ssize_t load_image_on_perst_store(struct device *device,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct cxl *adapter = to_cxl_adapter(device);
+ int rc;
+
+ if (!strncmp(buf, "none", 4))
+ adapter->perst_loads_image = false;
+ else if (!strncmp(buf, "user", 4)) {
+ adapter->perst_select_user = true;
+ adapter->perst_loads_image = true;
+ } else if (!strncmp(buf, "factory", 7)) {
+ adapter->perst_select_user = false;
+ adapter->perst_loads_image = true;
+ } else
+ return -EINVAL;
+
+ if ((rc = cxl_update_image_control(adapter)))
+ return rc;
+
+ return count;
+}
+
+static ssize_t perst_reloads_same_image_show(struct device *device,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct cxl *adapter = to_cxl_adapter(device);
+
+ return scnprintf(buf, PAGE_SIZE, "%i\n", adapter->perst_same_image);
+}
+
+static ssize_t perst_reloads_same_image_store(struct device *device,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct cxl *adapter = to_cxl_adapter(device);
+ int rc;
+ int val;
+
+ rc = sscanf(buf, "%i", &val);
+ if ((rc != 1) || !(val == 1 || val == 0))
+ return -EINVAL;
+
+ adapter->perst_same_image = (val == 1 ? true : false);
+ return count;
+}
+
+static struct device_attribute adapter_attrs[] = {
+ __ATTR_RO(caia_version),
+ __ATTR_RO(psl_revision),
+ __ATTR_RO(base_image),
+ __ATTR_RO(image_loaded),
+ __ATTR_RW(load_image_on_perst),
+ __ATTR_RW(perst_reloads_same_image),
+ __ATTR(reset, S_IWUSR, NULL, reset_adapter_store),
+};
+
+
+/********* AFU master specific attributes **********************************/
+
+static ssize_t mmio_size_show_master(struct device *device,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct cxl_afu *afu = to_afu_chardev_m(device);
+
+ return scnprintf(buf, PAGE_SIZE, "%llu\n", afu->adapter->ps_size);
+}
+
+static ssize_t pp_mmio_off_show(struct device *device,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct cxl_afu *afu = to_afu_chardev_m(device);
+
+ return scnprintf(buf, PAGE_SIZE, "%llu\n", afu->pp_offset);
+}
+
+static ssize_t pp_mmio_len_show(struct device *device,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct cxl_afu *afu = to_afu_chardev_m(device);
+
+ return scnprintf(buf, PAGE_SIZE, "%llu\n", afu->pp_size);
+}
+
+static struct device_attribute afu_master_attrs[] = {
+ __ATTR(mmio_size, S_IRUGO, mmio_size_show_master, NULL),
+ __ATTR_RO(pp_mmio_off),
+ __ATTR_RO(pp_mmio_len),
+};
+
+
+/********* AFU attributes **************************************************/
+
+static ssize_t mmio_size_show(struct device *device,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct cxl_afu *afu = to_cxl_afu(device);
+
+ if (afu->pp_size)
+ return scnprintf(buf, PAGE_SIZE, "%llu\n", afu->pp_size);
+ return scnprintf(buf, PAGE_SIZE, "%llu\n", afu->adapter->ps_size);
+}
+
+static ssize_t reset_store_afu(struct device *device,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct cxl_afu *afu = to_cxl_afu(device);
+ int rc;
+
+ /* Not safe to reset if it is currently in use */
+ mutex_lock(&afu->contexts_lock);
+ if (!idr_is_empty(&afu->contexts_idr)) {
+ rc = -EBUSY;
+ goto err;
+ }
+
+ if ((rc = __cxl_afu_reset(afu)))
+ goto err;
+
+ rc = count;
+err:
+ mutex_unlock(&afu->contexts_lock);
+ return rc;
+}
+
+static ssize_t irqs_min_show(struct device *device,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct cxl_afu *afu = to_cxl_afu(device);
+
+ return scnprintf(buf, PAGE_SIZE, "%i\n", afu->pp_irqs);
+}
+
+static ssize_t irqs_max_show(struct device *device,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct cxl_afu *afu = to_cxl_afu(device);
+
+ return scnprintf(buf, PAGE_SIZE, "%i\n", afu->irqs_max);
+}
+
+static ssize_t irqs_max_store(struct device *device,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct cxl_afu *afu = to_cxl_afu(device);
+ ssize_t ret;
+ int irqs_max;
+
+ ret = sscanf(buf, "%i", &irqs_max);
+ if (ret != 1)
+ return -EINVAL;
+
+ if (irqs_max < afu->pp_irqs)
+ return -EINVAL;
+
+ if (irqs_max > afu->adapter->user_irqs)
+ return -EINVAL;
+
+ afu->irqs_max = irqs_max;
+ return count;
+}
+
+static ssize_t modes_supported_show(struct device *device,
+ struct device_attribute *attr, char *buf)
+{
+ struct cxl_afu *afu = to_cxl_afu(device);
+ char *p = buf, *end = buf + PAGE_SIZE;
+
+ if (afu->modes_supported & CXL_MODE_DEDICATED)
+ p += scnprintf(p, end - p, "dedicated_process\n");
+ if (afu->modes_supported & CXL_MODE_DIRECTED)
+ p += scnprintf(p, end - p, "afu_directed\n");
+ return (p - buf);
+}
+
+static ssize_t prefault_mode_show(struct device *device,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct cxl_afu *afu = to_cxl_afu(device);
+
+ switch (afu->prefault_mode) {
+ case CXL_PREFAULT_WED:
+ return scnprintf(buf, PAGE_SIZE, "work_element_descriptor\n");
+ case CXL_PREFAULT_ALL:
+ return scnprintf(buf, PAGE_SIZE, "all\n");
+ default:
+ return scnprintf(buf, PAGE_SIZE, "none\n");
+ }
+}
+
+static ssize_t prefault_mode_store(struct device *device,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct cxl_afu *afu = to_cxl_afu(device);
+ enum prefault_modes mode = -1;
+
+ if (!strncmp(buf, "work_element_descriptor", 23))
+ mode = CXL_PREFAULT_WED;
+ if (!strncmp(buf, "all", 3))
+ mode = CXL_PREFAULT_ALL;
+ if (!strncmp(buf, "none", 4))
+ mode = CXL_PREFAULT_NONE;
+
+ if (mode == -1)
+ return -EINVAL;
+
+ afu->prefault_mode = mode;
+ return count;
+}
+
+static ssize_t mode_show(struct device *device,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct cxl_afu *afu = to_cxl_afu(device);
+
+ if (afu->current_mode == CXL_MODE_DEDICATED)
+ return scnprintf(buf, PAGE_SIZE, "dedicated_process\n");
+ if (afu->current_mode == CXL_MODE_DIRECTED)
+ return scnprintf(buf, PAGE_SIZE, "afu_directed\n");
+ return scnprintf(buf, PAGE_SIZE, "none\n");
+}
+
+static ssize_t mode_store(struct device *device, struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct cxl_afu *afu = to_cxl_afu(device);
+ int old_mode, mode = -1;
+ int rc = -EBUSY;
+
+ /* can't change this if we have a user */
+ mutex_lock(&afu->contexts_lock);
+ if (!idr_is_empty(&afu->contexts_idr))
+ goto err;
+
+ if (!strncmp(buf, "dedicated_process", 17))
+ mode = CXL_MODE_DEDICATED;
+ if (!strncmp(buf, "afu_directed", 12))
+ mode = CXL_MODE_DIRECTED;
+ if (!strncmp(buf, "none", 4))
+ mode = 0;
+
+ if (mode == -1) {
+ rc = -EINVAL;
+ goto err;
+ }
+
+ /*
+ * cxl_afu_deactivate_mode needs to be done outside the lock, prevent
+ * other contexts coming in before we are ready:
+ */
+ old_mode = afu->current_mode;
+ afu->current_mode = 0;
+ afu->num_procs = 0;
+
+ mutex_unlock(&afu->contexts_lock);
+
+ if ((rc = _cxl_afu_deactivate_mode(afu, old_mode)))
+ return rc;
+ if ((rc = cxl_afu_activate_mode(afu, mode)))
+ return rc;
+
+ return count;
+err:
+ mutex_unlock(&afu->contexts_lock);
+ return rc;
+}
+
+static ssize_t api_version_show(struct device *device,
+ struct device_attribute *attr,
+ char *buf)
+{
+ return scnprintf(buf, PAGE_SIZE, "%i\n", CXL_API_VERSION);
+}
+
+static ssize_t api_version_compatible_show(struct device *device,
+ struct device_attribute *attr,
+ char *buf)
+{
+ return scnprintf(buf, PAGE_SIZE, "%i\n", CXL_API_VERSION_COMPATIBLE);
+}
+
+static ssize_t afu_eb_read(struct file *filp, struct kobject *kobj,
+ struct bin_attribute *bin_attr, char *buf,
+ loff_t off, size_t count)
+{
+ struct cxl_afu *afu = to_cxl_afu(container_of(kobj,
+ struct device, kobj));
+
+ return cxl_afu_read_err_buffer(afu, buf, off, count);
+}
+
+static struct device_attribute afu_attrs[] = {
+ __ATTR_RO(mmio_size),
+ __ATTR_RO(irqs_min),
+ __ATTR_RW(irqs_max),
+ __ATTR_RO(modes_supported),
+ __ATTR_RW(mode),
+ __ATTR_RW(prefault_mode),
+ __ATTR_RO(api_version),
+ __ATTR_RO(api_version_compatible),
+ __ATTR(reset, S_IWUSR, NULL, reset_store_afu),
+};
+
+int cxl_sysfs_adapter_add(struct cxl *adapter)
+{
+ int i, rc;
+
+ for (i = 0; i < ARRAY_SIZE(adapter_attrs); i++) {
+ if ((rc = device_create_file(&adapter->dev, &adapter_attrs[i])))
+ goto err;
+ }
+ return 0;
+err:
+ for (i--; i >= 0; i--)
+ device_remove_file(&adapter->dev, &adapter_attrs[i]);
+ return rc;
+}
+void cxl_sysfs_adapter_remove(struct cxl *adapter)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(adapter_attrs); i++)
+ device_remove_file(&adapter->dev, &adapter_attrs[i]);
+}
+
+struct afu_config_record {
+ struct kobject kobj;
+ struct bin_attribute config_attr;
+ struct list_head list;
+ int cr;
+ u16 device;
+ u16 vendor;
+ u32 class;
+};
+
+#define to_cr(obj) container_of(obj, struct afu_config_record, kobj)
+
+static ssize_t vendor_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct afu_config_record *cr = to_cr(kobj);
+
+ return scnprintf(buf, PAGE_SIZE, "0x%.4x\n", cr->vendor);
+}
+
+static ssize_t device_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct afu_config_record *cr = to_cr(kobj);
+
+ return scnprintf(buf, PAGE_SIZE, "0x%.4x\n", cr->device);
+}
+
+static ssize_t class_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct afu_config_record *cr = to_cr(kobj);
+
+ return scnprintf(buf, PAGE_SIZE, "0x%.6x\n", cr->class);
+}
+
+static ssize_t afu_read_config(struct file *filp, struct kobject *kobj,
+ struct bin_attribute *bin_attr, char *buf,
+ loff_t off, size_t count)
+{
+ struct afu_config_record *cr = to_cr(kobj);
+ struct cxl_afu *afu = to_cxl_afu(container_of(kobj->parent, struct device, kobj));
+
+ u64 i, j, val;
+
+ for (i = 0; i < count;) {
+ val = cxl_afu_cr_read64(afu, cr->cr, off & ~0x7);
+ for (j = off & 0x7; j < 8 && i < count; i++, j++, off++)
+ buf[i] = (val >> (j * 8)) & 0xff;
+ }
+
+ return count;
+}
+
+static struct kobj_attribute vendor_attribute =
+ __ATTR_RO(vendor);
+static struct kobj_attribute device_attribute =
+ __ATTR_RO(device);
+static struct kobj_attribute class_attribute =
+ __ATTR_RO(class);
+
+static struct attribute *afu_cr_attrs[] = {
+ &vendor_attribute.attr,
+ &device_attribute.attr,
+ &class_attribute.attr,
+ NULL,
+};
+
+static void release_afu_config_record(struct kobject *kobj)
+{
+ struct afu_config_record *cr = to_cr(kobj);
+
+ kfree(cr);
+}
+
+static struct kobj_type afu_config_record_type = {
+ .sysfs_ops = &kobj_sysfs_ops,
+ .release = release_afu_config_record,
+ .default_attrs = afu_cr_attrs,
+};
+
+static struct afu_config_record *cxl_sysfs_afu_new_cr(struct cxl_afu *afu, int cr_idx)
+{
+ struct afu_config_record *cr;
+ int rc;
+
+ cr = kzalloc(sizeof(struct afu_config_record), GFP_KERNEL);
+ if (!cr)
+ return ERR_PTR(-ENOMEM);
+
+ cr->cr = cr_idx;
+ cr->device = cxl_afu_cr_read16(afu, cr_idx, PCI_DEVICE_ID);
+ cr->vendor = cxl_afu_cr_read16(afu, cr_idx, PCI_VENDOR_ID);
+ cr->class = cxl_afu_cr_read32(afu, cr_idx, PCI_CLASS_REVISION) >> 8;
+
+ /*
+ * Export raw AFU PCIe like config record. For now this is read only by
+ * root - we can expand that later to be readable by non-root and maybe
+ * even writable provided we have a good use-case. Once we suport
+ * exposing AFUs through a virtual PHB they will get that for free from
+ * Linux' PCI infrastructure, but until then it's not clear that we
+ * need it for anything since the main use case is just identifying
+ * AFUs, which can be done via the vendor, device and class attributes.
+ */
+ sysfs_bin_attr_init(&cr->config_attr);
+ cr->config_attr.attr.name = "config";
+ cr->config_attr.attr.mode = S_IRUSR;
+ cr->config_attr.size = afu->crs_len;
+ cr->config_attr.read = afu_read_config;
+
+ rc = kobject_init_and_add(&cr->kobj, &afu_config_record_type,
+ &afu->dev.kobj, "cr%i", cr->cr);
+ if (rc)
+ goto err;
+
+ rc = sysfs_create_bin_file(&cr->kobj, &cr->config_attr);
+ if (rc)
+ goto err1;
+
+ rc = kobject_uevent(&cr->kobj, KOBJ_ADD);
+ if (rc)
+ goto err2;
+
+ return cr;
+err2:
+ sysfs_remove_bin_file(&cr->kobj, &cr->config_attr);
+err1:
+ kobject_put(&cr->kobj);
+ return ERR_PTR(rc);
+err:
+ kfree(cr);
+ return ERR_PTR(rc);
+}
+
+void cxl_sysfs_afu_remove(struct cxl_afu *afu)
+{
+ struct afu_config_record *cr, *tmp;
+ int i;
+
+ /* remove the err buffer bin attribute */
+ if (afu->eb_len)
+ device_remove_bin_file(&afu->dev, &afu->attr_eb);
+
+ for (i = 0; i < ARRAY_SIZE(afu_attrs); i++)
+ device_remove_file(&afu->dev, &afu_attrs[i]);
+
+ list_for_each_entry_safe(cr, tmp, &afu->crs, list) {
+ sysfs_remove_bin_file(&cr->kobj, &cr->config_attr);
+ kobject_put(&cr->kobj);
+ }
+}
+
+int cxl_sysfs_afu_add(struct cxl_afu *afu)
+{
+ struct afu_config_record *cr;
+ int i, rc;
+
+ INIT_LIST_HEAD(&afu->crs);
+
+ for (i = 0; i < ARRAY_SIZE(afu_attrs); i++) {
+ if ((rc = device_create_file(&afu->dev, &afu_attrs[i])))
+ goto err;
+ }
+
+ /* conditionally create the add the binary file for error info buffer */
+ if (afu->eb_len) {
+ sysfs_attr_init(&afu->attr_eb.attr);
+
+ afu->attr_eb.attr.name = "afu_err_buff";
+ afu->attr_eb.attr.mode = S_IRUGO;
+ afu->attr_eb.size = afu->eb_len;
+ afu->attr_eb.read = afu_eb_read;
+
+ rc = device_create_bin_file(&afu->dev, &afu->attr_eb);
+ if (rc) {
+ dev_err(&afu->dev,
+ "Unable to create eb attr for the afu. Err(%d)\n",
+ rc);
+ goto err;
+ }
+ }
+
+ for (i = 0; i < afu->crs_num; i++) {
+ cr = cxl_sysfs_afu_new_cr(afu, i);
+ if (IS_ERR(cr)) {
+ rc = PTR_ERR(cr);
+ goto err1;
+ }
+ list_add(&cr->list, &afu->crs);
+ }
+
+ return 0;
+
+err1:
+ cxl_sysfs_afu_remove(afu);
+ return rc;
+err:
+ /* reset the eb_len as we havent created the bin attr */
+ afu->eb_len = 0;
+
+ for (i--; i >= 0; i--)
+ device_remove_file(&afu->dev, &afu_attrs[i]);
+ return rc;
+}
+
+int cxl_sysfs_afu_m_add(struct cxl_afu *afu)
+{
+ int i, rc;
+
+ for (i = 0; i < ARRAY_SIZE(afu_master_attrs); i++) {
+ if ((rc = device_create_file(afu->chardev_m, &afu_master_attrs[i])))
+ goto err;
+ }
+
+ return 0;
+
+err:
+ for (i--; i >= 0; i--)
+ device_remove_file(afu->chardev_m, &afu_master_attrs[i]);
+ return rc;
+}
+
+void cxl_sysfs_afu_m_remove(struct cxl_afu *afu)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(afu_master_attrs); i++)
+ device_remove_file(afu->chardev_m, &afu_master_attrs[i]);
+}
diff --git a/drivers/misc/cxl/trace.c b/drivers/misc/cxl/trace.c
new file mode 100644
index 0000000..c2b06d3
--- /dev/null
+++ b/drivers/misc/cxl/trace.c
@@ -0,0 +1,13 @@
+/*
+ * Copyright 2015 IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef __CHECKER__
+#define CREATE_TRACE_POINTS
+#include "trace.h"
+#endif
diff --git a/drivers/misc/cxl/trace.h b/drivers/misc/cxl/trace.h
new file mode 100644
index 0000000..6e1e2ad
--- /dev/null
+++ b/drivers/misc/cxl/trace.h
@@ -0,0 +1,459 @@
+/*
+ * Copyright 2015 IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM cxl
+
+#if !defined(_CXL_TRACE_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _CXL_TRACE_H
+
+#include <linux/tracepoint.h>
+
+#include "cxl.h"
+
+#define DSISR_FLAGS \
+ { CXL_PSL_DSISR_An_DS, "DS" }, \
+ { CXL_PSL_DSISR_An_DM, "DM" }, \
+ { CXL_PSL_DSISR_An_ST, "ST" }, \
+ { CXL_PSL_DSISR_An_UR, "UR" }, \
+ { CXL_PSL_DSISR_An_PE, "PE" }, \
+ { CXL_PSL_DSISR_An_AE, "AE" }, \
+ { CXL_PSL_DSISR_An_OC, "OC" }, \
+ { CXL_PSL_DSISR_An_M, "M" }, \
+ { CXL_PSL_DSISR_An_P, "P" }, \
+ { CXL_PSL_DSISR_An_A, "A" }, \
+ { CXL_PSL_DSISR_An_S, "S" }, \
+ { CXL_PSL_DSISR_An_K, "K" }
+
+#define TFC_FLAGS \
+ { CXL_PSL_TFC_An_A, "A" }, \
+ { CXL_PSL_TFC_An_C, "C" }, \
+ { CXL_PSL_TFC_An_AE, "AE" }, \
+ { CXL_PSL_TFC_An_R, "R" }
+
+#define LLCMD_NAMES \
+ { CXL_SPA_SW_CMD_TERMINATE, "TERMINATE" }, \
+ { CXL_SPA_SW_CMD_REMOVE, "REMOVE" }, \
+ { CXL_SPA_SW_CMD_SUSPEND, "SUSPEND" }, \
+ { CXL_SPA_SW_CMD_RESUME, "RESUME" }, \
+ { CXL_SPA_SW_CMD_ADD, "ADD" }, \
+ { CXL_SPA_SW_CMD_UPDATE, "UPDATE" }
+
+#define AFU_COMMANDS \
+ { 0, "DISABLE" }, \
+ { CXL_AFU_Cntl_An_E, "ENABLE" }, \
+ { CXL_AFU_Cntl_An_RA, "RESET" }
+
+#define PSL_COMMANDS \
+ { CXL_PSL_SCNTL_An_Pc, "PURGE" }, \
+ { CXL_PSL_SCNTL_An_Sc, "SUSPEND" }
+
+
+DECLARE_EVENT_CLASS(cxl_pe_class,
+ TP_PROTO(struct cxl_context *ctx),
+
+ TP_ARGS(ctx),
+
+ TP_STRUCT__entry(
+ __field(u8, card)
+ __field(u8, afu)
+ __field(u16, pe)
+ ),
+
+ TP_fast_assign(
+ __entry->card = ctx->afu->adapter->adapter_num;
+ __entry->afu = ctx->afu->slice;
+ __entry->pe = ctx->pe;
+ ),
+
+ TP_printk("afu%i.%i pe=%i",
+ __entry->card,
+ __entry->afu,
+ __entry->pe
+ )
+);
+
+
+TRACE_EVENT(cxl_attach,
+ TP_PROTO(struct cxl_context *ctx, u64 wed, s16 num_interrupts, u64 amr),
+
+ TP_ARGS(ctx, wed, num_interrupts, amr),
+
+ TP_STRUCT__entry(
+ __field(u8, card)
+ __field(u8, afu)
+ __field(u16, pe)
+ __field(pid_t, pid)
+ __field(u64, wed)
+ __field(u64, amr)
+ __field(s16, num_interrupts)
+ ),
+
+ TP_fast_assign(
+ __entry->card = ctx->afu->adapter->adapter_num;
+ __entry->afu = ctx->afu->slice;
+ __entry->pe = ctx->pe;
+ __entry->pid = pid_nr(ctx->pid);
+ __entry->wed = wed;
+ __entry->amr = amr;
+ __entry->num_interrupts = num_interrupts;
+ ),
+
+ TP_printk("afu%i.%i pid=%i pe=%i wed=0x%016llx irqs=%i amr=0x%llx",
+ __entry->card,
+ __entry->afu,
+ __entry->pid,
+ __entry->pe,
+ __entry->wed,
+ __entry->num_interrupts,
+ __entry->amr
+ )
+);
+
+DEFINE_EVENT(cxl_pe_class, cxl_detach,
+ TP_PROTO(struct cxl_context *ctx),
+ TP_ARGS(ctx)
+);
+
+TRACE_EVENT(cxl_afu_irq,
+ TP_PROTO(struct cxl_context *ctx, int afu_irq, int virq, irq_hw_number_t hwirq),
+
+ TP_ARGS(ctx, afu_irq, virq, hwirq),
+
+ TP_STRUCT__entry(
+ __field(u8, card)
+ __field(u8, afu)
+ __field(u16, pe)
+ __field(u16, afu_irq)
+ __field(int, virq)
+ __field(irq_hw_number_t, hwirq)
+ ),
+
+ TP_fast_assign(
+ __entry->card = ctx->afu->adapter->adapter_num;
+ __entry->afu = ctx->afu->slice;
+ __entry->pe = ctx->pe;
+ __entry->afu_irq = afu_irq;
+ __entry->virq = virq;
+ __entry->hwirq = hwirq;
+ ),
+
+ TP_printk("afu%i.%i pe=%i afu_irq=%i virq=%i hwirq=0x%lx",
+ __entry->card,
+ __entry->afu,
+ __entry->pe,
+ __entry->afu_irq,
+ __entry->virq,
+ __entry->hwirq
+ )
+);
+
+TRACE_EVENT(cxl_psl_irq,
+ TP_PROTO(struct cxl_context *ctx, int irq, u64 dsisr, u64 dar),
+
+ TP_ARGS(ctx, irq, dsisr, dar),
+
+ TP_STRUCT__entry(
+ __field(u8, card)
+ __field(u8, afu)
+ __field(u16, pe)
+ __field(int, irq)
+ __field(u64, dsisr)
+ __field(u64, dar)
+ ),
+
+ TP_fast_assign(
+ __entry->card = ctx->afu->adapter->adapter_num;
+ __entry->afu = ctx->afu->slice;
+ __entry->pe = ctx->pe;
+ __entry->irq = irq;
+ __entry->dsisr = dsisr;
+ __entry->dar = dar;
+ ),
+
+ TP_printk("afu%i.%i pe=%i irq=%i dsisr=%s dar=0x%016llx",
+ __entry->card,
+ __entry->afu,
+ __entry->pe,
+ __entry->irq,
+ __print_flags(__entry->dsisr, "|", DSISR_FLAGS),
+ __entry->dar
+ )
+);
+
+TRACE_EVENT(cxl_psl_irq_ack,
+ TP_PROTO(struct cxl_context *ctx, u64 tfc),
+
+ TP_ARGS(ctx, tfc),
+
+ TP_STRUCT__entry(
+ __field(u8, card)
+ __field(u8, afu)
+ __field(u16, pe)
+ __field(u64, tfc)
+ ),
+
+ TP_fast_assign(
+ __entry->card = ctx->afu->adapter->adapter_num;
+ __entry->afu = ctx->afu->slice;
+ __entry->pe = ctx->pe;
+ __entry->tfc = tfc;
+ ),
+
+ TP_printk("afu%i.%i pe=%i tfc=%s",
+ __entry->card,
+ __entry->afu,
+ __entry->pe,
+ __print_flags(__entry->tfc, "|", TFC_FLAGS)
+ )
+);
+
+TRACE_EVENT(cxl_ste_miss,
+ TP_PROTO(struct cxl_context *ctx, u64 dar),
+
+ TP_ARGS(ctx, dar),
+
+ TP_STRUCT__entry(
+ __field(u8, card)
+ __field(u8, afu)
+ __field(u16, pe)
+ __field(u64, dar)
+ ),
+
+ TP_fast_assign(
+ __entry->card = ctx->afu->adapter->adapter_num;
+ __entry->afu = ctx->afu->slice;
+ __entry->pe = ctx->pe;
+ __entry->dar = dar;
+ ),
+
+ TP_printk("afu%i.%i pe=%i dar=0x%016llx",
+ __entry->card,
+ __entry->afu,
+ __entry->pe,
+ __entry->dar
+ )
+);
+
+TRACE_EVENT(cxl_ste_write,
+ TP_PROTO(struct cxl_context *ctx, unsigned int idx, u64 e, u64 v),
+
+ TP_ARGS(ctx, idx, e, v),
+
+ TP_STRUCT__entry(
+ __field(u8, card)
+ __field(u8, afu)
+ __field(u16, pe)
+ __field(unsigned int, idx)
+ __field(u64, e)
+ __field(u64, v)
+ ),
+
+ TP_fast_assign(
+ __entry->card = ctx->afu->adapter->adapter_num;
+ __entry->afu = ctx->afu->slice;
+ __entry->pe = ctx->pe;
+ __entry->idx = idx;
+ __entry->e = e;
+ __entry->v = v;
+ ),
+
+ TP_printk("afu%i.%i pe=%i SSTE[%i] E=0x%016llx V=0x%016llx",
+ __entry->card,
+ __entry->afu,
+ __entry->pe,
+ __entry->idx,
+ __entry->e,
+ __entry->v
+ )
+);
+
+TRACE_EVENT(cxl_pte_miss,
+ TP_PROTO(struct cxl_context *ctx, u64 dsisr, u64 dar),
+
+ TP_ARGS(ctx, dsisr, dar),
+
+ TP_STRUCT__entry(
+ __field(u8, card)
+ __field(u8, afu)
+ __field(u16, pe)
+ __field(u64, dsisr)
+ __field(u64, dar)
+ ),
+
+ TP_fast_assign(
+ __entry->card = ctx->afu->adapter->adapter_num;
+ __entry->afu = ctx->afu->slice;
+ __entry->pe = ctx->pe;
+ __entry->dsisr = dsisr;
+ __entry->dar = dar;
+ ),
+
+ TP_printk("afu%i.%i pe=%i dsisr=%s dar=0x%016llx",
+ __entry->card,
+ __entry->afu,
+ __entry->pe,
+ __print_flags(__entry->dsisr, "|", DSISR_FLAGS),
+ __entry->dar
+ )
+);
+
+TRACE_EVENT(cxl_llcmd,
+ TP_PROTO(struct cxl_context *ctx, u64 cmd),
+
+ TP_ARGS(ctx, cmd),
+
+ TP_STRUCT__entry(
+ __field(u8, card)
+ __field(u8, afu)
+ __field(u16, pe)
+ __field(u64, cmd)
+ ),
+
+ TP_fast_assign(
+ __entry->card = ctx->afu->adapter->adapter_num;
+ __entry->afu = ctx->afu->slice;
+ __entry->pe = ctx->pe;
+ __entry->cmd = cmd;
+ ),
+
+ TP_printk("afu%i.%i pe=%i cmd=%s",
+ __entry->card,
+ __entry->afu,
+ __entry->pe,
+ __print_symbolic_u64(__entry->cmd, LLCMD_NAMES)
+ )
+);
+
+TRACE_EVENT(cxl_llcmd_done,
+ TP_PROTO(struct cxl_context *ctx, u64 cmd, int rc),
+
+ TP_ARGS(ctx, cmd, rc),
+
+ TP_STRUCT__entry(
+ __field(u8, card)
+ __field(u8, afu)
+ __field(u16, pe)
+ __field(u64, cmd)
+ __field(int, rc)
+ ),
+
+ TP_fast_assign(
+ __entry->card = ctx->afu->adapter->adapter_num;
+ __entry->afu = ctx->afu->slice;
+ __entry->pe = ctx->pe;
+ __entry->rc = rc;
+ __entry->cmd = cmd;
+ ),
+
+ TP_printk("afu%i.%i pe=%i cmd=%s rc=%i",
+ __entry->card,
+ __entry->afu,
+ __entry->pe,
+ __print_symbolic_u64(__entry->cmd, LLCMD_NAMES),
+ __entry->rc
+ )
+);
+
+DECLARE_EVENT_CLASS(cxl_afu_psl_ctrl,
+ TP_PROTO(struct cxl_afu *afu, u64 cmd),
+
+ TP_ARGS(afu, cmd),
+
+ TP_STRUCT__entry(
+ __field(u8, card)
+ __field(u8, afu)
+ __field(u64, cmd)
+ ),
+
+ TP_fast_assign(
+ __entry->card = afu->adapter->adapter_num;
+ __entry->afu = afu->slice;
+ __entry->cmd = cmd;
+ ),
+
+ TP_printk("afu%i.%i cmd=%s",
+ __entry->card,
+ __entry->afu,
+ __print_symbolic_u64(__entry->cmd, AFU_COMMANDS)
+ )
+);
+
+DECLARE_EVENT_CLASS(cxl_afu_psl_ctrl_done,
+ TP_PROTO(struct cxl_afu *afu, u64 cmd, int rc),
+
+ TP_ARGS(afu, cmd, rc),
+
+ TP_STRUCT__entry(
+ __field(u8, card)
+ __field(u8, afu)
+ __field(u64, cmd)
+ __field(int, rc)
+ ),
+
+ TP_fast_assign(
+ __entry->card = afu->adapter->adapter_num;
+ __entry->afu = afu->slice;
+ __entry->rc = rc;
+ __entry->cmd = cmd;
+ ),
+
+ TP_printk("afu%i.%i cmd=%s rc=%i",
+ __entry->card,
+ __entry->afu,
+ __print_symbolic_u64(__entry->cmd, AFU_COMMANDS),
+ __entry->rc
+ )
+);
+
+DEFINE_EVENT(cxl_afu_psl_ctrl, cxl_afu_ctrl,
+ TP_PROTO(struct cxl_afu *afu, u64 cmd),
+ TP_ARGS(afu, cmd)
+);
+
+DEFINE_EVENT(cxl_afu_psl_ctrl_done, cxl_afu_ctrl_done,
+ TP_PROTO(struct cxl_afu *afu, u64 cmd, int rc),
+ TP_ARGS(afu, cmd, rc)
+);
+
+DEFINE_EVENT_PRINT(cxl_afu_psl_ctrl, cxl_psl_ctrl,
+ TP_PROTO(struct cxl_afu *afu, u64 cmd),
+ TP_ARGS(afu, cmd),
+
+ TP_printk("psl%i.%i cmd=%s",
+ __entry->card,
+ __entry->afu,
+ __print_symbolic_u64(__entry->cmd, PSL_COMMANDS)
+ )
+);
+
+DEFINE_EVENT_PRINT(cxl_afu_psl_ctrl_done, cxl_psl_ctrl_done,
+ TP_PROTO(struct cxl_afu *afu, u64 cmd, int rc),
+ TP_ARGS(afu, cmd, rc),
+
+ TP_printk("psl%i.%i cmd=%s rc=%i",
+ __entry->card,
+ __entry->afu,
+ __print_symbolic_u64(__entry->cmd, PSL_COMMANDS),
+ __entry->rc
+ )
+);
+
+DEFINE_EVENT(cxl_pe_class, cxl_slbia,
+ TP_PROTO(struct cxl_context *ctx),
+ TP_ARGS(ctx)
+);
+
+#endif /* _CXL_TRACE_H */
+
+/* This part must be outside protection */
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE trace
+#include <trace/define_trace.h>
diff --git a/drivers/misc/cxl/vphb.c b/drivers/misc/cxl/vphb.c
new file mode 100644
index 0000000..cbd4331
--- /dev/null
+++ b/drivers/misc/cxl/vphb.c
@@ -0,0 +1,313 @@
+/*
+ * Copyright 2014 IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/pci.h>
+#include <misc/cxl.h>
+#include "cxl.h"
+
+static int cxl_dma_set_mask(struct pci_dev *pdev, u64 dma_mask)
+{
+ if (dma_mask < DMA_BIT_MASK(64)) {
+ pr_info("%s only 64bit DMA supported on CXL", __func__);
+ return -EIO;
+ }
+
+ *(pdev->dev.dma_mask) = dma_mask;
+ return 0;
+}
+
+static int cxl_pci_probe_mode(struct pci_bus *bus)
+{
+ return PCI_PROBE_NORMAL;
+}
+
+static int cxl_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
+{
+ return -ENODEV;
+}
+
+static void cxl_teardown_msi_irqs(struct pci_dev *pdev)
+{
+ /*
+ * MSI should never be set but need still need to provide this call
+ * back.
+ */
+}
+
+static bool cxl_pci_enable_device_hook(struct pci_dev *dev)
+{
+ struct pci_controller *phb;
+ struct cxl_afu *afu;
+ struct cxl_context *ctx;
+
+ phb = pci_bus_to_host(dev->bus);
+ afu = (struct cxl_afu *)phb->private_data;
+
+ if (!cxl_adapter_link_ok(afu->adapter)) {
+ dev_warn(&dev->dev, "%s: Device link is down, refusing to enable AFU\n", __func__);
+ return false;
+ }
+
+ set_dma_ops(&dev->dev, &dma_direct_ops);
+ set_dma_offset(&dev->dev, PAGE_OFFSET);
+
+ /*
+ * Allocate a context to do cxl things too. If we eventually do real
+ * DMA ops, we'll need a default context to attach them to
+ */
+ ctx = cxl_dev_context_init(dev);
+ if (!ctx)
+ return false;
+ dev->dev.archdata.cxl_ctx = ctx;
+
+ return (cxl_afu_check_and_enable(afu) == 0);
+}
+
+static void cxl_pci_disable_device(struct pci_dev *dev)
+{
+ struct cxl_context *ctx = cxl_get_context(dev);
+
+ if (ctx) {
+ if (ctx->status == STARTED) {
+ dev_err(&dev->dev, "Default context started\n");
+ return;
+ }
+ dev->dev.archdata.cxl_ctx = NULL;
+ cxl_release_context(ctx);
+ }
+}
+
+static resource_size_t cxl_pci_window_alignment(struct pci_bus *bus,
+ unsigned long type)
+{
+ return 1;
+}
+
+static void cxl_pci_reset_secondary_bus(struct pci_dev *dev)
+{
+ /* Should we do an AFU reset here ? */
+}
+
+static int cxl_pcie_cfg_record(u8 bus, u8 devfn)
+{
+ return (bus << 8) + devfn;
+}
+
+static unsigned long cxl_pcie_cfg_addr(struct pci_controller* phb,
+ u8 bus, u8 devfn, int offset)
+{
+ int record = cxl_pcie_cfg_record(bus, devfn);
+
+ return (unsigned long)phb->cfg_addr + ((unsigned long)phb->cfg_data * record) + offset;
+}
+
+
+static int cxl_pcie_config_info(struct pci_bus *bus, unsigned int devfn,
+ int offset, int len,
+ volatile void __iomem **ioaddr,
+ u32 *mask, int *shift)
+{
+ struct pci_controller *phb;
+ struct cxl_afu *afu;
+ unsigned long addr;
+
+ phb = pci_bus_to_host(bus);
+ if (phb == NULL)
+ return PCIBIOS_DEVICE_NOT_FOUND;
+ afu = (struct cxl_afu *)phb->private_data;
+
+ if (cxl_pcie_cfg_record(bus->number, devfn) > afu->crs_num)
+ return PCIBIOS_DEVICE_NOT_FOUND;
+ if (offset >= (unsigned long)phb->cfg_data)
+ return PCIBIOS_BAD_REGISTER_NUMBER;
+ addr = cxl_pcie_cfg_addr(phb, bus->number, devfn, offset);
+
+ *ioaddr = (void *)(addr & ~0x3ULL);
+ *shift = ((addr & 0x3) * 8);
+ switch (len) {
+ case 1:
+ *mask = 0xff;
+ break;
+ case 2:
+ *mask = 0xffff;
+ break;
+ default:
+ *mask = 0xffffffff;
+ break;
+ }
+ return 0;
+}
+
+
+static inline bool cxl_config_link_ok(struct pci_bus *bus)
+{
+ struct pci_controller *phb;
+ struct cxl_afu *afu;
+
+ /* Config space IO is based on phb->cfg_addr, which is based on
+ * afu_desc_mmio. This isn't safe to read/write when the link
+ * goes down, as EEH tears down MMIO space.
+ *
+ * Check if the link is OK before proceeding.
+ */
+
+ phb = pci_bus_to_host(bus);
+ if (phb == NULL)
+ return false;
+ afu = (struct cxl_afu *)phb->private_data;
+ return cxl_adapter_link_ok(afu->adapter);
+}
+
+static int cxl_pcie_read_config(struct pci_bus *bus, unsigned int devfn,
+ int offset, int len, u32 *val)
+{
+ volatile void __iomem *ioaddr;
+ int shift, rc;
+ u32 mask;
+
+ rc = cxl_pcie_config_info(bus, devfn, offset, len, &ioaddr,
+ &mask, &shift);
+ if (rc)
+ return rc;
+
+ if (!cxl_config_link_ok(bus))
+ return PCIBIOS_DEVICE_NOT_FOUND;
+
+ /* Can only read 32 bits */
+ *val = (in_le32(ioaddr) >> shift) & mask;
+ return PCIBIOS_SUCCESSFUL;
+}
+
+static int cxl_pcie_write_config(struct pci_bus *bus, unsigned int devfn,
+ int offset, int len, u32 val)
+{
+ volatile void __iomem *ioaddr;
+ u32 v, mask;
+ int shift, rc;
+
+ rc = cxl_pcie_config_info(bus, devfn, offset, len, &ioaddr,
+ &mask, &shift);
+ if (rc)
+ return rc;
+
+ if (!cxl_config_link_ok(bus))
+ return PCIBIOS_DEVICE_NOT_FOUND;
+
+ /* Can only write 32 bits so do read-modify-write */
+ mask <<= shift;
+ val <<= shift;
+
+ v = (in_le32(ioaddr) & ~mask) | (val & mask);
+
+ out_le32(ioaddr, v);
+ return PCIBIOS_SUCCESSFUL;
+}
+
+static struct pci_ops cxl_pcie_pci_ops =
+{
+ .read = cxl_pcie_read_config,
+ .write = cxl_pcie_write_config,
+};
+
+
+static struct pci_controller_ops cxl_pci_controller_ops =
+{
+ .probe_mode = cxl_pci_probe_mode,
+ .enable_device_hook = cxl_pci_enable_device_hook,
+ .disable_device = cxl_pci_disable_device,
+ .release_device = cxl_pci_disable_device,
+ .window_alignment = cxl_pci_window_alignment,
+ .reset_secondary_bus = cxl_pci_reset_secondary_bus,
+ .setup_msi_irqs = cxl_setup_msi_irqs,
+ .teardown_msi_irqs = cxl_teardown_msi_irqs,
+ .dma_set_mask = cxl_dma_set_mask,
+};
+
+int cxl_pci_vphb_add(struct cxl_afu *afu)
+{
+ struct pci_dev *phys_dev;
+ struct pci_controller *phb, *phys_phb;
+
+ phys_dev = to_pci_dev(afu->adapter->dev.parent);
+ phys_phb = pci_bus_to_host(phys_dev->bus);
+
+ /* Alloc and setup PHB data structure */
+ phb = pcibios_alloc_controller(phys_phb->dn);
+
+ if (!phb)
+ return -ENODEV;
+
+ /* Setup parent in sysfs */
+ phb->parent = &phys_dev->dev;
+
+ /* Setup the PHB using arch provided callback */
+ phb->ops = &cxl_pcie_pci_ops;
+ phb->cfg_addr = afu->afu_desc_mmio + afu->crs_offset;
+ phb->cfg_data = (void *)(u64)afu->crs_len;
+ phb->private_data = afu;
+ phb->controller_ops = cxl_pci_controller_ops;
+
+ /* Scan the bus */
+ pcibios_scan_phb(phb);
+ if (phb->bus == NULL)
+ return -ENXIO;
+
+ /* Claim resources. This might need some rework as well depending
+ * whether we are doing probe-only or not, like assigning unassigned
+ * resources etc...
+ */
+ pcibios_claim_one_bus(phb->bus);
+
+ /* Add probed PCI devices to the device model */
+ pci_bus_add_devices(phb->bus);
+
+ afu->phb = phb;
+
+ return 0;
+}
+
+void cxl_pci_vphb_reconfigure(struct cxl_afu *afu)
+{
+ /* When we are reconfigured, the AFU's MMIO space is unmapped
+ * and remapped. We need to reflect this in the PHB's view of
+ * the world.
+ */
+ afu->phb->cfg_addr = afu->afu_desc_mmio + afu->crs_offset;
+}
+
+void cxl_pci_vphb_remove(struct cxl_afu *afu)
+{
+ struct pci_controller *phb;
+
+ /* If there is no configuration record we won't have one of these */
+ if (!afu || !afu->phb)
+ return;
+
+ phb = afu->phb;
+ afu->phb = NULL;
+
+ pci_remove_root_bus(phb->bus);
+ pcibios_free_controller(phb);
+}
+
+struct cxl_afu *cxl_pci_to_afu(struct pci_dev *dev)
+{
+ struct pci_controller *phb;
+
+ phb = pci_bus_to_host(dev->bus);
+
+ return (struct cxl_afu *)phb->private_data;
+}
+EXPORT_SYMBOL_GPL(cxl_pci_to_afu);
+
+unsigned int cxl_pci_to_cfg_record(struct pci_dev *dev)
+{
+ return cxl_pcie_cfg_record(dev->bus->number, dev->devfn);
+}
+EXPORT_SYMBOL_GPL(cxl_pci_to_cfg_record);
diff --git a/drivers/misc/ds1682.c b/drivers/misc/ds1682.c
new file mode 100644
index 0000000..c711227
--- /dev/null
+++ b/drivers/misc/ds1682.c
@@ -0,0 +1,243 @@
+/*
+ * Dallas Semiconductor DS1682 Elapsed Time Recorder device driver
+ *
+ * Written by: Grant Likely <grant.likely@secretlab.ca>
+ *
+ * Copyright (C) 2007 Secret Lab Technologies Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/*
+ * The DS1682 elapsed timer recorder is a simple device that implements
+ * one elapsed time counter, one event counter, an alarm signal and 10
+ * bytes of general purpose EEPROM.
+ *
+ * This driver provides access to the DS1682 counters and user data via
+ * the sysfs. The following attributes are added to the device node:
+ * elapsed_time (u32): Total elapsed event time in ms resolution
+ * alarm_time (u32): When elapsed time exceeds the value in alarm_time,
+ * then the alarm pin is asserted.
+ * event_count (u16): number of times the event pin has gone low.
+ * eeprom (u8[10]): general purpose EEPROM
+ *
+ * Counter registers and user data are both read/write unless the device
+ * has been write protected. This driver does not support turning off write
+ * protection. Once write protection is turned on, it is impossible to
+ * turn it off again, so I have left the feature out of this driver to avoid
+ * accidental enabling, but it is trivial to add write protect support.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/i2c.h>
+#include <linux/string.h>
+#include <linux/list.h>
+#include <linux/sysfs.h>
+#include <linux/ctype.h>
+#include <linux/hwmon-sysfs.h>
+
+/* Device registers */
+#define DS1682_REG_CONFIG 0x00
+#define DS1682_REG_ALARM 0x01
+#define DS1682_REG_ELAPSED 0x05
+#define DS1682_REG_EVT_CNTR 0x09
+#define DS1682_REG_EEPROM 0x0b
+#define DS1682_REG_RESET 0x1d
+#define DS1682_REG_WRITE_DISABLE 0x1e
+#define DS1682_REG_WRITE_MEM_DISABLE 0x1f
+
+#define DS1682_EEPROM_SIZE 10
+
+/*
+ * Generic counter attributes
+ */
+static ssize_t ds1682_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct sensor_device_attribute_2 *sattr = to_sensor_dev_attr_2(attr);
+ struct i2c_client *client = to_i2c_client(dev);
+ __le32 val = 0;
+ int rc;
+
+ dev_dbg(dev, "ds1682_show() called on %s\n", attr->attr.name);
+
+ /* Read the register */
+ rc = i2c_smbus_read_i2c_block_data(client, sattr->index, sattr->nr,
+ (u8 *) & val);
+ if (rc < 0)
+ return -EIO;
+
+ /* Special case: the 32 bit regs are time values with 1/4s
+ * resolution, scale them up to milliseconds */
+ if (sattr->nr == 4)
+ return sprintf(buf, "%llu\n",
+ ((unsigned long long)le32_to_cpu(val)) * 250);
+
+ /* Format the output string and return # of bytes */
+ return sprintf(buf, "%li\n", (long)le32_to_cpu(val));
+}
+
+static ssize_t ds1682_store(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct sensor_device_attribute_2 *sattr = to_sensor_dev_attr_2(attr);
+ struct i2c_client *client = to_i2c_client(dev);
+ u64 val;
+ __le32 val_le;
+ int rc;
+
+ dev_dbg(dev, "ds1682_store() called on %s\n", attr->attr.name);
+
+ /* Decode input */
+ rc = kstrtoull(buf, 0, &val);
+ if (rc < 0) {
+ dev_dbg(dev, "input string not a number\n");
+ return -EINVAL;
+ }
+
+ /* Special case: the 32 bit regs are time values with 1/4s
+ * resolution, scale input down to quarter-seconds */
+ if (sattr->nr == 4)
+ do_div(val, 250);
+
+ /* write out the value */
+ val_le = cpu_to_le32(val);
+ rc = i2c_smbus_write_i2c_block_data(client, sattr->index, sattr->nr,
+ (u8 *) & val_le);
+ if (rc < 0) {
+ dev_err(dev, "register write failed; reg=0x%x, size=%i\n",
+ sattr->index, sattr->nr);
+ return -EIO;
+ }
+
+ return count;
+}
+
+/*
+ * Simple register attributes
+ */
+static SENSOR_DEVICE_ATTR_2(elapsed_time, S_IRUGO | S_IWUSR, ds1682_show,
+ ds1682_store, 4, DS1682_REG_ELAPSED);
+static SENSOR_DEVICE_ATTR_2(alarm_time, S_IRUGO | S_IWUSR, ds1682_show,
+ ds1682_store, 4, DS1682_REG_ALARM);
+static SENSOR_DEVICE_ATTR_2(event_count, S_IRUGO | S_IWUSR, ds1682_show,
+ ds1682_store, 2, DS1682_REG_EVT_CNTR);
+
+static const struct attribute_group ds1682_group = {
+ .attrs = (struct attribute *[]) {
+ &sensor_dev_attr_elapsed_time.dev_attr.attr,
+ &sensor_dev_attr_alarm_time.dev_attr.attr,
+ &sensor_dev_attr_event_count.dev_attr.attr,
+ NULL,
+ },
+};
+
+/*
+ * User data attribute
+ */
+static ssize_t ds1682_eeprom_read(struct file *filp, struct kobject *kobj,
+ struct bin_attribute *attr,
+ char *buf, loff_t off, size_t count)
+{
+ struct i2c_client *client = kobj_to_i2c_client(kobj);
+ int rc;
+
+ dev_dbg(&client->dev, "ds1682_eeprom_read(p=%p, off=%lli, c=%zi)\n",
+ buf, off, count);
+
+ rc = i2c_smbus_read_i2c_block_data(client, DS1682_REG_EEPROM + off,
+ count, buf);
+ if (rc < 0)
+ return -EIO;
+
+ return count;
+}
+
+static ssize_t ds1682_eeprom_write(struct file *filp, struct kobject *kobj,
+ struct bin_attribute *attr,
+ char *buf, loff_t off, size_t count)
+{
+ struct i2c_client *client = kobj_to_i2c_client(kobj);
+
+ dev_dbg(&client->dev, "ds1682_eeprom_write(p=%p, off=%lli, c=%zi)\n",
+ buf, off, count);
+
+ /* Write out to the device */
+ if (i2c_smbus_write_i2c_block_data(client, DS1682_REG_EEPROM + off,
+ count, buf) < 0)
+ return -EIO;
+
+ return count;
+}
+
+static struct bin_attribute ds1682_eeprom_attr = {
+ .attr = {
+ .name = "eeprom",
+ .mode = S_IRUGO | S_IWUSR,
+ },
+ .size = DS1682_EEPROM_SIZE,
+ .read = ds1682_eeprom_read,
+ .write = ds1682_eeprom_write,
+};
+
+/*
+ * Called when a ds1682 device is matched with this driver
+ */
+static int ds1682_probe(struct i2c_client *client,
+ const struct i2c_device_id *id)
+{
+ int rc;
+
+ if (!i2c_check_functionality(client->adapter,
+ I2C_FUNC_SMBUS_I2C_BLOCK)) {
+ dev_err(&client->dev, "i2c bus does not support the ds1682\n");
+ rc = -ENODEV;
+ goto exit;
+ }
+
+ rc = sysfs_create_group(&client->dev.kobj, &ds1682_group);
+ if (rc)
+ goto exit;
+
+ rc = sysfs_create_bin_file(&client->dev.kobj, &ds1682_eeprom_attr);
+ if (rc)
+ goto exit_bin_attr;
+
+ return 0;
+
+ exit_bin_attr:
+ sysfs_remove_group(&client->dev.kobj, &ds1682_group);
+ exit:
+ return rc;
+}
+
+static int ds1682_remove(struct i2c_client *client)
+{
+ sysfs_remove_bin_file(&client->dev.kobj, &ds1682_eeprom_attr);
+ sysfs_remove_group(&client->dev.kobj, &ds1682_group);
+ return 0;
+}
+
+static const struct i2c_device_id ds1682_id[] = {
+ { "ds1682", 0 },
+ { }
+};
+MODULE_DEVICE_TABLE(i2c, ds1682_id);
+
+static struct i2c_driver ds1682_driver = {
+ .driver = {
+ .name = "ds1682",
+ },
+ .probe = ds1682_probe,
+ .remove = ds1682_remove,
+ .id_table = ds1682_id,
+};
+
+module_i2c_driver(ds1682_driver);
+
+MODULE_AUTHOR("Grant Likely <grant.likely@secretlab.ca>");
+MODULE_DESCRIPTION("DS1682 Elapsed Time Indicator driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/misc/dummy-irq.c b/drivers/misc/dummy-irq.c
new file mode 100644
index 0000000..acbbe03
--- /dev/null
+++ b/drivers/misc/dummy-irq.c
@@ -0,0 +1,64 @@
+/*
+ * Dummy IRQ handler driver.
+ *
+ * This module only registers itself as a handler that is specified to it
+ * by the 'irq' parameter.
+ *
+ * The sole purpose of this module is to help with debugging of systems on
+ * which spurious IRQs would happen on disabled IRQ vector.
+ *
+ * Copyright (C) 2013 Jiri Kosina
+ */
+
+/*
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ */
+#include <linux/module.h>
+#include <linux/irq.h>
+#include <linux/interrupt.h>
+
+static int irq = -1;
+
+static irqreturn_t dummy_interrupt(int irq, void *dev_id)
+{
+ static int count = 0;
+
+ if (count == 0) {
+ printk(KERN_INFO "dummy-irq: interrupt occurred on IRQ %d\n",
+ irq);
+ count++;
+ }
+
+ return IRQ_NONE;
+}
+
+static int __init dummy_irq_init(void)
+{
+ if (irq < 0) {
+ printk(KERN_ERR "dummy-irq: no IRQ given. Use irq=N\n");
+ return -EIO;
+ }
+ if (request_irq(irq, &dummy_interrupt, IRQF_SHARED, "dummy_irq", &irq)) {
+ printk(KERN_ERR "dummy-irq: cannot register IRQ %d\n", irq);
+ return -EIO;
+ }
+ printk(KERN_INFO "dummy-irq: registered for IRQ %d\n", irq);
+ return 0;
+}
+
+static void __exit dummy_irq_exit(void)
+{
+ printk(KERN_INFO "dummy-irq unloaded\n");
+ free_irq(irq, &irq);
+}
+
+module_init(dummy_irq_init);
+module_exit(dummy_irq_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Jiri Kosina");
+module_param(irq, uint, 0444);
+MODULE_PARM_DESC(irq, "The IRQ to register for");
+MODULE_DESCRIPTION("Dummy IRQ handler driver");
diff --git a/drivers/misc/echo/Kconfig b/drivers/misc/echo/Kconfig
new file mode 100644
index 0000000..f1d41ea
--- /dev/null
+++ b/drivers/misc/echo/Kconfig
@@ -0,0 +1,9 @@
+config ECHO
+ tristate "Line Echo Canceller support"
+ default n
+ ---help---
+ This driver provides line echo cancelling support for mISDN and
+ Zaptel drivers.
+
+ To compile this driver as a module, choose M here. The module
+ will be called echo.
diff --git a/drivers/misc/echo/Makefile b/drivers/misc/echo/Makefile
new file mode 100644
index 0000000..7d4caac
--- /dev/null
+++ b/drivers/misc/echo/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_ECHO) += echo.o
diff --git a/drivers/misc/echo/echo.c b/drivers/misc/echo/echo.c
new file mode 100644
index 0000000..9597e95
--- /dev/null
+++ b/drivers/misc/echo/echo.c
@@ -0,0 +1,674 @@
+/*
+ * SpanDSP - a series of DSP components for telephony
+ *
+ * echo.c - A line echo canceller. This code is being developed
+ * against and partially complies with G168.
+ *
+ * Written by Steve Underwood <steveu@coppice.org>
+ * and David Rowe <david_at_rowetel_dot_com>
+ *
+ * Copyright (C) 2001, 2003 Steve Underwood, 2007 David Rowe
+ *
+ * Based on a bit from here, a bit from there, eye of toad, ear of
+ * bat, 15 years of failed attempts by David and a few fried brain
+ * cells.
+ *
+ * All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+/*! \file */
+
+/* Implementation Notes
+ David Rowe
+ April 2007
+
+ This code started life as Steve's NLMS algorithm with a tap
+ rotation algorithm to handle divergence during double talk. I
+ added a Geigel Double Talk Detector (DTD) [2] and performed some
+ G168 tests. However I had trouble meeting the G168 requirements,
+ especially for double talk - there were always cases where my DTD
+ failed, for example where near end speech was under the 6dB
+ threshold required for declaring double talk.
+
+ So I tried a two path algorithm [1], which has so far given better
+ results. The original tap rotation/Geigel algorithm is available
+ in SVN http://svn.rowetel.com/software/oslec/tags/before_16bit.
+ It's probably possible to make it work if some one wants to put some
+ serious work into it.
+
+ At present no special treatment is provided for tones, which
+ generally cause NLMS algorithms to diverge. Initial runs of a
+ subset of the G168 tests for tones (e.g ./echo_test 6) show the
+ current algorithm is passing OK, which is kind of surprising. The
+ full set of tests needs to be performed to confirm this result.
+
+ One other interesting change is that I have managed to get the NLMS
+ code to work with 16 bit coefficients, rather than the original 32
+ bit coefficents. This reduces the MIPs and storage required.
+ I evaulated the 16 bit port using g168_tests.sh and listening tests
+ on 4 real-world samples.
+
+ I also attempted the implementation of a block based NLMS update
+ [2] but although this passes g168_tests.sh it didn't converge well
+ on the real-world samples. I have no idea why, perhaps a scaling
+ problem. The block based code is also available in SVN
+ http://svn.rowetel.com/software/oslec/tags/before_16bit. If this
+ code can be debugged, it will lead to further reduction in MIPS, as
+ the block update code maps nicely onto DSP instruction sets (it's a
+ dot product) compared to the current sample-by-sample update.
+
+ Steve also has some nice notes on echo cancellers in echo.h
+
+ References:
+
+ [1] Ochiai, Areseki, and Ogihara, "Echo Canceller with Two Echo
+ Path Models", IEEE Transactions on communications, COM-25,
+ No. 6, June
+ 1977.
+ http://www.rowetel.com/images/echo/dual_path_paper.pdf
+
+ [2] The classic, very useful paper that tells you how to
+ actually build a real world echo canceller:
+ Messerschmitt, Hedberg, Cole, Haoui, Winship, "Digital Voice
+ Echo Canceller with a TMS320020,
+ http://www.rowetel.com/images/echo/spra129.pdf
+
+ [3] I have written a series of blog posts on this work, here is
+ Part 1: http://www.rowetel.com/blog/?p=18
+
+ [4] The source code http://svn.rowetel.com/software/oslec/
+
+ [5] A nice reference on LMS filters:
+ http://en.wikipedia.org/wiki/Least_mean_squares_filter
+
+ Credits:
+
+ Thanks to Steve Underwood, Jean-Marc Valin, and Ramakrishnan
+ Muthukrishnan for their suggestions and email discussions. Thanks
+ also to those people who collected echo samples for me such as
+ Mark, Pawel, and Pavel.
+*/
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+
+#include "echo.h"
+
+#define MIN_TX_POWER_FOR_ADAPTION 64
+#define MIN_RX_POWER_FOR_ADAPTION 64
+#define DTD_HANGOVER 600 /* 600 samples, or 75ms */
+#define DC_LOG2BETA 3 /* log2() of DC filter Beta */
+
+/* adapting coeffs using the traditional stochastic descent (N)LMS algorithm */
+
+#ifdef __bfin__
+static inline void lms_adapt_bg(struct oslec_state *ec, int clean, int shift)
+{
+ int i;
+ int offset1;
+ int offset2;
+ int factor;
+ int exp;
+ int16_t *phist;
+ int n;
+
+ if (shift > 0)
+ factor = clean << shift;
+ else
+ factor = clean >> -shift;
+
+ /* Update the FIR taps */
+
+ offset2 = ec->curr_pos;
+ offset1 = ec->taps - offset2;
+ phist = &ec->fir_state_bg.history[offset2];
+
+ /* st: and en: help us locate the assembler in echo.s */
+
+ /* asm("st:"); */
+ n = ec->taps;
+ for (i = 0; i < n; i++) {
+ exp = *phist++ * factor;
+ ec->fir_taps16[1][i] += (int16_t) ((exp + (1 << 14)) >> 15);
+ }
+ /* asm("en:"); */
+
+ /* Note the asm for the inner loop above generated by Blackfin gcc
+ 4.1.1 is pretty good (note even parallel instructions used):
+
+ R0 = W [P0++] (X);
+ R0 *= R2;
+ R0 = R0 + R3 (NS) ||
+ R1 = W [P1] (X) ||
+ nop;
+ R0 >>>= 15;
+ R0 = R0 + R1;
+ W [P1++] = R0;
+
+ A block based update algorithm would be much faster but the
+ above can't be improved on much. Every instruction saved in
+ the loop above is 2 MIPs/ch! The for loop above is where the
+ Blackfin spends most of it's time - about 17 MIPs/ch measured
+ with speedtest.c with 256 taps (32ms). Write-back and
+ Write-through cache gave about the same performance.
+ */
+}
+
+/*
+ IDEAS for further optimisation of lms_adapt_bg():
+
+ 1/ The rounding is quite costly. Could we keep as 32 bit coeffs
+ then make filter pluck the MS 16-bits of the coeffs when filtering?
+ However this would lower potential optimisation of filter, as I
+ think the dual-MAC architecture requires packed 16 bit coeffs.
+
+ 2/ Block based update would be more efficient, as per comments above,
+ could use dual MAC architecture.
+
+ 3/ Look for same sample Blackfin LMS code, see if we can get dual-MAC
+ packing.
+
+ 4/ Execute the whole e/c in a block of say 20ms rather than sample
+ by sample. Processing a few samples every ms is inefficient.
+*/
+
+#else
+static inline void lms_adapt_bg(struct oslec_state *ec, int clean, int shift)
+{
+ int i;
+
+ int offset1;
+ int offset2;
+ int factor;
+ int exp;
+
+ if (shift > 0)
+ factor = clean << shift;
+ else
+ factor = clean >> -shift;
+
+ /* Update the FIR taps */
+
+ offset2 = ec->curr_pos;
+ offset1 = ec->taps - offset2;
+
+ for (i = ec->taps - 1; i >= offset1; i--) {
+ exp = (ec->fir_state_bg.history[i - offset1] * factor);
+ ec->fir_taps16[1][i] += (int16_t) ((exp + (1 << 14)) >> 15);
+ }
+ for (; i >= 0; i--) {
+ exp = (ec->fir_state_bg.history[i + offset2] * factor);
+ ec->fir_taps16[1][i] += (int16_t) ((exp + (1 << 14)) >> 15);
+ }
+}
+#endif
+
+static inline int top_bit(unsigned int bits)
+{
+ if (bits == 0)
+ return -1;
+ else
+ return (int)fls((int32_t) bits) - 1;
+}
+
+struct oslec_state *oslec_create(int len, int adaption_mode)
+{
+ struct oslec_state *ec;
+ int i;
+ const int16_t *history;
+
+ ec = kzalloc(sizeof(*ec), GFP_KERNEL);
+ if (!ec)
+ return NULL;
+
+ ec->taps = len;
+ ec->log2taps = top_bit(len);
+ ec->curr_pos = ec->taps - 1;
+
+ ec->fir_taps16[0] =
+ kcalloc(ec->taps, sizeof(int16_t), GFP_KERNEL);
+ if (!ec->fir_taps16[0])
+ goto error_oom_0;
+
+ ec->fir_taps16[1] =
+ kcalloc(ec->taps, sizeof(int16_t), GFP_KERNEL);
+ if (!ec->fir_taps16[1])
+ goto error_oom_1;
+
+ history = fir16_create(&ec->fir_state, ec->fir_taps16[0], ec->taps);
+ if (!history)
+ goto error_state;
+ history = fir16_create(&ec->fir_state_bg, ec->fir_taps16[1], ec->taps);
+ if (!history)
+ goto error_state_bg;
+
+ for (i = 0; i < 5; i++)
+ ec->xvtx[i] = ec->yvtx[i] = ec->xvrx[i] = ec->yvrx[i] = 0;
+
+ ec->cng_level = 1000;
+ oslec_adaption_mode(ec, adaption_mode);
+
+ ec->snapshot = kcalloc(ec->taps, sizeof(int16_t), GFP_KERNEL);
+ if (!ec->snapshot)
+ goto error_snap;
+
+ ec->cond_met = 0;
+ ec->pstates = 0;
+ ec->ltxacc = ec->lrxacc = ec->lcleanacc = ec->lclean_bgacc = 0;
+ ec->ltx = ec->lrx = ec->lclean = ec->lclean_bg = 0;
+ ec->tx_1 = ec->tx_2 = ec->rx_1 = ec->rx_2 = 0;
+ ec->lbgn = ec->lbgn_acc = 0;
+ ec->lbgn_upper = 200;
+ ec->lbgn_upper_acc = ec->lbgn_upper << 13;
+
+ return ec;
+
+error_snap:
+ fir16_free(&ec->fir_state_bg);
+error_state_bg:
+ fir16_free(&ec->fir_state);
+error_state:
+ kfree(ec->fir_taps16[1]);
+error_oom_1:
+ kfree(ec->fir_taps16[0]);
+error_oom_0:
+ kfree(ec);
+ return NULL;
+}
+EXPORT_SYMBOL_GPL(oslec_create);
+
+void oslec_free(struct oslec_state *ec)
+{
+ int i;
+
+ fir16_free(&ec->fir_state);
+ fir16_free(&ec->fir_state_bg);
+ for (i = 0; i < 2; i++)
+ kfree(ec->fir_taps16[i]);
+ kfree(ec->snapshot);
+ kfree(ec);
+}
+EXPORT_SYMBOL_GPL(oslec_free);
+
+void oslec_adaption_mode(struct oslec_state *ec, int adaption_mode)
+{
+ ec->adaption_mode = adaption_mode;
+}
+EXPORT_SYMBOL_GPL(oslec_adaption_mode);
+
+void oslec_flush(struct oslec_state *ec)
+{
+ int i;
+
+ ec->ltxacc = ec->lrxacc = ec->lcleanacc = ec->lclean_bgacc = 0;
+ ec->ltx = ec->lrx = ec->lclean = ec->lclean_bg = 0;
+ ec->tx_1 = ec->tx_2 = ec->rx_1 = ec->rx_2 = 0;
+
+ ec->lbgn = ec->lbgn_acc = 0;
+ ec->lbgn_upper = 200;
+ ec->lbgn_upper_acc = ec->lbgn_upper << 13;
+
+ ec->nonupdate_dwell = 0;
+
+ fir16_flush(&ec->fir_state);
+ fir16_flush(&ec->fir_state_bg);
+ ec->fir_state.curr_pos = ec->taps - 1;
+ ec->fir_state_bg.curr_pos = ec->taps - 1;
+ for (i = 0; i < 2; i++)
+ memset(ec->fir_taps16[i], 0, ec->taps * sizeof(int16_t));
+
+ ec->curr_pos = ec->taps - 1;
+ ec->pstates = 0;
+}
+EXPORT_SYMBOL_GPL(oslec_flush);
+
+void oslec_snapshot(struct oslec_state *ec)
+{
+ memcpy(ec->snapshot, ec->fir_taps16[0], ec->taps * sizeof(int16_t));
+}
+EXPORT_SYMBOL_GPL(oslec_snapshot);
+
+/* Dual Path Echo Canceller */
+
+int16_t oslec_update(struct oslec_state *ec, int16_t tx, int16_t rx)
+{
+ int32_t echo_value;
+ int clean_bg;
+ int tmp;
+ int tmp1;
+
+ /*
+ * Input scaling was found be required to prevent problems when tx
+ * starts clipping. Another possible way to handle this would be the
+ * filter coefficent scaling.
+ */
+
+ ec->tx = tx;
+ ec->rx = rx;
+ tx >>= 1;
+ rx >>= 1;
+
+ /*
+ * Filter DC, 3dB point is 160Hz (I think), note 32 bit precision
+ * required otherwise values do not track down to 0. Zero at DC, Pole
+ * at (1-Beta) on real axis. Some chip sets (like Si labs) don't
+ * need this, but something like a $10 X100P card does. Any DC really
+ * slows down convergence.
+ *
+ * Note: removes some low frequency from the signal, this reduces the
+ * speech quality when listening to samples through headphones but may
+ * not be obvious through a telephone handset.
+ *
+ * Note that the 3dB frequency in radians is approx Beta, e.g. for Beta
+ * = 2^(-3) = 0.125, 3dB freq is 0.125 rads = 159Hz.
+ */
+
+ if (ec->adaption_mode & ECHO_CAN_USE_RX_HPF) {
+ tmp = rx << 15;
+
+ /*
+ * Make sure the gain of the HPF is 1.0. This can still
+ * saturate a little under impulse conditions, and it might
+ * roll to 32768 and need clipping on sustained peak level
+ * signals. However, the scale of such clipping is small, and
+ * the error due to any saturation should not markedly affect
+ * the downstream processing.
+ */
+ tmp -= (tmp >> 4);
+
+ ec->rx_1 += -(ec->rx_1 >> DC_LOG2BETA) + tmp - ec->rx_2;
+
+ /*
+ * hard limit filter to prevent clipping. Note that at this
+ * stage rx should be limited to +/- 16383 due to right shift
+ * above
+ */
+ tmp1 = ec->rx_1 >> 15;
+ if (tmp1 > 16383)
+ tmp1 = 16383;
+ if (tmp1 < -16383)
+ tmp1 = -16383;
+ rx = tmp1;
+ ec->rx_2 = tmp;
+ }
+
+ /* Block average of power in the filter states. Used for
+ adaption power calculation. */
+
+ {
+ int new, old;
+
+ /* efficient "out with the old and in with the new" algorithm so
+ we don't have to recalculate over the whole block of
+ samples. */
+ new = (int)tx * (int)tx;
+ old = (int)ec->fir_state.history[ec->fir_state.curr_pos] *
+ (int)ec->fir_state.history[ec->fir_state.curr_pos];
+ ec->pstates +=
+ ((new - old) + (1 << (ec->log2taps - 1))) >> ec->log2taps;
+ if (ec->pstates < 0)
+ ec->pstates = 0;
+ }
+
+ /* Calculate short term average levels using simple single pole IIRs */
+
+ ec->ltxacc += abs(tx) - ec->ltx;
+ ec->ltx = (ec->ltxacc + (1 << 4)) >> 5;
+ ec->lrxacc += abs(rx) - ec->lrx;
+ ec->lrx = (ec->lrxacc + (1 << 4)) >> 5;
+
+ /* Foreground filter */
+
+ ec->fir_state.coeffs = ec->fir_taps16[0];
+ echo_value = fir16(&ec->fir_state, tx);
+ ec->clean = rx - echo_value;
+ ec->lcleanacc += abs(ec->clean) - ec->lclean;
+ ec->lclean = (ec->lcleanacc + (1 << 4)) >> 5;
+
+ /* Background filter */
+
+ echo_value = fir16(&ec->fir_state_bg, tx);
+ clean_bg = rx - echo_value;
+ ec->lclean_bgacc += abs(clean_bg) - ec->lclean_bg;
+ ec->lclean_bg = (ec->lclean_bgacc + (1 << 4)) >> 5;
+
+ /* Background Filter adaption */
+
+ /* Almost always adap bg filter, just simple DT and energy
+ detection to minimise adaption in cases of strong double talk.
+ However this is not critical for the dual path algorithm.
+ */
+ ec->factor = 0;
+ ec->shift = 0;
+ if ((ec->nonupdate_dwell == 0)) {
+ int p, logp, shift;
+
+ /* Determine:
+
+ f = Beta * clean_bg_rx/P ------ (1)
+
+ where P is the total power in the filter states.
+
+ The Boffins have shown that if we obey (1) we converge
+ quickly and avoid instability.
+
+ The correct factor f must be in Q30, as this is the fixed
+ point format required by the lms_adapt_bg() function,
+ therefore the scaled version of (1) is:
+
+ (2^30) * f = (2^30) * Beta * clean_bg_rx/P
+ factor = (2^30) * Beta * clean_bg_rx/P ----- (2)
+
+ We have chosen Beta = 0.25 by experiment, so:
+
+ factor = (2^30) * (2^-2) * clean_bg_rx/P
+
+ (30 - 2 - log2(P))
+ factor = clean_bg_rx 2 ----- (3)
+
+ To avoid a divide we approximate log2(P) as top_bit(P),
+ which returns the position of the highest non-zero bit in
+ P. This approximation introduces an error as large as a
+ factor of 2, but the algorithm seems to handle it OK.
+
+ Come to think of it a divide may not be a big deal on a
+ modern DSP, so its probably worth checking out the cycles
+ for a divide versus a top_bit() implementation.
+ */
+
+ p = MIN_TX_POWER_FOR_ADAPTION + ec->pstates;
+ logp = top_bit(p) + ec->log2taps;
+ shift = 30 - 2 - logp;
+ ec->shift = shift;
+
+ lms_adapt_bg(ec, clean_bg, shift);
+ }
+
+ /* very simple DTD to make sure we dont try and adapt with strong
+ near end speech */
+
+ ec->adapt = 0;
+ if ((ec->lrx > MIN_RX_POWER_FOR_ADAPTION) && (ec->lrx > ec->ltx))
+ ec->nonupdate_dwell = DTD_HANGOVER;
+ if (ec->nonupdate_dwell)
+ ec->nonupdate_dwell--;
+
+ /* Transfer logic */
+
+ /* These conditions are from the dual path paper [1], I messed with
+ them a bit to improve performance. */
+
+ if ((ec->adaption_mode & ECHO_CAN_USE_ADAPTION) &&
+ (ec->nonupdate_dwell == 0) &&
+ /* (ec->Lclean_bg < 0.875*ec->Lclean) */
+ (8 * ec->lclean_bg < 7 * ec->lclean) &&
+ /* (ec->Lclean_bg < 0.125*ec->Ltx) */
+ (8 * ec->lclean_bg < ec->ltx)) {
+ if (ec->cond_met == 6) {
+ /*
+ * BG filter has had better results for 6 consecutive
+ * samples
+ */
+ ec->adapt = 1;
+ memcpy(ec->fir_taps16[0], ec->fir_taps16[1],
+ ec->taps * sizeof(int16_t));
+ } else
+ ec->cond_met++;
+ } else
+ ec->cond_met = 0;
+
+ /* Non-Linear Processing */
+
+ ec->clean_nlp = ec->clean;
+ if (ec->adaption_mode & ECHO_CAN_USE_NLP) {
+ /*
+ * Non-linear processor - a fancy way to say "zap small
+ * signals, to avoid residual echo due to (uLaw/ALaw)
+ * non-linearity in the channel.".
+ */
+
+ if ((16 * ec->lclean < ec->ltx)) {
+ /*
+ * Our e/c has improved echo by at least 24 dB (each
+ * factor of 2 is 6dB, so 2*2*2*2=16 is the same as
+ * 6+6+6+6=24dB)
+ */
+ if (ec->adaption_mode & ECHO_CAN_USE_CNG) {
+ ec->cng_level = ec->lbgn;
+
+ /*
+ * Very elementary comfort noise generation.
+ * Just random numbers rolled off very vaguely
+ * Hoth-like. DR: This noise doesn't sound
+ * quite right to me - I suspect there are some
+ * overflow issues in the filtering as it's too
+ * "crackly".
+ * TODO: debug this, maybe just play noise at
+ * high level or look at spectrum.
+ */
+
+ ec->cng_rndnum =
+ 1664525U * ec->cng_rndnum + 1013904223U;
+ ec->cng_filter =
+ ((ec->cng_rndnum & 0xFFFF) - 32768 +
+ 5 * ec->cng_filter) >> 3;
+ ec->clean_nlp =
+ (ec->cng_filter * ec->cng_level * 8) >> 14;
+
+ } else if (ec->adaption_mode & ECHO_CAN_USE_CLIP) {
+ /* This sounds much better than CNG */
+ if (ec->clean_nlp > ec->lbgn)
+ ec->clean_nlp = ec->lbgn;
+ if (ec->clean_nlp < -ec->lbgn)
+ ec->clean_nlp = -ec->lbgn;
+ } else {
+ /*
+ * just mute the residual, doesn't sound very
+ * good, used mainly in G168 tests
+ */
+ ec->clean_nlp = 0;
+ }
+ } else {
+ /*
+ * Background noise estimator. I tried a few
+ * algorithms here without much luck. This very simple
+ * one seems to work best, we just average the level
+ * using a slow (1 sec time const) filter if the
+ * current level is less than a (experimentally
+ * derived) constant. This means we dont include high
+ * level signals like near end speech. When combined
+ * with CNG or especially CLIP seems to work OK.
+ */
+ if (ec->lclean < 40) {
+ ec->lbgn_acc += abs(ec->clean) - ec->lbgn;
+ ec->lbgn = (ec->lbgn_acc + (1 << 11)) >> 12;
+ }
+ }
+ }
+
+ /* Roll around the taps buffer */
+ if (ec->curr_pos <= 0)
+ ec->curr_pos = ec->taps;
+ ec->curr_pos--;
+
+ if (ec->adaption_mode & ECHO_CAN_DISABLE)
+ ec->clean_nlp = rx;
+
+ /* Output scaled back up again to match input scaling */
+
+ return (int16_t) ec->clean_nlp << 1;
+}
+EXPORT_SYMBOL_GPL(oslec_update);
+
+/* This function is separated from the echo canceller is it is usually called
+ as part of the tx process. See rx HP (DC blocking) filter above, it's
+ the same design.
+
+ Some soft phones send speech signals with a lot of low frequency
+ energy, e.g. down to 20Hz. This can make the hybrid non-linear
+ which causes the echo canceller to fall over. This filter can help
+ by removing any low frequency before it gets to the tx port of the
+ hybrid.
+
+ It can also help by removing and DC in the tx signal. DC is bad
+ for LMS algorithms.
+
+ This is one of the classic DC removal filters, adjusted to provide
+ sufficient bass rolloff to meet the above requirement to protect hybrids
+ from things that upset them. The difference between successive samples
+ produces a lousy HPF, and then a suitably placed pole flattens things out.
+ The final result is a nicely rolled off bass end. The filtering is
+ implemented with extended fractional precision, which noise shapes things,
+ giving very clean DC removal.
+*/
+
+int16_t oslec_hpf_tx(struct oslec_state *ec, int16_t tx)
+{
+ int tmp;
+ int tmp1;
+
+ if (ec->adaption_mode & ECHO_CAN_USE_TX_HPF) {
+ tmp = tx << 15;
+
+ /*
+ * Make sure the gain of the HPF is 1.0. The first can still
+ * saturate a little under impulse conditions, and it might
+ * roll to 32768 and need clipping on sustained peak level
+ * signals. However, the scale of such clipping is small, and
+ * the error due to any saturation should not markedly affect
+ * the downstream processing.
+ */
+ tmp -= (tmp >> 4);
+
+ ec->tx_1 += -(ec->tx_1 >> DC_LOG2BETA) + tmp - ec->tx_2;
+ tmp1 = ec->tx_1 >> 15;
+ if (tmp1 > 32767)
+ tmp1 = 32767;
+ if (tmp1 < -32767)
+ tmp1 = -32767;
+ tx = tmp1;
+ ec->tx_2 = tmp;
+ }
+
+ return tx;
+}
+EXPORT_SYMBOL_GPL(oslec_hpf_tx);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("David Rowe");
+MODULE_DESCRIPTION("Open Source Line Echo Canceller");
+MODULE_VERSION("0.3.0");
diff --git a/drivers/misc/echo/echo.h b/drivers/misc/echo/echo.h
new file mode 100644
index 0000000..9b08c63
--- /dev/null
+++ b/drivers/misc/echo/echo.h
@@ -0,0 +1,187 @@
+/*
+ * SpanDSP - a series of DSP components for telephony
+ *
+ * echo.c - A line echo canceller. This code is being developed
+ * against and partially complies with G168.
+ *
+ * Written by Steve Underwood <steveu@coppice.org>
+ * and David Rowe <david_at_rowetel_dot_com>
+ *
+ * Copyright (C) 2001 Steve Underwood and 2007 David Rowe
+ *
+ * All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef __ECHO_H
+#define __ECHO_H
+
+/*
+Line echo cancellation for voice
+
+What does it do?
+
+This module aims to provide G.168-2002 compliant echo cancellation, to remove
+electrical echoes (e.g. from 2-4 wire hybrids) from voice calls.
+
+How does it work?
+
+The heart of the echo cancellor is FIR filter. This is adapted to match the
+echo impulse response of the telephone line. It must be long enough to
+adequately cover the duration of that impulse response. The signal transmitted
+to the telephone line is passed through the FIR filter. Once the FIR is
+properly adapted, the resulting output is an estimate of the echo signal
+received from the line. This is subtracted from the received signal. The result
+is an estimate of the signal which originated at the far end of the line, free
+from echos of our own transmitted signal.
+
+The least mean squares (LMS) algorithm is attributed to Widrow and Hoff, and
+was introduced in 1960. It is the commonest form of filter adaption used in
+things like modem line equalisers and line echo cancellers. There it works very
+well. However, it only works well for signals of constant amplitude. It works
+very poorly for things like speech echo cancellation, where the signal level
+varies widely. This is quite easy to fix. If the signal level is normalised -
+similar to applying AGC - LMS can work as well for a signal of varying
+amplitude as it does for a modem signal. This normalised least mean squares
+(NLMS) algorithm is the commonest one used for speech echo cancellation. Many
+other algorithms exist - e.g. RLS (essentially the same as Kalman filtering),
+FAP, etc. Some perform significantly better than NLMS. However, factors such
+as computational complexity and patents favour the use of NLMS.
+
+A simple refinement to NLMS can improve its performance with speech. NLMS tends
+to adapt best to the strongest parts of a signal. If the signal is white noise,
+the NLMS algorithm works very well. However, speech has more low frequency than
+high frequency content. Pre-whitening (i.e. filtering the signal to flatten its
+spectrum) the echo signal improves the adapt rate for speech, and ensures the
+final residual signal is not heavily biased towards high frequencies. A very
+low complexity filter is adequate for this, so pre-whitening adds little to the
+compute requirements of the echo canceller.
+
+An FIR filter adapted using pre-whitened NLMS performs well, provided certain
+conditions are met:
+
+ - The transmitted signal has poor self-correlation.
+ - There is no signal being generated within the environment being
+ cancelled.
+
+The difficulty is that neither of these can be guaranteed.
+
+If the adaption is performed while transmitting noise (or something fairly
+noise like, such as voice) the adaption works very well. If the adaption is
+performed while transmitting something highly correlative (typically narrow
+band energy such as signalling tones or DTMF), the adaption can go seriously
+wrong. The reason is there is only one solution for the adaption on a near
+random signal - the impulse response of the line. For a repetitive signal,
+there are any number of solutions which converge the adaption, and nothing
+guides the adaption to choose the generalised one. Allowing an untrained
+canceller to converge on this kind of narrowband energy probably a good thing,
+since at least it cancels the tones. Allowing a well converged canceller to
+continue converging on such energy is just a way to ruin its generalised
+adaption. A narrowband detector is needed, so adapation can be suspended at
+appropriate times.
+
+The adaption process is based on trying to eliminate the received signal. When
+there is any signal from within the environment being cancelled it may upset
+the adaption process. Similarly, if the signal we are transmitting is small,
+noise may dominate and disturb the adaption process. If we can ensure that the
+adaption is only performed when we are transmitting a significant signal level,
+and the environment is not, things will be OK. Clearly, it is easy to tell when
+we are sending a significant signal. Telling, if the environment is generating
+a significant signal, and doing it with sufficient speed that the adaption will
+not have diverged too much more we stop it, is a little harder.
+
+The key problem in detecting when the environment is sourcing significant
+energy is that we must do this very quickly. Given a reasonably long sample of
+the received signal, there are a number of strategies which may be used to
+assess whether that signal contains a strong far end component. However, by the
+time that assessment is complete the far end signal will have already caused
+major mis-convergence in the adaption process. An assessment algorithm is
+needed which produces a fairly accurate result from a very short burst of far
+end energy.
+
+How do I use it?
+
+The echo cancellor processes both the transmit and receive streams sample by
+sample. The processing function is not declared inline. Unfortunately,
+cancellation requires many operations per sample, so the call overhead is only
+a minor burden.
+*/
+
+#include "fir.h"
+#include "oslec.h"
+
+/*
+ G.168 echo canceller descriptor. This defines the working state for a line
+ echo canceller.
+*/
+struct oslec_state {
+ int16_t tx;
+ int16_t rx;
+ int16_t clean;
+ int16_t clean_nlp;
+
+ int nonupdate_dwell;
+ int curr_pos;
+ int taps;
+ int log2taps;
+ int adaption_mode;
+
+ int cond_met;
+ int32_t pstates;
+ int16_t adapt;
+ int32_t factor;
+ int16_t shift;
+
+ /* Average levels and averaging filter states */
+ int ltxacc;
+ int lrxacc;
+ int lcleanacc;
+ int lclean_bgacc;
+ int ltx;
+ int lrx;
+ int lclean;
+ int lclean_bg;
+ int lbgn;
+ int lbgn_acc;
+ int lbgn_upper;
+ int lbgn_upper_acc;
+
+ /* foreground and background filter states */
+ struct fir16_state_t fir_state;
+ struct fir16_state_t fir_state_bg;
+ int16_t *fir_taps16[2];
+
+ /* DC blocking filter states */
+ int tx_1;
+ int tx_2;
+ int rx_1;
+ int rx_2;
+
+ /* optional High Pass Filter states */
+ int32_t xvtx[5];
+ int32_t yvtx[5];
+ int32_t xvrx[5];
+ int32_t yvrx[5];
+
+ /* Parameters for the optional Hoth noise generator */
+ int cng_level;
+ int cng_rndnum;
+ int cng_filter;
+
+ /* snapshot sample of coeffs used for development */
+ int16_t *snapshot;
+};
+
+#endif /* __ECHO_H */
diff --git a/drivers/misc/echo/fir.h b/drivers/misc/echo/fir.h
new file mode 100644
index 0000000..7b9fabf
--- /dev/null
+++ b/drivers/misc/echo/fir.h
@@ -0,0 +1,216 @@
+/*
+ * SpanDSP - a series of DSP components for telephony
+ *
+ * fir.h - General telephony FIR routines
+ *
+ * Written by Steve Underwood <steveu@coppice.org>
+ *
+ * Copyright (C) 2002 Steve Underwood
+ *
+ * All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#if !defined(_FIR_H_)
+#define _FIR_H_
+
+/*
+ Blackfin NOTES & IDEAS:
+
+ A simple dot product function is used to implement the filter. This performs
+ just one MAC/cycle which is inefficient but was easy to implement as a first
+ pass. The current Blackfin code also uses an unrolled form of the filter
+ history to avoid 0 length hardware loop issues. This is wasteful of
+ memory.
+
+ Ideas for improvement:
+
+ 1/ Rewrite filter for dual MAC inner loop. The issue here is handling
+ history sample offsets that are 16 bit aligned - the dual MAC needs
+ 32 bit aligmnent. There are some good examples in libbfdsp.
+
+ 2/ Use the hardware circular buffer facility tohalve memory usage.
+
+ 3/ Consider using internal memory.
+
+ Using less memory might also improve speed as cache misses will be
+ reduced. A drop in MIPs and memory approaching 50% should be
+ possible.
+
+ The foreground and background filters currenlty use a total of
+ about 10 MIPs/ch as measured with speedtest.c on a 256 TAP echo
+ can.
+*/
+
+/*
+ * 16 bit integer FIR descriptor. This defines the working state for a single
+ * instance of an FIR filter using 16 bit integer coefficients.
+ */
+struct fir16_state_t {
+ int taps;
+ int curr_pos;
+ const int16_t *coeffs;
+ int16_t *history;
+};
+
+/*
+ * 32 bit integer FIR descriptor. This defines the working state for a single
+ * instance of an FIR filter using 32 bit integer coefficients, and filtering
+ * 16 bit integer data.
+ */
+struct fir32_state_t {
+ int taps;
+ int curr_pos;
+ const int32_t *coeffs;
+ int16_t *history;
+};
+
+/*
+ * Floating point FIR descriptor. This defines the working state for a single
+ * instance of an FIR filter using floating point coefficients and data.
+ */
+struct fir_float_state_t {
+ int taps;
+ int curr_pos;
+ const float *coeffs;
+ float *history;
+};
+
+static inline const int16_t *fir16_create(struct fir16_state_t *fir,
+ const int16_t *coeffs, int taps)
+{
+ fir->taps = taps;
+ fir->curr_pos = taps - 1;
+ fir->coeffs = coeffs;
+#if defined(__bfin__)
+ fir->history = kcalloc(2 * taps, sizeof(int16_t), GFP_KERNEL);
+#else
+ fir->history = kcalloc(taps, sizeof(int16_t), GFP_KERNEL);
+#endif
+ return fir->history;
+}
+
+static inline void fir16_flush(struct fir16_state_t *fir)
+{
+#if defined(__bfin__)
+ memset(fir->history, 0, 2 * fir->taps * sizeof(int16_t));
+#else
+ memset(fir->history, 0, fir->taps * sizeof(int16_t));
+#endif
+}
+
+static inline void fir16_free(struct fir16_state_t *fir)
+{
+ kfree(fir->history);
+}
+
+#ifdef __bfin__
+static inline int32_t dot_asm(short *x, short *y, int len)
+{
+ int dot;
+
+ len--;
+
+ __asm__("I0 = %1;\n\t"
+ "I1 = %2;\n\t"
+ "A0 = 0;\n\t"
+ "R0.L = W[I0++] || R1.L = W[I1++];\n\t"
+ "LOOP dot%= LC0 = %3;\n\t"
+ "LOOP_BEGIN dot%=;\n\t"
+ "A0 += R0.L * R1.L (IS) || R0.L = W[I0++] || R1.L = W[I1++];\n\t"
+ "LOOP_END dot%=;\n\t"
+ "A0 += R0.L*R1.L (IS);\n\t"
+ "R0 = A0;\n\t"
+ "%0 = R0;\n\t"
+ : "=&d"(dot)
+ : "a"(x), "a"(y), "a"(len)
+ : "I0", "I1", "A1", "A0", "R0", "R1"
+ );
+
+ return dot;
+}
+#endif
+
+static inline int16_t fir16(struct fir16_state_t *fir, int16_t sample)
+{
+ int32_t y;
+#if defined(__bfin__)
+ fir->history[fir->curr_pos] = sample;
+ fir->history[fir->curr_pos + fir->taps] = sample;
+ y = dot_asm((int16_t *) fir->coeffs, &fir->history[fir->curr_pos],
+ fir->taps);
+#else
+ int i;
+ int offset1;
+ int offset2;
+
+ fir->history[fir->curr_pos] = sample;
+
+ offset2 = fir->curr_pos;
+ offset1 = fir->taps - offset2;
+ y = 0;
+ for (i = fir->taps - 1; i >= offset1; i--)
+ y += fir->coeffs[i] * fir->history[i - offset1];
+ for (; i >= 0; i--)
+ y += fir->coeffs[i] * fir->history[i + offset2];
+#endif
+ if (fir->curr_pos <= 0)
+ fir->curr_pos = fir->taps;
+ fir->curr_pos--;
+ return (int16_t) (y >> 15);
+}
+
+static inline const int16_t *fir32_create(struct fir32_state_t *fir,
+ const int32_t *coeffs, int taps)
+{
+ fir->taps = taps;
+ fir->curr_pos = taps - 1;
+ fir->coeffs = coeffs;
+ fir->history = kcalloc(taps, sizeof(int16_t), GFP_KERNEL);
+ return fir->history;
+}
+
+static inline void fir32_flush(struct fir32_state_t *fir)
+{
+ memset(fir->history, 0, fir->taps * sizeof(int16_t));
+}
+
+static inline void fir32_free(struct fir32_state_t *fir)
+{
+ kfree(fir->history);
+}
+
+static inline int16_t fir32(struct fir32_state_t *fir, int16_t sample)
+{
+ int i;
+ int32_t y;
+ int offset1;
+ int offset2;
+
+ fir->history[fir->curr_pos] = sample;
+ offset2 = fir->curr_pos;
+ offset1 = fir->taps - offset2;
+ y = 0;
+ for (i = fir->taps - 1; i >= offset1; i--)
+ y += fir->coeffs[i] * fir->history[i - offset1];
+ for (; i >= 0; i--)
+ y += fir->coeffs[i] * fir->history[i + offset2];
+ if (fir->curr_pos <= 0)
+ fir->curr_pos = fir->taps;
+ fir->curr_pos--;
+ return (int16_t) (y >> 15);
+}
+
+#endif
diff --git a/drivers/misc/echo/oslec.h b/drivers/misc/echo/oslec.h
new file mode 100644
index 0000000..f417536
--- /dev/null
+++ b/drivers/misc/echo/oslec.h
@@ -0,0 +1,94 @@
+/*
+ * OSLEC - A line echo canceller. This code is being developed
+ * against and partially complies with G168. Using code from SpanDSP
+ *
+ * Written by Steve Underwood <steveu@coppice.org>
+ * and David Rowe <david_at_rowetel_dot_com>
+ *
+ * Copyright (C) 2001 Steve Underwood and 2007-2008 David Rowe
+ *
+ * All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+#ifndef __OSLEC_H
+#define __OSLEC_H
+
+/* Mask bits for the adaption mode */
+#define ECHO_CAN_USE_ADAPTION 0x01
+#define ECHO_CAN_USE_NLP 0x02
+#define ECHO_CAN_USE_CNG 0x04
+#define ECHO_CAN_USE_CLIP 0x08
+#define ECHO_CAN_USE_TX_HPF 0x10
+#define ECHO_CAN_USE_RX_HPF 0x20
+#define ECHO_CAN_DISABLE 0x40
+
+/**
+ * oslec_state: G.168 echo canceller descriptor.
+ *
+ * This defines the working state for a line echo canceller.
+ */
+struct oslec_state;
+
+/**
+ * oslec_create - Create a voice echo canceller context.
+ * @len: The length of the canceller, in samples.
+ * @return: The new canceller context, or NULL if the canceller could not be
+ * created.
+ */
+struct oslec_state *oslec_create(int len, int adaption_mode);
+
+/**
+ * oslec_free - Free a voice echo canceller context.
+ * @ec: The echo canceller context.
+ */
+void oslec_free(struct oslec_state *ec);
+
+/**
+ * oslec_flush - Flush (reinitialise) a voice echo canceller context.
+ * @ec: The echo canceller context.
+ */
+void oslec_flush(struct oslec_state *ec);
+
+/**
+ * oslec_adaption_mode - set the adaption mode of a voice echo canceller context.
+ * @ec The echo canceller context.
+ * @adaption_mode: The mode.
+ */
+void oslec_adaption_mode(struct oslec_state *ec, int adaption_mode);
+
+void oslec_snapshot(struct oslec_state *ec);
+
+/**
+ * oslec_update: Process a sample through a voice echo canceller.
+ * @ec: The echo canceller context.
+ * @tx: The transmitted audio sample.
+ * @rx: The received audio sample.
+ *
+ * The return value is the clean (echo cancelled) received sample.
+ */
+int16_t oslec_update(struct oslec_state *ec, int16_t tx, int16_t rx);
+
+/**
+ * oslec_hpf_tx: Process to high pass filter the tx signal.
+ * @ec: The echo canceller context.
+ * @tx: The transmitted auio sample.
+ *
+ * The return value is the HP filtered transmit sample, send this to your D/A.
+ */
+int16_t oslec_hpf_tx(struct oslec_state *ec, int16_t tx);
+
+#endif /* __OSLEC_H */
diff --git a/drivers/misc/eeprom/Kconfig b/drivers/misc/eeprom/Kconfig
new file mode 100644
index 0000000..04f2e1f
--- /dev/null
+++ b/drivers/misc/eeprom/Kconfig
@@ -0,0 +1,99 @@
+menu "EEPROM support"
+
+config EEPROM_AT24
+ tristate "I2C EEPROMs / RAMs / ROMs from most vendors"
+ depends on I2C && SYSFS
+ help
+ Enable this driver to get read/write support to most I2C EEPROMs
+ and compatible devices like FRAMs, SRAMs, ROMs etc. After you
+ configure the driver to know about each chip on your target
+ board. Use these generic chip names, instead of vendor-specific
+ ones like at24c64, 24lc02 or fm24c04:
+
+ 24c00, 24c01, 24c02, spd (readonly 24c02), 24c04, 24c08,
+ 24c16, 24c32, 24c64, 24c128, 24c256, 24c512, 24c1024
+
+ Unless you like data loss puzzles, always be sure that any chip
+ you configure as a 24c32 (32 kbit) or larger is NOT really a
+ 24c16 (16 kbit) or smaller, and vice versa. Marking the chip
+ as read-only won't help recover from this. Also, if your chip
+ has any software write-protect mechanism you may want to review the
+ code to make sure this driver won't turn it on by accident.
+
+ If you use this with an SMBus adapter instead of an I2C adapter,
+ full functionality is not available. Only smaller devices are
+ supported (24c16 and below, max 4 kByte).
+
+ This driver can also be built as a module. If so, the module
+ will be called at24.
+
+config EEPROM_AT25
+ tristate "SPI EEPROMs from most vendors"
+ depends on SPI && SYSFS
+ help
+ Enable this driver to get read/write support to most SPI EEPROMs,
+ after you configure the board init code to know about each eeprom
+ on your target board.
+
+ This driver can also be built as a module. If so, the module
+ will be called at25.
+
+config EEPROM_LEGACY
+ tristate "Old I2C EEPROM reader"
+ depends on I2C && SYSFS
+ help
+ If you say yes here you get read-only access to the EEPROM data
+ available on modern memory DIMMs and Sony Vaio laptops via I2C. Such
+ EEPROMs could theoretically be available on other devices as well.
+
+ This driver can also be built as a module. If so, the module
+ will be called eeprom.
+
+config EEPROM_MAX6875
+ tristate "Maxim MAX6874/5 power supply supervisor"
+ depends on I2C
+ help
+ If you say yes here you get read-only support for the user EEPROM of
+ the Maxim MAX6874/5 EEPROM-programmable, quad power-supply
+ sequencer/supervisor.
+
+ All other features of this chip should be accessed via i2c-dev.
+
+ This driver can also be built as a module. If so, the module
+ will be called max6875.
+
+
+config EEPROM_93CX6
+ tristate "EEPROM 93CX6 support"
+ help
+ This is a driver for the EEPROM chipsets 93c46 and 93c66.
+ The driver supports both read as well as write commands.
+
+ If unsure, say N.
+
+config EEPROM_93XX46
+ tristate "Microwire EEPROM 93XX46 support"
+ depends on SPI && SYSFS
+ help
+ Driver for the microwire EEPROM chipsets 93xx46x. The driver
+ supports both read and write commands and also the command to
+ erase the whole EEPROM.
+
+ This driver can also be built as a module. If so, the module
+ will be called eeprom_93xx46.
+
+ If unsure, say N.
+
+config EEPROM_DIGSY_MTC_CFG
+ bool "DigsyMTC display configuration EEPROMs device"
+ depends on GPIO_MPC5200 && SPI_GPIO
+ help
+ This option enables access to display configuration EEPROMs
+ on digsy_mtc board. You have to additionally select Microwire
+ EEPROM 93XX46 driver. sysfs entries will be created for that
+ EEPROM allowing to read/write the configuration data or to
+ erase the whole EEPROM.
+
+ If unsure, say N.
+
+endmenu
diff --git a/drivers/misc/eeprom/Makefile b/drivers/misc/eeprom/Makefile
new file mode 100644
index 0000000..fc1e81d
--- /dev/null
+++ b/drivers/misc/eeprom/Makefile
@@ -0,0 +1,7 @@
+obj-$(CONFIG_EEPROM_AT24) += at24.o
+obj-$(CONFIG_EEPROM_AT25) += at25.o
+obj-$(CONFIG_EEPROM_LEGACY) += eeprom.o
+obj-$(CONFIG_EEPROM_MAX6875) += max6875.o
+obj-$(CONFIG_EEPROM_93CX6) += eeprom_93cx6.o
+obj-$(CONFIG_EEPROM_93XX46) += eeprom_93xx46.o
+obj-$(CONFIG_EEPROM_DIGSY_MTC_CFG) += digsy_mtc_eeprom.o
diff --git a/drivers/misc/eeprom/at24.c b/drivers/misc/eeprom/at24.c
new file mode 100644
index 0000000..5d7c090
--- /dev/null
+++ b/drivers/misc/eeprom/at24.c
@@ -0,0 +1,706 @@
+/*
+ * at24.c - handle most I2C EEPROMs
+ *
+ * Copyright (C) 2005-2007 David Brownell
+ * Copyright (C) 2008 Wolfram Sang, Pengutronix
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/delay.h>
+#include <linux/mutex.h>
+#include <linux/sysfs.h>
+#include <linux/mod_devicetable.h>
+#include <linux/log2.h>
+#include <linux/bitops.h>
+#include <linux/jiffies.h>
+#include <linux/of.h>
+#include <linux/acpi.h>
+#include <linux/i2c.h>
+#include <linux/platform_data/at24.h>
+
+/*
+ * I2C EEPROMs from most vendors are inexpensive and mostly interchangeable.
+ * Differences between different vendor product lines (like Atmel AT24C or
+ * MicroChip 24LC, etc) won't much matter for typical read/write access.
+ * There are also I2C RAM chips, likewise interchangeable. One example
+ * would be the PCF8570, which acts like a 24c02 EEPROM (256 bytes).
+ *
+ * However, misconfiguration can lose data. "Set 16-bit memory address"
+ * to a part with 8-bit addressing will overwrite data. Writing with too
+ * big a page size also loses data. And it's not safe to assume that the
+ * conventional addresses 0x50..0x57 only hold eeproms; a PCF8563 RTC
+ * uses 0x51, for just one example.
+ *
+ * Accordingly, explicit board-specific configuration data should be used
+ * in almost all cases. (One partial exception is an SMBus used to access
+ * "SPD" data for DRAM sticks. Those only use 24c02 EEPROMs.)
+ *
+ * So this driver uses "new style" I2C driver binding, expecting to be
+ * told what devices exist. That may be in arch/X/mach-Y/board-Z.c or
+ * similar kernel-resident tables; or, configuration data coming from
+ * a bootloader.
+ *
+ * Other than binding model, current differences from "eeprom" driver are
+ * that this one handles write access and isn't restricted to 24c02 devices.
+ * It also handles larger devices (32 kbit and up) with two-byte addresses,
+ * which won't work on pure SMBus systems.
+ */
+
+struct at24_data {
+ struct at24_platform_data chip;
+ struct memory_accessor macc;
+ int use_smbus;
+ int use_smbus_write;
+
+ /*
+ * Lock protects against activities from other Linux tasks,
+ * but not from changes by other I2C masters.
+ */
+ struct mutex lock;
+ struct bin_attribute bin;
+
+ u8 *writebuf;
+ unsigned write_max;
+ unsigned num_addresses;
+
+ /*
+ * Some chips tie up multiple I2C addresses; dummy devices reserve
+ * them for us, and we'll use them with SMBus calls.
+ */
+ struct i2c_client *client[];
+};
+
+/*
+ * This parameter is to help this driver avoid blocking other drivers out
+ * of I2C for potentially troublesome amounts of time. With a 100 kHz I2C
+ * clock, one 256 byte read takes about 1/43 second which is excessive;
+ * but the 1/170 second it takes at 400 kHz may be quite reasonable; and
+ * at 1 MHz (Fm+) a 1/430 second delay could easily be invisible.
+ *
+ * This value is forced to be a power of two so that writes align on pages.
+ */
+static unsigned io_limit = 128;
+module_param(io_limit, uint, 0);
+MODULE_PARM_DESC(io_limit, "Maximum bytes per I/O (default 128)");
+
+/*
+ * Specs often allow 5 msec for a page write, sometimes 20 msec;
+ * it's important to recover from write timeouts.
+ */
+static unsigned write_timeout = 25;
+module_param(write_timeout, uint, 0);
+MODULE_PARM_DESC(write_timeout, "Time (in ms) to try writes (default 25)");
+
+#define AT24_SIZE_BYTELEN 5
+#define AT24_SIZE_FLAGS 8
+
+#define AT24_BITMASK(x) (BIT(x) - 1)
+
+/* create non-zero magic value for given eeprom parameters */
+#define AT24_DEVICE_MAGIC(_len, _flags) \
+ ((1 << AT24_SIZE_FLAGS | (_flags)) \
+ << AT24_SIZE_BYTELEN | ilog2(_len))
+
+static const struct i2c_device_id at24_ids[] = {
+ /* needs 8 addresses as A0-A2 are ignored */
+ { "24c00", AT24_DEVICE_MAGIC(128 / 8, AT24_FLAG_TAKE8ADDR) },
+ /* old variants can't be handled with this generic entry! */
+ { "24c01", AT24_DEVICE_MAGIC(1024 / 8, 0) },
+ { "24c02", AT24_DEVICE_MAGIC(2048 / 8, 0) },
+ /* spd is a 24c02 in memory DIMMs */
+ { "spd", AT24_DEVICE_MAGIC(2048 / 8,
+ AT24_FLAG_READONLY | AT24_FLAG_IRUGO) },
+ { "24c04", AT24_DEVICE_MAGIC(4096 / 8, 0) },
+ /* 24rf08 quirk is handled at i2c-core */
+ { "24c08", AT24_DEVICE_MAGIC(8192 / 8, 0) },
+ { "24c16", AT24_DEVICE_MAGIC(16384 / 8, 0) },
+ { "24c32", AT24_DEVICE_MAGIC(32768 / 8, AT24_FLAG_ADDR16) },
+ { "24c64", AT24_DEVICE_MAGIC(65536 / 8, AT24_FLAG_ADDR16) },
+ { "24c128", AT24_DEVICE_MAGIC(131072 / 8, AT24_FLAG_ADDR16) },
+ { "24c256", AT24_DEVICE_MAGIC(262144 / 8, AT24_FLAG_ADDR16) },
+ { "24c512", AT24_DEVICE_MAGIC(524288 / 8, AT24_FLAG_ADDR16) },
+ { "24c1024", AT24_DEVICE_MAGIC(1048576 / 8, AT24_FLAG_ADDR16) },
+ { "at24", 0 },
+ { /* END OF LIST */ }
+};
+MODULE_DEVICE_TABLE(i2c, at24_ids);
+
+static const struct acpi_device_id at24_acpi_ids[] = {
+ { "INT3499", AT24_DEVICE_MAGIC(8192 / 8, 0) },
+ { }
+};
+MODULE_DEVICE_TABLE(acpi, at24_acpi_ids);
+
+/*-------------------------------------------------------------------------*/
+
+/*
+ * This routine supports chips which consume multiple I2C addresses. It
+ * computes the addressing information to be used for a given r/w request.
+ * Assumes that sanity checks for offset happened at sysfs-layer.
+ */
+static struct i2c_client *at24_translate_offset(struct at24_data *at24,
+ unsigned *offset)
+{
+ unsigned i;
+
+ if (at24->chip.flags & AT24_FLAG_ADDR16) {
+ i = *offset >> 16;
+ *offset &= 0xffff;
+ } else {
+ i = *offset >> 8;
+ *offset &= 0xff;
+ }
+
+ return at24->client[i];
+}
+
+static ssize_t at24_eeprom_read(struct at24_data *at24, char *buf,
+ unsigned offset, size_t count)
+{
+ struct i2c_msg msg[2];
+ u8 msgbuf[2];
+ struct i2c_client *client;
+ unsigned long timeout, read_time;
+ int status, i;
+
+ memset(msg, 0, sizeof(msg));
+
+ /*
+ * REVISIT some multi-address chips don't rollover page reads to
+ * the next slave address, so we may need to truncate the count.
+ * Those chips might need another quirk flag.
+ *
+ * If the real hardware used four adjacent 24c02 chips and that
+ * were misconfigured as one 24c08, that would be a similar effect:
+ * one "eeprom" file not four, but larger reads would fail when
+ * they crossed certain pages.
+ */
+
+ /*
+ * Slave address and byte offset derive from the offset. Always
+ * set the byte address; on a multi-master board, another master
+ * may have changed the chip's "current" address pointer.
+ */
+ client = at24_translate_offset(at24, &offset);
+
+ if (count > io_limit)
+ count = io_limit;
+
+ if (at24->use_smbus) {
+ /* Smaller eeproms can work given some SMBus extension calls */
+ if (count > I2C_SMBUS_BLOCK_MAX)
+ count = I2C_SMBUS_BLOCK_MAX;
+ } else {
+ /*
+ * When we have a better choice than SMBus calls, use a
+ * combined I2C message. Write address; then read up to
+ * io_limit data bytes. Note that read page rollover helps us
+ * here (unlike writes). msgbuf is u8 and will cast to our
+ * needs.
+ */
+ i = 0;
+ if (at24->chip.flags & AT24_FLAG_ADDR16)
+ msgbuf[i++] = offset >> 8;
+ msgbuf[i++] = offset;
+
+ msg[0].addr = client->addr;
+ msg[0].buf = msgbuf;
+ msg[0].len = i;
+
+ msg[1].addr = client->addr;
+ msg[1].flags = I2C_M_RD;
+ msg[1].buf = buf;
+ msg[1].len = count;
+ }
+
+ /*
+ * Reads fail if the previous write didn't complete yet. We may
+ * loop a few times until this one succeeds, waiting at least
+ * long enough for one entire page write to work.
+ */
+ timeout = jiffies + msecs_to_jiffies(write_timeout);
+ do {
+ read_time = jiffies;
+ if (at24->use_smbus) {
+ status = i2c_smbus_read_i2c_block_data_or_emulated(client, offset,
+ count, buf);
+ } else {
+ status = i2c_transfer(client->adapter, msg, 2);
+ if (status == 2)
+ status = count;
+ }
+ dev_dbg(&client->dev, "read %zu@%d --> %d (%ld)\n",
+ count, offset, status, jiffies);
+
+ if (status == count)
+ return count;
+
+ /* REVISIT: at HZ=100, this is sloooow */
+ msleep(1);
+ } while (time_before(read_time, timeout));
+
+ return -ETIMEDOUT;
+}
+
+static ssize_t at24_read(struct at24_data *at24,
+ char *buf, loff_t off, size_t count)
+{
+ ssize_t retval = 0;
+
+ if (unlikely(!count))
+ return count;
+
+ /*
+ * Read data from chip, protecting against concurrent updates
+ * from this host, but not from other I2C masters.
+ */
+ mutex_lock(&at24->lock);
+
+ while (count) {
+ ssize_t status;
+
+ status = at24_eeprom_read(at24, buf, off, count);
+ if (status <= 0) {
+ if (retval == 0)
+ retval = status;
+ break;
+ }
+ buf += status;
+ off += status;
+ count -= status;
+ retval += status;
+ }
+
+ mutex_unlock(&at24->lock);
+
+ return retval;
+}
+
+static ssize_t at24_bin_read(struct file *filp, struct kobject *kobj,
+ struct bin_attribute *attr,
+ char *buf, loff_t off, size_t count)
+{
+ struct at24_data *at24;
+
+ at24 = dev_get_drvdata(container_of(kobj, struct device, kobj));
+ return at24_read(at24, buf, off, count);
+}
+
+
+/*
+ * Note that if the hardware write-protect pin is pulled high, the whole
+ * chip is normally write protected. But there are plenty of product
+ * variants here, including OTP fuses and partial chip protect.
+ *
+ * We only use page mode writes; the alternative is sloooow. This routine
+ * writes at most one page.
+ */
+static ssize_t at24_eeprom_write(struct at24_data *at24, const char *buf,
+ unsigned offset, size_t count)
+{
+ struct i2c_client *client;
+ struct i2c_msg msg;
+ ssize_t status = 0;
+ unsigned long timeout, write_time;
+ unsigned next_page;
+
+ /* Get corresponding I2C address and adjust offset */
+ client = at24_translate_offset(at24, &offset);
+
+ /* write_max is at most a page */
+ if (count > at24->write_max)
+ count = at24->write_max;
+
+ /* Never roll over backwards, to the start of this page */
+ next_page = roundup(offset + 1, at24->chip.page_size);
+ if (offset + count > next_page)
+ count = next_page - offset;
+
+ /* If we'll use I2C calls for I/O, set up the message */
+ if (!at24->use_smbus) {
+ int i = 0;
+
+ msg.addr = client->addr;
+ msg.flags = 0;
+
+ /* msg.buf is u8 and casts will mask the values */
+ msg.buf = at24->writebuf;
+ if (at24->chip.flags & AT24_FLAG_ADDR16)
+ msg.buf[i++] = offset >> 8;
+
+ msg.buf[i++] = offset;
+ memcpy(&msg.buf[i], buf, count);
+ msg.len = i + count;
+ }
+
+ /*
+ * Writes fail if the previous one didn't complete yet. We may
+ * loop a few times until this one succeeds, waiting at least
+ * long enough for one entire page write to work.
+ */
+ timeout = jiffies + msecs_to_jiffies(write_timeout);
+ do {
+ write_time = jiffies;
+ if (at24->use_smbus_write) {
+ switch (at24->use_smbus_write) {
+ case I2C_SMBUS_I2C_BLOCK_DATA:
+ status = i2c_smbus_write_i2c_block_data(client,
+ offset, count, buf);
+ break;
+ case I2C_SMBUS_BYTE_DATA:
+ status = i2c_smbus_write_byte_data(client,
+ offset, buf[0]);
+ break;
+ }
+
+ if (status == 0)
+ status = count;
+ } else {
+ status = i2c_transfer(client->adapter, &msg, 1);
+ if (status == 1)
+ status = count;
+ }
+ dev_dbg(&client->dev, "write %zu@%d --> %zd (%ld)\n",
+ count, offset, status, jiffies);
+
+ if (status == count)
+ return count;
+
+ /* REVISIT: at HZ=100, this is sloooow */
+ msleep(1);
+ } while (time_before(write_time, timeout));
+
+ return -ETIMEDOUT;
+}
+
+static ssize_t at24_write(struct at24_data *at24, const char *buf, loff_t off,
+ size_t count)
+{
+ ssize_t retval = 0;
+
+ if (unlikely(!count))
+ return count;
+
+ /*
+ * Write data to chip, protecting against concurrent updates
+ * from this host, but not from other I2C masters.
+ */
+ mutex_lock(&at24->lock);
+
+ while (count) {
+ ssize_t status;
+
+ status = at24_eeprom_write(at24, buf, off, count);
+ if (status <= 0) {
+ if (retval == 0)
+ retval = status;
+ break;
+ }
+ buf += status;
+ off += status;
+ count -= status;
+ retval += status;
+ }
+
+ mutex_unlock(&at24->lock);
+
+ return retval;
+}
+
+static ssize_t at24_bin_write(struct file *filp, struct kobject *kobj,
+ struct bin_attribute *attr,
+ char *buf, loff_t off, size_t count)
+{
+ struct at24_data *at24;
+
+ at24 = dev_get_drvdata(container_of(kobj, struct device, kobj));
+ return at24_write(at24, buf, off, count);
+}
+
+/*-------------------------------------------------------------------------*/
+
+/*
+ * This lets other kernel code access the eeprom data. For example, it
+ * might hold a board's Ethernet address, or board-specific calibration
+ * data generated on the manufacturing floor.
+ */
+
+static ssize_t at24_macc_read(struct memory_accessor *macc, char *buf,
+ off_t offset, size_t count)
+{
+ struct at24_data *at24 = container_of(macc, struct at24_data, macc);
+
+ return at24_read(at24, buf, offset, count);
+}
+
+static ssize_t at24_macc_write(struct memory_accessor *macc, const char *buf,
+ off_t offset, size_t count)
+{
+ struct at24_data *at24 = container_of(macc, struct at24_data, macc);
+
+ return at24_write(at24, buf, offset, count);
+}
+
+/*-------------------------------------------------------------------------*/
+
+#ifdef CONFIG_OF
+static void at24_get_ofdata(struct i2c_client *client,
+ struct at24_platform_data *chip)
+{
+ const __be32 *val;
+ struct device_node *node = client->dev.of_node;
+
+ if (node) {
+ if (of_get_property(node, "read-only", NULL))
+ chip->flags |= AT24_FLAG_READONLY;
+ val = of_get_property(node, "pagesize", NULL);
+ if (val)
+ chip->page_size = be32_to_cpup(val);
+ }
+}
+#else
+static void at24_get_ofdata(struct i2c_client *client,
+ struct at24_platform_data *chip)
+{ }
+#endif /* CONFIG_OF */
+
+static int at24_probe(struct i2c_client *client, const struct i2c_device_id *id)
+{
+ struct at24_platform_data chip;
+ kernel_ulong_t magic = 0;
+ bool writable;
+ int use_smbus = 0;
+ int use_smbus_write = 0;
+ struct at24_data *at24;
+ int err;
+ unsigned i, num_addresses;
+
+ if (client->dev.platform_data) {
+ chip = *(struct at24_platform_data *)client->dev.platform_data;
+ } else {
+ if (id) {
+ magic = id->driver_data;
+ } else {
+ const struct acpi_device_id *aid;
+
+ aid = acpi_match_device(at24_acpi_ids, &client->dev);
+ if (aid)
+ magic = aid->driver_data;
+ }
+ if (!magic)
+ return -ENODEV;
+
+ chip.byte_len = BIT(magic & AT24_BITMASK(AT24_SIZE_BYTELEN));
+ magic >>= AT24_SIZE_BYTELEN;
+ chip.flags = magic & AT24_BITMASK(AT24_SIZE_FLAGS);
+ /*
+ * This is slow, but we can't know all eeproms, so we better
+ * play safe. Specifying custom eeprom-types via platform_data
+ * is recommended anyhow.
+ */
+ chip.page_size = 1;
+
+ /* update chipdata if OF is present */
+ at24_get_ofdata(client, &chip);
+
+ chip.setup = NULL;
+ chip.context = NULL;
+ }
+
+ if (!is_power_of_2(chip.byte_len))
+ dev_warn(&client->dev,
+ "byte_len looks suspicious (no power of 2)!\n");
+ if (!chip.page_size) {
+ dev_err(&client->dev, "page_size must not be 0!\n");
+ return -EINVAL;
+ }
+ if (!is_power_of_2(chip.page_size))
+ dev_warn(&client->dev,
+ "page_size looks suspicious (no power of 2)!\n");
+
+ /* Use I2C operations unless we're stuck with SMBus extensions. */
+ if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C)) {
+ if (chip.flags & AT24_FLAG_ADDR16)
+ return -EPFNOSUPPORT;
+
+ if (i2c_check_functionality(client->adapter,
+ I2C_FUNC_SMBUS_READ_I2C_BLOCK)) {
+ use_smbus = I2C_SMBUS_I2C_BLOCK_DATA;
+ } else if (i2c_check_functionality(client->adapter,
+ I2C_FUNC_SMBUS_READ_WORD_DATA)) {
+ use_smbus = I2C_SMBUS_WORD_DATA;
+ } else if (i2c_check_functionality(client->adapter,
+ I2C_FUNC_SMBUS_READ_BYTE_DATA)) {
+ use_smbus = I2C_SMBUS_BYTE_DATA;
+ } else {
+ return -EPFNOSUPPORT;
+ }
+ }
+
+ /* Use I2C operations unless we're stuck with SMBus extensions. */
+ if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C)) {
+ if (i2c_check_functionality(client->adapter,
+ I2C_FUNC_SMBUS_WRITE_I2C_BLOCK)) {
+ use_smbus_write = I2C_SMBUS_I2C_BLOCK_DATA;
+ } else if (i2c_check_functionality(client->adapter,
+ I2C_FUNC_SMBUS_WRITE_BYTE_DATA)) {
+ use_smbus_write = I2C_SMBUS_BYTE_DATA;
+ chip.page_size = 1;
+ }
+ }
+
+ if (chip.flags & AT24_FLAG_TAKE8ADDR)
+ num_addresses = 8;
+ else
+ num_addresses = DIV_ROUND_UP(chip.byte_len,
+ (chip.flags & AT24_FLAG_ADDR16) ? 65536 : 256);
+
+ at24 = devm_kzalloc(&client->dev, sizeof(struct at24_data) +
+ num_addresses * sizeof(struct i2c_client *), GFP_KERNEL);
+ if (!at24)
+ return -ENOMEM;
+
+ mutex_init(&at24->lock);
+ at24->use_smbus = use_smbus;
+ at24->use_smbus_write = use_smbus_write;
+ at24->chip = chip;
+ at24->num_addresses = num_addresses;
+
+ /*
+ * Export the EEPROM bytes through sysfs, since that's convenient.
+ * By default, only root should see the data (maybe passwords etc)
+ */
+ sysfs_bin_attr_init(&at24->bin);
+ at24->bin.attr.name = "eeprom";
+ at24->bin.attr.mode = chip.flags & AT24_FLAG_IRUGO ? S_IRUGO : S_IRUSR;
+ at24->bin.read = at24_bin_read;
+ at24->bin.size = chip.byte_len;
+
+ at24->macc.read = at24_macc_read;
+
+ writable = !(chip.flags & AT24_FLAG_READONLY);
+ if (writable) {
+ if (!use_smbus || use_smbus_write) {
+
+ unsigned write_max = chip.page_size;
+
+ at24->macc.write = at24_macc_write;
+
+ at24->bin.write = at24_bin_write;
+ at24->bin.attr.mode |= S_IWUSR;
+
+ if (write_max > io_limit)
+ write_max = io_limit;
+ if (use_smbus && write_max > I2C_SMBUS_BLOCK_MAX)
+ write_max = I2C_SMBUS_BLOCK_MAX;
+ at24->write_max = write_max;
+
+ /* buffer (data + address at the beginning) */
+ at24->writebuf = devm_kzalloc(&client->dev,
+ write_max + 2, GFP_KERNEL);
+ if (!at24->writebuf)
+ return -ENOMEM;
+ } else {
+ dev_warn(&client->dev,
+ "cannot write due to controller restrictions.");
+ }
+ }
+
+ at24->client[0] = client;
+
+ /* use dummy devices for multiple-address chips */
+ for (i = 1; i < num_addresses; i++) {
+ at24->client[i] = i2c_new_dummy(client->adapter,
+ client->addr + i);
+ if (!at24->client[i]) {
+ dev_err(&client->dev, "address 0x%02x unavailable\n",
+ client->addr + i);
+ err = -EADDRINUSE;
+ goto err_clients;
+ }
+ }
+
+ err = sysfs_create_bin_file(&client->dev.kobj, &at24->bin);
+ if (err)
+ goto err_clients;
+
+ i2c_set_clientdata(client, at24);
+
+ dev_info(&client->dev, "%zu byte %s EEPROM, %s, %u bytes/write\n",
+ at24->bin.size, client->name,
+ writable ? "writable" : "read-only", at24->write_max);
+ if (use_smbus == I2C_SMBUS_WORD_DATA ||
+ use_smbus == I2C_SMBUS_BYTE_DATA) {
+ dev_notice(&client->dev, "Falling back to %s reads, "
+ "performance will suffer\n", use_smbus ==
+ I2C_SMBUS_WORD_DATA ? "word" : "byte");
+ }
+
+ /* export data to kernel code */
+ if (chip.setup)
+ chip.setup(&at24->macc, chip.context);
+
+ return 0;
+
+err_clients:
+ for (i = 1; i < num_addresses; i++)
+ if (at24->client[i])
+ i2c_unregister_device(at24->client[i]);
+
+ return err;
+}
+
+static int at24_remove(struct i2c_client *client)
+{
+ struct at24_data *at24;
+ int i;
+
+ at24 = i2c_get_clientdata(client);
+ sysfs_remove_bin_file(&client->dev.kobj, &at24->bin);
+
+ for (i = 1; i < at24->num_addresses; i++)
+ i2c_unregister_device(at24->client[i]);
+
+ return 0;
+}
+
+/*-------------------------------------------------------------------------*/
+
+static struct i2c_driver at24_driver = {
+ .driver = {
+ .name = "at24",
+ .acpi_match_table = ACPI_PTR(at24_acpi_ids),
+ },
+ .probe = at24_probe,
+ .remove = at24_remove,
+ .id_table = at24_ids,
+};
+
+static int __init at24_init(void)
+{
+ if (!io_limit) {
+ pr_err("at24: io_limit must not be 0!\n");
+ return -EINVAL;
+ }
+
+ io_limit = rounddown_pow_of_two(io_limit);
+ return i2c_add_driver(&at24_driver);
+}
+module_init(at24_init);
+
+static void __exit at24_exit(void)
+{
+ i2c_del_driver(&at24_driver);
+}
+module_exit(at24_exit);
+
+MODULE_DESCRIPTION("Driver for most I2C EEPROMs");
+MODULE_AUTHOR("David Brownell and Wolfram Sang");
+MODULE_LICENSE("GPL");
diff --git a/drivers/misc/eeprom/at25.c b/drivers/misc/eeprom/at25.c
new file mode 100644
index 0000000..f850ef5
--- /dev/null
+++ b/drivers/misc/eeprom/at25.c
@@ -0,0 +1,476 @@
+/*
+ * at25.c -- support most SPI EEPROMs, such as Atmel AT25 models
+ *
+ * Copyright (C) 2006 David Brownell
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/delay.h>
+#include <linux/device.h>
+#include <linux/sched.h>
+
+#include <linux/spi/spi.h>
+#include <linux/spi/eeprom.h>
+#include <linux/property.h>
+
+/*
+ * NOTE: this is an *EEPROM* driver. The vagaries of product naming
+ * mean that some AT25 products are EEPROMs, and others are FLASH.
+ * Handle FLASH chips with the drivers/mtd/devices/m25p80.c driver,
+ * not this one!
+ */
+
+struct at25_data {
+ struct spi_device *spi;
+ struct memory_accessor mem;
+ struct mutex lock;
+ struct spi_eeprom chip;
+ struct bin_attribute bin;
+ unsigned addrlen;
+};
+
+#define AT25_WREN 0x06 /* latch the write enable */
+#define AT25_WRDI 0x04 /* reset the write enable */
+#define AT25_RDSR 0x05 /* read status register */
+#define AT25_WRSR 0x01 /* write status register */
+#define AT25_READ 0x03 /* read byte(s) */
+#define AT25_WRITE 0x02 /* write byte(s)/sector */
+
+#define AT25_SR_nRDY 0x01 /* nRDY = write-in-progress */
+#define AT25_SR_WEN 0x02 /* write enable (latched) */
+#define AT25_SR_BP0 0x04 /* BP for software writeprotect */
+#define AT25_SR_BP1 0x08
+#define AT25_SR_WPEN 0x80 /* writeprotect enable */
+
+#define AT25_INSTR_BIT3 0x08 /* Additional address bit in instr */
+
+#define EE_MAXADDRLEN 3 /* 24 bit addresses, up to 2 MBytes */
+
+/* Specs often allow 5 msec for a page write, sometimes 20 msec;
+ * it's important to recover from write timeouts.
+ */
+#define EE_TIMEOUT 25
+
+/*-------------------------------------------------------------------------*/
+
+#define io_limit PAGE_SIZE /* bytes */
+
+static ssize_t
+at25_ee_read(
+ struct at25_data *at25,
+ char *buf,
+ unsigned offset,
+ size_t count
+)
+{
+ u8 command[EE_MAXADDRLEN + 1];
+ u8 *cp;
+ ssize_t status;
+ struct spi_transfer t[2];
+ struct spi_message m;
+ u8 instr;
+
+ if (unlikely(offset >= at25->bin.size))
+ return 0;
+ if ((offset + count) > at25->bin.size)
+ count = at25->bin.size - offset;
+ if (unlikely(!count))
+ return count;
+
+ cp = command;
+
+ instr = AT25_READ;
+ if (at25->chip.flags & EE_INSTR_BIT3_IS_ADDR)
+ if (offset >= (1U << (at25->addrlen * 8)))
+ instr |= AT25_INSTR_BIT3;
+ *cp++ = instr;
+
+ /* 8/16/24-bit address is written MSB first */
+ switch (at25->addrlen) {
+ default: /* case 3 */
+ *cp++ = offset >> 16;
+ case 2:
+ *cp++ = offset >> 8;
+ case 1:
+ case 0: /* can't happen: for better codegen */
+ *cp++ = offset >> 0;
+ }
+
+ spi_message_init(&m);
+ memset(t, 0, sizeof t);
+
+ t[0].tx_buf = command;
+ t[0].len = at25->addrlen + 1;
+ spi_message_add_tail(&t[0], &m);
+
+ t[1].rx_buf = buf;
+ t[1].len = count;
+ spi_message_add_tail(&t[1], &m);
+
+ mutex_lock(&at25->lock);
+
+ /* Read it all at once.
+ *
+ * REVISIT that's potentially a problem with large chips, if
+ * other devices on the bus need to be accessed regularly or
+ * this chip is clocked very slowly
+ */
+ status = spi_sync(at25->spi, &m);
+ dev_dbg(&at25->spi->dev,
+ "read %Zd bytes at %d --> %d\n",
+ count, offset, (int) status);
+
+ mutex_unlock(&at25->lock);
+ return status ? status : count;
+}
+
+static ssize_t
+at25_bin_read(struct file *filp, struct kobject *kobj,
+ struct bin_attribute *bin_attr,
+ char *buf, loff_t off, size_t count)
+{
+ struct device *dev;
+ struct at25_data *at25;
+
+ dev = container_of(kobj, struct device, kobj);
+ at25 = dev_get_drvdata(dev);
+
+ return at25_ee_read(at25, buf, off, count);
+}
+
+
+static ssize_t
+at25_ee_write(struct at25_data *at25, const char *buf, loff_t off,
+ size_t count)
+{
+ ssize_t status = 0;
+ unsigned written = 0;
+ unsigned buf_size;
+ u8 *bounce;
+
+ if (unlikely(off >= at25->bin.size))
+ return -EFBIG;
+ if ((off + count) > at25->bin.size)
+ count = at25->bin.size - off;
+ if (unlikely(!count))
+ return count;
+
+ /* Temp buffer starts with command and address */
+ buf_size = at25->chip.page_size;
+ if (buf_size > io_limit)
+ buf_size = io_limit;
+ bounce = kmalloc(buf_size + at25->addrlen + 1, GFP_KERNEL);
+ if (!bounce)
+ return -ENOMEM;
+
+ /* For write, rollover is within the page ... so we write at
+ * most one page, then manually roll over to the next page.
+ */
+ mutex_lock(&at25->lock);
+ do {
+ unsigned long timeout, retries;
+ unsigned segment;
+ unsigned offset = (unsigned) off;
+ u8 *cp = bounce;
+ int sr;
+ u8 instr;
+
+ *cp = AT25_WREN;
+ status = spi_write(at25->spi, cp, 1);
+ if (status < 0) {
+ dev_dbg(&at25->spi->dev, "WREN --> %d\n",
+ (int) status);
+ break;
+ }
+
+ instr = AT25_WRITE;
+ if (at25->chip.flags & EE_INSTR_BIT3_IS_ADDR)
+ if (offset >= (1U << (at25->addrlen * 8)))
+ instr |= AT25_INSTR_BIT3;
+ *cp++ = instr;
+
+ /* 8/16/24-bit address is written MSB first */
+ switch (at25->addrlen) {
+ default: /* case 3 */
+ *cp++ = offset >> 16;
+ case 2:
+ *cp++ = offset >> 8;
+ case 1:
+ case 0: /* can't happen: for better codegen */
+ *cp++ = offset >> 0;
+ }
+
+ /* Write as much of a page as we can */
+ segment = buf_size - (offset % buf_size);
+ if (segment > count)
+ segment = count;
+ memcpy(cp, buf, segment);
+ status = spi_write(at25->spi, bounce,
+ segment + at25->addrlen + 1);
+ dev_dbg(&at25->spi->dev,
+ "write %u bytes at %u --> %d\n",
+ segment, offset, (int) status);
+ if (status < 0)
+ break;
+
+ /* REVISIT this should detect (or prevent) failed writes
+ * to readonly sections of the EEPROM...
+ */
+
+ /* Wait for non-busy status */
+ timeout = jiffies + msecs_to_jiffies(EE_TIMEOUT);
+ retries = 0;
+ do {
+
+ sr = spi_w8r8(at25->spi, AT25_RDSR);
+ if (sr < 0 || (sr & AT25_SR_nRDY)) {
+ dev_dbg(&at25->spi->dev,
+ "rdsr --> %d (%02x)\n", sr, sr);
+ /* at HZ=100, this is sloooow */
+ msleep(1);
+ continue;
+ }
+ if (!(sr & AT25_SR_nRDY))
+ break;
+ } while (retries++ < 3 || time_before_eq(jiffies, timeout));
+
+ if ((sr < 0) || (sr & AT25_SR_nRDY)) {
+ dev_err(&at25->spi->dev,
+ "write %d bytes offset %d, "
+ "timeout after %u msecs\n",
+ segment, offset,
+ jiffies_to_msecs(jiffies -
+ (timeout - EE_TIMEOUT)));
+ status = -ETIMEDOUT;
+ break;
+ }
+
+ off += segment;
+ buf += segment;
+ count -= segment;
+ written += segment;
+
+ } while (count > 0);
+
+ mutex_unlock(&at25->lock);
+
+ kfree(bounce);
+ return written ? written : status;
+}
+
+static ssize_t
+at25_bin_write(struct file *filp, struct kobject *kobj,
+ struct bin_attribute *bin_attr,
+ char *buf, loff_t off, size_t count)
+{
+ struct device *dev;
+ struct at25_data *at25;
+
+ dev = container_of(kobj, struct device, kobj);
+ at25 = dev_get_drvdata(dev);
+
+ return at25_ee_write(at25, buf, off, count);
+}
+
+/*-------------------------------------------------------------------------*/
+
+/* Let in-kernel code access the eeprom data. */
+
+static ssize_t at25_mem_read(struct memory_accessor *mem, char *buf,
+ off_t offset, size_t count)
+{
+ struct at25_data *at25 = container_of(mem, struct at25_data, mem);
+
+ return at25_ee_read(at25, buf, offset, count);
+}
+
+static ssize_t at25_mem_write(struct memory_accessor *mem, const char *buf,
+ off_t offset, size_t count)
+{
+ struct at25_data *at25 = container_of(mem, struct at25_data, mem);
+
+ return at25_ee_write(at25, buf, offset, count);
+}
+
+/*-------------------------------------------------------------------------*/
+
+static int at25_fw_to_chip(struct device *dev, struct spi_eeprom *chip)
+{
+ u32 val;
+
+ memset(chip, 0, sizeof(*chip));
+ strncpy(chip->name, "at25", sizeof(chip->name));
+
+ if (device_property_read_u32(dev, "size", &val) == 0 ||
+ device_property_read_u32(dev, "at25,byte-len", &val) == 0) {
+ chip->byte_len = val;
+ } else {
+ dev_err(dev, "Error: missing \"size\" property\n");
+ return -ENODEV;
+ }
+
+ if (device_property_read_u32(dev, "pagesize", &val) == 0 ||
+ device_property_read_u32(dev, "at25,page-size", &val) == 0) {
+ chip->page_size = (u16)val;
+ } else {
+ dev_err(dev, "Error: missing \"pagesize\" property\n");
+ return -ENODEV;
+ }
+
+ if (device_property_read_u32(dev, "at25,addr-mode", &val) == 0) {
+ chip->flags = (u16)val;
+ } else {
+ if (device_property_read_u32(dev, "address-width", &val)) {
+ dev_err(dev,
+ "Error: missing \"address-width\" property\n");
+ return -ENODEV;
+ }
+ switch (val) {
+ case 8:
+ chip->flags |= EE_ADDR1;
+ break;
+ case 16:
+ chip->flags |= EE_ADDR2;
+ break;
+ case 24:
+ chip->flags |= EE_ADDR3;
+ break;
+ default:
+ dev_err(dev,
+ "Error: bad \"address-width\" property: %u\n",
+ val);
+ return -ENODEV;
+ }
+ if (device_property_present(dev, "read-only"))
+ chip->flags |= EE_READONLY;
+ }
+ return 0;
+}
+
+static int at25_probe(struct spi_device *spi)
+{
+ struct at25_data *at25 = NULL;
+ struct spi_eeprom chip;
+ int err;
+ int sr;
+ int addrlen;
+
+ /* Chip description */
+ if (!spi->dev.platform_data) {
+ err = at25_fw_to_chip(&spi->dev, &chip);
+ if (err)
+ return err;
+ } else
+ chip = *(struct spi_eeprom *)spi->dev.platform_data;
+
+ /* For now we only support 8/16/24 bit addressing */
+ if (chip.flags & EE_ADDR1)
+ addrlen = 1;
+ else if (chip.flags & EE_ADDR2)
+ addrlen = 2;
+ else if (chip.flags & EE_ADDR3)
+ addrlen = 3;
+ else {
+ dev_dbg(&spi->dev, "unsupported address type\n");
+ return -EINVAL;
+ }
+
+ /* Ping the chip ... the status register is pretty portable,
+ * unlike probing manufacturer IDs. We do expect that system
+ * firmware didn't write it in the past few milliseconds!
+ */
+ sr = spi_w8r8(spi, AT25_RDSR);
+ if (sr < 0 || sr & AT25_SR_nRDY) {
+ dev_dbg(&spi->dev, "rdsr --> %d (%02x)\n", sr, sr);
+ return -ENXIO;
+ }
+
+ at25 = devm_kzalloc(&spi->dev, sizeof(struct at25_data), GFP_KERNEL);
+ if (!at25)
+ return -ENOMEM;
+
+ mutex_init(&at25->lock);
+ at25->chip = chip;
+ at25->spi = spi_dev_get(spi);
+ spi_set_drvdata(spi, at25);
+ at25->addrlen = addrlen;
+
+ /* Export the EEPROM bytes through sysfs, since that's convenient.
+ * And maybe to other kernel code; it might hold a board's Ethernet
+ * address, or board-specific calibration data generated on the
+ * manufacturing floor.
+ *
+ * Default to root-only access to the data; EEPROMs often hold data
+ * that's sensitive for read and/or write, like ethernet addresses,
+ * security codes, board-specific manufacturing calibrations, etc.
+ */
+ sysfs_bin_attr_init(&at25->bin);
+ at25->bin.attr.name = "eeprom";
+ at25->bin.attr.mode = S_IRUSR;
+ at25->bin.read = at25_bin_read;
+ at25->mem.read = at25_mem_read;
+
+ at25->bin.size = at25->chip.byte_len;
+ if (!(chip.flags & EE_READONLY)) {
+ at25->bin.write = at25_bin_write;
+ at25->bin.attr.mode |= S_IWUSR;
+ at25->mem.write = at25_mem_write;
+ }
+
+ err = sysfs_create_bin_file(&spi->dev.kobj, &at25->bin);
+ if (err)
+ return err;
+
+ if (chip.setup)
+ chip.setup(&at25->mem, chip.context);
+
+ dev_info(&spi->dev, "%Zd %s %s eeprom%s, pagesize %u\n",
+ (at25->bin.size < 1024)
+ ? at25->bin.size
+ : (at25->bin.size / 1024),
+ (at25->bin.size < 1024) ? "Byte" : "KByte",
+ at25->chip.name,
+ (chip.flags & EE_READONLY) ? " (readonly)" : "",
+ at25->chip.page_size);
+ return 0;
+}
+
+static int at25_remove(struct spi_device *spi)
+{
+ struct at25_data *at25;
+
+ at25 = spi_get_drvdata(spi);
+ sysfs_remove_bin_file(&spi->dev.kobj, &at25->bin);
+ return 0;
+}
+
+/*-------------------------------------------------------------------------*/
+
+static const struct of_device_id at25_of_match[] = {
+ { .compatible = "atmel,at25", },
+ { }
+};
+MODULE_DEVICE_TABLE(of, at25_of_match);
+
+static struct spi_driver at25_driver = {
+ .driver = {
+ .name = "at25",
+ .of_match_table = at25_of_match,
+ },
+ .probe = at25_probe,
+ .remove = at25_remove,
+};
+
+module_spi_driver(at25_driver);
+
+MODULE_DESCRIPTION("Driver for most SPI EEPROMs");
+MODULE_AUTHOR("David Brownell");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("spi:at25");
diff --git a/drivers/misc/eeprom/digsy_mtc_eeprom.c b/drivers/misc/eeprom/digsy_mtc_eeprom.c
new file mode 100644
index 0000000..66d9e1b
--- /dev/null
+++ b/drivers/misc/eeprom/digsy_mtc_eeprom.c
@@ -0,0 +1,85 @@
+/*
+ * EEPROMs access control driver for display configuration EEPROMs
+ * on DigsyMTC board.
+ *
+ * (C) 2011 DENX Software Engineering, Anatolij Gustschin <agust@denx.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/gpio.h>
+#include <linux/init.h>
+#include <linux/platform_device.h>
+#include <linux/spi/spi.h>
+#include <linux/spi/spi_gpio.h>
+#include <linux/eeprom_93xx46.h>
+
+#define GPIO_EEPROM_CLK 216
+#define GPIO_EEPROM_CS 210
+#define GPIO_EEPROM_DI 217
+#define GPIO_EEPROM_DO 249
+#define GPIO_EEPROM_OE 255
+#define EE_SPI_BUS_NUM 1
+
+static void digsy_mtc_op_prepare(void *p)
+{
+ /* enable */
+ gpio_set_value(GPIO_EEPROM_OE, 0);
+}
+
+static void digsy_mtc_op_finish(void *p)
+{
+ /* disable */
+ gpio_set_value(GPIO_EEPROM_OE, 1);
+}
+
+struct eeprom_93xx46_platform_data digsy_mtc_eeprom_data = {
+ .flags = EE_ADDR8,
+ .prepare = digsy_mtc_op_prepare,
+ .finish = digsy_mtc_op_finish,
+};
+
+static struct spi_gpio_platform_data eeprom_spi_gpio_data = {
+ .sck = GPIO_EEPROM_CLK,
+ .mosi = GPIO_EEPROM_DI,
+ .miso = GPIO_EEPROM_DO,
+ .num_chipselect = 1,
+};
+
+static struct platform_device digsy_mtc_eeprom = {
+ .name = "spi_gpio",
+ .id = EE_SPI_BUS_NUM,
+ .dev = {
+ .platform_data = &eeprom_spi_gpio_data,
+ },
+};
+
+static struct spi_board_info digsy_mtc_eeprom_info[] __initdata = {
+ {
+ .modalias = "93xx46",
+ .max_speed_hz = 1000000,
+ .bus_num = EE_SPI_BUS_NUM,
+ .chip_select = 0,
+ .mode = SPI_MODE_0,
+ .controller_data = (void *)GPIO_EEPROM_CS,
+ .platform_data = &digsy_mtc_eeprom_data,
+ },
+};
+
+static int __init digsy_mtc_eeprom_devices_init(void)
+{
+ int ret;
+
+ ret = gpio_request_one(GPIO_EEPROM_OE, GPIOF_OUT_INIT_HIGH,
+ "93xx46 EEPROMs OE");
+ if (ret) {
+ pr_err("can't request gpio %d\n", GPIO_EEPROM_OE);
+ return ret;
+ }
+ spi_register_board_info(digsy_mtc_eeprom_info,
+ ARRAY_SIZE(digsy_mtc_eeprom_info));
+ return platform_device_register(&digsy_mtc_eeprom);
+}
+device_initcall(digsy_mtc_eeprom_devices_init);
diff --git a/drivers/misc/eeprom/eeprom.c b/drivers/misc/eeprom/eeprom.c
new file mode 100644
index 0000000..7342fd6
--- /dev/null
+++ b/drivers/misc/eeprom/eeprom.c
@@ -0,0 +1,221 @@
+/*
+ * Copyright (C) 1998, 1999 Frodo Looijaard <frodol@dds.nl> and
+ * Philip Edelbrock <phil@netroedge.com>
+ * Copyright (C) 2003 Greg Kroah-Hartman <greg@kroah.com>
+ * Copyright (C) 2003 IBM Corp.
+ * Copyright (C) 2004 Jean Delvare <jdelvare@suse.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/jiffies.h>
+#include <linux/i2c.h>
+#include <linux/mutex.h>
+
+/* Addresses to scan */
+static const unsigned short normal_i2c[] = { 0x50, 0x51, 0x52, 0x53, 0x54,
+ 0x55, 0x56, 0x57, I2C_CLIENT_END };
+
+
+/* Size of EEPROM in bytes */
+#define EEPROM_SIZE 256
+
+/* possible types of eeprom devices */
+enum eeprom_nature {
+ UNKNOWN,
+ VAIO,
+};
+
+/* Each client has this additional data */
+struct eeprom_data {
+ struct mutex update_lock;
+ u8 valid; /* bitfield, bit!=0 if slice is valid */
+ unsigned long last_updated[8]; /* In jiffies, 8 slices */
+ u8 data[EEPROM_SIZE]; /* Register values */
+ enum eeprom_nature nature;
+};
+
+
+static void eeprom_update_client(struct i2c_client *client, u8 slice)
+{
+ struct eeprom_data *data = i2c_get_clientdata(client);
+ int i;
+
+ mutex_lock(&data->update_lock);
+
+ if (!(data->valid & (1 << slice)) ||
+ time_after(jiffies, data->last_updated[slice] + 300 * HZ)) {
+ dev_dbg(&client->dev, "Starting eeprom update, slice %u\n", slice);
+
+ if (i2c_check_functionality(client->adapter, I2C_FUNC_SMBUS_READ_I2C_BLOCK)) {
+ for (i = slice << 5; i < (slice + 1) << 5; i += 32)
+ if (i2c_smbus_read_i2c_block_data(client, i,
+ 32, data->data + i)
+ != 32)
+ goto exit;
+ } else {
+ for (i = slice << 5; i < (slice + 1) << 5; i += 2) {
+ int word = i2c_smbus_read_word_data(client, i);
+ if (word < 0)
+ goto exit;
+ data->data[i] = word & 0xff;
+ data->data[i + 1] = word >> 8;
+ }
+ }
+ data->last_updated[slice] = jiffies;
+ data->valid |= (1 << slice);
+ }
+exit:
+ mutex_unlock(&data->update_lock);
+}
+
+static ssize_t eeprom_read(struct file *filp, struct kobject *kobj,
+ struct bin_attribute *bin_attr,
+ char *buf, loff_t off, size_t count)
+{
+ struct i2c_client *client = to_i2c_client(container_of(kobj, struct device, kobj));
+ struct eeprom_data *data = i2c_get_clientdata(client);
+ u8 slice;
+
+ /* Only refresh slices which contain requested bytes */
+ for (slice = off >> 5; slice <= (off + count - 1) >> 5; slice++)
+ eeprom_update_client(client, slice);
+
+ /* Hide Vaio private settings to regular users:
+ - BIOS passwords: bytes 0x00 to 0x0f
+ - UUID: bytes 0x10 to 0x1f
+ - Serial number: 0xc0 to 0xdf */
+ if (data->nature == VAIO && !capable(CAP_SYS_ADMIN)) {
+ int i;
+
+ for (i = 0; i < count; i++) {
+ if ((off + i <= 0x1f) ||
+ (off + i >= 0xc0 && off + i <= 0xdf))
+ buf[i] = 0;
+ else
+ buf[i] = data->data[off + i];
+ }
+ } else {
+ memcpy(buf, &data->data[off], count);
+ }
+
+ return count;
+}
+
+static struct bin_attribute eeprom_attr = {
+ .attr = {
+ .name = "eeprom",
+ .mode = S_IRUGO,
+ },
+ .size = EEPROM_SIZE,
+ .read = eeprom_read,
+};
+
+/* Return 0 if detection is successful, -ENODEV otherwise */
+static int eeprom_detect(struct i2c_client *client, struct i2c_board_info *info)
+{
+ struct i2c_adapter *adapter = client->adapter;
+
+ /* EDID EEPROMs are often 24C00 EEPROMs, which answer to all
+ addresses 0x50-0x57, but we only care about 0x50. So decline
+ attaching to addresses >= 0x51 on DDC buses */
+ if (!(adapter->class & I2C_CLASS_SPD) && client->addr >= 0x51)
+ return -ENODEV;
+
+ /* There are four ways we can read the EEPROM data:
+ (1) I2C block reads (faster, but unsupported by most adapters)
+ (2) Word reads (128% overhead)
+ (3) Consecutive byte reads (88% overhead, unsafe)
+ (4) Regular byte data reads (265% overhead)
+ The third and fourth methods are not implemented by this driver
+ because all known adapters support one of the first two. */
+ if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_READ_WORD_DATA)
+ && !i2c_check_functionality(adapter, I2C_FUNC_SMBUS_READ_I2C_BLOCK))
+ return -ENODEV;
+
+ strlcpy(info->type, "eeprom", I2C_NAME_SIZE);
+
+ return 0;
+}
+
+static int eeprom_probe(struct i2c_client *client,
+ const struct i2c_device_id *id)
+{
+ struct i2c_adapter *adapter = client->adapter;
+ struct eeprom_data *data;
+
+ data = devm_kzalloc(&client->dev, sizeof(struct eeprom_data),
+ GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+
+ memset(data->data, 0xff, EEPROM_SIZE);
+ i2c_set_clientdata(client, data);
+ mutex_init(&data->update_lock);
+ data->nature = UNKNOWN;
+
+ /* Detect the Vaio nature of EEPROMs.
+ We use the "PCG-" or "VGN-" prefix as the signature. */
+ if (client->addr == 0x57
+ && i2c_check_functionality(adapter, I2C_FUNC_SMBUS_READ_BYTE_DATA)) {
+ char name[4];
+
+ name[0] = i2c_smbus_read_byte_data(client, 0x80);
+ name[1] = i2c_smbus_read_byte_data(client, 0x81);
+ name[2] = i2c_smbus_read_byte_data(client, 0x82);
+ name[3] = i2c_smbus_read_byte_data(client, 0x83);
+
+ if (!memcmp(name, "PCG-", 4) || !memcmp(name, "VGN-", 4)) {
+ dev_info(&client->dev, "Vaio EEPROM detected, "
+ "enabling privacy protection\n");
+ data->nature = VAIO;
+ }
+ }
+
+ /* create the sysfs eeprom file */
+ return sysfs_create_bin_file(&client->dev.kobj, &eeprom_attr);
+}
+
+static int eeprom_remove(struct i2c_client *client)
+{
+ sysfs_remove_bin_file(&client->dev.kobj, &eeprom_attr);
+
+ return 0;
+}
+
+static const struct i2c_device_id eeprom_id[] = {
+ { "eeprom", 0 },
+ { }
+};
+
+static struct i2c_driver eeprom_driver = {
+ .driver = {
+ .name = "eeprom",
+ },
+ .probe = eeprom_probe,
+ .remove = eeprom_remove,
+ .id_table = eeprom_id,
+
+ .class = I2C_CLASS_DDC | I2C_CLASS_SPD,
+ .detect = eeprom_detect,
+ .address_list = normal_i2c,
+};
+
+module_i2c_driver(eeprom_driver);
+
+MODULE_AUTHOR("Frodo Looijaard <frodol@dds.nl> and "
+ "Philip Edelbrock <phil@netroedge.com> and "
+ "Greg Kroah-Hartman <greg@kroah.com>");
+MODULE_DESCRIPTION("I2C EEPROM driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/misc/eeprom/eeprom_93cx6.c b/drivers/misc/eeprom/eeprom_93cx6.c
new file mode 100644
index 0000000..0cf2c9d
--- /dev/null
+++ b/drivers/misc/eeprom/eeprom_93cx6.c
@@ -0,0 +1,381 @@
+/*
+ * Copyright (C) 2004 - 2006 rt2x00 SourceForge Project
+ * <http://rt2x00.serialmonkey.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * Module: eeprom_93cx6
+ * Abstract: EEPROM reader routines for 93cx6 chipsets.
+ * Supported chipsets: 93c46 & 93c66.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/delay.h>
+#include <linux/eeprom_93cx6.h>
+
+MODULE_AUTHOR("http://rt2x00.serialmonkey.com");
+MODULE_VERSION("1.0");
+MODULE_DESCRIPTION("EEPROM 93cx6 chip driver");
+MODULE_LICENSE("GPL");
+
+static inline void eeprom_93cx6_pulse_high(struct eeprom_93cx6 *eeprom)
+{
+ eeprom->reg_data_clock = 1;
+ eeprom->register_write(eeprom);
+
+ /*
+ * Add a short delay for the pulse to work.
+ * According to the specifications the "maximum minimum"
+ * time should be 450ns.
+ */
+ ndelay(450);
+}
+
+static inline void eeprom_93cx6_pulse_low(struct eeprom_93cx6 *eeprom)
+{
+ eeprom->reg_data_clock = 0;
+ eeprom->register_write(eeprom);
+
+ /*
+ * Add a short delay for the pulse to work.
+ * According to the specifications the "maximum minimum"
+ * time should be 450ns.
+ */
+ ndelay(450);
+}
+
+static void eeprom_93cx6_startup(struct eeprom_93cx6 *eeprom)
+{
+ /*
+ * Clear all flags, and enable chip select.
+ */
+ eeprom->register_read(eeprom);
+ eeprom->reg_data_in = 0;
+ eeprom->reg_data_out = 0;
+ eeprom->reg_data_clock = 0;
+ eeprom->reg_chip_select = 1;
+ eeprom->drive_data = 1;
+ eeprom->register_write(eeprom);
+
+ /*
+ * kick a pulse.
+ */
+ eeprom_93cx6_pulse_high(eeprom);
+ eeprom_93cx6_pulse_low(eeprom);
+}
+
+static void eeprom_93cx6_cleanup(struct eeprom_93cx6 *eeprom)
+{
+ /*
+ * Clear chip_select and data_in flags.
+ */
+ eeprom->register_read(eeprom);
+ eeprom->reg_data_in = 0;
+ eeprom->reg_chip_select = 0;
+ eeprom->register_write(eeprom);
+
+ /*
+ * kick a pulse.
+ */
+ eeprom_93cx6_pulse_high(eeprom);
+ eeprom_93cx6_pulse_low(eeprom);
+}
+
+static void eeprom_93cx6_write_bits(struct eeprom_93cx6 *eeprom,
+ const u16 data, const u16 count)
+{
+ unsigned int i;
+
+ eeprom->register_read(eeprom);
+
+ /*
+ * Clear data flags.
+ */
+ eeprom->reg_data_in = 0;
+ eeprom->reg_data_out = 0;
+ eeprom->drive_data = 1;
+
+ /*
+ * Start writing all bits.
+ */
+ for (i = count; i > 0; i--) {
+ /*
+ * Check if this bit needs to be set.
+ */
+ eeprom->reg_data_in = !!(data & (1 << (i - 1)));
+
+ /*
+ * Write the bit to the eeprom register.
+ */
+ eeprom->register_write(eeprom);
+
+ /*
+ * Kick a pulse.
+ */
+ eeprom_93cx6_pulse_high(eeprom);
+ eeprom_93cx6_pulse_low(eeprom);
+ }
+
+ eeprom->reg_data_in = 0;
+ eeprom->register_write(eeprom);
+}
+
+static void eeprom_93cx6_read_bits(struct eeprom_93cx6 *eeprom,
+ u16 *data, const u16 count)
+{
+ unsigned int i;
+ u16 buf = 0;
+
+ eeprom->register_read(eeprom);
+
+ /*
+ * Clear data flags.
+ */
+ eeprom->reg_data_in = 0;
+ eeprom->reg_data_out = 0;
+ eeprom->drive_data = 0;
+
+ /*
+ * Start reading all bits.
+ */
+ for (i = count; i > 0; i--) {
+ eeprom_93cx6_pulse_high(eeprom);
+
+ eeprom->register_read(eeprom);
+
+ /*
+ * Clear data_in flag.
+ */
+ eeprom->reg_data_in = 0;
+
+ /*
+ * Read if the bit has been set.
+ */
+ if (eeprom->reg_data_out)
+ buf |= (1 << (i - 1));
+
+ eeprom_93cx6_pulse_low(eeprom);
+ }
+
+ *data = buf;
+}
+
+/**
+ * eeprom_93cx6_read - Read a word from eeprom
+ * @eeprom: Pointer to eeprom structure
+ * @word: Word index from where we should start reading
+ * @data: target pointer where the information will have to be stored
+ *
+ * This function will read the eeprom data as host-endian word
+ * into the given data pointer.
+ */
+void eeprom_93cx6_read(struct eeprom_93cx6 *eeprom, const u8 word,
+ u16 *data)
+{
+ u16 command;
+
+ /*
+ * Initialize the eeprom register
+ */
+ eeprom_93cx6_startup(eeprom);
+
+ /*
+ * Select the read opcode and the word to be read.
+ */
+ command = (PCI_EEPROM_READ_OPCODE << eeprom->width) | word;
+ eeprom_93cx6_write_bits(eeprom, command,
+ PCI_EEPROM_WIDTH_OPCODE + eeprom->width);
+
+ /*
+ * Read the requested 16 bits.
+ */
+ eeprom_93cx6_read_bits(eeprom, data, 16);
+
+ /*
+ * Cleanup eeprom register.
+ */
+ eeprom_93cx6_cleanup(eeprom);
+}
+EXPORT_SYMBOL_GPL(eeprom_93cx6_read);
+
+/**
+ * eeprom_93cx6_multiread - Read multiple words from eeprom
+ * @eeprom: Pointer to eeprom structure
+ * @word: Word index from where we should start reading
+ * @data: target pointer where the information will have to be stored
+ * @words: Number of words that should be read.
+ *
+ * This function will read all requested words from the eeprom,
+ * this is done by calling eeprom_93cx6_read() multiple times.
+ * But with the additional change that while the eeprom_93cx6_read
+ * will return host ordered bytes, this method will return little
+ * endian words.
+ */
+void eeprom_93cx6_multiread(struct eeprom_93cx6 *eeprom, const u8 word,
+ __le16 *data, const u16 words)
+{
+ unsigned int i;
+ u16 tmp;
+
+ for (i = 0; i < words; i++) {
+ tmp = 0;
+ eeprom_93cx6_read(eeprom, word + i, &tmp);
+ data[i] = cpu_to_le16(tmp);
+ }
+}
+EXPORT_SYMBOL_GPL(eeprom_93cx6_multiread);
+
+/**
+ * eeprom_93cx6_readb - Read a byte from eeprom
+ * @eeprom: Pointer to eeprom structure
+ * @word: Byte index from where we should start reading
+ * @data: target pointer where the information will have to be stored
+ *
+ * This function will read a byte of the eeprom data
+ * into the given data pointer.
+ */
+void eeprom_93cx6_readb(struct eeprom_93cx6 *eeprom, const u8 byte,
+ u8 *data)
+{
+ u16 command;
+ u16 tmp;
+
+ /*
+ * Initialize the eeprom register
+ */
+ eeprom_93cx6_startup(eeprom);
+
+ /*
+ * Select the read opcode and the byte to be read.
+ */
+ command = (PCI_EEPROM_READ_OPCODE << (eeprom->width + 1)) | byte;
+ eeprom_93cx6_write_bits(eeprom, command,
+ PCI_EEPROM_WIDTH_OPCODE + eeprom->width + 1);
+
+ /*
+ * Read the requested 8 bits.
+ */
+ eeprom_93cx6_read_bits(eeprom, &tmp, 8);
+ *data = tmp & 0xff;
+
+ /*
+ * Cleanup eeprom register.
+ */
+ eeprom_93cx6_cleanup(eeprom);
+}
+EXPORT_SYMBOL_GPL(eeprom_93cx6_readb);
+
+/**
+ * eeprom_93cx6_multireadb - Read multiple bytes from eeprom
+ * @eeprom: Pointer to eeprom structure
+ * @byte: Index from where we should start reading
+ * @data: target pointer where the information will have to be stored
+ * @words: Number of bytes that should be read.
+ *
+ * This function will read all requested bytes from the eeprom,
+ * this is done by calling eeprom_93cx6_readb() multiple times.
+ */
+void eeprom_93cx6_multireadb(struct eeprom_93cx6 *eeprom, const u8 byte,
+ u8 *data, const u16 bytes)
+{
+ unsigned int i;
+
+ for (i = 0; i < bytes; i++)
+ eeprom_93cx6_readb(eeprom, byte + i, &data[i]);
+}
+EXPORT_SYMBOL_GPL(eeprom_93cx6_multireadb);
+
+/**
+ * eeprom_93cx6_wren - set the write enable state
+ * @eeprom: Pointer to eeprom structure
+ * @enable: true to enable writes, otherwise disable writes
+ *
+ * Set the EEPROM write enable state to either allow or deny
+ * writes depending on the @enable value.
+ */
+void eeprom_93cx6_wren(struct eeprom_93cx6 *eeprom, bool enable)
+{
+ u16 command;
+
+ /* start the command */
+ eeprom_93cx6_startup(eeprom);
+
+ /* create command to enable/disable */
+
+ command = enable ? PCI_EEPROM_EWEN_OPCODE : PCI_EEPROM_EWDS_OPCODE;
+ command <<= (eeprom->width - 2);
+
+ eeprom_93cx6_write_bits(eeprom, command,
+ PCI_EEPROM_WIDTH_OPCODE + eeprom->width);
+
+ eeprom_93cx6_cleanup(eeprom);
+}
+EXPORT_SYMBOL_GPL(eeprom_93cx6_wren);
+
+/**
+ * eeprom_93cx6_write - write data to the EEPROM
+ * @eeprom: Pointer to eeprom structure
+ * @addr: Address to write data to.
+ * @data: The data to write to address @addr.
+ *
+ * Write the @data to the specified @addr in the EEPROM and
+ * waiting for the device to finish writing.
+ *
+ * Note, since we do not expect large number of write operations
+ * we delay in between parts of the operation to avoid using excessive
+ * amounts of CPU time busy waiting.
+ */
+void eeprom_93cx6_write(struct eeprom_93cx6 *eeprom, u8 addr, u16 data)
+{
+ int timeout = 100;
+ u16 command;
+
+ /* start the command */
+ eeprom_93cx6_startup(eeprom);
+
+ command = PCI_EEPROM_WRITE_OPCODE << eeprom->width;
+ command |= addr;
+
+ /* send write command */
+ eeprom_93cx6_write_bits(eeprom, command,
+ PCI_EEPROM_WIDTH_OPCODE + eeprom->width);
+
+ /* send data */
+ eeprom_93cx6_write_bits(eeprom, data, 16);
+
+ /* get ready to check for busy */
+ eeprom->drive_data = 0;
+ eeprom->reg_chip_select = 1;
+ eeprom->register_write(eeprom);
+
+ /* wait at-least 250ns to get DO to be the busy signal */
+ usleep_range(1000, 2000);
+
+ /* wait for DO to go high to signify finish */
+
+ while (true) {
+ eeprom->register_read(eeprom);
+
+ if (eeprom->reg_data_out)
+ break;
+
+ usleep_range(1000, 2000);
+
+ if (--timeout <= 0) {
+ printk(KERN_ERR "%s: timeout\n", __func__);
+ break;
+ }
+ }
+
+ eeprom_93cx6_cleanup(eeprom);
+}
+EXPORT_SYMBOL_GPL(eeprom_93cx6_write);
diff --git a/drivers/misc/eeprom/eeprom_93xx46.c b/drivers/misc/eeprom/eeprom_93xx46.c
new file mode 100644
index 0000000..ff63f05
--- /dev/null
+++ b/drivers/misc/eeprom/eeprom_93xx46.c
@@ -0,0 +1,383 @@
+/*
+ * Driver for 93xx46 EEPROMs
+ *
+ * (C) 2011 DENX Software Engineering, Anatolij Gustschin <agust@denx.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/delay.h>
+#include <linux/device.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/slab.h>
+#include <linux/spi/spi.h>
+#include <linux/sysfs.h>
+#include <linux/eeprom_93xx46.h>
+
+#define OP_START 0x4
+#define OP_WRITE (OP_START | 0x1)
+#define OP_READ (OP_START | 0x2)
+#define ADDR_EWDS 0x00
+#define ADDR_ERAL 0x20
+#define ADDR_EWEN 0x30
+
+struct eeprom_93xx46_dev {
+ struct spi_device *spi;
+ struct eeprom_93xx46_platform_data *pdata;
+ struct bin_attribute bin;
+ struct mutex lock;
+ int addrlen;
+};
+
+static ssize_t
+eeprom_93xx46_bin_read(struct file *filp, struct kobject *kobj,
+ struct bin_attribute *bin_attr,
+ char *buf, loff_t off, size_t count)
+{
+ struct eeprom_93xx46_dev *edev;
+ struct device *dev;
+ struct spi_message m;
+ struct spi_transfer t[2];
+ int bits, ret;
+ u16 cmd_addr;
+
+ dev = container_of(kobj, struct device, kobj);
+ edev = dev_get_drvdata(dev);
+
+ cmd_addr = OP_READ << edev->addrlen;
+
+ if (edev->addrlen == 7) {
+ cmd_addr |= off & 0x7f;
+ bits = 10;
+ } else {
+ cmd_addr |= off & 0x3f;
+ bits = 9;
+ }
+
+ dev_dbg(&edev->spi->dev, "read cmd 0x%x, %d Hz\n",
+ cmd_addr, edev->spi->max_speed_hz);
+
+ spi_message_init(&m);
+ memset(t, 0, sizeof(t));
+
+ t[0].tx_buf = (char *)&cmd_addr;
+ t[0].len = 2;
+ t[0].bits_per_word = bits;
+ spi_message_add_tail(&t[0], &m);
+
+ t[1].rx_buf = buf;
+ t[1].len = count;
+ t[1].bits_per_word = 8;
+ spi_message_add_tail(&t[1], &m);
+
+ mutex_lock(&edev->lock);
+
+ if (edev->pdata->prepare)
+ edev->pdata->prepare(edev);
+
+ ret = spi_sync(edev->spi, &m);
+ /* have to wait at least Tcsl ns */
+ ndelay(250);
+ if (ret) {
+ dev_err(&edev->spi->dev, "read %zu bytes at %d: err. %d\n",
+ count, (int)off, ret);
+ }
+
+ if (edev->pdata->finish)
+ edev->pdata->finish(edev);
+
+ mutex_unlock(&edev->lock);
+ return ret ? : count;
+}
+
+static int eeprom_93xx46_ew(struct eeprom_93xx46_dev *edev, int is_on)
+{
+ struct spi_message m;
+ struct spi_transfer t;
+ int bits, ret;
+ u16 cmd_addr;
+
+ cmd_addr = OP_START << edev->addrlen;
+ if (edev->addrlen == 7) {
+ cmd_addr |= (is_on ? ADDR_EWEN : ADDR_EWDS) << 1;
+ bits = 10;
+ } else {
+ cmd_addr |= (is_on ? ADDR_EWEN : ADDR_EWDS);
+ bits = 9;
+ }
+
+ dev_dbg(&edev->spi->dev, "ew cmd 0x%04x\n", cmd_addr);
+
+ spi_message_init(&m);
+ memset(&t, 0, sizeof(t));
+
+ t.tx_buf = &cmd_addr;
+ t.len = 2;
+ t.bits_per_word = bits;
+ spi_message_add_tail(&t, &m);
+
+ mutex_lock(&edev->lock);
+
+ if (edev->pdata->prepare)
+ edev->pdata->prepare(edev);
+
+ ret = spi_sync(edev->spi, &m);
+ /* have to wait at least Tcsl ns */
+ ndelay(250);
+ if (ret)
+ dev_err(&edev->spi->dev, "erase/write %sable error %d\n",
+ is_on ? "en" : "dis", ret);
+
+ if (edev->pdata->finish)
+ edev->pdata->finish(edev);
+
+ mutex_unlock(&edev->lock);
+ return ret;
+}
+
+static ssize_t
+eeprom_93xx46_write_word(struct eeprom_93xx46_dev *edev,
+ const char *buf, unsigned off)
+{
+ struct spi_message m;
+ struct spi_transfer t[2];
+ int bits, data_len, ret;
+ u16 cmd_addr;
+
+ cmd_addr = OP_WRITE << edev->addrlen;
+
+ if (edev->addrlen == 7) {
+ cmd_addr |= off & 0x7f;
+ bits = 10;
+ data_len = 1;
+ } else {
+ cmd_addr |= off & 0x3f;
+ bits = 9;
+ data_len = 2;
+ }
+
+ dev_dbg(&edev->spi->dev, "write cmd 0x%x\n", cmd_addr);
+
+ spi_message_init(&m);
+ memset(t, 0, sizeof(t));
+
+ t[0].tx_buf = (char *)&cmd_addr;
+ t[0].len = 2;
+ t[0].bits_per_word = bits;
+ spi_message_add_tail(&t[0], &m);
+
+ t[1].tx_buf = buf;
+ t[1].len = data_len;
+ t[1].bits_per_word = 8;
+ spi_message_add_tail(&t[1], &m);
+
+ ret = spi_sync(edev->spi, &m);
+ /* have to wait program cycle time Twc ms */
+ mdelay(6);
+ return ret;
+}
+
+static ssize_t
+eeprom_93xx46_bin_write(struct file *filp, struct kobject *kobj,
+ struct bin_attribute *bin_attr,
+ char *buf, loff_t off, size_t count)
+{
+ struct eeprom_93xx46_dev *edev;
+ struct device *dev;
+ int i, ret, step = 1;
+
+ dev = container_of(kobj, struct device, kobj);
+ edev = dev_get_drvdata(dev);
+
+ /* only write even number of bytes on 16-bit devices */
+ if (edev->addrlen == 6) {
+ step = 2;
+ count &= ~1;
+ }
+
+ /* erase/write enable */
+ ret = eeprom_93xx46_ew(edev, 1);
+ if (ret)
+ return ret;
+
+ mutex_lock(&edev->lock);
+
+ if (edev->pdata->prepare)
+ edev->pdata->prepare(edev);
+
+ for (i = 0; i < count; i += step) {
+ ret = eeprom_93xx46_write_word(edev, &buf[i], off + i);
+ if (ret) {
+ dev_err(&edev->spi->dev, "write failed at %d: %d\n",
+ (int)off + i, ret);
+ break;
+ }
+ }
+
+ if (edev->pdata->finish)
+ edev->pdata->finish(edev);
+
+ mutex_unlock(&edev->lock);
+
+ /* erase/write disable */
+ eeprom_93xx46_ew(edev, 0);
+ return ret ? : count;
+}
+
+static int eeprom_93xx46_eral(struct eeprom_93xx46_dev *edev)
+{
+ struct eeprom_93xx46_platform_data *pd = edev->pdata;
+ struct spi_message m;
+ struct spi_transfer t;
+ int bits, ret;
+ u16 cmd_addr;
+
+ cmd_addr = OP_START << edev->addrlen;
+ if (edev->addrlen == 7) {
+ cmd_addr |= ADDR_ERAL << 1;
+ bits = 10;
+ } else {
+ cmd_addr |= ADDR_ERAL;
+ bits = 9;
+ }
+
+ spi_message_init(&m);
+ memset(&t, 0, sizeof(t));
+
+ t.tx_buf = &cmd_addr;
+ t.len = 2;
+ t.bits_per_word = bits;
+ spi_message_add_tail(&t, &m);
+
+ mutex_lock(&edev->lock);
+
+ if (edev->pdata->prepare)
+ edev->pdata->prepare(edev);
+
+ ret = spi_sync(edev->spi, &m);
+ if (ret)
+ dev_err(&edev->spi->dev, "erase error %d\n", ret);
+ /* have to wait erase cycle time Tec ms */
+ mdelay(6);
+
+ if (pd->finish)
+ pd->finish(edev);
+
+ mutex_unlock(&edev->lock);
+ return ret;
+}
+
+static ssize_t eeprom_93xx46_store_erase(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct eeprom_93xx46_dev *edev = dev_get_drvdata(dev);
+ int erase = 0, ret;
+
+ sscanf(buf, "%d", &erase);
+ if (erase) {
+ ret = eeprom_93xx46_ew(edev, 1);
+ if (ret)
+ return ret;
+ ret = eeprom_93xx46_eral(edev);
+ if (ret)
+ return ret;
+ ret = eeprom_93xx46_ew(edev, 0);
+ if (ret)
+ return ret;
+ }
+ return count;
+}
+static DEVICE_ATTR(erase, S_IWUSR, NULL, eeprom_93xx46_store_erase);
+
+static int eeprom_93xx46_probe(struct spi_device *spi)
+{
+ struct eeprom_93xx46_platform_data *pd;
+ struct eeprom_93xx46_dev *edev;
+ int err;
+
+ pd = spi->dev.platform_data;
+ if (!pd) {
+ dev_err(&spi->dev, "missing platform data\n");
+ return -ENODEV;
+ }
+
+ edev = kzalloc(sizeof(*edev), GFP_KERNEL);
+ if (!edev)
+ return -ENOMEM;
+
+ if (pd->flags & EE_ADDR8)
+ edev->addrlen = 7;
+ else if (pd->flags & EE_ADDR16)
+ edev->addrlen = 6;
+ else {
+ dev_err(&spi->dev, "unspecified address type\n");
+ err = -EINVAL;
+ goto fail;
+ }
+
+ mutex_init(&edev->lock);
+
+ edev->spi = spi_dev_get(spi);
+ edev->pdata = pd;
+
+ sysfs_bin_attr_init(&edev->bin);
+ edev->bin.attr.name = "eeprom";
+ edev->bin.attr.mode = S_IRUSR;
+ edev->bin.read = eeprom_93xx46_bin_read;
+ edev->bin.size = 128;
+ if (!(pd->flags & EE_READONLY)) {
+ edev->bin.write = eeprom_93xx46_bin_write;
+ edev->bin.attr.mode |= S_IWUSR;
+ }
+
+ err = sysfs_create_bin_file(&spi->dev.kobj, &edev->bin);
+ if (err)
+ goto fail;
+
+ dev_info(&spi->dev, "%d-bit eeprom %s\n",
+ (pd->flags & EE_ADDR8) ? 8 : 16,
+ (pd->flags & EE_READONLY) ? "(readonly)" : "");
+
+ if (!(pd->flags & EE_READONLY)) {
+ if (device_create_file(&spi->dev, &dev_attr_erase))
+ dev_err(&spi->dev, "can't create erase interface\n");
+ }
+
+ spi_set_drvdata(spi, edev);
+ return 0;
+fail:
+ kfree(edev);
+ return err;
+}
+
+static int eeprom_93xx46_remove(struct spi_device *spi)
+{
+ struct eeprom_93xx46_dev *edev = spi_get_drvdata(spi);
+
+ if (!(edev->pdata->flags & EE_READONLY))
+ device_remove_file(&spi->dev, &dev_attr_erase);
+
+ sysfs_remove_bin_file(&spi->dev.kobj, &edev->bin);
+ kfree(edev);
+ return 0;
+}
+
+static struct spi_driver eeprom_93xx46_driver = {
+ .driver = {
+ .name = "93xx46",
+ },
+ .probe = eeprom_93xx46_probe,
+ .remove = eeprom_93xx46_remove,
+};
+
+module_spi_driver(eeprom_93xx46_driver);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Driver for 93xx46 EEPROMs");
+MODULE_AUTHOR("Anatolij Gustschin <agust@denx.de>");
+MODULE_ALIAS("spi:93xx46");
diff --git a/drivers/misc/eeprom/max6875.c b/drivers/misc/eeprom/max6875.c
new file mode 100644
index 0000000..e4dd93b
--- /dev/null
+++ b/drivers/misc/eeprom/max6875.c
@@ -0,0 +1,209 @@
+/*
+ * max6875.c - driver for MAX6874/MAX6875
+ *
+ * Copyright (C) 2005 Ben Gardner <bgardner@wabtec.com>
+ *
+ * Based on eeprom.c
+ *
+ * The MAX6875 has a bank of registers and two banks of EEPROM.
+ * Address ranges are defined as follows:
+ * * 0x0000 - 0x0046 = configuration registers
+ * * 0x8000 - 0x8046 = configuration EEPROM
+ * * 0x8100 - 0x82FF = user EEPROM
+ *
+ * This driver makes the user EEPROM available for read.
+ *
+ * The registers & config EEPROM should be accessed via i2c-dev.
+ *
+ * The MAX6875 ignores the lowest address bit, so each chip responds to
+ * two addresses - 0x50/0x51 and 0x52/0x53.
+ *
+ * Note that the MAX6875 uses i2c_smbus_write_byte_data() to set the read
+ * address, so this driver is destructive if loaded for the wrong EEPROM chip.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/i2c.h>
+#include <linux/mutex.h>
+
+/* The MAX6875 can only read/write 16 bytes at a time */
+#define SLICE_SIZE 16
+#define SLICE_BITS 4
+
+/* USER EEPROM is at addresses 0x8100 - 0x82FF */
+#define USER_EEPROM_BASE 0x8100
+#define USER_EEPROM_SIZE 0x0200
+#define USER_EEPROM_SLICES 32
+
+/* MAX6875 commands */
+#define MAX6875_CMD_BLK_READ 0x84
+
+/* Each client has this additional data */
+struct max6875_data {
+ struct i2c_client *fake_client;
+ struct mutex update_lock;
+
+ u32 valid;
+ u8 data[USER_EEPROM_SIZE];
+ unsigned long last_updated[USER_EEPROM_SLICES];
+};
+
+static void max6875_update_slice(struct i2c_client *client, int slice)
+{
+ struct max6875_data *data = i2c_get_clientdata(client);
+ int i, j, addr;
+ u8 *buf;
+
+ if (slice >= USER_EEPROM_SLICES)
+ return;
+
+ mutex_lock(&data->update_lock);
+
+ buf = &data->data[slice << SLICE_BITS];
+
+ if (!(data->valid & (1 << slice)) ||
+ time_after(jiffies, data->last_updated[slice])) {
+
+ dev_dbg(&client->dev, "Starting update of slice %u\n", slice);
+
+ data->valid &= ~(1 << slice);
+
+ addr = USER_EEPROM_BASE + (slice << SLICE_BITS);
+
+ /* select the eeprom address */
+ if (i2c_smbus_write_byte_data(client, addr >> 8, addr & 0xFF)) {
+ dev_err(&client->dev, "address set failed\n");
+ goto exit_up;
+ }
+
+ if (i2c_check_functionality(client->adapter,
+ I2C_FUNC_SMBUS_READ_I2C_BLOCK)) {
+ if (i2c_smbus_read_i2c_block_data(client,
+ MAX6875_CMD_BLK_READ,
+ SLICE_SIZE,
+ buf) != SLICE_SIZE) {
+ goto exit_up;
+ }
+ } else {
+ for (i = 0; i < SLICE_SIZE; i++) {
+ j = i2c_smbus_read_byte(client);
+ if (j < 0) {
+ goto exit_up;
+ }
+ buf[i] = j;
+ }
+ }
+ data->last_updated[slice] = jiffies;
+ data->valid |= (1 << slice);
+ }
+exit_up:
+ mutex_unlock(&data->update_lock);
+}
+
+static ssize_t max6875_read(struct file *filp, struct kobject *kobj,
+ struct bin_attribute *bin_attr,
+ char *buf, loff_t off, size_t count)
+{
+ struct i2c_client *client = kobj_to_i2c_client(kobj);
+ struct max6875_data *data = i2c_get_clientdata(client);
+ int slice, max_slice;
+
+ /* refresh slices which contain requested bytes */
+ max_slice = (off + count - 1) >> SLICE_BITS;
+ for (slice = (off >> SLICE_BITS); slice <= max_slice; slice++)
+ max6875_update_slice(client, slice);
+
+ memcpy(buf, &data->data[off], count);
+
+ return count;
+}
+
+static struct bin_attribute user_eeprom_attr = {
+ .attr = {
+ .name = "eeprom",
+ .mode = S_IRUGO,
+ },
+ .size = USER_EEPROM_SIZE,
+ .read = max6875_read,
+};
+
+static int max6875_probe(struct i2c_client *client,
+ const struct i2c_device_id *id)
+{
+ struct i2c_adapter *adapter = client->adapter;
+ struct max6875_data *data;
+ int err;
+
+ if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_WRITE_BYTE_DATA
+ | I2C_FUNC_SMBUS_READ_BYTE))
+ return -ENODEV;
+
+ /* Only bind to even addresses */
+ if (client->addr & 1)
+ return -ENODEV;
+
+ if (!(data = kzalloc(sizeof(struct max6875_data), GFP_KERNEL)))
+ return -ENOMEM;
+
+ /* A fake client is created on the odd address */
+ data->fake_client = i2c_new_dummy(client->adapter, client->addr + 1);
+ if (!data->fake_client) {
+ err = -ENOMEM;
+ goto exit_kfree;
+ }
+
+ /* Init real i2c_client */
+ i2c_set_clientdata(client, data);
+ mutex_init(&data->update_lock);
+
+ err = sysfs_create_bin_file(&client->dev.kobj, &user_eeprom_attr);
+ if (err)
+ goto exit_remove_fake;
+
+ return 0;
+
+exit_remove_fake:
+ i2c_unregister_device(data->fake_client);
+exit_kfree:
+ kfree(data);
+ return err;
+}
+
+static int max6875_remove(struct i2c_client *client)
+{
+ struct max6875_data *data = i2c_get_clientdata(client);
+
+ i2c_unregister_device(data->fake_client);
+
+ sysfs_remove_bin_file(&client->dev.kobj, &user_eeprom_attr);
+ kfree(data);
+
+ return 0;
+}
+
+static const struct i2c_device_id max6875_id[] = {
+ { "max6875", 0 },
+ { }
+};
+MODULE_DEVICE_TABLE(i2c, max6875_id);
+
+static struct i2c_driver max6875_driver = {
+ .driver = {
+ .name = "max6875",
+ },
+ .probe = max6875_probe,
+ .remove = max6875_remove,
+ .id_table = max6875_id,
+};
+
+module_i2c_driver(max6875_driver);
+
+MODULE_AUTHOR("Ben Gardner <bgardner@wabtec.com>");
+MODULE_DESCRIPTION("MAX6875 driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/misc/enclosure.c b/drivers/misc/enclosure.c
new file mode 100644
index 0000000..cc91f7b
--- /dev/null
+++ b/drivers/misc/enclosure.c
@@ -0,0 +1,697 @@
+/*
+ * Enclosure Services
+ *
+ * Copyright (C) 2008 James Bottomley <James.Bottomley@HansenPartnership.com>
+ *
+**-----------------------------------------------------------------------------
+**
+** This program is free software; you can redistribute it and/or
+** modify it under the terms of the GNU General Public License
+** version 2 as published by the Free Software Foundation.
+**
+** This program is distributed in the hope that it will be useful,
+** but WITHOUT ANY WARRANTY; without even the implied warranty of
+** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+** GNU General Public License for more details.
+**
+** You should have received a copy of the GNU General Public License
+** along with this program; if not, write to the Free Software
+** Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+**
+**-----------------------------------------------------------------------------
+*/
+#include <linux/device.h>
+#include <linux/enclosure.h>
+#include <linux/err.h>
+#include <linux/list.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/slab.h>
+
+static LIST_HEAD(container_list);
+static DEFINE_MUTEX(container_list_lock);
+static struct class enclosure_class;
+
+/**
+ * enclosure_find - find an enclosure given a parent device
+ * @dev: the parent to match against
+ * @start: Optional enclosure device to start from (NULL if none)
+ *
+ * Looks through the list of registered enclosures to find all those
+ * with @dev as a parent. Returns NULL if no enclosure is
+ * found. @start can be used as a starting point to obtain multiple
+ * enclosures per parent (should begin with NULL and then be set to
+ * each returned enclosure device). Obtains a reference to the
+ * enclosure class device which must be released with device_put().
+ * If @start is not NULL, a reference must be taken on it which is
+ * released before returning (this allows a loop through all
+ * enclosures to exit with only the reference on the enclosure of
+ * interest held). Note that the @dev may correspond to the actual
+ * device housing the enclosure, in which case no iteration via @start
+ * is required.
+ */
+struct enclosure_device *enclosure_find(struct device *dev,
+ struct enclosure_device *start)
+{
+ struct enclosure_device *edev;
+
+ mutex_lock(&container_list_lock);
+ edev = list_prepare_entry(start, &container_list, node);
+ if (start)
+ put_device(&start->edev);
+
+ list_for_each_entry_continue(edev, &container_list, node) {
+ struct device *parent = edev->edev.parent;
+ /* parent might not be immediate, so iterate up to
+ * the root of the tree if necessary */
+ while (parent) {
+ if (parent == dev) {
+ get_device(&edev->edev);
+ mutex_unlock(&container_list_lock);
+ return edev;
+ }
+ parent = parent->parent;
+ }
+ }
+ mutex_unlock(&container_list_lock);
+
+ return NULL;
+}
+EXPORT_SYMBOL_GPL(enclosure_find);
+
+/**
+ * enclosure_for_each_device - calls a function for each enclosure
+ * @fn: the function to call
+ * @data: the data to pass to each call
+ *
+ * Loops over all the enclosures calling the function.
+ *
+ * Note, this function uses a mutex which will be held across calls to
+ * @fn, so it must have non atomic context, and @fn may (although it
+ * should not) sleep or otherwise cause the mutex to be held for
+ * indefinite periods
+ */
+int enclosure_for_each_device(int (*fn)(struct enclosure_device *, void *),
+ void *data)
+{
+ int error = 0;
+ struct enclosure_device *edev;
+
+ mutex_lock(&container_list_lock);
+ list_for_each_entry(edev, &container_list, node) {
+ error = fn(edev, data);
+ if (error)
+ break;
+ }
+ mutex_unlock(&container_list_lock);
+
+ return error;
+}
+EXPORT_SYMBOL_GPL(enclosure_for_each_device);
+
+/**
+ * enclosure_register - register device as an enclosure
+ *
+ * @dev: device containing the enclosure
+ * @components: number of components in the enclosure
+ *
+ * This sets up the device for being an enclosure. Note that @dev does
+ * not have to be a dedicated enclosure device. It may be some other type
+ * of device that additionally responds to enclosure services
+ */
+struct enclosure_device *
+enclosure_register(struct device *dev, const char *name, int components,
+ struct enclosure_component_callbacks *cb)
+{
+ struct enclosure_device *edev =
+ kzalloc(sizeof(struct enclosure_device) +
+ sizeof(struct enclosure_component)*components,
+ GFP_KERNEL);
+ int err, i;
+
+ BUG_ON(!cb);
+
+ if (!edev)
+ return ERR_PTR(-ENOMEM);
+
+ edev->components = components;
+
+ edev->edev.class = &enclosure_class;
+ edev->edev.parent = get_device(dev);
+ edev->cb = cb;
+ dev_set_name(&edev->edev, "%s", name);
+ err = device_register(&edev->edev);
+ if (err)
+ goto err;
+
+ for (i = 0; i < components; i++) {
+ edev->component[i].number = -1;
+ edev->component[i].slot = -1;
+ edev->component[i].power_status = 1;
+ }
+
+ mutex_lock(&container_list_lock);
+ list_add_tail(&edev->node, &container_list);
+ mutex_unlock(&container_list_lock);
+
+ return edev;
+
+ err:
+ put_device(edev->edev.parent);
+ kfree(edev);
+ return ERR_PTR(err);
+}
+EXPORT_SYMBOL_GPL(enclosure_register);
+
+static struct enclosure_component_callbacks enclosure_null_callbacks;
+
+/**
+ * enclosure_unregister - remove an enclosure
+ *
+ * @edev: the registered enclosure to remove;
+ */
+void enclosure_unregister(struct enclosure_device *edev)
+{
+ int i;
+
+ mutex_lock(&container_list_lock);
+ list_del(&edev->node);
+ mutex_unlock(&container_list_lock);
+
+ for (i = 0; i < edev->components; i++)
+ if (edev->component[i].number != -1)
+ device_unregister(&edev->component[i].cdev);
+
+ /* prevent any callbacks into service user */
+ edev->cb = &enclosure_null_callbacks;
+ device_unregister(&edev->edev);
+}
+EXPORT_SYMBOL_GPL(enclosure_unregister);
+
+#define ENCLOSURE_NAME_SIZE 64
+#define COMPONENT_NAME_SIZE 64
+
+static void enclosure_link_name(struct enclosure_component *cdev, char *name)
+{
+ strcpy(name, "enclosure_device:");
+ strcat(name, dev_name(&cdev->cdev));
+}
+
+static void enclosure_remove_links(struct enclosure_component *cdev)
+{
+ char name[ENCLOSURE_NAME_SIZE];
+
+ enclosure_link_name(cdev, name);
+
+ /*
+ * In odd circumstances, like multipath devices, something else may
+ * already have removed the links, so check for this condition first.
+ */
+ if (cdev->dev->kobj.sd)
+ sysfs_remove_link(&cdev->dev->kobj, name);
+
+ if (cdev->cdev.kobj.sd)
+ sysfs_remove_link(&cdev->cdev.kobj, "device");
+}
+
+static int enclosure_add_links(struct enclosure_component *cdev)
+{
+ int error;
+ char name[ENCLOSURE_NAME_SIZE];
+
+ error = sysfs_create_link(&cdev->cdev.kobj, &cdev->dev->kobj, "device");
+ if (error)
+ return error;
+
+ enclosure_link_name(cdev, name);
+ error = sysfs_create_link(&cdev->dev->kobj, &cdev->cdev.kobj, name);
+ if (error)
+ sysfs_remove_link(&cdev->cdev.kobj, "device");
+
+ return error;
+}
+
+static void enclosure_release(struct device *cdev)
+{
+ struct enclosure_device *edev = to_enclosure_device(cdev);
+
+ put_device(cdev->parent);
+ kfree(edev);
+}
+
+static void enclosure_component_release(struct device *dev)
+{
+ struct enclosure_component *cdev = to_enclosure_component(dev);
+
+ if (cdev->dev) {
+ enclosure_remove_links(cdev);
+ put_device(cdev->dev);
+ }
+ put_device(dev->parent);
+}
+
+static struct enclosure_component *
+enclosure_component_find_by_name(struct enclosure_device *edev,
+ const char *name)
+{
+ int i;
+ const char *cname;
+ struct enclosure_component *ecomp;
+
+ if (!edev || !name || !name[0])
+ return NULL;
+
+ for (i = 0; i < edev->components; i++) {
+ ecomp = &edev->component[i];
+ cname = dev_name(&ecomp->cdev);
+ if (ecomp->number != -1 &&
+ cname && cname[0] &&
+ !strcmp(cname, name))
+ return ecomp;
+ }
+
+ return NULL;
+}
+
+static const struct attribute_group *enclosure_component_groups[];
+
+/**
+ * enclosure_component_alloc - prepare a new enclosure component
+ * @edev: the enclosure to add the component
+ * @num: the device number
+ * @type: the type of component being added
+ * @name: an optional name to appear in sysfs (leave NULL if none)
+ *
+ * The name is optional for enclosures that give their components a unique
+ * name. If not, leave the field NULL and a name will be assigned.
+ *
+ * Returns a pointer to the enclosure component or an error.
+ */
+struct enclosure_component *
+enclosure_component_alloc(struct enclosure_device *edev,
+ unsigned int number,
+ enum enclosure_component_type type,
+ const char *name)
+{
+ struct enclosure_component *ecomp;
+ struct device *cdev;
+ int i;
+ char newname[COMPONENT_NAME_SIZE];
+
+ if (number >= edev->components)
+ return ERR_PTR(-EINVAL);
+
+ ecomp = &edev->component[number];
+
+ if (ecomp->number != -1)
+ return ERR_PTR(-EINVAL);
+
+ ecomp->type = type;
+ ecomp->number = number;
+ cdev = &ecomp->cdev;
+ cdev->parent = get_device(&edev->edev);
+
+ if (name && name[0]) {
+ /* Some hardware (e.g. enclosure in RX300 S6) has components
+ * with non unique names. Registering duplicates in sysfs
+ * will lead to warnings during bootup. So make the names
+ * unique by appending consecutive numbers -1, -2, ... */
+ i = 1;
+ snprintf(newname, COMPONENT_NAME_SIZE,
+ "%s", name);
+ while (enclosure_component_find_by_name(edev, newname))
+ snprintf(newname, COMPONENT_NAME_SIZE,
+ "%s-%i", name, i++);
+ dev_set_name(cdev, "%s", newname);
+ } else
+ dev_set_name(cdev, "%u", number);
+
+ cdev->release = enclosure_component_release;
+ cdev->groups = enclosure_component_groups;
+
+ return ecomp;
+}
+EXPORT_SYMBOL_GPL(enclosure_component_alloc);
+
+/**
+ * enclosure_component_register - publishes an initialized enclosure component
+ * @ecomp: component to add
+ *
+ * Returns 0 on successful registration, releases the component otherwise
+ */
+int enclosure_component_register(struct enclosure_component *ecomp)
+{
+ struct device *cdev;
+ int err;
+
+ cdev = &ecomp->cdev;
+ err = device_register(cdev);
+ if (err) {
+ ecomp->number = -1;
+ put_device(cdev);
+ return err;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(enclosure_component_register);
+
+/**
+ * enclosure_add_device - add a device as being part of an enclosure
+ * @edev: the enclosure device being added to.
+ * @num: the number of the component
+ * @dev: the device being added
+ *
+ * Declares a real device to reside in slot (or identifier) @num of an
+ * enclosure. This will cause the relevant sysfs links to appear.
+ * This function may also be used to change a device associated with
+ * an enclosure without having to call enclosure_remove_device() in
+ * between.
+ *
+ * Returns zero on success or an error.
+ */
+int enclosure_add_device(struct enclosure_device *edev, int component,
+ struct device *dev)
+{
+ struct enclosure_component *cdev;
+ int err;
+
+ if (!edev || component >= edev->components)
+ return -EINVAL;
+
+ cdev = &edev->component[component];
+
+ if (cdev->dev == dev)
+ return -EEXIST;
+
+ if (cdev->dev) {
+ enclosure_remove_links(cdev);
+ put_device(cdev->dev);
+ }
+ cdev->dev = get_device(dev);
+ err = enclosure_add_links(cdev);
+ if (err) {
+ put_device(cdev->dev);
+ cdev->dev = NULL;
+ }
+ return err;
+}
+EXPORT_SYMBOL_GPL(enclosure_add_device);
+
+/**
+ * enclosure_remove_device - remove a device from an enclosure
+ * @edev: the enclosure device
+ * @num: the number of the component to remove
+ *
+ * Returns zero on success or an error.
+ *
+ */
+int enclosure_remove_device(struct enclosure_device *edev, struct device *dev)
+{
+ struct enclosure_component *cdev;
+ int i;
+
+ if (!edev || !dev)
+ return -EINVAL;
+
+ for (i = 0; i < edev->components; i++) {
+ cdev = &edev->component[i];
+ if (cdev->dev == dev) {
+ enclosure_remove_links(cdev);
+ device_del(&cdev->cdev);
+ put_device(dev);
+ cdev->dev = NULL;
+ return device_add(&cdev->cdev);
+ }
+ }
+ return -ENODEV;
+}
+EXPORT_SYMBOL_GPL(enclosure_remove_device);
+
+/*
+ * sysfs pieces below
+ */
+
+static ssize_t components_show(struct device *cdev,
+ struct device_attribute *attr, char *buf)
+{
+ struct enclosure_device *edev = to_enclosure_device(cdev);
+
+ return snprintf(buf, 40, "%d\n", edev->components);
+}
+static DEVICE_ATTR_RO(components);
+
+static ssize_t id_show(struct device *cdev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct enclosure_device *edev = to_enclosure_device(cdev);
+
+ if (edev->cb->show_id)
+ return edev->cb->show_id(edev, buf);
+ return -EINVAL;
+}
+static DEVICE_ATTR_RO(id);
+
+static struct attribute *enclosure_class_attrs[] = {
+ &dev_attr_components.attr,
+ &dev_attr_id.attr,
+ NULL,
+};
+ATTRIBUTE_GROUPS(enclosure_class);
+
+static struct class enclosure_class = {
+ .name = "enclosure",
+ .owner = THIS_MODULE,
+ .dev_release = enclosure_release,
+ .dev_groups = enclosure_class_groups,
+};
+
+static const char *const enclosure_status [] = {
+ [ENCLOSURE_STATUS_UNSUPPORTED] = "unsupported",
+ [ENCLOSURE_STATUS_OK] = "OK",
+ [ENCLOSURE_STATUS_CRITICAL] = "critical",
+ [ENCLOSURE_STATUS_NON_CRITICAL] = "non-critical",
+ [ENCLOSURE_STATUS_UNRECOVERABLE] = "unrecoverable",
+ [ENCLOSURE_STATUS_NOT_INSTALLED] = "not installed",
+ [ENCLOSURE_STATUS_UNKNOWN] = "unknown",
+ [ENCLOSURE_STATUS_UNAVAILABLE] = "unavailable",
+ [ENCLOSURE_STATUS_MAX] = NULL,
+};
+
+static const char *const enclosure_type [] = {
+ [ENCLOSURE_COMPONENT_DEVICE] = "device",
+ [ENCLOSURE_COMPONENT_ARRAY_DEVICE] = "array device",
+};
+
+static ssize_t get_component_fault(struct device *cdev,
+ struct device_attribute *attr, char *buf)
+{
+ struct enclosure_device *edev = to_enclosure_device(cdev->parent);
+ struct enclosure_component *ecomp = to_enclosure_component(cdev);
+
+ if (edev->cb->get_fault)
+ edev->cb->get_fault(edev, ecomp);
+ return snprintf(buf, 40, "%d\n", ecomp->fault);
+}
+
+static ssize_t set_component_fault(struct device *cdev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct enclosure_device *edev = to_enclosure_device(cdev->parent);
+ struct enclosure_component *ecomp = to_enclosure_component(cdev);
+ int val = simple_strtoul(buf, NULL, 0);
+
+ if (edev->cb->set_fault)
+ edev->cb->set_fault(edev, ecomp, val);
+ return count;
+}
+
+static ssize_t get_component_status(struct device *cdev,
+ struct device_attribute *attr,char *buf)
+{
+ struct enclosure_device *edev = to_enclosure_device(cdev->parent);
+ struct enclosure_component *ecomp = to_enclosure_component(cdev);
+
+ if (edev->cb->get_status)
+ edev->cb->get_status(edev, ecomp);
+ return snprintf(buf, 40, "%s\n", enclosure_status[ecomp->status]);
+}
+
+static ssize_t set_component_status(struct device *cdev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct enclosure_device *edev = to_enclosure_device(cdev->parent);
+ struct enclosure_component *ecomp = to_enclosure_component(cdev);
+ int i;
+
+ for (i = 0; enclosure_status[i]; i++) {
+ if (strncmp(buf, enclosure_status[i],
+ strlen(enclosure_status[i])) == 0 &&
+ (buf[strlen(enclosure_status[i])] == '\n' ||
+ buf[strlen(enclosure_status[i])] == '\0'))
+ break;
+ }
+
+ if (enclosure_status[i] && edev->cb->set_status) {
+ edev->cb->set_status(edev, ecomp, i);
+ return count;
+ } else
+ return -EINVAL;
+}
+
+static ssize_t get_component_active(struct device *cdev,
+ struct device_attribute *attr, char *buf)
+{
+ struct enclosure_device *edev = to_enclosure_device(cdev->parent);
+ struct enclosure_component *ecomp = to_enclosure_component(cdev);
+
+ if (edev->cb->get_active)
+ edev->cb->get_active(edev, ecomp);
+ return snprintf(buf, 40, "%d\n", ecomp->active);
+}
+
+static ssize_t set_component_active(struct device *cdev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct enclosure_device *edev = to_enclosure_device(cdev->parent);
+ struct enclosure_component *ecomp = to_enclosure_component(cdev);
+ int val = simple_strtoul(buf, NULL, 0);
+
+ if (edev->cb->set_active)
+ edev->cb->set_active(edev, ecomp, val);
+ return count;
+}
+
+static ssize_t get_component_locate(struct device *cdev,
+ struct device_attribute *attr, char *buf)
+{
+ struct enclosure_device *edev = to_enclosure_device(cdev->parent);
+ struct enclosure_component *ecomp = to_enclosure_component(cdev);
+
+ if (edev->cb->get_locate)
+ edev->cb->get_locate(edev, ecomp);
+ return snprintf(buf, 40, "%d\n", ecomp->locate);
+}
+
+static ssize_t set_component_locate(struct device *cdev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct enclosure_device *edev = to_enclosure_device(cdev->parent);
+ struct enclosure_component *ecomp = to_enclosure_component(cdev);
+ int val = simple_strtoul(buf, NULL, 0);
+
+ if (edev->cb->set_locate)
+ edev->cb->set_locate(edev, ecomp, val);
+ return count;
+}
+
+static ssize_t get_component_power_status(struct device *cdev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct enclosure_device *edev = to_enclosure_device(cdev->parent);
+ struct enclosure_component *ecomp = to_enclosure_component(cdev);
+
+ if (edev->cb->get_power_status)
+ edev->cb->get_power_status(edev, ecomp);
+ return snprintf(buf, 40, "%s\n", ecomp->power_status ? "on" : "off");
+}
+
+static ssize_t set_component_power_status(struct device *cdev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct enclosure_device *edev = to_enclosure_device(cdev->parent);
+ struct enclosure_component *ecomp = to_enclosure_component(cdev);
+ int val;
+
+ if (strncmp(buf, "on", 2) == 0 &&
+ (buf[2] == '\n' || buf[2] == '\0'))
+ val = 1;
+ else if (strncmp(buf, "off", 3) == 0 &&
+ (buf[3] == '\n' || buf[3] == '\0'))
+ val = 0;
+ else
+ return -EINVAL;
+
+ if (edev->cb->set_power_status)
+ edev->cb->set_power_status(edev, ecomp, val);
+ return count;
+}
+
+static ssize_t get_component_type(struct device *cdev,
+ struct device_attribute *attr, char *buf)
+{
+ struct enclosure_component *ecomp = to_enclosure_component(cdev);
+
+ return snprintf(buf, 40, "%s\n", enclosure_type[ecomp->type]);
+}
+
+static ssize_t get_component_slot(struct device *cdev,
+ struct device_attribute *attr, char *buf)
+{
+ struct enclosure_component *ecomp = to_enclosure_component(cdev);
+ int slot;
+
+ /* if the enclosure does not override then use 'number' as a stand-in */
+ if (ecomp->slot >= 0)
+ slot = ecomp->slot;
+ else
+ slot = ecomp->number;
+
+ return snprintf(buf, 40, "%d\n", slot);
+}
+
+static DEVICE_ATTR(fault, S_IRUGO | S_IWUSR, get_component_fault,
+ set_component_fault);
+static DEVICE_ATTR(status, S_IRUGO | S_IWUSR, get_component_status,
+ set_component_status);
+static DEVICE_ATTR(active, S_IRUGO | S_IWUSR, get_component_active,
+ set_component_active);
+static DEVICE_ATTR(locate, S_IRUGO | S_IWUSR, get_component_locate,
+ set_component_locate);
+static DEVICE_ATTR(power_status, S_IRUGO | S_IWUSR, get_component_power_status,
+ set_component_power_status);
+static DEVICE_ATTR(type, S_IRUGO, get_component_type, NULL);
+static DEVICE_ATTR(slot, S_IRUGO, get_component_slot, NULL);
+
+static struct attribute *enclosure_component_attrs[] = {
+ &dev_attr_fault.attr,
+ &dev_attr_status.attr,
+ &dev_attr_active.attr,
+ &dev_attr_locate.attr,
+ &dev_attr_power_status.attr,
+ &dev_attr_type.attr,
+ &dev_attr_slot.attr,
+ NULL
+};
+ATTRIBUTE_GROUPS(enclosure_component);
+
+static int __init enclosure_init(void)
+{
+ int err;
+
+ err = class_register(&enclosure_class);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+static void __exit enclosure_exit(void)
+{
+ class_unregister(&enclosure_class);
+}
+
+module_init(enclosure_init);
+module_exit(enclosure_exit);
+
+MODULE_AUTHOR("James Bottomley");
+MODULE_DESCRIPTION("Enclosure Services");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/misc/fsa9480.c b/drivers/misc/fsa9480.c
new file mode 100644
index 0000000..71d2793
--- /dev/null
+++ b/drivers/misc/fsa9480.c
@@ -0,0 +1,549 @@
+/*
+ * fsa9480.c - FSA9480 micro USB switch device driver
+ *
+ * Copyright (C) 2010 Samsung Electronics
+ * Minkyu Kang <mk7.kang@samsung.com>
+ * Wonguk Jeong <wonguk.jeong@samsung.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/err.h>
+#include <linux/i2c.h>
+#include <linux/platform_data/fsa9480.h>
+#include <linux/irq.h>
+#include <linux/interrupt.h>
+#include <linux/workqueue.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/pm_runtime.h>
+
+/* FSA9480 I2C registers */
+#define FSA9480_REG_DEVID 0x01
+#define FSA9480_REG_CTRL 0x02
+#define FSA9480_REG_INT1 0x03
+#define FSA9480_REG_INT2 0x04
+#define FSA9480_REG_INT1_MASK 0x05
+#define FSA9480_REG_INT2_MASK 0x06
+#define FSA9480_REG_ADC 0x07
+#define FSA9480_REG_TIMING1 0x08
+#define FSA9480_REG_TIMING2 0x09
+#define FSA9480_REG_DEV_T1 0x0a
+#define FSA9480_REG_DEV_T2 0x0b
+#define FSA9480_REG_BTN1 0x0c
+#define FSA9480_REG_BTN2 0x0d
+#define FSA9480_REG_CK 0x0e
+#define FSA9480_REG_CK_INT1 0x0f
+#define FSA9480_REG_CK_INT2 0x10
+#define FSA9480_REG_CK_INTMASK1 0x11
+#define FSA9480_REG_CK_INTMASK2 0x12
+#define FSA9480_REG_MANSW1 0x13
+#define FSA9480_REG_MANSW2 0x14
+
+/* Control */
+#define CON_SWITCH_OPEN (1 << 4)
+#define CON_RAW_DATA (1 << 3)
+#define CON_MANUAL_SW (1 << 2)
+#define CON_WAIT (1 << 1)
+#define CON_INT_MASK (1 << 0)
+#define CON_MASK (CON_SWITCH_OPEN | CON_RAW_DATA | \
+ CON_MANUAL_SW | CON_WAIT)
+
+/* Device Type 1 */
+#define DEV_USB_OTG (1 << 7)
+#define DEV_DEDICATED_CHG (1 << 6)
+#define DEV_USB_CHG (1 << 5)
+#define DEV_CAR_KIT (1 << 4)
+#define DEV_UART (1 << 3)
+#define DEV_USB (1 << 2)
+#define DEV_AUDIO_2 (1 << 1)
+#define DEV_AUDIO_1 (1 << 0)
+
+#define DEV_T1_USB_MASK (DEV_USB_OTG | DEV_USB)
+#define DEV_T1_UART_MASK (DEV_UART)
+#define DEV_T1_CHARGER_MASK (DEV_DEDICATED_CHG | DEV_USB_CHG)
+
+/* Device Type 2 */
+#define DEV_AV (1 << 6)
+#define DEV_TTY (1 << 5)
+#define DEV_PPD (1 << 4)
+#define DEV_JIG_UART_OFF (1 << 3)
+#define DEV_JIG_UART_ON (1 << 2)
+#define DEV_JIG_USB_OFF (1 << 1)
+#define DEV_JIG_USB_ON (1 << 0)
+
+#define DEV_T2_USB_MASK (DEV_JIG_USB_OFF | DEV_JIG_USB_ON)
+#define DEV_T2_UART_MASK (DEV_JIG_UART_OFF | DEV_JIG_UART_ON)
+#define DEV_T2_JIG_MASK (DEV_JIG_USB_OFF | DEV_JIG_USB_ON | \
+ DEV_JIG_UART_OFF | DEV_JIG_UART_ON)
+
+/*
+ * Manual Switch
+ * D- [7:5] / D+ [4:2]
+ * 000: Open all / 001: USB / 010: AUDIO / 011: UART / 100: V_AUDIO
+ */
+#define SW_VAUDIO ((4 << 5) | (4 << 2))
+#define SW_UART ((3 << 5) | (3 << 2))
+#define SW_AUDIO ((2 << 5) | (2 << 2))
+#define SW_DHOST ((1 << 5) | (1 << 2))
+#define SW_AUTO ((0 << 5) | (0 << 2))
+
+/* Interrupt 1 */
+#define INT_DETACH (1 << 1)
+#define INT_ATTACH (1 << 0)
+
+struct fsa9480_usbsw {
+ struct i2c_client *client;
+ struct fsa9480_platform_data *pdata;
+ int dev1;
+ int dev2;
+ int mansw;
+};
+
+static struct fsa9480_usbsw *chip;
+
+static int fsa9480_write_reg(struct i2c_client *client,
+ int reg, int value)
+{
+ int ret;
+
+ ret = i2c_smbus_write_byte_data(client, reg, value);
+
+ if (ret < 0)
+ dev_err(&client->dev, "%s: err %d\n", __func__, ret);
+
+ return ret;
+}
+
+static int fsa9480_read_reg(struct i2c_client *client, int reg)
+{
+ int ret;
+
+ ret = i2c_smbus_read_byte_data(client, reg);
+
+ if (ret < 0)
+ dev_err(&client->dev, "%s: err %d\n", __func__, ret);
+
+ return ret;
+}
+
+static int fsa9480_read_irq(struct i2c_client *client, int *value)
+{
+ int ret;
+
+ ret = i2c_smbus_read_i2c_block_data(client,
+ FSA9480_REG_INT1, 2, (u8 *)value);
+ *value &= 0xffff;
+
+ if (ret < 0)
+ dev_err(&client->dev, "%s: err %d\n", __func__, ret);
+
+ return ret;
+}
+
+static void fsa9480_set_switch(const char *buf)
+{
+ struct fsa9480_usbsw *usbsw = chip;
+ struct i2c_client *client = usbsw->client;
+ unsigned int value;
+ unsigned int path = 0;
+
+ value = fsa9480_read_reg(client, FSA9480_REG_CTRL);
+
+ if (!strncmp(buf, "VAUDIO", 6)) {
+ path = SW_VAUDIO;
+ value &= ~CON_MANUAL_SW;
+ } else if (!strncmp(buf, "UART", 4)) {
+ path = SW_UART;
+ value &= ~CON_MANUAL_SW;
+ } else if (!strncmp(buf, "AUDIO", 5)) {
+ path = SW_AUDIO;
+ value &= ~CON_MANUAL_SW;
+ } else if (!strncmp(buf, "DHOST", 5)) {
+ path = SW_DHOST;
+ value &= ~CON_MANUAL_SW;
+ } else if (!strncmp(buf, "AUTO", 4)) {
+ path = SW_AUTO;
+ value |= CON_MANUAL_SW;
+ } else {
+ printk(KERN_ERR "Wrong command\n");
+ return;
+ }
+
+ usbsw->mansw = path;
+ fsa9480_write_reg(client, FSA9480_REG_MANSW1, path);
+ fsa9480_write_reg(client, FSA9480_REG_CTRL, value);
+}
+
+static ssize_t fsa9480_get_switch(char *buf)
+{
+ struct fsa9480_usbsw *usbsw = chip;
+ struct i2c_client *client = usbsw->client;
+ unsigned int value;
+
+ value = fsa9480_read_reg(client, FSA9480_REG_MANSW1);
+
+ if (value == SW_VAUDIO)
+ return sprintf(buf, "VAUDIO\n");
+ else if (value == SW_UART)
+ return sprintf(buf, "UART\n");
+ else if (value == SW_AUDIO)
+ return sprintf(buf, "AUDIO\n");
+ else if (value == SW_DHOST)
+ return sprintf(buf, "DHOST\n");
+ else if (value == SW_AUTO)
+ return sprintf(buf, "AUTO\n");
+ else
+ return sprintf(buf, "%x", value);
+}
+
+static ssize_t fsa9480_show_device(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct fsa9480_usbsw *usbsw = dev_get_drvdata(dev);
+ struct i2c_client *client = usbsw->client;
+ int dev1, dev2;
+
+ dev1 = fsa9480_read_reg(client, FSA9480_REG_DEV_T1);
+ dev2 = fsa9480_read_reg(client, FSA9480_REG_DEV_T2);
+
+ if (!dev1 && !dev2)
+ return sprintf(buf, "NONE\n");
+
+ /* USB */
+ if (dev1 & DEV_T1_USB_MASK || dev2 & DEV_T2_USB_MASK)
+ return sprintf(buf, "USB\n");
+
+ /* UART */
+ if (dev1 & DEV_T1_UART_MASK || dev2 & DEV_T2_UART_MASK)
+ return sprintf(buf, "UART\n");
+
+ /* CHARGER */
+ if (dev1 & DEV_T1_CHARGER_MASK)
+ return sprintf(buf, "CHARGER\n");
+
+ /* JIG */
+ if (dev2 & DEV_T2_JIG_MASK)
+ return sprintf(buf, "JIG\n");
+
+ return sprintf(buf, "UNKNOWN\n");
+}
+
+static ssize_t fsa9480_show_manualsw(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ return fsa9480_get_switch(buf);
+
+}
+
+static ssize_t fsa9480_set_manualsw(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ fsa9480_set_switch(buf);
+
+ return count;
+}
+
+static DEVICE_ATTR(device, S_IRUGO, fsa9480_show_device, NULL);
+static DEVICE_ATTR(switch, S_IRUGO | S_IWUSR,
+ fsa9480_show_manualsw, fsa9480_set_manualsw);
+
+static struct attribute *fsa9480_attributes[] = {
+ &dev_attr_device.attr,
+ &dev_attr_switch.attr,
+ NULL
+};
+
+static const struct attribute_group fsa9480_group = {
+ .attrs = fsa9480_attributes,
+};
+
+static void fsa9480_detect_dev(struct fsa9480_usbsw *usbsw, int intr)
+{
+ int val1, val2, ctrl;
+ struct fsa9480_platform_data *pdata = usbsw->pdata;
+ struct i2c_client *client = usbsw->client;
+
+ val1 = fsa9480_read_reg(client, FSA9480_REG_DEV_T1);
+ val2 = fsa9480_read_reg(client, FSA9480_REG_DEV_T2);
+ ctrl = fsa9480_read_reg(client, FSA9480_REG_CTRL);
+
+ dev_info(&client->dev, "intr: 0x%x, dev1: 0x%x, dev2: 0x%x\n",
+ intr, val1, val2);
+
+ if (!intr)
+ goto out;
+
+ if (intr & INT_ATTACH) { /* Attached */
+ /* USB */
+ if (val1 & DEV_T1_USB_MASK || val2 & DEV_T2_USB_MASK) {
+ if (pdata->usb_cb)
+ pdata->usb_cb(FSA9480_ATTACHED);
+
+ if (usbsw->mansw) {
+ fsa9480_write_reg(client,
+ FSA9480_REG_MANSW1, usbsw->mansw);
+ }
+ }
+
+ /* UART */
+ if (val1 & DEV_T1_UART_MASK || val2 & DEV_T2_UART_MASK) {
+ if (pdata->uart_cb)
+ pdata->uart_cb(FSA9480_ATTACHED);
+
+ if (!(ctrl & CON_MANUAL_SW)) {
+ fsa9480_write_reg(client,
+ FSA9480_REG_MANSW1, SW_UART);
+ }
+ }
+
+ /* CHARGER */
+ if (val1 & DEV_T1_CHARGER_MASK) {
+ if (pdata->charger_cb)
+ pdata->charger_cb(FSA9480_ATTACHED);
+ }
+
+ /* JIG */
+ if (val2 & DEV_T2_JIG_MASK) {
+ if (pdata->jig_cb)
+ pdata->jig_cb(FSA9480_ATTACHED);
+ }
+ } else if (intr & INT_DETACH) { /* Detached */
+ /* USB */
+ if (usbsw->dev1 & DEV_T1_USB_MASK ||
+ usbsw->dev2 & DEV_T2_USB_MASK) {
+ if (pdata->usb_cb)
+ pdata->usb_cb(FSA9480_DETACHED);
+ }
+
+ /* UART */
+ if (usbsw->dev1 & DEV_T1_UART_MASK ||
+ usbsw->dev2 & DEV_T2_UART_MASK) {
+ if (pdata->uart_cb)
+ pdata->uart_cb(FSA9480_DETACHED);
+ }
+
+ /* CHARGER */
+ if (usbsw->dev1 & DEV_T1_CHARGER_MASK) {
+ if (pdata->charger_cb)
+ pdata->charger_cb(FSA9480_DETACHED);
+ }
+
+ /* JIG */
+ if (usbsw->dev2 & DEV_T2_JIG_MASK) {
+ if (pdata->jig_cb)
+ pdata->jig_cb(FSA9480_DETACHED);
+ }
+ }
+
+ usbsw->dev1 = val1;
+ usbsw->dev2 = val2;
+
+out:
+ ctrl &= ~CON_INT_MASK;
+ fsa9480_write_reg(client, FSA9480_REG_CTRL, ctrl);
+}
+
+static irqreturn_t fsa9480_irq_handler(int irq, void *data)
+{
+ struct fsa9480_usbsw *usbsw = data;
+ struct i2c_client *client = usbsw->client;
+ int intr;
+
+ /* clear interrupt */
+ fsa9480_read_irq(client, &intr);
+
+ /* device detection */
+ fsa9480_detect_dev(usbsw, intr);
+
+ return IRQ_HANDLED;
+}
+
+static int fsa9480_irq_init(struct fsa9480_usbsw *usbsw)
+{
+ struct fsa9480_platform_data *pdata = usbsw->pdata;
+ struct i2c_client *client = usbsw->client;
+ int ret;
+ int intr;
+ unsigned int ctrl = CON_MASK;
+
+ /* clear interrupt */
+ fsa9480_read_irq(client, &intr);
+
+ /* unmask interrupt (attach/detach only) */
+ fsa9480_write_reg(client, FSA9480_REG_INT1_MASK, 0xfc);
+ fsa9480_write_reg(client, FSA9480_REG_INT2_MASK, 0x1f);
+
+ usbsw->mansw = fsa9480_read_reg(client, FSA9480_REG_MANSW1);
+
+ if (usbsw->mansw)
+ ctrl &= ~CON_MANUAL_SW; /* Manual Switching Mode */
+
+ fsa9480_write_reg(client, FSA9480_REG_CTRL, ctrl);
+
+ if (pdata && pdata->cfg_gpio)
+ pdata->cfg_gpio();
+
+ if (client->irq) {
+ ret = request_threaded_irq(client->irq, NULL,
+ fsa9480_irq_handler,
+ IRQF_TRIGGER_FALLING | IRQF_ONESHOT,
+ "fsa9480 micro USB", usbsw);
+ if (ret) {
+ dev_err(&client->dev, "failed to request IRQ\n");
+ return ret;
+ }
+
+ if (pdata)
+ device_init_wakeup(&client->dev, pdata->wakeup);
+ }
+
+ return 0;
+}
+
+static int fsa9480_probe(struct i2c_client *client,
+ const struct i2c_device_id *id)
+{
+ struct i2c_adapter *adapter = to_i2c_adapter(client->dev.parent);
+ struct fsa9480_usbsw *usbsw;
+ int ret = 0;
+
+ if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE_DATA))
+ return -EIO;
+
+ usbsw = kzalloc(sizeof(struct fsa9480_usbsw), GFP_KERNEL);
+ if (!usbsw) {
+ dev_err(&client->dev, "failed to allocate driver data\n");
+ return -ENOMEM;
+ }
+
+ usbsw->client = client;
+ usbsw->pdata = client->dev.platform_data;
+
+ chip = usbsw;
+
+ i2c_set_clientdata(client, usbsw);
+
+ ret = fsa9480_irq_init(usbsw);
+ if (ret)
+ goto fail1;
+
+ ret = sysfs_create_group(&client->dev.kobj, &fsa9480_group);
+ if (ret) {
+ dev_err(&client->dev,
+ "failed to create fsa9480 attribute group\n");
+ goto fail2;
+ }
+
+ /* ADC Detect Time: 500ms */
+ fsa9480_write_reg(client, FSA9480_REG_TIMING1, 0x6);
+
+ if (chip->pdata->reset_cb)
+ chip->pdata->reset_cb();
+
+ /* device detection */
+ fsa9480_detect_dev(usbsw, INT_ATTACH);
+
+ pm_runtime_set_active(&client->dev);
+
+ return 0;
+
+fail2:
+ if (client->irq)
+ free_irq(client->irq, usbsw);
+fail1:
+ kfree(usbsw);
+ return ret;
+}
+
+static int fsa9480_remove(struct i2c_client *client)
+{
+ struct fsa9480_usbsw *usbsw = i2c_get_clientdata(client);
+ if (client->irq)
+ free_irq(client->irq, usbsw);
+
+ sysfs_remove_group(&client->dev.kobj, &fsa9480_group);
+ device_init_wakeup(&client->dev, 0);
+ kfree(usbsw);
+ return 0;
+}
+
+#ifdef CONFIG_PM_SLEEP
+
+static int fsa9480_suspend(struct device *dev)
+{
+ struct i2c_client *client = to_i2c_client(dev);
+ struct fsa9480_usbsw *usbsw = i2c_get_clientdata(client);
+ struct fsa9480_platform_data *pdata = usbsw->pdata;
+
+ if (device_may_wakeup(&client->dev) && client->irq)
+ enable_irq_wake(client->irq);
+
+ if (pdata->usb_power)
+ pdata->usb_power(0);
+
+ return 0;
+}
+
+static int fsa9480_resume(struct device *dev)
+{
+ struct i2c_client *client = to_i2c_client(dev);
+ struct fsa9480_usbsw *usbsw = i2c_get_clientdata(client);
+ int dev1, dev2;
+
+ if (device_may_wakeup(&client->dev) && client->irq)
+ disable_irq_wake(client->irq);
+
+ /*
+ * Clear Pending interrupt. Note that detect_dev does what
+ * the interrupt handler does. So, we don't miss pending and
+ * we reenable interrupt if there is one.
+ */
+ fsa9480_read_reg(client, FSA9480_REG_INT1);
+ fsa9480_read_reg(client, FSA9480_REG_INT2);
+
+ dev1 = fsa9480_read_reg(client, FSA9480_REG_DEV_T1);
+ dev2 = fsa9480_read_reg(client, FSA9480_REG_DEV_T2);
+
+ /* device detection */
+ fsa9480_detect_dev(usbsw, (dev1 || dev2) ? INT_ATTACH : INT_DETACH);
+
+ return 0;
+}
+
+static SIMPLE_DEV_PM_OPS(fsa9480_pm_ops, fsa9480_suspend, fsa9480_resume);
+#define FSA9480_PM_OPS (&fsa9480_pm_ops)
+
+#else
+
+#define FSA9480_PM_OPS NULL
+
+#endif /* CONFIG_PM_SLEEP */
+
+static const struct i2c_device_id fsa9480_id[] = {
+ {"fsa9480", 0},
+ {}
+};
+MODULE_DEVICE_TABLE(i2c, fsa9480_id);
+
+static struct i2c_driver fsa9480_i2c_driver = {
+ .driver = {
+ .name = "fsa9480",
+ .pm = FSA9480_PM_OPS,
+ },
+ .probe = fsa9480_probe,
+ .remove = fsa9480_remove,
+ .id_table = fsa9480_id,
+};
+
+module_i2c_driver(fsa9480_i2c_driver);
+
+MODULE_AUTHOR("Minkyu Kang <mk7.kang@samsung.com>");
+MODULE_DESCRIPTION("FSA9480 USB Switch driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/misc/genwqe/Kconfig b/drivers/misc/genwqe/Kconfig
new file mode 100644
index 0000000..4c0a033
--- /dev/null
+++ b/drivers/misc/genwqe/Kconfig
@@ -0,0 +1,19 @@
+#
+# IBM Accelerator Family 'GenWQE'
+#
+
+menuconfig GENWQE
+ tristate "GenWQE PCIe Accelerator"
+ depends on PCI && 64BIT
+ select CRC_ITU_T
+ default n
+ help
+ Enables PCIe card driver for IBM GenWQE accelerators.
+ The user-space interface is described in
+ include/linux/genwqe/genwqe_card.h.
+
+config GENWQE_PLATFORM_ERROR_RECOVERY
+ int "Use platform recovery procedures (0=off, 1=on)"
+ depends on GENWQE
+ default 1 if PPC64
+ default 0
diff --git a/drivers/misc/genwqe/Makefile b/drivers/misc/genwqe/Makefile
new file mode 100644
index 0000000..98a2b4f
--- /dev/null
+++ b/drivers/misc/genwqe/Makefile
@@ -0,0 +1,7 @@
+#
+# Makefile for GenWQE driver
+#
+
+obj-$(CONFIG_GENWQE) := genwqe_card.o
+genwqe_card-objs := card_base.o card_dev.o card_ddcb.o card_sysfs.o \
+ card_debugfs.o card_utils.o
diff --git a/drivers/misc/genwqe/card_base.c b/drivers/misc/genwqe/card_base.c
new file mode 100644
index 0000000..4cf8f82
--- /dev/null
+++ b/drivers/misc/genwqe/card_base.c
@@ -0,0 +1,1402 @@
+/**
+ * IBM Accelerator Family 'GenWQE'
+ *
+ * (C) Copyright IBM Corp. 2013
+ *
+ * Author: Frank Haverkamp <haver@linux.vnet.ibm.com>
+ * Author: Joerg-Stephan Vogt <jsvogt@de.ibm.com>
+ * Author: Michael Jung <mijung@gmx.net>
+ * Author: Michael Ruettger <michael@ibmra.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+/*
+ * Module initialization and PCIe setup. Card health monitoring and
+ * recovery functionality. Character device creation and deletion are
+ * controlled from here.
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/pci.h>
+#include <linux/err.h>
+#include <linux/aer.h>
+#include <linux/string.h>
+#include <linux/sched.h>
+#include <linux/wait.h>
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
+#include <linux/module.h>
+#include <linux/notifier.h>
+#include <linux/device.h>
+#include <linux/log2.h>
+
+#include "card_base.h"
+#include "card_ddcb.h"
+
+MODULE_AUTHOR("Frank Haverkamp <haver@linux.vnet.ibm.com>");
+MODULE_AUTHOR("Michael Ruettger <michael@ibmra.de>");
+MODULE_AUTHOR("Joerg-Stephan Vogt <jsvogt@de.ibm.com>");
+MODULE_AUTHOR("Michael Jung <mijung@gmx.net>");
+
+MODULE_DESCRIPTION("GenWQE Card");
+MODULE_VERSION(DRV_VERSION);
+MODULE_LICENSE("GPL");
+
+static char genwqe_driver_name[] = GENWQE_DEVNAME;
+static struct class *class_genwqe;
+static struct dentry *debugfs_genwqe;
+static struct genwqe_dev *genwqe_devices[GENWQE_CARD_NO_MAX];
+
+/* PCI structure for identifying device by PCI vendor and device ID */
+static const struct pci_device_id genwqe_device_table[] = {
+ { .vendor = PCI_VENDOR_ID_IBM,
+ .device = PCI_DEVICE_GENWQE,
+ .subvendor = PCI_SUBVENDOR_ID_IBM,
+ .subdevice = PCI_SUBSYSTEM_ID_GENWQE5,
+ .class = (PCI_CLASSCODE_GENWQE5 << 8),
+ .class_mask = ~0,
+ .driver_data = 0 },
+
+ /* Initial SR-IOV bring-up image */
+ { .vendor = PCI_VENDOR_ID_IBM,
+ .device = PCI_DEVICE_GENWQE,
+ .subvendor = PCI_SUBVENDOR_ID_IBM_SRIOV,
+ .subdevice = PCI_SUBSYSTEM_ID_GENWQE5_SRIOV,
+ .class = (PCI_CLASSCODE_GENWQE5_SRIOV << 8),
+ .class_mask = ~0,
+ .driver_data = 0 },
+
+ { .vendor = PCI_VENDOR_ID_IBM, /* VF Vendor ID */
+ .device = 0x0000, /* VF Device ID */
+ .subvendor = PCI_SUBVENDOR_ID_IBM_SRIOV,
+ .subdevice = PCI_SUBSYSTEM_ID_GENWQE5_SRIOV,
+ .class = (PCI_CLASSCODE_GENWQE5_SRIOV << 8),
+ .class_mask = ~0,
+ .driver_data = 0 },
+
+ /* Fixed up image */
+ { .vendor = PCI_VENDOR_ID_IBM,
+ .device = PCI_DEVICE_GENWQE,
+ .subvendor = PCI_SUBVENDOR_ID_IBM_SRIOV,
+ .subdevice = PCI_SUBSYSTEM_ID_GENWQE5,
+ .class = (PCI_CLASSCODE_GENWQE5_SRIOV << 8),
+ .class_mask = ~0,
+ .driver_data = 0 },
+
+ { .vendor = PCI_VENDOR_ID_IBM, /* VF Vendor ID */
+ .device = 0x0000, /* VF Device ID */
+ .subvendor = PCI_SUBVENDOR_ID_IBM_SRIOV,
+ .subdevice = PCI_SUBSYSTEM_ID_GENWQE5,
+ .class = (PCI_CLASSCODE_GENWQE5_SRIOV << 8),
+ .class_mask = ~0,
+ .driver_data = 0 },
+
+ /* Even one more ... */
+ { .vendor = PCI_VENDOR_ID_IBM,
+ .device = PCI_DEVICE_GENWQE,
+ .subvendor = PCI_SUBVENDOR_ID_IBM,
+ .subdevice = PCI_SUBSYSTEM_ID_GENWQE5_NEW,
+ .class = (PCI_CLASSCODE_GENWQE5 << 8),
+ .class_mask = ~0,
+ .driver_data = 0 },
+
+ { 0, } /* 0 terminated list. */
+};
+
+MODULE_DEVICE_TABLE(pci, genwqe_device_table);
+
+/**
+ * genwqe_dev_alloc() - Create and prepare a new card descriptor
+ *
+ * Return: Pointer to card descriptor, or ERR_PTR(err) on error
+ */
+static struct genwqe_dev *genwqe_dev_alloc(void)
+{
+ unsigned int i = 0, j;
+ struct genwqe_dev *cd;
+
+ for (i = 0; i < GENWQE_CARD_NO_MAX; i++) {
+ if (genwqe_devices[i] == NULL)
+ break;
+ }
+ if (i >= GENWQE_CARD_NO_MAX)
+ return ERR_PTR(-ENODEV);
+
+ cd = kzalloc(sizeof(struct genwqe_dev), GFP_KERNEL);
+ if (!cd)
+ return ERR_PTR(-ENOMEM);
+
+ cd->card_idx = i;
+ cd->class_genwqe = class_genwqe;
+ cd->debugfs_genwqe = debugfs_genwqe;
+
+ /*
+ * This comes from kernel config option and can be overritten via
+ * debugfs.
+ */
+ cd->use_platform_recovery = CONFIG_GENWQE_PLATFORM_ERROR_RECOVERY;
+
+ init_waitqueue_head(&cd->queue_waitq);
+
+ spin_lock_init(&cd->file_lock);
+ INIT_LIST_HEAD(&cd->file_list);
+
+ cd->card_state = GENWQE_CARD_UNUSED;
+ spin_lock_init(&cd->print_lock);
+
+ cd->ddcb_software_timeout = genwqe_ddcb_software_timeout;
+ cd->kill_timeout = genwqe_kill_timeout;
+
+ for (j = 0; j < GENWQE_MAX_VFS; j++)
+ cd->vf_jobtimeout_msec[j] = genwqe_vf_jobtimeout_msec;
+
+ genwqe_devices[i] = cd;
+ return cd;
+}
+
+static void genwqe_dev_free(struct genwqe_dev *cd)
+{
+ if (!cd)
+ return;
+
+ genwqe_devices[cd->card_idx] = NULL;
+ kfree(cd);
+}
+
+/**
+ * genwqe_bus_reset() - Card recovery
+ *
+ * pci_reset_function() will recover the device and ensure that the
+ * registers are accessible again when it completes with success. If
+ * not, the card will stay dead and registers will be unaccessible
+ * still.
+ */
+static int genwqe_bus_reset(struct genwqe_dev *cd)
+{
+ int bars, rc = 0;
+ struct pci_dev *pci_dev = cd->pci_dev;
+ void __iomem *mmio;
+
+ if (cd->err_inject & GENWQE_INJECT_BUS_RESET_FAILURE)
+ return -EIO;
+
+ mmio = cd->mmio;
+ cd->mmio = NULL;
+ pci_iounmap(pci_dev, mmio);
+
+ bars = pci_select_bars(pci_dev, IORESOURCE_MEM);
+ pci_release_selected_regions(pci_dev, bars);
+
+ /*
+ * Firmware/BIOS might change memory mapping during bus reset.
+ * Settings like enable bus-mastering, ... are backuped and
+ * restored by the pci_reset_function().
+ */
+ dev_dbg(&pci_dev->dev, "[%s] pci_reset function ...\n", __func__);
+ rc = pci_reset_function(pci_dev);
+ if (rc) {
+ dev_err(&pci_dev->dev,
+ "[%s] err: failed reset func (rc %d)\n", __func__, rc);
+ return rc;
+ }
+ dev_dbg(&pci_dev->dev, "[%s] done with rc=%d\n", __func__, rc);
+
+ /*
+ * Here is the right spot to clear the register read
+ * failure. pci_bus_reset() does this job in real systems.
+ */
+ cd->err_inject &= ~(GENWQE_INJECT_HARDWARE_FAILURE |
+ GENWQE_INJECT_GFIR_FATAL |
+ GENWQE_INJECT_GFIR_INFO);
+
+ rc = pci_request_selected_regions(pci_dev, bars, genwqe_driver_name);
+ if (rc) {
+ dev_err(&pci_dev->dev,
+ "[%s] err: request bars failed (%d)\n", __func__, rc);
+ return -EIO;
+ }
+
+ cd->mmio = pci_iomap(pci_dev, 0, 0);
+ if (cd->mmio == NULL) {
+ dev_err(&pci_dev->dev,
+ "[%s] err: mapping BAR0 failed\n", __func__);
+ return -ENOMEM;
+ }
+ return 0;
+}
+
+/*
+ * Hardware circumvention section. Certain bitstreams in our test-lab
+ * had different kinds of problems. Here is where we adjust those
+ * bitstreams to function will with this version of our device driver.
+ *
+ * Thise circumventions are applied to the physical function only.
+ * The magical numbers below are identifying development/manufacturing
+ * versions of the bitstream used on the card.
+ *
+ * Turn off error reporting for old/manufacturing images.
+ */
+
+bool genwqe_need_err_masking(struct genwqe_dev *cd)
+{
+ return (cd->slu_unitcfg & 0xFFFF0ull) < 0x32170ull;
+}
+
+static void genwqe_tweak_hardware(struct genwqe_dev *cd)
+{
+ struct pci_dev *pci_dev = cd->pci_dev;
+
+ /* Mask FIRs for development images */
+ if (((cd->slu_unitcfg & 0xFFFF0ull) >= 0x32000ull) &&
+ ((cd->slu_unitcfg & 0xFFFF0ull) <= 0x33250ull)) {
+ dev_warn(&pci_dev->dev,
+ "FIRs masked due to bitstream %016llx.%016llx\n",
+ cd->slu_unitcfg, cd->app_unitcfg);
+
+ __genwqe_writeq(cd, IO_APP_SEC_LEM_DEBUG_OVR,
+ 0xFFFFFFFFFFFFFFFFull);
+
+ __genwqe_writeq(cd, IO_APP_ERR_ACT_MASK,
+ 0x0000000000000000ull);
+ }
+}
+
+/**
+ * genwqe_recovery_on_fatal_gfir_required() - Version depended actions
+ *
+ * Bitstreams older than 2013-02-17 have a bug where fatal GFIRs must
+ * be ignored. This is e.g. true for the bitstream we gave to the card
+ * manufacturer, but also for some old bitstreams we released to our
+ * test-lab.
+ */
+int genwqe_recovery_on_fatal_gfir_required(struct genwqe_dev *cd)
+{
+ return (cd->slu_unitcfg & 0xFFFF0ull) >= 0x32170ull;
+}
+
+int genwqe_flash_readback_fails(struct genwqe_dev *cd)
+{
+ return (cd->slu_unitcfg & 0xFFFF0ull) < 0x32170ull;
+}
+
+/**
+ * genwqe_T_psec() - Calculate PF/VF timeout register content
+ *
+ * Note: From a design perspective it turned out to be a bad idea to
+ * use codes here to specifiy the frequency/speed values. An old
+ * driver cannot understand new codes and is therefore always a
+ * problem. Better is to measure out the value or put the
+ * speed/frequency directly into a register which is always a valid
+ * value for old as well as for new software.
+ */
+/* T = 1/f */
+static int genwqe_T_psec(struct genwqe_dev *cd)
+{
+ u16 speed; /* 1/f -> 250, 200, 166, 175 */
+ static const int T[] = { 4000, 5000, 6000, 5714 };
+
+ speed = (u16)((cd->slu_unitcfg >> 28) & 0x0full);
+ if (speed >= ARRAY_SIZE(T))
+ return -1; /* illegal value */
+
+ return T[speed];
+}
+
+/**
+ * genwqe_setup_pf_jtimer() - Setup PF hardware timeouts for DDCB execution
+ *
+ * Do this _after_ card_reset() is called. Otherwise the values will
+ * vanish. The settings need to be done when the queues are inactive.
+ *
+ * The max. timeout value is 2^(10+x) * T (6ns for 166MHz) * 15/16.
+ * The min. timeout value is 2^(10+x) * T (6ns for 166MHz) * 14/16.
+ */
+static bool genwqe_setup_pf_jtimer(struct genwqe_dev *cd)
+{
+ u32 T = genwqe_T_psec(cd);
+ u64 x;
+
+ if (genwqe_pf_jobtimeout_msec == 0)
+ return false;
+
+ /* PF: large value needed, flash update 2sec per block */
+ x = ilog2(genwqe_pf_jobtimeout_msec *
+ 16000000000uL/(T * 15)) - 10;
+
+ genwqe_write_vreg(cd, IO_SLC_VF_APPJOB_TIMEOUT,
+ 0xff00 | (x & 0xff), 0);
+ return true;
+}
+
+/**
+ * genwqe_setup_vf_jtimer() - Setup VF hardware timeouts for DDCB execution
+ */
+static bool genwqe_setup_vf_jtimer(struct genwqe_dev *cd)
+{
+ struct pci_dev *pci_dev = cd->pci_dev;
+ unsigned int vf;
+ u32 T = genwqe_T_psec(cd);
+ u64 x;
+ int totalvfs;
+
+ totalvfs = pci_sriov_get_totalvfs(pci_dev);
+ if (totalvfs <= 0)
+ return false;
+
+ for (vf = 0; vf < totalvfs; vf++) {
+
+ if (cd->vf_jobtimeout_msec[vf] == 0)
+ continue;
+
+ x = ilog2(cd->vf_jobtimeout_msec[vf] *
+ 16000000000uL/(T * 15)) - 10;
+
+ genwqe_write_vreg(cd, IO_SLC_VF_APPJOB_TIMEOUT,
+ 0xff00 | (x & 0xff), vf + 1);
+ }
+ return true;
+}
+
+static int genwqe_ffdc_buffs_alloc(struct genwqe_dev *cd)
+{
+ unsigned int type, e = 0;
+
+ for (type = 0; type < GENWQE_DBG_UNITS; type++) {
+ switch (type) {
+ case GENWQE_DBG_UNIT0:
+ e = genwqe_ffdc_buff_size(cd, 0);
+ break;
+ case GENWQE_DBG_UNIT1:
+ e = genwqe_ffdc_buff_size(cd, 1);
+ break;
+ case GENWQE_DBG_UNIT2:
+ e = genwqe_ffdc_buff_size(cd, 2);
+ break;
+ case GENWQE_DBG_REGS:
+ e = GENWQE_FFDC_REGS;
+ break;
+ }
+
+ /* currently support only the debug units mentioned here */
+ cd->ffdc[type].entries = e;
+ cd->ffdc[type].regs =
+ kmalloc_array(e, sizeof(struct genwqe_reg),
+ GFP_KERNEL);
+ /*
+ * regs == NULL is ok, the using code treats this as no regs,
+ * Printing warning is ok in this case.
+ */
+ }
+ return 0;
+}
+
+static void genwqe_ffdc_buffs_free(struct genwqe_dev *cd)
+{
+ unsigned int type;
+
+ for (type = 0; type < GENWQE_DBG_UNITS; type++) {
+ kfree(cd->ffdc[type].regs);
+ cd->ffdc[type].regs = NULL;
+ }
+}
+
+static int genwqe_read_ids(struct genwqe_dev *cd)
+{
+ int err = 0;
+ int slu_id;
+ struct pci_dev *pci_dev = cd->pci_dev;
+
+ cd->slu_unitcfg = __genwqe_readq(cd, IO_SLU_UNITCFG);
+ if (cd->slu_unitcfg == IO_ILLEGAL_VALUE) {
+ dev_err(&pci_dev->dev,
+ "err: SLUID=%016llx\n", cd->slu_unitcfg);
+ err = -EIO;
+ goto out_err;
+ }
+
+ slu_id = genwqe_get_slu_id(cd);
+ if (slu_id < GENWQE_SLU_ARCH_REQ || slu_id == 0xff) {
+ dev_err(&pci_dev->dev,
+ "err: incompatible SLU Architecture %u\n", slu_id);
+ err = -ENOENT;
+ goto out_err;
+ }
+
+ cd->app_unitcfg = __genwqe_readq(cd, IO_APP_UNITCFG);
+ if (cd->app_unitcfg == IO_ILLEGAL_VALUE) {
+ dev_err(&pci_dev->dev,
+ "err: APPID=%016llx\n", cd->app_unitcfg);
+ err = -EIO;
+ goto out_err;
+ }
+ genwqe_read_app_id(cd, cd->app_name, sizeof(cd->app_name));
+
+ /*
+ * Is access to all registers possible? If we are a VF the
+ * answer is obvious. If we run fully virtualized, we need to
+ * check if we can access all registers. If we do not have
+ * full access we will cause an UR and some informational FIRs
+ * in the PF, but that should not harm.
+ */
+ if (pci_dev->is_virtfn)
+ cd->is_privileged = 0;
+ else
+ cd->is_privileged = (__genwqe_readq(cd, IO_SLU_BITSTREAM)
+ != IO_ILLEGAL_VALUE);
+
+ out_err:
+ return err;
+}
+
+static int genwqe_start(struct genwqe_dev *cd)
+{
+ int err;
+ struct pci_dev *pci_dev = cd->pci_dev;
+
+ err = genwqe_read_ids(cd);
+ if (err)
+ return err;
+
+ if (genwqe_is_privileged(cd)) {
+ /* do this after the tweaks. alloc fail is acceptable */
+ genwqe_ffdc_buffs_alloc(cd);
+ genwqe_stop_traps(cd);
+
+ /* Collect registers e.g. FIRs, UNITIDs, traces ... */
+ genwqe_read_ffdc_regs(cd, cd->ffdc[GENWQE_DBG_REGS].regs,
+ cd->ffdc[GENWQE_DBG_REGS].entries, 0);
+
+ genwqe_ffdc_buff_read(cd, GENWQE_DBG_UNIT0,
+ cd->ffdc[GENWQE_DBG_UNIT0].regs,
+ cd->ffdc[GENWQE_DBG_UNIT0].entries);
+
+ genwqe_ffdc_buff_read(cd, GENWQE_DBG_UNIT1,
+ cd->ffdc[GENWQE_DBG_UNIT1].regs,
+ cd->ffdc[GENWQE_DBG_UNIT1].entries);
+
+ genwqe_ffdc_buff_read(cd, GENWQE_DBG_UNIT2,
+ cd->ffdc[GENWQE_DBG_UNIT2].regs,
+ cd->ffdc[GENWQE_DBG_UNIT2].entries);
+
+ genwqe_start_traps(cd);
+
+ if (cd->card_state == GENWQE_CARD_FATAL_ERROR) {
+ dev_warn(&pci_dev->dev,
+ "[%s] chip reload/recovery!\n", __func__);
+
+ /*
+ * Stealth Mode: Reload chip on either hot
+ * reset or PERST.
+ */
+ cd->softreset = 0x7Cull;
+ __genwqe_writeq(cd, IO_SLC_CFGREG_SOFTRESET,
+ cd->softreset);
+
+ err = genwqe_bus_reset(cd);
+ if (err != 0) {
+ dev_err(&pci_dev->dev,
+ "[%s] err: bus reset failed!\n",
+ __func__);
+ goto out;
+ }
+
+ /*
+ * Re-read the IDs because
+ * it could happen that the bitstream load
+ * failed!
+ */
+ err = genwqe_read_ids(cd);
+ if (err)
+ goto out;
+ }
+ }
+
+ err = genwqe_setup_service_layer(cd); /* does a reset to the card */
+ if (err != 0) {
+ dev_err(&pci_dev->dev,
+ "[%s] err: could not setup servicelayer!\n", __func__);
+ err = -ENODEV;
+ goto out;
+ }
+
+ if (genwqe_is_privileged(cd)) { /* code is running _after_ reset */
+ genwqe_tweak_hardware(cd);
+
+ genwqe_setup_pf_jtimer(cd);
+ genwqe_setup_vf_jtimer(cd);
+ }
+
+ err = genwqe_device_create(cd);
+ if (err < 0) {
+ dev_err(&pci_dev->dev,
+ "err: chdev init failed! (err=%d)\n", err);
+ goto out_release_service_layer;
+ }
+ return 0;
+
+ out_release_service_layer:
+ genwqe_release_service_layer(cd);
+ out:
+ if (genwqe_is_privileged(cd))
+ genwqe_ffdc_buffs_free(cd);
+ return -EIO;
+}
+
+/**
+ * genwqe_stop() - Stop card operation
+ *
+ * Recovery notes:
+ * As long as genwqe_thread runs we might access registers during
+ * error data capture. Same is with the genwqe_health_thread.
+ * When genwqe_bus_reset() fails this function might called two times:
+ * first by the genwqe_health_thread() and later by genwqe_remove() to
+ * unbind the device. We must be able to survive that.
+ *
+ * This function must be robust enough to be called twice.
+ */
+static int genwqe_stop(struct genwqe_dev *cd)
+{
+ genwqe_finish_queue(cd); /* no register access */
+ genwqe_device_remove(cd); /* device removed, procs killed */
+ genwqe_release_service_layer(cd); /* here genwqe_thread is stopped */
+
+ if (genwqe_is_privileged(cd)) {
+ pci_disable_sriov(cd->pci_dev); /* access pci config space */
+ genwqe_ffdc_buffs_free(cd);
+ }
+
+ return 0;
+}
+
+/**
+ * genwqe_recover_card() - Try to recover the card if it is possible
+ *
+ * If fatal_err is set no register access is possible anymore. It is
+ * likely that genwqe_start fails in that situation. Proper error
+ * handling is required in this case.
+ *
+ * genwqe_bus_reset() will cause the pci code to call genwqe_remove()
+ * and later genwqe_probe() for all virtual functions.
+ */
+static int genwqe_recover_card(struct genwqe_dev *cd, int fatal_err)
+{
+ int rc;
+ struct pci_dev *pci_dev = cd->pci_dev;
+
+ genwqe_stop(cd);
+
+ /*
+ * Make sure chip is not reloaded to maintain FFDC. Write SLU
+ * Reset Register, CPLDReset field to 0.
+ */
+ if (!fatal_err) {
+ cd->softreset = 0x70ull;
+ __genwqe_writeq(cd, IO_SLC_CFGREG_SOFTRESET, cd->softreset);
+ }
+
+ rc = genwqe_bus_reset(cd);
+ if (rc != 0) {
+ dev_err(&pci_dev->dev,
+ "[%s] err: card recovery impossible!\n", __func__);
+ return rc;
+ }
+
+ rc = genwqe_start(cd);
+ if (rc < 0) {
+ dev_err(&pci_dev->dev,
+ "[%s] err: failed to launch device!\n", __func__);
+ return rc;
+ }
+ return 0;
+}
+
+static int genwqe_health_check_cond(struct genwqe_dev *cd, u64 *gfir)
+{
+ *gfir = __genwqe_readq(cd, IO_SLC_CFGREG_GFIR);
+ return (*gfir & GFIR_ERR_TRIGGER) &&
+ genwqe_recovery_on_fatal_gfir_required(cd);
+}
+
+/**
+ * genwqe_fir_checking() - Check the fault isolation registers of the card
+ *
+ * If this code works ok, can be tried out with help of the genwqe_poke tool:
+ * sudo ./tools/genwqe_poke 0x8 0xfefefefefef
+ *
+ * Now the relevant FIRs/sFIRs should be printed out and the driver should
+ * invoke recovery (devices are removed and readded).
+ */
+static u64 genwqe_fir_checking(struct genwqe_dev *cd)
+{
+ int j, iterations = 0;
+ u64 mask, fir, fec, uid, gfir, gfir_masked, sfir, sfec;
+ u32 fir_addr, fir_clr_addr, fec_addr, sfir_addr, sfec_addr;
+ struct pci_dev *pci_dev = cd->pci_dev;
+
+ healthMonitor:
+ iterations++;
+ if (iterations > 16) {
+ dev_err(&pci_dev->dev, "* exit looping after %d times\n",
+ iterations);
+ goto fatal_error;
+ }
+
+ gfir = __genwqe_readq(cd, IO_SLC_CFGREG_GFIR);
+ if (gfir != 0x0)
+ dev_err(&pci_dev->dev, "* 0x%08x 0x%016llx\n",
+ IO_SLC_CFGREG_GFIR, gfir);
+ if (gfir == IO_ILLEGAL_VALUE)
+ goto fatal_error;
+
+ /*
+ * Avoid printing when to GFIR bit is on prevents contignous
+ * printout e.g. for the following bug:
+ * FIR set without a 2ndary FIR/FIR cannot be cleared
+ * Comment out the following if to get the prints:
+ */
+ if (gfir == 0)
+ return 0;
+
+ gfir_masked = gfir & GFIR_ERR_TRIGGER; /* fatal errors */
+
+ for (uid = 0; uid < GENWQE_MAX_UNITS; uid++) { /* 0..2 in zEDC */
+
+ /* read the primary FIR (pfir) */
+ fir_addr = (uid << 24) + 0x08;
+ fir = __genwqe_readq(cd, fir_addr);
+ if (fir == 0x0)
+ continue; /* no error in this unit */
+
+ dev_err(&pci_dev->dev, "* 0x%08x 0x%016llx\n", fir_addr, fir);
+ if (fir == IO_ILLEGAL_VALUE)
+ goto fatal_error;
+
+ /* read primary FEC */
+ fec_addr = (uid << 24) + 0x18;
+ fec = __genwqe_readq(cd, fec_addr);
+
+ dev_err(&pci_dev->dev, "* 0x%08x 0x%016llx\n", fec_addr, fec);
+ if (fec == IO_ILLEGAL_VALUE)
+ goto fatal_error;
+
+ for (j = 0, mask = 1ULL; j < 64; j++, mask <<= 1) {
+
+ /* secondary fir empty, skip it */
+ if ((fir & mask) == 0x0)
+ continue;
+
+ sfir_addr = (uid << 24) + 0x100 + 0x08 * j;
+ sfir = __genwqe_readq(cd, sfir_addr);
+
+ if (sfir == IO_ILLEGAL_VALUE)
+ goto fatal_error;
+ dev_err(&pci_dev->dev,
+ "* 0x%08x 0x%016llx\n", sfir_addr, sfir);
+
+ sfec_addr = (uid << 24) + 0x300 + 0x08 * j;
+ sfec = __genwqe_readq(cd, sfec_addr);
+
+ if (sfec == IO_ILLEGAL_VALUE)
+ goto fatal_error;
+ dev_err(&pci_dev->dev,
+ "* 0x%08x 0x%016llx\n", sfec_addr, sfec);
+
+ gfir = __genwqe_readq(cd, IO_SLC_CFGREG_GFIR);
+ if (gfir == IO_ILLEGAL_VALUE)
+ goto fatal_error;
+
+ /* gfir turned on during routine! get out and
+ start over. */
+ if ((gfir_masked == 0x0) &&
+ (gfir & GFIR_ERR_TRIGGER)) {
+ goto healthMonitor;
+ }
+
+ /* do not clear if we entered with a fatal gfir */
+ if (gfir_masked == 0x0) {
+
+ /* NEW clear by mask the logged bits */
+ sfir_addr = (uid << 24) + 0x100 + 0x08 * j;
+ __genwqe_writeq(cd, sfir_addr, sfir);
+
+ dev_dbg(&pci_dev->dev,
+ "[HM] Clearing 2ndary FIR 0x%08x with 0x%016llx\n",
+ sfir_addr, sfir);
+
+ /*
+ * note, these cannot be error-Firs
+ * since gfir_masked is 0 after sfir
+ * was read. Also, it is safe to do
+ * this write if sfir=0. Still need to
+ * clear the primary. This just means
+ * there is no secondary FIR.
+ */
+
+ /* clear by mask the logged bit. */
+ fir_clr_addr = (uid << 24) + 0x10;
+ __genwqe_writeq(cd, fir_clr_addr, mask);
+
+ dev_dbg(&pci_dev->dev,
+ "[HM] Clearing primary FIR 0x%08x with 0x%016llx\n",
+ fir_clr_addr, mask);
+ }
+ }
+ }
+ gfir = __genwqe_readq(cd, IO_SLC_CFGREG_GFIR);
+ if (gfir == IO_ILLEGAL_VALUE)
+ goto fatal_error;
+
+ if ((gfir_masked == 0x0) && (gfir & GFIR_ERR_TRIGGER)) {
+ /*
+ * Check once more that it didn't go on after all the
+ * FIRS were cleared.
+ */
+ dev_dbg(&pci_dev->dev, "ACK! Another FIR! Recursing %d!\n",
+ iterations);
+ goto healthMonitor;
+ }
+ return gfir_masked;
+
+ fatal_error:
+ return IO_ILLEGAL_VALUE;
+}
+
+/**
+ * genwqe_pci_fundamental_reset() - trigger a PCIe fundamental reset on the slot
+ *
+ * Note: pci_set_pcie_reset_state() is not implemented on all archs, so this
+ * reset method will not work in all cases.
+ *
+ * Return: 0 on success or error code from pci_set_pcie_reset_state()
+ */
+static int genwqe_pci_fundamental_reset(struct pci_dev *pci_dev)
+{
+ int rc;
+
+ /*
+ * lock pci config space access from userspace,
+ * save state and issue PCIe fundamental reset
+ */
+ pci_cfg_access_lock(pci_dev);
+ pci_save_state(pci_dev);
+ rc = pci_set_pcie_reset_state(pci_dev, pcie_warm_reset);
+ if (!rc) {
+ /* keep PCIe reset asserted for 250ms */
+ msleep(250);
+ pci_set_pcie_reset_state(pci_dev, pcie_deassert_reset);
+ /* Wait for 2s to reload flash and train the link */
+ msleep(2000);
+ }
+ pci_restore_state(pci_dev);
+ pci_cfg_access_unlock(pci_dev);
+ return rc;
+}
+
+
+static int genwqe_platform_recovery(struct genwqe_dev *cd)
+{
+ struct pci_dev *pci_dev = cd->pci_dev;
+ int rc;
+
+ dev_info(&pci_dev->dev,
+ "[%s] resetting card for error recovery\n", __func__);
+
+ /* Clear out error injection flags */
+ cd->err_inject &= ~(GENWQE_INJECT_HARDWARE_FAILURE |
+ GENWQE_INJECT_GFIR_FATAL |
+ GENWQE_INJECT_GFIR_INFO);
+
+ genwqe_stop(cd);
+
+ /* Try recoverying the card with fundamental reset */
+ rc = genwqe_pci_fundamental_reset(pci_dev);
+ if (!rc) {
+ rc = genwqe_start(cd);
+ if (!rc)
+ dev_info(&pci_dev->dev,
+ "[%s] card recovered\n", __func__);
+ else
+ dev_err(&pci_dev->dev,
+ "[%s] err: cannot start card services! (err=%d)\n",
+ __func__, rc);
+ } else {
+ dev_err(&pci_dev->dev,
+ "[%s] card reset failed\n", __func__);
+ }
+
+ return rc;
+}
+
+/*
+ * genwqe_reload_bistream() - reload card bitstream
+ *
+ * Set the appropriate register and call fundamental reset to reaload the card
+ * bitstream.
+ *
+ * Return: 0 on success, error code otherwise
+ */
+static int genwqe_reload_bistream(struct genwqe_dev *cd)
+{
+ struct pci_dev *pci_dev = cd->pci_dev;
+ int rc;
+
+ dev_info(&pci_dev->dev,
+ "[%s] resetting card for bitstream reload\n",
+ __func__);
+
+ genwqe_stop(cd);
+
+ /*
+ * Cause a CPLD reprogram with the 'next_bitstream'
+ * partition on PCIe hot or fundamental reset
+ */
+ __genwqe_writeq(cd, IO_SLC_CFGREG_SOFTRESET,
+ (cd->softreset & 0xcull) | 0x70ull);
+
+ rc = genwqe_pci_fundamental_reset(pci_dev);
+ if (rc) {
+ /*
+ * A fundamental reset failure can be caused
+ * by lack of support on the arch, so we just
+ * log the error and try to start the card
+ * again.
+ */
+ dev_err(&pci_dev->dev,
+ "[%s] err: failed to reset card for bitstream reload\n",
+ __func__);
+ }
+
+ rc = genwqe_start(cd);
+ if (rc) {
+ dev_err(&pci_dev->dev,
+ "[%s] err: cannot start card services! (err=%d)\n",
+ __func__, rc);
+ return rc;
+ }
+ dev_info(&pci_dev->dev,
+ "[%s] card reloaded\n", __func__);
+ return 0;
+}
+
+
+/**
+ * genwqe_health_thread() - Health checking thread
+ *
+ * This thread is only started for the PF of the card.
+ *
+ * This thread monitors the health of the card. A critical situation
+ * is when we read registers which contain -1 (IO_ILLEGAL_VALUE). In
+ * this case we need to be recovered from outside. Writing to
+ * registers will very likely not work either.
+ *
+ * This thread must only exit if kthread_should_stop() becomes true.
+ *
+ * Condition for the health-thread to trigger:
+ * a) when a kthread_stop() request comes in or
+ * b) a critical GFIR occured
+ *
+ * Informational GFIRs are checked and potentially printed in
+ * health_check_interval seconds.
+ */
+static int genwqe_health_thread(void *data)
+{
+ int rc, should_stop = 0;
+ struct genwqe_dev *cd = data;
+ struct pci_dev *pci_dev = cd->pci_dev;
+ u64 gfir, gfir_masked, slu_unitcfg, app_unitcfg;
+
+ health_thread_begin:
+ while (!kthread_should_stop()) {
+ rc = wait_event_interruptible_timeout(cd->health_waitq,
+ (genwqe_health_check_cond(cd, &gfir) ||
+ (should_stop = kthread_should_stop())),
+ genwqe_health_check_interval * HZ);
+
+ if (should_stop)
+ break;
+
+ if (gfir == IO_ILLEGAL_VALUE) {
+ dev_err(&pci_dev->dev,
+ "[%s] GFIR=%016llx\n", __func__, gfir);
+ goto fatal_error;
+ }
+
+ slu_unitcfg = __genwqe_readq(cd, IO_SLU_UNITCFG);
+ if (slu_unitcfg == IO_ILLEGAL_VALUE) {
+ dev_err(&pci_dev->dev,
+ "[%s] SLU_UNITCFG=%016llx\n",
+ __func__, slu_unitcfg);
+ goto fatal_error;
+ }
+
+ app_unitcfg = __genwqe_readq(cd, IO_APP_UNITCFG);
+ if (app_unitcfg == IO_ILLEGAL_VALUE) {
+ dev_err(&pci_dev->dev,
+ "[%s] APP_UNITCFG=%016llx\n",
+ __func__, app_unitcfg);
+ goto fatal_error;
+ }
+
+ gfir = __genwqe_readq(cd, IO_SLC_CFGREG_GFIR);
+ if (gfir == IO_ILLEGAL_VALUE) {
+ dev_err(&pci_dev->dev,
+ "[%s] %s: GFIR=%016llx\n", __func__,
+ (gfir & GFIR_ERR_TRIGGER) ? "err" : "info",
+ gfir);
+ goto fatal_error;
+ }
+
+ gfir_masked = genwqe_fir_checking(cd);
+ if (gfir_masked == IO_ILLEGAL_VALUE)
+ goto fatal_error;
+
+ /*
+ * GFIR ErrorTrigger bits set => reset the card!
+ * Never do this for old/manufacturing images!
+ */
+ if ((gfir_masked) && !cd->skip_recovery &&
+ genwqe_recovery_on_fatal_gfir_required(cd)) {
+
+ cd->card_state = GENWQE_CARD_FATAL_ERROR;
+
+ rc = genwqe_recover_card(cd, 0);
+ if (rc < 0) {
+ /* FIXME Card is unusable and needs unbind! */
+ goto fatal_error;
+ }
+ }
+
+ if (cd->card_state == GENWQE_CARD_RELOAD_BITSTREAM) {
+ /* Userspace requested card bitstream reload */
+ rc = genwqe_reload_bistream(cd);
+ if (rc)
+ goto fatal_error;
+ }
+
+ cd->last_gfir = gfir;
+ cond_resched();
+ }
+
+ return 0;
+
+ fatal_error:
+ if (cd->use_platform_recovery) {
+ /*
+ * Since we use raw accessors, EEH errors won't be detected
+ * by the platform until we do a non-raw MMIO or config space
+ * read
+ */
+ readq(cd->mmio + IO_SLC_CFGREG_GFIR);
+
+ /* We do nothing if the card is going over PCI recovery */
+ if (pci_channel_offline(pci_dev))
+ return -EIO;
+
+ /*
+ * If it's supported by the platform, we try a fundamental reset
+ * to recover from a fatal error. Otherwise, we continue to wait
+ * for an external recovery procedure to take care of it.
+ */
+ rc = genwqe_platform_recovery(cd);
+ if (!rc)
+ goto health_thread_begin;
+ }
+
+ dev_err(&pci_dev->dev,
+ "[%s] card unusable. Please trigger unbind!\n", __func__);
+
+ /* Bring down logical devices to inform user space via udev remove. */
+ cd->card_state = GENWQE_CARD_FATAL_ERROR;
+ genwqe_stop(cd);
+
+ /* genwqe_bus_reset failed(). Now wait for genwqe_remove(). */
+ while (!kthread_should_stop())
+ cond_resched();
+
+ return -EIO;
+}
+
+static int genwqe_health_check_start(struct genwqe_dev *cd)
+{
+ int rc;
+
+ if (genwqe_health_check_interval <= 0)
+ return 0; /* valid for disabling the service */
+
+ /* moved before request_irq() */
+ /* init_waitqueue_head(&cd->health_waitq); */
+
+ cd->health_thread = kthread_run(genwqe_health_thread, cd,
+ GENWQE_DEVNAME "%d_health",
+ cd->card_idx);
+ if (IS_ERR(cd->health_thread)) {
+ rc = PTR_ERR(cd->health_thread);
+ cd->health_thread = NULL;
+ return rc;
+ }
+ return 0;
+}
+
+static int genwqe_health_thread_running(struct genwqe_dev *cd)
+{
+ return cd->health_thread != NULL;
+}
+
+static int genwqe_health_check_stop(struct genwqe_dev *cd)
+{
+ int rc;
+
+ if (!genwqe_health_thread_running(cd))
+ return -EIO;
+
+ rc = kthread_stop(cd->health_thread);
+ cd->health_thread = NULL;
+ return 0;
+}
+
+/**
+ * genwqe_pci_setup() - Allocate PCIe related resources for our card
+ */
+static int genwqe_pci_setup(struct genwqe_dev *cd)
+{
+ int err, bars;
+ struct pci_dev *pci_dev = cd->pci_dev;
+
+ bars = pci_select_bars(pci_dev, IORESOURCE_MEM);
+ err = pci_enable_device_mem(pci_dev);
+ if (err) {
+ dev_err(&pci_dev->dev,
+ "err: failed to enable pci memory (err=%d)\n", err);
+ goto err_out;
+ }
+
+ /* Reserve PCI I/O and memory resources */
+ err = pci_request_selected_regions(pci_dev, bars, genwqe_driver_name);
+ if (err) {
+ dev_err(&pci_dev->dev,
+ "[%s] err: request bars failed (%d)\n", __func__, err);
+ err = -EIO;
+ goto err_disable_device;
+ }
+
+ /* check for 64-bit DMA address supported (DAC) */
+ if (!pci_set_dma_mask(pci_dev, DMA_BIT_MASK(64))) {
+ err = pci_set_consistent_dma_mask(pci_dev, DMA_BIT_MASK(64));
+ if (err) {
+ dev_err(&pci_dev->dev,
+ "err: DMA64 consistent mask error\n");
+ err = -EIO;
+ goto out_release_resources;
+ }
+ /* check for 32-bit DMA address supported (SAC) */
+ } else if (!pci_set_dma_mask(pci_dev, DMA_BIT_MASK(32))) {
+ err = pci_set_consistent_dma_mask(pci_dev, DMA_BIT_MASK(32));
+ if (err) {
+ dev_err(&pci_dev->dev,
+ "err: DMA32 consistent mask error\n");
+ err = -EIO;
+ goto out_release_resources;
+ }
+ } else {
+ dev_err(&pci_dev->dev,
+ "err: neither DMA32 nor DMA64 supported\n");
+ err = -EIO;
+ goto out_release_resources;
+ }
+
+ pci_set_master(pci_dev);
+ pci_enable_pcie_error_reporting(pci_dev);
+
+ /* EEH recovery requires PCIe fundamental reset */
+ pci_dev->needs_freset = 1;
+
+ /* request complete BAR-0 space (length = 0) */
+ cd->mmio_len = pci_resource_len(pci_dev, 0);
+ cd->mmio = pci_iomap(pci_dev, 0, 0);
+ if (cd->mmio == NULL) {
+ dev_err(&pci_dev->dev,
+ "[%s] err: mapping BAR0 failed\n", __func__);
+ err = -ENOMEM;
+ goto out_release_resources;
+ }
+
+ cd->num_vfs = pci_sriov_get_totalvfs(pci_dev);
+ if (cd->num_vfs < 0)
+ cd->num_vfs = 0;
+
+ err = genwqe_read_ids(cd);
+ if (err)
+ goto out_iounmap;
+
+ return 0;
+
+ out_iounmap:
+ pci_iounmap(pci_dev, cd->mmio);
+ out_release_resources:
+ pci_release_selected_regions(pci_dev, bars);
+ err_disable_device:
+ pci_disable_device(pci_dev);
+ err_out:
+ return err;
+}
+
+/**
+ * genwqe_pci_remove() - Free PCIe related resources for our card
+ */
+static void genwqe_pci_remove(struct genwqe_dev *cd)
+{
+ int bars;
+ struct pci_dev *pci_dev = cd->pci_dev;
+
+ if (cd->mmio)
+ pci_iounmap(pci_dev, cd->mmio);
+
+ bars = pci_select_bars(pci_dev, IORESOURCE_MEM);
+ pci_release_selected_regions(pci_dev, bars);
+ pci_disable_device(pci_dev);
+}
+
+/**
+ * genwqe_probe() - Device initialization
+ * @pdev: PCI device information struct
+ *
+ * Callable for multiple cards. This function is called on bind.
+ *
+ * Return: 0 if succeeded, < 0 when failed
+ */
+static int genwqe_probe(struct pci_dev *pci_dev,
+ const struct pci_device_id *id)
+{
+ int err;
+ struct genwqe_dev *cd;
+
+ genwqe_init_crc32();
+
+ cd = genwqe_dev_alloc();
+ if (IS_ERR(cd)) {
+ dev_err(&pci_dev->dev, "err: could not alloc mem (err=%d)!\n",
+ (int)PTR_ERR(cd));
+ return PTR_ERR(cd);
+ }
+
+ dev_set_drvdata(&pci_dev->dev, cd);
+ cd->pci_dev = pci_dev;
+
+ err = genwqe_pci_setup(cd);
+ if (err < 0) {
+ dev_err(&pci_dev->dev,
+ "err: problems with PCI setup (err=%d)\n", err);
+ goto out_free_dev;
+ }
+
+ err = genwqe_start(cd);
+ if (err < 0) {
+ dev_err(&pci_dev->dev,
+ "err: cannot start card services! (err=%d)\n", err);
+ goto out_pci_remove;
+ }
+
+ if (genwqe_is_privileged(cd)) {
+ err = genwqe_health_check_start(cd);
+ if (err < 0) {
+ dev_err(&pci_dev->dev,
+ "err: cannot start health checking! (err=%d)\n",
+ err);
+ goto out_stop_services;
+ }
+ }
+ return 0;
+
+ out_stop_services:
+ genwqe_stop(cd);
+ out_pci_remove:
+ genwqe_pci_remove(cd);
+ out_free_dev:
+ genwqe_dev_free(cd);
+ return err;
+}
+
+/**
+ * genwqe_remove() - Called when device is removed (hot-plugable)
+ *
+ * Or when driver is unloaded respecitively when unbind is done.
+ */
+static void genwqe_remove(struct pci_dev *pci_dev)
+{
+ struct genwqe_dev *cd = dev_get_drvdata(&pci_dev->dev);
+
+ genwqe_health_check_stop(cd);
+
+ /*
+ * genwqe_stop() must survive if it is called twice
+ * sequentially. This happens when the health thread calls it
+ * and fails on genwqe_bus_reset().
+ */
+ genwqe_stop(cd);
+ genwqe_pci_remove(cd);
+ genwqe_dev_free(cd);
+}
+
+/*
+ * genwqe_err_error_detected() - Error detection callback
+ *
+ * This callback is called by the PCI subsystem whenever a PCI bus
+ * error is detected.
+ */
+static pci_ers_result_t genwqe_err_error_detected(struct pci_dev *pci_dev,
+ enum pci_channel_state state)
+{
+ struct genwqe_dev *cd;
+
+ dev_err(&pci_dev->dev, "[%s] state=%d\n", __func__, state);
+
+ cd = dev_get_drvdata(&pci_dev->dev);
+ if (cd == NULL)
+ return PCI_ERS_RESULT_DISCONNECT;
+
+ /* Stop the card */
+ genwqe_health_check_stop(cd);
+ genwqe_stop(cd);
+
+ /*
+ * On permanent failure, the PCI code will call device remove
+ * after the return of this function.
+ * genwqe_stop() can be called twice.
+ */
+ if (state == pci_channel_io_perm_failure) {
+ return PCI_ERS_RESULT_DISCONNECT;
+ } else {
+ genwqe_pci_remove(cd);
+ return PCI_ERS_RESULT_NEED_RESET;
+ }
+}
+
+static pci_ers_result_t genwqe_err_slot_reset(struct pci_dev *pci_dev)
+{
+ int rc;
+ struct genwqe_dev *cd = dev_get_drvdata(&pci_dev->dev);
+
+ rc = genwqe_pci_setup(cd);
+ if (!rc) {
+ return PCI_ERS_RESULT_RECOVERED;
+ } else {
+ dev_err(&pci_dev->dev,
+ "err: problems with PCI setup (err=%d)\n", rc);
+ return PCI_ERS_RESULT_DISCONNECT;
+ }
+}
+
+static pci_ers_result_t genwqe_err_result_none(struct pci_dev *dev)
+{
+ return PCI_ERS_RESULT_NONE;
+}
+
+static void genwqe_err_resume(struct pci_dev *pci_dev)
+{
+ int rc;
+ struct genwqe_dev *cd = dev_get_drvdata(&pci_dev->dev);
+
+ rc = genwqe_start(cd);
+ if (!rc) {
+ rc = genwqe_health_check_start(cd);
+ if (rc)
+ dev_err(&pci_dev->dev,
+ "err: cannot start health checking! (err=%d)\n",
+ rc);
+ } else {
+ dev_err(&pci_dev->dev,
+ "err: cannot start card services! (err=%d)\n", rc);
+ }
+}
+
+static int genwqe_sriov_configure(struct pci_dev *dev, int numvfs)
+{
+ int rc;
+ struct genwqe_dev *cd = dev_get_drvdata(&dev->dev);
+
+ if (numvfs > 0) {
+ genwqe_setup_vf_jtimer(cd);
+ rc = pci_enable_sriov(dev, numvfs);
+ if (rc < 0)
+ return rc;
+ return numvfs;
+ }
+ if (numvfs == 0) {
+ pci_disable_sriov(dev);
+ return 0;
+ }
+ return 0;
+}
+
+static struct pci_error_handlers genwqe_err_handler = {
+ .error_detected = genwqe_err_error_detected,
+ .mmio_enabled = genwqe_err_result_none,
+ .link_reset = genwqe_err_result_none,
+ .slot_reset = genwqe_err_slot_reset,
+ .resume = genwqe_err_resume,
+};
+
+static struct pci_driver genwqe_driver = {
+ .name = genwqe_driver_name,
+ .id_table = genwqe_device_table,
+ .probe = genwqe_probe,
+ .remove = genwqe_remove,
+ .sriov_configure = genwqe_sriov_configure,
+ .err_handler = &genwqe_err_handler,
+};
+
+/**
+ * genwqe_init_module() - Driver registration and initialization
+ */
+static int __init genwqe_init_module(void)
+{
+ int rc;
+
+ class_genwqe = class_create(THIS_MODULE, GENWQE_DEVNAME);
+ if (IS_ERR(class_genwqe)) {
+ pr_err("[%s] create class failed\n", __func__);
+ return -ENOMEM;
+ }
+
+ debugfs_genwqe = debugfs_create_dir(GENWQE_DEVNAME, NULL);
+ if (!debugfs_genwqe) {
+ rc = -ENOMEM;
+ goto err_out;
+ }
+
+ rc = pci_register_driver(&genwqe_driver);
+ if (rc != 0) {
+ pr_err("[%s] pci_reg_driver (rc=%d)\n", __func__, rc);
+ goto err_out0;
+ }
+
+ return rc;
+
+ err_out0:
+ debugfs_remove(debugfs_genwqe);
+ err_out:
+ class_destroy(class_genwqe);
+ return rc;
+}
+
+/**
+ * genwqe_exit_module() - Driver exit
+ */
+static void __exit genwqe_exit_module(void)
+{
+ pci_unregister_driver(&genwqe_driver);
+ debugfs_remove(debugfs_genwqe);
+ class_destroy(class_genwqe);
+}
+
+module_init(genwqe_init_module);
+module_exit(genwqe_exit_module);
diff --git a/drivers/misc/genwqe/card_base.h b/drivers/misc/genwqe/card_base.h
new file mode 100644
index 0000000..cb851c1
--- /dev/null
+++ b/drivers/misc/genwqe/card_base.h
@@ -0,0 +1,583 @@
+#ifndef __CARD_BASE_H__
+#define __CARD_BASE_H__
+
+/**
+ * IBM Accelerator Family 'GenWQE'
+ *
+ * (C) Copyright IBM Corp. 2013
+ *
+ * Author: Frank Haverkamp <haver@linux.vnet.ibm.com>
+ * Author: Joerg-Stephan Vogt <jsvogt@de.ibm.com>
+ * Author: Michael Jung <mijung@gmx.net>
+ * Author: Michael Ruettger <michael@ibmra.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+/*
+ * Interfaces within the GenWQE module. Defines genwqe_card and
+ * ddcb_queue as well as ddcb_requ.
+ */
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/cdev.h>
+#include <linux/stringify.h>
+#include <linux/pci.h>
+#include <linux/semaphore.h>
+#include <linux/uaccess.h>
+#include <linux/io.h>
+#include <linux/debugfs.h>
+#include <linux/slab.h>
+
+#include <linux/genwqe/genwqe_card.h>
+#include "genwqe_driver.h"
+
+#define GENWQE_MSI_IRQS 4 /* Just one supported, no MSIx */
+#define GENWQE_FLAG_MSI_ENABLED (1 << 0)
+
+#define GENWQE_MAX_VFS 15 /* maximum 15 VFs are possible */
+#define GENWQE_MAX_FUNCS 16 /* 1 PF and 15 VFs */
+#define GENWQE_CARD_NO_MAX (16 * GENWQE_MAX_FUNCS)
+
+/* Compile parameters, some of them appear in debugfs for later adjustment */
+#define genwqe_ddcb_max 32 /* DDCBs on the work-queue */
+#define genwqe_polling_enabled 0 /* in case of irqs not working */
+#define genwqe_ddcb_software_timeout 10 /* timeout per DDCB in seconds */
+#define genwqe_kill_timeout 8 /* time until process gets killed */
+#define genwqe_vf_jobtimeout_msec 250 /* 250 msec */
+#define genwqe_pf_jobtimeout_msec 8000 /* 8 sec should be ok */
+#define genwqe_health_check_interval 4 /* <= 0: disabled */
+
+/* Sysfs attribute groups used when we create the genwqe device */
+extern const struct attribute_group *genwqe_attribute_groups[];
+
+/*
+ * Config space for Genwqe5 A7:
+ * 00:[14 10 4b 04]40 00 10 00[00 00 00 12]00 00 00 00
+ * 10: 0c 00 00 f0 07 3c 00 00 00 00 00 00 00 00 00 00
+ * 20: 00 00 00 00 00 00 00 00 00 00 00 00[14 10 4b 04]
+ * 30: 00 00 00 00 50 00 00 00 00 00 00 00 00 00 00 00
+ */
+#define PCI_DEVICE_GENWQE 0x044b /* Genwqe DeviceID */
+
+#define PCI_SUBSYSTEM_ID_GENWQE5 0x035f /* Genwqe A5 Subsystem-ID */
+#define PCI_SUBSYSTEM_ID_GENWQE5_NEW 0x044b /* Genwqe A5 Subsystem-ID */
+#define PCI_CLASSCODE_GENWQE5 0x1200 /* UNKNOWN */
+
+#define PCI_SUBVENDOR_ID_IBM_SRIOV 0x0000
+#define PCI_SUBSYSTEM_ID_GENWQE5_SRIOV 0x0000 /* Genwqe A5 Subsystem-ID */
+#define PCI_CLASSCODE_GENWQE5_SRIOV 0x1200 /* UNKNOWN */
+
+#define GENWQE_SLU_ARCH_REQ 2 /* Required SLU architecture level */
+
+/**
+ * struct genwqe_reg - Genwqe data dump functionality
+ */
+struct genwqe_reg {
+ u32 addr;
+ u32 idx;
+ u64 val;
+};
+
+/*
+ * enum genwqe_dbg_type - Specify chip unit to dump/debug
+ */
+enum genwqe_dbg_type {
+ GENWQE_DBG_UNIT0 = 0, /* captured before prev errs cleared */
+ GENWQE_DBG_UNIT1 = 1,
+ GENWQE_DBG_UNIT2 = 2,
+ GENWQE_DBG_UNIT3 = 3,
+ GENWQE_DBG_UNIT4 = 4,
+ GENWQE_DBG_UNIT5 = 5,
+ GENWQE_DBG_UNIT6 = 6,
+ GENWQE_DBG_UNIT7 = 7,
+ GENWQE_DBG_REGS = 8,
+ GENWQE_DBG_DMA = 9,
+ GENWQE_DBG_UNITS = 10, /* max number of possible debug units */
+};
+
+/* Software error injection to simulate card failures */
+#define GENWQE_INJECT_HARDWARE_FAILURE 0x00000001 /* injects -1 reg reads */
+#define GENWQE_INJECT_BUS_RESET_FAILURE 0x00000002 /* pci_bus_reset fail */
+#define GENWQE_INJECT_GFIR_FATAL 0x00000004 /* GFIR = 0x0000ffff */
+#define GENWQE_INJECT_GFIR_INFO 0x00000008 /* GFIR = 0xffff0000 */
+
+/*
+ * Genwqe card description and management data.
+ *
+ * Error-handling in case of card malfunction
+ * ------------------------------------------
+ *
+ * If the card is detected to be defective the outside environment
+ * will cause the PCI layer to call deinit (the cleanup function for
+ * probe). This is the same effect like doing a unbind/bind operation
+ * on the card.
+ *
+ * The genwqe card driver implements a health checking thread which
+ * verifies the card function. If this detects a problem the cards
+ * device is being shutdown and restarted again, along with a reset of
+ * the card and queue.
+ *
+ * All functions accessing the card device return either -EIO or -ENODEV
+ * code to indicate the malfunction to the user. The user has to close
+ * the file descriptor and open a new one, once the card becomes
+ * available again.
+ *
+ * If the open file descriptor is setup to receive SIGIO, the signal is
+ * genereated for the application which has to provide a handler to
+ * react on it. If the application does not close the open
+ * file descriptor a SIGKILL is send to enforce freeing the cards
+ * resources.
+ *
+ * I did not find a different way to prevent kernel problems due to
+ * reference counters for the cards character devices getting out of
+ * sync. The character device deallocation does not block, even if
+ * there is still an open file descriptor pending. If this pending
+ * descriptor is closed, the data structures used by the character
+ * device is reinstantiated, which will lead to the reference counter
+ * dropping below the allowed values.
+ *
+ * Card recovery
+ * -------------
+ *
+ * To test the internal driver recovery the following command can be used:
+ * sudo sh -c 'echo 0xfffff > /sys/class/genwqe/genwqe0_card/err_inject'
+ */
+
+
+/**
+ * struct dma_mapping_type - Mapping type definition
+ *
+ * To avoid memcpying data arround we use user memory directly. To do
+ * this we need to pin/swap-in the memory and request a DMA address
+ * for it.
+ */
+enum dma_mapping_type {
+ GENWQE_MAPPING_RAW = 0, /* contignous memory buffer */
+ GENWQE_MAPPING_SGL_TEMP, /* sglist dynamically used */
+ GENWQE_MAPPING_SGL_PINNED, /* sglist used with pinning */
+};
+
+/**
+ * struct dma_mapping - Information about memory mappings done by the driver
+ */
+struct dma_mapping {
+ enum dma_mapping_type type;
+
+ void *u_vaddr; /* user-space vaddr/non-aligned */
+ void *k_vaddr; /* kernel-space vaddr/non-aligned */
+ dma_addr_t dma_addr; /* physical DMA address */
+
+ struct page **page_list; /* list of pages used by user buff */
+ dma_addr_t *dma_list; /* list of dma addresses per page */
+ unsigned int nr_pages; /* number of pages */
+ unsigned int size; /* size in bytes */
+
+ struct list_head card_list; /* list of usr_maps for card */
+ struct list_head pin_list; /* list of pinned memory for dev */
+};
+
+static inline void genwqe_mapping_init(struct dma_mapping *m,
+ enum dma_mapping_type type)
+{
+ memset(m, 0, sizeof(*m));
+ m->type = type;
+}
+
+/**
+ * struct ddcb_queue - DDCB queue data
+ * @ddcb_max: Number of DDCBs on the queue
+ * @ddcb_next: Next free DDCB
+ * @ddcb_act: Next DDCB supposed to finish
+ * @ddcb_seq: Sequence number of last DDCB
+ * @ddcbs_in_flight: Currently enqueued DDCBs
+ * @ddcbs_completed: Number of already completed DDCBs
+ * @return_on_busy: Number of -EBUSY returns on full queue
+ * @wait_on_busy: Number of waits on full queue
+ * @ddcb_daddr: DMA address of first DDCB in the queue
+ * @ddcb_vaddr: Kernel virtual address of first DDCB in the queue
+ * @ddcb_req: Associated requests (one per DDCB)
+ * @ddcb_waitqs: Associated wait queues (one per DDCB)
+ * @ddcb_lock: Lock to protect queuing operations
+ * @ddcb_waitq: Wait on next DDCB finishing
+ */
+
+struct ddcb_queue {
+ int ddcb_max; /* amount of DDCBs */
+ int ddcb_next; /* next available DDCB num */
+ int ddcb_act; /* DDCB to be processed */
+ u16 ddcb_seq; /* slc seq num */
+ unsigned int ddcbs_in_flight; /* number of ddcbs in processing */
+ unsigned int ddcbs_completed;
+ unsigned int ddcbs_max_in_flight;
+ unsigned int return_on_busy; /* how many times -EBUSY? */
+ unsigned int wait_on_busy;
+
+ dma_addr_t ddcb_daddr; /* DMA address */
+ struct ddcb *ddcb_vaddr; /* kernel virtual addr for DDCBs */
+ struct ddcb_requ **ddcb_req; /* ddcb processing parameter */
+ wait_queue_head_t *ddcb_waitqs; /* waitqueue per ddcb */
+
+ spinlock_t ddcb_lock; /* exclusive access to queue */
+ wait_queue_head_t busy_waitq; /* wait for ddcb processing */
+
+ /* registers or the respective queue to be used */
+ u32 IO_QUEUE_CONFIG;
+ u32 IO_QUEUE_STATUS;
+ u32 IO_QUEUE_SEGMENT;
+ u32 IO_QUEUE_INITSQN;
+ u32 IO_QUEUE_WRAP;
+ u32 IO_QUEUE_OFFSET;
+ u32 IO_QUEUE_WTIME;
+ u32 IO_QUEUE_ERRCNTS;
+ u32 IO_QUEUE_LRW;
+};
+
+/*
+ * GFIR, SLU_UNITCFG, APP_UNITCFG
+ * 8 Units with FIR/FEC + 64 * 2ndary FIRS/FEC.
+ */
+#define GENWQE_FFDC_REGS (3 + (8 * (2 + 2 * 64)))
+
+struct genwqe_ffdc {
+ unsigned int entries;
+ struct genwqe_reg *regs;
+};
+
+/**
+ * struct genwqe_dev - GenWQE device information
+ * @card_state: Card operation state, see above
+ * @ffdc: First Failure Data Capture buffers for each unit
+ * @card_thread: Working thread to operate the DDCB queue
+ * @card_waitq: Wait queue used in card_thread
+ * @queue: DDCB queue
+ * @health_thread: Card monitoring thread (only for PFs)
+ * @health_waitq: Wait queue used in health_thread
+ * @pci_dev: Associated PCI device (function)
+ * @mmio: Base address of 64-bit register space
+ * @mmio_len: Length of register area
+ * @file_lock: Lock to protect access to file_list
+ * @file_list: List of all processes with open GenWQE file descriptors
+ *
+ * This struct contains all information needed to communicate with a
+ * GenWQE card. It is initialized when a GenWQE device is found and
+ * destroyed when it goes away. It holds data to maintain the queue as
+ * well as data needed to feed the user interfaces.
+ */
+struct genwqe_dev {
+ enum genwqe_card_state card_state;
+ spinlock_t print_lock;
+
+ int card_idx; /* card index 0..CARD_NO_MAX-1 */
+ u64 flags; /* general flags */
+
+ /* FFDC data gathering */
+ struct genwqe_ffdc ffdc[GENWQE_DBG_UNITS];
+
+ /* DDCB workqueue */
+ struct task_struct *card_thread;
+ wait_queue_head_t queue_waitq;
+ struct ddcb_queue queue; /* genwqe DDCB queue */
+ unsigned int irqs_processed;
+
+ /* Card health checking thread */
+ struct task_struct *health_thread;
+ wait_queue_head_t health_waitq;
+
+ int use_platform_recovery; /* use platform recovery mechanisms */
+
+ /* char device */
+ dev_t devnum_genwqe; /* major/minor num card */
+ struct class *class_genwqe; /* reference to class object */
+ struct device *dev; /* for device creation */
+ struct cdev cdev_genwqe; /* char device for card */
+
+ struct dentry *debugfs_root; /* debugfs card root directory */
+ struct dentry *debugfs_genwqe; /* debugfs driver root directory */
+
+ /* pci resources */
+ struct pci_dev *pci_dev; /* PCI device */
+ void __iomem *mmio; /* BAR-0 MMIO start */
+ unsigned long mmio_len;
+ int num_vfs;
+ u32 vf_jobtimeout_msec[GENWQE_MAX_VFS];
+ int is_privileged; /* access to all regs possible */
+
+ /* config regs which we need often */
+ u64 slu_unitcfg;
+ u64 app_unitcfg;
+ u64 softreset;
+ u64 err_inject;
+ u64 last_gfir;
+ char app_name[5];
+
+ spinlock_t file_lock; /* lock for open files */
+ struct list_head file_list; /* list of open files */
+
+ /* debugfs parameters */
+ int ddcb_software_timeout; /* wait until DDCB times out */
+ int skip_recovery; /* circumvention if recovery fails */
+ int kill_timeout; /* wait after sending SIGKILL */
+};
+
+/**
+ * enum genwqe_requ_state - State of a DDCB execution request
+ */
+enum genwqe_requ_state {
+ GENWQE_REQU_NEW = 0,
+ GENWQE_REQU_ENQUEUED = 1,
+ GENWQE_REQU_TAPPED = 2,
+ GENWQE_REQU_FINISHED = 3,
+ GENWQE_REQU_STATE_MAX,
+};
+
+/**
+ * struct genwqe_sgl - Scatter gather list describing user-space memory
+ * @sgl: scatter gather list needs to be 128 byte aligned
+ * @sgl_dma_addr: dma address of sgl
+ * @sgl_size: size of area used for sgl
+ * @user_addr: user-space address of memory area
+ * @user_size: size of user-space memory area
+ * @page: buffer for partial pages if needed
+ * @page_dma_addr: dma address partial pages
+ */
+struct genwqe_sgl {
+ dma_addr_t sgl_dma_addr;
+ struct sg_entry *sgl;
+ size_t sgl_size; /* size of sgl */
+
+ void __user *user_addr; /* user-space base-address */
+ size_t user_size; /* size of memory area */
+
+ unsigned long nr_pages;
+ unsigned long fpage_offs;
+ size_t fpage_size;
+ size_t lpage_size;
+
+ void *fpage;
+ dma_addr_t fpage_dma_addr;
+
+ void *lpage;
+ dma_addr_t lpage_dma_addr;
+};
+
+int genwqe_alloc_sync_sgl(struct genwqe_dev *cd, struct genwqe_sgl *sgl,
+ void __user *user_addr, size_t user_size);
+
+int genwqe_setup_sgl(struct genwqe_dev *cd, struct genwqe_sgl *sgl,
+ dma_addr_t *dma_list);
+
+int genwqe_free_sync_sgl(struct genwqe_dev *cd, struct genwqe_sgl *sgl);
+
+/**
+ * struct ddcb_requ - Kernel internal representation of the DDCB request
+ * @cmd: User space representation of the DDCB execution request
+ */
+struct ddcb_requ {
+ /* kernel specific content */
+ enum genwqe_requ_state req_state; /* request status */
+ int num; /* ddcb_no for this request */
+ struct ddcb_queue *queue; /* associated queue */
+
+ struct dma_mapping dma_mappings[DDCB_FIXUPS];
+ struct genwqe_sgl sgls[DDCB_FIXUPS];
+
+ /* kernel/user shared content */
+ struct genwqe_ddcb_cmd cmd; /* ddcb_no for this request */
+ struct genwqe_debug_data debug_data;
+};
+
+/**
+ * struct genwqe_file - Information for open GenWQE devices
+ */
+struct genwqe_file {
+ struct genwqe_dev *cd;
+ struct genwqe_driver *client;
+ struct file *filp;
+
+ struct fasync_struct *async_queue;
+ struct task_struct *owner;
+ struct list_head list; /* entry in list of open files */
+
+ spinlock_t map_lock; /* lock for dma_mappings */
+ struct list_head map_list; /* list of dma_mappings */
+
+ spinlock_t pin_lock; /* lock for pinned memory */
+ struct list_head pin_list; /* list of pinned memory */
+};
+
+int genwqe_setup_service_layer(struct genwqe_dev *cd); /* for PF only */
+int genwqe_finish_queue(struct genwqe_dev *cd);
+int genwqe_release_service_layer(struct genwqe_dev *cd);
+
+/**
+ * genwqe_get_slu_id() - Read Service Layer Unit Id
+ * Return: 0x00: Development code
+ * 0x01: SLC1 (old)
+ * 0x02: SLC2 (sept2012)
+ * 0x03: SLC2 (feb2013, generic driver)
+ */
+static inline int genwqe_get_slu_id(struct genwqe_dev *cd)
+{
+ return (int)((cd->slu_unitcfg >> 32) & 0xff);
+}
+
+int genwqe_ddcbs_in_flight(struct genwqe_dev *cd);
+
+u8 genwqe_card_type(struct genwqe_dev *cd);
+int genwqe_card_reset(struct genwqe_dev *cd);
+int genwqe_set_interrupt_capability(struct genwqe_dev *cd, int count);
+void genwqe_reset_interrupt_capability(struct genwqe_dev *cd);
+
+int genwqe_device_create(struct genwqe_dev *cd);
+int genwqe_device_remove(struct genwqe_dev *cd);
+
+/* debugfs */
+int genwqe_init_debugfs(struct genwqe_dev *cd);
+void genqwe_exit_debugfs(struct genwqe_dev *cd);
+
+int genwqe_read_softreset(struct genwqe_dev *cd);
+
+/* Hardware Circumventions */
+int genwqe_recovery_on_fatal_gfir_required(struct genwqe_dev *cd);
+int genwqe_flash_readback_fails(struct genwqe_dev *cd);
+
+/**
+ * genwqe_write_vreg() - Write register in VF window
+ * @cd: genwqe device
+ * @reg: register address
+ * @val: value to write
+ * @func: 0: PF, 1: VF0, ..., 15: VF14
+ */
+int genwqe_write_vreg(struct genwqe_dev *cd, u32 reg, u64 val, int func);
+
+/**
+ * genwqe_read_vreg() - Read register in VF window
+ * @cd: genwqe device
+ * @reg: register address
+ * @func: 0: PF, 1: VF0, ..., 15: VF14
+ *
+ * Return: content of the register
+ */
+u64 genwqe_read_vreg(struct genwqe_dev *cd, u32 reg, int func);
+
+/* FFDC Buffer Management */
+int genwqe_ffdc_buff_size(struct genwqe_dev *cd, int unit_id);
+int genwqe_ffdc_buff_read(struct genwqe_dev *cd, int unit_id,
+ struct genwqe_reg *regs, unsigned int max_regs);
+int genwqe_read_ffdc_regs(struct genwqe_dev *cd, struct genwqe_reg *regs,
+ unsigned int max_regs, int all);
+int genwqe_ffdc_dump_dma(struct genwqe_dev *cd,
+ struct genwqe_reg *regs, unsigned int max_regs);
+
+int genwqe_init_debug_data(struct genwqe_dev *cd,
+ struct genwqe_debug_data *d);
+
+void genwqe_init_crc32(void);
+int genwqe_read_app_id(struct genwqe_dev *cd, char *app_name, int len);
+
+/* Memory allocation/deallocation; dma address handling */
+int genwqe_user_vmap(struct genwqe_dev *cd, struct dma_mapping *m,
+ void *uaddr, unsigned long size,
+ struct ddcb_requ *req);
+
+int genwqe_user_vunmap(struct genwqe_dev *cd, struct dma_mapping *m,
+ struct ddcb_requ *req);
+
+static inline bool dma_mapping_used(struct dma_mapping *m)
+{
+ if (!m)
+ return 0;
+ return m->size != 0;
+}
+
+/**
+ * __genwqe_execute_ddcb() - Execute DDCB request with addr translation
+ *
+ * This function will do the address translation changes to the DDCBs
+ * according to the definitions required by the ATS field. It looks up
+ * the memory allocation buffer or does vmap/vunmap for the respective
+ * user-space buffers, inclusive page pinning and scatter gather list
+ * buildup and teardown.
+ */
+int __genwqe_execute_ddcb(struct genwqe_dev *cd,
+ struct genwqe_ddcb_cmd *cmd, unsigned int f_flags);
+
+/**
+ * __genwqe_execute_raw_ddcb() - Execute DDCB request without addr translation
+ *
+ * This version will not do address translation or any modification of
+ * the DDCB data. It is used e.g. for the MoveFlash DDCB which is
+ * entirely prepared by the driver itself. That means the appropriate
+ * DMA addresses are already in the DDCB and do not need any
+ * modification.
+ */
+int __genwqe_execute_raw_ddcb(struct genwqe_dev *cd,
+ struct genwqe_ddcb_cmd *cmd,
+ unsigned int f_flags);
+int __genwqe_enqueue_ddcb(struct genwqe_dev *cd,
+ struct ddcb_requ *req,
+ unsigned int f_flags);
+
+int __genwqe_wait_ddcb(struct genwqe_dev *cd, struct ddcb_requ *req);
+int __genwqe_purge_ddcb(struct genwqe_dev *cd, struct ddcb_requ *req);
+
+/* register access */
+int __genwqe_writeq(struct genwqe_dev *cd, u64 byte_offs, u64 val);
+u64 __genwqe_readq(struct genwqe_dev *cd, u64 byte_offs);
+int __genwqe_writel(struct genwqe_dev *cd, u64 byte_offs, u32 val);
+u32 __genwqe_readl(struct genwqe_dev *cd, u64 byte_offs);
+
+void *__genwqe_alloc_consistent(struct genwqe_dev *cd, size_t size,
+ dma_addr_t *dma_handle);
+void __genwqe_free_consistent(struct genwqe_dev *cd, size_t size,
+ void *vaddr, dma_addr_t dma_handle);
+
+/* Base clock frequency in MHz */
+int genwqe_base_clock_frequency(struct genwqe_dev *cd);
+
+/* Before FFDC is captured the traps should be stopped. */
+void genwqe_stop_traps(struct genwqe_dev *cd);
+void genwqe_start_traps(struct genwqe_dev *cd);
+
+/* Hardware circumvention */
+bool genwqe_need_err_masking(struct genwqe_dev *cd);
+
+/**
+ * genwqe_is_privileged() - Determine operation mode for PCI function
+ *
+ * On Intel with SRIOV support we see:
+ * PF: is_physfn = 1 is_virtfn = 0
+ * VF: is_physfn = 0 is_virtfn = 1
+ *
+ * On Systems with no SRIOV support _and_ virtualized systems we get:
+ * is_physfn = 0 is_virtfn = 0
+ *
+ * Other vendors have individual pci device ids to distinguish between
+ * virtual function drivers and physical function drivers. GenWQE
+ * unfortunately has just on pci device id for both, VFs and PF.
+ *
+ * The following code is used to distinguish if the card is running in
+ * privileged mode, either as true PF or in a virtualized system with
+ * full register access e.g. currently on PowerPC.
+ *
+ * if (pci_dev->is_virtfn)
+ * cd->is_privileged = 0;
+ * else
+ * cd->is_privileged = (__genwqe_readq(cd, IO_SLU_BITSTREAM)
+ * != IO_ILLEGAL_VALUE);
+ */
+static inline int genwqe_is_privileged(struct genwqe_dev *cd)
+{
+ return cd->is_privileged;
+}
+
+#endif /* __CARD_BASE_H__ */
diff --git a/drivers/misc/genwqe/card_ddcb.c b/drivers/misc/genwqe/card_ddcb.c
new file mode 100644
index 0000000..353ee0c
--- /dev/null
+++ b/drivers/misc/genwqe/card_ddcb.c
@@ -0,0 +1,1411 @@
+/**
+ * IBM Accelerator Family 'GenWQE'
+ *
+ * (C) Copyright IBM Corp. 2013
+ *
+ * Author: Frank Haverkamp <haver@linux.vnet.ibm.com>
+ * Author: Joerg-Stephan Vogt <jsvogt@de.ibm.com>
+ * Author: Michael Jung <mijung@gmx.net>
+ * Author: Michael Ruettger <michael@ibmra.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+/*
+ * Device Driver Control Block (DDCB) queue support. Definition of
+ * interrupt handlers for queue support as well as triggering the
+ * health monitor code in case of problems. The current hardware uses
+ * an MSI interrupt which is shared between error handling and
+ * functional code.
+ */
+
+#include <linux/types.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/wait.h>
+#include <linux/pci.h>
+#include <linux/string.h>
+#include <linux/dma-mapping.h>
+#include <linux/delay.h>
+#include <linux/module.h>
+#include <linux/interrupt.h>
+#include <linux/crc-itu-t.h>
+
+#include "card_base.h"
+#include "card_ddcb.h"
+
+/*
+ * N: next DDCB, this is where the next DDCB will be put.
+ * A: active DDCB, this is where the code will look for the next completion.
+ * x: DDCB is enqueued, we are waiting for its completion.
+
+ * Situation (1): Empty queue
+ * +---+---+---+---+---+---+---+---+
+ * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 |
+ * | | | | | | | | |
+ * +---+---+---+---+---+---+---+---+
+ * A/N
+ * enqueued_ddcbs = A - N = 2 - 2 = 0
+ *
+ * Situation (2): Wrapped, N > A
+ * +---+---+---+---+---+---+---+---+
+ * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 |
+ * | | | x | x | | | | |
+ * +---+---+---+---+---+---+---+---+
+ * A N
+ * enqueued_ddcbs = N - A = 4 - 2 = 2
+ *
+ * Situation (3): Queue wrapped, A > N
+ * +---+---+---+---+---+---+---+---+
+ * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 |
+ * | x | x | | | x | x | x | x |
+ * +---+---+---+---+---+---+---+---+
+ * N A
+ * enqueued_ddcbs = queue_max - (A - N) = 8 - (4 - 2) = 6
+ *
+ * Situation (4a): Queue full N > A
+ * +---+---+---+---+---+---+---+---+
+ * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 |
+ * | x | x | x | x | x | x | x | |
+ * +---+---+---+---+---+---+---+---+
+ * A N
+ *
+ * enqueued_ddcbs = N - A = 7 - 0 = 7
+ *
+ * Situation (4a): Queue full A > N
+ * +---+---+---+---+---+---+---+---+
+ * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 |
+ * | x | x | x | | x | x | x | x |
+ * +---+---+---+---+---+---+---+---+
+ * N A
+ * enqueued_ddcbs = queue_max - (A - N) = 8 - (4 - 3) = 7
+ */
+
+static int queue_empty(struct ddcb_queue *queue)
+{
+ return queue->ddcb_next == queue->ddcb_act;
+}
+
+static int queue_enqueued_ddcbs(struct ddcb_queue *queue)
+{
+ if (queue->ddcb_next >= queue->ddcb_act)
+ return queue->ddcb_next - queue->ddcb_act;
+
+ return queue->ddcb_max - (queue->ddcb_act - queue->ddcb_next);
+}
+
+static int queue_free_ddcbs(struct ddcb_queue *queue)
+{
+ int free_ddcbs = queue->ddcb_max - queue_enqueued_ddcbs(queue) - 1;
+
+ if (WARN_ON_ONCE(free_ddcbs < 0)) { /* must never ever happen! */
+ return 0;
+ }
+ return free_ddcbs;
+}
+
+/*
+ * Use of the PRIV field in the DDCB for queue debugging:
+ *
+ * (1) Trying to get rid of a DDCB which saw a timeout:
+ * pddcb->priv[6] = 0xcc; # cleared
+ *
+ * (2) Append a DDCB via NEXT bit:
+ * pddcb->priv[7] = 0xaa; # appended
+ *
+ * (3) DDCB needed tapping:
+ * pddcb->priv[7] = 0xbb; # tapped
+ *
+ * (4) DDCB marked as correctly finished:
+ * pddcb->priv[6] = 0xff; # finished
+ */
+
+static inline void ddcb_mark_tapped(struct ddcb *pddcb)
+{
+ pddcb->priv[7] = 0xbb; /* tapped */
+}
+
+static inline void ddcb_mark_appended(struct ddcb *pddcb)
+{
+ pddcb->priv[7] = 0xaa; /* appended */
+}
+
+static inline void ddcb_mark_cleared(struct ddcb *pddcb)
+{
+ pddcb->priv[6] = 0xcc; /* cleared */
+}
+
+static inline void ddcb_mark_finished(struct ddcb *pddcb)
+{
+ pddcb->priv[6] = 0xff; /* finished */
+}
+
+static inline void ddcb_mark_unused(struct ddcb *pddcb)
+{
+ pddcb->priv_64 = cpu_to_be64(0); /* not tapped */
+}
+
+/**
+ * genwqe_crc16() - Generate 16-bit crc as required for DDCBs
+ * @buff: pointer to data buffer
+ * @len: length of data for calculation
+ * @init: initial crc (0xffff at start)
+ *
+ * Polynomial = x^16 + x^12 + x^5 + 1 (0x1021)
+ * Example: 4 bytes 0x01 0x02 0x03 0x04 with init = 0xffff
+ * should result in a crc16 of 0x89c3
+ *
+ * Return: crc16 checksum in big endian format !
+ */
+static inline u16 genwqe_crc16(const u8 *buff, size_t len, u16 init)
+{
+ return crc_itu_t(init, buff, len);
+}
+
+static void print_ddcb_info(struct genwqe_dev *cd, struct ddcb_queue *queue)
+{
+ int i;
+ struct ddcb *pddcb;
+ unsigned long flags;
+ struct pci_dev *pci_dev = cd->pci_dev;
+
+ spin_lock_irqsave(&cd->print_lock, flags);
+
+ dev_info(&pci_dev->dev,
+ "DDCB list for card #%d (ddcb_act=%d / ddcb_next=%d):\n",
+ cd->card_idx, queue->ddcb_act, queue->ddcb_next);
+
+ pddcb = queue->ddcb_vaddr;
+ for (i = 0; i < queue->ddcb_max; i++) {
+ dev_err(&pci_dev->dev,
+ " %c %-3d: RETC=%03x SEQ=%04x HSI=%02X SHI=%02x PRIV=%06llx CMD=%03x\n",
+ i == queue->ddcb_act ? '>' : ' ',
+ i,
+ be16_to_cpu(pddcb->retc_16),
+ be16_to_cpu(pddcb->seqnum_16),
+ pddcb->hsi,
+ pddcb->shi,
+ be64_to_cpu(pddcb->priv_64),
+ pddcb->cmd);
+ pddcb++;
+ }
+ spin_unlock_irqrestore(&cd->print_lock, flags);
+}
+
+struct genwqe_ddcb_cmd *ddcb_requ_alloc(void)
+{
+ struct ddcb_requ *req;
+
+ req = kzalloc(sizeof(*req), GFP_KERNEL);
+ if (!req)
+ return NULL;
+
+ return &req->cmd;
+}
+
+void ddcb_requ_free(struct genwqe_ddcb_cmd *cmd)
+{
+ struct ddcb_requ *req = container_of(cmd, struct ddcb_requ, cmd);
+
+ kfree(req);
+}
+
+static inline enum genwqe_requ_state ddcb_requ_get_state(struct ddcb_requ *req)
+{
+ return req->req_state;
+}
+
+static inline void ddcb_requ_set_state(struct ddcb_requ *req,
+ enum genwqe_requ_state new_state)
+{
+ req->req_state = new_state;
+}
+
+static inline int ddcb_requ_collect_debug_data(struct ddcb_requ *req)
+{
+ return req->cmd.ddata_addr != 0x0;
+}
+
+/**
+ * ddcb_requ_finished() - Returns the hardware state of the associated DDCB
+ * @cd: pointer to genwqe device descriptor
+ * @req: DDCB work request
+ *
+ * Status of ddcb_requ mirrors this hardware state, but is copied in
+ * the ddcb_requ on interrupt/polling function. The lowlevel code
+ * should check the hardware state directly, the higher level code
+ * should check the copy.
+ *
+ * This function will also return true if the state of the queue is
+ * not GENWQE_CARD_USED. This enables us to purge all DDCBs in the
+ * shutdown case.
+ */
+static int ddcb_requ_finished(struct genwqe_dev *cd, struct ddcb_requ *req)
+{
+ return (ddcb_requ_get_state(req) == GENWQE_REQU_FINISHED) ||
+ (cd->card_state != GENWQE_CARD_USED);
+}
+
+/**
+ * enqueue_ddcb() - Enqueue a DDCB
+ * @cd: pointer to genwqe device descriptor
+ * @queue: queue this operation should be done on
+ * @ddcb_no: pointer to ddcb number being tapped
+ *
+ * Start execution of DDCB by tapping or append to queue via NEXT
+ * bit. This is done by an atomic 'compare and swap' instruction and
+ * checking SHI and HSI of the previous DDCB.
+ *
+ * This function must only be called with ddcb_lock held.
+ *
+ * Return: 1 if new DDCB is appended to previous
+ * 2 if DDCB queue is tapped via register/simulation
+ */
+#define RET_DDCB_APPENDED 1
+#define RET_DDCB_TAPPED 2
+
+static int enqueue_ddcb(struct genwqe_dev *cd, struct ddcb_queue *queue,
+ struct ddcb *pddcb, int ddcb_no)
+{
+ unsigned int try;
+ int prev_no;
+ struct ddcb *prev_ddcb;
+ __be32 old, new, icrc_hsi_shi;
+ u64 num;
+
+ /*
+ * For performance checks a Dispatch Timestamp can be put into
+ * DDCB It is supposed to use the SLU's free running counter,
+ * but this requires PCIe cycles.
+ */
+ ddcb_mark_unused(pddcb);
+
+ /* check previous DDCB if already fetched */
+ prev_no = (ddcb_no == 0) ? queue->ddcb_max - 1 : ddcb_no - 1;
+ prev_ddcb = &queue->ddcb_vaddr[prev_no];
+
+ /*
+ * It might have happened that the HSI.FETCHED bit is
+ * set. Retry in this case. Therefore I expect maximum 2 times
+ * trying.
+ */
+ ddcb_mark_appended(pddcb);
+ for (try = 0; try < 2; try++) {
+ old = prev_ddcb->icrc_hsi_shi_32; /* read SHI/HSI in BE32 */
+
+ /* try to append via NEXT bit if prev DDCB is not completed */
+ if ((old & DDCB_COMPLETED_BE32) != 0x00000000)
+ break;
+
+ new = (old | DDCB_NEXT_BE32);
+
+ wmb(); /* need to ensure write ordering */
+ icrc_hsi_shi = cmpxchg(&prev_ddcb->icrc_hsi_shi_32, old, new);
+
+ if (icrc_hsi_shi == old)
+ return RET_DDCB_APPENDED; /* appended to queue */
+ }
+
+ /* Queue must be re-started by updating QUEUE_OFFSET */
+ ddcb_mark_tapped(pddcb);
+ num = (u64)ddcb_no << 8;
+
+ wmb(); /* need to ensure write ordering */
+ __genwqe_writeq(cd, queue->IO_QUEUE_OFFSET, num); /* start queue */
+
+ return RET_DDCB_TAPPED;
+}
+
+/**
+ * copy_ddcb_results() - Copy output state from real DDCB to request
+ *
+ * Copy DDCB ASV to request struct. There is no endian
+ * conversion made, since data structure in ASV is still
+ * unknown here.
+ *
+ * This is needed by:
+ * - genwqe_purge_ddcb()
+ * - genwqe_check_ddcb_queue()
+ */
+static void copy_ddcb_results(struct ddcb_requ *req, int ddcb_no)
+{
+ struct ddcb_queue *queue = req->queue;
+ struct ddcb *pddcb = &queue->ddcb_vaddr[req->num];
+
+ memcpy(&req->cmd.asv[0], &pddcb->asv[0], DDCB_ASV_LENGTH);
+
+ /* copy status flags of the variant part */
+ req->cmd.vcrc = be16_to_cpu(pddcb->vcrc_16);
+ req->cmd.deque_ts = be64_to_cpu(pddcb->deque_ts_64);
+ req->cmd.cmplt_ts = be64_to_cpu(pddcb->cmplt_ts_64);
+
+ req->cmd.attn = be16_to_cpu(pddcb->attn_16);
+ req->cmd.progress = be32_to_cpu(pddcb->progress_32);
+ req->cmd.retc = be16_to_cpu(pddcb->retc_16);
+
+ if (ddcb_requ_collect_debug_data(req)) {
+ int prev_no = (ddcb_no == 0) ?
+ queue->ddcb_max - 1 : ddcb_no - 1;
+ struct ddcb *prev_pddcb = &queue->ddcb_vaddr[prev_no];
+
+ memcpy(&req->debug_data.ddcb_finished, pddcb,
+ sizeof(req->debug_data.ddcb_finished));
+ memcpy(&req->debug_data.ddcb_prev, prev_pddcb,
+ sizeof(req->debug_data.ddcb_prev));
+ }
+}
+
+/**
+ * genwqe_check_ddcb_queue() - Checks DDCB queue for completed work equests.
+ * @cd: pointer to genwqe device descriptor
+ *
+ * Return: Number of DDCBs which were finished
+ */
+static int genwqe_check_ddcb_queue(struct genwqe_dev *cd,
+ struct ddcb_queue *queue)
+{
+ unsigned long flags;
+ int ddcbs_finished = 0;
+ struct pci_dev *pci_dev = cd->pci_dev;
+
+ spin_lock_irqsave(&queue->ddcb_lock, flags);
+
+ /* FIXME avoid soft locking CPU */
+ while (!queue_empty(queue) && (ddcbs_finished < queue->ddcb_max)) {
+
+ struct ddcb *pddcb;
+ struct ddcb_requ *req;
+ u16 vcrc, vcrc_16, retc_16;
+
+ pddcb = &queue->ddcb_vaddr[queue->ddcb_act];
+
+ if ((pddcb->icrc_hsi_shi_32 & DDCB_COMPLETED_BE32) ==
+ 0x00000000)
+ goto go_home; /* not completed, continue waiting */
+
+ wmb(); /* Add sync to decouple prev. read operations */
+
+ /* Note: DDCB could be purged */
+ req = queue->ddcb_req[queue->ddcb_act];
+ if (req == NULL) {
+ /* this occurs if DDCB is purged, not an error */
+ /* Move active DDCB further; Nothing to do anymore. */
+ goto pick_next_one;
+ }
+
+ /*
+ * HSI=0x44 (fetched and completed), but RETC is
+ * 0x101, or even worse 0x000.
+ *
+ * In case of seeing the queue in inconsistent state
+ * we read the errcnts and the queue status to provide
+ * a trigger for our PCIe analyzer stop capturing.
+ */
+ retc_16 = be16_to_cpu(pddcb->retc_16);
+ if ((pddcb->hsi == 0x44) && (retc_16 <= 0x101)) {
+ u64 errcnts, status;
+ u64 ddcb_offs = (u64)pddcb - (u64)queue->ddcb_vaddr;
+
+ errcnts = __genwqe_readq(cd, queue->IO_QUEUE_ERRCNTS);
+ status = __genwqe_readq(cd, queue->IO_QUEUE_STATUS);
+
+ dev_err(&pci_dev->dev,
+ "[%s] SEQN=%04x HSI=%02x RETC=%03x Q_ERRCNTS=%016llx Q_STATUS=%016llx DDCB_DMA_ADDR=%016llx\n",
+ __func__, be16_to_cpu(pddcb->seqnum_16),
+ pddcb->hsi, retc_16, errcnts, status,
+ queue->ddcb_daddr + ddcb_offs);
+ }
+
+ copy_ddcb_results(req, queue->ddcb_act);
+ queue->ddcb_req[queue->ddcb_act] = NULL; /* take from queue */
+
+ dev_dbg(&pci_dev->dev, "FINISHED DDCB#%d\n", req->num);
+ genwqe_hexdump(pci_dev, pddcb, sizeof(*pddcb));
+
+ ddcb_mark_finished(pddcb);
+
+ /* calculate CRC_16 to see if VCRC is correct */
+ vcrc = genwqe_crc16(pddcb->asv,
+ VCRC_LENGTH(req->cmd.asv_length),
+ 0xffff);
+ vcrc_16 = be16_to_cpu(pddcb->vcrc_16);
+ if (vcrc != vcrc_16) {
+ printk_ratelimited(KERN_ERR
+ "%s %s: err: wrong VCRC pre=%02x vcrc_len=%d bytes vcrc_data=%04x is not vcrc_card=%04x\n",
+ GENWQE_DEVNAME, dev_name(&pci_dev->dev),
+ pddcb->pre, VCRC_LENGTH(req->cmd.asv_length),
+ vcrc, vcrc_16);
+ }
+
+ ddcb_requ_set_state(req, GENWQE_REQU_FINISHED);
+ queue->ddcbs_completed++;
+ queue->ddcbs_in_flight--;
+
+ /* wake up process waiting for this DDCB, and
+ processes on the busy queue */
+ wake_up_interruptible(&queue->ddcb_waitqs[queue->ddcb_act]);
+ wake_up_interruptible(&queue->busy_waitq);
+
+pick_next_one:
+ queue->ddcb_act = (queue->ddcb_act + 1) % queue->ddcb_max;
+ ddcbs_finished++;
+ }
+
+ go_home:
+ spin_unlock_irqrestore(&queue->ddcb_lock, flags);
+ return ddcbs_finished;
+}
+
+/**
+ * __genwqe_wait_ddcb(): Waits until DDCB is completed
+ * @cd: pointer to genwqe device descriptor
+ * @req: pointer to requsted DDCB parameters
+ *
+ * The Service Layer will update the RETC in DDCB when processing is
+ * pending or done.
+ *
+ * Return: > 0 remaining jiffies, DDCB completed
+ * -ETIMEDOUT when timeout
+ * -ERESTARTSYS when ^C
+ * -EINVAL when unknown error condition
+ *
+ * When an error is returned the called needs to ensure that
+ * purge_ddcb() is being called to get the &req removed from the
+ * queue.
+ */
+int __genwqe_wait_ddcb(struct genwqe_dev *cd, struct ddcb_requ *req)
+{
+ int rc;
+ unsigned int ddcb_no;
+ struct ddcb_queue *queue;
+ struct pci_dev *pci_dev = cd->pci_dev;
+
+ if (req == NULL)
+ return -EINVAL;
+
+ queue = req->queue;
+ if (queue == NULL)
+ return -EINVAL;
+
+ ddcb_no = req->num;
+ if (ddcb_no >= queue->ddcb_max)
+ return -EINVAL;
+
+ rc = wait_event_interruptible_timeout(queue->ddcb_waitqs[ddcb_no],
+ ddcb_requ_finished(cd, req),
+ genwqe_ddcb_software_timeout * HZ);
+
+ /*
+ * We need to distinguish 3 cases here:
+ * 1. rc == 0 timeout occured
+ * 2. rc == -ERESTARTSYS signal received
+ * 3. rc > 0 remaining jiffies condition is true
+ */
+ if (rc == 0) {
+ struct ddcb_queue *queue = req->queue;
+ struct ddcb *pddcb;
+
+ /*
+ * Timeout may be caused by long task switching time.
+ * When timeout happens, check if the request has
+ * meanwhile completed.
+ */
+ genwqe_check_ddcb_queue(cd, req->queue);
+ if (ddcb_requ_finished(cd, req))
+ return rc;
+
+ dev_err(&pci_dev->dev,
+ "[%s] err: DDCB#%d timeout rc=%d state=%d req @ %p\n",
+ __func__, req->num, rc, ddcb_requ_get_state(req),
+ req);
+ dev_err(&pci_dev->dev,
+ "[%s] IO_QUEUE_STATUS=0x%016llx\n", __func__,
+ __genwqe_readq(cd, queue->IO_QUEUE_STATUS));
+
+ pddcb = &queue->ddcb_vaddr[req->num];
+ genwqe_hexdump(pci_dev, pddcb, sizeof(*pddcb));
+
+ print_ddcb_info(cd, req->queue);
+ return -ETIMEDOUT;
+
+ } else if (rc == -ERESTARTSYS) {
+ return rc;
+ /*
+ * EINTR: Stops the application
+ * ERESTARTSYS: Restartable systemcall; called again
+ */
+
+ } else if (rc < 0) {
+ dev_err(&pci_dev->dev,
+ "[%s] err: DDCB#%d unknown result (rc=%d) %d!\n",
+ __func__, req->num, rc, ddcb_requ_get_state(req));
+ return -EINVAL;
+ }
+
+ /* Severe error occured. Driver is forced to stop operation */
+ if (cd->card_state != GENWQE_CARD_USED) {
+ dev_err(&pci_dev->dev,
+ "[%s] err: DDCB#%d forced to stop (rc=%d)\n",
+ __func__, req->num, rc);
+ return -EIO;
+ }
+ return rc;
+}
+
+/**
+ * get_next_ddcb() - Get next available DDCB
+ * @cd: pointer to genwqe device descriptor
+ *
+ * DDCB's content is completely cleared but presets for PRE and
+ * SEQNUM. This function must only be called when ddcb_lock is held.
+ *
+ * Return: NULL if no empty DDCB available otherwise ptr to next DDCB.
+ */
+static struct ddcb *get_next_ddcb(struct genwqe_dev *cd,
+ struct ddcb_queue *queue,
+ int *num)
+{
+ u64 *pu64;
+ struct ddcb *pddcb;
+
+ if (queue_free_ddcbs(queue) == 0) /* queue is full */
+ return NULL;
+
+ /* find new ddcb */
+ pddcb = &queue->ddcb_vaddr[queue->ddcb_next];
+
+ /* if it is not completed, we are not allowed to use it */
+ /* barrier(); */
+ if ((pddcb->icrc_hsi_shi_32 & DDCB_COMPLETED_BE32) == 0x00000000)
+ return NULL;
+
+ *num = queue->ddcb_next; /* internal DDCB number */
+ queue->ddcb_next = (queue->ddcb_next + 1) % queue->ddcb_max;
+
+ /* clear important DDCB fields */
+ pu64 = (u64 *)pddcb;
+ pu64[0] = 0ULL; /* offs 0x00 (ICRC,HSI,SHI,...) */
+ pu64[1] = 0ULL; /* offs 0x01 (ACFUNC,CMD...) */
+
+ /* destroy previous results in ASV */
+ pu64[0x80/8] = 0ULL; /* offs 0x80 (ASV + 0) */
+ pu64[0x88/8] = 0ULL; /* offs 0x88 (ASV + 0x08) */
+ pu64[0x90/8] = 0ULL; /* offs 0x90 (ASV + 0x10) */
+ pu64[0x98/8] = 0ULL; /* offs 0x98 (ASV + 0x18) */
+ pu64[0xd0/8] = 0ULL; /* offs 0xd0 (RETC,ATTN...) */
+
+ pddcb->pre = DDCB_PRESET_PRE; /* 128 */
+ pddcb->seqnum_16 = cpu_to_be16(queue->ddcb_seq++);
+ return pddcb;
+}
+
+/**
+ * __genwqe_purge_ddcb() - Remove a DDCB from the workqueue
+ * @cd: genwqe device descriptor
+ * @req: DDCB request
+ *
+ * This will fail when the request was already FETCHED. In this case
+ * we need to wait until it is finished. Else the DDCB can be
+ * reused. This function also ensures that the request data structure
+ * is removed from ddcb_req[].
+ *
+ * Do not forget to call this function when genwqe_wait_ddcb() fails,
+ * such that the request gets really removed from ddcb_req[].
+ *
+ * Return: 0 success
+ */
+int __genwqe_purge_ddcb(struct genwqe_dev *cd, struct ddcb_requ *req)
+{
+ struct ddcb *pddcb = NULL;
+ unsigned int t;
+ unsigned long flags;
+ struct ddcb_queue *queue = req->queue;
+ struct pci_dev *pci_dev = cd->pci_dev;
+ u64 queue_status;
+ __be32 icrc_hsi_shi = 0x0000;
+ __be32 old, new;
+
+ /* unsigned long flags; */
+ if (genwqe_ddcb_software_timeout <= 0) {
+ dev_err(&pci_dev->dev,
+ "[%s] err: software timeout is not set!\n", __func__);
+ return -EFAULT;
+ }
+
+ pddcb = &queue->ddcb_vaddr[req->num];
+
+ for (t = 0; t < genwqe_ddcb_software_timeout * 10; t++) {
+
+ spin_lock_irqsave(&queue->ddcb_lock, flags);
+
+ /* Check if req was meanwhile finished */
+ if (ddcb_requ_get_state(req) == GENWQE_REQU_FINISHED)
+ goto go_home;
+
+ /* try to set PURGE bit if FETCHED/COMPLETED are not set */
+ old = pddcb->icrc_hsi_shi_32; /* read SHI/HSI in BE32 */
+ if ((old & DDCB_FETCHED_BE32) == 0x00000000) {
+
+ new = (old | DDCB_PURGE_BE32);
+ icrc_hsi_shi = cmpxchg(&pddcb->icrc_hsi_shi_32,
+ old, new);
+ if (icrc_hsi_shi == old)
+ goto finish_ddcb;
+ }
+
+ /* normal finish with HSI bit */
+ barrier();
+ icrc_hsi_shi = pddcb->icrc_hsi_shi_32;
+ if (icrc_hsi_shi & DDCB_COMPLETED_BE32)
+ goto finish_ddcb;
+
+ spin_unlock_irqrestore(&queue->ddcb_lock, flags);
+
+ /*
+ * Here the check_ddcb() function will most likely
+ * discover this DDCB to be finished some point in
+ * time. It will mark the req finished and free it up
+ * in the list.
+ */
+
+ copy_ddcb_results(req, req->num); /* for the failing case */
+ msleep(100); /* sleep for 1/10 second and try again */
+ continue;
+
+finish_ddcb:
+ copy_ddcb_results(req, req->num);
+ ddcb_requ_set_state(req, GENWQE_REQU_FINISHED);
+ queue->ddcbs_in_flight--;
+ queue->ddcb_req[req->num] = NULL; /* delete from array */
+ ddcb_mark_cleared(pddcb);
+
+ /* Move active DDCB further; Nothing to do here anymore. */
+
+ /*
+ * We need to ensure that there is at least one free
+ * DDCB in the queue. To do that, we must update
+ * ddcb_act only if the COMPLETED bit is set for the
+ * DDCB we are working on else we treat that DDCB even
+ * if we PURGED it as occupied (hardware is supposed
+ * to set the COMPLETED bit yet!).
+ */
+ icrc_hsi_shi = pddcb->icrc_hsi_shi_32;
+ if ((icrc_hsi_shi & DDCB_COMPLETED_BE32) &&
+ (queue->ddcb_act == req->num)) {
+ queue->ddcb_act = ((queue->ddcb_act + 1) %
+ queue->ddcb_max);
+ }
+go_home:
+ spin_unlock_irqrestore(&queue->ddcb_lock, flags);
+ return 0;
+ }
+
+ /*
+ * If the card is dead and the queue is forced to stop, we
+ * might see this in the queue status register.
+ */
+ queue_status = __genwqe_readq(cd, queue->IO_QUEUE_STATUS);
+
+ dev_dbg(&pci_dev->dev, "UN/FINISHED DDCB#%d\n", req->num);
+ genwqe_hexdump(pci_dev, pddcb, sizeof(*pddcb));
+
+ dev_err(&pci_dev->dev,
+ "[%s] err: DDCB#%d not purged and not completed after %d seconds QSTAT=%016llx!!\n",
+ __func__, req->num, genwqe_ddcb_software_timeout,
+ queue_status);
+
+ print_ddcb_info(cd, req->queue);
+
+ return -EFAULT;
+}
+
+int genwqe_init_debug_data(struct genwqe_dev *cd, struct genwqe_debug_data *d)
+{
+ int len;
+ struct pci_dev *pci_dev = cd->pci_dev;
+
+ if (d == NULL) {
+ dev_err(&pci_dev->dev,
+ "[%s] err: invalid memory for debug data!\n",
+ __func__);
+ return -EFAULT;
+ }
+
+ len = sizeof(d->driver_version);
+ snprintf(d->driver_version, len, "%s", DRV_VERSION);
+ d->slu_unitcfg = cd->slu_unitcfg;
+ d->app_unitcfg = cd->app_unitcfg;
+ return 0;
+}
+
+/**
+ * __genwqe_enqueue_ddcb() - Enqueue a DDCB
+ * @cd: pointer to genwqe device descriptor
+ * @req: pointer to DDCB execution request
+ * @f_flags: file mode: blocking, non-blocking
+ *
+ * Return: 0 if enqueuing succeeded
+ * -EIO if card is unusable/PCIe problems
+ * -EBUSY if enqueuing failed
+ */
+int __genwqe_enqueue_ddcb(struct genwqe_dev *cd, struct ddcb_requ *req,
+ unsigned int f_flags)
+{
+ struct ddcb *pddcb;
+ unsigned long flags;
+ struct ddcb_queue *queue;
+ struct pci_dev *pci_dev = cd->pci_dev;
+ u16 icrc;
+
+ retry:
+ if (cd->card_state != GENWQE_CARD_USED) {
+ printk_ratelimited(KERN_ERR
+ "%s %s: [%s] Card is unusable/PCIe problem Req#%d\n",
+ GENWQE_DEVNAME, dev_name(&pci_dev->dev),
+ __func__, req->num);
+ return -EIO;
+ }
+
+ queue = req->queue = &cd->queue;
+
+ /* FIXME circumvention to improve performance when no irq is
+ * there.
+ */
+ if (genwqe_polling_enabled)
+ genwqe_check_ddcb_queue(cd, queue);
+
+ /*
+ * It must be ensured to process all DDCBs in successive
+ * order. Use a lock here in order to prevent nested DDCB
+ * enqueuing.
+ */
+ spin_lock_irqsave(&queue->ddcb_lock, flags);
+
+ pddcb = get_next_ddcb(cd, queue, &req->num); /* get ptr and num */
+ if (pddcb == NULL) {
+ int rc;
+
+ spin_unlock_irqrestore(&queue->ddcb_lock, flags);
+
+ if (f_flags & O_NONBLOCK) {
+ queue->return_on_busy++;
+ return -EBUSY;
+ }
+
+ queue->wait_on_busy++;
+ rc = wait_event_interruptible(queue->busy_waitq,
+ queue_free_ddcbs(queue) != 0);
+ dev_dbg(&pci_dev->dev, "[%s] waiting for free DDCB: rc=%d\n",
+ __func__, rc);
+ if (rc == -ERESTARTSYS)
+ return rc; /* interrupted by a signal */
+
+ goto retry;
+ }
+
+ if (queue->ddcb_req[req->num] != NULL) {
+ spin_unlock_irqrestore(&queue->ddcb_lock, flags);
+
+ dev_err(&pci_dev->dev,
+ "[%s] picked DDCB %d with req=%p still in use!!\n",
+ __func__, req->num, req);
+ return -EFAULT;
+ }
+ ddcb_requ_set_state(req, GENWQE_REQU_ENQUEUED);
+ queue->ddcb_req[req->num] = req;
+
+ pddcb->cmdopts_16 = cpu_to_be16(req->cmd.cmdopts);
+ pddcb->cmd = req->cmd.cmd;
+ pddcb->acfunc = req->cmd.acfunc; /* functional unit */
+
+ /*
+ * We know that we can get retc 0x104 with CRC error, do not
+ * stop the queue in those cases for this command. XDIR = 1
+ * does not work for old SLU versions.
+ *
+ * Last bitstream with the old XDIR behavior had SLU_ID
+ * 0x34199.
+ */
+ if ((cd->slu_unitcfg & 0xFFFF0ull) > 0x34199ull)
+ pddcb->xdir = 0x1;
+ else
+ pddcb->xdir = 0x0;
+
+
+ pddcb->psp = (((req->cmd.asiv_length / 8) << 4) |
+ ((req->cmd.asv_length / 8)));
+ pddcb->disp_ts_64 = cpu_to_be64(req->cmd.disp_ts);
+
+ /*
+ * If copying the whole DDCB_ASIV_LENGTH is impacting
+ * performance we need to change it to
+ * req->cmd.asiv_length. But simulation benefits from some
+ * non-architectured bits behind the architectured content.
+ *
+ * How much data is copied depends on the availability of the
+ * ATS field, which was introduced late. If the ATS field is
+ * supported ASIV is 8 bytes shorter than it used to be. Since
+ * the ATS field is copied too, the code should do exactly
+ * what it did before, but I wanted to make copying of the ATS
+ * field very explicit.
+ */
+ if (genwqe_get_slu_id(cd) <= 0x2) {
+ memcpy(&pddcb->__asiv[0], /* destination */
+ &req->cmd.__asiv[0], /* source */
+ DDCB_ASIV_LENGTH); /* req->cmd.asiv_length */
+ } else {
+ pddcb->n.ats_64 = cpu_to_be64(req->cmd.ats);
+ memcpy(&pddcb->n.asiv[0], /* destination */
+ &req->cmd.asiv[0], /* source */
+ DDCB_ASIV_LENGTH_ATS); /* req->cmd.asiv_length */
+ }
+
+ pddcb->icrc_hsi_shi_32 = cpu_to_be32(0x00000000); /* for crc */
+
+ /*
+ * Calculate CRC_16 for corresponding range PSP(7:4). Include
+ * empty 4 bytes prior to the data.
+ */
+ icrc = genwqe_crc16((const u8 *)pddcb,
+ ICRC_LENGTH(req->cmd.asiv_length), 0xffff);
+ pddcb->icrc_hsi_shi_32 = cpu_to_be32((u32)icrc << 16);
+
+ /* enable DDCB completion irq */
+ if (!genwqe_polling_enabled)
+ pddcb->icrc_hsi_shi_32 |= DDCB_INTR_BE32;
+
+ dev_dbg(&pci_dev->dev, "INPUT DDCB#%d\n", req->num);
+ genwqe_hexdump(pci_dev, pddcb, sizeof(*pddcb));
+
+ if (ddcb_requ_collect_debug_data(req)) {
+ /* use the kernel copy of debug data. copying back to
+ user buffer happens later */
+
+ genwqe_init_debug_data(cd, &req->debug_data);
+ memcpy(&req->debug_data.ddcb_before, pddcb,
+ sizeof(req->debug_data.ddcb_before));
+ }
+
+ enqueue_ddcb(cd, queue, pddcb, req->num);
+ queue->ddcbs_in_flight++;
+
+ if (queue->ddcbs_in_flight > queue->ddcbs_max_in_flight)
+ queue->ddcbs_max_in_flight = queue->ddcbs_in_flight;
+
+ ddcb_requ_set_state(req, GENWQE_REQU_TAPPED);
+ spin_unlock_irqrestore(&queue->ddcb_lock, flags);
+ wake_up_interruptible(&cd->queue_waitq);
+
+ return 0;
+}
+
+/**
+ * __genwqe_execute_raw_ddcb() - Setup and execute DDCB
+ * @cd: pointer to genwqe device descriptor
+ * @req: user provided DDCB request
+ * @f_flags: file mode: blocking, non-blocking
+ */
+int __genwqe_execute_raw_ddcb(struct genwqe_dev *cd,
+ struct genwqe_ddcb_cmd *cmd,
+ unsigned int f_flags)
+{
+ int rc = 0;
+ struct pci_dev *pci_dev = cd->pci_dev;
+ struct ddcb_requ *req = container_of(cmd, struct ddcb_requ, cmd);
+
+ if (cmd->asiv_length > DDCB_ASIV_LENGTH) {
+ dev_err(&pci_dev->dev, "[%s] err: wrong asiv_length of %d\n",
+ __func__, cmd->asiv_length);
+ return -EINVAL;
+ }
+ if (cmd->asv_length > DDCB_ASV_LENGTH) {
+ dev_err(&pci_dev->dev, "[%s] err: wrong asv_length of %d\n",
+ __func__, cmd->asiv_length);
+ return -EINVAL;
+ }
+ rc = __genwqe_enqueue_ddcb(cd, req, f_flags);
+ if (rc != 0)
+ return rc;
+
+ rc = __genwqe_wait_ddcb(cd, req);
+ if (rc < 0) /* error or signal interrupt */
+ goto err_exit;
+
+ if (ddcb_requ_collect_debug_data(req)) {
+ if (copy_to_user((struct genwqe_debug_data __user *)
+ (unsigned long)cmd->ddata_addr,
+ &req->debug_data,
+ sizeof(struct genwqe_debug_data)))
+ return -EFAULT;
+ }
+
+ /*
+ * Higher values than 0x102 indicate completion with faults,
+ * lower values than 0x102 indicate processing faults. Note
+ * that DDCB might have been purged. E.g. Cntl+C.
+ */
+ if (cmd->retc != DDCB_RETC_COMPLETE) {
+ /* This might happen e.g. flash read, and needs to be
+ handled by the upper layer code. */
+ rc = -EBADMSG; /* not processed/error retc */
+ }
+
+ return rc;
+
+ err_exit:
+ __genwqe_purge_ddcb(cd, req);
+
+ if (ddcb_requ_collect_debug_data(req)) {
+ if (copy_to_user((struct genwqe_debug_data __user *)
+ (unsigned long)cmd->ddata_addr,
+ &req->debug_data,
+ sizeof(struct genwqe_debug_data)))
+ return -EFAULT;
+ }
+ return rc;
+}
+
+/**
+ * genwqe_next_ddcb_ready() - Figure out if the next DDCB is already finished
+ *
+ * We use this as condition for our wait-queue code.
+ */
+static int genwqe_next_ddcb_ready(struct genwqe_dev *cd)
+{
+ unsigned long flags;
+ struct ddcb *pddcb;
+ struct ddcb_queue *queue = &cd->queue;
+
+ spin_lock_irqsave(&queue->ddcb_lock, flags);
+
+ if (queue_empty(queue)) { /* emtpy queue */
+ spin_unlock_irqrestore(&queue->ddcb_lock, flags);
+ return 0;
+ }
+
+ pddcb = &queue->ddcb_vaddr[queue->ddcb_act];
+ if (pddcb->icrc_hsi_shi_32 & DDCB_COMPLETED_BE32) { /* ddcb ready */
+ spin_unlock_irqrestore(&queue->ddcb_lock, flags);
+ return 1;
+ }
+
+ spin_unlock_irqrestore(&queue->ddcb_lock, flags);
+ return 0;
+}
+
+/**
+ * genwqe_ddcbs_in_flight() - Check how many DDCBs are in flight
+ *
+ * Keep track on the number of DDCBs which ware currently in the
+ * queue. This is needed for statistics as well as conditon if we want
+ * to wait or better do polling in case of no interrupts available.
+ */
+int genwqe_ddcbs_in_flight(struct genwqe_dev *cd)
+{
+ unsigned long flags;
+ int ddcbs_in_flight = 0;
+ struct ddcb_queue *queue = &cd->queue;
+
+ spin_lock_irqsave(&queue->ddcb_lock, flags);
+ ddcbs_in_flight += queue->ddcbs_in_flight;
+ spin_unlock_irqrestore(&queue->ddcb_lock, flags);
+
+ return ddcbs_in_flight;
+}
+
+static int setup_ddcb_queue(struct genwqe_dev *cd, struct ddcb_queue *queue)
+{
+ int rc, i;
+ struct ddcb *pddcb;
+ u64 val64;
+ unsigned int queue_size;
+ struct pci_dev *pci_dev = cd->pci_dev;
+
+ if (genwqe_ddcb_max < 2)
+ return -EINVAL;
+
+ queue_size = roundup(genwqe_ddcb_max * sizeof(struct ddcb), PAGE_SIZE);
+
+ queue->ddcbs_in_flight = 0; /* statistics */
+ queue->ddcbs_max_in_flight = 0;
+ queue->ddcbs_completed = 0;
+ queue->return_on_busy = 0;
+ queue->wait_on_busy = 0;
+
+ queue->ddcb_seq = 0x100; /* start sequence number */
+ queue->ddcb_max = genwqe_ddcb_max; /* module parameter */
+ queue->ddcb_vaddr = __genwqe_alloc_consistent(cd, queue_size,
+ &queue->ddcb_daddr);
+ if (queue->ddcb_vaddr == NULL) {
+ dev_err(&pci_dev->dev,
+ "[%s] **err: could not allocate DDCB **\n", __func__);
+ return -ENOMEM;
+ }
+ memset(queue->ddcb_vaddr, 0, queue_size);
+
+ queue->ddcb_req = kzalloc(sizeof(struct ddcb_requ *) *
+ queue->ddcb_max, GFP_KERNEL);
+ if (!queue->ddcb_req) {
+ rc = -ENOMEM;
+ goto free_ddcbs;
+ }
+
+ queue->ddcb_waitqs = kzalloc(sizeof(wait_queue_head_t) *
+ queue->ddcb_max, GFP_KERNEL);
+ if (!queue->ddcb_waitqs) {
+ rc = -ENOMEM;
+ goto free_requs;
+ }
+
+ for (i = 0; i < queue->ddcb_max; i++) {
+ pddcb = &queue->ddcb_vaddr[i]; /* DDCBs */
+ pddcb->icrc_hsi_shi_32 = DDCB_COMPLETED_BE32;
+ pddcb->retc_16 = cpu_to_be16(0xfff);
+
+ queue->ddcb_req[i] = NULL; /* requests */
+ init_waitqueue_head(&queue->ddcb_waitqs[i]); /* waitqueues */
+ }
+
+ queue->ddcb_act = 0;
+ queue->ddcb_next = 0; /* queue is empty */
+
+ spin_lock_init(&queue->ddcb_lock);
+ init_waitqueue_head(&queue->busy_waitq);
+
+ val64 = ((u64)(queue->ddcb_max - 1) << 8); /* lastptr */
+ __genwqe_writeq(cd, queue->IO_QUEUE_CONFIG, 0x07); /* iCRC/vCRC */
+ __genwqe_writeq(cd, queue->IO_QUEUE_SEGMENT, queue->ddcb_daddr);
+ __genwqe_writeq(cd, queue->IO_QUEUE_INITSQN, queue->ddcb_seq);
+ __genwqe_writeq(cd, queue->IO_QUEUE_WRAP, val64);
+ return 0;
+
+ free_requs:
+ kfree(queue->ddcb_req);
+ queue->ddcb_req = NULL;
+ free_ddcbs:
+ __genwqe_free_consistent(cd, queue_size, queue->ddcb_vaddr,
+ queue->ddcb_daddr);
+ queue->ddcb_vaddr = NULL;
+ queue->ddcb_daddr = 0ull;
+ return -ENODEV;
+
+}
+
+static int ddcb_queue_initialized(struct ddcb_queue *queue)
+{
+ return queue->ddcb_vaddr != NULL;
+}
+
+static void free_ddcb_queue(struct genwqe_dev *cd, struct ddcb_queue *queue)
+{
+ unsigned int queue_size;
+
+ queue_size = roundup(queue->ddcb_max * sizeof(struct ddcb), PAGE_SIZE);
+
+ kfree(queue->ddcb_req);
+ queue->ddcb_req = NULL;
+
+ if (queue->ddcb_vaddr) {
+ __genwqe_free_consistent(cd, queue_size, queue->ddcb_vaddr,
+ queue->ddcb_daddr);
+ queue->ddcb_vaddr = NULL;
+ queue->ddcb_daddr = 0ull;
+ }
+}
+
+static irqreturn_t genwqe_pf_isr(int irq, void *dev_id)
+{
+ u64 gfir;
+ struct genwqe_dev *cd = (struct genwqe_dev *)dev_id;
+ struct pci_dev *pci_dev = cd->pci_dev;
+
+ /*
+ * In case of fatal FIR error the queue is stopped, such that
+ * we can safely check it without risking anything.
+ */
+ cd->irqs_processed++;
+ wake_up_interruptible(&cd->queue_waitq);
+
+ /*
+ * Checking for errors before kicking the queue might be
+ * safer, but slower for the good-case ... See above.
+ */
+ gfir = __genwqe_readq(cd, IO_SLC_CFGREG_GFIR);
+ if (((gfir & GFIR_ERR_TRIGGER) != 0x0) &&
+ !pci_channel_offline(pci_dev)) {
+
+ if (cd->use_platform_recovery) {
+ /*
+ * Since we use raw accessors, EEH errors won't be
+ * detected by the platform until we do a non-raw
+ * MMIO or config space read
+ */
+ readq(cd->mmio + IO_SLC_CFGREG_GFIR);
+
+ /* Don't do anything if the PCI channel is frozen */
+ if (pci_channel_offline(pci_dev))
+ goto exit;
+ }
+
+ wake_up_interruptible(&cd->health_waitq);
+
+ /*
+ * By default GFIRs causes recovery actions. This
+ * count is just for debug when recovery is masked.
+ */
+ dev_err_ratelimited(&pci_dev->dev,
+ "[%s] GFIR=%016llx\n",
+ __func__, gfir);
+ }
+
+ exit:
+ return IRQ_HANDLED;
+}
+
+static irqreturn_t genwqe_vf_isr(int irq, void *dev_id)
+{
+ struct genwqe_dev *cd = (struct genwqe_dev *)dev_id;
+
+ cd->irqs_processed++;
+ wake_up_interruptible(&cd->queue_waitq);
+
+ return IRQ_HANDLED;
+}
+
+/**
+ * genwqe_card_thread() - Work thread for the DDCB queue
+ *
+ * The idea is to check if there are DDCBs in processing. If there are
+ * some finished DDCBs, we process them and wakeup the
+ * requestors. Otherwise we give other processes time using
+ * cond_resched().
+ */
+static int genwqe_card_thread(void *data)
+{
+ int should_stop = 0, rc = 0;
+ struct genwqe_dev *cd = (struct genwqe_dev *)data;
+
+ while (!kthread_should_stop()) {
+
+ genwqe_check_ddcb_queue(cd, &cd->queue);
+
+ if (genwqe_polling_enabled) {
+ rc = wait_event_interruptible_timeout(
+ cd->queue_waitq,
+ genwqe_ddcbs_in_flight(cd) ||
+ (should_stop = kthread_should_stop()), 1);
+ } else {
+ rc = wait_event_interruptible_timeout(
+ cd->queue_waitq,
+ genwqe_next_ddcb_ready(cd) ||
+ (should_stop = kthread_should_stop()), HZ);
+ }
+ if (should_stop)
+ break;
+
+ /*
+ * Avoid soft lockups on heavy loads; we do not want
+ * to disable our interrupts.
+ */
+ cond_resched();
+ }
+ return 0;
+}
+
+/**
+ * genwqe_setup_service_layer() - Setup DDCB queue
+ * @cd: pointer to genwqe device descriptor
+ *
+ * Allocate DDCBs. Configure Service Layer Controller (SLC).
+ *
+ * Return: 0 success
+ */
+int genwqe_setup_service_layer(struct genwqe_dev *cd)
+{
+ int rc;
+ struct ddcb_queue *queue;
+ struct pci_dev *pci_dev = cd->pci_dev;
+
+ if (genwqe_is_privileged(cd)) {
+ rc = genwqe_card_reset(cd);
+ if (rc < 0) {
+ dev_err(&pci_dev->dev,
+ "[%s] err: reset failed.\n", __func__);
+ return rc;
+ }
+ genwqe_read_softreset(cd);
+ }
+
+ queue = &cd->queue;
+ queue->IO_QUEUE_CONFIG = IO_SLC_QUEUE_CONFIG;
+ queue->IO_QUEUE_STATUS = IO_SLC_QUEUE_STATUS;
+ queue->IO_QUEUE_SEGMENT = IO_SLC_QUEUE_SEGMENT;
+ queue->IO_QUEUE_INITSQN = IO_SLC_QUEUE_INITSQN;
+ queue->IO_QUEUE_OFFSET = IO_SLC_QUEUE_OFFSET;
+ queue->IO_QUEUE_WRAP = IO_SLC_QUEUE_WRAP;
+ queue->IO_QUEUE_WTIME = IO_SLC_QUEUE_WTIME;
+ queue->IO_QUEUE_ERRCNTS = IO_SLC_QUEUE_ERRCNTS;
+ queue->IO_QUEUE_LRW = IO_SLC_QUEUE_LRW;
+
+ rc = setup_ddcb_queue(cd, queue);
+ if (rc != 0) {
+ rc = -ENODEV;
+ goto err_out;
+ }
+
+ init_waitqueue_head(&cd->queue_waitq);
+ cd->card_thread = kthread_run(genwqe_card_thread, cd,
+ GENWQE_DEVNAME "%d_thread",
+ cd->card_idx);
+ if (IS_ERR(cd->card_thread)) {
+ rc = PTR_ERR(cd->card_thread);
+ cd->card_thread = NULL;
+ goto stop_free_queue;
+ }
+
+ rc = genwqe_set_interrupt_capability(cd, GENWQE_MSI_IRQS);
+ if (rc)
+ goto stop_kthread;
+
+ /*
+ * We must have all wait-queues initialized when we enable the
+ * interrupts. Otherwise we might crash if we get an early
+ * irq.
+ */
+ init_waitqueue_head(&cd->health_waitq);
+
+ if (genwqe_is_privileged(cd)) {
+ rc = request_irq(pci_dev->irq, genwqe_pf_isr, IRQF_SHARED,
+ GENWQE_DEVNAME, cd);
+ } else {
+ rc = request_irq(pci_dev->irq, genwqe_vf_isr, IRQF_SHARED,
+ GENWQE_DEVNAME, cd);
+ }
+ if (rc < 0) {
+ dev_err(&pci_dev->dev, "irq %d not free.\n", pci_dev->irq);
+ goto stop_irq_cap;
+ }
+
+ cd->card_state = GENWQE_CARD_USED;
+ return 0;
+
+ stop_irq_cap:
+ genwqe_reset_interrupt_capability(cd);
+ stop_kthread:
+ kthread_stop(cd->card_thread);
+ cd->card_thread = NULL;
+ stop_free_queue:
+ free_ddcb_queue(cd, queue);
+ err_out:
+ return rc;
+}
+
+/**
+ * queue_wake_up_all() - Handles fatal error case
+ *
+ * The PCI device got unusable and we have to stop all pending
+ * requests as fast as we can. The code after this must purge the
+ * DDCBs in question and ensure that all mappings are freed.
+ */
+static int queue_wake_up_all(struct genwqe_dev *cd)
+{
+ unsigned int i;
+ unsigned long flags;
+ struct ddcb_queue *queue = &cd->queue;
+
+ spin_lock_irqsave(&queue->ddcb_lock, flags);
+
+ for (i = 0; i < queue->ddcb_max; i++)
+ wake_up_interruptible(&queue->ddcb_waitqs[queue->ddcb_act]);
+
+ wake_up_interruptible(&queue->busy_waitq);
+ spin_unlock_irqrestore(&queue->ddcb_lock, flags);
+
+ return 0;
+}
+
+/**
+ * genwqe_finish_queue() - Remove any genwqe devices and user-interfaces
+ *
+ * Relies on the pre-condition that there are no users of the card
+ * device anymore e.g. with open file-descriptors.
+ *
+ * This function must be robust enough to be called twice.
+ */
+int genwqe_finish_queue(struct genwqe_dev *cd)
+{
+ int i, rc = 0, in_flight;
+ int waitmax = genwqe_ddcb_software_timeout;
+ struct pci_dev *pci_dev = cd->pci_dev;
+ struct ddcb_queue *queue = &cd->queue;
+
+ if (!ddcb_queue_initialized(queue))
+ return 0;
+
+ /* Do not wipe out the error state. */
+ if (cd->card_state == GENWQE_CARD_USED)
+ cd->card_state = GENWQE_CARD_UNUSED;
+
+ /* Wake up all requests in the DDCB queue such that they
+ should be removed nicely. */
+ queue_wake_up_all(cd);
+
+ /* We must wait to get rid of the DDCBs in flight */
+ for (i = 0; i < waitmax; i++) {
+ in_flight = genwqe_ddcbs_in_flight(cd);
+
+ if (in_flight == 0)
+ break;
+
+ dev_dbg(&pci_dev->dev,
+ " DEBUG [%d/%d] waiting for queue to get empty: %d requests!\n",
+ i, waitmax, in_flight);
+
+ /*
+ * Severe severe error situation: The card itself has
+ * 16 DDCB queues, each queue has e.g. 32 entries,
+ * each DDBC has a hardware timeout of currently 250
+ * msec but the PFs have a hardware timeout of 8 sec
+ * ... so I take something large.
+ */
+ msleep(1000);
+ }
+ if (i == waitmax) {
+ dev_err(&pci_dev->dev, " [%s] err: queue is not empty!!\n",
+ __func__);
+ rc = -EIO;
+ }
+ return rc;
+}
+
+/**
+ * genwqe_release_service_layer() - Shutdown DDCB queue
+ * @cd: genwqe device descriptor
+ *
+ * This function must be robust enough to be called twice.
+ */
+int genwqe_release_service_layer(struct genwqe_dev *cd)
+{
+ struct pci_dev *pci_dev = cd->pci_dev;
+
+ if (!ddcb_queue_initialized(&cd->queue))
+ return 1;
+
+ free_irq(pci_dev->irq, cd);
+ genwqe_reset_interrupt_capability(cd);
+
+ if (cd->card_thread != NULL) {
+ kthread_stop(cd->card_thread);
+ cd->card_thread = NULL;
+ }
+
+ free_ddcb_queue(cd, &cd->queue);
+ return 0;
+}
diff --git a/drivers/misc/genwqe/card_ddcb.h b/drivers/misc/genwqe/card_ddcb.h
new file mode 100644
index 0000000..0361a68
--- /dev/null
+++ b/drivers/misc/genwqe/card_ddcb.h
@@ -0,0 +1,188 @@
+#ifndef __CARD_DDCB_H__
+#define __CARD_DDCB_H__
+
+/**
+ * IBM Accelerator Family 'GenWQE'
+ *
+ * (C) Copyright IBM Corp. 2013
+ *
+ * Author: Frank Haverkamp <haver@linux.vnet.ibm.com>
+ * Author: Joerg-Stephan Vogt <jsvogt@de.ibm.com>
+ * Author: Michael Jung <mijung@gmx.net>
+ * Author: Michael Ruettger <michael@ibmra.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/types.h>
+#include <asm/byteorder.h>
+
+#include "genwqe_driver.h"
+#include "card_base.h"
+
+/**
+ * struct ddcb - Device Driver Control Block DDCB
+ * @hsi: Hardware software interlock
+ * @shi: Software hardware interlock. Hsi and shi are used to interlock
+ * software and hardware activities. We are using a compare and
+ * swap operation to ensure that there are no races when
+ * activating new DDCBs on the queue, or when we need to
+ * purge a DDCB from a running queue.
+ * @acfunc: Accelerator function addresses a unit within the chip
+ * @cmd: Command to work on
+ * @cmdopts_16: Options for the command
+ * @asiv: Input data
+ * @asv: Output data
+ *
+ * The DDCB data format is big endian. Multiple consequtive DDBCs form
+ * a DDCB queue.
+ */
+#define ASIV_LENGTH 104 /* Old specification without ATS field */
+#define ASIV_LENGTH_ATS 96 /* New specification with ATS field */
+#define ASV_LENGTH 64
+
+struct ddcb {
+ union {
+ __be32 icrc_hsi_shi_32; /* iCRC, Hardware/SW interlock */
+ struct {
+ __be16 icrc_16;
+ u8 hsi;
+ u8 shi;
+ };
+ };
+ u8 pre; /* Preamble */
+ u8 xdir; /* Execution Directives */
+ __be16 seqnum_16; /* Sequence Number */
+
+ u8 acfunc; /* Accelerator Function.. */
+ u8 cmd; /* Command. */
+ __be16 cmdopts_16; /* Command Options */
+ u8 sur; /* Status Update Rate */
+ u8 psp; /* Protection Section Pointer */
+ __be16 rsvd_0e_16; /* Reserved invariant */
+
+ __be64 fwiv_64; /* Firmware Invariant. */
+
+ union {
+ struct {
+ __be64 ats_64; /* Address Translation Spec */
+ u8 asiv[ASIV_LENGTH_ATS]; /* New ASIV */
+ } n;
+ u8 __asiv[ASIV_LENGTH]; /* obsolete */
+ };
+ u8 asv[ASV_LENGTH]; /* Appl Spec Variant */
+
+ __be16 rsvd_c0_16; /* Reserved Variant */
+ __be16 vcrc_16; /* Variant CRC */
+ __be32 rsvd_32; /* Reserved unprotected */
+
+ __be64 deque_ts_64; /* Deque Time Stamp. */
+
+ __be16 retc_16; /* Return Code */
+ __be16 attn_16; /* Attention/Extended Error Codes */
+ __be32 progress_32; /* Progress indicator. */
+
+ __be64 cmplt_ts_64; /* Completion Time Stamp. */
+
+ /* The following layout matches the new service layer format */
+ __be32 ibdc_32; /* Inbound Data Count (* 256) */
+ __be32 obdc_32; /* Outbound Data Count (* 256) */
+
+ __be64 rsvd_SLH_64; /* Reserved for hardware */
+ union { /* private data for driver */
+ u8 priv[8];
+ __be64 priv_64;
+ };
+ __be64 disp_ts_64; /* Dispatch TimeStamp */
+} __attribute__((__packed__));
+
+/* CRC polynomials for DDCB */
+#define CRC16_POLYNOMIAL 0x1021
+
+/*
+ * SHI: Software to Hardware Interlock
+ * This 1 byte field is written by software to interlock the
+ * movement of one queue entry to another with the hardware in the
+ * chip.
+ */
+#define DDCB_SHI_INTR 0x04 /* Bit 2 */
+#define DDCB_SHI_PURGE 0x02 /* Bit 1 */
+#define DDCB_SHI_NEXT 0x01 /* Bit 0 */
+
+/*
+ * HSI: Hardware to Software interlock
+ * This 1 byte field is written by hardware to interlock the movement
+ * of one queue entry to another with the software in the chip.
+ */
+#define DDCB_HSI_COMPLETED 0x40 /* Bit 6 */
+#define DDCB_HSI_FETCHED 0x04 /* Bit 2 */
+
+/*
+ * Accessing HSI/SHI is done 32-bit wide
+ * Normally 16-bit access would work too, but on some platforms the
+ * 16 compare and swap operation is not supported. Therefore
+ * switching to 32-bit such that those platforms will work too.
+ *
+ * iCRC HSI/SHI
+ */
+#define DDCB_INTR_BE32 cpu_to_be32(0x00000004)
+#define DDCB_PURGE_BE32 cpu_to_be32(0x00000002)
+#define DDCB_NEXT_BE32 cpu_to_be32(0x00000001)
+#define DDCB_COMPLETED_BE32 cpu_to_be32(0x00004000)
+#define DDCB_FETCHED_BE32 cpu_to_be32(0x00000400)
+
+/* Definitions of DDCB presets */
+#define DDCB_PRESET_PRE 0x80
+#define ICRC_LENGTH(n) ((n) + 8 + 8 + 8) /* used ASIV + hdr fields */
+#define VCRC_LENGTH(n) ((n)) /* used ASV */
+
+/*
+ * Genwqe Scatter Gather list
+ * Each element has up to 8 entries.
+ * The chaining element is element 0 cause of prefetching needs.
+ */
+
+/*
+ * 0b0110 Chained descriptor. The descriptor is describing the next
+ * descriptor list.
+ */
+#define SG_CHAINED (0x6)
+
+/*
+ * 0b0010 First entry of a descriptor list. Start from a Buffer-Empty
+ * condition.
+ */
+#define SG_DATA (0x2)
+
+/*
+ * 0b0000 Early terminator. This is the last entry on the list
+ * irregardless of the length indicated.
+ */
+#define SG_END_LIST (0x0)
+
+/**
+ * struct sglist - Scatter gather list
+ * @target_addr: Either a dma addr of memory to work on or a
+ * dma addr or a subsequent sglist block.
+ * @len: Length of the data block.
+ * @flags: See above.
+ *
+ * Depending on the command the GenWQE card can use a scatter gather
+ * list to describe the memory it works on. Always 8 sg_entry's form
+ * a block.
+ */
+struct sg_entry {
+ __be64 target_addr;
+ __be32 len;
+ __be32 flags;
+};
+
+#endif /* __CARD_DDCB_H__ */
diff --git a/drivers/misc/genwqe/card_debugfs.c b/drivers/misc/genwqe/card_debugfs.c
new file mode 100644
index 0000000..c715534
--- /dev/null
+++ b/drivers/misc/genwqe/card_debugfs.c
@@ -0,0 +1,508 @@
+/**
+ * IBM Accelerator Family 'GenWQE'
+ *
+ * (C) Copyright IBM Corp. 2013
+ *
+ * Author: Frank Haverkamp <haver@linux.vnet.ibm.com>
+ * Author: Joerg-Stephan Vogt <jsvogt@de.ibm.com>
+ * Author: Michael Jung <mijung@gmx.net>
+ * Author: Michael Ruettger <michael@ibmra.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+/*
+ * Debugfs interfaces for the GenWQE card. Help to debug potential
+ * problems. Dump internal chip state for debugging and failure
+ * determination.
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+#include <linux/uaccess.h>
+
+#include "card_base.h"
+#include "card_ddcb.h"
+
+#define GENWQE_DEBUGFS_RO(_name, _showfn) \
+ static int genwqe_debugfs_##_name##_open(struct inode *inode, \
+ struct file *file) \
+ { \
+ return single_open(file, _showfn, inode->i_private); \
+ } \
+ static const struct file_operations genwqe_##_name##_fops = { \
+ .open = genwqe_debugfs_##_name##_open, \
+ .read = seq_read, \
+ .llseek = seq_lseek, \
+ .release = single_release, \
+ }
+
+static void dbg_uidn_show(struct seq_file *s, struct genwqe_reg *regs,
+ int entries)
+{
+ unsigned int i;
+ u32 v_hi, v_lo;
+
+ for (i = 0; i < entries; i++) {
+ v_hi = (regs[i].val >> 32) & 0xffffffff;
+ v_lo = (regs[i].val) & 0xffffffff;
+
+ seq_printf(s, " 0x%08x 0x%08x 0x%08x 0x%08x EXT_ERR_REC\n",
+ regs[i].addr, regs[i].idx, v_hi, v_lo);
+ }
+}
+
+static int curr_dbg_uidn_show(struct seq_file *s, void *unused, int uid)
+{
+ struct genwqe_dev *cd = s->private;
+ int entries;
+ struct genwqe_reg *regs;
+
+ entries = genwqe_ffdc_buff_size(cd, uid);
+ if (entries < 0)
+ return -EINVAL;
+
+ if (entries == 0)
+ return 0;
+
+ regs = kcalloc(entries, sizeof(*regs), GFP_KERNEL);
+ if (regs == NULL)
+ return -ENOMEM;
+
+ genwqe_stop_traps(cd); /* halt the traps while dumping data */
+ genwqe_ffdc_buff_read(cd, uid, regs, entries);
+ genwqe_start_traps(cd);
+
+ dbg_uidn_show(s, regs, entries);
+ kfree(regs);
+ return 0;
+}
+
+static int genwqe_curr_dbg_uid0_show(struct seq_file *s, void *unused)
+{
+ return curr_dbg_uidn_show(s, unused, 0);
+}
+
+GENWQE_DEBUGFS_RO(curr_dbg_uid0, genwqe_curr_dbg_uid0_show);
+
+static int genwqe_curr_dbg_uid1_show(struct seq_file *s, void *unused)
+{
+ return curr_dbg_uidn_show(s, unused, 1);
+}
+
+GENWQE_DEBUGFS_RO(curr_dbg_uid1, genwqe_curr_dbg_uid1_show);
+
+static int genwqe_curr_dbg_uid2_show(struct seq_file *s, void *unused)
+{
+ return curr_dbg_uidn_show(s, unused, 2);
+}
+
+GENWQE_DEBUGFS_RO(curr_dbg_uid2, genwqe_curr_dbg_uid2_show);
+
+static int prev_dbg_uidn_show(struct seq_file *s, void *unused, int uid)
+{
+ struct genwqe_dev *cd = s->private;
+
+ dbg_uidn_show(s, cd->ffdc[uid].regs, cd->ffdc[uid].entries);
+ return 0;
+}
+
+static int genwqe_prev_dbg_uid0_show(struct seq_file *s, void *unused)
+{
+ return prev_dbg_uidn_show(s, unused, 0);
+}
+
+GENWQE_DEBUGFS_RO(prev_dbg_uid0, genwqe_prev_dbg_uid0_show);
+
+static int genwqe_prev_dbg_uid1_show(struct seq_file *s, void *unused)
+{
+ return prev_dbg_uidn_show(s, unused, 1);
+}
+
+GENWQE_DEBUGFS_RO(prev_dbg_uid1, genwqe_prev_dbg_uid1_show);
+
+static int genwqe_prev_dbg_uid2_show(struct seq_file *s, void *unused)
+{
+ return prev_dbg_uidn_show(s, unused, 2);
+}
+
+GENWQE_DEBUGFS_RO(prev_dbg_uid2, genwqe_prev_dbg_uid2_show);
+
+static int genwqe_curr_regs_show(struct seq_file *s, void *unused)
+{
+ struct genwqe_dev *cd = s->private;
+ unsigned int i;
+ struct genwqe_reg *regs;
+
+ regs = kcalloc(GENWQE_FFDC_REGS, sizeof(*regs), GFP_KERNEL);
+ if (regs == NULL)
+ return -ENOMEM;
+
+ genwqe_stop_traps(cd);
+ genwqe_read_ffdc_regs(cd, regs, GENWQE_FFDC_REGS, 1);
+ genwqe_start_traps(cd);
+
+ for (i = 0; i < GENWQE_FFDC_REGS; i++) {
+ if (regs[i].addr == 0xffffffff)
+ break; /* invalid entries */
+
+ if (regs[i].val == 0x0ull)
+ continue; /* do not print 0x0 FIRs */
+
+ seq_printf(s, " 0x%08x 0x%016llx\n",
+ regs[i].addr, regs[i].val);
+ }
+ return 0;
+}
+
+GENWQE_DEBUGFS_RO(curr_regs, genwqe_curr_regs_show);
+
+static int genwqe_prev_regs_show(struct seq_file *s, void *unused)
+{
+ struct genwqe_dev *cd = s->private;
+ unsigned int i;
+ struct genwqe_reg *regs = cd->ffdc[GENWQE_DBG_REGS].regs;
+
+ if (regs == NULL)
+ return -EINVAL;
+
+ for (i = 0; i < GENWQE_FFDC_REGS; i++) {
+ if (regs[i].addr == 0xffffffff)
+ break; /* invalid entries */
+
+ if (regs[i].val == 0x0ull)
+ continue; /* do not print 0x0 FIRs */
+
+ seq_printf(s, " 0x%08x 0x%016llx\n",
+ regs[i].addr, regs[i].val);
+ }
+ return 0;
+}
+
+GENWQE_DEBUGFS_RO(prev_regs, genwqe_prev_regs_show);
+
+static int genwqe_jtimer_show(struct seq_file *s, void *unused)
+{
+ struct genwqe_dev *cd = s->private;
+ unsigned int vf_num;
+ u64 jtimer;
+
+ jtimer = genwqe_read_vreg(cd, IO_SLC_VF_APPJOB_TIMEOUT, 0);
+ seq_printf(s, " PF 0x%016llx %d msec\n", jtimer,
+ genwqe_pf_jobtimeout_msec);
+
+ for (vf_num = 0; vf_num < cd->num_vfs; vf_num++) {
+ jtimer = genwqe_read_vreg(cd, IO_SLC_VF_APPJOB_TIMEOUT,
+ vf_num + 1);
+ seq_printf(s, " VF%-2d 0x%016llx %d msec\n", vf_num, jtimer,
+ cd->vf_jobtimeout_msec[vf_num]);
+ }
+ return 0;
+}
+
+GENWQE_DEBUGFS_RO(jtimer, genwqe_jtimer_show);
+
+static int genwqe_queue_working_time_show(struct seq_file *s, void *unused)
+{
+ struct genwqe_dev *cd = s->private;
+ unsigned int vf_num;
+ u64 t;
+
+ t = genwqe_read_vreg(cd, IO_SLC_VF_QUEUE_WTIME, 0);
+ seq_printf(s, " PF 0x%016llx\n", t);
+
+ for (vf_num = 0; vf_num < cd->num_vfs; vf_num++) {
+ t = genwqe_read_vreg(cd, IO_SLC_VF_QUEUE_WTIME, vf_num + 1);
+ seq_printf(s, " VF%-2d 0x%016llx\n", vf_num, t);
+ }
+ return 0;
+}
+
+GENWQE_DEBUGFS_RO(queue_working_time, genwqe_queue_working_time_show);
+
+static int genwqe_ddcb_info_show(struct seq_file *s, void *unused)
+{
+ struct genwqe_dev *cd = s->private;
+ unsigned int i;
+ struct ddcb_queue *queue;
+ struct ddcb *pddcb;
+
+ queue = &cd->queue;
+ seq_puts(s, "DDCB QUEUE:\n");
+ seq_printf(s, " ddcb_max: %d\n"
+ " ddcb_daddr: %016llx - %016llx\n"
+ " ddcb_vaddr: %016llx\n"
+ " ddcbs_in_flight: %u\n"
+ " ddcbs_max_in_flight: %u\n"
+ " ddcbs_completed: %u\n"
+ " return_on_busy: %u\n"
+ " wait_on_busy: %u\n"
+ " irqs_processed: %u\n",
+ queue->ddcb_max, (long long)queue->ddcb_daddr,
+ (long long)queue->ddcb_daddr +
+ (queue->ddcb_max * DDCB_LENGTH),
+ (long long)queue->ddcb_vaddr, queue->ddcbs_in_flight,
+ queue->ddcbs_max_in_flight, queue->ddcbs_completed,
+ queue->return_on_busy, queue->wait_on_busy,
+ cd->irqs_processed);
+
+ /* Hardware State */
+ seq_printf(s, " 0x%08x 0x%016llx IO_QUEUE_CONFIG\n"
+ " 0x%08x 0x%016llx IO_QUEUE_STATUS\n"
+ " 0x%08x 0x%016llx IO_QUEUE_SEGMENT\n"
+ " 0x%08x 0x%016llx IO_QUEUE_INITSQN\n"
+ " 0x%08x 0x%016llx IO_QUEUE_WRAP\n"
+ " 0x%08x 0x%016llx IO_QUEUE_OFFSET\n"
+ " 0x%08x 0x%016llx IO_QUEUE_WTIME\n"
+ " 0x%08x 0x%016llx IO_QUEUE_ERRCNTS\n"
+ " 0x%08x 0x%016llx IO_QUEUE_LRW\n",
+ queue->IO_QUEUE_CONFIG,
+ __genwqe_readq(cd, queue->IO_QUEUE_CONFIG),
+ queue->IO_QUEUE_STATUS,
+ __genwqe_readq(cd, queue->IO_QUEUE_STATUS),
+ queue->IO_QUEUE_SEGMENT,
+ __genwqe_readq(cd, queue->IO_QUEUE_SEGMENT),
+ queue->IO_QUEUE_INITSQN,
+ __genwqe_readq(cd, queue->IO_QUEUE_INITSQN),
+ queue->IO_QUEUE_WRAP,
+ __genwqe_readq(cd, queue->IO_QUEUE_WRAP),
+ queue->IO_QUEUE_OFFSET,
+ __genwqe_readq(cd, queue->IO_QUEUE_OFFSET),
+ queue->IO_QUEUE_WTIME,
+ __genwqe_readq(cd, queue->IO_QUEUE_WTIME),
+ queue->IO_QUEUE_ERRCNTS,
+ __genwqe_readq(cd, queue->IO_QUEUE_ERRCNTS),
+ queue->IO_QUEUE_LRW,
+ __genwqe_readq(cd, queue->IO_QUEUE_LRW));
+
+ seq_printf(s, "DDCB list (ddcb_act=%d/ddcb_next=%d):\n",
+ queue->ddcb_act, queue->ddcb_next);
+
+ pddcb = queue->ddcb_vaddr;
+ for (i = 0; i < queue->ddcb_max; i++) {
+ seq_printf(s, " %-3d: RETC=%03x SEQ=%04x HSI/SHI=%02x/%02x ",
+ i, be16_to_cpu(pddcb->retc_16),
+ be16_to_cpu(pddcb->seqnum_16),
+ pddcb->hsi, pddcb->shi);
+ seq_printf(s, "PRIV=%06llx CMD=%02x\n",
+ be64_to_cpu(pddcb->priv_64), pddcb->cmd);
+ pddcb++;
+ }
+ return 0;
+}
+
+GENWQE_DEBUGFS_RO(ddcb_info, genwqe_ddcb_info_show);
+
+static int genwqe_info_show(struct seq_file *s, void *unused)
+{
+ struct genwqe_dev *cd = s->private;
+ u16 val16, type;
+ u64 app_id, slu_id, bitstream = -1;
+ struct pci_dev *pci_dev = cd->pci_dev;
+
+ slu_id = __genwqe_readq(cd, IO_SLU_UNITCFG);
+ app_id = __genwqe_readq(cd, IO_APP_UNITCFG);
+
+ if (genwqe_is_privileged(cd))
+ bitstream = __genwqe_readq(cd, IO_SLU_BITSTREAM);
+
+ val16 = (u16)(slu_id & 0x0fLLU);
+ type = (u16)((slu_id >> 20) & 0xffLLU);
+
+ seq_printf(s, "%s driver version: %s\n"
+ " Device Name/Type: %s %s CardIdx: %d\n"
+ " SLU/APP Config : 0x%016llx/0x%016llx\n"
+ " Build Date : %u/%x/%u\n"
+ " Base Clock : %u MHz\n"
+ " Arch/SVN Release: %u/%llx\n"
+ " Bitstream : %llx\n",
+ GENWQE_DEVNAME, DRV_VERSION, dev_name(&pci_dev->dev),
+ genwqe_is_privileged(cd) ?
+ "Physical" : "Virtual or no SR-IOV",
+ cd->card_idx, slu_id, app_id,
+ (u16)((slu_id >> 12) & 0x0fLLU), /* month */
+ (u16)((slu_id >> 4) & 0xffLLU), /* day */
+ (u16)((slu_id >> 16) & 0x0fLLU) + 2010, /* year */
+ genwqe_base_clock_frequency(cd),
+ (u16)((slu_id >> 32) & 0xffLLU), slu_id >> 40,
+ bitstream);
+
+ return 0;
+}
+
+GENWQE_DEBUGFS_RO(info, genwqe_info_show);
+
+int genwqe_init_debugfs(struct genwqe_dev *cd)
+{
+ struct dentry *root;
+ struct dentry *file;
+ int ret;
+ char card_name[64];
+ char name[64];
+ unsigned int i;
+
+ sprintf(card_name, "%s%d_card", GENWQE_DEVNAME, cd->card_idx);
+
+ root = debugfs_create_dir(card_name, cd->debugfs_genwqe);
+ if (!root) {
+ ret = -ENOMEM;
+ goto err0;
+ }
+
+ /* non privileged interfaces are done here */
+ file = debugfs_create_file("ddcb_info", S_IRUGO, root, cd,
+ &genwqe_ddcb_info_fops);
+ if (!file) {
+ ret = -ENOMEM;
+ goto err1;
+ }
+
+ file = debugfs_create_file("info", S_IRUGO, root, cd,
+ &genwqe_info_fops);
+ if (!file) {
+ ret = -ENOMEM;
+ goto err1;
+ }
+
+ file = debugfs_create_x64("err_inject", 0666, root, &cd->err_inject);
+ if (!file) {
+ ret = -ENOMEM;
+ goto err1;
+ }
+
+ file = debugfs_create_u32("ddcb_software_timeout", 0666, root,
+ &cd->ddcb_software_timeout);
+ if (!file) {
+ ret = -ENOMEM;
+ goto err1;
+ }
+
+ file = debugfs_create_u32("kill_timeout", 0666, root,
+ &cd->kill_timeout);
+ if (!file) {
+ ret = -ENOMEM;
+ goto err1;
+ }
+
+ /* privileged interfaces follow here */
+ if (!genwqe_is_privileged(cd)) {
+ cd->debugfs_root = root;
+ return 0;
+ }
+
+ file = debugfs_create_file("curr_regs", S_IRUGO, root, cd,
+ &genwqe_curr_regs_fops);
+ if (!file) {
+ ret = -ENOMEM;
+ goto err1;
+ }
+
+ file = debugfs_create_file("curr_dbg_uid0", S_IRUGO, root, cd,
+ &genwqe_curr_dbg_uid0_fops);
+ if (!file) {
+ ret = -ENOMEM;
+ goto err1;
+ }
+
+ file = debugfs_create_file("curr_dbg_uid1", S_IRUGO, root, cd,
+ &genwqe_curr_dbg_uid1_fops);
+ if (!file) {
+ ret = -ENOMEM;
+ goto err1;
+ }
+
+ file = debugfs_create_file("curr_dbg_uid2", S_IRUGO, root, cd,
+ &genwqe_curr_dbg_uid2_fops);
+ if (!file) {
+ ret = -ENOMEM;
+ goto err1;
+ }
+
+ file = debugfs_create_file("prev_regs", S_IRUGO, root, cd,
+ &genwqe_prev_regs_fops);
+ if (!file) {
+ ret = -ENOMEM;
+ goto err1;
+ }
+
+ file = debugfs_create_file("prev_dbg_uid0", S_IRUGO, root, cd,
+ &genwqe_prev_dbg_uid0_fops);
+ if (!file) {
+ ret = -ENOMEM;
+ goto err1;
+ }
+
+ file = debugfs_create_file("prev_dbg_uid1", S_IRUGO, root, cd,
+ &genwqe_prev_dbg_uid1_fops);
+ if (!file) {
+ ret = -ENOMEM;
+ goto err1;
+ }
+
+ file = debugfs_create_file("prev_dbg_uid2", S_IRUGO, root, cd,
+ &genwqe_prev_dbg_uid2_fops);
+ if (!file) {
+ ret = -ENOMEM;
+ goto err1;
+ }
+
+ for (i = 0; i < GENWQE_MAX_VFS; i++) {
+ sprintf(name, "vf%u_jobtimeout_msec", i);
+
+ file = debugfs_create_u32(name, 0666, root,
+ &cd->vf_jobtimeout_msec[i]);
+ if (!file) {
+ ret = -ENOMEM;
+ goto err1;
+ }
+ }
+
+ file = debugfs_create_file("jobtimer", S_IRUGO, root, cd,
+ &genwqe_jtimer_fops);
+ if (!file) {
+ ret = -ENOMEM;
+ goto err1;
+ }
+
+ file = debugfs_create_file("queue_working_time", S_IRUGO, root, cd,
+ &genwqe_queue_working_time_fops);
+ if (!file) {
+ ret = -ENOMEM;
+ goto err1;
+ }
+
+ file = debugfs_create_u32("skip_recovery", 0666, root,
+ &cd->skip_recovery);
+ if (!file) {
+ ret = -ENOMEM;
+ goto err1;
+ }
+
+ file = debugfs_create_u32("use_platform_recovery", 0666, root,
+ &cd->use_platform_recovery);
+ if (!file) {
+ ret = -ENOMEM;
+ goto err1;
+ }
+
+ cd->debugfs_root = root;
+ return 0;
+err1:
+ debugfs_remove_recursive(root);
+err0:
+ return ret;
+}
+
+void genqwe_exit_debugfs(struct genwqe_dev *cd)
+{
+ debugfs_remove_recursive(cd->debugfs_root);
+}
diff --git a/drivers/misc/genwqe/card_dev.c b/drivers/misc/genwqe/card_dev.c
new file mode 100644
index 0000000..7f1b282
--- /dev/null
+++ b/drivers/misc/genwqe/card_dev.c
@@ -0,0 +1,1413 @@
+/**
+ * IBM Accelerator Family 'GenWQE'
+ *
+ * (C) Copyright IBM Corp. 2013
+ *
+ * Author: Frank Haverkamp <haver@linux.vnet.ibm.com>
+ * Author: Joerg-Stephan Vogt <jsvogt@de.ibm.com>
+ * Author: Michael Jung <mijung@gmx.net>
+ * Author: Michael Ruettger <michael@ibmra.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+/*
+ * Character device representation of the GenWQE device. This allows
+ * user-space applications to communicate with the card.
+ */
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/string.h>
+#include <linux/fs.h>
+#include <linux/sched.h>
+#include <linux/wait.h>
+#include <linux/delay.h>
+#include <linux/atomic.h>
+
+#include "card_base.h"
+#include "card_ddcb.h"
+
+static int genwqe_open_files(struct genwqe_dev *cd)
+{
+ int rc;
+ unsigned long flags;
+
+ spin_lock_irqsave(&cd->file_lock, flags);
+ rc = list_empty(&cd->file_list);
+ spin_unlock_irqrestore(&cd->file_lock, flags);
+ return !rc;
+}
+
+static void genwqe_add_file(struct genwqe_dev *cd, struct genwqe_file *cfile)
+{
+ unsigned long flags;
+
+ cfile->owner = current;
+ spin_lock_irqsave(&cd->file_lock, flags);
+ list_add(&cfile->list, &cd->file_list);
+ spin_unlock_irqrestore(&cd->file_lock, flags);
+}
+
+static int genwqe_del_file(struct genwqe_dev *cd, struct genwqe_file *cfile)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&cd->file_lock, flags);
+ list_del(&cfile->list);
+ spin_unlock_irqrestore(&cd->file_lock, flags);
+
+ return 0;
+}
+
+static void genwqe_add_pin(struct genwqe_file *cfile, struct dma_mapping *m)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&cfile->pin_lock, flags);
+ list_add(&m->pin_list, &cfile->pin_list);
+ spin_unlock_irqrestore(&cfile->pin_lock, flags);
+}
+
+static int genwqe_del_pin(struct genwqe_file *cfile, struct dma_mapping *m)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&cfile->pin_lock, flags);
+ list_del(&m->pin_list);
+ spin_unlock_irqrestore(&cfile->pin_lock, flags);
+
+ return 0;
+}
+
+/**
+ * genwqe_search_pin() - Search for the mapping for a userspace address
+ * @cfile: Descriptor of opened file
+ * @u_addr: User virtual address
+ * @size: Size of buffer
+ * @dma_addr: DMA address to be updated
+ *
+ * Return: Pointer to the corresponding mapping NULL if not found
+ */
+static struct dma_mapping *genwqe_search_pin(struct genwqe_file *cfile,
+ unsigned long u_addr,
+ unsigned int size,
+ void **virt_addr)
+{
+ unsigned long flags;
+ struct dma_mapping *m;
+
+ spin_lock_irqsave(&cfile->pin_lock, flags);
+
+ list_for_each_entry(m, &cfile->pin_list, pin_list) {
+ if ((((u64)m->u_vaddr) <= (u_addr)) &&
+ (((u64)m->u_vaddr + m->size) >= (u_addr + size))) {
+
+ if (virt_addr)
+ *virt_addr = m->k_vaddr +
+ (u_addr - (u64)m->u_vaddr);
+
+ spin_unlock_irqrestore(&cfile->pin_lock, flags);
+ return m;
+ }
+ }
+ spin_unlock_irqrestore(&cfile->pin_lock, flags);
+ return NULL;
+}
+
+static void __genwqe_add_mapping(struct genwqe_file *cfile,
+ struct dma_mapping *dma_map)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&cfile->map_lock, flags);
+ list_add(&dma_map->card_list, &cfile->map_list);
+ spin_unlock_irqrestore(&cfile->map_lock, flags);
+}
+
+static void __genwqe_del_mapping(struct genwqe_file *cfile,
+ struct dma_mapping *dma_map)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&cfile->map_lock, flags);
+ list_del(&dma_map->card_list);
+ spin_unlock_irqrestore(&cfile->map_lock, flags);
+}
+
+
+/**
+ * __genwqe_search_mapping() - Search for the mapping for a userspace address
+ * @cfile: descriptor of opened file
+ * @u_addr: user virtual address
+ * @size: size of buffer
+ * @dma_addr: DMA address to be updated
+ * Return: Pointer to the corresponding mapping NULL if not found
+ */
+static struct dma_mapping *__genwqe_search_mapping(struct genwqe_file *cfile,
+ unsigned long u_addr,
+ unsigned int size,
+ dma_addr_t *dma_addr,
+ void **virt_addr)
+{
+ unsigned long flags;
+ struct dma_mapping *m;
+ struct pci_dev *pci_dev = cfile->cd->pci_dev;
+
+ spin_lock_irqsave(&cfile->map_lock, flags);
+ list_for_each_entry(m, &cfile->map_list, card_list) {
+
+ if ((((u64)m->u_vaddr) <= (u_addr)) &&
+ (((u64)m->u_vaddr + m->size) >= (u_addr + size))) {
+
+ /* match found: current is as expected and
+ addr is in range */
+ if (dma_addr)
+ *dma_addr = m->dma_addr +
+ (u_addr - (u64)m->u_vaddr);
+
+ if (virt_addr)
+ *virt_addr = m->k_vaddr +
+ (u_addr - (u64)m->u_vaddr);
+
+ spin_unlock_irqrestore(&cfile->map_lock, flags);
+ return m;
+ }
+ }
+ spin_unlock_irqrestore(&cfile->map_lock, flags);
+
+ dev_err(&pci_dev->dev,
+ "[%s] Entry not found: u_addr=%lx, size=%x\n",
+ __func__, u_addr, size);
+
+ return NULL;
+}
+
+static void genwqe_remove_mappings(struct genwqe_file *cfile)
+{
+ int i = 0;
+ struct list_head *node, *next;
+ struct dma_mapping *dma_map;
+ struct genwqe_dev *cd = cfile->cd;
+ struct pci_dev *pci_dev = cfile->cd->pci_dev;
+
+ list_for_each_safe(node, next, &cfile->map_list) {
+ dma_map = list_entry(node, struct dma_mapping, card_list);
+
+ list_del_init(&dma_map->card_list);
+
+ /*
+ * This is really a bug, because those things should
+ * have been already tidied up.
+ *
+ * GENWQE_MAPPING_RAW should have been removed via mmunmap().
+ * GENWQE_MAPPING_SGL_TEMP should be removed by tidy up code.
+ */
+ dev_err(&pci_dev->dev,
+ "[%s] %d. cleanup mapping: u_vaddr=%p u_kaddr=%016lx dma_addr=%lx\n",
+ __func__, i++, dma_map->u_vaddr,
+ (unsigned long)dma_map->k_vaddr,
+ (unsigned long)dma_map->dma_addr);
+
+ if (dma_map->type == GENWQE_MAPPING_RAW) {
+ /* we allocated this dynamically */
+ __genwqe_free_consistent(cd, dma_map->size,
+ dma_map->k_vaddr,
+ dma_map->dma_addr);
+ kfree(dma_map);
+ } else if (dma_map->type == GENWQE_MAPPING_SGL_TEMP) {
+ /* we use dma_map statically from the request */
+ genwqe_user_vunmap(cd, dma_map, NULL);
+ }
+ }
+}
+
+static void genwqe_remove_pinnings(struct genwqe_file *cfile)
+{
+ struct list_head *node, *next;
+ struct dma_mapping *dma_map;
+ struct genwqe_dev *cd = cfile->cd;
+
+ list_for_each_safe(node, next, &cfile->pin_list) {
+ dma_map = list_entry(node, struct dma_mapping, pin_list);
+
+ /*
+ * This is not a bug, because a killed processed might
+ * not call the unpin ioctl, which is supposed to free
+ * the resources.
+ *
+ * Pinnings are dymically allocated and need to be
+ * deleted.
+ */
+ list_del_init(&dma_map->pin_list);
+ genwqe_user_vunmap(cd, dma_map, NULL);
+ kfree(dma_map);
+ }
+}
+
+/**
+ * genwqe_kill_fasync() - Send signal to all processes with open GenWQE files
+ *
+ * E.g. genwqe_send_signal(cd, SIGIO);
+ */
+static int genwqe_kill_fasync(struct genwqe_dev *cd, int sig)
+{
+ unsigned int files = 0;
+ unsigned long flags;
+ struct genwqe_file *cfile;
+
+ spin_lock_irqsave(&cd->file_lock, flags);
+ list_for_each_entry(cfile, &cd->file_list, list) {
+ if (cfile->async_queue)
+ kill_fasync(&cfile->async_queue, sig, POLL_HUP);
+ files++;
+ }
+ spin_unlock_irqrestore(&cd->file_lock, flags);
+ return files;
+}
+
+static int genwqe_force_sig(struct genwqe_dev *cd, int sig)
+{
+ unsigned int files = 0;
+ unsigned long flags;
+ struct genwqe_file *cfile;
+
+ spin_lock_irqsave(&cd->file_lock, flags);
+ list_for_each_entry(cfile, &cd->file_list, list) {
+ force_sig(sig, cfile->owner);
+ files++;
+ }
+ spin_unlock_irqrestore(&cd->file_lock, flags);
+ return files;
+}
+
+/**
+ * genwqe_open() - file open
+ * @inode: file system information
+ * @filp: file handle
+ *
+ * This function is executed whenever an application calls
+ * open("/dev/genwqe",..).
+ *
+ * Return: 0 if successful or <0 if errors
+ */
+static int genwqe_open(struct inode *inode, struct file *filp)
+{
+ struct genwqe_dev *cd;
+ struct genwqe_file *cfile;
+ struct pci_dev *pci_dev;
+
+ cfile = kzalloc(sizeof(*cfile), GFP_KERNEL);
+ if (cfile == NULL)
+ return -ENOMEM;
+
+ cd = container_of(inode->i_cdev, struct genwqe_dev, cdev_genwqe);
+ pci_dev = cd->pci_dev;
+ cfile->cd = cd;
+ cfile->filp = filp;
+ cfile->client = NULL;
+
+ spin_lock_init(&cfile->map_lock); /* list of raw memory allocations */
+ INIT_LIST_HEAD(&cfile->map_list);
+
+ spin_lock_init(&cfile->pin_lock); /* list of user pinned memory */
+ INIT_LIST_HEAD(&cfile->pin_list);
+
+ filp->private_data = cfile;
+
+ genwqe_add_file(cd, cfile);
+ return 0;
+}
+
+/**
+ * genwqe_fasync() - Setup process to receive SIGIO.
+ * @fd: file descriptor
+ * @filp: file handle
+ * @mode: file mode
+ *
+ * Sending a signal is working as following:
+ *
+ * if (cdev->async_queue)
+ * kill_fasync(&cdev->async_queue, SIGIO, POLL_IN);
+ *
+ * Some devices also implement asynchronous notification to indicate
+ * when the device can be written; in this case, of course,
+ * kill_fasync must be called with a mode of POLL_OUT.
+ */
+static int genwqe_fasync(int fd, struct file *filp, int mode)
+{
+ struct genwqe_file *cdev = (struct genwqe_file *)filp->private_data;
+
+ return fasync_helper(fd, filp, mode, &cdev->async_queue);
+}
+
+
+/**
+ * genwqe_release() - file close
+ * @inode: file system information
+ * @filp: file handle
+ *
+ * This function is executed whenever an application calls 'close(fd_genwqe)'
+ *
+ * Return: always 0
+ */
+static int genwqe_release(struct inode *inode, struct file *filp)
+{
+ struct genwqe_file *cfile = (struct genwqe_file *)filp->private_data;
+ struct genwqe_dev *cd = cfile->cd;
+
+ /* there must be no entries in these lists! */
+ genwqe_remove_mappings(cfile);
+ genwqe_remove_pinnings(cfile);
+
+ /* remove this filp from the asynchronously notified filp's */
+ genwqe_fasync(-1, filp, 0);
+
+ /*
+ * For this to work we must not release cd when this cfile is
+ * not yet released, otherwise the list entry is invalid,
+ * because the list itself gets reinstantiated!
+ */
+ genwqe_del_file(cd, cfile);
+ kfree(cfile);
+ return 0;
+}
+
+static void genwqe_vma_open(struct vm_area_struct *vma)
+{
+ /* nothing ... */
+}
+
+/**
+ * genwqe_vma_close() - Called each time when vma is unmapped
+ *
+ * Free memory which got allocated by GenWQE mmap().
+ */
+static void genwqe_vma_close(struct vm_area_struct *vma)
+{
+ unsigned long vsize = vma->vm_end - vma->vm_start;
+ struct inode *inode = file_inode(vma->vm_file);
+ struct dma_mapping *dma_map;
+ struct genwqe_dev *cd = container_of(inode->i_cdev, struct genwqe_dev,
+ cdev_genwqe);
+ struct pci_dev *pci_dev = cd->pci_dev;
+ dma_addr_t d_addr = 0;
+ struct genwqe_file *cfile = vma->vm_private_data;
+
+ dma_map = __genwqe_search_mapping(cfile, vma->vm_start, vsize,
+ &d_addr, NULL);
+ if (dma_map == NULL) {
+ dev_err(&pci_dev->dev,
+ " [%s] err: mapping not found: v=%lx, p=%lx s=%lx\n",
+ __func__, vma->vm_start, vma->vm_pgoff << PAGE_SHIFT,
+ vsize);
+ return;
+ }
+ __genwqe_del_mapping(cfile, dma_map);
+ __genwqe_free_consistent(cd, dma_map->size, dma_map->k_vaddr,
+ dma_map->dma_addr);
+ kfree(dma_map);
+}
+
+static const struct vm_operations_struct genwqe_vma_ops = {
+ .open = genwqe_vma_open,
+ .close = genwqe_vma_close,
+};
+
+/**
+ * genwqe_mmap() - Provide contignous buffers to userspace
+ *
+ * We use mmap() to allocate contignous buffers used for DMA
+ * transfers. After the buffer is allocated we remap it to user-space
+ * and remember a reference to our dma_mapping data structure, where
+ * we store the associated DMA address and allocated size.
+ *
+ * When we receive a DDCB execution request with the ATS bits set to
+ * plain buffer, we lookup our dma_mapping list to find the
+ * corresponding DMA address for the associated user-space address.
+ */
+static int genwqe_mmap(struct file *filp, struct vm_area_struct *vma)
+{
+ int rc;
+ unsigned long pfn, vsize = vma->vm_end - vma->vm_start;
+ struct genwqe_file *cfile = (struct genwqe_file *)filp->private_data;
+ struct genwqe_dev *cd = cfile->cd;
+ struct dma_mapping *dma_map;
+
+ if (vsize == 0)
+ return -EINVAL;
+
+ if (get_order(vsize) > MAX_ORDER)
+ return -ENOMEM;
+
+ dma_map = kzalloc(sizeof(struct dma_mapping), GFP_KERNEL);
+ if (dma_map == NULL)
+ return -ENOMEM;
+
+ genwqe_mapping_init(dma_map, GENWQE_MAPPING_RAW);
+ dma_map->u_vaddr = (void *)vma->vm_start;
+ dma_map->size = vsize;
+ dma_map->nr_pages = DIV_ROUND_UP(vsize, PAGE_SIZE);
+ dma_map->k_vaddr = __genwqe_alloc_consistent(cd, vsize,
+ &dma_map->dma_addr);
+ if (dma_map->k_vaddr == NULL) {
+ rc = -ENOMEM;
+ goto free_dma_map;
+ }
+
+ if (capable(CAP_SYS_ADMIN) && (vsize > sizeof(dma_addr_t)))
+ *(dma_addr_t *)dma_map->k_vaddr = dma_map->dma_addr;
+
+ pfn = virt_to_phys(dma_map->k_vaddr) >> PAGE_SHIFT;
+ rc = remap_pfn_range(vma,
+ vma->vm_start,
+ pfn,
+ vsize,
+ vma->vm_page_prot);
+ if (rc != 0) {
+ rc = -EFAULT;
+ goto free_dma_mem;
+ }
+
+ vma->vm_private_data = cfile;
+ vma->vm_ops = &genwqe_vma_ops;
+ __genwqe_add_mapping(cfile, dma_map);
+
+ return 0;
+
+ free_dma_mem:
+ __genwqe_free_consistent(cd, dma_map->size,
+ dma_map->k_vaddr,
+ dma_map->dma_addr);
+ free_dma_map:
+ kfree(dma_map);
+ return rc;
+}
+
+/**
+ * do_flash_update() - Excute flash update (write image or CVPD)
+ * @cd: genwqe device
+ * @load: details about image load
+ *
+ * Return: 0 if successful
+ */
+
+#define FLASH_BLOCK 0x40000 /* we use 256k blocks */
+
+static int do_flash_update(struct genwqe_file *cfile,
+ struct genwqe_bitstream *load)
+{
+ int rc = 0;
+ int blocks_to_flash;
+ dma_addr_t dma_addr;
+ u64 flash = 0;
+ size_t tocopy = 0;
+ u8 __user *buf;
+ u8 *xbuf;
+ u32 crc;
+ u8 cmdopts;
+ struct genwqe_dev *cd = cfile->cd;
+ struct file *filp = cfile->filp;
+ struct pci_dev *pci_dev = cd->pci_dev;
+
+ if ((load->size & 0x3) != 0)
+ return -EINVAL;
+
+ if (((unsigned long)(load->data_addr) & ~PAGE_MASK) != 0)
+ return -EINVAL;
+
+ /* FIXME Bits have changed for new service layer! */
+ switch ((char)load->partition) {
+ case '0':
+ cmdopts = 0x14;
+ break; /* download/erase_first/part_0 */
+ case '1':
+ cmdopts = 0x1C;
+ break; /* download/erase_first/part_1 */
+ case 'v':
+ cmdopts = 0x0C;
+ break; /* download/erase_first/vpd */
+ default:
+ return -EINVAL;
+ }
+
+ buf = (u8 __user *)load->data_addr;
+ xbuf = __genwqe_alloc_consistent(cd, FLASH_BLOCK, &dma_addr);
+ if (xbuf == NULL)
+ return -ENOMEM;
+
+ blocks_to_flash = load->size / FLASH_BLOCK;
+ while (load->size) {
+ struct genwqe_ddcb_cmd *req;
+
+ /*
+ * We must be 4 byte aligned. Buffer must be 0 appened
+ * to have defined values when calculating CRC.
+ */
+ tocopy = min_t(size_t, load->size, FLASH_BLOCK);
+
+ rc = copy_from_user(xbuf, buf, tocopy);
+ if (rc) {
+ rc = -EFAULT;
+ goto free_buffer;
+ }
+ crc = genwqe_crc32(xbuf, tocopy, 0xffffffff);
+
+ dev_dbg(&pci_dev->dev,
+ "[%s] DMA: %lx CRC: %08x SZ: %ld %d\n",
+ __func__, (unsigned long)dma_addr, crc, tocopy,
+ blocks_to_flash);
+
+ /* prepare DDCB for SLU process */
+ req = ddcb_requ_alloc();
+ if (req == NULL) {
+ rc = -ENOMEM;
+ goto free_buffer;
+ }
+
+ req->cmd = SLCMD_MOVE_FLASH;
+ req->cmdopts = cmdopts;
+
+ /* prepare invariant values */
+ if (genwqe_get_slu_id(cd) <= 0x2) {
+ *(__be64 *)&req->__asiv[0] = cpu_to_be64(dma_addr);
+ *(__be64 *)&req->__asiv[8] = cpu_to_be64(tocopy);
+ *(__be64 *)&req->__asiv[16] = cpu_to_be64(flash);
+ *(__be32 *)&req->__asiv[24] = cpu_to_be32(0);
+ req->__asiv[24] = load->uid;
+ *(__be32 *)&req->__asiv[28] = cpu_to_be32(crc);
+
+ /* for simulation only */
+ *(__be64 *)&req->__asiv[88] = cpu_to_be64(load->slu_id);
+ *(__be64 *)&req->__asiv[96] = cpu_to_be64(load->app_id);
+ req->asiv_length = 32; /* bytes included in crc calc */
+ } else { /* setup DDCB for ATS architecture */
+ *(__be64 *)&req->asiv[0] = cpu_to_be64(dma_addr);
+ *(__be32 *)&req->asiv[8] = cpu_to_be32(tocopy);
+ *(__be32 *)&req->asiv[12] = cpu_to_be32(0); /* resvd */
+ *(__be64 *)&req->asiv[16] = cpu_to_be64(flash);
+ *(__be32 *)&req->asiv[24] = cpu_to_be32(load->uid<<24);
+ *(__be32 *)&req->asiv[28] = cpu_to_be32(crc);
+
+ /* for simulation only */
+ *(__be64 *)&req->asiv[80] = cpu_to_be64(load->slu_id);
+ *(__be64 *)&req->asiv[88] = cpu_to_be64(load->app_id);
+
+ /* Rd only */
+ req->ats = 0x4ULL << 44;
+ req->asiv_length = 40; /* bytes included in crc calc */
+ }
+ req->asv_length = 8;
+
+ /* For Genwqe5 we get back the calculated CRC */
+ *(u64 *)&req->asv[0] = 0ULL; /* 0x80 */
+
+ rc = __genwqe_execute_raw_ddcb(cd, req, filp->f_flags);
+
+ load->retc = req->retc;
+ load->attn = req->attn;
+ load->progress = req->progress;
+
+ if (rc < 0) {
+ ddcb_requ_free(req);
+ goto free_buffer;
+ }
+
+ if (req->retc != DDCB_RETC_COMPLETE) {
+ rc = -EIO;
+ ddcb_requ_free(req);
+ goto free_buffer;
+ }
+
+ load->size -= tocopy;
+ flash += tocopy;
+ buf += tocopy;
+ blocks_to_flash--;
+ ddcb_requ_free(req);
+ }
+
+ free_buffer:
+ __genwqe_free_consistent(cd, FLASH_BLOCK, xbuf, dma_addr);
+ return rc;
+}
+
+static int do_flash_read(struct genwqe_file *cfile,
+ struct genwqe_bitstream *load)
+{
+ int rc, blocks_to_flash;
+ dma_addr_t dma_addr;
+ u64 flash = 0;
+ size_t tocopy = 0;
+ u8 __user *buf;
+ u8 *xbuf;
+ u8 cmdopts;
+ struct genwqe_dev *cd = cfile->cd;
+ struct file *filp = cfile->filp;
+ struct pci_dev *pci_dev = cd->pci_dev;
+ struct genwqe_ddcb_cmd *cmd;
+
+ if ((load->size & 0x3) != 0)
+ return -EINVAL;
+
+ if (((unsigned long)(load->data_addr) & ~PAGE_MASK) != 0)
+ return -EINVAL;
+
+ /* FIXME Bits have changed for new service layer! */
+ switch ((char)load->partition) {
+ case '0':
+ cmdopts = 0x12;
+ break; /* upload/part_0 */
+ case '1':
+ cmdopts = 0x1A;
+ break; /* upload/part_1 */
+ case 'v':
+ cmdopts = 0x0A;
+ break; /* upload/vpd */
+ default:
+ return -EINVAL;
+ }
+
+ buf = (u8 __user *)load->data_addr;
+ xbuf = __genwqe_alloc_consistent(cd, FLASH_BLOCK, &dma_addr);
+ if (xbuf == NULL)
+ return -ENOMEM;
+
+ blocks_to_flash = load->size / FLASH_BLOCK;
+ while (load->size) {
+ /*
+ * We must be 4 byte aligned. Buffer must be 0 appened
+ * to have defined values when calculating CRC.
+ */
+ tocopy = min_t(size_t, load->size, FLASH_BLOCK);
+
+ dev_dbg(&pci_dev->dev,
+ "[%s] DMA: %lx SZ: %ld %d\n",
+ __func__, (unsigned long)dma_addr, tocopy,
+ blocks_to_flash);
+
+ /* prepare DDCB for SLU process */
+ cmd = ddcb_requ_alloc();
+ if (cmd == NULL) {
+ rc = -ENOMEM;
+ goto free_buffer;
+ }
+ cmd->cmd = SLCMD_MOVE_FLASH;
+ cmd->cmdopts = cmdopts;
+
+ /* prepare invariant values */
+ if (genwqe_get_slu_id(cd) <= 0x2) {
+ *(__be64 *)&cmd->__asiv[0] = cpu_to_be64(dma_addr);
+ *(__be64 *)&cmd->__asiv[8] = cpu_to_be64(tocopy);
+ *(__be64 *)&cmd->__asiv[16] = cpu_to_be64(flash);
+ *(__be32 *)&cmd->__asiv[24] = cpu_to_be32(0);
+ cmd->__asiv[24] = load->uid;
+ *(__be32 *)&cmd->__asiv[28] = cpu_to_be32(0) /* CRC */;
+ cmd->asiv_length = 32; /* bytes included in crc calc */
+ } else { /* setup DDCB for ATS architecture */
+ *(__be64 *)&cmd->asiv[0] = cpu_to_be64(dma_addr);
+ *(__be32 *)&cmd->asiv[8] = cpu_to_be32(tocopy);
+ *(__be32 *)&cmd->asiv[12] = cpu_to_be32(0); /* resvd */
+ *(__be64 *)&cmd->asiv[16] = cpu_to_be64(flash);
+ *(__be32 *)&cmd->asiv[24] = cpu_to_be32(load->uid<<24);
+ *(__be32 *)&cmd->asiv[28] = cpu_to_be32(0); /* CRC */
+
+ /* rd/wr */
+ cmd->ats = 0x5ULL << 44;
+ cmd->asiv_length = 40; /* bytes included in crc calc */
+ }
+ cmd->asv_length = 8;
+
+ /* we only get back the calculated CRC */
+ *(u64 *)&cmd->asv[0] = 0ULL; /* 0x80 */
+
+ rc = __genwqe_execute_raw_ddcb(cd, cmd, filp->f_flags);
+
+ load->retc = cmd->retc;
+ load->attn = cmd->attn;
+ load->progress = cmd->progress;
+
+ if ((rc < 0) && (rc != -EBADMSG)) {
+ ddcb_requ_free(cmd);
+ goto free_buffer;
+ }
+
+ rc = copy_to_user(buf, xbuf, tocopy);
+ if (rc) {
+ rc = -EFAULT;
+ ddcb_requ_free(cmd);
+ goto free_buffer;
+ }
+
+ /* We know that we can get retc 0x104 with CRC err */
+ if (((cmd->retc == DDCB_RETC_FAULT) &&
+ (cmd->attn != 0x02)) || /* Normally ignore CRC error */
+ ((cmd->retc == DDCB_RETC_COMPLETE) &&
+ (cmd->attn != 0x00))) { /* Everything was fine */
+ rc = -EIO;
+ ddcb_requ_free(cmd);
+ goto free_buffer;
+ }
+
+ load->size -= tocopy;
+ flash += tocopy;
+ buf += tocopy;
+ blocks_to_flash--;
+ ddcb_requ_free(cmd);
+ }
+ rc = 0;
+
+ free_buffer:
+ __genwqe_free_consistent(cd, FLASH_BLOCK, xbuf, dma_addr);
+ return rc;
+}
+
+static int genwqe_pin_mem(struct genwqe_file *cfile, struct genwqe_mem *m)
+{
+ int rc;
+ struct genwqe_dev *cd = cfile->cd;
+ struct pci_dev *pci_dev = cfile->cd->pci_dev;
+ struct dma_mapping *dma_map;
+ unsigned long map_addr;
+ unsigned long map_size;
+
+ if ((m->addr == 0x0) || (m->size == 0))
+ return -EINVAL;
+
+ map_addr = (m->addr & PAGE_MASK);
+ map_size = round_up(m->size + (m->addr & ~PAGE_MASK), PAGE_SIZE);
+
+ dma_map = kzalloc(sizeof(struct dma_mapping), GFP_KERNEL);
+ if (dma_map == NULL)
+ return -ENOMEM;
+
+ genwqe_mapping_init(dma_map, GENWQE_MAPPING_SGL_PINNED);
+ rc = genwqe_user_vmap(cd, dma_map, (void *)map_addr, map_size, NULL);
+ if (rc != 0) {
+ dev_err(&pci_dev->dev,
+ "[%s] genwqe_user_vmap rc=%d\n", __func__, rc);
+ kfree(dma_map);
+ return rc;
+ }
+
+ genwqe_add_pin(cfile, dma_map);
+ return 0;
+}
+
+static int genwqe_unpin_mem(struct genwqe_file *cfile, struct genwqe_mem *m)
+{
+ struct genwqe_dev *cd = cfile->cd;
+ struct dma_mapping *dma_map;
+ unsigned long map_addr;
+ unsigned long map_size;
+
+ if (m->addr == 0x0)
+ return -EINVAL;
+
+ map_addr = (m->addr & PAGE_MASK);
+ map_size = round_up(m->size + (m->addr & ~PAGE_MASK), PAGE_SIZE);
+
+ dma_map = genwqe_search_pin(cfile, map_addr, map_size, NULL);
+ if (dma_map == NULL)
+ return -ENOENT;
+
+ genwqe_del_pin(cfile, dma_map);
+ genwqe_user_vunmap(cd, dma_map, NULL);
+ kfree(dma_map);
+ return 0;
+}
+
+/**
+ * ddcb_cmd_cleanup() - Remove dynamically created fixup entries
+ *
+ * Only if there are any. Pinnings are not removed.
+ */
+static int ddcb_cmd_cleanup(struct genwqe_file *cfile, struct ddcb_requ *req)
+{
+ unsigned int i;
+ struct dma_mapping *dma_map;
+ struct genwqe_dev *cd = cfile->cd;
+
+ for (i = 0; i < DDCB_FIXUPS; i++) {
+ dma_map = &req->dma_mappings[i];
+
+ if (dma_mapping_used(dma_map)) {
+ __genwqe_del_mapping(cfile, dma_map);
+ genwqe_user_vunmap(cd, dma_map, req);
+ }
+ if (req->sgls[i].sgl != NULL)
+ genwqe_free_sync_sgl(cd, &req->sgls[i]);
+ }
+ return 0;
+}
+
+/**
+ * ddcb_cmd_fixups() - Establish DMA fixups/sglists for user memory references
+ *
+ * Before the DDCB gets executed we need to handle the fixups. We
+ * replace the user-space addresses with DMA addresses or do
+ * additional setup work e.g. generating a scatter-gather list which
+ * is used to describe the memory referred to in the fixup.
+ */
+static int ddcb_cmd_fixups(struct genwqe_file *cfile, struct ddcb_requ *req)
+{
+ int rc;
+ unsigned int asiv_offs, i;
+ struct genwqe_dev *cd = cfile->cd;
+ struct genwqe_ddcb_cmd *cmd = &req->cmd;
+ struct dma_mapping *m;
+ const char *type = "UNKNOWN";
+
+ for (i = 0, asiv_offs = 0x00; asiv_offs <= 0x58;
+ i++, asiv_offs += 0x08) {
+
+ u64 u_addr;
+ dma_addr_t d_addr;
+ u32 u_size = 0;
+ u64 ats_flags;
+
+ ats_flags = ATS_GET_FLAGS(cmd->ats, asiv_offs);
+
+ switch (ats_flags) {
+
+ case ATS_TYPE_DATA:
+ break; /* nothing to do here */
+
+ case ATS_TYPE_FLAT_RDWR:
+ case ATS_TYPE_FLAT_RD: {
+ u_addr = be64_to_cpu(*((__be64 *)&cmd->
+ asiv[asiv_offs]));
+ u_size = be32_to_cpu(*((__be32 *)&cmd->
+ asiv[asiv_offs + 0x08]));
+
+ /*
+ * No data available. Ignore u_addr in this
+ * case and set addr to 0. Hardware must not
+ * fetch the buffer.
+ */
+ if (u_size == 0x0) {
+ *((__be64 *)&cmd->asiv[asiv_offs]) =
+ cpu_to_be64(0x0);
+ break;
+ }
+
+ m = __genwqe_search_mapping(cfile, u_addr, u_size,
+ &d_addr, NULL);
+ if (m == NULL) {
+ rc = -EFAULT;
+ goto err_out;
+ }
+
+ *((__be64 *)&cmd->asiv[asiv_offs]) =
+ cpu_to_be64(d_addr);
+ break;
+ }
+
+ case ATS_TYPE_SGL_RDWR:
+ case ATS_TYPE_SGL_RD: {
+ int page_offs;
+
+ u_addr = be64_to_cpu(*((__be64 *)
+ &cmd->asiv[asiv_offs]));
+ u_size = be32_to_cpu(*((__be32 *)
+ &cmd->asiv[asiv_offs + 0x08]));
+
+ /*
+ * No data available. Ignore u_addr in this
+ * case and set addr to 0. Hardware must not
+ * fetch the empty sgl.
+ */
+ if (u_size == 0x0) {
+ *((__be64 *)&cmd->asiv[asiv_offs]) =
+ cpu_to_be64(0x0);
+ break;
+ }
+
+ m = genwqe_search_pin(cfile, u_addr, u_size, NULL);
+ if (m != NULL) {
+ type = "PINNING";
+ page_offs = (u_addr -
+ (u64)m->u_vaddr)/PAGE_SIZE;
+ } else {
+ type = "MAPPING";
+ m = &req->dma_mappings[i];
+
+ genwqe_mapping_init(m,
+ GENWQE_MAPPING_SGL_TEMP);
+ rc = genwqe_user_vmap(cd, m, (void *)u_addr,
+ u_size, req);
+ if (rc != 0)
+ goto err_out;
+
+ __genwqe_add_mapping(cfile, m);
+ page_offs = 0;
+ }
+
+ /* create genwqe style scatter gather list */
+ rc = genwqe_alloc_sync_sgl(cd, &req->sgls[i],
+ (void __user *)u_addr,
+ u_size);
+ if (rc != 0)
+ goto err_out;
+
+ genwqe_setup_sgl(cd, &req->sgls[i],
+ &m->dma_list[page_offs]);
+
+ *((__be64 *)&cmd->asiv[asiv_offs]) =
+ cpu_to_be64(req->sgls[i].sgl_dma_addr);
+
+ break;
+ }
+ default:
+ rc = -EINVAL;
+ goto err_out;
+ }
+ }
+ return 0;
+
+ err_out:
+ ddcb_cmd_cleanup(cfile, req);
+ return rc;
+}
+
+/**
+ * genwqe_execute_ddcb() - Execute DDCB using userspace address fixups
+ *
+ * The code will build up the translation tables or lookup the
+ * contignous memory allocation table to find the right translations
+ * and DMA addresses.
+ */
+static int genwqe_execute_ddcb(struct genwqe_file *cfile,
+ struct genwqe_ddcb_cmd *cmd)
+{
+ int rc;
+ struct genwqe_dev *cd = cfile->cd;
+ struct file *filp = cfile->filp;
+ struct ddcb_requ *req = container_of(cmd, struct ddcb_requ, cmd);
+
+ rc = ddcb_cmd_fixups(cfile, req);
+ if (rc != 0)
+ return rc;
+
+ rc = __genwqe_execute_raw_ddcb(cd, cmd, filp->f_flags);
+ ddcb_cmd_cleanup(cfile, req);
+ return rc;
+}
+
+static int do_execute_ddcb(struct genwqe_file *cfile,
+ unsigned long arg, int raw)
+{
+ int rc;
+ struct genwqe_ddcb_cmd *cmd;
+ struct ddcb_requ *req;
+ struct genwqe_dev *cd = cfile->cd;
+ struct file *filp = cfile->filp;
+
+ cmd = ddcb_requ_alloc();
+ if (cmd == NULL)
+ return -ENOMEM;
+
+ req = container_of(cmd, struct ddcb_requ, cmd);
+
+ if (copy_from_user(cmd, (void __user *)arg, sizeof(*cmd))) {
+ ddcb_requ_free(cmd);
+ return -EFAULT;
+ }
+
+ if (!raw)
+ rc = genwqe_execute_ddcb(cfile, cmd);
+ else
+ rc = __genwqe_execute_raw_ddcb(cd, cmd, filp->f_flags);
+
+ /* Copy back only the modifed fields. Do not copy ASIV
+ back since the copy got modified by the driver. */
+ if (copy_to_user((void __user *)arg, cmd,
+ sizeof(*cmd) - DDCB_ASIV_LENGTH)) {
+ ddcb_requ_free(cmd);
+ return -EFAULT;
+ }
+
+ ddcb_requ_free(cmd);
+ return rc;
+}
+
+/**
+ * genwqe_ioctl() - IO control
+ * @filp: file handle
+ * @cmd: command identifier (passed from user)
+ * @arg: argument (passed from user)
+ *
+ * Return: 0 success
+ */
+static long genwqe_ioctl(struct file *filp, unsigned int cmd,
+ unsigned long arg)
+{
+ int rc = 0;
+ struct genwqe_file *cfile = (struct genwqe_file *)filp->private_data;
+ struct genwqe_dev *cd = cfile->cd;
+ struct pci_dev *pci_dev = cd->pci_dev;
+ struct genwqe_reg_io __user *io;
+ u64 val;
+ u32 reg_offs;
+
+ /* Return -EIO if card hit EEH */
+ if (pci_channel_offline(pci_dev))
+ return -EIO;
+
+ if (_IOC_TYPE(cmd) != GENWQE_IOC_CODE)
+ return -EINVAL;
+
+ switch (cmd) {
+
+ case GENWQE_GET_CARD_STATE:
+ put_user(cd->card_state, (enum genwqe_card_state __user *)arg);
+ return 0;
+
+ /* Register access */
+ case GENWQE_READ_REG64: {
+ io = (struct genwqe_reg_io __user *)arg;
+
+ if (get_user(reg_offs, &io->num))
+ return -EFAULT;
+
+ if ((reg_offs >= cd->mmio_len) || (reg_offs & 0x7))
+ return -EINVAL;
+
+ val = __genwqe_readq(cd, reg_offs);
+ put_user(val, &io->val64);
+ return 0;
+ }
+
+ case GENWQE_WRITE_REG64: {
+ io = (struct genwqe_reg_io __user *)arg;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ if ((filp->f_flags & O_ACCMODE) == O_RDONLY)
+ return -EPERM;
+
+ if (get_user(reg_offs, &io->num))
+ return -EFAULT;
+
+ if ((reg_offs >= cd->mmio_len) || (reg_offs & 0x7))
+ return -EINVAL;
+
+ if (get_user(val, &io->val64))
+ return -EFAULT;
+
+ __genwqe_writeq(cd, reg_offs, val);
+ return 0;
+ }
+
+ case GENWQE_READ_REG32: {
+ io = (struct genwqe_reg_io __user *)arg;
+
+ if (get_user(reg_offs, &io->num))
+ return -EFAULT;
+
+ if ((reg_offs >= cd->mmio_len) || (reg_offs & 0x3))
+ return -EINVAL;
+
+ val = __genwqe_readl(cd, reg_offs);
+ put_user(val, &io->val64);
+ return 0;
+ }
+
+ case GENWQE_WRITE_REG32: {
+ io = (struct genwqe_reg_io __user *)arg;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ if ((filp->f_flags & O_ACCMODE) == O_RDONLY)
+ return -EPERM;
+
+ if (get_user(reg_offs, &io->num))
+ return -EFAULT;
+
+ if ((reg_offs >= cd->mmio_len) || (reg_offs & 0x3))
+ return -EINVAL;
+
+ if (get_user(val, &io->val64))
+ return -EFAULT;
+
+ __genwqe_writel(cd, reg_offs, val);
+ return 0;
+ }
+
+ /* Flash update/reading */
+ case GENWQE_SLU_UPDATE: {
+ struct genwqe_bitstream load;
+
+ if (!genwqe_is_privileged(cd))
+ return -EPERM;
+
+ if ((filp->f_flags & O_ACCMODE) == O_RDONLY)
+ return -EPERM;
+
+ if (copy_from_user(&load, (void __user *)arg,
+ sizeof(load)))
+ return -EFAULT;
+
+ rc = do_flash_update(cfile, &load);
+
+ if (copy_to_user((void __user *)arg, &load, sizeof(load)))
+ return -EFAULT;
+
+ return rc;
+ }
+
+ case GENWQE_SLU_READ: {
+ struct genwqe_bitstream load;
+
+ if (!genwqe_is_privileged(cd))
+ return -EPERM;
+
+ if (genwqe_flash_readback_fails(cd))
+ return -ENOSPC; /* known to fail for old versions */
+
+ if (copy_from_user(&load, (void __user *)arg, sizeof(load)))
+ return -EFAULT;
+
+ rc = do_flash_read(cfile, &load);
+
+ if (copy_to_user((void __user *)arg, &load, sizeof(load)))
+ return -EFAULT;
+
+ return rc;
+ }
+
+ /* memory pinning and unpinning */
+ case GENWQE_PIN_MEM: {
+ struct genwqe_mem m;
+
+ if (copy_from_user(&m, (void __user *)arg, sizeof(m)))
+ return -EFAULT;
+
+ return genwqe_pin_mem(cfile, &m);
+ }
+
+ case GENWQE_UNPIN_MEM: {
+ struct genwqe_mem m;
+
+ if (copy_from_user(&m, (void __user *)arg, sizeof(m)))
+ return -EFAULT;
+
+ return genwqe_unpin_mem(cfile, &m);
+ }
+
+ /* launch an DDCB and wait for completion */
+ case GENWQE_EXECUTE_DDCB:
+ return do_execute_ddcb(cfile, arg, 0);
+
+ case GENWQE_EXECUTE_RAW_DDCB: {
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ return do_execute_ddcb(cfile, arg, 1);
+ }
+
+ default:
+ return -EINVAL;
+ }
+
+ return rc;
+}
+
+#if defined(CONFIG_COMPAT)
+/**
+ * genwqe_compat_ioctl() - Compatibility ioctl
+ *
+ * Called whenever a 32-bit process running under a 64-bit kernel
+ * performs an ioctl on /dev/genwqe<n>_card.
+ *
+ * @filp: file pointer.
+ * @cmd: command.
+ * @arg: user argument.
+ * Return: zero on success or negative number on failure.
+ */
+static long genwqe_compat_ioctl(struct file *filp, unsigned int cmd,
+ unsigned long arg)
+{
+ return genwqe_ioctl(filp, cmd, arg);
+}
+#endif /* defined(CONFIG_COMPAT) */
+
+static const struct file_operations genwqe_fops = {
+ .owner = THIS_MODULE,
+ .open = genwqe_open,
+ .fasync = genwqe_fasync,
+ .mmap = genwqe_mmap,
+ .unlocked_ioctl = genwqe_ioctl,
+#if defined(CONFIG_COMPAT)
+ .compat_ioctl = genwqe_compat_ioctl,
+#endif
+ .release = genwqe_release,
+};
+
+static int genwqe_device_initialized(struct genwqe_dev *cd)
+{
+ return cd->dev != NULL;
+}
+
+/**
+ * genwqe_device_create() - Create and configure genwqe char device
+ * @cd: genwqe device descriptor
+ *
+ * This function must be called before we create any more genwqe
+ * character devices, because it is allocating the major and minor
+ * number which are supposed to be used by the client drivers.
+ */
+int genwqe_device_create(struct genwqe_dev *cd)
+{
+ int rc;
+ struct pci_dev *pci_dev = cd->pci_dev;
+
+ /*
+ * Here starts the individual setup per client. It must
+ * initialize its own cdev data structure with its own fops.
+ * The appropriate devnum needs to be created. The ranges must
+ * not overlap.
+ */
+ rc = alloc_chrdev_region(&cd->devnum_genwqe, 0,
+ GENWQE_MAX_MINOR, GENWQE_DEVNAME);
+ if (rc < 0) {
+ dev_err(&pci_dev->dev, "err: alloc_chrdev_region failed\n");
+ goto err_dev;
+ }
+
+ cdev_init(&cd->cdev_genwqe, &genwqe_fops);
+ cd->cdev_genwqe.owner = THIS_MODULE;
+
+ rc = cdev_add(&cd->cdev_genwqe, cd->devnum_genwqe, 1);
+ if (rc < 0) {
+ dev_err(&pci_dev->dev, "err: cdev_add failed\n");
+ goto err_add;
+ }
+
+ /*
+ * Finally the device in /dev/... must be created. The rule is
+ * to use card%d_clientname for each created device.
+ */
+ cd->dev = device_create_with_groups(cd->class_genwqe,
+ &cd->pci_dev->dev,
+ cd->devnum_genwqe, cd,
+ genwqe_attribute_groups,
+ GENWQE_DEVNAME "%u_card",
+ cd->card_idx);
+ if (IS_ERR(cd->dev)) {
+ rc = PTR_ERR(cd->dev);
+ goto err_cdev;
+ }
+
+ rc = genwqe_init_debugfs(cd);
+ if (rc != 0)
+ goto err_debugfs;
+
+ return 0;
+
+ err_debugfs:
+ device_destroy(cd->class_genwqe, cd->devnum_genwqe);
+ err_cdev:
+ cdev_del(&cd->cdev_genwqe);
+ err_add:
+ unregister_chrdev_region(cd->devnum_genwqe, GENWQE_MAX_MINOR);
+ err_dev:
+ cd->dev = NULL;
+ return rc;
+}
+
+static int genwqe_inform_and_stop_processes(struct genwqe_dev *cd)
+{
+ int rc;
+ unsigned int i;
+ struct pci_dev *pci_dev = cd->pci_dev;
+
+ if (!genwqe_open_files(cd))
+ return 0;
+
+ dev_warn(&pci_dev->dev, "[%s] send SIGIO and wait ...\n", __func__);
+
+ rc = genwqe_kill_fasync(cd, SIGIO);
+ if (rc > 0) {
+ /* give kill_timeout seconds to close file descriptors ... */
+ for (i = 0; (i < genwqe_kill_timeout) &&
+ genwqe_open_files(cd); i++) {
+ dev_info(&pci_dev->dev, " %d sec ...", i);
+
+ cond_resched();
+ msleep(1000);
+ }
+
+ /* if no open files we can safely continue, else ... */
+ if (!genwqe_open_files(cd))
+ return 0;
+
+ dev_warn(&pci_dev->dev,
+ "[%s] send SIGKILL and wait ...\n", __func__);
+
+ rc = genwqe_force_sig(cd, SIGKILL); /* force terminate */
+ if (rc) {
+ /* Give kill_timout more seconds to end processes */
+ for (i = 0; (i < genwqe_kill_timeout) &&
+ genwqe_open_files(cd); i++) {
+ dev_warn(&pci_dev->dev, " %d sec ...", i);
+
+ cond_resched();
+ msleep(1000);
+ }
+ }
+ }
+ return 0;
+}
+
+/**
+ * genwqe_device_remove() - Remove genwqe's char device
+ *
+ * This function must be called after the client devices are removed
+ * because it will free the major/minor number range for the genwqe
+ * drivers.
+ *
+ * This function must be robust enough to be called twice.
+ */
+int genwqe_device_remove(struct genwqe_dev *cd)
+{
+ int rc;
+ struct pci_dev *pci_dev = cd->pci_dev;
+
+ if (!genwqe_device_initialized(cd))
+ return 1;
+
+ genwqe_inform_and_stop_processes(cd);
+
+ /*
+ * We currently do wait until all filedescriptors are
+ * closed. This leads to a problem when we abort the
+ * application which will decrease this reference from
+ * 1/unused to 0/illegal and not from 2/used 1/empty.
+ */
+ rc = atomic_read(&cd->cdev_genwqe.kobj.kref.refcount);
+ if (rc != 1) {
+ dev_err(&pci_dev->dev,
+ "[%s] err: cdev_genwqe...refcount=%d\n", __func__, rc);
+ panic("Fatal err: cannot free resources with pending references!");
+ }
+
+ genqwe_exit_debugfs(cd);
+ device_destroy(cd->class_genwqe, cd->devnum_genwqe);
+ cdev_del(&cd->cdev_genwqe);
+ unregister_chrdev_region(cd->devnum_genwqe, GENWQE_MAX_MINOR);
+ cd->dev = NULL;
+
+ return 0;
+}
diff --git a/drivers/misc/genwqe/card_sysfs.c b/drivers/misc/genwqe/card_sysfs.c
new file mode 100644
index 0000000..6ab31ef
--- /dev/null
+++ b/drivers/misc/genwqe/card_sysfs.c
@@ -0,0 +1,303 @@
+/**
+ * IBM Accelerator Family 'GenWQE'
+ *
+ * (C) Copyright IBM Corp. 2013
+ *
+ * Author: Frank Haverkamp <haver@linux.vnet.ibm.com>
+ * Author: Joerg-Stephan Vogt <jsvogt@de.ibm.com>
+ * Author: Michael Jung <mijung@gmx.net>
+ * Author: Michael Ruettger <michael@ibmra.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+/*
+ * Sysfs interfaces for the GenWQE card. There are attributes to query
+ * the version of the bitstream as well as some for the driver. For
+ * debugging, please also see the debugfs interfaces of this driver.
+ */
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/string.h>
+#include <linux/fs.h>
+#include <linux/sysfs.h>
+#include <linux/ctype.h>
+#include <linux/device.h>
+
+#include "card_base.h"
+#include "card_ddcb.h"
+
+static const char * const genwqe_types[] = {
+ [GENWQE_TYPE_ALTERA_230] = "GenWQE4-230",
+ [GENWQE_TYPE_ALTERA_530] = "GenWQE4-530",
+ [GENWQE_TYPE_ALTERA_A4] = "GenWQE5-A4",
+ [GENWQE_TYPE_ALTERA_A7] = "GenWQE5-A7",
+};
+
+static ssize_t status_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct genwqe_dev *cd = dev_get_drvdata(dev);
+ const char *cs[GENWQE_CARD_STATE_MAX] = { "unused", "used", "error" };
+
+ return sprintf(buf, "%s\n", cs[cd->card_state]);
+}
+static DEVICE_ATTR_RO(status);
+
+static ssize_t appid_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ char app_name[5];
+ struct genwqe_dev *cd = dev_get_drvdata(dev);
+
+ genwqe_read_app_id(cd, app_name, sizeof(app_name));
+ return sprintf(buf, "%s\n", app_name);
+}
+static DEVICE_ATTR_RO(appid);
+
+static ssize_t version_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ u64 slu_id, app_id;
+ struct genwqe_dev *cd = dev_get_drvdata(dev);
+
+ slu_id = __genwqe_readq(cd, IO_SLU_UNITCFG);
+ app_id = __genwqe_readq(cd, IO_APP_UNITCFG);
+
+ return sprintf(buf, "%016llx.%016llx\n", slu_id, app_id);
+}
+static DEVICE_ATTR_RO(version);
+
+static ssize_t type_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ u8 card_type;
+ struct genwqe_dev *cd = dev_get_drvdata(dev);
+
+ card_type = genwqe_card_type(cd);
+ return sprintf(buf, "%s\n", (card_type >= ARRAY_SIZE(genwqe_types)) ?
+ "invalid" : genwqe_types[card_type]);
+}
+static DEVICE_ATTR_RO(type);
+
+static ssize_t tempsens_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ u64 tempsens;
+ struct genwqe_dev *cd = dev_get_drvdata(dev);
+
+ tempsens = __genwqe_readq(cd, IO_SLU_TEMPERATURE_SENSOR);
+ return sprintf(buf, "%016llx\n", tempsens);
+}
+static DEVICE_ATTR_RO(tempsens);
+
+static ssize_t freerunning_timer_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ u64 t;
+ struct genwqe_dev *cd = dev_get_drvdata(dev);
+
+ t = __genwqe_readq(cd, IO_SLC_FREE_RUNNING_TIMER);
+ return sprintf(buf, "%016llx\n", t);
+}
+static DEVICE_ATTR_RO(freerunning_timer);
+
+static ssize_t queue_working_time_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ u64 t;
+ struct genwqe_dev *cd = dev_get_drvdata(dev);
+
+ t = __genwqe_readq(cd, IO_SLC_QUEUE_WTIME);
+ return sprintf(buf, "%016llx\n", t);
+}
+static DEVICE_ATTR_RO(queue_working_time);
+
+static ssize_t base_clock_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ u64 base_clock;
+ struct genwqe_dev *cd = dev_get_drvdata(dev);
+
+ base_clock = genwqe_base_clock_frequency(cd);
+ return sprintf(buf, "%lld\n", base_clock);
+}
+static DEVICE_ATTR_RO(base_clock);
+
+/**
+ * curr_bitstream_show() - Show the current bitstream id
+ *
+ * There is a bug in some old versions of the CPLD which selects the
+ * bitstream, which causes the IO_SLU_BITSTREAM register to report
+ * unreliable data in very rare cases. This makes this sysfs
+ * unreliable up to the point were a new CPLD version is being used.
+ *
+ * Unfortunately there is no automatic way yet to query the CPLD
+ * version, such that you need to manually ensure via programming
+ * tools that you have a recent version of the CPLD software.
+ *
+ * The proposed circumvention is to use a special recovery bitstream
+ * on the backup partition (0) to identify problems while loading the
+ * image.
+ */
+static ssize_t curr_bitstream_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ int curr_bitstream;
+ struct genwqe_dev *cd = dev_get_drvdata(dev);
+
+ curr_bitstream = __genwqe_readq(cd, IO_SLU_BITSTREAM) & 0x1;
+ return sprintf(buf, "%d\n", curr_bitstream);
+}
+static DEVICE_ATTR_RO(curr_bitstream);
+
+/**
+ * next_bitstream_show() - Show the next activated bitstream
+ *
+ * IO_SLC_CFGREG_SOFTRESET: This register can only be accessed by the PF.
+ */
+static ssize_t next_bitstream_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ int next_bitstream;
+ struct genwqe_dev *cd = dev_get_drvdata(dev);
+
+ switch ((cd->softreset & 0xc) >> 2) {
+ case 0x2:
+ next_bitstream = 0;
+ break;
+ case 0x3:
+ next_bitstream = 1;
+ break;
+ default:
+ next_bitstream = -1;
+ break; /* error */
+ }
+ return sprintf(buf, "%d\n", next_bitstream);
+}
+
+static ssize_t next_bitstream_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ int partition;
+ struct genwqe_dev *cd = dev_get_drvdata(dev);
+
+ if (kstrtoint(buf, 0, &partition) < 0)
+ return -EINVAL;
+
+ switch (partition) {
+ case 0x0:
+ cd->softreset = 0x78;
+ break;
+ case 0x1:
+ cd->softreset = 0x7c;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ __genwqe_writeq(cd, IO_SLC_CFGREG_SOFTRESET, cd->softreset);
+ return count;
+}
+static DEVICE_ATTR_RW(next_bitstream);
+
+static ssize_t reload_bitstream_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ int reload;
+ struct genwqe_dev *cd = dev_get_drvdata(dev);
+
+ if (kstrtoint(buf, 0, &reload) < 0)
+ return -EINVAL;
+
+ if (reload == 0x1) {
+ if (cd->card_state == GENWQE_CARD_UNUSED ||
+ cd->card_state == GENWQE_CARD_USED)
+ cd->card_state = GENWQE_CARD_RELOAD_BITSTREAM;
+ else
+ return -EIO;
+ } else {
+ return -EINVAL;
+ }
+
+ return count;
+}
+static DEVICE_ATTR_WO(reload_bitstream);
+
+/*
+ * Create device_attribute structures / params: name, mode, show, store
+ * additional flag if valid in VF
+ */
+static struct attribute *genwqe_attributes[] = {
+ &dev_attr_tempsens.attr,
+ &dev_attr_next_bitstream.attr,
+ &dev_attr_curr_bitstream.attr,
+ &dev_attr_base_clock.attr,
+ &dev_attr_type.attr,
+ &dev_attr_version.attr,
+ &dev_attr_appid.attr,
+ &dev_attr_status.attr,
+ &dev_attr_freerunning_timer.attr,
+ &dev_attr_queue_working_time.attr,
+ &dev_attr_reload_bitstream.attr,
+ NULL,
+};
+
+static struct attribute *genwqe_normal_attributes[] = {
+ &dev_attr_type.attr,
+ &dev_attr_version.attr,
+ &dev_attr_appid.attr,
+ &dev_attr_status.attr,
+ &dev_attr_freerunning_timer.attr,
+ &dev_attr_queue_working_time.attr,
+ NULL,
+};
+
+/**
+ * genwqe_is_visible() - Determine if sysfs attribute should be visible or not
+ *
+ * VFs have restricted mmio capabilities, so not all sysfs entries
+ * are allowed in VFs.
+ */
+static umode_t genwqe_is_visible(struct kobject *kobj,
+ struct attribute *attr, int n)
+{
+ unsigned int j;
+ struct device *dev = container_of(kobj, struct device, kobj);
+ struct genwqe_dev *cd = dev_get_drvdata(dev);
+ umode_t mode = attr->mode;
+
+ if (genwqe_is_privileged(cd))
+ return mode;
+
+ for (j = 0; genwqe_normal_attributes[j] != NULL; j++)
+ if (genwqe_normal_attributes[j] == attr)
+ return mode;
+
+ return 0;
+}
+
+static struct attribute_group genwqe_attribute_group = {
+ .is_visible = genwqe_is_visible,
+ .attrs = genwqe_attributes,
+};
+
+const struct attribute_group *genwqe_attribute_groups[] = {
+ &genwqe_attribute_group,
+ NULL,
+};
diff --git a/drivers/misc/genwqe/card_utils.c b/drivers/misc/genwqe/card_utils.c
new file mode 100644
index 0000000..5246605
--- /dev/null
+++ b/drivers/misc/genwqe/card_utils.c
@@ -0,0 +1,1060 @@
+/**
+ * IBM Accelerator Family 'GenWQE'
+ *
+ * (C) Copyright IBM Corp. 2013
+ *
+ * Author: Frank Haverkamp <haver@linux.vnet.ibm.com>
+ * Author: Joerg-Stephan Vogt <jsvogt@de.ibm.com>
+ * Author: Michael Jung <mijung@gmx.net>
+ * Author: Michael Ruettger <michael@ibmra.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+/*
+ * Miscelanous functionality used in the other GenWQE driver parts.
+ */
+
+#include <linux/kernel.h>
+#include <linux/dma-mapping.h>
+#include <linux/sched.h>
+#include <linux/vmalloc.h>
+#include <linux/page-flags.h>
+#include <linux/scatterlist.h>
+#include <linux/hugetlb.h>
+#include <linux/iommu.h>
+#include <linux/delay.h>
+#include <linux/pci.h>
+#include <linux/dma-mapping.h>
+#include <linux/ctype.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/delay.h>
+#include <asm/pgtable.h>
+
+#include "genwqe_driver.h"
+#include "card_base.h"
+#include "card_ddcb.h"
+
+/**
+ * __genwqe_writeq() - Write 64-bit register
+ * @cd: genwqe device descriptor
+ * @byte_offs: byte offset within BAR
+ * @val: 64-bit value
+ *
+ * Return: 0 if success; < 0 if error
+ */
+int __genwqe_writeq(struct genwqe_dev *cd, u64 byte_offs, u64 val)
+{
+ struct pci_dev *pci_dev = cd->pci_dev;
+
+ if (cd->err_inject & GENWQE_INJECT_HARDWARE_FAILURE)
+ return -EIO;
+
+ if (cd->mmio == NULL)
+ return -EIO;
+
+ if (pci_channel_offline(pci_dev))
+ return -EIO;
+
+ __raw_writeq((__force u64)cpu_to_be64(val), cd->mmio + byte_offs);
+ return 0;
+}
+
+/**
+ * __genwqe_readq() - Read 64-bit register
+ * @cd: genwqe device descriptor
+ * @byte_offs: offset within BAR
+ *
+ * Return: value from register
+ */
+u64 __genwqe_readq(struct genwqe_dev *cd, u64 byte_offs)
+{
+ if (cd->err_inject & GENWQE_INJECT_HARDWARE_FAILURE)
+ return 0xffffffffffffffffull;
+
+ if ((cd->err_inject & GENWQE_INJECT_GFIR_FATAL) &&
+ (byte_offs == IO_SLC_CFGREG_GFIR))
+ return 0x000000000000ffffull;
+
+ if ((cd->err_inject & GENWQE_INJECT_GFIR_INFO) &&
+ (byte_offs == IO_SLC_CFGREG_GFIR))
+ return 0x00000000ffff0000ull;
+
+ if (cd->mmio == NULL)
+ return 0xffffffffffffffffull;
+
+ return be64_to_cpu((__force __be64)__raw_readq(cd->mmio + byte_offs));
+}
+
+/**
+ * __genwqe_writel() - Write 32-bit register
+ * @cd: genwqe device descriptor
+ * @byte_offs: byte offset within BAR
+ * @val: 32-bit value
+ *
+ * Return: 0 if success; < 0 if error
+ */
+int __genwqe_writel(struct genwqe_dev *cd, u64 byte_offs, u32 val)
+{
+ struct pci_dev *pci_dev = cd->pci_dev;
+
+ if (cd->err_inject & GENWQE_INJECT_HARDWARE_FAILURE)
+ return -EIO;
+
+ if (cd->mmio == NULL)
+ return -EIO;
+
+ if (pci_channel_offline(pci_dev))
+ return -EIO;
+
+ __raw_writel((__force u32)cpu_to_be32(val), cd->mmio + byte_offs);
+ return 0;
+}
+
+/**
+ * __genwqe_readl() - Read 32-bit register
+ * @cd: genwqe device descriptor
+ * @byte_offs: offset within BAR
+ *
+ * Return: Value from register
+ */
+u32 __genwqe_readl(struct genwqe_dev *cd, u64 byte_offs)
+{
+ if (cd->err_inject & GENWQE_INJECT_HARDWARE_FAILURE)
+ return 0xffffffff;
+
+ if (cd->mmio == NULL)
+ return 0xffffffff;
+
+ return be32_to_cpu((__force __be32)__raw_readl(cd->mmio + byte_offs));
+}
+
+/**
+ * genwqe_read_app_id() - Extract app_id
+ *
+ * app_unitcfg need to be filled with valid data first
+ */
+int genwqe_read_app_id(struct genwqe_dev *cd, char *app_name, int len)
+{
+ int i, j;
+ u32 app_id = (u32)cd->app_unitcfg;
+
+ memset(app_name, 0, len);
+ for (i = 0, j = 0; j < min(len, 4); j++) {
+ char ch = (char)((app_id >> (24 - j*8)) & 0xff);
+
+ if (ch == ' ')
+ continue;
+ app_name[i++] = isprint(ch) ? ch : 'X';
+ }
+ return i;
+}
+
+/**
+ * genwqe_init_crc32() - Prepare a lookup table for fast crc32 calculations
+ *
+ * Existing kernel functions seem to use a different polynom,
+ * therefore we could not use them here.
+ *
+ * Genwqe's Polynomial = 0x20044009
+ */
+#define CRC32_POLYNOMIAL 0x20044009
+static u32 crc32_tab[256]; /* crc32 lookup table */
+
+void genwqe_init_crc32(void)
+{
+ int i, j;
+ u32 crc;
+
+ for (i = 0; i < 256; i++) {
+ crc = i << 24;
+ for (j = 0; j < 8; j++) {
+ if (crc & 0x80000000)
+ crc = (crc << 1) ^ CRC32_POLYNOMIAL;
+ else
+ crc = (crc << 1);
+ }
+ crc32_tab[i] = crc;
+ }
+}
+
+/**
+ * genwqe_crc32() - Generate 32-bit crc as required for DDCBs
+ * @buff: pointer to data buffer
+ * @len: length of data for calculation
+ * @init: initial crc (0xffffffff at start)
+ *
+ * polynomial = x^32 * + x^29 + x^18 + x^14 + x^3 + 1 (0x20044009)
+
+ * Example: 4 bytes 0x01 0x02 0x03 0x04 with init=0xffffffff should
+ * result in a crc32 of 0xf33cb7d3.
+ *
+ * The existing kernel crc functions did not cover this polynom yet.
+ *
+ * Return: crc32 checksum.
+ */
+u32 genwqe_crc32(u8 *buff, size_t len, u32 init)
+{
+ int i;
+ u32 crc;
+
+ crc = init;
+ while (len--) {
+ i = ((crc >> 24) ^ *buff++) & 0xFF;
+ crc = (crc << 8) ^ crc32_tab[i];
+ }
+ return crc;
+}
+
+void *__genwqe_alloc_consistent(struct genwqe_dev *cd, size_t size,
+ dma_addr_t *dma_handle)
+{
+ if (get_order(size) > MAX_ORDER)
+ return NULL;
+
+ return dma_alloc_coherent(&cd->pci_dev->dev, size, dma_handle,
+ GFP_KERNEL);
+}
+
+void __genwqe_free_consistent(struct genwqe_dev *cd, size_t size,
+ void *vaddr, dma_addr_t dma_handle)
+{
+ if (vaddr == NULL)
+ return;
+
+ dma_free_coherent(&cd->pci_dev->dev, size, vaddr, dma_handle);
+}
+
+static void genwqe_unmap_pages(struct genwqe_dev *cd, dma_addr_t *dma_list,
+ int num_pages)
+{
+ int i;
+ struct pci_dev *pci_dev = cd->pci_dev;
+
+ for (i = 0; (i < num_pages) && (dma_list[i] != 0x0); i++) {
+ pci_unmap_page(pci_dev, dma_list[i],
+ PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
+ dma_list[i] = 0x0;
+ }
+}
+
+static int genwqe_map_pages(struct genwqe_dev *cd,
+ struct page **page_list, int num_pages,
+ dma_addr_t *dma_list)
+{
+ int i;
+ struct pci_dev *pci_dev = cd->pci_dev;
+
+ /* establish DMA mapping for requested pages */
+ for (i = 0; i < num_pages; i++) {
+ dma_addr_t daddr;
+
+ dma_list[i] = 0x0;
+ daddr = pci_map_page(pci_dev, page_list[i],
+ 0, /* map_offs */
+ PAGE_SIZE,
+ PCI_DMA_BIDIRECTIONAL); /* FIXME rd/rw */
+
+ if (pci_dma_mapping_error(pci_dev, daddr)) {
+ dev_err(&pci_dev->dev,
+ "[%s] err: no dma addr daddr=%016llx!\n",
+ __func__, (long long)daddr);
+ goto err;
+ }
+
+ dma_list[i] = daddr;
+ }
+ return 0;
+
+ err:
+ genwqe_unmap_pages(cd, dma_list, num_pages);
+ return -EIO;
+}
+
+static int genwqe_sgl_size(int num_pages)
+{
+ int len, num_tlb = num_pages / 7;
+
+ len = sizeof(struct sg_entry) * (num_pages+num_tlb + 1);
+ return roundup(len, PAGE_SIZE);
+}
+
+/**
+ * genwqe_alloc_sync_sgl() - Allocate memory for sgl and overlapping pages
+ *
+ * Allocates memory for sgl and overlapping pages. Pages which might
+ * overlap other user-space memory blocks are being cached for DMAs,
+ * such that we do not run into syncronization issues. Data is copied
+ * from user-space into the cached pages.
+ */
+int genwqe_alloc_sync_sgl(struct genwqe_dev *cd, struct genwqe_sgl *sgl,
+ void __user *user_addr, size_t user_size)
+{
+ int rc;
+ struct pci_dev *pci_dev = cd->pci_dev;
+
+ sgl->fpage_offs = offset_in_page((unsigned long)user_addr);
+ sgl->fpage_size = min_t(size_t, PAGE_SIZE-sgl->fpage_offs, user_size);
+ sgl->nr_pages = DIV_ROUND_UP(sgl->fpage_offs + user_size, PAGE_SIZE);
+ sgl->lpage_size = (user_size - sgl->fpage_size) % PAGE_SIZE;
+
+ dev_dbg(&pci_dev->dev, "[%s] uaddr=%p usize=%8ld nr_pages=%ld fpage_offs=%lx fpage_size=%ld lpage_size=%ld\n",
+ __func__, user_addr, user_size, sgl->nr_pages,
+ sgl->fpage_offs, sgl->fpage_size, sgl->lpage_size);
+
+ sgl->user_addr = user_addr;
+ sgl->user_size = user_size;
+ sgl->sgl_size = genwqe_sgl_size(sgl->nr_pages);
+
+ if (get_order(sgl->sgl_size) > MAX_ORDER) {
+ dev_err(&pci_dev->dev,
+ "[%s] err: too much memory requested!\n", __func__);
+ return -ENOMEM;
+ }
+
+ sgl->sgl = __genwqe_alloc_consistent(cd, sgl->sgl_size,
+ &sgl->sgl_dma_addr);
+ if (sgl->sgl == NULL) {
+ dev_err(&pci_dev->dev,
+ "[%s] err: no memory available!\n", __func__);
+ return -ENOMEM;
+ }
+
+ /* Only use buffering on incomplete pages */
+ if ((sgl->fpage_size != 0) && (sgl->fpage_size != PAGE_SIZE)) {
+ sgl->fpage = __genwqe_alloc_consistent(cd, PAGE_SIZE,
+ &sgl->fpage_dma_addr);
+ if (sgl->fpage == NULL)
+ goto err_out;
+
+ /* Sync with user memory */
+ if (copy_from_user(sgl->fpage + sgl->fpage_offs,
+ user_addr, sgl->fpage_size)) {
+ rc = -EFAULT;
+ goto err_out;
+ }
+ }
+ if (sgl->lpage_size != 0) {
+ sgl->lpage = __genwqe_alloc_consistent(cd, PAGE_SIZE,
+ &sgl->lpage_dma_addr);
+ if (sgl->lpage == NULL)
+ goto err_out1;
+
+ /* Sync with user memory */
+ if (copy_from_user(sgl->lpage, user_addr + user_size -
+ sgl->lpage_size, sgl->lpage_size)) {
+ rc = -EFAULT;
+ goto err_out2;
+ }
+ }
+ return 0;
+
+ err_out2:
+ __genwqe_free_consistent(cd, PAGE_SIZE, sgl->lpage,
+ sgl->lpage_dma_addr);
+ sgl->lpage = NULL;
+ sgl->lpage_dma_addr = 0;
+ err_out1:
+ __genwqe_free_consistent(cd, PAGE_SIZE, sgl->fpage,
+ sgl->fpage_dma_addr);
+ sgl->fpage = NULL;
+ sgl->fpage_dma_addr = 0;
+ err_out:
+ __genwqe_free_consistent(cd, sgl->sgl_size, sgl->sgl,
+ sgl->sgl_dma_addr);
+ sgl->sgl = NULL;
+ sgl->sgl_dma_addr = 0;
+ sgl->sgl_size = 0;
+ return -ENOMEM;
+}
+
+int genwqe_setup_sgl(struct genwqe_dev *cd, struct genwqe_sgl *sgl,
+ dma_addr_t *dma_list)
+{
+ int i = 0, j = 0, p;
+ unsigned long dma_offs, map_offs;
+ dma_addr_t prev_daddr = 0;
+ struct sg_entry *s, *last_s = NULL;
+ size_t size = sgl->user_size;
+
+ dma_offs = 128; /* next block if needed/dma_offset */
+ map_offs = sgl->fpage_offs; /* offset in first page */
+
+ s = &sgl->sgl[0]; /* first set of 8 entries */
+ p = 0; /* page */
+ while (p < sgl->nr_pages) {
+ dma_addr_t daddr;
+ unsigned int size_to_map;
+
+ /* always write the chaining entry, cleanup is done later */
+ j = 0;
+ s[j].target_addr = cpu_to_be64(sgl->sgl_dma_addr + dma_offs);
+ s[j].len = cpu_to_be32(128);
+ s[j].flags = cpu_to_be32(SG_CHAINED);
+ j++;
+
+ while (j < 8) {
+ /* DMA mapping for requested page, offs, size */
+ size_to_map = min(size, PAGE_SIZE - map_offs);
+
+ if ((p == 0) && (sgl->fpage != NULL)) {
+ daddr = sgl->fpage_dma_addr + map_offs;
+
+ } else if ((p == sgl->nr_pages - 1) &&
+ (sgl->lpage != NULL)) {
+ daddr = sgl->lpage_dma_addr;
+ } else {
+ daddr = dma_list[p] + map_offs;
+ }
+
+ size -= size_to_map;
+ map_offs = 0;
+
+ if (prev_daddr == daddr) {
+ u32 prev_len = be32_to_cpu(last_s->len);
+
+ /* pr_info("daddr combining: "
+ "%016llx/%08x -> %016llx\n",
+ prev_daddr, prev_len, daddr); */
+
+ last_s->len = cpu_to_be32(prev_len +
+ size_to_map);
+
+ p++; /* process next page */
+ if (p == sgl->nr_pages)
+ goto fixup; /* nothing to do */
+
+ prev_daddr = daddr + size_to_map;
+ continue;
+ }
+
+ /* start new entry */
+ s[j].target_addr = cpu_to_be64(daddr);
+ s[j].len = cpu_to_be32(size_to_map);
+ s[j].flags = cpu_to_be32(SG_DATA);
+ prev_daddr = daddr + size_to_map;
+ last_s = &s[j];
+ j++;
+
+ p++; /* process next page */
+ if (p == sgl->nr_pages)
+ goto fixup; /* nothing to do */
+ }
+ dma_offs += 128;
+ s += 8; /* continue 8 elements further */
+ }
+ fixup:
+ if (j == 1) { /* combining happend on last entry! */
+ s -= 8; /* full shift needed on previous sgl block */
+ j = 7; /* shift all elements */
+ }
+
+ for (i = 0; i < j; i++) /* move elements 1 up */
+ s[i] = s[i + 1];
+
+ s[i].target_addr = cpu_to_be64(0);
+ s[i].len = cpu_to_be32(0);
+ s[i].flags = cpu_to_be32(SG_END_LIST);
+ return 0;
+}
+
+/**
+ * genwqe_free_sync_sgl() - Free memory for sgl and overlapping pages
+ *
+ * After the DMA transfer has been completed we free the memory for
+ * the sgl and the cached pages. Data is being transfered from cached
+ * pages into user-space buffers.
+ */
+int genwqe_free_sync_sgl(struct genwqe_dev *cd, struct genwqe_sgl *sgl)
+{
+ int rc = 0;
+ struct pci_dev *pci_dev = cd->pci_dev;
+
+ if (sgl->fpage) {
+ if (copy_to_user(sgl->user_addr, sgl->fpage + sgl->fpage_offs,
+ sgl->fpage_size)) {
+ dev_err(&pci_dev->dev, "[%s] err: copying fpage!\n",
+ __func__);
+ rc = -EFAULT;
+ }
+ __genwqe_free_consistent(cd, PAGE_SIZE, sgl->fpage,
+ sgl->fpage_dma_addr);
+ sgl->fpage = NULL;
+ sgl->fpage_dma_addr = 0;
+ }
+ if (sgl->lpage) {
+ if (copy_to_user(sgl->user_addr + sgl->user_size -
+ sgl->lpage_size, sgl->lpage,
+ sgl->lpage_size)) {
+ dev_err(&pci_dev->dev, "[%s] err: copying lpage!\n",
+ __func__);
+ rc = -EFAULT;
+ }
+ __genwqe_free_consistent(cd, PAGE_SIZE, sgl->lpage,
+ sgl->lpage_dma_addr);
+ sgl->lpage = NULL;
+ sgl->lpage_dma_addr = 0;
+ }
+ __genwqe_free_consistent(cd, sgl->sgl_size, sgl->sgl,
+ sgl->sgl_dma_addr);
+
+ sgl->sgl = NULL;
+ sgl->sgl_dma_addr = 0x0;
+ sgl->sgl_size = 0;
+ return rc;
+}
+
+/**
+ * free_user_pages() - Give pinned pages back
+ *
+ * Documentation of get_user_pages is in mm/memory.c:
+ *
+ * If the page is written to, set_page_dirty (or set_page_dirty_lock,
+ * as appropriate) must be called after the page is finished with, and
+ * before put_page is called.
+ *
+ * FIXME Could be of use to others and might belong in the generic
+ * code, if others agree. E.g.
+ * ll_free_user_pages in drivers/staging/lustre/lustre/llite/rw26.c
+ * ceph_put_page_vector in net/ceph/pagevec.c
+ * maybe more?
+ */
+static int free_user_pages(struct page **page_list, unsigned int nr_pages,
+ int dirty)
+{
+ unsigned int i;
+
+ for (i = 0; i < nr_pages; i++) {
+ if (page_list[i] != NULL) {
+ if (dirty)
+ set_page_dirty_lock(page_list[i]);
+ put_page(page_list[i]);
+ }
+ }
+ return 0;
+}
+
+/**
+ * genwqe_user_vmap() - Map user-space memory to virtual kernel memory
+ * @cd: pointer to genwqe device
+ * @m: mapping params
+ * @uaddr: user virtual address
+ * @size: size of memory to be mapped
+ *
+ * We need to think about how we could speed this up. Of course it is
+ * not a good idea to do this over and over again, like we are
+ * currently doing it. Nevertheless, I am curious where on the path
+ * the performance is spend. Most probably within the memory
+ * allocation functions, but maybe also in the DMA mapping code.
+ *
+ * Restrictions: The maximum size of the possible mapping currently depends
+ * on the amount of memory we can get using kzalloc() for the
+ * page_list and pci_alloc_consistent for the sg_list.
+ * The sg_list is currently itself not scattered, which could
+ * be fixed with some effort. The page_list must be split into
+ * PAGE_SIZE chunks too. All that will make the complicated
+ * code more complicated.
+ *
+ * Return: 0 if success
+ */
+int genwqe_user_vmap(struct genwqe_dev *cd, struct dma_mapping *m, void *uaddr,
+ unsigned long size, struct ddcb_requ *req)
+{
+ int rc = -EINVAL;
+ unsigned long data, offs;
+ struct pci_dev *pci_dev = cd->pci_dev;
+
+ if ((uaddr == NULL) || (size == 0)) {
+ m->size = 0; /* mark unused and not added */
+ return -EINVAL;
+ }
+ m->u_vaddr = uaddr;
+ m->size = size;
+
+ /* determine space needed for page_list. */
+ data = (unsigned long)uaddr;
+ offs = offset_in_page(data);
+ m->nr_pages = DIV_ROUND_UP(offs + size, PAGE_SIZE);
+
+ m->page_list = kcalloc(m->nr_pages,
+ sizeof(struct page *) + sizeof(dma_addr_t),
+ GFP_KERNEL);
+ if (!m->page_list) {
+ dev_err(&pci_dev->dev, "err: alloc page_list failed\n");
+ m->nr_pages = 0;
+ m->u_vaddr = NULL;
+ m->size = 0; /* mark unused and not added */
+ return -ENOMEM;
+ }
+ m->dma_list = (dma_addr_t *)(m->page_list + m->nr_pages);
+
+ /* pin user pages in memory */
+ rc = get_user_pages_fast(data & PAGE_MASK, /* page aligned addr */
+ m->nr_pages,
+ 1, /* write by caller */
+ m->page_list); /* ptrs to pages */
+ if (rc < 0)
+ goto fail_get_user_pages;
+
+ /* assumption: get_user_pages can be killed by signals. */
+ if (rc < m->nr_pages) {
+ free_user_pages(m->page_list, rc, 0);
+ rc = -EFAULT;
+ goto fail_get_user_pages;
+ }
+
+ rc = genwqe_map_pages(cd, m->page_list, m->nr_pages, m->dma_list);
+ if (rc != 0)
+ goto fail_free_user_pages;
+
+ return 0;
+
+ fail_free_user_pages:
+ free_user_pages(m->page_list, m->nr_pages, 0);
+
+ fail_get_user_pages:
+ kfree(m->page_list);
+ m->page_list = NULL;
+ m->dma_list = NULL;
+ m->nr_pages = 0;
+ m->u_vaddr = NULL;
+ m->size = 0; /* mark unused and not added */
+ return rc;
+}
+
+/**
+ * genwqe_user_vunmap() - Undo mapping of user-space mem to virtual kernel
+ * memory
+ * @cd: pointer to genwqe device
+ * @m: mapping params
+ */
+int genwqe_user_vunmap(struct genwqe_dev *cd, struct dma_mapping *m,
+ struct ddcb_requ *req)
+{
+ struct pci_dev *pci_dev = cd->pci_dev;
+
+ if (!dma_mapping_used(m)) {
+ dev_err(&pci_dev->dev, "[%s] err: mapping %p not used!\n",
+ __func__, m);
+ return -EINVAL;
+ }
+
+ if (m->dma_list)
+ genwqe_unmap_pages(cd, m->dma_list, m->nr_pages);
+
+ if (m->page_list) {
+ free_user_pages(m->page_list, m->nr_pages, 1);
+
+ kfree(m->page_list);
+ m->page_list = NULL;
+ m->dma_list = NULL;
+ m->nr_pages = 0;
+ }
+
+ m->u_vaddr = NULL;
+ m->size = 0; /* mark as unused and not added */
+ return 0;
+}
+
+/**
+ * genwqe_card_type() - Get chip type SLU Configuration Register
+ * @cd: pointer to the genwqe device descriptor
+ * Return: 0: Altera Stratix-IV 230
+ * 1: Altera Stratix-IV 530
+ * 2: Altera Stratix-V A4
+ * 3: Altera Stratix-V A7
+ */
+u8 genwqe_card_type(struct genwqe_dev *cd)
+{
+ u64 card_type = cd->slu_unitcfg;
+
+ return (u8)((card_type & IO_SLU_UNITCFG_TYPE_MASK) >> 20);
+}
+
+/**
+ * genwqe_card_reset() - Reset the card
+ * @cd: pointer to the genwqe device descriptor
+ */
+int genwqe_card_reset(struct genwqe_dev *cd)
+{
+ u64 softrst;
+ struct pci_dev *pci_dev = cd->pci_dev;
+
+ if (!genwqe_is_privileged(cd))
+ return -ENODEV;
+
+ /* new SL */
+ __genwqe_writeq(cd, IO_SLC_CFGREG_SOFTRESET, 0x1ull);
+ msleep(1000);
+ __genwqe_readq(cd, IO_HSU_FIR_CLR);
+ __genwqe_readq(cd, IO_APP_FIR_CLR);
+ __genwqe_readq(cd, IO_SLU_FIR_CLR);
+
+ /*
+ * Read-modify-write to preserve the stealth bits
+ *
+ * For SL >= 039, Stealth WE bit allows removing
+ * the read-modify-wrote.
+ * r-m-w may require a mask 0x3C to avoid hitting hard
+ * reset again for error reset (should be 0, chicken).
+ */
+ softrst = __genwqe_readq(cd, IO_SLC_CFGREG_SOFTRESET) & 0x3cull;
+ __genwqe_writeq(cd, IO_SLC_CFGREG_SOFTRESET, softrst | 0x2ull);
+
+ /* give ERRORRESET some time to finish */
+ msleep(50);
+
+ if (genwqe_need_err_masking(cd)) {
+ dev_info(&pci_dev->dev,
+ "[%s] masking errors for old bitstreams\n", __func__);
+ __genwqe_writeq(cd, IO_SLC_MISC_DEBUG, 0x0aull);
+ }
+ return 0;
+}
+
+int genwqe_read_softreset(struct genwqe_dev *cd)
+{
+ u64 bitstream;
+
+ if (!genwqe_is_privileged(cd))
+ return -ENODEV;
+
+ bitstream = __genwqe_readq(cd, IO_SLU_BITSTREAM) & 0x1;
+ cd->softreset = (bitstream == 0) ? 0x8ull : 0xcull;
+ return 0;
+}
+
+/**
+ * genwqe_set_interrupt_capability() - Configure MSI capability structure
+ * @cd: pointer to the device
+ * Return: 0 if no error
+ */
+int genwqe_set_interrupt_capability(struct genwqe_dev *cd, int count)
+{
+ int rc;
+ struct pci_dev *pci_dev = cd->pci_dev;
+
+ rc = pci_enable_msi_range(pci_dev, 1, count);
+ if (rc < 0)
+ return rc;
+
+ cd->flags |= GENWQE_FLAG_MSI_ENABLED;
+ return 0;
+}
+
+/**
+ * genwqe_reset_interrupt_capability() - Undo genwqe_set_interrupt_capability()
+ * @cd: pointer to the device
+ */
+void genwqe_reset_interrupt_capability(struct genwqe_dev *cd)
+{
+ struct pci_dev *pci_dev = cd->pci_dev;
+
+ if (cd->flags & GENWQE_FLAG_MSI_ENABLED) {
+ pci_disable_msi(pci_dev);
+ cd->flags &= ~GENWQE_FLAG_MSI_ENABLED;
+ }
+}
+
+/**
+ * set_reg_idx() - Fill array with data. Ignore illegal offsets.
+ * @cd: card device
+ * @r: debug register array
+ * @i: index to desired entry
+ * @m: maximum possible entries
+ * @addr: addr which is read
+ * @index: index in debug array
+ * @val: read value
+ */
+static int set_reg_idx(struct genwqe_dev *cd, struct genwqe_reg *r,
+ unsigned int *i, unsigned int m, u32 addr, u32 idx,
+ u64 val)
+{
+ if (WARN_ON_ONCE(*i >= m))
+ return -EFAULT;
+
+ r[*i].addr = addr;
+ r[*i].idx = idx;
+ r[*i].val = val;
+ ++*i;
+ return 0;
+}
+
+static int set_reg(struct genwqe_dev *cd, struct genwqe_reg *r,
+ unsigned int *i, unsigned int m, u32 addr, u64 val)
+{
+ return set_reg_idx(cd, r, i, m, addr, 0, val);
+}
+
+int genwqe_read_ffdc_regs(struct genwqe_dev *cd, struct genwqe_reg *regs,
+ unsigned int max_regs, int all)
+{
+ unsigned int i, j, idx = 0;
+ u32 ufir_addr, ufec_addr, sfir_addr, sfec_addr;
+ u64 gfir, sluid, appid, ufir, ufec, sfir, sfec;
+
+ /* Global FIR */
+ gfir = __genwqe_readq(cd, IO_SLC_CFGREG_GFIR);
+ set_reg(cd, regs, &idx, max_regs, IO_SLC_CFGREG_GFIR, gfir);
+
+ /* UnitCfg for SLU */
+ sluid = __genwqe_readq(cd, IO_SLU_UNITCFG); /* 0x00000000 */
+ set_reg(cd, regs, &idx, max_regs, IO_SLU_UNITCFG, sluid);
+
+ /* UnitCfg for APP */
+ appid = __genwqe_readq(cd, IO_APP_UNITCFG); /* 0x02000000 */
+ set_reg(cd, regs, &idx, max_regs, IO_APP_UNITCFG, appid);
+
+ /* Check all chip Units */
+ for (i = 0; i < GENWQE_MAX_UNITS; i++) {
+
+ /* Unit FIR */
+ ufir_addr = (i << 24) | 0x008;
+ ufir = __genwqe_readq(cd, ufir_addr);
+ set_reg(cd, regs, &idx, max_regs, ufir_addr, ufir);
+
+ /* Unit FEC */
+ ufec_addr = (i << 24) | 0x018;
+ ufec = __genwqe_readq(cd, ufec_addr);
+ set_reg(cd, regs, &idx, max_regs, ufec_addr, ufec);
+
+ for (j = 0; j < 64; j++) {
+ /* wherever there is a primary 1, read the 2ndary */
+ if (!all && (!(ufir & (1ull << j))))
+ continue;
+
+ sfir_addr = (i << 24) | (0x100 + 8 * j);
+ sfir = __genwqe_readq(cd, sfir_addr);
+ set_reg(cd, regs, &idx, max_regs, sfir_addr, sfir);
+
+ sfec_addr = (i << 24) | (0x300 + 8 * j);
+ sfec = __genwqe_readq(cd, sfec_addr);
+ set_reg(cd, regs, &idx, max_regs, sfec_addr, sfec);
+ }
+ }
+
+ /* fill with invalid data until end */
+ for (i = idx; i < max_regs; i++) {
+ regs[i].addr = 0xffffffff;
+ regs[i].val = 0xffffffffffffffffull;
+ }
+ return idx;
+}
+
+/**
+ * genwqe_ffdc_buff_size() - Calculates the number of dump registers
+ */
+int genwqe_ffdc_buff_size(struct genwqe_dev *cd, int uid)
+{
+ int entries = 0, ring, traps, traces, trace_entries;
+ u32 eevptr_addr, l_addr, d_len, d_type;
+ u64 eevptr, val, addr;
+
+ eevptr_addr = GENWQE_UID_OFFS(uid) | IO_EXTENDED_ERROR_POINTER;
+ eevptr = __genwqe_readq(cd, eevptr_addr);
+
+ if ((eevptr != 0x0) && (eevptr != -1ull)) {
+ l_addr = GENWQE_UID_OFFS(uid) | eevptr;
+
+ while (1) {
+ val = __genwqe_readq(cd, l_addr);
+
+ if ((val == 0x0) || (val == -1ull))
+ break;
+
+ /* 38:24 */
+ d_len = (val & 0x0000007fff000000ull) >> 24;
+
+ /* 39 */
+ d_type = (val & 0x0000008000000000ull) >> 36;
+
+ if (d_type) { /* repeat */
+ entries += d_len;
+ } else { /* size in bytes! */
+ entries += d_len >> 3;
+ }
+
+ l_addr += 8;
+ }
+ }
+
+ for (ring = 0; ring < 8; ring++) {
+ addr = GENWQE_UID_OFFS(uid) | IO_EXTENDED_DIAG_MAP(ring);
+ val = __genwqe_readq(cd, addr);
+
+ if ((val == 0x0ull) || (val == -1ull))
+ continue;
+
+ traps = (val >> 24) & 0xff;
+ traces = (val >> 16) & 0xff;
+ trace_entries = val & 0xffff;
+
+ entries += traps + (traces * trace_entries);
+ }
+ return entries;
+}
+
+/**
+ * genwqe_ffdc_buff_read() - Implements LogoutExtendedErrorRegisters procedure
+ */
+int genwqe_ffdc_buff_read(struct genwqe_dev *cd, int uid,
+ struct genwqe_reg *regs, unsigned int max_regs)
+{
+ int i, traps, traces, trace, trace_entries, trace_entry, ring;
+ unsigned int idx = 0;
+ u32 eevptr_addr, l_addr, d_addr, d_len, d_type;
+ u64 eevptr, e, val, addr;
+
+ eevptr_addr = GENWQE_UID_OFFS(uid) | IO_EXTENDED_ERROR_POINTER;
+ eevptr = __genwqe_readq(cd, eevptr_addr);
+
+ if ((eevptr != 0x0) && (eevptr != 0xffffffffffffffffull)) {
+ l_addr = GENWQE_UID_OFFS(uid) | eevptr;
+ while (1) {
+ e = __genwqe_readq(cd, l_addr);
+ if ((e == 0x0) || (e == 0xffffffffffffffffull))
+ break;
+
+ d_addr = (e & 0x0000000000ffffffull); /* 23:0 */
+ d_len = (e & 0x0000007fff000000ull) >> 24; /* 38:24 */
+ d_type = (e & 0x0000008000000000ull) >> 36; /* 39 */
+ d_addr |= GENWQE_UID_OFFS(uid);
+
+ if (d_type) {
+ for (i = 0; i < (int)d_len; i++) {
+ val = __genwqe_readq(cd, d_addr);
+ set_reg_idx(cd, regs, &idx, max_regs,
+ d_addr, i, val);
+ }
+ } else {
+ d_len >>= 3; /* Size in bytes! */
+ for (i = 0; i < (int)d_len; i++, d_addr += 8) {
+ val = __genwqe_readq(cd, d_addr);
+ set_reg_idx(cd, regs, &idx, max_regs,
+ d_addr, 0, val);
+ }
+ }
+ l_addr += 8;
+ }
+ }
+
+ /*
+ * To save time, there are only 6 traces poplulated on Uid=2,
+ * Ring=1. each with iters=512.
+ */
+ for (ring = 0; ring < 8; ring++) { /* 0 is fls, 1 is fds,
+ 2...7 are ASI rings */
+ addr = GENWQE_UID_OFFS(uid) | IO_EXTENDED_DIAG_MAP(ring);
+ val = __genwqe_readq(cd, addr);
+
+ if ((val == 0x0ull) || (val == -1ull))
+ continue;
+
+ traps = (val >> 24) & 0xff; /* Number of Traps */
+ traces = (val >> 16) & 0xff; /* Number of Traces */
+ trace_entries = val & 0xffff; /* Entries per trace */
+
+ /* Note: This is a combined loop that dumps both the traps */
+ /* (for the trace == 0 case) as well as the traces 1 to */
+ /* 'traces'. */
+ for (trace = 0; trace <= traces; trace++) {
+ u32 diag_sel =
+ GENWQE_EXTENDED_DIAG_SELECTOR(ring, trace);
+
+ addr = (GENWQE_UID_OFFS(uid) |
+ IO_EXTENDED_DIAG_SELECTOR);
+ __genwqe_writeq(cd, addr, diag_sel);
+
+ for (trace_entry = 0;
+ trace_entry < (trace ? trace_entries : traps);
+ trace_entry++) {
+ addr = (GENWQE_UID_OFFS(uid) |
+ IO_EXTENDED_DIAG_READ_MBX);
+ val = __genwqe_readq(cd, addr);
+ set_reg_idx(cd, regs, &idx, max_regs, addr,
+ (diag_sel<<16) | trace_entry, val);
+ }
+ }
+ }
+ return 0;
+}
+
+/**
+ * genwqe_write_vreg() - Write register in virtual window
+ *
+ * Note, these registers are only accessible to the PF through the
+ * VF-window. It is not intended for the VF to access.
+ */
+int genwqe_write_vreg(struct genwqe_dev *cd, u32 reg, u64 val, int func)
+{
+ __genwqe_writeq(cd, IO_PF_SLC_VIRTUAL_WINDOW, func & 0xf);
+ __genwqe_writeq(cd, reg, val);
+ return 0;
+}
+
+/**
+ * genwqe_read_vreg() - Read register in virtual window
+ *
+ * Note, these registers are only accessible to the PF through the
+ * VF-window. It is not intended for the VF to access.
+ */
+u64 genwqe_read_vreg(struct genwqe_dev *cd, u32 reg, int func)
+{
+ __genwqe_writeq(cd, IO_PF_SLC_VIRTUAL_WINDOW, func & 0xf);
+ return __genwqe_readq(cd, reg);
+}
+
+/**
+ * genwqe_base_clock_frequency() - Deteremine base clock frequency of the card
+ *
+ * Note: From a design perspective it turned out to be a bad idea to
+ * use codes here to specifiy the frequency/speed values. An old
+ * driver cannot understand new codes and is therefore always a
+ * problem. Better is to measure out the value or put the
+ * speed/frequency directly into a register which is always a valid
+ * value for old as well as for new software.
+ *
+ * Return: Card clock in MHz
+ */
+int genwqe_base_clock_frequency(struct genwqe_dev *cd)
+{
+ u16 speed; /* MHz MHz MHz MHz */
+ static const int speed_grade[] = { 250, 200, 166, 175 };
+
+ speed = (u16)((cd->slu_unitcfg >> 28) & 0x0full);
+ if (speed >= ARRAY_SIZE(speed_grade))
+ return 0; /* illegal value */
+
+ return speed_grade[speed];
+}
+
+/**
+ * genwqe_stop_traps() - Stop traps
+ *
+ * Before reading out the analysis data, we need to stop the traps.
+ */
+void genwqe_stop_traps(struct genwqe_dev *cd)
+{
+ __genwqe_writeq(cd, IO_SLC_MISC_DEBUG_SET, 0xcull);
+}
+
+/**
+ * genwqe_start_traps() - Start traps
+ *
+ * After having read the data, we can/must enable the traps again.
+ */
+void genwqe_start_traps(struct genwqe_dev *cd)
+{
+ __genwqe_writeq(cd, IO_SLC_MISC_DEBUG_CLR, 0xcull);
+
+ if (genwqe_need_err_masking(cd))
+ __genwqe_writeq(cd, IO_SLC_MISC_DEBUG, 0x0aull);
+}
diff --git a/drivers/misc/genwqe/genwqe_driver.h b/drivers/misc/genwqe/genwqe_driver.h
new file mode 100644
index 0000000..1535535
--- /dev/null
+++ b/drivers/misc/genwqe/genwqe_driver.h
@@ -0,0 +1,77 @@
+#ifndef __GENWQE_DRIVER_H__
+#define __GENWQE_DRIVER_H__
+
+/**
+ * IBM Accelerator Family 'GenWQE'
+ *
+ * (C) Copyright IBM Corp. 2013
+ *
+ * Author: Frank Haverkamp <haver@linux.vnet.ibm.com>
+ * Author: Joerg-Stephan Vogt <jsvogt@de.ibm.com>
+ * Author: Michael Jung <mijung@gmx.net>
+ * Author: Michael Ruettger <michael@ibmra.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/types.h>
+#include <linux/stddef.h>
+#include <linux/cdev.h>
+#include <linux/list.h>
+#include <linux/kthread.h>
+#include <linux/scatterlist.h>
+#include <linux/iommu.h>
+#include <linux/spinlock.h>
+#include <linux/mutex.h>
+#include <linux/platform_device.h>
+#include <linux/printk.h>
+
+#include <asm/byteorder.h>
+#include <linux/genwqe/genwqe_card.h>
+
+#define DRV_VERSION "2.0.25"
+
+/*
+ * Static minor number assignement, until we decide/implement
+ * something dynamic.
+ */
+#define GENWQE_MAX_MINOR 128 /* up to 128 possible genwqe devices */
+
+/**
+ * genwqe_requ_alloc() - Allocate a new DDCB execution request
+ *
+ * This data structure contains the user visiable fields of the DDCB
+ * to be executed.
+ *
+ * Return: ptr to genwqe_ddcb_cmd data structure
+ */
+struct genwqe_ddcb_cmd *ddcb_requ_alloc(void);
+
+/**
+ * ddcb_requ_free() - Free DDCB execution request.
+ * @req: ptr to genwqe_ddcb_cmd data structure.
+ */
+void ddcb_requ_free(struct genwqe_ddcb_cmd *req);
+
+u32 genwqe_crc32(u8 *buff, size_t len, u32 init);
+
+static inline void genwqe_hexdump(struct pci_dev *pci_dev,
+ const void *buff, unsigned int size)
+{
+ char prefix[32];
+
+ scnprintf(prefix, sizeof(prefix), "%s %s: ",
+ GENWQE_DEVNAME, pci_name(pci_dev));
+
+ print_hex_dump_debug(prefix, DUMP_PREFIX_OFFSET, 16, 1, buff,
+ size, true);
+}
+
+#endif /* __GENWQE_DRIVER_H__ */
diff --git a/drivers/misc/hmc6352.c b/drivers/misc/hmc6352.c
new file mode 100644
index 0000000..90520d7
--- /dev/null
+++ b/drivers/misc/hmc6352.c
@@ -0,0 +1,155 @@
+/*
+ * hmc6352.c - Honeywell Compass Driver
+ *
+ * Copyright (C) 2009 Intel Corp
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/i2c.h>
+#include <linux/err.h>
+#include <linux/delay.h>
+#include <linux/sysfs.h>
+
+static DEFINE_MUTEX(compass_mutex);
+
+static int compass_command(struct i2c_client *c, u8 cmd)
+{
+ int ret = i2c_master_send(c, &cmd, 1);
+ if (ret < 0)
+ dev_warn(&c->dev, "command '%c' failed.\n", cmd);
+ return ret;
+}
+
+static int compass_store(struct device *dev, const char *buf, size_t count,
+ const char *map)
+{
+ struct i2c_client *c = to_i2c_client(dev);
+ int ret;
+ unsigned long val;
+
+ ret = kstrtoul(buf, 10, &val);
+ if (ret)
+ return ret;
+ if (val >= strlen(map))
+ return -EINVAL;
+ mutex_lock(&compass_mutex);
+ ret = compass_command(c, map[val]);
+ mutex_unlock(&compass_mutex);
+ if (ret < 0)
+ return ret;
+ return count;
+}
+
+static ssize_t compass_calibration_store(struct device *dev,
+ struct device_attribute *attr, const char *buf, size_t count)
+{
+ return compass_store(dev, buf, count, "EC");
+}
+
+static ssize_t compass_power_mode_store(struct device *dev,
+ struct device_attribute *attr, const char *buf, size_t count)
+{
+ return compass_store(dev, buf, count, "SW");
+}
+
+static ssize_t compass_heading_data_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct i2c_client *client = to_i2c_client(dev);
+ unsigned char i2c_data[2];
+ int ret;
+
+ mutex_lock(&compass_mutex);
+ ret = compass_command(client, 'A');
+ if (ret != 1) {
+ mutex_unlock(&compass_mutex);
+ return ret;
+ }
+ msleep(10); /* sending 'A' cmd we need to wait for 7-10 millisecs */
+ ret = i2c_master_recv(client, i2c_data, 2);
+ mutex_unlock(&compass_mutex);
+ if (ret < 0) {
+ dev_warn(dev, "i2c read data cmd failed\n");
+ return ret;
+ }
+ ret = (i2c_data[0] << 8) | i2c_data[1];
+ return sprintf(buf, "%d.%d\n", ret/10, ret%10);
+}
+
+
+static DEVICE_ATTR(heading0_input, S_IRUGO, compass_heading_data_show, NULL);
+static DEVICE_ATTR(calibration, S_IWUSR, NULL, compass_calibration_store);
+static DEVICE_ATTR(power_state, S_IWUSR, NULL, compass_power_mode_store);
+
+static struct attribute *mid_att_compass[] = {
+ &dev_attr_heading0_input.attr,
+ &dev_attr_calibration.attr,
+ &dev_attr_power_state.attr,
+ NULL
+};
+
+static const struct attribute_group m_compass_gr = {
+ .name = "hmc6352",
+ .attrs = mid_att_compass
+};
+
+static int hmc6352_probe(struct i2c_client *client,
+ const struct i2c_device_id *id)
+{
+ int res;
+
+ res = sysfs_create_group(&client->dev.kobj, &m_compass_gr);
+ if (res) {
+ dev_err(&client->dev, "device_create_file failed\n");
+ return res;
+ }
+ dev_info(&client->dev, "%s HMC6352 compass chip found\n",
+ client->name);
+ return 0;
+}
+
+static int hmc6352_remove(struct i2c_client *client)
+{
+ sysfs_remove_group(&client->dev.kobj, &m_compass_gr);
+ return 0;
+}
+
+static struct i2c_device_id hmc6352_id[] = {
+ { "hmc6352", 0 },
+ { }
+};
+
+MODULE_DEVICE_TABLE(i2c, hmc6352_id);
+
+static struct i2c_driver hmc6352_driver = {
+ .driver = {
+ .name = "hmc6352",
+ },
+ .probe = hmc6352_probe,
+ .remove = hmc6352_remove,
+ .id_table = hmc6352_id,
+};
+
+module_i2c_driver(hmc6352_driver);
+
+MODULE_AUTHOR("Kalhan Trisal <kalhan.trisal@intel.com");
+MODULE_DESCRIPTION("hmc6352 Compass Driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/misc/hpilo.c b/drivers/misc/hpilo.c
new file mode 100644
index 0000000..d6a901c
--- /dev/null
+++ b/drivers/misc/hpilo.c
@@ -0,0 +1,912 @@
+/*
+ * Driver for the HP iLO management processor.
+ *
+ * Copyright (C) 2008 Hewlett-Packard Development Company, L.P.
+ * David Altobelli <david.altobelli@hpe.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/pci.h>
+#include <linux/interrupt.h>
+#include <linux/ioport.h>
+#include <linux/device.h>
+#include <linux/file.h>
+#include <linux/cdev.h>
+#include <linux/sched.h>
+#include <linux/spinlock.h>
+#include <linux/delay.h>
+#include <linux/uaccess.h>
+#include <linux/io.h>
+#include <linux/wait.h>
+#include <linux/poll.h>
+#include <linux/slab.h>
+#include "hpilo.h"
+
+static struct class *ilo_class;
+static unsigned int ilo_major;
+static unsigned int max_ccb = 16;
+static char ilo_hwdev[MAX_ILO_DEV];
+
+static inline int get_entry_id(int entry)
+{
+ return (entry & ENTRY_MASK_DESCRIPTOR) >> ENTRY_BITPOS_DESCRIPTOR;
+}
+
+static inline int get_entry_len(int entry)
+{
+ return ((entry & ENTRY_MASK_QWORDS) >> ENTRY_BITPOS_QWORDS) << 3;
+}
+
+static inline int mk_entry(int id, int len)
+{
+ int qlen = len & 7 ? (len >> 3) + 1 : len >> 3;
+ return id << ENTRY_BITPOS_DESCRIPTOR | qlen << ENTRY_BITPOS_QWORDS;
+}
+
+static inline int desc_mem_sz(int nr_entry)
+{
+ return nr_entry << L2_QENTRY_SZ;
+}
+
+/*
+ * FIFO queues, shared with hardware.
+ *
+ * If a queue has empty slots, an entry is added to the queue tail,
+ * and that entry is marked as occupied.
+ * Entries can be dequeued from the head of the list, when the device
+ * has marked the entry as consumed.
+ *
+ * Returns true on successful queue/dequeue, false on failure.
+ */
+static int fifo_enqueue(struct ilo_hwinfo *hw, char *fifobar, int entry)
+{
+ struct fifo *fifo_q = FIFOBARTOHANDLE(fifobar);
+ unsigned long flags;
+ int ret = 0;
+
+ spin_lock_irqsave(&hw->fifo_lock, flags);
+ if (!(fifo_q->fifobar[(fifo_q->tail + 1) & fifo_q->imask]
+ & ENTRY_MASK_O)) {
+ fifo_q->fifobar[fifo_q->tail & fifo_q->imask] |=
+ (entry & ENTRY_MASK_NOSTATE) | fifo_q->merge;
+ fifo_q->tail += 1;
+ ret = 1;
+ }
+ spin_unlock_irqrestore(&hw->fifo_lock, flags);
+
+ return ret;
+}
+
+static int fifo_dequeue(struct ilo_hwinfo *hw, char *fifobar, int *entry)
+{
+ struct fifo *fifo_q = FIFOBARTOHANDLE(fifobar);
+ unsigned long flags;
+ int ret = 0;
+ u64 c;
+
+ spin_lock_irqsave(&hw->fifo_lock, flags);
+ c = fifo_q->fifobar[fifo_q->head & fifo_q->imask];
+ if (c & ENTRY_MASK_C) {
+ if (entry)
+ *entry = c & ENTRY_MASK_NOSTATE;
+
+ fifo_q->fifobar[fifo_q->head & fifo_q->imask] =
+ (c | ENTRY_MASK) + 1;
+ fifo_q->head += 1;
+ ret = 1;
+ }
+ spin_unlock_irqrestore(&hw->fifo_lock, flags);
+
+ return ret;
+}
+
+static int fifo_check_recv(struct ilo_hwinfo *hw, char *fifobar)
+{
+ struct fifo *fifo_q = FIFOBARTOHANDLE(fifobar);
+ unsigned long flags;
+ int ret = 0;
+ u64 c;
+
+ spin_lock_irqsave(&hw->fifo_lock, flags);
+ c = fifo_q->fifobar[fifo_q->head & fifo_q->imask];
+ if (c & ENTRY_MASK_C)
+ ret = 1;
+ spin_unlock_irqrestore(&hw->fifo_lock, flags);
+
+ return ret;
+}
+
+static int ilo_pkt_enqueue(struct ilo_hwinfo *hw, struct ccb *ccb,
+ int dir, int id, int len)
+{
+ char *fifobar;
+ int entry;
+
+ if (dir == SENDQ)
+ fifobar = ccb->ccb_u1.send_fifobar;
+ else
+ fifobar = ccb->ccb_u3.recv_fifobar;
+
+ entry = mk_entry(id, len);
+ return fifo_enqueue(hw, fifobar, entry);
+}
+
+static int ilo_pkt_dequeue(struct ilo_hwinfo *hw, struct ccb *ccb,
+ int dir, int *id, int *len, void **pkt)
+{
+ char *fifobar, *desc;
+ int entry = 0, pkt_id = 0;
+ int ret;
+
+ if (dir == SENDQ) {
+ fifobar = ccb->ccb_u1.send_fifobar;
+ desc = ccb->ccb_u2.send_desc;
+ } else {
+ fifobar = ccb->ccb_u3.recv_fifobar;
+ desc = ccb->ccb_u4.recv_desc;
+ }
+
+ ret = fifo_dequeue(hw, fifobar, &entry);
+ if (ret) {
+ pkt_id = get_entry_id(entry);
+ if (id)
+ *id = pkt_id;
+ if (len)
+ *len = get_entry_len(entry);
+ if (pkt)
+ *pkt = (void *)(desc + desc_mem_sz(pkt_id));
+ }
+
+ return ret;
+}
+
+static int ilo_pkt_recv(struct ilo_hwinfo *hw, struct ccb *ccb)
+{
+ char *fifobar = ccb->ccb_u3.recv_fifobar;
+
+ return fifo_check_recv(hw, fifobar);
+}
+
+static inline void doorbell_set(struct ccb *ccb)
+{
+ iowrite8(1, ccb->ccb_u5.db_base);
+}
+
+static inline void doorbell_clr(struct ccb *ccb)
+{
+ iowrite8(2, ccb->ccb_u5.db_base);
+}
+
+static inline int ctrl_set(int l2sz, int idxmask, int desclim)
+{
+ int active = 0, go = 1;
+ return l2sz << CTRL_BITPOS_L2SZ |
+ idxmask << CTRL_BITPOS_FIFOINDEXMASK |
+ desclim << CTRL_BITPOS_DESCLIMIT |
+ active << CTRL_BITPOS_A |
+ go << CTRL_BITPOS_G;
+}
+
+static void ctrl_setup(struct ccb *ccb, int nr_desc, int l2desc_sz)
+{
+ /* for simplicity, use the same parameters for send and recv ctrls */
+ ccb->send_ctrl = ctrl_set(l2desc_sz, nr_desc-1, nr_desc-1);
+ ccb->recv_ctrl = ctrl_set(l2desc_sz, nr_desc-1, nr_desc-1);
+}
+
+static inline int fifo_sz(int nr_entry)
+{
+ /* size of a fifo is determined by the number of entries it contains */
+ return (nr_entry * sizeof(u64)) + FIFOHANDLESIZE;
+}
+
+static void fifo_setup(void *base_addr, int nr_entry)
+{
+ struct fifo *fifo_q = base_addr;
+ int i;
+
+ /* set up an empty fifo */
+ fifo_q->head = 0;
+ fifo_q->tail = 0;
+ fifo_q->reset = 0;
+ fifo_q->nrents = nr_entry;
+ fifo_q->imask = nr_entry - 1;
+ fifo_q->merge = ENTRY_MASK_O;
+
+ for (i = 0; i < nr_entry; i++)
+ fifo_q->fifobar[i] = 0;
+}
+
+static void ilo_ccb_close(struct pci_dev *pdev, struct ccb_data *data)
+{
+ struct ccb *driver_ccb = &data->driver_ccb;
+ struct ccb __iomem *device_ccb = data->mapped_ccb;
+ int retries;
+
+ /* complicated dance to tell the hw we are stopping */
+ doorbell_clr(driver_ccb);
+ iowrite32(ioread32(&device_ccb->send_ctrl) & ~(1 << CTRL_BITPOS_G),
+ &device_ccb->send_ctrl);
+ iowrite32(ioread32(&device_ccb->recv_ctrl) & ~(1 << CTRL_BITPOS_G),
+ &device_ccb->recv_ctrl);
+
+ /* give iLO some time to process stop request */
+ for (retries = MAX_WAIT; retries > 0; retries--) {
+ doorbell_set(driver_ccb);
+ udelay(WAIT_TIME);
+ if (!(ioread32(&device_ccb->send_ctrl) & (1 << CTRL_BITPOS_A))
+ &&
+ !(ioread32(&device_ccb->recv_ctrl) & (1 << CTRL_BITPOS_A)))
+ break;
+ }
+ if (retries == 0)
+ dev_err(&pdev->dev, "Closing, but controller still active\n");
+
+ /* clear the hw ccb */
+ memset_io(device_ccb, 0, sizeof(struct ccb));
+
+ /* free resources used to back send/recv queues */
+ pci_free_consistent(pdev, data->dma_size, data->dma_va, data->dma_pa);
+}
+
+static int ilo_ccb_setup(struct ilo_hwinfo *hw, struct ccb_data *data, int slot)
+{
+ char *dma_va;
+ dma_addr_t dma_pa;
+ struct ccb *driver_ccb, *ilo_ccb;
+
+ driver_ccb = &data->driver_ccb;
+ ilo_ccb = &data->ilo_ccb;
+
+ data->dma_size = 2 * fifo_sz(NR_QENTRY) +
+ 2 * desc_mem_sz(NR_QENTRY) +
+ ILO_START_ALIGN + ILO_CACHE_SZ;
+
+ data->dma_va = pci_alloc_consistent(hw->ilo_dev, data->dma_size,
+ &data->dma_pa);
+ if (!data->dma_va)
+ return -ENOMEM;
+
+ dma_va = (char *)data->dma_va;
+ dma_pa = data->dma_pa;
+
+ memset(dma_va, 0, data->dma_size);
+
+ dma_va = (char *)roundup((unsigned long)dma_va, ILO_START_ALIGN);
+ dma_pa = roundup(dma_pa, ILO_START_ALIGN);
+
+ /*
+ * Create two ccb's, one with virt addrs, one with phys addrs.
+ * Copy the phys addr ccb to device shared mem.
+ */
+ ctrl_setup(driver_ccb, NR_QENTRY, L2_QENTRY_SZ);
+ ctrl_setup(ilo_ccb, NR_QENTRY, L2_QENTRY_SZ);
+
+ fifo_setup(dma_va, NR_QENTRY);
+ driver_ccb->ccb_u1.send_fifobar = dma_va + FIFOHANDLESIZE;
+ ilo_ccb->ccb_u1.send_fifobar_pa = dma_pa + FIFOHANDLESIZE;
+ dma_va += fifo_sz(NR_QENTRY);
+ dma_pa += fifo_sz(NR_QENTRY);
+
+ dma_va = (char *)roundup((unsigned long)dma_va, ILO_CACHE_SZ);
+ dma_pa = roundup(dma_pa, ILO_CACHE_SZ);
+
+ fifo_setup(dma_va, NR_QENTRY);
+ driver_ccb->ccb_u3.recv_fifobar = dma_va + FIFOHANDLESIZE;
+ ilo_ccb->ccb_u3.recv_fifobar_pa = dma_pa + FIFOHANDLESIZE;
+ dma_va += fifo_sz(NR_QENTRY);
+ dma_pa += fifo_sz(NR_QENTRY);
+
+ driver_ccb->ccb_u2.send_desc = dma_va;
+ ilo_ccb->ccb_u2.send_desc_pa = dma_pa;
+ dma_pa += desc_mem_sz(NR_QENTRY);
+ dma_va += desc_mem_sz(NR_QENTRY);
+
+ driver_ccb->ccb_u4.recv_desc = dma_va;
+ ilo_ccb->ccb_u4.recv_desc_pa = dma_pa;
+
+ driver_ccb->channel = slot;
+ ilo_ccb->channel = slot;
+
+ driver_ccb->ccb_u5.db_base = hw->db_vaddr + (slot << L2_DB_SIZE);
+ ilo_ccb->ccb_u5.db_base = NULL; /* hw ccb's doorbell is not used */
+
+ return 0;
+}
+
+static void ilo_ccb_open(struct ilo_hwinfo *hw, struct ccb_data *data, int slot)
+{
+ int pkt_id, pkt_sz;
+ struct ccb *driver_ccb = &data->driver_ccb;
+
+ /* copy the ccb with physical addrs to device memory */
+ data->mapped_ccb = (struct ccb __iomem *)
+ (hw->ram_vaddr + (slot * ILOHW_CCB_SZ));
+ memcpy_toio(data->mapped_ccb, &data->ilo_ccb, sizeof(struct ccb));
+
+ /* put packets on the send and receive queues */
+ pkt_sz = 0;
+ for (pkt_id = 0; pkt_id < NR_QENTRY; pkt_id++) {
+ ilo_pkt_enqueue(hw, driver_ccb, SENDQ, pkt_id, pkt_sz);
+ doorbell_set(driver_ccb);
+ }
+
+ pkt_sz = desc_mem_sz(1);
+ for (pkt_id = 0; pkt_id < NR_QENTRY; pkt_id++)
+ ilo_pkt_enqueue(hw, driver_ccb, RECVQ, pkt_id, pkt_sz);
+
+ /* the ccb is ready to use */
+ doorbell_clr(driver_ccb);
+}
+
+static int ilo_ccb_verify(struct ilo_hwinfo *hw, struct ccb_data *data)
+{
+ int pkt_id, i;
+ struct ccb *driver_ccb = &data->driver_ccb;
+
+ /* make sure iLO is really handling requests */
+ for (i = MAX_WAIT; i > 0; i--) {
+ if (ilo_pkt_dequeue(hw, driver_ccb, SENDQ, &pkt_id, NULL, NULL))
+ break;
+ udelay(WAIT_TIME);
+ }
+
+ if (i == 0) {
+ dev_err(&hw->ilo_dev->dev, "Open could not dequeue a packet\n");
+ return -EBUSY;
+ }
+
+ ilo_pkt_enqueue(hw, driver_ccb, SENDQ, pkt_id, 0);
+ doorbell_set(driver_ccb);
+ return 0;
+}
+
+static inline int is_channel_reset(struct ccb *ccb)
+{
+ /* check for this particular channel needing a reset */
+ return FIFOBARTOHANDLE(ccb->ccb_u1.send_fifobar)->reset;
+}
+
+static inline void set_channel_reset(struct ccb *ccb)
+{
+ /* set a flag indicating this channel needs a reset */
+ FIFOBARTOHANDLE(ccb->ccb_u1.send_fifobar)->reset = 1;
+}
+
+static inline int get_device_outbound(struct ilo_hwinfo *hw)
+{
+ return ioread32(&hw->mmio_vaddr[DB_OUT]);
+}
+
+static inline int is_db_reset(int db_out)
+{
+ return db_out & (1 << DB_RESET);
+}
+
+static inline int is_device_reset(struct ilo_hwinfo *hw)
+{
+ /* check for global reset condition */
+ return is_db_reset(get_device_outbound(hw));
+}
+
+static inline void clear_pending_db(struct ilo_hwinfo *hw, int clr)
+{
+ iowrite32(clr, &hw->mmio_vaddr[DB_OUT]);
+}
+
+static inline void clear_device(struct ilo_hwinfo *hw)
+{
+ /* clear the device (reset bits, pending channel entries) */
+ clear_pending_db(hw, -1);
+}
+
+static inline void ilo_enable_interrupts(struct ilo_hwinfo *hw)
+{
+ iowrite8(ioread8(&hw->mmio_vaddr[DB_IRQ]) | 1, &hw->mmio_vaddr[DB_IRQ]);
+}
+
+static inline void ilo_disable_interrupts(struct ilo_hwinfo *hw)
+{
+ iowrite8(ioread8(&hw->mmio_vaddr[DB_IRQ]) & ~1,
+ &hw->mmio_vaddr[DB_IRQ]);
+}
+
+static void ilo_set_reset(struct ilo_hwinfo *hw)
+{
+ int slot;
+
+ /*
+ * Mapped memory is zeroed on ilo reset, so set a per ccb flag
+ * to indicate that this ccb needs to be closed and reopened.
+ */
+ for (slot = 0; slot < max_ccb; slot++) {
+ if (!hw->ccb_alloc[slot])
+ continue;
+ set_channel_reset(&hw->ccb_alloc[slot]->driver_ccb);
+ }
+}
+
+static ssize_t ilo_read(struct file *fp, char __user *buf,
+ size_t len, loff_t *off)
+{
+ int err, found, cnt, pkt_id, pkt_len;
+ struct ccb_data *data = fp->private_data;
+ struct ccb *driver_ccb = &data->driver_ccb;
+ struct ilo_hwinfo *hw = data->ilo_hw;
+ void *pkt;
+
+ if (is_channel_reset(driver_ccb)) {
+ /*
+ * If the device has been reset, applications
+ * need to close and reopen all ccbs.
+ */
+ return -ENODEV;
+ }
+
+ /*
+ * This function is to be called when data is expected
+ * in the channel, and will return an error if no packet is found
+ * during the loop below. The sleep/retry logic is to allow
+ * applications to call read() immediately post write(),
+ * and give iLO some time to process the sent packet.
+ */
+ cnt = 20;
+ do {
+ /* look for a received packet */
+ found = ilo_pkt_dequeue(hw, driver_ccb, RECVQ, &pkt_id,
+ &pkt_len, &pkt);
+ if (found)
+ break;
+ cnt--;
+ msleep(100);
+ } while (!found && cnt);
+
+ if (!found)
+ return -EAGAIN;
+
+ /* only copy the length of the received packet */
+ if (pkt_len < len)
+ len = pkt_len;
+
+ err = copy_to_user(buf, pkt, len);
+
+ /* return the received packet to the queue */
+ ilo_pkt_enqueue(hw, driver_ccb, RECVQ, pkt_id, desc_mem_sz(1));
+
+ return err ? -EFAULT : len;
+}
+
+static ssize_t ilo_write(struct file *fp, const char __user *buf,
+ size_t len, loff_t *off)
+{
+ int err, pkt_id, pkt_len;
+ struct ccb_data *data = fp->private_data;
+ struct ccb *driver_ccb = &data->driver_ccb;
+ struct ilo_hwinfo *hw = data->ilo_hw;
+ void *pkt;
+
+ if (is_channel_reset(driver_ccb))
+ return -ENODEV;
+
+ /* get a packet to send the user command */
+ if (!ilo_pkt_dequeue(hw, driver_ccb, SENDQ, &pkt_id, &pkt_len, &pkt))
+ return -EBUSY;
+
+ /* limit the length to the length of the packet */
+ if (pkt_len < len)
+ len = pkt_len;
+
+ /* on failure, set the len to 0 to return empty packet to the device */
+ err = copy_from_user(pkt, buf, len);
+ if (err)
+ len = 0;
+
+ /* send the packet */
+ ilo_pkt_enqueue(hw, driver_ccb, SENDQ, pkt_id, len);
+ doorbell_set(driver_ccb);
+
+ return err ? -EFAULT : len;
+}
+
+static unsigned int ilo_poll(struct file *fp, poll_table *wait)
+{
+ struct ccb_data *data = fp->private_data;
+ struct ccb *driver_ccb = &data->driver_ccb;
+
+ poll_wait(fp, &data->ccb_waitq, wait);
+
+ if (is_channel_reset(driver_ccb))
+ return POLLERR;
+ else if (ilo_pkt_recv(data->ilo_hw, driver_ccb))
+ return POLLIN | POLLRDNORM;
+
+ return 0;
+}
+
+static int ilo_close(struct inode *ip, struct file *fp)
+{
+ int slot;
+ struct ccb_data *data;
+ struct ilo_hwinfo *hw;
+ unsigned long flags;
+
+ slot = iminor(ip) % max_ccb;
+ hw = container_of(ip->i_cdev, struct ilo_hwinfo, cdev);
+
+ spin_lock(&hw->open_lock);
+
+ if (hw->ccb_alloc[slot]->ccb_cnt == 1) {
+
+ data = fp->private_data;
+
+ spin_lock_irqsave(&hw->alloc_lock, flags);
+ hw->ccb_alloc[slot] = NULL;
+ spin_unlock_irqrestore(&hw->alloc_lock, flags);
+
+ ilo_ccb_close(hw->ilo_dev, data);
+
+ kfree(data);
+ } else
+ hw->ccb_alloc[slot]->ccb_cnt--;
+
+ spin_unlock(&hw->open_lock);
+
+ return 0;
+}
+
+static int ilo_open(struct inode *ip, struct file *fp)
+{
+ int slot, error;
+ struct ccb_data *data;
+ struct ilo_hwinfo *hw;
+ unsigned long flags;
+
+ slot = iminor(ip) % max_ccb;
+ hw = container_of(ip->i_cdev, struct ilo_hwinfo, cdev);
+
+ /* new ccb allocation */
+ data = kzalloc(sizeof(*data), GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+
+ spin_lock(&hw->open_lock);
+
+ /* each fd private_data holds sw/hw view of ccb */
+ if (hw->ccb_alloc[slot] == NULL) {
+ /* create a channel control block for this minor */
+ error = ilo_ccb_setup(hw, data, slot);
+ if (error) {
+ kfree(data);
+ goto out;
+ }
+
+ data->ccb_cnt = 1;
+ data->ccb_excl = fp->f_flags & O_EXCL;
+ data->ilo_hw = hw;
+ init_waitqueue_head(&data->ccb_waitq);
+
+ /* write the ccb to hw */
+ spin_lock_irqsave(&hw->alloc_lock, flags);
+ ilo_ccb_open(hw, data, slot);
+ hw->ccb_alloc[slot] = data;
+ spin_unlock_irqrestore(&hw->alloc_lock, flags);
+
+ /* make sure the channel is functional */
+ error = ilo_ccb_verify(hw, data);
+ if (error) {
+
+ spin_lock_irqsave(&hw->alloc_lock, flags);
+ hw->ccb_alloc[slot] = NULL;
+ spin_unlock_irqrestore(&hw->alloc_lock, flags);
+
+ ilo_ccb_close(hw->ilo_dev, data);
+
+ kfree(data);
+ goto out;
+ }
+
+ } else {
+ kfree(data);
+ if (fp->f_flags & O_EXCL || hw->ccb_alloc[slot]->ccb_excl) {
+ /*
+ * The channel exists, and either this open
+ * or a previous open of this channel wants
+ * exclusive access.
+ */
+ error = -EBUSY;
+ } else {
+ hw->ccb_alloc[slot]->ccb_cnt++;
+ error = 0;
+ }
+ }
+out:
+ spin_unlock(&hw->open_lock);
+
+ if (!error)
+ fp->private_data = hw->ccb_alloc[slot];
+
+ return error;
+}
+
+static const struct file_operations ilo_fops = {
+ .owner = THIS_MODULE,
+ .read = ilo_read,
+ .write = ilo_write,
+ .poll = ilo_poll,
+ .open = ilo_open,
+ .release = ilo_close,
+ .llseek = noop_llseek,
+};
+
+static irqreturn_t ilo_isr(int irq, void *data)
+{
+ struct ilo_hwinfo *hw = data;
+ int pending, i;
+
+ spin_lock(&hw->alloc_lock);
+
+ /* check for ccbs which have data */
+ pending = get_device_outbound(hw);
+ if (!pending) {
+ spin_unlock(&hw->alloc_lock);
+ return IRQ_NONE;
+ }
+
+ if (is_db_reset(pending)) {
+ /* wake up all ccbs if the device was reset */
+ pending = -1;
+ ilo_set_reset(hw);
+ }
+
+ for (i = 0; i < max_ccb; i++) {
+ if (!hw->ccb_alloc[i])
+ continue;
+ if (pending & (1 << i))
+ wake_up_interruptible(&hw->ccb_alloc[i]->ccb_waitq);
+ }
+
+ /* clear the device of the channels that have been handled */
+ clear_pending_db(hw, pending);
+
+ spin_unlock(&hw->alloc_lock);
+
+ return IRQ_HANDLED;
+}
+
+static void ilo_unmap_device(struct pci_dev *pdev, struct ilo_hwinfo *hw)
+{
+ pci_iounmap(pdev, hw->db_vaddr);
+ pci_iounmap(pdev, hw->ram_vaddr);
+ pci_iounmap(pdev, hw->mmio_vaddr);
+}
+
+static int ilo_map_device(struct pci_dev *pdev, struct ilo_hwinfo *hw)
+{
+ int error = -ENOMEM;
+
+ /* map the memory mapped i/o registers */
+ hw->mmio_vaddr = pci_iomap(pdev, 1, 0);
+ if (hw->mmio_vaddr == NULL) {
+ dev_err(&pdev->dev, "Error mapping mmio\n");
+ goto out;
+ }
+
+ /* map the adapter shared memory region */
+ hw->ram_vaddr = pci_iomap(pdev, 2, max_ccb * ILOHW_CCB_SZ);
+ if (hw->ram_vaddr == NULL) {
+ dev_err(&pdev->dev, "Error mapping shared mem\n");
+ goto mmio_free;
+ }
+
+ /* map the doorbell aperture */
+ hw->db_vaddr = pci_iomap(pdev, 3, max_ccb * ONE_DB_SIZE);
+ if (hw->db_vaddr == NULL) {
+ dev_err(&pdev->dev, "Error mapping doorbell\n");
+ goto ram_free;
+ }
+
+ return 0;
+ram_free:
+ pci_iounmap(pdev, hw->ram_vaddr);
+mmio_free:
+ pci_iounmap(pdev, hw->mmio_vaddr);
+out:
+ return error;
+}
+
+static void ilo_remove(struct pci_dev *pdev)
+{
+ int i, minor;
+ struct ilo_hwinfo *ilo_hw = pci_get_drvdata(pdev);
+
+ if (!ilo_hw)
+ return;
+
+ clear_device(ilo_hw);
+
+ minor = MINOR(ilo_hw->cdev.dev);
+ for (i = minor; i < minor + max_ccb; i++)
+ device_destroy(ilo_class, MKDEV(ilo_major, i));
+
+ cdev_del(&ilo_hw->cdev);
+ ilo_disable_interrupts(ilo_hw);
+ free_irq(pdev->irq, ilo_hw);
+ ilo_unmap_device(pdev, ilo_hw);
+ pci_release_regions(pdev);
+ /*
+ * pci_disable_device(pdev) used to be here. But this PCI device has
+ * two functions with interrupt lines connected to a single pin. The
+ * other one is a USB host controller. So when we disable the PIN here
+ * e.g. by rmmod hpilo, the controller stops working. It is because
+ * the interrupt link is disabled in ACPI since it is not refcounted
+ * yet. See acpi_pci_link_free_irq called from acpi_pci_irq_disable.
+ */
+ kfree(ilo_hw);
+ ilo_hwdev[(minor / max_ccb)] = 0;
+}
+
+static int ilo_probe(struct pci_dev *pdev,
+ const struct pci_device_id *ent)
+{
+ int devnum, minor, start, error = 0;
+ struct ilo_hwinfo *ilo_hw;
+
+ /* Ignore subsystem_device = 0x1979 (set by BIOS) */
+ if (pdev->subsystem_device == 0x1979)
+ return 0;
+
+ if (max_ccb > MAX_CCB)
+ max_ccb = MAX_CCB;
+ else if (max_ccb < MIN_CCB)
+ max_ccb = MIN_CCB;
+
+ /* find a free range for device files */
+ for (devnum = 0; devnum < MAX_ILO_DEV; devnum++) {
+ if (ilo_hwdev[devnum] == 0) {
+ ilo_hwdev[devnum] = 1;
+ break;
+ }
+ }
+
+ if (devnum == MAX_ILO_DEV) {
+ dev_err(&pdev->dev, "Error finding free device\n");
+ return -ENODEV;
+ }
+
+ /* track global allocations for this device */
+ error = -ENOMEM;
+ ilo_hw = kzalloc(sizeof(*ilo_hw), GFP_KERNEL);
+ if (!ilo_hw)
+ goto out;
+
+ ilo_hw->ilo_dev = pdev;
+ spin_lock_init(&ilo_hw->alloc_lock);
+ spin_lock_init(&ilo_hw->fifo_lock);
+ spin_lock_init(&ilo_hw->open_lock);
+
+ error = pci_enable_device(pdev);
+ if (error)
+ goto free;
+
+ pci_set_master(pdev);
+
+ error = pci_request_regions(pdev, ILO_NAME);
+ if (error)
+ goto disable;
+
+ error = ilo_map_device(pdev, ilo_hw);
+ if (error)
+ goto free_regions;
+
+ pci_set_drvdata(pdev, ilo_hw);
+ clear_device(ilo_hw);
+
+ error = request_irq(pdev->irq, ilo_isr, IRQF_SHARED, "hpilo", ilo_hw);
+ if (error)
+ goto unmap;
+
+ ilo_enable_interrupts(ilo_hw);
+
+ cdev_init(&ilo_hw->cdev, &ilo_fops);
+ ilo_hw->cdev.owner = THIS_MODULE;
+ start = devnum * max_ccb;
+ error = cdev_add(&ilo_hw->cdev, MKDEV(ilo_major, start), max_ccb);
+ if (error) {
+ dev_err(&pdev->dev, "Could not add cdev\n");
+ goto remove_isr;
+ }
+
+ for (minor = 0 ; minor < max_ccb; minor++) {
+ struct device *dev;
+ dev = device_create(ilo_class, &pdev->dev,
+ MKDEV(ilo_major, minor), NULL,
+ "hpilo!d%dccb%d", devnum, minor);
+ if (IS_ERR(dev))
+ dev_err(&pdev->dev, "Could not create files\n");
+ }
+
+ return 0;
+remove_isr:
+ ilo_disable_interrupts(ilo_hw);
+ free_irq(pdev->irq, ilo_hw);
+unmap:
+ ilo_unmap_device(pdev, ilo_hw);
+free_regions:
+ pci_release_regions(pdev);
+disable:
+/* pci_disable_device(pdev); see comment in ilo_remove */
+free:
+ kfree(ilo_hw);
+out:
+ ilo_hwdev[devnum] = 0;
+ return error;
+}
+
+static struct pci_device_id ilo_devices[] = {
+ { PCI_DEVICE(PCI_VENDOR_ID_COMPAQ, 0xB204) },
+ { PCI_DEVICE(PCI_VENDOR_ID_HP, 0x3307) },
+ { }
+};
+MODULE_DEVICE_TABLE(pci, ilo_devices);
+
+static struct pci_driver ilo_driver = {
+ .name = ILO_NAME,
+ .id_table = ilo_devices,
+ .probe = ilo_probe,
+ .remove = ilo_remove,
+};
+
+static int __init ilo_init(void)
+{
+ int error;
+ dev_t dev;
+
+ ilo_class = class_create(THIS_MODULE, "iLO");
+ if (IS_ERR(ilo_class)) {
+ error = PTR_ERR(ilo_class);
+ goto out;
+ }
+
+ error = alloc_chrdev_region(&dev, 0, MAX_OPEN, ILO_NAME);
+ if (error)
+ goto class_destroy;
+
+ ilo_major = MAJOR(dev);
+
+ error = pci_register_driver(&ilo_driver);
+ if (error)
+ goto chr_remove;
+
+ return 0;
+chr_remove:
+ unregister_chrdev_region(dev, MAX_OPEN);
+class_destroy:
+ class_destroy(ilo_class);
+out:
+ return error;
+}
+
+static void __exit ilo_exit(void)
+{
+ pci_unregister_driver(&ilo_driver);
+ unregister_chrdev_region(MKDEV(ilo_major, 0), MAX_OPEN);
+ class_destroy(ilo_class);
+}
+
+MODULE_VERSION("1.4.1");
+MODULE_ALIAS(ILO_NAME);
+MODULE_DESCRIPTION(ILO_NAME);
+MODULE_AUTHOR("David Altobelli <david.altobelli@hpe.com>");
+MODULE_LICENSE("GPL v2");
+
+module_param(max_ccb, uint, 0444);
+MODULE_PARM_DESC(max_ccb, "Maximum number of HP iLO channels to attach (8-24)(default=16)");
+
+module_init(ilo_init);
+module_exit(ilo_exit);
diff --git a/drivers/misc/hpilo.h b/drivers/misc/hpilo.h
new file mode 100644
index 0000000..b97672e
--- /dev/null
+++ b/drivers/misc/hpilo.h
@@ -0,0 +1,214 @@
+/*
+ * linux/drivers/char/hpilo.h
+ *
+ * Copyright (C) 2008 Hewlett-Packard Development Company, L.P.
+ * David Altobelli <david.altobelli@hp.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef __HPILO_H
+#define __HPILO_H
+
+#define ILO_NAME "hpilo"
+
+/* max number of open channel control blocks per device, hw limited to 32 */
+#define MAX_CCB 24
+/* min number of open channel control blocks per device, hw limited to 32 */
+#define MIN_CCB 8
+/* max number of supported devices */
+#define MAX_ILO_DEV 1
+/* max number of files */
+#define MAX_OPEN (MAX_CCB * MAX_ILO_DEV)
+/* total wait time in usec */
+#define MAX_WAIT_TIME 10000
+/* per spin wait time in usec */
+#define WAIT_TIME 10
+/* spin counter for open/close delay */
+#define MAX_WAIT (MAX_WAIT_TIME / WAIT_TIME)
+
+/*
+ * Per device, used to track global memory allocations.
+ */
+struct ilo_hwinfo {
+ /* mmio registers on device */
+ char __iomem *mmio_vaddr;
+
+ /* doorbell registers on device */
+ char __iomem *db_vaddr;
+
+ /* shared memory on device used for channel control blocks */
+ char __iomem *ram_vaddr;
+
+ /* files corresponding to this device */
+ struct ccb_data *ccb_alloc[MAX_CCB];
+
+ struct pci_dev *ilo_dev;
+
+ /*
+ * open_lock serializes ccb_cnt during open and close
+ * [ irq disabled ]
+ * -> alloc_lock used when adding/removing/searching ccb_alloc,
+ * which represents all ccbs open on the device
+ * --> fifo_lock controls access to fifo queues shared with hw
+ *
+ * Locks must be taken in this order, but open_lock and alloc_lock
+ * are optional, they do not need to be held in order to take a
+ * lower level lock.
+ */
+ spinlock_t open_lock;
+ spinlock_t alloc_lock;
+ spinlock_t fifo_lock;
+
+ struct cdev cdev;
+};
+
+/* offset from mmio_vaddr for enabling doorbell interrupts */
+#define DB_IRQ 0xB2
+/* offset from mmio_vaddr for outbound communications */
+#define DB_OUT 0xD4
+/* DB_OUT reset bit */
+#define DB_RESET 26
+
+/*
+ * Channel control block. Used to manage hardware queues.
+ * The format must match hw's version. The hw ccb is 128 bytes,
+ * but the context area shouldn't be touched by the driver.
+ */
+#define ILOSW_CCB_SZ 64
+#define ILOHW_CCB_SZ 128
+struct ccb {
+ union {
+ char *send_fifobar;
+ u64 send_fifobar_pa;
+ } ccb_u1;
+ union {
+ char *send_desc;
+ u64 send_desc_pa;
+ } ccb_u2;
+ u64 send_ctrl;
+
+ union {
+ char *recv_fifobar;
+ u64 recv_fifobar_pa;
+ } ccb_u3;
+ union {
+ char *recv_desc;
+ u64 recv_desc_pa;
+ } ccb_u4;
+ u64 recv_ctrl;
+
+ union {
+ char __iomem *db_base;
+ u64 padding5;
+ } ccb_u5;
+
+ u64 channel;
+
+ /* unused context area (64 bytes) */
+};
+
+/* ccb queue parameters */
+#define SENDQ 1
+#define RECVQ 2
+#define NR_QENTRY 4
+#define L2_QENTRY_SZ 12
+
+/* ccb ctrl bitfields */
+#define CTRL_BITPOS_L2SZ 0
+#define CTRL_BITPOS_FIFOINDEXMASK 4
+#define CTRL_BITPOS_DESCLIMIT 18
+#define CTRL_BITPOS_A 30
+#define CTRL_BITPOS_G 31
+
+/* ccb doorbell macros */
+#define L2_DB_SIZE 14
+#define ONE_DB_SIZE (1 << L2_DB_SIZE)
+
+/*
+ * Per fd structure used to track the ccb allocated to that dev file.
+ */
+struct ccb_data {
+ /* software version of ccb, using virtual addrs */
+ struct ccb driver_ccb;
+
+ /* hardware version of ccb, using physical addrs */
+ struct ccb ilo_ccb;
+
+ /* hardware ccb is written to this shared mapped device memory */
+ struct ccb __iomem *mapped_ccb;
+
+ /* dma'able memory used for send/recv queues */
+ void *dma_va;
+ dma_addr_t dma_pa;
+ size_t dma_size;
+
+ /* pointer to hardware device info */
+ struct ilo_hwinfo *ilo_hw;
+
+ /* queue for this ccb to wait for recv data */
+ wait_queue_head_t ccb_waitq;
+
+ /* usage count, to allow for shared ccb's */
+ int ccb_cnt;
+
+ /* open wanted exclusive access to this ccb */
+ int ccb_excl;
+};
+
+/*
+ * FIFO queue structure, shared with hw.
+ */
+#define ILO_START_ALIGN 4096
+#define ILO_CACHE_SZ 128
+struct fifo {
+ u64 nrents; /* user requested number of fifo entries */
+ u64 imask; /* mask to extract valid fifo index */
+ u64 merge; /* O/C bits to merge in during enqueue operation */
+ u64 reset; /* set to non-zero when the target device resets */
+ u8 pad_0[ILO_CACHE_SZ - (sizeof(u64) * 4)];
+
+ u64 head;
+ u8 pad_1[ILO_CACHE_SZ - (sizeof(u64))];
+
+ u64 tail;
+ u8 pad_2[ILO_CACHE_SZ - (sizeof(u64))];
+
+ u64 fifobar[1];
+};
+
+/* convert between struct fifo, and the fifobar, which is saved in the ccb */
+#define FIFOHANDLESIZE (sizeof(struct fifo) - sizeof(u64))
+#define FIFOBARTOHANDLE(_fifo) \
+ ((struct fifo *)(((char *)(_fifo)) - FIFOHANDLESIZE))
+
+/* the number of qwords to consume from the entry descriptor */
+#define ENTRY_BITPOS_QWORDS 0
+/* descriptor index number (within a specified queue) */
+#define ENTRY_BITPOS_DESCRIPTOR 10
+/* state bit, fifo entry consumed by consumer */
+#define ENTRY_BITPOS_C 22
+/* state bit, fifo entry is occupied */
+#define ENTRY_BITPOS_O 23
+
+#define ENTRY_BITS_QWORDS 10
+#define ENTRY_BITS_DESCRIPTOR 12
+#define ENTRY_BITS_C 1
+#define ENTRY_BITS_O 1
+#define ENTRY_BITS_TOTAL \
+ (ENTRY_BITS_C + ENTRY_BITS_O + \
+ ENTRY_BITS_QWORDS + ENTRY_BITS_DESCRIPTOR)
+
+/* extract various entry fields */
+#define ENTRY_MASK ((1 << ENTRY_BITS_TOTAL) - 1)
+#define ENTRY_MASK_C (((1 << ENTRY_BITS_C) - 1) << ENTRY_BITPOS_C)
+#define ENTRY_MASK_O (((1 << ENTRY_BITS_O) - 1) << ENTRY_BITPOS_O)
+#define ENTRY_MASK_QWORDS \
+ (((1 << ENTRY_BITS_QWORDS) - 1) << ENTRY_BITPOS_QWORDS)
+#define ENTRY_MASK_DESCRIPTOR \
+ (((1 << ENTRY_BITS_DESCRIPTOR) - 1) << ENTRY_BITPOS_DESCRIPTOR)
+
+#define ENTRY_MASK_NOSTATE (ENTRY_MASK >> (ENTRY_BITS_C + ENTRY_BITS_O))
+
+#endif /* __HPILO_H */
diff --git a/drivers/misc/ibmasm/Makefile b/drivers/misc/ibmasm/Makefile
new file mode 100644
index 0000000..9e63ade
--- /dev/null
+++ b/drivers/misc/ibmasm/Makefile
@@ -0,0 +1,15 @@
+
+obj-$(CONFIG_IBM_ASM) := ibmasm.o
+
+ibmasm-y := module.o \
+ ibmasmfs.o \
+ event.o \
+ command.o \
+ remote.o \
+ heartbeat.o \
+ r_heartbeat.o \
+ dot_command.o \
+ lowlevel.o
+
+ibmasm-$(CONFIG_SERIAL_8250) += uart.o
+
diff --git a/drivers/misc/ibmasm/command.c b/drivers/misc/ibmasm/command.c
new file mode 100644
index 0000000..7d56f45
--- /dev/null
+++ b/drivers/misc/ibmasm/command.c
@@ -0,0 +1,187 @@
+
+/*
+ * IBM ASM Service Processor Device Driver
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2004
+ *
+ * Author: Max Asböck <amax@us.ibm.com>
+ *
+ */
+
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include "ibmasm.h"
+#include "lowlevel.h"
+
+static void exec_next_command(struct service_processor *sp);
+
+static atomic_t command_count = ATOMIC_INIT(0);
+
+struct command *ibmasm_new_command(struct service_processor *sp, size_t buffer_size)
+{
+ struct command *cmd;
+
+ if (buffer_size > IBMASM_CMD_MAX_BUFFER_SIZE)
+ return NULL;
+
+ cmd = kzalloc(sizeof(struct command), GFP_KERNEL);
+ if (cmd == NULL)
+ return NULL;
+
+
+ cmd->buffer = kzalloc(buffer_size, GFP_KERNEL);
+ if (cmd->buffer == NULL) {
+ kfree(cmd);
+ return NULL;
+ }
+ cmd->buffer_size = buffer_size;
+
+ kref_init(&cmd->kref);
+ cmd->lock = &sp->lock;
+
+ cmd->status = IBMASM_CMD_PENDING;
+ init_waitqueue_head(&cmd->wait);
+ INIT_LIST_HEAD(&cmd->queue_node);
+
+ atomic_inc(&command_count);
+ dbg("command count: %d\n", atomic_read(&command_count));
+
+ return cmd;
+}
+
+void ibmasm_free_command(struct kref *kref)
+{
+ struct command *cmd = to_command(kref);
+
+ list_del(&cmd->queue_node);
+ atomic_dec(&command_count);
+ dbg("command count: %d\n", atomic_read(&command_count));
+ kfree(cmd->buffer);
+ kfree(cmd);
+}
+
+static void enqueue_command(struct service_processor *sp, struct command *cmd)
+{
+ list_add_tail(&cmd->queue_node, &sp->command_queue);
+}
+
+static struct command *dequeue_command(struct service_processor *sp)
+{
+ struct command *cmd;
+ struct list_head *next;
+
+ if (list_empty(&sp->command_queue))
+ return NULL;
+
+ next = sp->command_queue.next;
+ list_del_init(next);
+ cmd = list_entry(next, struct command, queue_node);
+
+ return cmd;
+}
+
+static inline void do_exec_command(struct service_processor *sp)
+{
+ char tsbuf[32];
+
+ dbg("%s:%d at %s\n", __func__, __LINE__, get_timestamp(tsbuf));
+
+ if (ibmasm_send_i2o_message(sp)) {
+ sp->current_command->status = IBMASM_CMD_FAILED;
+ wake_up(&sp->current_command->wait);
+ command_put(sp->current_command);
+ exec_next_command(sp);
+ }
+}
+
+/**
+ * exec_command
+ * send a command to a service processor
+ * Commands are executed sequentially. One command (sp->current_command)
+ * is sent to the service processor. Once the interrupt handler gets a
+ * message of type command_response, the message is copied into
+ * the current commands buffer,
+ */
+void ibmasm_exec_command(struct service_processor *sp, struct command *cmd)
+{
+ unsigned long flags;
+ char tsbuf[32];
+
+ dbg("%s:%d at %s\n", __func__, __LINE__, get_timestamp(tsbuf));
+
+ spin_lock_irqsave(&sp->lock, flags);
+
+ if (!sp->current_command) {
+ sp->current_command = cmd;
+ command_get(sp->current_command);
+ spin_unlock_irqrestore(&sp->lock, flags);
+ do_exec_command(sp);
+ } else {
+ enqueue_command(sp, cmd);
+ spin_unlock_irqrestore(&sp->lock, flags);
+ }
+}
+
+static void exec_next_command(struct service_processor *sp)
+{
+ unsigned long flags;
+ char tsbuf[32];
+
+ dbg("%s:%d at %s\n", __func__, __LINE__, get_timestamp(tsbuf));
+
+ spin_lock_irqsave(&sp->lock, flags);
+ sp->current_command = dequeue_command(sp);
+ if (sp->current_command) {
+ command_get(sp->current_command);
+ spin_unlock_irqrestore(&sp->lock, flags);
+ do_exec_command(sp);
+ } else {
+ spin_unlock_irqrestore(&sp->lock, flags);
+ }
+}
+
+/**
+ * Sleep until a command has failed or a response has been received
+ * and the command status been updated by the interrupt handler.
+ * (see receive_response).
+ */
+void ibmasm_wait_for_response(struct command *cmd, int timeout)
+{
+ wait_event_interruptible_timeout(cmd->wait,
+ cmd->status == IBMASM_CMD_COMPLETE ||
+ cmd->status == IBMASM_CMD_FAILED,
+ timeout * HZ);
+}
+
+/**
+ * receive_command_response
+ * called by the interrupt handler when a dot command of type command_response
+ * was received.
+ */
+void ibmasm_receive_command_response(struct service_processor *sp, void *response, size_t size)
+{
+ struct command *cmd = sp->current_command;
+
+ if (!sp->current_command)
+ return;
+
+ memcpy_fromio(cmd->buffer, response, min(size, cmd->buffer_size));
+ cmd->status = IBMASM_CMD_COMPLETE;
+ wake_up(&sp->current_command->wait);
+ command_put(sp->current_command);
+ exec_next_command(sp);
+}
diff --git a/drivers/misc/ibmasm/dot_command.c b/drivers/misc/ibmasm/dot_command.c
new file mode 100644
index 0000000..d7b2ca3
--- /dev/null
+++ b/drivers/misc/ibmasm/dot_command.c
@@ -0,0 +1,152 @@
+/*
+ * IBM ASM Service Processor Device Driver
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2004
+ *
+ * Author: Max Asböck <amax@us.ibm.com>
+ *
+ */
+
+#include "ibmasm.h"
+#include "dot_command.h"
+
+/**
+ * Dispatch an incoming message to the specific handler for the message.
+ * Called from interrupt context.
+ */
+void ibmasm_receive_message(struct service_processor *sp, void *message, int message_size)
+{
+ u32 size;
+ struct dot_command_header *header = (struct dot_command_header *)message;
+
+ if (message_size == 0)
+ return;
+
+ size = get_dot_command_size(message);
+ if (size == 0)
+ return;
+
+ if (size > message_size)
+ size = message_size;
+
+ switch (header->type) {
+ case sp_event:
+ ibmasm_receive_event(sp, message, size);
+ break;
+ case sp_command_response:
+ ibmasm_receive_command_response(sp, message, size);
+ break;
+ case sp_heartbeat:
+ ibmasm_receive_heartbeat(sp, message, size);
+ break;
+ default:
+ dev_err(sp->dev, "Received unknown message from service processor\n");
+ }
+}
+
+
+#define INIT_BUFFER_SIZE 32
+
+
+/**
+ * send the 4.3.5.10 dot command (driver VPD) to the service processor
+ */
+int ibmasm_send_driver_vpd(struct service_processor *sp)
+{
+ struct command *command;
+ struct dot_command_header *header;
+ u8 *vpd_command;
+ u8 *vpd_data;
+ int result = 0;
+
+ command = ibmasm_new_command(sp, INIT_BUFFER_SIZE);
+ if (command == NULL)
+ return -ENOMEM;
+
+ header = (struct dot_command_header *)command->buffer;
+ header->type = sp_write;
+ header->command_size = 4;
+ header->data_size = 16;
+ header->status = 0;
+ header->reserved = 0;
+
+ vpd_command = command->buffer + sizeof(struct dot_command_header);
+ vpd_command[0] = 0x4;
+ vpd_command[1] = 0x3;
+ vpd_command[2] = 0x5;
+ vpd_command[3] = 0xa;
+
+ vpd_data = vpd_command + header->command_size;
+ vpd_data[0] = 0;
+ strcat(vpd_data, IBMASM_DRIVER_VPD);
+ vpd_data[10] = 0;
+ vpd_data[15] = 0;
+
+ ibmasm_exec_command(sp, command);
+ ibmasm_wait_for_response(command, IBMASM_CMD_TIMEOUT_NORMAL);
+
+ if (command->status != IBMASM_CMD_COMPLETE)
+ result = -ENODEV;
+
+ command_put(command);
+
+ return result;
+}
+
+struct os_state_command {
+ struct dot_command_header header;
+ unsigned char command[3];
+ unsigned char data;
+};
+
+/**
+ * send the 4.3.6 dot command (os state) to the service processor
+ * During driver init this function is called with os state "up".
+ * This causes the service processor to start sending heartbeats the
+ * driver.
+ * During driver exit the function is called with os state "down",
+ * causing the service processor to stop the heartbeats.
+ */
+int ibmasm_send_os_state(struct service_processor *sp, int os_state)
+{
+ struct command *cmd;
+ struct os_state_command *os_state_cmd;
+ int result = 0;
+
+ cmd = ibmasm_new_command(sp, sizeof(struct os_state_command));
+ if (cmd == NULL)
+ return -ENOMEM;
+
+ os_state_cmd = (struct os_state_command *)cmd->buffer;
+ os_state_cmd->header.type = sp_write;
+ os_state_cmd->header.command_size = 3;
+ os_state_cmd->header.data_size = 1;
+ os_state_cmd->header.status = 0;
+ os_state_cmd->command[0] = 4;
+ os_state_cmd->command[1] = 3;
+ os_state_cmd->command[2] = 6;
+ os_state_cmd->data = os_state;
+
+ ibmasm_exec_command(sp, cmd);
+ ibmasm_wait_for_response(cmd, IBMASM_CMD_TIMEOUT_NORMAL);
+
+ if (cmd->status != IBMASM_CMD_COMPLETE)
+ result = -ENODEV;
+
+ command_put(cmd);
+ return result;
+}
diff --git a/drivers/misc/ibmasm/dot_command.h b/drivers/misc/ibmasm/dot_command.h
new file mode 100644
index 0000000..fc9fc9d
--- /dev/null
+++ b/drivers/misc/ibmasm/dot_command.h
@@ -0,0 +1,78 @@
+/*
+ * IBM ASM Service Processor Device Driver
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2004
+ *
+ * Author: Max Asböck <amax@us.ibm.com>
+ *
+ */
+
+#ifndef __DOT_COMMAND_H__
+#define __DOT_COMMAND_H__
+
+/*
+ * dot commands are the protocol used to communicate with the service
+ * processor.
+ * They consist of header, a command of variable length and data of
+ * variable length.
+ */
+
+/* dot command types */
+#define sp_write 0
+#define sp_write_next 1
+#define sp_read 2
+#define sp_read_next 3
+#define sp_command_response 4
+#define sp_event 5
+#define sp_heartbeat 6
+
+#pragma pack(1)
+struct dot_command_header {
+ u8 type;
+ u8 command_size;
+ u16 data_size;
+ u8 status;
+ u8 reserved;
+};
+#pragma pack()
+
+static inline size_t get_dot_command_size(void *buffer)
+{
+ struct dot_command_header *cmd = (struct dot_command_header *)buffer;
+ return sizeof(struct dot_command_header) + cmd->command_size + cmd->data_size;
+}
+
+static inline unsigned int get_dot_command_timeout(void *buffer)
+{
+ struct dot_command_header *header = (struct dot_command_header *)buffer;
+ unsigned char *cmd = buffer + sizeof(struct dot_command_header);
+
+ /* dot commands 6.3.1, 7.1 and 8.x need a longer timeout */
+
+ if (header->command_size == 3) {
+ if ((cmd[0] == 6) && (cmd[1] == 3) && (cmd[2] == 1))
+ return IBMASM_CMD_TIMEOUT_EXTRA;
+ } else if (header->command_size == 2) {
+ if ((cmd[0] == 7) && (cmd[1] == 1))
+ return IBMASM_CMD_TIMEOUT_EXTRA;
+ if (cmd[0] == 8)
+ return IBMASM_CMD_TIMEOUT_EXTRA;
+ }
+ return IBMASM_CMD_TIMEOUT_NORMAL;
+}
+
+#endif /* __DOT_COMMAND_H__ */
diff --git a/drivers/misc/ibmasm/event.c b/drivers/misc/ibmasm/event.c
new file mode 100644
index 0000000..8e540f4
--- /dev/null
+++ b/drivers/misc/ibmasm/event.c
@@ -0,0 +1,177 @@
+
+/*
+ * IBM ASM Service Processor Device Driver
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2004
+ *
+ * Author: Max Asböck <amax@us.ibm.com>
+ *
+ */
+
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include "ibmasm.h"
+#include "lowlevel.h"
+
+/*
+ * ASM service processor event handling routines.
+ *
+ * Events are signalled to the device drivers through interrupts.
+ * They have the format of dot commands, with the type field set to
+ * sp_event.
+ * The driver does not interpret the events, it simply stores them in a
+ * circular buffer.
+ */
+
+static void wake_up_event_readers(struct service_processor *sp)
+{
+ struct event_reader *reader;
+
+ list_for_each_entry(reader, &sp->event_buffer->readers, node)
+ wake_up_interruptible(&reader->wait);
+}
+
+/**
+ * receive_event
+ * Called by the interrupt handler when a dot command of type sp_event is
+ * received.
+ * Store the event in the circular event buffer, wake up any sleeping
+ * event readers.
+ * There is no reader marker in the buffer, therefore readers are
+ * responsible for keeping up with the writer, or they will lose events.
+ */
+void ibmasm_receive_event(struct service_processor *sp, void *data, unsigned int data_size)
+{
+ struct event_buffer *buffer = sp->event_buffer;
+ struct ibmasm_event *event;
+ unsigned long flags;
+
+ data_size = min(data_size, IBMASM_EVENT_MAX_SIZE);
+
+ spin_lock_irqsave(&sp->lock, flags);
+ /* copy the event into the next slot in the circular buffer */
+ event = &buffer->events[buffer->next_index];
+ memcpy_fromio(event->data, data, data_size);
+ event->data_size = data_size;
+ event->serial_number = buffer->next_serial_number;
+
+ /* advance indices in the buffer */
+ buffer->next_index = (buffer->next_index + 1) % IBMASM_NUM_EVENTS;
+ buffer->next_serial_number++;
+ spin_unlock_irqrestore(&sp->lock, flags);
+
+ wake_up_event_readers(sp);
+}
+
+static inline int event_available(struct event_buffer *b, struct event_reader *r)
+{
+ return (r->next_serial_number < b->next_serial_number);
+}
+
+/**
+ * get_next_event
+ * Called by event readers (initiated from user space through the file
+ * system).
+ * Sleeps until a new event is available.
+ */
+int ibmasm_get_next_event(struct service_processor *sp, struct event_reader *reader)
+{
+ struct event_buffer *buffer = sp->event_buffer;
+ struct ibmasm_event *event;
+ unsigned int index;
+ unsigned long flags;
+
+ reader->cancelled = 0;
+
+ if (wait_event_interruptible(reader->wait,
+ event_available(buffer, reader) || reader->cancelled))
+ return -ERESTARTSYS;
+
+ if (!event_available(buffer, reader))
+ return 0;
+
+ spin_lock_irqsave(&sp->lock, flags);
+
+ index = buffer->next_index;
+ event = &buffer->events[index];
+ while (event->serial_number < reader->next_serial_number) {
+ index = (index + 1) % IBMASM_NUM_EVENTS;
+ event = &buffer->events[index];
+ }
+ memcpy(reader->data, event->data, event->data_size);
+ reader->data_size = event->data_size;
+ reader->next_serial_number = event->serial_number + 1;
+
+ spin_unlock_irqrestore(&sp->lock, flags);
+
+ return event->data_size;
+}
+
+void ibmasm_cancel_next_event(struct event_reader *reader)
+{
+ reader->cancelled = 1;
+ wake_up_interruptible(&reader->wait);
+}
+
+void ibmasm_event_reader_register(struct service_processor *sp, struct event_reader *reader)
+{
+ unsigned long flags;
+
+ reader->next_serial_number = sp->event_buffer->next_serial_number;
+ init_waitqueue_head(&reader->wait);
+ spin_lock_irqsave(&sp->lock, flags);
+ list_add(&reader->node, &sp->event_buffer->readers);
+ spin_unlock_irqrestore(&sp->lock, flags);
+}
+
+void ibmasm_event_reader_unregister(struct service_processor *sp, struct event_reader *reader)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&sp->lock, flags);
+ list_del(&reader->node);
+ spin_unlock_irqrestore(&sp->lock, flags);
+}
+
+int ibmasm_event_buffer_init(struct service_processor *sp)
+{
+ struct event_buffer *buffer;
+ struct ibmasm_event *event;
+ int i;
+
+ buffer = kmalloc(sizeof(struct event_buffer), GFP_KERNEL);
+ if (!buffer)
+ return 1;
+
+ buffer->next_index = 0;
+ buffer->next_serial_number = 1;
+
+ event = buffer->events;
+ for (i=0; i<IBMASM_NUM_EVENTS; i++, event++)
+ event->serial_number = 0;
+
+ INIT_LIST_HEAD(&buffer->readers);
+
+ sp->event_buffer = buffer;
+
+ return 0;
+}
+
+void ibmasm_event_buffer_exit(struct service_processor *sp)
+{
+ kfree(sp->event_buffer);
+}
diff --git a/drivers/misc/ibmasm/heartbeat.c b/drivers/misc/ibmasm/heartbeat.c
new file mode 100644
index 0000000..9074637
--- /dev/null
+++ b/drivers/misc/ibmasm/heartbeat.c
@@ -0,0 +1,101 @@
+
+/*
+ * IBM ASM Service Processor Device Driver
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2004
+ *
+ * Author: Max Asböck <amax@us.ibm.com>
+ *
+ */
+
+#include <linux/notifier.h>
+#include "ibmasm.h"
+#include "dot_command.h"
+#include "lowlevel.h"
+
+static int suspend_heartbeats = 0;
+
+/*
+ * Once the driver indicates to the service processor that it is running
+ * - see send_os_state() - the service processor sends periodic heartbeats
+ * to the driver. The driver must respond to the heartbeats or else the OS
+ * will be rebooted.
+ * In the case of a panic the interrupt handler continues to work and thus
+ * continues to respond to heartbeats, making the service processor believe
+ * the OS is still running and thus preventing a reboot.
+ * To prevent this from happening a callback is added the panic_notifier_list.
+ * Before responding to a heartbeat the driver checks if a panic has happened,
+ * if yes it suspends heartbeat, causing the service processor to reboot as
+ * expected.
+ */
+static int panic_happened(struct notifier_block *n, unsigned long val, void *v)
+{
+ suspend_heartbeats = 1;
+ return 0;
+}
+
+static struct notifier_block panic_notifier = { panic_happened, NULL, 1 };
+
+void ibmasm_register_panic_notifier(void)
+{
+ atomic_notifier_chain_register(&panic_notifier_list, &panic_notifier);
+}
+
+void ibmasm_unregister_panic_notifier(void)
+{
+ atomic_notifier_chain_unregister(&panic_notifier_list,
+ &panic_notifier);
+}
+
+
+int ibmasm_heartbeat_init(struct service_processor *sp)
+{
+ sp->heartbeat = ibmasm_new_command(sp, HEARTBEAT_BUFFER_SIZE);
+ if (sp->heartbeat == NULL)
+ return -ENOMEM;
+
+ return 0;
+}
+
+void ibmasm_heartbeat_exit(struct service_processor *sp)
+{
+ char tsbuf[32];
+
+ dbg("%s:%d at %s\n", __func__, __LINE__, get_timestamp(tsbuf));
+ ibmasm_wait_for_response(sp->heartbeat, IBMASM_CMD_TIMEOUT_NORMAL);
+ dbg("%s:%d at %s\n", __func__, __LINE__, get_timestamp(tsbuf));
+ suspend_heartbeats = 1;
+ command_put(sp->heartbeat);
+}
+
+void ibmasm_receive_heartbeat(struct service_processor *sp, void *message, size_t size)
+{
+ struct command *cmd = sp->heartbeat;
+ struct dot_command_header *header = (struct dot_command_header *)cmd->buffer;
+ char tsbuf[32];
+
+ dbg("%s:%d at %s\n", __func__, __LINE__, get_timestamp(tsbuf));
+ if (suspend_heartbeats)
+ return;
+
+ /* return the received dot command to sender */
+ cmd->status = IBMASM_CMD_PENDING;
+ size = min(size, cmd->buffer_size);
+ memcpy_fromio(cmd->buffer, message, size);
+ header->type = sp_write;
+ ibmasm_exec_command(sp, cmd);
+}
diff --git a/drivers/misc/ibmasm/i2o.h b/drivers/misc/ibmasm/i2o.h
new file mode 100644
index 0000000..2e9566d
--- /dev/null
+++ b/drivers/misc/ibmasm/i2o.h
@@ -0,0 +1,77 @@
+/*
+ * IBM ASM Service Processor Device Driver
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2004
+ *
+ * Author: Max Asböck <amax@us.ibm.com>
+ *
+ */
+
+#pragma pack(1)
+struct i2o_header {
+ u8 version;
+ u8 message_flags;
+ u16 message_size;
+ u8 target;
+ u8 initiator_and_target;
+ u8 initiator;
+ u8 function;
+ u32 initiator_context;
+};
+#pragma pack()
+
+#define I2O_HEADER_TEMPLATE \
+ { .version = 0x01, \
+ .message_flags = 0x00, \
+ .function = 0xFF, \
+ .initiator = 0x00, \
+ .initiator_and_target = 0x40, \
+ .target = 0x00, \
+ .initiator_context = 0x0 }
+
+#define I2O_MESSAGE_SIZE 0x1000
+#define I2O_COMMAND_SIZE (I2O_MESSAGE_SIZE - sizeof(struct i2o_header))
+
+#pragma pack(1)
+struct i2o_message {
+ struct i2o_header header;
+ void *data;
+};
+#pragma pack()
+
+static inline unsigned short outgoing_message_size(unsigned int data_size)
+{
+ unsigned int size;
+ unsigned short i2o_size;
+
+ if (data_size > I2O_COMMAND_SIZE)
+ data_size = I2O_COMMAND_SIZE;
+
+ size = sizeof(struct i2o_header) + data_size;
+
+ i2o_size = size / sizeof(u32);
+
+ if (size % sizeof(u32))
+ i2o_size++;
+
+ return i2o_size;
+}
+
+static inline u32 incoming_data_size(struct i2o_message *i2o_message)
+{
+ return (sizeof(u32) * i2o_message->header.message_size);
+}
diff --git a/drivers/misc/ibmasm/ibmasm.h b/drivers/misc/ibmasm/ibmasm.h
new file mode 100644
index 0000000..9b08344
--- /dev/null
+++ b/drivers/misc/ibmasm/ibmasm.h
@@ -0,0 +1,220 @@
+
+/*
+ * IBM ASM Service Processor Device Driver
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2004
+ *
+ * Author: Max Asböck <amax@us.ibm.com>
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/list.h>
+#include <linux/wait.h>
+#include <linux/spinlock.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/interrupt.h>
+#include <linux/kref.h>
+#include <linux/device.h>
+#include <linux/input.h>
+
+/* Driver identification */
+#define DRIVER_NAME "ibmasm"
+#define DRIVER_VERSION "1.0"
+#define DRIVER_AUTHOR "Max Asbock <masbock@us.ibm.com>, Vernon Mauery <vernux@us.ibm.com>"
+#define DRIVER_DESC "IBM ASM Service Processor Driver"
+
+#define err(msg) printk(KERN_ERR "%s: " msg "\n", DRIVER_NAME)
+#define info(msg) printk(KERN_INFO "%s: " msg "\n", DRIVER_NAME)
+
+extern int ibmasm_debug;
+#define dbg(STR, ARGS...) \
+ do { \
+ if (ibmasm_debug) \
+ printk(KERN_DEBUG STR , ##ARGS); \
+ } while (0)
+
+static inline char *get_timestamp(char *buf)
+{
+ struct timeval now;
+ do_gettimeofday(&now);
+ sprintf(buf, "%lu.%lu", now.tv_sec, now.tv_usec);
+ return buf;
+}
+
+#define IBMASM_CMD_PENDING 0
+#define IBMASM_CMD_COMPLETE 1
+#define IBMASM_CMD_FAILED 2
+
+#define IBMASM_CMD_TIMEOUT_NORMAL 45
+#define IBMASM_CMD_TIMEOUT_EXTRA 240
+
+#define IBMASM_CMD_MAX_BUFFER_SIZE 0x8000
+
+#define REVERSE_HEARTBEAT_TIMEOUT 120
+
+#define HEARTBEAT_BUFFER_SIZE 0x400
+
+#ifdef IA64
+#define IBMASM_DRIVER_VPD "Lin64 6.08 "
+#else
+#define IBMASM_DRIVER_VPD "Lin32 6.08 "
+#endif
+
+#define SYSTEM_STATE_OS_UP 5
+#define SYSTEM_STATE_OS_DOWN 4
+
+#define IBMASM_NAME_SIZE 16
+
+#define IBMASM_NUM_EVENTS 10
+#define IBMASM_EVENT_MAX_SIZE 2048u
+
+
+struct command {
+ struct list_head queue_node;
+ wait_queue_head_t wait;
+ unsigned char *buffer;
+ size_t buffer_size;
+ int status;
+ struct kref kref;
+ spinlock_t *lock;
+};
+#define to_command(c) container_of(c, struct command, kref)
+
+void ibmasm_free_command(struct kref *kref);
+static inline void command_put(struct command *cmd)
+{
+ unsigned long flags;
+ spinlock_t *lock = cmd->lock;
+
+ spin_lock_irqsave(lock, flags);
+ kref_put(&cmd->kref, ibmasm_free_command);
+ spin_unlock_irqrestore(lock, flags);
+}
+
+static inline void command_get(struct command *cmd)
+{
+ kref_get(&cmd->kref);
+}
+
+
+struct ibmasm_event {
+ unsigned int serial_number;
+ unsigned int data_size;
+ unsigned char data[IBMASM_EVENT_MAX_SIZE];
+};
+
+struct event_buffer {
+ struct ibmasm_event events[IBMASM_NUM_EVENTS];
+ unsigned int next_serial_number;
+ unsigned int next_index;
+ struct list_head readers;
+};
+
+struct event_reader {
+ int cancelled;
+ unsigned int next_serial_number;
+ wait_queue_head_t wait;
+ struct list_head node;
+ unsigned int data_size;
+ unsigned char data[IBMASM_EVENT_MAX_SIZE];
+};
+
+struct reverse_heartbeat {
+ wait_queue_head_t wait;
+ unsigned int stopped;
+};
+
+struct ibmasm_remote {
+ struct input_dev *keybd_dev;
+ struct input_dev *mouse_dev;
+};
+
+struct service_processor {
+ struct list_head node;
+ spinlock_t lock;
+ void __iomem *base_address;
+ unsigned int irq;
+ struct command *current_command;
+ struct command *heartbeat;
+ struct list_head command_queue;
+ struct event_buffer *event_buffer;
+ char dirname[IBMASM_NAME_SIZE];
+ char devname[IBMASM_NAME_SIZE];
+ unsigned int number;
+ struct ibmasm_remote remote;
+ int serial_line;
+ struct device *dev;
+};
+
+/* command processing */
+struct command *ibmasm_new_command(struct service_processor *sp, size_t buffer_size);
+void ibmasm_exec_command(struct service_processor *sp, struct command *cmd);
+void ibmasm_wait_for_response(struct command *cmd, int timeout);
+void ibmasm_receive_command_response(struct service_processor *sp, void *response, size_t size);
+
+/* event processing */
+int ibmasm_event_buffer_init(struct service_processor *sp);
+void ibmasm_event_buffer_exit(struct service_processor *sp);
+void ibmasm_receive_event(struct service_processor *sp, void *data, unsigned int data_size);
+void ibmasm_event_reader_register(struct service_processor *sp, struct event_reader *reader);
+void ibmasm_event_reader_unregister(struct service_processor *sp, struct event_reader *reader);
+int ibmasm_get_next_event(struct service_processor *sp, struct event_reader *reader);
+void ibmasm_cancel_next_event(struct event_reader *reader);
+
+/* heartbeat - from SP to OS */
+void ibmasm_register_panic_notifier(void);
+void ibmasm_unregister_panic_notifier(void);
+int ibmasm_heartbeat_init(struct service_processor *sp);
+void ibmasm_heartbeat_exit(struct service_processor *sp);
+void ibmasm_receive_heartbeat(struct service_processor *sp, void *message, size_t size);
+
+/* reverse heartbeat - from OS to SP */
+void ibmasm_init_reverse_heartbeat(struct service_processor *sp, struct reverse_heartbeat *rhb);
+int ibmasm_start_reverse_heartbeat(struct service_processor *sp, struct reverse_heartbeat *rhb);
+void ibmasm_stop_reverse_heartbeat(struct reverse_heartbeat *rhb);
+
+/* dot commands */
+void ibmasm_receive_message(struct service_processor *sp, void *data, int data_size);
+int ibmasm_send_driver_vpd(struct service_processor *sp);
+int ibmasm_send_os_state(struct service_processor *sp, int os_state);
+
+/* low level message processing */
+int ibmasm_send_i2o_message(struct service_processor *sp);
+irqreturn_t ibmasm_interrupt_handler(int irq, void * dev_id);
+
+/* remote console */
+void ibmasm_handle_mouse_interrupt(struct service_processor *sp);
+int ibmasm_init_remote_input_dev(struct service_processor *sp);
+void ibmasm_free_remote_input_dev(struct service_processor *sp);
+
+/* file system */
+int ibmasmfs_register(void);
+void ibmasmfs_unregister(void);
+void ibmasmfs_add_sp(struct service_processor *sp);
+
+/* uart */
+#ifdef CONFIG_SERIAL_8250
+void ibmasm_register_uart(struct service_processor *sp);
+void ibmasm_unregister_uart(struct service_processor *sp);
+#else
+#define ibmasm_register_uart(sp) do { } while(0)
+#define ibmasm_unregister_uart(sp) do { } while(0)
+#endif
diff --git a/drivers/misc/ibmasm/ibmasmfs.c b/drivers/misc/ibmasm/ibmasmfs.c
new file mode 100644
index 0000000..e8b9331
--- /dev/null
+++ b/drivers/misc/ibmasm/ibmasmfs.c
@@ -0,0 +1,629 @@
+/*
+ * IBM ASM Service Processor Device Driver
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2004
+ *
+ * Author: Max Asböck <amax@us.ibm.com>
+ *
+ */
+
+/*
+ * Parts of this code are based on an article by Jonathan Corbet
+ * that appeared in Linux Weekly News.
+ */
+
+
+/*
+ * The IBMASM file virtual filesystem. It creates the following hierarchy
+ * dynamically when mounted from user space:
+ *
+ * /ibmasm
+ * |-- 0
+ * | |-- command
+ * | |-- event
+ * | |-- reverse_heartbeat
+ * | `-- remote_video
+ * | |-- depth
+ * | |-- height
+ * | `-- width
+ * .
+ * .
+ * .
+ * `-- n
+ * |-- command
+ * |-- event
+ * |-- reverse_heartbeat
+ * `-- remote_video
+ * |-- depth
+ * |-- height
+ * `-- width
+ *
+ * For each service processor the following files are created:
+ *
+ * command: execute dot commands
+ * write: execute a dot command on the service processor
+ * read: return the result of a previously executed dot command
+ *
+ * events: listen for service processor events
+ * read: sleep (interruptible) until an event occurs
+ * write: wakeup sleeping event listener
+ *
+ * reverse_heartbeat: send a heartbeat to the service processor
+ * read: sleep (interruptible) until the reverse heartbeat fails
+ * write: wakeup sleeping heartbeat listener
+ *
+ * remote_video/width
+ * remote_video/height
+ * remote_video/width: control remote display settings
+ * write: set value
+ * read: read value
+ */
+
+#include <linux/fs.h>
+#include <linux/pagemap.h>
+#include <linux/slab.h>
+#include <asm/uaccess.h>
+#include <asm/io.h>
+#include "ibmasm.h"
+#include "remote.h"
+#include "dot_command.h"
+
+#define IBMASMFS_MAGIC 0x66726f67
+
+static LIST_HEAD(service_processors);
+
+static struct inode *ibmasmfs_make_inode(struct super_block *sb, int mode);
+static void ibmasmfs_create_files (struct super_block *sb);
+static int ibmasmfs_fill_super (struct super_block *sb, void *data, int silent);
+
+
+static struct dentry *ibmasmfs_mount(struct file_system_type *fst,
+ int flags, const char *name, void *data)
+{
+ return mount_single(fst, flags, data, ibmasmfs_fill_super);
+}
+
+static const struct super_operations ibmasmfs_s_ops = {
+ .statfs = simple_statfs,
+ .drop_inode = generic_delete_inode,
+};
+
+static const struct file_operations *ibmasmfs_dir_ops = &simple_dir_operations;
+
+static struct file_system_type ibmasmfs_type = {
+ .owner = THIS_MODULE,
+ .name = "ibmasmfs",
+ .mount = ibmasmfs_mount,
+ .kill_sb = kill_litter_super,
+};
+MODULE_ALIAS_FS("ibmasmfs");
+
+static int ibmasmfs_fill_super (struct super_block *sb, void *data, int silent)
+{
+ struct inode *root;
+
+ sb->s_blocksize = PAGE_CACHE_SIZE;
+ sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
+ sb->s_magic = IBMASMFS_MAGIC;
+ sb->s_op = &ibmasmfs_s_ops;
+ sb->s_time_gran = 1;
+
+ root = ibmasmfs_make_inode (sb, S_IFDIR | 0500);
+ if (!root)
+ return -ENOMEM;
+
+ root->i_op = &simple_dir_inode_operations;
+ root->i_fop = ibmasmfs_dir_ops;
+
+ sb->s_root = d_make_root(root);
+ if (!sb->s_root)
+ return -ENOMEM;
+
+ ibmasmfs_create_files(sb);
+ return 0;
+}
+
+static struct inode *ibmasmfs_make_inode(struct super_block *sb, int mode)
+{
+ struct inode *ret = new_inode(sb);
+
+ if (ret) {
+ ret->i_ino = get_next_ino();
+ ret->i_mode = mode;
+ ret->i_atime = ret->i_mtime = ret->i_ctime = CURRENT_TIME;
+ }
+ return ret;
+}
+
+static struct dentry *ibmasmfs_create_file(struct dentry *parent,
+ const char *name,
+ const struct file_operations *fops,
+ void *data,
+ int mode)
+{
+ struct dentry *dentry;
+ struct inode *inode;
+
+ dentry = d_alloc_name(parent, name);
+ if (!dentry)
+ return NULL;
+
+ inode = ibmasmfs_make_inode(parent->d_sb, S_IFREG | mode);
+ if (!inode) {
+ dput(dentry);
+ return NULL;
+ }
+
+ inode->i_fop = fops;
+ inode->i_private = data;
+
+ d_add(dentry, inode);
+ return dentry;
+}
+
+static struct dentry *ibmasmfs_create_dir(struct dentry *parent,
+ const char *name)
+{
+ struct dentry *dentry;
+ struct inode *inode;
+
+ dentry = d_alloc_name(parent, name);
+ if (!dentry)
+ return NULL;
+
+ inode = ibmasmfs_make_inode(parent->d_sb, S_IFDIR | 0500);
+ if (!inode) {
+ dput(dentry);
+ return NULL;
+ }
+
+ inode->i_op = &simple_dir_inode_operations;
+ inode->i_fop = ibmasmfs_dir_ops;
+
+ d_add(dentry, inode);
+ return dentry;
+}
+
+int ibmasmfs_register(void)
+{
+ return register_filesystem(&ibmasmfs_type);
+}
+
+void ibmasmfs_unregister(void)
+{
+ unregister_filesystem(&ibmasmfs_type);
+}
+
+void ibmasmfs_add_sp(struct service_processor *sp)
+{
+ list_add(&sp->node, &service_processors);
+}
+
+/* struct to save state between command file operations */
+struct ibmasmfs_command_data {
+ struct service_processor *sp;
+ struct command *command;
+};
+
+/* struct to save state between event file operations */
+struct ibmasmfs_event_data {
+ struct service_processor *sp;
+ struct event_reader reader;
+ int active;
+};
+
+/* struct to save state between reverse heartbeat file operations */
+struct ibmasmfs_heartbeat_data {
+ struct service_processor *sp;
+ struct reverse_heartbeat heartbeat;
+ int active;
+};
+
+static int command_file_open(struct inode *inode, struct file *file)
+{
+ struct ibmasmfs_command_data *command_data;
+
+ if (!inode->i_private)
+ return -ENODEV;
+
+ command_data = kmalloc(sizeof(struct ibmasmfs_command_data), GFP_KERNEL);
+ if (!command_data)
+ return -ENOMEM;
+
+ command_data->command = NULL;
+ command_data->sp = inode->i_private;
+ file->private_data = command_data;
+ return 0;
+}
+
+static int command_file_close(struct inode *inode, struct file *file)
+{
+ struct ibmasmfs_command_data *command_data = file->private_data;
+
+ if (command_data->command)
+ command_put(command_data->command);
+
+ kfree(command_data);
+ return 0;
+}
+
+static ssize_t command_file_read(struct file *file, char __user *buf, size_t count, loff_t *offset)
+{
+ struct ibmasmfs_command_data *command_data = file->private_data;
+ struct command *cmd;
+ int len;
+ unsigned long flags;
+
+ if (*offset < 0)
+ return -EINVAL;
+ if (count == 0 || count > IBMASM_CMD_MAX_BUFFER_SIZE)
+ return 0;
+ if (*offset != 0)
+ return 0;
+
+ spin_lock_irqsave(&command_data->sp->lock, flags);
+ cmd = command_data->command;
+ if (cmd == NULL) {
+ spin_unlock_irqrestore(&command_data->sp->lock, flags);
+ return 0;
+ }
+ command_data->command = NULL;
+ spin_unlock_irqrestore(&command_data->sp->lock, flags);
+
+ if (cmd->status != IBMASM_CMD_COMPLETE) {
+ command_put(cmd);
+ return -EIO;
+ }
+ len = min(count, cmd->buffer_size);
+ if (copy_to_user(buf, cmd->buffer, len)) {
+ command_put(cmd);
+ return -EFAULT;
+ }
+ command_put(cmd);
+
+ return len;
+}
+
+static ssize_t command_file_write(struct file *file, const char __user *ubuff, size_t count, loff_t *offset)
+{
+ struct ibmasmfs_command_data *command_data = file->private_data;
+ struct command *cmd;
+ unsigned long flags;
+
+ if (*offset < 0)
+ return -EINVAL;
+ if (count == 0 || count > IBMASM_CMD_MAX_BUFFER_SIZE)
+ return 0;
+ if (*offset != 0)
+ return 0;
+
+ /* commands are executed sequentially, only one command at a time */
+ if (command_data->command)
+ return -EAGAIN;
+
+ cmd = ibmasm_new_command(command_data->sp, count);
+ if (!cmd)
+ return -ENOMEM;
+
+ if (copy_from_user(cmd->buffer, ubuff, count)) {
+ command_put(cmd);
+ return -EFAULT;
+ }
+
+ spin_lock_irqsave(&command_data->sp->lock, flags);
+ if (command_data->command) {
+ spin_unlock_irqrestore(&command_data->sp->lock, flags);
+ command_put(cmd);
+ return -EAGAIN;
+ }
+ command_data->command = cmd;
+ spin_unlock_irqrestore(&command_data->sp->lock, flags);
+
+ ibmasm_exec_command(command_data->sp, cmd);
+ ibmasm_wait_for_response(cmd, get_dot_command_timeout(cmd->buffer));
+
+ return count;
+}
+
+static int event_file_open(struct inode *inode, struct file *file)
+{
+ struct ibmasmfs_event_data *event_data;
+ struct service_processor *sp;
+
+ if (!inode->i_private)
+ return -ENODEV;
+
+ sp = inode->i_private;
+
+ event_data = kmalloc(sizeof(struct ibmasmfs_event_data), GFP_KERNEL);
+ if (!event_data)
+ return -ENOMEM;
+
+ ibmasm_event_reader_register(sp, &event_data->reader);
+
+ event_data->sp = sp;
+ event_data->active = 0;
+ file->private_data = event_data;
+ return 0;
+}
+
+static int event_file_close(struct inode *inode, struct file *file)
+{
+ struct ibmasmfs_event_data *event_data = file->private_data;
+
+ ibmasm_event_reader_unregister(event_data->sp, &event_data->reader);
+ kfree(event_data);
+ return 0;
+}
+
+static ssize_t event_file_read(struct file *file, char __user *buf, size_t count, loff_t *offset)
+{
+ struct ibmasmfs_event_data *event_data = file->private_data;
+ struct event_reader *reader = &event_data->reader;
+ struct service_processor *sp = event_data->sp;
+ int ret;
+ unsigned long flags;
+
+ if (*offset < 0)
+ return -EINVAL;
+ if (count == 0 || count > IBMASM_EVENT_MAX_SIZE)
+ return 0;
+ if (*offset != 0)
+ return 0;
+
+ spin_lock_irqsave(&sp->lock, flags);
+ if (event_data->active) {
+ spin_unlock_irqrestore(&sp->lock, flags);
+ return -EBUSY;
+ }
+ event_data->active = 1;
+ spin_unlock_irqrestore(&sp->lock, flags);
+
+ ret = ibmasm_get_next_event(sp, reader);
+ if (ret <= 0)
+ goto out;
+
+ if (count < reader->data_size) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ if (copy_to_user(buf, reader->data, reader->data_size)) {
+ ret = -EFAULT;
+ goto out;
+ }
+ ret = reader->data_size;
+
+out:
+ event_data->active = 0;
+ return ret;
+}
+
+static ssize_t event_file_write(struct file *file, const char __user *buf, size_t count, loff_t *offset)
+{
+ struct ibmasmfs_event_data *event_data = file->private_data;
+
+ if (*offset < 0)
+ return -EINVAL;
+ if (count != 1)
+ return 0;
+ if (*offset != 0)
+ return 0;
+
+ ibmasm_cancel_next_event(&event_data->reader);
+ return 0;
+}
+
+static int r_heartbeat_file_open(struct inode *inode, struct file *file)
+{
+ struct ibmasmfs_heartbeat_data *rhbeat;
+
+ if (!inode->i_private)
+ return -ENODEV;
+
+ rhbeat = kmalloc(sizeof(struct ibmasmfs_heartbeat_data), GFP_KERNEL);
+ if (!rhbeat)
+ return -ENOMEM;
+
+ rhbeat->sp = inode->i_private;
+ rhbeat->active = 0;
+ ibmasm_init_reverse_heartbeat(rhbeat->sp, &rhbeat->heartbeat);
+ file->private_data = rhbeat;
+ return 0;
+}
+
+static int r_heartbeat_file_close(struct inode *inode, struct file *file)
+{
+ struct ibmasmfs_heartbeat_data *rhbeat = file->private_data;
+
+ kfree(rhbeat);
+ return 0;
+}
+
+static ssize_t r_heartbeat_file_read(struct file *file, char __user *buf, size_t count, loff_t *offset)
+{
+ struct ibmasmfs_heartbeat_data *rhbeat = file->private_data;
+ unsigned long flags;
+ int result;
+
+ if (*offset < 0)
+ return -EINVAL;
+ if (count == 0 || count > 1024)
+ return 0;
+ if (*offset != 0)
+ return 0;
+
+ /* allow only one reverse heartbeat per process */
+ spin_lock_irqsave(&rhbeat->sp->lock, flags);
+ if (rhbeat->active) {
+ spin_unlock_irqrestore(&rhbeat->sp->lock, flags);
+ return -EBUSY;
+ }
+ rhbeat->active = 1;
+ spin_unlock_irqrestore(&rhbeat->sp->lock, flags);
+
+ result = ibmasm_start_reverse_heartbeat(rhbeat->sp, &rhbeat->heartbeat);
+ rhbeat->active = 0;
+
+ return result;
+}
+
+static ssize_t r_heartbeat_file_write(struct file *file, const char __user *buf, size_t count, loff_t *offset)
+{
+ struct ibmasmfs_heartbeat_data *rhbeat = file->private_data;
+
+ if (*offset < 0)
+ return -EINVAL;
+ if (count != 1)
+ return 0;
+ if (*offset != 0)
+ return 0;
+
+ if (rhbeat->active)
+ ibmasm_stop_reverse_heartbeat(&rhbeat->heartbeat);
+
+ return 1;
+}
+
+static int remote_settings_file_close(struct inode *inode, struct file *file)
+{
+ return 0;
+}
+
+static ssize_t remote_settings_file_read(struct file *file, char __user *buf, size_t count, loff_t *offset)
+{
+ void __iomem *address = (void __iomem *)file->private_data;
+ unsigned char *page;
+ int retval;
+ int len = 0;
+ unsigned int value;
+
+ if (*offset < 0)
+ return -EINVAL;
+ if (count == 0 || count > 1024)
+ return 0;
+ if (*offset != 0)
+ return 0;
+
+ page = (unsigned char *)__get_free_page(GFP_KERNEL);
+ if (!page)
+ return -ENOMEM;
+
+ value = readl(address);
+ len = sprintf(page, "%d\n", value);
+
+ if (copy_to_user(buf, page, len)) {
+ retval = -EFAULT;
+ goto exit;
+ }
+ *offset += len;
+ retval = len;
+
+exit:
+ free_page((unsigned long)page);
+ return retval;
+}
+
+static ssize_t remote_settings_file_write(struct file *file, const char __user *ubuff, size_t count, loff_t *offset)
+{
+ void __iomem *address = (void __iomem *)file->private_data;
+ char *buff;
+ unsigned int value;
+
+ if (*offset < 0)
+ return -EINVAL;
+ if (count == 0 || count > 1024)
+ return 0;
+ if (*offset != 0)
+ return 0;
+
+ buff = kzalloc (count + 1, GFP_KERNEL);
+ if (!buff)
+ return -ENOMEM;
+
+
+ if (copy_from_user(buff, ubuff, count)) {
+ kfree(buff);
+ return -EFAULT;
+ }
+
+ value = simple_strtoul(buff, NULL, 10);
+ writel(value, address);
+ kfree(buff);
+
+ return count;
+}
+
+static const struct file_operations command_fops = {
+ .open = command_file_open,
+ .release = command_file_close,
+ .read = command_file_read,
+ .write = command_file_write,
+ .llseek = generic_file_llseek,
+};
+
+static const struct file_operations event_fops = {
+ .open = event_file_open,
+ .release = event_file_close,
+ .read = event_file_read,
+ .write = event_file_write,
+ .llseek = generic_file_llseek,
+};
+
+static const struct file_operations r_heartbeat_fops = {
+ .open = r_heartbeat_file_open,
+ .release = r_heartbeat_file_close,
+ .read = r_heartbeat_file_read,
+ .write = r_heartbeat_file_write,
+ .llseek = generic_file_llseek,
+};
+
+static const struct file_operations remote_settings_fops = {
+ .open = simple_open,
+ .release = remote_settings_file_close,
+ .read = remote_settings_file_read,
+ .write = remote_settings_file_write,
+ .llseek = generic_file_llseek,
+};
+
+
+static void ibmasmfs_create_files (struct super_block *sb)
+{
+ struct list_head *entry;
+ struct service_processor *sp;
+
+ list_for_each(entry, &service_processors) {
+ struct dentry *dir;
+ struct dentry *remote_dir;
+ sp = list_entry(entry, struct service_processor, node);
+ dir = ibmasmfs_create_dir(sb->s_root, sp->dirname);
+ if (!dir)
+ continue;
+
+ ibmasmfs_create_file(dir, "command", &command_fops, sp, S_IRUSR|S_IWUSR);
+ ibmasmfs_create_file(dir, "event", &event_fops, sp, S_IRUSR|S_IWUSR);
+ ibmasmfs_create_file(dir, "reverse_heartbeat", &r_heartbeat_fops, sp, S_IRUSR|S_IWUSR);
+
+ remote_dir = ibmasmfs_create_dir(dir, "remote_video");
+ if (!remote_dir)
+ continue;
+
+ ibmasmfs_create_file(remote_dir, "width", &remote_settings_fops, (void *)display_width(sp), S_IRUSR|S_IWUSR);
+ ibmasmfs_create_file(remote_dir, "height", &remote_settings_fops, (void *)display_height(sp), S_IRUSR|S_IWUSR);
+ ibmasmfs_create_file(remote_dir, "depth", &remote_settings_fops, (void *)display_depth(sp), S_IRUSR|S_IWUSR);
+ }
+}
diff --git a/drivers/misc/ibmasm/lowlevel.c b/drivers/misc/ibmasm/lowlevel.c
new file mode 100644
index 0000000..5319ea2
--- /dev/null
+++ b/drivers/misc/ibmasm/lowlevel.c
@@ -0,0 +1,85 @@
+/*
+ * IBM ASM Service Processor Device Driver
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2004
+ *
+ * Author: Max Asböck <amax@us.ibm.com>
+ *
+ */
+
+#include "ibmasm.h"
+#include "lowlevel.h"
+#include "i2o.h"
+#include "dot_command.h"
+#include "remote.h"
+
+static struct i2o_header header = I2O_HEADER_TEMPLATE;
+
+
+int ibmasm_send_i2o_message(struct service_processor *sp)
+{
+ u32 mfa;
+ unsigned int command_size;
+ struct i2o_message *message;
+ struct command *command = sp->current_command;
+
+ mfa = get_mfa_inbound(sp->base_address);
+ if (!mfa)
+ return 1;
+
+ command_size = get_dot_command_size(command->buffer);
+ header.message_size = outgoing_message_size(command_size);
+
+ message = get_i2o_message(sp->base_address, mfa);
+
+ memcpy_toio(&message->header, &header, sizeof(struct i2o_header));
+ memcpy_toio(&message->data, command->buffer, command_size);
+
+ set_mfa_inbound(sp->base_address, mfa);
+
+ return 0;
+}
+
+irqreturn_t ibmasm_interrupt_handler(int irq, void * dev_id)
+{
+ u32 mfa;
+ struct service_processor *sp = (struct service_processor *)dev_id;
+ void __iomem *base_address = sp->base_address;
+ char tsbuf[32];
+
+ if (!sp_interrupt_pending(base_address))
+ return IRQ_NONE;
+
+ dbg("respond to interrupt at %s\n", get_timestamp(tsbuf));
+
+ if (mouse_interrupt_pending(sp)) {
+ ibmasm_handle_mouse_interrupt(sp);
+ clear_mouse_interrupt(sp);
+ }
+
+ mfa = get_mfa_outbound(base_address);
+ if (valid_mfa(mfa)) {
+ struct i2o_message *msg = get_i2o_message(base_address, mfa);
+ ibmasm_receive_message(sp, &msg->data, incoming_data_size(msg));
+ } else
+ dbg("didn't get a valid MFA\n");
+
+ set_mfa_outbound(base_address, mfa);
+ dbg("finished interrupt at %s\n", get_timestamp(tsbuf));
+
+ return IRQ_HANDLED;
+}
diff --git a/drivers/misc/ibmasm/lowlevel.h b/drivers/misc/ibmasm/lowlevel.h
new file mode 100644
index 0000000..e97848f
--- /dev/null
+++ b/drivers/misc/ibmasm/lowlevel.h
@@ -0,0 +1,137 @@
+/*
+ * IBM ASM Service Processor Device Driver
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2004
+ *
+ * Author: Max Asböck <amax@us.ibm.com>
+ *
+ */
+
+/* Condor service processor specific hardware definitions */
+
+#ifndef __IBMASM_CONDOR_H__
+#define __IBMASM_CONDOR_H__
+
+#include <asm/io.h>
+
+#define VENDORID_IBM 0x1014
+#define DEVICEID_RSA 0x010F
+
+#define GET_MFA_ADDR(x) (x & 0xFFFFFF00)
+
+#define MAILBOX_FULL(x) (x & 0x00000001)
+
+#define NO_MFAS_AVAILABLE 0xFFFFFFFF
+
+
+#define INBOUND_QUEUE_PORT 0x40 /* contains address of next free MFA */
+#define OUTBOUND_QUEUE_PORT 0x44 /* contains address of posted MFA */
+
+#define SP_INTR_MASK 0x00000008
+#define UART_INTR_MASK 0x00000010
+
+#define INTR_STATUS_REGISTER 0x13A0
+#define INTR_CONTROL_REGISTER 0x13A4
+
+#define SCOUT_COM_A_BASE 0x0000
+#define SCOUT_COM_B_BASE 0x0100
+#define SCOUT_COM_C_BASE 0x0200
+#define SCOUT_COM_D_BASE 0x0300
+
+static inline int sp_interrupt_pending(void __iomem *base_address)
+{
+ return SP_INTR_MASK & readl(base_address + INTR_STATUS_REGISTER);
+}
+
+static inline int uart_interrupt_pending(void __iomem *base_address)
+{
+ return UART_INTR_MASK & readl(base_address + INTR_STATUS_REGISTER);
+}
+
+static inline void ibmasm_enable_interrupts(void __iomem *base_address, int mask)
+{
+ void __iomem *ctrl_reg = base_address + INTR_CONTROL_REGISTER;
+ writel( readl(ctrl_reg) & ~mask, ctrl_reg);
+}
+
+static inline void ibmasm_disable_interrupts(void __iomem *base_address, int mask)
+{
+ void __iomem *ctrl_reg = base_address + INTR_CONTROL_REGISTER;
+ writel( readl(ctrl_reg) | mask, ctrl_reg);
+}
+
+static inline void enable_sp_interrupts(void __iomem *base_address)
+{
+ ibmasm_enable_interrupts(base_address, SP_INTR_MASK);
+}
+
+static inline void disable_sp_interrupts(void __iomem *base_address)
+{
+ ibmasm_disable_interrupts(base_address, SP_INTR_MASK);
+}
+
+static inline void enable_uart_interrupts(void __iomem *base_address)
+{
+ ibmasm_enable_interrupts(base_address, UART_INTR_MASK);
+}
+
+static inline void disable_uart_interrupts(void __iomem *base_address)
+{
+ ibmasm_disable_interrupts(base_address, UART_INTR_MASK);
+}
+
+#define valid_mfa(mfa) ( (mfa) != NO_MFAS_AVAILABLE )
+
+static inline u32 get_mfa_outbound(void __iomem *base_address)
+{
+ int retry;
+ u32 mfa;
+
+ for (retry=0; retry<=10; retry++) {
+ mfa = readl(base_address + OUTBOUND_QUEUE_PORT);
+ if (valid_mfa(mfa))
+ break;
+ }
+ return mfa;
+}
+
+static inline void set_mfa_outbound(void __iomem *base_address, u32 mfa)
+{
+ writel(mfa, base_address + OUTBOUND_QUEUE_PORT);
+}
+
+static inline u32 get_mfa_inbound(void __iomem *base_address)
+{
+ u32 mfa = readl(base_address + INBOUND_QUEUE_PORT);
+
+ if (MAILBOX_FULL(mfa))
+ return 0;
+
+ return mfa;
+}
+
+static inline void set_mfa_inbound(void __iomem *base_address, u32 mfa)
+{
+ writel(mfa, base_address + INBOUND_QUEUE_PORT);
+}
+
+static inline struct i2o_message *get_i2o_message(void __iomem *base_address, u32 mfa)
+{
+ return (struct i2o_message *)(GET_MFA_ADDR(mfa) + base_address);
+}
+
+#endif /* __IBMASM_CONDOR_H__ */
diff --git a/drivers/misc/ibmasm/module.c b/drivers/misc/ibmasm/module.c
new file mode 100644
index 0000000..6b3bf9a
--- /dev/null
+++ b/drivers/misc/ibmasm/module.c
@@ -0,0 +1,235 @@
+
+/*
+ * IBM ASM Service Processor Device Driver
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2004
+ *
+ * Author: Max Asböck <amax@us.ibm.com>
+ *
+ * This driver is based on code originally written by Pete Reynolds
+ * and others.
+ *
+ */
+
+/*
+ * The ASM device driver does the following things:
+ *
+ * 1) When loaded it sends a message to the service processor,
+ * indicating that an OS is * running. This causes the service processor
+ * to send periodic heartbeats to the OS.
+ *
+ * 2) Answers the periodic heartbeats sent by the service processor.
+ * Failure to do so would result in system reboot.
+ *
+ * 3) Acts as a pass through for dot commands sent from user applications.
+ * The interface for this is the ibmasmfs file system.
+ *
+ * 4) Allows user applications to register for event notification. Events
+ * are sent to the driver through interrupts. They can be read from user
+ * space through the ibmasmfs file system.
+ *
+ * 5) Allows user space applications to send heartbeats to the service
+ * processor (aka reverse heartbeats). Again this happens through ibmasmfs.
+ *
+ * 6) Handles remote mouse and keyboard event interrupts and makes them
+ * available to user applications through ibmasmfs.
+ *
+ */
+
+#include <linux/pci.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include "ibmasm.h"
+#include "lowlevel.h"
+#include "remote.h"
+
+int ibmasm_debug = 0;
+module_param(ibmasm_debug, int , S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(ibmasm_debug, " Set debug mode on or off");
+
+
+static int ibmasm_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
+{
+ int result;
+ struct service_processor *sp;
+
+ if ((result = pci_enable_device(pdev))) {
+ dev_err(&pdev->dev, "Failed to enable PCI device\n");
+ return result;
+ }
+ if ((result = pci_request_regions(pdev, DRIVER_NAME))) {
+ dev_err(&pdev->dev, "Failed to allocate PCI resources\n");
+ goto error_resources;
+ }
+ /* vnc client won't work without bus-mastering */
+ pci_set_master(pdev);
+
+ sp = kzalloc(sizeof(struct service_processor), GFP_KERNEL);
+ if (sp == NULL) {
+ dev_err(&pdev->dev, "Failed to allocate memory\n");
+ result = -ENOMEM;
+ goto error_kmalloc;
+ }
+
+ spin_lock_init(&sp->lock);
+ INIT_LIST_HEAD(&sp->command_queue);
+
+ pci_set_drvdata(pdev, (void *)sp);
+ sp->dev = &pdev->dev;
+ sp->number = pdev->bus->number;
+ snprintf(sp->dirname, IBMASM_NAME_SIZE, "%d", sp->number);
+ snprintf(sp->devname, IBMASM_NAME_SIZE, "%s%d", DRIVER_NAME, sp->number);
+
+ if (ibmasm_event_buffer_init(sp)) {
+ dev_err(sp->dev, "Failed to allocate event buffer\n");
+ goto error_eventbuffer;
+ }
+
+ if (ibmasm_heartbeat_init(sp)) {
+ dev_err(sp->dev, "Failed to allocate heartbeat command\n");
+ goto error_heartbeat;
+ }
+
+ sp->irq = pdev->irq;
+ sp->base_address = pci_ioremap_bar(pdev, 0);
+ if (!sp->base_address) {
+ dev_err(sp->dev, "Failed to ioremap pci memory\n");
+ result = -ENODEV;
+ goto error_ioremap;
+ }
+
+ result = request_irq(sp->irq, ibmasm_interrupt_handler, IRQF_SHARED, sp->devname, (void*)sp);
+ if (result) {
+ dev_err(sp->dev, "Failed to register interrupt handler\n");
+ goto error_request_irq;
+ }
+
+ enable_sp_interrupts(sp->base_address);
+
+ result = ibmasm_init_remote_input_dev(sp);
+ if (result) {
+ dev_err(sp->dev, "Failed to initialize remote queue\n");
+ goto error_send_message;
+ }
+
+ result = ibmasm_send_driver_vpd(sp);
+ if (result) {
+ dev_err(sp->dev, "Failed to send driver VPD to service processor\n");
+ goto error_send_message;
+ }
+ result = ibmasm_send_os_state(sp, SYSTEM_STATE_OS_UP);
+ if (result) {
+ dev_err(sp->dev, "Failed to send OS state to service processor\n");
+ goto error_send_message;
+ }
+ ibmasmfs_add_sp(sp);
+
+ ibmasm_register_uart(sp);
+
+ return 0;
+
+error_send_message:
+ disable_sp_interrupts(sp->base_address);
+ ibmasm_free_remote_input_dev(sp);
+ free_irq(sp->irq, (void *)sp);
+error_request_irq:
+ iounmap(sp->base_address);
+error_ioremap:
+ ibmasm_heartbeat_exit(sp);
+error_heartbeat:
+ ibmasm_event_buffer_exit(sp);
+error_eventbuffer:
+ kfree(sp);
+error_kmalloc:
+ pci_release_regions(pdev);
+error_resources:
+ pci_disable_device(pdev);
+
+ return result;
+}
+
+static void ibmasm_remove_one(struct pci_dev *pdev)
+{
+ struct service_processor *sp = pci_get_drvdata(pdev);
+
+ dbg("Unregistering UART\n");
+ ibmasm_unregister_uart(sp);
+ dbg("Sending OS down message\n");
+ if (ibmasm_send_os_state(sp, SYSTEM_STATE_OS_DOWN))
+ err("failed to get repsonse to 'Send OS State' command\n");
+ dbg("Disabling heartbeats\n");
+ ibmasm_heartbeat_exit(sp);
+ dbg("Disabling interrupts\n");
+ disable_sp_interrupts(sp->base_address);
+ dbg("Freeing SP irq\n");
+ free_irq(sp->irq, (void *)sp);
+ dbg("Cleaning up\n");
+ ibmasm_free_remote_input_dev(sp);
+ iounmap(sp->base_address);
+ ibmasm_event_buffer_exit(sp);
+ kfree(sp);
+ pci_release_regions(pdev);
+ pci_disable_device(pdev);
+}
+
+static struct pci_device_id ibmasm_pci_table[] =
+{
+ { PCI_DEVICE(VENDORID_IBM, DEVICEID_RSA) },
+ {},
+};
+
+static struct pci_driver ibmasm_driver = {
+ .name = DRIVER_NAME,
+ .id_table = ibmasm_pci_table,
+ .probe = ibmasm_init_one,
+ .remove = ibmasm_remove_one,
+};
+
+static void __exit ibmasm_exit (void)
+{
+ ibmasm_unregister_panic_notifier();
+ ibmasmfs_unregister();
+ pci_unregister_driver(&ibmasm_driver);
+ info(DRIVER_DESC " version " DRIVER_VERSION " unloaded");
+}
+
+static int __init ibmasm_init(void)
+{
+ int result = pci_register_driver(&ibmasm_driver);
+ if (result)
+ return result;
+
+ result = ibmasmfs_register();
+ if (result) {
+ pci_unregister_driver(&ibmasm_driver);
+ err("Failed to register ibmasmfs file system");
+ return result;
+ }
+
+ ibmasm_register_panic_notifier();
+ info(DRIVER_DESC " version " DRIVER_VERSION " loaded");
+ return 0;
+}
+
+module_init(ibmasm_init);
+module_exit(ibmasm_exit);
+
+MODULE_AUTHOR(DRIVER_AUTHOR);
+MODULE_DESCRIPTION(DRIVER_DESC);
+MODULE_LICENSE("GPL");
+MODULE_DEVICE_TABLE(pci, ibmasm_pci_table);
+
diff --git a/drivers/misc/ibmasm/r_heartbeat.c b/drivers/misc/ibmasm/r_heartbeat.c
new file mode 100644
index 0000000..232034f
--- /dev/null
+++ b/drivers/misc/ibmasm/r_heartbeat.c
@@ -0,0 +1,99 @@
+
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2004
+ *
+ * Author: Max Asböck <amax@us.ibm.com>
+ *
+ */
+
+#include <linux/sched.h>
+#include "ibmasm.h"
+#include "dot_command.h"
+
+/*
+ * Reverse Heartbeat, i.e. heartbeats sent from the driver to the
+ * service processor.
+ * These heartbeats are initiated by user level programs.
+ */
+
+/* the reverse heartbeat dot command */
+#pragma pack(1)
+static struct {
+ struct dot_command_header header;
+ unsigned char command[3];
+} rhb_dot_cmd = {
+ .header = {
+ .type = sp_read,
+ .command_size = 3,
+ .data_size = 0,
+ .status = 0
+ },
+ .command = { 4, 3, 6 }
+};
+#pragma pack()
+
+void ibmasm_init_reverse_heartbeat(struct service_processor *sp, struct reverse_heartbeat *rhb)
+{
+ init_waitqueue_head(&rhb->wait);
+ rhb->stopped = 0;
+}
+
+/**
+ * start_reverse_heartbeat
+ * Loop forever, sending a reverse heartbeat dot command to the service
+ * processor, then sleeping. The loop comes to an end if the service
+ * processor fails to respond 3 times or we were interrupted.
+ */
+int ibmasm_start_reverse_heartbeat(struct service_processor *sp, struct reverse_heartbeat *rhb)
+{
+ struct command *cmd;
+ int times_failed = 0;
+ int result = 1;
+
+ cmd = ibmasm_new_command(sp, sizeof rhb_dot_cmd);
+ if (!cmd)
+ return -ENOMEM;
+
+ while (times_failed < 3) {
+ memcpy(cmd->buffer, (void *)&rhb_dot_cmd, sizeof rhb_dot_cmd);
+ cmd->status = IBMASM_CMD_PENDING;
+ ibmasm_exec_command(sp, cmd);
+ ibmasm_wait_for_response(cmd, IBMASM_CMD_TIMEOUT_NORMAL);
+
+ if (cmd->status != IBMASM_CMD_COMPLETE)
+ times_failed++;
+
+ wait_event_interruptible_timeout(rhb->wait,
+ rhb->stopped,
+ REVERSE_HEARTBEAT_TIMEOUT * HZ);
+
+ if (signal_pending(current) || rhb->stopped) {
+ result = -EINTR;
+ break;
+ }
+ }
+ command_put(cmd);
+ rhb->stopped = 0;
+
+ return result;
+}
+
+void ibmasm_stop_reverse_heartbeat(struct reverse_heartbeat *rhb)
+{
+ rhb->stopped = 1;
+ wake_up_interruptible(&rhb->wait);
+}
diff --git a/drivers/misc/ibmasm/remote.c b/drivers/misc/ibmasm/remote.c
new file mode 100644
index 0000000..477bb43
--- /dev/null
+++ b/drivers/misc/ibmasm/remote.c
@@ -0,0 +1,282 @@
+/*
+ * IBM ASM Service Processor Device Driver
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2004
+ *
+ * Authors: Max Asböck <amax@us.ibm.com>
+ * Vernon Mauery <vernux@us.ibm.com>
+ *
+ */
+
+/* Remote mouse and keyboard event handling functions */
+
+#include <linux/pci.h>
+#include "ibmasm.h"
+#include "remote.h"
+
+#define MOUSE_X_MAX 1600
+#define MOUSE_Y_MAX 1200
+
+static const unsigned short xlate_high[XLATE_SIZE] = {
+ [KEY_SYM_ENTER & 0xff] = KEY_ENTER,
+ [KEY_SYM_KPSLASH & 0xff] = KEY_KPSLASH,
+ [KEY_SYM_KPSTAR & 0xff] = KEY_KPASTERISK,
+ [KEY_SYM_KPMINUS & 0xff] = KEY_KPMINUS,
+ [KEY_SYM_KPDOT & 0xff] = KEY_KPDOT,
+ [KEY_SYM_KPPLUS & 0xff] = KEY_KPPLUS,
+ [KEY_SYM_KP0 & 0xff] = KEY_KP0,
+ [KEY_SYM_KP1 & 0xff] = KEY_KP1,
+ [KEY_SYM_KP2 & 0xff] = KEY_KP2, [KEY_SYM_KPDOWN & 0xff] = KEY_KP2,
+ [KEY_SYM_KP3 & 0xff] = KEY_KP3,
+ [KEY_SYM_KP4 & 0xff] = KEY_KP4, [KEY_SYM_KPLEFT & 0xff] = KEY_KP4,
+ [KEY_SYM_KP5 & 0xff] = KEY_KP5,
+ [KEY_SYM_KP6 & 0xff] = KEY_KP6, [KEY_SYM_KPRIGHT & 0xff] = KEY_KP6,
+ [KEY_SYM_KP7 & 0xff] = KEY_KP7,
+ [KEY_SYM_KP8 & 0xff] = KEY_KP8, [KEY_SYM_KPUP & 0xff] = KEY_KP8,
+ [KEY_SYM_KP9 & 0xff] = KEY_KP9,
+ [KEY_SYM_BK_SPC & 0xff] = KEY_BACKSPACE,
+ [KEY_SYM_TAB & 0xff] = KEY_TAB,
+ [KEY_SYM_CTRL & 0xff] = KEY_LEFTCTRL,
+ [KEY_SYM_ALT & 0xff] = KEY_LEFTALT,
+ [KEY_SYM_INSERT & 0xff] = KEY_INSERT,
+ [KEY_SYM_DELETE & 0xff] = KEY_DELETE,
+ [KEY_SYM_SHIFT & 0xff] = KEY_LEFTSHIFT,
+ [KEY_SYM_UARROW & 0xff] = KEY_UP,
+ [KEY_SYM_DARROW & 0xff] = KEY_DOWN,
+ [KEY_SYM_LARROW & 0xff] = KEY_LEFT,
+ [KEY_SYM_RARROW & 0xff] = KEY_RIGHT,
+ [KEY_SYM_ESCAPE & 0xff] = KEY_ESC,
+ [KEY_SYM_PAGEUP & 0xff] = KEY_PAGEUP,
+ [KEY_SYM_PAGEDOWN & 0xff] = KEY_PAGEDOWN,
+ [KEY_SYM_HOME & 0xff] = KEY_HOME,
+ [KEY_SYM_END & 0xff] = KEY_END,
+ [KEY_SYM_F1 & 0xff] = KEY_F1,
+ [KEY_SYM_F2 & 0xff] = KEY_F2,
+ [KEY_SYM_F3 & 0xff] = KEY_F3,
+ [KEY_SYM_F4 & 0xff] = KEY_F4,
+ [KEY_SYM_F5 & 0xff] = KEY_F5,
+ [KEY_SYM_F6 & 0xff] = KEY_F6,
+ [KEY_SYM_F7 & 0xff] = KEY_F7,
+ [KEY_SYM_F8 & 0xff] = KEY_F8,
+ [KEY_SYM_F9 & 0xff] = KEY_F9,
+ [KEY_SYM_F10 & 0xff] = KEY_F10,
+ [KEY_SYM_F11 & 0xff] = KEY_F11,
+ [KEY_SYM_F12 & 0xff] = KEY_F12,
+ [KEY_SYM_CAP_LOCK & 0xff] = KEY_CAPSLOCK,
+ [KEY_SYM_NUM_LOCK & 0xff] = KEY_NUMLOCK,
+ [KEY_SYM_SCR_LOCK & 0xff] = KEY_SCROLLLOCK,
+};
+
+static const unsigned short xlate[XLATE_SIZE] = {
+ [NO_KEYCODE] = KEY_RESERVED,
+ [KEY_SYM_SPACE] = KEY_SPACE,
+ [KEY_SYM_TILDE] = KEY_GRAVE, [KEY_SYM_BKTIC] = KEY_GRAVE,
+ [KEY_SYM_ONE] = KEY_1, [KEY_SYM_BANG] = KEY_1,
+ [KEY_SYM_TWO] = KEY_2, [KEY_SYM_AT] = KEY_2,
+ [KEY_SYM_THREE] = KEY_3, [KEY_SYM_POUND] = KEY_3,
+ [KEY_SYM_FOUR] = KEY_4, [KEY_SYM_DOLLAR] = KEY_4,
+ [KEY_SYM_FIVE] = KEY_5, [KEY_SYM_PERCENT] = KEY_5,
+ [KEY_SYM_SIX] = KEY_6, [KEY_SYM_CARAT] = KEY_6,
+ [KEY_SYM_SEVEN] = KEY_7, [KEY_SYM_AMPER] = KEY_7,
+ [KEY_SYM_EIGHT] = KEY_8, [KEY_SYM_STAR] = KEY_8,
+ [KEY_SYM_NINE] = KEY_9, [KEY_SYM_LPAREN] = KEY_9,
+ [KEY_SYM_ZERO] = KEY_0, [KEY_SYM_RPAREN] = KEY_0,
+ [KEY_SYM_MINUS] = KEY_MINUS, [KEY_SYM_USCORE] = KEY_MINUS,
+ [KEY_SYM_EQUAL] = KEY_EQUAL, [KEY_SYM_PLUS] = KEY_EQUAL,
+ [KEY_SYM_LBRKT] = KEY_LEFTBRACE, [KEY_SYM_LCURLY] = KEY_LEFTBRACE,
+ [KEY_SYM_RBRKT] = KEY_RIGHTBRACE, [KEY_SYM_RCURLY] = KEY_RIGHTBRACE,
+ [KEY_SYM_SLASH] = KEY_BACKSLASH, [KEY_SYM_PIPE] = KEY_BACKSLASH,
+ [KEY_SYM_TIC] = KEY_APOSTROPHE, [KEY_SYM_QUOTE] = KEY_APOSTROPHE,
+ [KEY_SYM_SEMIC] = KEY_SEMICOLON, [KEY_SYM_COLON] = KEY_SEMICOLON,
+ [KEY_SYM_COMMA] = KEY_COMMA, [KEY_SYM_LT] = KEY_COMMA,
+ [KEY_SYM_PERIOD] = KEY_DOT, [KEY_SYM_GT] = KEY_DOT,
+ [KEY_SYM_BSLASH] = KEY_SLASH, [KEY_SYM_QMARK] = KEY_SLASH,
+ [KEY_SYM_A] = KEY_A, [KEY_SYM_a] = KEY_A,
+ [KEY_SYM_B] = KEY_B, [KEY_SYM_b] = KEY_B,
+ [KEY_SYM_C] = KEY_C, [KEY_SYM_c] = KEY_C,
+ [KEY_SYM_D] = KEY_D, [KEY_SYM_d] = KEY_D,
+ [KEY_SYM_E] = KEY_E, [KEY_SYM_e] = KEY_E,
+ [KEY_SYM_F] = KEY_F, [KEY_SYM_f] = KEY_F,
+ [KEY_SYM_G] = KEY_G, [KEY_SYM_g] = KEY_G,
+ [KEY_SYM_H] = KEY_H, [KEY_SYM_h] = KEY_H,
+ [KEY_SYM_I] = KEY_I, [KEY_SYM_i] = KEY_I,
+ [KEY_SYM_J] = KEY_J, [KEY_SYM_j] = KEY_J,
+ [KEY_SYM_K] = KEY_K, [KEY_SYM_k] = KEY_K,
+ [KEY_SYM_L] = KEY_L, [KEY_SYM_l] = KEY_L,
+ [KEY_SYM_M] = KEY_M, [KEY_SYM_m] = KEY_M,
+ [KEY_SYM_N] = KEY_N, [KEY_SYM_n] = KEY_N,
+ [KEY_SYM_O] = KEY_O, [KEY_SYM_o] = KEY_O,
+ [KEY_SYM_P] = KEY_P, [KEY_SYM_p] = KEY_P,
+ [KEY_SYM_Q] = KEY_Q, [KEY_SYM_q] = KEY_Q,
+ [KEY_SYM_R] = KEY_R, [KEY_SYM_r] = KEY_R,
+ [KEY_SYM_S] = KEY_S, [KEY_SYM_s] = KEY_S,
+ [KEY_SYM_T] = KEY_T, [KEY_SYM_t] = KEY_T,
+ [KEY_SYM_U] = KEY_U, [KEY_SYM_u] = KEY_U,
+ [KEY_SYM_V] = KEY_V, [KEY_SYM_v] = KEY_V,
+ [KEY_SYM_W] = KEY_W, [KEY_SYM_w] = KEY_W,
+ [KEY_SYM_X] = KEY_X, [KEY_SYM_x] = KEY_X,
+ [KEY_SYM_Y] = KEY_Y, [KEY_SYM_y] = KEY_Y,
+ [KEY_SYM_Z] = KEY_Z, [KEY_SYM_z] = KEY_Z,
+};
+
+static void print_input(struct remote_input *input)
+{
+ if (input->type == INPUT_TYPE_MOUSE) {
+ unsigned char buttons = input->mouse_buttons;
+ dbg("remote mouse movement: (x,y)=(%d,%d)%s%s%s%s\n",
+ input->data.mouse.x, input->data.mouse.y,
+ (buttons) ? " -- buttons:" : "",
+ (buttons & REMOTE_BUTTON_LEFT) ? "left " : "",
+ (buttons & REMOTE_BUTTON_MIDDLE) ? "middle " : "",
+ (buttons & REMOTE_BUTTON_RIGHT) ? "right" : ""
+ );
+ } else {
+ dbg("remote keypress (code, flag, down):"
+ "%d (0x%x) [0x%x] [0x%x]\n",
+ input->data.keyboard.key_code,
+ input->data.keyboard.key_code,
+ input->data.keyboard.key_flag,
+ input->data.keyboard.key_down
+ );
+ }
+}
+
+static void send_mouse_event(struct input_dev *dev, struct remote_input *input)
+{
+ unsigned char buttons = input->mouse_buttons;
+
+ input_report_abs(dev, ABS_X, input->data.mouse.x);
+ input_report_abs(dev, ABS_Y, input->data.mouse.y);
+ input_report_key(dev, BTN_LEFT, buttons & REMOTE_BUTTON_LEFT);
+ input_report_key(dev, BTN_MIDDLE, buttons & REMOTE_BUTTON_MIDDLE);
+ input_report_key(dev, BTN_RIGHT, buttons & REMOTE_BUTTON_RIGHT);
+ input_sync(dev);
+}
+
+static void send_keyboard_event(struct input_dev *dev,
+ struct remote_input *input)
+{
+ unsigned int key;
+ unsigned short code = input->data.keyboard.key_code;
+
+ if (code & 0xff00)
+ key = xlate_high[code & 0xff];
+ else
+ key = xlate[code];
+ input_report_key(dev, key, input->data.keyboard.key_down);
+ input_sync(dev);
+}
+
+void ibmasm_handle_mouse_interrupt(struct service_processor *sp)
+{
+ unsigned long reader;
+ unsigned long writer;
+ struct remote_input input;
+
+ reader = get_queue_reader(sp);
+ writer = get_queue_writer(sp);
+
+ while (reader != writer) {
+ memcpy_fromio(&input, get_queue_entry(sp, reader),
+ sizeof(struct remote_input));
+
+ print_input(&input);
+ if (input.type == INPUT_TYPE_MOUSE) {
+ send_mouse_event(sp->remote.mouse_dev, &input);
+ } else if (input.type == INPUT_TYPE_KEYBOARD) {
+ send_keyboard_event(sp->remote.keybd_dev, &input);
+ } else
+ break;
+
+ reader = advance_queue_reader(sp, reader);
+ writer = get_queue_writer(sp);
+ }
+}
+
+int ibmasm_init_remote_input_dev(struct service_processor *sp)
+{
+ /* set up the mouse input device */
+ struct input_dev *mouse_dev, *keybd_dev;
+ struct pci_dev *pdev = to_pci_dev(sp->dev);
+ int error = -ENOMEM;
+ int i;
+
+ sp->remote.mouse_dev = mouse_dev = input_allocate_device();
+ sp->remote.keybd_dev = keybd_dev = input_allocate_device();
+
+ if (!mouse_dev || !keybd_dev)
+ goto err_free_devices;
+
+ mouse_dev->id.bustype = BUS_PCI;
+ mouse_dev->id.vendor = pdev->vendor;
+ mouse_dev->id.product = pdev->device;
+ mouse_dev->id.version = 1;
+ mouse_dev->dev.parent = sp->dev;
+ mouse_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
+ mouse_dev->keybit[BIT_WORD(BTN_MOUSE)] = BIT_MASK(BTN_LEFT) |
+ BIT_MASK(BTN_RIGHT) | BIT_MASK(BTN_MIDDLE);
+ set_bit(BTN_TOUCH, mouse_dev->keybit);
+ mouse_dev->name = "ibmasm RSA I remote mouse";
+ input_set_abs_params(mouse_dev, ABS_X, 0, MOUSE_X_MAX, 0, 0);
+ input_set_abs_params(mouse_dev, ABS_Y, 0, MOUSE_Y_MAX, 0, 0);
+
+ keybd_dev->id.bustype = BUS_PCI;
+ keybd_dev->id.vendor = pdev->vendor;
+ keybd_dev->id.product = pdev->device;
+ keybd_dev->id.version = 2;
+ keybd_dev->dev.parent = sp->dev;
+ keybd_dev->evbit[0] = BIT_MASK(EV_KEY);
+ keybd_dev->name = "ibmasm RSA I remote keyboard";
+
+ for (i = 0; i < XLATE_SIZE; i++) {
+ if (xlate_high[i])
+ set_bit(xlate_high[i], keybd_dev->keybit);
+ if (xlate[i])
+ set_bit(xlate[i], keybd_dev->keybit);
+ }
+
+ error = input_register_device(mouse_dev);
+ if (error)
+ goto err_free_devices;
+
+ error = input_register_device(keybd_dev);
+ if (error)
+ goto err_unregister_mouse_dev;
+
+ enable_mouse_interrupts(sp);
+
+ printk(KERN_INFO "ibmasm remote responding to events on RSA card %d\n", sp->number);
+
+ return 0;
+
+ err_unregister_mouse_dev:
+ input_unregister_device(mouse_dev);
+ mouse_dev = NULL; /* so we don't try to free it again below */
+ err_free_devices:
+ input_free_device(mouse_dev);
+ input_free_device(keybd_dev);
+
+ return error;
+}
+
+void ibmasm_free_remote_input_dev(struct service_processor *sp)
+{
+ disable_mouse_interrupts(sp);
+ input_unregister_device(sp->remote.mouse_dev);
+ input_unregister_device(sp->remote.keybd_dev);
+}
+
diff --git a/drivers/misc/ibmasm/remote.h b/drivers/misc/ibmasm/remote.h
new file mode 100644
index 0000000..a7729ef
--- /dev/null
+++ b/drivers/misc/ibmasm/remote.h
@@ -0,0 +1,270 @@
+
+/*
+ * IBM ASM Service Processor Device Driver
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2004
+ *
+ * Author: Max Asböck <amax@us.ibm.com>
+ *
+ * Originally written by Pete Reynolds
+ */
+
+#ifndef _IBMASM_REMOTE_H_
+#define _IBMASM_REMOTE_H_
+
+#include <asm/io.h>
+
+/* pci offsets */
+#define CONDOR_MOUSE_DATA 0x000AC000
+#define CONDOR_MOUSE_ISR_CONTROL 0x00
+#define CONDOR_MOUSE_ISR_STATUS 0x04
+#define CONDOR_MOUSE_Q_READER 0x08
+#define CONDOR_MOUSE_Q_WRITER 0x0C
+#define CONDOR_MOUSE_Q_BEGIN 0x10
+#define CONDOR_MOUSE_MAX_X 0x14
+#define CONDOR_MOUSE_MAX_Y 0x18
+
+#define CONDOR_INPUT_DESKTOP_INFO 0x1F0
+#define CONDOR_INPUT_DISPLAY_RESX 0x1F4
+#define CONDOR_INPUT_DISPLAY_RESY 0x1F8
+#define CONDOR_INPUT_DISPLAY_BITS 0x1FC
+#define CONDOR_OUTPUT_VNC_STATUS 0x200
+
+#define CONDOR_MOUSE_INTR_STATUS_MASK 0x00000001
+
+#define INPUT_TYPE_MOUSE 0x1
+#define INPUT_TYPE_KEYBOARD 0x2
+
+
+/* mouse button states received from SP */
+#define REMOTE_DOUBLE_CLICK 0xF0
+#define REMOTE_BUTTON_LEFT 0x01
+#define REMOTE_BUTTON_MIDDLE 0x02
+#define REMOTE_BUTTON_RIGHT 0x04
+
+/* size of keysym/keycode translation matricies */
+#define XLATE_SIZE 256
+
+struct mouse_input {
+ unsigned short y;
+ unsigned short x;
+};
+
+
+struct keyboard_input {
+ unsigned short key_code;
+ unsigned char key_flag;
+ unsigned char key_down;
+};
+
+
+
+struct remote_input {
+ union {
+ struct mouse_input mouse;
+ struct keyboard_input keyboard;
+ } data;
+
+ unsigned char type;
+ unsigned char pad1;
+ unsigned char mouse_buttons;
+ unsigned char pad3;
+};
+
+#define mouse_addr(sp) (sp->base_address + CONDOR_MOUSE_DATA)
+#define display_width(sp) (mouse_addr(sp) + CONDOR_INPUT_DISPLAY_RESX)
+#define display_height(sp) (mouse_addr(sp) + CONDOR_INPUT_DISPLAY_RESY)
+#define display_depth(sp) (mouse_addr(sp) + CONDOR_INPUT_DISPLAY_BITS)
+#define desktop_info(sp) (mouse_addr(sp) + CONDOR_INPUT_DESKTOP_INFO)
+#define vnc_status(sp) (mouse_addr(sp) + CONDOR_OUTPUT_VNC_STATUS)
+#define isr_control(sp) (mouse_addr(sp) + CONDOR_MOUSE_ISR_CONTROL)
+
+#define mouse_interrupt_pending(sp) readl(mouse_addr(sp) + CONDOR_MOUSE_ISR_STATUS)
+#define clear_mouse_interrupt(sp) writel(0, mouse_addr(sp) + CONDOR_MOUSE_ISR_STATUS)
+#define enable_mouse_interrupts(sp) writel(1, mouse_addr(sp) + CONDOR_MOUSE_ISR_CONTROL)
+#define disable_mouse_interrupts(sp) writel(0, mouse_addr(sp) + CONDOR_MOUSE_ISR_CONTROL)
+
+/* remote input queue operations */
+#define REMOTE_QUEUE_SIZE 60
+
+#define get_queue_writer(sp) readl(mouse_addr(sp) + CONDOR_MOUSE_Q_WRITER)
+#define get_queue_reader(sp) readl(mouse_addr(sp) + CONDOR_MOUSE_Q_READER)
+#define set_queue_reader(sp, reader) writel(reader, mouse_addr(sp) + CONDOR_MOUSE_Q_READER)
+
+#define queue_begin (mouse_addr(sp) + CONDOR_MOUSE_Q_BEGIN)
+
+#define get_queue_entry(sp, read_index) \
+ ((void*)(queue_begin + read_index * sizeof(struct remote_input)))
+
+static inline int advance_queue_reader(struct service_processor *sp, unsigned long reader)
+{
+ reader++;
+ if (reader == REMOTE_QUEUE_SIZE)
+ reader = 0;
+
+ set_queue_reader(sp, reader);
+ return reader;
+}
+
+#define NO_KEYCODE 0
+#define KEY_SYM_BK_SPC 0xFF08
+#define KEY_SYM_TAB 0xFF09
+#define KEY_SYM_ENTER 0xFF0D
+#define KEY_SYM_SCR_LOCK 0xFF14
+#define KEY_SYM_ESCAPE 0xFF1B
+#define KEY_SYM_HOME 0xFF50
+#define KEY_SYM_LARROW 0xFF51
+#define KEY_SYM_UARROW 0xFF52
+#define KEY_SYM_RARROW 0xFF53
+#define KEY_SYM_DARROW 0xFF54
+#define KEY_SYM_PAGEUP 0xFF55
+#define KEY_SYM_PAGEDOWN 0xFF56
+#define KEY_SYM_END 0xFF57
+#define KEY_SYM_INSERT 0xFF63
+#define KEY_SYM_NUM_LOCK 0xFF7F
+#define KEY_SYM_KPSTAR 0xFFAA
+#define KEY_SYM_KPPLUS 0xFFAB
+#define KEY_SYM_KPMINUS 0xFFAD
+#define KEY_SYM_KPDOT 0xFFAE
+#define KEY_SYM_KPSLASH 0xFFAF
+#define KEY_SYM_KPRIGHT 0xFF96
+#define KEY_SYM_KPUP 0xFF97
+#define KEY_SYM_KPLEFT 0xFF98
+#define KEY_SYM_KPDOWN 0xFF99
+#define KEY_SYM_KP0 0xFFB0
+#define KEY_SYM_KP1 0xFFB1
+#define KEY_SYM_KP2 0xFFB2
+#define KEY_SYM_KP3 0xFFB3
+#define KEY_SYM_KP4 0xFFB4
+#define KEY_SYM_KP5 0xFFB5
+#define KEY_SYM_KP6 0xFFB6
+#define KEY_SYM_KP7 0xFFB7
+#define KEY_SYM_KP8 0xFFB8
+#define KEY_SYM_KP9 0xFFB9
+#define KEY_SYM_F1 0xFFBE // 1B 5B 5B 41
+#define KEY_SYM_F2 0xFFBF // 1B 5B 5B 42
+#define KEY_SYM_F3 0xFFC0 // 1B 5B 5B 43
+#define KEY_SYM_F4 0xFFC1 // 1B 5B 5B 44
+#define KEY_SYM_F5 0xFFC2 // 1B 5B 5B 45
+#define KEY_SYM_F6 0xFFC3 // 1B 5B 31 37 7E
+#define KEY_SYM_F7 0xFFC4 // 1B 5B 31 38 7E
+#define KEY_SYM_F8 0xFFC5 // 1B 5B 31 39 7E
+#define KEY_SYM_F9 0xFFC6 // 1B 5B 32 30 7E
+#define KEY_SYM_F10 0xFFC7 // 1B 5B 32 31 7E
+#define KEY_SYM_F11 0xFFC8 // 1B 5B 32 33 7E
+#define KEY_SYM_F12 0xFFC9 // 1B 5B 32 34 7E
+#define KEY_SYM_SHIFT 0xFFE1
+#define KEY_SYM_CTRL 0xFFE3
+#define KEY_SYM_ALT 0xFFE9
+#define KEY_SYM_CAP_LOCK 0xFFE5
+#define KEY_SYM_DELETE 0xFFFF
+#define KEY_SYM_TILDE 0x60
+#define KEY_SYM_BKTIC 0x7E
+#define KEY_SYM_ONE 0x31
+#define KEY_SYM_BANG 0x21
+#define KEY_SYM_TWO 0x32
+#define KEY_SYM_AT 0x40
+#define KEY_SYM_THREE 0x33
+#define KEY_SYM_POUND 0x23
+#define KEY_SYM_FOUR 0x34
+#define KEY_SYM_DOLLAR 0x24
+#define KEY_SYM_FIVE 0x35
+#define KEY_SYM_PERCENT 0x25
+#define KEY_SYM_SIX 0x36
+#define KEY_SYM_CARAT 0x5E
+#define KEY_SYM_SEVEN 0x37
+#define KEY_SYM_AMPER 0x26
+#define KEY_SYM_EIGHT 0x38
+#define KEY_SYM_STAR 0x2A
+#define KEY_SYM_NINE 0x39
+#define KEY_SYM_LPAREN 0x28
+#define KEY_SYM_ZERO 0x30
+#define KEY_SYM_RPAREN 0x29
+#define KEY_SYM_MINUS 0x2D
+#define KEY_SYM_USCORE 0x5F
+#define KEY_SYM_EQUAL 0x2B
+#define KEY_SYM_PLUS 0x3D
+#define KEY_SYM_LBRKT 0x5B
+#define KEY_SYM_LCURLY 0x7B
+#define KEY_SYM_RBRKT 0x5D
+#define KEY_SYM_RCURLY 0x7D
+#define KEY_SYM_SLASH 0x5C
+#define KEY_SYM_PIPE 0x7C
+#define KEY_SYM_TIC 0x27
+#define KEY_SYM_QUOTE 0x22
+#define KEY_SYM_SEMIC 0x3B
+#define KEY_SYM_COLON 0x3A
+#define KEY_SYM_COMMA 0x2C
+#define KEY_SYM_LT 0x3C
+#define KEY_SYM_PERIOD 0x2E
+#define KEY_SYM_GT 0x3E
+#define KEY_SYM_BSLASH 0x2F
+#define KEY_SYM_QMARK 0x3F
+#define KEY_SYM_A 0x41
+#define KEY_SYM_B 0x42
+#define KEY_SYM_C 0x43
+#define KEY_SYM_D 0x44
+#define KEY_SYM_E 0x45
+#define KEY_SYM_F 0x46
+#define KEY_SYM_G 0x47
+#define KEY_SYM_H 0x48
+#define KEY_SYM_I 0x49
+#define KEY_SYM_J 0x4A
+#define KEY_SYM_K 0x4B
+#define KEY_SYM_L 0x4C
+#define KEY_SYM_M 0x4D
+#define KEY_SYM_N 0x4E
+#define KEY_SYM_O 0x4F
+#define KEY_SYM_P 0x50
+#define KEY_SYM_Q 0x51
+#define KEY_SYM_R 0x52
+#define KEY_SYM_S 0x53
+#define KEY_SYM_T 0x54
+#define KEY_SYM_U 0x55
+#define KEY_SYM_V 0x56
+#define KEY_SYM_W 0x57
+#define KEY_SYM_X 0x58
+#define KEY_SYM_Y 0x59
+#define KEY_SYM_Z 0x5A
+#define KEY_SYM_a 0x61
+#define KEY_SYM_b 0x62
+#define KEY_SYM_c 0x63
+#define KEY_SYM_d 0x64
+#define KEY_SYM_e 0x65
+#define KEY_SYM_f 0x66
+#define KEY_SYM_g 0x67
+#define KEY_SYM_h 0x68
+#define KEY_SYM_i 0x69
+#define KEY_SYM_j 0x6A
+#define KEY_SYM_k 0x6B
+#define KEY_SYM_l 0x6C
+#define KEY_SYM_m 0x6D
+#define KEY_SYM_n 0x6E
+#define KEY_SYM_o 0x6F
+#define KEY_SYM_p 0x70
+#define KEY_SYM_q 0x71
+#define KEY_SYM_r 0x72
+#define KEY_SYM_s 0x73
+#define KEY_SYM_t 0x74
+#define KEY_SYM_u 0x75
+#define KEY_SYM_v 0x76
+#define KEY_SYM_w 0x77
+#define KEY_SYM_x 0x78
+#define KEY_SYM_y 0x79
+#define KEY_SYM_z 0x7A
+#define KEY_SYM_SPACE 0x20
+#endif /* _IBMASM_REMOTE_H_ */
diff --git a/drivers/misc/ibmasm/uart.c b/drivers/misc/ibmasm/uart.c
new file mode 100644
index 0000000..01e2b0d
--- /dev/null
+++ b/drivers/misc/ibmasm/uart.c
@@ -0,0 +1,72 @@
+
+/*
+ * IBM ASM Service Processor Device Driver
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2004
+ *
+ * Author: Max Asböck <amax@us.ibm.com>
+ *
+ */
+
+#include <linux/termios.h>
+#include <linux/tty.h>
+#include <linux/serial_core.h>
+#include <linux/serial_reg.h>
+#include <linux/serial_8250.h>
+#include "ibmasm.h"
+#include "lowlevel.h"
+
+
+void ibmasm_register_uart(struct service_processor *sp)
+{
+ struct uart_8250_port uart;
+ void __iomem *iomem_base;
+
+ iomem_base = sp->base_address + SCOUT_COM_B_BASE;
+
+ /* read the uart scratch register to determine if the UART
+ * is dedicated to the service processor or if the OS can use it
+ */
+ if (0 == readl(iomem_base + UART_SCR)) {
+ dev_info(sp->dev, "IBM SP UART not registered, owned by service processor\n");
+ sp->serial_line = -1;
+ return;
+ }
+
+ memset(&uart, 0, sizeof(uart));
+ uart.port.irq = sp->irq;
+ uart.port.uartclk = 3686400;
+ uart.port.flags = UPF_SHARE_IRQ;
+ uart.port.iotype = UPIO_MEM;
+ uart.port.membase = iomem_base;
+
+ sp->serial_line = serial8250_register_8250_port(&uart);
+ if (sp->serial_line < 0) {
+ dev_err(sp->dev, "Failed to register serial port\n");
+ return;
+ }
+ enable_uart_interrupts(sp->base_address);
+}
+
+void ibmasm_unregister_uart(struct service_processor *sp)
+{
+ if (sp->serial_line < 0)
+ return;
+
+ disable_uart_interrupts(sp->base_address);
+ serial8250_unregister_port(sp->serial_line);
+}
diff --git a/drivers/misc/ics932s401.c b/drivers/misc/ics932s401.c
new file mode 100644
index 0000000..28f51e0
--- /dev/null
+++ b/drivers/misc/ics932s401.c
@@ -0,0 +1,495 @@
+/*
+ * A driver for the Integrated Circuits ICS932S401
+ * Copyright (C) 2008 IBM
+ *
+ * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <linux/module.h>
+#include <linux/jiffies.h>
+#include <linux/i2c.h>
+#include <linux/err.h>
+#include <linux/mutex.h>
+#include <linux/delay.h>
+#include <linux/log2.h>
+#include <linux/slab.h>
+
+/* Addresses to scan */
+static const unsigned short normal_i2c[] = { 0x69, I2C_CLIENT_END };
+
+/* ICS932S401 registers */
+#define ICS932S401_REG_CFG2 0x01
+#define ICS932S401_CFG1_SPREAD 0x01
+#define ICS932S401_REG_CFG7 0x06
+#define ICS932S401_FS_MASK 0x07
+#define ICS932S401_REG_VENDOR_REV 0x07
+#define ICS932S401_VENDOR 1
+#define ICS932S401_VENDOR_MASK 0x0F
+#define ICS932S401_REV 4
+#define ICS932S401_REV_SHIFT 4
+#define ICS932S401_REG_DEVICE 0x09
+#define ICS932S401_DEVICE 11
+#define ICS932S401_REG_CTRL 0x0A
+#define ICS932S401_MN_ENABLED 0x80
+#define ICS932S401_CPU_ALT 0x04
+#define ICS932S401_SRC_ALT 0x08
+#define ICS932S401_REG_CPU_M_CTRL 0x0B
+#define ICS932S401_M_MASK 0x3F
+#define ICS932S401_REG_CPU_N_CTRL 0x0C
+#define ICS932S401_REG_CPU_SPREAD1 0x0D
+#define ICS932S401_REG_CPU_SPREAD2 0x0E
+#define ICS932S401_SPREAD_MASK 0x7FFF
+#define ICS932S401_REG_SRC_M_CTRL 0x0F
+#define ICS932S401_REG_SRC_N_CTRL 0x10
+#define ICS932S401_REG_SRC_SPREAD1 0x11
+#define ICS932S401_REG_SRC_SPREAD2 0x12
+#define ICS932S401_REG_CPU_DIVISOR 0x13
+#define ICS932S401_CPU_DIVISOR_SHIFT 4
+#define ICS932S401_REG_PCISRC_DIVISOR 0x14
+#define ICS932S401_SRC_DIVISOR_MASK 0x0F
+#define ICS932S401_PCI_DIVISOR_SHIFT 4
+
+/* Base clock is 14.318MHz */
+#define BASE_CLOCK 14318
+
+#define NUM_REGS 21
+#define NUM_MIRRORED_REGS 15
+
+static int regs_to_copy[NUM_MIRRORED_REGS] = {
+ ICS932S401_REG_CFG2,
+ ICS932S401_REG_CFG7,
+ ICS932S401_REG_VENDOR_REV,
+ ICS932S401_REG_DEVICE,
+ ICS932S401_REG_CTRL,
+ ICS932S401_REG_CPU_M_CTRL,
+ ICS932S401_REG_CPU_N_CTRL,
+ ICS932S401_REG_CPU_SPREAD1,
+ ICS932S401_REG_CPU_SPREAD2,
+ ICS932S401_REG_SRC_M_CTRL,
+ ICS932S401_REG_SRC_N_CTRL,
+ ICS932S401_REG_SRC_SPREAD1,
+ ICS932S401_REG_SRC_SPREAD2,
+ ICS932S401_REG_CPU_DIVISOR,
+ ICS932S401_REG_PCISRC_DIVISOR,
+};
+
+/* How often do we reread sensors values? (In jiffies) */
+#define SENSOR_REFRESH_INTERVAL (2 * HZ)
+
+/* How often do we reread sensor limit values? (In jiffies) */
+#define LIMIT_REFRESH_INTERVAL (60 * HZ)
+
+struct ics932s401_data {
+ struct attribute_group attrs;
+ struct mutex lock;
+ char sensors_valid;
+ unsigned long sensors_last_updated; /* In jiffies */
+
+ u8 regs[NUM_REGS];
+};
+
+static int ics932s401_probe(struct i2c_client *client,
+ const struct i2c_device_id *id);
+static int ics932s401_detect(struct i2c_client *client,
+ struct i2c_board_info *info);
+static int ics932s401_remove(struct i2c_client *client);
+
+static const struct i2c_device_id ics932s401_id[] = {
+ { "ics932s401", 0 },
+ { }
+};
+MODULE_DEVICE_TABLE(i2c, ics932s401_id);
+
+static struct i2c_driver ics932s401_driver = {
+ .class = I2C_CLASS_HWMON,
+ .driver = {
+ .name = "ics932s401",
+ },
+ .probe = ics932s401_probe,
+ .remove = ics932s401_remove,
+ .id_table = ics932s401_id,
+ .detect = ics932s401_detect,
+ .address_list = normal_i2c,
+};
+
+static struct ics932s401_data *ics932s401_update_device(struct device *dev)
+{
+ struct i2c_client *client = to_i2c_client(dev);
+ struct ics932s401_data *data = i2c_get_clientdata(client);
+ unsigned long local_jiffies = jiffies;
+ int i, temp;
+
+ mutex_lock(&data->lock);
+ if (time_before(local_jiffies, data->sensors_last_updated +
+ SENSOR_REFRESH_INTERVAL)
+ && data->sensors_valid)
+ goto out;
+
+ /*
+ * Each register must be read as a word and then right shifted 8 bits.
+ * Not really sure why this is; setting the "byte count programming"
+ * register to 1 does not fix this problem.
+ */
+ for (i = 0; i < NUM_MIRRORED_REGS; i++) {
+ temp = i2c_smbus_read_word_data(client, regs_to_copy[i]);
+ data->regs[regs_to_copy[i]] = temp >> 8;
+ }
+
+ data->sensors_last_updated = local_jiffies;
+ data->sensors_valid = 1;
+
+out:
+ mutex_unlock(&data->lock);
+ return data;
+}
+
+static ssize_t show_spread_enabled(struct device *dev,
+ struct device_attribute *devattr,
+ char *buf)
+{
+ struct ics932s401_data *data = ics932s401_update_device(dev);
+
+ if (data->regs[ICS932S401_REG_CFG2] & ICS932S401_CFG1_SPREAD)
+ return sprintf(buf, "1\n");
+
+ return sprintf(buf, "0\n");
+}
+
+/* bit to cpu khz map */
+static const int fs_speeds[] = {
+ 266666,
+ 133333,
+ 200000,
+ 166666,
+ 333333,
+ 100000,
+ 400000,
+ 0,
+};
+
+/* clock divisor map */
+static const int divisors[] = {2, 3, 5, 15, 4, 6, 10, 30, 8, 12, 20, 60, 16,
+ 24, 40, 120};
+
+/* Calculate CPU frequency from the M/N registers. */
+static int calculate_cpu_freq(struct ics932s401_data *data)
+{
+ int m, n, freq;
+
+ m = data->regs[ICS932S401_REG_CPU_M_CTRL] & ICS932S401_M_MASK;
+ n = data->regs[ICS932S401_REG_CPU_N_CTRL];
+
+ /* Pull in bits 8 & 9 from the M register */
+ n |= ((int)data->regs[ICS932S401_REG_CPU_M_CTRL] & 0x80) << 1;
+ n |= ((int)data->regs[ICS932S401_REG_CPU_M_CTRL] & 0x40) << 3;
+
+ freq = BASE_CLOCK * (n + 8) / (m + 2);
+ freq /= divisors[data->regs[ICS932S401_REG_CPU_DIVISOR] >>
+ ICS932S401_CPU_DIVISOR_SHIFT];
+
+ return freq;
+}
+
+static ssize_t show_cpu_clock(struct device *dev,
+ struct device_attribute *devattr,
+ char *buf)
+{
+ struct ics932s401_data *data = ics932s401_update_device(dev);
+
+ return sprintf(buf, "%d\n", calculate_cpu_freq(data));
+}
+
+static ssize_t show_cpu_clock_sel(struct device *dev,
+ struct device_attribute *devattr,
+ char *buf)
+{
+ struct ics932s401_data *data = ics932s401_update_device(dev);
+ int freq;
+
+ if (data->regs[ICS932S401_REG_CTRL] & ICS932S401_MN_ENABLED)
+ freq = calculate_cpu_freq(data);
+ else {
+ /* Freq is neatly wrapped up for us */
+ int fid = data->regs[ICS932S401_REG_CFG7] & ICS932S401_FS_MASK;
+ freq = fs_speeds[fid];
+ if (data->regs[ICS932S401_REG_CTRL] & ICS932S401_CPU_ALT) {
+ switch (freq) {
+ case 166666:
+ freq = 160000;
+ break;
+ case 333333:
+ freq = 320000;
+ break;
+ }
+ }
+ }
+
+ return sprintf(buf, "%d\n", freq);
+}
+
+/* Calculate SRC frequency from the M/N registers. */
+static int calculate_src_freq(struct ics932s401_data *data)
+{
+ int m, n, freq;
+
+ m = data->regs[ICS932S401_REG_SRC_M_CTRL] & ICS932S401_M_MASK;
+ n = data->regs[ICS932S401_REG_SRC_N_CTRL];
+
+ /* Pull in bits 8 & 9 from the M register */
+ n |= ((int)data->regs[ICS932S401_REG_SRC_M_CTRL] & 0x80) << 1;
+ n |= ((int)data->regs[ICS932S401_REG_SRC_M_CTRL] & 0x40) << 3;
+
+ freq = BASE_CLOCK * (n + 8) / (m + 2);
+ freq /= divisors[data->regs[ICS932S401_REG_PCISRC_DIVISOR] &
+ ICS932S401_SRC_DIVISOR_MASK];
+
+ return freq;
+}
+
+static ssize_t show_src_clock(struct device *dev,
+ struct device_attribute *devattr,
+ char *buf)
+{
+ struct ics932s401_data *data = ics932s401_update_device(dev);
+
+ return sprintf(buf, "%d\n", calculate_src_freq(data));
+}
+
+static ssize_t show_src_clock_sel(struct device *dev,
+ struct device_attribute *devattr,
+ char *buf)
+{
+ struct ics932s401_data *data = ics932s401_update_device(dev);
+ int freq;
+
+ if (data->regs[ICS932S401_REG_CTRL] & ICS932S401_MN_ENABLED)
+ freq = calculate_src_freq(data);
+ else
+ /* Freq is neatly wrapped up for us */
+ if (data->regs[ICS932S401_REG_CTRL] & ICS932S401_CPU_ALT &&
+ data->regs[ICS932S401_REG_CTRL] & ICS932S401_SRC_ALT)
+ freq = 96000;
+ else
+ freq = 100000;
+
+ return sprintf(buf, "%d\n", freq);
+}
+
+/* Calculate PCI frequency from the SRC M/N registers. */
+static int calculate_pci_freq(struct ics932s401_data *data)
+{
+ int m, n, freq;
+
+ m = data->regs[ICS932S401_REG_SRC_M_CTRL] & ICS932S401_M_MASK;
+ n = data->regs[ICS932S401_REG_SRC_N_CTRL];
+
+ /* Pull in bits 8 & 9 from the M register */
+ n |= ((int)data->regs[ICS932S401_REG_SRC_M_CTRL] & 0x80) << 1;
+ n |= ((int)data->regs[ICS932S401_REG_SRC_M_CTRL] & 0x40) << 3;
+
+ freq = BASE_CLOCK * (n + 8) / (m + 2);
+ freq /= divisors[data->regs[ICS932S401_REG_PCISRC_DIVISOR] >>
+ ICS932S401_PCI_DIVISOR_SHIFT];
+
+ return freq;
+}
+
+static ssize_t show_pci_clock(struct device *dev,
+ struct device_attribute *devattr,
+ char *buf)
+{
+ struct ics932s401_data *data = ics932s401_update_device(dev);
+
+ return sprintf(buf, "%d\n", calculate_pci_freq(data));
+}
+
+static ssize_t show_pci_clock_sel(struct device *dev,
+ struct device_attribute *devattr,
+ char *buf)
+{
+ struct ics932s401_data *data = ics932s401_update_device(dev);
+ int freq;
+
+ if (data->regs[ICS932S401_REG_CTRL] & ICS932S401_MN_ENABLED)
+ freq = calculate_pci_freq(data);
+ else
+ freq = 33333;
+
+ return sprintf(buf, "%d\n", freq);
+}
+
+static ssize_t show_value(struct device *dev,
+ struct device_attribute *devattr,
+ char *buf);
+
+static ssize_t show_spread(struct device *dev,
+ struct device_attribute *devattr,
+ char *buf);
+
+static DEVICE_ATTR(spread_enabled, S_IRUGO, show_spread_enabled, NULL);
+static DEVICE_ATTR(cpu_clock_selection, S_IRUGO, show_cpu_clock_sel, NULL);
+static DEVICE_ATTR(cpu_clock, S_IRUGO, show_cpu_clock, NULL);
+static DEVICE_ATTR(src_clock_selection, S_IRUGO, show_src_clock_sel, NULL);
+static DEVICE_ATTR(src_clock, S_IRUGO, show_src_clock, NULL);
+static DEVICE_ATTR(pci_clock_selection, S_IRUGO, show_pci_clock_sel, NULL);
+static DEVICE_ATTR(pci_clock, S_IRUGO, show_pci_clock, NULL);
+static DEVICE_ATTR(usb_clock, S_IRUGO, show_value, NULL);
+static DEVICE_ATTR(ref_clock, S_IRUGO, show_value, NULL);
+static DEVICE_ATTR(cpu_spread, S_IRUGO, show_spread, NULL);
+static DEVICE_ATTR(src_spread, S_IRUGO, show_spread, NULL);
+
+static struct attribute *ics932s401_attr[] =
+{
+ &dev_attr_spread_enabled.attr,
+ &dev_attr_cpu_clock_selection.attr,
+ &dev_attr_cpu_clock.attr,
+ &dev_attr_src_clock_selection.attr,
+ &dev_attr_src_clock.attr,
+ &dev_attr_pci_clock_selection.attr,
+ &dev_attr_pci_clock.attr,
+ &dev_attr_usb_clock.attr,
+ &dev_attr_ref_clock.attr,
+ &dev_attr_cpu_spread.attr,
+ &dev_attr_src_spread.attr,
+ NULL
+};
+
+static ssize_t show_value(struct device *dev,
+ struct device_attribute *devattr,
+ char *buf)
+{
+ int x;
+
+ if (devattr == &dev_attr_usb_clock)
+ x = 48000;
+ else if (devattr == &dev_attr_ref_clock)
+ x = BASE_CLOCK;
+ else
+ BUG();
+
+ return sprintf(buf, "%d\n", x);
+}
+
+static ssize_t show_spread(struct device *dev,
+ struct device_attribute *devattr,
+ char *buf)
+{
+ struct ics932s401_data *data = ics932s401_update_device(dev);
+ int reg;
+ unsigned long val;
+
+ if (!(data->regs[ICS932S401_REG_CFG2] & ICS932S401_CFG1_SPREAD))
+ return sprintf(buf, "0%%\n");
+
+ if (devattr == &dev_attr_src_spread)
+ reg = ICS932S401_REG_SRC_SPREAD1;
+ else if (devattr == &dev_attr_cpu_spread)
+ reg = ICS932S401_REG_CPU_SPREAD1;
+ else
+ BUG();
+
+ val = data->regs[reg] | (data->regs[reg + 1] << 8);
+ val &= ICS932S401_SPREAD_MASK;
+
+ /* Scale 0..2^14 to -0.5. */
+ val = 500000 * val / 16384;
+ return sprintf(buf, "-0.%lu%%\n", val);
+}
+
+/* Return 0 if detection is successful, -ENODEV otherwise */
+static int ics932s401_detect(struct i2c_client *client,
+ struct i2c_board_info *info)
+{
+ struct i2c_adapter *adapter = client->adapter;
+ int vendor, device, revision;
+
+ if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE_DATA))
+ return -ENODEV;
+
+ vendor = i2c_smbus_read_word_data(client, ICS932S401_REG_VENDOR_REV);
+ vendor >>= 8;
+ revision = vendor >> ICS932S401_REV_SHIFT;
+ vendor &= ICS932S401_VENDOR_MASK;
+ if (vendor != ICS932S401_VENDOR)
+ return -ENODEV;
+
+ device = i2c_smbus_read_word_data(client, ICS932S401_REG_DEVICE);
+ device >>= 8;
+ if (device != ICS932S401_DEVICE)
+ return -ENODEV;
+
+ if (revision != ICS932S401_REV)
+ dev_info(&adapter->dev, "Unknown revision %d\n", revision);
+
+ strlcpy(info->type, "ics932s401", I2C_NAME_SIZE);
+
+ return 0;
+}
+
+static int ics932s401_probe(struct i2c_client *client,
+ const struct i2c_device_id *id)
+{
+ struct ics932s401_data *data;
+ int err;
+
+ data = kzalloc(sizeof(struct ics932s401_data), GFP_KERNEL);
+ if (!data) {
+ err = -ENOMEM;
+ goto exit;
+ }
+
+ i2c_set_clientdata(client, data);
+ mutex_init(&data->lock);
+
+ dev_info(&client->dev, "%s chip found\n", client->name);
+
+ /* Register sysfs hooks */
+ data->attrs.attrs = ics932s401_attr;
+ err = sysfs_create_group(&client->dev.kobj, &data->attrs);
+ if (err)
+ goto exit_free;
+
+ return 0;
+
+exit_free:
+ kfree(data);
+exit:
+ return err;
+}
+
+static int ics932s401_remove(struct i2c_client *client)
+{
+ struct ics932s401_data *data = i2c_get_clientdata(client);
+
+ sysfs_remove_group(&client->dev.kobj, &data->attrs);
+ kfree(data);
+ return 0;
+}
+
+module_i2c_driver(ics932s401_driver);
+
+MODULE_AUTHOR("Darrick J. Wong <darrick.wong@oracle.com>");
+MODULE_DESCRIPTION("ICS932S401 driver");
+MODULE_LICENSE("GPL");
+
+/* IBM IntelliStation Z30 */
+MODULE_ALIAS("dmi:bvnIBM:*:rn9228:*");
+MODULE_ALIAS("dmi:bvnIBM:*:rn9232:*");
+
+/* IBM x3650/x3550 */
+MODULE_ALIAS("dmi:bvnIBM:*:pnIBMSystemx3650*");
+MODULE_ALIAS("dmi:bvnIBM:*:pnIBMSystemx3550*");
diff --git a/drivers/misc/ioc4.c b/drivers/misc/ioc4.c
new file mode 100644
index 0000000..8758d03
--- /dev/null
+++ b/drivers/misc/ioc4.c
@@ -0,0 +1,500 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2005-2006 Silicon Graphics, Inc. All Rights Reserved.
+ */
+
+/* This file contains the master driver module for use by SGI IOC4 subdrivers.
+ *
+ * It allocates any resources shared between multiple subdevices, and
+ * provides accessor functions (where needed) and the like for those
+ * resources. It also provides a mechanism for the subdevice modules
+ * to support loading and unloading.
+ *
+ * Non-shared resources (e.g. external interrupt A_INT_OUT register page
+ * alias, serial port and UART registers) are handled by the subdevice
+ * modules themselves.
+ *
+ * This is all necessary because IOC4 is not implemented as a multi-function
+ * PCI device, but an amalgamation of disparate registers for several
+ * types of device (ATA, serial, external interrupts). The normal
+ * resource management in the kernel doesn't have quite the right interfaces
+ * to handle this situation (e.g. multiple modules can't claim the same
+ * PCI ID), thus this IOC4 master module.
+ */
+
+#include <linux/errno.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/ioc4.h>
+#include <linux/ktime.h>
+#include <linux/slab.h>
+#include <linux/mutex.h>
+#include <linux/time.h>
+#include <asm/io.h>
+
+/***************
+ * Definitions *
+ ***************/
+
+/* Tweakable values */
+
+/* PCI bus speed detection/calibration */
+#define IOC4_CALIBRATE_COUNT 63 /* Calibration cycle period */
+#define IOC4_CALIBRATE_CYCLES 256 /* Average over this many cycles */
+#define IOC4_CALIBRATE_DISCARD 2 /* Discard first few cycles */
+#define IOC4_CALIBRATE_LOW_MHZ 25 /* Lower bound on bus speed sanity */
+#define IOC4_CALIBRATE_HIGH_MHZ 75 /* Upper bound on bus speed sanity */
+#define IOC4_CALIBRATE_DEFAULT_MHZ 66 /* Assumed if sanity check fails */
+
+/************************
+ * Submodule management *
+ ************************/
+
+static DEFINE_MUTEX(ioc4_mutex);
+
+static LIST_HEAD(ioc4_devices);
+static LIST_HEAD(ioc4_submodules);
+
+/* Register an IOC4 submodule */
+int
+ioc4_register_submodule(struct ioc4_submodule *is)
+{
+ struct ioc4_driver_data *idd;
+
+ mutex_lock(&ioc4_mutex);
+ list_add(&is->is_list, &ioc4_submodules);
+
+ /* Initialize submodule for each IOC4 */
+ if (!is->is_probe)
+ goto out;
+
+ list_for_each_entry(idd, &ioc4_devices, idd_list) {
+ if (is->is_probe(idd)) {
+ printk(KERN_WARNING
+ "%s: IOC4 submodule %s probe failed "
+ "for pci_dev %s",
+ __func__, module_name(is->is_owner),
+ pci_name(idd->idd_pdev));
+ }
+ }
+ out:
+ mutex_unlock(&ioc4_mutex);
+ return 0;
+}
+
+/* Unregister an IOC4 submodule */
+void
+ioc4_unregister_submodule(struct ioc4_submodule *is)
+{
+ struct ioc4_driver_data *idd;
+
+ mutex_lock(&ioc4_mutex);
+ list_del(&is->is_list);
+
+ /* Remove submodule for each IOC4 */
+ if (!is->is_remove)
+ goto out;
+
+ list_for_each_entry(idd, &ioc4_devices, idd_list) {
+ if (is->is_remove(idd)) {
+ printk(KERN_WARNING
+ "%s: IOC4 submodule %s remove failed "
+ "for pci_dev %s.\n",
+ __func__, module_name(is->is_owner),
+ pci_name(idd->idd_pdev));
+ }
+ }
+ out:
+ mutex_unlock(&ioc4_mutex);
+}
+
+/*********************
+ * Device management *
+ *********************/
+
+#define IOC4_CALIBRATE_LOW_LIMIT \
+ (1000*IOC4_EXTINT_COUNT_DIVISOR/IOC4_CALIBRATE_LOW_MHZ)
+#define IOC4_CALIBRATE_HIGH_LIMIT \
+ (1000*IOC4_EXTINT_COUNT_DIVISOR/IOC4_CALIBRATE_HIGH_MHZ)
+#define IOC4_CALIBRATE_DEFAULT \
+ (1000*IOC4_EXTINT_COUNT_DIVISOR/IOC4_CALIBRATE_DEFAULT_MHZ)
+
+#define IOC4_CALIBRATE_END \
+ (IOC4_CALIBRATE_CYCLES + IOC4_CALIBRATE_DISCARD)
+
+#define IOC4_INT_OUT_MODE_TOGGLE 0x7 /* Toggle INT_OUT every COUNT+1 ticks */
+
+/* Determines external interrupt output clock period of the PCI bus an
+ * IOC4 is attached to. This value can be used to determine the PCI
+ * bus speed.
+ *
+ * IOC4 has a design feature that various internal timers are derived from
+ * the PCI bus clock. This causes IOC4 device drivers to need to take the
+ * bus speed into account when setting various register values (e.g. INT_OUT
+ * register COUNT field, UART divisors, etc). Since this information is
+ * needed by several subdrivers, it is determined by the main IOC4 driver,
+ * even though the following code utilizes external interrupt registers
+ * to perform the speed calculation.
+ */
+static void
+ioc4_clock_calibrate(struct ioc4_driver_data *idd)
+{
+ union ioc4_int_out int_out;
+ union ioc4_gpcr gpcr;
+ unsigned int state, last_state;
+ uint64_t start, end, period;
+ unsigned int count;
+
+ /* Enable output */
+ gpcr.raw = 0;
+ gpcr.fields.dir = IOC4_GPCR_DIR_0;
+ gpcr.fields.int_out_en = 1;
+ writel(gpcr.raw, &idd->idd_misc_regs->gpcr_s.raw);
+
+ /* Reset to power-on state */
+ writel(0, &idd->idd_misc_regs->int_out.raw);
+ mmiowb();
+
+ /* Set up square wave */
+ int_out.raw = 0;
+ int_out.fields.count = IOC4_CALIBRATE_COUNT;
+ int_out.fields.mode = IOC4_INT_OUT_MODE_TOGGLE;
+ int_out.fields.diag = 0;
+ writel(int_out.raw, &idd->idd_misc_regs->int_out.raw);
+ mmiowb();
+
+ /* Check square wave period averaged over some number of cycles */
+ start = ktime_get_ns();
+ state = 1; /* make sure the first read isn't a rising edge */
+ for (count = 0; count <= IOC4_CALIBRATE_END; count++) {
+ do { /* wait for a rising edge */
+ last_state = state;
+ int_out.raw = readl(&idd->idd_misc_regs->int_out.raw);
+ state = int_out.fields.int_out;
+ } while (last_state || !state);
+
+ /* discard the first few cycles */
+ if (count == IOC4_CALIBRATE_DISCARD)
+ start = ktime_get_ns();
+ }
+ end = ktime_get_ns();
+
+ /* Calculation rearranged to preserve intermediate precision.
+ * Logically:
+ * 1. "end - start" gives us the measurement period over all
+ * the square wave cycles.
+ * 2. Divide by number of square wave cycles to get the period
+ * of a square wave cycle.
+ * 3. Divide by 2*(int_out.fields.count+1), which is the formula
+ * by which the IOC4 generates the square wave, to get the
+ * period of an IOC4 INT_OUT count.
+ */
+ period = (end - start) /
+ (IOC4_CALIBRATE_CYCLES * 2 * (IOC4_CALIBRATE_COUNT + 1));
+
+ /* Bounds check the result. */
+ if (period > IOC4_CALIBRATE_LOW_LIMIT ||
+ period < IOC4_CALIBRATE_HIGH_LIMIT) {
+ printk(KERN_INFO
+ "IOC4 %s: Clock calibration failed. Assuming"
+ "PCI clock is %d ns.\n",
+ pci_name(idd->idd_pdev),
+ IOC4_CALIBRATE_DEFAULT / IOC4_EXTINT_COUNT_DIVISOR);
+ period = IOC4_CALIBRATE_DEFAULT;
+ } else {
+ u64 ns = period;
+
+ do_div(ns, IOC4_EXTINT_COUNT_DIVISOR);
+ printk(KERN_DEBUG
+ "IOC4 %s: PCI clock is %llu ns.\n",
+ pci_name(idd->idd_pdev), (unsigned long long)ns);
+ }
+
+ /* Remember results. We store the extint clock period rather
+ * than the PCI clock period so that greater precision is
+ * retained. Divide by IOC4_EXTINT_COUNT_DIVISOR to get
+ * PCI clock period.
+ */
+ idd->count_period = period;
+}
+
+/* There are three variants of IOC4 cards: IO9, IO10, and PCI-RT.
+ * Each brings out different combinations of IOC4 signals, thus.
+ * the IOC4 subdrivers need to know to which we're attached.
+ *
+ * We look for the presence of a SCSI (IO9) or SATA (IO10) controller
+ * on the same PCI bus at slot number 3 to differentiate IO9 from IO10.
+ * If neither is present, it's a PCI-RT.
+ */
+static unsigned int
+ioc4_variant(struct ioc4_driver_data *idd)
+{
+ struct pci_dev *pdev = NULL;
+ int found = 0;
+
+ /* IO9: Look for a QLogic ISP 12160 at the same bus and slot 3. */
+ do {
+ pdev = pci_get_device(PCI_VENDOR_ID_QLOGIC,
+ PCI_DEVICE_ID_QLOGIC_ISP12160, pdev);
+ if (pdev &&
+ idd->idd_pdev->bus->number == pdev->bus->number &&
+ 3 == PCI_SLOT(pdev->devfn))
+ found = 1;
+ } while (pdev && !found);
+ if (NULL != pdev) {
+ pci_dev_put(pdev);
+ return IOC4_VARIANT_IO9;
+ }
+
+ /* IO10: Look for a Vitesse VSC 7174 at the same bus and slot 3. */
+ pdev = NULL;
+ do {
+ pdev = pci_get_device(PCI_VENDOR_ID_VITESSE,
+ PCI_DEVICE_ID_VITESSE_VSC7174, pdev);
+ if (pdev &&
+ idd->idd_pdev->bus->number == pdev->bus->number &&
+ 3 == PCI_SLOT(pdev->devfn))
+ found = 1;
+ } while (pdev && !found);
+ if (NULL != pdev) {
+ pci_dev_put(pdev);
+ return IOC4_VARIANT_IO10;
+ }
+
+ /* PCI-RT: No SCSI/SATA controller will be present */
+ return IOC4_VARIANT_PCI_RT;
+}
+
+static void
+ioc4_load_modules(struct work_struct *work)
+{
+ request_module("sgiioc4");
+}
+
+static DECLARE_WORK(ioc4_load_modules_work, ioc4_load_modules);
+
+/* Adds a new instance of an IOC4 card */
+static int
+ioc4_probe(struct pci_dev *pdev, const struct pci_device_id *pci_id)
+{
+ struct ioc4_driver_data *idd;
+ struct ioc4_submodule *is;
+ uint32_t pcmd;
+ int ret;
+
+ /* Enable IOC4 and take ownership of it */
+ if ((ret = pci_enable_device(pdev))) {
+ printk(KERN_WARNING
+ "%s: Failed to enable IOC4 device for pci_dev %s.\n",
+ __func__, pci_name(pdev));
+ goto out;
+ }
+ pci_set_master(pdev);
+
+ /* Set up per-IOC4 data */
+ idd = kmalloc(sizeof(struct ioc4_driver_data), GFP_KERNEL);
+ if (!idd) {
+ printk(KERN_WARNING
+ "%s: Failed to allocate IOC4 data for pci_dev %s.\n",
+ __func__, pci_name(pdev));
+ ret = -ENODEV;
+ goto out_idd;
+ }
+ idd->idd_pdev = pdev;
+ idd->idd_pci_id = pci_id;
+
+ /* Map IOC4 misc registers. These are shared between subdevices
+ * so the main IOC4 module manages them.
+ */
+ idd->idd_bar0 = pci_resource_start(idd->idd_pdev, 0);
+ if (!idd->idd_bar0) {
+ printk(KERN_WARNING
+ "%s: Unable to find IOC4 misc resource "
+ "for pci_dev %s.\n",
+ __func__, pci_name(idd->idd_pdev));
+ ret = -ENODEV;
+ goto out_pci;
+ }
+ if (!request_mem_region(idd->idd_bar0, sizeof(struct ioc4_misc_regs),
+ "ioc4_misc")) {
+ printk(KERN_WARNING
+ "%s: Unable to request IOC4 misc region "
+ "for pci_dev %s.\n",
+ __func__, pci_name(idd->idd_pdev));
+ ret = -ENODEV;
+ goto out_pci;
+ }
+ idd->idd_misc_regs = ioremap(idd->idd_bar0,
+ sizeof(struct ioc4_misc_regs));
+ if (!idd->idd_misc_regs) {
+ printk(KERN_WARNING
+ "%s: Unable to remap IOC4 misc region "
+ "for pci_dev %s.\n",
+ __func__, pci_name(idd->idd_pdev));
+ ret = -ENODEV;
+ goto out_misc_region;
+ }
+
+ /* Failsafe portion of per-IOC4 initialization */
+
+ /* Detect card variant */
+ idd->idd_variant = ioc4_variant(idd);
+ printk(KERN_INFO "IOC4 %s: %s card detected.\n", pci_name(pdev),
+ idd->idd_variant == IOC4_VARIANT_IO9 ? "IO9" :
+ idd->idd_variant == IOC4_VARIANT_PCI_RT ? "PCI-RT" :
+ idd->idd_variant == IOC4_VARIANT_IO10 ? "IO10" : "unknown");
+
+ /* Initialize IOC4 */
+ pci_read_config_dword(idd->idd_pdev, PCI_COMMAND, &pcmd);
+ pci_write_config_dword(idd->idd_pdev, PCI_COMMAND,
+ pcmd | PCI_COMMAND_PARITY | PCI_COMMAND_SERR);
+
+ /* Determine PCI clock */
+ ioc4_clock_calibrate(idd);
+
+ /* Disable/clear all interrupts. Need to do this here lest
+ * one submodule request the shared IOC4 IRQ, but interrupt
+ * is generated by a different subdevice.
+ */
+ /* Disable */
+ writel(~0, &idd->idd_misc_regs->other_iec.raw);
+ writel(~0, &idd->idd_misc_regs->sio_iec);
+ /* Clear (i.e. acknowledge) */
+ writel(~0, &idd->idd_misc_regs->other_ir.raw);
+ writel(~0, &idd->idd_misc_regs->sio_ir);
+
+ /* Track PCI-device specific data */
+ idd->idd_serial_data = NULL;
+ pci_set_drvdata(idd->idd_pdev, idd);
+
+ mutex_lock(&ioc4_mutex);
+ list_add_tail(&idd->idd_list, &ioc4_devices);
+
+ /* Add this IOC4 to all submodules */
+ list_for_each_entry(is, &ioc4_submodules, is_list) {
+ if (is->is_probe && is->is_probe(idd)) {
+ printk(KERN_WARNING
+ "%s: IOC4 submodule 0x%s probe failed "
+ "for pci_dev %s.\n",
+ __func__, module_name(is->is_owner),
+ pci_name(idd->idd_pdev));
+ }
+ }
+ mutex_unlock(&ioc4_mutex);
+
+ /* Request sgiioc4 IDE driver on boards that bring that functionality
+ * off of IOC4. The root filesystem may be hosted on a drive connected
+ * to IOC4, so we need to make sure the sgiioc4 driver is loaded as it
+ * won't be picked up by modprobes due to the ioc4 module owning the
+ * PCI device.
+ */
+ if (idd->idd_variant != IOC4_VARIANT_PCI_RT) {
+ /* Request the module from a work procedure as the modprobe
+ * goes out to a userland helper and that will hang if done
+ * directly from ioc4_probe().
+ */
+ printk(KERN_INFO "IOC4 loading sgiioc4 submodule\n");
+ schedule_work(&ioc4_load_modules_work);
+ }
+
+ return 0;
+
+out_misc_region:
+ release_mem_region(idd->idd_bar0, sizeof(struct ioc4_misc_regs));
+out_pci:
+ kfree(idd);
+out_idd:
+ pci_disable_device(pdev);
+out:
+ return ret;
+}
+
+/* Removes a particular instance of an IOC4 card. */
+static void
+ioc4_remove(struct pci_dev *pdev)
+{
+ struct ioc4_submodule *is;
+ struct ioc4_driver_data *idd;
+
+ idd = pci_get_drvdata(pdev);
+
+ /* Remove this IOC4 from all submodules */
+ mutex_lock(&ioc4_mutex);
+ list_for_each_entry(is, &ioc4_submodules, is_list) {
+ if (is->is_remove && is->is_remove(idd)) {
+ printk(KERN_WARNING
+ "%s: IOC4 submodule 0x%s remove failed "
+ "for pci_dev %s.\n",
+ __func__, module_name(is->is_owner),
+ pci_name(idd->idd_pdev));
+ }
+ }
+ mutex_unlock(&ioc4_mutex);
+
+ /* Release resources */
+ iounmap(idd->idd_misc_regs);
+ if (!idd->idd_bar0) {
+ printk(KERN_WARNING
+ "%s: Unable to get IOC4 misc mapping for pci_dev %s. "
+ "Device removal may be incomplete.\n",
+ __func__, pci_name(idd->idd_pdev));
+ }
+ release_mem_region(idd->idd_bar0, sizeof(struct ioc4_misc_regs));
+
+ /* Disable IOC4 and relinquish */
+ pci_disable_device(pdev);
+
+ /* Remove and free driver data */
+ mutex_lock(&ioc4_mutex);
+ list_del(&idd->idd_list);
+ mutex_unlock(&ioc4_mutex);
+ kfree(idd);
+}
+
+static struct pci_device_id ioc4_id_table[] = {
+ {PCI_VENDOR_ID_SGI, PCI_DEVICE_ID_SGI_IOC4, PCI_ANY_ID,
+ PCI_ANY_ID, 0x0b4000, 0xFFFFFF},
+ {0}
+};
+
+static struct pci_driver ioc4_driver = {
+ .name = "IOC4",
+ .id_table = ioc4_id_table,
+ .probe = ioc4_probe,
+ .remove = ioc4_remove,
+};
+
+MODULE_DEVICE_TABLE(pci, ioc4_id_table);
+
+/*********************
+ * Module management *
+ *********************/
+
+/* Module load */
+static int __init
+ioc4_init(void)
+{
+ return pci_register_driver(&ioc4_driver);
+}
+
+/* Module unload */
+static void __exit
+ioc4_exit(void)
+{
+ /* Ensure ioc4_load_modules() has completed before exiting */
+ flush_work(&ioc4_load_modules_work);
+ pci_unregister_driver(&ioc4_driver);
+}
+
+module_init(ioc4_init);
+module_exit(ioc4_exit);
+
+MODULE_AUTHOR("Brent Casavant - Silicon Graphics, Inc. <bcasavan@sgi.com>");
+MODULE_DESCRIPTION("PCI driver master module for SGI IOC4 Base-IO Card");
+MODULE_LICENSE("GPL");
+
+EXPORT_SYMBOL(ioc4_register_submodule);
+EXPORT_SYMBOL(ioc4_unregister_submodule);
diff --git a/drivers/misc/isl29003.c b/drivers/misc/isl29003.c
new file mode 100644
index 0000000..976df00
--- /dev/null
+++ b/drivers/misc/isl29003.c
@@ -0,0 +1,480 @@
+/*
+ * isl29003.c - Linux kernel module for
+ * Intersil ISL29003 ambient light sensor
+ *
+ * See file:Documentation/misc-devices/isl29003
+ *
+ * Copyright (c) 2009 Daniel Mack <daniel@caiaq.de>
+ *
+ * Based on code written by
+ * Rodolfo Giometti <giometti@linux.it>
+ * Eurotech S.p.A. <info@eurotech.it>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/i2c.h>
+#include <linux/mutex.h>
+#include <linux/delay.h>
+
+#define ISL29003_DRV_NAME "isl29003"
+#define DRIVER_VERSION "1.0"
+
+#define ISL29003_REG_COMMAND 0x00
+#define ISL29003_ADC_ENABLED (1 << 7)
+#define ISL29003_ADC_PD (1 << 6)
+#define ISL29003_TIMING_INT (1 << 5)
+#define ISL29003_MODE_SHIFT (2)
+#define ISL29003_MODE_MASK (0x3 << ISL29003_MODE_SHIFT)
+#define ISL29003_RES_SHIFT (0)
+#define ISL29003_RES_MASK (0x3 << ISL29003_RES_SHIFT)
+
+#define ISL29003_REG_CONTROL 0x01
+#define ISL29003_INT_FLG (1 << 5)
+#define ISL29003_RANGE_SHIFT (2)
+#define ISL29003_RANGE_MASK (0x3 << ISL29003_RANGE_SHIFT)
+#define ISL29003_INT_PERSISTS_SHIFT (0)
+#define ISL29003_INT_PERSISTS_MASK (0xf << ISL29003_INT_PERSISTS_SHIFT)
+
+#define ISL29003_REG_IRQ_THRESH_HI 0x02
+#define ISL29003_REG_IRQ_THRESH_LO 0x03
+#define ISL29003_REG_LSB_SENSOR 0x04
+#define ISL29003_REG_MSB_SENSOR 0x05
+#define ISL29003_REG_LSB_TIMER 0x06
+#define ISL29003_REG_MSB_TIMER 0x07
+
+#define ISL29003_NUM_CACHABLE_REGS 4
+
+struct isl29003_data {
+ struct i2c_client *client;
+ struct mutex lock;
+ u8 reg_cache[ISL29003_NUM_CACHABLE_REGS];
+ u8 power_state_before_suspend;
+};
+
+static int gain_range[] = {
+ 1000, 4000, 16000, 64000
+};
+
+/*
+ * register access helpers
+ */
+
+static int __isl29003_read_reg(struct i2c_client *client,
+ u32 reg, u8 mask, u8 shift)
+{
+ struct isl29003_data *data = i2c_get_clientdata(client);
+ return (data->reg_cache[reg] & mask) >> shift;
+}
+
+static int __isl29003_write_reg(struct i2c_client *client,
+ u32 reg, u8 mask, u8 shift, u8 val)
+{
+ struct isl29003_data *data = i2c_get_clientdata(client);
+ int ret = 0;
+ u8 tmp;
+
+ if (reg >= ISL29003_NUM_CACHABLE_REGS)
+ return -EINVAL;
+
+ mutex_lock(&data->lock);
+
+ tmp = data->reg_cache[reg];
+ tmp &= ~mask;
+ tmp |= val << shift;
+
+ ret = i2c_smbus_write_byte_data(client, reg, tmp);
+ if (!ret)
+ data->reg_cache[reg] = tmp;
+
+ mutex_unlock(&data->lock);
+ return ret;
+}
+
+/*
+ * internally used functions
+ */
+
+/* range */
+static int isl29003_get_range(struct i2c_client *client)
+{
+ return __isl29003_read_reg(client, ISL29003_REG_CONTROL,
+ ISL29003_RANGE_MASK, ISL29003_RANGE_SHIFT);
+}
+
+static int isl29003_set_range(struct i2c_client *client, int range)
+{
+ return __isl29003_write_reg(client, ISL29003_REG_CONTROL,
+ ISL29003_RANGE_MASK, ISL29003_RANGE_SHIFT, range);
+}
+
+/* resolution */
+static int isl29003_get_resolution(struct i2c_client *client)
+{
+ return __isl29003_read_reg(client, ISL29003_REG_COMMAND,
+ ISL29003_RES_MASK, ISL29003_RES_SHIFT);
+}
+
+static int isl29003_set_resolution(struct i2c_client *client, int res)
+{
+ return __isl29003_write_reg(client, ISL29003_REG_COMMAND,
+ ISL29003_RES_MASK, ISL29003_RES_SHIFT, res);
+}
+
+/* mode */
+static int isl29003_get_mode(struct i2c_client *client)
+{
+ return __isl29003_read_reg(client, ISL29003_REG_COMMAND,
+ ISL29003_RES_MASK, ISL29003_RES_SHIFT);
+}
+
+static int isl29003_set_mode(struct i2c_client *client, int mode)
+{
+ return __isl29003_write_reg(client, ISL29003_REG_COMMAND,
+ ISL29003_RES_MASK, ISL29003_RES_SHIFT, mode);
+}
+
+/* power_state */
+static int isl29003_set_power_state(struct i2c_client *client, int state)
+{
+ return __isl29003_write_reg(client, ISL29003_REG_COMMAND,
+ ISL29003_ADC_ENABLED | ISL29003_ADC_PD, 0,
+ state ? ISL29003_ADC_ENABLED : ISL29003_ADC_PD);
+}
+
+static int isl29003_get_power_state(struct i2c_client *client)
+{
+ struct isl29003_data *data = i2c_get_clientdata(client);
+ u8 cmdreg = data->reg_cache[ISL29003_REG_COMMAND];
+ return ~cmdreg & ISL29003_ADC_PD;
+}
+
+static int isl29003_get_adc_value(struct i2c_client *client)
+{
+ struct isl29003_data *data = i2c_get_clientdata(client);
+ int lsb, msb, range, bitdepth;
+
+ mutex_lock(&data->lock);
+ lsb = i2c_smbus_read_byte_data(client, ISL29003_REG_LSB_SENSOR);
+
+ if (lsb < 0) {
+ mutex_unlock(&data->lock);
+ return lsb;
+ }
+
+ msb = i2c_smbus_read_byte_data(client, ISL29003_REG_MSB_SENSOR);
+ mutex_unlock(&data->lock);
+
+ if (msb < 0)
+ return msb;
+
+ range = isl29003_get_range(client);
+ bitdepth = (4 - isl29003_get_resolution(client)) * 4;
+ return (((msb << 8) | lsb) * gain_range[range]) >> bitdepth;
+}
+
+/*
+ * sysfs layer
+ */
+
+/* range */
+static ssize_t isl29003_show_range(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct i2c_client *client = to_i2c_client(dev);
+ return sprintf(buf, "%i\n", isl29003_get_range(client));
+}
+
+static ssize_t isl29003_store_range(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct i2c_client *client = to_i2c_client(dev);
+ unsigned long val;
+ int ret;
+
+ ret = kstrtoul(buf, 10, &val);
+ if (ret)
+ return ret;
+
+ if (val > 3)
+ return -EINVAL;
+
+ ret = isl29003_set_range(client, val);
+ if (ret < 0)
+ return ret;
+
+ return count;
+}
+
+static DEVICE_ATTR(range, S_IWUSR | S_IRUGO,
+ isl29003_show_range, isl29003_store_range);
+
+
+/* resolution */
+static ssize_t isl29003_show_resolution(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct i2c_client *client = to_i2c_client(dev);
+ return sprintf(buf, "%d\n", isl29003_get_resolution(client));
+}
+
+static ssize_t isl29003_store_resolution(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct i2c_client *client = to_i2c_client(dev);
+ unsigned long val;
+ int ret;
+
+ ret = kstrtoul(buf, 10, &val);
+ if (ret)
+ return ret;
+
+ if (val > 3)
+ return -EINVAL;
+
+ ret = isl29003_set_resolution(client, val);
+ if (ret < 0)
+ return ret;
+
+ return count;
+}
+
+static DEVICE_ATTR(resolution, S_IWUSR | S_IRUGO,
+ isl29003_show_resolution, isl29003_store_resolution);
+
+/* mode */
+static ssize_t isl29003_show_mode(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct i2c_client *client = to_i2c_client(dev);
+ return sprintf(buf, "%d\n", isl29003_get_mode(client));
+}
+
+static ssize_t isl29003_store_mode(struct device *dev,
+ struct device_attribute *attr, const char *buf, size_t count)
+{
+ struct i2c_client *client = to_i2c_client(dev);
+ unsigned long val;
+ int ret;
+
+ ret = kstrtoul(buf, 10, &val);
+ if (ret)
+ return ret;
+
+ if (val > 2)
+ return -EINVAL;
+
+ ret = isl29003_set_mode(client, val);
+ if (ret < 0)
+ return ret;
+
+ return count;
+}
+
+static DEVICE_ATTR(mode, S_IWUSR | S_IRUGO,
+ isl29003_show_mode, isl29003_store_mode);
+
+
+/* power state */
+static ssize_t isl29003_show_power_state(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct i2c_client *client = to_i2c_client(dev);
+ return sprintf(buf, "%d\n", isl29003_get_power_state(client));
+}
+
+static ssize_t isl29003_store_power_state(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct i2c_client *client = to_i2c_client(dev);
+ unsigned long val;
+ int ret;
+
+ ret = kstrtoul(buf, 10, &val);
+ if (ret)
+ return ret;
+
+ if (val > 1)
+ return -EINVAL;
+
+ ret = isl29003_set_power_state(client, val);
+ return ret ? ret : count;
+}
+
+static DEVICE_ATTR(power_state, S_IWUSR | S_IRUGO,
+ isl29003_show_power_state, isl29003_store_power_state);
+
+
+/* lux */
+static ssize_t isl29003_show_lux(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct i2c_client *client = to_i2c_client(dev);
+
+ /* No LUX data if not operational */
+ if (!isl29003_get_power_state(client))
+ return -EBUSY;
+
+ return sprintf(buf, "%d\n", isl29003_get_adc_value(client));
+}
+
+static DEVICE_ATTR(lux, S_IRUGO, isl29003_show_lux, NULL);
+
+static struct attribute *isl29003_attributes[] = {
+ &dev_attr_range.attr,
+ &dev_attr_resolution.attr,
+ &dev_attr_mode.attr,
+ &dev_attr_power_state.attr,
+ &dev_attr_lux.attr,
+ NULL
+};
+
+static const struct attribute_group isl29003_attr_group = {
+ .attrs = isl29003_attributes,
+};
+
+static int isl29003_init_client(struct i2c_client *client)
+{
+ struct isl29003_data *data = i2c_get_clientdata(client);
+ int i;
+
+ /* read all the registers once to fill the cache.
+ * if one of the reads fails, we consider the init failed */
+ for (i = 0; i < ARRAY_SIZE(data->reg_cache); i++) {
+ int v = i2c_smbus_read_byte_data(client, i);
+ if (v < 0)
+ return -ENODEV;
+
+ data->reg_cache[i] = v;
+ }
+
+ /* set defaults */
+ isl29003_set_range(client, 0);
+ isl29003_set_resolution(client, 0);
+ isl29003_set_mode(client, 0);
+ isl29003_set_power_state(client, 0);
+
+ return 0;
+}
+
+/*
+ * I2C layer
+ */
+
+static int isl29003_probe(struct i2c_client *client,
+ const struct i2c_device_id *id)
+{
+ struct i2c_adapter *adapter = to_i2c_adapter(client->dev.parent);
+ struct isl29003_data *data;
+ int err = 0;
+
+ if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE))
+ return -EIO;
+
+ data = kzalloc(sizeof(struct isl29003_data), GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+
+ data->client = client;
+ i2c_set_clientdata(client, data);
+ mutex_init(&data->lock);
+
+ /* initialize the ISL29003 chip */
+ err = isl29003_init_client(client);
+ if (err)
+ goto exit_kfree;
+
+ /* register sysfs hooks */
+ err = sysfs_create_group(&client->dev.kobj, &isl29003_attr_group);
+ if (err)
+ goto exit_kfree;
+
+ dev_info(&client->dev, "driver version %s enabled\n", DRIVER_VERSION);
+ return 0;
+
+exit_kfree:
+ kfree(data);
+ return err;
+}
+
+static int isl29003_remove(struct i2c_client *client)
+{
+ sysfs_remove_group(&client->dev.kobj, &isl29003_attr_group);
+ isl29003_set_power_state(client, 0);
+ kfree(i2c_get_clientdata(client));
+ return 0;
+}
+
+#ifdef CONFIG_PM_SLEEP
+static int isl29003_suspend(struct device *dev)
+{
+ struct i2c_client *client = to_i2c_client(dev);
+ struct isl29003_data *data = i2c_get_clientdata(client);
+
+ data->power_state_before_suspend = isl29003_get_power_state(client);
+ return isl29003_set_power_state(client, 0);
+}
+
+static int isl29003_resume(struct device *dev)
+{
+ int i;
+ struct i2c_client *client = to_i2c_client(dev);
+ struct isl29003_data *data = i2c_get_clientdata(client);
+
+ /* restore registers from cache */
+ for (i = 0; i < ARRAY_SIZE(data->reg_cache); i++)
+ if (i2c_smbus_write_byte_data(client, i, data->reg_cache[i]))
+ return -EIO;
+
+ return isl29003_set_power_state(client,
+ data->power_state_before_suspend);
+}
+
+static SIMPLE_DEV_PM_OPS(isl29003_pm_ops, isl29003_suspend, isl29003_resume);
+#define ISL29003_PM_OPS (&isl29003_pm_ops)
+
+#else
+#define ISL29003_PM_OPS NULL
+#endif /* CONFIG_PM_SLEEP */
+
+static const struct i2c_device_id isl29003_id[] = {
+ { "isl29003", 0 },
+ {}
+};
+MODULE_DEVICE_TABLE(i2c, isl29003_id);
+
+static struct i2c_driver isl29003_driver = {
+ .driver = {
+ .name = ISL29003_DRV_NAME,
+ .pm = ISL29003_PM_OPS,
+ },
+ .probe = isl29003_probe,
+ .remove = isl29003_remove,
+ .id_table = isl29003_id,
+};
+
+module_i2c_driver(isl29003_driver);
+
+MODULE_AUTHOR("Daniel Mack <daniel@caiaq.de>");
+MODULE_DESCRIPTION("ISL29003 ambient light sensor driver");
+MODULE_LICENSE("GPL v2");
+MODULE_VERSION(DRIVER_VERSION);
diff --git a/drivers/misc/isl29020.c b/drivers/misc/isl29020.c
new file mode 100644
index 0000000..4a9c50a
--- /dev/null
+++ b/drivers/misc/isl29020.c
@@ -0,0 +1,238 @@
+/*
+ * isl29020.c - Intersil ALS Driver
+ *
+ * Copyright (C) 2008 Intel Corp
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ * Data sheet at: http://www.intersil.com/data/fn/fn6505.pdf
+ */
+
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/i2c.h>
+#include <linux/err.h>
+#include <linux/delay.h>
+#include <linux/sysfs.h>
+#include <linux/pm_runtime.h>
+
+static DEFINE_MUTEX(mutex);
+
+static ssize_t als_sensing_range_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct i2c_client *client = to_i2c_client(dev);
+ int val;
+
+ val = i2c_smbus_read_byte_data(client, 0x00);
+
+ if (val < 0)
+ return val;
+ return sprintf(buf, "%d000\n", 1 << (2 * (val & 3)));
+
+}
+
+static ssize_t als_lux_input_data_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct i2c_client *client = to_i2c_client(dev);
+ int ret_val, val;
+ unsigned long int lux;
+ int temp;
+
+ pm_runtime_get_sync(dev);
+ msleep(100);
+
+ mutex_lock(&mutex);
+ temp = i2c_smbus_read_byte_data(client, 0x02); /* MSB data */
+ if (temp < 0) {
+ pm_runtime_put_sync(dev);
+ mutex_unlock(&mutex);
+ return temp;
+ }
+
+ ret_val = i2c_smbus_read_byte_data(client, 0x01); /* LSB data */
+ mutex_unlock(&mutex);
+
+ if (ret_val < 0) {
+ pm_runtime_put_sync(dev);
+ return ret_val;
+ }
+
+ ret_val |= temp << 8;
+ val = i2c_smbus_read_byte_data(client, 0x00);
+ pm_runtime_put_sync(dev);
+ if (val < 0)
+ return val;
+ lux = ((((1 << (2 * (val & 3))))*1000) * ret_val) / 65536;
+ return sprintf(buf, "%ld\n", lux);
+}
+
+static ssize_t als_sensing_range_store(struct device *dev,
+ struct device_attribute *attr, const char *buf, size_t count)
+{
+ struct i2c_client *client = to_i2c_client(dev);
+ int ret_val;
+ unsigned long val;
+
+ ret_val = kstrtoul(buf, 10, &val);
+ if (ret_val)
+ return ret_val;
+
+ if (val < 1 || val > 64000)
+ return -EINVAL;
+
+ /* Pick the smallest sensor range that will meet our requirements */
+ if (val <= 1000)
+ val = 1;
+ else if (val <= 4000)
+ val = 2;
+ else if (val <= 16000)
+ val = 3;
+ else
+ val = 4;
+
+ ret_val = i2c_smbus_read_byte_data(client, 0x00);
+ if (ret_val < 0)
+ return ret_val;
+
+ ret_val &= 0xFC; /*reset the bit before setting them */
+ ret_val |= val - 1;
+ ret_val = i2c_smbus_write_byte_data(client, 0x00, ret_val);
+
+ if (ret_val < 0)
+ return ret_val;
+ return count;
+}
+
+static void als_set_power_state(struct i2c_client *client, int enable)
+{
+ int ret_val;
+
+ ret_val = i2c_smbus_read_byte_data(client, 0x00);
+ if (ret_val < 0)
+ return;
+
+ if (enable)
+ ret_val |= 0x80;
+ else
+ ret_val &= 0x7F;
+
+ i2c_smbus_write_byte_data(client, 0x00, ret_val);
+}
+
+static DEVICE_ATTR(lux0_sensor_range, S_IRUGO | S_IWUSR,
+ als_sensing_range_show, als_sensing_range_store);
+static DEVICE_ATTR(lux0_input, S_IRUGO, als_lux_input_data_show, NULL);
+
+static struct attribute *mid_att_als[] = {
+ &dev_attr_lux0_sensor_range.attr,
+ &dev_attr_lux0_input.attr,
+ NULL
+};
+
+static struct attribute_group m_als_gr = {
+ .name = "isl29020",
+ .attrs = mid_att_als
+};
+
+static int als_set_default_config(struct i2c_client *client)
+{
+ int retval;
+
+ retval = i2c_smbus_write_byte_data(client, 0x00, 0xc0);
+ if (retval < 0) {
+ dev_err(&client->dev, "default write failed.");
+ return retval;
+ }
+ return 0;
+}
+
+static int isl29020_probe(struct i2c_client *client,
+ const struct i2c_device_id *id)
+{
+ int res;
+
+ res = als_set_default_config(client);
+ if (res < 0)
+ return res;
+
+ res = sysfs_create_group(&client->dev.kobj, &m_als_gr);
+ if (res) {
+ dev_err(&client->dev, "isl29020: device create file failed\n");
+ return res;
+ }
+ dev_info(&client->dev, "%s isl29020: ALS chip found\n", client->name);
+ als_set_power_state(client, 0);
+ pm_runtime_enable(&client->dev);
+ return res;
+}
+
+static int isl29020_remove(struct i2c_client *client)
+{
+ sysfs_remove_group(&client->dev.kobj, &m_als_gr);
+ return 0;
+}
+
+static struct i2c_device_id isl29020_id[] = {
+ { "isl29020", 0 },
+ { }
+};
+
+MODULE_DEVICE_TABLE(i2c, isl29020_id);
+
+#ifdef CONFIG_PM
+
+static int isl29020_runtime_suspend(struct device *dev)
+{
+ struct i2c_client *client = to_i2c_client(dev);
+ als_set_power_state(client, 0);
+ return 0;
+}
+
+static int isl29020_runtime_resume(struct device *dev)
+{
+ struct i2c_client *client = to_i2c_client(dev);
+ als_set_power_state(client, 1);
+ return 0;
+}
+
+static const struct dev_pm_ops isl29020_pm_ops = {
+ .runtime_suspend = isl29020_runtime_suspend,
+ .runtime_resume = isl29020_runtime_resume,
+};
+
+#define ISL29020_PM_OPS (&isl29020_pm_ops)
+#else /* CONFIG_PM */
+#define ISL29020_PM_OPS NULL
+#endif /* CONFIG_PM */
+
+static struct i2c_driver isl29020_driver = {
+ .driver = {
+ .name = "isl29020",
+ .pm = ISL29020_PM_OPS,
+ },
+ .probe = isl29020_probe,
+ .remove = isl29020_remove,
+ .id_table = isl29020_id,
+};
+
+module_i2c_driver(isl29020_driver);
+
+MODULE_AUTHOR("Kalhan Trisal <kalhan.trisal@intel.com>");
+MODULE_DESCRIPTION("Intersil isl29020 ALS Driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/misc/kgdbts.c b/drivers/misc/kgdbts.c
new file mode 100644
index 0000000..99635dd
--- /dev/null
+++ b/drivers/misc/kgdbts.c
@@ -0,0 +1,1189 @@
+/*
+ * kgdbts is a test suite for kgdb for the sole purpose of validating
+ * that key pieces of the kgdb internals are working properly such as
+ * HW/SW breakpoints, single stepping, and NMI.
+ *
+ * Created by: Jason Wessel <jason.wessel@windriver.com>
+ *
+ * Copyright (c) 2008 Wind River Systems, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+/* Information about the kgdb test suite.
+ * -------------------------------------
+ *
+ * The kgdb test suite is designed as a KGDB I/O module which
+ * simulates the communications that a debugger would have with kgdb.
+ * The tests are broken up in to a line by line and referenced here as
+ * a "get" which is kgdb requesting input and "put" which is kgdb
+ * sending a response.
+ *
+ * The kgdb suite can be invoked from the kernel command line
+ * arguments system or executed dynamically at run time. The test
+ * suite uses the variable "kgdbts" to obtain the information about
+ * which tests to run and to configure the verbosity level. The
+ * following are the various characters you can use with the kgdbts=
+ * line:
+ *
+ * When using the "kgdbts=" you only choose one of the following core
+ * test types:
+ * A = Run all the core tests silently
+ * V1 = Run all the core tests with minimal output
+ * V2 = Run all the core tests in debug mode
+ *
+ * You can also specify optional tests:
+ * N## = Go to sleep with interrupts of for ## seconds
+ * to test the HW NMI watchdog
+ * F## = Break at do_fork for ## iterations
+ * S## = Break at sys_open for ## iterations
+ * I## = Run the single step test ## iterations
+ *
+ * NOTE: that the do_fork and sys_open tests are mutually exclusive.
+ *
+ * To invoke the kgdb test suite from boot you use a kernel start
+ * argument as follows:
+ * kgdbts=V1 kgdbwait
+ * Or if you wanted to perform the NMI test for 6 seconds and do_fork
+ * test for 100 forks, you could use:
+ * kgdbts=V1N6F100 kgdbwait
+ *
+ * The test suite can also be invoked at run time with:
+ * echo kgdbts=V1N6F100 > /sys/module/kgdbts/parameters/kgdbts
+ * Or as another example:
+ * echo kgdbts=V2 > /sys/module/kgdbts/parameters/kgdbts
+ *
+ * When developing a new kgdb arch specific implementation or
+ * using these tests for the purpose of regression testing,
+ * several invocations are required.
+ *
+ * 1) Boot with the test suite enabled by using the kernel arguments
+ * "kgdbts=V1F100 kgdbwait"
+ * ## If kgdb arch specific implementation has NMI use
+ * "kgdbts=V1N6F100
+ *
+ * 2) After the system boot run the basic test.
+ * echo kgdbts=V1 > /sys/module/kgdbts/parameters/kgdbts
+ *
+ * 3) Run the concurrency tests. It is best to use n+1
+ * while loops where n is the number of cpus you have
+ * in your system. The example below uses only two
+ * loops.
+ *
+ * ## This tests break points on sys_open
+ * while [ 1 ] ; do find / > /dev/null 2>&1 ; done &
+ * while [ 1 ] ; do find / > /dev/null 2>&1 ; done &
+ * echo kgdbts=V1S10000 > /sys/module/kgdbts/parameters/kgdbts
+ * fg # and hit control-c
+ * fg # and hit control-c
+ * ## This tests break points on do_fork
+ * while [ 1 ] ; do date > /dev/null ; done &
+ * while [ 1 ] ; do date > /dev/null ; done &
+ * echo kgdbts=V1F1000 > /sys/module/kgdbts/parameters/kgdbts
+ * fg # and hit control-c
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/kgdb.h>
+#include <linux/ctype.h>
+#include <linux/uaccess.h>
+#include <linux/syscalls.h>
+#include <linux/nmi.h>
+#include <linux/delay.h>
+#include <linux/kthread.h>
+#include <linux/module.h>
+#include <asm/sections.h>
+
+#define v1printk(a...) do { \
+ if (verbose) \
+ printk(KERN_INFO a); \
+ } while (0)
+#define v2printk(a...) do { \
+ if (verbose > 1) \
+ printk(KERN_INFO a); \
+ touch_nmi_watchdog(); \
+ } while (0)
+#define eprintk(a...) do { \
+ printk(KERN_ERR a); \
+ WARN_ON(1); \
+ } while (0)
+#define MAX_CONFIG_LEN 40
+
+static struct kgdb_io kgdbts_io_ops;
+static char get_buf[BUFMAX];
+static int get_buf_cnt;
+static char put_buf[BUFMAX];
+static int put_buf_cnt;
+static char scratch_buf[BUFMAX];
+static int verbose;
+static int repeat_test;
+static int test_complete;
+static int send_ack;
+static int final_ack;
+static int force_hwbrks;
+static int hwbreaks_ok;
+static int hw_break_val;
+static int hw_break_val2;
+static int cont_instead_of_sstep;
+static unsigned long cont_thread_id;
+static unsigned long sstep_thread_id;
+#if defined(CONFIG_ARM) || defined(CONFIG_MIPS) || defined(CONFIG_SPARC)
+static int arch_needs_sstep_emulation = 1;
+#else
+static int arch_needs_sstep_emulation;
+#endif
+static unsigned long cont_addr;
+static unsigned long sstep_addr;
+static int restart_from_top_after_write;
+static int sstep_state;
+
+/* Storage for the registers, in GDB format. */
+static unsigned long kgdbts_gdb_regs[(NUMREGBYTES +
+ sizeof(unsigned long) - 1) /
+ sizeof(unsigned long)];
+static struct pt_regs kgdbts_regs;
+
+/* -1 = init not run yet, 0 = unconfigured, 1 = configured. */
+static int configured = -1;
+
+#ifdef CONFIG_KGDB_TESTS_BOOT_STRING
+static char config[MAX_CONFIG_LEN] = CONFIG_KGDB_TESTS_BOOT_STRING;
+#else
+static char config[MAX_CONFIG_LEN];
+#endif
+static struct kparam_string kps = {
+ .string = config,
+ .maxlen = MAX_CONFIG_LEN,
+};
+
+static void fill_get_buf(char *buf);
+
+struct test_struct {
+ char *get;
+ char *put;
+ void (*get_handler)(char *);
+ int (*put_handler)(char *, char *);
+};
+
+struct test_state {
+ char *name;
+ struct test_struct *tst;
+ int idx;
+ int (*run_test) (int, int);
+ int (*validate_put) (char *);
+};
+
+static struct test_state ts;
+
+static int kgdbts_unreg_thread(void *ptr)
+{
+ /* Wait until the tests are complete and then ungresiter the I/O
+ * driver.
+ */
+ while (!final_ack)
+ msleep_interruptible(1500);
+ /* Pause for any other threads to exit after final ack. */
+ msleep_interruptible(1000);
+ if (configured)
+ kgdb_unregister_io_module(&kgdbts_io_ops);
+ configured = 0;
+
+ return 0;
+}
+
+/* This is noinline such that it can be used for a single location to
+ * place a breakpoint
+ */
+static noinline void kgdbts_break_test(void)
+{
+ v2printk("kgdbts: breakpoint complete\n");
+}
+
+/* Lookup symbol info in the kernel */
+static unsigned long lookup_addr(char *arg)
+{
+ unsigned long addr = 0;
+
+ if (!strcmp(arg, "kgdbts_break_test"))
+ addr = (unsigned long)kgdbts_break_test;
+ else if (!strcmp(arg, "sys_open"))
+ addr = (unsigned long)do_sys_open;
+ else if (!strcmp(arg, "do_fork"))
+ addr = (unsigned long)_do_fork;
+ else if (!strcmp(arg, "hw_break_val"))
+ addr = (unsigned long)&hw_break_val;
+ addr = (unsigned long) dereference_function_descriptor((void *)addr);
+ return addr;
+}
+
+static void break_helper(char *bp_type, char *arg, unsigned long vaddr)
+{
+ unsigned long addr;
+
+ if (arg)
+ addr = lookup_addr(arg);
+ else
+ addr = vaddr;
+
+ sprintf(scratch_buf, "%s,%lx,%i", bp_type, addr,
+ BREAK_INSTR_SIZE);
+ fill_get_buf(scratch_buf);
+}
+
+static void sw_break(char *arg)
+{
+ break_helper(force_hwbrks ? "Z1" : "Z0", arg, 0);
+}
+
+static void sw_rem_break(char *arg)
+{
+ break_helper(force_hwbrks ? "z1" : "z0", arg, 0);
+}
+
+static void hw_break(char *arg)
+{
+ break_helper("Z1", arg, 0);
+}
+
+static void hw_rem_break(char *arg)
+{
+ break_helper("z1", arg, 0);
+}
+
+static void hw_write_break(char *arg)
+{
+ break_helper("Z2", arg, 0);
+}
+
+static void hw_rem_write_break(char *arg)
+{
+ break_helper("z2", arg, 0);
+}
+
+static void hw_access_break(char *arg)
+{
+ break_helper("Z4", arg, 0);
+}
+
+static void hw_rem_access_break(char *arg)
+{
+ break_helper("z4", arg, 0);
+}
+
+static void hw_break_val_access(void)
+{
+ hw_break_val2 = hw_break_val;
+}
+
+static void hw_break_val_write(void)
+{
+ hw_break_val++;
+}
+
+static int get_thread_id_continue(char *put_str, char *arg)
+{
+ char *ptr = &put_str[11];
+
+ if (put_str[1] != 'T' || put_str[2] != '0')
+ return 1;
+ kgdb_hex2long(&ptr, &cont_thread_id);
+ return 0;
+}
+
+static int check_and_rewind_pc(char *put_str, char *arg)
+{
+ unsigned long addr = lookup_addr(arg);
+ unsigned long ip;
+ int offset = 0;
+
+ kgdb_hex2mem(&put_str[1], (char *)kgdbts_gdb_regs,
+ NUMREGBYTES);
+ gdb_regs_to_pt_regs(kgdbts_gdb_regs, &kgdbts_regs);
+ ip = instruction_pointer(&kgdbts_regs);
+ v2printk("Stopped at IP: %lx\n", ip);
+#ifdef GDB_ADJUSTS_BREAK_OFFSET
+ /* On some arches, a breakpoint stop requires it to be decremented */
+ if (addr + BREAK_INSTR_SIZE == ip)
+ offset = -BREAK_INSTR_SIZE;
+#endif
+
+ if (arch_needs_sstep_emulation && sstep_addr &&
+ ip + offset == sstep_addr &&
+ ((!strcmp(arg, "sys_open") || !strcmp(arg, "do_fork")))) {
+ /* This is special case for emulated single step */
+ v2printk("Emul: rewind hit single step bp\n");
+ restart_from_top_after_write = 1;
+ } else if (strcmp(arg, "silent") && ip + offset != addr) {
+ eprintk("kgdbts: BP mismatch %lx expected %lx\n",
+ ip + offset, addr);
+ return 1;
+ }
+ /* Readjust the instruction pointer if needed */
+ ip += offset;
+ cont_addr = ip;
+#ifdef GDB_ADJUSTS_BREAK_OFFSET
+ instruction_pointer_set(&kgdbts_regs, ip);
+#endif
+ return 0;
+}
+
+static int check_single_step(char *put_str, char *arg)
+{
+ unsigned long addr = lookup_addr(arg);
+ static int matched_id;
+
+ /*
+ * From an arch indepent point of view the instruction pointer
+ * should be on a different instruction
+ */
+ kgdb_hex2mem(&put_str[1], (char *)kgdbts_gdb_regs,
+ NUMREGBYTES);
+ gdb_regs_to_pt_regs(kgdbts_gdb_regs, &kgdbts_regs);
+ v2printk("Singlestep stopped at IP: %lx\n",
+ instruction_pointer(&kgdbts_regs));
+
+ if (sstep_thread_id != cont_thread_id) {
+ /*
+ * Ensure we stopped in the same thread id as before, else the
+ * debugger should continue until the original thread that was
+ * single stepped is scheduled again, emulating gdb's behavior.
+ */
+ v2printk("ThrID does not match: %lx\n", cont_thread_id);
+ if (arch_needs_sstep_emulation) {
+ if (matched_id &&
+ instruction_pointer(&kgdbts_regs) != addr)
+ goto continue_test;
+ matched_id++;
+ ts.idx -= 2;
+ sstep_state = 0;
+ return 0;
+ }
+ cont_instead_of_sstep = 1;
+ ts.idx -= 4;
+ return 0;
+ }
+continue_test:
+ matched_id = 0;
+ if (instruction_pointer(&kgdbts_regs) == addr) {
+ eprintk("kgdbts: SingleStep failed at %lx\n",
+ instruction_pointer(&kgdbts_regs));
+ return 1;
+ }
+
+ return 0;
+}
+
+static void write_regs(char *arg)
+{
+ memset(scratch_buf, 0, sizeof(scratch_buf));
+ scratch_buf[0] = 'G';
+ pt_regs_to_gdb_regs(kgdbts_gdb_regs, &kgdbts_regs);
+ kgdb_mem2hex((char *)kgdbts_gdb_regs, &scratch_buf[1], NUMREGBYTES);
+ fill_get_buf(scratch_buf);
+}
+
+static void skip_back_repeat_test(char *arg)
+{
+ int go_back = simple_strtol(arg, NULL, 10);
+
+ repeat_test--;
+ if (repeat_test <= 0)
+ ts.idx++;
+ else
+ ts.idx -= go_back;
+ fill_get_buf(ts.tst[ts.idx].get);
+}
+
+static int got_break(char *put_str, char *arg)
+{
+ test_complete = 1;
+ if (!strncmp(put_str+1, arg, 2)) {
+ if (!strncmp(arg, "T0", 2))
+ test_complete = 2;
+ return 0;
+ }
+ return 1;
+}
+
+static void get_cont_catch(char *arg)
+{
+ /* Always send detach because the test is completed at this point */
+ fill_get_buf("D");
+}
+
+static int put_cont_catch(char *put_str, char *arg)
+{
+ /* This is at the end of the test and we catch any and all input */
+ v2printk("kgdbts: cleanup task: %lx\n", sstep_thread_id);
+ ts.idx--;
+ return 0;
+}
+
+static int emul_reset(char *put_str, char *arg)
+{
+ if (strncmp(put_str, "$OK", 3))
+ return 1;
+ if (restart_from_top_after_write) {
+ restart_from_top_after_write = 0;
+ ts.idx = -1;
+ }
+ return 0;
+}
+
+static void emul_sstep_get(char *arg)
+{
+ if (!arch_needs_sstep_emulation) {
+ if (cont_instead_of_sstep) {
+ cont_instead_of_sstep = 0;
+ fill_get_buf("c");
+ } else {
+ fill_get_buf(arg);
+ }
+ return;
+ }
+ switch (sstep_state) {
+ case 0:
+ v2printk("Emulate single step\n");
+ /* Start by looking at the current PC */
+ fill_get_buf("g");
+ break;
+ case 1:
+ /* set breakpoint */
+ break_helper("Z0", NULL, sstep_addr);
+ break;
+ case 2:
+ /* Continue */
+ fill_get_buf("c");
+ break;
+ case 3:
+ /* Clear breakpoint */
+ break_helper("z0", NULL, sstep_addr);
+ break;
+ default:
+ eprintk("kgdbts: ERROR failed sstep get emulation\n");
+ }
+ sstep_state++;
+}
+
+static int emul_sstep_put(char *put_str, char *arg)
+{
+ if (!arch_needs_sstep_emulation) {
+ char *ptr = &put_str[11];
+ if (put_str[1] != 'T' || put_str[2] != '0')
+ return 1;
+ kgdb_hex2long(&ptr, &sstep_thread_id);
+ return 0;
+ }
+ switch (sstep_state) {
+ case 1:
+ /* validate the "g" packet to get the IP */
+ kgdb_hex2mem(&put_str[1], (char *)kgdbts_gdb_regs,
+ NUMREGBYTES);
+ gdb_regs_to_pt_regs(kgdbts_gdb_regs, &kgdbts_regs);
+ v2printk("Stopped at IP: %lx\n",
+ instruction_pointer(&kgdbts_regs));
+ /* Want to stop at IP + break instruction size by default */
+ sstep_addr = cont_addr + BREAK_INSTR_SIZE;
+ break;
+ case 2:
+ if (strncmp(put_str, "$OK", 3)) {
+ eprintk("kgdbts: failed sstep break set\n");
+ return 1;
+ }
+ break;
+ case 3:
+ if (strncmp(put_str, "$T0", 3)) {
+ eprintk("kgdbts: failed continue sstep\n");
+ return 1;
+ } else {
+ char *ptr = &put_str[11];
+ kgdb_hex2long(&ptr, &sstep_thread_id);
+ }
+ break;
+ case 4:
+ if (strncmp(put_str, "$OK", 3)) {
+ eprintk("kgdbts: failed sstep break unset\n");
+ return 1;
+ }
+ /* Single step is complete so continue on! */
+ sstep_state = 0;
+ return 0;
+ default:
+ eprintk("kgdbts: ERROR failed sstep put emulation\n");
+ }
+
+ /* Continue on the same test line until emulation is complete */
+ ts.idx--;
+ return 0;
+}
+
+static int final_ack_set(char *put_str, char *arg)
+{
+ if (strncmp(put_str+1, arg, 2))
+ return 1;
+ final_ack = 1;
+ return 0;
+}
+/*
+ * Test to plant a breakpoint and detach, which should clear out the
+ * breakpoint and restore the original instruction.
+ */
+static struct test_struct plant_and_detach_test[] = {
+ { "?", "S0*" }, /* Clear break points */
+ { "kgdbts_break_test", "OK", sw_break, }, /* set sw breakpoint */
+ { "D", "OK" }, /* Detach */
+ { "", "" },
+};
+
+/*
+ * Simple test to write in a software breakpoint, check for the
+ * correct stop location and detach.
+ */
+static struct test_struct sw_breakpoint_test[] = {
+ { "?", "S0*" }, /* Clear break points */
+ { "kgdbts_break_test", "OK", sw_break, }, /* set sw breakpoint */
+ { "c", "T0*", }, /* Continue */
+ { "g", "kgdbts_break_test", NULL, check_and_rewind_pc },
+ { "write", "OK", write_regs },
+ { "kgdbts_break_test", "OK", sw_rem_break }, /*remove breakpoint */
+ { "D", "OK" }, /* Detach */
+ { "D", "OK", NULL, got_break }, /* On success we made it here */
+ { "", "" },
+};
+
+/*
+ * Test a known bad memory read location to test the fault handler and
+ * read bytes 1-8 at the bad address
+ */
+static struct test_struct bad_read_test[] = {
+ { "?", "S0*" }, /* Clear break points */
+ { "m0,1", "E*" }, /* read 1 byte at address 1 */
+ { "m0,2", "E*" }, /* read 1 byte at address 2 */
+ { "m0,3", "E*" }, /* read 1 byte at address 3 */
+ { "m0,4", "E*" }, /* read 1 byte at address 4 */
+ { "m0,5", "E*" }, /* read 1 byte at address 5 */
+ { "m0,6", "E*" }, /* read 1 byte at address 6 */
+ { "m0,7", "E*" }, /* read 1 byte at address 7 */
+ { "m0,8", "E*" }, /* read 1 byte at address 8 */
+ { "D", "OK" }, /* Detach which removes all breakpoints and continues */
+ { "", "" },
+};
+
+/*
+ * Test for hitting a breakpoint, remove it, single step, plant it
+ * again and detach.
+ */
+static struct test_struct singlestep_break_test[] = {
+ { "?", "S0*" }, /* Clear break points */
+ { "kgdbts_break_test", "OK", sw_break, }, /* set sw breakpoint */
+ { "c", "T0*", NULL, get_thread_id_continue }, /* Continue */
+ { "kgdbts_break_test", "OK", sw_rem_break }, /*remove breakpoint */
+ { "g", "kgdbts_break_test", NULL, check_and_rewind_pc },
+ { "write", "OK", write_regs }, /* Write registers */
+ { "s", "T0*", emul_sstep_get, emul_sstep_put }, /* Single step */
+ { "g", "kgdbts_break_test", NULL, check_single_step },
+ { "kgdbts_break_test", "OK", sw_break, }, /* set sw breakpoint */
+ { "c", "T0*", }, /* Continue */
+ { "g", "kgdbts_break_test", NULL, check_and_rewind_pc },
+ { "write", "OK", write_regs }, /* Write registers */
+ { "D", "OK" }, /* Remove all breakpoints and continues */
+ { "", "" },
+};
+
+/*
+ * Test for hitting a breakpoint at do_fork for what ever the number
+ * of iterations required by the variable repeat_test.
+ */
+static struct test_struct do_fork_test[] = {
+ { "?", "S0*" }, /* Clear break points */
+ { "do_fork", "OK", sw_break, }, /* set sw breakpoint */
+ { "c", "T0*", NULL, get_thread_id_continue }, /* Continue */
+ { "do_fork", "OK", sw_rem_break }, /*remove breakpoint */
+ { "g", "do_fork", NULL, check_and_rewind_pc }, /* check location */
+ { "write", "OK", write_regs, emul_reset }, /* Write registers */
+ { "s", "T0*", emul_sstep_get, emul_sstep_put }, /* Single step */
+ { "g", "do_fork", NULL, check_single_step },
+ { "do_fork", "OK", sw_break, }, /* set sw breakpoint */
+ { "7", "T0*", skip_back_repeat_test }, /* Loop based on repeat_test */
+ { "D", "OK", NULL, final_ack_set }, /* detach and unregister I/O */
+ { "", "", get_cont_catch, put_cont_catch },
+};
+
+/* Test for hitting a breakpoint at sys_open for what ever the number
+ * of iterations required by the variable repeat_test.
+ */
+static struct test_struct sys_open_test[] = {
+ { "?", "S0*" }, /* Clear break points */
+ { "sys_open", "OK", sw_break, }, /* set sw breakpoint */
+ { "c", "T0*", NULL, get_thread_id_continue }, /* Continue */
+ { "sys_open", "OK", sw_rem_break }, /*remove breakpoint */
+ { "g", "sys_open", NULL, check_and_rewind_pc }, /* check location */
+ { "write", "OK", write_regs, emul_reset }, /* Write registers */
+ { "s", "T0*", emul_sstep_get, emul_sstep_put }, /* Single step */
+ { "g", "sys_open", NULL, check_single_step },
+ { "sys_open", "OK", sw_break, }, /* set sw breakpoint */
+ { "7", "T0*", skip_back_repeat_test }, /* Loop based on repeat_test */
+ { "D", "OK", NULL, final_ack_set }, /* detach and unregister I/O */
+ { "", "", get_cont_catch, put_cont_catch },
+};
+
+/*
+ * Test for hitting a simple hw breakpoint
+ */
+static struct test_struct hw_breakpoint_test[] = {
+ { "?", "S0*" }, /* Clear break points */
+ { "kgdbts_break_test", "OK", hw_break, }, /* set hw breakpoint */
+ { "c", "T0*", }, /* Continue */
+ { "g", "kgdbts_break_test", NULL, check_and_rewind_pc },
+ { "write", "OK", write_regs },
+ { "kgdbts_break_test", "OK", hw_rem_break }, /*remove breakpoint */
+ { "D", "OK" }, /* Detach */
+ { "D", "OK", NULL, got_break }, /* On success we made it here */
+ { "", "" },
+};
+
+/*
+ * Test for hitting a hw write breakpoint
+ */
+static struct test_struct hw_write_break_test[] = {
+ { "?", "S0*" }, /* Clear break points */
+ { "hw_break_val", "OK", hw_write_break, }, /* set hw breakpoint */
+ { "c", "T0*", NULL, got_break }, /* Continue */
+ { "g", "silent", NULL, check_and_rewind_pc },
+ { "write", "OK", write_regs },
+ { "hw_break_val", "OK", hw_rem_write_break }, /*remove breakpoint */
+ { "D", "OK" }, /* Detach */
+ { "D", "OK", NULL, got_break }, /* On success we made it here */
+ { "", "" },
+};
+
+/*
+ * Test for hitting a hw access breakpoint
+ */
+static struct test_struct hw_access_break_test[] = {
+ { "?", "S0*" }, /* Clear break points */
+ { "hw_break_val", "OK", hw_access_break, }, /* set hw breakpoint */
+ { "c", "T0*", NULL, got_break }, /* Continue */
+ { "g", "silent", NULL, check_and_rewind_pc },
+ { "write", "OK", write_regs },
+ { "hw_break_val", "OK", hw_rem_access_break }, /*remove breakpoint */
+ { "D", "OK" }, /* Detach */
+ { "D", "OK", NULL, got_break }, /* On success we made it here */
+ { "", "" },
+};
+
+/*
+ * Test for hitting a hw access breakpoint
+ */
+static struct test_struct nmi_sleep_test[] = {
+ { "?", "S0*" }, /* Clear break points */
+ { "c", "T0*", NULL, got_break }, /* Continue */
+ { "D", "OK" }, /* Detach */
+ { "D", "OK", NULL, got_break }, /* On success we made it here */
+ { "", "" },
+};
+
+static void fill_get_buf(char *buf)
+{
+ unsigned char checksum = 0;
+ int count = 0;
+ char ch;
+
+ strcpy(get_buf, "$");
+ strcat(get_buf, buf);
+ while ((ch = buf[count])) {
+ checksum += ch;
+ count++;
+ }
+ strcat(get_buf, "#");
+ get_buf[count + 2] = hex_asc_hi(checksum);
+ get_buf[count + 3] = hex_asc_lo(checksum);
+ get_buf[count + 4] = '\0';
+ v2printk("get%i: %s\n", ts.idx, get_buf);
+}
+
+static int validate_simple_test(char *put_str)
+{
+ char *chk_str;
+
+ if (ts.tst[ts.idx].put_handler)
+ return ts.tst[ts.idx].put_handler(put_str,
+ ts.tst[ts.idx].put);
+
+ chk_str = ts.tst[ts.idx].put;
+ if (*put_str == '$')
+ put_str++;
+
+ while (*chk_str != '\0' && *put_str != '\0') {
+ /* If someone does a * to match the rest of the string, allow
+ * it, or stop if the received string is complete.
+ */
+ if (*put_str == '#' || *chk_str == '*')
+ return 0;
+ if (*put_str != *chk_str)
+ return 1;
+
+ chk_str++;
+ put_str++;
+ }
+ if (*chk_str == '\0' && (*put_str == '\0' || *put_str == '#'))
+ return 0;
+
+ return 1;
+}
+
+static int run_simple_test(int is_get_char, int chr)
+{
+ int ret = 0;
+ if (is_get_char) {
+ /* Send an ACK on the get if a prior put completed and set the
+ * send ack variable
+ */
+ if (send_ack) {
+ send_ack = 0;
+ return '+';
+ }
+ /* On the first get char, fill the transmit buffer and then
+ * take from the get_string.
+ */
+ if (get_buf_cnt == 0) {
+ if (ts.tst[ts.idx].get_handler)
+ ts.tst[ts.idx].get_handler(ts.tst[ts.idx].get);
+ else
+ fill_get_buf(ts.tst[ts.idx].get);
+ }
+
+ if (get_buf[get_buf_cnt] == '\0') {
+ eprintk("kgdbts: ERROR GET: EOB on '%s' at %i\n",
+ ts.name, ts.idx);
+ get_buf_cnt = 0;
+ fill_get_buf("D");
+ }
+ ret = get_buf[get_buf_cnt];
+ get_buf_cnt++;
+ return ret;
+ }
+
+ /* This callback is a put char which is when kgdb sends data to
+ * this I/O module.
+ */
+ if (ts.tst[ts.idx].get[0] == '\0' && ts.tst[ts.idx].put[0] == '\0' &&
+ !ts.tst[ts.idx].get_handler) {
+ eprintk("kgdbts: ERROR: beyond end of test on"
+ " '%s' line %i\n", ts.name, ts.idx);
+ return 0;
+ }
+
+ if (put_buf_cnt >= BUFMAX) {
+ eprintk("kgdbts: ERROR: put buffer overflow on"
+ " '%s' line %i\n", ts.name, ts.idx);
+ put_buf_cnt = 0;
+ return 0;
+ }
+ /* Ignore everything until the first valid packet start '$' */
+ if (put_buf_cnt == 0 && chr != '$')
+ return 0;
+
+ put_buf[put_buf_cnt] = chr;
+ put_buf_cnt++;
+
+ /* End of packet == #XX so look for the '#' */
+ if (put_buf_cnt > 3 && put_buf[put_buf_cnt - 3] == '#') {
+ if (put_buf_cnt >= BUFMAX) {
+ eprintk("kgdbts: ERROR: put buffer overflow on"
+ " '%s' line %i\n", ts.name, ts.idx);
+ put_buf_cnt = 0;
+ return 0;
+ }
+ put_buf[put_buf_cnt] = '\0';
+ v2printk("put%i: %s\n", ts.idx, put_buf);
+ /* Trigger check here */
+ if (ts.validate_put && ts.validate_put(put_buf)) {
+ eprintk("kgdbts: ERROR PUT: end of test "
+ "buffer on '%s' line %i expected %s got %s\n",
+ ts.name, ts.idx, ts.tst[ts.idx].put, put_buf);
+ }
+ ts.idx++;
+ put_buf_cnt = 0;
+ get_buf_cnt = 0;
+ send_ack = 1;
+ }
+ return 0;
+}
+
+static void init_simple_test(void)
+{
+ memset(&ts, 0, sizeof(ts));
+ ts.run_test = run_simple_test;
+ ts.validate_put = validate_simple_test;
+}
+
+static void run_plant_and_detach_test(int is_early)
+{
+ char before[BREAK_INSTR_SIZE];
+ char after[BREAK_INSTR_SIZE];
+
+ probe_kernel_read(before, (char *)kgdbts_break_test,
+ BREAK_INSTR_SIZE);
+ init_simple_test();
+ ts.tst = plant_and_detach_test;
+ ts.name = "plant_and_detach_test";
+ /* Activate test with initial breakpoint */
+ if (!is_early)
+ kgdb_breakpoint();
+ probe_kernel_read(after, (char *)kgdbts_break_test,
+ BREAK_INSTR_SIZE);
+ if (memcmp(before, after, BREAK_INSTR_SIZE)) {
+ printk(KERN_CRIT "kgdbts: ERROR kgdb corrupted memory\n");
+ panic("kgdb memory corruption");
+ }
+
+ /* complete the detach test */
+ if (!is_early)
+ kgdbts_break_test();
+}
+
+static void run_breakpoint_test(int is_hw_breakpoint)
+{
+ test_complete = 0;
+ init_simple_test();
+ if (is_hw_breakpoint) {
+ ts.tst = hw_breakpoint_test;
+ ts.name = "hw_breakpoint_test";
+ } else {
+ ts.tst = sw_breakpoint_test;
+ ts.name = "sw_breakpoint_test";
+ }
+ /* Activate test with initial breakpoint */
+ kgdb_breakpoint();
+ /* run code with the break point in it */
+ kgdbts_break_test();
+ kgdb_breakpoint();
+
+ if (test_complete)
+ return;
+
+ eprintk("kgdbts: ERROR %s test failed\n", ts.name);
+ if (is_hw_breakpoint)
+ hwbreaks_ok = 0;
+}
+
+static void run_hw_break_test(int is_write_test)
+{
+ test_complete = 0;
+ init_simple_test();
+ if (is_write_test) {
+ ts.tst = hw_write_break_test;
+ ts.name = "hw_write_break_test";
+ } else {
+ ts.tst = hw_access_break_test;
+ ts.name = "hw_access_break_test";
+ }
+ /* Activate test with initial breakpoint */
+ kgdb_breakpoint();
+ hw_break_val_access();
+ if (is_write_test) {
+ if (test_complete == 2) {
+ eprintk("kgdbts: ERROR %s broke on access\n",
+ ts.name);
+ hwbreaks_ok = 0;
+ }
+ hw_break_val_write();
+ }
+ kgdb_breakpoint();
+
+ if (test_complete == 1)
+ return;
+
+ eprintk("kgdbts: ERROR %s test failed\n", ts.name);
+ hwbreaks_ok = 0;
+}
+
+static void run_nmi_sleep_test(int nmi_sleep)
+{
+ unsigned long flags;
+
+ init_simple_test();
+ ts.tst = nmi_sleep_test;
+ ts.name = "nmi_sleep_test";
+ /* Activate test with initial breakpoint */
+ kgdb_breakpoint();
+ local_irq_save(flags);
+ mdelay(nmi_sleep*1000);
+ touch_nmi_watchdog();
+ local_irq_restore(flags);
+ if (test_complete != 2)
+ eprintk("kgdbts: ERROR nmi_test did not hit nmi\n");
+ kgdb_breakpoint();
+ if (test_complete == 1)
+ return;
+
+ eprintk("kgdbts: ERROR %s test failed\n", ts.name);
+}
+
+static void run_bad_read_test(void)
+{
+ init_simple_test();
+ ts.tst = bad_read_test;
+ ts.name = "bad_read_test";
+ /* Activate test with initial breakpoint */
+ kgdb_breakpoint();
+}
+
+static void run_do_fork_test(void)
+{
+ init_simple_test();
+ ts.tst = do_fork_test;
+ ts.name = "do_fork_test";
+ /* Activate test with initial breakpoint */
+ kgdb_breakpoint();
+}
+
+static void run_sys_open_test(void)
+{
+ init_simple_test();
+ ts.tst = sys_open_test;
+ ts.name = "sys_open_test";
+ /* Activate test with initial breakpoint */
+ kgdb_breakpoint();
+}
+
+static void run_singlestep_break_test(void)
+{
+ init_simple_test();
+ ts.tst = singlestep_break_test;
+ ts.name = "singlestep_breakpoint_test";
+ /* Activate test with initial breakpoint */
+ kgdb_breakpoint();
+ kgdbts_break_test();
+ kgdbts_break_test();
+}
+
+static void kgdbts_run_tests(void)
+{
+ char *ptr;
+ int fork_test = 0;
+ int do_sys_open_test = 0;
+ int sstep_test = 1000;
+ int nmi_sleep = 0;
+ int i;
+
+ ptr = strchr(config, 'F');
+ if (ptr)
+ fork_test = simple_strtol(ptr + 1, NULL, 10);
+ ptr = strchr(config, 'S');
+ if (ptr)
+ do_sys_open_test = simple_strtol(ptr + 1, NULL, 10);
+ ptr = strchr(config, 'N');
+ if (ptr)
+ nmi_sleep = simple_strtol(ptr+1, NULL, 10);
+ ptr = strchr(config, 'I');
+ if (ptr)
+ sstep_test = simple_strtol(ptr+1, NULL, 10);
+
+ /* All HW break point tests */
+ if (arch_kgdb_ops.flags & KGDB_HW_BREAKPOINT) {
+ hwbreaks_ok = 1;
+ v1printk("kgdbts:RUN hw breakpoint test\n");
+ run_breakpoint_test(1);
+ v1printk("kgdbts:RUN hw write breakpoint test\n");
+ run_hw_break_test(1);
+ v1printk("kgdbts:RUN access write breakpoint test\n");
+ run_hw_break_test(0);
+ }
+
+ /* required internal KGDB tests */
+ v1printk("kgdbts:RUN plant and detach test\n");
+ run_plant_and_detach_test(0);
+ v1printk("kgdbts:RUN sw breakpoint test\n");
+ run_breakpoint_test(0);
+ v1printk("kgdbts:RUN bad memory access test\n");
+ run_bad_read_test();
+ v1printk("kgdbts:RUN singlestep test %i iterations\n", sstep_test);
+ for (i = 0; i < sstep_test; i++) {
+ run_singlestep_break_test();
+ if (i % 100 == 0)
+ v1printk("kgdbts:RUN singlestep [%i/%i]\n",
+ i, sstep_test);
+ }
+
+ /* ===Optional tests=== */
+
+ if (nmi_sleep) {
+ v1printk("kgdbts:RUN NMI sleep %i seconds test\n", nmi_sleep);
+ run_nmi_sleep_test(nmi_sleep);
+ }
+
+ /* If the do_fork test is run it will be the last test that is
+ * executed because a kernel thread will be spawned at the very
+ * end to unregister the debug hooks.
+ */
+ if (fork_test) {
+ repeat_test = fork_test;
+ printk(KERN_INFO "kgdbts:RUN do_fork for %i breakpoints\n",
+ repeat_test);
+ kthread_run(kgdbts_unreg_thread, NULL, "kgdbts_unreg");
+ run_do_fork_test();
+ return;
+ }
+
+ /* If the sys_open test is run it will be the last test that is
+ * executed because a kernel thread will be spawned at the very
+ * end to unregister the debug hooks.
+ */
+ if (do_sys_open_test) {
+ repeat_test = do_sys_open_test;
+ printk(KERN_INFO "kgdbts:RUN sys_open for %i breakpoints\n",
+ repeat_test);
+ kthread_run(kgdbts_unreg_thread, NULL, "kgdbts_unreg");
+ run_sys_open_test();
+ return;
+ }
+ /* Shutdown and unregister */
+ kgdb_unregister_io_module(&kgdbts_io_ops);
+ configured = 0;
+}
+
+static int kgdbts_option_setup(char *opt)
+{
+ if (strlen(opt) >= MAX_CONFIG_LEN) {
+ printk(KERN_ERR "kgdbts: config string too long\n");
+ return -ENOSPC;
+ }
+ strcpy(config, opt);
+
+ verbose = 0;
+ if (strstr(config, "V1"))
+ verbose = 1;
+ if (strstr(config, "V2"))
+ verbose = 2;
+
+ return 0;
+}
+
+__setup("kgdbts=", kgdbts_option_setup);
+
+static int configure_kgdbts(void)
+{
+ int err = 0;
+
+ if (!strlen(config) || isspace(config[0]))
+ goto noconfig;
+ err = kgdbts_option_setup(config);
+ if (err)
+ goto noconfig;
+
+ final_ack = 0;
+ run_plant_and_detach_test(1);
+
+ err = kgdb_register_io_module(&kgdbts_io_ops);
+ if (err) {
+ configured = 0;
+ return err;
+ }
+ configured = 1;
+ kgdbts_run_tests();
+
+ return err;
+
+noconfig:
+ config[0] = 0;
+ configured = 0;
+
+ return err;
+}
+
+static int __init init_kgdbts(void)
+{
+ /* Already configured? */
+ if (configured == 1)
+ return 0;
+
+ return configure_kgdbts();
+}
+device_initcall(init_kgdbts);
+
+static int kgdbts_get_char(void)
+{
+ int val = 0;
+
+ if (ts.run_test)
+ val = ts.run_test(1, 0);
+
+ return val;
+}
+
+static void kgdbts_put_char(u8 chr)
+{
+ if (ts.run_test)
+ ts.run_test(0, chr);
+}
+
+static int param_set_kgdbts_var(const char *kmessage, struct kernel_param *kp)
+{
+ int len = strlen(kmessage);
+
+ if (len >= MAX_CONFIG_LEN) {
+ printk(KERN_ERR "kgdbts: config string too long\n");
+ return -ENOSPC;
+ }
+
+ /* Only copy in the string if the init function has not run yet */
+ if (configured < 0) {
+ strcpy(config, kmessage);
+ return 0;
+ }
+
+ if (configured == 1) {
+ printk(KERN_ERR "kgdbts: ERROR: Already configured and running.\n");
+ return -EBUSY;
+ }
+
+ strcpy(config, kmessage);
+ /* Chop out \n char as a result of echo */
+ if (config[len - 1] == '\n')
+ config[len - 1] = '\0';
+
+ /* Go and configure with the new params. */
+ return configure_kgdbts();
+}
+
+static void kgdbts_pre_exp_handler(void)
+{
+ /* Increment the module count when the debugger is active */
+ if (!kgdb_connected)
+ try_module_get(THIS_MODULE);
+}
+
+static void kgdbts_post_exp_handler(void)
+{
+ /* decrement the module count when the debugger detaches */
+ if (!kgdb_connected)
+ module_put(THIS_MODULE);
+}
+
+static struct kgdb_io kgdbts_io_ops = {
+ .name = "kgdbts",
+ .read_char = kgdbts_get_char,
+ .write_char = kgdbts_put_char,
+ .pre_exception = kgdbts_pre_exp_handler,
+ .post_exception = kgdbts_post_exp_handler,
+};
+
+/*
+ * not really modular, but the easiest way to keep compat with existing
+ * bootargs behaviour is to continue using module_param here.
+ */
+module_param_call(kgdbts, param_set_kgdbts_var, param_get_string, &kps, 0644);
+MODULE_PARM_DESC(kgdbts, "<A|V1|V2>[F#|S#][N#]");
diff --git a/drivers/misc/lattice-ecp3-config.c b/drivers/misc/lattice-ecp3-config.c
new file mode 100644
index 0000000..626fdca
--- /dev/null
+++ b/drivers/misc/lattice-ecp3-config.c
@@ -0,0 +1,249 @@
+/*
+ * Copyright (C) 2012 Stefan Roese <sr@denx.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/device.h>
+#include <linux/firmware.h>
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/spi/spi.h>
+#include <linux/platform_device.h>
+#include <linux/delay.h>
+#include <asm/unaligned.h>
+
+#define FIRMWARE_NAME "lattice-ecp3.bit"
+
+/*
+ * The JTAG ID's of the supported FPGA's. The ID is 32bit wide
+ * reversed as noted in the manual.
+ */
+#define ID_ECP3_17 0xc2088080
+#define ID_ECP3_35 0xc2048080
+
+/* FPGA commands */
+#define FPGA_CMD_READ_ID 0x07 /* plus 24 bits */
+#define FPGA_CMD_READ_STATUS 0x09 /* plus 24 bits */
+#define FPGA_CMD_CLEAR 0x70
+#define FPGA_CMD_REFRESH 0x71
+#define FPGA_CMD_WRITE_EN 0x4a /* plus 2 bits */
+#define FPGA_CMD_WRITE_DIS 0x4f /* plus 8 bits */
+#define FPGA_CMD_WRITE_INC 0x41 /* plus 0 bits */
+
+/*
+ * The status register is 32bit revered, DONE is bit 17 from the TN1222.pdf
+ * (LatticeECP3 Slave SPI Port User's Guide)
+ */
+#define FPGA_STATUS_DONE 0x00004000
+#define FPGA_STATUS_CLEARED 0x00010000
+
+#define FPGA_CLEAR_TIMEOUT 5000 /* max. 5000ms for FPGA clear */
+#define FPGA_CLEAR_MSLEEP 10
+#define FPGA_CLEAR_LOOP_COUNT (FPGA_CLEAR_TIMEOUT / FPGA_CLEAR_MSLEEP)
+
+struct fpga_data {
+ struct completion fw_loaded;
+};
+
+struct ecp3_dev {
+ u32 jedec_id;
+ char *name;
+};
+
+static const struct ecp3_dev ecp3_dev[] = {
+ {
+ .jedec_id = ID_ECP3_17,
+ .name = "Lattice ECP3-17",
+ },
+ {
+ .jedec_id = ID_ECP3_35,
+ .name = "Lattice ECP3-35",
+ },
+};
+
+static void firmware_load(const struct firmware *fw, void *context)
+{
+ struct spi_device *spi = (struct spi_device *)context;
+ struct fpga_data *data = spi_get_drvdata(spi);
+ u8 *buffer;
+ int ret;
+ u8 txbuf[8];
+ u8 rxbuf[8];
+ int rx_len = 8;
+ int i;
+ u32 jedec_id;
+ u32 status;
+
+ if (fw == NULL) {
+ dev_err(&spi->dev, "Cannot load firmware, aborting\n");
+ return;
+ }
+
+ if (fw->size == 0) {
+ dev_err(&spi->dev, "Error: Firmware size is 0!\n");
+ return;
+ }
+
+ /* Fill dummy data (24 stuffing bits for commands) */
+ txbuf[1] = 0x00;
+ txbuf[2] = 0x00;
+ txbuf[3] = 0x00;
+
+ /* Trying to speak with the FPGA via SPI... */
+ txbuf[0] = FPGA_CMD_READ_ID;
+ ret = spi_write_then_read(spi, txbuf, 8, rxbuf, rx_len);
+ jedec_id = get_unaligned_be32(&rxbuf[4]);
+ dev_dbg(&spi->dev, "FPGA JTAG ID=%08x\n", jedec_id);
+
+ for (i = 0; i < ARRAY_SIZE(ecp3_dev); i++) {
+ if (jedec_id == ecp3_dev[i].jedec_id)
+ break;
+ }
+ if (i == ARRAY_SIZE(ecp3_dev)) {
+ dev_err(&spi->dev,
+ "Error: No supported FPGA detected (JEDEC_ID=%08x)!\n",
+ jedec_id);
+ return;
+ }
+
+ dev_info(&spi->dev, "FPGA %s detected\n", ecp3_dev[i].name);
+
+ txbuf[0] = FPGA_CMD_READ_STATUS;
+ ret = spi_write_then_read(spi, txbuf, 8, rxbuf, rx_len);
+ status = get_unaligned_be32(&rxbuf[4]);
+ dev_dbg(&spi->dev, "FPGA Status=%08x\n", status);
+
+ buffer = kzalloc(fw->size + 8, GFP_KERNEL);
+ if (!buffer) {
+ dev_err(&spi->dev, "Error: Can't allocate memory!\n");
+ return;
+ }
+
+ /*
+ * Insert WRITE_INC command into stream (one SPI frame)
+ */
+ buffer[0] = FPGA_CMD_WRITE_INC;
+ buffer[1] = 0xff;
+ buffer[2] = 0xff;
+ buffer[3] = 0xff;
+ memcpy(buffer + 4, fw->data, fw->size);
+
+ txbuf[0] = FPGA_CMD_REFRESH;
+ ret = spi_write(spi, txbuf, 4);
+
+ txbuf[0] = FPGA_CMD_WRITE_EN;
+ ret = spi_write(spi, txbuf, 4);
+
+ txbuf[0] = FPGA_CMD_CLEAR;
+ ret = spi_write(spi, txbuf, 4);
+
+ /*
+ * Wait for FPGA memory to become cleared
+ */
+ for (i = 0; i < FPGA_CLEAR_LOOP_COUNT; i++) {
+ txbuf[0] = FPGA_CMD_READ_STATUS;
+ ret = spi_write_then_read(spi, txbuf, 8, rxbuf, rx_len);
+ status = get_unaligned_be32(&rxbuf[4]);
+ if (status == FPGA_STATUS_CLEARED)
+ break;
+
+ msleep(FPGA_CLEAR_MSLEEP);
+ }
+
+ if (i == FPGA_CLEAR_LOOP_COUNT) {
+ dev_err(&spi->dev,
+ "Error: Timeout waiting for FPGA to clear (status=%08x)!\n",
+ status);
+ kfree(buffer);
+ return;
+ }
+
+ dev_info(&spi->dev, "Configuring the FPGA...\n");
+ ret = spi_write(spi, buffer, fw->size + 8);
+
+ txbuf[0] = FPGA_CMD_WRITE_DIS;
+ ret = spi_write(spi, txbuf, 4);
+
+ txbuf[0] = FPGA_CMD_READ_STATUS;
+ ret = spi_write_then_read(spi, txbuf, 8, rxbuf, rx_len);
+ status = get_unaligned_be32(&rxbuf[4]);
+ dev_dbg(&spi->dev, "FPGA Status=%08x\n", status);
+
+ /* Check result */
+ if (status & FPGA_STATUS_DONE)
+ dev_info(&spi->dev, "FPGA successfully configured!\n");
+ else
+ dev_info(&spi->dev, "FPGA not configured (DONE not set)\n");
+
+ /*
+ * Don't forget to release the firmware again
+ */
+ release_firmware(fw);
+
+ kfree(buffer);
+
+ complete(&data->fw_loaded);
+}
+
+static int lattice_ecp3_probe(struct spi_device *spi)
+{
+ struct fpga_data *data;
+ int err;
+
+ data = devm_kzalloc(&spi->dev, sizeof(*data), GFP_KERNEL);
+ if (!data) {
+ dev_err(&spi->dev, "Memory allocation for fpga_data failed\n");
+ return -ENOMEM;
+ }
+ spi_set_drvdata(spi, data);
+
+ init_completion(&data->fw_loaded);
+ err = request_firmware_nowait(THIS_MODULE, FW_ACTION_HOTPLUG,
+ FIRMWARE_NAME, &spi->dev,
+ GFP_KERNEL, spi, firmware_load);
+ if (err) {
+ dev_err(&spi->dev, "Firmware loading failed with %d!\n", err);
+ return err;
+ }
+
+ dev_info(&spi->dev, "FPGA bitstream configuration driver registered\n");
+
+ return 0;
+}
+
+static int lattice_ecp3_remove(struct spi_device *spi)
+{
+ struct fpga_data *data = spi_get_drvdata(spi);
+
+ wait_for_completion(&data->fw_loaded);
+
+ return 0;
+}
+
+static const struct spi_device_id lattice_ecp3_id[] = {
+ { "ecp3-17", 0 },
+ { "ecp3-35", 0 },
+ { }
+};
+MODULE_DEVICE_TABLE(spi, lattice_ecp3_id);
+
+static struct spi_driver lattice_ecp3_driver = {
+ .driver = {
+ .name = "lattice-ecp3",
+ },
+ .probe = lattice_ecp3_probe,
+ .remove = lattice_ecp3_remove,
+ .id_table = lattice_ecp3_id,
+};
+
+module_spi_driver(lattice_ecp3_driver);
+
+MODULE_AUTHOR("Stefan Roese <sr@denx.de>");
+MODULE_DESCRIPTION("Lattice ECP3 FPGA configuration via SPI");
+MODULE_LICENSE("GPL");
+MODULE_FIRMWARE(FIRMWARE_NAME);
diff --git a/drivers/misc/lis3lv02d/Kconfig b/drivers/misc/lis3lv02d/Kconfig
new file mode 100644
index 0000000..8f474e6
--- /dev/null
+++ b/drivers/misc/lis3lv02d/Kconfig
@@ -0,0 +1,37 @@
+#
+# STMicroelectonics LIS3LV02D and similar accelerometers
+#
+
+config SENSORS_LIS3_SPI
+ tristate "STMicroeletronics LIS3LV02Dx three-axis digital accelerometer (SPI)"
+ depends on !ACPI && SPI_MASTER && INPUT
+ select SENSORS_LIS3LV02D
+ default n
+ help
+ This driver provides support for the LIS3LV02Dx accelerometer connected
+ via SPI. The accelerometer data is readable via
+ /sys/devices/platform/lis3lv02d.
+
+ This driver also provides an absolute input class device, allowing
+ the laptop to act as a pinball machine-esque joystick.
+
+ This driver can also be built as modules. If so, the core module
+ will be called lis3lv02d and a specific module for the SPI transport
+ is called lis3lv02d_spi.
+
+config SENSORS_LIS3_I2C
+ tristate "STMicroeletronics LIS3LV02Dx three-axis digital accelerometer (I2C)"
+ depends on I2C && INPUT
+ select SENSORS_LIS3LV02D
+ default n
+ help
+ This driver provides support for the LIS3LV02Dx accelerometer connected
+ via I2C. The accelerometer data is readable via
+ /sys/devices/platform/lis3lv02d.
+
+ This driver also provides an absolute input class device, allowing
+ the device to act as a pinball machine-esque joystick.
+
+ This driver can also be built as modules. If so, the core module
+ will be called lis3lv02d and a specific module for the I2C transport
+ is called lis3lv02d_i2c.
diff --git a/drivers/misc/lis3lv02d/Makefile b/drivers/misc/lis3lv02d/Makefile
new file mode 100644
index 0000000..4bf58b1
--- /dev/null
+++ b/drivers/misc/lis3lv02d/Makefile
@@ -0,0 +1,7 @@
+#
+# STMicroelectonics LIS3LV02D and similar accelerometers
+#
+
+obj-$(CONFIG_SENSORS_LIS3LV02D) += lis3lv02d.o
+obj-$(CONFIG_SENSORS_LIS3_SPI) += lis3lv02d_spi.o
+obj-$(CONFIG_SENSORS_LIS3_I2C) += lis3lv02d_i2c.o
diff --git a/drivers/misc/lis3lv02d/lis3lv02d.c b/drivers/misc/lis3lv02d/lis3lv02d.c
new file mode 100644
index 0000000..fb8705f
--- /dev/null
+++ b/drivers/misc/lis3lv02d/lis3lv02d.c
@@ -0,0 +1,1262 @@
+/*
+ * lis3lv02d.c - ST LIS3LV02DL accelerometer driver
+ *
+ * Copyright (C) 2007-2008 Yan Burman
+ * Copyright (C) 2008 Eric Piel
+ * Copyright (C) 2008-2009 Pavel Machek
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/dmi.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/platform_device.h>
+#include <linux/interrupt.h>
+#include <linux/input-polldev.h>
+#include <linux/delay.h>
+#include <linux/wait.h>
+#include <linux/poll.h>
+#include <linux/slab.h>
+#include <linux/freezer.h>
+#include <linux/uaccess.h>
+#include <linux/miscdevice.h>
+#include <linux/pm_runtime.h>
+#include <linux/atomic.h>
+#include <linux/of_device.h>
+#include "lis3lv02d.h"
+
+#define DRIVER_NAME "lis3lv02d"
+
+/* joystick device poll interval in milliseconds */
+#define MDPS_POLL_INTERVAL 50
+#define MDPS_POLL_MIN 0
+#define MDPS_POLL_MAX 2000
+
+#define LIS3_SYSFS_POWERDOWN_DELAY 5000 /* In milliseconds */
+
+#define SELFTEST_OK 0
+#define SELFTEST_FAIL -1
+#define SELFTEST_IRQ -2
+
+#define IRQ_LINE0 0
+#define IRQ_LINE1 1
+
+/*
+ * The sensor can also generate interrupts (DRDY) but it's pretty pointless
+ * because they are generated even if the data do not change. So it's better
+ * to keep the interrupt for the free-fall event. The values are updated at
+ * 40Hz (at the lowest frequency), but as it can be pretty time consuming on
+ * some low processor, we poll the sensor only at 20Hz... enough for the
+ * joystick.
+ */
+
+#define LIS3_PWRON_DELAY_WAI_12B (5000)
+#define LIS3_PWRON_DELAY_WAI_8B (3000)
+
+/*
+ * LIS3LV02D spec says 1024 LSBs corresponds 1 G -> 1LSB is 1000/1024 mG
+ * LIS302D spec says: 18 mG / digit
+ * LIS3_ACCURACY is used to increase accuracy of the intermediate
+ * calculation results.
+ */
+#define LIS3_ACCURACY 1024
+/* Sensitivity values for -2G +2G scale */
+#define LIS3_SENSITIVITY_12B ((LIS3_ACCURACY * 1000) / 1024)
+#define LIS3_SENSITIVITY_8B (18 * LIS3_ACCURACY)
+
+/*
+ * LIS331DLH spec says 1LSBs corresponds 4G/4096 -> 1LSB is 1000/1024 mG.
+ * Below macros defines sensitivity values for +/-2G. Dataout bits for
+ * +/-2G range is 12 bits so 4 bits adjustment must be done to get 12bit
+ * data from 16bit value. Currently this driver supports only 2G range.
+ */
+#define LIS3DLH_SENSITIVITY_2G ((LIS3_ACCURACY * 1000) / 1024)
+#define SHIFT_ADJ_2G 4
+
+#define LIS3_DEFAULT_FUZZ_12B 3
+#define LIS3_DEFAULT_FLAT_12B 3
+#define LIS3_DEFAULT_FUZZ_8B 1
+#define LIS3_DEFAULT_FLAT_8B 1
+
+struct lis3lv02d lis3_dev = {
+ .misc_wait = __WAIT_QUEUE_HEAD_INITIALIZER(lis3_dev.misc_wait),
+};
+EXPORT_SYMBOL_GPL(lis3_dev);
+
+/* just like param_set_int() but does sanity-check so that it won't point
+ * over the axis array size
+ */
+static int param_set_axis(const char *val, const struct kernel_param *kp)
+{
+ int ret = param_set_int(val, kp);
+ if (!ret) {
+ int val = *(int *)kp->arg;
+ if (val < 0)
+ val = -val;
+ if (!val || val > 3)
+ return -EINVAL;
+ }
+ return ret;
+}
+
+static const struct kernel_param_ops param_ops_axis = {
+ .set = param_set_axis,
+ .get = param_get_int,
+};
+
+#define param_check_axis(name, p) param_check_int(name, p)
+
+module_param_array_named(axes, lis3_dev.ac.as_array, axis, NULL, 0644);
+MODULE_PARM_DESC(axes, "Axis-mapping for x,y,z directions");
+
+static s16 lis3lv02d_read_8(struct lis3lv02d *lis3, int reg)
+{
+ s8 lo;
+ if (lis3->read(lis3, reg, &lo) < 0)
+ return 0;
+
+ return lo;
+}
+
+static s16 lis3lv02d_read_12(struct lis3lv02d *lis3, int reg)
+{
+ u8 lo, hi;
+
+ lis3->read(lis3, reg - 1, &lo);
+ lis3->read(lis3, reg, &hi);
+ /* In "12 bit right justified" mode, bit 6, bit 7, bit 8 = bit 5 */
+ return (s16)((hi << 8) | lo);
+}
+
+/* 12bits for 2G range, 13 bits for 4G range and 14 bits for 8G range */
+static s16 lis331dlh_read_data(struct lis3lv02d *lis3, int reg)
+{
+ u8 lo, hi;
+ int v;
+
+ lis3->read(lis3, reg - 1, &lo);
+ lis3->read(lis3, reg, &hi);
+ v = (int) ((hi << 8) | lo);
+
+ return (s16) v >> lis3->shift_adj;
+}
+
+/**
+ * lis3lv02d_get_axis - For the given axis, give the value converted
+ * @axis: 1,2,3 - can also be negative
+ * @hw_values: raw values returned by the hardware
+ *
+ * Returns the converted value.
+ */
+static inline int lis3lv02d_get_axis(s8 axis, int hw_values[3])
+{
+ if (axis > 0)
+ return hw_values[axis - 1];
+ else
+ return -hw_values[-axis - 1];
+}
+
+/**
+ * lis3lv02d_get_xyz - Get X, Y and Z axis values from the accelerometer
+ * @lis3: pointer to the device struct
+ * @x: where to store the X axis value
+ * @y: where to store the Y axis value
+ * @z: where to store the Z axis value
+ *
+ * Note that 40Hz input device can eat up about 10% CPU at 800MHZ
+ */
+static void lis3lv02d_get_xyz(struct lis3lv02d *lis3, int *x, int *y, int *z)
+{
+ int position[3];
+ int i;
+
+ if (lis3->blkread) {
+ if (lis3->whoami == WAI_12B) {
+ u16 data[3];
+ lis3->blkread(lis3, OUTX_L, 6, (u8 *)data);
+ for (i = 0; i < 3; i++)
+ position[i] = (s16)le16_to_cpu(data[i]);
+ } else {
+ u8 data[5];
+ /* Data: x, dummy, y, dummy, z */
+ lis3->blkread(lis3, OUTX, 5, data);
+ for (i = 0; i < 3; i++)
+ position[i] = (s8)data[i * 2];
+ }
+ } else {
+ position[0] = lis3->read_data(lis3, OUTX);
+ position[1] = lis3->read_data(lis3, OUTY);
+ position[2] = lis3->read_data(lis3, OUTZ);
+ }
+
+ for (i = 0; i < 3; i++)
+ position[i] = (position[i] * lis3->scale) / LIS3_ACCURACY;
+
+ *x = lis3lv02d_get_axis(lis3->ac.x, position);
+ *y = lis3lv02d_get_axis(lis3->ac.y, position);
+ *z = lis3lv02d_get_axis(lis3->ac.z, position);
+}
+
+/* conversion btw sampling rate and the register values */
+static int lis3_12_rates[4] = {40, 160, 640, 2560};
+static int lis3_8_rates[2] = {100, 400};
+static int lis3_3dc_rates[16] = {0, 1, 10, 25, 50, 100, 200, 400, 1600, 5000};
+static int lis3_3dlh_rates[4] = {50, 100, 400, 1000};
+
+/* ODR is Output Data Rate */
+static int lis3lv02d_get_odr(struct lis3lv02d *lis3)
+{
+ u8 ctrl;
+ int shift;
+
+ lis3->read(lis3, CTRL_REG1, &ctrl);
+ ctrl &= lis3->odr_mask;
+ shift = ffs(lis3->odr_mask) - 1;
+ return lis3->odrs[(ctrl >> shift)];
+}
+
+static int lis3lv02d_get_pwron_wait(struct lis3lv02d *lis3)
+{
+ int div = lis3lv02d_get_odr(lis3);
+
+ if (WARN_ONCE(div == 0, "device returned spurious data"))
+ return -ENXIO;
+
+ /* LIS3 power on delay is quite long */
+ msleep(lis3->pwron_delay / div);
+ return 0;
+}
+
+static int lis3lv02d_set_odr(struct lis3lv02d *lis3, int rate)
+{
+ u8 ctrl;
+ int i, len, shift;
+
+ if (!rate)
+ return -EINVAL;
+
+ lis3->read(lis3, CTRL_REG1, &ctrl);
+ ctrl &= ~lis3->odr_mask;
+ len = 1 << hweight_long(lis3->odr_mask); /* # of possible values */
+ shift = ffs(lis3->odr_mask) - 1;
+
+ for (i = 0; i < len; i++)
+ if (lis3->odrs[i] == rate) {
+ lis3->write(lis3, CTRL_REG1,
+ ctrl | (i << shift));
+ return 0;
+ }
+ return -EINVAL;
+}
+
+static int lis3lv02d_selftest(struct lis3lv02d *lis3, s16 results[3])
+{
+ u8 ctlreg, reg;
+ s16 x, y, z;
+ u8 selftest;
+ int ret;
+ u8 ctrl_reg_data;
+ unsigned char irq_cfg;
+
+ mutex_lock(&lis3->mutex);
+
+ irq_cfg = lis3->irq_cfg;
+ if (lis3->whoami == WAI_8B) {
+ lis3->data_ready_count[IRQ_LINE0] = 0;
+ lis3->data_ready_count[IRQ_LINE1] = 0;
+
+ /* Change interrupt cfg to data ready for selftest */
+ atomic_inc(&lis3->wake_thread);
+ lis3->irq_cfg = LIS3_IRQ1_DATA_READY | LIS3_IRQ2_DATA_READY;
+ lis3->read(lis3, CTRL_REG3, &ctrl_reg_data);
+ lis3->write(lis3, CTRL_REG3, (ctrl_reg_data &
+ ~(LIS3_IRQ1_MASK | LIS3_IRQ2_MASK)) |
+ (LIS3_IRQ1_DATA_READY | LIS3_IRQ2_DATA_READY));
+ }
+
+ if ((lis3->whoami == WAI_3DC) || (lis3->whoami == WAI_3DLH)) {
+ ctlreg = CTRL_REG4;
+ selftest = CTRL4_ST0;
+ } else {
+ ctlreg = CTRL_REG1;
+ if (lis3->whoami == WAI_12B)
+ selftest = CTRL1_ST;
+ else
+ selftest = CTRL1_STP;
+ }
+
+ lis3->read(lis3, ctlreg, ®);
+ lis3->write(lis3, ctlreg, (reg | selftest));
+ ret = lis3lv02d_get_pwron_wait(lis3);
+ if (ret)
+ goto fail;
+
+ /* Read directly to avoid axis remap */
+ x = lis3->read_data(lis3, OUTX);
+ y = lis3->read_data(lis3, OUTY);
+ z = lis3->read_data(lis3, OUTZ);
+
+ /* back to normal settings */
+ lis3->write(lis3, ctlreg, reg);
+ ret = lis3lv02d_get_pwron_wait(lis3);
+ if (ret)
+ goto fail;
+
+ results[0] = x - lis3->read_data(lis3, OUTX);
+ results[1] = y - lis3->read_data(lis3, OUTY);
+ results[2] = z - lis3->read_data(lis3, OUTZ);
+
+ ret = 0;
+
+ if (lis3->whoami == WAI_8B) {
+ /* Restore original interrupt configuration */
+ atomic_dec(&lis3->wake_thread);
+ lis3->write(lis3, CTRL_REG3, ctrl_reg_data);
+ lis3->irq_cfg = irq_cfg;
+
+ if ((irq_cfg & LIS3_IRQ1_MASK) &&
+ lis3->data_ready_count[IRQ_LINE0] < 2) {
+ ret = SELFTEST_IRQ;
+ goto fail;
+ }
+
+ if ((irq_cfg & LIS3_IRQ2_MASK) &&
+ lis3->data_ready_count[IRQ_LINE1] < 2) {
+ ret = SELFTEST_IRQ;
+ goto fail;
+ }
+ }
+
+ if (lis3->pdata) {
+ int i;
+ for (i = 0; i < 3; i++) {
+ /* Check against selftest acceptance limits */
+ if ((results[i] < lis3->pdata->st_min_limits[i]) ||
+ (results[i] > lis3->pdata->st_max_limits[i])) {
+ ret = SELFTEST_FAIL;
+ goto fail;
+ }
+ }
+ }
+
+ /* test passed */
+fail:
+ mutex_unlock(&lis3->mutex);
+ return ret;
+}
+
+/*
+ * Order of registers in the list affects to order of the restore process.
+ * Perhaps it is a good idea to set interrupt enable register as a last one
+ * after all other configurations
+ */
+static u8 lis3_wai8_regs[] = { FF_WU_CFG_1, FF_WU_THS_1, FF_WU_DURATION_1,
+ FF_WU_CFG_2, FF_WU_THS_2, FF_WU_DURATION_2,
+ CLICK_CFG, CLICK_SRC, CLICK_THSY_X, CLICK_THSZ,
+ CLICK_TIMELIMIT, CLICK_LATENCY, CLICK_WINDOW,
+ CTRL_REG1, CTRL_REG2, CTRL_REG3};
+
+static u8 lis3_wai12_regs[] = {FF_WU_CFG, FF_WU_THS_L, FF_WU_THS_H,
+ FF_WU_DURATION, DD_CFG, DD_THSI_L, DD_THSI_H,
+ DD_THSE_L, DD_THSE_H,
+ CTRL_REG1, CTRL_REG3, CTRL_REG2};
+
+static inline void lis3_context_save(struct lis3lv02d *lis3)
+{
+ int i;
+ for (i = 0; i < lis3->regs_size; i++)
+ lis3->read(lis3, lis3->regs[i], &lis3->reg_cache[i]);
+ lis3->regs_stored = true;
+}
+
+static inline void lis3_context_restore(struct lis3lv02d *lis3)
+{
+ int i;
+ if (lis3->regs_stored)
+ for (i = 0; i < lis3->regs_size; i++)
+ lis3->write(lis3, lis3->regs[i], lis3->reg_cache[i]);
+}
+
+void lis3lv02d_poweroff(struct lis3lv02d *lis3)
+{
+ if (lis3->reg_ctrl)
+ lis3_context_save(lis3);
+ /* disable X,Y,Z axis and power down */
+ lis3->write(lis3, CTRL_REG1, 0x00);
+ if (lis3->reg_ctrl)
+ lis3->reg_ctrl(lis3, LIS3_REG_OFF);
+}
+EXPORT_SYMBOL_GPL(lis3lv02d_poweroff);
+
+int lis3lv02d_poweron(struct lis3lv02d *lis3)
+{
+ int err;
+ u8 reg;
+
+ lis3->init(lis3);
+
+ /*
+ * Common configuration
+ * BDU: (12 bits sensors only) LSB and MSB values are not updated until
+ * both have been read. So the value read will always be correct.
+ * Set BOOT bit to refresh factory tuning values.
+ */
+ if (lis3->pdata) {
+ lis3->read(lis3, CTRL_REG2, ®);
+ if (lis3->whoami == WAI_12B)
+ reg |= CTRL2_BDU | CTRL2_BOOT;
+ else if (lis3->whoami == WAI_3DLH)
+ reg |= CTRL2_BOOT_3DLH;
+ else
+ reg |= CTRL2_BOOT_8B;
+ lis3->write(lis3, CTRL_REG2, reg);
+
+ if (lis3->whoami == WAI_3DLH) {
+ lis3->read(lis3, CTRL_REG4, ®);
+ reg |= CTRL4_BDU;
+ lis3->write(lis3, CTRL_REG4, reg);
+ }
+ }
+
+ err = lis3lv02d_get_pwron_wait(lis3);
+ if (err)
+ return err;
+
+ if (lis3->reg_ctrl)
+ lis3_context_restore(lis3);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(lis3lv02d_poweron);
+
+
+static void lis3lv02d_joystick_poll(struct input_polled_dev *pidev)
+{
+ struct lis3lv02d *lis3 = pidev->private;
+ int x, y, z;
+
+ mutex_lock(&lis3->mutex);
+ lis3lv02d_get_xyz(lis3, &x, &y, &z);
+ input_report_abs(pidev->input, ABS_X, x);
+ input_report_abs(pidev->input, ABS_Y, y);
+ input_report_abs(pidev->input, ABS_Z, z);
+ input_sync(pidev->input);
+ mutex_unlock(&lis3->mutex);
+}
+
+static void lis3lv02d_joystick_open(struct input_polled_dev *pidev)
+{
+ struct lis3lv02d *lis3 = pidev->private;
+
+ if (lis3->pm_dev)
+ pm_runtime_get_sync(lis3->pm_dev);
+
+ if (lis3->pdata && lis3->whoami == WAI_8B && lis3->idev)
+ atomic_set(&lis3->wake_thread, 1);
+ /*
+ * Update coordinates for the case where poll interval is 0 and
+ * the chip in running purely under interrupt control
+ */
+ lis3lv02d_joystick_poll(pidev);
+}
+
+static void lis3lv02d_joystick_close(struct input_polled_dev *pidev)
+{
+ struct lis3lv02d *lis3 = pidev->private;
+
+ atomic_set(&lis3->wake_thread, 0);
+ if (lis3->pm_dev)
+ pm_runtime_put(lis3->pm_dev);
+}
+
+static irqreturn_t lis302dl_interrupt(int irq, void *data)
+{
+ struct lis3lv02d *lis3 = data;
+
+ if (!test_bit(0, &lis3->misc_opened))
+ goto out;
+
+ /*
+ * Be careful: on some HP laptops the bios force DD when on battery and
+ * the lid is closed. This leads to interrupts as soon as a little move
+ * is done.
+ */
+ atomic_inc(&lis3->count);
+
+ wake_up_interruptible(&lis3->misc_wait);
+ kill_fasync(&lis3->async_queue, SIGIO, POLL_IN);
+out:
+ if (atomic_read(&lis3->wake_thread))
+ return IRQ_WAKE_THREAD;
+ return IRQ_HANDLED;
+}
+
+static void lis302dl_interrupt_handle_click(struct lis3lv02d *lis3)
+{
+ struct input_dev *dev = lis3->idev->input;
+ u8 click_src;
+
+ mutex_lock(&lis3->mutex);
+ lis3->read(lis3, CLICK_SRC, &click_src);
+
+ if (click_src & CLICK_SINGLE_X) {
+ input_report_key(dev, lis3->mapped_btns[0], 1);
+ input_report_key(dev, lis3->mapped_btns[0], 0);
+ }
+
+ if (click_src & CLICK_SINGLE_Y) {
+ input_report_key(dev, lis3->mapped_btns[1], 1);
+ input_report_key(dev, lis3->mapped_btns[1], 0);
+ }
+
+ if (click_src & CLICK_SINGLE_Z) {
+ input_report_key(dev, lis3->mapped_btns[2], 1);
+ input_report_key(dev, lis3->mapped_btns[2], 0);
+ }
+ input_sync(dev);
+ mutex_unlock(&lis3->mutex);
+}
+
+static inline void lis302dl_data_ready(struct lis3lv02d *lis3, int index)
+{
+ int dummy;
+
+ /* Dummy read to ack interrupt */
+ lis3lv02d_get_xyz(lis3, &dummy, &dummy, &dummy);
+ lis3->data_ready_count[index]++;
+}
+
+static irqreturn_t lis302dl_interrupt_thread1_8b(int irq, void *data)
+{
+ struct lis3lv02d *lis3 = data;
+ u8 irq_cfg = lis3->irq_cfg & LIS3_IRQ1_MASK;
+
+ if (irq_cfg == LIS3_IRQ1_CLICK)
+ lis302dl_interrupt_handle_click(lis3);
+ else if (unlikely(irq_cfg == LIS3_IRQ1_DATA_READY))
+ lis302dl_data_ready(lis3, IRQ_LINE0);
+ else
+ lis3lv02d_joystick_poll(lis3->idev);
+
+ return IRQ_HANDLED;
+}
+
+static irqreturn_t lis302dl_interrupt_thread2_8b(int irq, void *data)
+{
+ struct lis3lv02d *lis3 = data;
+ u8 irq_cfg = lis3->irq_cfg & LIS3_IRQ2_MASK;
+
+ if (irq_cfg == LIS3_IRQ2_CLICK)
+ lis302dl_interrupt_handle_click(lis3);
+ else if (unlikely(irq_cfg == LIS3_IRQ2_DATA_READY))
+ lis302dl_data_ready(lis3, IRQ_LINE1);
+ else
+ lis3lv02d_joystick_poll(lis3->idev);
+
+ return IRQ_HANDLED;
+}
+
+static int lis3lv02d_misc_open(struct inode *inode, struct file *file)
+{
+ struct lis3lv02d *lis3 = container_of(file->private_data,
+ struct lis3lv02d, miscdev);
+
+ if (test_and_set_bit(0, &lis3->misc_opened))
+ return -EBUSY; /* already open */
+
+ if (lis3->pm_dev)
+ pm_runtime_get_sync(lis3->pm_dev);
+
+ atomic_set(&lis3->count, 0);
+ return 0;
+}
+
+static int lis3lv02d_misc_release(struct inode *inode, struct file *file)
+{
+ struct lis3lv02d *lis3 = container_of(file->private_data,
+ struct lis3lv02d, miscdev);
+
+ clear_bit(0, &lis3->misc_opened); /* release the device */
+ if (lis3->pm_dev)
+ pm_runtime_put(lis3->pm_dev);
+ return 0;
+}
+
+static ssize_t lis3lv02d_misc_read(struct file *file, char __user *buf,
+ size_t count, loff_t *pos)
+{
+ struct lis3lv02d *lis3 = container_of(file->private_data,
+ struct lis3lv02d, miscdev);
+
+ DECLARE_WAITQUEUE(wait, current);
+ u32 data;
+ unsigned char byte_data;
+ ssize_t retval = 1;
+
+ if (count < 1)
+ return -EINVAL;
+
+ add_wait_queue(&lis3->misc_wait, &wait);
+ while (true) {
+ set_current_state(TASK_INTERRUPTIBLE);
+ data = atomic_xchg(&lis3->count, 0);
+ if (data)
+ break;
+
+ if (file->f_flags & O_NONBLOCK) {
+ retval = -EAGAIN;
+ goto out;
+ }
+
+ if (signal_pending(current)) {
+ retval = -ERESTARTSYS;
+ goto out;
+ }
+
+ schedule();
+ }
+
+ if (data < 255)
+ byte_data = data;
+ else
+ byte_data = 255;
+
+ /* make sure we are not going into copy_to_user() with
+ * TASK_INTERRUPTIBLE state */
+ set_current_state(TASK_RUNNING);
+ if (copy_to_user(buf, &byte_data, sizeof(byte_data)))
+ retval = -EFAULT;
+
+out:
+ __set_current_state(TASK_RUNNING);
+ remove_wait_queue(&lis3->misc_wait, &wait);
+
+ return retval;
+}
+
+static unsigned int lis3lv02d_misc_poll(struct file *file, poll_table *wait)
+{
+ struct lis3lv02d *lis3 = container_of(file->private_data,
+ struct lis3lv02d, miscdev);
+
+ poll_wait(file, &lis3->misc_wait, wait);
+ if (atomic_read(&lis3->count))
+ return POLLIN | POLLRDNORM;
+ return 0;
+}
+
+static int lis3lv02d_misc_fasync(int fd, struct file *file, int on)
+{
+ struct lis3lv02d *lis3 = container_of(file->private_data,
+ struct lis3lv02d, miscdev);
+
+ return fasync_helper(fd, file, on, &lis3->async_queue);
+}
+
+static const struct file_operations lis3lv02d_misc_fops = {
+ .owner = THIS_MODULE,
+ .llseek = no_llseek,
+ .read = lis3lv02d_misc_read,
+ .open = lis3lv02d_misc_open,
+ .release = lis3lv02d_misc_release,
+ .poll = lis3lv02d_misc_poll,
+ .fasync = lis3lv02d_misc_fasync,
+};
+
+int lis3lv02d_joystick_enable(struct lis3lv02d *lis3)
+{
+ struct input_dev *input_dev;
+ int err;
+ int max_val, fuzz, flat;
+ int btns[] = {BTN_X, BTN_Y, BTN_Z};
+
+ if (lis3->idev)
+ return -EINVAL;
+
+ lis3->idev = input_allocate_polled_device();
+ if (!lis3->idev)
+ return -ENOMEM;
+
+ lis3->idev->poll = lis3lv02d_joystick_poll;
+ lis3->idev->open = lis3lv02d_joystick_open;
+ lis3->idev->close = lis3lv02d_joystick_close;
+ lis3->idev->poll_interval = MDPS_POLL_INTERVAL;
+ lis3->idev->poll_interval_min = MDPS_POLL_MIN;
+ lis3->idev->poll_interval_max = MDPS_POLL_MAX;
+ lis3->idev->private = lis3;
+ input_dev = lis3->idev->input;
+
+ input_dev->name = "ST LIS3LV02DL Accelerometer";
+ input_dev->phys = DRIVER_NAME "/input0";
+ input_dev->id.bustype = BUS_HOST;
+ input_dev->id.vendor = 0;
+ input_dev->dev.parent = &lis3->pdev->dev;
+
+ set_bit(EV_ABS, input_dev->evbit);
+ max_val = (lis3->mdps_max_val * lis3->scale) / LIS3_ACCURACY;
+ if (lis3->whoami == WAI_12B) {
+ fuzz = LIS3_DEFAULT_FUZZ_12B;
+ flat = LIS3_DEFAULT_FLAT_12B;
+ } else {
+ fuzz = LIS3_DEFAULT_FUZZ_8B;
+ flat = LIS3_DEFAULT_FLAT_8B;
+ }
+ fuzz = (fuzz * lis3->scale) / LIS3_ACCURACY;
+ flat = (flat * lis3->scale) / LIS3_ACCURACY;
+
+ input_set_abs_params(input_dev, ABS_X, -max_val, max_val, fuzz, flat);
+ input_set_abs_params(input_dev, ABS_Y, -max_val, max_val, fuzz, flat);
+ input_set_abs_params(input_dev, ABS_Z, -max_val, max_val, fuzz, flat);
+
+ lis3->mapped_btns[0] = lis3lv02d_get_axis(abs(lis3->ac.x), btns);
+ lis3->mapped_btns[1] = lis3lv02d_get_axis(abs(lis3->ac.y), btns);
+ lis3->mapped_btns[2] = lis3lv02d_get_axis(abs(lis3->ac.z), btns);
+
+ err = input_register_polled_device(lis3->idev);
+ if (err) {
+ input_free_polled_device(lis3->idev);
+ lis3->idev = NULL;
+ }
+
+ return err;
+}
+EXPORT_SYMBOL_GPL(lis3lv02d_joystick_enable);
+
+void lis3lv02d_joystick_disable(struct lis3lv02d *lis3)
+{
+ if (lis3->irq)
+ free_irq(lis3->irq, lis3);
+ if (lis3->pdata && lis3->pdata->irq2)
+ free_irq(lis3->pdata->irq2, lis3);
+
+ if (!lis3->idev)
+ return;
+
+ if (lis3->irq)
+ misc_deregister(&lis3->miscdev);
+ input_unregister_polled_device(lis3->idev);
+ input_free_polled_device(lis3->idev);
+ lis3->idev = NULL;
+}
+EXPORT_SYMBOL_GPL(lis3lv02d_joystick_disable);
+
+/* Sysfs stuff */
+static void lis3lv02d_sysfs_poweron(struct lis3lv02d *lis3)
+{
+ /*
+ * SYSFS functions are fast visitors so put-call
+ * immediately after the get-call. However, keep
+ * chip running for a while and schedule delayed
+ * suspend. This way periodic sysfs calls doesn't
+ * suffer from relatively long power up time.
+ */
+
+ if (lis3->pm_dev) {
+ pm_runtime_get_sync(lis3->pm_dev);
+ pm_runtime_put_noidle(lis3->pm_dev);
+ pm_schedule_suspend(lis3->pm_dev, LIS3_SYSFS_POWERDOWN_DELAY);
+ }
+}
+
+static ssize_t lis3lv02d_selftest_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct lis3lv02d *lis3 = dev_get_drvdata(dev);
+ s16 values[3];
+
+ static const char ok[] = "OK";
+ static const char fail[] = "FAIL";
+ static const char irq[] = "FAIL_IRQ";
+ const char *res;
+
+ lis3lv02d_sysfs_poweron(lis3);
+ switch (lis3lv02d_selftest(lis3, values)) {
+ case SELFTEST_FAIL:
+ res = fail;
+ break;
+ case SELFTEST_IRQ:
+ res = irq;
+ break;
+ case SELFTEST_OK:
+ default:
+ res = ok;
+ break;
+ }
+ return sprintf(buf, "%s %d %d %d\n", res,
+ values[0], values[1], values[2]);
+}
+
+static ssize_t lis3lv02d_position_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct lis3lv02d *lis3 = dev_get_drvdata(dev);
+ int x, y, z;
+
+ lis3lv02d_sysfs_poweron(lis3);
+ mutex_lock(&lis3->mutex);
+ lis3lv02d_get_xyz(lis3, &x, &y, &z);
+ mutex_unlock(&lis3->mutex);
+ return sprintf(buf, "(%d,%d,%d)\n", x, y, z);
+}
+
+static ssize_t lis3lv02d_rate_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct lis3lv02d *lis3 = dev_get_drvdata(dev);
+
+ lis3lv02d_sysfs_poweron(lis3);
+ return sprintf(buf, "%d\n", lis3lv02d_get_odr(lis3));
+}
+
+static ssize_t lis3lv02d_rate_set(struct device *dev,
+ struct device_attribute *attr, const char *buf,
+ size_t count)
+{
+ struct lis3lv02d *lis3 = dev_get_drvdata(dev);
+ unsigned long rate;
+ int ret;
+
+ ret = kstrtoul(buf, 0, &rate);
+ if (ret)
+ return ret;
+
+ lis3lv02d_sysfs_poweron(lis3);
+ if (lis3lv02d_set_odr(lis3, rate))
+ return -EINVAL;
+
+ return count;
+}
+
+static DEVICE_ATTR(selftest, S_IRUSR, lis3lv02d_selftest_show, NULL);
+static DEVICE_ATTR(position, S_IRUGO, lis3lv02d_position_show, NULL);
+static DEVICE_ATTR(rate, S_IRUGO | S_IWUSR, lis3lv02d_rate_show,
+ lis3lv02d_rate_set);
+
+static struct attribute *lis3lv02d_attributes[] = {
+ &dev_attr_selftest.attr,
+ &dev_attr_position.attr,
+ &dev_attr_rate.attr,
+ NULL
+};
+
+static struct attribute_group lis3lv02d_attribute_group = {
+ .attrs = lis3lv02d_attributes
+};
+
+
+static int lis3lv02d_add_fs(struct lis3lv02d *lis3)
+{
+ lis3->pdev = platform_device_register_simple(DRIVER_NAME, -1, NULL, 0);
+ if (IS_ERR(lis3->pdev))
+ return PTR_ERR(lis3->pdev);
+
+ platform_set_drvdata(lis3->pdev, lis3);
+ return sysfs_create_group(&lis3->pdev->dev.kobj, &lis3lv02d_attribute_group);
+}
+
+int lis3lv02d_remove_fs(struct lis3lv02d *lis3)
+{
+ sysfs_remove_group(&lis3->pdev->dev.kobj, &lis3lv02d_attribute_group);
+ platform_device_unregister(lis3->pdev);
+ if (lis3->pm_dev) {
+ /* Barrier after the sysfs remove */
+ pm_runtime_barrier(lis3->pm_dev);
+
+ /* SYSFS may have left chip running. Turn off if necessary */
+ if (!pm_runtime_suspended(lis3->pm_dev))
+ lis3lv02d_poweroff(lis3);
+
+ pm_runtime_disable(lis3->pm_dev);
+ pm_runtime_set_suspended(lis3->pm_dev);
+ }
+ kfree(lis3->reg_cache);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(lis3lv02d_remove_fs);
+
+static void lis3lv02d_8b_configure(struct lis3lv02d *lis3,
+ struct lis3lv02d_platform_data *p)
+{
+ int err;
+ int ctrl2 = p->hipass_ctrl;
+
+ if (p->click_flags) {
+ lis3->write(lis3, CLICK_CFG, p->click_flags);
+ lis3->write(lis3, CLICK_TIMELIMIT, p->click_time_limit);
+ lis3->write(lis3, CLICK_LATENCY, p->click_latency);
+ lis3->write(lis3, CLICK_WINDOW, p->click_window);
+ lis3->write(lis3, CLICK_THSZ, p->click_thresh_z & 0xf);
+ lis3->write(lis3, CLICK_THSY_X,
+ (p->click_thresh_x & 0xf) |
+ (p->click_thresh_y << 4));
+
+ if (lis3->idev) {
+ struct input_dev *input_dev = lis3->idev->input;
+ input_set_capability(input_dev, EV_KEY, BTN_X);
+ input_set_capability(input_dev, EV_KEY, BTN_Y);
+ input_set_capability(input_dev, EV_KEY, BTN_Z);
+ }
+ }
+
+ if (p->wakeup_flags) {
+ lis3->write(lis3, FF_WU_CFG_1, p->wakeup_flags);
+ lis3->write(lis3, FF_WU_THS_1, p->wakeup_thresh & 0x7f);
+ /* pdata value + 1 to keep this backward compatible*/
+ lis3->write(lis3, FF_WU_DURATION_1, p->duration1 + 1);
+ ctrl2 ^= HP_FF_WU1; /* Xor to keep compatible with old pdata*/
+ }
+
+ if (p->wakeup_flags2) {
+ lis3->write(lis3, FF_WU_CFG_2, p->wakeup_flags2);
+ lis3->write(lis3, FF_WU_THS_2, p->wakeup_thresh2 & 0x7f);
+ /* pdata value + 1 to keep this backward compatible*/
+ lis3->write(lis3, FF_WU_DURATION_2, p->duration2 + 1);
+ ctrl2 ^= HP_FF_WU2; /* Xor to keep compatible with old pdata*/
+ }
+ /* Configure hipass filters */
+ lis3->write(lis3, CTRL_REG2, ctrl2);
+
+ if (p->irq2) {
+ err = request_threaded_irq(p->irq2,
+ NULL,
+ lis302dl_interrupt_thread2_8b,
+ IRQF_TRIGGER_RISING | IRQF_ONESHOT |
+ (p->irq_flags2 & IRQF_TRIGGER_MASK),
+ DRIVER_NAME, lis3);
+ if (err < 0)
+ pr_err("No second IRQ. Limited functionality\n");
+ }
+}
+
+#ifdef CONFIG_OF
+int lis3lv02d_init_dt(struct lis3lv02d *lis3)
+{
+ struct lis3lv02d_platform_data *pdata;
+ struct device_node *np = lis3->of_node;
+ u32 val;
+ s32 sval;
+
+ if (!lis3->of_node)
+ return 0;
+
+ pdata = kzalloc(sizeof(*pdata), GFP_KERNEL);
+ if (!pdata)
+ return -ENOMEM;
+
+ if (of_get_property(np, "st,click-single-x", NULL))
+ pdata->click_flags |= LIS3_CLICK_SINGLE_X;
+ if (of_get_property(np, "st,click-double-x", NULL))
+ pdata->click_flags |= LIS3_CLICK_DOUBLE_X;
+
+ if (of_get_property(np, "st,click-single-y", NULL))
+ pdata->click_flags |= LIS3_CLICK_SINGLE_Y;
+ if (of_get_property(np, "st,click-double-y", NULL))
+ pdata->click_flags |= LIS3_CLICK_DOUBLE_Y;
+
+ if (of_get_property(np, "st,click-single-z", NULL))
+ pdata->click_flags |= LIS3_CLICK_SINGLE_Z;
+ if (of_get_property(np, "st,click-double-z", NULL))
+ pdata->click_flags |= LIS3_CLICK_DOUBLE_Z;
+
+ if (!of_property_read_u32(np, "st,click-threshold-x", &val))
+ pdata->click_thresh_x = val;
+ if (!of_property_read_u32(np, "st,click-threshold-y", &val))
+ pdata->click_thresh_y = val;
+ if (!of_property_read_u32(np, "st,click-threshold-z", &val))
+ pdata->click_thresh_z = val;
+
+ if (!of_property_read_u32(np, "st,click-time-limit", &val))
+ pdata->click_time_limit = val;
+ if (!of_property_read_u32(np, "st,click-latency", &val))
+ pdata->click_latency = val;
+ if (!of_property_read_u32(np, "st,click-window", &val))
+ pdata->click_window = val;
+
+ if (of_get_property(np, "st,irq1-disable", NULL))
+ pdata->irq_cfg |= LIS3_IRQ1_DISABLE;
+ if (of_get_property(np, "st,irq1-ff-wu-1", NULL))
+ pdata->irq_cfg |= LIS3_IRQ1_FF_WU_1;
+ if (of_get_property(np, "st,irq1-ff-wu-2", NULL))
+ pdata->irq_cfg |= LIS3_IRQ1_FF_WU_2;
+ if (of_get_property(np, "st,irq1-data-ready", NULL))
+ pdata->irq_cfg |= LIS3_IRQ1_DATA_READY;
+ if (of_get_property(np, "st,irq1-click", NULL))
+ pdata->irq_cfg |= LIS3_IRQ1_CLICK;
+
+ if (of_get_property(np, "st,irq2-disable", NULL))
+ pdata->irq_cfg |= LIS3_IRQ2_DISABLE;
+ if (of_get_property(np, "st,irq2-ff-wu-1", NULL))
+ pdata->irq_cfg |= LIS3_IRQ2_FF_WU_1;
+ if (of_get_property(np, "st,irq2-ff-wu-2", NULL))
+ pdata->irq_cfg |= LIS3_IRQ2_FF_WU_2;
+ if (of_get_property(np, "st,irq2-data-ready", NULL))
+ pdata->irq_cfg |= LIS3_IRQ2_DATA_READY;
+ if (of_get_property(np, "st,irq2-click", NULL))
+ pdata->irq_cfg |= LIS3_IRQ2_CLICK;
+
+ if (of_get_property(np, "st,irq-open-drain", NULL))
+ pdata->irq_cfg |= LIS3_IRQ_OPEN_DRAIN;
+ if (of_get_property(np, "st,irq-active-low", NULL))
+ pdata->irq_cfg |= LIS3_IRQ_ACTIVE_LOW;
+
+ if (!of_property_read_u32(np, "st,wu-duration-1", &val))
+ pdata->duration1 = val;
+ if (!of_property_read_u32(np, "st,wu-duration-2", &val))
+ pdata->duration2 = val;
+
+ if (of_get_property(np, "st,wakeup-x-lo", NULL))
+ pdata->wakeup_flags |= LIS3_WAKEUP_X_LO;
+ if (of_get_property(np, "st,wakeup-x-hi", NULL))
+ pdata->wakeup_flags |= LIS3_WAKEUP_X_HI;
+ if (of_get_property(np, "st,wakeup-y-lo", NULL))
+ pdata->wakeup_flags |= LIS3_WAKEUP_Y_LO;
+ if (of_get_property(np, "st,wakeup-y-hi", NULL))
+ pdata->wakeup_flags |= LIS3_WAKEUP_Y_HI;
+ if (of_get_property(np, "st,wakeup-z-lo", NULL))
+ pdata->wakeup_flags |= LIS3_WAKEUP_Z_LO;
+ if (of_get_property(np, "st,wakeup-z-hi", NULL))
+ pdata->wakeup_flags |= LIS3_WAKEUP_Z_HI;
+ if (of_get_property(np, "st,wakeup-threshold", &val))
+ pdata->wakeup_thresh = val;
+
+ if (of_get_property(np, "st,wakeup2-x-lo", NULL))
+ pdata->wakeup_flags2 |= LIS3_WAKEUP_X_LO;
+ if (of_get_property(np, "st,wakeup2-x-hi", NULL))
+ pdata->wakeup_flags2 |= LIS3_WAKEUP_X_HI;
+ if (of_get_property(np, "st,wakeup2-y-lo", NULL))
+ pdata->wakeup_flags2 |= LIS3_WAKEUP_Y_LO;
+ if (of_get_property(np, "st,wakeup2-y-hi", NULL))
+ pdata->wakeup_flags2 |= LIS3_WAKEUP_Y_HI;
+ if (of_get_property(np, "st,wakeup2-z-lo", NULL))
+ pdata->wakeup_flags2 |= LIS3_WAKEUP_Z_LO;
+ if (of_get_property(np, "st,wakeup2-z-hi", NULL))
+ pdata->wakeup_flags2 |= LIS3_WAKEUP_Z_HI;
+ if (of_get_property(np, "st,wakeup2-threshold", &val))
+ pdata->wakeup_thresh2 = val;
+
+ if (!of_property_read_u32(np, "st,highpass-cutoff-hz", &val)) {
+ switch (val) {
+ case 1:
+ pdata->hipass_ctrl = LIS3_HIPASS_CUTFF_1HZ;
+ break;
+ case 2:
+ pdata->hipass_ctrl = LIS3_HIPASS_CUTFF_2HZ;
+ break;
+ case 4:
+ pdata->hipass_ctrl = LIS3_HIPASS_CUTFF_4HZ;
+ break;
+ case 8:
+ pdata->hipass_ctrl = LIS3_HIPASS_CUTFF_8HZ;
+ break;
+ }
+ }
+
+ if (of_get_property(np, "st,hipass1-disable", NULL))
+ pdata->hipass_ctrl |= LIS3_HIPASS1_DISABLE;
+ if (of_get_property(np, "st,hipass2-disable", NULL))
+ pdata->hipass_ctrl |= LIS3_HIPASS2_DISABLE;
+
+ if (of_property_read_s32(np, "st,axis-x", &sval) == 0)
+ pdata->axis_x = sval;
+ if (of_property_read_s32(np, "st,axis-y", &sval) == 0)
+ pdata->axis_y = sval;
+ if (of_property_read_s32(np, "st,axis-z", &sval) == 0)
+ pdata->axis_z = sval;
+
+ if (of_get_property(np, "st,default-rate", NULL))
+ pdata->default_rate = val;
+
+ if (of_property_read_s32(np, "st,min-limit-x", &sval) == 0)
+ pdata->st_min_limits[0] = sval;
+ if (of_property_read_s32(np, "st,min-limit-y", &sval) == 0)
+ pdata->st_min_limits[1] = sval;
+ if (of_property_read_s32(np, "st,min-limit-z", &sval) == 0)
+ pdata->st_min_limits[2] = sval;
+
+ if (of_property_read_s32(np, "st,max-limit-x", &sval) == 0)
+ pdata->st_max_limits[0] = sval;
+ if (of_property_read_s32(np, "st,max-limit-y", &sval) == 0)
+ pdata->st_max_limits[1] = sval;
+ if (of_property_read_s32(np, "st,max-limit-z", &sval) == 0)
+ pdata->st_max_limits[2] = sval;
+
+
+ lis3->pdata = pdata;
+
+ return 0;
+}
+
+#else
+int lis3lv02d_init_dt(struct lis3lv02d *lis3)
+{
+ return 0;
+}
+#endif
+EXPORT_SYMBOL_GPL(lis3lv02d_init_dt);
+
+/*
+ * Initialise the accelerometer and the various subsystems.
+ * Should be rather independent of the bus system.
+ */
+int lis3lv02d_init_device(struct lis3lv02d *lis3)
+{
+ int err;
+ irq_handler_t thread_fn;
+ int irq_flags = 0;
+
+ lis3->whoami = lis3lv02d_read_8(lis3, WHO_AM_I);
+
+ switch (lis3->whoami) {
+ case WAI_12B:
+ pr_info("12 bits sensor found\n");
+ lis3->read_data = lis3lv02d_read_12;
+ lis3->mdps_max_val = 2048;
+ lis3->pwron_delay = LIS3_PWRON_DELAY_WAI_12B;
+ lis3->odrs = lis3_12_rates;
+ lis3->odr_mask = CTRL1_DF0 | CTRL1_DF1;
+ lis3->scale = LIS3_SENSITIVITY_12B;
+ lis3->regs = lis3_wai12_regs;
+ lis3->regs_size = ARRAY_SIZE(lis3_wai12_regs);
+ break;
+ case WAI_8B:
+ pr_info("8 bits sensor found\n");
+ lis3->read_data = lis3lv02d_read_8;
+ lis3->mdps_max_val = 128;
+ lis3->pwron_delay = LIS3_PWRON_DELAY_WAI_8B;
+ lis3->odrs = lis3_8_rates;
+ lis3->odr_mask = CTRL1_DR;
+ lis3->scale = LIS3_SENSITIVITY_8B;
+ lis3->regs = lis3_wai8_regs;
+ lis3->regs_size = ARRAY_SIZE(lis3_wai8_regs);
+ break;
+ case WAI_3DC:
+ pr_info("8 bits 3DC sensor found\n");
+ lis3->read_data = lis3lv02d_read_8;
+ lis3->mdps_max_val = 128;
+ lis3->pwron_delay = LIS3_PWRON_DELAY_WAI_8B;
+ lis3->odrs = lis3_3dc_rates;
+ lis3->odr_mask = CTRL1_ODR0|CTRL1_ODR1|CTRL1_ODR2|CTRL1_ODR3;
+ lis3->scale = LIS3_SENSITIVITY_8B;
+ break;
+ case WAI_3DLH:
+ pr_info("16 bits lis331dlh sensor found\n");
+ lis3->read_data = lis331dlh_read_data;
+ lis3->mdps_max_val = 2048; /* 12 bits for 2G */
+ lis3->shift_adj = SHIFT_ADJ_2G;
+ lis3->pwron_delay = LIS3_PWRON_DELAY_WAI_8B;
+ lis3->odrs = lis3_3dlh_rates;
+ lis3->odr_mask = CTRL1_DR0 | CTRL1_DR1;
+ lis3->scale = LIS3DLH_SENSITIVITY_2G;
+ break;
+ default:
+ pr_err("unknown sensor type 0x%X\n", lis3->whoami);
+ return -EINVAL;
+ }
+
+ lis3->reg_cache = kzalloc(max(sizeof(lis3_wai8_regs),
+ sizeof(lis3_wai12_regs)), GFP_KERNEL);
+
+ if (lis3->reg_cache == NULL) {
+ printk(KERN_ERR DRIVER_NAME "out of memory\n");
+ return -ENOMEM;
+ }
+
+ mutex_init(&lis3->mutex);
+ atomic_set(&lis3->wake_thread, 0);
+
+ lis3lv02d_add_fs(lis3);
+ err = lis3lv02d_poweron(lis3);
+ if (err) {
+ lis3lv02d_remove_fs(lis3);
+ return err;
+ }
+
+ if (lis3->pm_dev) {
+ pm_runtime_set_active(lis3->pm_dev);
+ pm_runtime_enable(lis3->pm_dev);
+ }
+
+ if (lis3lv02d_joystick_enable(lis3))
+ pr_err("joystick initialization failed\n");
+
+ /* passing in platform specific data is purely optional and only
+ * used by the SPI transport layer at the moment */
+ if (lis3->pdata) {
+ struct lis3lv02d_platform_data *p = lis3->pdata;
+
+ if (lis3->whoami == WAI_8B)
+ lis3lv02d_8b_configure(lis3, p);
+
+ irq_flags = p->irq_flags1 & IRQF_TRIGGER_MASK;
+
+ lis3->irq_cfg = p->irq_cfg;
+ if (p->irq_cfg)
+ lis3->write(lis3, CTRL_REG3, p->irq_cfg);
+
+ if (p->default_rate)
+ lis3lv02d_set_odr(lis3, p->default_rate);
+ }
+
+ /* bail if we did not get an IRQ from the bus layer */
+ if (!lis3->irq) {
+ pr_debug("No IRQ. Disabling /dev/freefall\n");
+ goto out;
+ }
+
+ /*
+ * The sensor can generate interrupts for free-fall and direction
+ * detection (distinguishable with FF_WU_SRC and DD_SRC) but to keep
+ * the things simple and _fast_ we activate it only for free-fall, so
+ * no need to read register (very slow with ACPI). For the same reason,
+ * we forbid shared interrupts.
+ *
+ * IRQF_TRIGGER_RISING seems pointless on HP laptops because the
+ * io-apic is not configurable (and generates a warning) but I keep it
+ * in case of support for other hardware.
+ */
+ if (lis3->pdata && lis3->whoami == WAI_8B)
+ thread_fn = lis302dl_interrupt_thread1_8b;
+ else
+ thread_fn = NULL;
+
+ err = request_threaded_irq(lis3->irq, lis302dl_interrupt,
+ thread_fn,
+ IRQF_TRIGGER_RISING | IRQF_ONESHOT |
+ irq_flags,
+ DRIVER_NAME, lis3);
+
+ if (err < 0) {
+ pr_err("Cannot get IRQ\n");
+ goto out;
+ }
+
+ lis3->miscdev.minor = MISC_DYNAMIC_MINOR;
+ lis3->miscdev.name = "freefall";
+ lis3->miscdev.fops = &lis3lv02d_misc_fops;
+
+ if (misc_register(&lis3->miscdev))
+ pr_err("misc_register failed\n");
+out:
+ return 0;
+}
+EXPORT_SYMBOL_GPL(lis3lv02d_init_device);
+
+MODULE_DESCRIPTION("ST LIS3LV02Dx three-axis digital accelerometer driver");
+MODULE_AUTHOR("Yan Burman, Eric Piel, Pavel Machek");
+MODULE_LICENSE("GPL");
diff --git a/drivers/misc/lis3lv02d/lis3lv02d.h b/drivers/misc/lis3lv02d/lis3lv02d.h
new file mode 100644
index 0000000..c439c82
--- /dev/null
+++ b/drivers/misc/lis3lv02d/lis3lv02d.h
@@ -0,0 +1,331 @@
+/*
+ * lis3lv02d.h - ST LIS3LV02DL accelerometer driver
+ *
+ * Copyright (C) 2007-2008 Yan Burman
+ * Copyright (C) 2008-2009 Eric Piel
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+#include <linux/platform_device.h>
+#include <linux/input-polldev.h>
+#include <linux/regulator/consumer.h>
+#include <linux/miscdevice.h>
+
+/*
+ * This driver tries to support the "digital" accelerometer chips from
+ * STMicroelectronics such as LIS3LV02DL, LIS302DL, LIS3L02DQ, LIS331DL,
+ * LIS331DLH, LIS35DE, or LIS202DL. They are very similar in terms of
+ * programming, with almost the same registers. In addition to differing
+ * on physical properties, they differ on the number of axes (2/3),
+ * precision (8/12 bits), and special features (freefall detection,
+ * click...). Unfortunately, not all the differences can be probed via
+ * a register. They can be connected either via I²C or SPI.
+ */
+
+#include <linux/lis3lv02d.h>
+
+enum lis3_reg {
+ WHO_AM_I = 0x0F,
+ OFFSET_X = 0x16,
+ OFFSET_Y = 0x17,
+ OFFSET_Z = 0x18,
+ GAIN_X = 0x19,
+ GAIN_Y = 0x1A,
+ GAIN_Z = 0x1B,
+ CTRL_REG1 = 0x20,
+ CTRL_REG2 = 0x21,
+ CTRL_REG3 = 0x22,
+ CTRL_REG4 = 0x23,
+ HP_FILTER_RESET = 0x23,
+ STATUS_REG = 0x27,
+ OUTX_L = 0x28,
+ OUTX_H = 0x29,
+ OUTX = 0x29,
+ OUTY_L = 0x2A,
+ OUTY_H = 0x2B,
+ OUTY = 0x2B,
+ OUTZ_L = 0x2C,
+ OUTZ_H = 0x2D,
+ OUTZ = 0x2D,
+};
+
+enum lis302d_reg {
+ FF_WU_CFG_1 = 0x30,
+ FF_WU_SRC_1 = 0x31,
+ FF_WU_THS_1 = 0x32,
+ FF_WU_DURATION_1 = 0x33,
+ FF_WU_CFG_2 = 0x34,
+ FF_WU_SRC_2 = 0x35,
+ FF_WU_THS_2 = 0x36,
+ FF_WU_DURATION_2 = 0x37,
+ CLICK_CFG = 0x38,
+ CLICK_SRC = 0x39,
+ CLICK_THSY_X = 0x3B,
+ CLICK_THSZ = 0x3C,
+ CLICK_TIMELIMIT = 0x3D,
+ CLICK_LATENCY = 0x3E,
+ CLICK_WINDOW = 0x3F,
+};
+
+enum lis3lv02d_reg {
+ FF_WU_CFG = 0x30,
+ FF_WU_SRC = 0x31,
+ FF_WU_ACK = 0x32,
+ FF_WU_THS_L = 0x34,
+ FF_WU_THS_H = 0x35,
+ FF_WU_DURATION = 0x36,
+ DD_CFG = 0x38,
+ DD_SRC = 0x39,
+ DD_ACK = 0x3A,
+ DD_THSI_L = 0x3C,
+ DD_THSI_H = 0x3D,
+ DD_THSE_L = 0x3E,
+ DD_THSE_H = 0x3F,
+};
+
+enum lis3_who_am_i {
+ WAI_3DLH = 0x32, /* 16 bits: LIS331DLH */
+ WAI_3DC = 0x33, /* 8 bits: LIS3DC, HP3DC */
+ WAI_12B = 0x3A, /* 12 bits: LIS3LV02D[LQ]... */
+ WAI_8B = 0x3B, /* 8 bits: LIS[23]02D[LQ]... */
+ WAI_6B = 0x52, /* 6 bits: LIS331DLF - not supported */
+};
+
+enum lis3_type {
+ LIS3LV02D,
+ LIS3DC,
+ HP3DC,
+ LIS2302D,
+ LIS331DLF,
+ LIS331DLH,
+};
+
+enum lis3lv02d_ctrl1_12b {
+ CTRL1_Xen = 0x01,
+ CTRL1_Yen = 0x02,
+ CTRL1_Zen = 0x04,
+ CTRL1_ST = 0x08,
+ CTRL1_DF0 = 0x10,
+ CTRL1_DF1 = 0x20,
+ CTRL1_PD0 = 0x40,
+ CTRL1_PD1 = 0x80,
+};
+
+/* Delta to ctrl1_12b version */
+enum lis3lv02d_ctrl1_8b {
+ CTRL1_STM = 0x08,
+ CTRL1_STP = 0x10,
+ CTRL1_FS = 0x20,
+ CTRL1_PD = 0x40,
+ CTRL1_DR = 0x80,
+};
+
+enum lis3lv02d_ctrl1_3dc {
+ CTRL1_ODR0 = 0x10,
+ CTRL1_ODR1 = 0x20,
+ CTRL1_ODR2 = 0x40,
+ CTRL1_ODR3 = 0x80,
+};
+
+enum lis331dlh_ctrl1 {
+ CTRL1_DR0 = 0x08,
+ CTRL1_DR1 = 0x10,
+ CTRL1_PM0 = 0x20,
+ CTRL1_PM1 = 0x40,
+ CTRL1_PM2 = 0x80,
+};
+
+enum lis331dlh_ctrl2 {
+ CTRL2_HPEN1 = 0x04,
+ CTRL2_HPEN2 = 0x08,
+ CTRL2_FDS_3DLH = 0x10,
+ CTRL2_BOOT_3DLH = 0x80,
+};
+
+enum lis331dlh_ctrl4 {
+ CTRL4_STSIGN = 0x08,
+ CTRL4_BLE = 0x40,
+ CTRL4_BDU = 0x80,
+};
+
+enum lis3lv02d_ctrl2 {
+ CTRL2_DAS = 0x01,
+ CTRL2_SIM = 0x02,
+ CTRL2_DRDY = 0x04,
+ CTRL2_IEN = 0x08,
+ CTRL2_BOOT = 0x10,
+ CTRL2_BLE = 0x20,
+ CTRL2_BDU = 0x40, /* Block Data Update */
+ CTRL2_FS = 0x80, /* Full Scale selection */
+};
+
+enum lis3lv02d_ctrl4_3dc {
+ CTRL4_SIM = 0x01,
+ CTRL4_ST0 = 0x02,
+ CTRL4_ST1 = 0x04,
+ CTRL4_FS0 = 0x10,
+ CTRL4_FS1 = 0x20,
+};
+
+enum lis302d_ctrl2 {
+ HP_FF_WU2 = 0x08,
+ HP_FF_WU1 = 0x04,
+ CTRL2_BOOT_8B = 0x40,
+};
+
+enum lis3lv02d_ctrl3 {
+ CTRL3_CFS0 = 0x01,
+ CTRL3_CFS1 = 0x02,
+ CTRL3_FDS = 0x10,
+ CTRL3_HPFF = 0x20,
+ CTRL3_HPDD = 0x40,
+ CTRL3_ECK = 0x80,
+};
+
+enum lis3lv02d_status_reg {
+ STATUS_XDA = 0x01,
+ STATUS_YDA = 0x02,
+ STATUS_ZDA = 0x04,
+ STATUS_XYZDA = 0x08,
+ STATUS_XOR = 0x10,
+ STATUS_YOR = 0x20,
+ STATUS_ZOR = 0x40,
+ STATUS_XYZOR = 0x80,
+};
+
+enum lis3lv02d_ff_wu_cfg {
+ FF_WU_CFG_XLIE = 0x01,
+ FF_WU_CFG_XHIE = 0x02,
+ FF_WU_CFG_YLIE = 0x04,
+ FF_WU_CFG_YHIE = 0x08,
+ FF_WU_CFG_ZLIE = 0x10,
+ FF_WU_CFG_ZHIE = 0x20,
+ FF_WU_CFG_LIR = 0x40,
+ FF_WU_CFG_AOI = 0x80,
+};
+
+enum lis3lv02d_ff_wu_src {
+ FF_WU_SRC_XL = 0x01,
+ FF_WU_SRC_XH = 0x02,
+ FF_WU_SRC_YL = 0x04,
+ FF_WU_SRC_YH = 0x08,
+ FF_WU_SRC_ZL = 0x10,
+ FF_WU_SRC_ZH = 0x20,
+ FF_WU_SRC_IA = 0x40,
+};
+
+enum lis3lv02d_dd_cfg {
+ DD_CFG_XLIE = 0x01,
+ DD_CFG_XHIE = 0x02,
+ DD_CFG_YLIE = 0x04,
+ DD_CFG_YHIE = 0x08,
+ DD_CFG_ZLIE = 0x10,
+ DD_CFG_ZHIE = 0x20,
+ DD_CFG_LIR = 0x40,
+ DD_CFG_IEND = 0x80,
+};
+
+enum lis3lv02d_dd_src {
+ DD_SRC_XL = 0x01,
+ DD_SRC_XH = 0x02,
+ DD_SRC_YL = 0x04,
+ DD_SRC_YH = 0x08,
+ DD_SRC_ZL = 0x10,
+ DD_SRC_ZH = 0x20,
+ DD_SRC_IA = 0x40,
+};
+
+enum lis3lv02d_click_src_8b {
+ CLICK_SINGLE_X = 0x01,
+ CLICK_DOUBLE_X = 0x02,
+ CLICK_SINGLE_Y = 0x04,
+ CLICK_DOUBLE_Y = 0x08,
+ CLICK_SINGLE_Z = 0x10,
+ CLICK_DOUBLE_Z = 0x20,
+ CLICK_IA = 0x40,
+};
+
+enum lis3lv02d_reg_state {
+ LIS3_REG_OFF = 0x00,
+ LIS3_REG_ON = 0x01,
+};
+
+union axis_conversion {
+ struct {
+ int x, y, z;
+ };
+ int as_array[3];
+
+};
+
+struct lis3lv02d {
+ void *bus_priv; /* used by the bus layer only */
+ struct device *pm_dev; /* for pm_runtime purposes */
+ int (*init) (struct lis3lv02d *lis3);
+ int (*write) (struct lis3lv02d *lis3, int reg, u8 val);
+ int (*read) (struct lis3lv02d *lis3, int reg, u8 *ret);
+ int (*blkread) (struct lis3lv02d *lis3, int reg, int len, u8 *ret);
+ int (*reg_ctrl) (struct lis3lv02d *lis3, bool state);
+
+ int *odrs; /* Supported output data rates */
+ u8 *regs; /* Regs to store / restore */
+ int regs_size;
+ u8 *reg_cache;
+ bool regs_stored;
+ u8 odr_mask; /* ODR bit mask */
+ u8 whoami; /* indicates measurement precision */
+ s16 (*read_data) (struct lis3lv02d *lis3, int reg);
+ int mdps_max_val;
+ int pwron_delay;
+ int scale; /*
+ * relationship between 1 LBS and mG
+ * (1/1000th of earth gravity)
+ */
+
+ struct input_polled_dev *idev; /* input device */
+ struct platform_device *pdev; /* platform device */
+ struct regulator_bulk_data regulators[2];
+ atomic_t count; /* interrupt count after last read */
+ union axis_conversion ac; /* hw -> logical axis */
+ int mapped_btns[3];
+
+ u32 irq; /* IRQ number */
+ struct fasync_struct *async_queue; /* queue for the misc device */
+ wait_queue_head_t misc_wait; /* Wait queue for the misc device */
+ unsigned long misc_opened; /* bit0: whether the device is open */
+ struct miscdevice miscdev;
+
+ int data_ready_count[2];
+ atomic_t wake_thread;
+ unsigned char irq_cfg;
+ unsigned int shift_adj;
+
+ struct lis3lv02d_platform_data *pdata; /* for passing board config */
+ struct mutex mutex; /* Serialize poll and selftest */
+
+#ifdef CONFIG_OF
+ struct device_node *of_node;
+#endif
+};
+
+int lis3lv02d_init_device(struct lis3lv02d *lis3);
+int lis3lv02d_joystick_enable(struct lis3lv02d *lis3);
+void lis3lv02d_joystick_disable(struct lis3lv02d *lis3);
+void lis3lv02d_poweroff(struct lis3lv02d *lis3);
+int lis3lv02d_poweron(struct lis3lv02d *lis3);
+int lis3lv02d_remove_fs(struct lis3lv02d *lis3);
+int lis3lv02d_init_dt(struct lis3lv02d *lis3);
+
+extern struct lis3lv02d lis3_dev;
diff --git a/drivers/misc/lis3lv02d/lis3lv02d_i2c.c b/drivers/misc/lis3lv02d/lis3lv02d_i2c.c
new file mode 100644
index 0000000..0c3bb7e
--- /dev/null
+++ b/drivers/misc/lis3lv02d/lis3lv02d_i2c.c
@@ -0,0 +1,289 @@
+/*
+ * drivers/hwmon/lis3lv02d_i2c.c
+ *
+ * Implements I2C interface for lis3lv02d (STMicroelectronics) accelerometer.
+ * Driver is based on corresponding SPI driver written by Daniel Mack
+ * (lis3lv02d_spi.c (C) 2009 Daniel Mack <daniel@caiaq.de> ).
+ *
+ * Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies).
+ *
+ * Contact: Samu Onkalo <samu.p.onkalo@nokia.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+ * 02110-1301 USA
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/err.h>
+#include <linux/i2c.h>
+#include <linux/pm_runtime.h>
+#include <linux/delay.h>
+#include <linux/of.h>
+#include <linux/of_platform.h>
+#include <linux/of_device.h>
+
+#include "lis3lv02d.h"
+
+#define DRV_NAME "lis3lv02d_i2c"
+
+static const char reg_vdd[] = "Vdd";
+static const char reg_vdd_io[] = "Vdd_IO";
+
+static int lis3_reg_ctrl(struct lis3lv02d *lis3, bool state)
+{
+ int ret;
+ if (state == LIS3_REG_OFF) {
+ ret = regulator_bulk_disable(ARRAY_SIZE(lis3->regulators),
+ lis3->regulators);
+ } else {
+ ret = regulator_bulk_enable(ARRAY_SIZE(lis3->regulators),
+ lis3->regulators);
+ /* Chip needs time to wakeup. Not mentioned in datasheet */
+ usleep_range(10000, 20000);
+ }
+ return ret;
+}
+
+static inline s32 lis3_i2c_write(struct lis3lv02d *lis3, int reg, u8 value)
+{
+ struct i2c_client *c = lis3->bus_priv;
+ return i2c_smbus_write_byte_data(c, reg, value);
+}
+
+static inline s32 lis3_i2c_read(struct lis3lv02d *lis3, int reg, u8 *v)
+{
+ struct i2c_client *c = lis3->bus_priv;
+ *v = i2c_smbus_read_byte_data(c, reg);
+ return 0;
+}
+
+static inline s32 lis3_i2c_blockread(struct lis3lv02d *lis3, int reg, int len,
+ u8 *v)
+{
+ struct i2c_client *c = lis3->bus_priv;
+ reg |= (1 << 7); /* 7th bit enables address auto incrementation */
+ return i2c_smbus_read_i2c_block_data(c, reg, len, v);
+}
+
+static int lis3_i2c_init(struct lis3lv02d *lis3)
+{
+ u8 reg;
+ int ret;
+
+ lis3_reg_ctrl(lis3, LIS3_REG_ON);
+
+ lis3->read(lis3, WHO_AM_I, ®);
+ if (reg != lis3->whoami)
+ printk(KERN_ERR "lis3: power on failure\n");
+
+ /* power up the device */
+ ret = lis3->read(lis3, CTRL_REG1, ®);
+ if (ret < 0)
+ return ret;
+
+ if (lis3->whoami == WAI_3DLH)
+ reg |= CTRL1_PM0 | CTRL1_Xen | CTRL1_Yen | CTRL1_Zen;
+ else
+ reg |= CTRL1_PD0 | CTRL1_Xen | CTRL1_Yen | CTRL1_Zen;
+
+ return lis3->write(lis3, CTRL_REG1, reg);
+}
+
+/* Default axis mapping but it can be overwritten by platform data */
+static union axis_conversion lis3lv02d_axis_map =
+ { .as_array = { LIS3_DEV_X, LIS3_DEV_Y, LIS3_DEV_Z } };
+
+#ifdef CONFIG_OF
+static const struct of_device_id lis3lv02d_i2c_dt_ids[] = {
+ { .compatible = "st,lis3lv02d" },
+ {}
+};
+MODULE_DEVICE_TABLE(of, lis3lv02d_i2c_dt_ids);
+#endif
+
+static int lis3lv02d_i2c_probe(struct i2c_client *client,
+ const struct i2c_device_id *id)
+{
+ int ret = 0;
+ struct lis3lv02d_platform_data *pdata = client->dev.platform_data;
+
+#ifdef CONFIG_OF
+ if (of_match_device(lis3lv02d_i2c_dt_ids, &client->dev)) {
+ lis3_dev.of_node = client->dev.of_node;
+ ret = lis3lv02d_init_dt(&lis3_dev);
+ if (ret)
+ return ret;
+ pdata = lis3_dev.pdata;
+ }
+#endif
+
+ if (pdata) {
+ if ((pdata->driver_features & LIS3_USE_BLOCK_READ) &&
+ (i2c_check_functionality(client->adapter,
+ I2C_FUNC_SMBUS_I2C_BLOCK)))
+ lis3_dev.blkread = lis3_i2c_blockread;
+
+ if (pdata->axis_x)
+ lis3lv02d_axis_map.x = pdata->axis_x;
+
+ if (pdata->axis_y)
+ lis3lv02d_axis_map.y = pdata->axis_y;
+
+ if (pdata->axis_z)
+ lis3lv02d_axis_map.z = pdata->axis_z;
+
+ if (pdata->setup_resources)
+ ret = pdata->setup_resources();
+
+ if (ret)
+ goto fail;
+ }
+
+ lis3_dev.regulators[0].supply = reg_vdd;
+ lis3_dev.regulators[1].supply = reg_vdd_io;
+ ret = regulator_bulk_get(&client->dev,
+ ARRAY_SIZE(lis3_dev.regulators),
+ lis3_dev.regulators);
+ if (ret < 0)
+ goto fail;
+
+ lis3_dev.pdata = pdata;
+ lis3_dev.bus_priv = client;
+ lis3_dev.init = lis3_i2c_init;
+ lis3_dev.read = lis3_i2c_read;
+ lis3_dev.write = lis3_i2c_write;
+ lis3_dev.irq = client->irq;
+ lis3_dev.ac = lis3lv02d_axis_map;
+ lis3_dev.pm_dev = &client->dev;
+
+ i2c_set_clientdata(client, &lis3_dev);
+
+ /* Provide power over the init call */
+ lis3_reg_ctrl(&lis3_dev, LIS3_REG_ON);
+
+ ret = lis3lv02d_init_device(&lis3_dev);
+
+ lis3_reg_ctrl(&lis3_dev, LIS3_REG_OFF);
+
+ if (ret)
+ goto fail2;
+ return 0;
+
+fail2:
+ regulator_bulk_free(ARRAY_SIZE(lis3_dev.regulators),
+ lis3_dev.regulators);
+fail:
+ if (pdata && pdata->release_resources)
+ pdata->release_resources();
+ return ret;
+}
+
+static int lis3lv02d_i2c_remove(struct i2c_client *client)
+{
+ struct lis3lv02d *lis3 = i2c_get_clientdata(client);
+ struct lis3lv02d_platform_data *pdata = client->dev.platform_data;
+
+ if (pdata && pdata->release_resources)
+ pdata->release_resources();
+
+ lis3lv02d_joystick_disable(lis3);
+ lis3lv02d_remove_fs(&lis3_dev);
+
+ regulator_bulk_free(ARRAY_SIZE(lis3->regulators),
+ lis3_dev.regulators);
+ return 0;
+}
+
+#ifdef CONFIG_PM_SLEEP
+static int lis3lv02d_i2c_suspend(struct device *dev)
+{
+ struct i2c_client *client = container_of(dev, struct i2c_client, dev);
+ struct lis3lv02d *lis3 = i2c_get_clientdata(client);
+
+ if (!lis3->pdata || !lis3->pdata->wakeup_flags)
+ lis3lv02d_poweroff(lis3);
+ return 0;
+}
+
+static int lis3lv02d_i2c_resume(struct device *dev)
+{
+ struct i2c_client *client = container_of(dev, struct i2c_client, dev);
+ struct lis3lv02d *lis3 = i2c_get_clientdata(client);
+
+ /*
+ * pm_runtime documentation says that devices should always
+ * be powered on at resume. Pm_runtime turns them off after system
+ * wide resume is complete.
+ */
+ if (!lis3->pdata || !lis3->pdata->wakeup_flags ||
+ pm_runtime_suspended(dev))
+ lis3lv02d_poweron(lis3);
+
+ return 0;
+}
+#endif /* CONFIG_PM_SLEEP */
+
+#ifdef CONFIG_PM
+static int lis3_i2c_runtime_suspend(struct device *dev)
+{
+ struct i2c_client *client = container_of(dev, struct i2c_client, dev);
+ struct lis3lv02d *lis3 = i2c_get_clientdata(client);
+
+ lis3lv02d_poweroff(lis3);
+ return 0;
+}
+
+static int lis3_i2c_runtime_resume(struct device *dev)
+{
+ struct i2c_client *client = container_of(dev, struct i2c_client, dev);
+ struct lis3lv02d *lis3 = i2c_get_clientdata(client);
+
+ lis3lv02d_poweron(lis3);
+ return 0;
+}
+#endif /* CONFIG_PM */
+
+static const struct i2c_device_id lis3lv02d_id[] = {
+ {"lis3lv02d", LIS3LV02D},
+ {"lis331dlh", LIS331DLH},
+ {}
+};
+
+MODULE_DEVICE_TABLE(i2c, lis3lv02d_id);
+
+static const struct dev_pm_ops lis3_pm_ops = {
+ SET_SYSTEM_SLEEP_PM_OPS(lis3lv02d_i2c_suspend,
+ lis3lv02d_i2c_resume)
+ SET_RUNTIME_PM_OPS(lis3_i2c_runtime_suspend,
+ lis3_i2c_runtime_resume,
+ NULL)
+};
+
+static struct i2c_driver lis3lv02d_i2c_driver = {
+ .driver = {
+ .name = DRV_NAME,
+ .pm = &lis3_pm_ops,
+ .of_match_table = of_match_ptr(lis3lv02d_i2c_dt_ids),
+ },
+ .probe = lis3lv02d_i2c_probe,
+ .remove = lis3lv02d_i2c_remove,
+ .id_table = lis3lv02d_id,
+};
+
+module_i2c_driver(lis3lv02d_i2c_driver);
+
+MODULE_AUTHOR("Nokia Corporation");
+MODULE_DESCRIPTION("lis3lv02d I2C interface");
+MODULE_LICENSE("GPL");
diff --git a/drivers/misc/lis3lv02d/lis3lv02d_spi.c b/drivers/misc/lis3lv02d/lis3lv02d_spi.c
new file mode 100644
index 0000000..e575475
--- /dev/null
+++ b/drivers/misc/lis3lv02d/lis3lv02d_spi.c
@@ -0,0 +1,153 @@
+/*
+ * lis3lv02d_spi - SPI glue layer for lis3lv02d
+ *
+ * Copyright (c) 2009 Daniel Mack <daniel@caiaq.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * publishhed by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/err.h>
+#include <linux/input.h>
+#include <linux/interrupt.h>
+#include <linux/workqueue.h>
+#include <linux/spi/spi.h>
+#include <linux/pm.h>
+#include <linux/of.h>
+#include <linux/of_platform.h>
+#include <linux/of_device.h>
+
+#include "lis3lv02d.h"
+
+#define DRV_NAME "lis3lv02d_spi"
+#define LIS3_SPI_READ 0x80
+
+static int lis3_spi_read(struct lis3lv02d *lis3, int reg, u8 *v)
+{
+ struct spi_device *spi = lis3->bus_priv;
+ int ret = spi_w8r8(spi, reg | LIS3_SPI_READ);
+ if (ret < 0)
+ return -EINVAL;
+
+ *v = (u8) ret;
+ return 0;
+}
+
+static int lis3_spi_write(struct lis3lv02d *lis3, int reg, u8 val)
+{
+ u8 tmp[2] = { reg, val };
+ struct spi_device *spi = lis3->bus_priv;
+ return spi_write(spi, tmp, sizeof(tmp));
+}
+
+static int lis3_spi_init(struct lis3lv02d *lis3)
+{
+ u8 reg;
+ int ret;
+
+ /* power up the device */
+ ret = lis3->read(lis3, CTRL_REG1, ®);
+ if (ret < 0)
+ return ret;
+
+ reg |= CTRL1_PD0 | CTRL1_Xen | CTRL1_Yen | CTRL1_Zen;
+ return lis3->write(lis3, CTRL_REG1, reg);
+}
+
+static union axis_conversion lis3lv02d_axis_normal =
+ { .as_array = { 1, 2, 3 } };
+
+#ifdef CONFIG_OF
+static const struct of_device_id lis302dl_spi_dt_ids[] = {
+ { .compatible = "st,lis302dl-spi" },
+ {}
+};
+MODULE_DEVICE_TABLE(of, lis302dl_spi_dt_ids);
+#endif
+
+static int lis302dl_spi_probe(struct spi_device *spi)
+{
+ int ret;
+
+ spi->bits_per_word = 8;
+ spi->mode = SPI_MODE_0;
+ ret = spi_setup(spi);
+ if (ret < 0)
+ return ret;
+
+ lis3_dev.bus_priv = spi;
+ lis3_dev.init = lis3_spi_init;
+ lis3_dev.read = lis3_spi_read;
+ lis3_dev.write = lis3_spi_write;
+ lis3_dev.irq = spi->irq;
+ lis3_dev.ac = lis3lv02d_axis_normal;
+ lis3_dev.pdata = spi->dev.platform_data;
+
+#ifdef CONFIG_OF
+ if (of_match_device(lis302dl_spi_dt_ids, &spi->dev)) {
+ lis3_dev.of_node = spi->dev.of_node;
+ ret = lis3lv02d_init_dt(&lis3_dev);
+ if (ret)
+ return ret;
+ }
+#endif
+ spi_set_drvdata(spi, &lis3_dev);
+
+ return lis3lv02d_init_device(&lis3_dev);
+}
+
+static int lis302dl_spi_remove(struct spi_device *spi)
+{
+ struct lis3lv02d *lis3 = spi_get_drvdata(spi);
+ lis3lv02d_joystick_disable(lis3);
+ lis3lv02d_poweroff(lis3);
+
+ return lis3lv02d_remove_fs(&lis3_dev);
+}
+
+#ifdef CONFIG_PM_SLEEP
+static int lis3lv02d_spi_suspend(struct device *dev)
+{
+ struct spi_device *spi = to_spi_device(dev);
+ struct lis3lv02d *lis3 = spi_get_drvdata(spi);
+
+ if (!lis3->pdata || !lis3->pdata->wakeup_flags)
+ lis3lv02d_poweroff(&lis3_dev);
+
+ return 0;
+}
+
+static int lis3lv02d_spi_resume(struct device *dev)
+{
+ struct spi_device *spi = to_spi_device(dev);
+ struct lis3lv02d *lis3 = spi_get_drvdata(spi);
+
+ if (!lis3->pdata || !lis3->pdata->wakeup_flags)
+ lis3lv02d_poweron(lis3);
+
+ return 0;
+}
+#endif
+
+static SIMPLE_DEV_PM_OPS(lis3lv02d_spi_pm, lis3lv02d_spi_suspend,
+ lis3lv02d_spi_resume);
+
+static struct spi_driver lis302dl_spi_driver = {
+ .driver = {
+ .name = DRV_NAME,
+ .pm = &lis3lv02d_spi_pm,
+ .of_match_table = of_match_ptr(lis302dl_spi_dt_ids),
+ },
+ .probe = lis302dl_spi_probe,
+ .remove = lis302dl_spi_remove,
+};
+
+module_spi_driver(lis302dl_spi_driver);
+
+MODULE_AUTHOR("Daniel Mack <daniel@caiaq.de>");
+MODULE_DESCRIPTION("lis3lv02d SPI glue layer");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("spi:" DRV_NAME);
diff --git a/drivers/misc/lkdtm.c b/drivers/misc/lkdtm.c
new file mode 100644
index 0000000..11fdadc
--- /dev/null
+++ b/drivers/misc/lkdtm.c
@@ -0,0 +1,879 @@
+/*
+ * Kprobe module for testing crash dumps
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2006
+ *
+ * Author: Ankita Garg <ankita@in.ibm.com>
+ *
+ * This module induces system failures at predefined crashpoints to
+ * evaluate the reliability of crash dumps obtained using different dumping
+ * solutions.
+ *
+ * It is adapted from the Linux Kernel Dump Test Tool by
+ * Fernando Luis Vazquez Cao <http://lkdtt.sourceforge.net>
+ *
+ * Debugfs support added by Simon Kagstrom <simon.kagstrom@netinsight.net>
+ *
+ * See Documentation/fault-injection/provoke-crashes.txt for instructions
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/module.h>
+#include <linux/buffer_head.h>
+#include <linux/kprobes.h>
+#include <linux/list.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/hrtimer.h>
+#include <linux/slab.h>
+#include <scsi/scsi_cmnd.h>
+#include <linux/debugfs.h>
+#include <linux/vmalloc.h>
+#include <linux/mman.h>
+#include <asm/cacheflush.h>
+
+#ifdef CONFIG_IDE
+#include <linux/ide.h>
+#endif
+
+/*
+ * Make sure our attempts to over run the kernel stack doesn't trigger
+ * a compiler warning when CONFIG_FRAME_WARN is set. Then make sure we
+ * recurse past the end of THREAD_SIZE by default.
+ */
+#if defined(CONFIG_FRAME_WARN) && (CONFIG_FRAME_WARN > 0)
+#define REC_STACK_SIZE (CONFIG_FRAME_WARN / 2)
+#else
+#define REC_STACK_SIZE (THREAD_SIZE / 8)
+#endif
+#define REC_NUM_DEFAULT ((THREAD_SIZE / REC_STACK_SIZE) * 2)
+
+#define DEFAULT_COUNT 10
+#define EXEC_SIZE 64
+
+enum cname {
+ CN_INVALID,
+ CN_INT_HARDWARE_ENTRY,
+ CN_INT_HW_IRQ_EN,
+ CN_INT_TASKLET_ENTRY,
+ CN_FS_DEVRW,
+ CN_MEM_SWAPOUT,
+ CN_TIMERADD,
+ CN_SCSI_DISPATCH_CMD,
+ CN_IDE_CORE_CP,
+ CN_DIRECT,
+};
+
+enum ctype {
+ CT_NONE,
+ CT_PANIC,
+ CT_BUG,
+ CT_WARNING,
+ CT_EXCEPTION,
+ CT_LOOP,
+ CT_OVERFLOW,
+ CT_CORRUPT_STACK,
+ CT_UNALIGNED_LOAD_STORE_WRITE,
+ CT_OVERWRITE_ALLOCATION,
+ CT_WRITE_AFTER_FREE,
+ CT_SOFTLOCKUP,
+ CT_HARDLOCKUP,
+ CT_SPINLOCKUP,
+ CT_HUNG_TASK,
+ CT_EXEC_DATA,
+ CT_EXEC_STACK,
+ CT_EXEC_KMALLOC,
+ CT_EXEC_VMALLOC,
+ CT_EXEC_USERSPACE,
+ CT_ACCESS_USERSPACE,
+ CT_WRITE_RO,
+ CT_WRITE_KERN,
+};
+
+static char* cp_name[] = {
+ "INT_HARDWARE_ENTRY",
+ "INT_HW_IRQ_EN",
+ "INT_TASKLET_ENTRY",
+ "FS_DEVRW",
+ "MEM_SWAPOUT",
+ "TIMERADD",
+ "SCSI_DISPATCH_CMD",
+ "IDE_CORE_CP",
+ "DIRECT",
+};
+
+static char* cp_type[] = {
+ "PANIC",
+ "BUG",
+ "WARNING",
+ "EXCEPTION",
+ "LOOP",
+ "OVERFLOW",
+ "CORRUPT_STACK",
+ "UNALIGNED_LOAD_STORE_WRITE",
+ "OVERWRITE_ALLOCATION",
+ "WRITE_AFTER_FREE",
+ "SOFTLOCKUP",
+ "HARDLOCKUP",
+ "SPINLOCKUP",
+ "HUNG_TASK",
+ "EXEC_DATA",
+ "EXEC_STACK",
+ "EXEC_KMALLOC",
+ "EXEC_VMALLOC",
+ "EXEC_USERSPACE",
+ "ACCESS_USERSPACE",
+ "WRITE_RO",
+ "WRITE_KERN",
+};
+
+static struct jprobe lkdtm;
+
+static int lkdtm_parse_commandline(void);
+static void lkdtm_handler(void);
+
+static char* cpoint_name;
+static char* cpoint_type;
+static int cpoint_count = DEFAULT_COUNT;
+static int recur_count = REC_NUM_DEFAULT;
+
+static enum cname cpoint = CN_INVALID;
+static enum ctype cptype = CT_NONE;
+static int count = DEFAULT_COUNT;
+static DEFINE_SPINLOCK(count_lock);
+static DEFINE_SPINLOCK(lock_me_up);
+
+static u8 data_area[EXEC_SIZE];
+
+static const unsigned long rodata = 0xAA55AA55;
+
+module_param(recur_count, int, 0644);
+MODULE_PARM_DESC(recur_count, " Recursion level for the stack overflow test");
+module_param(cpoint_name, charp, 0444);
+MODULE_PARM_DESC(cpoint_name, " Crash Point, where kernel is to be crashed");
+module_param(cpoint_type, charp, 0444);
+MODULE_PARM_DESC(cpoint_type, " Crash Point Type, action to be taken on "\
+ "hitting the crash point");
+module_param(cpoint_count, int, 0644);
+MODULE_PARM_DESC(cpoint_count, " Crash Point Count, number of times the "\
+ "crash point is to be hit to trigger action");
+
+static unsigned int jp_do_irq(unsigned int irq)
+{
+ lkdtm_handler();
+ jprobe_return();
+ return 0;
+}
+
+static irqreturn_t jp_handle_irq_event(unsigned int irq,
+ struct irqaction *action)
+{
+ lkdtm_handler();
+ jprobe_return();
+ return 0;
+}
+
+static void jp_tasklet_action(struct softirq_action *a)
+{
+ lkdtm_handler();
+ jprobe_return();
+}
+
+static void jp_ll_rw_block(int rw, int nr, struct buffer_head *bhs[])
+{
+ lkdtm_handler();
+ jprobe_return();
+}
+
+struct scan_control;
+
+static unsigned long jp_shrink_inactive_list(unsigned long max_scan,
+ struct zone *zone,
+ struct scan_control *sc)
+{
+ lkdtm_handler();
+ jprobe_return();
+ return 0;
+}
+
+static int jp_hrtimer_start(struct hrtimer *timer, ktime_t tim,
+ const enum hrtimer_mode mode)
+{
+ lkdtm_handler();
+ jprobe_return();
+ return 0;
+}
+
+static int jp_scsi_dispatch_cmd(struct scsi_cmnd *cmd)
+{
+ lkdtm_handler();
+ jprobe_return();
+ return 0;
+}
+
+#ifdef CONFIG_IDE
+static int jp_generic_ide_ioctl(ide_drive_t *drive, struct file *file,
+ struct block_device *bdev, unsigned int cmd,
+ unsigned long arg)
+{
+ lkdtm_handler();
+ jprobe_return();
+ return 0;
+}
+#endif
+
+/* Return the crashpoint number or NONE if the name is invalid */
+static enum ctype parse_cp_type(const char *what, size_t count)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(cp_type); i++) {
+ if (!strcmp(what, cp_type[i]))
+ return i + 1;
+ }
+
+ return CT_NONE;
+}
+
+static const char *cp_type_to_str(enum ctype type)
+{
+ if (type == CT_NONE || type < 0 || type > ARRAY_SIZE(cp_type))
+ return "None";
+
+ return cp_type[type - 1];
+}
+
+static const char *cp_name_to_str(enum cname name)
+{
+ if (name == CN_INVALID || name < 0 || name > ARRAY_SIZE(cp_name))
+ return "INVALID";
+
+ return cp_name[name - 1];
+}
+
+
+static int lkdtm_parse_commandline(void)
+{
+ int i;
+ unsigned long flags;
+
+ if (cpoint_count < 1 || recur_count < 1)
+ return -EINVAL;
+
+ spin_lock_irqsave(&count_lock, flags);
+ count = cpoint_count;
+ spin_unlock_irqrestore(&count_lock, flags);
+
+ /* No special parameters */
+ if (!cpoint_type && !cpoint_name)
+ return 0;
+
+ /* Neither or both of these need to be set */
+ if (!cpoint_type || !cpoint_name)
+ return -EINVAL;
+
+ cptype = parse_cp_type(cpoint_type, strlen(cpoint_type));
+ if (cptype == CT_NONE)
+ return -EINVAL;
+
+ for (i = 0; i < ARRAY_SIZE(cp_name); i++) {
+ if (!strcmp(cpoint_name, cp_name[i])) {
+ cpoint = i + 1;
+ return 0;
+ }
+ }
+
+ /* Could not find a valid crash point */
+ return -EINVAL;
+}
+
+static int recursive_loop(int remaining)
+{
+ char buf[REC_STACK_SIZE];
+
+ /* Make sure compiler does not optimize this away. */
+ memset(buf, (remaining & 0xff) | 0x1, REC_STACK_SIZE);
+ if (!remaining)
+ return 0;
+ else
+ return recursive_loop(remaining - 1);
+}
+
+static void do_nothing(void)
+{
+ return;
+}
+
+/* Must immediately follow do_nothing for size calculuations to work out. */
+static void do_overwritten(void)
+{
+ pr_info("do_overwritten wasn't overwritten!\n");
+ return;
+}
+
+static noinline void corrupt_stack(void)
+{
+ /* Use default char array length that triggers stack protection. */
+ char data[8];
+
+ memset((void *)data, 0, 64);
+}
+
+static void execute_location(void *dst)
+{
+ void (*func)(void) = dst;
+
+ pr_info("attempting ok execution at %p\n", do_nothing);
+ do_nothing();
+
+ memcpy(dst, do_nothing, EXEC_SIZE);
+ flush_icache_range((unsigned long)dst, (unsigned long)dst + EXEC_SIZE);
+ pr_info("attempting bad execution at %p\n", func);
+ func();
+}
+
+static void execute_user_location(void *dst)
+{
+ /* Intentionally crossing kernel/user memory boundary. */
+ void (*func)(void) = dst;
+
+ pr_info("attempting ok execution at %p\n", do_nothing);
+ do_nothing();
+
+ if (copy_to_user((void __user *)dst, do_nothing, EXEC_SIZE))
+ return;
+ flush_icache_range((unsigned long)dst, (unsigned long)dst + EXEC_SIZE);
+ pr_info("attempting bad execution at %p\n", func);
+ func();
+}
+
+static void lkdtm_do_action(enum ctype which)
+{
+ switch (which) {
+ case CT_PANIC:
+ panic("dumptest");
+ break;
+ case CT_BUG:
+ BUG();
+ break;
+ case CT_WARNING:
+ WARN_ON(1);
+ break;
+ case CT_EXCEPTION:
+ *((int *) 0) = 0;
+ break;
+ case CT_LOOP:
+ for (;;)
+ ;
+ break;
+ case CT_OVERFLOW:
+ (void) recursive_loop(recur_count);
+ break;
+ case CT_CORRUPT_STACK:
+ corrupt_stack();
+ break;
+ case CT_UNALIGNED_LOAD_STORE_WRITE: {
+ static u8 data[5] __attribute__((aligned(4))) = {1, 2,
+ 3, 4, 5};
+ u32 *p;
+ u32 val = 0x12345678;
+
+ p = (u32 *)(data + 1);
+ if (*p == 0)
+ val = 0x87654321;
+ *p = val;
+ break;
+ }
+ case CT_OVERWRITE_ALLOCATION: {
+ size_t len = 1020;
+ u32 *data = kmalloc(len, GFP_KERNEL);
+
+ data[1024 / sizeof(u32)] = 0x12345678;
+ kfree(data);
+ break;
+ }
+ case CT_WRITE_AFTER_FREE: {
+ size_t len = 1024;
+ u32 *data = kmalloc(len, GFP_KERNEL);
+
+ kfree(data);
+ schedule();
+ memset(data, 0x78, len);
+ break;
+ }
+ case CT_SOFTLOCKUP:
+ preempt_disable();
+ for (;;)
+ cpu_relax();
+ break;
+ case CT_HARDLOCKUP:
+ local_irq_disable();
+ for (;;)
+ cpu_relax();
+ break;
+ case CT_SPINLOCKUP:
+ /* Must be called twice to trigger. */
+ spin_lock(&lock_me_up);
+ /* Let sparse know we intended to exit holding the lock. */
+ __release(&lock_me_up);
+ break;
+ case CT_HUNG_TASK:
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ schedule();
+ break;
+ case CT_EXEC_DATA:
+ execute_location(data_area);
+ break;
+ case CT_EXEC_STACK: {
+ u8 stack_area[EXEC_SIZE];
+ execute_location(stack_area);
+ break;
+ }
+ case CT_EXEC_KMALLOC: {
+ u32 *kmalloc_area = kmalloc(EXEC_SIZE, GFP_KERNEL);
+ execute_location(kmalloc_area);
+ kfree(kmalloc_area);
+ break;
+ }
+ case CT_EXEC_VMALLOC: {
+ u32 *vmalloc_area = vmalloc(EXEC_SIZE);
+ execute_location(vmalloc_area);
+ vfree(vmalloc_area);
+ break;
+ }
+ case CT_EXEC_USERSPACE: {
+ unsigned long user_addr;
+
+ user_addr = vm_mmap(NULL, 0, PAGE_SIZE,
+ PROT_READ | PROT_WRITE | PROT_EXEC,
+ MAP_ANONYMOUS | MAP_PRIVATE, 0);
+ if (user_addr >= TASK_SIZE) {
+ pr_warn("Failed to allocate user memory\n");
+ return;
+ }
+ execute_user_location((void *)user_addr);
+ vm_munmap(user_addr, PAGE_SIZE);
+ break;
+ }
+ case CT_ACCESS_USERSPACE: {
+ unsigned long user_addr, tmp = 0;
+ unsigned long *ptr;
+
+ user_addr = vm_mmap(NULL, 0, PAGE_SIZE,
+ PROT_READ | PROT_WRITE | PROT_EXEC,
+ MAP_ANONYMOUS | MAP_PRIVATE, 0);
+ if (user_addr >= TASK_SIZE) {
+ pr_warn("Failed to allocate user memory\n");
+ return;
+ }
+
+ if (copy_to_user((void __user *)user_addr, &tmp, sizeof(tmp))) {
+ pr_warn("copy_to_user failed\n");
+ vm_munmap(user_addr, PAGE_SIZE);
+ return;
+ }
+
+ ptr = (unsigned long *)user_addr;
+
+ pr_info("attempting bad read at %p\n", ptr);
+ tmp = *ptr;
+ tmp += 0xc0dec0de;
+
+ pr_info("attempting bad write at %p\n", ptr);
+ *ptr = tmp;
+
+ vm_munmap(user_addr, PAGE_SIZE);
+
+ break;
+ }
+ case CT_WRITE_RO: {
+ unsigned long *ptr;
+
+ ptr = (unsigned long *)&rodata;
+
+ pr_info("attempting bad write at %p\n", ptr);
+ *ptr ^= 0xabcd1234;
+
+ break;
+ }
+ case CT_WRITE_KERN: {
+ size_t size;
+ unsigned char *ptr;
+
+ size = (unsigned long)do_overwritten -
+ (unsigned long)do_nothing;
+ ptr = (unsigned char *)do_overwritten;
+
+ pr_info("attempting bad %zu byte write at %p\n", size, ptr);
+ memcpy(ptr, (unsigned char *)do_nothing, size);
+ flush_icache_range((unsigned long)ptr,
+ (unsigned long)(ptr + size));
+
+ do_overwritten();
+ break;
+ }
+ case CT_NONE:
+ default:
+ break;
+ }
+
+}
+
+static void lkdtm_handler(void)
+{
+ unsigned long flags;
+ bool do_it = false;
+
+ spin_lock_irqsave(&count_lock, flags);
+ count--;
+ pr_info("Crash point %s of type %s hit, trigger in %d rounds\n",
+ cp_name_to_str(cpoint), cp_type_to_str(cptype), count);
+
+ if (count == 0) {
+ do_it = true;
+ count = cpoint_count;
+ }
+ spin_unlock_irqrestore(&count_lock, flags);
+
+ if (do_it)
+ lkdtm_do_action(cptype);
+}
+
+static int lkdtm_register_cpoint(enum cname which)
+{
+ int ret;
+
+ cpoint = CN_INVALID;
+ if (lkdtm.entry != NULL)
+ unregister_jprobe(&lkdtm);
+
+ switch (which) {
+ case CN_DIRECT:
+ lkdtm_do_action(cptype);
+ return 0;
+ case CN_INT_HARDWARE_ENTRY:
+ lkdtm.kp.symbol_name = "do_IRQ";
+ lkdtm.entry = (kprobe_opcode_t*) jp_do_irq;
+ break;
+ case CN_INT_HW_IRQ_EN:
+ lkdtm.kp.symbol_name = "handle_IRQ_event";
+ lkdtm.entry = (kprobe_opcode_t*) jp_handle_irq_event;
+ break;
+ case CN_INT_TASKLET_ENTRY:
+ lkdtm.kp.symbol_name = "tasklet_action";
+ lkdtm.entry = (kprobe_opcode_t*) jp_tasklet_action;
+ break;
+ case CN_FS_DEVRW:
+ lkdtm.kp.symbol_name = "ll_rw_block";
+ lkdtm.entry = (kprobe_opcode_t*) jp_ll_rw_block;
+ break;
+ case CN_MEM_SWAPOUT:
+ lkdtm.kp.symbol_name = "shrink_inactive_list";
+ lkdtm.entry = (kprobe_opcode_t*) jp_shrink_inactive_list;
+ break;
+ case CN_TIMERADD:
+ lkdtm.kp.symbol_name = "hrtimer_start";
+ lkdtm.entry = (kprobe_opcode_t*) jp_hrtimer_start;
+ break;
+ case CN_SCSI_DISPATCH_CMD:
+ lkdtm.kp.symbol_name = "scsi_dispatch_cmd";
+ lkdtm.entry = (kprobe_opcode_t*) jp_scsi_dispatch_cmd;
+ break;
+ case CN_IDE_CORE_CP:
+#ifdef CONFIG_IDE
+ lkdtm.kp.symbol_name = "generic_ide_ioctl";
+ lkdtm.entry = (kprobe_opcode_t*) jp_generic_ide_ioctl;
+#else
+ pr_info("Crash point not available\n");
+ return -EINVAL;
+#endif
+ break;
+ default:
+ pr_info("Invalid Crash Point\n");
+ return -EINVAL;
+ }
+
+ cpoint = which;
+ if ((ret = register_jprobe(&lkdtm)) < 0) {
+ pr_info("Couldn't register jprobe\n");
+ cpoint = CN_INVALID;
+ }
+
+ return ret;
+}
+
+static ssize_t do_register_entry(enum cname which, struct file *f,
+ const char __user *user_buf, size_t count, loff_t *off)
+{
+ char *buf;
+ int err;
+
+ if (count >= PAGE_SIZE)
+ return -EINVAL;
+
+ buf = (char *)__get_free_page(GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+ if (copy_from_user(buf, user_buf, count)) {
+ free_page((unsigned long) buf);
+ return -EFAULT;
+ }
+ /* NULL-terminate and remove enter */
+ buf[count] = '\0';
+ strim(buf);
+
+ cptype = parse_cp_type(buf, count);
+ free_page((unsigned long) buf);
+
+ if (cptype == CT_NONE)
+ return -EINVAL;
+
+ err = lkdtm_register_cpoint(which);
+ if (err < 0)
+ return err;
+
+ *off += count;
+
+ return count;
+}
+
+/* Generic read callback that just prints out the available crash types */
+static ssize_t lkdtm_debugfs_read(struct file *f, char __user *user_buf,
+ size_t count, loff_t *off)
+{
+ char *buf;
+ int i, n, out;
+
+ buf = (char *)__get_free_page(GFP_KERNEL);
+ if (buf == NULL)
+ return -ENOMEM;
+
+ n = snprintf(buf, PAGE_SIZE, "Available crash types:\n");
+ for (i = 0; i < ARRAY_SIZE(cp_type); i++)
+ n += snprintf(buf + n, PAGE_SIZE - n, "%s\n", cp_type[i]);
+ buf[n] = '\0';
+
+ out = simple_read_from_buffer(user_buf, count, off,
+ buf, n);
+ free_page((unsigned long) buf);
+
+ return out;
+}
+
+static int lkdtm_debugfs_open(struct inode *inode, struct file *file)
+{
+ return 0;
+}
+
+
+static ssize_t int_hardware_entry(struct file *f, const char __user *buf,
+ size_t count, loff_t *off)
+{
+ return do_register_entry(CN_INT_HARDWARE_ENTRY, f, buf, count, off);
+}
+
+static ssize_t int_hw_irq_en(struct file *f, const char __user *buf,
+ size_t count, loff_t *off)
+{
+ return do_register_entry(CN_INT_HW_IRQ_EN, f, buf, count, off);
+}
+
+static ssize_t int_tasklet_entry(struct file *f, const char __user *buf,
+ size_t count, loff_t *off)
+{
+ return do_register_entry(CN_INT_TASKLET_ENTRY, f, buf, count, off);
+}
+
+static ssize_t fs_devrw_entry(struct file *f, const char __user *buf,
+ size_t count, loff_t *off)
+{
+ return do_register_entry(CN_FS_DEVRW, f, buf, count, off);
+}
+
+static ssize_t mem_swapout_entry(struct file *f, const char __user *buf,
+ size_t count, loff_t *off)
+{
+ return do_register_entry(CN_MEM_SWAPOUT, f, buf, count, off);
+}
+
+static ssize_t timeradd_entry(struct file *f, const char __user *buf,
+ size_t count, loff_t *off)
+{
+ return do_register_entry(CN_TIMERADD, f, buf, count, off);
+}
+
+static ssize_t scsi_dispatch_cmd_entry(struct file *f,
+ const char __user *buf, size_t count, loff_t *off)
+{
+ return do_register_entry(CN_SCSI_DISPATCH_CMD, f, buf, count, off);
+}
+
+static ssize_t ide_core_cp_entry(struct file *f, const char __user *buf,
+ size_t count, loff_t *off)
+{
+ return do_register_entry(CN_IDE_CORE_CP, f, buf, count, off);
+}
+
+/* Special entry to just crash directly. Available without KPROBEs */
+static ssize_t direct_entry(struct file *f, const char __user *user_buf,
+ size_t count, loff_t *off)
+{
+ enum ctype type;
+ char *buf;
+
+ if (count >= PAGE_SIZE)
+ return -EINVAL;
+ if (count < 1)
+ return -EINVAL;
+
+ buf = (char *)__get_free_page(GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+ if (copy_from_user(buf, user_buf, count)) {
+ free_page((unsigned long) buf);
+ return -EFAULT;
+ }
+ /* NULL-terminate and remove enter */
+ buf[count] = '\0';
+ strim(buf);
+
+ type = parse_cp_type(buf, count);
+ free_page((unsigned long) buf);
+ if (type == CT_NONE)
+ return -EINVAL;
+
+ pr_info("Performing direct entry %s\n", cp_type_to_str(type));
+ lkdtm_do_action(type);
+ *off += count;
+
+ return count;
+}
+
+struct crash_entry {
+ const char *name;
+ const struct file_operations fops;
+};
+
+static const struct crash_entry crash_entries[] = {
+ {"DIRECT", {.read = lkdtm_debugfs_read,
+ .llseek = generic_file_llseek,
+ .open = lkdtm_debugfs_open,
+ .write = direct_entry} },
+ {"INT_HARDWARE_ENTRY", {.read = lkdtm_debugfs_read,
+ .llseek = generic_file_llseek,
+ .open = lkdtm_debugfs_open,
+ .write = int_hardware_entry} },
+ {"INT_HW_IRQ_EN", {.read = lkdtm_debugfs_read,
+ .llseek = generic_file_llseek,
+ .open = lkdtm_debugfs_open,
+ .write = int_hw_irq_en} },
+ {"INT_TASKLET_ENTRY", {.read = lkdtm_debugfs_read,
+ .llseek = generic_file_llseek,
+ .open = lkdtm_debugfs_open,
+ .write = int_tasklet_entry} },
+ {"FS_DEVRW", {.read = lkdtm_debugfs_read,
+ .llseek = generic_file_llseek,
+ .open = lkdtm_debugfs_open,
+ .write = fs_devrw_entry} },
+ {"MEM_SWAPOUT", {.read = lkdtm_debugfs_read,
+ .llseek = generic_file_llseek,
+ .open = lkdtm_debugfs_open,
+ .write = mem_swapout_entry} },
+ {"TIMERADD", {.read = lkdtm_debugfs_read,
+ .llseek = generic_file_llseek,
+ .open = lkdtm_debugfs_open,
+ .write = timeradd_entry} },
+ {"SCSI_DISPATCH_CMD", {.read = lkdtm_debugfs_read,
+ .llseek = generic_file_llseek,
+ .open = lkdtm_debugfs_open,
+ .write = scsi_dispatch_cmd_entry} },
+ {"IDE_CORE_CP", {.read = lkdtm_debugfs_read,
+ .llseek = generic_file_llseek,
+ .open = lkdtm_debugfs_open,
+ .write = ide_core_cp_entry} },
+};
+
+static struct dentry *lkdtm_debugfs_root;
+
+static int __init lkdtm_module_init(void)
+{
+ int ret = -EINVAL;
+ int n_debugfs_entries = 1; /* Assume only the direct entry */
+ int i;
+
+ /* Register debugfs interface */
+ lkdtm_debugfs_root = debugfs_create_dir("provoke-crash", NULL);
+ if (!lkdtm_debugfs_root) {
+ pr_err("creating root dir failed\n");
+ return -ENODEV;
+ }
+
+#ifdef CONFIG_KPROBES
+ n_debugfs_entries = ARRAY_SIZE(crash_entries);
+#endif
+
+ for (i = 0; i < n_debugfs_entries; i++) {
+ const struct crash_entry *cur = &crash_entries[i];
+ struct dentry *de;
+
+ de = debugfs_create_file(cur->name, 0644, lkdtm_debugfs_root,
+ NULL, &cur->fops);
+ if (de == NULL) {
+ pr_err("could not create %s\n", cur->name);
+ goto out_err;
+ }
+ }
+
+ if (lkdtm_parse_commandline() == -EINVAL) {
+ pr_info("Invalid command\n");
+ goto out_err;
+ }
+
+ if (cpoint != CN_INVALID && cptype != CT_NONE) {
+ ret = lkdtm_register_cpoint(cpoint);
+ if (ret < 0) {
+ pr_info("Invalid crash point %d\n", cpoint);
+ goto out_err;
+ }
+ pr_info("Crash point %s of type %s registered\n",
+ cpoint_name, cpoint_type);
+ } else {
+ pr_info("No crash points registered, enable through debugfs\n");
+ }
+
+ return 0;
+
+out_err:
+ debugfs_remove_recursive(lkdtm_debugfs_root);
+ return ret;
+}
+
+static void __exit lkdtm_module_exit(void)
+{
+ debugfs_remove_recursive(lkdtm_debugfs_root);
+
+ unregister_jprobe(&lkdtm);
+ pr_info("Crash point unregistered\n");
+}
+
+module_init(lkdtm_module_init);
+module_exit(lkdtm_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Kprobe module for testing crash dumps");
diff --git a/drivers/misc/mei/Kconfig b/drivers/misc/mei/Kconfig
new file mode 100644
index 0000000..d23384d
--- /dev/null
+++ b/drivers/misc/mei/Kconfig
@@ -0,0 +1,45 @@
+config INTEL_MEI
+ tristate "Intel Management Engine Interface"
+ depends on X86 && PCI && WATCHDOG_CORE
+ help
+ The Intel Management Engine (Intel ME) provides Manageability,
+ Security and Media services for system containing Intel chipsets.
+ if selected /dev/mei misc device will be created.
+
+ For more information see
+ <http://software.intel.com/en-us/manageability/>
+
+config INTEL_MEI_ME
+ tristate "ME Enabled Intel Chipsets"
+ select INTEL_MEI
+ depends on X86 && PCI && WATCHDOG_CORE
+ help
+ MEI support for ME Enabled Intel chipsets.
+
+ Supported Chipsets are:
+ 7 Series Chipset Family
+ 6 Series Chipset Family
+ 5 Series Chipset Family
+ 4 Series Chipset Family
+ Mobile 4 Series Chipset Family
+ ICH9
+ 82946GZ/GL
+ 82G35 Express
+ 82Q963/Q965
+ 82P965/G965
+ Mobile PM965/GM965
+ Mobile GME965/GLE960
+ 82Q35 Express
+ 82G33/G31/P35/P31 Express
+ 82Q33 Express
+ 82X38/X48 Express
+
+config INTEL_MEI_TXE
+ tristate "Intel Trusted Execution Environment with ME Interface"
+ select INTEL_MEI
+ depends on X86 && PCI && WATCHDOG_CORE
+ help
+ MEI Support for Trusted Execution Environment device on Intel SoCs
+
+ Supported SoCs:
+ Intel Bay Trail
diff --git a/drivers/misc/mei/Makefile b/drivers/misc/mei/Makefile
new file mode 100644
index 0000000..01447ca
--- /dev/null
+++ b/drivers/misc/mei/Makefile
@@ -0,0 +1,26 @@
+#
+# Makefile - Intel Management Engine Interface (Intel MEI) Linux driver
+# Copyright (c) 2010-2014, Intel Corporation.
+#
+obj-$(CONFIG_INTEL_MEI) += mei.o
+mei-objs := init.o
+mei-objs += hbm.o
+mei-objs += interrupt.o
+mei-objs += client.o
+mei-objs += main.o
+mei-objs += amthif.o
+mei-objs += wd.o
+mei-objs += bus.o
+mei-objs += bus-fixup.o
+mei-$(CONFIG_DEBUG_FS) += debugfs.o
+
+obj-$(CONFIG_INTEL_MEI_ME) += mei-me.o
+mei-me-objs := pci-me.o
+mei-me-objs += hw-me.o
+
+obj-$(CONFIG_INTEL_MEI_TXE) += mei-txe.o
+mei-txe-objs := pci-txe.o
+mei-txe-objs += hw-txe.o
+
+mei-$(CONFIG_EVENT_TRACING) += mei-trace.o
+CFLAGS_mei-trace.o = -I$(src)
diff --git a/drivers/misc/mei/amthif.c b/drivers/misc/mei/amthif.c
new file mode 100644
index 0000000..e79c037
--- /dev/null
+++ b/drivers/misc/mei/amthif.c
@@ -0,0 +1,590 @@
+/*
+ *
+ * Intel Management Engine Interface (Intel MEI) Linux driver
+ * Copyright (c) 2003-2012, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/fcntl.h>
+#include <linux/ioctl.h>
+#include <linux/cdev.h>
+#include <linux/list.h>
+#include <linux/delay.h>
+#include <linux/sched.h>
+#include <linux/uuid.h>
+#include <linux/jiffies.h>
+#include <linux/uaccess.h>
+#include <linux/slab.h>
+
+#include <linux/mei.h>
+
+#include "mei_dev.h"
+#include "hbm.h"
+#include "client.h"
+
+const uuid_le mei_amthif_guid = UUID_LE(0x12f80028, 0xb4b7, 0x4b2d,
+ 0xac, 0xa8, 0x46, 0xe0,
+ 0xff, 0x65, 0x81, 0x4c);
+
+/**
+ * mei_amthif_reset_params - initializes mei device iamthif
+ *
+ * @dev: the device structure
+ */
+void mei_amthif_reset_params(struct mei_device *dev)
+{
+ /* reset iamthif parameters. */
+ dev->iamthif_current_cb = NULL;
+ dev->iamthif_canceled = false;
+ dev->iamthif_state = MEI_IAMTHIF_IDLE;
+ dev->iamthif_timer = 0;
+ dev->iamthif_stall_timer = 0;
+ dev->iamthif_open_count = 0;
+}
+
+/**
+ * mei_amthif_host_init - mei initialization amthif client.
+ *
+ * @dev: the device structure
+ * @me_cl: me client
+ *
+ * Return: 0 on success, <0 on failure.
+ */
+int mei_amthif_host_init(struct mei_device *dev, struct mei_me_client *me_cl)
+{
+ struct mei_cl *cl = &dev->iamthif_cl;
+ int ret;
+
+ dev->iamthif_state = MEI_IAMTHIF_IDLE;
+
+ mei_cl_init(cl, dev);
+
+ ret = mei_cl_link(cl, MEI_IAMTHIF_HOST_CLIENT_ID);
+ if (ret < 0) {
+ dev_err(dev->dev, "amthif: failed cl_link %d\n", ret);
+ return ret;
+ }
+
+ ret = mei_cl_connect(cl, me_cl, NULL);
+
+ dev->iamthif_state = MEI_IAMTHIF_IDLE;
+
+ return ret;
+}
+
+/**
+ * mei_amthif_find_read_list_entry - finds a amthilist entry for current file
+ *
+ * @dev: the device structure
+ * @file: pointer to file object
+ *
+ * Return: returned a list entry on success, NULL on failure.
+ */
+struct mei_cl_cb *mei_amthif_find_read_list_entry(struct mei_device *dev,
+ struct file *file)
+{
+ struct mei_cl_cb *cb;
+
+ list_for_each_entry(cb, &dev->amthif_rd_complete_list.list, list)
+ if (cb->file_object == file)
+ return cb;
+ return NULL;
+}
+
+
+/**
+ * mei_amthif_read - read data from AMTHIF client
+ *
+ * @dev: the device structure
+ * @file: pointer to file object
+ * @ubuf: pointer to user data in user space
+ * @length: data length to read
+ * @offset: data read offset
+ *
+ * Locking: called under "dev->device_lock" lock
+ *
+ * Return:
+ * returned data length on success,
+ * zero if no data to read,
+ * negative on failure.
+ */
+int mei_amthif_read(struct mei_device *dev, struct file *file,
+ char __user *ubuf, size_t length, loff_t *offset)
+{
+ struct mei_cl *cl = file->private_data;
+ struct mei_cl_cb *cb;
+ unsigned long timeout;
+ int rets;
+ int wait_ret;
+
+ /* Only possible if we are in timeout */
+ if (!cl) {
+ dev_err(dev->dev, "bad file ext.\n");
+ return -ETIME;
+ }
+
+ dev_dbg(dev->dev, "checking amthif data\n");
+ cb = mei_amthif_find_read_list_entry(dev, file);
+
+ /* Check for if we can block or not*/
+ if (cb == NULL && file->f_flags & O_NONBLOCK)
+ return -EAGAIN;
+
+
+ dev_dbg(dev->dev, "waiting for amthif data\n");
+ while (cb == NULL) {
+ /* unlock the Mutex */
+ mutex_unlock(&dev->device_lock);
+
+ wait_ret = wait_event_interruptible(dev->iamthif_cl.wait,
+ (cb = mei_amthif_find_read_list_entry(dev, file)));
+
+ /* Locking again the Mutex */
+ mutex_lock(&dev->device_lock);
+
+ if (wait_ret)
+ return -ERESTARTSYS;
+
+ dev_dbg(dev->dev, "woke up from sleep\n");
+ }
+
+ if (cb->status) {
+ rets = cb->status;
+ dev_dbg(dev->dev, "read operation failed %d\n", rets);
+ goto free;
+ }
+
+ dev_dbg(dev->dev, "Got amthif data\n");
+ dev->iamthif_timer = 0;
+
+ timeout = cb->read_time +
+ mei_secs_to_jiffies(MEI_IAMTHIF_READ_TIMER);
+ dev_dbg(dev->dev, "amthif timeout = %lud\n",
+ timeout);
+
+ if (time_after(jiffies, timeout)) {
+ dev_dbg(dev->dev, "amthif Time out\n");
+ /* 15 sec for the message has expired */
+ list_del_init(&cb->list);
+ rets = -ETIME;
+ goto free;
+ }
+ /* if the whole message will fit remove it from the list */
+ if (cb->buf_idx >= *offset && length >= (cb->buf_idx - *offset))
+ list_del_init(&cb->list);
+ else if (cb->buf_idx > 0 && cb->buf_idx <= *offset) {
+ /* end of the message has been reached */
+ list_del_init(&cb->list);
+ rets = 0;
+ goto free;
+ }
+ /* else means that not full buffer will be read and do not
+ * remove message from deletion list
+ */
+
+ dev_dbg(dev->dev, "amthif cb->buf size - %d\n",
+ cb->buf.size);
+ dev_dbg(dev->dev, "amthif cb->buf_idx - %lu\n", cb->buf_idx);
+
+ /* length is being truncated to PAGE_SIZE, however,
+ * the buf_idx may point beyond */
+ length = min_t(size_t, length, (cb->buf_idx - *offset));
+
+ if (copy_to_user(ubuf, cb->buf.data + *offset, length)) {
+ dev_dbg(dev->dev, "failed to copy data to userland\n");
+ rets = -EFAULT;
+ } else {
+ rets = length;
+ if ((*offset + length) < cb->buf_idx) {
+ *offset += length;
+ goto out;
+ }
+ }
+free:
+ dev_dbg(dev->dev, "free amthif cb memory.\n");
+ *offset = 0;
+ mei_io_cb_free(cb);
+out:
+ return rets;
+}
+
+/**
+ * mei_amthif_read_start - queue message for sending read credential
+ *
+ * @cl: host client
+ * @file: file pointer of message recipient
+ *
+ * Return: 0 on success, <0 on failure.
+ */
+static int mei_amthif_read_start(struct mei_cl *cl, struct file *file)
+{
+ struct mei_device *dev = cl->dev;
+ struct mei_cl_cb *cb;
+ int rets;
+
+ cb = mei_io_cb_init(cl, MEI_FOP_READ, file);
+ if (!cb) {
+ rets = -ENOMEM;
+ goto err;
+ }
+
+ rets = mei_io_cb_alloc_buf(cb, mei_cl_mtu(cl));
+ if (rets)
+ goto err;
+
+ list_add_tail(&cb->list, &dev->ctrl_wr_list.list);
+
+ dev->iamthif_state = MEI_IAMTHIF_READING;
+ dev->iamthif_file_object = cb->file_object;
+ dev->iamthif_current_cb = cb;
+
+ return 0;
+err:
+ mei_io_cb_free(cb);
+ return rets;
+}
+
+/**
+ * mei_amthif_send_cmd - send amthif command to the ME
+ *
+ * @cl: the host client
+ * @cb: mei call back struct
+ *
+ * Return: 0 on success, <0 on failure.
+ */
+static int mei_amthif_send_cmd(struct mei_cl *cl, struct mei_cl_cb *cb)
+{
+ struct mei_device *dev;
+ int ret;
+
+ if (!cl->dev || !cb)
+ return -ENODEV;
+
+ dev = cl->dev;
+
+ dev->iamthif_state = MEI_IAMTHIF_WRITING;
+ dev->iamthif_current_cb = cb;
+ dev->iamthif_file_object = cb->file_object;
+ dev->iamthif_canceled = false;
+
+ ret = mei_cl_write(cl, cb, false);
+ if (ret < 0)
+ return ret;
+
+ if (cb->completed)
+ cb->status = mei_amthif_read_start(cl, cb->file_object);
+
+ return 0;
+}
+
+/**
+ * mei_amthif_run_next_cmd - send next amt command from queue
+ *
+ * @dev: the device structure
+ *
+ * Return: 0 on success, <0 on failure.
+ */
+int mei_amthif_run_next_cmd(struct mei_device *dev)
+{
+ struct mei_cl *cl = &dev->iamthif_cl;
+ struct mei_cl_cb *cb;
+
+ dev->iamthif_canceled = false;
+ dev->iamthif_state = MEI_IAMTHIF_IDLE;
+ dev->iamthif_timer = 0;
+ dev->iamthif_file_object = NULL;
+
+ dev_dbg(dev->dev, "complete amthif cmd_list cb.\n");
+
+ cb = list_first_entry_or_null(&dev->amthif_cmd_list.list,
+ typeof(*cb), list);
+ if (!cb)
+ return 0;
+
+ list_del_init(&cb->list);
+ return mei_amthif_send_cmd(cl, cb);
+}
+
+/**
+ * mei_amthif_write - write amthif data to amthif client
+ *
+ * @cl: host client
+ * @cb: mei call back struct
+ *
+ * Return: 0 on success, <0 on failure.
+ */
+int mei_amthif_write(struct mei_cl *cl, struct mei_cl_cb *cb)
+{
+
+ struct mei_device *dev;
+
+ if (WARN_ON(!cl || !cl->dev))
+ return -ENODEV;
+
+ if (WARN_ON(!cb))
+ return -EINVAL;
+
+ dev = cl->dev;
+
+ list_add_tail(&cb->list, &dev->amthif_cmd_list.list);
+ return mei_amthif_run_next_cmd(dev);
+}
+
+/**
+ * mei_amthif_poll - the amthif poll function
+ *
+ * @dev: the device structure
+ * @file: pointer to file structure
+ * @wait: pointer to poll_table structure
+ *
+ * Return: poll mask
+ *
+ * Locking: called under "dev->device_lock" lock
+ */
+
+unsigned int mei_amthif_poll(struct mei_device *dev,
+ struct file *file, poll_table *wait)
+{
+ unsigned int mask = 0;
+
+ poll_wait(file, &dev->iamthif_cl.wait, wait);
+
+ if (dev->iamthif_state == MEI_IAMTHIF_READ_COMPLETE &&
+ dev->iamthif_file_object == file) {
+
+ mask |= POLLIN | POLLRDNORM;
+ mei_amthif_run_next_cmd(dev);
+ }
+
+ return mask;
+}
+
+
+
+/**
+ * mei_amthif_irq_write - write iamthif command in irq thread context.
+ *
+ * @cl: private data of the file object.
+ * @cb: callback block.
+ * @cmpl_list: complete list.
+ *
+ * Return: 0, OK; otherwise, error.
+ */
+int mei_amthif_irq_write(struct mei_cl *cl, struct mei_cl_cb *cb,
+ struct mei_cl_cb *cmpl_list)
+{
+ int ret;
+
+ ret = mei_cl_irq_write(cl, cb, cmpl_list);
+ if (ret)
+ return ret;
+
+ if (cb->completed)
+ cb->status = mei_amthif_read_start(cl, cb->file_object);
+
+ return 0;
+}
+
+/**
+ * mei_amthif_irq_read_msg - read routine after ISR to
+ * handle the read amthif message
+ *
+ * @cl: mei client
+ * @mei_hdr: header of amthif message
+ * @cmpl_list: completed callbacks list
+ *
+ * Return: -ENODEV if cb is NULL 0 otherwise; error message is in cb->status
+ */
+int mei_amthif_irq_read_msg(struct mei_cl *cl,
+ struct mei_msg_hdr *mei_hdr,
+ struct mei_cl_cb *cmpl_list)
+{
+ struct mei_device *dev;
+ int ret;
+
+ dev = cl->dev;
+
+ if (dev->iamthif_state != MEI_IAMTHIF_READING) {
+ mei_irq_discard_msg(dev, mei_hdr);
+ return 0;
+ }
+
+ ret = mei_cl_irq_read_msg(cl, mei_hdr, cmpl_list);
+ if (ret)
+ return ret;
+
+ if (!mei_hdr->msg_complete)
+ return 0;
+
+ dev_dbg(dev->dev, "completed amthif read.\n ");
+ dev->iamthif_current_cb = NULL;
+ dev->iamthif_stall_timer = 0;
+
+ return 0;
+}
+
+/**
+ * mei_amthif_complete - complete amthif callback.
+ *
+ * @dev: the device structure.
+ * @cb: callback block.
+ */
+void mei_amthif_complete(struct mei_device *dev, struct mei_cl_cb *cb)
+{
+
+ if (cb->fop_type == MEI_FOP_WRITE) {
+ if (!cb->status) {
+ dev->iamthif_stall_timer = MEI_IAMTHIF_STALL_TIMER;
+ mei_io_cb_free(cb);
+ return;
+ }
+ /*
+ * in case of error enqueue the write cb to complete read list
+ * so it can be propagated to the reader
+ */
+ list_add_tail(&cb->list, &dev->amthif_rd_complete_list.list);
+ wake_up_interruptible(&dev->iamthif_cl.wait);
+ return;
+ }
+
+ if (!dev->iamthif_canceled) {
+ dev->iamthif_state = MEI_IAMTHIF_READ_COMPLETE;
+ dev->iamthif_stall_timer = 0;
+ list_add_tail(&cb->list, &dev->amthif_rd_complete_list.list);
+ dev_dbg(dev->dev, "amthif read completed\n");
+ dev->iamthif_timer = jiffies;
+ dev_dbg(dev->dev, "dev->iamthif_timer = %ld\n",
+ dev->iamthif_timer);
+ } else {
+ mei_amthif_run_next_cmd(dev);
+ }
+
+ dev_dbg(dev->dev, "completing amthif call back.\n");
+ wake_up_interruptible(&dev->iamthif_cl.wait);
+}
+
+/**
+ * mei_clear_list - removes all callbacks associated with file
+ * from mei_cb_list
+ *
+ * @dev: device structure.
+ * @file: file structure
+ * @mei_cb_list: callbacks list
+ *
+ * mei_clear_list is called to clear resources associated with file
+ * when application calls close function or Ctrl-C was pressed
+ *
+ * Return: true if callback removed from the list, false otherwise
+ */
+static bool mei_clear_list(struct mei_device *dev,
+ const struct file *file, struct list_head *mei_cb_list)
+{
+ struct mei_cl *cl = &dev->iamthif_cl;
+ struct mei_cl_cb *cb, *next;
+ bool removed = false;
+
+ /* list all list member */
+ list_for_each_entry_safe(cb, next, mei_cb_list, list) {
+ /* check if list member associated with a file */
+ if (file == cb->file_object) {
+ /* check if cb equal to current iamthif cb */
+ if (dev->iamthif_current_cb == cb) {
+ dev->iamthif_current_cb = NULL;
+ /* send flow control to iamthif client */
+ mei_hbm_cl_flow_control_req(dev, cl);
+ }
+ /* free all allocated buffers */
+ mei_io_cb_free(cb);
+ removed = true;
+ }
+ }
+ return removed;
+}
+
+/**
+ * mei_clear_lists - removes all callbacks associated with file
+ *
+ * @dev: device structure
+ * @file: file structure
+ *
+ * mei_clear_lists is called to clear resources associated with file
+ * when application calls close function or Ctrl-C was pressed
+ *
+ * Return: true if callback removed from the list, false otherwise
+ */
+static bool mei_clear_lists(struct mei_device *dev, struct file *file)
+{
+ bool removed = false;
+
+ /* remove callbacks associated with a file */
+ mei_clear_list(dev, file, &dev->amthif_cmd_list.list);
+ if (mei_clear_list(dev, file, &dev->amthif_rd_complete_list.list))
+ removed = true;
+
+ mei_clear_list(dev, file, &dev->ctrl_rd_list.list);
+
+ if (mei_clear_list(dev, file, &dev->ctrl_wr_list.list))
+ removed = true;
+
+ if (mei_clear_list(dev, file, &dev->write_waiting_list.list))
+ removed = true;
+
+ if (mei_clear_list(dev, file, &dev->write_list.list))
+ removed = true;
+
+ /* check if iamthif_current_cb not NULL */
+ if (dev->iamthif_current_cb && !removed) {
+ /* check file and iamthif current cb association */
+ if (dev->iamthif_current_cb->file_object == file) {
+ /* remove cb */
+ mei_io_cb_free(dev->iamthif_current_cb);
+ dev->iamthif_current_cb = NULL;
+ removed = true;
+ }
+ }
+ return removed;
+}
+
+/**
+* mei_amthif_release - the release function
+*
+* @dev: device structure
+* @file: pointer to file structure
+*
+* Return: 0 on success, <0 on error
+*/
+int mei_amthif_release(struct mei_device *dev, struct file *file)
+{
+ if (dev->iamthif_open_count > 0)
+ dev->iamthif_open_count--;
+
+ if (dev->iamthif_file_object == file &&
+ dev->iamthif_state != MEI_IAMTHIF_IDLE) {
+
+ dev_dbg(dev->dev, "amthif canceled iamthif state %d\n",
+ dev->iamthif_state);
+ dev->iamthif_canceled = true;
+ if (dev->iamthif_state == MEI_IAMTHIF_READ_COMPLETE) {
+ dev_dbg(dev->dev, "run next amthif iamthif cb\n");
+ mei_amthif_run_next_cmd(dev);
+ }
+ }
+
+ if (mei_clear_lists(dev, file))
+ dev->iamthif_state = MEI_IAMTHIF_IDLE;
+
+ return 0;
+}
diff --git a/drivers/misc/mei/bus-fixup.c b/drivers/misc/mei/bus-fixup.c
new file mode 100644
index 0000000..bdc7fcd
--- /dev/null
+++ b/drivers/misc/mei/bus-fixup.c
@@ -0,0 +1,306 @@
+/*
+ *
+ * Intel Management Engine Interface (Intel MEI) Linux driver
+ * Copyright (c) 2003-2013, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/device.h>
+#include <linux/slab.h>
+#include <linux/uuid.h>
+
+#include <linux/mei_cl_bus.h>
+
+#include "mei_dev.h"
+#include "client.h"
+
+#define MEI_UUID_NFC_INFO UUID_LE(0xd2de1625, 0x382d, 0x417d, \
+ 0x48, 0xa4, 0xef, 0xab, 0xba, 0x8a, 0x12, 0x06)
+
+static const uuid_le mei_nfc_info_guid = MEI_UUID_NFC_INFO;
+
+#define MEI_UUID_NFC_HCI UUID_LE(0x0bb17a78, 0x2a8e, 0x4c50, \
+ 0x94, 0xd4, 0x50, 0x26, 0x67, 0x23, 0x77, 0x5c)
+
+#define MEI_UUID_ANY NULL_UUID_LE
+
+/**
+ * number_of_connections - determine whether an client be on the bus
+ * according number of connections
+ * We support only clients:
+ * 1. with single connection
+ * 2. and fixed clients (max_number_of_connections == 0)
+ *
+ * @cldev: me clients device
+ */
+static void number_of_connections(struct mei_cl_device *cldev)
+{
+ dev_dbg(&cldev->dev, "running hook %s on %pUl\n",
+ __func__, mei_me_cl_uuid(cldev->me_cl));
+
+ if (cldev->me_cl->props.max_number_of_connections > 1)
+ cldev->do_match = 0;
+}
+
+/**
+ * blacklist - blacklist a client from the bus
+ *
+ * @cldev: me clients device
+ */
+static void blacklist(struct mei_cl_device *cldev)
+{
+ dev_dbg(&cldev->dev, "running hook %s on %pUl\n",
+ __func__, mei_me_cl_uuid(cldev->me_cl));
+ cldev->do_match = 0;
+}
+
+struct mei_nfc_cmd {
+ u8 command;
+ u8 status;
+ u16 req_id;
+ u32 reserved;
+ u16 data_size;
+ u8 sub_command;
+ u8 data[];
+} __packed;
+
+struct mei_nfc_reply {
+ u8 command;
+ u8 status;
+ u16 req_id;
+ u32 reserved;
+ u16 data_size;
+ u8 sub_command;
+ u8 reply_status;
+ u8 data[];
+} __packed;
+
+struct mei_nfc_if_version {
+ u8 radio_version_sw[3];
+ u8 reserved[3];
+ u8 radio_version_hw[3];
+ u8 i2c_addr;
+ u8 fw_ivn;
+ u8 vendor_id;
+ u8 radio_type;
+} __packed;
+
+
+#define MEI_NFC_CMD_MAINTENANCE 0x00
+#define MEI_NFC_SUBCMD_IF_VERSION 0x01
+
+/* Vendors */
+#define MEI_NFC_VENDOR_INSIDE 0x00
+#define MEI_NFC_VENDOR_NXP 0x01
+
+/* Radio types */
+#define MEI_NFC_VENDOR_INSIDE_UREAD 0x00
+#define MEI_NFC_VENDOR_NXP_PN544 0x01
+
+/**
+ * mei_nfc_if_version - get NFC interface version
+ *
+ * @cl: host client (nfc info)
+ * @ver: NFC interface version to be filled in
+ *
+ * Return: 0 on success; < 0 otherwise
+ */
+static int mei_nfc_if_version(struct mei_cl *cl,
+ struct mei_nfc_if_version *ver)
+{
+ struct mei_device *bus;
+ struct mei_nfc_cmd cmd = {
+ .command = MEI_NFC_CMD_MAINTENANCE,
+ .data_size = 1,
+ .sub_command = MEI_NFC_SUBCMD_IF_VERSION,
+ };
+ struct mei_nfc_reply *reply = NULL;
+ size_t if_version_length;
+ int bytes_recv, ret;
+
+ bus = cl->dev;
+
+ WARN_ON(mutex_is_locked(&bus->device_lock));
+
+ ret = __mei_cl_send(cl, (u8 *)&cmd, sizeof(struct mei_nfc_cmd), 1);
+ if (ret < 0) {
+ dev_err(bus->dev, "Could not send IF version cmd\n");
+ return ret;
+ }
+
+ /* to be sure on the stack we alloc memory */
+ if_version_length = sizeof(struct mei_nfc_reply) +
+ sizeof(struct mei_nfc_if_version);
+
+ reply = kzalloc(if_version_length, GFP_KERNEL);
+ if (!reply)
+ return -ENOMEM;
+
+ ret = 0;
+ bytes_recv = __mei_cl_recv(cl, (u8 *)reply, if_version_length);
+ if (bytes_recv < if_version_length) {
+ dev_err(bus->dev, "Could not read IF version\n");
+ ret = -EIO;
+ goto err;
+ }
+
+ memcpy(ver, reply->data, sizeof(struct mei_nfc_if_version));
+
+ dev_info(bus->dev, "NFC MEI VERSION: IVN 0x%x Vendor ID 0x%x Type 0x%x\n",
+ ver->fw_ivn, ver->vendor_id, ver->radio_type);
+
+err:
+ kfree(reply);
+ return ret;
+}
+
+/**
+ * mei_nfc_radio_name - derive nfc radio name from the interface version
+ *
+ * @ver: NFC radio version
+ *
+ * Return: radio name string
+ */
+static const char *mei_nfc_radio_name(struct mei_nfc_if_version *ver)
+{
+
+ if (ver->vendor_id == MEI_NFC_VENDOR_INSIDE) {
+ if (ver->radio_type == MEI_NFC_VENDOR_INSIDE_UREAD)
+ return "microread";
+ }
+
+ if (ver->vendor_id == MEI_NFC_VENDOR_NXP) {
+ if (ver->radio_type == MEI_NFC_VENDOR_NXP_PN544)
+ return "pn544";
+ }
+
+ return NULL;
+}
+
+/**
+ * mei_nfc - The nfc fixup function. The function retrieves nfc radio
+ * name and set is as device attribute so we can load
+ * the proper device driver for it
+ *
+ * @cldev: me client device (nfc)
+ */
+static void mei_nfc(struct mei_cl_device *cldev)
+{
+ struct mei_device *bus;
+ struct mei_cl *cl;
+ struct mei_me_client *me_cl = NULL;
+ struct mei_nfc_if_version ver;
+ const char *radio_name = NULL;
+ int ret;
+
+ bus = cldev->bus;
+
+ dev_dbg(bus->dev, "running hook %s: %pUl match=%d\n",
+ __func__, mei_me_cl_uuid(cldev->me_cl), cldev->do_match);
+
+ mutex_lock(&bus->device_lock);
+ /* we need to connect to INFO GUID */
+ cl = mei_cl_alloc_linked(bus, MEI_HOST_CLIENT_ID_ANY);
+ if (IS_ERR(cl)) {
+ ret = PTR_ERR(cl);
+ cl = NULL;
+ dev_err(bus->dev, "nfc hook alloc failed %d\n", ret);
+ goto out;
+ }
+
+ me_cl = mei_me_cl_by_uuid(bus, &mei_nfc_info_guid);
+ if (!me_cl) {
+ ret = -ENOTTY;
+ dev_err(bus->dev, "Cannot find nfc info %d\n", ret);
+ goto out;
+ }
+
+ ret = mei_cl_connect(cl, me_cl, NULL);
+ if (ret < 0) {
+ dev_err(&cldev->dev, "Can't connect to the NFC INFO ME ret = %d\n",
+ ret);
+ goto out;
+ }
+
+ mutex_unlock(&bus->device_lock);
+
+ ret = mei_nfc_if_version(cl, &ver);
+ if (ret)
+ goto disconnect;
+
+ radio_name = mei_nfc_radio_name(&ver);
+
+ if (!radio_name) {
+ ret = -ENOENT;
+ dev_err(&cldev->dev, "Can't get the NFC interface version ret = %d\n",
+ ret);
+ goto disconnect;
+ }
+
+ dev_dbg(bus->dev, "nfc radio %s\n", radio_name);
+ strlcpy(cldev->name, radio_name, sizeof(cldev->name));
+
+disconnect:
+ mutex_lock(&bus->device_lock);
+ if (mei_cl_disconnect(cl) < 0)
+ dev_err(bus->dev, "Can't disconnect the NFC INFO ME\n");
+
+ mei_cl_flush_queues(cl, NULL);
+
+out:
+ mei_cl_unlink(cl);
+ mutex_unlock(&bus->device_lock);
+ mei_me_cl_put(me_cl);
+ kfree(cl);
+
+ if (ret)
+ cldev->do_match = 0;
+
+ dev_dbg(bus->dev, "end of fixup match = %d\n", cldev->do_match);
+}
+
+#define MEI_FIXUP(_uuid, _hook) { _uuid, _hook }
+
+static struct mei_fixup {
+
+ const uuid_le uuid;
+ void (*hook)(struct mei_cl_device *cldev);
+} mei_fixups[] = {
+ MEI_FIXUP(MEI_UUID_ANY, number_of_connections),
+ MEI_FIXUP(MEI_UUID_NFC_INFO, blacklist),
+ MEI_FIXUP(MEI_UUID_NFC_HCI, mei_nfc),
+};
+
+/**
+ * mei_cldev_fixup - run fixup handlers
+ *
+ * @cldev: me client device
+ */
+void mei_cl_bus_dev_fixup(struct mei_cl_device *cldev)
+{
+ struct mei_fixup *f;
+ const uuid_le *uuid = mei_me_cl_uuid(cldev->me_cl);
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(mei_fixups); i++) {
+
+ f = &mei_fixups[i];
+ if (uuid_le_cmp(f->uuid, MEI_UUID_ANY) == 0 ||
+ uuid_le_cmp(f->uuid, *uuid) == 0)
+ f->hook(cldev);
+ }
+}
+
diff --git a/drivers/misc/mei/bus.c b/drivers/misc/mei/bus.c
new file mode 100644
index 0000000..be74a25
--- /dev/null
+++ b/drivers/misc/mei/bus.c
@@ -0,0 +1,1015 @@
+/*
+ * Intel Management Engine Interface (Intel MEI) Linux driver
+ * Copyright (c) 2012-2013, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/slab.h>
+#include <linux/mutex.h>
+#include <linux/interrupt.h>
+#include <linux/mei_cl_bus.h>
+
+#include "mei_dev.h"
+#include "client.h"
+
+#define to_mei_cl_driver(d) container_of(d, struct mei_cl_driver, driver)
+#define to_mei_cl_device(d) container_of(d, struct mei_cl_device, dev)
+
+/**
+ * __mei_cl_send - internal client send (write)
+ *
+ * @cl: host client
+ * @buf: buffer to send
+ * @length: buffer length
+ * @blocking: wait for write completion
+ *
+ * Return: written size bytes or < 0 on error
+ */
+ssize_t __mei_cl_send(struct mei_cl *cl, u8 *buf, size_t length,
+ bool blocking)
+{
+ struct mei_device *bus;
+ struct mei_cl_cb *cb = NULL;
+ ssize_t rets;
+
+ if (WARN_ON(!cl || !cl->dev))
+ return -ENODEV;
+
+ bus = cl->dev;
+
+ mutex_lock(&bus->device_lock);
+ if (bus->dev_state != MEI_DEV_ENABLED) {
+ rets = -ENODEV;
+ goto out;
+ }
+
+ if (!mei_cl_is_connected(cl)) {
+ rets = -ENODEV;
+ goto out;
+ }
+
+ /* Check if we have an ME client device */
+ if (!mei_me_cl_is_active(cl->me_cl)) {
+ rets = -ENOTTY;
+ goto out;
+ }
+
+ if (length > mei_cl_mtu(cl)) {
+ rets = -EFBIG;
+ goto out;
+ }
+
+ cb = mei_cl_alloc_cb(cl, length, MEI_FOP_WRITE, NULL);
+ if (!cb) {
+ rets = -ENOMEM;
+ goto out;
+ }
+
+ memcpy(cb->buf.data, buf, length);
+
+ rets = mei_cl_write(cl, cb, blocking);
+
+out:
+ mutex_unlock(&bus->device_lock);
+ if (rets < 0)
+ mei_io_cb_free(cb);
+
+ return rets;
+}
+
+/**
+ * __mei_cl_recv - internal client receive (read)
+ *
+ * @cl: host client
+ * @buf: buffer to receive
+ * @length: buffer length
+ *
+ * Return: read size in bytes of < 0 on error
+ */
+ssize_t __mei_cl_recv(struct mei_cl *cl, u8 *buf, size_t length)
+{
+ struct mei_device *bus;
+ struct mei_cl_cb *cb;
+ size_t r_length;
+ ssize_t rets;
+
+ if (WARN_ON(!cl || !cl->dev))
+ return -ENODEV;
+
+ bus = cl->dev;
+
+ mutex_lock(&bus->device_lock);
+ if (bus->dev_state != MEI_DEV_ENABLED) {
+ rets = -ENODEV;
+ goto out;
+ }
+
+ cb = mei_cl_read_cb(cl, NULL);
+ if (cb)
+ goto copy;
+
+ rets = mei_cl_read_start(cl, length, NULL);
+ if (rets && rets != -EBUSY)
+ goto out;
+
+ /* wait on event only if there is no other waiter */
+ if (list_empty(&cl->rd_completed) && !waitqueue_active(&cl->rx_wait)) {
+
+ mutex_unlock(&bus->device_lock);
+
+ if (wait_event_interruptible(cl->rx_wait,
+ (!list_empty(&cl->rd_completed)) ||
+ (!mei_cl_is_connected(cl)))) {
+
+ if (signal_pending(current))
+ return -EINTR;
+ return -ERESTARTSYS;
+ }
+
+ mutex_lock(&bus->device_lock);
+
+ if (!mei_cl_is_connected(cl)) {
+ rets = -ENODEV;
+ goto out;
+ }
+ }
+
+ cb = mei_cl_read_cb(cl, NULL);
+ if (!cb) {
+ rets = 0;
+ goto out;
+ }
+
+copy:
+ if (cb->status) {
+ rets = cb->status;
+ goto free;
+ }
+
+ r_length = min_t(size_t, length, cb->buf_idx);
+ memcpy(buf, cb->buf.data, r_length);
+ rets = r_length;
+
+free:
+ mei_io_cb_free(cb);
+out:
+ mutex_unlock(&bus->device_lock);
+
+ return rets;
+}
+
+/**
+ * mei_cldev_send - me device send (write)
+ *
+ * @cldev: me client device
+ * @buf: buffer to send
+ * @length: buffer length
+ *
+ * Return: written size in bytes or < 0 on error
+ */
+ssize_t mei_cldev_send(struct mei_cl_device *cldev, u8 *buf, size_t length)
+{
+ struct mei_cl *cl = cldev->cl;
+
+ if (cl == NULL)
+ return -ENODEV;
+
+ return __mei_cl_send(cl, buf, length, 1);
+}
+EXPORT_SYMBOL_GPL(mei_cldev_send);
+
+/**
+ * mei_cldev_recv - client receive (read)
+ *
+ * @cldev: me client device
+ * @buf: buffer to receive
+ * @length: buffer length
+ *
+ * Return: read size in bytes of < 0 on error
+ */
+ssize_t mei_cldev_recv(struct mei_cl_device *cldev, u8 *buf, size_t length)
+{
+ struct mei_cl *cl = cldev->cl;
+
+ if (cl == NULL)
+ return -ENODEV;
+
+ return __mei_cl_recv(cl, buf, length);
+}
+EXPORT_SYMBOL_GPL(mei_cldev_recv);
+
+/**
+ * mei_cl_bus_event_work - dispatch rx event for a bus device
+ * and schedule new work
+ *
+ * @work: work
+ */
+static void mei_cl_bus_event_work(struct work_struct *work)
+{
+ struct mei_cl_device *cldev;
+ struct mei_device *bus;
+
+ cldev = container_of(work, struct mei_cl_device, event_work);
+
+ bus = cldev->bus;
+
+ if (cldev->event_cb)
+ cldev->event_cb(cldev, cldev->events, cldev->event_context);
+
+ cldev->events = 0;
+
+ /* Prepare for the next read */
+ if (cldev->events_mask & BIT(MEI_CL_EVENT_RX)) {
+ mutex_lock(&bus->device_lock);
+ mei_cl_read_start(cldev->cl, 0, NULL);
+ mutex_unlock(&bus->device_lock);
+ }
+}
+
+/**
+ * mei_cl_bus_notify_event - schedule notify cb on bus client
+ *
+ * @cl: host client
+ */
+void mei_cl_bus_notify_event(struct mei_cl *cl)
+{
+ struct mei_cl_device *cldev = cl->cldev;
+
+ if (!cldev || !cldev->event_cb)
+ return;
+
+ if (!(cldev->events_mask & BIT(MEI_CL_EVENT_NOTIF)))
+ return;
+
+ if (!cl->notify_ev)
+ return;
+
+ set_bit(MEI_CL_EVENT_NOTIF, &cldev->events);
+
+ schedule_work(&cldev->event_work);
+
+ cl->notify_ev = false;
+}
+
+/**
+ * mei_cl_bus_rx_event - schedule rx evenet
+ *
+ * @cl: host client
+ */
+void mei_cl_bus_rx_event(struct mei_cl *cl)
+{
+ struct mei_cl_device *cldev = cl->cldev;
+
+ if (!cldev || !cldev->event_cb)
+ return;
+
+ if (!(cldev->events_mask & BIT(MEI_CL_EVENT_RX)))
+ return;
+
+ set_bit(MEI_CL_EVENT_RX, &cldev->events);
+
+ schedule_work(&cldev->event_work);
+}
+
+/**
+ * mei_cldev_register_event_cb - register event callback
+ *
+ * @cldev: me client devices
+ * @event_cb: callback function
+ * @events_mask: requested events bitmask
+ * @context: driver context data
+ *
+ * Return: 0 on success
+ * -EALREADY if an callback is already registered
+ * <0 on other errors
+ */
+int mei_cldev_register_event_cb(struct mei_cl_device *cldev,
+ unsigned long events_mask,
+ mei_cldev_event_cb_t event_cb, void *context)
+{
+ struct mei_device *bus = cldev->bus;
+ int ret;
+
+ if (cldev->event_cb)
+ return -EALREADY;
+
+ cldev->events = 0;
+ cldev->events_mask = events_mask;
+ cldev->event_cb = event_cb;
+ cldev->event_context = context;
+ INIT_WORK(&cldev->event_work, mei_cl_bus_event_work);
+
+ if (cldev->events_mask & BIT(MEI_CL_EVENT_RX)) {
+ mutex_lock(&bus->device_lock);
+ ret = mei_cl_read_start(cldev->cl, 0, NULL);
+ mutex_unlock(&bus->device_lock);
+ if (ret && ret != -EBUSY)
+ return ret;
+ }
+
+ if (cldev->events_mask & BIT(MEI_CL_EVENT_NOTIF)) {
+ mutex_lock(&bus->device_lock);
+ ret = mei_cl_notify_request(cldev->cl, NULL, event_cb ? 1 : 0);
+ mutex_unlock(&bus->device_lock);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(mei_cldev_register_event_cb);
+
+/**
+ * mei_cldev_get_drvdata - driver data getter
+ *
+ * @cldev: mei client device
+ *
+ * Return: driver private data
+ */
+void *mei_cldev_get_drvdata(const struct mei_cl_device *cldev)
+{
+ return dev_get_drvdata(&cldev->dev);
+}
+EXPORT_SYMBOL_GPL(mei_cldev_get_drvdata);
+
+/**
+ * mei_cldev_set_drvdata - driver data setter
+ *
+ * @cldev: mei client device
+ * @data: data to store
+ */
+void mei_cldev_set_drvdata(struct mei_cl_device *cldev, void *data)
+{
+ dev_set_drvdata(&cldev->dev, data);
+}
+EXPORT_SYMBOL_GPL(mei_cldev_set_drvdata);
+
+/**
+ * mei_cldev_uuid - return uuid of the underlying me client
+ *
+ * @cldev: mei client device
+ *
+ * Return: me client uuid
+ */
+const uuid_le *mei_cldev_uuid(const struct mei_cl_device *cldev)
+{
+ return mei_me_cl_uuid(cldev->me_cl);
+}
+EXPORT_SYMBOL_GPL(mei_cldev_uuid);
+
+/**
+ * mei_cldev_ver - return protocol version of the underlying me client
+ *
+ * @cldev: mei client device
+ *
+ * Return: me client protocol version
+ */
+u8 mei_cldev_ver(const struct mei_cl_device *cldev)
+{
+ return mei_me_cl_ver(cldev->me_cl);
+}
+EXPORT_SYMBOL_GPL(mei_cldev_ver);
+
+/**
+ * mei_cldev_enabled - check whether the device is enabled
+ *
+ * @cldev: mei client device
+ *
+ * Return: true if me client is initialized and connected
+ */
+bool mei_cldev_enabled(struct mei_cl_device *cldev)
+{
+ return cldev->cl && mei_cl_is_connected(cldev->cl);
+}
+EXPORT_SYMBOL_GPL(mei_cldev_enabled);
+
+/**
+ * mei_cldev_enable - enable me client device
+ * create connection with me client
+ *
+ * @cldev: me client device
+ *
+ * Return: 0 on success and < 0 on error
+ */
+int mei_cldev_enable(struct mei_cl_device *cldev)
+{
+ struct mei_device *bus = cldev->bus;
+ struct mei_cl *cl;
+ int ret;
+
+ cl = cldev->cl;
+
+ if (!cl) {
+ mutex_lock(&bus->device_lock);
+ cl = mei_cl_alloc_linked(bus, MEI_HOST_CLIENT_ID_ANY);
+ mutex_unlock(&bus->device_lock);
+ if (IS_ERR(cl))
+ return PTR_ERR(cl);
+ /* update pointers */
+ cldev->cl = cl;
+ cl->cldev = cldev;
+ }
+
+ mutex_lock(&bus->device_lock);
+ if (mei_cl_is_connected(cl)) {
+ ret = 0;
+ goto out;
+ }
+
+ if (!mei_me_cl_is_active(cldev->me_cl)) {
+ dev_err(&cldev->dev, "me client is not active\n");
+ ret = -ENOTTY;
+ goto out;
+ }
+
+ ret = mei_cl_connect(cl, cldev->me_cl, NULL);
+ if (ret < 0)
+ dev_err(&cldev->dev, "cannot connect\n");
+
+out:
+ mutex_unlock(&bus->device_lock);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(mei_cldev_enable);
+
+/**
+ * mei_cldev_disable - disable me client device
+ * disconnect form the me client
+ *
+ * @cldev: me client device
+ *
+ * Return: 0 on success and < 0 on error
+ */
+int mei_cldev_disable(struct mei_cl_device *cldev)
+{
+ struct mei_device *bus;
+ struct mei_cl *cl;
+ int err;
+
+ if (!cldev || !cldev->cl)
+ return -ENODEV;
+
+ cl = cldev->cl;
+
+ bus = cldev->bus;
+
+ cldev->event_cb = NULL;
+
+ mutex_lock(&bus->device_lock);
+
+ if (!mei_cl_is_connected(cl)) {
+ dev_err(bus->dev, "Already disconnected");
+ err = 0;
+ goto out;
+ }
+
+ err = mei_cl_disconnect(cl);
+ if (err < 0)
+ dev_err(bus->dev, "Could not disconnect from the ME client");
+
+out:
+ /* Flush queues and remove any pending read */
+ mei_cl_flush_queues(cl, NULL);
+ mei_cl_unlink(cl);
+
+ kfree(cl);
+ cldev->cl = NULL;
+
+ mutex_unlock(&bus->device_lock);
+ return err;
+}
+EXPORT_SYMBOL_GPL(mei_cldev_disable);
+
+/**
+ * mei_cl_device_find - find matching entry in the driver id table
+ *
+ * @cldev: me client device
+ * @cldrv: me client driver
+ *
+ * Return: id on success; NULL if no id is matching
+ */
+static const
+struct mei_cl_device_id *mei_cl_device_find(struct mei_cl_device *cldev,
+ struct mei_cl_driver *cldrv)
+{
+ const struct mei_cl_device_id *id;
+ const uuid_le *uuid;
+ u8 version;
+ bool match;
+
+ uuid = mei_me_cl_uuid(cldev->me_cl);
+ version = mei_me_cl_ver(cldev->me_cl);
+
+ id = cldrv->id_table;
+ while (uuid_le_cmp(NULL_UUID_LE, id->uuid)) {
+ if (!uuid_le_cmp(*uuid, id->uuid)) {
+ match = true;
+
+ if (cldev->name[0])
+ if (strncmp(cldev->name, id->name,
+ sizeof(id->name)))
+ match = false;
+
+ if (id->version != MEI_CL_VERSION_ANY)
+ if (id->version != version)
+ match = false;
+ if (match)
+ return id;
+ }
+
+ id++;
+ }
+
+ return NULL;
+}
+
+/**
+ * mei_cl_device_match - device match function
+ *
+ * @dev: device
+ * @drv: driver
+ *
+ * Return: 1 if matching device was found 0 otherwise
+ */
+static int mei_cl_device_match(struct device *dev, struct device_driver *drv)
+{
+ struct mei_cl_device *cldev = to_mei_cl_device(dev);
+ struct mei_cl_driver *cldrv = to_mei_cl_driver(drv);
+ const struct mei_cl_device_id *found_id;
+
+ if (!cldev)
+ return 0;
+
+ if (!cldev->do_match)
+ return 0;
+
+ if (!cldrv || !cldrv->id_table)
+ return 0;
+
+ found_id = mei_cl_device_find(cldev, cldrv);
+ if (found_id)
+ return 1;
+
+ return 0;
+}
+
+/**
+ * mei_cl_device_probe - bus probe function
+ *
+ * @dev: device
+ *
+ * Return: 0 on success; < 0 otherwise
+ */
+static int mei_cl_device_probe(struct device *dev)
+{
+ struct mei_cl_device *cldev;
+ struct mei_cl_driver *cldrv;
+ const struct mei_cl_device_id *id;
+
+ cldev = to_mei_cl_device(dev);
+ cldrv = to_mei_cl_driver(dev->driver);
+
+ if (!cldev)
+ return 0;
+
+ if (!cldrv || !cldrv->probe)
+ return -ENODEV;
+
+ id = mei_cl_device_find(cldev, cldrv);
+ if (!id)
+ return -ENODEV;
+
+ __module_get(THIS_MODULE);
+
+ return cldrv->probe(cldev, id);
+}
+
+/**
+ * mei_cl_device_remove - remove device from the bus
+ *
+ * @dev: device
+ *
+ * Return: 0 on success; < 0 otherwise
+ */
+static int mei_cl_device_remove(struct device *dev)
+{
+ struct mei_cl_device *cldev = to_mei_cl_device(dev);
+ struct mei_cl_driver *cldrv;
+ int ret = 0;
+
+ if (!cldev || !dev->driver)
+ return 0;
+
+ if (cldev->event_cb) {
+ cldev->event_cb = NULL;
+ cancel_work_sync(&cldev->event_work);
+ }
+
+ cldrv = to_mei_cl_driver(dev->driver);
+ if (cldrv->remove)
+ ret = cldrv->remove(cldev);
+
+ module_put(THIS_MODULE);
+ dev->driver = NULL;
+ return ret;
+
+}
+
+static ssize_t name_show(struct device *dev, struct device_attribute *a,
+ char *buf)
+{
+ struct mei_cl_device *cldev = to_mei_cl_device(dev);
+ size_t len;
+
+ len = snprintf(buf, PAGE_SIZE, "%s", cldev->name);
+
+ return (len >= PAGE_SIZE) ? (PAGE_SIZE - 1) : len;
+}
+static DEVICE_ATTR_RO(name);
+
+static ssize_t uuid_show(struct device *dev, struct device_attribute *a,
+ char *buf)
+{
+ struct mei_cl_device *cldev = to_mei_cl_device(dev);
+ const uuid_le *uuid = mei_me_cl_uuid(cldev->me_cl);
+ size_t len;
+
+ len = snprintf(buf, PAGE_SIZE, "%pUl", uuid);
+
+ return (len >= PAGE_SIZE) ? (PAGE_SIZE - 1) : len;
+}
+static DEVICE_ATTR_RO(uuid);
+
+static ssize_t version_show(struct device *dev, struct device_attribute *a,
+ char *buf)
+{
+ struct mei_cl_device *cldev = to_mei_cl_device(dev);
+ u8 version = mei_me_cl_ver(cldev->me_cl);
+ size_t len;
+
+ len = snprintf(buf, PAGE_SIZE, "%02X", version);
+
+ return (len >= PAGE_SIZE) ? (PAGE_SIZE - 1) : len;
+}
+static DEVICE_ATTR_RO(version);
+
+static ssize_t modalias_show(struct device *dev, struct device_attribute *a,
+ char *buf)
+{
+ struct mei_cl_device *cldev = to_mei_cl_device(dev);
+ const uuid_le *uuid = mei_me_cl_uuid(cldev->me_cl);
+ size_t len;
+
+ len = snprintf(buf, PAGE_SIZE, "mei:%s:%pUl:", cldev->name, uuid);
+ return (len >= PAGE_SIZE) ? (PAGE_SIZE - 1) : len;
+}
+static DEVICE_ATTR_RO(modalias);
+
+static struct attribute *mei_cldev_attrs[] = {
+ &dev_attr_name.attr,
+ &dev_attr_uuid.attr,
+ &dev_attr_version.attr,
+ &dev_attr_modalias.attr,
+ NULL,
+};
+ATTRIBUTE_GROUPS(mei_cldev);
+
+/**
+ * mei_cl_device_uevent - me client bus uevent handler
+ *
+ * @dev: device
+ * @env: uevent kobject
+ *
+ * Return: 0 on success -ENOMEM on when add_uevent_var fails
+ */
+static int mei_cl_device_uevent(struct device *dev, struct kobj_uevent_env *env)
+{
+ struct mei_cl_device *cldev = to_mei_cl_device(dev);
+ const uuid_le *uuid = mei_me_cl_uuid(cldev->me_cl);
+ u8 version = mei_me_cl_ver(cldev->me_cl);
+
+ if (add_uevent_var(env, "MEI_CL_VERSION=%d", version))
+ return -ENOMEM;
+
+ if (add_uevent_var(env, "MEI_CL_UUID=%pUl", uuid))
+ return -ENOMEM;
+
+ if (add_uevent_var(env, "MEI_CL_NAME=%s", cldev->name))
+ return -ENOMEM;
+
+ if (add_uevent_var(env, "MODALIAS=mei:%s:%pUl:%02X:",
+ cldev->name, uuid, version))
+ return -ENOMEM;
+
+ return 0;
+}
+
+static struct bus_type mei_cl_bus_type = {
+ .name = "mei",
+ .dev_groups = mei_cldev_groups,
+ .match = mei_cl_device_match,
+ .probe = mei_cl_device_probe,
+ .remove = mei_cl_device_remove,
+ .uevent = mei_cl_device_uevent,
+};
+
+static struct mei_device *mei_dev_bus_get(struct mei_device *bus)
+{
+ if (bus)
+ get_device(bus->dev);
+
+ return bus;
+}
+
+static void mei_dev_bus_put(struct mei_device *bus)
+{
+ if (bus)
+ put_device(bus->dev);
+}
+
+static void mei_cl_bus_dev_release(struct device *dev)
+{
+ struct mei_cl_device *cldev = to_mei_cl_device(dev);
+
+ if (!cldev)
+ return;
+
+ mei_me_cl_put(cldev->me_cl);
+ mei_dev_bus_put(cldev->bus);
+ kfree(cldev);
+}
+
+static struct device_type mei_cl_device_type = {
+ .release = mei_cl_bus_dev_release,
+};
+
+/**
+ * mei_cl_bus_set_name - set device name for me client device
+ *
+ * @cldev: me client device
+ */
+static inline void mei_cl_bus_set_name(struct mei_cl_device *cldev)
+{
+ dev_set_name(&cldev->dev, "mei:%s:%pUl:%02X",
+ cldev->name,
+ mei_me_cl_uuid(cldev->me_cl),
+ mei_me_cl_ver(cldev->me_cl));
+}
+
+/**
+ * mei_cl_bus_dev_alloc - initialize and allocate mei client device
+ *
+ * @bus: mei device
+ * @me_cl: me client
+ *
+ * Return: allocated device structur or NULL on allocation failure
+ */
+static struct mei_cl_device *mei_cl_bus_dev_alloc(struct mei_device *bus,
+ struct mei_me_client *me_cl)
+{
+ struct mei_cl_device *cldev;
+
+ cldev = kzalloc(sizeof(struct mei_cl_device), GFP_KERNEL);
+ if (!cldev)
+ return NULL;
+
+ device_initialize(&cldev->dev);
+ cldev->dev.parent = bus->dev;
+ cldev->dev.bus = &mei_cl_bus_type;
+ cldev->dev.type = &mei_cl_device_type;
+ cldev->bus = mei_dev_bus_get(bus);
+ cldev->me_cl = mei_me_cl_get(me_cl);
+ mei_cl_bus_set_name(cldev);
+ cldev->is_added = 0;
+ INIT_LIST_HEAD(&cldev->bus_list);
+
+ return cldev;
+}
+
+/**
+ * mei_cl_dev_setup - setup me client device
+ * run fix up routines and set the device name
+ *
+ * @bus: mei device
+ * @cldev: me client device
+ *
+ * Return: true if the device is eligible for enumeration
+ */
+static bool mei_cl_bus_dev_setup(struct mei_device *bus,
+ struct mei_cl_device *cldev)
+{
+ cldev->do_match = 1;
+ mei_cl_bus_dev_fixup(cldev);
+
+ /* the device name can change during fix up */
+ if (cldev->do_match)
+ mei_cl_bus_set_name(cldev);
+
+ return cldev->do_match == 1;
+}
+
+/**
+ * mei_cl_bus_dev_add - add me client devices
+ *
+ * @cldev: me client device
+ *
+ * Return: 0 on success; < 0 on failre
+ */
+static int mei_cl_bus_dev_add(struct mei_cl_device *cldev)
+{
+ int ret;
+
+ dev_dbg(cldev->bus->dev, "adding %pUL:%02X\n",
+ mei_me_cl_uuid(cldev->me_cl),
+ mei_me_cl_ver(cldev->me_cl));
+ ret = device_add(&cldev->dev);
+ if (!ret)
+ cldev->is_added = 1;
+
+ return ret;
+}
+
+/**
+ * mei_cl_bus_dev_stop - stop the driver
+ *
+ * @cldev: me client device
+ */
+static void mei_cl_bus_dev_stop(struct mei_cl_device *cldev)
+{
+ if (cldev->is_added)
+ device_release_driver(&cldev->dev);
+}
+
+/**
+ * mei_cl_bus_dev_destroy - destroy me client devices object
+ *
+ * @cldev: me client device
+ *
+ * Locking: called under "dev->cl_bus_lock" lock
+ */
+static void mei_cl_bus_dev_destroy(struct mei_cl_device *cldev)
+{
+
+ WARN_ON(!mutex_is_locked(&cldev->bus->cl_bus_lock));
+
+ if (!cldev->is_added)
+ return;
+
+ device_del(&cldev->dev);
+
+ list_del_init(&cldev->bus_list);
+
+ cldev->is_added = 0;
+ put_device(&cldev->dev);
+}
+
+/**
+ * mei_cl_bus_remove_device - remove a devices form the bus
+ *
+ * @cldev: me client device
+ */
+static void mei_cl_bus_remove_device(struct mei_cl_device *cldev)
+{
+ mei_cl_bus_dev_stop(cldev);
+ mei_cl_bus_dev_destroy(cldev);
+}
+
+/**
+ * mei_cl_bus_remove_devices - remove all devices form the bus
+ *
+ * @bus: mei device
+ */
+void mei_cl_bus_remove_devices(struct mei_device *bus)
+{
+ struct mei_cl_device *cldev, *next;
+
+ mutex_lock(&bus->cl_bus_lock);
+ list_for_each_entry_safe(cldev, next, &bus->device_list, bus_list)
+ mei_cl_bus_remove_device(cldev);
+ mutex_unlock(&bus->cl_bus_lock);
+}
+
+
+/**
+ * mei_cl_bus_dev_init - allocate and initializes an mei client devices
+ * based on me client
+ *
+ * @bus: mei device
+ * @me_cl: me client
+ *
+ * Locking: called under "dev->cl_bus_lock" lock
+ */
+static void mei_cl_bus_dev_init(struct mei_device *bus,
+ struct mei_me_client *me_cl)
+{
+ struct mei_cl_device *cldev;
+
+ WARN_ON(!mutex_is_locked(&bus->cl_bus_lock));
+
+ dev_dbg(bus->dev, "initializing %pUl", mei_me_cl_uuid(me_cl));
+
+ if (me_cl->bus_added)
+ return;
+
+ cldev = mei_cl_bus_dev_alloc(bus, me_cl);
+ if (!cldev)
+ return;
+
+ me_cl->bus_added = true;
+ list_add_tail(&cldev->bus_list, &bus->device_list);
+
+}
+
+/**
+ * mei_cl_bus_rescan - scan me clients list and add create
+ * devices for eligible clients
+ *
+ * @bus: mei device
+ */
+void mei_cl_bus_rescan(struct mei_device *bus)
+{
+ struct mei_cl_device *cldev, *n;
+ struct mei_me_client *me_cl;
+
+ mutex_lock(&bus->cl_bus_lock);
+
+ down_read(&bus->me_clients_rwsem);
+ list_for_each_entry(me_cl, &bus->me_clients, list)
+ mei_cl_bus_dev_init(bus, me_cl);
+ up_read(&bus->me_clients_rwsem);
+
+ list_for_each_entry_safe(cldev, n, &bus->device_list, bus_list) {
+
+ if (!mei_me_cl_is_active(cldev->me_cl)) {
+ mei_cl_bus_remove_device(cldev);
+ continue;
+ }
+
+ if (cldev->is_added)
+ continue;
+
+ if (mei_cl_bus_dev_setup(bus, cldev))
+ mei_cl_bus_dev_add(cldev);
+ else {
+ list_del_init(&cldev->bus_list);
+ put_device(&cldev->dev);
+ }
+ }
+ mutex_unlock(&bus->cl_bus_lock);
+
+ dev_dbg(bus->dev, "rescan end");
+}
+
+int __mei_cldev_driver_register(struct mei_cl_driver *cldrv,
+ struct module *owner)
+{
+ int err;
+
+ cldrv->driver.name = cldrv->name;
+ cldrv->driver.owner = owner;
+ cldrv->driver.bus = &mei_cl_bus_type;
+
+ err = driver_register(&cldrv->driver);
+ if (err)
+ return err;
+
+ pr_debug("mei: driver [%s] registered\n", cldrv->driver.name);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(__mei_cldev_driver_register);
+
+void mei_cldev_driver_unregister(struct mei_cl_driver *cldrv)
+{
+ driver_unregister(&cldrv->driver);
+
+ pr_debug("mei: driver [%s] unregistered\n", cldrv->driver.name);
+}
+EXPORT_SYMBOL_GPL(mei_cldev_driver_unregister);
+
+
+int __init mei_cl_bus_init(void)
+{
+ return bus_register(&mei_cl_bus_type);
+}
+
+void __exit mei_cl_bus_exit(void)
+{
+ bus_unregister(&mei_cl_bus_type);
+}
diff --git a/drivers/misc/mei/client.c b/drivers/misc/mei/client.c
new file mode 100644
index 0000000..df26836
--- /dev/null
+++ b/drivers/misc/mei/client.c
@@ -0,0 +1,1803 @@
+/*
+ *
+ * Intel Management Engine Interface (Intel MEI) Linux driver
+ * Copyright (c) 2003-2012, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#include <linux/sched.h>
+#include <linux/wait.h>
+#include <linux/delay.h>
+#include <linux/slab.h>
+#include <linux/pm_runtime.h>
+
+#include <linux/mei.h>
+
+#include "mei_dev.h"
+#include "hbm.h"
+#include "client.h"
+
+/**
+ * mei_me_cl_init - initialize me client
+ *
+ * @me_cl: me client
+ */
+void mei_me_cl_init(struct mei_me_client *me_cl)
+{
+ INIT_LIST_HEAD(&me_cl->list);
+ kref_init(&me_cl->refcnt);
+}
+
+/**
+ * mei_me_cl_get - increases me client refcount
+ *
+ * @me_cl: me client
+ *
+ * Locking: called under "dev->device_lock" lock
+ *
+ * Return: me client or NULL
+ */
+struct mei_me_client *mei_me_cl_get(struct mei_me_client *me_cl)
+{
+ if (me_cl && kref_get_unless_zero(&me_cl->refcnt))
+ return me_cl;
+
+ return NULL;
+}
+
+/**
+ * mei_me_cl_release - free me client
+ *
+ * Locking: called under "dev->device_lock" lock
+ *
+ * @ref: me_client refcount
+ */
+static void mei_me_cl_release(struct kref *ref)
+{
+ struct mei_me_client *me_cl =
+ container_of(ref, struct mei_me_client, refcnt);
+
+ kfree(me_cl);
+}
+
+/**
+ * mei_me_cl_put - decrease me client refcount and free client if necessary
+ *
+ * Locking: called under "dev->device_lock" lock
+ *
+ * @me_cl: me client
+ */
+void mei_me_cl_put(struct mei_me_client *me_cl)
+{
+ if (me_cl)
+ kref_put(&me_cl->refcnt, mei_me_cl_release);
+}
+
+/**
+ * __mei_me_cl_del - delete me client from the list and decrease
+ * reference counter
+ *
+ * @dev: mei device
+ * @me_cl: me client
+ *
+ * Locking: dev->me_clients_rwsem
+ */
+static void __mei_me_cl_del(struct mei_device *dev, struct mei_me_client *me_cl)
+{
+ if (!me_cl)
+ return;
+
+ list_del_init(&me_cl->list);
+ mei_me_cl_put(me_cl);
+}
+
+/**
+ * mei_me_cl_del - delete me client from the list and decrease
+ * reference counter
+ *
+ * @dev: mei device
+ * @me_cl: me client
+ */
+void mei_me_cl_del(struct mei_device *dev, struct mei_me_client *me_cl)
+{
+ down_write(&dev->me_clients_rwsem);
+ __mei_me_cl_del(dev, me_cl);
+ up_write(&dev->me_clients_rwsem);
+}
+
+/**
+ * mei_me_cl_add - add me client to the list
+ *
+ * @dev: mei device
+ * @me_cl: me client
+ */
+void mei_me_cl_add(struct mei_device *dev, struct mei_me_client *me_cl)
+{
+ down_write(&dev->me_clients_rwsem);
+ list_add(&me_cl->list, &dev->me_clients);
+ up_write(&dev->me_clients_rwsem);
+}
+
+/**
+ * __mei_me_cl_by_uuid - locate me client by uuid
+ * increases ref count
+ *
+ * @dev: mei device
+ * @uuid: me client uuid
+ *
+ * Return: me client or NULL if not found
+ *
+ * Locking: dev->me_clients_rwsem
+ */
+static struct mei_me_client *__mei_me_cl_by_uuid(struct mei_device *dev,
+ const uuid_le *uuid)
+{
+ struct mei_me_client *me_cl;
+ const uuid_le *pn;
+
+ WARN_ON(!rwsem_is_locked(&dev->me_clients_rwsem));
+
+ list_for_each_entry(me_cl, &dev->me_clients, list) {
+ pn = &me_cl->props.protocol_name;
+ if (uuid_le_cmp(*uuid, *pn) == 0)
+ return mei_me_cl_get(me_cl);
+ }
+
+ return NULL;
+}
+
+/**
+ * mei_me_cl_by_uuid - locate me client by uuid
+ * increases ref count
+ *
+ * @dev: mei device
+ * @uuid: me client uuid
+ *
+ * Return: me client or NULL if not found
+ *
+ * Locking: dev->me_clients_rwsem
+ */
+struct mei_me_client *mei_me_cl_by_uuid(struct mei_device *dev,
+ const uuid_le *uuid)
+{
+ struct mei_me_client *me_cl;
+
+ down_read(&dev->me_clients_rwsem);
+ me_cl = __mei_me_cl_by_uuid(dev, uuid);
+ up_read(&dev->me_clients_rwsem);
+
+ return me_cl;
+}
+
+/**
+ * mei_me_cl_by_id - locate me client by client id
+ * increases ref count
+ *
+ * @dev: the device structure
+ * @client_id: me client id
+ *
+ * Return: me client or NULL if not found
+ *
+ * Locking: dev->me_clients_rwsem
+ */
+struct mei_me_client *mei_me_cl_by_id(struct mei_device *dev, u8 client_id)
+{
+
+ struct mei_me_client *__me_cl, *me_cl = NULL;
+
+ down_read(&dev->me_clients_rwsem);
+ list_for_each_entry(__me_cl, &dev->me_clients, list) {
+ if (__me_cl->client_id == client_id) {
+ me_cl = mei_me_cl_get(__me_cl);
+ break;
+ }
+ }
+ up_read(&dev->me_clients_rwsem);
+
+ return me_cl;
+}
+
+/**
+ * __mei_me_cl_by_uuid_id - locate me client by client id and uuid
+ * increases ref count
+ *
+ * @dev: the device structure
+ * @uuid: me client uuid
+ * @client_id: me client id
+ *
+ * Return: me client or null if not found
+ *
+ * Locking: dev->me_clients_rwsem
+ */
+static struct mei_me_client *__mei_me_cl_by_uuid_id(struct mei_device *dev,
+ const uuid_le *uuid, u8 client_id)
+{
+ struct mei_me_client *me_cl;
+ const uuid_le *pn;
+
+ WARN_ON(!rwsem_is_locked(&dev->me_clients_rwsem));
+
+ list_for_each_entry(me_cl, &dev->me_clients, list) {
+ pn = &me_cl->props.protocol_name;
+ if (uuid_le_cmp(*uuid, *pn) == 0 &&
+ me_cl->client_id == client_id)
+ return mei_me_cl_get(me_cl);
+ }
+
+ return NULL;
+}
+
+
+/**
+ * mei_me_cl_by_uuid_id - locate me client by client id and uuid
+ * increases ref count
+ *
+ * @dev: the device structure
+ * @uuid: me client uuid
+ * @client_id: me client id
+ *
+ * Return: me client or null if not found
+ */
+struct mei_me_client *mei_me_cl_by_uuid_id(struct mei_device *dev,
+ const uuid_le *uuid, u8 client_id)
+{
+ struct mei_me_client *me_cl;
+
+ down_read(&dev->me_clients_rwsem);
+ me_cl = __mei_me_cl_by_uuid_id(dev, uuid, client_id);
+ up_read(&dev->me_clients_rwsem);
+
+ return me_cl;
+}
+
+/**
+ * mei_me_cl_rm_by_uuid - remove all me clients matching uuid
+ *
+ * @dev: the device structure
+ * @uuid: me client uuid
+ *
+ * Locking: called under "dev->device_lock" lock
+ */
+void mei_me_cl_rm_by_uuid(struct mei_device *dev, const uuid_le *uuid)
+{
+ struct mei_me_client *me_cl;
+
+ dev_dbg(dev->dev, "remove %pUl\n", uuid);
+
+ down_write(&dev->me_clients_rwsem);
+ me_cl = __mei_me_cl_by_uuid(dev, uuid);
+ __mei_me_cl_del(dev, me_cl);
+ up_write(&dev->me_clients_rwsem);
+}
+
+/**
+ * mei_me_cl_rm_by_uuid_id - remove all me clients matching client id
+ *
+ * @dev: the device structure
+ * @uuid: me client uuid
+ * @id: me client id
+ *
+ * Locking: called under "dev->device_lock" lock
+ */
+void mei_me_cl_rm_by_uuid_id(struct mei_device *dev, const uuid_le *uuid, u8 id)
+{
+ struct mei_me_client *me_cl;
+
+ dev_dbg(dev->dev, "remove %pUl %d\n", uuid, id);
+
+ down_write(&dev->me_clients_rwsem);
+ me_cl = __mei_me_cl_by_uuid_id(dev, uuid, id);
+ __mei_me_cl_del(dev, me_cl);
+ up_write(&dev->me_clients_rwsem);
+}
+
+/**
+ * mei_me_cl_rm_all - remove all me clients
+ *
+ * @dev: the device structure
+ *
+ * Locking: called under "dev->device_lock" lock
+ */
+void mei_me_cl_rm_all(struct mei_device *dev)
+{
+ struct mei_me_client *me_cl, *next;
+
+ down_write(&dev->me_clients_rwsem);
+ list_for_each_entry_safe(me_cl, next, &dev->me_clients, list)
+ __mei_me_cl_del(dev, me_cl);
+ up_write(&dev->me_clients_rwsem);
+}
+
+/**
+ * mei_cl_cmp_id - tells if the clients are the same
+ *
+ * @cl1: host client 1
+ * @cl2: host client 2
+ *
+ * Return: true - if the clients has same host and me ids
+ * false - otherwise
+ */
+static inline bool mei_cl_cmp_id(const struct mei_cl *cl1,
+ const struct mei_cl *cl2)
+{
+ return cl1 && cl2 &&
+ (cl1->host_client_id == cl2->host_client_id) &&
+ (mei_cl_me_id(cl1) == mei_cl_me_id(cl2));
+}
+
+/**
+ * mei_io_cb_free - free mei_cb_private related memory
+ *
+ * @cb: mei callback struct
+ */
+void mei_io_cb_free(struct mei_cl_cb *cb)
+{
+ if (cb == NULL)
+ return;
+
+ list_del(&cb->list);
+ kfree(cb->buf.data);
+ kfree(cb);
+}
+
+/**
+ * mei_io_cb_init - allocate and initialize io callback
+ *
+ * @cl: mei client
+ * @type: operation type
+ * @fp: pointer to file structure
+ *
+ * Return: mei_cl_cb pointer or NULL;
+ */
+struct mei_cl_cb *mei_io_cb_init(struct mei_cl *cl, enum mei_cb_file_ops type,
+ struct file *fp)
+{
+ struct mei_cl_cb *cb;
+
+ cb = kzalloc(sizeof(struct mei_cl_cb), GFP_KERNEL);
+ if (!cb)
+ return NULL;
+
+ INIT_LIST_HEAD(&cb->list);
+ cb->file_object = fp;
+ cb->cl = cl;
+ cb->buf_idx = 0;
+ cb->fop_type = type;
+ return cb;
+}
+
+/**
+ * __mei_io_list_flush - removes and frees cbs belonging to cl.
+ *
+ * @list: an instance of our list structure
+ * @cl: host client, can be NULL for flushing the whole list
+ * @free: whether to free the cbs
+ */
+static void __mei_io_list_flush(struct mei_cl_cb *list,
+ struct mei_cl *cl, bool free)
+{
+ struct mei_cl_cb *cb, *next;
+
+ /* enable removing everything if no cl is specified */
+ list_for_each_entry_safe(cb, next, &list->list, list) {
+ if (!cl || mei_cl_cmp_id(cl, cb->cl)) {
+ list_del_init(&cb->list);
+ if (free)
+ mei_io_cb_free(cb);
+ }
+ }
+}
+
+/**
+ * mei_io_list_flush - removes list entry belonging to cl.
+ *
+ * @list: An instance of our list structure
+ * @cl: host client
+ */
+void mei_io_list_flush(struct mei_cl_cb *list, struct mei_cl *cl)
+{
+ __mei_io_list_flush(list, cl, false);
+}
+
+/**
+ * mei_io_list_free - removes cb belonging to cl and free them
+ *
+ * @list: An instance of our list structure
+ * @cl: host client
+ */
+static inline void mei_io_list_free(struct mei_cl_cb *list, struct mei_cl *cl)
+{
+ __mei_io_list_flush(list, cl, true);
+}
+
+/**
+ * mei_io_cb_alloc_buf - allocate callback buffer
+ *
+ * @cb: io callback structure
+ * @length: size of the buffer
+ *
+ * Return: 0 on success
+ * -EINVAL if cb is NULL
+ * -ENOMEM if allocation failed
+ */
+int mei_io_cb_alloc_buf(struct mei_cl_cb *cb, size_t length)
+{
+ if (!cb)
+ return -EINVAL;
+
+ if (length == 0)
+ return 0;
+
+ cb->buf.data = kmalloc(length, GFP_KERNEL);
+ if (!cb->buf.data)
+ return -ENOMEM;
+ cb->buf.size = length;
+ return 0;
+}
+
+/**
+ * mei_cl_alloc_cb - a convenient wrapper for allocating read cb
+ *
+ * @cl: host client
+ * @length: size of the buffer
+ * @type: operation type
+ * @fp: associated file pointer (might be NULL)
+ *
+ * Return: cb on success and NULL on failure
+ */
+struct mei_cl_cb *mei_cl_alloc_cb(struct mei_cl *cl, size_t length,
+ enum mei_cb_file_ops type, struct file *fp)
+{
+ struct mei_cl_cb *cb;
+
+ cb = mei_io_cb_init(cl, type, fp);
+ if (!cb)
+ return NULL;
+
+ if (mei_io_cb_alloc_buf(cb, length)) {
+ mei_io_cb_free(cb);
+ return NULL;
+ }
+
+ return cb;
+}
+
+/**
+ * mei_cl_read_cb - find this cl's callback in the read list
+ * for a specific file
+ *
+ * @cl: host client
+ * @fp: file pointer (matching cb file object), may be NULL
+ *
+ * Return: cb on success, NULL if cb is not found
+ */
+struct mei_cl_cb *mei_cl_read_cb(const struct mei_cl *cl, const struct file *fp)
+{
+ struct mei_cl_cb *cb;
+
+ list_for_each_entry(cb, &cl->rd_completed, list)
+ if (!fp || fp == cb->file_object)
+ return cb;
+
+ return NULL;
+}
+
+/**
+ * mei_cl_read_cb_flush - free client's read pending and completed cbs
+ * for a specific file
+ *
+ * @cl: host client
+ * @fp: file pointer (matching cb file object), may be NULL
+ */
+void mei_cl_read_cb_flush(const struct mei_cl *cl, const struct file *fp)
+{
+ struct mei_cl_cb *cb, *next;
+
+ list_for_each_entry_safe(cb, next, &cl->rd_completed, list)
+ if (!fp || fp == cb->file_object)
+ mei_io_cb_free(cb);
+
+
+ list_for_each_entry_safe(cb, next, &cl->rd_pending, list)
+ if (!fp || fp == cb->file_object)
+ mei_io_cb_free(cb);
+}
+
+/**
+ * mei_cl_flush_queues - flushes queue lists belonging to cl.
+ *
+ * @cl: host client
+ * @fp: file pointer (matching cb file object), may be NULL
+ *
+ * Return: 0 on success, -EINVAL if cl or cl->dev is NULL.
+ */
+int mei_cl_flush_queues(struct mei_cl *cl, const struct file *fp)
+{
+ struct mei_device *dev;
+
+ if (WARN_ON(!cl || !cl->dev))
+ return -EINVAL;
+
+ dev = cl->dev;
+
+ cl_dbg(dev, cl, "remove list entry belonging to cl\n");
+ mei_io_list_free(&cl->dev->write_list, cl);
+ mei_io_list_free(&cl->dev->write_waiting_list, cl);
+ mei_io_list_flush(&cl->dev->ctrl_wr_list, cl);
+ mei_io_list_flush(&cl->dev->ctrl_rd_list, cl);
+ mei_io_list_flush(&cl->dev->amthif_cmd_list, cl);
+ mei_io_list_flush(&cl->dev->amthif_rd_complete_list, cl);
+
+ mei_cl_read_cb_flush(cl, fp);
+
+ return 0;
+}
+
+
+/**
+ * mei_cl_init - initializes cl.
+ *
+ * @cl: host client to be initialized
+ * @dev: mei device
+ */
+void mei_cl_init(struct mei_cl *cl, struct mei_device *dev)
+{
+ memset(cl, 0, sizeof(struct mei_cl));
+ init_waitqueue_head(&cl->wait);
+ init_waitqueue_head(&cl->rx_wait);
+ init_waitqueue_head(&cl->tx_wait);
+ init_waitqueue_head(&cl->ev_wait);
+ INIT_LIST_HEAD(&cl->rd_completed);
+ INIT_LIST_HEAD(&cl->rd_pending);
+ INIT_LIST_HEAD(&cl->link);
+ cl->writing_state = MEI_IDLE;
+ cl->state = MEI_FILE_INITIALIZING;
+ cl->dev = dev;
+}
+
+/**
+ * mei_cl_allocate - allocates cl structure and sets it up.
+ *
+ * @dev: mei device
+ * Return: The allocated file or NULL on failure
+ */
+struct mei_cl *mei_cl_allocate(struct mei_device *dev)
+{
+ struct mei_cl *cl;
+
+ cl = kmalloc(sizeof(struct mei_cl), GFP_KERNEL);
+ if (!cl)
+ return NULL;
+
+ mei_cl_init(cl, dev);
+
+ return cl;
+}
+
+/**
+ * mei_cl_link - allocate host id in the host map
+ *
+ * @cl: host client
+ * @id: fixed host id or MEI_HOST_CLIENT_ID_ANY (-1) for generic one
+ *
+ * Return: 0 on success
+ * -EINVAL on incorrect values
+ * -EMFILE if open count exceeded.
+ */
+int mei_cl_link(struct mei_cl *cl, int id)
+{
+ struct mei_device *dev;
+ long open_handle_count;
+
+ if (WARN_ON(!cl || !cl->dev))
+ return -EINVAL;
+
+ dev = cl->dev;
+
+ /* If Id is not assigned get one*/
+ if (id == MEI_HOST_CLIENT_ID_ANY)
+ id = find_first_zero_bit(dev->host_clients_map,
+ MEI_CLIENTS_MAX);
+
+ if (id >= MEI_CLIENTS_MAX) {
+ dev_err(dev->dev, "id exceeded %d", MEI_CLIENTS_MAX);
+ return -EMFILE;
+ }
+
+ open_handle_count = dev->open_handle_count + dev->iamthif_open_count;
+ if (open_handle_count >= MEI_MAX_OPEN_HANDLE_COUNT) {
+ dev_err(dev->dev, "open_handle_count exceeded %d",
+ MEI_MAX_OPEN_HANDLE_COUNT);
+ return -EMFILE;
+ }
+
+ dev->open_handle_count++;
+
+ cl->host_client_id = id;
+ list_add_tail(&cl->link, &dev->file_list);
+
+ set_bit(id, dev->host_clients_map);
+
+ cl->state = MEI_FILE_INITIALIZING;
+
+ cl_dbg(dev, cl, "link cl\n");
+ return 0;
+}
+
+/**
+ * mei_cl_unlink - remove host client from the list
+ *
+ * @cl: host client
+ *
+ * Return: always 0
+ */
+int mei_cl_unlink(struct mei_cl *cl)
+{
+ struct mei_device *dev;
+
+ /* don't shout on error exit path */
+ if (!cl)
+ return 0;
+
+ /* wd and amthif might not be initialized */
+ if (!cl->dev)
+ return 0;
+
+ dev = cl->dev;
+
+ cl_dbg(dev, cl, "unlink client");
+
+ if (dev->open_handle_count > 0)
+ dev->open_handle_count--;
+
+ /* never clear the 0 bit */
+ if (cl->host_client_id)
+ clear_bit(cl->host_client_id, dev->host_clients_map);
+
+ list_del_init(&cl->link);
+
+ cl->state = MEI_FILE_INITIALIZING;
+
+ return 0;
+}
+
+
+void mei_host_client_init(struct work_struct *work)
+{
+ struct mei_device *dev =
+ container_of(work, struct mei_device, init_work);
+ struct mei_me_client *me_cl;
+
+ mutex_lock(&dev->device_lock);
+
+
+ me_cl = mei_me_cl_by_uuid(dev, &mei_amthif_guid);
+ if (me_cl)
+ mei_amthif_host_init(dev, me_cl);
+ mei_me_cl_put(me_cl);
+
+ me_cl = mei_me_cl_by_uuid(dev, &mei_wd_guid);
+ if (me_cl)
+ mei_wd_host_init(dev, me_cl);
+ mei_me_cl_put(me_cl);
+
+ dev->dev_state = MEI_DEV_ENABLED;
+ dev->reset_count = 0;
+ mutex_unlock(&dev->device_lock);
+
+ mei_cl_bus_rescan(dev);
+
+ pm_runtime_mark_last_busy(dev->dev);
+ dev_dbg(dev->dev, "rpm: autosuspend\n");
+ pm_request_autosuspend(dev->dev);
+}
+
+/**
+ * mei_hbuf_acquire - try to acquire host buffer
+ *
+ * @dev: the device structure
+ * Return: true if host buffer was acquired
+ */
+bool mei_hbuf_acquire(struct mei_device *dev)
+{
+ if (mei_pg_state(dev) == MEI_PG_ON ||
+ mei_pg_in_transition(dev)) {
+ dev_dbg(dev->dev, "device is in pg\n");
+ return false;
+ }
+
+ if (!dev->hbuf_is_ready) {
+ dev_dbg(dev->dev, "hbuf is not ready\n");
+ return false;
+ }
+
+ dev->hbuf_is_ready = false;
+
+ return true;
+}
+
+/**
+ * mei_cl_set_disconnected - set disconnected state and clear
+ * associated states and resources
+ *
+ * @cl: host client
+ */
+void mei_cl_set_disconnected(struct mei_cl *cl)
+{
+ struct mei_device *dev = cl->dev;
+
+ if (cl->state == MEI_FILE_DISCONNECTED ||
+ cl->state == MEI_FILE_INITIALIZING)
+ return;
+
+ cl->state = MEI_FILE_DISCONNECTED;
+ mei_io_list_flush(&dev->ctrl_rd_list, cl);
+ mei_io_list_flush(&dev->ctrl_wr_list, cl);
+ cl->mei_flow_ctrl_creds = 0;
+ cl->timer_count = 0;
+
+ if (!cl->me_cl)
+ return;
+
+ if (!WARN_ON(cl->me_cl->connect_count == 0))
+ cl->me_cl->connect_count--;
+
+ if (cl->me_cl->connect_count == 0)
+ cl->me_cl->mei_flow_ctrl_creds = 0;
+
+ mei_me_cl_put(cl->me_cl);
+ cl->me_cl = NULL;
+}
+
+static int mei_cl_set_connecting(struct mei_cl *cl, struct mei_me_client *me_cl)
+{
+ if (!mei_me_cl_get(me_cl))
+ return -ENOENT;
+
+ /* only one connection is allowed for fixed address clients */
+ if (me_cl->props.fixed_address) {
+ if (me_cl->connect_count) {
+ mei_me_cl_put(me_cl);
+ return -EBUSY;
+ }
+ }
+
+ cl->me_cl = me_cl;
+ cl->state = MEI_FILE_CONNECTING;
+ cl->me_cl->connect_count++;
+
+ return 0;
+}
+
+/*
+ * mei_cl_send_disconnect - send disconnect request
+ *
+ * @cl: host client
+ * @cb: callback block
+ *
+ * Return: 0, OK; otherwise, error.
+ */
+static int mei_cl_send_disconnect(struct mei_cl *cl, struct mei_cl_cb *cb)
+{
+ struct mei_device *dev;
+ int ret;
+
+ dev = cl->dev;
+
+ ret = mei_hbm_cl_disconnect_req(dev, cl);
+ cl->status = ret;
+ if (ret) {
+ cl->state = MEI_FILE_DISCONNECT_REPLY;
+ return ret;
+ }
+
+ list_move_tail(&cb->list, &dev->ctrl_rd_list.list);
+ cl->timer_count = MEI_CONNECT_TIMEOUT;
+
+ return 0;
+}
+
+/**
+ * mei_cl_irq_disconnect - processes close related operation from
+ * interrupt thread context - send disconnect request
+ *
+ * @cl: client
+ * @cb: callback block.
+ * @cmpl_list: complete list.
+ *
+ * Return: 0, OK; otherwise, error.
+ */
+int mei_cl_irq_disconnect(struct mei_cl *cl, struct mei_cl_cb *cb,
+ struct mei_cl_cb *cmpl_list)
+{
+ struct mei_device *dev = cl->dev;
+ u32 msg_slots;
+ int slots;
+ int ret;
+
+ msg_slots = mei_data2slots(sizeof(struct hbm_client_connect_request));
+ slots = mei_hbuf_empty_slots(dev);
+
+ if (slots < msg_slots)
+ return -EMSGSIZE;
+
+ ret = mei_cl_send_disconnect(cl, cb);
+ if (ret)
+ list_move_tail(&cb->list, &cmpl_list->list);
+
+ return ret;
+}
+
+/**
+ * __mei_cl_disconnect - disconnect host client from the me one
+ * internal function runtime pm has to be already acquired
+ *
+ * @cl: host client
+ *
+ * Return: 0 on success, <0 on failure.
+ */
+static int __mei_cl_disconnect(struct mei_cl *cl)
+{
+ struct mei_device *dev;
+ struct mei_cl_cb *cb;
+ int rets;
+
+ dev = cl->dev;
+
+ cl->state = MEI_FILE_DISCONNECTING;
+
+ cb = mei_io_cb_init(cl, MEI_FOP_DISCONNECT, NULL);
+ rets = cb ? 0 : -ENOMEM;
+ if (rets)
+ goto out;
+
+ cl_dbg(dev, cl, "add disconnect cb to control write list\n");
+ list_add_tail(&cb->list, &dev->ctrl_wr_list.list);
+
+ if (mei_hbuf_acquire(dev)) {
+ rets = mei_cl_send_disconnect(cl, cb);
+ if (rets) {
+ cl_err(dev, cl, "failed to disconnect.\n");
+ goto out;
+ }
+ }
+
+ mutex_unlock(&dev->device_lock);
+ wait_event_timeout(cl->wait, cl->state == MEI_FILE_DISCONNECT_REPLY,
+ mei_secs_to_jiffies(MEI_CL_CONNECT_TIMEOUT));
+ mutex_lock(&dev->device_lock);
+
+ rets = cl->status;
+ if (cl->state != MEI_FILE_DISCONNECT_REPLY) {
+ cl_dbg(dev, cl, "timeout on disconnect from FW client.\n");
+ rets = -ETIME;
+ }
+
+out:
+ /* we disconnect also on error */
+ mei_cl_set_disconnected(cl);
+ if (!rets)
+ cl_dbg(dev, cl, "successfully disconnected from FW client.\n");
+
+ mei_io_cb_free(cb);
+ return rets;
+}
+
+/**
+ * mei_cl_disconnect - disconnect host client from the me one
+ *
+ * @cl: host client
+ *
+ * Locking: called under "dev->device_lock" lock
+ *
+ * Return: 0 on success, <0 on failure.
+ */
+int mei_cl_disconnect(struct mei_cl *cl)
+{
+ struct mei_device *dev;
+ int rets;
+
+ if (WARN_ON(!cl || !cl->dev))
+ return -ENODEV;
+
+ dev = cl->dev;
+
+ cl_dbg(dev, cl, "disconnecting");
+
+ if (!mei_cl_is_connected(cl))
+ return 0;
+
+ if (mei_cl_is_fixed_address(cl)) {
+ mei_cl_set_disconnected(cl);
+ return 0;
+ }
+
+ rets = pm_runtime_get(dev->dev);
+ if (rets < 0 && rets != -EINPROGRESS) {
+ pm_runtime_put_noidle(dev->dev);
+ cl_err(dev, cl, "rpm: get failed %d\n", rets);
+ return rets;
+ }
+
+ rets = __mei_cl_disconnect(cl);
+
+ cl_dbg(dev, cl, "rpm: autosuspend\n");
+ pm_runtime_mark_last_busy(dev->dev);
+ pm_runtime_put_autosuspend(dev->dev);
+
+ return rets;
+}
+
+
+/**
+ * mei_cl_is_other_connecting - checks if other
+ * client with the same me client id is connecting
+ *
+ * @cl: private data of the file object
+ *
+ * Return: true if other client is connected, false - otherwise.
+ */
+static bool mei_cl_is_other_connecting(struct mei_cl *cl)
+{
+ struct mei_device *dev;
+ struct mei_cl_cb *cb;
+
+ dev = cl->dev;
+
+ list_for_each_entry(cb, &dev->ctrl_rd_list.list, list) {
+ if (cb->fop_type == MEI_FOP_CONNECT &&
+ mei_cl_me_id(cl) == mei_cl_me_id(cb->cl))
+ return true;
+ }
+
+ return false;
+}
+
+/**
+ * mei_cl_send_connect - send connect request
+ *
+ * @cl: host client
+ * @cb: callback block
+ *
+ * Return: 0, OK; otherwise, error.
+ */
+static int mei_cl_send_connect(struct mei_cl *cl, struct mei_cl_cb *cb)
+{
+ struct mei_device *dev;
+ int ret;
+
+ dev = cl->dev;
+
+ ret = mei_hbm_cl_connect_req(dev, cl);
+ cl->status = ret;
+ if (ret) {
+ cl->state = MEI_FILE_DISCONNECT_REPLY;
+ return ret;
+ }
+
+ list_move_tail(&cb->list, &dev->ctrl_rd_list.list);
+ cl->timer_count = MEI_CONNECT_TIMEOUT;
+ return 0;
+}
+
+/**
+ * mei_cl_irq_connect - send connect request in irq_thread context
+ *
+ * @cl: host client
+ * @cb: callback block
+ * @cmpl_list: complete list
+ *
+ * Return: 0, OK; otherwise, error.
+ */
+int mei_cl_irq_connect(struct mei_cl *cl, struct mei_cl_cb *cb,
+ struct mei_cl_cb *cmpl_list)
+{
+ struct mei_device *dev = cl->dev;
+ u32 msg_slots;
+ int slots;
+ int rets;
+
+ msg_slots = mei_data2slots(sizeof(struct hbm_client_connect_request));
+ slots = mei_hbuf_empty_slots(dev);
+
+ if (mei_cl_is_other_connecting(cl))
+ return 0;
+
+ if (slots < msg_slots)
+ return -EMSGSIZE;
+
+ rets = mei_cl_send_connect(cl, cb);
+ if (rets)
+ list_move_tail(&cb->list, &cmpl_list->list);
+
+ return rets;
+}
+
+/**
+ * mei_cl_connect - connect host client to the me one
+ *
+ * @cl: host client
+ * @me_cl: me client
+ * @file: pointer to file structure
+ *
+ * Locking: called under "dev->device_lock" lock
+ *
+ * Return: 0 on success, <0 on failure.
+ */
+int mei_cl_connect(struct mei_cl *cl, struct mei_me_client *me_cl,
+ struct file *file)
+{
+ struct mei_device *dev;
+ struct mei_cl_cb *cb;
+ int rets;
+
+ if (WARN_ON(!cl || !cl->dev || !me_cl))
+ return -ENODEV;
+
+ dev = cl->dev;
+
+ rets = mei_cl_set_connecting(cl, me_cl);
+ if (rets)
+ return rets;
+
+ if (mei_cl_is_fixed_address(cl)) {
+ cl->state = MEI_FILE_CONNECTED;
+ return 0;
+ }
+
+ rets = pm_runtime_get(dev->dev);
+ if (rets < 0 && rets != -EINPROGRESS) {
+ pm_runtime_put_noidle(dev->dev);
+ cl_err(dev, cl, "rpm: get failed %d\n", rets);
+ goto nortpm;
+ }
+
+ cb = mei_io_cb_init(cl, MEI_FOP_CONNECT, file);
+ rets = cb ? 0 : -ENOMEM;
+ if (rets)
+ goto out;
+
+ list_add_tail(&cb->list, &dev->ctrl_wr_list.list);
+
+ /* run hbuf acquire last so we don't have to undo */
+ if (!mei_cl_is_other_connecting(cl) && mei_hbuf_acquire(dev)) {
+ rets = mei_cl_send_connect(cl, cb);
+ if (rets)
+ goto out;
+ }
+
+ mutex_unlock(&dev->device_lock);
+ wait_event_timeout(cl->wait,
+ (cl->state == MEI_FILE_CONNECTED ||
+ cl->state == MEI_FILE_DISCONNECT_REQUIRED ||
+ cl->state == MEI_FILE_DISCONNECT_REPLY),
+ mei_secs_to_jiffies(MEI_CL_CONNECT_TIMEOUT));
+ mutex_lock(&dev->device_lock);
+
+ if (!mei_cl_is_connected(cl)) {
+ if (cl->state == MEI_FILE_DISCONNECT_REQUIRED) {
+ mei_io_list_flush(&dev->ctrl_rd_list, cl);
+ mei_io_list_flush(&dev->ctrl_wr_list, cl);
+ /* ignore disconnect return valuue;
+ * in case of failure reset will be invoked
+ */
+ __mei_cl_disconnect(cl);
+ rets = -EFAULT;
+ goto out;
+ }
+
+ /* timeout or something went really wrong */
+ if (!cl->status)
+ cl->status = -EFAULT;
+ }
+
+ rets = cl->status;
+out:
+ cl_dbg(dev, cl, "rpm: autosuspend\n");
+ pm_runtime_mark_last_busy(dev->dev);
+ pm_runtime_put_autosuspend(dev->dev);
+
+ mei_io_cb_free(cb);
+
+nortpm:
+ if (!mei_cl_is_connected(cl))
+ mei_cl_set_disconnected(cl);
+
+ return rets;
+}
+
+/**
+ * mei_cl_alloc_linked - allocate and link host client
+ *
+ * @dev: the device structure
+ * @id: fixed host id or MEI_HOST_CLIENT_ID_ANY (-1) for generic one
+ *
+ * Return: cl on success ERR_PTR on failure
+ */
+struct mei_cl *mei_cl_alloc_linked(struct mei_device *dev, int id)
+{
+ struct mei_cl *cl;
+ int ret;
+
+ cl = mei_cl_allocate(dev);
+ if (!cl) {
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ ret = mei_cl_link(cl, id);
+ if (ret)
+ goto err;
+
+ return cl;
+err:
+ kfree(cl);
+ return ERR_PTR(ret);
+}
+
+
+
+/**
+ * mei_cl_flow_ctrl_creds - checks flow_control credits for cl.
+ *
+ * @cl: private data of the file object
+ *
+ * Return: 1 if mei_flow_ctrl_creds >0, 0 - otherwise.
+ */
+int mei_cl_flow_ctrl_creds(struct mei_cl *cl)
+{
+ int rets;
+
+ if (WARN_ON(!cl || !cl->me_cl))
+ return -EINVAL;
+
+ if (cl->mei_flow_ctrl_creds > 0)
+ return 1;
+
+ if (mei_cl_is_fixed_address(cl)) {
+ rets = mei_cl_read_start(cl, mei_cl_mtu(cl), NULL);
+ if (rets && rets != -EBUSY)
+ return rets;
+ return 1;
+ }
+
+ if (mei_cl_is_single_recv_buf(cl)) {
+ if (cl->me_cl->mei_flow_ctrl_creds > 0)
+ return 1;
+ }
+ return 0;
+}
+
+/**
+ * mei_cl_flow_ctrl_reduce - reduces flow_control.
+ *
+ * @cl: private data of the file object
+ *
+ * Return:
+ * 0 on success
+ * -EINVAL when ctrl credits are <= 0
+ */
+int mei_cl_flow_ctrl_reduce(struct mei_cl *cl)
+{
+ if (WARN_ON(!cl || !cl->me_cl))
+ return -EINVAL;
+
+ if (mei_cl_is_fixed_address(cl))
+ return 0;
+
+ if (mei_cl_is_single_recv_buf(cl)) {
+ if (WARN_ON(cl->me_cl->mei_flow_ctrl_creds <= 0))
+ return -EINVAL;
+ cl->me_cl->mei_flow_ctrl_creds--;
+ } else {
+ if (WARN_ON(cl->mei_flow_ctrl_creds <= 0))
+ return -EINVAL;
+ cl->mei_flow_ctrl_creds--;
+ }
+ return 0;
+}
+
+/**
+ * mei_cl_notify_fop2req - convert fop to proper request
+ *
+ * @fop: client notification start response command
+ *
+ * Return: MEI_HBM_NOTIFICATION_START/STOP
+ */
+u8 mei_cl_notify_fop2req(enum mei_cb_file_ops fop)
+{
+ if (fop == MEI_FOP_NOTIFY_START)
+ return MEI_HBM_NOTIFICATION_START;
+ else
+ return MEI_HBM_NOTIFICATION_STOP;
+}
+
+/**
+ * mei_cl_notify_req2fop - convert notification request top file operation type
+ *
+ * @req: hbm notification request type
+ *
+ * Return: MEI_FOP_NOTIFY_START/STOP
+ */
+enum mei_cb_file_ops mei_cl_notify_req2fop(u8 req)
+{
+ if (req == MEI_HBM_NOTIFICATION_START)
+ return MEI_FOP_NOTIFY_START;
+ else
+ return MEI_FOP_NOTIFY_STOP;
+}
+
+/**
+ * mei_cl_irq_notify - send notification request in irq_thread context
+ *
+ * @cl: client
+ * @cb: callback block.
+ * @cmpl_list: complete list.
+ *
+ * Return: 0 on such and error otherwise.
+ */
+int mei_cl_irq_notify(struct mei_cl *cl, struct mei_cl_cb *cb,
+ struct mei_cl_cb *cmpl_list)
+{
+ struct mei_device *dev = cl->dev;
+ u32 msg_slots;
+ int slots;
+ int ret;
+ bool request;
+
+ msg_slots = mei_data2slots(sizeof(struct hbm_client_connect_request));
+ slots = mei_hbuf_empty_slots(dev);
+
+ if (slots < msg_slots)
+ return -EMSGSIZE;
+
+ request = mei_cl_notify_fop2req(cb->fop_type);
+ ret = mei_hbm_cl_notify_req(dev, cl, request);
+ if (ret) {
+ cl->status = ret;
+ list_move_tail(&cb->list, &cmpl_list->list);
+ return ret;
+ }
+
+ list_move_tail(&cb->list, &dev->ctrl_rd_list.list);
+ return 0;
+}
+
+/**
+ * mei_cl_notify_request - send notification stop/start request
+ *
+ * @cl: host client
+ * @file: associate request with file
+ * @request: 1 for start or 0 for stop
+ *
+ * Locking: called under "dev->device_lock" lock
+ *
+ * Return: 0 on such and error otherwise.
+ */
+int mei_cl_notify_request(struct mei_cl *cl, struct file *file, u8 request)
+{
+ struct mei_device *dev;
+ struct mei_cl_cb *cb;
+ enum mei_cb_file_ops fop_type;
+ int rets;
+
+ if (WARN_ON(!cl || !cl->dev))
+ return -ENODEV;
+
+ dev = cl->dev;
+
+ if (!dev->hbm_f_ev_supported) {
+ cl_dbg(dev, cl, "notifications not supported\n");
+ return -EOPNOTSUPP;
+ }
+
+ if (!mei_cl_is_connected(cl))
+ return -ENODEV;
+
+ rets = pm_runtime_get(dev->dev);
+ if (rets < 0 && rets != -EINPROGRESS) {
+ pm_runtime_put_noidle(dev->dev);
+ cl_err(dev, cl, "rpm: get failed %d\n", rets);
+ return rets;
+ }
+
+ fop_type = mei_cl_notify_req2fop(request);
+ cb = mei_io_cb_init(cl, fop_type, file);
+ if (!cb) {
+ rets = -ENOMEM;
+ goto out;
+ }
+
+ if (mei_hbuf_acquire(dev)) {
+ if (mei_hbm_cl_notify_req(dev, cl, request)) {
+ rets = -ENODEV;
+ goto out;
+ }
+ list_add_tail(&cb->list, &dev->ctrl_rd_list.list);
+ } else {
+ list_add_tail(&cb->list, &dev->ctrl_wr_list.list);
+ }
+
+ mutex_unlock(&dev->device_lock);
+ wait_event_timeout(cl->wait, cl->notify_en == request,
+ mei_secs_to_jiffies(MEI_CL_CONNECT_TIMEOUT));
+ mutex_lock(&dev->device_lock);
+
+ if (cl->notify_en != request) {
+ mei_io_list_flush(&dev->ctrl_rd_list, cl);
+ mei_io_list_flush(&dev->ctrl_wr_list, cl);
+ if (!cl->status)
+ cl->status = -EFAULT;
+ }
+
+ rets = cl->status;
+
+out:
+ cl_dbg(dev, cl, "rpm: autosuspend\n");
+ pm_runtime_mark_last_busy(dev->dev);
+ pm_runtime_put_autosuspend(dev->dev);
+
+ mei_io_cb_free(cb);
+ return rets;
+}
+
+/**
+ * mei_cl_notify - raise notification
+ *
+ * @cl: host client
+ *
+ * Locking: called under "dev->device_lock" lock
+ */
+void mei_cl_notify(struct mei_cl *cl)
+{
+ struct mei_device *dev;
+
+ if (!cl || !cl->dev)
+ return;
+
+ dev = cl->dev;
+
+ if (!cl->notify_en)
+ return;
+
+ cl_dbg(dev, cl, "notify event");
+ cl->notify_ev = true;
+ wake_up_interruptible_all(&cl->ev_wait);
+
+ if (cl->ev_async)
+ kill_fasync(&cl->ev_async, SIGIO, POLL_PRI);
+
+ mei_cl_bus_notify_event(cl);
+}
+
+/**
+ * mei_cl_notify_get - get or wait for notification event
+ *
+ * @cl: host client
+ * @block: this request is blocking
+ * @notify_ev: true if notification event was received
+ *
+ * Locking: called under "dev->device_lock" lock
+ *
+ * Return: 0 on such and error otherwise.
+ */
+int mei_cl_notify_get(struct mei_cl *cl, bool block, bool *notify_ev)
+{
+ struct mei_device *dev;
+ int rets;
+
+ *notify_ev = false;
+
+ if (WARN_ON(!cl || !cl->dev))
+ return -ENODEV;
+
+ dev = cl->dev;
+
+ if (!mei_cl_is_connected(cl))
+ return -ENODEV;
+
+ if (cl->notify_ev)
+ goto out;
+
+ if (!block)
+ return -EAGAIN;
+
+ mutex_unlock(&dev->device_lock);
+ rets = wait_event_interruptible(cl->ev_wait, cl->notify_ev);
+ mutex_lock(&dev->device_lock);
+
+ if (rets < 0)
+ return rets;
+
+out:
+ *notify_ev = cl->notify_ev;
+ cl->notify_ev = false;
+ return 0;
+}
+
+/**
+ * mei_cl_read_start - the start read client message function.
+ *
+ * @cl: host client
+ * @length: number of bytes to read
+ * @fp: pointer to file structure
+ *
+ * Return: 0 on success, <0 on failure.
+ */
+int mei_cl_read_start(struct mei_cl *cl, size_t length, struct file *fp)
+{
+ struct mei_device *dev;
+ struct mei_cl_cb *cb;
+ int rets;
+
+ if (WARN_ON(!cl || !cl->dev))
+ return -ENODEV;
+
+ dev = cl->dev;
+
+ if (!mei_cl_is_connected(cl))
+ return -ENODEV;
+
+ /* HW currently supports only one pending read */
+ if (!list_empty(&cl->rd_pending))
+ return -EBUSY;
+
+ if (!mei_me_cl_is_active(cl->me_cl)) {
+ cl_err(dev, cl, "no such me client\n");
+ return -ENOTTY;
+ }
+
+ /* always allocate at least client max message */
+ length = max_t(size_t, length, mei_cl_mtu(cl));
+ cb = mei_cl_alloc_cb(cl, length, MEI_FOP_READ, fp);
+ if (!cb)
+ return -ENOMEM;
+
+ if (mei_cl_is_fixed_address(cl)) {
+ list_add_tail(&cb->list, &cl->rd_pending);
+ return 0;
+ }
+
+ rets = pm_runtime_get(dev->dev);
+ if (rets < 0 && rets != -EINPROGRESS) {
+ pm_runtime_put_noidle(dev->dev);
+ cl_err(dev, cl, "rpm: get failed %d\n", rets);
+ goto nortpm;
+ }
+
+ if (mei_hbuf_acquire(dev)) {
+ rets = mei_hbm_cl_flow_control_req(dev, cl);
+ if (rets < 0)
+ goto out;
+
+ list_add_tail(&cb->list, &cl->rd_pending);
+ } else {
+ rets = 0;
+ list_add_tail(&cb->list, &dev->ctrl_wr_list.list);
+ }
+
+out:
+ cl_dbg(dev, cl, "rpm: autosuspend\n");
+ pm_runtime_mark_last_busy(dev->dev);
+ pm_runtime_put_autosuspend(dev->dev);
+nortpm:
+ if (rets)
+ mei_io_cb_free(cb);
+
+ return rets;
+}
+
+/**
+ * mei_cl_irq_write - write a message to device
+ * from the interrupt thread context
+ *
+ * @cl: client
+ * @cb: callback block.
+ * @cmpl_list: complete list.
+ *
+ * Return: 0, OK; otherwise error.
+ */
+int mei_cl_irq_write(struct mei_cl *cl, struct mei_cl_cb *cb,
+ struct mei_cl_cb *cmpl_list)
+{
+ struct mei_device *dev;
+ struct mei_msg_data *buf;
+ struct mei_msg_hdr mei_hdr;
+ size_t len;
+ u32 msg_slots;
+ int slots;
+ int rets;
+ bool first_chunk;
+
+ if (WARN_ON(!cl || !cl->dev))
+ return -ENODEV;
+
+ dev = cl->dev;
+
+ buf = &cb->buf;
+
+ first_chunk = cb->buf_idx == 0;
+
+ rets = first_chunk ? mei_cl_flow_ctrl_creds(cl) : 1;
+ if (rets < 0)
+ return rets;
+
+ if (rets == 0) {
+ cl_dbg(dev, cl, "No flow control credentials: not sending.\n");
+ return 0;
+ }
+
+ slots = mei_hbuf_empty_slots(dev);
+ len = buf->size - cb->buf_idx;
+ msg_slots = mei_data2slots(len);
+
+ mei_hdr.host_addr = mei_cl_host_addr(cl);
+ mei_hdr.me_addr = mei_cl_me_id(cl);
+ mei_hdr.reserved = 0;
+ mei_hdr.internal = cb->internal;
+
+ if (slots >= msg_slots) {
+ mei_hdr.length = len;
+ mei_hdr.msg_complete = 1;
+ /* Split the message only if we can write the whole host buffer */
+ } else if (slots == dev->hbuf_depth) {
+ msg_slots = slots;
+ len = (slots * sizeof(u32)) - sizeof(struct mei_msg_hdr);
+ mei_hdr.length = len;
+ mei_hdr.msg_complete = 0;
+ } else {
+ /* wait for next time the host buffer is empty */
+ return 0;
+ }
+
+ cl_dbg(dev, cl, "buf: size = %d idx = %lu\n",
+ cb->buf.size, cb->buf_idx);
+
+ rets = mei_write_message(dev, &mei_hdr, buf->data + cb->buf_idx);
+ if (rets) {
+ cl->status = rets;
+ list_move_tail(&cb->list, &cmpl_list->list);
+ return rets;
+ }
+
+ cl->status = 0;
+ cl->writing_state = MEI_WRITING;
+ cb->buf_idx += mei_hdr.length;
+ cb->completed = mei_hdr.msg_complete == 1;
+
+ if (first_chunk) {
+ if (mei_cl_flow_ctrl_reduce(cl))
+ return -EIO;
+ }
+
+ if (mei_hdr.msg_complete)
+ list_move_tail(&cb->list, &dev->write_waiting_list.list);
+
+ return 0;
+}
+
+/**
+ * mei_cl_write - submit a write cb to mei device
+ * assumes device_lock is locked
+ *
+ * @cl: host client
+ * @cb: write callback with filled data
+ * @blocking: block until completed
+ *
+ * Return: number of bytes sent on success, <0 on failure.
+ */
+int mei_cl_write(struct mei_cl *cl, struct mei_cl_cb *cb, bool blocking)
+{
+ struct mei_device *dev;
+ struct mei_msg_data *buf;
+ struct mei_msg_hdr mei_hdr;
+ int size;
+ int rets;
+
+
+ if (WARN_ON(!cl || !cl->dev))
+ return -ENODEV;
+
+ if (WARN_ON(!cb))
+ return -EINVAL;
+
+ dev = cl->dev;
+
+ buf = &cb->buf;
+ size = buf->size;
+
+ cl_dbg(dev, cl, "size=%d\n", size);
+
+ rets = pm_runtime_get(dev->dev);
+ if (rets < 0 && rets != -EINPROGRESS) {
+ pm_runtime_put_noidle(dev->dev);
+ cl_err(dev, cl, "rpm: get failed %d\n", rets);
+ return rets;
+ }
+
+ cb->buf_idx = 0;
+ cl->writing_state = MEI_IDLE;
+
+ mei_hdr.host_addr = mei_cl_host_addr(cl);
+ mei_hdr.me_addr = mei_cl_me_id(cl);
+ mei_hdr.reserved = 0;
+ mei_hdr.msg_complete = 0;
+ mei_hdr.internal = cb->internal;
+
+ rets = mei_cl_flow_ctrl_creds(cl);
+ if (rets < 0)
+ goto err;
+
+ if (rets == 0) {
+ cl_dbg(dev, cl, "No flow control credentials: not sending.\n");
+ rets = size;
+ goto out;
+ }
+ if (!mei_hbuf_acquire(dev)) {
+ cl_dbg(dev, cl, "Cannot acquire the host buffer: not sending.\n");
+ rets = size;
+ goto out;
+ }
+
+ /* Check for a maximum length */
+ if (size > mei_hbuf_max_len(dev)) {
+ mei_hdr.length = mei_hbuf_max_len(dev);
+ mei_hdr.msg_complete = 0;
+ } else {
+ mei_hdr.length = size;
+ mei_hdr.msg_complete = 1;
+ }
+
+ rets = mei_write_message(dev, &mei_hdr, buf->data);
+ if (rets)
+ goto err;
+
+ rets = mei_cl_flow_ctrl_reduce(cl);
+ if (rets)
+ goto err;
+
+ cl->writing_state = MEI_WRITING;
+ cb->buf_idx = mei_hdr.length;
+ cb->completed = mei_hdr.msg_complete == 1;
+
+out:
+ if (mei_hdr.msg_complete)
+ list_add_tail(&cb->list, &dev->write_waiting_list.list);
+ else
+ list_add_tail(&cb->list, &dev->write_list.list);
+
+ cb = NULL;
+ if (blocking && cl->writing_state != MEI_WRITE_COMPLETE) {
+
+ mutex_unlock(&dev->device_lock);
+ rets = wait_event_interruptible(cl->tx_wait,
+ cl->writing_state == MEI_WRITE_COMPLETE);
+ mutex_lock(&dev->device_lock);
+ /* wait_event_interruptible returns -ERESTARTSYS */
+ if (rets) {
+ if (signal_pending(current))
+ rets = -EINTR;
+ goto err;
+ }
+ }
+
+ rets = size;
+err:
+ cl_dbg(dev, cl, "rpm: autosuspend\n");
+ pm_runtime_mark_last_busy(dev->dev);
+ pm_runtime_put_autosuspend(dev->dev);
+
+ return rets;
+}
+
+
+/**
+ * mei_cl_complete - processes completed operation for a client
+ *
+ * @cl: private data of the file object.
+ * @cb: callback block.
+ */
+void mei_cl_complete(struct mei_cl *cl, struct mei_cl_cb *cb)
+{
+ struct mei_device *dev = cl->dev;
+
+ switch (cb->fop_type) {
+ case MEI_FOP_WRITE:
+ mei_io_cb_free(cb);
+ cl->writing_state = MEI_WRITE_COMPLETE;
+ if (waitqueue_active(&cl->tx_wait)) {
+ wake_up_interruptible(&cl->tx_wait);
+ } else {
+ pm_runtime_mark_last_busy(dev->dev);
+ pm_request_autosuspend(dev->dev);
+ }
+ break;
+
+ case MEI_FOP_READ:
+ list_add_tail(&cb->list, &cl->rd_completed);
+ if (waitqueue_active(&cl->rx_wait))
+ wake_up_interruptible_all(&cl->rx_wait);
+ else
+ mei_cl_bus_rx_event(cl);
+ break;
+
+ case MEI_FOP_CONNECT:
+ case MEI_FOP_DISCONNECT:
+ case MEI_FOP_NOTIFY_STOP:
+ case MEI_FOP_NOTIFY_START:
+ if (waitqueue_active(&cl->wait))
+ wake_up(&cl->wait);
+
+ break;
+ case MEI_FOP_DISCONNECT_RSP:
+ mei_io_cb_free(cb);
+ mei_cl_set_disconnected(cl);
+ break;
+ default:
+ BUG_ON(0);
+ }
+}
+
+
+/**
+ * mei_cl_all_disconnect - disconnect forcefully all connected clients
+ *
+ * @dev: mei device
+ */
+void mei_cl_all_disconnect(struct mei_device *dev)
+{
+ struct mei_cl *cl;
+
+ list_for_each_entry(cl, &dev->file_list, link)
+ mei_cl_set_disconnected(cl);
+}
+
+
+/**
+ * mei_cl_all_wakeup - wake up all readers and writers they can be interrupted
+ *
+ * @dev: mei device
+ */
+void mei_cl_all_wakeup(struct mei_device *dev)
+{
+ struct mei_cl *cl;
+
+ list_for_each_entry(cl, &dev->file_list, link) {
+ if (waitqueue_active(&cl->rx_wait)) {
+ cl_dbg(dev, cl, "Waking up reading client!\n");
+ wake_up_interruptible(&cl->rx_wait);
+ }
+ if (waitqueue_active(&cl->tx_wait)) {
+ cl_dbg(dev, cl, "Waking up writing client!\n");
+ wake_up_interruptible(&cl->tx_wait);
+ }
+
+ /* synchronized under device mutex */
+ if (waitqueue_active(&cl->ev_wait)) {
+ cl_dbg(dev, cl, "Waking up waiting for event clients!\n");
+ wake_up_interruptible(&cl->ev_wait);
+ }
+ }
+}
+
+/**
+ * mei_cl_all_write_clear - clear all pending writes
+ *
+ * @dev: mei device
+ */
+void mei_cl_all_write_clear(struct mei_device *dev)
+{
+ mei_io_list_free(&dev->write_list, NULL);
+ mei_io_list_free(&dev->write_waiting_list, NULL);
+}
+
+
diff --git a/drivers/misc/mei/client.h b/drivers/misc/mei/client.h
new file mode 100644
index 0000000..04e1aa3
--- /dev/null
+++ b/drivers/misc/mei/client.h
@@ -0,0 +1,255 @@
+/*
+ *
+ * Intel Management Engine Interface (Intel MEI) Linux driver
+ * Copyright (c) 2003-2012, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#ifndef _MEI_CLIENT_H_
+#define _MEI_CLIENT_H_
+
+#include <linux/types.h>
+#include <linux/watchdog.h>
+#include <linux/poll.h>
+#include <linux/mei.h>
+
+#include "mei_dev.h"
+
+/*
+ * reference counting base function
+ */
+void mei_me_cl_init(struct mei_me_client *me_cl);
+void mei_me_cl_put(struct mei_me_client *me_cl);
+struct mei_me_client *mei_me_cl_get(struct mei_me_client *me_cl);
+
+void mei_me_cl_add(struct mei_device *dev, struct mei_me_client *me_cl);
+void mei_me_cl_del(struct mei_device *dev, struct mei_me_client *me_cl);
+
+struct mei_me_client *mei_me_cl_by_uuid(struct mei_device *dev,
+ const uuid_le *uuid);
+struct mei_me_client *mei_me_cl_by_id(struct mei_device *dev, u8 client_id);
+struct mei_me_client *mei_me_cl_by_uuid_id(struct mei_device *dev,
+ const uuid_le *uuid, u8 client_id);
+void mei_me_cl_rm_by_uuid(struct mei_device *dev, const uuid_le *uuid);
+void mei_me_cl_rm_by_uuid_id(struct mei_device *dev,
+ const uuid_le *uuid, u8 id);
+void mei_me_cl_rm_all(struct mei_device *dev);
+
+/**
+ * mei_me_cl_is_active - check whether me client is active in the fw
+ *
+ * @me_cl: me client
+ *
+ * Return: true if the me client is active in the firmware
+ */
+static inline bool mei_me_cl_is_active(const struct mei_me_client *me_cl)
+{
+ return !list_empty_careful(&me_cl->list);
+}
+
+/**
+ * mei_me_cl_uuid - return me client protocol name (uuid)
+ *
+ * @me_cl: me client
+ *
+ * Return: me client protocol name
+ */
+static inline const uuid_le *mei_me_cl_uuid(const struct mei_me_client *me_cl)
+{
+ return &me_cl->props.protocol_name;
+}
+
+/**
+ * mei_me_cl_ver - return me client protocol version
+ *
+ * @me_cl: me client
+ *
+ * Return: me client protocol version
+ */
+static inline u8 mei_me_cl_ver(const struct mei_me_client *me_cl)
+{
+ return me_cl->props.protocol_version;
+}
+
+/*
+ * MEI IO Functions
+ */
+struct mei_cl_cb *mei_io_cb_init(struct mei_cl *cl, enum mei_cb_file_ops type,
+ struct file *fp);
+void mei_io_cb_free(struct mei_cl_cb *priv_cb);
+int mei_io_cb_alloc_buf(struct mei_cl_cb *cb, size_t length);
+
+
+/**
+ * mei_io_list_init - Sets up a queue list.
+ *
+ * @list: An instance cl callback structure
+ */
+static inline void mei_io_list_init(struct mei_cl_cb *list)
+{
+ INIT_LIST_HEAD(&list->list);
+}
+void mei_io_list_flush(struct mei_cl_cb *list, struct mei_cl *cl);
+
+/*
+ * MEI Host Client Functions
+ */
+
+struct mei_cl *mei_cl_allocate(struct mei_device *dev);
+void mei_cl_init(struct mei_cl *cl, struct mei_device *dev);
+
+
+int mei_cl_link(struct mei_cl *cl, int id);
+int mei_cl_unlink(struct mei_cl *cl);
+
+struct mei_cl *mei_cl_alloc_linked(struct mei_device *dev, int id);
+
+struct mei_cl_cb *mei_cl_read_cb(const struct mei_cl *cl,
+ const struct file *fp);
+void mei_cl_read_cb_flush(const struct mei_cl *cl, const struct file *fp);
+struct mei_cl_cb *mei_cl_alloc_cb(struct mei_cl *cl, size_t length,
+ enum mei_cb_file_ops type, struct file *fp);
+int mei_cl_flush_queues(struct mei_cl *cl, const struct file *fp);
+
+int mei_cl_flow_ctrl_creds(struct mei_cl *cl);
+
+int mei_cl_flow_ctrl_reduce(struct mei_cl *cl);
+/*
+ * MEI input output function prototype
+ */
+
+/**
+ * mei_cl_is_connected - host client is connected
+ *
+ * @cl: host client
+ *
+ * Return: true if the host client is connected
+ */
+static inline bool mei_cl_is_connected(struct mei_cl *cl)
+{
+ return cl->state == MEI_FILE_CONNECTED;
+}
+
+/**
+ * mei_cl_me_id - me client id
+ *
+ * @cl: host client
+ *
+ * Return: me client id or 0 if client is not connected
+ */
+static inline u8 mei_cl_me_id(const struct mei_cl *cl)
+{
+ return cl->me_cl ? cl->me_cl->client_id : 0;
+}
+
+/**
+ * mei_cl_mtu - maximal message that client can send and receive
+ *
+ * @cl: host client
+ *
+ * Return: mtu
+ */
+static inline size_t mei_cl_mtu(const struct mei_cl *cl)
+{
+ return cl->me_cl->props.max_msg_length;
+}
+
+/**
+ * mei_cl_is_fixed_address - check whether the me client uses fixed address
+ *
+ * @cl: host client
+ *
+ * Return: true if the client is connected and it has fixed me address
+ */
+static inline bool mei_cl_is_fixed_address(const struct mei_cl *cl)
+{
+ return cl->me_cl && cl->me_cl->props.fixed_address;
+}
+
+/**
+ * mei_cl_is_single_recv_buf- check whether the me client
+ * uses single receiving buffer
+ *
+ * @cl: host client
+ *
+ * Return: true if single_recv_buf == 1; 0 otherwise
+ */
+static inline bool mei_cl_is_single_recv_buf(const struct mei_cl *cl)
+{
+ return cl->me_cl->props.single_recv_buf;
+}
+
+/**
+ * mei_cl_uuid - client's uuid
+ *
+ * @cl: host client
+ *
+ * Return: return uuid of connected me client
+ */
+static inline const uuid_le *mei_cl_uuid(const struct mei_cl *cl)
+{
+ return mei_me_cl_uuid(cl->me_cl);
+}
+
+/**
+ * mei_cl_host_addr - client's host address
+ *
+ * @cl: host client
+ *
+ * Return: 0 for fixed address client, host address for dynamic client
+ */
+static inline u8 mei_cl_host_addr(const struct mei_cl *cl)
+{
+ return mei_cl_is_fixed_address(cl) ? 0 : cl->host_client_id;
+}
+
+int mei_cl_disconnect(struct mei_cl *cl);
+void mei_cl_set_disconnected(struct mei_cl *cl);
+int mei_cl_irq_disconnect(struct mei_cl *cl, struct mei_cl_cb *cb,
+ struct mei_cl_cb *cmpl_list);
+int mei_cl_connect(struct mei_cl *cl, struct mei_me_client *me_cl,
+ struct file *file);
+int mei_cl_irq_connect(struct mei_cl *cl, struct mei_cl_cb *cb,
+ struct mei_cl_cb *cmpl_list);
+int mei_cl_read_start(struct mei_cl *cl, size_t length, struct file *fp);
+int mei_cl_irq_read_msg(struct mei_cl *cl, struct mei_msg_hdr *hdr,
+ struct mei_cl_cb *cmpl_list);
+int mei_cl_write(struct mei_cl *cl, struct mei_cl_cb *cb, bool blocking);
+int mei_cl_irq_write(struct mei_cl *cl, struct mei_cl_cb *cb,
+ struct mei_cl_cb *cmpl_list);
+
+void mei_cl_complete(struct mei_cl *cl, struct mei_cl_cb *cb);
+
+void mei_host_client_init(struct work_struct *work);
+
+u8 mei_cl_notify_fop2req(enum mei_cb_file_ops fop);
+enum mei_cb_file_ops mei_cl_notify_req2fop(u8 request);
+int mei_cl_notify_request(struct mei_cl *cl, struct file *file, u8 request);
+int mei_cl_irq_notify(struct mei_cl *cl, struct mei_cl_cb *cb,
+ struct mei_cl_cb *cmpl_list);
+int mei_cl_notify_get(struct mei_cl *cl, bool block, bool *notify_ev);
+void mei_cl_notify(struct mei_cl *cl);
+
+void mei_cl_all_disconnect(struct mei_device *dev);
+void mei_cl_all_wakeup(struct mei_device *dev);
+void mei_cl_all_write_clear(struct mei_device *dev);
+
+#define MEI_CL_FMT "cl:host=%02d me=%02d "
+#define MEI_CL_PRM(cl) (cl)->host_client_id, mei_cl_me_id(cl)
+
+#define cl_dbg(dev, cl, format, arg...) \
+ dev_dbg((dev)->dev, MEI_CL_FMT format, MEI_CL_PRM(cl), ##arg)
+
+#define cl_err(dev, cl, format, arg...) \
+ dev_err((dev)->dev, MEI_CL_FMT format, MEI_CL_PRM(cl), ##arg)
+
+#endif /* _MEI_CLIENT_H_ */
diff --git a/drivers/misc/mei/debugfs.c b/drivers/misc/mei/debugfs.c
new file mode 100644
index 0000000..a138d8a
--- /dev/null
+++ b/drivers/misc/mei/debugfs.c
@@ -0,0 +1,238 @@
+/*
+ *
+ * Intel Management Engine Interface (Intel MEI) Linux driver
+ * Copyright (c) 2012-2013, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ */
+#include <linux/slab.h>
+#include <linux/kernel.h>
+#include <linux/device.h>
+#include <linux/debugfs.h>
+
+#include <linux/mei.h>
+
+#include "mei_dev.h"
+#include "client.h"
+#include "hw.h"
+
+static ssize_t mei_dbgfs_read_meclients(struct file *fp, char __user *ubuf,
+ size_t cnt, loff_t *ppos)
+{
+ struct mei_device *dev = fp->private_data;
+ struct mei_me_client *me_cl;
+ size_t bufsz = 1;
+ char *buf;
+ int i = 0;
+ int pos = 0;
+ int ret;
+
+#define HDR \
+" |id|fix| UUID |con|msg len|sb|refc|\n"
+
+ down_read(&dev->me_clients_rwsem);
+ list_for_each_entry(me_cl, &dev->me_clients, list)
+ bufsz++;
+
+ bufsz *= sizeof(HDR) + 1;
+ buf = kzalloc(bufsz, GFP_KERNEL);
+ if (!buf) {
+ up_read(&dev->me_clients_rwsem);
+ return -ENOMEM;
+ }
+
+ pos += scnprintf(buf + pos, bufsz - pos, HDR);
+
+ /* if the driver is not enabled the list won't be consistent */
+ if (dev->dev_state != MEI_DEV_ENABLED)
+ goto out;
+
+ list_for_each_entry(me_cl, &dev->me_clients, list) {
+
+ if (mei_me_cl_get(me_cl)) {
+ pos += scnprintf(buf + pos, bufsz - pos,
+ "%2d|%2d|%3d|%pUl|%3d|%7d|%2d|%4d|\n",
+ i++, me_cl->client_id,
+ me_cl->props.fixed_address,
+ &me_cl->props.protocol_name,
+ me_cl->props.max_number_of_connections,
+ me_cl->props.max_msg_length,
+ me_cl->props.single_recv_buf,
+ atomic_read(&me_cl->refcnt.refcount));
+
+ mei_me_cl_put(me_cl);
+ }
+ }
+
+out:
+ up_read(&dev->me_clients_rwsem);
+ ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, pos);
+ kfree(buf);
+ return ret;
+}
+
+static const struct file_operations mei_dbgfs_fops_meclients = {
+ .open = simple_open,
+ .read = mei_dbgfs_read_meclients,
+ .llseek = generic_file_llseek,
+};
+
+static ssize_t mei_dbgfs_read_active(struct file *fp, char __user *ubuf,
+ size_t cnt, loff_t *ppos)
+{
+ struct mei_device *dev = fp->private_data;
+ struct mei_cl *cl;
+ const size_t bufsz = 1024;
+ char *buf;
+ int i = 0;
+ int pos = 0;
+ int ret;
+
+ if (!dev)
+ return -ENODEV;
+
+ buf = kzalloc(bufsz, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+
+ pos += scnprintf(buf + pos, bufsz - pos,
+ " |me|host|state|rd|wr|\n");
+
+ mutex_lock(&dev->device_lock);
+
+ /* if the driver is not enabled the list won't be consistent */
+ if (dev->dev_state != MEI_DEV_ENABLED)
+ goto out;
+
+ list_for_each_entry(cl, &dev->file_list, link) {
+
+ pos += scnprintf(buf + pos, bufsz - pos,
+ "%2d|%2d|%4d|%5d|%2d|%2d|\n",
+ i, mei_cl_me_id(cl), cl->host_client_id, cl->state,
+ !list_empty(&cl->rd_completed), cl->writing_state);
+ i++;
+ }
+out:
+ mutex_unlock(&dev->device_lock);
+ ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, pos);
+ kfree(buf);
+ return ret;
+}
+
+static const struct file_operations mei_dbgfs_fops_active = {
+ .open = simple_open,
+ .read = mei_dbgfs_read_active,
+ .llseek = generic_file_llseek,
+};
+
+static ssize_t mei_dbgfs_read_devstate(struct file *fp, char __user *ubuf,
+ size_t cnt, loff_t *ppos)
+{
+ struct mei_device *dev = fp->private_data;
+ const size_t bufsz = 1024;
+ char *buf = kzalloc(bufsz, GFP_KERNEL);
+ int pos = 0;
+ int ret;
+
+ if (!buf)
+ return -ENOMEM;
+
+ pos += scnprintf(buf + pos, bufsz - pos, "dev: %s\n",
+ mei_dev_state_str(dev->dev_state));
+ pos += scnprintf(buf + pos, bufsz - pos, "hbm: %s\n",
+ mei_hbm_state_str(dev->hbm_state));
+
+ if (dev->hbm_state == MEI_HBM_STARTED) {
+ pos += scnprintf(buf + pos, bufsz - pos, "hbm features:\n");
+ pos += scnprintf(buf + pos, bufsz - pos, "\tPG: %01d\n",
+ dev->hbm_f_pg_supported);
+ pos += scnprintf(buf + pos, bufsz - pos, "\tDC: %01d\n",
+ dev->hbm_f_dc_supported);
+ pos += scnprintf(buf + pos, bufsz - pos, "\tDOT: %01d\n",
+ dev->hbm_f_dot_supported);
+ pos += scnprintf(buf + pos, bufsz - pos, "\tEV: %01d\n",
+ dev->hbm_f_ev_supported);
+ }
+
+ pos += scnprintf(buf + pos, bufsz - pos, "pg: %s, %s\n",
+ mei_pg_is_enabled(dev) ? "ENABLED" : "DISABLED",
+ mei_pg_state_str(mei_pg_state(dev)));
+ ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, pos);
+ kfree(buf);
+ return ret;
+}
+static const struct file_operations mei_dbgfs_fops_devstate = {
+ .open = simple_open,
+ .read = mei_dbgfs_read_devstate,
+ .llseek = generic_file_llseek,
+};
+
+/**
+ * mei_dbgfs_deregister - Remove the debugfs files and directories
+ *
+ * @dev: the mei device structure
+ */
+void mei_dbgfs_deregister(struct mei_device *dev)
+{
+ if (!dev->dbgfs_dir)
+ return;
+ debugfs_remove_recursive(dev->dbgfs_dir);
+ dev->dbgfs_dir = NULL;
+}
+
+/**
+ * mei_dbgfs_register - Add the debugfs files
+ *
+ * @dev: the mei device structure
+ * @name: the mei device name
+ *
+ * Return: 0 on success, <0 on failure.
+ */
+int mei_dbgfs_register(struct mei_device *dev, const char *name)
+{
+ struct dentry *dir, *f;
+
+ dir = debugfs_create_dir(name, NULL);
+ if (!dir)
+ return -ENOMEM;
+
+ dev->dbgfs_dir = dir;
+
+ f = debugfs_create_file("meclients", S_IRUSR, dir,
+ dev, &mei_dbgfs_fops_meclients);
+ if (!f) {
+ dev_err(dev->dev, "meclients: registration failed\n");
+ goto err;
+ }
+ f = debugfs_create_file("active", S_IRUSR, dir,
+ dev, &mei_dbgfs_fops_active);
+ if (!f) {
+ dev_err(dev->dev, "active: registration failed\n");
+ goto err;
+ }
+ f = debugfs_create_file("devstate", S_IRUSR, dir,
+ dev, &mei_dbgfs_fops_devstate);
+ if (!f) {
+ dev_err(dev->dev, "devstate: registration failed\n");
+ goto err;
+ }
+ f = debugfs_create_bool("allow_fixed_address", S_IRUSR | S_IWUSR, dir,
+ &dev->allow_fixed_address);
+ if (!f) {
+ dev_err(dev->dev, "allow_fixed_address: registration failed\n");
+ goto err;
+ }
+ return 0;
+err:
+ mei_dbgfs_deregister(dev);
+ return -ENODEV;
+}
+
diff --git a/drivers/misc/mei/hbm.c b/drivers/misc/mei/hbm.c
new file mode 100644
index 0000000..fd8a9f0
--- /dev/null
+++ b/drivers/misc/mei/hbm.c
@@ -0,0 +1,1244 @@
+/*
+ *
+ * Intel Management Engine Interface (Intel MEI) Linux driver
+ * Copyright (c) 2003-2012, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#include <linux/export.h>
+#include <linux/sched.h>
+#include <linux/wait.h>
+#include <linux/pm_runtime.h>
+#include <linux/slab.h>
+
+#include <linux/mei.h>
+
+#include "mei_dev.h"
+#include "hbm.h"
+#include "client.h"
+
+static const char *mei_hbm_status_str(enum mei_hbm_status status)
+{
+#define MEI_HBM_STATUS(status) case MEI_HBMS_##status: return #status
+ switch (status) {
+ MEI_HBM_STATUS(SUCCESS);
+ MEI_HBM_STATUS(CLIENT_NOT_FOUND);
+ MEI_HBM_STATUS(ALREADY_EXISTS);
+ MEI_HBM_STATUS(REJECTED);
+ MEI_HBM_STATUS(INVALID_PARAMETER);
+ MEI_HBM_STATUS(NOT_ALLOWED);
+ MEI_HBM_STATUS(ALREADY_STARTED);
+ MEI_HBM_STATUS(NOT_STARTED);
+ default: return "unknown";
+ }
+#undef MEI_HBM_STATUS
+};
+
+static const char *mei_cl_conn_status_str(enum mei_cl_connect_status status)
+{
+#define MEI_CL_CS(status) case MEI_CL_CONN_##status: return #status
+ switch (status) {
+ MEI_CL_CS(SUCCESS);
+ MEI_CL_CS(NOT_FOUND);
+ MEI_CL_CS(ALREADY_STARTED);
+ MEI_CL_CS(OUT_OF_RESOURCES);
+ MEI_CL_CS(MESSAGE_SMALL);
+ MEI_CL_CS(NOT_ALLOWED);
+ default: return "unknown";
+ }
+#undef MEI_CL_CCS
+}
+
+const char *mei_hbm_state_str(enum mei_hbm_state state)
+{
+#define MEI_HBM_STATE(state) case MEI_HBM_##state: return #state
+ switch (state) {
+ MEI_HBM_STATE(IDLE);
+ MEI_HBM_STATE(STARTING);
+ MEI_HBM_STATE(STARTED);
+ MEI_HBM_STATE(ENUM_CLIENTS);
+ MEI_HBM_STATE(CLIENT_PROPERTIES);
+ MEI_HBM_STATE(STOPPED);
+ default:
+ return "unknown";
+ }
+#undef MEI_HBM_STATE
+}
+
+/**
+ * mei_cl_conn_status_to_errno - convert client connect response
+ * status to error code
+ *
+ * @status: client connect response status
+ *
+ * Return: corresponding error code
+ */
+static int mei_cl_conn_status_to_errno(enum mei_cl_connect_status status)
+{
+ switch (status) {
+ case MEI_CL_CONN_SUCCESS: return 0;
+ case MEI_CL_CONN_NOT_FOUND: return -ENOTTY;
+ case MEI_CL_CONN_ALREADY_STARTED: return -EBUSY;
+ case MEI_CL_CONN_OUT_OF_RESOURCES: return -EBUSY;
+ case MEI_CL_CONN_MESSAGE_SMALL: return -EINVAL;
+ case MEI_CL_CONN_NOT_ALLOWED: return -EBUSY;
+ default: return -EINVAL;
+ }
+}
+
+/**
+ * mei_hbm_idle - set hbm to idle state
+ *
+ * @dev: the device structure
+ */
+void mei_hbm_idle(struct mei_device *dev)
+{
+ dev->init_clients_timer = 0;
+ dev->hbm_state = MEI_HBM_IDLE;
+}
+
+/**
+ * mei_hbm_reset - reset hbm counters and book keeping data structurs
+ *
+ * @dev: the device structure
+ */
+void mei_hbm_reset(struct mei_device *dev)
+{
+ dev->me_client_index = 0;
+
+ mei_me_cl_rm_all(dev);
+
+ mei_hbm_idle(dev);
+}
+
+/**
+ * mei_hbm_hdr - construct hbm header
+ *
+ * @hdr: hbm header
+ * @length: payload length
+ */
+
+static inline void mei_hbm_hdr(struct mei_msg_hdr *hdr, size_t length)
+{
+ hdr->host_addr = 0;
+ hdr->me_addr = 0;
+ hdr->length = length;
+ hdr->msg_complete = 1;
+ hdr->reserved = 0;
+}
+
+/**
+ * mei_hbm_cl_hdr - construct client hbm header
+ *
+ * @cl: client
+ * @hbm_cmd: host bus message command
+ * @buf: buffer for cl header
+ * @len: buffer length
+ */
+static inline
+void mei_hbm_cl_hdr(struct mei_cl *cl, u8 hbm_cmd, void *buf, size_t len)
+{
+ struct mei_hbm_cl_cmd *cmd = buf;
+
+ memset(cmd, 0, len);
+
+ cmd->hbm_cmd = hbm_cmd;
+ cmd->host_addr = mei_cl_host_addr(cl);
+ cmd->me_addr = mei_cl_me_id(cl);
+}
+
+/**
+ * mei_hbm_cl_write - write simple hbm client message
+ *
+ * @dev: the device structure
+ * @cl: client
+ * @hbm_cmd: host bus message command
+ * @len: buffer length
+ *
+ * Return: 0 on success, <0 on failure.
+ */
+static inline
+int mei_hbm_cl_write(struct mei_device *dev,
+ struct mei_cl *cl, u8 hbm_cmd, size_t len)
+{
+ struct mei_msg_hdr *mei_hdr = &dev->wr_msg.hdr;
+
+ mei_hbm_hdr(mei_hdr, len);
+ mei_hbm_cl_hdr(cl, hbm_cmd, dev->wr_msg.data, len);
+
+ return mei_write_message(dev, mei_hdr, dev->wr_msg.data);
+}
+
+/**
+ * mei_hbm_cl_addr_equal - check if the client's and
+ * the message address match
+ *
+ * @cl: client
+ * @cmd: hbm client message
+ *
+ * Return: true if addresses are the same
+ */
+static inline
+bool mei_hbm_cl_addr_equal(struct mei_cl *cl, struct mei_hbm_cl_cmd *cmd)
+{
+ return mei_cl_host_addr(cl) == cmd->host_addr &&
+ mei_cl_me_id(cl) == cmd->me_addr;
+}
+
+/**
+ * mei_hbm_cl_find_by_cmd - find recipient client
+ *
+ * @dev: the device structure
+ * @buf: a buffer with hbm cl command
+ *
+ * Return: the recipient client or NULL if not found
+ */
+static inline
+struct mei_cl *mei_hbm_cl_find_by_cmd(struct mei_device *dev, void *buf)
+{
+ struct mei_hbm_cl_cmd *cmd = (struct mei_hbm_cl_cmd *)buf;
+ struct mei_cl *cl;
+
+ list_for_each_entry(cl, &dev->file_list, link)
+ if (mei_hbm_cl_addr_equal(cl, cmd))
+ return cl;
+ return NULL;
+}
+
+
+/**
+ * mei_hbm_start_wait - wait for start response message.
+ *
+ * @dev: the device structure
+ *
+ * Return: 0 on success and < 0 on failure
+ */
+int mei_hbm_start_wait(struct mei_device *dev)
+{
+ int ret;
+
+ if (dev->hbm_state > MEI_HBM_STARTING)
+ return 0;
+
+ mutex_unlock(&dev->device_lock);
+ ret = wait_event_timeout(dev->wait_hbm_start,
+ dev->hbm_state != MEI_HBM_STARTING,
+ mei_secs_to_jiffies(MEI_HBM_TIMEOUT));
+ mutex_lock(&dev->device_lock);
+
+ if (ret == 0 && (dev->hbm_state <= MEI_HBM_STARTING)) {
+ dev->hbm_state = MEI_HBM_IDLE;
+ dev_err(dev->dev, "waiting for mei start failed\n");
+ return -ETIME;
+ }
+ return 0;
+}
+
+/**
+ * mei_hbm_start_req - sends start request message.
+ *
+ * @dev: the device structure
+ *
+ * Return: 0 on success and < 0 on failure
+ */
+int mei_hbm_start_req(struct mei_device *dev)
+{
+ struct mei_msg_hdr *mei_hdr = &dev->wr_msg.hdr;
+ struct hbm_host_version_request *start_req;
+ const size_t len = sizeof(struct hbm_host_version_request);
+ int ret;
+
+ mei_hbm_reset(dev);
+
+ mei_hbm_hdr(mei_hdr, len);
+
+ /* host start message */
+ start_req = (struct hbm_host_version_request *)dev->wr_msg.data;
+ memset(start_req, 0, len);
+ start_req->hbm_cmd = HOST_START_REQ_CMD;
+ start_req->host_version.major_version = HBM_MAJOR_VERSION;
+ start_req->host_version.minor_version = HBM_MINOR_VERSION;
+
+ dev->hbm_state = MEI_HBM_IDLE;
+ ret = mei_write_message(dev, mei_hdr, dev->wr_msg.data);
+ if (ret) {
+ dev_err(dev->dev, "version message write failed: ret = %d\n",
+ ret);
+ return ret;
+ }
+
+ dev->hbm_state = MEI_HBM_STARTING;
+ dev->init_clients_timer = MEI_CLIENTS_INIT_TIMEOUT;
+ return 0;
+}
+
+/**
+ * mei_hbm_enum_clients_req - sends enumeration client request message.
+ *
+ * @dev: the device structure
+ *
+ * Return: 0 on success and < 0 on failure
+ */
+static int mei_hbm_enum_clients_req(struct mei_device *dev)
+{
+ struct mei_msg_hdr *mei_hdr = &dev->wr_msg.hdr;
+ struct hbm_host_enum_request *enum_req;
+ const size_t len = sizeof(struct hbm_host_enum_request);
+ int ret;
+
+ /* enumerate clients */
+ mei_hbm_hdr(mei_hdr, len);
+
+ enum_req = (struct hbm_host_enum_request *)dev->wr_msg.data;
+ memset(enum_req, 0, len);
+ enum_req->hbm_cmd = HOST_ENUM_REQ_CMD;
+ enum_req->allow_add = dev->hbm_f_dc_supported;
+
+ ret = mei_write_message(dev, mei_hdr, dev->wr_msg.data);
+ if (ret) {
+ dev_err(dev->dev, "enumeration request write failed: ret = %d.\n",
+ ret);
+ return ret;
+ }
+ dev->hbm_state = MEI_HBM_ENUM_CLIENTS;
+ dev->init_clients_timer = MEI_CLIENTS_INIT_TIMEOUT;
+ return 0;
+}
+
+/**
+ * mei_hbm_me_cl_add - add new me client to the list
+ *
+ * @dev: the device structure
+ * @res: hbm property response
+ *
+ * Return: 0 on success and -ENOMEM on allocation failure
+ */
+
+static int mei_hbm_me_cl_add(struct mei_device *dev,
+ struct hbm_props_response *res)
+{
+ struct mei_me_client *me_cl;
+ const uuid_le *uuid = &res->client_properties.protocol_name;
+
+ mei_me_cl_rm_by_uuid(dev, uuid);
+
+ me_cl = kzalloc(sizeof(struct mei_me_client), GFP_KERNEL);
+ if (!me_cl)
+ return -ENOMEM;
+
+ mei_me_cl_init(me_cl);
+
+ me_cl->props = res->client_properties;
+ me_cl->client_id = res->me_addr;
+ me_cl->mei_flow_ctrl_creds = 0;
+
+ mei_me_cl_add(dev, me_cl);
+
+ return 0;
+}
+
+/**
+ * mei_hbm_add_cl_resp - send response to fw on client add request
+ *
+ * @dev: the device structure
+ * @addr: me address
+ * @status: response status
+ *
+ * Return: 0 on success and < 0 on failure
+ */
+static int mei_hbm_add_cl_resp(struct mei_device *dev, u8 addr, u8 status)
+{
+ struct mei_msg_hdr *mei_hdr = &dev->wr_msg.hdr;
+ struct hbm_add_client_response *resp;
+ const size_t len = sizeof(struct hbm_add_client_response);
+ int ret;
+
+ dev_dbg(dev->dev, "adding client response\n");
+
+ resp = (struct hbm_add_client_response *)dev->wr_msg.data;
+
+ mei_hbm_hdr(mei_hdr, len);
+ memset(resp, 0, sizeof(struct hbm_add_client_response));
+
+ resp->hbm_cmd = MEI_HBM_ADD_CLIENT_RES_CMD;
+ resp->me_addr = addr;
+ resp->status = status;
+
+ ret = mei_write_message(dev, mei_hdr, dev->wr_msg.data);
+ if (ret)
+ dev_err(dev->dev, "add client response write failed: ret = %d\n",
+ ret);
+ return ret;
+}
+
+/**
+ * mei_hbm_fw_add_cl_req - request from the fw to add a client
+ *
+ * @dev: the device structure
+ * @req: add client request
+ *
+ * Return: 0 on success and < 0 on failure
+ */
+static int mei_hbm_fw_add_cl_req(struct mei_device *dev,
+ struct hbm_add_client_request *req)
+{
+ int ret;
+ u8 status = MEI_HBMS_SUCCESS;
+
+ BUILD_BUG_ON(sizeof(struct hbm_add_client_request) !=
+ sizeof(struct hbm_props_response));
+
+ ret = mei_hbm_me_cl_add(dev, (struct hbm_props_response *)req);
+ if (ret)
+ status = !MEI_HBMS_SUCCESS;
+
+ return mei_hbm_add_cl_resp(dev, req->me_addr, status);
+}
+
+/**
+ * mei_hbm_cl_notify_req - send notification request
+ *
+ * @dev: the device structure
+ * @cl: a client to disconnect from
+ * @start: true for start false for stop
+ *
+ * Return: 0 on success and -EIO on write failure
+ */
+int mei_hbm_cl_notify_req(struct mei_device *dev,
+ struct mei_cl *cl, u8 start)
+{
+
+ struct mei_msg_hdr *mei_hdr = &dev->wr_msg.hdr;
+ struct hbm_notification_request *req;
+ const size_t len = sizeof(struct hbm_notification_request);
+ int ret;
+
+ mei_hbm_hdr(mei_hdr, len);
+ mei_hbm_cl_hdr(cl, MEI_HBM_NOTIFY_REQ_CMD, dev->wr_msg.data, len);
+
+ req = (struct hbm_notification_request *)dev->wr_msg.data;
+ req->start = start;
+
+ ret = mei_write_message(dev, mei_hdr, dev->wr_msg.data);
+ if (ret)
+ dev_err(dev->dev, "notify request failed: ret = %d\n", ret);
+
+ return ret;
+}
+
+/**
+ * notify_res_to_fop - convert notification response to the proper
+ * notification FOP
+ *
+ * @cmd: client notification start response command
+ *
+ * Return: MEI_FOP_NOTIFY_START or MEI_FOP_NOTIFY_STOP;
+ */
+static inline enum mei_cb_file_ops notify_res_to_fop(struct mei_hbm_cl_cmd *cmd)
+{
+ struct hbm_notification_response *rs =
+ (struct hbm_notification_response *)cmd;
+
+ return mei_cl_notify_req2fop(rs->start);
+}
+
+/**
+ * mei_hbm_cl_notify_start_res - update the client state according
+ * notify start response
+ *
+ * @dev: the device structure
+ * @cl: mei host client
+ * @cmd: client notification start response command
+ */
+static void mei_hbm_cl_notify_start_res(struct mei_device *dev,
+ struct mei_cl *cl,
+ struct mei_hbm_cl_cmd *cmd)
+{
+ struct hbm_notification_response *rs =
+ (struct hbm_notification_response *)cmd;
+
+ cl_dbg(dev, cl, "hbm: notify start response status=%d\n", rs->status);
+
+ if (rs->status == MEI_HBMS_SUCCESS ||
+ rs->status == MEI_HBMS_ALREADY_STARTED) {
+ cl->notify_en = true;
+ cl->status = 0;
+ } else {
+ cl->status = -EINVAL;
+ }
+}
+
+/**
+ * mei_hbm_cl_notify_stop_res - update the client state according
+ * notify stop response
+ *
+ * @dev: the device structure
+ * @cl: mei host client
+ * @cmd: client notification stop response command
+ */
+static void mei_hbm_cl_notify_stop_res(struct mei_device *dev,
+ struct mei_cl *cl,
+ struct mei_hbm_cl_cmd *cmd)
+{
+ struct hbm_notification_response *rs =
+ (struct hbm_notification_response *)cmd;
+
+ cl_dbg(dev, cl, "hbm: notify stop response status=%d\n", rs->status);
+
+ if (rs->status == MEI_HBMS_SUCCESS ||
+ rs->status == MEI_HBMS_NOT_STARTED) {
+ cl->notify_en = false;
+ cl->status = 0;
+ } else {
+ /* TODO: spec is not clear yet about other possible issues */
+ cl->status = -EINVAL;
+ }
+}
+
+/**
+ * mei_hbm_cl_notify - signal notification event
+ *
+ * @dev: the device structure
+ * @cmd: notification client message
+ */
+static void mei_hbm_cl_notify(struct mei_device *dev,
+ struct mei_hbm_cl_cmd *cmd)
+{
+ struct mei_cl *cl;
+
+ cl = mei_hbm_cl_find_by_cmd(dev, cmd);
+ if (cl)
+ mei_cl_notify(cl);
+}
+
+/**
+ * mei_hbm_prop_req - request property for a single client
+ *
+ * @dev: the device structure
+ *
+ * Return: 0 on success and < 0 on failure
+ */
+
+static int mei_hbm_prop_req(struct mei_device *dev)
+{
+
+ struct mei_msg_hdr *mei_hdr = &dev->wr_msg.hdr;
+ struct hbm_props_request *prop_req;
+ const size_t len = sizeof(struct hbm_props_request);
+ unsigned long next_client_index;
+ int ret;
+
+ next_client_index = find_next_bit(dev->me_clients_map, MEI_CLIENTS_MAX,
+ dev->me_client_index);
+
+ /* We got all client properties */
+ if (next_client_index == MEI_CLIENTS_MAX) {
+ dev->hbm_state = MEI_HBM_STARTED;
+ schedule_work(&dev->init_work);
+
+ return 0;
+ }
+
+ mei_hbm_hdr(mei_hdr, len);
+ prop_req = (struct hbm_props_request *)dev->wr_msg.data;
+
+ memset(prop_req, 0, sizeof(struct hbm_props_request));
+
+ prop_req->hbm_cmd = HOST_CLIENT_PROPERTIES_REQ_CMD;
+ prop_req->me_addr = next_client_index;
+
+ ret = mei_write_message(dev, mei_hdr, dev->wr_msg.data);
+ if (ret) {
+ dev_err(dev->dev, "properties request write failed: ret = %d\n",
+ ret);
+ return ret;
+ }
+
+ dev->init_clients_timer = MEI_CLIENTS_INIT_TIMEOUT;
+ dev->me_client_index = next_client_index;
+
+ return 0;
+}
+
+/**
+ * mei_hbm_pg - sends pg command
+ *
+ * @dev: the device structure
+ * @pg_cmd: the pg command code
+ *
+ * Return: -EIO on write failure
+ * -EOPNOTSUPP if the operation is not supported by the protocol
+ */
+int mei_hbm_pg(struct mei_device *dev, u8 pg_cmd)
+{
+ struct mei_msg_hdr *mei_hdr = &dev->wr_msg.hdr;
+ struct hbm_power_gate *req;
+ const size_t len = sizeof(struct hbm_power_gate);
+ int ret;
+
+ if (!dev->hbm_f_pg_supported)
+ return -EOPNOTSUPP;
+
+ mei_hbm_hdr(mei_hdr, len);
+
+ req = (struct hbm_power_gate *)dev->wr_msg.data;
+ memset(req, 0, len);
+ req->hbm_cmd = pg_cmd;
+
+ ret = mei_write_message(dev, mei_hdr, dev->wr_msg.data);
+ if (ret)
+ dev_err(dev->dev, "power gate command write failed.\n");
+ return ret;
+}
+EXPORT_SYMBOL_GPL(mei_hbm_pg);
+
+/**
+ * mei_hbm_stop_req - send stop request message
+ *
+ * @dev: mei device
+ *
+ * Return: -EIO on write failure
+ */
+static int mei_hbm_stop_req(struct mei_device *dev)
+{
+ struct mei_msg_hdr *mei_hdr = &dev->wr_msg.hdr;
+ struct hbm_host_stop_request *req =
+ (struct hbm_host_stop_request *)dev->wr_msg.data;
+ const size_t len = sizeof(struct hbm_host_stop_request);
+
+ mei_hbm_hdr(mei_hdr, len);
+
+ memset(req, 0, len);
+ req->hbm_cmd = HOST_STOP_REQ_CMD;
+ req->reason = DRIVER_STOP_REQUEST;
+
+ return mei_write_message(dev, mei_hdr, dev->wr_msg.data);
+}
+
+/**
+ * mei_hbm_cl_flow_control_req - sends flow control request.
+ *
+ * @dev: the device structure
+ * @cl: client info
+ *
+ * Return: -EIO on write failure
+ */
+int mei_hbm_cl_flow_control_req(struct mei_device *dev, struct mei_cl *cl)
+{
+ const size_t len = sizeof(struct hbm_flow_control);
+
+ cl_dbg(dev, cl, "sending flow control\n");
+ return mei_hbm_cl_write(dev, cl, MEI_FLOW_CONTROL_CMD, len);
+}
+
+/**
+ * mei_hbm_add_single_flow_creds - adds single buffer credentials.
+ *
+ * @dev: the device structure
+ * @flow: flow control.
+ *
+ * Return: 0 on success, < 0 otherwise
+ */
+static int mei_hbm_add_single_flow_creds(struct mei_device *dev,
+ struct hbm_flow_control *flow)
+{
+ struct mei_me_client *me_cl;
+ int rets;
+
+ me_cl = mei_me_cl_by_id(dev, flow->me_addr);
+ if (!me_cl) {
+ dev_err(dev->dev, "no such me client %d\n",
+ flow->me_addr);
+ return -ENOENT;
+ }
+
+ if (WARN_ON(me_cl->props.single_recv_buf == 0)) {
+ rets = -EINVAL;
+ goto out;
+ }
+
+ me_cl->mei_flow_ctrl_creds++;
+ dev_dbg(dev->dev, "recv flow ctrl msg ME %d (single) creds = %d.\n",
+ flow->me_addr, me_cl->mei_flow_ctrl_creds);
+
+ rets = 0;
+out:
+ mei_me_cl_put(me_cl);
+ return rets;
+}
+
+/**
+ * mei_hbm_cl_flow_control_res - flow control response from me
+ *
+ * @dev: the device structure
+ * @flow_control: flow control response bus message
+ */
+static void mei_hbm_cl_flow_control_res(struct mei_device *dev,
+ struct hbm_flow_control *flow_control)
+{
+ struct mei_cl *cl;
+
+ if (!flow_control->host_addr) {
+ /* single receive buffer */
+ mei_hbm_add_single_flow_creds(dev, flow_control);
+ return;
+ }
+
+ cl = mei_hbm_cl_find_by_cmd(dev, flow_control);
+ if (cl) {
+ cl->mei_flow_ctrl_creds++;
+ cl_dbg(dev, cl, "flow control creds = %d.\n",
+ cl->mei_flow_ctrl_creds);
+ }
+}
+
+
+/**
+ * mei_hbm_cl_disconnect_req - sends disconnect message to fw.
+ *
+ * @dev: the device structure
+ * @cl: a client to disconnect from
+ *
+ * Return: -EIO on write failure
+ */
+int mei_hbm_cl_disconnect_req(struct mei_device *dev, struct mei_cl *cl)
+{
+ const size_t len = sizeof(struct hbm_client_connect_request);
+
+ return mei_hbm_cl_write(dev, cl, CLIENT_DISCONNECT_REQ_CMD, len);
+}
+
+/**
+ * mei_hbm_cl_disconnect_rsp - sends disconnect respose to the FW
+ *
+ * @dev: the device structure
+ * @cl: a client to disconnect from
+ *
+ * Return: -EIO on write failure
+ */
+int mei_hbm_cl_disconnect_rsp(struct mei_device *dev, struct mei_cl *cl)
+{
+ const size_t len = sizeof(struct hbm_client_connect_response);
+
+ return mei_hbm_cl_write(dev, cl, CLIENT_DISCONNECT_RES_CMD, len);
+}
+
+/**
+ * mei_hbm_cl_disconnect_res - update the client state according
+ * disconnect response
+ *
+ * @dev: the device structure
+ * @cl: mei host client
+ * @cmd: disconnect client response host bus message
+ */
+static void mei_hbm_cl_disconnect_res(struct mei_device *dev, struct mei_cl *cl,
+ struct mei_hbm_cl_cmd *cmd)
+{
+ struct hbm_client_connect_response *rs =
+ (struct hbm_client_connect_response *)cmd;
+
+ cl_dbg(dev, cl, "hbm: disconnect response status=%d\n", rs->status);
+
+ if (rs->status == MEI_CL_DISCONN_SUCCESS)
+ cl->state = MEI_FILE_DISCONNECT_REPLY;
+ cl->status = 0;
+}
+
+/**
+ * mei_hbm_cl_connect_req - send connection request to specific me client
+ *
+ * @dev: the device structure
+ * @cl: a client to connect to
+ *
+ * Return: -EIO on write failure
+ */
+int mei_hbm_cl_connect_req(struct mei_device *dev, struct mei_cl *cl)
+{
+ const size_t len = sizeof(struct hbm_client_connect_request);
+
+ return mei_hbm_cl_write(dev, cl, CLIENT_CONNECT_REQ_CMD, len);
+}
+
+/**
+ * mei_hbm_cl_connect_res - update the client state according
+ * connection response
+ *
+ * @dev: the device structure
+ * @cl: mei host client
+ * @cmd: connect client response host bus message
+ */
+static void mei_hbm_cl_connect_res(struct mei_device *dev, struct mei_cl *cl,
+ struct mei_hbm_cl_cmd *cmd)
+{
+ struct hbm_client_connect_response *rs =
+ (struct hbm_client_connect_response *)cmd;
+
+ cl_dbg(dev, cl, "hbm: connect response status=%s\n",
+ mei_cl_conn_status_str(rs->status));
+
+ if (rs->status == MEI_CL_CONN_SUCCESS)
+ cl->state = MEI_FILE_CONNECTED;
+ else {
+ cl->state = MEI_FILE_DISCONNECT_REPLY;
+ if (rs->status == MEI_CL_CONN_NOT_FOUND)
+ mei_me_cl_del(dev, cl->me_cl);
+ }
+ cl->status = mei_cl_conn_status_to_errno(rs->status);
+}
+
+/**
+ * mei_hbm_cl_res - process hbm response received on behalf
+ * an client
+ *
+ * @dev: the device structure
+ * @rs: hbm client message
+ * @fop_type: file operation type
+ */
+static void mei_hbm_cl_res(struct mei_device *dev,
+ struct mei_hbm_cl_cmd *rs,
+ enum mei_cb_file_ops fop_type)
+{
+ struct mei_cl *cl;
+ struct mei_cl_cb *cb, *next;
+
+ cl = NULL;
+ list_for_each_entry_safe(cb, next, &dev->ctrl_rd_list.list, list) {
+
+ cl = cb->cl;
+
+ if (cb->fop_type != fop_type)
+ continue;
+
+ if (mei_hbm_cl_addr_equal(cl, rs)) {
+ list_del_init(&cb->list);
+ break;
+ }
+ }
+
+ if (!cl)
+ return;
+
+ switch (fop_type) {
+ case MEI_FOP_CONNECT:
+ mei_hbm_cl_connect_res(dev, cl, rs);
+ break;
+ case MEI_FOP_DISCONNECT:
+ mei_hbm_cl_disconnect_res(dev, cl, rs);
+ break;
+ case MEI_FOP_NOTIFY_START:
+ mei_hbm_cl_notify_start_res(dev, cl, rs);
+ break;
+ case MEI_FOP_NOTIFY_STOP:
+ mei_hbm_cl_notify_stop_res(dev, cl, rs);
+ break;
+ default:
+ return;
+ }
+
+ cl->timer_count = 0;
+ wake_up(&cl->wait);
+}
+
+
+/**
+ * mei_hbm_fw_disconnect_req - disconnect request initiated by ME firmware
+ * host sends disconnect response
+ *
+ * @dev: the device structure.
+ * @disconnect_req: disconnect request bus message from the me
+ *
+ * Return: -ENOMEM on allocation failure
+ */
+static int mei_hbm_fw_disconnect_req(struct mei_device *dev,
+ struct hbm_client_connect_request *disconnect_req)
+{
+ struct mei_cl *cl;
+ struct mei_cl_cb *cb;
+
+ cl = mei_hbm_cl_find_by_cmd(dev, disconnect_req);
+ if (cl) {
+ cl_dbg(dev, cl, "fw disconnect request received\n");
+ cl->state = MEI_FILE_DISCONNECTING;
+ cl->timer_count = 0;
+
+ cb = mei_io_cb_init(cl, MEI_FOP_DISCONNECT_RSP, NULL);
+ if (!cb)
+ return -ENOMEM;
+ list_add_tail(&cb->list, &dev->ctrl_wr_list.list);
+ }
+ return 0;
+}
+
+/**
+ * mei_hbm_pg_enter_res - PG enter response received
+ *
+ * @dev: the device structure.
+ *
+ * Return: 0 on success, -EPROTO on state mismatch
+ */
+static int mei_hbm_pg_enter_res(struct mei_device *dev)
+{
+ if (mei_pg_state(dev) != MEI_PG_OFF ||
+ dev->pg_event != MEI_PG_EVENT_WAIT) {
+ dev_err(dev->dev, "hbm: pg entry response: state mismatch [%s, %d]\n",
+ mei_pg_state_str(mei_pg_state(dev)), dev->pg_event);
+ return -EPROTO;
+ }
+
+ dev->pg_event = MEI_PG_EVENT_RECEIVED;
+ wake_up(&dev->wait_pg);
+
+ return 0;
+}
+
+/**
+ * mei_hbm_pg_resume - process with PG resume
+ *
+ * @dev: the device structure.
+ */
+void mei_hbm_pg_resume(struct mei_device *dev)
+{
+ pm_request_resume(dev->dev);
+}
+EXPORT_SYMBOL_GPL(mei_hbm_pg_resume);
+
+/**
+ * mei_hbm_pg_exit_res - PG exit response received
+ *
+ * @dev: the device structure.
+ *
+ * Return: 0 on success, -EPROTO on state mismatch
+ */
+static int mei_hbm_pg_exit_res(struct mei_device *dev)
+{
+ if (mei_pg_state(dev) != MEI_PG_ON ||
+ (dev->pg_event != MEI_PG_EVENT_WAIT &&
+ dev->pg_event != MEI_PG_EVENT_IDLE)) {
+ dev_err(dev->dev, "hbm: pg exit response: state mismatch [%s, %d]\n",
+ mei_pg_state_str(mei_pg_state(dev)), dev->pg_event);
+ return -EPROTO;
+ }
+
+ switch (dev->pg_event) {
+ case MEI_PG_EVENT_WAIT:
+ dev->pg_event = MEI_PG_EVENT_RECEIVED;
+ wake_up(&dev->wait_pg);
+ break;
+ case MEI_PG_EVENT_IDLE:
+ /*
+ * If the driver is not waiting on this then
+ * this is HW initiated exit from PG.
+ * Start runtime pm resume sequence to exit from PG.
+ */
+ dev->pg_event = MEI_PG_EVENT_RECEIVED;
+ mei_hbm_pg_resume(dev);
+ break;
+ default:
+ WARN(1, "hbm: pg exit response: unexpected pg event = %d\n",
+ dev->pg_event);
+ return -EPROTO;
+ }
+
+ return 0;
+}
+
+/**
+ * mei_hbm_config_features - check what hbm features and commands
+ * are supported by the fw
+ *
+ * @dev: the device structure
+ */
+static void mei_hbm_config_features(struct mei_device *dev)
+{
+ /* Power Gating Isolation Support */
+ dev->hbm_f_pg_supported = 0;
+ if (dev->version.major_version > HBM_MAJOR_VERSION_PGI)
+ dev->hbm_f_pg_supported = 1;
+
+ if (dev->version.major_version == HBM_MAJOR_VERSION_PGI &&
+ dev->version.minor_version >= HBM_MINOR_VERSION_PGI)
+ dev->hbm_f_pg_supported = 1;
+
+ if (dev->version.major_version >= HBM_MAJOR_VERSION_DC)
+ dev->hbm_f_dc_supported = 1;
+
+ /* disconnect on connect timeout instead of link reset */
+ if (dev->version.major_version >= HBM_MAJOR_VERSION_DOT)
+ dev->hbm_f_dot_supported = 1;
+
+ /* Notification Event Support */
+ if (dev->version.major_version >= HBM_MAJOR_VERSION_EV)
+ dev->hbm_f_ev_supported = 1;
+}
+
+/**
+ * mei_hbm_version_is_supported - checks whether the driver can
+ * support the hbm version of the device
+ *
+ * @dev: the device structure
+ * Return: true if driver can support hbm version of the device
+ */
+bool mei_hbm_version_is_supported(struct mei_device *dev)
+{
+ return (dev->version.major_version < HBM_MAJOR_VERSION) ||
+ (dev->version.major_version == HBM_MAJOR_VERSION &&
+ dev->version.minor_version <= HBM_MINOR_VERSION);
+}
+
+/**
+ * mei_hbm_dispatch - bottom half read routine after ISR to
+ * handle the read bus message cmd processing.
+ *
+ * @dev: the device structure
+ * @hdr: header of bus message
+ *
+ * Return: 0 on success and < 0 on failure
+ */
+int mei_hbm_dispatch(struct mei_device *dev, struct mei_msg_hdr *hdr)
+{
+ struct mei_bus_message *mei_msg;
+ struct hbm_host_version_response *version_res;
+ struct hbm_props_response *props_res;
+ struct hbm_host_enum_response *enum_res;
+ struct hbm_add_client_request *add_cl_req;
+ int ret;
+
+ struct mei_hbm_cl_cmd *cl_cmd;
+ struct hbm_client_connect_request *disconnect_req;
+ struct hbm_flow_control *flow_control;
+
+ /* read the message to our buffer */
+ BUG_ON(hdr->length >= sizeof(dev->rd_msg_buf));
+ mei_read_slots(dev, dev->rd_msg_buf, hdr->length);
+ mei_msg = (struct mei_bus_message *)dev->rd_msg_buf;
+ cl_cmd = (struct mei_hbm_cl_cmd *)mei_msg;
+
+ /* ignore spurious message and prevent reset nesting
+ * hbm is put to idle during system reset
+ */
+ if (dev->hbm_state == MEI_HBM_IDLE) {
+ dev_dbg(dev->dev, "hbm: state is idle ignore spurious messages\n");
+ return 0;
+ }
+
+ switch (mei_msg->hbm_cmd) {
+ case HOST_START_RES_CMD:
+ dev_dbg(dev->dev, "hbm: start: response message received.\n");
+
+ dev->init_clients_timer = 0;
+
+ version_res = (struct hbm_host_version_response *)mei_msg;
+
+ dev_dbg(dev->dev, "HBM VERSION: DRIVER=%02d:%02d DEVICE=%02d:%02d\n",
+ HBM_MAJOR_VERSION, HBM_MINOR_VERSION,
+ version_res->me_max_version.major_version,
+ version_res->me_max_version.minor_version);
+
+ if (version_res->host_version_supported) {
+ dev->version.major_version = HBM_MAJOR_VERSION;
+ dev->version.minor_version = HBM_MINOR_VERSION;
+ } else {
+ dev->version.major_version =
+ version_res->me_max_version.major_version;
+ dev->version.minor_version =
+ version_res->me_max_version.minor_version;
+ }
+
+ if (!mei_hbm_version_is_supported(dev)) {
+ dev_warn(dev->dev, "hbm: start: version mismatch - stopping the driver.\n");
+
+ dev->hbm_state = MEI_HBM_STOPPED;
+ if (mei_hbm_stop_req(dev)) {
+ dev_err(dev->dev, "hbm: start: failed to send stop request\n");
+ return -EIO;
+ }
+ break;
+ }
+
+ mei_hbm_config_features(dev);
+
+ if (dev->dev_state != MEI_DEV_INIT_CLIENTS ||
+ dev->hbm_state != MEI_HBM_STARTING) {
+ dev_err(dev->dev, "hbm: start: state mismatch, [%d, %d]\n",
+ dev->dev_state, dev->hbm_state);
+ return -EPROTO;
+ }
+
+ if (mei_hbm_enum_clients_req(dev)) {
+ dev_err(dev->dev, "hbm: start: failed to send enumeration request\n");
+ return -EIO;
+ }
+
+ wake_up(&dev->wait_hbm_start);
+ break;
+
+ case CLIENT_CONNECT_RES_CMD:
+ dev_dbg(dev->dev, "hbm: client connect response: message received.\n");
+ mei_hbm_cl_res(dev, cl_cmd, MEI_FOP_CONNECT);
+ break;
+
+ case CLIENT_DISCONNECT_RES_CMD:
+ dev_dbg(dev->dev, "hbm: client disconnect response: message received.\n");
+ mei_hbm_cl_res(dev, cl_cmd, MEI_FOP_DISCONNECT);
+ break;
+
+ case MEI_FLOW_CONTROL_CMD:
+ dev_dbg(dev->dev, "hbm: client flow control response: message received.\n");
+
+ flow_control = (struct hbm_flow_control *) mei_msg;
+ mei_hbm_cl_flow_control_res(dev, flow_control);
+ break;
+
+ case MEI_PG_ISOLATION_ENTRY_RES_CMD:
+ dev_dbg(dev->dev, "hbm: power gate isolation entry response received\n");
+ ret = mei_hbm_pg_enter_res(dev);
+ if (ret)
+ return ret;
+ break;
+
+ case MEI_PG_ISOLATION_EXIT_REQ_CMD:
+ dev_dbg(dev->dev, "hbm: power gate isolation exit request received\n");
+ ret = mei_hbm_pg_exit_res(dev);
+ if (ret)
+ return ret;
+ break;
+
+ case HOST_CLIENT_PROPERTIES_RES_CMD:
+ dev_dbg(dev->dev, "hbm: properties response: message received.\n");
+
+ dev->init_clients_timer = 0;
+
+ if (dev->dev_state != MEI_DEV_INIT_CLIENTS ||
+ dev->hbm_state != MEI_HBM_CLIENT_PROPERTIES) {
+ dev_err(dev->dev, "hbm: properties response: state mismatch, [%d, %d]\n",
+ dev->dev_state, dev->hbm_state);
+ return -EPROTO;
+ }
+
+ props_res = (struct hbm_props_response *)mei_msg;
+
+ if (props_res->status) {
+ dev_err(dev->dev, "hbm: properties response: wrong status = %d %s\n",
+ props_res->status,
+ mei_hbm_status_str(props_res->status));
+ return -EPROTO;
+ }
+
+ mei_hbm_me_cl_add(dev, props_res);
+
+ dev->me_client_index++;
+
+ /* request property for the next client */
+ if (mei_hbm_prop_req(dev))
+ return -EIO;
+
+ break;
+
+ case HOST_ENUM_RES_CMD:
+ dev_dbg(dev->dev, "hbm: enumeration response: message received\n");
+
+ dev->init_clients_timer = 0;
+
+ enum_res = (struct hbm_host_enum_response *) mei_msg;
+ BUILD_BUG_ON(sizeof(dev->me_clients_map)
+ < sizeof(enum_res->valid_addresses));
+ memcpy(dev->me_clients_map, enum_res->valid_addresses,
+ sizeof(enum_res->valid_addresses));
+
+ if (dev->dev_state != MEI_DEV_INIT_CLIENTS ||
+ dev->hbm_state != MEI_HBM_ENUM_CLIENTS) {
+ dev_err(dev->dev, "hbm: enumeration response: state mismatch, [%d, %d]\n",
+ dev->dev_state, dev->hbm_state);
+ return -EPROTO;
+ }
+
+ dev->hbm_state = MEI_HBM_CLIENT_PROPERTIES;
+
+ /* first property request */
+ if (mei_hbm_prop_req(dev))
+ return -EIO;
+
+ break;
+
+ case HOST_STOP_RES_CMD:
+ dev_dbg(dev->dev, "hbm: stop response: message received\n");
+
+ dev->init_clients_timer = 0;
+
+ if (dev->hbm_state != MEI_HBM_STOPPED) {
+ dev_err(dev->dev, "hbm: stop response: state mismatch, [%d, %d]\n",
+ dev->dev_state, dev->hbm_state);
+ return -EPROTO;
+ }
+
+ dev->dev_state = MEI_DEV_POWER_DOWN;
+ dev_info(dev->dev, "hbm: stop response: resetting.\n");
+ /* force the reset */
+ return -EPROTO;
+ break;
+
+ case CLIENT_DISCONNECT_REQ_CMD:
+ dev_dbg(dev->dev, "hbm: disconnect request: message received\n");
+
+ disconnect_req = (struct hbm_client_connect_request *)mei_msg;
+ mei_hbm_fw_disconnect_req(dev, disconnect_req);
+ break;
+
+ case ME_STOP_REQ_CMD:
+ dev_dbg(dev->dev, "hbm: stop request: message received\n");
+ dev->hbm_state = MEI_HBM_STOPPED;
+ if (mei_hbm_stop_req(dev)) {
+ dev_err(dev->dev, "hbm: stop request: failed to send stop request\n");
+ return -EIO;
+ }
+ break;
+
+ case MEI_HBM_ADD_CLIENT_REQ_CMD:
+ dev_dbg(dev->dev, "hbm: add client request received\n");
+ /*
+ * after the host receives the enum_resp
+ * message clients may be added or removed
+ */
+ if (dev->hbm_state <= MEI_HBM_ENUM_CLIENTS ||
+ dev->hbm_state >= MEI_HBM_STOPPED) {
+ dev_err(dev->dev, "hbm: add client: state mismatch, [%d, %d]\n",
+ dev->dev_state, dev->hbm_state);
+ return -EPROTO;
+ }
+ add_cl_req = (struct hbm_add_client_request *)mei_msg;
+ ret = mei_hbm_fw_add_cl_req(dev, add_cl_req);
+ if (ret) {
+ dev_err(dev->dev, "hbm: add client: failed to send response %d\n",
+ ret);
+ return -EIO;
+ }
+ dev_dbg(dev->dev, "hbm: add client request processed\n");
+ break;
+
+ case MEI_HBM_NOTIFY_RES_CMD:
+ dev_dbg(dev->dev, "hbm: notify response received\n");
+ mei_hbm_cl_res(dev, cl_cmd, notify_res_to_fop(cl_cmd));
+ break;
+
+ case MEI_HBM_NOTIFICATION_CMD:
+ dev_dbg(dev->dev, "hbm: notification\n");
+ mei_hbm_cl_notify(dev, cl_cmd);
+ break;
+
+ default:
+ BUG();
+ break;
+
+ }
+ return 0;
+}
+
diff --git a/drivers/misc/mei/hbm.h b/drivers/misc/mei/hbm.h
new file mode 100644
index 0000000..a2025a5
--- /dev/null
+++ b/drivers/misc/mei/hbm.h
@@ -0,0 +1,62 @@
+/*
+ *
+ * Intel Management Engine Interface (Intel MEI) Linux driver
+ * Copyright (c) 2003-2012, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#ifndef _MEI_HBM_H_
+#define _MEI_HBM_H_
+
+struct mei_device;
+struct mei_msg_hdr;
+struct mei_cl;
+
+/**
+ * enum mei_hbm_state - host bus message protocol state
+ *
+ * @MEI_HBM_IDLE : protocol not started
+ * @MEI_HBM_STARTING : start request message was sent
+ * @MEI_HBM_ENUM_CLIENTS : enumeration request was sent
+ * @MEI_HBM_CLIENT_PROPERTIES : acquiring clients properties
+ * @MEI_HBM_STARTED : enumeration was completed
+ * @MEI_HBM_STOPPED : stopping exchange
+ */
+enum mei_hbm_state {
+ MEI_HBM_IDLE = 0,
+ MEI_HBM_STARTING,
+ MEI_HBM_ENUM_CLIENTS,
+ MEI_HBM_CLIENT_PROPERTIES,
+ MEI_HBM_STARTED,
+ MEI_HBM_STOPPED,
+};
+
+const char *mei_hbm_state_str(enum mei_hbm_state state);
+
+int mei_hbm_dispatch(struct mei_device *dev, struct mei_msg_hdr *hdr);
+
+void mei_hbm_idle(struct mei_device *dev);
+void mei_hbm_reset(struct mei_device *dev);
+int mei_hbm_start_req(struct mei_device *dev);
+int mei_hbm_start_wait(struct mei_device *dev);
+int mei_hbm_cl_flow_control_req(struct mei_device *dev, struct mei_cl *cl);
+int mei_hbm_cl_disconnect_req(struct mei_device *dev, struct mei_cl *cl);
+int mei_hbm_cl_disconnect_rsp(struct mei_device *dev, struct mei_cl *cl);
+int mei_hbm_cl_connect_req(struct mei_device *dev, struct mei_cl *cl);
+bool mei_hbm_version_is_supported(struct mei_device *dev);
+int mei_hbm_pg(struct mei_device *dev, u8 pg_cmd);
+void mei_hbm_pg_resume(struct mei_device *dev);
+int mei_hbm_cl_notify_req(struct mei_device *dev,
+ struct mei_cl *cl, u8 request);
+
+#endif /* _MEI_HBM_H_ */
+
diff --git a/drivers/misc/mei/hw-me-regs.h b/drivers/misc/mei/hw-me-regs.h
new file mode 100644
index 0000000..d277419
--- /dev/null
+++ b/drivers/misc/mei/hw-me-regs.h
@@ -0,0 +1,218 @@
+/******************************************************************************
+ * Intel Management Engine Interface (Intel MEI) Linux driver
+ * Intel MEI Interface Header
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright(c) 2003 - 2012 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110,
+ * USA
+ *
+ * The full GNU General Public License is included in this distribution
+ * in the file called LICENSE.GPL.
+ *
+ * Contact Information:
+ * Intel Corporation.
+ * linux-mei@linux.intel.com
+ * http://www.intel.com
+ *
+ * BSD LICENSE
+ *
+ * Copyright(c) 2003 - 2012 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ *****************************************************************************/
+#ifndef _MEI_HW_MEI_REGS_H_
+#define _MEI_HW_MEI_REGS_H_
+
+/*
+ * MEI device IDs
+ */
+#define MEI_DEV_ID_82946GZ 0x2974 /* 82946GZ/GL */
+#define MEI_DEV_ID_82G35 0x2984 /* 82G35 Express */
+#define MEI_DEV_ID_82Q965 0x2994 /* 82Q963/Q965 */
+#define MEI_DEV_ID_82G965 0x29A4 /* 82P965/G965 */
+
+#define MEI_DEV_ID_82GM965 0x2A04 /* Mobile PM965/GM965 */
+#define MEI_DEV_ID_82GME965 0x2A14 /* Mobile GME965/GLE960 */
+
+#define MEI_DEV_ID_ICH9_82Q35 0x29B4 /* 82Q35 Express */
+#define MEI_DEV_ID_ICH9_82G33 0x29C4 /* 82G33/G31/P35/P31 Express */
+#define MEI_DEV_ID_ICH9_82Q33 0x29D4 /* 82Q33 Express */
+#define MEI_DEV_ID_ICH9_82X38 0x29E4 /* 82X38/X48 Express */
+#define MEI_DEV_ID_ICH9_3200 0x29F4 /* 3200/3210 Server */
+
+#define MEI_DEV_ID_ICH9_6 0x28B4 /* Bearlake */
+#define MEI_DEV_ID_ICH9_7 0x28C4 /* Bearlake */
+#define MEI_DEV_ID_ICH9_8 0x28D4 /* Bearlake */
+#define MEI_DEV_ID_ICH9_9 0x28E4 /* Bearlake */
+#define MEI_DEV_ID_ICH9_10 0x28F4 /* Bearlake */
+
+#define MEI_DEV_ID_ICH9M_1 0x2A44 /* Cantiga */
+#define MEI_DEV_ID_ICH9M_2 0x2A54 /* Cantiga */
+#define MEI_DEV_ID_ICH9M_3 0x2A64 /* Cantiga */
+#define MEI_DEV_ID_ICH9M_4 0x2A74 /* Cantiga */
+
+#define MEI_DEV_ID_ICH10_1 0x2E04 /* Eaglelake */
+#define MEI_DEV_ID_ICH10_2 0x2E14 /* Eaglelake */
+#define MEI_DEV_ID_ICH10_3 0x2E24 /* Eaglelake */
+#define MEI_DEV_ID_ICH10_4 0x2E34 /* Eaglelake */
+
+#define MEI_DEV_ID_IBXPK_1 0x3B64 /* Calpella */
+#define MEI_DEV_ID_IBXPK_2 0x3B65 /* Calpella */
+
+#define MEI_DEV_ID_CPT_1 0x1C3A /* Couger Point */
+#define MEI_DEV_ID_PBG_1 0x1D3A /* C600/X79 Patsburg */
+
+#define MEI_DEV_ID_PPT_1 0x1E3A /* Panther Point */
+#define MEI_DEV_ID_PPT_2 0x1CBA /* Panther Point */
+#define MEI_DEV_ID_PPT_3 0x1DBA /* Panther Point */
+
+#define MEI_DEV_ID_LPT_H 0x8C3A /* Lynx Point H */
+#define MEI_DEV_ID_LPT_W 0x8D3A /* Lynx Point - Wellsburg */
+#define MEI_DEV_ID_LPT_LP 0x9C3A /* Lynx Point LP */
+#define MEI_DEV_ID_LPT_HR 0x8CBA /* Lynx Point H Refresh */
+
+#define MEI_DEV_ID_WPT_LP 0x9CBA /* Wildcat Point LP */
+#define MEI_DEV_ID_WPT_LP_2 0x9CBB /* Wildcat Point LP 2 */
+
+#define MEI_DEV_ID_SPT 0x9D3A /* Sunrise Point */
+#define MEI_DEV_ID_SPT_2 0x9D3B /* Sunrise Point 2 */
+#define MEI_DEV_ID_SPT_H 0xA13A /* Sunrise Point H */
+#define MEI_DEV_ID_SPT_H_2 0xA13B /* Sunrise Point H 2 */
+
+#define MEI_DEV_ID_KBP 0xA2BA /* Kaby Point */
+#define MEI_DEV_ID_KBP_2 0xA2BB /* Kaby Point 2 */
+
+#define MEI_DEV_ID_LBG 0xA1BA /* Lewisburg (SPT) */
+
+#define MEI_DEV_ID_BXT_M 0x1A9A /* Broxton M */
+#define MEI_DEV_ID_APL_I 0x5A9A /* Apollo Lake I */
+
+/*
+ * MEI HW Section
+ */
+
+/* Host Firmware Status Registers in PCI Config Space */
+#define PCI_CFG_HFS_1 0x40
+# define PCI_CFG_HFS_1_D0I3_MSK 0x80000000
+#define PCI_CFG_HFS_2 0x48
+#define PCI_CFG_HFS_3 0x60
+#define PCI_CFG_HFS_4 0x64
+#define PCI_CFG_HFS_5 0x68
+#define PCI_CFG_HFS_6 0x6C
+
+/* MEI registers */
+/* H_CB_WW - Host Circular Buffer (CB) Write Window register */
+#define H_CB_WW 0
+/* H_CSR - Host Control Status register */
+#define H_CSR 4
+/* ME_CB_RW - ME Circular Buffer Read Window register (read only) */
+#define ME_CB_RW 8
+/* ME_CSR_HA - ME Control Status Host Access register (read only) */
+#define ME_CSR_HA 0xC
+/* H_HGC_CSR - PGI register */
+#define H_HPG_CSR 0x10
+/* H_D0I3C - D0I3 Control */
+#define H_D0I3C 0x800
+
+/* register bits of H_CSR (Host Control Status register) */
+/* Host Circular Buffer Depth - maximum number of 32-bit entries in CB */
+#define H_CBD 0xFF000000
+/* Host Circular Buffer Write Pointer */
+#define H_CBWP 0x00FF0000
+/* Host Circular Buffer Read Pointer */
+#define H_CBRP 0x0000FF00
+/* Host Reset */
+#define H_RST 0x00000010
+/* Host Ready */
+#define H_RDY 0x00000008
+/* Host Interrupt Generate */
+#define H_IG 0x00000004
+/* Host Interrupt Status */
+#define H_IS 0x00000002
+/* Host Interrupt Enable */
+#define H_IE 0x00000001
+/* Host D0I3 Interrupt Enable */
+#define H_D0I3C_IE 0x00000020
+/* Host D0I3 Interrupt Status */
+#define H_D0I3C_IS 0x00000040
+
+/* H_CSR masks */
+#define H_CSR_IE_MASK (H_IE | H_D0I3C_IE)
+#define H_CSR_IS_MASK (H_IS | H_D0I3C_IS)
+
+/* register bits of ME_CSR_HA (ME Control Status Host Access register) */
+/* ME CB (Circular Buffer) Depth HRA (Host Read Access) - host read only
+access to ME_CBD */
+#define ME_CBD_HRA 0xFF000000
+/* ME CB Write Pointer HRA - host read only access to ME_CBWP */
+#define ME_CBWP_HRA 0x00FF0000
+/* ME CB Read Pointer HRA - host read only access to ME_CBRP */
+#define ME_CBRP_HRA 0x0000FF00
+/* ME Power Gate Isolation Capability HRA - host ready only access */
+#define ME_PGIC_HRA 0x00000040
+/* ME Reset HRA - host read only access to ME_RST */
+#define ME_RST_HRA 0x00000010
+/* ME Ready HRA - host read only access to ME_RDY */
+#define ME_RDY_HRA 0x00000008
+/* ME Interrupt Generate HRA - host read only access to ME_IG */
+#define ME_IG_HRA 0x00000004
+/* ME Interrupt Status HRA - host read only access to ME_IS */
+#define ME_IS_HRA 0x00000002
+/* ME Interrupt Enable HRA - host read only access to ME_IE */
+#define ME_IE_HRA 0x00000001
+
+
+/* H_HPG_CSR register bits */
+#define H_HPG_CSR_PGIHEXR 0x00000001
+#define H_HPG_CSR_PGI 0x00000002
+
+/* H_D0I3C register bits */
+#define H_D0I3C_CIP 0x00000001
+#define H_D0I3C_IR 0x00000002
+#define H_D0I3C_I3 0x00000004
+#define H_D0I3C_RR 0x00000008
+
+#endif /* _MEI_HW_MEI_REGS_H_ */
diff --git a/drivers/misc/mei/hw-me.c b/drivers/misc/mei/hw-me.c
new file mode 100644
index 0000000..3633375
--- /dev/null
+++ b/drivers/misc/mei/hw-me.c
@@ -0,0 +1,1355 @@
+/*
+ *
+ * Intel Management Engine Interface (Intel MEI) Linux driver
+ * Copyright (c) 2003-2012, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#include <linux/pci.h>
+
+#include <linux/kthread.h>
+#include <linux/interrupt.h>
+
+#include "mei_dev.h"
+#include "hbm.h"
+
+#include "hw-me.h"
+#include "hw-me-regs.h"
+
+#include "mei-trace.h"
+
+/**
+ * mei_me_reg_read - Reads 32bit data from the mei device
+ *
+ * @hw: the me hardware structure
+ * @offset: offset from which to read the data
+ *
+ * Return: register value (u32)
+ */
+static inline u32 mei_me_reg_read(const struct mei_me_hw *hw,
+ unsigned long offset)
+{
+ return ioread32(hw->mem_addr + offset);
+}
+
+
+/**
+ * mei_me_reg_write - Writes 32bit data to the mei device
+ *
+ * @hw: the me hardware structure
+ * @offset: offset from which to write the data
+ * @value: register value to write (u32)
+ */
+static inline void mei_me_reg_write(const struct mei_me_hw *hw,
+ unsigned long offset, u32 value)
+{
+ iowrite32(value, hw->mem_addr + offset);
+}
+
+/**
+ * mei_me_mecbrw_read - Reads 32bit data from ME circular buffer
+ * read window register
+ *
+ * @dev: the device structure
+ *
+ * Return: ME_CB_RW register value (u32)
+ */
+static inline u32 mei_me_mecbrw_read(const struct mei_device *dev)
+{
+ return mei_me_reg_read(to_me_hw(dev), ME_CB_RW);
+}
+
+/**
+ * mei_me_hcbww_write - write 32bit data to the host circular buffer
+ *
+ * @dev: the device structure
+ * @data: 32bit data to be written to the host circular buffer
+ */
+static inline void mei_me_hcbww_write(struct mei_device *dev, u32 data)
+{
+ mei_me_reg_write(to_me_hw(dev), H_CB_WW, data);
+}
+
+/**
+ * mei_me_mecsr_read - Reads 32bit data from the ME CSR
+ *
+ * @dev: the device structure
+ *
+ * Return: ME_CSR_HA register value (u32)
+ */
+static inline u32 mei_me_mecsr_read(const struct mei_device *dev)
+{
+ u32 reg;
+
+ reg = mei_me_reg_read(to_me_hw(dev), ME_CSR_HA);
+ trace_mei_reg_read(dev->dev, "ME_CSR_HA", ME_CSR_HA, reg);
+
+ return reg;
+}
+
+/**
+ * mei_hcsr_read - Reads 32bit data from the host CSR
+ *
+ * @dev: the device structure
+ *
+ * Return: H_CSR register value (u32)
+ */
+static inline u32 mei_hcsr_read(const struct mei_device *dev)
+{
+ u32 reg;
+
+ reg = mei_me_reg_read(to_me_hw(dev), H_CSR);
+ trace_mei_reg_read(dev->dev, "H_CSR", H_CSR, reg);
+
+ return reg;
+}
+
+/**
+ * mei_hcsr_write - writes H_CSR register to the mei device
+ *
+ * @dev: the device structure
+ * @reg: new register value
+ */
+static inline void mei_hcsr_write(struct mei_device *dev, u32 reg)
+{
+ trace_mei_reg_write(dev->dev, "H_CSR", H_CSR, reg);
+ mei_me_reg_write(to_me_hw(dev), H_CSR, reg);
+}
+
+/**
+ * mei_hcsr_set - writes H_CSR register to the mei device,
+ * and ignores the H_IS bit for it is write-one-to-zero.
+ *
+ * @dev: the device structure
+ * @reg: new register value
+ */
+static inline void mei_hcsr_set(struct mei_device *dev, u32 reg)
+{
+ reg &= ~H_CSR_IS_MASK;
+ mei_hcsr_write(dev, reg);
+}
+
+/**
+ * mei_me_d0i3c_read - Reads 32bit data from the D0I3C register
+ *
+ * @dev: the device structure
+ *
+ * Return: H_D0I3C register value (u32)
+ */
+static inline u32 mei_me_d0i3c_read(const struct mei_device *dev)
+{
+ u32 reg;
+
+ reg = mei_me_reg_read(to_me_hw(dev), H_D0I3C);
+ trace_mei_reg_read(dev->dev, "H_D0I3C", H_D0I3C, reg);
+
+ return reg;
+}
+
+/**
+ * mei_me_d0i3c_write - writes H_D0I3C register to device
+ *
+ * @dev: the device structure
+ * @reg: new register value
+ */
+static inline void mei_me_d0i3c_write(struct mei_device *dev, u32 reg)
+{
+ trace_mei_reg_write(dev->dev, "H_D0I3C", H_D0I3C, reg);
+ mei_me_reg_write(to_me_hw(dev), H_D0I3C, reg);
+}
+
+/**
+ * mei_me_fw_status - read fw status register from pci config space
+ *
+ * @dev: mei device
+ * @fw_status: fw status register values
+ *
+ * Return: 0 on success, error otherwise
+ */
+static int mei_me_fw_status(struct mei_device *dev,
+ struct mei_fw_status *fw_status)
+{
+ struct pci_dev *pdev = to_pci_dev(dev->dev);
+ struct mei_me_hw *hw = to_me_hw(dev);
+ const struct mei_fw_status *fw_src = &hw->cfg->fw_status;
+ int ret;
+ int i;
+
+ if (!fw_status)
+ return -EINVAL;
+
+ fw_status->count = fw_src->count;
+ for (i = 0; i < fw_src->count && i < MEI_FW_STATUS_MAX; i++) {
+ ret = pci_read_config_dword(pdev,
+ fw_src->status[i], &fw_status->status[i]);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+/**
+ * mei_me_hw_config - configure hw dependent settings
+ *
+ * @dev: mei device
+ */
+static void mei_me_hw_config(struct mei_device *dev)
+{
+ struct pci_dev *pdev = to_pci_dev(dev->dev);
+ struct mei_me_hw *hw = to_me_hw(dev);
+ u32 hcsr, reg;
+
+ /* Doesn't change in runtime */
+ hcsr = mei_hcsr_read(dev);
+ dev->hbuf_depth = (hcsr & H_CBD) >> 24;
+
+ reg = 0;
+ pci_read_config_dword(pdev, PCI_CFG_HFS_1, ®);
+ hw->d0i3_supported =
+ ((reg & PCI_CFG_HFS_1_D0I3_MSK) == PCI_CFG_HFS_1_D0I3_MSK);
+
+ hw->pg_state = MEI_PG_OFF;
+ if (hw->d0i3_supported) {
+ reg = mei_me_d0i3c_read(dev);
+ if (reg & H_D0I3C_I3)
+ hw->pg_state = MEI_PG_ON;
+ }
+}
+
+/**
+ * mei_me_pg_state - translate internal pg state
+ * to the mei power gating state
+ *
+ * @dev: mei device
+ *
+ * Return: MEI_PG_OFF if aliveness is on and MEI_PG_ON otherwise
+ */
+static inline enum mei_pg_state mei_me_pg_state(struct mei_device *dev)
+{
+ struct mei_me_hw *hw = to_me_hw(dev);
+
+ return hw->pg_state;
+}
+
+/**
+ * mei_me_intr_clear - clear and stop interrupts
+ *
+ * @dev: the device structure
+ */
+static void mei_me_intr_clear(struct mei_device *dev)
+{
+ u32 hcsr = mei_hcsr_read(dev);
+
+ if (hcsr & H_CSR_IS_MASK)
+ mei_hcsr_write(dev, hcsr);
+}
+/**
+ * mei_me_intr_enable - enables mei device interrupts
+ *
+ * @dev: the device structure
+ */
+static void mei_me_intr_enable(struct mei_device *dev)
+{
+ u32 hcsr = mei_hcsr_read(dev);
+
+ hcsr |= H_CSR_IE_MASK;
+ mei_hcsr_set(dev, hcsr);
+}
+
+/**
+ * mei_me_intr_disable - disables mei device interrupts
+ *
+ * @dev: the device structure
+ */
+static void mei_me_intr_disable(struct mei_device *dev)
+{
+ u32 hcsr = mei_hcsr_read(dev);
+
+ hcsr &= ~H_CSR_IE_MASK;
+ mei_hcsr_set(dev, hcsr);
+}
+
+/**
+ * mei_me_hw_reset_release - release device from the reset
+ *
+ * @dev: the device structure
+ */
+static void mei_me_hw_reset_release(struct mei_device *dev)
+{
+ u32 hcsr = mei_hcsr_read(dev);
+
+ hcsr |= H_IG;
+ hcsr &= ~H_RST;
+ mei_hcsr_set(dev, hcsr);
+
+ /* complete this write before we set host ready on another CPU */
+ mmiowb();
+}
+
+/**
+ * mei_me_host_set_ready - enable device
+ *
+ * @dev: mei device
+ */
+static void mei_me_host_set_ready(struct mei_device *dev)
+{
+ u32 hcsr = mei_hcsr_read(dev);
+
+ hcsr |= H_CSR_IE_MASK | H_IG | H_RDY;
+ mei_hcsr_set(dev, hcsr);
+}
+
+/**
+ * mei_me_host_is_ready - check whether the host has turned ready
+ *
+ * @dev: mei device
+ * Return: bool
+ */
+static bool mei_me_host_is_ready(struct mei_device *dev)
+{
+ u32 hcsr = mei_hcsr_read(dev);
+
+ return (hcsr & H_RDY) == H_RDY;
+}
+
+/**
+ * mei_me_hw_is_ready - check whether the me(hw) has turned ready
+ *
+ * @dev: mei device
+ * Return: bool
+ */
+static bool mei_me_hw_is_ready(struct mei_device *dev)
+{
+ u32 mecsr = mei_me_mecsr_read(dev);
+
+ return (mecsr & ME_RDY_HRA) == ME_RDY_HRA;
+}
+
+/**
+ * mei_me_hw_ready_wait - wait until the me(hw) has turned ready
+ * or timeout is reached
+ *
+ * @dev: mei device
+ * Return: 0 on success, error otherwise
+ */
+static int mei_me_hw_ready_wait(struct mei_device *dev)
+{
+ mutex_unlock(&dev->device_lock);
+ wait_event_timeout(dev->wait_hw_ready,
+ dev->recvd_hw_ready,
+ mei_secs_to_jiffies(MEI_HW_READY_TIMEOUT));
+ mutex_lock(&dev->device_lock);
+ if (!dev->recvd_hw_ready) {
+ dev_err(dev->dev, "wait hw ready failed\n");
+ return -ETIME;
+ }
+
+ mei_me_hw_reset_release(dev);
+ dev->recvd_hw_ready = false;
+ return 0;
+}
+
+/**
+ * mei_me_hw_start - hw start routine
+ *
+ * @dev: mei device
+ * Return: 0 on success, error otherwise
+ */
+static int mei_me_hw_start(struct mei_device *dev)
+{
+ int ret = mei_me_hw_ready_wait(dev);
+
+ if (ret)
+ return ret;
+ dev_dbg(dev->dev, "hw is ready\n");
+
+ mei_me_host_set_ready(dev);
+ return ret;
+}
+
+
+/**
+ * mei_hbuf_filled_slots - gets number of device filled buffer slots
+ *
+ * @dev: the device structure
+ *
+ * Return: number of filled slots
+ */
+static unsigned char mei_hbuf_filled_slots(struct mei_device *dev)
+{
+ u32 hcsr;
+ char read_ptr, write_ptr;
+
+ hcsr = mei_hcsr_read(dev);
+
+ read_ptr = (char) ((hcsr & H_CBRP) >> 8);
+ write_ptr = (char) ((hcsr & H_CBWP) >> 16);
+
+ return (unsigned char) (write_ptr - read_ptr);
+}
+
+/**
+ * mei_me_hbuf_is_empty - checks if host buffer is empty.
+ *
+ * @dev: the device structure
+ *
+ * Return: true if empty, false - otherwise.
+ */
+static bool mei_me_hbuf_is_empty(struct mei_device *dev)
+{
+ return mei_hbuf_filled_slots(dev) == 0;
+}
+
+/**
+ * mei_me_hbuf_empty_slots - counts write empty slots.
+ *
+ * @dev: the device structure
+ *
+ * Return: -EOVERFLOW if overflow, otherwise empty slots count
+ */
+static int mei_me_hbuf_empty_slots(struct mei_device *dev)
+{
+ unsigned char filled_slots, empty_slots;
+
+ filled_slots = mei_hbuf_filled_slots(dev);
+ empty_slots = dev->hbuf_depth - filled_slots;
+
+ /* check for overflow */
+ if (filled_slots > dev->hbuf_depth)
+ return -EOVERFLOW;
+
+ return empty_slots;
+}
+
+/**
+ * mei_me_hbuf_max_len - returns size of hw buffer.
+ *
+ * @dev: the device structure
+ *
+ * Return: size of hw buffer in bytes
+ */
+static size_t mei_me_hbuf_max_len(const struct mei_device *dev)
+{
+ return dev->hbuf_depth * sizeof(u32) - sizeof(struct mei_msg_hdr);
+}
+
+
+/**
+ * mei_me_write_message - writes a message to mei device.
+ *
+ * @dev: the device structure
+ * @header: mei HECI header of message
+ * @buf: message payload will be written
+ *
+ * Return: -EIO if write has failed
+ */
+static int mei_me_write_message(struct mei_device *dev,
+ struct mei_msg_hdr *header,
+ unsigned char *buf)
+{
+ unsigned long rem;
+ unsigned long length = header->length;
+ u32 *reg_buf = (u32 *)buf;
+ u32 hcsr;
+ u32 dw_cnt;
+ int i;
+ int empty_slots;
+
+ dev_dbg(dev->dev, MEI_HDR_FMT, MEI_HDR_PRM(header));
+
+ empty_slots = mei_hbuf_empty_slots(dev);
+ dev_dbg(dev->dev, "empty slots = %hu.\n", empty_slots);
+
+ dw_cnt = mei_data2slots(length);
+ if (empty_slots < 0 || dw_cnt > empty_slots)
+ return -EMSGSIZE;
+
+ mei_me_hcbww_write(dev, *((u32 *) header));
+
+ for (i = 0; i < length / 4; i++)
+ mei_me_hcbww_write(dev, reg_buf[i]);
+
+ rem = length & 0x3;
+ if (rem > 0) {
+ u32 reg = 0;
+
+ memcpy(®, &buf[length - rem], rem);
+ mei_me_hcbww_write(dev, reg);
+ }
+
+ hcsr = mei_hcsr_read(dev) | H_IG;
+ mei_hcsr_set(dev, hcsr);
+ if (!mei_me_hw_is_ready(dev))
+ return -EIO;
+
+ return 0;
+}
+
+/**
+ * mei_me_count_full_read_slots - counts read full slots.
+ *
+ * @dev: the device structure
+ *
+ * Return: -EOVERFLOW if overflow, otherwise filled slots count
+ */
+static int mei_me_count_full_read_slots(struct mei_device *dev)
+{
+ u32 me_csr;
+ char read_ptr, write_ptr;
+ unsigned char buffer_depth, filled_slots;
+
+ me_csr = mei_me_mecsr_read(dev);
+ buffer_depth = (unsigned char)((me_csr & ME_CBD_HRA) >> 24);
+ read_ptr = (char) ((me_csr & ME_CBRP_HRA) >> 8);
+ write_ptr = (char) ((me_csr & ME_CBWP_HRA) >> 16);
+ filled_slots = (unsigned char) (write_ptr - read_ptr);
+
+ /* check for overflow */
+ if (filled_slots > buffer_depth)
+ return -EOVERFLOW;
+
+ dev_dbg(dev->dev, "filled_slots =%08x\n", filled_slots);
+ return (int)filled_slots;
+}
+
+/**
+ * mei_me_read_slots - reads a message from mei device.
+ *
+ * @dev: the device structure
+ * @buffer: message buffer will be written
+ * @buffer_length: message size will be read
+ *
+ * Return: always 0
+ */
+static int mei_me_read_slots(struct mei_device *dev, unsigned char *buffer,
+ unsigned long buffer_length)
+{
+ u32 *reg_buf = (u32 *)buffer;
+ u32 hcsr;
+
+ for (; buffer_length >= sizeof(u32); buffer_length -= sizeof(u32))
+ *reg_buf++ = mei_me_mecbrw_read(dev);
+
+ if (buffer_length > 0) {
+ u32 reg = mei_me_mecbrw_read(dev);
+
+ memcpy(reg_buf, ®, buffer_length);
+ }
+
+ hcsr = mei_hcsr_read(dev) | H_IG;
+ mei_hcsr_set(dev, hcsr);
+ return 0;
+}
+
+/**
+ * mei_me_pg_set - write pg enter register
+ *
+ * @dev: the device structure
+ */
+static void mei_me_pg_set(struct mei_device *dev)
+{
+ struct mei_me_hw *hw = to_me_hw(dev);
+ u32 reg;
+
+ reg = mei_me_reg_read(hw, H_HPG_CSR);
+ trace_mei_reg_read(dev->dev, "H_HPG_CSR", H_HPG_CSR, reg);
+
+ reg |= H_HPG_CSR_PGI;
+
+ trace_mei_reg_write(dev->dev, "H_HPG_CSR", H_HPG_CSR, reg);
+ mei_me_reg_write(hw, H_HPG_CSR, reg);
+}
+
+/**
+ * mei_me_pg_unset - write pg exit register
+ *
+ * @dev: the device structure
+ */
+static void mei_me_pg_unset(struct mei_device *dev)
+{
+ struct mei_me_hw *hw = to_me_hw(dev);
+ u32 reg;
+
+ reg = mei_me_reg_read(hw, H_HPG_CSR);
+ trace_mei_reg_read(dev->dev, "H_HPG_CSR", H_HPG_CSR, reg);
+
+ WARN(!(reg & H_HPG_CSR_PGI), "PGI is not set\n");
+
+ reg |= H_HPG_CSR_PGIHEXR;
+
+ trace_mei_reg_write(dev->dev, "H_HPG_CSR", H_HPG_CSR, reg);
+ mei_me_reg_write(hw, H_HPG_CSR, reg);
+}
+
+/**
+ * mei_me_pg_legacy_enter_sync - perform legacy pg entry procedure
+ *
+ * @dev: the device structure
+ *
+ * Return: 0 on success an error code otherwise
+ */
+static int mei_me_pg_legacy_enter_sync(struct mei_device *dev)
+{
+ struct mei_me_hw *hw = to_me_hw(dev);
+ unsigned long timeout = mei_secs_to_jiffies(MEI_PGI_TIMEOUT);
+ int ret;
+
+ dev->pg_event = MEI_PG_EVENT_WAIT;
+
+ ret = mei_hbm_pg(dev, MEI_PG_ISOLATION_ENTRY_REQ_CMD);
+ if (ret)
+ return ret;
+
+ mutex_unlock(&dev->device_lock);
+ wait_event_timeout(dev->wait_pg,
+ dev->pg_event == MEI_PG_EVENT_RECEIVED, timeout);
+ mutex_lock(&dev->device_lock);
+
+ if (dev->pg_event == MEI_PG_EVENT_RECEIVED) {
+ mei_me_pg_set(dev);
+ ret = 0;
+ } else {
+ ret = -ETIME;
+ }
+
+ dev->pg_event = MEI_PG_EVENT_IDLE;
+ hw->pg_state = MEI_PG_ON;
+
+ return ret;
+}
+
+/**
+ * mei_me_pg_legacy_exit_sync - perform legacy pg exit procedure
+ *
+ * @dev: the device structure
+ *
+ * Return: 0 on success an error code otherwise
+ */
+static int mei_me_pg_legacy_exit_sync(struct mei_device *dev)
+{
+ struct mei_me_hw *hw = to_me_hw(dev);
+ unsigned long timeout = mei_secs_to_jiffies(MEI_PGI_TIMEOUT);
+ int ret;
+
+ if (dev->pg_event == MEI_PG_EVENT_RECEIVED)
+ goto reply;
+
+ dev->pg_event = MEI_PG_EVENT_WAIT;
+
+ mei_me_pg_unset(dev);
+
+ mutex_unlock(&dev->device_lock);
+ wait_event_timeout(dev->wait_pg,
+ dev->pg_event == MEI_PG_EVENT_RECEIVED, timeout);
+ mutex_lock(&dev->device_lock);
+
+reply:
+ if (dev->pg_event != MEI_PG_EVENT_RECEIVED) {
+ ret = -ETIME;
+ goto out;
+ }
+
+ dev->pg_event = MEI_PG_EVENT_INTR_WAIT;
+ ret = mei_hbm_pg(dev, MEI_PG_ISOLATION_EXIT_RES_CMD);
+ if (ret)
+ return ret;
+
+ mutex_unlock(&dev->device_lock);
+ wait_event_timeout(dev->wait_pg,
+ dev->pg_event == MEI_PG_EVENT_INTR_RECEIVED, timeout);
+ mutex_lock(&dev->device_lock);
+
+ if (dev->pg_event == MEI_PG_EVENT_INTR_RECEIVED)
+ ret = 0;
+ else
+ ret = -ETIME;
+
+out:
+ dev->pg_event = MEI_PG_EVENT_IDLE;
+ hw->pg_state = MEI_PG_OFF;
+
+ return ret;
+}
+
+/**
+ * mei_me_pg_in_transition - is device now in pg transition
+ *
+ * @dev: the device structure
+ *
+ * Return: true if in pg transition, false otherwise
+ */
+static bool mei_me_pg_in_transition(struct mei_device *dev)
+{
+ return dev->pg_event >= MEI_PG_EVENT_WAIT &&
+ dev->pg_event <= MEI_PG_EVENT_INTR_WAIT;
+}
+
+/**
+ * mei_me_pg_is_enabled - detect if PG is supported by HW
+ *
+ * @dev: the device structure
+ *
+ * Return: true is pg supported, false otherwise
+ */
+static bool mei_me_pg_is_enabled(struct mei_device *dev)
+{
+ struct mei_me_hw *hw = to_me_hw(dev);
+ u32 reg = mei_me_mecsr_read(dev);
+
+ if (hw->d0i3_supported)
+ return true;
+
+ if ((reg & ME_PGIC_HRA) == 0)
+ goto notsupported;
+
+ if (!dev->hbm_f_pg_supported)
+ goto notsupported;
+
+ return true;
+
+notsupported:
+ dev_dbg(dev->dev, "pg: not supported: d0i3 = %d HGP = %d hbm version %d.%d ?= %d.%d\n",
+ hw->d0i3_supported,
+ !!(reg & ME_PGIC_HRA),
+ dev->version.major_version,
+ dev->version.minor_version,
+ HBM_MAJOR_VERSION_PGI,
+ HBM_MINOR_VERSION_PGI);
+
+ return false;
+}
+
+/**
+ * mei_me_d0i3_set - write d0i3 register bit on mei device.
+ *
+ * @dev: the device structure
+ * @intr: ask for interrupt
+ *
+ * Return: D0I3C register value
+ */
+static u32 mei_me_d0i3_set(struct mei_device *dev, bool intr)
+{
+ u32 reg = mei_me_d0i3c_read(dev);
+
+ reg |= H_D0I3C_I3;
+ if (intr)
+ reg |= H_D0I3C_IR;
+ else
+ reg &= ~H_D0I3C_IR;
+ mei_me_d0i3c_write(dev, reg);
+ /* read it to ensure HW consistency */
+ reg = mei_me_d0i3c_read(dev);
+ return reg;
+}
+
+/**
+ * mei_me_d0i3_unset - clean d0i3 register bit on mei device.
+ *
+ * @dev: the device structure
+ *
+ * Return: D0I3C register value
+ */
+static u32 mei_me_d0i3_unset(struct mei_device *dev)
+{
+ u32 reg = mei_me_d0i3c_read(dev);
+
+ reg &= ~H_D0I3C_I3;
+ reg |= H_D0I3C_IR;
+ mei_me_d0i3c_write(dev, reg);
+ /* read it to ensure HW consistency */
+ reg = mei_me_d0i3c_read(dev);
+ return reg;
+}
+
+/**
+ * mei_me_d0i3_enter_sync - perform d0i3 entry procedure
+ *
+ * @dev: the device structure
+ *
+ * Return: 0 on success an error code otherwise
+ */
+static int mei_me_d0i3_enter_sync(struct mei_device *dev)
+{
+ struct mei_me_hw *hw = to_me_hw(dev);
+ unsigned long d0i3_timeout = mei_secs_to_jiffies(MEI_D0I3_TIMEOUT);
+ unsigned long pgi_timeout = mei_secs_to_jiffies(MEI_PGI_TIMEOUT);
+ int ret;
+ u32 reg;
+
+ reg = mei_me_d0i3c_read(dev);
+ if (reg & H_D0I3C_I3) {
+ /* we are in d0i3, nothing to do */
+ dev_dbg(dev->dev, "d0i3 set not needed\n");
+ ret = 0;
+ goto on;
+ }
+
+ /* PGI entry procedure */
+ dev->pg_event = MEI_PG_EVENT_WAIT;
+
+ ret = mei_hbm_pg(dev, MEI_PG_ISOLATION_ENTRY_REQ_CMD);
+ if (ret)
+ /* FIXME: should we reset here? */
+ goto out;
+
+ mutex_unlock(&dev->device_lock);
+ wait_event_timeout(dev->wait_pg,
+ dev->pg_event == MEI_PG_EVENT_RECEIVED, pgi_timeout);
+ mutex_lock(&dev->device_lock);
+
+ if (dev->pg_event != MEI_PG_EVENT_RECEIVED) {
+ ret = -ETIME;
+ goto out;
+ }
+ /* end PGI entry procedure */
+
+ dev->pg_event = MEI_PG_EVENT_INTR_WAIT;
+
+ reg = mei_me_d0i3_set(dev, true);
+ if (!(reg & H_D0I3C_CIP)) {
+ dev_dbg(dev->dev, "d0i3 enter wait not needed\n");
+ ret = 0;
+ goto on;
+ }
+
+ mutex_unlock(&dev->device_lock);
+ wait_event_timeout(dev->wait_pg,
+ dev->pg_event == MEI_PG_EVENT_INTR_RECEIVED, d0i3_timeout);
+ mutex_lock(&dev->device_lock);
+
+ if (dev->pg_event != MEI_PG_EVENT_INTR_RECEIVED) {
+ reg = mei_me_d0i3c_read(dev);
+ if (!(reg & H_D0I3C_I3)) {
+ ret = -ETIME;
+ goto out;
+ }
+ }
+
+ ret = 0;
+on:
+ hw->pg_state = MEI_PG_ON;
+out:
+ dev->pg_event = MEI_PG_EVENT_IDLE;
+ dev_dbg(dev->dev, "d0i3 enter ret = %d\n", ret);
+ return ret;
+}
+
+/**
+ * mei_me_d0i3_enter - perform d0i3 entry procedure
+ * no hbm PG handshake
+ * no waiting for confirmation; runs with interrupts
+ * disabled
+ *
+ * @dev: the device structure
+ *
+ * Return: 0 on success an error code otherwise
+ */
+static int mei_me_d0i3_enter(struct mei_device *dev)
+{
+ struct mei_me_hw *hw = to_me_hw(dev);
+ u32 reg;
+
+ reg = mei_me_d0i3c_read(dev);
+ if (reg & H_D0I3C_I3) {
+ /* we are in d0i3, nothing to do */
+ dev_dbg(dev->dev, "already d0i3 : set not needed\n");
+ goto on;
+ }
+
+ mei_me_d0i3_set(dev, false);
+on:
+ hw->pg_state = MEI_PG_ON;
+ dev->pg_event = MEI_PG_EVENT_IDLE;
+ dev_dbg(dev->dev, "d0i3 enter\n");
+ return 0;
+}
+
+/**
+ * mei_me_d0i3_exit_sync - perform d0i3 exit procedure
+ *
+ * @dev: the device structure
+ *
+ * Return: 0 on success an error code otherwise
+ */
+static int mei_me_d0i3_exit_sync(struct mei_device *dev)
+{
+ struct mei_me_hw *hw = to_me_hw(dev);
+ unsigned long timeout = mei_secs_to_jiffies(MEI_D0I3_TIMEOUT);
+ int ret;
+ u32 reg;
+
+ dev->pg_event = MEI_PG_EVENT_INTR_WAIT;
+
+ reg = mei_me_d0i3c_read(dev);
+ if (!(reg & H_D0I3C_I3)) {
+ /* we are not in d0i3, nothing to do */
+ dev_dbg(dev->dev, "d0i3 exit not needed\n");
+ ret = 0;
+ goto off;
+ }
+
+ reg = mei_me_d0i3_unset(dev);
+ if (!(reg & H_D0I3C_CIP)) {
+ dev_dbg(dev->dev, "d0i3 exit wait not needed\n");
+ ret = 0;
+ goto off;
+ }
+
+ mutex_unlock(&dev->device_lock);
+ wait_event_timeout(dev->wait_pg,
+ dev->pg_event == MEI_PG_EVENT_INTR_RECEIVED, timeout);
+ mutex_lock(&dev->device_lock);
+
+ if (dev->pg_event != MEI_PG_EVENT_INTR_RECEIVED) {
+ reg = mei_me_d0i3c_read(dev);
+ if (reg & H_D0I3C_I3) {
+ ret = -ETIME;
+ goto out;
+ }
+ }
+
+ ret = 0;
+off:
+ hw->pg_state = MEI_PG_OFF;
+out:
+ dev->pg_event = MEI_PG_EVENT_IDLE;
+
+ dev_dbg(dev->dev, "d0i3 exit ret = %d\n", ret);
+ return ret;
+}
+
+/**
+ * mei_me_pg_legacy_intr - perform legacy pg processing
+ * in interrupt thread handler
+ *
+ * @dev: the device structure
+ */
+static void mei_me_pg_legacy_intr(struct mei_device *dev)
+{
+ struct mei_me_hw *hw = to_me_hw(dev);
+
+ if (dev->pg_event != MEI_PG_EVENT_INTR_WAIT)
+ return;
+
+ dev->pg_event = MEI_PG_EVENT_INTR_RECEIVED;
+ hw->pg_state = MEI_PG_OFF;
+ if (waitqueue_active(&dev->wait_pg))
+ wake_up(&dev->wait_pg);
+}
+
+/**
+ * mei_me_d0i3_intr - perform d0i3 processing in interrupt thread handler
+ *
+ * @dev: the device structure
+ */
+static void mei_me_d0i3_intr(struct mei_device *dev)
+{
+ struct mei_me_hw *hw = to_me_hw(dev);
+
+ if (dev->pg_event == MEI_PG_EVENT_INTR_WAIT &&
+ (hw->intr_source & H_D0I3C_IS)) {
+ dev->pg_event = MEI_PG_EVENT_INTR_RECEIVED;
+ if (hw->pg_state == MEI_PG_ON) {
+ hw->pg_state = MEI_PG_OFF;
+ if (dev->hbm_state != MEI_HBM_IDLE) {
+ /*
+ * force H_RDY because it could be
+ * wiped off during PG
+ */
+ dev_dbg(dev->dev, "d0i3 set host ready\n");
+ mei_me_host_set_ready(dev);
+ }
+ } else {
+ hw->pg_state = MEI_PG_ON;
+ }
+
+ wake_up(&dev->wait_pg);
+ }
+
+ if (hw->pg_state == MEI_PG_ON && (hw->intr_source & H_IS)) {
+ /*
+ * HW sent some data and we are in D0i3, so
+ * we got here because of HW initiated exit from D0i3.
+ * Start runtime pm resume sequence to exit low power state.
+ */
+ dev_dbg(dev->dev, "d0i3 want resume\n");
+ mei_hbm_pg_resume(dev);
+ }
+}
+
+/**
+ * mei_me_pg_intr - perform pg processing in interrupt thread handler
+ *
+ * @dev: the device structure
+ */
+static void mei_me_pg_intr(struct mei_device *dev)
+{
+ struct mei_me_hw *hw = to_me_hw(dev);
+
+ if (hw->d0i3_supported)
+ mei_me_d0i3_intr(dev);
+ else
+ mei_me_pg_legacy_intr(dev);
+}
+
+/**
+ * mei_me_pg_enter_sync - perform runtime pm entry procedure
+ *
+ * @dev: the device structure
+ *
+ * Return: 0 on success an error code otherwise
+ */
+int mei_me_pg_enter_sync(struct mei_device *dev)
+{
+ struct mei_me_hw *hw = to_me_hw(dev);
+
+ if (hw->d0i3_supported)
+ return mei_me_d0i3_enter_sync(dev);
+ else
+ return mei_me_pg_legacy_enter_sync(dev);
+}
+
+/**
+ * mei_me_pg_exit_sync - perform runtime pm exit procedure
+ *
+ * @dev: the device structure
+ *
+ * Return: 0 on success an error code otherwise
+ */
+int mei_me_pg_exit_sync(struct mei_device *dev)
+{
+ struct mei_me_hw *hw = to_me_hw(dev);
+
+ if (hw->d0i3_supported)
+ return mei_me_d0i3_exit_sync(dev);
+ else
+ return mei_me_pg_legacy_exit_sync(dev);
+}
+
+/**
+ * mei_me_hw_reset - resets fw via mei csr register.
+ *
+ * @dev: the device structure
+ * @intr_enable: if interrupt should be enabled after reset.
+ *
+ * Return: 0 on success an error code otherwise
+ */
+static int mei_me_hw_reset(struct mei_device *dev, bool intr_enable)
+{
+ struct mei_me_hw *hw = to_me_hw(dev);
+ int ret;
+ u32 hcsr;
+
+ if (intr_enable) {
+ mei_me_intr_enable(dev);
+ if (hw->d0i3_supported) {
+ ret = mei_me_d0i3_exit_sync(dev);
+ if (ret)
+ return ret;
+ }
+ }
+
+ hcsr = mei_hcsr_read(dev);
+ /* H_RST may be found lit before reset is started,
+ * for example if preceding reset flow hasn't completed.
+ * In that case asserting H_RST will be ignored, therefore
+ * we need to clean H_RST bit to start a successful reset sequence.
+ */
+ if ((hcsr & H_RST) == H_RST) {
+ dev_warn(dev->dev, "H_RST is set = 0x%08X", hcsr);
+ hcsr &= ~H_RST;
+ mei_hcsr_set(dev, hcsr);
+ hcsr = mei_hcsr_read(dev);
+ }
+
+ hcsr |= H_RST | H_IG | H_CSR_IS_MASK;
+
+ if (!intr_enable)
+ hcsr &= ~H_CSR_IE_MASK;
+
+ dev->recvd_hw_ready = false;
+ mei_hcsr_write(dev, hcsr);
+
+ /*
+ * Host reads the H_CSR once to ensure that the
+ * posted write to H_CSR completes.
+ */
+ hcsr = mei_hcsr_read(dev);
+
+ if ((hcsr & H_RST) == 0)
+ dev_warn(dev->dev, "H_RST is not set = 0x%08X", hcsr);
+
+ if ((hcsr & H_RDY) == H_RDY)
+ dev_warn(dev->dev, "H_RDY is not cleared 0x%08X", hcsr);
+
+ if (!intr_enable) {
+ mei_me_hw_reset_release(dev);
+ if (hw->d0i3_supported) {
+ ret = mei_me_d0i3_enter(dev);
+ if (ret)
+ return ret;
+ }
+ }
+ return 0;
+}
+
+/**
+ * mei_me_irq_quick_handler - The ISR of the MEI device
+ *
+ * @irq: The irq number
+ * @dev_id: pointer to the device structure
+ *
+ * Return: irqreturn_t
+ */
+irqreturn_t mei_me_irq_quick_handler(int irq, void *dev_id)
+{
+ struct mei_device *dev = (struct mei_device *)dev_id;
+ struct mei_me_hw *hw = to_me_hw(dev);
+ u32 hcsr;
+
+ hcsr = mei_hcsr_read(dev);
+ if (!(hcsr & H_CSR_IS_MASK))
+ return IRQ_NONE;
+
+ hw->intr_source = hcsr & H_CSR_IS_MASK;
+ dev_dbg(dev->dev, "interrupt source 0x%08X.\n", hw->intr_source);
+
+ /* clear H_IS and H_D0I3C_IS bits in H_CSR to clear the interrupts */
+ mei_hcsr_write(dev, hcsr);
+
+ return IRQ_WAKE_THREAD;
+}
+
+/**
+ * mei_me_irq_thread_handler - function called after ISR to handle the interrupt
+ * processing.
+ *
+ * @irq: The irq number
+ * @dev_id: pointer to the device structure
+ *
+ * Return: irqreturn_t
+ *
+ */
+irqreturn_t mei_me_irq_thread_handler(int irq, void *dev_id)
+{
+ struct mei_device *dev = (struct mei_device *) dev_id;
+ struct mei_cl_cb complete_list;
+ s32 slots;
+ int rets = 0;
+
+ dev_dbg(dev->dev, "function called after ISR to handle the interrupt processing.\n");
+ /* initialize our complete list */
+ mutex_lock(&dev->device_lock);
+ mei_io_list_init(&complete_list);
+
+ /* check if ME wants a reset */
+ if (!mei_hw_is_ready(dev) && dev->dev_state != MEI_DEV_RESETTING) {
+ dev_warn(dev->dev, "FW not ready: resetting.\n");
+ schedule_work(&dev->reset_work);
+ goto end;
+ }
+
+ mei_me_pg_intr(dev);
+
+ /* check if we need to start the dev */
+ if (!mei_host_is_ready(dev)) {
+ if (mei_hw_is_ready(dev)) {
+ dev_dbg(dev->dev, "we need to start the dev.\n");
+ dev->recvd_hw_ready = true;
+ wake_up(&dev->wait_hw_ready);
+ } else {
+ dev_dbg(dev->dev, "Spurious Interrupt\n");
+ }
+ goto end;
+ }
+ /* check slots available for reading */
+ slots = mei_count_full_read_slots(dev);
+ while (slots > 0) {
+ dev_dbg(dev->dev, "slots to read = %08x\n", slots);
+ rets = mei_irq_read_handler(dev, &complete_list, &slots);
+ /* There is a race between ME write and interrupt delivery:
+ * Not all data is always available immediately after the
+ * interrupt, so try to read again on the next interrupt.
+ */
+ if (rets == -ENODATA)
+ break;
+
+ if (rets && dev->dev_state != MEI_DEV_RESETTING) {
+ dev_err(dev->dev, "mei_irq_read_handler ret = %d.\n",
+ rets);
+ schedule_work(&dev->reset_work);
+ goto end;
+ }
+ }
+
+ dev->hbuf_is_ready = mei_hbuf_is_ready(dev);
+
+ /*
+ * During PG handshake only allowed write is the replay to the
+ * PG exit message, so block calling write function
+ * if the pg event is in PG handshake
+ */
+ if (dev->pg_event != MEI_PG_EVENT_WAIT &&
+ dev->pg_event != MEI_PG_EVENT_RECEIVED) {
+ rets = mei_irq_write_handler(dev, &complete_list);
+ dev->hbuf_is_ready = mei_hbuf_is_ready(dev);
+ }
+
+ mei_irq_compl_handler(dev, &complete_list);
+
+end:
+ dev_dbg(dev->dev, "interrupt thread end ret = %d\n", rets);
+ mutex_unlock(&dev->device_lock);
+ return IRQ_HANDLED;
+}
+
+static const struct mei_hw_ops mei_me_hw_ops = {
+
+ .fw_status = mei_me_fw_status,
+ .pg_state = mei_me_pg_state,
+
+ .host_is_ready = mei_me_host_is_ready,
+
+ .hw_is_ready = mei_me_hw_is_ready,
+ .hw_reset = mei_me_hw_reset,
+ .hw_config = mei_me_hw_config,
+ .hw_start = mei_me_hw_start,
+
+ .pg_in_transition = mei_me_pg_in_transition,
+ .pg_is_enabled = mei_me_pg_is_enabled,
+
+ .intr_clear = mei_me_intr_clear,
+ .intr_enable = mei_me_intr_enable,
+ .intr_disable = mei_me_intr_disable,
+
+ .hbuf_free_slots = mei_me_hbuf_empty_slots,
+ .hbuf_is_ready = mei_me_hbuf_is_empty,
+ .hbuf_max_len = mei_me_hbuf_max_len,
+
+ .write = mei_me_write_message,
+
+ .rdbuf_full_slots = mei_me_count_full_read_slots,
+ .read_hdr = mei_me_mecbrw_read,
+ .read = mei_me_read_slots
+};
+
+static bool mei_me_fw_type_nm(struct pci_dev *pdev)
+{
+ u32 reg;
+
+ pci_read_config_dword(pdev, PCI_CFG_HFS_2, ®);
+ /* make sure that bit 9 (NM) is up and bit 10 (DM) is down */
+ return (reg & 0x600) == 0x200;
+}
+
+#define MEI_CFG_FW_NM \
+ .quirk_probe = mei_me_fw_type_nm
+
+static bool mei_me_fw_type_sps(struct pci_dev *pdev)
+{
+ u32 reg;
+ unsigned int devfn;
+
+ /*
+ * Read ME FW Status register to check for SPS Firmware
+ * The SPS FW is only signaled in pci function 0
+ */
+ devfn = PCI_DEVFN(PCI_SLOT(pdev->devfn), 0);
+ pci_bus_read_config_dword(pdev->bus, devfn, PCI_CFG_HFS_1, ®);
+ /* if bits [19:16] = 15, running SPS Firmware */
+ return (reg & 0xf0000) == 0xf0000;
+}
+
+#define MEI_CFG_FW_SPS \
+ .quirk_probe = mei_me_fw_type_sps
+
+
+#define MEI_CFG_LEGACY_HFS \
+ .fw_status.count = 0
+
+#define MEI_CFG_ICH_HFS \
+ .fw_status.count = 1, \
+ .fw_status.status[0] = PCI_CFG_HFS_1
+
+#define MEI_CFG_PCH_HFS \
+ .fw_status.count = 2, \
+ .fw_status.status[0] = PCI_CFG_HFS_1, \
+ .fw_status.status[1] = PCI_CFG_HFS_2
+
+#define MEI_CFG_PCH8_HFS \
+ .fw_status.count = 6, \
+ .fw_status.status[0] = PCI_CFG_HFS_1, \
+ .fw_status.status[1] = PCI_CFG_HFS_2, \
+ .fw_status.status[2] = PCI_CFG_HFS_3, \
+ .fw_status.status[3] = PCI_CFG_HFS_4, \
+ .fw_status.status[4] = PCI_CFG_HFS_5, \
+ .fw_status.status[5] = PCI_CFG_HFS_6
+
+/* ICH Legacy devices */
+const struct mei_cfg mei_me_legacy_cfg = {
+ MEI_CFG_LEGACY_HFS,
+};
+
+/* ICH devices */
+const struct mei_cfg mei_me_ich_cfg = {
+ MEI_CFG_ICH_HFS,
+};
+
+/* PCH devices */
+const struct mei_cfg mei_me_pch_cfg = {
+ MEI_CFG_PCH_HFS,
+};
+
+
+/* PCH Cougar Point and Patsburg with quirk for Node Manager exclusion */
+const struct mei_cfg mei_me_pch_cpt_pbg_cfg = {
+ MEI_CFG_PCH_HFS,
+ MEI_CFG_FW_NM,
+};
+
+/* PCH8 Lynx Point and newer devices */
+const struct mei_cfg mei_me_pch8_cfg = {
+ MEI_CFG_PCH8_HFS,
+};
+
+/* PCH8 Lynx Point with quirk for SPS Firmware exclusion */
+const struct mei_cfg mei_me_pch8_sps_cfg = {
+ MEI_CFG_PCH8_HFS,
+ MEI_CFG_FW_SPS,
+};
+
+/**
+ * mei_me_dev_init - allocates and initializes the mei device structure
+ *
+ * @pdev: The pci device structure
+ * @cfg: per device generation config
+ *
+ * Return: The mei_device_device pointer on success, NULL on failure.
+ */
+struct mei_device *mei_me_dev_init(struct pci_dev *pdev,
+ const struct mei_cfg *cfg)
+{
+ struct mei_device *dev;
+ struct mei_me_hw *hw;
+
+ dev = kzalloc(sizeof(struct mei_device) +
+ sizeof(struct mei_me_hw), GFP_KERNEL);
+ if (!dev)
+ return NULL;
+ hw = to_me_hw(dev);
+
+ mei_device_init(dev, &pdev->dev, &mei_me_hw_ops);
+ hw->cfg = cfg;
+ return dev;
+}
+
diff --git a/drivers/misc/mei/hw-me.h b/drivers/misc/mei/hw-me.h
new file mode 100644
index 0000000..2ee14dc
--- /dev/null
+++ b/drivers/misc/mei/hw-me.h
@@ -0,0 +1,84 @@
+/*
+ *
+ * Intel Management Engine Interface (Intel MEI) Linux driver
+ * Copyright (c) 2003-2012, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ */
+
+
+
+#ifndef _MEI_INTERFACE_H_
+#define _MEI_INTERFACE_H_
+
+#include <linux/irqreturn.h>
+#include <linux/pci.h>
+#include <linux/mei.h>
+
+#include "mei_dev.h"
+#include "client.h"
+
+/*
+ * mei_cfg - mei device configuration
+ *
+ * @fw_status: FW status
+ * @quirk_probe: device exclusion quirk
+ */
+struct mei_cfg {
+ const struct mei_fw_status fw_status;
+ bool (*quirk_probe)(struct pci_dev *pdev);
+};
+
+
+#define MEI_PCI_DEVICE(dev, cfg) \
+ .vendor = PCI_VENDOR_ID_INTEL, .device = (dev), \
+ .subvendor = PCI_ANY_ID, .subdevice = PCI_ANY_ID, \
+ .driver_data = (kernel_ulong_t)&(cfg)
+
+
+#define MEI_ME_RPM_TIMEOUT 500 /* ms */
+
+/**
+ * struct mei_me_hw - me hw specific data
+ *
+ * @cfg: per device generation config and ops
+ * @mem_addr: io memory address
+ * @intr_source: interrupt source
+ * @pg_state: power gating state
+ * @d0i3_supported: di03 support
+ */
+struct mei_me_hw {
+ const struct mei_cfg *cfg;
+ void __iomem *mem_addr;
+ u32 intr_source;
+ enum mei_pg_state pg_state;
+ bool d0i3_supported;
+};
+
+#define to_me_hw(dev) (struct mei_me_hw *)((dev)->hw)
+
+extern const struct mei_cfg mei_me_legacy_cfg;
+extern const struct mei_cfg mei_me_ich_cfg;
+extern const struct mei_cfg mei_me_pch_cfg;
+extern const struct mei_cfg mei_me_pch_cpt_pbg_cfg;
+extern const struct mei_cfg mei_me_pch8_cfg;
+extern const struct mei_cfg mei_me_pch8_sps_cfg;
+
+struct mei_device *mei_me_dev_init(struct pci_dev *pdev,
+ const struct mei_cfg *cfg);
+
+int mei_me_pg_enter_sync(struct mei_device *dev);
+int mei_me_pg_exit_sync(struct mei_device *dev);
+
+irqreturn_t mei_me_irq_quick_handler(int irq, void *dev_id);
+irqreturn_t mei_me_irq_thread_handler(int irq, void *dev_id);
+
+#endif /* _MEI_INTERFACE_H_ */
diff --git a/drivers/misc/mei/hw-txe-regs.h b/drivers/misc/mei/hw-txe-regs.h
new file mode 100644
index 0000000..f19229c
--- /dev/null
+++ b/drivers/misc/mei/hw-txe-regs.h
@@ -0,0 +1,294 @@
+/******************************************************************************
+ * Intel Management Engine Interface (Intel MEI) Linux driver
+ * Intel MEI Interface Header
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright(c) 2013 - 2014 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution
+ * in the file called COPYING
+ *
+ * Contact Information:
+ * Intel Corporation.
+ * linux-mei@linux.intel.com
+ * http://www.intel.com
+ *
+ * BSD LICENSE
+ *
+ * Copyright(c) 2013 - 2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ *****************************************************************************/
+#ifndef _MEI_HW_TXE_REGS_H_
+#define _MEI_HW_TXE_REGS_H_
+
+#include "hw.h"
+
+#define SEC_ALIVENESS_TIMER_TIMEOUT (5 * MSEC_PER_SEC)
+#define SEC_ALIVENESS_WAIT_TIMEOUT (1 * MSEC_PER_SEC)
+#define SEC_RESET_WAIT_TIMEOUT (1 * MSEC_PER_SEC)
+#define SEC_READY_WAIT_TIMEOUT (5 * MSEC_PER_SEC)
+#define START_MESSAGE_RESPONSE_WAIT_TIMEOUT (5 * MSEC_PER_SEC)
+#define RESET_CANCEL_WAIT_TIMEOUT (1 * MSEC_PER_SEC)
+
+enum {
+ SEC_BAR,
+ BRIDGE_BAR,
+
+ NUM_OF_MEM_BARS
+};
+
+/* SeC FW Status Register
+ *
+ * FW uses this register in order to report its status to host.
+ * This register resides in PCI-E config space.
+ */
+#define PCI_CFG_TXE_FW_STS0 0x40
+# define PCI_CFG_TXE_FW_STS0_WRK_ST_MSK 0x0000000F
+# define PCI_CFG_TXE_FW_STS0_OP_ST_MSK 0x000001C0
+# define PCI_CFG_TXE_FW_STS0_FW_INIT_CMPLT 0x00000200
+# define PCI_CFG_TXE_FW_STS0_ERR_CODE_MSK 0x0000F000
+# define PCI_CFG_TXE_FW_STS0_OP_MODE_MSK 0x000F0000
+# define PCI_CFG_TXE_FW_STS0_RST_CNT_MSK 0x00F00000
+#define PCI_CFG_TXE_FW_STS1 0x48
+
+#define IPC_BASE_ADDR 0x80400 /* SeC IPC Base Address */
+
+/* IPC Input Doorbell Register */
+#define SEC_IPC_INPUT_DOORBELL_REG (0x0000 + IPC_BASE_ADDR)
+
+/* IPC Input Status Register
+ * This register indicates whether or not processing of
+ * the most recent command has been completed by the SEC
+ * New commands and payloads should not be written by the Host
+ * until this indicates that the previous command has been processed.
+ */
+#define SEC_IPC_INPUT_STATUS_REG (0x0008 + IPC_BASE_ADDR)
+# define SEC_IPC_INPUT_STATUS_RDY BIT(0)
+
+/* IPC Host Interrupt Status Register */
+#define SEC_IPC_HOST_INT_STATUS_REG (0x0010 + IPC_BASE_ADDR)
+#define SEC_IPC_HOST_INT_STATUS_OUT_DB BIT(0)
+#define SEC_IPC_HOST_INT_STATUS_IN_RDY BIT(1)
+#define SEC_IPC_HOST_INT_STATUS_HDCP_M0_RCVD BIT(5)
+#define SEC_IPC_HOST_INT_STATUS_ILL_MEM_ACCESS BIT(17)
+#define SEC_IPC_HOST_INT_STATUS_AES_HKEY_ERR BIT(18)
+#define SEC_IPC_HOST_INT_STATUS_DES_HKEY_ERR BIT(19)
+#define SEC_IPC_HOST_INT_STATUS_TMRMTB_OVERFLOW BIT(21)
+
+/* Convenient mask for pending interrupts */
+#define SEC_IPC_HOST_INT_STATUS_PENDING \
+ (SEC_IPC_HOST_INT_STATUS_OUT_DB| \
+ SEC_IPC_HOST_INT_STATUS_IN_RDY)
+
+/* IPC Host Interrupt Mask Register */
+#define SEC_IPC_HOST_INT_MASK_REG (0x0014 + IPC_BASE_ADDR)
+
+# define SEC_IPC_HOST_INT_MASK_OUT_DB BIT(0) /* Output Doorbell Int Mask */
+# define SEC_IPC_HOST_INT_MASK_IN_RDY BIT(1) /* Input Ready Int Mask */
+
+/* IPC Input Payload RAM */
+#define SEC_IPC_INPUT_PAYLOAD_REG (0x0100 + IPC_BASE_ADDR)
+/* IPC Shared Payload RAM */
+#define IPC_SHARED_PAYLOAD_REG (0x0200 + IPC_BASE_ADDR)
+
+/* SeC Address Translation Table Entry 2 - Ctrl
+ *
+ * This register resides also in SeC's PCI-E Memory space.
+ */
+#define SATT2_CTRL_REG 0x1040
+# define SATT2_CTRL_VALID_MSK BIT(0)
+# define SATT2_CTRL_BR_BASE_ADDR_REG_SHIFT 8
+# define SATT2_CTRL_BRIDGE_HOST_EN_MSK BIT(12)
+
+/* SATT Table Entry 2 SAP Base Address Register */
+#define SATT2_SAP_BA_REG 0x1044
+/* SATT Table Entry 2 SAP Size Register. */
+#define SATT2_SAP_SIZE_REG 0x1048
+ /* SATT Table Entry 2 SAP Bridge Address - LSB Register */
+#define SATT2_BRG_BA_LSB_REG 0x104C
+
+/* Host High-level Interrupt Status Register */
+#define HHISR_REG 0x2020
+/* Host High-level Interrupt Enable Register
+ *
+ * Resides in PCI memory space. This is the top hierarchy for
+ * interrupts from SeC to host, aggregating both interrupts that
+ * arrive through HICR registers as well as interrupts
+ * that arrive via IPC.
+ */
+#define HHIER_REG 0x2024
+#define IPC_HHIER_SEC BIT(0)
+#define IPC_HHIER_BRIDGE BIT(1)
+#define IPC_HHIER_MSK (IPC_HHIER_SEC | IPC_HHIER_BRIDGE)
+
+/* Host High-level Interrupt Mask Register.
+ *
+ * Resides in PCI memory space.
+ * This is the top hierarchy for masking interrupts from SeC to host.
+ */
+#define HHIMR_REG 0x2028
+#define IPC_HHIMR_SEC BIT(0)
+#define IPC_HHIMR_BRIDGE BIT(1)
+
+/* Host High-level IRQ Status Register */
+#define HHIRQSR_REG 0x202C
+
+/* Host Interrupt Cause Register 0 - SeC IPC Readiness
+ *
+ * This register is both an ICR to Host from PCI Memory Space
+ * and it is also exposed in the SeC memory space.
+ * This register is used by SeC's IPC driver in order
+ * to synchronize with host about IPC interface state.
+ */
+#define HICR_SEC_IPC_READINESS_REG 0x2040
+#define HICR_SEC_IPC_READINESS_HOST_RDY BIT(0)
+#define HICR_SEC_IPC_READINESS_SEC_RDY BIT(1)
+#define HICR_SEC_IPC_READINESS_SYS_RDY \
+ (HICR_SEC_IPC_READINESS_HOST_RDY | \
+ HICR_SEC_IPC_READINESS_SEC_RDY)
+#define HICR_SEC_IPC_READINESS_RDY_CLR BIT(2)
+
+/* Host Interrupt Cause Register 1 - Aliveness Response */
+/* This register is both an ICR to Host from PCI Memory Space
+ * and it is also exposed in the SeC memory space.
+ * The register may be used by SeC to ACK a host request for aliveness.
+ */
+#define HICR_HOST_ALIVENESS_RESP_REG 0x2044
+#define HICR_HOST_ALIVENESS_RESP_ACK BIT(0)
+
+/* Host Interrupt Cause Register 2 - SeC IPC Output Doorbell */
+#define HICR_SEC_IPC_OUTPUT_DOORBELL_REG 0x2048
+
+/* Host Interrupt Status Register.
+ *
+ * Resides in PCI memory space.
+ * This is the main register involved in generating interrupts
+ * from SeC to host via HICRs.
+ * The interrupt generation rules are as follows:
+ * An interrupt will be generated whenever for any i,
+ * there is a transition from a state where at least one of
+ * the following conditions did not hold, to a state where
+ * ALL the following conditions hold:
+ * A) HISR.INT[i]_STS == 1.
+ * B) HIER.INT[i]_EN == 1.
+ */
+#define HISR_REG 0x2060
+#define HISR_INT_0_STS BIT(0)
+#define HISR_INT_1_STS BIT(1)
+#define HISR_INT_2_STS BIT(2)
+#define HISR_INT_3_STS BIT(3)
+#define HISR_INT_4_STS BIT(4)
+#define HISR_INT_5_STS BIT(5)
+#define HISR_INT_6_STS BIT(6)
+#define HISR_INT_7_STS BIT(7)
+#define HISR_INT_STS_MSK \
+ (HISR_INT_0_STS | HISR_INT_1_STS | HISR_INT_2_STS)
+
+/* Host Interrupt Enable Register. Resides in PCI memory space. */
+#define HIER_REG 0x2064
+#define HIER_INT_0_EN BIT(0)
+#define HIER_INT_1_EN BIT(1)
+#define HIER_INT_2_EN BIT(2)
+#define HIER_INT_3_EN BIT(3)
+#define HIER_INT_4_EN BIT(4)
+#define HIER_INT_5_EN BIT(5)
+#define HIER_INT_6_EN BIT(6)
+#define HIER_INT_7_EN BIT(7)
+
+#define HIER_INT_EN_MSK \
+ (HIER_INT_0_EN | HIER_INT_1_EN | HIER_INT_2_EN)
+
+
+/* SEC Memory Space IPC output payload.
+ *
+ * This register is part of the output payload which SEC provides to host.
+ */
+#define BRIDGE_IPC_OUTPUT_PAYLOAD_REG 0x20C0
+
+/* SeC Interrupt Cause Register - Host Aliveness Request
+ * This register is both an ICR to SeC and it is also exposed
+ * in the host-visible PCI memory space.
+ * The register is used by host to request SeC aliveness.
+ */
+#define SICR_HOST_ALIVENESS_REQ_REG 0x214C
+#define SICR_HOST_ALIVENESS_REQ_REQUESTED BIT(0)
+
+
+/* SeC Interrupt Cause Register - Host IPC Readiness
+ *
+ * This register is both an ICR to SeC and it is also exposed
+ * in the host-visible PCI memory space.
+ * This register is used by the host's SeC driver uses in order
+ * to synchronize with SeC about IPC interface state.
+ */
+#define SICR_HOST_IPC_READINESS_REQ_REG 0x2150
+
+
+#define SICR_HOST_IPC_READINESS_HOST_RDY BIT(0)
+#define SICR_HOST_IPC_READINESS_SEC_RDY BIT(1)
+#define SICR_HOST_IPC_READINESS_SYS_RDY \
+ (SICR_HOST_IPC_READINESS_HOST_RDY | \
+ SICR_HOST_IPC_READINESS_SEC_RDY)
+#define SICR_HOST_IPC_READINESS_RDY_CLR BIT(2)
+
+/* SeC Interrupt Cause Register - SeC IPC Output Status
+ *
+ * This register indicates whether or not processing of the most recent
+ * command has been completed by the Host.
+ * New commands and payloads should not be written by SeC until this
+ * register indicates that the previous command has been processed.
+ */
+#define SICR_SEC_IPC_OUTPUT_STATUS_REG 0x2154
+# define SEC_IPC_OUTPUT_STATUS_RDY BIT(0)
+
+
+
+/* MEI IPC Message payload size 64 bytes */
+#define PAYLOAD_SIZE 64
+
+/* MAX size for SATT range 32MB */
+#define SATT_RANGE_MAX (32 << 20)
+
+
+#endif /* _MEI_HW_TXE_REGS_H_ */
+
diff --git a/drivers/misc/mei/hw-txe.c b/drivers/misc/mei/hw-txe.c
new file mode 100644
index 0000000..396d75d
--- /dev/null
+++ b/drivers/misc/mei/hw-txe.c
@@ -0,0 +1,1244 @@
+/*
+ *
+ * Intel Management Engine Interface (Intel MEI) Linux driver
+ * Copyright (c) 2013-2014, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#include <linux/pci.h>
+#include <linux/jiffies.h>
+#include <linux/ktime.h>
+#include <linux/delay.h>
+#include <linux/kthread.h>
+#include <linux/irqreturn.h>
+
+#include <linux/mei.h>
+
+#include "mei_dev.h"
+#include "hw-txe.h"
+#include "client.h"
+#include "hbm.h"
+
+/**
+ * mei_txe_reg_read - Reads 32bit data from the txe device
+ *
+ * @base_addr: registers base address
+ * @offset: register offset
+ *
+ * Return: register value
+ */
+static inline u32 mei_txe_reg_read(void __iomem *base_addr,
+ unsigned long offset)
+{
+ return ioread32(base_addr + offset);
+}
+
+/**
+ * mei_txe_reg_write - Writes 32bit data to the txe device
+ *
+ * @base_addr: registers base address
+ * @offset: register offset
+ * @value: the value to write
+ */
+static inline void mei_txe_reg_write(void __iomem *base_addr,
+ unsigned long offset, u32 value)
+{
+ iowrite32(value, base_addr + offset);
+}
+
+/**
+ * mei_txe_sec_reg_read_silent - Reads 32bit data from the SeC BAR
+ *
+ * @hw: the txe hardware structure
+ * @offset: register offset
+ *
+ * Doesn't check for aliveness while Reads 32bit data from the SeC BAR
+ *
+ * Return: register value
+ */
+static inline u32 mei_txe_sec_reg_read_silent(struct mei_txe_hw *hw,
+ unsigned long offset)
+{
+ return mei_txe_reg_read(hw->mem_addr[SEC_BAR], offset);
+}
+
+/**
+ * mei_txe_sec_reg_read - Reads 32bit data from the SeC BAR
+ *
+ * @hw: the txe hardware structure
+ * @offset: register offset
+ *
+ * Reads 32bit data from the SeC BAR and shout loud if aliveness is not set
+ *
+ * Return: register value
+ */
+static inline u32 mei_txe_sec_reg_read(struct mei_txe_hw *hw,
+ unsigned long offset)
+{
+ WARN(!hw->aliveness, "sec read: aliveness not asserted\n");
+ return mei_txe_sec_reg_read_silent(hw, offset);
+}
+/**
+ * mei_txe_sec_reg_write_silent - Writes 32bit data to the SeC BAR
+ * doesn't check for aliveness
+ *
+ * @hw: the txe hardware structure
+ * @offset: register offset
+ * @value: value to write
+ *
+ * Doesn't check for aliveness while writes 32bit data from to the SeC BAR
+ */
+static inline void mei_txe_sec_reg_write_silent(struct mei_txe_hw *hw,
+ unsigned long offset, u32 value)
+{
+ mei_txe_reg_write(hw->mem_addr[SEC_BAR], offset, value);
+}
+
+/**
+ * mei_txe_sec_reg_write - Writes 32bit data to the SeC BAR
+ *
+ * @hw: the txe hardware structure
+ * @offset: register offset
+ * @value: value to write
+ *
+ * Writes 32bit data from the SeC BAR and shout loud if aliveness is not set
+ */
+static inline void mei_txe_sec_reg_write(struct mei_txe_hw *hw,
+ unsigned long offset, u32 value)
+{
+ WARN(!hw->aliveness, "sec write: aliveness not asserted\n");
+ mei_txe_sec_reg_write_silent(hw, offset, value);
+}
+/**
+ * mei_txe_br_reg_read - Reads 32bit data from the Bridge BAR
+ *
+ * @hw: the txe hardware structure
+ * @offset: offset from which to read the data
+ *
+ * Return: the byte read.
+ */
+static inline u32 mei_txe_br_reg_read(struct mei_txe_hw *hw,
+ unsigned long offset)
+{
+ return mei_txe_reg_read(hw->mem_addr[BRIDGE_BAR], offset);
+}
+
+/**
+ * mei_txe_br_reg_write - Writes 32bit data to the Bridge BAR
+ *
+ * @hw: the txe hardware structure
+ * @offset: offset from which to write the data
+ * @value: the byte to write
+ */
+static inline void mei_txe_br_reg_write(struct mei_txe_hw *hw,
+ unsigned long offset, u32 value)
+{
+ mei_txe_reg_write(hw->mem_addr[BRIDGE_BAR], offset, value);
+}
+
+/**
+ * mei_txe_aliveness_set - request for aliveness change
+ *
+ * @dev: the device structure
+ * @req: requested aliveness value
+ *
+ * Request for aliveness change and returns true if the change is
+ * really needed and false if aliveness is already
+ * in the requested state
+ *
+ * Locking: called under "dev->device_lock" lock
+ *
+ * Return: true if request was send
+ */
+static bool mei_txe_aliveness_set(struct mei_device *dev, u32 req)
+{
+
+ struct mei_txe_hw *hw = to_txe_hw(dev);
+ bool do_req = hw->aliveness != req;
+
+ dev_dbg(dev->dev, "Aliveness current=%d request=%d\n",
+ hw->aliveness, req);
+ if (do_req) {
+ dev->pg_event = MEI_PG_EVENT_WAIT;
+ mei_txe_br_reg_write(hw, SICR_HOST_ALIVENESS_REQ_REG, req);
+ }
+ return do_req;
+}
+
+
+/**
+ * mei_txe_aliveness_req_get - get aliveness requested register value
+ *
+ * @dev: the device structure
+ *
+ * Extract HICR_HOST_ALIVENESS_RESP_ACK bit from
+ * from HICR_HOST_ALIVENESS_REQ register value
+ *
+ * Return: SICR_HOST_ALIVENESS_REQ_REQUESTED bit value
+ */
+static u32 mei_txe_aliveness_req_get(struct mei_device *dev)
+{
+ struct mei_txe_hw *hw = to_txe_hw(dev);
+ u32 reg;
+
+ reg = mei_txe_br_reg_read(hw, SICR_HOST_ALIVENESS_REQ_REG);
+ return reg & SICR_HOST_ALIVENESS_REQ_REQUESTED;
+}
+
+/**
+ * mei_txe_aliveness_get - get aliveness response register value
+ *
+ * @dev: the device structure
+ *
+ * Return: HICR_HOST_ALIVENESS_RESP_ACK bit from HICR_HOST_ALIVENESS_RESP
+ * register
+ */
+static u32 mei_txe_aliveness_get(struct mei_device *dev)
+{
+ struct mei_txe_hw *hw = to_txe_hw(dev);
+ u32 reg;
+
+ reg = mei_txe_br_reg_read(hw, HICR_HOST_ALIVENESS_RESP_REG);
+ return reg & HICR_HOST_ALIVENESS_RESP_ACK;
+}
+
+/**
+ * mei_txe_aliveness_poll - waits for aliveness to settle
+ *
+ * @dev: the device structure
+ * @expected: expected aliveness value
+ *
+ * Polls for HICR_HOST_ALIVENESS_RESP.ALIVENESS_RESP to be set
+ *
+ * Return: 0 if the expected value was received, -ETIME otherwise
+ */
+static int mei_txe_aliveness_poll(struct mei_device *dev, u32 expected)
+{
+ struct mei_txe_hw *hw = to_txe_hw(dev);
+ ktime_t stop, start;
+
+ start = ktime_get();
+ stop = ktime_add(start, ms_to_ktime(SEC_ALIVENESS_WAIT_TIMEOUT));
+ do {
+ hw->aliveness = mei_txe_aliveness_get(dev);
+ if (hw->aliveness == expected) {
+ dev->pg_event = MEI_PG_EVENT_IDLE;
+ dev_dbg(dev->dev, "aliveness settled after %lld usecs\n",
+ ktime_to_us(ktime_sub(ktime_get(), start)));
+ return 0;
+ }
+ usleep_range(20, 50);
+ } while (ktime_before(ktime_get(), stop));
+
+ dev->pg_event = MEI_PG_EVENT_IDLE;
+ dev_err(dev->dev, "aliveness timed out\n");
+ return -ETIME;
+}
+
+/**
+ * mei_txe_aliveness_wait - waits for aliveness to settle
+ *
+ * @dev: the device structure
+ * @expected: expected aliveness value
+ *
+ * Waits for HICR_HOST_ALIVENESS_RESP.ALIVENESS_RESP to be set
+ *
+ * Return: 0 on success and < 0 otherwise
+ */
+static int mei_txe_aliveness_wait(struct mei_device *dev, u32 expected)
+{
+ struct mei_txe_hw *hw = to_txe_hw(dev);
+ const unsigned long timeout =
+ msecs_to_jiffies(SEC_ALIVENESS_WAIT_TIMEOUT);
+ long err;
+ int ret;
+
+ hw->aliveness = mei_txe_aliveness_get(dev);
+ if (hw->aliveness == expected)
+ return 0;
+
+ mutex_unlock(&dev->device_lock);
+ err = wait_event_timeout(hw->wait_aliveness_resp,
+ dev->pg_event == MEI_PG_EVENT_RECEIVED, timeout);
+ mutex_lock(&dev->device_lock);
+
+ hw->aliveness = mei_txe_aliveness_get(dev);
+ ret = hw->aliveness == expected ? 0 : -ETIME;
+
+ if (ret)
+ dev_warn(dev->dev, "aliveness timed out = %ld aliveness = %d event = %d\n",
+ err, hw->aliveness, dev->pg_event);
+ else
+ dev_dbg(dev->dev, "aliveness settled after = %d msec aliveness = %d event = %d\n",
+ jiffies_to_msecs(timeout - err),
+ hw->aliveness, dev->pg_event);
+
+ dev->pg_event = MEI_PG_EVENT_IDLE;
+ return ret;
+}
+
+/**
+ * mei_txe_aliveness_set_sync - sets an wait for aliveness to complete
+ *
+ * @dev: the device structure
+ * @req: requested aliveness value
+ *
+ * Return: 0 on success and < 0 otherwise
+ */
+int mei_txe_aliveness_set_sync(struct mei_device *dev, u32 req)
+{
+ if (mei_txe_aliveness_set(dev, req))
+ return mei_txe_aliveness_wait(dev, req);
+ return 0;
+}
+
+/**
+ * mei_txe_pg_in_transition - is device now in pg transition
+ *
+ * @dev: the device structure
+ *
+ * Return: true if in pg transition, false otherwise
+ */
+static bool mei_txe_pg_in_transition(struct mei_device *dev)
+{
+ return dev->pg_event == MEI_PG_EVENT_WAIT;
+}
+
+/**
+ * mei_txe_pg_is_enabled - detect if PG is supported by HW
+ *
+ * @dev: the device structure
+ *
+ * Return: true is pg supported, false otherwise
+ */
+static bool mei_txe_pg_is_enabled(struct mei_device *dev)
+{
+ return true;
+}
+
+/**
+ * mei_txe_pg_state - translate aliveness register value
+ * to the mei power gating state
+ *
+ * @dev: the device structure
+ *
+ * Return: MEI_PG_OFF if aliveness is on and MEI_PG_ON otherwise
+ */
+static inline enum mei_pg_state mei_txe_pg_state(struct mei_device *dev)
+{
+ struct mei_txe_hw *hw = to_txe_hw(dev);
+
+ return hw->aliveness ? MEI_PG_OFF : MEI_PG_ON;
+}
+
+/**
+ * mei_txe_input_ready_interrupt_enable - sets the Input Ready Interrupt
+ *
+ * @dev: the device structure
+ */
+static void mei_txe_input_ready_interrupt_enable(struct mei_device *dev)
+{
+ struct mei_txe_hw *hw = to_txe_hw(dev);
+ u32 hintmsk;
+ /* Enable the SEC_IPC_HOST_INT_MASK_IN_RDY interrupt */
+ hintmsk = mei_txe_sec_reg_read(hw, SEC_IPC_HOST_INT_MASK_REG);
+ hintmsk |= SEC_IPC_HOST_INT_MASK_IN_RDY;
+ mei_txe_sec_reg_write(hw, SEC_IPC_HOST_INT_MASK_REG, hintmsk);
+}
+
+/**
+ * mei_txe_input_doorbell_set - sets bit 0 in
+ * SEC_IPC_INPUT_DOORBELL.IPC_INPUT_DOORBELL.
+ *
+ * @hw: the txe hardware structure
+ */
+static void mei_txe_input_doorbell_set(struct mei_txe_hw *hw)
+{
+ /* Clear the interrupt cause */
+ clear_bit(TXE_INTR_IN_READY_BIT, &hw->intr_cause);
+ mei_txe_sec_reg_write(hw, SEC_IPC_INPUT_DOORBELL_REG, 1);
+}
+
+/**
+ * mei_txe_output_ready_set - Sets the SICR_SEC_IPC_OUTPUT_STATUS bit to 1
+ *
+ * @hw: the txe hardware structure
+ */
+static void mei_txe_output_ready_set(struct mei_txe_hw *hw)
+{
+ mei_txe_br_reg_write(hw,
+ SICR_SEC_IPC_OUTPUT_STATUS_REG,
+ SEC_IPC_OUTPUT_STATUS_RDY);
+}
+
+/**
+ * mei_txe_is_input_ready - check if TXE is ready for receiving data
+ *
+ * @dev: the device structure
+ *
+ * Return: true if INPUT STATUS READY bit is set
+ */
+static bool mei_txe_is_input_ready(struct mei_device *dev)
+{
+ struct mei_txe_hw *hw = to_txe_hw(dev);
+ u32 status;
+
+ status = mei_txe_sec_reg_read(hw, SEC_IPC_INPUT_STATUS_REG);
+ return !!(SEC_IPC_INPUT_STATUS_RDY & status);
+}
+
+/**
+ * mei_txe_intr_clear - clear all interrupts
+ *
+ * @dev: the device structure
+ */
+static inline void mei_txe_intr_clear(struct mei_device *dev)
+{
+ struct mei_txe_hw *hw = to_txe_hw(dev);
+
+ mei_txe_sec_reg_write_silent(hw, SEC_IPC_HOST_INT_STATUS_REG,
+ SEC_IPC_HOST_INT_STATUS_PENDING);
+ mei_txe_br_reg_write(hw, HISR_REG, HISR_INT_STS_MSK);
+ mei_txe_br_reg_write(hw, HHISR_REG, IPC_HHIER_MSK);
+}
+
+/**
+ * mei_txe_intr_disable - disable all interrupts
+ *
+ * @dev: the device structure
+ */
+static void mei_txe_intr_disable(struct mei_device *dev)
+{
+ struct mei_txe_hw *hw = to_txe_hw(dev);
+
+ mei_txe_br_reg_write(hw, HHIER_REG, 0);
+ mei_txe_br_reg_write(hw, HIER_REG, 0);
+}
+/**
+ * mei_txe_intr_enable - enable all interrupts
+ *
+ * @dev: the device structure
+ */
+static void mei_txe_intr_enable(struct mei_device *dev)
+{
+ struct mei_txe_hw *hw = to_txe_hw(dev);
+
+ mei_txe_br_reg_write(hw, HHIER_REG, IPC_HHIER_MSK);
+ mei_txe_br_reg_write(hw, HIER_REG, HIER_INT_EN_MSK);
+}
+
+/**
+ * mei_txe_pending_interrupts - check if there are pending interrupts
+ * only Aliveness, Input ready, and output doorbell are of relevance
+ *
+ * @dev: the device structure
+ *
+ * Checks if there are pending interrupts
+ * only Aliveness, Readiness, Input ready, and Output doorbell are relevant
+ *
+ * Return: true if there are pending interrupts
+ */
+static bool mei_txe_pending_interrupts(struct mei_device *dev)
+{
+
+ struct mei_txe_hw *hw = to_txe_hw(dev);
+ bool ret = (hw->intr_cause & (TXE_INTR_READINESS |
+ TXE_INTR_ALIVENESS |
+ TXE_INTR_IN_READY |
+ TXE_INTR_OUT_DB));
+
+ if (ret) {
+ dev_dbg(dev->dev,
+ "Pending Interrupts InReady=%01d Readiness=%01d, Aliveness=%01d, OutDoor=%01d\n",
+ !!(hw->intr_cause & TXE_INTR_IN_READY),
+ !!(hw->intr_cause & TXE_INTR_READINESS),
+ !!(hw->intr_cause & TXE_INTR_ALIVENESS),
+ !!(hw->intr_cause & TXE_INTR_OUT_DB));
+ }
+ return ret;
+}
+
+/**
+ * mei_txe_input_payload_write - write a dword to the host buffer
+ * at offset idx
+ *
+ * @dev: the device structure
+ * @idx: index in the host buffer
+ * @value: value
+ */
+static void mei_txe_input_payload_write(struct mei_device *dev,
+ unsigned long idx, u32 value)
+{
+ struct mei_txe_hw *hw = to_txe_hw(dev);
+
+ mei_txe_sec_reg_write(hw, SEC_IPC_INPUT_PAYLOAD_REG +
+ (idx * sizeof(u32)), value);
+}
+
+/**
+ * mei_txe_out_data_read - read dword from the device buffer
+ * at offset idx
+ *
+ * @dev: the device structure
+ * @idx: index in the device buffer
+ *
+ * Return: register value at index
+ */
+static u32 mei_txe_out_data_read(const struct mei_device *dev,
+ unsigned long idx)
+{
+ struct mei_txe_hw *hw = to_txe_hw(dev);
+
+ return mei_txe_br_reg_read(hw,
+ BRIDGE_IPC_OUTPUT_PAYLOAD_REG + (idx * sizeof(u32)));
+}
+
+/* Readiness */
+
+/**
+ * mei_txe_readiness_set_host_rdy - set host readiness bit
+ *
+ * @dev: the device structure
+ */
+static void mei_txe_readiness_set_host_rdy(struct mei_device *dev)
+{
+ struct mei_txe_hw *hw = to_txe_hw(dev);
+
+ mei_txe_br_reg_write(hw,
+ SICR_HOST_IPC_READINESS_REQ_REG,
+ SICR_HOST_IPC_READINESS_HOST_RDY);
+}
+
+/**
+ * mei_txe_readiness_clear - clear host readiness bit
+ *
+ * @dev: the device structure
+ */
+static void mei_txe_readiness_clear(struct mei_device *dev)
+{
+ struct mei_txe_hw *hw = to_txe_hw(dev);
+
+ mei_txe_br_reg_write(hw, SICR_HOST_IPC_READINESS_REQ_REG,
+ SICR_HOST_IPC_READINESS_RDY_CLR);
+}
+/**
+ * mei_txe_readiness_get - Reads and returns
+ * the HICR_SEC_IPC_READINESS register value
+ *
+ * @dev: the device structure
+ *
+ * Return: the HICR_SEC_IPC_READINESS register value
+ */
+static u32 mei_txe_readiness_get(struct mei_device *dev)
+{
+ struct mei_txe_hw *hw = to_txe_hw(dev);
+
+ return mei_txe_br_reg_read(hw, HICR_SEC_IPC_READINESS_REG);
+}
+
+
+/**
+ * mei_txe_readiness_is_sec_rdy - check readiness
+ * for HICR_SEC_IPC_READINESS_SEC_RDY
+ *
+ * @readiness: cached readiness state
+ *
+ * Return: true if readiness bit is set
+ */
+static inline bool mei_txe_readiness_is_sec_rdy(u32 readiness)
+{
+ return !!(readiness & HICR_SEC_IPC_READINESS_SEC_RDY);
+}
+
+/**
+ * mei_txe_hw_is_ready - check if the hw is ready
+ *
+ * @dev: the device structure
+ *
+ * Return: true if sec is ready
+ */
+static bool mei_txe_hw_is_ready(struct mei_device *dev)
+{
+ u32 readiness = mei_txe_readiness_get(dev);
+
+ return mei_txe_readiness_is_sec_rdy(readiness);
+}
+
+/**
+ * mei_txe_host_is_ready - check if the host is ready
+ *
+ * @dev: the device structure
+ *
+ * Return: true if host is ready
+ */
+static inline bool mei_txe_host_is_ready(struct mei_device *dev)
+{
+ struct mei_txe_hw *hw = to_txe_hw(dev);
+ u32 reg = mei_txe_br_reg_read(hw, HICR_SEC_IPC_READINESS_REG);
+
+ return !!(reg & HICR_SEC_IPC_READINESS_HOST_RDY);
+}
+
+/**
+ * mei_txe_readiness_wait - wait till readiness settles
+ *
+ * @dev: the device structure
+ *
+ * Return: 0 on success and -ETIME on timeout
+ */
+static int mei_txe_readiness_wait(struct mei_device *dev)
+{
+ if (mei_txe_hw_is_ready(dev))
+ return 0;
+
+ mutex_unlock(&dev->device_lock);
+ wait_event_timeout(dev->wait_hw_ready, dev->recvd_hw_ready,
+ msecs_to_jiffies(SEC_RESET_WAIT_TIMEOUT));
+ mutex_lock(&dev->device_lock);
+ if (!dev->recvd_hw_ready) {
+ dev_err(dev->dev, "wait for readiness failed\n");
+ return -ETIME;
+ }
+
+ dev->recvd_hw_ready = false;
+ return 0;
+}
+
+static const struct mei_fw_status mei_txe_fw_sts = {
+ .count = 2,
+ .status[0] = PCI_CFG_TXE_FW_STS0,
+ .status[1] = PCI_CFG_TXE_FW_STS1
+};
+
+/**
+ * mei_txe_fw_status - read fw status register from pci config space
+ *
+ * @dev: mei device
+ * @fw_status: fw status register values
+ *
+ * Return: 0 on success, error otherwise
+ */
+static int mei_txe_fw_status(struct mei_device *dev,
+ struct mei_fw_status *fw_status)
+{
+ const struct mei_fw_status *fw_src = &mei_txe_fw_sts;
+ struct pci_dev *pdev = to_pci_dev(dev->dev);
+ int ret;
+ int i;
+
+ if (!fw_status)
+ return -EINVAL;
+
+ fw_status->count = fw_src->count;
+ for (i = 0; i < fw_src->count && i < MEI_FW_STATUS_MAX; i++) {
+ ret = pci_read_config_dword(pdev,
+ fw_src->status[i], &fw_status->status[i]);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+/**
+ * mei_txe_hw_config - configure hardware at the start of the devices
+ *
+ * @dev: the device structure
+ *
+ * Configure hardware at the start of the device should be done only
+ * once at the device probe time
+ */
+static void mei_txe_hw_config(struct mei_device *dev)
+{
+
+ struct mei_txe_hw *hw = to_txe_hw(dev);
+
+ /* Doesn't change in runtime */
+ dev->hbuf_depth = PAYLOAD_SIZE / 4;
+
+ hw->aliveness = mei_txe_aliveness_get(dev);
+ hw->readiness = mei_txe_readiness_get(dev);
+
+ dev_dbg(dev->dev, "aliveness_resp = 0x%08x, readiness = 0x%08x.\n",
+ hw->aliveness, hw->readiness);
+}
+
+
+/**
+ * mei_txe_write - writes a message to device.
+ *
+ * @dev: the device structure
+ * @header: header of message
+ * @buf: message buffer will be written
+ *
+ * Return: 0 if success, <0 - otherwise.
+ */
+
+static int mei_txe_write(struct mei_device *dev,
+ struct mei_msg_hdr *header, unsigned char *buf)
+{
+ struct mei_txe_hw *hw = to_txe_hw(dev);
+ unsigned long rem;
+ unsigned long length;
+ int slots = dev->hbuf_depth;
+ u32 *reg_buf = (u32 *)buf;
+ u32 dw_cnt;
+ int i;
+
+ if (WARN_ON(!header || !buf))
+ return -EINVAL;
+
+ length = header->length;
+
+ dev_dbg(dev->dev, MEI_HDR_FMT, MEI_HDR_PRM(header));
+
+ dw_cnt = mei_data2slots(length);
+ if (dw_cnt > slots)
+ return -EMSGSIZE;
+
+ if (WARN(!hw->aliveness, "txe write: aliveness not asserted\n"))
+ return -EAGAIN;
+
+ /* Enable Input Ready Interrupt. */
+ mei_txe_input_ready_interrupt_enable(dev);
+
+ if (!mei_txe_is_input_ready(dev)) {
+ char fw_sts_str[MEI_FW_STATUS_STR_SZ];
+
+ mei_fw_status_str(dev, fw_sts_str, MEI_FW_STATUS_STR_SZ);
+ dev_err(dev->dev, "Input is not ready %s\n", fw_sts_str);
+ return -EAGAIN;
+ }
+
+ mei_txe_input_payload_write(dev, 0, *((u32 *)header));
+
+ for (i = 0; i < length / 4; i++)
+ mei_txe_input_payload_write(dev, i + 1, reg_buf[i]);
+
+ rem = length & 0x3;
+ if (rem > 0) {
+ u32 reg = 0;
+
+ memcpy(®, &buf[length - rem], rem);
+ mei_txe_input_payload_write(dev, i + 1, reg);
+ }
+
+ /* after each write the whole buffer is consumed */
+ hw->slots = 0;
+
+ /* Set Input-Doorbell */
+ mei_txe_input_doorbell_set(hw);
+
+ return 0;
+}
+
+/**
+ * mei_txe_hbuf_max_len - mimics the me hbuf circular buffer
+ *
+ * @dev: the device structure
+ *
+ * Return: the PAYLOAD_SIZE - 4
+ */
+static size_t mei_txe_hbuf_max_len(const struct mei_device *dev)
+{
+ return PAYLOAD_SIZE - sizeof(struct mei_msg_hdr);
+}
+
+/**
+ * mei_txe_hbuf_empty_slots - mimics the me hbuf circular buffer
+ *
+ * @dev: the device structure
+ *
+ * Return: always hbuf_depth
+ */
+static int mei_txe_hbuf_empty_slots(struct mei_device *dev)
+{
+ struct mei_txe_hw *hw = to_txe_hw(dev);
+
+ return hw->slots;
+}
+
+/**
+ * mei_txe_count_full_read_slots - mimics the me device circular buffer
+ *
+ * @dev: the device structure
+ *
+ * Return: always buffer size in dwords count
+ */
+static int mei_txe_count_full_read_slots(struct mei_device *dev)
+{
+ /* read buffers has static size */
+ return PAYLOAD_SIZE / 4;
+}
+
+/**
+ * mei_txe_read_hdr - read message header which is always in 4 first bytes
+ *
+ * @dev: the device structure
+ *
+ * Return: mei message header
+ */
+
+static u32 mei_txe_read_hdr(const struct mei_device *dev)
+{
+ return mei_txe_out_data_read(dev, 0);
+}
+/**
+ * mei_txe_read - reads a message from the txe device.
+ *
+ * @dev: the device structure
+ * @buf: message buffer will be written
+ * @len: message size will be read
+ *
+ * Return: -EINVAL on error wrong argument and 0 on success
+ */
+static int mei_txe_read(struct mei_device *dev,
+ unsigned char *buf, unsigned long len)
+{
+
+ struct mei_txe_hw *hw = to_txe_hw(dev);
+ u32 *reg_buf, reg;
+ u32 rem;
+ u32 i;
+
+ if (WARN_ON(!buf || !len))
+ return -EINVAL;
+
+ reg_buf = (u32 *)buf;
+ rem = len & 0x3;
+
+ dev_dbg(dev->dev, "buffer-length = %lu buf[0]0x%08X\n",
+ len, mei_txe_out_data_read(dev, 0));
+
+ for (i = 0; i < len / 4; i++) {
+ /* skip header: index starts from 1 */
+ reg = mei_txe_out_data_read(dev, i + 1);
+ dev_dbg(dev->dev, "buf[%d] = 0x%08X\n", i, reg);
+ *reg_buf++ = reg;
+ }
+
+ if (rem) {
+ reg = mei_txe_out_data_read(dev, i + 1);
+ memcpy(reg_buf, ®, rem);
+ }
+
+ mei_txe_output_ready_set(hw);
+ return 0;
+}
+
+/**
+ * mei_txe_hw_reset - resets host and fw.
+ *
+ * @dev: the device structure
+ * @intr_enable: if interrupt should be enabled after reset.
+ *
+ * Return: 0 on success and < 0 in case of error
+ */
+static int mei_txe_hw_reset(struct mei_device *dev, bool intr_enable)
+{
+ struct mei_txe_hw *hw = to_txe_hw(dev);
+
+ u32 aliveness_req;
+ /*
+ * read input doorbell to ensure consistency between Bridge and SeC
+ * return value might be garbage return
+ */
+ (void)mei_txe_sec_reg_read_silent(hw, SEC_IPC_INPUT_DOORBELL_REG);
+
+ aliveness_req = mei_txe_aliveness_req_get(dev);
+ hw->aliveness = mei_txe_aliveness_get(dev);
+
+ /* Disable interrupts in this stage we will poll */
+ mei_txe_intr_disable(dev);
+
+ /*
+ * If Aliveness Request and Aliveness Response are not equal then
+ * wait for them to be equal
+ * Since we might have interrupts disabled - poll for it
+ */
+ if (aliveness_req != hw->aliveness)
+ if (mei_txe_aliveness_poll(dev, aliveness_req) < 0) {
+ dev_err(dev->dev, "wait for aliveness settle failed ... bailing out\n");
+ return -EIO;
+ }
+
+ /*
+ * If Aliveness Request and Aliveness Response are set then clear them
+ */
+ if (aliveness_req) {
+ mei_txe_aliveness_set(dev, 0);
+ if (mei_txe_aliveness_poll(dev, 0) < 0) {
+ dev_err(dev->dev, "wait for aliveness failed ... bailing out\n");
+ return -EIO;
+ }
+ }
+
+ /*
+ * Set readiness RDY_CLR bit
+ */
+ mei_txe_readiness_clear(dev);
+
+ return 0;
+}
+
+/**
+ * mei_txe_hw_start - start the hardware after reset
+ *
+ * @dev: the device structure
+ *
+ * Return: 0 on success an error code otherwise
+ */
+static int mei_txe_hw_start(struct mei_device *dev)
+{
+ struct mei_txe_hw *hw = to_txe_hw(dev);
+ int ret;
+
+ u32 hisr;
+
+ /* bring back interrupts */
+ mei_txe_intr_enable(dev);
+
+ ret = mei_txe_readiness_wait(dev);
+ if (ret < 0) {
+ dev_err(dev->dev, "waiting for readiness failed\n");
+ return ret;
+ }
+
+ /*
+ * If HISR.INT2_STS interrupt status bit is set then clear it.
+ */
+ hisr = mei_txe_br_reg_read(hw, HISR_REG);
+ if (hisr & HISR_INT_2_STS)
+ mei_txe_br_reg_write(hw, HISR_REG, HISR_INT_2_STS);
+
+ /* Clear the interrupt cause of OutputDoorbell */
+ clear_bit(TXE_INTR_OUT_DB_BIT, &hw->intr_cause);
+
+ ret = mei_txe_aliveness_set_sync(dev, 1);
+ if (ret < 0) {
+ dev_err(dev->dev, "wait for aliveness failed ... bailing out\n");
+ return ret;
+ }
+
+ /* enable input ready interrupts:
+ * SEC_IPC_HOST_INT_MASK.IPC_INPUT_READY_INT_MASK
+ */
+ mei_txe_input_ready_interrupt_enable(dev);
+
+
+ /* Set the SICR_SEC_IPC_OUTPUT_STATUS.IPC_OUTPUT_READY bit */
+ mei_txe_output_ready_set(hw);
+
+ /* Set bit SICR_HOST_IPC_READINESS.HOST_RDY
+ */
+ mei_txe_readiness_set_host_rdy(dev);
+
+ return 0;
+}
+
+/**
+ * mei_txe_check_and_ack_intrs - translate multi BAR interrupt into
+ * single bit mask and acknowledge the interrupts
+ *
+ * @dev: the device structure
+ * @do_ack: acknowledge interrupts
+ *
+ * Return: true if found interrupts to process.
+ */
+static bool mei_txe_check_and_ack_intrs(struct mei_device *dev, bool do_ack)
+{
+ struct mei_txe_hw *hw = to_txe_hw(dev);
+ u32 hisr;
+ u32 hhisr;
+ u32 ipc_isr;
+ u32 aliveness;
+ bool generated;
+
+ /* read interrupt registers */
+ hhisr = mei_txe_br_reg_read(hw, HHISR_REG);
+ generated = (hhisr & IPC_HHIER_MSK);
+ if (!generated)
+ goto out;
+
+ hisr = mei_txe_br_reg_read(hw, HISR_REG);
+
+ aliveness = mei_txe_aliveness_get(dev);
+ if (hhisr & IPC_HHIER_SEC && aliveness) {
+ ipc_isr = mei_txe_sec_reg_read_silent(hw,
+ SEC_IPC_HOST_INT_STATUS_REG);
+ } else {
+ ipc_isr = 0;
+ hhisr &= ~IPC_HHIER_SEC;
+ }
+
+ generated = generated ||
+ (hisr & HISR_INT_STS_MSK) ||
+ (ipc_isr & SEC_IPC_HOST_INT_STATUS_PENDING);
+
+ if (generated && do_ack) {
+ /* Save the interrupt causes */
+ hw->intr_cause |= hisr & HISR_INT_STS_MSK;
+ if (ipc_isr & SEC_IPC_HOST_INT_STATUS_IN_RDY)
+ hw->intr_cause |= TXE_INTR_IN_READY;
+
+
+ mei_txe_intr_disable(dev);
+ /* Clear the interrupts in hierarchy:
+ * IPC and Bridge, than the High Level */
+ mei_txe_sec_reg_write_silent(hw,
+ SEC_IPC_HOST_INT_STATUS_REG, ipc_isr);
+ mei_txe_br_reg_write(hw, HISR_REG, hisr);
+ mei_txe_br_reg_write(hw, HHISR_REG, hhisr);
+ }
+
+out:
+ return generated;
+}
+
+/**
+ * mei_txe_irq_quick_handler - The ISR of the MEI device
+ *
+ * @irq: The irq number
+ * @dev_id: pointer to the device structure
+ *
+ * Return: IRQ_WAKE_THREAD if interrupt is designed for the device
+ * IRQ_NONE otherwise
+ */
+irqreturn_t mei_txe_irq_quick_handler(int irq, void *dev_id)
+{
+ struct mei_device *dev = dev_id;
+
+ if (mei_txe_check_and_ack_intrs(dev, true))
+ return IRQ_WAKE_THREAD;
+ return IRQ_NONE;
+}
+
+
+/**
+ * mei_txe_irq_thread_handler - txe interrupt thread
+ *
+ * @irq: The irq number
+ * @dev_id: pointer to the device structure
+ *
+ * Return: IRQ_HANDLED
+ */
+irqreturn_t mei_txe_irq_thread_handler(int irq, void *dev_id)
+{
+ struct mei_device *dev = (struct mei_device *) dev_id;
+ struct mei_txe_hw *hw = to_txe_hw(dev);
+ struct mei_cl_cb complete_list;
+ s32 slots;
+ int rets = 0;
+
+ dev_dbg(dev->dev, "irq thread: Interrupt Registers HHISR|HISR|SEC=%02X|%04X|%02X\n",
+ mei_txe_br_reg_read(hw, HHISR_REG),
+ mei_txe_br_reg_read(hw, HISR_REG),
+ mei_txe_sec_reg_read_silent(hw, SEC_IPC_HOST_INT_STATUS_REG));
+
+
+ /* initialize our complete list */
+ mutex_lock(&dev->device_lock);
+ mei_io_list_init(&complete_list);
+
+ if (pci_dev_msi_enabled(to_pci_dev(dev->dev)))
+ mei_txe_check_and_ack_intrs(dev, true);
+
+ /* show irq events */
+ mei_txe_pending_interrupts(dev);
+
+ hw->aliveness = mei_txe_aliveness_get(dev);
+ hw->readiness = mei_txe_readiness_get(dev);
+
+ /* Readiness:
+ * Detection of TXE driver going through reset
+ * or TXE driver resetting the HECI interface.
+ */
+ if (test_and_clear_bit(TXE_INTR_READINESS_BIT, &hw->intr_cause)) {
+ dev_dbg(dev->dev, "Readiness Interrupt was received...\n");
+
+ /* Check if SeC is going through reset */
+ if (mei_txe_readiness_is_sec_rdy(hw->readiness)) {
+ dev_dbg(dev->dev, "we need to start the dev.\n");
+ dev->recvd_hw_ready = true;
+ } else {
+ dev->recvd_hw_ready = false;
+ if (dev->dev_state != MEI_DEV_RESETTING) {
+
+ dev_warn(dev->dev, "FW not ready: resetting.\n");
+ schedule_work(&dev->reset_work);
+ goto end;
+
+ }
+ }
+ wake_up(&dev->wait_hw_ready);
+ }
+
+ /************************************************************/
+ /* Check interrupt cause:
+ * Aliveness: Detection of SeC acknowledge of host request that
+ * it remain alive or host cancellation of that request.
+ */
+
+ if (test_and_clear_bit(TXE_INTR_ALIVENESS_BIT, &hw->intr_cause)) {
+ /* Clear the interrupt cause */
+ dev_dbg(dev->dev,
+ "Aliveness Interrupt: Status: %d\n", hw->aliveness);
+ dev->pg_event = MEI_PG_EVENT_RECEIVED;
+ if (waitqueue_active(&hw->wait_aliveness_resp))
+ wake_up(&hw->wait_aliveness_resp);
+ }
+
+
+ /* Output Doorbell:
+ * Detection of SeC having sent output to host
+ */
+ slots = mei_count_full_read_slots(dev);
+ if (test_and_clear_bit(TXE_INTR_OUT_DB_BIT, &hw->intr_cause)) {
+ /* Read from TXE */
+ rets = mei_irq_read_handler(dev, &complete_list, &slots);
+ if (rets && dev->dev_state != MEI_DEV_RESETTING) {
+ dev_err(dev->dev,
+ "mei_irq_read_handler ret = %d.\n", rets);
+
+ schedule_work(&dev->reset_work);
+ goto end;
+ }
+ }
+ /* Input Ready: Detection if host can write to SeC */
+ if (test_and_clear_bit(TXE_INTR_IN_READY_BIT, &hw->intr_cause)) {
+ dev->hbuf_is_ready = true;
+ hw->slots = dev->hbuf_depth;
+ }
+
+ if (hw->aliveness && dev->hbuf_is_ready) {
+ /* get the real register value */
+ dev->hbuf_is_ready = mei_hbuf_is_ready(dev);
+ rets = mei_irq_write_handler(dev, &complete_list);
+ if (rets && rets != -EMSGSIZE)
+ dev_err(dev->dev, "mei_irq_write_handler ret = %d.\n",
+ rets);
+ dev->hbuf_is_ready = mei_hbuf_is_ready(dev);
+ }
+
+ mei_irq_compl_handler(dev, &complete_list);
+
+end:
+ dev_dbg(dev->dev, "interrupt thread end ret = %d\n", rets);
+
+ mutex_unlock(&dev->device_lock);
+
+ mei_enable_interrupts(dev);
+ return IRQ_HANDLED;
+}
+
+static const struct mei_hw_ops mei_txe_hw_ops = {
+
+ .host_is_ready = mei_txe_host_is_ready,
+
+ .fw_status = mei_txe_fw_status,
+ .pg_state = mei_txe_pg_state,
+
+ .hw_is_ready = mei_txe_hw_is_ready,
+ .hw_reset = mei_txe_hw_reset,
+ .hw_config = mei_txe_hw_config,
+ .hw_start = mei_txe_hw_start,
+
+ .pg_in_transition = mei_txe_pg_in_transition,
+ .pg_is_enabled = mei_txe_pg_is_enabled,
+
+ .intr_clear = mei_txe_intr_clear,
+ .intr_enable = mei_txe_intr_enable,
+ .intr_disable = mei_txe_intr_disable,
+
+ .hbuf_free_slots = mei_txe_hbuf_empty_slots,
+ .hbuf_is_ready = mei_txe_is_input_ready,
+ .hbuf_max_len = mei_txe_hbuf_max_len,
+
+ .write = mei_txe_write,
+
+ .rdbuf_full_slots = mei_txe_count_full_read_slots,
+ .read_hdr = mei_txe_read_hdr,
+
+ .read = mei_txe_read,
+
+};
+
+/**
+ * mei_txe_dev_init - allocates and initializes txe hardware specific structure
+ *
+ * @pdev: pci device
+ *
+ * Return: struct mei_device * on success or NULL
+ */
+struct mei_device *mei_txe_dev_init(struct pci_dev *pdev)
+{
+ struct mei_device *dev;
+ struct mei_txe_hw *hw;
+
+ dev = kzalloc(sizeof(struct mei_device) +
+ sizeof(struct mei_txe_hw), GFP_KERNEL);
+ if (!dev)
+ return NULL;
+
+ mei_device_init(dev, &pdev->dev, &mei_txe_hw_ops);
+
+ hw = to_txe_hw(dev);
+
+ init_waitqueue_head(&hw->wait_aliveness_resp);
+
+ return dev;
+}
+
+/**
+ * mei_txe_setup_satt2 - SATT2 configuration for DMA support.
+ *
+ * @dev: the device structure
+ * @addr: physical address start of the range
+ * @range: physical range size
+ *
+ * Return: 0 on success an error code otherwise
+ */
+int mei_txe_setup_satt2(struct mei_device *dev, phys_addr_t addr, u32 range)
+{
+ struct mei_txe_hw *hw = to_txe_hw(dev);
+
+ u32 lo32 = lower_32_bits(addr);
+ u32 hi32 = upper_32_bits(addr);
+ u32 ctrl;
+
+ /* SATT is limited to 36 Bits */
+ if (hi32 & ~0xF)
+ return -EINVAL;
+
+ /* SATT has to be 16Byte aligned */
+ if (lo32 & 0xF)
+ return -EINVAL;
+
+ /* SATT range has to be 4Bytes aligned */
+ if (range & 0x4)
+ return -EINVAL;
+
+ /* SATT is limited to 32 MB range*/
+ if (range > SATT_RANGE_MAX)
+ return -EINVAL;
+
+ ctrl = SATT2_CTRL_VALID_MSK;
+ ctrl |= hi32 << SATT2_CTRL_BR_BASE_ADDR_REG_SHIFT;
+
+ mei_txe_br_reg_write(hw, SATT2_SAP_SIZE_REG, range);
+ mei_txe_br_reg_write(hw, SATT2_BRG_BA_LSB_REG, lo32);
+ mei_txe_br_reg_write(hw, SATT2_CTRL_REG, ctrl);
+ dev_dbg(dev->dev, "SATT2: SAP_SIZE_OFFSET=0x%08X, BRG_BA_LSB_OFFSET=0x%08X, CTRL_OFFSET=0x%08X\n",
+ range, lo32, ctrl);
+
+ return 0;
+}
diff --git a/drivers/misc/mei/hw-txe.h b/drivers/misc/mei/hw-txe.h
new file mode 100644
index 0000000..ce3ed0b
--- /dev/null
+++ b/drivers/misc/mei/hw-txe.h
@@ -0,0 +1,75 @@
+/*
+ *
+ * Intel Management Engine Interface (Intel MEI) Linux driver
+ * Copyright (c) 2013-2014, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#ifndef _MEI_HW_TXE_H_
+#define _MEI_HW_TXE_H_
+
+#include <linux/irqreturn.h>
+
+#include "hw.h"
+#include "hw-txe-regs.h"
+
+#define MEI_TXI_RPM_TIMEOUT 500 /* ms */
+
+/* Flatten Hierarchy interrupt cause */
+#define TXE_INTR_READINESS_BIT 0 /* HISR_INT_0_STS */
+#define TXE_INTR_READINESS HISR_INT_0_STS
+#define TXE_INTR_ALIVENESS_BIT 1 /* HISR_INT_1_STS */
+#define TXE_INTR_ALIVENESS HISR_INT_1_STS
+#define TXE_INTR_OUT_DB_BIT 2 /* HISR_INT_2_STS */
+#define TXE_INTR_OUT_DB HISR_INT_2_STS
+#define TXE_INTR_IN_READY_BIT 8 /* beyond HISR */
+#define TXE_INTR_IN_READY BIT(8)
+
+/**
+ * struct mei_txe_hw - txe hardware specifics
+ *
+ * @mem_addr: SeC and BRIDGE bars
+ * @aliveness: aliveness (power gating) state of the hardware
+ * @readiness: readiness state of the hardware
+ * @slots: number of empty slots
+ * @wait_aliveness_resp: aliveness wait queue
+ * @intr_cause: translated interrupt cause
+ */
+struct mei_txe_hw {
+ void __iomem *mem_addr[NUM_OF_MEM_BARS];
+ u32 aliveness;
+ u32 readiness;
+ u32 slots;
+
+ wait_queue_head_t wait_aliveness_resp;
+
+ unsigned long intr_cause;
+};
+
+#define to_txe_hw(dev) (struct mei_txe_hw *)((dev)->hw)
+
+static inline struct mei_device *hw_txe_to_mei(struct mei_txe_hw *hw)
+{
+ return container_of((void *)hw, struct mei_device, hw);
+}
+
+struct mei_device *mei_txe_dev_init(struct pci_dev *pdev);
+
+irqreturn_t mei_txe_irq_quick_handler(int irq, void *dev_id);
+irqreturn_t mei_txe_irq_thread_handler(int irq, void *dev_id);
+
+int mei_txe_aliveness_set_sync(struct mei_device *dev, u32 req);
+
+int mei_txe_setup_satt2(struct mei_device *dev, phys_addr_t addr, u32 range);
+
+
+#endif /* _MEI_HW_TXE_H_ */
diff --git a/drivers/misc/mei/hw.h b/drivers/misc/mei/hw.h
new file mode 100644
index 0000000..4cebde8
--- /dev/null
+++ b/drivers/misc/mei/hw.h
@@ -0,0 +1,426 @@
+/*
+ *
+ * Intel Management Engine Interface (Intel MEI) Linux driver
+ * Copyright (c) 2003-2012, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#ifndef _MEI_HW_TYPES_H_
+#define _MEI_HW_TYPES_H_
+
+#include <linux/uuid.h>
+
+/*
+ * Timeouts in Seconds
+ */
+#define MEI_HW_READY_TIMEOUT 2 /* Timeout on ready message */
+#define MEI_CONNECT_TIMEOUT 3 /* HPS: at least 2 seconds */
+
+#define MEI_CL_CONNECT_TIMEOUT 15 /* HPS: Client Connect Timeout */
+#define MEI_CLIENTS_INIT_TIMEOUT 15 /* HPS: Clients Enumeration Timeout */
+
+#define MEI_IAMTHIF_STALL_TIMER 12 /* HPS */
+#define MEI_IAMTHIF_READ_TIMER 10 /* HPS */
+
+#define MEI_PGI_TIMEOUT 1 /* PG Isolation time response 1 sec */
+#define MEI_D0I3_TIMEOUT 5 /* D0i3 set/unset max response time */
+#define MEI_HBM_TIMEOUT 1 /* 1 second */
+
+/*
+ * MEI Version
+ */
+#define HBM_MINOR_VERSION 0
+#define HBM_MAJOR_VERSION 2
+
+/*
+ * MEI version with PGI support
+ */
+#define HBM_MINOR_VERSION_PGI 1
+#define HBM_MAJOR_VERSION_PGI 1
+
+/*
+ * MEI version with Dynamic clients support
+ */
+#define HBM_MINOR_VERSION_DC 0
+#define HBM_MAJOR_VERSION_DC 2
+
+/*
+ * MEI version with disconnect on connection timeout support
+ */
+#define HBM_MINOR_VERSION_DOT 0
+#define HBM_MAJOR_VERSION_DOT 2
+
+/*
+ * MEI version with notifcation support
+ */
+#define HBM_MINOR_VERSION_EV 0
+#define HBM_MAJOR_VERSION_EV 2
+
+/* Host bus message command opcode */
+#define MEI_HBM_CMD_OP_MSK 0x7f
+/* Host bus message command RESPONSE */
+#define MEI_HBM_CMD_RES_MSK 0x80
+
+/*
+ * MEI Bus Message Command IDs
+ */
+#define HOST_START_REQ_CMD 0x01
+#define HOST_START_RES_CMD 0x81
+
+#define HOST_STOP_REQ_CMD 0x02
+#define HOST_STOP_RES_CMD 0x82
+
+#define ME_STOP_REQ_CMD 0x03
+
+#define HOST_ENUM_REQ_CMD 0x04
+#define HOST_ENUM_RES_CMD 0x84
+
+#define HOST_CLIENT_PROPERTIES_REQ_CMD 0x05
+#define HOST_CLIENT_PROPERTIES_RES_CMD 0x85
+
+#define CLIENT_CONNECT_REQ_CMD 0x06
+#define CLIENT_CONNECT_RES_CMD 0x86
+
+#define CLIENT_DISCONNECT_REQ_CMD 0x07
+#define CLIENT_DISCONNECT_RES_CMD 0x87
+
+#define MEI_FLOW_CONTROL_CMD 0x08
+
+#define MEI_PG_ISOLATION_ENTRY_REQ_CMD 0x0a
+#define MEI_PG_ISOLATION_ENTRY_RES_CMD 0x8a
+#define MEI_PG_ISOLATION_EXIT_REQ_CMD 0x0b
+#define MEI_PG_ISOLATION_EXIT_RES_CMD 0x8b
+
+#define MEI_HBM_ADD_CLIENT_REQ_CMD 0x0f
+#define MEI_HBM_ADD_CLIENT_RES_CMD 0x8f
+
+#define MEI_HBM_NOTIFY_REQ_CMD 0x10
+#define MEI_HBM_NOTIFY_RES_CMD 0x90
+#define MEI_HBM_NOTIFICATION_CMD 0x11
+
+/*
+ * MEI Stop Reason
+ * used by hbm_host_stop_request.reason
+ */
+enum mei_stop_reason_types {
+ DRIVER_STOP_REQUEST = 0x00,
+ DEVICE_D1_ENTRY = 0x01,
+ DEVICE_D2_ENTRY = 0x02,
+ DEVICE_D3_ENTRY = 0x03,
+ SYSTEM_S1_ENTRY = 0x04,
+ SYSTEM_S2_ENTRY = 0x05,
+ SYSTEM_S3_ENTRY = 0x06,
+ SYSTEM_S4_ENTRY = 0x07,
+ SYSTEM_S5_ENTRY = 0x08
+};
+
+
+/**
+ * enum mei_hbm_status - mei host bus messages return values
+ *
+ * @MEI_HBMS_SUCCESS : status success
+ * @MEI_HBMS_CLIENT_NOT_FOUND : client not found
+ * @MEI_HBMS_ALREADY_EXISTS : connection already established
+ * @MEI_HBMS_REJECTED : connection is rejected
+ * @MEI_HBMS_INVALID_PARAMETER : invalid parameter
+ * @MEI_HBMS_NOT_ALLOWED : operation not allowed
+ * @MEI_HBMS_ALREADY_STARTED : system is already started
+ * @MEI_HBMS_NOT_STARTED : system not started
+ *
+ * @MEI_HBMS_MAX : sentinel
+ */
+enum mei_hbm_status {
+ MEI_HBMS_SUCCESS = 0,
+ MEI_HBMS_CLIENT_NOT_FOUND = 1,
+ MEI_HBMS_ALREADY_EXISTS = 2,
+ MEI_HBMS_REJECTED = 3,
+ MEI_HBMS_INVALID_PARAMETER = 4,
+ MEI_HBMS_NOT_ALLOWED = 5,
+ MEI_HBMS_ALREADY_STARTED = 6,
+ MEI_HBMS_NOT_STARTED = 7,
+
+ MEI_HBMS_MAX
+};
+
+
+/*
+ * Client Connect Status
+ * used by hbm_client_connect_response.status
+ */
+enum mei_cl_connect_status {
+ MEI_CL_CONN_SUCCESS = MEI_HBMS_SUCCESS,
+ MEI_CL_CONN_NOT_FOUND = MEI_HBMS_CLIENT_NOT_FOUND,
+ MEI_CL_CONN_ALREADY_STARTED = MEI_HBMS_ALREADY_EXISTS,
+ MEI_CL_CONN_OUT_OF_RESOURCES = MEI_HBMS_REJECTED,
+ MEI_CL_CONN_MESSAGE_SMALL = MEI_HBMS_INVALID_PARAMETER,
+ MEI_CL_CONN_NOT_ALLOWED = MEI_HBMS_NOT_ALLOWED,
+};
+
+/*
+ * Client Disconnect Status
+ */
+enum mei_cl_disconnect_status {
+ MEI_CL_DISCONN_SUCCESS = MEI_HBMS_SUCCESS
+};
+
+/*
+ * MEI BUS Interface Section
+ */
+struct mei_msg_hdr {
+ u32 me_addr:8;
+ u32 host_addr:8;
+ u32 length:9;
+ u32 reserved:5;
+ u32 internal:1;
+ u32 msg_complete:1;
+} __packed;
+
+
+struct mei_bus_message {
+ u8 hbm_cmd;
+ u8 data[0];
+} __packed;
+
+/**
+ * struct hbm_cl_cmd - client specific host bus command
+ * CONNECT, DISCONNECT, and FlOW CONTROL
+ *
+ * @hbm_cmd: bus message command header
+ * @me_addr: address of the client in ME
+ * @host_addr: address of the client in the driver
+ * @data: generic data
+ */
+struct mei_hbm_cl_cmd {
+ u8 hbm_cmd;
+ u8 me_addr;
+ u8 host_addr;
+ u8 data;
+};
+
+struct hbm_version {
+ u8 minor_version;
+ u8 major_version;
+} __packed;
+
+struct hbm_host_version_request {
+ u8 hbm_cmd;
+ u8 reserved;
+ struct hbm_version host_version;
+} __packed;
+
+struct hbm_host_version_response {
+ u8 hbm_cmd;
+ u8 host_version_supported;
+ struct hbm_version me_max_version;
+} __packed;
+
+struct hbm_host_stop_request {
+ u8 hbm_cmd;
+ u8 reason;
+ u8 reserved[2];
+} __packed;
+
+struct hbm_host_stop_response {
+ u8 hbm_cmd;
+ u8 reserved[3];
+} __packed;
+
+struct hbm_me_stop_request {
+ u8 hbm_cmd;
+ u8 reason;
+ u8 reserved[2];
+} __packed;
+
+/**
+ * struct hbm_host_enum_request - enumeration request from host to fw
+ *
+ * @hbm_cmd: bus message command header
+ * @allow_add: allow dynamic clients add HBM version >= 2.0
+ * @reserved: reserved
+ */
+struct hbm_host_enum_request {
+ u8 hbm_cmd;
+ u8 allow_add;
+ u8 reserved[2];
+} __packed;
+
+struct hbm_host_enum_response {
+ u8 hbm_cmd;
+ u8 reserved[3];
+ u8 valid_addresses[32];
+} __packed;
+
+struct mei_client_properties {
+ uuid_le protocol_name;
+ u8 protocol_version;
+ u8 max_number_of_connections;
+ u8 fixed_address;
+ u8 single_recv_buf;
+ u32 max_msg_length;
+} __packed;
+
+struct hbm_props_request {
+ u8 hbm_cmd;
+ u8 me_addr;
+ u8 reserved[2];
+} __packed;
+
+struct hbm_props_response {
+ u8 hbm_cmd;
+ u8 me_addr;
+ u8 status;
+ u8 reserved[1];
+ struct mei_client_properties client_properties;
+} __packed;
+
+/**
+ * struct hbm_add_client_request - request to add a client
+ * might be sent by fw after enumeration has already completed
+ *
+ * @hbm_cmd: bus message command header
+ * @me_addr: address of the client in ME
+ * @reserved: reserved
+ * @client_properties: client properties
+ */
+struct hbm_add_client_request {
+ u8 hbm_cmd;
+ u8 me_addr;
+ u8 reserved[2];
+ struct mei_client_properties client_properties;
+} __packed;
+
+/**
+ * struct hbm_add_client_response - response to add a client
+ * sent by the host to report client addition status to fw
+ *
+ * @hbm_cmd: bus message command header
+ * @me_addr: address of the client in ME
+ * @status: if HBMS_SUCCESS then the client can now accept connections.
+ * @reserved: reserved
+ */
+struct hbm_add_client_response {
+ u8 hbm_cmd;
+ u8 me_addr;
+ u8 status;
+ u8 reserved[1];
+} __packed;
+
+/**
+ * struct hbm_power_gate - power gate request/response
+ *
+ * @hbm_cmd: bus message command header
+ * @reserved: reserved
+ */
+struct hbm_power_gate {
+ u8 hbm_cmd;
+ u8 reserved[3];
+} __packed;
+
+/**
+ * struct hbm_client_connect_request - connect/disconnect request
+ *
+ * @hbm_cmd: bus message command header
+ * @me_addr: address of the client in ME
+ * @host_addr: address of the client in the driver
+ * @reserved: reserved
+ */
+struct hbm_client_connect_request {
+ u8 hbm_cmd;
+ u8 me_addr;
+ u8 host_addr;
+ u8 reserved;
+} __packed;
+
+/**
+ * struct hbm_client_connect_response - connect/disconnect response
+ *
+ * @hbm_cmd: bus message command header
+ * @me_addr: address of the client in ME
+ * @host_addr: address of the client in the driver
+ * @status: status of the request
+ */
+struct hbm_client_connect_response {
+ u8 hbm_cmd;
+ u8 me_addr;
+ u8 host_addr;
+ u8 status;
+} __packed;
+
+
+#define MEI_FC_MESSAGE_RESERVED_LENGTH 5
+
+struct hbm_flow_control {
+ u8 hbm_cmd;
+ u8 me_addr;
+ u8 host_addr;
+ u8 reserved[MEI_FC_MESSAGE_RESERVED_LENGTH];
+} __packed;
+
+#define MEI_HBM_NOTIFICATION_START 1
+#define MEI_HBM_NOTIFICATION_STOP 0
+/**
+ * struct hbm_notification_request - start/stop notification request
+ *
+ * @hbm_cmd: bus message command header
+ * @me_addr: address of the client in ME
+ * @host_addr: address of the client in the driver
+ * @start: start = 1 or stop = 0 asynchronous notifications
+ */
+struct hbm_notification_request {
+ u8 hbm_cmd;
+ u8 me_addr;
+ u8 host_addr;
+ u8 start;
+} __packed;
+
+/**
+ * struct hbm_notification_response - start/stop notification response
+ *
+ * @hbm_cmd: bus message command header
+ * @me_addr: address of the client in ME
+ * @host_addr: - address of the client in the driver
+ * @status: (mei_hbm_status) response status for the request
+ * - MEI_HBMS_SUCCESS: successful stop/start
+ * - MEI_HBMS_CLIENT_NOT_FOUND: if the connection could not be found.
+ * - MEI_HBMS_ALREADY_STARTED: for start requests for a previously
+ * started notification.
+ * - MEI_HBMS_NOT_STARTED: for stop request for a connected client for whom
+ * asynchronous notifications are currently disabled.
+ *
+ * @start: start = 1 or stop = 0 asynchronous notifications
+ * @reserved: reserved
+ */
+struct hbm_notification_response {
+ u8 hbm_cmd;
+ u8 me_addr;
+ u8 host_addr;
+ u8 status;
+ u8 start;
+ u8 reserved[3];
+} __packed;
+
+/**
+ * struct hbm_notification - notification event
+ *
+ * @hbm_cmd: bus message command header
+ * @me_addr: address of the client in ME
+ * @host_addr: address of the client in the driver
+ * @reserved: reserved for alignment
+ */
+struct hbm_notification {
+ u8 hbm_cmd;
+ u8 me_addr;
+ u8 host_addr;
+ u8 reserved[1];
+} __packed;
+
+#endif
diff --git a/drivers/misc/mei/init.c b/drivers/misc/mei/init.c
new file mode 100644
index 0000000..3edafc8
--- /dev/null
+++ b/drivers/misc/mei/init.c
@@ -0,0 +1,429 @@
+/*
+ *
+ * Intel Management Engine Interface (Intel MEI) Linux driver
+ * Copyright (c) 2003-2012, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#include <linux/export.h>
+#include <linux/sched.h>
+#include <linux/wait.h>
+#include <linux/delay.h>
+
+#include <linux/mei.h>
+
+#include "mei_dev.h"
+#include "hbm.h"
+#include "client.h"
+
+const char *mei_dev_state_str(int state)
+{
+#define MEI_DEV_STATE(state) case MEI_DEV_##state: return #state
+ switch (state) {
+ MEI_DEV_STATE(INITIALIZING);
+ MEI_DEV_STATE(INIT_CLIENTS);
+ MEI_DEV_STATE(ENABLED);
+ MEI_DEV_STATE(RESETTING);
+ MEI_DEV_STATE(DISABLED);
+ MEI_DEV_STATE(POWER_DOWN);
+ MEI_DEV_STATE(POWER_UP);
+ default:
+ return "unknown";
+ }
+#undef MEI_DEV_STATE
+}
+
+const char *mei_pg_state_str(enum mei_pg_state state)
+{
+#define MEI_PG_STATE(state) case MEI_PG_##state: return #state
+ switch (state) {
+ MEI_PG_STATE(OFF);
+ MEI_PG_STATE(ON);
+ default:
+ return "unknown";
+ }
+#undef MEI_PG_STATE
+}
+
+/**
+ * mei_fw_status2str - convert fw status registers to printable string
+ *
+ * @fw_status: firmware status
+ * @buf: string buffer at minimal size MEI_FW_STATUS_STR_SZ
+ * @len: buffer len must be >= MEI_FW_STATUS_STR_SZ
+ *
+ * Return: number of bytes written or -EINVAL if buffer is to small
+ */
+ssize_t mei_fw_status2str(struct mei_fw_status *fw_status,
+ char *buf, size_t len)
+{
+ ssize_t cnt = 0;
+ int i;
+
+ buf[0] = '\0';
+
+ if (len < MEI_FW_STATUS_STR_SZ)
+ return -EINVAL;
+
+ for (i = 0; i < fw_status->count; i++)
+ cnt += scnprintf(buf + cnt, len - cnt, "%08X ",
+ fw_status->status[i]);
+
+ /* drop last space */
+ buf[cnt] = '\0';
+ return cnt;
+}
+EXPORT_SYMBOL_GPL(mei_fw_status2str);
+
+/**
+ * mei_cancel_work - Cancel mei background jobs
+ *
+ * @dev: the device structure
+ */
+void mei_cancel_work(struct mei_device *dev)
+{
+ cancel_work_sync(&dev->init_work);
+ cancel_work_sync(&dev->reset_work);
+
+ cancel_delayed_work(&dev->timer_work);
+}
+EXPORT_SYMBOL_GPL(mei_cancel_work);
+
+/**
+ * mei_reset - resets host and fw.
+ *
+ * @dev: the device structure
+ *
+ * Return: 0 on success or < 0 if the reset hasn't succeeded
+ */
+int mei_reset(struct mei_device *dev)
+{
+ enum mei_dev_state state = dev->dev_state;
+ bool interrupts_enabled;
+ int ret;
+
+ if (state != MEI_DEV_INITIALIZING &&
+ state != MEI_DEV_DISABLED &&
+ state != MEI_DEV_POWER_DOWN &&
+ state != MEI_DEV_POWER_UP) {
+ char fw_sts_str[MEI_FW_STATUS_STR_SZ];
+
+ mei_fw_status_str(dev, fw_sts_str, MEI_FW_STATUS_STR_SZ);
+ dev_warn(dev->dev, "unexpected reset: dev_state = %s fw status = %s\n",
+ mei_dev_state_str(state), fw_sts_str);
+ }
+
+ /* we're already in reset, cancel the init timer
+ * if the reset was called due the hbm protocol error
+ * we need to call it before hw start
+ * so the hbm watchdog won't kick in
+ */
+ mei_hbm_idle(dev);
+
+ /* enter reset flow */
+ interrupts_enabled = state != MEI_DEV_POWER_DOWN;
+ dev->dev_state = MEI_DEV_RESETTING;
+
+ dev->reset_count++;
+ if (dev->reset_count > MEI_MAX_CONSEC_RESET) {
+ dev_err(dev->dev, "reset: reached maximal consecutive resets: disabling the device\n");
+ dev->dev_state = MEI_DEV_DISABLED;
+ return -ENODEV;
+ }
+
+ ret = mei_hw_reset(dev, interrupts_enabled);
+ /* fall through and remove the sw state even if hw reset has failed */
+
+ /* no need to clean up software state in case of power up */
+ if (state != MEI_DEV_INITIALIZING &&
+ state != MEI_DEV_POWER_UP) {
+
+ /* remove all waiting requests */
+ mei_cl_all_write_clear(dev);
+
+ mei_cl_all_disconnect(dev);
+
+ /* wake up all readers and writers so they can be interrupted */
+ mei_cl_all_wakeup(dev);
+
+ /* remove entry if already in list */
+ dev_dbg(dev->dev, "remove iamthif and wd from the file list.\n");
+ mei_cl_unlink(&dev->wd_cl);
+ mei_cl_unlink(&dev->iamthif_cl);
+ mei_amthif_reset_params(dev);
+ }
+
+ mei_hbm_reset(dev);
+
+ dev->rd_msg_hdr = 0;
+ dev->wd_pending = false;
+
+ if (ret) {
+ dev_err(dev->dev, "hw_reset failed ret = %d\n", ret);
+ return ret;
+ }
+
+ if (state == MEI_DEV_POWER_DOWN) {
+ dev_dbg(dev->dev, "powering down: end of reset\n");
+ dev->dev_state = MEI_DEV_DISABLED;
+ return 0;
+ }
+
+ ret = mei_hw_start(dev);
+ if (ret) {
+ dev_err(dev->dev, "hw_start failed ret = %d\n", ret);
+ return ret;
+ }
+
+ dev_dbg(dev->dev, "link is established start sending messages.\n");
+
+ dev->dev_state = MEI_DEV_INIT_CLIENTS;
+ ret = mei_hbm_start_req(dev);
+ if (ret) {
+ dev_err(dev->dev, "hbm_start failed ret = %d\n", ret);
+ dev->dev_state = MEI_DEV_RESETTING;
+ return ret;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(mei_reset);
+
+/**
+ * mei_start - initializes host and fw to start work.
+ *
+ * @dev: the device structure
+ *
+ * Return: 0 on success, <0 on failure.
+ */
+int mei_start(struct mei_device *dev)
+{
+ int ret;
+
+ mutex_lock(&dev->device_lock);
+
+ /* acknowledge interrupt and stop interrupts */
+ mei_clear_interrupts(dev);
+
+ mei_hw_config(dev);
+
+ dev_dbg(dev->dev, "reset in start the mei device.\n");
+
+ dev->reset_count = 0;
+ do {
+ dev->dev_state = MEI_DEV_INITIALIZING;
+ ret = mei_reset(dev);
+
+ if (ret == -ENODEV || dev->dev_state == MEI_DEV_DISABLED) {
+ dev_err(dev->dev, "reset failed ret = %d", ret);
+ goto err;
+ }
+ } while (ret);
+
+ /* we cannot start the device w/o hbm start message completed */
+ if (dev->dev_state == MEI_DEV_DISABLED) {
+ dev_err(dev->dev, "reset failed");
+ goto err;
+ }
+
+ if (mei_hbm_start_wait(dev)) {
+ dev_err(dev->dev, "HBM haven't started");
+ goto err;
+ }
+
+ if (!mei_host_is_ready(dev)) {
+ dev_err(dev->dev, "host is not ready.\n");
+ goto err;
+ }
+
+ if (!mei_hw_is_ready(dev)) {
+ dev_err(dev->dev, "ME is not ready.\n");
+ goto err;
+ }
+
+ if (!mei_hbm_version_is_supported(dev)) {
+ dev_dbg(dev->dev, "MEI start failed.\n");
+ goto err;
+ }
+
+ dev_dbg(dev->dev, "link layer has been established.\n");
+
+ mutex_unlock(&dev->device_lock);
+ return 0;
+err:
+ dev_err(dev->dev, "link layer initialization failed.\n");
+ dev->dev_state = MEI_DEV_DISABLED;
+ mutex_unlock(&dev->device_lock);
+ return -ENODEV;
+}
+EXPORT_SYMBOL_GPL(mei_start);
+
+/**
+ * mei_restart - restart device after suspend
+ *
+ * @dev: the device structure
+ *
+ * Return: 0 on success or -ENODEV if the restart hasn't succeeded
+ */
+int mei_restart(struct mei_device *dev)
+{
+ int err;
+
+ mutex_lock(&dev->device_lock);
+
+ mei_clear_interrupts(dev);
+
+ dev->dev_state = MEI_DEV_POWER_UP;
+ dev->reset_count = 0;
+
+ err = mei_reset(dev);
+
+ mutex_unlock(&dev->device_lock);
+
+ if (err == -ENODEV || dev->dev_state == MEI_DEV_DISABLED) {
+ dev_err(dev->dev, "device disabled = %d\n", err);
+ return -ENODEV;
+ }
+
+ /* try to start again */
+ if (err)
+ schedule_work(&dev->reset_work);
+
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(mei_restart);
+
+static void mei_reset_work(struct work_struct *work)
+{
+ struct mei_device *dev =
+ container_of(work, struct mei_device, reset_work);
+ int ret;
+
+ mutex_lock(&dev->device_lock);
+
+ ret = mei_reset(dev);
+
+ mutex_unlock(&dev->device_lock);
+
+ if (dev->dev_state == MEI_DEV_DISABLED) {
+ dev_err(dev->dev, "device disabled = %d\n", ret);
+ return;
+ }
+
+ /* retry reset in case of failure */
+ if (ret)
+ schedule_work(&dev->reset_work);
+}
+
+void mei_stop(struct mei_device *dev)
+{
+ dev_dbg(dev->dev, "stopping the device.\n");
+
+ mei_cl_bus_remove_devices(dev);
+
+ mei_cancel_work(dev);
+
+ mutex_lock(&dev->device_lock);
+
+ mei_wd_stop(dev);
+
+ dev->dev_state = MEI_DEV_POWER_DOWN;
+ mei_reset(dev);
+ /* move device to disabled state unconditionally */
+ dev->dev_state = MEI_DEV_DISABLED;
+
+ mutex_unlock(&dev->device_lock);
+
+ mei_watchdog_unregister(dev);
+}
+EXPORT_SYMBOL_GPL(mei_stop);
+
+/**
+ * mei_write_is_idle - check if the write queues are idle
+ *
+ * @dev: the device structure
+ *
+ * Return: true of there is no pending write
+ */
+bool mei_write_is_idle(struct mei_device *dev)
+{
+ bool idle = (dev->dev_state == MEI_DEV_ENABLED &&
+ list_empty(&dev->ctrl_wr_list.list) &&
+ list_empty(&dev->write_list.list) &&
+ list_empty(&dev->write_waiting_list.list));
+
+ dev_dbg(dev->dev, "write pg: is idle[%d] state=%s ctrl=%01d write=%01d wwait=%01d\n",
+ idle,
+ mei_dev_state_str(dev->dev_state),
+ list_empty(&dev->ctrl_wr_list.list),
+ list_empty(&dev->write_list.list),
+ list_empty(&dev->write_waiting_list.list));
+
+ return idle;
+}
+EXPORT_SYMBOL_GPL(mei_write_is_idle);
+
+/**
+ * mei_device_init -- initialize mei_device structure
+ *
+ * @dev: the mei device
+ * @device: the device structure
+ * @hw_ops: hw operations
+ */
+void mei_device_init(struct mei_device *dev,
+ struct device *device,
+ const struct mei_hw_ops *hw_ops)
+{
+ /* setup our list array */
+ INIT_LIST_HEAD(&dev->file_list);
+ INIT_LIST_HEAD(&dev->device_list);
+ INIT_LIST_HEAD(&dev->me_clients);
+ mutex_init(&dev->device_lock);
+ init_rwsem(&dev->me_clients_rwsem);
+ mutex_init(&dev->cl_bus_lock);
+ init_waitqueue_head(&dev->wait_hw_ready);
+ init_waitqueue_head(&dev->wait_pg);
+ init_waitqueue_head(&dev->wait_hbm_start);
+ init_waitqueue_head(&dev->wait_stop_wd);
+ dev->dev_state = MEI_DEV_INITIALIZING;
+ dev->reset_count = 0;
+
+ mei_io_list_init(&dev->write_list);
+ mei_io_list_init(&dev->write_waiting_list);
+ mei_io_list_init(&dev->ctrl_wr_list);
+ mei_io_list_init(&dev->ctrl_rd_list);
+
+ INIT_DELAYED_WORK(&dev->timer_work, mei_timer);
+ INIT_WORK(&dev->init_work, mei_host_client_init);
+ INIT_WORK(&dev->reset_work, mei_reset_work);
+
+ INIT_LIST_HEAD(&dev->wd_cl.link);
+ INIT_LIST_HEAD(&dev->iamthif_cl.link);
+ mei_io_list_init(&dev->amthif_cmd_list);
+ mei_io_list_init(&dev->amthif_rd_complete_list);
+
+ bitmap_zero(dev->host_clients_map, MEI_CLIENTS_MAX);
+ dev->open_handle_count = 0;
+
+ /*
+ * Reserving the first client ID
+ * 0: Reserved for MEI Bus Message communications
+ */
+ bitmap_set(dev->host_clients_map, 0, 1);
+
+ dev->pg_event = MEI_PG_EVENT_IDLE;
+ dev->ops = hw_ops;
+ dev->dev = device;
+}
+EXPORT_SYMBOL_GPL(mei_device_init);
+
diff --git a/drivers/misc/mei/interrupt.c b/drivers/misc/mei/interrupt.c
new file mode 100644
index 0000000..d1df797
--- /dev/null
+++ b/drivers/misc/mei/interrupt.c
@@ -0,0 +1,550 @@
+/*
+ *
+ * Intel Management Engine Interface (Intel MEI) Linux driver
+ * Copyright (c) 2003-2012, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ */
+
+
+#include <linux/export.h>
+#include <linux/kthread.h>
+#include <linux/interrupt.h>
+#include <linux/fs.h>
+#include <linux/jiffies.h>
+#include <linux/slab.h>
+#include <linux/pm_runtime.h>
+
+#include <linux/mei.h>
+
+#include "mei_dev.h"
+#include "hbm.h"
+#include "client.h"
+
+
+/**
+ * mei_irq_compl_handler - dispatch complete handlers
+ * for the completed callbacks
+ *
+ * @dev: mei device
+ * @compl_list: list of completed cbs
+ */
+void mei_irq_compl_handler(struct mei_device *dev, struct mei_cl_cb *compl_list)
+{
+ struct mei_cl_cb *cb, *next;
+ struct mei_cl *cl;
+
+ list_for_each_entry_safe(cb, next, &compl_list->list, list) {
+ cl = cb->cl;
+ list_del_init(&cb->list);
+
+ dev_dbg(dev->dev, "completing call back.\n");
+ if (cl == &dev->iamthif_cl)
+ mei_amthif_complete(dev, cb);
+ else
+ mei_cl_complete(cl, cb);
+ }
+}
+EXPORT_SYMBOL_GPL(mei_irq_compl_handler);
+
+/**
+ * mei_cl_hbm_equal - check if hbm is addressed to the client
+ *
+ * @cl: host client
+ * @mei_hdr: header of mei client message
+ *
+ * Return: true if matches, false otherwise
+ */
+static inline int mei_cl_hbm_equal(struct mei_cl *cl,
+ struct mei_msg_hdr *mei_hdr)
+{
+ return mei_cl_host_addr(cl) == mei_hdr->host_addr &&
+ mei_cl_me_id(cl) == mei_hdr->me_addr;
+}
+
+/**
+ * mei_irq_discard_msg - discard received message
+ *
+ * @dev: mei device
+ * @hdr: message header
+ */
+void mei_irq_discard_msg(struct mei_device *dev, struct mei_msg_hdr *hdr)
+{
+ /*
+ * no need to check for size as it is guarantied
+ * that length fits into rd_msg_buf
+ */
+ mei_read_slots(dev, dev->rd_msg_buf, hdr->length);
+ dev_dbg(dev->dev, "discarding message " MEI_HDR_FMT "\n",
+ MEI_HDR_PRM(hdr));
+}
+
+/**
+ * mei_cl_irq_read_msg - process client message
+ *
+ * @cl: reading client
+ * @mei_hdr: header of mei client message
+ * @complete_list: completion list
+ *
+ * Return: always 0
+ */
+int mei_cl_irq_read_msg(struct mei_cl *cl,
+ struct mei_msg_hdr *mei_hdr,
+ struct mei_cl_cb *complete_list)
+{
+ struct mei_device *dev = cl->dev;
+ struct mei_cl_cb *cb;
+ unsigned char *buffer = NULL;
+
+ cb = list_first_entry_or_null(&cl->rd_pending, struct mei_cl_cb, list);
+ if (!cb) {
+ cl_err(dev, cl, "pending read cb not found\n");
+ goto out;
+ }
+
+ if (!mei_cl_is_connected(cl)) {
+ cl_dbg(dev, cl, "not connected\n");
+ cb->status = -ENODEV;
+ goto out;
+ }
+
+ if (cb->buf.size == 0 || cb->buf.data == NULL) {
+ cl_err(dev, cl, "response buffer is not allocated.\n");
+ list_move_tail(&cb->list, &complete_list->list);
+ cb->status = -ENOMEM;
+ goto out;
+ }
+
+ if (cb->buf.size < mei_hdr->length + cb->buf_idx) {
+ cl_dbg(dev, cl, "message overflow. size %d len %d idx %ld\n",
+ cb->buf.size, mei_hdr->length, cb->buf_idx);
+ buffer = krealloc(cb->buf.data, mei_hdr->length + cb->buf_idx,
+ GFP_KERNEL);
+
+ if (!buffer) {
+ cb->status = -ENOMEM;
+ list_move_tail(&cb->list, &complete_list->list);
+ goto out;
+ }
+ cb->buf.data = buffer;
+ cb->buf.size = mei_hdr->length + cb->buf_idx;
+ }
+
+ buffer = cb->buf.data + cb->buf_idx;
+ mei_read_slots(dev, buffer, mei_hdr->length);
+
+ cb->buf_idx += mei_hdr->length;
+
+ if (mei_hdr->msg_complete) {
+ cb->read_time = jiffies;
+ cl_dbg(dev, cl, "completed read length = %lu\n", cb->buf_idx);
+ list_move_tail(&cb->list, &complete_list->list);
+ } else {
+ pm_runtime_mark_last_busy(dev->dev);
+ pm_request_autosuspend(dev->dev);
+ }
+
+out:
+ if (!buffer)
+ mei_irq_discard_msg(dev, mei_hdr);
+
+ return 0;
+}
+
+/**
+ * mei_cl_irq_disconnect_rsp - send disconnection response message
+ *
+ * @cl: client
+ * @cb: callback block.
+ * @cmpl_list: complete list.
+ *
+ * Return: 0, OK; otherwise, error.
+ */
+static int mei_cl_irq_disconnect_rsp(struct mei_cl *cl, struct mei_cl_cb *cb,
+ struct mei_cl_cb *cmpl_list)
+{
+ struct mei_device *dev = cl->dev;
+ u32 msg_slots;
+ int slots;
+ int ret;
+
+ slots = mei_hbuf_empty_slots(dev);
+ msg_slots = mei_data2slots(sizeof(struct hbm_client_connect_response));
+
+ if (slots < msg_slots)
+ return -EMSGSIZE;
+
+ ret = mei_hbm_cl_disconnect_rsp(dev, cl);
+ list_move_tail(&cb->list, &cmpl_list->list);
+
+ return ret;
+}
+
+/**
+ * mei_cl_irq_read - processes client read related operation from the
+ * interrupt thread context - request for flow control credits
+ *
+ * @cl: client
+ * @cb: callback block.
+ * @cmpl_list: complete list.
+ *
+ * Return: 0, OK; otherwise, error.
+ */
+static int mei_cl_irq_read(struct mei_cl *cl, struct mei_cl_cb *cb,
+ struct mei_cl_cb *cmpl_list)
+{
+ struct mei_device *dev = cl->dev;
+ u32 msg_slots;
+ int slots;
+ int ret;
+
+ msg_slots = mei_data2slots(sizeof(struct hbm_flow_control));
+ slots = mei_hbuf_empty_slots(dev);
+
+ if (slots < msg_slots)
+ return -EMSGSIZE;
+
+ ret = mei_hbm_cl_flow_control_req(dev, cl);
+ if (ret) {
+ cl->status = ret;
+ cb->buf_idx = 0;
+ list_move_tail(&cb->list, &cmpl_list->list);
+ return ret;
+ }
+
+ list_move_tail(&cb->list, &cl->rd_pending);
+
+ return 0;
+}
+
+/**
+ * mei_irq_read_handler - bottom half read routine after ISR to
+ * handle the read processing.
+ *
+ * @dev: the device structure
+ * @cmpl_list: An instance of our list structure
+ * @slots: slots to read.
+ *
+ * Return: 0 on success, <0 on failure.
+ */
+int mei_irq_read_handler(struct mei_device *dev,
+ struct mei_cl_cb *cmpl_list, s32 *slots)
+{
+ struct mei_msg_hdr *mei_hdr;
+ struct mei_cl *cl;
+ int ret;
+
+ if (!dev->rd_msg_hdr) {
+ dev->rd_msg_hdr = mei_read_hdr(dev);
+ (*slots)--;
+ dev_dbg(dev->dev, "slots =%08x.\n", *slots);
+ }
+ mei_hdr = (struct mei_msg_hdr *) &dev->rd_msg_hdr;
+ dev_dbg(dev->dev, MEI_HDR_FMT, MEI_HDR_PRM(mei_hdr));
+
+ if (mei_hdr->reserved || !dev->rd_msg_hdr) {
+ dev_err(dev->dev, "corrupted message header 0x%08X\n",
+ dev->rd_msg_hdr);
+ ret = -EBADMSG;
+ goto end;
+ }
+
+ if (mei_slots2data(*slots) < mei_hdr->length) {
+ dev_err(dev->dev, "less data available than length=%08x.\n",
+ *slots);
+ /* we can't read the message */
+ ret = -ENODATA;
+ goto end;
+ }
+
+ /* HBM message */
+ if (mei_hdr->host_addr == 0 && mei_hdr->me_addr == 0) {
+ ret = mei_hbm_dispatch(dev, mei_hdr);
+ if (ret) {
+ dev_dbg(dev->dev, "mei_hbm_dispatch failed ret = %d\n",
+ ret);
+ goto end;
+ }
+ goto reset_slots;
+ }
+
+ /* find recipient cl */
+ list_for_each_entry(cl, &dev->file_list, link) {
+ if (mei_cl_hbm_equal(cl, mei_hdr)) {
+ cl_dbg(dev, cl, "got a message\n");
+ break;
+ }
+ }
+
+ /* if no recipient cl was found we assume corrupted header */
+ if (&cl->link == &dev->file_list) {
+ dev_err(dev->dev, "no destination client found 0x%08X\n",
+ dev->rd_msg_hdr);
+ ret = -EBADMSG;
+ goto end;
+ }
+
+ if (cl == &dev->iamthif_cl) {
+ ret = mei_amthif_irq_read_msg(cl, mei_hdr, cmpl_list);
+ } else {
+ ret = mei_cl_irq_read_msg(cl, mei_hdr, cmpl_list);
+ }
+
+
+reset_slots:
+ /* reset the number of slots and header */
+ *slots = mei_count_full_read_slots(dev);
+ dev->rd_msg_hdr = 0;
+
+ if (*slots == -EOVERFLOW) {
+ /* overflow - reset */
+ dev_err(dev->dev, "resetting due to slots overflow.\n");
+ /* set the event since message has been read */
+ ret = -ERANGE;
+ goto end;
+ }
+end:
+ return ret;
+}
+EXPORT_SYMBOL_GPL(mei_irq_read_handler);
+
+
+/**
+ * mei_irq_write_handler - dispatch write requests
+ * after irq received
+ *
+ * @dev: the device structure
+ * @cmpl_list: An instance of our list structure
+ *
+ * Return: 0 on success, <0 on failure.
+ */
+int mei_irq_write_handler(struct mei_device *dev, struct mei_cl_cb *cmpl_list)
+{
+
+ struct mei_cl *cl;
+ struct mei_cl_cb *cb, *next;
+ struct mei_cl_cb *list;
+ s32 slots;
+ int ret;
+
+
+ if (!mei_hbuf_acquire(dev))
+ return 0;
+
+ slots = mei_hbuf_empty_slots(dev);
+ if (slots <= 0)
+ return -EMSGSIZE;
+
+ /* complete all waiting for write CB */
+ dev_dbg(dev->dev, "complete all waiting for write cb.\n");
+
+ list = &dev->write_waiting_list;
+ list_for_each_entry_safe(cb, next, &list->list, list) {
+ cl = cb->cl;
+
+ cl->status = 0;
+ cl_dbg(dev, cl, "MEI WRITE COMPLETE\n");
+ cl->writing_state = MEI_WRITE_COMPLETE;
+ list_move_tail(&cb->list, &cmpl_list->list);
+ }
+
+ if (dev->wd_state == MEI_WD_STOPPING) {
+ dev->wd_state = MEI_WD_IDLE;
+ wake_up(&dev->wait_stop_wd);
+ }
+
+ if (mei_cl_is_connected(&dev->wd_cl)) {
+ if (dev->wd_pending &&
+ mei_cl_flow_ctrl_creds(&dev->wd_cl) > 0) {
+ ret = mei_wd_send(dev);
+ if (ret)
+ return ret;
+ dev->wd_pending = false;
+ }
+ }
+
+ /* complete control write list CB */
+ dev_dbg(dev->dev, "complete control write list cb.\n");
+ list_for_each_entry_safe(cb, next, &dev->ctrl_wr_list.list, list) {
+ cl = cb->cl;
+ switch (cb->fop_type) {
+ case MEI_FOP_DISCONNECT:
+ /* send disconnect message */
+ ret = mei_cl_irq_disconnect(cl, cb, cmpl_list);
+ if (ret)
+ return ret;
+
+ break;
+ case MEI_FOP_READ:
+ /* send flow control message */
+ ret = mei_cl_irq_read(cl, cb, cmpl_list);
+ if (ret)
+ return ret;
+
+ break;
+ case MEI_FOP_CONNECT:
+ /* connect message */
+ ret = mei_cl_irq_connect(cl, cb, cmpl_list);
+ if (ret)
+ return ret;
+
+ break;
+ case MEI_FOP_DISCONNECT_RSP:
+ /* send disconnect resp */
+ ret = mei_cl_irq_disconnect_rsp(cl, cb, cmpl_list);
+ if (ret)
+ return ret;
+ break;
+
+ case MEI_FOP_NOTIFY_START:
+ case MEI_FOP_NOTIFY_STOP:
+ ret = mei_cl_irq_notify(cl, cb, cmpl_list);
+ if (ret)
+ return ret;
+ break;
+ default:
+ BUG();
+ }
+
+ }
+ /* complete write list CB */
+ dev_dbg(dev->dev, "complete write list cb.\n");
+ list_for_each_entry_safe(cb, next, &dev->write_list.list, list) {
+ cl = cb->cl;
+ if (cl == &dev->iamthif_cl)
+ ret = mei_amthif_irq_write(cl, cb, cmpl_list);
+ else
+ ret = mei_cl_irq_write(cl, cb, cmpl_list);
+ if (ret)
+ return ret;
+ }
+ return 0;
+}
+EXPORT_SYMBOL_GPL(mei_irq_write_handler);
+
+
+/**
+ * mei_connect_timeout - connect/disconnect timeouts
+ *
+ * @cl: host client
+ */
+static void mei_connect_timeout(struct mei_cl *cl)
+{
+ struct mei_device *dev = cl->dev;
+
+ if (cl->state == MEI_FILE_CONNECTING) {
+ if (dev->hbm_f_dot_supported) {
+ cl->state = MEI_FILE_DISCONNECT_REQUIRED;
+ wake_up(&cl->wait);
+ return;
+ }
+ }
+ mei_reset(dev);
+}
+
+/**
+ * mei_timer - timer function.
+ *
+ * @work: pointer to the work_struct structure
+ *
+ */
+void mei_timer(struct work_struct *work)
+{
+ unsigned long timeout;
+ struct mei_cl *cl;
+
+ struct mei_device *dev = container_of(work,
+ struct mei_device, timer_work.work);
+
+
+ mutex_lock(&dev->device_lock);
+
+ /* Catch interrupt stalls during HBM init handshake */
+ if (dev->dev_state == MEI_DEV_INIT_CLIENTS &&
+ dev->hbm_state != MEI_HBM_IDLE) {
+
+ if (dev->init_clients_timer) {
+ if (--dev->init_clients_timer == 0) {
+ dev_err(dev->dev, "timer: init clients timeout hbm_state = %d.\n",
+ dev->hbm_state);
+ mei_reset(dev);
+ goto out;
+ }
+ }
+ }
+
+ if (dev->dev_state != MEI_DEV_ENABLED)
+ goto out;
+
+ /*** connect/disconnect timeouts ***/
+ list_for_each_entry(cl, &dev->file_list, link) {
+ if (cl->timer_count) {
+ if (--cl->timer_count == 0) {
+ dev_err(dev->dev, "timer: connect/disconnect timeout.\n");
+ mei_connect_timeout(cl);
+ goto out;
+ }
+ }
+ }
+
+ if (!mei_cl_is_connected(&dev->iamthif_cl))
+ goto out;
+
+ if (dev->iamthif_stall_timer) {
+ if (--dev->iamthif_stall_timer == 0) {
+ dev_err(dev->dev, "timer: amthif hanged.\n");
+ mei_reset(dev);
+ dev->iamthif_canceled = false;
+ dev->iamthif_state = MEI_IAMTHIF_IDLE;
+ dev->iamthif_timer = 0;
+
+ mei_io_cb_free(dev->iamthif_current_cb);
+ dev->iamthif_current_cb = NULL;
+
+ dev->iamthif_file_object = NULL;
+ mei_amthif_run_next_cmd(dev);
+ }
+ }
+
+ if (dev->iamthif_timer) {
+
+ timeout = dev->iamthif_timer +
+ mei_secs_to_jiffies(MEI_IAMTHIF_READ_TIMER);
+
+ dev_dbg(dev->dev, "dev->iamthif_timer = %ld\n",
+ dev->iamthif_timer);
+ dev_dbg(dev->dev, "timeout = %ld\n", timeout);
+ dev_dbg(dev->dev, "jiffies = %ld\n", jiffies);
+ if (time_after(jiffies, timeout)) {
+ /*
+ * User didn't read the AMTHI data on time (15sec)
+ * freeing AMTHI for other requests
+ */
+
+ dev_dbg(dev->dev, "freeing AMTHI for other requests\n");
+
+ mei_io_list_flush(&dev->amthif_rd_complete_list,
+ &dev->iamthif_cl);
+ mei_io_cb_free(dev->iamthif_current_cb);
+ dev->iamthif_current_cb = NULL;
+
+ dev->iamthif_file_object->private_data = NULL;
+ dev->iamthif_file_object = NULL;
+ dev->iamthif_timer = 0;
+ mei_amthif_run_next_cmd(dev);
+
+ }
+ }
+out:
+ if (dev->dev_state != MEI_DEV_DISABLED)
+ schedule_delayed_work(&dev->timer_work, 2 * HZ);
+ mutex_unlock(&dev->device_lock);
+}
diff --git a/drivers/misc/mei/main.c b/drivers/misc/mei/main.c
new file mode 100644
index 0000000..4ef189a
--- /dev/null
+++ b/drivers/misc/mei/main.c
@@ -0,0 +1,887 @@
+/*
+ *
+ * Intel Management Engine Interface (Intel MEI) Linux driver
+ * Copyright (c) 2003-2012, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ */
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/kernel.h>
+#include <linux/device.h>
+#include <linux/slab.h>
+#include <linux/fs.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/fcntl.h>
+#include <linux/poll.h>
+#include <linux/init.h>
+#include <linux/ioctl.h>
+#include <linux/cdev.h>
+#include <linux/sched.h>
+#include <linux/uuid.h>
+#include <linux/compat.h>
+#include <linux/jiffies.h>
+#include <linux/interrupt.h>
+
+#include <linux/mei.h>
+
+#include "mei_dev.h"
+#include "client.h"
+
+/**
+ * mei_open - the open function
+ *
+ * @inode: pointer to inode structure
+ * @file: pointer to file structure
+ *
+ * Return: 0 on success, <0 on error
+ */
+static int mei_open(struct inode *inode, struct file *file)
+{
+ struct mei_device *dev;
+ struct mei_cl *cl;
+
+ int err;
+
+ dev = container_of(inode->i_cdev, struct mei_device, cdev);
+ if (!dev)
+ return -ENODEV;
+
+ mutex_lock(&dev->device_lock);
+
+ if (dev->dev_state != MEI_DEV_ENABLED) {
+ dev_dbg(dev->dev, "dev_state != MEI_ENABLED dev_state = %s\n",
+ mei_dev_state_str(dev->dev_state));
+ err = -ENODEV;
+ goto err_unlock;
+ }
+
+ cl = mei_cl_alloc_linked(dev, MEI_HOST_CLIENT_ID_ANY);
+ if (IS_ERR(cl)) {
+ err = PTR_ERR(cl);
+ goto err_unlock;
+ }
+
+ file->private_data = cl;
+
+ mutex_unlock(&dev->device_lock);
+
+ return nonseekable_open(inode, file);
+
+err_unlock:
+ mutex_unlock(&dev->device_lock);
+ return err;
+}
+
+/**
+ * mei_release - the release function
+ *
+ * @inode: pointer to inode structure
+ * @file: pointer to file structure
+ *
+ * Return: 0 on success, <0 on error
+ */
+static int mei_release(struct inode *inode, struct file *file)
+{
+ struct mei_cl *cl = file->private_data;
+ struct mei_device *dev;
+ int rets;
+
+ if (WARN_ON(!cl || !cl->dev))
+ return -ENODEV;
+
+ dev = cl->dev;
+
+ mutex_lock(&dev->device_lock);
+ if (cl == &dev->iamthif_cl) {
+ rets = mei_amthif_release(dev, file);
+ goto out;
+ }
+ rets = mei_cl_disconnect(cl);
+
+ mei_cl_flush_queues(cl, file);
+ cl_dbg(dev, cl, "removing\n");
+
+ mei_cl_unlink(cl);
+
+ file->private_data = NULL;
+
+ kfree(cl);
+out:
+ mutex_unlock(&dev->device_lock);
+ return rets;
+}
+
+
+/**
+ * mei_read - the read function.
+ *
+ * @file: pointer to file structure
+ * @ubuf: pointer to user buffer
+ * @length: buffer length
+ * @offset: data offset in buffer
+ *
+ * Return: >=0 data length on success , <0 on error
+ */
+static ssize_t mei_read(struct file *file, char __user *ubuf,
+ size_t length, loff_t *offset)
+{
+ struct mei_cl *cl = file->private_data;
+ struct mei_device *dev;
+ struct mei_cl_cb *cb = NULL;
+ int rets;
+ int err;
+
+
+ if (WARN_ON(!cl || !cl->dev))
+ return -ENODEV;
+
+ dev = cl->dev;
+
+
+ mutex_lock(&dev->device_lock);
+ if (dev->dev_state != MEI_DEV_ENABLED) {
+ rets = -ENODEV;
+ goto out;
+ }
+
+ if (length == 0) {
+ rets = 0;
+ goto out;
+ }
+
+ if (cl == &dev->iamthif_cl) {
+ rets = mei_amthif_read(dev, file, ubuf, length, offset);
+ goto out;
+ }
+
+ cb = mei_cl_read_cb(cl, file);
+ if (cb) {
+ /* read what left */
+ if (cb->buf_idx > *offset)
+ goto copy_buffer;
+ /* offset is beyond buf_idx we have no more data return 0 */
+ if (cb->buf_idx > 0 && cb->buf_idx <= *offset) {
+ rets = 0;
+ goto free;
+ }
+ /* Offset needs to be cleaned for contiguous reads*/
+ if (cb->buf_idx == 0 && *offset > 0)
+ *offset = 0;
+ } else if (*offset > 0) {
+ *offset = 0;
+ }
+
+ err = mei_cl_read_start(cl, length, file);
+ if (err && err != -EBUSY) {
+ cl_dbg(dev, cl, "mei start read failure status = %d\n", err);
+ rets = err;
+ goto out;
+ }
+
+ if (list_empty(&cl->rd_completed) && !waitqueue_active(&cl->rx_wait)) {
+ if (file->f_flags & O_NONBLOCK) {
+ rets = -EAGAIN;
+ goto out;
+ }
+
+ mutex_unlock(&dev->device_lock);
+
+ if (wait_event_interruptible(cl->rx_wait,
+ (!list_empty(&cl->rd_completed)) ||
+ (!mei_cl_is_connected(cl)))) {
+
+ if (signal_pending(current))
+ return -EINTR;
+ return -ERESTARTSYS;
+ }
+
+ mutex_lock(&dev->device_lock);
+ if (!mei_cl_is_connected(cl)) {
+ rets = -ENODEV;
+ goto out;
+ }
+ }
+
+ cb = mei_cl_read_cb(cl, file);
+ if (!cb) {
+ if (mei_cl_is_fixed_address(cl) && dev->allow_fixed_address) {
+ cb = mei_cl_read_cb(cl, NULL);
+ if (cb)
+ goto copy_buffer;
+ }
+ rets = 0;
+ goto out;
+ }
+
+copy_buffer:
+ /* now copy the data to user space */
+ if (cb->status) {
+ rets = cb->status;
+ cl_dbg(dev, cl, "read operation failed %d\n", rets);
+ goto free;
+ }
+
+ cl_dbg(dev, cl, "buf.size = %d buf.idx = %ld\n",
+ cb->buf.size, cb->buf_idx);
+ if (length == 0 || ubuf == NULL || *offset > cb->buf_idx) {
+ rets = -EMSGSIZE;
+ goto free;
+ }
+
+ /* length is being truncated to PAGE_SIZE,
+ * however buf_idx may point beyond that */
+ length = min_t(size_t, length, cb->buf_idx - *offset);
+
+ if (copy_to_user(ubuf, cb->buf.data + *offset, length)) {
+ dev_dbg(dev->dev, "failed to copy data to userland\n");
+ rets = -EFAULT;
+ goto free;
+ }
+
+ rets = length;
+ *offset += length;
+ if ((unsigned long)*offset < cb->buf_idx)
+ goto out;
+
+free:
+ mei_io_cb_free(cb);
+
+out:
+ cl_dbg(dev, cl, "end mei read rets = %d\n", rets);
+ mutex_unlock(&dev->device_lock);
+ return rets;
+}
+/**
+ * mei_write - the write function.
+ *
+ * @file: pointer to file structure
+ * @ubuf: pointer to user buffer
+ * @length: buffer length
+ * @offset: data offset in buffer
+ *
+ * Return: >=0 data length on success , <0 on error
+ */
+static ssize_t mei_write(struct file *file, const char __user *ubuf,
+ size_t length, loff_t *offset)
+{
+ struct mei_cl *cl = file->private_data;
+ struct mei_cl_cb *write_cb = NULL;
+ struct mei_device *dev;
+ unsigned long timeout = 0;
+ int rets;
+
+ if (WARN_ON(!cl || !cl->dev))
+ return -ENODEV;
+
+ dev = cl->dev;
+
+ mutex_lock(&dev->device_lock);
+
+ if (dev->dev_state != MEI_DEV_ENABLED) {
+ rets = -ENODEV;
+ goto out;
+ }
+
+ if (!mei_cl_is_connected(cl)) {
+ cl_err(dev, cl, "is not connected");
+ rets = -ENODEV;
+ goto out;
+ }
+
+ if (!mei_me_cl_is_active(cl->me_cl)) {
+ rets = -ENOTTY;
+ goto out;
+ }
+
+ if (length > mei_cl_mtu(cl)) {
+ rets = -EFBIG;
+ goto out;
+ }
+
+ if (length == 0) {
+ rets = 0;
+ goto out;
+ }
+
+ if (cl == &dev->iamthif_cl) {
+ write_cb = mei_amthif_find_read_list_entry(dev, file);
+
+ if (write_cb) {
+ timeout = write_cb->read_time +
+ mei_secs_to_jiffies(MEI_IAMTHIF_READ_TIMER);
+
+ if (time_after(jiffies, timeout)) {
+ *offset = 0;
+ mei_io_cb_free(write_cb);
+ write_cb = NULL;
+ }
+ }
+ }
+
+ *offset = 0;
+ write_cb = mei_cl_alloc_cb(cl, length, MEI_FOP_WRITE, file);
+ if (!write_cb) {
+ rets = -ENOMEM;
+ goto out;
+ }
+
+ rets = copy_from_user(write_cb->buf.data, ubuf, length);
+ if (rets) {
+ dev_dbg(dev->dev, "failed to copy data from userland\n");
+ rets = -EFAULT;
+ goto out;
+ }
+
+ if (cl == &dev->iamthif_cl) {
+ rets = mei_amthif_write(cl, write_cb);
+
+ if (rets) {
+ dev_err(dev->dev,
+ "amthif write failed with status = %d\n", rets);
+ goto out;
+ }
+ mutex_unlock(&dev->device_lock);
+ return length;
+ }
+
+ rets = mei_cl_write(cl, write_cb, false);
+out:
+ mutex_unlock(&dev->device_lock);
+ if (rets < 0)
+ mei_io_cb_free(write_cb);
+ return rets;
+}
+
+/**
+ * mei_ioctl_connect_client - the connect to fw client IOCTL function
+ *
+ * @file: private data of the file object
+ * @data: IOCTL connect data, input and output parameters
+ *
+ * Locking: called under "dev->device_lock" lock
+ *
+ * Return: 0 on success, <0 on failure.
+ */
+static int mei_ioctl_connect_client(struct file *file,
+ struct mei_connect_client_data *data)
+{
+ struct mei_device *dev;
+ struct mei_client *client;
+ struct mei_me_client *me_cl;
+ struct mei_cl *cl;
+ int rets;
+
+ cl = file->private_data;
+ dev = cl->dev;
+
+ if (dev->dev_state != MEI_DEV_ENABLED)
+ return -ENODEV;
+
+ if (cl->state != MEI_FILE_INITIALIZING &&
+ cl->state != MEI_FILE_DISCONNECTED)
+ return -EBUSY;
+
+ /* find ME client we're trying to connect to */
+ me_cl = mei_me_cl_by_uuid(dev, &data->in_client_uuid);
+ if (!me_cl ||
+ (me_cl->props.fixed_address && !dev->allow_fixed_address)) {
+ dev_dbg(dev->dev, "Cannot connect to FW Client UUID = %pUl\n",
+ &data->in_client_uuid);
+ mei_me_cl_put(me_cl);
+ return -ENOTTY;
+ }
+
+ dev_dbg(dev->dev, "Connect to FW Client ID = %d\n",
+ me_cl->client_id);
+ dev_dbg(dev->dev, "FW Client - Protocol Version = %d\n",
+ me_cl->props.protocol_version);
+ dev_dbg(dev->dev, "FW Client - Max Msg Len = %d\n",
+ me_cl->props.max_msg_length);
+
+ /* if we're connecting to amthif client then we will use the
+ * existing connection
+ */
+ if (uuid_le_cmp(data->in_client_uuid, mei_amthif_guid) == 0) {
+ dev_dbg(dev->dev, "FW Client is amthi\n");
+ if (!mei_cl_is_connected(&dev->iamthif_cl)) {
+ rets = -ENODEV;
+ goto end;
+ }
+ mei_cl_unlink(cl);
+
+ kfree(cl);
+ cl = NULL;
+ dev->iamthif_open_count++;
+ file->private_data = &dev->iamthif_cl;
+
+ client = &data->out_client_properties;
+ client->max_msg_length = me_cl->props.max_msg_length;
+ client->protocol_version = me_cl->props.protocol_version;
+ rets = dev->iamthif_cl.status;
+
+ goto end;
+ }
+
+ /* prepare the output buffer */
+ client = &data->out_client_properties;
+ client->max_msg_length = me_cl->props.max_msg_length;
+ client->protocol_version = me_cl->props.protocol_version;
+ dev_dbg(dev->dev, "Can connect?\n");
+
+ rets = mei_cl_connect(cl, me_cl, file);
+
+end:
+ mei_me_cl_put(me_cl);
+ return rets;
+}
+
+/**
+ * mei_ioctl_client_notify_request -
+ * propagate event notification request to client
+ *
+ * @file: pointer to file structure
+ * @request: 0 - disable, 1 - enable
+ *
+ * Return: 0 on success , <0 on error
+ */
+static int mei_ioctl_client_notify_request(struct file *file, u32 request)
+{
+ struct mei_cl *cl = file->private_data;
+
+ if (request != MEI_HBM_NOTIFICATION_START &&
+ request != MEI_HBM_NOTIFICATION_STOP)
+ return -EINVAL;
+
+ return mei_cl_notify_request(cl, file, (u8)request);
+}
+
+/**
+ * mei_ioctl_client_notify_get - wait for notification request
+ *
+ * @file: pointer to file structure
+ * @notify_get: 0 - disable, 1 - enable
+ *
+ * Return: 0 on success , <0 on error
+ */
+static int mei_ioctl_client_notify_get(struct file *file, u32 *notify_get)
+{
+ struct mei_cl *cl = file->private_data;
+ bool notify_ev;
+ bool block = (file->f_flags & O_NONBLOCK) == 0;
+ int rets;
+
+ rets = mei_cl_notify_get(cl, block, ¬ify_ev);
+ if (rets)
+ return rets;
+
+ *notify_get = notify_ev ? 1 : 0;
+ return 0;
+}
+
+/**
+ * mei_ioctl - the IOCTL function
+ *
+ * @file: pointer to file structure
+ * @cmd: ioctl command
+ * @data: pointer to mei message structure
+ *
+ * Return: 0 on success , <0 on error
+ */
+static long mei_ioctl(struct file *file, unsigned int cmd, unsigned long data)
+{
+ struct mei_device *dev;
+ struct mei_cl *cl = file->private_data;
+ struct mei_connect_client_data connect_data;
+ u32 notify_get, notify_req;
+ int rets;
+
+
+ if (WARN_ON(!cl || !cl->dev))
+ return -ENODEV;
+
+ dev = cl->dev;
+
+ dev_dbg(dev->dev, "IOCTL cmd = 0x%x", cmd);
+
+ mutex_lock(&dev->device_lock);
+ if (dev->dev_state != MEI_DEV_ENABLED) {
+ rets = -ENODEV;
+ goto out;
+ }
+
+ switch (cmd) {
+ case IOCTL_MEI_CONNECT_CLIENT:
+ dev_dbg(dev->dev, ": IOCTL_MEI_CONNECT_CLIENT.\n");
+ if (copy_from_user(&connect_data, (char __user *)data,
+ sizeof(struct mei_connect_client_data))) {
+ dev_dbg(dev->dev, "failed to copy data from userland\n");
+ rets = -EFAULT;
+ goto out;
+ }
+
+ rets = mei_ioctl_connect_client(file, &connect_data);
+ if (rets)
+ goto out;
+
+ /* if all is ok, copying the data back to user. */
+ if (copy_to_user((char __user *)data, &connect_data,
+ sizeof(struct mei_connect_client_data))) {
+ dev_dbg(dev->dev, "failed to copy data to userland\n");
+ rets = -EFAULT;
+ goto out;
+ }
+
+ break;
+
+ case IOCTL_MEI_NOTIFY_SET:
+ dev_dbg(dev->dev, ": IOCTL_MEI_NOTIFY_SET.\n");
+ if (copy_from_user(¬ify_req,
+ (char __user *)data, sizeof(notify_req))) {
+ dev_dbg(dev->dev, "failed to copy data from userland\n");
+ rets = -EFAULT;
+ goto out;
+ }
+ rets = mei_ioctl_client_notify_request(file, notify_req);
+ break;
+
+ case IOCTL_MEI_NOTIFY_GET:
+ dev_dbg(dev->dev, ": IOCTL_MEI_NOTIFY_GET.\n");
+ rets = mei_ioctl_client_notify_get(file, ¬ify_get);
+ if (rets)
+ goto out;
+
+ dev_dbg(dev->dev, "copy connect data to user\n");
+ if (copy_to_user((char __user *)data,
+ ¬ify_get, sizeof(notify_get))) {
+ dev_dbg(dev->dev, "failed to copy data to userland\n");
+ rets = -EFAULT;
+ goto out;
+
+ }
+ break;
+
+ default:
+ dev_err(dev->dev, ": unsupported ioctl %d.\n", cmd);
+ rets = -ENOIOCTLCMD;
+ }
+
+out:
+ mutex_unlock(&dev->device_lock);
+ return rets;
+}
+
+/**
+ * mei_compat_ioctl - the compat IOCTL function
+ *
+ * @file: pointer to file structure
+ * @cmd: ioctl command
+ * @data: pointer to mei message structure
+ *
+ * Return: 0 on success , <0 on error
+ */
+#ifdef CONFIG_COMPAT
+static long mei_compat_ioctl(struct file *file,
+ unsigned int cmd, unsigned long data)
+{
+ return mei_ioctl(file, cmd, (unsigned long)compat_ptr(data));
+}
+#endif
+
+
+/**
+ * mei_poll - the poll function
+ *
+ * @file: pointer to file structure
+ * @wait: pointer to poll_table structure
+ *
+ * Return: poll mask
+ */
+static unsigned int mei_poll(struct file *file, poll_table *wait)
+{
+ unsigned long req_events = poll_requested_events(wait);
+ struct mei_cl *cl = file->private_data;
+ struct mei_device *dev;
+ unsigned int mask = 0;
+ bool notify_en;
+
+ if (WARN_ON(!cl || !cl->dev))
+ return POLLERR;
+
+ dev = cl->dev;
+
+ mutex_lock(&dev->device_lock);
+
+ notify_en = cl->notify_en && (req_events & POLLPRI);
+
+ if (dev->dev_state != MEI_DEV_ENABLED ||
+ !mei_cl_is_connected(cl)) {
+ mask = POLLERR;
+ goto out;
+ }
+
+ if (cl == &dev->iamthif_cl) {
+ mask = mei_amthif_poll(dev, file, wait);
+ goto out;
+ }
+
+ if (notify_en) {
+ poll_wait(file, &cl->ev_wait, wait);
+ if (cl->notify_ev)
+ mask |= POLLPRI;
+ }
+
+ if (req_events & (POLLIN | POLLRDNORM)) {
+ poll_wait(file, &cl->rx_wait, wait);
+
+ if (!list_empty(&cl->rd_completed))
+ mask |= POLLIN | POLLRDNORM;
+ else
+ mei_cl_read_start(cl, 0, file);
+ }
+
+out:
+ mutex_unlock(&dev->device_lock);
+ return mask;
+}
+
+/**
+ * mei_fasync - asynchronous io support
+ *
+ * @fd: file descriptor
+ * @file: pointer to file structure
+ * @band: band bitmap
+ *
+ * Return: negative on error,
+ * 0 if it did no changes,
+ * and positive a process was added or deleted
+ */
+static int mei_fasync(int fd, struct file *file, int band)
+{
+
+ struct mei_cl *cl = file->private_data;
+
+ if (!mei_cl_is_connected(cl))
+ return -ENODEV;
+
+ return fasync_helper(fd, file, band, &cl->ev_async);
+}
+
+/**
+ * fw_status_show - mei device attribute show method
+ *
+ * @device: device pointer
+ * @attr: attribute pointer
+ * @buf: char out buffer
+ *
+ * Return: number of the bytes printed into buf or error
+ */
+static ssize_t fw_status_show(struct device *device,
+ struct device_attribute *attr, char *buf)
+{
+ struct mei_device *dev = dev_get_drvdata(device);
+ struct mei_fw_status fw_status;
+ int err, i;
+ ssize_t cnt = 0;
+
+ mutex_lock(&dev->device_lock);
+ err = mei_fw_status(dev, &fw_status);
+ mutex_unlock(&dev->device_lock);
+ if (err) {
+ dev_err(device, "read fw_status error = %d\n", err);
+ return err;
+ }
+
+ for (i = 0; i < fw_status.count; i++)
+ cnt += scnprintf(buf + cnt, PAGE_SIZE - cnt, "%08X\n",
+ fw_status.status[i]);
+ return cnt;
+}
+static DEVICE_ATTR_RO(fw_status);
+
+static struct attribute *mei_attrs[] = {
+ &dev_attr_fw_status.attr,
+ NULL
+};
+ATTRIBUTE_GROUPS(mei);
+
+/*
+ * file operations structure will be used for mei char device.
+ */
+static const struct file_operations mei_fops = {
+ .owner = THIS_MODULE,
+ .read = mei_read,
+ .unlocked_ioctl = mei_ioctl,
+#ifdef CONFIG_COMPAT
+ .compat_ioctl = mei_compat_ioctl,
+#endif
+ .open = mei_open,
+ .release = mei_release,
+ .write = mei_write,
+ .poll = mei_poll,
+ .fasync = mei_fasync,
+ .llseek = no_llseek
+};
+
+static struct class *mei_class;
+static dev_t mei_devt;
+#define MEI_MAX_DEVS MINORMASK
+static DEFINE_MUTEX(mei_minor_lock);
+static DEFINE_IDR(mei_idr);
+
+/**
+ * mei_minor_get - obtain next free device minor number
+ *
+ * @dev: device pointer
+ *
+ * Return: allocated minor, or -ENOSPC if no free minor left
+ */
+static int mei_minor_get(struct mei_device *dev)
+{
+ int ret;
+
+ mutex_lock(&mei_minor_lock);
+ ret = idr_alloc(&mei_idr, dev, 0, MEI_MAX_DEVS, GFP_KERNEL);
+ if (ret >= 0)
+ dev->minor = ret;
+ else if (ret == -ENOSPC)
+ dev_err(dev->dev, "too many mei devices\n");
+
+ mutex_unlock(&mei_minor_lock);
+ return ret;
+}
+
+/**
+ * mei_minor_free - mark device minor number as free
+ *
+ * @dev: device pointer
+ */
+static void mei_minor_free(struct mei_device *dev)
+{
+ mutex_lock(&mei_minor_lock);
+ idr_remove(&mei_idr, dev->minor);
+ mutex_unlock(&mei_minor_lock);
+}
+
+int mei_register(struct mei_device *dev, struct device *parent)
+{
+ struct device *clsdev; /* class device */
+ int ret, devno;
+
+ ret = mei_minor_get(dev);
+ if (ret < 0)
+ return ret;
+
+ /* Fill in the data structures */
+ devno = MKDEV(MAJOR(mei_devt), dev->minor);
+ cdev_init(&dev->cdev, &mei_fops);
+ dev->cdev.owner = parent->driver->owner;
+
+ /* Add the device */
+ ret = cdev_add(&dev->cdev, devno, 1);
+ if (ret) {
+ dev_err(parent, "unable to add device %d:%d\n",
+ MAJOR(mei_devt), dev->minor);
+ goto err_dev_add;
+ }
+
+ clsdev = device_create_with_groups(mei_class, parent, devno,
+ dev, mei_groups,
+ "mei%d", dev->minor);
+
+ if (IS_ERR(clsdev)) {
+ dev_err(parent, "unable to create device %d:%d\n",
+ MAJOR(mei_devt), dev->minor);
+ ret = PTR_ERR(clsdev);
+ goto err_dev_create;
+ }
+
+ ret = mei_dbgfs_register(dev, dev_name(clsdev));
+ if (ret) {
+ dev_err(clsdev, "cannot register debugfs ret = %d\n", ret);
+ goto err_dev_dbgfs;
+ }
+
+ return 0;
+
+err_dev_dbgfs:
+ device_destroy(mei_class, devno);
+err_dev_create:
+ cdev_del(&dev->cdev);
+err_dev_add:
+ mei_minor_free(dev);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(mei_register);
+
+void mei_deregister(struct mei_device *dev)
+{
+ int devno;
+
+ devno = dev->cdev.dev;
+ cdev_del(&dev->cdev);
+
+ mei_dbgfs_deregister(dev);
+
+ device_destroy(mei_class, devno);
+
+ mei_minor_free(dev);
+}
+EXPORT_SYMBOL_GPL(mei_deregister);
+
+static int __init mei_init(void)
+{
+ int ret;
+
+ mei_class = class_create(THIS_MODULE, "mei");
+ if (IS_ERR(mei_class)) {
+ pr_err("couldn't create class\n");
+ ret = PTR_ERR(mei_class);
+ goto err;
+ }
+
+ ret = alloc_chrdev_region(&mei_devt, 0, MEI_MAX_DEVS, "mei");
+ if (ret < 0) {
+ pr_err("unable to allocate char dev region\n");
+ goto err_class;
+ }
+
+ ret = mei_cl_bus_init();
+ if (ret < 0) {
+ pr_err("unable to initialize bus\n");
+ goto err_chrdev;
+ }
+
+ return 0;
+
+err_chrdev:
+ unregister_chrdev_region(mei_devt, MEI_MAX_DEVS);
+err_class:
+ class_destroy(mei_class);
+err:
+ return ret;
+}
+
+static void __exit mei_exit(void)
+{
+ unregister_chrdev_region(mei_devt, MEI_MAX_DEVS);
+ class_destroy(mei_class);
+ mei_cl_bus_exit();
+}
+
+module_init(mei_init);
+module_exit(mei_exit);
+
+MODULE_AUTHOR("Intel Corporation");
+MODULE_DESCRIPTION("Intel(R) Management Engine Interface");
+MODULE_LICENSE("GPL v2");
+
diff --git a/drivers/misc/mei/mei-trace.c b/drivers/misc/mei/mei-trace.c
new file mode 100644
index 0000000..388efb5
--- /dev/null
+++ b/drivers/misc/mei/mei-trace.c
@@ -0,0 +1,25 @@
+/*
+ *
+ * Intel Management Engine Interface (Intel MEI) Linux driver
+ * Copyright (c) 2015, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ */
+#include <linux/module.h>
+
+/* sparse doesn't like tracepoint macros */
+#ifndef __CHECKER__
+#define CREATE_TRACE_POINTS
+#include "mei-trace.h"
+
+EXPORT_TRACEPOINT_SYMBOL(mei_reg_read);
+EXPORT_TRACEPOINT_SYMBOL(mei_reg_write);
+#endif /* __CHECKER__ */
diff --git a/drivers/misc/mei/mei-trace.h b/drivers/misc/mei/mei-trace.h
new file mode 100644
index 0000000..47e1bc6
--- /dev/null
+++ b/drivers/misc/mei/mei-trace.h
@@ -0,0 +1,74 @@
+/*
+ *
+ * Intel Management Engine Interface (Intel MEI) Linux driver
+ * Copyright (c) 2015, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#if !defined(_MEI_TRACE_H_) || defined(TRACE_HEADER_MULTI_READ)
+#define _MEI_TRACE_H_
+
+#include <linux/stringify.h>
+#include <linux/types.h>
+#include <linux/tracepoint.h>
+
+#include <linux/device.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM mei
+
+TRACE_EVENT(mei_reg_read,
+ TP_PROTO(const struct device *dev, const char *reg, u32 offs, u32 val),
+ TP_ARGS(dev, reg, offs, val),
+ TP_STRUCT__entry(
+ __string(dev, dev_name(dev))
+ __field(const char *, reg)
+ __field(u32, offs)
+ __field(u32, val)
+ ),
+ TP_fast_assign(
+ __assign_str(dev, dev_name(dev))
+ __entry->reg = reg;
+ __entry->offs = offs;
+ __entry->val = val;
+ ),
+ TP_printk("[%s] read %s:[%#x] = %#x",
+ __get_str(dev), __entry->reg, __entry->offs, __entry->val)
+);
+
+TRACE_EVENT(mei_reg_write,
+ TP_PROTO(const struct device *dev, const char *reg, u32 offs, u32 val),
+ TP_ARGS(dev, reg, offs, val),
+ TP_STRUCT__entry(
+ __string(dev, dev_name(dev))
+ __field(const char *, reg)
+ __field(u32, offs)
+ __field(u32, val)
+ ),
+ TP_fast_assign(
+ __assign_str(dev, dev_name(dev))
+ __entry->reg = reg;
+ __entry->offs = offs;
+ __entry->val = val;
+ ),
+ TP_printk("[%s] write %s[%#x] = %#x)",
+ __get_str(dev), __entry->reg, __entry->offs, __entry->val)
+);
+
+#endif /* _MEI_TRACE_H_ */
+
+/* This part must be outside protection */
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE mei-trace
+#include <trace/define_trace.h>
diff --git a/drivers/misc/mei/mei_dev.h b/drivers/misc/mei/mei_dev.h
new file mode 100644
index 0000000..1b06e2f
--- /dev/null
+++ b/drivers/misc/mei/mei_dev.h
@@ -0,0 +1,834 @@
+/*
+ *
+ * Intel Management Engine Interface (Intel MEI) Linux driver
+ * Copyright (c) 2003-2012, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#ifndef _MEI_DEV_H_
+#define _MEI_DEV_H_
+
+#include <linux/types.h>
+#include <linux/watchdog.h>
+#include <linux/poll.h>
+#include <linux/mei.h>
+#include <linux/mei_cl_bus.h>
+
+#include "hw.h"
+#include "hbm.h"
+
+/*
+ * watch dog definition
+ */
+#define MEI_WD_HDR_SIZE 4
+#define MEI_WD_STOP_MSG_SIZE MEI_WD_HDR_SIZE
+#define MEI_WD_START_MSG_SIZE (MEI_WD_HDR_SIZE + 16)
+
+#define MEI_WD_DEFAULT_TIMEOUT 120 /* seconds */
+#define MEI_WD_MIN_TIMEOUT 120 /* seconds */
+#define MEI_WD_MAX_TIMEOUT 65535 /* seconds */
+
+#define MEI_WD_STOP_TIMEOUT 10 /* msecs */
+
+#define MEI_WD_STATE_INDEPENDENCE_MSG_SENT (1 << 0)
+
+#define MEI_RD_MSG_BUF_SIZE (128 * sizeof(u32))
+
+
+/*
+ * AMTHI Client UUID
+ */
+extern const uuid_le mei_amthif_guid;
+
+/*
+ * Watchdog Client UUID
+ */
+extern const uuid_le mei_wd_guid;
+
+/*
+ * Number of Maximum MEI Clients
+ */
+#define MEI_CLIENTS_MAX 256
+
+/*
+ * maximum number of consecutive resets
+ */
+#define MEI_MAX_CONSEC_RESET 3
+
+/*
+ * Number of File descriptors/handles
+ * that can be opened to the driver.
+ *
+ * Limit to 255: 256 Total Clients
+ * minus internal client for MEI Bus Messages
+ */
+#define MEI_MAX_OPEN_HANDLE_COUNT (MEI_CLIENTS_MAX - 1)
+
+/*
+ * Internal Clients Number
+ */
+#define MEI_HOST_CLIENT_ID_ANY (-1)
+#define MEI_HBM_HOST_CLIENT_ID 0 /* not used, just for documentation */
+#define MEI_WD_HOST_CLIENT_ID 1
+#define MEI_IAMTHIF_HOST_CLIENT_ID 2
+
+
+/* File state */
+enum file_state {
+ MEI_FILE_INITIALIZING = 0,
+ MEI_FILE_CONNECTING,
+ MEI_FILE_CONNECTED,
+ MEI_FILE_DISCONNECTING,
+ MEI_FILE_DISCONNECT_REPLY,
+ MEI_FILE_DISCONNECT_REQUIRED,
+ MEI_FILE_DISCONNECTED,
+};
+
+/* MEI device states */
+enum mei_dev_state {
+ MEI_DEV_INITIALIZING = 0,
+ MEI_DEV_INIT_CLIENTS,
+ MEI_DEV_ENABLED,
+ MEI_DEV_RESETTING,
+ MEI_DEV_DISABLED,
+ MEI_DEV_POWER_DOWN,
+ MEI_DEV_POWER_UP
+};
+
+const char *mei_dev_state_str(int state);
+
+enum iamthif_states {
+ MEI_IAMTHIF_IDLE,
+ MEI_IAMTHIF_WRITING,
+ MEI_IAMTHIF_FLOW_CONTROL,
+ MEI_IAMTHIF_READING,
+ MEI_IAMTHIF_READ_COMPLETE
+};
+
+enum mei_file_transaction_states {
+ MEI_IDLE,
+ MEI_WRITING,
+ MEI_WRITE_COMPLETE,
+ MEI_FLOW_CONTROL,
+ MEI_READING,
+ MEI_READ_COMPLETE
+};
+
+enum mei_wd_states {
+ MEI_WD_IDLE,
+ MEI_WD_RUNNING,
+ MEI_WD_STOPPING,
+};
+
+/**
+ * enum mei_cb_file_ops - file operation associated with the callback
+ * @MEI_FOP_READ: read
+ * @MEI_FOP_WRITE: write
+ * @MEI_FOP_CONNECT: connect
+ * @MEI_FOP_DISCONNECT: disconnect
+ * @MEI_FOP_DISCONNECT_RSP: disconnect response
+ * @MEI_FOP_NOTIFY_START: start notification
+ * @MEI_FOP_NOTIFY_STOP: stop notification
+ */
+enum mei_cb_file_ops {
+ MEI_FOP_READ = 0,
+ MEI_FOP_WRITE,
+ MEI_FOP_CONNECT,
+ MEI_FOP_DISCONNECT,
+ MEI_FOP_DISCONNECT_RSP,
+ MEI_FOP_NOTIFY_START,
+ MEI_FOP_NOTIFY_STOP,
+};
+
+/*
+ * Intel MEI message data struct
+ */
+struct mei_msg_data {
+ u32 size;
+ unsigned char *data;
+};
+
+/* Maximum number of processed FW status registers */
+#define MEI_FW_STATUS_MAX 6
+/* Minimal buffer for FW status string (8 bytes in dw + space or '\0') */
+#define MEI_FW_STATUS_STR_SZ (MEI_FW_STATUS_MAX * (8 + 1))
+
+
+/*
+ * struct mei_fw_status - storage of FW status data
+ *
+ * @count: number of actually available elements in array
+ * @status: FW status registers
+ */
+struct mei_fw_status {
+ int count;
+ u32 status[MEI_FW_STATUS_MAX];
+};
+
+/**
+ * struct mei_me_client - representation of me (fw) client
+ *
+ * @list: link in me client list
+ * @refcnt: struct reference count
+ * @props: client properties
+ * @client_id: me client id
+ * @mei_flow_ctrl_creds: flow control credits
+ * @connect_count: number connections to this client
+ * @bus_added: added to bus
+ */
+struct mei_me_client {
+ struct list_head list;
+ struct kref refcnt;
+ struct mei_client_properties props;
+ u8 client_id;
+ u8 mei_flow_ctrl_creds;
+ u8 connect_count;
+ u8 bus_added;
+};
+
+
+struct mei_cl;
+
+/**
+ * struct mei_cl_cb - file operation callback structure
+ *
+ * @list: link in callback queue
+ * @cl: file client who is running this operation
+ * @fop_type: file operation type
+ * @buf: buffer for data associated with the callback
+ * @buf_idx: last read index
+ * @read_time: last read operation time stamp (iamthif)
+ * @file_object: pointer to file structure
+ * @status: io status of the cb
+ * @internal: communication between driver and FW flag
+ * @completed: the transfer or reception has completed
+ */
+struct mei_cl_cb {
+ struct list_head list;
+ struct mei_cl *cl;
+ enum mei_cb_file_ops fop_type;
+ struct mei_msg_data buf;
+ unsigned long buf_idx;
+ unsigned long read_time;
+ struct file *file_object;
+ int status;
+ u32 internal:1;
+ u32 completed:1;
+};
+
+/**
+ * struct mei_cl - me client host representation
+ * carried in file->private_data
+ *
+ * @link: link in the clients list
+ * @dev: mei parent device
+ * @state: file operation state
+ * @tx_wait: wait queue for tx completion
+ * @rx_wait: wait queue for rx completion
+ * @wait: wait queue for management operation
+ * @ev_wait: notification wait queue
+ * @ev_async: event async notification
+ * @status: connection status
+ * @me_cl: fw client connected
+ * @host_client_id: host id
+ * @mei_flow_ctrl_creds: transmit flow credentials
+ * @timer_count: watchdog timer for operation completion
+ * @reserved: reserved for alignment
+ * @notify_en: notification - enabled/disabled
+ * @notify_ev: pending notification event
+ * @writing_state: state of the tx
+ * @rd_pending: pending read credits
+ * @rd_completed: completed read
+ *
+ * @cldev: device on the mei client bus
+ */
+struct mei_cl {
+ struct list_head link;
+ struct mei_device *dev;
+ enum file_state state;
+ wait_queue_head_t tx_wait;
+ wait_queue_head_t rx_wait;
+ wait_queue_head_t wait;
+ wait_queue_head_t ev_wait;
+ struct fasync_struct *ev_async;
+ int status;
+ struct mei_me_client *me_cl;
+ u8 host_client_id;
+ u8 mei_flow_ctrl_creds;
+ u8 timer_count;
+ u8 reserved;
+ u8 notify_en;
+ u8 notify_ev;
+ enum mei_file_transaction_states writing_state;
+ struct list_head rd_pending;
+ struct list_head rd_completed;
+
+ struct mei_cl_device *cldev;
+};
+
+/**
+ * struct mei_hw_ops - hw specific ops
+ *
+ * @host_is_ready : query for host readiness
+ *
+ * @hw_is_ready : query if hw is ready
+ * @hw_reset : reset hw
+ * @hw_start : start hw after reset
+ * @hw_config : configure hw
+ *
+ * @fw_status : get fw status registers
+ * @pg_state : power gating state of the device
+ * @pg_in_transition : is device now in pg transition
+ * @pg_is_enabled : is power gating enabled
+ *
+ * @intr_clear : clear pending interrupts
+ * @intr_enable : enable interrupts
+ * @intr_disable : disable interrupts
+ *
+ * @hbuf_free_slots : query for write buffer empty slots
+ * @hbuf_is_ready : query if write buffer is empty
+ * @hbuf_max_len : query for write buffer max len
+ *
+ * @write : write a message to FW
+ *
+ * @rdbuf_full_slots : query how many slots are filled
+ *
+ * @read_hdr : get first 4 bytes (header)
+ * @read : read a buffer from the FW
+ */
+struct mei_hw_ops {
+
+ bool (*host_is_ready)(struct mei_device *dev);
+
+ bool (*hw_is_ready)(struct mei_device *dev);
+ int (*hw_reset)(struct mei_device *dev, bool enable);
+ int (*hw_start)(struct mei_device *dev);
+ void (*hw_config)(struct mei_device *dev);
+
+
+ int (*fw_status)(struct mei_device *dev, struct mei_fw_status *fw_sts);
+ enum mei_pg_state (*pg_state)(struct mei_device *dev);
+ bool (*pg_in_transition)(struct mei_device *dev);
+ bool (*pg_is_enabled)(struct mei_device *dev);
+
+ void (*intr_clear)(struct mei_device *dev);
+ void (*intr_enable)(struct mei_device *dev);
+ void (*intr_disable)(struct mei_device *dev);
+
+ int (*hbuf_free_slots)(struct mei_device *dev);
+ bool (*hbuf_is_ready)(struct mei_device *dev);
+ size_t (*hbuf_max_len)(const struct mei_device *dev);
+
+ int (*write)(struct mei_device *dev,
+ struct mei_msg_hdr *hdr,
+ unsigned char *buf);
+
+ int (*rdbuf_full_slots)(struct mei_device *dev);
+
+ u32 (*read_hdr)(const struct mei_device *dev);
+ int (*read)(struct mei_device *dev,
+ unsigned char *buf, unsigned long len);
+};
+
+/* MEI bus API*/
+void mei_cl_bus_rescan(struct mei_device *bus);
+void mei_cl_bus_dev_fixup(struct mei_cl_device *dev);
+ssize_t __mei_cl_send(struct mei_cl *cl, u8 *buf, size_t length,
+ bool blocking);
+ssize_t __mei_cl_recv(struct mei_cl *cl, u8 *buf, size_t length);
+void mei_cl_bus_rx_event(struct mei_cl *cl);
+void mei_cl_bus_notify_event(struct mei_cl *cl);
+void mei_cl_bus_remove_devices(struct mei_device *bus);
+int mei_cl_bus_init(void);
+void mei_cl_bus_exit(void);
+
+/**
+ * enum mei_pg_event - power gating transition events
+ *
+ * @MEI_PG_EVENT_IDLE: the driver is not in power gating transition
+ * @MEI_PG_EVENT_WAIT: the driver is waiting for a pg event to complete
+ * @MEI_PG_EVENT_RECEIVED: the driver received pg event
+ * @MEI_PG_EVENT_INTR_WAIT: the driver is waiting for a pg event interrupt
+ * @MEI_PG_EVENT_INTR_RECEIVED: the driver received pg event interrupt
+ */
+enum mei_pg_event {
+ MEI_PG_EVENT_IDLE,
+ MEI_PG_EVENT_WAIT,
+ MEI_PG_EVENT_RECEIVED,
+ MEI_PG_EVENT_INTR_WAIT,
+ MEI_PG_EVENT_INTR_RECEIVED,
+};
+
+/**
+ * enum mei_pg_state - device internal power gating state
+ *
+ * @MEI_PG_OFF: device is not power gated - it is active
+ * @MEI_PG_ON: device is power gated - it is in lower power state
+ */
+enum mei_pg_state {
+ MEI_PG_OFF = 0,
+ MEI_PG_ON = 1,
+};
+
+const char *mei_pg_state_str(enum mei_pg_state state);
+
+/**
+ * struct mei_device - MEI private device struct
+ *
+ * @dev : device on a bus
+ * @cdev : character device
+ * @minor : minor number allocated for device
+ *
+ * @write_list : write pending list
+ * @write_waiting_list : write completion list
+ * @ctrl_wr_list : pending control write list
+ * @ctrl_rd_list : pending control read list
+ *
+ * @file_list : list of opened handles
+ * @open_handle_count: number of opened handles
+ *
+ * @device_lock : big device lock
+ * @timer_work : MEI timer delayed work (timeouts)
+ *
+ * @recvd_hw_ready : hw ready message received flag
+ *
+ * @wait_hw_ready : wait queue for receive HW ready message form FW
+ * @wait_pg : wait queue for receive PG message from FW
+ * @wait_hbm_start : wait queue for receive HBM start message from FW
+ * @wait_stop_wd : wait queue for receive WD stop message from FW
+ *
+ * @reset_count : number of consecutive resets
+ * @dev_state : device state
+ * @hbm_state : state of host bus message protocol
+ * @init_clients_timer : HBM init handshake timeout
+ *
+ * @pg_event : power gating event
+ * @pg_domain : runtime PM domain
+ *
+ * @rd_msg_buf : control messages buffer
+ * @rd_msg_hdr : read message header storage
+ *
+ * @hbuf_depth : depth of hardware host/write buffer is slots
+ * @hbuf_is_ready : query if the host host/write buffer is ready
+ * @wr_msg : the buffer for hbm control messages
+ *
+ * @version : HBM protocol version in use
+ * @hbm_f_pg_supported : hbm feature pgi protocol
+ * @hbm_f_dc_supported : hbm feature dynamic clients
+ * @hbm_f_dot_supported : hbm feature disconnect on timeout
+ * @hbm_f_ev_supported : hbm feature event notification
+ *
+ * @me_clients_rwsem: rw lock over me_clients list
+ * @me_clients : list of FW clients
+ * @me_clients_map : FW clients bit map
+ * @host_clients_map : host clients id pool
+ * @me_client_index : last FW client index in enumeration
+ *
+ * @allow_fixed_address: allow user space to connect a fixed client
+ *
+ * @wd_cl : watchdog client
+ * @wd_state : watchdog client state
+ * @wd_pending : watchdog command is pending
+ * @wd_timeout : watchdog expiration timeout
+ * @wd_data : watchdog message buffer
+ *
+ * @amthif_cmd_list : amthif list for cmd waiting
+ * @amthif_rd_complete_list : amthif list for reading completed cmd data
+ * @iamthif_file_object : file for current amthif operation
+ * @iamthif_cl : amthif host client
+ * @iamthif_current_cb : amthif current operation callback
+ * @iamthif_open_count : number of opened amthif connections
+ * @iamthif_timer : time stamp of current amthif command completion
+ * @iamthif_stall_timer : timer to detect amthif hang
+ * @iamthif_state : amthif processor state
+ * @iamthif_canceled : current amthif command is canceled
+ *
+ * @init_work : work item for the device init
+ * @reset_work : work item for the device reset
+ *
+ * @device_list : mei client bus list
+ * @cl_bus_lock : client bus list lock
+ *
+ * @dbgfs_dir : debugfs mei root directory
+ *
+ * @ops: : hw specific operations
+ * @hw : hw specific data
+ */
+struct mei_device {
+ struct device *dev;
+ struct cdev cdev;
+ int minor;
+
+ struct mei_cl_cb write_list;
+ struct mei_cl_cb write_waiting_list;
+ struct mei_cl_cb ctrl_wr_list;
+ struct mei_cl_cb ctrl_rd_list;
+
+ struct list_head file_list;
+ long open_handle_count;
+
+ struct mutex device_lock;
+ struct delayed_work timer_work;
+
+ bool recvd_hw_ready;
+ /*
+ * waiting queue for receive message from FW
+ */
+ wait_queue_head_t wait_hw_ready;
+ wait_queue_head_t wait_pg;
+ wait_queue_head_t wait_hbm_start;
+ wait_queue_head_t wait_stop_wd;
+
+ /*
+ * mei device states
+ */
+ unsigned long reset_count;
+ enum mei_dev_state dev_state;
+ enum mei_hbm_state hbm_state;
+ u16 init_clients_timer;
+
+ /*
+ * Power Gating support
+ */
+ enum mei_pg_event pg_event;
+#ifdef CONFIG_PM
+ struct dev_pm_domain pg_domain;
+#endif /* CONFIG_PM */
+
+ unsigned char rd_msg_buf[MEI_RD_MSG_BUF_SIZE];
+ u32 rd_msg_hdr;
+
+ /* write buffer */
+ u8 hbuf_depth;
+ bool hbuf_is_ready;
+
+ /* used for control messages */
+ struct {
+ struct mei_msg_hdr hdr;
+ unsigned char data[128];
+ } wr_msg;
+
+ struct hbm_version version;
+ unsigned int hbm_f_pg_supported:1;
+ unsigned int hbm_f_dc_supported:1;
+ unsigned int hbm_f_dot_supported:1;
+ unsigned int hbm_f_ev_supported:1;
+
+ struct rw_semaphore me_clients_rwsem;
+ struct list_head me_clients;
+ DECLARE_BITMAP(me_clients_map, MEI_CLIENTS_MAX);
+ DECLARE_BITMAP(host_clients_map, MEI_CLIENTS_MAX);
+ unsigned long me_client_index;
+
+ bool allow_fixed_address;
+
+ struct mei_cl wd_cl;
+ enum mei_wd_states wd_state;
+ bool wd_pending;
+ u16 wd_timeout;
+ unsigned char wd_data[MEI_WD_START_MSG_SIZE];
+
+
+ /* amthif list for cmd waiting */
+ struct mei_cl_cb amthif_cmd_list;
+ /* driver managed amthif list for reading completed amthif cmd data */
+ struct mei_cl_cb amthif_rd_complete_list;
+ struct file *iamthif_file_object;
+ struct mei_cl iamthif_cl;
+ struct mei_cl_cb *iamthif_current_cb;
+ long iamthif_open_count;
+ unsigned long iamthif_timer;
+ u32 iamthif_stall_timer;
+ enum iamthif_states iamthif_state;
+ bool iamthif_canceled;
+
+ struct work_struct init_work;
+ struct work_struct reset_work;
+
+ /* List of bus devices */
+ struct list_head device_list;
+ struct mutex cl_bus_lock;
+
+#if IS_ENABLED(CONFIG_DEBUG_FS)
+ struct dentry *dbgfs_dir;
+#endif /* CONFIG_DEBUG_FS */
+
+
+ const struct mei_hw_ops *ops;
+ char hw[0] __aligned(sizeof(void *));
+};
+
+static inline unsigned long mei_secs_to_jiffies(unsigned long sec)
+{
+ return msecs_to_jiffies(sec * MSEC_PER_SEC);
+}
+
+/**
+ * mei_data2slots - get slots - number of (dwords) from a message length
+ * + size of the mei header
+ *
+ * @length: size of the messages in bytes
+ *
+ * Return: number of slots
+ */
+static inline u32 mei_data2slots(size_t length)
+{
+ return DIV_ROUND_UP(sizeof(struct mei_msg_hdr) + length, 4);
+}
+
+/**
+ * mei_slots2data - get data in slots - bytes from slots
+ *
+ * @slots: number of available slots
+ *
+ * Return: number of bytes in slots
+ */
+static inline u32 mei_slots2data(int slots)
+{
+ return slots * 4;
+}
+
+/*
+ * mei init function prototypes
+ */
+void mei_device_init(struct mei_device *dev,
+ struct device *device,
+ const struct mei_hw_ops *hw_ops);
+int mei_reset(struct mei_device *dev);
+int mei_start(struct mei_device *dev);
+int mei_restart(struct mei_device *dev);
+void mei_stop(struct mei_device *dev);
+void mei_cancel_work(struct mei_device *dev);
+
+/*
+ * MEI interrupt functions prototype
+ */
+
+void mei_timer(struct work_struct *work);
+int mei_irq_read_handler(struct mei_device *dev,
+ struct mei_cl_cb *cmpl_list, s32 *slots);
+
+int mei_irq_write_handler(struct mei_device *dev, struct mei_cl_cb *cmpl_list);
+void mei_irq_compl_handler(struct mei_device *dev, struct mei_cl_cb *cmpl_list);
+
+/*
+ * AMTHIF - AMT Host Interface Functions
+ */
+void mei_amthif_reset_params(struct mei_device *dev);
+
+int mei_amthif_host_init(struct mei_device *dev, struct mei_me_client *me_cl);
+
+int mei_amthif_read(struct mei_device *dev, struct file *file,
+ char __user *ubuf, size_t length, loff_t *offset);
+
+unsigned int mei_amthif_poll(struct mei_device *dev,
+ struct file *file, poll_table *wait);
+
+int mei_amthif_release(struct mei_device *dev, struct file *file);
+
+struct mei_cl_cb *mei_amthif_find_read_list_entry(struct mei_device *dev,
+ struct file *file);
+
+int mei_amthif_write(struct mei_cl *cl, struct mei_cl_cb *cb);
+int mei_amthif_run_next_cmd(struct mei_device *dev);
+int mei_amthif_irq_write(struct mei_cl *cl, struct mei_cl_cb *cb,
+ struct mei_cl_cb *cmpl_list);
+
+void mei_amthif_complete(struct mei_device *dev, struct mei_cl_cb *cb);
+int mei_amthif_irq_read_msg(struct mei_cl *cl,
+ struct mei_msg_hdr *mei_hdr,
+ struct mei_cl_cb *complete_list);
+int mei_amthif_irq_read(struct mei_device *dev, s32 *slots);
+
+/*
+ * NFC functions
+ */
+int mei_nfc_host_init(struct mei_device *dev, struct mei_me_client *me_cl);
+void mei_nfc_host_exit(struct mei_device *dev);
+
+/*
+ * NFC Client UUID
+ */
+extern const uuid_le mei_nfc_guid;
+
+int mei_wd_send(struct mei_device *dev);
+int mei_wd_stop(struct mei_device *dev);
+int mei_wd_host_init(struct mei_device *dev, struct mei_me_client *me_cl);
+/*
+ * mei_watchdog_register - Registering watchdog interface
+ * once we got connection to the WD Client
+ * @dev: mei device
+ */
+int mei_watchdog_register(struct mei_device *dev);
+/*
+ * mei_watchdog_unregister - Unregistering watchdog interface
+ * @dev: mei device
+ */
+void mei_watchdog_unregister(struct mei_device *dev);
+
+/*
+ * Register Access Function
+ */
+
+
+static inline void mei_hw_config(struct mei_device *dev)
+{
+ dev->ops->hw_config(dev);
+}
+
+static inline enum mei_pg_state mei_pg_state(struct mei_device *dev)
+{
+ return dev->ops->pg_state(dev);
+}
+
+static inline bool mei_pg_in_transition(struct mei_device *dev)
+{
+ return dev->ops->pg_in_transition(dev);
+}
+
+static inline bool mei_pg_is_enabled(struct mei_device *dev)
+{
+ return dev->ops->pg_is_enabled(dev);
+}
+
+static inline int mei_hw_reset(struct mei_device *dev, bool enable)
+{
+ return dev->ops->hw_reset(dev, enable);
+}
+
+static inline int mei_hw_start(struct mei_device *dev)
+{
+ return dev->ops->hw_start(dev);
+}
+
+static inline void mei_clear_interrupts(struct mei_device *dev)
+{
+ dev->ops->intr_clear(dev);
+}
+
+static inline void mei_enable_interrupts(struct mei_device *dev)
+{
+ dev->ops->intr_enable(dev);
+}
+
+static inline void mei_disable_interrupts(struct mei_device *dev)
+{
+ dev->ops->intr_disable(dev);
+}
+
+static inline bool mei_host_is_ready(struct mei_device *dev)
+{
+ return dev->ops->host_is_ready(dev);
+}
+static inline bool mei_hw_is_ready(struct mei_device *dev)
+{
+ return dev->ops->hw_is_ready(dev);
+}
+
+static inline bool mei_hbuf_is_ready(struct mei_device *dev)
+{
+ return dev->ops->hbuf_is_ready(dev);
+}
+
+static inline int mei_hbuf_empty_slots(struct mei_device *dev)
+{
+ return dev->ops->hbuf_free_slots(dev);
+}
+
+static inline size_t mei_hbuf_max_len(const struct mei_device *dev)
+{
+ return dev->ops->hbuf_max_len(dev);
+}
+
+static inline int mei_write_message(struct mei_device *dev,
+ struct mei_msg_hdr *hdr,
+ unsigned char *buf)
+{
+ return dev->ops->write(dev, hdr, buf);
+}
+
+static inline u32 mei_read_hdr(const struct mei_device *dev)
+{
+ return dev->ops->read_hdr(dev);
+}
+
+static inline void mei_read_slots(struct mei_device *dev,
+ unsigned char *buf, unsigned long len)
+{
+ dev->ops->read(dev, buf, len);
+}
+
+static inline int mei_count_full_read_slots(struct mei_device *dev)
+{
+ return dev->ops->rdbuf_full_slots(dev);
+}
+
+static inline int mei_fw_status(struct mei_device *dev,
+ struct mei_fw_status *fw_status)
+{
+ return dev->ops->fw_status(dev, fw_status);
+}
+
+bool mei_hbuf_acquire(struct mei_device *dev);
+
+bool mei_write_is_idle(struct mei_device *dev);
+
+void mei_irq_discard_msg(struct mei_device *dev, struct mei_msg_hdr *hdr);
+
+#if IS_ENABLED(CONFIG_DEBUG_FS)
+int mei_dbgfs_register(struct mei_device *dev, const char *name);
+void mei_dbgfs_deregister(struct mei_device *dev);
+#else
+static inline int mei_dbgfs_register(struct mei_device *dev, const char *name)
+{
+ return 0;
+}
+static inline void mei_dbgfs_deregister(struct mei_device *dev) {}
+#endif /* CONFIG_DEBUG_FS */
+
+int mei_register(struct mei_device *dev, struct device *parent);
+void mei_deregister(struct mei_device *dev);
+
+#define MEI_HDR_FMT "hdr:host=%02d me=%02d len=%d internal=%1d comp=%1d"
+#define MEI_HDR_PRM(hdr) \
+ (hdr)->host_addr, (hdr)->me_addr, \
+ (hdr)->length, (hdr)->internal, (hdr)->msg_complete
+
+ssize_t mei_fw_status2str(struct mei_fw_status *fw_sts, char *buf, size_t len);
+/**
+ * mei_fw_status_str - fetch and convert fw status registers to printable string
+ *
+ * @dev: the device structure
+ * @buf: string buffer at minimal size MEI_FW_STATUS_STR_SZ
+ * @len: buffer len must be >= MEI_FW_STATUS_STR_SZ
+ *
+ * Return: number of bytes written or < 0 on failure
+ */
+static inline ssize_t mei_fw_status_str(struct mei_device *dev,
+ char *buf, size_t len)
+{
+ struct mei_fw_status fw_status;
+ int ret;
+
+ buf[0] = '\0';
+
+ ret = mei_fw_status(dev, &fw_status);
+ if (ret)
+ return ret;
+
+ ret = mei_fw_status2str(&fw_status, buf, MEI_FW_STATUS_STR_SZ);
+
+ return ret;
+}
+
+
+#endif
diff --git a/drivers/misc/mei/pci-me.c b/drivers/misc/mei/pci-me.c
new file mode 100644
index 0000000..adab5bb
--- /dev/null
+++ b/drivers/misc/mei/pci-me.c
@@ -0,0 +1,490 @@
+/*
+ *
+ * Intel Management Engine Interface (Intel MEI) Linux driver
+ * Copyright (c) 2003-2012, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ */
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/kernel.h>
+#include <linux/device.h>
+#include <linux/fs.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/fcntl.h>
+#include <linux/pci.h>
+#include <linux/poll.h>
+#include <linux/ioctl.h>
+#include <linux/cdev.h>
+#include <linux/sched.h>
+#include <linux/uuid.h>
+#include <linux/compat.h>
+#include <linux/jiffies.h>
+#include <linux/interrupt.h>
+
+#include <linux/pm_runtime.h>
+
+#include <linux/mei.h>
+
+#include "mei_dev.h"
+#include "client.h"
+#include "hw-me-regs.h"
+#include "hw-me.h"
+
+/* mei_pci_tbl - PCI Device ID Table */
+static const struct pci_device_id mei_me_pci_tbl[] = {
+ {MEI_PCI_DEVICE(MEI_DEV_ID_82946GZ, mei_me_legacy_cfg)},
+ {MEI_PCI_DEVICE(MEI_DEV_ID_82G35, mei_me_legacy_cfg)},
+ {MEI_PCI_DEVICE(MEI_DEV_ID_82Q965, mei_me_legacy_cfg)},
+ {MEI_PCI_DEVICE(MEI_DEV_ID_82G965, mei_me_legacy_cfg)},
+ {MEI_PCI_DEVICE(MEI_DEV_ID_82GM965, mei_me_legacy_cfg)},
+ {MEI_PCI_DEVICE(MEI_DEV_ID_82GME965, mei_me_legacy_cfg)},
+ {MEI_PCI_DEVICE(MEI_DEV_ID_ICH9_82Q35, mei_me_legacy_cfg)},
+ {MEI_PCI_DEVICE(MEI_DEV_ID_ICH9_82G33, mei_me_legacy_cfg)},
+ {MEI_PCI_DEVICE(MEI_DEV_ID_ICH9_82Q33, mei_me_legacy_cfg)},
+ {MEI_PCI_DEVICE(MEI_DEV_ID_ICH9_82X38, mei_me_legacy_cfg)},
+ {MEI_PCI_DEVICE(MEI_DEV_ID_ICH9_3200, mei_me_legacy_cfg)},
+
+ {MEI_PCI_DEVICE(MEI_DEV_ID_ICH9_6, mei_me_legacy_cfg)},
+ {MEI_PCI_DEVICE(MEI_DEV_ID_ICH9_7, mei_me_legacy_cfg)},
+ {MEI_PCI_DEVICE(MEI_DEV_ID_ICH9_8, mei_me_legacy_cfg)},
+ {MEI_PCI_DEVICE(MEI_DEV_ID_ICH9_9, mei_me_legacy_cfg)},
+ {MEI_PCI_DEVICE(MEI_DEV_ID_ICH9_10, mei_me_legacy_cfg)},
+ {MEI_PCI_DEVICE(MEI_DEV_ID_ICH9M_1, mei_me_legacy_cfg)},
+ {MEI_PCI_DEVICE(MEI_DEV_ID_ICH9M_2, mei_me_legacy_cfg)},
+ {MEI_PCI_DEVICE(MEI_DEV_ID_ICH9M_3, mei_me_legacy_cfg)},
+ {MEI_PCI_DEVICE(MEI_DEV_ID_ICH9M_4, mei_me_legacy_cfg)},
+ {MEI_PCI_DEVICE(MEI_DEV_ID_ICH10_1, mei_me_ich_cfg)},
+ {MEI_PCI_DEVICE(MEI_DEV_ID_ICH10_2, mei_me_ich_cfg)},
+ {MEI_PCI_DEVICE(MEI_DEV_ID_ICH10_3, mei_me_ich_cfg)},
+ {MEI_PCI_DEVICE(MEI_DEV_ID_ICH10_4, mei_me_ich_cfg)},
+
+ {MEI_PCI_DEVICE(MEI_DEV_ID_IBXPK_1, mei_me_pch_cfg)},
+ {MEI_PCI_DEVICE(MEI_DEV_ID_IBXPK_2, mei_me_pch_cfg)},
+ {MEI_PCI_DEVICE(MEI_DEV_ID_CPT_1, mei_me_pch_cpt_pbg_cfg)},
+ {MEI_PCI_DEVICE(MEI_DEV_ID_PBG_1, mei_me_pch_cpt_pbg_cfg)},
+ {MEI_PCI_DEVICE(MEI_DEV_ID_PPT_1, mei_me_pch_cfg)},
+ {MEI_PCI_DEVICE(MEI_DEV_ID_PPT_2, mei_me_pch_cfg)},
+ {MEI_PCI_DEVICE(MEI_DEV_ID_PPT_3, mei_me_pch_cfg)},
+ {MEI_PCI_DEVICE(MEI_DEV_ID_LPT_H, mei_me_pch8_sps_cfg)},
+ {MEI_PCI_DEVICE(MEI_DEV_ID_LPT_W, mei_me_pch8_sps_cfg)},
+ {MEI_PCI_DEVICE(MEI_DEV_ID_LPT_LP, mei_me_pch8_cfg)},
+ {MEI_PCI_DEVICE(MEI_DEV_ID_LPT_HR, mei_me_pch8_sps_cfg)},
+ {MEI_PCI_DEVICE(MEI_DEV_ID_WPT_LP, mei_me_pch8_cfg)},
+ {MEI_PCI_DEVICE(MEI_DEV_ID_WPT_LP_2, mei_me_pch8_cfg)},
+
+ {MEI_PCI_DEVICE(MEI_DEV_ID_SPT, mei_me_pch8_cfg)},
+ {MEI_PCI_DEVICE(MEI_DEV_ID_SPT_2, mei_me_pch8_cfg)},
+ {MEI_PCI_DEVICE(MEI_DEV_ID_SPT_H, mei_me_pch8_sps_cfg)},
+ {MEI_PCI_DEVICE(MEI_DEV_ID_SPT_H_2, mei_me_pch8_sps_cfg)},
+ {MEI_PCI_DEVICE(MEI_DEV_ID_LBG, mei_me_pch8_cfg)},
+
+ {MEI_PCI_DEVICE(MEI_DEV_ID_KBP, mei_me_pch8_cfg)},
+ {MEI_PCI_DEVICE(MEI_DEV_ID_KBP_2, mei_me_pch8_cfg)},
+
+ {MEI_PCI_DEVICE(MEI_DEV_ID_BXT_M, mei_me_pch8_cfg)},
+ {MEI_PCI_DEVICE(MEI_DEV_ID_APL_I, mei_me_pch8_cfg)},
+
+ /* required last entry */
+ {0, }
+};
+
+MODULE_DEVICE_TABLE(pci, mei_me_pci_tbl);
+
+#ifdef CONFIG_PM
+static inline void mei_me_set_pm_domain(struct mei_device *dev);
+static inline void mei_me_unset_pm_domain(struct mei_device *dev);
+#else
+static inline void mei_me_set_pm_domain(struct mei_device *dev) {}
+static inline void mei_me_unset_pm_domain(struct mei_device *dev) {}
+#endif /* CONFIG_PM */
+
+/**
+ * mei_me_quirk_probe - probe for devices that doesn't valid ME interface
+ *
+ * @pdev: PCI device structure
+ * @cfg: per generation config
+ *
+ * Return: true if ME Interface is valid, false otherwise
+ */
+static bool mei_me_quirk_probe(struct pci_dev *pdev,
+ const struct mei_cfg *cfg)
+{
+ if (cfg->quirk_probe && cfg->quirk_probe(pdev)) {
+ dev_info(&pdev->dev, "Device doesn't have valid ME Interface\n");
+ return false;
+ }
+
+ return true;
+}
+
+/**
+ * mei_me_probe - Device Initialization Routine
+ *
+ * @pdev: PCI device structure
+ * @ent: entry in kcs_pci_tbl
+ *
+ * Return: 0 on success, <0 on failure.
+ */
+static int mei_me_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
+{
+ const struct mei_cfg *cfg = (struct mei_cfg *)(ent->driver_data);
+ struct mei_device *dev;
+ struct mei_me_hw *hw;
+ unsigned int irqflags;
+ int err;
+
+
+ if (!mei_me_quirk_probe(pdev, cfg))
+ return -ENODEV;
+
+ /* enable pci dev */
+ err = pci_enable_device(pdev);
+ if (err) {
+ dev_err(&pdev->dev, "failed to enable pci device.\n");
+ goto end;
+ }
+ /* set PCI host mastering */
+ pci_set_master(pdev);
+ /* pci request regions for mei driver */
+ err = pci_request_regions(pdev, KBUILD_MODNAME);
+ if (err) {
+ dev_err(&pdev->dev, "failed to get pci regions.\n");
+ goto disable_device;
+ }
+
+ if (dma_set_mask(&pdev->dev, DMA_BIT_MASK(64)) ||
+ dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64))) {
+
+ err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
+ if (err)
+ err = dma_set_coherent_mask(&pdev->dev,
+ DMA_BIT_MASK(32));
+ }
+ if (err) {
+ dev_err(&pdev->dev, "No usable DMA configuration, aborting\n");
+ goto release_regions;
+ }
+
+
+ /* allocates and initializes the mei dev structure */
+ dev = mei_me_dev_init(pdev, cfg);
+ if (!dev) {
+ err = -ENOMEM;
+ goto release_regions;
+ }
+ hw = to_me_hw(dev);
+ /* mapping IO device memory */
+ hw->mem_addr = pci_iomap(pdev, 0, 0);
+ if (!hw->mem_addr) {
+ dev_err(&pdev->dev, "mapping I/O device memory failure.\n");
+ err = -ENOMEM;
+ goto free_device;
+ }
+ pci_enable_msi(pdev);
+
+ /* request and enable interrupt */
+ irqflags = pci_dev_msi_enabled(pdev) ? IRQF_ONESHOT : IRQF_SHARED;
+
+ err = request_threaded_irq(pdev->irq,
+ mei_me_irq_quick_handler,
+ mei_me_irq_thread_handler,
+ irqflags, KBUILD_MODNAME, dev);
+ if (err) {
+ dev_err(&pdev->dev, "request_threaded_irq failure. irq = %d\n",
+ pdev->irq);
+ goto disable_msi;
+ }
+
+ if (mei_start(dev)) {
+ dev_err(&pdev->dev, "init hw failure.\n");
+ err = -ENODEV;
+ goto release_irq;
+ }
+
+ pm_runtime_set_autosuspend_delay(&pdev->dev, MEI_ME_RPM_TIMEOUT);
+ pm_runtime_use_autosuspend(&pdev->dev);
+
+ err = mei_register(dev, &pdev->dev);
+ if (err)
+ goto release_irq;
+
+ pci_set_drvdata(pdev, dev);
+
+ schedule_delayed_work(&dev->timer_work, HZ);
+
+ /*
+ * For not wake-able HW runtime pm framework
+ * can't be used on pci device level.
+ * Use domain runtime pm callbacks instead.
+ */
+ if (!pci_dev_run_wake(pdev))
+ mei_me_set_pm_domain(dev);
+
+ if (mei_pg_is_enabled(dev))
+ pm_runtime_put_noidle(&pdev->dev);
+
+ dev_dbg(&pdev->dev, "initialization successful.\n");
+
+ return 0;
+
+release_irq:
+ mei_cancel_work(dev);
+ mei_disable_interrupts(dev);
+ free_irq(pdev->irq, dev);
+disable_msi:
+ pci_disable_msi(pdev);
+ pci_iounmap(pdev, hw->mem_addr);
+free_device:
+ kfree(dev);
+release_regions:
+ pci_release_regions(pdev);
+disable_device:
+ pci_disable_device(pdev);
+end:
+ dev_err(&pdev->dev, "initialization failed.\n");
+ return err;
+}
+
+/**
+ * mei_me_remove - Device Removal Routine
+ *
+ * @pdev: PCI device structure
+ *
+ * mei_remove is called by the PCI subsystem to alert the driver
+ * that it should release a PCI device.
+ */
+static void mei_me_remove(struct pci_dev *pdev)
+{
+ struct mei_device *dev;
+ struct mei_me_hw *hw;
+
+ dev = pci_get_drvdata(pdev);
+ if (!dev)
+ return;
+
+ if (mei_pg_is_enabled(dev))
+ pm_runtime_get_noresume(&pdev->dev);
+
+ hw = to_me_hw(dev);
+
+
+ dev_dbg(&pdev->dev, "stop\n");
+ mei_stop(dev);
+
+ if (!pci_dev_run_wake(pdev))
+ mei_me_unset_pm_domain(dev);
+
+ /* disable interrupts */
+ mei_disable_interrupts(dev);
+
+ free_irq(pdev->irq, dev);
+ pci_disable_msi(pdev);
+
+ if (hw->mem_addr)
+ pci_iounmap(pdev, hw->mem_addr);
+
+ mei_deregister(dev);
+
+ kfree(dev);
+
+ pci_release_regions(pdev);
+ pci_disable_device(pdev);
+
+
+}
+#ifdef CONFIG_PM_SLEEP
+static int mei_me_pci_suspend(struct device *device)
+{
+ struct pci_dev *pdev = to_pci_dev(device);
+ struct mei_device *dev = pci_get_drvdata(pdev);
+
+ if (!dev)
+ return -ENODEV;
+
+ dev_dbg(&pdev->dev, "suspend\n");
+
+ mei_stop(dev);
+
+ mei_disable_interrupts(dev);
+
+ free_irq(pdev->irq, dev);
+ pci_disable_msi(pdev);
+
+ return 0;
+}
+
+static int mei_me_pci_resume(struct device *device)
+{
+ struct pci_dev *pdev = to_pci_dev(device);
+ struct mei_device *dev;
+ unsigned int irqflags;
+ int err;
+
+ dev = pci_get_drvdata(pdev);
+ if (!dev)
+ return -ENODEV;
+
+ pci_enable_msi(pdev);
+
+ irqflags = pci_dev_msi_enabled(pdev) ? IRQF_ONESHOT : IRQF_SHARED;
+
+ /* request and enable interrupt */
+ err = request_threaded_irq(pdev->irq,
+ mei_me_irq_quick_handler,
+ mei_me_irq_thread_handler,
+ irqflags, KBUILD_MODNAME, dev);
+
+ if (err) {
+ dev_err(&pdev->dev, "request_threaded_irq failed: irq = %d.\n",
+ pdev->irq);
+ return err;
+ }
+
+ err = mei_restart(dev);
+ if (err)
+ return err;
+
+ /* Start timer if stopped in suspend */
+ schedule_delayed_work(&dev->timer_work, HZ);
+
+ return 0;
+}
+#endif /* CONFIG_PM_SLEEP */
+
+#ifdef CONFIG_PM
+static int mei_me_pm_runtime_idle(struct device *device)
+{
+ struct pci_dev *pdev = to_pci_dev(device);
+ struct mei_device *dev;
+
+ dev_dbg(&pdev->dev, "rpm: me: runtime_idle\n");
+
+ dev = pci_get_drvdata(pdev);
+ if (!dev)
+ return -ENODEV;
+ if (mei_write_is_idle(dev))
+ pm_runtime_autosuspend(device);
+
+ return -EBUSY;
+}
+
+static int mei_me_pm_runtime_suspend(struct device *device)
+{
+ struct pci_dev *pdev = to_pci_dev(device);
+ struct mei_device *dev;
+ int ret;
+
+ dev_dbg(&pdev->dev, "rpm: me: runtime suspend\n");
+
+ dev = pci_get_drvdata(pdev);
+ if (!dev)
+ return -ENODEV;
+
+ mutex_lock(&dev->device_lock);
+
+ if (mei_write_is_idle(dev))
+ ret = mei_me_pg_enter_sync(dev);
+ else
+ ret = -EAGAIN;
+
+ mutex_unlock(&dev->device_lock);
+
+ dev_dbg(&pdev->dev, "rpm: me: runtime suspend ret=%d\n", ret);
+
+ return ret;
+}
+
+static int mei_me_pm_runtime_resume(struct device *device)
+{
+ struct pci_dev *pdev = to_pci_dev(device);
+ struct mei_device *dev;
+ int ret;
+
+ dev_dbg(&pdev->dev, "rpm: me: runtime resume\n");
+
+ dev = pci_get_drvdata(pdev);
+ if (!dev)
+ return -ENODEV;
+
+ mutex_lock(&dev->device_lock);
+
+ ret = mei_me_pg_exit_sync(dev);
+
+ mutex_unlock(&dev->device_lock);
+
+ dev_dbg(&pdev->dev, "rpm: me: runtime resume ret = %d\n", ret);
+
+ return ret;
+}
+
+/**
+ * mei_me_set_pm_domain - fill and set pm domain structure for device
+ *
+ * @dev: mei_device
+ */
+static inline void mei_me_set_pm_domain(struct mei_device *dev)
+{
+ struct pci_dev *pdev = to_pci_dev(dev->dev);
+
+ if (pdev->dev.bus && pdev->dev.bus->pm) {
+ dev->pg_domain.ops = *pdev->dev.bus->pm;
+
+ dev->pg_domain.ops.runtime_suspend = mei_me_pm_runtime_suspend;
+ dev->pg_domain.ops.runtime_resume = mei_me_pm_runtime_resume;
+ dev->pg_domain.ops.runtime_idle = mei_me_pm_runtime_idle;
+
+ pdev->dev.pm_domain = &dev->pg_domain;
+ }
+}
+
+/**
+ * mei_me_unset_pm_domain - clean pm domain structure for device
+ *
+ * @dev: mei_device
+ */
+static inline void mei_me_unset_pm_domain(struct mei_device *dev)
+{
+ /* stop using pm callbacks if any */
+ dev->dev->pm_domain = NULL;
+}
+
+static const struct dev_pm_ops mei_me_pm_ops = {
+ SET_SYSTEM_SLEEP_PM_OPS(mei_me_pci_suspend,
+ mei_me_pci_resume)
+ SET_RUNTIME_PM_OPS(
+ mei_me_pm_runtime_suspend,
+ mei_me_pm_runtime_resume,
+ mei_me_pm_runtime_idle)
+};
+
+#define MEI_ME_PM_OPS (&mei_me_pm_ops)
+#else
+#define MEI_ME_PM_OPS NULL
+#endif /* CONFIG_PM */
+/*
+ * PCI driver structure
+ */
+static struct pci_driver mei_me_driver = {
+ .name = KBUILD_MODNAME,
+ .id_table = mei_me_pci_tbl,
+ .probe = mei_me_probe,
+ .remove = mei_me_remove,
+ .shutdown = mei_me_remove,
+ .driver.pm = MEI_ME_PM_OPS,
+};
+
+module_pci_driver(mei_me_driver);
+
+MODULE_AUTHOR("Intel Corporation");
+MODULE_DESCRIPTION("Intel(R) Management Engine Interface");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/misc/mei/pci-txe.c b/drivers/misc/mei/pci-txe.c
new file mode 100644
index 0000000..0882c02
--- /dev/null
+++ b/drivers/misc/mei/pci-txe.c
@@ -0,0 +1,436 @@
+/*
+ *
+ * Intel Management Engine Interface (Intel MEI) Linux driver
+ * Copyright (c) 2013-2014, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/device.h>
+#include <linux/fs.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/pci.h>
+#include <linux/init.h>
+#include <linux/sched.h>
+#include <linux/uuid.h>
+#include <linux/jiffies.h>
+#include <linux/interrupt.h>
+#include <linux/workqueue.h>
+#include <linux/pm_runtime.h>
+
+#include <linux/mei.h>
+
+
+#include "mei_dev.h"
+#include "hw-txe.h"
+
+static const struct pci_device_id mei_txe_pci_tbl[] = {
+ {PCI_VDEVICE(INTEL, 0x0F18)}, /* Baytrail */
+ {PCI_VDEVICE(INTEL, 0x2298)}, /* Cherrytrail */
+
+ {0, }
+};
+MODULE_DEVICE_TABLE(pci, mei_txe_pci_tbl);
+
+#ifdef CONFIG_PM
+static inline void mei_txe_set_pm_domain(struct mei_device *dev);
+static inline void mei_txe_unset_pm_domain(struct mei_device *dev);
+#else
+static inline void mei_txe_set_pm_domain(struct mei_device *dev) {}
+static inline void mei_txe_unset_pm_domain(struct mei_device *dev) {}
+#endif /* CONFIG_PM */
+
+static void mei_txe_pci_iounmap(struct pci_dev *pdev, struct mei_txe_hw *hw)
+{
+ int i;
+
+ for (i = SEC_BAR; i < NUM_OF_MEM_BARS; i++) {
+ if (hw->mem_addr[i]) {
+ pci_iounmap(pdev, hw->mem_addr[i]);
+ hw->mem_addr[i] = NULL;
+ }
+ }
+}
+/**
+ * mei_txe_probe - Device Initialization Routine
+ *
+ * @pdev: PCI device structure
+ * @ent: entry in mei_txe_pci_tbl
+ *
+ * Return: 0 on success, <0 on failure.
+ */
+static int mei_txe_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
+{
+ struct mei_device *dev;
+ struct mei_txe_hw *hw;
+ int err;
+ int i;
+
+ /* enable pci dev */
+ err = pci_enable_device(pdev);
+ if (err) {
+ dev_err(&pdev->dev, "failed to enable pci device.\n");
+ goto end;
+ }
+ /* set PCI host mastering */
+ pci_set_master(pdev);
+ /* pci request regions for mei driver */
+ err = pci_request_regions(pdev, KBUILD_MODNAME);
+ if (err) {
+ dev_err(&pdev->dev, "failed to get pci regions.\n");
+ goto disable_device;
+ }
+
+ err = pci_set_dma_mask(pdev, DMA_BIT_MASK(36));
+ if (err) {
+ err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
+ if (err) {
+ dev_err(&pdev->dev, "No suitable DMA available.\n");
+ goto release_regions;
+ }
+ }
+
+ /* allocates and initializes the mei dev structure */
+ dev = mei_txe_dev_init(pdev);
+ if (!dev) {
+ err = -ENOMEM;
+ goto release_regions;
+ }
+ hw = to_txe_hw(dev);
+
+ /* mapping IO device memory */
+ for (i = SEC_BAR; i < NUM_OF_MEM_BARS; i++) {
+ hw->mem_addr[i] = pci_iomap(pdev, i, 0);
+ if (!hw->mem_addr[i]) {
+ dev_err(&pdev->dev, "mapping I/O device memory failure.\n");
+ err = -ENOMEM;
+ goto free_device;
+ }
+ }
+
+
+ pci_enable_msi(pdev);
+
+ /* clear spurious interrupts */
+ mei_clear_interrupts(dev);
+
+ /* request and enable interrupt */
+ if (pci_dev_msi_enabled(pdev))
+ err = request_threaded_irq(pdev->irq,
+ NULL,
+ mei_txe_irq_thread_handler,
+ IRQF_ONESHOT, KBUILD_MODNAME, dev);
+ else
+ err = request_threaded_irq(pdev->irq,
+ mei_txe_irq_quick_handler,
+ mei_txe_irq_thread_handler,
+ IRQF_SHARED, KBUILD_MODNAME, dev);
+ if (err) {
+ dev_err(&pdev->dev, "mei: request_threaded_irq failure. irq = %d\n",
+ pdev->irq);
+ goto free_device;
+ }
+
+ if (mei_start(dev)) {
+ dev_err(&pdev->dev, "init hw failure.\n");
+ err = -ENODEV;
+ goto release_irq;
+ }
+
+ pm_runtime_set_autosuspend_delay(&pdev->dev, MEI_TXI_RPM_TIMEOUT);
+ pm_runtime_use_autosuspend(&pdev->dev);
+
+ err = mei_register(dev, &pdev->dev);
+ if (err)
+ goto release_irq;
+
+ pci_set_drvdata(pdev, dev);
+
+ /*
+ * For not wake-able HW runtime pm framework
+ * can't be used on pci device level.
+ * Use domain runtime pm callbacks instead.
+ */
+ if (!pci_dev_run_wake(pdev))
+ mei_txe_set_pm_domain(dev);
+
+ pm_runtime_put_noidle(&pdev->dev);
+
+ return 0;
+
+release_irq:
+
+ mei_cancel_work(dev);
+
+ /* disable interrupts */
+ mei_disable_interrupts(dev);
+
+ free_irq(pdev->irq, dev);
+ pci_disable_msi(pdev);
+
+free_device:
+ mei_txe_pci_iounmap(pdev, hw);
+
+ kfree(dev);
+release_regions:
+ pci_release_regions(pdev);
+disable_device:
+ pci_disable_device(pdev);
+end:
+ dev_err(&pdev->dev, "initialization failed.\n");
+ return err;
+}
+
+/**
+ * mei_txe_remove - Device Removal Routine
+ *
+ * @pdev: PCI device structure
+ *
+ * mei_remove is called by the PCI subsystem to alert the driver
+ * that it should release a PCI device.
+ */
+static void mei_txe_remove(struct pci_dev *pdev)
+{
+ struct mei_device *dev;
+ struct mei_txe_hw *hw;
+
+ dev = pci_get_drvdata(pdev);
+ if (!dev) {
+ dev_err(&pdev->dev, "mei: dev =NULL\n");
+ return;
+ }
+
+ pm_runtime_get_noresume(&pdev->dev);
+
+ hw = to_txe_hw(dev);
+
+ mei_stop(dev);
+
+ if (!pci_dev_run_wake(pdev))
+ mei_txe_unset_pm_domain(dev);
+
+ /* disable interrupts */
+ mei_disable_interrupts(dev);
+ free_irq(pdev->irq, dev);
+ pci_disable_msi(pdev);
+
+ pci_set_drvdata(pdev, NULL);
+
+ mei_txe_pci_iounmap(pdev, hw);
+
+ mei_deregister(dev);
+
+ kfree(dev);
+
+ pci_release_regions(pdev);
+ pci_disable_device(pdev);
+}
+
+
+#ifdef CONFIG_PM_SLEEP
+static int mei_txe_pci_suspend(struct device *device)
+{
+ struct pci_dev *pdev = to_pci_dev(device);
+ struct mei_device *dev = pci_get_drvdata(pdev);
+
+ if (!dev)
+ return -ENODEV;
+
+ dev_dbg(&pdev->dev, "suspend\n");
+
+ mei_stop(dev);
+
+ mei_disable_interrupts(dev);
+
+ free_irq(pdev->irq, dev);
+ pci_disable_msi(pdev);
+
+ return 0;
+}
+
+static int mei_txe_pci_resume(struct device *device)
+{
+ struct pci_dev *pdev = to_pci_dev(device);
+ struct mei_device *dev;
+ int err;
+
+ dev = pci_get_drvdata(pdev);
+ if (!dev)
+ return -ENODEV;
+
+ pci_enable_msi(pdev);
+
+ mei_clear_interrupts(dev);
+
+ /* request and enable interrupt */
+ if (pci_dev_msi_enabled(pdev))
+ err = request_threaded_irq(pdev->irq,
+ NULL,
+ mei_txe_irq_thread_handler,
+ IRQF_ONESHOT, KBUILD_MODNAME, dev);
+ else
+ err = request_threaded_irq(pdev->irq,
+ mei_txe_irq_quick_handler,
+ mei_txe_irq_thread_handler,
+ IRQF_SHARED, KBUILD_MODNAME, dev);
+ if (err) {
+ dev_err(&pdev->dev, "request_threaded_irq failed: irq = %d.\n",
+ pdev->irq);
+ return err;
+ }
+
+ err = mei_restart(dev);
+
+ return err;
+}
+#endif /* CONFIG_PM_SLEEP */
+
+#ifdef CONFIG_PM
+static int mei_txe_pm_runtime_idle(struct device *device)
+{
+ struct pci_dev *pdev = to_pci_dev(device);
+ struct mei_device *dev;
+
+ dev_dbg(&pdev->dev, "rpm: txe: runtime_idle\n");
+
+ dev = pci_get_drvdata(pdev);
+ if (!dev)
+ return -ENODEV;
+ if (mei_write_is_idle(dev))
+ pm_runtime_autosuspend(device);
+
+ return -EBUSY;
+}
+static int mei_txe_pm_runtime_suspend(struct device *device)
+{
+ struct pci_dev *pdev = to_pci_dev(device);
+ struct mei_device *dev;
+ int ret;
+
+ dev_dbg(&pdev->dev, "rpm: txe: runtime suspend\n");
+
+ dev = pci_get_drvdata(pdev);
+ if (!dev)
+ return -ENODEV;
+
+ mutex_lock(&dev->device_lock);
+
+ if (mei_write_is_idle(dev))
+ ret = mei_txe_aliveness_set_sync(dev, 0);
+ else
+ ret = -EAGAIN;
+
+ /*
+ * If everything is okay we're about to enter PCI low
+ * power state (D3) therefor we need to disable the
+ * interrupts towards host.
+ * However if device is not wakeable we do not enter
+ * D-low state and we need to keep the interrupt kicking
+ */
+ if (!ret && pci_dev_run_wake(pdev))
+ mei_disable_interrupts(dev);
+
+ dev_dbg(&pdev->dev, "rpm: txe: runtime suspend ret=%d\n", ret);
+
+ mutex_unlock(&dev->device_lock);
+ return ret;
+}
+
+static int mei_txe_pm_runtime_resume(struct device *device)
+{
+ struct pci_dev *pdev = to_pci_dev(device);
+ struct mei_device *dev;
+ int ret;
+
+ dev_dbg(&pdev->dev, "rpm: txe: runtime resume\n");
+
+ dev = pci_get_drvdata(pdev);
+ if (!dev)
+ return -ENODEV;
+
+ mutex_lock(&dev->device_lock);
+
+ mei_enable_interrupts(dev);
+
+ ret = mei_txe_aliveness_set_sync(dev, 1);
+
+ mutex_unlock(&dev->device_lock);
+
+ dev_dbg(&pdev->dev, "rpm: txe: runtime resume ret = %d\n", ret);
+
+ return ret;
+}
+
+/**
+ * mei_txe_set_pm_domain - fill and set pm domain structure for device
+ *
+ * @dev: mei_device
+ */
+static inline void mei_txe_set_pm_domain(struct mei_device *dev)
+{
+ struct pci_dev *pdev = to_pci_dev(dev->dev);
+
+ if (pdev->dev.bus && pdev->dev.bus->pm) {
+ dev->pg_domain.ops = *pdev->dev.bus->pm;
+
+ dev->pg_domain.ops.runtime_suspend = mei_txe_pm_runtime_suspend;
+ dev->pg_domain.ops.runtime_resume = mei_txe_pm_runtime_resume;
+ dev->pg_domain.ops.runtime_idle = mei_txe_pm_runtime_idle;
+
+ pdev->dev.pm_domain = &dev->pg_domain;
+ }
+}
+
+/**
+ * mei_txe_unset_pm_domain - clean pm domain structure for device
+ *
+ * @dev: mei_device
+ */
+static inline void mei_txe_unset_pm_domain(struct mei_device *dev)
+{
+ /* stop using pm callbacks if any */
+ dev->dev->pm_domain = NULL;
+}
+
+static const struct dev_pm_ops mei_txe_pm_ops = {
+ SET_SYSTEM_SLEEP_PM_OPS(mei_txe_pci_suspend,
+ mei_txe_pci_resume)
+ SET_RUNTIME_PM_OPS(
+ mei_txe_pm_runtime_suspend,
+ mei_txe_pm_runtime_resume,
+ mei_txe_pm_runtime_idle)
+};
+
+#define MEI_TXE_PM_OPS (&mei_txe_pm_ops)
+#else
+#define MEI_TXE_PM_OPS NULL
+#endif /* CONFIG_PM */
+
+/*
+ * PCI driver structure
+ */
+static struct pci_driver mei_txe_driver = {
+ .name = KBUILD_MODNAME,
+ .id_table = mei_txe_pci_tbl,
+ .probe = mei_txe_probe,
+ .remove = mei_txe_remove,
+ .shutdown = mei_txe_remove,
+ .driver.pm = MEI_TXE_PM_OPS,
+};
+
+module_pci_driver(mei_txe_driver);
+
+MODULE_AUTHOR("Intel Corporation");
+MODULE_DESCRIPTION("Intel(R) Trusted Execution Environment Interface");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/misc/mei/wd.c b/drivers/misc/mei/wd.c
new file mode 100644
index 0000000..b346638
--- /dev/null
+++ b/drivers/misc/mei/wd.c
@@ -0,0 +1,391 @@
+/*
+ *
+ * Intel Management Engine Interface (Intel MEI) Linux driver
+ * Copyright (c) 2003-2012, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/device.h>
+#include <linux/sched.h>
+#include <linux/watchdog.h>
+
+#include <linux/mei.h>
+
+#include "mei_dev.h"
+#include "hbm.h"
+#include "client.h"
+
+static const u8 mei_start_wd_params[] = { 0x02, 0x12, 0x13, 0x10 };
+static const u8 mei_stop_wd_params[] = { 0x02, 0x02, 0x14, 0x10 };
+
+/*
+ * AMT Watchdog Device
+ */
+#define INTEL_AMT_WATCHDOG_ID "INTCAMT"
+
+/* UUIDs for AMT F/W clients */
+const uuid_le mei_wd_guid = UUID_LE(0x05B79A6F, 0x4628, 0x4D7F, 0x89,
+ 0x9D, 0xA9, 0x15, 0x14, 0xCB,
+ 0x32, 0xAB);
+
+static void mei_wd_set_start_timeout(struct mei_device *dev, u16 timeout)
+{
+ dev_dbg(dev->dev, "wd: set timeout=%d.\n", timeout);
+ memcpy(dev->wd_data, mei_start_wd_params, MEI_WD_HDR_SIZE);
+ memcpy(dev->wd_data + MEI_WD_HDR_SIZE, &timeout, sizeof(u16));
+}
+
+/**
+ * mei_wd_host_init - connect to the watchdog client
+ *
+ * @dev: the device structure
+ * @me_cl: me client
+ *
+ * Return: -ENOTTY if wd client cannot be found
+ * -EIO if write has failed
+ * 0 on success
+ */
+int mei_wd_host_init(struct mei_device *dev, struct mei_me_client *me_cl)
+{
+ struct mei_cl *cl = &dev->wd_cl;
+ int ret;
+
+ mei_cl_init(cl, dev);
+
+ dev->wd_timeout = MEI_WD_DEFAULT_TIMEOUT;
+ dev->wd_state = MEI_WD_IDLE;
+
+ ret = mei_cl_link(cl, MEI_WD_HOST_CLIENT_ID);
+ if (ret < 0) {
+ dev_info(dev->dev, "wd: failed link client\n");
+ return ret;
+ }
+
+ ret = mei_cl_connect(cl, me_cl, NULL);
+ if (ret) {
+ dev_err(dev->dev, "wd: failed to connect = %d\n", ret);
+ mei_cl_unlink(cl);
+ return ret;
+ }
+
+ ret = mei_watchdog_register(dev);
+ if (ret) {
+ mei_cl_disconnect(cl);
+ mei_cl_unlink(cl);
+ }
+ return ret;
+}
+
+/**
+ * mei_wd_send - sends watch dog message to fw.
+ *
+ * @dev: the device structure
+ *
+ * Return: 0 if success,
+ * -EIO when message send fails
+ * -EINVAL when invalid message is to be sent
+ * -ENODEV on flow control failure
+ */
+int mei_wd_send(struct mei_device *dev)
+{
+ struct mei_cl *cl = &dev->wd_cl;
+ struct mei_msg_hdr hdr;
+ int ret;
+
+ hdr.host_addr = cl->host_client_id;
+ hdr.me_addr = mei_cl_me_id(cl);
+ hdr.msg_complete = 1;
+ hdr.reserved = 0;
+ hdr.internal = 0;
+
+ if (!memcmp(dev->wd_data, mei_start_wd_params, MEI_WD_HDR_SIZE))
+ hdr.length = MEI_WD_START_MSG_SIZE;
+ else if (!memcmp(dev->wd_data, mei_stop_wd_params, MEI_WD_HDR_SIZE))
+ hdr.length = MEI_WD_STOP_MSG_SIZE;
+ else {
+ dev_err(dev->dev, "wd: invalid message is to be sent, aborting\n");
+ return -EINVAL;
+ }
+
+ ret = mei_write_message(dev, &hdr, dev->wd_data);
+ if (ret) {
+ dev_err(dev->dev, "wd: write message failed\n");
+ return ret;
+ }
+
+ ret = mei_cl_flow_ctrl_reduce(cl);
+ if (ret) {
+ dev_err(dev->dev, "wd: flow_ctrl_reduce failed.\n");
+ return ret;
+ }
+
+ return 0;
+}
+
+/**
+ * mei_wd_stop - sends watchdog stop message to fw.
+ *
+ * @dev: the device structure
+ *
+ * Return: 0 if success
+ * on error:
+ * -EIO when message send fails
+ * -EINVAL when invalid message is to be sent
+ * -ETIME on message timeout
+ */
+int mei_wd_stop(struct mei_device *dev)
+{
+ struct mei_cl *cl = &dev->wd_cl;
+ int ret;
+
+ if (!mei_cl_is_connected(cl) ||
+ dev->wd_state != MEI_WD_RUNNING)
+ return 0;
+
+ memcpy(dev->wd_data, mei_stop_wd_params, MEI_WD_STOP_MSG_SIZE);
+
+ dev->wd_state = MEI_WD_STOPPING;
+
+ ret = mei_cl_flow_ctrl_creds(cl);
+ if (ret < 0)
+ goto err;
+
+ if (ret && mei_hbuf_acquire(dev)) {
+ ret = mei_wd_send(dev);
+ if (ret)
+ goto err;
+ dev->wd_pending = false;
+ } else {
+ dev->wd_pending = true;
+ }
+
+ mutex_unlock(&dev->device_lock);
+
+ ret = wait_event_timeout(dev->wait_stop_wd,
+ dev->wd_state == MEI_WD_IDLE,
+ msecs_to_jiffies(MEI_WD_STOP_TIMEOUT));
+ mutex_lock(&dev->device_lock);
+ if (dev->wd_state != MEI_WD_IDLE) {
+ /* timeout */
+ ret = -ETIME;
+ dev_warn(dev->dev, "wd: stop failed to complete ret=%d\n", ret);
+ goto err;
+ }
+ dev_dbg(dev->dev, "wd: stop completed after %u msec\n",
+ MEI_WD_STOP_TIMEOUT - jiffies_to_msecs(ret));
+ return 0;
+err:
+ return ret;
+}
+
+/**
+ * mei_wd_ops_start - wd start command from the watchdog core.
+ *
+ * @wd_dev: watchdog device struct
+ *
+ * Return: 0 if success, negative errno code for failure
+ */
+static int mei_wd_ops_start(struct watchdog_device *wd_dev)
+{
+ struct mei_device *dev;
+ struct mei_cl *cl;
+ int err = -ENODEV;
+
+ dev = watchdog_get_drvdata(wd_dev);
+ if (!dev)
+ return -ENODEV;
+
+ cl = &dev->wd_cl;
+
+ mutex_lock(&dev->device_lock);
+
+ if (dev->dev_state != MEI_DEV_ENABLED) {
+ dev_dbg(dev->dev, "wd: dev_state != MEI_DEV_ENABLED dev_state = %s\n",
+ mei_dev_state_str(dev->dev_state));
+ goto end_unlock;
+ }
+
+ if (!mei_cl_is_connected(cl)) {
+ cl_dbg(dev, cl, "MEI Driver is not connected to Watchdog Client\n");
+ goto end_unlock;
+ }
+
+ mei_wd_set_start_timeout(dev, dev->wd_timeout);
+
+ err = 0;
+end_unlock:
+ mutex_unlock(&dev->device_lock);
+ return err;
+}
+
+/**
+ * mei_wd_ops_stop - wd stop command from the watchdog core.
+ *
+ * @wd_dev: watchdog device struct
+ *
+ * Return: 0 if success, negative errno code for failure
+ */
+static int mei_wd_ops_stop(struct watchdog_device *wd_dev)
+{
+ struct mei_device *dev;
+
+ dev = watchdog_get_drvdata(wd_dev);
+ if (!dev)
+ return -ENODEV;
+
+ mutex_lock(&dev->device_lock);
+ mei_wd_stop(dev);
+ mutex_unlock(&dev->device_lock);
+
+ return 0;
+}
+
+/**
+ * mei_wd_ops_ping - wd ping command from the watchdog core.
+ *
+ * @wd_dev: watchdog device struct
+ *
+ * Return: 0 if success, negative errno code for failure
+ */
+static int mei_wd_ops_ping(struct watchdog_device *wd_dev)
+{
+ struct mei_device *dev;
+ struct mei_cl *cl;
+ int ret;
+
+ dev = watchdog_get_drvdata(wd_dev);
+ if (!dev)
+ return -ENODEV;
+
+ cl = &dev->wd_cl;
+
+ mutex_lock(&dev->device_lock);
+
+ if (!mei_cl_is_connected(cl)) {
+ cl_err(dev, cl, "wd: not connected.\n");
+ ret = -ENODEV;
+ goto end;
+ }
+
+ dev->wd_state = MEI_WD_RUNNING;
+
+ ret = mei_cl_flow_ctrl_creds(cl);
+ if (ret < 0)
+ goto end;
+
+ /* Check if we can send the ping to HW*/
+ if (ret && mei_hbuf_acquire(dev)) {
+ dev_dbg(dev->dev, "wd: sending ping\n");
+
+ ret = mei_wd_send(dev);
+ if (ret)
+ goto end;
+ dev->wd_pending = false;
+ } else {
+ dev->wd_pending = true;
+ }
+
+end:
+ mutex_unlock(&dev->device_lock);
+ return ret;
+}
+
+/**
+ * mei_wd_ops_set_timeout - wd set timeout command from the watchdog core.
+ *
+ * @wd_dev: watchdog device struct
+ * @timeout: timeout value to set
+ *
+ * Return: 0 if success, negative errno code for failure
+ */
+static int mei_wd_ops_set_timeout(struct watchdog_device *wd_dev,
+ unsigned int timeout)
+{
+ struct mei_device *dev;
+
+ dev = watchdog_get_drvdata(wd_dev);
+ if (!dev)
+ return -ENODEV;
+
+ /* Check Timeout value */
+ if (timeout < MEI_WD_MIN_TIMEOUT || timeout > MEI_WD_MAX_TIMEOUT)
+ return -EINVAL;
+
+ mutex_lock(&dev->device_lock);
+
+ dev->wd_timeout = timeout;
+ wd_dev->timeout = timeout;
+ mei_wd_set_start_timeout(dev, dev->wd_timeout);
+
+ mutex_unlock(&dev->device_lock);
+
+ return 0;
+}
+
+/*
+ * Watchdog Device structs
+ */
+static const struct watchdog_ops wd_ops = {
+ .owner = THIS_MODULE,
+ .start = mei_wd_ops_start,
+ .stop = mei_wd_ops_stop,
+ .ping = mei_wd_ops_ping,
+ .set_timeout = mei_wd_ops_set_timeout,
+};
+static const struct watchdog_info wd_info = {
+ .identity = INTEL_AMT_WATCHDOG_ID,
+ .options = WDIOF_KEEPALIVEPING |
+ WDIOF_SETTIMEOUT |
+ WDIOF_ALARMONLY,
+};
+
+static struct watchdog_device amt_wd_dev = {
+ .info = &wd_info,
+ .ops = &wd_ops,
+ .timeout = MEI_WD_DEFAULT_TIMEOUT,
+ .min_timeout = MEI_WD_MIN_TIMEOUT,
+ .max_timeout = MEI_WD_MAX_TIMEOUT,
+};
+
+
+int mei_watchdog_register(struct mei_device *dev)
+{
+
+ int ret;
+
+ amt_wd_dev.parent = dev->dev;
+ /* unlock to perserve correct locking order */
+ mutex_unlock(&dev->device_lock);
+ ret = watchdog_register_device(&amt_wd_dev);
+ mutex_lock(&dev->device_lock);
+ if (ret) {
+ dev_err(dev->dev, "wd: unable to register watchdog device = %d.\n",
+ ret);
+ return ret;
+ }
+
+ dev_dbg(dev->dev, "wd: successfully register watchdog interface.\n");
+ watchdog_set_drvdata(&amt_wd_dev, dev);
+ return 0;
+}
+
+void mei_watchdog_unregister(struct mei_device *dev)
+{
+ if (watchdog_get_drvdata(&amt_wd_dev) == NULL)
+ return;
+
+ watchdog_set_drvdata(&amt_wd_dev, NULL);
+ watchdog_unregister_device(&amt_wd_dev);
+}
+
diff --git a/drivers/misc/mic/Kconfig b/drivers/misc/mic/Kconfig
new file mode 100644
index 0000000..40677df
--- /dev/null
+++ b/drivers/misc/mic/Kconfig
@@ -0,0 +1,109 @@
+comment "Intel MIC Bus Driver"
+
+config INTEL_MIC_BUS
+ tristate "Intel MIC Bus Driver"
+ depends on 64BIT && PCI && X86 && X86_DEV_DMA_OPS
+ help
+ This option is selected by any driver which registers a
+ device or driver on the MIC Bus, such as CONFIG_INTEL_MIC_HOST,
+ CONFIG_INTEL_MIC_CARD, CONFIG_INTEL_MIC_X100_DMA etc.
+
+ If you are building a host/card kernel with an Intel MIC device
+ then say M (recommended) or Y, else say N. If unsure say N.
+
+ More information about the Intel MIC family as well as the Linux
+ OS and tools for MIC to use with this driver are available from
+ <http://software.intel.com/en-us/mic-developer>.
+
+comment "SCIF Bus Driver"
+
+config SCIF_BUS
+ tristate "SCIF Bus Driver"
+ depends on 64BIT && PCI && X86 && X86_DEV_DMA_OPS
+ help
+ This option is selected by any driver which registers a
+ device or driver on the SCIF Bus, such as CONFIG_INTEL_MIC_HOST
+ and CONFIG_INTEL_MIC_CARD.
+
+ If you are building a host/card kernel with an Intel MIC device
+ then say M (recommended) or Y, else say N. If unsure say N.
+
+ More information about the Intel MIC family as well as the Linux
+ OS and tools for MIC to use with this driver are available from
+ <http://software.intel.com/en-us/mic-developer>.
+
+comment "Intel MIC Host Driver"
+
+config INTEL_MIC_HOST
+ tristate "Intel MIC Host Driver"
+ depends on 64BIT && PCI && X86 && INTEL_MIC_BUS && SCIF_BUS && MIC_COSM
+ select VHOST_RING
+ help
+ This enables Host Driver support for the Intel Many Integrated
+ Core (MIC) family of PCIe form factor coprocessor devices that
+ run a 64 bit Linux OS. The driver manages card OS state and
+ enables communication between host and card. Intel MIC X100
+ devices are currently supported.
+
+ If you are building a host kernel with an Intel MIC device then
+ say M (recommended) or Y, else say N. If unsure say N.
+
+ More information about the Intel MIC family as well as the Linux
+ OS and tools for MIC to use with this driver are available from
+ <http://software.intel.com/en-us/mic-developer>.
+
+comment "Intel MIC Card Driver"
+
+config INTEL_MIC_CARD
+ tristate "Intel MIC Card Driver"
+ depends on 64BIT && X86 && INTEL_MIC_BUS && SCIF_BUS && MIC_COSM
+ select VIRTIO
+ help
+ This enables card driver support for the Intel Many Integrated
+ Core (MIC) device family. The card driver communicates shutdown/
+ crash events to the host and allows registration/configuration of
+ virtio devices. Intel MIC X100 devices are currently supported.
+
+ If you are building a card kernel for an Intel MIC device then
+ say M (recommended) or Y, else say N. If unsure say N.
+
+ For more information see
+ <http://software.intel.com/en-us/mic-developer>.
+
+comment "SCIF Driver"
+
+config SCIF
+ tristate "SCIF Driver"
+ depends on 64BIT && PCI && X86 && SCIF_BUS && IOMMU_SUPPORT
+ select IOMMU_IOVA
+ help
+ This enables SCIF Driver support for the Intel Many Integrated
+ Core (MIC) family of PCIe form factor coprocessor devices that
+ run a 64 bit Linux OS. The Symmetric Communication Interface
+ (SCIF (pronounced as skiff)) is a low level communications API
+ across PCIe currently implemented for MIC.
+
+ If you are building a host kernel with an Intel MIC device then
+ say M (recommended) or Y, else say N. If unsure say N.
+
+ More information about the Intel MIC family as well as the Linux
+ OS and tools for MIC to use with this driver are available from
+ <http://software.intel.com/en-us/mic-developer>.
+
+comment "Intel MIC Coprocessor State Management (COSM) Drivers"
+
+config MIC_COSM
+ tristate "Intel MIC Coprocessor State Management (COSM) Drivers"
+ depends on 64BIT && PCI && X86 && SCIF
+ help
+ This enables COSM driver support for the Intel Many
+ Integrated Core (MIC) family of PCIe form factor coprocessor
+ devices. COSM drivers implement functions such as boot,
+ shutdown, reset and reboot of MIC devices.
+
+ If you are building a host kernel with an Intel MIC device then
+ say M (recommended) or Y, else say N. If unsure say N.
+
+ More information about the Intel MIC family as well as the Linux
+ OS and tools for MIC to use with this driver are available from
+ <http://software.intel.com/en-us/mic-developer>.
diff --git a/drivers/misc/mic/Makefile b/drivers/misc/mic/Makefile
new file mode 100644
index 0000000..e288a11
--- /dev/null
+++ b/drivers/misc/mic/Makefile
@@ -0,0 +1,10 @@
+#
+# Makefile - Intel MIC Linux driver.
+# Copyright(c) 2013, Intel Corporation.
+#
+obj-$(CONFIG_INTEL_MIC_HOST) += host/
+obj-$(CONFIG_INTEL_MIC_CARD) += card/
+obj-y += bus/
+obj-$(CONFIG_SCIF) += scif/
+obj-$(CONFIG_MIC_COSM) += cosm/
+obj-$(CONFIG_MIC_COSM) += cosm_client/
diff --git a/drivers/misc/mic/bus/Makefile b/drivers/misc/mic/bus/Makefile
new file mode 100644
index 0000000..761842b
--- /dev/null
+++ b/drivers/misc/mic/bus/Makefile
@@ -0,0 +1,7 @@
+#
+# Makefile - Intel MIC Linux driver.
+# Copyright(c) 2014, Intel Corporation.
+#
+obj-$(CONFIG_INTEL_MIC_BUS) += mic_bus.o
+obj-$(CONFIG_SCIF_BUS) += scif_bus.o
+obj-$(CONFIG_MIC_COSM) += cosm_bus.o
diff --git a/drivers/misc/mic/bus/cosm_bus.c b/drivers/misc/mic/bus/cosm_bus.c
new file mode 100644
index 0000000..d31d6c6
--- /dev/null
+++ b/drivers/misc/mic/bus/cosm_bus.c
@@ -0,0 +1,141 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2015 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Intel MIC COSM Bus Driver
+ */
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/idr.h>
+#include "cosm_bus.h"
+
+/* Unique numbering for cosm devices. */
+static DEFINE_IDA(cosm_index_ida);
+
+static int cosm_dev_probe(struct device *d)
+{
+ struct cosm_device *dev = dev_to_cosm(d);
+ struct cosm_driver *drv = drv_to_cosm(dev->dev.driver);
+
+ return drv->probe(dev);
+}
+
+static int cosm_dev_remove(struct device *d)
+{
+ struct cosm_device *dev = dev_to_cosm(d);
+ struct cosm_driver *drv = drv_to_cosm(dev->dev.driver);
+
+ drv->remove(dev);
+ return 0;
+}
+
+static struct bus_type cosm_bus = {
+ .name = "cosm_bus",
+ .probe = cosm_dev_probe,
+ .remove = cosm_dev_remove,
+};
+
+int cosm_register_driver(struct cosm_driver *driver)
+{
+ driver->driver.bus = &cosm_bus;
+ return driver_register(&driver->driver);
+}
+EXPORT_SYMBOL_GPL(cosm_register_driver);
+
+void cosm_unregister_driver(struct cosm_driver *driver)
+{
+ driver_unregister(&driver->driver);
+}
+EXPORT_SYMBOL_GPL(cosm_unregister_driver);
+
+static inline void cosm_release_dev(struct device *d)
+{
+ struct cosm_device *cdev = dev_to_cosm(d);
+
+ kfree(cdev);
+}
+
+struct cosm_device *
+cosm_register_device(struct device *pdev, struct cosm_hw_ops *hw_ops)
+{
+ struct cosm_device *cdev;
+ int ret;
+
+ cdev = kzalloc(sizeof(*cdev), GFP_KERNEL);
+ if (!cdev)
+ return ERR_PTR(-ENOMEM);
+
+ cdev->dev.parent = pdev;
+ cdev->dev.release = cosm_release_dev;
+ cdev->hw_ops = hw_ops;
+ dev_set_drvdata(&cdev->dev, cdev);
+ cdev->dev.bus = &cosm_bus;
+
+ /* Assign a unique device index and hence name */
+ ret = ida_simple_get(&cosm_index_ida, 0, 0, GFP_KERNEL);
+ if (ret < 0)
+ goto free_cdev;
+
+ cdev->index = ret;
+ cdev->dev.id = ret;
+ dev_set_name(&cdev->dev, "cosm-dev%u", cdev->index);
+
+ ret = device_register(&cdev->dev);
+ if (ret)
+ goto ida_remove;
+ return cdev;
+ida_remove:
+ ida_simple_remove(&cosm_index_ida, cdev->index);
+free_cdev:
+ put_device(&cdev->dev);
+ return ERR_PTR(ret);
+}
+EXPORT_SYMBOL_GPL(cosm_register_device);
+
+void cosm_unregister_device(struct cosm_device *dev)
+{
+ int index = dev->index; /* save for after device release */
+
+ device_unregister(&dev->dev);
+ ida_simple_remove(&cosm_index_ida, index);
+}
+EXPORT_SYMBOL_GPL(cosm_unregister_device);
+
+struct cosm_device *cosm_find_cdev_by_id(int id)
+{
+ struct device *dev = subsys_find_device_by_id(&cosm_bus, id, NULL);
+
+ return dev ? container_of(dev, struct cosm_device, dev) : NULL;
+}
+EXPORT_SYMBOL_GPL(cosm_find_cdev_by_id);
+
+static int __init cosm_init(void)
+{
+ return bus_register(&cosm_bus);
+}
+
+static void __exit cosm_exit(void)
+{
+ bus_unregister(&cosm_bus);
+ ida_destroy(&cosm_index_ida);
+}
+
+core_initcall(cosm_init);
+module_exit(cosm_exit);
+
+MODULE_AUTHOR("Intel Corporation");
+MODULE_DESCRIPTION("Intel(R) MIC card OS state management bus driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/misc/mic/bus/cosm_bus.h b/drivers/misc/mic/bus/cosm_bus.h
new file mode 100644
index 0000000..f7c57f2
--- /dev/null
+++ b/drivers/misc/mic/bus/cosm_bus.h
@@ -0,0 +1,134 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2015 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Intel MIC COSM Bus Driver
+ */
+#ifndef _COSM_BUS_H_
+#define _COSM_BUS_H_
+
+#include <linux/scif.h>
+#include <linux/mic_common.h>
+#include "../common/mic_dev.h"
+
+/**
+ * cosm_device - representation of a cosm device
+ *
+ * @attr_group: Pointer to list of sysfs attribute groups.
+ * @sdev: Device for sysfs entries.
+ * @state: MIC state.
+ * @shutdown_status: MIC status reported by card for shutdown/crashes.
+ * @shutdown_status_int: Internal shutdown status maintained by the driver
+ * @cosm_mutex: Mutex for synchronizing access to data structures.
+ * @reset_trigger_work: Work for triggering reset requests.
+ * @scif_work: Work for handling per device SCIF connections
+ * @cmdline: Kernel command line.
+ * @firmware: Firmware file name.
+ * @ramdisk: Ramdisk file name.
+ * @bootmode: Boot mode i.e. "linux" or "elf" for flash updates.
+ * @log_buf_addr: Log buffer address for MIC.
+ * @log_buf_len: Log buffer length address for MIC.
+ * @state_sysfs: Sysfs dirent for notifying ring 3 about MIC state changes.
+ * @hw_ops: the hardware bus ops for this device.
+ * @dev: underlying device.
+ * @index: unique position on the cosm bus
+ * @dbg_dir: debug fs directory
+ * @newepd: new endpoint from scif accept to be assigned to this cdev
+ * @epd: SCIF endpoint for this cdev
+ * @heartbeat_watchdog_enable: if heartbeat watchdog is enabled for this cdev
+ * @sysfs_heartbeat_enable: sysfs setting for disabling heartbeat notification
+ */
+struct cosm_device {
+ const struct attribute_group **attr_group;
+ struct device *sdev;
+ u8 state;
+ u8 shutdown_status;
+ u8 shutdown_status_int;
+ struct mutex cosm_mutex;
+ struct work_struct reset_trigger_work;
+ struct work_struct scif_work;
+ char *cmdline;
+ char *firmware;
+ char *ramdisk;
+ char *bootmode;
+ void *log_buf_addr;
+ int *log_buf_len;
+ struct kernfs_node *state_sysfs;
+ struct cosm_hw_ops *hw_ops;
+ struct device dev;
+ int index;
+ struct dentry *dbg_dir;
+ scif_epd_t newepd;
+ scif_epd_t epd;
+ bool heartbeat_watchdog_enable;
+ bool sysfs_heartbeat_enable;
+};
+
+/**
+ * cosm_driver - operations for a cosm driver
+ *
+ * @driver: underlying device driver (populate name and owner).
+ * @probe: the function to call when a device is found. Returns 0 or -errno.
+ * @remove: the function to call when a device is removed.
+ */
+struct cosm_driver {
+ struct device_driver driver;
+ int (*probe)(struct cosm_device *dev);
+ void (*remove)(struct cosm_device *dev);
+};
+
+/**
+ * cosm_hw_ops - cosm bus ops
+ *
+ * @reset: trigger MIC reset
+ * @force_reset: force MIC reset
+ * @post_reset: inform MIC reset is complete
+ * @ready: is MIC ready for OS download
+ * @start: boot MIC
+ * @stop: prepare MIC for reset
+ * @family: return MIC HW family string
+ * @stepping: return MIC HW stepping string
+ * @aper: return MIC PCIe aperture
+ */
+struct cosm_hw_ops {
+ void (*reset)(struct cosm_device *cdev);
+ void (*force_reset)(struct cosm_device *cdev);
+ void (*post_reset)(struct cosm_device *cdev, enum mic_states state);
+ bool (*ready)(struct cosm_device *cdev);
+ int (*start)(struct cosm_device *cdev, int id);
+ void (*stop)(struct cosm_device *cdev, bool force);
+ ssize_t (*family)(struct cosm_device *cdev, char *buf);
+ ssize_t (*stepping)(struct cosm_device *cdev, char *buf);
+ struct mic_mw *(*aper)(struct cosm_device *cdev);
+};
+
+struct cosm_device *
+cosm_register_device(struct device *pdev, struct cosm_hw_ops *hw_ops);
+void cosm_unregister_device(struct cosm_device *dev);
+int cosm_register_driver(struct cosm_driver *drv);
+void cosm_unregister_driver(struct cosm_driver *drv);
+struct cosm_device *cosm_find_cdev_by_id(int id);
+
+static inline struct cosm_device *dev_to_cosm(struct device *dev)
+{
+ return container_of(dev, struct cosm_device, dev);
+}
+
+static inline struct cosm_driver *drv_to_cosm(struct device_driver *drv)
+{
+ return container_of(drv, struct cosm_driver, driver);
+}
+#endif /* _COSM_BUS_H */
diff --git a/drivers/misc/mic/bus/mic_bus.c b/drivers/misc/mic/bus/mic_bus.c
new file mode 100644
index 0000000..be37890
--- /dev/null
+++ b/drivers/misc/mic/bus/mic_bus.c
@@ -0,0 +1,204 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2014 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Intel MIC Bus driver.
+ *
+ * This implementation is very similar to the the virtio bus driver
+ * implementation @ drivers/virtio/virtio.c
+ */
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/idr.h>
+#include <linux/mic_bus.h>
+
+static ssize_t device_show(struct device *d,
+ struct device_attribute *attr, char *buf)
+{
+ struct mbus_device *dev = dev_to_mbus(d);
+ return sprintf(buf, "0x%04x\n", dev->id.device);
+}
+static DEVICE_ATTR_RO(device);
+
+static ssize_t vendor_show(struct device *d,
+ struct device_attribute *attr, char *buf)
+{
+ struct mbus_device *dev = dev_to_mbus(d);
+ return sprintf(buf, "0x%04x\n", dev->id.vendor);
+}
+static DEVICE_ATTR_RO(vendor);
+
+static ssize_t modalias_show(struct device *d,
+ struct device_attribute *attr, char *buf)
+{
+ struct mbus_device *dev = dev_to_mbus(d);
+ return sprintf(buf, "mbus:d%08Xv%08X\n",
+ dev->id.device, dev->id.vendor);
+}
+static DEVICE_ATTR_RO(modalias);
+
+static struct attribute *mbus_dev_attrs[] = {
+ &dev_attr_device.attr,
+ &dev_attr_vendor.attr,
+ &dev_attr_modalias.attr,
+ NULL,
+};
+ATTRIBUTE_GROUPS(mbus_dev);
+
+static inline int mbus_id_match(const struct mbus_device *dev,
+ const struct mbus_device_id *id)
+{
+ if (id->device != dev->id.device && id->device != MBUS_DEV_ANY_ID)
+ return 0;
+
+ return id->vendor == MBUS_DEV_ANY_ID || id->vendor == dev->id.vendor;
+}
+
+/*
+ * This looks through all the IDs a driver claims to support. If any of them
+ * match, we return 1 and the kernel will call mbus_dev_probe().
+ */
+static int mbus_dev_match(struct device *dv, struct device_driver *dr)
+{
+ unsigned int i;
+ struct mbus_device *dev = dev_to_mbus(dv);
+ const struct mbus_device_id *ids;
+
+ ids = drv_to_mbus(dr)->id_table;
+ for (i = 0; ids[i].device; i++)
+ if (mbus_id_match(dev, &ids[i]))
+ return 1;
+ return 0;
+}
+
+static int mbus_uevent(struct device *dv, struct kobj_uevent_env *env)
+{
+ struct mbus_device *dev = dev_to_mbus(dv);
+
+ return add_uevent_var(env, "MODALIAS=mbus:d%08Xv%08X",
+ dev->id.device, dev->id.vendor);
+}
+
+static int mbus_dev_probe(struct device *d)
+{
+ int err;
+ struct mbus_device *dev = dev_to_mbus(d);
+ struct mbus_driver *drv = drv_to_mbus(dev->dev.driver);
+
+ err = drv->probe(dev);
+ if (!err)
+ if (drv->scan)
+ drv->scan(dev);
+ return err;
+}
+
+static int mbus_dev_remove(struct device *d)
+{
+ struct mbus_device *dev = dev_to_mbus(d);
+ struct mbus_driver *drv = drv_to_mbus(dev->dev.driver);
+
+ drv->remove(dev);
+ return 0;
+}
+
+static struct bus_type mic_bus = {
+ .name = "mic_bus",
+ .match = mbus_dev_match,
+ .dev_groups = mbus_dev_groups,
+ .uevent = mbus_uevent,
+ .probe = mbus_dev_probe,
+ .remove = mbus_dev_remove,
+};
+
+int mbus_register_driver(struct mbus_driver *driver)
+{
+ driver->driver.bus = &mic_bus;
+ return driver_register(&driver->driver);
+}
+EXPORT_SYMBOL_GPL(mbus_register_driver);
+
+void mbus_unregister_driver(struct mbus_driver *driver)
+{
+ driver_unregister(&driver->driver);
+}
+EXPORT_SYMBOL_GPL(mbus_unregister_driver);
+
+static void mbus_release_dev(struct device *d)
+{
+ struct mbus_device *mbdev = dev_to_mbus(d);
+ kfree(mbdev);
+}
+
+struct mbus_device *
+mbus_register_device(struct device *pdev, int id, struct dma_map_ops *dma_ops,
+ struct mbus_hw_ops *hw_ops, int index,
+ void __iomem *mmio_va)
+{
+ int ret;
+ struct mbus_device *mbdev;
+
+ mbdev = kzalloc(sizeof(*mbdev), GFP_KERNEL);
+ if (!mbdev)
+ return ERR_PTR(-ENOMEM);
+
+ mbdev->mmio_va = mmio_va;
+ mbdev->dev.parent = pdev;
+ mbdev->id.device = id;
+ mbdev->id.vendor = MBUS_DEV_ANY_ID;
+ mbdev->dev.archdata.dma_ops = dma_ops;
+ mbdev->dev.dma_mask = &mbdev->dev.coherent_dma_mask;
+ dma_set_mask(&mbdev->dev, DMA_BIT_MASK(64));
+ mbdev->dev.release = mbus_release_dev;
+ mbdev->hw_ops = hw_ops;
+ mbdev->dev.bus = &mic_bus;
+ mbdev->index = index;
+ dev_set_name(&mbdev->dev, "mbus-dev%u", mbdev->index);
+ /*
+ * device_register() causes the bus infrastructure to look for a
+ * matching driver.
+ */
+ ret = device_register(&mbdev->dev);
+ if (ret)
+ goto free_mbdev;
+ return mbdev;
+free_mbdev:
+ put_device(&mbdev->dev);
+ return ERR_PTR(ret);
+}
+EXPORT_SYMBOL_GPL(mbus_register_device);
+
+void mbus_unregister_device(struct mbus_device *mbdev)
+{
+ device_unregister(&mbdev->dev);
+}
+EXPORT_SYMBOL_GPL(mbus_unregister_device);
+
+static int __init mbus_init(void)
+{
+ return bus_register(&mic_bus);
+}
+
+static void __exit mbus_exit(void)
+{
+ bus_unregister(&mic_bus);
+}
+
+core_initcall(mbus_init);
+module_exit(mbus_exit);
+
+MODULE_AUTHOR("Intel Corporation");
+MODULE_DESCRIPTION("Intel(R) MIC Bus driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/misc/mic/bus/scif_bus.c b/drivers/misc/mic/bus/scif_bus.c
new file mode 100644
index 0000000..ff6e01c
--- /dev/null
+++ b/drivers/misc/mic/bus/scif_bus.c
@@ -0,0 +1,209 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2014 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Intel Symmetric Communications Interface Bus driver.
+ */
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/idr.h>
+#include <linux/dma-mapping.h>
+
+#include "scif_bus.h"
+
+static ssize_t device_show(struct device *d,
+ struct device_attribute *attr, char *buf)
+{
+ struct scif_hw_dev *dev = dev_to_scif(d);
+
+ return sprintf(buf, "0x%04x\n", dev->id.device);
+}
+static DEVICE_ATTR_RO(device);
+
+static ssize_t vendor_show(struct device *d,
+ struct device_attribute *attr, char *buf)
+{
+ struct scif_hw_dev *dev = dev_to_scif(d);
+
+ return sprintf(buf, "0x%04x\n", dev->id.vendor);
+}
+static DEVICE_ATTR_RO(vendor);
+
+static ssize_t modalias_show(struct device *d,
+ struct device_attribute *attr, char *buf)
+{
+ struct scif_hw_dev *dev = dev_to_scif(d);
+
+ return sprintf(buf, "scif:d%08Xv%08X\n",
+ dev->id.device, dev->id.vendor);
+}
+static DEVICE_ATTR_RO(modalias);
+
+static struct attribute *scif_dev_attrs[] = {
+ &dev_attr_device.attr,
+ &dev_attr_vendor.attr,
+ &dev_attr_modalias.attr,
+ NULL,
+};
+ATTRIBUTE_GROUPS(scif_dev);
+
+static inline int scif_id_match(const struct scif_hw_dev *dev,
+ const struct scif_hw_dev_id *id)
+{
+ if (id->device != dev->id.device && id->device != SCIF_DEV_ANY_ID)
+ return 0;
+
+ return id->vendor == SCIF_DEV_ANY_ID || id->vendor == dev->id.vendor;
+}
+
+/*
+ * This looks through all the IDs a driver claims to support. If any of them
+ * match, we return 1 and the kernel will call scif_dev_probe().
+ */
+static int scif_dev_match(struct device *dv, struct device_driver *dr)
+{
+ unsigned int i;
+ struct scif_hw_dev *dev = dev_to_scif(dv);
+ const struct scif_hw_dev_id *ids;
+
+ ids = drv_to_scif(dr)->id_table;
+ for (i = 0; ids[i].device; i++)
+ if (scif_id_match(dev, &ids[i]))
+ return 1;
+ return 0;
+}
+
+static int scif_uevent(struct device *dv, struct kobj_uevent_env *env)
+{
+ struct scif_hw_dev *dev = dev_to_scif(dv);
+
+ return add_uevent_var(env, "MODALIAS=scif:d%08Xv%08X",
+ dev->id.device, dev->id.vendor);
+}
+
+static int scif_dev_probe(struct device *d)
+{
+ struct scif_hw_dev *dev = dev_to_scif(d);
+ struct scif_driver *drv = drv_to_scif(dev->dev.driver);
+
+ return drv->probe(dev);
+}
+
+static int scif_dev_remove(struct device *d)
+{
+ struct scif_hw_dev *dev = dev_to_scif(d);
+ struct scif_driver *drv = drv_to_scif(dev->dev.driver);
+
+ drv->remove(dev);
+ return 0;
+}
+
+static struct bus_type scif_bus = {
+ .name = "scif_bus",
+ .match = scif_dev_match,
+ .dev_groups = scif_dev_groups,
+ .uevent = scif_uevent,
+ .probe = scif_dev_probe,
+ .remove = scif_dev_remove,
+};
+
+int scif_register_driver(struct scif_driver *driver)
+{
+ driver->driver.bus = &scif_bus;
+ return driver_register(&driver->driver);
+}
+EXPORT_SYMBOL_GPL(scif_register_driver);
+
+void scif_unregister_driver(struct scif_driver *driver)
+{
+ driver_unregister(&driver->driver);
+}
+EXPORT_SYMBOL_GPL(scif_unregister_driver);
+
+static void scif_release_dev(struct device *d)
+{
+ struct scif_hw_dev *sdev = dev_to_scif(d);
+
+ kfree(sdev);
+}
+
+struct scif_hw_dev *
+scif_register_device(struct device *pdev, int id, struct dma_map_ops *dma_ops,
+ struct scif_hw_ops *hw_ops, u8 dnode, u8 snode,
+ struct mic_mw *mmio, struct mic_mw *aper, void *dp,
+ void __iomem *rdp, struct dma_chan **chan, int num_chan,
+ bool card_rel_da)
+{
+ int ret;
+ struct scif_hw_dev *sdev;
+
+ sdev = kzalloc(sizeof(*sdev), GFP_KERNEL);
+ if (!sdev)
+ return ERR_PTR(-ENOMEM);
+
+ sdev->dev.parent = pdev;
+ sdev->id.device = id;
+ sdev->id.vendor = SCIF_DEV_ANY_ID;
+ sdev->dev.archdata.dma_ops = dma_ops;
+ sdev->dev.release = scif_release_dev;
+ sdev->hw_ops = hw_ops;
+ sdev->dnode = dnode;
+ sdev->snode = snode;
+ dev_set_drvdata(&sdev->dev, sdev);
+ sdev->dev.bus = &scif_bus;
+ sdev->mmio = mmio;
+ sdev->aper = aper;
+ sdev->dp = dp;
+ sdev->rdp = rdp;
+ sdev->dev.dma_mask = &sdev->dev.coherent_dma_mask;
+ dma_set_mask(&sdev->dev, DMA_BIT_MASK(64));
+ sdev->dma_ch = chan;
+ sdev->num_dma_ch = num_chan;
+ sdev->card_rel_da = card_rel_da;
+ dev_set_name(&sdev->dev, "scif-dev%u", sdev->dnode);
+ /*
+ * device_register() causes the bus infrastructure to look for a
+ * matching driver.
+ */
+ ret = device_register(&sdev->dev);
+ if (ret)
+ goto free_sdev;
+ return sdev;
+free_sdev:
+ put_device(&sdev->dev);
+ return ERR_PTR(ret);
+}
+EXPORT_SYMBOL_GPL(scif_register_device);
+
+void scif_unregister_device(struct scif_hw_dev *sdev)
+{
+ device_unregister(&sdev->dev);
+}
+EXPORT_SYMBOL_GPL(scif_unregister_device);
+
+static int __init scif_init(void)
+{
+ return bus_register(&scif_bus);
+}
+
+static void __exit scif_exit(void)
+{
+ bus_unregister(&scif_bus);
+}
+
+core_initcall(scif_init);
+module_exit(scif_exit);
+
+MODULE_AUTHOR("Intel Corporation");
+MODULE_DESCRIPTION("Intel(R) SCIF Bus driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/misc/mic/bus/scif_bus.h b/drivers/misc/mic/bus/scif_bus.h
new file mode 100644
index 0000000..94f29ac
--- /dev/null
+++ b/drivers/misc/mic/bus/scif_bus.h
@@ -0,0 +1,133 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2014 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Intel Symmetric Communications Interface Bus driver.
+ */
+#ifndef _SCIF_BUS_H_
+#define _SCIF_BUS_H_
+/*
+ * Everything a scif driver needs to work with any particular scif
+ * hardware abstraction layer.
+ */
+#include <linux/dma-mapping.h>
+
+#include <linux/mic_common.h>
+#include "../common/mic_dev.h"
+
+struct scif_hw_dev_id {
+ u32 device;
+ u32 vendor;
+};
+
+#define MIC_SCIF_DEV 1
+#define SCIF_DEV_ANY_ID 0xffffffff
+
+/**
+ * scif_hw_dev - representation of a hardware device abstracted for scif
+ * @hw_ops: the hardware ops supported by this device
+ * @id: the device type identification (used to match it with a driver)
+ * @mmio: MMIO memory window
+ * @aper: Aperture memory window
+ * @dev: underlying device
+ * @dnode - The destination node which this device will communicate with.
+ * @snode - The source node for this device.
+ * @dp - Self device page
+ * @rdp - Remote device page
+ * @dma_ch - Array of DMA channels
+ * @num_dma_ch - Number of DMA channels available
+ * @card_rel_da - Set to true if DMA addresses programmed in the DMA engine
+ * are relative to the card point of view
+ */
+struct scif_hw_dev {
+ struct scif_hw_ops *hw_ops;
+ struct scif_hw_dev_id id;
+ struct mic_mw *mmio;
+ struct mic_mw *aper;
+ struct device dev;
+ u8 dnode;
+ u8 snode;
+ void *dp;
+ void __iomem *rdp;
+ struct dma_chan **dma_ch;
+ int num_dma_ch;
+ bool card_rel_da;
+};
+
+/**
+ * scif_driver - operations for a scif I/O driver
+ * @driver: underlying device driver (populate name and owner).
+ * @id_table: the ids serviced by this driver.
+ * @probe: the function to call when a device is found. Returns 0 or -errno.
+ * @remove: the function to call when a device is removed.
+ */
+struct scif_driver {
+ struct device_driver driver;
+ const struct scif_hw_dev_id *id_table;
+ int (*probe)(struct scif_hw_dev *dev);
+ void (*remove)(struct scif_hw_dev *dev);
+};
+
+/**
+ * scif_hw_ops - Hardware operations for accessing a SCIF device on the SCIF bus.
+ *
+ * @next_db: Obtain the next available doorbell.
+ * @request_irq: Request an interrupt on a particular doorbell.
+ * @free_irq: Free an interrupt requested previously.
+ * @ack_interrupt: acknowledge an interrupt in the ISR.
+ * @send_intr: Send an interrupt to the remote node on a specified doorbell.
+ * @send_p2p_intr: Send an interrupt to the peer node on a specified doorbell
+ * which is specifically targeted for a peer to peer node.
+ * @ioremap: Map a buffer with the specified physical address and length.
+ * @iounmap: Unmap a buffer previously mapped.
+ */
+struct scif_hw_ops {
+ int (*next_db)(struct scif_hw_dev *sdev);
+ struct mic_irq * (*request_irq)(struct scif_hw_dev *sdev,
+ irqreturn_t (*func)(int irq,
+ void *data),
+ const char *name, void *data,
+ int db);
+ void (*free_irq)(struct scif_hw_dev *sdev,
+ struct mic_irq *cookie, void *data);
+ void (*ack_interrupt)(struct scif_hw_dev *sdev, int num);
+ void (*send_intr)(struct scif_hw_dev *sdev, int db);
+ void (*send_p2p_intr)(struct scif_hw_dev *sdev, int db,
+ struct mic_mw *mw);
+ void __iomem * (*ioremap)(struct scif_hw_dev *sdev,
+ phys_addr_t pa, size_t len);
+ void (*iounmap)(struct scif_hw_dev *sdev, void __iomem *va);
+};
+
+int scif_register_driver(struct scif_driver *driver);
+void scif_unregister_driver(struct scif_driver *driver);
+struct scif_hw_dev *
+scif_register_device(struct device *pdev, int id,
+ struct dma_map_ops *dma_ops,
+ struct scif_hw_ops *hw_ops, u8 dnode, u8 snode,
+ struct mic_mw *mmio, struct mic_mw *aper,
+ void *dp, void __iomem *rdp,
+ struct dma_chan **chan, int num_chan,
+ bool card_rel_da);
+void scif_unregister_device(struct scif_hw_dev *sdev);
+
+static inline struct scif_hw_dev *dev_to_scif(struct device *dev)
+{
+ return container_of(dev, struct scif_hw_dev, dev);
+}
+
+static inline struct scif_driver *drv_to_scif(struct device_driver *drv)
+{
+ return container_of(drv, struct scif_driver, driver);
+}
+#endif /* _SCIF_BUS_H */
diff --git a/drivers/misc/mic/card/Makefile b/drivers/misc/mic/card/Makefile
new file mode 100644
index 0000000..69d58be
--- /dev/null
+++ b/drivers/misc/mic/card/Makefile
@@ -0,0 +1,11 @@
+#
+# Makefile - Intel MIC Linux driver.
+# Copyright(c) 2013, Intel Corporation.
+#
+ccflags-y += -DINTEL_MIC_CARD
+
+obj-$(CONFIG_INTEL_MIC_CARD) += mic_card.o
+mic_card-y += mic_x100.o
+mic_card-y += mic_device.o
+mic_card-y += mic_debugfs.o
+mic_card-y += mic_virtio.o
diff --git a/drivers/misc/mic/card/mic_debugfs.c b/drivers/misc/mic/card/mic_debugfs.c
new file mode 100644
index 0000000..421b3d7
--- /dev/null
+++ b/drivers/misc/mic/card/mic_debugfs.c
@@ -0,0 +1,130 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2013 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Disclaimer: The codes contained in these modules may be specific to
+ * the Intel Software Development Platform codenamed: Knights Ferry, and
+ * the Intel product codenamed: Knights Corner, and are not backward
+ * compatible with other Intel products. Additionally, Intel will NOT
+ * support the codes or instruction set in future products.
+ *
+ * Intel MIC Card driver.
+ *
+ */
+#include <linux/debugfs.h>
+#include <linux/delay.h>
+#include <linux/seq_file.h>
+#include <linux/interrupt.h>
+#include <linux/device.h>
+
+#include "../common/mic_dev.h"
+#include "mic_device.h"
+
+/* Debugfs parent dir */
+static struct dentry *mic_dbg;
+
+/**
+ * mic_intr_test - Send interrupts to host.
+ */
+static int mic_intr_test(struct seq_file *s, void *unused)
+{
+ struct mic_driver *mdrv = s->private;
+ struct mic_device *mdev = &mdrv->mdev;
+
+ mic_send_intr(mdev, 0);
+ msleep(1000);
+ mic_send_intr(mdev, 1);
+ msleep(1000);
+ mic_send_intr(mdev, 2);
+ msleep(1000);
+ mic_send_intr(mdev, 3);
+ msleep(1000);
+
+ return 0;
+}
+
+static int mic_intr_test_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, mic_intr_test, inode->i_private);
+}
+
+static int mic_intr_test_release(struct inode *inode, struct file *file)
+{
+ return single_release(inode, file);
+}
+
+static const struct file_operations intr_test_ops = {
+ .owner = THIS_MODULE,
+ .open = mic_intr_test_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = mic_intr_test_release
+};
+
+/**
+ * mic_create_card_debug_dir - Initialize MIC debugfs entries.
+ */
+void __init mic_create_card_debug_dir(struct mic_driver *mdrv)
+{
+ struct dentry *d;
+
+ if (!mic_dbg)
+ return;
+
+ mdrv->dbg_dir = debugfs_create_dir(mdrv->name, mic_dbg);
+ if (!mdrv->dbg_dir) {
+ dev_err(mdrv->dev, "Cant create dbg_dir %s\n", mdrv->name);
+ return;
+ }
+
+ d = debugfs_create_file("intr_test", 0444, mdrv->dbg_dir,
+ mdrv, &intr_test_ops);
+
+ if (!d) {
+ dev_err(mdrv->dev,
+ "Cant create dbg intr_test %s\n", mdrv->name);
+ return;
+ }
+}
+
+/**
+ * mic_delete_card_debug_dir - Uninitialize MIC debugfs entries.
+ */
+void mic_delete_card_debug_dir(struct mic_driver *mdrv)
+{
+ if (!mdrv->dbg_dir)
+ return;
+
+ debugfs_remove_recursive(mdrv->dbg_dir);
+}
+
+/**
+ * mic_init_card_debugfs - Initialize global debugfs entry.
+ */
+void __init mic_init_card_debugfs(void)
+{
+ mic_dbg = debugfs_create_dir(KBUILD_MODNAME, NULL);
+ if (!mic_dbg)
+ pr_err("can't create debugfs dir\n");
+}
+
+/**
+ * mic_exit_card_debugfs - Uninitialize global debugfs entry
+ */
+void mic_exit_card_debugfs(void)
+{
+ debugfs_remove(mic_dbg);
+}
diff --git a/drivers/misc/mic/card/mic_device.c b/drivers/misc/mic/card/mic_device.c
new file mode 100644
index 0000000..d0edaf7
--- /dev/null
+++ b/drivers/misc/mic/card/mic_device.c
@@ -0,0 +1,356 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2013 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Disclaimer: The codes contained in these modules may be specific to
+ * the Intel Software Development Platform codenamed: Knights Ferry, and
+ * the Intel product codenamed: Knights Corner, and are not backward
+ * compatible with other Intel products. Additionally, Intel will NOT
+ * support the codes or instruction set in future products.
+ *
+ * Intel MIC Card driver.
+ *
+ */
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/interrupt.h>
+#include <linux/reboot.h>
+#include <linux/dmaengine.h>
+#include <linux/kmod.h>
+
+#include <linux/mic_common.h>
+#include "../common/mic_dev.h"
+#include "mic_device.h"
+#include "mic_virtio.h"
+
+static struct mic_driver *g_drv;
+
+static int __init mic_dp_init(void)
+{
+ struct mic_driver *mdrv = g_drv;
+ struct mic_device *mdev = &mdrv->mdev;
+ struct mic_bootparam __iomem *bootparam;
+ u64 lo, hi, dp_dma_addr;
+ u32 magic;
+
+ lo = mic_read_spad(&mdrv->mdev, MIC_DPLO_SPAD);
+ hi = mic_read_spad(&mdrv->mdev, MIC_DPHI_SPAD);
+
+ dp_dma_addr = lo | (hi << 32);
+ mdrv->dp = mic_card_map(mdev, dp_dma_addr, MIC_DP_SIZE);
+ if (!mdrv->dp) {
+ dev_err(mdrv->dev, "Cannot remap Aperture BAR\n");
+ return -ENOMEM;
+ }
+ bootparam = mdrv->dp;
+ magic = ioread32(&bootparam->magic);
+ if (MIC_MAGIC != magic) {
+ dev_err(mdrv->dev, "bootparam magic mismatch 0x%x\n", magic);
+ return -EIO;
+ }
+ return 0;
+}
+
+/* Uninitialize the device page */
+static void mic_dp_uninit(void)
+{
+ mic_card_unmap(&g_drv->mdev, g_drv->dp);
+}
+
+/**
+ * mic_request_card_irq - request an irq.
+ *
+ * @handler: interrupt handler passed to request_threaded_irq.
+ * @thread_fn: thread fn. passed to request_threaded_irq.
+ * @name: The ASCII name of the callee requesting the irq.
+ * @data: private data that is returned back when calling the
+ * function handler.
+ * @index: The doorbell index of the requester.
+ *
+ * returns: The cookie that is transparent to the caller. Passed
+ * back when calling mic_free_irq. An appropriate error code
+ * is returned on failure. Caller needs to use IS_ERR(return_val)
+ * to check for failure and PTR_ERR(return_val) to obtained the
+ * error code.
+ *
+ */
+struct mic_irq *
+mic_request_card_irq(irq_handler_t handler,
+ irq_handler_t thread_fn, const char *name,
+ void *data, int index)
+{
+ int rc = 0;
+ unsigned long cookie;
+ struct mic_driver *mdrv = g_drv;
+
+ rc = request_threaded_irq(mic_db_to_irq(mdrv, index), handler,
+ thread_fn, 0, name, data);
+ if (rc) {
+ dev_err(mdrv->dev, "request_threaded_irq failed rc = %d\n", rc);
+ goto err;
+ }
+ mdrv->irq_info.irq_usage_count[index]++;
+ cookie = index;
+ return (struct mic_irq *)cookie;
+err:
+ return ERR_PTR(rc);
+}
+
+/**
+ * mic_free_card_irq - free irq.
+ *
+ * @cookie: cookie obtained during a successful call to mic_request_threaded_irq
+ * @data: private data specified by the calling function during the
+ * mic_request_threaded_irq
+ *
+ * returns: none.
+ */
+void mic_free_card_irq(struct mic_irq *cookie, void *data)
+{
+ int index;
+ struct mic_driver *mdrv = g_drv;
+
+ index = (unsigned long)cookie & 0xFFFFU;
+ free_irq(mic_db_to_irq(mdrv, index), data);
+ mdrv->irq_info.irq_usage_count[index]--;
+}
+
+/**
+ * mic_next_card_db - Get the doorbell with minimum usage count.
+ *
+ * Returns the irq index.
+ */
+int mic_next_card_db(void)
+{
+ int i;
+ int index = 0;
+ struct mic_driver *mdrv = g_drv;
+
+ for (i = 0; i < mdrv->intr_info.num_intr; i++) {
+ if (mdrv->irq_info.irq_usage_count[i] <
+ mdrv->irq_info.irq_usage_count[index])
+ index = i;
+ }
+
+ return index;
+}
+
+/**
+ * mic_init_irq - Initialize irq information.
+ *
+ * Returns 0 in success. Appropriate error code on failure.
+ */
+static int mic_init_irq(void)
+{
+ struct mic_driver *mdrv = g_drv;
+
+ mdrv->irq_info.irq_usage_count = kzalloc((sizeof(u32) *
+ mdrv->intr_info.num_intr),
+ GFP_KERNEL);
+ if (!mdrv->irq_info.irq_usage_count)
+ return -ENOMEM;
+ return 0;
+}
+
+/**
+ * mic_uninit_irq - Uninitialize irq information.
+ *
+ * None.
+ */
+static void mic_uninit_irq(void)
+{
+ struct mic_driver *mdrv = g_drv;
+
+ kfree(mdrv->irq_info.irq_usage_count);
+}
+
+static inline struct mic_driver *scdev_to_mdrv(struct scif_hw_dev *scdev)
+{
+ return dev_get_drvdata(scdev->dev.parent);
+}
+
+static struct mic_irq *
+___mic_request_irq(struct scif_hw_dev *scdev,
+ irqreturn_t (*func)(int irq, void *data),
+ const char *name, void *data,
+ int db)
+{
+ return mic_request_card_irq(func, NULL, name, data, db);
+}
+
+static void
+___mic_free_irq(struct scif_hw_dev *scdev,
+ struct mic_irq *cookie, void *data)
+{
+ return mic_free_card_irq(cookie, data);
+}
+
+static void ___mic_ack_interrupt(struct scif_hw_dev *scdev, int num)
+{
+ struct mic_driver *mdrv = scdev_to_mdrv(scdev);
+
+ mic_ack_interrupt(&mdrv->mdev);
+}
+
+static int ___mic_next_db(struct scif_hw_dev *scdev)
+{
+ return mic_next_card_db();
+}
+
+static void ___mic_send_intr(struct scif_hw_dev *scdev, int db)
+{
+ struct mic_driver *mdrv = scdev_to_mdrv(scdev);
+
+ mic_send_intr(&mdrv->mdev, db);
+}
+
+static void ___mic_send_p2p_intr(struct scif_hw_dev *scdev, int db,
+ struct mic_mw *mw)
+{
+ mic_send_p2p_intr(db, mw);
+}
+
+static void __iomem *
+___mic_ioremap(struct scif_hw_dev *scdev,
+ phys_addr_t pa, size_t len)
+{
+ struct mic_driver *mdrv = scdev_to_mdrv(scdev);
+
+ return mic_card_map(&mdrv->mdev, pa, len);
+}
+
+static void ___mic_iounmap(struct scif_hw_dev *scdev, void __iomem *va)
+{
+ struct mic_driver *mdrv = scdev_to_mdrv(scdev);
+
+ mic_card_unmap(&mdrv->mdev, va);
+}
+
+static struct scif_hw_ops scif_hw_ops = {
+ .request_irq = ___mic_request_irq,
+ .free_irq = ___mic_free_irq,
+ .ack_interrupt = ___mic_ack_interrupt,
+ .next_db = ___mic_next_db,
+ .send_intr = ___mic_send_intr,
+ .send_p2p_intr = ___mic_send_p2p_intr,
+ .ioremap = ___mic_ioremap,
+ .iounmap = ___mic_iounmap,
+};
+
+static int mic_request_dma_chans(struct mic_driver *mdrv)
+{
+ dma_cap_mask_t mask;
+ struct dma_chan *chan;
+
+ request_module("mic_x100_dma");
+ dma_cap_zero(mask);
+ dma_cap_set(DMA_MEMCPY, mask);
+
+ do {
+ chan = dma_request_channel(mask, NULL, NULL);
+ if (chan) {
+ mdrv->dma_ch[mdrv->num_dma_ch++] = chan;
+ if (mdrv->num_dma_ch >= MIC_MAX_DMA_CHAN)
+ break;
+ }
+ } while (chan);
+ dev_info(mdrv->dev, "DMA channels # %d\n", mdrv->num_dma_ch);
+ return mdrv->num_dma_ch;
+}
+
+static void mic_free_dma_chans(struct mic_driver *mdrv)
+{
+ int i = 0;
+
+ for (i = 0; i < mdrv->num_dma_ch; i++) {
+ dma_release_channel(mdrv->dma_ch[i]);
+ mdrv->dma_ch[i] = NULL;
+ }
+ mdrv->num_dma_ch = 0;
+}
+
+/*
+ * mic_driver_init - MIC driver initialization tasks.
+ *
+ * Returns 0 in success. Appropriate error code on failure.
+ */
+int __init mic_driver_init(struct mic_driver *mdrv)
+{
+ int rc;
+ struct mic_bootparam __iomem *bootparam;
+ u8 node_id;
+
+ g_drv = mdrv;
+ /* Unloading the card module is not supported. */
+ if (!try_module_get(mdrv->dev->driver->owner)) {
+ rc = -ENODEV;
+ goto done;
+ }
+ rc = mic_dp_init();
+ if (rc)
+ goto put;
+ rc = mic_init_irq();
+ if (rc)
+ goto dp_uninit;
+ if (!mic_request_dma_chans(mdrv)) {
+ rc = -ENODEV;
+ goto irq_uninit;
+ }
+ rc = mic_devices_init(mdrv);
+ if (rc)
+ goto dma_free;
+ bootparam = mdrv->dp;
+ node_id = ioread8(&bootparam->node_id);
+ mdrv->scdev = scif_register_device(mdrv->dev, MIC_SCIF_DEV,
+ NULL, &scif_hw_ops,
+ 0, node_id, &mdrv->mdev.mmio, NULL,
+ NULL, mdrv->dp, mdrv->dma_ch,
+ mdrv->num_dma_ch, true);
+ if (IS_ERR(mdrv->scdev)) {
+ rc = PTR_ERR(mdrv->scdev);
+ goto device_uninit;
+ }
+ mic_create_card_debug_dir(mdrv);
+done:
+ return rc;
+device_uninit:
+ mic_devices_uninit(mdrv);
+dma_free:
+ mic_free_dma_chans(mdrv);
+irq_uninit:
+ mic_uninit_irq();
+dp_uninit:
+ mic_dp_uninit();
+put:
+ module_put(mdrv->dev->driver->owner);
+ return rc;
+}
+
+/*
+ * mic_driver_uninit - MIC driver uninitialization tasks.
+ *
+ * Returns None
+ */
+void mic_driver_uninit(struct mic_driver *mdrv)
+{
+ mic_delete_card_debug_dir(mdrv);
+ scif_unregister_device(mdrv->scdev);
+ mic_devices_uninit(mdrv);
+ mic_free_dma_chans(mdrv);
+ mic_uninit_irq();
+ mic_dp_uninit();
+ module_put(mdrv->dev->driver->owner);
+}
diff --git a/drivers/misc/mic/card/mic_device.h b/drivers/misc/mic/card/mic_device.h
new file mode 100644
index 0000000..1dbf83c
--- /dev/null
+++ b/drivers/misc/mic/card/mic_device.h
@@ -0,0 +1,146 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2013 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Disclaimer: The codes contained in these modules may be specific to
+ * the Intel Software Development Platform codenamed: Knights Ferry, and
+ * the Intel product codenamed: Knights Corner, and are not backward
+ * compatible with other Intel products. Additionally, Intel will NOT
+ * support the codes or instruction set in future products.
+ *
+ * Intel MIC Card driver.
+ *
+ */
+#ifndef _MIC_CARD_DEVICE_H_
+#define _MIC_CARD_DEVICE_H_
+
+#include <linux/workqueue.h>
+#include <linux/io.h>
+#include <linux/interrupt.h>
+#include <linux/mic_bus.h>
+#include "../bus/scif_bus.h"
+
+/**
+ * struct mic_intr_info - Contains h/w specific interrupt sources info
+ *
+ * @num_intr: The number of irqs available
+ */
+struct mic_intr_info {
+ u32 num_intr;
+};
+
+/**
+ * struct mic_irq_info - OS specific irq information
+ *
+ * @irq_usage_count: usage count array tracking the number of sources
+ * assigned for each irq.
+ */
+struct mic_irq_info {
+ int *irq_usage_count;
+};
+
+/**
+ * struct mic_device - MIC device information.
+ *
+ * @mmio: MMIO bar information.
+ */
+struct mic_device {
+ struct mic_mw mmio;
+};
+
+/**
+ * struct mic_driver - MIC card driver information.
+ *
+ * @name: Name for MIC driver.
+ * @dbg_dir: debugfs directory of this MIC device.
+ * @dev: The device backing this MIC.
+ * @dp: The pointer to the virtio device page.
+ * @mdev: MIC device information for the host.
+ * @hotplug_work: Hot plug work for adding/removing virtio devices.
+ * @irq_info: The OS specific irq information
+ * @intr_info: H/W specific interrupt information.
+ * @dma_mbdev: dma device on the MIC virtual bus.
+ * @dma_ch - Array of DMA channels
+ * @num_dma_ch - Number of DMA channels available
+ * @scdev: SCIF device on the SCIF virtual bus.
+ */
+struct mic_driver {
+ char name[20];
+ struct dentry *dbg_dir;
+ struct device *dev;
+ void __iomem *dp;
+ struct mic_device mdev;
+ struct work_struct hotplug_work;
+ struct mic_irq_info irq_info;
+ struct mic_intr_info intr_info;
+ struct mbus_device *dma_mbdev;
+ struct dma_chan *dma_ch[MIC_MAX_DMA_CHAN];
+ int num_dma_ch;
+ struct scif_hw_dev *scdev;
+};
+
+/**
+ * struct mic_irq - opaque pointer used as cookie
+ */
+struct mic_irq;
+
+/**
+ * mic_mmio_read - read from an MMIO register.
+ * @mw: MMIO register base virtual address.
+ * @offset: register offset.
+ *
+ * RETURNS: register value.
+ */
+static inline u32 mic_mmio_read(struct mic_mw *mw, u32 offset)
+{
+ return ioread32(mw->va + offset);
+}
+
+/**
+ * mic_mmio_write - write to an MMIO register.
+ * @mw: MMIO register base virtual address.
+ * @val: the data value to put into the register
+ * @offset: register offset.
+ *
+ * RETURNS: none.
+ */
+static inline void
+mic_mmio_write(struct mic_mw *mw, u32 val, u32 offset)
+{
+ iowrite32(val, mw->va + offset);
+}
+
+int mic_driver_init(struct mic_driver *mdrv);
+void mic_driver_uninit(struct mic_driver *mdrv);
+int mic_next_card_db(void);
+struct mic_irq *
+mic_request_card_irq(irq_handler_t handler, irq_handler_t thread_fn,
+ const char *name, void *data, int db);
+void mic_free_card_irq(struct mic_irq *cookie, void *data);
+u32 mic_read_spad(struct mic_device *mdev, unsigned int idx);
+void mic_send_intr(struct mic_device *mdev, int doorbell);
+void mic_send_p2p_intr(int doorbell, struct mic_mw *mw);
+int mic_db_to_irq(struct mic_driver *mdrv, int db);
+u32 mic_ack_interrupt(struct mic_device *mdev);
+void mic_hw_intr_init(struct mic_driver *mdrv);
+void __iomem *
+mic_card_map(struct mic_device *mdev, dma_addr_t addr, size_t size);
+void mic_card_unmap(struct mic_device *mdev, void __iomem *addr);
+void __init mic_create_card_debug_dir(struct mic_driver *mdrv);
+void mic_delete_card_debug_dir(struct mic_driver *mdrv);
+void __init mic_init_card_debugfs(void);
+void mic_exit_card_debugfs(void);
+#endif
diff --git a/drivers/misc/mic/card/mic_virtio.c b/drivers/misc/mic/card/mic_virtio.c
new file mode 100644
index 0000000..e486a0c
--- /dev/null
+++ b/drivers/misc/mic/card/mic_virtio.c
@@ -0,0 +1,634 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2013 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Disclaimer: The codes contained in these modules may be specific to
+ * the Intel Software Development Platform codenamed: Knights Ferry, and
+ * the Intel product codenamed: Knights Corner, and are not backward
+ * compatible with other Intel products. Additionally, Intel will NOT
+ * support the codes or instruction set in future products.
+ *
+ * Adapted from:
+ *
+ * virtio for kvm on s390
+ *
+ * Copyright IBM Corp. 2008
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ * Author(s): Christian Borntraeger <borntraeger@de.ibm.com>
+ *
+ * Intel MIC Card driver.
+ *
+ */
+#include <linux/delay.h>
+#include <linux/slab.h>
+#include <linux/virtio_config.h>
+
+#include "../common/mic_dev.h"
+#include "mic_virtio.h"
+
+#define VIRTIO_SUBCODE_64 0x0D00
+
+#define MIC_MAX_VRINGS 4
+struct mic_vdev {
+ struct virtio_device vdev;
+ struct mic_device_desc __iomem *desc;
+ struct mic_device_ctrl __iomem *dc;
+ struct mic_device *mdev;
+ void __iomem *vr[MIC_MAX_VRINGS];
+ int used_size[MIC_MAX_VRINGS];
+ struct completion reset_done;
+ struct mic_irq *virtio_cookie;
+ int c2h_vdev_db;
+};
+
+static struct mic_irq *virtio_config_cookie;
+#define to_micvdev(vd) container_of(vd, struct mic_vdev, vdev)
+
+/* Helper API to obtain the parent of the virtio device */
+static inline struct device *mic_dev(struct mic_vdev *mvdev)
+{
+ return mvdev->vdev.dev.parent;
+}
+
+/* This gets the device's feature bits. */
+static u64 mic_get_features(struct virtio_device *vdev)
+{
+ unsigned int i, bits;
+ u32 features = 0;
+ struct mic_device_desc __iomem *desc = to_micvdev(vdev)->desc;
+ u8 __iomem *in_features = mic_vq_features(desc);
+ int feature_len = ioread8(&desc->feature_len);
+
+ bits = min_t(unsigned, feature_len, sizeof(features)) * 8;
+ for (i = 0; i < bits; i++)
+ if (ioread8(&in_features[i / 8]) & (BIT(i % 8)))
+ features |= BIT(i);
+
+ return features;
+}
+
+static int mic_finalize_features(struct virtio_device *vdev)
+{
+ unsigned int i, bits;
+ struct mic_device_desc __iomem *desc = to_micvdev(vdev)->desc;
+ u8 feature_len = ioread8(&desc->feature_len);
+ /* Second half of bitmap is features we accept. */
+ u8 __iomem *out_features =
+ mic_vq_features(desc) + feature_len;
+
+ /* Give virtio_ring a chance to accept features. */
+ vring_transport_features(vdev);
+
+ /* Make sure we don't have any features > 32 bits! */
+ BUG_ON((u32)vdev->features != vdev->features);
+
+ memset_io(out_features, 0, feature_len);
+ bits = min_t(unsigned, feature_len,
+ sizeof(vdev->features)) * 8;
+ for (i = 0; i < bits; i++) {
+ if (__virtio_test_bit(vdev, i))
+ iowrite8(ioread8(&out_features[i / 8]) | (1 << (i % 8)),
+ &out_features[i / 8]);
+ }
+
+ return 0;
+}
+
+/*
+ * Reading and writing elements in config space
+ */
+static void mic_get(struct virtio_device *vdev, unsigned int offset,
+ void *buf, unsigned len)
+{
+ struct mic_device_desc __iomem *desc = to_micvdev(vdev)->desc;
+
+ if (offset + len > ioread8(&desc->config_len))
+ return;
+ memcpy_fromio(buf, mic_vq_configspace(desc) + offset, len);
+}
+
+static void mic_set(struct virtio_device *vdev, unsigned int offset,
+ const void *buf, unsigned len)
+{
+ struct mic_device_desc __iomem *desc = to_micvdev(vdev)->desc;
+
+ if (offset + len > ioread8(&desc->config_len))
+ return;
+ memcpy_toio(mic_vq_configspace(desc) + offset, buf, len);
+}
+
+/*
+ * The operations to get and set the status word just access the status
+ * field of the device descriptor. set_status also interrupts the host
+ * to tell about status changes.
+ */
+static u8 mic_get_status(struct virtio_device *vdev)
+{
+ return ioread8(&to_micvdev(vdev)->desc->status);
+}
+
+static void mic_set_status(struct virtio_device *vdev, u8 status)
+{
+ struct mic_vdev *mvdev = to_micvdev(vdev);
+ if (!status)
+ return;
+ iowrite8(status, &mvdev->desc->status);
+ mic_send_intr(mvdev->mdev, mvdev->c2h_vdev_db);
+}
+
+/* Inform host on a virtio device reset and wait for ack from host */
+static void mic_reset_inform_host(struct virtio_device *vdev)
+{
+ struct mic_vdev *mvdev = to_micvdev(vdev);
+ struct mic_device_ctrl __iomem *dc = mvdev->dc;
+ int retry;
+
+ iowrite8(0, &dc->host_ack);
+ iowrite8(1, &dc->vdev_reset);
+ mic_send_intr(mvdev->mdev, mvdev->c2h_vdev_db);
+
+ /* Wait till host completes all card accesses and acks the reset */
+ for (retry = 100; retry--;) {
+ if (ioread8(&dc->host_ack))
+ break;
+ msleep(100);
+ };
+
+ dev_dbg(mic_dev(mvdev), "%s: retry: %d\n", __func__, retry);
+
+ /* Reset status to 0 in case we timed out */
+ iowrite8(0, &mvdev->desc->status);
+}
+
+static void mic_reset(struct virtio_device *vdev)
+{
+ struct mic_vdev *mvdev = to_micvdev(vdev);
+
+ dev_dbg(mic_dev(mvdev), "%s: virtio id %d\n",
+ __func__, vdev->id.device);
+
+ mic_reset_inform_host(vdev);
+ complete_all(&mvdev->reset_done);
+}
+
+/*
+ * The virtio_ring code calls this API when it wants to notify the Host.
+ */
+static bool mic_notify(struct virtqueue *vq)
+{
+ struct mic_vdev *mvdev = vq->priv;
+
+ mic_send_intr(mvdev->mdev, mvdev->c2h_vdev_db);
+ return true;
+}
+
+static void mic_del_vq(struct virtqueue *vq, int n)
+{
+ struct mic_vdev *mvdev = to_micvdev(vq->vdev);
+ struct vring *vr = (struct vring *)(vq + 1);
+
+ free_pages((unsigned long) vr->used, get_order(mvdev->used_size[n]));
+ vring_del_virtqueue(vq);
+ mic_card_unmap(mvdev->mdev, mvdev->vr[n]);
+ mvdev->vr[n] = NULL;
+}
+
+static void mic_del_vqs(struct virtio_device *vdev)
+{
+ struct mic_vdev *mvdev = to_micvdev(vdev);
+ struct virtqueue *vq, *n;
+ int idx = 0;
+
+ dev_dbg(mic_dev(mvdev), "%s\n", __func__);
+
+ list_for_each_entry_safe(vq, n, &vdev->vqs, list)
+ mic_del_vq(vq, idx++);
+}
+
+/*
+ * This routine will assign vring's allocated in host/io memory. Code in
+ * virtio_ring.c however continues to access this io memory as if it were local
+ * memory without io accessors.
+ */
+static struct virtqueue *mic_find_vq(struct virtio_device *vdev,
+ unsigned index,
+ void (*callback)(struct virtqueue *vq),
+ const char *name)
+{
+ struct mic_vdev *mvdev = to_micvdev(vdev);
+ struct mic_vqconfig __iomem *vqconfig;
+ struct mic_vqconfig config;
+ struct virtqueue *vq;
+ void __iomem *va;
+ struct _mic_vring_info __iomem *info;
+ void *used;
+ int vr_size, _vr_size, err, magic;
+ struct vring *vr;
+ u8 type = ioread8(&mvdev->desc->type);
+
+ if (index >= ioread8(&mvdev->desc->num_vq))
+ return ERR_PTR(-ENOENT);
+
+ if (!name)
+ return ERR_PTR(-ENOENT);
+
+ /* First assign the vring's allocated in host memory */
+ vqconfig = mic_vq_config(mvdev->desc) + index;
+ memcpy_fromio(&config, vqconfig, sizeof(config));
+ _vr_size = vring_size(le16_to_cpu(config.num), MIC_VIRTIO_RING_ALIGN);
+ vr_size = PAGE_ALIGN(_vr_size + sizeof(struct _mic_vring_info));
+ va = mic_card_map(mvdev->mdev, le64_to_cpu(config.address), vr_size);
+ if (!va)
+ return ERR_PTR(-ENOMEM);
+ mvdev->vr[index] = va;
+ memset_io(va, 0x0, _vr_size);
+ vq = vring_new_virtqueue(index, le16_to_cpu(config.num),
+ MIC_VIRTIO_RING_ALIGN, vdev, false,
+ (void __force *)va, mic_notify, callback,
+ name);
+ if (!vq) {
+ err = -ENOMEM;
+ goto unmap;
+ }
+ info = va + _vr_size;
+ magic = ioread32(&info->magic);
+
+ if (WARN(magic != MIC_MAGIC + type + index, "magic mismatch")) {
+ err = -EIO;
+ goto unmap;
+ }
+
+ /* Allocate and reassign used ring now */
+ mvdev->used_size[index] = PAGE_ALIGN(sizeof(__u16) * 3 +
+ sizeof(struct vring_used_elem) *
+ le16_to_cpu(config.num));
+ used = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
+ get_order(mvdev->used_size[index]));
+ if (!used) {
+ err = -ENOMEM;
+ dev_err(mic_dev(mvdev), "%s %d err %d\n",
+ __func__, __LINE__, err);
+ goto del_vq;
+ }
+ iowrite64(virt_to_phys(used), &vqconfig->used_address);
+
+ /*
+ * To reassign the used ring here we are directly accessing
+ * struct vring_virtqueue which is a private data structure
+ * in virtio_ring.c. At the minimum, a BUILD_BUG_ON() in
+ * vring_new_virtqueue() would ensure that
+ * (&vq->vring == (struct vring *) (&vq->vq + 1));
+ */
+ vr = (struct vring *)(vq + 1);
+ vr->used = used;
+
+ vq->priv = mvdev;
+ return vq;
+del_vq:
+ vring_del_virtqueue(vq);
+unmap:
+ mic_card_unmap(mvdev->mdev, mvdev->vr[index]);
+ return ERR_PTR(err);
+}
+
+static int mic_find_vqs(struct virtio_device *vdev, unsigned nvqs,
+ struct virtqueue *vqs[],
+ vq_callback_t *callbacks[],
+ const char *names[])
+{
+ struct mic_vdev *mvdev = to_micvdev(vdev);
+ struct mic_device_ctrl __iomem *dc = mvdev->dc;
+ int i, err, retry;
+
+ /* We must have this many virtqueues. */
+ if (nvqs > ioread8(&mvdev->desc->num_vq))
+ return -ENOENT;
+
+ for (i = 0; i < nvqs; ++i) {
+ dev_dbg(mic_dev(mvdev), "%s: %d: %s\n",
+ __func__, i, names[i]);
+ vqs[i] = mic_find_vq(vdev, i, callbacks[i], names[i]);
+ if (IS_ERR(vqs[i])) {
+ err = PTR_ERR(vqs[i]);
+ goto error;
+ }
+ }
+
+ iowrite8(1, &dc->used_address_updated);
+ /*
+ * Send an interrupt to the host to inform it that used
+ * rings have been re-assigned.
+ */
+ mic_send_intr(mvdev->mdev, mvdev->c2h_vdev_db);
+ for (retry = 100; retry--;) {
+ if (!ioread8(&dc->used_address_updated))
+ break;
+ msleep(100);
+ };
+
+ dev_dbg(mic_dev(mvdev), "%s: retry: %d\n", __func__, retry);
+ if (!retry) {
+ err = -ENODEV;
+ goto error;
+ }
+
+ return 0;
+error:
+ mic_del_vqs(vdev);
+ return err;
+}
+
+/*
+ * The config ops structure as defined by virtio config
+ */
+static struct virtio_config_ops mic_vq_config_ops = {
+ .get_features = mic_get_features,
+ .finalize_features = mic_finalize_features,
+ .get = mic_get,
+ .set = mic_set,
+ .get_status = mic_get_status,
+ .set_status = mic_set_status,
+ .reset = mic_reset,
+ .find_vqs = mic_find_vqs,
+ .del_vqs = mic_del_vqs,
+};
+
+static irqreturn_t
+mic_virtio_intr_handler(int irq, void *data)
+{
+ struct mic_vdev *mvdev = data;
+ struct virtqueue *vq;
+
+ mic_ack_interrupt(mvdev->mdev);
+ list_for_each_entry(vq, &mvdev->vdev.vqs, list)
+ vring_interrupt(0, vq);
+
+ return IRQ_HANDLED;
+}
+
+static void mic_virtio_release_dev(struct device *_d)
+{
+ /*
+ * No need for a release method similar to virtio PCI.
+ * Provide an empty one to avoid getting a warning from core.
+ */
+}
+
+/*
+ * adds a new device and register it with virtio
+ * appropriate drivers are loaded by the device model
+ */
+static int mic_add_device(struct mic_device_desc __iomem *d,
+ unsigned int offset, struct mic_driver *mdrv)
+{
+ struct mic_vdev *mvdev;
+ int ret;
+ int virtio_db;
+ u8 type = ioread8(&d->type);
+
+ mvdev = kzalloc(sizeof(*mvdev), GFP_KERNEL);
+ if (!mvdev) {
+ dev_err(mdrv->dev, "Cannot allocate mic dev %u type %u\n",
+ offset, type);
+ return -ENOMEM;
+ }
+
+ mvdev->mdev = &mdrv->mdev;
+ mvdev->vdev.dev.parent = mdrv->dev;
+ mvdev->vdev.dev.release = mic_virtio_release_dev;
+ mvdev->vdev.id.device = type;
+ mvdev->vdev.config = &mic_vq_config_ops;
+ mvdev->desc = d;
+ mvdev->dc = (void __iomem *)d + mic_aligned_desc_size(d);
+ init_completion(&mvdev->reset_done);
+
+ virtio_db = mic_next_card_db();
+ mvdev->virtio_cookie = mic_request_card_irq(mic_virtio_intr_handler,
+ NULL, "virtio intr", mvdev, virtio_db);
+ if (IS_ERR(mvdev->virtio_cookie)) {
+ ret = PTR_ERR(mvdev->virtio_cookie);
+ goto kfree;
+ }
+ iowrite8((u8)virtio_db, &mvdev->dc->h2c_vdev_db);
+ mvdev->c2h_vdev_db = ioread8(&mvdev->dc->c2h_vdev_db);
+
+ ret = register_virtio_device(&mvdev->vdev);
+ if (ret) {
+ dev_err(mic_dev(mvdev),
+ "Failed to register mic device %u type %u\n",
+ offset, type);
+ goto free_irq;
+ }
+ iowrite64((u64)mvdev, &mvdev->dc->vdev);
+ dev_dbg(mic_dev(mvdev), "%s: registered mic device %u type %u mvdev %p\n",
+ __func__, offset, type, mvdev);
+
+ return 0;
+
+free_irq:
+ mic_free_card_irq(mvdev->virtio_cookie, mvdev);
+kfree:
+ kfree(mvdev);
+ return ret;
+}
+
+/*
+ * match for a mic device with a specific desc pointer
+ */
+static int mic_match_desc(struct device *dev, void *data)
+{
+ struct virtio_device *vdev = dev_to_virtio(dev);
+ struct mic_vdev *mvdev = to_micvdev(vdev);
+
+ return mvdev->desc == (void __iomem *)data;
+}
+
+static void mic_handle_config_change(struct mic_device_desc __iomem *d,
+ unsigned int offset, struct mic_driver *mdrv)
+{
+ struct mic_device_ctrl __iomem *dc
+ = (void __iomem *)d + mic_aligned_desc_size(d);
+ struct mic_vdev *mvdev = (struct mic_vdev *)ioread64(&dc->vdev);
+
+ if (ioread8(&dc->config_change) != MIC_VIRTIO_PARAM_CONFIG_CHANGED)
+ return;
+
+ dev_dbg(mdrv->dev, "%s %d\n", __func__, __LINE__);
+ virtio_config_changed(&mvdev->vdev);
+ iowrite8(1, &dc->guest_ack);
+}
+
+/*
+ * removes a virtio device if a hot remove event has been
+ * requested by the host.
+ */
+static int mic_remove_device(struct mic_device_desc __iomem *d,
+ unsigned int offset, struct mic_driver *mdrv)
+{
+ struct mic_device_ctrl __iomem *dc
+ = (void __iomem *)d + mic_aligned_desc_size(d);
+ struct mic_vdev *mvdev = (struct mic_vdev *)ioread64(&dc->vdev);
+ u8 status;
+ int ret = -1;
+
+ if (ioread8(&dc->config_change) == MIC_VIRTIO_PARAM_DEV_REMOVE) {
+ dev_dbg(mdrv->dev,
+ "%s %d config_change %d type %d mvdev %p\n",
+ __func__, __LINE__,
+ ioread8(&dc->config_change), ioread8(&d->type), mvdev);
+
+ status = ioread8(&d->status);
+ reinit_completion(&mvdev->reset_done);
+ unregister_virtio_device(&mvdev->vdev);
+ mic_free_card_irq(mvdev->virtio_cookie, mvdev);
+ if (status & VIRTIO_CONFIG_S_DRIVER_OK)
+ wait_for_completion(&mvdev->reset_done);
+ kfree(mvdev);
+ iowrite8(1, &dc->guest_ack);
+ dev_dbg(mdrv->dev, "%s %d guest_ack %d\n",
+ __func__, __LINE__, ioread8(&dc->guest_ack));
+ ret = 0;
+ }
+
+ return ret;
+}
+
+#define REMOVE_DEVICES true
+
+static void mic_scan_devices(struct mic_driver *mdrv, bool remove)
+{
+ s8 type;
+ unsigned int i;
+ struct mic_device_desc __iomem *d;
+ struct mic_device_ctrl __iomem *dc;
+ struct device *dev;
+ int ret;
+
+ for (i = sizeof(struct mic_bootparam); i < MIC_DP_SIZE;
+ i += mic_total_desc_size(d)) {
+ d = mdrv->dp + i;
+ dc = (void __iomem *)d + mic_aligned_desc_size(d);
+ /*
+ * This read barrier is paired with the corresponding write
+ * barrier on the host which is inserted before adding or
+ * removing a virtio device descriptor, by updating the type.
+ */
+ rmb();
+ type = ioread8(&d->type);
+
+ /* end of list */
+ if (type == 0)
+ break;
+
+ if (type == -1)
+ continue;
+
+ /* device already exists */
+ dev = device_find_child(mdrv->dev, (void __force *)d,
+ mic_match_desc);
+ if (dev) {
+ if (remove)
+ iowrite8(MIC_VIRTIO_PARAM_DEV_REMOVE,
+ &dc->config_change);
+ put_device(dev);
+ mic_handle_config_change(d, i, mdrv);
+ ret = mic_remove_device(d, i, mdrv);
+ if (!ret && !remove)
+ iowrite8(-1, &d->type);
+ if (remove) {
+ iowrite8(0, &dc->config_change);
+ iowrite8(0, &dc->guest_ack);
+ }
+ continue;
+ }
+
+ /* new device */
+ dev_dbg(mdrv->dev, "%s %d Adding new virtio device %p\n",
+ __func__, __LINE__, d);
+ if (!remove)
+ mic_add_device(d, i, mdrv);
+ }
+}
+
+/*
+ * mic_hotplug_device tries to find changes in the device page.
+ */
+static void mic_hotplug_devices(struct work_struct *work)
+{
+ struct mic_driver *mdrv = container_of(work,
+ struct mic_driver, hotplug_work);
+
+ mic_scan_devices(mdrv, !REMOVE_DEVICES);
+}
+
+/*
+ * Interrupt handler for hot plug/config changes etc.
+ */
+static irqreturn_t
+mic_extint_handler(int irq, void *data)
+{
+ struct mic_driver *mdrv = (struct mic_driver *)data;
+
+ dev_dbg(mdrv->dev, "%s %d hotplug work\n",
+ __func__, __LINE__);
+ mic_ack_interrupt(&mdrv->mdev);
+ schedule_work(&mdrv->hotplug_work);
+ return IRQ_HANDLED;
+}
+
+/*
+ * Init function for virtio
+ */
+int mic_devices_init(struct mic_driver *mdrv)
+{
+ int rc;
+ struct mic_bootparam __iomem *bootparam;
+ int config_db;
+
+ INIT_WORK(&mdrv->hotplug_work, mic_hotplug_devices);
+ mic_scan_devices(mdrv, !REMOVE_DEVICES);
+
+ config_db = mic_next_card_db();
+ virtio_config_cookie = mic_request_card_irq(mic_extint_handler, NULL,
+ "virtio_config_intr", mdrv,
+ config_db);
+ if (IS_ERR(virtio_config_cookie)) {
+ rc = PTR_ERR(virtio_config_cookie);
+ goto exit;
+ }
+
+ bootparam = mdrv->dp;
+ iowrite8(config_db, &bootparam->h2c_config_db);
+ return 0;
+exit:
+ return rc;
+}
+
+/*
+ * Uninit function for virtio
+ */
+void mic_devices_uninit(struct mic_driver *mdrv)
+{
+ struct mic_bootparam __iomem *bootparam = mdrv->dp;
+ iowrite8(-1, &bootparam->h2c_config_db);
+ mic_free_card_irq(virtio_config_cookie, mdrv);
+ flush_work(&mdrv->hotplug_work);
+ mic_scan_devices(mdrv, REMOVE_DEVICES);
+}
diff --git a/drivers/misc/mic/card/mic_virtio.h b/drivers/misc/mic/card/mic_virtio.h
new file mode 100644
index 0000000..d0407ba
--- /dev/null
+++ b/drivers/misc/mic/card/mic_virtio.h
@@ -0,0 +1,76 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2013 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Disclaimer: The codes contained in these modules may be specific to
+ * the Intel Software Development Platform codenamed: Knights Ferry, and
+ * the Intel product codenamed: Knights Corner, and are not backward
+ * compatible with other Intel products. Additionally, Intel will NOT
+ * support the codes or instruction set in future products.
+ *
+ * Intel MIC Card driver.
+ *
+ */
+#ifndef __MIC_CARD_VIRTIO_H
+#define __MIC_CARD_VIRTIO_H
+
+#include <linux/mic_common.h>
+#include "mic_device.h"
+
+/*
+ * 64 bit I/O access
+ */
+#ifndef ioread64
+#define ioread64 readq
+#endif
+#ifndef iowrite64
+#define iowrite64 writeq
+#endif
+
+static inline unsigned mic_desc_size(struct mic_device_desc __iomem *desc)
+{
+ return sizeof(*desc)
+ + ioread8(&desc->num_vq) * sizeof(struct mic_vqconfig)
+ + ioread8(&desc->feature_len) * 2
+ + ioread8(&desc->config_len);
+}
+
+static inline struct mic_vqconfig __iomem *
+mic_vq_config(struct mic_device_desc __iomem *desc)
+{
+ return (struct mic_vqconfig __iomem *)(desc + 1);
+}
+
+static inline __u8 __iomem *
+mic_vq_features(struct mic_device_desc __iomem *desc)
+{
+ return (__u8 __iomem *)(mic_vq_config(desc) + ioread8(&desc->num_vq));
+}
+
+static inline __u8 __iomem *
+mic_vq_configspace(struct mic_device_desc __iomem *desc)
+{
+ return mic_vq_features(desc) + ioread8(&desc->feature_len) * 2;
+}
+static inline unsigned mic_total_desc_size(struct mic_device_desc __iomem *desc)
+{
+ return mic_aligned_desc_size(desc) + sizeof(struct mic_device_ctrl);
+}
+
+int mic_devices_init(struct mic_driver *mdrv);
+void mic_devices_uninit(struct mic_driver *mdrv);
+
+#endif
diff --git a/drivers/misc/mic/card/mic_x100.c b/drivers/misc/mic/card/mic_x100.c
new file mode 100644
index 0000000..b2958ce
--- /dev/null
+++ b/drivers/misc/mic/card/mic_x100.c
@@ -0,0 +1,362 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2013 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Disclaimer: The codes contained in these modules may be specific to
+ * the Intel Software Development Platform codenamed: Knights Ferry, and
+ * the Intel product codenamed: Knights Corner, and are not backward
+ * compatible with other Intel products. Additionally, Intel will NOT
+ * support the codes or instruction set in future products.
+ *
+ * Intel MIC Card driver.
+ *
+ */
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/platform_device.h>
+
+#include "../common/mic_dev.h"
+#include "mic_device.h"
+#include "mic_x100.h"
+
+static const char mic_driver_name[] = "mic";
+
+static struct mic_driver g_drv;
+
+/**
+ * mic_read_spad - read from the scratchpad register
+ * @mdev: pointer to mic_device instance
+ * @idx: index to scratchpad register, 0 based
+ *
+ * This function allows reading of the 32bit scratchpad register.
+ *
+ * RETURNS: An appropriate -ERRNO error value on error, or zero for success.
+ */
+u32 mic_read_spad(struct mic_device *mdev, unsigned int idx)
+{
+ return mic_mmio_read(&mdev->mmio,
+ MIC_X100_SBOX_BASE_ADDRESS +
+ MIC_X100_SBOX_SPAD0 + idx * 4);
+}
+
+/**
+ * __mic_send_intr - Send interrupt to Host.
+ * @mdev: pointer to mic_device instance
+ * @doorbell: Doorbell number.
+ */
+void mic_send_intr(struct mic_device *mdev, int doorbell)
+{
+ struct mic_mw *mw = &mdev->mmio;
+
+ if (doorbell > MIC_X100_MAX_DOORBELL_IDX)
+ return;
+ /* Ensure that the interrupt is ordered w.r.t previous stores. */
+ wmb();
+ mic_mmio_write(mw, MIC_X100_SBOX_SDBIC0_DBREQ_BIT,
+ MIC_X100_SBOX_BASE_ADDRESS +
+ (MIC_X100_SBOX_SDBIC0 + (4 * doorbell)));
+}
+
+/*
+ * mic_x100_send_sbox_intr - Send an MIC_X100_SBOX interrupt to MIC.
+ */
+static void mic_x100_send_sbox_intr(struct mic_mw *mw, int doorbell)
+{
+ u64 apic_icr_offset = MIC_X100_SBOX_APICICR0 + doorbell * 8;
+ u32 apicicr_low = mic_mmio_read(mw, MIC_X100_SBOX_BASE_ADDRESS +
+ apic_icr_offset);
+
+ /* for MIC we need to make sure we "hit" the send_icr bit (13) */
+ apicicr_low = (apicicr_low | (1 << 13));
+ /*
+ * Ensure that the interrupt is ordered w.r.t. previous stores
+ * to main memory. Fence instructions are not implemented in X100
+ * since execution is in order but a compiler barrier is still
+ * required.
+ */
+ wmb();
+ mic_mmio_write(mw, apicicr_low,
+ MIC_X100_SBOX_BASE_ADDRESS + apic_icr_offset);
+}
+
+static void mic_x100_send_rdmasr_intr(struct mic_mw *mw, int doorbell)
+{
+ int rdmasr_offset = MIC_X100_SBOX_RDMASR0 + (doorbell << 2);
+ /*
+ * Ensure that the interrupt is ordered w.r.t. previous stores
+ * to main memory. Fence instructions are not implemented in X100
+ * since execution is in order but a compiler barrier is still
+ * required.
+ */
+ wmb();
+ mic_mmio_write(mw, 0, MIC_X100_SBOX_BASE_ADDRESS + rdmasr_offset);
+}
+
+/**
+ * mic_ack_interrupt - Device specific interrupt handling.
+ * @mdev: pointer to mic_device instance
+ *
+ * Returns: bitmask of doorbell events triggered.
+ */
+u32 mic_ack_interrupt(struct mic_device *mdev)
+{
+ return 0;
+}
+
+static inline int mic_get_sbox_irq(int db)
+{
+ return MIC_X100_IRQ_BASE + db;
+}
+
+static inline int mic_get_rdmasr_irq(int index)
+{
+ return MIC_X100_RDMASR_IRQ_BASE + index;
+}
+
+void mic_send_p2p_intr(int db, struct mic_mw *mw)
+{
+ int rdmasr_index;
+
+ if (db < MIC_X100_NUM_SBOX_IRQ) {
+ mic_x100_send_sbox_intr(mw, db);
+ } else {
+ rdmasr_index = db - MIC_X100_NUM_SBOX_IRQ;
+ mic_x100_send_rdmasr_intr(mw, rdmasr_index);
+ }
+}
+
+/**
+ * mic_hw_intr_init - Initialize h/w specific interrupt
+ * information.
+ * @mdrv: pointer to mic_driver
+ */
+void mic_hw_intr_init(struct mic_driver *mdrv)
+{
+ mdrv->intr_info.num_intr = MIC_X100_NUM_SBOX_IRQ +
+ MIC_X100_NUM_RDMASR_IRQ;
+}
+
+/**
+ * mic_db_to_irq - Retrieve irq number corresponding to a doorbell.
+ * @mdrv: pointer to mic_driver
+ * @db: The doorbell obtained for which the irq is needed. Doorbell
+ * may correspond to an sbox doorbell or an rdmasr index.
+ *
+ * Returns the irq corresponding to the doorbell.
+ */
+int mic_db_to_irq(struct mic_driver *mdrv, int db)
+{
+ int rdmasr_index;
+
+ /*
+ * The total number of doorbell interrupts on the card are 16. Indices
+ * 0-8 falls in the SBOX category and 8-15 fall in the RDMASR category.
+ */
+ if (db < MIC_X100_NUM_SBOX_IRQ) {
+ return mic_get_sbox_irq(db);
+ } else {
+ rdmasr_index = db - MIC_X100_NUM_SBOX_IRQ;
+ return mic_get_rdmasr_irq(rdmasr_index);
+ }
+}
+
+/*
+ * mic_card_map - Allocate virtual address for a remote memory region.
+ * @mdev: pointer to mic_device instance.
+ * @addr: Remote DMA address.
+ * @size: Size of the region.
+ *
+ * Returns: Virtual address backing the remote memory region.
+ */
+void __iomem *
+mic_card_map(struct mic_device *mdev, dma_addr_t addr, size_t size)
+{
+ return ioremap(addr, size);
+}
+
+/*
+ * mic_card_unmap - Unmap the virtual address for a remote memory region.
+ * @mdev: pointer to mic_device instance.
+ * @addr: Virtual address for remote memory region.
+ *
+ * Returns: None.
+ */
+void mic_card_unmap(struct mic_device *mdev, void __iomem *addr)
+{
+ iounmap(addr);
+}
+
+static inline struct mic_driver *mbdev_to_mdrv(struct mbus_device *mbdev)
+{
+ return dev_get_drvdata(mbdev->dev.parent);
+}
+
+static struct mic_irq *
+_mic_request_threaded_irq(struct mbus_device *mbdev,
+ irq_handler_t handler, irq_handler_t thread_fn,
+ const char *name, void *data, int intr_src)
+{
+ int rc = 0;
+ unsigned int irq = intr_src;
+ unsigned long cookie = irq;
+
+ rc = request_threaded_irq(irq, handler, thread_fn, 0, name, data);
+ if (rc) {
+ dev_err(mbdev_to_mdrv(mbdev)->dev,
+ "request_threaded_irq failed rc = %d\n", rc);
+ return ERR_PTR(rc);
+ }
+ return (struct mic_irq *)cookie;
+}
+
+static void _mic_free_irq(struct mbus_device *mbdev,
+ struct mic_irq *cookie, void *data)
+{
+ unsigned long irq = (unsigned long)cookie;
+ free_irq(irq, data);
+}
+
+static void _mic_ack_interrupt(struct mbus_device *mbdev, int num)
+{
+ mic_ack_interrupt(&mbdev_to_mdrv(mbdev)->mdev);
+}
+
+static struct mbus_hw_ops mbus_hw_ops = {
+ .request_threaded_irq = _mic_request_threaded_irq,
+ .free_irq = _mic_free_irq,
+ .ack_interrupt = _mic_ack_interrupt,
+};
+
+static int __init mic_probe(struct platform_device *pdev)
+{
+ struct mic_driver *mdrv = &g_drv;
+ struct mic_device *mdev = &mdrv->mdev;
+ int rc = 0;
+
+ mdrv->dev = &pdev->dev;
+ snprintf(mdrv->name, sizeof(mic_driver_name), mic_driver_name);
+
+ mdev->mmio.pa = MIC_X100_MMIO_BASE;
+ mdev->mmio.len = MIC_X100_MMIO_LEN;
+ mdev->mmio.va = devm_ioremap(&pdev->dev, MIC_X100_MMIO_BASE,
+ MIC_X100_MMIO_LEN);
+ if (!mdev->mmio.va) {
+ dev_err(&pdev->dev, "Cannot remap MMIO BAR\n");
+ rc = -EIO;
+ goto done;
+ }
+ mic_hw_intr_init(mdrv);
+ platform_set_drvdata(pdev, mdrv);
+ mdrv->dma_mbdev = mbus_register_device(mdrv->dev, MBUS_DEV_DMA_MIC,
+ NULL, &mbus_hw_ops, 0,
+ mdrv->mdev.mmio.va);
+ if (IS_ERR(mdrv->dma_mbdev)) {
+ rc = PTR_ERR(mdrv->dma_mbdev);
+ dev_err(&pdev->dev, "mbus_add_device failed rc %d\n", rc);
+ goto done;
+ }
+ rc = mic_driver_init(mdrv);
+ if (rc) {
+ dev_err(&pdev->dev, "mic_driver_init failed rc %d\n", rc);
+ goto remove_dma;
+ }
+done:
+ return rc;
+remove_dma:
+ mbus_unregister_device(mdrv->dma_mbdev);
+ return rc;
+}
+
+static int mic_remove(struct platform_device *pdev)
+{
+ struct mic_driver *mdrv = &g_drv;
+
+ mic_driver_uninit(mdrv);
+ mbus_unregister_device(mdrv->dma_mbdev);
+ return 0;
+}
+
+static void mic_platform_shutdown(struct platform_device *pdev)
+{
+ mic_remove(pdev);
+}
+
+static u64 mic_dma_mask = DMA_BIT_MASK(64);
+
+static struct platform_device mic_platform_dev = {
+ .name = mic_driver_name,
+ .id = 0,
+ .num_resources = 0,
+ .dev = {
+ .dma_mask = &mic_dma_mask,
+ .coherent_dma_mask = DMA_BIT_MASK(64),
+ },
+};
+
+static struct platform_driver __refdata mic_platform_driver = {
+ .probe = mic_probe,
+ .remove = mic_remove,
+ .shutdown = mic_platform_shutdown,
+ .driver = {
+ .name = mic_driver_name,
+ },
+};
+
+static int __init mic_init(void)
+{
+ int ret;
+ struct cpuinfo_x86 *c = &cpu_data(0);
+
+ if (!(c->x86 == 11 && c->x86_model == 1)) {
+ ret = -ENODEV;
+ pr_err("%s not running on X100 ret %d\n", __func__, ret);
+ goto done;
+ }
+
+ mic_init_card_debugfs();
+ ret = platform_device_register(&mic_platform_dev);
+ if (ret) {
+ pr_err("platform_device_register ret %d\n", ret);
+ goto cleanup_debugfs;
+ }
+ ret = platform_driver_register(&mic_platform_driver);
+ if (ret) {
+ pr_err("platform_driver_register ret %d\n", ret);
+ goto device_unregister;
+ }
+ return ret;
+
+device_unregister:
+ platform_device_unregister(&mic_platform_dev);
+cleanup_debugfs:
+ mic_exit_card_debugfs();
+done:
+ return ret;
+}
+
+static void __exit mic_exit(void)
+{
+ platform_driver_unregister(&mic_platform_driver);
+ platform_device_unregister(&mic_platform_dev);
+ mic_exit_card_debugfs();
+}
+
+module_init(mic_init);
+module_exit(mic_exit);
+
+MODULE_AUTHOR("Intel Corporation");
+MODULE_DESCRIPTION("Intel(R) MIC X100 Card driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/misc/mic/card/mic_x100.h b/drivers/misc/mic/card/mic_x100.h
new file mode 100644
index 0000000..7e22249
--- /dev/null
+++ b/drivers/misc/mic/card/mic_x100.h
@@ -0,0 +1,49 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2013 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Disclaimer: The codes contained in these modules may be specific to
+ * the Intel Software Development Platform codenamed: Knights Ferry, and
+ * the Intel product codenamed: Knights Corner, and are not backward
+ * compatible with other Intel products. Additionally, Intel will NOT
+ * support the codes or instruction set in future products.
+ *
+ * Intel MIC Card driver.
+ *
+ */
+#ifndef _MIC_X100_CARD_H_
+#define _MIC_X100_CARD_H_
+
+#define MIC_X100_MMIO_BASE 0x08007C0000ULL
+#define MIC_X100_MMIO_LEN 0x00020000ULL
+#define MIC_X100_SBOX_BASE_ADDRESS 0x00010000ULL
+
+#define MIC_X100_SBOX_SPAD0 0x0000AB20
+#define MIC_X100_SBOX_SDBIC0 0x0000CC90
+#define MIC_X100_SBOX_SDBIC0_DBREQ_BIT 0x80000000
+#define MIC_X100_SBOX_RDMASR0 0x0000B180
+#define MIC_X100_SBOX_APICICR0 0x0000A9D0
+
+#define MIC_X100_MAX_DOORBELL_IDX 8
+
+#define MIC_X100_NUM_SBOX_IRQ 8
+#define MIC_X100_NUM_RDMASR_IRQ 8
+#define MIC_X100_SBOX_IRQ_BASE 0
+#define MIC_X100_RDMASR_IRQ_BASE 17
+
+#define MIC_X100_IRQ_BASE 26
+
+#endif
diff --git a/drivers/misc/mic/common/mic_dev.h b/drivers/misc/mic/common/mic_dev.h
new file mode 100644
index 0000000..5077677
--- /dev/null
+++ b/drivers/misc/mic/common/mic_dev.h
@@ -0,0 +1,67 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2013 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Intel MIC driver.
+ *
+ */
+#ifndef __MIC_DEV_H__
+#define __MIC_DEV_H__
+
+/* The maximum number of MIC devices supported in a single host system. */
+#define MIC_MAX_NUM_DEVS 128
+
+/**
+ * enum mic_hw_family - The hardware family to which a device belongs.
+ */
+enum mic_hw_family {
+ MIC_FAMILY_X100 = 0,
+ MIC_FAMILY_X200,
+ MIC_FAMILY_UNKNOWN,
+ MIC_FAMILY_LAST
+};
+
+/**
+ * struct mic_mw - MIC memory window
+ *
+ * @pa: Base physical address.
+ * @va: Base ioremap'd virtual address.
+ * @len: Size of the memory window.
+ */
+struct mic_mw {
+ phys_addr_t pa;
+ void __iomem *va;
+ resource_size_t len;
+};
+
+/*
+ * Scratch pad register offsets used by the host to communicate
+ * device page DMA address to the card.
+ */
+#define MIC_DPLO_SPAD 14
+#define MIC_DPHI_SPAD 15
+
+/*
+ * These values are supposed to be in the config_change field of the
+ * device page when the host sends a config change interrupt to the card.
+ */
+#define MIC_VIRTIO_PARAM_DEV_REMOVE 0x1
+#define MIC_VIRTIO_PARAM_CONFIG_CHANGED 0x2
+
+/* Maximum number of DMA channels */
+#define MIC_MAX_DMA_CHAN 4
+
+#endif
diff --git a/drivers/misc/mic/cosm/Makefile b/drivers/misc/mic/cosm/Makefile
new file mode 100644
index 0000000..b85d4d4
--- /dev/null
+++ b/drivers/misc/mic/cosm/Makefile
@@ -0,0 +1,10 @@
+#
+# Makefile - Intel MIC Coprocessor State Management (COSM) Driver
+# Copyright(c) 2015, Intel Corporation.
+#
+obj-$(CONFIG_MIC_COSM) += mic_cosm.o
+
+mic_cosm-objs := cosm_main.o
+mic_cosm-objs += cosm_debugfs.o
+mic_cosm-objs += cosm_sysfs.o
+mic_cosm-objs += cosm_scif_server.o
diff --git a/drivers/misc/mic/cosm/cosm_debugfs.c b/drivers/misc/mic/cosm/cosm_debugfs.c
new file mode 100644
index 0000000..216cb3c
--- /dev/null
+++ b/drivers/misc/mic/cosm/cosm_debugfs.c
@@ -0,0 +1,156 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2015 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Intel MIC Coprocessor State Management (COSM) Driver
+ *
+ */
+
+#include <linux/debugfs.h>
+#include <linux/slab.h>
+#include <linux/io.h>
+#include "cosm_main.h"
+
+/* Debugfs parent dir */
+static struct dentry *cosm_dbg;
+
+/**
+ * cosm_log_buf_show - Display MIC kernel log buffer
+ *
+ * log_buf addr/len is read from System.map by user space
+ * and populated in sysfs entries.
+ */
+static int cosm_log_buf_show(struct seq_file *s, void *unused)
+{
+ void __iomem *log_buf_va;
+ int __iomem *log_buf_len_va;
+ struct cosm_device *cdev = s->private;
+ void *kva;
+ int size;
+ u64 aper_offset;
+
+ if (!cdev || !cdev->log_buf_addr || !cdev->log_buf_len)
+ goto done;
+
+ mutex_lock(&cdev->cosm_mutex);
+ switch (cdev->state) {
+ case MIC_BOOTING:
+ case MIC_ONLINE:
+ case MIC_SHUTTING_DOWN:
+ break;
+ default:
+ goto unlock;
+ }
+
+ /*
+ * Card kernel will never be relocated and any kernel text/data mapping
+ * can be translated to phys address by subtracting __START_KERNEL_map.
+ */
+ aper_offset = (u64)cdev->log_buf_len - __START_KERNEL_map;
+ log_buf_len_va = cdev->hw_ops->aper(cdev)->va + aper_offset;
+ aper_offset = (u64)cdev->log_buf_addr - __START_KERNEL_map;
+ log_buf_va = cdev->hw_ops->aper(cdev)->va + aper_offset;
+
+ size = ioread32(log_buf_len_va);
+ kva = kmalloc(size, GFP_KERNEL);
+ if (!kva)
+ goto unlock;
+
+ memcpy_fromio(kva, log_buf_va, size);
+ seq_write(s, kva, size);
+ kfree(kva);
+unlock:
+ mutex_unlock(&cdev->cosm_mutex);
+done:
+ return 0;
+}
+
+static int cosm_log_buf_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, cosm_log_buf_show, inode->i_private);
+}
+
+static const struct file_operations log_buf_ops = {
+ .owner = THIS_MODULE,
+ .open = cosm_log_buf_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release
+};
+
+/**
+ * cosm_force_reset_show - Force MIC reset
+ *
+ * Invokes the force_reset COSM bus op instead of the standard reset
+ * op in case a force reset of the MIC device is required
+ */
+static int cosm_force_reset_show(struct seq_file *s, void *pos)
+{
+ struct cosm_device *cdev = s->private;
+
+ cosm_stop(cdev, true);
+ return 0;
+}
+
+static int cosm_force_reset_debug_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, cosm_force_reset_show, inode->i_private);
+}
+
+static const struct file_operations force_reset_ops = {
+ .owner = THIS_MODULE,
+ .open = cosm_force_reset_debug_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release
+};
+
+void cosm_create_debug_dir(struct cosm_device *cdev)
+{
+ char name[16];
+
+ if (!cosm_dbg)
+ return;
+
+ scnprintf(name, sizeof(name), "mic%d", cdev->index);
+ cdev->dbg_dir = debugfs_create_dir(name, cosm_dbg);
+ if (!cdev->dbg_dir)
+ return;
+
+ debugfs_create_file("log_buf", 0444, cdev->dbg_dir, cdev, &log_buf_ops);
+ debugfs_create_file("force_reset", 0444, cdev->dbg_dir, cdev,
+ &force_reset_ops);
+}
+
+void cosm_delete_debug_dir(struct cosm_device *cdev)
+{
+ if (!cdev->dbg_dir)
+ return;
+
+ debugfs_remove_recursive(cdev->dbg_dir);
+}
+
+void cosm_init_debugfs(void)
+{
+ cosm_dbg = debugfs_create_dir(KBUILD_MODNAME, NULL);
+ if (!cosm_dbg)
+ pr_err("can't create debugfs dir\n");
+}
+
+void cosm_exit_debugfs(void)
+{
+ debugfs_remove(cosm_dbg);
+}
diff --git a/drivers/misc/mic/cosm/cosm_main.c b/drivers/misc/mic/cosm/cosm_main.c
new file mode 100644
index 0000000..4b4b356
--- /dev/null
+++ b/drivers/misc/mic/cosm/cosm_main.c
@@ -0,0 +1,388 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2015 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Intel MIC Coprocessor State Management (COSM) Driver
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/delay.h>
+#include <linux/idr.h>
+#include <linux/slab.h>
+#include <linux/cred.h>
+#include "cosm_main.h"
+
+static const char cosm_driver_name[] = "mic";
+
+/* COSM ID allocator */
+static struct ida g_cosm_ida;
+/* Class of MIC devices for sysfs accessibility. */
+static struct class *g_cosm_class;
+/* Number of MIC devices */
+static atomic_t g_num_dev;
+
+/**
+ * cosm_hw_reset - Issue a HW reset for the MIC device
+ * @cdev: pointer to cosm_device instance
+ */
+static void cosm_hw_reset(struct cosm_device *cdev, bool force)
+{
+ int i;
+
+#define MIC_RESET_TO (45)
+ if (force && cdev->hw_ops->force_reset)
+ cdev->hw_ops->force_reset(cdev);
+ else
+ cdev->hw_ops->reset(cdev);
+
+ for (i = 0; i < MIC_RESET_TO; i++) {
+ if (cdev->hw_ops->ready(cdev)) {
+ cosm_set_state(cdev, MIC_READY);
+ return;
+ }
+ /*
+ * Resets typically take 10s of seconds to complete.
+ * Since an MMIO read is required to check if the
+ * firmware is ready or not, a 1 second delay works nicely.
+ */
+ msleep(1000);
+ }
+ cosm_set_state(cdev, MIC_RESET_FAILED);
+}
+
+/**
+ * cosm_start - Start the MIC
+ * @cdev: pointer to cosm_device instance
+ *
+ * This function prepares an MIC for boot and initiates boot.
+ * RETURNS: An appropriate -ERRNO error value on error, or 0 for success.
+ */
+int cosm_start(struct cosm_device *cdev)
+{
+ const struct cred *orig_cred;
+ struct cred *override_cred;
+ int rc;
+
+ mutex_lock(&cdev->cosm_mutex);
+ if (!cdev->bootmode) {
+ dev_err(&cdev->dev, "%s %d bootmode not set\n",
+ __func__, __LINE__);
+ rc = -EINVAL;
+ goto unlock_ret;
+ }
+retry:
+ if (cdev->state != MIC_READY) {
+ dev_err(&cdev->dev, "%s %d MIC state not READY\n",
+ __func__, __LINE__);
+ rc = -EINVAL;
+ goto unlock_ret;
+ }
+ if (!cdev->hw_ops->ready(cdev)) {
+ cosm_hw_reset(cdev, false);
+ /*
+ * The state will either be MIC_READY if the reset succeeded
+ * or MIC_RESET_FAILED if the firmware reset failed.
+ */
+ goto retry;
+ }
+
+ /*
+ * Set credentials to root to allow non-root user to download initramsfs
+ * with 600 permissions
+ */
+ override_cred = prepare_creds();
+ if (!override_cred) {
+ dev_err(&cdev->dev, "%s %d prepare_creds failed\n",
+ __func__, __LINE__);
+ rc = -ENOMEM;
+ goto unlock_ret;
+ }
+ override_cred->fsuid = GLOBAL_ROOT_UID;
+ orig_cred = override_creds(override_cred);
+
+ rc = cdev->hw_ops->start(cdev, cdev->index);
+
+ revert_creds(orig_cred);
+ put_cred(override_cred);
+ if (rc)
+ goto unlock_ret;
+
+ /*
+ * If linux is being booted, card is treated 'online' only
+ * when the scif interface in the card is up. If anything else
+ * is booted, we set card to 'online' immediately.
+ */
+ if (!strcmp(cdev->bootmode, "linux"))
+ cosm_set_state(cdev, MIC_BOOTING);
+ else
+ cosm_set_state(cdev, MIC_ONLINE);
+unlock_ret:
+ mutex_unlock(&cdev->cosm_mutex);
+ if (rc)
+ dev_err(&cdev->dev, "cosm_start failed rc %d\n", rc);
+ return rc;
+}
+
+/**
+ * cosm_stop - Prepare the MIC for reset and trigger reset
+ * @cdev: pointer to cosm_device instance
+ * @force: force a MIC to reset even if it is already reset and ready.
+ *
+ * RETURNS: None
+ */
+void cosm_stop(struct cosm_device *cdev, bool force)
+{
+ mutex_lock(&cdev->cosm_mutex);
+ if (cdev->state != MIC_READY || force) {
+ /*
+ * Don't call hw_ops if they have been called previously.
+ * stop(..) calls device_unregister and will crash the system if
+ * called multiple times.
+ */
+ bool call_hw_ops = cdev->state != MIC_RESET_FAILED &&
+ cdev->state != MIC_READY;
+
+ if (cdev->state != MIC_RESETTING)
+ cosm_set_state(cdev, MIC_RESETTING);
+ cdev->heartbeat_watchdog_enable = false;
+ if (call_hw_ops)
+ cdev->hw_ops->stop(cdev, force);
+ cosm_hw_reset(cdev, force);
+ cosm_set_shutdown_status(cdev, MIC_NOP);
+ if (call_hw_ops && cdev->hw_ops->post_reset)
+ cdev->hw_ops->post_reset(cdev, cdev->state);
+ }
+ mutex_unlock(&cdev->cosm_mutex);
+ flush_work(&cdev->scif_work);
+}
+
+/**
+ * cosm_reset_trigger_work - Trigger MIC reset
+ * @work: The work structure
+ *
+ * This work is scheduled whenever the host wants to reset the MIC.
+ */
+static void cosm_reset_trigger_work(struct work_struct *work)
+{
+ struct cosm_device *cdev = container_of(work, struct cosm_device,
+ reset_trigger_work);
+ cosm_stop(cdev, false);
+}
+
+/**
+ * cosm_reset - Schedule MIC reset
+ * @cdev: pointer to cosm_device instance
+ *
+ * RETURNS: An -EINVAL if the card is already READY or 0 for success.
+ */
+int cosm_reset(struct cosm_device *cdev)
+{
+ int rc = 0;
+
+ mutex_lock(&cdev->cosm_mutex);
+ if (cdev->state != MIC_READY) {
+ cosm_set_state(cdev, MIC_RESETTING);
+ schedule_work(&cdev->reset_trigger_work);
+ } else {
+ dev_err(&cdev->dev, "%s %d MIC is READY\n", __func__, __LINE__);
+ rc = -EINVAL;
+ }
+ mutex_unlock(&cdev->cosm_mutex);
+ return rc;
+}
+
+/**
+ * cosm_shutdown - Initiate MIC shutdown.
+ * @cdev: pointer to cosm_device instance
+ *
+ * RETURNS: None
+ */
+int cosm_shutdown(struct cosm_device *cdev)
+{
+ struct cosm_msg msg = { .id = COSM_MSG_SHUTDOWN };
+ int rc = 0;
+
+ mutex_lock(&cdev->cosm_mutex);
+ if (cdev->state != MIC_ONLINE) {
+ rc = -EINVAL;
+ dev_err(&cdev->dev, "%s %d skipping shutdown in state: %s\n",
+ __func__, __LINE__, cosm_state_string[cdev->state]);
+ goto err;
+ }
+
+ if (!cdev->epd) {
+ rc = -ENOTCONN;
+ dev_err(&cdev->dev, "%s %d scif endpoint not connected rc %d\n",
+ __func__, __LINE__, rc);
+ goto err;
+ }
+
+ rc = scif_send(cdev->epd, &msg, sizeof(msg), SCIF_SEND_BLOCK);
+ if (rc < 0) {
+ dev_err(&cdev->dev, "%s %d scif_send failed rc %d\n",
+ __func__, __LINE__, rc);
+ goto err;
+ }
+ cdev->heartbeat_watchdog_enable = false;
+ cosm_set_state(cdev, MIC_SHUTTING_DOWN);
+ rc = 0;
+err:
+ mutex_unlock(&cdev->cosm_mutex);
+ return rc;
+}
+
+static int cosm_driver_probe(struct cosm_device *cdev)
+{
+ int rc;
+
+ /* Initialize SCIF server at first probe */
+ if (atomic_add_return(1, &g_num_dev) == 1) {
+ rc = cosm_scif_init();
+ if (rc)
+ goto scif_exit;
+ }
+ mutex_init(&cdev->cosm_mutex);
+ INIT_WORK(&cdev->reset_trigger_work, cosm_reset_trigger_work);
+ INIT_WORK(&cdev->scif_work, cosm_scif_work);
+ cdev->sysfs_heartbeat_enable = true;
+ cosm_sysfs_init(cdev);
+ cdev->sdev = device_create_with_groups(g_cosm_class, cdev->dev.parent,
+ MKDEV(0, cdev->index), cdev, cdev->attr_group,
+ "mic%d", cdev->index);
+ if (IS_ERR(cdev->sdev)) {
+ rc = PTR_ERR(cdev->sdev);
+ dev_err(&cdev->dev, "device_create_with_groups failed rc %d\n",
+ rc);
+ goto scif_exit;
+ }
+
+ cdev->state_sysfs = sysfs_get_dirent(cdev->sdev->kobj.sd,
+ "state");
+ if (!cdev->state_sysfs) {
+ rc = -ENODEV;
+ dev_err(&cdev->dev, "sysfs_get_dirent failed rc %d\n", rc);
+ goto destroy_device;
+ }
+ cosm_create_debug_dir(cdev);
+ return 0;
+destroy_device:
+ device_destroy(g_cosm_class, MKDEV(0, cdev->index));
+scif_exit:
+ if (atomic_dec_and_test(&g_num_dev))
+ cosm_scif_exit();
+ return rc;
+}
+
+static void cosm_driver_remove(struct cosm_device *cdev)
+{
+ cosm_delete_debug_dir(cdev);
+ sysfs_put(cdev->state_sysfs);
+ device_destroy(g_cosm_class, MKDEV(0, cdev->index));
+ flush_work(&cdev->reset_trigger_work);
+ cosm_stop(cdev, false);
+ if (atomic_dec_and_test(&g_num_dev))
+ cosm_scif_exit();
+
+ /* These sysfs entries might have allocated */
+ kfree(cdev->cmdline);
+ kfree(cdev->firmware);
+ kfree(cdev->ramdisk);
+ kfree(cdev->bootmode);
+}
+
+static int cosm_suspend(struct device *dev)
+{
+ struct cosm_device *cdev = dev_to_cosm(dev);
+
+ mutex_lock(&cdev->cosm_mutex);
+ switch (cdev->state) {
+ /**
+ * Suspend/freeze hooks in userspace have already shutdown the card.
+ * Card should be 'ready' in most cases. It is however possible that
+ * some userspace application initiated a boot. In those cases, we
+ * simply reset the card.
+ */
+ case MIC_ONLINE:
+ case MIC_BOOTING:
+ case MIC_SHUTTING_DOWN:
+ mutex_unlock(&cdev->cosm_mutex);
+ cosm_stop(cdev, false);
+ break;
+ default:
+ mutex_unlock(&cdev->cosm_mutex);
+ break;
+ }
+ return 0;
+}
+
+static const struct dev_pm_ops cosm_pm_ops = {
+ .suspend = cosm_suspend,
+ .freeze = cosm_suspend
+};
+
+static struct cosm_driver cosm_driver = {
+ .driver = {
+ .name = KBUILD_MODNAME,
+ .owner = THIS_MODULE,
+ .pm = &cosm_pm_ops,
+ },
+ .probe = cosm_driver_probe,
+ .remove = cosm_driver_remove
+};
+
+static int __init cosm_init(void)
+{
+ int ret;
+
+ cosm_init_debugfs();
+
+ g_cosm_class = class_create(THIS_MODULE, cosm_driver_name);
+ if (IS_ERR(g_cosm_class)) {
+ ret = PTR_ERR(g_cosm_class);
+ pr_err("class_create failed ret %d\n", ret);
+ goto cleanup_debugfs;
+ }
+
+ ida_init(&g_cosm_ida);
+ ret = cosm_register_driver(&cosm_driver);
+ if (ret) {
+ pr_err("cosm_register_driver failed ret %d\n", ret);
+ goto ida_destroy;
+ }
+ return 0;
+ida_destroy:
+ ida_destroy(&g_cosm_ida);
+ class_destroy(g_cosm_class);
+cleanup_debugfs:
+ cosm_exit_debugfs();
+ return ret;
+}
+
+static void __exit cosm_exit(void)
+{
+ cosm_unregister_driver(&cosm_driver);
+ ida_destroy(&g_cosm_ida);
+ class_destroy(g_cosm_class);
+ cosm_exit_debugfs();
+}
+
+module_init(cosm_init);
+module_exit(cosm_exit);
+
+MODULE_AUTHOR("Intel Corporation");
+MODULE_DESCRIPTION("Intel(R) MIC Coprocessor State Management (COSM) Driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/misc/mic/cosm/cosm_main.h b/drivers/misc/mic/cosm/cosm_main.h
new file mode 100644
index 0000000..f01156f
--- /dev/null
+++ b/drivers/misc/mic/cosm/cosm_main.h
@@ -0,0 +1,70 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2015 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Intel MIC Coprocessor State Management (COSM) Driver
+ *
+ */
+#ifndef _COSM_COSM_H_
+#define _COSM_COSM_H_
+
+#include <linux/scif.h>
+#include "../bus/cosm_bus.h"
+
+#define COSM_HEARTBEAT_SEND_SEC 30
+#define SCIF_COSM_LISTEN_PORT 201
+
+/**
+ * enum COSM msg id's
+ * @COSM_MSG_SHUTDOWN: host->card trigger shutdown
+ * @COSM_MSG_SYNC_TIME: host->card send host time to card to sync time
+ * @COSM_MSG_HEARTBEAT: card->host heartbeat
+ * @COSM_MSG_SHUTDOWN_STATUS: card->host with shutdown status as payload
+ */
+enum cosm_msg_id {
+ COSM_MSG_SHUTDOWN,
+ COSM_MSG_SYNC_TIME,
+ COSM_MSG_HEARTBEAT,
+ COSM_MSG_SHUTDOWN_STATUS,
+};
+
+struct cosm_msg {
+ u64 id;
+ union {
+ u64 shutdown_status;
+ struct timespec64 timespec;
+ };
+};
+
+extern const char * const cosm_state_string[];
+extern const char * const cosm_shutdown_status_string[];
+
+void cosm_sysfs_init(struct cosm_device *cdev);
+int cosm_start(struct cosm_device *cdev);
+void cosm_stop(struct cosm_device *cdev, bool force);
+int cosm_reset(struct cosm_device *cdev);
+int cosm_shutdown(struct cosm_device *cdev);
+void cosm_set_state(struct cosm_device *cdev, u8 state);
+void cosm_set_shutdown_status(struct cosm_device *cdev, u8 status);
+void cosm_init_debugfs(void);
+void cosm_exit_debugfs(void);
+void cosm_create_debug_dir(struct cosm_device *cdev);
+void cosm_delete_debug_dir(struct cosm_device *cdev);
+int cosm_scif_init(void);
+void cosm_scif_exit(void);
+void cosm_scif_work(struct work_struct *work);
+
+#endif
diff --git a/drivers/misc/mic/cosm/cosm_scif_server.c b/drivers/misc/mic/cosm/cosm_scif_server.c
new file mode 100644
index 0000000..5696df4
--- /dev/null
+++ b/drivers/misc/mic/cosm/cosm_scif_server.c
@@ -0,0 +1,405 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2015 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Intel MIC Coprocessor State Management (COSM) Driver
+ *
+ */
+#include <linux/kthread.h>
+#include "cosm_main.h"
+
+/*
+ * The COSM driver uses SCIF to communicate between the management node and the
+ * MIC cards. SCIF is used to (a) Send a shutdown command to the card (b)
+ * receive a shutdown status back from the card upon completion of shutdown and
+ * (c) receive periodic heartbeat messages from the card used to deduce if the
+ * card has crashed.
+ *
+ * A COSM server consisting of a SCIF listening endpoint waits for incoming
+ * connections from the card. Upon acceptance of the connection, a separate
+ * work-item is scheduled to handle SCIF message processing for that card. The
+ * life-time of this work-item is therefore the time from which the connection
+ * from a card is accepted to the time at which the connection is closed. A new
+ * work-item starts each time the card boots and is alive till the card (a)
+ * shuts down (b) is reset (c) crashes (d) cosm_client driver on the card is
+ * unloaded.
+ *
+ * From the point of view of COSM interactions with SCIF during card
+ * shutdown, reset and crash are as follows:
+ *
+ * Card shutdown
+ * -------------
+ * 1. COSM client on the card invokes orderly_poweroff() in response to SHUTDOWN
+ * message from the host.
+ * 2. Card driver shutdown callback invokes scif_unregister_device(..) resulting
+ * in scif_remove(..) getting called on the card
+ * 3. scif_remove -> scif_stop -> scif_handle_remove_node ->
+ * scif_peer_unregister_device -> device_unregister for the host peer device
+ * 4. During device_unregister remove(..) method of cosm_client is invoked which
+ * closes the COSM SCIF endpoint on the card. This results in a SCIF_DISCNCT
+ * message being sent to host SCIF. SCIF_DISCNCT message processing on the
+ * host SCIF sets the host COSM SCIF endpoint state to DISCONNECTED and wakes
+ * up the host COSM thread blocked in scif_poll(..) resulting in
+ * scif_poll(..) returning POLLHUP.
+ * 5. On the card, scif_peer_release_dev is next called which results in an
+ * SCIF_EXIT message being sent to the host and after receiving the
+ * SCIF_EXIT_ACK from the host the peer device teardown on the card is
+ * complete.
+ * 6. As part of the SCIF_EXIT message processing on the host, host sends a
+ * SCIF_REMOVE_NODE to itself corresponding to the card being removed. This
+ * starts a similar SCIF peer device teardown sequence on the host
+ * corresponding to the card being shut down.
+ *
+ * Card reset
+ * ----------
+ * The case of interest here is when the card has not been previously shut down
+ * since most of the steps below are skipped in that case:
+
+ * 1. cosm_stop(..) invokes hw_ops->stop(..) method of the base PCIe driver
+ * which unregisters the SCIF HW device resulting in scif_remove(..) being
+ * called on the host.
+ * 2. scif_remove(..) calls scif_disconnect_node(..) which results in a
+ * SCIF_EXIT message being sent to the card.
+ * 3. The card executes scif_stop() as part of SCIF_EXIT message
+ * processing. This results in the COSM endpoint on the card being closed and
+ * the SCIF host peer device on the card getting unregistered similar to
+ * steps 3, 4 and 5 for the card shutdown case above. scif_poll(..) on the
+ * host returns POLLHUP as a result.
+ * 4. On the host, card peer device unregister and SCIF HW remove(..) also
+ * subsequently complete.
+ *
+ * Card crash
+ * ----------
+ * If a reset is issued after the card has crashed, there is no SCIF_DISCNT
+ * message from the card which would result in scif_poll(..) returning
+ * POLLHUP. In this case when the host SCIF driver sends a SCIF_REMOVE_NODE
+ * message to itself resulting in the card SCIF peer device being unregistered,
+ * this results in a scif_peer_release_dev -> scif_cleanup_scifdev->
+ * scif_invalidate_ep call sequence which sets the endpoint state to
+ * DISCONNECTED and results in scif_poll(..) returning POLLHUP.
+ */
+
+#define COSM_SCIF_BACKLOG 16
+#define COSM_HEARTBEAT_CHECK_DELTA_SEC 10
+#define COSM_HEARTBEAT_TIMEOUT_SEC \
+ (COSM_HEARTBEAT_SEND_SEC + COSM_HEARTBEAT_CHECK_DELTA_SEC)
+#define COSM_HEARTBEAT_TIMEOUT_MSEC (COSM_HEARTBEAT_TIMEOUT_SEC * MSEC_PER_SEC)
+
+static struct task_struct *server_thread;
+static scif_epd_t listen_epd;
+
+/* Publish MIC card's shutdown status to user space MIC daemon */
+static void cosm_update_mic_status(struct cosm_device *cdev)
+{
+ if (cdev->shutdown_status_int != MIC_NOP) {
+ cosm_set_shutdown_status(cdev, cdev->shutdown_status_int);
+ cdev->shutdown_status_int = MIC_NOP;
+ }
+}
+
+/* Store MIC card's shutdown status internally when it is received */
+static void cosm_shutdown_status_int(struct cosm_device *cdev,
+ enum mic_status shutdown_status)
+{
+ switch (shutdown_status) {
+ case MIC_HALTED:
+ case MIC_POWER_OFF:
+ case MIC_RESTART:
+ case MIC_CRASHED:
+ break;
+ default:
+ dev_err(&cdev->dev, "%s %d Unexpected shutdown_status %d\n",
+ __func__, __LINE__, shutdown_status);
+ return;
+ };
+ cdev->shutdown_status_int = shutdown_status;
+ cdev->heartbeat_watchdog_enable = false;
+
+ if (cdev->state != MIC_SHUTTING_DOWN)
+ cosm_set_state(cdev, MIC_SHUTTING_DOWN);
+}
+
+/* Non-blocking recv. Read and process all available messages */
+static void cosm_scif_recv(struct cosm_device *cdev)
+{
+ struct cosm_msg msg;
+ int rc;
+
+ while (1) {
+ rc = scif_recv(cdev->epd, &msg, sizeof(msg), 0);
+ if (!rc) {
+ break;
+ } else if (rc < 0) {
+ dev_dbg(&cdev->dev, "%s: %d rc %d\n",
+ __func__, __LINE__, rc);
+ break;
+ }
+ dev_dbg(&cdev->dev, "%s: %d rc %d id 0x%llx\n",
+ __func__, __LINE__, rc, msg.id);
+
+ switch (msg.id) {
+ case COSM_MSG_SHUTDOWN_STATUS:
+ cosm_shutdown_status_int(cdev, msg.shutdown_status);
+ break;
+ case COSM_MSG_HEARTBEAT:
+ /* Nothing to do, heartbeat only unblocks scif_poll */
+ break;
+ default:
+ dev_err(&cdev->dev, "%s: %d unknown msg.id %lld\n",
+ __func__, __LINE__, msg.id);
+ break;
+ }
+ }
+}
+
+/* Publish crashed status for this MIC card */
+static void cosm_set_crashed(struct cosm_device *cdev)
+{
+ dev_err(&cdev->dev, "node alive timeout\n");
+ cosm_shutdown_status_int(cdev, MIC_CRASHED);
+ cosm_update_mic_status(cdev);
+}
+
+/* Send host time to the MIC card to sync system time between host and MIC */
+static void cosm_send_time(struct cosm_device *cdev)
+{
+ struct cosm_msg msg = { .id = COSM_MSG_SYNC_TIME };
+ int rc;
+
+ getnstimeofday64(&msg.timespec);
+ rc = scif_send(cdev->epd, &msg, sizeof(msg), SCIF_SEND_BLOCK);
+ if (rc < 0)
+ dev_err(&cdev->dev, "%s %d scif_send failed rc %d\n",
+ __func__, __LINE__, rc);
+}
+
+/*
+ * Close this cosm_device's endpoint after its peer endpoint on the card has
+ * been closed. In all cases except MIC card crash POLLHUP on the host is
+ * triggered by the client's endpoint being closed.
+ */
+static void cosm_scif_close(struct cosm_device *cdev)
+{
+ /*
+ * Because SHUTDOWN_STATUS message is sent by the MIC cards in the
+ * reboot notifier when shutdown is still not complete, we notify mpssd
+ * to reset the card when SCIF endpoint is closed.
+ */
+ cosm_update_mic_status(cdev);
+ scif_close(cdev->epd);
+ cdev->epd = NULL;
+ dev_dbg(&cdev->dev, "%s %d\n", __func__, __LINE__);
+}
+
+/*
+ * Set card state to ONLINE when a new SCIF connection from a MIC card is
+ * received. Normally the state is BOOTING when the connection comes in, but can
+ * be ONLINE if cosm_client driver on the card was unloaded and then reloaded.
+ */
+static int cosm_set_online(struct cosm_device *cdev)
+{
+ int rc = 0;
+
+ if (MIC_BOOTING == cdev->state || MIC_ONLINE == cdev->state) {
+ cdev->heartbeat_watchdog_enable = cdev->sysfs_heartbeat_enable;
+ cdev->epd = cdev->newepd;
+ if (cdev->state == MIC_BOOTING)
+ cosm_set_state(cdev, MIC_ONLINE);
+ cosm_send_time(cdev);
+ dev_dbg(&cdev->dev, "%s %d\n", __func__, __LINE__);
+ } else {
+ dev_warn(&cdev->dev, "%s %d not going online in state: %s\n",
+ __func__, __LINE__, cosm_state_string[cdev->state]);
+ rc = -EINVAL;
+ }
+ /* Drop reference acquired by bus_find_device in the server thread */
+ put_device(&cdev->dev);
+ return rc;
+}
+
+/*
+ * Work function for handling work for a SCIF connection from a particular MIC
+ * card. It first sets the card state to ONLINE and then calls scif_poll to
+ * block on activity such as incoming messages on the SCIF endpoint. When the
+ * endpoint is closed, the work function exits, completing its life cycle, from
+ * MIC card boot to card shutdown/reset/crash.
+ */
+void cosm_scif_work(struct work_struct *work)
+{
+ struct cosm_device *cdev = container_of(work, struct cosm_device,
+ scif_work);
+ struct scif_pollepd pollepd;
+ int rc;
+
+ mutex_lock(&cdev->cosm_mutex);
+ if (cosm_set_online(cdev))
+ goto exit;
+
+ while (1) {
+ pollepd.epd = cdev->epd;
+ pollepd.events = POLLIN;
+
+ /* Drop the mutex before blocking in scif_poll(..) */
+ mutex_unlock(&cdev->cosm_mutex);
+ /* poll(..) with timeout on our endpoint */
+ rc = scif_poll(&pollepd, 1, COSM_HEARTBEAT_TIMEOUT_MSEC);
+ mutex_lock(&cdev->cosm_mutex);
+ if (rc < 0) {
+ dev_err(&cdev->dev, "%s %d scif_poll rc %d\n",
+ __func__, __LINE__, rc);
+ continue;
+ }
+
+ /* There is a message from the card */
+ if (pollepd.revents & POLLIN)
+ cosm_scif_recv(cdev);
+
+ /* The peer endpoint is closed or this endpoint disconnected */
+ if (pollepd.revents & POLLHUP) {
+ cosm_scif_close(cdev);
+ break;
+ }
+
+ /* Did we timeout from poll? */
+ if (!rc && cdev->heartbeat_watchdog_enable)
+ cosm_set_crashed(cdev);
+ }
+exit:
+ dev_dbg(&cdev->dev, "%s %d exiting\n", __func__, __LINE__);
+ mutex_unlock(&cdev->cosm_mutex);
+}
+
+/*
+ * COSM SCIF server thread function. Accepts incoming SCIF connections from MIC
+ * cards, finds the correct cosm_device to associate that connection with and
+ * schedules individual work items for each MIC card.
+ */
+static int cosm_scif_server(void *unused)
+{
+ struct cosm_device *cdev;
+ scif_epd_t newepd;
+ struct scif_port_id port_id;
+ int rc;
+
+ allow_signal(SIGKILL);
+
+ while (!kthread_should_stop()) {
+ rc = scif_accept(listen_epd, &port_id, &newepd,
+ SCIF_ACCEPT_SYNC);
+ if (rc < 0) {
+ if (-ERESTARTSYS != rc)
+ pr_err("%s %d rc %d\n", __func__, __LINE__, rc);
+ continue;
+ }
+
+ /*
+ * Associate the incoming connection with a particular
+ * cosm_device, COSM device ID == SCIF node ID - 1
+ */
+ cdev = cosm_find_cdev_by_id(port_id.node - 1);
+ if (!cdev)
+ continue;
+ cdev->newepd = newepd;
+ schedule_work(&cdev->scif_work);
+ }
+
+ pr_debug("%s %d Server thread stopped\n", __func__, __LINE__);
+ return 0;
+}
+
+static int cosm_scif_listen(void)
+{
+ int rc;
+
+ listen_epd = scif_open();
+ if (!listen_epd) {
+ pr_err("%s %d scif_open failed\n", __func__, __LINE__);
+ return -ENOMEM;
+ }
+
+ rc = scif_bind(listen_epd, SCIF_COSM_LISTEN_PORT);
+ if (rc < 0) {
+ pr_err("%s %d scif_bind failed rc %d\n",
+ __func__, __LINE__, rc);
+ goto err;
+ }
+
+ rc = scif_listen(listen_epd, COSM_SCIF_BACKLOG);
+ if (rc < 0) {
+ pr_err("%s %d scif_listen rc %d\n", __func__, __LINE__, rc);
+ goto err;
+ }
+ pr_debug("%s %d listen_epd set up\n", __func__, __LINE__);
+ return 0;
+err:
+ scif_close(listen_epd);
+ listen_epd = NULL;
+ return rc;
+}
+
+static void cosm_scif_listen_exit(void)
+{
+ pr_debug("%s %d closing listen_epd\n", __func__, __LINE__);
+ if (listen_epd) {
+ scif_close(listen_epd);
+ listen_epd = NULL;
+ }
+}
+
+/*
+ * Create a listening SCIF endpoint and a server kthread which accepts incoming
+ * SCIF connections from MIC cards
+ */
+int cosm_scif_init(void)
+{
+ int rc = cosm_scif_listen();
+
+ if (rc) {
+ pr_err("%s %d cosm_scif_listen rc %d\n",
+ __func__, __LINE__, rc);
+ goto err;
+ }
+
+ server_thread = kthread_run(cosm_scif_server, NULL, "cosm_server");
+ if (IS_ERR(server_thread)) {
+ rc = PTR_ERR(server_thread);
+ pr_err("%s %d kthread_run rc %d\n", __func__, __LINE__, rc);
+ goto listen_exit;
+ }
+ return 0;
+listen_exit:
+ cosm_scif_listen_exit();
+err:
+ return rc;
+}
+
+/* Stop the running server thread and close the listening SCIF endpoint */
+void cosm_scif_exit(void)
+{
+ int rc;
+
+ if (!IS_ERR_OR_NULL(server_thread)) {
+ rc = send_sig(SIGKILL, server_thread, 0);
+ if (rc) {
+ pr_err("%s %d send_sig rc %d\n",
+ __func__, __LINE__, rc);
+ return;
+ }
+ kthread_stop(server_thread);
+ }
+
+ cosm_scif_listen_exit();
+}
diff --git a/drivers/misc/mic/cosm/cosm_sysfs.c b/drivers/misc/mic/cosm/cosm_sysfs.c
new file mode 100644
index 0000000..29d6863
--- /dev/null
+++ b/drivers/misc/mic/cosm/cosm_sysfs.c
@@ -0,0 +1,461 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2015 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Intel MIC Coprocessor State Management (COSM) Driver
+ *
+ */
+#include <linux/slab.h>
+#include "cosm_main.h"
+
+/*
+ * A state-to-string lookup table, for exposing a human readable state
+ * via sysfs. Always keep in sync with enum cosm_states
+ */
+const char * const cosm_state_string[] = {
+ [MIC_READY] = "ready",
+ [MIC_BOOTING] = "booting",
+ [MIC_ONLINE] = "online",
+ [MIC_SHUTTING_DOWN] = "shutting_down",
+ [MIC_RESETTING] = "resetting",
+ [MIC_RESET_FAILED] = "reset_failed",
+};
+
+/*
+ * A shutdown-status-to-string lookup table, for exposing a human
+ * readable state via sysfs. Always keep in sync with enum cosm_shutdown_status
+ */
+const char * const cosm_shutdown_status_string[] = {
+ [MIC_NOP] = "nop",
+ [MIC_CRASHED] = "crashed",
+ [MIC_HALTED] = "halted",
+ [MIC_POWER_OFF] = "poweroff",
+ [MIC_RESTART] = "restart",
+};
+
+void cosm_set_shutdown_status(struct cosm_device *cdev, u8 shutdown_status)
+{
+ dev_dbg(&cdev->dev, "Shutdown Status %s -> %s\n",
+ cosm_shutdown_status_string[cdev->shutdown_status],
+ cosm_shutdown_status_string[shutdown_status]);
+ cdev->shutdown_status = shutdown_status;
+}
+
+void cosm_set_state(struct cosm_device *cdev, u8 state)
+{
+ dev_dbg(&cdev->dev, "State %s -> %s\n",
+ cosm_state_string[cdev->state],
+ cosm_state_string[state]);
+ cdev->state = state;
+ sysfs_notify_dirent(cdev->state_sysfs);
+}
+
+static ssize_t
+family_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ struct cosm_device *cdev = dev_get_drvdata(dev);
+
+ if (!cdev)
+ return -EINVAL;
+
+ return cdev->hw_ops->family(cdev, buf);
+}
+static DEVICE_ATTR_RO(family);
+
+static ssize_t
+stepping_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ struct cosm_device *cdev = dev_get_drvdata(dev);
+
+ if (!cdev)
+ return -EINVAL;
+
+ return cdev->hw_ops->stepping(cdev, buf);
+}
+static DEVICE_ATTR_RO(stepping);
+
+static ssize_t
+state_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ struct cosm_device *cdev = dev_get_drvdata(dev);
+
+ if (!cdev || cdev->state >= MIC_LAST)
+ return -EINVAL;
+
+ return scnprintf(buf, PAGE_SIZE, "%s\n",
+ cosm_state_string[cdev->state]);
+}
+
+static ssize_t
+state_store(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct cosm_device *cdev = dev_get_drvdata(dev);
+ int rc;
+
+ if (!cdev)
+ return -EINVAL;
+
+ if (sysfs_streq(buf, "boot")) {
+ rc = cosm_start(cdev);
+ goto done;
+ }
+ if (sysfs_streq(buf, "reset")) {
+ rc = cosm_reset(cdev);
+ goto done;
+ }
+
+ if (sysfs_streq(buf, "shutdown")) {
+ rc = cosm_shutdown(cdev);
+ goto done;
+ }
+ rc = -EINVAL;
+done:
+ if (rc)
+ count = rc;
+ return count;
+}
+static DEVICE_ATTR_RW(state);
+
+static ssize_t shutdown_status_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct cosm_device *cdev = dev_get_drvdata(dev);
+
+ if (!cdev || cdev->shutdown_status >= MIC_STATUS_LAST)
+ return -EINVAL;
+
+ return scnprintf(buf, PAGE_SIZE, "%s\n",
+ cosm_shutdown_status_string[cdev->shutdown_status]);
+}
+static DEVICE_ATTR_RO(shutdown_status);
+
+static ssize_t
+heartbeat_enable_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct cosm_device *cdev = dev_get_drvdata(dev);
+
+ if (!cdev)
+ return -EINVAL;
+
+ return scnprintf(buf, PAGE_SIZE, "%d\n", cdev->sysfs_heartbeat_enable);
+}
+
+static ssize_t
+heartbeat_enable_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct cosm_device *cdev = dev_get_drvdata(dev);
+ int enable;
+ int ret;
+
+ if (!cdev)
+ return -EINVAL;
+
+ mutex_lock(&cdev->cosm_mutex);
+ ret = kstrtoint(buf, 10, &enable);
+ if (ret)
+ goto unlock;
+
+ cdev->sysfs_heartbeat_enable = enable;
+ /* if state is not online, cdev->heartbeat_watchdog_enable is 0 */
+ if (cdev->state == MIC_ONLINE)
+ cdev->heartbeat_watchdog_enable = enable;
+ ret = count;
+unlock:
+ mutex_unlock(&cdev->cosm_mutex);
+ return ret;
+}
+static DEVICE_ATTR_RW(heartbeat_enable);
+
+static ssize_t
+cmdline_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ struct cosm_device *cdev = dev_get_drvdata(dev);
+ char *cmdline;
+
+ if (!cdev)
+ return -EINVAL;
+
+ cmdline = cdev->cmdline;
+
+ if (cmdline)
+ return scnprintf(buf, PAGE_SIZE, "%s\n", cmdline);
+ return 0;
+}
+
+static ssize_t
+cmdline_store(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct cosm_device *cdev = dev_get_drvdata(dev);
+
+ if (!cdev)
+ return -EINVAL;
+
+ mutex_lock(&cdev->cosm_mutex);
+ kfree(cdev->cmdline);
+
+ cdev->cmdline = kmalloc(count + 1, GFP_KERNEL);
+ if (!cdev->cmdline) {
+ count = -ENOMEM;
+ goto unlock;
+ }
+
+ strncpy(cdev->cmdline, buf, count);
+
+ if (cdev->cmdline[count - 1] == '\n')
+ cdev->cmdline[count - 1] = '\0';
+ else
+ cdev->cmdline[count] = '\0';
+unlock:
+ mutex_unlock(&cdev->cosm_mutex);
+ return count;
+}
+static DEVICE_ATTR_RW(cmdline);
+
+static ssize_t
+firmware_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ struct cosm_device *cdev = dev_get_drvdata(dev);
+ char *firmware;
+
+ if (!cdev)
+ return -EINVAL;
+
+ firmware = cdev->firmware;
+
+ if (firmware)
+ return scnprintf(buf, PAGE_SIZE, "%s\n", firmware);
+ return 0;
+}
+
+static ssize_t
+firmware_store(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct cosm_device *cdev = dev_get_drvdata(dev);
+
+ if (!cdev)
+ return -EINVAL;
+
+ mutex_lock(&cdev->cosm_mutex);
+ kfree(cdev->firmware);
+
+ cdev->firmware = kmalloc(count + 1, GFP_KERNEL);
+ if (!cdev->firmware) {
+ count = -ENOMEM;
+ goto unlock;
+ }
+ strncpy(cdev->firmware, buf, count);
+
+ if (cdev->firmware[count - 1] == '\n')
+ cdev->firmware[count - 1] = '\0';
+ else
+ cdev->firmware[count] = '\0';
+unlock:
+ mutex_unlock(&cdev->cosm_mutex);
+ return count;
+}
+static DEVICE_ATTR_RW(firmware);
+
+static ssize_t
+ramdisk_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ struct cosm_device *cdev = dev_get_drvdata(dev);
+ char *ramdisk;
+
+ if (!cdev)
+ return -EINVAL;
+
+ ramdisk = cdev->ramdisk;
+
+ if (ramdisk)
+ return scnprintf(buf, PAGE_SIZE, "%s\n", ramdisk);
+ return 0;
+}
+
+static ssize_t
+ramdisk_store(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct cosm_device *cdev = dev_get_drvdata(dev);
+
+ if (!cdev)
+ return -EINVAL;
+
+ mutex_lock(&cdev->cosm_mutex);
+ kfree(cdev->ramdisk);
+
+ cdev->ramdisk = kmalloc(count + 1, GFP_KERNEL);
+ if (!cdev->ramdisk) {
+ count = -ENOMEM;
+ goto unlock;
+ }
+
+ strncpy(cdev->ramdisk, buf, count);
+
+ if (cdev->ramdisk[count - 1] == '\n')
+ cdev->ramdisk[count - 1] = '\0';
+ else
+ cdev->ramdisk[count] = '\0';
+unlock:
+ mutex_unlock(&cdev->cosm_mutex);
+ return count;
+}
+static DEVICE_ATTR_RW(ramdisk);
+
+static ssize_t
+bootmode_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ struct cosm_device *cdev = dev_get_drvdata(dev);
+ char *bootmode;
+
+ if (!cdev)
+ return -EINVAL;
+
+ bootmode = cdev->bootmode;
+
+ if (bootmode)
+ return scnprintf(buf, PAGE_SIZE, "%s\n", bootmode);
+ return 0;
+}
+
+static ssize_t
+bootmode_store(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct cosm_device *cdev = dev_get_drvdata(dev);
+
+ if (!cdev)
+ return -EINVAL;
+
+ if (!sysfs_streq(buf, "linux") && !sysfs_streq(buf, "flash"))
+ return -EINVAL;
+
+ mutex_lock(&cdev->cosm_mutex);
+ kfree(cdev->bootmode);
+
+ cdev->bootmode = kmalloc(count + 1, GFP_KERNEL);
+ if (!cdev->bootmode) {
+ count = -ENOMEM;
+ goto unlock;
+ }
+
+ strncpy(cdev->bootmode, buf, count);
+
+ if (cdev->bootmode[count - 1] == '\n')
+ cdev->bootmode[count - 1] = '\0';
+ else
+ cdev->bootmode[count] = '\0';
+unlock:
+ mutex_unlock(&cdev->cosm_mutex);
+ return count;
+}
+static DEVICE_ATTR_RW(bootmode);
+
+static ssize_t
+log_buf_addr_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct cosm_device *cdev = dev_get_drvdata(dev);
+
+ if (!cdev)
+ return -EINVAL;
+
+ return scnprintf(buf, PAGE_SIZE, "%p\n", cdev->log_buf_addr);
+}
+
+static ssize_t
+log_buf_addr_store(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct cosm_device *cdev = dev_get_drvdata(dev);
+ int ret;
+ unsigned long addr;
+
+ if (!cdev)
+ return -EINVAL;
+
+ ret = kstrtoul(buf, 16, &addr);
+ if (ret)
+ goto exit;
+
+ cdev->log_buf_addr = (void *)addr;
+ ret = count;
+exit:
+ return ret;
+}
+static DEVICE_ATTR_RW(log_buf_addr);
+
+static ssize_t
+log_buf_len_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct cosm_device *cdev = dev_get_drvdata(dev);
+
+ if (!cdev)
+ return -EINVAL;
+
+ return scnprintf(buf, PAGE_SIZE, "%p\n", cdev->log_buf_len);
+}
+
+static ssize_t
+log_buf_len_store(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct cosm_device *cdev = dev_get_drvdata(dev);
+ int ret;
+ unsigned long addr;
+
+ if (!cdev)
+ return -EINVAL;
+
+ ret = kstrtoul(buf, 16, &addr);
+ if (ret)
+ goto exit;
+
+ cdev->log_buf_len = (int *)addr;
+ ret = count;
+exit:
+ return ret;
+}
+static DEVICE_ATTR_RW(log_buf_len);
+
+static struct attribute *cosm_default_attrs[] = {
+ &dev_attr_family.attr,
+ &dev_attr_stepping.attr,
+ &dev_attr_state.attr,
+ &dev_attr_shutdown_status.attr,
+ &dev_attr_heartbeat_enable.attr,
+ &dev_attr_cmdline.attr,
+ &dev_attr_firmware.attr,
+ &dev_attr_ramdisk.attr,
+ &dev_attr_bootmode.attr,
+ &dev_attr_log_buf_addr.attr,
+ &dev_attr_log_buf_len.attr,
+
+ NULL
+};
+
+ATTRIBUTE_GROUPS(cosm_default);
+
+void cosm_sysfs_init(struct cosm_device *cdev)
+{
+ cdev->attr_group = cosm_default_groups;
+}
diff --git a/drivers/misc/mic/cosm_client/Makefile b/drivers/misc/mic/cosm_client/Makefile
new file mode 100644
index 0000000..6f751a5
--- /dev/null
+++ b/drivers/misc/mic/cosm_client/Makefile
@@ -0,0 +1,7 @@
+#
+# Makefile - Intel MIC COSM Client Driver
+# Copyright(c) 2015, Intel Corporation.
+#
+obj-$(CONFIG_MIC_COSM) += cosm_client.o
+
+cosm_client-objs += cosm_scif_client.o
diff --git a/drivers/misc/mic/cosm_client/cosm_scif_client.c b/drivers/misc/mic/cosm_client/cosm_scif_client.c
new file mode 100644
index 0000000..03e98bf
--- /dev/null
+++ b/drivers/misc/mic/cosm_client/cosm_scif_client.c
@@ -0,0 +1,275 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2015 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Intel MIC COSM Client Driver
+ *
+ */
+#include <linux/module.h>
+#include <linux/delay.h>
+#include <linux/reboot.h>
+#include <linux/kthread.h>
+#include "../cosm/cosm_main.h"
+
+#define COSM_SCIF_MAX_RETRIES 10
+#define COSM_HEARTBEAT_SEND_MSEC (COSM_HEARTBEAT_SEND_SEC * MSEC_PER_SEC)
+
+static struct task_struct *client_thread;
+static scif_epd_t client_epd;
+static struct scif_peer_dev *client_spdev;
+
+/*
+ * Reboot notifier: receives shutdown status from the OS and communicates it
+ * back to the COSM process on the host
+ */
+static int cosm_reboot_event(struct notifier_block *this, unsigned long event,
+ void *ptr)
+{
+ struct cosm_msg msg = { .id = COSM_MSG_SHUTDOWN_STATUS };
+ int rc;
+
+ event = (event == SYS_RESTART) ? SYSTEM_RESTART : event;
+ dev_info(&client_spdev->dev, "%s %d received event %ld\n",
+ __func__, __LINE__, event);
+
+ msg.shutdown_status = event;
+ rc = scif_send(client_epd, &msg, sizeof(msg), SCIF_SEND_BLOCK);
+ if (rc < 0)
+ dev_err(&client_spdev->dev, "%s %d scif_send rc %d\n",
+ __func__, __LINE__, rc);
+
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block cosm_reboot = {
+ .notifier_call = cosm_reboot_event,
+};
+
+/* Set system time from timespec value received from the host */
+static void cosm_set_time(struct cosm_msg *msg)
+{
+ int rc = do_settimeofday64(&msg->timespec);
+
+ if (rc)
+ dev_err(&client_spdev->dev, "%s: %d settimeofday rc %d\n",
+ __func__, __LINE__, rc);
+}
+
+/* COSM client receive message processing */
+static void cosm_client_recv(void)
+{
+ struct cosm_msg msg;
+ int rc;
+
+ while (1) {
+ rc = scif_recv(client_epd, &msg, sizeof(msg), 0);
+ if (!rc) {
+ return;
+ } else if (rc < 0) {
+ dev_err(&client_spdev->dev, "%s: %d rc %d\n",
+ __func__, __LINE__, rc);
+ return;
+ }
+
+ dev_dbg(&client_spdev->dev, "%s: %d rc %d id 0x%llx\n",
+ __func__, __LINE__, rc, msg.id);
+
+ switch (msg.id) {
+ case COSM_MSG_SYNC_TIME:
+ cosm_set_time(&msg);
+ break;
+ case COSM_MSG_SHUTDOWN:
+ orderly_poweroff(true);
+ break;
+ default:
+ dev_err(&client_spdev->dev, "%s: %d unknown id %lld\n",
+ __func__, __LINE__, msg.id);
+ break;
+ }
+ }
+}
+
+/* Initiate connection to the COSM server on the host */
+static int cosm_scif_connect(void)
+{
+ struct scif_port_id port_id;
+ int i, rc;
+
+ client_epd = scif_open();
+ if (!client_epd) {
+ dev_err(&client_spdev->dev, "%s %d scif_open failed\n",
+ __func__, __LINE__);
+ return -ENOMEM;
+ }
+
+ port_id.node = 0;
+ port_id.port = SCIF_COSM_LISTEN_PORT;
+
+ for (i = 0; i < COSM_SCIF_MAX_RETRIES; i++) {
+ rc = scif_connect(client_epd, &port_id);
+ if (rc < 0)
+ msleep(1000);
+ else
+ break;
+ }
+
+ if (rc < 0) {
+ dev_err(&client_spdev->dev, "%s %d scif_connect rc %d\n",
+ __func__, __LINE__, rc);
+ scif_close(client_epd);
+ client_epd = NULL;
+ }
+ return rc < 0 ? rc : 0;
+}
+
+/* Close host SCIF connection */
+static void cosm_scif_connect_exit(void)
+{
+ if (client_epd) {
+ scif_close(client_epd);
+ client_epd = NULL;
+ }
+}
+
+/*
+ * COSM SCIF client thread function: waits for messages from the host and sends
+ * a heartbeat to the host
+ */
+static int cosm_scif_client(void *unused)
+{
+ struct cosm_msg msg = { .id = COSM_MSG_HEARTBEAT };
+ struct scif_pollepd pollepd;
+ int rc;
+
+ allow_signal(SIGKILL);
+
+ while (!kthread_should_stop()) {
+ pollepd.epd = client_epd;
+ pollepd.events = POLLIN;
+
+ rc = scif_poll(&pollepd, 1, COSM_HEARTBEAT_SEND_MSEC);
+ if (rc < 0) {
+ if (-EINTR != rc)
+ dev_err(&client_spdev->dev,
+ "%s %d scif_poll rc %d\n",
+ __func__, __LINE__, rc);
+ continue;
+ }
+
+ if (pollepd.revents & POLLIN)
+ cosm_client_recv();
+
+ msg.id = COSM_MSG_HEARTBEAT;
+ rc = scif_send(client_epd, &msg, sizeof(msg), SCIF_SEND_BLOCK);
+ if (rc < 0)
+ dev_err(&client_spdev->dev, "%s %d scif_send rc %d\n",
+ __func__, __LINE__, rc);
+ }
+
+ dev_dbg(&client_spdev->dev, "%s %d Client thread stopped\n",
+ __func__, __LINE__);
+ return 0;
+}
+
+static void cosm_scif_probe(struct scif_peer_dev *spdev)
+{
+ int rc;
+
+ dev_dbg(&spdev->dev, "%s %d: dnode %d\n",
+ __func__, __LINE__, spdev->dnode);
+
+ /* We are only interested in the host with spdev->dnode == 0 */
+ if (spdev->dnode)
+ return;
+
+ client_spdev = spdev;
+ rc = cosm_scif_connect();
+ if (rc)
+ goto exit;
+
+ rc = register_reboot_notifier(&cosm_reboot);
+ if (rc) {
+ dev_err(&spdev->dev,
+ "reboot notifier registration failed rc %d\n", rc);
+ goto connect_exit;
+ }
+
+ client_thread = kthread_run(cosm_scif_client, NULL, "cosm_client");
+ if (IS_ERR(client_thread)) {
+ rc = PTR_ERR(client_thread);
+ dev_err(&spdev->dev, "%s %d kthread_run rc %d\n",
+ __func__, __LINE__, rc);
+ goto unreg_reboot;
+ }
+ return;
+unreg_reboot:
+ unregister_reboot_notifier(&cosm_reboot);
+connect_exit:
+ cosm_scif_connect_exit();
+exit:
+ client_spdev = NULL;
+}
+
+static void cosm_scif_remove(struct scif_peer_dev *spdev)
+{
+ int rc;
+
+ dev_dbg(&spdev->dev, "%s %d: dnode %d\n",
+ __func__, __LINE__, spdev->dnode);
+
+ if (spdev->dnode)
+ return;
+
+ if (!IS_ERR_OR_NULL(client_thread)) {
+ rc = send_sig(SIGKILL, client_thread, 0);
+ if (rc) {
+ pr_err("%s %d send_sig rc %d\n",
+ __func__, __LINE__, rc);
+ return;
+ }
+ kthread_stop(client_thread);
+ }
+ unregister_reboot_notifier(&cosm_reboot);
+ cosm_scif_connect_exit();
+ client_spdev = NULL;
+}
+
+static struct scif_client scif_client_cosm = {
+ .name = KBUILD_MODNAME,
+ .probe = cosm_scif_probe,
+ .remove = cosm_scif_remove,
+};
+
+static int __init cosm_client_init(void)
+{
+ int rc = scif_client_register(&scif_client_cosm);
+
+ if (rc)
+ pr_err("scif_client_register failed rc %d\n", rc);
+ return rc;
+}
+
+static void __exit cosm_client_exit(void)
+{
+ scif_client_unregister(&scif_client_cosm);
+}
+
+module_init(cosm_client_init);
+module_exit(cosm_client_exit);
+
+MODULE_AUTHOR("Intel Corporation");
+MODULE_DESCRIPTION("Intel(R) MIC card OS state management client driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/misc/mic/host/Makefile b/drivers/misc/mic/host/Makefile
new file mode 100644
index 0000000..004d3db
--- /dev/null
+++ b/drivers/misc/mic/host/Makefile
@@ -0,0 +1,13 @@
+#
+# Makefile - Intel MIC Linux driver.
+# Copyright(c) 2013, Intel Corporation.
+#
+obj-$(CONFIG_INTEL_MIC_HOST) += mic_host.o
+mic_host-objs := mic_main.o
+mic_host-objs += mic_x100.o
+mic_host-objs += mic_smpt.o
+mic_host-objs += mic_intr.o
+mic_host-objs += mic_boot.o
+mic_host-objs += mic_debugfs.o
+mic_host-objs += mic_fops.o
+mic_host-objs += mic_virtio.o
diff --git a/drivers/misc/mic/host/mic_boot.c b/drivers/misc/mic/host/mic_boot.c
new file mode 100644
index 0000000..7845564
--- /dev/null
+++ b/drivers/misc/mic/host/mic_boot.c
@@ -0,0 +1,482 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2013 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Intel MIC Host driver.
+ *
+ */
+#include <linux/delay.h>
+#include <linux/firmware.h>
+#include <linux/pci.h>
+#include <linux/kmod.h>
+#include <linux/mic_common.h>
+#include <linux/mic_bus.h>
+#include "../bus/scif_bus.h"
+#include "../common/mic_dev.h"
+#include "mic_device.h"
+#include "mic_smpt.h"
+#include "mic_virtio.h"
+
+static inline struct mic_device *scdev_to_mdev(struct scif_hw_dev *scdev)
+{
+ return dev_get_drvdata(scdev->dev.parent);
+}
+
+static void *__mic_dma_alloc(struct device *dev, size_t size,
+ dma_addr_t *dma_handle, gfp_t gfp,
+ struct dma_attrs *attrs)
+{
+ struct scif_hw_dev *scdev = dev_get_drvdata(dev);
+ struct mic_device *mdev = scdev_to_mdev(scdev);
+ dma_addr_t tmp;
+ void *va = kmalloc(size, gfp);
+
+ if (va) {
+ tmp = mic_map_single(mdev, va, size);
+ if (dma_mapping_error(dev, tmp)) {
+ kfree(va);
+ va = NULL;
+ } else {
+ *dma_handle = tmp;
+ }
+ }
+ return va;
+}
+
+static void __mic_dma_free(struct device *dev, size_t size, void *vaddr,
+ dma_addr_t dma_handle, struct dma_attrs *attrs)
+{
+ struct scif_hw_dev *scdev = dev_get_drvdata(dev);
+ struct mic_device *mdev = scdev_to_mdev(scdev);
+
+ mic_unmap_single(mdev, dma_handle, size);
+ kfree(vaddr);
+}
+
+static dma_addr_t
+__mic_dma_map_page(struct device *dev, struct page *page, unsigned long offset,
+ size_t size, enum dma_data_direction dir,
+ struct dma_attrs *attrs)
+{
+ void *va = phys_to_virt(page_to_phys(page)) + offset;
+ struct scif_hw_dev *scdev = dev_get_drvdata(dev);
+ struct mic_device *mdev = scdev_to_mdev(scdev);
+
+ return mic_map_single(mdev, va, size);
+}
+
+static void
+__mic_dma_unmap_page(struct device *dev, dma_addr_t dma_addr,
+ size_t size, enum dma_data_direction dir,
+ struct dma_attrs *attrs)
+{
+ struct scif_hw_dev *scdev = dev_get_drvdata(dev);
+ struct mic_device *mdev = scdev_to_mdev(scdev);
+
+ mic_unmap_single(mdev, dma_addr, size);
+}
+
+static int __mic_dma_map_sg(struct device *dev, struct scatterlist *sg,
+ int nents, enum dma_data_direction dir,
+ struct dma_attrs *attrs)
+{
+ struct scif_hw_dev *scdev = dev_get_drvdata(dev);
+ struct mic_device *mdev = scdev_to_mdev(scdev);
+ struct scatterlist *s;
+ int i, j, ret;
+ dma_addr_t da;
+
+ ret = dma_map_sg(&mdev->pdev->dev, sg, nents, dir);
+ if (ret <= 0)
+ return 0;
+
+ for_each_sg(sg, s, nents, i) {
+ da = mic_map(mdev, sg_dma_address(s) + s->offset, s->length);
+ if (!da)
+ goto err;
+ sg_dma_address(s) = da;
+ }
+ return nents;
+err:
+ for_each_sg(sg, s, i, j) {
+ mic_unmap(mdev, sg_dma_address(s), s->length);
+ sg_dma_address(s) = mic_to_dma_addr(mdev, sg_dma_address(s));
+ }
+ dma_unmap_sg(&mdev->pdev->dev, sg, nents, dir);
+ return 0;
+}
+
+static void __mic_dma_unmap_sg(struct device *dev,
+ struct scatterlist *sg, int nents,
+ enum dma_data_direction dir,
+ struct dma_attrs *attrs)
+{
+ struct scif_hw_dev *scdev = dev_get_drvdata(dev);
+ struct mic_device *mdev = scdev_to_mdev(scdev);
+ struct scatterlist *s;
+ dma_addr_t da;
+ int i;
+
+ for_each_sg(sg, s, nents, i) {
+ da = mic_to_dma_addr(mdev, sg_dma_address(s));
+ mic_unmap(mdev, sg_dma_address(s), s->length);
+ sg_dma_address(s) = da;
+ }
+ dma_unmap_sg(&mdev->pdev->dev, sg, nents, dir);
+}
+
+static struct dma_map_ops __mic_dma_ops = {
+ .alloc = __mic_dma_alloc,
+ .free = __mic_dma_free,
+ .map_page = __mic_dma_map_page,
+ .unmap_page = __mic_dma_unmap_page,
+ .map_sg = __mic_dma_map_sg,
+ .unmap_sg = __mic_dma_unmap_sg,
+};
+
+static struct mic_irq *
+___mic_request_irq(struct scif_hw_dev *scdev,
+ irqreturn_t (*func)(int irq, void *data),
+ const char *name,
+ void *data, int db)
+{
+ struct mic_device *mdev = scdev_to_mdev(scdev);
+
+ return mic_request_threaded_irq(mdev, func, NULL, name, data,
+ db, MIC_INTR_DB);
+}
+
+static void
+___mic_free_irq(struct scif_hw_dev *scdev,
+ struct mic_irq *cookie, void *data)
+{
+ struct mic_device *mdev = scdev_to_mdev(scdev);
+
+ return mic_free_irq(mdev, cookie, data);
+}
+
+static void ___mic_ack_interrupt(struct scif_hw_dev *scdev, int num)
+{
+ struct mic_device *mdev = scdev_to_mdev(scdev);
+
+ mdev->ops->intr_workarounds(mdev);
+}
+
+static int ___mic_next_db(struct scif_hw_dev *scdev)
+{
+ struct mic_device *mdev = scdev_to_mdev(scdev);
+
+ return mic_next_db(mdev);
+}
+
+static void ___mic_send_intr(struct scif_hw_dev *scdev, int db)
+{
+ struct mic_device *mdev = scdev_to_mdev(scdev);
+
+ mdev->ops->send_intr(mdev, db);
+}
+
+static void __iomem *___mic_ioremap(struct scif_hw_dev *scdev,
+ phys_addr_t pa, size_t len)
+{
+ struct mic_device *mdev = scdev_to_mdev(scdev);
+
+ return mdev->aper.va + pa;
+}
+
+static void ___mic_iounmap(struct scif_hw_dev *scdev, void __iomem *va)
+{
+ /* nothing to do */
+}
+
+static struct scif_hw_ops scif_hw_ops = {
+ .request_irq = ___mic_request_irq,
+ .free_irq = ___mic_free_irq,
+ .ack_interrupt = ___mic_ack_interrupt,
+ .next_db = ___mic_next_db,
+ .send_intr = ___mic_send_intr,
+ .ioremap = ___mic_ioremap,
+ .iounmap = ___mic_iounmap,
+};
+
+static inline struct mic_device *mbdev_to_mdev(struct mbus_device *mbdev)
+{
+ return dev_get_drvdata(mbdev->dev.parent);
+}
+
+static dma_addr_t
+mic_dma_map_page(struct device *dev, struct page *page,
+ unsigned long offset, size_t size, enum dma_data_direction dir,
+ struct dma_attrs *attrs)
+{
+ void *va = phys_to_virt(page_to_phys(page)) + offset;
+ struct mic_device *mdev = dev_get_drvdata(dev->parent);
+
+ return mic_map_single(mdev, va, size);
+}
+
+static void
+mic_dma_unmap_page(struct device *dev, dma_addr_t dma_addr,
+ size_t size, enum dma_data_direction dir,
+ struct dma_attrs *attrs)
+{
+ struct mic_device *mdev = dev_get_drvdata(dev->parent);
+ mic_unmap_single(mdev, dma_addr, size);
+}
+
+static struct dma_map_ops mic_dma_ops = {
+ .map_page = mic_dma_map_page,
+ .unmap_page = mic_dma_unmap_page,
+};
+
+static struct mic_irq *
+_mic_request_threaded_irq(struct mbus_device *mbdev,
+ irq_handler_t handler, irq_handler_t thread_fn,
+ const char *name, void *data, int intr_src)
+{
+ return mic_request_threaded_irq(mbdev_to_mdev(mbdev), handler,
+ thread_fn, name, data,
+ intr_src, MIC_INTR_DMA);
+}
+
+static void _mic_free_irq(struct mbus_device *mbdev,
+ struct mic_irq *cookie, void *data)
+{
+ return mic_free_irq(mbdev_to_mdev(mbdev), cookie, data);
+}
+
+static void _mic_ack_interrupt(struct mbus_device *mbdev, int num)
+{
+ struct mic_device *mdev = mbdev_to_mdev(mbdev);
+ mdev->ops->intr_workarounds(mdev);
+}
+
+static struct mbus_hw_ops mbus_hw_ops = {
+ .request_threaded_irq = _mic_request_threaded_irq,
+ .free_irq = _mic_free_irq,
+ .ack_interrupt = _mic_ack_interrupt,
+};
+
+/* Initialize the MIC bootparams */
+void mic_bootparam_init(struct mic_device *mdev)
+{
+ struct mic_bootparam *bootparam = mdev->dp;
+
+ bootparam->magic = cpu_to_le32(MIC_MAGIC);
+ bootparam->h2c_config_db = -1;
+ bootparam->node_id = mdev->id + 1;
+ bootparam->scif_host_dma_addr = 0x0;
+ bootparam->scif_card_dma_addr = 0x0;
+ bootparam->c2h_scif_db = -1;
+ bootparam->h2c_scif_db = -1;
+}
+
+static inline struct mic_device *cosmdev_to_mdev(struct cosm_device *cdev)
+{
+ return dev_get_drvdata(cdev->dev.parent);
+}
+
+static void _mic_reset(struct cosm_device *cdev)
+{
+ struct mic_device *mdev = cosmdev_to_mdev(cdev);
+
+ mdev->ops->reset_fw_ready(mdev);
+ mdev->ops->reset(mdev);
+}
+
+static bool _mic_ready(struct cosm_device *cdev)
+{
+ struct mic_device *mdev = cosmdev_to_mdev(cdev);
+
+ return mdev->ops->is_fw_ready(mdev);
+}
+
+/**
+ * mic_request_dma_chans - Request DMA channels
+ * @mdev: pointer to mic_device instance
+ *
+ * returns number of DMA channels acquired
+ */
+static int mic_request_dma_chans(struct mic_device *mdev)
+{
+ dma_cap_mask_t mask;
+ struct dma_chan *chan;
+
+ request_module("mic_x100_dma");
+ dma_cap_zero(mask);
+ dma_cap_set(DMA_MEMCPY, mask);
+
+ do {
+ chan = dma_request_channel(mask, mdev->ops->dma_filter,
+ &mdev->pdev->dev);
+ if (chan) {
+ mdev->dma_ch[mdev->num_dma_ch++] = chan;
+ if (mdev->num_dma_ch >= MIC_MAX_DMA_CHAN)
+ break;
+ }
+ } while (chan);
+ dev_info(&mdev->pdev->dev, "DMA channels # %d\n", mdev->num_dma_ch);
+ return mdev->num_dma_ch;
+}
+
+/**
+ * mic_free_dma_chans - release DMA channels
+ * @mdev: pointer to mic_device instance
+ *
+ * returns none
+ */
+static void mic_free_dma_chans(struct mic_device *mdev)
+{
+ int i = 0;
+
+ for (i = 0; i < mdev->num_dma_ch; i++) {
+ dma_release_channel(mdev->dma_ch[i]);
+ mdev->dma_ch[i] = NULL;
+ }
+ mdev->num_dma_ch = 0;
+}
+
+/**
+ * _mic_start - Start the MIC.
+ * @cdev: pointer to cosm_device instance
+ * @id: MIC device id/index provided by COSM used in other drivers like SCIF
+ *
+ * This function prepares an MIC for boot and initiates boot.
+ * RETURNS: An appropriate -ERRNO error value on error, or zero for success.
+ *
+ * For all cosm_hw_ops the caller holds a mutex to ensure serialization.
+ */
+static int _mic_start(struct cosm_device *cdev, int id)
+{
+ struct mic_device *mdev = cosmdev_to_mdev(cdev);
+ int rc;
+
+ mic_bootparam_init(mdev);
+ mdev->dma_mbdev = mbus_register_device(&mdev->pdev->dev,
+ MBUS_DEV_DMA_HOST, &mic_dma_ops,
+ &mbus_hw_ops, id, mdev->mmio.va);
+ if (IS_ERR(mdev->dma_mbdev)) {
+ rc = PTR_ERR(mdev->dma_mbdev);
+ goto unlock_ret;
+ }
+ if (!mic_request_dma_chans(mdev)) {
+ rc = -ENODEV;
+ goto dma_remove;
+ }
+ mdev->scdev = scif_register_device(&mdev->pdev->dev, MIC_SCIF_DEV,
+ &__mic_dma_ops, &scif_hw_ops,
+ id + 1, 0, &mdev->mmio,
+ &mdev->aper, mdev->dp, NULL,
+ mdev->dma_ch, mdev->num_dma_ch,
+ true);
+ if (IS_ERR(mdev->scdev)) {
+ rc = PTR_ERR(mdev->scdev);
+ goto dma_free;
+ }
+
+ rc = mdev->ops->load_mic_fw(mdev, NULL);
+ if (rc)
+ goto scif_remove;
+ mic_smpt_restore(mdev);
+ mic_intr_restore(mdev);
+ mdev->intr_ops->enable_interrupts(mdev);
+ mdev->ops->write_spad(mdev, MIC_DPLO_SPAD, mdev->dp_dma_addr);
+ mdev->ops->write_spad(mdev, MIC_DPHI_SPAD, mdev->dp_dma_addr >> 32);
+ mdev->ops->send_firmware_intr(mdev);
+ goto unlock_ret;
+scif_remove:
+ scif_unregister_device(mdev->scdev);
+dma_free:
+ mic_free_dma_chans(mdev);
+dma_remove:
+ mbus_unregister_device(mdev->dma_mbdev);
+unlock_ret:
+ return rc;
+}
+
+/**
+ * _mic_stop - Prepare the MIC for reset and trigger reset.
+ * @cdev: pointer to cosm_device instance
+ * @force: force a MIC to reset even if it is already offline.
+ *
+ * RETURNS: None.
+ */
+static void _mic_stop(struct cosm_device *cdev, bool force)
+{
+ struct mic_device *mdev = cosmdev_to_mdev(cdev);
+
+ /*
+ * Since SCIF handles card shutdown and reset (using COSM), it will
+ * will be the first to be registered and the last to be
+ * unregistered.
+ */
+ mic_virtio_reset_devices(mdev);
+ scif_unregister_device(mdev->scdev);
+ mic_free_dma_chans(mdev);
+ mbus_unregister_device(mdev->dma_mbdev);
+ mic_bootparam_init(mdev);
+}
+
+static ssize_t _mic_family(struct cosm_device *cdev, char *buf)
+{
+ struct mic_device *mdev = cosmdev_to_mdev(cdev);
+ static const char *family[MIC_FAMILY_LAST] = { "x100", "Unknown" };
+
+ return scnprintf(buf, PAGE_SIZE, "%s\n", family[mdev->family]);
+}
+
+static ssize_t _mic_stepping(struct cosm_device *cdev, char *buf)
+{
+ struct mic_device *mdev = cosmdev_to_mdev(cdev);
+ const char *string = "??";
+
+ switch (mdev->stepping) {
+ case MIC_A0_STEP:
+ string = "A0";
+ break;
+ case MIC_B0_STEP:
+ string = "B0";
+ break;
+ case MIC_B1_STEP:
+ string = "B1";
+ break;
+ case MIC_C0_STEP:
+ string = "C0";
+ break;
+ default:
+ break;
+ }
+ return scnprintf(buf, PAGE_SIZE, "%s\n", string);
+}
+
+static struct mic_mw *_mic_aper(struct cosm_device *cdev)
+{
+ struct mic_device *mdev = cosmdev_to_mdev(cdev);
+
+ return &mdev->aper;
+}
+
+struct cosm_hw_ops cosm_hw_ops = {
+ .reset = _mic_reset,
+ .force_reset = _mic_reset,
+ .post_reset = NULL,
+ .ready = _mic_ready,
+ .start = _mic_start,
+ .stop = _mic_stop,
+ .family = _mic_family,
+ .stepping = _mic_stepping,
+ .aper = _mic_aper,
+};
diff --git a/drivers/misc/mic/host/mic_debugfs.c b/drivers/misc/mic/host/mic_debugfs.c
new file mode 100644
index 0000000..1058160
--- /dev/null
+++ b/drivers/misc/mic/host/mic_debugfs.c
@@ -0,0 +1,406 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2013 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Intel MIC Host driver.
+ *
+ */
+#include <linux/debugfs.h>
+#include <linux/pci.h>
+#include <linux/seq_file.h>
+
+#include <linux/mic_common.h>
+#include "../common/mic_dev.h"
+#include "mic_device.h"
+#include "mic_smpt.h"
+#include "mic_virtio.h"
+
+/* Debugfs parent dir */
+static struct dentry *mic_dbg;
+
+static int mic_smpt_show(struct seq_file *s, void *pos)
+{
+ int i;
+ struct mic_device *mdev = s->private;
+ unsigned long flags;
+
+ seq_printf(s, "MIC %-2d |%-10s| %-14s %-10s\n",
+ mdev->id, "SMPT entry", "SW DMA addr", "RefCount");
+ seq_puts(s, "====================================================\n");
+
+ if (mdev->smpt) {
+ struct mic_smpt_info *smpt_info = mdev->smpt;
+ spin_lock_irqsave(&smpt_info->smpt_lock, flags);
+ for (i = 0; i < smpt_info->info.num_reg; i++) {
+ seq_printf(s, "%9s|%-10d| %-#14llx %-10lld\n",
+ " ", i, smpt_info->entry[i].dma_addr,
+ smpt_info->entry[i].ref_count);
+ }
+ spin_unlock_irqrestore(&smpt_info->smpt_lock, flags);
+ }
+ seq_puts(s, "====================================================\n");
+ return 0;
+}
+
+static int mic_smpt_debug_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, mic_smpt_show, inode->i_private);
+}
+
+static int mic_smpt_debug_release(struct inode *inode, struct file *file)
+{
+ return single_release(inode, file);
+}
+
+static const struct file_operations smpt_file_ops = {
+ .owner = THIS_MODULE,
+ .open = mic_smpt_debug_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = mic_smpt_debug_release
+};
+
+static int mic_post_code_show(struct seq_file *s, void *pos)
+{
+ struct mic_device *mdev = s->private;
+ u32 reg = mdev->ops->get_postcode(mdev);
+
+ seq_printf(s, "%c%c", reg & 0xff, (reg >> 8) & 0xff);
+ return 0;
+}
+
+static int mic_post_code_debug_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, mic_post_code_show, inode->i_private);
+}
+
+static int mic_post_code_debug_release(struct inode *inode, struct file *file)
+{
+ return single_release(inode, file);
+}
+
+static const struct file_operations post_code_ops = {
+ .owner = THIS_MODULE,
+ .open = mic_post_code_debug_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = mic_post_code_debug_release
+};
+
+static int mic_dp_show(struct seq_file *s, void *pos)
+{
+ struct mic_device *mdev = s->private;
+ struct mic_device_desc *d;
+ struct mic_device_ctrl *dc;
+ struct mic_vqconfig *vqconfig;
+ __u32 *features;
+ __u8 *config;
+ struct mic_bootparam *bootparam = mdev->dp;
+ int i, j;
+
+ seq_printf(s, "Bootparam: magic 0x%x\n",
+ bootparam->magic);
+ seq_printf(s, "Bootparam: h2c_config_db %d\n",
+ bootparam->h2c_config_db);
+ seq_printf(s, "Bootparam: node_id %d\n",
+ bootparam->node_id);
+ seq_printf(s, "Bootparam: c2h_scif_db %d\n",
+ bootparam->c2h_scif_db);
+ seq_printf(s, "Bootparam: h2c_scif_db %d\n",
+ bootparam->h2c_scif_db);
+ seq_printf(s, "Bootparam: scif_host_dma_addr 0x%llx\n",
+ bootparam->scif_host_dma_addr);
+ seq_printf(s, "Bootparam: scif_card_dma_addr 0x%llx\n",
+ bootparam->scif_card_dma_addr);
+
+
+ for (i = sizeof(*bootparam); i < MIC_DP_SIZE;
+ i += mic_total_desc_size(d)) {
+ d = mdev->dp + i;
+ dc = (void *)d + mic_aligned_desc_size(d);
+
+ /* end of list */
+ if (d->type == 0)
+ break;
+
+ if (d->type == -1)
+ continue;
+
+ seq_printf(s, "Type %d ", d->type);
+ seq_printf(s, "Num VQ %d ", d->num_vq);
+ seq_printf(s, "Feature Len %d\n", d->feature_len);
+ seq_printf(s, "Config Len %d ", d->config_len);
+ seq_printf(s, "Shutdown Status %d\n", d->status);
+
+ for (j = 0; j < d->num_vq; j++) {
+ vqconfig = mic_vq_config(d) + j;
+ seq_printf(s, "vqconfig[%d]: ", j);
+ seq_printf(s, "address 0x%llx ", vqconfig->address);
+ seq_printf(s, "num %d ", vqconfig->num);
+ seq_printf(s, "used address 0x%llx\n",
+ vqconfig->used_address);
+ }
+
+ features = (__u32 *)mic_vq_features(d);
+ seq_printf(s, "Features: Host 0x%x ", features[0]);
+ seq_printf(s, "Guest 0x%x\n", features[1]);
+
+ config = mic_vq_configspace(d);
+ for (j = 0; j < d->config_len; j++)
+ seq_printf(s, "config[%d]=%d\n", j, config[j]);
+
+ seq_puts(s, "Device control:\n");
+ seq_printf(s, "Config Change %d ", dc->config_change);
+ seq_printf(s, "Vdev reset %d\n", dc->vdev_reset);
+ seq_printf(s, "Guest Ack %d ", dc->guest_ack);
+ seq_printf(s, "Host ack %d\n", dc->host_ack);
+ seq_printf(s, "Used address updated %d ",
+ dc->used_address_updated);
+ seq_printf(s, "Vdev 0x%llx\n", dc->vdev);
+ seq_printf(s, "c2h doorbell %d ", dc->c2h_vdev_db);
+ seq_printf(s, "h2c doorbell %d\n", dc->h2c_vdev_db);
+ }
+
+ return 0;
+}
+
+static int mic_dp_debug_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, mic_dp_show, inode->i_private);
+}
+
+static int mic_dp_debug_release(struct inode *inode, struct file *file)
+{
+ return single_release(inode, file);
+}
+
+static const struct file_operations dp_ops = {
+ .owner = THIS_MODULE,
+ .open = mic_dp_debug_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = mic_dp_debug_release
+};
+
+static int mic_vdev_info_show(struct seq_file *s, void *unused)
+{
+ struct mic_device *mdev = s->private;
+ struct list_head *pos, *tmp;
+ struct mic_vdev *mvdev;
+ int i, j;
+
+ mutex_lock(&mdev->mic_mutex);
+ list_for_each_safe(pos, tmp, &mdev->vdev_list) {
+ mvdev = list_entry(pos, struct mic_vdev, list);
+ seq_printf(s, "VDEV type %d state %s in %ld out %ld\n",
+ mvdev->virtio_id,
+ mic_vdevup(mvdev) ? "UP" : "DOWN",
+ mvdev->in_bytes,
+ mvdev->out_bytes);
+ for (i = 0; i < MIC_MAX_VRINGS; i++) {
+ struct vring_desc *desc;
+ struct vring_avail *avail;
+ struct vring_used *used;
+ struct mic_vringh *mvr = &mvdev->mvr[i];
+ struct vringh *vrh = &mvr->vrh;
+ int num = vrh->vring.num;
+ if (!num)
+ continue;
+ desc = vrh->vring.desc;
+ seq_printf(s, "vring i %d avail_idx %d",
+ i, mvr->vring.info->avail_idx & (num - 1));
+ seq_printf(s, " vring i %d avail_idx %d\n",
+ i, mvr->vring.info->avail_idx);
+ seq_printf(s, "vrh i %d weak_barriers %d",
+ i, vrh->weak_barriers);
+ seq_printf(s, " last_avail_idx %d last_used_idx %d",
+ vrh->last_avail_idx, vrh->last_used_idx);
+ seq_printf(s, " completed %d\n", vrh->completed);
+ for (j = 0; j < num; j++) {
+ seq_printf(s, "desc[%d] addr 0x%llx len %d",
+ j, desc->addr, desc->len);
+ seq_printf(s, " flags 0x%x next %d\n",
+ desc->flags, desc->next);
+ desc++;
+ }
+ avail = vrh->vring.avail;
+ seq_printf(s, "avail flags 0x%x idx %d\n",
+ vringh16_to_cpu(vrh, avail->flags),
+ vringh16_to_cpu(vrh, avail->idx) & (num - 1));
+ seq_printf(s, "avail flags 0x%x idx %d\n",
+ vringh16_to_cpu(vrh, avail->flags),
+ vringh16_to_cpu(vrh, avail->idx));
+ for (j = 0; j < num; j++)
+ seq_printf(s, "avail ring[%d] %d\n",
+ j, avail->ring[j]);
+ used = vrh->vring.used;
+ seq_printf(s, "used flags 0x%x idx %d\n",
+ vringh16_to_cpu(vrh, used->flags),
+ vringh16_to_cpu(vrh, used->idx) & (num - 1));
+ seq_printf(s, "used flags 0x%x idx %d\n",
+ vringh16_to_cpu(vrh, used->flags),
+ vringh16_to_cpu(vrh, used->idx));
+ for (j = 0; j < num; j++)
+ seq_printf(s, "used ring[%d] id %d len %d\n",
+ j, vringh32_to_cpu(vrh,
+ used->ring[j].id),
+ vringh32_to_cpu(vrh,
+ used->ring[j].len));
+ }
+ }
+ mutex_unlock(&mdev->mic_mutex);
+
+ return 0;
+}
+
+static int mic_vdev_info_debug_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, mic_vdev_info_show, inode->i_private);
+}
+
+static int mic_vdev_info_debug_release(struct inode *inode, struct file *file)
+{
+ return single_release(inode, file);
+}
+
+static const struct file_operations vdev_info_ops = {
+ .owner = THIS_MODULE,
+ .open = mic_vdev_info_debug_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = mic_vdev_info_debug_release
+};
+
+static int mic_msi_irq_info_show(struct seq_file *s, void *pos)
+{
+ struct mic_device *mdev = s->private;
+ int reg;
+ int i, j;
+ u16 entry;
+ u16 vector;
+ struct pci_dev *pdev = mdev->pdev;
+
+ if (pci_dev_msi_enabled(pdev)) {
+ for (i = 0; i < mdev->irq_info.num_vectors; i++) {
+ if (pdev->msix_enabled) {
+ entry = mdev->irq_info.msix_entries[i].entry;
+ vector = mdev->irq_info.msix_entries[i].vector;
+ } else {
+ entry = 0;
+ vector = pdev->irq;
+ }
+
+ reg = mdev->intr_ops->read_msi_to_src_map(mdev, entry);
+
+ seq_printf(s, "%s %-10d %s %-10d MXAR[%d]: %08X\n",
+ "IRQ:", vector, "Entry:", entry, i, reg);
+
+ seq_printf(s, "%-10s", "offset:");
+ for (j = (MIC_NUM_OFFSETS - 1); j >= 0; j--)
+ seq_printf(s, "%4d ", j);
+ seq_puts(s, "\n");
+
+
+ seq_printf(s, "%-10s", "count:");
+ for (j = (MIC_NUM_OFFSETS - 1); j >= 0; j--)
+ seq_printf(s, "%4d ",
+ (mdev->irq_info.mic_msi_map[i] &
+ BIT(j)) ? 1 : 0);
+ seq_puts(s, "\n\n");
+ }
+ } else {
+ seq_puts(s, "MSI/MSIx interrupts not enabled\n");
+ }
+
+ return 0;
+}
+
+static int mic_msi_irq_info_debug_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, mic_msi_irq_info_show, inode->i_private);
+}
+
+static int
+mic_msi_irq_info_debug_release(struct inode *inode, struct file *file)
+{
+ return single_release(inode, file);
+}
+
+static const struct file_operations msi_irq_info_ops = {
+ .owner = THIS_MODULE,
+ .open = mic_msi_irq_info_debug_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = mic_msi_irq_info_debug_release
+};
+
+/**
+ * mic_create_debug_dir - Initialize MIC debugfs entries.
+ */
+void mic_create_debug_dir(struct mic_device *mdev)
+{
+ char name[16];
+
+ if (!mic_dbg)
+ return;
+
+ scnprintf(name, sizeof(name), "mic%d", mdev->id);
+ mdev->dbg_dir = debugfs_create_dir(name, mic_dbg);
+ if (!mdev->dbg_dir)
+ return;
+
+ debugfs_create_file("smpt", 0444, mdev->dbg_dir, mdev, &smpt_file_ops);
+
+ debugfs_create_file("post_code", 0444, mdev->dbg_dir, mdev,
+ &post_code_ops);
+
+ debugfs_create_file("dp", 0444, mdev->dbg_dir, mdev, &dp_ops);
+
+ debugfs_create_file("vdev_info", 0444, mdev->dbg_dir, mdev,
+ &vdev_info_ops);
+
+ debugfs_create_file("msi_irq_info", 0444, mdev->dbg_dir, mdev,
+ &msi_irq_info_ops);
+}
+
+/**
+ * mic_delete_debug_dir - Uninitialize MIC debugfs entries.
+ */
+void mic_delete_debug_dir(struct mic_device *mdev)
+{
+ if (!mdev->dbg_dir)
+ return;
+
+ debugfs_remove_recursive(mdev->dbg_dir);
+}
+
+/**
+ * mic_init_debugfs - Initialize global debugfs entry.
+ */
+void __init mic_init_debugfs(void)
+{
+ mic_dbg = debugfs_create_dir(KBUILD_MODNAME, NULL);
+ if (!mic_dbg)
+ pr_err("can't create debugfs dir\n");
+}
+
+/**
+ * mic_exit_debugfs - Uninitialize global debugfs entry
+ */
+void mic_exit_debugfs(void)
+{
+ debugfs_remove(mic_dbg);
+}
diff --git a/drivers/misc/mic/host/mic_device.h b/drivers/misc/mic/host/mic_device.h
new file mode 100644
index 0000000..461184a
--- /dev/null
+++ b/drivers/misc/mic/host/mic_device.h
@@ -0,0 +1,172 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2013 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Intel MIC Host driver.
+ *
+ */
+#ifndef _MIC_DEVICE_H_
+#define _MIC_DEVICE_H_
+
+#include <linux/cdev.h>
+#include <linux/idr.h>
+#include <linux/notifier.h>
+#include <linux/irqreturn.h>
+#include <linux/dmaengine.h>
+#include <linux/miscdevice.h>
+#include <linux/mic_bus.h>
+#include "../bus/scif_bus.h"
+#include "../bus/cosm_bus.h"
+#include "mic_intr.h"
+
+/**
+ * enum mic_stepping - MIC stepping ids.
+ */
+enum mic_stepping {
+ MIC_A0_STEP = 0x0,
+ MIC_B0_STEP = 0x10,
+ MIC_B1_STEP = 0x11,
+ MIC_C0_STEP = 0x20,
+};
+
+extern struct cosm_hw_ops cosm_hw_ops;
+
+/**
+ * struct mic_device - MIC device information for each card.
+ *
+ * @mmio: MMIO bar information.
+ * @aper: Aperture bar information.
+ * @family: The MIC family to which this device belongs.
+ * @ops: MIC HW specific operations.
+ * @id: The unique device id for this MIC device.
+ * @stepping: Stepping ID.
+ * @pdev: Underlying PCI device.
+ * @mic_mutex: Mutex for synchronizing access to mic_device.
+ * @intr_ops: HW specific interrupt operations.
+ * @smpt_ops: Hardware specific SMPT operations.
+ * @smpt: MIC SMPT information.
+ * @intr_info: H/W specific interrupt information.
+ * @irq_info: The OS specific irq information
+ * @dbg_dir: debugfs directory of this MIC device.
+ * @bootaddr: MIC boot address.
+ * @dp: virtio device page
+ * @dp_dma_addr: virtio device page DMA address.
+ * @name: name for the misc char device
+ * @miscdev: registered misc char device
+ * @vdev_list: list of virtio devices.
+ * @dma_mbdev: MIC BUS DMA device.
+ * @dma_ch - Array of DMA channels
+ * @num_dma_ch - Number of DMA channels available
+ * @scdev: SCIF device on the SCIF virtual bus.
+ * @cosm_dev: COSM device
+ */
+struct mic_device {
+ struct mic_mw mmio;
+ struct mic_mw aper;
+ enum mic_hw_family family;
+ struct mic_hw_ops *ops;
+ int id;
+ enum mic_stepping stepping;
+ struct pci_dev *pdev;
+ struct mutex mic_mutex;
+ struct mic_hw_intr_ops *intr_ops;
+ struct mic_smpt_ops *smpt_ops;
+ struct mic_smpt_info *smpt;
+ struct mic_intr_info *intr_info;
+ struct mic_irq_info irq_info;
+ struct dentry *dbg_dir;
+ u32 bootaddr;
+ void *dp;
+ dma_addr_t dp_dma_addr;
+ char name[16];
+ struct miscdevice miscdev;
+ struct list_head vdev_list;
+ struct mbus_device *dma_mbdev;
+ struct dma_chan *dma_ch[MIC_MAX_DMA_CHAN];
+ int num_dma_ch;
+ struct scif_hw_dev *scdev;
+ struct cosm_device *cosm_dev;
+};
+
+/**
+ * struct mic_hw_ops - MIC HW specific operations.
+ * @aper_bar: Aperture bar resource number.
+ * @mmio_bar: MMIO bar resource number.
+ * @read_spad: Read from scratch pad register.
+ * @write_spad: Write to scratch pad register.
+ * @send_intr: Send an interrupt for a particular doorbell on the card.
+ * @ack_interrupt: Hardware specific operations to ack the h/w on
+ * receipt of an interrupt.
+ * @intr_workarounds: Hardware specific workarounds needed after
+ * handling an interrupt.
+ * @reset: Reset the remote processor.
+ * @reset_fw_ready: Reset firmware ready field.
+ * @is_fw_ready: Check if firmware is ready for OS download.
+ * @send_firmware_intr: Send an interrupt to the card firmware.
+ * @load_mic_fw: Load firmware segments required to boot the card
+ * into card memory. This includes the kernel, command line, ramdisk etc.
+ * @get_postcode: Get post code status from firmware.
+ * @dma_filter: DMA filter function to be used.
+ */
+struct mic_hw_ops {
+ u8 aper_bar;
+ u8 mmio_bar;
+ u32 (*read_spad)(struct mic_device *mdev, unsigned int idx);
+ void (*write_spad)(struct mic_device *mdev, unsigned int idx, u32 val);
+ void (*send_intr)(struct mic_device *mdev, int doorbell);
+ u32 (*ack_interrupt)(struct mic_device *mdev);
+ void (*intr_workarounds)(struct mic_device *mdev);
+ void (*reset)(struct mic_device *mdev);
+ void (*reset_fw_ready)(struct mic_device *mdev);
+ bool (*is_fw_ready)(struct mic_device *mdev);
+ void (*send_firmware_intr)(struct mic_device *mdev);
+ int (*load_mic_fw)(struct mic_device *mdev, const char *buf);
+ u32 (*get_postcode)(struct mic_device *mdev);
+ bool (*dma_filter)(struct dma_chan *chan, void *param);
+};
+
+/**
+ * mic_mmio_read - read from an MMIO register.
+ * @mw: MMIO register base virtual address.
+ * @offset: register offset.
+ *
+ * RETURNS: register value.
+ */
+static inline u32 mic_mmio_read(struct mic_mw *mw, u32 offset)
+{
+ return ioread32(mw->va + offset);
+}
+
+/**
+ * mic_mmio_write - write to an MMIO register.
+ * @mw: MMIO register base virtual address.
+ * @val: the data value to put into the register
+ * @offset: register offset.
+ *
+ * RETURNS: none.
+ */
+static inline void
+mic_mmio_write(struct mic_mw *mw, u32 val, u32 offset)
+{
+ iowrite32(val, mw->va + offset);
+}
+
+void mic_bootparam_init(struct mic_device *mdev);
+void mic_create_debug_dir(struct mic_device *dev);
+void mic_delete_debug_dir(struct mic_device *dev);
+void __init mic_init_debugfs(void);
+void mic_exit_debugfs(void);
+#endif
diff --git a/drivers/misc/mic/host/mic_fops.c b/drivers/misc/mic/host/mic_fops.c
new file mode 100644
index 0000000..8cc1d90
--- /dev/null
+++ b/drivers/misc/mic/host/mic_fops.c
@@ -0,0 +1,222 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2013 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Intel MIC Host driver.
+ *
+ */
+#include <linux/poll.h>
+#include <linux/pci.h>
+
+#include <linux/mic_common.h>
+#include "../common/mic_dev.h"
+#include "mic_device.h"
+#include "mic_fops.h"
+#include "mic_virtio.h"
+
+int mic_open(struct inode *inode, struct file *f)
+{
+ struct mic_vdev *mvdev;
+ struct mic_device *mdev = container_of(f->private_data,
+ struct mic_device, miscdev);
+
+ mvdev = kzalloc(sizeof(*mvdev), GFP_KERNEL);
+ if (!mvdev)
+ return -ENOMEM;
+
+ init_waitqueue_head(&mvdev->waitq);
+ INIT_LIST_HEAD(&mvdev->list);
+ mvdev->mdev = mdev;
+ mvdev->virtio_id = -1;
+
+ f->private_data = mvdev;
+ return 0;
+}
+
+int mic_release(struct inode *inode, struct file *f)
+{
+ struct mic_vdev *mvdev = (struct mic_vdev *)f->private_data;
+
+ if (-1 != mvdev->virtio_id)
+ mic_virtio_del_device(mvdev);
+ f->private_data = NULL;
+ kfree(mvdev);
+ return 0;
+}
+
+long mic_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
+{
+ struct mic_vdev *mvdev = (struct mic_vdev *)f->private_data;
+ void __user *argp = (void __user *)arg;
+ int ret;
+
+ switch (cmd) {
+ case MIC_VIRTIO_ADD_DEVICE:
+ {
+ ret = mic_virtio_add_device(mvdev, argp);
+ if (ret < 0) {
+ dev_err(mic_dev(mvdev),
+ "%s %d errno ret %d\n",
+ __func__, __LINE__, ret);
+ return ret;
+ }
+ break;
+ }
+ case MIC_VIRTIO_COPY_DESC:
+ {
+ struct mic_copy_desc copy;
+
+ ret = mic_vdev_inited(mvdev);
+ if (ret)
+ return ret;
+
+ if (copy_from_user(©, argp, sizeof(copy)))
+ return -EFAULT;
+
+ dev_dbg(mic_dev(mvdev),
+ "%s %d === iovcnt 0x%x vr_idx 0x%x update_used %d\n",
+ __func__, __LINE__, copy.iovcnt, copy.vr_idx,
+ copy.update_used);
+
+ ret = mic_virtio_copy_desc(mvdev, ©);
+ if (ret < 0) {
+ dev_err(mic_dev(mvdev),
+ "%s %d errno ret %d\n",
+ __func__, __LINE__, ret);
+ return ret;
+ }
+ if (copy_to_user(
+ &((struct mic_copy_desc __user *)argp)->out_len,
+ ©.out_len, sizeof(copy.out_len))) {
+ dev_err(mic_dev(mvdev), "%s %d errno ret %d\n",
+ __func__, __LINE__, -EFAULT);
+ return -EFAULT;
+ }
+ break;
+ }
+ case MIC_VIRTIO_CONFIG_CHANGE:
+ {
+ ret = mic_vdev_inited(mvdev);
+ if (ret)
+ return ret;
+
+ ret = mic_virtio_config_change(mvdev, argp);
+ if (ret < 0) {
+ dev_err(mic_dev(mvdev),
+ "%s %d errno ret %d\n",
+ __func__, __LINE__, ret);
+ return ret;
+ }
+ break;
+ }
+ default:
+ return -ENOIOCTLCMD;
+ };
+ return 0;
+}
+
+/*
+ * We return POLLIN | POLLOUT from poll when new buffers are enqueued, and
+ * not when previously enqueued buffers may be available. This means that
+ * in the card->host (TX) path, when userspace is unblocked by poll it
+ * must drain all available descriptors or it can stall.
+ */
+unsigned int mic_poll(struct file *f, poll_table *wait)
+{
+ struct mic_vdev *mvdev = (struct mic_vdev *)f->private_data;
+ int mask = 0;
+
+ poll_wait(f, &mvdev->waitq, wait);
+
+ if (mic_vdev_inited(mvdev)) {
+ mask = POLLERR;
+ } else if (mvdev->poll_wake) {
+ mvdev->poll_wake = 0;
+ mask = POLLIN | POLLOUT;
+ }
+
+ return mask;
+}
+
+static inline int
+mic_query_offset(struct mic_vdev *mvdev, unsigned long offset,
+ unsigned long *size, unsigned long *pa)
+{
+ struct mic_device *mdev = mvdev->mdev;
+ unsigned long start = MIC_DP_SIZE;
+ int i;
+
+ /*
+ * MMAP interface is as follows:
+ * offset region
+ * 0x0 virtio device_page
+ * 0x1000 first vring
+ * 0x1000 + size of 1st vring second vring
+ * ....
+ */
+ if (!offset) {
+ *pa = virt_to_phys(mdev->dp);
+ *size = MIC_DP_SIZE;
+ return 0;
+ }
+
+ for (i = 0; i < mvdev->dd->num_vq; i++) {
+ struct mic_vringh *mvr = &mvdev->mvr[i];
+ if (offset == start) {
+ *pa = virt_to_phys(mvr->vring.va);
+ *size = mvr->vring.len;
+ return 0;
+ }
+ start += mvr->vring.len;
+ }
+ return -1;
+}
+
+/*
+ * Maps the device page and virtio rings to user space for readonly access.
+ */
+int
+mic_mmap(struct file *f, struct vm_area_struct *vma)
+{
+ struct mic_vdev *mvdev = (struct mic_vdev *)f->private_data;
+ unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;
+ unsigned long pa, size = vma->vm_end - vma->vm_start, size_rem = size;
+ int i, err;
+
+ err = mic_vdev_inited(mvdev);
+ if (err)
+ return err;
+
+ if (vma->vm_flags & VM_WRITE)
+ return -EACCES;
+
+ while (size_rem) {
+ i = mic_query_offset(mvdev, offset, &size, &pa);
+ if (i < 0)
+ return -EINVAL;
+ err = remap_pfn_range(vma, vma->vm_start + offset,
+ pa >> PAGE_SHIFT, size, vma->vm_page_prot);
+ if (err)
+ return err;
+ dev_dbg(mic_dev(mvdev),
+ "%s %d type %d size 0x%lx off 0x%lx pa 0x%lx vma 0x%lx\n",
+ __func__, __LINE__, mvdev->virtio_id, size, offset,
+ pa, vma->vm_start + offset);
+ size_rem -= size;
+ offset += size;
+ }
+ return 0;
+}
diff --git a/drivers/misc/mic/host/mic_fops.h b/drivers/misc/mic/host/mic_fops.h
new file mode 100644
index 0000000..dc3893d
--- /dev/null
+++ b/drivers/misc/mic/host/mic_fops.h
@@ -0,0 +1,32 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2013 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Intel MIC Host driver.
+ *
+ */
+#ifndef _MIC_FOPS_H_
+#define _MIC_FOPS_H_
+
+int mic_open(struct inode *inode, struct file *filp);
+int mic_release(struct inode *inode, struct file *filp);
+ssize_t mic_read(struct file *filp, char __user *buf,
+ size_t count, loff_t *pos);
+long mic_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
+int mic_mmap(struct file *f, struct vm_area_struct *vma);
+unsigned int mic_poll(struct file *f, poll_table *wait);
+
+#endif
diff --git a/drivers/misc/mic/host/mic_intr.c b/drivers/misc/mic/host/mic_intr.c
new file mode 100644
index 0000000..08ca3e3
--- /dev/null
+++ b/drivers/misc/mic/host/mic_intr.c
@@ -0,0 +1,645 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2013 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Intel MIC Host driver.
+ *
+ */
+#include <linux/pci.h>
+#include <linux/interrupt.h>
+
+#include "../common/mic_dev.h"
+#include "mic_device.h"
+
+static irqreturn_t mic_thread_fn(int irq, void *dev)
+{
+ struct mic_device *mdev = dev;
+ struct mic_intr_info *intr_info = mdev->intr_info;
+ struct mic_irq_info *irq_info = &mdev->irq_info;
+ struct mic_intr_cb *intr_cb;
+ struct pci_dev *pdev = mdev->pdev;
+ int i;
+
+ spin_lock(&irq_info->mic_thread_lock);
+ for (i = intr_info->intr_start_idx[MIC_INTR_DB];
+ i < intr_info->intr_len[MIC_INTR_DB]; i++)
+ if (test_and_clear_bit(i, &irq_info->mask)) {
+ list_for_each_entry(intr_cb, &irq_info->cb_list[i],
+ list)
+ if (intr_cb->thread_fn)
+ intr_cb->thread_fn(pdev->irq,
+ intr_cb->data);
+ }
+ spin_unlock(&irq_info->mic_thread_lock);
+ return IRQ_HANDLED;
+}
+/**
+ * mic_interrupt - Generic interrupt handler for
+ * MSI and INTx based interrupts.
+ */
+static irqreturn_t mic_interrupt(int irq, void *dev)
+{
+ struct mic_device *mdev = dev;
+ struct mic_intr_info *intr_info = mdev->intr_info;
+ struct mic_irq_info *irq_info = &mdev->irq_info;
+ struct mic_intr_cb *intr_cb;
+ struct pci_dev *pdev = mdev->pdev;
+ u32 mask;
+ int i;
+
+ mask = mdev->ops->ack_interrupt(mdev);
+ if (!mask)
+ return IRQ_NONE;
+
+ spin_lock(&irq_info->mic_intr_lock);
+ for (i = intr_info->intr_start_idx[MIC_INTR_DB];
+ i < intr_info->intr_len[MIC_INTR_DB]; i++)
+ if (mask & BIT(i)) {
+ list_for_each_entry(intr_cb, &irq_info->cb_list[i],
+ list)
+ if (intr_cb->handler)
+ intr_cb->handler(pdev->irq,
+ intr_cb->data);
+ set_bit(i, &irq_info->mask);
+ }
+ spin_unlock(&irq_info->mic_intr_lock);
+ return IRQ_WAKE_THREAD;
+}
+
+/* Return the interrupt offset from the index. Index is 0 based. */
+static u16 mic_map_src_to_offset(struct mic_device *mdev,
+ int intr_src, enum mic_intr_type type)
+{
+ if (type >= MIC_NUM_INTR_TYPES)
+ return MIC_NUM_OFFSETS;
+ if (intr_src >= mdev->intr_info->intr_len[type])
+ return MIC_NUM_OFFSETS;
+
+ return mdev->intr_info->intr_start_idx[type] + intr_src;
+}
+
+/* Return next available msix_entry. */
+static struct msix_entry *mic_get_available_vector(struct mic_device *mdev)
+{
+ int i;
+ struct mic_irq_info *info = &mdev->irq_info;
+
+ for (i = 0; i < info->num_vectors; i++)
+ if (!info->mic_msi_map[i])
+ return &info->msix_entries[i];
+ return NULL;
+}
+
+/**
+ * mic_register_intr_callback - Register a callback handler for the
+ * given source id.
+ *
+ * @mdev: pointer to the mic_device instance
+ * @idx: The source id to be registered.
+ * @handler: The function to be called when the source id receives
+ * the interrupt.
+ * @thread_fn: thread fn. corresponding to the handler
+ * @data: Private data of the requester.
+ * Return the callback structure that was registered or an
+ * appropriate error on failure.
+ */
+static struct mic_intr_cb *mic_register_intr_callback(struct mic_device *mdev,
+ u8 idx, irq_handler_t handler, irq_handler_t thread_fn,
+ void *data)
+{
+ struct mic_intr_cb *intr_cb;
+ unsigned long flags;
+ int rc;
+ intr_cb = kmalloc(sizeof(*intr_cb), GFP_KERNEL);
+
+ if (!intr_cb)
+ return ERR_PTR(-ENOMEM);
+
+ intr_cb->handler = handler;
+ intr_cb->thread_fn = thread_fn;
+ intr_cb->data = data;
+ intr_cb->cb_id = ida_simple_get(&mdev->irq_info.cb_ida,
+ 0, 0, GFP_KERNEL);
+ if (intr_cb->cb_id < 0) {
+ rc = intr_cb->cb_id;
+ goto ida_fail;
+ }
+
+ spin_lock(&mdev->irq_info.mic_thread_lock);
+ spin_lock_irqsave(&mdev->irq_info.mic_intr_lock, flags);
+ list_add_tail(&intr_cb->list, &mdev->irq_info.cb_list[idx]);
+ spin_unlock_irqrestore(&mdev->irq_info.mic_intr_lock, flags);
+ spin_unlock(&mdev->irq_info.mic_thread_lock);
+
+ return intr_cb;
+ida_fail:
+ kfree(intr_cb);
+ return ERR_PTR(rc);
+}
+
+/**
+ * mic_unregister_intr_callback - Unregister the callback handler
+ * identified by its callback id.
+ *
+ * @mdev: pointer to the mic_device instance
+ * @idx: The callback structure id to be unregistered.
+ * Return the source id that was unregistered or MIC_NUM_OFFSETS if no
+ * such callback handler was found.
+ */
+static u8 mic_unregister_intr_callback(struct mic_device *mdev, u32 idx)
+{
+ struct list_head *pos, *tmp;
+ struct mic_intr_cb *intr_cb;
+ unsigned long flags;
+ int i;
+
+ spin_lock(&mdev->irq_info.mic_thread_lock);
+ spin_lock_irqsave(&mdev->irq_info.mic_intr_lock, flags);
+ for (i = 0; i < MIC_NUM_OFFSETS; i++) {
+ list_for_each_safe(pos, tmp, &mdev->irq_info.cb_list[i]) {
+ intr_cb = list_entry(pos, struct mic_intr_cb, list);
+ if (intr_cb->cb_id == idx) {
+ list_del(pos);
+ ida_simple_remove(&mdev->irq_info.cb_ida,
+ intr_cb->cb_id);
+ kfree(intr_cb);
+ spin_unlock_irqrestore(
+ &mdev->irq_info.mic_intr_lock, flags);
+ spin_unlock(&mdev->irq_info.mic_thread_lock);
+ return i;
+ }
+ }
+ }
+ spin_unlock_irqrestore(&mdev->irq_info.mic_intr_lock, flags);
+ spin_unlock(&mdev->irq_info.mic_thread_lock);
+ return MIC_NUM_OFFSETS;
+}
+
+/**
+ * mic_setup_msix - Initializes MSIx interrupts.
+ *
+ * @mdev: pointer to mic_device instance
+ *
+ *
+ * RETURNS: An appropriate -ERRNO error value on error, or zero for success.
+ */
+static int mic_setup_msix(struct mic_device *mdev, struct pci_dev *pdev)
+{
+ int rc, i;
+ int entry_size = sizeof(*mdev->irq_info.msix_entries);
+
+ mdev->irq_info.msix_entries = kmalloc_array(MIC_MIN_MSIX,
+ entry_size, GFP_KERNEL);
+ if (!mdev->irq_info.msix_entries) {
+ rc = -ENOMEM;
+ goto err_nomem1;
+ }
+
+ for (i = 0; i < MIC_MIN_MSIX; i++)
+ mdev->irq_info.msix_entries[i].entry = i;
+
+ rc = pci_enable_msix_exact(pdev, mdev->irq_info.msix_entries,
+ MIC_MIN_MSIX);
+ if (rc) {
+ dev_dbg(&pdev->dev, "Error enabling MSIx. rc = %d\n", rc);
+ goto err_enable_msix;
+ }
+
+ mdev->irq_info.num_vectors = MIC_MIN_MSIX;
+ mdev->irq_info.mic_msi_map = kzalloc((sizeof(u32) *
+ mdev->irq_info.num_vectors), GFP_KERNEL);
+
+ if (!mdev->irq_info.mic_msi_map) {
+ rc = -ENOMEM;
+ goto err_nomem2;
+ }
+
+ dev_dbg(&mdev->pdev->dev,
+ "%d MSIx irqs setup\n", mdev->irq_info.num_vectors);
+ return 0;
+err_nomem2:
+ pci_disable_msix(pdev);
+err_enable_msix:
+ kfree(mdev->irq_info.msix_entries);
+err_nomem1:
+ mdev->irq_info.num_vectors = 0;
+ return rc;
+}
+
+/**
+ * mic_setup_callbacks - Initialize data structures needed
+ * to handle callbacks.
+ *
+ * @mdev: pointer to mic_device instance
+ */
+static int mic_setup_callbacks(struct mic_device *mdev)
+{
+ int i;
+
+ mdev->irq_info.cb_list = kmalloc_array(MIC_NUM_OFFSETS,
+ sizeof(*mdev->irq_info.cb_list),
+ GFP_KERNEL);
+ if (!mdev->irq_info.cb_list)
+ return -ENOMEM;
+
+ for (i = 0; i < MIC_NUM_OFFSETS; i++)
+ INIT_LIST_HEAD(&mdev->irq_info.cb_list[i]);
+ ida_init(&mdev->irq_info.cb_ida);
+ spin_lock_init(&mdev->irq_info.mic_intr_lock);
+ spin_lock_init(&mdev->irq_info.mic_thread_lock);
+ return 0;
+}
+
+/**
+ * mic_release_callbacks - Uninitialize data structures needed
+ * to handle callbacks.
+ *
+ * @mdev: pointer to mic_device instance
+ */
+static void mic_release_callbacks(struct mic_device *mdev)
+{
+ unsigned long flags;
+ struct list_head *pos, *tmp;
+ struct mic_intr_cb *intr_cb;
+ int i;
+
+ spin_lock(&mdev->irq_info.mic_thread_lock);
+ spin_lock_irqsave(&mdev->irq_info.mic_intr_lock, flags);
+ for (i = 0; i < MIC_NUM_OFFSETS; i++) {
+ if (list_empty(&mdev->irq_info.cb_list[i]))
+ break;
+
+ list_for_each_safe(pos, tmp, &mdev->irq_info.cb_list[i]) {
+ intr_cb = list_entry(pos, struct mic_intr_cb, list);
+ list_del(pos);
+ ida_simple_remove(&mdev->irq_info.cb_ida,
+ intr_cb->cb_id);
+ kfree(intr_cb);
+ }
+ }
+ spin_unlock_irqrestore(&mdev->irq_info.mic_intr_lock, flags);
+ spin_unlock(&mdev->irq_info.mic_thread_lock);
+ ida_destroy(&mdev->irq_info.cb_ida);
+ kfree(mdev->irq_info.cb_list);
+}
+
+/**
+ * mic_setup_msi - Initializes MSI interrupts.
+ *
+ * @mdev: pointer to mic_device instance
+ * @pdev: PCI device structure
+ *
+ * RETURNS: An appropriate -ERRNO error value on error, or zero for success.
+ */
+static int mic_setup_msi(struct mic_device *mdev, struct pci_dev *pdev)
+{
+ int rc;
+
+ rc = pci_enable_msi(pdev);
+ if (rc) {
+ dev_dbg(&pdev->dev, "Error enabling MSI. rc = %d\n", rc);
+ return rc;
+ }
+
+ mdev->irq_info.num_vectors = 1;
+ mdev->irq_info.mic_msi_map = kzalloc((sizeof(u32) *
+ mdev->irq_info.num_vectors), GFP_KERNEL);
+
+ if (!mdev->irq_info.mic_msi_map) {
+ rc = -ENOMEM;
+ goto err_nomem1;
+ }
+
+ rc = mic_setup_callbacks(mdev);
+ if (rc) {
+ dev_err(&pdev->dev, "Error setting up callbacks\n");
+ goto err_nomem2;
+ }
+
+ rc = request_threaded_irq(pdev->irq, mic_interrupt, mic_thread_fn,
+ 0, "mic-msi", mdev);
+ if (rc) {
+ dev_err(&pdev->dev, "Error allocating MSI interrupt\n");
+ goto err_irq_req_fail;
+ }
+
+ dev_dbg(&pdev->dev, "%d MSI irqs setup\n", mdev->irq_info.num_vectors);
+ return 0;
+err_irq_req_fail:
+ mic_release_callbacks(mdev);
+err_nomem2:
+ kfree(mdev->irq_info.mic_msi_map);
+err_nomem1:
+ pci_disable_msi(pdev);
+ mdev->irq_info.num_vectors = 0;
+ return rc;
+}
+
+/**
+ * mic_setup_intx - Initializes legacy interrupts.
+ *
+ * @mdev: pointer to mic_device instance
+ * @pdev: PCI device structure
+ *
+ * RETURNS: An appropriate -ERRNO error value on error, or zero for success.
+ */
+static int mic_setup_intx(struct mic_device *mdev, struct pci_dev *pdev)
+{
+ int rc;
+
+ /* Enable intx */
+ pci_intx(pdev, 1);
+ rc = mic_setup_callbacks(mdev);
+ if (rc) {
+ dev_err(&pdev->dev, "Error setting up callbacks\n");
+ goto err_nomem;
+ }
+
+ rc = request_threaded_irq(pdev->irq, mic_interrupt, mic_thread_fn,
+ IRQF_SHARED, "mic-intx", mdev);
+ if (rc)
+ goto err;
+
+ dev_dbg(&pdev->dev, "intx irq setup\n");
+ return 0;
+err:
+ mic_release_callbacks(mdev);
+err_nomem:
+ return rc;
+}
+
+/**
+ * mic_next_db - Retrieve the next doorbell interrupt source id.
+ * The id is picked sequentially from the available pool of
+ * doorlbell ids.
+ *
+ * @mdev: pointer to the mic_device instance.
+ *
+ * Returns the next doorbell interrupt source.
+ */
+int mic_next_db(struct mic_device *mdev)
+{
+ int next_db;
+
+ next_db = mdev->irq_info.next_avail_src %
+ mdev->intr_info->intr_len[MIC_INTR_DB];
+ mdev->irq_info.next_avail_src++;
+ return next_db;
+}
+
+#define COOKIE_ID_SHIFT 16
+#define GET_ENTRY(cookie) ((cookie) & 0xFFFF)
+#define GET_OFFSET(cookie) ((cookie) >> COOKIE_ID_SHIFT)
+#define MK_COOKIE(x, y) ((x) | (y) << COOKIE_ID_SHIFT)
+
+/**
+ * mic_request_threaded_irq - request an irq. mic_mutex needs
+ * to be held before calling this function.
+ *
+ * @mdev: pointer to mic_device instance
+ * @handler: The callback function that handles the interrupt.
+ * The function needs to call ack_interrupts
+ * (mdev->ops->ack_interrupt(mdev)) when handling the interrupts.
+ * @thread_fn: thread fn required by request_threaded_irq.
+ * @name: The ASCII name of the callee requesting the irq.
+ * @data: private data that is returned back when calling the
+ * function handler.
+ * @intr_src: The source id of the requester. Its the doorbell id
+ * for Doorbell interrupts and DMA channel id for DMA interrupts.
+ * @type: The type of interrupt. Values defined in mic_intr_type
+ *
+ * returns: The cookie that is transparent to the caller. Passed
+ * back when calling mic_free_irq. An appropriate error code
+ * is returned on failure. Caller needs to use IS_ERR(return_val)
+ * to check for failure and PTR_ERR(return_val) to obtained the
+ * error code.
+ *
+ */
+struct mic_irq *
+mic_request_threaded_irq(struct mic_device *mdev,
+ irq_handler_t handler, irq_handler_t thread_fn,
+ const char *name, void *data, int intr_src,
+ enum mic_intr_type type)
+{
+ u16 offset;
+ int rc = 0;
+ struct msix_entry *msix = NULL;
+ unsigned long cookie = 0;
+ u16 entry;
+ struct mic_intr_cb *intr_cb;
+ struct pci_dev *pdev = mdev->pdev;
+
+ offset = mic_map_src_to_offset(mdev, intr_src, type);
+ if (offset >= MIC_NUM_OFFSETS) {
+ dev_err(&mdev->pdev->dev,
+ "Error mapping index %d to a valid source id.\n",
+ intr_src);
+ rc = -EINVAL;
+ goto err;
+ }
+
+ if (mdev->irq_info.num_vectors > 1) {
+ msix = mic_get_available_vector(mdev);
+ if (!msix) {
+ dev_err(&mdev->pdev->dev,
+ "No MSIx vectors available for use.\n");
+ rc = -ENOSPC;
+ goto err;
+ }
+
+ rc = request_threaded_irq(msix->vector, handler, thread_fn,
+ 0, name, data);
+ if (rc) {
+ dev_dbg(&mdev->pdev->dev,
+ "request irq failed rc = %d\n", rc);
+ goto err;
+ }
+ entry = msix->entry;
+ mdev->irq_info.mic_msi_map[entry] |= BIT(offset);
+ mdev->intr_ops->program_msi_to_src_map(mdev,
+ entry, offset, true);
+ cookie = MK_COOKIE(entry, offset);
+ dev_dbg(&mdev->pdev->dev, "irq: %d assigned for src: %d\n",
+ msix->vector, intr_src);
+ } else {
+ intr_cb = mic_register_intr_callback(mdev, offset, handler,
+ thread_fn, data);
+ if (IS_ERR(intr_cb)) {
+ dev_err(&mdev->pdev->dev,
+ "No available callback entries for use\n");
+ rc = PTR_ERR(intr_cb);
+ goto err;
+ }
+
+ entry = 0;
+ if (pci_dev_msi_enabled(pdev)) {
+ mdev->irq_info.mic_msi_map[entry] |= (1 << offset);
+ mdev->intr_ops->program_msi_to_src_map(mdev,
+ entry, offset, true);
+ }
+ cookie = MK_COOKIE(entry, intr_cb->cb_id);
+ dev_dbg(&mdev->pdev->dev, "callback %d registered for src: %d\n",
+ intr_cb->cb_id, intr_src);
+ }
+ return (struct mic_irq *)cookie;
+err:
+ return ERR_PTR(rc);
+}
+
+/**
+ * mic_free_irq - free irq. mic_mutex
+ * needs to be held before calling this function.
+ *
+ * @mdev: pointer to mic_device instance
+ * @cookie: cookie obtained during a successful call to mic_request_threaded_irq
+ * @data: private data specified by the calling function during the
+ * mic_request_threaded_irq
+ *
+ * returns: none.
+ */
+void mic_free_irq(struct mic_device *mdev,
+ struct mic_irq *cookie, void *data)
+{
+ u32 offset;
+ u32 entry;
+ u8 src_id;
+ unsigned int irq;
+ struct pci_dev *pdev = mdev->pdev;
+
+ entry = GET_ENTRY((unsigned long)cookie);
+ offset = GET_OFFSET((unsigned long)cookie);
+ if (mdev->irq_info.num_vectors > 1) {
+ if (entry >= mdev->irq_info.num_vectors) {
+ dev_warn(&mdev->pdev->dev,
+ "entry %d should be < num_irq %d\n",
+ entry, mdev->irq_info.num_vectors);
+ return;
+ }
+ irq = mdev->irq_info.msix_entries[entry].vector;
+ free_irq(irq, data);
+ mdev->irq_info.mic_msi_map[entry] &= ~(BIT(offset));
+ mdev->intr_ops->program_msi_to_src_map(mdev,
+ entry, offset, false);
+
+ dev_dbg(&mdev->pdev->dev, "irq: %d freed\n", irq);
+ } else {
+ irq = pdev->irq;
+ src_id = mic_unregister_intr_callback(mdev, offset);
+ if (src_id >= MIC_NUM_OFFSETS) {
+ dev_warn(&mdev->pdev->dev, "Error unregistering callback\n");
+ return;
+ }
+ if (pci_dev_msi_enabled(pdev)) {
+ mdev->irq_info.mic_msi_map[entry] &= ~(BIT(src_id));
+ mdev->intr_ops->program_msi_to_src_map(mdev,
+ entry, src_id, false);
+ }
+ dev_dbg(&mdev->pdev->dev, "callback %d unregistered for src: %d\n",
+ offset, src_id);
+ }
+}
+
+/**
+ * mic_setup_interrupts - Initializes interrupts.
+ *
+ * @mdev: pointer to mic_device instance
+ * @pdev: PCI device structure
+ *
+ * RETURNS: An appropriate -ERRNO error value on error, or zero for success.
+ */
+int mic_setup_interrupts(struct mic_device *mdev, struct pci_dev *pdev)
+{
+ int rc;
+
+ rc = mic_setup_msix(mdev, pdev);
+ if (!rc)
+ goto done;
+
+ rc = mic_setup_msi(mdev, pdev);
+ if (!rc)
+ goto done;
+
+ rc = mic_setup_intx(mdev, pdev);
+ if (rc) {
+ dev_err(&mdev->pdev->dev, "no usable interrupts\n");
+ return rc;
+ }
+done:
+ mdev->intr_ops->enable_interrupts(mdev);
+ return 0;
+}
+
+/**
+ * mic_free_interrupts - Frees interrupts setup by mic_setup_interrupts
+ *
+ * @mdev: pointer to mic_device instance
+ * @pdev: PCI device structure
+ *
+ * returns none.
+ */
+void mic_free_interrupts(struct mic_device *mdev, struct pci_dev *pdev)
+{
+ int i;
+
+ mdev->intr_ops->disable_interrupts(mdev);
+ if (mdev->irq_info.num_vectors > 1) {
+ for (i = 0; i < mdev->irq_info.num_vectors; i++) {
+ if (mdev->irq_info.mic_msi_map[i])
+ dev_warn(&pdev->dev, "irq %d may still be in use.\n",
+ mdev->irq_info.msix_entries[i].vector);
+ }
+ kfree(mdev->irq_info.mic_msi_map);
+ kfree(mdev->irq_info.msix_entries);
+ pci_disable_msix(pdev);
+ } else {
+ if (pci_dev_msi_enabled(pdev)) {
+ free_irq(pdev->irq, mdev);
+ kfree(mdev->irq_info.mic_msi_map);
+ pci_disable_msi(pdev);
+ } else {
+ free_irq(pdev->irq, mdev);
+ }
+ mic_release_callbacks(mdev);
+ }
+}
+
+/**
+ * mic_intr_restore - Restore MIC interrupt registers.
+ *
+ * @mdev: pointer to mic_device instance.
+ *
+ * Restore the interrupt registers to values previously
+ * stored in the SW data structures. mic_mutex needs to
+ * be held before calling this function.
+ *
+ * returns None.
+ */
+void mic_intr_restore(struct mic_device *mdev)
+{
+ int entry, offset;
+ struct pci_dev *pdev = mdev->pdev;
+
+ if (!pci_dev_msi_enabled(pdev))
+ return;
+
+ for (entry = 0; entry < mdev->irq_info.num_vectors; entry++) {
+ for (offset = 0; offset < MIC_NUM_OFFSETS; offset++) {
+ if (mdev->irq_info.mic_msi_map[entry] & BIT(offset))
+ mdev->intr_ops->program_msi_to_src_map(mdev,
+ entry, offset, true);
+ }
+ }
+}
diff --git a/drivers/misc/mic/host/mic_intr.h b/drivers/misc/mic/host/mic_intr.h
new file mode 100644
index 0000000..cce2882
--- /dev/null
+++ b/drivers/misc/mic/host/mic_intr.h
@@ -0,0 +1,149 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2013 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Intel MIC Host driver.
+ *
+ */
+#ifndef _MIC_INTR_H_
+#define _MIC_INTR_H_
+
+#include <linux/bitops.h>
+#include <linux/interrupt.h>
+/*
+ * The minimum number of msix vectors required for normal operation.
+ * 3 for virtio network, console and block devices.
+ * 1 for card shutdown notifications.
+ * 4 for host owned DMA channels.
+ * 1 for SCIF
+ */
+#define MIC_MIN_MSIX 9
+#define MIC_NUM_OFFSETS 32
+
+/**
+ * mic_intr_source - The type of source that will generate
+ * the interrupt.The number of types needs to be in sync with
+ * MIC_NUM_INTR_TYPES
+ *
+ * MIC_INTR_DB: The source is a doorbell
+ * MIC_INTR_DMA: The source is a DMA channel
+ * MIC_INTR_ERR: The source is an error interrupt e.g. SBOX ERR
+ * MIC_NUM_INTR_TYPES: Total number of interrupt sources.
+ */
+enum mic_intr_type {
+ MIC_INTR_DB = 0,
+ MIC_INTR_DMA,
+ MIC_INTR_ERR,
+ MIC_NUM_INTR_TYPES
+};
+
+/**
+ * struct mic_intr_info - Contains h/w specific interrupt sources
+ * information.
+ *
+ * @intr_start_idx: Contains the starting indexes of the
+ * interrupt types.
+ * @intr_len: Contains the length of the interrupt types.
+ */
+struct mic_intr_info {
+ u16 intr_start_idx[MIC_NUM_INTR_TYPES];
+ u16 intr_len[MIC_NUM_INTR_TYPES];
+};
+
+/**
+ * struct mic_irq_info - OS specific irq information
+ *
+ * @next_avail_src: next available doorbell that can be assigned.
+ * @msix_entries: msix entries allocated while setting up MSI-x
+ * @mic_msi_map: The MSI/MSI-x mapping information.
+ * @num_vectors: The number of MSI/MSI-x vectors that have been allocated.
+ * @cb_ida: callback ID allocator to track the callbacks registered.
+ * @mic_intr_lock: spinlock to protect the interrupt callback list.
+ * @mic_thread_lock: spinlock to protect the thread callback list.
+ * This lock is used to protect against thread_fn while
+ * mic_intr_lock is used to protect against interrupt handler.
+ * @cb_list: Array of callback lists one for each source.
+ * @mask: Mask used by the main thread fn to call the underlying thread fns.
+ */
+struct mic_irq_info {
+ int next_avail_src;
+ struct msix_entry *msix_entries;
+ u32 *mic_msi_map;
+ u16 num_vectors;
+ struct ida cb_ida;
+ spinlock_t mic_intr_lock;
+ spinlock_t mic_thread_lock;
+ struct list_head *cb_list;
+ unsigned long mask;
+};
+
+/**
+ * struct mic_intr_cb - Interrupt callback structure.
+ *
+ * @handler: The callback function
+ * @thread_fn: The thread_fn.
+ * @data: Private data of the requester.
+ * @cb_id: The callback id. Identifies this callback.
+ * @list: list head pointing to the next callback structure.
+ */
+struct mic_intr_cb {
+ irq_handler_t handler;
+ irq_handler_t thread_fn;
+ void *data;
+ int cb_id;
+ struct list_head list;
+};
+
+/**
+ * struct mic_irq - opaque pointer used as cookie
+ */
+struct mic_irq;
+
+/* Forward declaration */
+struct mic_device;
+
+/**
+ * struct mic_hw_intr_ops: MIC HW specific interrupt operations
+ * @intr_init: Initialize H/W specific interrupt information.
+ * @enable_interrupts: Enable interrupts from the hardware.
+ * @disable_interrupts: Disable interrupts from the hardware.
+ * @program_msi_to_src_map: Update MSI mapping registers with
+ * irq information.
+ * @read_msi_to_src_map: Read MSI mapping registers containing
+ * irq information.
+ */
+struct mic_hw_intr_ops {
+ void (*intr_init)(struct mic_device *mdev);
+ void (*enable_interrupts)(struct mic_device *mdev);
+ void (*disable_interrupts)(struct mic_device *mdev);
+ void (*program_msi_to_src_map) (struct mic_device *mdev,
+ int idx, int intr_src, bool set);
+ u32 (*read_msi_to_src_map) (struct mic_device *mdev,
+ int idx);
+};
+
+int mic_next_db(struct mic_device *mdev);
+struct mic_irq *
+mic_request_threaded_irq(struct mic_device *mdev,
+ irq_handler_t handler, irq_handler_t thread_fn,
+ const char *name, void *data, int intr_src,
+ enum mic_intr_type type);
+void mic_free_irq(struct mic_device *mdev,
+ struct mic_irq *cookie, void *data);
+int mic_setup_interrupts(struct mic_device *mdev, struct pci_dev *pdev);
+void mic_free_interrupts(struct mic_device *mdev, struct pci_dev *pdev);
+void mic_intr_restore(struct mic_device *mdev);
+#endif
diff --git a/drivers/misc/mic/host/mic_main.c b/drivers/misc/mic/host/mic_main.c
new file mode 100644
index 0000000..153894e
--- /dev/null
+++ b/drivers/misc/mic/host/mic_main.c
@@ -0,0 +1,386 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2013 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Intel MIC Host driver.
+ */
+#include <linux/fs.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/poll.h>
+
+#include <linux/mic_common.h>
+#include "../common/mic_dev.h"
+#include "mic_device.h"
+#include "mic_x100.h"
+#include "mic_smpt.h"
+#include "mic_fops.h"
+#include "mic_virtio.h"
+
+static const char mic_driver_name[] = "mic";
+
+static const struct pci_device_id mic_pci_tbl[] = {
+ {PCI_DEVICE(PCI_VENDOR_ID_INTEL, MIC_X100_PCI_DEVICE_2250)},
+ {PCI_DEVICE(PCI_VENDOR_ID_INTEL, MIC_X100_PCI_DEVICE_2251)},
+ {PCI_DEVICE(PCI_VENDOR_ID_INTEL, MIC_X100_PCI_DEVICE_2252)},
+ {PCI_DEVICE(PCI_VENDOR_ID_INTEL, MIC_X100_PCI_DEVICE_2253)},
+ {PCI_DEVICE(PCI_VENDOR_ID_INTEL, MIC_X100_PCI_DEVICE_2254)},
+ {PCI_DEVICE(PCI_VENDOR_ID_INTEL, MIC_X100_PCI_DEVICE_2255)},
+ {PCI_DEVICE(PCI_VENDOR_ID_INTEL, MIC_X100_PCI_DEVICE_2256)},
+ {PCI_DEVICE(PCI_VENDOR_ID_INTEL, MIC_X100_PCI_DEVICE_2257)},
+ {PCI_DEVICE(PCI_VENDOR_ID_INTEL, MIC_X100_PCI_DEVICE_2258)},
+ {PCI_DEVICE(PCI_VENDOR_ID_INTEL, MIC_X100_PCI_DEVICE_2259)},
+ {PCI_DEVICE(PCI_VENDOR_ID_INTEL, MIC_X100_PCI_DEVICE_225a)},
+ {PCI_DEVICE(PCI_VENDOR_ID_INTEL, MIC_X100_PCI_DEVICE_225b)},
+ {PCI_DEVICE(PCI_VENDOR_ID_INTEL, MIC_X100_PCI_DEVICE_225c)},
+ {PCI_DEVICE(PCI_VENDOR_ID_INTEL, MIC_X100_PCI_DEVICE_225d)},
+ {PCI_DEVICE(PCI_VENDOR_ID_INTEL, MIC_X100_PCI_DEVICE_225e)},
+
+ /* required last entry */
+ { 0, }
+};
+
+MODULE_DEVICE_TABLE(pci, mic_pci_tbl);
+
+/* ID allocator for MIC devices */
+static struct ida g_mic_ida;
+/* Base device node number for MIC devices */
+static dev_t g_mic_devno;
+
+static const struct file_operations mic_fops = {
+ .open = mic_open,
+ .release = mic_release,
+ .unlocked_ioctl = mic_ioctl,
+ .poll = mic_poll,
+ .mmap = mic_mmap,
+ .owner = THIS_MODULE,
+};
+
+/* Initialize the device page */
+static int mic_dp_init(struct mic_device *mdev)
+{
+ mdev->dp = kzalloc(MIC_DP_SIZE, GFP_KERNEL);
+ if (!mdev->dp)
+ return -ENOMEM;
+
+ mdev->dp_dma_addr = mic_map_single(mdev,
+ mdev->dp, MIC_DP_SIZE);
+ if (mic_map_error(mdev->dp_dma_addr)) {
+ kfree(mdev->dp);
+ dev_err(&mdev->pdev->dev, "%s %d err %d\n",
+ __func__, __LINE__, -ENOMEM);
+ return -ENOMEM;
+ }
+ mdev->ops->write_spad(mdev, MIC_DPLO_SPAD, mdev->dp_dma_addr);
+ mdev->ops->write_spad(mdev, MIC_DPHI_SPAD, mdev->dp_dma_addr >> 32);
+ return 0;
+}
+
+/* Uninitialize the device page */
+static void mic_dp_uninit(struct mic_device *mdev)
+{
+ mic_unmap_single(mdev, mdev->dp_dma_addr, MIC_DP_SIZE);
+ kfree(mdev->dp);
+}
+
+/**
+ * mic_ops_init: Initialize HW specific operation tables.
+ *
+ * @mdev: pointer to mic_device instance
+ *
+ * returns none.
+ */
+static void mic_ops_init(struct mic_device *mdev)
+{
+ switch (mdev->family) {
+ case MIC_FAMILY_X100:
+ mdev->ops = &mic_x100_ops;
+ mdev->intr_ops = &mic_x100_intr_ops;
+ mdev->smpt_ops = &mic_x100_smpt_ops;
+ break;
+ default:
+ break;
+ }
+}
+
+/**
+ * mic_get_family - Determine hardware family to which this MIC belongs.
+ *
+ * @pdev: The pci device structure
+ *
+ * returns family.
+ */
+static enum mic_hw_family mic_get_family(struct pci_dev *pdev)
+{
+ enum mic_hw_family family;
+
+ switch (pdev->device) {
+ case MIC_X100_PCI_DEVICE_2250:
+ case MIC_X100_PCI_DEVICE_2251:
+ case MIC_X100_PCI_DEVICE_2252:
+ case MIC_X100_PCI_DEVICE_2253:
+ case MIC_X100_PCI_DEVICE_2254:
+ case MIC_X100_PCI_DEVICE_2255:
+ case MIC_X100_PCI_DEVICE_2256:
+ case MIC_X100_PCI_DEVICE_2257:
+ case MIC_X100_PCI_DEVICE_2258:
+ case MIC_X100_PCI_DEVICE_2259:
+ case MIC_X100_PCI_DEVICE_225a:
+ case MIC_X100_PCI_DEVICE_225b:
+ case MIC_X100_PCI_DEVICE_225c:
+ case MIC_X100_PCI_DEVICE_225d:
+ case MIC_X100_PCI_DEVICE_225e:
+ family = MIC_FAMILY_X100;
+ break;
+ default:
+ family = MIC_FAMILY_UNKNOWN;
+ break;
+ }
+ return family;
+}
+
+/**
+ * mic_device_init - Allocates and initializes the MIC device structure
+ *
+ * @mdev: pointer to mic_device instance
+ * @pdev: The pci device structure
+ *
+ * returns none.
+ */
+static void
+mic_device_init(struct mic_device *mdev, struct pci_dev *pdev)
+{
+ mdev->pdev = pdev;
+ mdev->family = mic_get_family(pdev);
+ mdev->stepping = pdev->revision;
+ mic_ops_init(mdev);
+ mutex_init(&mdev->mic_mutex);
+ mdev->irq_info.next_avail_src = 0;
+ INIT_LIST_HEAD(&mdev->vdev_list);
+}
+
+/**
+ * mic_probe - Device Initialization Routine
+ *
+ * @pdev: PCI device structure
+ * @ent: entry in mic_pci_tbl
+ *
+ * returns 0 on success, < 0 on failure.
+ */
+static int mic_probe(struct pci_dev *pdev,
+ const struct pci_device_id *ent)
+{
+ int rc;
+ struct mic_device *mdev;
+
+ mdev = kzalloc(sizeof(*mdev), GFP_KERNEL);
+ if (!mdev) {
+ rc = -ENOMEM;
+ dev_err(&pdev->dev, "mdev kmalloc failed rc %d\n", rc);
+ goto mdev_alloc_fail;
+ }
+ mdev->id = ida_simple_get(&g_mic_ida, 0, MIC_MAX_NUM_DEVS, GFP_KERNEL);
+ if (mdev->id < 0) {
+ rc = mdev->id;
+ dev_err(&pdev->dev, "ida_simple_get failed rc %d\n", rc);
+ goto ida_fail;
+ }
+
+ mic_device_init(mdev, pdev);
+
+ rc = pci_enable_device(pdev);
+ if (rc) {
+ dev_err(&pdev->dev, "failed to enable pci device.\n");
+ goto ida_remove;
+ }
+
+ pci_set_master(pdev);
+
+ rc = pci_request_regions(pdev, mic_driver_name);
+ if (rc) {
+ dev_err(&pdev->dev, "failed to get pci regions.\n");
+ goto disable_device;
+ }
+
+ rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
+ if (rc) {
+ dev_err(&pdev->dev, "Cannot set DMA mask\n");
+ goto release_regions;
+ }
+
+ mdev->mmio.pa = pci_resource_start(pdev, mdev->ops->mmio_bar);
+ mdev->mmio.len = pci_resource_len(pdev, mdev->ops->mmio_bar);
+ mdev->mmio.va = pci_ioremap_bar(pdev, mdev->ops->mmio_bar);
+ if (!mdev->mmio.va) {
+ dev_err(&pdev->dev, "Cannot remap MMIO BAR\n");
+ rc = -EIO;
+ goto release_regions;
+ }
+
+ mdev->aper.pa = pci_resource_start(pdev, mdev->ops->aper_bar);
+ mdev->aper.len = pci_resource_len(pdev, mdev->ops->aper_bar);
+ mdev->aper.va = ioremap_wc(mdev->aper.pa, mdev->aper.len);
+ if (!mdev->aper.va) {
+ dev_err(&pdev->dev, "Cannot remap Aperture BAR\n");
+ rc = -EIO;
+ goto unmap_mmio;
+ }
+
+ mdev->intr_ops->intr_init(mdev);
+ rc = mic_setup_interrupts(mdev, pdev);
+ if (rc) {
+ dev_err(&pdev->dev, "mic_setup_interrupts failed %d\n", rc);
+ goto unmap_aper;
+ }
+ rc = mic_smpt_init(mdev);
+ if (rc) {
+ dev_err(&pdev->dev, "smpt_init failed %d\n", rc);
+ goto free_interrupts;
+ }
+
+ pci_set_drvdata(pdev, mdev);
+
+ rc = mic_dp_init(mdev);
+ if (rc) {
+ dev_err(&pdev->dev, "mic_dp_init failed rc %d\n", rc);
+ goto smpt_uninit;
+ }
+ mic_bootparam_init(mdev);
+
+ mic_create_debug_dir(mdev);
+
+ mdev->miscdev.minor = MISC_DYNAMIC_MINOR;
+ snprintf(mdev->name, sizeof(mdev->name), "mic%d", mdev->id);
+ mdev->miscdev.name = mdev->name;
+ mdev->miscdev.fops = &mic_fops;
+ mdev->miscdev.parent = &mdev->pdev->dev;
+ rc = misc_register(&mdev->miscdev);
+ if (rc) {
+ dev_err(&pdev->dev, "misc_register err id %d rc %d\n",
+ mdev->id, rc);
+ goto cleanup_debug_dir;
+ }
+
+ mdev->cosm_dev = cosm_register_device(&mdev->pdev->dev, &cosm_hw_ops);
+ if (IS_ERR(mdev->cosm_dev)) {
+ rc = PTR_ERR(mdev->cosm_dev);
+ dev_err(&pdev->dev, "cosm_add_device failed rc %d\n", rc);
+ goto misc_dereg;
+ }
+ return 0;
+misc_dereg:
+ misc_deregister(&mdev->miscdev);
+cleanup_debug_dir:
+ mic_delete_debug_dir(mdev);
+ mic_dp_uninit(mdev);
+smpt_uninit:
+ mic_smpt_uninit(mdev);
+free_interrupts:
+ mic_free_interrupts(mdev, pdev);
+unmap_aper:
+ iounmap(mdev->aper.va);
+unmap_mmio:
+ iounmap(mdev->mmio.va);
+release_regions:
+ pci_release_regions(pdev);
+disable_device:
+ pci_disable_device(pdev);
+ida_remove:
+ ida_simple_remove(&g_mic_ida, mdev->id);
+ida_fail:
+ kfree(mdev);
+mdev_alloc_fail:
+ dev_err(&pdev->dev, "Probe failed rc %d\n", rc);
+ return rc;
+}
+
+/**
+ * mic_remove - Device Removal Routine
+ * mic_remove is called by the PCI subsystem to alert the driver
+ * that it should release a PCI device.
+ *
+ * @pdev: PCI device structure
+ */
+static void mic_remove(struct pci_dev *pdev)
+{
+ struct mic_device *mdev;
+
+ mdev = pci_get_drvdata(pdev);
+ if (!mdev)
+ return;
+
+ cosm_unregister_device(mdev->cosm_dev);
+ misc_deregister(&mdev->miscdev);
+ mic_delete_debug_dir(mdev);
+ mic_dp_uninit(mdev);
+ mic_smpt_uninit(mdev);
+ mic_free_interrupts(mdev, pdev);
+ iounmap(mdev->aper.va);
+ iounmap(mdev->mmio.va);
+ pci_release_regions(pdev);
+ pci_disable_device(pdev);
+ ida_simple_remove(&g_mic_ida, mdev->id);
+ kfree(mdev);
+}
+
+static struct pci_driver mic_driver = {
+ .name = mic_driver_name,
+ .id_table = mic_pci_tbl,
+ .probe = mic_probe,
+ .remove = mic_remove
+};
+
+static int __init mic_init(void)
+{
+ int ret;
+
+ ret = alloc_chrdev_region(&g_mic_devno, 0,
+ MIC_MAX_NUM_DEVS, mic_driver_name);
+ if (ret) {
+ pr_err("alloc_chrdev_region failed ret %d\n", ret);
+ goto error;
+ }
+
+ mic_init_debugfs();
+ ida_init(&g_mic_ida);
+ ret = pci_register_driver(&mic_driver);
+ if (ret) {
+ pr_err("pci_register_driver failed ret %d\n", ret);
+ goto cleanup_chrdev;
+ }
+ return ret;
+cleanup_chrdev:
+ ida_destroy(&g_mic_ida);
+ mic_exit_debugfs();
+ unregister_chrdev_region(g_mic_devno, MIC_MAX_NUM_DEVS);
+error:
+ return ret;
+}
+
+static void __exit mic_exit(void)
+{
+ pci_unregister_driver(&mic_driver);
+ ida_destroy(&g_mic_ida);
+ mic_exit_debugfs();
+ unregister_chrdev_region(g_mic_devno, MIC_MAX_NUM_DEVS);
+}
+
+module_init(mic_init);
+module_exit(mic_exit);
+
+MODULE_AUTHOR("Intel Corporation");
+MODULE_DESCRIPTION("Intel(R) MIC X100 Host driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/misc/mic/host/mic_smpt.c b/drivers/misc/mic/host/mic_smpt.c
new file mode 100644
index 0000000..c3f9585
--- /dev/null
+++ b/drivers/misc/mic/host/mic_smpt.c
@@ -0,0 +1,439 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2013 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Intel MIC Host driver.
+ *
+ */
+#include <linux/pci.h>
+
+#include "../common/mic_dev.h"
+#include "mic_device.h"
+#include "mic_smpt.h"
+
+static inline u64 mic_system_page_mask(struct mic_device *mdev)
+{
+ return (1ULL << mdev->smpt->info.page_shift) - 1ULL;
+}
+
+static inline u8 mic_sys_addr_to_smpt(struct mic_device *mdev, dma_addr_t pa)
+{
+ return (pa - mdev->smpt->info.base) >> mdev->smpt->info.page_shift;
+}
+
+static inline u64 mic_smpt_to_pa(struct mic_device *mdev, u8 index)
+{
+ return mdev->smpt->info.base + (index * mdev->smpt->info.page_size);
+}
+
+static inline u64 mic_smpt_offset(struct mic_device *mdev, dma_addr_t pa)
+{
+ return pa & mic_system_page_mask(mdev);
+}
+
+static inline u64 mic_smpt_align_low(struct mic_device *mdev, dma_addr_t pa)
+{
+ return ALIGN(pa - mic_system_page_mask(mdev),
+ mdev->smpt->info.page_size);
+}
+
+static inline u64 mic_smpt_align_high(struct mic_device *mdev, dma_addr_t pa)
+{
+ return ALIGN(pa, mdev->smpt->info.page_size);
+}
+
+/* Total Cumulative system memory accessible by MIC across all SMPT entries */
+static inline u64 mic_max_system_memory(struct mic_device *mdev)
+{
+ return mdev->smpt->info.num_reg * mdev->smpt->info.page_size;
+}
+
+/* Maximum system memory address accessible by MIC */
+static inline u64 mic_max_system_addr(struct mic_device *mdev)
+{
+ return mdev->smpt->info.base + mic_max_system_memory(mdev) - 1ULL;
+}
+
+/* Check if the DMA address is a MIC system memory address */
+static inline bool
+mic_is_system_addr(struct mic_device *mdev, dma_addr_t pa)
+{
+ return pa >= mdev->smpt->info.base && pa <= mic_max_system_addr(mdev);
+}
+
+/* Populate an SMPT entry and update the reference counts. */
+static void mic_add_smpt_entry(int spt, s64 *ref, u64 addr,
+ int entries, struct mic_device *mdev)
+{
+ struct mic_smpt_info *smpt_info = mdev->smpt;
+ int i;
+
+ for (i = spt; i < spt + entries; i++,
+ addr += smpt_info->info.page_size) {
+ if (!smpt_info->entry[i].ref_count &&
+ (smpt_info->entry[i].dma_addr != addr)) {
+ mdev->smpt_ops->set(mdev, addr, i);
+ smpt_info->entry[i].dma_addr = addr;
+ }
+ smpt_info->entry[i].ref_count += ref[i - spt];
+ }
+}
+
+/*
+ * Find an available MIC address in MIC SMPT address space
+ * for a given DMA address and size.
+ */
+static dma_addr_t mic_smpt_op(struct mic_device *mdev, u64 dma_addr,
+ int entries, s64 *ref, size_t size)
+{
+ int spt;
+ int ae = 0;
+ int i;
+ unsigned long flags;
+ dma_addr_t mic_addr = 0;
+ dma_addr_t addr = dma_addr;
+ struct mic_smpt_info *smpt_info = mdev->smpt;
+
+ spin_lock_irqsave(&smpt_info->smpt_lock, flags);
+
+ /* find existing entries */
+ for (i = 0; i < smpt_info->info.num_reg; i++) {
+ if (smpt_info->entry[i].dma_addr == addr) {
+ ae++;
+ addr += smpt_info->info.page_size;
+ } else if (ae) /* cannot find contiguous entries */
+ goto not_found;
+
+ if (ae == entries)
+ goto found;
+ }
+
+ /* find free entry */
+ for (ae = 0, i = 0; i < smpt_info->info.num_reg; i++) {
+ ae = (smpt_info->entry[i].ref_count == 0) ? ae + 1 : 0;
+ if (ae == entries)
+ goto found;
+ }
+
+not_found:
+ spin_unlock_irqrestore(&smpt_info->smpt_lock, flags);
+ return mic_addr;
+
+found:
+ spt = i - entries + 1;
+ mic_addr = mic_smpt_to_pa(mdev, spt);
+ mic_add_smpt_entry(spt, ref, dma_addr, entries, mdev);
+ smpt_info->map_count++;
+ smpt_info->ref_count += (s64)size;
+ spin_unlock_irqrestore(&smpt_info->smpt_lock, flags);
+ return mic_addr;
+}
+
+/*
+ * Returns number of smpt entries needed for dma_addr to dma_addr + size
+ * also returns the reference count array for each of those entries
+ * and the starting smpt address
+ */
+static int mic_get_smpt_ref_count(struct mic_device *mdev, dma_addr_t dma_addr,
+ size_t size, s64 *ref, u64 *smpt_start)
+{
+ u64 start = dma_addr;
+ u64 end = dma_addr + size;
+ int i = 0;
+
+ while (start < end) {
+ ref[i++] = min(mic_smpt_align_high(mdev, start + 1),
+ end) - start;
+ start = mic_smpt_align_high(mdev, start + 1);
+ }
+
+ if (smpt_start)
+ *smpt_start = mic_smpt_align_low(mdev, dma_addr);
+
+ return i;
+}
+
+/*
+ * mic_to_dma_addr - Converts a MIC address to a DMA address.
+ *
+ * @mdev: pointer to mic_device instance.
+ * @mic_addr: MIC address.
+ *
+ * returns a DMA address.
+ */
+dma_addr_t mic_to_dma_addr(struct mic_device *mdev, dma_addr_t mic_addr)
+{
+ struct mic_smpt_info *smpt_info = mdev->smpt;
+ int spt;
+ dma_addr_t dma_addr;
+
+ if (!mic_is_system_addr(mdev, mic_addr)) {
+ dev_err(&mdev->pdev->dev,
+ "mic_addr is invalid. mic_addr = 0x%llx\n", mic_addr);
+ return -EINVAL;
+ }
+ spt = mic_sys_addr_to_smpt(mdev, mic_addr);
+ dma_addr = smpt_info->entry[spt].dma_addr +
+ mic_smpt_offset(mdev, mic_addr);
+ return dma_addr;
+}
+
+/**
+ * mic_map - Maps a DMA address to a MIC physical address.
+ *
+ * @mdev: pointer to mic_device instance.
+ * @dma_addr: DMA address.
+ * @size: Size of the region to be mapped.
+ *
+ * This API converts the DMA address provided to a DMA address understood
+ * by MIC. Caller should check for errors by calling mic_map_error(..).
+ *
+ * returns DMA address as required by MIC.
+ */
+dma_addr_t mic_map(struct mic_device *mdev, dma_addr_t dma_addr, size_t size)
+{
+ dma_addr_t mic_addr = 0;
+ int num_entries;
+ s64 *ref;
+ u64 smpt_start;
+
+ if (!size || size > mic_max_system_memory(mdev))
+ return mic_addr;
+
+ ref = kmalloc_array(mdev->smpt->info.num_reg, sizeof(s64), GFP_ATOMIC);
+ if (!ref)
+ return mic_addr;
+
+ num_entries = mic_get_smpt_ref_count(mdev, dma_addr, size,
+ ref, &smpt_start);
+
+ /* Set the smpt table appropriately and get 16G aligned mic address */
+ mic_addr = mic_smpt_op(mdev, smpt_start, num_entries, ref, size);
+
+ kfree(ref);
+
+ /*
+ * If mic_addr is zero then its an error case
+ * since mic_addr can never be zero.
+ * else generate mic_addr by adding the 16G offset in dma_addr
+ */
+ if (!mic_addr && MIC_FAMILY_X100 == mdev->family) {
+ dev_err(&mdev->pdev->dev,
+ "mic_map failed dma_addr 0x%llx size 0x%lx\n",
+ dma_addr, size);
+ return mic_addr;
+ } else {
+ return mic_addr + mic_smpt_offset(mdev, dma_addr);
+ }
+}
+
+/**
+ * mic_unmap - Unmaps a MIC physical address.
+ *
+ * @mdev: pointer to mic_device instance.
+ * @mic_addr: MIC physical address.
+ * @size: Size of the region to be unmapped.
+ *
+ * This API unmaps the mappings created by mic_map(..).
+ *
+ * returns None.
+ */
+void mic_unmap(struct mic_device *mdev, dma_addr_t mic_addr, size_t size)
+{
+ struct mic_smpt_info *smpt_info = mdev->smpt;
+ s64 *ref;
+ int num_smpt;
+ int spt;
+ int i;
+ unsigned long flags;
+
+ if (!size)
+ return;
+
+ if (!mic_is_system_addr(mdev, mic_addr)) {
+ dev_err(&mdev->pdev->dev,
+ "invalid address: 0x%llx\n", mic_addr);
+ return;
+ }
+
+ spt = mic_sys_addr_to_smpt(mdev, mic_addr);
+ ref = kmalloc_array(mdev->smpt->info.num_reg, sizeof(s64), GFP_ATOMIC);
+ if (!ref)
+ return;
+
+ /* Get number of smpt entries to be mapped, ref count array */
+ num_smpt = mic_get_smpt_ref_count(mdev, mic_addr, size, ref, NULL);
+
+ spin_lock_irqsave(&smpt_info->smpt_lock, flags);
+ smpt_info->unmap_count++;
+ smpt_info->ref_count -= (s64)size;
+
+ for (i = spt; i < spt + num_smpt; i++) {
+ smpt_info->entry[i].ref_count -= ref[i - spt];
+ if (smpt_info->entry[i].ref_count < 0)
+ dev_warn(&mdev->pdev->dev,
+ "ref count for entry %d is negative\n", i);
+ }
+ spin_unlock_irqrestore(&smpt_info->smpt_lock, flags);
+ kfree(ref);
+}
+
+/**
+ * mic_map_single - Maps a virtual address to a MIC physical address.
+ *
+ * @mdev: pointer to mic_device instance.
+ * @va: Kernel direct mapped virtual address.
+ * @size: Size of the region to be mapped.
+ *
+ * This API calls pci_map_single(..) for the direct mapped virtual address
+ * and then converts the DMA address provided to a DMA address understood
+ * by MIC. Caller should check for errors by calling mic_map_error(..).
+ *
+ * returns DMA address as required by MIC.
+ */
+dma_addr_t mic_map_single(struct mic_device *mdev, void *va, size_t size)
+{
+ dma_addr_t mic_addr = 0;
+ struct pci_dev *pdev = mdev->pdev;
+ dma_addr_t dma_addr =
+ pci_map_single(pdev, va, size, PCI_DMA_BIDIRECTIONAL);
+
+ if (!pci_dma_mapping_error(pdev, dma_addr)) {
+ mic_addr = mic_map(mdev, dma_addr, size);
+ if (!mic_addr) {
+ dev_err(&mdev->pdev->dev,
+ "mic_map failed dma_addr 0x%llx size 0x%lx\n",
+ dma_addr, size);
+ pci_unmap_single(pdev, dma_addr,
+ size, PCI_DMA_BIDIRECTIONAL);
+ }
+ }
+ return mic_addr;
+}
+
+/**
+ * mic_unmap_single - Unmaps a MIC physical address.
+ *
+ * @mdev: pointer to mic_device instance.
+ * @mic_addr: MIC physical address.
+ * @size: Size of the region to be unmapped.
+ *
+ * This API unmaps the mappings created by mic_map_single(..).
+ *
+ * returns None.
+ */
+void
+mic_unmap_single(struct mic_device *mdev, dma_addr_t mic_addr, size_t size)
+{
+ struct pci_dev *pdev = mdev->pdev;
+ dma_addr_t dma_addr = mic_to_dma_addr(mdev, mic_addr);
+ mic_unmap(mdev, mic_addr, size);
+ pci_unmap_single(pdev, dma_addr, size, PCI_DMA_BIDIRECTIONAL);
+}
+
+/**
+ * mic_smpt_init - Initialize MIC System Memory Page Tables.
+ *
+ * @mdev: pointer to mic_device instance.
+ *
+ * returns 0 for success and -errno for error.
+ */
+int mic_smpt_init(struct mic_device *mdev)
+{
+ int i, err = 0;
+ dma_addr_t dma_addr;
+ struct mic_smpt_info *smpt_info;
+
+ mdev->smpt = kmalloc(sizeof(*mdev->smpt), GFP_KERNEL);
+ if (!mdev->smpt)
+ return -ENOMEM;
+
+ smpt_info = mdev->smpt;
+ mdev->smpt_ops->init(mdev);
+ smpt_info->entry = kmalloc_array(smpt_info->info.num_reg,
+ sizeof(*smpt_info->entry), GFP_KERNEL);
+ if (!smpt_info->entry) {
+ err = -ENOMEM;
+ goto free_smpt;
+ }
+ spin_lock_init(&smpt_info->smpt_lock);
+ for (i = 0; i < smpt_info->info.num_reg; i++) {
+ dma_addr = i * smpt_info->info.page_size;
+ smpt_info->entry[i].dma_addr = dma_addr;
+ smpt_info->entry[i].ref_count = 0;
+ mdev->smpt_ops->set(mdev, dma_addr, i);
+ }
+ smpt_info->ref_count = 0;
+ smpt_info->map_count = 0;
+ smpt_info->unmap_count = 0;
+ return 0;
+free_smpt:
+ kfree(smpt_info);
+ return err;
+}
+
+/**
+ * mic_smpt_uninit - UnInitialize MIC System Memory Page Tables.
+ *
+ * @mdev: pointer to mic_device instance.
+ *
+ * returns None.
+ */
+void mic_smpt_uninit(struct mic_device *mdev)
+{
+ struct mic_smpt_info *smpt_info = mdev->smpt;
+ int i;
+
+ dev_dbg(&mdev->pdev->dev,
+ "nodeid %d SMPT ref count %lld map %lld unmap %lld\n",
+ mdev->id, smpt_info->ref_count,
+ smpt_info->map_count, smpt_info->unmap_count);
+
+ for (i = 0; i < smpt_info->info.num_reg; i++) {
+ dev_dbg(&mdev->pdev->dev,
+ "SMPT entry[%d] dma_addr = 0x%llx ref_count = %lld\n",
+ i, smpt_info->entry[i].dma_addr,
+ smpt_info->entry[i].ref_count);
+ if (smpt_info->entry[i].ref_count)
+ dev_warn(&mdev->pdev->dev,
+ "ref count for entry %d is not zero\n", i);
+ }
+ kfree(smpt_info->entry);
+ kfree(smpt_info);
+}
+
+/**
+ * mic_smpt_restore - Restore MIC System Memory Page Tables.
+ *
+ * @mdev: pointer to mic_device instance.
+ *
+ * Restore the SMPT registers to values previously stored in the
+ * SW data structures. Some MIC steppings lose register state
+ * across resets and this API should be called for performing
+ * a restore operation if required.
+ *
+ * returns None.
+ */
+void mic_smpt_restore(struct mic_device *mdev)
+{
+ int i;
+ dma_addr_t dma_addr;
+
+ for (i = 0; i < mdev->smpt->info.num_reg; i++) {
+ dma_addr = mdev->smpt->entry[i].dma_addr;
+ mdev->smpt_ops->set(mdev, dma_addr, i);
+ }
+}
diff --git a/drivers/misc/mic/host/mic_smpt.h b/drivers/misc/mic/host/mic_smpt.h
new file mode 100644
index 0000000..68721c6
--- /dev/null
+++ b/drivers/misc/mic/host/mic_smpt.h
@@ -0,0 +1,99 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2013 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Intel MIC Host driver.
+ *
+ */
+#ifndef MIC_SMPT_H
+#define MIC_SMPT_H
+/**
+ * struct mic_smpt_ops - MIC HW specific SMPT operations.
+ * @init: Initialize hardware specific SMPT information in mic_smpt_hw_info.
+ * @set: Set the value for a particular SMPT entry.
+ */
+struct mic_smpt_ops {
+ void (*init)(struct mic_device *mdev);
+ void (*set)(struct mic_device *mdev, dma_addr_t dma_addr, u8 index);
+};
+
+/**
+ * struct mic_smpt - MIC SMPT entry information.
+ * @dma_addr: Base DMA address for this SMPT entry.
+ * @ref_count: Number of active mappings for this SMPT entry in bytes.
+ */
+struct mic_smpt {
+ dma_addr_t dma_addr;
+ s64 ref_count;
+};
+
+/**
+ * struct mic_smpt_hw_info - MIC SMPT hardware specific information.
+ * @num_reg: Number of SMPT registers.
+ * @page_shift: System memory page shift.
+ * @page_size: System memory page size.
+ * @base: System address base.
+ */
+struct mic_smpt_hw_info {
+ u8 num_reg;
+ u8 page_shift;
+ u64 page_size;
+ u64 base;
+};
+
+/**
+ * struct mic_smpt_info - MIC SMPT information.
+ * @entry: Array of SMPT entries.
+ * @smpt_lock: Spin lock protecting access to SMPT data structures.
+ * @info: Hardware specific SMPT information.
+ * @ref_count: Number of active SMPT mappings (for debug).
+ * @map_count: Number of SMPT mappings created (for debug).
+ * @unmap_count: Number of SMPT mappings destroyed (for debug).
+ */
+struct mic_smpt_info {
+ struct mic_smpt *entry;
+ spinlock_t smpt_lock;
+ struct mic_smpt_hw_info info;
+ s64 ref_count;
+ s64 map_count;
+ s64 unmap_count;
+};
+
+dma_addr_t mic_map_single(struct mic_device *mdev, void *va, size_t size);
+void mic_unmap_single(struct mic_device *mdev,
+ dma_addr_t mic_addr, size_t size);
+dma_addr_t mic_map(struct mic_device *mdev,
+ dma_addr_t dma_addr, size_t size);
+void mic_unmap(struct mic_device *mdev, dma_addr_t mic_addr, size_t size);
+dma_addr_t mic_to_dma_addr(struct mic_device *mdev, dma_addr_t mic_addr);
+
+/**
+ * mic_map_error - Check a MIC address for errors.
+ *
+ * @mdev: pointer to mic_device instance.
+ *
+ * returns Whether there was an error during mic_map..(..) APIs.
+ */
+static inline bool mic_map_error(dma_addr_t mic_addr)
+{
+ return !mic_addr;
+}
+
+int mic_smpt_init(struct mic_device *mdev);
+void mic_smpt_uninit(struct mic_device *mdev);
+void mic_smpt_restore(struct mic_device *mdev);
+
+#endif
diff --git a/drivers/misc/mic/host/mic_virtio.c b/drivers/misc/mic/host/mic_virtio.c
new file mode 100644
index 0000000..58b107a
--- /dev/null
+++ b/drivers/misc/mic/host/mic_virtio.c
@@ -0,0 +1,811 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2013 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Intel MIC Host driver.
+ *
+ */
+#include <linux/pci.h>
+#include <linux/sched.h>
+#include <linux/uaccess.h>
+#include <linux/dmaengine.h>
+#include <linux/mic_common.h>
+#include "../common/mic_dev.h"
+#include "mic_device.h"
+#include "mic_smpt.h"
+#include "mic_virtio.h"
+
+/*
+ * Size of the internal buffer used during DMA's as an intermediate buffer
+ * for copy to/from user.
+ */
+#define MIC_INT_DMA_BUF_SIZE PAGE_ALIGN(64 * 1024ULL)
+
+static int mic_sync_dma(struct mic_device *mdev, dma_addr_t dst,
+ dma_addr_t src, size_t len)
+{
+ int err = 0;
+ struct dma_async_tx_descriptor *tx;
+ struct dma_chan *mic_ch = mdev->dma_ch[0];
+
+ if (!mic_ch) {
+ err = -EBUSY;
+ goto error;
+ }
+
+ tx = mic_ch->device->device_prep_dma_memcpy(mic_ch, dst, src, len,
+ DMA_PREP_FENCE);
+ if (!tx) {
+ err = -ENOMEM;
+ goto error;
+ } else {
+ dma_cookie_t cookie = tx->tx_submit(tx);
+
+ err = dma_submit_error(cookie);
+ if (err)
+ goto error;
+ err = dma_sync_wait(mic_ch, cookie);
+ }
+error:
+ if (err)
+ dev_err(&mdev->pdev->dev, "%s %d err %d\n",
+ __func__, __LINE__, err);
+ return err;
+}
+
+/*
+ * Initiates the copies across the PCIe bus from card memory to a user
+ * space buffer. When transfers are done using DMA, source/destination
+ * addresses and transfer length must follow the alignment requirements of
+ * the MIC DMA engine.
+ */
+static int mic_virtio_copy_to_user(struct mic_vdev *mvdev, void __user *ubuf,
+ size_t len, u64 daddr, size_t dlen,
+ int vr_idx)
+{
+ struct mic_device *mdev = mvdev->mdev;
+ void __iomem *dbuf = mdev->aper.va + daddr;
+ struct mic_vringh *mvr = &mvdev->mvr[vr_idx];
+ size_t dma_alignment = 1 << mdev->dma_ch[0]->device->copy_align;
+ size_t dma_offset;
+ size_t partlen;
+ int err;
+
+ dma_offset = daddr - round_down(daddr, dma_alignment);
+ daddr -= dma_offset;
+ len += dma_offset;
+
+ while (len) {
+ partlen = min_t(size_t, len, MIC_INT_DMA_BUF_SIZE);
+
+ err = mic_sync_dma(mdev, mvr->buf_da, daddr,
+ ALIGN(partlen, dma_alignment));
+ if (err)
+ goto err;
+
+ if (copy_to_user(ubuf, mvr->buf + dma_offset,
+ partlen - dma_offset)) {
+ err = -EFAULT;
+ goto err;
+ }
+ daddr += partlen;
+ ubuf += partlen;
+ dbuf += partlen;
+ mvdev->in_bytes_dma += partlen;
+ mvdev->in_bytes += partlen;
+ len -= partlen;
+ dma_offset = 0;
+ }
+ return 0;
+err:
+ dev_err(mic_dev(mvdev), "%s %d err %d\n", __func__, __LINE__, err);
+ return err;
+}
+
+/*
+ * Initiates copies across the PCIe bus from a user space buffer to card
+ * memory. When transfers are done using DMA, source/destination addresses
+ * and transfer length must follow the alignment requirements of the MIC
+ * DMA engine.
+ */
+static int mic_virtio_copy_from_user(struct mic_vdev *mvdev, void __user *ubuf,
+ size_t len, u64 daddr, size_t dlen,
+ int vr_idx)
+{
+ struct mic_device *mdev = mvdev->mdev;
+ void __iomem *dbuf = mdev->aper.va + daddr;
+ struct mic_vringh *mvr = &mvdev->mvr[vr_idx];
+ size_t dma_alignment = 1 << mdev->dma_ch[0]->device->copy_align;
+ size_t partlen;
+ int err;
+
+ if (daddr & (dma_alignment - 1)) {
+ mvdev->tx_dst_unaligned += len;
+ goto memcpy;
+ } else if (ALIGN(len, dma_alignment) > dlen) {
+ mvdev->tx_len_unaligned += len;
+ goto memcpy;
+ }
+
+ while (len) {
+ partlen = min_t(size_t, len, MIC_INT_DMA_BUF_SIZE);
+
+ if (copy_from_user(mvr->buf, ubuf, partlen)) {
+ err = -EFAULT;
+ goto err;
+ }
+ err = mic_sync_dma(mdev, daddr, mvr->buf_da,
+ ALIGN(partlen, dma_alignment));
+ if (err)
+ goto err;
+ daddr += partlen;
+ ubuf += partlen;
+ dbuf += partlen;
+ mvdev->out_bytes_dma += partlen;
+ mvdev->out_bytes += partlen;
+ len -= partlen;
+ }
+memcpy:
+ /*
+ * We are copying to IO below and should ideally use something
+ * like copy_from_user_toio(..) if it existed.
+ */
+ if (copy_from_user((void __force *)dbuf, ubuf, len)) {
+ err = -EFAULT;
+ goto err;
+ }
+ mvdev->out_bytes += len;
+ return 0;
+err:
+ dev_err(mic_dev(mvdev), "%s %d err %d\n", __func__, __LINE__, err);
+ return err;
+}
+
+#define MIC_VRINGH_READ true
+
+/* The function to call to notify the card about added buffers */
+static void mic_notify(struct vringh *vrh)
+{
+ struct mic_vringh *mvrh = container_of(vrh, struct mic_vringh, vrh);
+ struct mic_vdev *mvdev = mvrh->mvdev;
+ s8 db = mvdev->dc->h2c_vdev_db;
+
+ if (db != -1)
+ mvdev->mdev->ops->send_intr(mvdev->mdev, db);
+}
+
+/* Determine the total number of bytes consumed in a VRINGH KIOV */
+static inline u32 mic_vringh_iov_consumed(struct vringh_kiov *iov)
+{
+ int i;
+ u32 total = iov->consumed;
+
+ for (i = 0; i < iov->i; i++)
+ total += iov->iov[i].iov_len;
+ return total;
+}
+
+/*
+ * Traverse the VRINGH KIOV and issue the APIs to trigger the copies.
+ * This API is heavily based on the vringh_iov_xfer(..) implementation
+ * in vringh.c. The reason we cannot reuse vringh_iov_pull_kern(..)
+ * and vringh_iov_push_kern(..) directly is because there is no
+ * way to override the VRINGH xfer(..) routines as of v3.10.
+ */
+static int mic_vringh_copy(struct mic_vdev *mvdev, struct vringh_kiov *iov,
+ void __user *ubuf, size_t len, bool read, int vr_idx,
+ size_t *out_len)
+{
+ int ret = 0;
+ size_t partlen, tot_len = 0;
+
+ while (len && iov->i < iov->used) {
+ partlen = min(iov->iov[iov->i].iov_len, len);
+ if (read)
+ ret = mic_virtio_copy_to_user(mvdev, ubuf, partlen,
+ (u64)iov->iov[iov->i].iov_base,
+ iov->iov[iov->i].iov_len,
+ vr_idx);
+ else
+ ret = mic_virtio_copy_from_user(mvdev, ubuf, partlen,
+ (u64)iov->iov[iov->i].iov_base,
+ iov->iov[iov->i].iov_len,
+ vr_idx);
+ if (ret) {
+ dev_err(mic_dev(mvdev), "%s %d err %d\n",
+ __func__, __LINE__, ret);
+ break;
+ }
+ len -= partlen;
+ ubuf += partlen;
+ tot_len += partlen;
+ iov->consumed += partlen;
+ iov->iov[iov->i].iov_len -= partlen;
+ iov->iov[iov->i].iov_base += partlen;
+ if (!iov->iov[iov->i].iov_len) {
+ /* Fix up old iov element then increment. */
+ iov->iov[iov->i].iov_len = iov->consumed;
+ iov->iov[iov->i].iov_base -= iov->consumed;
+
+ iov->consumed = 0;
+ iov->i++;
+ }
+ }
+ *out_len = tot_len;
+ return ret;
+}
+
+/*
+ * Use the standard VRINGH infrastructure in the kernel to fetch new
+ * descriptors, initiate the copies and update the used ring.
+ */
+static int _mic_virtio_copy(struct mic_vdev *mvdev,
+ struct mic_copy_desc *copy)
+{
+ int ret = 0;
+ u32 iovcnt = copy->iovcnt;
+ struct iovec iov;
+ struct iovec __user *u_iov = copy->iov;
+ void __user *ubuf = NULL;
+ struct mic_vringh *mvr = &mvdev->mvr[copy->vr_idx];
+ struct vringh_kiov *riov = &mvr->riov;
+ struct vringh_kiov *wiov = &mvr->wiov;
+ struct vringh *vrh = &mvr->vrh;
+ u16 *head = &mvr->head;
+ struct mic_vring *vr = &mvr->vring;
+ size_t len = 0, out_len;
+
+ copy->out_len = 0;
+ /* Fetch a new IOVEC if all previous elements have been processed */
+ if (riov->i == riov->used && wiov->i == wiov->used) {
+ ret = vringh_getdesc_kern(vrh, riov, wiov,
+ head, GFP_KERNEL);
+ /* Check if there are available descriptors */
+ if (ret <= 0)
+ return ret;
+ }
+ while (iovcnt) {
+ if (!len) {
+ /* Copy over a new iovec from user space. */
+ ret = copy_from_user(&iov, u_iov, sizeof(*u_iov));
+ if (ret) {
+ ret = -EINVAL;
+ dev_err(mic_dev(mvdev), "%s %d err %d\n",
+ __func__, __LINE__, ret);
+ break;
+ }
+ len = iov.iov_len;
+ ubuf = iov.iov_base;
+ }
+ /* Issue all the read descriptors first */
+ ret = mic_vringh_copy(mvdev, riov, ubuf, len, MIC_VRINGH_READ,
+ copy->vr_idx, &out_len);
+ if (ret) {
+ dev_err(mic_dev(mvdev), "%s %d err %d\n",
+ __func__, __LINE__, ret);
+ break;
+ }
+ len -= out_len;
+ ubuf += out_len;
+ copy->out_len += out_len;
+ /* Issue the write descriptors next */
+ ret = mic_vringh_copy(mvdev, wiov, ubuf, len, !MIC_VRINGH_READ,
+ copy->vr_idx, &out_len);
+ if (ret) {
+ dev_err(mic_dev(mvdev), "%s %d err %d\n",
+ __func__, __LINE__, ret);
+ break;
+ }
+ len -= out_len;
+ ubuf += out_len;
+ copy->out_len += out_len;
+ if (!len) {
+ /* One user space iovec is now completed */
+ iovcnt--;
+ u_iov++;
+ }
+ /* Exit loop if all elements in KIOVs have been processed. */
+ if (riov->i == riov->used && wiov->i == wiov->used)
+ break;
+ }
+ /*
+ * Update the used ring if a descriptor was available and some data was
+ * copied in/out and the user asked for a used ring update.
+ */
+ if (*head != USHRT_MAX && copy->out_len && copy->update_used) {
+ u32 total = 0;
+
+ /* Determine the total data consumed */
+ total += mic_vringh_iov_consumed(riov);
+ total += mic_vringh_iov_consumed(wiov);
+ vringh_complete_kern(vrh, *head, total);
+ *head = USHRT_MAX;
+ if (vringh_need_notify_kern(vrh) > 0)
+ vringh_notify(vrh);
+ vringh_kiov_cleanup(riov);
+ vringh_kiov_cleanup(wiov);
+ /* Update avail idx for user space */
+ vr->info->avail_idx = vrh->last_avail_idx;
+ }
+ return ret;
+}
+
+static inline int mic_verify_copy_args(struct mic_vdev *mvdev,
+ struct mic_copy_desc *copy)
+{
+ if (copy->vr_idx >= mvdev->dd->num_vq) {
+ dev_err(mic_dev(mvdev), "%s %d err %d\n",
+ __func__, __LINE__, -EINVAL);
+ return -EINVAL;
+ }
+ return 0;
+}
+
+/* Copy a specified number of virtio descriptors in a chain */
+int mic_virtio_copy_desc(struct mic_vdev *mvdev,
+ struct mic_copy_desc *copy)
+{
+ int err;
+ struct mic_vringh *mvr = &mvdev->mvr[copy->vr_idx];
+
+ err = mic_verify_copy_args(mvdev, copy);
+ if (err)
+ return err;
+
+ mutex_lock(&mvr->vr_mutex);
+ if (!mic_vdevup(mvdev)) {
+ err = -ENODEV;
+ dev_err(mic_dev(mvdev), "%s %d err %d\n",
+ __func__, __LINE__, err);
+ goto err;
+ }
+ err = _mic_virtio_copy(mvdev, copy);
+ if (err) {
+ dev_err(mic_dev(mvdev), "%s %d err %d\n",
+ __func__, __LINE__, err);
+ }
+err:
+ mutex_unlock(&mvr->vr_mutex);
+ return err;
+}
+
+static void mic_virtio_init_post(struct mic_vdev *mvdev)
+{
+ struct mic_vqconfig *vqconfig = mic_vq_config(mvdev->dd);
+ int i;
+
+ for (i = 0; i < mvdev->dd->num_vq; i++) {
+ if (!le64_to_cpu(vqconfig[i].used_address)) {
+ dev_warn(mic_dev(mvdev), "used_address zero??\n");
+ continue;
+ }
+ mvdev->mvr[i].vrh.vring.used =
+ (void __force *)mvdev->mdev->aper.va +
+ le64_to_cpu(vqconfig[i].used_address);
+ }
+
+ mvdev->dc->used_address_updated = 0;
+
+ dev_dbg(mic_dev(mvdev), "%s: device type %d LINKUP\n",
+ __func__, mvdev->virtio_id);
+}
+
+static inline void mic_virtio_device_reset(struct mic_vdev *mvdev)
+{
+ int i;
+
+ dev_dbg(mic_dev(mvdev), "%s: status %d device type %d RESET\n",
+ __func__, mvdev->dd->status, mvdev->virtio_id);
+
+ for (i = 0; i < mvdev->dd->num_vq; i++)
+ /*
+ * Avoid lockdep false positive. The + 1 is for the mic
+ * mutex which is held in the reset devices code path.
+ */
+ mutex_lock_nested(&mvdev->mvr[i].vr_mutex, i + 1);
+
+ /* 0 status means "reset" */
+ mvdev->dd->status = 0;
+ mvdev->dc->vdev_reset = 0;
+ mvdev->dc->host_ack = 1;
+
+ for (i = 0; i < mvdev->dd->num_vq; i++) {
+ struct vringh *vrh = &mvdev->mvr[i].vrh;
+ mvdev->mvr[i].vring.info->avail_idx = 0;
+ vrh->completed = 0;
+ vrh->last_avail_idx = 0;
+ vrh->last_used_idx = 0;
+ }
+
+ for (i = 0; i < mvdev->dd->num_vq; i++)
+ mutex_unlock(&mvdev->mvr[i].vr_mutex);
+}
+
+void mic_virtio_reset_devices(struct mic_device *mdev)
+{
+ struct list_head *pos, *tmp;
+ struct mic_vdev *mvdev;
+
+ dev_dbg(&mdev->pdev->dev, "%s\n", __func__);
+
+ list_for_each_safe(pos, tmp, &mdev->vdev_list) {
+ mvdev = list_entry(pos, struct mic_vdev, list);
+ mic_virtio_device_reset(mvdev);
+ mvdev->poll_wake = 1;
+ wake_up(&mvdev->waitq);
+ }
+}
+
+void mic_bh_handler(struct work_struct *work)
+{
+ struct mic_vdev *mvdev = container_of(work, struct mic_vdev,
+ virtio_bh_work);
+
+ if (mvdev->dc->used_address_updated)
+ mic_virtio_init_post(mvdev);
+
+ if (mvdev->dc->vdev_reset)
+ mic_virtio_device_reset(mvdev);
+
+ mvdev->poll_wake = 1;
+ wake_up(&mvdev->waitq);
+}
+
+static irqreturn_t mic_virtio_intr_handler(int irq, void *data)
+{
+ struct mic_vdev *mvdev = data;
+ struct mic_device *mdev = mvdev->mdev;
+
+ mdev->ops->intr_workarounds(mdev);
+ schedule_work(&mvdev->virtio_bh_work);
+ return IRQ_HANDLED;
+}
+
+int mic_virtio_config_change(struct mic_vdev *mvdev,
+ void __user *argp)
+{
+ DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wake);
+ int ret = 0, retry, i;
+ struct mic_bootparam *bootparam = mvdev->mdev->dp;
+ s8 db = bootparam->h2c_config_db;
+
+ mutex_lock(&mvdev->mdev->mic_mutex);
+ for (i = 0; i < mvdev->dd->num_vq; i++)
+ mutex_lock_nested(&mvdev->mvr[i].vr_mutex, i + 1);
+
+ if (db == -1 || mvdev->dd->type == -1) {
+ ret = -EIO;
+ goto exit;
+ }
+
+ if (copy_from_user(mic_vq_configspace(mvdev->dd),
+ argp, mvdev->dd->config_len)) {
+ dev_err(mic_dev(mvdev), "%s %d err %d\n",
+ __func__, __LINE__, -EFAULT);
+ ret = -EFAULT;
+ goto exit;
+ }
+ mvdev->dc->config_change = MIC_VIRTIO_PARAM_CONFIG_CHANGED;
+ mvdev->mdev->ops->send_intr(mvdev->mdev, db);
+
+ for (retry = 100; retry--;) {
+ ret = wait_event_timeout(wake,
+ mvdev->dc->guest_ack, msecs_to_jiffies(100));
+ if (ret)
+ break;
+ }
+
+ dev_dbg(mic_dev(mvdev),
+ "%s %d retry: %d\n", __func__, __LINE__, retry);
+ mvdev->dc->config_change = 0;
+ mvdev->dc->guest_ack = 0;
+exit:
+ for (i = 0; i < mvdev->dd->num_vq; i++)
+ mutex_unlock(&mvdev->mvr[i].vr_mutex);
+ mutex_unlock(&mvdev->mdev->mic_mutex);
+ return ret;
+}
+
+static int mic_copy_dp_entry(struct mic_vdev *mvdev,
+ void __user *argp,
+ __u8 *type,
+ struct mic_device_desc **devpage)
+{
+ struct mic_device *mdev = mvdev->mdev;
+ struct mic_device_desc dd, *dd_config, *devp;
+ struct mic_vqconfig *vqconfig;
+ int ret = 0, i;
+ bool slot_found = false;
+
+ if (copy_from_user(&dd, argp, sizeof(dd))) {
+ dev_err(mic_dev(mvdev), "%s %d err %d\n",
+ __func__, __LINE__, -EFAULT);
+ return -EFAULT;
+ }
+
+ if (mic_aligned_desc_size(&dd) > MIC_MAX_DESC_BLK_SIZE ||
+ dd.num_vq > MIC_MAX_VRINGS) {
+ dev_err(mic_dev(mvdev), "%s %d err %d\n",
+ __func__, __LINE__, -EINVAL);
+ return -EINVAL;
+ }
+
+ dd_config = kmalloc(mic_desc_size(&dd), GFP_KERNEL);
+ if (dd_config == NULL) {
+ dev_err(mic_dev(mvdev), "%s %d err %d\n",
+ __func__, __LINE__, -ENOMEM);
+ return -ENOMEM;
+ }
+ if (copy_from_user(dd_config, argp, mic_desc_size(&dd))) {
+ ret = -EFAULT;
+ dev_err(mic_dev(mvdev), "%s %d err %d\n",
+ __func__, __LINE__, ret);
+ goto exit;
+ }
+
+ vqconfig = mic_vq_config(dd_config);
+ for (i = 0; i < dd.num_vq; i++) {
+ if (le16_to_cpu(vqconfig[i].num) > MIC_MAX_VRING_ENTRIES) {
+ ret = -EINVAL;
+ dev_err(mic_dev(mvdev), "%s %d err %d\n",
+ __func__, __LINE__, ret);
+ goto exit;
+ }
+ }
+
+ /* Find the first free device page entry */
+ for (i = sizeof(struct mic_bootparam);
+ i < MIC_DP_SIZE - mic_total_desc_size(dd_config);
+ i += mic_total_desc_size(devp)) {
+ devp = mdev->dp + i;
+ if (devp->type == 0 || devp->type == -1) {
+ slot_found = true;
+ break;
+ }
+ }
+ if (!slot_found) {
+ ret = -EINVAL;
+ dev_err(mic_dev(mvdev), "%s %d err %d\n",
+ __func__, __LINE__, ret);
+ goto exit;
+ }
+ /*
+ * Save off the type before doing the memcpy. Type will be set in the
+ * end after completing all initialization for the new device.
+ */
+ *type = dd_config->type;
+ dd_config->type = 0;
+ memcpy(devp, dd_config, mic_desc_size(dd_config));
+
+ *devpage = devp;
+exit:
+ kfree(dd_config);
+ return ret;
+}
+
+static void mic_init_device_ctrl(struct mic_vdev *mvdev,
+ struct mic_device_desc *devpage)
+{
+ struct mic_device_ctrl *dc;
+
+ dc = (void *)devpage + mic_aligned_desc_size(devpage);
+
+ dc->config_change = 0;
+ dc->guest_ack = 0;
+ dc->vdev_reset = 0;
+ dc->host_ack = 0;
+ dc->used_address_updated = 0;
+ dc->c2h_vdev_db = -1;
+ dc->h2c_vdev_db = -1;
+ mvdev->dc = dc;
+}
+
+int mic_virtio_add_device(struct mic_vdev *mvdev,
+ void __user *argp)
+{
+ struct mic_device *mdev = mvdev->mdev;
+ struct mic_device_desc *dd = NULL;
+ struct mic_vqconfig *vqconfig;
+ int vr_size, i, j, ret;
+ u8 type = 0;
+ s8 db;
+ char irqname[10];
+ struct mic_bootparam *bootparam = mdev->dp;
+ u16 num;
+ dma_addr_t vr_addr;
+
+ mutex_lock(&mdev->mic_mutex);
+
+ ret = mic_copy_dp_entry(mvdev, argp, &type, &dd);
+ if (ret) {
+ mutex_unlock(&mdev->mic_mutex);
+ return ret;
+ }
+
+ mic_init_device_ctrl(mvdev, dd);
+
+ mvdev->dd = dd;
+ mvdev->virtio_id = type;
+ vqconfig = mic_vq_config(dd);
+ INIT_WORK(&mvdev->virtio_bh_work, mic_bh_handler);
+
+ for (i = 0; i < dd->num_vq; i++) {
+ struct mic_vringh *mvr = &mvdev->mvr[i];
+ struct mic_vring *vr = &mvdev->mvr[i].vring;
+ num = le16_to_cpu(vqconfig[i].num);
+ mutex_init(&mvr->vr_mutex);
+ vr_size = PAGE_ALIGN(vring_size(num, MIC_VIRTIO_RING_ALIGN) +
+ sizeof(struct _mic_vring_info));
+ vr->va = (void *)
+ __get_free_pages(GFP_KERNEL | __GFP_ZERO,
+ get_order(vr_size));
+ if (!vr->va) {
+ ret = -ENOMEM;
+ dev_err(mic_dev(mvdev), "%s %d err %d\n",
+ __func__, __LINE__, ret);
+ goto err;
+ }
+ vr->len = vr_size;
+ vr->info = vr->va + vring_size(num, MIC_VIRTIO_RING_ALIGN);
+ vr->info->magic = cpu_to_le32(MIC_MAGIC + mvdev->virtio_id + i);
+ vr_addr = mic_map_single(mdev, vr->va, vr_size);
+ if (mic_map_error(vr_addr)) {
+ free_pages((unsigned long)vr->va, get_order(vr_size));
+ ret = -ENOMEM;
+ dev_err(mic_dev(mvdev), "%s %d err %d\n",
+ __func__, __LINE__, ret);
+ goto err;
+ }
+ vqconfig[i].address = cpu_to_le64(vr_addr);
+
+ vring_init(&vr->vr, num, vr->va, MIC_VIRTIO_RING_ALIGN);
+ ret = vringh_init_kern(&mvr->vrh,
+ *(u32 *)mic_vq_features(mvdev->dd), num, false,
+ vr->vr.desc, vr->vr.avail, vr->vr.used);
+ if (ret) {
+ dev_err(mic_dev(mvdev), "%s %d err %d\n",
+ __func__, __LINE__, ret);
+ goto err;
+ }
+ vringh_kiov_init(&mvr->riov, NULL, 0);
+ vringh_kiov_init(&mvr->wiov, NULL, 0);
+ mvr->head = USHRT_MAX;
+ mvr->mvdev = mvdev;
+ mvr->vrh.notify = mic_notify;
+ dev_dbg(&mdev->pdev->dev,
+ "%s %d index %d va %p info %p vr_size 0x%x\n",
+ __func__, __LINE__, i, vr->va, vr->info, vr_size);
+ mvr->buf = (void *)__get_free_pages(GFP_KERNEL,
+ get_order(MIC_INT_DMA_BUF_SIZE));
+ mvr->buf_da = mic_map_single(mvdev->mdev, mvr->buf,
+ MIC_INT_DMA_BUF_SIZE);
+ }
+
+ snprintf(irqname, sizeof(irqname), "mic%dvirtio%d", mdev->id,
+ mvdev->virtio_id);
+ mvdev->virtio_db = mic_next_db(mdev);
+ mvdev->virtio_cookie = mic_request_threaded_irq(mdev,
+ mic_virtio_intr_handler,
+ NULL, irqname, mvdev,
+ mvdev->virtio_db, MIC_INTR_DB);
+ if (IS_ERR(mvdev->virtio_cookie)) {
+ ret = PTR_ERR(mvdev->virtio_cookie);
+ dev_dbg(&mdev->pdev->dev, "request irq failed\n");
+ goto err;
+ }
+
+ mvdev->dc->c2h_vdev_db = mvdev->virtio_db;
+
+ list_add_tail(&mvdev->list, &mdev->vdev_list);
+ /*
+ * Order the type update with previous stores. This write barrier
+ * is paired with the corresponding read barrier before the uncached
+ * system memory read of the type, on the card while scanning the
+ * device page.
+ */
+ smp_wmb();
+ dd->type = type;
+
+ dev_dbg(&mdev->pdev->dev, "Added virtio device id %d\n", dd->type);
+
+ db = bootparam->h2c_config_db;
+ if (db != -1)
+ mdev->ops->send_intr(mdev, db);
+ mutex_unlock(&mdev->mic_mutex);
+ return 0;
+err:
+ vqconfig = mic_vq_config(dd);
+ for (j = 0; j < i; j++) {
+ struct mic_vringh *mvr = &mvdev->mvr[j];
+ mic_unmap_single(mdev, le64_to_cpu(vqconfig[j].address),
+ mvr->vring.len);
+ free_pages((unsigned long)mvr->vring.va,
+ get_order(mvr->vring.len));
+ }
+ mutex_unlock(&mdev->mic_mutex);
+ return ret;
+}
+
+void mic_virtio_del_device(struct mic_vdev *mvdev)
+{
+ struct list_head *pos, *tmp;
+ struct mic_vdev *tmp_mvdev;
+ struct mic_device *mdev = mvdev->mdev;
+ DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wake);
+ int i, ret, retry;
+ struct mic_vqconfig *vqconfig;
+ struct mic_bootparam *bootparam = mdev->dp;
+ s8 db;
+
+ mutex_lock(&mdev->mic_mutex);
+ db = bootparam->h2c_config_db;
+ if (db == -1)
+ goto skip_hot_remove;
+ dev_dbg(&mdev->pdev->dev,
+ "Requesting hot remove id %d\n", mvdev->virtio_id);
+ mvdev->dc->config_change = MIC_VIRTIO_PARAM_DEV_REMOVE;
+ mdev->ops->send_intr(mdev, db);
+ for (retry = 100; retry--;) {
+ ret = wait_event_timeout(wake,
+ mvdev->dc->guest_ack, msecs_to_jiffies(100));
+ if (ret)
+ break;
+ }
+ dev_dbg(&mdev->pdev->dev,
+ "Device id %d config_change %d guest_ack %d retry %d\n",
+ mvdev->virtio_id, mvdev->dc->config_change,
+ mvdev->dc->guest_ack, retry);
+ mvdev->dc->config_change = 0;
+ mvdev->dc->guest_ack = 0;
+skip_hot_remove:
+ mic_free_irq(mdev, mvdev->virtio_cookie, mvdev);
+ flush_work(&mvdev->virtio_bh_work);
+ vqconfig = mic_vq_config(mvdev->dd);
+ for (i = 0; i < mvdev->dd->num_vq; i++) {
+ struct mic_vringh *mvr = &mvdev->mvr[i];
+
+ mic_unmap_single(mvdev->mdev, mvr->buf_da,
+ MIC_INT_DMA_BUF_SIZE);
+ free_pages((unsigned long)mvr->buf,
+ get_order(MIC_INT_DMA_BUF_SIZE));
+ vringh_kiov_cleanup(&mvr->riov);
+ vringh_kiov_cleanup(&mvr->wiov);
+ mic_unmap_single(mdev, le64_to_cpu(vqconfig[i].address),
+ mvr->vring.len);
+ free_pages((unsigned long)mvr->vring.va,
+ get_order(mvr->vring.len));
+ }
+
+ list_for_each_safe(pos, tmp, &mdev->vdev_list) {
+ tmp_mvdev = list_entry(pos, struct mic_vdev, list);
+ if (tmp_mvdev == mvdev) {
+ list_del(pos);
+ dev_dbg(&mdev->pdev->dev,
+ "Removing virtio device id %d\n",
+ mvdev->virtio_id);
+ break;
+ }
+ }
+ /*
+ * Order the type update with previous stores. This write barrier
+ * is paired with the corresponding read barrier before the uncached
+ * system memory read of the type, on the card while scanning the
+ * device page.
+ */
+ smp_wmb();
+ mvdev->dd->type = -1;
+ mutex_unlock(&mdev->mic_mutex);
+}
diff --git a/drivers/misc/mic/host/mic_virtio.h b/drivers/misc/mic/host/mic_virtio.h
new file mode 100644
index 0000000..a80631f
--- /dev/null
+++ b/drivers/misc/mic/host/mic_virtio.h
@@ -0,0 +1,155 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2013 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Intel MIC Host driver.
+ *
+ */
+#ifndef MIC_VIRTIO_H
+#define MIC_VIRTIO_H
+
+#include <linux/virtio_config.h>
+#include <linux/mic_ioctl.h>
+
+/*
+ * Note on endianness.
+ * 1. Host can be both BE or LE
+ * 2. Guest/card is LE. Host uses le_to_cpu to access desc/avail
+ * rings and ioreadXX/iowriteXX to access used ring.
+ * 3. Device page exposed by host to guest contains LE values. Guest
+ * accesses these using ioreadXX/iowriteXX etc. This way in general we
+ * obey the virtio spec according to which guest works with native
+ * endianness and host is aware of guest endianness and does all
+ * required endianness conversion.
+ * 4. Data provided from user space to guest (in ADD_DEVICE and
+ * CONFIG_CHANGE ioctl's) is not interpreted by the driver and should be
+ * in guest endianness.
+ */
+
+/**
+ * struct mic_vringh - Virtio ring host information.
+ *
+ * @vring: The MIC vring used for setting up user space mappings.
+ * @vrh: The host VRINGH used for accessing the card vrings.
+ * @riov: The VRINGH read kernel IOV.
+ * @wiov: The VRINGH write kernel IOV.
+ * @vr_mutex: Mutex for synchronizing access to the VRING.
+ * @buf: Temporary kernel buffer used to copy in/out data
+ * from/to the card via DMA.
+ * @buf_da: dma address of buf.
+ * @mvdev: Back pointer to MIC virtio device for vringh_notify(..).
+ * @head: The VRINGH head index address passed to vringh_getdesc_kern(..).
+ */
+struct mic_vringh {
+ struct mic_vring vring;
+ struct vringh vrh;
+ struct vringh_kiov riov;
+ struct vringh_kiov wiov;
+ struct mutex vr_mutex;
+ void *buf;
+ dma_addr_t buf_da;
+ struct mic_vdev *mvdev;
+ u16 head;
+};
+
+/**
+ * struct mic_vdev - Host information for a card Virtio device.
+ *
+ * @virtio_id - Virtio device id.
+ * @waitq - Waitqueue to allow ring3 apps to poll.
+ * @mdev - Back pointer to host MIC device.
+ * @poll_wake - Used for waking up threads blocked in poll.
+ * @out_bytes - Debug stats for number of bytes copied from host to card.
+ * @in_bytes - Debug stats for number of bytes copied from card to host.
+ * @out_bytes_dma - Debug stats for number of bytes copied from host to card
+ * using DMA.
+ * @in_bytes_dma - Debug stats for number of bytes copied from card to host
+ * using DMA.
+ * @tx_len_unaligned - Debug stats for number of bytes copied to the card where
+ * the transfer length did not have the required DMA alignment.
+ * @tx_dst_unaligned - Debug stats for number of bytes copied where the
+ * destination address on the card did not have the required DMA alignment.
+ * @mvr - Store per VRING data structures.
+ * @virtio_bh_work - Work struct used to schedule virtio bottom half handling.
+ * @dd - Virtio device descriptor.
+ * @dc - Virtio device control fields.
+ * @list - List of Virtio devices.
+ * @virtio_db - The doorbell used by the card to interrupt the host.
+ * @virtio_cookie - The cookie returned while requesting interrupts.
+ */
+struct mic_vdev {
+ int virtio_id;
+ wait_queue_head_t waitq;
+ struct mic_device *mdev;
+ int poll_wake;
+ unsigned long out_bytes;
+ unsigned long in_bytes;
+ unsigned long out_bytes_dma;
+ unsigned long in_bytes_dma;
+ unsigned long tx_len_unaligned;
+ unsigned long tx_dst_unaligned;
+ struct mic_vringh mvr[MIC_MAX_VRINGS];
+ struct work_struct virtio_bh_work;
+ struct mic_device_desc *dd;
+ struct mic_device_ctrl *dc;
+ struct list_head list;
+ int virtio_db;
+ struct mic_irq *virtio_cookie;
+};
+
+void mic_virtio_uninit(struct mic_device *mdev);
+int mic_virtio_add_device(struct mic_vdev *mvdev,
+ void __user *argp);
+void mic_virtio_del_device(struct mic_vdev *mvdev);
+int mic_virtio_config_change(struct mic_vdev *mvdev,
+ void __user *argp);
+int mic_virtio_copy_desc(struct mic_vdev *mvdev,
+ struct mic_copy_desc *request);
+void mic_virtio_reset_devices(struct mic_device *mdev);
+void mic_bh_handler(struct work_struct *work);
+
+/* Helper API to obtain the MIC PCIe device */
+static inline struct device *mic_dev(struct mic_vdev *mvdev)
+{
+ return &mvdev->mdev->pdev->dev;
+}
+
+/* Helper API to check if a virtio device is initialized */
+static inline int mic_vdev_inited(struct mic_vdev *mvdev)
+{
+ /* Device has not been created yet */
+ if (!mvdev->dd || !mvdev->dd->type) {
+ dev_err(mic_dev(mvdev), "%s %d err %d\n",
+ __func__, __LINE__, -EINVAL);
+ return -EINVAL;
+ }
+
+ /* Device has been removed/deleted */
+ if (mvdev->dd->type == -1) {
+ dev_err(mic_dev(mvdev), "%s %d err %d\n",
+ __func__, __LINE__, -ENODEV);
+ return -ENODEV;
+ }
+
+ return 0;
+}
+
+/* Helper API to check if a virtio device is running */
+static inline bool mic_vdevup(struct mic_vdev *mvdev)
+{
+ return !!mvdev->dd->status;
+}
+#endif
diff --git a/drivers/misc/mic/host/mic_x100.c b/drivers/misc/mic/host/mic_x100.c
new file mode 100644
index 0000000..8118ac4
--- /dev/null
+++ b/drivers/misc/mic/host/mic_x100.c
@@ -0,0 +1,579 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2013 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Intel MIC Host driver.
+ *
+ */
+#include <linux/fs.h>
+#include <linux/pci.h>
+#include <linux/sched.h>
+#include <linux/firmware.h>
+#include <linux/delay.h>
+
+#include "../common/mic_dev.h"
+#include "mic_device.h"
+#include "mic_x100.h"
+#include "mic_smpt.h"
+
+/**
+ * mic_x100_write_spad - write to the scratchpad register
+ * @mdev: pointer to mic_device instance
+ * @idx: index to the scratchpad register, 0 based
+ * @val: the data value to put into the register
+ *
+ * This function allows writing of a 32bit value to the indexed scratchpad
+ * register.
+ *
+ * RETURNS: none.
+ */
+static void
+mic_x100_write_spad(struct mic_device *mdev, unsigned int idx, u32 val)
+{
+ dev_dbg(&mdev->pdev->dev, "Writing 0x%x to scratch pad index %d\n",
+ val, idx);
+ mic_mmio_write(&mdev->mmio, val,
+ MIC_X100_SBOX_BASE_ADDRESS +
+ MIC_X100_SBOX_SPAD0 + idx * 4);
+}
+
+/**
+ * mic_x100_read_spad - read from the scratchpad register
+ * @mdev: pointer to mic_device instance
+ * @idx: index to scratchpad register, 0 based
+ *
+ * This function allows reading of the 32bit scratchpad register.
+ *
+ * RETURNS: An appropriate -ERRNO error value on error, or zero for success.
+ */
+static u32
+mic_x100_read_spad(struct mic_device *mdev, unsigned int idx)
+{
+ u32 val = mic_mmio_read(&mdev->mmio,
+ MIC_X100_SBOX_BASE_ADDRESS +
+ MIC_X100_SBOX_SPAD0 + idx * 4);
+
+ dev_dbg(&mdev->pdev->dev,
+ "Reading 0x%x from scratch pad index %d\n", val, idx);
+ return val;
+}
+
+/**
+ * mic_x100_enable_interrupts - Enable interrupts.
+ * @mdev: pointer to mic_device instance
+ */
+static void mic_x100_enable_interrupts(struct mic_device *mdev)
+{
+ u32 reg;
+ struct mic_mw *mw = &mdev->mmio;
+ u32 sice0 = MIC_X100_SBOX_BASE_ADDRESS + MIC_X100_SBOX_SICE0;
+ u32 siac0 = MIC_X100_SBOX_BASE_ADDRESS + MIC_X100_SBOX_SIAC0;
+
+ reg = mic_mmio_read(mw, sice0);
+ reg |= MIC_X100_SBOX_DBR_BITS(0xf) | MIC_X100_SBOX_DMA_BITS(0xff);
+ mic_mmio_write(mw, reg, sice0);
+
+ /*
+ * Enable auto-clear when enabling interrupts. Applicable only for
+ * MSI-x. Legacy and MSI mode cannot have auto-clear enabled.
+ */
+ if (mdev->irq_info.num_vectors > 1) {
+ reg = mic_mmio_read(mw, siac0);
+ reg |= MIC_X100_SBOX_DBR_BITS(0xf) |
+ MIC_X100_SBOX_DMA_BITS(0xff);
+ mic_mmio_write(mw, reg, siac0);
+ }
+}
+
+/**
+ * mic_x100_disable_interrupts - Disable interrupts.
+ * @mdev: pointer to mic_device instance
+ */
+static void mic_x100_disable_interrupts(struct mic_device *mdev)
+{
+ u32 reg;
+ struct mic_mw *mw = &mdev->mmio;
+ u32 sice0 = MIC_X100_SBOX_BASE_ADDRESS + MIC_X100_SBOX_SICE0;
+ u32 siac0 = MIC_X100_SBOX_BASE_ADDRESS + MIC_X100_SBOX_SIAC0;
+ u32 sicc0 = MIC_X100_SBOX_BASE_ADDRESS + MIC_X100_SBOX_SICC0;
+
+ reg = mic_mmio_read(mw, sice0);
+ mic_mmio_write(mw, reg, sicc0);
+
+ if (mdev->irq_info.num_vectors > 1) {
+ reg = mic_mmio_read(mw, siac0);
+ reg &= ~(MIC_X100_SBOX_DBR_BITS(0xf) |
+ MIC_X100_SBOX_DMA_BITS(0xff));
+ mic_mmio_write(mw, reg, siac0);
+ }
+}
+
+/**
+ * mic_x100_send_sbox_intr - Send an MIC_X100_SBOX interrupt to MIC.
+ * @mdev: pointer to mic_device instance
+ */
+static void mic_x100_send_sbox_intr(struct mic_device *mdev,
+ int doorbell)
+{
+ struct mic_mw *mw = &mdev->mmio;
+ u64 apic_icr_offset = MIC_X100_SBOX_APICICR0 + doorbell * 8;
+ u32 apicicr_low = mic_mmio_read(mw, MIC_X100_SBOX_BASE_ADDRESS +
+ apic_icr_offset);
+
+ /* for MIC we need to make sure we "hit" the send_icr bit (13) */
+ apicicr_low = (apicicr_low | (1 << 13));
+
+ /* Ensure that the interrupt is ordered w.r.t. previous stores. */
+ wmb();
+ mic_mmio_write(mw, apicicr_low,
+ MIC_X100_SBOX_BASE_ADDRESS + apic_icr_offset);
+}
+
+/**
+ * mic_x100_send_rdmasr_intr - Send an RDMASR interrupt to MIC.
+ * @mdev: pointer to mic_device instance
+ */
+static void mic_x100_send_rdmasr_intr(struct mic_device *mdev,
+ int doorbell)
+{
+ int rdmasr_offset = MIC_X100_SBOX_RDMASR0 + (doorbell << 2);
+ /* Ensure that the interrupt is ordered w.r.t. previous stores. */
+ wmb();
+ mic_mmio_write(&mdev->mmio, 0,
+ MIC_X100_SBOX_BASE_ADDRESS + rdmasr_offset);
+}
+
+/**
+ * __mic_x100_send_intr - Send interrupt to MIC.
+ * @mdev: pointer to mic_device instance
+ * @doorbell: doorbell number.
+ */
+static void mic_x100_send_intr(struct mic_device *mdev, int doorbell)
+{
+ int rdmasr_db;
+ if (doorbell < MIC_X100_NUM_SBOX_IRQ) {
+ mic_x100_send_sbox_intr(mdev, doorbell);
+ } else {
+ rdmasr_db = doorbell - MIC_X100_NUM_SBOX_IRQ;
+ mic_x100_send_rdmasr_intr(mdev, rdmasr_db);
+ }
+}
+
+/**
+ * mic_x100_ack_interrupt - Read the interrupt sources register and
+ * clear it. This function will be called in the MSI/INTx case.
+ * @mdev: Pointer to mic_device instance.
+ *
+ * Returns: bitmask of interrupt sources triggered.
+ */
+static u32 mic_x100_ack_interrupt(struct mic_device *mdev)
+{
+ u32 sicr0 = MIC_X100_SBOX_BASE_ADDRESS + MIC_X100_SBOX_SICR0;
+ u32 reg = mic_mmio_read(&mdev->mmio, sicr0);
+ mic_mmio_write(&mdev->mmio, reg, sicr0);
+ return reg;
+}
+
+/**
+ * mic_x100_intr_workarounds - These hardware specific workarounds are
+ * to be invoked everytime an interrupt is handled.
+ * @mdev: Pointer to mic_device instance.
+ *
+ * Returns: none
+ */
+static void mic_x100_intr_workarounds(struct mic_device *mdev)
+{
+ struct mic_mw *mw = &mdev->mmio;
+
+ /* Clear pending bit array. */
+ if (MIC_A0_STEP == mdev->stepping)
+ mic_mmio_write(mw, 1, MIC_X100_SBOX_BASE_ADDRESS +
+ MIC_X100_SBOX_MSIXPBACR);
+
+ if (mdev->stepping >= MIC_B0_STEP)
+ mdev->intr_ops->enable_interrupts(mdev);
+}
+
+/**
+ * mic_x100_hw_intr_init - Initialize h/w specific interrupt
+ * information.
+ * @mdev: pointer to mic_device instance
+ */
+static void mic_x100_hw_intr_init(struct mic_device *mdev)
+{
+ mdev->intr_info = (struct mic_intr_info *)mic_x100_intr_init;
+}
+
+/**
+ * mic_x100_read_msi_to_src_map - read from the MSI mapping registers
+ * @mdev: pointer to mic_device instance
+ * @idx: index to the mapping register, 0 based
+ *
+ * This function allows reading of the 32bit MSI mapping register.
+ *
+ * RETURNS: The value in the register.
+ */
+static u32
+mic_x100_read_msi_to_src_map(struct mic_device *mdev, int idx)
+{
+ return mic_mmio_read(&mdev->mmio,
+ MIC_X100_SBOX_BASE_ADDRESS +
+ MIC_X100_SBOX_MXAR0 + idx * 4);
+}
+
+/**
+ * mic_x100_program_msi_to_src_map - program the MSI mapping registers
+ * @mdev: pointer to mic_device instance
+ * @idx: index to the mapping register, 0 based
+ * @offset: The bit offset in the register that needs to be updated.
+ * @set: boolean specifying if the bit in the specified offset needs
+ * to be set or cleared.
+ *
+ * RETURNS: None.
+ */
+static void
+mic_x100_program_msi_to_src_map(struct mic_device *mdev,
+ int idx, int offset, bool set)
+{
+ unsigned long reg;
+ struct mic_mw *mw = &mdev->mmio;
+ u32 mxar = MIC_X100_SBOX_BASE_ADDRESS +
+ MIC_X100_SBOX_MXAR0 + idx * 4;
+
+ reg = mic_mmio_read(mw, mxar);
+ if (set)
+ __set_bit(offset, ®);
+ else
+ __clear_bit(offset, ®);
+ mic_mmio_write(mw, reg, mxar);
+}
+
+/*
+ * mic_x100_reset_fw_ready - Reset Firmware ready status field.
+ * @mdev: pointer to mic_device instance
+ */
+static void mic_x100_reset_fw_ready(struct mic_device *mdev)
+{
+ mdev->ops->write_spad(mdev, MIC_X100_DOWNLOAD_INFO, 0);
+}
+
+/*
+ * mic_x100_is_fw_ready - Check if firmware is ready.
+ * @mdev: pointer to mic_device instance
+ */
+static bool mic_x100_is_fw_ready(struct mic_device *mdev)
+{
+ u32 scratch2 = mdev->ops->read_spad(mdev, MIC_X100_DOWNLOAD_INFO);
+ return MIC_X100_SPAD2_DOWNLOAD_STATUS(scratch2) ? true : false;
+}
+
+/**
+ * mic_x100_get_apic_id - Get bootstrap APIC ID.
+ * @mdev: pointer to mic_device instance
+ */
+static u32 mic_x100_get_apic_id(struct mic_device *mdev)
+{
+ u32 scratch2 = 0;
+
+ scratch2 = mdev->ops->read_spad(mdev, MIC_X100_DOWNLOAD_INFO);
+ return MIC_X100_SPAD2_APIC_ID(scratch2);
+}
+
+/**
+ * mic_x100_send_firmware_intr - Send an interrupt to the firmware on MIC.
+ * @mdev: pointer to mic_device instance
+ */
+static void mic_x100_send_firmware_intr(struct mic_device *mdev)
+{
+ u32 apicicr_low;
+ u64 apic_icr_offset = MIC_X100_SBOX_APICICR7;
+ int vector = MIC_X100_BSP_INTERRUPT_VECTOR;
+ struct mic_mw *mw = &mdev->mmio;
+
+ /*
+ * For MIC we need to make sure we "hit"
+ * the send_icr bit (13).
+ */
+ apicicr_low = (vector | (1 << 13));
+
+ mic_mmio_write(mw, mic_x100_get_apic_id(mdev),
+ MIC_X100_SBOX_BASE_ADDRESS + apic_icr_offset + 4);
+
+ /* Ensure that the interrupt is ordered w.r.t. previous stores. */
+ wmb();
+ mic_mmio_write(mw, apicicr_low,
+ MIC_X100_SBOX_BASE_ADDRESS + apic_icr_offset);
+}
+
+/**
+ * mic_x100_hw_reset - Reset the MIC device.
+ * @mdev: pointer to mic_device instance
+ */
+static void mic_x100_hw_reset(struct mic_device *mdev)
+{
+ u32 reset_reg;
+ u32 rgcr = MIC_X100_SBOX_BASE_ADDRESS + MIC_X100_SBOX_RGCR;
+ struct mic_mw *mw = &mdev->mmio;
+
+ /* Ensure that the reset is ordered w.r.t. previous loads and stores */
+ mb();
+ /* Trigger reset */
+ reset_reg = mic_mmio_read(mw, rgcr);
+ reset_reg |= 0x1;
+ mic_mmio_write(mw, reset_reg, rgcr);
+ /*
+ * It seems we really want to delay at least 1 second
+ * after touching reset to prevent a lot of problems.
+ */
+ msleep(1000);
+}
+
+/**
+ * mic_x100_load_command_line - Load command line to MIC.
+ * @mdev: pointer to mic_device instance
+ * @fw: the firmware image
+ *
+ * RETURNS: An appropriate -ERRNO error value on error, or zero for success.
+ */
+static int
+mic_x100_load_command_line(struct mic_device *mdev, const struct firmware *fw)
+{
+ u32 len = 0;
+ u32 boot_mem;
+ char *buf;
+ void __iomem *cmd_line_va = mdev->aper.va + mdev->bootaddr + fw->size;
+#define CMDLINE_SIZE 2048
+
+ boot_mem = mdev->aper.len >> 20;
+ buf = kzalloc(CMDLINE_SIZE, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+
+ len += snprintf(buf, CMDLINE_SIZE - len,
+ " mem=%dM", boot_mem);
+ if (mdev->cosm_dev->cmdline)
+ snprintf(buf + len, CMDLINE_SIZE - len, " %s",
+ mdev->cosm_dev->cmdline);
+ memcpy_toio(cmd_line_va, buf, strlen(buf) + 1);
+ kfree(buf);
+ return 0;
+}
+
+/**
+ * mic_x100_load_ramdisk - Load ramdisk to MIC.
+ * @mdev: pointer to mic_device instance
+ *
+ * RETURNS: An appropriate -ERRNO error value on error, or zero for success.
+ */
+static int
+mic_x100_load_ramdisk(struct mic_device *mdev)
+{
+ const struct firmware *fw;
+ int rc;
+ struct boot_params __iomem *bp = mdev->aper.va + mdev->bootaddr;
+
+ rc = request_firmware(&fw, mdev->cosm_dev->ramdisk, &mdev->pdev->dev);
+ if (rc < 0) {
+ dev_err(&mdev->pdev->dev,
+ "ramdisk request_firmware failed: %d %s\n",
+ rc, mdev->cosm_dev->ramdisk);
+ goto error;
+ }
+ /*
+ * Typically the bootaddr for card OS is 64M
+ * so copy over the ramdisk @ 128M.
+ */
+ memcpy_toio(mdev->aper.va + (mdev->bootaddr << 1), fw->data, fw->size);
+ iowrite32(mdev->bootaddr << 1, &bp->hdr.ramdisk_image);
+ iowrite32(fw->size, &bp->hdr.ramdisk_size);
+ release_firmware(fw);
+error:
+ return rc;
+}
+
+/**
+ * mic_x100_get_boot_addr - Get MIC boot address.
+ * @mdev: pointer to mic_device instance
+ *
+ * This function is called during firmware load to determine
+ * the address at which the OS should be downloaded in card
+ * memory i.e. GDDR.
+ * RETURNS: An appropriate -ERRNO error value on error, or zero for success.
+ */
+static int
+mic_x100_get_boot_addr(struct mic_device *mdev)
+{
+ u32 scratch2, boot_addr;
+ int rc = 0;
+
+ scratch2 = mdev->ops->read_spad(mdev, MIC_X100_DOWNLOAD_INFO);
+ boot_addr = MIC_X100_SPAD2_DOWNLOAD_ADDR(scratch2);
+ dev_dbg(&mdev->pdev->dev, "%s %d boot_addr 0x%x\n",
+ __func__, __LINE__, boot_addr);
+ if (boot_addr > (1 << 31)) {
+ dev_err(&mdev->pdev->dev,
+ "incorrect bootaddr 0x%x\n",
+ boot_addr);
+ rc = -EINVAL;
+ goto error;
+ }
+ mdev->bootaddr = boot_addr;
+error:
+ return rc;
+}
+
+/**
+ * mic_x100_load_firmware - Load firmware to MIC.
+ * @mdev: pointer to mic_device instance
+ * @buf: buffer containing boot string including firmware/ramdisk path.
+ *
+ * RETURNS: An appropriate -ERRNO error value on error, or zero for success.
+ */
+static int
+mic_x100_load_firmware(struct mic_device *mdev, const char *buf)
+{
+ int rc;
+ const struct firmware *fw;
+
+ rc = mic_x100_get_boot_addr(mdev);
+ if (rc)
+ goto error;
+ /* load OS */
+ rc = request_firmware(&fw, mdev->cosm_dev->firmware, &mdev->pdev->dev);
+ if (rc < 0) {
+ dev_err(&mdev->pdev->dev,
+ "ramdisk request_firmware failed: %d %s\n",
+ rc, mdev->cosm_dev->firmware);
+ goto error;
+ }
+ if (mdev->bootaddr > mdev->aper.len - fw->size) {
+ rc = -EINVAL;
+ dev_err(&mdev->pdev->dev, "%s %d rc %d bootaddr 0x%x\n",
+ __func__, __LINE__, rc, mdev->bootaddr);
+ release_firmware(fw);
+ goto error;
+ }
+ memcpy_toio(mdev->aper.va + mdev->bootaddr, fw->data, fw->size);
+ mdev->ops->write_spad(mdev, MIC_X100_FW_SIZE, fw->size);
+ if (!strcmp(mdev->cosm_dev->bootmode, "flash"))
+ goto done;
+ /* load command line */
+ rc = mic_x100_load_command_line(mdev, fw);
+ if (rc) {
+ dev_err(&mdev->pdev->dev, "%s %d rc %d\n",
+ __func__, __LINE__, rc);
+ goto error;
+ }
+ release_firmware(fw);
+ /* load ramdisk */
+ if (mdev->cosm_dev->ramdisk)
+ rc = mic_x100_load_ramdisk(mdev);
+error:
+ dev_dbg(&mdev->pdev->dev, "%s %d rc %d\n", __func__, __LINE__, rc);
+done:
+ return rc;
+}
+
+/**
+ * mic_x100_get_postcode - Get postcode status from firmware.
+ * @mdev: pointer to mic_device instance
+ *
+ * RETURNS: postcode.
+ */
+static u32 mic_x100_get_postcode(struct mic_device *mdev)
+{
+ return mic_mmio_read(&mdev->mmio, MIC_X100_POSTCODE);
+}
+
+/**
+ * mic_x100_smpt_set - Update an SMPT entry with a DMA address.
+ * @mdev: pointer to mic_device instance
+ *
+ * RETURNS: none.
+ */
+static void
+mic_x100_smpt_set(struct mic_device *mdev, dma_addr_t dma_addr, u8 index)
+{
+#define SNOOP_ON (0 << 0)
+#define SNOOP_OFF (1 << 0)
+/*
+ * Sbox Smpt Reg Bits:
+ * Bits 31:2 Host address
+ * Bits 1 RSVD
+ * Bits 0 No snoop
+ */
+#define BUILD_SMPT(NO_SNOOP, HOST_ADDR) \
+ (u32)(((HOST_ADDR) << 2) | ((NO_SNOOP) & 0x01))
+
+ uint32_t smpt_reg_val = BUILD_SMPT(SNOOP_ON,
+ dma_addr >> mdev->smpt->info.page_shift);
+ mic_mmio_write(&mdev->mmio, smpt_reg_val,
+ MIC_X100_SBOX_BASE_ADDRESS +
+ MIC_X100_SBOX_SMPT00 + (4 * index));
+}
+
+/**
+ * mic_x100_smpt_hw_init - Initialize SMPT X100 specific fields.
+ * @mdev: pointer to mic_device instance
+ *
+ * RETURNS: none.
+ */
+static void mic_x100_smpt_hw_init(struct mic_device *mdev)
+{
+ struct mic_smpt_hw_info *info = &mdev->smpt->info;
+
+ info->num_reg = 32;
+ info->page_shift = 34;
+ info->page_size = (1ULL << info->page_shift);
+ info->base = 0x8000000000ULL;
+}
+
+struct mic_smpt_ops mic_x100_smpt_ops = {
+ .init = mic_x100_smpt_hw_init,
+ .set = mic_x100_smpt_set,
+};
+
+static bool mic_x100_dma_filter(struct dma_chan *chan, void *param)
+{
+ if (chan->device->dev->parent == (struct device *)param)
+ return true;
+ return false;
+}
+
+struct mic_hw_ops mic_x100_ops = {
+ .aper_bar = MIC_X100_APER_BAR,
+ .mmio_bar = MIC_X100_MMIO_BAR,
+ .read_spad = mic_x100_read_spad,
+ .write_spad = mic_x100_write_spad,
+ .send_intr = mic_x100_send_intr,
+ .ack_interrupt = mic_x100_ack_interrupt,
+ .intr_workarounds = mic_x100_intr_workarounds,
+ .reset = mic_x100_hw_reset,
+ .reset_fw_ready = mic_x100_reset_fw_ready,
+ .is_fw_ready = mic_x100_is_fw_ready,
+ .send_firmware_intr = mic_x100_send_firmware_intr,
+ .load_mic_fw = mic_x100_load_firmware,
+ .get_postcode = mic_x100_get_postcode,
+ .dma_filter = mic_x100_dma_filter,
+};
+
+struct mic_hw_intr_ops mic_x100_intr_ops = {
+ .intr_init = mic_x100_hw_intr_init,
+ .enable_interrupts = mic_x100_enable_interrupts,
+ .disable_interrupts = mic_x100_disable_interrupts,
+ .program_msi_to_src_map = mic_x100_program_msi_to_src_map,
+ .read_msi_to_src_map = mic_x100_read_msi_to_src_map,
+};
diff --git a/drivers/misc/mic/host/mic_x100.h b/drivers/misc/mic/host/mic_x100.h
new file mode 100644
index 0000000..8b7daa1
--- /dev/null
+++ b/drivers/misc/mic/host/mic_x100.h
@@ -0,0 +1,98 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2013 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Intel MIC Host driver.
+ *
+ */
+#ifndef _MIC_X100_HW_H_
+#define _MIC_X100_HW_H_
+
+#define MIC_X100_PCI_DEVICE_2250 0x2250
+#define MIC_X100_PCI_DEVICE_2251 0x2251
+#define MIC_X100_PCI_DEVICE_2252 0x2252
+#define MIC_X100_PCI_DEVICE_2253 0x2253
+#define MIC_X100_PCI_DEVICE_2254 0x2254
+#define MIC_X100_PCI_DEVICE_2255 0x2255
+#define MIC_X100_PCI_DEVICE_2256 0x2256
+#define MIC_X100_PCI_DEVICE_2257 0x2257
+#define MIC_X100_PCI_DEVICE_2258 0x2258
+#define MIC_X100_PCI_DEVICE_2259 0x2259
+#define MIC_X100_PCI_DEVICE_225a 0x225a
+#define MIC_X100_PCI_DEVICE_225b 0x225b
+#define MIC_X100_PCI_DEVICE_225c 0x225c
+#define MIC_X100_PCI_DEVICE_225d 0x225d
+#define MIC_X100_PCI_DEVICE_225e 0x225e
+
+#define MIC_X100_APER_BAR 0
+#define MIC_X100_MMIO_BAR 4
+
+#define MIC_X100_SBOX_BASE_ADDRESS 0x00010000
+#define MIC_X100_SBOX_SPAD0 0x0000AB20
+#define MIC_X100_SBOX_SICR0_DBR(x) ((x) & 0xf)
+#define MIC_X100_SBOX_SICR0_DMA(x) (((x) >> 8) & 0xff)
+#define MIC_X100_SBOX_SICE0_DBR(x) ((x) & 0xf)
+#define MIC_X100_SBOX_DBR_BITS(x) ((x) & 0xf)
+#define MIC_X100_SBOX_SICE0_DMA(x) (((x) >> 8) & 0xff)
+#define MIC_X100_SBOX_DMA_BITS(x) (((x) & 0xff) << 8)
+
+#define MIC_X100_SBOX_APICICR0 0x0000A9D0
+#define MIC_X100_SBOX_SICR0 0x00009004
+#define MIC_X100_SBOX_SICE0 0x0000900C
+#define MIC_X100_SBOX_SICC0 0x00009010
+#define MIC_X100_SBOX_SIAC0 0x00009014
+#define MIC_X100_SBOX_MSIXPBACR 0x00009084
+#define MIC_X100_SBOX_MXAR0 0x00009044
+#define MIC_X100_SBOX_SMPT00 0x00003100
+#define MIC_X100_SBOX_RDMASR0 0x0000B180
+
+#define MIC_X100_DOORBELL_IDX_START 0
+#define MIC_X100_NUM_DOORBELL 4
+#define MIC_X100_DMA_IDX_START 8
+#define MIC_X100_NUM_DMA 8
+#define MIC_X100_ERR_IDX_START 30
+#define MIC_X100_NUM_ERR 1
+
+#define MIC_X100_NUM_SBOX_IRQ 8
+#define MIC_X100_NUM_RDMASR_IRQ 8
+#define MIC_X100_RDMASR_IRQ_BASE 17
+#define MIC_X100_SPAD2_DOWNLOAD_STATUS(x) ((x) & 0x1)
+#define MIC_X100_SPAD2_APIC_ID(x) (((x) >> 1) & 0x1ff)
+#define MIC_X100_SPAD2_DOWNLOAD_ADDR(x) ((x) & 0xfffff000)
+#define MIC_X100_SBOX_APICICR7 0x0000AA08
+#define MIC_X100_SBOX_RGCR 0x00004010
+#define MIC_X100_SBOX_SDBIC0 0x0000CC90
+#define MIC_X100_DOWNLOAD_INFO 2
+#define MIC_X100_FW_SIZE 5
+#define MIC_X100_POSTCODE 0x242c
+
+static const u16 mic_x100_intr_init[] = {
+ MIC_X100_DOORBELL_IDX_START,
+ MIC_X100_DMA_IDX_START,
+ MIC_X100_ERR_IDX_START,
+ MIC_X100_NUM_DOORBELL,
+ MIC_X100_NUM_DMA,
+ MIC_X100_NUM_ERR,
+};
+
+/* Host->Card(bootstrap) Interrupt Vector */
+#define MIC_X100_BSP_INTERRUPT_VECTOR 229
+
+extern struct mic_hw_ops mic_x100_ops;
+extern struct mic_smpt_ops mic_x100_smpt_ops;
+extern struct mic_hw_intr_ops mic_x100_intr_ops;
+
+#endif
diff --git a/drivers/misc/mic/scif/Makefile b/drivers/misc/mic/scif/Makefile
new file mode 100644
index 0000000..29cfc3e
--- /dev/null
+++ b/drivers/misc/mic/scif/Makefile
@@ -0,0 +1,20 @@
+#
+# Makefile - SCIF driver.
+# Copyright(c) 2014, Intel Corporation.
+#
+obj-$(CONFIG_SCIF) += scif.o
+scif-objs := scif_main.o
+scif-objs += scif_peer_bus.o
+scif-objs += scif_ports.o
+scif-objs += scif_debugfs.o
+scif-objs += scif_fd.o
+scif-objs += scif_api.o
+scif-objs += scif_epd.o
+scif-objs += scif_rb.o
+scif-objs += scif_nodeqp.o
+scif-objs += scif_nm.o
+scif-objs += scif_dma.o
+scif-objs += scif_fence.o
+scif-objs += scif_mmap.o
+scif-objs += scif_rma.o
+scif-objs += scif_rma_list.o
diff --git a/drivers/misc/mic/scif/scif_api.c b/drivers/misc/mic/scif/scif_api.c
new file mode 100644
index 0000000..ddc9e4b
--- /dev/null
+++ b/drivers/misc/mic/scif/scif_api.c
@@ -0,0 +1,1496 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2014 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Intel SCIF driver.
+ *
+ */
+#include <linux/scif.h>
+#include "scif_main.h"
+#include "scif_map.h"
+
+static const char * const scif_ep_states[] = {
+ "Unbound",
+ "Bound",
+ "Listening",
+ "Connected",
+ "Connecting",
+ "Mapping",
+ "Closing",
+ "Close Listening",
+ "Disconnected",
+ "Zombie"};
+
+enum conn_async_state {
+ ASYNC_CONN_IDLE = 1, /* ep setup for async connect */
+ ASYNC_CONN_INPROGRESS, /* async connect in progress */
+ ASYNC_CONN_FLUSH_WORK /* async work flush in progress */
+};
+
+/*
+ * File operations for anonymous inode file associated with a SCIF endpoint,
+ * used in kernel mode SCIF poll. Kernel mode SCIF poll calls portions of the
+ * poll API in the kernel and these take in a struct file *. Since a struct
+ * file is not available to kernel mode SCIF, it uses an anonymous file for
+ * this purpose.
+ */
+const struct file_operations scif_anon_fops = {
+ .owner = THIS_MODULE,
+};
+
+scif_epd_t scif_open(void)
+{
+ struct scif_endpt *ep;
+ int err;
+
+ might_sleep();
+ ep = kzalloc(sizeof(*ep), GFP_KERNEL);
+ if (!ep)
+ goto err_ep_alloc;
+
+ ep->qp_info.qp = kzalloc(sizeof(*ep->qp_info.qp), GFP_KERNEL);
+ if (!ep->qp_info.qp)
+ goto err_qp_alloc;
+
+ err = scif_anon_inode_getfile(ep);
+ if (err)
+ goto err_anon_inode;
+
+ spin_lock_init(&ep->lock);
+ mutex_init(&ep->sendlock);
+ mutex_init(&ep->recvlock);
+
+ scif_rma_ep_init(ep);
+ ep->state = SCIFEP_UNBOUND;
+ dev_dbg(scif_info.mdev.this_device,
+ "SCIFAPI open: ep %p success\n", ep);
+ return ep;
+
+err_anon_inode:
+ kfree(ep->qp_info.qp);
+err_qp_alloc:
+ kfree(ep);
+err_ep_alloc:
+ return NULL;
+}
+EXPORT_SYMBOL_GPL(scif_open);
+
+/*
+ * scif_disconnect_ep - Disconnects the endpoint if found
+ * @epd: The end point returned from scif_open()
+ */
+static struct scif_endpt *scif_disconnect_ep(struct scif_endpt *ep)
+{
+ struct scifmsg msg;
+ struct scif_endpt *fep = NULL;
+ struct scif_endpt *tmpep;
+ struct list_head *pos, *tmpq;
+ int err;
+
+ /*
+ * Wake up any threads blocked in send()/recv() before closing
+ * out the connection. Grabbing and releasing the send/recv lock
+ * will ensure that any blocked senders/receivers have exited for
+ * Ring 0 endpoints. It is a Ring 0 bug to call send/recv after
+ * close. Ring 3 endpoints are not affected since close will not
+ * be called while there are IOCTLs executing.
+ */
+ wake_up_interruptible(&ep->sendwq);
+ wake_up_interruptible(&ep->recvwq);
+ mutex_lock(&ep->sendlock);
+ mutex_unlock(&ep->sendlock);
+ mutex_lock(&ep->recvlock);
+ mutex_unlock(&ep->recvlock);
+
+ /* Remove from the connected list */
+ mutex_lock(&scif_info.connlock);
+ list_for_each_safe(pos, tmpq, &scif_info.connected) {
+ tmpep = list_entry(pos, struct scif_endpt, list);
+ if (tmpep == ep) {
+ list_del(pos);
+ fep = tmpep;
+ spin_lock(&ep->lock);
+ break;
+ }
+ }
+
+ if (!fep) {
+ /*
+ * The other side has completed the disconnect before
+ * the end point can be removed from the list. Therefore
+ * the ep lock is not locked, traverse the disconnected
+ * list to find the endpoint and release the conn lock.
+ */
+ list_for_each_safe(pos, tmpq, &scif_info.disconnected) {
+ tmpep = list_entry(pos, struct scif_endpt, list);
+ if (tmpep == ep) {
+ list_del(pos);
+ break;
+ }
+ }
+ mutex_unlock(&scif_info.connlock);
+ return NULL;
+ }
+
+ init_completion(&ep->discon);
+ msg.uop = SCIF_DISCNCT;
+ msg.src = ep->port;
+ msg.dst = ep->peer;
+ msg.payload[0] = (u64)ep;
+ msg.payload[1] = ep->remote_ep;
+
+ err = scif_nodeqp_send(ep->remote_dev, &msg);
+ spin_unlock(&ep->lock);
+ mutex_unlock(&scif_info.connlock);
+
+ if (!err)
+ /* Wait for the remote node to respond with SCIF_DISCNT_ACK */
+ wait_for_completion_timeout(&ep->discon,
+ SCIF_NODE_ALIVE_TIMEOUT);
+ return ep;
+}
+
+int scif_close(scif_epd_t epd)
+{
+ struct scif_endpt *ep = (struct scif_endpt *)epd;
+ struct scif_endpt *tmpep;
+ struct list_head *pos, *tmpq;
+ enum scif_epd_state oldstate;
+ bool flush_conn;
+
+ dev_dbg(scif_info.mdev.this_device, "SCIFAPI close: ep %p %s\n",
+ ep, scif_ep_states[ep->state]);
+ might_sleep();
+ spin_lock(&ep->lock);
+ flush_conn = (ep->conn_async_state == ASYNC_CONN_INPROGRESS);
+ spin_unlock(&ep->lock);
+
+ if (flush_conn)
+ flush_work(&scif_info.conn_work);
+
+ spin_lock(&ep->lock);
+ oldstate = ep->state;
+
+ ep->state = SCIFEP_CLOSING;
+
+ switch (oldstate) {
+ case SCIFEP_ZOMBIE:
+ dev_err(scif_info.mdev.this_device,
+ "SCIFAPI close: zombie state unexpected\n");
+ case SCIFEP_DISCONNECTED:
+ spin_unlock(&ep->lock);
+ scif_unregister_all_windows(epd);
+ /* Remove from the disconnected list */
+ mutex_lock(&scif_info.connlock);
+ list_for_each_safe(pos, tmpq, &scif_info.disconnected) {
+ tmpep = list_entry(pos, struct scif_endpt, list);
+ if (tmpep == ep) {
+ list_del(pos);
+ break;
+ }
+ }
+ mutex_unlock(&scif_info.connlock);
+ break;
+ case SCIFEP_UNBOUND:
+ case SCIFEP_BOUND:
+ case SCIFEP_CONNECTING:
+ spin_unlock(&ep->lock);
+ break;
+ case SCIFEP_MAPPING:
+ case SCIFEP_CONNECTED:
+ case SCIFEP_CLOSING:
+ {
+ spin_unlock(&ep->lock);
+ scif_unregister_all_windows(epd);
+ scif_disconnect_ep(ep);
+ break;
+ }
+ case SCIFEP_LISTENING:
+ case SCIFEP_CLLISTEN:
+ {
+ struct scif_conreq *conreq;
+ struct scifmsg msg;
+ struct scif_endpt *aep;
+
+ spin_unlock(&ep->lock);
+ mutex_lock(&scif_info.eplock);
+
+ /* remove from listen list */
+ list_for_each_safe(pos, tmpq, &scif_info.listen) {
+ tmpep = list_entry(pos, struct scif_endpt, list);
+ if (tmpep == ep)
+ list_del(pos);
+ }
+ /* Remove any dangling accepts */
+ while (ep->acceptcnt) {
+ aep = list_first_entry(&ep->li_accept,
+ struct scif_endpt, liacceptlist);
+ list_del(&aep->liacceptlist);
+ scif_put_port(aep->port.port);
+ list_for_each_safe(pos, tmpq, &scif_info.uaccept) {
+ tmpep = list_entry(pos, struct scif_endpt,
+ miacceptlist);
+ if (tmpep == aep) {
+ list_del(pos);
+ break;
+ }
+ }
+ mutex_unlock(&scif_info.eplock);
+ mutex_lock(&scif_info.connlock);
+ list_for_each_safe(pos, tmpq, &scif_info.connected) {
+ tmpep = list_entry(pos,
+ struct scif_endpt, list);
+ if (tmpep == aep) {
+ list_del(pos);
+ break;
+ }
+ }
+ list_for_each_safe(pos, tmpq, &scif_info.disconnected) {
+ tmpep = list_entry(pos,
+ struct scif_endpt, list);
+ if (tmpep == aep) {
+ list_del(pos);
+ break;
+ }
+ }
+ mutex_unlock(&scif_info.connlock);
+ scif_teardown_ep(aep);
+ mutex_lock(&scif_info.eplock);
+ scif_add_epd_to_zombie_list(aep, SCIF_EPLOCK_HELD);
+ ep->acceptcnt--;
+ }
+
+ spin_lock(&ep->lock);
+ mutex_unlock(&scif_info.eplock);
+
+ /* Remove and reject any pending connection requests. */
+ while (ep->conreqcnt) {
+ conreq = list_first_entry(&ep->conlist,
+ struct scif_conreq, list);
+ list_del(&conreq->list);
+
+ msg.uop = SCIF_CNCT_REJ;
+ msg.dst.node = conreq->msg.src.node;
+ msg.dst.port = conreq->msg.src.port;
+ msg.payload[0] = conreq->msg.payload[0];
+ msg.payload[1] = conreq->msg.payload[1];
+ /*
+ * No Error Handling on purpose for scif_nodeqp_send().
+ * If the remote node is lost we still want free the
+ * connection requests on the self node.
+ */
+ scif_nodeqp_send(&scif_dev[conreq->msg.src.node],
+ &msg);
+ ep->conreqcnt--;
+ kfree(conreq);
+ }
+
+ spin_unlock(&ep->lock);
+ /* If a kSCIF accept is waiting wake it up */
+ wake_up_interruptible(&ep->conwq);
+ break;
+ }
+ }
+ scif_put_port(ep->port.port);
+ scif_anon_inode_fput(ep);
+ scif_teardown_ep(ep);
+ scif_add_epd_to_zombie_list(ep, !SCIF_EPLOCK_HELD);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(scif_close);
+
+/**
+ * scif_flush() - Wakes up any blocking accepts. The endpoint will no longer
+ * accept new connections.
+ * @epd: The end point returned from scif_open()
+ */
+int __scif_flush(scif_epd_t epd)
+{
+ struct scif_endpt *ep = (struct scif_endpt *)epd;
+
+ switch (ep->state) {
+ case SCIFEP_LISTENING:
+ {
+ ep->state = SCIFEP_CLLISTEN;
+
+ /* If an accept is waiting wake it up */
+ wake_up_interruptible(&ep->conwq);
+ break;
+ }
+ default:
+ break;
+ }
+ return 0;
+}
+
+int scif_bind(scif_epd_t epd, u16 pn)
+{
+ struct scif_endpt *ep = (struct scif_endpt *)epd;
+ int ret = 0;
+ int tmp;
+
+ dev_dbg(scif_info.mdev.this_device,
+ "SCIFAPI bind: ep %p %s requested port number %d\n",
+ ep, scif_ep_states[ep->state], pn);
+ if (pn) {
+ /*
+ * Similar to IETF RFC 1700, SCIF ports below
+ * SCIF_ADMIN_PORT_END can only be bound by system (or root)
+ * processes or by processes executed by privileged users.
+ */
+ if (pn < SCIF_ADMIN_PORT_END && !capable(CAP_SYS_ADMIN)) {
+ ret = -EACCES;
+ goto scif_bind_admin_exit;
+ }
+ }
+
+ spin_lock(&ep->lock);
+ if (ep->state == SCIFEP_BOUND) {
+ ret = -EINVAL;
+ goto scif_bind_exit;
+ } else if (ep->state != SCIFEP_UNBOUND) {
+ ret = -EISCONN;
+ goto scif_bind_exit;
+ }
+
+ if (pn) {
+ tmp = scif_rsrv_port(pn);
+ if (tmp != pn) {
+ ret = -EINVAL;
+ goto scif_bind_exit;
+ }
+ } else {
+ pn = scif_get_new_port();
+ if (!pn) {
+ ret = -ENOSPC;
+ goto scif_bind_exit;
+ }
+ }
+
+ ep->state = SCIFEP_BOUND;
+ ep->port.node = scif_info.nodeid;
+ ep->port.port = pn;
+ ep->conn_async_state = ASYNC_CONN_IDLE;
+ ret = pn;
+ dev_dbg(scif_info.mdev.this_device,
+ "SCIFAPI bind: bound to port number %d\n", pn);
+scif_bind_exit:
+ spin_unlock(&ep->lock);
+scif_bind_admin_exit:
+ return ret;
+}
+EXPORT_SYMBOL_GPL(scif_bind);
+
+int scif_listen(scif_epd_t epd, int backlog)
+{
+ struct scif_endpt *ep = (struct scif_endpt *)epd;
+
+ dev_dbg(scif_info.mdev.this_device,
+ "SCIFAPI listen: ep %p %s\n", ep, scif_ep_states[ep->state]);
+ spin_lock(&ep->lock);
+ switch (ep->state) {
+ case SCIFEP_ZOMBIE:
+ case SCIFEP_CLOSING:
+ case SCIFEP_CLLISTEN:
+ case SCIFEP_UNBOUND:
+ case SCIFEP_DISCONNECTED:
+ spin_unlock(&ep->lock);
+ return -EINVAL;
+ case SCIFEP_LISTENING:
+ case SCIFEP_CONNECTED:
+ case SCIFEP_CONNECTING:
+ case SCIFEP_MAPPING:
+ spin_unlock(&ep->lock);
+ return -EISCONN;
+ case SCIFEP_BOUND:
+ break;
+ }
+
+ ep->state = SCIFEP_LISTENING;
+ ep->backlog = backlog;
+
+ ep->conreqcnt = 0;
+ ep->acceptcnt = 0;
+ INIT_LIST_HEAD(&ep->conlist);
+ init_waitqueue_head(&ep->conwq);
+ INIT_LIST_HEAD(&ep->li_accept);
+ spin_unlock(&ep->lock);
+
+ /*
+ * Listen status is complete so delete the qp information not needed
+ * on a listen before placing on the list of listening ep's
+ */
+ scif_teardown_ep(ep);
+ ep->qp_info.qp = NULL;
+
+ mutex_lock(&scif_info.eplock);
+ list_add_tail(&ep->list, &scif_info.listen);
+ mutex_unlock(&scif_info.eplock);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(scif_listen);
+
+/*
+ ************************************************************************
+ * SCIF connection flow:
+ *
+ * 1) A SCIF listening endpoint can call scif_accept(..) to wait for SCIF
+ * connections via a SCIF_CNCT_REQ message
+ * 2) A SCIF endpoint can initiate a SCIF connection by calling
+ * scif_connect(..) which calls scif_setup_qp_connect(..) which
+ * allocates the local qp for the endpoint ring buffer and then sends
+ * a SCIF_CNCT_REQ to the remote node and waits for a SCIF_CNCT_GNT or
+ * a SCIF_CNCT_REJ message
+ * 3) The peer node handles a SCIF_CNCT_REQ via scif_cnctreq_resp(..) which
+ * wakes up any threads blocked in step 1 or sends a SCIF_CNCT_REJ
+ * message otherwise
+ * 4) A thread blocked waiting for incoming connections allocates its local
+ * endpoint QP and ring buffer following which it sends a SCIF_CNCT_GNT
+ * and waits for a SCIF_CNCT_GNT(N)ACK. If the allocation fails then
+ * the node sends a SCIF_CNCT_REJ message
+ * 5) Upon receipt of a SCIF_CNCT_GNT or a SCIF_CNCT_REJ message the
+ * connecting endpoint is woken up as part of handling
+ * scif_cnctgnt_resp(..) following which it maps the remote endpoints'
+ * QP, updates its outbound QP and sends a SCIF_CNCT_GNTACK message on
+ * success or a SCIF_CNCT_GNTNACK message on failure and completes
+ * the scif_connect(..) API
+ * 6) Upon receipt of a SCIF_CNCT_GNT(N)ACK the accepting endpoint blocked
+ * in step 4 is woken up and completes the scif_accept(..) API
+ * 7) The SCIF connection is now established between the two SCIF endpoints.
+ */
+static int scif_conn_func(struct scif_endpt *ep)
+{
+ int err = 0;
+ struct scifmsg msg;
+ struct device *spdev;
+
+ err = scif_reserve_dma_chan(ep);
+ if (err) {
+ dev_err(&ep->remote_dev->sdev->dev,
+ "%s %d err %d\n", __func__, __LINE__, err);
+ ep->state = SCIFEP_BOUND;
+ goto connect_error_simple;
+ }
+ /* Initiate the first part of the endpoint QP setup */
+ err = scif_setup_qp_connect(ep->qp_info.qp, &ep->qp_info.qp_offset,
+ SCIF_ENDPT_QP_SIZE, ep->remote_dev);
+ if (err) {
+ dev_err(&ep->remote_dev->sdev->dev,
+ "%s err %d qp_offset 0x%llx\n",
+ __func__, err, ep->qp_info.qp_offset);
+ ep->state = SCIFEP_BOUND;
+ goto connect_error_simple;
+ }
+
+ spdev = scif_get_peer_dev(ep->remote_dev);
+ if (IS_ERR(spdev)) {
+ err = PTR_ERR(spdev);
+ goto cleanup_qp;
+ }
+ /* Format connect message and send it */
+ msg.src = ep->port;
+ msg.dst = ep->conn_port;
+ msg.uop = SCIF_CNCT_REQ;
+ msg.payload[0] = (u64)ep;
+ msg.payload[1] = ep->qp_info.qp_offset;
+ err = _scif_nodeqp_send(ep->remote_dev, &msg);
+ if (err)
+ goto connect_error_dec;
+ scif_put_peer_dev(spdev);
+ /*
+ * Wait for the remote node to respond with SCIF_CNCT_GNT or
+ * SCIF_CNCT_REJ message.
+ */
+ err = wait_event_timeout(ep->conwq, ep->state != SCIFEP_CONNECTING,
+ SCIF_NODE_ALIVE_TIMEOUT);
+ if (!err) {
+ dev_err(&ep->remote_dev->sdev->dev,
+ "%s %d timeout\n", __func__, __LINE__);
+ ep->state = SCIFEP_BOUND;
+ }
+ spdev = scif_get_peer_dev(ep->remote_dev);
+ if (IS_ERR(spdev)) {
+ err = PTR_ERR(spdev);
+ goto cleanup_qp;
+ }
+ if (ep->state == SCIFEP_MAPPING) {
+ err = scif_setup_qp_connect_response(ep->remote_dev,
+ ep->qp_info.qp,
+ ep->qp_info.gnt_pld);
+ /*
+ * If the resource to map the queue are not available then
+ * we need to tell the other side to terminate the accept
+ */
+ if (err) {
+ dev_err(&ep->remote_dev->sdev->dev,
+ "%s %d err %d\n", __func__, __LINE__, err);
+ msg.uop = SCIF_CNCT_GNTNACK;
+ msg.payload[0] = ep->remote_ep;
+ _scif_nodeqp_send(ep->remote_dev, &msg);
+ ep->state = SCIFEP_BOUND;
+ goto connect_error_dec;
+ }
+
+ msg.uop = SCIF_CNCT_GNTACK;
+ msg.payload[0] = ep->remote_ep;
+ err = _scif_nodeqp_send(ep->remote_dev, &msg);
+ if (err) {
+ ep->state = SCIFEP_BOUND;
+ goto connect_error_dec;
+ }
+ ep->state = SCIFEP_CONNECTED;
+ mutex_lock(&scif_info.connlock);
+ list_add_tail(&ep->list, &scif_info.connected);
+ mutex_unlock(&scif_info.connlock);
+ dev_dbg(&ep->remote_dev->sdev->dev,
+ "SCIFAPI connect: ep %p connected\n", ep);
+ } else if (ep->state == SCIFEP_BOUND) {
+ dev_dbg(&ep->remote_dev->sdev->dev,
+ "SCIFAPI connect: ep %p connection refused\n", ep);
+ err = -ECONNREFUSED;
+ goto connect_error_dec;
+ }
+ scif_put_peer_dev(spdev);
+ return err;
+connect_error_dec:
+ scif_put_peer_dev(spdev);
+cleanup_qp:
+ scif_cleanup_ep_qp(ep);
+connect_error_simple:
+ return err;
+}
+
+/*
+ * scif_conn_handler:
+ *
+ * Workqueue handler for servicing non-blocking SCIF connect
+ *
+ */
+void scif_conn_handler(struct work_struct *work)
+{
+ struct scif_endpt *ep;
+
+ do {
+ ep = NULL;
+ spin_lock(&scif_info.nb_connect_lock);
+ if (!list_empty(&scif_info.nb_connect_list)) {
+ ep = list_first_entry(&scif_info.nb_connect_list,
+ struct scif_endpt, conn_list);
+ list_del(&ep->conn_list);
+ }
+ spin_unlock(&scif_info.nb_connect_lock);
+ if (ep) {
+ ep->conn_err = scif_conn_func(ep);
+ wake_up_interruptible(&ep->conn_pend_wq);
+ }
+ } while (ep);
+}
+
+int __scif_connect(scif_epd_t epd, struct scif_port_id *dst, bool non_block)
+{
+ struct scif_endpt *ep = (struct scif_endpt *)epd;
+ int err = 0;
+ struct scif_dev *remote_dev;
+ struct device *spdev;
+
+ dev_dbg(scif_info.mdev.this_device, "SCIFAPI connect: ep %p %s\n", ep,
+ scif_ep_states[ep->state]);
+
+ if (!scif_dev || dst->node > scif_info.maxid)
+ return -ENODEV;
+
+ might_sleep();
+
+ remote_dev = &scif_dev[dst->node];
+ spdev = scif_get_peer_dev(remote_dev);
+ if (IS_ERR(spdev)) {
+ err = PTR_ERR(spdev);
+ return err;
+ }
+
+ spin_lock(&ep->lock);
+ switch (ep->state) {
+ case SCIFEP_ZOMBIE:
+ case SCIFEP_CLOSING:
+ err = -EINVAL;
+ break;
+ case SCIFEP_DISCONNECTED:
+ if (ep->conn_async_state == ASYNC_CONN_INPROGRESS)
+ ep->conn_async_state = ASYNC_CONN_FLUSH_WORK;
+ else
+ err = -EINVAL;
+ break;
+ case SCIFEP_LISTENING:
+ case SCIFEP_CLLISTEN:
+ err = -EOPNOTSUPP;
+ break;
+ case SCIFEP_CONNECTING:
+ case SCIFEP_MAPPING:
+ if (ep->conn_async_state == ASYNC_CONN_INPROGRESS)
+ err = -EINPROGRESS;
+ else
+ err = -EISCONN;
+ break;
+ case SCIFEP_CONNECTED:
+ if (ep->conn_async_state == ASYNC_CONN_INPROGRESS)
+ ep->conn_async_state = ASYNC_CONN_FLUSH_WORK;
+ else
+ err = -EISCONN;
+ break;
+ case SCIFEP_UNBOUND:
+ ep->port.port = scif_get_new_port();
+ if (!ep->port.port) {
+ err = -ENOSPC;
+ } else {
+ ep->port.node = scif_info.nodeid;
+ ep->conn_async_state = ASYNC_CONN_IDLE;
+ }
+ /* Fall through */
+ case SCIFEP_BOUND:
+ /*
+ * If a non-blocking connect has been already initiated
+ * (conn_async_state is either ASYNC_CONN_INPROGRESS or
+ * ASYNC_CONN_FLUSH_WORK), the end point could end up in
+ * SCIF_BOUND due an error in the connection process
+ * (e.g., connection refused) If conn_async_state is
+ * ASYNC_CONN_INPROGRESS - transition to ASYNC_CONN_FLUSH_WORK
+ * so that the error status can be collected. If the state is
+ * already ASYNC_CONN_FLUSH_WORK - then set the error to
+ * EINPROGRESS since some other thread is waiting to collect
+ * error status.
+ */
+ if (ep->conn_async_state == ASYNC_CONN_INPROGRESS) {
+ ep->conn_async_state = ASYNC_CONN_FLUSH_WORK;
+ } else if (ep->conn_async_state == ASYNC_CONN_FLUSH_WORK) {
+ err = -EINPROGRESS;
+ } else {
+ ep->conn_port = *dst;
+ init_waitqueue_head(&ep->sendwq);
+ init_waitqueue_head(&ep->recvwq);
+ init_waitqueue_head(&ep->conwq);
+ ep->conn_async_state = 0;
+
+ if (unlikely(non_block))
+ ep->conn_async_state = ASYNC_CONN_INPROGRESS;
+ }
+ break;
+ }
+
+ if (err || ep->conn_async_state == ASYNC_CONN_FLUSH_WORK)
+ goto connect_simple_unlock1;
+
+ ep->state = SCIFEP_CONNECTING;
+ ep->remote_dev = &scif_dev[dst->node];
+ ep->qp_info.qp->magic = SCIFEP_MAGIC;
+ if (ep->conn_async_state == ASYNC_CONN_INPROGRESS) {
+ init_waitqueue_head(&ep->conn_pend_wq);
+ spin_lock(&scif_info.nb_connect_lock);
+ list_add_tail(&ep->conn_list, &scif_info.nb_connect_list);
+ spin_unlock(&scif_info.nb_connect_lock);
+ err = -EINPROGRESS;
+ schedule_work(&scif_info.conn_work);
+ }
+connect_simple_unlock1:
+ spin_unlock(&ep->lock);
+ scif_put_peer_dev(spdev);
+ if (err) {
+ return err;
+ } else if (ep->conn_async_state == ASYNC_CONN_FLUSH_WORK) {
+ flush_work(&scif_info.conn_work);
+ err = ep->conn_err;
+ spin_lock(&ep->lock);
+ ep->conn_async_state = ASYNC_CONN_IDLE;
+ spin_unlock(&ep->lock);
+ } else {
+ err = scif_conn_func(ep);
+ }
+ return err;
+}
+
+int scif_connect(scif_epd_t epd, struct scif_port_id *dst)
+{
+ return __scif_connect(epd, dst, false);
+}
+EXPORT_SYMBOL_GPL(scif_connect);
+
+/**
+ * scif_accept() - Accept a connection request from the remote node
+ *
+ * The function accepts a connection request from the remote node. Successful
+ * complete is indicate by a new end point being created and passed back
+ * to the caller for future reference.
+ *
+ * Upon successful complete a zero will be returned and the peer information
+ * will be filled in.
+ *
+ * If the end point is not in the listening state -EINVAL will be returned.
+ *
+ * If during the connection sequence resource allocation fails the -ENOMEM
+ * will be returned.
+ *
+ * If the function is called with the ASYNC flag set and no connection requests
+ * are pending it will return -EAGAIN.
+ *
+ * If the remote side is not sending any connection requests the caller may
+ * terminate this function with a signal. If so a -EINTR will be returned.
+ */
+int scif_accept(scif_epd_t epd, struct scif_port_id *peer,
+ scif_epd_t *newepd, int flags)
+{
+ struct scif_endpt *lep = (struct scif_endpt *)epd;
+ struct scif_endpt *cep;
+ struct scif_conreq *conreq;
+ struct scifmsg msg;
+ int err;
+ struct device *spdev;
+
+ dev_dbg(scif_info.mdev.this_device,
+ "SCIFAPI accept: ep %p %s\n", lep, scif_ep_states[lep->state]);
+
+ if (flags & ~SCIF_ACCEPT_SYNC)
+ return -EINVAL;
+
+ if (!peer || !newepd)
+ return -EINVAL;
+
+ might_sleep();
+ spin_lock(&lep->lock);
+ if (lep->state != SCIFEP_LISTENING) {
+ spin_unlock(&lep->lock);
+ return -EINVAL;
+ }
+
+ if (!lep->conreqcnt && !(flags & SCIF_ACCEPT_SYNC)) {
+ /* No connection request present and we do not want to wait */
+ spin_unlock(&lep->lock);
+ return -EAGAIN;
+ }
+
+ lep->files = current->files;
+retry_connection:
+ spin_unlock(&lep->lock);
+ /* Wait for the remote node to send us a SCIF_CNCT_REQ */
+ err = wait_event_interruptible(lep->conwq,
+ (lep->conreqcnt ||
+ (lep->state != SCIFEP_LISTENING)));
+ if (err)
+ return err;
+
+ if (lep->state != SCIFEP_LISTENING)
+ return -EINTR;
+
+ spin_lock(&lep->lock);
+
+ if (!lep->conreqcnt)
+ goto retry_connection;
+
+ /* Get the first connect request off the list */
+ conreq = list_first_entry(&lep->conlist, struct scif_conreq, list);
+ list_del(&conreq->list);
+ lep->conreqcnt--;
+ spin_unlock(&lep->lock);
+
+ /* Fill in the peer information */
+ peer->node = conreq->msg.src.node;
+ peer->port = conreq->msg.src.port;
+
+ cep = kzalloc(sizeof(*cep), GFP_KERNEL);
+ if (!cep) {
+ err = -ENOMEM;
+ goto scif_accept_error_epalloc;
+ }
+ spin_lock_init(&cep->lock);
+ mutex_init(&cep->sendlock);
+ mutex_init(&cep->recvlock);
+ cep->state = SCIFEP_CONNECTING;
+ cep->remote_dev = &scif_dev[peer->node];
+ cep->remote_ep = conreq->msg.payload[0];
+
+ scif_rma_ep_init(cep);
+
+ err = scif_reserve_dma_chan(cep);
+ if (err) {
+ dev_err(scif_info.mdev.this_device,
+ "%s %d err %d\n", __func__, __LINE__, err);
+ goto scif_accept_error_qpalloc;
+ }
+
+ cep->qp_info.qp = kzalloc(sizeof(*cep->qp_info.qp), GFP_KERNEL);
+ if (!cep->qp_info.qp) {
+ err = -ENOMEM;
+ goto scif_accept_error_qpalloc;
+ }
+
+ err = scif_anon_inode_getfile(cep);
+ if (err)
+ goto scif_accept_error_anon_inode;
+
+ cep->qp_info.qp->magic = SCIFEP_MAGIC;
+ spdev = scif_get_peer_dev(cep->remote_dev);
+ if (IS_ERR(spdev)) {
+ err = PTR_ERR(spdev);
+ goto scif_accept_error_map;
+ }
+ err = scif_setup_qp_accept(cep->qp_info.qp, &cep->qp_info.qp_offset,
+ conreq->msg.payload[1], SCIF_ENDPT_QP_SIZE,
+ cep->remote_dev);
+ if (err) {
+ dev_dbg(&cep->remote_dev->sdev->dev,
+ "SCIFAPI accept: ep %p new %p scif_setup_qp_accept %d qp_offset 0x%llx\n",
+ lep, cep, err, cep->qp_info.qp_offset);
+ scif_put_peer_dev(spdev);
+ goto scif_accept_error_map;
+ }
+
+ cep->port.node = lep->port.node;
+ cep->port.port = lep->port.port;
+ cep->peer.node = peer->node;
+ cep->peer.port = peer->port;
+ init_waitqueue_head(&cep->sendwq);
+ init_waitqueue_head(&cep->recvwq);
+ init_waitqueue_head(&cep->conwq);
+
+ msg.uop = SCIF_CNCT_GNT;
+ msg.src = cep->port;
+ msg.payload[0] = cep->remote_ep;
+ msg.payload[1] = cep->qp_info.qp_offset;
+ msg.payload[2] = (u64)cep;
+
+ err = _scif_nodeqp_send(cep->remote_dev, &msg);
+ scif_put_peer_dev(spdev);
+ if (err)
+ goto scif_accept_error_map;
+retry:
+ /* Wait for the remote node to respond with SCIF_CNCT_GNT(N)ACK */
+ err = wait_event_timeout(cep->conwq, cep->state != SCIFEP_CONNECTING,
+ SCIF_NODE_ACCEPT_TIMEOUT);
+ if (!err && scifdev_alive(cep))
+ goto retry;
+ err = !err ? -ENODEV : 0;
+ if (err)
+ goto scif_accept_error_map;
+ kfree(conreq);
+
+ spin_lock(&cep->lock);
+
+ if (cep->state == SCIFEP_CLOSING) {
+ /*
+ * Remote failed to allocate resources and NAKed the grant.
+ * There is at this point nothing referencing the new end point.
+ */
+ spin_unlock(&cep->lock);
+ scif_teardown_ep(cep);
+ kfree(cep);
+
+ /* If call with sync flag then go back and wait. */
+ if (flags & SCIF_ACCEPT_SYNC) {
+ spin_lock(&lep->lock);
+ goto retry_connection;
+ }
+ return -EAGAIN;
+ }
+
+ scif_get_port(cep->port.port);
+ *newepd = (scif_epd_t)cep;
+ spin_unlock(&cep->lock);
+ return 0;
+scif_accept_error_map:
+ scif_anon_inode_fput(cep);
+scif_accept_error_anon_inode:
+ scif_teardown_ep(cep);
+scif_accept_error_qpalloc:
+ kfree(cep);
+scif_accept_error_epalloc:
+ msg.uop = SCIF_CNCT_REJ;
+ msg.dst.node = conreq->msg.src.node;
+ msg.dst.port = conreq->msg.src.port;
+ msg.payload[0] = conreq->msg.payload[0];
+ msg.payload[1] = conreq->msg.payload[1];
+ scif_nodeqp_send(&scif_dev[conreq->msg.src.node], &msg);
+ kfree(conreq);
+ return err;
+}
+EXPORT_SYMBOL_GPL(scif_accept);
+
+/*
+ * scif_msg_param_check:
+ * @epd: The end point returned from scif_open()
+ * @len: Length to receive
+ * @flags: blocking or non blocking
+ *
+ * Validate parameters for messaging APIs scif_send(..)/scif_recv(..).
+ */
+static inline int scif_msg_param_check(scif_epd_t epd, int len, int flags)
+{
+ int ret = -EINVAL;
+
+ if (len < 0)
+ goto err_ret;
+ if (flags && (!(flags & SCIF_RECV_BLOCK)))
+ goto err_ret;
+ ret = 0;
+err_ret:
+ return ret;
+}
+
+static int _scif_send(scif_epd_t epd, void *msg, int len, int flags)
+{
+ struct scif_endpt *ep = (struct scif_endpt *)epd;
+ struct scifmsg notif_msg;
+ int curr_xfer_len = 0, sent_len = 0, write_count;
+ int ret = 0;
+ struct scif_qp *qp = ep->qp_info.qp;
+
+ if (flags & SCIF_SEND_BLOCK)
+ might_sleep();
+
+ spin_lock(&ep->lock);
+ while (sent_len != len && SCIFEP_CONNECTED == ep->state) {
+ write_count = scif_rb_space(&qp->outbound_q);
+ if (write_count) {
+ /* Best effort to send as much data as possible */
+ curr_xfer_len = min(len - sent_len, write_count);
+ ret = scif_rb_write(&qp->outbound_q, msg,
+ curr_xfer_len);
+ if (ret < 0)
+ break;
+ /* Success. Update write pointer */
+ scif_rb_commit(&qp->outbound_q);
+ /*
+ * Send a notification to the peer about the
+ * produced data message.
+ */
+ notif_msg.src = ep->port;
+ notif_msg.uop = SCIF_CLIENT_SENT;
+ notif_msg.payload[0] = ep->remote_ep;
+ ret = _scif_nodeqp_send(ep->remote_dev, ¬if_msg);
+ if (ret)
+ break;
+ sent_len += curr_xfer_len;
+ msg = msg + curr_xfer_len;
+ continue;
+ }
+ curr_xfer_len = min(len - sent_len, SCIF_ENDPT_QP_SIZE - 1);
+ /* Not enough RB space. return for the Non Blocking case */
+ if (!(flags & SCIF_SEND_BLOCK))
+ break;
+
+ spin_unlock(&ep->lock);
+ /* Wait for a SCIF_CLIENT_RCVD message in the Blocking case */
+ ret =
+ wait_event_interruptible(ep->sendwq,
+ (SCIFEP_CONNECTED != ep->state) ||
+ (scif_rb_space(&qp->outbound_q) >=
+ curr_xfer_len));
+ spin_lock(&ep->lock);
+ if (ret)
+ break;
+ }
+ if (sent_len)
+ ret = sent_len;
+ else if (!ret && SCIFEP_CONNECTED != ep->state)
+ ret = SCIFEP_DISCONNECTED == ep->state ?
+ -ECONNRESET : -ENOTCONN;
+ spin_unlock(&ep->lock);
+ return ret;
+}
+
+static int _scif_recv(scif_epd_t epd, void *msg, int len, int flags)
+{
+ int read_size;
+ struct scif_endpt *ep = (struct scif_endpt *)epd;
+ struct scifmsg notif_msg;
+ int curr_recv_len = 0, remaining_len = len, read_count;
+ int ret = 0;
+ struct scif_qp *qp = ep->qp_info.qp;
+
+ if (flags & SCIF_RECV_BLOCK)
+ might_sleep();
+ spin_lock(&ep->lock);
+ while (remaining_len && (SCIFEP_CONNECTED == ep->state ||
+ SCIFEP_DISCONNECTED == ep->state)) {
+ read_count = scif_rb_count(&qp->inbound_q, remaining_len);
+ if (read_count) {
+ /*
+ * Best effort to recv as much data as there
+ * are bytes to read in the RB particularly
+ * important for the Non Blocking case.
+ */
+ curr_recv_len = min(remaining_len, read_count);
+ read_size = scif_rb_get_next(&qp->inbound_q,
+ msg, curr_recv_len);
+ if (ep->state == SCIFEP_CONNECTED) {
+ /*
+ * Update the read pointer only if the endpoint
+ * is still connected else the read pointer
+ * might no longer exist since the peer has
+ * freed resources!
+ */
+ scif_rb_update_read_ptr(&qp->inbound_q);
+ /*
+ * Send a notification to the peer about the
+ * consumed data message only if the EP is in
+ * SCIFEP_CONNECTED state.
+ */
+ notif_msg.src = ep->port;
+ notif_msg.uop = SCIF_CLIENT_RCVD;
+ notif_msg.payload[0] = ep->remote_ep;
+ ret = _scif_nodeqp_send(ep->remote_dev,
+ ¬if_msg);
+ if (ret)
+ break;
+ }
+ remaining_len -= curr_recv_len;
+ msg = msg + curr_recv_len;
+ continue;
+ }
+ /*
+ * Bail out now if the EP is in SCIFEP_DISCONNECTED state else
+ * we will keep looping forever.
+ */
+ if (ep->state == SCIFEP_DISCONNECTED)
+ break;
+ /*
+ * Return in the Non Blocking case if there is no data
+ * to read in this iteration.
+ */
+ if (!(flags & SCIF_RECV_BLOCK))
+ break;
+ curr_recv_len = min(remaining_len, SCIF_ENDPT_QP_SIZE - 1);
+ spin_unlock(&ep->lock);
+ /*
+ * Wait for a SCIF_CLIENT_SEND message in the blocking case
+ * or until other side disconnects.
+ */
+ ret =
+ wait_event_interruptible(ep->recvwq,
+ SCIFEP_CONNECTED != ep->state ||
+ scif_rb_count(&qp->inbound_q,
+ curr_recv_len)
+ >= curr_recv_len);
+ spin_lock(&ep->lock);
+ if (ret)
+ break;
+ }
+ if (len - remaining_len)
+ ret = len - remaining_len;
+ else if (!ret && ep->state != SCIFEP_CONNECTED)
+ ret = ep->state == SCIFEP_DISCONNECTED ?
+ -ECONNRESET : -ENOTCONN;
+ spin_unlock(&ep->lock);
+ return ret;
+}
+
+/**
+ * scif_user_send() - Send data to connection queue
+ * @epd: The end point returned from scif_open()
+ * @msg: Address to place data
+ * @len: Length to receive
+ * @flags: blocking or non blocking
+ *
+ * This function is called from the driver IOCTL entry point
+ * only and is a wrapper for _scif_send().
+ */
+int scif_user_send(scif_epd_t epd, void __user *msg, int len, int flags)
+{
+ struct scif_endpt *ep = (struct scif_endpt *)epd;
+ int err = 0;
+ int sent_len = 0;
+ char *tmp;
+ int loop_len;
+ int chunk_len = min(len, (1 << (MAX_ORDER + PAGE_SHIFT - 1)));
+
+ dev_dbg(scif_info.mdev.this_device,
+ "SCIFAPI send (U): ep %p %s\n", ep, scif_ep_states[ep->state]);
+ if (!len)
+ return 0;
+
+ err = scif_msg_param_check(epd, len, flags);
+ if (err)
+ goto send_err;
+
+ tmp = kmalloc(chunk_len, GFP_KERNEL);
+ if (!tmp) {
+ err = -ENOMEM;
+ goto send_err;
+ }
+ /*
+ * Grabbing the lock before breaking up the transfer in
+ * multiple chunks is required to ensure that messages do
+ * not get fragmented and reordered.
+ */
+ mutex_lock(&ep->sendlock);
+ while (sent_len != len) {
+ loop_len = len - sent_len;
+ loop_len = min(chunk_len, loop_len);
+ if (copy_from_user(tmp, msg, loop_len)) {
+ err = -EFAULT;
+ goto send_free_err;
+ }
+ err = _scif_send(epd, tmp, loop_len, flags);
+ if (err < 0)
+ goto send_free_err;
+ sent_len += err;
+ msg += err;
+ if (err != loop_len)
+ goto send_free_err;
+ }
+send_free_err:
+ mutex_unlock(&ep->sendlock);
+ kfree(tmp);
+send_err:
+ return err < 0 ? err : sent_len;
+}
+
+/**
+ * scif_user_recv() - Receive data from connection queue
+ * @epd: The end point returned from scif_open()
+ * @msg: Address to place data
+ * @len: Length to receive
+ * @flags: blocking or non blocking
+ *
+ * This function is called from the driver IOCTL entry point
+ * only and is a wrapper for _scif_recv().
+ */
+int scif_user_recv(scif_epd_t epd, void __user *msg, int len, int flags)
+{
+ struct scif_endpt *ep = (struct scif_endpt *)epd;
+ int err = 0;
+ int recv_len = 0;
+ char *tmp;
+ int loop_len;
+ int chunk_len = min(len, (1 << (MAX_ORDER + PAGE_SHIFT - 1)));
+
+ dev_dbg(scif_info.mdev.this_device,
+ "SCIFAPI recv (U): ep %p %s\n", ep, scif_ep_states[ep->state]);
+ if (!len)
+ return 0;
+
+ err = scif_msg_param_check(epd, len, flags);
+ if (err)
+ goto recv_err;
+
+ tmp = kmalloc(chunk_len, GFP_KERNEL);
+ if (!tmp) {
+ err = -ENOMEM;
+ goto recv_err;
+ }
+ /*
+ * Grabbing the lock before breaking up the transfer in
+ * multiple chunks is required to ensure that messages do
+ * not get fragmented and reordered.
+ */
+ mutex_lock(&ep->recvlock);
+ while (recv_len != len) {
+ loop_len = len - recv_len;
+ loop_len = min(chunk_len, loop_len);
+ err = _scif_recv(epd, tmp, loop_len, flags);
+ if (err < 0)
+ goto recv_free_err;
+ if (copy_to_user(msg, tmp, err)) {
+ err = -EFAULT;
+ goto recv_free_err;
+ }
+ recv_len += err;
+ msg += err;
+ if (err != loop_len)
+ goto recv_free_err;
+ }
+recv_free_err:
+ mutex_unlock(&ep->recvlock);
+ kfree(tmp);
+recv_err:
+ return err < 0 ? err : recv_len;
+}
+
+/**
+ * scif_send() - Send data to connection queue
+ * @epd: The end point returned from scif_open()
+ * @msg: Address to place data
+ * @len: Length to receive
+ * @flags: blocking or non blocking
+ *
+ * This function is called from the kernel mode only and is
+ * a wrapper for _scif_send().
+ */
+int scif_send(scif_epd_t epd, void *msg, int len, int flags)
+{
+ struct scif_endpt *ep = (struct scif_endpt *)epd;
+ int ret;
+
+ dev_dbg(scif_info.mdev.this_device,
+ "SCIFAPI send (K): ep %p %s\n", ep, scif_ep_states[ep->state]);
+ if (!len)
+ return 0;
+
+ ret = scif_msg_param_check(epd, len, flags);
+ if (ret)
+ return ret;
+ if (!ep->remote_dev)
+ return -ENOTCONN;
+ /*
+ * Grab the mutex lock in the blocking case only
+ * to ensure messages do not get fragmented/reordered.
+ * The non blocking mode is protected using spin locks
+ * in _scif_send().
+ */
+ if (flags & SCIF_SEND_BLOCK)
+ mutex_lock(&ep->sendlock);
+
+ ret = _scif_send(epd, msg, len, flags);
+
+ if (flags & SCIF_SEND_BLOCK)
+ mutex_unlock(&ep->sendlock);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(scif_send);
+
+/**
+ * scif_recv() - Receive data from connection queue
+ * @epd: The end point returned from scif_open()
+ * @msg: Address to place data
+ * @len: Length to receive
+ * @flags: blocking or non blocking
+ *
+ * This function is called from the kernel mode only and is
+ * a wrapper for _scif_recv().
+ */
+int scif_recv(scif_epd_t epd, void *msg, int len, int flags)
+{
+ struct scif_endpt *ep = (struct scif_endpt *)epd;
+ int ret;
+
+ dev_dbg(scif_info.mdev.this_device,
+ "SCIFAPI recv (K): ep %p %s\n", ep, scif_ep_states[ep->state]);
+ if (!len)
+ return 0;
+
+ ret = scif_msg_param_check(epd, len, flags);
+ if (ret)
+ return ret;
+ /*
+ * Grab the mutex lock in the blocking case only
+ * to ensure messages do not get fragmented/reordered.
+ * The non blocking mode is protected using spin locks
+ * in _scif_send().
+ */
+ if (flags & SCIF_RECV_BLOCK)
+ mutex_lock(&ep->recvlock);
+
+ ret = _scif_recv(epd, msg, len, flags);
+
+ if (flags & SCIF_RECV_BLOCK)
+ mutex_unlock(&ep->recvlock);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(scif_recv);
+
+static inline void _scif_poll_wait(struct file *f, wait_queue_head_t *wq,
+ poll_table *p, struct scif_endpt *ep)
+{
+ /*
+ * Because poll_wait makes a GFP_KERNEL allocation, give up the lock
+ * and regrab it afterwards. Because the endpoint state might have
+ * changed while the lock was given up, the state must be checked
+ * again after re-acquiring the lock. The code in __scif_pollfd(..)
+ * does this.
+ */
+ spin_unlock(&ep->lock);
+ poll_wait(f, wq, p);
+ spin_lock(&ep->lock);
+}
+
+unsigned int
+__scif_pollfd(struct file *f, poll_table *wait, struct scif_endpt *ep)
+{
+ unsigned int mask = 0;
+
+ dev_dbg(scif_info.mdev.this_device,
+ "SCIFAPI pollfd: ep %p %s\n", ep, scif_ep_states[ep->state]);
+
+ spin_lock(&ep->lock);
+
+ /* Endpoint is waiting for a non-blocking connect to complete */
+ if (ep->conn_async_state == ASYNC_CONN_INPROGRESS) {
+ _scif_poll_wait(f, &ep->conn_pend_wq, wait, ep);
+ if (ep->conn_async_state == ASYNC_CONN_INPROGRESS) {
+ if (ep->state == SCIFEP_CONNECTED ||
+ ep->state == SCIFEP_DISCONNECTED ||
+ ep->conn_err)
+ mask |= POLLOUT;
+ goto exit;
+ }
+ }
+
+ /* Endpoint is listening for incoming connection requests */
+ if (ep->state == SCIFEP_LISTENING) {
+ _scif_poll_wait(f, &ep->conwq, wait, ep);
+ if (ep->state == SCIFEP_LISTENING) {
+ if (ep->conreqcnt)
+ mask |= POLLIN;
+ goto exit;
+ }
+ }
+
+ /* Endpoint is connected or disconnected */
+ if (ep->state == SCIFEP_CONNECTED || ep->state == SCIFEP_DISCONNECTED) {
+ if (poll_requested_events(wait) & POLLIN)
+ _scif_poll_wait(f, &ep->recvwq, wait, ep);
+ if (poll_requested_events(wait) & POLLOUT)
+ _scif_poll_wait(f, &ep->sendwq, wait, ep);
+ if (ep->state == SCIFEP_CONNECTED ||
+ ep->state == SCIFEP_DISCONNECTED) {
+ /* Data can be read without blocking */
+ if (scif_rb_count(&ep->qp_info.qp->inbound_q, 1))
+ mask |= POLLIN;
+ /* Data can be written without blocking */
+ if (scif_rb_space(&ep->qp_info.qp->outbound_q))
+ mask |= POLLOUT;
+ /* Return POLLHUP if endpoint is disconnected */
+ if (ep->state == SCIFEP_DISCONNECTED)
+ mask |= POLLHUP;
+ goto exit;
+ }
+ }
+
+ /* Return POLLERR if the endpoint is in none of the above states */
+ mask |= POLLERR;
+exit:
+ spin_unlock(&ep->lock);
+ return mask;
+}
+
+/**
+ * scif_poll() - Kernel mode SCIF poll
+ * @ufds: Array of scif_pollepd structures containing the end points
+ * and events to poll on
+ * @nfds: Size of the ufds array
+ * @timeout_msecs: Timeout in msecs, -ve implies infinite timeout
+ *
+ * The code flow in this function is based on do_poll(..) in select.c
+ *
+ * Returns the number of endpoints which have pending events or 0 in
+ * the event of a timeout. If a signal is used for wake up, -EINTR is
+ * returned.
+ */
+int
+scif_poll(struct scif_pollepd *ufds, unsigned int nfds, long timeout_msecs)
+{
+ struct poll_wqueues table;
+ poll_table *pt;
+ int i, mask, count = 0, timed_out = timeout_msecs == 0;
+ u64 timeout = timeout_msecs < 0 ? MAX_SCHEDULE_TIMEOUT
+ : msecs_to_jiffies(timeout_msecs);
+
+ poll_initwait(&table);
+ pt = &table.pt;
+ while (1) {
+ for (i = 0; i < nfds; i++) {
+ pt->_key = ufds[i].events | POLLERR | POLLHUP;
+ mask = __scif_pollfd(ufds[i].epd->anon,
+ pt, ufds[i].epd);
+ mask &= ufds[i].events | POLLERR | POLLHUP;
+ if (mask) {
+ count++;
+ pt->_qproc = NULL;
+ }
+ ufds[i].revents = mask;
+ }
+ pt->_qproc = NULL;
+ if (!count) {
+ count = table.error;
+ if (signal_pending(current))
+ count = -EINTR;
+ }
+ if (count || timed_out)
+ break;
+
+ if (!schedule_timeout_interruptible(timeout))
+ timed_out = 1;
+ }
+ poll_freewait(&table);
+ return count;
+}
+EXPORT_SYMBOL_GPL(scif_poll);
+
+int scif_get_node_ids(u16 *nodes, int len, u16 *self)
+{
+ int online = 0;
+ int offset = 0;
+ int node;
+
+ if (!scif_is_mgmt_node())
+ scif_get_node_info();
+
+ *self = scif_info.nodeid;
+ mutex_lock(&scif_info.conflock);
+ len = min_t(int, len, scif_info.total);
+ for (node = 0; node <= scif_info.maxid; node++) {
+ if (_scifdev_alive(&scif_dev[node])) {
+ online++;
+ if (offset < len)
+ nodes[offset++] = node;
+ }
+ }
+ dev_dbg(scif_info.mdev.this_device,
+ "SCIFAPI get_node_ids total %d online %d filled in %d nodes\n",
+ scif_info.total, online, offset);
+ mutex_unlock(&scif_info.conflock);
+
+ return online;
+}
+EXPORT_SYMBOL_GPL(scif_get_node_ids);
+
+static int scif_add_client_dev(struct device *dev, struct subsys_interface *si)
+{
+ struct scif_client *client =
+ container_of(si, struct scif_client, si);
+ struct scif_peer_dev *spdev =
+ container_of(dev, struct scif_peer_dev, dev);
+
+ if (client->probe)
+ client->probe(spdev);
+ return 0;
+}
+
+static void scif_remove_client_dev(struct device *dev,
+ struct subsys_interface *si)
+{
+ struct scif_client *client =
+ container_of(si, struct scif_client, si);
+ struct scif_peer_dev *spdev =
+ container_of(dev, struct scif_peer_dev, dev);
+
+ if (client->remove)
+ client->remove(spdev);
+}
+
+void scif_client_unregister(struct scif_client *client)
+{
+ subsys_interface_unregister(&client->si);
+}
+EXPORT_SYMBOL_GPL(scif_client_unregister);
+
+int scif_client_register(struct scif_client *client)
+{
+ struct subsys_interface *si = &client->si;
+
+ si->name = client->name;
+ si->subsys = &scif_peer_bus;
+ si->add_dev = scif_add_client_dev;
+ si->remove_dev = scif_remove_client_dev;
+
+ return subsys_interface_register(&client->si);
+}
+EXPORT_SYMBOL_GPL(scif_client_register);
diff --git a/drivers/misc/mic/scif/scif_debugfs.c b/drivers/misc/mic/scif/scif_debugfs.c
new file mode 100644
index 0000000..6884dad
--- /dev/null
+++ b/drivers/misc/mic/scif/scif_debugfs.c
@@ -0,0 +1,162 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2014 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Intel SCIF driver.
+ *
+ */
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+#include "../common/mic_dev.h"
+#include "scif_main.h"
+
+/* Debugfs parent dir */
+static struct dentry *scif_dbg;
+
+static int scif_dev_test(struct seq_file *s, void *unused)
+{
+ int node;
+
+ seq_printf(s, "Total Nodes %d Self Node Id %d Maxid %d\n",
+ scif_info.total, scif_info.nodeid,
+ scif_info.maxid);
+
+ if (!scif_dev)
+ return 0;
+
+ seq_printf(s, "%-16s\t%-16s\n", "node_id", "state");
+
+ for (node = 0; node <= scif_info.maxid; node++)
+ seq_printf(s, "%-16d\t%-16s\n", scif_dev[node].node,
+ _scifdev_alive(&scif_dev[node]) ?
+ "Running" : "Offline");
+ return 0;
+}
+
+static int scif_dev_test_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, scif_dev_test, inode->i_private);
+}
+
+static int scif_dev_test_release(struct inode *inode, struct file *file)
+{
+ return single_release(inode, file);
+}
+
+static const struct file_operations scif_dev_ops = {
+ .owner = THIS_MODULE,
+ .open = scif_dev_test_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = scif_dev_test_release
+};
+
+static void scif_display_window(struct scif_window *window, struct seq_file *s)
+{
+ int j;
+ struct scatterlist *sg;
+ scif_pinned_pages_t pin = window->pinned_pages;
+
+ seq_printf(s, "window %p type %d temp %d offset 0x%llx ",
+ window, window->type, window->temp, window->offset);
+ seq_printf(s, "nr_pages 0x%llx nr_contig_chunks 0x%x prot %d ",
+ window->nr_pages, window->nr_contig_chunks, window->prot);
+ seq_printf(s, "ref_count %d magic 0x%llx peer_window 0x%llx ",
+ window->ref_count, window->magic, window->peer_window);
+ seq_printf(s, "unreg_state 0x%x va_for_temp 0x%lx\n",
+ window->unreg_state, window->va_for_temp);
+
+ for (j = 0; j < window->nr_contig_chunks; j++)
+ seq_printf(s, "page[%d] dma_addr 0x%llx num_pages 0x%llx\n", j,
+ window->dma_addr[j], window->num_pages[j]);
+
+ if (window->type == SCIF_WINDOW_SELF && pin)
+ for (j = 0; j < window->nr_pages; j++)
+ seq_printf(s, "page[%d] = pinned_pages %p address %p\n",
+ j, pin->pages[j],
+ page_address(pin->pages[j]));
+
+ if (window->st)
+ for_each_sg(window->st->sgl, sg, window->st->nents, j)
+ seq_printf(s, "sg[%d] dma addr 0x%llx length 0x%x\n",
+ j, sg_dma_address(sg), sg_dma_len(sg));
+}
+
+static void scif_display_all_windows(struct list_head *head, struct seq_file *s)
+{
+ struct list_head *item;
+ struct scif_window *window;
+
+ list_for_each(item, head) {
+ window = list_entry(item, struct scif_window, list);
+ scif_display_window(window, s);
+ }
+}
+
+static int scif_rma_test(struct seq_file *s, void *unused)
+{
+ struct scif_endpt *ep;
+ struct list_head *pos;
+
+ mutex_lock(&scif_info.connlock);
+ list_for_each(pos, &scif_info.connected) {
+ ep = list_entry(pos, struct scif_endpt, list);
+ seq_printf(s, "ep %p self windows\n", ep);
+ mutex_lock(&ep->rma_info.rma_lock);
+ scif_display_all_windows(&ep->rma_info.reg_list, s);
+ seq_printf(s, "ep %p remote windows\n", ep);
+ scif_display_all_windows(&ep->rma_info.remote_reg_list, s);
+ mutex_unlock(&ep->rma_info.rma_lock);
+ }
+ mutex_unlock(&scif_info.connlock);
+ return 0;
+}
+
+static int scif_rma_test_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, scif_rma_test, inode->i_private);
+}
+
+static int scif_rma_test_release(struct inode *inode, struct file *file)
+{
+ return single_release(inode, file);
+}
+
+static const struct file_operations scif_rma_ops = {
+ .owner = THIS_MODULE,
+ .open = scif_rma_test_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = scif_rma_test_release
+};
+
+void __init scif_init_debugfs(void)
+{
+ scif_dbg = debugfs_create_dir(KBUILD_MODNAME, NULL);
+ if (!scif_dbg) {
+ dev_err(scif_info.mdev.this_device,
+ "can't create debugfs dir scif\n");
+ return;
+ }
+
+ debugfs_create_file("scif_dev", 0444, scif_dbg, NULL, &scif_dev_ops);
+ debugfs_create_file("scif_rma", 0444, scif_dbg, NULL, &scif_rma_ops);
+ debugfs_create_u8("en_msg_log", 0666, scif_dbg, &scif_info.en_msg_log);
+ debugfs_create_u8("p2p_enable", 0666, scif_dbg, &scif_info.p2p_enable);
+}
+
+void scif_exit_debugfs(void)
+{
+ debugfs_remove_recursive(scif_dbg);
+}
diff --git a/drivers/misc/mic/scif/scif_dma.c b/drivers/misc/mic/scif/scif_dma.c
new file mode 100644
index 0000000..95a13c6
--- /dev/null
+++ b/drivers/misc/mic/scif/scif_dma.c
@@ -0,0 +1,1979 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2015 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Intel SCIF driver.
+ *
+ */
+#include "scif_main.h"
+#include "scif_map.h"
+
+/*
+ * struct scif_dma_comp_cb - SCIF DMA completion callback
+ *
+ * @dma_completion_func: DMA completion callback
+ * @cb_cookie: DMA completion callback cookie
+ * @temp_buf: Temporary buffer
+ * @temp_buf_to_free: Temporary buffer to be freed
+ * @is_cache: Is a kmem_cache allocated buffer
+ * @dst_offset: Destination registration offset
+ * @dst_window: Destination registration window
+ * @len: Length of the temp buffer
+ * @temp_phys: DMA address of the temp buffer
+ * @sdev: The SCIF device
+ * @header_padding: padding for cache line alignment
+ */
+struct scif_dma_comp_cb {
+ void (*dma_completion_func)(void *cookie);
+ void *cb_cookie;
+ u8 *temp_buf;
+ u8 *temp_buf_to_free;
+ bool is_cache;
+ s64 dst_offset;
+ struct scif_window *dst_window;
+ size_t len;
+ dma_addr_t temp_phys;
+ struct scif_dev *sdev;
+ int header_padding;
+};
+
+/**
+ * struct scif_copy_work - Work for DMA copy
+ *
+ * @src_offset: Starting source offset
+ * @dst_offset: Starting destination offset
+ * @src_window: Starting src registered window
+ * @dst_window: Starting dst registered window
+ * @loopback: true if this is a loopback DMA transfer
+ * @len: Length of the transfer
+ * @comp_cb: DMA copy completion callback
+ * @remote_dev: The remote SCIF peer device
+ * @fence_type: polling or interrupt based
+ * @ordered: is this a tail byte ordered DMA transfer
+ */
+struct scif_copy_work {
+ s64 src_offset;
+ s64 dst_offset;
+ struct scif_window *src_window;
+ struct scif_window *dst_window;
+ int loopback;
+ size_t len;
+ struct scif_dma_comp_cb *comp_cb;
+ struct scif_dev *remote_dev;
+ int fence_type;
+ bool ordered;
+};
+
+#ifndef list_entry_next
+#define list_entry_next(pos, member) \
+ list_entry(pos->member.next, typeof(*pos), member)
+#endif
+
+/**
+ * scif_reserve_dma_chan:
+ * @ep: Endpoint Descriptor.
+ *
+ * This routine reserves a DMA channel for a particular
+ * endpoint. All DMA transfers for an endpoint are always
+ * programmed on the same DMA channel.
+ */
+int scif_reserve_dma_chan(struct scif_endpt *ep)
+{
+ int err = 0;
+ struct scif_dev *scifdev;
+ struct scif_hw_dev *sdev;
+ struct dma_chan *chan;
+
+ /* Loopback DMAs are not supported on the management node */
+ if (!scif_info.nodeid && scifdev_self(ep->remote_dev))
+ return 0;
+ if (scif_info.nodeid)
+ scifdev = &scif_dev[0];
+ else
+ scifdev = ep->remote_dev;
+ sdev = scifdev->sdev;
+ if (!sdev->num_dma_ch)
+ return -ENODEV;
+ chan = sdev->dma_ch[scifdev->dma_ch_idx];
+ scifdev->dma_ch_idx = (scifdev->dma_ch_idx + 1) % sdev->num_dma_ch;
+ mutex_lock(&ep->rma_info.rma_lock);
+ ep->rma_info.dma_chan = chan;
+ mutex_unlock(&ep->rma_info.rma_lock);
+ return err;
+}
+
+#ifdef CONFIG_MMU_NOTIFIER
+/**
+ * scif_rma_destroy_tcw:
+ *
+ * This routine destroys temporary cached windows
+ */
+static
+void __scif_rma_destroy_tcw(struct scif_mmu_notif *mmn,
+ struct scif_endpt *ep,
+ u64 start, u64 len)
+{
+ struct list_head *item, *tmp;
+ struct scif_window *window;
+ u64 start_va, end_va;
+ u64 end = start + len;
+
+ if (end <= start)
+ return;
+
+ list_for_each_safe(item, tmp, &mmn->tc_reg_list) {
+ window = list_entry(item, struct scif_window, list);
+ ep = (struct scif_endpt *)window->ep;
+ if (!len)
+ break;
+ start_va = window->va_for_temp;
+ end_va = start_va + (window->nr_pages << PAGE_SHIFT);
+ if (start < start_va && end <= start_va)
+ break;
+ if (start >= end_va)
+ continue;
+ __scif_rma_destroy_tcw_helper(window);
+ }
+}
+
+static void scif_rma_destroy_tcw(struct scif_mmu_notif *mmn, u64 start, u64 len)
+{
+ struct scif_endpt *ep = mmn->ep;
+
+ spin_lock(&ep->rma_info.tc_lock);
+ __scif_rma_destroy_tcw(mmn, ep, start, len);
+ spin_unlock(&ep->rma_info.tc_lock);
+}
+
+static void scif_rma_destroy_tcw_ep(struct scif_endpt *ep)
+{
+ struct list_head *item, *tmp;
+ struct scif_mmu_notif *mmn;
+
+ list_for_each_safe(item, tmp, &ep->rma_info.mmn_list) {
+ mmn = list_entry(item, struct scif_mmu_notif, list);
+ scif_rma_destroy_tcw(mmn, 0, ULONG_MAX);
+ }
+}
+
+static void __scif_rma_destroy_tcw_ep(struct scif_endpt *ep)
+{
+ struct list_head *item, *tmp;
+ struct scif_mmu_notif *mmn;
+
+ spin_lock(&ep->rma_info.tc_lock);
+ list_for_each_safe(item, tmp, &ep->rma_info.mmn_list) {
+ mmn = list_entry(item, struct scif_mmu_notif, list);
+ __scif_rma_destroy_tcw(mmn, ep, 0, ULONG_MAX);
+ }
+ spin_unlock(&ep->rma_info.tc_lock);
+}
+
+static bool scif_rma_tc_can_cache(struct scif_endpt *ep, size_t cur_bytes)
+{
+ if ((cur_bytes >> PAGE_SHIFT) > scif_info.rma_tc_limit)
+ return false;
+ if ((atomic_read(&ep->rma_info.tcw_total_pages)
+ + (cur_bytes >> PAGE_SHIFT)) >
+ scif_info.rma_tc_limit) {
+ dev_info(scif_info.mdev.this_device,
+ "%s %d total=%d, current=%zu reached max\n",
+ __func__, __LINE__,
+ atomic_read(&ep->rma_info.tcw_total_pages),
+ (1 + (cur_bytes >> PAGE_SHIFT)));
+ scif_rma_destroy_tcw_invalid();
+ __scif_rma_destroy_tcw_ep(ep);
+ }
+ return true;
+}
+
+static void scif_mmu_notifier_release(struct mmu_notifier *mn,
+ struct mm_struct *mm)
+{
+ struct scif_mmu_notif *mmn;
+
+ mmn = container_of(mn, struct scif_mmu_notif, ep_mmu_notifier);
+ scif_rma_destroy_tcw(mmn, 0, ULONG_MAX);
+ schedule_work(&scif_info.misc_work);
+}
+
+static void scif_mmu_notifier_invalidate_page(struct mmu_notifier *mn,
+ struct mm_struct *mm,
+ unsigned long address)
+{
+ struct scif_mmu_notif *mmn;
+
+ mmn = container_of(mn, struct scif_mmu_notif, ep_mmu_notifier);
+ scif_rma_destroy_tcw(mmn, address, PAGE_SIZE);
+}
+
+static void scif_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
+ struct mm_struct *mm,
+ unsigned long start,
+ unsigned long end)
+{
+ struct scif_mmu_notif *mmn;
+
+ mmn = container_of(mn, struct scif_mmu_notif, ep_mmu_notifier);
+ scif_rma_destroy_tcw(mmn, start, end - start);
+}
+
+static void scif_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn,
+ struct mm_struct *mm,
+ unsigned long start,
+ unsigned long end)
+{
+ /*
+ * Nothing to do here, everything needed was done in
+ * invalidate_range_start.
+ */
+}
+
+static const struct mmu_notifier_ops scif_mmu_notifier_ops = {
+ .release = scif_mmu_notifier_release,
+ .clear_flush_young = NULL,
+ .invalidate_page = scif_mmu_notifier_invalidate_page,
+ .invalidate_range_start = scif_mmu_notifier_invalidate_range_start,
+ .invalidate_range_end = scif_mmu_notifier_invalidate_range_end};
+
+static void scif_ep_unregister_mmu_notifier(struct scif_endpt *ep)
+{
+ struct scif_endpt_rma_info *rma = &ep->rma_info;
+ struct scif_mmu_notif *mmn = NULL;
+ struct list_head *item, *tmp;
+
+ mutex_lock(&ep->rma_info.mmn_lock);
+ list_for_each_safe(item, tmp, &rma->mmn_list) {
+ mmn = list_entry(item, struct scif_mmu_notif, list);
+ mmu_notifier_unregister(&mmn->ep_mmu_notifier, mmn->mm);
+ list_del(item);
+ kfree(mmn);
+ }
+ mutex_unlock(&ep->rma_info.mmn_lock);
+}
+
+static void scif_init_mmu_notifier(struct scif_mmu_notif *mmn,
+ struct mm_struct *mm, struct scif_endpt *ep)
+{
+ mmn->ep = ep;
+ mmn->mm = mm;
+ mmn->ep_mmu_notifier.ops = &scif_mmu_notifier_ops;
+ INIT_LIST_HEAD(&mmn->list);
+ INIT_LIST_HEAD(&mmn->tc_reg_list);
+}
+
+static struct scif_mmu_notif *
+scif_find_mmu_notifier(struct mm_struct *mm, struct scif_endpt_rma_info *rma)
+{
+ struct scif_mmu_notif *mmn;
+ struct list_head *item;
+
+ list_for_each(item, &rma->mmn_list) {
+ mmn = list_entry(item, struct scif_mmu_notif, list);
+ if (mmn->mm == mm)
+ return mmn;
+ }
+ return NULL;
+}
+
+static struct scif_mmu_notif *
+scif_add_mmu_notifier(struct mm_struct *mm, struct scif_endpt *ep)
+{
+ struct scif_mmu_notif *mmn
+ = kzalloc(sizeof(*mmn), GFP_KERNEL);
+
+ if (!mmn)
+ return ERR_PTR(ENOMEM);
+
+ scif_init_mmu_notifier(mmn, current->mm, ep);
+ if (mmu_notifier_register(&mmn->ep_mmu_notifier,
+ current->mm)) {
+ kfree(mmn);
+ return ERR_PTR(EBUSY);
+ }
+ list_add(&mmn->list, &ep->rma_info.mmn_list);
+ return mmn;
+}
+
+/*
+ * Called from the misc thread to destroy temporary cached windows and
+ * unregister the MMU notifier for the SCIF endpoint.
+ */
+void scif_mmu_notif_handler(struct work_struct *work)
+{
+ struct list_head *pos, *tmpq;
+ struct scif_endpt *ep;
+restart:
+ scif_rma_destroy_tcw_invalid();
+ spin_lock(&scif_info.rmalock);
+ list_for_each_safe(pos, tmpq, &scif_info.mmu_notif_cleanup) {
+ ep = list_entry(pos, struct scif_endpt, mmu_list);
+ list_del(&ep->mmu_list);
+ spin_unlock(&scif_info.rmalock);
+ scif_rma_destroy_tcw_ep(ep);
+ scif_ep_unregister_mmu_notifier(ep);
+ goto restart;
+ }
+ spin_unlock(&scif_info.rmalock);
+}
+
+static bool scif_is_set_reg_cache(int flags)
+{
+ return !!(flags & SCIF_RMA_USECACHE);
+}
+#else
+static struct scif_mmu_notif *
+scif_find_mmu_notifier(struct mm_struct *mm,
+ struct scif_endpt_rma_info *rma)
+{
+ return NULL;
+}
+
+static struct scif_mmu_notif *
+scif_add_mmu_notifier(struct mm_struct *mm, struct scif_endpt *ep)
+{
+ return NULL;
+}
+
+void scif_mmu_notif_handler(struct work_struct *work)
+{
+}
+
+static bool scif_is_set_reg_cache(int flags)
+{
+ return false;
+}
+
+static bool scif_rma_tc_can_cache(struct scif_endpt *ep, size_t cur_bytes)
+{
+ return false;
+}
+#endif
+
+/**
+ * scif_register_temp:
+ * @epd: End Point Descriptor.
+ * @addr: virtual address to/from which to copy
+ * @len: length of range to copy
+ * @out_offset: computed offset returned by reference.
+ * @out_window: allocated registered window returned by reference.
+ *
+ * Create a temporary registered window. The peer will not know about this
+ * window. This API is used for scif_vreadfrom()/scif_vwriteto() API's.
+ */
+static int
+scif_register_temp(scif_epd_t epd, unsigned long addr, size_t len, int prot,
+ off_t *out_offset, struct scif_window **out_window)
+{
+ struct scif_endpt *ep = (struct scif_endpt *)epd;
+ int err;
+ scif_pinned_pages_t pinned_pages;
+ size_t aligned_len;
+
+ aligned_len = ALIGN(len, PAGE_SIZE);
+
+ err = __scif_pin_pages((void *)(addr & PAGE_MASK),
+ aligned_len, &prot, 0, &pinned_pages);
+ if (err)
+ return err;
+
+ pinned_pages->prot = prot;
+
+ /* Compute the offset for this registration */
+ err = scif_get_window_offset(ep, 0, 0,
+ aligned_len >> PAGE_SHIFT,
+ (s64 *)out_offset);
+ if (err)
+ goto error_unpin;
+
+ /* Allocate and prepare self registration window */
+ *out_window = scif_create_window(ep, aligned_len >> PAGE_SHIFT,
+ *out_offset, true);
+ if (!*out_window) {
+ scif_free_window_offset(ep, NULL, *out_offset);
+ err = -ENOMEM;
+ goto error_unpin;
+ }
+
+ (*out_window)->pinned_pages = pinned_pages;
+ (*out_window)->nr_pages = pinned_pages->nr_pages;
+ (*out_window)->prot = pinned_pages->prot;
+
+ (*out_window)->va_for_temp = addr & PAGE_MASK;
+ err = scif_map_window(ep->remote_dev, *out_window);
+ if (err) {
+ /* Something went wrong! Rollback */
+ scif_destroy_window(ep, *out_window);
+ *out_window = NULL;
+ } else {
+ *out_offset |= (addr - (*out_window)->va_for_temp);
+ }
+ return err;
+error_unpin:
+ if (err)
+ dev_err(&ep->remote_dev->sdev->dev,
+ "%s %d err %d\n", __func__, __LINE__, err);
+ scif_unpin_pages(pinned_pages);
+ return err;
+}
+
+#define SCIF_DMA_TO (3 * HZ)
+
+/*
+ * scif_sync_dma - Program a DMA without an interrupt descriptor
+ *
+ * @dev - The address of the pointer to the device instance used
+ * for DMA registration.
+ * @chan - DMA channel to be used.
+ * @sync_wait: Wait for DMA to complete?
+ *
+ * Return 0 on success and -errno on error.
+ */
+static int scif_sync_dma(struct scif_hw_dev *sdev, struct dma_chan *chan,
+ bool sync_wait)
+{
+ int err = 0;
+ struct dma_async_tx_descriptor *tx = NULL;
+ enum dma_ctrl_flags flags = DMA_PREP_FENCE;
+ dma_cookie_t cookie;
+ struct dma_device *ddev;
+
+ if (!chan) {
+ err = -EIO;
+ dev_err(&sdev->dev, "%s %d err %d\n",
+ __func__, __LINE__, err);
+ return err;
+ }
+ ddev = chan->device;
+
+ tx = ddev->device_prep_dma_memcpy(chan, 0, 0, 0, flags);
+ if (!tx) {
+ err = -ENOMEM;
+ dev_err(&sdev->dev, "%s %d err %d\n",
+ __func__, __LINE__, err);
+ goto release;
+ }
+ cookie = tx->tx_submit(tx);
+
+ if (dma_submit_error(cookie)) {
+ err = -ENOMEM;
+ dev_err(&sdev->dev, "%s %d err %d\n",
+ __func__, __LINE__, err);
+ goto release;
+ }
+ if (!sync_wait) {
+ dma_async_issue_pending(chan);
+ } else {
+ if (dma_sync_wait(chan, cookie) == DMA_COMPLETE) {
+ err = 0;
+ } else {
+ err = -EIO;
+ dev_err(&sdev->dev, "%s %d err %d\n",
+ __func__, __LINE__, err);
+ }
+ }
+release:
+ return err;
+}
+
+static void scif_dma_callback(void *arg)
+{
+ struct completion *done = (struct completion *)arg;
+
+ complete(done);
+}
+
+#define SCIF_DMA_SYNC_WAIT true
+#define SCIF_DMA_POLL BIT(0)
+#define SCIF_DMA_INTR BIT(1)
+
+/*
+ * scif_async_dma - Program a DMA with an interrupt descriptor
+ *
+ * @dev - The address of the pointer to the device instance used
+ * for DMA registration.
+ * @chan - DMA channel to be used.
+ * Return 0 on success and -errno on error.
+ */
+static int scif_async_dma(struct scif_hw_dev *sdev, struct dma_chan *chan)
+{
+ int err = 0;
+ struct dma_device *ddev;
+ struct dma_async_tx_descriptor *tx = NULL;
+ enum dma_ctrl_flags flags = DMA_PREP_INTERRUPT | DMA_PREP_FENCE;
+ DECLARE_COMPLETION_ONSTACK(done_wait);
+ dma_cookie_t cookie;
+ enum dma_status status;
+
+ if (!chan) {
+ err = -EIO;
+ dev_err(&sdev->dev, "%s %d err %d\n",
+ __func__, __LINE__, err);
+ return err;
+ }
+ ddev = chan->device;
+
+ tx = ddev->device_prep_dma_memcpy(chan, 0, 0, 0, flags);
+ if (!tx) {
+ err = -ENOMEM;
+ dev_err(&sdev->dev, "%s %d err %d\n",
+ __func__, __LINE__, err);
+ goto release;
+ }
+ reinit_completion(&done_wait);
+ tx->callback = scif_dma_callback;
+ tx->callback_param = &done_wait;
+ cookie = tx->tx_submit(tx);
+
+ if (dma_submit_error(cookie)) {
+ err = -ENOMEM;
+ dev_err(&sdev->dev, "%s %d err %d\n",
+ __func__, __LINE__, err);
+ goto release;
+ }
+ dma_async_issue_pending(chan);
+
+ err = wait_for_completion_timeout(&done_wait, SCIF_DMA_TO);
+ if (!err) {
+ err = -EIO;
+ dev_err(&sdev->dev, "%s %d err %d\n",
+ __func__, __LINE__, err);
+ goto release;
+ }
+ err = 0;
+ status = dma_async_is_tx_complete(chan, cookie, NULL, NULL);
+ if (status != DMA_COMPLETE) {
+ err = -EIO;
+ dev_err(&sdev->dev, "%s %d err %d\n",
+ __func__, __LINE__, err);
+ goto release;
+ }
+release:
+ return err;
+}
+
+/*
+ * scif_drain_dma_poll - Drain all outstanding DMA operations for a particular
+ * DMA channel via polling.
+ *
+ * @sdev - The SCIF device
+ * @chan - DMA channel
+ * Return 0 on success and -errno on error.
+ */
+static int scif_drain_dma_poll(struct scif_hw_dev *sdev, struct dma_chan *chan)
+{
+ if (!chan)
+ return -EINVAL;
+ return scif_sync_dma(sdev, chan, SCIF_DMA_SYNC_WAIT);
+}
+
+/*
+ * scif_drain_dma_intr - Drain all outstanding DMA operations for a particular
+ * DMA channel via interrupt based blocking wait.
+ *
+ * @sdev - The SCIF device
+ * @chan - DMA channel
+ * Return 0 on success and -errno on error.
+ */
+int scif_drain_dma_intr(struct scif_hw_dev *sdev, struct dma_chan *chan)
+{
+ if (!chan)
+ return -EINVAL;
+ return scif_async_dma(sdev, chan);
+}
+
+/**
+ * scif_rma_destroy_windows:
+ *
+ * This routine destroys all windows queued for cleanup
+ */
+void scif_rma_destroy_windows(void)
+{
+ struct list_head *item, *tmp;
+ struct scif_window *window;
+ struct scif_endpt *ep;
+ struct dma_chan *chan;
+
+ might_sleep();
+restart:
+ spin_lock(&scif_info.rmalock);
+ list_for_each_safe(item, tmp, &scif_info.rma) {
+ window = list_entry(item, struct scif_window,
+ list);
+ ep = (struct scif_endpt *)window->ep;
+ chan = ep->rma_info.dma_chan;
+
+ list_del_init(&window->list);
+ spin_unlock(&scif_info.rmalock);
+ if (!chan || !scifdev_alive(ep) ||
+ !scif_drain_dma_intr(ep->remote_dev->sdev,
+ ep->rma_info.dma_chan))
+ /* Remove window from global list */
+ window->unreg_state = OP_COMPLETED;
+ else
+ dev_warn(&ep->remote_dev->sdev->dev,
+ "DMA engine hung?\n");
+ if (window->unreg_state == OP_COMPLETED) {
+ if (window->type == SCIF_WINDOW_SELF)
+ scif_destroy_window(ep, window);
+ else
+ scif_destroy_remote_window(window);
+ atomic_dec(&ep->rma_info.tw_refcount);
+ }
+ goto restart;
+ }
+ spin_unlock(&scif_info.rmalock);
+}
+
+/**
+ * scif_rma_destroy_tcw:
+ *
+ * This routine destroys temporary cached registered windows
+ * which have been queued for cleanup.
+ */
+void scif_rma_destroy_tcw_invalid(void)
+{
+ struct list_head *item, *tmp;
+ struct scif_window *window;
+ struct scif_endpt *ep;
+ struct dma_chan *chan;
+
+ might_sleep();
+restart:
+ spin_lock(&scif_info.rmalock);
+ list_for_each_safe(item, tmp, &scif_info.rma_tc) {
+ window = list_entry(item, struct scif_window, list);
+ ep = (struct scif_endpt *)window->ep;
+ chan = ep->rma_info.dma_chan;
+ list_del_init(&window->list);
+ spin_unlock(&scif_info.rmalock);
+ mutex_lock(&ep->rma_info.rma_lock);
+ if (!chan || !scifdev_alive(ep) ||
+ !scif_drain_dma_intr(ep->remote_dev->sdev,
+ ep->rma_info.dma_chan)) {
+ atomic_sub(window->nr_pages,
+ &ep->rma_info.tcw_total_pages);
+ scif_destroy_window(ep, window);
+ atomic_dec(&ep->rma_info.tcw_refcount);
+ } else {
+ dev_warn(&ep->remote_dev->sdev->dev,
+ "DMA engine hung?\n");
+ }
+ mutex_unlock(&ep->rma_info.rma_lock);
+ goto restart;
+ }
+ spin_unlock(&scif_info.rmalock);
+}
+
+static inline
+void *_get_local_va(off_t off, struct scif_window *window, size_t len)
+{
+ int page_nr = (off - window->offset) >> PAGE_SHIFT;
+ off_t page_off = off & ~PAGE_MASK;
+ void *va = NULL;
+
+ if (window->type == SCIF_WINDOW_SELF) {
+ struct page **pages = window->pinned_pages->pages;
+
+ va = page_address(pages[page_nr]) + page_off;
+ }
+ return va;
+}
+
+static inline
+void *ioremap_remote(off_t off, struct scif_window *window,
+ size_t len, struct scif_dev *dev,
+ struct scif_window_iter *iter)
+{
+ dma_addr_t phys = scif_off_to_dma_addr(window, off, NULL, iter);
+
+ /*
+ * If the DMA address is not card relative then we need the DMA
+ * addresses to be an offset into the bar. The aperture base was already
+ * added so subtract it here since scif_ioremap is going to add it again
+ */
+ if (!scifdev_self(dev) && window->type == SCIF_WINDOW_PEER &&
+ dev->sdev->aper && !dev->sdev->card_rel_da)
+ phys = phys - dev->sdev->aper->pa;
+ return scif_ioremap(phys, len, dev);
+}
+
+static inline void
+iounmap_remote(void *virt, size_t size, struct scif_copy_work *work)
+{
+ scif_iounmap(virt, size, work->remote_dev);
+}
+
+/*
+ * Takes care of ordering issue caused by
+ * 1. Hardware: Only in the case of cpu copy from mgmt node to card
+ * because of WC memory.
+ * 2. Software: If memcpy reorders copy instructions for optimization.
+ * This could happen at both mgmt node and card.
+ */
+static inline void
+scif_ordered_memcpy_toio(char *dst, const char *src, size_t count)
+{
+ if (!count)
+ return;
+
+ memcpy_toio((void __iomem __force *)dst, src, --count);
+ /* Order the last byte with the previous stores */
+ wmb();
+ *(dst + count) = *(src + count);
+}
+
+static inline void scif_unaligned_cpy_toio(char *dst, const char *src,
+ size_t count, bool ordered)
+{
+ if (ordered)
+ scif_ordered_memcpy_toio(dst, src, count);
+ else
+ memcpy_toio((void __iomem __force *)dst, src, count);
+}
+
+static inline
+void scif_ordered_memcpy_fromio(char *dst, const char *src, size_t count)
+{
+ if (!count)
+ return;
+
+ memcpy_fromio(dst, (void __iomem __force *)src, --count);
+ /* Order the last byte with the previous loads */
+ rmb();
+ *(dst + count) = *(src + count);
+}
+
+static inline void scif_unaligned_cpy_fromio(char *dst, const char *src,
+ size_t count, bool ordered)
+{
+ if (ordered)
+ scif_ordered_memcpy_fromio(dst, src, count);
+ else
+ memcpy_fromio(dst, (void __iomem __force *)src, count);
+}
+
+#define SCIF_RMA_ERROR_CODE (~(dma_addr_t)0x0)
+
+/*
+ * scif_off_to_dma_addr:
+ * Obtain the dma_addr given the window and the offset.
+ * @window: Registered window.
+ * @off: Window offset.
+ * @nr_bytes: Return the number of contiguous bytes till next DMA addr index.
+ * @index: Return the index of the dma_addr array found.
+ * @start_off: start offset of index of the dma addr array found.
+ * The nr_bytes provides the callee an estimate of the maximum possible
+ * DMA xfer possible while the index/start_off provide faster lookups
+ * for the next iteration.
+ */
+dma_addr_t scif_off_to_dma_addr(struct scif_window *window, s64 off,
+ size_t *nr_bytes, struct scif_window_iter *iter)
+{
+ int i, page_nr;
+ s64 start, end;
+ off_t page_off;
+
+ if (window->nr_pages == window->nr_contig_chunks) {
+ page_nr = (off - window->offset) >> PAGE_SHIFT;
+ page_off = off & ~PAGE_MASK;
+
+ if (nr_bytes)
+ *nr_bytes = PAGE_SIZE - page_off;
+ return window->dma_addr[page_nr] | page_off;
+ }
+ if (iter) {
+ i = iter->index;
+ start = iter->offset;
+ } else {
+ i = 0;
+ start = window->offset;
+ }
+ for (; i < window->nr_contig_chunks; i++) {
+ end = start + (window->num_pages[i] << PAGE_SHIFT);
+ if (off >= start && off < end) {
+ if (iter) {
+ iter->index = i;
+ iter->offset = start;
+ }
+ if (nr_bytes)
+ *nr_bytes = end - off;
+ return (window->dma_addr[i] + (off - start));
+ }
+ start += (window->num_pages[i] << PAGE_SHIFT);
+ }
+ dev_err(scif_info.mdev.this_device,
+ "%s %d BUG. Addr not found? window %p off 0x%llx\n",
+ __func__, __LINE__, window, off);
+ return SCIF_RMA_ERROR_CODE;
+}
+
+/*
+ * Copy between rma window and temporary buffer
+ */
+static void scif_rma_local_cpu_copy(s64 offset, struct scif_window *window,
+ u8 *temp, size_t rem_len, bool to_temp)
+{
+ void *window_virt;
+ size_t loop_len;
+ int offset_in_page;
+ s64 end_offset;
+
+ offset_in_page = offset & ~PAGE_MASK;
+ loop_len = PAGE_SIZE - offset_in_page;
+
+ if (rem_len < loop_len)
+ loop_len = rem_len;
+
+ window_virt = _get_local_va(offset, window, loop_len);
+ if (!window_virt)
+ return;
+ if (to_temp)
+ memcpy(temp, window_virt, loop_len);
+ else
+ memcpy(window_virt, temp, loop_len);
+
+ offset += loop_len;
+ temp += loop_len;
+ rem_len -= loop_len;
+
+ end_offset = window->offset +
+ (window->nr_pages << PAGE_SHIFT);
+ while (rem_len) {
+ if (offset == end_offset) {
+ window = list_entry_next(window, list);
+ end_offset = window->offset +
+ (window->nr_pages << PAGE_SHIFT);
+ }
+ loop_len = min(PAGE_SIZE, rem_len);
+ window_virt = _get_local_va(offset, window, loop_len);
+ if (!window_virt)
+ return;
+ if (to_temp)
+ memcpy(temp, window_virt, loop_len);
+ else
+ memcpy(window_virt, temp, loop_len);
+ offset += loop_len;
+ temp += loop_len;
+ rem_len -= loop_len;
+ }
+}
+
+/**
+ * scif_rma_completion_cb:
+ * @data: RMA cookie
+ *
+ * RMA interrupt completion callback.
+ */
+static void scif_rma_completion_cb(void *data)
+{
+ struct scif_dma_comp_cb *comp_cb = data;
+
+ /* Free DMA Completion CB. */
+ if (comp_cb->dst_window)
+ scif_rma_local_cpu_copy(comp_cb->dst_offset,
+ comp_cb->dst_window,
+ comp_cb->temp_buf +
+ comp_cb->header_padding,
+ comp_cb->len, false);
+ scif_unmap_single(comp_cb->temp_phys, comp_cb->sdev,
+ SCIF_KMEM_UNALIGNED_BUF_SIZE);
+ if (comp_cb->is_cache)
+ kmem_cache_free(unaligned_cache,
+ comp_cb->temp_buf_to_free);
+ else
+ kfree(comp_cb->temp_buf_to_free);
+}
+
+/* Copies between temporary buffer and offsets provided in work */
+static int
+scif_rma_list_dma_copy_unaligned(struct scif_copy_work *work,
+ u8 *temp, struct dma_chan *chan,
+ bool src_local)
+{
+ struct scif_dma_comp_cb *comp_cb = work->comp_cb;
+ dma_addr_t window_dma_addr, temp_dma_addr;
+ dma_addr_t temp_phys = comp_cb->temp_phys;
+ size_t loop_len, nr_contig_bytes = 0, remaining_len = work->len;
+ int offset_in_ca, ret = 0;
+ s64 end_offset, offset;
+ struct scif_window *window;
+ void *window_virt_addr;
+ size_t tail_len;
+ struct dma_async_tx_descriptor *tx;
+ struct dma_device *dev = chan->device;
+ dma_cookie_t cookie;
+
+ if (src_local) {
+ offset = work->dst_offset;
+ window = work->dst_window;
+ } else {
+ offset = work->src_offset;
+ window = work->src_window;
+ }
+
+ offset_in_ca = offset & (L1_CACHE_BYTES - 1);
+ if (offset_in_ca) {
+ loop_len = L1_CACHE_BYTES - offset_in_ca;
+ loop_len = min(loop_len, remaining_len);
+ window_virt_addr = ioremap_remote(offset, window,
+ loop_len,
+ work->remote_dev,
+ NULL);
+ if (!window_virt_addr)
+ return -ENOMEM;
+ if (src_local)
+ scif_unaligned_cpy_toio(window_virt_addr, temp,
+ loop_len,
+ work->ordered &&
+ !(remaining_len - loop_len));
+ else
+ scif_unaligned_cpy_fromio(temp, window_virt_addr,
+ loop_len, work->ordered &&
+ !(remaining_len - loop_len));
+ iounmap_remote(window_virt_addr, loop_len, work);
+
+ offset += loop_len;
+ temp += loop_len;
+ temp_phys += loop_len;
+ remaining_len -= loop_len;
+ }
+
+ offset_in_ca = offset & ~PAGE_MASK;
+ end_offset = window->offset +
+ (window->nr_pages << PAGE_SHIFT);
+
+ tail_len = remaining_len & (L1_CACHE_BYTES - 1);
+ remaining_len -= tail_len;
+ while (remaining_len) {
+ if (offset == end_offset) {
+ window = list_entry_next(window, list);
+ end_offset = window->offset +
+ (window->nr_pages << PAGE_SHIFT);
+ }
+ if (scif_is_mgmt_node())
+ temp_dma_addr = temp_phys;
+ else
+ /* Fix if we ever enable IOMMU on the card */
+ temp_dma_addr = (dma_addr_t)virt_to_phys(temp);
+ window_dma_addr = scif_off_to_dma_addr(window, offset,
+ &nr_contig_bytes,
+ NULL);
+ loop_len = min(nr_contig_bytes, remaining_len);
+ if (src_local) {
+ if (work->ordered && !tail_len &&
+ !(remaining_len - loop_len) &&
+ loop_len != L1_CACHE_BYTES) {
+ /*
+ * Break up the last chunk of the transfer into
+ * two steps. if there is no tail to guarantee
+ * DMA ordering. SCIF_DMA_POLLING inserts
+ * a status update descriptor in step 1 which
+ * acts as a double sided synchronization fence
+ * for the DMA engine to ensure that the last
+ * cache line in step 2 is updated last.
+ */
+ /* Step 1) DMA: Body Length - L1_CACHE_BYTES. */
+ tx =
+ dev->device_prep_dma_memcpy(chan,
+ window_dma_addr,
+ temp_dma_addr,
+ loop_len -
+ L1_CACHE_BYTES,
+ DMA_PREP_FENCE);
+ if (!tx) {
+ ret = -ENOMEM;
+ goto err;
+ }
+ cookie = tx->tx_submit(tx);
+ if (dma_submit_error(cookie)) {
+ ret = -ENOMEM;
+ goto err;
+ }
+ dma_async_issue_pending(chan);
+ offset += (loop_len - L1_CACHE_BYTES);
+ temp_dma_addr += (loop_len - L1_CACHE_BYTES);
+ window_dma_addr += (loop_len - L1_CACHE_BYTES);
+ remaining_len -= (loop_len - L1_CACHE_BYTES);
+ loop_len = remaining_len;
+
+ /* Step 2) DMA: L1_CACHE_BYTES */
+ tx =
+ dev->device_prep_dma_memcpy(chan,
+ window_dma_addr,
+ temp_dma_addr,
+ loop_len, 0);
+ if (!tx) {
+ ret = -ENOMEM;
+ goto err;
+ }
+ cookie = tx->tx_submit(tx);
+ if (dma_submit_error(cookie)) {
+ ret = -ENOMEM;
+ goto err;
+ }
+ dma_async_issue_pending(chan);
+ } else {
+ tx =
+ dev->device_prep_dma_memcpy(chan,
+ window_dma_addr,
+ temp_dma_addr,
+ loop_len, 0);
+ if (!tx) {
+ ret = -ENOMEM;
+ goto err;
+ }
+ cookie = tx->tx_submit(tx);
+ if (dma_submit_error(cookie)) {
+ ret = -ENOMEM;
+ goto err;
+ }
+ dma_async_issue_pending(chan);
+ }
+ } else {
+ tx = dev->device_prep_dma_memcpy(chan, temp_dma_addr,
+ window_dma_addr, loop_len, 0);
+ if (!tx) {
+ ret = -ENOMEM;
+ goto err;
+ }
+ cookie = tx->tx_submit(tx);
+ if (dma_submit_error(cookie)) {
+ ret = -ENOMEM;
+ goto err;
+ }
+ dma_async_issue_pending(chan);
+ }
+ if (ret < 0)
+ goto err;
+ offset += loop_len;
+ temp += loop_len;
+ temp_phys += loop_len;
+ remaining_len -= loop_len;
+ offset_in_ca = 0;
+ }
+ if (tail_len) {
+ if (offset == end_offset) {
+ window = list_entry_next(window, list);
+ end_offset = window->offset +
+ (window->nr_pages << PAGE_SHIFT);
+ }
+ window_virt_addr = ioremap_remote(offset, window, tail_len,
+ work->remote_dev,
+ NULL);
+ if (!window_virt_addr)
+ return -ENOMEM;
+ /*
+ * The CPU copy for the tail bytes must be initiated only once
+ * previous DMA transfers for this endpoint have completed
+ * to guarantee ordering.
+ */
+ if (work->ordered) {
+ struct scif_dev *rdev = work->remote_dev;
+
+ ret = scif_drain_dma_intr(rdev->sdev, chan);
+ if (ret)
+ return ret;
+ }
+ if (src_local)
+ scif_unaligned_cpy_toio(window_virt_addr, temp,
+ tail_len, work->ordered);
+ else
+ scif_unaligned_cpy_fromio(temp, window_virt_addr,
+ tail_len, work->ordered);
+ iounmap_remote(window_virt_addr, tail_len, work);
+ }
+ tx = dev->device_prep_dma_memcpy(chan, 0, 0, 0, DMA_PREP_INTERRUPT);
+ if (!tx) {
+ ret = -ENOMEM;
+ return ret;
+ }
+ tx->callback = &scif_rma_completion_cb;
+ tx->callback_param = comp_cb;
+ cookie = tx->tx_submit(tx);
+
+ if (dma_submit_error(cookie)) {
+ ret = -ENOMEM;
+ return ret;
+ }
+ dma_async_issue_pending(chan);
+ return 0;
+err:
+ dev_err(scif_info.mdev.this_device,
+ "%s %d Desc Prog Failed ret %d\n",
+ __func__, __LINE__, ret);
+ return ret;
+}
+
+/*
+ * _scif_rma_list_dma_copy_aligned:
+ *
+ * Traverse all the windows and perform DMA copy.
+ */
+static int _scif_rma_list_dma_copy_aligned(struct scif_copy_work *work,
+ struct dma_chan *chan)
+{
+ dma_addr_t src_dma_addr, dst_dma_addr;
+ size_t loop_len, remaining_len, src_contig_bytes = 0;
+ size_t dst_contig_bytes = 0;
+ struct scif_window_iter src_win_iter;
+ struct scif_window_iter dst_win_iter;
+ s64 end_src_offset, end_dst_offset;
+ struct scif_window *src_window = work->src_window;
+ struct scif_window *dst_window = work->dst_window;
+ s64 src_offset = work->src_offset, dst_offset = work->dst_offset;
+ int ret = 0;
+ struct dma_async_tx_descriptor *tx;
+ struct dma_device *dev = chan->device;
+ dma_cookie_t cookie;
+
+ remaining_len = work->len;
+
+ scif_init_window_iter(src_window, &src_win_iter);
+ scif_init_window_iter(dst_window, &dst_win_iter);
+ end_src_offset = src_window->offset +
+ (src_window->nr_pages << PAGE_SHIFT);
+ end_dst_offset = dst_window->offset +
+ (dst_window->nr_pages << PAGE_SHIFT);
+ while (remaining_len) {
+ if (src_offset == end_src_offset) {
+ src_window = list_entry_next(src_window, list);
+ end_src_offset = src_window->offset +
+ (src_window->nr_pages << PAGE_SHIFT);
+ scif_init_window_iter(src_window, &src_win_iter);
+ }
+ if (dst_offset == end_dst_offset) {
+ dst_window = list_entry_next(dst_window, list);
+ end_dst_offset = dst_window->offset +
+ (dst_window->nr_pages << PAGE_SHIFT);
+ scif_init_window_iter(dst_window, &dst_win_iter);
+ }
+
+ /* compute dma addresses for transfer */
+ src_dma_addr = scif_off_to_dma_addr(src_window, src_offset,
+ &src_contig_bytes,
+ &src_win_iter);
+ dst_dma_addr = scif_off_to_dma_addr(dst_window, dst_offset,
+ &dst_contig_bytes,
+ &dst_win_iter);
+ loop_len = min(src_contig_bytes, dst_contig_bytes);
+ loop_len = min(loop_len, remaining_len);
+ if (work->ordered && !(remaining_len - loop_len)) {
+ /*
+ * Break up the last chunk of the transfer into two
+ * steps to ensure that the last byte in step 2 is
+ * updated last.
+ */
+ /* Step 1) DMA: Body Length - 1 */
+ tx = dev->device_prep_dma_memcpy(chan, dst_dma_addr,
+ src_dma_addr,
+ loop_len - 1,
+ DMA_PREP_FENCE);
+ if (!tx) {
+ ret = -ENOMEM;
+ goto err;
+ }
+ cookie = tx->tx_submit(tx);
+ if (dma_submit_error(cookie)) {
+ ret = -ENOMEM;
+ goto err;
+ }
+ src_offset += (loop_len - 1);
+ dst_offset += (loop_len - 1);
+ src_dma_addr += (loop_len - 1);
+ dst_dma_addr += (loop_len - 1);
+ remaining_len -= (loop_len - 1);
+ loop_len = remaining_len;
+
+ /* Step 2) DMA: 1 BYTES */
+ tx = dev->device_prep_dma_memcpy(chan, dst_dma_addr,
+ src_dma_addr, loop_len, 0);
+ if (!tx) {
+ ret = -ENOMEM;
+ goto err;
+ }
+ cookie = tx->tx_submit(tx);
+ if (dma_submit_error(cookie)) {
+ ret = -ENOMEM;
+ goto err;
+ }
+ dma_async_issue_pending(chan);
+ } else {
+ tx = dev->device_prep_dma_memcpy(chan, dst_dma_addr,
+ src_dma_addr, loop_len, 0);
+ if (!tx) {
+ ret = -ENOMEM;
+ goto err;
+ }
+ cookie = tx->tx_submit(tx);
+ if (dma_submit_error(cookie)) {
+ ret = -ENOMEM;
+ goto err;
+ }
+ }
+ src_offset += loop_len;
+ dst_offset += loop_len;
+ remaining_len -= loop_len;
+ }
+ return ret;
+err:
+ dev_err(scif_info.mdev.this_device,
+ "%s %d Desc Prog Failed ret %d\n",
+ __func__, __LINE__, ret);
+ return ret;
+}
+
+/*
+ * scif_rma_list_dma_copy_aligned:
+ *
+ * Traverse all the windows and perform DMA copy.
+ */
+static int scif_rma_list_dma_copy_aligned(struct scif_copy_work *work,
+ struct dma_chan *chan)
+{
+ dma_addr_t src_dma_addr, dst_dma_addr;
+ size_t loop_len, remaining_len, tail_len, src_contig_bytes = 0;
+ size_t dst_contig_bytes = 0;
+ int src_cache_off;
+ s64 end_src_offset, end_dst_offset;
+ struct scif_window_iter src_win_iter;
+ struct scif_window_iter dst_win_iter;
+ void *src_virt, *dst_virt;
+ struct scif_window *src_window = work->src_window;
+ struct scif_window *dst_window = work->dst_window;
+ s64 src_offset = work->src_offset, dst_offset = work->dst_offset;
+ int ret = 0;
+ struct dma_async_tx_descriptor *tx;
+ struct dma_device *dev = chan->device;
+ dma_cookie_t cookie;
+
+ remaining_len = work->len;
+ scif_init_window_iter(src_window, &src_win_iter);
+ scif_init_window_iter(dst_window, &dst_win_iter);
+
+ src_cache_off = src_offset & (L1_CACHE_BYTES - 1);
+ if (src_cache_off != 0) {
+ /* Head */
+ loop_len = L1_CACHE_BYTES - src_cache_off;
+ loop_len = min(loop_len, remaining_len);
+ src_dma_addr = __scif_off_to_dma_addr(src_window, src_offset);
+ dst_dma_addr = __scif_off_to_dma_addr(dst_window, dst_offset);
+ if (src_window->type == SCIF_WINDOW_SELF)
+ src_virt = _get_local_va(src_offset, src_window,
+ loop_len);
+ else
+ src_virt = ioremap_remote(src_offset, src_window,
+ loop_len,
+ work->remote_dev, NULL);
+ if (!src_virt)
+ return -ENOMEM;
+ if (dst_window->type == SCIF_WINDOW_SELF)
+ dst_virt = _get_local_va(dst_offset, dst_window,
+ loop_len);
+ else
+ dst_virt = ioremap_remote(dst_offset, dst_window,
+ loop_len,
+ work->remote_dev, NULL);
+ if (!dst_virt) {
+ if (src_window->type != SCIF_WINDOW_SELF)
+ iounmap_remote(src_virt, loop_len, work);
+ return -ENOMEM;
+ }
+ if (src_window->type == SCIF_WINDOW_SELF)
+ scif_unaligned_cpy_toio(dst_virt, src_virt, loop_len,
+ remaining_len == loop_len ?
+ work->ordered : false);
+ else
+ scif_unaligned_cpy_fromio(dst_virt, src_virt, loop_len,
+ remaining_len == loop_len ?
+ work->ordered : false);
+ if (src_window->type != SCIF_WINDOW_SELF)
+ iounmap_remote(src_virt, loop_len, work);
+ if (dst_window->type != SCIF_WINDOW_SELF)
+ iounmap_remote(dst_virt, loop_len, work);
+ src_offset += loop_len;
+ dst_offset += loop_len;
+ remaining_len -= loop_len;
+ }
+
+ end_src_offset = src_window->offset +
+ (src_window->nr_pages << PAGE_SHIFT);
+ end_dst_offset = dst_window->offset +
+ (dst_window->nr_pages << PAGE_SHIFT);
+ tail_len = remaining_len & (L1_CACHE_BYTES - 1);
+ remaining_len -= tail_len;
+ while (remaining_len) {
+ if (src_offset == end_src_offset) {
+ src_window = list_entry_next(src_window, list);
+ end_src_offset = src_window->offset +
+ (src_window->nr_pages << PAGE_SHIFT);
+ scif_init_window_iter(src_window, &src_win_iter);
+ }
+ if (dst_offset == end_dst_offset) {
+ dst_window = list_entry_next(dst_window, list);
+ end_dst_offset = dst_window->offset +
+ (dst_window->nr_pages << PAGE_SHIFT);
+ scif_init_window_iter(dst_window, &dst_win_iter);
+ }
+
+ /* compute dma addresses for transfer */
+ src_dma_addr = scif_off_to_dma_addr(src_window, src_offset,
+ &src_contig_bytes,
+ &src_win_iter);
+ dst_dma_addr = scif_off_to_dma_addr(dst_window, dst_offset,
+ &dst_contig_bytes,
+ &dst_win_iter);
+ loop_len = min(src_contig_bytes, dst_contig_bytes);
+ loop_len = min(loop_len, remaining_len);
+ if (work->ordered && !tail_len &&
+ !(remaining_len - loop_len)) {
+ /*
+ * Break up the last chunk of the transfer into two
+ * steps. if there is no tail to gurantee DMA ordering.
+ * Passing SCIF_DMA_POLLING inserts a status update
+ * descriptor in step 1 which acts as a double sided
+ * synchronization fence for the DMA engine to ensure
+ * that the last cache line in step 2 is updated last.
+ */
+ /* Step 1) DMA: Body Length - L1_CACHE_BYTES. */
+ tx = dev->device_prep_dma_memcpy(chan, dst_dma_addr,
+ src_dma_addr,
+ loop_len -
+ L1_CACHE_BYTES,
+ DMA_PREP_FENCE);
+ if (!tx) {
+ ret = -ENOMEM;
+ goto err;
+ }
+ cookie = tx->tx_submit(tx);
+ if (dma_submit_error(cookie)) {
+ ret = -ENOMEM;
+ goto err;
+ }
+ dma_async_issue_pending(chan);
+ src_offset += (loop_len - L1_CACHE_BYTES);
+ dst_offset += (loop_len - L1_CACHE_BYTES);
+ src_dma_addr += (loop_len - L1_CACHE_BYTES);
+ dst_dma_addr += (loop_len - L1_CACHE_BYTES);
+ remaining_len -= (loop_len - L1_CACHE_BYTES);
+ loop_len = remaining_len;
+
+ /* Step 2) DMA: L1_CACHE_BYTES */
+ tx = dev->device_prep_dma_memcpy(chan, dst_dma_addr,
+ src_dma_addr,
+ loop_len, 0);
+ if (!tx) {
+ ret = -ENOMEM;
+ goto err;
+ }
+ cookie = tx->tx_submit(tx);
+ if (dma_submit_error(cookie)) {
+ ret = -ENOMEM;
+ goto err;
+ }
+ dma_async_issue_pending(chan);
+ } else {
+ tx = dev->device_prep_dma_memcpy(chan, dst_dma_addr,
+ src_dma_addr,
+ loop_len, 0);
+ if (!tx) {
+ ret = -ENOMEM;
+ goto err;
+ }
+ cookie = tx->tx_submit(tx);
+ if (dma_submit_error(cookie)) {
+ ret = -ENOMEM;
+ goto err;
+ }
+ dma_async_issue_pending(chan);
+ }
+ src_offset += loop_len;
+ dst_offset += loop_len;
+ remaining_len -= loop_len;
+ }
+ remaining_len = tail_len;
+ if (remaining_len) {
+ loop_len = remaining_len;
+ if (src_offset == end_src_offset)
+ src_window = list_entry_next(src_window, list);
+ if (dst_offset == end_dst_offset)
+ dst_window = list_entry_next(dst_window, list);
+
+ src_dma_addr = __scif_off_to_dma_addr(src_window, src_offset);
+ dst_dma_addr = __scif_off_to_dma_addr(dst_window, dst_offset);
+ /*
+ * The CPU copy for the tail bytes must be initiated only once
+ * previous DMA transfers for this endpoint have completed to
+ * guarantee ordering.
+ */
+ if (work->ordered) {
+ struct scif_dev *rdev = work->remote_dev;
+
+ ret = scif_drain_dma_poll(rdev->sdev, chan);
+ if (ret)
+ return ret;
+ }
+ if (src_window->type == SCIF_WINDOW_SELF)
+ src_virt = _get_local_va(src_offset, src_window,
+ loop_len);
+ else
+ src_virt = ioremap_remote(src_offset, src_window,
+ loop_len,
+ work->remote_dev, NULL);
+ if (!src_virt)
+ return -ENOMEM;
+
+ if (dst_window->type == SCIF_WINDOW_SELF)
+ dst_virt = _get_local_va(dst_offset, dst_window,
+ loop_len);
+ else
+ dst_virt = ioremap_remote(dst_offset, dst_window,
+ loop_len,
+ work->remote_dev, NULL);
+ if (!dst_virt) {
+ if (src_window->type != SCIF_WINDOW_SELF)
+ iounmap_remote(src_virt, loop_len, work);
+ return -ENOMEM;
+ }
+
+ if (src_window->type == SCIF_WINDOW_SELF)
+ scif_unaligned_cpy_toio(dst_virt, src_virt, loop_len,
+ work->ordered);
+ else
+ scif_unaligned_cpy_fromio(dst_virt, src_virt,
+ loop_len, work->ordered);
+ if (src_window->type != SCIF_WINDOW_SELF)
+ iounmap_remote(src_virt, loop_len, work);
+
+ if (dst_window->type != SCIF_WINDOW_SELF)
+ iounmap_remote(dst_virt, loop_len, work);
+ remaining_len -= loop_len;
+ }
+ return ret;
+err:
+ dev_err(scif_info.mdev.this_device,
+ "%s %d Desc Prog Failed ret %d\n",
+ __func__, __LINE__, ret);
+ return ret;
+}
+
+/*
+ * scif_rma_list_cpu_copy:
+ *
+ * Traverse all the windows and perform CPU copy.
+ */
+static int scif_rma_list_cpu_copy(struct scif_copy_work *work)
+{
+ void *src_virt, *dst_virt;
+ size_t loop_len, remaining_len;
+ int src_page_off, dst_page_off;
+ s64 src_offset = work->src_offset, dst_offset = work->dst_offset;
+ struct scif_window *src_window = work->src_window;
+ struct scif_window *dst_window = work->dst_window;
+ s64 end_src_offset, end_dst_offset;
+ int ret = 0;
+ struct scif_window_iter src_win_iter;
+ struct scif_window_iter dst_win_iter;
+
+ remaining_len = work->len;
+
+ scif_init_window_iter(src_window, &src_win_iter);
+ scif_init_window_iter(dst_window, &dst_win_iter);
+ while (remaining_len) {
+ src_page_off = src_offset & ~PAGE_MASK;
+ dst_page_off = dst_offset & ~PAGE_MASK;
+ loop_len = min(PAGE_SIZE -
+ max(src_page_off, dst_page_off),
+ remaining_len);
+
+ if (src_window->type == SCIF_WINDOW_SELF)
+ src_virt = _get_local_va(src_offset, src_window,
+ loop_len);
+ else
+ src_virt = ioremap_remote(src_offset, src_window,
+ loop_len,
+ work->remote_dev,
+ &src_win_iter);
+ if (!src_virt) {
+ ret = -ENOMEM;
+ goto error;
+ }
+
+ if (dst_window->type == SCIF_WINDOW_SELF)
+ dst_virt = _get_local_va(dst_offset, dst_window,
+ loop_len);
+ else
+ dst_virt = ioremap_remote(dst_offset, dst_window,
+ loop_len,
+ work->remote_dev,
+ &dst_win_iter);
+ if (!dst_virt) {
+ if (src_window->type == SCIF_WINDOW_PEER)
+ iounmap_remote(src_virt, loop_len, work);
+ ret = -ENOMEM;
+ goto error;
+ }
+
+ if (work->loopback) {
+ memcpy(dst_virt, src_virt, loop_len);
+ } else {
+ if (src_window->type == SCIF_WINDOW_SELF)
+ memcpy_toio((void __iomem __force *)dst_virt,
+ src_virt, loop_len);
+ else
+ memcpy_fromio(dst_virt,
+ (void __iomem __force *)src_virt,
+ loop_len);
+ }
+ if (src_window->type == SCIF_WINDOW_PEER)
+ iounmap_remote(src_virt, loop_len, work);
+
+ if (dst_window->type == SCIF_WINDOW_PEER)
+ iounmap_remote(dst_virt, loop_len, work);
+
+ src_offset += loop_len;
+ dst_offset += loop_len;
+ remaining_len -= loop_len;
+ if (remaining_len) {
+ end_src_offset = src_window->offset +
+ (src_window->nr_pages << PAGE_SHIFT);
+ end_dst_offset = dst_window->offset +
+ (dst_window->nr_pages << PAGE_SHIFT);
+ if (src_offset == end_src_offset) {
+ src_window = list_entry_next(src_window, list);
+ scif_init_window_iter(src_window,
+ &src_win_iter);
+ }
+ if (dst_offset == end_dst_offset) {
+ dst_window = list_entry_next(dst_window, list);
+ scif_init_window_iter(dst_window,
+ &dst_win_iter);
+ }
+ }
+ }
+error:
+ return ret;
+}
+
+static int scif_rma_list_dma_copy_wrapper(struct scif_endpt *epd,
+ struct scif_copy_work *work,
+ struct dma_chan *chan, off_t loffset)
+{
+ int src_cache_off, dst_cache_off;
+ s64 src_offset = work->src_offset, dst_offset = work->dst_offset;
+ u8 *temp = NULL;
+ bool src_local = true, dst_local = false;
+ struct scif_dma_comp_cb *comp_cb;
+ dma_addr_t src_dma_addr, dst_dma_addr;
+ int err;
+
+ if (is_dma_copy_aligned(chan->device, 1, 1, 1))
+ return _scif_rma_list_dma_copy_aligned(work, chan);
+
+ src_cache_off = src_offset & (L1_CACHE_BYTES - 1);
+ dst_cache_off = dst_offset & (L1_CACHE_BYTES - 1);
+
+ if (dst_cache_off == src_cache_off)
+ return scif_rma_list_dma_copy_aligned(work, chan);
+
+ if (work->loopback)
+ return scif_rma_list_cpu_copy(work);
+ src_dma_addr = __scif_off_to_dma_addr(work->src_window, src_offset);
+ dst_dma_addr = __scif_off_to_dma_addr(work->dst_window, dst_offset);
+ src_local = work->src_window->type == SCIF_WINDOW_SELF;
+ dst_local = work->dst_window->type == SCIF_WINDOW_SELF;
+
+ dst_local = dst_local;
+ /* Allocate dma_completion cb */
+ comp_cb = kzalloc(sizeof(*comp_cb), GFP_KERNEL);
+ if (!comp_cb)
+ goto error;
+
+ work->comp_cb = comp_cb;
+ comp_cb->cb_cookie = comp_cb;
+ comp_cb->dma_completion_func = &scif_rma_completion_cb;
+
+ if (work->len + (L1_CACHE_BYTES << 1) < SCIF_KMEM_UNALIGNED_BUF_SIZE) {
+ comp_cb->is_cache = false;
+ /* Allocate padding bytes to align to a cache line */
+ temp = kmalloc(work->len + (L1_CACHE_BYTES << 1),
+ GFP_KERNEL);
+ if (!temp)
+ goto free_comp_cb;
+ comp_cb->temp_buf_to_free = temp;
+ /* kmalloc(..) does not guarantee cache line alignment */
+ if (!IS_ALIGNED((u64)temp, L1_CACHE_BYTES))
+ temp = PTR_ALIGN(temp, L1_CACHE_BYTES);
+ } else {
+ comp_cb->is_cache = true;
+ temp = kmem_cache_alloc(unaligned_cache, GFP_KERNEL);
+ if (!temp)
+ goto free_comp_cb;
+ comp_cb->temp_buf_to_free = temp;
+ }
+
+ if (src_local) {
+ temp += dst_cache_off;
+ scif_rma_local_cpu_copy(work->src_offset, work->src_window,
+ temp, work->len, true);
+ } else {
+ comp_cb->dst_window = work->dst_window;
+ comp_cb->dst_offset = work->dst_offset;
+ work->src_offset = work->src_offset - src_cache_off;
+ comp_cb->len = work->len;
+ work->len = ALIGN(work->len + src_cache_off, L1_CACHE_BYTES);
+ comp_cb->header_padding = src_cache_off;
+ }
+ comp_cb->temp_buf = temp;
+
+ err = scif_map_single(&comp_cb->temp_phys, temp,
+ work->remote_dev, SCIF_KMEM_UNALIGNED_BUF_SIZE);
+ if (err)
+ goto free_temp_buf;
+ comp_cb->sdev = work->remote_dev;
+ if (scif_rma_list_dma_copy_unaligned(work, temp, chan, src_local) < 0)
+ goto free_temp_buf;
+ if (!src_local)
+ work->fence_type = SCIF_DMA_INTR;
+ return 0;
+free_temp_buf:
+ if (comp_cb->is_cache)
+ kmem_cache_free(unaligned_cache, comp_cb->temp_buf_to_free);
+ else
+ kfree(comp_cb->temp_buf_to_free);
+free_comp_cb:
+ kfree(comp_cb);
+error:
+ return -ENOMEM;
+}
+
+/**
+ * scif_rma_copy:
+ * @epd: end point descriptor.
+ * @loffset: offset in local registered address space to/from which to copy
+ * @addr: user virtual address to/from which to copy
+ * @len: length of range to copy
+ * @roffset: offset in remote registered address space to/from which to copy
+ * @flags: flags
+ * @dir: LOCAL->REMOTE or vice versa.
+ * @last_chunk: true if this is the last chunk of a larger transfer
+ *
+ * Validate parameters, check if src/dst registered ranges requested for copy
+ * are valid and initiate either CPU or DMA copy.
+ */
+static int scif_rma_copy(scif_epd_t epd, off_t loffset, unsigned long addr,
+ size_t len, off_t roffset, int flags,
+ enum scif_rma_dir dir, bool last_chunk)
+{
+ struct scif_endpt *ep = (struct scif_endpt *)epd;
+ struct scif_rma_req remote_req;
+ struct scif_rma_req req;
+ struct scif_window *local_window = NULL;
+ struct scif_window *remote_window = NULL;
+ struct scif_copy_work copy_work;
+ bool loopback;
+ int err = 0;
+ struct dma_chan *chan;
+ struct scif_mmu_notif *mmn = NULL;
+ bool cache = false;
+ struct device *spdev;
+
+ err = scif_verify_epd(ep);
+ if (err)
+ return err;
+
+ if (flags && !(flags & (SCIF_RMA_USECPU | SCIF_RMA_USECACHE |
+ SCIF_RMA_SYNC | SCIF_RMA_ORDERED)))
+ return -EINVAL;
+
+ loopback = scifdev_self(ep->remote_dev) ? true : false;
+ copy_work.fence_type = ((flags & SCIF_RMA_SYNC) && last_chunk) ?
+ SCIF_DMA_POLL : 0;
+ copy_work.ordered = !!((flags & SCIF_RMA_ORDERED) && last_chunk);
+
+ /* Use CPU for Mgmt node <-> Mgmt node copies */
+ if (loopback && scif_is_mgmt_node()) {
+ flags |= SCIF_RMA_USECPU;
+ copy_work.fence_type = 0x0;
+ }
+
+ cache = scif_is_set_reg_cache(flags);
+
+ remote_req.out_window = &remote_window;
+ remote_req.offset = roffset;
+ remote_req.nr_bytes = len;
+ /*
+ * If transfer is from local to remote then the remote window
+ * must be writeable and vice versa.
+ */
+ remote_req.prot = dir == SCIF_LOCAL_TO_REMOTE ? VM_WRITE : VM_READ;
+ remote_req.type = SCIF_WINDOW_PARTIAL;
+ remote_req.head = &ep->rma_info.remote_reg_list;
+
+ spdev = scif_get_peer_dev(ep->remote_dev);
+ if (IS_ERR(spdev)) {
+ err = PTR_ERR(spdev);
+ return err;
+ }
+
+ if (addr && cache) {
+ mutex_lock(&ep->rma_info.mmn_lock);
+ mmn = scif_find_mmu_notifier(current->mm, &ep->rma_info);
+ if (!mmn)
+ scif_add_mmu_notifier(current->mm, ep);
+ mutex_unlock(&ep->rma_info.mmn_lock);
+ if (IS_ERR(mmn)) {
+ scif_put_peer_dev(spdev);
+ return PTR_ERR(mmn);
+ }
+ cache = cache && !scif_rma_tc_can_cache(ep, len);
+ }
+ mutex_lock(&ep->rma_info.rma_lock);
+ if (addr) {
+ req.out_window = &local_window;
+ req.nr_bytes = ALIGN(len + (addr & ~PAGE_MASK),
+ PAGE_SIZE);
+ req.va_for_temp = addr & PAGE_MASK;
+ req.prot = (dir == SCIF_LOCAL_TO_REMOTE ?
+ VM_READ : VM_WRITE | VM_READ);
+ /* Does a valid local window exist? */
+ if (mmn) {
+ spin_lock(&ep->rma_info.tc_lock);
+ req.head = &mmn->tc_reg_list;
+ err = scif_query_tcw(ep, &req);
+ spin_unlock(&ep->rma_info.tc_lock);
+ }
+ if (!mmn || err) {
+ err = scif_register_temp(epd, req.va_for_temp,
+ req.nr_bytes, req.prot,
+ &loffset, &local_window);
+ if (err) {
+ mutex_unlock(&ep->rma_info.rma_lock);
+ goto error;
+ }
+ if (!cache)
+ goto skip_cache;
+ atomic_inc(&ep->rma_info.tcw_refcount);
+ atomic_add_return(local_window->nr_pages,
+ &ep->rma_info.tcw_total_pages);
+ if (mmn) {
+ spin_lock(&ep->rma_info.tc_lock);
+ scif_insert_tcw(local_window,
+ &mmn->tc_reg_list);
+ spin_unlock(&ep->rma_info.tc_lock);
+ }
+ }
+skip_cache:
+ loffset = local_window->offset +
+ (addr - local_window->va_for_temp);
+ } else {
+ req.out_window = &local_window;
+ req.offset = loffset;
+ /*
+ * If transfer is from local to remote then the self window
+ * must be readable and vice versa.
+ */
+ req.prot = dir == SCIF_LOCAL_TO_REMOTE ? VM_READ : VM_WRITE;
+ req.nr_bytes = len;
+ req.type = SCIF_WINDOW_PARTIAL;
+ req.head = &ep->rma_info.reg_list;
+ /* Does a valid local window exist? */
+ err = scif_query_window(&req);
+ if (err) {
+ mutex_unlock(&ep->rma_info.rma_lock);
+ goto error;
+ }
+ }
+
+ /* Does a valid remote window exist? */
+ err = scif_query_window(&remote_req);
+ if (err) {
+ mutex_unlock(&ep->rma_info.rma_lock);
+ goto error;
+ }
+
+ /*
+ * Prepare copy_work for submitting work to the DMA kernel thread
+ * or CPU copy routine.
+ */
+ copy_work.len = len;
+ copy_work.loopback = loopback;
+ copy_work.remote_dev = ep->remote_dev;
+ if (dir == SCIF_LOCAL_TO_REMOTE) {
+ copy_work.src_offset = loffset;
+ copy_work.src_window = local_window;
+ copy_work.dst_offset = roffset;
+ copy_work.dst_window = remote_window;
+ } else {
+ copy_work.src_offset = roffset;
+ copy_work.src_window = remote_window;
+ copy_work.dst_offset = loffset;
+ copy_work.dst_window = local_window;
+ }
+
+ if (flags & SCIF_RMA_USECPU) {
+ scif_rma_list_cpu_copy(©_work);
+ } else {
+ chan = ep->rma_info.dma_chan;
+ err = scif_rma_list_dma_copy_wrapper(epd, ©_work,
+ chan, loffset);
+ }
+ if (addr && !cache)
+ atomic_inc(&ep->rma_info.tw_refcount);
+
+ mutex_unlock(&ep->rma_info.rma_lock);
+
+ if (last_chunk) {
+ struct scif_dev *rdev = ep->remote_dev;
+
+ if (copy_work.fence_type == SCIF_DMA_POLL)
+ err = scif_drain_dma_poll(rdev->sdev,
+ ep->rma_info.dma_chan);
+ else if (copy_work.fence_type == SCIF_DMA_INTR)
+ err = scif_drain_dma_intr(rdev->sdev,
+ ep->rma_info.dma_chan);
+ }
+
+ if (addr && !cache)
+ scif_queue_for_cleanup(local_window, &scif_info.rma);
+ scif_put_peer_dev(spdev);
+ return err;
+error:
+ if (err) {
+ if (addr && local_window && !cache)
+ scif_destroy_window(ep, local_window);
+ dev_err(scif_info.mdev.this_device,
+ "%s %d err %d len 0x%lx\n",
+ __func__, __LINE__, err, len);
+ }
+ scif_put_peer_dev(spdev);
+ return err;
+}
+
+int scif_readfrom(scif_epd_t epd, off_t loffset, size_t len,
+ off_t roffset, int flags)
+{
+ int err;
+
+ dev_dbg(scif_info.mdev.this_device,
+ "SCIFAPI readfrom: ep %p loffset 0x%lx len 0x%lx offset 0x%lx flags 0x%x\n",
+ epd, loffset, len, roffset, flags);
+ if (scif_unaligned(loffset, roffset)) {
+ while (len > SCIF_MAX_UNALIGNED_BUF_SIZE) {
+ err = scif_rma_copy(epd, loffset, 0x0,
+ SCIF_MAX_UNALIGNED_BUF_SIZE,
+ roffset, flags,
+ SCIF_REMOTE_TO_LOCAL, false);
+ if (err)
+ goto readfrom_err;
+ loffset += SCIF_MAX_UNALIGNED_BUF_SIZE;
+ roffset += SCIF_MAX_UNALIGNED_BUF_SIZE;
+ len -= SCIF_MAX_UNALIGNED_BUF_SIZE;
+ }
+ }
+ err = scif_rma_copy(epd, loffset, 0x0, len,
+ roffset, flags, SCIF_REMOTE_TO_LOCAL, true);
+readfrom_err:
+ return err;
+}
+EXPORT_SYMBOL_GPL(scif_readfrom);
+
+int scif_writeto(scif_epd_t epd, off_t loffset, size_t len,
+ off_t roffset, int flags)
+{
+ int err;
+
+ dev_dbg(scif_info.mdev.this_device,
+ "SCIFAPI writeto: ep %p loffset 0x%lx len 0x%lx roffset 0x%lx flags 0x%x\n",
+ epd, loffset, len, roffset, flags);
+ if (scif_unaligned(loffset, roffset)) {
+ while (len > SCIF_MAX_UNALIGNED_BUF_SIZE) {
+ err = scif_rma_copy(epd, loffset, 0x0,
+ SCIF_MAX_UNALIGNED_BUF_SIZE,
+ roffset, flags,
+ SCIF_LOCAL_TO_REMOTE, false);
+ if (err)
+ goto writeto_err;
+ loffset += SCIF_MAX_UNALIGNED_BUF_SIZE;
+ roffset += SCIF_MAX_UNALIGNED_BUF_SIZE;
+ len -= SCIF_MAX_UNALIGNED_BUF_SIZE;
+ }
+ }
+ err = scif_rma_copy(epd, loffset, 0x0, len,
+ roffset, flags, SCIF_LOCAL_TO_REMOTE, true);
+writeto_err:
+ return err;
+}
+EXPORT_SYMBOL_GPL(scif_writeto);
+
+int scif_vreadfrom(scif_epd_t epd, void *addr, size_t len,
+ off_t roffset, int flags)
+{
+ int err;
+
+ dev_dbg(scif_info.mdev.this_device,
+ "SCIFAPI vreadfrom: ep %p addr %p len 0x%lx roffset 0x%lx flags 0x%x\n",
+ epd, addr, len, roffset, flags);
+ if (scif_unaligned((off_t __force)addr, roffset)) {
+ if (len > SCIF_MAX_UNALIGNED_BUF_SIZE)
+ flags &= ~SCIF_RMA_USECACHE;
+
+ while (len > SCIF_MAX_UNALIGNED_BUF_SIZE) {
+ err = scif_rma_copy(epd, 0, (u64)addr,
+ SCIF_MAX_UNALIGNED_BUF_SIZE,
+ roffset, flags,
+ SCIF_REMOTE_TO_LOCAL, false);
+ if (err)
+ goto vreadfrom_err;
+ addr += SCIF_MAX_UNALIGNED_BUF_SIZE;
+ roffset += SCIF_MAX_UNALIGNED_BUF_SIZE;
+ len -= SCIF_MAX_UNALIGNED_BUF_SIZE;
+ }
+ }
+ err = scif_rma_copy(epd, 0, (u64)addr, len,
+ roffset, flags, SCIF_REMOTE_TO_LOCAL, true);
+vreadfrom_err:
+ return err;
+}
+EXPORT_SYMBOL_GPL(scif_vreadfrom);
+
+int scif_vwriteto(scif_epd_t epd, void *addr, size_t len,
+ off_t roffset, int flags)
+{
+ int err;
+
+ dev_dbg(scif_info.mdev.this_device,
+ "SCIFAPI vwriteto: ep %p addr %p len 0x%lx roffset 0x%lx flags 0x%x\n",
+ epd, addr, len, roffset, flags);
+ if (scif_unaligned((off_t __force)addr, roffset)) {
+ if (len > SCIF_MAX_UNALIGNED_BUF_SIZE)
+ flags &= ~SCIF_RMA_USECACHE;
+
+ while (len > SCIF_MAX_UNALIGNED_BUF_SIZE) {
+ err = scif_rma_copy(epd, 0, (u64)addr,
+ SCIF_MAX_UNALIGNED_BUF_SIZE,
+ roffset, flags,
+ SCIF_LOCAL_TO_REMOTE, false);
+ if (err)
+ goto vwriteto_err;
+ addr += SCIF_MAX_UNALIGNED_BUF_SIZE;
+ roffset += SCIF_MAX_UNALIGNED_BUF_SIZE;
+ len -= SCIF_MAX_UNALIGNED_BUF_SIZE;
+ }
+ }
+ err = scif_rma_copy(epd, 0, (u64)addr, len,
+ roffset, flags, SCIF_LOCAL_TO_REMOTE, true);
+vwriteto_err:
+ return err;
+}
+EXPORT_SYMBOL_GPL(scif_vwriteto);
diff --git a/drivers/misc/mic/scif/scif_epd.c b/drivers/misc/mic/scif/scif_epd.c
new file mode 100644
index 0000000..00e5d6d
--- /dev/null
+++ b/drivers/misc/mic/scif/scif_epd.c
@@ -0,0 +1,357 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2014 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Intel SCIF driver.
+ *
+ */
+#include "scif_main.h"
+#include "scif_map.h"
+
+void scif_cleanup_ep_qp(struct scif_endpt *ep)
+{
+ struct scif_qp *qp = ep->qp_info.qp;
+
+ if (qp->outbound_q.rb_base) {
+ scif_iounmap((void *)qp->outbound_q.rb_base,
+ qp->outbound_q.size, ep->remote_dev);
+ qp->outbound_q.rb_base = NULL;
+ }
+ if (qp->remote_qp) {
+ scif_iounmap((void *)qp->remote_qp,
+ sizeof(struct scif_qp), ep->remote_dev);
+ qp->remote_qp = NULL;
+ }
+ if (qp->local_qp) {
+ scif_unmap_single(qp->local_qp, ep->remote_dev,
+ sizeof(struct scif_qp));
+ qp->local_qp = 0x0;
+ }
+ if (qp->local_buf) {
+ scif_unmap_single(qp->local_buf, ep->remote_dev,
+ SCIF_ENDPT_QP_SIZE);
+ qp->local_buf = 0;
+ }
+}
+
+void scif_teardown_ep(void *endpt)
+{
+ struct scif_endpt *ep = endpt;
+ struct scif_qp *qp = ep->qp_info.qp;
+
+ if (qp) {
+ spin_lock(&ep->lock);
+ scif_cleanup_ep_qp(ep);
+ spin_unlock(&ep->lock);
+ kfree(qp->inbound_q.rb_base);
+ kfree(qp);
+ }
+}
+
+/*
+ * Enqueue the endpoint to the zombie list for cleanup.
+ * The endpoint should not be accessed once this API returns.
+ */
+void scif_add_epd_to_zombie_list(struct scif_endpt *ep, bool eplock_held)
+{
+ if (!eplock_held)
+ mutex_lock(&scif_info.eplock);
+ spin_lock(&ep->lock);
+ ep->state = SCIFEP_ZOMBIE;
+ spin_unlock(&ep->lock);
+ list_add_tail(&ep->list, &scif_info.zombie);
+ scif_info.nr_zombies++;
+ if (!eplock_held)
+ mutex_unlock(&scif_info.eplock);
+ schedule_work(&scif_info.misc_work);
+}
+
+static struct scif_endpt *scif_find_listen_ep(u16 port)
+{
+ struct scif_endpt *ep = NULL;
+ struct list_head *pos, *tmpq;
+
+ mutex_lock(&scif_info.eplock);
+ list_for_each_safe(pos, tmpq, &scif_info.listen) {
+ ep = list_entry(pos, struct scif_endpt, list);
+ if (ep->port.port == port) {
+ mutex_unlock(&scif_info.eplock);
+ return ep;
+ }
+ }
+ mutex_unlock(&scif_info.eplock);
+ return NULL;
+}
+
+void scif_cleanup_zombie_epd(void)
+{
+ struct list_head *pos, *tmpq;
+ struct scif_endpt *ep;
+
+ mutex_lock(&scif_info.eplock);
+ list_for_each_safe(pos, tmpq, &scif_info.zombie) {
+ ep = list_entry(pos, struct scif_endpt, list);
+ if (scif_rma_ep_can_uninit(ep)) {
+ list_del(pos);
+ scif_info.nr_zombies--;
+ put_iova_domain(&ep->rma_info.iovad);
+ kfree(ep);
+ }
+ }
+ mutex_unlock(&scif_info.eplock);
+}
+
+/**
+ * scif_cnctreq() - Respond to SCIF_CNCT_REQ interrupt message
+ * @msg: Interrupt message
+ *
+ * This message is initiated by the remote node to request a connection
+ * to the local node. This function looks for an end point in the
+ * listen state on the requested port id.
+ *
+ * If it finds a listening port it places the connect request on the
+ * listening end points queue and wakes up any pending accept calls.
+ *
+ * If it does not find a listening end point it sends a connection
+ * reject message to the remote node.
+ */
+void scif_cnctreq(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+ struct scif_endpt *ep = NULL;
+ struct scif_conreq *conreq;
+
+ conreq = kmalloc(sizeof(*conreq), GFP_KERNEL);
+ if (!conreq)
+ /* Lack of resources so reject the request. */
+ goto conreq_sendrej;
+
+ ep = scif_find_listen_ep(msg->dst.port);
+ if (!ep)
+ /* Send reject due to no listening ports */
+ goto conreq_sendrej_free;
+ else
+ spin_lock(&ep->lock);
+
+ if (ep->backlog <= ep->conreqcnt) {
+ /* Send reject due to too many pending requests */
+ spin_unlock(&ep->lock);
+ goto conreq_sendrej_free;
+ }
+
+ conreq->msg = *msg;
+ list_add_tail(&conreq->list, &ep->conlist);
+ ep->conreqcnt++;
+ wake_up_interruptible(&ep->conwq);
+ spin_unlock(&ep->lock);
+ return;
+
+conreq_sendrej_free:
+ kfree(conreq);
+conreq_sendrej:
+ msg->uop = SCIF_CNCT_REJ;
+ scif_nodeqp_send(&scif_dev[msg->src.node], msg);
+}
+
+/**
+ * scif_cnctgnt() - Respond to SCIF_CNCT_GNT interrupt message
+ * @msg: Interrupt message
+ *
+ * An accept() on the remote node has occurred and sent this message
+ * to indicate success. Place the end point in the MAPPING state and
+ * save the remote nodes memory information. Then wake up the connect
+ * request so it can finish.
+ */
+void scif_cnctgnt(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+ struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0];
+
+ spin_lock(&ep->lock);
+ if (SCIFEP_CONNECTING == ep->state) {
+ ep->peer.node = msg->src.node;
+ ep->peer.port = msg->src.port;
+ ep->qp_info.gnt_pld = msg->payload[1];
+ ep->remote_ep = msg->payload[2];
+ ep->state = SCIFEP_MAPPING;
+
+ wake_up(&ep->conwq);
+ }
+ spin_unlock(&ep->lock);
+}
+
+/**
+ * scif_cnctgnt_ack() - Respond to SCIF_CNCT_GNTACK interrupt message
+ * @msg: Interrupt message
+ *
+ * The remote connection request has finished mapping the local memory.
+ * Place the connection in the connected state and wake up the pending
+ * accept() call.
+ */
+void scif_cnctgnt_ack(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+ struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0];
+
+ mutex_lock(&scif_info.connlock);
+ spin_lock(&ep->lock);
+ /* New ep is now connected with all resources set. */
+ ep->state = SCIFEP_CONNECTED;
+ list_add_tail(&ep->list, &scif_info.connected);
+ wake_up(&ep->conwq);
+ spin_unlock(&ep->lock);
+ mutex_unlock(&scif_info.connlock);
+}
+
+/**
+ * scif_cnctgnt_nack() - Respond to SCIF_CNCT_GNTNACK interrupt message
+ * @msg: Interrupt message
+ *
+ * The remote connection request failed to map the local memory it was sent.
+ * Place the end point in the CLOSING state to indicate it and wake up
+ * the pending accept();
+ */
+void scif_cnctgnt_nack(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+ struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0];
+
+ spin_lock(&ep->lock);
+ ep->state = SCIFEP_CLOSING;
+ wake_up(&ep->conwq);
+ spin_unlock(&ep->lock);
+}
+
+/**
+ * scif_cnctrej() - Respond to SCIF_CNCT_REJ interrupt message
+ * @msg: Interrupt message
+ *
+ * The remote end has rejected the connection request. Set the end
+ * point back to the bound state and wake up the pending connect().
+ */
+void scif_cnctrej(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+ struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0];
+
+ spin_lock(&ep->lock);
+ if (SCIFEP_CONNECTING == ep->state) {
+ ep->state = SCIFEP_BOUND;
+ wake_up(&ep->conwq);
+ }
+ spin_unlock(&ep->lock);
+}
+
+/**
+ * scif_discnct() - Respond to SCIF_DISCNCT interrupt message
+ * @msg: Interrupt message
+ *
+ * The remote node has indicated close() has been called on its end
+ * point. Remove the local end point from the connected list, set its
+ * state to disconnected and ensure accesses to the remote node are
+ * shutdown.
+ *
+ * When all accesses to the remote end have completed then send a
+ * DISCNT_ACK to indicate it can remove its resources and complete
+ * the close routine.
+ */
+void scif_discnct(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+ struct scif_endpt *ep = NULL;
+ struct scif_endpt *tmpep;
+ struct list_head *pos, *tmpq;
+
+ mutex_lock(&scif_info.connlock);
+ list_for_each_safe(pos, tmpq, &scif_info.connected) {
+ tmpep = list_entry(pos, struct scif_endpt, list);
+ /*
+ * The local ep may have sent a disconnect and and been closed
+ * due to a message response time out. It may have been
+ * allocated again and formed a new connection so we want to
+ * check if the remote ep matches
+ */
+ if (((u64)tmpep == msg->payload[1]) &&
+ ((u64)tmpep->remote_ep == msg->payload[0])) {
+ list_del(pos);
+ ep = tmpep;
+ spin_lock(&ep->lock);
+ break;
+ }
+ }
+
+ /*
+ * If the terminated end is not found then this side started closing
+ * before the other side sent the disconnect. If so the ep will no
+ * longer be on the connected list. Regardless the other side
+ * needs to be acked to let it know close is complete.
+ */
+ if (!ep) {
+ mutex_unlock(&scif_info.connlock);
+ goto discnct_ack;
+ }
+
+ ep->state = SCIFEP_DISCONNECTED;
+ list_add_tail(&ep->list, &scif_info.disconnected);
+
+ wake_up_interruptible(&ep->sendwq);
+ wake_up_interruptible(&ep->recvwq);
+ spin_unlock(&ep->lock);
+ mutex_unlock(&scif_info.connlock);
+
+discnct_ack:
+ msg->uop = SCIF_DISCNT_ACK;
+ scif_nodeqp_send(&scif_dev[msg->src.node], msg);
+}
+
+/**
+ * scif_discnct_ack() - Respond to SCIF_DISCNT_ACK interrupt message
+ * @msg: Interrupt message
+ *
+ * Remote side has indicated it has not more references to local resources
+ */
+void scif_discnt_ack(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+ struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0];
+
+ spin_lock(&ep->lock);
+ ep->state = SCIFEP_DISCONNECTED;
+ spin_unlock(&ep->lock);
+ complete(&ep->discon);
+}
+
+/**
+ * scif_clientsend() - Respond to SCIF_CLIENT_SEND interrupt message
+ * @msg: Interrupt message
+ *
+ * Remote side is confirming send or receive interrupt handling is complete.
+ */
+void scif_clientsend(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+ struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0];
+
+ spin_lock(&ep->lock);
+ if (SCIFEP_CONNECTED == ep->state)
+ wake_up_interruptible(&ep->recvwq);
+ spin_unlock(&ep->lock);
+}
+
+/**
+ * scif_clientrcvd() - Respond to SCIF_CLIENT_RCVD interrupt message
+ * @msg: Interrupt message
+ *
+ * Remote side is confirming send or receive interrupt handling is complete.
+ */
+void scif_clientrcvd(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+ struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0];
+
+ spin_lock(&ep->lock);
+ if (SCIFEP_CONNECTED == ep->state)
+ wake_up_interruptible(&ep->sendwq);
+ spin_unlock(&ep->lock);
+}
diff --git a/drivers/misc/mic/scif/scif_epd.h b/drivers/misc/mic/scif/scif_epd.h
new file mode 100644
index 0000000..1771d7a
--- /dev/null
+++ b/drivers/misc/mic/scif/scif_epd.h
@@ -0,0 +1,210 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2014 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Intel SCIF driver.
+ *
+ */
+#ifndef SCIF_EPD_H
+#define SCIF_EPD_H
+
+#include <linux/delay.h>
+#include <linux/scif.h>
+#include <linux/scif_ioctl.h>
+
+#define SCIF_EPLOCK_HELD true
+
+enum scif_epd_state {
+ SCIFEP_UNBOUND,
+ SCIFEP_BOUND,
+ SCIFEP_LISTENING,
+ SCIFEP_CONNECTED,
+ SCIFEP_CONNECTING,
+ SCIFEP_MAPPING,
+ SCIFEP_CLOSING,
+ SCIFEP_CLLISTEN,
+ SCIFEP_DISCONNECTED,
+ SCIFEP_ZOMBIE
+};
+
+/*
+ * struct scif_conreq - Data structure added to the connection list.
+ *
+ * @msg: connection request message received
+ * @list: link to list of connection requests
+ */
+struct scif_conreq {
+ struct scifmsg msg;
+ struct list_head list;
+};
+
+/* Size of the RB for the Endpoint QP */
+#define SCIF_ENDPT_QP_SIZE 0x1000
+
+/*
+ * scif_endpt_qp_info - SCIF endpoint queue pair
+ *
+ * @qp - Qpair for this endpoint
+ * @qp_offset - DMA address of the QP
+ * @gnt_pld - Payload in a SCIF_CNCT_GNT message containing the
+ * physical address of the remote_qp.
+ */
+struct scif_endpt_qp_info {
+ struct scif_qp *qp;
+ dma_addr_t qp_offset;
+ dma_addr_t gnt_pld;
+};
+
+/*
+ * struct scif_endpt - The SCIF endpoint data structure
+ *
+ * @state: end point state
+ * @lock: lock synchronizing access to endpoint fields like state etc
+ * @port: self port information
+ * @peer: peer port information
+ * @backlog: maximum pending connection requests
+ * @qp_info: Endpoint QP information for SCIF messaging
+ * @remote_dev: scifdev used by this endpt to communicate with remote node.
+ * @remote_ep: remote endpoint
+ * @conreqcnt: Keep track of number of connection requests.
+ * @files: Open file information used to match the id passed in with
+ * the flush routine.
+ * @conlist: list of connection requests
+ * @conwq: waitqueue for connection processing
+ * @discon: completion used during disconnection
+ * @sendwq: waitqueue used during sending messages
+ * @recvwq: waitqueue used during message receipt
+ * @sendlock: Synchronize ordering of messages sent
+ * @recvlock: Synchronize ordering of messages received
+ * @list: link to list of various endpoints like connected, listening etc
+ * @li_accept: pending ACCEPTREG
+ * @acceptcnt: pending ACCEPTREG cnt
+ * @liacceptlist: link to listen accept
+ * @miacceptlist: link to uaccept
+ * @listenep: associated listen ep
+ * @conn_work: Non blocking connect work
+ * @conn_port: Connection port
+ * @conn_err: Errors during connection
+ * @conn_async_state: Async connection
+ * @conn_pend_wq: Used by poll while waiting for incoming connections
+ * @conn_list: List of async connection requests
+ * @rma_info: Information for triggering SCIF RMA and DMA operations
+ * @mmu_list: link to list of MMU notifier cleanup work
+ * @anon: anonymous file for use in kernel mode scif poll
+ */
+struct scif_endpt {
+ enum scif_epd_state state;
+ spinlock_t lock;
+ struct scif_port_id port;
+ struct scif_port_id peer;
+ int backlog;
+ struct scif_endpt_qp_info qp_info;
+ struct scif_dev *remote_dev;
+ u64 remote_ep;
+ int conreqcnt;
+ struct files_struct *files;
+ struct list_head conlist;
+ wait_queue_head_t conwq;
+ struct completion discon;
+ wait_queue_head_t sendwq;
+ wait_queue_head_t recvwq;
+ struct mutex sendlock;
+ struct mutex recvlock;
+ struct list_head list;
+ struct list_head li_accept;
+ int acceptcnt;
+ struct list_head liacceptlist;
+ struct list_head miacceptlist;
+ struct scif_endpt *listenep;
+ struct scif_port_id conn_port;
+ int conn_err;
+ int conn_async_state;
+ wait_queue_head_t conn_pend_wq;
+ struct list_head conn_list;
+ struct scif_endpt_rma_info rma_info;
+ struct list_head mmu_list;
+ struct file *anon;
+};
+
+static inline int scifdev_alive(struct scif_endpt *ep)
+{
+ return _scifdev_alive(ep->remote_dev);
+}
+
+/*
+ * scif_verify_epd:
+ * ep: SCIF endpoint
+ *
+ * Checks several generic error conditions and returns the
+ * appropriate error.
+ */
+static inline int scif_verify_epd(struct scif_endpt *ep)
+{
+ if (ep->state == SCIFEP_DISCONNECTED)
+ return -ECONNRESET;
+
+ if (ep->state != SCIFEP_CONNECTED)
+ return -ENOTCONN;
+
+ if (!scifdev_alive(ep))
+ return -ENODEV;
+
+ return 0;
+}
+
+static inline int scif_anon_inode_getfile(scif_epd_t epd)
+{
+ epd->anon = anon_inode_getfile("scif", &scif_anon_fops, NULL, 0);
+ if (IS_ERR(epd->anon))
+ return PTR_ERR(epd->anon);
+ return 0;
+}
+
+static inline void scif_anon_inode_fput(scif_epd_t epd)
+{
+ if (epd->anon) {
+ fput(epd->anon);
+ epd->anon = NULL;
+ }
+}
+
+void scif_cleanup_zombie_epd(void);
+void scif_teardown_ep(void *endpt);
+void scif_cleanup_ep_qp(struct scif_endpt *ep);
+void scif_add_epd_to_zombie_list(struct scif_endpt *ep, bool eplock_held);
+void scif_get_node_info(void);
+void scif_send_acks(struct scif_dev *dev);
+void scif_conn_handler(struct work_struct *work);
+int scif_rsrv_port(u16 port);
+void scif_get_port(u16 port);
+int scif_get_new_port(void);
+void scif_put_port(u16 port);
+int scif_user_send(scif_epd_t epd, void __user *msg, int len, int flags);
+int scif_user_recv(scif_epd_t epd, void __user *msg, int len, int flags);
+void scif_cnctreq(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_cnctgnt(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_cnctgnt_ack(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_cnctgnt_nack(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_cnctrej(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_discnct(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_discnt_ack(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_clientsend(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_clientrcvd(struct scif_dev *scifdev, struct scifmsg *msg);
+int __scif_connect(scif_epd_t epd, struct scif_port_id *dst, bool non_block);
+int __scif_flush(scif_epd_t epd);
+int scif_mmap(struct vm_area_struct *vma, scif_epd_t epd);
+unsigned int __scif_pollfd(struct file *f, poll_table *wait,
+ struct scif_endpt *ep);
+int __scif_pin_pages(void *addr, size_t len, int *out_prot,
+ int map_flags, scif_pinned_pages_t *pages);
+#endif /* SCIF_EPD_H */
diff --git a/drivers/misc/mic/scif/scif_fd.c b/drivers/misc/mic/scif/scif_fd.c
new file mode 100644
index 0000000..f7e8261
--- /dev/null
+++ b/drivers/misc/mic/scif/scif_fd.c
@@ -0,0 +1,471 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2014 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Intel SCIF driver.
+ *
+ */
+#include "scif_main.h"
+
+static int scif_fdopen(struct inode *inode, struct file *f)
+{
+ struct scif_endpt *priv = scif_open();
+
+ if (!priv)
+ return -ENOMEM;
+ f->private_data = priv;
+ return 0;
+}
+
+static int scif_fdclose(struct inode *inode, struct file *f)
+{
+ struct scif_endpt *priv = f->private_data;
+
+ return scif_close(priv);
+}
+
+static int scif_fdmmap(struct file *f, struct vm_area_struct *vma)
+{
+ struct scif_endpt *priv = f->private_data;
+
+ return scif_mmap(vma, priv);
+}
+
+static unsigned int scif_fdpoll(struct file *f, poll_table *wait)
+{
+ struct scif_endpt *priv = f->private_data;
+
+ return __scif_pollfd(f, wait, priv);
+}
+
+static int scif_fdflush(struct file *f, fl_owner_t id)
+{
+ struct scif_endpt *ep = f->private_data;
+
+ spin_lock(&ep->lock);
+ /*
+ * The listening endpoint stashes the open file information before
+ * waiting for incoming connections. The release callback would never be
+ * called if the application closed the endpoint, while waiting for
+ * incoming connections from a separate thread since the file descriptor
+ * reference count is bumped up in the accept IOCTL. Call the flush
+ * routine if the id matches the endpoint open file information so that
+ * the listening endpoint can be woken up and the fd released.
+ */
+ if (ep->files == id)
+ __scif_flush(ep);
+ spin_unlock(&ep->lock);
+ return 0;
+}
+
+static __always_inline void scif_err_debug(int err, const char *str)
+{
+ /*
+ * ENOTCONN is a common uninteresting error which is
+ * flooding debug messages to the console unnecessarily.
+ */
+ if (err < 0 && err != -ENOTCONN)
+ dev_dbg(scif_info.mdev.this_device, "%s err %d\n", str, err);
+}
+
+static long scif_fdioctl(struct file *f, unsigned int cmd, unsigned long arg)
+{
+ struct scif_endpt *priv = f->private_data;
+ void __user *argp = (void __user *)arg;
+ int err = 0;
+ struct scifioctl_msg request;
+ bool non_block = false;
+
+ non_block = !!(f->f_flags & O_NONBLOCK);
+
+ switch (cmd) {
+ case SCIF_BIND:
+ {
+ int pn;
+
+ if (copy_from_user(&pn, argp, sizeof(pn)))
+ return -EFAULT;
+
+ pn = scif_bind(priv, pn);
+ if (pn < 0)
+ return pn;
+
+ if (copy_to_user(argp, &pn, sizeof(pn)))
+ return -EFAULT;
+
+ return 0;
+ }
+ case SCIF_LISTEN:
+ return scif_listen(priv, arg);
+ case SCIF_CONNECT:
+ {
+ struct scifioctl_connect req;
+ struct scif_endpt *ep = (struct scif_endpt *)priv;
+
+ if (copy_from_user(&req, argp, sizeof(req)))
+ return -EFAULT;
+
+ err = __scif_connect(priv, &req.peer, non_block);
+ if (err < 0)
+ return err;
+
+ req.self.node = ep->port.node;
+ req.self.port = ep->port.port;
+
+ if (copy_to_user(argp, &req, sizeof(req)))
+ return -EFAULT;
+
+ return 0;
+ }
+ /*
+ * Accept is done in two halves. The request ioctl does the basic
+ * functionality of accepting the request and returning the information
+ * about it including the internal ID of the end point. The register
+ * is done with the internal ID on a new file descriptor opened by the
+ * requesting process.
+ */
+ case SCIF_ACCEPTREQ:
+ {
+ struct scifioctl_accept request;
+ scif_epd_t *ep = (scif_epd_t *)&request.endpt;
+
+ if (copy_from_user(&request, argp, sizeof(request)))
+ return -EFAULT;
+
+ err = scif_accept(priv, &request.peer, ep, request.flags);
+ if (err < 0)
+ return err;
+
+ if (copy_to_user(argp, &request, sizeof(request))) {
+ scif_close(*ep);
+ return -EFAULT;
+ }
+ /*
+ * Add to the list of user mode eps where the second half
+ * of the accept is not yet completed.
+ */
+ mutex_lock(&scif_info.eplock);
+ list_add_tail(&((*ep)->miacceptlist), &scif_info.uaccept);
+ list_add_tail(&((*ep)->liacceptlist), &priv->li_accept);
+ (*ep)->listenep = priv;
+ priv->acceptcnt++;
+ mutex_unlock(&scif_info.eplock);
+
+ return 0;
+ }
+ case SCIF_ACCEPTREG:
+ {
+ struct scif_endpt *priv = f->private_data;
+ struct scif_endpt *newep;
+ struct scif_endpt *lisep;
+ struct scif_endpt *fep = NULL;
+ struct scif_endpt *tmpep;
+ struct list_head *pos, *tmpq;
+
+ /* Finally replace the pointer to the accepted endpoint */
+ if (copy_from_user(&newep, argp, sizeof(void *)))
+ return -EFAULT;
+
+ /* Remove form the user accept queue */
+ mutex_lock(&scif_info.eplock);
+ list_for_each_safe(pos, tmpq, &scif_info.uaccept) {
+ tmpep = list_entry(pos,
+ struct scif_endpt, miacceptlist);
+ if (tmpep == newep) {
+ list_del(pos);
+ fep = tmpep;
+ break;
+ }
+ }
+
+ if (!fep) {
+ mutex_unlock(&scif_info.eplock);
+ return -ENOENT;
+ }
+
+ lisep = newep->listenep;
+ list_for_each_safe(pos, tmpq, &lisep->li_accept) {
+ tmpep = list_entry(pos,
+ struct scif_endpt, liacceptlist);
+ if (tmpep == newep) {
+ list_del(pos);
+ lisep->acceptcnt--;
+ break;
+ }
+ }
+
+ mutex_unlock(&scif_info.eplock);
+
+ /* Free the resources automatically created from the open. */
+ scif_anon_inode_fput(priv);
+ scif_teardown_ep(priv);
+ scif_add_epd_to_zombie_list(priv, !SCIF_EPLOCK_HELD);
+ f->private_data = newep;
+ return 0;
+ }
+ case SCIF_SEND:
+ {
+ struct scif_endpt *priv = f->private_data;
+
+ if (copy_from_user(&request, argp,
+ sizeof(struct scifioctl_msg))) {
+ err = -EFAULT;
+ goto send_err;
+ }
+ err = scif_user_send(priv, (void __user *)request.msg,
+ request.len, request.flags);
+ if (err < 0)
+ goto send_err;
+ if (copy_to_user(&
+ ((struct scifioctl_msg __user *)argp)->out_len,
+ &err, sizeof(err))) {
+ err = -EFAULT;
+ goto send_err;
+ }
+ err = 0;
+send_err:
+ scif_err_debug(err, "scif_send");
+ return err;
+ }
+ case SCIF_RECV:
+ {
+ struct scif_endpt *priv = f->private_data;
+
+ if (copy_from_user(&request, argp,
+ sizeof(struct scifioctl_msg))) {
+ err = -EFAULT;
+ goto recv_err;
+ }
+
+ err = scif_user_recv(priv, (void __user *)request.msg,
+ request.len, request.flags);
+ if (err < 0)
+ goto recv_err;
+
+ if (copy_to_user(&
+ ((struct scifioctl_msg __user *)argp)->out_len,
+ &err, sizeof(err))) {
+ err = -EFAULT;
+ goto recv_err;
+ }
+ err = 0;
+recv_err:
+ scif_err_debug(err, "scif_recv");
+ return err;
+ }
+ case SCIF_GET_NODEIDS:
+ {
+ struct scifioctl_node_ids node_ids;
+ int entries;
+ u16 *nodes;
+ void __user *unodes, *uself;
+ u16 self;
+
+ if (copy_from_user(&node_ids, argp, sizeof(node_ids))) {
+ err = -EFAULT;
+ goto getnodes_err2;
+ }
+
+ entries = min_t(int, scif_info.maxid, node_ids.len);
+ nodes = kmalloc_array(entries, sizeof(u16), GFP_KERNEL);
+ if (entries && !nodes) {
+ err = -ENOMEM;
+ goto getnodes_err2;
+ }
+ node_ids.len = scif_get_node_ids(nodes, entries, &self);
+
+ unodes = (void __user *)node_ids.nodes;
+ if (copy_to_user(unodes, nodes, sizeof(u16) * entries)) {
+ err = -EFAULT;
+ goto getnodes_err1;
+ }
+
+ uself = (void __user *)node_ids.self;
+ if (copy_to_user(uself, &self, sizeof(u16))) {
+ err = -EFAULT;
+ goto getnodes_err1;
+ }
+
+ if (copy_to_user(argp, &node_ids, sizeof(node_ids))) {
+ err = -EFAULT;
+ goto getnodes_err1;
+ }
+getnodes_err1:
+ kfree(nodes);
+getnodes_err2:
+ return err;
+ }
+ case SCIF_REG:
+ {
+ struct scif_endpt *priv = f->private_data;
+ struct scifioctl_reg reg;
+ off_t ret;
+
+ if (copy_from_user(®, argp, sizeof(reg))) {
+ err = -EFAULT;
+ goto reg_err;
+ }
+ if (reg.flags & SCIF_MAP_KERNEL) {
+ err = -EINVAL;
+ goto reg_err;
+ }
+ ret = scif_register(priv, (void *)reg.addr, reg.len,
+ reg.offset, reg.prot, reg.flags);
+ if (ret < 0) {
+ err = (int)ret;
+ goto reg_err;
+ }
+
+ if (copy_to_user(&((struct scifioctl_reg __user *)argp)
+ ->out_offset, &ret, sizeof(reg.out_offset))) {
+ err = -EFAULT;
+ goto reg_err;
+ }
+ err = 0;
+reg_err:
+ scif_err_debug(err, "scif_register");
+ return err;
+ }
+ case SCIF_UNREG:
+ {
+ struct scif_endpt *priv = f->private_data;
+ struct scifioctl_unreg unreg;
+
+ if (copy_from_user(&unreg, argp, sizeof(unreg))) {
+ err = -EFAULT;
+ goto unreg_err;
+ }
+ err = scif_unregister(priv, unreg.offset, unreg.len);
+unreg_err:
+ scif_err_debug(err, "scif_unregister");
+ return err;
+ }
+ case SCIF_READFROM:
+ {
+ struct scif_endpt *priv = f->private_data;
+ struct scifioctl_copy copy;
+
+ if (copy_from_user(©, argp, sizeof(copy))) {
+ err = -EFAULT;
+ goto readfrom_err;
+ }
+ err = scif_readfrom(priv, copy.loffset, copy.len, copy.roffset,
+ copy.flags);
+readfrom_err:
+ scif_err_debug(err, "scif_readfrom");
+ return err;
+ }
+ case SCIF_WRITETO:
+ {
+ struct scif_endpt *priv = f->private_data;
+ struct scifioctl_copy copy;
+
+ if (copy_from_user(©, argp, sizeof(copy))) {
+ err = -EFAULT;
+ goto writeto_err;
+ }
+ err = scif_writeto(priv, copy.loffset, copy.len, copy.roffset,
+ copy.flags);
+writeto_err:
+ scif_err_debug(err, "scif_writeto");
+ return err;
+ }
+ case SCIF_VREADFROM:
+ {
+ struct scif_endpt *priv = f->private_data;
+ struct scifioctl_copy copy;
+
+ if (copy_from_user(©, argp, sizeof(copy))) {
+ err = -EFAULT;
+ goto vreadfrom_err;
+ }
+ err = scif_vreadfrom(priv, (void __force *)copy.addr, copy.len,
+ copy.roffset, copy.flags);
+vreadfrom_err:
+ scif_err_debug(err, "scif_vreadfrom");
+ return err;
+ }
+ case SCIF_VWRITETO:
+ {
+ struct scif_endpt *priv = f->private_data;
+ struct scifioctl_copy copy;
+
+ if (copy_from_user(©, argp, sizeof(copy))) {
+ err = -EFAULT;
+ goto vwriteto_err;
+ }
+ err = scif_vwriteto(priv, (void __force *)copy.addr, copy.len,
+ copy.roffset, copy.flags);
+vwriteto_err:
+ scif_err_debug(err, "scif_vwriteto");
+ return err;
+ }
+ case SCIF_FENCE_MARK:
+ {
+ struct scif_endpt *priv = f->private_data;
+ struct scifioctl_fence_mark mark;
+ int tmp_mark = 0;
+
+ if (copy_from_user(&mark, argp, sizeof(mark))) {
+ err = -EFAULT;
+ goto fence_mark_err;
+ }
+ err = scif_fence_mark(priv, mark.flags, &tmp_mark);
+ if (err)
+ goto fence_mark_err;
+ if (copy_to_user((void __user *)mark.mark, &tmp_mark,
+ sizeof(tmp_mark))) {
+ err = -EFAULT;
+ goto fence_mark_err;
+ }
+fence_mark_err:
+ scif_err_debug(err, "scif_fence_mark");
+ return err;
+ }
+ case SCIF_FENCE_WAIT:
+ {
+ struct scif_endpt *priv = f->private_data;
+
+ err = scif_fence_wait(priv, arg);
+ scif_err_debug(err, "scif_fence_wait");
+ return err;
+ }
+ case SCIF_FENCE_SIGNAL:
+ {
+ struct scif_endpt *priv = f->private_data;
+ struct scifioctl_fence_signal signal;
+
+ if (copy_from_user(&signal, argp, sizeof(signal))) {
+ err = -EFAULT;
+ goto fence_signal_err;
+ }
+
+ err = scif_fence_signal(priv, signal.loff, signal.lval,
+ signal.roff, signal.rval, signal.flags);
+fence_signal_err:
+ scif_err_debug(err, "scif_fence_signal");
+ return err;
+ }
+ }
+ return -EINVAL;
+}
+
+const struct file_operations scif_fops = {
+ .open = scif_fdopen,
+ .release = scif_fdclose,
+ .unlocked_ioctl = scif_fdioctl,
+ .mmap = scif_fdmmap,
+ .poll = scif_fdpoll,
+ .flush = scif_fdflush,
+ .owner = THIS_MODULE,
+};
diff --git a/drivers/misc/mic/scif/scif_fence.c b/drivers/misc/mic/scif/scif_fence.c
new file mode 100644
index 0000000..7f2c96f
--- /dev/null
+++ b/drivers/misc/mic/scif/scif_fence.c
@@ -0,0 +1,771 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2015 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Intel SCIF driver.
+ *
+ */
+
+#include "scif_main.h"
+
+/**
+ * scif_recv_mark: Handle SCIF_MARK request
+ * @msg: Interrupt message
+ *
+ * The peer has requested a mark.
+ */
+void scif_recv_mark(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+ struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0];
+ int mark, err;
+
+ err = _scif_fence_mark(ep, &mark);
+ if (err)
+ msg->uop = SCIF_MARK_NACK;
+ else
+ msg->uop = SCIF_MARK_ACK;
+ msg->payload[0] = ep->remote_ep;
+ msg->payload[2] = mark;
+ scif_nodeqp_send(ep->remote_dev, msg);
+}
+
+/**
+ * scif_recv_mark_resp: Handle SCIF_MARK_(N)ACK messages.
+ * @msg: Interrupt message
+ *
+ * The peer has responded to a SCIF_MARK message.
+ */
+void scif_recv_mark_resp(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+ struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0];
+ struct scif_fence_info *fence_req =
+ (struct scif_fence_info *)msg->payload[1];
+
+ mutex_lock(&ep->rma_info.rma_lock);
+ if (msg->uop == SCIF_MARK_ACK) {
+ fence_req->state = OP_COMPLETED;
+ fence_req->dma_mark = (int)msg->payload[2];
+ } else {
+ fence_req->state = OP_FAILED;
+ }
+ mutex_unlock(&ep->rma_info.rma_lock);
+ complete(&fence_req->comp);
+}
+
+/**
+ * scif_recv_wait: Handle SCIF_WAIT request
+ * @msg: Interrupt message
+ *
+ * The peer has requested waiting on a fence.
+ */
+void scif_recv_wait(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+ struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0];
+ struct scif_remote_fence_info *fence;
+
+ /*
+ * Allocate structure for remote fence information and
+ * send a NACK if the allocation failed. The peer will
+ * return ENOMEM upon receiving a NACK.
+ */
+ fence = kmalloc(sizeof(*fence), GFP_KERNEL);
+ if (!fence) {
+ msg->payload[0] = ep->remote_ep;
+ msg->uop = SCIF_WAIT_NACK;
+ scif_nodeqp_send(ep->remote_dev, msg);
+ return;
+ }
+
+ /* Prepare the fence request */
+ memcpy(&fence->msg, msg, sizeof(struct scifmsg));
+ INIT_LIST_HEAD(&fence->list);
+
+ /* Insert to the global remote fence request list */
+ mutex_lock(&scif_info.fencelock);
+ atomic_inc(&ep->rma_info.fence_refcount);
+ list_add_tail(&fence->list, &scif_info.fence);
+ mutex_unlock(&scif_info.fencelock);
+
+ schedule_work(&scif_info.misc_work);
+}
+
+/**
+ * scif_recv_wait_resp: Handle SCIF_WAIT_(N)ACK messages.
+ * @msg: Interrupt message
+ *
+ * The peer has responded to a SCIF_WAIT message.
+ */
+void scif_recv_wait_resp(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+ struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0];
+ struct scif_fence_info *fence_req =
+ (struct scif_fence_info *)msg->payload[1];
+
+ mutex_lock(&ep->rma_info.rma_lock);
+ if (msg->uop == SCIF_WAIT_ACK)
+ fence_req->state = OP_COMPLETED;
+ else
+ fence_req->state = OP_FAILED;
+ mutex_unlock(&ep->rma_info.rma_lock);
+ complete(&fence_req->comp);
+}
+
+/**
+ * scif_recv_sig_local: Handle SCIF_SIG_LOCAL request
+ * @msg: Interrupt message
+ *
+ * The peer has requested a signal on a local offset.
+ */
+void scif_recv_sig_local(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+ struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0];
+ int err;
+
+ err = scif_prog_signal(ep, msg->payload[1], msg->payload[2],
+ SCIF_WINDOW_SELF);
+ if (err)
+ msg->uop = SCIF_SIG_NACK;
+ else
+ msg->uop = SCIF_SIG_ACK;
+ msg->payload[0] = ep->remote_ep;
+ scif_nodeqp_send(ep->remote_dev, msg);
+}
+
+/**
+ * scif_recv_sig_remote: Handle SCIF_SIGNAL_REMOTE request
+ * @msg: Interrupt message
+ *
+ * The peer has requested a signal on a remote offset.
+ */
+void scif_recv_sig_remote(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+ struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0];
+ int err;
+
+ err = scif_prog_signal(ep, msg->payload[1], msg->payload[2],
+ SCIF_WINDOW_PEER);
+ if (err)
+ msg->uop = SCIF_SIG_NACK;
+ else
+ msg->uop = SCIF_SIG_ACK;
+ msg->payload[0] = ep->remote_ep;
+ scif_nodeqp_send(ep->remote_dev, msg);
+}
+
+/**
+ * scif_recv_sig_resp: Handle SCIF_SIG_(N)ACK messages.
+ * @msg: Interrupt message
+ *
+ * The peer has responded to a signal request.
+ */
+void scif_recv_sig_resp(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+ struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0];
+ struct scif_fence_info *fence_req =
+ (struct scif_fence_info *)msg->payload[3];
+
+ mutex_lock(&ep->rma_info.rma_lock);
+ if (msg->uop == SCIF_SIG_ACK)
+ fence_req->state = OP_COMPLETED;
+ else
+ fence_req->state = OP_FAILED;
+ mutex_unlock(&ep->rma_info.rma_lock);
+ complete(&fence_req->comp);
+}
+
+static inline void *scif_get_local_va(off_t off, struct scif_window *window)
+{
+ struct page **pages = window->pinned_pages->pages;
+ int page_nr = (off - window->offset) >> PAGE_SHIFT;
+ off_t page_off = off & ~PAGE_MASK;
+
+ return page_address(pages[page_nr]) + page_off;
+}
+
+static void scif_prog_signal_cb(void *arg)
+{
+ struct scif_status *status = arg;
+
+ dma_pool_free(status->ep->remote_dev->signal_pool, status,
+ status->src_dma_addr);
+}
+
+static int _scif_prog_signal(scif_epd_t epd, dma_addr_t dst, u64 val)
+{
+ struct scif_endpt *ep = (struct scif_endpt *)epd;
+ struct dma_chan *chan = ep->rma_info.dma_chan;
+ struct dma_device *ddev = chan->device;
+ bool x100 = !is_dma_copy_aligned(chan->device, 1, 1, 1);
+ struct dma_async_tx_descriptor *tx;
+ struct scif_status *status = NULL;
+ dma_addr_t src;
+ dma_cookie_t cookie;
+ int err;
+
+ tx = ddev->device_prep_dma_memcpy(chan, 0, 0, 0, DMA_PREP_FENCE);
+ if (!tx) {
+ err = -ENOMEM;
+ dev_err(&ep->remote_dev->sdev->dev, "%s %d err %d\n",
+ __func__, __LINE__, err);
+ goto alloc_fail;
+ }
+ cookie = tx->tx_submit(tx);
+ if (dma_submit_error(cookie)) {
+ err = (int)cookie;
+ dev_err(&ep->remote_dev->sdev->dev, "%s %d err %d\n",
+ __func__, __LINE__, err);
+ goto alloc_fail;
+ }
+ dma_async_issue_pending(chan);
+ if (x100) {
+ /*
+ * For X100 use the status descriptor to write the value to
+ * the destination.
+ */
+ tx = ddev->device_prep_dma_imm_data(chan, dst, val, 0);
+ } else {
+ status = dma_pool_alloc(ep->remote_dev->signal_pool, GFP_KERNEL,
+ &src);
+ if (!status) {
+ err = -ENOMEM;
+ dev_err(&ep->remote_dev->sdev->dev, "%s %d err %d\n",
+ __func__, __LINE__, err);
+ goto alloc_fail;
+ }
+ status->val = val;
+ status->src_dma_addr = src;
+ status->ep = ep;
+ src += offsetof(struct scif_status, val);
+ tx = ddev->device_prep_dma_memcpy(chan, dst, src, sizeof(val),
+ DMA_PREP_INTERRUPT);
+ }
+ if (!tx) {
+ err = -ENOMEM;
+ dev_err(&ep->remote_dev->sdev->dev, "%s %d err %d\n",
+ __func__, __LINE__, err);
+ goto dma_fail;
+ }
+ if (!x100) {
+ tx->callback = scif_prog_signal_cb;
+ tx->callback_param = status;
+ }
+ cookie = tx->tx_submit(tx);
+ if (dma_submit_error(cookie)) {
+ err = -EIO;
+ dev_err(&ep->remote_dev->sdev->dev, "%s %d err %d\n",
+ __func__, __LINE__, err);
+ goto dma_fail;
+ }
+ dma_async_issue_pending(chan);
+ return 0;
+dma_fail:
+ if (!x100)
+ dma_pool_free(ep->remote_dev->signal_pool, status,
+ status->src_dma_addr);
+alloc_fail:
+ return err;
+}
+
+/*
+ * scif_prog_signal:
+ * @epd - Endpoint Descriptor
+ * @offset - registered address to write @val to
+ * @val - Value to be written at @offset
+ * @type - Type of the window.
+ *
+ * Arrange to write a value to the registered offset after ensuring that the
+ * offset provided is indeed valid.
+ */
+int scif_prog_signal(scif_epd_t epd, off_t offset, u64 val,
+ enum scif_window_type type)
+{
+ struct scif_endpt *ep = (struct scif_endpt *)epd;
+ struct scif_window *window = NULL;
+ struct scif_rma_req req;
+ dma_addr_t dst_dma_addr;
+ int err;
+
+ mutex_lock(&ep->rma_info.rma_lock);
+ req.out_window = &window;
+ req.offset = offset;
+ req.nr_bytes = sizeof(u64);
+ req.prot = SCIF_PROT_WRITE;
+ req.type = SCIF_WINDOW_SINGLE;
+ if (type == SCIF_WINDOW_SELF)
+ req.head = &ep->rma_info.reg_list;
+ else
+ req.head = &ep->rma_info.remote_reg_list;
+ /* Does a valid window exist? */
+ err = scif_query_window(&req);
+ if (err) {
+ dev_err(scif_info.mdev.this_device,
+ "%s %d err %d\n", __func__, __LINE__, err);
+ goto unlock_ret;
+ }
+
+ if (scif_is_mgmt_node() && scifdev_self(ep->remote_dev)) {
+ u64 *dst_virt;
+
+ if (type == SCIF_WINDOW_SELF)
+ dst_virt = scif_get_local_va(offset, window);
+ else
+ dst_virt =
+ scif_get_local_va(offset, (struct scif_window *)
+ window->peer_window);
+ *dst_virt = val;
+ } else {
+ dst_dma_addr = __scif_off_to_dma_addr(window, offset);
+ err = _scif_prog_signal(epd, dst_dma_addr, val);
+ }
+unlock_ret:
+ mutex_unlock(&ep->rma_info.rma_lock);
+ return err;
+}
+
+static int _scif_fence_wait(scif_epd_t epd, int mark)
+{
+ struct scif_endpt *ep = (struct scif_endpt *)epd;
+ dma_cookie_t cookie = mark & ~SCIF_REMOTE_FENCE;
+ int err;
+
+ /* Wait for DMA callback in scif_fence_mark_cb(..) */
+ err = wait_event_interruptible_timeout(ep->rma_info.markwq,
+ dma_async_is_tx_complete(
+ ep->rma_info.dma_chan,
+ cookie, NULL, NULL) ==
+ DMA_COMPLETE,
+ SCIF_NODE_ALIVE_TIMEOUT);
+ if (!err)
+ err = -ETIMEDOUT;
+ else if (err > 0)
+ err = 0;
+ return err;
+}
+
+/**
+ * scif_rma_handle_remote_fences:
+ *
+ * This routine services remote fence requests.
+ */
+void scif_rma_handle_remote_fences(void)
+{
+ struct list_head *item, *tmp;
+ struct scif_remote_fence_info *fence;
+ struct scif_endpt *ep;
+ int mark, err;
+
+ might_sleep();
+ mutex_lock(&scif_info.fencelock);
+ list_for_each_safe(item, tmp, &scif_info.fence) {
+ fence = list_entry(item, struct scif_remote_fence_info,
+ list);
+ /* Remove fence from global list */
+ list_del(&fence->list);
+
+ /* Initiate the fence operation */
+ ep = (struct scif_endpt *)fence->msg.payload[0];
+ mark = fence->msg.payload[2];
+ err = _scif_fence_wait(ep, mark);
+ if (err)
+ fence->msg.uop = SCIF_WAIT_NACK;
+ else
+ fence->msg.uop = SCIF_WAIT_ACK;
+ fence->msg.payload[0] = ep->remote_ep;
+ scif_nodeqp_send(ep->remote_dev, &fence->msg);
+ kfree(fence);
+ if (!atomic_sub_return(1, &ep->rma_info.fence_refcount))
+ schedule_work(&scif_info.misc_work);
+ }
+ mutex_unlock(&scif_info.fencelock);
+}
+
+static int _scif_send_fence(scif_epd_t epd, int uop, int mark, int *out_mark)
+{
+ int err;
+ struct scifmsg msg;
+ struct scif_fence_info *fence_req;
+ struct scif_endpt *ep = (struct scif_endpt *)epd;
+
+ fence_req = kmalloc(sizeof(*fence_req), GFP_KERNEL);
+ if (!fence_req) {
+ err = -ENOMEM;
+ goto error;
+ }
+
+ fence_req->state = OP_IN_PROGRESS;
+ init_completion(&fence_req->comp);
+
+ msg.src = ep->port;
+ msg.uop = uop;
+ msg.payload[0] = ep->remote_ep;
+ msg.payload[1] = (u64)fence_req;
+ if (uop == SCIF_WAIT)
+ msg.payload[2] = mark;
+ spin_lock(&ep->lock);
+ if (ep->state == SCIFEP_CONNECTED)
+ err = scif_nodeqp_send(ep->remote_dev, &msg);
+ else
+ err = -ENOTCONN;
+ spin_unlock(&ep->lock);
+ if (err)
+ goto error_free;
+retry:
+ /* Wait for a SCIF_WAIT_(N)ACK message */
+ err = wait_for_completion_timeout(&fence_req->comp,
+ SCIF_NODE_ALIVE_TIMEOUT);
+ if (!err && scifdev_alive(ep))
+ goto retry;
+ if (!err)
+ err = -ENODEV;
+ if (err > 0)
+ err = 0;
+ mutex_lock(&ep->rma_info.rma_lock);
+ if (err < 0) {
+ if (fence_req->state == OP_IN_PROGRESS)
+ fence_req->state = OP_FAILED;
+ }
+ if (fence_req->state == OP_FAILED && !err)
+ err = -ENOMEM;
+ if (uop == SCIF_MARK && fence_req->state == OP_COMPLETED)
+ *out_mark = SCIF_REMOTE_FENCE | fence_req->dma_mark;
+ mutex_unlock(&ep->rma_info.rma_lock);
+error_free:
+ kfree(fence_req);
+error:
+ return err;
+}
+
+/**
+ * scif_send_fence_mark:
+ * @epd: end point descriptor.
+ * @out_mark: Output DMA mark reported by peer.
+ *
+ * Send a remote fence mark request.
+ */
+static int scif_send_fence_mark(scif_epd_t epd, int *out_mark)
+{
+ return _scif_send_fence(epd, SCIF_MARK, 0, out_mark);
+}
+
+/**
+ * scif_send_fence_wait:
+ * @epd: end point descriptor.
+ * @mark: DMA mark to wait for.
+ *
+ * Send a remote fence wait request.
+ */
+static int scif_send_fence_wait(scif_epd_t epd, int mark)
+{
+ return _scif_send_fence(epd, SCIF_WAIT, mark, NULL);
+}
+
+static int _scif_send_fence_signal_wait(struct scif_endpt *ep,
+ struct scif_fence_info *fence_req)
+{
+ int err;
+
+retry:
+ /* Wait for a SCIF_SIG_(N)ACK message */
+ err = wait_for_completion_timeout(&fence_req->comp,
+ SCIF_NODE_ALIVE_TIMEOUT);
+ if (!err && scifdev_alive(ep))
+ goto retry;
+ if (!err)
+ err = -ENODEV;
+ if (err > 0)
+ err = 0;
+ if (err < 0) {
+ mutex_lock(&ep->rma_info.rma_lock);
+ if (fence_req->state == OP_IN_PROGRESS)
+ fence_req->state = OP_FAILED;
+ mutex_unlock(&ep->rma_info.rma_lock);
+ }
+ if (fence_req->state == OP_FAILED && !err)
+ err = -ENXIO;
+ return err;
+}
+
+/**
+ * scif_send_fence_signal:
+ * @epd - endpoint descriptor
+ * @loff - local offset
+ * @lval - local value to write to loffset
+ * @roff - remote offset
+ * @rval - remote value to write to roffset
+ * @flags - flags
+ *
+ * Sends a remote fence signal request
+ */
+static int scif_send_fence_signal(scif_epd_t epd, off_t roff, u64 rval,
+ off_t loff, u64 lval, int flags)
+{
+ int err = 0;
+ struct scifmsg msg;
+ struct scif_fence_info *fence_req;
+ struct scif_endpt *ep = (struct scif_endpt *)epd;
+
+ fence_req = kmalloc(sizeof(*fence_req), GFP_KERNEL);
+ if (!fence_req) {
+ err = -ENOMEM;
+ goto error;
+ }
+
+ fence_req->state = OP_IN_PROGRESS;
+ init_completion(&fence_req->comp);
+ msg.src = ep->port;
+ if (flags & SCIF_SIGNAL_LOCAL) {
+ msg.uop = SCIF_SIG_LOCAL;
+ msg.payload[0] = ep->remote_ep;
+ msg.payload[1] = roff;
+ msg.payload[2] = rval;
+ msg.payload[3] = (u64)fence_req;
+ spin_lock(&ep->lock);
+ if (ep->state == SCIFEP_CONNECTED)
+ err = scif_nodeqp_send(ep->remote_dev, &msg);
+ else
+ err = -ENOTCONN;
+ spin_unlock(&ep->lock);
+ if (err)
+ goto error_free;
+ err = _scif_send_fence_signal_wait(ep, fence_req);
+ if (err)
+ goto error_free;
+ }
+ fence_req->state = OP_IN_PROGRESS;
+
+ if (flags & SCIF_SIGNAL_REMOTE) {
+ msg.uop = SCIF_SIG_REMOTE;
+ msg.payload[0] = ep->remote_ep;
+ msg.payload[1] = loff;
+ msg.payload[2] = lval;
+ msg.payload[3] = (u64)fence_req;
+ spin_lock(&ep->lock);
+ if (ep->state == SCIFEP_CONNECTED)
+ err = scif_nodeqp_send(ep->remote_dev, &msg);
+ else
+ err = -ENOTCONN;
+ spin_unlock(&ep->lock);
+ if (err)
+ goto error_free;
+ err = _scif_send_fence_signal_wait(ep, fence_req);
+ }
+error_free:
+ kfree(fence_req);
+error:
+ return err;
+}
+
+static void scif_fence_mark_cb(void *arg)
+{
+ struct scif_endpt *ep = (struct scif_endpt *)arg;
+
+ wake_up_interruptible(&ep->rma_info.markwq);
+ atomic_dec(&ep->rma_info.fence_refcount);
+}
+
+/*
+ * _scif_fence_mark:
+ *
+ * @epd - endpoint descriptor
+ * Set up a mark for this endpoint and return the value of the mark.
+ */
+int _scif_fence_mark(scif_epd_t epd, int *mark)
+{
+ struct scif_endpt *ep = (struct scif_endpt *)epd;
+ struct dma_chan *chan = ep->rma_info.dma_chan;
+ struct dma_device *ddev = chan->device;
+ struct dma_async_tx_descriptor *tx;
+ dma_cookie_t cookie;
+ int err;
+
+ tx = ddev->device_prep_dma_memcpy(chan, 0, 0, 0, DMA_PREP_FENCE);
+ if (!tx) {
+ err = -ENOMEM;
+ dev_err(&ep->remote_dev->sdev->dev, "%s %d err %d\n",
+ __func__, __LINE__, err);
+ return err;
+ }
+ cookie = tx->tx_submit(tx);
+ if (dma_submit_error(cookie)) {
+ err = (int)cookie;
+ dev_err(&ep->remote_dev->sdev->dev, "%s %d err %d\n",
+ __func__, __LINE__, err);
+ return err;
+ }
+ dma_async_issue_pending(chan);
+ tx = ddev->device_prep_dma_interrupt(chan, DMA_PREP_INTERRUPT);
+ if (!tx) {
+ err = -ENOMEM;
+ dev_err(&ep->remote_dev->sdev->dev, "%s %d err %d\n",
+ __func__, __LINE__, err);
+ return err;
+ }
+ tx->callback = scif_fence_mark_cb;
+ tx->callback_param = ep;
+ *mark = cookie = tx->tx_submit(tx);
+ if (dma_submit_error(cookie)) {
+ err = (int)cookie;
+ dev_err(&ep->remote_dev->sdev->dev, "%s %d err %d\n",
+ __func__, __LINE__, err);
+ return err;
+ }
+ atomic_inc(&ep->rma_info.fence_refcount);
+ dma_async_issue_pending(chan);
+ return 0;
+}
+
+#define SCIF_LOOPB_MAGIC_MARK 0xdead
+
+int scif_fence_mark(scif_epd_t epd, int flags, int *mark)
+{
+ struct scif_endpt *ep = (struct scif_endpt *)epd;
+ int err = 0;
+
+ dev_dbg(scif_info.mdev.this_device,
+ "SCIFAPI fence_mark: ep %p flags 0x%x mark 0x%x\n",
+ ep, flags, *mark);
+ err = scif_verify_epd(ep);
+ if (err)
+ return err;
+
+ /* Invalid flags? */
+ if (flags & ~(SCIF_FENCE_INIT_SELF | SCIF_FENCE_INIT_PEER))
+ return -EINVAL;
+
+ /* At least one of init self or peer RMA should be set */
+ if (!(flags & (SCIF_FENCE_INIT_SELF | SCIF_FENCE_INIT_PEER)))
+ return -EINVAL;
+
+ /* Exactly one of init self or peer RMA should be set but not both */
+ if ((flags & SCIF_FENCE_INIT_SELF) && (flags & SCIF_FENCE_INIT_PEER))
+ return -EINVAL;
+
+ /*
+ * Management node loopback does not need to use DMA.
+ * Return a valid mark to be symmetric.
+ */
+ if (scifdev_self(ep->remote_dev) && scif_is_mgmt_node()) {
+ *mark = SCIF_LOOPB_MAGIC_MARK;
+ return 0;
+ }
+
+ if (flags & SCIF_FENCE_INIT_SELF)
+ err = _scif_fence_mark(epd, mark);
+ else
+ err = scif_send_fence_mark(ep, mark);
+
+ if (err)
+ dev_err(scif_info.mdev.this_device,
+ "%s %d err %d\n", __func__, __LINE__, err);
+ dev_dbg(scif_info.mdev.this_device,
+ "SCIFAPI fence_mark: ep %p flags 0x%x mark 0x%x err %d\n",
+ ep, flags, *mark, err);
+ return err;
+}
+EXPORT_SYMBOL_GPL(scif_fence_mark);
+
+int scif_fence_wait(scif_epd_t epd, int mark)
+{
+ struct scif_endpt *ep = (struct scif_endpt *)epd;
+ int err = 0;
+
+ dev_dbg(scif_info.mdev.this_device,
+ "SCIFAPI fence_wait: ep %p mark 0x%x\n",
+ ep, mark);
+ err = scif_verify_epd(ep);
+ if (err)
+ return err;
+ /*
+ * Management node loopback does not need to use DMA.
+ * The only valid mark provided is 0 so simply
+ * return success if the mark is valid.
+ */
+ if (scifdev_self(ep->remote_dev) && scif_is_mgmt_node()) {
+ if (mark == SCIF_LOOPB_MAGIC_MARK)
+ return 0;
+ else
+ return -EINVAL;
+ }
+ if (mark & SCIF_REMOTE_FENCE)
+ err = scif_send_fence_wait(epd, mark);
+ else
+ err = _scif_fence_wait(epd, mark);
+ if (err < 0)
+ dev_err(scif_info.mdev.this_device,
+ "%s %d err %d\n", __func__, __LINE__, err);
+ return err;
+}
+EXPORT_SYMBOL_GPL(scif_fence_wait);
+
+int scif_fence_signal(scif_epd_t epd, off_t loff, u64 lval,
+ off_t roff, u64 rval, int flags)
+{
+ struct scif_endpt *ep = (struct scif_endpt *)epd;
+ int err = 0;
+
+ dev_dbg(scif_info.mdev.this_device,
+ "SCIFAPI fence_signal: ep %p loff 0x%lx lval 0x%llx roff 0x%lx rval 0x%llx flags 0x%x\n",
+ ep, loff, lval, roff, rval, flags);
+ err = scif_verify_epd(ep);
+ if (err)
+ return err;
+
+ /* Invalid flags? */
+ if (flags & ~(SCIF_FENCE_INIT_SELF | SCIF_FENCE_INIT_PEER |
+ SCIF_SIGNAL_LOCAL | SCIF_SIGNAL_REMOTE))
+ return -EINVAL;
+
+ /* At least one of init self or peer RMA should be set */
+ if (!(flags & (SCIF_FENCE_INIT_SELF | SCIF_FENCE_INIT_PEER)))
+ return -EINVAL;
+
+ /* Exactly one of init self or peer RMA should be set but not both */
+ if ((flags & SCIF_FENCE_INIT_SELF) && (flags & SCIF_FENCE_INIT_PEER))
+ return -EINVAL;
+
+ /* At least one of SCIF_SIGNAL_LOCAL or SCIF_SIGNAL_REMOTE required */
+ if (!(flags & (SCIF_SIGNAL_LOCAL | SCIF_SIGNAL_REMOTE)))
+ return -EINVAL;
+
+ /* Only Dword offsets allowed */
+ if ((flags & SCIF_SIGNAL_LOCAL) && (loff & (sizeof(u32) - 1)))
+ return -EINVAL;
+
+ /* Only Dword aligned offsets allowed */
+ if ((flags & SCIF_SIGNAL_REMOTE) && (roff & (sizeof(u32) - 1)))
+ return -EINVAL;
+
+ if (flags & SCIF_FENCE_INIT_PEER) {
+ err = scif_send_fence_signal(epd, roff, rval, loff,
+ lval, flags);
+ } else {
+ /* Local Signal in Local RAS */
+ if (flags & SCIF_SIGNAL_LOCAL) {
+ err = scif_prog_signal(epd, loff, lval,
+ SCIF_WINDOW_SELF);
+ if (err)
+ goto error_ret;
+ }
+
+ /* Signal in Remote RAS */
+ if (flags & SCIF_SIGNAL_REMOTE)
+ err = scif_prog_signal(epd, roff,
+ rval, SCIF_WINDOW_PEER);
+ }
+error_ret:
+ if (err)
+ dev_err(scif_info.mdev.this_device,
+ "%s %d err %d\n", __func__, __LINE__, err);
+ return err;
+}
+EXPORT_SYMBOL_GPL(scif_fence_signal);
diff --git a/drivers/misc/mic/scif/scif_main.c b/drivers/misc/mic/scif/scif_main.c
new file mode 100644
index 0000000..36d847a
--- /dev/null
+++ b/drivers/misc/mic/scif/scif_main.c
@@ -0,0 +1,359 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2014 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Intel SCIF driver.
+ *
+ */
+#include <linux/module.h>
+#include <linux/idr.h>
+
+#include <linux/mic_common.h>
+#include "../common/mic_dev.h"
+#include "../bus/scif_bus.h"
+#include "scif_peer_bus.h"
+#include "scif_main.h"
+#include "scif_map.h"
+
+struct scif_info scif_info = {
+ .mdev = {
+ .minor = MISC_DYNAMIC_MINOR,
+ .name = "scif",
+ .fops = &scif_fops,
+ }
+};
+
+struct scif_dev *scif_dev;
+struct kmem_cache *unaligned_cache;
+static atomic_t g_loopb_cnt;
+
+/* Runs in the context of intr_wq */
+static void scif_intr_bh_handler(struct work_struct *work)
+{
+ struct scif_dev *scifdev =
+ container_of(work, struct scif_dev, intr_bh);
+
+ if (scifdev_self(scifdev))
+ scif_loopb_msg_handler(scifdev, scifdev->qpairs);
+ else
+ scif_nodeqp_intrhandler(scifdev, scifdev->qpairs);
+}
+
+int scif_setup_intr_wq(struct scif_dev *scifdev)
+{
+ if (!scifdev->intr_wq) {
+ snprintf(scifdev->intr_wqname, sizeof(scifdev->intr_wqname),
+ "SCIF INTR %d", scifdev->node);
+ scifdev->intr_wq =
+ alloc_ordered_workqueue(scifdev->intr_wqname, 0);
+ if (!scifdev->intr_wq)
+ return -ENOMEM;
+ INIT_WORK(&scifdev->intr_bh, scif_intr_bh_handler);
+ }
+ return 0;
+}
+
+void scif_destroy_intr_wq(struct scif_dev *scifdev)
+{
+ if (scifdev->intr_wq) {
+ destroy_workqueue(scifdev->intr_wq);
+ scifdev->intr_wq = NULL;
+ }
+}
+
+irqreturn_t scif_intr_handler(int irq, void *data)
+{
+ struct scif_dev *scifdev = data;
+ struct scif_hw_dev *sdev = scifdev->sdev;
+
+ sdev->hw_ops->ack_interrupt(sdev, scifdev->db);
+ queue_work(scifdev->intr_wq, &scifdev->intr_bh);
+ return IRQ_HANDLED;
+}
+
+static void scif_qp_setup_handler(struct work_struct *work)
+{
+ struct scif_dev *scifdev = container_of(work, struct scif_dev,
+ qp_dwork.work);
+ struct scif_hw_dev *sdev = scifdev->sdev;
+ dma_addr_t da = 0;
+ int err;
+
+ if (scif_is_mgmt_node()) {
+ struct mic_bootparam *bp = sdev->dp;
+
+ da = bp->scif_card_dma_addr;
+ scifdev->rdb = bp->h2c_scif_db;
+ } else {
+ struct mic_bootparam __iomem *bp = sdev->rdp;
+
+ da = readq(&bp->scif_host_dma_addr);
+ scifdev->rdb = ioread8(&bp->c2h_scif_db);
+ }
+ if (da) {
+ err = scif_qp_response(da, scifdev);
+ if (err)
+ dev_err(&scifdev->sdev->dev,
+ "scif_qp_response err %d\n", err);
+ } else {
+ schedule_delayed_work(&scifdev->qp_dwork,
+ msecs_to_jiffies(1000));
+ }
+}
+
+static int scif_setup_scifdev(void)
+{
+ /* We support a maximum of 129 SCIF nodes including the mgmt node */
+#define MAX_SCIF_NODES 129
+ int i;
+ u8 num_nodes = MAX_SCIF_NODES;
+
+ scif_dev = kcalloc(num_nodes, sizeof(*scif_dev), GFP_KERNEL);
+ if (!scif_dev)
+ return -ENOMEM;
+ for (i = 0; i < num_nodes; i++) {
+ struct scif_dev *scifdev = &scif_dev[i];
+
+ scifdev->node = i;
+ scifdev->exit = OP_IDLE;
+ init_waitqueue_head(&scifdev->disconn_wq);
+ mutex_init(&scifdev->lock);
+ INIT_WORK(&scifdev->peer_add_work, scif_add_peer_device);
+ INIT_DELAYED_WORK(&scifdev->p2p_dwork,
+ scif_poll_qp_state);
+ INIT_DELAYED_WORK(&scifdev->qp_dwork,
+ scif_qp_setup_handler);
+ INIT_LIST_HEAD(&scifdev->p2p);
+ RCU_INIT_POINTER(scifdev->spdev, NULL);
+ }
+ return 0;
+}
+
+static void scif_destroy_scifdev(void)
+{
+ kfree(scif_dev);
+}
+
+static int scif_probe(struct scif_hw_dev *sdev)
+{
+ struct scif_dev *scifdev = &scif_dev[sdev->dnode];
+ int rc;
+
+ dev_set_drvdata(&sdev->dev, sdev);
+ scifdev->sdev = sdev;
+
+ if (1 == atomic_add_return(1, &g_loopb_cnt)) {
+ struct scif_dev *loopb_dev = &scif_dev[sdev->snode];
+
+ loopb_dev->sdev = sdev;
+ rc = scif_setup_loopback_qp(loopb_dev);
+ if (rc)
+ goto exit;
+ }
+
+ rc = scif_setup_intr_wq(scifdev);
+ if (rc)
+ goto destroy_loopb;
+ rc = scif_setup_qp(scifdev);
+ if (rc)
+ goto destroy_intr;
+ scifdev->db = sdev->hw_ops->next_db(sdev);
+ scifdev->cookie = sdev->hw_ops->request_irq(sdev, scif_intr_handler,
+ "SCIF_INTR", scifdev,
+ scifdev->db);
+ if (IS_ERR(scifdev->cookie)) {
+ rc = PTR_ERR(scifdev->cookie);
+ goto free_qp;
+ }
+ if (scif_is_mgmt_node()) {
+ struct mic_bootparam *bp = sdev->dp;
+
+ bp->c2h_scif_db = scifdev->db;
+ bp->scif_host_dma_addr = scifdev->qp_dma_addr;
+ } else {
+ struct mic_bootparam __iomem *bp = sdev->rdp;
+
+ iowrite8(scifdev->db, &bp->h2c_scif_db);
+ writeq(scifdev->qp_dma_addr, &bp->scif_card_dma_addr);
+ }
+ schedule_delayed_work(&scifdev->qp_dwork,
+ msecs_to_jiffies(1000));
+ return rc;
+free_qp:
+ scif_free_qp(scifdev);
+destroy_intr:
+ scif_destroy_intr_wq(scifdev);
+destroy_loopb:
+ if (atomic_dec_and_test(&g_loopb_cnt))
+ scif_destroy_loopback_qp(&scif_dev[sdev->snode]);
+exit:
+ return rc;
+}
+
+void scif_stop(struct scif_dev *scifdev)
+{
+ struct scif_dev *dev;
+ int i;
+
+ for (i = scif_info.maxid; i >= 0; i--) {
+ dev = &scif_dev[i];
+ if (scifdev_self(dev))
+ continue;
+ scif_handle_remove_node(i);
+ }
+}
+
+static void scif_remove(struct scif_hw_dev *sdev)
+{
+ struct scif_dev *scifdev = &scif_dev[sdev->dnode];
+
+ if (scif_is_mgmt_node()) {
+ struct mic_bootparam *bp = sdev->dp;
+
+ bp->c2h_scif_db = -1;
+ bp->scif_host_dma_addr = 0x0;
+ } else {
+ struct mic_bootparam __iomem *bp = sdev->rdp;
+
+ iowrite8(-1, &bp->h2c_scif_db);
+ writeq(0x0, &bp->scif_card_dma_addr);
+ }
+ if (scif_is_mgmt_node()) {
+ scif_disconnect_node(scifdev->node, true);
+ } else {
+ scif_info.card_initiated_exit = true;
+ scif_stop(scifdev);
+ }
+ if (atomic_dec_and_test(&g_loopb_cnt))
+ scif_destroy_loopback_qp(&scif_dev[sdev->snode]);
+ if (scifdev->cookie) {
+ sdev->hw_ops->free_irq(sdev, scifdev->cookie, scifdev);
+ scifdev->cookie = NULL;
+ }
+ scif_destroy_intr_wq(scifdev);
+ cancel_delayed_work(&scifdev->qp_dwork);
+ scif_free_qp(scifdev);
+ scifdev->rdb = -1;
+ scifdev->sdev = NULL;
+}
+
+static struct scif_hw_dev_id id_table[] = {
+ { MIC_SCIF_DEV, SCIF_DEV_ANY_ID },
+ { 0 },
+};
+
+static struct scif_driver scif_driver = {
+ .driver.name = KBUILD_MODNAME,
+ .driver.owner = THIS_MODULE,
+ .id_table = id_table,
+ .probe = scif_probe,
+ .remove = scif_remove,
+};
+
+static int _scif_init(void)
+{
+ int rc;
+
+ mutex_init(&scif_info.eplock);
+ spin_lock_init(&scif_info.rmalock);
+ spin_lock_init(&scif_info.nb_connect_lock);
+ spin_lock_init(&scif_info.port_lock);
+ mutex_init(&scif_info.conflock);
+ mutex_init(&scif_info.connlock);
+ mutex_init(&scif_info.fencelock);
+ INIT_LIST_HEAD(&scif_info.uaccept);
+ INIT_LIST_HEAD(&scif_info.listen);
+ INIT_LIST_HEAD(&scif_info.zombie);
+ INIT_LIST_HEAD(&scif_info.connected);
+ INIT_LIST_HEAD(&scif_info.disconnected);
+ INIT_LIST_HEAD(&scif_info.rma);
+ INIT_LIST_HEAD(&scif_info.rma_tc);
+ INIT_LIST_HEAD(&scif_info.mmu_notif_cleanup);
+ INIT_LIST_HEAD(&scif_info.fence);
+ INIT_LIST_HEAD(&scif_info.nb_connect_list);
+ init_waitqueue_head(&scif_info.exitwq);
+ scif_info.rma_tc_limit = SCIF_RMA_TEMP_CACHE_LIMIT;
+ scif_info.en_msg_log = 0;
+ scif_info.p2p_enable = 1;
+ rc = scif_setup_scifdev();
+ if (rc)
+ goto error;
+ unaligned_cache = kmem_cache_create("Unaligned_DMA",
+ SCIF_KMEM_UNALIGNED_BUF_SIZE,
+ 0, SLAB_HWCACHE_ALIGN, NULL);
+ if (!unaligned_cache) {
+ rc = -ENOMEM;
+ goto free_sdev;
+ }
+ INIT_WORK(&scif_info.misc_work, scif_misc_handler);
+ INIT_WORK(&scif_info.mmu_notif_work, scif_mmu_notif_handler);
+ INIT_WORK(&scif_info.conn_work, scif_conn_handler);
+ idr_init(&scif_ports);
+ return 0;
+free_sdev:
+ scif_destroy_scifdev();
+error:
+ return rc;
+}
+
+static void _scif_exit(void)
+{
+ idr_destroy(&scif_ports);
+ kmem_cache_destroy(unaligned_cache);
+ scif_destroy_scifdev();
+}
+
+static int __init scif_init(void)
+{
+ struct miscdevice *mdev = &scif_info.mdev;
+ int rc;
+
+ _scif_init();
+ iova_cache_get();
+ rc = scif_peer_bus_init();
+ if (rc)
+ goto exit;
+ rc = scif_register_driver(&scif_driver);
+ if (rc)
+ goto peer_bus_exit;
+ rc = misc_register(mdev);
+ if (rc)
+ goto unreg_scif;
+ scif_init_debugfs();
+ return 0;
+unreg_scif:
+ scif_unregister_driver(&scif_driver);
+peer_bus_exit:
+ scif_peer_bus_exit();
+exit:
+ _scif_exit();
+ return rc;
+}
+
+static void __exit scif_exit(void)
+{
+ scif_exit_debugfs();
+ misc_deregister(&scif_info.mdev);
+ scif_unregister_driver(&scif_driver);
+ scif_peer_bus_exit();
+ iova_cache_put();
+ _scif_exit();
+}
+
+module_init(scif_init);
+module_exit(scif_exit);
+
+MODULE_DEVICE_TABLE(scif, id_table);
+MODULE_AUTHOR("Intel Corporation");
+MODULE_DESCRIPTION("Intel(R) SCIF driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/misc/mic/scif/scif_main.h b/drivers/misc/mic/scif/scif_main.h
new file mode 100644
index 0000000..a08f0b6
--- /dev/null
+++ b/drivers/misc/mic/scif/scif_main.h
@@ -0,0 +1,283 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2014 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Intel SCIF driver.
+ *
+ */
+#ifndef SCIF_MAIN_H
+#define SCIF_MAIN_H
+
+#include <linux/sched.h>
+#include <linux/pci.h>
+#include <linux/miscdevice.h>
+#include <linux/dmaengine.h>
+#include <linux/iova.h>
+#include <linux/anon_inodes.h>
+#include <linux/file.h>
+#include <linux/vmalloc.h>
+#include <linux/scif.h>
+#include "../common/mic_dev.h"
+
+#define SCIF_MGMT_NODE 0
+#define SCIF_DEFAULT_WATCHDOG_TO 30
+#define SCIF_NODE_ACCEPT_TIMEOUT (3 * HZ)
+#define SCIF_NODE_ALIVE_TIMEOUT (SCIF_DEFAULT_WATCHDOG_TO * HZ)
+#define SCIF_RMA_TEMP_CACHE_LIMIT 0x20000
+
+/*
+ * Generic state used for certain node QP message exchanges
+ * like Unregister, Alloc etc.
+ */
+enum scif_msg_state {
+ OP_IDLE = 1,
+ OP_IN_PROGRESS,
+ OP_COMPLETED,
+ OP_FAILED
+};
+
+/*
+ * struct scif_info - Global SCIF information
+ *
+ * @nodeid: Node ID this node is to others
+ * @maxid: Max known node ID
+ * @total: Total number of SCIF nodes
+ * @nr_zombies: number of zombie endpoints
+ * @eplock: Lock to synchronize listening, zombie endpoint lists
+ * @connlock: Lock to synchronize connected and disconnected lists
+ * @nb_connect_lock: Synchronize non blocking connect operations
+ * @port_lock: Synchronize access to SCIF ports
+ * @uaccept: List of user acceptreq waiting for acceptreg
+ * @listen: List of listening end points
+ * @zombie: List of zombie end points with pending RMA's
+ * @connected: List of end points in connected state
+ * @disconnected: List of end points in disconnected state
+ * @nb_connect_list: List for non blocking connections
+ * @misc_work: miscellaneous SCIF tasks
+ * @conflock: Lock to synchronize SCIF node configuration changes
+ * @en_msg_log: Enable debug message logging
+ * @p2p_enable: Enable P2P SCIF network
+ * @mdev: The MISC device
+ * @conn_work: Work for workqueue handling all connections
+ * @exitwq: Wait queue for waiting for an EXIT node QP message response
+ * @loopb_dev: Dummy SCIF device used for loopback
+ * @loopb_wq: Workqueue used for handling loopback messages
+ * @loopb_wqname[16]: Name of loopback workqueue
+ * @loopb_work: Used for submitting work to loopb_wq
+ * @loopb_recv_q: List of messages received on the loopb_wq
+ * @card_initiated_exit: set when the card has initiated the exit
+ * @rmalock: Synchronize access to RMA operations
+ * @fencelock: Synchronize access to list of remote fences requested.
+ * @rma: List of temporary registered windows to be destroyed.
+ * @rma_tc: List of temporary registered & cached Windows to be destroyed
+ * @fence: List of remote fence requests
+ * @mmu_notif_work: Work for registration caching MMU notifier workqueue
+ * @mmu_notif_cleanup: List of temporary cached windows for reg cache
+ * @rma_tc_limit: RMA temporary cache limit
+ */
+struct scif_info {
+ u8 nodeid;
+ u8 maxid;
+ u8 total;
+ u32 nr_zombies;
+ struct mutex eplock;
+ struct mutex connlock;
+ spinlock_t nb_connect_lock;
+ spinlock_t port_lock;
+ struct list_head uaccept;
+ struct list_head listen;
+ struct list_head zombie;
+ struct list_head connected;
+ struct list_head disconnected;
+ struct list_head nb_connect_list;
+ struct work_struct misc_work;
+ struct mutex conflock;
+ u8 en_msg_log;
+ u8 p2p_enable;
+ struct miscdevice mdev;
+ struct work_struct conn_work;
+ wait_queue_head_t exitwq;
+ struct scif_dev *loopb_dev;
+ struct workqueue_struct *loopb_wq;
+ char loopb_wqname[16];
+ struct work_struct loopb_work;
+ struct list_head loopb_recv_q;
+ bool card_initiated_exit;
+ spinlock_t rmalock;
+ struct mutex fencelock;
+ struct list_head rma;
+ struct list_head rma_tc;
+ struct list_head fence;
+ struct work_struct mmu_notif_work;
+ struct list_head mmu_notif_cleanup;
+ unsigned long rma_tc_limit;
+};
+
+/*
+ * struct scif_p2p_info - SCIF mapping information used for P2P
+ *
+ * @ppi_peer_id - SCIF peer node id
+ * @ppi_sg - Scatter list for bar information (One for mmio and one for aper)
+ * @sg_nentries - Number of entries in the scatterlist
+ * @ppi_da: DMA address for MMIO and APER bars
+ * @ppi_len: Length of MMIO and APER bars
+ * @ppi_list: Link in list of mapping information
+ */
+struct scif_p2p_info {
+ u8 ppi_peer_id;
+ struct scatterlist *ppi_sg[2];
+ u64 sg_nentries[2];
+ dma_addr_t ppi_da[2];
+ u64 ppi_len[2];
+#define SCIF_PPI_MMIO 0
+#define SCIF_PPI_APER 1
+ struct list_head ppi_list;
+};
+
+/*
+ * struct scif_dev - SCIF remote device specific fields
+ *
+ * @node: Node id
+ * @p2p: List of P2P mapping information
+ * @qpairs: The node queue pair for exchanging control messages
+ * @intr_wq: Workqueue for handling Node QP messages
+ * @intr_wqname: Name of node QP workqueue for handling interrupts
+ * @intr_bh: Used for submitting work to intr_wq
+ * @lock: Lock used for synchronizing access to the scif device
+ * @sdev: SCIF hardware device on the SCIF hardware bus
+ * @db: doorbell the peer will trigger to generate an interrupt on self
+ * @rdb: Doorbell to trigger on the peer to generate an interrupt on the peer
+ * @cookie: Cookie received while registering the interrupt handler
+ * @peer_add_work: Work for handling device_add for peer devices
+ * @p2p_dwork: Delayed work to enable polling for P2P state
+ * @qp_dwork: Delayed work for enabling polling for remote QP information
+ * @p2p_retry: Number of times to retry polling of P2P state
+ * @base_addr: P2P aperture bar base address
+ * @mic_mw mmio: The peer MMIO information used for P2P
+ * @spdev: SCIF peer device on the SCIF peer bus
+ * @node_remove_ack_pending: True if a node_remove_ack is pending
+ * @exit_ack_pending: true if an exit_ack is pending
+ * @disconn_wq: Used while waiting for a node remove response
+ * @disconn_rescnt: Keeps track of number of node remove requests sent
+ * @exit: Status of exit message
+ * @qp_dma_addr: Queue pair DMA address passed to the peer
+ * @dma_ch_idx: Round robin index for DMA channels
+ * @signal_pool: DMA pool used for scheduling scif_fence_signal DMA's
+*/
+struct scif_dev {
+ u8 node;
+ struct list_head p2p;
+ struct scif_qp *qpairs;
+ struct workqueue_struct *intr_wq;
+ char intr_wqname[16];
+ struct work_struct intr_bh;
+ struct mutex lock;
+ struct scif_hw_dev *sdev;
+ int db;
+ int rdb;
+ struct mic_irq *cookie;
+ struct work_struct peer_add_work;
+ struct delayed_work p2p_dwork;
+ struct delayed_work qp_dwork;
+ int p2p_retry;
+ dma_addr_t base_addr;
+ struct mic_mw mmio;
+ struct scif_peer_dev __rcu *spdev;
+ bool node_remove_ack_pending;
+ bool exit_ack_pending;
+ wait_queue_head_t disconn_wq;
+ atomic_t disconn_rescnt;
+ enum scif_msg_state exit;
+ dma_addr_t qp_dma_addr;
+ int dma_ch_idx;
+ struct dma_pool *signal_pool;
+};
+
+extern bool scif_reg_cache_enable;
+extern bool scif_ulimit_check;
+extern struct scif_info scif_info;
+extern struct idr scif_ports;
+extern struct bus_type scif_peer_bus;
+extern struct scif_dev *scif_dev;
+extern const struct file_operations scif_fops;
+extern const struct file_operations scif_anon_fops;
+
+/* Size of the RB for the Node QP */
+#define SCIF_NODE_QP_SIZE 0x10000
+
+#include "scif_nodeqp.h"
+#include "scif_rma.h"
+#include "scif_rma_list.h"
+
+/*
+ * scifdev_self:
+ * @dev: The remote SCIF Device
+ *
+ * Returns true if the SCIF Device passed is the self aka Loopback SCIF device.
+ */
+static inline int scifdev_self(struct scif_dev *dev)
+{
+ return dev->node == scif_info.nodeid;
+}
+
+static inline bool scif_is_mgmt_node(void)
+{
+ return !scif_info.nodeid;
+}
+
+/*
+ * scifdev_is_p2p:
+ * @dev: The remote SCIF Device
+ *
+ * Returns true if the SCIF Device is a MIC Peer to Peer SCIF device.
+ */
+static inline bool scifdev_is_p2p(struct scif_dev *dev)
+{
+ if (scif_is_mgmt_node())
+ return false;
+ else
+ return dev != &scif_dev[SCIF_MGMT_NODE] &&
+ !scifdev_self(dev);
+}
+
+/*
+ * scifdev_alive:
+ * @scifdev: The remote SCIF Device
+ *
+ * Returns true if the remote SCIF Device is running or sleeping for
+ * this endpoint.
+ */
+static inline int _scifdev_alive(struct scif_dev *scifdev)
+{
+ struct scif_peer_dev *spdev;
+
+ rcu_read_lock();
+ spdev = rcu_dereference(scifdev->spdev);
+ rcu_read_unlock();
+ return !!spdev;
+}
+
+#include "scif_epd.h"
+
+void __init scif_init_debugfs(void);
+void scif_exit_debugfs(void);
+int scif_setup_intr_wq(struct scif_dev *scifdev);
+void scif_destroy_intr_wq(struct scif_dev *scifdev);
+void scif_cleanup_scifdev(struct scif_dev *dev);
+void scif_handle_remove_node(int node);
+void scif_disconnect_node(u32 node_id, bool mgmt_initiated);
+void scif_free_qp(struct scif_dev *dev);
+void scif_misc_handler(struct work_struct *work);
+void scif_stop(struct scif_dev *scifdev);
+irqreturn_t scif_intr_handler(int irq, void *data);
+#endif /* SCIF_MAIN_H */
diff --git a/drivers/misc/mic/scif/scif_map.h b/drivers/misc/mic/scif/scif_map.h
new file mode 100644
index 0000000..3e86360
--- /dev/null
+++ b/drivers/misc/mic/scif/scif_map.h
@@ -0,0 +1,136 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2014 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Intel SCIF driver.
+ *
+ */
+#ifndef SCIF_MAP_H
+#define SCIF_MAP_H
+
+#include "../bus/scif_bus.h"
+
+static __always_inline void *
+scif_alloc_coherent(dma_addr_t *dma_handle,
+ struct scif_dev *scifdev, size_t size,
+ gfp_t gfp)
+{
+ void *va;
+
+ if (scifdev_self(scifdev)) {
+ va = kmalloc(size, gfp);
+ if (va)
+ *dma_handle = virt_to_phys(va);
+ } else {
+ va = dma_alloc_coherent(&scifdev->sdev->dev,
+ size, dma_handle, gfp);
+ if (va && scifdev_is_p2p(scifdev))
+ *dma_handle = *dma_handle + scifdev->base_addr;
+ }
+ return va;
+}
+
+static __always_inline void
+scif_free_coherent(void *va, dma_addr_t local,
+ struct scif_dev *scifdev, size_t size)
+{
+ if (scifdev_self(scifdev)) {
+ kfree(va);
+ } else {
+ if (scifdev_is_p2p(scifdev) && local > scifdev->base_addr)
+ local = local - scifdev->base_addr;
+ dma_free_coherent(&scifdev->sdev->dev,
+ size, va, local);
+ }
+}
+
+static __always_inline int
+scif_map_single(dma_addr_t *dma_handle,
+ void *local, struct scif_dev *scifdev, size_t size)
+{
+ int err = 0;
+
+ if (scifdev_self(scifdev)) {
+ *dma_handle = virt_to_phys((local));
+ } else {
+ *dma_handle = dma_map_single(&scifdev->sdev->dev,
+ local, size, DMA_BIDIRECTIONAL);
+ if (dma_mapping_error(&scifdev->sdev->dev, *dma_handle))
+ err = -ENOMEM;
+ else if (scifdev_is_p2p(scifdev))
+ *dma_handle = *dma_handle + scifdev->base_addr;
+ }
+ if (err)
+ *dma_handle = 0;
+ return err;
+}
+
+static __always_inline void
+scif_unmap_single(dma_addr_t local, struct scif_dev *scifdev,
+ size_t size)
+{
+ if (!scifdev_self(scifdev)) {
+ if (scifdev_is_p2p(scifdev))
+ local = local - scifdev->base_addr;
+ dma_unmap_single(&scifdev->sdev->dev, local,
+ size, DMA_BIDIRECTIONAL);
+ }
+}
+
+static __always_inline void *
+scif_ioremap(dma_addr_t phys, size_t size, struct scif_dev *scifdev)
+{
+ void *out_virt;
+ struct scif_hw_dev *sdev = scifdev->sdev;
+
+ if (scifdev_self(scifdev))
+ out_virt = phys_to_virt(phys);
+ else
+ out_virt = (void __force *)
+ sdev->hw_ops->ioremap(sdev, phys, size);
+ return out_virt;
+}
+
+static __always_inline void
+scif_iounmap(void *virt, size_t len, struct scif_dev *scifdev)
+{
+ if (!scifdev_self(scifdev)) {
+ struct scif_hw_dev *sdev = scifdev->sdev;
+
+ sdev->hw_ops->iounmap(sdev, (void __force __iomem *)virt);
+ }
+}
+
+static __always_inline int
+scif_map_page(dma_addr_t *dma_handle, struct page *page,
+ struct scif_dev *scifdev)
+{
+ int err = 0;
+
+ if (scifdev_self(scifdev)) {
+ *dma_handle = page_to_phys(page);
+ } else {
+ struct scif_hw_dev *sdev = scifdev->sdev;
+ *dma_handle = dma_map_page(&sdev->dev,
+ page, 0x0, PAGE_SIZE,
+ DMA_BIDIRECTIONAL);
+ if (dma_mapping_error(&sdev->dev, *dma_handle))
+ err = -ENOMEM;
+ else if (scifdev_is_p2p(scifdev))
+ *dma_handle = *dma_handle + scifdev->base_addr;
+ }
+ if (err)
+ *dma_handle = 0;
+ return err;
+}
+#endif /* SCIF_MAP_H */
diff --git a/drivers/misc/mic/scif/scif_mmap.c b/drivers/misc/mic/scif/scif_mmap.c
new file mode 100644
index 0000000..49cb8f7
--- /dev/null
+++ b/drivers/misc/mic/scif/scif_mmap.c
@@ -0,0 +1,699 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2015 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Intel SCIF driver.
+ *
+ */
+#include "scif_main.h"
+
+/*
+ * struct scif_vma_info - Information about a remote memory mapping
+ * created via scif_mmap(..)
+ * @vma: VM area struct
+ * @list: link to list of active vmas
+ */
+struct scif_vma_info {
+ struct vm_area_struct *vma;
+ struct list_head list;
+};
+
+void scif_recv_munmap(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+ struct scif_rma_req req;
+ struct scif_window *window = NULL;
+ struct scif_window *recv_window =
+ (struct scif_window *)msg->payload[0];
+ struct scif_endpt *ep;
+
+ ep = (struct scif_endpt *)recv_window->ep;
+ req.out_window = &window;
+ req.offset = recv_window->offset;
+ req.prot = recv_window->prot;
+ req.nr_bytes = recv_window->nr_pages << PAGE_SHIFT;
+ req.type = SCIF_WINDOW_FULL;
+ req.head = &ep->rma_info.reg_list;
+ msg->payload[0] = ep->remote_ep;
+
+ mutex_lock(&ep->rma_info.rma_lock);
+ /* Does a valid window exist? */
+ if (scif_query_window(&req)) {
+ dev_err(&scifdev->sdev->dev,
+ "%s %d -ENXIO\n", __func__, __LINE__);
+ msg->uop = SCIF_UNREGISTER_ACK;
+ goto error;
+ }
+
+ scif_put_window(window, window->nr_pages);
+
+ if (!window->ref_count) {
+ atomic_inc(&ep->rma_info.tw_refcount);
+ ep->rma_info.async_list_del = 1;
+ list_del_init(&window->list);
+ scif_free_window_offset(ep, window, window->offset);
+ }
+error:
+ mutex_unlock(&ep->rma_info.rma_lock);
+ if (window && !window->ref_count)
+ scif_queue_for_cleanup(window, &scif_info.rma);
+}
+
+/*
+ * Remove valid remote memory mappings created via scif_mmap(..) from the
+ * process address space since the remote node is lost
+ */
+static void __scif_zap_mmaps(struct scif_endpt *ep)
+{
+ struct list_head *item;
+ struct scif_vma_info *info;
+ struct vm_area_struct *vma;
+ unsigned long size;
+
+ spin_lock(&ep->lock);
+ list_for_each(item, &ep->rma_info.vma_list) {
+ info = list_entry(item, struct scif_vma_info, list);
+ vma = info->vma;
+ size = vma->vm_end - vma->vm_start;
+ zap_vma_ptes(vma, vma->vm_start, size);
+ dev_dbg(scif_info.mdev.this_device,
+ "%s ep %p zap vma %p size 0x%lx\n",
+ __func__, ep, info->vma, size);
+ }
+ spin_unlock(&ep->lock);
+}
+
+/*
+ * Traverse the list of endpoints for a particular remote node and
+ * zap valid remote memory mappings since the remote node is lost
+ */
+static void _scif_zap_mmaps(int node, struct list_head *head)
+{
+ struct scif_endpt *ep;
+ struct list_head *item;
+
+ mutex_lock(&scif_info.connlock);
+ list_for_each(item, head) {
+ ep = list_entry(item, struct scif_endpt, list);
+ if (ep->remote_dev->node == node)
+ __scif_zap_mmaps(ep);
+ }
+ mutex_unlock(&scif_info.connlock);
+}
+
+/*
+ * Wrapper for removing remote memory mappings for a particular node. This API
+ * is called by peer nodes as part of handling a lost node.
+ */
+void scif_zap_mmaps(int node)
+{
+ _scif_zap_mmaps(node, &scif_info.connected);
+ _scif_zap_mmaps(node, &scif_info.disconnected);
+}
+
+/*
+ * This API is only called while handling a lost node:
+ * a) Remote node is dead.
+ * b) Remote memory mappings have been zapped
+ * So we can traverse the remote_reg_list without any locks. Since
+ * the window has not yet been unregistered we can drop the ref count
+ * and queue it to the cleanup thread.
+ */
+static void __scif_cleanup_rma_for_zombies(struct scif_endpt *ep)
+{
+ struct list_head *pos, *tmp;
+ struct scif_window *window;
+
+ list_for_each_safe(pos, tmp, &ep->rma_info.remote_reg_list) {
+ window = list_entry(pos, struct scif_window, list);
+ if (window->ref_count)
+ scif_put_window(window, window->nr_pages);
+ else
+ dev_err(scif_info.mdev.this_device,
+ "%s %d unexpected\n",
+ __func__, __LINE__);
+ if (!window->ref_count) {
+ atomic_inc(&ep->rma_info.tw_refcount);
+ list_del_init(&window->list);
+ scif_queue_for_cleanup(window, &scif_info.rma);
+ }
+ }
+}
+
+/* Cleanup remote registration lists for zombie endpoints */
+void scif_cleanup_rma_for_zombies(int node)
+{
+ struct scif_endpt *ep;
+ struct list_head *item;
+
+ mutex_lock(&scif_info.eplock);
+ list_for_each(item, &scif_info.zombie) {
+ ep = list_entry(item, struct scif_endpt, list);
+ if (ep->remote_dev && ep->remote_dev->node == node)
+ __scif_cleanup_rma_for_zombies(ep);
+ }
+ mutex_unlock(&scif_info.eplock);
+ flush_work(&scif_info.misc_work);
+}
+
+/* Insert the VMA into the per endpoint VMA list */
+static int scif_insert_vma(struct scif_endpt *ep, struct vm_area_struct *vma)
+{
+ struct scif_vma_info *info;
+ int err = 0;
+
+ info = kzalloc(sizeof(*info), GFP_KERNEL);
+ if (!info) {
+ err = -ENOMEM;
+ goto done;
+ }
+ info->vma = vma;
+ spin_lock(&ep->lock);
+ list_add_tail(&info->list, &ep->rma_info.vma_list);
+ spin_unlock(&ep->lock);
+done:
+ return err;
+}
+
+/* Delete the VMA from the per endpoint VMA list */
+static void scif_delete_vma(struct scif_endpt *ep, struct vm_area_struct *vma)
+{
+ struct list_head *item;
+ struct scif_vma_info *info;
+
+ spin_lock(&ep->lock);
+ list_for_each(item, &ep->rma_info.vma_list) {
+ info = list_entry(item, struct scif_vma_info, list);
+ if (info->vma == vma) {
+ list_del(&info->list);
+ kfree(info);
+ break;
+ }
+ }
+ spin_unlock(&ep->lock);
+}
+
+static phys_addr_t scif_get_phys(phys_addr_t phys, struct scif_endpt *ep)
+{
+ struct scif_dev *scifdev = (struct scif_dev *)ep->remote_dev;
+ struct scif_hw_dev *sdev = scifdev->sdev;
+ phys_addr_t out_phys, apt_base = 0;
+
+ /*
+ * If the DMA address is card relative then we need to add the
+ * aperture base for mmap to work correctly
+ */
+ if (!scifdev_self(scifdev) && sdev->aper && sdev->card_rel_da)
+ apt_base = sdev->aper->pa;
+ out_phys = apt_base + phys;
+ return out_phys;
+}
+
+int scif_get_pages(scif_epd_t epd, off_t offset, size_t len,
+ struct scif_range **pages)
+{
+ struct scif_endpt *ep = (struct scif_endpt *)epd;
+ struct scif_rma_req req;
+ struct scif_window *window = NULL;
+ int nr_pages, err, i;
+
+ dev_dbg(scif_info.mdev.this_device,
+ "SCIFAPI get_pinned_pages: ep %p offset 0x%lx len 0x%lx\n",
+ ep, offset, len);
+ err = scif_verify_epd(ep);
+ if (err)
+ return err;
+
+ if (!len || (offset < 0) ||
+ (offset + len < offset) ||
+ (ALIGN(offset, PAGE_SIZE) != offset) ||
+ (ALIGN(len, PAGE_SIZE) != len))
+ return -EINVAL;
+
+ nr_pages = len >> PAGE_SHIFT;
+
+ req.out_window = &window;
+ req.offset = offset;
+ req.prot = 0;
+ req.nr_bytes = len;
+ req.type = SCIF_WINDOW_SINGLE;
+ req.head = &ep->rma_info.remote_reg_list;
+
+ mutex_lock(&ep->rma_info.rma_lock);
+ /* Does a valid window exist? */
+ err = scif_query_window(&req);
+ if (err) {
+ dev_err(&ep->remote_dev->sdev->dev,
+ "%s %d err %d\n", __func__, __LINE__, err);
+ goto error;
+ }
+
+ /* Allocate scif_range */
+ *pages = kzalloc(sizeof(**pages), GFP_KERNEL);
+ if (!*pages) {
+ err = -ENOMEM;
+ goto error;
+ }
+
+ /* Allocate phys addr array */
+ (*pages)->phys_addr = scif_zalloc(nr_pages * sizeof(dma_addr_t));
+ if (!((*pages)->phys_addr)) {
+ err = -ENOMEM;
+ goto error;
+ }
+
+ if (scif_is_mgmt_node() && !scifdev_self(ep->remote_dev)) {
+ /* Allocate virtual address array */
+ ((*pages)->va = scif_zalloc(nr_pages * sizeof(void *)));
+ if (!(*pages)->va) {
+ err = -ENOMEM;
+ goto error;
+ }
+ }
+ /* Populate the values */
+ (*pages)->cookie = window;
+ (*pages)->nr_pages = nr_pages;
+ (*pages)->prot_flags = window->prot;
+
+ for (i = 0; i < nr_pages; i++) {
+ (*pages)->phys_addr[i] =
+ __scif_off_to_dma_addr(window, offset +
+ (i * PAGE_SIZE));
+ (*pages)->phys_addr[i] = scif_get_phys((*pages)->phys_addr[i],
+ ep);
+ if (scif_is_mgmt_node() && !scifdev_self(ep->remote_dev))
+ (*pages)->va[i] =
+ ep->remote_dev->sdev->aper->va +
+ (*pages)->phys_addr[i] -
+ ep->remote_dev->sdev->aper->pa;
+ }
+
+ scif_get_window(window, nr_pages);
+error:
+ mutex_unlock(&ep->rma_info.rma_lock);
+ if (err) {
+ if (*pages) {
+ scif_free((*pages)->phys_addr,
+ nr_pages * sizeof(dma_addr_t));
+ scif_free((*pages)->va,
+ nr_pages * sizeof(void *));
+ kfree(*pages);
+ *pages = NULL;
+ }
+ dev_err(&ep->remote_dev->sdev->dev,
+ "%s %d err %d\n", __func__, __LINE__, err);
+ }
+ return err;
+}
+EXPORT_SYMBOL_GPL(scif_get_pages);
+
+int scif_put_pages(struct scif_range *pages)
+{
+ struct scif_endpt *ep;
+ struct scif_window *window;
+ struct scifmsg msg;
+
+ if (!pages || !pages->cookie)
+ return -EINVAL;
+
+ window = pages->cookie;
+
+ if (!window || window->magic != SCIFEP_MAGIC)
+ return -EINVAL;
+
+ ep = (struct scif_endpt *)window->ep;
+ /*
+ * If the state is SCIFEP_CONNECTED or SCIFEP_DISCONNECTED then the
+ * callee should be allowed to release references to the pages,
+ * else the endpoint was not connected in the first place,
+ * hence the ENOTCONN.
+ */
+ if (ep->state != SCIFEP_CONNECTED && ep->state != SCIFEP_DISCONNECTED)
+ return -ENOTCONN;
+
+ mutex_lock(&ep->rma_info.rma_lock);
+
+ scif_put_window(window, pages->nr_pages);
+
+ /* Initiate window destruction if ref count is zero */
+ if (!window->ref_count) {
+ list_del(&window->list);
+ mutex_unlock(&ep->rma_info.rma_lock);
+ scif_drain_dma_intr(ep->remote_dev->sdev,
+ ep->rma_info.dma_chan);
+ /* Inform the peer about this window being destroyed. */
+ msg.uop = SCIF_MUNMAP;
+ msg.src = ep->port;
+ msg.payload[0] = window->peer_window;
+ /* No error handling for notification messages */
+ scif_nodeqp_send(ep->remote_dev, &msg);
+ /* Destroy this window from the peer's registered AS */
+ scif_destroy_remote_window(window);
+ } else {
+ mutex_unlock(&ep->rma_info.rma_lock);
+ }
+
+ scif_free(pages->phys_addr, pages->nr_pages * sizeof(dma_addr_t));
+ scif_free(pages->va, pages->nr_pages * sizeof(void *));
+ kfree(pages);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(scif_put_pages);
+
+/*
+ * scif_rma_list_mmap:
+ *
+ * Traverse the remote registration list starting from start_window:
+ * 1) Create VtoP mappings via remap_pfn_range(..)
+ * 2) Once step 1) and 2) complete successfully then traverse the range of
+ * windows again and bump the reference count.
+ * RMA lock must be held.
+ */
+static int scif_rma_list_mmap(struct scif_window *start_window, s64 offset,
+ int nr_pages, struct vm_area_struct *vma)
+{
+ s64 end_offset, loop_offset = offset;
+ struct scif_window *window = start_window;
+ int loop_nr_pages, nr_pages_left = nr_pages;
+ struct scif_endpt *ep = (struct scif_endpt *)start_window->ep;
+ struct list_head *head = &ep->rma_info.remote_reg_list;
+ int i, err = 0;
+ dma_addr_t phys_addr;
+ struct scif_window_iter src_win_iter;
+ size_t contig_bytes = 0;
+
+ might_sleep();
+ list_for_each_entry_from(window, head, list) {
+ end_offset = window->offset +
+ (window->nr_pages << PAGE_SHIFT);
+ loop_nr_pages = min_t(int,
+ (end_offset - loop_offset) >> PAGE_SHIFT,
+ nr_pages_left);
+ scif_init_window_iter(window, &src_win_iter);
+ for (i = 0; i < loop_nr_pages; i++) {
+ phys_addr = scif_off_to_dma_addr(window, loop_offset,
+ &contig_bytes,
+ &src_win_iter);
+ phys_addr = scif_get_phys(phys_addr, ep);
+ err = remap_pfn_range(vma,
+ vma->vm_start +
+ loop_offset - offset,
+ phys_addr >> PAGE_SHIFT,
+ PAGE_SIZE,
+ vma->vm_page_prot);
+ if (err)
+ goto error;
+ loop_offset += PAGE_SIZE;
+ }
+ nr_pages_left -= loop_nr_pages;
+ if (!nr_pages_left)
+ break;
+ }
+ /*
+ * No more failures expected. Bump up the ref count for all
+ * the windows. Another traversal from start_window required
+ * for handling errors encountered across windows during
+ * remap_pfn_range(..).
+ */
+ loop_offset = offset;
+ nr_pages_left = nr_pages;
+ window = start_window;
+ head = &ep->rma_info.remote_reg_list;
+ list_for_each_entry_from(window, head, list) {
+ end_offset = window->offset +
+ (window->nr_pages << PAGE_SHIFT);
+ loop_nr_pages = min_t(int,
+ (end_offset - loop_offset) >> PAGE_SHIFT,
+ nr_pages_left);
+ scif_get_window(window, loop_nr_pages);
+ nr_pages_left -= loop_nr_pages;
+ loop_offset += (loop_nr_pages << PAGE_SHIFT);
+ if (!nr_pages_left)
+ break;
+ }
+error:
+ if (err)
+ dev_err(scif_info.mdev.this_device,
+ "%s %d err %d\n", __func__, __LINE__, err);
+ return err;
+}
+
+/*
+ * scif_rma_list_munmap:
+ *
+ * Traverse the remote registration list starting from window:
+ * 1) Decrement ref count.
+ * 2) If the ref count drops to zero then send a SCIF_MUNMAP message to peer.
+ * RMA lock must be held.
+ */
+static void scif_rma_list_munmap(struct scif_window *start_window,
+ s64 offset, int nr_pages)
+{
+ struct scifmsg msg;
+ s64 loop_offset = offset, end_offset;
+ int loop_nr_pages, nr_pages_left = nr_pages;
+ struct scif_endpt *ep = (struct scif_endpt *)start_window->ep;
+ struct list_head *head = &ep->rma_info.remote_reg_list;
+ struct scif_window *window = start_window, *_window;
+
+ msg.uop = SCIF_MUNMAP;
+ msg.src = ep->port;
+ loop_offset = offset;
+ nr_pages_left = nr_pages;
+ list_for_each_entry_safe_from(window, _window, head, list) {
+ end_offset = window->offset +
+ (window->nr_pages << PAGE_SHIFT);
+ loop_nr_pages = min_t(int,
+ (end_offset - loop_offset) >> PAGE_SHIFT,
+ nr_pages_left);
+ scif_put_window(window, loop_nr_pages);
+ if (!window->ref_count) {
+ struct scif_dev *rdev = ep->remote_dev;
+
+ scif_drain_dma_intr(rdev->sdev,
+ ep->rma_info.dma_chan);
+ /* Inform the peer about this munmap */
+ msg.payload[0] = window->peer_window;
+ /* No error handling for Notification messages. */
+ scif_nodeqp_send(ep->remote_dev, &msg);
+ list_del(&window->list);
+ /* Destroy this window from the peer's registered AS */
+ scif_destroy_remote_window(window);
+ }
+ nr_pages_left -= loop_nr_pages;
+ loop_offset += (loop_nr_pages << PAGE_SHIFT);
+ if (!nr_pages_left)
+ break;
+ }
+}
+
+/*
+ * The private data field of each VMA used to mmap a remote window
+ * points to an instance of struct vma_pvt
+ */
+struct vma_pvt {
+ struct scif_endpt *ep; /* End point for remote window */
+ s64 offset; /* offset within remote window */
+ bool valid_offset; /* offset is valid only if the original
+ * mmap request was for a single page
+ * else the offset within the vma is
+ * the correct offset
+ */
+ struct kref ref;
+};
+
+static void vma_pvt_release(struct kref *ref)
+{
+ struct vma_pvt *vmapvt = container_of(ref, struct vma_pvt, ref);
+
+ kfree(vmapvt);
+}
+
+/**
+ * scif_vma_open - VMA open driver callback
+ * @vma: VMM memory area.
+ * The open method is called by the kernel to allow the subsystem implementing
+ * the VMA to initialize the area. This method is invoked any time a new
+ * reference to the VMA is made (when a process forks, for example).
+ * The one exception happens when the VMA is first created by mmap;
+ * in this case, the driver's mmap method is called instead.
+ * This function is also invoked when an existing VMA is split by the kernel
+ * due to a call to munmap on a subset of the VMA resulting in two VMAs.
+ * The kernel invokes this function only on one of the two VMAs.
+ */
+static void scif_vma_open(struct vm_area_struct *vma)
+{
+ struct vma_pvt *vmapvt = vma->vm_private_data;
+
+ dev_dbg(scif_info.mdev.this_device,
+ "SCIFAPI vma open: vma_start 0x%lx vma_end 0x%lx\n",
+ vma->vm_start, vma->vm_end);
+ scif_insert_vma(vmapvt->ep, vma);
+ kref_get(&vmapvt->ref);
+}
+
+/**
+ * scif_munmap - VMA close driver callback.
+ * @vma: VMM memory area.
+ * When an area is destroyed, the kernel calls its close operation.
+ * Note that there's no usage count associated with VMA's; the area
+ * is opened and closed exactly once by each process that uses it.
+ */
+static void scif_munmap(struct vm_area_struct *vma)
+{
+ struct scif_endpt *ep;
+ struct vma_pvt *vmapvt = vma->vm_private_data;
+ int nr_pages = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
+ s64 offset;
+ struct scif_rma_req req;
+ struct scif_window *window = NULL;
+ int err;
+
+ might_sleep();
+ dev_dbg(scif_info.mdev.this_device,
+ "SCIFAPI munmap: vma_start 0x%lx vma_end 0x%lx\n",
+ vma->vm_start, vma->vm_end);
+ ep = vmapvt->ep;
+ offset = vmapvt->valid_offset ? vmapvt->offset :
+ (vma->vm_pgoff) << PAGE_SHIFT;
+ dev_dbg(scif_info.mdev.this_device,
+ "SCIFAPI munmap: ep %p nr_pages 0x%x offset 0x%llx\n",
+ ep, nr_pages, offset);
+ req.out_window = &window;
+ req.offset = offset;
+ req.nr_bytes = vma->vm_end - vma->vm_start;
+ req.prot = vma->vm_flags & (VM_READ | VM_WRITE);
+ req.type = SCIF_WINDOW_PARTIAL;
+ req.head = &ep->rma_info.remote_reg_list;
+
+ mutex_lock(&ep->rma_info.rma_lock);
+
+ err = scif_query_window(&req);
+ if (err)
+ dev_err(scif_info.mdev.this_device,
+ "%s %d err %d\n", __func__, __LINE__, err);
+ else
+ scif_rma_list_munmap(window, offset, nr_pages);
+
+ mutex_unlock(&ep->rma_info.rma_lock);
+ /*
+ * The kernel probably zeroes these out but we still want
+ * to clean up our own mess just in case.
+ */
+ vma->vm_ops = NULL;
+ vma->vm_private_data = NULL;
+ kref_put(&vmapvt->ref, vma_pvt_release);
+ scif_delete_vma(ep, vma);
+}
+
+static const struct vm_operations_struct scif_vm_ops = {
+ .open = scif_vma_open,
+ .close = scif_munmap,
+};
+
+/**
+ * scif_mmap - Map pages in virtual address space to a remote window.
+ * @vma: VMM memory area.
+ * @epd: endpoint descriptor
+ *
+ * Return: Upon successful completion, scif_mmap() returns zero
+ * else an apt error is returned as documented in scif.h
+ */
+int scif_mmap(struct vm_area_struct *vma, scif_epd_t epd)
+{
+ struct scif_rma_req req;
+ struct scif_window *window = NULL;
+ struct scif_endpt *ep = (struct scif_endpt *)epd;
+ s64 start_offset = vma->vm_pgoff << PAGE_SHIFT;
+ int nr_pages = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
+ int err;
+ struct vma_pvt *vmapvt;
+
+ dev_dbg(scif_info.mdev.this_device,
+ "SCIFAPI mmap: ep %p start_offset 0x%llx nr_pages 0x%x\n",
+ ep, start_offset, nr_pages);
+ err = scif_verify_epd(ep);
+ if (err)
+ return err;
+
+ might_sleep();
+
+ err = scif_insert_vma(ep, vma);
+ if (err)
+ return err;
+
+ vmapvt = kzalloc(sizeof(*vmapvt), GFP_KERNEL);
+ if (!vmapvt) {
+ scif_delete_vma(ep, vma);
+ return -ENOMEM;
+ }
+
+ vmapvt->ep = ep;
+ kref_init(&vmapvt->ref);
+
+ req.out_window = &window;
+ req.offset = start_offset;
+ req.nr_bytes = vma->vm_end - vma->vm_start;
+ req.prot = vma->vm_flags & (VM_READ | VM_WRITE);
+ req.type = SCIF_WINDOW_PARTIAL;
+ req.head = &ep->rma_info.remote_reg_list;
+
+ mutex_lock(&ep->rma_info.rma_lock);
+ /* Does a valid window exist? */
+ err = scif_query_window(&req);
+ if (err) {
+ dev_err(&ep->remote_dev->sdev->dev,
+ "%s %d err %d\n", __func__, __LINE__, err);
+ goto error_unlock;
+ }
+
+ /* Default prot for loopback */
+ if (!scifdev_self(ep->remote_dev))
+ vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
+
+ /*
+ * VM_DONTCOPY - Do not copy this vma on fork
+ * VM_DONTEXPAND - Cannot expand with mremap()
+ * VM_RESERVED - Count as reserved_vm like IO
+ * VM_PFNMAP - Page-ranges managed without "struct page"
+ * VM_IO - Memory mapped I/O or similar
+ *
+ * We do not want to copy this VMA automatically on a fork(),
+ * expand this VMA due to mremap() or swap out these pages since
+ * the VMA is actually backed by physical pages in the remote
+ * node's physical memory and not via a struct page.
+ */
+ vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_DONTDUMP;
+
+ if (!scifdev_self(ep->remote_dev))
+ vma->vm_flags |= VM_IO | VM_PFNMAP;
+
+ /* Map this range of windows */
+ err = scif_rma_list_mmap(window, start_offset, nr_pages, vma);
+ if (err) {
+ dev_err(&ep->remote_dev->sdev->dev,
+ "%s %d err %d\n", __func__, __LINE__, err);
+ goto error_unlock;
+ }
+ /* Set up the driver call back */
+ vma->vm_ops = &scif_vm_ops;
+ vma->vm_private_data = vmapvt;
+error_unlock:
+ mutex_unlock(&ep->rma_info.rma_lock);
+ if (err) {
+ kfree(vmapvt);
+ dev_err(&ep->remote_dev->sdev->dev,
+ "%s %d err %d\n", __func__, __LINE__, err);
+ scif_delete_vma(ep, vma);
+ }
+ return err;
+}
diff --git a/drivers/misc/mic/scif/scif_nm.c b/drivers/misc/mic/scif/scif_nm.c
new file mode 100644
index 0000000..79f26a0
--- /dev/null
+++ b/drivers/misc/mic/scif/scif_nm.c
@@ -0,0 +1,237 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2014 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Intel SCIF driver.
+ *
+ */
+#include "scif_peer_bus.h"
+
+#include "scif_main.h"
+#include "scif_map.h"
+
+/**
+ * scif_invalidate_ep() - Set state for all connected endpoints
+ * to disconnected and wake up all send/recv waitqueues
+ */
+static void scif_invalidate_ep(int node)
+{
+ struct scif_endpt *ep;
+ struct list_head *pos, *tmpq;
+
+ flush_work(&scif_info.conn_work);
+ mutex_lock(&scif_info.connlock);
+ list_for_each_safe(pos, tmpq, &scif_info.disconnected) {
+ ep = list_entry(pos, struct scif_endpt, list);
+ if (ep->remote_dev->node == node) {
+ scif_unmap_all_windows(ep);
+ spin_lock(&ep->lock);
+ scif_cleanup_ep_qp(ep);
+ spin_unlock(&ep->lock);
+ }
+ }
+ list_for_each_safe(pos, tmpq, &scif_info.connected) {
+ ep = list_entry(pos, struct scif_endpt, list);
+ if (ep->remote_dev->node == node) {
+ list_del(pos);
+ spin_lock(&ep->lock);
+ ep->state = SCIFEP_DISCONNECTED;
+ list_add_tail(&ep->list, &scif_info.disconnected);
+ scif_cleanup_ep_qp(ep);
+ wake_up_interruptible(&ep->sendwq);
+ wake_up_interruptible(&ep->recvwq);
+ spin_unlock(&ep->lock);
+ scif_unmap_all_windows(ep);
+ }
+ }
+ mutex_unlock(&scif_info.connlock);
+}
+
+void scif_free_qp(struct scif_dev *scifdev)
+{
+ struct scif_qp *qp = scifdev->qpairs;
+
+ if (!qp)
+ return;
+ scif_unmap_single(qp->local_buf, scifdev, qp->inbound_q.size);
+ kfree(qp->inbound_q.rb_base);
+ scif_unmap_single(qp->local_qp, scifdev, sizeof(struct scif_qp));
+ kfree(scifdev->qpairs);
+ scifdev->qpairs = NULL;
+}
+
+static void scif_cleanup_qp(struct scif_dev *dev)
+{
+ struct scif_qp *qp = &dev->qpairs[0];
+
+ if (!qp)
+ return;
+ scif_iounmap((void *)qp->remote_qp, sizeof(struct scif_qp), dev);
+ scif_iounmap((void *)qp->outbound_q.rb_base,
+ sizeof(struct scif_qp), dev);
+ qp->remote_qp = NULL;
+ qp->local_write = 0;
+ qp->inbound_q.current_write_offset = 0;
+ qp->inbound_q.current_read_offset = 0;
+ if (scifdev_is_p2p(dev))
+ scif_free_qp(dev);
+}
+
+void scif_send_acks(struct scif_dev *dev)
+{
+ struct scifmsg msg;
+
+ if (dev->node_remove_ack_pending) {
+ msg.uop = SCIF_NODE_REMOVE_ACK;
+ msg.src.node = scif_info.nodeid;
+ msg.dst.node = SCIF_MGMT_NODE;
+ msg.payload[0] = dev->node;
+ scif_nodeqp_send(&scif_dev[SCIF_MGMT_NODE], &msg);
+ dev->node_remove_ack_pending = false;
+ }
+ if (dev->exit_ack_pending) {
+ msg.uop = SCIF_EXIT_ACK;
+ msg.src.node = scif_info.nodeid;
+ msg.dst.node = dev->node;
+ scif_nodeqp_send(dev, &msg);
+ dev->exit_ack_pending = false;
+ }
+}
+
+/*
+ * scif_cleanup_scifdev
+ *
+ * @dev: Remote SCIF device.
+ * Uninitialize SCIF data structures for remote SCIF device.
+ */
+void scif_cleanup_scifdev(struct scif_dev *dev)
+{
+ struct scif_hw_dev *sdev = dev->sdev;
+
+ if (!dev->sdev)
+ return;
+ if (scifdev_is_p2p(dev)) {
+ if (dev->cookie) {
+ sdev->hw_ops->free_irq(sdev, dev->cookie, dev);
+ dev->cookie = NULL;
+ }
+ scif_destroy_intr_wq(dev);
+ }
+ flush_work(&scif_info.misc_work);
+ scif_destroy_p2p(dev);
+ scif_invalidate_ep(dev->node);
+ scif_zap_mmaps(dev->node);
+ scif_cleanup_rma_for_zombies(dev->node);
+ flush_work(&scif_info.misc_work);
+ scif_send_acks(dev);
+ if (!dev->node && scif_info.card_initiated_exit) {
+ /*
+ * Send an SCIF_EXIT message which is the last message from MIC
+ * to the Host and wait for a SCIF_EXIT_ACK
+ */
+ scif_send_exit(dev);
+ scif_info.card_initiated_exit = false;
+ }
+ scif_cleanup_qp(dev);
+}
+
+/*
+ * scif_remove_node:
+ *
+ * @node: Node to remove
+ */
+void scif_handle_remove_node(int node)
+{
+ struct scif_dev *scifdev = &scif_dev[node];
+
+ if (scif_peer_unregister_device(scifdev))
+ scif_send_acks(scifdev);
+}
+
+static int scif_send_rmnode_msg(int node, int remove_node)
+{
+ struct scifmsg notif_msg;
+ struct scif_dev *dev = &scif_dev[node];
+
+ notif_msg.uop = SCIF_NODE_REMOVE;
+ notif_msg.src.node = scif_info.nodeid;
+ notif_msg.dst.node = node;
+ notif_msg.payload[0] = remove_node;
+ return scif_nodeqp_send(dev, ¬if_msg);
+}
+
+/**
+ * scif_node_disconnect:
+ *
+ * @node_id[in]: source node id.
+ * @mgmt_initiated: Disconnection initiated from the mgmt node
+ *
+ * Disconnect a node from the scif network.
+ */
+void scif_disconnect_node(u32 node_id, bool mgmt_initiated)
+{
+ int ret;
+ int msg_cnt = 0;
+ u32 i = 0;
+ struct scif_dev *scifdev = &scif_dev[node_id];
+
+ if (!node_id)
+ return;
+
+ atomic_set(&scifdev->disconn_rescnt, 0);
+
+ /* Destroy p2p network */
+ for (i = 1; i <= scif_info.maxid; i++) {
+ if (i == node_id)
+ continue;
+ ret = scif_send_rmnode_msg(i, node_id);
+ if (!ret)
+ msg_cnt++;
+ }
+ /* Wait for the remote nodes to respond with SCIF_NODE_REMOVE_ACK */
+ ret = wait_event_timeout(scifdev->disconn_wq,
+ (atomic_read(&scifdev->disconn_rescnt)
+ == msg_cnt), SCIF_NODE_ALIVE_TIMEOUT);
+ /* Tell the card to clean up */
+ if (mgmt_initiated && _scifdev_alive(scifdev))
+ /*
+ * Send an SCIF_EXIT message which is the last message from Host
+ * to the MIC and wait for a SCIF_EXIT_ACK
+ */
+ scif_send_exit(scifdev);
+ atomic_set(&scifdev->disconn_rescnt, 0);
+ /* Tell the mgmt node to clean up */
+ ret = scif_send_rmnode_msg(SCIF_MGMT_NODE, node_id);
+ if (!ret)
+ /* Wait for mgmt node to respond with SCIF_NODE_REMOVE_ACK */
+ wait_event_timeout(scifdev->disconn_wq,
+ (atomic_read(&scifdev->disconn_rescnt) == 1),
+ SCIF_NODE_ALIVE_TIMEOUT);
+}
+
+void scif_get_node_info(void)
+{
+ struct scifmsg msg;
+ DECLARE_COMPLETION_ONSTACK(node_info);
+
+ msg.uop = SCIF_GET_NODE_INFO;
+ msg.src.node = scif_info.nodeid;
+ msg.dst.node = SCIF_MGMT_NODE;
+ msg.payload[3] = (u64)&node_info;
+
+ if ((scif_nodeqp_send(&scif_dev[SCIF_MGMT_NODE], &msg)))
+ return;
+
+ /* Wait for a response with SCIF_GET_NODE_INFO */
+ wait_for_completion(&node_info);
+}
diff --git a/drivers/misc/mic/scif/scif_nodeqp.c b/drivers/misc/mic/scif/scif_nodeqp.c
new file mode 100644
index 0000000..c66ca1a
--- /dev/null
+++ b/drivers/misc/mic/scif/scif_nodeqp.c
@@ -0,0 +1,1354 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2014 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Intel SCIF driver.
+ *
+ */
+#include "../bus/scif_bus.h"
+#include "scif_peer_bus.h"
+#include "scif_main.h"
+#include "scif_nodeqp.h"
+#include "scif_map.h"
+
+/*
+ ************************************************************************
+ * SCIF node Queue Pair (QP) setup flow:
+ *
+ * 1) SCIF driver gets probed with a scif_hw_dev via the scif_hw_bus
+ * 2) scif_setup_qp(..) allocates the local qp and calls
+ * scif_setup_qp_connect(..) which allocates and maps the local
+ * buffer for the inbound QP
+ * 3) The local node updates the device page with the DMA address of the QP
+ * 4) A delayed work is scheduled (qp_dwork) which periodically reads if
+ * the peer node has updated its QP DMA address
+ * 5) Once a valid non zero address is found in the QP DMA address field
+ * in the device page, the local node maps the remote node's QP,
+ * updates its outbound QP and sends a SCIF_INIT message to the peer
+ * 6) The SCIF_INIT message is received by the peer node QP interrupt bottom
+ * half handler by calling scif_init(..)
+ * 7) scif_init(..) registers a new SCIF peer node by calling
+ * scif_peer_register_device(..) which signifies the addition of a new
+ * SCIF node
+ * 8) On the mgmt node, P2P network setup/teardown is initiated if all the
+ * remote nodes are online via scif_p2p_setup(..)
+ * 9) For P2P setup, the host maps the remote nodes' aperture and memory
+ * bars and sends a SCIF_NODE_ADD message to both nodes
+ * 10) As part of scif_nodeadd, both nodes set up their local inbound
+ * QPs and send a SCIF_NODE_ADD_ACK to the mgmt node
+ * 11) As part of scif_node_add_ack(..) the mgmt node forwards the
+ * SCIF_NODE_ADD_ACK to the remote nodes
+ * 12) As part of scif_node_add_ack(..) the remote nodes update their
+ * outbound QPs, make sure they can access memory on the remote node
+ * and then add a new SCIF peer node by calling
+ * scif_peer_register_device(..) which signifies the addition of a new
+ * SCIF node.
+ * 13) The SCIF network is now established across all nodes.
+ *
+ ************************************************************************
+ * SCIF node QP teardown flow (initiated by non mgmt node):
+ *
+ * 1) SCIF driver gets a remove callback with a scif_hw_dev via the scif_hw_bus
+ * 2) The device page QP DMA address field is updated with 0x0
+ * 3) A non mgmt node now cleans up all local data structures and sends a
+ * SCIF_EXIT message to the peer and waits for a SCIF_EXIT_ACK
+ * 4) As part of scif_exit(..) handling scif_disconnect_node(..) is called
+ * 5) scif_disconnect_node(..) sends a SCIF_NODE_REMOVE message to all the
+ * peers and waits for a SCIF_NODE_REMOVE_ACK
+ * 6) As part of scif_node_remove(..) a remote node unregisters the peer
+ * node from the SCIF network and sends a SCIF_NODE_REMOVE_ACK
+ * 7) When the mgmt node has received all the SCIF_NODE_REMOVE_ACKs
+ * it sends itself a node remove message whose handling cleans up local
+ * data structures and unregisters the peer node from the SCIF network
+ * 8) The mgmt node sends a SCIF_EXIT_ACK
+ * 9) Upon receipt of the SCIF_EXIT_ACK the node initiating the teardown
+ * completes the SCIF remove routine
+ * 10) The SCIF network is now torn down for the node initiating the
+ * teardown sequence
+ *
+ ************************************************************************
+ * SCIF node QP teardown flow (initiated by mgmt node):
+ *
+ * 1) SCIF driver gets a remove callback with a scif_hw_dev via the scif_hw_bus
+ * 2) The device page QP DMA address field is updated with 0x0
+ * 3) The mgmt node calls scif_disconnect_node(..)
+ * 4) scif_disconnect_node(..) sends a SCIF_NODE_REMOVE message to all the peers
+ * and waits for a SCIF_NODE_REMOVE_ACK
+ * 5) As part of scif_node_remove(..) a remote node unregisters the peer
+ * node from the SCIF network and sends a SCIF_NODE_REMOVE_ACK
+ * 6) When the mgmt node has received all the SCIF_NODE_REMOVE_ACKs
+ * it unregisters the peer node from the SCIF network
+ * 7) The mgmt node sends a SCIF_EXIT message and waits for a SCIF_EXIT_ACK.
+ * 8) A non mgmt node upon receipt of a SCIF_EXIT message calls scif_stop(..)
+ * which would clean up local data structures for all SCIF nodes and
+ * then send a SCIF_EXIT_ACK back to the mgmt node
+ * 9) Upon receipt of the SCIF_EXIT_ACK the the mgmt node sends itself a node
+ * remove message whose handling cleans up local data structures and
+ * destroys any P2P mappings.
+ * 10) The SCIF hardware device for which a remove callback was received is now
+ * disconnected from the SCIF network.
+ */
+/*
+ * Initializes "local" data structures for the QP. Allocates the QP
+ * ring buffer (rb) and initializes the "in bound" queue.
+ */
+int scif_setup_qp_connect(struct scif_qp *qp, dma_addr_t *qp_offset,
+ int local_size, struct scif_dev *scifdev)
+{
+ void *local_q = qp->inbound_q.rb_base;
+ int err = 0;
+ u32 tmp_rd = 0;
+
+ spin_lock_init(&qp->send_lock);
+ spin_lock_init(&qp->recv_lock);
+
+ /* Allocate rb only if not already allocated */
+ if (!local_q) {
+ local_q = kzalloc(local_size, GFP_KERNEL);
+ if (!local_q) {
+ err = -ENOMEM;
+ return err;
+ }
+ }
+
+ err = scif_map_single(&qp->local_buf, local_q, scifdev, local_size);
+ if (err)
+ goto kfree;
+ /*
+ * To setup the inbound_q, the buffer lives locally, the read pointer
+ * is remote and the write pointer is local.
+ */
+ scif_rb_init(&qp->inbound_q,
+ &tmp_rd,
+ &qp->local_write,
+ local_q, get_count_order(local_size));
+ /*
+ * The read pointer is NULL initially and it is unsafe to use the ring
+ * buffer til this changes!
+ */
+ qp->inbound_q.read_ptr = NULL;
+ err = scif_map_single(qp_offset, qp,
+ scifdev, sizeof(struct scif_qp));
+ if (err)
+ goto unmap;
+ qp->local_qp = *qp_offset;
+ return err;
+unmap:
+ scif_unmap_single(qp->local_buf, scifdev, local_size);
+ qp->local_buf = 0;
+kfree:
+ kfree(local_q);
+ return err;
+}
+
+/* When the other side has already done it's allocation, this is called */
+int scif_setup_qp_accept(struct scif_qp *qp, dma_addr_t *qp_offset,
+ dma_addr_t phys, int local_size,
+ struct scif_dev *scifdev)
+{
+ void *local_q;
+ void *remote_q;
+ struct scif_qp *remote_qp;
+ int remote_size;
+ int err = 0;
+
+ spin_lock_init(&qp->send_lock);
+ spin_lock_init(&qp->recv_lock);
+ /* Start by figuring out where we need to point */
+ remote_qp = scif_ioremap(phys, sizeof(struct scif_qp), scifdev);
+ if (!remote_qp)
+ return -EIO;
+ qp->remote_qp = remote_qp;
+ if (qp->remote_qp->magic != SCIFEP_MAGIC) {
+ err = -EIO;
+ goto iounmap;
+ }
+ qp->remote_buf = remote_qp->local_buf;
+ remote_size = qp->remote_qp->inbound_q.size;
+ remote_q = scif_ioremap(qp->remote_buf, remote_size, scifdev);
+ if (!remote_q) {
+ err = -EIO;
+ goto iounmap;
+ }
+ qp->remote_qp->local_write = 0;
+ /*
+ * To setup the outbound_q, the buffer lives in remote memory,
+ * the read pointer is local, the write pointer is remote
+ */
+ scif_rb_init(&qp->outbound_q,
+ &qp->local_read,
+ &qp->remote_qp->local_write,
+ remote_q,
+ get_count_order(remote_size));
+ local_q = kzalloc(local_size, GFP_KERNEL);
+ if (!local_q) {
+ err = -ENOMEM;
+ goto iounmap_1;
+ }
+ err = scif_map_single(&qp->local_buf, local_q, scifdev, local_size);
+ if (err)
+ goto kfree;
+ qp->remote_qp->local_read = 0;
+ /*
+ * To setup the inbound_q, the buffer lives locally, the read pointer
+ * is remote and the write pointer is local
+ */
+ scif_rb_init(&qp->inbound_q,
+ &qp->remote_qp->local_read,
+ &qp->local_write,
+ local_q, get_count_order(local_size));
+ err = scif_map_single(qp_offset, qp, scifdev,
+ sizeof(struct scif_qp));
+ if (err)
+ goto unmap;
+ qp->local_qp = *qp_offset;
+ return err;
+unmap:
+ scif_unmap_single(qp->local_buf, scifdev, local_size);
+ qp->local_buf = 0;
+kfree:
+ kfree(local_q);
+iounmap_1:
+ scif_iounmap(remote_q, remote_size, scifdev);
+ qp->outbound_q.rb_base = NULL;
+iounmap:
+ scif_iounmap(qp->remote_qp, sizeof(struct scif_qp), scifdev);
+ qp->remote_qp = NULL;
+ return err;
+}
+
+int scif_setup_qp_connect_response(struct scif_dev *scifdev,
+ struct scif_qp *qp, u64 payload)
+{
+ int err = 0;
+ void *r_buf;
+ int remote_size;
+ phys_addr_t tmp_phys;
+
+ qp->remote_qp = scif_ioremap(payload, sizeof(struct scif_qp), scifdev);
+
+ if (!qp->remote_qp) {
+ err = -ENOMEM;
+ goto error;
+ }
+
+ if (qp->remote_qp->magic != SCIFEP_MAGIC) {
+ dev_err(&scifdev->sdev->dev,
+ "SCIFEP_MAGIC mismatch between self %d remote %d\n",
+ scif_dev[scif_info.nodeid].node, scifdev->node);
+ err = -ENODEV;
+ goto error;
+ }
+
+ tmp_phys = qp->remote_qp->local_buf;
+ remote_size = qp->remote_qp->inbound_q.size;
+ r_buf = scif_ioremap(tmp_phys, remote_size, scifdev);
+
+ if (!r_buf)
+ return -EIO;
+
+ qp->local_read = 0;
+ scif_rb_init(&qp->outbound_q,
+ &qp->local_read,
+ &qp->remote_qp->local_write,
+ r_buf,
+ get_count_order(remote_size));
+ /*
+ * Because the node QP may already be processing an INIT message, set
+ * the read pointer so the cached read offset isn't lost
+ */
+ qp->remote_qp->local_read = qp->inbound_q.current_read_offset;
+ /*
+ * resetup the inbound_q now that we know where the
+ * inbound_read really is.
+ */
+ scif_rb_init(&qp->inbound_q,
+ &qp->remote_qp->local_read,
+ &qp->local_write,
+ qp->inbound_q.rb_base,
+ get_count_order(qp->inbound_q.size));
+error:
+ return err;
+}
+
+static __always_inline void
+scif_send_msg_intr(struct scif_dev *scifdev)
+{
+ struct scif_hw_dev *sdev = scifdev->sdev;
+
+ if (scifdev_is_p2p(scifdev))
+ sdev->hw_ops->send_p2p_intr(sdev, scifdev->rdb, &scifdev->mmio);
+ else
+ sdev->hw_ops->send_intr(sdev, scifdev->rdb);
+}
+
+int scif_qp_response(phys_addr_t phys, struct scif_dev *scifdev)
+{
+ int err = 0;
+ struct scifmsg msg;
+
+ err = scif_setup_qp_connect_response(scifdev, scifdev->qpairs, phys);
+ if (!err) {
+ /*
+ * Now that everything is setup and mapped, we're ready
+ * to tell the peer about our queue's location
+ */
+ msg.uop = SCIF_INIT;
+ msg.dst.node = scifdev->node;
+ err = scif_nodeqp_send(scifdev, &msg);
+ }
+ return err;
+}
+
+void scif_send_exit(struct scif_dev *scifdev)
+{
+ struct scifmsg msg;
+ int ret;
+
+ scifdev->exit = OP_IN_PROGRESS;
+ msg.uop = SCIF_EXIT;
+ msg.src.node = scif_info.nodeid;
+ msg.dst.node = scifdev->node;
+ ret = scif_nodeqp_send(scifdev, &msg);
+ if (ret)
+ goto done;
+ /* Wait for a SCIF_EXIT_ACK message */
+ wait_event_timeout(scif_info.exitwq, scifdev->exit == OP_COMPLETED,
+ SCIF_NODE_ALIVE_TIMEOUT);
+done:
+ scifdev->exit = OP_IDLE;
+}
+
+int scif_setup_qp(struct scif_dev *scifdev)
+{
+ int err = 0;
+ int local_size;
+ struct scif_qp *qp;
+
+ local_size = SCIF_NODE_QP_SIZE;
+
+ qp = kzalloc(sizeof(*qp), GFP_KERNEL);
+ if (!qp) {
+ err = -ENOMEM;
+ return err;
+ }
+ qp->magic = SCIFEP_MAGIC;
+ scifdev->qpairs = qp;
+ err = scif_setup_qp_connect(qp, &scifdev->qp_dma_addr,
+ local_size, scifdev);
+ if (err)
+ goto free_qp;
+ /*
+ * We're as setup as we can be. The inbound_q is setup, w/o a usable
+ * outbound q. When we get a message, the read_ptr will be updated,
+ * and we will pull the message.
+ */
+ return err;
+free_qp:
+ kfree(scifdev->qpairs);
+ scifdev->qpairs = NULL;
+ return err;
+}
+
+static void scif_p2p_freesg(struct scatterlist *sg)
+{
+ kfree(sg);
+}
+
+static struct scatterlist *
+scif_p2p_setsg(phys_addr_t pa, int page_size, int page_cnt)
+{
+ struct scatterlist *sg;
+ struct page *page;
+ int i;
+
+ sg = kcalloc(page_cnt, sizeof(struct scatterlist), GFP_KERNEL);
+ if (!sg)
+ return NULL;
+ sg_init_table(sg, page_cnt);
+ for (i = 0; i < page_cnt; i++) {
+ page = pfn_to_page(pa >> PAGE_SHIFT);
+ sg_set_page(&sg[i], page, page_size, 0);
+ pa += page_size;
+ }
+ return sg;
+}
+
+/* Init p2p mappings required to access peerdev from scifdev */
+static struct scif_p2p_info *
+scif_init_p2p_info(struct scif_dev *scifdev, struct scif_dev *peerdev)
+{
+ struct scif_p2p_info *p2p;
+ int num_mmio_pages, num_aper_pages, sg_page_shift, err, num_aper_chunks;
+ struct scif_hw_dev *psdev = peerdev->sdev;
+ struct scif_hw_dev *sdev = scifdev->sdev;
+
+ num_mmio_pages = psdev->mmio->len >> PAGE_SHIFT;
+ num_aper_pages = psdev->aper->len >> PAGE_SHIFT;
+
+ p2p = kzalloc(sizeof(*p2p), GFP_KERNEL);
+ if (!p2p)
+ return NULL;
+ p2p->ppi_sg[SCIF_PPI_MMIO] = scif_p2p_setsg(psdev->mmio->pa,
+ PAGE_SIZE, num_mmio_pages);
+ if (!p2p->ppi_sg[SCIF_PPI_MMIO])
+ goto free_p2p;
+ p2p->sg_nentries[SCIF_PPI_MMIO] = num_mmio_pages;
+ sg_page_shift = get_order(min(psdev->aper->len, (u64)(1 << 30)));
+ num_aper_chunks = num_aper_pages >> (sg_page_shift - PAGE_SHIFT);
+ p2p->ppi_sg[SCIF_PPI_APER] = scif_p2p_setsg(psdev->aper->pa,
+ 1 << sg_page_shift,
+ num_aper_chunks);
+ p2p->sg_nentries[SCIF_PPI_APER] = num_aper_chunks;
+ err = dma_map_sg(&sdev->dev, p2p->ppi_sg[SCIF_PPI_MMIO],
+ num_mmio_pages, PCI_DMA_BIDIRECTIONAL);
+ if (err != num_mmio_pages)
+ goto scif_p2p_free;
+ err = dma_map_sg(&sdev->dev, p2p->ppi_sg[SCIF_PPI_APER],
+ num_aper_chunks, PCI_DMA_BIDIRECTIONAL);
+ if (err != num_aper_chunks)
+ goto dma_unmap;
+ p2p->ppi_da[SCIF_PPI_MMIO] = sg_dma_address(p2p->ppi_sg[SCIF_PPI_MMIO]);
+ p2p->ppi_da[SCIF_PPI_APER] = sg_dma_address(p2p->ppi_sg[SCIF_PPI_APER]);
+ p2p->ppi_len[SCIF_PPI_MMIO] = num_mmio_pages;
+ p2p->ppi_len[SCIF_PPI_APER] = num_aper_pages;
+ p2p->ppi_peer_id = peerdev->node;
+ return p2p;
+dma_unmap:
+ dma_unmap_sg(&sdev->dev, p2p->ppi_sg[SCIF_PPI_MMIO],
+ p2p->sg_nentries[SCIF_PPI_MMIO], DMA_BIDIRECTIONAL);
+scif_p2p_free:
+ scif_p2p_freesg(p2p->ppi_sg[SCIF_PPI_MMIO]);
+ scif_p2p_freesg(p2p->ppi_sg[SCIF_PPI_APER]);
+free_p2p:
+ kfree(p2p);
+ return NULL;
+}
+
+/* Uninitialize and release resources from a p2p mapping */
+static void scif_deinit_p2p_info(struct scif_dev *scifdev,
+ struct scif_p2p_info *p2p)
+{
+ struct scif_hw_dev *sdev = scifdev->sdev;
+
+ dma_unmap_sg(&sdev->dev, p2p->ppi_sg[SCIF_PPI_MMIO],
+ p2p->sg_nentries[SCIF_PPI_MMIO], DMA_BIDIRECTIONAL);
+ dma_unmap_sg(&sdev->dev, p2p->ppi_sg[SCIF_PPI_APER],
+ p2p->sg_nentries[SCIF_PPI_APER], DMA_BIDIRECTIONAL);
+ scif_p2p_freesg(p2p->ppi_sg[SCIF_PPI_MMIO]);
+ scif_p2p_freesg(p2p->ppi_sg[SCIF_PPI_APER]);
+ kfree(p2p);
+}
+
+/**
+ * scif_node_connect: Respond to SCIF_NODE_CONNECT interrupt message
+ * @dst: Destination node
+ *
+ * Connect the src and dst node by setting up the p2p connection
+ * between them. Management node here acts like a proxy.
+ */
+static void scif_node_connect(struct scif_dev *scifdev, int dst)
+{
+ struct scif_dev *dev_j = scifdev;
+ struct scif_dev *dev_i = NULL;
+ struct scif_p2p_info *p2p_ij = NULL; /* bus addr for j from i */
+ struct scif_p2p_info *p2p_ji = NULL; /* bus addr for i from j */
+ struct scif_p2p_info *p2p;
+ struct list_head *pos, *tmp;
+ struct scifmsg msg;
+ int err;
+ u64 tmppayload;
+
+ if (dst < 1 || dst > scif_info.maxid)
+ return;
+
+ dev_i = &scif_dev[dst];
+
+ if (!_scifdev_alive(dev_i))
+ return;
+ /*
+ * If the p2p connection is already setup or in the process of setting
+ * up then just ignore this request. The requested node will get
+ * informed by SCIF_NODE_ADD_ACK or SCIF_NODE_ADD_NACK
+ */
+ if (!list_empty(&dev_i->p2p)) {
+ list_for_each_safe(pos, tmp, &dev_i->p2p) {
+ p2p = list_entry(pos, struct scif_p2p_info, ppi_list);
+ if (p2p->ppi_peer_id == dev_j->node)
+ return;
+ }
+ }
+ p2p_ij = scif_init_p2p_info(dev_i, dev_j);
+ if (!p2p_ij)
+ return;
+ p2p_ji = scif_init_p2p_info(dev_j, dev_i);
+ if (!p2p_ji) {
+ scif_deinit_p2p_info(dev_i, p2p_ij);
+ return;
+ }
+ list_add_tail(&p2p_ij->ppi_list, &dev_i->p2p);
+ list_add_tail(&p2p_ji->ppi_list, &dev_j->p2p);
+
+ /*
+ * Send a SCIF_NODE_ADD to dev_i, pass it its bus address
+ * as seen from dev_j
+ */
+ msg.uop = SCIF_NODE_ADD;
+ msg.src.node = dev_j->node;
+ msg.dst.node = dev_i->node;
+
+ msg.payload[0] = p2p_ji->ppi_da[SCIF_PPI_APER];
+ msg.payload[1] = p2p_ij->ppi_da[SCIF_PPI_MMIO];
+ msg.payload[2] = p2p_ij->ppi_da[SCIF_PPI_APER];
+ msg.payload[3] = p2p_ij->ppi_len[SCIF_PPI_APER] << PAGE_SHIFT;
+
+ err = scif_nodeqp_send(dev_i, &msg);
+ if (err) {
+ dev_err(&scifdev->sdev->dev,
+ "%s %d error %d\n", __func__, __LINE__, err);
+ return;
+ }
+
+ /* Same as above but to dev_j */
+ msg.uop = SCIF_NODE_ADD;
+ msg.src.node = dev_i->node;
+ msg.dst.node = dev_j->node;
+
+ tmppayload = msg.payload[0];
+ msg.payload[0] = msg.payload[2];
+ msg.payload[2] = tmppayload;
+ msg.payload[1] = p2p_ji->ppi_da[SCIF_PPI_MMIO];
+ msg.payload[3] = p2p_ji->ppi_len[SCIF_PPI_APER] << PAGE_SHIFT;
+
+ scif_nodeqp_send(dev_j, &msg);
+}
+
+static void scif_p2p_setup(void)
+{
+ int i, j;
+
+ if (!scif_info.p2p_enable)
+ return;
+
+ for (i = 1; i <= scif_info.maxid; i++)
+ if (!_scifdev_alive(&scif_dev[i]))
+ return;
+
+ for (i = 1; i <= scif_info.maxid; i++) {
+ for (j = 1; j <= scif_info.maxid; j++) {
+ struct scif_dev *scifdev = &scif_dev[i];
+
+ if (i == j)
+ continue;
+ scif_node_connect(scifdev, j);
+ }
+ }
+}
+
+static char *message_types[] = {"BAD",
+ "INIT",
+ "EXIT",
+ "SCIF_EXIT_ACK",
+ "SCIF_NODE_ADD",
+ "SCIF_NODE_ADD_ACK",
+ "SCIF_NODE_ADD_NACK",
+ "REMOVE_NODE",
+ "REMOVE_NODE_ACK",
+ "CNCT_REQ",
+ "CNCT_GNT",
+ "CNCT_GNTACK",
+ "CNCT_GNTNACK",
+ "CNCT_REJ",
+ "DISCNCT",
+ "DISCNT_ACK",
+ "CLIENT_SENT",
+ "CLIENT_RCVD",
+ "SCIF_GET_NODE_INFO",
+ "REGISTER",
+ "REGISTER_ACK",
+ "REGISTER_NACK",
+ "UNREGISTER",
+ "UNREGISTER_ACK",
+ "UNREGISTER_NACK",
+ "ALLOC_REQ",
+ "ALLOC_GNT",
+ "ALLOC_REJ",
+ "FREE_PHYS",
+ "FREE_VIRT",
+ "MUNMAP",
+ "MARK",
+ "MARK_ACK",
+ "MARK_NACK",
+ "WAIT",
+ "WAIT_ACK",
+ "WAIT_NACK",
+ "SIGNAL_LOCAL",
+ "SIGNAL_REMOTE",
+ "SIG_ACK",
+ "SIG_NACK"};
+
+static void
+scif_display_message(struct scif_dev *scifdev, struct scifmsg *msg,
+ const char *label)
+{
+ if (!scif_info.en_msg_log)
+ return;
+ if (msg->uop > SCIF_MAX_MSG) {
+ dev_err(&scifdev->sdev->dev,
+ "%s: unknown msg type %d\n", label, msg->uop);
+ return;
+ }
+ dev_info(&scifdev->sdev->dev,
+ "%s: msg type %s, src %d:%d, dest %d:%d payload 0x%llx:0x%llx:0x%llx:0x%llx\n",
+ label, message_types[msg->uop], msg->src.node, msg->src.port,
+ msg->dst.node, msg->dst.port, msg->payload[0], msg->payload[1],
+ msg->payload[2], msg->payload[3]);
+}
+
+int _scif_nodeqp_send(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+ struct scif_qp *qp = scifdev->qpairs;
+ int err = -ENOMEM, loop_cnt = 0;
+
+ scif_display_message(scifdev, msg, "Sent");
+ if (!qp) {
+ err = -EINVAL;
+ goto error;
+ }
+ spin_lock(&qp->send_lock);
+
+ while ((err = scif_rb_write(&qp->outbound_q,
+ msg, sizeof(struct scifmsg)))) {
+ mdelay(1);
+#define SCIF_NODEQP_SEND_TO_MSEC (3 * 1000)
+ if (loop_cnt++ > (SCIF_NODEQP_SEND_TO_MSEC)) {
+ err = -ENODEV;
+ break;
+ }
+ }
+ if (!err)
+ scif_rb_commit(&qp->outbound_q);
+ spin_unlock(&qp->send_lock);
+ if (!err) {
+ if (scifdev_self(scifdev))
+ /*
+ * For loopback we need to emulate an interrupt by
+ * queuing work for the queue handling real node
+ * Qp interrupts.
+ */
+ queue_work(scifdev->intr_wq, &scifdev->intr_bh);
+ else
+ scif_send_msg_intr(scifdev);
+ }
+error:
+ if (err)
+ dev_dbg(&scifdev->sdev->dev,
+ "%s %d error %d uop %d\n",
+ __func__, __LINE__, err, msg->uop);
+ return err;
+}
+
+/**
+ * scif_nodeqp_send - Send a message on the node queue pair
+ * @scifdev: Scif Device.
+ * @msg: The message to be sent.
+ */
+int scif_nodeqp_send(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+ int err;
+ struct device *spdev = NULL;
+
+ if (msg->uop > SCIF_EXIT_ACK) {
+ /* Dont send messages once the exit flow has begun */
+ if (OP_IDLE != scifdev->exit)
+ return -ENODEV;
+ spdev = scif_get_peer_dev(scifdev);
+ if (IS_ERR(spdev)) {
+ err = PTR_ERR(spdev);
+ return err;
+ }
+ }
+ err = _scif_nodeqp_send(scifdev, msg);
+ if (msg->uop > SCIF_EXIT_ACK)
+ scif_put_peer_dev(spdev);
+ return err;
+}
+
+/*
+ * scif_misc_handler:
+ *
+ * Work queue handler for servicing miscellaneous SCIF tasks.
+ * Examples include:
+ * 1) Remote fence requests.
+ * 2) Destruction of temporary registered windows
+ * created during scif_vreadfrom()/scif_vwriteto().
+ * 3) Cleanup of zombie endpoints.
+ */
+void scif_misc_handler(struct work_struct *work)
+{
+ scif_rma_handle_remote_fences();
+ scif_rma_destroy_windows();
+ scif_rma_destroy_tcw_invalid();
+ scif_cleanup_zombie_epd();
+}
+
+/**
+ * scif_init() - Respond to SCIF_INIT interrupt message
+ * @scifdev: Remote SCIF device node
+ * @msg: Interrupt message
+ */
+static __always_inline void
+scif_init(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+ /*
+ * Allow the thread waiting for device page updates for the peer QP DMA
+ * address to complete initializing the inbound_q.
+ */
+ flush_delayed_work(&scifdev->qp_dwork);
+
+ scif_peer_register_device(scifdev);
+
+ if (scif_is_mgmt_node()) {
+ mutex_lock(&scif_info.conflock);
+ scif_p2p_setup();
+ mutex_unlock(&scif_info.conflock);
+ }
+}
+
+/**
+ * scif_exit() - Respond to SCIF_EXIT interrupt message
+ * @scifdev: Remote SCIF device node
+ * @msg: Interrupt message
+ *
+ * This function stops the SCIF interface for the node which sent
+ * the SCIF_EXIT message and starts waiting for that node to
+ * resetup the queue pair again.
+ */
+static __always_inline void
+scif_exit(struct scif_dev *scifdev, struct scifmsg *unused)
+{
+ scifdev->exit_ack_pending = true;
+ if (scif_is_mgmt_node())
+ scif_disconnect_node(scifdev->node, false);
+ else
+ scif_stop(scifdev);
+ schedule_delayed_work(&scifdev->qp_dwork,
+ msecs_to_jiffies(1000));
+}
+
+/**
+ * scif_exitack() - Respond to SCIF_EXIT_ACK interrupt message
+ * @scifdev: Remote SCIF device node
+ * @msg: Interrupt message
+ *
+ */
+static __always_inline void
+scif_exit_ack(struct scif_dev *scifdev, struct scifmsg *unused)
+{
+ scifdev->exit = OP_COMPLETED;
+ wake_up(&scif_info.exitwq);
+}
+
+/**
+ * scif_node_add() - Respond to SCIF_NODE_ADD interrupt message
+ * @scifdev: Remote SCIF device node
+ * @msg: Interrupt message
+ *
+ * When the mgmt node driver has finished initializing a MIC node queue pair it
+ * marks the node as online. It then looks for all currently online MIC cards
+ * and send a SCIF_NODE_ADD message to identify the ID of the new card for
+ * peer to peer initialization
+ *
+ * The local node allocates its incoming queue and sends its address in the
+ * SCIF_NODE_ADD_ACK message back to the mgmt node, the mgmt node "reflects"
+ * this message to the new node
+ */
+static __always_inline void
+scif_node_add(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+ struct scif_dev *newdev;
+ dma_addr_t qp_offset;
+ int qp_connect;
+ struct scif_hw_dev *sdev;
+
+ dev_dbg(&scifdev->sdev->dev,
+ "Scifdev %d:%d received NODE_ADD msg for node %d\n",
+ scifdev->node, msg->dst.node, msg->src.node);
+ dev_dbg(&scifdev->sdev->dev,
+ "Remote address for this node's aperture %llx\n",
+ msg->payload[0]);
+ newdev = &scif_dev[msg->src.node];
+ newdev->node = msg->src.node;
+ newdev->sdev = scif_dev[SCIF_MGMT_NODE].sdev;
+ sdev = newdev->sdev;
+
+ if (scif_setup_intr_wq(newdev)) {
+ dev_err(&scifdev->sdev->dev,
+ "failed to setup interrupts for %d\n", msg->src.node);
+ goto interrupt_setup_error;
+ }
+ newdev->mmio.va = ioremap_nocache(msg->payload[1], sdev->mmio->len);
+ if (!newdev->mmio.va) {
+ dev_err(&scifdev->sdev->dev,
+ "failed to map mmio for %d\n", msg->src.node);
+ goto mmio_map_error;
+ }
+ newdev->qpairs = kzalloc(sizeof(*newdev->qpairs), GFP_KERNEL);
+ if (!newdev->qpairs)
+ goto qp_alloc_error;
+ /*
+ * Set the base address of the remote node's memory since it gets
+ * added to qp_offset
+ */
+ newdev->base_addr = msg->payload[0];
+
+ qp_connect = scif_setup_qp_connect(newdev->qpairs, &qp_offset,
+ SCIF_NODE_QP_SIZE, newdev);
+ if (qp_connect) {
+ dev_err(&scifdev->sdev->dev,
+ "failed to setup qp_connect %d\n", qp_connect);
+ goto qp_connect_error;
+ }
+
+ newdev->db = sdev->hw_ops->next_db(sdev);
+ newdev->cookie = sdev->hw_ops->request_irq(sdev, scif_intr_handler,
+ "SCIF_INTR", newdev,
+ newdev->db);
+ if (IS_ERR(newdev->cookie))
+ goto qp_connect_error;
+ newdev->qpairs->magic = SCIFEP_MAGIC;
+ newdev->qpairs->qp_state = SCIF_QP_OFFLINE;
+
+ msg->uop = SCIF_NODE_ADD_ACK;
+ msg->dst.node = msg->src.node;
+ msg->src.node = scif_info.nodeid;
+ msg->payload[0] = qp_offset;
+ msg->payload[2] = newdev->db;
+ scif_nodeqp_send(&scif_dev[SCIF_MGMT_NODE], msg);
+ return;
+qp_connect_error:
+ kfree(newdev->qpairs);
+ newdev->qpairs = NULL;
+qp_alloc_error:
+ iounmap(newdev->mmio.va);
+ newdev->mmio.va = NULL;
+mmio_map_error:
+interrupt_setup_error:
+ dev_err(&scifdev->sdev->dev,
+ "node add failed for node %d\n", msg->src.node);
+ msg->uop = SCIF_NODE_ADD_NACK;
+ msg->dst.node = msg->src.node;
+ msg->src.node = scif_info.nodeid;
+ scif_nodeqp_send(&scif_dev[SCIF_MGMT_NODE], msg);
+}
+
+void scif_poll_qp_state(struct work_struct *work)
+{
+#define SCIF_NODE_QP_RETRY 100
+#define SCIF_NODE_QP_TIMEOUT 100
+ struct scif_dev *peerdev = container_of(work, struct scif_dev,
+ p2p_dwork.work);
+ struct scif_qp *qp = &peerdev->qpairs[0];
+
+ if (qp->qp_state != SCIF_QP_ONLINE ||
+ qp->remote_qp->qp_state != SCIF_QP_ONLINE) {
+ if (peerdev->p2p_retry++ == SCIF_NODE_QP_RETRY) {
+ dev_err(&peerdev->sdev->dev,
+ "Warning: QP check timeout with state %d\n",
+ qp->qp_state);
+ goto timeout;
+ }
+ schedule_delayed_work(&peerdev->p2p_dwork,
+ msecs_to_jiffies(SCIF_NODE_QP_TIMEOUT));
+ return;
+ }
+ return;
+timeout:
+ dev_err(&peerdev->sdev->dev,
+ "%s %d remote node %d offline, state = 0x%x\n",
+ __func__, __LINE__, peerdev->node, qp->qp_state);
+ qp->remote_qp->qp_state = SCIF_QP_OFFLINE;
+ scif_peer_unregister_device(peerdev);
+ scif_cleanup_scifdev(peerdev);
+}
+
+/**
+ * scif_node_add_ack() - Respond to SCIF_NODE_ADD_ACK interrupt message
+ * @scifdev: Remote SCIF device node
+ * @msg: Interrupt message
+ *
+ * After a MIC node receives the SCIF_NODE_ADD_ACK message it send this
+ * message to the mgmt node to confirm the sequence is finished.
+ *
+ */
+static __always_inline void
+scif_node_add_ack(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+ struct scif_dev *peerdev;
+ struct scif_qp *qp;
+ struct scif_dev *dst_dev = &scif_dev[msg->dst.node];
+
+ dev_dbg(&scifdev->sdev->dev,
+ "Scifdev %d received SCIF_NODE_ADD_ACK msg src %d dst %d\n",
+ scifdev->node, msg->src.node, msg->dst.node);
+ dev_dbg(&scifdev->sdev->dev,
+ "payload %llx %llx %llx %llx\n", msg->payload[0],
+ msg->payload[1], msg->payload[2], msg->payload[3]);
+ if (scif_is_mgmt_node()) {
+ /*
+ * the lock serializes with scif_qp_response_ack. The mgmt node
+ * is forwarding the NODE_ADD_ACK message from src to dst we
+ * need to make sure that the dst has already received a
+ * NODE_ADD for src and setup its end of the qp to dst
+ */
+ mutex_lock(&scif_info.conflock);
+ msg->payload[1] = scif_info.maxid;
+ scif_nodeqp_send(dst_dev, msg);
+ mutex_unlock(&scif_info.conflock);
+ return;
+ }
+ peerdev = &scif_dev[msg->src.node];
+ peerdev->sdev = scif_dev[SCIF_MGMT_NODE].sdev;
+ peerdev->node = msg->src.node;
+
+ qp = &peerdev->qpairs[0];
+
+ if ((scif_setup_qp_connect_response(peerdev, &peerdev->qpairs[0],
+ msg->payload[0])))
+ goto local_error;
+ peerdev->rdb = msg->payload[2];
+ qp->remote_qp->qp_state = SCIF_QP_ONLINE;
+
+ scif_peer_register_device(peerdev);
+
+ schedule_delayed_work(&peerdev->p2p_dwork, 0);
+ return;
+local_error:
+ scif_cleanup_scifdev(peerdev);
+}
+
+/**
+ * scif_node_add_nack: Respond to SCIF_NODE_ADD_NACK interrupt message
+ * @msg: Interrupt message
+ *
+ * SCIF_NODE_ADD failed, so inform the waiting wq.
+ */
+static __always_inline void
+scif_node_add_nack(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+ if (scif_is_mgmt_node()) {
+ struct scif_dev *dst_dev = &scif_dev[msg->dst.node];
+
+ dev_dbg(&scifdev->sdev->dev,
+ "SCIF_NODE_ADD_NACK received from %d\n", scifdev->node);
+ scif_nodeqp_send(dst_dev, msg);
+ }
+}
+
+/*
+ * scif_node_remove: Handle SCIF_NODE_REMOVE message
+ * @msg: Interrupt message
+ *
+ * Handle node removal.
+ */
+static __always_inline void
+scif_node_remove(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+ int node = msg->payload[0];
+ struct scif_dev *scdev = &scif_dev[node];
+
+ scdev->node_remove_ack_pending = true;
+ scif_handle_remove_node(node);
+}
+
+/*
+ * scif_node_remove_ack: Handle SCIF_NODE_REMOVE_ACK message
+ * @msg: Interrupt message
+ *
+ * The peer has acked a SCIF_NODE_REMOVE message.
+ */
+static __always_inline void
+scif_node_remove_ack(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+ struct scif_dev *sdev = &scif_dev[msg->payload[0]];
+
+ atomic_inc(&sdev->disconn_rescnt);
+ wake_up(&sdev->disconn_wq);
+}
+
+/**
+ * scif_get_node_info: Respond to SCIF_GET_NODE_INFO interrupt message
+ * @msg: Interrupt message
+ *
+ * Retrieve node info i.e maxid and total from the mgmt node.
+ */
+static __always_inline void
+scif_get_node_info_resp(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+ if (scif_is_mgmt_node()) {
+ swap(msg->dst.node, msg->src.node);
+ mutex_lock(&scif_info.conflock);
+ msg->payload[1] = scif_info.maxid;
+ msg->payload[2] = scif_info.total;
+ mutex_unlock(&scif_info.conflock);
+ scif_nodeqp_send(scifdev, msg);
+ } else {
+ struct completion *node_info =
+ (struct completion *)msg->payload[3];
+
+ mutex_lock(&scif_info.conflock);
+ scif_info.maxid = msg->payload[1];
+ scif_info.total = msg->payload[2];
+ complete_all(node_info);
+ mutex_unlock(&scif_info.conflock);
+ }
+}
+
+static void
+scif_msg_unknown(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+ /* Bogus Node Qp Message? */
+ dev_err(&scifdev->sdev->dev,
+ "Unknown message 0x%xn scifdev->node 0x%x\n",
+ msg->uop, scifdev->node);
+}
+
+static void (*scif_intr_func[SCIF_MAX_MSG + 1])
+ (struct scif_dev *, struct scifmsg *msg) = {
+ scif_msg_unknown, /* Error */
+ scif_init, /* SCIF_INIT */
+ scif_exit, /* SCIF_EXIT */
+ scif_exit_ack, /* SCIF_EXIT_ACK */
+ scif_node_add, /* SCIF_NODE_ADD */
+ scif_node_add_ack, /* SCIF_NODE_ADD_ACK */
+ scif_node_add_nack, /* SCIF_NODE_ADD_NACK */
+ scif_node_remove, /* SCIF_NODE_REMOVE */
+ scif_node_remove_ack, /* SCIF_NODE_REMOVE_ACK */
+ scif_cnctreq, /* SCIF_CNCT_REQ */
+ scif_cnctgnt, /* SCIF_CNCT_GNT */
+ scif_cnctgnt_ack, /* SCIF_CNCT_GNTACK */
+ scif_cnctgnt_nack, /* SCIF_CNCT_GNTNACK */
+ scif_cnctrej, /* SCIF_CNCT_REJ */
+ scif_discnct, /* SCIF_DISCNCT */
+ scif_discnt_ack, /* SCIF_DISCNT_ACK */
+ scif_clientsend, /* SCIF_CLIENT_SENT */
+ scif_clientrcvd, /* SCIF_CLIENT_RCVD */
+ scif_get_node_info_resp,/* SCIF_GET_NODE_INFO */
+ scif_recv_reg, /* SCIF_REGISTER */
+ scif_recv_reg_ack, /* SCIF_REGISTER_ACK */
+ scif_recv_reg_nack, /* SCIF_REGISTER_NACK */
+ scif_recv_unreg, /* SCIF_UNREGISTER */
+ scif_recv_unreg_ack, /* SCIF_UNREGISTER_ACK */
+ scif_recv_unreg_nack, /* SCIF_UNREGISTER_NACK */
+ scif_alloc_req, /* SCIF_ALLOC_REQ */
+ scif_alloc_gnt_rej, /* SCIF_ALLOC_GNT */
+ scif_alloc_gnt_rej, /* SCIF_ALLOC_REJ */
+ scif_free_virt, /* SCIF_FREE_VIRT */
+ scif_recv_munmap, /* SCIF_MUNMAP */
+ scif_recv_mark, /* SCIF_MARK */
+ scif_recv_mark_resp, /* SCIF_MARK_ACK */
+ scif_recv_mark_resp, /* SCIF_MARK_NACK */
+ scif_recv_wait, /* SCIF_WAIT */
+ scif_recv_wait_resp, /* SCIF_WAIT_ACK */
+ scif_recv_wait_resp, /* SCIF_WAIT_NACK */
+ scif_recv_sig_local, /* SCIF_SIG_LOCAL */
+ scif_recv_sig_remote, /* SCIF_SIG_REMOTE */
+ scif_recv_sig_resp, /* SCIF_SIG_ACK */
+ scif_recv_sig_resp, /* SCIF_SIG_NACK */
+};
+
+/**
+ * scif_nodeqp_msg_handler() - Common handler for node messages
+ * @scifdev: Remote device to respond to
+ * @qp: Remote memory pointer
+ * @msg: The message to be handled.
+ *
+ * This routine calls the appropriate routine to handle a Node Qp
+ * message receipt
+ */
+static int scif_max_msg_id = SCIF_MAX_MSG;
+
+static void
+scif_nodeqp_msg_handler(struct scif_dev *scifdev,
+ struct scif_qp *qp, struct scifmsg *msg)
+{
+ scif_display_message(scifdev, msg, "Rcvd");
+
+ if (msg->uop > (u32)scif_max_msg_id) {
+ /* Bogus Node Qp Message? */
+ dev_err(&scifdev->sdev->dev,
+ "Unknown message 0x%xn scifdev->node 0x%x\n",
+ msg->uop, scifdev->node);
+ return;
+ }
+
+ scif_intr_func[msg->uop](scifdev, msg);
+}
+
+/**
+ * scif_nodeqp_intrhandler() - Interrupt handler for node messages
+ * @scifdev: Remote device to respond to
+ * @qp: Remote memory pointer
+ *
+ * This routine is triggered by the interrupt mechanism. It reads
+ * messages from the node queue RB and calls the Node QP Message handling
+ * routine.
+ */
+void scif_nodeqp_intrhandler(struct scif_dev *scifdev, struct scif_qp *qp)
+{
+ struct scifmsg msg;
+ int read_size;
+
+ do {
+ read_size = scif_rb_get_next(&qp->inbound_q, &msg, sizeof(msg));
+ if (!read_size)
+ break;
+ scif_nodeqp_msg_handler(scifdev, qp, &msg);
+ /*
+ * The node queue pair is unmapped so skip the read pointer
+ * update after receipt of a SCIF_EXIT_ACK
+ */
+ if (SCIF_EXIT_ACK == msg.uop)
+ break;
+ scif_rb_update_read_ptr(&qp->inbound_q);
+ } while (1);
+}
+
+/**
+ * scif_loopb_wq_handler - Loopback Workqueue Handler.
+ * @work: loop back work
+ *
+ * This work queue routine is invoked by the loopback work queue handler.
+ * It grabs the recv lock, dequeues any available messages from the head
+ * of the loopback message list, calls the node QP message handler,
+ * waits for it to return, then frees up this message and dequeues more
+ * elements of the list if available.
+ */
+static void scif_loopb_wq_handler(struct work_struct *unused)
+{
+ struct scif_dev *scifdev = scif_info.loopb_dev;
+ struct scif_qp *qp = scifdev->qpairs;
+ struct scif_loopb_msg *msg;
+
+ do {
+ msg = NULL;
+ spin_lock(&qp->recv_lock);
+ if (!list_empty(&scif_info.loopb_recv_q)) {
+ msg = list_first_entry(&scif_info.loopb_recv_q,
+ struct scif_loopb_msg,
+ list);
+ list_del(&msg->list);
+ }
+ spin_unlock(&qp->recv_lock);
+
+ if (msg) {
+ scif_nodeqp_msg_handler(scifdev, qp, &msg->msg);
+ kfree(msg);
+ }
+ } while (msg);
+}
+
+/**
+ * scif_loopb_msg_handler() - Workqueue handler for loopback messages.
+ * @scifdev: SCIF device
+ * @qp: Queue pair.
+ *
+ * This work queue routine is triggered when a loopback message is received.
+ *
+ * We need special handling for receiving Node Qp messages on a loopback SCIF
+ * device via two workqueues for receiving messages.
+ *
+ * The reason we need the extra workqueue which is not required with *normal*
+ * non-loopback SCIF devices is the potential classic deadlock described below:
+ *
+ * Thread A tries to send a message on a loopback SCIF device and blocks since
+ * there is no space in the RB while it has the send_lock held or another
+ * lock called lock X for example.
+ *
+ * Thread B: The Loopback Node QP message receive workqueue receives the message
+ * and tries to send a message (eg an ACK) to the loopback SCIF device. It tries
+ * to grab the send lock again or lock X and deadlocks with Thread A. The RB
+ * cannot be drained any further due to this classic deadlock.
+ *
+ * In order to avoid deadlocks as mentioned above we have an extra level of
+ * indirection achieved by having two workqueues.
+ * 1) The first workqueue whose handler is scif_loopb_msg_handler reads
+ * messages from the Node QP RB, adds them to a list and queues work for the
+ * second workqueue.
+ *
+ * 2) The second workqueue whose handler is scif_loopb_wq_handler dequeues
+ * messages from the list, handles them, frees up the memory and dequeues
+ * more elements from the list if possible.
+ */
+int
+scif_loopb_msg_handler(struct scif_dev *scifdev, struct scif_qp *qp)
+{
+ int read_size;
+ struct scif_loopb_msg *msg;
+
+ do {
+ msg = kmalloc(sizeof(*msg), GFP_KERNEL);
+ if (!msg)
+ return -ENOMEM;
+ read_size = scif_rb_get_next(&qp->inbound_q, &msg->msg,
+ sizeof(struct scifmsg));
+ if (read_size != sizeof(struct scifmsg)) {
+ kfree(msg);
+ scif_rb_update_read_ptr(&qp->inbound_q);
+ break;
+ }
+ spin_lock(&qp->recv_lock);
+ list_add_tail(&msg->list, &scif_info.loopb_recv_q);
+ spin_unlock(&qp->recv_lock);
+ queue_work(scif_info.loopb_wq, &scif_info.loopb_work);
+ scif_rb_update_read_ptr(&qp->inbound_q);
+ } while (read_size == sizeof(struct scifmsg));
+ return read_size;
+}
+
+/**
+ * scif_setup_loopback_qp - One time setup work for Loopback Node Qp.
+ * @scifdev: SCIF device
+ *
+ * Sets up the required loopback workqueues, queue pairs and ring buffers
+ */
+int scif_setup_loopback_qp(struct scif_dev *scifdev)
+{
+ int err = 0;
+ void *local_q;
+ struct scif_qp *qp;
+
+ err = scif_setup_intr_wq(scifdev);
+ if (err)
+ goto exit;
+ INIT_LIST_HEAD(&scif_info.loopb_recv_q);
+ snprintf(scif_info.loopb_wqname, sizeof(scif_info.loopb_wqname),
+ "SCIF LOOPB %d", scifdev->node);
+ scif_info.loopb_wq =
+ alloc_ordered_workqueue(scif_info.loopb_wqname, 0);
+ if (!scif_info.loopb_wq) {
+ err = -ENOMEM;
+ goto destroy_intr;
+ }
+ INIT_WORK(&scif_info.loopb_work, scif_loopb_wq_handler);
+ /* Allocate Self Qpair */
+ scifdev->qpairs = kzalloc(sizeof(*scifdev->qpairs), GFP_KERNEL);
+ if (!scifdev->qpairs) {
+ err = -ENOMEM;
+ goto destroy_loopb_wq;
+ }
+
+ qp = scifdev->qpairs;
+ qp->magic = SCIFEP_MAGIC;
+ spin_lock_init(&qp->send_lock);
+ spin_lock_init(&qp->recv_lock);
+
+ local_q = kzalloc(SCIF_NODE_QP_SIZE, GFP_KERNEL);
+ if (!local_q) {
+ err = -ENOMEM;
+ goto free_qpairs;
+ }
+ /*
+ * For loopback the inbound_q and outbound_q are essentially the same
+ * since the Node sends a message on the loopback interface to the
+ * outbound_q which is then received on the inbound_q.
+ */
+ scif_rb_init(&qp->outbound_q,
+ &qp->local_read,
+ &qp->local_write,
+ local_q, get_count_order(SCIF_NODE_QP_SIZE));
+
+ scif_rb_init(&qp->inbound_q,
+ &qp->local_read,
+ &qp->local_write,
+ local_q, get_count_order(SCIF_NODE_QP_SIZE));
+ scif_info.nodeid = scifdev->node;
+
+ scif_peer_register_device(scifdev);
+
+ scif_info.loopb_dev = scifdev;
+ return err;
+free_qpairs:
+ kfree(scifdev->qpairs);
+destroy_loopb_wq:
+ destroy_workqueue(scif_info.loopb_wq);
+destroy_intr:
+ scif_destroy_intr_wq(scifdev);
+exit:
+ return err;
+}
+
+/**
+ * scif_destroy_loopback_qp - One time uninit work for Loopback Node Qp
+ * @scifdev: SCIF device
+ *
+ * Destroys the workqueues and frees up the Ring Buffer and Queue Pair memory.
+ */
+int scif_destroy_loopback_qp(struct scif_dev *scifdev)
+{
+ scif_peer_unregister_device(scifdev);
+ destroy_workqueue(scif_info.loopb_wq);
+ scif_destroy_intr_wq(scifdev);
+ kfree(scifdev->qpairs->outbound_q.rb_base);
+ kfree(scifdev->qpairs);
+ scifdev->sdev = NULL;
+ scif_info.loopb_dev = NULL;
+ return 0;
+}
+
+void scif_destroy_p2p(struct scif_dev *scifdev)
+{
+ struct scif_dev *peer_dev;
+ struct scif_p2p_info *p2p;
+ struct list_head *pos, *tmp;
+ int bd;
+
+ mutex_lock(&scif_info.conflock);
+ /* Free P2P mappings in the given node for all its peer nodes */
+ list_for_each_safe(pos, tmp, &scifdev->p2p) {
+ p2p = list_entry(pos, struct scif_p2p_info, ppi_list);
+ dma_unmap_sg(&scifdev->sdev->dev, p2p->ppi_sg[SCIF_PPI_MMIO],
+ p2p->sg_nentries[SCIF_PPI_MMIO],
+ DMA_BIDIRECTIONAL);
+ dma_unmap_sg(&scifdev->sdev->dev, p2p->ppi_sg[SCIF_PPI_APER],
+ p2p->sg_nentries[SCIF_PPI_APER],
+ DMA_BIDIRECTIONAL);
+ scif_p2p_freesg(p2p->ppi_sg[SCIF_PPI_MMIO]);
+ scif_p2p_freesg(p2p->ppi_sg[SCIF_PPI_APER]);
+ list_del(pos);
+ kfree(p2p);
+ }
+
+ /* Free P2P mapping created in the peer nodes for the given node */
+ for (bd = SCIF_MGMT_NODE + 1; bd <= scif_info.maxid; bd++) {
+ peer_dev = &scif_dev[bd];
+ list_for_each_safe(pos, tmp, &peer_dev->p2p) {
+ p2p = list_entry(pos, struct scif_p2p_info, ppi_list);
+ if (p2p->ppi_peer_id == scifdev->node) {
+ dma_unmap_sg(&peer_dev->sdev->dev,
+ p2p->ppi_sg[SCIF_PPI_MMIO],
+ p2p->sg_nentries[SCIF_PPI_MMIO],
+ DMA_BIDIRECTIONAL);
+ dma_unmap_sg(&peer_dev->sdev->dev,
+ p2p->ppi_sg[SCIF_PPI_APER],
+ p2p->sg_nentries[SCIF_PPI_APER],
+ DMA_BIDIRECTIONAL);
+ scif_p2p_freesg(p2p->ppi_sg[SCIF_PPI_MMIO]);
+ scif_p2p_freesg(p2p->ppi_sg[SCIF_PPI_APER]);
+ list_del(pos);
+ kfree(p2p);
+ }
+ }
+ }
+ mutex_unlock(&scif_info.conflock);
+}
diff --git a/drivers/misc/mic/scif/scif_nodeqp.h b/drivers/misc/mic/scif/scif_nodeqp.h
new file mode 100644
index 0000000..9589627
--- /dev/null
+++ b/drivers/misc/mic/scif/scif_nodeqp.h
@@ -0,0 +1,221 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright(c) 2014 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Copyright(c) 2014 Intel Corporation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Intel SCIF driver.
+ *
+ */
+#ifndef SCIF_NODEQP
+#define SCIF_NODEQP
+
+#include "scif_rb.h"
+#include "scif_peer_bus.h"
+
+#define SCIF_INIT 1 /* First message sent to the peer node for discovery */
+#define SCIF_EXIT 2 /* Last message from the peer informing intent to exit */
+#define SCIF_EXIT_ACK 3 /* Response to SCIF_EXIT message */
+#define SCIF_NODE_ADD 4 /* Tell Online nodes a new node exits */
+#define SCIF_NODE_ADD_ACK 5 /* Confirm to mgmt node sequence is finished */
+#define SCIF_NODE_ADD_NACK 6 /* SCIF_NODE_ADD failed */
+#define SCIF_NODE_REMOVE 7 /* Request to deactivate a SCIF node */
+#define SCIF_NODE_REMOVE_ACK 8 /* Response to a SCIF_NODE_REMOVE message */
+#define SCIF_CNCT_REQ 9 /* Phys addr of Request connection to a port */
+#define SCIF_CNCT_GNT 10 /* Phys addr of new Grant connection request */
+#define SCIF_CNCT_GNTACK 11 /* Error type Reject a connection request */
+#define SCIF_CNCT_GNTNACK 12 /* Error type Reject a connection request */
+#define SCIF_CNCT_REJ 13 /* Error type Reject a connection request */
+#define SCIF_DISCNCT 14 /* Notify peer that connection is being terminated */
+#define SCIF_DISCNT_ACK 15 /* Notify peer that connection is being terminated */
+#define SCIF_CLIENT_SENT 16 /* Notify the peer that data has been written */
+#define SCIF_CLIENT_RCVD 17 /* Notify the peer that data has been read */
+#define SCIF_GET_NODE_INFO 18 /* Get current node mask from the mgmt node*/
+#define SCIF_REGISTER 19 /* Tell peer about a new registered window */
+#define SCIF_REGISTER_ACK 20 /* Notify peer about unregistration success */
+#define SCIF_REGISTER_NACK 21 /* Notify peer about registration success */
+#define SCIF_UNREGISTER 22 /* Tell peer about unregistering a window */
+#define SCIF_UNREGISTER_ACK 23 /* Notify peer about registration failure */
+#define SCIF_UNREGISTER_NACK 24 /* Notify peer about unregistration failure */
+#define SCIF_ALLOC_REQ 25 /* Request a mapped buffer */
+#define SCIF_ALLOC_GNT 26 /* Notify peer about allocation success */
+#define SCIF_ALLOC_REJ 27 /* Notify peer about allocation failure */
+#define SCIF_FREE_VIRT 28 /* Free previously allocated virtual memory */
+#define SCIF_MUNMAP 29 /* Acknowledgment for a SCIF_MMAP request */
+#define SCIF_MARK 30 /* SCIF Remote Fence Mark Request */
+#define SCIF_MARK_ACK 31 /* SCIF Remote Fence Mark Success */
+#define SCIF_MARK_NACK 32 /* SCIF Remote Fence Mark Failure */
+#define SCIF_WAIT 33 /* SCIF Remote Fence Wait Request */
+#define SCIF_WAIT_ACK 34 /* SCIF Remote Fence Wait Success */
+#define SCIF_WAIT_NACK 35 /* SCIF Remote Fence Wait Failure */
+#define SCIF_SIG_LOCAL 36 /* SCIF Remote Fence Local Signal Request */
+#define SCIF_SIG_REMOTE 37 /* SCIF Remote Fence Remote Signal Request */
+#define SCIF_SIG_ACK 38 /* SCIF Remote Fence Remote Signal Success */
+#define SCIF_SIG_NACK 39 /* SCIF Remote Fence Remote Signal Failure */
+#define SCIF_MAX_MSG SCIF_SIG_NACK
+
+/*
+ * struct scifmsg - Node QP message format
+ *
+ * @src: Source information
+ * @dst: Destination information
+ * @uop: The message opcode
+ * @payload: Unique payload format for each message
+ */
+struct scifmsg {
+ struct scif_port_id src;
+ struct scif_port_id dst;
+ u32 uop;
+ u64 payload[4];
+} __packed;
+
+/*
+ * struct scif_allocmsg - Used with SCIF_ALLOC_REQ to request
+ * the remote note to allocate memory
+ *
+ * phys_addr: Physical address of the buffer
+ * vaddr: Virtual address of the buffer
+ * size: Size of the buffer
+ * state: Current state
+ * allocwq: wait queue for status
+ */
+struct scif_allocmsg {
+ dma_addr_t phys_addr;
+ unsigned long vaddr;
+ size_t size;
+ enum scif_msg_state state;
+ wait_queue_head_t allocwq;
+};
+
+/*
+ * struct scif_qp - Node Queue Pair
+ *
+ * Interesting structure -- a little difficult because we can only
+ * write across the PCIe, so any r/w pointer we need to read is
+ * local. We only need to read the read pointer on the inbound_q
+ * and read the write pointer in the outbound_q
+ *
+ * @magic: Magic value to ensure the peer sees the QP correctly
+ * @outbound_q: The outbound ring buffer for sending messages
+ * @inbound_q: The inbound ring buffer for receiving messages
+ * @local_write: Local write index
+ * @local_read: Local read index
+ * @remote_qp: The remote queue pair
+ * @local_buf: DMA address of local ring buffer
+ * @local_qp: DMA address of the local queue pair data structure
+ * @remote_buf: DMA address of remote ring buffer
+ * @qp_state: QP state i.e. online or offline used for P2P
+ * @send_lock: synchronize access to outbound queue
+ * @recv_lock: Synchronize access to inbound queue
+ */
+struct scif_qp {
+ u64 magic;
+#define SCIFEP_MAGIC 0x5c1f000000005c1fULL
+ struct scif_rb outbound_q;
+ struct scif_rb inbound_q;
+
+ u32 local_write __aligned(64);
+ u32 local_read __aligned(64);
+ struct scif_qp *remote_qp;
+ dma_addr_t local_buf;
+ dma_addr_t local_qp;
+ dma_addr_t remote_buf;
+ u32 qp_state;
+#define SCIF_QP_OFFLINE 0xdead
+#define SCIF_QP_ONLINE 0xc0de
+ spinlock_t send_lock;
+ spinlock_t recv_lock;
+};
+
+/*
+ * struct scif_loopb_msg - An element in the loopback Node QP message list.
+ *
+ * @msg - The SCIF node QP message
+ * @list - link in the list of messages
+ */
+struct scif_loopb_msg {
+ struct scifmsg msg;
+ struct list_head list;
+};
+
+int scif_nodeqp_send(struct scif_dev *scifdev, struct scifmsg *msg);
+int _scif_nodeqp_send(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_nodeqp_intrhandler(struct scif_dev *scifdev, struct scif_qp *qp);
+int scif_loopb_msg_handler(struct scif_dev *scifdev, struct scif_qp *qp);
+int scif_setup_qp(struct scif_dev *scifdev);
+int scif_qp_response(phys_addr_t phys, struct scif_dev *dev);
+int scif_setup_qp_connect(struct scif_qp *qp, dma_addr_t *qp_offset,
+ int local_size, struct scif_dev *scifdev);
+int scif_setup_qp_accept(struct scif_qp *qp, dma_addr_t *qp_offset,
+ dma_addr_t phys, int local_size,
+ struct scif_dev *scifdev);
+int scif_setup_qp_connect_response(struct scif_dev *scifdev,
+ struct scif_qp *qp, u64 payload);
+int scif_setup_loopback_qp(struct scif_dev *scifdev);
+int scif_destroy_loopback_qp(struct scif_dev *scifdev);
+void scif_poll_qp_state(struct work_struct *work);
+void scif_destroy_p2p(struct scif_dev *scifdev);
+void scif_send_exit(struct scif_dev *scifdev);
+static inline struct device *scif_get_peer_dev(struct scif_dev *scifdev)
+{
+ struct scif_peer_dev *spdev;
+ struct device *spdev_ret;
+
+ rcu_read_lock();
+ spdev = rcu_dereference(scifdev->spdev);
+ if (spdev)
+ spdev_ret = get_device(&spdev->dev);
+ else
+ spdev_ret = ERR_PTR(-ENODEV);
+ rcu_read_unlock();
+ return spdev_ret;
+}
+
+static inline void scif_put_peer_dev(struct device *dev)
+{
+ put_device(dev);
+}
+#endif /* SCIF_NODEQP */
diff --git a/drivers/misc/mic/scif/scif_peer_bus.c b/drivers/misc/mic/scif/scif_peer_bus.c
new file mode 100644
index 0000000..6ffa3bd
--- /dev/null
+++ b/drivers/misc/mic/scif/scif_peer_bus.c
@@ -0,0 +1,183 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2014 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Intel SCIF driver.
+ */
+#include "scif_main.h"
+#include "../bus/scif_bus.h"
+#include "scif_peer_bus.h"
+
+static inline struct scif_peer_dev *
+dev_to_scif_peer(struct device *dev)
+{
+ return container_of(dev, struct scif_peer_dev, dev);
+}
+
+struct bus_type scif_peer_bus = {
+ .name = "scif_peer_bus",
+};
+
+static void scif_peer_release_dev(struct device *d)
+{
+ struct scif_peer_dev *sdev = dev_to_scif_peer(d);
+ struct scif_dev *scifdev = &scif_dev[sdev->dnode];
+
+ scif_cleanup_scifdev(scifdev);
+ kfree(sdev);
+}
+
+static int scif_peer_initialize_device(struct scif_dev *scifdev)
+{
+ struct scif_peer_dev *spdev;
+ int ret;
+
+ spdev = kzalloc(sizeof(*spdev), GFP_KERNEL);
+ if (!spdev) {
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ spdev->dev.parent = scifdev->sdev->dev.parent;
+ spdev->dev.release = scif_peer_release_dev;
+ spdev->dnode = scifdev->node;
+ spdev->dev.bus = &scif_peer_bus;
+ dev_set_name(&spdev->dev, "scif_peer-dev%u", spdev->dnode);
+
+ device_initialize(&spdev->dev);
+ get_device(&spdev->dev);
+ rcu_assign_pointer(scifdev->spdev, spdev);
+
+ mutex_lock(&scif_info.conflock);
+ scif_info.total++;
+ scif_info.maxid = max_t(u32, spdev->dnode, scif_info.maxid);
+ mutex_unlock(&scif_info.conflock);
+ return 0;
+err:
+ dev_err(&scifdev->sdev->dev,
+ "dnode %d: initialize_device rc %d\n", scifdev->node, ret);
+ return ret;
+}
+
+static int scif_peer_add_device(struct scif_dev *scifdev)
+{
+ struct scif_peer_dev *spdev = rcu_dereference(scifdev->spdev);
+ char pool_name[16];
+ int ret;
+
+ ret = device_add(&spdev->dev);
+ put_device(&spdev->dev);
+ if (ret) {
+ dev_err(&scifdev->sdev->dev,
+ "dnode %d: peer device_add failed\n", scifdev->node);
+ goto put_spdev;
+ }
+
+ scnprintf(pool_name, sizeof(pool_name), "scif-%d", spdev->dnode);
+ scifdev->signal_pool = dmam_pool_create(pool_name, &scifdev->sdev->dev,
+ sizeof(struct scif_status), 1,
+ 0);
+ if (!scifdev->signal_pool) {
+ dev_err(&scifdev->sdev->dev,
+ "dnode %d: dmam_pool_create failed\n", scifdev->node);
+ ret = -ENOMEM;
+ goto del_spdev;
+ }
+ dev_dbg(&spdev->dev, "Added peer dnode %d\n", spdev->dnode);
+ return 0;
+del_spdev:
+ device_del(&spdev->dev);
+put_spdev:
+ RCU_INIT_POINTER(scifdev->spdev, NULL);
+ synchronize_rcu();
+ put_device(&spdev->dev);
+
+ mutex_lock(&scif_info.conflock);
+ scif_info.total--;
+ mutex_unlock(&scif_info.conflock);
+ return ret;
+}
+
+void scif_add_peer_device(struct work_struct *work)
+{
+ struct scif_dev *scifdev = container_of(work, struct scif_dev,
+ peer_add_work);
+
+ scif_peer_add_device(scifdev);
+}
+
+/*
+ * Peer device registration is split into a device_initialize and a device_add.
+ * The reason for doing this is as follows: First, peer device registration
+ * itself cannot be done in the message processing thread and must be delegated
+ * to another workqueue, otherwise if SCIF client probe, called during peer
+ * device registration, calls scif_connect(..), it will block the message
+ * processing thread causing a deadlock. Next, device_initialize is done in the
+ * "top-half" message processing thread and device_add in the "bottom-half"
+ * workqueue. If this is not done, SCIF_CNCT_REQ message processing executing
+ * concurrently with SCIF_INIT message processing is unable to get a reference
+ * on the peer device, thereby failing the connect request.
+ */
+void scif_peer_register_device(struct scif_dev *scifdev)
+{
+ int ret;
+
+ mutex_lock(&scifdev->lock);
+ ret = scif_peer_initialize_device(scifdev);
+ if (ret)
+ goto exit;
+ schedule_work(&scifdev->peer_add_work);
+exit:
+ mutex_unlock(&scifdev->lock);
+}
+
+int scif_peer_unregister_device(struct scif_dev *scifdev)
+{
+ struct scif_peer_dev *spdev;
+
+ mutex_lock(&scifdev->lock);
+ /* Flush work to ensure device register is complete */
+ flush_work(&scifdev->peer_add_work);
+
+ /*
+ * Continue holding scifdev->lock since theoretically unregister_device
+ * can be called simultaneously from multiple threads
+ */
+ spdev = rcu_dereference(scifdev->spdev);
+ if (!spdev) {
+ mutex_unlock(&scifdev->lock);
+ return -ENODEV;
+ }
+
+ RCU_INIT_POINTER(scifdev->spdev, NULL);
+ synchronize_rcu();
+ mutex_unlock(&scifdev->lock);
+
+ dev_dbg(&spdev->dev, "Removing peer dnode %d\n", spdev->dnode);
+ device_unregister(&spdev->dev);
+
+ mutex_lock(&scif_info.conflock);
+ scif_info.total--;
+ mutex_unlock(&scif_info.conflock);
+ return 0;
+}
+
+int scif_peer_bus_init(void)
+{
+ return bus_register(&scif_peer_bus);
+}
+
+void scif_peer_bus_exit(void)
+{
+ bus_unregister(&scif_peer_bus);
+}
diff --git a/drivers/misc/mic/scif/scif_peer_bus.h b/drivers/misc/mic/scif/scif_peer_bus.h
new file mode 100644
index 0000000..a3b8dd2
--- /dev/null
+++ b/drivers/misc/mic/scif/scif_peer_bus.h
@@ -0,0 +1,31 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2014 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Intel SCIF driver.
+ */
+#ifndef _SCIF_PEER_BUS_H_
+#define _SCIF_PEER_BUS_H_
+
+#include <linux/device.h>
+#include <linux/mic_common.h>
+#include <linux/scif.h>
+
+struct scif_dev;
+
+void scif_add_peer_device(struct work_struct *work);
+void scif_peer_register_device(struct scif_dev *sdev);
+int scif_peer_unregister_device(struct scif_dev *scifdev);
+int scif_peer_bus_init(void);
+void scif_peer_bus_exit(void);
+#endif /* _SCIF_PEER_BUS_H */
diff --git a/drivers/misc/mic/scif/scif_ports.c b/drivers/misc/mic/scif/scif_ports.c
new file mode 100644
index 0000000..594e18d
--- /dev/null
+++ b/drivers/misc/mic/scif/scif_ports.c
@@ -0,0 +1,124 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2014 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Intel SCIF driver.
+ *
+ */
+#include <linux/idr.h>
+
+#include "scif_main.h"
+
+#define SCIF_PORT_COUNT 0x10000 /* Ports available */
+
+struct idr scif_ports;
+
+/*
+ * struct scif_port - SCIF port information
+ *
+ * @ref_cnt - Reference count since there can be multiple endpoints
+ * created via scif_accept(..) simultaneously using a port.
+ */
+struct scif_port {
+ int ref_cnt;
+};
+
+/**
+ * __scif_get_port - Reserve a specified port # for SCIF and add it
+ * to the global list.
+ * @port : port # to be reserved.
+ *
+ * @return : Allocated SCIF port #, or -ENOSPC if port unavailable.
+ * On memory allocation failure, returns -ENOMEM.
+ */
+static int __scif_get_port(int start, int end)
+{
+ int id;
+ struct scif_port *port = kzalloc(sizeof(*port), GFP_ATOMIC);
+
+ if (!port)
+ return -ENOMEM;
+ spin_lock(&scif_info.port_lock);
+ id = idr_alloc(&scif_ports, port, start, end, GFP_ATOMIC);
+ if (id >= 0)
+ port->ref_cnt++;
+ spin_unlock(&scif_info.port_lock);
+ return id;
+}
+
+/**
+ * scif_rsrv_port - Reserve a specified port # for SCIF.
+ * @port : port # to be reserved.
+ *
+ * @return : Allocated SCIF port #, or -ENOSPC if port unavailable.
+ * On memory allocation failure, returns -ENOMEM.
+ */
+int scif_rsrv_port(u16 port)
+{
+ return __scif_get_port(port, port + 1);
+}
+
+/**
+ * scif_get_new_port - Get and reserve any port # for SCIF in the range
+ * SCIF_PORT_RSVD + 1 to SCIF_PORT_COUNT - 1.
+ *
+ * @return : Allocated SCIF port #, or -ENOSPC if no ports available.
+ * On memory allocation failure, returns -ENOMEM.
+ */
+int scif_get_new_port(void)
+{
+ return __scif_get_port(SCIF_PORT_RSVD + 1, SCIF_PORT_COUNT);
+}
+
+/**
+ * scif_get_port - Increment the reference count for a SCIF port
+ * @id : SCIF port
+ *
+ * @return : None
+ */
+void scif_get_port(u16 id)
+{
+ struct scif_port *port;
+
+ if (!id)
+ return;
+ spin_lock(&scif_info.port_lock);
+ port = idr_find(&scif_ports, id);
+ if (port)
+ port->ref_cnt++;
+ spin_unlock(&scif_info.port_lock);
+}
+
+/**
+ * scif_put_port - Release a reserved SCIF port
+ * @id : SCIF port to be released.
+ *
+ * @return : None
+ */
+void scif_put_port(u16 id)
+{
+ struct scif_port *port;
+
+ if (!id)
+ return;
+ spin_lock(&scif_info.port_lock);
+ port = idr_find(&scif_ports, id);
+ if (port) {
+ port->ref_cnt--;
+ if (!port->ref_cnt) {
+ idr_remove(&scif_ports, id);
+ kfree(port);
+ }
+ }
+ spin_unlock(&scif_info.port_lock);
+}
diff --git a/drivers/misc/mic/scif/scif_rb.c b/drivers/misc/mic/scif/scif_rb.c
new file mode 100644
index 0000000..637cc46
--- /dev/null
+++ b/drivers/misc/mic/scif/scif_rb.c
@@ -0,0 +1,249 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2014 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Intel SCIF driver.
+ *
+ */
+#include <linux/circ_buf.h>
+#include <linux/types.h>
+#include <linux/io.h>
+#include <linux/errno.h>
+
+#include "scif_rb.h"
+
+#define scif_rb_ring_cnt(head, tail, size) CIRC_CNT(head, tail, size)
+#define scif_rb_ring_space(head, tail, size) CIRC_SPACE(head, tail, size)
+
+/**
+ * scif_rb_init - Initializes the ring buffer
+ * @rb: ring buffer
+ * @read_ptr: A pointer to the read offset
+ * @write_ptr: A pointer to the write offset
+ * @rb_base: A pointer to the base of the ring buffer
+ * @size: The size of the ring buffer in powers of two
+ */
+void scif_rb_init(struct scif_rb *rb, u32 *read_ptr, u32 *write_ptr,
+ void *rb_base, u8 size)
+{
+ rb->rb_base = rb_base;
+ rb->size = (1 << size);
+ rb->read_ptr = read_ptr;
+ rb->write_ptr = write_ptr;
+ rb->current_read_offset = *read_ptr;
+ rb->current_write_offset = *write_ptr;
+}
+
+/* Copies a message to the ring buffer -- handles the wrap around case */
+static void memcpy_torb(struct scif_rb *rb, void *header,
+ void *msg, u32 size)
+{
+ u32 size1, size2;
+
+ if (header + size >= rb->rb_base + rb->size) {
+ /* Need to call two copies if it wraps around */
+ size1 = (u32)(rb->rb_base + rb->size - header);
+ size2 = size - size1;
+ memcpy_toio((void __iomem __force *)header, msg, size1);
+ memcpy_toio((void __iomem __force *)rb->rb_base,
+ msg + size1, size2);
+ } else {
+ memcpy_toio((void __iomem __force *)header, msg, size);
+ }
+}
+
+/* Copies a message from the ring buffer -- handles the wrap around case */
+static void memcpy_fromrb(struct scif_rb *rb, void *header,
+ void *msg, u32 size)
+{
+ u32 size1, size2;
+
+ if (header + size >= rb->rb_base + rb->size) {
+ /* Need to call two copies if it wraps around */
+ size1 = (u32)(rb->rb_base + rb->size - header);
+ size2 = size - size1;
+ memcpy_fromio(msg, (void __iomem __force *)header, size1);
+ memcpy_fromio(msg + size1,
+ (void __iomem __force *)rb->rb_base, size2);
+ } else {
+ memcpy_fromio(msg, (void __iomem __force *)header, size);
+ }
+}
+
+/**
+ * scif_rb_space - Query space available for writing to the RB
+ * @rb: ring buffer
+ *
+ * Return: size available for writing to RB in bytes.
+ */
+u32 scif_rb_space(struct scif_rb *rb)
+{
+ rb->current_read_offset = *rb->read_ptr;
+ /*
+ * Update from the HW read pointer only once the peer has exposed the
+ * new empty slot. This barrier is paired with the memory barrier
+ * scif_rb_update_read_ptr()
+ */
+ mb();
+ return scif_rb_ring_space(rb->current_write_offset,
+ rb->current_read_offset, rb->size);
+}
+
+/**
+ * scif_rb_write - Write a message to the RB
+ * @rb: ring buffer
+ * @msg: buffer to send the message. Must be at least size bytes long
+ * @size: the size (in bytes) to be copied to the RB
+ *
+ * This API does not block if there isn't enough space in the RB.
+ * Returns: 0 on success or -ENOMEM on failure
+ */
+int scif_rb_write(struct scif_rb *rb, void *msg, u32 size)
+{
+ void *header;
+
+ if (scif_rb_space(rb) < size)
+ return -ENOMEM;
+ header = rb->rb_base + rb->current_write_offset;
+ memcpy_torb(rb, header, msg, size);
+ /*
+ * Wait until scif_rb_commit(). Update the local ring
+ * buffer data, not the shared data until commit.
+ */
+ rb->current_write_offset =
+ (rb->current_write_offset + size) & (rb->size - 1);
+ return 0;
+}
+
+/**
+ * scif_rb_commit - To submit the message to let the peer fetch it
+ * @rb: ring buffer
+ */
+void scif_rb_commit(struct scif_rb *rb)
+{
+ /*
+ * We must ensure ordering between the all the data committed
+ * previously before we expose the new message to the peer by
+ * updating the write_ptr. This write barrier is paired with
+ * the read barrier in scif_rb_count(..)
+ */
+ wmb();
+ ACCESS_ONCE(*rb->write_ptr) = rb->current_write_offset;
+#ifdef CONFIG_INTEL_MIC_CARD
+ /*
+ * X100 Si bug: For the case where a Core is performing an EXT_WR
+ * followed by a Doorbell Write, the Core must perform two EXT_WR to the
+ * same address with the same data before it does the Doorbell Write.
+ * This way, if ordering is violated for the Interrupt Message, it will
+ * fall just behind the first Posted associated with the first EXT_WR.
+ */
+ ACCESS_ONCE(*rb->write_ptr) = rb->current_write_offset;
+#endif
+}
+
+/**
+ * scif_rb_get - To get next message from the ring buffer
+ * @rb: ring buffer
+ * @size: Number of bytes to be read
+ *
+ * Return: NULL if no bytes to be read from the ring buffer, otherwise the
+ * pointer to the next byte
+ */
+static void *scif_rb_get(struct scif_rb *rb, u32 size)
+{
+ void *header = NULL;
+
+ if (scif_rb_count(rb, size) >= size)
+ header = rb->rb_base + rb->current_read_offset;
+ return header;
+}
+
+/*
+ * scif_rb_get_next - Read from ring buffer.
+ * @rb: ring buffer
+ * @msg: buffer to hold the message. Must be at least size bytes long
+ * @size: Number of bytes to be read
+ *
+ * Return: number of bytes read if available bytes are >= size, otherwise
+ * returns zero.
+ */
+u32 scif_rb_get_next(struct scif_rb *rb, void *msg, u32 size)
+{
+ void *header = NULL;
+ int read_size = 0;
+
+ header = scif_rb_get(rb, size);
+ if (header) {
+ u32 next_cmd_offset =
+ (rb->current_read_offset + size) & (rb->size - 1);
+
+ read_size = size;
+ rb->current_read_offset = next_cmd_offset;
+ memcpy_fromrb(rb, header, msg, size);
+ }
+ return read_size;
+}
+
+/**
+ * scif_rb_update_read_ptr
+ * @rb: ring buffer
+ */
+void scif_rb_update_read_ptr(struct scif_rb *rb)
+{
+ u32 new_offset;
+
+ new_offset = rb->current_read_offset;
+ /*
+ * We must ensure ordering between the all the data committed or read
+ * previously before we expose the empty slot to the peer by updating
+ * the read_ptr. This barrier is paired with the memory barrier in
+ * scif_rb_space(..)
+ */
+ mb();
+ ACCESS_ONCE(*rb->read_ptr) = new_offset;
+#ifdef CONFIG_INTEL_MIC_CARD
+ /*
+ * X100 Si Bug: For the case where a Core is performing an EXT_WR
+ * followed by a Doorbell Write, the Core must perform two EXT_WR to the
+ * same address with the same data before it does the Doorbell Write.
+ * This way, if ordering is violated for the Interrupt Message, it will
+ * fall just behind the first Posted associated with the first EXT_WR.
+ */
+ ACCESS_ONCE(*rb->read_ptr) = new_offset;
+#endif
+}
+
+/**
+ * scif_rb_count
+ * @rb: ring buffer
+ * @size: Number of bytes expected to be read
+ *
+ * Return: number of bytes that can be read from the RB
+ */
+u32 scif_rb_count(struct scif_rb *rb, u32 size)
+{
+ if (scif_rb_ring_cnt(rb->current_write_offset,
+ rb->current_read_offset,
+ rb->size) < size) {
+ rb->current_write_offset = *rb->write_ptr;
+ /*
+ * Update from the HW write pointer if empty only once the peer
+ * has exposed the new message. This read barrier is paired
+ * with the write barrier in scif_rb_commit(..)
+ */
+ smp_rmb();
+ }
+ return scif_rb_ring_cnt(rb->current_write_offset,
+ rb->current_read_offset,
+ rb->size);
+}
diff --git a/drivers/misc/mic/scif/scif_rb.h b/drivers/misc/mic/scif/scif_rb.h
new file mode 100644
index 0000000..166dffe
--- /dev/null
+++ b/drivers/misc/mic/scif/scif_rb.h
@@ -0,0 +1,100 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright(c) 2014 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Copyright(c) 2014 Intel Corporation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Intel SCIF driver.
+ */
+#ifndef SCIF_RB_H
+#define SCIF_RB_H
+/*
+ * This file describes a general purpose, byte based ring buffer. Writers to the
+ * ring buffer need to synchronize using a lock. The same is true for readers,
+ * although in practice, the ring buffer has a single reader. It is lockless
+ * between producer and consumer so it can handle being used across the PCIe
+ * bus. The ring buffer ensures that there are no reads across the PCIe bus for
+ * performance reasons. Two of these are used to form a single bidirectional
+ * queue-pair across PCIe.
+ */
+/*
+ * struct scif_rb - SCIF Ring Buffer
+ *
+ * @rb_base: The base of the memory used for storing RB messages
+ * @read_ptr: Pointer to the read offset
+ * @write_ptr: Pointer to the write offset
+ * @size: Size of the memory in rb_base
+ * @current_read_offset: Cached read offset for performance
+ * @current_write_offset: Cached write offset for performance
+ */
+struct scif_rb {
+ void *rb_base;
+ u32 *read_ptr;
+ u32 *write_ptr;
+ u32 size;
+ u32 current_read_offset;
+ u32 current_write_offset;
+};
+
+/* methods used by both */
+void scif_rb_init(struct scif_rb *rb, u32 *read_ptr, u32 *write_ptr,
+ void *rb_base, u8 size);
+/* writer only methods */
+/* write a new command, then scif_rb_commit() */
+int scif_rb_write(struct scif_rb *rb, void *msg, u32 size);
+/* after write(), then scif_rb_commit() */
+void scif_rb_commit(struct scif_rb *rb);
+/* query space available for writing to a RB. */
+u32 scif_rb_space(struct scif_rb *rb);
+
+/* reader only methods */
+/* read a new message from the ring buffer of size bytes */
+u32 scif_rb_get_next(struct scif_rb *rb, void *msg, u32 size);
+/* update the read pointer so that the space can be reused */
+void scif_rb_update_read_ptr(struct scif_rb *rb);
+/* count the number of bytes that can be read */
+u32 scif_rb_count(struct scif_rb *rb, u32 size);
+#endif
diff --git a/drivers/misc/mic/scif/scif_rma.c b/drivers/misc/mic/scif/scif_rma.c
new file mode 100644
index 0000000..6a451bd
--- /dev/null
+++ b/drivers/misc/mic/scif/scif_rma.c
@@ -0,0 +1,1776 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2015 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Intel SCIF driver.
+ *
+ */
+#include <linux/dma_remapping.h>
+#include <linux/pagemap.h>
+#include "scif_main.h"
+#include "scif_map.h"
+
+/* Used to skip ulimit checks for registrations with SCIF_MAP_KERNEL flag */
+#define SCIF_MAP_ULIMIT 0x40
+
+bool scif_ulimit_check = 1;
+
+/**
+ * scif_rma_ep_init:
+ * @ep: end point
+ *
+ * Initialize RMA per EP data structures.
+ */
+void scif_rma_ep_init(struct scif_endpt *ep)
+{
+ struct scif_endpt_rma_info *rma = &ep->rma_info;
+
+ mutex_init(&rma->rma_lock);
+ init_iova_domain(&rma->iovad, PAGE_SIZE, SCIF_IOVA_START_PFN,
+ SCIF_DMA_64BIT_PFN);
+ spin_lock_init(&rma->tc_lock);
+ mutex_init(&rma->mmn_lock);
+ INIT_LIST_HEAD(&rma->reg_list);
+ INIT_LIST_HEAD(&rma->remote_reg_list);
+ atomic_set(&rma->tw_refcount, 0);
+ atomic_set(&rma->tcw_refcount, 0);
+ atomic_set(&rma->tcw_total_pages, 0);
+ atomic_set(&rma->fence_refcount, 0);
+
+ rma->async_list_del = 0;
+ rma->dma_chan = NULL;
+ INIT_LIST_HEAD(&rma->mmn_list);
+ INIT_LIST_HEAD(&rma->vma_list);
+ init_waitqueue_head(&rma->markwq);
+}
+
+/**
+ * scif_rma_ep_can_uninit:
+ * @ep: end point
+ *
+ * Returns 1 if an endpoint can be uninitialized and 0 otherwise.
+ */
+int scif_rma_ep_can_uninit(struct scif_endpt *ep)
+{
+ int ret = 0;
+
+ mutex_lock(&ep->rma_info.rma_lock);
+ /* Destroy RMA Info only if both lists are empty */
+ if (list_empty(&ep->rma_info.reg_list) &&
+ list_empty(&ep->rma_info.remote_reg_list) &&
+ list_empty(&ep->rma_info.mmn_list) &&
+ !atomic_read(&ep->rma_info.tw_refcount) &&
+ !atomic_read(&ep->rma_info.tcw_refcount) &&
+ !atomic_read(&ep->rma_info.fence_refcount))
+ ret = 1;
+ mutex_unlock(&ep->rma_info.rma_lock);
+ return ret;
+}
+
+/**
+ * scif_create_pinned_pages:
+ * @nr_pages: number of pages in window
+ * @prot: read/write protection
+ *
+ * Allocate and prepare a set of pinned pages.
+ */
+static struct scif_pinned_pages *
+scif_create_pinned_pages(int nr_pages, int prot)
+{
+ struct scif_pinned_pages *pin;
+
+ might_sleep();
+ pin = scif_zalloc(sizeof(*pin));
+ if (!pin)
+ goto error;
+
+ pin->pages = scif_zalloc(nr_pages * sizeof(*pin->pages));
+ if (!pin->pages)
+ goto error_free_pinned_pages;
+
+ pin->prot = prot;
+ pin->magic = SCIFEP_MAGIC;
+ return pin;
+
+error_free_pinned_pages:
+ scif_free(pin, sizeof(*pin));
+error:
+ return NULL;
+}
+
+/**
+ * scif_destroy_pinned_pages:
+ * @pin: A set of pinned pages.
+ *
+ * Deallocate resources for pinned pages.
+ */
+static int scif_destroy_pinned_pages(struct scif_pinned_pages *pin)
+{
+ int j;
+ int writeable = pin->prot & SCIF_PROT_WRITE;
+ int kernel = SCIF_MAP_KERNEL & pin->map_flags;
+
+ for (j = 0; j < pin->nr_pages; j++) {
+ if (pin->pages[j] && !kernel) {
+ if (writeable)
+ SetPageDirty(pin->pages[j]);
+ put_page(pin->pages[j]);
+ }
+ }
+
+ scif_free(pin->pages,
+ pin->nr_pages * sizeof(*pin->pages));
+ scif_free(pin, sizeof(*pin));
+ return 0;
+}
+
+/*
+ * scif_create_window:
+ * @ep: end point
+ * @nr_pages: number of pages
+ * @offset: registration offset
+ * @temp: true if a temporary window is being created
+ *
+ * Allocate and prepare a self registration window.
+ */
+struct scif_window *scif_create_window(struct scif_endpt *ep, int nr_pages,
+ s64 offset, bool temp)
+{
+ struct scif_window *window;
+
+ might_sleep();
+ window = scif_zalloc(sizeof(*window));
+ if (!window)
+ goto error;
+
+ window->dma_addr = scif_zalloc(nr_pages * sizeof(*window->dma_addr));
+ if (!window->dma_addr)
+ goto error_free_window;
+
+ window->num_pages = scif_zalloc(nr_pages * sizeof(*window->num_pages));
+ if (!window->num_pages)
+ goto error_free_window;
+
+ window->offset = offset;
+ window->ep = (u64)ep;
+ window->magic = SCIFEP_MAGIC;
+ window->reg_state = OP_IDLE;
+ init_waitqueue_head(&window->regwq);
+ window->unreg_state = OP_IDLE;
+ init_waitqueue_head(&window->unregwq);
+ INIT_LIST_HEAD(&window->list);
+ window->type = SCIF_WINDOW_SELF;
+ window->temp = temp;
+ return window;
+
+error_free_window:
+ scif_free(window->dma_addr,
+ nr_pages * sizeof(*window->dma_addr));
+ scif_free(window, sizeof(*window));
+error:
+ return NULL;
+}
+
+/**
+ * scif_destroy_incomplete_window:
+ * @ep: end point
+ * @window: registration window
+ *
+ * Deallocate resources for self window.
+ */
+static void scif_destroy_incomplete_window(struct scif_endpt *ep,
+ struct scif_window *window)
+{
+ int err;
+ int nr_pages = window->nr_pages;
+ struct scif_allocmsg *alloc = &window->alloc_handle;
+ struct scifmsg msg;
+
+retry:
+ /* Wait for a SCIF_ALLOC_GNT/REJ message */
+ err = wait_event_timeout(alloc->allocwq,
+ alloc->state != OP_IN_PROGRESS,
+ SCIF_NODE_ALIVE_TIMEOUT);
+ if (!err && scifdev_alive(ep))
+ goto retry;
+
+ mutex_lock(&ep->rma_info.rma_lock);
+ if (alloc->state == OP_COMPLETED) {
+ msg.uop = SCIF_FREE_VIRT;
+ msg.src = ep->port;
+ msg.payload[0] = ep->remote_ep;
+ msg.payload[1] = window->alloc_handle.vaddr;
+ msg.payload[2] = (u64)window;
+ msg.payload[3] = SCIF_REGISTER;
+ _scif_nodeqp_send(ep->remote_dev, &msg);
+ }
+ mutex_unlock(&ep->rma_info.rma_lock);
+
+ scif_free_window_offset(ep, window, window->offset);
+ scif_free(window->dma_addr, nr_pages * sizeof(*window->dma_addr));
+ scif_free(window->num_pages, nr_pages * sizeof(*window->num_pages));
+ scif_free(window, sizeof(*window));
+}
+
+/**
+ * scif_unmap_window:
+ * @remote_dev: SCIF remote device
+ * @window: registration window
+ *
+ * Delete any DMA mappings created for a registered self window
+ */
+void scif_unmap_window(struct scif_dev *remote_dev, struct scif_window *window)
+{
+ int j;
+
+ if (scif_is_iommu_enabled() && !scifdev_self(remote_dev)) {
+ if (window->st) {
+ dma_unmap_sg(&remote_dev->sdev->dev,
+ window->st->sgl, window->st->nents,
+ DMA_BIDIRECTIONAL);
+ sg_free_table(window->st);
+ kfree(window->st);
+ window->st = NULL;
+ }
+ } else {
+ for (j = 0; j < window->nr_contig_chunks; j++) {
+ if (window->dma_addr[j]) {
+ scif_unmap_single(window->dma_addr[j],
+ remote_dev,
+ window->num_pages[j] <<
+ PAGE_SHIFT);
+ window->dma_addr[j] = 0x0;
+ }
+ }
+ }
+}
+
+static inline struct mm_struct *__scif_acquire_mm(void)
+{
+ if (scif_ulimit_check)
+ return get_task_mm(current);
+ return NULL;
+}
+
+static inline void __scif_release_mm(struct mm_struct *mm)
+{
+ if (mm)
+ mmput(mm);
+}
+
+static inline int
+__scif_dec_pinned_vm_lock(struct mm_struct *mm,
+ int nr_pages, bool try_lock)
+{
+ if (!mm || !nr_pages || !scif_ulimit_check)
+ return 0;
+ if (try_lock) {
+ if (!down_write_trylock(&mm->mmap_sem)) {
+ dev_err(scif_info.mdev.this_device,
+ "%s %d err\n", __func__, __LINE__);
+ return -1;
+ }
+ } else {
+ down_write(&mm->mmap_sem);
+ }
+ mm->pinned_vm -= nr_pages;
+ up_write(&mm->mmap_sem);
+ return 0;
+}
+
+static inline int __scif_check_inc_pinned_vm(struct mm_struct *mm,
+ int nr_pages)
+{
+ unsigned long locked, lock_limit;
+
+ if (!mm || !nr_pages || !scif_ulimit_check)
+ return 0;
+
+ locked = nr_pages;
+ locked += mm->pinned_vm;
+ lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
+ if ((locked > lock_limit) && !capable(CAP_IPC_LOCK)) {
+ dev_err(scif_info.mdev.this_device,
+ "locked(%lu) > lock_limit(%lu)\n",
+ locked, lock_limit);
+ return -ENOMEM;
+ }
+ mm->pinned_vm = locked;
+ return 0;
+}
+
+/**
+ * scif_destroy_window:
+ * @ep: end point
+ * @window: registration window
+ *
+ * Deallocate resources for self window.
+ */
+int scif_destroy_window(struct scif_endpt *ep, struct scif_window *window)
+{
+ int j;
+ struct scif_pinned_pages *pinned_pages = window->pinned_pages;
+ int nr_pages = window->nr_pages;
+
+ might_sleep();
+ if (!window->temp && window->mm) {
+ __scif_dec_pinned_vm_lock(window->mm, window->nr_pages, 0);
+ __scif_release_mm(window->mm);
+ window->mm = NULL;
+ }
+
+ scif_free_window_offset(ep, window, window->offset);
+ scif_unmap_window(ep->remote_dev, window);
+ /*
+ * Decrement references for this set of pinned pages from
+ * this window.
+ */
+ j = atomic_sub_return(1, &pinned_pages->ref_count);
+ if (j < 0)
+ dev_err(scif_info.mdev.this_device,
+ "%s %d incorrect ref count %d\n",
+ __func__, __LINE__, j);
+ /*
+ * If the ref count for pinned_pages is zero then someone
+ * has already called scif_unpin_pages() for it and we should
+ * destroy the page cache.
+ */
+ if (!j)
+ scif_destroy_pinned_pages(window->pinned_pages);
+ scif_free(window->dma_addr, nr_pages * sizeof(*window->dma_addr));
+ scif_free(window->num_pages, nr_pages * sizeof(*window->num_pages));
+ window->magic = 0;
+ scif_free(window, sizeof(*window));
+ return 0;
+}
+
+/**
+ * scif_create_remote_lookup:
+ * @remote_dev: SCIF remote device
+ * @window: remote window
+ *
+ * Allocate and prepare lookup entries for the remote
+ * end to copy over the physical addresses.
+ * Returns 0 on success and appropriate errno on failure.
+ */
+static int scif_create_remote_lookup(struct scif_dev *remote_dev,
+ struct scif_window *window)
+{
+ int i, j, err = 0;
+ int nr_pages = window->nr_pages;
+ bool vmalloc_dma_phys, vmalloc_num_pages;
+
+ might_sleep();
+ /* Map window */
+ err = scif_map_single(&window->mapped_offset,
+ window, remote_dev, sizeof(*window));
+ if (err)
+ goto error_window;
+
+ /* Compute the number of lookup entries. 21 == 2MB Shift */
+ window->nr_lookup = ALIGN(nr_pages * PAGE_SIZE,
+ ((2) * 1024 * 1024)) >> 21;
+
+ window->dma_addr_lookup.lookup =
+ scif_alloc_coherent(&window->dma_addr_lookup.offset,
+ remote_dev, window->nr_lookup *
+ sizeof(*window->dma_addr_lookup.lookup),
+ GFP_KERNEL | __GFP_ZERO);
+ if (!window->dma_addr_lookup.lookup) {
+ err = -ENOMEM;
+ goto error_window;
+ }
+
+ window->num_pages_lookup.lookup =
+ scif_alloc_coherent(&window->num_pages_lookup.offset,
+ remote_dev, window->nr_lookup *
+ sizeof(*window->num_pages_lookup.lookup),
+ GFP_KERNEL | __GFP_ZERO);
+ if (!window->num_pages_lookup.lookup) {
+ err = -ENOMEM;
+ goto error_window;
+ }
+
+ vmalloc_dma_phys = is_vmalloc_addr(&window->dma_addr[0]);
+ vmalloc_num_pages = is_vmalloc_addr(&window->num_pages[0]);
+
+ /* Now map each of the pages containing physical addresses */
+ for (i = 0, j = 0; i < nr_pages; i += SCIF_NR_ADDR_IN_PAGE, j++) {
+ err = scif_map_page(&window->dma_addr_lookup.lookup[j],
+ vmalloc_dma_phys ?
+ vmalloc_to_page(&window->dma_addr[i]) :
+ virt_to_page(&window->dma_addr[i]),
+ remote_dev);
+ if (err)
+ goto error_window;
+ err = scif_map_page(&window->num_pages_lookup.lookup[j],
+ vmalloc_dma_phys ?
+ vmalloc_to_page(&window->num_pages[i]) :
+ virt_to_page(&window->num_pages[i]),
+ remote_dev);
+ if (err)
+ goto error_window;
+ }
+ return 0;
+error_window:
+ return err;
+}
+
+/**
+ * scif_destroy_remote_lookup:
+ * @remote_dev: SCIF remote device
+ * @window: remote window
+ *
+ * Destroy lookup entries used for the remote
+ * end to copy over the physical addresses.
+ */
+static void scif_destroy_remote_lookup(struct scif_dev *remote_dev,
+ struct scif_window *window)
+{
+ int i, j;
+
+ if (window->nr_lookup) {
+ struct scif_rma_lookup *lup = &window->dma_addr_lookup;
+ struct scif_rma_lookup *npup = &window->num_pages_lookup;
+
+ for (i = 0, j = 0; i < window->nr_pages;
+ i += SCIF_NR_ADDR_IN_PAGE, j++) {
+ if (lup->lookup && lup->lookup[j])
+ scif_unmap_single(lup->lookup[j],
+ remote_dev,
+ PAGE_SIZE);
+ if (npup->lookup && npup->lookup[j])
+ scif_unmap_single(npup->lookup[j],
+ remote_dev,
+ PAGE_SIZE);
+ }
+ if (lup->lookup)
+ scif_free_coherent(lup->lookup, lup->offset,
+ remote_dev, window->nr_lookup *
+ sizeof(*lup->lookup));
+ if (npup->lookup)
+ scif_free_coherent(npup->lookup, npup->offset,
+ remote_dev, window->nr_lookup *
+ sizeof(*npup->lookup));
+ if (window->mapped_offset)
+ scif_unmap_single(window->mapped_offset,
+ remote_dev, sizeof(*window));
+ window->nr_lookup = 0;
+ }
+}
+
+/**
+ * scif_create_remote_window:
+ * @ep: end point
+ * @nr_pages: number of pages in window
+ *
+ * Allocate and prepare a remote registration window.
+ */
+static struct scif_window *
+scif_create_remote_window(struct scif_dev *scifdev, int nr_pages)
+{
+ struct scif_window *window;
+
+ might_sleep();
+ window = scif_zalloc(sizeof(*window));
+ if (!window)
+ goto error_ret;
+
+ window->magic = SCIFEP_MAGIC;
+ window->nr_pages = nr_pages;
+
+ window->dma_addr = scif_zalloc(nr_pages * sizeof(*window->dma_addr));
+ if (!window->dma_addr)
+ goto error_window;
+
+ window->num_pages = scif_zalloc(nr_pages *
+ sizeof(*window->num_pages));
+ if (!window->num_pages)
+ goto error_window;
+
+ if (scif_create_remote_lookup(scifdev, window))
+ goto error_window;
+
+ window->type = SCIF_WINDOW_PEER;
+ window->unreg_state = OP_IDLE;
+ INIT_LIST_HEAD(&window->list);
+ return window;
+error_window:
+ scif_destroy_remote_window(window);
+error_ret:
+ return NULL;
+}
+
+/**
+ * scif_destroy_remote_window:
+ * @ep: end point
+ * @window: remote registration window
+ *
+ * Deallocate resources for remote window.
+ */
+void
+scif_destroy_remote_window(struct scif_window *window)
+{
+ scif_free(window->dma_addr, window->nr_pages *
+ sizeof(*window->dma_addr));
+ scif_free(window->num_pages, window->nr_pages *
+ sizeof(*window->num_pages));
+ window->magic = 0;
+ scif_free(window, sizeof(*window));
+}
+
+/**
+ * scif_iommu_map: create DMA mappings if the IOMMU is enabled
+ * @remote_dev: SCIF remote device
+ * @window: remote registration window
+ *
+ * Map the physical pages using dma_map_sg(..) and then detect the number
+ * of contiguous DMA mappings allocated
+ */
+static int scif_iommu_map(struct scif_dev *remote_dev,
+ struct scif_window *window)
+{
+ struct scatterlist *sg;
+ int i, err;
+ scif_pinned_pages_t pin = window->pinned_pages;
+
+ window->st = kzalloc(sizeof(*window->st), GFP_KERNEL);
+ if (!window->st)
+ return -ENOMEM;
+
+ err = sg_alloc_table(window->st, window->nr_pages, GFP_KERNEL);
+ if (err)
+ return err;
+
+ for_each_sg(window->st->sgl, sg, window->st->nents, i)
+ sg_set_page(sg, pin->pages[i], PAGE_SIZE, 0x0);
+
+ err = dma_map_sg(&remote_dev->sdev->dev, window->st->sgl,
+ window->st->nents, DMA_BIDIRECTIONAL);
+ if (!err)
+ return -ENOMEM;
+ /* Detect contiguous ranges of DMA mappings */
+ sg = window->st->sgl;
+ for (i = 0; sg; i++) {
+ dma_addr_t last_da;
+
+ window->dma_addr[i] = sg_dma_address(sg);
+ window->num_pages[i] = sg_dma_len(sg) >> PAGE_SHIFT;
+ last_da = sg_dma_address(sg) + sg_dma_len(sg);
+ while ((sg = sg_next(sg)) && sg_dma_address(sg) == last_da) {
+ window->num_pages[i] +=
+ (sg_dma_len(sg) >> PAGE_SHIFT);
+ last_da = window->dma_addr[i] +
+ sg_dma_len(sg);
+ }
+ window->nr_contig_chunks++;
+ }
+ return 0;
+}
+
+/**
+ * scif_map_window:
+ * @remote_dev: SCIF remote device
+ * @window: self registration window
+ *
+ * Map pages of a window into the aperture/PCI.
+ * Also determine addresses required for DMA.
+ */
+int
+scif_map_window(struct scif_dev *remote_dev, struct scif_window *window)
+{
+ int i, j, k, err = 0, nr_contig_pages;
+ scif_pinned_pages_t pin;
+ phys_addr_t phys_prev, phys_curr;
+
+ might_sleep();
+
+ pin = window->pinned_pages;
+
+ if (intel_iommu_enabled && !scifdev_self(remote_dev))
+ return scif_iommu_map(remote_dev, window);
+
+ for (i = 0, j = 0; i < window->nr_pages; i += nr_contig_pages, j++) {
+ phys_prev = page_to_phys(pin->pages[i]);
+ nr_contig_pages = 1;
+
+ /* Detect physically contiguous chunks */
+ for (k = i + 1; k < window->nr_pages; k++) {
+ phys_curr = page_to_phys(pin->pages[k]);
+ if (phys_curr != (phys_prev + PAGE_SIZE))
+ break;
+ phys_prev = phys_curr;
+ nr_contig_pages++;
+ }
+ window->num_pages[j] = nr_contig_pages;
+ window->nr_contig_chunks++;
+ if (scif_is_mgmt_node()) {
+ /*
+ * Management node has to deal with SMPT on X100 and
+ * hence the DMA mapping is required
+ */
+ err = scif_map_single(&window->dma_addr[j],
+ phys_to_virt(page_to_phys(
+ pin->pages[i])),
+ remote_dev,
+ nr_contig_pages << PAGE_SHIFT);
+ if (err)
+ return err;
+ } else {
+ window->dma_addr[j] = page_to_phys(pin->pages[i]);
+ }
+ }
+ return err;
+}
+
+/**
+ * scif_send_scif_unregister:
+ * @ep: end point
+ * @window: self registration window
+ *
+ * Send a SCIF_UNREGISTER message.
+ */
+static int scif_send_scif_unregister(struct scif_endpt *ep,
+ struct scif_window *window)
+{
+ struct scifmsg msg;
+
+ msg.uop = SCIF_UNREGISTER;
+ msg.src = ep->port;
+ msg.payload[0] = window->alloc_handle.vaddr;
+ msg.payload[1] = (u64)window;
+ return scif_nodeqp_send(ep->remote_dev, &msg);
+}
+
+/**
+ * scif_unregister_window:
+ * @window: self registration window
+ *
+ * Send an unregistration request and wait for a response.
+ */
+int scif_unregister_window(struct scif_window *window)
+{
+ int err = 0;
+ struct scif_endpt *ep = (struct scif_endpt *)window->ep;
+ bool send_msg = false;
+
+ might_sleep();
+ switch (window->unreg_state) {
+ case OP_IDLE:
+ {
+ window->unreg_state = OP_IN_PROGRESS;
+ send_msg = true;
+ /* fall through */
+ }
+ case OP_IN_PROGRESS:
+ {
+ scif_get_window(window, 1);
+ mutex_unlock(&ep->rma_info.rma_lock);
+ if (send_msg) {
+ err = scif_send_scif_unregister(ep, window);
+ if (err) {
+ window->unreg_state = OP_COMPLETED;
+ goto done;
+ }
+ } else {
+ /* Return ENXIO since unregistration is in progress */
+ mutex_lock(&ep->rma_info.rma_lock);
+ return -ENXIO;
+ }
+retry:
+ /* Wait for a SCIF_UNREGISTER_(N)ACK message */
+ err = wait_event_timeout(window->unregwq,
+ window->unreg_state != OP_IN_PROGRESS,
+ SCIF_NODE_ALIVE_TIMEOUT);
+ if (!err && scifdev_alive(ep))
+ goto retry;
+ if (!err) {
+ err = -ENODEV;
+ window->unreg_state = OP_COMPLETED;
+ dev_err(scif_info.mdev.this_device,
+ "%s %d err %d\n", __func__, __LINE__, err);
+ }
+ if (err > 0)
+ err = 0;
+done:
+ mutex_lock(&ep->rma_info.rma_lock);
+ scif_put_window(window, 1);
+ break;
+ }
+ case OP_FAILED:
+ {
+ if (!scifdev_alive(ep)) {
+ err = -ENODEV;
+ window->unreg_state = OP_COMPLETED;
+ }
+ break;
+ }
+ case OP_COMPLETED:
+ break;
+ default:
+ err = -ENODEV;
+ }
+
+ if (window->unreg_state == OP_COMPLETED && window->ref_count)
+ scif_put_window(window, window->nr_pages);
+
+ if (!window->ref_count) {
+ atomic_inc(&ep->rma_info.tw_refcount);
+ list_del_init(&window->list);
+ scif_free_window_offset(ep, window, window->offset);
+ mutex_unlock(&ep->rma_info.rma_lock);
+ if ((!!(window->pinned_pages->map_flags & SCIF_MAP_KERNEL)) &&
+ scifdev_alive(ep)) {
+ scif_drain_dma_intr(ep->remote_dev->sdev,
+ ep->rma_info.dma_chan);
+ } else {
+ if (!__scif_dec_pinned_vm_lock(window->mm,
+ window->nr_pages, 1)) {
+ __scif_release_mm(window->mm);
+ window->mm = NULL;
+ }
+ }
+ scif_queue_for_cleanup(window, &scif_info.rma);
+ mutex_lock(&ep->rma_info.rma_lock);
+ }
+ return err;
+}
+
+/**
+ * scif_send_alloc_request:
+ * @ep: end point
+ * @window: self registration window
+ *
+ * Send a remote window allocation request
+ */
+static int scif_send_alloc_request(struct scif_endpt *ep,
+ struct scif_window *window)
+{
+ struct scifmsg msg;
+ struct scif_allocmsg *alloc = &window->alloc_handle;
+
+ /* Set up the Alloc Handle */
+ alloc->state = OP_IN_PROGRESS;
+ init_waitqueue_head(&alloc->allocwq);
+
+ /* Send out an allocation request */
+ msg.uop = SCIF_ALLOC_REQ;
+ msg.payload[1] = window->nr_pages;
+ msg.payload[2] = (u64)&window->alloc_handle;
+ return _scif_nodeqp_send(ep->remote_dev, &msg);
+}
+
+/**
+ * scif_prep_remote_window:
+ * @ep: end point
+ * @window: self registration window
+ *
+ * Send a remote window allocation request, wait for an allocation response,
+ * and prepares the remote window by copying over the page lists
+ */
+static int scif_prep_remote_window(struct scif_endpt *ep,
+ struct scif_window *window)
+{
+ struct scifmsg msg;
+ struct scif_window *remote_window;
+ struct scif_allocmsg *alloc = &window->alloc_handle;
+ dma_addr_t *dma_phys_lookup, *tmp, *num_pages_lookup, *tmp1;
+ int i = 0, j = 0;
+ int nr_contig_chunks, loop_nr_contig_chunks;
+ int remaining_nr_contig_chunks, nr_lookup;
+ int err, map_err;
+
+ map_err = scif_map_window(ep->remote_dev, window);
+ if (map_err)
+ dev_err(&ep->remote_dev->sdev->dev,
+ "%s %d map_err %d\n", __func__, __LINE__, map_err);
+ remaining_nr_contig_chunks = window->nr_contig_chunks;
+ nr_contig_chunks = window->nr_contig_chunks;
+retry:
+ /* Wait for a SCIF_ALLOC_GNT/REJ message */
+ err = wait_event_timeout(alloc->allocwq,
+ alloc->state != OP_IN_PROGRESS,
+ SCIF_NODE_ALIVE_TIMEOUT);
+ mutex_lock(&ep->rma_info.rma_lock);
+ /* Synchronize with the thread waking up allocwq */
+ mutex_unlock(&ep->rma_info.rma_lock);
+ if (!err && scifdev_alive(ep))
+ goto retry;
+
+ if (!err)
+ err = -ENODEV;
+
+ if (err > 0)
+ err = 0;
+ else
+ return err;
+
+ /* Bail out. The remote end rejected this request */
+ if (alloc->state == OP_FAILED)
+ return -ENOMEM;
+
+ if (map_err) {
+ dev_err(&ep->remote_dev->sdev->dev,
+ "%s %d err %d\n", __func__, __LINE__, map_err);
+ msg.uop = SCIF_FREE_VIRT;
+ msg.src = ep->port;
+ msg.payload[0] = ep->remote_ep;
+ msg.payload[1] = window->alloc_handle.vaddr;
+ msg.payload[2] = (u64)window;
+ msg.payload[3] = SCIF_REGISTER;
+ spin_lock(&ep->lock);
+ if (ep->state == SCIFEP_CONNECTED)
+ err = _scif_nodeqp_send(ep->remote_dev, &msg);
+ else
+ err = -ENOTCONN;
+ spin_unlock(&ep->lock);
+ return err;
+ }
+
+ remote_window = scif_ioremap(alloc->phys_addr, sizeof(*window),
+ ep->remote_dev);
+
+ /* Compute the number of lookup entries. 21 == 2MB Shift */
+ nr_lookup = ALIGN(nr_contig_chunks, SCIF_NR_ADDR_IN_PAGE)
+ >> ilog2(SCIF_NR_ADDR_IN_PAGE);
+
+ dma_phys_lookup =
+ scif_ioremap(remote_window->dma_addr_lookup.offset,
+ nr_lookup *
+ sizeof(*remote_window->dma_addr_lookup.lookup),
+ ep->remote_dev);
+ num_pages_lookup =
+ scif_ioremap(remote_window->num_pages_lookup.offset,
+ nr_lookup *
+ sizeof(*remote_window->num_pages_lookup.lookup),
+ ep->remote_dev);
+
+ while (remaining_nr_contig_chunks) {
+ loop_nr_contig_chunks = min_t(int, remaining_nr_contig_chunks,
+ (int)SCIF_NR_ADDR_IN_PAGE);
+ /* #1/2 - Copy physical addresses over to the remote side */
+
+ /* #2/2 - Copy DMA addresses (addresses that are fed into the
+ * DMA engine) We transfer bus addresses which are then
+ * converted into a MIC physical address on the remote
+ * side if it is a MIC, if the remote node is a mgmt node we
+ * transfer the MIC physical address
+ */
+ tmp = scif_ioremap(dma_phys_lookup[j],
+ loop_nr_contig_chunks *
+ sizeof(*window->dma_addr),
+ ep->remote_dev);
+ tmp1 = scif_ioremap(num_pages_lookup[j],
+ loop_nr_contig_chunks *
+ sizeof(*window->num_pages),
+ ep->remote_dev);
+ if (scif_is_mgmt_node()) {
+ memcpy_toio((void __force __iomem *)tmp,
+ &window->dma_addr[i], loop_nr_contig_chunks
+ * sizeof(*window->dma_addr));
+ memcpy_toio((void __force __iomem *)tmp1,
+ &window->num_pages[i], loop_nr_contig_chunks
+ * sizeof(*window->num_pages));
+ } else {
+ if (scifdev_is_p2p(ep->remote_dev)) {
+ /*
+ * add remote node's base address for this node
+ * to convert it into a MIC address
+ */
+ int m;
+ dma_addr_t dma_addr;
+
+ for (m = 0; m < loop_nr_contig_chunks; m++) {
+ dma_addr = window->dma_addr[i + m] +
+ ep->remote_dev->base_addr;
+ writeq(dma_addr,
+ (void __force __iomem *)&tmp[m]);
+ }
+ memcpy_toio((void __force __iomem *)tmp1,
+ &window->num_pages[i],
+ loop_nr_contig_chunks
+ * sizeof(*window->num_pages));
+ } else {
+ /* Mgmt node or loopback - transfer DMA
+ * addresses as is, this is the same as a
+ * MIC physical address (we use the dma_addr
+ * and not the phys_addr array since the
+ * phys_addr is only setup if there is a mmap()
+ * request from the mgmt node)
+ */
+ memcpy_toio((void __force __iomem *)tmp,
+ &window->dma_addr[i],
+ loop_nr_contig_chunks *
+ sizeof(*window->dma_addr));
+ memcpy_toio((void __force __iomem *)tmp1,
+ &window->num_pages[i],
+ loop_nr_contig_chunks *
+ sizeof(*window->num_pages));
+ }
+ }
+ remaining_nr_contig_chunks -= loop_nr_contig_chunks;
+ i += loop_nr_contig_chunks;
+ j++;
+ scif_iounmap(tmp, loop_nr_contig_chunks *
+ sizeof(*window->dma_addr), ep->remote_dev);
+ scif_iounmap(tmp1, loop_nr_contig_chunks *
+ sizeof(*window->num_pages), ep->remote_dev);
+ }
+
+ /* Prepare the remote window for the peer */
+ remote_window->peer_window = (u64)window;
+ remote_window->offset = window->offset;
+ remote_window->prot = window->prot;
+ remote_window->nr_contig_chunks = nr_contig_chunks;
+ remote_window->ep = ep->remote_ep;
+ scif_iounmap(num_pages_lookup,
+ nr_lookup *
+ sizeof(*remote_window->num_pages_lookup.lookup),
+ ep->remote_dev);
+ scif_iounmap(dma_phys_lookup,
+ nr_lookup *
+ sizeof(*remote_window->dma_addr_lookup.lookup),
+ ep->remote_dev);
+ scif_iounmap(remote_window, sizeof(*remote_window), ep->remote_dev);
+ window->peer_window = alloc->vaddr;
+ return err;
+}
+
+/**
+ * scif_send_scif_register:
+ * @ep: end point
+ * @window: self registration window
+ *
+ * Send a SCIF_REGISTER message if EP is connected and wait for a
+ * SCIF_REGISTER_(N)ACK message else send a SCIF_FREE_VIRT
+ * message so that the peer can free its remote window allocated earlier.
+ */
+static int scif_send_scif_register(struct scif_endpt *ep,
+ struct scif_window *window)
+{
+ int err = 0;
+ struct scifmsg msg;
+
+ msg.src = ep->port;
+ msg.payload[0] = ep->remote_ep;
+ msg.payload[1] = window->alloc_handle.vaddr;
+ msg.payload[2] = (u64)window;
+ spin_lock(&ep->lock);
+ if (ep->state == SCIFEP_CONNECTED) {
+ msg.uop = SCIF_REGISTER;
+ window->reg_state = OP_IN_PROGRESS;
+ err = _scif_nodeqp_send(ep->remote_dev, &msg);
+ spin_unlock(&ep->lock);
+ if (!err) {
+retry:
+ /* Wait for a SCIF_REGISTER_(N)ACK message */
+ err = wait_event_timeout(window->regwq,
+ window->reg_state !=
+ OP_IN_PROGRESS,
+ SCIF_NODE_ALIVE_TIMEOUT);
+ if (!err && scifdev_alive(ep))
+ goto retry;
+ err = !err ? -ENODEV : 0;
+ if (window->reg_state == OP_FAILED)
+ err = -ENOTCONN;
+ }
+ } else {
+ msg.uop = SCIF_FREE_VIRT;
+ msg.payload[3] = SCIF_REGISTER;
+ err = _scif_nodeqp_send(ep->remote_dev, &msg);
+ spin_unlock(&ep->lock);
+ if (!err)
+ err = -ENOTCONN;
+ }
+ return err;
+}
+
+/**
+ * scif_get_window_offset:
+ * @ep: end point descriptor
+ * @flags: flags
+ * @offset: offset hint
+ * @num_pages: number of pages
+ * @out_offset: computed offset returned by reference.
+ *
+ * Compute/Claim a new offset for this EP.
+ */
+int scif_get_window_offset(struct scif_endpt *ep, int flags, s64 offset,
+ int num_pages, s64 *out_offset)
+{
+ s64 page_index;
+ struct iova *iova_ptr;
+ int err = 0;
+
+ if (flags & SCIF_MAP_FIXED) {
+ page_index = SCIF_IOVA_PFN(offset);
+ iova_ptr = reserve_iova(&ep->rma_info.iovad, page_index,
+ page_index + num_pages - 1);
+ if (!iova_ptr)
+ err = -EADDRINUSE;
+ } else {
+ iova_ptr = alloc_iova(&ep->rma_info.iovad, num_pages,
+ SCIF_DMA_63BIT_PFN - 1, 0);
+ if (!iova_ptr)
+ err = -ENOMEM;
+ }
+ if (!err)
+ *out_offset = (iova_ptr->pfn_lo) << PAGE_SHIFT;
+ return err;
+}
+
+/**
+ * scif_free_window_offset:
+ * @ep: end point descriptor
+ * @window: registration window
+ * @offset: Offset to be freed
+ *
+ * Free offset for this EP. The callee is supposed to grab
+ * the RMA mutex before calling this API.
+ */
+void scif_free_window_offset(struct scif_endpt *ep,
+ struct scif_window *window, s64 offset)
+{
+ if ((window && !window->offset_freed) || !window) {
+ free_iova(&ep->rma_info.iovad, offset >> PAGE_SHIFT);
+ if (window)
+ window->offset_freed = true;
+ }
+}
+
+/**
+ * scif_alloc_req: Respond to SCIF_ALLOC_REQ interrupt message
+ * @msg: Interrupt message
+ *
+ * Remote side is requesting a memory allocation.
+ */
+void scif_alloc_req(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+ int err;
+ struct scif_window *window = NULL;
+ int nr_pages = msg->payload[1];
+
+ window = scif_create_remote_window(scifdev, nr_pages);
+ if (!window) {
+ err = -ENOMEM;
+ goto error;
+ }
+
+ /* The peer's allocation request is granted */
+ msg->uop = SCIF_ALLOC_GNT;
+ msg->payload[0] = (u64)window;
+ msg->payload[1] = window->mapped_offset;
+ err = scif_nodeqp_send(scifdev, msg);
+ if (err)
+ scif_destroy_remote_window(window);
+ return;
+error:
+ /* The peer's allocation request is rejected */
+ dev_err(&scifdev->sdev->dev,
+ "%s %d error %d alloc_ptr %p nr_pages 0x%x\n",
+ __func__, __LINE__, err, window, nr_pages);
+ msg->uop = SCIF_ALLOC_REJ;
+ scif_nodeqp_send(scifdev, msg);
+}
+
+/**
+ * scif_alloc_gnt_rej: Respond to SCIF_ALLOC_GNT/REJ interrupt message
+ * @msg: Interrupt message
+ *
+ * Remote side responded to a memory allocation.
+ */
+void scif_alloc_gnt_rej(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+ struct scif_allocmsg *handle = (struct scif_allocmsg *)msg->payload[2];
+ struct scif_window *window = container_of(handle, struct scif_window,
+ alloc_handle);
+ struct scif_endpt *ep = (struct scif_endpt *)window->ep;
+
+ mutex_lock(&ep->rma_info.rma_lock);
+ handle->vaddr = msg->payload[0];
+ handle->phys_addr = msg->payload[1];
+ if (msg->uop == SCIF_ALLOC_GNT)
+ handle->state = OP_COMPLETED;
+ else
+ handle->state = OP_FAILED;
+ wake_up(&handle->allocwq);
+ mutex_unlock(&ep->rma_info.rma_lock);
+}
+
+/**
+ * scif_free_virt: Respond to SCIF_FREE_VIRT interrupt message
+ * @msg: Interrupt message
+ *
+ * Free up memory kmalloc'd earlier.
+ */
+void scif_free_virt(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+ struct scif_window *window = (struct scif_window *)msg->payload[1];
+
+ scif_destroy_remote_window(window);
+}
+
+static void
+scif_fixup_aper_base(struct scif_dev *dev, struct scif_window *window)
+{
+ int j;
+ struct scif_hw_dev *sdev = dev->sdev;
+ phys_addr_t apt_base = 0;
+
+ /*
+ * Add the aperture base if the DMA address is not card relative
+ * since the DMA addresses need to be an offset into the bar
+ */
+ if (!scifdev_self(dev) && window->type == SCIF_WINDOW_PEER &&
+ sdev->aper && !sdev->card_rel_da)
+ apt_base = sdev->aper->pa;
+ else
+ return;
+
+ for (j = 0; j < window->nr_contig_chunks; j++) {
+ if (window->num_pages[j])
+ window->dma_addr[j] += apt_base;
+ else
+ break;
+ }
+}
+
+/**
+ * scif_recv_reg: Respond to SCIF_REGISTER interrupt message
+ * @msg: Interrupt message
+ *
+ * Update remote window list with a new registered window.
+ */
+void scif_recv_reg(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+ struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0];
+ struct scif_window *window =
+ (struct scif_window *)msg->payload[1];
+
+ mutex_lock(&ep->rma_info.rma_lock);
+ spin_lock(&ep->lock);
+ if (ep->state == SCIFEP_CONNECTED) {
+ msg->uop = SCIF_REGISTER_ACK;
+ scif_nodeqp_send(ep->remote_dev, msg);
+ scif_fixup_aper_base(ep->remote_dev, window);
+ /* No further failures expected. Insert new window */
+ scif_insert_window(window, &ep->rma_info.remote_reg_list);
+ } else {
+ msg->uop = SCIF_REGISTER_NACK;
+ scif_nodeqp_send(ep->remote_dev, msg);
+ }
+ spin_unlock(&ep->lock);
+ mutex_unlock(&ep->rma_info.rma_lock);
+ /* free up any lookup resources now that page lists are transferred */
+ scif_destroy_remote_lookup(ep->remote_dev, window);
+ /*
+ * We could not insert the window but we need to
+ * destroy the window.
+ */
+ if (msg->uop == SCIF_REGISTER_NACK)
+ scif_destroy_remote_window(window);
+}
+
+/**
+ * scif_recv_unreg: Respond to SCIF_UNREGISTER interrupt message
+ * @msg: Interrupt message
+ *
+ * Remove window from remote registration list;
+ */
+void scif_recv_unreg(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+ struct scif_rma_req req;
+ struct scif_window *window = NULL;
+ struct scif_window *recv_window =
+ (struct scif_window *)msg->payload[0];
+ struct scif_endpt *ep;
+ int del_window = 0;
+
+ ep = (struct scif_endpt *)recv_window->ep;
+ req.out_window = &window;
+ req.offset = recv_window->offset;
+ req.prot = 0;
+ req.nr_bytes = recv_window->nr_pages << PAGE_SHIFT;
+ req.type = SCIF_WINDOW_FULL;
+ req.head = &ep->rma_info.remote_reg_list;
+ msg->payload[0] = ep->remote_ep;
+
+ mutex_lock(&ep->rma_info.rma_lock);
+ /* Does a valid window exist? */
+ if (scif_query_window(&req)) {
+ dev_err(&scifdev->sdev->dev,
+ "%s %d -ENXIO\n", __func__, __LINE__);
+ msg->uop = SCIF_UNREGISTER_ACK;
+ goto error;
+ }
+ if (window) {
+ if (window->ref_count)
+ scif_put_window(window, window->nr_pages);
+ else
+ dev_err(&scifdev->sdev->dev,
+ "%s %d ref count should be +ve\n",
+ __func__, __LINE__);
+ window->unreg_state = OP_COMPLETED;
+ if (!window->ref_count) {
+ msg->uop = SCIF_UNREGISTER_ACK;
+ atomic_inc(&ep->rma_info.tw_refcount);
+ ep->rma_info.async_list_del = 1;
+ list_del_init(&window->list);
+ del_window = 1;
+ } else {
+ /* NACK! There are valid references to this window */
+ msg->uop = SCIF_UNREGISTER_NACK;
+ }
+ } else {
+ /* The window did not make its way to the list at all. ACK */
+ msg->uop = SCIF_UNREGISTER_ACK;
+ scif_destroy_remote_window(recv_window);
+ }
+error:
+ mutex_unlock(&ep->rma_info.rma_lock);
+ if (del_window)
+ scif_drain_dma_intr(ep->remote_dev->sdev,
+ ep->rma_info.dma_chan);
+ scif_nodeqp_send(ep->remote_dev, msg);
+ if (del_window)
+ scif_queue_for_cleanup(window, &scif_info.rma);
+}
+
+/**
+ * scif_recv_reg_ack: Respond to SCIF_REGISTER_ACK interrupt message
+ * @msg: Interrupt message
+ *
+ * Wake up the window waiting to complete registration.
+ */
+void scif_recv_reg_ack(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+ struct scif_window *window =
+ (struct scif_window *)msg->payload[2];
+ struct scif_endpt *ep = (struct scif_endpt *)window->ep;
+
+ mutex_lock(&ep->rma_info.rma_lock);
+ window->reg_state = OP_COMPLETED;
+ wake_up(&window->regwq);
+ mutex_unlock(&ep->rma_info.rma_lock);
+}
+
+/**
+ * scif_recv_reg_nack: Respond to SCIF_REGISTER_NACK interrupt message
+ * @msg: Interrupt message
+ *
+ * Wake up the window waiting to inform it that registration
+ * cannot be completed.
+ */
+void scif_recv_reg_nack(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+ struct scif_window *window =
+ (struct scif_window *)msg->payload[2];
+ struct scif_endpt *ep = (struct scif_endpt *)window->ep;
+
+ mutex_lock(&ep->rma_info.rma_lock);
+ window->reg_state = OP_FAILED;
+ wake_up(&window->regwq);
+ mutex_unlock(&ep->rma_info.rma_lock);
+}
+
+/**
+ * scif_recv_unreg_ack: Respond to SCIF_UNREGISTER_ACK interrupt message
+ * @msg: Interrupt message
+ *
+ * Wake up the window waiting to complete unregistration.
+ */
+void scif_recv_unreg_ack(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+ struct scif_window *window =
+ (struct scif_window *)msg->payload[1];
+ struct scif_endpt *ep = (struct scif_endpt *)window->ep;
+
+ mutex_lock(&ep->rma_info.rma_lock);
+ window->unreg_state = OP_COMPLETED;
+ wake_up(&window->unregwq);
+ mutex_unlock(&ep->rma_info.rma_lock);
+}
+
+/**
+ * scif_recv_unreg_nack: Respond to SCIF_UNREGISTER_NACK interrupt message
+ * @msg: Interrupt message
+ *
+ * Wake up the window waiting to inform it that unregistration
+ * cannot be completed immediately.
+ */
+void scif_recv_unreg_nack(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+ struct scif_window *window =
+ (struct scif_window *)msg->payload[1];
+ struct scif_endpt *ep = (struct scif_endpt *)window->ep;
+
+ mutex_lock(&ep->rma_info.rma_lock);
+ window->unreg_state = OP_FAILED;
+ wake_up(&window->unregwq);
+ mutex_unlock(&ep->rma_info.rma_lock);
+}
+
+int __scif_pin_pages(void *addr, size_t len, int *out_prot,
+ int map_flags, scif_pinned_pages_t *pages)
+{
+ struct scif_pinned_pages *pinned_pages;
+ int nr_pages, err = 0, i;
+ bool vmalloc_addr = false;
+ bool try_upgrade = false;
+ int prot = *out_prot;
+ int ulimit = 0;
+ struct mm_struct *mm = NULL;
+
+ /* Unsupported flags */
+ if (map_flags & ~(SCIF_MAP_KERNEL | SCIF_MAP_ULIMIT))
+ return -EINVAL;
+ ulimit = !!(map_flags & SCIF_MAP_ULIMIT);
+
+ /* Unsupported protection requested */
+ if (prot & ~(SCIF_PROT_READ | SCIF_PROT_WRITE))
+ return -EINVAL;
+
+ /* addr/len must be page aligned. len should be non zero */
+ if (!len ||
+ (ALIGN((u64)addr, PAGE_SIZE) != (u64)addr) ||
+ (ALIGN((u64)len, PAGE_SIZE) != (u64)len))
+ return -EINVAL;
+
+ might_sleep();
+
+ nr_pages = len >> PAGE_SHIFT;
+
+ /* Allocate a set of pinned pages */
+ pinned_pages = scif_create_pinned_pages(nr_pages, prot);
+ if (!pinned_pages)
+ return -ENOMEM;
+
+ if (map_flags & SCIF_MAP_KERNEL) {
+ if (is_vmalloc_addr(addr))
+ vmalloc_addr = true;
+
+ for (i = 0; i < nr_pages; i++) {
+ if (vmalloc_addr)
+ pinned_pages->pages[i] =
+ vmalloc_to_page(addr + (i * PAGE_SIZE));
+ else
+ pinned_pages->pages[i] =
+ virt_to_page(addr + (i * PAGE_SIZE));
+ }
+ pinned_pages->nr_pages = nr_pages;
+ pinned_pages->map_flags = SCIF_MAP_KERNEL;
+ } else {
+ /*
+ * SCIF supports registration caching. If a registration has
+ * been requested with read only permissions, then we try
+ * to pin the pages with RW permissions so that a subsequent
+ * transfer with RW permission can hit the cache instead of
+ * invalidating it. If the upgrade fails with RW then we
+ * revert back to R permission and retry
+ */
+ if (prot == SCIF_PROT_READ)
+ try_upgrade = true;
+ prot |= SCIF_PROT_WRITE;
+retry:
+ mm = current->mm;
+ down_write(&mm->mmap_sem);
+ if (ulimit) {
+ err = __scif_check_inc_pinned_vm(mm, nr_pages);
+ if (err) {
+ up_write(&mm->mmap_sem);
+ pinned_pages->nr_pages = 0;
+ goto error_unmap;
+ }
+ }
+
+ pinned_pages->nr_pages = get_user_pages(
+ current,
+ mm,
+ (u64)addr,
+ nr_pages,
+ !!(prot & SCIF_PROT_WRITE),
+ 0,
+ pinned_pages->pages,
+ NULL);
+ up_write(&mm->mmap_sem);
+ if (nr_pages != pinned_pages->nr_pages) {
+ if (try_upgrade) {
+ if (ulimit)
+ __scif_dec_pinned_vm_lock(mm,
+ nr_pages, 0);
+ /* Roll back any pinned pages */
+ for (i = 0; i < pinned_pages->nr_pages; i++) {
+ if (pinned_pages->pages[i])
+ put_page(
+ pinned_pages->pages[i]);
+ }
+ prot &= ~SCIF_PROT_WRITE;
+ try_upgrade = false;
+ goto retry;
+ }
+ }
+ pinned_pages->map_flags = 0;
+ }
+
+ if (pinned_pages->nr_pages < nr_pages) {
+ err = -EFAULT;
+ pinned_pages->nr_pages = nr_pages;
+ goto dec_pinned;
+ }
+
+ *out_prot = prot;
+ atomic_set(&pinned_pages->ref_count, 1);
+ *pages = pinned_pages;
+ return err;
+dec_pinned:
+ if (ulimit)
+ __scif_dec_pinned_vm_lock(mm, nr_pages, 0);
+ /* Something went wrong! Rollback */
+error_unmap:
+ pinned_pages->nr_pages = nr_pages;
+ scif_destroy_pinned_pages(pinned_pages);
+ *pages = NULL;
+ dev_dbg(scif_info.mdev.this_device,
+ "%s %d err %d len 0x%lx\n", __func__, __LINE__, err, len);
+ return err;
+}
+
+int scif_pin_pages(void *addr, size_t len, int prot,
+ int map_flags, scif_pinned_pages_t *pages)
+{
+ return __scif_pin_pages(addr, len, &prot, map_flags, pages);
+}
+EXPORT_SYMBOL_GPL(scif_pin_pages);
+
+int scif_unpin_pages(scif_pinned_pages_t pinned_pages)
+{
+ int err = 0, ret;
+
+ if (!pinned_pages || SCIFEP_MAGIC != pinned_pages->magic)
+ return -EINVAL;
+
+ ret = atomic_sub_return(1, &pinned_pages->ref_count);
+ if (ret < 0) {
+ dev_err(scif_info.mdev.this_device,
+ "%s %d scif_unpin_pages called without pinning? rc %d\n",
+ __func__, __LINE__, ret);
+ return -EINVAL;
+ }
+ /*
+ * Destroy the window if the ref count for this set of pinned
+ * pages has dropped to zero. If it is positive then there is
+ * a valid registered window which is backed by these pages and
+ * it will be destroyed once all such windows are unregistered.
+ */
+ if (!ret)
+ err = scif_destroy_pinned_pages(pinned_pages);
+
+ return err;
+}
+EXPORT_SYMBOL_GPL(scif_unpin_pages);
+
+static inline void
+scif_insert_local_window(struct scif_window *window, struct scif_endpt *ep)
+{
+ mutex_lock(&ep->rma_info.rma_lock);
+ scif_insert_window(window, &ep->rma_info.reg_list);
+ mutex_unlock(&ep->rma_info.rma_lock);
+}
+
+off_t scif_register_pinned_pages(scif_epd_t epd,
+ scif_pinned_pages_t pinned_pages,
+ off_t offset, int map_flags)
+{
+ struct scif_endpt *ep = (struct scif_endpt *)epd;
+ s64 computed_offset;
+ struct scif_window *window;
+ int err;
+ size_t len;
+ struct device *spdev;
+
+ /* Unsupported flags */
+ if (map_flags & ~SCIF_MAP_FIXED)
+ return -EINVAL;
+
+ len = pinned_pages->nr_pages << PAGE_SHIFT;
+
+ /*
+ * Offset is not page aligned/negative or offset+len
+ * wraps around with SCIF_MAP_FIXED.
+ */
+ if ((map_flags & SCIF_MAP_FIXED) &&
+ ((ALIGN(offset, PAGE_SIZE) != offset) ||
+ (offset < 0) ||
+ (len > LONG_MAX - offset)))
+ return -EINVAL;
+
+ might_sleep();
+
+ err = scif_verify_epd(ep);
+ if (err)
+ return err;
+ /*
+ * It is an error to pass pinned_pages to scif_register_pinned_pages()
+ * after calling scif_unpin_pages().
+ */
+ if (!atomic_add_unless(&pinned_pages->ref_count, 1, 0))
+ return -EINVAL;
+
+ /* Compute the offset for this registration */
+ err = scif_get_window_offset(ep, map_flags, offset,
+ len, &computed_offset);
+ if (err) {
+ atomic_sub(1, &pinned_pages->ref_count);
+ return err;
+ }
+
+ /* Allocate and prepare self registration window */
+ window = scif_create_window(ep, pinned_pages->nr_pages,
+ computed_offset, false);
+ if (!window) {
+ atomic_sub(1, &pinned_pages->ref_count);
+ scif_free_window_offset(ep, NULL, computed_offset);
+ return -ENOMEM;
+ }
+
+ window->pinned_pages = pinned_pages;
+ window->nr_pages = pinned_pages->nr_pages;
+ window->prot = pinned_pages->prot;
+
+ spdev = scif_get_peer_dev(ep->remote_dev);
+ if (IS_ERR(spdev)) {
+ err = PTR_ERR(spdev);
+ scif_destroy_window(ep, window);
+ return err;
+ }
+ err = scif_send_alloc_request(ep, window);
+ if (err) {
+ dev_err(&ep->remote_dev->sdev->dev,
+ "%s %d err %d\n", __func__, __LINE__, err);
+ goto error_unmap;
+ }
+
+ /* Prepare the remote registration window */
+ err = scif_prep_remote_window(ep, window);
+ if (err) {
+ dev_err(&ep->remote_dev->sdev->dev,
+ "%s %d err %d\n", __func__, __LINE__, err);
+ goto error_unmap;
+ }
+
+ /* Tell the peer about the new window */
+ err = scif_send_scif_register(ep, window);
+ if (err) {
+ dev_err(&ep->remote_dev->sdev->dev,
+ "%s %d err %d\n", __func__, __LINE__, err);
+ goto error_unmap;
+ }
+
+ scif_put_peer_dev(spdev);
+ /* No further failures expected. Insert new window */
+ scif_insert_local_window(window, ep);
+ return computed_offset;
+error_unmap:
+ scif_destroy_window(ep, window);
+ scif_put_peer_dev(spdev);
+ dev_err(&ep->remote_dev->sdev->dev,
+ "%s %d err %d\n", __func__, __LINE__, err);
+ return err;
+}
+EXPORT_SYMBOL_GPL(scif_register_pinned_pages);
+
+off_t scif_register(scif_epd_t epd, void *addr, size_t len, off_t offset,
+ int prot, int map_flags)
+{
+ scif_pinned_pages_t pinned_pages;
+ off_t err;
+ struct scif_endpt *ep = (struct scif_endpt *)epd;
+ s64 computed_offset;
+ struct scif_window *window;
+ struct mm_struct *mm = NULL;
+ struct device *spdev;
+
+ dev_dbg(scif_info.mdev.this_device,
+ "SCIFAPI register: ep %p addr %p len 0x%lx offset 0x%lx prot 0x%x map_flags 0x%x\n",
+ epd, addr, len, offset, prot, map_flags);
+ /* Unsupported flags */
+ if (map_flags & ~(SCIF_MAP_FIXED | SCIF_MAP_KERNEL))
+ return -EINVAL;
+
+ /*
+ * Offset is not page aligned/negative or offset+len
+ * wraps around with SCIF_MAP_FIXED.
+ */
+ if ((map_flags & SCIF_MAP_FIXED) &&
+ ((ALIGN(offset, PAGE_SIZE) != offset) ||
+ (offset < 0) ||
+ (len > LONG_MAX - offset)))
+ return -EINVAL;
+
+ /* Unsupported protection requested */
+ if (prot & ~(SCIF_PROT_READ | SCIF_PROT_WRITE))
+ return -EINVAL;
+
+ /* addr/len must be page aligned. len should be non zero */
+ if (!len || (ALIGN((u64)addr, PAGE_SIZE) != (u64)addr) ||
+ (ALIGN(len, PAGE_SIZE) != len))
+ return -EINVAL;
+
+ might_sleep();
+
+ err = scif_verify_epd(ep);
+ if (err)
+ return err;
+
+ /* Compute the offset for this registration */
+ err = scif_get_window_offset(ep, map_flags, offset,
+ len >> PAGE_SHIFT, &computed_offset);
+ if (err)
+ return err;
+
+ spdev = scif_get_peer_dev(ep->remote_dev);
+ if (IS_ERR(spdev)) {
+ err = PTR_ERR(spdev);
+ scif_free_window_offset(ep, NULL, computed_offset);
+ return err;
+ }
+ /* Allocate and prepare self registration window */
+ window = scif_create_window(ep, len >> PAGE_SHIFT,
+ computed_offset, false);
+ if (!window) {
+ scif_free_window_offset(ep, NULL, computed_offset);
+ scif_put_peer_dev(spdev);
+ return -ENOMEM;
+ }
+
+ window->nr_pages = len >> PAGE_SHIFT;
+
+ err = scif_send_alloc_request(ep, window);
+ if (err) {
+ scif_destroy_incomplete_window(ep, window);
+ scif_put_peer_dev(spdev);
+ return err;
+ }
+
+ if (!(map_flags & SCIF_MAP_KERNEL)) {
+ mm = __scif_acquire_mm();
+ map_flags |= SCIF_MAP_ULIMIT;
+ }
+ /* Pin down the pages */
+ err = __scif_pin_pages(addr, len, &prot,
+ map_flags & (SCIF_MAP_KERNEL | SCIF_MAP_ULIMIT),
+ &pinned_pages);
+ if (err) {
+ scif_destroy_incomplete_window(ep, window);
+ __scif_release_mm(mm);
+ goto error;
+ }
+
+ window->pinned_pages = pinned_pages;
+ window->prot = pinned_pages->prot;
+ window->mm = mm;
+
+ /* Prepare the remote registration window */
+ err = scif_prep_remote_window(ep, window);
+ if (err) {
+ dev_err(&ep->remote_dev->sdev->dev,
+ "%s %d err %ld\n", __func__, __LINE__, err);
+ goto error_unmap;
+ }
+
+ /* Tell the peer about the new window */
+ err = scif_send_scif_register(ep, window);
+ if (err) {
+ dev_err(&ep->remote_dev->sdev->dev,
+ "%s %d err %ld\n", __func__, __LINE__, err);
+ goto error_unmap;
+ }
+
+ scif_put_peer_dev(spdev);
+ /* No further failures expected. Insert new window */
+ scif_insert_local_window(window, ep);
+ dev_dbg(&ep->remote_dev->sdev->dev,
+ "SCIFAPI register: ep %p addr %p len 0x%lx computed_offset 0x%llx\n",
+ epd, addr, len, computed_offset);
+ return computed_offset;
+error_unmap:
+ scif_destroy_window(ep, window);
+error:
+ scif_put_peer_dev(spdev);
+ dev_err(&ep->remote_dev->sdev->dev,
+ "%s %d err %ld\n", __func__, __LINE__, err);
+ return err;
+}
+EXPORT_SYMBOL_GPL(scif_register);
+
+int
+scif_unregister(scif_epd_t epd, off_t offset, size_t len)
+{
+ struct scif_endpt *ep = (struct scif_endpt *)epd;
+ struct scif_window *window = NULL;
+ struct scif_rma_req req;
+ int nr_pages, err;
+ struct device *spdev;
+
+ dev_dbg(scif_info.mdev.this_device,
+ "SCIFAPI unregister: ep %p offset 0x%lx len 0x%lx\n",
+ ep, offset, len);
+ /* len must be page aligned. len should be non zero */
+ if (!len ||
+ (ALIGN((u64)len, PAGE_SIZE) != (u64)len))
+ return -EINVAL;
+
+ /* Offset is not page aligned or offset+len wraps around */
+ if ((ALIGN(offset, PAGE_SIZE) != offset) ||
+ (offset < 0) ||
+ (len > LONG_MAX - offset))
+ return -EINVAL;
+
+ err = scif_verify_epd(ep);
+ if (err)
+ return err;
+
+ might_sleep();
+ nr_pages = len >> PAGE_SHIFT;
+
+ req.out_window = &window;
+ req.offset = offset;
+ req.prot = 0;
+ req.nr_bytes = len;
+ req.type = SCIF_WINDOW_FULL;
+ req.head = &ep->rma_info.reg_list;
+
+ spdev = scif_get_peer_dev(ep->remote_dev);
+ if (IS_ERR(spdev)) {
+ err = PTR_ERR(spdev);
+ return err;
+ }
+ mutex_lock(&ep->rma_info.rma_lock);
+ /* Does a valid window exist? */
+ err = scif_query_window(&req);
+ if (err) {
+ dev_err(&ep->remote_dev->sdev->dev,
+ "%s %d err %d\n", __func__, __LINE__, err);
+ goto error;
+ }
+ /* Unregister all the windows in this range */
+ err = scif_rma_list_unregister(window, offset, nr_pages);
+ if (err)
+ dev_err(&ep->remote_dev->sdev->dev,
+ "%s %d err %d\n", __func__, __LINE__, err);
+error:
+ mutex_unlock(&ep->rma_info.rma_lock);
+ scif_put_peer_dev(spdev);
+ return err;
+}
+EXPORT_SYMBOL_GPL(scif_unregister);
diff --git a/drivers/misc/mic/scif/scif_rma.h b/drivers/misc/mic/scif/scif_rma.h
new file mode 100644
index 0000000..fa67222
--- /dev/null
+++ b/drivers/misc/mic/scif/scif_rma.h
@@ -0,0 +1,464 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright(c) 2015 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Copyright(c) 2015 Intel Corporation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Intel SCIF driver.
+ *
+ */
+#ifndef SCIF_RMA_H
+#define SCIF_RMA_H
+
+#include <linux/dma_remapping.h>
+#include <linux/mmu_notifier.h>
+
+#include "../bus/scif_bus.h"
+
+/* If this bit is set then the mark is a remote fence mark */
+#define SCIF_REMOTE_FENCE_BIT 31
+/* Magic value used to indicate a remote fence request */
+#define SCIF_REMOTE_FENCE BIT_ULL(SCIF_REMOTE_FENCE_BIT)
+
+#define SCIF_MAX_UNALIGNED_BUF_SIZE (1024 * 1024ULL)
+#define SCIF_KMEM_UNALIGNED_BUF_SIZE (SCIF_MAX_UNALIGNED_BUF_SIZE + \
+ (L1_CACHE_BYTES << 1))
+
+#define SCIF_IOVA_START_PFN (1)
+#define SCIF_IOVA_PFN(addr) ((addr) >> PAGE_SHIFT)
+#define SCIF_DMA_64BIT_PFN SCIF_IOVA_PFN(DMA_BIT_MASK(64))
+#define SCIF_DMA_63BIT_PFN SCIF_IOVA_PFN(DMA_BIT_MASK(63))
+
+/*
+ * struct scif_endpt_rma_info - Per Endpoint Remote Memory Access Information
+ *
+ * @reg_list: List of registration windows for self
+ * @remote_reg_list: List of registration windows for peer
+ * @iovad: Offset generator
+ * @rma_lock: Synchronizes access to self/remote list and also protects the
+ * window from being destroyed while RMAs are in progress.
+ * @tc_lock: Synchronizes access to temporary cached windows list
+ * for SCIF Registration Caching.
+ * @mmn_lock: Synchronizes access to the list of MMU notifiers registered
+ * @tw_refcount: Keeps track of number of outstanding temporary registered
+ * windows created by scif_vreadfrom/scif_vwriteto which have
+ * not been destroyed.
+ * @tcw_refcount: Same as tw_refcount but for temporary cached windows
+ * @tcw_total_pages: Same as tcw_refcount but in terms of pages pinned
+ * @mmn_list: MMU notifier so that we can destroy the windows when required
+ * @fence_refcount: Keeps track of number of outstanding remote fence
+ * requests which have been received by the peer.
+ * @dma_chan: DMA channel used for all DMA transfers for this endpoint.
+ * @async_list_del: Detect asynchronous list entry deletion
+ * @vma_list: List of vmas with remote memory mappings
+ * @markwq: Wait queue used for scif_fence_mark/scif_fence_wait
+*/
+struct scif_endpt_rma_info {
+ struct list_head reg_list;
+ struct list_head remote_reg_list;
+ struct iova_domain iovad;
+ struct mutex rma_lock;
+ spinlock_t tc_lock;
+ struct mutex mmn_lock;
+ atomic_t tw_refcount;
+ atomic_t tcw_refcount;
+ atomic_t tcw_total_pages;
+ struct list_head mmn_list;
+ atomic_t fence_refcount;
+ struct dma_chan *dma_chan;
+ int async_list_del;
+ struct list_head vma_list;
+ wait_queue_head_t markwq;
+};
+
+/*
+ * struct scif_fence_info - used for tracking fence requests
+ *
+ * @state: State of this transfer
+ * @wq: Fences wait on this queue
+ * @dma_mark: Used for storing the DMA mark
+ */
+struct scif_fence_info {
+ enum scif_msg_state state;
+ struct completion comp;
+ int dma_mark;
+};
+
+/*
+ * struct scif_remote_fence_info - used for tracking remote fence requests
+ *
+ * @msg: List of SCIF node QP fence messages
+ * @list: Link to list of remote fence requests
+ */
+struct scif_remote_fence_info {
+ struct scifmsg msg;
+ struct list_head list;
+};
+
+/*
+ * Specifies whether an RMA operation can span across partial windows, a single
+ * window or multiple contiguous windows. Mmaps can span across partial windows.
+ * Unregistration can span across complete windows. scif_get_pages() can span a
+ * single window. A window can also be of type self or peer.
+ */
+enum scif_window_type {
+ SCIF_WINDOW_PARTIAL,
+ SCIF_WINDOW_SINGLE,
+ SCIF_WINDOW_FULL,
+ SCIF_WINDOW_SELF,
+ SCIF_WINDOW_PEER
+};
+
+/* The number of physical addresses that can be stored in a PAGE. */
+#define SCIF_NR_ADDR_IN_PAGE (0x1000 >> 3)
+
+/*
+ * struct scif_rma_lookup - RMA lookup data structure for page list transfers
+ *
+ * Store an array of lookup offsets. Each offset in this array maps
+ * one 4K page containing 512 physical addresses i.e. 2MB. 512 such
+ * offsets in a 4K page will correspond to 1GB of registered address space.
+
+ * @lookup: Array of offsets
+ * @offset: DMA offset of lookup array
+ */
+struct scif_rma_lookup {
+ dma_addr_t *lookup;
+ dma_addr_t offset;
+};
+
+/*
+ * struct scif_pinned_pages - A set of pinned pages obtained with
+ * scif_pin_pages() which could be part of multiple registered
+ * windows across different end points.
+ *
+ * @nr_pages: Number of pages which is defined as a s64 instead of an int
+ * to avoid sign extension with buffers >= 2GB
+ * @prot: read/write protections
+ * @map_flags: Flags specified during the pin operation
+ * @ref_count: Reference count bumped in terms of number of pages
+ * @magic: A magic value
+ * @pages: Array of pointers to struct pages populated with get_user_pages(..)
+ */
+struct scif_pinned_pages {
+ s64 nr_pages;
+ int prot;
+ int map_flags;
+ atomic_t ref_count;
+ u64 magic;
+ struct page **pages;
+};
+
+/*
+ * struct scif_status - Stores DMA status update information
+ *
+ * @src_dma_addr: Source buffer DMA address
+ * @val: src location for value to be written to the destination
+ * @ep: SCIF endpoint
+ */
+struct scif_status {
+ dma_addr_t src_dma_addr;
+ u64 val;
+ struct scif_endpt *ep;
+};
+
+/*
+ * struct scif_window - Registration Window for Self and Remote
+ *
+ * @nr_pages: Number of pages which is defined as a s64 instead of an int
+ * to avoid sign extension with buffers >= 2GB
+ * @nr_contig_chunks: Number of contiguous physical chunks
+ * @prot: read/write protections
+ * @ref_count: reference count in terms of number of pages
+ * @magic: Cookie to detect corruption
+ * @offset: registered offset
+ * @va_for_temp: va address that this window represents
+ * @dma_mark: Used to determine if all DMAs against the window are done
+ * @ep: Pointer to EP. Useful for passing EP around with messages to
+ avoid expensive list traversals.
+ * @list: link to list of windows for the endpoint
+ * @type: self or peer window
+ * @peer_window: Pointer to peer window. Useful for sending messages to peer
+ * without requiring an extra list traversal
+ * @unreg_state: unregistration state
+ * @offset_freed: True if the offset has been freed
+ * @temp: True for temporary windows created via scif_vreadfrom/scif_vwriteto
+ * @mm: memory descriptor for the task_struct which initiated the RMA
+ * @st: scatter gather table for DMA mappings with IOMMU enabled
+ * @pinned_pages: The set of pinned_pages backing this window
+ * @alloc_handle: Handle for sending ALLOC_REQ
+ * @regwq: Wait Queue for an registration (N)ACK
+ * @reg_state: Registration state
+ * @unregwq: Wait Queue for an unregistration (N)ACK
+ * @dma_addr_lookup: Lookup for physical addresses used for DMA
+ * @nr_lookup: Number of entries in lookup
+ * @mapped_offset: Offset used to map the window by the peer
+ * @dma_addr: Array of physical addresses used for Mgmt node & MIC initiated DMA
+ * @num_pages: Array specifying number of pages for each physical address
+ */
+struct scif_window {
+ s64 nr_pages;
+ int nr_contig_chunks;
+ int prot;
+ int ref_count;
+ u64 magic;
+ s64 offset;
+ unsigned long va_for_temp;
+ int dma_mark;
+ u64 ep;
+ struct list_head list;
+ enum scif_window_type type;
+ u64 peer_window;
+ enum scif_msg_state unreg_state;
+ bool offset_freed;
+ bool temp;
+ struct mm_struct *mm;
+ struct sg_table *st;
+ union {
+ struct {
+ struct scif_pinned_pages *pinned_pages;
+ struct scif_allocmsg alloc_handle;
+ wait_queue_head_t regwq;
+ enum scif_msg_state reg_state;
+ wait_queue_head_t unregwq;
+ };
+ struct {
+ struct scif_rma_lookup dma_addr_lookup;
+ struct scif_rma_lookup num_pages_lookup;
+ int nr_lookup;
+ dma_addr_t mapped_offset;
+ };
+ };
+ dma_addr_t *dma_addr;
+ u64 *num_pages;
+} __packed;
+
+/*
+ * scif_mmu_notif - SCIF mmu notifier information
+ *
+ * @mmu_notifier ep_mmu_notifier: MMU notifier operations
+ * @tc_reg_list: List of temp registration windows for self
+ * @mm: memory descriptor for the task_struct which initiated the RMA
+ * @ep: SCIF endpoint
+ * @list: link to list of MMU notifier information
+ */
+struct scif_mmu_notif {
+#ifdef CONFIG_MMU_NOTIFIER
+ struct mmu_notifier ep_mmu_notifier;
+#endif
+ struct list_head tc_reg_list;
+ struct mm_struct *mm;
+ struct scif_endpt *ep;
+ struct list_head list;
+};
+
+enum scif_rma_dir {
+ SCIF_LOCAL_TO_REMOTE,
+ SCIF_REMOTE_TO_LOCAL
+};
+
+extern struct kmem_cache *unaligned_cache;
+/* Initialize RMA for this EP */
+void scif_rma_ep_init(struct scif_endpt *ep);
+/* Check if epd can be uninitialized */
+int scif_rma_ep_can_uninit(struct scif_endpt *ep);
+/* Obtain a new offset. Callee must grab RMA lock */
+int scif_get_window_offset(struct scif_endpt *ep, int flags,
+ s64 offset, int nr_pages, s64 *out_offset);
+/* Free offset. Callee must grab RMA lock */
+void scif_free_window_offset(struct scif_endpt *ep,
+ struct scif_window *window, s64 offset);
+/* Create self registration window */
+struct scif_window *scif_create_window(struct scif_endpt *ep, int nr_pages,
+ s64 offset, bool temp);
+/* Destroy self registration window.*/
+int scif_destroy_window(struct scif_endpt *ep, struct scif_window *window);
+void scif_unmap_window(struct scif_dev *remote_dev, struct scif_window *window);
+/* Map pages of self window to Aperture/PCI */
+int scif_map_window(struct scif_dev *remote_dev,
+ struct scif_window *window);
+/* Unregister a self window */
+int scif_unregister_window(struct scif_window *window);
+/* Destroy remote registration window */
+void
+scif_destroy_remote_window(struct scif_window *window);
+/* remove valid remote memory mappings from process address space */
+void scif_zap_mmaps(int node);
+/* Query if any applications have remote memory mappings */
+bool scif_rma_do_apps_have_mmaps(int node);
+/* Cleanup remote registration lists for zombie endpoints */
+void scif_cleanup_rma_for_zombies(int node);
+/* Reserve a DMA channel for a particular endpoint */
+int scif_reserve_dma_chan(struct scif_endpt *ep);
+/* Setup a DMA mark for an endpoint */
+int _scif_fence_mark(scif_epd_t epd, int *mark);
+int scif_prog_signal(scif_epd_t epd, off_t offset, u64 val,
+ enum scif_window_type type);
+void scif_alloc_req(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_alloc_gnt_rej(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_free_virt(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_recv_reg(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_recv_unreg(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_recv_reg_ack(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_recv_reg_nack(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_recv_unreg_ack(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_recv_unreg_nack(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_recv_munmap(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_recv_mark(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_recv_mark_resp(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_recv_wait(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_recv_wait_resp(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_recv_sig_local(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_recv_sig_remote(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_recv_sig_resp(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_mmu_notif_handler(struct work_struct *work);
+void scif_rma_handle_remote_fences(void);
+void scif_rma_destroy_windows(void);
+void scif_rma_destroy_tcw_invalid(void);
+int scif_drain_dma_intr(struct scif_hw_dev *sdev, struct dma_chan *chan);
+
+struct scif_window_iter {
+ s64 offset;
+ int index;
+};
+
+static inline void
+scif_init_window_iter(struct scif_window *window, struct scif_window_iter *iter)
+{
+ iter->offset = window->offset;
+ iter->index = 0;
+}
+
+dma_addr_t scif_off_to_dma_addr(struct scif_window *window, s64 off,
+ size_t *nr_bytes,
+ struct scif_window_iter *iter);
+static inline
+dma_addr_t __scif_off_to_dma_addr(struct scif_window *window, s64 off)
+{
+ return scif_off_to_dma_addr(window, off, NULL, NULL);
+}
+
+static inline bool scif_unaligned(off_t src_offset, off_t dst_offset)
+{
+ src_offset = src_offset & (L1_CACHE_BYTES - 1);
+ dst_offset = dst_offset & (L1_CACHE_BYTES - 1);
+ return !(src_offset == dst_offset);
+}
+
+/*
+ * scif_zalloc:
+ * @size: Size of the allocation request.
+ *
+ * Helper API which attempts to allocate zeroed pages via
+ * __get_free_pages(..) first and then falls back on
+ * vzalloc(..) if that fails.
+ */
+static inline void *scif_zalloc(size_t size)
+{
+ void *ret = NULL;
+ size_t align = ALIGN(size, PAGE_SIZE);
+
+ if (align && get_order(align) < MAX_ORDER)
+ ret = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
+ get_order(align));
+ return ret ? ret : vzalloc(align);
+}
+
+/*
+ * scif_free:
+ * @addr: Address to be freed.
+ * @size: Size of the allocation.
+ * Helper API which frees memory allocated via scif_zalloc().
+ */
+static inline void scif_free(void *addr, size_t size)
+{
+ size_t align = ALIGN(size, PAGE_SIZE);
+
+ if (is_vmalloc_addr(addr))
+ vfree(addr);
+ else
+ free_pages((unsigned long)addr, get_order(align));
+}
+
+static inline void scif_get_window(struct scif_window *window, int nr_pages)
+{
+ window->ref_count += nr_pages;
+}
+
+static inline void scif_put_window(struct scif_window *window, int nr_pages)
+{
+ window->ref_count -= nr_pages;
+}
+
+static inline void scif_set_window_ref(struct scif_window *window, int nr_pages)
+{
+ window->ref_count = nr_pages;
+}
+
+static inline void
+scif_queue_for_cleanup(struct scif_window *window, struct list_head *list)
+{
+ spin_lock(&scif_info.rmalock);
+ list_add_tail(&window->list, list);
+ spin_unlock(&scif_info.rmalock);
+ schedule_work(&scif_info.misc_work);
+}
+
+static inline void __scif_rma_destroy_tcw_helper(struct scif_window *window)
+{
+ list_del_init(&window->list);
+ scif_queue_for_cleanup(window, &scif_info.rma_tc);
+}
+
+static inline bool scif_is_iommu_enabled(void)
+{
+#ifdef CONFIG_INTEL_IOMMU
+ return intel_iommu_enabled;
+#else
+ return false;
+#endif
+}
+#endif /* SCIF_RMA_H */
diff --git a/drivers/misc/mic/scif/scif_rma_list.c b/drivers/misc/mic/scif/scif_rma_list.c
new file mode 100644
index 0000000..e1ef8da
--- /dev/null
+++ b/drivers/misc/mic/scif/scif_rma_list.c
@@ -0,0 +1,291 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2015 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Intel SCIF driver.
+ *
+ */
+#include "scif_main.h"
+#include <linux/mmu_notifier.h>
+#include <linux/highmem.h>
+
+/*
+ * scif_insert_tcw:
+ *
+ * Insert a temp window to the temp registration list sorted by va_for_temp.
+ * RMA lock must be held.
+ */
+void scif_insert_tcw(struct scif_window *window, struct list_head *head)
+{
+ struct scif_window *curr = NULL;
+ struct scif_window *prev = list_entry(head, struct scif_window, list);
+ struct list_head *item;
+
+ INIT_LIST_HEAD(&window->list);
+ /* Compare with tail and if the entry is new tail add it to the end */
+ if (!list_empty(head)) {
+ curr = list_entry(head->prev, struct scif_window, list);
+ if (curr->va_for_temp < window->va_for_temp) {
+ list_add_tail(&window->list, head);
+ return;
+ }
+ }
+ list_for_each(item, head) {
+ curr = list_entry(item, struct scif_window, list);
+ if (curr->va_for_temp > window->va_for_temp)
+ break;
+ prev = curr;
+ }
+ list_add(&window->list, &prev->list);
+}
+
+/*
+ * scif_insert_window:
+ *
+ * Insert a window to the self registration list sorted by offset.
+ * RMA lock must be held.
+ */
+void scif_insert_window(struct scif_window *window, struct list_head *head)
+{
+ struct scif_window *curr = NULL, *prev = NULL;
+ struct list_head *item;
+
+ INIT_LIST_HEAD(&window->list);
+ list_for_each(item, head) {
+ curr = list_entry(item, struct scif_window, list);
+ if (curr->offset > window->offset)
+ break;
+ prev = curr;
+ }
+ if (!prev)
+ list_add(&window->list, head);
+ else
+ list_add(&window->list, &prev->list);
+ scif_set_window_ref(window, window->nr_pages);
+}
+
+/*
+ * scif_query_tcw:
+ *
+ * Query the temp cached registration list of ep for an overlapping window
+ * in case of permission mismatch, destroy the previous window. if permissions
+ * match and overlap is partial, destroy the window but return the new range
+ * RMA lock must be held.
+ */
+int scif_query_tcw(struct scif_endpt *ep, struct scif_rma_req *req)
+{
+ struct list_head *item, *temp, *head = req->head;
+ struct scif_window *window;
+ u64 start_va_window, start_va_req = req->va_for_temp;
+ u64 end_va_window, end_va_req = start_va_req + req->nr_bytes;
+
+ if (!req->nr_bytes)
+ return -EINVAL;
+ /*
+ * Avoid traversing the entire list to find out that there
+ * is no entry that matches
+ */
+ if (!list_empty(head)) {
+ window = list_last_entry(head, struct scif_window, list);
+ end_va_window = window->va_for_temp +
+ (window->nr_pages << PAGE_SHIFT);
+ if (start_va_req > end_va_window)
+ return -ENXIO;
+ }
+ list_for_each_safe(item, temp, head) {
+ window = list_entry(item, struct scif_window, list);
+ start_va_window = window->va_for_temp;
+ end_va_window = window->va_for_temp +
+ (window->nr_pages << PAGE_SHIFT);
+ if (start_va_req < start_va_window &&
+ end_va_req < start_va_window)
+ break;
+ if (start_va_req >= end_va_window)
+ continue;
+ if ((window->prot & req->prot) == req->prot) {
+ if (start_va_req >= start_va_window &&
+ end_va_req <= end_va_window) {
+ *req->out_window = window;
+ return 0;
+ }
+ /* expand window */
+ if (start_va_req < start_va_window) {
+ req->nr_bytes +=
+ start_va_window - start_va_req;
+ req->va_for_temp = start_va_window;
+ }
+ if (end_va_req >= end_va_window)
+ req->nr_bytes += end_va_window - end_va_req;
+ }
+ /* Destroy the old window to create a new one */
+ __scif_rma_destroy_tcw_helper(window);
+ break;
+ }
+ return -ENXIO;
+}
+
+/*
+ * scif_query_window:
+ *
+ * Query the registration list and check if a valid contiguous
+ * range of windows exist.
+ * RMA lock must be held.
+ */
+int scif_query_window(struct scif_rma_req *req)
+{
+ struct list_head *item;
+ struct scif_window *window;
+ s64 end_offset, offset = req->offset;
+ u64 tmp_min, nr_bytes_left = req->nr_bytes;
+
+ if (!req->nr_bytes)
+ return -EINVAL;
+
+ list_for_each(item, req->head) {
+ window = list_entry(item, struct scif_window, list);
+ end_offset = window->offset +
+ (window->nr_pages << PAGE_SHIFT);
+ if (offset < window->offset)
+ /* Offset not found! */
+ return -ENXIO;
+ if (offset >= end_offset)
+ continue;
+ /* Check read/write protections. */
+ if ((window->prot & req->prot) != req->prot)
+ return -EPERM;
+ if (nr_bytes_left == req->nr_bytes)
+ /* Store the first window */
+ *req->out_window = window;
+ tmp_min = min((u64)end_offset - offset, nr_bytes_left);
+ nr_bytes_left -= tmp_min;
+ offset += tmp_min;
+ /*
+ * Range requested encompasses
+ * multiple windows contiguously.
+ */
+ if (!nr_bytes_left) {
+ /* Done for partial window */
+ if (req->type == SCIF_WINDOW_PARTIAL ||
+ req->type == SCIF_WINDOW_SINGLE)
+ return 0;
+ /* Extra logic for full windows */
+ if (offset == end_offset)
+ /* Spanning multiple whole windows */
+ return 0;
+ /* Not spanning multiple whole windows */
+ return -ENXIO;
+ }
+ if (req->type == SCIF_WINDOW_SINGLE)
+ break;
+ }
+ dev_err(scif_info.mdev.this_device,
+ "%s %d ENXIO\n", __func__, __LINE__);
+ return -ENXIO;
+}
+
+/*
+ * scif_rma_list_unregister:
+ *
+ * Traverse the self registration list starting from window:
+ * 1) Call scif_unregister_window(..)
+ * RMA lock must be held.
+ */
+int scif_rma_list_unregister(struct scif_window *window,
+ s64 offset, int nr_pages)
+{
+ struct scif_endpt *ep = (struct scif_endpt *)window->ep;
+ struct list_head *head = &ep->rma_info.reg_list;
+ s64 end_offset;
+ int err = 0;
+ int loop_nr_pages;
+ struct scif_window *_window;
+
+ list_for_each_entry_safe_from(window, _window, head, list) {
+ end_offset = window->offset + (window->nr_pages << PAGE_SHIFT);
+ loop_nr_pages = min((int)((end_offset - offset) >> PAGE_SHIFT),
+ nr_pages);
+ err = scif_unregister_window(window);
+ if (err)
+ return err;
+ nr_pages -= loop_nr_pages;
+ offset += (loop_nr_pages << PAGE_SHIFT);
+ if (!nr_pages)
+ break;
+ }
+ return 0;
+}
+
+/*
+ * scif_unmap_all_window:
+ *
+ * Traverse all the windows in the self registration list and:
+ * 1) Delete any DMA mappings created
+ */
+void scif_unmap_all_windows(scif_epd_t epd)
+{
+ struct list_head *item, *tmp;
+ struct scif_window *window;
+ struct scif_endpt *ep = (struct scif_endpt *)epd;
+ struct list_head *head = &ep->rma_info.reg_list;
+
+ mutex_lock(&ep->rma_info.rma_lock);
+ list_for_each_safe(item, tmp, head) {
+ window = list_entry(item, struct scif_window, list);
+ scif_unmap_window(ep->remote_dev, window);
+ }
+ mutex_unlock(&ep->rma_info.rma_lock);
+}
+
+/*
+ * scif_unregister_all_window:
+ *
+ * Traverse all the windows in the self registration list and:
+ * 1) Call scif_unregister_window(..)
+ * RMA lock must be held.
+ */
+int scif_unregister_all_windows(scif_epd_t epd)
+{
+ struct list_head *item, *tmp;
+ struct scif_window *window;
+ struct scif_endpt *ep = (struct scif_endpt *)epd;
+ struct list_head *head = &ep->rma_info.reg_list;
+ int err = 0;
+
+ mutex_lock(&ep->rma_info.rma_lock);
+retry:
+ item = NULL;
+ tmp = NULL;
+ list_for_each_safe(item, tmp, head) {
+ window = list_entry(item, struct scif_window, list);
+ ep->rma_info.async_list_del = 0;
+ err = scif_unregister_window(window);
+ if (err)
+ dev_err(scif_info.mdev.this_device,
+ "%s %d err %d\n",
+ __func__, __LINE__, err);
+ /*
+ * Need to restart list traversal if there has been
+ * an asynchronous list entry deletion.
+ */
+ if (ACCESS_ONCE(ep->rma_info.async_list_del))
+ goto retry;
+ }
+ mutex_unlock(&ep->rma_info.rma_lock);
+ if (!list_empty(&ep->rma_info.mmn_list)) {
+ spin_lock(&scif_info.rmalock);
+ list_add_tail(&ep->mmu_list, &scif_info.mmu_notif_cleanup);
+ spin_unlock(&scif_info.rmalock);
+ schedule_work(&scif_info.mmu_notif_work);
+ }
+ return err;
+}
diff --git a/drivers/misc/mic/scif/scif_rma_list.h b/drivers/misc/mic/scif/scif_rma_list.h
new file mode 100644
index 0000000..7d58d1d
--- /dev/null
+++ b/drivers/misc/mic/scif/scif_rma_list.h
@@ -0,0 +1,57 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2015 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Intel SCIF driver.
+ *
+ */
+#ifndef SCIF_RMA_LIST_H
+#define SCIF_RMA_LIST_H
+
+/*
+ * struct scif_rma_req - Self Registration list RMA Request query
+ *
+ * @out_window - Returns the window if found
+ * @offset: Starting offset
+ * @nr_bytes: number of bytes
+ * @prot: protection requested i.e. read or write or both
+ * @type: Specify single, partial or multiple windows
+ * @head: Head of list on which to search
+ * @va_for_temp: VA for searching temporary cached windows
+ */
+struct scif_rma_req {
+ struct scif_window **out_window;
+ union {
+ s64 offset;
+ unsigned long va_for_temp;
+ };
+ size_t nr_bytes;
+ int prot;
+ enum scif_window_type type;
+ struct list_head *head;
+};
+
+/* Insert */
+void scif_insert_window(struct scif_window *window, struct list_head *head);
+void scif_insert_tcw(struct scif_window *window,
+ struct list_head *head);
+/* Query */
+int scif_query_window(struct scif_rma_req *request);
+int scif_query_tcw(struct scif_endpt *ep, struct scif_rma_req *request);
+/* Called from close to unregister all self windows */
+int scif_unregister_all_windows(scif_epd_t epd);
+void scif_unmap_all_windows(scif_epd_t epd);
+/* Traverse list and unregister */
+int scif_rma_list_unregister(struct scif_window *window, s64 offset,
+ int nr_pages);
+#endif /* SCIF_RMA_LIST_H */
diff --git a/drivers/misc/pch_phub.c b/drivers/misc/pch_phub.c
new file mode 100644
index 0000000..9a17a9b
--- /dev/null
+++ b/drivers/misc/pch_phub.c
@@ -0,0 +1,898 @@
+/*
+ * Copyright (C) 2011 LAPIS Semiconductor Co., Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/fs.h>
+#include <linux/uaccess.h>
+#include <linux/string.h>
+#include <linux/pci.h>
+#include <linux/io.h>
+#include <linux/delay.h>
+#include <linux/mutex.h>
+#include <linux/if_ether.h>
+#include <linux/ctype.h>
+#include <linux/dmi.h>
+
+#define PHUB_STATUS 0x00 /* Status Register offset */
+#define PHUB_CONTROL 0x04 /* Control Register offset */
+#define PHUB_TIMEOUT 0x05 /* Time out value for Status Register */
+#define PCH_PHUB_ROM_WRITE_ENABLE 0x01 /* Enabling for writing ROM */
+#define PCH_PHUB_ROM_WRITE_DISABLE 0x00 /* Disabling for writing ROM */
+#define PCH_PHUB_MAC_START_ADDR_EG20T 0x14 /* MAC data area start address
+ offset */
+#define PCH_PHUB_MAC_START_ADDR_ML7223 0x20C /* MAC data area start address
+ offset */
+#define PCH_PHUB_ROM_START_ADDR_EG20T 0x80 /* ROM data area start address offset
+ (Intel EG20T PCH)*/
+#define PCH_PHUB_ROM_START_ADDR_ML7213 0x400 /* ROM data area start address
+ offset(LAPIS Semicon ML7213)
+ */
+#define PCH_PHUB_ROM_START_ADDR_ML7223 0x400 /* ROM data area start address
+ offset(LAPIS Semicon ML7223)
+ */
+
+/* MAX number of INT_REDUCE_CONTROL registers */
+#define MAX_NUM_INT_REDUCE_CONTROL_REG 128
+#define PCI_DEVICE_ID_PCH1_PHUB 0x8801
+#define PCH_MINOR_NOS 1
+#define CLKCFG_CAN_50MHZ 0x12000000
+#define CLKCFG_CANCLK_MASK 0xFF000000
+#define CLKCFG_UART_MASK 0xFFFFFF
+
+/* CM-iTC */
+#define CLKCFG_UART_48MHZ (1 << 16)
+#define CLKCFG_BAUDDIV (2 << 20)
+#define CLKCFG_PLL2VCO (8 << 9)
+#define CLKCFG_UARTCLKSEL (1 << 18)
+
+/* Macros for ML7213 */
+#define PCI_VENDOR_ID_ROHM 0x10db
+#define PCI_DEVICE_ID_ROHM_ML7213_PHUB 0x801A
+
+/* Macros for ML7223 */
+#define PCI_DEVICE_ID_ROHM_ML7223_mPHUB 0x8012 /* for Bus-m */
+#define PCI_DEVICE_ID_ROHM_ML7223_nPHUB 0x8002 /* for Bus-n */
+
+/* Macros for ML7831 */
+#define PCI_DEVICE_ID_ROHM_ML7831_PHUB 0x8801
+
+/* SROM ACCESS Macro */
+#define PCH_WORD_ADDR_MASK (~((1 << 2) - 1))
+
+/* Registers address offset */
+#define PCH_PHUB_ID_REG 0x0000
+#define PCH_PHUB_QUEUE_PRI_VAL_REG 0x0004
+#define PCH_PHUB_RC_QUEUE_MAXSIZE_REG 0x0008
+#define PCH_PHUB_BRI_QUEUE_MAXSIZE_REG 0x000C
+#define PCH_PHUB_COMP_RESP_TIMEOUT_REG 0x0010
+#define PCH_PHUB_BUS_SLAVE_CONTROL_REG 0x0014
+#define PCH_PHUB_DEADLOCK_AVOID_TYPE_REG 0x0018
+#define PCH_PHUB_INTPIN_REG_WPERMIT_REG0 0x0020
+#define PCH_PHUB_INTPIN_REG_WPERMIT_REG1 0x0024
+#define PCH_PHUB_INTPIN_REG_WPERMIT_REG2 0x0028
+#define PCH_PHUB_INTPIN_REG_WPERMIT_REG3 0x002C
+#define PCH_PHUB_INT_REDUCE_CONTROL_REG_BASE 0x0040
+#define CLKCFG_REG_OFFSET 0x500
+#define FUNCSEL_REG_OFFSET 0x508
+
+#define PCH_PHUB_OROM_SIZE 15360
+
+/**
+ * struct pch_phub_reg - PHUB register structure
+ * @phub_id_reg: PHUB_ID register val
+ * @q_pri_val_reg: QUEUE_PRI_VAL register val
+ * @rc_q_maxsize_reg: RC_QUEUE_MAXSIZE register val
+ * @bri_q_maxsize_reg: BRI_QUEUE_MAXSIZE register val
+ * @comp_resp_timeout_reg: COMP_RESP_TIMEOUT register val
+ * @bus_slave_control_reg: BUS_SLAVE_CONTROL_REG register val
+ * @deadlock_avoid_type_reg: DEADLOCK_AVOID_TYPE register val
+ * @intpin_reg_wpermit_reg0: INTPIN_REG_WPERMIT register 0 val
+ * @intpin_reg_wpermit_reg1: INTPIN_REG_WPERMIT register 1 val
+ * @intpin_reg_wpermit_reg2: INTPIN_REG_WPERMIT register 2 val
+ * @intpin_reg_wpermit_reg3: INTPIN_REG_WPERMIT register 3 val
+ * @int_reduce_control_reg: INT_REDUCE_CONTROL registers val
+ * @clkcfg_reg: CLK CFG register val
+ * @funcsel_reg: Function select register value
+ * @pch_phub_base_address: Register base address
+ * @pch_phub_extrom_base_address: external rom base address
+ * @pch_mac_start_address: MAC address area start address
+ * @pch_opt_rom_start_address: Option ROM start address
+ * @ioh_type: Save IOH type
+ * @pdev: pointer to pci device struct
+ */
+struct pch_phub_reg {
+ u32 phub_id_reg;
+ u32 q_pri_val_reg;
+ u32 rc_q_maxsize_reg;
+ u32 bri_q_maxsize_reg;
+ u32 comp_resp_timeout_reg;
+ u32 bus_slave_control_reg;
+ u32 deadlock_avoid_type_reg;
+ u32 intpin_reg_wpermit_reg0;
+ u32 intpin_reg_wpermit_reg1;
+ u32 intpin_reg_wpermit_reg2;
+ u32 intpin_reg_wpermit_reg3;
+ u32 int_reduce_control_reg[MAX_NUM_INT_REDUCE_CONTROL_REG];
+ u32 clkcfg_reg;
+ u32 funcsel_reg;
+ void __iomem *pch_phub_base_address;
+ void __iomem *pch_phub_extrom_base_address;
+ u32 pch_mac_start_address;
+ u32 pch_opt_rom_start_address;
+ int ioh_type;
+ struct pci_dev *pdev;
+};
+
+/* SROM SPEC for MAC address assignment offset */
+static const int pch_phub_mac_offset[ETH_ALEN] = {0x3, 0x2, 0x1, 0x0, 0xb, 0xa};
+
+static DEFINE_MUTEX(pch_phub_mutex);
+
+/**
+ * pch_phub_read_modify_write_reg() - Reading modifying and writing register
+ * @reg_addr_offset: Register offset address value.
+ * @data: Writing value.
+ * @mask: Mask value.
+ */
+static void pch_phub_read_modify_write_reg(struct pch_phub_reg *chip,
+ unsigned int reg_addr_offset,
+ unsigned int data, unsigned int mask)
+{
+ void __iomem *reg_addr = chip->pch_phub_base_address + reg_addr_offset;
+ iowrite32(((ioread32(reg_addr) & ~mask)) | data, reg_addr);
+}
+
+#ifdef CONFIG_PM
+/* pch_phub_save_reg_conf - saves register configuration */
+static void pch_phub_save_reg_conf(struct pci_dev *pdev)
+{
+ unsigned int i;
+ struct pch_phub_reg *chip = pci_get_drvdata(pdev);
+
+ void __iomem *p = chip->pch_phub_base_address;
+
+ chip->phub_id_reg = ioread32(p + PCH_PHUB_ID_REG);
+ chip->q_pri_val_reg = ioread32(p + PCH_PHUB_QUEUE_PRI_VAL_REG);
+ chip->rc_q_maxsize_reg = ioread32(p + PCH_PHUB_RC_QUEUE_MAXSIZE_REG);
+ chip->bri_q_maxsize_reg = ioread32(p + PCH_PHUB_BRI_QUEUE_MAXSIZE_REG);
+ chip->comp_resp_timeout_reg =
+ ioread32(p + PCH_PHUB_COMP_RESP_TIMEOUT_REG);
+ chip->bus_slave_control_reg =
+ ioread32(p + PCH_PHUB_BUS_SLAVE_CONTROL_REG);
+ chip->deadlock_avoid_type_reg =
+ ioread32(p + PCH_PHUB_DEADLOCK_AVOID_TYPE_REG);
+ chip->intpin_reg_wpermit_reg0 =
+ ioread32(p + PCH_PHUB_INTPIN_REG_WPERMIT_REG0);
+ chip->intpin_reg_wpermit_reg1 =
+ ioread32(p + PCH_PHUB_INTPIN_REG_WPERMIT_REG1);
+ chip->intpin_reg_wpermit_reg2 =
+ ioread32(p + PCH_PHUB_INTPIN_REG_WPERMIT_REG2);
+ chip->intpin_reg_wpermit_reg3 =
+ ioread32(p + PCH_PHUB_INTPIN_REG_WPERMIT_REG3);
+ dev_dbg(&pdev->dev, "%s : "
+ "chip->phub_id_reg=%x, "
+ "chip->q_pri_val_reg=%x, "
+ "chip->rc_q_maxsize_reg=%x, "
+ "chip->bri_q_maxsize_reg=%x, "
+ "chip->comp_resp_timeout_reg=%x, "
+ "chip->bus_slave_control_reg=%x, "
+ "chip->deadlock_avoid_type_reg=%x, "
+ "chip->intpin_reg_wpermit_reg0=%x, "
+ "chip->intpin_reg_wpermit_reg1=%x, "
+ "chip->intpin_reg_wpermit_reg2=%x, "
+ "chip->intpin_reg_wpermit_reg3=%x\n", __func__,
+ chip->phub_id_reg,
+ chip->q_pri_val_reg,
+ chip->rc_q_maxsize_reg,
+ chip->bri_q_maxsize_reg,
+ chip->comp_resp_timeout_reg,
+ chip->bus_slave_control_reg,
+ chip->deadlock_avoid_type_reg,
+ chip->intpin_reg_wpermit_reg0,
+ chip->intpin_reg_wpermit_reg1,
+ chip->intpin_reg_wpermit_reg2,
+ chip->intpin_reg_wpermit_reg3);
+ for (i = 0; i < MAX_NUM_INT_REDUCE_CONTROL_REG; i++) {
+ chip->int_reduce_control_reg[i] =
+ ioread32(p + PCH_PHUB_INT_REDUCE_CONTROL_REG_BASE + 4 * i);
+ dev_dbg(&pdev->dev, "%s : "
+ "chip->int_reduce_control_reg[%d]=%x\n",
+ __func__, i, chip->int_reduce_control_reg[i]);
+ }
+ chip->clkcfg_reg = ioread32(p + CLKCFG_REG_OFFSET);
+ if ((chip->ioh_type == 2) || (chip->ioh_type == 4))
+ chip->funcsel_reg = ioread32(p + FUNCSEL_REG_OFFSET);
+}
+
+/* pch_phub_restore_reg_conf - restore register configuration */
+static void pch_phub_restore_reg_conf(struct pci_dev *pdev)
+{
+ unsigned int i;
+ struct pch_phub_reg *chip = pci_get_drvdata(pdev);
+ void __iomem *p;
+ p = chip->pch_phub_base_address;
+
+ iowrite32(chip->phub_id_reg, p + PCH_PHUB_ID_REG);
+ iowrite32(chip->q_pri_val_reg, p + PCH_PHUB_QUEUE_PRI_VAL_REG);
+ iowrite32(chip->rc_q_maxsize_reg, p + PCH_PHUB_RC_QUEUE_MAXSIZE_REG);
+ iowrite32(chip->bri_q_maxsize_reg, p + PCH_PHUB_BRI_QUEUE_MAXSIZE_REG);
+ iowrite32(chip->comp_resp_timeout_reg,
+ p + PCH_PHUB_COMP_RESP_TIMEOUT_REG);
+ iowrite32(chip->bus_slave_control_reg,
+ p + PCH_PHUB_BUS_SLAVE_CONTROL_REG);
+ iowrite32(chip->deadlock_avoid_type_reg,
+ p + PCH_PHUB_DEADLOCK_AVOID_TYPE_REG);
+ iowrite32(chip->intpin_reg_wpermit_reg0,
+ p + PCH_PHUB_INTPIN_REG_WPERMIT_REG0);
+ iowrite32(chip->intpin_reg_wpermit_reg1,
+ p + PCH_PHUB_INTPIN_REG_WPERMIT_REG1);
+ iowrite32(chip->intpin_reg_wpermit_reg2,
+ p + PCH_PHUB_INTPIN_REG_WPERMIT_REG2);
+ iowrite32(chip->intpin_reg_wpermit_reg3,
+ p + PCH_PHUB_INTPIN_REG_WPERMIT_REG3);
+ dev_dbg(&pdev->dev, "%s : "
+ "chip->phub_id_reg=%x, "
+ "chip->q_pri_val_reg=%x, "
+ "chip->rc_q_maxsize_reg=%x, "
+ "chip->bri_q_maxsize_reg=%x, "
+ "chip->comp_resp_timeout_reg=%x, "
+ "chip->bus_slave_control_reg=%x, "
+ "chip->deadlock_avoid_type_reg=%x, "
+ "chip->intpin_reg_wpermit_reg0=%x, "
+ "chip->intpin_reg_wpermit_reg1=%x, "
+ "chip->intpin_reg_wpermit_reg2=%x, "
+ "chip->intpin_reg_wpermit_reg3=%x\n", __func__,
+ chip->phub_id_reg,
+ chip->q_pri_val_reg,
+ chip->rc_q_maxsize_reg,
+ chip->bri_q_maxsize_reg,
+ chip->comp_resp_timeout_reg,
+ chip->bus_slave_control_reg,
+ chip->deadlock_avoid_type_reg,
+ chip->intpin_reg_wpermit_reg0,
+ chip->intpin_reg_wpermit_reg1,
+ chip->intpin_reg_wpermit_reg2,
+ chip->intpin_reg_wpermit_reg3);
+ for (i = 0; i < MAX_NUM_INT_REDUCE_CONTROL_REG; i++) {
+ iowrite32(chip->int_reduce_control_reg[i],
+ p + PCH_PHUB_INT_REDUCE_CONTROL_REG_BASE + 4 * i);
+ dev_dbg(&pdev->dev, "%s : "
+ "chip->int_reduce_control_reg[%d]=%x\n",
+ __func__, i, chip->int_reduce_control_reg[i]);
+ }
+
+ iowrite32(chip->clkcfg_reg, p + CLKCFG_REG_OFFSET);
+ if ((chip->ioh_type == 2) || (chip->ioh_type == 4))
+ iowrite32(chip->funcsel_reg, p + FUNCSEL_REG_OFFSET);
+}
+#endif
+
+/**
+ * pch_phub_read_serial_rom() - Reading Serial ROM
+ * @offset_address: Serial ROM offset address to read.
+ * @data: Read buffer for specified Serial ROM value.
+ */
+static void pch_phub_read_serial_rom(struct pch_phub_reg *chip,
+ unsigned int offset_address, u8 *data)
+{
+ void __iomem *mem_addr = chip->pch_phub_extrom_base_address +
+ offset_address;
+
+ *data = ioread8(mem_addr);
+}
+
+/**
+ * pch_phub_write_serial_rom() - Writing Serial ROM
+ * @offset_address: Serial ROM offset address.
+ * @data: Serial ROM value to write.
+ */
+static int pch_phub_write_serial_rom(struct pch_phub_reg *chip,
+ unsigned int offset_address, u8 data)
+{
+ void __iomem *mem_addr = chip->pch_phub_extrom_base_address +
+ (offset_address & PCH_WORD_ADDR_MASK);
+ int i;
+ unsigned int word_data;
+ unsigned int pos;
+ unsigned int mask;
+ pos = (offset_address % 4) * 8;
+ mask = ~(0xFF << pos);
+
+ iowrite32(PCH_PHUB_ROM_WRITE_ENABLE,
+ chip->pch_phub_extrom_base_address + PHUB_CONTROL);
+
+ word_data = ioread32(mem_addr);
+ iowrite32((word_data & mask) | (u32)data << pos, mem_addr);
+
+ i = 0;
+ while (ioread8(chip->pch_phub_extrom_base_address +
+ PHUB_STATUS) != 0x00) {
+ msleep(1);
+ if (i == PHUB_TIMEOUT)
+ return -ETIMEDOUT;
+ i++;
+ }
+
+ iowrite32(PCH_PHUB_ROM_WRITE_DISABLE,
+ chip->pch_phub_extrom_base_address + PHUB_CONTROL);
+
+ return 0;
+}
+
+/**
+ * pch_phub_read_serial_rom_val() - Read Serial ROM value
+ * @offset_address: Serial ROM address offset value.
+ * @data: Serial ROM value to read.
+ */
+static void pch_phub_read_serial_rom_val(struct pch_phub_reg *chip,
+ unsigned int offset_address, u8 *data)
+{
+ unsigned int mem_addr;
+
+ mem_addr = chip->pch_mac_start_address +
+ pch_phub_mac_offset[offset_address];
+
+ pch_phub_read_serial_rom(chip, mem_addr, data);
+}
+
+/**
+ * pch_phub_write_serial_rom_val() - writing Serial ROM value
+ * @offset_address: Serial ROM address offset value.
+ * @data: Serial ROM value.
+ */
+static int pch_phub_write_serial_rom_val(struct pch_phub_reg *chip,
+ unsigned int offset_address, u8 data)
+{
+ int retval;
+ unsigned int mem_addr;
+
+ mem_addr = chip->pch_mac_start_address +
+ pch_phub_mac_offset[offset_address];
+
+ retval = pch_phub_write_serial_rom(chip, mem_addr, data);
+
+ return retval;
+}
+
+/* pch_phub_gbe_serial_rom_conf - makes Serial ROM header format configuration
+ * for Gigabit Ethernet MAC address
+ */
+static int pch_phub_gbe_serial_rom_conf(struct pch_phub_reg *chip)
+{
+ int retval;
+
+ retval = pch_phub_write_serial_rom(chip, 0x0b, 0xbc);
+ retval |= pch_phub_write_serial_rom(chip, 0x0a, 0x10);
+ retval |= pch_phub_write_serial_rom(chip, 0x09, 0x01);
+ retval |= pch_phub_write_serial_rom(chip, 0x08, 0x02);
+
+ retval |= pch_phub_write_serial_rom(chip, 0x0f, 0x00);
+ retval |= pch_phub_write_serial_rom(chip, 0x0e, 0x00);
+ retval |= pch_phub_write_serial_rom(chip, 0x0d, 0x00);
+ retval |= pch_phub_write_serial_rom(chip, 0x0c, 0x80);
+
+ retval |= pch_phub_write_serial_rom(chip, 0x13, 0xbc);
+ retval |= pch_phub_write_serial_rom(chip, 0x12, 0x10);
+ retval |= pch_phub_write_serial_rom(chip, 0x11, 0x01);
+ retval |= pch_phub_write_serial_rom(chip, 0x10, 0x18);
+
+ retval |= pch_phub_write_serial_rom(chip, 0x1b, 0xbc);
+ retval |= pch_phub_write_serial_rom(chip, 0x1a, 0x10);
+ retval |= pch_phub_write_serial_rom(chip, 0x19, 0x01);
+ retval |= pch_phub_write_serial_rom(chip, 0x18, 0x19);
+
+ retval |= pch_phub_write_serial_rom(chip, 0x23, 0xbc);
+ retval |= pch_phub_write_serial_rom(chip, 0x22, 0x10);
+ retval |= pch_phub_write_serial_rom(chip, 0x21, 0x01);
+ retval |= pch_phub_write_serial_rom(chip, 0x20, 0x3a);
+
+ retval |= pch_phub_write_serial_rom(chip, 0x27, 0x01);
+ retval |= pch_phub_write_serial_rom(chip, 0x26, 0x00);
+ retval |= pch_phub_write_serial_rom(chip, 0x25, 0x00);
+ retval |= pch_phub_write_serial_rom(chip, 0x24, 0x00);
+
+ return retval;
+}
+
+/* pch_phub_gbe_serial_rom_conf_mp - makes SerialROM header format configuration
+ * for Gigabit Ethernet MAC address
+ */
+static int pch_phub_gbe_serial_rom_conf_mp(struct pch_phub_reg *chip)
+{
+ int retval;
+ u32 offset_addr;
+
+ offset_addr = 0x200;
+ retval = pch_phub_write_serial_rom(chip, 0x03 + offset_addr, 0xbc);
+ retval |= pch_phub_write_serial_rom(chip, 0x02 + offset_addr, 0x00);
+ retval |= pch_phub_write_serial_rom(chip, 0x01 + offset_addr, 0x40);
+ retval |= pch_phub_write_serial_rom(chip, 0x00 + offset_addr, 0x02);
+
+ retval |= pch_phub_write_serial_rom(chip, 0x07 + offset_addr, 0x00);
+ retval |= pch_phub_write_serial_rom(chip, 0x06 + offset_addr, 0x00);
+ retval |= pch_phub_write_serial_rom(chip, 0x05 + offset_addr, 0x00);
+ retval |= pch_phub_write_serial_rom(chip, 0x04 + offset_addr, 0x80);
+
+ retval |= pch_phub_write_serial_rom(chip, 0x0b + offset_addr, 0xbc);
+ retval |= pch_phub_write_serial_rom(chip, 0x0a + offset_addr, 0x00);
+ retval |= pch_phub_write_serial_rom(chip, 0x09 + offset_addr, 0x40);
+ retval |= pch_phub_write_serial_rom(chip, 0x08 + offset_addr, 0x18);
+
+ retval |= pch_phub_write_serial_rom(chip, 0x13 + offset_addr, 0xbc);
+ retval |= pch_phub_write_serial_rom(chip, 0x12 + offset_addr, 0x00);
+ retval |= pch_phub_write_serial_rom(chip, 0x11 + offset_addr, 0x40);
+ retval |= pch_phub_write_serial_rom(chip, 0x10 + offset_addr, 0x19);
+
+ retval |= pch_phub_write_serial_rom(chip, 0x1b + offset_addr, 0xbc);
+ retval |= pch_phub_write_serial_rom(chip, 0x1a + offset_addr, 0x00);
+ retval |= pch_phub_write_serial_rom(chip, 0x19 + offset_addr, 0x40);
+ retval |= pch_phub_write_serial_rom(chip, 0x18 + offset_addr, 0x3a);
+
+ retval |= pch_phub_write_serial_rom(chip, 0x1f + offset_addr, 0x01);
+ retval |= pch_phub_write_serial_rom(chip, 0x1e + offset_addr, 0x00);
+ retval |= pch_phub_write_serial_rom(chip, 0x1d + offset_addr, 0x00);
+ retval |= pch_phub_write_serial_rom(chip, 0x1c + offset_addr, 0x00);
+
+ return retval;
+}
+
+/**
+ * pch_phub_read_gbe_mac_addr() - Read Gigabit Ethernet MAC address
+ * @offset_address: Gigabit Ethernet MAC address offset value.
+ * @data: Buffer of the Gigabit Ethernet MAC address value.
+ */
+static void pch_phub_read_gbe_mac_addr(struct pch_phub_reg *chip, u8 *data)
+{
+ int i;
+ for (i = 0; i < ETH_ALEN; i++)
+ pch_phub_read_serial_rom_val(chip, i, &data[i]);
+}
+
+/**
+ * pch_phub_write_gbe_mac_addr() - Write MAC address
+ * @offset_address: Gigabit Ethernet MAC address offset value.
+ * @data: Gigabit Ethernet MAC address value.
+ */
+static int pch_phub_write_gbe_mac_addr(struct pch_phub_reg *chip, u8 *data)
+{
+ int retval;
+ int i;
+
+ if ((chip->ioh_type == 1) || (chip->ioh_type == 5)) /* EG20T or ML7831*/
+ retval = pch_phub_gbe_serial_rom_conf(chip);
+ else /* ML7223 */
+ retval = pch_phub_gbe_serial_rom_conf_mp(chip);
+ if (retval)
+ return retval;
+
+ for (i = 0; i < ETH_ALEN; i++) {
+ retval = pch_phub_write_serial_rom_val(chip, i, data[i]);
+ if (retval)
+ return retval;
+ }
+
+ return retval;
+}
+
+static ssize_t pch_phub_bin_read(struct file *filp, struct kobject *kobj,
+ struct bin_attribute *attr, char *buf,
+ loff_t off, size_t count)
+{
+ unsigned int rom_signature;
+ unsigned char rom_length;
+ unsigned int tmp;
+ unsigned int addr_offset;
+ unsigned int orom_size;
+ int ret;
+ int err;
+ ssize_t rom_size;
+
+ struct pch_phub_reg *chip =
+ dev_get_drvdata(container_of(kobj, struct device, kobj));
+
+ ret = mutex_lock_interruptible(&pch_phub_mutex);
+ if (ret) {
+ err = -ERESTARTSYS;
+ goto return_err_nomutex;
+ }
+
+ /* Get Rom signature */
+ chip->pch_phub_extrom_base_address = pci_map_rom(chip->pdev, &rom_size);
+ if (!chip->pch_phub_extrom_base_address)
+ goto exrom_map_err;
+
+ pch_phub_read_serial_rom(chip, chip->pch_opt_rom_start_address,
+ (unsigned char *)&rom_signature);
+ rom_signature &= 0xff;
+ pch_phub_read_serial_rom(chip, chip->pch_opt_rom_start_address + 1,
+ (unsigned char *)&tmp);
+ rom_signature |= (tmp & 0xff) << 8;
+ if (rom_signature == 0xAA55) {
+ pch_phub_read_serial_rom(chip,
+ chip->pch_opt_rom_start_address + 2,
+ &rom_length);
+ orom_size = rom_length * 512;
+ if (orom_size < off) {
+ addr_offset = 0;
+ goto return_ok;
+ }
+ if (orom_size < count) {
+ addr_offset = 0;
+ goto return_ok;
+ }
+
+ for (addr_offset = 0; addr_offset < count; addr_offset++) {
+ pch_phub_read_serial_rom(chip,
+ chip->pch_opt_rom_start_address + addr_offset + off,
+ &buf[addr_offset]);
+ }
+ } else {
+ err = -ENODATA;
+ goto return_err;
+ }
+return_ok:
+ pci_unmap_rom(chip->pdev, chip->pch_phub_extrom_base_address);
+ mutex_unlock(&pch_phub_mutex);
+ return addr_offset;
+
+return_err:
+ pci_unmap_rom(chip->pdev, chip->pch_phub_extrom_base_address);
+exrom_map_err:
+ mutex_unlock(&pch_phub_mutex);
+return_err_nomutex:
+ return err;
+}
+
+static ssize_t pch_phub_bin_write(struct file *filp, struct kobject *kobj,
+ struct bin_attribute *attr,
+ char *buf, loff_t off, size_t count)
+{
+ int err;
+ unsigned int addr_offset;
+ int ret;
+ ssize_t rom_size;
+ struct pch_phub_reg *chip =
+ dev_get_drvdata(container_of(kobj, struct device, kobj));
+
+ ret = mutex_lock_interruptible(&pch_phub_mutex);
+ if (ret)
+ return -ERESTARTSYS;
+
+ if (off > PCH_PHUB_OROM_SIZE) {
+ addr_offset = 0;
+ goto return_ok;
+ }
+ if (count > PCH_PHUB_OROM_SIZE) {
+ addr_offset = 0;
+ goto return_ok;
+ }
+
+ chip->pch_phub_extrom_base_address = pci_map_rom(chip->pdev, &rom_size);
+ if (!chip->pch_phub_extrom_base_address) {
+ err = -ENOMEM;
+ goto exrom_map_err;
+ }
+
+ for (addr_offset = 0; addr_offset < count; addr_offset++) {
+ if (PCH_PHUB_OROM_SIZE < off + addr_offset)
+ goto return_ok;
+
+ ret = pch_phub_write_serial_rom(chip,
+ chip->pch_opt_rom_start_address + addr_offset + off,
+ buf[addr_offset]);
+ if (ret) {
+ err = ret;
+ goto return_err;
+ }
+ }
+
+return_ok:
+ pci_unmap_rom(chip->pdev, chip->pch_phub_extrom_base_address);
+ mutex_unlock(&pch_phub_mutex);
+ return addr_offset;
+
+return_err:
+ pci_unmap_rom(chip->pdev, chip->pch_phub_extrom_base_address);
+
+exrom_map_err:
+ mutex_unlock(&pch_phub_mutex);
+ return err;
+}
+
+static ssize_t show_pch_mac(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ u8 mac[8];
+ struct pch_phub_reg *chip = dev_get_drvdata(dev);
+ ssize_t rom_size;
+
+ chip->pch_phub_extrom_base_address = pci_map_rom(chip->pdev, &rom_size);
+ if (!chip->pch_phub_extrom_base_address)
+ return -ENOMEM;
+
+ pch_phub_read_gbe_mac_addr(chip, mac);
+ pci_unmap_rom(chip->pdev, chip->pch_phub_extrom_base_address);
+
+ return sprintf(buf, "%pM\n", mac);
+}
+
+static ssize_t store_pch_mac(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ u8 mac[ETH_ALEN];
+ ssize_t rom_size;
+ struct pch_phub_reg *chip = dev_get_drvdata(dev);
+ int ret;
+
+ if (!mac_pton(buf, mac))
+ return -EINVAL;
+
+ chip->pch_phub_extrom_base_address = pci_map_rom(chip->pdev, &rom_size);
+ if (!chip->pch_phub_extrom_base_address)
+ return -ENOMEM;
+
+ ret = pch_phub_write_gbe_mac_addr(chip, mac);
+ pci_unmap_rom(chip->pdev, chip->pch_phub_extrom_base_address);
+ if (ret)
+ return ret;
+
+ return count;
+}
+
+static DEVICE_ATTR(pch_mac, S_IRUGO | S_IWUSR, show_pch_mac, store_pch_mac);
+
+static struct bin_attribute pch_bin_attr = {
+ .attr = {
+ .name = "pch_firmware",
+ .mode = S_IRUGO | S_IWUSR,
+ },
+ .size = PCH_PHUB_OROM_SIZE + 1,
+ .read = pch_phub_bin_read,
+ .write = pch_phub_bin_write,
+};
+
+static int pch_phub_probe(struct pci_dev *pdev,
+ const struct pci_device_id *id)
+{
+ int ret;
+ struct pch_phub_reg *chip;
+
+ chip = kzalloc(sizeof(struct pch_phub_reg), GFP_KERNEL);
+ if (chip == NULL)
+ return -ENOMEM;
+
+ ret = pci_enable_device(pdev);
+ if (ret) {
+ dev_err(&pdev->dev,
+ "%s : pci_enable_device FAILED(ret=%d)", __func__, ret);
+ goto err_pci_enable_dev;
+ }
+ dev_dbg(&pdev->dev, "%s : pci_enable_device returns %d\n", __func__,
+ ret);
+
+ ret = pci_request_regions(pdev, KBUILD_MODNAME);
+ if (ret) {
+ dev_err(&pdev->dev,
+ "%s : pci_request_regions FAILED(ret=%d)", __func__, ret);
+ goto err_req_regions;
+ }
+ dev_dbg(&pdev->dev, "%s : "
+ "pci_request_regions returns %d\n", __func__, ret);
+
+ chip->pch_phub_base_address = pci_iomap(pdev, 1, 0);
+
+
+ if (chip->pch_phub_base_address == NULL) {
+ dev_err(&pdev->dev, "%s : pci_iomap FAILED", __func__);
+ ret = -ENOMEM;
+ goto err_pci_iomap;
+ }
+ dev_dbg(&pdev->dev, "%s : pci_iomap SUCCESS and value "
+ "in pch_phub_base_address variable is %p\n", __func__,
+ chip->pch_phub_base_address);
+
+ chip->pdev = pdev; /* Save pci device struct */
+
+ if (id->driver_data == 1) { /* EG20T PCH */
+ const char *board_name;
+
+ ret = sysfs_create_file(&pdev->dev.kobj,
+ &dev_attr_pch_mac.attr);
+ if (ret)
+ goto err_sysfs_create;
+
+ ret = sysfs_create_bin_file(&pdev->dev.kobj, &pch_bin_attr);
+ if (ret)
+ goto exit_bin_attr;
+
+ pch_phub_read_modify_write_reg(chip,
+ (unsigned int)CLKCFG_REG_OFFSET,
+ CLKCFG_CAN_50MHZ,
+ CLKCFG_CANCLK_MASK);
+
+ /* quirk for CM-iTC board */
+ board_name = dmi_get_system_info(DMI_BOARD_NAME);
+ if (board_name && strstr(board_name, "CM-iTC"))
+ pch_phub_read_modify_write_reg(chip,
+ (unsigned int)CLKCFG_REG_OFFSET,
+ CLKCFG_UART_48MHZ | CLKCFG_BAUDDIV |
+ CLKCFG_PLL2VCO | CLKCFG_UARTCLKSEL,
+ CLKCFG_UART_MASK);
+
+ /* set the prefech value */
+ iowrite32(0x000affaa, chip->pch_phub_base_address + 0x14);
+ /* set the interrupt delay value */
+ iowrite32(0x25, chip->pch_phub_base_address + 0x44);
+ chip->pch_opt_rom_start_address = PCH_PHUB_ROM_START_ADDR_EG20T;
+ chip->pch_mac_start_address = PCH_PHUB_MAC_START_ADDR_EG20T;
+ } else if (id->driver_data == 2) { /* ML7213 IOH */
+ ret = sysfs_create_bin_file(&pdev->dev.kobj, &pch_bin_attr);
+ if (ret)
+ goto err_sysfs_create;
+ /* set the prefech value
+ * Device2(USB OHCI #1/ USB EHCI #1/ USB Device):a
+ * Device4(SDIO #0,1,2):f
+ * Device6(SATA 2):f
+ * Device8(USB OHCI #0/ USB EHCI #0):a
+ */
+ iowrite32(0x000affa0, chip->pch_phub_base_address + 0x14);
+ chip->pch_opt_rom_start_address =\
+ PCH_PHUB_ROM_START_ADDR_ML7213;
+ } else if (id->driver_data == 3) { /* ML7223 IOH Bus-m*/
+ /* set the prefech value
+ * Device8(GbE)
+ */
+ iowrite32(0x000a0000, chip->pch_phub_base_address + 0x14);
+ /* set the interrupt delay value */
+ iowrite32(0x25, chip->pch_phub_base_address + 0x140);
+ chip->pch_opt_rom_start_address =\
+ PCH_PHUB_ROM_START_ADDR_ML7223;
+ chip->pch_mac_start_address = PCH_PHUB_MAC_START_ADDR_ML7223;
+ } else if (id->driver_data == 4) { /* ML7223 IOH Bus-n*/
+ ret = sysfs_create_file(&pdev->dev.kobj,
+ &dev_attr_pch_mac.attr);
+ if (ret)
+ goto err_sysfs_create;
+ ret = sysfs_create_bin_file(&pdev->dev.kobj, &pch_bin_attr);
+ if (ret)
+ goto exit_bin_attr;
+ /* set the prefech value
+ * Device2(USB OHCI #0,1,2,3/ USB EHCI #0):a
+ * Device4(SDIO #0,1):f
+ * Device6(SATA 2):f
+ */
+ iowrite32(0x0000ffa0, chip->pch_phub_base_address + 0x14);
+ chip->pch_opt_rom_start_address =\
+ PCH_PHUB_ROM_START_ADDR_ML7223;
+ chip->pch_mac_start_address = PCH_PHUB_MAC_START_ADDR_ML7223;
+ } else if (id->driver_data == 5) { /* ML7831 */
+ ret = sysfs_create_file(&pdev->dev.kobj,
+ &dev_attr_pch_mac.attr);
+ if (ret)
+ goto err_sysfs_create;
+
+ ret = sysfs_create_bin_file(&pdev->dev.kobj, &pch_bin_attr);
+ if (ret)
+ goto exit_bin_attr;
+
+ /* set the prefech value */
+ iowrite32(0x000affaa, chip->pch_phub_base_address + 0x14);
+ /* set the interrupt delay value */
+ iowrite32(0x25, chip->pch_phub_base_address + 0x44);
+ chip->pch_opt_rom_start_address = PCH_PHUB_ROM_START_ADDR_EG20T;
+ chip->pch_mac_start_address = PCH_PHUB_MAC_START_ADDR_EG20T;
+ }
+
+ chip->ioh_type = id->driver_data;
+ pci_set_drvdata(pdev, chip);
+
+ return 0;
+exit_bin_attr:
+ sysfs_remove_file(&pdev->dev.kobj, &dev_attr_pch_mac.attr);
+
+err_sysfs_create:
+ pci_iounmap(pdev, chip->pch_phub_base_address);
+err_pci_iomap:
+ pci_release_regions(pdev);
+err_req_regions:
+ pci_disable_device(pdev);
+err_pci_enable_dev:
+ kfree(chip);
+ dev_err(&pdev->dev, "%s returns %d\n", __func__, ret);
+ return ret;
+}
+
+static void pch_phub_remove(struct pci_dev *pdev)
+{
+ struct pch_phub_reg *chip = pci_get_drvdata(pdev);
+
+ sysfs_remove_file(&pdev->dev.kobj, &dev_attr_pch_mac.attr);
+ sysfs_remove_bin_file(&pdev->dev.kobj, &pch_bin_attr);
+ pci_iounmap(pdev, chip->pch_phub_base_address);
+ pci_release_regions(pdev);
+ pci_disable_device(pdev);
+ kfree(chip);
+}
+
+#ifdef CONFIG_PM
+
+static int pch_phub_suspend(struct pci_dev *pdev, pm_message_t state)
+{
+ int ret;
+
+ pch_phub_save_reg_conf(pdev);
+ ret = pci_save_state(pdev);
+ if (ret) {
+ dev_err(&pdev->dev,
+ " %s -pci_save_state returns %d\n", __func__, ret);
+ return ret;
+ }
+ pci_enable_wake(pdev, PCI_D3hot, 0);
+ pci_disable_device(pdev);
+ pci_set_power_state(pdev, pci_choose_state(pdev, state));
+
+ return 0;
+}
+
+static int pch_phub_resume(struct pci_dev *pdev)
+{
+ int ret;
+
+ pci_set_power_state(pdev, PCI_D0);
+ pci_restore_state(pdev);
+ ret = pci_enable_device(pdev);
+ if (ret) {
+ dev_err(&pdev->dev,
+ "%s-pci_enable_device failed(ret=%d) ", __func__, ret);
+ return ret;
+ }
+
+ pci_enable_wake(pdev, PCI_D3hot, 0);
+ pch_phub_restore_reg_conf(pdev);
+
+ return 0;
+}
+#else
+#define pch_phub_suspend NULL
+#define pch_phub_resume NULL
+#endif /* CONFIG_PM */
+
+static struct pci_device_id pch_phub_pcidev_id[] = {
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_PCH1_PHUB), 1, },
+ { PCI_VDEVICE(ROHM, PCI_DEVICE_ID_ROHM_ML7213_PHUB), 2, },
+ { PCI_VDEVICE(ROHM, PCI_DEVICE_ID_ROHM_ML7223_mPHUB), 3, },
+ { PCI_VDEVICE(ROHM, PCI_DEVICE_ID_ROHM_ML7223_nPHUB), 4, },
+ { PCI_VDEVICE(ROHM, PCI_DEVICE_ID_ROHM_ML7831_PHUB), 5, },
+ { }
+};
+MODULE_DEVICE_TABLE(pci, pch_phub_pcidev_id);
+
+static struct pci_driver pch_phub_driver = {
+ .name = "pch_phub",
+ .id_table = pch_phub_pcidev_id,
+ .probe = pch_phub_probe,
+ .remove = pch_phub_remove,
+ .suspend = pch_phub_suspend,
+ .resume = pch_phub_resume
+};
+
+module_pci_driver(pch_phub_driver);
+
+MODULE_DESCRIPTION("Intel EG20T PCH/LAPIS Semiconductor IOH(ML7213/ML7223) PHUB");
+MODULE_LICENSE("GPL");
diff --git a/drivers/misc/phantom.c b/drivers/misc/phantom.c
new file mode 100644
index 0000000..3075492
--- /dev/null
+++ b/drivers/misc/phantom.c
@@ -0,0 +1,571 @@
+/*
+ * Copyright (C) 2005-2007 Jiri Slaby <jirislaby@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * You need a userspace library to cooperate with this driver. It (and other
+ * info) may be obtained here:
+ * http://www.fi.muni.cz/~xslaby/phantom.html
+ * or alternatively, you might use OpenHaptics provided by Sensable.
+ */
+
+#include <linux/compat.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/pci.h>
+#include <linux/fs.h>
+#include <linux/poll.h>
+#include <linux/interrupt.h>
+#include <linux/cdev.h>
+#include <linux/slab.h>
+#include <linux/phantom.h>
+#include <linux/sched.h>
+#include <linux/mutex.h>
+
+#include <linux/atomic.h>
+#include <asm/io.h>
+
+#define PHANTOM_VERSION "n0.9.8"
+
+#define PHANTOM_MAX_MINORS 8
+
+#define PHN_IRQCTL 0x4c /* irq control in caddr space */
+
+#define PHB_RUNNING 1
+#define PHB_NOT_OH 2
+
+static DEFINE_MUTEX(phantom_mutex);
+static struct class *phantom_class;
+static int phantom_major;
+
+struct phantom_device {
+ unsigned int opened;
+ void __iomem *caddr;
+ u32 __iomem *iaddr;
+ u32 __iomem *oaddr;
+ unsigned long status;
+ atomic_t counter;
+
+ wait_queue_head_t wait;
+ struct cdev cdev;
+
+ struct mutex open_lock;
+ spinlock_t regs_lock;
+
+ /* used in NOT_OH mode */
+ struct phm_regs oregs;
+ u32 ctl_reg;
+};
+
+static unsigned char phantom_devices[PHANTOM_MAX_MINORS];
+
+static int phantom_status(struct phantom_device *dev, unsigned long newstat)
+{
+ pr_debug("phantom_status %lx %lx\n", dev->status, newstat);
+
+ if (!(dev->status & PHB_RUNNING) && (newstat & PHB_RUNNING)) {
+ atomic_set(&dev->counter, 0);
+ iowrite32(PHN_CTL_IRQ, dev->iaddr + PHN_CONTROL);
+ iowrite32(0x43, dev->caddr + PHN_IRQCTL);
+ ioread32(dev->caddr + PHN_IRQCTL); /* PCI posting */
+ } else if ((dev->status & PHB_RUNNING) && !(newstat & PHB_RUNNING)) {
+ iowrite32(0, dev->caddr + PHN_IRQCTL);
+ ioread32(dev->caddr + PHN_IRQCTL); /* PCI posting */
+ }
+
+ dev->status = newstat;
+
+ return 0;
+}
+
+/*
+ * File ops
+ */
+
+static long phantom_ioctl(struct file *file, unsigned int cmd,
+ unsigned long arg)
+{
+ struct phantom_device *dev = file->private_data;
+ struct phm_regs rs;
+ struct phm_reg r;
+ void __user *argp = (void __user *)arg;
+ unsigned long flags;
+ unsigned int i;
+
+ switch (cmd) {
+ case PHN_SETREG:
+ case PHN_SET_REG:
+ if (copy_from_user(&r, argp, sizeof(r)))
+ return -EFAULT;
+
+ if (r.reg > 7)
+ return -EINVAL;
+
+ spin_lock_irqsave(&dev->regs_lock, flags);
+ if (r.reg == PHN_CONTROL && (r.value & PHN_CTL_IRQ) &&
+ phantom_status(dev, dev->status | PHB_RUNNING)){
+ spin_unlock_irqrestore(&dev->regs_lock, flags);
+ return -ENODEV;
+ }
+
+ pr_debug("phantom: writing %x to %u\n", r.value, r.reg);
+
+ /* preserve amp bit (don't allow to change it when in NOT_OH) */
+ if (r.reg == PHN_CONTROL && (dev->status & PHB_NOT_OH)) {
+ r.value &= ~PHN_CTL_AMP;
+ r.value |= dev->ctl_reg & PHN_CTL_AMP;
+ dev->ctl_reg = r.value;
+ }
+
+ iowrite32(r.value, dev->iaddr + r.reg);
+ ioread32(dev->iaddr); /* PCI posting */
+
+ if (r.reg == PHN_CONTROL && !(r.value & PHN_CTL_IRQ))
+ phantom_status(dev, dev->status & ~PHB_RUNNING);
+ spin_unlock_irqrestore(&dev->regs_lock, flags);
+ break;
+ case PHN_SETREGS:
+ case PHN_SET_REGS:
+ if (copy_from_user(&rs, argp, sizeof(rs)))
+ return -EFAULT;
+
+ pr_debug("phantom: SRS %u regs %x\n", rs.count, rs.mask);
+ spin_lock_irqsave(&dev->regs_lock, flags);
+ if (dev->status & PHB_NOT_OH)
+ memcpy(&dev->oregs, &rs, sizeof(rs));
+ else {
+ u32 m = min(rs.count, 8U);
+ for (i = 0; i < m; i++)
+ if (rs.mask & BIT(i))
+ iowrite32(rs.values[i], dev->oaddr + i);
+ ioread32(dev->iaddr); /* PCI posting */
+ }
+ spin_unlock_irqrestore(&dev->regs_lock, flags);
+ break;
+ case PHN_GETREG:
+ case PHN_GET_REG:
+ if (copy_from_user(&r, argp, sizeof(r)))
+ return -EFAULT;
+
+ if (r.reg > 7)
+ return -EINVAL;
+
+ r.value = ioread32(dev->iaddr + r.reg);
+
+ if (copy_to_user(argp, &r, sizeof(r)))
+ return -EFAULT;
+ break;
+ case PHN_GETREGS:
+ case PHN_GET_REGS: {
+ u32 m;
+
+ if (copy_from_user(&rs, argp, sizeof(rs)))
+ return -EFAULT;
+
+ m = min(rs.count, 8U);
+
+ pr_debug("phantom: GRS %u regs %x\n", rs.count, rs.mask);
+ spin_lock_irqsave(&dev->regs_lock, flags);
+ for (i = 0; i < m; i++)
+ if (rs.mask & BIT(i))
+ rs.values[i] = ioread32(dev->iaddr + i);
+ atomic_set(&dev->counter, 0);
+ spin_unlock_irqrestore(&dev->regs_lock, flags);
+
+ if (copy_to_user(argp, &rs, sizeof(rs)))
+ return -EFAULT;
+ break;
+ } case PHN_NOT_OH:
+ spin_lock_irqsave(&dev->regs_lock, flags);
+ if (dev->status & PHB_RUNNING) {
+ printk(KERN_ERR "phantom: you need to set NOT_OH "
+ "before you start the device!\n");
+ spin_unlock_irqrestore(&dev->regs_lock, flags);
+ return -EINVAL;
+ }
+ dev->status |= PHB_NOT_OH;
+ spin_unlock_irqrestore(&dev->regs_lock, flags);
+ break;
+ default:
+ return -ENOTTY;
+ }
+
+ return 0;
+}
+
+#ifdef CONFIG_COMPAT
+static long phantom_compat_ioctl(struct file *filp, unsigned int cmd,
+ unsigned long arg)
+{
+ if (_IOC_NR(cmd) <= 3 && _IOC_SIZE(cmd) == sizeof(compat_uptr_t)) {
+ cmd &= ~(_IOC_SIZEMASK << _IOC_SIZESHIFT);
+ cmd |= sizeof(void *) << _IOC_SIZESHIFT;
+ }
+ return phantom_ioctl(filp, cmd, (unsigned long)compat_ptr(arg));
+}
+#else
+#define phantom_compat_ioctl NULL
+#endif
+
+static int phantom_open(struct inode *inode, struct file *file)
+{
+ struct phantom_device *dev = container_of(inode->i_cdev,
+ struct phantom_device, cdev);
+
+ mutex_lock(&phantom_mutex);
+ nonseekable_open(inode, file);
+
+ if (mutex_lock_interruptible(&dev->open_lock)) {
+ mutex_unlock(&phantom_mutex);
+ return -ERESTARTSYS;
+ }
+
+ if (dev->opened) {
+ mutex_unlock(&dev->open_lock);
+ mutex_unlock(&phantom_mutex);
+ return -EINVAL;
+ }
+
+ WARN_ON(dev->status & PHB_NOT_OH);
+
+ file->private_data = dev;
+
+ atomic_set(&dev->counter, 0);
+ dev->opened++;
+ mutex_unlock(&dev->open_lock);
+ mutex_unlock(&phantom_mutex);
+ return 0;
+}
+
+static int phantom_release(struct inode *inode, struct file *file)
+{
+ struct phantom_device *dev = file->private_data;
+
+ mutex_lock(&dev->open_lock);
+
+ dev->opened = 0;
+ phantom_status(dev, dev->status & ~PHB_RUNNING);
+ dev->status &= ~PHB_NOT_OH;
+
+ mutex_unlock(&dev->open_lock);
+
+ return 0;
+}
+
+static unsigned int phantom_poll(struct file *file, poll_table *wait)
+{
+ struct phantom_device *dev = file->private_data;
+ unsigned int mask = 0;
+
+ pr_debug("phantom_poll: %d\n", atomic_read(&dev->counter));
+ poll_wait(file, &dev->wait, wait);
+
+ if (!(dev->status & PHB_RUNNING))
+ mask = POLLERR;
+ else if (atomic_read(&dev->counter))
+ mask = POLLIN | POLLRDNORM;
+
+ pr_debug("phantom_poll end: %x/%d\n", mask, atomic_read(&dev->counter));
+
+ return mask;
+}
+
+static const struct file_operations phantom_file_ops = {
+ .open = phantom_open,
+ .release = phantom_release,
+ .unlocked_ioctl = phantom_ioctl,
+ .compat_ioctl = phantom_compat_ioctl,
+ .poll = phantom_poll,
+ .llseek = no_llseek,
+};
+
+static irqreturn_t phantom_isr(int irq, void *data)
+{
+ struct phantom_device *dev = data;
+ unsigned int i;
+ u32 ctl;
+
+ spin_lock(&dev->regs_lock);
+ ctl = ioread32(dev->iaddr + PHN_CONTROL);
+ if (!(ctl & PHN_CTL_IRQ)) {
+ spin_unlock(&dev->regs_lock);
+ return IRQ_NONE;
+ }
+
+ iowrite32(0, dev->iaddr);
+ iowrite32(0xc0, dev->iaddr);
+
+ if (dev->status & PHB_NOT_OH) {
+ struct phm_regs *r = &dev->oregs;
+ u32 m = min(r->count, 8U);
+
+ for (i = 0; i < m; i++)
+ if (r->mask & BIT(i))
+ iowrite32(r->values[i], dev->oaddr + i);
+
+ dev->ctl_reg ^= PHN_CTL_AMP;
+ iowrite32(dev->ctl_reg, dev->iaddr + PHN_CONTROL);
+ }
+ spin_unlock(&dev->regs_lock);
+
+ ioread32(dev->iaddr); /* PCI posting */
+
+ atomic_inc(&dev->counter);
+ wake_up_interruptible(&dev->wait);
+
+ return IRQ_HANDLED;
+}
+
+/*
+ * Init and deinit driver
+ */
+
+static unsigned int phantom_get_free(void)
+{
+ unsigned int i;
+
+ for (i = 0; i < PHANTOM_MAX_MINORS; i++)
+ if (phantom_devices[i] == 0)
+ break;
+
+ return i;
+}
+
+static int phantom_probe(struct pci_dev *pdev,
+ const struct pci_device_id *pci_id)
+{
+ struct phantom_device *pht;
+ unsigned int minor;
+ int retval;
+
+ retval = pci_enable_device(pdev);
+ if (retval) {
+ dev_err(&pdev->dev, "pci_enable_device failed!\n");
+ goto err;
+ }
+
+ minor = phantom_get_free();
+ if (minor == PHANTOM_MAX_MINORS) {
+ dev_err(&pdev->dev, "too many devices found!\n");
+ retval = -EIO;
+ goto err_dis;
+ }
+
+ phantom_devices[minor] = 1;
+
+ retval = pci_request_regions(pdev, "phantom");
+ if (retval) {
+ dev_err(&pdev->dev, "pci_request_regions failed!\n");
+ goto err_null;
+ }
+
+ retval = -ENOMEM;
+ pht = kzalloc(sizeof(*pht), GFP_KERNEL);
+ if (pht == NULL) {
+ dev_err(&pdev->dev, "unable to allocate device\n");
+ goto err_reg;
+ }
+
+ pht->caddr = pci_iomap(pdev, 0, 0);
+ if (pht->caddr == NULL) {
+ dev_err(&pdev->dev, "can't remap conf space\n");
+ goto err_fr;
+ }
+ pht->iaddr = pci_iomap(pdev, 2, 0);
+ if (pht->iaddr == NULL) {
+ dev_err(&pdev->dev, "can't remap input space\n");
+ goto err_unmc;
+ }
+ pht->oaddr = pci_iomap(pdev, 3, 0);
+ if (pht->oaddr == NULL) {
+ dev_err(&pdev->dev, "can't remap output space\n");
+ goto err_unmi;
+ }
+
+ mutex_init(&pht->open_lock);
+ spin_lock_init(&pht->regs_lock);
+ init_waitqueue_head(&pht->wait);
+ cdev_init(&pht->cdev, &phantom_file_ops);
+ pht->cdev.owner = THIS_MODULE;
+
+ iowrite32(0, pht->caddr + PHN_IRQCTL);
+ ioread32(pht->caddr + PHN_IRQCTL); /* PCI posting */
+ retval = request_irq(pdev->irq, phantom_isr,
+ IRQF_SHARED, "phantom", pht);
+ if (retval) {
+ dev_err(&pdev->dev, "can't establish ISR\n");
+ goto err_unmo;
+ }
+
+ retval = cdev_add(&pht->cdev, MKDEV(phantom_major, minor), 1);
+ if (retval) {
+ dev_err(&pdev->dev, "chardev registration failed\n");
+ goto err_irq;
+ }
+
+ if (IS_ERR(device_create(phantom_class, &pdev->dev,
+ MKDEV(phantom_major, minor), NULL,
+ "phantom%u", minor)))
+ dev_err(&pdev->dev, "can't create device\n");
+
+ pci_set_drvdata(pdev, pht);
+
+ return 0;
+err_irq:
+ free_irq(pdev->irq, pht);
+err_unmo:
+ pci_iounmap(pdev, pht->oaddr);
+err_unmi:
+ pci_iounmap(pdev, pht->iaddr);
+err_unmc:
+ pci_iounmap(pdev, pht->caddr);
+err_fr:
+ kfree(pht);
+err_reg:
+ pci_release_regions(pdev);
+err_null:
+ phantom_devices[minor] = 0;
+err_dis:
+ pci_disable_device(pdev);
+err:
+ return retval;
+}
+
+static void phantom_remove(struct pci_dev *pdev)
+{
+ struct phantom_device *pht = pci_get_drvdata(pdev);
+ unsigned int minor = MINOR(pht->cdev.dev);
+
+ device_destroy(phantom_class, MKDEV(phantom_major, minor));
+
+ cdev_del(&pht->cdev);
+
+ iowrite32(0, pht->caddr + PHN_IRQCTL);
+ ioread32(pht->caddr + PHN_IRQCTL); /* PCI posting */
+ free_irq(pdev->irq, pht);
+
+ pci_iounmap(pdev, pht->oaddr);
+ pci_iounmap(pdev, pht->iaddr);
+ pci_iounmap(pdev, pht->caddr);
+
+ kfree(pht);
+
+ pci_release_regions(pdev);
+
+ phantom_devices[minor] = 0;
+
+ pci_disable_device(pdev);
+}
+
+#ifdef CONFIG_PM
+static int phantom_suspend(struct pci_dev *pdev, pm_message_t state)
+{
+ struct phantom_device *dev = pci_get_drvdata(pdev);
+
+ iowrite32(0, dev->caddr + PHN_IRQCTL);
+ ioread32(dev->caddr + PHN_IRQCTL); /* PCI posting */
+
+ synchronize_irq(pdev->irq);
+
+ return 0;
+}
+
+static int phantom_resume(struct pci_dev *pdev)
+{
+ struct phantom_device *dev = pci_get_drvdata(pdev);
+
+ iowrite32(0, dev->caddr + PHN_IRQCTL);
+
+ return 0;
+}
+#else
+#define phantom_suspend NULL
+#define phantom_resume NULL
+#endif
+
+static struct pci_device_id phantom_pci_tbl[] = {
+ { .vendor = PCI_VENDOR_ID_PLX, .device = PCI_DEVICE_ID_PLX_9050,
+ .subvendor = PCI_VENDOR_ID_PLX, .subdevice = PCI_DEVICE_ID_PLX_9050,
+ .class = PCI_CLASS_BRIDGE_OTHER << 8, .class_mask = 0xffff00 },
+ { 0, }
+};
+MODULE_DEVICE_TABLE(pci, phantom_pci_tbl);
+
+static struct pci_driver phantom_pci_driver = {
+ .name = "phantom",
+ .id_table = phantom_pci_tbl,
+ .probe = phantom_probe,
+ .remove = phantom_remove,
+ .suspend = phantom_suspend,
+ .resume = phantom_resume
+};
+
+static CLASS_ATTR_STRING(version, 0444, PHANTOM_VERSION);
+
+static int __init phantom_init(void)
+{
+ int retval;
+ dev_t dev;
+
+ phantom_class = class_create(THIS_MODULE, "phantom");
+ if (IS_ERR(phantom_class)) {
+ retval = PTR_ERR(phantom_class);
+ printk(KERN_ERR "phantom: can't register phantom class\n");
+ goto err;
+ }
+ retval = class_create_file(phantom_class, &class_attr_version.attr);
+ if (retval) {
+ printk(KERN_ERR "phantom: can't create sysfs version file\n");
+ goto err_class;
+ }
+
+ retval = alloc_chrdev_region(&dev, 0, PHANTOM_MAX_MINORS, "phantom");
+ if (retval) {
+ printk(KERN_ERR "phantom: can't register character device\n");
+ goto err_attr;
+ }
+ phantom_major = MAJOR(dev);
+
+ retval = pci_register_driver(&phantom_pci_driver);
+ if (retval) {
+ printk(KERN_ERR "phantom: can't register pci driver\n");
+ goto err_unchr;
+ }
+
+ printk(KERN_INFO "Phantom Linux Driver, version " PHANTOM_VERSION ", "
+ "init OK\n");
+
+ return 0;
+err_unchr:
+ unregister_chrdev_region(dev, PHANTOM_MAX_MINORS);
+err_attr:
+ class_remove_file(phantom_class, &class_attr_version.attr);
+err_class:
+ class_destroy(phantom_class);
+err:
+ return retval;
+}
+
+static void __exit phantom_exit(void)
+{
+ pci_unregister_driver(&phantom_pci_driver);
+
+ unregister_chrdev_region(MKDEV(phantom_major, 0), PHANTOM_MAX_MINORS);
+
+ class_remove_file(phantom_class, &class_attr_version.attr);
+ class_destroy(phantom_class);
+
+ pr_debug("phantom: module successfully removed\n");
+}
+
+module_init(phantom_init);
+module_exit(phantom_exit);
+
+MODULE_AUTHOR("Jiri Slaby <jirislaby@gmail.com>");
+MODULE_DESCRIPTION("Sensable Phantom driver (PCI devices)");
+MODULE_LICENSE("GPL");
+MODULE_VERSION(PHANTOM_VERSION);
diff --git a/drivers/misc/pti.c b/drivers/misc/pti.c
new file mode 100644
index 0000000..eda38cb
--- /dev/null
+++ b/drivers/misc/pti.c
@@ -0,0 +1,988 @@
+/*
+ * pti.c - PTI driver for cJTAG data extration
+ *
+ * Copyright (C) Intel 2010
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ * The PTI (Parallel Trace Interface) driver directs trace data routed from
+ * various parts in the system out through the Intel Penwell PTI port and
+ * out of the mobile device for analysis with a debugging tool
+ * (Lauterbach, Fido). This is part of a solution for the MIPI P1149.7,
+ * compact JTAG, standard.
+ */
+
+#include <linux/init.h>
+#include <linux/sched.h>
+#include <linux/interrupt.h>
+#include <linux/console.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/tty.h>
+#include <linux/tty_driver.h>
+#include <linux/pci.h>
+#include <linux/mutex.h>
+#include <linux/miscdevice.h>
+#include <linux/pti.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+
+#define DRIVERNAME "pti"
+#define PCINAME "pciPTI"
+#define TTYNAME "ttyPTI"
+#define CHARNAME "pti"
+#define PTITTY_MINOR_START 0
+#define PTITTY_MINOR_NUM 2
+#define MAX_APP_IDS 16 /* 128 channel ids / u8 bit size */
+#define MAX_OS_IDS 16 /* 128 channel ids / u8 bit size */
+#define MAX_MODEM_IDS 16 /* 128 channel ids / u8 bit size */
+#define MODEM_BASE_ID 71 /* modem master ID address */
+#define CONTROL_ID 72 /* control master ID address */
+#define CONSOLE_ID 73 /* console master ID address */
+#define OS_BASE_ID 74 /* base OS master ID address */
+#define APP_BASE_ID 80 /* base App master ID address */
+#define CONTROL_FRAME_LEN 32 /* PTI control frame maximum size */
+#define USER_COPY_SIZE 8192 /* 8Kb buffer for user space copy */
+#define APERTURE_14 0x3800000 /* offset to first OS write addr */
+#define APERTURE_LEN 0x400000 /* address length */
+
+struct pti_tty {
+ struct pti_masterchannel *mc;
+};
+
+struct pti_dev {
+ struct tty_port port[PTITTY_MINOR_NUM];
+ unsigned long pti_addr;
+ unsigned long aperture_base;
+ void __iomem *pti_ioaddr;
+ u8 ia_app[MAX_APP_IDS];
+ u8 ia_os[MAX_OS_IDS];
+ u8 ia_modem[MAX_MODEM_IDS];
+};
+
+/*
+ * This protects access to ia_app, ia_os, and ia_modem,
+ * which keeps track of channels allocated in
+ * an aperture write id.
+ */
+static DEFINE_MUTEX(alloclock);
+
+static const struct pci_device_id pci_ids[] = {
+ {PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x82B)},
+ {0}
+};
+
+static struct tty_driver *pti_tty_driver;
+static struct pti_dev *drv_data;
+
+static unsigned int pti_console_channel;
+static unsigned int pti_control_channel;
+
+/**
+ * pti_write_to_aperture()- The private write function to PTI HW.
+ *
+ * @mc: The 'aperture'. It's part of a write address that holds
+ * a master and channel ID.
+ * @buf: Data being written to the HW that will ultimately be seen
+ * in a debugging tool (Fido, Lauterbach).
+ * @len: Size of buffer.
+ *
+ * Since each aperture is specified by a unique
+ * master/channel ID, no two processes will be writing
+ * to the same aperture at the same time so no lock is required. The
+ * PTI-Output agent will send these out in the order that they arrived, and
+ * thus, it will intermix these messages. The debug tool can then later
+ * regroup the appropriate message segments together reconstituting each
+ * message.
+ */
+static void pti_write_to_aperture(struct pti_masterchannel *mc,
+ u8 *buf,
+ int len)
+{
+ int dwordcnt;
+ int final;
+ int i;
+ u32 ptiword;
+ u32 __iomem *aperture;
+ u8 *p = buf;
+
+ /*
+ * calculate the aperture offset from the base using the master and
+ * channel id's.
+ */
+ aperture = drv_data->pti_ioaddr + (mc->master << 15)
+ + (mc->channel << 8);
+
+ dwordcnt = len >> 2;
+ final = len - (dwordcnt << 2); /* final = trailing bytes */
+ if (final == 0 && dwordcnt != 0) { /* always need a final dword */
+ final += 4;
+ dwordcnt--;
+ }
+
+ for (i = 0; i < dwordcnt; i++) {
+ ptiword = be32_to_cpu(*(u32 *)p);
+ p += 4;
+ iowrite32(ptiword, aperture);
+ }
+
+ aperture += PTI_LASTDWORD_DTS; /* adding DTS signals that is EOM */
+
+ ptiword = 0;
+ for (i = 0; i < final; i++)
+ ptiword |= *p++ << (24-(8*i));
+
+ iowrite32(ptiword, aperture);
+ return;
+}
+
+/**
+ * pti_control_frame_built_and_sent()- control frame build and send function.
+ *
+ * @mc: The master / channel structure on which the function
+ * built a control frame.
+ * @thread_name: The thread name associated with the master / channel or
+ * 'NULL' if using the 'current' global variable.
+ *
+ * To be able to post process the PTI contents on host side, a control frame
+ * is added before sending any PTI content. So the host side knows on
+ * each PTI frame the name of the thread using a dedicated master / channel.
+ * The thread name is retrieved from 'current' global variable if 'thread_name'
+ * is 'NULL', else it is retrieved from 'thread_name' parameter.
+ * This function builds this frame and sends it to a master ID CONTROL_ID.
+ * The overhead is only 32 bytes since the driver only writes to HW
+ * in 32 byte chunks.
+ */
+static void pti_control_frame_built_and_sent(struct pti_masterchannel *mc,
+ const char *thread_name)
+{
+ /*
+ * Since we access the comm member in current's task_struct, we only
+ * need to be as large as what 'comm' in that structure is.
+ */
+ char comm[TASK_COMM_LEN];
+ struct pti_masterchannel mccontrol = {.master = CONTROL_ID,
+ .channel = 0};
+ const char *thread_name_p;
+ const char *control_format = "%3d %3d %s";
+ u8 control_frame[CONTROL_FRAME_LEN];
+
+ if (!thread_name) {
+ if (!in_interrupt())
+ get_task_comm(comm, current);
+ else
+ strncpy(comm, "Interrupt", TASK_COMM_LEN);
+
+ /* Absolutely ensure our buffer is zero terminated. */
+ comm[TASK_COMM_LEN-1] = 0;
+ thread_name_p = comm;
+ } else {
+ thread_name_p = thread_name;
+ }
+
+ mccontrol.channel = pti_control_channel;
+ pti_control_channel = (pti_control_channel + 1) & 0x7f;
+
+ snprintf(control_frame, CONTROL_FRAME_LEN, control_format, mc->master,
+ mc->channel, thread_name_p);
+ pti_write_to_aperture(&mccontrol, control_frame, strlen(control_frame));
+}
+
+/**
+ * pti_write_full_frame_to_aperture()- high level function to
+ * write to PTI.
+ *
+ * @mc: The 'aperture'. It's part of a write address that holds
+ * a master and channel ID.
+ * @buf: Data being written to the HW that will ultimately be seen
+ * in a debugging tool (Fido, Lauterbach).
+ * @len: Size of buffer.
+ *
+ * All threads sending data (either console, user space application, ...)
+ * are calling the high level function to write to PTI meaning that it is
+ * possible to add a control frame before sending the content.
+ */
+static void pti_write_full_frame_to_aperture(struct pti_masterchannel *mc,
+ const unsigned char *buf,
+ int len)
+{
+ pti_control_frame_built_and_sent(mc, NULL);
+ pti_write_to_aperture(mc, (u8 *)buf, len);
+}
+
+/**
+ * get_id()- Allocate a master and channel ID.
+ *
+ * @id_array: an array of bits representing what channel
+ * id's are allocated for writing.
+ * @max_ids: The max amount of available write IDs to use.
+ * @base_id: The starting SW channel ID, based on the Intel
+ * PTI arch.
+ * @thread_name: The thread name associated with the master / channel or
+ * 'NULL' if using the 'current' global variable.
+ *
+ * Returns:
+ * pti_masterchannel struct with master, channel ID address
+ * 0 for error
+ *
+ * Each bit in the arrays ia_app and ia_os correspond to a master and
+ * channel id. The bit is one if the id is taken and 0 if free. For
+ * every master there are 128 channel id's.
+ */
+static struct pti_masterchannel *get_id(u8 *id_array,
+ int max_ids,
+ int base_id,
+ const char *thread_name)
+{
+ struct pti_masterchannel *mc;
+ int i, j, mask;
+
+ mc = kmalloc(sizeof(struct pti_masterchannel), GFP_KERNEL);
+ if (mc == NULL)
+ return NULL;
+
+ /* look for a byte with a free bit */
+ for (i = 0; i < max_ids; i++)
+ if (id_array[i] != 0xff)
+ break;
+ if (i == max_ids) {
+ kfree(mc);
+ return NULL;
+ }
+ /* find the bit in the 128 possible channel opportunities */
+ mask = 0x80;
+ for (j = 0; j < 8; j++) {
+ if ((id_array[i] & mask) == 0)
+ break;
+ mask >>= 1;
+ }
+
+ /* grab it */
+ id_array[i] |= mask;
+ mc->master = base_id;
+ mc->channel = ((i & 0xf)<<3) + j;
+ /* write new master Id / channel Id allocation to channel control */
+ pti_control_frame_built_and_sent(mc, thread_name);
+ return mc;
+}
+
+/*
+ * The following three functions:
+ * pti_request_mastercahannel(), mipi_release_masterchannel()
+ * and pti_writedata() are an API for other kernel drivers to
+ * access PTI.
+ */
+
+/**
+ * pti_request_masterchannel()- Kernel API function used to allocate
+ * a master, channel ID address
+ * to write to PTI HW.
+ *
+ * @type: 0- request Application master, channel aperture ID
+ * write address.
+ * 1- request OS master, channel aperture ID write
+ * address.
+ * 2- request Modem master, channel aperture ID
+ * write address.
+ * Other values, error.
+ * @thread_name: The thread name associated with the master / channel or
+ * 'NULL' if using the 'current' global variable.
+ *
+ * Returns:
+ * pti_masterchannel struct
+ * 0 for error
+ */
+struct pti_masterchannel *pti_request_masterchannel(u8 type,
+ const char *thread_name)
+{
+ struct pti_masterchannel *mc;
+
+ mutex_lock(&alloclock);
+
+ switch (type) {
+
+ case 0:
+ mc = get_id(drv_data->ia_app, MAX_APP_IDS,
+ APP_BASE_ID, thread_name);
+ break;
+
+ case 1:
+ mc = get_id(drv_data->ia_os, MAX_OS_IDS,
+ OS_BASE_ID, thread_name);
+ break;
+
+ case 2:
+ mc = get_id(drv_data->ia_modem, MAX_MODEM_IDS,
+ MODEM_BASE_ID, thread_name);
+ break;
+ default:
+ mc = NULL;
+ }
+
+ mutex_unlock(&alloclock);
+ return mc;
+}
+EXPORT_SYMBOL_GPL(pti_request_masterchannel);
+
+/**
+ * pti_release_masterchannel()- Kernel API function used to release
+ * a master, channel ID address
+ * used to write to PTI HW.
+ *
+ * @mc: master, channel apeture ID address to be released. This
+ * will de-allocate the structure via kfree().
+ */
+void pti_release_masterchannel(struct pti_masterchannel *mc)
+{
+ u8 master, channel, i;
+
+ mutex_lock(&alloclock);
+
+ if (mc) {
+ master = mc->master;
+ channel = mc->channel;
+
+ if (master == APP_BASE_ID) {
+ i = channel >> 3;
+ drv_data->ia_app[i] &= ~(0x80>>(channel & 0x7));
+ } else if (master == OS_BASE_ID) {
+ i = channel >> 3;
+ drv_data->ia_os[i] &= ~(0x80>>(channel & 0x7));
+ } else {
+ i = channel >> 3;
+ drv_data->ia_modem[i] &= ~(0x80>>(channel & 0x7));
+ }
+
+ kfree(mc);
+ }
+
+ mutex_unlock(&alloclock);
+}
+EXPORT_SYMBOL_GPL(pti_release_masterchannel);
+
+/**
+ * pti_writedata()- Kernel API function used to write trace
+ * debugging data to PTI HW.
+ *
+ * @mc: Master, channel aperture ID address to write to.
+ * Null value will return with no write occurring.
+ * @buf: Trace debuging data to write to the PTI HW.
+ * Null value will return with no write occurring.
+ * @count: Size of buf. Value of 0 or a negative number will
+ * return with no write occuring.
+ */
+void pti_writedata(struct pti_masterchannel *mc, u8 *buf, int count)
+{
+ /*
+ * since this function is exported, this is treated like an
+ * API function, thus, all parameters should
+ * be checked for validity.
+ */
+ if ((mc != NULL) && (buf != NULL) && (count > 0))
+ pti_write_to_aperture(mc, buf, count);
+ return;
+}
+EXPORT_SYMBOL_GPL(pti_writedata);
+
+/*
+ * for the tty_driver_*() basic function descriptions, see tty_driver.h.
+ * Specific header comments made for PTI-related specifics.
+ */
+
+/**
+ * pti_tty_driver_open()- Open an Application master, channel aperture
+ * ID to the PTI device via tty device.
+ *
+ * @tty: tty interface.
+ * @filp: filp interface pased to tty_port_open() call.
+ *
+ * Returns:
+ * int, 0 for success
+ * otherwise, fail value
+ *
+ * The main purpose of using the tty device interface is for
+ * each tty port to have a unique PTI write aperture. In an
+ * example use case, ttyPTI0 gets syslogd and an APP aperture
+ * ID and ttyPTI1 is where the n_tracesink ldisc hooks to route
+ * modem messages into PTI. Modem trace data does not have to
+ * go to ttyPTI1, but ttyPTI0 and ttyPTI1 do need to be distinct
+ * master IDs. These messages go through the PTI HW and out of
+ * the handheld platform and to the Fido/Lauterbach device.
+ */
+static int pti_tty_driver_open(struct tty_struct *tty, struct file *filp)
+{
+ /*
+ * we actually want to allocate a new channel per open, per
+ * system arch. HW gives more than plenty channels for a single
+ * system task to have its own channel to write trace data. This
+ * also removes a locking requirement for the actual write
+ * procedure.
+ */
+ return tty_port_open(tty->port, tty, filp);
+}
+
+/**
+ * pti_tty_driver_close()- close tty device and release Application
+ * master, channel aperture ID to the PTI device via tty device.
+ *
+ * @tty: tty interface.
+ * @filp: filp interface pased to tty_port_close() call.
+ *
+ * The main purpose of using the tty device interface is to route
+ * syslog daemon messages to the PTI HW and out of the handheld platform
+ * and to the Fido/Lauterbach device.
+ */
+static void pti_tty_driver_close(struct tty_struct *tty, struct file *filp)
+{
+ tty_port_close(tty->port, tty, filp);
+}
+
+/**
+ * pti_tty_install()- Used to set up specific master-channels
+ * to tty ports for organizational purposes when
+ * tracing viewed from debuging tools.
+ *
+ * @driver: tty driver information.
+ * @tty: tty struct containing pti information.
+ *
+ * Returns:
+ * 0 for success
+ * otherwise, error
+ */
+static int pti_tty_install(struct tty_driver *driver, struct tty_struct *tty)
+{
+ int idx = tty->index;
+ struct pti_tty *pti_tty_data;
+ int ret = tty_standard_install(driver, tty);
+
+ if (ret == 0) {
+ pti_tty_data = kmalloc(sizeof(struct pti_tty), GFP_KERNEL);
+ if (pti_tty_data == NULL)
+ return -ENOMEM;
+
+ if (idx == PTITTY_MINOR_START)
+ pti_tty_data->mc = pti_request_masterchannel(0, NULL);
+ else
+ pti_tty_data->mc = pti_request_masterchannel(2, NULL);
+
+ if (pti_tty_data->mc == NULL) {
+ kfree(pti_tty_data);
+ return -ENXIO;
+ }
+ tty->driver_data = pti_tty_data;
+ }
+
+ return ret;
+}
+
+/**
+ * pti_tty_cleanup()- Used to de-allocate master-channel resources
+ * tied to tty's of this driver.
+ *
+ * @tty: tty struct containing pti information.
+ */
+static void pti_tty_cleanup(struct tty_struct *tty)
+{
+ struct pti_tty *pti_tty_data = tty->driver_data;
+ if (pti_tty_data == NULL)
+ return;
+ pti_release_masterchannel(pti_tty_data->mc);
+ kfree(pti_tty_data);
+ tty->driver_data = NULL;
+}
+
+/**
+ * pti_tty_driver_write()- Write trace debugging data through the char
+ * interface to the PTI HW. Part of the misc device implementation.
+ *
+ * @filp: Contains private data which is used to obtain
+ * master, channel write ID.
+ * @data: trace data to be written.
+ * @len: # of byte to write.
+ *
+ * Returns:
+ * int, # of bytes written
+ * otherwise, error
+ */
+static int pti_tty_driver_write(struct tty_struct *tty,
+ const unsigned char *buf, int len)
+{
+ struct pti_tty *pti_tty_data = tty->driver_data;
+ if ((pti_tty_data != NULL) && (pti_tty_data->mc != NULL)) {
+ pti_write_to_aperture(pti_tty_data->mc, (u8 *)buf, len);
+ return len;
+ }
+ /*
+ * we can't write to the pti hardware if the private driver_data
+ * and the mc address is not there.
+ */
+ else
+ return -EFAULT;
+}
+
+/**
+ * pti_tty_write_room()- Always returns 2048.
+ *
+ * @tty: contains tty info of the pti driver.
+ */
+static int pti_tty_write_room(struct tty_struct *tty)
+{
+ return 2048;
+}
+
+/**
+ * pti_char_open()- Open an Application master, channel aperture
+ * ID to the PTI device. Part of the misc device implementation.
+ *
+ * @inode: not used.
+ * @filp: Output- will have a masterchannel struct set containing
+ * the allocated application PTI aperture write address.
+ *
+ * Returns:
+ * int, 0 for success
+ * otherwise, a fail value
+ */
+static int pti_char_open(struct inode *inode, struct file *filp)
+{
+ struct pti_masterchannel *mc;
+
+ /*
+ * We really do want to fail immediately if
+ * pti_request_masterchannel() fails,
+ * before assigning the value to filp->private_data.
+ * Slightly easier to debug if this driver needs debugging.
+ */
+ mc = pti_request_masterchannel(0, NULL);
+ if (mc == NULL)
+ return -ENOMEM;
+ filp->private_data = mc;
+ return 0;
+}
+
+/**
+ * pti_char_release()- Close a char channel to the PTI device. Part
+ * of the misc device implementation.
+ *
+ * @inode: Not used in this implementaiton.
+ * @filp: Contains private_data that contains the master, channel
+ * ID to be released by the PTI device.
+ *
+ * Returns:
+ * always 0
+ */
+static int pti_char_release(struct inode *inode, struct file *filp)
+{
+ pti_release_masterchannel(filp->private_data);
+ filp->private_data = NULL;
+ return 0;
+}
+
+/**
+ * pti_char_write()- Write trace debugging data through the char
+ * interface to the PTI HW. Part of the misc device implementation.
+ *
+ * @filp: Contains private data which is used to obtain
+ * master, channel write ID.
+ * @data: trace data to be written.
+ * @len: # of byte to write.
+ * @ppose: Not used in this function implementation.
+ *
+ * Returns:
+ * int, # of bytes written
+ * otherwise, error value
+ *
+ * Notes: From side discussions with Alan Cox and experimenting
+ * with PTI debug HW like Nokia's Fido box and Lauterbach
+ * devices, 8192 byte write buffer used by USER_COPY_SIZE was
+ * deemed an appropriate size for this type of usage with
+ * debugging HW.
+ */
+static ssize_t pti_char_write(struct file *filp, const char __user *data,
+ size_t len, loff_t *ppose)
+{
+ struct pti_masterchannel *mc;
+ void *kbuf;
+ const char __user *tmp;
+ size_t size = USER_COPY_SIZE;
+ size_t n = 0;
+
+ tmp = data;
+ mc = filp->private_data;
+
+ kbuf = kmalloc(size, GFP_KERNEL);
+ if (kbuf == NULL) {
+ pr_err("%s(%d): buf allocation failed\n",
+ __func__, __LINE__);
+ return -ENOMEM;
+ }
+
+ do {
+ if (len - n > USER_COPY_SIZE)
+ size = USER_COPY_SIZE;
+ else
+ size = len - n;
+
+ if (copy_from_user(kbuf, tmp, size)) {
+ kfree(kbuf);
+ return n ? n : -EFAULT;
+ }
+
+ pti_write_to_aperture(mc, kbuf, size);
+ n += size;
+ tmp += size;
+
+ } while (len > n);
+
+ kfree(kbuf);
+ return len;
+}
+
+static const struct tty_operations pti_tty_driver_ops = {
+ .open = pti_tty_driver_open,
+ .close = pti_tty_driver_close,
+ .write = pti_tty_driver_write,
+ .write_room = pti_tty_write_room,
+ .install = pti_tty_install,
+ .cleanup = pti_tty_cleanup
+};
+
+static const struct file_operations pti_char_driver_ops = {
+ .owner = THIS_MODULE,
+ .write = pti_char_write,
+ .open = pti_char_open,
+ .release = pti_char_release,
+};
+
+static struct miscdevice pti_char_driver = {
+ .minor = MISC_DYNAMIC_MINOR,
+ .name = CHARNAME,
+ .fops = &pti_char_driver_ops
+};
+
+/**
+ * pti_console_write()- Write to the console that has been acquired.
+ *
+ * @c: Not used in this implementaiton.
+ * @buf: Data to be written.
+ * @len: Length of buf.
+ */
+static void pti_console_write(struct console *c, const char *buf, unsigned len)
+{
+ static struct pti_masterchannel mc = {.master = CONSOLE_ID,
+ .channel = 0};
+
+ mc.channel = pti_console_channel;
+ pti_console_channel = (pti_console_channel + 1) & 0x7f;
+
+ pti_write_full_frame_to_aperture(&mc, buf, len);
+}
+
+/**
+ * pti_console_device()- Return the driver tty structure and set the
+ * associated index implementation.
+ *
+ * @c: Console device of the driver.
+ * @index: index associated with c.
+ *
+ * Returns:
+ * always value of pti_tty_driver structure when this function
+ * is called.
+ */
+static struct tty_driver *pti_console_device(struct console *c, int *index)
+{
+ *index = c->index;
+ return pti_tty_driver;
+}
+
+/**
+ * pti_console_setup()- Initialize console variables used by the driver.
+ *
+ * @c: Not used.
+ * @opts: Not used.
+ *
+ * Returns:
+ * always 0.
+ */
+static int pti_console_setup(struct console *c, char *opts)
+{
+ pti_console_channel = 0;
+ pti_control_channel = 0;
+ return 0;
+}
+
+/*
+ * pti_console struct, used to capture OS printk()'s and shift
+ * out to the PTI device for debugging. This cannot be
+ * enabled upon boot because of the possibility of eating
+ * any serial console printk's (race condition discovered).
+ * The console should be enabled upon when the tty port is
+ * used for the first time. Since the primary purpose for
+ * the tty port is to hook up syslog to it, the tty port
+ * will be open for a really long time.
+ */
+static struct console pti_console = {
+ .name = TTYNAME,
+ .write = pti_console_write,
+ .device = pti_console_device,
+ .setup = pti_console_setup,
+ .flags = CON_PRINTBUFFER,
+ .index = 0,
+};
+
+/**
+ * pti_port_activate()- Used to start/initialize any items upon
+ * first opening of tty_port().
+ *
+ * @port- The tty port number of the PTI device.
+ * @tty- The tty struct associated with this device.
+ *
+ * Returns:
+ * always returns 0
+ *
+ * Notes: The primary purpose of the PTI tty port 0 is to hook
+ * the syslog daemon to it; thus this port will be open for a
+ * very long time.
+ */
+static int pti_port_activate(struct tty_port *port, struct tty_struct *tty)
+{
+ if (port->tty->index == PTITTY_MINOR_START)
+ console_start(&pti_console);
+ return 0;
+}
+
+/**
+ * pti_port_shutdown()- Used to stop/shutdown any items upon the
+ * last tty port close.
+ *
+ * @port- The tty port number of the PTI device.
+ *
+ * Notes: The primary purpose of the PTI tty port 0 is to hook
+ * the syslog daemon to it; thus this port will be open for a
+ * very long time.
+ */
+static void pti_port_shutdown(struct tty_port *port)
+{
+ if (port->tty->index == PTITTY_MINOR_START)
+ console_stop(&pti_console);
+}
+
+static const struct tty_port_operations tty_port_ops = {
+ .activate = pti_port_activate,
+ .shutdown = pti_port_shutdown,
+};
+
+/*
+ * Note the _probe() call sets everything up and ties the char and tty
+ * to successfully detecting the PTI device on the pci bus.
+ */
+
+/**
+ * pti_pci_probe()- Used to detect pti on the pci bus and set
+ * things up in the driver.
+ *
+ * @pdev- pci_dev struct values for pti.
+ * @ent- pci_device_id struct for pti driver.
+ *
+ * Returns:
+ * 0 for success
+ * otherwise, error
+ */
+static int pti_pci_probe(struct pci_dev *pdev,
+ const struct pci_device_id *ent)
+{
+ unsigned int a;
+ int retval = -EINVAL;
+ int pci_bar = 1;
+
+ dev_dbg(&pdev->dev, "%s %s(%d): PTI PCI ID %04x:%04x\n", __FILE__,
+ __func__, __LINE__, pdev->vendor, pdev->device);
+
+ retval = misc_register(&pti_char_driver);
+ if (retval) {
+ pr_err("%s(%d): CHAR registration failed of pti driver\n",
+ __func__, __LINE__);
+ pr_err("%s(%d): Error value returned: %d\n",
+ __func__, __LINE__, retval);
+ goto err;
+ }
+
+ retval = pci_enable_device(pdev);
+ if (retval != 0) {
+ dev_err(&pdev->dev,
+ "%s: pci_enable_device() returned error %d\n",
+ __func__, retval);
+ goto err_unreg_misc;
+ }
+
+ drv_data = kzalloc(sizeof(*drv_data), GFP_KERNEL);
+ if (drv_data == NULL) {
+ retval = -ENOMEM;
+ dev_err(&pdev->dev,
+ "%s(%d): kmalloc() returned NULL memory.\n",
+ __func__, __LINE__);
+ goto err_disable_pci;
+ }
+ drv_data->pti_addr = pci_resource_start(pdev, pci_bar);
+
+ retval = pci_request_region(pdev, pci_bar, dev_name(&pdev->dev));
+ if (retval != 0) {
+ dev_err(&pdev->dev,
+ "%s(%d): pci_request_region() returned error %d\n",
+ __func__, __LINE__, retval);
+ goto err_free_dd;
+ }
+ drv_data->aperture_base = drv_data->pti_addr+APERTURE_14;
+ drv_data->pti_ioaddr =
+ ioremap_nocache((u32)drv_data->aperture_base,
+ APERTURE_LEN);
+ if (!drv_data->pti_ioaddr) {
+ retval = -ENOMEM;
+ goto err_rel_reg;
+ }
+
+ pci_set_drvdata(pdev, drv_data);
+
+ for (a = 0; a < PTITTY_MINOR_NUM; a++) {
+ struct tty_port *port = &drv_data->port[a];
+ tty_port_init(port);
+ port->ops = &tty_port_ops;
+
+ tty_port_register_device(port, pti_tty_driver, a, &pdev->dev);
+ }
+
+ register_console(&pti_console);
+
+ return 0;
+err_rel_reg:
+ pci_release_region(pdev, pci_bar);
+err_free_dd:
+ kfree(drv_data);
+err_disable_pci:
+ pci_disable_device(pdev);
+err_unreg_misc:
+ misc_deregister(&pti_char_driver);
+err:
+ return retval;
+}
+
+/**
+ * pti_pci_remove()- Driver exit method to remove PTI from
+ * PCI bus.
+ * @pdev: variable containing pci info of PTI.
+ */
+static void pti_pci_remove(struct pci_dev *pdev)
+{
+ struct pti_dev *drv_data = pci_get_drvdata(pdev);
+ unsigned int a;
+
+ unregister_console(&pti_console);
+
+ for (a = 0; a < PTITTY_MINOR_NUM; a++) {
+ tty_unregister_device(pti_tty_driver, a);
+ tty_port_destroy(&drv_data->port[a]);
+ }
+
+ iounmap(drv_data->pti_ioaddr);
+ kfree(drv_data);
+ pci_release_region(pdev, 1);
+ pci_disable_device(pdev);
+
+ misc_deregister(&pti_char_driver);
+}
+
+static struct pci_driver pti_pci_driver = {
+ .name = PCINAME,
+ .id_table = pci_ids,
+ .probe = pti_pci_probe,
+ .remove = pti_pci_remove,
+};
+
+/**
+ *
+ * pti_init()- Overall entry/init call to the pti driver.
+ * It starts the registration process with the kernel.
+ *
+ * Returns:
+ * int __init, 0 for success
+ * otherwise value is an error
+ *
+ */
+static int __init pti_init(void)
+{
+ int retval = -EINVAL;
+
+ /* First register module as tty device */
+
+ pti_tty_driver = alloc_tty_driver(PTITTY_MINOR_NUM);
+ if (pti_tty_driver == NULL) {
+ pr_err("%s(%d): Memory allocation failed for ptiTTY driver\n",
+ __func__, __LINE__);
+ return -ENOMEM;
+ }
+
+ pti_tty_driver->driver_name = DRIVERNAME;
+ pti_tty_driver->name = TTYNAME;
+ pti_tty_driver->major = 0;
+ pti_tty_driver->minor_start = PTITTY_MINOR_START;
+ pti_tty_driver->type = TTY_DRIVER_TYPE_SYSTEM;
+ pti_tty_driver->subtype = SYSTEM_TYPE_SYSCONS;
+ pti_tty_driver->flags = TTY_DRIVER_REAL_RAW |
+ TTY_DRIVER_DYNAMIC_DEV;
+ pti_tty_driver->init_termios = tty_std_termios;
+
+ tty_set_operations(pti_tty_driver, &pti_tty_driver_ops);
+
+ retval = tty_register_driver(pti_tty_driver);
+ if (retval) {
+ pr_err("%s(%d): TTY registration failed of pti driver\n",
+ __func__, __LINE__);
+ pr_err("%s(%d): Error value returned: %d\n",
+ __func__, __LINE__, retval);
+
+ goto put_tty;
+ }
+
+ retval = pci_register_driver(&pti_pci_driver);
+ if (retval) {
+ pr_err("%s(%d): PCI registration failed of pti driver\n",
+ __func__, __LINE__);
+ pr_err("%s(%d): Error value returned: %d\n",
+ __func__, __LINE__, retval);
+ goto unreg_tty;
+ }
+
+ return 0;
+unreg_tty:
+ tty_unregister_driver(pti_tty_driver);
+put_tty:
+ put_tty_driver(pti_tty_driver);
+ pti_tty_driver = NULL;
+ return retval;
+}
+
+/**
+ * pti_exit()- Unregisters this module as a tty and pci driver.
+ */
+static void __exit pti_exit(void)
+{
+ tty_unregister_driver(pti_tty_driver);
+ pci_unregister_driver(&pti_pci_driver);
+ put_tty_driver(pti_tty_driver);
+}
+
+module_init(pti_init);
+module_exit(pti_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Ken Mills, Jay Freyensee");
+MODULE_DESCRIPTION("PTI Driver");
+
diff --git a/drivers/misc/qcom-coincell.c b/drivers/misc/qcom-coincell.c
new file mode 100644
index 0000000..7b4a2da
--- /dev/null
+++ b/drivers/misc/qcom-coincell.c
@@ -0,0 +1,152 @@
+/* Copyright (c) 2013, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2015, Sony Mobile Communications Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/of.h>
+#include <linux/regmap.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+
+struct qcom_coincell {
+ struct device *dev;
+ struct regmap *regmap;
+ u32 base_addr;
+};
+
+#define QCOM_COINCELL_REG_RSET 0x44
+#define QCOM_COINCELL_REG_VSET 0x45
+#define QCOM_COINCELL_REG_ENABLE 0x46
+
+#define QCOM_COINCELL_ENABLE BIT(7)
+
+static const int qcom_rset_map[] = { 2100, 1700, 1200, 800 };
+static const int qcom_vset_map[] = { 2500, 3200, 3100, 3000 };
+/* NOTE: for pm8921 and others, voltage of 2500 is 16 (10000b), not 0 */
+
+/* if enable==0, rset and vset are ignored */
+static int qcom_coincell_chgr_config(struct qcom_coincell *chgr, int rset,
+ int vset, bool enable)
+{
+ int i, j, rc;
+
+ /* if disabling, just do that and skip other operations */
+ if (!enable)
+ return regmap_write(chgr->regmap,
+ chgr->base_addr + QCOM_COINCELL_REG_ENABLE, 0);
+
+ /* find index for current-limiting resistor */
+ for (i = 0; i < ARRAY_SIZE(qcom_rset_map); i++)
+ if (rset == qcom_rset_map[i])
+ break;
+
+ if (i >= ARRAY_SIZE(qcom_rset_map)) {
+ dev_err(chgr->dev, "invalid rset-ohms value %d\n", rset);
+ return -EINVAL;
+ }
+
+ /* find index for charge voltage */
+ for (j = 0; j < ARRAY_SIZE(qcom_vset_map); j++)
+ if (vset == qcom_vset_map[j])
+ break;
+
+ if (j >= ARRAY_SIZE(qcom_vset_map)) {
+ dev_err(chgr->dev, "invalid vset-millivolts value %d\n", vset);
+ return -EINVAL;
+ }
+
+ rc = regmap_write(chgr->regmap,
+ chgr->base_addr + QCOM_COINCELL_REG_RSET, i);
+ if (rc) {
+ /*
+ * This is mainly to flag a bad base_addr (reg) from dts.
+ * Other failures writing to the registers should be
+ * extremely rare, or indicative of problems that
+ * should be reported elsewhere (eg. spmi failure).
+ */
+ dev_err(chgr->dev, "could not write to RSET register\n");
+ return rc;
+ }
+
+ rc = regmap_write(chgr->regmap,
+ chgr->base_addr + QCOM_COINCELL_REG_VSET, j);
+ if (rc)
+ return rc;
+
+ /* set 'enable' register */
+ return regmap_write(chgr->regmap,
+ chgr->base_addr + QCOM_COINCELL_REG_ENABLE,
+ QCOM_COINCELL_ENABLE);
+}
+
+static int qcom_coincell_probe(struct platform_device *pdev)
+{
+ struct device_node *node = pdev->dev.of_node;
+ struct qcom_coincell chgr;
+ u32 rset, vset;
+ bool enable;
+ int rc;
+
+ chgr.dev = &pdev->dev;
+
+ chgr.regmap = dev_get_regmap(pdev->dev.parent, NULL);
+ if (!chgr.regmap) {
+ dev_err(chgr.dev, "Unable to get regmap\n");
+ return -EINVAL;
+ }
+
+ rc = of_property_read_u32(node, "reg", &chgr.base_addr);
+ if (rc)
+ return rc;
+
+ enable = !of_property_read_bool(node, "qcom,charger-disable");
+
+ if (enable) {
+ rc = of_property_read_u32(node, "qcom,rset-ohms", &rset);
+ if (rc) {
+ dev_err(chgr.dev,
+ "can't find 'qcom,rset-ohms' in DT block");
+ return rc;
+ }
+
+ rc = of_property_read_u32(node, "qcom,vset-millivolts", &vset);
+ if (rc) {
+ dev_err(chgr.dev,
+ "can't find 'qcom,vset-millivolts' in DT block");
+ return rc;
+ }
+ }
+
+ return qcom_coincell_chgr_config(&chgr, rset, vset, enable);
+}
+
+static const struct of_device_id qcom_coincell_match_table[] = {
+ { .compatible = "qcom,pm8941-coincell", },
+ {}
+};
+
+MODULE_DEVICE_TABLE(of, qcom_coincell_match_table);
+
+static struct platform_driver qcom_coincell_driver = {
+ .driver = {
+ .name = "qcom-spmi-coincell",
+ .of_match_table = qcom_coincell_match_table,
+ },
+ .probe = qcom_coincell_probe,
+};
+
+module_platform_driver(qcom_coincell_driver);
+
+MODULE_DESCRIPTION("Qualcomm PMIC coincell charger driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/misc/sgi-gru/Makefile b/drivers/misc/sgi-gru/Makefile
new file mode 100644
index 0000000..0003a1d
--- /dev/null
+++ b/drivers/misc/sgi-gru/Makefile
@@ -0,0 +1,5 @@
+ccflags-$(CONFIG_SGI_GRU_DEBUG) := -DDEBUG
+
+obj-$(CONFIG_SGI_GRU) := gru.o
+gru-y := grufile.o grumain.o grufault.o grutlbpurge.o gruprocfs.o grukservices.o gruhandles.o grukdump.o
+
diff --git a/drivers/misc/sgi-gru/gru.h b/drivers/misc/sgi-gru/gru.h
new file mode 100644
index 0000000..3ad76cd
--- /dev/null
+++ b/drivers/misc/sgi-gru/gru.h
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef __GRU_H__
+#define __GRU_H__
+
+/*
+ * GRU architectural definitions
+ */
+#define GRU_CACHE_LINE_BYTES 64
+#define GRU_HANDLE_STRIDE 256
+#define GRU_CB_BASE 0
+#define GRU_DS_BASE 0x20000
+
+/*
+ * Size used to map GRU GSeg
+ */
+#if defined(CONFIG_IA64)
+#define GRU_GSEG_PAGESIZE (256 * 1024UL)
+#elif defined(CONFIG_X86_64)
+#define GRU_GSEG_PAGESIZE (256 * 1024UL) /* ZZZ 2MB ??? */
+#else
+#error "Unsupported architecture"
+#endif
+
+/*
+ * Structure for obtaining GRU resource information
+ */
+struct gru_chiplet_info {
+ int node;
+ int chiplet;
+ int blade;
+ int total_dsr_bytes;
+ int total_cbr;
+ int total_user_dsr_bytes;
+ int total_user_cbr;
+ int free_user_dsr_bytes;
+ int free_user_cbr;
+};
+
+/*
+ * Statictics kept for each context.
+ */
+struct gru_gseg_statistics {
+ unsigned long fmm_tlbmiss;
+ unsigned long upm_tlbmiss;
+ unsigned long tlbdropin;
+ unsigned long context_stolen;
+ unsigned long reserved[10];
+};
+
+/* Flags for GRU options on the gru_create_context() call */
+/* Select one of the follow 4 options to specify how TLB misses are handled */
+#define GRU_OPT_MISS_DEFAULT 0x0000 /* Use default mode */
+#define GRU_OPT_MISS_USER_POLL 0x0001 /* User will poll CB for faults */
+#define GRU_OPT_MISS_FMM_INTR 0x0002 /* Send interrupt to cpu to
+ handle fault */
+#define GRU_OPT_MISS_FMM_POLL 0x0003 /* Use system polling thread */
+#define GRU_OPT_MISS_MASK 0x0003 /* Mask for TLB MISS option */
+
+
+
+#endif /* __GRU_H__ */
diff --git a/drivers/misc/sgi-gru/gru_instructions.h b/drivers/misc/sgi-gru/gru_instructions.h
new file mode 100644
index 0000000..04d5170
--- /dev/null
+++ b/drivers/misc/sgi-gru/gru_instructions.h
@@ -0,0 +1,736 @@
+/*
+ * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef __GRU_INSTRUCTIONS_H__
+#define __GRU_INSTRUCTIONS_H__
+
+extern int gru_check_status_proc(void *cb);
+extern int gru_wait_proc(void *cb);
+extern void gru_wait_abort_proc(void *cb);
+
+
+
+/*
+ * Architecture dependent functions
+ */
+
+#if defined(CONFIG_IA64)
+#include <linux/compiler.h>
+#include <asm/intrinsics.h>
+#define __flush_cache(p) ia64_fc((unsigned long)p)
+/* Use volatile on IA64 to ensure ordering via st4.rel */
+#define gru_ordered_store_ulong(p, v) \
+ do { \
+ barrier(); \
+ *((volatile unsigned long *)(p)) = v; /* force st.rel */ \
+ } while (0)
+#elif defined(CONFIG_X86_64)
+#include <asm/cacheflush.h>
+#define __flush_cache(p) clflush(p)
+#define gru_ordered_store_ulong(p, v) \
+ do { \
+ barrier(); \
+ *(unsigned long *)p = v; \
+ } while (0)
+#else
+#error "Unsupported architecture"
+#endif
+
+/*
+ * Control block status and exception codes
+ */
+#define CBS_IDLE 0
+#define CBS_EXCEPTION 1
+#define CBS_ACTIVE 2
+#define CBS_CALL_OS 3
+
+/* CB substatus bitmasks */
+#define CBSS_MSG_QUEUE_MASK 7
+#define CBSS_IMPLICIT_ABORT_ACTIVE_MASK 8
+
+/* CB substatus message queue values (low 3 bits of substatus) */
+#define CBSS_NO_ERROR 0
+#define CBSS_LB_OVERFLOWED 1
+#define CBSS_QLIMIT_REACHED 2
+#define CBSS_PAGE_OVERFLOW 3
+#define CBSS_AMO_NACKED 4
+#define CBSS_PUT_NACKED 5
+
+/*
+ * Structure used to fetch exception detail for CBs that terminate with
+ * CBS_EXCEPTION
+ */
+struct control_block_extended_exc_detail {
+ unsigned long cb;
+ int opc;
+ int ecause;
+ int exopc;
+ long exceptdet0;
+ int exceptdet1;
+ int cbrstate;
+ int cbrexecstatus;
+};
+
+/*
+ * Instruction formats
+ */
+
+/*
+ * Generic instruction format.
+ * This definition has precise bit field definitions.
+ */
+struct gru_instruction_bits {
+ /* DW 0 - low */
+ unsigned int icmd: 1;
+ unsigned char ima: 3; /* CB_DelRep, unmapped mode */
+ unsigned char reserved0: 4;
+ unsigned int xtype: 3;
+ unsigned int iaa0: 2;
+ unsigned int iaa1: 2;
+ unsigned char reserved1: 1;
+ unsigned char opc: 8; /* opcode */
+ unsigned char exopc: 8; /* extended opcode */
+ /* DW 0 - high */
+ unsigned int idef2: 22; /* TRi0 */
+ unsigned char reserved2: 2;
+ unsigned char istatus: 2;
+ unsigned char isubstatus:4;
+ unsigned char reserved3: 1;
+ unsigned char tlb_fault_color: 1;
+ /* DW 1 */
+ unsigned long idef4; /* 42 bits: TRi1, BufSize */
+ /* DW 2-6 */
+ unsigned long idef1; /* BAddr0 */
+ unsigned long idef5; /* Nelem */
+ unsigned long idef6; /* Stride, Operand1 */
+ unsigned long idef3; /* BAddr1, Value, Operand2 */
+ unsigned long reserved4;
+ /* DW 7 */
+ unsigned long avalue; /* AValue */
+};
+
+/*
+ * Generic instruction with friendlier names. This format is used
+ * for inline instructions.
+ */
+struct gru_instruction {
+ /* DW 0 */
+ union {
+ unsigned long op64; /* icmd,xtype,iaa0,ima,opc,tri0 */
+ struct {
+ unsigned int op32;
+ unsigned int tri0;
+ };
+ };
+ unsigned long tri1_bufsize; /* DW 1 */
+ unsigned long baddr0; /* DW 2 */
+ unsigned long nelem; /* DW 3 */
+ unsigned long op1_stride; /* DW 4 */
+ unsigned long op2_value_baddr1; /* DW 5 */
+ unsigned long reserved0; /* DW 6 */
+ unsigned long avalue; /* DW 7 */
+};
+
+/* Some shifts and masks for the low 64 bits of a GRU command */
+#define GRU_CB_ICMD_SHFT 0
+#define GRU_CB_ICMD_MASK 0x1
+#define GRU_CB_XTYPE_SHFT 8
+#define GRU_CB_XTYPE_MASK 0x7
+#define GRU_CB_IAA0_SHFT 11
+#define GRU_CB_IAA0_MASK 0x3
+#define GRU_CB_IAA1_SHFT 13
+#define GRU_CB_IAA1_MASK 0x3
+#define GRU_CB_IMA_SHFT 1
+#define GRU_CB_IMA_MASK 0x3
+#define GRU_CB_OPC_SHFT 16
+#define GRU_CB_OPC_MASK 0xff
+#define GRU_CB_EXOPC_SHFT 24
+#define GRU_CB_EXOPC_MASK 0xff
+#define GRU_IDEF2_SHFT 32
+#define GRU_IDEF2_MASK 0x3ffff
+#define GRU_ISTATUS_SHFT 56
+#define GRU_ISTATUS_MASK 0x3
+
+/* GRU instruction opcodes (opc field) */
+#define OP_NOP 0x00
+#define OP_BCOPY 0x01
+#define OP_VLOAD 0x02
+#define OP_IVLOAD 0x03
+#define OP_VSTORE 0x04
+#define OP_IVSTORE 0x05
+#define OP_VSET 0x06
+#define OP_IVSET 0x07
+#define OP_MESQ 0x08
+#define OP_GAMXR 0x09
+#define OP_GAMIR 0x0a
+#define OP_GAMIRR 0x0b
+#define OP_GAMER 0x0c
+#define OP_GAMERR 0x0d
+#define OP_BSTORE 0x0e
+#define OP_VFLUSH 0x0f
+
+
+/* Extended opcodes values (exopc field) */
+
+/* GAMIR - AMOs with implicit operands */
+#define EOP_IR_FETCH 0x01 /* Plain fetch of memory */
+#define EOP_IR_CLR 0x02 /* Fetch and clear */
+#define EOP_IR_INC 0x05 /* Fetch and increment */
+#define EOP_IR_DEC 0x07 /* Fetch and decrement */
+#define EOP_IR_QCHK1 0x0d /* Queue check, 64 byte msg */
+#define EOP_IR_QCHK2 0x0e /* Queue check, 128 byte msg */
+
+/* GAMIRR - Registered AMOs with implicit operands */
+#define EOP_IRR_FETCH 0x01 /* Registered fetch of memory */
+#define EOP_IRR_CLR 0x02 /* Registered fetch and clear */
+#define EOP_IRR_INC 0x05 /* Registered fetch and increment */
+#define EOP_IRR_DEC 0x07 /* Registered fetch and decrement */
+#define EOP_IRR_DECZ 0x0f /* Registered fetch and decrement, update on zero*/
+
+/* GAMER - AMOs with explicit operands */
+#define EOP_ER_SWAP 0x00 /* Exchange argument and memory */
+#define EOP_ER_OR 0x01 /* Logical OR with memory */
+#define EOP_ER_AND 0x02 /* Logical AND with memory */
+#define EOP_ER_XOR 0x03 /* Logical XOR with memory */
+#define EOP_ER_ADD 0x04 /* Add value to memory */
+#define EOP_ER_CSWAP 0x08 /* Compare with operand2, write operand1 if match*/
+#define EOP_ER_CADD 0x0c /* Queue check, operand1*64 byte msg */
+
+/* GAMERR - Registered AMOs with explicit operands */
+#define EOP_ERR_SWAP 0x00 /* Exchange argument and memory */
+#define EOP_ERR_OR 0x01 /* Logical OR with memory */
+#define EOP_ERR_AND 0x02 /* Logical AND with memory */
+#define EOP_ERR_XOR 0x03 /* Logical XOR with memory */
+#define EOP_ERR_ADD 0x04 /* Add value to memory */
+#define EOP_ERR_CSWAP 0x08 /* Compare with operand2, write operand1 if match*/
+#define EOP_ERR_EPOLL 0x09 /* Poll for equality */
+#define EOP_ERR_NPOLL 0x0a /* Poll for inequality */
+
+/* GAMXR - SGI Arithmetic unit */
+#define EOP_XR_CSWAP 0x0b /* Masked compare exchange */
+
+
+/* Transfer types (xtype field) */
+#define XTYPE_B 0x0 /* byte */
+#define XTYPE_S 0x1 /* short (2-byte) */
+#define XTYPE_W 0x2 /* word (4-byte) */
+#define XTYPE_DW 0x3 /* doubleword (8-byte) */
+#define XTYPE_CL 0x6 /* cacheline (64-byte) */
+
+
+/* Instruction access attributes (iaa0, iaa1 fields) */
+#define IAA_RAM 0x0 /* normal cached RAM access */
+#define IAA_NCRAM 0x2 /* noncoherent RAM access */
+#define IAA_MMIO 0x1 /* noncoherent memory-mapped I/O space */
+#define IAA_REGISTER 0x3 /* memory-mapped registers, etc. */
+
+
+/* Instruction mode attributes (ima field) */
+#define IMA_MAPPED 0x0 /* Virtual mode */
+#define IMA_CB_DELAY 0x1 /* hold read responses until status changes */
+#define IMA_UNMAPPED 0x2 /* bypass the TLBs (OS only) */
+#define IMA_INTERRUPT 0x4 /* Interrupt when instruction completes */
+
+/* CBE ecause bits */
+#define CBE_CAUSE_RI (1 << 0)
+#define CBE_CAUSE_INVALID_INSTRUCTION (1 << 1)
+#define CBE_CAUSE_UNMAPPED_MODE_FORBIDDEN (1 << 2)
+#define CBE_CAUSE_PE_CHECK_DATA_ERROR (1 << 3)
+#define CBE_CAUSE_IAA_GAA_MISMATCH (1 << 4)
+#define CBE_CAUSE_DATA_SEGMENT_LIMIT_EXCEPTION (1 << 5)
+#define CBE_CAUSE_OS_FATAL_TLB_FAULT (1 << 6)
+#define CBE_CAUSE_EXECUTION_HW_ERROR (1 << 7)
+#define CBE_CAUSE_TLBHW_ERROR (1 << 8)
+#define CBE_CAUSE_RA_REQUEST_TIMEOUT (1 << 9)
+#define CBE_CAUSE_HA_REQUEST_TIMEOUT (1 << 10)
+#define CBE_CAUSE_RA_RESPONSE_FATAL (1 << 11)
+#define CBE_CAUSE_RA_RESPONSE_NON_FATAL (1 << 12)
+#define CBE_CAUSE_HA_RESPONSE_FATAL (1 << 13)
+#define CBE_CAUSE_HA_RESPONSE_NON_FATAL (1 << 14)
+#define CBE_CAUSE_ADDRESS_SPACE_DECODE_ERROR (1 << 15)
+#define CBE_CAUSE_PROTOCOL_STATE_DATA_ERROR (1 << 16)
+#define CBE_CAUSE_RA_RESPONSE_DATA_ERROR (1 << 17)
+#define CBE_CAUSE_HA_RESPONSE_DATA_ERROR (1 << 18)
+#define CBE_CAUSE_FORCED_ERROR (1 << 19)
+
+/* CBE cbrexecstatus bits */
+#define CBR_EXS_ABORT_OCC_BIT 0
+#define CBR_EXS_INT_OCC_BIT 1
+#define CBR_EXS_PENDING_BIT 2
+#define CBR_EXS_QUEUED_BIT 3
+#define CBR_EXS_TLB_INVAL_BIT 4
+#define CBR_EXS_EXCEPTION_BIT 5
+#define CBR_EXS_CB_INT_PENDING_BIT 6
+
+#define CBR_EXS_ABORT_OCC (1 << CBR_EXS_ABORT_OCC_BIT)
+#define CBR_EXS_INT_OCC (1 << CBR_EXS_INT_OCC_BIT)
+#define CBR_EXS_PENDING (1 << CBR_EXS_PENDING_BIT)
+#define CBR_EXS_QUEUED (1 << CBR_EXS_QUEUED_BIT)
+#define CBR_EXS_TLB_INVAL (1 << CBR_EXS_TLB_INVAL_BIT)
+#define CBR_EXS_EXCEPTION (1 << CBR_EXS_EXCEPTION_BIT)
+#define CBR_EXS_CB_INT_PENDING (1 << CBR_EXS_CB_INT_PENDING_BIT)
+
+/*
+ * Exceptions are retried for the following cases. If any OTHER bits are set
+ * in ecause, the exception is not retryable.
+ */
+#define EXCEPTION_RETRY_BITS (CBE_CAUSE_EXECUTION_HW_ERROR | \
+ CBE_CAUSE_TLBHW_ERROR | \
+ CBE_CAUSE_RA_REQUEST_TIMEOUT | \
+ CBE_CAUSE_RA_RESPONSE_NON_FATAL | \
+ CBE_CAUSE_HA_RESPONSE_NON_FATAL | \
+ CBE_CAUSE_RA_RESPONSE_DATA_ERROR | \
+ CBE_CAUSE_HA_RESPONSE_DATA_ERROR \
+ )
+
+/* Message queue head structure */
+union gru_mesqhead {
+ unsigned long val;
+ struct {
+ unsigned int head;
+ unsigned int limit;
+ };
+};
+
+
+/* Generate the low word of a GRU instruction */
+static inline unsigned long
+__opdword(unsigned char opcode, unsigned char exopc, unsigned char xtype,
+ unsigned char iaa0, unsigned char iaa1,
+ unsigned long idef2, unsigned char ima)
+{
+ return (1 << GRU_CB_ICMD_SHFT) |
+ ((unsigned long)CBS_ACTIVE << GRU_ISTATUS_SHFT) |
+ (idef2<< GRU_IDEF2_SHFT) |
+ (iaa0 << GRU_CB_IAA0_SHFT) |
+ (iaa1 << GRU_CB_IAA1_SHFT) |
+ (ima << GRU_CB_IMA_SHFT) |
+ (xtype << GRU_CB_XTYPE_SHFT) |
+ (opcode << GRU_CB_OPC_SHFT) |
+ (exopc << GRU_CB_EXOPC_SHFT);
+}
+
+/*
+ * Architecture specific intrinsics
+ */
+static inline void gru_flush_cache(void *p)
+{
+ __flush_cache(p);
+}
+
+/*
+ * Store the lower 64 bits of the command including the "start" bit. Then
+ * start the instruction executing.
+ */
+static inline void gru_start_instruction(struct gru_instruction *ins, unsigned long op64)
+{
+ gru_ordered_store_ulong(ins, op64);
+ mb();
+ gru_flush_cache(ins);
+}
+
+
+/* Convert "hints" to IMA */
+#define CB_IMA(h) ((h) | IMA_UNMAPPED)
+
+/* Convert data segment cache line index into TRI0 / TRI1 value */
+#define GRU_DINDEX(i) ((i) * GRU_CACHE_LINE_BYTES)
+
+/* Inline functions for GRU instructions.
+ * Note:
+ * - nelem and stride are in elements
+ * - tri0/tri1 is in bytes for the beginning of the data segment.
+ */
+static inline void gru_vload_phys(void *cb, unsigned long gpa,
+ unsigned int tri0, int iaa, unsigned long hints)
+{
+ struct gru_instruction *ins = (struct gru_instruction *)cb;
+
+ ins->baddr0 = (long)gpa | ((unsigned long)iaa << 62);
+ ins->nelem = 1;
+ ins->op1_stride = 1;
+ gru_start_instruction(ins, __opdword(OP_VLOAD, 0, XTYPE_DW, iaa, 0,
+ (unsigned long)tri0, CB_IMA(hints)));
+}
+
+static inline void gru_vstore_phys(void *cb, unsigned long gpa,
+ unsigned int tri0, int iaa, unsigned long hints)
+{
+ struct gru_instruction *ins = (struct gru_instruction *)cb;
+
+ ins->baddr0 = (long)gpa | ((unsigned long)iaa << 62);
+ ins->nelem = 1;
+ ins->op1_stride = 1;
+ gru_start_instruction(ins, __opdword(OP_VSTORE, 0, XTYPE_DW, iaa, 0,
+ (unsigned long)tri0, CB_IMA(hints)));
+}
+
+static inline void gru_vload(void *cb, unsigned long mem_addr,
+ unsigned int tri0, unsigned char xtype, unsigned long nelem,
+ unsigned long stride, unsigned long hints)
+{
+ struct gru_instruction *ins = (struct gru_instruction *)cb;
+
+ ins->baddr0 = (long)mem_addr;
+ ins->nelem = nelem;
+ ins->op1_stride = stride;
+ gru_start_instruction(ins, __opdword(OP_VLOAD, 0, xtype, IAA_RAM, 0,
+ (unsigned long)tri0, CB_IMA(hints)));
+}
+
+static inline void gru_vstore(void *cb, unsigned long mem_addr,
+ unsigned int tri0, unsigned char xtype, unsigned long nelem,
+ unsigned long stride, unsigned long hints)
+{
+ struct gru_instruction *ins = (void *)cb;
+
+ ins->baddr0 = (long)mem_addr;
+ ins->nelem = nelem;
+ ins->op1_stride = stride;
+ gru_start_instruction(ins, __opdword(OP_VSTORE, 0, xtype, IAA_RAM, 0,
+ tri0, CB_IMA(hints)));
+}
+
+static inline void gru_ivload(void *cb, unsigned long mem_addr,
+ unsigned int tri0, unsigned int tri1, unsigned char xtype,
+ unsigned long nelem, unsigned long hints)
+{
+ struct gru_instruction *ins = (void *)cb;
+
+ ins->baddr0 = (long)mem_addr;
+ ins->nelem = nelem;
+ ins->tri1_bufsize = tri1;
+ gru_start_instruction(ins, __opdword(OP_IVLOAD, 0, xtype, IAA_RAM, 0,
+ tri0, CB_IMA(hints)));
+}
+
+static inline void gru_ivstore(void *cb, unsigned long mem_addr,
+ unsigned int tri0, unsigned int tri1,
+ unsigned char xtype, unsigned long nelem, unsigned long hints)
+{
+ struct gru_instruction *ins = (void *)cb;
+
+ ins->baddr0 = (long)mem_addr;
+ ins->nelem = nelem;
+ ins->tri1_bufsize = tri1;
+ gru_start_instruction(ins, __opdword(OP_IVSTORE, 0, xtype, IAA_RAM, 0,
+ tri0, CB_IMA(hints)));
+}
+
+static inline void gru_vset(void *cb, unsigned long mem_addr,
+ unsigned long value, unsigned char xtype, unsigned long nelem,
+ unsigned long stride, unsigned long hints)
+{
+ struct gru_instruction *ins = (void *)cb;
+
+ ins->baddr0 = (long)mem_addr;
+ ins->op2_value_baddr1 = value;
+ ins->nelem = nelem;
+ ins->op1_stride = stride;
+ gru_start_instruction(ins, __opdword(OP_VSET, 0, xtype, IAA_RAM, 0,
+ 0, CB_IMA(hints)));
+}
+
+static inline void gru_ivset(void *cb, unsigned long mem_addr,
+ unsigned int tri1, unsigned long value, unsigned char xtype,
+ unsigned long nelem, unsigned long hints)
+{
+ struct gru_instruction *ins = (void *)cb;
+
+ ins->baddr0 = (long)mem_addr;
+ ins->op2_value_baddr1 = value;
+ ins->nelem = nelem;
+ ins->tri1_bufsize = tri1;
+ gru_start_instruction(ins, __opdword(OP_IVSET, 0, xtype, IAA_RAM, 0,
+ 0, CB_IMA(hints)));
+}
+
+static inline void gru_vflush(void *cb, unsigned long mem_addr,
+ unsigned long nelem, unsigned char xtype, unsigned long stride,
+ unsigned long hints)
+{
+ struct gru_instruction *ins = (void *)cb;
+
+ ins->baddr0 = (long)mem_addr;
+ ins->op1_stride = stride;
+ ins->nelem = nelem;
+ gru_start_instruction(ins, __opdword(OP_VFLUSH, 0, xtype, IAA_RAM, 0,
+ 0, CB_IMA(hints)));
+}
+
+static inline void gru_nop(void *cb, int hints)
+{
+ struct gru_instruction *ins = (void *)cb;
+
+ gru_start_instruction(ins, __opdword(OP_NOP, 0, 0, 0, 0, 0, CB_IMA(hints)));
+}
+
+
+static inline void gru_bcopy(void *cb, const unsigned long src,
+ unsigned long dest,
+ unsigned int tri0, unsigned int xtype, unsigned long nelem,
+ unsigned int bufsize, unsigned long hints)
+{
+ struct gru_instruction *ins = (void *)cb;
+
+ ins->baddr0 = (long)src;
+ ins->op2_value_baddr1 = (long)dest;
+ ins->nelem = nelem;
+ ins->tri1_bufsize = bufsize;
+ gru_start_instruction(ins, __opdword(OP_BCOPY, 0, xtype, IAA_RAM,
+ IAA_RAM, tri0, CB_IMA(hints)));
+}
+
+static inline void gru_bstore(void *cb, const unsigned long src,
+ unsigned long dest, unsigned int tri0, unsigned int xtype,
+ unsigned long nelem, unsigned long hints)
+{
+ struct gru_instruction *ins = (void *)cb;
+
+ ins->baddr0 = (long)src;
+ ins->op2_value_baddr1 = (long)dest;
+ ins->nelem = nelem;
+ gru_start_instruction(ins, __opdword(OP_BSTORE, 0, xtype, 0, IAA_RAM,
+ tri0, CB_IMA(hints)));
+}
+
+static inline void gru_gamir(void *cb, int exopc, unsigned long src,
+ unsigned int xtype, unsigned long hints)
+{
+ struct gru_instruction *ins = (void *)cb;
+
+ ins->baddr0 = (long)src;
+ gru_start_instruction(ins, __opdword(OP_GAMIR, exopc, xtype, IAA_RAM, 0,
+ 0, CB_IMA(hints)));
+}
+
+static inline void gru_gamirr(void *cb, int exopc, unsigned long src,
+ unsigned int xtype, unsigned long hints)
+{
+ struct gru_instruction *ins = (void *)cb;
+
+ ins->baddr0 = (long)src;
+ gru_start_instruction(ins, __opdword(OP_GAMIRR, exopc, xtype, IAA_RAM, 0,
+ 0, CB_IMA(hints)));
+}
+
+static inline void gru_gamer(void *cb, int exopc, unsigned long src,
+ unsigned int xtype,
+ unsigned long operand1, unsigned long operand2,
+ unsigned long hints)
+{
+ struct gru_instruction *ins = (void *)cb;
+
+ ins->baddr0 = (long)src;
+ ins->op1_stride = operand1;
+ ins->op2_value_baddr1 = operand2;
+ gru_start_instruction(ins, __opdword(OP_GAMER, exopc, xtype, IAA_RAM, 0,
+ 0, CB_IMA(hints)));
+}
+
+static inline void gru_gamerr(void *cb, int exopc, unsigned long src,
+ unsigned int xtype, unsigned long operand1,
+ unsigned long operand2, unsigned long hints)
+{
+ struct gru_instruction *ins = (void *)cb;
+
+ ins->baddr0 = (long)src;
+ ins->op1_stride = operand1;
+ ins->op2_value_baddr1 = operand2;
+ gru_start_instruction(ins, __opdword(OP_GAMERR, exopc, xtype, IAA_RAM, 0,
+ 0, CB_IMA(hints)));
+}
+
+static inline void gru_gamxr(void *cb, unsigned long src,
+ unsigned int tri0, unsigned long hints)
+{
+ struct gru_instruction *ins = (void *)cb;
+
+ ins->baddr0 = (long)src;
+ ins->nelem = 4;
+ gru_start_instruction(ins, __opdword(OP_GAMXR, EOP_XR_CSWAP, XTYPE_DW,
+ IAA_RAM, 0, 0, CB_IMA(hints)));
+}
+
+static inline void gru_mesq(void *cb, unsigned long queue,
+ unsigned long tri0, unsigned long nelem,
+ unsigned long hints)
+{
+ struct gru_instruction *ins = (void *)cb;
+
+ ins->baddr0 = (long)queue;
+ ins->nelem = nelem;
+ gru_start_instruction(ins, __opdword(OP_MESQ, 0, XTYPE_CL, IAA_RAM, 0,
+ tri0, CB_IMA(hints)));
+}
+
+static inline unsigned long gru_get_amo_value(void *cb)
+{
+ struct gru_instruction *ins = (void *)cb;
+
+ return ins->avalue;
+}
+
+static inline int gru_get_amo_value_head(void *cb)
+{
+ struct gru_instruction *ins = (void *)cb;
+
+ return ins->avalue & 0xffffffff;
+}
+
+static inline int gru_get_amo_value_limit(void *cb)
+{
+ struct gru_instruction *ins = (void *)cb;
+
+ return ins->avalue >> 32;
+}
+
+static inline union gru_mesqhead gru_mesq_head(int head, int limit)
+{
+ union gru_mesqhead mqh;
+
+ mqh.head = head;
+ mqh.limit = limit;
+ return mqh;
+}
+
+/*
+ * Get struct control_block_extended_exc_detail for CB.
+ */
+extern int gru_get_cb_exception_detail(void *cb,
+ struct control_block_extended_exc_detail *excdet);
+
+#define GRU_EXC_STR_SIZE 256
+
+
+/*
+ * Control block definition for checking status
+ */
+struct gru_control_block_status {
+ unsigned int icmd :1;
+ unsigned int ima :3;
+ unsigned int reserved0 :4;
+ unsigned int unused1 :24;
+ unsigned int unused2 :24;
+ unsigned int istatus :2;
+ unsigned int isubstatus :4;
+ unsigned int unused3 :2;
+};
+
+/* Get CB status */
+static inline int gru_get_cb_status(void *cb)
+{
+ struct gru_control_block_status *cbs = (void *)cb;
+
+ return cbs->istatus;
+}
+
+/* Get CB message queue substatus */
+static inline int gru_get_cb_message_queue_substatus(void *cb)
+{
+ struct gru_control_block_status *cbs = (void *)cb;
+
+ return cbs->isubstatus & CBSS_MSG_QUEUE_MASK;
+}
+
+/* Get CB substatus */
+static inline int gru_get_cb_substatus(void *cb)
+{
+ struct gru_control_block_status *cbs = (void *)cb;
+
+ return cbs->isubstatus;
+}
+
+/*
+ * User interface to check an instruction status. UPM and exceptions
+ * are handled automatically. However, this function does NOT wait
+ * for an active instruction to complete.
+ *
+ */
+static inline int gru_check_status(void *cb)
+{
+ struct gru_control_block_status *cbs = (void *)cb;
+ int ret;
+
+ ret = cbs->istatus;
+ if (ret != CBS_ACTIVE)
+ ret = gru_check_status_proc(cb);
+ return ret;
+}
+
+/*
+ * User interface (via inline function) to wait for an instruction
+ * to complete. Completion status (IDLE or EXCEPTION is returned
+ * to the user. Exception due to hardware errors are automatically
+ * retried before returning an exception.
+ *
+ */
+static inline int gru_wait(void *cb)
+{
+ return gru_wait_proc(cb);
+}
+
+/*
+ * Wait for CB to complete. Aborts program if error. (Note: error does NOT
+ * mean TLB mis - only fatal errors such as memory parity error or user
+ * bugs will cause termination.
+ */
+static inline void gru_wait_abort(void *cb)
+{
+ gru_wait_abort_proc(cb);
+}
+
+/*
+ * Get a pointer to the start of a gseg
+ * p - Any valid pointer within the gseg
+ */
+static inline void *gru_get_gseg_pointer (void *p)
+{
+ return (void *)((unsigned long)p & ~(GRU_GSEG_PAGESIZE - 1));
+}
+
+/*
+ * Get a pointer to a control block
+ * gseg - GSeg address returned from gru_get_thread_gru_segment()
+ * index - index of desired CB
+ */
+static inline void *gru_get_cb_pointer(void *gseg,
+ int index)
+{
+ return gseg + GRU_CB_BASE + index * GRU_HANDLE_STRIDE;
+}
+
+/*
+ * Get a pointer to a cacheline in the data segment portion of a GSeg
+ * gseg - GSeg address returned from gru_get_thread_gru_segment()
+ * index - index of desired cache line
+ */
+static inline void *gru_get_data_pointer(void *gseg, int index)
+{
+ return gseg + GRU_DS_BASE + index * GRU_CACHE_LINE_BYTES;
+}
+
+/*
+ * Convert a vaddr into the tri index within the GSEG
+ * vaddr - virtual address of within gseg
+ */
+static inline int gru_get_tri(void *vaddr)
+{
+ return ((unsigned long)vaddr & (GRU_GSEG_PAGESIZE - 1)) - GRU_DS_BASE;
+}
+#endif /* __GRU_INSTRUCTIONS_H__ */
diff --git a/drivers/misc/sgi-gru/grufault.c b/drivers/misc/sgi-gru/grufault.c
new file mode 100644
index 0000000..f74fc0c
--- /dev/null
+++ b/drivers/misc/sgi-gru/grufault.c
@@ -0,0 +1,903 @@
+/*
+ * SN Platform GRU Driver
+ *
+ * FAULT HANDLER FOR GRU DETECTED TLB MISSES
+ *
+ * This file contains code that handles TLB misses within the GRU.
+ * These misses are reported either via interrupts or user polling of
+ * the user CB.
+ *
+ * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/spinlock.h>
+#include <linux/mm.h>
+#include <linux/hugetlb.h>
+#include <linux/device.h>
+#include <linux/io.h>
+#include <linux/uaccess.h>
+#include <linux/security.h>
+#include <linux/prefetch.h>
+#include <asm/pgtable.h>
+#include "gru.h"
+#include "grutables.h"
+#include "grulib.h"
+#include "gru_instructions.h"
+#include <asm/uv/uv_hub.h>
+
+/* Return codes for vtop functions */
+#define VTOP_SUCCESS 0
+#define VTOP_INVALID -1
+#define VTOP_RETRY -2
+
+
+/*
+ * Test if a physical address is a valid GRU GSEG address
+ */
+static inline int is_gru_paddr(unsigned long paddr)
+{
+ return paddr >= gru_start_paddr && paddr < gru_end_paddr;
+}
+
+/*
+ * Find the vma of a GRU segment. Caller must hold mmap_sem.
+ */
+struct vm_area_struct *gru_find_vma(unsigned long vaddr)
+{
+ struct vm_area_struct *vma;
+
+ vma = find_vma(current->mm, vaddr);
+ if (vma && vma->vm_start <= vaddr && vma->vm_ops == &gru_vm_ops)
+ return vma;
+ return NULL;
+}
+
+/*
+ * Find and lock the gts that contains the specified user vaddr.
+ *
+ * Returns:
+ * - *gts with the mmap_sem locked for read and the GTS locked.
+ * - NULL if vaddr invalid OR is not a valid GSEG vaddr.
+ */
+
+static struct gru_thread_state *gru_find_lock_gts(unsigned long vaddr)
+{
+ struct mm_struct *mm = current->mm;
+ struct vm_area_struct *vma;
+ struct gru_thread_state *gts = NULL;
+
+ down_read(&mm->mmap_sem);
+ vma = gru_find_vma(vaddr);
+ if (vma)
+ gts = gru_find_thread_state(vma, TSID(vaddr, vma));
+ if (gts)
+ mutex_lock(>s->ts_ctxlock);
+ else
+ up_read(&mm->mmap_sem);
+ return gts;
+}
+
+static struct gru_thread_state *gru_alloc_locked_gts(unsigned long vaddr)
+{
+ struct mm_struct *mm = current->mm;
+ struct vm_area_struct *vma;
+ struct gru_thread_state *gts = ERR_PTR(-EINVAL);
+
+ down_write(&mm->mmap_sem);
+ vma = gru_find_vma(vaddr);
+ if (!vma)
+ goto err;
+
+ gts = gru_alloc_thread_state(vma, TSID(vaddr, vma));
+ if (IS_ERR(gts))
+ goto err;
+ mutex_lock(>s->ts_ctxlock);
+ downgrade_write(&mm->mmap_sem);
+ return gts;
+
+err:
+ up_write(&mm->mmap_sem);
+ return gts;
+}
+
+/*
+ * Unlock a GTS that was previously locked with gru_find_lock_gts().
+ */
+static void gru_unlock_gts(struct gru_thread_state *gts)
+{
+ mutex_unlock(>s->ts_ctxlock);
+ up_read(¤t->mm->mmap_sem);
+}
+
+/*
+ * Set a CB.istatus to active using a user virtual address. This must be done
+ * just prior to a TFH RESTART. The new cb.istatus is an in-cache status ONLY.
+ * If the line is evicted, the status may be lost. The in-cache update
+ * is necessary to prevent the user from seeing a stale cb.istatus that will
+ * change as soon as the TFH restart is complete. Races may cause an
+ * occasional failure to clear the cb.istatus, but that is ok.
+ */
+static void gru_cb_set_istatus_active(struct gru_instruction_bits *cbk)
+{
+ if (cbk) {
+ cbk->istatus = CBS_ACTIVE;
+ }
+}
+
+/*
+ * Read & clear a TFM
+ *
+ * The GRU has an array of fault maps. A map is private to a cpu
+ * Only one cpu will be accessing a cpu's fault map.
+ *
+ * This function scans the cpu-private fault map & clears all bits that
+ * are set. The function returns a bitmap that indicates the bits that
+ * were cleared. Note that sense the maps may be updated asynchronously by
+ * the GRU, atomic operations must be used to clear bits.
+ */
+static void get_clear_fault_map(struct gru_state *gru,
+ struct gru_tlb_fault_map *imap,
+ struct gru_tlb_fault_map *dmap)
+{
+ unsigned long i, k;
+ struct gru_tlb_fault_map *tfm;
+
+ tfm = get_tfm_for_cpu(gru, gru_cpu_fault_map_id());
+ prefetchw(tfm); /* Helps on hardware, required for emulator */
+ for (i = 0; i < BITS_TO_LONGS(GRU_NUM_CBE); i++) {
+ k = tfm->fault_bits[i];
+ if (k)
+ k = xchg(&tfm->fault_bits[i], 0UL);
+ imap->fault_bits[i] = k;
+ k = tfm->done_bits[i];
+ if (k)
+ k = xchg(&tfm->done_bits[i], 0UL);
+ dmap->fault_bits[i] = k;
+ }
+
+ /*
+ * Not functionally required but helps performance. (Required
+ * on emulator)
+ */
+ gru_flush_cache(tfm);
+}
+
+/*
+ * Atomic (interrupt context) & non-atomic (user context) functions to
+ * convert a vaddr into a physical address. The size of the page
+ * is returned in pageshift.
+ * returns:
+ * 0 - successful
+ * < 0 - error code
+ * 1 - (atomic only) try again in non-atomic context
+ */
+static int non_atomic_pte_lookup(struct vm_area_struct *vma,
+ unsigned long vaddr, int write,
+ unsigned long *paddr, int *pageshift)
+{
+ struct page *page;
+
+#ifdef CONFIG_HUGETLB_PAGE
+ *pageshift = is_vm_hugetlb_page(vma) ? HPAGE_SHIFT : PAGE_SHIFT;
+#else
+ *pageshift = PAGE_SHIFT;
+#endif
+ if (get_user_pages
+ (current, current->mm, vaddr, 1, write, 0, &page, NULL) <= 0)
+ return -EFAULT;
+ *paddr = page_to_phys(page);
+ put_page(page);
+ return 0;
+}
+
+/*
+ * atomic_pte_lookup
+ *
+ * Convert a user virtual address to a physical address
+ * Only supports Intel large pages (2MB only) on x86_64.
+ * ZZZ - hugepage support is incomplete
+ *
+ * NOTE: mmap_sem is already held on entry to this function. This
+ * guarantees existence of the page tables.
+ */
+static int atomic_pte_lookup(struct vm_area_struct *vma, unsigned long vaddr,
+ int write, unsigned long *paddr, int *pageshift)
+{
+ pgd_t *pgdp;
+ pmd_t *pmdp;
+ pud_t *pudp;
+ pte_t pte;
+
+ pgdp = pgd_offset(vma->vm_mm, vaddr);
+ if (unlikely(pgd_none(*pgdp)))
+ goto err;
+
+ pudp = pud_offset(pgdp, vaddr);
+ if (unlikely(pud_none(*pudp)))
+ goto err;
+
+ pmdp = pmd_offset(pudp, vaddr);
+ if (unlikely(pmd_none(*pmdp)))
+ goto err;
+#ifdef CONFIG_X86_64
+ if (unlikely(pmd_large(*pmdp)))
+ pte = *(pte_t *) pmdp;
+ else
+#endif
+ pte = *pte_offset_kernel(pmdp, vaddr);
+
+ if (unlikely(!pte_present(pte) ||
+ (write && (!pte_write(pte) || !pte_dirty(pte)))))
+ return 1;
+
+ *paddr = pte_pfn(pte) << PAGE_SHIFT;
+#ifdef CONFIG_HUGETLB_PAGE
+ *pageshift = is_vm_hugetlb_page(vma) ? HPAGE_SHIFT : PAGE_SHIFT;
+#else
+ *pageshift = PAGE_SHIFT;
+#endif
+ return 0;
+
+err:
+ return 1;
+}
+
+static int gru_vtop(struct gru_thread_state *gts, unsigned long vaddr,
+ int write, int atomic, unsigned long *gpa, int *pageshift)
+{
+ struct mm_struct *mm = gts->ts_mm;
+ struct vm_area_struct *vma;
+ unsigned long paddr;
+ int ret, ps;
+
+ vma = find_vma(mm, vaddr);
+ if (!vma)
+ goto inval;
+
+ /*
+ * Atomic lookup is faster & usually works even if called in non-atomic
+ * context.
+ */
+ rmb(); /* Must/check ms_range_active before loading PTEs */
+ ret = atomic_pte_lookup(vma, vaddr, write, &paddr, &ps);
+ if (ret) {
+ if (atomic)
+ goto upm;
+ if (non_atomic_pte_lookup(vma, vaddr, write, &paddr, &ps))
+ goto inval;
+ }
+ if (is_gru_paddr(paddr))
+ goto inval;
+ paddr = paddr & ~((1UL << ps) - 1);
+ *gpa = uv_soc_phys_ram_to_gpa(paddr);
+ *pageshift = ps;
+ return VTOP_SUCCESS;
+
+inval:
+ return VTOP_INVALID;
+upm:
+ return VTOP_RETRY;
+}
+
+
+/*
+ * Flush a CBE from cache. The CBE is clean in the cache. Dirty the
+ * CBE cacheline so that the line will be written back to home agent.
+ * Otherwise the line may be silently dropped. This has no impact
+ * except on performance.
+ */
+static void gru_flush_cache_cbe(struct gru_control_block_extended *cbe)
+{
+ if (unlikely(cbe)) {
+ cbe->cbrexecstatus = 0; /* make CL dirty */
+ gru_flush_cache(cbe);
+ }
+}
+
+/*
+ * Preload the TLB with entries that may be required. Currently, preloading
+ * is implemented only for BCOPY. Preload <tlb_preload_count> pages OR to
+ * the end of the bcopy tranfer, whichever is smaller.
+ */
+static void gru_preload_tlb(struct gru_state *gru,
+ struct gru_thread_state *gts, int atomic,
+ unsigned long fault_vaddr, int asid, int write,
+ unsigned char tlb_preload_count,
+ struct gru_tlb_fault_handle *tfh,
+ struct gru_control_block_extended *cbe)
+{
+ unsigned long vaddr = 0, gpa;
+ int ret, pageshift;
+
+ if (cbe->opccpy != OP_BCOPY)
+ return;
+
+ if (fault_vaddr == cbe->cbe_baddr0)
+ vaddr = fault_vaddr + GRU_CACHE_LINE_BYTES * cbe->cbe_src_cl - 1;
+ else if (fault_vaddr == cbe->cbe_baddr1)
+ vaddr = fault_vaddr + (1 << cbe->xtypecpy) * cbe->cbe_nelemcur - 1;
+
+ fault_vaddr &= PAGE_MASK;
+ vaddr &= PAGE_MASK;
+ vaddr = min(vaddr, fault_vaddr + tlb_preload_count * PAGE_SIZE);
+
+ while (vaddr > fault_vaddr) {
+ ret = gru_vtop(gts, vaddr, write, atomic, &gpa, &pageshift);
+ if (ret || tfh_write_only(tfh, gpa, GAA_RAM, vaddr, asid, write,
+ GRU_PAGESIZE(pageshift)))
+ return;
+ gru_dbg(grudev,
+ "%s: gid %d, gts 0x%p, tfh 0x%p, vaddr 0x%lx, asid 0x%x, rw %d, ps %d, gpa 0x%lx\n",
+ atomic ? "atomic" : "non-atomic", gru->gs_gid, gts, tfh,
+ vaddr, asid, write, pageshift, gpa);
+ vaddr -= PAGE_SIZE;
+ STAT(tlb_preload_page);
+ }
+}
+
+/*
+ * Drop a TLB entry into the GRU. The fault is described by info in an TFH.
+ * Input:
+ * cb Address of user CBR. Null if not running in user context
+ * Return:
+ * 0 = dropin, exception, or switch to UPM successful
+ * 1 = range invalidate active
+ * < 0 = error code
+ *
+ */
+static int gru_try_dropin(struct gru_state *gru,
+ struct gru_thread_state *gts,
+ struct gru_tlb_fault_handle *tfh,
+ struct gru_instruction_bits *cbk)
+{
+ struct gru_control_block_extended *cbe = NULL;
+ unsigned char tlb_preload_count = gts->ts_tlb_preload_count;
+ int pageshift = 0, asid, write, ret, atomic = !cbk, indexway;
+ unsigned long gpa = 0, vaddr = 0;
+
+ /*
+ * NOTE: The GRU contains magic hardware that eliminates races between
+ * TLB invalidates and TLB dropins. If an invalidate occurs
+ * in the window between reading the TFH and the subsequent TLB dropin,
+ * the dropin is ignored. This eliminates the need for additional locks.
+ */
+
+ /*
+ * Prefetch the CBE if doing TLB preloading
+ */
+ if (unlikely(tlb_preload_count)) {
+ cbe = gru_tfh_to_cbe(tfh);
+ prefetchw(cbe);
+ }
+
+ /*
+ * Error if TFH state is IDLE or FMM mode & the user issuing a UPM call.
+ * Might be a hardware race OR a stupid user. Ignore FMM because FMM
+ * is a transient state.
+ */
+ if (tfh->status != TFHSTATUS_EXCEPTION) {
+ gru_flush_cache(tfh);
+ sync_core();
+ if (tfh->status != TFHSTATUS_EXCEPTION)
+ goto failnoexception;
+ STAT(tfh_stale_on_fault);
+ }
+ if (tfh->state == TFHSTATE_IDLE)
+ goto failidle;
+ if (tfh->state == TFHSTATE_MISS_FMM && cbk)
+ goto failfmm;
+
+ write = (tfh->cause & TFHCAUSE_TLB_MOD) != 0;
+ vaddr = tfh->missvaddr;
+ asid = tfh->missasid;
+ indexway = tfh->indexway;
+ if (asid == 0)
+ goto failnoasid;
+
+ rmb(); /* TFH must be cache resident before reading ms_range_active */
+
+ /*
+ * TFH is cache resident - at least briefly. Fail the dropin
+ * if a range invalidate is active.
+ */
+ if (atomic_read(>s->ts_gms->ms_range_active))
+ goto failactive;
+
+ ret = gru_vtop(gts, vaddr, write, atomic, &gpa, &pageshift);
+ if (ret == VTOP_INVALID)
+ goto failinval;
+ if (ret == VTOP_RETRY)
+ goto failupm;
+
+ if (!(gts->ts_sizeavail & GRU_SIZEAVAIL(pageshift))) {
+ gts->ts_sizeavail |= GRU_SIZEAVAIL(pageshift);
+ if (atomic || !gru_update_cch(gts)) {
+ gts->ts_force_cch_reload = 1;
+ goto failupm;
+ }
+ }
+
+ if (unlikely(cbe) && pageshift == PAGE_SHIFT) {
+ gru_preload_tlb(gru, gts, atomic, vaddr, asid, write, tlb_preload_count, tfh, cbe);
+ gru_flush_cache_cbe(cbe);
+ }
+
+ gru_cb_set_istatus_active(cbk);
+ gts->ustats.tlbdropin++;
+ tfh_write_restart(tfh, gpa, GAA_RAM, vaddr, asid, write,
+ GRU_PAGESIZE(pageshift));
+ gru_dbg(grudev,
+ "%s: gid %d, gts 0x%p, tfh 0x%p, vaddr 0x%lx, asid 0x%x, indexway 0x%x,"
+ " rw %d, ps %d, gpa 0x%lx\n",
+ atomic ? "atomic" : "non-atomic", gru->gs_gid, gts, tfh, vaddr, asid,
+ indexway, write, pageshift, gpa);
+ STAT(tlb_dropin);
+ return 0;
+
+failnoasid:
+ /* No asid (delayed unload). */
+ STAT(tlb_dropin_fail_no_asid);
+ gru_dbg(grudev, "FAILED no_asid tfh: 0x%p, vaddr 0x%lx\n", tfh, vaddr);
+ if (!cbk)
+ tfh_user_polling_mode(tfh);
+ else
+ gru_flush_cache(tfh);
+ gru_flush_cache_cbe(cbe);
+ return -EAGAIN;
+
+failupm:
+ /* Atomic failure switch CBR to UPM */
+ tfh_user_polling_mode(tfh);
+ gru_flush_cache_cbe(cbe);
+ STAT(tlb_dropin_fail_upm);
+ gru_dbg(grudev, "FAILED upm tfh: 0x%p, vaddr 0x%lx\n", tfh, vaddr);
+ return 1;
+
+failfmm:
+ /* FMM state on UPM call */
+ gru_flush_cache(tfh);
+ gru_flush_cache_cbe(cbe);
+ STAT(tlb_dropin_fail_fmm);
+ gru_dbg(grudev, "FAILED fmm tfh: 0x%p, state %d\n", tfh, tfh->state);
+ return 0;
+
+failnoexception:
+ /* TFH status did not show exception pending */
+ gru_flush_cache(tfh);
+ gru_flush_cache_cbe(cbe);
+ if (cbk)
+ gru_flush_cache(cbk);
+ STAT(tlb_dropin_fail_no_exception);
+ gru_dbg(grudev, "FAILED non-exception tfh: 0x%p, status %d, state %d\n",
+ tfh, tfh->status, tfh->state);
+ return 0;
+
+failidle:
+ /* TFH state was idle - no miss pending */
+ gru_flush_cache(tfh);
+ gru_flush_cache_cbe(cbe);
+ if (cbk)
+ gru_flush_cache(cbk);
+ STAT(tlb_dropin_fail_idle);
+ gru_dbg(grudev, "FAILED idle tfh: 0x%p, state %d\n", tfh, tfh->state);
+ return 0;
+
+failinval:
+ /* All errors (atomic & non-atomic) switch CBR to EXCEPTION state */
+ tfh_exception(tfh);
+ gru_flush_cache_cbe(cbe);
+ STAT(tlb_dropin_fail_invalid);
+ gru_dbg(grudev, "FAILED inval tfh: 0x%p, vaddr 0x%lx\n", tfh, vaddr);
+ return -EFAULT;
+
+failactive:
+ /* Range invalidate active. Switch to UPM iff atomic */
+ if (!cbk)
+ tfh_user_polling_mode(tfh);
+ else
+ gru_flush_cache(tfh);
+ gru_flush_cache_cbe(cbe);
+ STAT(tlb_dropin_fail_range_active);
+ gru_dbg(grudev, "FAILED range active: tfh 0x%p, vaddr 0x%lx\n",
+ tfh, vaddr);
+ return 1;
+}
+
+/*
+ * Process an external interrupt from the GRU. This interrupt is
+ * caused by a TLB miss.
+ * Note that this is the interrupt handler that is registered with linux
+ * interrupt handlers.
+ */
+static irqreturn_t gru_intr(int chiplet, int blade)
+{
+ struct gru_state *gru;
+ struct gru_tlb_fault_map imap, dmap;
+ struct gru_thread_state *gts;
+ struct gru_tlb_fault_handle *tfh = NULL;
+ struct completion *cmp;
+ int cbrnum, ctxnum;
+
+ STAT(intr);
+
+ gru = &gru_base[blade]->bs_grus[chiplet];
+ if (!gru) {
+ dev_err(grudev, "GRU: invalid interrupt: cpu %d, chiplet %d\n",
+ raw_smp_processor_id(), chiplet);
+ return IRQ_NONE;
+ }
+ get_clear_fault_map(gru, &imap, &dmap);
+ gru_dbg(grudev,
+ "cpu %d, chiplet %d, gid %d, imap %016lx %016lx, dmap %016lx %016lx\n",
+ smp_processor_id(), chiplet, gru->gs_gid,
+ imap.fault_bits[0], imap.fault_bits[1],
+ dmap.fault_bits[0], dmap.fault_bits[1]);
+
+ for_each_cbr_in_tfm(cbrnum, dmap.fault_bits) {
+ STAT(intr_cbr);
+ cmp = gru->gs_blade->bs_async_wq;
+ if (cmp)
+ complete(cmp);
+ gru_dbg(grudev, "gid %d, cbr_done %d, done %d\n",
+ gru->gs_gid, cbrnum, cmp ? cmp->done : -1);
+ }
+
+ for_each_cbr_in_tfm(cbrnum, imap.fault_bits) {
+ STAT(intr_tfh);
+ tfh = get_tfh_by_index(gru, cbrnum);
+ prefetchw(tfh); /* Helps on hdw, required for emulator */
+
+ /*
+ * When hardware sets a bit in the faultmap, it implicitly
+ * locks the GRU context so that it cannot be unloaded.
+ * The gts cannot change until a TFH start/writestart command
+ * is issued.
+ */
+ ctxnum = tfh->ctxnum;
+ gts = gru->gs_gts[ctxnum];
+
+ /* Spurious interrupts can cause this. Ignore. */
+ if (!gts) {
+ STAT(intr_spurious);
+ continue;
+ }
+
+ /*
+ * This is running in interrupt context. Trylock the mmap_sem.
+ * If it fails, retry the fault in user context.
+ */
+ gts->ustats.fmm_tlbmiss++;
+ if (!gts->ts_force_cch_reload &&
+ down_read_trylock(>s->ts_mm->mmap_sem)) {
+ gru_try_dropin(gru, gts, tfh, NULL);
+ up_read(>s->ts_mm->mmap_sem);
+ } else {
+ tfh_user_polling_mode(tfh);
+ STAT(intr_mm_lock_failed);
+ }
+ }
+ return IRQ_HANDLED;
+}
+
+irqreturn_t gru0_intr(int irq, void *dev_id)
+{
+ return gru_intr(0, uv_numa_blade_id());
+}
+
+irqreturn_t gru1_intr(int irq, void *dev_id)
+{
+ return gru_intr(1, uv_numa_blade_id());
+}
+
+irqreturn_t gru_intr_mblade(int irq, void *dev_id)
+{
+ int blade;
+
+ for_each_possible_blade(blade) {
+ if (uv_blade_nr_possible_cpus(blade))
+ continue;
+ gru_intr(0, blade);
+ gru_intr(1, blade);
+ }
+ return IRQ_HANDLED;
+}
+
+
+static int gru_user_dropin(struct gru_thread_state *gts,
+ struct gru_tlb_fault_handle *tfh,
+ void *cb)
+{
+ struct gru_mm_struct *gms = gts->ts_gms;
+ int ret;
+
+ gts->ustats.upm_tlbmiss++;
+ while (1) {
+ wait_event(gms->ms_wait_queue,
+ atomic_read(&gms->ms_range_active) == 0);
+ prefetchw(tfh); /* Helps on hdw, required for emulator */
+ ret = gru_try_dropin(gts->ts_gru, gts, tfh, cb);
+ if (ret <= 0)
+ return ret;
+ STAT(call_os_wait_queue);
+ }
+}
+
+/*
+ * This interface is called as a result of a user detecting a "call OS" bit
+ * in a user CB. Normally means that a TLB fault has occurred.
+ * cb - user virtual address of the CB
+ */
+int gru_handle_user_call_os(unsigned long cb)
+{
+ struct gru_tlb_fault_handle *tfh;
+ struct gru_thread_state *gts;
+ void *cbk;
+ int ucbnum, cbrnum, ret = -EINVAL;
+
+ STAT(call_os);
+
+ /* sanity check the cb pointer */
+ ucbnum = get_cb_number((void *)cb);
+ if ((cb & (GRU_HANDLE_STRIDE - 1)) || ucbnum >= GRU_NUM_CB)
+ return -EINVAL;
+
+ gts = gru_find_lock_gts(cb);
+ if (!gts)
+ return -EINVAL;
+ gru_dbg(grudev, "address 0x%lx, gid %d, gts 0x%p\n", cb, gts->ts_gru ? gts->ts_gru->gs_gid : -1, gts);
+
+ if (ucbnum >= gts->ts_cbr_au_count * GRU_CBR_AU_SIZE)
+ goto exit;
+
+ gru_check_context_placement(gts);
+
+ /*
+ * CCH may contain stale data if ts_force_cch_reload is set.
+ */
+ if (gts->ts_gru && gts->ts_force_cch_reload) {
+ gts->ts_force_cch_reload = 0;
+ gru_update_cch(gts);
+ }
+
+ ret = -EAGAIN;
+ cbrnum = thread_cbr_number(gts, ucbnum);
+ if (gts->ts_gru) {
+ tfh = get_tfh_by_index(gts->ts_gru, cbrnum);
+ cbk = get_gseg_base_address_cb(gts->ts_gru->gs_gru_base_vaddr,
+ gts->ts_ctxnum, ucbnum);
+ ret = gru_user_dropin(gts, tfh, cbk);
+ }
+exit:
+ gru_unlock_gts(gts);
+ return ret;
+}
+
+/*
+ * Fetch the exception detail information for a CB that terminated with
+ * an exception.
+ */
+int gru_get_exception_detail(unsigned long arg)
+{
+ struct control_block_extended_exc_detail excdet;
+ struct gru_control_block_extended *cbe;
+ struct gru_thread_state *gts;
+ int ucbnum, cbrnum, ret;
+
+ STAT(user_exception);
+ if (copy_from_user(&excdet, (void __user *)arg, sizeof(excdet)))
+ return -EFAULT;
+
+ gts = gru_find_lock_gts(excdet.cb);
+ if (!gts)
+ return -EINVAL;
+
+ gru_dbg(grudev, "address 0x%lx, gid %d, gts 0x%p\n", excdet.cb, gts->ts_gru ? gts->ts_gru->gs_gid : -1, gts);
+ ucbnum = get_cb_number((void *)excdet.cb);
+ if (ucbnum >= gts->ts_cbr_au_count * GRU_CBR_AU_SIZE) {
+ ret = -EINVAL;
+ } else if (gts->ts_gru) {
+ cbrnum = thread_cbr_number(gts, ucbnum);
+ cbe = get_cbe_by_index(gts->ts_gru, cbrnum);
+ gru_flush_cache(cbe); /* CBE not coherent */
+ sync_core(); /* make sure we are have current data */
+ excdet.opc = cbe->opccpy;
+ excdet.exopc = cbe->exopccpy;
+ excdet.ecause = cbe->ecause;
+ excdet.exceptdet0 = cbe->idef1upd;
+ excdet.exceptdet1 = cbe->idef3upd;
+ excdet.cbrstate = cbe->cbrstate;
+ excdet.cbrexecstatus = cbe->cbrexecstatus;
+ gru_flush_cache_cbe(cbe);
+ ret = 0;
+ } else {
+ ret = -EAGAIN;
+ }
+ gru_unlock_gts(gts);
+
+ gru_dbg(grudev,
+ "cb 0x%lx, op %d, exopc %d, cbrstate %d, cbrexecstatus 0x%x, ecause 0x%x, "
+ "exdet0 0x%lx, exdet1 0x%x\n",
+ excdet.cb, excdet.opc, excdet.exopc, excdet.cbrstate, excdet.cbrexecstatus,
+ excdet.ecause, excdet.exceptdet0, excdet.exceptdet1);
+ if (!ret && copy_to_user((void __user *)arg, &excdet, sizeof(excdet)))
+ ret = -EFAULT;
+ return ret;
+}
+
+/*
+ * User request to unload a context. Content is saved for possible reload.
+ */
+static int gru_unload_all_contexts(void)
+{
+ struct gru_thread_state *gts;
+ struct gru_state *gru;
+ int gid, ctxnum;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+ foreach_gid(gid) {
+ gru = GID_TO_GRU(gid);
+ spin_lock(&gru->gs_lock);
+ for (ctxnum = 0; ctxnum < GRU_NUM_CCH; ctxnum++) {
+ gts = gru->gs_gts[ctxnum];
+ if (gts && mutex_trylock(>s->ts_ctxlock)) {
+ spin_unlock(&gru->gs_lock);
+ gru_unload_context(gts, 1);
+ mutex_unlock(>s->ts_ctxlock);
+ spin_lock(&gru->gs_lock);
+ }
+ }
+ spin_unlock(&gru->gs_lock);
+ }
+ return 0;
+}
+
+int gru_user_unload_context(unsigned long arg)
+{
+ struct gru_thread_state *gts;
+ struct gru_unload_context_req req;
+
+ STAT(user_unload_context);
+ if (copy_from_user(&req, (void __user *)arg, sizeof(req)))
+ return -EFAULT;
+
+ gru_dbg(grudev, "gseg 0x%lx\n", req.gseg);
+
+ if (!req.gseg)
+ return gru_unload_all_contexts();
+
+ gts = gru_find_lock_gts(req.gseg);
+ if (!gts)
+ return -EINVAL;
+
+ if (gts->ts_gru)
+ gru_unload_context(gts, 1);
+ gru_unlock_gts(gts);
+
+ return 0;
+}
+
+/*
+ * User request to flush a range of virtual addresses from the GRU TLB
+ * (Mainly for testing).
+ */
+int gru_user_flush_tlb(unsigned long arg)
+{
+ struct gru_thread_state *gts;
+ struct gru_flush_tlb_req req;
+ struct gru_mm_struct *gms;
+
+ STAT(user_flush_tlb);
+ if (copy_from_user(&req, (void __user *)arg, sizeof(req)))
+ return -EFAULT;
+
+ gru_dbg(grudev, "gseg 0x%lx, vaddr 0x%lx, len 0x%lx\n", req.gseg,
+ req.vaddr, req.len);
+
+ gts = gru_find_lock_gts(req.gseg);
+ if (!gts)
+ return -EINVAL;
+
+ gms = gts->ts_gms;
+ gru_unlock_gts(gts);
+ gru_flush_tlb_range(gms, req.vaddr, req.len);
+
+ return 0;
+}
+
+/*
+ * Fetch GSEG statisticss
+ */
+long gru_get_gseg_statistics(unsigned long arg)
+{
+ struct gru_thread_state *gts;
+ struct gru_get_gseg_statistics_req req;
+
+ if (copy_from_user(&req, (void __user *)arg, sizeof(req)))
+ return -EFAULT;
+
+ /*
+ * The library creates arrays of contexts for threaded programs.
+ * If no gts exists in the array, the context has never been used & all
+ * statistics are implicitly 0.
+ */
+ gts = gru_find_lock_gts(req.gseg);
+ if (gts) {
+ memcpy(&req.stats, >s->ustats, sizeof(gts->ustats));
+ gru_unlock_gts(gts);
+ } else {
+ memset(&req.stats, 0, sizeof(gts->ustats));
+ }
+
+ if (copy_to_user((void __user *)arg, &req, sizeof(req)))
+ return -EFAULT;
+
+ return 0;
+}
+
+/*
+ * Register the current task as the user of the GSEG slice.
+ * Needed for TLB fault interrupt targeting.
+ */
+int gru_set_context_option(unsigned long arg)
+{
+ struct gru_thread_state *gts;
+ struct gru_set_context_option_req req;
+ int ret = 0;
+
+ STAT(set_context_option);
+ if (copy_from_user(&req, (void __user *)arg, sizeof(req)))
+ return -EFAULT;
+ gru_dbg(grudev, "op %d, gseg 0x%lx, value1 0x%lx\n", req.op, req.gseg, req.val1);
+
+ gts = gru_find_lock_gts(req.gseg);
+ if (!gts) {
+ gts = gru_alloc_locked_gts(req.gseg);
+ if (IS_ERR(gts))
+ return PTR_ERR(gts);
+ }
+
+ switch (req.op) {
+ case sco_blade_chiplet:
+ /* Select blade/chiplet for GRU context */
+ if (req.val0 < -1 || req.val0 >= GRU_CHIPLETS_PER_HUB ||
+ req.val1 < -1 || req.val1 >= GRU_MAX_BLADES ||
+ (req.val1 >= 0 && !gru_base[req.val1])) {
+ ret = -EINVAL;
+ } else {
+ gts->ts_user_blade_id = req.val1;
+ gts->ts_user_chiplet_id = req.val0;
+ gru_check_context_placement(gts);
+ }
+ break;
+ case sco_gseg_owner:
+ /* Register the current task as the GSEG owner */
+ gts->ts_tgid_owner = current->tgid;
+ break;
+ case sco_cch_req_slice:
+ /* Set the CCH slice option */
+ gts->ts_cch_req_slice = req.val1 & 3;
+ break;
+ default:
+ ret = -EINVAL;
+ }
+ gru_unlock_gts(gts);
+
+ return ret;
+}
diff --git a/drivers/misc/sgi-gru/grufile.c b/drivers/misc/sgi-gru/grufile.c
new file mode 100644
index 0000000..104a05f
--- /dev/null
+++ b/drivers/misc/sgi-gru/grufile.c
@@ -0,0 +1,623 @@
+/*
+ * SN Platform GRU Driver
+ *
+ * FILE OPERATIONS & DRIVER INITIALIZATION
+ *
+ * This file supports the user system call for file open, close, mmap, etc.
+ * This also incudes the driver initialization code.
+ *
+ * Copyright (c) 2008-2014 Silicon Graphics, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/slab.h>
+#include <linux/mm.h>
+#include <linux/io.h>
+#include <linux/spinlock.h>
+#include <linux/device.h>
+#include <linux/miscdevice.h>
+#include <linux/interrupt.h>
+#include <linux/proc_fs.h>
+#include <linux/uaccess.h>
+#ifdef CONFIG_X86_64
+#include <asm/uv/uv_irq.h>
+#endif
+#include <asm/uv/uv.h>
+#include "gru.h"
+#include "grulib.h"
+#include "grutables.h"
+
+#include <asm/uv/uv_hub.h>
+#include <asm/uv/uv_mmrs.h>
+
+struct gru_blade_state *gru_base[GRU_MAX_BLADES] __read_mostly;
+unsigned long gru_start_paddr __read_mostly;
+void *gru_start_vaddr __read_mostly;
+unsigned long gru_end_paddr __read_mostly;
+unsigned int gru_max_gids __read_mostly;
+struct gru_stats_s gru_stats;
+
+/* Guaranteed user available resources on each node */
+static int max_user_cbrs, max_user_dsr_bytes;
+
+static struct miscdevice gru_miscdev;
+
+static int gru_supported(void)
+{
+ return is_uv_system() &&
+ (uv_hub_info->hub_revision < UV3_HUB_REVISION_BASE);
+}
+
+/*
+ * gru_vma_close
+ *
+ * Called when unmapping a device mapping. Frees all gru resources
+ * and tables belonging to the vma.
+ */
+static void gru_vma_close(struct vm_area_struct *vma)
+{
+ struct gru_vma_data *vdata;
+ struct gru_thread_state *gts;
+ struct list_head *entry, *next;
+
+ if (!vma->vm_private_data)
+ return;
+
+ vdata = vma->vm_private_data;
+ vma->vm_private_data = NULL;
+ gru_dbg(grudev, "vma %p, file %p, vdata %p\n", vma, vma->vm_file,
+ vdata);
+ list_for_each_safe(entry, next, &vdata->vd_head) {
+ gts =
+ list_entry(entry, struct gru_thread_state, ts_next);
+ list_del(>s->ts_next);
+ mutex_lock(>s->ts_ctxlock);
+ if (gts->ts_gru)
+ gru_unload_context(gts, 0);
+ mutex_unlock(>s->ts_ctxlock);
+ gts_drop(gts);
+ }
+ kfree(vdata);
+ STAT(vdata_free);
+}
+
+/*
+ * gru_file_mmap
+ *
+ * Called when mmapping the device. Initializes the vma with a fault handler
+ * and private data structure necessary to allocate, track, and free the
+ * underlying pages.
+ */
+static int gru_file_mmap(struct file *file, struct vm_area_struct *vma)
+{
+ if ((vma->vm_flags & (VM_SHARED | VM_WRITE)) != (VM_SHARED | VM_WRITE))
+ return -EPERM;
+
+ if (vma->vm_start & (GRU_GSEG_PAGESIZE - 1) ||
+ vma->vm_end & (GRU_GSEG_PAGESIZE - 1))
+ return -EINVAL;
+
+ vma->vm_flags |= VM_IO | VM_PFNMAP | VM_LOCKED |
+ VM_DONTCOPY | VM_DONTEXPAND | VM_DONTDUMP;
+ vma->vm_page_prot = PAGE_SHARED;
+ vma->vm_ops = &gru_vm_ops;
+
+ vma->vm_private_data = gru_alloc_vma_data(vma, 0);
+ if (!vma->vm_private_data)
+ return -ENOMEM;
+
+ gru_dbg(grudev, "file %p, vaddr 0x%lx, vma %p, vdata %p\n",
+ file, vma->vm_start, vma, vma->vm_private_data);
+ return 0;
+}
+
+/*
+ * Create a new GRU context
+ */
+static int gru_create_new_context(unsigned long arg)
+{
+ struct gru_create_context_req req;
+ struct vm_area_struct *vma;
+ struct gru_vma_data *vdata;
+ int ret = -EINVAL;
+
+ if (copy_from_user(&req, (void __user *)arg, sizeof(req)))
+ return -EFAULT;
+
+ if (req.data_segment_bytes > max_user_dsr_bytes)
+ return -EINVAL;
+ if (req.control_blocks > max_user_cbrs || !req.maximum_thread_count)
+ return -EINVAL;
+
+ if (!(req.options & GRU_OPT_MISS_MASK))
+ req.options |= GRU_OPT_MISS_FMM_INTR;
+
+ down_write(¤t->mm->mmap_sem);
+ vma = gru_find_vma(req.gseg);
+ if (vma) {
+ vdata = vma->vm_private_data;
+ vdata->vd_user_options = req.options;
+ vdata->vd_dsr_au_count =
+ GRU_DS_BYTES_TO_AU(req.data_segment_bytes);
+ vdata->vd_cbr_au_count = GRU_CB_COUNT_TO_AU(req.control_blocks);
+ vdata->vd_tlb_preload_count = req.tlb_preload_count;
+ ret = 0;
+ }
+ up_write(¤t->mm->mmap_sem);
+
+ return ret;
+}
+
+/*
+ * Get GRU configuration info (temp - for emulator testing)
+ */
+static long gru_get_config_info(unsigned long arg)
+{
+ struct gru_config_info info;
+ int nodesperblade;
+
+ if (num_online_nodes() > 1 &&
+ (uv_node_to_blade_id(1) == uv_node_to_blade_id(0)))
+ nodesperblade = 2;
+ else
+ nodesperblade = 1;
+ memset(&info, 0, sizeof(info));
+ info.cpus = num_online_cpus();
+ info.nodes = num_online_nodes();
+ info.blades = info.nodes / nodesperblade;
+ info.chiplets = GRU_CHIPLETS_PER_BLADE * info.blades;
+
+ if (copy_to_user((void __user *)arg, &info, sizeof(info)))
+ return -EFAULT;
+ return 0;
+}
+
+/*
+ * gru_file_unlocked_ioctl
+ *
+ * Called to update file attributes via IOCTL calls.
+ */
+static long gru_file_unlocked_ioctl(struct file *file, unsigned int req,
+ unsigned long arg)
+{
+ int err = -EBADRQC;
+
+ gru_dbg(grudev, "file %p, req 0x%x, 0x%lx\n", file, req, arg);
+
+ switch (req) {
+ case GRU_CREATE_CONTEXT:
+ err = gru_create_new_context(arg);
+ break;
+ case GRU_SET_CONTEXT_OPTION:
+ err = gru_set_context_option(arg);
+ break;
+ case GRU_USER_GET_EXCEPTION_DETAIL:
+ err = gru_get_exception_detail(arg);
+ break;
+ case GRU_USER_UNLOAD_CONTEXT:
+ err = gru_user_unload_context(arg);
+ break;
+ case GRU_USER_FLUSH_TLB:
+ err = gru_user_flush_tlb(arg);
+ break;
+ case GRU_USER_CALL_OS:
+ err = gru_handle_user_call_os(arg);
+ break;
+ case GRU_GET_GSEG_STATISTICS:
+ err = gru_get_gseg_statistics(arg);
+ break;
+ case GRU_KTEST:
+ err = gru_ktest(arg);
+ break;
+ case GRU_GET_CONFIG_INFO:
+ err = gru_get_config_info(arg);
+ break;
+ case GRU_DUMP_CHIPLET_STATE:
+ err = gru_dump_chiplet_request(arg);
+ break;
+ }
+ return err;
+}
+
+/*
+ * Called at init time to build tables for all GRUs that are present in the
+ * system.
+ */
+static void gru_init_chiplet(struct gru_state *gru, unsigned long paddr,
+ void *vaddr, int blade_id, int chiplet_id)
+{
+ spin_lock_init(&gru->gs_lock);
+ spin_lock_init(&gru->gs_asid_lock);
+ gru->gs_gru_base_paddr = paddr;
+ gru->gs_gru_base_vaddr = vaddr;
+ gru->gs_gid = blade_id * GRU_CHIPLETS_PER_BLADE + chiplet_id;
+ gru->gs_blade = gru_base[blade_id];
+ gru->gs_blade_id = blade_id;
+ gru->gs_chiplet_id = chiplet_id;
+ gru->gs_cbr_map = (GRU_CBR_AU == 64) ? ~0 : (1UL << GRU_CBR_AU) - 1;
+ gru->gs_dsr_map = (1UL << GRU_DSR_AU) - 1;
+ gru->gs_asid_limit = MAX_ASID;
+ gru_tgh_flush_init(gru);
+ if (gru->gs_gid >= gru_max_gids)
+ gru_max_gids = gru->gs_gid + 1;
+ gru_dbg(grudev, "bid %d, gid %d, vaddr %p (0x%lx)\n",
+ blade_id, gru->gs_gid, gru->gs_gru_base_vaddr,
+ gru->gs_gru_base_paddr);
+}
+
+static int gru_init_tables(unsigned long gru_base_paddr, void *gru_base_vaddr)
+{
+ int pnode, nid, bid, chip;
+ int cbrs, dsrbytes, n;
+ int order = get_order(sizeof(struct gru_blade_state));
+ struct page *page;
+ struct gru_state *gru;
+ unsigned long paddr;
+ void *vaddr;
+
+ max_user_cbrs = GRU_NUM_CB;
+ max_user_dsr_bytes = GRU_NUM_DSR_BYTES;
+ for_each_possible_blade(bid) {
+ pnode = uv_blade_to_pnode(bid);
+ nid = uv_blade_to_memory_nid(bid);/* -1 if no memory on blade */
+ page = alloc_pages_node(nid, GFP_KERNEL, order);
+ if (!page)
+ goto fail;
+ gru_base[bid] = page_address(page);
+ memset(gru_base[bid], 0, sizeof(struct gru_blade_state));
+ gru_base[bid]->bs_lru_gru = &gru_base[bid]->bs_grus[0];
+ spin_lock_init(&gru_base[bid]->bs_lock);
+ init_rwsem(&gru_base[bid]->bs_kgts_sema);
+
+ dsrbytes = 0;
+ cbrs = 0;
+ for (gru = gru_base[bid]->bs_grus, chip = 0;
+ chip < GRU_CHIPLETS_PER_BLADE;
+ chip++, gru++) {
+ paddr = gru_chiplet_paddr(gru_base_paddr, pnode, chip);
+ vaddr = gru_chiplet_vaddr(gru_base_vaddr, pnode, chip);
+ gru_init_chiplet(gru, paddr, vaddr, bid, chip);
+ n = hweight64(gru->gs_cbr_map) * GRU_CBR_AU_SIZE;
+ cbrs = max(cbrs, n);
+ n = hweight64(gru->gs_dsr_map) * GRU_DSR_AU_BYTES;
+ dsrbytes = max(dsrbytes, n);
+ }
+ max_user_cbrs = min(max_user_cbrs, cbrs);
+ max_user_dsr_bytes = min(max_user_dsr_bytes, dsrbytes);
+ }
+
+ return 0;
+
+fail:
+ for (bid--; bid >= 0; bid--)
+ free_pages((unsigned long)gru_base[bid], order);
+ return -ENOMEM;
+}
+
+static void gru_free_tables(void)
+{
+ int bid;
+ int order = get_order(sizeof(struct gru_state) *
+ GRU_CHIPLETS_PER_BLADE);
+
+ for (bid = 0; bid < GRU_MAX_BLADES; bid++)
+ free_pages((unsigned long)gru_base[bid], order);
+}
+
+static unsigned long gru_chiplet_cpu_to_mmr(int chiplet, int cpu, int *corep)
+{
+ unsigned long mmr = 0;
+ int core;
+
+ /*
+ * We target the cores of a blade and not the hyperthreads themselves.
+ * There is a max of 8 cores per socket and 2 sockets per blade,
+ * making for a max total of 16 cores (i.e., 16 CPUs without
+ * hyperthreading and 32 CPUs with hyperthreading).
+ */
+ core = uv_cpu_core_number(cpu) + UV_MAX_INT_CORES * uv_cpu_socket_number(cpu);
+ if (core >= GRU_NUM_TFM || uv_cpu_ht_number(cpu))
+ return 0;
+
+ if (chiplet == 0) {
+ mmr = UVH_GR0_TLB_INT0_CONFIG +
+ core * (UVH_GR0_TLB_INT1_CONFIG - UVH_GR0_TLB_INT0_CONFIG);
+ } else if (chiplet == 1) {
+ mmr = UVH_GR1_TLB_INT0_CONFIG +
+ core * (UVH_GR1_TLB_INT1_CONFIG - UVH_GR1_TLB_INT0_CONFIG);
+ } else {
+ BUG();
+ }
+
+ *corep = core;
+ return mmr;
+}
+
+#ifdef CONFIG_IA64
+
+static int gru_irq_count[GRU_CHIPLETS_PER_BLADE];
+
+static void gru_noop(struct irq_data *d)
+{
+}
+
+static struct irq_chip gru_chip[GRU_CHIPLETS_PER_BLADE] = {
+ [0 ... GRU_CHIPLETS_PER_BLADE - 1] {
+ .irq_mask = gru_noop,
+ .irq_unmask = gru_noop,
+ .irq_ack = gru_noop
+ }
+};
+
+static int gru_chiplet_setup_tlb_irq(int chiplet, char *irq_name,
+ irq_handler_t irq_handler, int cpu, int blade)
+{
+ unsigned long mmr;
+ int irq = IRQ_GRU + chiplet;
+ int ret, core;
+
+ mmr = gru_chiplet_cpu_to_mmr(chiplet, cpu, &core);
+ if (mmr == 0)
+ return 0;
+
+ if (gru_irq_count[chiplet] == 0) {
+ gru_chip[chiplet].name = irq_name;
+ ret = irq_set_chip(irq, &gru_chip[chiplet]);
+ if (ret) {
+ printk(KERN_ERR "%s: set_irq_chip failed, errno=%d\n",
+ GRU_DRIVER_ID_STR, -ret);
+ return ret;
+ }
+
+ ret = request_irq(irq, irq_handler, 0, irq_name, NULL);
+ if (ret) {
+ printk(KERN_ERR "%s: request_irq failed, errno=%d\n",
+ GRU_DRIVER_ID_STR, -ret);
+ return ret;
+ }
+ }
+ gru_irq_count[chiplet]++;
+
+ return 0;
+}
+
+static void gru_chiplet_teardown_tlb_irq(int chiplet, int cpu, int blade)
+{
+ unsigned long mmr;
+ int core, irq = IRQ_GRU + chiplet;
+
+ if (gru_irq_count[chiplet] == 0)
+ return;
+
+ mmr = gru_chiplet_cpu_to_mmr(chiplet, cpu, &core);
+ if (mmr == 0)
+ return;
+
+ if (--gru_irq_count[chiplet] == 0)
+ free_irq(irq, NULL);
+}
+
+#elif defined CONFIG_X86_64
+
+static int gru_chiplet_setup_tlb_irq(int chiplet, char *irq_name,
+ irq_handler_t irq_handler, int cpu, int blade)
+{
+ unsigned long mmr;
+ int irq, core;
+ int ret;
+
+ mmr = gru_chiplet_cpu_to_mmr(chiplet, cpu, &core);
+ if (mmr == 0)
+ return 0;
+
+ irq = uv_setup_irq(irq_name, cpu, blade, mmr, UV_AFFINITY_CPU);
+ if (irq < 0) {
+ printk(KERN_ERR "%s: uv_setup_irq failed, errno=%d\n",
+ GRU_DRIVER_ID_STR, -irq);
+ return irq;
+ }
+
+ ret = request_irq(irq, irq_handler, 0, irq_name, NULL);
+ if (ret) {
+ uv_teardown_irq(irq);
+ printk(KERN_ERR "%s: request_irq failed, errno=%d\n",
+ GRU_DRIVER_ID_STR, -ret);
+ return ret;
+ }
+ gru_base[blade]->bs_grus[chiplet].gs_irq[core] = irq;
+ return 0;
+}
+
+static void gru_chiplet_teardown_tlb_irq(int chiplet, int cpu, int blade)
+{
+ int irq, core;
+ unsigned long mmr;
+
+ mmr = gru_chiplet_cpu_to_mmr(chiplet, cpu, &core);
+ if (mmr) {
+ irq = gru_base[blade]->bs_grus[chiplet].gs_irq[core];
+ if (irq) {
+ free_irq(irq, NULL);
+ uv_teardown_irq(irq);
+ }
+ }
+}
+
+#endif
+
+static void gru_teardown_tlb_irqs(void)
+{
+ int blade;
+ int cpu;
+
+ for_each_online_cpu(cpu) {
+ blade = uv_cpu_to_blade_id(cpu);
+ gru_chiplet_teardown_tlb_irq(0, cpu, blade);
+ gru_chiplet_teardown_tlb_irq(1, cpu, blade);
+ }
+ for_each_possible_blade(blade) {
+ if (uv_blade_nr_possible_cpus(blade))
+ continue;
+ gru_chiplet_teardown_tlb_irq(0, 0, blade);
+ gru_chiplet_teardown_tlb_irq(1, 0, blade);
+ }
+}
+
+static int gru_setup_tlb_irqs(void)
+{
+ int blade;
+ int cpu;
+ int ret;
+
+ for_each_online_cpu(cpu) {
+ blade = uv_cpu_to_blade_id(cpu);
+ ret = gru_chiplet_setup_tlb_irq(0, "GRU0_TLB", gru0_intr, cpu, blade);
+ if (ret != 0)
+ goto exit1;
+
+ ret = gru_chiplet_setup_tlb_irq(1, "GRU1_TLB", gru1_intr, cpu, blade);
+ if (ret != 0)
+ goto exit1;
+ }
+ for_each_possible_blade(blade) {
+ if (uv_blade_nr_possible_cpus(blade))
+ continue;
+ ret = gru_chiplet_setup_tlb_irq(0, "GRU0_TLB", gru_intr_mblade, 0, blade);
+ if (ret != 0)
+ goto exit1;
+
+ ret = gru_chiplet_setup_tlb_irq(1, "GRU1_TLB", gru_intr_mblade, 0, blade);
+ if (ret != 0)
+ goto exit1;
+ }
+
+ return 0;
+
+exit1:
+ gru_teardown_tlb_irqs();
+ return ret;
+}
+
+/*
+ * gru_init
+ *
+ * Called at boot or module load time to initialize the GRUs.
+ */
+static int __init gru_init(void)
+{
+ int ret;
+
+ if (!gru_supported())
+ return 0;
+
+#if defined CONFIG_IA64
+ gru_start_paddr = 0xd000000000UL; /* ZZZZZZZZZZZZZZZZZZZ fixme */
+#else
+ gru_start_paddr = uv_read_local_mmr(UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR) &
+ 0x7fffffffffffUL;
+#endif
+ gru_start_vaddr = __va(gru_start_paddr);
+ gru_end_paddr = gru_start_paddr + GRU_MAX_BLADES * GRU_SIZE;
+ printk(KERN_INFO "GRU space: 0x%lx - 0x%lx\n",
+ gru_start_paddr, gru_end_paddr);
+ ret = misc_register(&gru_miscdev);
+ if (ret) {
+ printk(KERN_ERR "%s: misc_register failed\n",
+ GRU_DRIVER_ID_STR);
+ goto exit0;
+ }
+
+ ret = gru_proc_init();
+ if (ret) {
+ printk(KERN_ERR "%s: proc init failed\n", GRU_DRIVER_ID_STR);
+ goto exit1;
+ }
+
+ ret = gru_init_tables(gru_start_paddr, gru_start_vaddr);
+ if (ret) {
+ printk(KERN_ERR "%s: init tables failed\n", GRU_DRIVER_ID_STR);
+ goto exit2;
+ }
+
+ ret = gru_setup_tlb_irqs();
+ if (ret != 0)
+ goto exit3;
+
+ gru_kservices_init();
+
+ printk(KERN_INFO "%s: v%s\n", GRU_DRIVER_ID_STR,
+ GRU_DRIVER_VERSION_STR);
+ return 0;
+
+exit3:
+ gru_free_tables();
+exit2:
+ gru_proc_exit();
+exit1:
+ misc_deregister(&gru_miscdev);
+exit0:
+ return ret;
+
+}
+
+static void __exit gru_exit(void)
+{
+ if (!gru_supported())
+ return;
+
+ gru_teardown_tlb_irqs();
+ gru_kservices_exit();
+ gru_free_tables();
+ misc_deregister(&gru_miscdev);
+ gru_proc_exit();
+}
+
+static const struct file_operations gru_fops = {
+ .owner = THIS_MODULE,
+ .unlocked_ioctl = gru_file_unlocked_ioctl,
+ .mmap = gru_file_mmap,
+ .llseek = noop_llseek,
+};
+
+static struct miscdevice gru_miscdev = {
+ .minor = MISC_DYNAMIC_MINOR,
+ .name = "gru",
+ .fops = &gru_fops,
+};
+
+const struct vm_operations_struct gru_vm_ops = {
+ .close = gru_vma_close,
+ .fault = gru_fault,
+};
+
+#ifndef MODULE
+fs_initcall(gru_init);
+#else
+module_init(gru_init);
+#endif
+module_exit(gru_exit);
+
+module_param(gru_options, ulong, 0644);
+MODULE_PARM_DESC(gru_options, "Various debug options");
+
+MODULE_AUTHOR("Silicon Graphics, Inc.");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION(GRU_DRIVER_ID_STR GRU_DRIVER_VERSION_STR);
+MODULE_VERSION(GRU_DRIVER_VERSION_STR);
+
diff --git a/drivers/misc/sgi-gru/gruhandles.c b/drivers/misc/sgi-gru/gruhandles.c
new file mode 100644
index 0000000..1ee8e82
--- /dev/null
+++ b/drivers/misc/sgi-gru/gruhandles.c
@@ -0,0 +1,210 @@
+/*
+ * GRU KERNEL MCS INSTRUCTIONS
+ *
+ * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <linux/kernel.h>
+#include "gru.h"
+#include "grulib.h"
+#include "grutables.h"
+
+/* 10 sec */
+#ifdef CONFIG_IA64
+#include <asm/processor.h>
+#define GRU_OPERATION_TIMEOUT (((cycles_t) local_cpu_data->itc_freq)*10)
+#define CLKS2NSEC(c) ((c) *1000000000 / local_cpu_data->itc_freq)
+#else
+#include <asm/tsc.h>
+#define GRU_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000)
+#define CLKS2NSEC(c) ((c) * 1000000 / tsc_khz)
+#endif
+
+/* Extract the status field from a kernel handle */
+#define GET_MSEG_HANDLE_STATUS(h) (((*(unsigned long *)(h)) >> 16) & 3)
+
+struct mcs_op_statistic mcs_op_statistics[mcsop_last];
+
+static void update_mcs_stats(enum mcs_op op, unsigned long clks)
+{
+ unsigned long nsec;
+
+ nsec = CLKS2NSEC(clks);
+ atomic_long_inc(&mcs_op_statistics[op].count);
+ atomic_long_add(nsec, &mcs_op_statistics[op].total);
+ if (mcs_op_statistics[op].max < nsec)
+ mcs_op_statistics[op].max = nsec;
+}
+
+static void start_instruction(void *h)
+{
+ unsigned long *w0 = h;
+
+ wmb(); /* setting CMD/STATUS bits must be last */
+ *w0 = *w0 | 0x20001;
+ gru_flush_cache(h);
+}
+
+static void report_instruction_timeout(void *h)
+{
+ unsigned long goff = GSEGPOFF((unsigned long)h);
+ char *id = "???";
+
+ if (TYPE_IS(CCH, goff))
+ id = "CCH";
+ else if (TYPE_IS(TGH, goff))
+ id = "TGH";
+ else if (TYPE_IS(TFH, goff))
+ id = "TFH";
+
+ panic(KERN_ALERT "GRU %p (%s) is malfunctioning\n", h, id);
+}
+
+static int wait_instruction_complete(void *h, enum mcs_op opc)
+{
+ int status;
+ unsigned long start_time = get_cycles();
+
+ while (1) {
+ cpu_relax();
+ status = GET_MSEG_HANDLE_STATUS(h);
+ if (status != CCHSTATUS_ACTIVE)
+ break;
+ if (GRU_OPERATION_TIMEOUT < (get_cycles() - start_time)) {
+ report_instruction_timeout(h);
+ start_time = get_cycles();
+ }
+ }
+ if (gru_options & OPT_STATS)
+ update_mcs_stats(opc, get_cycles() - start_time);
+ return status;
+}
+
+int cch_allocate(struct gru_context_configuration_handle *cch)
+{
+ int ret;
+
+ cch->opc = CCHOP_ALLOCATE;
+ start_instruction(cch);
+ ret = wait_instruction_complete(cch, cchop_allocate);
+
+ /*
+ * Stop speculation into the GSEG being mapped by the previous ALLOCATE.
+ * The GSEG memory does not exist until the ALLOCATE completes.
+ */
+ sync_core();
+ return ret;
+}
+
+int cch_start(struct gru_context_configuration_handle *cch)
+{
+ cch->opc = CCHOP_START;
+ start_instruction(cch);
+ return wait_instruction_complete(cch, cchop_start);
+}
+
+int cch_interrupt(struct gru_context_configuration_handle *cch)
+{
+ cch->opc = CCHOP_INTERRUPT;
+ start_instruction(cch);
+ return wait_instruction_complete(cch, cchop_interrupt);
+}
+
+int cch_deallocate(struct gru_context_configuration_handle *cch)
+{
+ int ret;
+
+ cch->opc = CCHOP_DEALLOCATE;
+ start_instruction(cch);
+ ret = wait_instruction_complete(cch, cchop_deallocate);
+
+ /*
+ * Stop speculation into the GSEG being unmapped by the previous
+ * DEALLOCATE.
+ */
+ sync_core();
+ return ret;
+}
+
+int cch_interrupt_sync(struct gru_context_configuration_handle
+ *cch)
+{
+ cch->opc = CCHOP_INTERRUPT_SYNC;
+ start_instruction(cch);
+ return wait_instruction_complete(cch, cchop_interrupt_sync);
+}
+
+int tgh_invalidate(struct gru_tlb_global_handle *tgh,
+ unsigned long vaddr, unsigned long vaddrmask,
+ int asid, int pagesize, int global, int n,
+ unsigned short ctxbitmap)
+{
+ tgh->vaddr = vaddr;
+ tgh->asid = asid;
+ tgh->pagesize = pagesize;
+ tgh->n = n;
+ tgh->global = global;
+ tgh->vaddrmask = vaddrmask;
+ tgh->ctxbitmap = ctxbitmap;
+ tgh->opc = TGHOP_TLBINV;
+ start_instruction(tgh);
+ return wait_instruction_complete(tgh, tghop_invalidate);
+}
+
+int tfh_write_only(struct gru_tlb_fault_handle *tfh,
+ unsigned long paddr, int gaa,
+ unsigned long vaddr, int asid, int dirty,
+ int pagesize)
+{
+ tfh->fillasid = asid;
+ tfh->fillvaddr = vaddr;
+ tfh->pfn = paddr >> GRU_PADDR_SHIFT;
+ tfh->gaa = gaa;
+ tfh->dirty = dirty;
+ tfh->pagesize = pagesize;
+ tfh->opc = TFHOP_WRITE_ONLY;
+ start_instruction(tfh);
+ return wait_instruction_complete(tfh, tfhop_write_only);
+}
+
+void tfh_write_restart(struct gru_tlb_fault_handle *tfh,
+ unsigned long paddr, int gaa,
+ unsigned long vaddr, int asid, int dirty,
+ int pagesize)
+{
+ tfh->fillasid = asid;
+ tfh->fillvaddr = vaddr;
+ tfh->pfn = paddr >> GRU_PADDR_SHIFT;
+ tfh->gaa = gaa;
+ tfh->dirty = dirty;
+ tfh->pagesize = pagesize;
+ tfh->opc = TFHOP_WRITE_RESTART;
+ start_instruction(tfh);
+}
+
+void tfh_user_polling_mode(struct gru_tlb_fault_handle *tfh)
+{
+ tfh->opc = TFHOP_USER_POLLING_MODE;
+ start_instruction(tfh);
+}
+
+void tfh_exception(struct gru_tlb_fault_handle *tfh)
+{
+ tfh->opc = TFHOP_EXCEPTION;
+ start_instruction(tfh);
+}
+
diff --git a/drivers/misc/sgi-gru/gruhandles.h b/drivers/misc/sgi-gru/gruhandles.h
new file mode 100644
index 0000000..3d7bd36
--- /dev/null
+++ b/drivers/misc/sgi-gru/gruhandles.h
@@ -0,0 +1,530 @@
+/*
+ * SN Platform GRU Driver
+ *
+ * GRU HANDLE DEFINITION
+ *
+ * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef __GRUHANDLES_H__
+#define __GRUHANDLES_H__
+#include "gru_instructions.h"
+
+/*
+ * Manifest constants for GRU Memory Map
+ */
+#define GRU_GSEG0_BASE 0
+#define GRU_MCS_BASE (64 * 1024 * 1024)
+#define GRU_SIZE (128UL * 1024 * 1024)
+
+/* Handle & resource counts */
+#define GRU_NUM_CB 128
+#define GRU_NUM_DSR_BYTES (32 * 1024)
+#define GRU_NUM_TFM 16
+#define GRU_NUM_TGH 24
+#define GRU_NUM_CBE 128
+#define GRU_NUM_TFH 128
+#define GRU_NUM_CCH 16
+
+/* Maximum resource counts that can be reserved by user programs */
+#define GRU_NUM_USER_CBR GRU_NUM_CBE
+#define GRU_NUM_USER_DSR_BYTES GRU_NUM_DSR_BYTES
+
+/* Bytes per handle & handle stride. Code assumes all cb, tfh, cbe handles
+ * are the same */
+#define GRU_HANDLE_BYTES 64
+#define GRU_HANDLE_STRIDE 256
+
+/* Base addresses of handles */
+#define GRU_TFM_BASE (GRU_MCS_BASE + 0x00000)
+#define GRU_TGH_BASE (GRU_MCS_BASE + 0x08000)
+#define GRU_CBE_BASE (GRU_MCS_BASE + 0x10000)
+#define GRU_TFH_BASE (GRU_MCS_BASE + 0x18000)
+#define GRU_CCH_BASE (GRU_MCS_BASE + 0x20000)
+
+/* User gseg constants */
+#define GRU_GSEG_STRIDE (4 * 1024 * 1024)
+#define GSEG_BASE(a) ((a) & ~(GRU_GSEG_PAGESIZE - 1))
+
+/* Data segment constants */
+#define GRU_DSR_AU_BYTES 1024
+#define GRU_DSR_CL (GRU_NUM_DSR_BYTES / GRU_CACHE_LINE_BYTES)
+#define GRU_DSR_AU_CL (GRU_DSR_AU_BYTES / GRU_CACHE_LINE_BYTES)
+#define GRU_DSR_AU (GRU_NUM_DSR_BYTES / GRU_DSR_AU_BYTES)
+
+/* Control block constants */
+#define GRU_CBR_AU_SIZE 2
+#define GRU_CBR_AU (GRU_NUM_CBE / GRU_CBR_AU_SIZE)
+
+/* Convert resource counts to the number of AU */
+#define GRU_DS_BYTES_TO_AU(n) DIV_ROUND_UP(n, GRU_DSR_AU_BYTES)
+#define GRU_CB_COUNT_TO_AU(n) DIV_ROUND_UP(n, GRU_CBR_AU_SIZE)
+
+/* UV limits */
+#define GRU_CHIPLETS_PER_HUB 2
+#define GRU_HUBS_PER_BLADE 1
+#define GRU_CHIPLETS_PER_BLADE (GRU_HUBS_PER_BLADE * GRU_CHIPLETS_PER_HUB)
+
+/* User GRU Gseg offsets */
+#define GRU_CB_BASE 0
+#define GRU_CB_LIMIT (GRU_CB_BASE + GRU_HANDLE_STRIDE * GRU_NUM_CBE)
+#define GRU_DS_BASE 0x20000
+#define GRU_DS_LIMIT (GRU_DS_BASE + GRU_NUM_DSR_BYTES)
+
+/* Convert a GRU physical address to the chiplet offset */
+#define GSEGPOFF(h) ((h) & (GRU_SIZE - 1))
+
+/* Convert an arbitrary handle address to the beginning of the GRU segment */
+#define GRUBASE(h) ((void *)((unsigned long)(h) & ~(GRU_SIZE - 1)))
+
+/* Test a valid handle address to determine the type */
+#define TYPE_IS(hn, h) ((h) >= GRU_##hn##_BASE && (h) < \
+ GRU_##hn##_BASE + GRU_NUM_##hn * GRU_HANDLE_STRIDE && \
+ (((h) & (GRU_HANDLE_STRIDE - 1)) == 0))
+
+
+/* General addressing macros. */
+static inline void *get_gseg_base_address(void *base, int ctxnum)
+{
+ return (void *)(base + GRU_GSEG0_BASE + GRU_GSEG_STRIDE * ctxnum);
+}
+
+static inline void *get_gseg_base_address_cb(void *base, int ctxnum, int line)
+{
+ return (void *)(get_gseg_base_address(base, ctxnum) +
+ GRU_CB_BASE + GRU_HANDLE_STRIDE * line);
+}
+
+static inline void *get_gseg_base_address_ds(void *base, int ctxnum, int line)
+{
+ return (void *)(get_gseg_base_address(base, ctxnum) + GRU_DS_BASE +
+ GRU_CACHE_LINE_BYTES * line);
+}
+
+static inline struct gru_tlb_fault_map *get_tfm(void *base, int ctxnum)
+{
+ return (struct gru_tlb_fault_map *)(base + GRU_TFM_BASE +
+ ctxnum * GRU_HANDLE_STRIDE);
+}
+
+static inline struct gru_tlb_global_handle *get_tgh(void *base, int ctxnum)
+{
+ return (struct gru_tlb_global_handle *)(base + GRU_TGH_BASE +
+ ctxnum * GRU_HANDLE_STRIDE);
+}
+
+static inline struct gru_control_block_extended *get_cbe(void *base, int ctxnum)
+{
+ return (struct gru_control_block_extended *)(base + GRU_CBE_BASE +
+ ctxnum * GRU_HANDLE_STRIDE);
+}
+
+static inline struct gru_tlb_fault_handle *get_tfh(void *base, int ctxnum)
+{
+ return (struct gru_tlb_fault_handle *)(base + GRU_TFH_BASE +
+ ctxnum * GRU_HANDLE_STRIDE);
+}
+
+static inline struct gru_context_configuration_handle *get_cch(void *base,
+ int ctxnum)
+{
+ return (struct gru_context_configuration_handle *)(base +
+ GRU_CCH_BASE + ctxnum * GRU_HANDLE_STRIDE);
+}
+
+static inline unsigned long get_cb_number(void *cb)
+{
+ return (((unsigned long)cb - GRU_CB_BASE) % GRU_GSEG_PAGESIZE) /
+ GRU_HANDLE_STRIDE;
+}
+
+/* byte offset to a specific GRU chiplet. (p=pnode, c=chiplet (0 or 1)*/
+static inline unsigned long gru_chiplet_paddr(unsigned long paddr, int pnode,
+ int chiplet)
+{
+ return paddr + GRU_SIZE * (2 * pnode + chiplet);
+}
+
+static inline void *gru_chiplet_vaddr(void *vaddr, int pnode, int chiplet)
+{
+ return vaddr + GRU_SIZE * (2 * pnode + chiplet);
+}
+
+static inline struct gru_control_block_extended *gru_tfh_to_cbe(
+ struct gru_tlb_fault_handle *tfh)
+{
+ unsigned long cbe;
+
+ cbe = (unsigned long)tfh - GRU_TFH_BASE + GRU_CBE_BASE;
+ return (struct gru_control_block_extended*)cbe;
+}
+
+
+
+
+/*
+ * Global TLB Fault Map
+ * Bitmap of outstanding TLB misses needing interrupt/polling service.
+ *
+ */
+struct gru_tlb_fault_map {
+ unsigned long fault_bits[BITS_TO_LONGS(GRU_NUM_CBE)];
+ unsigned long fill0[2];
+ unsigned long done_bits[BITS_TO_LONGS(GRU_NUM_CBE)];
+ unsigned long fill1[2];
+};
+
+/*
+ * TGH - TLB Global Handle
+ * Used for TLB flushing.
+ *
+ */
+struct gru_tlb_global_handle {
+ unsigned int cmd:1; /* DW 0 */
+ unsigned int delresp:1;
+ unsigned int opc:1;
+ unsigned int fill1:5;
+
+ unsigned int fill2:8;
+
+ unsigned int status:2;
+ unsigned long fill3:2;
+ unsigned int state:3;
+ unsigned long fill4:1;
+
+ unsigned int cause:3;
+ unsigned long fill5:37;
+
+ unsigned long vaddr:64; /* DW 1 */
+
+ unsigned int asid:24; /* DW 2 */
+ unsigned int fill6:8;
+
+ unsigned int pagesize:5;
+ unsigned int fill7:11;
+
+ unsigned int global:1;
+ unsigned int fill8:15;
+
+ unsigned long vaddrmask:39; /* DW 3 */
+ unsigned int fill9:9;
+ unsigned int n:10;
+ unsigned int fill10:6;
+
+ unsigned int ctxbitmap:16; /* DW4 */
+ unsigned long fill11[3];
+};
+
+enum gru_tgh_cmd {
+ TGHCMD_START
+};
+
+enum gru_tgh_opc {
+ TGHOP_TLBNOP,
+ TGHOP_TLBINV
+};
+
+enum gru_tgh_status {
+ TGHSTATUS_IDLE,
+ TGHSTATUS_EXCEPTION,
+ TGHSTATUS_ACTIVE
+};
+
+enum gru_tgh_state {
+ TGHSTATE_IDLE,
+ TGHSTATE_PE_INVAL,
+ TGHSTATE_INTERRUPT_INVAL,
+ TGHSTATE_WAITDONE,
+ TGHSTATE_RESTART_CTX,
+};
+
+enum gru_tgh_cause {
+ TGHCAUSE_RR_ECC,
+ TGHCAUSE_TLB_ECC,
+ TGHCAUSE_LRU_ECC,
+ TGHCAUSE_PS_ECC,
+ TGHCAUSE_MUL_ERR,
+ TGHCAUSE_DATA_ERR,
+ TGHCAUSE_SW_FORCE
+};
+
+
+/*
+ * TFH - TLB Global Handle
+ * Used for TLB dropins into the GRU TLB.
+ *
+ */
+struct gru_tlb_fault_handle {
+ unsigned int cmd:1; /* DW 0 - low 32*/
+ unsigned int delresp:1;
+ unsigned int fill0:2;
+ unsigned int opc:3;
+ unsigned int fill1:9;
+
+ unsigned int status:2;
+ unsigned int fill2:2;
+ unsigned int state:3;
+ unsigned int fill3:1;
+
+ unsigned int cause:6;
+ unsigned int cb_int:1;
+ unsigned int fill4:1;
+
+ unsigned int indexway:12; /* DW 0 - high 32 */
+ unsigned int fill5:4;
+
+ unsigned int ctxnum:4;
+ unsigned int fill6:12;
+
+ unsigned long missvaddr:64; /* DW 1 */
+
+ unsigned int missasid:24; /* DW 2 */
+ unsigned int fill7:8;
+ unsigned int fillasid:24;
+ unsigned int dirty:1;
+ unsigned int gaa:2;
+ unsigned long fill8:5;
+
+ unsigned long pfn:41; /* DW 3 */
+ unsigned int fill9:7;
+ unsigned int pagesize:5;
+ unsigned int fill10:11;
+
+ unsigned long fillvaddr:64; /* DW 4 */
+
+ unsigned long fill11[3];
+};
+
+enum gru_tfh_opc {
+ TFHOP_NOOP,
+ TFHOP_RESTART,
+ TFHOP_WRITE_ONLY,
+ TFHOP_WRITE_RESTART,
+ TFHOP_EXCEPTION,
+ TFHOP_USER_POLLING_MODE = 7,
+};
+
+enum tfh_status {
+ TFHSTATUS_IDLE,
+ TFHSTATUS_EXCEPTION,
+ TFHSTATUS_ACTIVE,
+};
+
+enum tfh_state {
+ TFHSTATE_INACTIVE,
+ TFHSTATE_IDLE,
+ TFHSTATE_MISS_UPM,
+ TFHSTATE_MISS_FMM,
+ TFHSTATE_HW_ERR,
+ TFHSTATE_WRITE_TLB,
+ TFHSTATE_RESTART_CBR,
+};
+
+/* TFH cause bits */
+enum tfh_cause {
+ TFHCAUSE_NONE,
+ TFHCAUSE_TLB_MISS,
+ TFHCAUSE_TLB_MOD,
+ TFHCAUSE_HW_ERROR_RR,
+ TFHCAUSE_HW_ERROR_MAIN_ARRAY,
+ TFHCAUSE_HW_ERROR_VALID,
+ TFHCAUSE_HW_ERROR_PAGESIZE,
+ TFHCAUSE_INSTRUCTION_EXCEPTION,
+ TFHCAUSE_UNCORRECTIBLE_ERROR,
+};
+
+/* GAA values */
+#define GAA_RAM 0x0
+#define GAA_NCRAM 0x2
+#define GAA_MMIO 0x1
+#define GAA_REGISTER 0x3
+
+/* GRU paddr shift for pfn. (NOTE: shift is NOT by actual pagesize) */
+#define GRU_PADDR_SHIFT 12
+
+/*
+ * Context Configuration handle
+ * Used to allocate resources to a GSEG context.
+ *
+ */
+struct gru_context_configuration_handle {
+ unsigned int cmd:1; /* DW0 */
+ unsigned int delresp:1;
+ unsigned int opc:3;
+ unsigned int unmap_enable:1;
+ unsigned int req_slice_set_enable:1;
+ unsigned int req_slice:2;
+ unsigned int cb_int_enable:1;
+ unsigned int tlb_int_enable:1;
+ unsigned int tfm_fault_bit_enable:1;
+ unsigned int tlb_int_select:4;
+
+ unsigned int status:2;
+ unsigned int state:2;
+ unsigned int reserved2:4;
+
+ unsigned int cause:4;
+ unsigned int tfm_done_bit_enable:1;
+ unsigned int unused:3;
+
+ unsigned int dsr_allocation_map;
+
+ unsigned long cbr_allocation_map; /* DW1 */
+
+ unsigned int asid[8]; /* DW 2 - 5 */
+ unsigned short sizeavail[8]; /* DW 6 - 7 */
+} __attribute__ ((packed));
+
+enum gru_cch_opc {
+ CCHOP_START = 1,
+ CCHOP_ALLOCATE,
+ CCHOP_INTERRUPT,
+ CCHOP_DEALLOCATE,
+ CCHOP_INTERRUPT_SYNC,
+};
+
+enum gru_cch_status {
+ CCHSTATUS_IDLE,
+ CCHSTATUS_EXCEPTION,
+ CCHSTATUS_ACTIVE,
+};
+
+enum gru_cch_state {
+ CCHSTATE_INACTIVE,
+ CCHSTATE_MAPPED,
+ CCHSTATE_ACTIVE,
+ CCHSTATE_INTERRUPTED,
+};
+
+/* CCH Exception cause */
+enum gru_cch_cause {
+ CCHCAUSE_REGION_REGISTER_WRITE_ERROR = 1,
+ CCHCAUSE_ILLEGAL_OPCODE = 2,
+ CCHCAUSE_INVALID_START_REQUEST = 3,
+ CCHCAUSE_INVALID_ALLOCATION_REQUEST = 4,
+ CCHCAUSE_INVALID_DEALLOCATION_REQUEST = 5,
+ CCHCAUSE_INVALID_INTERRUPT_REQUEST = 6,
+ CCHCAUSE_CCH_BUSY = 7,
+ CCHCAUSE_NO_CBRS_TO_ALLOCATE = 8,
+ CCHCAUSE_BAD_TFM_CONFIG = 9,
+ CCHCAUSE_CBR_RESOURCES_OVERSUBSCRIPED = 10,
+ CCHCAUSE_DSR_RESOURCES_OVERSUBSCRIPED = 11,
+ CCHCAUSE_CBR_DEALLOCATION_ERROR = 12,
+};
+/*
+ * CBE - Control Block Extended
+ * Maintains internal GRU state for active CBs.
+ *
+ */
+struct gru_control_block_extended {
+ unsigned int reserved0:1; /* DW 0 - low */
+ unsigned int imacpy:3;
+ unsigned int reserved1:4;
+ unsigned int xtypecpy:3;
+ unsigned int iaa0cpy:2;
+ unsigned int iaa1cpy:2;
+ unsigned int reserved2:1;
+ unsigned int opccpy:8;
+ unsigned int exopccpy:8;
+
+ unsigned int idef2cpy:22; /* DW 0 - high */
+ unsigned int reserved3:10;
+
+ unsigned int idef4cpy:22; /* DW 1 */
+ unsigned int reserved4:10;
+ unsigned int idef4upd:22;
+ unsigned int reserved5:10;
+
+ unsigned long idef1upd:64; /* DW 2 */
+
+ unsigned long idef5cpy:64; /* DW 3 */
+
+ unsigned long idef6cpy:64; /* DW 4 */
+
+ unsigned long idef3upd:64; /* DW 5 */
+
+ unsigned long idef5upd:64; /* DW 6 */
+
+ unsigned int idef2upd:22; /* DW 7 */
+ unsigned int reserved6:10;
+
+ unsigned int ecause:20;
+ unsigned int cbrstate:4;
+ unsigned int cbrexecstatus:8;
+};
+
+/* CBE fields for active BCOPY instructions */
+#define cbe_baddr0 idef1upd
+#define cbe_baddr1 idef3upd
+#define cbe_src_cl idef6cpy
+#define cbe_nelemcur idef5upd
+
+enum gru_cbr_state {
+ CBRSTATE_INACTIVE,
+ CBRSTATE_IDLE,
+ CBRSTATE_PE_CHECK,
+ CBRSTATE_QUEUED,
+ CBRSTATE_WAIT_RESPONSE,
+ CBRSTATE_INTERRUPTED,
+ CBRSTATE_INTERRUPTED_MISS_FMM,
+ CBRSTATE_BUSY_INTERRUPT_MISS_FMM,
+ CBRSTATE_INTERRUPTED_MISS_UPM,
+ CBRSTATE_BUSY_INTERRUPTED_MISS_UPM,
+ CBRSTATE_REQUEST_ISSUE,
+ CBRSTATE_BUSY_INTERRUPT,
+};
+
+/* CBE cbrexecstatus bits - defined in gru_instructions.h*/
+/* CBE ecause bits - defined in gru_instructions.h */
+
+/*
+ * Convert a processor pagesize into the strange encoded pagesize used by the
+ * GRU. Processor pagesize is encoded as log of bytes per page. (or PAGE_SHIFT)
+ * pagesize log pagesize grupagesize
+ * 4k 12 0
+ * 16k 14 1
+ * 64k 16 2
+ * 256k 18 3
+ * 1m 20 4
+ * 2m 21 5
+ * 4m 22 6
+ * 16m 24 7
+ * 64m 26 8
+ * ...
+ */
+#define GRU_PAGESIZE(sh) ((((sh) > 20 ? (sh) + 2 : (sh)) >> 1) - 6)
+#define GRU_SIZEAVAIL(sh) (1UL << GRU_PAGESIZE(sh))
+
+/* minimum TLB purge count to ensure a full purge */
+#define GRUMAXINVAL 1024UL
+
+int cch_allocate(struct gru_context_configuration_handle *cch);
+int cch_start(struct gru_context_configuration_handle *cch);
+int cch_interrupt(struct gru_context_configuration_handle *cch);
+int cch_deallocate(struct gru_context_configuration_handle *cch);
+int cch_interrupt_sync(struct gru_context_configuration_handle *cch);
+int tgh_invalidate(struct gru_tlb_global_handle *tgh, unsigned long vaddr,
+ unsigned long vaddrmask, int asid, int pagesize, int global, int n,
+ unsigned short ctxbitmap);
+int tfh_write_only(struct gru_tlb_fault_handle *tfh, unsigned long paddr,
+ int gaa, unsigned long vaddr, int asid, int dirty, int pagesize);
+void tfh_write_restart(struct gru_tlb_fault_handle *tfh, unsigned long paddr,
+ int gaa, unsigned long vaddr, int asid, int dirty, int pagesize);
+void tfh_user_polling_mode(struct gru_tlb_fault_handle *tfh);
+void tfh_exception(struct gru_tlb_fault_handle *tfh);
+
+#endif /* __GRUHANDLES_H__ */
diff --git a/drivers/misc/sgi-gru/grukdump.c b/drivers/misc/sgi-gru/grukdump.c
new file mode 100644
index 0000000..313da31
--- /dev/null
+++ b/drivers/misc/sgi-gru/grukdump.c
@@ -0,0 +1,232 @@
+/*
+ * SN Platform GRU Driver
+ *
+ * Dump GRU State
+ *
+ * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/spinlock.h>
+#include <linux/uaccess.h>
+#include <linux/delay.h>
+#include <linux/bitops.h>
+#include <asm/uv/uv_hub.h>
+#include "gru.h"
+#include "grutables.h"
+#include "gruhandles.h"
+#include "grulib.h"
+
+#define CCH_LOCK_ATTEMPTS 10
+
+static int gru_user_copy_handle(void __user **dp, void *s)
+{
+ if (copy_to_user(*dp, s, GRU_HANDLE_BYTES))
+ return -1;
+ *dp += GRU_HANDLE_BYTES;
+ return 0;
+}
+
+static int gru_dump_context_data(void *grubase,
+ struct gru_context_configuration_handle *cch,
+ void __user *ubuf, int ctxnum, int dsrcnt,
+ int flush_cbrs)
+{
+ void *cb, *cbe, *tfh, *gseg;
+ int i, scr;
+
+ gseg = grubase + ctxnum * GRU_GSEG_STRIDE;
+ cb = gseg + GRU_CB_BASE;
+ cbe = grubase + GRU_CBE_BASE;
+ tfh = grubase + GRU_TFH_BASE;
+
+ for_each_cbr_in_allocation_map(i, &cch->cbr_allocation_map, scr) {
+ if (flush_cbrs)
+ gru_flush_cache(cb);
+ if (gru_user_copy_handle(&ubuf, cb))
+ goto fail;
+ if (gru_user_copy_handle(&ubuf, tfh + i * GRU_HANDLE_STRIDE))
+ goto fail;
+ if (gru_user_copy_handle(&ubuf, cbe + i * GRU_HANDLE_STRIDE))
+ goto fail;
+ cb += GRU_HANDLE_STRIDE;
+ }
+ if (dsrcnt)
+ memcpy(ubuf, gseg + GRU_DS_BASE, dsrcnt * GRU_HANDLE_STRIDE);
+ return 0;
+
+fail:
+ return -EFAULT;
+}
+
+static int gru_dump_tfm(struct gru_state *gru,
+ void __user *ubuf, void __user *ubufend)
+{
+ struct gru_tlb_fault_map *tfm;
+ int i;
+
+ if (GRU_NUM_TFM * GRU_CACHE_LINE_BYTES > ubufend - ubuf)
+ return -EFBIG;
+
+ for (i = 0; i < GRU_NUM_TFM; i++) {
+ tfm = get_tfm(gru->gs_gru_base_vaddr, i);
+ if (gru_user_copy_handle(&ubuf, tfm))
+ goto fail;
+ }
+ return GRU_NUM_TFM * GRU_CACHE_LINE_BYTES;
+
+fail:
+ return -EFAULT;
+}
+
+static int gru_dump_tgh(struct gru_state *gru,
+ void __user *ubuf, void __user *ubufend)
+{
+ struct gru_tlb_global_handle *tgh;
+ int i;
+
+ if (GRU_NUM_TGH * GRU_CACHE_LINE_BYTES > ubufend - ubuf)
+ return -EFBIG;
+
+ for (i = 0; i < GRU_NUM_TGH; i++) {
+ tgh = get_tgh(gru->gs_gru_base_vaddr, i);
+ if (gru_user_copy_handle(&ubuf, tgh))
+ goto fail;
+ }
+ return GRU_NUM_TGH * GRU_CACHE_LINE_BYTES;
+
+fail:
+ return -EFAULT;
+}
+
+static int gru_dump_context(struct gru_state *gru, int ctxnum,
+ void __user *ubuf, void __user *ubufend, char data_opt,
+ char lock_cch, char flush_cbrs)
+{
+ struct gru_dump_context_header hdr;
+ struct gru_dump_context_header __user *uhdr = ubuf;
+ struct gru_context_configuration_handle *cch, *ubufcch;
+ struct gru_thread_state *gts;
+ int try, cch_locked, cbrcnt = 0, dsrcnt = 0, bytes = 0, ret = 0;
+ void *grubase;
+
+ memset(&hdr, 0, sizeof(hdr));
+ grubase = gru->gs_gru_base_vaddr;
+ cch = get_cch(grubase, ctxnum);
+ for (try = 0; try < CCH_LOCK_ATTEMPTS; try++) {
+ cch_locked = trylock_cch_handle(cch);
+ if (cch_locked)
+ break;
+ msleep(1);
+ }
+
+ ubuf += sizeof(hdr);
+ ubufcch = ubuf;
+ if (gru_user_copy_handle(&ubuf, cch)) {
+ if (cch_locked)
+ unlock_cch_handle(cch);
+ return -EFAULT;
+ }
+ if (cch_locked)
+ ubufcch->delresp = 0;
+ bytes = sizeof(hdr) + GRU_CACHE_LINE_BYTES;
+
+ if (cch_locked || !lock_cch) {
+ gts = gru->gs_gts[ctxnum];
+ if (gts && gts->ts_vma) {
+ hdr.pid = gts->ts_tgid_owner;
+ hdr.vaddr = gts->ts_vma->vm_start;
+ }
+ if (cch->state != CCHSTATE_INACTIVE) {
+ cbrcnt = hweight64(cch->cbr_allocation_map) *
+ GRU_CBR_AU_SIZE;
+ dsrcnt = data_opt ? hweight32(cch->dsr_allocation_map) *
+ GRU_DSR_AU_CL : 0;
+ }
+ bytes += (3 * cbrcnt + dsrcnt) * GRU_CACHE_LINE_BYTES;
+ if (bytes > ubufend - ubuf)
+ ret = -EFBIG;
+ else
+ ret = gru_dump_context_data(grubase, cch, ubuf, ctxnum,
+ dsrcnt, flush_cbrs);
+ }
+ if (cch_locked)
+ unlock_cch_handle(cch);
+ if (ret)
+ return ret;
+
+ hdr.magic = GRU_DUMP_MAGIC;
+ hdr.gid = gru->gs_gid;
+ hdr.ctxnum = ctxnum;
+ hdr.cbrcnt = cbrcnt;
+ hdr.dsrcnt = dsrcnt;
+ hdr.cch_locked = cch_locked;
+ if (copy_to_user(uhdr, &hdr, sizeof(hdr)))
+ return -EFAULT;
+
+ return bytes;
+}
+
+int gru_dump_chiplet_request(unsigned long arg)
+{
+ struct gru_state *gru;
+ struct gru_dump_chiplet_state_req req;
+ void __user *ubuf;
+ void __user *ubufend;
+ int ctxnum, ret, cnt = 0;
+
+ if (copy_from_user(&req, (void __user *)arg, sizeof(req)))
+ return -EFAULT;
+
+ /* Currently, only dump by gid is implemented */
+ if (req.gid >= gru_max_gids)
+ return -EINVAL;
+
+ gru = GID_TO_GRU(req.gid);
+ ubuf = req.buf;
+ ubufend = req.buf + req.buflen;
+
+ ret = gru_dump_tfm(gru, ubuf, ubufend);
+ if (ret < 0)
+ goto fail;
+ ubuf += ret;
+
+ ret = gru_dump_tgh(gru, ubuf, ubufend);
+ if (ret < 0)
+ goto fail;
+ ubuf += ret;
+
+ for (ctxnum = 0; ctxnum < GRU_NUM_CCH; ctxnum++) {
+ if (req.ctxnum == ctxnum || req.ctxnum < 0) {
+ ret = gru_dump_context(gru, ctxnum, ubuf, ubufend,
+ req.data_opt, req.lock_cch,
+ req.flush_cbrs);
+ if (ret < 0)
+ goto fail;
+ ubuf += ret;
+ cnt++;
+ }
+ }
+
+ if (copy_to_user((void __user *)arg, &req, sizeof(req)))
+ return -EFAULT;
+ return cnt;
+
+fail:
+ return ret;
+}
diff --git a/drivers/misc/sgi-gru/grukservices.c b/drivers/misc/sgi-gru/grukservices.c
new file mode 100644
index 0000000..967b9dd
--- /dev/null
+++ b/drivers/misc/sgi-gru/grukservices.c
@@ -0,0 +1,1165 @@
+/*
+ * SN Platform GRU Driver
+ *
+ * KERNEL SERVICES THAT USE THE GRU
+ *
+ * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/slab.h>
+#include <linux/mm.h>
+#include <linux/spinlock.h>
+#include <linux/device.h>
+#include <linux/miscdevice.h>
+#include <linux/proc_fs.h>
+#include <linux/interrupt.h>
+#include <linux/uaccess.h>
+#include <linux/delay.h>
+#include <linux/export.h>
+#include <asm/io_apic.h>
+#include "gru.h"
+#include "grulib.h"
+#include "grutables.h"
+#include "grukservices.h"
+#include "gru_instructions.h"
+#include <asm/uv/uv_hub.h>
+
+/*
+ * Kernel GRU Usage
+ *
+ * The following is an interim algorithm for management of kernel GRU
+ * resources. This will likely be replaced when we better understand the
+ * kernel/user requirements.
+ *
+ * Blade percpu resources reserved for kernel use. These resources are
+ * reserved whenever the the kernel context for the blade is loaded. Note
+ * that the kernel context is not guaranteed to be always available. It is
+ * loaded on demand & can be stolen by a user if the user demand exceeds the
+ * kernel demand. The kernel can always reload the kernel context but
+ * a SLEEP may be required!!!.
+ *
+ * Async Overview:
+ *
+ * Each blade has one "kernel context" that owns GRU kernel resources
+ * located on the blade. Kernel drivers use GRU resources in this context
+ * for sending messages, zeroing memory, etc.
+ *
+ * The kernel context is dynamically loaded on demand. If it is not in
+ * use by the kernel, the kernel context can be unloaded & given to a user.
+ * The kernel context will be reloaded when needed. This may require that
+ * a context be stolen from a user.
+ * NOTE: frequent unloading/reloading of the kernel context is
+ * expensive. We are depending on batch schedulers, cpusets, sane
+ * drivers or some other mechanism to prevent the need for frequent
+ * stealing/reloading.
+ *
+ * The kernel context consists of two parts:
+ * - 1 CB & a few DSRs that are reserved for each cpu on the blade.
+ * Each cpu has it's own private resources & does not share them
+ * with other cpus. These resources are used serially, ie,
+ * locked, used & unlocked on each call to a function in
+ * grukservices.
+ * (Now that we have dynamic loading of kernel contexts, I
+ * may rethink this & allow sharing between cpus....)
+ *
+ * - Additional resources can be reserved long term & used directly
+ * by UV drivers located in the kernel. Drivers using these GRU
+ * resources can use asynchronous GRU instructions that send
+ * interrupts on completion.
+ * - these resources must be explicitly locked/unlocked
+ * - locked resources prevent (obviously) the kernel
+ * context from being unloaded.
+ * - drivers using these resource directly issue their own
+ * GRU instruction and must wait/check completion.
+ *
+ * When these resources are reserved, the caller can optionally
+ * associate a wait_queue with the resources and use asynchronous
+ * GRU instructions. When an async GRU instruction completes, the
+ * driver will do a wakeup on the event.
+ *
+ */
+
+
+#define ASYNC_HAN_TO_BID(h) ((h) - 1)
+#define ASYNC_BID_TO_HAN(b) ((b) + 1)
+#define ASYNC_HAN_TO_BS(h) gru_base[ASYNC_HAN_TO_BID(h)]
+
+#define GRU_NUM_KERNEL_CBR 1
+#define GRU_NUM_KERNEL_DSR_BYTES 256
+#define GRU_NUM_KERNEL_DSR_CL (GRU_NUM_KERNEL_DSR_BYTES / \
+ GRU_CACHE_LINE_BYTES)
+
+/* GRU instruction attributes for all instructions */
+#define IMA IMA_CB_DELAY
+
+/* GRU cacheline size is always 64 bytes - even on arches with 128 byte lines */
+#define __gru_cacheline_aligned__ \
+ __attribute__((__aligned__(GRU_CACHE_LINE_BYTES)))
+
+#define MAGIC 0x1234567887654321UL
+
+/* Default retry count for GRU errors on kernel instructions */
+#define EXCEPTION_RETRY_LIMIT 3
+
+/* Status of message queue sections */
+#define MQS_EMPTY 0
+#define MQS_FULL 1
+#define MQS_NOOP 2
+
+/*----------------- RESOURCE MANAGEMENT -------------------------------------*/
+/* optimized for x86_64 */
+struct message_queue {
+ union gru_mesqhead head __gru_cacheline_aligned__; /* CL 0 */
+ int qlines; /* DW 1 */
+ long hstatus[2];
+ void *next __gru_cacheline_aligned__;/* CL 1 */
+ void *limit;
+ void *start;
+ void *start2;
+ char data ____cacheline_aligned; /* CL 2 */
+};
+
+/* First word in every message - used by mesq interface */
+struct message_header {
+ char present;
+ char present2;
+ char lines;
+ char fill;
+};
+
+#define HSTATUS(mq, h) ((mq) + offsetof(struct message_queue, hstatus[h]))
+
+/*
+ * Reload the blade's kernel context into a GRU chiplet. Called holding
+ * the bs_kgts_sema for READ. Will steal user contexts if necessary.
+ */
+static void gru_load_kernel_context(struct gru_blade_state *bs, int blade_id)
+{
+ struct gru_state *gru;
+ struct gru_thread_state *kgts;
+ void *vaddr;
+ int ctxnum, ncpus;
+
+ up_read(&bs->bs_kgts_sema);
+ down_write(&bs->bs_kgts_sema);
+
+ if (!bs->bs_kgts) {
+ do {
+ bs->bs_kgts = gru_alloc_gts(NULL, 0, 0, 0, 0, 0);
+ if (!IS_ERR(bs->bs_kgts))
+ break;
+ msleep(1);
+ } while (true);
+ bs->bs_kgts->ts_user_blade_id = blade_id;
+ }
+ kgts = bs->bs_kgts;
+
+ if (!kgts->ts_gru) {
+ STAT(load_kernel_context);
+ ncpus = uv_blade_nr_possible_cpus(blade_id);
+ kgts->ts_cbr_au_count = GRU_CB_COUNT_TO_AU(
+ GRU_NUM_KERNEL_CBR * ncpus + bs->bs_async_cbrs);
+ kgts->ts_dsr_au_count = GRU_DS_BYTES_TO_AU(
+ GRU_NUM_KERNEL_DSR_BYTES * ncpus +
+ bs->bs_async_dsr_bytes);
+ while (!gru_assign_gru_context(kgts)) {
+ msleep(1);
+ gru_steal_context(kgts);
+ }
+ gru_load_context(kgts);
+ gru = bs->bs_kgts->ts_gru;
+ vaddr = gru->gs_gru_base_vaddr;
+ ctxnum = kgts->ts_ctxnum;
+ bs->kernel_cb = get_gseg_base_address_cb(vaddr, ctxnum, 0);
+ bs->kernel_dsr = get_gseg_base_address_ds(vaddr, ctxnum, 0);
+ }
+ downgrade_write(&bs->bs_kgts_sema);
+}
+
+/*
+ * Free all kernel contexts that are not currently in use.
+ * Returns 0 if all freed, else number of inuse context.
+ */
+static int gru_free_kernel_contexts(void)
+{
+ struct gru_blade_state *bs;
+ struct gru_thread_state *kgts;
+ int bid, ret = 0;
+
+ for (bid = 0; bid < GRU_MAX_BLADES; bid++) {
+ bs = gru_base[bid];
+ if (!bs)
+ continue;
+
+ /* Ignore busy contexts. Don't want to block here. */
+ if (down_write_trylock(&bs->bs_kgts_sema)) {
+ kgts = bs->bs_kgts;
+ if (kgts && kgts->ts_gru)
+ gru_unload_context(kgts, 0);
+ bs->bs_kgts = NULL;
+ up_write(&bs->bs_kgts_sema);
+ kfree(kgts);
+ } else {
+ ret++;
+ }
+ }
+ return ret;
+}
+
+/*
+ * Lock & load the kernel context for the specified blade.
+ */
+static struct gru_blade_state *gru_lock_kernel_context(int blade_id)
+{
+ struct gru_blade_state *bs;
+ int bid;
+
+ STAT(lock_kernel_context);
+again:
+ bid = blade_id < 0 ? uv_numa_blade_id() : blade_id;
+ bs = gru_base[bid];
+
+ /* Handle the case where migration occurred while waiting for the sema */
+ down_read(&bs->bs_kgts_sema);
+ if (blade_id < 0 && bid != uv_numa_blade_id()) {
+ up_read(&bs->bs_kgts_sema);
+ goto again;
+ }
+ if (!bs->bs_kgts || !bs->bs_kgts->ts_gru)
+ gru_load_kernel_context(bs, bid);
+ return bs;
+
+}
+
+/*
+ * Unlock the kernel context for the specified blade. Context is not
+ * unloaded but may be stolen before next use.
+ */
+static void gru_unlock_kernel_context(int blade_id)
+{
+ struct gru_blade_state *bs;
+
+ bs = gru_base[blade_id];
+ up_read(&bs->bs_kgts_sema);
+ STAT(unlock_kernel_context);
+}
+
+/*
+ * Reserve & get pointers to the DSR/CBRs reserved for the current cpu.
+ * - returns with preemption disabled
+ */
+static int gru_get_cpu_resources(int dsr_bytes, void **cb, void **dsr)
+{
+ struct gru_blade_state *bs;
+ int lcpu;
+
+ BUG_ON(dsr_bytes > GRU_NUM_KERNEL_DSR_BYTES);
+ preempt_disable();
+ bs = gru_lock_kernel_context(-1);
+ lcpu = uv_blade_processor_id();
+ *cb = bs->kernel_cb + lcpu * GRU_HANDLE_STRIDE;
+ *dsr = bs->kernel_dsr + lcpu * GRU_NUM_KERNEL_DSR_BYTES;
+ return 0;
+}
+
+/*
+ * Free the current cpus reserved DSR/CBR resources.
+ */
+static void gru_free_cpu_resources(void *cb, void *dsr)
+{
+ gru_unlock_kernel_context(uv_numa_blade_id());
+ preempt_enable();
+}
+
+/*
+ * Reserve GRU resources to be used asynchronously.
+ * Note: currently supports only 1 reservation per blade.
+ *
+ * input:
+ * blade_id - blade on which resources should be reserved
+ * cbrs - number of CBRs
+ * dsr_bytes - number of DSR bytes needed
+ * output:
+ * handle to identify resource
+ * (0 = async resources already reserved)
+ */
+unsigned long gru_reserve_async_resources(int blade_id, int cbrs, int dsr_bytes,
+ struct completion *cmp)
+{
+ struct gru_blade_state *bs;
+ struct gru_thread_state *kgts;
+ int ret = 0;
+
+ bs = gru_base[blade_id];
+
+ down_write(&bs->bs_kgts_sema);
+
+ /* Verify no resources already reserved */
+ if (bs->bs_async_dsr_bytes + bs->bs_async_cbrs)
+ goto done;
+ bs->bs_async_dsr_bytes = dsr_bytes;
+ bs->bs_async_cbrs = cbrs;
+ bs->bs_async_wq = cmp;
+ kgts = bs->bs_kgts;
+
+ /* Resources changed. Unload context if already loaded */
+ if (kgts && kgts->ts_gru)
+ gru_unload_context(kgts, 0);
+ ret = ASYNC_BID_TO_HAN(blade_id);
+
+done:
+ up_write(&bs->bs_kgts_sema);
+ return ret;
+}
+
+/*
+ * Release async resources previously reserved.
+ *
+ * input:
+ * han - handle to identify resources
+ */
+void gru_release_async_resources(unsigned long han)
+{
+ struct gru_blade_state *bs = ASYNC_HAN_TO_BS(han);
+
+ down_write(&bs->bs_kgts_sema);
+ bs->bs_async_dsr_bytes = 0;
+ bs->bs_async_cbrs = 0;
+ bs->bs_async_wq = NULL;
+ up_write(&bs->bs_kgts_sema);
+}
+
+/*
+ * Wait for async GRU instructions to complete.
+ *
+ * input:
+ * han - handle to identify resources
+ */
+void gru_wait_async_cbr(unsigned long han)
+{
+ struct gru_blade_state *bs = ASYNC_HAN_TO_BS(han);
+
+ wait_for_completion(bs->bs_async_wq);
+ mb();
+}
+
+/*
+ * Lock previous reserved async GRU resources
+ *
+ * input:
+ * han - handle to identify resources
+ * output:
+ * cb - pointer to first CBR
+ * dsr - pointer to first DSR
+ */
+void gru_lock_async_resource(unsigned long han, void **cb, void **dsr)
+{
+ struct gru_blade_state *bs = ASYNC_HAN_TO_BS(han);
+ int blade_id = ASYNC_HAN_TO_BID(han);
+ int ncpus;
+
+ gru_lock_kernel_context(blade_id);
+ ncpus = uv_blade_nr_possible_cpus(blade_id);
+ if (cb)
+ *cb = bs->kernel_cb + ncpus * GRU_HANDLE_STRIDE;
+ if (dsr)
+ *dsr = bs->kernel_dsr + ncpus * GRU_NUM_KERNEL_DSR_BYTES;
+}
+
+/*
+ * Unlock previous reserved async GRU resources
+ *
+ * input:
+ * han - handle to identify resources
+ */
+void gru_unlock_async_resource(unsigned long han)
+{
+ int blade_id = ASYNC_HAN_TO_BID(han);
+
+ gru_unlock_kernel_context(blade_id);
+}
+
+/*----------------------------------------------------------------------*/
+int gru_get_cb_exception_detail(void *cb,
+ struct control_block_extended_exc_detail *excdet)
+{
+ struct gru_control_block_extended *cbe;
+ struct gru_thread_state *kgts = NULL;
+ unsigned long off;
+ int cbrnum, bid;
+
+ /*
+ * Locate kgts for cb. This algorithm is SLOW but
+ * this function is rarely called (ie., almost never).
+ * Performance does not matter.
+ */
+ for_each_possible_blade(bid) {
+ if (!gru_base[bid])
+ break;
+ kgts = gru_base[bid]->bs_kgts;
+ if (!kgts || !kgts->ts_gru)
+ continue;
+ off = cb - kgts->ts_gru->gs_gru_base_vaddr;
+ if (off < GRU_SIZE)
+ break;
+ kgts = NULL;
+ }
+ BUG_ON(!kgts);
+ cbrnum = thread_cbr_number(kgts, get_cb_number(cb));
+ cbe = get_cbe(GRUBASE(cb), cbrnum);
+ gru_flush_cache(cbe); /* CBE not coherent */
+ sync_core();
+ excdet->opc = cbe->opccpy;
+ excdet->exopc = cbe->exopccpy;
+ excdet->ecause = cbe->ecause;
+ excdet->exceptdet0 = cbe->idef1upd;
+ excdet->exceptdet1 = cbe->idef3upd;
+ gru_flush_cache(cbe);
+ return 0;
+}
+
+static char *gru_get_cb_exception_detail_str(int ret, void *cb,
+ char *buf, int size)
+{
+ struct gru_control_block_status *gen = (void *)cb;
+ struct control_block_extended_exc_detail excdet;
+
+ if (ret > 0 && gen->istatus == CBS_EXCEPTION) {
+ gru_get_cb_exception_detail(cb, &excdet);
+ snprintf(buf, size,
+ "GRU:%d exception: cb %p, opc %d, exopc %d, ecause 0x%x,"
+ "excdet0 0x%lx, excdet1 0x%x", smp_processor_id(),
+ gen, excdet.opc, excdet.exopc, excdet.ecause,
+ excdet.exceptdet0, excdet.exceptdet1);
+ } else {
+ snprintf(buf, size, "No exception");
+ }
+ return buf;
+}
+
+static int gru_wait_idle_or_exception(struct gru_control_block_status *gen)
+{
+ while (gen->istatus >= CBS_ACTIVE) {
+ cpu_relax();
+ barrier();
+ }
+ return gen->istatus;
+}
+
+static int gru_retry_exception(void *cb)
+{
+ struct gru_control_block_status *gen = (void *)cb;
+ struct control_block_extended_exc_detail excdet;
+ int retry = EXCEPTION_RETRY_LIMIT;
+
+ while (1) {
+ if (gru_wait_idle_or_exception(gen) == CBS_IDLE)
+ return CBS_IDLE;
+ if (gru_get_cb_message_queue_substatus(cb))
+ return CBS_EXCEPTION;
+ gru_get_cb_exception_detail(cb, &excdet);
+ if ((excdet.ecause & ~EXCEPTION_RETRY_BITS) ||
+ (excdet.cbrexecstatus & CBR_EXS_ABORT_OCC))
+ break;
+ if (retry-- == 0)
+ break;
+ gen->icmd = 1;
+ gru_flush_cache(gen);
+ }
+ return CBS_EXCEPTION;
+}
+
+int gru_check_status_proc(void *cb)
+{
+ struct gru_control_block_status *gen = (void *)cb;
+ int ret;
+
+ ret = gen->istatus;
+ if (ret == CBS_EXCEPTION)
+ ret = gru_retry_exception(cb);
+ rmb();
+ return ret;
+
+}
+
+int gru_wait_proc(void *cb)
+{
+ struct gru_control_block_status *gen = (void *)cb;
+ int ret;
+
+ ret = gru_wait_idle_or_exception(gen);
+ if (ret == CBS_EXCEPTION)
+ ret = gru_retry_exception(cb);
+ rmb();
+ return ret;
+}
+
+static void gru_abort(int ret, void *cb, char *str)
+{
+ char buf[GRU_EXC_STR_SIZE];
+
+ panic("GRU FATAL ERROR: %s - %s\n", str,
+ gru_get_cb_exception_detail_str(ret, cb, buf, sizeof(buf)));
+}
+
+void gru_wait_abort_proc(void *cb)
+{
+ int ret;
+
+ ret = gru_wait_proc(cb);
+ if (ret)
+ gru_abort(ret, cb, "gru_wait_abort");
+}
+
+
+/*------------------------------ MESSAGE QUEUES -----------------------------*/
+
+/* Internal status . These are NOT returned to the user. */
+#define MQIE_AGAIN -1 /* try again */
+
+
+/*
+ * Save/restore the "present" flag that is in the second line of 2-line
+ * messages
+ */
+static inline int get_present2(void *p)
+{
+ struct message_header *mhdr = p + GRU_CACHE_LINE_BYTES;
+ return mhdr->present;
+}
+
+static inline void restore_present2(void *p, int val)
+{
+ struct message_header *mhdr = p + GRU_CACHE_LINE_BYTES;
+ mhdr->present = val;
+}
+
+/*
+ * Create a message queue.
+ * qlines - message queue size in cache lines. Includes 2-line header.
+ */
+int gru_create_message_queue(struct gru_message_queue_desc *mqd,
+ void *p, unsigned int bytes, int nasid, int vector, int apicid)
+{
+ struct message_queue *mq = p;
+ unsigned int qlines;
+
+ qlines = bytes / GRU_CACHE_LINE_BYTES - 2;
+ memset(mq, 0, bytes);
+ mq->start = &mq->data;
+ mq->start2 = &mq->data + (qlines / 2 - 1) * GRU_CACHE_LINE_BYTES;
+ mq->next = &mq->data;
+ mq->limit = &mq->data + (qlines - 2) * GRU_CACHE_LINE_BYTES;
+ mq->qlines = qlines;
+ mq->hstatus[0] = 0;
+ mq->hstatus[1] = 1;
+ mq->head = gru_mesq_head(2, qlines / 2 + 1);
+ mqd->mq = mq;
+ mqd->mq_gpa = uv_gpa(mq);
+ mqd->qlines = qlines;
+ mqd->interrupt_pnode = nasid >> 1;
+ mqd->interrupt_vector = vector;
+ mqd->interrupt_apicid = apicid;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(gru_create_message_queue);
+
+/*
+ * Send a NOOP message to a message queue
+ * Returns:
+ * 0 - if queue is full after the send. This is the normal case
+ * but various races can change this.
+ * -1 - if mesq sent successfully but queue not full
+ * >0 - unexpected error. MQE_xxx returned
+ */
+static int send_noop_message(void *cb, struct gru_message_queue_desc *mqd,
+ void *mesg)
+{
+ const struct message_header noop_header = {
+ .present = MQS_NOOP, .lines = 1};
+ unsigned long m;
+ int substatus, ret;
+ struct message_header save_mhdr, *mhdr = mesg;
+
+ STAT(mesq_noop);
+ save_mhdr = *mhdr;
+ *mhdr = noop_header;
+ gru_mesq(cb, mqd->mq_gpa, gru_get_tri(mhdr), 1, IMA);
+ ret = gru_wait(cb);
+
+ if (ret) {
+ substatus = gru_get_cb_message_queue_substatus(cb);
+ switch (substatus) {
+ case CBSS_NO_ERROR:
+ STAT(mesq_noop_unexpected_error);
+ ret = MQE_UNEXPECTED_CB_ERR;
+ break;
+ case CBSS_LB_OVERFLOWED:
+ STAT(mesq_noop_lb_overflow);
+ ret = MQE_CONGESTION;
+ break;
+ case CBSS_QLIMIT_REACHED:
+ STAT(mesq_noop_qlimit_reached);
+ ret = 0;
+ break;
+ case CBSS_AMO_NACKED:
+ STAT(mesq_noop_amo_nacked);
+ ret = MQE_CONGESTION;
+ break;
+ case CBSS_PUT_NACKED:
+ STAT(mesq_noop_put_nacked);
+ m = mqd->mq_gpa + (gru_get_amo_value_head(cb) << 6);
+ gru_vstore(cb, m, gru_get_tri(mesg), XTYPE_CL, 1, 1,
+ IMA);
+ if (gru_wait(cb) == CBS_IDLE)
+ ret = MQIE_AGAIN;
+ else
+ ret = MQE_UNEXPECTED_CB_ERR;
+ break;
+ case CBSS_PAGE_OVERFLOW:
+ STAT(mesq_noop_page_overflow);
+ /* fallthru */
+ default:
+ BUG();
+ }
+ }
+ *mhdr = save_mhdr;
+ return ret;
+}
+
+/*
+ * Handle a gru_mesq full.
+ */
+static int send_message_queue_full(void *cb, struct gru_message_queue_desc *mqd,
+ void *mesg, int lines)
+{
+ union gru_mesqhead mqh;
+ unsigned int limit, head;
+ unsigned long avalue;
+ int half, qlines;
+
+ /* Determine if switching to first/second half of q */
+ avalue = gru_get_amo_value(cb);
+ head = gru_get_amo_value_head(cb);
+ limit = gru_get_amo_value_limit(cb);
+
+ qlines = mqd->qlines;
+ half = (limit != qlines);
+
+ if (half)
+ mqh = gru_mesq_head(qlines / 2 + 1, qlines);
+ else
+ mqh = gru_mesq_head(2, qlines / 2 + 1);
+
+ /* Try to get lock for switching head pointer */
+ gru_gamir(cb, EOP_IR_CLR, HSTATUS(mqd->mq_gpa, half), XTYPE_DW, IMA);
+ if (gru_wait(cb) != CBS_IDLE)
+ goto cberr;
+ if (!gru_get_amo_value(cb)) {
+ STAT(mesq_qf_locked);
+ return MQE_QUEUE_FULL;
+ }
+
+ /* Got the lock. Send optional NOP if queue not full, */
+ if (head != limit) {
+ if (send_noop_message(cb, mqd, mesg)) {
+ gru_gamir(cb, EOP_IR_INC, HSTATUS(mqd->mq_gpa, half),
+ XTYPE_DW, IMA);
+ if (gru_wait(cb) != CBS_IDLE)
+ goto cberr;
+ STAT(mesq_qf_noop_not_full);
+ return MQIE_AGAIN;
+ }
+ avalue++;
+ }
+
+ /* Then flip queuehead to other half of queue. */
+ gru_gamer(cb, EOP_ERR_CSWAP, mqd->mq_gpa, XTYPE_DW, mqh.val, avalue,
+ IMA);
+ if (gru_wait(cb) != CBS_IDLE)
+ goto cberr;
+
+ /* If not successfully in swapping queue head, clear the hstatus lock */
+ if (gru_get_amo_value(cb) != avalue) {
+ STAT(mesq_qf_switch_head_failed);
+ gru_gamir(cb, EOP_IR_INC, HSTATUS(mqd->mq_gpa, half), XTYPE_DW,
+ IMA);
+ if (gru_wait(cb) != CBS_IDLE)
+ goto cberr;
+ }
+ return MQIE_AGAIN;
+cberr:
+ STAT(mesq_qf_unexpected_error);
+ return MQE_UNEXPECTED_CB_ERR;
+}
+
+/*
+ * Handle a PUT failure. Note: if message was a 2-line message, one of the
+ * lines might have successfully have been written. Before sending the
+ * message, "present" must be cleared in BOTH lines to prevent the receiver
+ * from prematurely seeing the full message.
+ */
+static int send_message_put_nacked(void *cb, struct gru_message_queue_desc *mqd,
+ void *mesg, int lines)
+{
+ unsigned long m, *val = mesg, gpa, save;
+ int ret;
+
+ m = mqd->mq_gpa + (gru_get_amo_value_head(cb) << 6);
+ if (lines == 2) {
+ gru_vset(cb, m, 0, XTYPE_CL, lines, 1, IMA);
+ if (gru_wait(cb) != CBS_IDLE)
+ return MQE_UNEXPECTED_CB_ERR;
+ }
+ gru_vstore(cb, m, gru_get_tri(mesg), XTYPE_CL, lines, 1, IMA);
+ if (gru_wait(cb) != CBS_IDLE)
+ return MQE_UNEXPECTED_CB_ERR;
+
+ if (!mqd->interrupt_vector)
+ return MQE_OK;
+
+ /*
+ * Send a cross-partition interrupt to the SSI that contains the target
+ * message queue. Normally, the interrupt is automatically delivered by
+ * hardware but some error conditions require explicit delivery.
+ * Use the GRU to deliver the interrupt. Otherwise partition failures
+ * could cause unrecovered errors.
+ */
+ gpa = uv_global_gru_mmr_address(mqd->interrupt_pnode, UVH_IPI_INT);
+ save = *val;
+ *val = uv_hub_ipi_value(mqd->interrupt_apicid, mqd->interrupt_vector,
+ dest_Fixed);
+ gru_vstore_phys(cb, gpa, gru_get_tri(mesg), IAA_REGISTER, IMA);
+ ret = gru_wait(cb);
+ *val = save;
+ if (ret != CBS_IDLE)
+ return MQE_UNEXPECTED_CB_ERR;
+ return MQE_OK;
+}
+
+/*
+ * Handle a gru_mesq failure. Some of these failures are software recoverable
+ * or retryable.
+ */
+static int send_message_failure(void *cb, struct gru_message_queue_desc *mqd,
+ void *mesg, int lines)
+{
+ int substatus, ret = 0;
+
+ substatus = gru_get_cb_message_queue_substatus(cb);
+ switch (substatus) {
+ case CBSS_NO_ERROR:
+ STAT(mesq_send_unexpected_error);
+ ret = MQE_UNEXPECTED_CB_ERR;
+ break;
+ case CBSS_LB_OVERFLOWED:
+ STAT(mesq_send_lb_overflow);
+ ret = MQE_CONGESTION;
+ break;
+ case CBSS_QLIMIT_REACHED:
+ STAT(mesq_send_qlimit_reached);
+ ret = send_message_queue_full(cb, mqd, mesg, lines);
+ break;
+ case CBSS_AMO_NACKED:
+ STAT(mesq_send_amo_nacked);
+ ret = MQE_CONGESTION;
+ break;
+ case CBSS_PUT_NACKED:
+ STAT(mesq_send_put_nacked);
+ ret = send_message_put_nacked(cb, mqd, mesg, lines);
+ break;
+ case CBSS_PAGE_OVERFLOW:
+ STAT(mesq_page_overflow);
+ /* fallthru */
+ default:
+ BUG();
+ }
+ return ret;
+}
+
+/*
+ * Send a message to a message queue
+ * mqd message queue descriptor
+ * mesg message. ust be vaddr within a GSEG
+ * bytes message size (<= 2 CL)
+ */
+int gru_send_message_gpa(struct gru_message_queue_desc *mqd, void *mesg,
+ unsigned int bytes)
+{
+ struct message_header *mhdr;
+ void *cb;
+ void *dsr;
+ int istatus, clines, ret;
+
+ STAT(mesq_send);
+ BUG_ON(bytes < sizeof(int) || bytes > 2 * GRU_CACHE_LINE_BYTES);
+
+ clines = DIV_ROUND_UP(bytes, GRU_CACHE_LINE_BYTES);
+ if (gru_get_cpu_resources(bytes, &cb, &dsr))
+ return MQE_BUG_NO_RESOURCES;
+ memcpy(dsr, mesg, bytes);
+ mhdr = dsr;
+ mhdr->present = MQS_FULL;
+ mhdr->lines = clines;
+ if (clines == 2) {
+ mhdr->present2 = get_present2(mhdr);
+ restore_present2(mhdr, MQS_FULL);
+ }
+
+ do {
+ ret = MQE_OK;
+ gru_mesq(cb, mqd->mq_gpa, gru_get_tri(mhdr), clines, IMA);
+ istatus = gru_wait(cb);
+ if (istatus != CBS_IDLE)
+ ret = send_message_failure(cb, mqd, dsr, clines);
+ } while (ret == MQIE_AGAIN);
+ gru_free_cpu_resources(cb, dsr);
+
+ if (ret)
+ STAT(mesq_send_failed);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(gru_send_message_gpa);
+
+/*
+ * Advance the receive pointer for the queue to the next message.
+ */
+void gru_free_message(struct gru_message_queue_desc *mqd, void *mesg)
+{
+ struct message_queue *mq = mqd->mq;
+ struct message_header *mhdr = mq->next;
+ void *next, *pnext;
+ int half = -1;
+ int lines = mhdr->lines;
+
+ if (lines == 2)
+ restore_present2(mhdr, MQS_EMPTY);
+ mhdr->present = MQS_EMPTY;
+
+ pnext = mq->next;
+ next = pnext + GRU_CACHE_LINE_BYTES * lines;
+ if (next == mq->limit) {
+ next = mq->start;
+ half = 1;
+ } else if (pnext < mq->start2 && next >= mq->start2) {
+ half = 0;
+ }
+
+ if (half >= 0)
+ mq->hstatus[half] = 1;
+ mq->next = next;
+}
+EXPORT_SYMBOL_GPL(gru_free_message);
+
+/*
+ * Get next message from message queue. Return NULL if no message
+ * present. User must call next_message() to move to next message.
+ * rmq message queue
+ */
+void *gru_get_next_message(struct gru_message_queue_desc *mqd)
+{
+ struct message_queue *mq = mqd->mq;
+ struct message_header *mhdr = mq->next;
+ int present = mhdr->present;
+
+ /* skip NOOP messages */
+ while (present == MQS_NOOP) {
+ gru_free_message(mqd, mhdr);
+ mhdr = mq->next;
+ present = mhdr->present;
+ }
+
+ /* Wait for both halves of 2 line messages */
+ if (present == MQS_FULL && mhdr->lines == 2 &&
+ get_present2(mhdr) == MQS_EMPTY)
+ present = MQS_EMPTY;
+
+ if (!present) {
+ STAT(mesq_receive_none);
+ return NULL;
+ }
+
+ if (mhdr->lines == 2)
+ restore_present2(mhdr, mhdr->present2);
+
+ STAT(mesq_receive);
+ return mhdr;
+}
+EXPORT_SYMBOL_GPL(gru_get_next_message);
+
+/* ---------------------- GRU DATA COPY FUNCTIONS ---------------------------*/
+
+/*
+ * Load a DW from a global GPA. The GPA can be a memory or MMR address.
+ */
+int gru_read_gpa(unsigned long *value, unsigned long gpa)
+{
+ void *cb;
+ void *dsr;
+ int ret, iaa;
+
+ STAT(read_gpa);
+ if (gru_get_cpu_resources(GRU_NUM_KERNEL_DSR_BYTES, &cb, &dsr))
+ return MQE_BUG_NO_RESOURCES;
+ iaa = gpa >> 62;
+ gru_vload_phys(cb, gpa, gru_get_tri(dsr), iaa, IMA);
+ ret = gru_wait(cb);
+ if (ret == CBS_IDLE)
+ *value = *(unsigned long *)dsr;
+ gru_free_cpu_resources(cb, dsr);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(gru_read_gpa);
+
+
+/*
+ * Copy a block of data using the GRU resources
+ */
+int gru_copy_gpa(unsigned long dest_gpa, unsigned long src_gpa,
+ unsigned int bytes)
+{
+ void *cb;
+ void *dsr;
+ int ret;
+
+ STAT(copy_gpa);
+ if (gru_get_cpu_resources(GRU_NUM_KERNEL_DSR_BYTES, &cb, &dsr))
+ return MQE_BUG_NO_RESOURCES;
+ gru_bcopy(cb, src_gpa, dest_gpa, gru_get_tri(dsr),
+ XTYPE_B, bytes, GRU_NUM_KERNEL_DSR_CL, IMA);
+ ret = gru_wait(cb);
+ gru_free_cpu_resources(cb, dsr);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(gru_copy_gpa);
+
+/* ------------------- KERNEL QUICKTESTS RUN AT STARTUP ----------------*/
+/* Temp - will delete after we gain confidence in the GRU */
+
+static int quicktest0(unsigned long arg)
+{
+ unsigned long word0;
+ unsigned long word1;
+ void *cb;
+ void *dsr;
+ unsigned long *p;
+ int ret = -EIO;
+
+ if (gru_get_cpu_resources(GRU_CACHE_LINE_BYTES, &cb, &dsr))
+ return MQE_BUG_NO_RESOURCES;
+ p = dsr;
+ word0 = MAGIC;
+ word1 = 0;
+
+ gru_vload(cb, uv_gpa(&word0), gru_get_tri(dsr), XTYPE_DW, 1, 1, IMA);
+ if (gru_wait(cb) != CBS_IDLE) {
+ printk(KERN_DEBUG "GRU:%d quicktest0: CBR failure 1\n", smp_processor_id());
+ goto done;
+ }
+
+ if (*p != MAGIC) {
+ printk(KERN_DEBUG "GRU:%d quicktest0 bad magic 0x%lx\n", smp_processor_id(), *p);
+ goto done;
+ }
+ gru_vstore(cb, uv_gpa(&word1), gru_get_tri(dsr), XTYPE_DW, 1, 1, IMA);
+ if (gru_wait(cb) != CBS_IDLE) {
+ printk(KERN_DEBUG "GRU:%d quicktest0: CBR failure 2\n", smp_processor_id());
+ goto done;
+ }
+
+ if (word0 != word1 || word1 != MAGIC) {
+ printk(KERN_DEBUG
+ "GRU:%d quicktest0 err: found 0x%lx, expected 0x%lx\n",
+ smp_processor_id(), word1, MAGIC);
+ goto done;
+ }
+ ret = 0;
+
+done:
+ gru_free_cpu_resources(cb, dsr);
+ return ret;
+}
+
+#define ALIGNUP(p, q) ((void *)(((unsigned long)(p) + (q) - 1) & ~(q - 1)))
+
+static int quicktest1(unsigned long arg)
+{
+ struct gru_message_queue_desc mqd;
+ void *p, *mq;
+ int i, ret = -EIO;
+ char mes[GRU_CACHE_LINE_BYTES], *m;
+
+ /* Need 1K cacheline aligned that does not cross page boundary */
+ p = kmalloc(4096, 0);
+ if (p == NULL)
+ return -ENOMEM;
+ mq = ALIGNUP(p, 1024);
+ memset(mes, 0xee, sizeof(mes));
+
+ gru_create_message_queue(&mqd, mq, 8 * GRU_CACHE_LINE_BYTES, 0, 0, 0);
+ for (i = 0; i < 6; i++) {
+ mes[8] = i;
+ do {
+ ret = gru_send_message_gpa(&mqd, mes, sizeof(mes));
+ } while (ret == MQE_CONGESTION);
+ if (ret)
+ break;
+ }
+ if (ret != MQE_QUEUE_FULL || i != 4) {
+ printk(KERN_DEBUG "GRU:%d quicktest1: unexpect status %d, i %d\n",
+ smp_processor_id(), ret, i);
+ goto done;
+ }
+
+ for (i = 0; i < 6; i++) {
+ m = gru_get_next_message(&mqd);
+ if (!m || m[8] != i)
+ break;
+ gru_free_message(&mqd, m);
+ }
+ if (i != 4) {
+ printk(KERN_DEBUG "GRU:%d quicktest2: bad message, i %d, m %p, m8 %d\n",
+ smp_processor_id(), i, m, m ? m[8] : -1);
+ goto done;
+ }
+ ret = 0;
+
+done:
+ kfree(p);
+ return ret;
+}
+
+static int quicktest2(unsigned long arg)
+{
+ static DECLARE_COMPLETION(cmp);
+ unsigned long han;
+ int blade_id = 0;
+ int numcb = 4;
+ int ret = 0;
+ unsigned long *buf;
+ void *cb0, *cb;
+ struct gru_control_block_status *gen;
+ int i, k, istatus, bytes;
+
+ bytes = numcb * 4 * 8;
+ buf = kmalloc(bytes, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+
+ ret = -EBUSY;
+ han = gru_reserve_async_resources(blade_id, numcb, 0, &cmp);
+ if (!han)
+ goto done;
+
+ gru_lock_async_resource(han, &cb0, NULL);
+ memset(buf, 0xee, bytes);
+ for (i = 0; i < numcb; i++)
+ gru_vset(cb0 + i * GRU_HANDLE_STRIDE, uv_gpa(&buf[i * 4]), 0,
+ XTYPE_DW, 4, 1, IMA_INTERRUPT);
+
+ ret = 0;
+ k = numcb;
+ do {
+ gru_wait_async_cbr(han);
+ for (i = 0; i < numcb; i++) {
+ cb = cb0 + i * GRU_HANDLE_STRIDE;
+ istatus = gru_check_status(cb);
+ if (istatus != CBS_ACTIVE && istatus != CBS_CALL_OS)
+ break;
+ }
+ if (i == numcb)
+ continue;
+ if (istatus != CBS_IDLE) {
+ printk(KERN_DEBUG "GRU:%d quicktest2: cb %d, exception\n", smp_processor_id(), i);
+ ret = -EFAULT;
+ } else if (buf[4 * i] || buf[4 * i + 1] || buf[4 * i + 2] ||
+ buf[4 * i + 3]) {
+ printk(KERN_DEBUG "GRU:%d quicktest2:cb %d, buf 0x%lx, 0x%lx, 0x%lx, 0x%lx\n",
+ smp_processor_id(), i, buf[4 * i], buf[4 * i + 1], buf[4 * i + 2], buf[4 * i + 3]);
+ ret = -EIO;
+ }
+ k--;
+ gen = cb;
+ gen->istatus = CBS_CALL_OS; /* don't handle this CBR again */
+ } while (k);
+ BUG_ON(cmp.done);
+
+ gru_unlock_async_resource(han);
+ gru_release_async_resources(han);
+done:
+ kfree(buf);
+ return ret;
+}
+
+#define BUFSIZE 200
+static int quicktest3(unsigned long arg)
+{
+ char buf1[BUFSIZE], buf2[BUFSIZE];
+ int ret = 0;
+
+ memset(buf2, 0, sizeof(buf2));
+ memset(buf1, get_cycles() & 255, sizeof(buf1));
+ gru_copy_gpa(uv_gpa(buf2), uv_gpa(buf1), BUFSIZE);
+ if (memcmp(buf1, buf2, BUFSIZE)) {
+ printk(KERN_DEBUG "GRU:%d quicktest3 error\n", smp_processor_id());
+ ret = -EIO;
+ }
+ return ret;
+}
+
+/*
+ * Debugging only. User hook for various kernel tests
+ * of driver & gru.
+ */
+int gru_ktest(unsigned long arg)
+{
+ int ret = -EINVAL;
+
+ switch (arg & 0xff) {
+ case 0:
+ ret = quicktest0(arg);
+ break;
+ case 1:
+ ret = quicktest1(arg);
+ break;
+ case 2:
+ ret = quicktest2(arg);
+ break;
+ case 3:
+ ret = quicktest3(arg);
+ break;
+ case 99:
+ ret = gru_free_kernel_contexts();
+ break;
+ }
+ return ret;
+
+}
+
+int gru_kservices_init(void)
+{
+ return 0;
+}
+
+void gru_kservices_exit(void)
+{
+ if (gru_free_kernel_contexts())
+ BUG();
+}
+
diff --git a/drivers/misc/sgi-gru/grukservices.h b/drivers/misc/sgi-gru/grukservices.h
new file mode 100644
index 0000000..02aa94d
--- /dev/null
+++ b/drivers/misc/sgi-gru/grukservices.h
@@ -0,0 +1,214 @@
+
+/*
+ * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+#ifndef __GRU_KSERVICES_H_
+#define __GRU_KSERVICES_H_
+
+
+/*
+ * Message queues using the GRU to send/receive messages.
+ *
+ * These function allow the user to create a message queue for
+ * sending/receiving 1 or 2 cacheline messages using the GRU.
+ *
+ * Processes SENDING messages will use a kernel CBR/DSR to send
+ * the message. This is transparent to the caller.
+ *
+ * The receiver does not use any GRU resources.
+ *
+ * The functions support:
+ * - single receiver
+ * - multiple senders
+ * - cross partition message
+ *
+ * Missing features ZZZ:
+ * - user options for dealing with timeouts, queue full, etc.
+ * - gru_create_message_queue() needs interrupt vector info
+ */
+
+struct gru_message_queue_desc {
+ void *mq; /* message queue vaddress */
+ unsigned long mq_gpa; /* global address of mq */
+ int qlines; /* queue size in CL */
+ int interrupt_vector; /* interrupt vector */
+ int interrupt_pnode; /* pnode for interrupt */
+ int interrupt_apicid; /* lapicid for interrupt */
+};
+
+/*
+ * Initialize a user allocated chunk of memory to be used as
+ * a message queue. The caller must ensure that the queue is
+ * in contiguous physical memory and is cacheline aligned.
+ *
+ * Message queue size is the total number of bytes allocated
+ * to the queue including a 2 cacheline header that is used
+ * to manage the queue.
+ *
+ * Input:
+ * mqd pointer to message queue descriptor
+ * p pointer to user allocated mesq memory.
+ * bytes size of message queue in bytes
+ * vector interrupt vector (zero if no interrupts)
+ * nasid nasid of blade where interrupt is delivered
+ * apicid apicid of cpu for interrupt
+ *
+ * Errors:
+ * 0 OK
+ * >0 error
+ */
+extern int gru_create_message_queue(struct gru_message_queue_desc *mqd,
+ void *p, unsigned int bytes, int nasid, int vector, int apicid);
+
+/*
+ * Send a message to a message queue.
+ *
+ * Note: The message queue transport mechanism uses the first 32
+ * bits of the message. Users should avoid using these bits.
+ *
+ *
+ * Input:
+ * mqd pointer to message queue descriptor
+ * mesg pointer to message. Must be 64-bit aligned
+ * bytes size of message in bytes
+ *
+ * Output:
+ * 0 message sent
+ * >0 Send failure - see error codes below
+ *
+ */
+extern int gru_send_message_gpa(struct gru_message_queue_desc *mqd,
+ void *mesg, unsigned int bytes);
+
+/* Status values for gru_send_message() */
+#define MQE_OK 0 /* message sent successfully */
+#define MQE_CONGESTION 1 /* temporary congestion, try again */
+#define MQE_QUEUE_FULL 2 /* queue is full */
+#define MQE_UNEXPECTED_CB_ERR 3 /* unexpected CB error */
+#define MQE_PAGE_OVERFLOW 10 /* BUG - queue overflowed a page */
+#define MQE_BUG_NO_RESOURCES 11 /* BUG - could not alloc GRU cb/dsr */
+
+/*
+ * Advance the receive pointer for the message queue to the next message.
+ * Note: current API requires messages to be gotten & freed in order. Future
+ * API extensions may allow for out-of-order freeing.
+ *
+ * Input
+ * mqd pointer to message queue descriptor
+ * mesq message being freed
+ */
+extern void gru_free_message(struct gru_message_queue_desc *mqd,
+ void *mesq);
+
+/*
+ * Get next message from message queue. Returns pointer to
+ * message OR NULL if no message present.
+ * User must call gru_free_message() after message is processed
+ * in order to move the queue pointers to next message.
+ *
+ * Input
+ * mqd pointer to message queue descriptor
+ *
+ * Output:
+ * p pointer to message
+ * NULL no message available
+ */
+extern void *gru_get_next_message(struct gru_message_queue_desc *mqd);
+
+
+/*
+ * Read a GRU global GPA. Source can be located in a remote partition.
+ *
+ * Input:
+ * value memory address where MMR value is returned
+ * gpa source numalink physical address of GPA
+ *
+ * Output:
+ * 0 OK
+ * >0 error
+ */
+int gru_read_gpa(unsigned long *value, unsigned long gpa);
+
+
+/*
+ * Copy data using the GRU. Source or destination can be located in a remote
+ * partition.
+ *
+ * Input:
+ * dest_gpa destination global physical address
+ * src_gpa source global physical address
+ * bytes number of bytes to copy
+ *
+ * Output:
+ * 0 OK
+ * >0 error
+ */
+extern int gru_copy_gpa(unsigned long dest_gpa, unsigned long src_gpa,
+ unsigned int bytes);
+
+/*
+ * Reserve GRU resources to be used asynchronously.
+ *
+ * input:
+ * blade_id - blade on which resources should be reserved
+ * cbrs - number of CBRs
+ * dsr_bytes - number of DSR bytes needed
+ * cmp - completion structure for waiting for
+ * async completions
+ * output:
+ * handle to identify resource
+ * (0 = no resources)
+ */
+extern unsigned long gru_reserve_async_resources(int blade_id, int cbrs, int dsr_bytes,
+ struct completion *cmp);
+
+/*
+ * Release async resources previously reserved.
+ *
+ * input:
+ * han - handle to identify resources
+ */
+extern void gru_release_async_resources(unsigned long han);
+
+/*
+ * Wait for async GRU instructions to complete.
+ *
+ * input:
+ * han - handle to identify resources
+ */
+extern void gru_wait_async_cbr(unsigned long han);
+
+/*
+ * Lock previous reserved async GRU resources
+ *
+ * input:
+ * han - handle to identify resources
+ * output:
+ * cb - pointer to first CBR
+ * dsr - pointer to first DSR
+ */
+extern void gru_lock_async_resource(unsigned long han, void **cb, void **dsr);
+
+/*
+ * Unlock previous reserved async GRU resources
+ *
+ * input:
+ * han - handle to identify resources
+ */
+extern void gru_unlock_async_resource(unsigned long han);
+
+#endif /* __GRU_KSERVICES_H_ */
diff --git a/drivers/misc/sgi-gru/grulib.h b/drivers/misc/sgi-gru/grulib.h
new file mode 100644
index 0000000..e77d1b1
--- /dev/null
+++ b/drivers/misc/sgi-gru/grulib.h
@@ -0,0 +1,153 @@
+/*
+ * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef __GRULIB_H__
+#define __GRULIB_H__
+
+#define GRU_BASENAME "gru"
+#define GRU_FULLNAME "/dev/gru"
+#define GRU_IOCTL_NUM 'G'
+
+/*
+ * Maximum number of GRU segments that a user can have open
+ * ZZZ temp - set high for testing. Revisit.
+ */
+#define GRU_MAX_OPEN_CONTEXTS 32
+
+/* Set Number of Request Blocks */
+#define GRU_CREATE_CONTEXT _IOWR(GRU_IOCTL_NUM, 1, void *)
+
+/* Set Context Options */
+#define GRU_SET_CONTEXT_OPTION _IOWR(GRU_IOCTL_NUM, 4, void *)
+
+/* Fetch exception detail */
+#define GRU_USER_GET_EXCEPTION_DETAIL _IOWR(GRU_IOCTL_NUM, 6, void *)
+
+/* For user call_os handling - normally a TLB fault */
+#define GRU_USER_CALL_OS _IOWR(GRU_IOCTL_NUM, 8, void *)
+
+/* For user unload context */
+#define GRU_USER_UNLOAD_CONTEXT _IOWR(GRU_IOCTL_NUM, 9, void *)
+
+/* For dumpping GRU chiplet state */
+#define GRU_DUMP_CHIPLET_STATE _IOWR(GRU_IOCTL_NUM, 11, void *)
+
+/* For getting gseg statistics */
+#define GRU_GET_GSEG_STATISTICS _IOWR(GRU_IOCTL_NUM, 12, void *)
+
+/* For user TLB flushing (primarily for tests) */
+#define GRU_USER_FLUSH_TLB _IOWR(GRU_IOCTL_NUM, 50, void *)
+
+/* Get some config options (primarily for tests & emulator) */
+#define GRU_GET_CONFIG_INFO _IOWR(GRU_IOCTL_NUM, 51, void *)
+
+/* Various kernel self-tests */
+#define GRU_KTEST _IOWR(GRU_IOCTL_NUM, 52, void *)
+
+#define CONTEXT_WINDOW_BYTES(th) (GRU_GSEG_PAGESIZE * (th))
+#define THREAD_POINTER(p, th) (p + GRU_GSEG_PAGESIZE * (th))
+#define GSEG_START(cb) ((void *)((unsigned long)(cb) & ~(GRU_GSEG_PAGESIZE - 1)))
+
+struct gru_get_gseg_statistics_req {
+ unsigned long gseg;
+ struct gru_gseg_statistics stats;
+};
+
+/*
+ * Structure used to pass TLB flush parameters to the driver
+ */
+struct gru_create_context_req {
+ unsigned long gseg;
+ unsigned int data_segment_bytes;
+ unsigned int control_blocks;
+ unsigned int maximum_thread_count;
+ unsigned int options;
+ unsigned char tlb_preload_count;
+};
+
+/*
+ * Structure used to pass unload context parameters to the driver
+ */
+struct gru_unload_context_req {
+ unsigned long gseg;
+};
+
+/*
+ * Structure used to set context options
+ */
+enum {sco_gseg_owner, sco_cch_req_slice, sco_blade_chiplet};
+struct gru_set_context_option_req {
+ unsigned long gseg;
+ int op;
+ int val0;
+ long val1;
+};
+
+/*
+ * Structure used to pass TLB flush parameters to the driver
+ */
+struct gru_flush_tlb_req {
+ unsigned long gseg;
+ unsigned long vaddr;
+ size_t len;
+};
+
+/*
+ * Structure used to pass TLB flush parameters to the driver
+ */
+enum {dcs_pid, dcs_gid};
+struct gru_dump_chiplet_state_req {
+ unsigned int op;
+ unsigned int gid;
+ int ctxnum;
+ char data_opt;
+ char lock_cch;
+ char flush_cbrs;
+ char fill[10];
+ pid_t pid;
+ void *buf;
+ size_t buflen;
+ /* ---- output --- */
+ unsigned int num_contexts;
+};
+
+#define GRU_DUMP_MAGIC 0x3474ab6c
+struct gru_dump_context_header {
+ unsigned int magic;
+ unsigned int gid;
+ unsigned char ctxnum;
+ unsigned char cbrcnt;
+ unsigned char dsrcnt;
+ pid_t pid;
+ unsigned long vaddr;
+ int cch_locked;
+ unsigned long data[0];
+};
+
+/*
+ * GRU configuration info (temp - for testing)
+ */
+struct gru_config_info {
+ int cpus;
+ int blades;
+ int nodes;
+ int chiplets;
+ int fill[16];
+};
+
+#endif /* __GRULIB_H__ */
diff --git a/drivers/misc/sgi-gru/grumain.c b/drivers/misc/sgi-gru/grumain.c
new file mode 100644
index 0000000..1525870
--- /dev/null
+++ b/drivers/misc/sgi-gru/grumain.c
@@ -0,0 +1,975 @@
+/*
+ * SN Platform GRU Driver
+ *
+ * DRIVER TABLE MANAGER + GRU CONTEXT LOAD/UNLOAD
+ *
+ * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/mm.h>
+#include <linux/spinlock.h>
+#include <linux/sched.h>
+#include <linux/device.h>
+#include <linux/list.h>
+#include <linux/err.h>
+#include <linux/prefetch.h>
+#include <asm/uv/uv_hub.h>
+#include "gru.h"
+#include "grutables.h"
+#include "gruhandles.h"
+
+unsigned long gru_options __read_mostly;
+
+static struct device_driver gru_driver = {
+ .name = "gru"
+};
+
+static struct device gru_device = {
+ .init_name = "",
+ .driver = &gru_driver,
+};
+
+struct device *grudev = &gru_device;
+
+/*
+ * Select a gru fault map to be used by the current cpu. Note that
+ * multiple cpus may be using the same map.
+ * ZZZ should be inline but did not work on emulator
+ */
+int gru_cpu_fault_map_id(void)
+{
+#ifdef CONFIG_IA64
+ return uv_blade_processor_id() % GRU_NUM_TFM;
+#else
+ int cpu = smp_processor_id();
+ int id, core;
+
+ core = uv_cpu_core_number(cpu);
+ id = core + UV_MAX_INT_CORES * uv_cpu_socket_number(cpu);
+ return id;
+#endif
+}
+
+/*--------- ASID Management -------------------------------------------
+ *
+ * Initially, assign asids sequentially from MIN_ASID .. MAX_ASID.
+ * Once MAX is reached, flush the TLB & start over. However,
+ * some asids may still be in use. There won't be many (percentage wise) still
+ * in use. Search active contexts & determine the value of the first
+ * asid in use ("x"s below). Set "limit" to this value.
+ * This defines a block of assignable asids.
+ *
+ * When "limit" is reached, search forward from limit+1 and determine the
+ * next block of assignable asids.
+ *
+ * Repeat until MAX_ASID is reached, then start over again.
+ *
+ * Each time MAX_ASID is reached, increment the asid generation. Since
+ * the search for in-use asids only checks contexts with GRUs currently
+ * assigned, asids in some contexts will be missed. Prior to loading
+ * a context, the asid generation of the GTS asid is rechecked. If it
+ * doesn't match the current generation, a new asid will be assigned.
+ *
+ * 0---------------x------------x---------------------x----|
+ * ^-next ^-limit ^-MAX_ASID
+ *
+ * All asid manipulation & context loading/unloading is protected by the
+ * gs_lock.
+ */
+
+/* Hit the asid limit. Start over */
+static int gru_wrap_asid(struct gru_state *gru)
+{
+ gru_dbg(grudev, "gid %d\n", gru->gs_gid);
+ STAT(asid_wrap);
+ gru->gs_asid_gen++;
+ return MIN_ASID;
+}
+
+/* Find the next chunk of unused asids */
+static int gru_reset_asid_limit(struct gru_state *gru, int asid)
+{
+ int i, gid, inuse_asid, limit;
+
+ gru_dbg(grudev, "gid %d, asid 0x%x\n", gru->gs_gid, asid);
+ STAT(asid_next);
+ limit = MAX_ASID;
+ if (asid >= limit)
+ asid = gru_wrap_asid(gru);
+ gru_flush_all_tlb(gru);
+ gid = gru->gs_gid;
+again:
+ for (i = 0; i < GRU_NUM_CCH; i++) {
+ if (!gru->gs_gts[i] || is_kernel_context(gru->gs_gts[i]))
+ continue;
+ inuse_asid = gru->gs_gts[i]->ts_gms->ms_asids[gid].mt_asid;
+ gru_dbg(grudev, "gid %d, gts %p, gms %p, inuse 0x%x, cxt %d\n",
+ gru->gs_gid, gru->gs_gts[i], gru->gs_gts[i]->ts_gms,
+ inuse_asid, i);
+ if (inuse_asid == asid) {
+ asid += ASID_INC;
+ if (asid >= limit) {
+ /*
+ * empty range: reset the range limit and
+ * start over
+ */
+ limit = MAX_ASID;
+ if (asid >= MAX_ASID)
+ asid = gru_wrap_asid(gru);
+ goto again;
+ }
+ }
+
+ if ((inuse_asid > asid) && (inuse_asid < limit))
+ limit = inuse_asid;
+ }
+ gru->gs_asid_limit = limit;
+ gru->gs_asid = asid;
+ gru_dbg(grudev, "gid %d, new asid 0x%x, new_limit 0x%x\n", gru->gs_gid,
+ asid, limit);
+ return asid;
+}
+
+/* Assign a new ASID to a thread context. */
+static int gru_assign_asid(struct gru_state *gru)
+{
+ int asid;
+
+ gru->gs_asid += ASID_INC;
+ asid = gru->gs_asid;
+ if (asid >= gru->gs_asid_limit)
+ asid = gru_reset_asid_limit(gru, asid);
+
+ gru_dbg(grudev, "gid %d, asid 0x%x\n", gru->gs_gid, asid);
+ return asid;
+}
+
+/*
+ * Clear n bits in a word. Return a word indicating the bits that were cleared.
+ * Optionally, build an array of chars that contain the bit numbers allocated.
+ */
+static unsigned long reserve_resources(unsigned long *p, int n, int mmax,
+ char *idx)
+{
+ unsigned long bits = 0;
+ int i;
+
+ while (n--) {
+ i = find_first_bit(p, mmax);
+ if (i == mmax)
+ BUG();
+ __clear_bit(i, p);
+ __set_bit(i, &bits);
+ if (idx)
+ *idx++ = i;
+ }
+ return bits;
+}
+
+unsigned long gru_reserve_cb_resources(struct gru_state *gru, int cbr_au_count,
+ char *cbmap)
+{
+ return reserve_resources(&gru->gs_cbr_map, cbr_au_count, GRU_CBR_AU,
+ cbmap);
+}
+
+unsigned long gru_reserve_ds_resources(struct gru_state *gru, int dsr_au_count,
+ char *dsmap)
+{
+ return reserve_resources(&gru->gs_dsr_map, dsr_au_count, GRU_DSR_AU,
+ dsmap);
+}
+
+static void reserve_gru_resources(struct gru_state *gru,
+ struct gru_thread_state *gts)
+{
+ gru->gs_active_contexts++;
+ gts->ts_cbr_map =
+ gru_reserve_cb_resources(gru, gts->ts_cbr_au_count,
+ gts->ts_cbr_idx);
+ gts->ts_dsr_map =
+ gru_reserve_ds_resources(gru, gts->ts_dsr_au_count, NULL);
+}
+
+static void free_gru_resources(struct gru_state *gru,
+ struct gru_thread_state *gts)
+{
+ gru->gs_active_contexts--;
+ gru->gs_cbr_map |= gts->ts_cbr_map;
+ gru->gs_dsr_map |= gts->ts_dsr_map;
+}
+
+/*
+ * Check if a GRU has sufficient free resources to satisfy an allocation
+ * request. Note: GRU locks may or may not be held when this is called. If
+ * not held, recheck after acquiring the appropriate locks.
+ *
+ * Returns 1 if sufficient resources, 0 if not
+ */
+static int check_gru_resources(struct gru_state *gru, int cbr_au_count,
+ int dsr_au_count, int max_active_contexts)
+{
+ return hweight64(gru->gs_cbr_map) >= cbr_au_count
+ && hweight64(gru->gs_dsr_map) >= dsr_au_count
+ && gru->gs_active_contexts < max_active_contexts;
+}
+
+/*
+ * TLB manangment requires tracking all GRU chiplets that have loaded a GSEG
+ * context.
+ */
+static int gru_load_mm_tracker(struct gru_state *gru,
+ struct gru_thread_state *gts)
+{
+ struct gru_mm_struct *gms = gts->ts_gms;
+ struct gru_mm_tracker *asids = &gms->ms_asids[gru->gs_gid];
+ unsigned short ctxbitmap = (1 << gts->ts_ctxnum);
+ int asid;
+
+ spin_lock(&gms->ms_asid_lock);
+ asid = asids->mt_asid;
+
+ spin_lock(&gru->gs_asid_lock);
+ if (asid == 0 || (asids->mt_ctxbitmap == 0 && asids->mt_asid_gen !=
+ gru->gs_asid_gen)) {
+ asid = gru_assign_asid(gru);
+ asids->mt_asid = asid;
+ asids->mt_asid_gen = gru->gs_asid_gen;
+ STAT(asid_new);
+ } else {
+ STAT(asid_reuse);
+ }
+ spin_unlock(&gru->gs_asid_lock);
+
+ BUG_ON(asids->mt_ctxbitmap & ctxbitmap);
+ asids->mt_ctxbitmap |= ctxbitmap;
+ if (!test_bit(gru->gs_gid, gms->ms_asidmap))
+ __set_bit(gru->gs_gid, gms->ms_asidmap);
+ spin_unlock(&gms->ms_asid_lock);
+
+ gru_dbg(grudev,
+ "gid %d, gts %p, gms %p, ctxnum %d, asid 0x%x, asidmap 0x%lx\n",
+ gru->gs_gid, gts, gms, gts->ts_ctxnum, asid,
+ gms->ms_asidmap[0]);
+ return asid;
+}
+
+static void gru_unload_mm_tracker(struct gru_state *gru,
+ struct gru_thread_state *gts)
+{
+ struct gru_mm_struct *gms = gts->ts_gms;
+ struct gru_mm_tracker *asids;
+ unsigned short ctxbitmap;
+
+ asids = &gms->ms_asids[gru->gs_gid];
+ ctxbitmap = (1 << gts->ts_ctxnum);
+ spin_lock(&gms->ms_asid_lock);
+ spin_lock(&gru->gs_asid_lock);
+ BUG_ON((asids->mt_ctxbitmap & ctxbitmap) != ctxbitmap);
+ asids->mt_ctxbitmap ^= ctxbitmap;
+ gru_dbg(grudev, "gid %d, gts %p, gms %p, ctxnum 0x%d, asidmap 0x%lx\n",
+ gru->gs_gid, gts, gms, gts->ts_ctxnum, gms->ms_asidmap[0]);
+ spin_unlock(&gru->gs_asid_lock);
+ spin_unlock(&gms->ms_asid_lock);
+}
+
+/*
+ * Decrement the reference count on a GTS structure. Free the structure
+ * if the reference count goes to zero.
+ */
+void gts_drop(struct gru_thread_state *gts)
+{
+ if (gts && atomic_dec_return(>s->ts_refcnt) == 0) {
+ if (gts->ts_gms)
+ gru_drop_mmu_notifier(gts->ts_gms);
+ kfree(gts);
+ STAT(gts_free);
+ }
+}
+
+/*
+ * Locate the GTS structure for the current thread.
+ */
+static struct gru_thread_state *gru_find_current_gts_nolock(struct gru_vma_data
+ *vdata, int tsid)
+{
+ struct gru_thread_state *gts;
+
+ list_for_each_entry(gts, &vdata->vd_head, ts_next)
+ if (gts->ts_tsid == tsid)
+ return gts;
+ return NULL;
+}
+
+/*
+ * Allocate a thread state structure.
+ */
+struct gru_thread_state *gru_alloc_gts(struct vm_area_struct *vma,
+ int cbr_au_count, int dsr_au_count,
+ unsigned char tlb_preload_count, int options, int tsid)
+{
+ struct gru_thread_state *gts;
+ struct gru_mm_struct *gms;
+ int bytes;
+
+ bytes = DSR_BYTES(dsr_au_count) + CBR_BYTES(cbr_au_count);
+ bytes += sizeof(struct gru_thread_state);
+ gts = kmalloc(bytes, GFP_KERNEL);
+ if (!gts)
+ return ERR_PTR(-ENOMEM);
+
+ STAT(gts_alloc);
+ memset(gts, 0, sizeof(struct gru_thread_state)); /* zero out header */
+ atomic_set(>s->ts_refcnt, 1);
+ mutex_init(>s->ts_ctxlock);
+ gts->ts_cbr_au_count = cbr_au_count;
+ gts->ts_dsr_au_count = dsr_au_count;
+ gts->ts_tlb_preload_count = tlb_preload_count;
+ gts->ts_user_options = options;
+ gts->ts_user_blade_id = -1;
+ gts->ts_user_chiplet_id = -1;
+ gts->ts_tsid = tsid;
+ gts->ts_ctxnum = NULLCTX;
+ gts->ts_tlb_int_select = -1;
+ gts->ts_cch_req_slice = -1;
+ gts->ts_sizeavail = GRU_SIZEAVAIL(PAGE_SHIFT);
+ if (vma) {
+ gts->ts_mm = current->mm;
+ gts->ts_vma = vma;
+ gms = gru_register_mmu_notifier();
+ if (IS_ERR(gms))
+ goto err;
+ gts->ts_gms = gms;
+ }
+
+ gru_dbg(grudev, "alloc gts %p\n", gts);
+ return gts;
+
+err:
+ gts_drop(gts);
+ return ERR_CAST(gms);
+}
+
+/*
+ * Allocate a vma private data structure.
+ */
+struct gru_vma_data *gru_alloc_vma_data(struct vm_area_struct *vma, int tsid)
+{
+ struct gru_vma_data *vdata = NULL;
+
+ vdata = kmalloc(sizeof(*vdata), GFP_KERNEL);
+ if (!vdata)
+ return NULL;
+
+ STAT(vdata_alloc);
+ INIT_LIST_HEAD(&vdata->vd_head);
+ spin_lock_init(&vdata->vd_lock);
+ gru_dbg(grudev, "alloc vdata %p\n", vdata);
+ return vdata;
+}
+
+/*
+ * Find the thread state structure for the current thread.
+ */
+struct gru_thread_state *gru_find_thread_state(struct vm_area_struct *vma,
+ int tsid)
+{
+ struct gru_vma_data *vdata = vma->vm_private_data;
+ struct gru_thread_state *gts;
+
+ spin_lock(&vdata->vd_lock);
+ gts = gru_find_current_gts_nolock(vdata, tsid);
+ spin_unlock(&vdata->vd_lock);
+ gru_dbg(grudev, "vma %p, gts %p\n", vma, gts);
+ return gts;
+}
+
+/*
+ * Allocate a new thread state for a GSEG. Note that races may allow
+ * another thread to race to create a gts.
+ */
+struct gru_thread_state *gru_alloc_thread_state(struct vm_area_struct *vma,
+ int tsid)
+{
+ struct gru_vma_data *vdata = vma->vm_private_data;
+ struct gru_thread_state *gts, *ngts;
+
+ gts = gru_alloc_gts(vma, vdata->vd_cbr_au_count,
+ vdata->vd_dsr_au_count,
+ vdata->vd_tlb_preload_count,
+ vdata->vd_user_options, tsid);
+ if (IS_ERR(gts))
+ return gts;
+
+ spin_lock(&vdata->vd_lock);
+ ngts = gru_find_current_gts_nolock(vdata, tsid);
+ if (ngts) {
+ gts_drop(gts);
+ gts = ngts;
+ STAT(gts_double_allocate);
+ } else {
+ list_add(>s->ts_next, &vdata->vd_head);
+ }
+ spin_unlock(&vdata->vd_lock);
+ gru_dbg(grudev, "vma %p, gts %p\n", vma, gts);
+ return gts;
+}
+
+/*
+ * Free the GRU context assigned to the thread state.
+ */
+static void gru_free_gru_context(struct gru_thread_state *gts)
+{
+ struct gru_state *gru;
+
+ gru = gts->ts_gru;
+ gru_dbg(grudev, "gts %p, gid %d\n", gts, gru->gs_gid);
+
+ spin_lock(&gru->gs_lock);
+ gru->gs_gts[gts->ts_ctxnum] = NULL;
+ free_gru_resources(gru, gts);
+ BUG_ON(test_bit(gts->ts_ctxnum, &gru->gs_context_map) == 0);
+ __clear_bit(gts->ts_ctxnum, &gru->gs_context_map);
+ gts->ts_ctxnum = NULLCTX;
+ gts->ts_gru = NULL;
+ gts->ts_blade = -1;
+ spin_unlock(&gru->gs_lock);
+
+ gts_drop(gts);
+ STAT(free_context);
+}
+
+/*
+ * Prefetching cachelines help hardware performance.
+ * (Strictly a performance enhancement. Not functionally required).
+ */
+static void prefetch_data(void *p, int num, int stride)
+{
+ while (num-- > 0) {
+ prefetchw(p);
+ p += stride;
+ }
+}
+
+static inline long gru_copy_handle(void *d, void *s)
+{
+ memcpy(d, s, GRU_HANDLE_BYTES);
+ return GRU_HANDLE_BYTES;
+}
+
+static void gru_prefetch_context(void *gseg, void *cb, void *cbe,
+ unsigned long cbrmap, unsigned long length)
+{
+ int i, scr;
+
+ prefetch_data(gseg + GRU_DS_BASE, length / GRU_CACHE_LINE_BYTES,
+ GRU_CACHE_LINE_BYTES);
+
+ for_each_cbr_in_allocation_map(i, &cbrmap, scr) {
+ prefetch_data(cb, 1, GRU_CACHE_LINE_BYTES);
+ prefetch_data(cbe + i * GRU_HANDLE_STRIDE, 1,
+ GRU_CACHE_LINE_BYTES);
+ cb += GRU_HANDLE_STRIDE;
+ }
+}
+
+static void gru_load_context_data(void *save, void *grubase, int ctxnum,
+ unsigned long cbrmap, unsigned long dsrmap,
+ int data_valid)
+{
+ void *gseg, *cb, *cbe;
+ unsigned long length;
+ int i, scr;
+
+ gseg = grubase + ctxnum * GRU_GSEG_STRIDE;
+ cb = gseg + GRU_CB_BASE;
+ cbe = grubase + GRU_CBE_BASE;
+ length = hweight64(dsrmap) * GRU_DSR_AU_BYTES;
+ gru_prefetch_context(gseg, cb, cbe, cbrmap, length);
+
+ for_each_cbr_in_allocation_map(i, &cbrmap, scr) {
+ if (data_valid) {
+ save += gru_copy_handle(cb, save);
+ save += gru_copy_handle(cbe + i * GRU_HANDLE_STRIDE,
+ save);
+ } else {
+ memset(cb, 0, GRU_CACHE_LINE_BYTES);
+ memset(cbe + i * GRU_HANDLE_STRIDE, 0,
+ GRU_CACHE_LINE_BYTES);
+ }
+ /* Flush CBE to hide race in context restart */
+ mb();
+ gru_flush_cache(cbe + i * GRU_HANDLE_STRIDE);
+ cb += GRU_HANDLE_STRIDE;
+ }
+
+ if (data_valid)
+ memcpy(gseg + GRU_DS_BASE, save, length);
+ else
+ memset(gseg + GRU_DS_BASE, 0, length);
+}
+
+static void gru_unload_context_data(void *save, void *grubase, int ctxnum,
+ unsigned long cbrmap, unsigned long dsrmap)
+{
+ void *gseg, *cb, *cbe;
+ unsigned long length;
+ int i, scr;
+
+ gseg = grubase + ctxnum * GRU_GSEG_STRIDE;
+ cb = gseg + GRU_CB_BASE;
+ cbe = grubase + GRU_CBE_BASE;
+ length = hweight64(dsrmap) * GRU_DSR_AU_BYTES;
+
+ /* CBEs may not be coherent. Flush them from cache */
+ for_each_cbr_in_allocation_map(i, &cbrmap, scr)
+ gru_flush_cache(cbe + i * GRU_HANDLE_STRIDE);
+ mb(); /* Let the CL flush complete */
+
+ gru_prefetch_context(gseg, cb, cbe, cbrmap, length);
+
+ for_each_cbr_in_allocation_map(i, &cbrmap, scr) {
+ save += gru_copy_handle(save, cb);
+ save += gru_copy_handle(save, cbe + i * GRU_HANDLE_STRIDE);
+ cb += GRU_HANDLE_STRIDE;
+ }
+ memcpy(save, gseg + GRU_DS_BASE, length);
+}
+
+void gru_unload_context(struct gru_thread_state *gts, int savestate)
+{
+ struct gru_state *gru = gts->ts_gru;
+ struct gru_context_configuration_handle *cch;
+ int ctxnum = gts->ts_ctxnum;
+
+ if (!is_kernel_context(gts))
+ zap_vma_ptes(gts->ts_vma, UGRUADDR(gts), GRU_GSEG_PAGESIZE);
+ cch = get_cch(gru->gs_gru_base_vaddr, ctxnum);
+
+ gru_dbg(grudev, "gts %p, cbrmap 0x%lx, dsrmap 0x%lx\n",
+ gts, gts->ts_cbr_map, gts->ts_dsr_map);
+ lock_cch_handle(cch);
+ if (cch_interrupt_sync(cch))
+ BUG();
+
+ if (!is_kernel_context(gts))
+ gru_unload_mm_tracker(gru, gts);
+ if (savestate) {
+ gru_unload_context_data(gts->ts_gdata, gru->gs_gru_base_vaddr,
+ ctxnum, gts->ts_cbr_map,
+ gts->ts_dsr_map);
+ gts->ts_data_valid = 1;
+ }
+
+ if (cch_deallocate(cch))
+ BUG();
+ unlock_cch_handle(cch);
+
+ gru_free_gru_context(gts);
+}
+
+/*
+ * Load a GRU context by copying it from the thread data structure in memory
+ * to the GRU.
+ */
+void gru_load_context(struct gru_thread_state *gts)
+{
+ struct gru_state *gru = gts->ts_gru;
+ struct gru_context_configuration_handle *cch;
+ int i, err, asid, ctxnum = gts->ts_ctxnum;
+
+ cch = get_cch(gru->gs_gru_base_vaddr, ctxnum);
+ lock_cch_handle(cch);
+ cch->tfm_fault_bit_enable =
+ (gts->ts_user_options == GRU_OPT_MISS_FMM_POLL
+ || gts->ts_user_options == GRU_OPT_MISS_FMM_INTR);
+ cch->tlb_int_enable = (gts->ts_user_options == GRU_OPT_MISS_FMM_INTR);
+ if (cch->tlb_int_enable) {
+ gts->ts_tlb_int_select = gru_cpu_fault_map_id();
+ cch->tlb_int_select = gts->ts_tlb_int_select;
+ }
+ if (gts->ts_cch_req_slice >= 0) {
+ cch->req_slice_set_enable = 1;
+ cch->req_slice = gts->ts_cch_req_slice;
+ } else {
+ cch->req_slice_set_enable =0;
+ }
+ cch->tfm_done_bit_enable = 0;
+ cch->dsr_allocation_map = gts->ts_dsr_map;
+ cch->cbr_allocation_map = gts->ts_cbr_map;
+
+ if (is_kernel_context(gts)) {
+ cch->unmap_enable = 1;
+ cch->tfm_done_bit_enable = 1;
+ cch->cb_int_enable = 1;
+ cch->tlb_int_select = 0; /* For now, ints go to cpu 0 */
+ } else {
+ cch->unmap_enable = 0;
+ cch->tfm_done_bit_enable = 0;
+ cch->cb_int_enable = 0;
+ asid = gru_load_mm_tracker(gru, gts);
+ for (i = 0; i < 8; i++) {
+ cch->asid[i] = asid + i;
+ cch->sizeavail[i] = gts->ts_sizeavail;
+ }
+ }
+
+ err = cch_allocate(cch);
+ if (err) {
+ gru_dbg(grudev,
+ "err %d: cch %p, gts %p, cbr 0x%lx, dsr 0x%lx\n",
+ err, cch, gts, gts->ts_cbr_map, gts->ts_dsr_map);
+ BUG();
+ }
+
+ gru_load_context_data(gts->ts_gdata, gru->gs_gru_base_vaddr, ctxnum,
+ gts->ts_cbr_map, gts->ts_dsr_map, gts->ts_data_valid);
+
+ if (cch_start(cch))
+ BUG();
+ unlock_cch_handle(cch);
+
+ gru_dbg(grudev, "gid %d, gts %p, cbrmap 0x%lx, dsrmap 0x%lx, tie %d, tis %d\n",
+ gts->ts_gru->gs_gid, gts, gts->ts_cbr_map, gts->ts_dsr_map,
+ (gts->ts_user_options == GRU_OPT_MISS_FMM_INTR), gts->ts_tlb_int_select);
+}
+
+/*
+ * Update fields in an active CCH:
+ * - retarget interrupts on local blade
+ * - update sizeavail mask
+ */
+int gru_update_cch(struct gru_thread_state *gts)
+{
+ struct gru_context_configuration_handle *cch;
+ struct gru_state *gru = gts->ts_gru;
+ int i, ctxnum = gts->ts_ctxnum, ret = 0;
+
+ cch = get_cch(gru->gs_gru_base_vaddr, ctxnum);
+
+ lock_cch_handle(cch);
+ if (cch->state == CCHSTATE_ACTIVE) {
+ if (gru->gs_gts[gts->ts_ctxnum] != gts)
+ goto exit;
+ if (cch_interrupt(cch))
+ BUG();
+ for (i = 0; i < 8; i++)
+ cch->sizeavail[i] = gts->ts_sizeavail;
+ gts->ts_tlb_int_select = gru_cpu_fault_map_id();
+ cch->tlb_int_select = gru_cpu_fault_map_id();
+ cch->tfm_fault_bit_enable =
+ (gts->ts_user_options == GRU_OPT_MISS_FMM_POLL
+ || gts->ts_user_options == GRU_OPT_MISS_FMM_INTR);
+ if (cch_start(cch))
+ BUG();
+ ret = 1;
+ }
+exit:
+ unlock_cch_handle(cch);
+ return ret;
+}
+
+/*
+ * Update CCH tlb interrupt select. Required when all the following is true:
+ * - task's GRU context is loaded into a GRU
+ * - task is using interrupt notification for TLB faults
+ * - task has migrated to a different cpu on the same blade where
+ * it was previously running.
+ */
+static int gru_retarget_intr(struct gru_thread_state *gts)
+{
+ if (gts->ts_tlb_int_select < 0
+ || gts->ts_tlb_int_select == gru_cpu_fault_map_id())
+ return 0;
+
+ gru_dbg(grudev, "retarget from %d to %d\n", gts->ts_tlb_int_select,
+ gru_cpu_fault_map_id());
+ return gru_update_cch(gts);
+}
+
+/*
+ * Check if a GRU context is allowed to use a specific chiplet. By default
+ * a context is assigned to any blade-local chiplet. However, users can
+ * override this.
+ * Returns 1 if assignment allowed, 0 otherwise
+ */
+static int gru_check_chiplet_assignment(struct gru_state *gru,
+ struct gru_thread_state *gts)
+{
+ int blade_id;
+ int chiplet_id;
+
+ blade_id = gts->ts_user_blade_id;
+ if (blade_id < 0)
+ blade_id = uv_numa_blade_id();
+
+ chiplet_id = gts->ts_user_chiplet_id;
+ return gru->gs_blade_id == blade_id &&
+ (chiplet_id < 0 || chiplet_id == gru->gs_chiplet_id);
+}
+
+/*
+ * Unload the gru context if it is not assigned to the correct blade or
+ * chiplet. Misassignment can occur if the process migrates to a different
+ * blade or if the user changes the selected blade/chiplet.
+ */
+void gru_check_context_placement(struct gru_thread_state *gts)
+{
+ struct gru_state *gru;
+
+ /*
+ * If the current task is the context owner, verify that the
+ * context is correctly placed. This test is skipped for non-owner
+ * references. Pthread apps use non-owner references to the CBRs.
+ */
+ gru = gts->ts_gru;
+ if (!gru || gts->ts_tgid_owner != current->tgid)
+ return;
+
+ if (!gru_check_chiplet_assignment(gru, gts)) {
+ STAT(check_context_unload);
+ gru_unload_context(gts, 1);
+ } else if (gru_retarget_intr(gts)) {
+ STAT(check_context_retarget_intr);
+ }
+}
+
+
+/*
+ * Insufficient GRU resources available on the local blade. Steal a context from
+ * a process. This is a hack until a _real_ resource scheduler is written....
+ */
+#define next_ctxnum(n) ((n) < GRU_NUM_CCH - 2 ? (n) + 1 : 0)
+#define next_gru(b, g) (((g) < &(b)->bs_grus[GRU_CHIPLETS_PER_BLADE - 1]) ? \
+ ((g)+1) : &(b)->bs_grus[0])
+
+static int is_gts_stealable(struct gru_thread_state *gts,
+ struct gru_blade_state *bs)
+{
+ if (is_kernel_context(gts))
+ return down_write_trylock(&bs->bs_kgts_sema);
+ else
+ return mutex_trylock(>s->ts_ctxlock);
+}
+
+static void gts_stolen(struct gru_thread_state *gts,
+ struct gru_blade_state *bs)
+{
+ if (is_kernel_context(gts)) {
+ up_write(&bs->bs_kgts_sema);
+ STAT(steal_kernel_context);
+ } else {
+ mutex_unlock(>s->ts_ctxlock);
+ STAT(steal_user_context);
+ }
+}
+
+void gru_steal_context(struct gru_thread_state *gts)
+{
+ struct gru_blade_state *blade;
+ struct gru_state *gru, *gru0;
+ struct gru_thread_state *ngts = NULL;
+ int ctxnum, ctxnum0, flag = 0, cbr, dsr;
+ int blade_id;
+
+ blade_id = gts->ts_user_blade_id;
+ if (blade_id < 0)
+ blade_id = uv_numa_blade_id();
+ cbr = gts->ts_cbr_au_count;
+ dsr = gts->ts_dsr_au_count;
+
+ blade = gru_base[blade_id];
+ spin_lock(&blade->bs_lock);
+
+ ctxnum = next_ctxnum(blade->bs_lru_ctxnum);
+ gru = blade->bs_lru_gru;
+ if (ctxnum == 0)
+ gru = next_gru(blade, gru);
+ blade->bs_lru_gru = gru;
+ blade->bs_lru_ctxnum = ctxnum;
+ ctxnum0 = ctxnum;
+ gru0 = gru;
+ while (1) {
+ if (gru_check_chiplet_assignment(gru, gts)) {
+ if (check_gru_resources(gru, cbr, dsr, GRU_NUM_CCH))
+ break;
+ spin_lock(&gru->gs_lock);
+ for (; ctxnum < GRU_NUM_CCH; ctxnum++) {
+ if (flag && gru == gru0 && ctxnum == ctxnum0)
+ break;
+ ngts = gru->gs_gts[ctxnum];
+ /*
+ * We are grabbing locks out of order, so trylock is
+ * needed. GTSs are usually not locked, so the odds of
+ * success are high. If trylock fails, try to steal a
+ * different GSEG.
+ */
+ if (ngts && is_gts_stealable(ngts, blade))
+ break;
+ ngts = NULL;
+ }
+ spin_unlock(&gru->gs_lock);
+ if (ngts || (flag && gru == gru0 && ctxnum == ctxnum0))
+ break;
+ }
+ if (flag && gru == gru0)
+ break;
+ flag = 1;
+ ctxnum = 0;
+ gru = next_gru(blade, gru);
+ }
+ spin_unlock(&blade->bs_lock);
+
+ if (ngts) {
+ gts->ustats.context_stolen++;
+ ngts->ts_steal_jiffies = jiffies;
+ gru_unload_context(ngts, is_kernel_context(ngts) ? 0 : 1);
+ gts_stolen(ngts, blade);
+ } else {
+ STAT(steal_context_failed);
+ }
+ gru_dbg(grudev,
+ "stole gid %d, ctxnum %d from gts %p. Need cb %d, ds %d;"
+ " avail cb %ld, ds %ld\n",
+ gru->gs_gid, ctxnum, ngts, cbr, dsr, hweight64(gru->gs_cbr_map),
+ hweight64(gru->gs_dsr_map));
+}
+
+/*
+ * Assign a gru context.
+ */
+static int gru_assign_context_number(struct gru_state *gru)
+{
+ int ctxnum;
+
+ ctxnum = find_first_zero_bit(&gru->gs_context_map, GRU_NUM_CCH);
+ __set_bit(ctxnum, &gru->gs_context_map);
+ return ctxnum;
+}
+
+/*
+ * Scan the GRUs on the local blade & assign a GRU context.
+ */
+struct gru_state *gru_assign_gru_context(struct gru_thread_state *gts)
+{
+ struct gru_state *gru, *grux;
+ int i, max_active_contexts;
+ int blade_id = gts->ts_user_blade_id;
+
+ if (blade_id < 0)
+ blade_id = uv_numa_blade_id();
+again:
+ gru = NULL;
+ max_active_contexts = GRU_NUM_CCH;
+ for_each_gru_on_blade(grux, blade_id, i) {
+ if (!gru_check_chiplet_assignment(grux, gts))
+ continue;
+ if (check_gru_resources(grux, gts->ts_cbr_au_count,
+ gts->ts_dsr_au_count,
+ max_active_contexts)) {
+ gru = grux;
+ max_active_contexts = grux->gs_active_contexts;
+ if (max_active_contexts == 0)
+ break;
+ }
+ }
+
+ if (gru) {
+ spin_lock(&gru->gs_lock);
+ if (!check_gru_resources(gru, gts->ts_cbr_au_count,
+ gts->ts_dsr_au_count, GRU_NUM_CCH)) {
+ spin_unlock(&gru->gs_lock);
+ goto again;
+ }
+ reserve_gru_resources(gru, gts);
+ gts->ts_gru = gru;
+ gts->ts_blade = gru->gs_blade_id;
+ gts->ts_ctxnum = gru_assign_context_number(gru);
+ atomic_inc(>s->ts_refcnt);
+ gru->gs_gts[gts->ts_ctxnum] = gts;
+ spin_unlock(&gru->gs_lock);
+
+ STAT(assign_context);
+ gru_dbg(grudev,
+ "gseg %p, gts %p, gid %d, ctx %d, cbr %d, dsr %d\n",
+ gseg_virtual_address(gts->ts_gru, gts->ts_ctxnum), gts,
+ gts->ts_gru->gs_gid, gts->ts_ctxnum,
+ gts->ts_cbr_au_count, gts->ts_dsr_au_count);
+ } else {
+ gru_dbg(grudev, "failed to allocate a GTS %s\n", "");
+ STAT(assign_context_failed);
+ }
+
+ return gru;
+}
+
+/*
+ * gru_nopage
+ *
+ * Map the user's GRU segment
+ *
+ * Note: gru segments alway mmaped on GRU_GSEG_PAGESIZE boundaries.
+ */
+int gru_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+ struct gru_thread_state *gts;
+ unsigned long paddr, vaddr;
+ unsigned long expires;
+
+ vaddr = (unsigned long)vmf->virtual_address;
+ gru_dbg(grudev, "vma %p, vaddr 0x%lx (0x%lx)\n",
+ vma, vaddr, GSEG_BASE(vaddr));
+ STAT(nopfn);
+
+ /* The following check ensures vaddr is a valid address in the VMA */
+ gts = gru_find_thread_state(vma, TSID(vaddr, vma));
+ if (!gts)
+ return VM_FAULT_SIGBUS;
+
+again:
+ mutex_lock(>s->ts_ctxlock);
+ preempt_disable();
+
+ gru_check_context_placement(gts);
+
+ if (!gts->ts_gru) {
+ STAT(load_user_context);
+ if (!gru_assign_gru_context(gts)) {
+ preempt_enable();
+ mutex_unlock(>s->ts_ctxlock);
+ set_current_state(TASK_INTERRUPTIBLE);
+ schedule_timeout(GRU_ASSIGN_DELAY); /* true hack ZZZ */
+ expires = gts->ts_steal_jiffies + GRU_STEAL_DELAY;
+ if (time_before(expires, jiffies))
+ gru_steal_context(gts);
+ goto again;
+ }
+ gru_load_context(gts);
+ paddr = gseg_physical_address(gts->ts_gru, gts->ts_ctxnum);
+ remap_pfn_range(vma, vaddr & ~(GRU_GSEG_PAGESIZE - 1),
+ paddr >> PAGE_SHIFT, GRU_GSEG_PAGESIZE,
+ vma->vm_page_prot);
+ }
+
+ preempt_enable();
+ mutex_unlock(>s->ts_ctxlock);
+
+ return VM_FAULT_NOPAGE;
+}
+
diff --git a/drivers/misc/sgi-gru/gruprocfs.c b/drivers/misc/sgi-gru/gruprocfs.c
new file mode 100644
index 0000000..4f76359
--- /dev/null
+++ b/drivers/misc/sgi-gru/gruprocfs.c
@@ -0,0 +1,377 @@
+/*
+ * SN Platform GRU Driver
+ *
+ * PROC INTERFACES
+ *
+ * This file supports the /proc interfaces for the GRU driver
+ *
+ * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <linux/proc_fs.h>
+#include <linux/device.h>
+#include <linux/seq_file.h>
+#include <linux/uaccess.h>
+#include "gru.h"
+#include "grulib.h"
+#include "grutables.h"
+
+#define printstat(s, f) printstat_val(s, &gru_stats.f, #f)
+
+static void printstat_val(struct seq_file *s, atomic_long_t *v, char *id)
+{
+ unsigned long val = atomic_long_read(v);
+
+ seq_printf(s, "%16lu %s\n", val, id);
+}
+
+static int statistics_show(struct seq_file *s, void *p)
+{
+ printstat(s, vdata_alloc);
+ printstat(s, vdata_free);
+ printstat(s, gts_alloc);
+ printstat(s, gts_free);
+ printstat(s, gms_alloc);
+ printstat(s, gms_free);
+ printstat(s, gts_double_allocate);
+ printstat(s, assign_context);
+ printstat(s, assign_context_failed);
+ printstat(s, free_context);
+ printstat(s, load_user_context);
+ printstat(s, load_kernel_context);
+ printstat(s, lock_kernel_context);
+ printstat(s, unlock_kernel_context);
+ printstat(s, steal_user_context);
+ printstat(s, steal_kernel_context);
+ printstat(s, steal_context_failed);
+ printstat(s, nopfn);
+ printstat(s, asid_new);
+ printstat(s, asid_next);
+ printstat(s, asid_wrap);
+ printstat(s, asid_reuse);
+ printstat(s, intr);
+ printstat(s, intr_cbr);
+ printstat(s, intr_tfh);
+ printstat(s, intr_spurious);
+ printstat(s, intr_mm_lock_failed);
+ printstat(s, call_os);
+ printstat(s, call_os_wait_queue);
+ printstat(s, user_flush_tlb);
+ printstat(s, user_unload_context);
+ printstat(s, user_exception);
+ printstat(s, set_context_option);
+ printstat(s, check_context_retarget_intr);
+ printstat(s, check_context_unload);
+ printstat(s, tlb_dropin);
+ printstat(s, tlb_preload_page);
+ printstat(s, tlb_dropin_fail_no_asid);
+ printstat(s, tlb_dropin_fail_upm);
+ printstat(s, tlb_dropin_fail_invalid);
+ printstat(s, tlb_dropin_fail_range_active);
+ printstat(s, tlb_dropin_fail_idle);
+ printstat(s, tlb_dropin_fail_fmm);
+ printstat(s, tlb_dropin_fail_no_exception);
+ printstat(s, tfh_stale_on_fault);
+ printstat(s, mmu_invalidate_range);
+ printstat(s, mmu_invalidate_page);
+ printstat(s, flush_tlb);
+ printstat(s, flush_tlb_gru);
+ printstat(s, flush_tlb_gru_tgh);
+ printstat(s, flush_tlb_gru_zero_asid);
+ printstat(s, copy_gpa);
+ printstat(s, read_gpa);
+ printstat(s, mesq_receive);
+ printstat(s, mesq_receive_none);
+ printstat(s, mesq_send);
+ printstat(s, mesq_send_failed);
+ printstat(s, mesq_noop);
+ printstat(s, mesq_send_unexpected_error);
+ printstat(s, mesq_send_lb_overflow);
+ printstat(s, mesq_send_qlimit_reached);
+ printstat(s, mesq_send_amo_nacked);
+ printstat(s, mesq_send_put_nacked);
+ printstat(s, mesq_qf_locked);
+ printstat(s, mesq_qf_noop_not_full);
+ printstat(s, mesq_qf_switch_head_failed);
+ printstat(s, mesq_qf_unexpected_error);
+ printstat(s, mesq_noop_unexpected_error);
+ printstat(s, mesq_noop_lb_overflow);
+ printstat(s, mesq_noop_qlimit_reached);
+ printstat(s, mesq_noop_amo_nacked);
+ printstat(s, mesq_noop_put_nacked);
+ printstat(s, mesq_noop_page_overflow);
+ return 0;
+}
+
+static ssize_t statistics_write(struct file *file, const char __user *userbuf,
+ size_t count, loff_t *data)
+{
+ memset(&gru_stats, 0, sizeof(gru_stats));
+ return count;
+}
+
+static int mcs_statistics_show(struct seq_file *s, void *p)
+{
+ int op;
+ unsigned long total, count, max;
+ static char *id[] = {"cch_allocate", "cch_start", "cch_interrupt",
+ "cch_interrupt_sync", "cch_deallocate", "tfh_write_only",
+ "tfh_write_restart", "tgh_invalidate"};
+
+ seq_printf(s, "%-20s%12s%12s%12s\n", "#id", "count", "aver-clks", "max-clks");
+ for (op = 0; op < mcsop_last; op++) {
+ count = atomic_long_read(&mcs_op_statistics[op].count);
+ total = atomic_long_read(&mcs_op_statistics[op].total);
+ max = mcs_op_statistics[op].max;
+ seq_printf(s, "%-20s%12ld%12ld%12ld\n", id[op], count,
+ count ? total / count : 0, max);
+ }
+ return 0;
+}
+
+static ssize_t mcs_statistics_write(struct file *file,
+ const char __user *userbuf, size_t count, loff_t *data)
+{
+ memset(mcs_op_statistics, 0, sizeof(mcs_op_statistics));
+ return count;
+}
+
+static int options_show(struct seq_file *s, void *p)
+{
+ seq_printf(s, "#bitmask: 1=trace, 2=statistics\n");
+ seq_printf(s, "0x%lx\n", gru_options);
+ return 0;
+}
+
+static ssize_t options_write(struct file *file, const char __user *userbuf,
+ size_t count, loff_t *data)
+{
+ int ret;
+
+ ret = kstrtoul_from_user(userbuf, count, 0, &gru_options);
+ if (ret)
+ return ret;
+
+ return count;
+}
+
+static int cch_seq_show(struct seq_file *file, void *data)
+{
+ long gid = *(long *)data;
+ int i;
+ struct gru_state *gru = GID_TO_GRU(gid);
+ struct gru_thread_state *ts;
+ const char *mode[] = { "??", "UPM", "INTR", "OS_POLL" };
+
+ if (gid == 0)
+ seq_printf(file, "#%5s%5s%6s%7s%9s%6s%8s%8s\n", "gid", "bid",
+ "ctx#", "asid", "pid", "cbrs", "dsbytes", "mode");
+ if (gru)
+ for (i = 0; i < GRU_NUM_CCH; i++) {
+ ts = gru->gs_gts[i];
+ if (!ts)
+ continue;
+ seq_printf(file, " %5d%5d%6d%7d%9d%6d%8d%8s\n",
+ gru->gs_gid, gru->gs_blade_id, i,
+ is_kernel_context(ts) ? 0 : ts->ts_gms->ms_asids[gid].mt_asid,
+ is_kernel_context(ts) ? 0 : ts->ts_tgid_owner,
+ ts->ts_cbr_au_count * GRU_CBR_AU_SIZE,
+ ts->ts_cbr_au_count * GRU_DSR_AU_BYTES,
+ mode[ts->ts_user_options &
+ GRU_OPT_MISS_MASK]);
+ }
+
+ return 0;
+}
+
+static int gru_seq_show(struct seq_file *file, void *data)
+{
+ long gid = *(long *)data, ctxfree, cbrfree, dsrfree;
+ struct gru_state *gru = GID_TO_GRU(gid);
+
+ if (gid == 0) {
+ seq_printf(file, "#%5s%5s%7s%6s%6s%8s%6s%6s\n", "gid", "nid",
+ "ctx", "cbr", "dsr", "ctx", "cbr", "dsr");
+ seq_printf(file, "#%5s%5s%7s%6s%6s%8s%6s%6s\n", "", "", "busy",
+ "busy", "busy", "free", "free", "free");
+ }
+ if (gru) {
+ ctxfree = GRU_NUM_CCH - gru->gs_active_contexts;
+ cbrfree = hweight64(gru->gs_cbr_map) * GRU_CBR_AU_SIZE;
+ dsrfree = hweight64(gru->gs_dsr_map) * GRU_DSR_AU_BYTES;
+ seq_printf(file, " %5d%5d%7ld%6ld%6ld%8ld%6ld%6ld\n",
+ gru->gs_gid, gru->gs_blade_id, GRU_NUM_CCH - ctxfree,
+ GRU_NUM_CBE - cbrfree, GRU_NUM_DSR_BYTES - dsrfree,
+ ctxfree, cbrfree, dsrfree);
+ }
+
+ return 0;
+}
+
+static void seq_stop(struct seq_file *file, void *data)
+{
+}
+
+static void *seq_start(struct seq_file *file, loff_t *gid)
+{
+ if (*gid < gru_max_gids)
+ return gid;
+ return NULL;
+}
+
+static void *seq_next(struct seq_file *file, void *data, loff_t *gid)
+{
+ (*gid)++;
+ if (*gid < gru_max_gids)
+ return gid;
+ return NULL;
+}
+
+static const struct seq_operations cch_seq_ops = {
+ .start = seq_start,
+ .next = seq_next,
+ .stop = seq_stop,
+ .show = cch_seq_show
+};
+
+static const struct seq_operations gru_seq_ops = {
+ .start = seq_start,
+ .next = seq_next,
+ .stop = seq_stop,
+ .show = gru_seq_show
+};
+
+static int statistics_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, statistics_show, NULL);
+}
+
+static int mcs_statistics_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, mcs_statistics_show, NULL);
+}
+
+static int options_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, options_show, NULL);
+}
+
+static int cch_open(struct inode *inode, struct file *file)
+{
+ return seq_open(file, &cch_seq_ops);
+}
+
+static int gru_open(struct inode *inode, struct file *file)
+{
+ return seq_open(file, &gru_seq_ops);
+}
+
+/* *INDENT-OFF* */
+static const struct file_operations statistics_fops = {
+ .open = statistics_open,
+ .read = seq_read,
+ .write = statistics_write,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+static const struct file_operations mcs_statistics_fops = {
+ .open = mcs_statistics_open,
+ .read = seq_read,
+ .write = mcs_statistics_write,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+static const struct file_operations options_fops = {
+ .open = options_open,
+ .read = seq_read,
+ .write = options_write,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+static const struct file_operations cch_fops = {
+ .open = cch_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release,
+};
+static const struct file_operations gru_fops = {
+ .open = gru_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release,
+};
+
+static struct proc_entry {
+ char *name;
+ umode_t mode;
+ const struct file_operations *fops;
+ struct proc_dir_entry *entry;
+} proc_files[] = {
+ {"statistics", 0644, &statistics_fops},
+ {"mcs_statistics", 0644, &mcs_statistics_fops},
+ {"debug_options", 0644, &options_fops},
+ {"cch_status", 0444, &cch_fops},
+ {"gru_status", 0444, &gru_fops},
+ {NULL}
+};
+/* *INDENT-ON* */
+
+static struct proc_dir_entry *proc_gru __read_mostly;
+
+static int create_proc_file(struct proc_entry *p)
+{
+ p->entry = proc_create(p->name, p->mode, proc_gru, p->fops);
+ if (!p->entry)
+ return -1;
+ return 0;
+}
+
+static void delete_proc_files(void)
+{
+ struct proc_entry *p;
+
+ if (proc_gru) {
+ for (p = proc_files; p->name; p++)
+ if (p->entry)
+ remove_proc_entry(p->name, proc_gru);
+ proc_remove(proc_gru);
+ }
+}
+
+int gru_proc_init(void)
+{
+ struct proc_entry *p;
+
+ proc_gru = proc_mkdir("sgi_uv/gru", NULL);
+
+ for (p = proc_files; p->name; p++)
+ if (create_proc_file(p))
+ goto err;
+ return 0;
+
+err:
+ delete_proc_files();
+ return -1;
+}
+
+void gru_proc_exit(void)
+{
+ delete_proc_files();
+}
diff --git a/drivers/misc/sgi-gru/grutables.h b/drivers/misc/sgi-gru/grutables.h
new file mode 100644
index 0000000..5c3ce24
--- /dev/null
+++ b/drivers/misc/sgi-gru/grutables.h
@@ -0,0 +1,678 @@
+/*
+ * SN Platform GRU Driver
+ *
+ * GRU DRIVER TABLES, MACROS, externs, etc
+ *
+ * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef __GRUTABLES_H__
+#define __GRUTABLES_H__
+
+/*
+ * GRU Chiplet:
+ * The GRU is a user addressible memory accelerator. It provides
+ * several forms of load, store, memset, bcopy instructions. In addition, it
+ * contains special instructions for AMOs, sending messages to message
+ * queues, etc.
+ *
+ * The GRU is an integral part of the node controller. It connects
+ * directly to the cpu socket. In its current implementation, there are 2
+ * GRU chiplets in the node controller on each blade (~node).
+ *
+ * The entire GRU memory space is fully coherent and cacheable by the cpus.
+ *
+ * Each GRU chiplet has a physical memory map that looks like the following:
+ *
+ * +-----------------+
+ * |/////////////////|
+ * |/////////////////|
+ * |/////////////////|
+ * |/////////////////|
+ * |/////////////////|
+ * |/////////////////|
+ * |/////////////////|
+ * |/////////////////|
+ * +-----------------+
+ * | system control |
+ * +-----------------+ _______ +-------------+
+ * |/////////////////| / | |
+ * |/////////////////| / | |
+ * |/////////////////| / | instructions|
+ * |/////////////////| / | |
+ * |/////////////////| / | |
+ * |/////////////////| / |-------------|
+ * |/////////////////| / | |
+ * +-----------------+ | |
+ * | context 15 | | data |
+ * +-----------------+ | |
+ * | ...... | \ | |
+ * +-----------------+ \____________ +-------------+
+ * | context 1 |
+ * +-----------------+
+ * | context 0 |
+ * +-----------------+
+ *
+ * Each of the "contexts" is a chunk of memory that can be mmaped into user
+ * space. The context consists of 2 parts:
+ *
+ * - an instruction space that can be directly accessed by the user
+ * to issue GRU instructions and to check instruction status.
+ *
+ * - a data area that acts as normal RAM.
+ *
+ * User instructions contain virtual addresses of data to be accessed by the
+ * GRU. The GRU contains a TLB that is used to convert these user virtual
+ * addresses to physical addresses.
+ *
+ * The "system control" area of the GRU chiplet is used by the kernel driver
+ * to manage user contexts and to perform functions such as TLB dropin and
+ * purging.
+ *
+ * One context may be reserved for the kernel and used for cross-partition
+ * communication. The GRU will also be used to asynchronously zero out
+ * large blocks of memory (not currently implemented).
+ *
+ *
+ * Tables:
+ *
+ * VDATA-VMA Data - Holds a few parameters. Head of linked list of
+ * GTS tables for threads using the GSEG
+ * GTS - Gru Thread State - contains info for managing a GSEG context. A
+ * GTS is allocated for each thread accessing a
+ * GSEG.
+ * GTD - GRU Thread Data - contains shadow copy of GRU data when GSEG is
+ * not loaded into a GRU
+ * GMS - GRU Memory Struct - Used to manage TLB shootdowns. Tracks GRUs
+ * where a GSEG has been loaded. Similar to
+ * an mm_struct but for GRU.
+ *
+ * GS - GRU State - Used to manage the state of a GRU chiplet
+ * BS - Blade State - Used to manage state of all GRU chiplets
+ * on a blade
+ *
+ *
+ * Normal task tables for task using GRU.
+ * - 2 threads in process
+ * - 2 GSEGs open in process
+ * - GSEG1 is being used by both threads
+ * - GSEG2 is used only by thread 2
+ *
+ * task -->|
+ * task ---+---> mm ->------ (notifier) -------+-> gms
+ * | |
+ * |--> vma -> vdata ---> gts--->| GSEG1 (thread1)
+ * | | |
+ * | +-> gts--->| GSEG1 (thread2)
+ * | |
+ * |--> vma -> vdata ---> gts--->| GSEG2 (thread2)
+ * .
+ * .
+ *
+ * GSEGs are marked DONTCOPY on fork
+ *
+ * At open
+ * file.private_data -> NULL
+ *
+ * At mmap,
+ * vma -> vdata
+ *
+ * After gseg reference
+ * vma -> vdata ->gts
+ *
+ * After fork
+ * parent
+ * vma -> vdata -> gts
+ * child
+ * (vma is not copied)
+ *
+ */
+
+#include <linux/rmap.h>
+#include <linux/interrupt.h>
+#include <linux/mutex.h>
+#include <linux/wait.h>
+#include <linux/mmu_notifier.h>
+#include "gru.h"
+#include "grulib.h"
+#include "gruhandles.h"
+
+extern struct gru_stats_s gru_stats;
+extern struct gru_blade_state *gru_base[];
+extern unsigned long gru_start_paddr, gru_end_paddr;
+extern void *gru_start_vaddr;
+extern unsigned int gru_max_gids;
+
+#define GRU_MAX_BLADES MAX_NUMNODES
+#define GRU_MAX_GRUS (GRU_MAX_BLADES * GRU_CHIPLETS_PER_BLADE)
+
+#define GRU_DRIVER_ID_STR "SGI GRU Device Driver"
+#define GRU_DRIVER_VERSION_STR "0.85"
+
+/*
+ * GRU statistics.
+ */
+struct gru_stats_s {
+ atomic_long_t vdata_alloc;
+ atomic_long_t vdata_free;
+ atomic_long_t gts_alloc;
+ atomic_long_t gts_free;
+ atomic_long_t gms_alloc;
+ atomic_long_t gms_free;
+ atomic_long_t gts_double_allocate;
+ atomic_long_t assign_context;
+ atomic_long_t assign_context_failed;
+ atomic_long_t free_context;
+ atomic_long_t load_user_context;
+ atomic_long_t load_kernel_context;
+ atomic_long_t lock_kernel_context;
+ atomic_long_t unlock_kernel_context;
+ atomic_long_t steal_user_context;
+ atomic_long_t steal_kernel_context;
+ atomic_long_t steal_context_failed;
+ atomic_long_t nopfn;
+ atomic_long_t asid_new;
+ atomic_long_t asid_next;
+ atomic_long_t asid_wrap;
+ atomic_long_t asid_reuse;
+ atomic_long_t intr;
+ atomic_long_t intr_cbr;
+ atomic_long_t intr_tfh;
+ atomic_long_t intr_spurious;
+ atomic_long_t intr_mm_lock_failed;
+ atomic_long_t call_os;
+ atomic_long_t call_os_wait_queue;
+ atomic_long_t user_flush_tlb;
+ atomic_long_t user_unload_context;
+ atomic_long_t user_exception;
+ atomic_long_t set_context_option;
+ atomic_long_t check_context_retarget_intr;
+ atomic_long_t check_context_unload;
+ atomic_long_t tlb_dropin;
+ atomic_long_t tlb_preload_page;
+ atomic_long_t tlb_dropin_fail_no_asid;
+ atomic_long_t tlb_dropin_fail_upm;
+ atomic_long_t tlb_dropin_fail_invalid;
+ atomic_long_t tlb_dropin_fail_range_active;
+ atomic_long_t tlb_dropin_fail_idle;
+ atomic_long_t tlb_dropin_fail_fmm;
+ atomic_long_t tlb_dropin_fail_no_exception;
+ atomic_long_t tfh_stale_on_fault;
+ atomic_long_t mmu_invalidate_range;
+ atomic_long_t mmu_invalidate_page;
+ atomic_long_t flush_tlb;
+ atomic_long_t flush_tlb_gru;
+ atomic_long_t flush_tlb_gru_tgh;
+ atomic_long_t flush_tlb_gru_zero_asid;
+
+ atomic_long_t copy_gpa;
+ atomic_long_t read_gpa;
+
+ atomic_long_t mesq_receive;
+ atomic_long_t mesq_receive_none;
+ atomic_long_t mesq_send;
+ atomic_long_t mesq_send_failed;
+ atomic_long_t mesq_noop;
+ atomic_long_t mesq_send_unexpected_error;
+ atomic_long_t mesq_send_lb_overflow;
+ atomic_long_t mesq_send_qlimit_reached;
+ atomic_long_t mesq_send_amo_nacked;
+ atomic_long_t mesq_send_put_nacked;
+ atomic_long_t mesq_page_overflow;
+ atomic_long_t mesq_qf_locked;
+ atomic_long_t mesq_qf_noop_not_full;
+ atomic_long_t mesq_qf_switch_head_failed;
+ atomic_long_t mesq_qf_unexpected_error;
+ atomic_long_t mesq_noop_unexpected_error;
+ atomic_long_t mesq_noop_lb_overflow;
+ atomic_long_t mesq_noop_qlimit_reached;
+ atomic_long_t mesq_noop_amo_nacked;
+ atomic_long_t mesq_noop_put_nacked;
+ atomic_long_t mesq_noop_page_overflow;
+
+};
+
+enum mcs_op {cchop_allocate, cchop_start, cchop_interrupt, cchop_interrupt_sync,
+ cchop_deallocate, tfhop_write_only, tfhop_write_restart,
+ tghop_invalidate, mcsop_last};
+
+struct mcs_op_statistic {
+ atomic_long_t count;
+ atomic_long_t total;
+ unsigned long max;
+};
+
+extern struct mcs_op_statistic mcs_op_statistics[mcsop_last];
+
+#define OPT_DPRINT 1
+#define OPT_STATS 2
+
+
+#define IRQ_GRU 110 /* Starting IRQ number for interrupts */
+
+/* Delay in jiffies between attempts to assign a GRU context */
+#define GRU_ASSIGN_DELAY ((HZ * 20) / 1000)
+
+/*
+ * If a process has it's context stolen, min delay in jiffies before trying to
+ * steal a context from another process.
+ */
+#define GRU_STEAL_DELAY ((HZ * 200) / 1000)
+
+#define STAT(id) do { \
+ if (gru_options & OPT_STATS) \
+ atomic_long_inc(&gru_stats.id); \
+ } while (0)
+
+#ifdef CONFIG_SGI_GRU_DEBUG
+#define gru_dbg(dev, fmt, x...) \
+ do { \
+ if (gru_options & OPT_DPRINT) \
+ printk(KERN_DEBUG "GRU:%d %s: " fmt, smp_processor_id(), __func__, x);\
+ } while (0)
+#else
+#define gru_dbg(x...)
+#endif
+
+/*-----------------------------------------------------------------------------
+ * ASID management
+ */
+#define MAX_ASID 0xfffff0
+#define MIN_ASID 8
+#define ASID_INC 8 /* number of regions */
+
+/* Generate a GRU asid value from a GRU base asid & a virtual address. */
+#define VADDR_HI_BIT 64
+#define GRUREGION(addr) ((addr) >> (VADDR_HI_BIT - 3) & 3)
+#define GRUASID(asid, addr) ((asid) + GRUREGION(addr))
+
+/*------------------------------------------------------------------------------
+ * File & VMS Tables
+ */
+
+struct gru_state;
+
+/*
+ * This structure is pointed to from the mmstruct via the notifier pointer.
+ * There is one of these per address space.
+ */
+struct gru_mm_tracker { /* pack to reduce size */
+ unsigned int mt_asid_gen:24; /* ASID wrap count */
+ unsigned int mt_asid:24; /* current base ASID for gru */
+ unsigned short mt_ctxbitmap:16;/* bitmap of contexts using
+ asid */
+} __attribute__ ((packed));
+
+struct gru_mm_struct {
+ struct mmu_notifier ms_notifier;
+ atomic_t ms_refcnt;
+ spinlock_t ms_asid_lock; /* protects ASID assignment */
+ atomic_t ms_range_active;/* num range_invals active */
+ char ms_released;
+ wait_queue_head_t ms_wait_queue;
+ DECLARE_BITMAP(ms_asidmap, GRU_MAX_GRUS);
+ struct gru_mm_tracker ms_asids[GRU_MAX_GRUS];
+};
+
+/*
+ * One of these structures is allocated when a GSEG is mmaped. The
+ * structure is pointed to by the vma->vm_private_data field in the vma struct.
+ */
+struct gru_vma_data {
+ spinlock_t vd_lock; /* Serialize access to vma */
+ struct list_head vd_head; /* head of linked list of gts */
+ long vd_user_options;/* misc user option flags */
+ int vd_cbr_au_count;
+ int vd_dsr_au_count;
+ unsigned char vd_tlb_preload_count;
+};
+
+/*
+ * One of these is allocated for each thread accessing a mmaped GRU. A linked
+ * list of these structure is hung off the struct gru_vma_data in the mm_struct.
+ */
+struct gru_thread_state {
+ struct list_head ts_next; /* list - head at vma-private */
+ struct mutex ts_ctxlock; /* load/unload CTX lock */
+ struct mm_struct *ts_mm; /* mm currently mapped to
+ context */
+ struct vm_area_struct *ts_vma; /* vma of GRU context */
+ struct gru_state *ts_gru; /* GRU where the context is
+ loaded */
+ struct gru_mm_struct *ts_gms; /* asid & ioproc struct */
+ unsigned char ts_tlb_preload_count; /* TLB preload pages */
+ unsigned long ts_cbr_map; /* map of allocated CBRs */
+ unsigned long ts_dsr_map; /* map of allocated DATA
+ resources */
+ unsigned long ts_steal_jiffies;/* jiffies when context last
+ stolen */
+ long ts_user_options;/* misc user option flags */
+ pid_t ts_tgid_owner; /* task that is using the
+ context - for migration */
+ short ts_user_blade_id;/* user selected blade */
+ char ts_user_chiplet_id;/* user selected chiplet */
+ unsigned short ts_sizeavail; /* Pagesizes in use */
+ int ts_tsid; /* thread that owns the
+ structure */
+ int ts_tlb_int_select;/* target cpu if interrupts
+ enabled */
+ int ts_ctxnum; /* context number where the
+ context is loaded */
+ atomic_t ts_refcnt; /* reference count GTS */
+ unsigned char ts_dsr_au_count;/* Number of DSR resources
+ required for contest */
+ unsigned char ts_cbr_au_count;/* Number of CBR resources
+ required for contest */
+ char ts_cch_req_slice;/* CCH packet slice */
+ char ts_blade; /* If >= 0, migrate context if
+ ref from different blade */
+ char ts_force_cch_reload;
+ char ts_cbr_idx[GRU_CBR_AU];/* CBR numbers of each
+ allocated CB */
+ int ts_data_valid; /* Indicates if ts_gdata has
+ valid data */
+ struct gru_gseg_statistics ustats; /* User statistics */
+ unsigned long ts_gdata[0]; /* save area for GRU data (CB,
+ DS, CBE) */
+};
+
+/*
+ * Threaded programs actually allocate an array of GSEGs when a context is
+ * created. Each thread uses a separate GSEG. TSID is the index into the GSEG
+ * array.
+ */
+#define TSID(a, v) (((a) - (v)->vm_start) / GRU_GSEG_PAGESIZE)
+#define UGRUADDR(gts) ((gts)->ts_vma->vm_start + \
+ (gts)->ts_tsid * GRU_GSEG_PAGESIZE)
+
+#define NULLCTX (-1) /* if context not loaded into GRU */
+
+/*-----------------------------------------------------------------------------
+ * GRU State Tables
+ */
+
+/*
+ * One of these exists for each GRU chiplet.
+ */
+struct gru_state {
+ struct gru_blade_state *gs_blade; /* GRU state for entire
+ blade */
+ unsigned long gs_gru_base_paddr; /* Physical address of
+ gru segments (64) */
+ void *gs_gru_base_vaddr; /* Virtual address of
+ gru segments (64) */
+ unsigned short gs_gid; /* unique GRU number */
+ unsigned short gs_blade_id; /* blade of GRU */
+ unsigned char gs_chiplet_id; /* blade chiplet of GRU */
+ unsigned char gs_tgh_local_shift; /* used to pick TGH for
+ local flush */
+ unsigned char gs_tgh_first_remote; /* starting TGH# for
+ remote flush */
+ spinlock_t gs_asid_lock; /* lock used for
+ assigning asids */
+ spinlock_t gs_lock; /* lock used for
+ assigning contexts */
+
+ /* -- the following are protected by the gs_asid_lock spinlock ---- */
+ unsigned int gs_asid; /* Next availe ASID */
+ unsigned int gs_asid_limit; /* Limit of available
+ ASIDs */
+ unsigned int gs_asid_gen; /* asid generation.
+ Inc on wrap */
+
+ /* --- the following fields are protected by the gs_lock spinlock --- */
+ unsigned long gs_context_map; /* bitmap to manage
+ contexts in use */
+ unsigned long gs_cbr_map; /* bitmap to manage CB
+ resources */
+ unsigned long gs_dsr_map; /* bitmap used to manage
+ DATA resources */
+ unsigned int gs_reserved_cbrs; /* Number of kernel-
+ reserved cbrs */
+ unsigned int gs_reserved_dsr_bytes; /* Bytes of kernel-
+ reserved dsrs */
+ unsigned short gs_active_contexts; /* number of contexts
+ in use */
+ struct gru_thread_state *gs_gts[GRU_NUM_CCH]; /* GTS currently using
+ the context */
+ int gs_irq[GRU_NUM_TFM]; /* Interrupt irqs */
+};
+
+/*
+ * This structure contains the GRU state for all the GRUs on a blade.
+ */
+struct gru_blade_state {
+ void *kernel_cb; /* First kernel
+ reserved cb */
+ void *kernel_dsr; /* First kernel
+ reserved DSR */
+ struct rw_semaphore bs_kgts_sema; /* lock for kgts */
+ struct gru_thread_state *bs_kgts; /* GTS for kernel use */
+
+ /* ---- the following are used for managing kernel async GRU CBRs --- */
+ int bs_async_dsr_bytes; /* DSRs for async */
+ int bs_async_cbrs; /* CBRs AU for async */
+ struct completion *bs_async_wq;
+
+ /* ---- the following are protected by the bs_lock spinlock ---- */
+ spinlock_t bs_lock; /* lock used for
+ stealing contexts */
+ int bs_lru_ctxnum; /* STEAL - last context
+ stolen */
+ struct gru_state *bs_lru_gru; /* STEAL - last gru
+ stolen */
+
+ struct gru_state bs_grus[GRU_CHIPLETS_PER_BLADE];
+};
+
+/*-----------------------------------------------------------------------------
+ * Address Primitives
+ */
+#define get_tfm_for_cpu(g, c) \
+ ((struct gru_tlb_fault_map *)get_tfm((g)->gs_gru_base_vaddr, (c)))
+#define get_tfh_by_index(g, i) \
+ ((struct gru_tlb_fault_handle *)get_tfh((g)->gs_gru_base_vaddr, (i)))
+#define get_tgh_by_index(g, i) \
+ ((struct gru_tlb_global_handle *)get_tgh((g)->gs_gru_base_vaddr, (i)))
+#define get_cbe_by_index(g, i) \
+ ((struct gru_control_block_extended *)get_cbe((g)->gs_gru_base_vaddr,\
+ (i)))
+
+/*-----------------------------------------------------------------------------
+ * Useful Macros
+ */
+
+/* Given a blade# & chiplet#, get a pointer to the GRU */
+#define get_gru(b, c) (&gru_base[b]->bs_grus[c])
+
+/* Number of bytes to save/restore when unloading/loading GRU contexts */
+#define DSR_BYTES(dsr) ((dsr) * GRU_DSR_AU_BYTES)
+#define CBR_BYTES(cbr) ((cbr) * GRU_HANDLE_BYTES * GRU_CBR_AU_SIZE * 2)
+
+/* Convert a user CB number to the actual CBRNUM */
+#define thread_cbr_number(gts, n) ((gts)->ts_cbr_idx[(n) / GRU_CBR_AU_SIZE] \
+ * GRU_CBR_AU_SIZE + (n) % GRU_CBR_AU_SIZE)
+
+/* Convert a gid to a pointer to the GRU */
+#define GID_TO_GRU(gid) \
+ (gru_base[(gid) / GRU_CHIPLETS_PER_BLADE] ? \
+ (&gru_base[(gid) / GRU_CHIPLETS_PER_BLADE]-> \
+ bs_grus[(gid) % GRU_CHIPLETS_PER_BLADE]) : \
+ NULL)
+
+/* Scan all active GRUs in a GRU bitmap */
+#define for_each_gru_in_bitmap(gid, map) \
+ for_each_set_bit((gid), (map), GRU_MAX_GRUS)
+
+/* Scan all active GRUs on a specific blade */
+#define for_each_gru_on_blade(gru, nid, i) \
+ for ((gru) = gru_base[nid]->bs_grus, (i) = 0; \
+ (i) < GRU_CHIPLETS_PER_BLADE; \
+ (i)++, (gru)++)
+
+/* Scan all GRUs */
+#define foreach_gid(gid) \
+ for ((gid) = 0; (gid) < gru_max_gids; (gid)++)
+
+/* Scan all active GTSs on a gru. Note: must hold ss_lock to use this macro. */
+#define for_each_gts_on_gru(gts, gru, ctxnum) \
+ for ((ctxnum) = 0; (ctxnum) < GRU_NUM_CCH; (ctxnum)++) \
+ if (((gts) = (gru)->gs_gts[ctxnum]))
+
+/* Scan each CBR whose bit is set in a TFM (or copy of) */
+#define for_each_cbr_in_tfm(i, map) \
+ for_each_set_bit((i), (map), GRU_NUM_CBE)
+
+/* Scan each CBR in a CBR bitmap. Note: multiple CBRs in an allocation unit */
+#define for_each_cbr_in_allocation_map(i, map, k) \
+ for_each_set_bit((k), (map), GRU_CBR_AU) \
+ for ((i) = (k)*GRU_CBR_AU_SIZE; \
+ (i) < ((k) + 1) * GRU_CBR_AU_SIZE; (i)++)
+
+/* Scan each DSR in a DSR bitmap. Note: multiple DSRs in an allocation unit */
+#define for_each_dsr_in_allocation_map(i, map, k) \
+ for_each_set_bit((k), (const unsigned long *)(map), GRU_DSR_AU) \
+ for ((i) = (k) * GRU_DSR_AU_CL; \
+ (i) < ((k) + 1) * GRU_DSR_AU_CL; (i)++)
+
+#define gseg_physical_address(gru, ctxnum) \
+ ((gru)->gs_gru_base_paddr + ctxnum * GRU_GSEG_STRIDE)
+#define gseg_virtual_address(gru, ctxnum) \
+ ((gru)->gs_gru_base_vaddr + ctxnum * GRU_GSEG_STRIDE)
+
+/*-----------------------------------------------------------------------------
+ * Lock / Unlock GRU handles
+ * Use the "delresp" bit in the handle as a "lock" bit.
+ */
+
+/* Lock hierarchy checking enabled only in emulator */
+
+/* 0 = lock failed, 1 = locked */
+static inline int __trylock_handle(void *h)
+{
+ return !test_and_set_bit(1, h);
+}
+
+static inline void __lock_handle(void *h)
+{
+ while (test_and_set_bit(1, h))
+ cpu_relax();
+}
+
+static inline void __unlock_handle(void *h)
+{
+ clear_bit(1, h);
+}
+
+static inline int trylock_cch_handle(struct gru_context_configuration_handle *cch)
+{
+ return __trylock_handle(cch);
+}
+
+static inline void lock_cch_handle(struct gru_context_configuration_handle *cch)
+{
+ __lock_handle(cch);
+}
+
+static inline void unlock_cch_handle(struct gru_context_configuration_handle
+ *cch)
+{
+ __unlock_handle(cch);
+}
+
+static inline void lock_tgh_handle(struct gru_tlb_global_handle *tgh)
+{
+ __lock_handle(tgh);
+}
+
+static inline void unlock_tgh_handle(struct gru_tlb_global_handle *tgh)
+{
+ __unlock_handle(tgh);
+}
+
+static inline int is_kernel_context(struct gru_thread_state *gts)
+{
+ return !gts->ts_mm;
+}
+
+/*
+ * The following are for Nehelem-EX. A more general scheme is needed for
+ * future processors.
+ */
+#define UV_MAX_INT_CORES 8
+#define uv_cpu_socket_number(p) ((cpu_physical_id(p) >> 5) & 1)
+#define uv_cpu_ht_number(p) (cpu_physical_id(p) & 1)
+#define uv_cpu_core_number(p) (((cpu_physical_id(p) >> 2) & 4) | \
+ ((cpu_physical_id(p) >> 1) & 3))
+/*-----------------------------------------------------------------------------
+ * Function prototypes & externs
+ */
+struct gru_unload_context_req;
+
+extern const struct vm_operations_struct gru_vm_ops;
+extern struct device *grudev;
+
+extern struct gru_vma_data *gru_alloc_vma_data(struct vm_area_struct *vma,
+ int tsid);
+extern struct gru_thread_state *gru_find_thread_state(struct vm_area_struct
+ *vma, int tsid);
+extern struct gru_thread_state *gru_alloc_thread_state(struct vm_area_struct
+ *vma, int tsid);
+extern struct gru_state *gru_assign_gru_context(struct gru_thread_state *gts);
+extern void gru_load_context(struct gru_thread_state *gts);
+extern void gru_steal_context(struct gru_thread_state *gts);
+extern void gru_unload_context(struct gru_thread_state *gts, int savestate);
+extern int gru_update_cch(struct gru_thread_state *gts);
+extern void gts_drop(struct gru_thread_state *gts);
+extern void gru_tgh_flush_init(struct gru_state *gru);
+extern int gru_kservices_init(void);
+extern void gru_kservices_exit(void);
+extern irqreturn_t gru0_intr(int irq, void *dev_id);
+extern irqreturn_t gru1_intr(int irq, void *dev_id);
+extern irqreturn_t gru_intr_mblade(int irq, void *dev_id);
+extern int gru_dump_chiplet_request(unsigned long arg);
+extern long gru_get_gseg_statistics(unsigned long arg);
+extern int gru_handle_user_call_os(unsigned long address);
+extern int gru_user_flush_tlb(unsigned long arg);
+extern int gru_user_unload_context(unsigned long arg);
+extern int gru_get_exception_detail(unsigned long arg);
+extern int gru_set_context_option(unsigned long address);
+extern void gru_check_context_placement(struct gru_thread_state *gts);
+extern int gru_cpu_fault_map_id(void);
+extern struct vm_area_struct *gru_find_vma(unsigned long vaddr);
+extern void gru_flush_all_tlb(struct gru_state *gru);
+extern int gru_proc_init(void);
+extern void gru_proc_exit(void);
+
+extern struct gru_thread_state *gru_alloc_gts(struct vm_area_struct *vma,
+ int cbr_au_count, int dsr_au_count,
+ unsigned char tlb_preload_count, int options, int tsid);
+extern unsigned long gru_reserve_cb_resources(struct gru_state *gru,
+ int cbr_au_count, char *cbmap);
+extern unsigned long gru_reserve_ds_resources(struct gru_state *gru,
+ int dsr_au_count, char *dsmap);
+extern int gru_fault(struct vm_area_struct *, struct vm_fault *vmf);
+extern struct gru_mm_struct *gru_register_mmu_notifier(void);
+extern void gru_drop_mmu_notifier(struct gru_mm_struct *gms);
+
+extern int gru_ktest(unsigned long arg);
+extern void gru_flush_tlb_range(struct gru_mm_struct *gms, unsigned long start,
+ unsigned long len);
+
+extern unsigned long gru_options;
+
+#endif /* __GRUTABLES_H__ */
diff --git a/drivers/misc/sgi-gru/grutlbpurge.c b/drivers/misc/sgi-gru/grutlbpurge.c
new file mode 100644
index 0000000..e936d43
--- /dev/null
+++ b/drivers/misc/sgi-gru/grutlbpurge.c
@@ -0,0 +1,378 @@
+/*
+ * SN Platform GRU Driver
+ *
+ * MMUOPS callbacks + TLB flushing
+ *
+ * This file handles emu notifier callbacks from the core kernel. The callbacks
+ * are used to update the TLB in the GRU as a result of changes in the
+ * state of a process address space. This file also handles TLB invalidates
+ * from the GRU driver.
+ *
+ * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/device.h>
+#include <linux/hugetlb.h>
+#include <linux/delay.h>
+#include <linux/timex.h>
+#include <linux/srcu.h>
+#include <asm/processor.h>
+#include "gru.h"
+#include "grutables.h"
+#include <asm/uv/uv_hub.h>
+
+#define gru_random() get_cycles()
+
+/* ---------------------------------- TLB Invalidation functions --------
+ * get_tgh_handle
+ *
+ * Find a TGH to use for issuing a TLB invalidate. For GRUs that are on the
+ * local blade, use a fixed TGH that is a function of the blade-local cpu
+ * number. Normally, this TGH is private to the cpu & no contention occurs for
+ * the TGH. For offblade GRUs, select a random TGH in the range above the
+ * private TGHs. A spinlock is required to access this TGH & the lock must be
+ * released when the invalidate is completes. This sucks, but it is the best we
+ * can do.
+ *
+ * Note that the spinlock is IN the TGH handle so locking does not involve
+ * additional cache lines.
+ *
+ */
+static inline int get_off_blade_tgh(struct gru_state *gru)
+{
+ int n;
+
+ n = GRU_NUM_TGH - gru->gs_tgh_first_remote;
+ n = gru_random() % n;
+ n += gru->gs_tgh_first_remote;
+ return n;
+}
+
+static inline int get_on_blade_tgh(struct gru_state *gru)
+{
+ return uv_blade_processor_id() >> gru->gs_tgh_local_shift;
+}
+
+static struct gru_tlb_global_handle *get_lock_tgh_handle(struct gru_state
+ *gru)
+{
+ struct gru_tlb_global_handle *tgh;
+ int n;
+
+ preempt_disable();
+ if (uv_numa_blade_id() == gru->gs_blade_id)
+ n = get_on_blade_tgh(gru);
+ else
+ n = get_off_blade_tgh(gru);
+ tgh = get_tgh_by_index(gru, n);
+ lock_tgh_handle(tgh);
+
+ return tgh;
+}
+
+static void get_unlock_tgh_handle(struct gru_tlb_global_handle *tgh)
+{
+ unlock_tgh_handle(tgh);
+ preempt_enable();
+}
+
+/*
+ * gru_flush_tlb_range
+ *
+ * General purpose TLB invalidation function. This function scans every GRU in
+ * the ENTIRE system (partition) looking for GRUs where the specified MM has
+ * been accessed by the GRU. For each GRU found, the TLB must be invalidated OR
+ * the ASID invalidated. Invalidating an ASID causes a new ASID to be assigned
+ * on the next fault. This effectively flushes the ENTIRE TLB for the MM at the
+ * cost of (possibly) a large number of future TLBmisses.
+ *
+ * The current algorithm is optimized based on the following (somewhat true)
+ * assumptions:
+ * - GRU contexts are not loaded into a GRU unless a reference is made to
+ * the data segment or control block (this is true, not an assumption).
+ * If a DS/CB is referenced, the user will also issue instructions that
+ * cause TLBmisses. It is not necessary to optimize for the case where
+ * contexts are loaded but no instructions cause TLB misses. (I know
+ * this will happen but I'm not optimizing for it).
+ * - GRU instructions to invalidate TLB entries are SLOOOOWWW - normally
+ * a few usec but in unusual cases, it could be longer. Avoid if
+ * possible.
+ * - intrablade process migration between cpus is not frequent but is
+ * common.
+ * - a GRU context is not typically migrated to a different GRU on the
+ * blade because of intrablade migration
+ * - interblade migration is rare. Processes migrate their GRU context to
+ * the new blade.
+ * - if interblade migration occurs, migration back to the original blade
+ * is very very rare (ie., no optimization for this case)
+ * - most GRU instruction operate on a subset of the user REGIONS. Code
+ * & shared library regions are not likely targets of GRU instructions.
+ *
+ * To help improve the efficiency of TLB invalidation, the GMS data
+ * structure is maintained for EACH address space (MM struct). The GMS is
+ * also the structure that contains the pointer to the mmu callout
+ * functions. This structure is linked to the mm_struct for the address space
+ * using the mmu "register" function. The mmu interfaces are used to
+ * provide the callbacks for TLB invalidation. The GMS contains:
+ *
+ * - asid[maxgrus] array. ASIDs are assigned to a GRU when a context is
+ * loaded into the GRU.
+ * - asidmap[maxgrus]. bitmap to make it easier to find non-zero asids in
+ * the above array
+ * - ctxbitmap[maxgrus]. Indicates the contexts that are currently active
+ * in the GRU for the address space. This bitmap must be passed to the
+ * GRU to do an invalidate.
+ *
+ * The current algorithm for invalidating TLBs is:
+ * - scan the asidmap for GRUs where the context has been loaded, ie,
+ * asid is non-zero.
+ * - for each gru found:
+ * - if the ctxtmap is non-zero, there are active contexts in the
+ * GRU. TLB invalidate instructions must be issued to the GRU.
+ * - if the ctxtmap is zero, no context is active. Set the ASID to
+ * zero to force a full TLB invalidation. This is fast but will
+ * cause a lot of TLB misses if the context is reloaded onto the
+ * GRU
+ *
+ */
+
+void gru_flush_tlb_range(struct gru_mm_struct *gms, unsigned long start,
+ unsigned long len)
+{
+ struct gru_state *gru;
+ struct gru_mm_tracker *asids;
+ struct gru_tlb_global_handle *tgh;
+ unsigned long num;
+ int grupagesize, pagesize, pageshift, gid, asid;
+
+ /* ZZZ TODO - handle huge pages */
+ pageshift = PAGE_SHIFT;
+ pagesize = (1UL << pageshift);
+ grupagesize = GRU_PAGESIZE(pageshift);
+ num = min(((len + pagesize - 1) >> pageshift), GRUMAXINVAL);
+
+ STAT(flush_tlb);
+ gru_dbg(grudev, "gms %p, start 0x%lx, len 0x%lx, asidmap 0x%lx\n", gms,
+ start, len, gms->ms_asidmap[0]);
+
+ spin_lock(&gms->ms_asid_lock);
+ for_each_gru_in_bitmap(gid, gms->ms_asidmap) {
+ STAT(flush_tlb_gru);
+ gru = GID_TO_GRU(gid);
+ asids = gms->ms_asids + gid;
+ asid = asids->mt_asid;
+ if (asids->mt_ctxbitmap && asid) {
+ STAT(flush_tlb_gru_tgh);
+ asid = GRUASID(asid, start);
+ gru_dbg(grudev,
+ " FLUSH gruid %d, asid 0x%x, vaddr 0x%lx, vamask 0x%x, num %ld, cbmap 0x%x\n",
+ gid, asid, start, grupagesize, num, asids->mt_ctxbitmap);
+ tgh = get_lock_tgh_handle(gru);
+ tgh_invalidate(tgh, start, ~0, asid, grupagesize, 0,
+ num - 1, asids->mt_ctxbitmap);
+ get_unlock_tgh_handle(tgh);
+ } else {
+ STAT(flush_tlb_gru_zero_asid);
+ asids->mt_asid = 0;
+ __clear_bit(gru->gs_gid, gms->ms_asidmap);
+ gru_dbg(grudev,
+ " CLEARASID gruid %d, asid 0x%x, cbtmap 0x%x, asidmap 0x%lx\n",
+ gid, asid, asids->mt_ctxbitmap,
+ gms->ms_asidmap[0]);
+ }
+ }
+ spin_unlock(&gms->ms_asid_lock);
+}
+
+/*
+ * Flush the entire TLB on a chiplet.
+ */
+void gru_flush_all_tlb(struct gru_state *gru)
+{
+ struct gru_tlb_global_handle *tgh;
+
+ gru_dbg(grudev, "gid %d\n", gru->gs_gid);
+ tgh = get_lock_tgh_handle(gru);
+ tgh_invalidate(tgh, 0, ~0, 0, 1, 1, GRUMAXINVAL - 1, 0xffff);
+ get_unlock_tgh_handle(tgh);
+}
+
+/*
+ * MMUOPS notifier callout functions
+ */
+static void gru_invalidate_range_start(struct mmu_notifier *mn,
+ struct mm_struct *mm,
+ unsigned long start, unsigned long end)
+{
+ struct gru_mm_struct *gms = container_of(mn, struct gru_mm_struct,
+ ms_notifier);
+
+ STAT(mmu_invalidate_range);
+ atomic_inc(&gms->ms_range_active);
+ gru_dbg(grudev, "gms %p, start 0x%lx, end 0x%lx, act %d\n", gms,
+ start, end, atomic_read(&gms->ms_range_active));
+ gru_flush_tlb_range(gms, start, end - start);
+}
+
+static void gru_invalidate_range_end(struct mmu_notifier *mn,
+ struct mm_struct *mm, unsigned long start,
+ unsigned long end)
+{
+ struct gru_mm_struct *gms = container_of(mn, struct gru_mm_struct,
+ ms_notifier);
+
+ /* ..._and_test() provides needed barrier */
+ (void)atomic_dec_and_test(&gms->ms_range_active);
+
+ wake_up_all(&gms->ms_wait_queue);
+ gru_dbg(grudev, "gms %p, start 0x%lx, end 0x%lx\n", gms, start, end);
+}
+
+static void gru_invalidate_page(struct mmu_notifier *mn, struct mm_struct *mm,
+ unsigned long address)
+{
+ struct gru_mm_struct *gms = container_of(mn, struct gru_mm_struct,
+ ms_notifier);
+
+ STAT(mmu_invalidate_page);
+ gru_flush_tlb_range(gms, address, PAGE_SIZE);
+ gru_dbg(grudev, "gms %p, address 0x%lx\n", gms, address);
+}
+
+static void gru_release(struct mmu_notifier *mn, struct mm_struct *mm)
+{
+ struct gru_mm_struct *gms = container_of(mn, struct gru_mm_struct,
+ ms_notifier);
+
+ gms->ms_released = 1;
+ gru_dbg(grudev, "gms %p\n", gms);
+}
+
+
+static const struct mmu_notifier_ops gru_mmuops = {
+ .invalidate_page = gru_invalidate_page,
+ .invalidate_range_start = gru_invalidate_range_start,
+ .invalidate_range_end = gru_invalidate_range_end,
+ .release = gru_release,
+};
+
+/* Move this to the basic mmu_notifier file. But for now... */
+static struct mmu_notifier *mmu_find_ops(struct mm_struct *mm,
+ const struct mmu_notifier_ops *ops)
+{
+ struct mmu_notifier *mn, *gru_mn = NULL;
+
+ if (mm->mmu_notifier_mm) {
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(mn, &mm->mmu_notifier_mm->list,
+ hlist)
+ if (mn->ops == ops) {
+ gru_mn = mn;
+ break;
+ }
+ rcu_read_unlock();
+ }
+ return gru_mn;
+}
+
+struct gru_mm_struct *gru_register_mmu_notifier(void)
+{
+ struct gru_mm_struct *gms;
+ struct mmu_notifier *mn;
+ int err;
+
+ mn = mmu_find_ops(current->mm, &gru_mmuops);
+ if (mn) {
+ gms = container_of(mn, struct gru_mm_struct, ms_notifier);
+ atomic_inc(&gms->ms_refcnt);
+ } else {
+ gms = kzalloc(sizeof(*gms), GFP_KERNEL);
+ if (!gms)
+ return ERR_PTR(-ENOMEM);
+ STAT(gms_alloc);
+ spin_lock_init(&gms->ms_asid_lock);
+ gms->ms_notifier.ops = &gru_mmuops;
+ atomic_set(&gms->ms_refcnt, 1);
+ init_waitqueue_head(&gms->ms_wait_queue);
+ err = __mmu_notifier_register(&gms->ms_notifier, current->mm);
+ if (err)
+ goto error;
+ }
+ if (gms)
+ gru_dbg(grudev, "gms %p, refcnt %d\n", gms,
+ atomic_read(&gms->ms_refcnt));
+ return gms;
+error:
+ kfree(gms);
+ return ERR_PTR(err);
+}
+
+void gru_drop_mmu_notifier(struct gru_mm_struct *gms)
+{
+ gru_dbg(grudev, "gms %p, refcnt %d, released %d\n", gms,
+ atomic_read(&gms->ms_refcnt), gms->ms_released);
+ if (atomic_dec_return(&gms->ms_refcnt) == 0) {
+ if (!gms->ms_released)
+ mmu_notifier_unregister(&gms->ms_notifier, current->mm);
+ kfree(gms);
+ STAT(gms_free);
+ }
+}
+
+/*
+ * Setup TGH parameters. There are:
+ * - 24 TGH handles per GRU chiplet
+ * - a portion (MAX_LOCAL_TGH) of the handles are reserved for
+ * use by blade-local cpus
+ * - the rest are used by off-blade cpus. This usage is
+ * less frequent than blade-local usage.
+ *
+ * For now, use 16 handles for local flushes, 8 for remote flushes. If the blade
+ * has less tan or equal to 16 cpus, each cpu has a unique handle that it can
+ * use.
+ */
+#define MAX_LOCAL_TGH 16
+
+void gru_tgh_flush_init(struct gru_state *gru)
+{
+ int cpus, shift = 0, n;
+
+ cpus = uv_blade_nr_possible_cpus(gru->gs_blade_id);
+
+ /* n = cpus rounded up to next power of 2 */
+ if (cpus) {
+ n = 1 << fls(cpus - 1);
+
+ /*
+ * shift count for converting local cpu# to TGH index
+ * 0 if cpus <= MAX_LOCAL_TGH,
+ * 1 if cpus <= 2*MAX_LOCAL_TGH,
+ * etc
+ */
+ shift = max(0, fls(n - 1) - fls(MAX_LOCAL_TGH - 1));
+ }
+ gru->gs_tgh_local_shift = shift;
+
+ /* first starting TGH index to use for remote purges */
+ gru->gs_tgh_first_remote = (cpus + (1 << shift) - 1) >> shift;
+
+}
diff --git a/drivers/misc/sgi-xp/Makefile b/drivers/misc/sgi-xp/Makefile
new file mode 100644
index 0000000..4fc40d8
--- /dev/null
+++ b/drivers/misc/sgi-xp/Makefile
@@ -0,0 +1,19 @@
+#
+# Makefile for SGI's XP devices.
+#
+
+obj-$(CONFIG_SGI_XP) += xp.o
+xp-y := xp_main.o
+xp-$(CONFIG_IA64_SGI_SN2) += xp_sn2.o xp_nofault.o
+xp-$(CONFIG_IA64_GENERIC) += xp_sn2.o xp_nofault.o
+xp-$(CONFIG_IA64_SGI_UV) += xp_uv.o
+xp-$(CONFIG_X86_64) += xp_uv.o
+
+obj-$(CONFIG_SGI_XP) += xpc.o
+xpc-y := xpc_main.o xpc_channel.o xpc_partition.o
+xpc-$(CONFIG_IA64_SGI_SN2) += xpc_sn2.o
+xpc-$(CONFIG_IA64_GENERIC) += xpc_sn2.o
+xpc-$(CONFIG_IA64_SGI_UV) += xpc_uv.o
+xpc-$(CONFIG_X86_64) += xpc_uv.o
+
+obj-$(CONFIG_SGI_XP) += xpnet.o
diff --git a/drivers/misc/sgi-xp/xp.h b/drivers/misc/sgi-xp/xp.h
new file mode 100644
index 0000000..c862cd4
--- /dev/null
+++ b/drivers/misc/sgi-xp/xp.h
@@ -0,0 +1,358 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2004-2008 Silicon Graphics, Inc. All rights reserved.
+ */
+
+/*
+ * External Cross Partition (XP) structures and defines.
+ */
+
+#ifndef _DRIVERS_MISC_SGIXP_XP_H
+#define _DRIVERS_MISC_SGIXP_XP_H
+
+#include <linux/mutex.h>
+
+#if defined CONFIG_X86_UV || defined CONFIG_IA64_SGI_UV
+#include <asm/uv/uv.h>
+#define is_uv() is_uv_system()
+#endif
+
+#ifndef is_uv
+#define is_uv() 0
+#endif
+
+#if defined CONFIG_IA64
+#include <asm/sn/arch.h> /* defines is_shub1() and is_shub2() */
+#define is_shub() ia64_platform_is("sn2")
+#endif
+
+#ifndef is_shub1
+#define is_shub1() 0
+#endif
+
+#ifndef is_shub2
+#define is_shub2() 0
+#endif
+
+#ifndef is_shub
+#define is_shub() 0
+#endif
+
+#ifdef USE_DBUG_ON
+#define DBUG_ON(condition) BUG_ON(condition)
+#else
+#define DBUG_ON(condition)
+#endif
+
+/*
+ * Define the maximum number of partitions the system can possibly support.
+ * It is based on the maximum number of hardware partitionable regions. The
+ * term 'region' in this context refers to the minimum number of nodes that
+ * can comprise an access protection grouping. The access protection is in
+ * regards to memory, IPI and IOI.
+ *
+ * The maximum number of hardware partitionable regions is equal to the
+ * maximum number of nodes in the entire system divided by the minimum number
+ * of nodes that comprise an access protection grouping.
+ */
+#define XP_MAX_NPARTITIONS_SN2 64
+#define XP_MAX_NPARTITIONS_UV 256
+
+/*
+ * XPC establishes channel connections between the local partition and any
+ * other partition that is currently up. Over these channels, kernel-level
+ * `users' can communicate with their counterparts on the other partitions.
+ *
+ * If the need for additional channels arises, one can simply increase
+ * XPC_MAX_NCHANNELS accordingly. If the day should come where that number
+ * exceeds the absolute MAXIMUM number of channels possible (eight), then one
+ * will need to make changes to the XPC code to accommodate for this.
+ *
+ * The absolute maximum number of channels possible is limited to eight for
+ * performance reasons on sn2 hardware. The internal cross partition structures
+ * require sixteen bytes per channel, and eight allows all of this
+ * interface-shared info to fit in one 128-byte cacheline.
+ */
+#define XPC_MEM_CHANNEL 0 /* memory channel number */
+#define XPC_NET_CHANNEL 1 /* network channel number */
+
+#define XPC_MAX_NCHANNELS 2 /* max #of channels allowed */
+
+#if XPC_MAX_NCHANNELS > 8
+#error XPC_MAX_NCHANNELS exceeds absolute MAXIMUM possible.
+#endif
+
+/*
+ * Define macro, XPC_MSG_SIZE(), is provided for the user
+ * that wants to fit as many msg entries as possible in a given memory size
+ * (e.g. a memory page).
+ */
+#define XPC_MSG_MAX_SIZE 128
+#define XPC_MSG_HDR_MAX_SIZE 16
+#define XPC_MSG_PAYLOAD_MAX_SIZE (XPC_MSG_MAX_SIZE - XPC_MSG_HDR_MAX_SIZE)
+
+#define XPC_MSG_SIZE(_payload_size) \
+ ALIGN(XPC_MSG_HDR_MAX_SIZE + (_payload_size), \
+ is_uv() ? 64 : 128)
+
+
+/*
+ * Define the return values and values passed to user's callout functions.
+ * (It is important to add new value codes at the end just preceding
+ * xpUnknownReason, which must have the highest numerical value.)
+ */
+enum xp_retval {
+ xpSuccess = 0,
+
+ xpNotConnected, /* 1: channel is not connected */
+ xpConnected, /* 2: channel connected (opened) */
+ xpRETIRED1, /* 3: (formerly xpDisconnected) */
+
+ xpMsgReceived, /* 4: message received */
+ xpMsgDelivered, /* 5: message delivered and acknowledged */
+
+ xpRETIRED2, /* 6: (formerly xpTransferFailed) */
+
+ xpNoWait, /* 7: operation would require wait */
+ xpRetry, /* 8: retry operation */
+ xpTimeout, /* 9: timeout in xpc_allocate_msg_wait() */
+ xpInterrupted, /* 10: interrupted wait */
+
+ xpUnequalMsgSizes, /* 11: message size disparity between sides */
+ xpInvalidAddress, /* 12: invalid address */
+
+ xpNoMemory, /* 13: no memory available for XPC structures */
+ xpLackOfResources, /* 14: insufficient resources for operation */
+ xpUnregistered, /* 15: channel is not registered */
+ xpAlreadyRegistered, /* 16: channel is already registered */
+
+ xpPartitionDown, /* 17: remote partition is down */
+ xpNotLoaded, /* 18: XPC module is not loaded */
+ xpUnloading, /* 19: this side is unloading XPC module */
+
+ xpBadMagic, /* 20: XPC MAGIC string not found */
+
+ xpReactivating, /* 21: remote partition was reactivated */
+
+ xpUnregistering, /* 22: this side is unregistering channel */
+ xpOtherUnregistering, /* 23: other side is unregistering channel */
+
+ xpCloneKThread, /* 24: cloning kernel thread */
+ xpCloneKThreadFailed, /* 25: cloning kernel thread failed */
+
+ xpNoHeartbeat, /* 26: remote partition has no heartbeat */
+
+ xpPioReadError, /* 27: PIO read error */
+ xpPhysAddrRegFailed, /* 28: registration of phys addr range failed */
+
+ xpRETIRED3, /* 29: (formerly xpBteDirectoryError) */
+ xpRETIRED4, /* 30: (formerly xpBtePoisonError) */
+ xpRETIRED5, /* 31: (formerly xpBteWriteError) */
+ xpRETIRED6, /* 32: (formerly xpBteAccessError) */
+ xpRETIRED7, /* 33: (formerly xpBtePWriteError) */
+ xpRETIRED8, /* 34: (formerly xpBtePReadError) */
+ xpRETIRED9, /* 35: (formerly xpBteTimeOutError) */
+ xpRETIRED10, /* 36: (formerly xpBteXtalkError) */
+ xpRETIRED11, /* 37: (formerly xpBteNotAvailable) */
+ xpRETIRED12, /* 38: (formerly xpBteUnmappedError) */
+
+ xpBadVersion, /* 39: bad version number */
+ xpVarsNotSet, /* 40: the XPC variables are not set up */
+ xpNoRsvdPageAddr, /* 41: unable to get rsvd page's phys addr */
+ xpInvalidPartid, /* 42: invalid partition ID */
+ xpLocalPartid, /* 43: local partition ID */
+
+ xpOtherGoingDown, /* 44: other side going down, reason unknown */
+ xpSystemGoingDown, /* 45: system is going down, reason unknown */
+ xpSystemHalt, /* 46: system is being halted */
+ xpSystemReboot, /* 47: system is being rebooted */
+ xpSystemPoweroff, /* 48: system is being powered off */
+
+ xpDisconnecting, /* 49: channel disconnecting (closing) */
+
+ xpOpenCloseError, /* 50: channel open/close protocol error */
+
+ xpDisconnected, /* 51: channel disconnected (closed) */
+
+ xpBteCopyError, /* 52: bte_copy() returned error */
+ xpSalError, /* 53: sn SAL error */
+ xpRsvdPageNotSet, /* 54: the reserved page is not set up */
+ xpPayloadTooBig, /* 55: payload too large for message slot */
+
+ xpUnsupported, /* 56: unsupported functionality or resource */
+ xpNeedMoreInfo, /* 57: more info is needed by SAL */
+
+ xpGruCopyError, /* 58: gru_copy_gru() returned error */
+ xpGruSendMqError, /* 59: gru send message queue related error */
+
+ xpBadChannelNumber, /* 60: invalid channel number */
+ xpBadMsgType, /* 61: invalid message type */
+ xpBiosError, /* 62: BIOS error */
+
+ xpUnknownReason /* 63: unknown reason - must be last in enum */
+};
+
+/*
+ * Define the callout function type used by XPC to update the user on
+ * connection activity and state changes via the user function registered
+ * by xpc_connect().
+ *
+ * Arguments:
+ *
+ * reason - reason code.
+ * partid - partition ID associated with condition.
+ * ch_number - channel # associated with condition.
+ * data - pointer to optional data.
+ * key - pointer to optional user-defined value provided as the "key"
+ * argument to xpc_connect().
+ *
+ * A reason code of xpConnected indicates that a connection has been
+ * established to the specified partition on the specified channel. The data
+ * argument indicates the max number of entries allowed in the message queue.
+ *
+ * A reason code of xpMsgReceived indicates that a XPC message arrived from
+ * the specified partition on the specified channel. The data argument
+ * specifies the address of the message's payload. The user must call
+ * xpc_received() when finished with the payload.
+ *
+ * All other reason codes indicate failure. The data argmument is NULL.
+ * When a failure reason code is received, one can assume that the channel
+ * is not connected.
+ */
+typedef void (*xpc_channel_func) (enum xp_retval reason, short partid,
+ int ch_number, void *data, void *key);
+
+/*
+ * Define the callout function type used by XPC to notify the user of
+ * messages received and delivered via the user function registered by
+ * xpc_send_notify().
+ *
+ * Arguments:
+ *
+ * reason - reason code.
+ * partid - partition ID associated with condition.
+ * ch_number - channel # associated with condition.
+ * key - pointer to optional user-defined value provided as the "key"
+ * argument to xpc_send_notify().
+ *
+ * A reason code of xpMsgDelivered indicates that the message was delivered
+ * to the intended recipient and that they have acknowledged its receipt by
+ * calling xpc_received().
+ *
+ * All other reason codes indicate failure.
+ *
+ * NOTE: The user defined function must be callable by an interrupt handler
+ * and thus cannot block.
+ */
+typedef void (*xpc_notify_func) (enum xp_retval reason, short partid,
+ int ch_number, void *key);
+
+/*
+ * The following is a registration entry. There is a global array of these,
+ * one per channel. It is used to record the connection registration made
+ * by the users of XPC. As long as a registration entry exists, for any
+ * partition that comes up, XPC will attempt to establish a connection on
+ * that channel. Notification that a connection has been made will occur via
+ * the xpc_channel_func function.
+ *
+ * The 'func' field points to the function to call when aynchronous
+ * notification is required for such events as: a connection established/lost,
+ * or an incoming message received, or an error condition encountered. A
+ * non-NULL 'func' field indicates that there is an active registration for
+ * the channel.
+ */
+struct xpc_registration {
+ struct mutex mutex;
+ xpc_channel_func func; /* function to call */
+ void *key; /* pointer to user's key */
+ u16 nentries; /* #of msg entries in local msg queue */
+ u16 entry_size; /* message queue's message entry size */
+ u32 assigned_limit; /* limit on #of assigned kthreads */
+ u32 idle_limit; /* limit on #of idle kthreads */
+} ____cacheline_aligned;
+
+#define XPC_CHANNEL_REGISTERED(_c) (xpc_registrations[_c].func != NULL)
+
+/* the following are valid xpc_send() or xpc_send_notify() flags */
+#define XPC_WAIT 0 /* wait flag */
+#define XPC_NOWAIT 1 /* no wait flag */
+
+struct xpc_interface {
+ void (*connect) (int);
+ void (*disconnect) (int);
+ enum xp_retval (*send) (short, int, u32, void *, u16);
+ enum xp_retval (*send_notify) (short, int, u32, void *, u16,
+ xpc_notify_func, void *);
+ void (*received) (short, int, void *);
+ enum xp_retval (*partid_to_nasids) (short, void *);
+};
+
+extern struct xpc_interface xpc_interface;
+
+extern void xpc_set_interface(void (*)(int),
+ void (*)(int),
+ enum xp_retval (*)(short, int, u32, void *, u16),
+ enum xp_retval (*)(short, int, u32, void *, u16,
+ xpc_notify_func, void *),
+ void (*)(short, int, void *),
+ enum xp_retval (*)(short, void *));
+extern void xpc_clear_interface(void);
+
+extern enum xp_retval xpc_connect(int, xpc_channel_func, void *, u16,
+ u16, u32, u32);
+extern void xpc_disconnect(int);
+
+static inline enum xp_retval
+xpc_send(short partid, int ch_number, u32 flags, void *payload,
+ u16 payload_size)
+{
+ return xpc_interface.send(partid, ch_number, flags, payload,
+ payload_size);
+}
+
+static inline enum xp_retval
+xpc_send_notify(short partid, int ch_number, u32 flags, void *payload,
+ u16 payload_size, xpc_notify_func func, void *key)
+{
+ return xpc_interface.send_notify(partid, ch_number, flags, payload,
+ payload_size, func, key);
+}
+
+static inline void
+xpc_received(short partid, int ch_number, void *payload)
+{
+ return xpc_interface.received(partid, ch_number, payload);
+}
+
+static inline enum xp_retval
+xpc_partid_to_nasids(short partid, void *nasids)
+{
+ return xpc_interface.partid_to_nasids(partid, nasids);
+}
+
+extern short xp_max_npartitions;
+extern short xp_partition_id;
+extern u8 xp_region_size;
+
+extern unsigned long (*xp_pa) (void *);
+extern unsigned long (*xp_socket_pa) (unsigned long);
+extern enum xp_retval (*xp_remote_memcpy) (unsigned long, const unsigned long,
+ size_t);
+extern int (*xp_cpu_to_nasid) (int);
+extern enum xp_retval (*xp_expand_memprotect) (unsigned long, unsigned long);
+extern enum xp_retval (*xp_restrict_memprotect) (unsigned long, unsigned long);
+
+extern u64 xp_nofault_PIOR_target;
+extern int xp_nofault_PIOR(void *);
+extern int xp_error_PIOR(void);
+
+extern struct device *xp;
+extern enum xp_retval xp_init_sn2(void);
+extern enum xp_retval xp_init_uv(void);
+extern void xp_exit_sn2(void);
+extern void xp_exit_uv(void);
+
+#endif /* _DRIVERS_MISC_SGIXP_XP_H */
diff --git a/drivers/misc/sgi-xp/xp_main.c b/drivers/misc/sgi-xp/xp_main.c
new file mode 100644
index 0000000..01be66d
--- /dev/null
+++ b/drivers/misc/sgi-xp/xp_main.c
@@ -0,0 +1,286 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 2004-2008 Silicon Graphics, Inc. All Rights Reserved.
+ */
+
+/*
+ * Cross Partition (XP) base.
+ *
+ * XP provides a base from which its users can interact
+ * with XPC, yet not be dependent on XPC.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/device.h>
+#include "xp.h"
+
+/* define the XP debug device structures to be used with dev_dbg() et al */
+
+struct device_driver xp_dbg_name = {
+ .name = "xp"
+};
+
+struct device xp_dbg_subname = {
+ .init_name = "", /* set to "" */
+ .driver = &xp_dbg_name
+};
+
+struct device *xp = &xp_dbg_subname;
+
+/* max #of partitions possible */
+short xp_max_npartitions;
+EXPORT_SYMBOL_GPL(xp_max_npartitions);
+
+short xp_partition_id;
+EXPORT_SYMBOL_GPL(xp_partition_id);
+
+u8 xp_region_size;
+EXPORT_SYMBOL_GPL(xp_region_size);
+
+unsigned long (*xp_pa) (void *addr);
+EXPORT_SYMBOL_GPL(xp_pa);
+
+unsigned long (*xp_socket_pa) (unsigned long gpa);
+EXPORT_SYMBOL_GPL(xp_socket_pa);
+
+enum xp_retval (*xp_remote_memcpy) (unsigned long dst_gpa,
+ const unsigned long src_gpa, size_t len);
+EXPORT_SYMBOL_GPL(xp_remote_memcpy);
+
+int (*xp_cpu_to_nasid) (int cpuid);
+EXPORT_SYMBOL_GPL(xp_cpu_to_nasid);
+
+enum xp_retval (*xp_expand_memprotect) (unsigned long phys_addr,
+ unsigned long size);
+EXPORT_SYMBOL_GPL(xp_expand_memprotect);
+enum xp_retval (*xp_restrict_memprotect) (unsigned long phys_addr,
+ unsigned long size);
+EXPORT_SYMBOL_GPL(xp_restrict_memprotect);
+
+/*
+ * xpc_registrations[] keeps track of xpc_connect()'s done by the kernel-level
+ * users of XPC.
+ */
+struct xpc_registration xpc_registrations[XPC_MAX_NCHANNELS];
+EXPORT_SYMBOL_GPL(xpc_registrations);
+
+/*
+ * Initialize the XPC interface to indicate that XPC isn't loaded.
+ */
+static enum xp_retval
+xpc_notloaded(void)
+{
+ return xpNotLoaded;
+}
+
+struct xpc_interface xpc_interface = {
+ (void (*)(int))xpc_notloaded,
+ (void (*)(int))xpc_notloaded,
+ (enum xp_retval(*)(short, int, u32, void *, u16))xpc_notloaded,
+ (enum xp_retval(*)(short, int, u32, void *, u16, xpc_notify_func,
+ void *))xpc_notloaded,
+ (void (*)(short, int, void *))xpc_notloaded,
+ (enum xp_retval(*)(short, void *))xpc_notloaded
+};
+EXPORT_SYMBOL_GPL(xpc_interface);
+
+/*
+ * XPC calls this when it (the XPC module) has been loaded.
+ */
+void
+xpc_set_interface(void (*connect) (int),
+ void (*disconnect) (int),
+ enum xp_retval (*send) (short, int, u32, void *, u16),
+ enum xp_retval (*send_notify) (short, int, u32, void *, u16,
+ xpc_notify_func, void *),
+ void (*received) (short, int, void *),
+ enum xp_retval (*partid_to_nasids) (short, void *))
+{
+ xpc_interface.connect = connect;
+ xpc_interface.disconnect = disconnect;
+ xpc_interface.send = send;
+ xpc_interface.send_notify = send_notify;
+ xpc_interface.received = received;
+ xpc_interface.partid_to_nasids = partid_to_nasids;
+}
+EXPORT_SYMBOL_GPL(xpc_set_interface);
+
+/*
+ * XPC calls this when it (the XPC module) is being unloaded.
+ */
+void
+xpc_clear_interface(void)
+{
+ xpc_interface.connect = (void (*)(int))xpc_notloaded;
+ xpc_interface.disconnect = (void (*)(int))xpc_notloaded;
+ xpc_interface.send = (enum xp_retval(*)(short, int, u32, void *, u16))
+ xpc_notloaded;
+ xpc_interface.send_notify = (enum xp_retval(*)(short, int, u32, void *,
+ u16, xpc_notify_func,
+ void *))xpc_notloaded;
+ xpc_interface.received = (void (*)(short, int, void *))
+ xpc_notloaded;
+ xpc_interface.partid_to_nasids = (enum xp_retval(*)(short, void *))
+ xpc_notloaded;
+}
+EXPORT_SYMBOL_GPL(xpc_clear_interface);
+
+/*
+ * Register for automatic establishment of a channel connection whenever
+ * a partition comes up.
+ *
+ * Arguments:
+ *
+ * ch_number - channel # to register for connection.
+ * func - function to call for asynchronous notification of channel
+ * state changes (i.e., connection, disconnection, error) and
+ * the arrival of incoming messages.
+ * key - pointer to optional user-defined value that gets passed back
+ * to the user on any callouts made to func.
+ * payload_size - size in bytes of the XPC message's payload area which
+ * contains a user-defined message. The user should make
+ * this large enough to hold their largest message.
+ * nentries - max #of XPC message entries a message queue can contain.
+ * The actual number, which is determined when a connection
+ * is established and may be less then requested, will be
+ * passed to the user via the xpConnected callout.
+ * assigned_limit - max number of kthreads allowed to be processing
+ * messages (per connection) at any given instant.
+ * idle_limit - max number of kthreads allowed to be idle at any given
+ * instant.
+ */
+enum xp_retval
+xpc_connect(int ch_number, xpc_channel_func func, void *key, u16 payload_size,
+ u16 nentries, u32 assigned_limit, u32 idle_limit)
+{
+ struct xpc_registration *registration;
+
+ DBUG_ON(ch_number < 0 || ch_number >= XPC_MAX_NCHANNELS);
+ DBUG_ON(payload_size == 0 || nentries == 0);
+ DBUG_ON(func == NULL);
+ DBUG_ON(assigned_limit == 0 || idle_limit > assigned_limit);
+
+ if (XPC_MSG_SIZE(payload_size) > XPC_MSG_MAX_SIZE)
+ return xpPayloadTooBig;
+
+ registration = &xpc_registrations[ch_number];
+
+ if (mutex_lock_interruptible(®istration->mutex) != 0)
+ return xpInterrupted;
+
+ /* if XPC_CHANNEL_REGISTERED(ch_number) */
+ if (registration->func != NULL) {
+ mutex_unlock(®istration->mutex);
+ return xpAlreadyRegistered;
+ }
+
+ /* register the channel for connection */
+ registration->entry_size = XPC_MSG_SIZE(payload_size);
+ registration->nentries = nentries;
+ registration->assigned_limit = assigned_limit;
+ registration->idle_limit = idle_limit;
+ registration->key = key;
+ registration->func = func;
+
+ mutex_unlock(®istration->mutex);
+
+ xpc_interface.connect(ch_number);
+
+ return xpSuccess;
+}
+EXPORT_SYMBOL_GPL(xpc_connect);
+
+/*
+ * Remove the registration for automatic connection of the specified channel
+ * when a partition comes up.
+ *
+ * Before returning this xpc_disconnect() will wait for all connections on the
+ * specified channel have been closed/torndown. So the caller can be assured
+ * that they will not be receiving any more callouts from XPC to their
+ * function registered via xpc_connect().
+ *
+ * Arguments:
+ *
+ * ch_number - channel # to unregister.
+ */
+void
+xpc_disconnect(int ch_number)
+{
+ struct xpc_registration *registration;
+
+ DBUG_ON(ch_number < 0 || ch_number >= XPC_MAX_NCHANNELS);
+
+ registration = &xpc_registrations[ch_number];
+
+ /*
+ * We've decided not to make this a down_interruptible(), since we
+ * figured XPC's users will just turn around and call xpc_disconnect()
+ * again anyways, so we might as well wait, if need be.
+ */
+ mutex_lock(®istration->mutex);
+
+ /* if !XPC_CHANNEL_REGISTERED(ch_number) */
+ if (registration->func == NULL) {
+ mutex_unlock(®istration->mutex);
+ return;
+ }
+
+ /* remove the connection registration for the specified channel */
+ registration->func = NULL;
+ registration->key = NULL;
+ registration->nentries = 0;
+ registration->entry_size = 0;
+ registration->assigned_limit = 0;
+ registration->idle_limit = 0;
+
+ xpc_interface.disconnect(ch_number);
+
+ mutex_unlock(®istration->mutex);
+
+ return;
+}
+EXPORT_SYMBOL_GPL(xpc_disconnect);
+
+int __init
+xp_init(void)
+{
+ enum xp_retval ret;
+ int ch_number;
+
+ /* initialize the connection registration mutex */
+ for (ch_number = 0; ch_number < XPC_MAX_NCHANNELS; ch_number++)
+ mutex_init(&xpc_registrations[ch_number].mutex);
+
+ if (is_shub())
+ ret = xp_init_sn2();
+ else if (is_uv())
+ ret = xp_init_uv();
+ else
+ ret = 0;
+
+ if (ret != xpSuccess)
+ return ret;
+
+ return 0;
+}
+
+module_init(xp_init);
+
+void __exit
+xp_exit(void)
+{
+ if (is_shub())
+ xp_exit_sn2();
+ else if (is_uv())
+ xp_exit_uv();
+}
+
+module_exit(xp_exit);
+
+MODULE_AUTHOR("Silicon Graphics, Inc.");
+MODULE_DESCRIPTION("Cross Partition (XP) base");
+MODULE_LICENSE("GPL");
diff --git a/drivers/misc/sgi-xp/xp_nofault.S b/drivers/misc/sgi-xp/xp_nofault.S
new file mode 100644
index 0000000..e38d433
--- /dev/null
+++ b/drivers/misc/sgi-xp/xp_nofault.S
@@ -0,0 +1,35 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 2004-2008 Silicon Graphics, Inc. All Rights Reserved.
+ */
+
+/*
+ * The xp_nofault_PIOR function takes a pointer to a remote PIO register
+ * and attempts to load and consume a value from it. This function
+ * will be registered as a nofault code block. In the event that the
+ * PIO read fails, the MCA handler will force the error to look
+ * corrected and vector to the xp_error_PIOR which will return an error.
+ *
+ * The definition of "consumption" and the time it takes for an MCA
+ * to surface is processor implementation specific. This code
+ * is sufficient on Itanium through the Montvale processor family.
+ * It may need to be adjusted for future processor implementations.
+ *
+ * extern int xp_nofault_PIOR(void *remote_register);
+ */
+
+ .global xp_nofault_PIOR
+xp_nofault_PIOR:
+ mov r8=r0 // Stage a success return value
+ ld8.acq r9=[r32];; // PIO Read the specified register
+ adds r9=1,r9;; // Add to force consumption
+ srlz.i;; // Allow time for MCA to surface
+ br.ret.sptk.many b0;; // Return success
+
+ .global xp_error_PIOR
+xp_error_PIOR:
+ mov r8=1 // Return value of 1
+ br.ret.sptk.many b0;; // Return failure
diff --git a/drivers/misc/sgi-xp/xp_sn2.c b/drivers/misc/sgi-xp/xp_sn2.c
new file mode 100644
index 0000000..d8e463f
--- /dev/null
+++ b/drivers/misc/sgi-xp/xp_sn2.c
@@ -0,0 +1,190 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved.
+ */
+
+/*
+ * Cross Partition (XP) sn2-based functions.
+ *
+ * Architecture specific implementation of common functions.
+ */
+
+#include <linux/module.h>
+#include <linux/device.h>
+#include <asm/sn/bte.h>
+#include <asm/sn/sn_sal.h>
+#include "xp.h"
+
+/*
+ * The export of xp_nofault_PIOR needs to happen here since it is defined
+ * in drivers/misc/sgi-xp/xp_nofault.S. The target of the nofault read is
+ * defined here.
+ */
+EXPORT_SYMBOL_GPL(xp_nofault_PIOR);
+
+u64 xp_nofault_PIOR_target;
+EXPORT_SYMBOL_GPL(xp_nofault_PIOR_target);
+
+/*
+ * Register a nofault code region which performs a cross-partition PIO read.
+ * If the PIO read times out, the MCA handler will consume the error and
+ * return to a kernel-provided instruction to indicate an error. This PIO read
+ * exists because it is guaranteed to timeout if the destination is down
+ * (amo operations do not timeout on at least some CPUs on Shubs <= v1.2,
+ * which unfortunately we have to work around).
+ */
+static enum xp_retval
+xp_register_nofault_code_sn2(void)
+{
+ int ret;
+ u64 func_addr;
+ u64 err_func_addr;
+
+ func_addr = *(u64 *)xp_nofault_PIOR;
+ err_func_addr = *(u64 *)xp_error_PIOR;
+ ret = sn_register_nofault_code(func_addr, err_func_addr, err_func_addr,
+ 1, 1);
+ if (ret != 0) {
+ dev_err(xp, "can't register nofault code, error=%d\n", ret);
+ return xpSalError;
+ }
+ /*
+ * Setup the nofault PIO read target. (There is no special reason why
+ * SH_IPI_ACCESS was selected.)
+ */
+ if (is_shub1())
+ xp_nofault_PIOR_target = SH1_IPI_ACCESS;
+ else if (is_shub2())
+ xp_nofault_PIOR_target = SH2_IPI_ACCESS0;
+
+ return xpSuccess;
+}
+
+static void
+xp_unregister_nofault_code_sn2(void)
+{
+ u64 func_addr = *(u64 *)xp_nofault_PIOR;
+ u64 err_func_addr = *(u64 *)xp_error_PIOR;
+
+ /* unregister the PIO read nofault code region */
+ (void)sn_register_nofault_code(func_addr, err_func_addr,
+ err_func_addr, 1, 0);
+}
+
+/*
+ * Convert a virtual memory address to a physical memory address.
+ */
+static unsigned long
+xp_pa_sn2(void *addr)
+{
+ return __pa(addr);
+}
+
+/*
+ * Convert a global physical to a socket physical address.
+ */
+static unsigned long
+xp_socket_pa_sn2(unsigned long gpa)
+{
+ return gpa;
+}
+
+/*
+ * Wrapper for bte_copy().
+ *
+ * dst_pa - physical address of the destination of the transfer.
+ * src_pa - physical address of the source of the transfer.
+ * len - number of bytes to transfer from source to destination.
+ *
+ * Note: xp_remote_memcpy_sn2() should never be called while holding a spinlock.
+ */
+static enum xp_retval
+xp_remote_memcpy_sn2(unsigned long dst_pa, const unsigned long src_pa,
+ size_t len)
+{
+ bte_result_t ret;
+
+ ret = bte_copy(src_pa, dst_pa, len, (BTE_NOTIFY | BTE_WACQUIRE), NULL);
+ if (ret == BTE_SUCCESS)
+ return xpSuccess;
+
+ if (is_shub2()) {
+ dev_err(xp, "bte_copy() on shub2 failed, error=0x%x dst_pa="
+ "0x%016lx src_pa=0x%016lx len=%ld\\n", ret, dst_pa,
+ src_pa, len);
+ } else {
+ dev_err(xp, "bte_copy() failed, error=%d dst_pa=0x%016lx "
+ "src_pa=0x%016lx len=%ld\\n", ret, dst_pa, src_pa, len);
+ }
+
+ return xpBteCopyError;
+}
+
+static int
+xp_cpu_to_nasid_sn2(int cpuid)
+{
+ return cpuid_to_nasid(cpuid);
+}
+
+static enum xp_retval
+xp_expand_memprotect_sn2(unsigned long phys_addr, unsigned long size)
+{
+ u64 nasid_array = 0;
+ int ret;
+
+ ret = sn_change_memprotect(phys_addr, size, SN_MEMPROT_ACCESS_CLASS_1,
+ &nasid_array);
+ if (ret != 0) {
+ dev_err(xp, "sn_change_memprotect(,, "
+ "SN_MEMPROT_ACCESS_CLASS_1,) failed ret=%d\n", ret);
+ return xpSalError;
+ }
+ return xpSuccess;
+}
+
+static enum xp_retval
+xp_restrict_memprotect_sn2(unsigned long phys_addr, unsigned long size)
+{
+ u64 nasid_array = 0;
+ int ret;
+
+ ret = sn_change_memprotect(phys_addr, size, SN_MEMPROT_ACCESS_CLASS_0,
+ &nasid_array);
+ if (ret != 0) {
+ dev_err(xp, "sn_change_memprotect(,, "
+ "SN_MEMPROT_ACCESS_CLASS_0,) failed ret=%d\n", ret);
+ return xpSalError;
+ }
+ return xpSuccess;
+}
+
+enum xp_retval
+xp_init_sn2(void)
+{
+ BUG_ON(!is_shub());
+
+ xp_max_npartitions = XP_MAX_NPARTITIONS_SN2;
+ xp_partition_id = sn_partition_id;
+ xp_region_size = sn_region_size;
+
+ xp_pa = xp_pa_sn2;
+ xp_socket_pa = xp_socket_pa_sn2;
+ xp_remote_memcpy = xp_remote_memcpy_sn2;
+ xp_cpu_to_nasid = xp_cpu_to_nasid_sn2;
+ xp_expand_memprotect = xp_expand_memprotect_sn2;
+ xp_restrict_memprotect = xp_restrict_memprotect_sn2;
+
+ return xp_register_nofault_code_sn2();
+}
+
+void
+xp_exit_sn2(void)
+{
+ BUG_ON(!is_shub());
+
+ xp_unregister_nofault_code_sn2();
+}
+
diff --git a/drivers/misc/sgi-xp/xp_uv.c b/drivers/misc/sgi-xp/xp_uv.c
new file mode 100644
index 0000000..a0d0932
--- /dev/null
+++ b/drivers/misc/sgi-xp/xp_uv.c
@@ -0,0 +1,171 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved.
+ */
+
+/*
+ * Cross Partition (XP) uv-based functions.
+ *
+ * Architecture specific implementation of common functions.
+ *
+ */
+
+#include <linux/device.h>
+#include <asm/uv/uv_hub.h>
+#if defined CONFIG_X86_64
+#include <asm/uv/bios.h>
+#elif defined CONFIG_IA64_GENERIC || defined CONFIG_IA64_SGI_UV
+#include <asm/sn/sn_sal.h>
+#endif
+#include "../sgi-gru/grukservices.h"
+#include "xp.h"
+
+/*
+ * Convert a virtual memory address to a physical memory address.
+ */
+static unsigned long
+xp_pa_uv(void *addr)
+{
+ return uv_gpa(addr);
+}
+
+/*
+ * Convert a global physical to socket physical address.
+ */
+static unsigned long
+xp_socket_pa_uv(unsigned long gpa)
+{
+ return uv_gpa_to_soc_phys_ram(gpa);
+}
+
+static enum xp_retval
+xp_remote_mmr_read(unsigned long dst_gpa, const unsigned long src_gpa,
+ size_t len)
+{
+ int ret;
+ unsigned long *dst_va = __va(uv_gpa_to_soc_phys_ram(dst_gpa));
+
+ BUG_ON(!uv_gpa_in_mmr_space(src_gpa));
+ BUG_ON(len != 8);
+
+ ret = gru_read_gpa(dst_va, src_gpa);
+ if (ret == 0)
+ return xpSuccess;
+
+ dev_err(xp, "gru_read_gpa() failed, dst_gpa=0x%016lx src_gpa=0x%016lx "
+ "len=%ld\n", dst_gpa, src_gpa, len);
+ return xpGruCopyError;
+}
+
+
+static enum xp_retval
+xp_remote_memcpy_uv(unsigned long dst_gpa, const unsigned long src_gpa,
+ size_t len)
+{
+ int ret;
+
+ if (uv_gpa_in_mmr_space(src_gpa))
+ return xp_remote_mmr_read(dst_gpa, src_gpa, len);
+
+ ret = gru_copy_gpa(dst_gpa, src_gpa, len);
+ if (ret == 0)
+ return xpSuccess;
+
+ dev_err(xp, "gru_copy_gpa() failed, dst_gpa=0x%016lx src_gpa=0x%016lx "
+ "len=%ld\n", dst_gpa, src_gpa, len);
+ return xpGruCopyError;
+}
+
+static int
+xp_cpu_to_nasid_uv(int cpuid)
+{
+ /* ??? Is this same as sn2 nasid in mach/part bitmaps set up by SAL? */
+ return UV_PNODE_TO_NASID(uv_cpu_to_pnode(cpuid));
+}
+
+static enum xp_retval
+xp_expand_memprotect_uv(unsigned long phys_addr, unsigned long size)
+{
+ int ret;
+
+#if defined CONFIG_X86_64
+ ret = uv_bios_change_memprotect(phys_addr, size, UV_MEMPROT_ALLOW_RW);
+ if (ret != BIOS_STATUS_SUCCESS) {
+ dev_err(xp, "uv_bios_change_memprotect(,, "
+ "UV_MEMPROT_ALLOW_RW) failed, ret=%d\n", ret);
+ return xpBiosError;
+ }
+
+#elif defined CONFIG_IA64_GENERIC || defined CONFIG_IA64_SGI_UV
+ u64 nasid_array;
+
+ ret = sn_change_memprotect(phys_addr, size, SN_MEMPROT_ACCESS_CLASS_1,
+ &nasid_array);
+ if (ret != 0) {
+ dev_err(xp, "sn_change_memprotect(,, "
+ "SN_MEMPROT_ACCESS_CLASS_1,) failed ret=%d\n", ret);
+ return xpSalError;
+ }
+#else
+ #error not a supported configuration
+#endif
+ return xpSuccess;
+}
+
+static enum xp_retval
+xp_restrict_memprotect_uv(unsigned long phys_addr, unsigned long size)
+{
+ int ret;
+
+#if defined CONFIG_X86_64
+ ret = uv_bios_change_memprotect(phys_addr, size,
+ UV_MEMPROT_RESTRICT_ACCESS);
+ if (ret != BIOS_STATUS_SUCCESS) {
+ dev_err(xp, "uv_bios_change_memprotect(,, "
+ "UV_MEMPROT_RESTRICT_ACCESS) failed, ret=%d\n", ret);
+ return xpBiosError;
+ }
+
+#elif defined CONFIG_IA64_GENERIC || defined CONFIG_IA64_SGI_UV
+ u64 nasid_array;
+
+ ret = sn_change_memprotect(phys_addr, size, SN_MEMPROT_ACCESS_CLASS_0,
+ &nasid_array);
+ if (ret != 0) {
+ dev_err(xp, "sn_change_memprotect(,, "
+ "SN_MEMPROT_ACCESS_CLASS_0,) failed ret=%d\n", ret);
+ return xpSalError;
+ }
+#else
+ #error not a supported configuration
+#endif
+ return xpSuccess;
+}
+
+enum xp_retval
+xp_init_uv(void)
+{
+ BUG_ON(!is_uv());
+
+ xp_max_npartitions = XP_MAX_NPARTITIONS_UV;
+ xp_partition_id = sn_partition_id;
+ xp_region_size = sn_region_size;
+
+ xp_pa = xp_pa_uv;
+ xp_socket_pa = xp_socket_pa_uv;
+ xp_remote_memcpy = xp_remote_memcpy_uv;
+ xp_cpu_to_nasid = xp_cpu_to_nasid_uv;
+ xp_expand_memprotect = xp_expand_memprotect_uv;
+ xp_restrict_memprotect = xp_restrict_memprotect_uv;
+
+ return xpSuccess;
+}
+
+void
+xp_exit_uv(void)
+{
+ BUG_ON(!is_uv());
+}
diff --git a/drivers/misc/sgi-xp/xpc.h b/drivers/misc/sgi-xp/xpc.h
new file mode 100644
index 0000000..b94d5f7
--- /dev/null
+++ b/drivers/misc/sgi-xp/xpc.h
@@ -0,0 +1,1004 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 2004-2009 Silicon Graphics, Inc. All Rights Reserved.
+ */
+
+/*
+ * Cross Partition Communication (XPC) structures and macros.
+ */
+
+#ifndef _DRIVERS_MISC_SGIXP_XPC_H
+#define _DRIVERS_MISC_SGIXP_XPC_H
+
+#include <linux/wait.h>
+#include <linux/completion.h>
+#include <linux/timer.h>
+#include <linux/sched.h>
+#include "xp.h"
+
+/*
+ * XPC Version numbers consist of a major and minor number. XPC can always
+ * talk to versions with same major #, and never talk to versions with a
+ * different major #.
+ */
+#define _XPC_VERSION(_maj, _min) (((_maj) << 4) | ((_min) & 0xf))
+#define XPC_VERSION_MAJOR(_v) ((_v) >> 4)
+#define XPC_VERSION_MINOR(_v) ((_v) & 0xf)
+
+/* define frequency of the heartbeat and frequency how often it's checked */
+#define XPC_HB_DEFAULT_INTERVAL 5 /* incr HB every x secs */
+#define XPC_HB_CHECK_DEFAULT_INTERVAL 20 /* check HB every x secs */
+
+/* define the process name of HB checker and the CPU it is pinned to */
+#define XPC_HB_CHECK_THREAD_NAME "xpc_hb"
+#define XPC_HB_CHECK_CPU 0
+
+/* define the process name of the discovery thread */
+#define XPC_DISCOVERY_THREAD_NAME "xpc_discovery"
+
+/*
+ * the reserved page
+ *
+ * SAL reserves one page of memory per partition for XPC. Though a full page
+ * in length (16384 bytes), its starting address is not page aligned, but it
+ * is cacheline aligned. The reserved page consists of the following:
+ *
+ * reserved page header
+ *
+ * The first two 64-byte cachelines of the reserved page contain the
+ * header (struct xpc_rsvd_page). Before SAL initialization has completed,
+ * SAL has set up the following fields of the reserved page header:
+ * SAL_signature, SAL_version, SAL_partid, and SAL_nasids_size. The
+ * other fields are set up by XPC. (xpc_rsvd_page points to the local
+ * partition's reserved page.)
+ *
+ * part_nasids mask
+ * mach_nasids mask
+ *
+ * SAL also sets up two bitmaps (or masks), one that reflects the actual
+ * nasids in this partition (part_nasids), and the other that reflects
+ * the actual nasids in the entire machine (mach_nasids). We're only
+ * interested in the even numbered nasids (which contain the processors
+ * and/or memory), so we only need half as many bits to represent the
+ * nasids. When mapping nasid to bit in a mask (or bit to nasid) be sure
+ * to either divide or multiply by 2. The part_nasids mask is located
+ * starting at the first cacheline following the reserved page header. The
+ * mach_nasids mask follows right after the part_nasids mask. The size in
+ * bytes of each mask is reflected by the reserved page header field
+ * 'SAL_nasids_size'. (Local partition's mask pointers are xpc_part_nasids
+ * and xpc_mach_nasids.)
+ *
+ * vars (ia64-sn2 only)
+ * vars part (ia64-sn2 only)
+ *
+ * Immediately following the mach_nasids mask are the XPC variables
+ * required by other partitions. First are those that are generic to all
+ * partitions (vars), followed on the next available cacheline by those
+ * which are partition specific (vars part). These are setup by XPC.
+ * (Local partition's vars pointers are xpc_vars and xpc_vars_part.)
+ *
+ * Note: Until 'ts_jiffies' is set non-zero, the partition XPC code has not been
+ * initialized.
+ */
+struct xpc_rsvd_page {
+ u64 SAL_signature; /* SAL: unique signature */
+ u64 SAL_version; /* SAL: version */
+ short SAL_partid; /* SAL: partition ID */
+ short max_npartitions; /* value of XPC_MAX_PARTITIONS */
+ u8 version;
+ u8 pad1[3]; /* align to next u64 in 1st 64-byte cacheline */
+ unsigned long ts_jiffies; /* timestamp when rsvd pg was setup by XPC */
+ union {
+ struct {
+ unsigned long vars_pa; /* phys addr */
+ } sn2;
+ struct {
+ unsigned long heartbeat_gpa; /* phys addr */
+ unsigned long activate_gru_mq_desc_gpa; /* phys addr */
+ } uv;
+ } sn;
+ u64 pad2[9]; /* align to last u64 in 2nd 64-byte cacheline */
+ u64 SAL_nasids_size; /* SAL: size of each nasid mask in bytes */
+};
+
+#define XPC_RP_VERSION _XPC_VERSION(3, 0) /* version 3.0 of the reserved page */
+
+/*
+ * Define the structures by which XPC variables can be exported to other
+ * partitions. (There are two: struct xpc_vars and struct xpc_vars_part)
+ */
+
+/*
+ * The following structure describes the partition generic variables
+ * needed by other partitions in order to properly initialize.
+ *
+ * struct xpc_vars version number also applies to struct xpc_vars_part.
+ * Changes to either structure and/or related functionality should be
+ * reflected by incrementing either the major or minor version numbers
+ * of struct xpc_vars.
+ */
+struct xpc_vars_sn2 {
+ u8 version;
+ u64 heartbeat;
+ DECLARE_BITMAP(heartbeating_to_mask, XP_MAX_NPARTITIONS_SN2);
+ u64 heartbeat_offline; /* if 0, heartbeat should be changing */
+ int activate_IRQ_nasid;
+ int activate_IRQ_phys_cpuid;
+ unsigned long vars_part_pa;
+ unsigned long amos_page_pa;/* paddr of page of amos from MSPEC driver */
+ struct amo *amos_page; /* vaddr of page of amos from MSPEC driver */
+};
+
+#define XPC_V_VERSION _XPC_VERSION(3, 1) /* version 3.1 of the cross vars */
+
+/*
+ * The following structure describes the per partition specific variables.
+ *
+ * An array of these structures, one per partition, will be defined. As a
+ * partition becomes active XPC will copy the array entry corresponding to
+ * itself from that partition. It is desirable that the size of this structure
+ * evenly divides into a 128-byte cacheline, such that none of the entries in
+ * this array crosses a 128-byte cacheline boundary. As it is now, each entry
+ * occupies 64-bytes.
+ */
+struct xpc_vars_part_sn2 {
+ u64 magic;
+
+ unsigned long openclose_args_pa; /* phys addr of open and close args */
+ unsigned long GPs_pa; /* physical address of Get/Put values */
+
+ unsigned long chctl_amo_pa; /* physical address of chctl flags' amo */
+
+ int notify_IRQ_nasid; /* nasid of where to send notify IRQs */
+ int notify_IRQ_phys_cpuid; /* CPUID of where to send notify IRQs */
+
+ u8 nchannels; /* #of defined channels supported */
+
+ u8 reserved[23]; /* pad to a full 64 bytes */
+};
+
+/*
+ * The vars_part MAGIC numbers play a part in the first contact protocol.
+ *
+ * MAGIC1 indicates that the per partition specific variables for a remote
+ * partition have been initialized by this partition.
+ *
+ * MAGIC2 indicates that this partition has pulled the remote partititions
+ * per partition variables that pertain to this partition.
+ */
+#define XPC_VP_MAGIC1_SN2 0x0053524156435058L /* 'XPCVARS\0'L (little endian) */
+#define XPC_VP_MAGIC2_SN2 0x0073726176435058L /* 'XPCvars\0'L (little endian) */
+
+/* the reserved page sizes and offsets */
+
+#define XPC_RP_HEADER_SIZE L1_CACHE_ALIGN(sizeof(struct xpc_rsvd_page))
+#define XPC_RP_VARS_SIZE L1_CACHE_ALIGN(sizeof(struct xpc_vars_sn2))
+
+#define XPC_RP_PART_NASIDS(_rp) ((unsigned long *)((u8 *)(_rp) + \
+ XPC_RP_HEADER_SIZE))
+#define XPC_RP_MACH_NASIDS(_rp) (XPC_RP_PART_NASIDS(_rp) + \
+ xpc_nasid_mask_nlongs)
+#define XPC_RP_VARS(_rp) ((struct xpc_vars_sn2 *) \
+ (XPC_RP_MACH_NASIDS(_rp) + \
+ xpc_nasid_mask_nlongs))
+
+
+/*
+ * The following structure describes the partition's heartbeat info which
+ * will be periodically read by other partitions to determine whether this
+ * XPC is still 'alive'.
+ */
+struct xpc_heartbeat_uv {
+ unsigned long value;
+ unsigned long offline; /* if 0, heartbeat should be changing */
+};
+
+/*
+ * Info pertinent to a GRU message queue using a watch list for irq generation.
+ */
+struct xpc_gru_mq_uv {
+ void *address; /* address of GRU message queue */
+ unsigned int order; /* size of GRU message queue as a power of 2 */
+ int irq; /* irq raised when message is received in mq */
+ int mmr_blade; /* blade where watchlist was allocated from */
+ unsigned long mmr_offset; /* offset of irq mmr located on mmr_blade */
+ unsigned long mmr_value; /* value of irq mmr located on mmr_blade */
+ int watchlist_num; /* number of watchlist allocatd by BIOS */
+ void *gru_mq_desc; /* opaque structure used by the GRU driver */
+};
+
+/*
+ * The activate_mq is used to send/receive GRU messages that affect XPC's
+ * partition active state and channel state. This is uv only.
+ */
+struct xpc_activate_mq_msghdr_uv {
+ unsigned int gru_msg_hdr; /* FOR GRU INTERNAL USE ONLY */
+ short partid; /* sender's partid */
+ u8 act_state; /* sender's act_state at time msg sent */
+ u8 type; /* message's type */
+ unsigned long rp_ts_jiffies; /* timestamp of sender's rp setup by XPC */
+};
+
+/* activate_mq defined message types */
+#define XPC_ACTIVATE_MQ_MSG_SYNC_ACT_STATE_UV 0
+
+#define XPC_ACTIVATE_MQ_MSG_ACTIVATE_REQ_UV 1
+#define XPC_ACTIVATE_MQ_MSG_DEACTIVATE_REQ_UV 2
+
+#define XPC_ACTIVATE_MQ_MSG_CHCTL_CLOSEREQUEST_UV 3
+#define XPC_ACTIVATE_MQ_MSG_CHCTL_CLOSEREPLY_UV 4
+#define XPC_ACTIVATE_MQ_MSG_CHCTL_OPENREQUEST_UV 5
+#define XPC_ACTIVATE_MQ_MSG_CHCTL_OPENREPLY_UV 6
+#define XPC_ACTIVATE_MQ_MSG_CHCTL_OPENCOMPLETE_UV 7
+
+#define XPC_ACTIVATE_MQ_MSG_MARK_ENGAGED_UV 8
+#define XPC_ACTIVATE_MQ_MSG_MARK_DISENGAGED_UV 9
+
+struct xpc_activate_mq_msg_uv {
+ struct xpc_activate_mq_msghdr_uv hdr;
+};
+
+struct xpc_activate_mq_msg_activate_req_uv {
+ struct xpc_activate_mq_msghdr_uv hdr;
+ unsigned long rp_gpa;
+ unsigned long heartbeat_gpa;
+ unsigned long activate_gru_mq_desc_gpa;
+};
+
+struct xpc_activate_mq_msg_deactivate_req_uv {
+ struct xpc_activate_mq_msghdr_uv hdr;
+ enum xp_retval reason;
+};
+
+struct xpc_activate_mq_msg_chctl_closerequest_uv {
+ struct xpc_activate_mq_msghdr_uv hdr;
+ short ch_number;
+ enum xp_retval reason;
+};
+
+struct xpc_activate_mq_msg_chctl_closereply_uv {
+ struct xpc_activate_mq_msghdr_uv hdr;
+ short ch_number;
+};
+
+struct xpc_activate_mq_msg_chctl_openrequest_uv {
+ struct xpc_activate_mq_msghdr_uv hdr;
+ short ch_number;
+ short entry_size; /* size of notify_mq's GRU messages */
+ short local_nentries; /* ??? Is this needed? What is? */
+};
+
+struct xpc_activate_mq_msg_chctl_openreply_uv {
+ struct xpc_activate_mq_msghdr_uv hdr;
+ short ch_number;
+ short remote_nentries; /* ??? Is this needed? What is? */
+ short local_nentries; /* ??? Is this needed? What is? */
+ unsigned long notify_gru_mq_desc_gpa;
+};
+
+struct xpc_activate_mq_msg_chctl_opencomplete_uv {
+ struct xpc_activate_mq_msghdr_uv hdr;
+ short ch_number;
+};
+
+/*
+ * Functions registered by add_timer() or called by kernel_thread() only
+ * allow for a single 64-bit argument. The following macros can be used to
+ * pack and unpack two (32-bit, 16-bit or 8-bit) arguments into or out from
+ * the passed argument.
+ */
+#define XPC_PACK_ARGS(_arg1, _arg2) \
+ ((((u64)_arg1) & 0xffffffff) | \
+ ((((u64)_arg2) & 0xffffffff) << 32))
+
+#define XPC_UNPACK_ARG1(_args) (((u64)_args) & 0xffffffff)
+#define XPC_UNPACK_ARG2(_args) ((((u64)_args) >> 32) & 0xffffffff)
+
+/*
+ * Define a Get/Put value pair (pointers) used with a message queue.
+ */
+struct xpc_gp_sn2 {
+ s64 get; /* Get value */
+ s64 put; /* Put value */
+};
+
+#define XPC_GP_SIZE \
+ L1_CACHE_ALIGN(sizeof(struct xpc_gp_sn2) * XPC_MAX_NCHANNELS)
+
+/*
+ * Define a structure that contains arguments associated with opening and
+ * closing a channel.
+ */
+struct xpc_openclose_args {
+ u16 reason; /* reason why channel is closing */
+ u16 entry_size; /* sizeof each message entry */
+ u16 remote_nentries; /* #of message entries in remote msg queue */
+ u16 local_nentries; /* #of message entries in local msg queue */
+ unsigned long local_msgqueue_pa; /* phys addr of local message queue */
+};
+
+#define XPC_OPENCLOSE_ARGS_SIZE \
+ L1_CACHE_ALIGN(sizeof(struct xpc_openclose_args) * \
+ XPC_MAX_NCHANNELS)
+
+
+/*
+ * Structures to define a fifo singly-linked list.
+ */
+
+struct xpc_fifo_entry_uv {
+ struct xpc_fifo_entry_uv *next;
+};
+
+struct xpc_fifo_head_uv {
+ struct xpc_fifo_entry_uv *first;
+ struct xpc_fifo_entry_uv *last;
+ spinlock_t lock;
+ int n_entries;
+};
+
+/*
+ * Define a sn2 styled message.
+ *
+ * A user-defined message resides in the payload area. The max size of the
+ * payload is defined by the user via xpc_connect().
+ *
+ * The size of a message entry (within a message queue) must be a 128-byte
+ * cacheline sized multiple in order to facilitate the BTE transfer of messages
+ * from one message queue to another.
+ */
+struct xpc_msg_sn2 {
+ u8 flags; /* FOR XPC INTERNAL USE ONLY */
+ u8 reserved[7]; /* FOR XPC INTERNAL USE ONLY */
+ s64 number; /* FOR XPC INTERNAL USE ONLY */
+
+ u64 payload; /* user defined portion of message */
+};
+
+/* struct xpc_msg_sn2 flags */
+
+#define XPC_M_SN2_DONE 0x01 /* msg has been received/consumed */
+#define XPC_M_SN2_READY 0x02 /* msg is ready to be sent */
+#define XPC_M_SN2_INTERRUPT 0x04 /* send interrupt when msg consumed */
+
+/*
+ * The format of a uv XPC notify_mq GRU message is as follows:
+ *
+ * A user-defined message resides in the payload area. The max size of the
+ * payload is defined by the user via xpc_connect().
+ *
+ * The size of a message (payload and header) sent via the GRU must be either 1
+ * or 2 GRU_CACHE_LINE_BYTES in length.
+ */
+
+struct xpc_notify_mq_msghdr_uv {
+ union {
+ unsigned int gru_msg_hdr; /* FOR GRU INTERNAL USE ONLY */
+ struct xpc_fifo_entry_uv next; /* FOR XPC INTERNAL USE ONLY */
+ } u;
+ short partid; /* FOR XPC INTERNAL USE ONLY */
+ u8 ch_number; /* FOR XPC INTERNAL USE ONLY */
+ u8 size; /* FOR XPC INTERNAL USE ONLY */
+ unsigned int msg_slot_number; /* FOR XPC INTERNAL USE ONLY */
+};
+
+struct xpc_notify_mq_msg_uv {
+ struct xpc_notify_mq_msghdr_uv hdr;
+ unsigned long payload;
+};
+
+/*
+ * Define sn2's notify entry.
+ *
+ * This is used to notify a message's sender that their message was received
+ * and consumed by the intended recipient.
+ */
+struct xpc_notify_sn2 {
+ u8 type; /* type of notification */
+
+ /* the following two fields are only used if type == XPC_N_CALL */
+ xpc_notify_func func; /* user's notify function */
+ void *key; /* pointer to user's key */
+};
+
+/* struct xpc_notify_sn2 type of notification */
+
+#define XPC_N_CALL 0x01 /* notify function provided by user */
+
+/*
+ * Define uv's version of the notify entry. It additionally is used to allocate
+ * a msg slot on the remote partition into which is copied a sent message.
+ */
+struct xpc_send_msg_slot_uv {
+ struct xpc_fifo_entry_uv next;
+ unsigned int msg_slot_number;
+ xpc_notify_func func; /* user's notify function */
+ void *key; /* pointer to user's key */
+};
+
+/*
+ * Define the structure that manages all the stuff required by a channel. In
+ * particular, they are used to manage the messages sent across the channel.
+ *
+ * This structure is private to a partition, and is NOT shared across the
+ * partition boundary.
+ *
+ * There is an array of these structures for each remote partition. It is
+ * allocated at the time a partition becomes active. The array contains one
+ * of these structures for each potential channel connection to that partition.
+ */
+
+/*
+ * The following is sn2 only.
+ *
+ * Each channel structure manages two message queues (circular buffers).
+ * They are allocated at the time a channel connection is made. One of
+ * these message queues (local_msgqueue) holds the locally created messages
+ * that are destined for the remote partition. The other of these message
+ * queues (remote_msgqueue) is a locally cached copy of the remote partition's
+ * own local_msgqueue.
+ *
+ * The following is a description of the Get/Put pointers used to manage these
+ * two message queues. Consider the local_msgqueue to be on one partition
+ * and the remote_msgqueue to be its cached copy on another partition. A
+ * description of what each of the lettered areas contains is included.
+ *
+ *
+ * local_msgqueue remote_msgqueue
+ *
+ * |/////////| |/////////|
+ * w_remote_GP.get --> +---------+ |/////////|
+ * | F | |/////////|
+ * remote_GP.get --> +---------+ +---------+ <-- local_GP->get
+ * | | | |
+ * | | | E |
+ * | | | |
+ * | | +---------+ <-- w_local_GP.get
+ * | B | |/////////|
+ * | | |////D////|
+ * | | |/////////|
+ * | | +---------+ <-- w_remote_GP.put
+ * | | |////C////|
+ * local_GP->put --> +---------+ +---------+ <-- remote_GP.put
+ * | | |/////////|
+ * | A | |/////////|
+ * | | |/////////|
+ * w_local_GP.put --> +---------+ |/////////|
+ * |/////////| |/////////|
+ *
+ *
+ * ( remote_GP.[get|put] are cached copies of the remote
+ * partition's local_GP->[get|put], and thus their values can
+ * lag behind their counterparts on the remote partition. )
+ *
+ *
+ * A - Messages that have been allocated, but have not yet been sent to the
+ * remote partition.
+ *
+ * B - Messages that have been sent, but have not yet been acknowledged by the
+ * remote partition as having been received.
+ *
+ * C - Area that needs to be prepared for the copying of sent messages, by
+ * the clearing of the message flags of any previously received messages.
+ *
+ * D - Area into which sent messages are to be copied from the remote
+ * partition's local_msgqueue and then delivered to their intended
+ * recipients. [ To allow for a multi-message copy, another pointer
+ * (next_msg_to_pull) has been added to keep track of the next message
+ * number needing to be copied (pulled). It chases after w_remote_GP.put.
+ * Any messages lying between w_local_GP.get and next_msg_to_pull have
+ * been copied and are ready to be delivered. ]
+ *
+ * E - Messages that have been copied and delivered, but have not yet been
+ * acknowledged by the recipient as having been received.
+ *
+ * F - Messages that have been acknowledged, but XPC has not yet notified the
+ * sender that the message was received by its intended recipient.
+ * This is also an area that needs to be prepared for the allocating of
+ * new messages, by the clearing of the message flags of the acknowledged
+ * messages.
+ */
+
+struct xpc_channel_sn2 {
+ struct xpc_openclose_args *local_openclose_args; /* args passed on */
+ /* opening or closing of channel */
+
+ void *local_msgqueue_base; /* base address of kmalloc'd space */
+ struct xpc_msg_sn2 *local_msgqueue; /* local message queue */
+ void *remote_msgqueue_base; /* base address of kmalloc'd space */
+ struct xpc_msg_sn2 *remote_msgqueue; /* cached copy of remote */
+ /* partition's local message queue */
+ unsigned long remote_msgqueue_pa; /* phys addr of remote partition's */
+ /* local message queue */
+
+ struct xpc_notify_sn2 *notify_queue;/* notify queue for messages sent */
+
+ /* various flavors of local and remote Get/Put values */
+
+ struct xpc_gp_sn2 *local_GP; /* local Get/Put values */
+ struct xpc_gp_sn2 remote_GP; /* remote Get/Put values */
+ struct xpc_gp_sn2 w_local_GP; /* working local Get/Put values */
+ struct xpc_gp_sn2 w_remote_GP; /* working remote Get/Put values */
+ s64 next_msg_to_pull; /* Put value of next msg to pull */
+
+ struct mutex msg_to_pull_mutex; /* next msg to pull serialization */
+};
+
+struct xpc_channel_uv {
+ void *cached_notify_gru_mq_desc; /* remote partition's notify mq's */
+ /* gru mq descriptor */
+
+ struct xpc_send_msg_slot_uv *send_msg_slots;
+ void *recv_msg_slots; /* each slot will hold a xpc_notify_mq_msg_uv */
+ /* structure plus the user's payload */
+
+ struct xpc_fifo_head_uv msg_slot_free_list;
+ struct xpc_fifo_head_uv recv_msg_list; /* deliverable payloads */
+};
+
+struct xpc_channel {
+ short partid; /* ID of remote partition connected */
+ spinlock_t lock; /* lock for updating this structure */
+ unsigned int flags; /* general flags */
+
+ enum xp_retval reason; /* reason why channel is disconnect'g */
+ int reason_line; /* line# disconnect initiated from */
+
+ u16 number; /* channel # */
+
+ u16 entry_size; /* sizeof each msg entry */
+ u16 local_nentries; /* #of msg entries in local msg queue */
+ u16 remote_nentries; /* #of msg entries in remote msg queue */
+
+ atomic_t references; /* #of external references to queues */
+
+ atomic_t n_on_msg_allocate_wq; /* #on msg allocation wait queue */
+ wait_queue_head_t msg_allocate_wq; /* msg allocation wait queue */
+
+ u8 delayed_chctl_flags; /* chctl flags received, but delayed */
+ /* action until channel disconnected */
+
+ atomic_t n_to_notify; /* #of msg senders to notify */
+
+ xpc_channel_func func; /* user's channel function */
+ void *key; /* pointer to user's key */
+
+ struct completion wdisconnect_wait; /* wait for channel disconnect */
+
+ /* kthread management related fields */
+
+ atomic_t kthreads_assigned; /* #of kthreads assigned to channel */
+ u32 kthreads_assigned_limit; /* limit on #of kthreads assigned */
+ atomic_t kthreads_idle; /* #of kthreads idle waiting for work */
+ u32 kthreads_idle_limit; /* limit on #of kthreads idle */
+ atomic_t kthreads_active; /* #of kthreads actively working */
+
+ wait_queue_head_t idle_wq; /* idle kthread wait queue */
+
+ union {
+ struct xpc_channel_sn2 sn2;
+ struct xpc_channel_uv uv;
+ } sn;
+
+} ____cacheline_aligned;
+
+/* struct xpc_channel flags */
+
+#define XPC_C_WASCONNECTED 0x00000001 /* channel was connected */
+
+#define XPC_C_ROPENCOMPLETE 0x00000002 /* remote open channel complete */
+#define XPC_C_OPENCOMPLETE 0x00000004 /* local open channel complete */
+#define XPC_C_ROPENREPLY 0x00000008 /* remote open channel reply */
+#define XPC_C_OPENREPLY 0x00000010 /* local open channel reply */
+#define XPC_C_ROPENREQUEST 0x00000020 /* remote open channel request */
+#define XPC_C_OPENREQUEST 0x00000040 /* local open channel request */
+
+#define XPC_C_SETUP 0x00000080 /* channel's msgqueues are alloc'd */
+#define XPC_C_CONNECTEDCALLOUT 0x00000100 /* connected callout initiated */
+#define XPC_C_CONNECTEDCALLOUT_MADE \
+ 0x00000200 /* connected callout completed */
+#define XPC_C_CONNECTED 0x00000400 /* local channel is connected */
+#define XPC_C_CONNECTING 0x00000800 /* channel is being connected */
+
+#define XPC_C_RCLOSEREPLY 0x00001000 /* remote close channel reply */
+#define XPC_C_CLOSEREPLY 0x00002000 /* local close channel reply */
+#define XPC_C_RCLOSEREQUEST 0x00004000 /* remote close channel request */
+#define XPC_C_CLOSEREQUEST 0x00008000 /* local close channel request */
+
+#define XPC_C_DISCONNECTED 0x00010000 /* channel is disconnected */
+#define XPC_C_DISCONNECTING 0x00020000 /* channel is being disconnected */
+#define XPC_C_DISCONNECTINGCALLOUT \
+ 0x00040000 /* disconnecting callout initiated */
+#define XPC_C_DISCONNECTINGCALLOUT_MADE \
+ 0x00080000 /* disconnecting callout completed */
+#define XPC_C_WDISCONNECT 0x00100000 /* waiting for channel disconnect */
+
+/*
+ * The channel control flags (chctl) union consists of a 64-bit variable which
+ * is divided up into eight bytes, ordered from right to left. Byte zero
+ * pertains to channel 0, byte one to channel 1, and so on. Each channel's byte
+ * can have one or more of the chctl flags set in it.
+ */
+
+union xpc_channel_ctl_flags {
+ u64 all_flags;
+ u8 flags[XPC_MAX_NCHANNELS];
+};
+
+/* chctl flags */
+#define XPC_CHCTL_CLOSEREQUEST 0x01
+#define XPC_CHCTL_CLOSEREPLY 0x02
+#define XPC_CHCTL_OPENREQUEST 0x04
+#define XPC_CHCTL_OPENREPLY 0x08
+#define XPC_CHCTL_OPENCOMPLETE 0x10
+#define XPC_CHCTL_MSGREQUEST 0x20
+
+#define XPC_OPENCLOSE_CHCTL_FLAGS \
+ (XPC_CHCTL_CLOSEREQUEST | XPC_CHCTL_CLOSEREPLY | \
+ XPC_CHCTL_OPENREQUEST | XPC_CHCTL_OPENREPLY | \
+ XPC_CHCTL_OPENCOMPLETE)
+#define XPC_MSG_CHCTL_FLAGS XPC_CHCTL_MSGREQUEST
+
+static inline int
+xpc_any_openclose_chctl_flags_set(union xpc_channel_ctl_flags *chctl)
+{
+ int ch_number;
+
+ for (ch_number = 0; ch_number < XPC_MAX_NCHANNELS; ch_number++) {
+ if (chctl->flags[ch_number] & XPC_OPENCLOSE_CHCTL_FLAGS)
+ return 1;
+ }
+ return 0;
+}
+
+static inline int
+xpc_any_msg_chctl_flags_set(union xpc_channel_ctl_flags *chctl)
+{
+ int ch_number;
+
+ for (ch_number = 0; ch_number < XPC_MAX_NCHANNELS; ch_number++) {
+ if (chctl->flags[ch_number] & XPC_MSG_CHCTL_FLAGS)
+ return 1;
+ }
+ return 0;
+}
+
+/*
+ * Manage channels on a partition basis. There is one of these structures
+ * for each partition (a partition will never utilize the structure that
+ * represents itself).
+ */
+
+struct xpc_partition_sn2 {
+ unsigned long remote_amos_page_pa; /* paddr of partition's amos page */
+ int activate_IRQ_nasid; /* active partition's act/deact nasid */
+ int activate_IRQ_phys_cpuid; /* active part's act/deact phys cpuid */
+
+ unsigned long remote_vars_pa; /* phys addr of partition's vars */
+ unsigned long remote_vars_part_pa; /* paddr of partition's vars part */
+ u8 remote_vars_version; /* version# of partition's vars */
+
+ void *local_GPs_base; /* base address of kmalloc'd space */
+ struct xpc_gp_sn2 *local_GPs; /* local Get/Put values */
+ void *remote_GPs_base; /* base address of kmalloc'd space */
+ struct xpc_gp_sn2 *remote_GPs; /* copy of remote partition's local */
+ /* Get/Put values */
+ unsigned long remote_GPs_pa; /* phys addr of remote partition's local */
+ /* Get/Put values */
+
+ void *local_openclose_args_base; /* base address of kmalloc'd space */
+ struct xpc_openclose_args *local_openclose_args; /* local's args */
+ unsigned long remote_openclose_args_pa; /* phys addr of remote's args */
+
+ int notify_IRQ_nasid; /* nasid of where to send notify IRQs */
+ int notify_IRQ_phys_cpuid; /* CPUID of where to send notify IRQs */
+ char notify_IRQ_owner[8]; /* notify IRQ's owner's name */
+
+ struct amo *remote_chctl_amo_va; /* addr of remote chctl flags' amo */
+ struct amo *local_chctl_amo_va; /* address of chctl flags' amo */
+
+ struct timer_list dropped_notify_IRQ_timer; /* dropped IRQ timer */
+};
+
+struct xpc_partition_uv {
+ unsigned long heartbeat_gpa; /* phys addr of partition's heartbeat */
+ struct xpc_heartbeat_uv cached_heartbeat; /* cached copy of */
+ /* partition's heartbeat */
+ unsigned long activate_gru_mq_desc_gpa; /* phys addr of parititon's */
+ /* activate mq's gru mq */
+ /* descriptor */
+ void *cached_activate_gru_mq_desc; /* cached copy of partition's */
+ /* activate mq's gru mq descriptor */
+ struct mutex cached_activate_gru_mq_desc_mutex;
+ spinlock_t flags_lock; /* protect updating of flags */
+ unsigned int flags; /* general flags */
+ u8 remote_act_state; /* remote partition's act_state */
+ u8 act_state_req; /* act_state request from remote partition */
+ enum xp_retval reason; /* reason for deactivate act_state request */
+};
+
+/* struct xpc_partition_uv flags */
+
+#define XPC_P_CACHED_ACTIVATE_GRU_MQ_DESC_UV 0x00000001
+#define XPC_P_ENGAGED_UV 0x00000002
+
+/* struct xpc_partition_uv act_state change requests */
+
+#define XPC_P_ASR_ACTIVATE_UV 0x01
+#define XPC_P_ASR_REACTIVATE_UV 0x02
+#define XPC_P_ASR_DEACTIVATE_UV 0x03
+
+struct xpc_partition {
+
+ /* XPC HB infrastructure */
+
+ u8 remote_rp_version; /* version# of partition's rsvd pg */
+ unsigned long remote_rp_ts_jiffies; /* timestamp when rsvd pg setup */
+ unsigned long remote_rp_pa; /* phys addr of partition's rsvd pg */
+ u64 last_heartbeat; /* HB at last read */
+ u32 activate_IRQ_rcvd; /* IRQs since activation */
+ spinlock_t act_lock; /* protect updating of act_state */
+ u8 act_state; /* from XPC HB viewpoint */
+ enum xp_retval reason; /* reason partition is deactivating */
+ int reason_line; /* line# deactivation initiated from */
+
+ unsigned long disengage_timeout; /* timeout in jiffies */
+ struct timer_list disengage_timer;
+
+ /* XPC infrastructure referencing and teardown control */
+
+ u8 setup_state; /* infrastructure setup state */
+ wait_queue_head_t teardown_wq; /* kthread waiting to teardown infra */
+ atomic_t references; /* #of references to infrastructure */
+
+ u8 nchannels; /* #of defined channels supported */
+ atomic_t nchannels_active; /* #of channels that are not DISCONNECTED */
+ atomic_t nchannels_engaged; /* #of channels engaged with remote part */
+ struct xpc_channel *channels; /* array of channel structures */
+
+ /* fields used for managing channel avialability and activity */
+
+ union xpc_channel_ctl_flags chctl; /* chctl flags yet to be processed */
+ spinlock_t chctl_lock; /* chctl flags lock */
+
+ void *remote_openclose_args_base; /* base address of kmalloc'd space */
+ struct xpc_openclose_args *remote_openclose_args; /* copy of remote's */
+ /* args */
+
+ /* channel manager related fields */
+
+ atomic_t channel_mgr_requests; /* #of requests to activate chan mgr */
+ wait_queue_head_t channel_mgr_wq; /* channel mgr's wait queue */
+
+ union {
+ struct xpc_partition_sn2 sn2;
+ struct xpc_partition_uv uv;
+ } sn;
+
+} ____cacheline_aligned;
+
+struct xpc_arch_operations {
+ int (*setup_partitions) (void);
+ void (*teardown_partitions) (void);
+ void (*process_activate_IRQ_rcvd) (void);
+ enum xp_retval (*get_partition_rsvd_page_pa)
+ (void *, u64 *, unsigned long *, size_t *);
+ int (*setup_rsvd_page) (struct xpc_rsvd_page *);
+
+ void (*allow_hb) (short);
+ void (*disallow_hb) (short);
+ void (*disallow_all_hbs) (void);
+ void (*increment_heartbeat) (void);
+ void (*offline_heartbeat) (void);
+ void (*online_heartbeat) (void);
+ void (*heartbeat_init) (void);
+ void (*heartbeat_exit) (void);
+ enum xp_retval (*get_remote_heartbeat) (struct xpc_partition *);
+
+ void (*request_partition_activation) (struct xpc_rsvd_page *,
+ unsigned long, int);
+ void (*request_partition_reactivation) (struct xpc_partition *);
+ void (*request_partition_deactivation) (struct xpc_partition *);
+ void (*cancel_partition_deactivation_request) (struct xpc_partition *);
+ enum xp_retval (*setup_ch_structures) (struct xpc_partition *);
+ void (*teardown_ch_structures) (struct xpc_partition *);
+
+ enum xp_retval (*make_first_contact) (struct xpc_partition *);
+
+ u64 (*get_chctl_all_flags) (struct xpc_partition *);
+ void (*send_chctl_closerequest) (struct xpc_channel *, unsigned long *);
+ void (*send_chctl_closereply) (struct xpc_channel *, unsigned long *);
+ void (*send_chctl_openrequest) (struct xpc_channel *, unsigned long *);
+ void (*send_chctl_openreply) (struct xpc_channel *, unsigned long *);
+ void (*send_chctl_opencomplete) (struct xpc_channel *, unsigned long *);
+ void (*process_msg_chctl_flags) (struct xpc_partition *, int);
+
+ enum xp_retval (*save_remote_msgqueue_pa) (struct xpc_channel *,
+ unsigned long);
+
+ enum xp_retval (*setup_msg_structures) (struct xpc_channel *);
+ void (*teardown_msg_structures) (struct xpc_channel *);
+
+ void (*indicate_partition_engaged) (struct xpc_partition *);
+ void (*indicate_partition_disengaged) (struct xpc_partition *);
+ void (*assume_partition_disengaged) (short);
+ int (*partition_engaged) (short);
+ int (*any_partition_engaged) (void);
+
+ int (*n_of_deliverable_payloads) (struct xpc_channel *);
+ enum xp_retval (*send_payload) (struct xpc_channel *, u32, void *,
+ u16, u8, xpc_notify_func, void *);
+ void *(*get_deliverable_payload) (struct xpc_channel *);
+ void (*received_payload) (struct xpc_channel *, void *);
+ void (*notify_senders_of_disconnect) (struct xpc_channel *);
+};
+
+/* struct xpc_partition act_state values (for XPC HB) */
+
+#define XPC_P_AS_INACTIVE 0x00 /* partition is not active */
+#define XPC_P_AS_ACTIVATION_REQ 0x01 /* created thread to activate */
+#define XPC_P_AS_ACTIVATING 0x02 /* activation thread started */
+#define XPC_P_AS_ACTIVE 0x03 /* xpc_partition_up() was called */
+#define XPC_P_AS_DEACTIVATING 0x04 /* partition deactivation initiated */
+
+#define XPC_DEACTIVATE_PARTITION(_p, _reason) \
+ xpc_deactivate_partition(__LINE__, (_p), (_reason))
+
+/* struct xpc_partition setup_state values */
+
+#define XPC_P_SS_UNSET 0x00 /* infrastructure was never setup */
+#define XPC_P_SS_SETUP 0x01 /* infrastructure is setup */
+#define XPC_P_SS_WTEARDOWN 0x02 /* waiting to teardown infrastructure */
+#define XPC_P_SS_TORNDOWN 0x03 /* infrastructure is torndown */
+
+/*
+ * struct xpc_partition_sn2's dropped notify IRQ timer is set to wait the
+ * following interval #of seconds before checking for dropped notify IRQs.
+ * These can occur whenever an IRQ's associated amo write doesn't complete
+ * until after the IRQ was received.
+ */
+#define XPC_DROPPED_NOTIFY_IRQ_WAIT_INTERVAL (0.25 * HZ)
+
+/* number of seconds to wait for other partitions to disengage */
+#define XPC_DISENGAGE_DEFAULT_TIMELIMIT 90
+
+/* interval in seconds to print 'waiting deactivation' messages */
+#define XPC_DEACTIVATE_PRINTMSG_INTERVAL 10
+
+#define XPC_PARTID(_p) ((short)((_p) - &xpc_partitions[0]))
+
+/* found in xp_main.c */
+extern struct xpc_registration xpc_registrations[];
+
+/* found in xpc_main.c */
+extern struct device *xpc_part;
+extern struct device *xpc_chan;
+extern struct xpc_arch_operations xpc_arch_ops;
+extern int xpc_disengage_timelimit;
+extern int xpc_disengage_timedout;
+extern int xpc_activate_IRQ_rcvd;
+extern spinlock_t xpc_activate_IRQ_rcvd_lock;
+extern wait_queue_head_t xpc_activate_IRQ_wq;
+extern void *xpc_kzalloc_cacheline_aligned(size_t, gfp_t, void **);
+extern void xpc_activate_partition(struct xpc_partition *);
+extern void xpc_activate_kthreads(struct xpc_channel *, int);
+extern void xpc_create_kthreads(struct xpc_channel *, int, int);
+extern void xpc_disconnect_wait(int);
+
+/* found in xpc_sn2.c */
+extern int xpc_init_sn2(void);
+extern void xpc_exit_sn2(void);
+
+/* found in xpc_uv.c */
+extern int xpc_init_uv(void);
+extern void xpc_exit_uv(void);
+
+/* found in xpc_partition.c */
+extern int xpc_exiting;
+extern int xpc_nasid_mask_nlongs;
+extern struct xpc_rsvd_page *xpc_rsvd_page;
+extern unsigned long *xpc_mach_nasids;
+extern struct xpc_partition *xpc_partitions;
+extern void *xpc_kmalloc_cacheline_aligned(size_t, gfp_t, void **);
+extern int xpc_setup_rsvd_page(void);
+extern void xpc_teardown_rsvd_page(void);
+extern int xpc_identify_activate_IRQ_sender(void);
+extern int xpc_partition_disengaged(struct xpc_partition *);
+extern enum xp_retval xpc_mark_partition_active(struct xpc_partition *);
+extern void xpc_mark_partition_inactive(struct xpc_partition *);
+extern void xpc_discovery(void);
+extern enum xp_retval xpc_get_remote_rp(int, unsigned long *,
+ struct xpc_rsvd_page *,
+ unsigned long *);
+extern void xpc_deactivate_partition(const int, struct xpc_partition *,
+ enum xp_retval);
+extern enum xp_retval xpc_initiate_partid_to_nasids(short, void *);
+
+/* found in xpc_channel.c */
+extern void xpc_initiate_connect(int);
+extern void xpc_initiate_disconnect(int);
+extern enum xp_retval xpc_allocate_msg_wait(struct xpc_channel *);
+extern enum xp_retval xpc_initiate_send(short, int, u32, void *, u16);
+extern enum xp_retval xpc_initiate_send_notify(short, int, u32, void *, u16,
+ xpc_notify_func, void *);
+extern void xpc_initiate_received(short, int, void *);
+extern void xpc_process_sent_chctl_flags(struct xpc_partition *);
+extern void xpc_connected_callout(struct xpc_channel *);
+extern void xpc_deliver_payload(struct xpc_channel *);
+extern void xpc_disconnect_channel(const int, struct xpc_channel *,
+ enum xp_retval, unsigned long *);
+extern void xpc_disconnect_callout(struct xpc_channel *, enum xp_retval);
+extern void xpc_partition_going_down(struct xpc_partition *, enum xp_retval);
+
+static inline void
+xpc_wakeup_channel_mgr(struct xpc_partition *part)
+{
+ if (atomic_inc_return(&part->channel_mgr_requests) == 1)
+ wake_up(&part->channel_mgr_wq);
+}
+
+/*
+ * These next two inlines are used to keep us from tearing down a channel's
+ * msg queues while a thread may be referencing them.
+ */
+static inline void
+xpc_msgqueue_ref(struct xpc_channel *ch)
+{
+ atomic_inc(&ch->references);
+}
+
+static inline void
+xpc_msgqueue_deref(struct xpc_channel *ch)
+{
+ s32 refs = atomic_dec_return(&ch->references);
+
+ DBUG_ON(refs < 0);
+ if (refs == 0)
+ xpc_wakeup_channel_mgr(&xpc_partitions[ch->partid]);
+}
+
+#define XPC_DISCONNECT_CHANNEL(_ch, _reason, _irqflgs) \
+ xpc_disconnect_channel(__LINE__, _ch, _reason, _irqflgs)
+
+/*
+ * These two inlines are used to keep us from tearing down a partition's
+ * setup infrastructure while a thread may be referencing it.
+ */
+static inline void
+xpc_part_deref(struct xpc_partition *part)
+{
+ s32 refs = atomic_dec_return(&part->references);
+
+ DBUG_ON(refs < 0);
+ if (refs == 0 && part->setup_state == XPC_P_SS_WTEARDOWN)
+ wake_up(&part->teardown_wq);
+}
+
+static inline int
+xpc_part_ref(struct xpc_partition *part)
+{
+ int setup;
+
+ atomic_inc(&part->references);
+ setup = (part->setup_state == XPC_P_SS_SETUP);
+ if (!setup)
+ xpc_part_deref(part);
+
+ return setup;
+}
+
+/*
+ * The following macro is to be used for the setting of the reason and
+ * reason_line fields in both the struct xpc_channel and struct xpc_partition
+ * structures.
+ */
+#define XPC_SET_REASON(_p, _reason, _line) \
+ { \
+ (_p)->reason = _reason; \
+ (_p)->reason_line = _line; \
+ }
+
+#endif /* _DRIVERS_MISC_SGIXP_XPC_H */
diff --git a/drivers/misc/sgi-xp/xpc_channel.c b/drivers/misc/sgi-xp/xpc_channel.c
new file mode 100644
index 0000000..128d561
--- /dev/null
+++ b/drivers/misc/sgi-xp/xpc_channel.c
@@ -0,0 +1,1014 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 2004-2009 Silicon Graphics, Inc. All Rights Reserved.
+ */
+
+/*
+ * Cross Partition Communication (XPC) channel support.
+ *
+ * This is the part of XPC that manages the channels and
+ * sends/receives messages across them to/from other partitions.
+ *
+ */
+
+#include <linux/device.h>
+#include "xpc.h"
+
+/*
+ * Process a connect message from a remote partition.
+ *
+ * Note: xpc_process_connect() is expecting to be called with the
+ * spin_lock_irqsave held and will leave it locked upon return.
+ */
+static void
+xpc_process_connect(struct xpc_channel *ch, unsigned long *irq_flags)
+{
+ enum xp_retval ret;
+
+ DBUG_ON(!spin_is_locked(&ch->lock));
+
+ if (!(ch->flags & XPC_C_OPENREQUEST) ||
+ !(ch->flags & XPC_C_ROPENREQUEST)) {
+ /* nothing more to do for now */
+ return;
+ }
+ DBUG_ON(!(ch->flags & XPC_C_CONNECTING));
+
+ if (!(ch->flags & XPC_C_SETUP)) {
+ spin_unlock_irqrestore(&ch->lock, *irq_flags);
+ ret = xpc_arch_ops.setup_msg_structures(ch);
+ spin_lock_irqsave(&ch->lock, *irq_flags);
+
+ if (ret != xpSuccess)
+ XPC_DISCONNECT_CHANNEL(ch, ret, irq_flags);
+ else
+ ch->flags |= XPC_C_SETUP;
+
+ if (ch->flags & XPC_C_DISCONNECTING)
+ return;
+ }
+
+ if (!(ch->flags & XPC_C_OPENREPLY)) {
+ ch->flags |= XPC_C_OPENREPLY;
+ xpc_arch_ops.send_chctl_openreply(ch, irq_flags);
+ }
+
+ if (!(ch->flags & XPC_C_ROPENREPLY))
+ return;
+
+ if (!(ch->flags & XPC_C_OPENCOMPLETE)) {
+ ch->flags |= (XPC_C_OPENCOMPLETE | XPC_C_CONNECTED);
+ xpc_arch_ops.send_chctl_opencomplete(ch, irq_flags);
+ }
+
+ if (!(ch->flags & XPC_C_ROPENCOMPLETE))
+ return;
+
+ dev_info(xpc_chan, "channel %d to partition %d connected\n",
+ ch->number, ch->partid);
+
+ ch->flags = (XPC_C_CONNECTED | XPC_C_SETUP); /* clear all else */
+}
+
+/*
+ * spin_lock_irqsave() is expected to be held on entry.
+ */
+static void
+xpc_process_disconnect(struct xpc_channel *ch, unsigned long *irq_flags)
+{
+ struct xpc_partition *part = &xpc_partitions[ch->partid];
+ u32 channel_was_connected = (ch->flags & XPC_C_WASCONNECTED);
+
+ DBUG_ON(!spin_is_locked(&ch->lock));
+
+ if (!(ch->flags & XPC_C_DISCONNECTING))
+ return;
+
+ DBUG_ON(!(ch->flags & XPC_C_CLOSEREQUEST));
+
+ /* make sure all activity has settled down first */
+
+ if (atomic_read(&ch->kthreads_assigned) > 0 ||
+ atomic_read(&ch->references) > 0) {
+ return;
+ }
+ DBUG_ON((ch->flags & XPC_C_CONNECTEDCALLOUT_MADE) &&
+ !(ch->flags & XPC_C_DISCONNECTINGCALLOUT_MADE));
+
+ if (part->act_state == XPC_P_AS_DEACTIVATING) {
+ /* can't proceed until the other side disengages from us */
+ if (xpc_arch_ops.partition_engaged(ch->partid))
+ return;
+
+ } else {
+
+ /* as long as the other side is up do the full protocol */
+
+ if (!(ch->flags & XPC_C_RCLOSEREQUEST))
+ return;
+
+ if (!(ch->flags & XPC_C_CLOSEREPLY)) {
+ ch->flags |= XPC_C_CLOSEREPLY;
+ xpc_arch_ops.send_chctl_closereply(ch, irq_flags);
+ }
+
+ if (!(ch->flags & XPC_C_RCLOSEREPLY))
+ return;
+ }
+
+ /* wake those waiting for notify completion */
+ if (atomic_read(&ch->n_to_notify) > 0) {
+ /* we do callout while holding ch->lock, callout can't block */
+ xpc_arch_ops.notify_senders_of_disconnect(ch);
+ }
+
+ /* both sides are disconnected now */
+
+ if (ch->flags & XPC_C_DISCONNECTINGCALLOUT_MADE) {
+ spin_unlock_irqrestore(&ch->lock, *irq_flags);
+ xpc_disconnect_callout(ch, xpDisconnected);
+ spin_lock_irqsave(&ch->lock, *irq_flags);
+ }
+
+ DBUG_ON(atomic_read(&ch->n_to_notify) != 0);
+
+ /* it's now safe to free the channel's message queues */
+ xpc_arch_ops.teardown_msg_structures(ch);
+
+ ch->func = NULL;
+ ch->key = NULL;
+ ch->entry_size = 0;
+ ch->local_nentries = 0;
+ ch->remote_nentries = 0;
+ ch->kthreads_assigned_limit = 0;
+ ch->kthreads_idle_limit = 0;
+
+ /*
+ * Mark the channel disconnected and clear all other flags, including
+ * XPC_C_SETUP (because of call to
+ * xpc_arch_ops.teardown_msg_structures()) but not including
+ * XPC_C_WDISCONNECT (if it was set).
+ */
+ ch->flags = (XPC_C_DISCONNECTED | (ch->flags & XPC_C_WDISCONNECT));
+
+ atomic_dec(&part->nchannels_active);
+
+ if (channel_was_connected) {
+ dev_info(xpc_chan, "channel %d to partition %d disconnected, "
+ "reason=%d\n", ch->number, ch->partid, ch->reason);
+ }
+
+ if (ch->flags & XPC_C_WDISCONNECT) {
+ /* we won't lose the CPU since we're holding ch->lock */
+ complete(&ch->wdisconnect_wait);
+ } else if (ch->delayed_chctl_flags) {
+ if (part->act_state != XPC_P_AS_DEACTIVATING) {
+ /* time to take action on any delayed chctl flags */
+ spin_lock(&part->chctl_lock);
+ part->chctl.flags[ch->number] |=
+ ch->delayed_chctl_flags;
+ spin_unlock(&part->chctl_lock);
+ }
+ ch->delayed_chctl_flags = 0;
+ }
+}
+
+/*
+ * Process a change in the channel's remote connection state.
+ */
+static void
+xpc_process_openclose_chctl_flags(struct xpc_partition *part, int ch_number,
+ u8 chctl_flags)
+{
+ unsigned long irq_flags;
+ struct xpc_openclose_args *args =
+ &part->remote_openclose_args[ch_number];
+ struct xpc_channel *ch = &part->channels[ch_number];
+ enum xp_retval reason;
+ enum xp_retval ret;
+ int create_kthread = 0;
+
+ spin_lock_irqsave(&ch->lock, irq_flags);
+
+again:
+
+ if ((ch->flags & XPC_C_DISCONNECTED) &&
+ (ch->flags & XPC_C_WDISCONNECT)) {
+ /*
+ * Delay processing chctl flags until thread waiting disconnect
+ * has had a chance to see that the channel is disconnected.
+ */
+ ch->delayed_chctl_flags |= chctl_flags;
+ goto out;
+ }
+
+ if (chctl_flags & XPC_CHCTL_CLOSEREQUEST) {
+
+ dev_dbg(xpc_chan, "XPC_CHCTL_CLOSEREQUEST (reason=%d) received "
+ "from partid=%d, channel=%d\n", args->reason,
+ ch->partid, ch->number);
+
+ /*
+ * If RCLOSEREQUEST is set, we're probably waiting for
+ * RCLOSEREPLY. We should find it and a ROPENREQUEST packed
+ * with this RCLOSEREQUEST in the chctl_flags.
+ */
+
+ if (ch->flags & XPC_C_RCLOSEREQUEST) {
+ DBUG_ON(!(ch->flags & XPC_C_DISCONNECTING));
+ DBUG_ON(!(ch->flags & XPC_C_CLOSEREQUEST));
+ DBUG_ON(!(ch->flags & XPC_C_CLOSEREPLY));
+ DBUG_ON(ch->flags & XPC_C_RCLOSEREPLY);
+
+ DBUG_ON(!(chctl_flags & XPC_CHCTL_CLOSEREPLY));
+ chctl_flags &= ~XPC_CHCTL_CLOSEREPLY;
+ ch->flags |= XPC_C_RCLOSEREPLY;
+
+ /* both sides have finished disconnecting */
+ xpc_process_disconnect(ch, &irq_flags);
+ DBUG_ON(!(ch->flags & XPC_C_DISCONNECTED));
+ goto again;
+ }
+
+ if (ch->flags & XPC_C_DISCONNECTED) {
+ if (!(chctl_flags & XPC_CHCTL_OPENREQUEST)) {
+ if (part->chctl.flags[ch_number] &
+ XPC_CHCTL_OPENREQUEST) {
+
+ DBUG_ON(ch->delayed_chctl_flags != 0);
+ spin_lock(&part->chctl_lock);
+ part->chctl.flags[ch_number] |=
+ XPC_CHCTL_CLOSEREQUEST;
+ spin_unlock(&part->chctl_lock);
+ }
+ goto out;
+ }
+
+ XPC_SET_REASON(ch, 0, 0);
+ ch->flags &= ~XPC_C_DISCONNECTED;
+
+ atomic_inc(&part->nchannels_active);
+ ch->flags |= (XPC_C_CONNECTING | XPC_C_ROPENREQUEST);
+ }
+
+ chctl_flags &= ~(XPC_CHCTL_OPENREQUEST | XPC_CHCTL_OPENREPLY |
+ XPC_CHCTL_OPENCOMPLETE);
+
+ /*
+ * The meaningful CLOSEREQUEST connection state fields are:
+ * reason = reason connection is to be closed
+ */
+
+ ch->flags |= XPC_C_RCLOSEREQUEST;
+
+ if (!(ch->flags & XPC_C_DISCONNECTING)) {
+ reason = args->reason;
+ if (reason <= xpSuccess || reason > xpUnknownReason)
+ reason = xpUnknownReason;
+ else if (reason == xpUnregistering)
+ reason = xpOtherUnregistering;
+
+ XPC_DISCONNECT_CHANNEL(ch, reason, &irq_flags);
+
+ DBUG_ON(chctl_flags & XPC_CHCTL_CLOSEREPLY);
+ goto out;
+ }
+
+ xpc_process_disconnect(ch, &irq_flags);
+ }
+
+ if (chctl_flags & XPC_CHCTL_CLOSEREPLY) {
+
+ dev_dbg(xpc_chan, "XPC_CHCTL_CLOSEREPLY received from partid="
+ "%d, channel=%d\n", ch->partid, ch->number);
+
+ if (ch->flags & XPC_C_DISCONNECTED) {
+ DBUG_ON(part->act_state != XPC_P_AS_DEACTIVATING);
+ goto out;
+ }
+
+ DBUG_ON(!(ch->flags & XPC_C_CLOSEREQUEST));
+
+ if (!(ch->flags & XPC_C_RCLOSEREQUEST)) {
+ if (part->chctl.flags[ch_number] &
+ XPC_CHCTL_CLOSEREQUEST) {
+
+ DBUG_ON(ch->delayed_chctl_flags != 0);
+ spin_lock(&part->chctl_lock);
+ part->chctl.flags[ch_number] |=
+ XPC_CHCTL_CLOSEREPLY;
+ spin_unlock(&part->chctl_lock);
+ }
+ goto out;
+ }
+
+ ch->flags |= XPC_C_RCLOSEREPLY;
+
+ if (ch->flags & XPC_C_CLOSEREPLY) {
+ /* both sides have finished disconnecting */
+ xpc_process_disconnect(ch, &irq_flags);
+ }
+ }
+
+ if (chctl_flags & XPC_CHCTL_OPENREQUEST) {
+
+ dev_dbg(xpc_chan, "XPC_CHCTL_OPENREQUEST (entry_size=%d, "
+ "local_nentries=%d) received from partid=%d, "
+ "channel=%d\n", args->entry_size, args->local_nentries,
+ ch->partid, ch->number);
+
+ if (part->act_state == XPC_P_AS_DEACTIVATING ||
+ (ch->flags & XPC_C_ROPENREQUEST)) {
+ goto out;
+ }
+
+ if (ch->flags & (XPC_C_DISCONNECTING | XPC_C_WDISCONNECT)) {
+ ch->delayed_chctl_flags |= XPC_CHCTL_OPENREQUEST;
+ goto out;
+ }
+ DBUG_ON(!(ch->flags & (XPC_C_DISCONNECTED |
+ XPC_C_OPENREQUEST)));
+ DBUG_ON(ch->flags & (XPC_C_ROPENREQUEST | XPC_C_ROPENREPLY |
+ XPC_C_OPENREPLY | XPC_C_CONNECTED));
+
+ /*
+ * The meaningful OPENREQUEST connection state fields are:
+ * entry_size = size of channel's messages in bytes
+ * local_nentries = remote partition's local_nentries
+ */
+ if (args->entry_size == 0 || args->local_nentries == 0) {
+ /* assume OPENREQUEST was delayed by mistake */
+ goto out;
+ }
+
+ ch->flags |= (XPC_C_ROPENREQUEST | XPC_C_CONNECTING);
+ ch->remote_nentries = args->local_nentries;
+
+ if (ch->flags & XPC_C_OPENREQUEST) {
+ if (args->entry_size != ch->entry_size) {
+ XPC_DISCONNECT_CHANNEL(ch, xpUnequalMsgSizes,
+ &irq_flags);
+ goto out;
+ }
+ } else {
+ ch->entry_size = args->entry_size;
+
+ XPC_SET_REASON(ch, 0, 0);
+ ch->flags &= ~XPC_C_DISCONNECTED;
+
+ atomic_inc(&part->nchannels_active);
+ }
+
+ xpc_process_connect(ch, &irq_flags);
+ }
+
+ if (chctl_flags & XPC_CHCTL_OPENREPLY) {
+
+ dev_dbg(xpc_chan, "XPC_CHCTL_OPENREPLY (local_msgqueue_pa="
+ "0x%lx, local_nentries=%d, remote_nentries=%d) "
+ "received from partid=%d, channel=%d\n",
+ args->local_msgqueue_pa, args->local_nentries,
+ args->remote_nentries, ch->partid, ch->number);
+
+ if (ch->flags & (XPC_C_DISCONNECTING | XPC_C_DISCONNECTED))
+ goto out;
+
+ if (!(ch->flags & XPC_C_OPENREQUEST)) {
+ XPC_DISCONNECT_CHANNEL(ch, xpOpenCloseError,
+ &irq_flags);
+ goto out;
+ }
+
+ DBUG_ON(!(ch->flags & XPC_C_ROPENREQUEST));
+ DBUG_ON(ch->flags & XPC_C_CONNECTED);
+
+ /*
+ * The meaningful OPENREPLY connection state fields are:
+ * local_msgqueue_pa = physical address of remote
+ * partition's local_msgqueue
+ * local_nentries = remote partition's local_nentries
+ * remote_nentries = remote partition's remote_nentries
+ */
+ DBUG_ON(args->local_msgqueue_pa == 0);
+ DBUG_ON(args->local_nentries == 0);
+ DBUG_ON(args->remote_nentries == 0);
+
+ ret = xpc_arch_ops.save_remote_msgqueue_pa(ch,
+ args->local_msgqueue_pa);
+ if (ret != xpSuccess) {
+ XPC_DISCONNECT_CHANNEL(ch, ret, &irq_flags);
+ goto out;
+ }
+ ch->flags |= XPC_C_ROPENREPLY;
+
+ if (args->local_nentries < ch->remote_nentries) {
+ dev_dbg(xpc_chan, "XPC_CHCTL_OPENREPLY: new "
+ "remote_nentries=%d, old remote_nentries=%d, "
+ "partid=%d, channel=%d\n",
+ args->local_nentries, ch->remote_nentries,
+ ch->partid, ch->number);
+
+ ch->remote_nentries = args->local_nentries;
+ }
+ if (args->remote_nentries < ch->local_nentries) {
+ dev_dbg(xpc_chan, "XPC_CHCTL_OPENREPLY: new "
+ "local_nentries=%d, old local_nentries=%d, "
+ "partid=%d, channel=%d\n",
+ args->remote_nentries, ch->local_nentries,
+ ch->partid, ch->number);
+
+ ch->local_nentries = args->remote_nentries;
+ }
+
+ xpc_process_connect(ch, &irq_flags);
+ }
+
+ if (chctl_flags & XPC_CHCTL_OPENCOMPLETE) {
+
+ dev_dbg(xpc_chan, "XPC_CHCTL_OPENCOMPLETE received from "
+ "partid=%d, channel=%d\n", ch->partid, ch->number);
+
+ if (ch->flags & (XPC_C_DISCONNECTING | XPC_C_DISCONNECTED))
+ goto out;
+
+ if (!(ch->flags & XPC_C_OPENREQUEST) ||
+ !(ch->flags & XPC_C_OPENREPLY)) {
+ XPC_DISCONNECT_CHANNEL(ch, xpOpenCloseError,
+ &irq_flags);
+ goto out;
+ }
+
+ DBUG_ON(!(ch->flags & XPC_C_ROPENREQUEST));
+ DBUG_ON(!(ch->flags & XPC_C_ROPENREPLY));
+ DBUG_ON(!(ch->flags & XPC_C_CONNECTED));
+
+ ch->flags |= XPC_C_ROPENCOMPLETE;
+
+ xpc_process_connect(ch, &irq_flags);
+ create_kthread = 1;
+ }
+
+out:
+ spin_unlock_irqrestore(&ch->lock, irq_flags);
+
+ if (create_kthread)
+ xpc_create_kthreads(ch, 1, 0);
+}
+
+/*
+ * Attempt to establish a channel connection to a remote partition.
+ */
+static enum xp_retval
+xpc_connect_channel(struct xpc_channel *ch)
+{
+ unsigned long irq_flags;
+ struct xpc_registration *registration = &xpc_registrations[ch->number];
+
+ if (mutex_trylock(®istration->mutex) == 0)
+ return xpRetry;
+
+ if (!XPC_CHANNEL_REGISTERED(ch->number)) {
+ mutex_unlock(®istration->mutex);
+ return xpUnregistered;
+ }
+
+ spin_lock_irqsave(&ch->lock, irq_flags);
+
+ DBUG_ON(ch->flags & XPC_C_CONNECTED);
+ DBUG_ON(ch->flags & XPC_C_OPENREQUEST);
+
+ if (ch->flags & XPC_C_DISCONNECTING) {
+ spin_unlock_irqrestore(&ch->lock, irq_flags);
+ mutex_unlock(®istration->mutex);
+ return ch->reason;
+ }
+
+ /* add info from the channel connect registration to the channel */
+
+ ch->kthreads_assigned_limit = registration->assigned_limit;
+ ch->kthreads_idle_limit = registration->idle_limit;
+ DBUG_ON(atomic_read(&ch->kthreads_assigned) != 0);
+ DBUG_ON(atomic_read(&ch->kthreads_idle) != 0);
+ DBUG_ON(atomic_read(&ch->kthreads_active) != 0);
+
+ ch->func = registration->func;
+ DBUG_ON(registration->func == NULL);
+ ch->key = registration->key;
+
+ ch->local_nentries = registration->nentries;
+
+ if (ch->flags & XPC_C_ROPENREQUEST) {
+ if (registration->entry_size != ch->entry_size) {
+ /* the local and remote sides aren't the same */
+
+ /*
+ * Because XPC_DISCONNECT_CHANNEL() can block we're
+ * forced to up the registration sema before we unlock
+ * the channel lock. But that's okay here because we're
+ * done with the part that required the registration
+ * sema. XPC_DISCONNECT_CHANNEL() requires that the
+ * channel lock be locked and will unlock and relock
+ * the channel lock as needed.
+ */
+ mutex_unlock(®istration->mutex);
+ XPC_DISCONNECT_CHANNEL(ch, xpUnequalMsgSizes,
+ &irq_flags);
+ spin_unlock_irqrestore(&ch->lock, irq_flags);
+ return xpUnequalMsgSizes;
+ }
+ } else {
+ ch->entry_size = registration->entry_size;
+
+ XPC_SET_REASON(ch, 0, 0);
+ ch->flags &= ~XPC_C_DISCONNECTED;
+
+ atomic_inc(&xpc_partitions[ch->partid].nchannels_active);
+ }
+
+ mutex_unlock(®istration->mutex);
+
+ /* initiate the connection */
+
+ ch->flags |= (XPC_C_OPENREQUEST | XPC_C_CONNECTING);
+ xpc_arch_ops.send_chctl_openrequest(ch, &irq_flags);
+
+ xpc_process_connect(ch, &irq_flags);
+
+ spin_unlock_irqrestore(&ch->lock, irq_flags);
+
+ return xpSuccess;
+}
+
+void
+xpc_process_sent_chctl_flags(struct xpc_partition *part)
+{
+ unsigned long irq_flags;
+ union xpc_channel_ctl_flags chctl;
+ struct xpc_channel *ch;
+ int ch_number;
+ u32 ch_flags;
+
+ chctl.all_flags = xpc_arch_ops.get_chctl_all_flags(part);
+
+ /*
+ * Initiate channel connections for registered channels.
+ *
+ * For each connected channel that has pending messages activate idle
+ * kthreads and/or create new kthreads as needed.
+ */
+
+ for (ch_number = 0; ch_number < part->nchannels; ch_number++) {
+ ch = &part->channels[ch_number];
+
+ /*
+ * Process any open or close related chctl flags, and then deal
+ * with connecting or disconnecting the channel as required.
+ */
+
+ if (chctl.flags[ch_number] & XPC_OPENCLOSE_CHCTL_FLAGS) {
+ xpc_process_openclose_chctl_flags(part, ch_number,
+ chctl.flags[ch_number]);
+ }
+
+ ch_flags = ch->flags; /* need an atomic snapshot of flags */
+
+ if (ch_flags & XPC_C_DISCONNECTING) {
+ spin_lock_irqsave(&ch->lock, irq_flags);
+ xpc_process_disconnect(ch, &irq_flags);
+ spin_unlock_irqrestore(&ch->lock, irq_flags);
+ continue;
+ }
+
+ if (part->act_state == XPC_P_AS_DEACTIVATING)
+ continue;
+
+ if (!(ch_flags & XPC_C_CONNECTED)) {
+ if (!(ch_flags & XPC_C_OPENREQUEST)) {
+ DBUG_ON(ch_flags & XPC_C_SETUP);
+ (void)xpc_connect_channel(ch);
+ }
+ continue;
+ }
+
+ /*
+ * Process any message related chctl flags, this may involve
+ * the activation of kthreads to deliver any pending messages
+ * sent from the other partition.
+ */
+
+ if (chctl.flags[ch_number] & XPC_MSG_CHCTL_FLAGS)
+ xpc_arch_ops.process_msg_chctl_flags(part, ch_number);
+ }
+}
+
+/*
+ * XPC's heartbeat code calls this function to inform XPC that a partition is
+ * going down. XPC responds by tearing down the XPartition Communication
+ * infrastructure used for the just downed partition.
+ *
+ * XPC's heartbeat code will never call this function and xpc_partition_up()
+ * at the same time. Nor will it ever make multiple calls to either function
+ * at the same time.
+ */
+void
+xpc_partition_going_down(struct xpc_partition *part, enum xp_retval reason)
+{
+ unsigned long irq_flags;
+ int ch_number;
+ struct xpc_channel *ch;
+
+ dev_dbg(xpc_chan, "deactivating partition %d, reason=%d\n",
+ XPC_PARTID(part), reason);
+
+ if (!xpc_part_ref(part)) {
+ /* infrastructure for this partition isn't currently set up */
+ return;
+ }
+
+ /* disconnect channels associated with the partition going down */
+
+ for (ch_number = 0; ch_number < part->nchannels; ch_number++) {
+ ch = &part->channels[ch_number];
+
+ xpc_msgqueue_ref(ch);
+ spin_lock_irqsave(&ch->lock, irq_flags);
+
+ XPC_DISCONNECT_CHANNEL(ch, reason, &irq_flags);
+
+ spin_unlock_irqrestore(&ch->lock, irq_flags);
+ xpc_msgqueue_deref(ch);
+ }
+
+ xpc_wakeup_channel_mgr(part);
+
+ xpc_part_deref(part);
+}
+
+/*
+ * Called by XP at the time of channel connection registration to cause
+ * XPC to establish connections to all currently active partitions.
+ */
+void
+xpc_initiate_connect(int ch_number)
+{
+ short partid;
+ struct xpc_partition *part;
+ struct xpc_channel *ch;
+
+ DBUG_ON(ch_number < 0 || ch_number >= XPC_MAX_NCHANNELS);
+
+ for (partid = 0; partid < xp_max_npartitions; partid++) {
+ part = &xpc_partitions[partid];
+
+ if (xpc_part_ref(part)) {
+ ch = &part->channels[ch_number];
+
+ /*
+ * Initiate the establishment of a connection on the
+ * newly registered channel to the remote partition.
+ */
+ xpc_wakeup_channel_mgr(part);
+ xpc_part_deref(part);
+ }
+ }
+}
+
+void
+xpc_connected_callout(struct xpc_channel *ch)
+{
+ /* let the registerer know that a connection has been established */
+
+ if (ch->func != NULL) {
+ dev_dbg(xpc_chan, "ch->func() called, reason=xpConnected, "
+ "partid=%d, channel=%d\n", ch->partid, ch->number);
+
+ ch->func(xpConnected, ch->partid, ch->number,
+ (void *)(u64)ch->local_nentries, ch->key);
+
+ dev_dbg(xpc_chan, "ch->func() returned, reason=xpConnected, "
+ "partid=%d, channel=%d\n", ch->partid, ch->number);
+ }
+}
+
+/*
+ * Called by XP at the time of channel connection unregistration to cause
+ * XPC to teardown all current connections for the specified channel.
+ *
+ * Before returning xpc_initiate_disconnect() will wait until all connections
+ * on the specified channel have been closed/torndown. So the caller can be
+ * assured that they will not be receiving any more callouts from XPC to the
+ * function they registered via xpc_connect().
+ *
+ * Arguments:
+ *
+ * ch_number - channel # to unregister.
+ */
+void
+xpc_initiate_disconnect(int ch_number)
+{
+ unsigned long irq_flags;
+ short partid;
+ struct xpc_partition *part;
+ struct xpc_channel *ch;
+
+ DBUG_ON(ch_number < 0 || ch_number >= XPC_MAX_NCHANNELS);
+
+ /* initiate the channel disconnect for every active partition */
+ for (partid = 0; partid < xp_max_npartitions; partid++) {
+ part = &xpc_partitions[partid];
+
+ if (xpc_part_ref(part)) {
+ ch = &part->channels[ch_number];
+ xpc_msgqueue_ref(ch);
+
+ spin_lock_irqsave(&ch->lock, irq_flags);
+
+ if (!(ch->flags & XPC_C_DISCONNECTED)) {
+ ch->flags |= XPC_C_WDISCONNECT;
+
+ XPC_DISCONNECT_CHANNEL(ch, xpUnregistering,
+ &irq_flags);
+ }
+
+ spin_unlock_irqrestore(&ch->lock, irq_flags);
+
+ xpc_msgqueue_deref(ch);
+ xpc_part_deref(part);
+ }
+ }
+
+ xpc_disconnect_wait(ch_number);
+}
+
+/*
+ * To disconnect a channel, and reflect it back to all who may be waiting.
+ *
+ * An OPEN is not allowed until XPC_C_DISCONNECTING is cleared by
+ * xpc_process_disconnect(), and if set, XPC_C_WDISCONNECT is cleared by
+ * xpc_disconnect_wait().
+ *
+ * THE CHANNEL IS TO BE LOCKED BY THE CALLER AND WILL REMAIN LOCKED UPON RETURN.
+ */
+void
+xpc_disconnect_channel(const int line, struct xpc_channel *ch,
+ enum xp_retval reason, unsigned long *irq_flags)
+{
+ u32 channel_was_connected = (ch->flags & XPC_C_CONNECTED);
+
+ DBUG_ON(!spin_is_locked(&ch->lock));
+
+ if (ch->flags & (XPC_C_DISCONNECTING | XPC_C_DISCONNECTED))
+ return;
+
+ DBUG_ON(!(ch->flags & (XPC_C_CONNECTING | XPC_C_CONNECTED)));
+
+ dev_dbg(xpc_chan, "reason=%d, line=%d, partid=%d, channel=%d\n",
+ reason, line, ch->partid, ch->number);
+
+ XPC_SET_REASON(ch, reason, line);
+
+ ch->flags |= (XPC_C_CLOSEREQUEST | XPC_C_DISCONNECTING);
+ /* some of these may not have been set */
+ ch->flags &= ~(XPC_C_OPENREQUEST | XPC_C_OPENREPLY |
+ XPC_C_ROPENREQUEST | XPC_C_ROPENREPLY |
+ XPC_C_CONNECTING | XPC_C_CONNECTED);
+
+ xpc_arch_ops.send_chctl_closerequest(ch, irq_flags);
+
+ if (channel_was_connected)
+ ch->flags |= XPC_C_WASCONNECTED;
+
+ spin_unlock_irqrestore(&ch->lock, *irq_flags);
+
+ /* wake all idle kthreads so they can exit */
+ if (atomic_read(&ch->kthreads_idle) > 0) {
+ wake_up_all(&ch->idle_wq);
+
+ } else if ((ch->flags & XPC_C_CONNECTEDCALLOUT_MADE) &&
+ !(ch->flags & XPC_C_DISCONNECTINGCALLOUT)) {
+ /* start a kthread that will do the xpDisconnecting callout */
+ xpc_create_kthreads(ch, 1, 1);
+ }
+
+ /* wake those waiting to allocate an entry from the local msg queue */
+ if (atomic_read(&ch->n_on_msg_allocate_wq) > 0)
+ wake_up(&ch->msg_allocate_wq);
+
+ spin_lock_irqsave(&ch->lock, *irq_flags);
+}
+
+void
+xpc_disconnect_callout(struct xpc_channel *ch, enum xp_retval reason)
+{
+ /*
+ * Let the channel's registerer know that the channel is being
+ * disconnected. We don't want to do this if the registerer was never
+ * informed of a connection being made.
+ */
+
+ if (ch->func != NULL) {
+ dev_dbg(xpc_chan, "ch->func() called, reason=%d, partid=%d, "
+ "channel=%d\n", reason, ch->partid, ch->number);
+
+ ch->func(reason, ch->partid, ch->number, NULL, ch->key);
+
+ dev_dbg(xpc_chan, "ch->func() returned, reason=%d, partid=%d, "
+ "channel=%d\n", reason, ch->partid, ch->number);
+ }
+}
+
+/*
+ * Wait for a message entry to become available for the specified channel,
+ * but don't wait any longer than 1 jiffy.
+ */
+enum xp_retval
+xpc_allocate_msg_wait(struct xpc_channel *ch)
+{
+ enum xp_retval ret;
+ DEFINE_WAIT(wait);
+
+ if (ch->flags & XPC_C_DISCONNECTING) {
+ DBUG_ON(ch->reason == xpInterrupted);
+ return ch->reason;
+ }
+
+ atomic_inc(&ch->n_on_msg_allocate_wq);
+ prepare_to_wait(&ch->msg_allocate_wq, &wait, TASK_INTERRUPTIBLE);
+ ret = schedule_timeout(1);
+ finish_wait(&ch->msg_allocate_wq, &wait);
+ atomic_dec(&ch->n_on_msg_allocate_wq);
+
+ if (ch->flags & XPC_C_DISCONNECTING) {
+ ret = ch->reason;
+ DBUG_ON(ch->reason == xpInterrupted);
+ } else if (ret == 0) {
+ ret = xpTimeout;
+ } else {
+ ret = xpInterrupted;
+ }
+
+ return ret;
+}
+
+/*
+ * Send a message that contains the user's payload on the specified channel
+ * connected to the specified partition.
+ *
+ * NOTE that this routine can sleep waiting for a message entry to become
+ * available. To not sleep, pass in the XPC_NOWAIT flag.
+ *
+ * Once sent, this routine will not wait for the message to be received, nor
+ * will notification be given when it does happen.
+ *
+ * Arguments:
+ *
+ * partid - ID of partition to which the channel is connected.
+ * ch_number - channel # to send message on.
+ * flags - see xp.h for valid flags.
+ * payload - pointer to the payload which is to be sent.
+ * payload_size - size of the payload in bytes.
+ */
+enum xp_retval
+xpc_initiate_send(short partid, int ch_number, u32 flags, void *payload,
+ u16 payload_size)
+{
+ struct xpc_partition *part = &xpc_partitions[partid];
+ enum xp_retval ret = xpUnknownReason;
+
+ dev_dbg(xpc_chan, "payload=0x%p, partid=%d, channel=%d\n", payload,
+ partid, ch_number);
+
+ DBUG_ON(partid < 0 || partid >= xp_max_npartitions);
+ DBUG_ON(ch_number < 0 || ch_number >= part->nchannels);
+ DBUG_ON(payload == NULL);
+
+ if (xpc_part_ref(part)) {
+ ret = xpc_arch_ops.send_payload(&part->channels[ch_number],
+ flags, payload, payload_size, 0, NULL, NULL);
+ xpc_part_deref(part);
+ }
+
+ return ret;
+}
+
+/*
+ * Send a message that contains the user's payload on the specified channel
+ * connected to the specified partition.
+ *
+ * NOTE that this routine can sleep waiting for a message entry to become
+ * available. To not sleep, pass in the XPC_NOWAIT flag.
+ *
+ * This routine will not wait for the message to be sent or received.
+ *
+ * Once the remote end of the channel has received the message, the function
+ * passed as an argument to xpc_initiate_send_notify() will be called. This
+ * allows the sender to free up or re-use any buffers referenced by the
+ * message, but does NOT mean the message has been processed at the remote
+ * end by a receiver.
+ *
+ * If this routine returns an error, the caller's function will NOT be called.
+ *
+ * Arguments:
+ *
+ * partid - ID of partition to which the channel is connected.
+ * ch_number - channel # to send message on.
+ * flags - see xp.h for valid flags.
+ * payload - pointer to the payload which is to be sent.
+ * payload_size - size of the payload in bytes.
+ * func - function to call with asynchronous notification of message
+ * receipt. THIS FUNCTION MUST BE NON-BLOCKING.
+ * key - user-defined key to be passed to the function when it's called.
+ */
+enum xp_retval
+xpc_initiate_send_notify(short partid, int ch_number, u32 flags, void *payload,
+ u16 payload_size, xpc_notify_func func, void *key)
+{
+ struct xpc_partition *part = &xpc_partitions[partid];
+ enum xp_retval ret = xpUnknownReason;
+
+ dev_dbg(xpc_chan, "payload=0x%p, partid=%d, channel=%d\n", payload,
+ partid, ch_number);
+
+ DBUG_ON(partid < 0 || partid >= xp_max_npartitions);
+ DBUG_ON(ch_number < 0 || ch_number >= part->nchannels);
+ DBUG_ON(payload == NULL);
+ DBUG_ON(func == NULL);
+
+ if (xpc_part_ref(part)) {
+ ret = xpc_arch_ops.send_payload(&part->channels[ch_number],
+ flags, payload, payload_size, XPC_N_CALL, func, key);
+ xpc_part_deref(part);
+ }
+ return ret;
+}
+
+/*
+ * Deliver a message's payload to its intended recipient.
+ */
+void
+xpc_deliver_payload(struct xpc_channel *ch)
+{
+ void *payload;
+
+ payload = xpc_arch_ops.get_deliverable_payload(ch);
+ if (payload != NULL) {
+
+ /*
+ * This ref is taken to protect the payload itself from being
+ * freed before the user is finished with it, which the user
+ * indicates by calling xpc_initiate_received().
+ */
+ xpc_msgqueue_ref(ch);
+
+ atomic_inc(&ch->kthreads_active);
+
+ if (ch->func != NULL) {
+ dev_dbg(xpc_chan, "ch->func() called, payload=0x%p "
+ "partid=%d channel=%d\n", payload, ch->partid,
+ ch->number);
+
+ /* deliver the message to its intended recipient */
+ ch->func(xpMsgReceived, ch->partid, ch->number, payload,
+ ch->key);
+
+ dev_dbg(xpc_chan, "ch->func() returned, payload=0x%p "
+ "partid=%d channel=%d\n", payload, ch->partid,
+ ch->number);
+ }
+
+ atomic_dec(&ch->kthreads_active);
+ }
+}
+
+/*
+ * Acknowledge receipt of a delivered message's payload.
+ *
+ * This function, although called by users, does not call xpc_part_ref() to
+ * ensure that the partition infrastructure is in place. It relies on the
+ * fact that we called xpc_msgqueue_ref() in xpc_deliver_payload().
+ *
+ * Arguments:
+ *
+ * partid - ID of partition to which the channel is connected.
+ * ch_number - channel # message received on.
+ * payload - pointer to the payload area allocated via
+ * xpc_initiate_send() or xpc_initiate_send_notify().
+ */
+void
+xpc_initiate_received(short partid, int ch_number, void *payload)
+{
+ struct xpc_partition *part = &xpc_partitions[partid];
+ struct xpc_channel *ch;
+
+ DBUG_ON(partid < 0 || partid >= xp_max_npartitions);
+ DBUG_ON(ch_number < 0 || ch_number >= part->nchannels);
+
+ ch = &part->channels[ch_number];
+ xpc_arch_ops.received_payload(ch, payload);
+
+ /* the call to xpc_msgqueue_ref() was done by xpc_deliver_payload() */
+ xpc_msgqueue_deref(ch);
+}
diff --git a/drivers/misc/sgi-xp/xpc_main.c b/drivers/misc/sgi-xp/xpc_main.c
new file mode 100644
index 0000000..7f32712
--- /dev/null
+++ b/drivers/misc/sgi-xp/xpc_main.c
@@ -0,0 +1,1374 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 2004-2009 Silicon Graphics, Inc. All Rights Reserved.
+ */
+
+/*
+ * Cross Partition Communication (XPC) support - standard version.
+ *
+ * XPC provides a message passing capability that crosses partition
+ * boundaries. This module is made up of two parts:
+ *
+ * partition This part detects the presence/absence of other
+ * partitions. It provides a heartbeat and monitors
+ * the heartbeats of other partitions.
+ *
+ * channel This part manages the channels and sends/receives
+ * messages across them to/from other partitions.
+ *
+ * There are a couple of additional functions residing in XP, which
+ * provide an interface to XPC for its users.
+ *
+ *
+ * Caveats:
+ *
+ * . Currently on sn2, we have no way to determine which nasid an IRQ
+ * came from. Thus, xpc_send_IRQ_sn2() does a remote amo write
+ * followed by an IPI. The amo indicates where data is to be pulled
+ * from, so after the IPI arrives, the remote partition checks the amo
+ * word. The IPI can actually arrive before the amo however, so other
+ * code must periodically check for this case. Also, remote amo
+ * operations do not reliably time out. Thus we do a remote PIO read
+ * solely to know whether the remote partition is down and whether we
+ * should stop sending IPIs to it. This remote PIO read operation is
+ * set up in a special nofault region so SAL knows to ignore (and
+ * cleanup) any errors due to the remote amo write, PIO read, and/or
+ * PIO write operations.
+ *
+ * If/when new hardware solves this IPI problem, we should abandon
+ * the current approach.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/sysctl.h>
+#include <linux/device.h>
+#include <linux/delay.h>
+#include <linux/reboot.h>
+#include <linux/kdebug.h>
+#include <linux/kthread.h>
+#include "xpc.h"
+
+#ifdef CONFIG_X86_64
+#include <asm/traps.h>
+#endif
+
+/* define two XPC debug device structures to be used with dev_dbg() et al */
+
+struct device_driver xpc_dbg_name = {
+ .name = "xpc"
+};
+
+struct device xpc_part_dbg_subname = {
+ .init_name = "", /* set to "part" at xpc_init() time */
+ .driver = &xpc_dbg_name
+};
+
+struct device xpc_chan_dbg_subname = {
+ .init_name = "", /* set to "chan" at xpc_init() time */
+ .driver = &xpc_dbg_name
+};
+
+struct device *xpc_part = &xpc_part_dbg_subname;
+struct device *xpc_chan = &xpc_chan_dbg_subname;
+
+static int xpc_kdebug_ignore;
+
+/* systune related variables for /proc/sys directories */
+
+static int xpc_hb_interval = XPC_HB_DEFAULT_INTERVAL;
+static int xpc_hb_min_interval = 1;
+static int xpc_hb_max_interval = 10;
+
+static int xpc_hb_check_interval = XPC_HB_CHECK_DEFAULT_INTERVAL;
+static int xpc_hb_check_min_interval = 10;
+static int xpc_hb_check_max_interval = 120;
+
+int xpc_disengage_timelimit = XPC_DISENGAGE_DEFAULT_TIMELIMIT;
+static int xpc_disengage_min_timelimit; /* = 0 */
+static int xpc_disengage_max_timelimit = 120;
+
+static struct ctl_table xpc_sys_xpc_hb_dir[] = {
+ {
+ .procname = "hb_interval",
+ .data = &xpc_hb_interval,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &xpc_hb_min_interval,
+ .extra2 = &xpc_hb_max_interval},
+ {
+ .procname = "hb_check_interval",
+ .data = &xpc_hb_check_interval,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &xpc_hb_check_min_interval,
+ .extra2 = &xpc_hb_check_max_interval},
+ {}
+};
+static struct ctl_table xpc_sys_xpc_dir[] = {
+ {
+ .procname = "hb",
+ .mode = 0555,
+ .child = xpc_sys_xpc_hb_dir},
+ {
+ .procname = "disengage_timelimit",
+ .data = &xpc_disengage_timelimit,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &xpc_disengage_min_timelimit,
+ .extra2 = &xpc_disengage_max_timelimit},
+ {}
+};
+static struct ctl_table xpc_sys_dir[] = {
+ {
+ .procname = "xpc",
+ .mode = 0555,
+ .child = xpc_sys_xpc_dir},
+ {}
+};
+static struct ctl_table_header *xpc_sysctl;
+
+/* non-zero if any remote partition disengage was timed out */
+int xpc_disengage_timedout;
+
+/* #of activate IRQs received and not yet processed */
+int xpc_activate_IRQ_rcvd;
+DEFINE_SPINLOCK(xpc_activate_IRQ_rcvd_lock);
+
+/* IRQ handler notifies this wait queue on receipt of an IRQ */
+DECLARE_WAIT_QUEUE_HEAD(xpc_activate_IRQ_wq);
+
+static unsigned long xpc_hb_check_timeout;
+static struct timer_list xpc_hb_timer;
+
+/* notification that the xpc_hb_checker thread has exited */
+static DECLARE_COMPLETION(xpc_hb_checker_exited);
+
+/* notification that the xpc_discovery thread has exited */
+static DECLARE_COMPLETION(xpc_discovery_exited);
+
+static void xpc_kthread_waitmsgs(struct xpc_partition *, struct xpc_channel *);
+
+static int xpc_system_reboot(struct notifier_block *, unsigned long, void *);
+static struct notifier_block xpc_reboot_notifier = {
+ .notifier_call = xpc_system_reboot,
+};
+
+static int xpc_system_die(struct notifier_block *, unsigned long, void *);
+static struct notifier_block xpc_die_notifier = {
+ .notifier_call = xpc_system_die,
+};
+
+struct xpc_arch_operations xpc_arch_ops;
+
+/*
+ * Timer function to enforce the timelimit on the partition disengage.
+ */
+static void
+xpc_timeout_partition_disengage(unsigned long data)
+{
+ struct xpc_partition *part = (struct xpc_partition *)data;
+
+ DBUG_ON(time_is_after_jiffies(part->disengage_timeout));
+
+ (void)xpc_partition_disengaged(part);
+
+ DBUG_ON(part->disengage_timeout != 0);
+ DBUG_ON(xpc_arch_ops.partition_engaged(XPC_PARTID(part)));
+}
+
+/*
+ * Timer to produce the heartbeat. The timer structures function is
+ * already set when this is initially called. A tunable is used to
+ * specify when the next timeout should occur.
+ */
+static void
+xpc_hb_beater(unsigned long dummy)
+{
+ xpc_arch_ops.increment_heartbeat();
+
+ if (time_is_before_eq_jiffies(xpc_hb_check_timeout))
+ wake_up_interruptible(&xpc_activate_IRQ_wq);
+
+ xpc_hb_timer.expires = jiffies + (xpc_hb_interval * HZ);
+ add_timer(&xpc_hb_timer);
+}
+
+static void
+xpc_start_hb_beater(void)
+{
+ xpc_arch_ops.heartbeat_init();
+ init_timer(&xpc_hb_timer);
+ xpc_hb_timer.function = xpc_hb_beater;
+ xpc_hb_beater(0);
+}
+
+static void
+xpc_stop_hb_beater(void)
+{
+ del_timer_sync(&xpc_hb_timer);
+ xpc_arch_ops.heartbeat_exit();
+}
+
+/*
+ * At periodic intervals, scan through all active partitions and ensure
+ * their heartbeat is still active. If not, the partition is deactivated.
+ */
+static void
+xpc_check_remote_hb(void)
+{
+ struct xpc_partition *part;
+ short partid;
+ enum xp_retval ret;
+
+ for (partid = 0; partid < xp_max_npartitions; partid++) {
+
+ if (xpc_exiting)
+ break;
+
+ if (partid == xp_partition_id)
+ continue;
+
+ part = &xpc_partitions[partid];
+
+ if (part->act_state == XPC_P_AS_INACTIVE ||
+ part->act_state == XPC_P_AS_DEACTIVATING) {
+ continue;
+ }
+
+ ret = xpc_arch_ops.get_remote_heartbeat(part);
+ if (ret != xpSuccess)
+ XPC_DEACTIVATE_PARTITION(part, ret);
+ }
+}
+
+/*
+ * This thread is responsible for nearly all of the partition
+ * activation/deactivation.
+ */
+static int
+xpc_hb_checker(void *ignore)
+{
+ int force_IRQ = 0;
+
+ /* this thread was marked active by xpc_hb_init() */
+
+ set_cpus_allowed_ptr(current, cpumask_of(XPC_HB_CHECK_CPU));
+
+ /* set our heartbeating to other partitions into motion */
+ xpc_hb_check_timeout = jiffies + (xpc_hb_check_interval * HZ);
+ xpc_start_hb_beater();
+
+ while (!xpc_exiting) {
+
+ dev_dbg(xpc_part, "woke up with %d ticks rem; %d IRQs have "
+ "been received\n",
+ (int)(xpc_hb_check_timeout - jiffies),
+ xpc_activate_IRQ_rcvd);
+
+ /* checking of remote heartbeats is skewed by IRQ handling */
+ if (time_is_before_eq_jiffies(xpc_hb_check_timeout)) {
+ xpc_hb_check_timeout = jiffies +
+ (xpc_hb_check_interval * HZ);
+
+ dev_dbg(xpc_part, "checking remote heartbeats\n");
+ xpc_check_remote_hb();
+
+ /*
+ * On sn2 we need to periodically recheck to ensure no
+ * IRQ/amo pairs have been missed.
+ */
+ if (is_shub())
+ force_IRQ = 1;
+ }
+
+ /* check for outstanding IRQs */
+ if (xpc_activate_IRQ_rcvd > 0 || force_IRQ != 0) {
+ force_IRQ = 0;
+ dev_dbg(xpc_part, "processing activate IRQs "
+ "received\n");
+ xpc_arch_ops.process_activate_IRQ_rcvd();
+ }
+
+ /* wait for IRQ or timeout */
+ (void)wait_event_interruptible(xpc_activate_IRQ_wq,
+ (time_is_before_eq_jiffies(
+ xpc_hb_check_timeout) ||
+ xpc_activate_IRQ_rcvd > 0 ||
+ xpc_exiting));
+ }
+
+ xpc_stop_hb_beater();
+
+ dev_dbg(xpc_part, "heartbeat checker is exiting\n");
+
+ /* mark this thread as having exited */
+ complete(&xpc_hb_checker_exited);
+ return 0;
+}
+
+/*
+ * This thread will attempt to discover other partitions to activate
+ * based on info provided by SAL. This new thread is short lived and
+ * will exit once discovery is complete.
+ */
+static int
+xpc_initiate_discovery(void *ignore)
+{
+ xpc_discovery();
+
+ dev_dbg(xpc_part, "discovery thread is exiting\n");
+
+ /* mark this thread as having exited */
+ complete(&xpc_discovery_exited);
+ return 0;
+}
+
+/*
+ * The first kthread assigned to a newly activated partition is the one
+ * created by XPC HB with which it calls xpc_activating(). XPC hangs on to
+ * that kthread until the partition is brought down, at which time that kthread
+ * returns back to XPC HB. (The return of that kthread will signify to XPC HB
+ * that XPC has dismantled all communication infrastructure for the associated
+ * partition.) This kthread becomes the channel manager for that partition.
+ *
+ * Each active partition has a channel manager, who, besides connecting and
+ * disconnecting channels, will ensure that each of the partition's connected
+ * channels has the required number of assigned kthreads to get the work done.
+ */
+static void
+xpc_channel_mgr(struct xpc_partition *part)
+{
+ while (part->act_state != XPC_P_AS_DEACTIVATING ||
+ atomic_read(&part->nchannels_active) > 0 ||
+ !xpc_partition_disengaged(part)) {
+
+ xpc_process_sent_chctl_flags(part);
+
+ /*
+ * Wait until we've been requested to activate kthreads or
+ * all of the channel's message queues have been torn down or
+ * a signal is pending.
+ *
+ * The channel_mgr_requests is set to 1 after being awakened,
+ * This is done to prevent the channel mgr from making one pass
+ * through the loop for each request, since he will
+ * be servicing all the requests in one pass. The reason it's
+ * set to 1 instead of 0 is so that other kthreads will know
+ * that the channel mgr is running and won't bother trying to
+ * wake him up.
+ */
+ atomic_dec(&part->channel_mgr_requests);
+ (void)wait_event_interruptible(part->channel_mgr_wq,
+ (atomic_read(&part->channel_mgr_requests) > 0 ||
+ part->chctl.all_flags != 0 ||
+ (part->act_state == XPC_P_AS_DEACTIVATING &&
+ atomic_read(&part->nchannels_active) == 0 &&
+ xpc_partition_disengaged(part))));
+ atomic_set(&part->channel_mgr_requests, 1);
+ }
+}
+
+/*
+ * Guarantee that the kzalloc'd memory is cacheline aligned.
+ */
+void *
+xpc_kzalloc_cacheline_aligned(size_t size, gfp_t flags, void **base)
+{
+ /* see if kzalloc will give us cachline aligned memory by default */
+ *base = kzalloc(size, flags);
+ if (*base == NULL)
+ return NULL;
+
+ if ((u64)*base == L1_CACHE_ALIGN((u64)*base))
+ return *base;
+
+ kfree(*base);
+
+ /* nope, we'll have to do it ourselves */
+ *base = kzalloc(size + L1_CACHE_BYTES, flags);
+ if (*base == NULL)
+ return NULL;
+
+ return (void *)L1_CACHE_ALIGN((u64)*base);
+}
+
+/*
+ * Setup the channel structures necessary to support XPartition Communication
+ * between the specified remote partition and the local one.
+ */
+static enum xp_retval
+xpc_setup_ch_structures(struct xpc_partition *part)
+{
+ enum xp_retval ret;
+ int ch_number;
+ struct xpc_channel *ch;
+ short partid = XPC_PARTID(part);
+
+ /*
+ * Allocate all of the channel structures as a contiguous chunk of
+ * memory.
+ */
+ DBUG_ON(part->channels != NULL);
+ part->channels = kzalloc(sizeof(struct xpc_channel) * XPC_MAX_NCHANNELS,
+ GFP_KERNEL);
+ if (part->channels == NULL) {
+ dev_err(xpc_chan, "can't get memory for channels\n");
+ return xpNoMemory;
+ }
+
+ /* allocate the remote open and close args */
+
+ part->remote_openclose_args =
+ xpc_kzalloc_cacheline_aligned(XPC_OPENCLOSE_ARGS_SIZE,
+ GFP_KERNEL, &part->
+ remote_openclose_args_base);
+ if (part->remote_openclose_args == NULL) {
+ dev_err(xpc_chan, "can't get memory for remote connect args\n");
+ ret = xpNoMemory;
+ goto out_1;
+ }
+
+ part->chctl.all_flags = 0;
+ spin_lock_init(&part->chctl_lock);
+
+ atomic_set(&part->channel_mgr_requests, 1);
+ init_waitqueue_head(&part->channel_mgr_wq);
+
+ part->nchannels = XPC_MAX_NCHANNELS;
+
+ atomic_set(&part->nchannels_active, 0);
+ atomic_set(&part->nchannels_engaged, 0);
+
+ for (ch_number = 0; ch_number < part->nchannels; ch_number++) {
+ ch = &part->channels[ch_number];
+
+ ch->partid = partid;
+ ch->number = ch_number;
+ ch->flags = XPC_C_DISCONNECTED;
+
+ atomic_set(&ch->kthreads_assigned, 0);
+ atomic_set(&ch->kthreads_idle, 0);
+ atomic_set(&ch->kthreads_active, 0);
+
+ atomic_set(&ch->references, 0);
+ atomic_set(&ch->n_to_notify, 0);
+
+ spin_lock_init(&ch->lock);
+ init_completion(&ch->wdisconnect_wait);
+
+ atomic_set(&ch->n_on_msg_allocate_wq, 0);
+ init_waitqueue_head(&ch->msg_allocate_wq);
+ init_waitqueue_head(&ch->idle_wq);
+ }
+
+ ret = xpc_arch_ops.setup_ch_structures(part);
+ if (ret != xpSuccess)
+ goto out_2;
+
+ /*
+ * With the setting of the partition setup_state to XPC_P_SS_SETUP,
+ * we're declaring that this partition is ready to go.
+ */
+ part->setup_state = XPC_P_SS_SETUP;
+
+ return xpSuccess;
+
+ /* setup of ch structures failed */
+out_2:
+ kfree(part->remote_openclose_args_base);
+ part->remote_openclose_args = NULL;
+out_1:
+ kfree(part->channels);
+ part->channels = NULL;
+ return ret;
+}
+
+/*
+ * Teardown the channel structures necessary to support XPartition Communication
+ * between the specified remote partition and the local one.
+ */
+static void
+xpc_teardown_ch_structures(struct xpc_partition *part)
+{
+ DBUG_ON(atomic_read(&part->nchannels_engaged) != 0);
+ DBUG_ON(atomic_read(&part->nchannels_active) != 0);
+
+ /*
+ * Make this partition inaccessible to local processes by marking it
+ * as no longer setup. Then wait before proceeding with the teardown
+ * until all existing references cease.
+ */
+ DBUG_ON(part->setup_state != XPC_P_SS_SETUP);
+ part->setup_state = XPC_P_SS_WTEARDOWN;
+
+ wait_event(part->teardown_wq, (atomic_read(&part->references) == 0));
+
+ /* now we can begin tearing down the infrastructure */
+
+ xpc_arch_ops.teardown_ch_structures(part);
+
+ kfree(part->remote_openclose_args_base);
+ part->remote_openclose_args = NULL;
+ kfree(part->channels);
+ part->channels = NULL;
+
+ part->setup_state = XPC_P_SS_TORNDOWN;
+}
+
+/*
+ * When XPC HB determines that a partition has come up, it will create a new
+ * kthread and that kthread will call this function to attempt to set up the
+ * basic infrastructure used for Cross Partition Communication with the newly
+ * upped partition.
+ *
+ * The kthread that was created by XPC HB and which setup the XPC
+ * infrastructure will remain assigned to the partition becoming the channel
+ * manager for that partition until the partition is deactivating, at which
+ * time the kthread will teardown the XPC infrastructure and then exit.
+ */
+static int
+xpc_activating(void *__partid)
+{
+ short partid = (u64)__partid;
+ struct xpc_partition *part = &xpc_partitions[partid];
+ unsigned long irq_flags;
+
+ DBUG_ON(partid < 0 || partid >= xp_max_npartitions);
+
+ spin_lock_irqsave(&part->act_lock, irq_flags);
+
+ if (part->act_state == XPC_P_AS_DEACTIVATING) {
+ part->act_state = XPC_P_AS_INACTIVE;
+ spin_unlock_irqrestore(&part->act_lock, irq_flags);
+ part->remote_rp_pa = 0;
+ return 0;
+ }
+
+ /* indicate the thread is activating */
+ DBUG_ON(part->act_state != XPC_P_AS_ACTIVATION_REQ);
+ part->act_state = XPC_P_AS_ACTIVATING;
+
+ XPC_SET_REASON(part, 0, 0);
+ spin_unlock_irqrestore(&part->act_lock, irq_flags);
+
+ dev_dbg(xpc_part, "activating partition %d\n", partid);
+
+ xpc_arch_ops.allow_hb(partid);
+
+ if (xpc_setup_ch_structures(part) == xpSuccess) {
+ (void)xpc_part_ref(part); /* this will always succeed */
+
+ if (xpc_arch_ops.make_first_contact(part) == xpSuccess) {
+ xpc_mark_partition_active(part);
+ xpc_channel_mgr(part);
+ /* won't return until partition is deactivating */
+ }
+
+ xpc_part_deref(part);
+ xpc_teardown_ch_structures(part);
+ }
+
+ xpc_arch_ops.disallow_hb(partid);
+ xpc_mark_partition_inactive(part);
+
+ if (part->reason == xpReactivating) {
+ /* interrupting ourselves results in activating partition */
+ xpc_arch_ops.request_partition_reactivation(part);
+ }
+
+ return 0;
+}
+
+void
+xpc_activate_partition(struct xpc_partition *part)
+{
+ short partid = XPC_PARTID(part);
+ unsigned long irq_flags;
+ struct task_struct *kthread;
+
+ spin_lock_irqsave(&part->act_lock, irq_flags);
+
+ DBUG_ON(part->act_state != XPC_P_AS_INACTIVE);
+
+ part->act_state = XPC_P_AS_ACTIVATION_REQ;
+ XPC_SET_REASON(part, xpCloneKThread, __LINE__);
+
+ spin_unlock_irqrestore(&part->act_lock, irq_flags);
+
+ kthread = kthread_run(xpc_activating, (void *)((u64)partid), "xpc%02d",
+ partid);
+ if (IS_ERR(kthread)) {
+ spin_lock_irqsave(&part->act_lock, irq_flags);
+ part->act_state = XPC_P_AS_INACTIVE;
+ XPC_SET_REASON(part, xpCloneKThreadFailed, __LINE__);
+ spin_unlock_irqrestore(&part->act_lock, irq_flags);
+ }
+}
+
+void
+xpc_activate_kthreads(struct xpc_channel *ch, int needed)
+{
+ int idle = atomic_read(&ch->kthreads_idle);
+ int assigned = atomic_read(&ch->kthreads_assigned);
+ int wakeup;
+
+ DBUG_ON(needed <= 0);
+
+ if (idle > 0) {
+ wakeup = (needed > idle) ? idle : needed;
+ needed -= wakeup;
+
+ dev_dbg(xpc_chan, "wakeup %d idle kthreads, partid=%d, "
+ "channel=%d\n", wakeup, ch->partid, ch->number);
+
+ /* only wakeup the requested number of kthreads */
+ wake_up_nr(&ch->idle_wq, wakeup);
+ }
+
+ if (needed <= 0)
+ return;
+
+ if (needed + assigned > ch->kthreads_assigned_limit) {
+ needed = ch->kthreads_assigned_limit - assigned;
+ if (needed <= 0)
+ return;
+ }
+
+ dev_dbg(xpc_chan, "create %d new kthreads, partid=%d, channel=%d\n",
+ needed, ch->partid, ch->number);
+
+ xpc_create_kthreads(ch, needed, 0);
+}
+
+/*
+ * This function is where XPC's kthreads wait for messages to deliver.
+ */
+static void
+xpc_kthread_waitmsgs(struct xpc_partition *part, struct xpc_channel *ch)
+{
+ int (*n_of_deliverable_payloads) (struct xpc_channel *) =
+ xpc_arch_ops.n_of_deliverable_payloads;
+
+ do {
+ /* deliver messages to their intended recipients */
+
+ while (n_of_deliverable_payloads(ch) > 0 &&
+ !(ch->flags & XPC_C_DISCONNECTING)) {
+ xpc_deliver_payload(ch);
+ }
+
+ if (atomic_inc_return(&ch->kthreads_idle) >
+ ch->kthreads_idle_limit) {
+ /* too many idle kthreads on this channel */
+ atomic_dec(&ch->kthreads_idle);
+ break;
+ }
+
+ dev_dbg(xpc_chan, "idle kthread calling "
+ "wait_event_interruptible_exclusive()\n");
+
+ (void)wait_event_interruptible_exclusive(ch->idle_wq,
+ (n_of_deliverable_payloads(ch) > 0 ||
+ (ch->flags & XPC_C_DISCONNECTING)));
+
+ atomic_dec(&ch->kthreads_idle);
+
+ } while (!(ch->flags & XPC_C_DISCONNECTING));
+}
+
+static int
+xpc_kthread_start(void *args)
+{
+ short partid = XPC_UNPACK_ARG1(args);
+ u16 ch_number = XPC_UNPACK_ARG2(args);
+ struct xpc_partition *part = &xpc_partitions[partid];
+ struct xpc_channel *ch;
+ int n_needed;
+ unsigned long irq_flags;
+ int (*n_of_deliverable_payloads) (struct xpc_channel *) =
+ xpc_arch_ops.n_of_deliverable_payloads;
+
+ dev_dbg(xpc_chan, "kthread starting, partid=%d, channel=%d\n",
+ partid, ch_number);
+
+ ch = &part->channels[ch_number];
+
+ if (!(ch->flags & XPC_C_DISCONNECTING)) {
+
+ /* let registerer know that connection has been established */
+
+ spin_lock_irqsave(&ch->lock, irq_flags);
+ if (!(ch->flags & XPC_C_CONNECTEDCALLOUT)) {
+ ch->flags |= XPC_C_CONNECTEDCALLOUT;
+ spin_unlock_irqrestore(&ch->lock, irq_flags);
+
+ xpc_connected_callout(ch);
+
+ spin_lock_irqsave(&ch->lock, irq_flags);
+ ch->flags |= XPC_C_CONNECTEDCALLOUT_MADE;
+ spin_unlock_irqrestore(&ch->lock, irq_flags);
+
+ /*
+ * It is possible that while the callout was being
+ * made that the remote partition sent some messages.
+ * If that is the case, we may need to activate
+ * additional kthreads to help deliver them. We only
+ * need one less than total #of messages to deliver.
+ */
+ n_needed = n_of_deliverable_payloads(ch) - 1;
+ if (n_needed > 0 && !(ch->flags & XPC_C_DISCONNECTING))
+ xpc_activate_kthreads(ch, n_needed);
+
+ } else {
+ spin_unlock_irqrestore(&ch->lock, irq_flags);
+ }
+
+ xpc_kthread_waitmsgs(part, ch);
+ }
+
+ /* let registerer know that connection is disconnecting */
+
+ spin_lock_irqsave(&ch->lock, irq_flags);
+ if ((ch->flags & XPC_C_CONNECTEDCALLOUT_MADE) &&
+ !(ch->flags & XPC_C_DISCONNECTINGCALLOUT)) {
+ ch->flags |= XPC_C_DISCONNECTINGCALLOUT;
+ spin_unlock_irqrestore(&ch->lock, irq_flags);
+
+ xpc_disconnect_callout(ch, xpDisconnecting);
+
+ spin_lock_irqsave(&ch->lock, irq_flags);
+ ch->flags |= XPC_C_DISCONNECTINGCALLOUT_MADE;
+ }
+ spin_unlock_irqrestore(&ch->lock, irq_flags);
+
+ if (atomic_dec_return(&ch->kthreads_assigned) == 0 &&
+ atomic_dec_return(&part->nchannels_engaged) == 0) {
+ xpc_arch_ops.indicate_partition_disengaged(part);
+ }
+
+ xpc_msgqueue_deref(ch);
+
+ dev_dbg(xpc_chan, "kthread exiting, partid=%d, channel=%d\n",
+ partid, ch_number);
+
+ xpc_part_deref(part);
+ return 0;
+}
+
+/*
+ * For each partition that XPC has established communications with, there is
+ * a minimum of one kernel thread assigned to perform any operation that
+ * may potentially sleep or block (basically the callouts to the asynchronous
+ * functions registered via xpc_connect()).
+ *
+ * Additional kthreads are created and destroyed by XPC as the workload
+ * demands.
+ *
+ * A kthread is assigned to one of the active channels that exists for a given
+ * partition.
+ */
+void
+xpc_create_kthreads(struct xpc_channel *ch, int needed,
+ int ignore_disconnecting)
+{
+ unsigned long irq_flags;
+ u64 args = XPC_PACK_ARGS(ch->partid, ch->number);
+ struct xpc_partition *part = &xpc_partitions[ch->partid];
+ struct task_struct *kthread;
+ void (*indicate_partition_disengaged) (struct xpc_partition *) =
+ xpc_arch_ops.indicate_partition_disengaged;
+
+ while (needed-- > 0) {
+
+ /*
+ * The following is done on behalf of the newly created
+ * kthread. That kthread is responsible for doing the
+ * counterpart to the following before it exits.
+ */
+ if (ignore_disconnecting) {
+ if (!atomic_inc_not_zero(&ch->kthreads_assigned)) {
+ /* kthreads assigned had gone to zero */
+ BUG_ON(!(ch->flags &
+ XPC_C_DISCONNECTINGCALLOUT_MADE));
+ break;
+ }
+
+ } else if (ch->flags & XPC_C_DISCONNECTING) {
+ break;
+
+ } else if (atomic_inc_return(&ch->kthreads_assigned) == 1 &&
+ atomic_inc_return(&part->nchannels_engaged) == 1) {
+ xpc_arch_ops.indicate_partition_engaged(part);
+ }
+ (void)xpc_part_ref(part);
+ xpc_msgqueue_ref(ch);
+
+ kthread = kthread_run(xpc_kthread_start, (void *)args,
+ "xpc%02dc%d", ch->partid, ch->number);
+ if (IS_ERR(kthread)) {
+ /* the fork failed */
+
+ /*
+ * NOTE: if (ignore_disconnecting &&
+ * !(ch->flags & XPC_C_DISCONNECTINGCALLOUT)) is true,
+ * then we'll deadlock if all other kthreads assigned
+ * to this channel are blocked in the channel's
+ * registerer, because the only thing that will unblock
+ * them is the xpDisconnecting callout that this
+ * failed kthread_run() would have made.
+ */
+
+ if (atomic_dec_return(&ch->kthreads_assigned) == 0 &&
+ atomic_dec_return(&part->nchannels_engaged) == 0) {
+ indicate_partition_disengaged(part);
+ }
+ xpc_msgqueue_deref(ch);
+ xpc_part_deref(part);
+
+ if (atomic_read(&ch->kthreads_assigned) <
+ ch->kthreads_idle_limit) {
+ /*
+ * Flag this as an error only if we have an
+ * insufficient #of kthreads for the channel
+ * to function.
+ */
+ spin_lock_irqsave(&ch->lock, irq_flags);
+ XPC_DISCONNECT_CHANNEL(ch, xpLackOfResources,
+ &irq_flags);
+ spin_unlock_irqrestore(&ch->lock, irq_flags);
+ }
+ break;
+ }
+ }
+}
+
+void
+xpc_disconnect_wait(int ch_number)
+{
+ unsigned long irq_flags;
+ short partid;
+ struct xpc_partition *part;
+ struct xpc_channel *ch;
+ int wakeup_channel_mgr;
+
+ /* now wait for all callouts to the caller's function to cease */
+ for (partid = 0; partid < xp_max_npartitions; partid++) {
+ part = &xpc_partitions[partid];
+
+ if (!xpc_part_ref(part))
+ continue;
+
+ ch = &part->channels[ch_number];
+
+ if (!(ch->flags & XPC_C_WDISCONNECT)) {
+ xpc_part_deref(part);
+ continue;
+ }
+
+ wait_for_completion(&ch->wdisconnect_wait);
+
+ spin_lock_irqsave(&ch->lock, irq_flags);
+ DBUG_ON(!(ch->flags & XPC_C_DISCONNECTED));
+ wakeup_channel_mgr = 0;
+
+ if (ch->delayed_chctl_flags) {
+ if (part->act_state != XPC_P_AS_DEACTIVATING) {
+ spin_lock(&part->chctl_lock);
+ part->chctl.flags[ch->number] |=
+ ch->delayed_chctl_flags;
+ spin_unlock(&part->chctl_lock);
+ wakeup_channel_mgr = 1;
+ }
+ ch->delayed_chctl_flags = 0;
+ }
+
+ ch->flags &= ~XPC_C_WDISCONNECT;
+ spin_unlock_irqrestore(&ch->lock, irq_flags);
+
+ if (wakeup_channel_mgr)
+ xpc_wakeup_channel_mgr(part);
+
+ xpc_part_deref(part);
+ }
+}
+
+static int
+xpc_setup_partitions(void)
+{
+ short partid;
+ struct xpc_partition *part;
+
+ xpc_partitions = kzalloc(sizeof(struct xpc_partition) *
+ xp_max_npartitions, GFP_KERNEL);
+ if (xpc_partitions == NULL) {
+ dev_err(xpc_part, "can't get memory for partition structure\n");
+ return -ENOMEM;
+ }
+
+ /*
+ * The first few fields of each entry of xpc_partitions[] need to
+ * be initialized now so that calls to xpc_connect() and
+ * xpc_disconnect() can be made prior to the activation of any remote
+ * partition. NOTE THAT NONE OF THE OTHER FIELDS BELONGING TO THESE
+ * ENTRIES ARE MEANINGFUL UNTIL AFTER AN ENTRY'S CORRESPONDING
+ * PARTITION HAS BEEN ACTIVATED.
+ */
+ for (partid = 0; partid < xp_max_npartitions; partid++) {
+ part = &xpc_partitions[partid];
+
+ DBUG_ON((u64)part != L1_CACHE_ALIGN((u64)part));
+
+ part->activate_IRQ_rcvd = 0;
+ spin_lock_init(&part->act_lock);
+ part->act_state = XPC_P_AS_INACTIVE;
+ XPC_SET_REASON(part, 0, 0);
+
+ init_timer(&part->disengage_timer);
+ part->disengage_timer.function =
+ xpc_timeout_partition_disengage;
+ part->disengage_timer.data = (unsigned long)part;
+
+ part->setup_state = XPC_P_SS_UNSET;
+ init_waitqueue_head(&part->teardown_wq);
+ atomic_set(&part->references, 0);
+ }
+
+ return xpc_arch_ops.setup_partitions();
+}
+
+static void
+xpc_teardown_partitions(void)
+{
+ xpc_arch_ops.teardown_partitions();
+ kfree(xpc_partitions);
+}
+
+static void
+xpc_do_exit(enum xp_retval reason)
+{
+ short partid;
+ int active_part_count, printed_waiting_msg = 0;
+ struct xpc_partition *part;
+ unsigned long printmsg_time, disengage_timeout = 0;
+
+ /* a 'rmmod XPC' and a 'reboot' cannot both end up here together */
+ DBUG_ON(xpc_exiting == 1);
+
+ /*
+ * Let the heartbeat checker thread and the discovery thread
+ * (if one is running) know that they should exit. Also wake up
+ * the heartbeat checker thread in case it's sleeping.
+ */
+ xpc_exiting = 1;
+ wake_up_interruptible(&xpc_activate_IRQ_wq);
+
+ /* wait for the discovery thread to exit */
+ wait_for_completion(&xpc_discovery_exited);
+
+ /* wait for the heartbeat checker thread to exit */
+ wait_for_completion(&xpc_hb_checker_exited);
+
+ /* sleep for a 1/3 of a second or so */
+ (void)msleep_interruptible(300);
+
+ /* wait for all partitions to become inactive */
+
+ printmsg_time = jiffies + (XPC_DEACTIVATE_PRINTMSG_INTERVAL * HZ);
+ xpc_disengage_timedout = 0;
+
+ do {
+ active_part_count = 0;
+
+ for (partid = 0; partid < xp_max_npartitions; partid++) {
+ part = &xpc_partitions[partid];
+
+ if (xpc_partition_disengaged(part) &&
+ part->act_state == XPC_P_AS_INACTIVE) {
+ continue;
+ }
+
+ active_part_count++;
+
+ XPC_DEACTIVATE_PARTITION(part, reason);
+
+ if (part->disengage_timeout > disengage_timeout)
+ disengage_timeout = part->disengage_timeout;
+ }
+
+ if (xpc_arch_ops.any_partition_engaged()) {
+ if (time_is_before_jiffies(printmsg_time)) {
+ dev_info(xpc_part, "waiting for remote "
+ "partitions to deactivate, timeout in "
+ "%ld seconds\n", (disengage_timeout -
+ jiffies) / HZ);
+ printmsg_time = jiffies +
+ (XPC_DEACTIVATE_PRINTMSG_INTERVAL * HZ);
+ printed_waiting_msg = 1;
+ }
+
+ } else if (active_part_count > 0) {
+ if (printed_waiting_msg) {
+ dev_info(xpc_part, "waiting for local partition"
+ " to deactivate\n");
+ printed_waiting_msg = 0;
+ }
+
+ } else {
+ if (!xpc_disengage_timedout) {
+ dev_info(xpc_part, "all partitions have "
+ "deactivated\n");
+ }
+ break;
+ }
+
+ /* sleep for a 1/3 of a second or so */
+ (void)msleep_interruptible(300);
+
+ } while (1);
+
+ DBUG_ON(xpc_arch_ops.any_partition_engaged());
+
+ xpc_teardown_rsvd_page();
+
+ if (reason == xpUnloading) {
+ (void)unregister_die_notifier(&xpc_die_notifier);
+ (void)unregister_reboot_notifier(&xpc_reboot_notifier);
+ }
+
+ /* clear the interface to XPC's functions */
+ xpc_clear_interface();
+
+ if (xpc_sysctl)
+ unregister_sysctl_table(xpc_sysctl);
+
+ xpc_teardown_partitions();
+
+ if (is_shub())
+ xpc_exit_sn2();
+ else if (is_uv())
+ xpc_exit_uv();
+}
+
+/*
+ * This function is called when the system is being rebooted.
+ */
+static int
+xpc_system_reboot(struct notifier_block *nb, unsigned long event, void *unused)
+{
+ enum xp_retval reason;
+
+ switch (event) {
+ case SYS_RESTART:
+ reason = xpSystemReboot;
+ break;
+ case SYS_HALT:
+ reason = xpSystemHalt;
+ break;
+ case SYS_POWER_OFF:
+ reason = xpSystemPoweroff;
+ break;
+ default:
+ reason = xpSystemGoingDown;
+ }
+
+ xpc_do_exit(reason);
+ return NOTIFY_DONE;
+}
+
+/* Used to only allow one cpu to complete disconnect */
+static unsigned int xpc_die_disconnecting;
+
+/*
+ * Notify other partitions to deactivate from us by first disengaging from all
+ * references to our memory.
+ */
+static void
+xpc_die_deactivate(void)
+{
+ struct xpc_partition *part;
+ short partid;
+ int any_engaged;
+ long keep_waiting;
+ long wait_to_print;
+
+ if (cmpxchg(&xpc_die_disconnecting, 0, 1))
+ return;
+
+ /* keep xpc_hb_checker thread from doing anything (just in case) */
+ xpc_exiting = 1;
+
+ xpc_arch_ops.disallow_all_hbs(); /*indicate we're deactivated */
+
+ for (partid = 0; partid < xp_max_npartitions; partid++) {
+ part = &xpc_partitions[partid];
+
+ if (xpc_arch_ops.partition_engaged(partid) ||
+ part->act_state != XPC_P_AS_INACTIVE) {
+ xpc_arch_ops.request_partition_deactivation(part);
+ xpc_arch_ops.indicate_partition_disengaged(part);
+ }
+ }
+
+ /*
+ * Though we requested that all other partitions deactivate from us,
+ * we only wait until they've all disengaged or we've reached the
+ * defined timelimit.
+ *
+ * Given that one iteration through the following while-loop takes
+ * approximately 200 microseconds, calculate the #of loops to take
+ * before bailing and the #of loops before printing a waiting message.
+ */
+ keep_waiting = xpc_disengage_timelimit * 1000 * 5;
+ wait_to_print = XPC_DEACTIVATE_PRINTMSG_INTERVAL * 1000 * 5;
+
+ while (1) {
+ any_engaged = xpc_arch_ops.any_partition_engaged();
+ if (!any_engaged) {
+ dev_info(xpc_part, "all partitions have deactivated\n");
+ break;
+ }
+
+ if (!keep_waiting--) {
+ for (partid = 0; partid < xp_max_npartitions;
+ partid++) {
+ if (xpc_arch_ops.partition_engaged(partid)) {
+ dev_info(xpc_part, "deactivate from "
+ "remote partition %d timed "
+ "out\n", partid);
+ }
+ }
+ break;
+ }
+
+ if (!wait_to_print--) {
+ dev_info(xpc_part, "waiting for remote partitions to "
+ "deactivate, timeout in %ld seconds\n",
+ keep_waiting / (1000 * 5));
+ wait_to_print = XPC_DEACTIVATE_PRINTMSG_INTERVAL *
+ 1000 * 5;
+ }
+
+ udelay(200);
+ }
+}
+
+/*
+ * This function is called when the system is being restarted or halted due
+ * to some sort of system failure. If this is the case we need to notify the
+ * other partitions to disengage from all references to our memory.
+ * This function can also be called when our heartbeater could be offlined
+ * for a time. In this case we need to notify other partitions to not worry
+ * about the lack of a heartbeat.
+ */
+static int
+xpc_system_die(struct notifier_block *nb, unsigned long event, void *_die_args)
+{
+#ifdef CONFIG_IA64 /* !!! temporary kludge */
+ switch (event) {
+ case DIE_MACHINE_RESTART:
+ case DIE_MACHINE_HALT:
+ xpc_die_deactivate();
+ break;
+
+ case DIE_KDEBUG_ENTER:
+ /* Should lack of heartbeat be ignored by other partitions? */
+ if (!xpc_kdebug_ignore)
+ break;
+
+ /* fall through */
+ case DIE_MCA_MONARCH_ENTER:
+ case DIE_INIT_MONARCH_ENTER:
+ xpc_arch_ops.offline_heartbeat();
+ break;
+
+ case DIE_KDEBUG_LEAVE:
+ /* Is lack of heartbeat being ignored by other partitions? */
+ if (!xpc_kdebug_ignore)
+ break;
+
+ /* fall through */
+ case DIE_MCA_MONARCH_LEAVE:
+ case DIE_INIT_MONARCH_LEAVE:
+ xpc_arch_ops.online_heartbeat();
+ break;
+ }
+#else
+ struct die_args *die_args = _die_args;
+
+ switch (event) {
+ case DIE_TRAP:
+ if (die_args->trapnr == X86_TRAP_DF)
+ xpc_die_deactivate();
+
+ if (((die_args->trapnr == X86_TRAP_MF) ||
+ (die_args->trapnr == X86_TRAP_XF)) &&
+ !user_mode(die_args->regs))
+ xpc_die_deactivate();
+
+ break;
+ case DIE_INT3:
+ case DIE_DEBUG:
+ break;
+ case DIE_OOPS:
+ case DIE_GPF:
+ default:
+ xpc_die_deactivate();
+ }
+#endif
+
+ return NOTIFY_DONE;
+}
+
+int __init
+xpc_init(void)
+{
+ int ret;
+ struct task_struct *kthread;
+
+ dev_set_name(xpc_part, "part");
+ dev_set_name(xpc_chan, "chan");
+
+ if (is_shub()) {
+ /*
+ * The ia64-sn2 architecture supports at most 64 partitions.
+ * And the inability to unregister remote amos restricts us
+ * further to only support exactly 64 partitions on this
+ * architecture, no less.
+ */
+ if (xp_max_npartitions != 64) {
+ dev_err(xpc_part, "max #of partitions not set to 64\n");
+ ret = -EINVAL;
+ } else {
+ ret = xpc_init_sn2();
+ }
+
+ } else if (is_uv()) {
+ ret = xpc_init_uv();
+
+ } else {
+ ret = -ENODEV;
+ }
+
+ if (ret != 0)
+ return ret;
+
+ ret = xpc_setup_partitions();
+ if (ret != 0) {
+ dev_err(xpc_part, "can't get memory for partition structure\n");
+ goto out_1;
+ }
+
+ xpc_sysctl = register_sysctl_table(xpc_sys_dir);
+
+ /*
+ * Fill the partition reserved page with the information needed by
+ * other partitions to discover we are alive and establish initial
+ * communications.
+ */
+ ret = xpc_setup_rsvd_page();
+ if (ret != 0) {
+ dev_err(xpc_part, "can't setup our reserved page\n");
+ goto out_2;
+ }
+
+ /* add ourselves to the reboot_notifier_list */
+ ret = register_reboot_notifier(&xpc_reboot_notifier);
+ if (ret != 0)
+ dev_warn(xpc_part, "can't register reboot notifier\n");
+
+ /* add ourselves to the die_notifier list */
+ ret = register_die_notifier(&xpc_die_notifier);
+ if (ret != 0)
+ dev_warn(xpc_part, "can't register die notifier\n");
+
+ /*
+ * The real work-horse behind xpc. This processes incoming
+ * interrupts and monitors remote heartbeats.
+ */
+ kthread = kthread_run(xpc_hb_checker, NULL, XPC_HB_CHECK_THREAD_NAME);
+ if (IS_ERR(kthread)) {
+ dev_err(xpc_part, "failed while forking hb check thread\n");
+ ret = -EBUSY;
+ goto out_3;
+ }
+
+ /*
+ * Startup a thread that will attempt to discover other partitions to
+ * activate based on info provided by SAL. This new thread is short
+ * lived and will exit once discovery is complete.
+ */
+ kthread = kthread_run(xpc_initiate_discovery, NULL,
+ XPC_DISCOVERY_THREAD_NAME);
+ if (IS_ERR(kthread)) {
+ dev_err(xpc_part, "failed while forking discovery thread\n");
+
+ /* mark this new thread as a non-starter */
+ complete(&xpc_discovery_exited);
+
+ xpc_do_exit(xpUnloading);
+ return -EBUSY;
+ }
+
+ /* set the interface to point at XPC's functions */
+ xpc_set_interface(xpc_initiate_connect, xpc_initiate_disconnect,
+ xpc_initiate_send, xpc_initiate_send_notify,
+ xpc_initiate_received, xpc_initiate_partid_to_nasids);
+
+ return 0;
+
+ /* initialization was not successful */
+out_3:
+ xpc_teardown_rsvd_page();
+
+ (void)unregister_die_notifier(&xpc_die_notifier);
+ (void)unregister_reboot_notifier(&xpc_reboot_notifier);
+out_2:
+ if (xpc_sysctl)
+ unregister_sysctl_table(xpc_sysctl);
+
+ xpc_teardown_partitions();
+out_1:
+ if (is_shub())
+ xpc_exit_sn2();
+ else if (is_uv())
+ xpc_exit_uv();
+ return ret;
+}
+
+module_init(xpc_init);
+
+void __exit
+xpc_exit(void)
+{
+ xpc_do_exit(xpUnloading);
+}
+
+module_exit(xpc_exit);
+
+MODULE_AUTHOR("Silicon Graphics, Inc.");
+MODULE_DESCRIPTION("Cross Partition Communication (XPC) support");
+MODULE_LICENSE("GPL");
+
+module_param(xpc_hb_interval, int, 0);
+MODULE_PARM_DESC(xpc_hb_interval, "Number of seconds between "
+ "heartbeat increments.");
+
+module_param(xpc_hb_check_interval, int, 0);
+MODULE_PARM_DESC(xpc_hb_check_interval, "Number of seconds between "
+ "heartbeat checks.");
+
+module_param(xpc_disengage_timelimit, int, 0);
+MODULE_PARM_DESC(xpc_disengage_timelimit, "Number of seconds to wait "
+ "for disengage to complete.");
+
+module_param(xpc_kdebug_ignore, int, 0);
+MODULE_PARM_DESC(xpc_kdebug_ignore, "Should lack of heartbeat be ignored by "
+ "other partitions when dropping into kdebug.");
diff --git a/drivers/misc/sgi-xp/xpc_partition.c b/drivers/misc/sgi-xp/xpc_partition.c
new file mode 100644
index 0000000..6956f7e
--- /dev/null
+++ b/drivers/misc/sgi-xp/xpc_partition.c
@@ -0,0 +1,541 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 2004-2008 Silicon Graphics, Inc. All Rights Reserved.
+ */
+
+/*
+ * Cross Partition Communication (XPC) partition support.
+ *
+ * This is the part of XPC that detects the presence/absence of
+ * other partitions. It provides a heartbeat and monitors the
+ * heartbeats of other partitions.
+ *
+ */
+
+#include <linux/device.h>
+#include <linux/hardirq.h>
+#include <linux/slab.h>
+#include "xpc.h"
+#include <asm/uv/uv_hub.h>
+
+/* XPC is exiting flag */
+int xpc_exiting;
+
+/* this partition's reserved page pointers */
+struct xpc_rsvd_page *xpc_rsvd_page;
+static unsigned long *xpc_part_nasids;
+unsigned long *xpc_mach_nasids;
+
+static int xpc_nasid_mask_nbytes; /* #of bytes in nasid mask */
+int xpc_nasid_mask_nlongs; /* #of longs in nasid mask */
+
+struct xpc_partition *xpc_partitions;
+
+/*
+ * Guarantee that the kmalloc'd memory is cacheline aligned.
+ */
+void *
+xpc_kmalloc_cacheline_aligned(size_t size, gfp_t flags, void **base)
+{
+ /* see if kmalloc will give us cachline aligned memory by default */
+ *base = kmalloc(size, flags);
+ if (*base == NULL)
+ return NULL;
+
+ if ((u64)*base == L1_CACHE_ALIGN((u64)*base))
+ return *base;
+
+ kfree(*base);
+
+ /* nope, we'll have to do it ourselves */
+ *base = kmalloc(size + L1_CACHE_BYTES, flags);
+ if (*base == NULL)
+ return NULL;
+
+ return (void *)L1_CACHE_ALIGN((u64)*base);
+}
+
+/*
+ * Given a nasid, get the physical address of the partition's reserved page
+ * for that nasid. This function returns 0 on any error.
+ */
+static unsigned long
+xpc_get_rsvd_page_pa(int nasid)
+{
+ enum xp_retval ret;
+ u64 cookie = 0;
+ unsigned long rp_pa = nasid; /* seed with nasid */
+ size_t len = 0;
+ size_t buf_len = 0;
+ void *buf = buf;
+ void *buf_base = NULL;
+ enum xp_retval (*get_partition_rsvd_page_pa)
+ (void *, u64 *, unsigned long *, size_t *) =
+ xpc_arch_ops.get_partition_rsvd_page_pa;
+
+ while (1) {
+
+ /* !!! rp_pa will need to be _gpa on UV.
+ * ??? So do we save it into the architecture specific parts
+ * ??? of the xpc_partition structure? Do we rename this
+ * ??? function or have two versions? Rename rp_pa for UV to
+ * ??? rp_gpa?
+ */
+ ret = get_partition_rsvd_page_pa(buf, &cookie, &rp_pa, &len);
+
+ dev_dbg(xpc_part, "SAL returned with ret=%d, cookie=0x%016lx, "
+ "address=0x%016lx, len=0x%016lx\n", ret,
+ (unsigned long)cookie, rp_pa, len);
+
+ if (ret != xpNeedMoreInfo)
+ break;
+
+ /* !!! L1_CACHE_ALIGN() is only a sn2-bte_copy requirement */
+ if (is_shub())
+ len = L1_CACHE_ALIGN(len);
+
+ if (len > buf_len) {
+ if (buf_base != NULL)
+ kfree(buf_base);
+ buf_len = L1_CACHE_ALIGN(len);
+ buf = xpc_kmalloc_cacheline_aligned(buf_len, GFP_KERNEL,
+ &buf_base);
+ if (buf_base == NULL) {
+ dev_err(xpc_part, "unable to kmalloc "
+ "len=0x%016lx\n", buf_len);
+ ret = xpNoMemory;
+ break;
+ }
+ }
+
+ ret = xp_remote_memcpy(xp_pa(buf), rp_pa, len);
+ if (ret != xpSuccess) {
+ dev_dbg(xpc_part, "xp_remote_memcpy failed %d\n", ret);
+ break;
+ }
+ }
+
+ kfree(buf_base);
+
+ if (ret != xpSuccess)
+ rp_pa = 0;
+
+ dev_dbg(xpc_part, "reserved page at phys address 0x%016lx\n", rp_pa);
+ return rp_pa;
+}
+
+/*
+ * Fill the partition reserved page with the information needed by
+ * other partitions to discover we are alive and establish initial
+ * communications.
+ */
+int
+xpc_setup_rsvd_page(void)
+{
+ int ret;
+ struct xpc_rsvd_page *rp;
+ unsigned long rp_pa;
+ unsigned long new_ts_jiffies;
+
+ /* get the local reserved page's address */
+
+ preempt_disable();
+ rp_pa = xpc_get_rsvd_page_pa(xp_cpu_to_nasid(smp_processor_id()));
+ preempt_enable();
+ if (rp_pa == 0) {
+ dev_err(xpc_part, "SAL failed to locate the reserved page\n");
+ return -ESRCH;
+ }
+ rp = (struct xpc_rsvd_page *)__va(xp_socket_pa(rp_pa));
+
+ if (rp->SAL_version < 3) {
+ /* SAL_versions < 3 had a SAL_partid defined as a u8 */
+ rp->SAL_partid &= 0xff;
+ }
+ BUG_ON(rp->SAL_partid != xp_partition_id);
+
+ if (rp->SAL_partid < 0 || rp->SAL_partid >= xp_max_npartitions) {
+ dev_err(xpc_part, "the reserved page's partid of %d is outside "
+ "supported range (< 0 || >= %d)\n", rp->SAL_partid,
+ xp_max_npartitions);
+ return -EINVAL;
+ }
+
+ rp->version = XPC_RP_VERSION;
+ rp->max_npartitions = xp_max_npartitions;
+
+ /* establish the actual sizes of the nasid masks */
+ if (rp->SAL_version == 1) {
+ /* SAL_version 1 didn't set the nasids_size field */
+ rp->SAL_nasids_size = 128;
+ }
+ xpc_nasid_mask_nbytes = rp->SAL_nasids_size;
+ xpc_nasid_mask_nlongs = BITS_TO_LONGS(rp->SAL_nasids_size *
+ BITS_PER_BYTE);
+
+ /* setup the pointers to the various items in the reserved page */
+ xpc_part_nasids = XPC_RP_PART_NASIDS(rp);
+ xpc_mach_nasids = XPC_RP_MACH_NASIDS(rp);
+
+ ret = xpc_arch_ops.setup_rsvd_page(rp);
+ if (ret != 0)
+ return ret;
+
+ /*
+ * Set timestamp of when reserved page was setup by XPC.
+ * This signifies to the remote partition that our reserved
+ * page is initialized.
+ */
+ new_ts_jiffies = jiffies;
+ if (new_ts_jiffies == 0 || new_ts_jiffies == rp->ts_jiffies)
+ new_ts_jiffies++;
+ rp->ts_jiffies = new_ts_jiffies;
+
+ xpc_rsvd_page = rp;
+ return 0;
+}
+
+void
+xpc_teardown_rsvd_page(void)
+{
+ /* a zero timestamp indicates our rsvd page is not initialized */
+ xpc_rsvd_page->ts_jiffies = 0;
+}
+
+/*
+ * Get a copy of a portion of the remote partition's rsvd page.
+ *
+ * remote_rp points to a buffer that is cacheline aligned for BTE copies and
+ * is large enough to contain a copy of their reserved page header and
+ * part_nasids mask.
+ */
+enum xp_retval
+xpc_get_remote_rp(int nasid, unsigned long *discovered_nasids,
+ struct xpc_rsvd_page *remote_rp, unsigned long *remote_rp_pa)
+{
+ int l;
+ enum xp_retval ret;
+
+ /* get the reserved page's physical address */
+
+ *remote_rp_pa = xpc_get_rsvd_page_pa(nasid);
+ if (*remote_rp_pa == 0)
+ return xpNoRsvdPageAddr;
+
+ /* pull over the reserved page header and part_nasids mask */
+ ret = xp_remote_memcpy(xp_pa(remote_rp), *remote_rp_pa,
+ XPC_RP_HEADER_SIZE + xpc_nasid_mask_nbytes);
+ if (ret != xpSuccess)
+ return ret;
+
+ if (discovered_nasids != NULL) {
+ unsigned long *remote_part_nasids =
+ XPC_RP_PART_NASIDS(remote_rp);
+
+ for (l = 0; l < xpc_nasid_mask_nlongs; l++)
+ discovered_nasids[l] |= remote_part_nasids[l];
+ }
+
+ /* zero timestamp indicates the reserved page has not been setup */
+ if (remote_rp->ts_jiffies == 0)
+ return xpRsvdPageNotSet;
+
+ if (XPC_VERSION_MAJOR(remote_rp->version) !=
+ XPC_VERSION_MAJOR(XPC_RP_VERSION)) {
+ return xpBadVersion;
+ }
+
+ /* check that both remote and local partids are valid for each side */
+ if (remote_rp->SAL_partid < 0 ||
+ remote_rp->SAL_partid >= xp_max_npartitions ||
+ remote_rp->max_npartitions <= xp_partition_id) {
+ return xpInvalidPartid;
+ }
+
+ if (remote_rp->SAL_partid == xp_partition_id)
+ return xpLocalPartid;
+
+ return xpSuccess;
+}
+
+/*
+ * See if the other side has responded to a partition deactivate request
+ * from us. Though we requested the remote partition to deactivate with regard
+ * to us, we really only need to wait for the other side to disengage from us.
+ */
+int
+xpc_partition_disengaged(struct xpc_partition *part)
+{
+ short partid = XPC_PARTID(part);
+ int disengaged;
+
+ disengaged = !xpc_arch_ops.partition_engaged(partid);
+ if (part->disengage_timeout) {
+ if (!disengaged) {
+ if (time_is_after_jiffies(part->disengage_timeout)) {
+ /* timelimit hasn't been reached yet */
+ return 0;
+ }
+
+ /*
+ * Other side hasn't responded to our deactivate
+ * request in a timely fashion, so assume it's dead.
+ */
+
+ dev_info(xpc_part, "deactivate request to remote "
+ "partition %d timed out\n", partid);
+ xpc_disengage_timedout = 1;
+ xpc_arch_ops.assume_partition_disengaged(partid);
+ disengaged = 1;
+ }
+ part->disengage_timeout = 0;
+
+ /* cancel the timer function, provided it's not us */
+ if (!in_interrupt())
+ del_singleshot_timer_sync(&part->disengage_timer);
+
+ DBUG_ON(part->act_state != XPC_P_AS_DEACTIVATING &&
+ part->act_state != XPC_P_AS_INACTIVE);
+ if (part->act_state != XPC_P_AS_INACTIVE)
+ xpc_wakeup_channel_mgr(part);
+
+ xpc_arch_ops.cancel_partition_deactivation_request(part);
+ }
+ return disengaged;
+}
+
+/*
+ * Mark specified partition as active.
+ */
+enum xp_retval
+xpc_mark_partition_active(struct xpc_partition *part)
+{
+ unsigned long irq_flags;
+ enum xp_retval ret;
+
+ dev_dbg(xpc_part, "setting partition %d to ACTIVE\n", XPC_PARTID(part));
+
+ spin_lock_irqsave(&part->act_lock, irq_flags);
+ if (part->act_state == XPC_P_AS_ACTIVATING) {
+ part->act_state = XPC_P_AS_ACTIVE;
+ ret = xpSuccess;
+ } else {
+ DBUG_ON(part->reason == xpSuccess);
+ ret = part->reason;
+ }
+ spin_unlock_irqrestore(&part->act_lock, irq_flags);
+
+ return ret;
+}
+
+/*
+ * Start the process of deactivating the specified partition.
+ */
+void
+xpc_deactivate_partition(const int line, struct xpc_partition *part,
+ enum xp_retval reason)
+{
+ unsigned long irq_flags;
+
+ spin_lock_irqsave(&part->act_lock, irq_flags);
+
+ if (part->act_state == XPC_P_AS_INACTIVE) {
+ XPC_SET_REASON(part, reason, line);
+ spin_unlock_irqrestore(&part->act_lock, irq_flags);
+ if (reason == xpReactivating) {
+ /* we interrupt ourselves to reactivate partition */
+ xpc_arch_ops.request_partition_reactivation(part);
+ }
+ return;
+ }
+ if (part->act_state == XPC_P_AS_DEACTIVATING) {
+ if ((part->reason == xpUnloading && reason != xpUnloading) ||
+ reason == xpReactivating) {
+ XPC_SET_REASON(part, reason, line);
+ }
+ spin_unlock_irqrestore(&part->act_lock, irq_flags);
+ return;
+ }
+
+ part->act_state = XPC_P_AS_DEACTIVATING;
+ XPC_SET_REASON(part, reason, line);
+
+ spin_unlock_irqrestore(&part->act_lock, irq_flags);
+
+ /* ask remote partition to deactivate with regard to us */
+ xpc_arch_ops.request_partition_deactivation(part);
+
+ /* set a timelimit on the disengage phase of the deactivation request */
+ part->disengage_timeout = jiffies + (xpc_disengage_timelimit * HZ);
+ part->disengage_timer.expires = part->disengage_timeout;
+ add_timer(&part->disengage_timer);
+
+ dev_dbg(xpc_part, "bringing partition %d down, reason = %d\n",
+ XPC_PARTID(part), reason);
+
+ xpc_partition_going_down(part, reason);
+}
+
+/*
+ * Mark specified partition as inactive.
+ */
+void
+xpc_mark_partition_inactive(struct xpc_partition *part)
+{
+ unsigned long irq_flags;
+
+ dev_dbg(xpc_part, "setting partition %d to INACTIVE\n",
+ XPC_PARTID(part));
+
+ spin_lock_irqsave(&part->act_lock, irq_flags);
+ part->act_state = XPC_P_AS_INACTIVE;
+ spin_unlock_irqrestore(&part->act_lock, irq_flags);
+ part->remote_rp_pa = 0;
+}
+
+/*
+ * SAL has provided a partition and machine mask. The partition mask
+ * contains a bit for each even nasid in our partition. The machine
+ * mask contains a bit for each even nasid in the entire machine.
+ *
+ * Using those two bit arrays, we can determine which nasids are
+ * known in the machine. Each should also have a reserved page
+ * initialized if they are available for partitioning.
+ */
+void
+xpc_discovery(void)
+{
+ void *remote_rp_base;
+ struct xpc_rsvd_page *remote_rp;
+ unsigned long remote_rp_pa;
+ int region;
+ int region_size;
+ int max_regions;
+ int nasid;
+ struct xpc_rsvd_page *rp;
+ unsigned long *discovered_nasids;
+ enum xp_retval ret;
+
+ remote_rp = xpc_kmalloc_cacheline_aligned(XPC_RP_HEADER_SIZE +
+ xpc_nasid_mask_nbytes,
+ GFP_KERNEL, &remote_rp_base);
+ if (remote_rp == NULL)
+ return;
+
+ discovered_nasids = kzalloc(sizeof(long) * xpc_nasid_mask_nlongs,
+ GFP_KERNEL);
+ if (discovered_nasids == NULL) {
+ kfree(remote_rp_base);
+ return;
+ }
+
+ rp = (struct xpc_rsvd_page *)xpc_rsvd_page;
+
+ /*
+ * The term 'region' in this context refers to the minimum number of
+ * nodes that can comprise an access protection grouping. The access
+ * protection is in regards to memory, IOI and IPI.
+ */
+ region_size = xp_region_size;
+
+ if (is_uv())
+ max_regions = 256;
+ else {
+ max_regions = 64;
+
+ switch (region_size) {
+ case 128:
+ max_regions *= 2;
+ case 64:
+ max_regions *= 2;
+ case 32:
+ max_regions *= 2;
+ region_size = 16;
+ DBUG_ON(!is_shub2());
+ }
+ }
+
+ for (region = 0; region < max_regions; region++) {
+
+ if (xpc_exiting)
+ break;
+
+ dev_dbg(xpc_part, "searching region %d\n", region);
+
+ for (nasid = (region * region_size * 2);
+ nasid < ((region + 1) * region_size * 2); nasid += 2) {
+
+ if (xpc_exiting)
+ break;
+
+ dev_dbg(xpc_part, "checking nasid %d\n", nasid);
+
+ if (test_bit(nasid / 2, xpc_part_nasids)) {
+ dev_dbg(xpc_part, "PROM indicates Nasid %d is "
+ "part of the local partition; skipping "
+ "region\n", nasid);
+ break;
+ }
+
+ if (!(test_bit(nasid / 2, xpc_mach_nasids))) {
+ dev_dbg(xpc_part, "PROM indicates Nasid %d was "
+ "not on Numa-Link network at reset\n",
+ nasid);
+ continue;
+ }
+
+ if (test_bit(nasid / 2, discovered_nasids)) {
+ dev_dbg(xpc_part, "Nasid %d is part of a "
+ "partition which was previously "
+ "discovered\n", nasid);
+ continue;
+ }
+
+ /* pull over the rsvd page header & part_nasids mask */
+
+ ret = xpc_get_remote_rp(nasid, discovered_nasids,
+ remote_rp, &remote_rp_pa);
+ if (ret != xpSuccess) {
+ dev_dbg(xpc_part, "unable to get reserved page "
+ "from nasid %d, reason=%d\n", nasid,
+ ret);
+
+ if (ret == xpLocalPartid)
+ break;
+
+ continue;
+ }
+
+ xpc_arch_ops.request_partition_activation(remote_rp,
+ remote_rp_pa, nasid);
+ }
+ }
+
+ kfree(discovered_nasids);
+ kfree(remote_rp_base);
+}
+
+/*
+ * Given a partid, get the nasids owned by that partition from the
+ * remote partition's reserved page.
+ */
+enum xp_retval
+xpc_initiate_partid_to_nasids(short partid, void *nasid_mask)
+{
+ struct xpc_partition *part;
+ unsigned long part_nasid_pa;
+
+ part = &xpc_partitions[partid];
+ if (part->remote_rp_pa == 0)
+ return xpPartitionDown;
+
+ memset(nasid_mask, 0, xpc_nasid_mask_nbytes);
+
+ part_nasid_pa = (unsigned long)XPC_RP_PART_NASIDS(part->remote_rp_pa);
+
+ return xp_remote_memcpy(xp_pa(nasid_mask), part_nasid_pa,
+ xpc_nasid_mask_nbytes);
+}
diff --git a/drivers/misc/sgi-xp/xpc_sn2.c b/drivers/misc/sgi-xp/xpc_sn2.c
new file mode 100644
index 0000000..7d71c04
--- /dev/null
+++ b/drivers/misc/sgi-xp/xpc_sn2.c
@@ -0,0 +1,2462 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 2008-2009 Silicon Graphics, Inc. All Rights Reserved.
+ */
+
+/*
+ * Cross Partition Communication (XPC) sn2-based functions.
+ *
+ * Architecture specific implementation of common functions.
+ *
+ */
+
+#include <linux/delay.h>
+#include <linux/slab.h>
+#include <asm/uncached.h>
+#include <asm/sn/mspec.h>
+#include <asm/sn/sn_sal.h>
+#include "xpc.h"
+
+/*
+ * Define the number of u64s required to represent all the C-brick nasids
+ * as a bitmap. The cross-partition kernel modules deal only with
+ * C-brick nasids, thus the need for bitmaps which don't account for
+ * odd-numbered (non C-brick) nasids.
+ */
+#define XPC_MAX_PHYSNODES_SN2 (MAX_NUMALINK_NODES / 2)
+#define XP_NASID_MASK_BYTES_SN2 ((XPC_MAX_PHYSNODES_SN2 + 7) / 8)
+#define XP_NASID_MASK_WORDS_SN2 ((XPC_MAX_PHYSNODES_SN2 + 63) / 64)
+
+/*
+ * Memory for XPC's amo variables is allocated by the MSPEC driver. These
+ * pages are located in the lowest granule. The lowest granule uses 4k pages
+ * for cached references and an alternate TLB handler to never provide a
+ * cacheable mapping for the entire region. This will prevent speculative
+ * reading of cached copies of our lines from being issued which will cause
+ * a PI FSB Protocol error to be generated by the SHUB. For XPC, we need 64
+ * amo variables (based on XP_MAX_NPARTITIONS_SN2) to identify the senders of
+ * NOTIFY IRQs, 128 amo variables (based on XP_NASID_MASK_WORDS_SN2) to identify
+ * the senders of ACTIVATE IRQs, 1 amo variable to identify which remote
+ * partitions (i.e., XPCs) consider themselves currently engaged with the
+ * local XPC and 1 amo variable to request partition deactivation.
+ */
+#define XPC_NOTIFY_IRQ_AMOS_SN2 0
+#define XPC_ACTIVATE_IRQ_AMOS_SN2 (XPC_NOTIFY_IRQ_AMOS_SN2 + \
+ XP_MAX_NPARTITIONS_SN2)
+#define XPC_ENGAGED_PARTITIONS_AMO_SN2 (XPC_ACTIVATE_IRQ_AMOS_SN2 + \
+ XP_NASID_MASK_WORDS_SN2)
+#define XPC_DEACTIVATE_REQUEST_AMO_SN2 (XPC_ENGAGED_PARTITIONS_AMO_SN2 + 1)
+
+/*
+ * Buffer used to store a local copy of portions of a remote partition's
+ * reserved page (either its header and part_nasids mask, or its vars).
+ */
+static void *xpc_remote_copy_buffer_base_sn2;
+static char *xpc_remote_copy_buffer_sn2;
+
+static struct xpc_vars_sn2 *xpc_vars_sn2;
+static struct xpc_vars_part_sn2 *xpc_vars_part_sn2;
+
+static int
+xpc_setup_partitions_sn2(void)
+{
+ /* nothing needs to be done */
+ return 0;
+}
+
+static void
+xpc_teardown_partitions_sn2(void)
+{
+ /* nothing needs to be done */
+}
+
+/* SH_IPI_ACCESS shub register value on startup */
+static u64 xpc_sh1_IPI_access_sn2;
+static u64 xpc_sh2_IPI_access0_sn2;
+static u64 xpc_sh2_IPI_access1_sn2;
+static u64 xpc_sh2_IPI_access2_sn2;
+static u64 xpc_sh2_IPI_access3_sn2;
+
+/*
+ * Change protections to allow IPI operations.
+ */
+static void
+xpc_allow_IPI_ops_sn2(void)
+{
+ int node;
+ int nasid;
+
+ /* !!! The following should get moved into SAL. */
+ if (is_shub2()) {
+ xpc_sh2_IPI_access0_sn2 =
+ (u64)HUB_L((u64 *)LOCAL_MMR_ADDR(SH2_IPI_ACCESS0));
+ xpc_sh2_IPI_access1_sn2 =
+ (u64)HUB_L((u64 *)LOCAL_MMR_ADDR(SH2_IPI_ACCESS1));
+ xpc_sh2_IPI_access2_sn2 =
+ (u64)HUB_L((u64 *)LOCAL_MMR_ADDR(SH2_IPI_ACCESS2));
+ xpc_sh2_IPI_access3_sn2 =
+ (u64)HUB_L((u64 *)LOCAL_MMR_ADDR(SH2_IPI_ACCESS3));
+
+ for_each_online_node(node) {
+ nasid = cnodeid_to_nasid(node);
+ HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS0),
+ -1UL);
+ HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS1),
+ -1UL);
+ HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS2),
+ -1UL);
+ HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS3),
+ -1UL);
+ }
+ } else {
+ xpc_sh1_IPI_access_sn2 =
+ (u64)HUB_L((u64 *)LOCAL_MMR_ADDR(SH1_IPI_ACCESS));
+
+ for_each_online_node(node) {
+ nasid = cnodeid_to_nasid(node);
+ HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH1_IPI_ACCESS),
+ -1UL);
+ }
+ }
+}
+
+/*
+ * Restrict protections to disallow IPI operations.
+ */
+static void
+xpc_disallow_IPI_ops_sn2(void)
+{
+ int node;
+ int nasid;
+
+ /* !!! The following should get moved into SAL. */
+ if (is_shub2()) {
+ for_each_online_node(node) {
+ nasid = cnodeid_to_nasid(node);
+ HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS0),
+ xpc_sh2_IPI_access0_sn2);
+ HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS1),
+ xpc_sh2_IPI_access1_sn2);
+ HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS2),
+ xpc_sh2_IPI_access2_sn2);
+ HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS3),
+ xpc_sh2_IPI_access3_sn2);
+ }
+ } else {
+ for_each_online_node(node) {
+ nasid = cnodeid_to_nasid(node);
+ HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH1_IPI_ACCESS),
+ xpc_sh1_IPI_access_sn2);
+ }
+ }
+}
+
+/*
+ * The following set of functions are used for the sending and receiving of
+ * IRQs (also known as IPIs). There are two flavors of IRQs, one that is
+ * associated with partition activity (SGI_XPC_ACTIVATE) and the other that
+ * is associated with channel activity (SGI_XPC_NOTIFY).
+ */
+
+static u64
+xpc_receive_IRQ_amo_sn2(struct amo *amo)
+{
+ return FETCHOP_LOAD_OP(TO_AMO((u64)&amo->variable), FETCHOP_CLEAR);
+}
+
+static enum xp_retval
+xpc_send_IRQ_sn2(struct amo *amo, u64 flag, int nasid, int phys_cpuid,
+ int vector)
+{
+ int ret = 0;
+ unsigned long irq_flags;
+
+ local_irq_save(irq_flags);
+
+ FETCHOP_STORE_OP(TO_AMO((u64)&amo->variable), FETCHOP_OR, flag);
+ sn_send_IPI_phys(nasid, phys_cpuid, vector, 0);
+
+ /*
+ * We must always use the nofault function regardless of whether we
+ * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we
+ * didn't, we'd never know that the other partition is down and would
+ * keep sending IRQs and amos to it until the heartbeat times out.
+ */
+ ret = xp_nofault_PIOR((u64 *)GLOBAL_MMR_ADDR(NASID_GET(&amo->variable),
+ xp_nofault_PIOR_target));
+
+ local_irq_restore(irq_flags);
+
+ return (ret == 0) ? xpSuccess : xpPioReadError;
+}
+
+static struct amo *
+xpc_init_IRQ_amo_sn2(int index)
+{
+ struct amo *amo = xpc_vars_sn2->amos_page + index;
+
+ (void)xpc_receive_IRQ_amo_sn2(amo); /* clear amo variable */
+ return amo;
+}
+
+/*
+ * Functions associated with SGI_XPC_ACTIVATE IRQ.
+ */
+
+/*
+ * Notify the heartbeat check thread that an activate IRQ has been received.
+ */
+static irqreturn_t
+xpc_handle_activate_IRQ_sn2(int irq, void *dev_id)
+{
+ unsigned long irq_flags;
+
+ spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags);
+ xpc_activate_IRQ_rcvd++;
+ spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags);
+
+ wake_up_interruptible(&xpc_activate_IRQ_wq);
+ return IRQ_HANDLED;
+}
+
+/*
+ * Flag the appropriate amo variable and send an IRQ to the specified node.
+ */
+static void
+xpc_send_activate_IRQ_sn2(unsigned long amos_page_pa, int from_nasid,
+ int to_nasid, int to_phys_cpuid)
+{
+ struct amo *amos = (struct amo *)__va(amos_page_pa +
+ (XPC_ACTIVATE_IRQ_AMOS_SN2 *
+ sizeof(struct amo)));
+
+ (void)xpc_send_IRQ_sn2(&amos[BIT_WORD(from_nasid / 2)],
+ BIT_MASK(from_nasid / 2), to_nasid,
+ to_phys_cpuid, SGI_XPC_ACTIVATE);
+}
+
+static void
+xpc_send_local_activate_IRQ_sn2(int from_nasid)
+{
+ unsigned long irq_flags;
+ struct amo *amos = (struct amo *)__va(xpc_vars_sn2->amos_page_pa +
+ (XPC_ACTIVATE_IRQ_AMOS_SN2 *
+ sizeof(struct amo)));
+
+ /* fake the sending and receipt of an activate IRQ from remote nasid */
+ FETCHOP_STORE_OP(TO_AMO((u64)&amos[BIT_WORD(from_nasid / 2)].variable),
+ FETCHOP_OR, BIT_MASK(from_nasid / 2));
+
+ spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags);
+ xpc_activate_IRQ_rcvd++;
+ spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags);
+
+ wake_up_interruptible(&xpc_activate_IRQ_wq);
+}
+
+/*
+ * Functions associated with SGI_XPC_NOTIFY IRQ.
+ */
+
+/*
+ * Check to see if any chctl flags were sent from the specified partition.
+ */
+static void
+xpc_check_for_sent_chctl_flags_sn2(struct xpc_partition *part)
+{
+ union xpc_channel_ctl_flags chctl;
+ unsigned long irq_flags;
+
+ chctl.all_flags = xpc_receive_IRQ_amo_sn2(part->sn.sn2.
+ local_chctl_amo_va);
+ if (chctl.all_flags == 0)
+ return;
+
+ spin_lock_irqsave(&part->chctl_lock, irq_flags);
+ part->chctl.all_flags |= chctl.all_flags;
+ spin_unlock_irqrestore(&part->chctl_lock, irq_flags);
+
+ dev_dbg(xpc_chan, "received notify IRQ from partid=%d, chctl.all_flags="
+ "0x%llx\n", XPC_PARTID(part), chctl.all_flags);
+
+ xpc_wakeup_channel_mgr(part);
+}
+
+/*
+ * Handle the receipt of a SGI_XPC_NOTIFY IRQ by seeing whether the specified
+ * partition actually sent it. Since SGI_XPC_NOTIFY IRQs may be shared by more
+ * than one partition, we use an amo structure per partition to indicate
+ * whether a partition has sent an IRQ or not. If it has, then wake up the
+ * associated kthread to handle it.
+ *
+ * All SGI_XPC_NOTIFY IRQs received by XPC are the result of IRQs sent by XPC
+ * running on other partitions.
+ *
+ * Noteworthy Arguments:
+ *
+ * irq - Interrupt ReQuest number. NOT USED.
+ *
+ * dev_id - partid of IRQ's potential sender.
+ */
+static irqreturn_t
+xpc_handle_notify_IRQ_sn2(int irq, void *dev_id)
+{
+ short partid = (short)(u64)dev_id;
+ struct xpc_partition *part = &xpc_partitions[partid];
+
+ DBUG_ON(partid < 0 || partid >= XP_MAX_NPARTITIONS_SN2);
+
+ if (xpc_part_ref(part)) {
+ xpc_check_for_sent_chctl_flags_sn2(part);
+
+ xpc_part_deref(part);
+ }
+ return IRQ_HANDLED;
+}
+
+/*
+ * Check to see if xpc_handle_notify_IRQ_sn2() dropped any IRQs on the floor
+ * because the write to their associated amo variable completed after the IRQ
+ * was received.
+ */
+static void
+xpc_check_for_dropped_notify_IRQ_sn2(struct xpc_partition *part)
+{
+ struct xpc_partition_sn2 *part_sn2 = &part->sn.sn2;
+
+ if (xpc_part_ref(part)) {
+ xpc_check_for_sent_chctl_flags_sn2(part);
+
+ part_sn2->dropped_notify_IRQ_timer.expires = jiffies +
+ XPC_DROPPED_NOTIFY_IRQ_WAIT_INTERVAL;
+ add_timer(&part_sn2->dropped_notify_IRQ_timer);
+ xpc_part_deref(part);
+ }
+}
+
+/*
+ * Send a notify IRQ to the remote partition that is associated with the
+ * specified channel.
+ */
+static void
+xpc_send_notify_IRQ_sn2(struct xpc_channel *ch, u8 chctl_flag,
+ char *chctl_flag_string, unsigned long *irq_flags)
+{
+ struct xpc_partition *part = &xpc_partitions[ch->partid];
+ struct xpc_partition_sn2 *part_sn2 = &part->sn.sn2;
+ union xpc_channel_ctl_flags chctl = { 0 };
+ enum xp_retval ret;
+
+ if (likely(part->act_state != XPC_P_AS_DEACTIVATING)) {
+ chctl.flags[ch->number] = chctl_flag;
+ ret = xpc_send_IRQ_sn2(part_sn2->remote_chctl_amo_va,
+ chctl.all_flags,
+ part_sn2->notify_IRQ_nasid,
+ part_sn2->notify_IRQ_phys_cpuid,
+ SGI_XPC_NOTIFY);
+ dev_dbg(xpc_chan, "%s sent to partid=%d, channel=%d, ret=%d\n",
+ chctl_flag_string, ch->partid, ch->number, ret);
+ if (unlikely(ret != xpSuccess)) {
+ if (irq_flags != NULL)
+ spin_unlock_irqrestore(&ch->lock, *irq_flags);
+ XPC_DEACTIVATE_PARTITION(part, ret);
+ if (irq_flags != NULL)
+ spin_lock_irqsave(&ch->lock, *irq_flags);
+ }
+ }
+}
+
+#define XPC_SEND_NOTIFY_IRQ_SN2(_ch, _ipi_f, _irq_f) \
+ xpc_send_notify_IRQ_sn2(_ch, _ipi_f, #_ipi_f, _irq_f)
+
+/*
+ * Make it look like the remote partition, which is associated with the
+ * specified channel, sent us a notify IRQ. This faked IRQ will be handled
+ * by xpc_check_for_dropped_notify_IRQ_sn2().
+ */
+static void
+xpc_send_local_notify_IRQ_sn2(struct xpc_channel *ch, u8 chctl_flag,
+ char *chctl_flag_string)
+{
+ struct xpc_partition *part = &xpc_partitions[ch->partid];
+ union xpc_channel_ctl_flags chctl = { 0 };
+
+ chctl.flags[ch->number] = chctl_flag;
+ FETCHOP_STORE_OP(TO_AMO((u64)&part->sn.sn2.local_chctl_amo_va->
+ variable), FETCHOP_OR, chctl.all_flags);
+ dev_dbg(xpc_chan, "%s sent local from partid=%d, channel=%d\n",
+ chctl_flag_string, ch->partid, ch->number);
+}
+
+#define XPC_SEND_LOCAL_NOTIFY_IRQ_SN2(_ch, _ipi_f) \
+ xpc_send_local_notify_IRQ_sn2(_ch, _ipi_f, #_ipi_f)
+
+static void
+xpc_send_chctl_closerequest_sn2(struct xpc_channel *ch,
+ unsigned long *irq_flags)
+{
+ struct xpc_openclose_args *args = ch->sn.sn2.local_openclose_args;
+
+ args->reason = ch->reason;
+ XPC_SEND_NOTIFY_IRQ_SN2(ch, XPC_CHCTL_CLOSEREQUEST, irq_flags);
+}
+
+static void
+xpc_send_chctl_closereply_sn2(struct xpc_channel *ch, unsigned long *irq_flags)
+{
+ XPC_SEND_NOTIFY_IRQ_SN2(ch, XPC_CHCTL_CLOSEREPLY, irq_flags);
+}
+
+static void
+xpc_send_chctl_openrequest_sn2(struct xpc_channel *ch, unsigned long *irq_flags)
+{
+ struct xpc_openclose_args *args = ch->sn.sn2.local_openclose_args;
+
+ args->entry_size = ch->entry_size;
+ args->local_nentries = ch->local_nentries;
+ XPC_SEND_NOTIFY_IRQ_SN2(ch, XPC_CHCTL_OPENREQUEST, irq_flags);
+}
+
+static void
+xpc_send_chctl_openreply_sn2(struct xpc_channel *ch, unsigned long *irq_flags)
+{
+ struct xpc_openclose_args *args = ch->sn.sn2.local_openclose_args;
+
+ args->remote_nentries = ch->remote_nentries;
+ args->local_nentries = ch->local_nentries;
+ args->local_msgqueue_pa = xp_pa(ch->sn.sn2.local_msgqueue);
+ XPC_SEND_NOTIFY_IRQ_SN2(ch, XPC_CHCTL_OPENREPLY, irq_flags);
+}
+
+static void
+xpc_send_chctl_opencomplete_sn2(struct xpc_channel *ch,
+ unsigned long *irq_flags)
+{
+ XPC_SEND_NOTIFY_IRQ_SN2(ch, XPC_CHCTL_OPENCOMPLETE, irq_flags);
+}
+
+static void
+xpc_send_chctl_msgrequest_sn2(struct xpc_channel *ch)
+{
+ XPC_SEND_NOTIFY_IRQ_SN2(ch, XPC_CHCTL_MSGREQUEST, NULL);
+}
+
+static void
+xpc_send_chctl_local_msgrequest_sn2(struct xpc_channel *ch)
+{
+ XPC_SEND_LOCAL_NOTIFY_IRQ_SN2(ch, XPC_CHCTL_MSGREQUEST);
+}
+
+static enum xp_retval
+xpc_save_remote_msgqueue_pa_sn2(struct xpc_channel *ch,
+ unsigned long msgqueue_pa)
+{
+ ch->sn.sn2.remote_msgqueue_pa = msgqueue_pa;
+ return xpSuccess;
+}
+
+/*
+ * This next set of functions are used to keep track of when a partition is
+ * potentially engaged in accessing memory belonging to another partition.
+ */
+
+static void
+xpc_indicate_partition_engaged_sn2(struct xpc_partition *part)
+{
+ unsigned long irq_flags;
+ struct amo *amo = (struct amo *)__va(part->sn.sn2.remote_amos_page_pa +
+ (XPC_ENGAGED_PARTITIONS_AMO_SN2 *
+ sizeof(struct amo)));
+
+ local_irq_save(irq_flags);
+
+ /* set bit corresponding to our partid in remote partition's amo */
+ FETCHOP_STORE_OP(TO_AMO((u64)&amo->variable), FETCHOP_OR,
+ BIT(sn_partition_id));
+
+ /*
+ * We must always use the nofault function regardless of whether we
+ * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we
+ * didn't, we'd never know that the other partition is down and would
+ * keep sending IRQs and amos to it until the heartbeat times out.
+ */
+ (void)xp_nofault_PIOR((u64 *)GLOBAL_MMR_ADDR(NASID_GET(&amo->
+ variable),
+ xp_nofault_PIOR_target));
+
+ local_irq_restore(irq_flags);
+}
+
+static void
+xpc_indicate_partition_disengaged_sn2(struct xpc_partition *part)
+{
+ struct xpc_partition_sn2 *part_sn2 = &part->sn.sn2;
+ unsigned long irq_flags;
+ struct amo *amo = (struct amo *)__va(part_sn2->remote_amos_page_pa +
+ (XPC_ENGAGED_PARTITIONS_AMO_SN2 *
+ sizeof(struct amo)));
+
+ local_irq_save(irq_flags);
+
+ /* clear bit corresponding to our partid in remote partition's amo */
+ FETCHOP_STORE_OP(TO_AMO((u64)&amo->variable), FETCHOP_AND,
+ ~BIT(sn_partition_id));
+
+ /*
+ * We must always use the nofault function regardless of whether we
+ * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we
+ * didn't, we'd never know that the other partition is down and would
+ * keep sending IRQs and amos to it until the heartbeat times out.
+ */
+ (void)xp_nofault_PIOR((u64 *)GLOBAL_MMR_ADDR(NASID_GET(&amo->
+ variable),
+ xp_nofault_PIOR_target));
+
+ local_irq_restore(irq_flags);
+
+ /*
+ * Send activate IRQ to get other side to see that we've cleared our
+ * bit in their engaged partitions amo.
+ */
+ xpc_send_activate_IRQ_sn2(part_sn2->remote_amos_page_pa,
+ cnodeid_to_nasid(0),
+ part_sn2->activate_IRQ_nasid,
+ part_sn2->activate_IRQ_phys_cpuid);
+}
+
+static void
+xpc_assume_partition_disengaged_sn2(short partid)
+{
+ struct amo *amo = xpc_vars_sn2->amos_page +
+ XPC_ENGAGED_PARTITIONS_AMO_SN2;
+
+ /* clear bit(s) based on partid mask in our partition's amo */
+ FETCHOP_STORE_OP(TO_AMO((u64)&amo->variable), FETCHOP_AND,
+ ~BIT(partid));
+}
+
+static int
+xpc_partition_engaged_sn2(short partid)
+{
+ struct amo *amo = xpc_vars_sn2->amos_page +
+ XPC_ENGAGED_PARTITIONS_AMO_SN2;
+
+ /* our partition's amo variable ANDed with partid mask */
+ return (FETCHOP_LOAD_OP(TO_AMO((u64)&amo->variable), FETCHOP_LOAD) &
+ BIT(partid)) != 0;
+}
+
+static int
+xpc_any_partition_engaged_sn2(void)
+{
+ struct amo *amo = xpc_vars_sn2->amos_page +
+ XPC_ENGAGED_PARTITIONS_AMO_SN2;
+
+ /* our partition's amo variable */
+ return FETCHOP_LOAD_OP(TO_AMO((u64)&amo->variable), FETCHOP_LOAD) != 0;
+}
+
+/* original protection values for each node */
+static u64 xpc_prot_vec_sn2[MAX_NUMNODES];
+
+/*
+ * Change protections to allow amo operations on non-Shub 1.1 systems.
+ */
+static enum xp_retval
+xpc_allow_amo_ops_sn2(struct amo *amos_page)
+{
+ enum xp_retval ret = xpSuccess;
+
+ /*
+ * On SHUB 1.1, we cannot call sn_change_memprotect() since the BIST
+ * collides with memory operations. On those systems we call
+ * xpc_allow_amo_ops_shub_wars_1_1_sn2() instead.
+ */
+ if (!enable_shub_wars_1_1())
+ ret = xp_expand_memprotect(ia64_tpa((u64)amos_page), PAGE_SIZE);
+
+ return ret;
+}
+
+/*
+ * Change protections to allow amo operations on Shub 1.1 systems.
+ */
+static void
+xpc_allow_amo_ops_shub_wars_1_1_sn2(void)
+{
+ int node;
+ int nasid;
+
+ if (!enable_shub_wars_1_1())
+ return;
+
+ for_each_online_node(node) {
+ nasid = cnodeid_to_nasid(node);
+ /* save current protection values */
+ xpc_prot_vec_sn2[node] =
+ (u64)HUB_L((u64 *)GLOBAL_MMR_ADDR(nasid,
+ SH1_MD_DQLP_MMR_DIR_PRIVEC0));
+ /* open up everything */
+ HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid,
+ SH1_MD_DQLP_MMR_DIR_PRIVEC0),
+ -1UL);
+ HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid,
+ SH1_MD_DQRP_MMR_DIR_PRIVEC0),
+ -1UL);
+ }
+}
+
+static enum xp_retval
+xpc_get_partition_rsvd_page_pa_sn2(void *buf, u64 *cookie, unsigned long *rp_pa,
+ size_t *len)
+{
+ s64 status;
+ enum xp_retval ret;
+
+ status = sn_partition_reserved_page_pa((u64)buf, cookie,
+ (u64 *)rp_pa, (u64 *)len);
+ if (status == SALRET_OK)
+ ret = xpSuccess;
+ else if (status == SALRET_MORE_PASSES)
+ ret = xpNeedMoreInfo;
+ else
+ ret = xpSalError;
+
+ return ret;
+}
+
+
+static int
+xpc_setup_rsvd_page_sn2(struct xpc_rsvd_page *rp)
+{
+ struct amo *amos_page;
+ int i;
+ int ret;
+
+ xpc_vars_sn2 = XPC_RP_VARS(rp);
+
+ rp->sn.sn2.vars_pa = xp_pa(xpc_vars_sn2);
+
+ /* vars_part array follows immediately after vars */
+ xpc_vars_part_sn2 = (struct xpc_vars_part_sn2 *)((u8 *)XPC_RP_VARS(rp) +
+ XPC_RP_VARS_SIZE);
+
+ /*
+ * Before clearing xpc_vars_sn2, see if a page of amos had been
+ * previously allocated. If not we'll need to allocate one and set
+ * permissions so that cross-partition amos are allowed.
+ *
+ * The allocated amo page needs MCA reporting to remain disabled after
+ * XPC has unloaded. To make this work, we keep a copy of the pointer
+ * to this page (i.e., amos_page) in the struct xpc_vars_sn2 structure,
+ * which is pointed to by the reserved page, and re-use that saved copy
+ * on subsequent loads of XPC. This amo page is never freed, and its
+ * memory protections are never restricted.
+ */
+ amos_page = xpc_vars_sn2->amos_page;
+ if (amos_page == NULL) {
+ amos_page = (struct amo *)TO_AMO(uncached_alloc_page(0, 1));
+ if (amos_page == NULL) {
+ dev_err(xpc_part, "can't allocate page of amos\n");
+ return -ENOMEM;
+ }
+
+ /*
+ * Open up amo-R/W to cpu. This is done on Shub 1.1 systems
+ * when xpc_allow_amo_ops_shub_wars_1_1_sn2() is called.
+ */
+ ret = xpc_allow_amo_ops_sn2(amos_page);
+ if (ret != xpSuccess) {
+ dev_err(xpc_part, "can't allow amo operations\n");
+ uncached_free_page(__IA64_UNCACHED_OFFSET |
+ TO_PHYS((u64)amos_page), 1);
+ return -EPERM;
+ }
+ }
+
+ /* clear xpc_vars_sn2 */
+ memset(xpc_vars_sn2, 0, sizeof(struct xpc_vars_sn2));
+
+ xpc_vars_sn2->version = XPC_V_VERSION;
+ xpc_vars_sn2->activate_IRQ_nasid = cpuid_to_nasid(0);
+ xpc_vars_sn2->activate_IRQ_phys_cpuid = cpu_physical_id(0);
+ xpc_vars_sn2->vars_part_pa = xp_pa(xpc_vars_part_sn2);
+ xpc_vars_sn2->amos_page_pa = ia64_tpa((u64)amos_page);
+ xpc_vars_sn2->amos_page = amos_page; /* save for next load of XPC */
+
+ /* clear xpc_vars_part_sn2 */
+ memset((u64 *)xpc_vars_part_sn2, 0, sizeof(struct xpc_vars_part_sn2) *
+ XP_MAX_NPARTITIONS_SN2);
+
+ /* initialize the activate IRQ related amo variables */
+ for (i = 0; i < xpc_nasid_mask_nlongs; i++)
+ (void)xpc_init_IRQ_amo_sn2(XPC_ACTIVATE_IRQ_AMOS_SN2 + i);
+
+ /* initialize the engaged remote partitions related amo variables */
+ (void)xpc_init_IRQ_amo_sn2(XPC_ENGAGED_PARTITIONS_AMO_SN2);
+ (void)xpc_init_IRQ_amo_sn2(XPC_DEACTIVATE_REQUEST_AMO_SN2);
+
+ return 0;
+}
+
+static int
+xpc_hb_allowed_sn2(short partid, void *heartbeating_to_mask)
+{
+ return test_bit(partid, heartbeating_to_mask);
+}
+
+static void
+xpc_allow_hb_sn2(short partid)
+{
+ DBUG_ON(xpc_vars_sn2 == NULL);
+ set_bit(partid, xpc_vars_sn2->heartbeating_to_mask);
+}
+
+static void
+xpc_disallow_hb_sn2(short partid)
+{
+ DBUG_ON(xpc_vars_sn2 == NULL);
+ clear_bit(partid, xpc_vars_sn2->heartbeating_to_mask);
+}
+
+static void
+xpc_disallow_all_hbs_sn2(void)
+{
+ DBUG_ON(xpc_vars_sn2 == NULL);
+ bitmap_zero(xpc_vars_sn2->heartbeating_to_mask, xp_max_npartitions);
+}
+
+static void
+xpc_increment_heartbeat_sn2(void)
+{
+ xpc_vars_sn2->heartbeat++;
+}
+
+static void
+xpc_offline_heartbeat_sn2(void)
+{
+ xpc_increment_heartbeat_sn2();
+ xpc_vars_sn2->heartbeat_offline = 1;
+}
+
+static void
+xpc_online_heartbeat_sn2(void)
+{
+ xpc_increment_heartbeat_sn2();
+ xpc_vars_sn2->heartbeat_offline = 0;
+}
+
+static void
+xpc_heartbeat_init_sn2(void)
+{
+ DBUG_ON(xpc_vars_sn2 == NULL);
+
+ bitmap_zero(xpc_vars_sn2->heartbeating_to_mask, XP_MAX_NPARTITIONS_SN2);
+ xpc_online_heartbeat_sn2();
+}
+
+static void
+xpc_heartbeat_exit_sn2(void)
+{
+ xpc_offline_heartbeat_sn2();
+}
+
+static enum xp_retval
+xpc_get_remote_heartbeat_sn2(struct xpc_partition *part)
+{
+ struct xpc_vars_sn2 *remote_vars;
+ enum xp_retval ret;
+
+ remote_vars = (struct xpc_vars_sn2 *)xpc_remote_copy_buffer_sn2;
+
+ /* pull the remote vars structure that contains the heartbeat */
+ ret = xp_remote_memcpy(xp_pa(remote_vars),
+ part->sn.sn2.remote_vars_pa,
+ XPC_RP_VARS_SIZE);
+ if (ret != xpSuccess)
+ return ret;
+
+ dev_dbg(xpc_part, "partid=%d, heartbeat=%lld, last_heartbeat=%lld, "
+ "heartbeat_offline=%lld, HB_mask[0]=0x%lx\n", XPC_PARTID(part),
+ remote_vars->heartbeat, part->last_heartbeat,
+ remote_vars->heartbeat_offline,
+ remote_vars->heartbeating_to_mask[0]);
+
+ if ((remote_vars->heartbeat == part->last_heartbeat &&
+ !remote_vars->heartbeat_offline) ||
+ !xpc_hb_allowed_sn2(sn_partition_id,
+ remote_vars->heartbeating_to_mask)) {
+ ret = xpNoHeartbeat;
+ } else {
+ part->last_heartbeat = remote_vars->heartbeat;
+ }
+
+ return ret;
+}
+
+/*
+ * Get a copy of the remote partition's XPC variables from the reserved page.
+ *
+ * remote_vars points to a buffer that is cacheline aligned for BTE copies and
+ * assumed to be of size XPC_RP_VARS_SIZE.
+ */
+static enum xp_retval
+xpc_get_remote_vars_sn2(unsigned long remote_vars_pa,
+ struct xpc_vars_sn2 *remote_vars)
+{
+ enum xp_retval ret;
+
+ if (remote_vars_pa == 0)
+ return xpVarsNotSet;
+
+ /* pull over the cross partition variables */
+ ret = xp_remote_memcpy(xp_pa(remote_vars), remote_vars_pa,
+ XPC_RP_VARS_SIZE);
+ if (ret != xpSuccess)
+ return ret;
+
+ if (XPC_VERSION_MAJOR(remote_vars->version) !=
+ XPC_VERSION_MAJOR(XPC_V_VERSION)) {
+ return xpBadVersion;
+ }
+
+ return xpSuccess;
+}
+
+static void
+xpc_request_partition_activation_sn2(struct xpc_rsvd_page *remote_rp,
+ unsigned long remote_rp_pa, int nasid)
+{
+ xpc_send_local_activate_IRQ_sn2(nasid);
+}
+
+static void
+xpc_request_partition_reactivation_sn2(struct xpc_partition *part)
+{
+ xpc_send_local_activate_IRQ_sn2(part->sn.sn2.activate_IRQ_nasid);
+}
+
+static void
+xpc_request_partition_deactivation_sn2(struct xpc_partition *part)
+{
+ struct xpc_partition_sn2 *part_sn2 = &part->sn.sn2;
+ unsigned long irq_flags;
+ struct amo *amo = (struct amo *)__va(part_sn2->remote_amos_page_pa +
+ (XPC_DEACTIVATE_REQUEST_AMO_SN2 *
+ sizeof(struct amo)));
+
+ local_irq_save(irq_flags);
+
+ /* set bit corresponding to our partid in remote partition's amo */
+ FETCHOP_STORE_OP(TO_AMO((u64)&amo->variable), FETCHOP_OR,
+ BIT(sn_partition_id));
+
+ /*
+ * We must always use the nofault function regardless of whether we
+ * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we
+ * didn't, we'd never know that the other partition is down and would
+ * keep sending IRQs and amos to it until the heartbeat times out.
+ */
+ (void)xp_nofault_PIOR((u64 *)GLOBAL_MMR_ADDR(NASID_GET(&amo->
+ variable),
+ xp_nofault_PIOR_target));
+
+ local_irq_restore(irq_flags);
+
+ /*
+ * Send activate IRQ to get other side to see that we've set our
+ * bit in their deactivate request amo.
+ */
+ xpc_send_activate_IRQ_sn2(part_sn2->remote_amos_page_pa,
+ cnodeid_to_nasid(0),
+ part_sn2->activate_IRQ_nasid,
+ part_sn2->activate_IRQ_phys_cpuid);
+}
+
+static void
+xpc_cancel_partition_deactivation_request_sn2(struct xpc_partition *part)
+{
+ unsigned long irq_flags;
+ struct amo *amo = (struct amo *)__va(part->sn.sn2.remote_amos_page_pa +
+ (XPC_DEACTIVATE_REQUEST_AMO_SN2 *
+ sizeof(struct amo)));
+
+ local_irq_save(irq_flags);
+
+ /* clear bit corresponding to our partid in remote partition's amo */
+ FETCHOP_STORE_OP(TO_AMO((u64)&amo->variable), FETCHOP_AND,
+ ~BIT(sn_partition_id));
+
+ /*
+ * We must always use the nofault function regardless of whether we
+ * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we
+ * didn't, we'd never know that the other partition is down and would
+ * keep sending IRQs and amos to it until the heartbeat times out.
+ */
+ (void)xp_nofault_PIOR((u64 *)GLOBAL_MMR_ADDR(NASID_GET(&amo->
+ variable),
+ xp_nofault_PIOR_target));
+
+ local_irq_restore(irq_flags);
+}
+
+static int
+xpc_partition_deactivation_requested_sn2(short partid)
+{
+ struct amo *amo = xpc_vars_sn2->amos_page +
+ XPC_DEACTIVATE_REQUEST_AMO_SN2;
+
+ /* our partition's amo variable ANDed with partid mask */
+ return (FETCHOP_LOAD_OP(TO_AMO((u64)&amo->variable), FETCHOP_LOAD) &
+ BIT(partid)) != 0;
+}
+
+/*
+ * Update the remote partition's info.
+ */
+static void
+xpc_update_partition_info_sn2(struct xpc_partition *part, u8 remote_rp_version,
+ unsigned long *remote_rp_ts_jiffies,
+ unsigned long remote_rp_pa,
+ unsigned long remote_vars_pa,
+ struct xpc_vars_sn2 *remote_vars)
+{
+ struct xpc_partition_sn2 *part_sn2 = &part->sn.sn2;
+
+ part->remote_rp_version = remote_rp_version;
+ dev_dbg(xpc_part, " remote_rp_version = 0x%016x\n",
+ part->remote_rp_version);
+
+ part->remote_rp_ts_jiffies = *remote_rp_ts_jiffies;
+ dev_dbg(xpc_part, " remote_rp_ts_jiffies = 0x%016lx\n",
+ part->remote_rp_ts_jiffies);
+
+ part->remote_rp_pa = remote_rp_pa;
+ dev_dbg(xpc_part, " remote_rp_pa = 0x%016lx\n", part->remote_rp_pa);
+
+ part_sn2->remote_vars_pa = remote_vars_pa;
+ dev_dbg(xpc_part, " remote_vars_pa = 0x%016lx\n",
+ part_sn2->remote_vars_pa);
+
+ part->last_heartbeat = remote_vars->heartbeat - 1;
+ dev_dbg(xpc_part, " last_heartbeat = 0x%016llx\n",
+ part->last_heartbeat);
+
+ part_sn2->remote_vars_part_pa = remote_vars->vars_part_pa;
+ dev_dbg(xpc_part, " remote_vars_part_pa = 0x%016lx\n",
+ part_sn2->remote_vars_part_pa);
+
+ part_sn2->activate_IRQ_nasid = remote_vars->activate_IRQ_nasid;
+ dev_dbg(xpc_part, " activate_IRQ_nasid = 0x%x\n",
+ part_sn2->activate_IRQ_nasid);
+
+ part_sn2->activate_IRQ_phys_cpuid =
+ remote_vars->activate_IRQ_phys_cpuid;
+ dev_dbg(xpc_part, " activate_IRQ_phys_cpuid = 0x%x\n",
+ part_sn2->activate_IRQ_phys_cpuid);
+
+ part_sn2->remote_amos_page_pa = remote_vars->amos_page_pa;
+ dev_dbg(xpc_part, " remote_amos_page_pa = 0x%lx\n",
+ part_sn2->remote_amos_page_pa);
+
+ part_sn2->remote_vars_version = remote_vars->version;
+ dev_dbg(xpc_part, " remote_vars_version = 0x%x\n",
+ part_sn2->remote_vars_version);
+}
+
+/*
+ * Prior code has determined the nasid which generated a activate IRQ.
+ * Inspect that nasid to determine if its partition needs to be activated
+ * or deactivated.
+ *
+ * A partition is considered "awaiting activation" if our partition
+ * flags indicate it is not active and it has a heartbeat. A
+ * partition is considered "awaiting deactivation" if our partition
+ * flags indicate it is active but it has no heartbeat or it is not
+ * sending its heartbeat to us.
+ *
+ * To determine the heartbeat, the remote nasid must have a properly
+ * initialized reserved page.
+ */
+static void
+xpc_identify_activate_IRQ_req_sn2(int nasid)
+{
+ struct xpc_rsvd_page *remote_rp;
+ struct xpc_vars_sn2 *remote_vars;
+ unsigned long remote_rp_pa;
+ unsigned long remote_vars_pa;
+ int remote_rp_version;
+ int reactivate = 0;
+ unsigned long remote_rp_ts_jiffies = 0;
+ short partid;
+ struct xpc_partition *part;
+ struct xpc_partition_sn2 *part_sn2;
+ enum xp_retval ret;
+
+ /* pull over the reserved page structure */
+
+ remote_rp = (struct xpc_rsvd_page *)xpc_remote_copy_buffer_sn2;
+
+ ret = xpc_get_remote_rp(nasid, NULL, remote_rp, &remote_rp_pa);
+ if (ret != xpSuccess) {
+ dev_warn(xpc_part, "unable to get reserved page from nasid %d, "
+ "which sent interrupt, reason=%d\n", nasid, ret);
+ return;
+ }
+
+ remote_vars_pa = remote_rp->sn.sn2.vars_pa;
+ remote_rp_version = remote_rp->version;
+ remote_rp_ts_jiffies = remote_rp->ts_jiffies;
+
+ partid = remote_rp->SAL_partid;
+ part = &xpc_partitions[partid];
+ part_sn2 = &part->sn.sn2;
+
+ /* pull over the cross partition variables */
+
+ remote_vars = (struct xpc_vars_sn2 *)xpc_remote_copy_buffer_sn2;
+
+ ret = xpc_get_remote_vars_sn2(remote_vars_pa, remote_vars);
+ if (ret != xpSuccess) {
+ dev_warn(xpc_part, "unable to get XPC variables from nasid %d, "
+ "which sent interrupt, reason=%d\n", nasid, ret);
+
+ XPC_DEACTIVATE_PARTITION(part, ret);
+ return;
+ }
+
+ part->activate_IRQ_rcvd++;
+
+ dev_dbg(xpc_part, "partid for nasid %d is %d; IRQs = %d; HB = "
+ "%lld:0x%lx\n", (int)nasid, (int)partid,
+ part->activate_IRQ_rcvd,
+ remote_vars->heartbeat, remote_vars->heartbeating_to_mask[0]);
+
+ if (xpc_partition_disengaged(part) &&
+ part->act_state == XPC_P_AS_INACTIVE) {
+
+ xpc_update_partition_info_sn2(part, remote_rp_version,
+ &remote_rp_ts_jiffies,
+ remote_rp_pa, remote_vars_pa,
+ remote_vars);
+
+ if (xpc_partition_deactivation_requested_sn2(partid)) {
+ /*
+ * Other side is waiting on us to deactivate even though
+ * we already have.
+ */
+ return;
+ }
+
+ xpc_activate_partition(part);
+ return;
+ }
+
+ DBUG_ON(part->remote_rp_version == 0);
+ DBUG_ON(part_sn2->remote_vars_version == 0);
+
+ if (remote_rp_ts_jiffies != part->remote_rp_ts_jiffies) {
+
+ /* the other side rebooted */
+
+ DBUG_ON(xpc_partition_engaged_sn2(partid));
+ DBUG_ON(xpc_partition_deactivation_requested_sn2(partid));
+
+ xpc_update_partition_info_sn2(part, remote_rp_version,
+ &remote_rp_ts_jiffies,
+ remote_rp_pa, remote_vars_pa,
+ remote_vars);
+ reactivate = 1;
+ }
+
+ if (part->disengage_timeout > 0 && !xpc_partition_disengaged(part)) {
+ /* still waiting on other side to disengage from us */
+ return;
+ }
+
+ if (reactivate)
+ XPC_DEACTIVATE_PARTITION(part, xpReactivating);
+ else if (xpc_partition_deactivation_requested_sn2(partid))
+ XPC_DEACTIVATE_PARTITION(part, xpOtherGoingDown);
+}
+
+/*
+ * Loop through the activation amo variables and process any bits
+ * which are set. Each bit indicates a nasid sending a partition
+ * activation or deactivation request.
+ *
+ * Return #of IRQs detected.
+ */
+int
+xpc_identify_activate_IRQ_sender_sn2(void)
+{
+ int l;
+ int b;
+ unsigned long nasid_mask_long;
+ u64 nasid; /* remote nasid */
+ int n_IRQs_detected = 0;
+ struct amo *act_amos;
+
+ act_amos = xpc_vars_sn2->amos_page + XPC_ACTIVATE_IRQ_AMOS_SN2;
+
+ /* scan through activate amo variables looking for non-zero entries */
+ for (l = 0; l < xpc_nasid_mask_nlongs; l++) {
+
+ if (xpc_exiting)
+ break;
+
+ nasid_mask_long = xpc_receive_IRQ_amo_sn2(&act_amos[l]);
+
+ b = find_first_bit(&nasid_mask_long, BITS_PER_LONG);
+ if (b >= BITS_PER_LONG) {
+ /* no IRQs from nasids in this amo variable */
+ continue;
+ }
+
+ dev_dbg(xpc_part, "amo[%d] gave back 0x%lx\n", l,
+ nasid_mask_long);
+
+ /*
+ * If this nasid has been added to the machine since
+ * our partition was reset, this will retain the
+ * remote nasid in our reserved pages machine mask.
+ * This is used in the event of module reload.
+ */
+ xpc_mach_nasids[l] |= nasid_mask_long;
+
+ /* locate the nasid(s) which sent interrupts */
+
+ do {
+ n_IRQs_detected++;
+ nasid = (l * BITS_PER_LONG + b) * 2;
+ dev_dbg(xpc_part, "interrupt from nasid %lld\n", nasid);
+ xpc_identify_activate_IRQ_req_sn2(nasid);
+
+ b = find_next_bit(&nasid_mask_long, BITS_PER_LONG,
+ b + 1);
+ } while (b < BITS_PER_LONG);
+ }
+ return n_IRQs_detected;
+}
+
+static void
+xpc_process_activate_IRQ_rcvd_sn2(void)
+{
+ unsigned long irq_flags;
+ int n_IRQs_expected;
+ int n_IRQs_detected;
+
+ spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags);
+ n_IRQs_expected = xpc_activate_IRQ_rcvd;
+ xpc_activate_IRQ_rcvd = 0;
+ spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags);
+
+ n_IRQs_detected = xpc_identify_activate_IRQ_sender_sn2();
+ if (n_IRQs_detected < n_IRQs_expected) {
+ /* retry once to help avoid missing amo */
+ (void)xpc_identify_activate_IRQ_sender_sn2();
+ }
+}
+
+/*
+ * Setup the channel structures that are sn2 specific.
+ */
+static enum xp_retval
+xpc_setup_ch_structures_sn2(struct xpc_partition *part)
+{
+ struct xpc_partition_sn2 *part_sn2 = &part->sn.sn2;
+ struct xpc_channel_sn2 *ch_sn2;
+ enum xp_retval retval;
+ int ret;
+ int cpuid;
+ int ch_number;
+ struct timer_list *timer;
+ short partid = XPC_PARTID(part);
+
+ /* allocate all the required GET/PUT values */
+
+ part_sn2->local_GPs =
+ xpc_kzalloc_cacheline_aligned(XPC_GP_SIZE, GFP_KERNEL,
+ &part_sn2->local_GPs_base);
+ if (part_sn2->local_GPs == NULL) {
+ dev_err(xpc_chan, "can't get memory for local get/put "
+ "values\n");
+ return xpNoMemory;
+ }
+
+ part_sn2->remote_GPs =
+ xpc_kzalloc_cacheline_aligned(XPC_GP_SIZE, GFP_KERNEL,
+ &part_sn2->remote_GPs_base);
+ if (part_sn2->remote_GPs == NULL) {
+ dev_err(xpc_chan, "can't get memory for remote get/put "
+ "values\n");
+ retval = xpNoMemory;
+ goto out_1;
+ }
+
+ part_sn2->remote_GPs_pa = 0;
+
+ /* allocate all the required open and close args */
+
+ part_sn2->local_openclose_args =
+ xpc_kzalloc_cacheline_aligned(XPC_OPENCLOSE_ARGS_SIZE,
+ GFP_KERNEL, &part_sn2->
+ local_openclose_args_base);
+ if (part_sn2->local_openclose_args == NULL) {
+ dev_err(xpc_chan, "can't get memory for local connect args\n");
+ retval = xpNoMemory;
+ goto out_2;
+ }
+
+ part_sn2->remote_openclose_args_pa = 0;
+
+ part_sn2->local_chctl_amo_va = xpc_init_IRQ_amo_sn2(partid);
+
+ part_sn2->notify_IRQ_nasid = 0;
+ part_sn2->notify_IRQ_phys_cpuid = 0;
+ part_sn2->remote_chctl_amo_va = NULL;
+
+ sprintf(part_sn2->notify_IRQ_owner, "xpc%02d", partid);
+ ret = request_irq(SGI_XPC_NOTIFY, xpc_handle_notify_IRQ_sn2,
+ IRQF_SHARED, part_sn2->notify_IRQ_owner,
+ (void *)(u64)partid);
+ if (ret != 0) {
+ dev_err(xpc_chan, "can't register NOTIFY IRQ handler, "
+ "errno=%d\n", -ret);
+ retval = xpLackOfResources;
+ goto out_3;
+ }
+
+ /* Setup a timer to check for dropped notify IRQs */
+ timer = &part_sn2->dropped_notify_IRQ_timer;
+ init_timer(timer);
+ timer->function =
+ (void (*)(unsigned long))xpc_check_for_dropped_notify_IRQ_sn2;
+ timer->data = (unsigned long)part;
+ timer->expires = jiffies + XPC_DROPPED_NOTIFY_IRQ_WAIT_INTERVAL;
+ add_timer(timer);
+
+ for (ch_number = 0; ch_number < part->nchannels; ch_number++) {
+ ch_sn2 = &part->channels[ch_number].sn.sn2;
+
+ ch_sn2->local_GP = &part_sn2->local_GPs[ch_number];
+ ch_sn2->local_openclose_args =
+ &part_sn2->local_openclose_args[ch_number];
+
+ mutex_init(&ch_sn2->msg_to_pull_mutex);
+ }
+
+ /*
+ * Setup the per partition specific variables required by the
+ * remote partition to establish channel connections with us.
+ *
+ * The setting of the magic # indicates that these per partition
+ * specific variables are ready to be used.
+ */
+ xpc_vars_part_sn2[partid].GPs_pa = xp_pa(part_sn2->local_GPs);
+ xpc_vars_part_sn2[partid].openclose_args_pa =
+ xp_pa(part_sn2->local_openclose_args);
+ xpc_vars_part_sn2[partid].chctl_amo_pa =
+ xp_pa(part_sn2->local_chctl_amo_va);
+ cpuid = raw_smp_processor_id(); /* any CPU in this partition will do */
+ xpc_vars_part_sn2[partid].notify_IRQ_nasid = cpuid_to_nasid(cpuid);
+ xpc_vars_part_sn2[partid].notify_IRQ_phys_cpuid =
+ cpu_physical_id(cpuid);
+ xpc_vars_part_sn2[partid].nchannels = part->nchannels;
+ xpc_vars_part_sn2[partid].magic = XPC_VP_MAGIC1_SN2;
+
+ return xpSuccess;
+
+ /* setup of ch structures failed */
+out_3:
+ kfree(part_sn2->local_openclose_args_base);
+ part_sn2->local_openclose_args = NULL;
+out_2:
+ kfree(part_sn2->remote_GPs_base);
+ part_sn2->remote_GPs = NULL;
+out_1:
+ kfree(part_sn2->local_GPs_base);
+ part_sn2->local_GPs = NULL;
+ return retval;
+}
+
+/*
+ * Teardown the channel structures that are sn2 specific.
+ */
+static void
+xpc_teardown_ch_structures_sn2(struct xpc_partition *part)
+{
+ struct xpc_partition_sn2 *part_sn2 = &part->sn.sn2;
+ short partid = XPC_PARTID(part);
+
+ /*
+ * Indicate that the variables specific to the remote partition are no
+ * longer available for its use.
+ */
+ xpc_vars_part_sn2[partid].magic = 0;
+
+ /* in case we've still got outstanding timers registered... */
+ del_timer_sync(&part_sn2->dropped_notify_IRQ_timer);
+ free_irq(SGI_XPC_NOTIFY, (void *)(u64)partid);
+
+ kfree(part_sn2->local_openclose_args_base);
+ part_sn2->local_openclose_args = NULL;
+ kfree(part_sn2->remote_GPs_base);
+ part_sn2->remote_GPs = NULL;
+ kfree(part_sn2->local_GPs_base);
+ part_sn2->local_GPs = NULL;
+ part_sn2->local_chctl_amo_va = NULL;
+}
+
+/*
+ * Create a wrapper that hides the underlying mechanism for pulling a cacheline
+ * (or multiple cachelines) from a remote partition.
+ *
+ * src_pa must be a cacheline aligned physical address on the remote partition.
+ * dst must be a cacheline aligned virtual address on this partition.
+ * cnt must be cacheline sized
+ */
+/* ??? Replace this function by call to xp_remote_memcpy() or bte_copy()? */
+static enum xp_retval
+xpc_pull_remote_cachelines_sn2(struct xpc_partition *part, void *dst,
+ const unsigned long src_pa, size_t cnt)
+{
+ enum xp_retval ret;
+
+ DBUG_ON(src_pa != L1_CACHE_ALIGN(src_pa));
+ DBUG_ON((unsigned long)dst != L1_CACHE_ALIGN((unsigned long)dst));
+ DBUG_ON(cnt != L1_CACHE_ALIGN(cnt));
+
+ if (part->act_state == XPC_P_AS_DEACTIVATING)
+ return part->reason;
+
+ ret = xp_remote_memcpy(xp_pa(dst), src_pa, cnt);
+ if (ret != xpSuccess) {
+ dev_dbg(xpc_chan, "xp_remote_memcpy() from partition %d failed,"
+ " ret=%d\n", XPC_PARTID(part), ret);
+ }
+ return ret;
+}
+
+/*
+ * Pull the remote per partition specific variables from the specified
+ * partition.
+ */
+static enum xp_retval
+xpc_pull_remote_vars_part_sn2(struct xpc_partition *part)
+{
+ struct xpc_partition_sn2 *part_sn2 = &part->sn.sn2;
+ u8 buffer[L1_CACHE_BYTES * 2];
+ struct xpc_vars_part_sn2 *pulled_entry_cacheline =
+ (struct xpc_vars_part_sn2 *)L1_CACHE_ALIGN((u64)buffer);
+ struct xpc_vars_part_sn2 *pulled_entry;
+ unsigned long remote_entry_cacheline_pa;
+ unsigned long remote_entry_pa;
+ short partid = XPC_PARTID(part);
+ enum xp_retval ret;
+
+ /* pull the cacheline that contains the variables we're interested in */
+
+ DBUG_ON(part_sn2->remote_vars_part_pa !=
+ L1_CACHE_ALIGN(part_sn2->remote_vars_part_pa));
+ DBUG_ON(sizeof(struct xpc_vars_part_sn2) != L1_CACHE_BYTES / 2);
+
+ remote_entry_pa = part_sn2->remote_vars_part_pa +
+ sn_partition_id * sizeof(struct xpc_vars_part_sn2);
+
+ remote_entry_cacheline_pa = (remote_entry_pa & ~(L1_CACHE_BYTES - 1));
+
+ pulled_entry = (struct xpc_vars_part_sn2 *)((u64)pulled_entry_cacheline
+ + (remote_entry_pa &
+ (L1_CACHE_BYTES - 1)));
+
+ ret = xpc_pull_remote_cachelines_sn2(part, pulled_entry_cacheline,
+ remote_entry_cacheline_pa,
+ L1_CACHE_BYTES);
+ if (ret != xpSuccess) {
+ dev_dbg(xpc_chan, "failed to pull XPC vars_part from "
+ "partition %d, ret=%d\n", partid, ret);
+ return ret;
+ }
+
+ /* see if they've been set up yet */
+
+ if (pulled_entry->magic != XPC_VP_MAGIC1_SN2 &&
+ pulled_entry->magic != XPC_VP_MAGIC2_SN2) {
+
+ if (pulled_entry->magic != 0) {
+ dev_dbg(xpc_chan, "partition %d's XPC vars_part for "
+ "partition %d has bad magic value (=0x%llx)\n",
+ partid, sn_partition_id, pulled_entry->magic);
+ return xpBadMagic;
+ }
+
+ /* they've not been initialized yet */
+ return xpRetry;
+ }
+
+ if (xpc_vars_part_sn2[partid].magic == XPC_VP_MAGIC1_SN2) {
+
+ /* validate the variables */
+
+ if (pulled_entry->GPs_pa == 0 ||
+ pulled_entry->openclose_args_pa == 0 ||
+ pulled_entry->chctl_amo_pa == 0) {
+
+ dev_err(xpc_chan, "partition %d's XPC vars_part for "
+ "partition %d are not valid\n", partid,
+ sn_partition_id);
+ return xpInvalidAddress;
+ }
+
+ /* the variables we imported look to be valid */
+
+ part_sn2->remote_GPs_pa = pulled_entry->GPs_pa;
+ part_sn2->remote_openclose_args_pa =
+ pulled_entry->openclose_args_pa;
+ part_sn2->remote_chctl_amo_va =
+ (struct amo *)__va(pulled_entry->chctl_amo_pa);
+ part_sn2->notify_IRQ_nasid = pulled_entry->notify_IRQ_nasid;
+ part_sn2->notify_IRQ_phys_cpuid =
+ pulled_entry->notify_IRQ_phys_cpuid;
+
+ if (part->nchannels > pulled_entry->nchannels)
+ part->nchannels = pulled_entry->nchannels;
+
+ /* let the other side know that we've pulled their variables */
+
+ xpc_vars_part_sn2[partid].magic = XPC_VP_MAGIC2_SN2;
+ }
+
+ if (pulled_entry->magic == XPC_VP_MAGIC1_SN2)
+ return xpRetry;
+
+ return xpSuccess;
+}
+
+/*
+ * Establish first contact with the remote partititon. This involves pulling
+ * the XPC per partition variables from the remote partition and waiting for
+ * the remote partition to pull ours.
+ */
+static enum xp_retval
+xpc_make_first_contact_sn2(struct xpc_partition *part)
+{
+ struct xpc_partition_sn2 *part_sn2 = &part->sn.sn2;
+ enum xp_retval ret;
+
+ /*
+ * Register the remote partition's amos with SAL so it can handle
+ * and cleanup errors within that address range should the remote
+ * partition go down. We don't unregister this range because it is
+ * difficult to tell when outstanding writes to the remote partition
+ * are finished and thus when it is safe to unregister. This should
+ * not result in wasted space in the SAL xp_addr_region table because
+ * we should get the same page for remote_amos_page_pa after module
+ * reloads and system reboots.
+ */
+ if (sn_register_xp_addr_region(part_sn2->remote_amos_page_pa,
+ PAGE_SIZE, 1) < 0) {
+ dev_warn(xpc_part, "xpc_activating(%d) failed to register "
+ "xp_addr region\n", XPC_PARTID(part));
+
+ ret = xpPhysAddrRegFailed;
+ XPC_DEACTIVATE_PARTITION(part, ret);
+ return ret;
+ }
+
+ /*
+ * Send activate IRQ to get other side to activate if they've not
+ * already begun to do so.
+ */
+ xpc_send_activate_IRQ_sn2(part_sn2->remote_amos_page_pa,
+ cnodeid_to_nasid(0),
+ part_sn2->activate_IRQ_nasid,
+ part_sn2->activate_IRQ_phys_cpuid);
+
+ while ((ret = xpc_pull_remote_vars_part_sn2(part)) != xpSuccess) {
+ if (ret != xpRetry) {
+ XPC_DEACTIVATE_PARTITION(part, ret);
+ return ret;
+ }
+
+ dev_dbg(xpc_part, "waiting to make first contact with "
+ "partition %d\n", XPC_PARTID(part));
+
+ /* wait a 1/4 of a second or so */
+ (void)msleep_interruptible(250);
+
+ if (part->act_state == XPC_P_AS_DEACTIVATING)
+ return part->reason;
+ }
+
+ return xpSuccess;
+}
+
+/*
+ * Get the chctl flags and pull the openclose args and/or remote GPs as needed.
+ */
+static u64
+xpc_get_chctl_all_flags_sn2(struct xpc_partition *part)
+{
+ struct xpc_partition_sn2 *part_sn2 = &part->sn.sn2;
+ unsigned long irq_flags;
+ union xpc_channel_ctl_flags chctl;
+ enum xp_retval ret;
+
+ /*
+ * See if there are any chctl flags to be handled.
+ */
+
+ spin_lock_irqsave(&part->chctl_lock, irq_flags);
+ chctl = part->chctl;
+ if (chctl.all_flags != 0)
+ part->chctl.all_flags = 0;
+
+ spin_unlock_irqrestore(&part->chctl_lock, irq_flags);
+
+ if (xpc_any_openclose_chctl_flags_set(&chctl)) {
+ ret = xpc_pull_remote_cachelines_sn2(part, part->
+ remote_openclose_args,
+ part_sn2->
+ remote_openclose_args_pa,
+ XPC_OPENCLOSE_ARGS_SIZE);
+ if (ret != xpSuccess) {
+ XPC_DEACTIVATE_PARTITION(part, ret);
+
+ dev_dbg(xpc_chan, "failed to pull openclose args from "
+ "partition %d, ret=%d\n", XPC_PARTID(part),
+ ret);
+
+ /* don't bother processing chctl flags anymore */
+ chctl.all_flags = 0;
+ }
+ }
+
+ if (xpc_any_msg_chctl_flags_set(&chctl)) {
+ ret = xpc_pull_remote_cachelines_sn2(part, part_sn2->remote_GPs,
+ part_sn2->remote_GPs_pa,
+ XPC_GP_SIZE);
+ if (ret != xpSuccess) {
+ XPC_DEACTIVATE_PARTITION(part, ret);
+
+ dev_dbg(xpc_chan, "failed to pull GPs from partition "
+ "%d, ret=%d\n", XPC_PARTID(part), ret);
+
+ /* don't bother processing chctl flags anymore */
+ chctl.all_flags = 0;
+ }
+ }
+
+ return chctl.all_flags;
+}
+
+/*
+ * Allocate the local message queue and the notify queue.
+ */
+static enum xp_retval
+xpc_allocate_local_msgqueue_sn2(struct xpc_channel *ch)
+{
+ struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2;
+ unsigned long irq_flags;
+ int nentries;
+ size_t nbytes;
+
+ for (nentries = ch->local_nentries; nentries > 0; nentries--) {
+
+ nbytes = nentries * ch->entry_size;
+ ch_sn2->local_msgqueue =
+ xpc_kzalloc_cacheline_aligned(nbytes, GFP_KERNEL,
+ &ch_sn2->local_msgqueue_base);
+ if (ch_sn2->local_msgqueue == NULL)
+ continue;
+
+ nbytes = nentries * sizeof(struct xpc_notify_sn2);
+ ch_sn2->notify_queue = kzalloc(nbytes, GFP_KERNEL);
+ if (ch_sn2->notify_queue == NULL) {
+ kfree(ch_sn2->local_msgqueue_base);
+ ch_sn2->local_msgqueue = NULL;
+ continue;
+ }
+
+ spin_lock_irqsave(&ch->lock, irq_flags);
+ if (nentries < ch->local_nentries) {
+ dev_dbg(xpc_chan, "nentries=%d local_nentries=%d, "
+ "partid=%d, channel=%d\n", nentries,
+ ch->local_nentries, ch->partid, ch->number);
+
+ ch->local_nentries = nentries;
+ }
+ spin_unlock_irqrestore(&ch->lock, irq_flags);
+ return xpSuccess;
+ }
+
+ dev_dbg(xpc_chan, "can't get memory for local message queue and notify "
+ "queue, partid=%d, channel=%d\n", ch->partid, ch->number);
+ return xpNoMemory;
+}
+
+/*
+ * Allocate the cached remote message queue.
+ */
+static enum xp_retval
+xpc_allocate_remote_msgqueue_sn2(struct xpc_channel *ch)
+{
+ struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2;
+ unsigned long irq_flags;
+ int nentries;
+ size_t nbytes;
+
+ DBUG_ON(ch->remote_nentries <= 0);
+
+ for (nentries = ch->remote_nentries; nentries > 0; nentries--) {
+
+ nbytes = nentries * ch->entry_size;
+ ch_sn2->remote_msgqueue =
+ xpc_kzalloc_cacheline_aligned(nbytes, GFP_KERNEL, &ch_sn2->
+ remote_msgqueue_base);
+ if (ch_sn2->remote_msgqueue == NULL)
+ continue;
+
+ spin_lock_irqsave(&ch->lock, irq_flags);
+ if (nentries < ch->remote_nentries) {
+ dev_dbg(xpc_chan, "nentries=%d remote_nentries=%d, "
+ "partid=%d, channel=%d\n", nentries,
+ ch->remote_nentries, ch->partid, ch->number);
+
+ ch->remote_nentries = nentries;
+ }
+ spin_unlock_irqrestore(&ch->lock, irq_flags);
+ return xpSuccess;
+ }
+
+ dev_dbg(xpc_chan, "can't get memory for cached remote message queue, "
+ "partid=%d, channel=%d\n", ch->partid, ch->number);
+ return xpNoMemory;
+}
+
+/*
+ * Allocate message queues and other stuff associated with a channel.
+ *
+ * Note: Assumes all of the channel sizes are filled in.
+ */
+static enum xp_retval
+xpc_setup_msg_structures_sn2(struct xpc_channel *ch)
+{
+ struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2;
+ enum xp_retval ret;
+
+ DBUG_ON(ch->flags & XPC_C_SETUP);
+
+ ret = xpc_allocate_local_msgqueue_sn2(ch);
+ if (ret == xpSuccess) {
+
+ ret = xpc_allocate_remote_msgqueue_sn2(ch);
+ if (ret != xpSuccess) {
+ kfree(ch_sn2->local_msgqueue_base);
+ ch_sn2->local_msgqueue = NULL;
+ kfree(ch_sn2->notify_queue);
+ ch_sn2->notify_queue = NULL;
+ }
+ }
+ return ret;
+}
+
+/*
+ * Free up message queues and other stuff that were allocated for the specified
+ * channel.
+ */
+static void
+xpc_teardown_msg_structures_sn2(struct xpc_channel *ch)
+{
+ struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2;
+
+ DBUG_ON(!spin_is_locked(&ch->lock));
+
+ ch_sn2->remote_msgqueue_pa = 0;
+
+ ch_sn2->local_GP->get = 0;
+ ch_sn2->local_GP->put = 0;
+ ch_sn2->remote_GP.get = 0;
+ ch_sn2->remote_GP.put = 0;
+ ch_sn2->w_local_GP.get = 0;
+ ch_sn2->w_local_GP.put = 0;
+ ch_sn2->w_remote_GP.get = 0;
+ ch_sn2->w_remote_GP.put = 0;
+ ch_sn2->next_msg_to_pull = 0;
+
+ if (ch->flags & XPC_C_SETUP) {
+ dev_dbg(xpc_chan, "ch->flags=0x%x, partid=%d, channel=%d\n",
+ ch->flags, ch->partid, ch->number);
+
+ kfree(ch_sn2->local_msgqueue_base);
+ ch_sn2->local_msgqueue = NULL;
+ kfree(ch_sn2->remote_msgqueue_base);
+ ch_sn2->remote_msgqueue = NULL;
+ kfree(ch_sn2->notify_queue);
+ ch_sn2->notify_queue = NULL;
+ }
+}
+
+/*
+ * Notify those who wanted to be notified upon delivery of their message.
+ */
+static void
+xpc_notify_senders_sn2(struct xpc_channel *ch, enum xp_retval reason, s64 put)
+{
+ struct xpc_notify_sn2 *notify;
+ u8 notify_type;
+ s64 get = ch->sn.sn2.w_remote_GP.get - 1;
+
+ while (++get < put && atomic_read(&ch->n_to_notify) > 0) {
+
+ notify = &ch->sn.sn2.notify_queue[get % ch->local_nentries];
+
+ /*
+ * See if the notify entry indicates it was associated with
+ * a message who's sender wants to be notified. It is possible
+ * that it is, but someone else is doing or has done the
+ * notification.
+ */
+ notify_type = notify->type;
+ if (notify_type == 0 ||
+ cmpxchg(¬ify->type, notify_type, 0) != notify_type) {
+ continue;
+ }
+
+ DBUG_ON(notify_type != XPC_N_CALL);
+
+ atomic_dec(&ch->n_to_notify);
+
+ if (notify->func != NULL) {
+ dev_dbg(xpc_chan, "notify->func() called, notify=0x%p "
+ "msg_number=%lld partid=%d channel=%d\n",
+ (void *)notify, get, ch->partid, ch->number);
+
+ notify->func(reason, ch->partid, ch->number,
+ notify->key);
+
+ dev_dbg(xpc_chan, "notify->func() returned, notify=0x%p"
+ " msg_number=%lld partid=%d channel=%d\n",
+ (void *)notify, get, ch->partid, ch->number);
+ }
+ }
+}
+
+static void
+xpc_notify_senders_of_disconnect_sn2(struct xpc_channel *ch)
+{
+ xpc_notify_senders_sn2(ch, ch->reason, ch->sn.sn2.w_local_GP.put);
+}
+
+/*
+ * Clear some of the msg flags in the local message queue.
+ */
+static inline void
+xpc_clear_local_msgqueue_flags_sn2(struct xpc_channel *ch)
+{
+ struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2;
+ struct xpc_msg_sn2 *msg;
+ s64 get;
+
+ get = ch_sn2->w_remote_GP.get;
+ do {
+ msg = (struct xpc_msg_sn2 *)((u64)ch_sn2->local_msgqueue +
+ (get % ch->local_nentries) *
+ ch->entry_size);
+ DBUG_ON(!(msg->flags & XPC_M_SN2_READY));
+ msg->flags = 0;
+ } while (++get < ch_sn2->remote_GP.get);
+}
+
+/*
+ * Clear some of the msg flags in the remote message queue.
+ */
+static inline void
+xpc_clear_remote_msgqueue_flags_sn2(struct xpc_channel *ch)
+{
+ struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2;
+ struct xpc_msg_sn2 *msg;
+ s64 put, remote_nentries = ch->remote_nentries;
+
+ /* flags are zeroed when the buffer is allocated */
+ if (ch_sn2->remote_GP.put < remote_nentries)
+ return;
+
+ put = max(ch_sn2->w_remote_GP.put, remote_nentries);
+ do {
+ msg = (struct xpc_msg_sn2 *)((u64)ch_sn2->remote_msgqueue +
+ (put % remote_nentries) *
+ ch->entry_size);
+ DBUG_ON(!(msg->flags & XPC_M_SN2_READY));
+ DBUG_ON(!(msg->flags & XPC_M_SN2_DONE));
+ DBUG_ON(msg->number != put - remote_nentries);
+ msg->flags = 0;
+ } while (++put < ch_sn2->remote_GP.put);
+}
+
+static int
+xpc_n_of_deliverable_payloads_sn2(struct xpc_channel *ch)
+{
+ return ch->sn.sn2.w_remote_GP.put - ch->sn.sn2.w_local_GP.get;
+}
+
+static void
+xpc_process_msg_chctl_flags_sn2(struct xpc_partition *part, int ch_number)
+{
+ struct xpc_channel *ch = &part->channels[ch_number];
+ struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2;
+ int npayloads_sent;
+
+ ch_sn2->remote_GP = part->sn.sn2.remote_GPs[ch_number];
+
+ /* See what, if anything, has changed for each connected channel */
+
+ xpc_msgqueue_ref(ch);
+
+ if (ch_sn2->w_remote_GP.get == ch_sn2->remote_GP.get &&
+ ch_sn2->w_remote_GP.put == ch_sn2->remote_GP.put) {
+ /* nothing changed since GPs were last pulled */
+ xpc_msgqueue_deref(ch);
+ return;
+ }
+
+ if (!(ch->flags & XPC_C_CONNECTED)) {
+ xpc_msgqueue_deref(ch);
+ return;
+ }
+
+ /*
+ * First check to see if messages recently sent by us have been
+ * received by the other side. (The remote GET value will have
+ * changed since we last looked at it.)
+ */
+
+ if (ch_sn2->w_remote_GP.get != ch_sn2->remote_GP.get) {
+
+ /*
+ * We need to notify any senders that want to be notified
+ * that their sent messages have been received by their
+ * intended recipients. We need to do this before updating
+ * w_remote_GP.get so that we don't allocate the same message
+ * queue entries prematurely (see xpc_allocate_msg()).
+ */
+ if (atomic_read(&ch->n_to_notify) > 0) {
+ /*
+ * Notify senders that messages sent have been
+ * received and delivered by the other side.
+ */
+ xpc_notify_senders_sn2(ch, xpMsgDelivered,
+ ch_sn2->remote_GP.get);
+ }
+
+ /*
+ * Clear msg->flags in previously sent messages, so that
+ * they're ready for xpc_allocate_msg().
+ */
+ xpc_clear_local_msgqueue_flags_sn2(ch);
+
+ ch_sn2->w_remote_GP.get = ch_sn2->remote_GP.get;
+
+ dev_dbg(xpc_chan, "w_remote_GP.get changed to %lld, partid=%d, "
+ "channel=%d\n", ch_sn2->w_remote_GP.get, ch->partid,
+ ch->number);
+
+ /*
+ * If anyone was waiting for message queue entries to become
+ * available, wake them up.
+ */
+ if (atomic_read(&ch->n_on_msg_allocate_wq) > 0)
+ wake_up(&ch->msg_allocate_wq);
+ }
+
+ /*
+ * Now check for newly sent messages by the other side. (The remote
+ * PUT value will have changed since we last looked at it.)
+ */
+
+ if (ch_sn2->w_remote_GP.put != ch_sn2->remote_GP.put) {
+ /*
+ * Clear msg->flags in previously received messages, so that
+ * they're ready for xpc_get_deliverable_payload_sn2().
+ */
+ xpc_clear_remote_msgqueue_flags_sn2(ch);
+
+ smp_wmb(); /* ensure flags have been cleared before bte_copy */
+ ch_sn2->w_remote_GP.put = ch_sn2->remote_GP.put;
+
+ dev_dbg(xpc_chan, "w_remote_GP.put changed to %lld, partid=%d, "
+ "channel=%d\n", ch_sn2->w_remote_GP.put, ch->partid,
+ ch->number);
+
+ npayloads_sent = xpc_n_of_deliverable_payloads_sn2(ch);
+ if (npayloads_sent > 0) {
+ dev_dbg(xpc_chan, "msgs waiting to be copied and "
+ "delivered=%d, partid=%d, channel=%d\n",
+ npayloads_sent, ch->partid, ch->number);
+
+ if (ch->flags & XPC_C_CONNECTEDCALLOUT_MADE)
+ xpc_activate_kthreads(ch, npayloads_sent);
+ }
+ }
+
+ xpc_msgqueue_deref(ch);
+}
+
+static struct xpc_msg_sn2 *
+xpc_pull_remote_msg_sn2(struct xpc_channel *ch, s64 get)
+{
+ struct xpc_partition *part = &xpc_partitions[ch->partid];
+ struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2;
+ unsigned long remote_msg_pa;
+ struct xpc_msg_sn2 *msg;
+ u32 msg_index;
+ u32 nmsgs;
+ u64 msg_offset;
+ enum xp_retval ret;
+
+ if (mutex_lock_interruptible(&ch_sn2->msg_to_pull_mutex) != 0) {
+ /* we were interrupted by a signal */
+ return NULL;
+ }
+
+ while (get >= ch_sn2->next_msg_to_pull) {
+
+ /* pull as many messages as are ready and able to be pulled */
+
+ msg_index = ch_sn2->next_msg_to_pull % ch->remote_nentries;
+
+ DBUG_ON(ch_sn2->next_msg_to_pull >= ch_sn2->w_remote_GP.put);
+ nmsgs = ch_sn2->w_remote_GP.put - ch_sn2->next_msg_to_pull;
+ if (msg_index + nmsgs > ch->remote_nentries) {
+ /* ignore the ones that wrap the msg queue for now */
+ nmsgs = ch->remote_nentries - msg_index;
+ }
+
+ msg_offset = msg_index * ch->entry_size;
+ msg = (struct xpc_msg_sn2 *)((u64)ch_sn2->remote_msgqueue +
+ msg_offset);
+ remote_msg_pa = ch_sn2->remote_msgqueue_pa + msg_offset;
+
+ ret = xpc_pull_remote_cachelines_sn2(part, msg, remote_msg_pa,
+ nmsgs * ch->entry_size);
+ if (ret != xpSuccess) {
+
+ dev_dbg(xpc_chan, "failed to pull %d msgs starting with"
+ " msg %lld from partition %d, channel=%d, "
+ "ret=%d\n", nmsgs, ch_sn2->next_msg_to_pull,
+ ch->partid, ch->number, ret);
+
+ XPC_DEACTIVATE_PARTITION(part, ret);
+
+ mutex_unlock(&ch_sn2->msg_to_pull_mutex);
+ return NULL;
+ }
+
+ ch_sn2->next_msg_to_pull += nmsgs;
+ }
+
+ mutex_unlock(&ch_sn2->msg_to_pull_mutex);
+
+ /* return the message we were looking for */
+ msg_offset = (get % ch->remote_nentries) * ch->entry_size;
+ msg = (struct xpc_msg_sn2 *)((u64)ch_sn2->remote_msgqueue + msg_offset);
+
+ return msg;
+}
+
+/*
+ * Get the next deliverable message's payload.
+ */
+static void *
+xpc_get_deliverable_payload_sn2(struct xpc_channel *ch)
+{
+ struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2;
+ struct xpc_msg_sn2 *msg;
+ void *payload = NULL;
+ s64 get;
+
+ do {
+ if (ch->flags & XPC_C_DISCONNECTING)
+ break;
+
+ get = ch_sn2->w_local_GP.get;
+ smp_rmb(); /* guarantee that .get loads before .put */
+ if (get == ch_sn2->w_remote_GP.put)
+ break;
+
+ /* There are messages waiting to be pulled and delivered.
+ * We need to try to secure one for ourselves. We'll do this
+ * by trying to increment w_local_GP.get and hope that no one
+ * else beats us to it. If they do, we'll we'll simply have
+ * to try again for the next one.
+ */
+
+ if (cmpxchg(&ch_sn2->w_local_GP.get, get, get + 1) == get) {
+ /* we got the entry referenced by get */
+
+ dev_dbg(xpc_chan, "w_local_GP.get changed to %lld, "
+ "partid=%d, channel=%d\n", get + 1,
+ ch->partid, ch->number);
+
+ /* pull the message from the remote partition */
+
+ msg = xpc_pull_remote_msg_sn2(ch, get);
+
+ if (msg != NULL) {
+ DBUG_ON(msg->number != get);
+ DBUG_ON(msg->flags & XPC_M_SN2_DONE);
+ DBUG_ON(!(msg->flags & XPC_M_SN2_READY));
+
+ payload = &msg->payload;
+ }
+ break;
+ }
+
+ } while (1);
+
+ return payload;
+}
+
+/*
+ * Now we actually send the messages that are ready to be sent by advancing
+ * the local message queue's Put value and then send a chctl msgrequest to the
+ * recipient partition.
+ */
+static void
+xpc_send_msgs_sn2(struct xpc_channel *ch, s64 initial_put)
+{
+ struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2;
+ struct xpc_msg_sn2 *msg;
+ s64 put = initial_put + 1;
+ int send_msgrequest = 0;
+
+ while (1) {
+
+ while (1) {
+ if (put == ch_sn2->w_local_GP.put)
+ break;
+
+ msg = (struct xpc_msg_sn2 *)((u64)ch_sn2->
+ local_msgqueue + (put %
+ ch->local_nentries) *
+ ch->entry_size);
+
+ if (!(msg->flags & XPC_M_SN2_READY))
+ break;
+
+ put++;
+ }
+
+ if (put == initial_put) {
+ /* nothing's changed */
+ break;
+ }
+
+ if (cmpxchg_rel(&ch_sn2->local_GP->put, initial_put, put) !=
+ initial_put) {
+ /* someone else beat us to it */
+ DBUG_ON(ch_sn2->local_GP->put < initial_put);
+ break;
+ }
+
+ /* we just set the new value of local_GP->put */
+
+ dev_dbg(xpc_chan, "local_GP->put changed to %lld, partid=%d, "
+ "channel=%d\n", put, ch->partid, ch->number);
+
+ send_msgrequest = 1;
+
+ /*
+ * We need to ensure that the message referenced by
+ * local_GP->put is not XPC_M_SN2_READY or that local_GP->put
+ * equals w_local_GP.put, so we'll go have a look.
+ */
+ initial_put = put;
+ }
+
+ if (send_msgrequest)
+ xpc_send_chctl_msgrequest_sn2(ch);
+}
+
+/*
+ * Allocate an entry for a message from the message queue associated with the
+ * specified channel.
+ */
+static enum xp_retval
+xpc_allocate_msg_sn2(struct xpc_channel *ch, u32 flags,
+ struct xpc_msg_sn2 **address_of_msg)
+{
+ struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2;
+ struct xpc_msg_sn2 *msg;
+ enum xp_retval ret;
+ s64 put;
+
+ /*
+ * Get the next available message entry from the local message queue.
+ * If none are available, we'll make sure that we grab the latest
+ * GP values.
+ */
+ ret = xpTimeout;
+
+ while (1) {
+
+ put = ch_sn2->w_local_GP.put;
+ smp_rmb(); /* guarantee that .put loads before .get */
+ if (put - ch_sn2->w_remote_GP.get < ch->local_nentries) {
+
+ /* There are available message entries. We need to try
+ * to secure one for ourselves. We'll do this by trying
+ * to increment w_local_GP.put as long as someone else
+ * doesn't beat us to it. If they do, we'll have to
+ * try again.
+ */
+ if (cmpxchg(&ch_sn2->w_local_GP.put, put, put + 1) ==
+ put) {
+ /* we got the entry referenced by put */
+ break;
+ }
+ continue; /* try again */
+ }
+
+ /*
+ * There aren't any available msg entries at this time.
+ *
+ * In waiting for a message entry to become available,
+ * we set a timeout in case the other side is not sending
+ * completion interrupts. This lets us fake a notify IRQ
+ * that will cause the notify IRQ handler to fetch the latest
+ * GP values as if an interrupt was sent by the other side.
+ */
+ if (ret == xpTimeout)
+ xpc_send_chctl_local_msgrequest_sn2(ch);
+
+ if (flags & XPC_NOWAIT)
+ return xpNoWait;
+
+ ret = xpc_allocate_msg_wait(ch);
+ if (ret != xpInterrupted && ret != xpTimeout)
+ return ret;
+ }
+
+ /* get the message's address and initialize it */
+ msg = (struct xpc_msg_sn2 *)((u64)ch_sn2->local_msgqueue +
+ (put % ch->local_nentries) *
+ ch->entry_size);
+
+ DBUG_ON(msg->flags != 0);
+ msg->number = put;
+
+ dev_dbg(xpc_chan, "w_local_GP.put changed to %lld; msg=0x%p, "
+ "msg_number=%lld, partid=%d, channel=%d\n", put + 1,
+ (void *)msg, msg->number, ch->partid, ch->number);
+
+ *address_of_msg = msg;
+ return xpSuccess;
+}
+
+/*
+ * Common code that does the actual sending of the message by advancing the
+ * local message queue's Put value and sends a chctl msgrequest to the
+ * partition the message is being sent to.
+ */
+static enum xp_retval
+xpc_send_payload_sn2(struct xpc_channel *ch, u32 flags, void *payload,
+ u16 payload_size, u8 notify_type, xpc_notify_func func,
+ void *key)
+{
+ enum xp_retval ret = xpSuccess;
+ struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2;
+ struct xpc_msg_sn2 *msg = msg;
+ struct xpc_notify_sn2 *notify = notify;
+ s64 msg_number;
+ s64 put;
+
+ DBUG_ON(notify_type == XPC_N_CALL && func == NULL);
+
+ if (XPC_MSG_SIZE(payload_size) > ch->entry_size)
+ return xpPayloadTooBig;
+
+ xpc_msgqueue_ref(ch);
+
+ if (ch->flags & XPC_C_DISCONNECTING) {
+ ret = ch->reason;
+ goto out_1;
+ }
+ if (!(ch->flags & XPC_C_CONNECTED)) {
+ ret = xpNotConnected;
+ goto out_1;
+ }
+
+ ret = xpc_allocate_msg_sn2(ch, flags, &msg);
+ if (ret != xpSuccess)
+ goto out_1;
+
+ msg_number = msg->number;
+
+ if (notify_type != 0) {
+ /*
+ * Tell the remote side to send an ACK interrupt when the
+ * message has been delivered.
+ */
+ msg->flags |= XPC_M_SN2_INTERRUPT;
+
+ atomic_inc(&ch->n_to_notify);
+
+ notify = &ch_sn2->notify_queue[msg_number % ch->local_nentries];
+ notify->func = func;
+ notify->key = key;
+ notify->type = notify_type;
+
+ /* ??? Is a mb() needed here? */
+
+ if (ch->flags & XPC_C_DISCONNECTING) {
+ /*
+ * An error occurred between our last error check and
+ * this one. We will try to clear the type field from
+ * the notify entry. If we succeed then
+ * xpc_disconnect_channel() didn't already process
+ * the notify entry.
+ */
+ if (cmpxchg(¬ify->type, notify_type, 0) ==
+ notify_type) {
+ atomic_dec(&ch->n_to_notify);
+ ret = ch->reason;
+ }
+ goto out_1;
+ }
+ }
+
+ memcpy(&msg->payload, payload, payload_size);
+
+ msg->flags |= XPC_M_SN2_READY;
+
+ /*
+ * The preceding store of msg->flags must occur before the following
+ * load of local_GP->put.
+ */
+ smp_mb();
+
+ /* see if the message is next in line to be sent, if so send it */
+
+ put = ch_sn2->local_GP->put;
+ if (put == msg_number)
+ xpc_send_msgs_sn2(ch, put);
+
+out_1:
+ xpc_msgqueue_deref(ch);
+ return ret;
+}
+
+/*
+ * Now we actually acknowledge the messages that have been delivered and ack'd
+ * by advancing the cached remote message queue's Get value and if requested
+ * send a chctl msgrequest to the message sender's partition.
+ *
+ * If a message has XPC_M_SN2_INTERRUPT set, send an interrupt to the partition
+ * that sent the message.
+ */
+static void
+xpc_acknowledge_msgs_sn2(struct xpc_channel *ch, s64 initial_get, u8 msg_flags)
+{
+ struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2;
+ struct xpc_msg_sn2 *msg;
+ s64 get = initial_get + 1;
+ int send_msgrequest = 0;
+
+ while (1) {
+
+ while (1) {
+ if (get == ch_sn2->w_local_GP.get)
+ break;
+
+ msg = (struct xpc_msg_sn2 *)((u64)ch_sn2->
+ remote_msgqueue + (get %
+ ch->remote_nentries) *
+ ch->entry_size);
+
+ if (!(msg->flags & XPC_M_SN2_DONE))
+ break;
+
+ msg_flags |= msg->flags;
+ get++;
+ }
+
+ if (get == initial_get) {
+ /* nothing's changed */
+ break;
+ }
+
+ if (cmpxchg_rel(&ch_sn2->local_GP->get, initial_get, get) !=
+ initial_get) {
+ /* someone else beat us to it */
+ DBUG_ON(ch_sn2->local_GP->get <= initial_get);
+ break;
+ }
+
+ /* we just set the new value of local_GP->get */
+
+ dev_dbg(xpc_chan, "local_GP->get changed to %lld, partid=%d, "
+ "channel=%d\n", get, ch->partid, ch->number);
+
+ send_msgrequest = (msg_flags & XPC_M_SN2_INTERRUPT);
+
+ /*
+ * We need to ensure that the message referenced by
+ * local_GP->get is not XPC_M_SN2_DONE or that local_GP->get
+ * equals w_local_GP.get, so we'll go have a look.
+ */
+ initial_get = get;
+ }
+
+ if (send_msgrequest)
+ xpc_send_chctl_msgrequest_sn2(ch);
+}
+
+static void
+xpc_received_payload_sn2(struct xpc_channel *ch, void *payload)
+{
+ struct xpc_msg_sn2 *msg;
+ s64 msg_number;
+ s64 get;
+
+ msg = container_of(payload, struct xpc_msg_sn2, payload);
+ msg_number = msg->number;
+
+ dev_dbg(xpc_chan, "msg=0x%p, msg_number=%lld, partid=%d, channel=%d\n",
+ (void *)msg, msg_number, ch->partid, ch->number);
+
+ DBUG_ON((((u64)msg - (u64)ch->sn.sn2.remote_msgqueue) / ch->entry_size) !=
+ msg_number % ch->remote_nentries);
+ DBUG_ON(!(msg->flags & XPC_M_SN2_READY));
+ DBUG_ON(msg->flags & XPC_M_SN2_DONE);
+
+ msg->flags |= XPC_M_SN2_DONE;
+
+ /*
+ * The preceding store of msg->flags must occur before the following
+ * load of local_GP->get.
+ */
+ smp_mb();
+
+ /*
+ * See if this message is next in line to be acknowledged as having
+ * been delivered.
+ */
+ get = ch->sn.sn2.local_GP->get;
+ if (get == msg_number)
+ xpc_acknowledge_msgs_sn2(ch, get, msg->flags);
+}
+
+static struct xpc_arch_operations xpc_arch_ops_sn2 = {
+ .setup_partitions = xpc_setup_partitions_sn2,
+ .teardown_partitions = xpc_teardown_partitions_sn2,
+ .process_activate_IRQ_rcvd = xpc_process_activate_IRQ_rcvd_sn2,
+ .get_partition_rsvd_page_pa = xpc_get_partition_rsvd_page_pa_sn2,
+ .setup_rsvd_page = xpc_setup_rsvd_page_sn2,
+
+ .allow_hb = xpc_allow_hb_sn2,
+ .disallow_hb = xpc_disallow_hb_sn2,
+ .disallow_all_hbs = xpc_disallow_all_hbs_sn2,
+ .increment_heartbeat = xpc_increment_heartbeat_sn2,
+ .offline_heartbeat = xpc_offline_heartbeat_sn2,
+ .online_heartbeat = xpc_online_heartbeat_sn2,
+ .heartbeat_init = xpc_heartbeat_init_sn2,
+ .heartbeat_exit = xpc_heartbeat_exit_sn2,
+ .get_remote_heartbeat = xpc_get_remote_heartbeat_sn2,
+
+ .request_partition_activation =
+ xpc_request_partition_activation_sn2,
+ .request_partition_reactivation =
+ xpc_request_partition_reactivation_sn2,
+ .request_partition_deactivation =
+ xpc_request_partition_deactivation_sn2,
+ .cancel_partition_deactivation_request =
+ xpc_cancel_partition_deactivation_request_sn2,
+
+ .setup_ch_structures = xpc_setup_ch_structures_sn2,
+ .teardown_ch_structures = xpc_teardown_ch_structures_sn2,
+
+ .make_first_contact = xpc_make_first_contact_sn2,
+
+ .get_chctl_all_flags = xpc_get_chctl_all_flags_sn2,
+ .send_chctl_closerequest = xpc_send_chctl_closerequest_sn2,
+ .send_chctl_closereply = xpc_send_chctl_closereply_sn2,
+ .send_chctl_openrequest = xpc_send_chctl_openrequest_sn2,
+ .send_chctl_openreply = xpc_send_chctl_openreply_sn2,
+ .send_chctl_opencomplete = xpc_send_chctl_opencomplete_sn2,
+ .process_msg_chctl_flags = xpc_process_msg_chctl_flags_sn2,
+
+ .save_remote_msgqueue_pa = xpc_save_remote_msgqueue_pa_sn2,
+
+ .setup_msg_structures = xpc_setup_msg_structures_sn2,
+ .teardown_msg_structures = xpc_teardown_msg_structures_sn2,
+
+ .indicate_partition_engaged = xpc_indicate_partition_engaged_sn2,
+ .indicate_partition_disengaged = xpc_indicate_partition_disengaged_sn2,
+ .partition_engaged = xpc_partition_engaged_sn2,
+ .any_partition_engaged = xpc_any_partition_engaged_sn2,
+ .assume_partition_disengaged = xpc_assume_partition_disengaged_sn2,
+
+ .n_of_deliverable_payloads = xpc_n_of_deliverable_payloads_sn2,
+ .send_payload = xpc_send_payload_sn2,
+ .get_deliverable_payload = xpc_get_deliverable_payload_sn2,
+ .received_payload = xpc_received_payload_sn2,
+ .notify_senders_of_disconnect = xpc_notify_senders_of_disconnect_sn2,
+};
+
+int
+xpc_init_sn2(void)
+{
+ int ret;
+ size_t buf_size;
+
+ xpc_arch_ops = xpc_arch_ops_sn2;
+
+ if (offsetof(struct xpc_msg_sn2, payload) > XPC_MSG_HDR_MAX_SIZE) {
+ dev_err(xpc_part, "header portion of struct xpc_msg_sn2 is "
+ "larger than %d\n", XPC_MSG_HDR_MAX_SIZE);
+ return -E2BIG;
+ }
+
+ buf_size = max(XPC_RP_VARS_SIZE,
+ XPC_RP_HEADER_SIZE + XP_NASID_MASK_BYTES_SN2);
+ xpc_remote_copy_buffer_sn2 = xpc_kmalloc_cacheline_aligned(buf_size,
+ GFP_KERNEL,
+ &xpc_remote_copy_buffer_base_sn2);
+ if (xpc_remote_copy_buffer_sn2 == NULL) {
+ dev_err(xpc_part, "can't get memory for remote copy buffer\n");
+ return -ENOMEM;
+ }
+
+ /* open up protections for IPI and [potentially] amo operations */
+ xpc_allow_IPI_ops_sn2();
+ xpc_allow_amo_ops_shub_wars_1_1_sn2();
+
+ /*
+ * This is safe to do before the xpc_hb_checker thread has started
+ * because the handler releases a wait queue. If an interrupt is
+ * received before the thread is waiting, it will not go to sleep,
+ * but rather immediately process the interrupt.
+ */
+ ret = request_irq(SGI_XPC_ACTIVATE, xpc_handle_activate_IRQ_sn2, 0,
+ "xpc hb", NULL);
+ if (ret != 0) {
+ dev_err(xpc_part, "can't register ACTIVATE IRQ handler, "
+ "errno=%d\n", -ret);
+ xpc_disallow_IPI_ops_sn2();
+ kfree(xpc_remote_copy_buffer_base_sn2);
+ }
+ return ret;
+}
+
+void
+xpc_exit_sn2(void)
+{
+ free_irq(SGI_XPC_ACTIVATE, NULL);
+ xpc_disallow_IPI_ops_sn2();
+ kfree(xpc_remote_copy_buffer_base_sn2);
+}
diff --git a/drivers/misc/sgi-xp/xpc_uv.c b/drivers/misc/sgi-xp/xpc_uv.c
new file mode 100644
index 0000000..340b44d
--- /dev/null
+++ b/drivers/misc/sgi-xp/xpc_uv.c
@@ -0,0 +1,1813 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 2008-2009 Silicon Graphics, Inc. All Rights Reserved.
+ */
+
+/*
+ * Cross Partition Communication (XPC) uv-based functions.
+ *
+ * Architecture specific implementation of common functions.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <linux/delay.h>
+#include <linux/device.h>
+#include <linux/cpu.h>
+#include <linux/module.h>
+#include <linux/err.h>
+#include <linux/slab.h>
+#include <asm/uv/uv_hub.h>
+#if defined CONFIG_X86_64
+#include <asm/uv/bios.h>
+#include <asm/uv/uv_irq.h>
+#elif defined CONFIG_IA64_GENERIC || defined CONFIG_IA64_SGI_UV
+#include <asm/sn/intr.h>
+#include <asm/sn/sn_sal.h>
+#endif
+#include "../sgi-gru/gru.h"
+#include "../sgi-gru/grukservices.h"
+#include "xpc.h"
+
+#if defined CONFIG_IA64_GENERIC || defined CONFIG_IA64_SGI_UV
+struct uv_IO_APIC_route_entry {
+ __u64 vector : 8,
+ delivery_mode : 3,
+ dest_mode : 1,
+ delivery_status : 1,
+ polarity : 1,
+ __reserved_1 : 1,
+ trigger : 1,
+ mask : 1,
+ __reserved_2 : 15,
+ dest : 32;
+};
+#endif
+
+static struct xpc_heartbeat_uv *xpc_heartbeat_uv;
+
+#define XPC_ACTIVATE_MSG_SIZE_UV (1 * GRU_CACHE_LINE_BYTES)
+#define XPC_ACTIVATE_MQ_SIZE_UV (4 * XP_MAX_NPARTITIONS_UV * \
+ XPC_ACTIVATE_MSG_SIZE_UV)
+#define XPC_ACTIVATE_IRQ_NAME "xpc_activate"
+
+#define XPC_NOTIFY_MSG_SIZE_UV (2 * GRU_CACHE_LINE_BYTES)
+#define XPC_NOTIFY_MQ_SIZE_UV (4 * XP_MAX_NPARTITIONS_UV * \
+ XPC_NOTIFY_MSG_SIZE_UV)
+#define XPC_NOTIFY_IRQ_NAME "xpc_notify"
+
+static int xpc_mq_node = -1;
+
+static struct xpc_gru_mq_uv *xpc_activate_mq_uv;
+static struct xpc_gru_mq_uv *xpc_notify_mq_uv;
+
+static int
+xpc_setup_partitions_uv(void)
+{
+ short partid;
+ struct xpc_partition_uv *part_uv;
+
+ for (partid = 0; partid < XP_MAX_NPARTITIONS_UV; partid++) {
+ part_uv = &xpc_partitions[partid].sn.uv;
+
+ mutex_init(&part_uv->cached_activate_gru_mq_desc_mutex);
+ spin_lock_init(&part_uv->flags_lock);
+ part_uv->remote_act_state = XPC_P_AS_INACTIVE;
+ }
+ return 0;
+}
+
+static void
+xpc_teardown_partitions_uv(void)
+{
+ short partid;
+ struct xpc_partition_uv *part_uv;
+ unsigned long irq_flags;
+
+ for (partid = 0; partid < XP_MAX_NPARTITIONS_UV; partid++) {
+ part_uv = &xpc_partitions[partid].sn.uv;
+
+ if (part_uv->cached_activate_gru_mq_desc != NULL) {
+ mutex_lock(&part_uv->cached_activate_gru_mq_desc_mutex);
+ spin_lock_irqsave(&part_uv->flags_lock, irq_flags);
+ part_uv->flags &= ~XPC_P_CACHED_ACTIVATE_GRU_MQ_DESC_UV;
+ spin_unlock_irqrestore(&part_uv->flags_lock, irq_flags);
+ kfree(part_uv->cached_activate_gru_mq_desc);
+ part_uv->cached_activate_gru_mq_desc = NULL;
+ mutex_unlock(&part_uv->
+ cached_activate_gru_mq_desc_mutex);
+ }
+ }
+}
+
+static int
+xpc_get_gru_mq_irq_uv(struct xpc_gru_mq_uv *mq, int cpu, char *irq_name)
+{
+ int mmr_pnode = uv_blade_to_pnode(mq->mmr_blade);
+
+#if defined CONFIG_X86_64
+ mq->irq = uv_setup_irq(irq_name, cpu, mq->mmr_blade, mq->mmr_offset,
+ UV_AFFINITY_CPU);
+ if (mq->irq < 0)
+ return mq->irq;
+
+ mq->mmr_value = uv_read_global_mmr64(mmr_pnode, mq->mmr_offset);
+
+#elif defined CONFIG_IA64_GENERIC || defined CONFIG_IA64_SGI_UV
+ if (strcmp(irq_name, XPC_ACTIVATE_IRQ_NAME) == 0)
+ mq->irq = SGI_XPC_ACTIVATE;
+ else if (strcmp(irq_name, XPC_NOTIFY_IRQ_NAME) == 0)
+ mq->irq = SGI_XPC_NOTIFY;
+ else
+ return -EINVAL;
+
+ mq->mmr_value = (unsigned long)cpu_physical_id(cpu) << 32 | mq->irq;
+ uv_write_global_mmr64(mmr_pnode, mq->mmr_offset, mq->mmr_value);
+#else
+ #error not a supported configuration
+#endif
+
+ return 0;
+}
+
+static void
+xpc_release_gru_mq_irq_uv(struct xpc_gru_mq_uv *mq)
+{
+#if defined CONFIG_X86_64
+ uv_teardown_irq(mq->irq);
+
+#elif defined CONFIG_IA64_GENERIC || defined CONFIG_IA64_SGI_UV
+ int mmr_pnode;
+ unsigned long mmr_value;
+
+ mmr_pnode = uv_blade_to_pnode(mq->mmr_blade);
+ mmr_value = 1UL << 16;
+
+ uv_write_global_mmr64(mmr_pnode, mq->mmr_offset, mmr_value);
+#else
+ #error not a supported configuration
+#endif
+}
+
+static int
+xpc_gru_mq_watchlist_alloc_uv(struct xpc_gru_mq_uv *mq)
+{
+ int ret;
+
+#if defined CONFIG_IA64_GENERIC || defined CONFIG_IA64_SGI_UV
+ int mmr_pnode = uv_blade_to_pnode(mq->mmr_blade);
+
+ ret = sn_mq_watchlist_alloc(mmr_pnode, (void *)uv_gpa(mq->address),
+ mq->order, &mq->mmr_offset);
+ if (ret < 0) {
+ dev_err(xpc_part, "sn_mq_watchlist_alloc() failed, ret=%d\n",
+ ret);
+ return -EBUSY;
+ }
+#elif defined CONFIG_X86_64
+ ret = uv_bios_mq_watchlist_alloc(uv_gpa(mq->address),
+ mq->order, &mq->mmr_offset);
+ if (ret < 0) {
+ dev_err(xpc_part, "uv_bios_mq_watchlist_alloc() failed, "
+ "ret=%d\n", ret);
+ return ret;
+ }
+#else
+ #error not a supported configuration
+#endif
+
+ mq->watchlist_num = ret;
+ return 0;
+}
+
+static void
+xpc_gru_mq_watchlist_free_uv(struct xpc_gru_mq_uv *mq)
+{
+ int ret;
+ int mmr_pnode = uv_blade_to_pnode(mq->mmr_blade);
+
+#if defined CONFIG_X86_64
+ ret = uv_bios_mq_watchlist_free(mmr_pnode, mq->watchlist_num);
+ BUG_ON(ret != BIOS_STATUS_SUCCESS);
+#elif defined CONFIG_IA64_GENERIC || defined CONFIG_IA64_SGI_UV
+ ret = sn_mq_watchlist_free(mmr_pnode, mq->watchlist_num);
+ BUG_ON(ret != SALRET_OK);
+#else
+ #error not a supported configuration
+#endif
+}
+
+static struct xpc_gru_mq_uv *
+xpc_create_gru_mq_uv(unsigned int mq_size, int cpu, char *irq_name,
+ irq_handler_t irq_handler)
+{
+ enum xp_retval xp_ret;
+ int ret;
+ int nid;
+ int nasid;
+ int pg_order;
+ struct page *page;
+ struct xpc_gru_mq_uv *mq;
+ struct uv_IO_APIC_route_entry *mmr_value;
+
+ mq = kmalloc(sizeof(struct xpc_gru_mq_uv), GFP_KERNEL);
+ if (mq == NULL) {
+ dev_err(xpc_part, "xpc_create_gru_mq_uv() failed to kmalloc() "
+ "a xpc_gru_mq_uv structure\n");
+ ret = -ENOMEM;
+ goto out_0;
+ }
+
+ mq->gru_mq_desc = kzalloc(sizeof(struct gru_message_queue_desc),
+ GFP_KERNEL);
+ if (mq->gru_mq_desc == NULL) {
+ dev_err(xpc_part, "xpc_create_gru_mq_uv() failed to kmalloc() "
+ "a gru_message_queue_desc structure\n");
+ ret = -ENOMEM;
+ goto out_1;
+ }
+
+ pg_order = get_order(mq_size);
+ mq->order = pg_order + PAGE_SHIFT;
+ mq_size = 1UL << mq->order;
+
+ mq->mmr_blade = uv_cpu_to_blade_id(cpu);
+
+ nid = cpu_to_node(cpu);
+ page = __alloc_pages_node(nid,
+ GFP_KERNEL | __GFP_ZERO | __GFP_THISNODE,
+ pg_order);
+ if (page == NULL) {
+ dev_err(xpc_part, "xpc_create_gru_mq_uv() failed to alloc %d "
+ "bytes of memory on nid=%d for GRU mq\n", mq_size, nid);
+ ret = -ENOMEM;
+ goto out_2;
+ }
+ mq->address = page_address(page);
+
+ /* enable generation of irq when GRU mq operation occurs to this mq */
+ ret = xpc_gru_mq_watchlist_alloc_uv(mq);
+ if (ret != 0)
+ goto out_3;
+
+ ret = xpc_get_gru_mq_irq_uv(mq, cpu, irq_name);
+ if (ret != 0)
+ goto out_4;
+
+ ret = request_irq(mq->irq, irq_handler, 0, irq_name, NULL);
+ if (ret != 0) {
+ dev_err(xpc_part, "request_irq(irq=%d) returned error=%d\n",
+ mq->irq, -ret);
+ goto out_5;
+ }
+
+ nasid = UV_PNODE_TO_NASID(uv_cpu_to_pnode(cpu));
+
+ mmr_value = (struct uv_IO_APIC_route_entry *)&mq->mmr_value;
+ ret = gru_create_message_queue(mq->gru_mq_desc, mq->address, mq_size,
+ nasid, mmr_value->vector, mmr_value->dest);
+ if (ret != 0) {
+ dev_err(xpc_part, "gru_create_message_queue() returned "
+ "error=%d\n", ret);
+ ret = -EINVAL;
+ goto out_6;
+ }
+
+ /* allow other partitions to access this GRU mq */
+ xp_ret = xp_expand_memprotect(xp_pa(mq->address), mq_size);
+ if (xp_ret != xpSuccess) {
+ ret = -EACCES;
+ goto out_6;
+ }
+
+ return mq;
+
+ /* something went wrong */
+out_6:
+ free_irq(mq->irq, NULL);
+out_5:
+ xpc_release_gru_mq_irq_uv(mq);
+out_4:
+ xpc_gru_mq_watchlist_free_uv(mq);
+out_3:
+ free_pages((unsigned long)mq->address, pg_order);
+out_2:
+ kfree(mq->gru_mq_desc);
+out_1:
+ kfree(mq);
+out_0:
+ return ERR_PTR(ret);
+}
+
+static void
+xpc_destroy_gru_mq_uv(struct xpc_gru_mq_uv *mq)
+{
+ unsigned int mq_size;
+ int pg_order;
+ int ret;
+
+ /* disallow other partitions to access GRU mq */
+ mq_size = 1UL << mq->order;
+ ret = xp_restrict_memprotect(xp_pa(mq->address), mq_size);
+ BUG_ON(ret != xpSuccess);
+
+ /* unregister irq handler and release mq irq/vector mapping */
+ free_irq(mq->irq, NULL);
+ xpc_release_gru_mq_irq_uv(mq);
+
+ /* disable generation of irq when GRU mq op occurs to this mq */
+ xpc_gru_mq_watchlist_free_uv(mq);
+
+ pg_order = mq->order - PAGE_SHIFT;
+ free_pages((unsigned long)mq->address, pg_order);
+
+ kfree(mq);
+}
+
+static enum xp_retval
+xpc_send_gru_msg(struct gru_message_queue_desc *gru_mq_desc, void *msg,
+ size_t msg_size)
+{
+ enum xp_retval xp_ret;
+ int ret;
+
+ while (1) {
+ ret = gru_send_message_gpa(gru_mq_desc, msg, msg_size);
+ if (ret == MQE_OK) {
+ xp_ret = xpSuccess;
+ break;
+ }
+
+ if (ret == MQE_QUEUE_FULL) {
+ dev_dbg(xpc_chan, "gru_send_message_gpa() returned "
+ "error=MQE_QUEUE_FULL\n");
+ /* !!! handle QLimit reached; delay & try again */
+ /* ??? Do we add a limit to the number of retries? */
+ (void)msleep_interruptible(10);
+ } else if (ret == MQE_CONGESTION) {
+ dev_dbg(xpc_chan, "gru_send_message_gpa() returned "
+ "error=MQE_CONGESTION\n");
+ /* !!! handle LB Overflow; simply try again */
+ /* ??? Do we add a limit to the number of retries? */
+ } else {
+ /* !!! Currently this is MQE_UNEXPECTED_CB_ERR */
+ dev_err(xpc_chan, "gru_send_message_gpa() returned "
+ "error=%d\n", ret);
+ xp_ret = xpGruSendMqError;
+ break;
+ }
+ }
+ return xp_ret;
+}
+
+static void
+xpc_process_activate_IRQ_rcvd_uv(void)
+{
+ unsigned long irq_flags;
+ short partid;
+ struct xpc_partition *part;
+ u8 act_state_req;
+
+ DBUG_ON(xpc_activate_IRQ_rcvd == 0);
+
+ spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags);
+ for (partid = 0; partid < XP_MAX_NPARTITIONS_UV; partid++) {
+ part = &xpc_partitions[partid];
+
+ if (part->sn.uv.act_state_req == 0)
+ continue;
+
+ xpc_activate_IRQ_rcvd--;
+ BUG_ON(xpc_activate_IRQ_rcvd < 0);
+
+ act_state_req = part->sn.uv.act_state_req;
+ part->sn.uv.act_state_req = 0;
+ spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags);
+
+ if (act_state_req == XPC_P_ASR_ACTIVATE_UV) {
+ if (part->act_state == XPC_P_AS_INACTIVE)
+ xpc_activate_partition(part);
+ else if (part->act_state == XPC_P_AS_DEACTIVATING)
+ XPC_DEACTIVATE_PARTITION(part, xpReactivating);
+
+ } else if (act_state_req == XPC_P_ASR_REACTIVATE_UV) {
+ if (part->act_state == XPC_P_AS_INACTIVE)
+ xpc_activate_partition(part);
+ else
+ XPC_DEACTIVATE_PARTITION(part, xpReactivating);
+
+ } else if (act_state_req == XPC_P_ASR_DEACTIVATE_UV) {
+ XPC_DEACTIVATE_PARTITION(part, part->sn.uv.reason);
+
+ } else {
+ BUG();
+ }
+
+ spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags);
+ if (xpc_activate_IRQ_rcvd == 0)
+ break;
+ }
+ spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags);
+
+}
+
+static void
+xpc_handle_activate_mq_msg_uv(struct xpc_partition *part,
+ struct xpc_activate_mq_msghdr_uv *msg_hdr,
+ int part_setup,
+ int *wakeup_hb_checker)
+{
+ unsigned long irq_flags;
+ struct xpc_partition_uv *part_uv = &part->sn.uv;
+ struct xpc_openclose_args *args;
+
+ part_uv->remote_act_state = msg_hdr->act_state;
+
+ switch (msg_hdr->type) {
+ case XPC_ACTIVATE_MQ_MSG_SYNC_ACT_STATE_UV:
+ /* syncing of remote_act_state was just done above */
+ break;
+
+ case XPC_ACTIVATE_MQ_MSG_ACTIVATE_REQ_UV: {
+ struct xpc_activate_mq_msg_activate_req_uv *msg;
+
+ /*
+ * ??? Do we deal here with ts_jiffies being different
+ * ??? if act_state != XPC_P_AS_INACTIVE instead of
+ * ??? below?
+ */
+ msg = container_of(msg_hdr, struct
+ xpc_activate_mq_msg_activate_req_uv, hdr);
+
+ spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags);
+ if (part_uv->act_state_req == 0)
+ xpc_activate_IRQ_rcvd++;
+ part_uv->act_state_req = XPC_P_ASR_ACTIVATE_UV;
+ part->remote_rp_pa = msg->rp_gpa; /* !!! _pa is _gpa */
+ part->remote_rp_ts_jiffies = msg_hdr->rp_ts_jiffies;
+ part_uv->heartbeat_gpa = msg->heartbeat_gpa;
+
+ if (msg->activate_gru_mq_desc_gpa !=
+ part_uv->activate_gru_mq_desc_gpa) {
+ spin_lock(&part_uv->flags_lock);
+ part_uv->flags &= ~XPC_P_CACHED_ACTIVATE_GRU_MQ_DESC_UV;
+ spin_unlock(&part_uv->flags_lock);
+ part_uv->activate_gru_mq_desc_gpa =
+ msg->activate_gru_mq_desc_gpa;
+ }
+ spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags);
+
+ (*wakeup_hb_checker)++;
+ break;
+ }
+ case XPC_ACTIVATE_MQ_MSG_DEACTIVATE_REQ_UV: {
+ struct xpc_activate_mq_msg_deactivate_req_uv *msg;
+
+ msg = container_of(msg_hdr, struct
+ xpc_activate_mq_msg_deactivate_req_uv, hdr);
+
+ spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags);
+ if (part_uv->act_state_req == 0)
+ xpc_activate_IRQ_rcvd++;
+ part_uv->act_state_req = XPC_P_ASR_DEACTIVATE_UV;
+ part_uv->reason = msg->reason;
+ spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags);
+
+ (*wakeup_hb_checker)++;
+ return;
+ }
+ case XPC_ACTIVATE_MQ_MSG_CHCTL_CLOSEREQUEST_UV: {
+ struct xpc_activate_mq_msg_chctl_closerequest_uv *msg;
+
+ if (!part_setup)
+ break;
+
+ msg = container_of(msg_hdr, struct
+ xpc_activate_mq_msg_chctl_closerequest_uv,
+ hdr);
+ args = &part->remote_openclose_args[msg->ch_number];
+ args->reason = msg->reason;
+
+ spin_lock_irqsave(&part->chctl_lock, irq_flags);
+ part->chctl.flags[msg->ch_number] |= XPC_CHCTL_CLOSEREQUEST;
+ spin_unlock_irqrestore(&part->chctl_lock, irq_flags);
+
+ xpc_wakeup_channel_mgr(part);
+ break;
+ }
+ case XPC_ACTIVATE_MQ_MSG_CHCTL_CLOSEREPLY_UV: {
+ struct xpc_activate_mq_msg_chctl_closereply_uv *msg;
+
+ if (!part_setup)
+ break;
+
+ msg = container_of(msg_hdr, struct
+ xpc_activate_mq_msg_chctl_closereply_uv,
+ hdr);
+
+ spin_lock_irqsave(&part->chctl_lock, irq_flags);
+ part->chctl.flags[msg->ch_number] |= XPC_CHCTL_CLOSEREPLY;
+ spin_unlock_irqrestore(&part->chctl_lock, irq_flags);
+
+ xpc_wakeup_channel_mgr(part);
+ break;
+ }
+ case XPC_ACTIVATE_MQ_MSG_CHCTL_OPENREQUEST_UV: {
+ struct xpc_activate_mq_msg_chctl_openrequest_uv *msg;
+
+ if (!part_setup)
+ break;
+
+ msg = container_of(msg_hdr, struct
+ xpc_activate_mq_msg_chctl_openrequest_uv,
+ hdr);
+ args = &part->remote_openclose_args[msg->ch_number];
+ args->entry_size = msg->entry_size;
+ args->local_nentries = msg->local_nentries;
+
+ spin_lock_irqsave(&part->chctl_lock, irq_flags);
+ part->chctl.flags[msg->ch_number] |= XPC_CHCTL_OPENREQUEST;
+ spin_unlock_irqrestore(&part->chctl_lock, irq_flags);
+
+ xpc_wakeup_channel_mgr(part);
+ break;
+ }
+ case XPC_ACTIVATE_MQ_MSG_CHCTL_OPENREPLY_UV: {
+ struct xpc_activate_mq_msg_chctl_openreply_uv *msg;
+
+ if (!part_setup)
+ break;
+
+ msg = container_of(msg_hdr, struct
+ xpc_activate_mq_msg_chctl_openreply_uv, hdr);
+ args = &part->remote_openclose_args[msg->ch_number];
+ args->remote_nentries = msg->remote_nentries;
+ args->local_nentries = msg->local_nentries;
+ args->local_msgqueue_pa = msg->notify_gru_mq_desc_gpa;
+
+ spin_lock_irqsave(&part->chctl_lock, irq_flags);
+ part->chctl.flags[msg->ch_number] |= XPC_CHCTL_OPENREPLY;
+ spin_unlock_irqrestore(&part->chctl_lock, irq_flags);
+
+ xpc_wakeup_channel_mgr(part);
+ break;
+ }
+ case XPC_ACTIVATE_MQ_MSG_CHCTL_OPENCOMPLETE_UV: {
+ struct xpc_activate_mq_msg_chctl_opencomplete_uv *msg;
+
+ if (!part_setup)
+ break;
+
+ msg = container_of(msg_hdr, struct
+ xpc_activate_mq_msg_chctl_opencomplete_uv, hdr);
+ spin_lock_irqsave(&part->chctl_lock, irq_flags);
+ part->chctl.flags[msg->ch_number] |= XPC_CHCTL_OPENCOMPLETE;
+ spin_unlock_irqrestore(&part->chctl_lock, irq_flags);
+
+ xpc_wakeup_channel_mgr(part);
+ }
+ case XPC_ACTIVATE_MQ_MSG_MARK_ENGAGED_UV:
+ spin_lock_irqsave(&part_uv->flags_lock, irq_flags);
+ part_uv->flags |= XPC_P_ENGAGED_UV;
+ spin_unlock_irqrestore(&part_uv->flags_lock, irq_flags);
+ break;
+
+ case XPC_ACTIVATE_MQ_MSG_MARK_DISENGAGED_UV:
+ spin_lock_irqsave(&part_uv->flags_lock, irq_flags);
+ part_uv->flags &= ~XPC_P_ENGAGED_UV;
+ spin_unlock_irqrestore(&part_uv->flags_lock, irq_flags);
+ break;
+
+ default:
+ dev_err(xpc_part, "received unknown activate_mq msg type=%d "
+ "from partition=%d\n", msg_hdr->type, XPC_PARTID(part));
+
+ /* get hb checker to deactivate from the remote partition */
+ spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags);
+ if (part_uv->act_state_req == 0)
+ xpc_activate_IRQ_rcvd++;
+ part_uv->act_state_req = XPC_P_ASR_DEACTIVATE_UV;
+ part_uv->reason = xpBadMsgType;
+ spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags);
+
+ (*wakeup_hb_checker)++;
+ return;
+ }
+
+ if (msg_hdr->rp_ts_jiffies != part->remote_rp_ts_jiffies &&
+ part->remote_rp_ts_jiffies != 0) {
+ /*
+ * ??? Does what we do here need to be sensitive to
+ * ??? act_state or remote_act_state?
+ */
+ spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags);
+ if (part_uv->act_state_req == 0)
+ xpc_activate_IRQ_rcvd++;
+ part_uv->act_state_req = XPC_P_ASR_REACTIVATE_UV;
+ spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags);
+
+ (*wakeup_hb_checker)++;
+ }
+}
+
+static irqreturn_t
+xpc_handle_activate_IRQ_uv(int irq, void *dev_id)
+{
+ struct xpc_activate_mq_msghdr_uv *msg_hdr;
+ short partid;
+ struct xpc_partition *part;
+ int wakeup_hb_checker = 0;
+ int part_referenced;
+
+ while (1) {
+ msg_hdr = gru_get_next_message(xpc_activate_mq_uv->gru_mq_desc);
+ if (msg_hdr == NULL)
+ break;
+
+ partid = msg_hdr->partid;
+ if (partid < 0 || partid >= XP_MAX_NPARTITIONS_UV) {
+ dev_err(xpc_part, "xpc_handle_activate_IRQ_uv() "
+ "received invalid partid=0x%x in message\n",
+ partid);
+ } else {
+ part = &xpc_partitions[partid];
+
+ part_referenced = xpc_part_ref(part);
+ xpc_handle_activate_mq_msg_uv(part, msg_hdr,
+ part_referenced,
+ &wakeup_hb_checker);
+ if (part_referenced)
+ xpc_part_deref(part);
+ }
+
+ gru_free_message(xpc_activate_mq_uv->gru_mq_desc, msg_hdr);
+ }
+
+ if (wakeup_hb_checker)
+ wake_up_interruptible(&xpc_activate_IRQ_wq);
+
+ return IRQ_HANDLED;
+}
+
+static enum xp_retval
+xpc_cache_remote_gru_mq_desc_uv(struct gru_message_queue_desc *gru_mq_desc,
+ unsigned long gru_mq_desc_gpa)
+{
+ enum xp_retval ret;
+
+ ret = xp_remote_memcpy(uv_gpa(gru_mq_desc), gru_mq_desc_gpa,
+ sizeof(struct gru_message_queue_desc));
+ if (ret == xpSuccess)
+ gru_mq_desc->mq = NULL;
+
+ return ret;
+}
+
+static enum xp_retval
+xpc_send_activate_IRQ_uv(struct xpc_partition *part, void *msg, size_t msg_size,
+ int msg_type)
+{
+ struct xpc_activate_mq_msghdr_uv *msg_hdr = msg;
+ struct xpc_partition_uv *part_uv = &part->sn.uv;
+ struct gru_message_queue_desc *gru_mq_desc;
+ unsigned long irq_flags;
+ enum xp_retval ret;
+
+ DBUG_ON(msg_size > XPC_ACTIVATE_MSG_SIZE_UV);
+
+ msg_hdr->type = msg_type;
+ msg_hdr->partid = xp_partition_id;
+ msg_hdr->act_state = part->act_state;
+ msg_hdr->rp_ts_jiffies = xpc_rsvd_page->ts_jiffies;
+
+ mutex_lock(&part_uv->cached_activate_gru_mq_desc_mutex);
+again:
+ if (!(part_uv->flags & XPC_P_CACHED_ACTIVATE_GRU_MQ_DESC_UV)) {
+ gru_mq_desc = part_uv->cached_activate_gru_mq_desc;
+ if (gru_mq_desc == NULL) {
+ gru_mq_desc = kmalloc(sizeof(struct
+ gru_message_queue_desc),
+ GFP_KERNEL);
+ if (gru_mq_desc == NULL) {
+ ret = xpNoMemory;
+ goto done;
+ }
+ part_uv->cached_activate_gru_mq_desc = gru_mq_desc;
+ }
+
+ ret = xpc_cache_remote_gru_mq_desc_uv(gru_mq_desc,
+ part_uv->
+ activate_gru_mq_desc_gpa);
+ if (ret != xpSuccess)
+ goto done;
+
+ spin_lock_irqsave(&part_uv->flags_lock, irq_flags);
+ part_uv->flags |= XPC_P_CACHED_ACTIVATE_GRU_MQ_DESC_UV;
+ spin_unlock_irqrestore(&part_uv->flags_lock, irq_flags);
+ }
+
+ /* ??? Is holding a spin_lock (ch->lock) during this call a bad idea? */
+ ret = xpc_send_gru_msg(part_uv->cached_activate_gru_mq_desc, msg,
+ msg_size);
+ if (ret != xpSuccess) {
+ smp_rmb(); /* ensure a fresh copy of part_uv->flags */
+ if (!(part_uv->flags & XPC_P_CACHED_ACTIVATE_GRU_MQ_DESC_UV))
+ goto again;
+ }
+done:
+ mutex_unlock(&part_uv->cached_activate_gru_mq_desc_mutex);
+ return ret;
+}
+
+static void
+xpc_send_activate_IRQ_part_uv(struct xpc_partition *part, void *msg,
+ size_t msg_size, int msg_type)
+{
+ enum xp_retval ret;
+
+ ret = xpc_send_activate_IRQ_uv(part, msg, msg_size, msg_type);
+ if (unlikely(ret != xpSuccess))
+ XPC_DEACTIVATE_PARTITION(part, ret);
+}
+
+static void
+xpc_send_activate_IRQ_ch_uv(struct xpc_channel *ch, unsigned long *irq_flags,
+ void *msg, size_t msg_size, int msg_type)
+{
+ struct xpc_partition *part = &xpc_partitions[ch->partid];
+ enum xp_retval ret;
+
+ ret = xpc_send_activate_IRQ_uv(part, msg, msg_size, msg_type);
+ if (unlikely(ret != xpSuccess)) {
+ if (irq_flags != NULL)
+ spin_unlock_irqrestore(&ch->lock, *irq_flags);
+
+ XPC_DEACTIVATE_PARTITION(part, ret);
+
+ if (irq_flags != NULL)
+ spin_lock_irqsave(&ch->lock, *irq_flags);
+ }
+}
+
+static void
+xpc_send_local_activate_IRQ_uv(struct xpc_partition *part, int act_state_req)
+{
+ unsigned long irq_flags;
+ struct xpc_partition_uv *part_uv = &part->sn.uv;
+
+ /*
+ * !!! Make our side think that the remote partition sent an activate
+ * !!! mq message our way by doing what the activate IRQ handler would
+ * !!! do had one really been sent.
+ */
+
+ spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags);
+ if (part_uv->act_state_req == 0)
+ xpc_activate_IRQ_rcvd++;
+ part_uv->act_state_req = act_state_req;
+ spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags);
+
+ wake_up_interruptible(&xpc_activate_IRQ_wq);
+}
+
+static enum xp_retval
+xpc_get_partition_rsvd_page_pa_uv(void *buf, u64 *cookie, unsigned long *rp_pa,
+ size_t *len)
+{
+ s64 status;
+ enum xp_retval ret;
+
+#if defined CONFIG_X86_64
+ status = uv_bios_reserved_page_pa((u64)buf, cookie, (u64 *)rp_pa,
+ (u64 *)len);
+ if (status == BIOS_STATUS_SUCCESS)
+ ret = xpSuccess;
+ else if (status == BIOS_STATUS_MORE_PASSES)
+ ret = xpNeedMoreInfo;
+ else
+ ret = xpBiosError;
+
+#elif defined CONFIG_IA64_GENERIC || defined CONFIG_IA64_SGI_UV
+ status = sn_partition_reserved_page_pa((u64)buf, cookie, rp_pa, len);
+ if (status == SALRET_OK)
+ ret = xpSuccess;
+ else if (status == SALRET_MORE_PASSES)
+ ret = xpNeedMoreInfo;
+ else
+ ret = xpSalError;
+
+#else
+ #error not a supported configuration
+#endif
+
+ return ret;
+}
+
+static int
+xpc_setup_rsvd_page_uv(struct xpc_rsvd_page *rp)
+{
+ xpc_heartbeat_uv =
+ &xpc_partitions[sn_partition_id].sn.uv.cached_heartbeat;
+ rp->sn.uv.heartbeat_gpa = uv_gpa(xpc_heartbeat_uv);
+ rp->sn.uv.activate_gru_mq_desc_gpa =
+ uv_gpa(xpc_activate_mq_uv->gru_mq_desc);
+ return 0;
+}
+
+static void
+xpc_allow_hb_uv(short partid)
+{
+}
+
+static void
+xpc_disallow_hb_uv(short partid)
+{
+}
+
+static void
+xpc_disallow_all_hbs_uv(void)
+{
+}
+
+static void
+xpc_increment_heartbeat_uv(void)
+{
+ xpc_heartbeat_uv->value++;
+}
+
+static void
+xpc_offline_heartbeat_uv(void)
+{
+ xpc_increment_heartbeat_uv();
+ xpc_heartbeat_uv->offline = 1;
+}
+
+static void
+xpc_online_heartbeat_uv(void)
+{
+ xpc_increment_heartbeat_uv();
+ xpc_heartbeat_uv->offline = 0;
+}
+
+static void
+xpc_heartbeat_init_uv(void)
+{
+ xpc_heartbeat_uv->value = 1;
+ xpc_heartbeat_uv->offline = 0;
+}
+
+static void
+xpc_heartbeat_exit_uv(void)
+{
+ xpc_offline_heartbeat_uv();
+}
+
+static enum xp_retval
+xpc_get_remote_heartbeat_uv(struct xpc_partition *part)
+{
+ struct xpc_partition_uv *part_uv = &part->sn.uv;
+ enum xp_retval ret;
+
+ ret = xp_remote_memcpy(uv_gpa(&part_uv->cached_heartbeat),
+ part_uv->heartbeat_gpa,
+ sizeof(struct xpc_heartbeat_uv));
+ if (ret != xpSuccess)
+ return ret;
+
+ if (part_uv->cached_heartbeat.value == part->last_heartbeat &&
+ !part_uv->cached_heartbeat.offline) {
+
+ ret = xpNoHeartbeat;
+ } else {
+ part->last_heartbeat = part_uv->cached_heartbeat.value;
+ }
+ return ret;
+}
+
+static void
+xpc_request_partition_activation_uv(struct xpc_rsvd_page *remote_rp,
+ unsigned long remote_rp_gpa, int nasid)
+{
+ short partid = remote_rp->SAL_partid;
+ struct xpc_partition *part = &xpc_partitions[partid];
+ struct xpc_activate_mq_msg_activate_req_uv msg;
+
+ part->remote_rp_pa = remote_rp_gpa; /* !!! _pa here is really _gpa */
+ part->remote_rp_ts_jiffies = remote_rp->ts_jiffies;
+ part->sn.uv.heartbeat_gpa = remote_rp->sn.uv.heartbeat_gpa;
+ part->sn.uv.activate_gru_mq_desc_gpa =
+ remote_rp->sn.uv.activate_gru_mq_desc_gpa;
+
+ /*
+ * ??? Is it a good idea to make this conditional on what is
+ * ??? potentially stale state information?
+ */
+ if (part->sn.uv.remote_act_state == XPC_P_AS_INACTIVE) {
+ msg.rp_gpa = uv_gpa(xpc_rsvd_page);
+ msg.heartbeat_gpa = xpc_rsvd_page->sn.uv.heartbeat_gpa;
+ msg.activate_gru_mq_desc_gpa =
+ xpc_rsvd_page->sn.uv.activate_gru_mq_desc_gpa;
+ xpc_send_activate_IRQ_part_uv(part, &msg, sizeof(msg),
+ XPC_ACTIVATE_MQ_MSG_ACTIVATE_REQ_UV);
+ }
+
+ if (part->act_state == XPC_P_AS_INACTIVE)
+ xpc_send_local_activate_IRQ_uv(part, XPC_P_ASR_ACTIVATE_UV);
+}
+
+static void
+xpc_request_partition_reactivation_uv(struct xpc_partition *part)
+{
+ xpc_send_local_activate_IRQ_uv(part, XPC_P_ASR_ACTIVATE_UV);
+}
+
+static void
+xpc_request_partition_deactivation_uv(struct xpc_partition *part)
+{
+ struct xpc_activate_mq_msg_deactivate_req_uv msg;
+
+ /*
+ * ??? Is it a good idea to make this conditional on what is
+ * ??? potentially stale state information?
+ */
+ if (part->sn.uv.remote_act_state != XPC_P_AS_DEACTIVATING &&
+ part->sn.uv.remote_act_state != XPC_P_AS_INACTIVE) {
+
+ msg.reason = part->reason;
+ xpc_send_activate_IRQ_part_uv(part, &msg, sizeof(msg),
+ XPC_ACTIVATE_MQ_MSG_DEACTIVATE_REQ_UV);
+ }
+}
+
+static void
+xpc_cancel_partition_deactivation_request_uv(struct xpc_partition *part)
+{
+ /* nothing needs to be done */
+ return;
+}
+
+static void
+xpc_init_fifo_uv(struct xpc_fifo_head_uv *head)
+{
+ head->first = NULL;
+ head->last = NULL;
+ spin_lock_init(&head->lock);
+ head->n_entries = 0;
+}
+
+static void *
+xpc_get_fifo_entry_uv(struct xpc_fifo_head_uv *head)
+{
+ unsigned long irq_flags;
+ struct xpc_fifo_entry_uv *first;
+
+ spin_lock_irqsave(&head->lock, irq_flags);
+ first = head->first;
+ if (head->first != NULL) {
+ head->first = first->next;
+ if (head->first == NULL)
+ head->last = NULL;
+
+ head->n_entries--;
+ BUG_ON(head->n_entries < 0);
+
+ first->next = NULL;
+ }
+ spin_unlock_irqrestore(&head->lock, irq_flags);
+ return first;
+}
+
+static void
+xpc_put_fifo_entry_uv(struct xpc_fifo_head_uv *head,
+ struct xpc_fifo_entry_uv *last)
+{
+ unsigned long irq_flags;
+
+ last->next = NULL;
+ spin_lock_irqsave(&head->lock, irq_flags);
+ if (head->last != NULL)
+ head->last->next = last;
+ else
+ head->first = last;
+ head->last = last;
+ head->n_entries++;
+ spin_unlock_irqrestore(&head->lock, irq_flags);
+}
+
+static int
+xpc_n_of_fifo_entries_uv(struct xpc_fifo_head_uv *head)
+{
+ return head->n_entries;
+}
+
+/*
+ * Setup the channel structures that are uv specific.
+ */
+static enum xp_retval
+xpc_setup_ch_structures_uv(struct xpc_partition *part)
+{
+ struct xpc_channel_uv *ch_uv;
+ int ch_number;
+
+ for (ch_number = 0; ch_number < part->nchannels; ch_number++) {
+ ch_uv = &part->channels[ch_number].sn.uv;
+
+ xpc_init_fifo_uv(&ch_uv->msg_slot_free_list);
+ xpc_init_fifo_uv(&ch_uv->recv_msg_list);
+ }
+
+ return xpSuccess;
+}
+
+/*
+ * Teardown the channel structures that are uv specific.
+ */
+static void
+xpc_teardown_ch_structures_uv(struct xpc_partition *part)
+{
+ /* nothing needs to be done */
+ return;
+}
+
+static enum xp_retval
+xpc_make_first_contact_uv(struct xpc_partition *part)
+{
+ struct xpc_activate_mq_msg_uv msg;
+
+ /*
+ * We send a sync msg to get the remote partition's remote_act_state
+ * updated to our current act_state which at this point should
+ * be XPC_P_AS_ACTIVATING.
+ */
+ xpc_send_activate_IRQ_part_uv(part, &msg, sizeof(msg),
+ XPC_ACTIVATE_MQ_MSG_SYNC_ACT_STATE_UV);
+
+ while (!((part->sn.uv.remote_act_state == XPC_P_AS_ACTIVATING) ||
+ (part->sn.uv.remote_act_state == XPC_P_AS_ACTIVE))) {
+
+ dev_dbg(xpc_part, "waiting to make first contact with "
+ "partition %d\n", XPC_PARTID(part));
+
+ /* wait a 1/4 of a second or so */
+ (void)msleep_interruptible(250);
+
+ if (part->act_state == XPC_P_AS_DEACTIVATING)
+ return part->reason;
+ }
+
+ return xpSuccess;
+}
+
+static u64
+xpc_get_chctl_all_flags_uv(struct xpc_partition *part)
+{
+ unsigned long irq_flags;
+ union xpc_channel_ctl_flags chctl;
+
+ spin_lock_irqsave(&part->chctl_lock, irq_flags);
+ chctl = part->chctl;
+ if (chctl.all_flags != 0)
+ part->chctl.all_flags = 0;
+
+ spin_unlock_irqrestore(&part->chctl_lock, irq_flags);
+ return chctl.all_flags;
+}
+
+static enum xp_retval
+xpc_allocate_send_msg_slot_uv(struct xpc_channel *ch)
+{
+ struct xpc_channel_uv *ch_uv = &ch->sn.uv;
+ struct xpc_send_msg_slot_uv *msg_slot;
+ unsigned long irq_flags;
+ int nentries;
+ int entry;
+ size_t nbytes;
+
+ for (nentries = ch->local_nentries; nentries > 0; nentries--) {
+ nbytes = nentries * sizeof(struct xpc_send_msg_slot_uv);
+ ch_uv->send_msg_slots = kzalloc(nbytes, GFP_KERNEL);
+ if (ch_uv->send_msg_slots == NULL)
+ continue;
+
+ for (entry = 0; entry < nentries; entry++) {
+ msg_slot = &ch_uv->send_msg_slots[entry];
+
+ msg_slot->msg_slot_number = entry;
+ xpc_put_fifo_entry_uv(&ch_uv->msg_slot_free_list,
+ &msg_slot->next);
+ }
+
+ spin_lock_irqsave(&ch->lock, irq_flags);
+ if (nentries < ch->local_nentries)
+ ch->local_nentries = nentries;
+ spin_unlock_irqrestore(&ch->lock, irq_flags);
+ return xpSuccess;
+ }
+
+ return xpNoMemory;
+}
+
+static enum xp_retval
+xpc_allocate_recv_msg_slot_uv(struct xpc_channel *ch)
+{
+ struct xpc_channel_uv *ch_uv = &ch->sn.uv;
+ struct xpc_notify_mq_msg_uv *msg_slot;
+ unsigned long irq_flags;
+ int nentries;
+ int entry;
+ size_t nbytes;
+
+ for (nentries = ch->remote_nentries; nentries > 0; nentries--) {
+ nbytes = nentries * ch->entry_size;
+ ch_uv->recv_msg_slots = kzalloc(nbytes, GFP_KERNEL);
+ if (ch_uv->recv_msg_slots == NULL)
+ continue;
+
+ for (entry = 0; entry < nentries; entry++) {
+ msg_slot = ch_uv->recv_msg_slots +
+ entry * ch->entry_size;
+
+ msg_slot->hdr.msg_slot_number = entry;
+ }
+
+ spin_lock_irqsave(&ch->lock, irq_flags);
+ if (nentries < ch->remote_nentries)
+ ch->remote_nentries = nentries;
+ spin_unlock_irqrestore(&ch->lock, irq_flags);
+ return xpSuccess;
+ }
+
+ return xpNoMemory;
+}
+
+/*
+ * Allocate msg_slots associated with the channel.
+ */
+static enum xp_retval
+xpc_setup_msg_structures_uv(struct xpc_channel *ch)
+{
+ static enum xp_retval ret;
+ struct xpc_channel_uv *ch_uv = &ch->sn.uv;
+
+ DBUG_ON(ch->flags & XPC_C_SETUP);
+
+ ch_uv->cached_notify_gru_mq_desc = kmalloc(sizeof(struct
+ gru_message_queue_desc),
+ GFP_KERNEL);
+ if (ch_uv->cached_notify_gru_mq_desc == NULL)
+ return xpNoMemory;
+
+ ret = xpc_allocate_send_msg_slot_uv(ch);
+ if (ret == xpSuccess) {
+
+ ret = xpc_allocate_recv_msg_slot_uv(ch);
+ if (ret != xpSuccess) {
+ kfree(ch_uv->send_msg_slots);
+ xpc_init_fifo_uv(&ch_uv->msg_slot_free_list);
+ }
+ }
+ return ret;
+}
+
+/*
+ * Free up msg_slots and clear other stuff that were setup for the specified
+ * channel.
+ */
+static void
+xpc_teardown_msg_structures_uv(struct xpc_channel *ch)
+{
+ struct xpc_channel_uv *ch_uv = &ch->sn.uv;
+
+ DBUG_ON(!spin_is_locked(&ch->lock));
+
+ kfree(ch_uv->cached_notify_gru_mq_desc);
+ ch_uv->cached_notify_gru_mq_desc = NULL;
+
+ if (ch->flags & XPC_C_SETUP) {
+ xpc_init_fifo_uv(&ch_uv->msg_slot_free_list);
+ kfree(ch_uv->send_msg_slots);
+ xpc_init_fifo_uv(&ch_uv->recv_msg_list);
+ kfree(ch_uv->recv_msg_slots);
+ }
+}
+
+static void
+xpc_send_chctl_closerequest_uv(struct xpc_channel *ch, unsigned long *irq_flags)
+{
+ struct xpc_activate_mq_msg_chctl_closerequest_uv msg;
+
+ msg.ch_number = ch->number;
+ msg.reason = ch->reason;
+ xpc_send_activate_IRQ_ch_uv(ch, irq_flags, &msg, sizeof(msg),
+ XPC_ACTIVATE_MQ_MSG_CHCTL_CLOSEREQUEST_UV);
+}
+
+static void
+xpc_send_chctl_closereply_uv(struct xpc_channel *ch, unsigned long *irq_flags)
+{
+ struct xpc_activate_mq_msg_chctl_closereply_uv msg;
+
+ msg.ch_number = ch->number;
+ xpc_send_activate_IRQ_ch_uv(ch, irq_flags, &msg, sizeof(msg),
+ XPC_ACTIVATE_MQ_MSG_CHCTL_CLOSEREPLY_UV);
+}
+
+static void
+xpc_send_chctl_openrequest_uv(struct xpc_channel *ch, unsigned long *irq_flags)
+{
+ struct xpc_activate_mq_msg_chctl_openrequest_uv msg;
+
+ msg.ch_number = ch->number;
+ msg.entry_size = ch->entry_size;
+ msg.local_nentries = ch->local_nentries;
+ xpc_send_activate_IRQ_ch_uv(ch, irq_flags, &msg, sizeof(msg),
+ XPC_ACTIVATE_MQ_MSG_CHCTL_OPENREQUEST_UV);
+}
+
+static void
+xpc_send_chctl_openreply_uv(struct xpc_channel *ch, unsigned long *irq_flags)
+{
+ struct xpc_activate_mq_msg_chctl_openreply_uv msg;
+
+ msg.ch_number = ch->number;
+ msg.local_nentries = ch->local_nentries;
+ msg.remote_nentries = ch->remote_nentries;
+ msg.notify_gru_mq_desc_gpa = uv_gpa(xpc_notify_mq_uv->gru_mq_desc);
+ xpc_send_activate_IRQ_ch_uv(ch, irq_flags, &msg, sizeof(msg),
+ XPC_ACTIVATE_MQ_MSG_CHCTL_OPENREPLY_UV);
+}
+
+static void
+xpc_send_chctl_opencomplete_uv(struct xpc_channel *ch, unsigned long *irq_flags)
+{
+ struct xpc_activate_mq_msg_chctl_opencomplete_uv msg;
+
+ msg.ch_number = ch->number;
+ xpc_send_activate_IRQ_ch_uv(ch, irq_flags, &msg, sizeof(msg),
+ XPC_ACTIVATE_MQ_MSG_CHCTL_OPENCOMPLETE_UV);
+}
+
+static void
+xpc_send_chctl_local_msgrequest_uv(struct xpc_partition *part, int ch_number)
+{
+ unsigned long irq_flags;
+
+ spin_lock_irqsave(&part->chctl_lock, irq_flags);
+ part->chctl.flags[ch_number] |= XPC_CHCTL_MSGREQUEST;
+ spin_unlock_irqrestore(&part->chctl_lock, irq_flags);
+
+ xpc_wakeup_channel_mgr(part);
+}
+
+static enum xp_retval
+xpc_save_remote_msgqueue_pa_uv(struct xpc_channel *ch,
+ unsigned long gru_mq_desc_gpa)
+{
+ struct xpc_channel_uv *ch_uv = &ch->sn.uv;
+
+ DBUG_ON(ch_uv->cached_notify_gru_mq_desc == NULL);
+ return xpc_cache_remote_gru_mq_desc_uv(ch_uv->cached_notify_gru_mq_desc,
+ gru_mq_desc_gpa);
+}
+
+static void
+xpc_indicate_partition_engaged_uv(struct xpc_partition *part)
+{
+ struct xpc_activate_mq_msg_uv msg;
+
+ xpc_send_activate_IRQ_part_uv(part, &msg, sizeof(msg),
+ XPC_ACTIVATE_MQ_MSG_MARK_ENGAGED_UV);
+}
+
+static void
+xpc_indicate_partition_disengaged_uv(struct xpc_partition *part)
+{
+ struct xpc_activate_mq_msg_uv msg;
+
+ xpc_send_activate_IRQ_part_uv(part, &msg, sizeof(msg),
+ XPC_ACTIVATE_MQ_MSG_MARK_DISENGAGED_UV);
+}
+
+static void
+xpc_assume_partition_disengaged_uv(short partid)
+{
+ struct xpc_partition_uv *part_uv = &xpc_partitions[partid].sn.uv;
+ unsigned long irq_flags;
+
+ spin_lock_irqsave(&part_uv->flags_lock, irq_flags);
+ part_uv->flags &= ~XPC_P_ENGAGED_UV;
+ spin_unlock_irqrestore(&part_uv->flags_lock, irq_flags);
+}
+
+static int
+xpc_partition_engaged_uv(short partid)
+{
+ return (xpc_partitions[partid].sn.uv.flags & XPC_P_ENGAGED_UV) != 0;
+}
+
+static int
+xpc_any_partition_engaged_uv(void)
+{
+ struct xpc_partition_uv *part_uv;
+ short partid;
+
+ for (partid = 0; partid < XP_MAX_NPARTITIONS_UV; partid++) {
+ part_uv = &xpc_partitions[partid].sn.uv;
+ if ((part_uv->flags & XPC_P_ENGAGED_UV) != 0)
+ return 1;
+ }
+ return 0;
+}
+
+static enum xp_retval
+xpc_allocate_msg_slot_uv(struct xpc_channel *ch, u32 flags,
+ struct xpc_send_msg_slot_uv **address_of_msg_slot)
+{
+ enum xp_retval ret;
+ struct xpc_send_msg_slot_uv *msg_slot;
+ struct xpc_fifo_entry_uv *entry;
+
+ while (1) {
+ entry = xpc_get_fifo_entry_uv(&ch->sn.uv.msg_slot_free_list);
+ if (entry != NULL)
+ break;
+
+ if (flags & XPC_NOWAIT)
+ return xpNoWait;
+
+ ret = xpc_allocate_msg_wait(ch);
+ if (ret != xpInterrupted && ret != xpTimeout)
+ return ret;
+ }
+
+ msg_slot = container_of(entry, struct xpc_send_msg_slot_uv, next);
+ *address_of_msg_slot = msg_slot;
+ return xpSuccess;
+}
+
+static void
+xpc_free_msg_slot_uv(struct xpc_channel *ch,
+ struct xpc_send_msg_slot_uv *msg_slot)
+{
+ xpc_put_fifo_entry_uv(&ch->sn.uv.msg_slot_free_list, &msg_slot->next);
+
+ /* wakeup anyone waiting for a free msg slot */
+ if (atomic_read(&ch->n_on_msg_allocate_wq) > 0)
+ wake_up(&ch->msg_allocate_wq);
+}
+
+static void
+xpc_notify_sender_uv(struct xpc_channel *ch,
+ struct xpc_send_msg_slot_uv *msg_slot,
+ enum xp_retval reason)
+{
+ xpc_notify_func func = msg_slot->func;
+
+ if (func != NULL && cmpxchg(&msg_slot->func, func, NULL) == func) {
+
+ atomic_dec(&ch->n_to_notify);
+
+ dev_dbg(xpc_chan, "msg_slot->func() called, msg_slot=0x%p "
+ "msg_slot_number=%d partid=%d channel=%d\n", msg_slot,
+ msg_slot->msg_slot_number, ch->partid, ch->number);
+
+ func(reason, ch->partid, ch->number, msg_slot->key);
+
+ dev_dbg(xpc_chan, "msg_slot->func() returned, msg_slot=0x%p "
+ "msg_slot_number=%d partid=%d channel=%d\n", msg_slot,
+ msg_slot->msg_slot_number, ch->partid, ch->number);
+ }
+}
+
+static void
+xpc_handle_notify_mq_ack_uv(struct xpc_channel *ch,
+ struct xpc_notify_mq_msg_uv *msg)
+{
+ struct xpc_send_msg_slot_uv *msg_slot;
+ int entry = msg->hdr.msg_slot_number % ch->local_nentries;
+
+ msg_slot = &ch->sn.uv.send_msg_slots[entry];
+
+ BUG_ON(msg_slot->msg_slot_number != msg->hdr.msg_slot_number);
+ msg_slot->msg_slot_number += ch->local_nentries;
+
+ if (msg_slot->func != NULL)
+ xpc_notify_sender_uv(ch, msg_slot, xpMsgDelivered);
+
+ xpc_free_msg_slot_uv(ch, msg_slot);
+}
+
+static void
+xpc_handle_notify_mq_msg_uv(struct xpc_partition *part,
+ struct xpc_notify_mq_msg_uv *msg)
+{
+ struct xpc_partition_uv *part_uv = &part->sn.uv;
+ struct xpc_channel *ch;
+ struct xpc_channel_uv *ch_uv;
+ struct xpc_notify_mq_msg_uv *msg_slot;
+ unsigned long irq_flags;
+ int ch_number = msg->hdr.ch_number;
+
+ if (unlikely(ch_number >= part->nchannels)) {
+ dev_err(xpc_part, "xpc_handle_notify_IRQ_uv() received invalid "
+ "channel number=0x%x in message from partid=%d\n",
+ ch_number, XPC_PARTID(part));
+
+ /* get hb checker to deactivate from the remote partition */
+ spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags);
+ if (part_uv->act_state_req == 0)
+ xpc_activate_IRQ_rcvd++;
+ part_uv->act_state_req = XPC_P_ASR_DEACTIVATE_UV;
+ part_uv->reason = xpBadChannelNumber;
+ spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags);
+
+ wake_up_interruptible(&xpc_activate_IRQ_wq);
+ return;
+ }
+
+ ch = &part->channels[ch_number];
+ xpc_msgqueue_ref(ch);
+
+ if (!(ch->flags & XPC_C_CONNECTED)) {
+ xpc_msgqueue_deref(ch);
+ return;
+ }
+
+ /* see if we're really dealing with an ACK for a previously sent msg */
+ if (msg->hdr.size == 0) {
+ xpc_handle_notify_mq_ack_uv(ch, msg);
+ xpc_msgqueue_deref(ch);
+ return;
+ }
+
+ /* we're dealing with a normal message sent via the notify_mq */
+ ch_uv = &ch->sn.uv;
+
+ msg_slot = ch_uv->recv_msg_slots +
+ (msg->hdr.msg_slot_number % ch->remote_nentries) * ch->entry_size;
+
+ BUG_ON(msg_slot->hdr.size != 0);
+
+ memcpy(msg_slot, msg, msg->hdr.size);
+
+ xpc_put_fifo_entry_uv(&ch_uv->recv_msg_list, &msg_slot->hdr.u.next);
+
+ if (ch->flags & XPC_C_CONNECTEDCALLOUT_MADE) {
+ /*
+ * If there is an existing idle kthread get it to deliver
+ * the payload, otherwise we'll have to get the channel mgr
+ * for this partition to create a kthread to do the delivery.
+ */
+ if (atomic_read(&ch->kthreads_idle) > 0)
+ wake_up_nr(&ch->idle_wq, 1);
+ else
+ xpc_send_chctl_local_msgrequest_uv(part, ch->number);
+ }
+ xpc_msgqueue_deref(ch);
+}
+
+static irqreturn_t
+xpc_handle_notify_IRQ_uv(int irq, void *dev_id)
+{
+ struct xpc_notify_mq_msg_uv *msg;
+ short partid;
+ struct xpc_partition *part;
+
+ while ((msg = gru_get_next_message(xpc_notify_mq_uv->gru_mq_desc)) !=
+ NULL) {
+
+ partid = msg->hdr.partid;
+ if (partid < 0 || partid >= XP_MAX_NPARTITIONS_UV) {
+ dev_err(xpc_part, "xpc_handle_notify_IRQ_uv() received "
+ "invalid partid=0x%x in message\n", partid);
+ } else {
+ part = &xpc_partitions[partid];
+
+ if (xpc_part_ref(part)) {
+ xpc_handle_notify_mq_msg_uv(part, msg);
+ xpc_part_deref(part);
+ }
+ }
+
+ gru_free_message(xpc_notify_mq_uv->gru_mq_desc, msg);
+ }
+
+ return IRQ_HANDLED;
+}
+
+static int
+xpc_n_of_deliverable_payloads_uv(struct xpc_channel *ch)
+{
+ return xpc_n_of_fifo_entries_uv(&ch->sn.uv.recv_msg_list);
+}
+
+static void
+xpc_process_msg_chctl_flags_uv(struct xpc_partition *part, int ch_number)
+{
+ struct xpc_channel *ch = &part->channels[ch_number];
+ int ndeliverable_payloads;
+
+ xpc_msgqueue_ref(ch);
+
+ ndeliverable_payloads = xpc_n_of_deliverable_payloads_uv(ch);
+
+ if (ndeliverable_payloads > 0 &&
+ (ch->flags & XPC_C_CONNECTED) &&
+ (ch->flags & XPC_C_CONNECTEDCALLOUT_MADE)) {
+
+ xpc_activate_kthreads(ch, ndeliverable_payloads);
+ }
+
+ xpc_msgqueue_deref(ch);
+}
+
+static enum xp_retval
+xpc_send_payload_uv(struct xpc_channel *ch, u32 flags, void *payload,
+ u16 payload_size, u8 notify_type, xpc_notify_func func,
+ void *key)
+{
+ enum xp_retval ret = xpSuccess;
+ struct xpc_send_msg_slot_uv *msg_slot = NULL;
+ struct xpc_notify_mq_msg_uv *msg;
+ u8 msg_buffer[XPC_NOTIFY_MSG_SIZE_UV];
+ size_t msg_size;
+
+ DBUG_ON(notify_type != XPC_N_CALL);
+
+ msg_size = sizeof(struct xpc_notify_mq_msghdr_uv) + payload_size;
+ if (msg_size > ch->entry_size)
+ return xpPayloadTooBig;
+
+ xpc_msgqueue_ref(ch);
+
+ if (ch->flags & XPC_C_DISCONNECTING) {
+ ret = ch->reason;
+ goto out_1;
+ }
+ if (!(ch->flags & XPC_C_CONNECTED)) {
+ ret = xpNotConnected;
+ goto out_1;
+ }
+
+ ret = xpc_allocate_msg_slot_uv(ch, flags, &msg_slot);
+ if (ret != xpSuccess)
+ goto out_1;
+
+ if (func != NULL) {
+ atomic_inc(&ch->n_to_notify);
+
+ msg_slot->key = key;
+ smp_wmb(); /* a non-NULL func must hit memory after the key */
+ msg_slot->func = func;
+
+ if (ch->flags & XPC_C_DISCONNECTING) {
+ ret = ch->reason;
+ goto out_2;
+ }
+ }
+
+ msg = (struct xpc_notify_mq_msg_uv *)&msg_buffer;
+ msg->hdr.partid = xp_partition_id;
+ msg->hdr.ch_number = ch->number;
+ msg->hdr.size = msg_size;
+ msg->hdr.msg_slot_number = msg_slot->msg_slot_number;
+ memcpy(&msg->payload, payload, payload_size);
+
+ ret = xpc_send_gru_msg(ch->sn.uv.cached_notify_gru_mq_desc, msg,
+ msg_size);
+ if (ret == xpSuccess)
+ goto out_1;
+
+ XPC_DEACTIVATE_PARTITION(&xpc_partitions[ch->partid], ret);
+out_2:
+ if (func != NULL) {
+ /*
+ * Try to NULL the msg_slot's func field. If we fail, then
+ * xpc_notify_senders_of_disconnect_uv() beat us to it, in which
+ * case we need to pretend we succeeded to send the message
+ * since the user will get a callout for the disconnect error
+ * by xpc_notify_senders_of_disconnect_uv(), and to also get an
+ * error returned here will confuse them. Additionally, since
+ * in this case the channel is being disconnected we don't need
+ * to put the the msg_slot back on the free list.
+ */
+ if (cmpxchg(&msg_slot->func, func, NULL) != func) {
+ ret = xpSuccess;
+ goto out_1;
+ }
+
+ msg_slot->key = NULL;
+ atomic_dec(&ch->n_to_notify);
+ }
+ xpc_free_msg_slot_uv(ch, msg_slot);
+out_1:
+ xpc_msgqueue_deref(ch);
+ return ret;
+}
+
+/*
+ * Tell the callers of xpc_send_notify() that the status of their payloads
+ * is unknown because the channel is now disconnecting.
+ *
+ * We don't worry about putting these msg_slots on the free list since the
+ * msg_slots themselves are about to be kfree'd.
+ */
+static void
+xpc_notify_senders_of_disconnect_uv(struct xpc_channel *ch)
+{
+ struct xpc_send_msg_slot_uv *msg_slot;
+ int entry;
+
+ DBUG_ON(!(ch->flags & XPC_C_DISCONNECTING));
+
+ for (entry = 0; entry < ch->local_nentries; entry++) {
+
+ if (atomic_read(&ch->n_to_notify) == 0)
+ break;
+
+ msg_slot = &ch->sn.uv.send_msg_slots[entry];
+ if (msg_slot->func != NULL)
+ xpc_notify_sender_uv(ch, msg_slot, ch->reason);
+ }
+}
+
+/*
+ * Get the next deliverable message's payload.
+ */
+static void *
+xpc_get_deliverable_payload_uv(struct xpc_channel *ch)
+{
+ struct xpc_fifo_entry_uv *entry;
+ struct xpc_notify_mq_msg_uv *msg;
+ void *payload = NULL;
+
+ if (!(ch->flags & XPC_C_DISCONNECTING)) {
+ entry = xpc_get_fifo_entry_uv(&ch->sn.uv.recv_msg_list);
+ if (entry != NULL) {
+ msg = container_of(entry, struct xpc_notify_mq_msg_uv,
+ hdr.u.next);
+ payload = &msg->payload;
+ }
+ }
+ return payload;
+}
+
+static void
+xpc_received_payload_uv(struct xpc_channel *ch, void *payload)
+{
+ struct xpc_notify_mq_msg_uv *msg;
+ enum xp_retval ret;
+
+ msg = container_of(payload, struct xpc_notify_mq_msg_uv, payload);
+
+ /* return an ACK to the sender of this message */
+
+ msg->hdr.partid = xp_partition_id;
+ msg->hdr.size = 0; /* size of zero indicates this is an ACK */
+
+ ret = xpc_send_gru_msg(ch->sn.uv.cached_notify_gru_mq_desc, msg,
+ sizeof(struct xpc_notify_mq_msghdr_uv));
+ if (ret != xpSuccess)
+ XPC_DEACTIVATE_PARTITION(&xpc_partitions[ch->partid], ret);
+}
+
+static struct xpc_arch_operations xpc_arch_ops_uv = {
+ .setup_partitions = xpc_setup_partitions_uv,
+ .teardown_partitions = xpc_teardown_partitions_uv,
+ .process_activate_IRQ_rcvd = xpc_process_activate_IRQ_rcvd_uv,
+ .get_partition_rsvd_page_pa = xpc_get_partition_rsvd_page_pa_uv,
+ .setup_rsvd_page = xpc_setup_rsvd_page_uv,
+
+ .allow_hb = xpc_allow_hb_uv,
+ .disallow_hb = xpc_disallow_hb_uv,
+ .disallow_all_hbs = xpc_disallow_all_hbs_uv,
+ .increment_heartbeat = xpc_increment_heartbeat_uv,
+ .offline_heartbeat = xpc_offline_heartbeat_uv,
+ .online_heartbeat = xpc_online_heartbeat_uv,
+ .heartbeat_init = xpc_heartbeat_init_uv,
+ .heartbeat_exit = xpc_heartbeat_exit_uv,
+ .get_remote_heartbeat = xpc_get_remote_heartbeat_uv,
+
+ .request_partition_activation =
+ xpc_request_partition_activation_uv,
+ .request_partition_reactivation =
+ xpc_request_partition_reactivation_uv,
+ .request_partition_deactivation =
+ xpc_request_partition_deactivation_uv,
+ .cancel_partition_deactivation_request =
+ xpc_cancel_partition_deactivation_request_uv,
+
+ .setup_ch_structures = xpc_setup_ch_structures_uv,
+ .teardown_ch_structures = xpc_teardown_ch_structures_uv,
+
+ .make_first_contact = xpc_make_first_contact_uv,
+
+ .get_chctl_all_flags = xpc_get_chctl_all_flags_uv,
+ .send_chctl_closerequest = xpc_send_chctl_closerequest_uv,
+ .send_chctl_closereply = xpc_send_chctl_closereply_uv,
+ .send_chctl_openrequest = xpc_send_chctl_openrequest_uv,
+ .send_chctl_openreply = xpc_send_chctl_openreply_uv,
+ .send_chctl_opencomplete = xpc_send_chctl_opencomplete_uv,
+ .process_msg_chctl_flags = xpc_process_msg_chctl_flags_uv,
+
+ .save_remote_msgqueue_pa = xpc_save_remote_msgqueue_pa_uv,
+
+ .setup_msg_structures = xpc_setup_msg_structures_uv,
+ .teardown_msg_structures = xpc_teardown_msg_structures_uv,
+
+ .indicate_partition_engaged = xpc_indicate_partition_engaged_uv,
+ .indicate_partition_disengaged = xpc_indicate_partition_disengaged_uv,
+ .assume_partition_disengaged = xpc_assume_partition_disengaged_uv,
+ .partition_engaged = xpc_partition_engaged_uv,
+ .any_partition_engaged = xpc_any_partition_engaged_uv,
+
+ .n_of_deliverable_payloads = xpc_n_of_deliverable_payloads_uv,
+ .send_payload = xpc_send_payload_uv,
+ .get_deliverable_payload = xpc_get_deliverable_payload_uv,
+ .received_payload = xpc_received_payload_uv,
+ .notify_senders_of_disconnect = xpc_notify_senders_of_disconnect_uv,
+};
+
+static int
+xpc_init_mq_node(int nid)
+{
+ int cpu;
+
+ get_online_cpus();
+
+ for_each_cpu(cpu, cpumask_of_node(nid)) {
+ xpc_activate_mq_uv =
+ xpc_create_gru_mq_uv(XPC_ACTIVATE_MQ_SIZE_UV, nid,
+ XPC_ACTIVATE_IRQ_NAME,
+ xpc_handle_activate_IRQ_uv);
+ if (!IS_ERR(xpc_activate_mq_uv))
+ break;
+ }
+ if (IS_ERR(xpc_activate_mq_uv)) {
+ put_online_cpus();
+ return PTR_ERR(xpc_activate_mq_uv);
+ }
+
+ for_each_cpu(cpu, cpumask_of_node(nid)) {
+ xpc_notify_mq_uv =
+ xpc_create_gru_mq_uv(XPC_NOTIFY_MQ_SIZE_UV, nid,
+ XPC_NOTIFY_IRQ_NAME,
+ xpc_handle_notify_IRQ_uv);
+ if (!IS_ERR(xpc_notify_mq_uv))
+ break;
+ }
+ if (IS_ERR(xpc_notify_mq_uv)) {
+ xpc_destroy_gru_mq_uv(xpc_activate_mq_uv);
+ put_online_cpus();
+ return PTR_ERR(xpc_notify_mq_uv);
+ }
+
+ put_online_cpus();
+ return 0;
+}
+
+int
+xpc_init_uv(void)
+{
+ int nid;
+ int ret = 0;
+
+ xpc_arch_ops = xpc_arch_ops_uv;
+
+ if (sizeof(struct xpc_notify_mq_msghdr_uv) > XPC_MSG_HDR_MAX_SIZE) {
+ dev_err(xpc_part, "xpc_notify_mq_msghdr_uv is larger than %d\n",
+ XPC_MSG_HDR_MAX_SIZE);
+ return -E2BIG;
+ }
+
+ if (xpc_mq_node < 0)
+ for_each_online_node(nid) {
+ ret = xpc_init_mq_node(nid);
+
+ if (!ret)
+ break;
+ }
+ else
+ ret = xpc_init_mq_node(xpc_mq_node);
+
+ if (ret < 0)
+ dev_err(xpc_part, "xpc_init_mq_node() returned error=%d\n",
+ -ret);
+
+ return ret;
+}
+
+void
+xpc_exit_uv(void)
+{
+ xpc_destroy_gru_mq_uv(xpc_notify_mq_uv);
+ xpc_destroy_gru_mq_uv(xpc_activate_mq_uv);
+}
+
+module_param(xpc_mq_node, int, 0);
+MODULE_PARM_DESC(xpc_mq_node, "Node number on which to allocate message queues.");
diff --git a/drivers/misc/sgi-xp/xpnet.c b/drivers/misc/sgi-xp/xpnet.c
new file mode 100644
index 0000000..557f978
--- /dev/null
+++ b/drivers/misc/sgi-xp/xpnet.c
@@ -0,0 +1,608 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1999-2009 Silicon Graphics, Inc. All rights reserved.
+ */
+
+/*
+ * Cross Partition Network Interface (XPNET) support
+ *
+ * XPNET provides a virtual network layered on top of the Cross
+ * Partition communication layer.
+ *
+ * XPNET provides direct point-to-point and broadcast-like support
+ * for an ethernet-like device. The ethernet broadcast medium is
+ * replaced with a point-to-point message structure which passes
+ * pointers to a DMA-capable block that a remote partition should
+ * retrieve and pass to the upper level networking layer.
+ *
+ */
+
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include "xp.h"
+
+/*
+ * The message payload transferred by XPC.
+ *
+ * buf_pa is the physical address where the DMA should pull from.
+ *
+ * NOTE: for performance reasons, buf_pa should _ALWAYS_ begin on a
+ * cacheline boundary. To accomplish this, we record the number of
+ * bytes from the beginning of the first cacheline to the first useful
+ * byte of the skb (leadin_ignore) and the number of bytes from the
+ * last useful byte of the skb to the end of the last cacheline
+ * (tailout_ignore).
+ *
+ * size is the number of bytes to transfer which includes the skb->len
+ * (useful bytes of the senders skb) plus the leadin and tailout
+ */
+struct xpnet_message {
+ u16 version; /* Version for this message */
+ u16 embedded_bytes; /* #of bytes embedded in XPC message */
+ u32 magic; /* Special number indicating this is xpnet */
+ unsigned long buf_pa; /* phys address of buffer to retrieve */
+ u32 size; /* #of bytes in buffer */
+ u8 leadin_ignore; /* #of bytes to ignore at the beginning */
+ u8 tailout_ignore; /* #of bytes to ignore at the end */
+ unsigned char data; /* body of small packets */
+};
+
+/*
+ * Determine the size of our message, the cacheline aligned size,
+ * and then the number of message will request from XPC.
+ *
+ * XPC expects each message to exist in an individual cacheline.
+ */
+#define XPNET_MSG_SIZE XPC_MSG_PAYLOAD_MAX_SIZE
+#define XPNET_MSG_DATA_MAX \
+ (XPNET_MSG_SIZE - offsetof(struct xpnet_message, data))
+#define XPNET_MSG_NENTRIES (PAGE_SIZE / XPC_MSG_MAX_SIZE)
+
+#define XPNET_MAX_KTHREADS (XPNET_MSG_NENTRIES + 1)
+#define XPNET_MAX_IDLE_KTHREADS (XPNET_MSG_NENTRIES + 1)
+
+/*
+ * Version number of XPNET implementation. XPNET can always talk to versions
+ * with same major #, and never talk to versions with a different version.
+ */
+#define _XPNET_VERSION(_major, _minor) (((_major) << 4) | (_minor))
+#define XPNET_VERSION_MAJOR(_v) ((_v) >> 4)
+#define XPNET_VERSION_MINOR(_v) ((_v) & 0xf)
+
+#define XPNET_VERSION _XPNET_VERSION(1, 0) /* version 1.0 */
+#define XPNET_VERSION_EMBED _XPNET_VERSION(1, 1) /* version 1.1 */
+#define XPNET_MAGIC 0x88786984 /* "XNET" */
+
+#define XPNET_VALID_MSG(_m) \
+ ((XPNET_VERSION_MAJOR(_m->version) == XPNET_VERSION_MAJOR(XPNET_VERSION)) \
+ && (msg->magic == XPNET_MAGIC))
+
+#define XPNET_DEVICE_NAME "xp0"
+
+/*
+ * When messages are queued with xpc_send_notify, a kmalloc'd buffer
+ * of the following type is passed as a notification cookie. When the
+ * notification function is called, we use the cookie to decide
+ * whether all outstanding message sends have completed. The skb can
+ * then be released.
+ */
+struct xpnet_pending_msg {
+ struct sk_buff *skb;
+ atomic_t use_count;
+};
+
+struct net_device *xpnet_device;
+
+/*
+ * When we are notified of other partitions activating, we add them to
+ * our bitmask of partitions to which we broadcast.
+ */
+static unsigned long *xpnet_broadcast_partitions;
+/* protect above */
+static DEFINE_SPINLOCK(xpnet_broadcast_lock);
+
+/*
+ * Since the Block Transfer Engine (BTE) is being used for the transfer
+ * and it relies upon cache-line size transfers, we need to reserve at
+ * least one cache-line for head and tail alignment. The BTE is
+ * limited to 8MB transfers.
+ *
+ * Testing has shown that changing MTU to greater than 64KB has no effect
+ * on TCP as the two sides negotiate a Max Segment Size that is limited
+ * to 64K. Other protocols May use packets greater than this, but for
+ * now, the default is 64KB.
+ */
+#define XPNET_MAX_MTU (0x800000UL - L1_CACHE_BYTES)
+/* 32KB has been determined to be the ideal */
+#define XPNET_DEF_MTU (0x8000UL)
+
+/*
+ * The partid is encapsulated in the MAC address beginning in the following
+ * octet and it consists of two octets.
+ */
+#define XPNET_PARTID_OCTET 2
+
+/* Define the XPNET debug device structures to be used with dev_dbg() et al */
+
+struct device_driver xpnet_dbg_name = {
+ .name = "xpnet"
+};
+
+struct device xpnet_dbg_subname = {
+ .init_name = "", /* set to "" */
+ .driver = &xpnet_dbg_name
+};
+
+struct device *xpnet = &xpnet_dbg_subname;
+
+/*
+ * Packet was recevied by XPC and forwarded to us.
+ */
+static void
+xpnet_receive(short partid, int channel, struct xpnet_message *msg)
+{
+ struct sk_buff *skb;
+ void *dst;
+ enum xp_retval ret;
+
+ if (!XPNET_VALID_MSG(msg)) {
+ /*
+ * Packet with a different XPC version. Ignore.
+ */
+ xpc_received(partid, channel, (void *)msg);
+
+ xpnet_device->stats.rx_errors++;
+
+ return;
+ }
+ dev_dbg(xpnet, "received 0x%lx, %d, %d, %d\n", msg->buf_pa, msg->size,
+ msg->leadin_ignore, msg->tailout_ignore);
+
+ /* reserve an extra cache line */
+ skb = dev_alloc_skb(msg->size + L1_CACHE_BYTES);
+ if (!skb) {
+ dev_err(xpnet, "failed on dev_alloc_skb(%d)\n",
+ msg->size + L1_CACHE_BYTES);
+
+ xpc_received(partid, channel, (void *)msg);
+
+ xpnet_device->stats.rx_errors++;
+
+ return;
+ }
+
+ /*
+ * The allocated skb has some reserved space.
+ * In order to use xp_remote_memcpy(), we need to get the
+ * skb->data pointer moved forward.
+ */
+ skb_reserve(skb, (L1_CACHE_BYTES - ((u64)skb->data &
+ (L1_CACHE_BYTES - 1)) +
+ msg->leadin_ignore));
+
+ /*
+ * Update the tail pointer to indicate data actually
+ * transferred.
+ */
+ skb_put(skb, (msg->size - msg->leadin_ignore - msg->tailout_ignore));
+
+ /*
+ * Move the data over from the other side.
+ */
+ if ((XPNET_VERSION_MINOR(msg->version) == 1) &&
+ (msg->embedded_bytes != 0)) {
+ dev_dbg(xpnet, "copying embedded message. memcpy(0x%p, 0x%p, "
+ "%lu)\n", skb->data, &msg->data,
+ (size_t)msg->embedded_bytes);
+
+ skb_copy_to_linear_data(skb, &msg->data,
+ (size_t)msg->embedded_bytes);
+ } else {
+ dst = (void *)((u64)skb->data & ~(L1_CACHE_BYTES - 1));
+ dev_dbg(xpnet, "transferring buffer to the skb->data area;\n\t"
+ "xp_remote_memcpy(0x%p, 0x%p, %hu)\n", dst,
+ (void *)msg->buf_pa, msg->size);
+
+ ret = xp_remote_memcpy(xp_pa(dst), msg->buf_pa, msg->size);
+ if (ret != xpSuccess) {
+ /*
+ * !!! Need better way of cleaning skb. Currently skb
+ * !!! appears in_use and we can't just call
+ * !!! dev_kfree_skb.
+ */
+ dev_err(xpnet, "xp_remote_memcpy(0x%p, 0x%p, 0x%hx) "
+ "returned error=0x%x\n", dst,
+ (void *)msg->buf_pa, msg->size, ret);
+
+ xpc_received(partid, channel, (void *)msg);
+
+ xpnet_device->stats.rx_errors++;
+
+ return;
+ }
+ }
+
+ dev_dbg(xpnet, "<skb->head=0x%p skb->data=0x%p skb->tail=0x%p "
+ "skb->end=0x%p skb->len=%d\n", (void *)skb->head,
+ (void *)skb->data, skb_tail_pointer(skb), skb_end_pointer(skb),
+ skb->len);
+
+ skb->protocol = eth_type_trans(skb, xpnet_device);
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+
+ dev_dbg(xpnet, "passing skb to network layer\n"
+ "\tskb->head=0x%p skb->data=0x%p skb->tail=0x%p "
+ "skb->end=0x%p skb->len=%d\n",
+ (void *)skb->head, (void *)skb->data, skb_tail_pointer(skb),
+ skb_end_pointer(skb), skb->len);
+
+ xpnet_device->stats.rx_packets++;
+ xpnet_device->stats.rx_bytes += skb->len + ETH_HLEN;
+
+ netif_rx_ni(skb);
+ xpc_received(partid, channel, (void *)msg);
+}
+
+/*
+ * This is the handler which XPC calls during any sort of change in
+ * state or message reception on a connection.
+ */
+static void
+xpnet_connection_activity(enum xp_retval reason, short partid, int channel,
+ void *data, void *key)
+{
+ DBUG_ON(partid < 0 || partid >= xp_max_npartitions);
+ DBUG_ON(channel != XPC_NET_CHANNEL);
+
+ switch (reason) {
+ case xpMsgReceived: /* message received */
+ DBUG_ON(data == NULL);
+
+ xpnet_receive(partid, channel, (struct xpnet_message *)data);
+ break;
+
+ case xpConnected: /* connection completed to a partition */
+ spin_lock_bh(&xpnet_broadcast_lock);
+ __set_bit(partid, xpnet_broadcast_partitions);
+ spin_unlock_bh(&xpnet_broadcast_lock);
+
+ netif_carrier_on(xpnet_device);
+
+ dev_dbg(xpnet, "%s connected to partition %d\n",
+ xpnet_device->name, partid);
+ break;
+
+ default:
+ spin_lock_bh(&xpnet_broadcast_lock);
+ __clear_bit(partid, xpnet_broadcast_partitions);
+ spin_unlock_bh(&xpnet_broadcast_lock);
+
+ if (bitmap_empty((unsigned long *)xpnet_broadcast_partitions,
+ xp_max_npartitions)) {
+ netif_carrier_off(xpnet_device);
+ }
+
+ dev_dbg(xpnet, "%s disconnected from partition %d\n",
+ xpnet_device->name, partid);
+ break;
+ }
+}
+
+static int
+xpnet_dev_open(struct net_device *dev)
+{
+ enum xp_retval ret;
+
+ dev_dbg(xpnet, "calling xpc_connect(%d, 0x%p, NULL, %ld, %ld, %ld, "
+ "%ld)\n", XPC_NET_CHANNEL, xpnet_connection_activity,
+ (unsigned long)XPNET_MSG_SIZE,
+ (unsigned long)XPNET_MSG_NENTRIES,
+ (unsigned long)XPNET_MAX_KTHREADS,
+ (unsigned long)XPNET_MAX_IDLE_KTHREADS);
+
+ ret = xpc_connect(XPC_NET_CHANNEL, xpnet_connection_activity, NULL,
+ XPNET_MSG_SIZE, XPNET_MSG_NENTRIES,
+ XPNET_MAX_KTHREADS, XPNET_MAX_IDLE_KTHREADS);
+ if (ret != xpSuccess) {
+ dev_err(xpnet, "ifconfig up of %s failed on XPC connect, "
+ "ret=%d\n", dev->name, ret);
+
+ return -ENOMEM;
+ }
+
+ dev_dbg(xpnet, "ifconfig up of %s; XPC connected\n", dev->name);
+
+ return 0;
+}
+
+static int
+xpnet_dev_stop(struct net_device *dev)
+{
+ xpc_disconnect(XPC_NET_CHANNEL);
+
+ dev_dbg(xpnet, "ifconfig down of %s; XPC disconnected\n", dev->name);
+
+ return 0;
+}
+
+static int
+xpnet_dev_change_mtu(struct net_device *dev, int new_mtu)
+{
+ /* 68 comes from min TCP+IP+MAC header */
+ if ((new_mtu < 68) || (new_mtu > XPNET_MAX_MTU)) {
+ dev_err(xpnet, "ifconfig %s mtu %d failed; value must be "
+ "between 68 and %ld\n", dev->name, new_mtu,
+ XPNET_MAX_MTU);
+ return -EINVAL;
+ }
+
+ dev->mtu = new_mtu;
+ dev_dbg(xpnet, "ifconfig %s mtu set to %d\n", dev->name, new_mtu);
+ return 0;
+}
+
+/*
+ * Notification that the other end has received the message and
+ * DMA'd the skb information. At this point, they are done with
+ * our side. When all recipients are done processing, we
+ * release the skb and then release our pending message structure.
+ */
+static void
+xpnet_send_completed(enum xp_retval reason, short partid, int channel,
+ void *__qm)
+{
+ struct xpnet_pending_msg *queued_msg = (struct xpnet_pending_msg *)__qm;
+
+ DBUG_ON(queued_msg == NULL);
+
+ dev_dbg(xpnet, "message to %d notified with reason %d\n",
+ partid, reason);
+
+ if (atomic_dec_return(&queued_msg->use_count) == 0) {
+ dev_dbg(xpnet, "all acks for skb->head=-x%p\n",
+ (void *)queued_msg->skb->head);
+
+ dev_kfree_skb_any(queued_msg->skb);
+ kfree(queued_msg);
+ }
+}
+
+static void
+xpnet_send(struct sk_buff *skb, struct xpnet_pending_msg *queued_msg,
+ u64 start_addr, u64 end_addr, u16 embedded_bytes, int dest_partid)
+{
+ u8 msg_buffer[XPNET_MSG_SIZE];
+ struct xpnet_message *msg = (struct xpnet_message *)&msg_buffer;
+ u16 msg_size = sizeof(struct xpnet_message);
+ enum xp_retval ret;
+
+ msg->embedded_bytes = embedded_bytes;
+ if (unlikely(embedded_bytes != 0)) {
+ msg->version = XPNET_VERSION_EMBED;
+ dev_dbg(xpnet, "calling memcpy(0x%p, 0x%p, 0x%lx)\n",
+ &msg->data, skb->data, (size_t)embedded_bytes);
+ skb_copy_from_linear_data(skb, &msg->data,
+ (size_t)embedded_bytes);
+ msg_size += embedded_bytes - 1;
+ } else {
+ msg->version = XPNET_VERSION;
+ }
+ msg->magic = XPNET_MAGIC;
+ msg->size = end_addr - start_addr;
+ msg->leadin_ignore = (u64)skb->data - start_addr;
+ msg->tailout_ignore = end_addr - (u64)skb_tail_pointer(skb);
+ msg->buf_pa = xp_pa((void *)start_addr);
+
+ dev_dbg(xpnet, "sending XPC message to %d:%d\n"
+ "msg->buf_pa=0x%lx, msg->size=%u, "
+ "msg->leadin_ignore=%u, msg->tailout_ignore=%u\n",
+ dest_partid, XPC_NET_CHANNEL, msg->buf_pa, msg->size,
+ msg->leadin_ignore, msg->tailout_ignore);
+
+ atomic_inc(&queued_msg->use_count);
+
+ ret = xpc_send_notify(dest_partid, XPC_NET_CHANNEL, XPC_NOWAIT, msg,
+ msg_size, xpnet_send_completed, queued_msg);
+ if (unlikely(ret != xpSuccess))
+ atomic_dec(&queued_msg->use_count);
+}
+
+/*
+ * Network layer has formatted a packet (skb) and is ready to place it
+ * "on the wire". Prepare and send an xpnet_message to all partitions
+ * which have connected with us and are targets of this packet.
+ *
+ * MAC-NOTE: For the XPNET driver, the MAC address contains the
+ * destination partid. If the destination partid octets are 0xffff,
+ * this packet is to be broadcast to all connected partitions.
+ */
+static int
+xpnet_dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+ struct xpnet_pending_msg *queued_msg;
+ u64 start_addr, end_addr;
+ short dest_partid;
+ u16 embedded_bytes = 0;
+
+ dev_dbg(xpnet, ">skb->head=0x%p skb->data=0x%p skb->tail=0x%p "
+ "skb->end=0x%p skb->len=%d\n", (void *)skb->head,
+ (void *)skb->data, skb_tail_pointer(skb), skb_end_pointer(skb),
+ skb->len);
+
+ if (skb->data[0] == 0x33) {
+ dev_kfree_skb(skb);
+ return NETDEV_TX_OK; /* nothing needed to be done */
+ }
+
+ /*
+ * The xpnet_pending_msg tracks how many outstanding
+ * xpc_send_notifies are relying on this skb. When none
+ * remain, release the skb.
+ */
+ queued_msg = kmalloc(sizeof(struct xpnet_pending_msg), GFP_ATOMIC);
+ if (queued_msg == NULL) {
+ dev_warn(xpnet, "failed to kmalloc %ld bytes; dropping "
+ "packet\n", sizeof(struct xpnet_pending_msg));
+
+ dev->stats.tx_errors++;
+ dev_kfree_skb(skb);
+ return NETDEV_TX_OK;
+ }
+
+ /* get the beginning of the first cacheline and end of last */
+ start_addr = ((u64)skb->data & ~(L1_CACHE_BYTES - 1));
+ end_addr = L1_CACHE_ALIGN((u64)skb_tail_pointer(skb));
+
+ /* calculate how many bytes to embed in the XPC message */
+ if (unlikely(skb->len <= XPNET_MSG_DATA_MAX)) {
+ /* skb->data does fit so embed */
+ embedded_bytes = skb->len;
+ }
+
+ /*
+ * Since the send occurs asynchronously, we set the count to one
+ * and begin sending. Any sends that happen to complete before
+ * we are done sending will not free the skb. We will be left
+ * with that task during exit. This also handles the case of
+ * a packet destined for a partition which is no longer up.
+ */
+ atomic_set(&queued_msg->use_count, 1);
+ queued_msg->skb = skb;
+
+ if (skb->data[0] == 0xff) {
+ /* we are being asked to broadcast to all partitions */
+ for_each_set_bit(dest_partid, xpnet_broadcast_partitions,
+ xp_max_npartitions) {
+
+ xpnet_send(skb, queued_msg, start_addr, end_addr,
+ embedded_bytes, dest_partid);
+ }
+ } else {
+ dest_partid = (short)skb->data[XPNET_PARTID_OCTET + 1];
+ dest_partid |= (short)skb->data[XPNET_PARTID_OCTET + 0] << 8;
+
+ if (dest_partid >= 0 &&
+ dest_partid < xp_max_npartitions &&
+ test_bit(dest_partid, xpnet_broadcast_partitions) != 0) {
+
+ xpnet_send(skb, queued_msg, start_addr, end_addr,
+ embedded_bytes, dest_partid);
+ }
+ }
+
+ dev->stats.tx_packets++;
+ dev->stats.tx_bytes += skb->len;
+
+ if (atomic_dec_return(&queued_msg->use_count) == 0) {
+ dev_kfree_skb(skb);
+ kfree(queued_msg);
+ }
+
+ return NETDEV_TX_OK;
+}
+
+/*
+ * Deal with transmit timeouts coming from the network layer.
+ */
+static void
+xpnet_dev_tx_timeout(struct net_device *dev)
+{
+ dev->stats.tx_errors++;
+}
+
+static const struct net_device_ops xpnet_netdev_ops = {
+ .ndo_open = xpnet_dev_open,
+ .ndo_stop = xpnet_dev_stop,
+ .ndo_start_xmit = xpnet_dev_hard_start_xmit,
+ .ndo_change_mtu = xpnet_dev_change_mtu,
+ .ndo_tx_timeout = xpnet_dev_tx_timeout,
+ .ndo_set_mac_address = eth_mac_addr,
+ .ndo_validate_addr = eth_validate_addr,
+};
+
+static int __init
+xpnet_init(void)
+{
+ int result;
+
+ if (!is_shub() && !is_uv())
+ return -ENODEV;
+
+ dev_info(xpnet, "registering network device %s\n", XPNET_DEVICE_NAME);
+
+ xpnet_broadcast_partitions = kzalloc(BITS_TO_LONGS(xp_max_npartitions) *
+ sizeof(long), GFP_KERNEL);
+ if (xpnet_broadcast_partitions == NULL)
+ return -ENOMEM;
+
+ /*
+ * use ether_setup() to init the majority of our device
+ * structure and then override the necessary pieces.
+ */
+ xpnet_device = alloc_netdev(0, XPNET_DEVICE_NAME, NET_NAME_UNKNOWN,
+ ether_setup);
+ if (xpnet_device == NULL) {
+ kfree(xpnet_broadcast_partitions);
+ return -ENOMEM;
+ }
+
+ netif_carrier_off(xpnet_device);
+
+ xpnet_device->netdev_ops = &xpnet_netdev_ops;
+ xpnet_device->mtu = XPNET_DEF_MTU;
+
+ /*
+ * Multicast assumes the LSB of the first octet is set for multicast
+ * MAC addresses. We chose the first octet of the MAC to be unlikely
+ * to collide with any vendor's officially issued MAC.
+ */
+ xpnet_device->dev_addr[0] = 0x02; /* locally administered, no OUI */
+
+ xpnet_device->dev_addr[XPNET_PARTID_OCTET + 1] = xp_partition_id;
+ xpnet_device->dev_addr[XPNET_PARTID_OCTET + 0] = (xp_partition_id >> 8);
+
+ /*
+ * ether_setup() sets this to a multicast device. We are
+ * really not supporting multicast at this time.
+ */
+ xpnet_device->flags &= ~IFF_MULTICAST;
+
+ /*
+ * No need to checksum as it is a DMA transfer. The BTE will
+ * report an error if the data is not retrievable and the
+ * packet will be dropped.
+ */
+ xpnet_device->features = NETIF_F_HW_CSUM;
+
+ result = register_netdev(xpnet_device);
+ if (result != 0) {
+ free_netdev(xpnet_device);
+ kfree(xpnet_broadcast_partitions);
+ }
+
+ return result;
+}
+
+module_init(xpnet_init);
+
+static void __exit
+xpnet_exit(void)
+{
+ dev_info(xpnet, "unregistering network device %s\n",
+ xpnet_device[0].name);
+
+ unregister_netdev(xpnet_device);
+ free_netdev(xpnet_device);
+ kfree(xpnet_broadcast_partitions);
+}
+
+module_exit(xpnet_exit);
+
+MODULE_AUTHOR("Silicon Graphics, Inc.");
+MODULE_DESCRIPTION("Cross Partition Network adapter (XPNET)");
+MODULE_LICENSE("GPL");
diff --git a/drivers/misc/spear13xx_pcie_gadget.c b/drivers/misc/spear13xx_pcie_gadget.c
new file mode 100644
index 0000000..ee120dc
--- /dev/null
+++ b/drivers/misc/spear13xx_pcie_gadget.c
@@ -0,0 +1,797 @@
+/*
+ * drivers/misc/spear13xx_pcie_gadget.c
+ *
+ * Copyright (C) 2010 ST Microelectronics
+ * Pratyush Anand<pratyush.anand@gmail.com>
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2. This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ */
+
+#include <linux/device.h>
+#include <linux/clk.h>
+#include <linux/slab.h>
+#include <linux/delay.h>
+#include <linux/io.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/pci_regs.h>
+#include <linux/configfs.h>
+#include <mach/pcie.h>
+#include <mach/misc_regs.h>
+
+#define IN0_MEM_SIZE (200 * 1024 * 1024 - 1)
+/* In current implementation address translation is done using IN0 only.
+ * So IN1 start address and IN0 end address has been kept same
+*/
+#define IN1_MEM_SIZE (0 * 1024 * 1024 - 1)
+#define IN_IO_SIZE (20 * 1024 * 1024 - 1)
+#define IN_CFG0_SIZE (12 * 1024 * 1024 - 1)
+#define IN_CFG1_SIZE (12 * 1024 * 1024 - 1)
+#define IN_MSG_SIZE (12 * 1024 * 1024 - 1)
+/* Keep default BAR size as 4K*/
+/* AORAM would be mapped by default*/
+#define INBOUND_ADDR_MASK (SPEAR13XX_SYSRAM1_SIZE - 1)
+
+#define INT_TYPE_NO_INT 0
+#define INT_TYPE_INTX 1
+#define INT_TYPE_MSI 2
+struct spear_pcie_gadget_config {
+ void __iomem *base;
+ void __iomem *va_app_base;
+ void __iomem *va_dbi_base;
+ char int_type[10];
+ ulong requested_msi;
+ ulong configured_msi;
+ ulong bar0_size;
+ ulong bar0_rw_offset;
+ void __iomem *va_bar0_address;
+};
+
+struct pcie_gadget_target {
+ struct configfs_subsystem subsys;
+ struct spear_pcie_gadget_config config;
+};
+
+struct pcie_gadget_target_attr {
+ struct configfs_attribute attr;
+ ssize_t (*show)(struct spear_pcie_gadget_config *config,
+ char *buf);
+ ssize_t (*store)(struct spear_pcie_gadget_config *config,
+ const char *buf,
+ size_t count);
+};
+
+static void enable_dbi_access(struct pcie_app_reg __iomem *app_reg)
+{
+ /* Enable DBI access */
+ writel(readl(&app_reg->slv_armisc) | (1 << AXI_OP_DBI_ACCESS_ID),
+ &app_reg->slv_armisc);
+ writel(readl(&app_reg->slv_awmisc) | (1 << AXI_OP_DBI_ACCESS_ID),
+ &app_reg->slv_awmisc);
+
+}
+
+static void disable_dbi_access(struct pcie_app_reg __iomem *app_reg)
+{
+ /* disable DBI access */
+ writel(readl(&app_reg->slv_armisc) & ~(1 << AXI_OP_DBI_ACCESS_ID),
+ &app_reg->slv_armisc);
+ writel(readl(&app_reg->slv_awmisc) & ~(1 << AXI_OP_DBI_ACCESS_ID),
+ &app_reg->slv_awmisc);
+
+}
+
+static void spear_dbi_read_reg(struct spear_pcie_gadget_config *config,
+ int where, int size, u32 *val)
+{
+ struct pcie_app_reg __iomem *app_reg = config->va_app_base;
+ ulong va_address;
+
+ /* Enable DBI access */
+ enable_dbi_access(app_reg);
+
+ va_address = (ulong)config->va_dbi_base + (where & ~0x3);
+
+ *val = readl(va_address);
+
+ if (size == 1)
+ *val = (*val >> (8 * (where & 3))) & 0xff;
+ else if (size == 2)
+ *val = (*val >> (8 * (where & 3))) & 0xffff;
+
+ /* Disable DBI access */
+ disable_dbi_access(app_reg);
+}
+
+static void spear_dbi_write_reg(struct spear_pcie_gadget_config *config,
+ int where, int size, u32 val)
+{
+ struct pcie_app_reg __iomem *app_reg = config->va_app_base;
+ ulong va_address;
+
+ /* Enable DBI access */
+ enable_dbi_access(app_reg);
+
+ va_address = (ulong)config->va_dbi_base + (where & ~0x3);
+
+ if (size == 4)
+ writel(val, va_address);
+ else if (size == 2)
+ writew(val, va_address + (where & 2));
+ else if (size == 1)
+ writeb(val, va_address + (where & 3));
+
+ /* Disable DBI access */
+ disable_dbi_access(app_reg);
+}
+
+#define PCI_FIND_CAP_TTL 48
+
+static int pci_find_own_next_cap_ttl(struct spear_pcie_gadget_config *config,
+ u32 pos, int cap, int *ttl)
+{
+ u32 id;
+
+ while ((*ttl)--) {
+ spear_dbi_read_reg(config, pos, 1, &pos);
+ if (pos < 0x40)
+ break;
+ pos &= ~3;
+ spear_dbi_read_reg(config, pos + PCI_CAP_LIST_ID, 1, &id);
+ if (id == 0xff)
+ break;
+ if (id == cap)
+ return pos;
+ pos += PCI_CAP_LIST_NEXT;
+ }
+ return 0;
+}
+
+static int pci_find_own_next_cap(struct spear_pcie_gadget_config *config,
+ u32 pos, int cap)
+{
+ int ttl = PCI_FIND_CAP_TTL;
+
+ return pci_find_own_next_cap_ttl(config, pos, cap, &ttl);
+}
+
+static int pci_find_own_cap_start(struct spear_pcie_gadget_config *config,
+ u8 hdr_type)
+{
+ u32 status;
+
+ spear_dbi_read_reg(config, PCI_STATUS, 2, &status);
+ if (!(status & PCI_STATUS_CAP_LIST))
+ return 0;
+
+ switch (hdr_type) {
+ case PCI_HEADER_TYPE_NORMAL:
+ case PCI_HEADER_TYPE_BRIDGE:
+ return PCI_CAPABILITY_LIST;
+ case PCI_HEADER_TYPE_CARDBUS:
+ return PCI_CB_CAPABILITY_LIST;
+ default:
+ return 0;
+ }
+
+ return 0;
+}
+
+/*
+ * Tell if a device supports a given PCI capability.
+ * Returns the address of the requested capability structure within the
+ * device's PCI configuration space or 0 in case the device does not
+ * support it. Possible values for @cap:
+ *
+ * %PCI_CAP_ID_PM Power Management
+ * %PCI_CAP_ID_AGP Accelerated Graphics Port
+ * %PCI_CAP_ID_VPD Vital Product Data
+ * %PCI_CAP_ID_SLOTID Slot Identification
+ * %PCI_CAP_ID_MSI Message Signalled Interrupts
+ * %PCI_CAP_ID_CHSWP CompactPCI HotSwap
+ * %PCI_CAP_ID_PCIX PCI-X
+ * %PCI_CAP_ID_EXP PCI Express
+ */
+static int pci_find_own_capability(struct spear_pcie_gadget_config *config,
+ int cap)
+{
+ u32 pos;
+ u32 hdr_type;
+
+ spear_dbi_read_reg(config, PCI_HEADER_TYPE, 1, &hdr_type);
+
+ pos = pci_find_own_cap_start(config, hdr_type);
+ if (pos)
+ pos = pci_find_own_next_cap(config, pos, cap);
+
+ return pos;
+}
+
+static irqreturn_t spear_pcie_gadget_irq(int irq, void *dev_id)
+{
+ return 0;
+}
+
+/*
+ * configfs interfaces show/store functions
+ */
+
+static struct pcie_gadget_target *to_target(struct config_item *item)
+{
+ return item ?
+ container_of(to_configfs_subsystem(to_config_group(item)),
+ struct pcie_gadget_target, subsys) : NULL;
+}
+
+static ssize_t pcie_gadget_link_show(struct config_item *item, char *buf)
+{
+ struct pcie_app_reg __iomem *app_reg = to_target(item)->va_app_base;
+
+ if (readl(&app_reg->app_status_1) & ((u32)1 << XMLH_LINK_UP_ID))
+ return sprintf(buf, "UP");
+ else
+ return sprintf(buf, "DOWN");
+}
+
+static ssize_t pcie_gadget_link_store(struct config_item *item,
+ const char *buf, size_t count)
+{
+ struct pcie_app_reg __iomem *app_reg = to_target(item)->va_app_base;
+
+ if (sysfs_streq(buf, "UP"))
+ writel(readl(&app_reg->app_ctrl_0) | (1 << APP_LTSSM_ENABLE_ID),
+ &app_reg->app_ctrl_0);
+ else if (sysfs_streq(buf, "DOWN"))
+ writel(readl(&app_reg->app_ctrl_0)
+ & ~(1 << APP_LTSSM_ENABLE_ID),
+ &app_reg->app_ctrl_0);
+ else
+ return -EINVAL;
+ return count;
+}
+
+static ssize_t pcie_gadget_int_type_show(struct config_item *item, char *buf)
+{
+ return sprintf(buf, "%s", to_target(item)->int_type);
+}
+
+static ssize_t pcie_gadget_int_type_store(struct config_item *item,
+ const char *buf, size_t count)
+{
+ struct spear_pcie_gadget_config *config = to_target(item)
+ u32 cap, vec, flags;
+ ulong vector;
+
+ if (sysfs_streq(buf, "INTA"))
+ spear_dbi_write_reg(config, PCI_INTERRUPT_LINE, 1, 1);
+
+ else if (sysfs_streq(buf, "MSI")) {
+ vector = config->requested_msi;
+ vec = 0;
+ while (vector > 1) {
+ vector /= 2;
+ vec++;
+ }
+ spear_dbi_write_reg(config, PCI_INTERRUPT_LINE, 1, 0);
+ cap = pci_find_own_capability(config, PCI_CAP_ID_MSI);
+ spear_dbi_read_reg(config, cap + PCI_MSI_FLAGS, 1, &flags);
+ flags &= ~PCI_MSI_FLAGS_QMASK;
+ flags |= vec << 1;
+ spear_dbi_write_reg(config, cap + PCI_MSI_FLAGS, 1, flags);
+ } else
+ return -EINVAL;
+
+ strcpy(config->int_type, buf);
+
+ return count;
+}
+
+static ssize_t pcie_gadget_no_of_msi_show(struct config_item *item, char *buf)
+{
+ struct spear_pcie_gadget_config *config = to_target(item)
+ struct pcie_app_reg __iomem *app_reg = to_target(item)->va_app_base;
+ u32 cap, vec, flags;
+ ulong vector;
+
+ if ((readl(&app_reg->msg_status) & (1 << CFG_MSI_EN_ID))
+ != (1 << CFG_MSI_EN_ID))
+ vector = 0;
+ else {
+ cap = pci_find_own_capability(config, PCI_CAP_ID_MSI);
+ spear_dbi_read_reg(config, cap + PCI_MSI_FLAGS, 1, &flags);
+ flags &= ~PCI_MSI_FLAGS_QSIZE;
+ vec = flags >> 4;
+ vector = 1;
+ while (vec--)
+ vector *= 2;
+ }
+ config->configured_msi = vector;
+
+ return sprintf(buf, "%lu", vector);
+}
+
+static ssize_t pcie_gadget_no_of_msi_store(struct config_item *item,
+ const char *buf, size_t count)
+{
+ int ret;
+
+ ret = kstrtoul(buf, 0, &to_target(item)->requested_msi);
+ if (ret)
+ return ret;
+
+ if (config->requested_msi > 32)
+ config->requested_msi = 32;
+
+ return count;
+}
+
+static ssize_t pcie_gadget_inta_store(struct config_item *item,
+ const char *buf, size_t count)
+{
+ struct pcie_app_reg __iomem *app_reg = to_target(item)->va_app_base;
+ ulong en;
+ int ret;
+
+ ret = kstrtoul(buf, 0, &en);
+ if (ret)
+ return ret;
+
+ if (en)
+ writel(readl(&app_reg->app_ctrl_0) | (1 << SYS_INT_ID),
+ &app_reg->app_ctrl_0);
+ else
+ writel(readl(&app_reg->app_ctrl_0) & ~(1 << SYS_INT_ID),
+ &app_reg->app_ctrl_0);
+
+ return count;
+}
+
+static ssize_t pcie_gadget_send_msi_store(struct config_item *item,
+ const char *buf, size_t count)
+{
+ struct spear_pcie_gadget_config *config = to_target(item)
+ struct pcie_app_reg __iomem *app_reg = config->va_app_base;
+ ulong vector;
+ u32 ven_msi;
+ int ret;
+
+ ret = kstrtoul(buf, 0, &vector);
+ if (ret)
+ return ret;
+
+ if (!config->configured_msi)
+ return -EINVAL;
+
+ if (vector >= config->configured_msi)
+ return -EINVAL;
+
+ ven_msi = readl(&app_reg->ven_msi_1);
+ ven_msi &= ~VEN_MSI_FUN_NUM_MASK;
+ ven_msi |= 0 << VEN_MSI_FUN_NUM_ID;
+ ven_msi &= ~VEN_MSI_TC_MASK;
+ ven_msi |= 0 << VEN_MSI_TC_ID;
+ ven_msi &= ~VEN_MSI_VECTOR_MASK;
+ ven_msi |= vector << VEN_MSI_VECTOR_ID;
+
+ /* generating interrupt for msi vector */
+ ven_msi |= VEN_MSI_REQ_EN;
+ writel(ven_msi, &app_reg->ven_msi_1);
+ udelay(1);
+ ven_msi &= ~VEN_MSI_REQ_EN;
+ writel(ven_msi, &app_reg->ven_msi_1);
+
+ return count;
+}
+
+static ssize_t pcie_gadget_vendor_id_show(struct config_item *item, char *buf)
+{
+ u32 id;
+
+ spear_dbi_read_reg(to_target(item), PCI_VENDOR_ID, 2, &id);
+
+ return sprintf(buf, "%x", id);
+}
+
+static ssize_t pcie_gadget_vendor_id_store(struct config_item *item,
+ const char *buf, size_t count)
+{
+ ulong id;
+ int ret;
+
+ ret = kstrtoul(buf, 0, &id);
+ if (ret)
+ return ret;
+
+ spear_dbi_write_reg(to_target(item), PCI_VENDOR_ID, 2, id);
+
+ return count;
+}
+
+static ssize_t pcie_gadget_device_id_show(struct config_item *item, char *buf)
+{
+ u32 id;
+
+ spear_dbi_read_reg(to_target(item), PCI_DEVICE_ID, 2, &id);
+
+ return sprintf(buf, "%x", id);
+}
+
+static ssize_t pcie_gadget_device_id_store(struct config_item *item,
+ const char *buf, size_t count)
+{
+ ulong id;
+ int ret;
+
+ ret = kstrtoul(buf, 0, &id);
+ if (ret)
+ return ret;
+
+ spear_dbi_write_reg(to_target(item), PCI_DEVICE_ID, 2, id);
+
+ return count;
+}
+
+static ssize_t pcie_gadget_bar0_size_show(struct config_item *item, char *buf)
+{
+ return sprintf(buf, "%lx", to_target(item)->bar0_size);
+}
+
+static ssize_t pcie_gadget_bar0_size_store(struct config_item *item,
+ const char *buf, size_t count)
+{
+ struct spear_pcie_gadget_config *config = to_target(item)
+ ulong size;
+ u32 pos, pos1;
+ u32 no_of_bit = 0;
+ int ret;
+
+ ret = kstrtoul(buf, 0, &size);
+ if (ret)
+ return ret;
+
+ /* min bar size is 256 */
+ if (size <= 0x100)
+ size = 0x100;
+ /* max bar size is 1MB*/
+ else if (size >= 0x100000)
+ size = 0x100000;
+ else {
+ pos = 0;
+ pos1 = 0;
+ while (pos < 21) {
+ pos = find_next_bit((ulong *)&size, 21, pos);
+ if (pos != 21)
+ pos1 = pos + 1;
+ pos++;
+ no_of_bit++;
+ }
+ if (no_of_bit == 2)
+ pos1--;
+
+ size = 1 << pos1;
+ }
+ config->bar0_size = size;
+ spear_dbi_write_reg(config, PCIE_BAR0_MASK_REG, 4, size - 1);
+
+ return count;
+}
+
+static ssize_t pcie_gadget_bar0_address_show(struct config_item *item,
+ char *buf)
+{
+ struct pcie_app_reg __iomem *app_reg = to_target(item)->va_app_base;
+
+ u32 address = readl(&app_reg->pim0_mem_addr_start);
+
+ return sprintf(buf, "%x", address);
+}
+
+static ssize_t pcie_gadget_bar0_address_store(struct config_item *item,
+ const char *buf, size_t count)
+{
+ struct spear_pcie_gadget_config *config = to_target(item)
+ struct pcie_app_reg __iomem *app_reg = config->va_app_base;
+ ulong address;
+ int ret;
+
+ ret = kstrtoul(buf, 0, &address);
+ if (ret)
+ return ret;
+
+ address &= ~(config->bar0_size - 1);
+ if (config->va_bar0_address)
+ iounmap(config->va_bar0_address);
+ config->va_bar0_address = ioremap(address, config->bar0_size);
+ if (!config->va_bar0_address)
+ return -ENOMEM;
+
+ writel(address, &app_reg->pim0_mem_addr_start);
+
+ return count;
+}
+
+static ssize_t pcie_gadget_bar0_rw_offset_show(struct config_item *item,
+ char *buf)
+{
+ return sprintf(buf, "%lx", to_target(item)->bar0_rw_offset);
+}
+
+static ssize_t pcie_gadget_bar0_rw_offset_store(struct config_item *item,
+ const char *buf, size_t count)
+{
+ ulong offset;
+ int ret;
+
+ ret = kstrtoul(buf, 0, &offset);
+ if (ret)
+ return ret;
+
+ if (offset % 4)
+ return -EINVAL;
+
+ to_target(item)->bar0_rw_offset = offset;
+
+ return count;
+}
+
+static ssize_t pcie_gadget_bar0_data_show(struct config_item *item, char *buf)
+{
+ struct spear_pcie_gadget_config *config = to_target(item)
+ ulong data;
+
+ if (!config->va_bar0_address)
+ return -ENOMEM;
+
+ data = readl((ulong)config->va_bar0_address + config->bar0_rw_offset);
+
+ return sprintf(buf, "%lx", data);
+}
+
+static ssize_t pcie_gadget_bar0_data_store(struct config_item *item,
+ const char *buf, size_t count)
+{
+ struct spear_pcie_gadget_config *config = to_target(item)
+ ulong data;
+ int ret;
+
+ ret = kstrtoul(buf, 0, &data);
+ if (ret)
+ return ret;
+
+ if (!config->va_bar0_address)
+ return -ENOMEM;
+
+ writel(data, (ulong)config->va_bar0_address + config->bar0_rw_offset);
+
+ return count;
+}
+
+CONFIGFS_ATTR(pcie_gadget_, link);
+CONFIGFS_ATTR(pcie_gadget_, int_type);
+CONFIGFS_ATTR(pcie_gadget_, no_of_msi);
+CONFIGFS_ATTR_WO(pcie_gadget_, inta);
+CONFIGFS_ATTR_WO(pcie_gadget_, send_msi);
+CONFIGFS_ATTR(pcie_gadget_, vendor_id);
+CONFIGFS_ATTR(pcie_gadget_, device_id);
+CONFIGFS_ATTR(pcie_gadget_, bar0_size);
+CONFIGFS_ATTR(pcie_gadget_, bar0_address);
+CONFIGFS_ATTR(pcie_gadget_, bar0_rw_offset);
+CONFIGFS_ATTR(pcie_gadget_, bar0_data);
+
+static struct configfs_attribute *pcie_gadget_target_attrs[] = {
+ &pcie_gadget_attr_link,
+ &pcie_gadget_attr_int_type,
+ &pcie_gadget_attr_no_of_msi,
+ &pcie_gadget_attr_inta,
+ &pcie_gadget_attr_send_msi,
+ &pcie_gadget_attr_vendor_id,
+ &pcie_gadget_attr_device_id,
+ &pcie_gadget_attr_bar0_size,
+ &pcie_gadget_attr_bar0_address,
+ &pcie_gadget_attr_bar0_rw_offset,
+ &pcie_gadget_attr_bar0_data,
+ NULL,
+};
+
+static struct config_item_type pcie_gadget_target_type = {
+ .ct_attrs = pcie_gadget_target_attrs,
+ .ct_owner = THIS_MODULE,
+};
+
+static void spear13xx_pcie_device_init(struct spear_pcie_gadget_config *config)
+{
+ struct pcie_app_reg __iomem *app_reg = config->va_app_base;
+
+ /*setup registers for outbound translation */
+
+ writel(config->base, &app_reg->in0_mem_addr_start);
+ writel(app_reg->in0_mem_addr_start + IN0_MEM_SIZE,
+ &app_reg->in0_mem_addr_limit);
+ writel(app_reg->in0_mem_addr_limit + 1, &app_reg->in1_mem_addr_start);
+ writel(app_reg->in1_mem_addr_start + IN1_MEM_SIZE,
+ &app_reg->in1_mem_addr_limit);
+ writel(app_reg->in1_mem_addr_limit + 1, &app_reg->in_io_addr_start);
+ writel(app_reg->in_io_addr_start + IN_IO_SIZE,
+ &app_reg->in_io_addr_limit);
+ writel(app_reg->in_io_addr_limit + 1, &app_reg->in_cfg0_addr_start);
+ writel(app_reg->in_cfg0_addr_start + IN_CFG0_SIZE,
+ &app_reg->in_cfg0_addr_limit);
+ writel(app_reg->in_cfg0_addr_limit + 1, &app_reg->in_cfg1_addr_start);
+ writel(app_reg->in_cfg1_addr_start + IN_CFG1_SIZE,
+ &app_reg->in_cfg1_addr_limit);
+ writel(app_reg->in_cfg1_addr_limit + 1, &app_reg->in_msg_addr_start);
+ writel(app_reg->in_msg_addr_start + IN_MSG_SIZE,
+ &app_reg->in_msg_addr_limit);
+
+ writel(app_reg->in0_mem_addr_start, &app_reg->pom0_mem_addr_start);
+ writel(app_reg->in1_mem_addr_start, &app_reg->pom1_mem_addr_start);
+ writel(app_reg->in_io_addr_start, &app_reg->pom_io_addr_start);
+
+ /*setup registers for inbound translation */
+
+ /* Keep AORAM mapped at BAR0 as default */
+ config->bar0_size = INBOUND_ADDR_MASK + 1;
+ spear_dbi_write_reg(config, PCIE_BAR0_MASK_REG, 4, INBOUND_ADDR_MASK);
+ spear_dbi_write_reg(config, PCI_BASE_ADDRESS_0, 4, 0xC);
+ config->va_bar0_address = ioremap(SPEAR13XX_SYSRAM1_BASE,
+ config->bar0_size);
+
+ writel(SPEAR13XX_SYSRAM1_BASE, &app_reg->pim0_mem_addr_start);
+ writel(0, &app_reg->pim1_mem_addr_start);
+ writel(INBOUND_ADDR_MASK + 1, &app_reg->mem0_addr_offset_limit);
+
+ writel(0x0, &app_reg->pim_io_addr_start);
+ writel(0x0, &app_reg->pim_io_addr_start);
+ writel(0x0, &app_reg->pim_rom_addr_start);
+
+ writel(DEVICE_TYPE_EP | (1 << MISCTRL_EN_ID)
+ | ((u32)1 << REG_TRANSLATION_ENABLE),
+ &app_reg->app_ctrl_0);
+ /* disable all rx interrupts */
+ writel(0, &app_reg->int_mask);
+
+ /* Select INTA as default*/
+ spear_dbi_write_reg(config, PCI_INTERRUPT_LINE, 1, 1);
+}
+
+static int spear_pcie_gadget_probe(struct platform_device *pdev)
+{
+ struct resource *res0, *res1;
+ unsigned int status = 0;
+ int irq;
+ struct clk *clk;
+ static struct pcie_gadget_target *target;
+ struct spear_pcie_gadget_config *config;
+ struct config_item *cg_item;
+ struct configfs_subsystem *subsys;
+
+ target = devm_kzalloc(&pdev->dev, sizeof(*target), GFP_KERNEL);
+ if (!target) {
+ dev_err(&pdev->dev, "out of memory\n");
+ return -ENOMEM;
+ }
+
+ cg_item = &target->subsys.su_group.cg_item;
+ sprintf(cg_item->ci_namebuf, "pcie_gadget.%d", pdev->id);
+ cg_item->ci_type = &pcie_gadget_target_type;
+ config = &target->config;
+
+ /* get resource for application registers*/
+ res0 = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ config->va_app_base = devm_ioremap_resource(&pdev->dev, res0);
+ if (IS_ERR(config->va_app_base)) {
+ dev_err(&pdev->dev, "ioremap fail\n");
+ return PTR_ERR(config->va_app_base);
+ }
+
+ /* get resource for dbi registers*/
+ res1 = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+ config->base = (void __iomem *)res1->start;
+
+ config->va_dbi_base = devm_ioremap_resource(&pdev->dev, res1);
+ if (IS_ERR(config->va_dbi_base)) {
+ dev_err(&pdev->dev, "ioremap fail\n");
+ return PTR_ERR(config->va_dbi_base);
+ }
+
+ platform_set_drvdata(pdev, target);
+
+ irq = platform_get_irq(pdev, 0);
+ if (irq < 0) {
+ dev_err(&pdev->dev, "no update irq?\n");
+ return irq;
+ }
+
+ status = devm_request_irq(&pdev->dev, irq, spear_pcie_gadget_irq,
+ 0, pdev->name, NULL);
+ if (status) {
+ dev_err(&pdev->dev,
+ "pcie gadget interrupt IRQ%d already claimed\n", irq);
+ return status;
+ }
+
+ /* Register configfs hooks */
+ subsys = &target->subsys;
+ config_group_init(&subsys->su_group);
+ mutex_init(&subsys->su_mutex);
+ status = configfs_register_subsystem(subsys);
+ if (status)
+ return status;
+
+ /*
+ * init basic pcie application registers
+ * do not enable clock if it is PCIE0.Ideally , all controller should
+ * have been independent from others with respect to clock. But PCIE1
+ * and 2 depends on PCIE0.So PCIE0 clk is provided during board init.
+ */
+ if (pdev->id == 1) {
+ /*
+ * Ideally CFG Clock should have been also enabled here. But
+ * it is done currently during board init routne
+ */
+ clk = clk_get_sys("pcie1", NULL);
+ if (IS_ERR(clk)) {
+ pr_err("%s:couldn't get clk for pcie1\n", __func__);
+ return PTR_ERR(clk);
+ }
+ status = clk_enable(clk);
+ if (status) {
+ pr_err("%s:couldn't enable clk for pcie1\n", __func__);
+ return status;
+ }
+ } else if (pdev->id == 2) {
+ /*
+ * Ideally CFG Clock should have been also enabled here. But
+ * it is done currently during board init routne
+ */
+ clk = clk_get_sys("pcie2", NULL);
+ if (IS_ERR(clk)) {
+ pr_err("%s:couldn't get clk for pcie2\n", __func__);
+ return PTR_ERR(clk);
+ }
+ status = clk_enable(clk);
+ if (status) {
+ pr_err("%s:couldn't enable clk for pcie2\n", __func__);
+ return status;
+ }
+ }
+ spear13xx_pcie_device_init(config);
+
+ return 0;
+}
+
+static int spear_pcie_gadget_remove(struct platform_device *pdev)
+{
+ static struct pcie_gadget_target *target;
+
+ target = platform_get_drvdata(pdev);
+
+ configfs_unregister_subsystem(&target->subsys);
+
+ return 0;
+}
+
+static void spear_pcie_gadget_shutdown(struct platform_device *pdev)
+{
+}
+
+static struct platform_driver spear_pcie_gadget_driver = {
+ .probe = spear_pcie_gadget_probe,
+ .remove = spear_pcie_gadget_remove,
+ .shutdown = spear_pcie_gadget_shutdown,
+ .driver = {
+ .name = "pcie-gadget-spear",
+ .bus = &platform_bus_type
+ },
+};
+
+module_platform_driver(spear_pcie_gadget_driver);
+
+MODULE_ALIAS("platform:pcie-gadget-spear");
+MODULE_AUTHOR("Pratyush Anand");
+MODULE_LICENSE("GPL");
diff --git a/drivers/misc/sram.c b/drivers/misc/sram.c
new file mode 100644
index 0000000..736dae7
--- /dev/null
+++ b/drivers/misc/sram.c
@@ -0,0 +1,426 @@
+/*
+ * Generic on-chip SRAM allocation driver
+ *
+ * Copyright (C) 2012 Philipp Zabel, Pengutronix
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA.
+ */
+
+#include <linux/clk.h>
+#include <linux/genalloc.h>
+#include <linux/io.h>
+#include <linux/list_sort.h>
+#include <linux/of_address.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+
+#define SRAM_GRANULARITY 32
+
+struct sram_partition {
+ void __iomem *base;
+
+ struct gen_pool *pool;
+ struct bin_attribute battr;
+ struct mutex lock;
+};
+
+struct sram_dev {
+ struct device *dev;
+ void __iomem *virt_base;
+
+ struct gen_pool *pool;
+ struct clk *clk;
+
+ struct sram_partition *partition;
+ u32 partitions;
+};
+
+struct sram_reserve {
+ struct list_head list;
+ u32 start;
+ u32 size;
+ bool export;
+ bool pool;
+ const char *label;
+};
+
+static ssize_t sram_read(struct file *filp, struct kobject *kobj,
+ struct bin_attribute *attr,
+ char *buf, loff_t pos, size_t count)
+{
+ struct sram_partition *part;
+
+ part = container_of(attr, struct sram_partition, battr);
+
+ mutex_lock(&part->lock);
+ memcpy_fromio(buf, part->base + pos, count);
+ mutex_unlock(&part->lock);
+
+ return count;
+}
+
+static ssize_t sram_write(struct file *filp, struct kobject *kobj,
+ struct bin_attribute *attr,
+ char *buf, loff_t pos, size_t count)
+{
+ struct sram_partition *part;
+
+ part = container_of(attr, struct sram_partition, battr);
+
+ mutex_lock(&part->lock);
+ memcpy_toio(part->base + pos, buf, count);
+ mutex_unlock(&part->lock);
+
+ return count;
+}
+
+static int sram_add_pool(struct sram_dev *sram, struct sram_reserve *block,
+ phys_addr_t start, struct sram_partition *part)
+{
+ int ret;
+
+ part->pool = devm_gen_pool_create(sram->dev, ilog2(SRAM_GRANULARITY),
+ NUMA_NO_NODE, block->label);
+ if (IS_ERR(part->pool))
+ return PTR_ERR(part->pool);
+
+ ret = gen_pool_add_virt(part->pool, (unsigned long)part->base, start,
+ block->size, NUMA_NO_NODE);
+ if (ret < 0) {
+ dev_err(sram->dev, "failed to register subpool: %d\n", ret);
+ return ret;
+ }
+
+ return 0;
+}
+
+static int sram_add_export(struct sram_dev *sram, struct sram_reserve *block,
+ phys_addr_t start, struct sram_partition *part)
+{
+ sysfs_bin_attr_init(&part->battr);
+ part->battr.attr.name = devm_kasprintf(sram->dev, GFP_KERNEL,
+ "%llx.sram",
+ (unsigned long long)start);
+ if (!part->battr.attr.name)
+ return -ENOMEM;
+
+ part->battr.attr.mode = S_IRUSR | S_IWUSR;
+ part->battr.read = sram_read;
+ part->battr.write = sram_write;
+ part->battr.size = block->size;
+
+ return device_create_bin_file(sram->dev, &part->battr);
+}
+
+static int sram_add_partition(struct sram_dev *sram, struct sram_reserve *block,
+ phys_addr_t start)
+{
+ int ret;
+ struct sram_partition *part = &sram->partition[sram->partitions];
+
+ mutex_init(&part->lock);
+ part->base = sram->virt_base + block->start;
+
+ if (block->pool) {
+ ret = sram_add_pool(sram, block, start, part);
+ if (ret)
+ return ret;
+ }
+ if (block->export) {
+ ret = sram_add_export(sram, block, start, part);
+ if (ret)
+ return ret;
+ }
+ sram->partitions++;
+
+ return 0;
+}
+
+static void sram_free_partitions(struct sram_dev *sram)
+{
+ struct sram_partition *part;
+
+ if (!sram->partitions)
+ return;
+
+ part = &sram->partition[sram->partitions - 1];
+ for (; sram->partitions; sram->partitions--, part--) {
+ if (part->battr.size)
+ device_remove_bin_file(sram->dev, &part->battr);
+
+ if (part->pool &&
+ gen_pool_avail(part->pool) < gen_pool_size(part->pool))
+ dev_err(sram->dev, "removed pool while SRAM allocated\n");
+ }
+}
+
+static int sram_reserve_cmp(void *priv, struct list_head *a,
+ struct list_head *b)
+{
+ struct sram_reserve *ra = list_entry(a, struct sram_reserve, list);
+ struct sram_reserve *rb = list_entry(b, struct sram_reserve, list);
+
+ return ra->start - rb->start;
+}
+
+static int sram_reserve_regions(struct sram_dev *sram, struct resource *res)
+{
+ struct device_node *np = sram->dev->of_node, *child;
+ unsigned long size, cur_start, cur_size;
+ struct sram_reserve *rblocks, *block;
+ struct list_head reserve_list;
+ unsigned int nblocks, exports = 0;
+ const char *label;
+ int ret = 0;
+
+ INIT_LIST_HEAD(&reserve_list);
+
+ size = resource_size(res);
+
+ /*
+ * We need an additional block to mark the end of the memory region
+ * after the reserved blocks from the dt are processed.
+ */
+ nblocks = (np) ? of_get_available_child_count(np) + 1 : 1;
+ rblocks = kzalloc((nblocks) * sizeof(*rblocks), GFP_KERNEL);
+ if (!rblocks)
+ return -ENOMEM;
+
+ block = &rblocks[0];
+ for_each_available_child_of_node(np, child) {
+ struct resource child_res;
+
+ ret = of_address_to_resource(child, 0, &child_res);
+ if (ret < 0) {
+ dev_err(sram->dev,
+ "could not get address for node %s\n",
+ child->full_name);
+ goto err_chunks;
+ }
+
+ if (child_res.start < res->start || child_res.end > res->end) {
+ dev_err(sram->dev,
+ "reserved block %s outside the sram area\n",
+ child->full_name);
+ ret = -EINVAL;
+ goto err_chunks;
+ }
+
+ block->start = child_res.start - res->start;
+ block->size = resource_size(&child_res);
+ list_add_tail(&block->list, &reserve_list);
+
+ if (of_find_property(child, "export", NULL))
+ block->export = true;
+
+ if (of_find_property(child, "pool", NULL))
+ block->pool = true;
+
+ if ((block->export || block->pool) && block->size) {
+ exports++;
+
+ label = NULL;
+ ret = of_property_read_string(child, "label", &label);
+ if (ret && ret != -EINVAL) {
+ dev_err(sram->dev,
+ "%s has invalid label name\n",
+ child->full_name);
+ goto err_chunks;
+ }
+ if (!label)
+ label = child->name;
+
+ block->label = devm_kstrdup(sram->dev,
+ label, GFP_KERNEL);
+ if (!block->label)
+ goto err_chunks;
+
+ dev_dbg(sram->dev, "found %sblock '%s' 0x%x-0x%x\n",
+ block->export ? "exported " : "", block->label,
+ block->start, block->start + block->size);
+ } else {
+ dev_dbg(sram->dev, "found reserved block 0x%x-0x%x\n",
+ block->start, block->start + block->size);
+ }
+
+ block++;
+ }
+ child = NULL;
+
+ /* the last chunk marks the end of the region */
+ rblocks[nblocks - 1].start = size;
+ rblocks[nblocks - 1].size = 0;
+ list_add_tail(&rblocks[nblocks - 1].list, &reserve_list);
+
+ list_sort(NULL, &reserve_list, sram_reserve_cmp);
+
+ if (exports) {
+ sram->partition = devm_kzalloc(sram->dev,
+ exports * sizeof(*sram->partition),
+ GFP_KERNEL);
+ if (!sram->partition) {
+ ret = -ENOMEM;
+ goto err_chunks;
+ }
+ }
+
+ cur_start = 0;
+ list_for_each_entry(block, &reserve_list, list) {
+ /* can only happen if sections overlap */
+ if (block->start < cur_start) {
+ dev_err(sram->dev,
+ "block at 0x%x starts after current offset 0x%lx\n",
+ block->start, cur_start);
+ ret = -EINVAL;
+ sram_free_partitions(sram);
+ goto err_chunks;
+ }
+
+ if ((block->export || block->pool) && block->size) {
+ ret = sram_add_partition(sram, block,
+ res->start + block->start);
+ if (ret) {
+ sram_free_partitions(sram);
+ goto err_chunks;
+ }
+ }
+
+ /* current start is in a reserved block, so continue after it */
+ if (block->start == cur_start) {
+ cur_start = block->start + block->size;
+ continue;
+ }
+
+ /*
+ * allocate the space between the current starting
+ * address and the following reserved block, or the
+ * end of the region.
+ */
+ cur_size = block->start - cur_start;
+
+ dev_dbg(sram->dev, "adding chunk 0x%lx-0x%lx\n",
+ cur_start, cur_start + cur_size);
+
+ ret = gen_pool_add_virt(sram->pool,
+ (unsigned long)sram->virt_base + cur_start,
+ res->start + cur_start, cur_size, -1);
+ if (ret < 0) {
+ sram_free_partitions(sram);
+ goto err_chunks;
+ }
+
+ /* next allocation after this reserved block */
+ cur_start = block->start + block->size;
+ }
+
+ err_chunks:
+ if (child)
+ of_node_put(child);
+
+ kfree(rblocks);
+
+ return ret;
+}
+
+static int sram_probe(struct platform_device *pdev)
+{
+ struct sram_dev *sram;
+ struct resource *res;
+ size_t size;
+ int ret;
+
+ sram = devm_kzalloc(&pdev->dev, sizeof(*sram), GFP_KERNEL);
+ if (!sram)
+ return -ENOMEM;
+
+ sram->dev = &pdev->dev;
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ if (!res) {
+ dev_err(sram->dev, "found no memory resource\n");
+ return -EINVAL;
+ }
+
+ size = resource_size(res);
+
+ if (!devm_request_mem_region(sram->dev, res->start, size, pdev->name)) {
+ dev_err(sram->dev, "could not request region for resource\n");
+ return -EBUSY;
+ }
+
+ sram->virt_base = devm_ioremap_wc(sram->dev, res->start, size);
+ if (IS_ERR(sram->virt_base))
+ return PTR_ERR(sram->virt_base);
+
+ sram->pool = devm_gen_pool_create(sram->dev, ilog2(SRAM_GRANULARITY),
+ NUMA_NO_NODE, NULL);
+ if (IS_ERR(sram->pool))
+ return PTR_ERR(sram->pool);
+
+ ret = sram_reserve_regions(sram, res);
+ if (ret)
+ return ret;
+
+ sram->clk = devm_clk_get(sram->dev, NULL);
+ if (IS_ERR(sram->clk))
+ sram->clk = NULL;
+ else
+ clk_prepare_enable(sram->clk);
+
+ platform_set_drvdata(pdev, sram);
+
+ dev_dbg(sram->dev, "SRAM pool: %zu KiB @ 0x%p\n",
+ gen_pool_size(sram->pool) / 1024, sram->virt_base);
+
+ return 0;
+}
+
+static int sram_remove(struct platform_device *pdev)
+{
+ struct sram_dev *sram = platform_get_drvdata(pdev);
+
+ sram_free_partitions(sram);
+
+ if (gen_pool_avail(sram->pool) < gen_pool_size(sram->pool))
+ dev_err(sram->dev, "removed while SRAM allocated\n");
+
+ if (sram->clk)
+ clk_disable_unprepare(sram->clk);
+
+ return 0;
+}
+
+#ifdef CONFIG_OF
+static const struct of_device_id sram_dt_ids[] = {
+ { .compatible = "mmio-sram" },
+ {}
+};
+#endif
+
+static struct platform_driver sram_driver = {
+ .driver = {
+ .name = "sram",
+ .of_match_table = of_match_ptr(sram_dt_ids),
+ },
+ .probe = sram_probe,
+ .remove = sram_remove,
+};
+
+static int __init sram_init(void)
+{
+ return platform_driver_register(&sram_driver);
+}
+
+postcore_initcall(sram_init);
diff --git a/drivers/misc/ti-st/Kconfig b/drivers/misc/ti-st/Kconfig
new file mode 100644
index 0000000..f34dcc5
--- /dev/null
+++ b/drivers/misc/ti-st/Kconfig
@@ -0,0 +1,17 @@
+#
+# TI's shared transport line discipline and the protocol
+# drivers (BT, FM and GPS)
+#
+menu "Texas Instruments shared transport line discipline"
+config TI_ST
+ tristate "Shared transport core driver"
+ depends on NET && GPIOLIB && TTY
+ select FW_LOADER
+ help
+ This enables the shared transport core driver for TI
+ BT / FM and GPS combo chips. This enables protocol drivers
+ to register themselves with core and send data, the responses
+ are returned to relevant protocol drivers based on their
+ packet types.
+
+endmenu
diff --git a/drivers/misc/ti-st/Makefile b/drivers/misc/ti-st/Makefile
new file mode 100644
index 0000000..78d7ebb
--- /dev/null
+++ b/drivers/misc/ti-st/Makefile
@@ -0,0 +1,6 @@
+#
+# Makefile for TI's shared transport line discipline
+# and its protocol drivers (BT, FM, GPS)
+#
+obj-$(CONFIG_TI_ST) += st_drv.o
+st_drv-objs := st_core.o st_kim.o st_ll.o
diff --git a/drivers/misc/ti-st/st_core.c b/drivers/misc/ti-st/st_core.c
new file mode 100644
index 0000000..6e3af8b
--- /dev/null
+++ b/drivers/misc/ti-st/st_core.c
@@ -0,0 +1,923 @@
+/*
+ * Shared Transport Line discipline driver Core
+ * This hooks up ST KIM driver and ST LL driver
+ * Copyright (C) 2009-2010 Texas Instruments
+ * Author: Pavan Savoy <pavan_savoy@ti.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+
+#define pr_fmt(fmt) "(stc): " fmt
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/tty.h>
+
+#include <linux/seq_file.h>
+#include <linux/skbuff.h>
+
+#include <linux/ti_wilink_st.h>
+
+extern void st_kim_recv(void *, const unsigned char *, long);
+void st_int_recv(void *, const unsigned char *, long);
+/* function pointer pointing to either,
+ * st_kim_recv during registration to receive fw download responses
+ * st_int_recv after registration to receive proto stack responses
+ */
+static void (*st_recv) (void *, const unsigned char *, long);
+
+/********************************************************************/
+static void add_channel_to_table(struct st_data_s *st_gdata,
+ struct st_proto_s *new_proto)
+{
+ pr_info("%s: id %d\n", __func__, new_proto->chnl_id);
+ /* list now has the channel id as index itself */
+ st_gdata->list[new_proto->chnl_id] = new_proto;
+ st_gdata->is_registered[new_proto->chnl_id] = true;
+}
+
+static void remove_channel_from_table(struct st_data_s *st_gdata,
+ struct st_proto_s *proto)
+{
+ pr_info("%s: id %d\n", __func__, proto->chnl_id);
+/* st_gdata->list[proto->chnl_id] = NULL; */
+ st_gdata->is_registered[proto->chnl_id] = false;
+}
+
+/*
+ * called from KIM during firmware download.
+ *
+ * This is a wrapper function to tty->ops->write_room.
+ * It returns number of free space available in
+ * uart tx buffer.
+ */
+int st_get_uart_wr_room(struct st_data_s *st_gdata)
+{
+ struct tty_struct *tty;
+ if (unlikely(st_gdata == NULL || st_gdata->tty == NULL)) {
+ pr_err("tty unavailable to perform write");
+ return -1;
+ }
+ tty = st_gdata->tty;
+ return tty->ops->write_room(tty);
+}
+
+/* can be called in from
+ * -- KIM (during fw download)
+ * -- ST Core (during st_write)
+ *
+ * This is the internal write function - a wrapper
+ * to tty->ops->write
+ */
+int st_int_write(struct st_data_s *st_gdata,
+ const unsigned char *data, int count)
+{
+ struct tty_struct *tty;
+ if (unlikely(st_gdata == NULL || st_gdata->tty == NULL)) {
+ pr_err("tty unavailable to perform write");
+ return -EINVAL;
+ }
+ tty = st_gdata->tty;
+#ifdef VERBOSE
+ print_hex_dump(KERN_DEBUG, "<out<", DUMP_PREFIX_NONE,
+ 16, 1, data, count, 0);
+#endif
+ return tty->ops->write(tty, data, count);
+
+}
+
+/*
+ * push the skb received to relevant
+ * protocol stacks
+ */
+static void st_send_frame(unsigned char chnl_id, struct st_data_s *st_gdata)
+{
+ pr_debug(" %s(prot:%d) ", __func__, chnl_id);
+
+ if (unlikely
+ (st_gdata == NULL || st_gdata->rx_skb == NULL
+ || st_gdata->is_registered[chnl_id] == false)) {
+ pr_err("chnl_id %d not registered, no data to send?",
+ chnl_id);
+ kfree_skb(st_gdata->rx_skb);
+ return;
+ }
+ /* this cannot fail
+ * this shouldn't take long
+ * - should be just skb_queue_tail for the
+ * protocol stack driver
+ */
+ if (likely(st_gdata->list[chnl_id]->recv != NULL)) {
+ if (unlikely
+ (st_gdata->list[chnl_id]->recv
+ (st_gdata->list[chnl_id]->priv_data, st_gdata->rx_skb)
+ != 0)) {
+ pr_err(" proto stack %d's ->recv failed", chnl_id);
+ kfree_skb(st_gdata->rx_skb);
+ return;
+ }
+ } else {
+ pr_err(" proto stack %d's ->recv null", chnl_id);
+ kfree_skb(st_gdata->rx_skb);
+ }
+ return;
+}
+
+/**
+ * st_reg_complete -
+ * to call registration complete callbacks
+ * of all protocol stack drivers
+ * This function is being called with spin lock held, protocol drivers are
+ * only expected to complete their waits and do nothing more than that.
+ */
+static void st_reg_complete(struct st_data_s *st_gdata, char err)
+{
+ unsigned char i = 0;
+ pr_info(" %s ", __func__);
+ for (i = 0; i < ST_MAX_CHANNELS; i++) {
+ if (likely(st_gdata != NULL &&
+ st_gdata->is_registered[i] == true &&
+ st_gdata->list[i]->reg_complete_cb != NULL)) {
+ st_gdata->list[i]->reg_complete_cb
+ (st_gdata->list[i]->priv_data, err);
+ pr_info("protocol %d's cb sent %d\n", i, err);
+ if (err) { /* cleanup registered protocol */
+ st_gdata->is_registered[i] = false;
+ if (st_gdata->protos_registered)
+ st_gdata->protos_registered--;
+ }
+ }
+ }
+}
+
+static inline int st_check_data_len(struct st_data_s *st_gdata,
+ unsigned char chnl_id, int len)
+{
+ int room = skb_tailroom(st_gdata->rx_skb);
+
+ pr_debug("len %d room %d", len, room);
+
+ if (!len) {
+ /* Received packet has only packet header and
+ * has zero length payload. So, ask ST CORE to
+ * forward the packet to protocol driver (BT/FM/GPS)
+ */
+ st_send_frame(chnl_id, st_gdata);
+
+ } else if (len > room) {
+ /* Received packet's payload length is larger.
+ * We can't accommodate it in created skb.
+ */
+ pr_err("Data length is too large len %d room %d", len,
+ room);
+ kfree_skb(st_gdata->rx_skb);
+ } else {
+ /* Packet header has non-zero payload length and
+ * we have enough space in created skb. Lets read
+ * payload data */
+ st_gdata->rx_state = ST_W4_DATA;
+ st_gdata->rx_count = len;
+ return len;
+ }
+
+ /* Change ST state to continue to process next
+ * packet */
+ st_gdata->rx_state = ST_W4_PACKET_TYPE;
+ st_gdata->rx_skb = NULL;
+ st_gdata->rx_count = 0;
+ st_gdata->rx_chnl = 0;
+
+ return 0;
+}
+
+/**
+ * st_wakeup_ack - internal function for action when wake-up ack
+ * received
+ */
+static inline void st_wakeup_ack(struct st_data_s *st_gdata,
+ unsigned char cmd)
+{
+ struct sk_buff *waiting_skb;
+ unsigned long flags = 0;
+
+ spin_lock_irqsave(&st_gdata->lock, flags);
+ /* de-Q from waitQ and Q in txQ now that the
+ * chip is awake
+ */
+ while ((waiting_skb = skb_dequeue(&st_gdata->tx_waitq)))
+ skb_queue_tail(&st_gdata->txq, waiting_skb);
+
+ /* state forwarded to ST LL */
+ st_ll_sleep_state(st_gdata, (unsigned long)cmd);
+ spin_unlock_irqrestore(&st_gdata->lock, flags);
+
+ /* wake up to send the recently copied skbs from waitQ */
+ st_tx_wakeup(st_gdata);
+}
+
+/**
+ * st_int_recv - ST's internal receive function.
+ * Decodes received RAW data and forwards to corresponding
+ * client drivers (Bluetooth,FM,GPS..etc).
+ * This can receive various types of packets,
+ * HCI-Events, ACL, SCO, 4 types of HCI-LL PM packets
+ * CH-8 packets from FM, CH-9 packets from GPS cores.
+ */
+void st_int_recv(void *disc_data,
+ const unsigned char *data, long count)
+{
+ char *ptr;
+ struct st_proto_s *proto;
+ unsigned short payload_len = 0;
+ int len = 0;
+ unsigned char type = 0;
+ unsigned char *plen;
+ struct st_data_s *st_gdata = (struct st_data_s *)disc_data;
+ unsigned long flags;
+
+ ptr = (char *)data;
+ /* tty_receive sent null ? */
+ if (unlikely(ptr == NULL) || (st_gdata == NULL)) {
+ pr_err(" received null from TTY ");
+ return;
+ }
+
+ pr_debug("count %ld rx_state %ld"
+ "rx_count %ld", count, st_gdata->rx_state,
+ st_gdata->rx_count);
+
+ spin_lock_irqsave(&st_gdata->lock, flags);
+ /* Decode received bytes here */
+ while (count) {
+ if (st_gdata->rx_count) {
+ len = min_t(unsigned int, st_gdata->rx_count, count);
+ memcpy(skb_put(st_gdata->rx_skb, len), ptr, len);
+ st_gdata->rx_count -= len;
+ count -= len;
+ ptr += len;
+
+ if (st_gdata->rx_count)
+ continue;
+
+ /* Check ST RX state machine , where are we? */
+ switch (st_gdata->rx_state) {
+ /* Waiting for complete packet ? */
+ case ST_W4_DATA:
+ pr_debug("Complete pkt received");
+ /* Ask ST CORE to forward
+ * the packet to protocol driver */
+ st_send_frame(st_gdata->rx_chnl, st_gdata);
+
+ st_gdata->rx_state = ST_W4_PACKET_TYPE;
+ st_gdata->rx_skb = NULL;
+ continue;
+ /* parse the header to know details */
+ case ST_W4_HEADER:
+ proto = st_gdata->list[st_gdata->rx_chnl];
+ plen =
+ &st_gdata->rx_skb->data
+ [proto->offset_len_in_hdr];
+ pr_debug("plen pointing to %x\n", *plen);
+ if (proto->len_size == 1)/* 1 byte len field */
+ payload_len = *(unsigned char *)plen;
+ else if (proto->len_size == 2)
+ payload_len =
+ __le16_to_cpu(*(unsigned short *)plen);
+ else
+ pr_info("%s: invalid length "
+ "for id %d\n",
+ __func__, proto->chnl_id);
+ st_check_data_len(st_gdata, proto->chnl_id,
+ payload_len);
+ pr_debug("off %d, pay len %d\n",
+ proto->offset_len_in_hdr, payload_len);
+ continue;
+ } /* end of switch rx_state */
+ }
+
+ /* end of if rx_count */
+ /* Check first byte of packet and identify module
+ * owner (BT/FM/GPS) */
+ switch (*ptr) {
+ case LL_SLEEP_IND:
+ case LL_SLEEP_ACK:
+ case LL_WAKE_UP_IND:
+ pr_debug("PM packet");
+ /* this takes appropriate action based on
+ * sleep state received --
+ */
+ st_ll_sleep_state(st_gdata, *ptr);
+ /* if WAKEUP_IND collides copy from waitq to txq
+ * and assume chip awake
+ */
+ spin_unlock_irqrestore(&st_gdata->lock, flags);
+ if (st_ll_getstate(st_gdata) == ST_LL_AWAKE)
+ st_wakeup_ack(st_gdata, LL_WAKE_UP_ACK);
+ spin_lock_irqsave(&st_gdata->lock, flags);
+
+ ptr++;
+ count--;
+ continue;
+ case LL_WAKE_UP_ACK:
+ pr_debug("PM packet");
+
+ spin_unlock_irqrestore(&st_gdata->lock, flags);
+ /* wake up ack received */
+ st_wakeup_ack(st_gdata, *ptr);
+ spin_lock_irqsave(&st_gdata->lock, flags);
+
+ ptr++;
+ count--;
+ continue;
+ /* Unknow packet? */
+ default:
+ type = *ptr;
+
+ /* Default case means non-HCILL packets,
+ * possibilities are packets for:
+ * (a) valid protocol - Supported Protocols within
+ * the ST_MAX_CHANNELS.
+ * (b) registered protocol - Checked by
+ * "st_gdata->list[type] == NULL)" are supported
+ * protocols only.
+ * Rules out any invalid protocol and
+ * unregistered protocols with channel ID < 16.
+ */
+
+ if ((type >= ST_MAX_CHANNELS) ||
+ (st_gdata->list[type] == NULL)) {
+ pr_err("chip/interface misbehavior: "
+ "dropping frame starting "
+ "with 0x%02x\n", type);
+ goto done;
+ }
+
+ st_gdata->rx_skb = alloc_skb(
+ st_gdata->list[type]->max_frame_size,
+ GFP_ATOMIC);
+ if (st_gdata->rx_skb == NULL) {
+ pr_err("out of memory: dropping\n");
+ goto done;
+ }
+
+ skb_reserve(st_gdata->rx_skb,
+ st_gdata->list[type]->reserve);
+ /* next 2 required for BT only */
+ st_gdata->rx_skb->cb[0] = type; /*pkt_type*/
+ st_gdata->rx_skb->cb[1] = 0; /*incoming*/
+ st_gdata->rx_chnl = *ptr;
+ st_gdata->rx_state = ST_W4_HEADER;
+ st_gdata->rx_count = st_gdata->list[type]->hdr_len;
+ pr_debug("rx_count %ld\n", st_gdata->rx_count);
+ };
+ ptr++;
+ count--;
+ }
+done:
+ spin_unlock_irqrestore(&st_gdata->lock, flags);
+ pr_debug("done %s", __func__);
+ return;
+}
+
+/**
+ * st_int_dequeue - internal de-Q function.
+ * If the previous data set was not written
+ * completely, return that skb which has the pending data.
+ * In normal cases, return top of txq.
+ */
+static struct sk_buff *st_int_dequeue(struct st_data_s *st_gdata)
+{
+ struct sk_buff *returning_skb;
+
+ pr_debug("%s", __func__);
+ if (st_gdata->tx_skb != NULL) {
+ returning_skb = st_gdata->tx_skb;
+ st_gdata->tx_skb = NULL;
+ return returning_skb;
+ }
+ return skb_dequeue(&st_gdata->txq);
+}
+
+/**
+ * st_int_enqueue - internal Q-ing function.
+ * Will either Q the skb to txq or the tx_waitq
+ * depending on the ST LL state.
+ * If the chip is asleep, then Q it onto waitq and
+ * wakeup the chip.
+ * txq and waitq needs protection since the other contexts
+ * may be sending data, waking up chip.
+ */
+static void st_int_enqueue(struct st_data_s *st_gdata, struct sk_buff *skb)
+{
+ unsigned long flags = 0;
+
+ pr_debug("%s", __func__);
+ spin_lock_irqsave(&st_gdata->lock, flags);
+
+ switch (st_ll_getstate(st_gdata)) {
+ case ST_LL_AWAKE:
+ pr_debug("ST LL is AWAKE, sending normally");
+ skb_queue_tail(&st_gdata->txq, skb);
+ break;
+ case ST_LL_ASLEEP_TO_AWAKE:
+ skb_queue_tail(&st_gdata->tx_waitq, skb);
+ break;
+ case ST_LL_AWAKE_TO_ASLEEP:
+ pr_err("ST LL is illegal state(%ld),"
+ "purging received skb.", st_ll_getstate(st_gdata));
+ kfree_skb(skb);
+ break;
+ case ST_LL_ASLEEP:
+ skb_queue_tail(&st_gdata->tx_waitq, skb);
+ st_ll_wakeup(st_gdata);
+ break;
+ default:
+ pr_err("ST LL is illegal state(%ld),"
+ "purging received skb.", st_ll_getstate(st_gdata));
+ kfree_skb(skb);
+ break;
+ }
+
+ spin_unlock_irqrestore(&st_gdata->lock, flags);
+ pr_debug("done %s", __func__);
+ return;
+}
+
+/*
+ * internal wakeup function
+ * called from either
+ * - TTY layer when write's finished
+ * - st_write (in context of the protocol stack)
+ */
+static void work_fn_write_wakeup(struct work_struct *work)
+{
+ struct st_data_s *st_gdata = container_of(work, struct st_data_s,
+ work_write_wakeup);
+
+ st_tx_wakeup((void *)st_gdata);
+}
+void st_tx_wakeup(struct st_data_s *st_data)
+{
+ struct sk_buff *skb;
+ unsigned long flags; /* for irq save flags */
+ pr_debug("%s", __func__);
+ /* check for sending & set flag sending here */
+ if (test_and_set_bit(ST_TX_SENDING, &st_data->tx_state)) {
+ pr_debug("ST already sending");
+ /* keep sending */
+ set_bit(ST_TX_WAKEUP, &st_data->tx_state);
+ return;
+ /* TX_WAKEUP will be checked in another
+ * context
+ */
+ }
+ do { /* come back if st_tx_wakeup is set */
+ /* woke-up to write */
+ clear_bit(ST_TX_WAKEUP, &st_data->tx_state);
+ while ((skb = st_int_dequeue(st_data))) {
+ int len;
+ spin_lock_irqsave(&st_data->lock, flags);
+ /* enable wake-up from TTY */
+ set_bit(TTY_DO_WRITE_WAKEUP, &st_data->tty->flags);
+ len = st_int_write(st_data, skb->data, skb->len);
+ skb_pull(skb, len);
+ /* if skb->len = len as expected, skb->len=0 */
+ if (skb->len) {
+ /* would be the next skb to be sent */
+ st_data->tx_skb = skb;
+ spin_unlock_irqrestore(&st_data->lock, flags);
+ break;
+ }
+ kfree_skb(skb);
+ spin_unlock_irqrestore(&st_data->lock, flags);
+ }
+ /* if wake-up is set in another context- restart sending */
+ } while (test_bit(ST_TX_WAKEUP, &st_data->tx_state));
+
+ /* clear flag sending */
+ clear_bit(ST_TX_SENDING, &st_data->tx_state);
+}
+
+/********************************************************************/
+/* functions called from ST KIM
+*/
+void kim_st_list_protocols(struct st_data_s *st_gdata, void *buf)
+{
+ seq_printf(buf, "[%d]\nBT=%c\nFM=%c\nGPS=%c\n",
+ st_gdata->protos_registered,
+ st_gdata->is_registered[0x04] == true ? 'R' : 'U',
+ st_gdata->is_registered[0x08] == true ? 'R' : 'U',
+ st_gdata->is_registered[0x09] == true ? 'R' : 'U');
+}
+
+/********************************************************************/
+/*
+ * functions called from protocol stack drivers
+ * to be EXPORT-ed
+ */
+long st_register(struct st_proto_s *new_proto)
+{
+ struct st_data_s *st_gdata;
+ long err = 0;
+ unsigned long flags = 0;
+
+ st_kim_ref(&st_gdata, 0);
+ if (st_gdata == NULL || new_proto == NULL || new_proto->recv == NULL
+ || new_proto->reg_complete_cb == NULL) {
+ pr_err("gdata/new_proto/recv or reg_complete_cb not ready");
+ return -EINVAL;
+ }
+
+ if (new_proto->chnl_id >= ST_MAX_CHANNELS) {
+ pr_err("chnl_id %d not supported", new_proto->chnl_id);
+ return -EPROTONOSUPPORT;
+ }
+
+ if (st_gdata->is_registered[new_proto->chnl_id] == true) {
+ pr_err("chnl_id %d already registered", new_proto->chnl_id);
+ return -EALREADY;
+ }
+
+ /* can be from process context only */
+ spin_lock_irqsave(&st_gdata->lock, flags);
+
+ if (test_bit(ST_REG_IN_PROGRESS, &st_gdata->st_state)) {
+ pr_info(" ST_REG_IN_PROGRESS:%d ", new_proto->chnl_id);
+ /* fw download in progress */
+
+ add_channel_to_table(st_gdata, new_proto);
+ st_gdata->protos_registered++;
+ new_proto->write = st_write;
+
+ set_bit(ST_REG_PENDING, &st_gdata->st_state);
+ spin_unlock_irqrestore(&st_gdata->lock, flags);
+ return -EINPROGRESS;
+ } else if (st_gdata->protos_registered == ST_EMPTY) {
+ pr_info(" chnl_id list empty :%d ", new_proto->chnl_id);
+ set_bit(ST_REG_IN_PROGRESS, &st_gdata->st_state);
+ st_recv = st_kim_recv;
+
+ /* enable the ST LL - to set default chip state */
+ st_ll_enable(st_gdata);
+
+ /* release lock previously held - re-locked below */
+ spin_unlock_irqrestore(&st_gdata->lock, flags);
+
+ /* this may take a while to complete
+ * since it involves BT fw download
+ */
+ err = st_kim_start(st_gdata->kim_data);
+ if (err != 0) {
+ clear_bit(ST_REG_IN_PROGRESS, &st_gdata->st_state);
+ if ((st_gdata->protos_registered != ST_EMPTY) &&
+ (test_bit(ST_REG_PENDING, &st_gdata->st_state))) {
+ pr_err(" KIM failure complete callback ");
+ spin_lock_irqsave(&st_gdata->lock, flags);
+ st_reg_complete(st_gdata, err);
+ spin_unlock_irqrestore(&st_gdata->lock, flags);
+ clear_bit(ST_REG_PENDING, &st_gdata->st_state);
+ }
+ return -EINVAL;
+ }
+
+ spin_lock_irqsave(&st_gdata->lock, flags);
+
+ clear_bit(ST_REG_IN_PROGRESS, &st_gdata->st_state);
+ st_recv = st_int_recv;
+
+ /* this is where all pending registration
+ * are signalled to be complete by calling callback functions
+ */
+ if ((st_gdata->protos_registered != ST_EMPTY) &&
+ (test_bit(ST_REG_PENDING, &st_gdata->st_state))) {
+ pr_debug(" call reg complete callback ");
+ st_reg_complete(st_gdata, 0);
+ }
+ clear_bit(ST_REG_PENDING, &st_gdata->st_state);
+
+ /* check for already registered once more,
+ * since the above check is old
+ */
+ if (st_gdata->is_registered[new_proto->chnl_id] == true) {
+ pr_err(" proto %d already registered ",
+ new_proto->chnl_id);
+ spin_unlock_irqrestore(&st_gdata->lock, flags);
+ return -EALREADY;
+ }
+
+ add_channel_to_table(st_gdata, new_proto);
+ st_gdata->protos_registered++;
+ new_proto->write = st_write;
+ spin_unlock_irqrestore(&st_gdata->lock, flags);
+ return err;
+ }
+ /* if fw is already downloaded & new stack registers protocol */
+ else {
+ add_channel_to_table(st_gdata, new_proto);
+ st_gdata->protos_registered++;
+ new_proto->write = st_write;
+
+ /* lock already held before entering else */
+ spin_unlock_irqrestore(&st_gdata->lock, flags);
+ return err;
+ }
+ pr_debug("done %s(%d) ", __func__, new_proto->chnl_id);
+}
+EXPORT_SYMBOL_GPL(st_register);
+
+/* to unregister a protocol -
+ * to be called from protocol stack driver
+ */
+long st_unregister(struct st_proto_s *proto)
+{
+ long err = 0;
+ unsigned long flags = 0;
+ struct st_data_s *st_gdata;
+
+ pr_debug("%s: %d ", __func__, proto->chnl_id);
+
+ st_kim_ref(&st_gdata, 0);
+ if (!st_gdata || proto->chnl_id >= ST_MAX_CHANNELS) {
+ pr_err(" chnl_id %d not supported", proto->chnl_id);
+ return -EPROTONOSUPPORT;
+ }
+
+ spin_lock_irqsave(&st_gdata->lock, flags);
+
+ if (st_gdata->is_registered[proto->chnl_id] == false) {
+ pr_err(" chnl_id %d not registered", proto->chnl_id);
+ spin_unlock_irqrestore(&st_gdata->lock, flags);
+ return -EPROTONOSUPPORT;
+ }
+
+ if (st_gdata->protos_registered)
+ st_gdata->protos_registered--;
+
+ remove_channel_from_table(st_gdata, proto);
+ spin_unlock_irqrestore(&st_gdata->lock, flags);
+
+ if ((st_gdata->protos_registered == ST_EMPTY) &&
+ (!test_bit(ST_REG_PENDING, &st_gdata->st_state))) {
+ pr_info(" all chnl_ids unregistered ");
+
+ /* stop traffic on tty */
+ if (st_gdata->tty) {
+ tty_ldisc_flush(st_gdata->tty);
+ stop_tty(st_gdata->tty);
+ }
+
+ /* all chnl_ids now unregistered */
+ st_kim_stop(st_gdata->kim_data);
+ /* disable ST LL */
+ st_ll_disable(st_gdata);
+ }
+ return err;
+}
+
+/*
+ * called in protocol stack drivers
+ * via the write function pointer
+ */
+long st_write(struct sk_buff *skb)
+{
+ struct st_data_s *st_gdata;
+ long len;
+
+ st_kim_ref(&st_gdata, 0);
+ if (unlikely(skb == NULL || st_gdata == NULL
+ || st_gdata->tty == NULL)) {
+ pr_err("data/tty unavailable to perform write");
+ return -EINVAL;
+ }
+
+ pr_debug("%d to be written", skb->len);
+ len = skb->len;
+
+ /* st_ll to decide where to enqueue the skb */
+ st_int_enqueue(st_gdata, skb);
+ /* wake up */
+ st_tx_wakeup(st_gdata);
+
+ /* return number of bytes written */
+ return len;
+}
+
+/* for protocols making use of shared transport */
+EXPORT_SYMBOL_GPL(st_unregister);
+
+/********************************************************************/
+/*
+ * functions called from TTY layer
+ */
+static int st_tty_open(struct tty_struct *tty)
+{
+ int err = 0;
+ struct st_data_s *st_gdata;
+ pr_info("%s ", __func__);
+
+ st_kim_ref(&st_gdata, 0);
+ st_gdata->tty = tty;
+ tty->disc_data = st_gdata;
+
+ /* don't do an wakeup for now */
+ clear_bit(TTY_DO_WRITE_WAKEUP, &tty->flags);
+
+ /* mem already allocated
+ */
+ tty->receive_room = 65536;
+ /* Flush any pending characters in the driver and discipline. */
+ tty_ldisc_flush(tty);
+ tty_driver_flush_buffer(tty);
+ /*
+ * signal to UIM via KIM that -
+ * installation of N_TI_WL ldisc is complete
+ */
+ st_kim_complete(st_gdata->kim_data);
+ pr_debug("done %s", __func__);
+ return err;
+}
+
+static void st_tty_close(struct tty_struct *tty)
+{
+ unsigned char i = ST_MAX_CHANNELS;
+ unsigned long flags = 0;
+ struct st_data_s *st_gdata = tty->disc_data;
+
+ pr_info("%s ", __func__);
+
+ /* TODO:
+ * if a protocol has been registered & line discipline
+ * un-installed for some reason - what should be done ?
+ */
+ spin_lock_irqsave(&st_gdata->lock, flags);
+ for (i = ST_BT; i < ST_MAX_CHANNELS; i++) {
+ if (st_gdata->is_registered[i] == true)
+ pr_err("%d not un-registered", i);
+ st_gdata->list[i] = NULL;
+ st_gdata->is_registered[i] = false;
+ }
+ st_gdata->protos_registered = 0;
+ spin_unlock_irqrestore(&st_gdata->lock, flags);
+ /*
+ * signal to UIM via KIM that -
+ * N_TI_WL ldisc is un-installed
+ */
+ st_kim_complete(st_gdata->kim_data);
+ st_gdata->tty = NULL;
+ /* Flush any pending characters in the driver and discipline. */
+ tty_ldisc_flush(tty);
+ tty_driver_flush_buffer(tty);
+
+ spin_lock_irqsave(&st_gdata->lock, flags);
+ /* empty out txq and tx_waitq */
+ skb_queue_purge(&st_gdata->txq);
+ skb_queue_purge(&st_gdata->tx_waitq);
+ /* reset the TTY Rx states of ST */
+ st_gdata->rx_count = 0;
+ st_gdata->rx_state = ST_W4_PACKET_TYPE;
+ kfree_skb(st_gdata->rx_skb);
+ st_gdata->rx_skb = NULL;
+ spin_unlock_irqrestore(&st_gdata->lock, flags);
+
+ pr_debug("%s: done ", __func__);
+}
+
+static void st_tty_receive(struct tty_struct *tty, const unsigned char *data,
+ char *tty_flags, int count)
+{
+#ifdef VERBOSE
+ print_hex_dump(KERN_DEBUG, ">in>", DUMP_PREFIX_NONE,
+ 16, 1, data, count, 0);
+#endif
+
+ /*
+ * if fw download is in progress then route incoming data
+ * to KIM for validation
+ */
+ st_recv(tty->disc_data, data, count);
+ pr_debug("done %s", __func__);
+}
+
+/* wake-up function called in from the TTY layer
+ * inside the internal wakeup function will be called
+ */
+static void st_tty_wakeup(struct tty_struct *tty)
+{
+ struct st_data_s *st_gdata = tty->disc_data;
+ pr_debug("%s ", __func__);
+ /* don't do an wakeup for now */
+ clear_bit(TTY_DO_WRITE_WAKEUP, &tty->flags);
+
+ /*
+ * schedule the internal wakeup instead of calling directly to
+ * avoid lockup (port->lock needed in tty->ops->write is
+ * already taken here
+ */
+ schedule_work(&st_gdata->work_write_wakeup);
+}
+
+static void st_tty_flush_buffer(struct tty_struct *tty)
+{
+ struct st_data_s *st_gdata = tty->disc_data;
+ pr_debug("%s ", __func__);
+
+ kfree_skb(st_gdata->tx_skb);
+ st_gdata->tx_skb = NULL;
+
+ tty_driver_flush_buffer(tty);
+ return;
+}
+
+static struct tty_ldisc_ops st_ldisc_ops = {
+ .magic = TTY_LDISC_MAGIC,
+ .name = "n_st",
+ .open = st_tty_open,
+ .close = st_tty_close,
+ .receive_buf = st_tty_receive,
+ .write_wakeup = st_tty_wakeup,
+ .flush_buffer = st_tty_flush_buffer,
+ .owner = THIS_MODULE
+};
+
+/********************************************************************/
+int st_core_init(struct st_data_s **core_data)
+{
+ struct st_data_s *st_gdata;
+ long err;
+
+ err = tty_register_ldisc(N_TI_WL, &st_ldisc_ops);
+ if (err) {
+ pr_err("error registering %d line discipline %ld",
+ N_TI_WL, err);
+ return err;
+ }
+ pr_debug("registered n_shared line discipline");
+
+ st_gdata = kzalloc(sizeof(struct st_data_s), GFP_KERNEL);
+ if (!st_gdata) {
+ pr_err("memory allocation failed");
+ err = tty_unregister_ldisc(N_TI_WL);
+ if (err)
+ pr_err("unable to un-register ldisc %ld", err);
+ err = -ENOMEM;
+ return err;
+ }
+
+ /* Initialize ST TxQ and Tx waitQ queue head. All BT/FM/GPS module skb's
+ * will be pushed in this queue for actual transmission.
+ */
+ skb_queue_head_init(&st_gdata->txq);
+ skb_queue_head_init(&st_gdata->tx_waitq);
+
+ /* Locking used in st_int_enqueue() to avoid multiple execution */
+ spin_lock_init(&st_gdata->lock);
+
+ err = st_ll_init(st_gdata);
+ if (err) {
+ pr_err("error during st_ll initialization(%ld)", err);
+ kfree(st_gdata);
+ err = tty_unregister_ldisc(N_TI_WL);
+ if (err)
+ pr_err("unable to un-register ldisc");
+ return err;
+ }
+
+ INIT_WORK(&st_gdata->work_write_wakeup, work_fn_write_wakeup);
+
+ *core_data = st_gdata;
+ return 0;
+}
+
+void st_core_exit(struct st_data_s *st_gdata)
+{
+ long err;
+ /* internal module cleanup */
+ err = st_ll_deinit(st_gdata);
+ if (err)
+ pr_err("error during deinit of ST LL %ld", err);
+
+ if (st_gdata != NULL) {
+ /* Free ST Tx Qs and skbs */
+ skb_queue_purge(&st_gdata->txq);
+ skb_queue_purge(&st_gdata->tx_waitq);
+ kfree_skb(st_gdata->rx_skb);
+ kfree_skb(st_gdata->tx_skb);
+ /* TTY ldisc cleanup */
+ err = tty_unregister_ldisc(N_TI_WL);
+ if (err)
+ pr_err("unable to un-register ldisc %ld", err);
+ /* free the global data pointer */
+ kfree(st_gdata);
+ }
+}
diff --git a/drivers/misc/ti-st/st_kim.c b/drivers/misc/ti-st/st_kim.c
new file mode 100644
index 0000000..71b6455
--- /dev/null
+++ b/drivers/misc/ti-st/st_kim.c
@@ -0,0 +1,870 @@
+/*
+ * Shared Transport Line discipline driver Core
+ * Init Manager module responsible for GPIO control
+ * and firmware download
+ * Copyright (C) 2009-2010 Texas Instruments
+ * Author: Pavan Savoy <pavan_savoy@ti.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+
+#define pr_fmt(fmt) "(stk) :" fmt
+#include <linux/platform_device.h>
+#include <linux/jiffies.h>
+#include <linux/firmware.h>
+#include <linux/delay.h>
+#include <linux/wait.h>
+#include <linux/gpio.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+#include <linux/sched.h>
+#include <linux/sysfs.h>
+#include <linux/tty.h>
+
+#include <linux/skbuff.h>
+#include <linux/ti_wilink_st.h>
+#include <linux/module.h>
+
+#define MAX_ST_DEVICES 3 /* Imagine 1 on each UART for now */
+static struct platform_device *st_kim_devices[MAX_ST_DEVICES];
+
+/**********************************************************************/
+/* internal functions */
+
+/**
+ * st_get_plat_device -
+ * function which returns the reference to the platform device
+ * requested by id. As of now only 1 such device exists (id=0)
+ * the context requesting for reference can get the id to be
+ * requested by a. The protocol driver which is registering or
+ * b. the tty device which is opened.
+ */
+static struct platform_device *st_get_plat_device(int id)
+{
+ return st_kim_devices[id];
+}
+
+/**
+ * validate_firmware_response -
+ * function to return whether the firmware response was proper
+ * in case of error don't complete so that waiting for proper
+ * response times out
+ */
+static void validate_firmware_response(struct kim_data_s *kim_gdata)
+{
+ struct sk_buff *skb = kim_gdata->rx_skb;
+ if (!skb)
+ return;
+
+ /* these magic numbers are the position in the response buffer which
+ * allows us to distinguish whether the response is for the read
+ * version info. command
+ */
+ if (skb->data[2] == 0x01 && skb->data[3] == 0x01 &&
+ skb->data[4] == 0x10 && skb->data[5] == 0x00) {
+ /* fw version response */
+ memcpy(kim_gdata->resp_buffer,
+ kim_gdata->rx_skb->data,
+ kim_gdata->rx_skb->len);
+ complete_all(&kim_gdata->kim_rcvd);
+ kim_gdata->rx_state = ST_W4_PACKET_TYPE;
+ kim_gdata->rx_skb = NULL;
+ kim_gdata->rx_count = 0;
+ } else if (unlikely(skb->data[5] != 0)) {
+ pr_err("no proper response during fw download");
+ pr_err("data6 %x", skb->data[5]);
+ kfree_skb(skb);
+ return; /* keep waiting for the proper response */
+ }
+ /* becos of all the script being downloaded */
+ complete_all(&kim_gdata->kim_rcvd);
+ kfree_skb(skb);
+}
+
+/* check for data len received inside kim_int_recv
+ * most often hit the last case to update state to waiting for data
+ */
+static inline int kim_check_data_len(struct kim_data_s *kim_gdata, int len)
+{
+ register int room = skb_tailroom(kim_gdata->rx_skb);
+
+ pr_debug("len %d room %d", len, room);
+
+ if (!len) {
+ validate_firmware_response(kim_gdata);
+ } else if (len > room) {
+ /* Received packet's payload length is larger.
+ * We can't accommodate it in created skb.
+ */
+ pr_err("Data length is too large len %d room %d", len,
+ room);
+ kfree_skb(kim_gdata->rx_skb);
+ } else {
+ /* Packet header has non-zero payload length and
+ * we have enough space in created skb. Lets read
+ * payload data */
+ kim_gdata->rx_state = ST_W4_DATA;
+ kim_gdata->rx_count = len;
+ return len;
+ }
+
+ /* Change ST LL state to continue to process next
+ * packet */
+ kim_gdata->rx_state = ST_W4_PACKET_TYPE;
+ kim_gdata->rx_skb = NULL;
+ kim_gdata->rx_count = 0;
+
+ return 0;
+}
+
+/**
+ * kim_int_recv - receive function called during firmware download
+ * firmware download responses on different UART drivers
+ * have been observed to come in bursts of different
+ * tty_receive and hence the logic
+ */
+static void kim_int_recv(struct kim_data_s *kim_gdata,
+ const unsigned char *data, long count)
+{
+ const unsigned char *ptr;
+ int len = 0, type = 0;
+ unsigned char *plen;
+
+ pr_debug("%s", __func__);
+ /* Decode received bytes here */
+ ptr = data;
+ if (unlikely(ptr == NULL)) {
+ pr_err(" received null from TTY ");
+ return;
+ }
+
+ while (count) {
+ if (kim_gdata->rx_count) {
+ len = min_t(unsigned int, kim_gdata->rx_count, count);
+ memcpy(skb_put(kim_gdata->rx_skb, len), ptr, len);
+ kim_gdata->rx_count -= len;
+ count -= len;
+ ptr += len;
+
+ if (kim_gdata->rx_count)
+ continue;
+
+ /* Check ST RX state machine , where are we? */
+ switch (kim_gdata->rx_state) {
+ /* Waiting for complete packet ? */
+ case ST_W4_DATA:
+ pr_debug("Complete pkt received");
+ validate_firmware_response(kim_gdata);
+ kim_gdata->rx_state = ST_W4_PACKET_TYPE;
+ kim_gdata->rx_skb = NULL;
+ continue;
+ /* Waiting for Bluetooth event header ? */
+ case ST_W4_HEADER:
+ plen =
+ (unsigned char *)&kim_gdata->rx_skb->data[1];
+ pr_debug("event hdr: plen 0x%02x\n", *plen);
+ kim_check_data_len(kim_gdata, *plen);
+ continue;
+ } /* end of switch */
+ } /* end of if rx_state */
+ switch (*ptr) {
+ /* Bluetooth event packet? */
+ case 0x04:
+ kim_gdata->rx_state = ST_W4_HEADER;
+ kim_gdata->rx_count = 2;
+ type = *ptr;
+ break;
+ default:
+ pr_info("unknown packet");
+ ptr++;
+ count--;
+ continue;
+ }
+ ptr++;
+ count--;
+ kim_gdata->rx_skb =
+ alloc_skb(1024+8, GFP_ATOMIC);
+ if (!kim_gdata->rx_skb) {
+ pr_err("can't allocate mem for new packet");
+ kim_gdata->rx_state = ST_W4_PACKET_TYPE;
+ kim_gdata->rx_count = 0;
+ return;
+ }
+ skb_reserve(kim_gdata->rx_skb, 8);
+ kim_gdata->rx_skb->cb[0] = 4;
+ kim_gdata->rx_skb->cb[1] = 0;
+
+ }
+ return;
+}
+
+static long read_local_version(struct kim_data_s *kim_gdata, char *bts_scr_name)
+{
+ unsigned short version = 0, chip = 0, min_ver = 0, maj_ver = 0;
+ const char read_ver_cmd[] = { 0x01, 0x01, 0x10, 0x00 };
+ long timeout;
+
+ pr_debug("%s", __func__);
+
+ reinit_completion(&kim_gdata->kim_rcvd);
+ if (4 != st_int_write(kim_gdata->core_data, read_ver_cmd, 4)) {
+ pr_err("kim: couldn't write 4 bytes");
+ return -EIO;
+ }
+
+ timeout = wait_for_completion_interruptible_timeout(
+ &kim_gdata->kim_rcvd, msecs_to_jiffies(CMD_RESP_TIME));
+ if (timeout <= 0) {
+ pr_err(" waiting for ver info- timed out or received signal");
+ return timeout ? -ERESTARTSYS : -ETIMEDOUT;
+ }
+ reinit_completion(&kim_gdata->kim_rcvd);
+ /* the positions 12 & 13 in the response buffer provide with the
+ * chip, major & minor numbers
+ */
+
+ version =
+ MAKEWORD(kim_gdata->resp_buffer[12],
+ kim_gdata->resp_buffer[13]);
+ chip = (version & 0x7C00) >> 10;
+ min_ver = (version & 0x007F);
+ maj_ver = (version & 0x0380) >> 7;
+
+ if (version & 0x8000)
+ maj_ver |= 0x0008;
+
+ sprintf(bts_scr_name, "ti-connectivity/TIInit_%d.%d.%d.bts",
+ chip, maj_ver, min_ver);
+
+ /* to be accessed later via sysfs entry */
+ kim_gdata->version.full = version;
+ kim_gdata->version.chip = chip;
+ kim_gdata->version.maj_ver = maj_ver;
+ kim_gdata->version.min_ver = min_ver;
+
+ pr_info("%s", bts_scr_name);
+ return 0;
+}
+
+static void skip_change_remote_baud(unsigned char **ptr, long *len)
+{
+ unsigned char *nxt_action, *cur_action;
+ cur_action = *ptr;
+
+ nxt_action = cur_action + sizeof(struct bts_action) +
+ ((struct bts_action *) cur_action)->size;
+
+ if (((struct bts_action *) nxt_action)->type != ACTION_WAIT_EVENT) {
+ pr_err("invalid action after change remote baud command");
+ } else {
+ *ptr = *ptr + sizeof(struct bts_action) +
+ ((struct bts_action *)cur_action)->size;
+ *len = *len - (sizeof(struct bts_action) +
+ ((struct bts_action *)cur_action)->size);
+ /* warn user on not commenting these in firmware */
+ pr_warn("skipping the wait event of change remote baud");
+ }
+}
+
+/**
+ * download_firmware -
+ * internal function which parses through the .bts firmware
+ * script file intreprets SEND, DELAY actions only as of now
+ */
+static long download_firmware(struct kim_data_s *kim_gdata)
+{
+ long err = 0;
+ long len = 0;
+ unsigned char *ptr = NULL;
+ unsigned char *action_ptr = NULL;
+ unsigned char bts_scr_name[40] = { 0 }; /* 40 char long bts scr name? */
+ int wr_room_space;
+ int cmd_size;
+ unsigned long timeout;
+
+ err = read_local_version(kim_gdata, bts_scr_name);
+ if (err != 0) {
+ pr_err("kim: failed to read local ver");
+ return err;
+ }
+ err =
+ request_firmware(&kim_gdata->fw_entry, bts_scr_name,
+ &kim_gdata->kim_pdev->dev);
+ if (unlikely((err != 0) || (kim_gdata->fw_entry->data == NULL) ||
+ (kim_gdata->fw_entry->size == 0))) {
+ pr_err(" request_firmware failed(errno %ld) for %s", err,
+ bts_scr_name);
+ return -EINVAL;
+ }
+ ptr = (void *)kim_gdata->fw_entry->data;
+ len = kim_gdata->fw_entry->size;
+ /* bts_header to remove out magic number and
+ * version
+ */
+ ptr += sizeof(struct bts_header);
+ len -= sizeof(struct bts_header);
+
+ while (len > 0 && ptr) {
+ pr_debug(" action size %d, type %d ",
+ ((struct bts_action *)ptr)->size,
+ ((struct bts_action *)ptr)->type);
+
+ switch (((struct bts_action *)ptr)->type) {
+ case ACTION_SEND_COMMAND: /* action send */
+ pr_debug("S");
+ action_ptr = &(((struct bts_action *)ptr)->data[0]);
+ if (unlikely
+ (((struct hci_command *)action_ptr)->opcode ==
+ 0xFF36)) {
+ /* ignore remote change
+ * baud rate HCI VS command */
+ pr_warn("change remote baud"
+ " rate command in firmware");
+ skip_change_remote_baud(&ptr, &len);
+ break;
+ }
+ /*
+ * Make sure we have enough free space in uart
+ * tx buffer to write current firmware command
+ */
+ cmd_size = ((struct bts_action *)ptr)->size;
+ timeout = jiffies + msecs_to_jiffies(CMD_WR_TIME);
+ do {
+ wr_room_space =
+ st_get_uart_wr_room(kim_gdata->core_data);
+ if (wr_room_space < 0) {
+ pr_err("Unable to get free "
+ "space info from uart tx buffer");
+ release_firmware(kim_gdata->fw_entry);
+ return wr_room_space;
+ }
+ mdelay(1); /* wait 1ms before checking room */
+ } while ((wr_room_space < cmd_size) &&
+ time_before(jiffies, timeout));
+
+ /* Timeout happened ? */
+ if (time_after_eq(jiffies, timeout)) {
+ pr_err("Timeout while waiting for free "
+ "free space in uart tx buffer");
+ release_firmware(kim_gdata->fw_entry);
+ return -ETIMEDOUT;
+ }
+ /* reinit completion before sending for the
+ * relevant wait
+ */
+ reinit_completion(&kim_gdata->kim_rcvd);
+
+ /*
+ * Free space found in uart buffer, call st_int_write
+ * to send current firmware command to the uart tx
+ * buffer.
+ */
+ err = st_int_write(kim_gdata->core_data,
+ ((struct bts_action_send *)action_ptr)->data,
+ ((struct bts_action *)ptr)->size);
+ if (unlikely(err < 0)) {
+ release_firmware(kim_gdata->fw_entry);
+ return err;
+ }
+ /*
+ * Check number of bytes written to the uart tx buffer
+ * and requested command write size
+ */
+ if (err != cmd_size) {
+ pr_err("Number of bytes written to uart "
+ "tx buffer are not matching with "
+ "requested cmd write size");
+ release_firmware(kim_gdata->fw_entry);
+ return -EIO;
+ }
+ break;
+ case ACTION_WAIT_EVENT: /* wait */
+ pr_debug("W");
+ err = wait_for_completion_interruptible_timeout(
+ &kim_gdata->kim_rcvd,
+ msecs_to_jiffies(CMD_RESP_TIME));
+ if (err <= 0) {
+ pr_err("response timeout/signaled during fw download ");
+ /* timed out */
+ release_firmware(kim_gdata->fw_entry);
+ return err ? -ERESTARTSYS : -ETIMEDOUT;
+ }
+ reinit_completion(&kim_gdata->kim_rcvd);
+ break;
+ case ACTION_DELAY: /* sleep */
+ pr_info("sleep command in scr");
+ action_ptr = &(((struct bts_action *)ptr)->data[0]);
+ mdelay(((struct bts_action_delay *)action_ptr)->msec);
+ break;
+ }
+ len =
+ len - (sizeof(struct bts_action) +
+ ((struct bts_action *)ptr)->size);
+ ptr =
+ ptr + sizeof(struct bts_action) +
+ ((struct bts_action *)ptr)->size;
+ }
+ /* fw download complete */
+ release_firmware(kim_gdata->fw_entry);
+ return 0;
+}
+
+/**********************************************************************/
+/* functions called from ST core */
+/* called from ST Core, when REG_IN_PROGRESS (registration in progress)
+ * can be because of
+ * 1. response to read local version
+ * 2. during send/recv's of firmware download
+ */
+void st_kim_recv(void *disc_data, const unsigned char *data, long count)
+{
+ struct st_data_s *st_gdata = (struct st_data_s *)disc_data;
+ struct kim_data_s *kim_gdata = st_gdata->kim_data;
+
+ /* proceed to gather all data and distinguish read fw version response
+ * from other fw responses when data gathering is complete
+ */
+ kim_int_recv(kim_gdata, data, count);
+ return;
+}
+
+/* to signal completion of line discipline installation
+ * called from ST Core, upon tty_open
+ */
+void st_kim_complete(void *kim_data)
+{
+ struct kim_data_s *kim_gdata = (struct kim_data_s *)kim_data;
+ complete(&kim_gdata->ldisc_installed);
+}
+
+/**
+ * st_kim_start - called from ST Core upon 1st registration
+ * This involves toggling the chip enable gpio, reading
+ * the firmware version from chip, forming the fw file name
+ * based on the chip version, requesting the fw, parsing it
+ * and perform download(send/recv).
+ */
+long st_kim_start(void *kim_data)
+{
+ long err = 0;
+ long retry = POR_RETRY_COUNT;
+ struct ti_st_plat_data *pdata;
+ struct kim_data_s *kim_gdata = (struct kim_data_s *)kim_data;
+
+ pr_info(" %s", __func__);
+ pdata = kim_gdata->kim_pdev->dev.platform_data;
+
+ do {
+ /* platform specific enabling code here */
+ if (pdata->chip_enable)
+ pdata->chip_enable(kim_gdata);
+
+ /* Configure BT nShutdown to HIGH state */
+ gpio_set_value_cansleep(kim_gdata->nshutdown, GPIO_LOW);
+ mdelay(5); /* FIXME: a proper toggle */
+ gpio_set_value_cansleep(kim_gdata->nshutdown, GPIO_HIGH);
+ mdelay(100);
+ /* re-initialize the completion */
+ reinit_completion(&kim_gdata->ldisc_installed);
+ /* send notification to UIM */
+ kim_gdata->ldisc_install = 1;
+ pr_info("ldisc_install = 1");
+ sysfs_notify(&kim_gdata->kim_pdev->dev.kobj,
+ NULL, "install");
+ /* wait for ldisc to be installed */
+ err = wait_for_completion_interruptible_timeout(
+ &kim_gdata->ldisc_installed, msecs_to_jiffies(LDISC_TIME));
+ if (!err) {
+ /* ldisc installation timeout,
+ * flush uart, power cycle BT_EN */
+ pr_err("ldisc installation timeout");
+ err = st_kim_stop(kim_gdata);
+ continue;
+ } else {
+ /* ldisc installed now */
+ pr_info("line discipline installed");
+ err = download_firmware(kim_gdata);
+ if (err != 0) {
+ /* ldisc installed but fw download failed,
+ * flush uart & power cycle BT_EN */
+ pr_err("download firmware failed");
+ err = st_kim_stop(kim_gdata);
+ continue;
+ } else { /* on success don't retry */
+ break;
+ }
+ }
+ } while (retry--);
+ return err;
+}
+
+/**
+ * st_kim_stop - stop communication with chip.
+ * This can be called from ST Core/KIM, on the-
+ * (a) last un-register when chip need not be powered there-after,
+ * (b) upon failure to either install ldisc or download firmware.
+ * The function is responsible to (a) notify UIM about un-installation,
+ * (b) flush UART if the ldisc was installed.
+ * (c) reset BT_EN - pull down nshutdown at the end.
+ * (d) invoke platform's chip disabling routine.
+ */
+long st_kim_stop(void *kim_data)
+{
+ long err = 0;
+ struct kim_data_s *kim_gdata = (struct kim_data_s *)kim_data;
+ struct ti_st_plat_data *pdata =
+ kim_gdata->kim_pdev->dev.platform_data;
+ struct tty_struct *tty = kim_gdata->core_data->tty;
+
+ reinit_completion(&kim_gdata->ldisc_installed);
+
+ if (tty) { /* can be called before ldisc is installed */
+ /* Flush any pending characters in the driver and discipline. */
+ tty_ldisc_flush(tty);
+ tty_driver_flush_buffer(tty);
+ }
+
+ /* send uninstall notification to UIM */
+ pr_info("ldisc_install = 0");
+ kim_gdata->ldisc_install = 0;
+ sysfs_notify(&kim_gdata->kim_pdev->dev.kobj, NULL, "install");
+
+ /* wait for ldisc to be un-installed */
+ err = wait_for_completion_interruptible_timeout(
+ &kim_gdata->ldisc_installed, msecs_to_jiffies(LDISC_TIME));
+ if (!err) { /* timeout */
+ pr_err(" timed out waiting for ldisc to be un-installed");
+ err = -ETIMEDOUT;
+ }
+
+ /* By default configure BT nShutdown to LOW state */
+ gpio_set_value_cansleep(kim_gdata->nshutdown, GPIO_LOW);
+ mdelay(1);
+ gpio_set_value_cansleep(kim_gdata->nshutdown, GPIO_HIGH);
+ mdelay(1);
+ gpio_set_value_cansleep(kim_gdata->nshutdown, GPIO_LOW);
+
+ /* platform specific disable */
+ if (pdata->chip_disable)
+ pdata->chip_disable(kim_gdata);
+ return err;
+}
+
+/**********************************************************************/
+/* functions called from subsystems */
+/* called when debugfs entry is read from */
+
+static int show_version(struct seq_file *s, void *unused)
+{
+ struct kim_data_s *kim_gdata = (struct kim_data_s *)s->private;
+ seq_printf(s, "%04X %d.%d.%d\n", kim_gdata->version.full,
+ kim_gdata->version.chip, kim_gdata->version.maj_ver,
+ kim_gdata->version.min_ver);
+ return 0;
+}
+
+static int show_list(struct seq_file *s, void *unused)
+{
+ struct kim_data_s *kim_gdata = (struct kim_data_s *)s->private;
+ kim_st_list_protocols(kim_gdata->core_data, s);
+ return 0;
+}
+
+static ssize_t show_install(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct kim_data_s *kim_data = dev_get_drvdata(dev);
+ return sprintf(buf, "%d\n", kim_data->ldisc_install);
+}
+
+#ifdef DEBUG
+static ssize_t store_dev_name(struct device *dev,
+ struct device_attribute *attr, const char *buf, size_t count)
+{
+ struct kim_data_s *kim_data = dev_get_drvdata(dev);
+ pr_debug("storing dev name >%s<", buf);
+ strncpy(kim_data->dev_name, buf, count);
+ pr_debug("stored dev name >%s<", kim_data->dev_name);
+ return count;
+}
+
+static ssize_t store_baud_rate(struct device *dev,
+ struct device_attribute *attr, const char *buf, size_t count)
+{
+ struct kim_data_s *kim_data = dev_get_drvdata(dev);
+ pr_debug("storing baud rate >%s<", buf);
+ sscanf(buf, "%ld", &kim_data->baud_rate);
+ pr_debug("stored baud rate >%ld<", kim_data->baud_rate);
+ return count;
+}
+#endif /* if DEBUG */
+
+static ssize_t show_dev_name(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct kim_data_s *kim_data = dev_get_drvdata(dev);
+ return sprintf(buf, "%s\n", kim_data->dev_name);
+}
+
+static ssize_t show_baud_rate(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct kim_data_s *kim_data = dev_get_drvdata(dev);
+ return sprintf(buf, "%d\n", kim_data->baud_rate);
+}
+
+static ssize_t show_flow_cntrl(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct kim_data_s *kim_data = dev_get_drvdata(dev);
+ return sprintf(buf, "%d\n", kim_data->flow_cntrl);
+}
+
+/* structures specific for sysfs entries */
+static struct kobj_attribute ldisc_install =
+__ATTR(install, 0444, (void *)show_install, NULL);
+
+static struct kobj_attribute uart_dev_name =
+#ifdef DEBUG /* TODO: move this to debug-fs if possible */
+__ATTR(dev_name, 0644, (void *)show_dev_name, (void *)store_dev_name);
+#else
+__ATTR(dev_name, 0444, (void *)show_dev_name, NULL);
+#endif
+
+static struct kobj_attribute uart_baud_rate =
+#ifdef DEBUG /* TODO: move to debugfs */
+__ATTR(baud_rate, 0644, (void *)show_baud_rate, (void *)store_baud_rate);
+#else
+__ATTR(baud_rate, 0444, (void *)show_baud_rate, NULL);
+#endif
+
+static struct kobj_attribute uart_flow_cntrl =
+__ATTR(flow_cntrl, 0444, (void *)show_flow_cntrl, NULL);
+
+static struct attribute *uim_attrs[] = {
+ &ldisc_install.attr,
+ &uart_dev_name.attr,
+ &uart_baud_rate.attr,
+ &uart_flow_cntrl.attr,
+ NULL,
+};
+
+static struct attribute_group uim_attr_grp = {
+ .attrs = uim_attrs,
+};
+
+/**
+ * st_kim_ref - reference the core's data
+ * This references the per-ST platform device in the arch/xx/
+ * board-xx.c file.
+ * This would enable multiple such platform devices to exist
+ * on a given platform
+ */
+void st_kim_ref(struct st_data_s **core_data, int id)
+{
+ struct platform_device *pdev;
+ struct kim_data_s *kim_gdata;
+ /* get kim_gdata reference from platform device */
+ pdev = st_get_plat_device(id);
+ if (!pdev)
+ goto err;
+ kim_gdata = platform_get_drvdata(pdev);
+ if (!kim_gdata)
+ goto err;
+
+ *core_data = kim_gdata->core_data;
+ return;
+err:
+ *core_data = NULL;
+}
+
+static int kim_version_open(struct inode *i, struct file *f)
+{
+ return single_open(f, show_version, i->i_private);
+}
+
+static int kim_list_open(struct inode *i, struct file *f)
+{
+ return single_open(f, show_list, i->i_private);
+}
+
+static const struct file_operations version_debugfs_fops = {
+ /* version info */
+ .open = kim_version_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+static const struct file_operations list_debugfs_fops = {
+ /* protocols info */
+ .open = kim_list_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+/**********************************************************************/
+/* functions called from platform device driver subsystem
+ * need to have a relevant platform device entry in the platform's
+ * board-*.c file
+ */
+
+static struct dentry *kim_debugfs_dir;
+static int kim_probe(struct platform_device *pdev)
+{
+ struct kim_data_s *kim_gdata;
+ struct ti_st_plat_data *pdata = pdev->dev.platform_data;
+ int err;
+
+ if ((pdev->id != -1) && (pdev->id < MAX_ST_DEVICES)) {
+ /* multiple devices could exist */
+ st_kim_devices[pdev->id] = pdev;
+ } else {
+ /* platform's sure about existence of 1 device */
+ st_kim_devices[0] = pdev;
+ }
+
+ kim_gdata = kzalloc(sizeof(struct kim_data_s), GFP_ATOMIC);
+ if (!kim_gdata) {
+ pr_err("no mem to allocate");
+ return -ENOMEM;
+ }
+ platform_set_drvdata(pdev, kim_gdata);
+
+ err = st_core_init(&kim_gdata->core_data);
+ if (err != 0) {
+ pr_err(" ST core init failed");
+ err = -EIO;
+ goto err_core_init;
+ }
+ /* refer to itself */
+ kim_gdata->core_data->kim_data = kim_gdata;
+
+ /* Claim the chip enable nShutdown gpio from the system */
+ kim_gdata->nshutdown = pdata->nshutdown_gpio;
+ err = gpio_request(kim_gdata->nshutdown, "kim");
+ if (unlikely(err)) {
+ pr_err(" gpio %d request failed ", kim_gdata->nshutdown);
+ return err;
+ }
+
+ /* Configure nShutdown GPIO as output=0 */
+ err = gpio_direction_output(kim_gdata->nshutdown, 0);
+ if (unlikely(err)) {
+ pr_err(" unable to configure gpio %d", kim_gdata->nshutdown);
+ return err;
+ }
+ /* get reference of pdev for request_firmware
+ */
+ kim_gdata->kim_pdev = pdev;
+ init_completion(&kim_gdata->kim_rcvd);
+ init_completion(&kim_gdata->ldisc_installed);
+
+ err = sysfs_create_group(&pdev->dev.kobj, &uim_attr_grp);
+ if (err) {
+ pr_err("failed to create sysfs entries");
+ goto err_sysfs_group;
+ }
+
+ /* copying platform data */
+ strncpy(kim_gdata->dev_name, pdata->dev_name, UART_DEV_NAME_LEN);
+ kim_gdata->flow_cntrl = pdata->flow_cntrl;
+ kim_gdata->baud_rate = pdata->baud_rate;
+ pr_info("sysfs entries created\n");
+
+ kim_debugfs_dir = debugfs_create_dir("ti-st", NULL);
+ if (!kim_debugfs_dir) {
+ pr_err(" debugfs entries creation failed ");
+ return 0;
+ }
+
+ debugfs_create_file("version", S_IRUGO, kim_debugfs_dir,
+ kim_gdata, &version_debugfs_fops);
+ debugfs_create_file("protocols", S_IRUGO, kim_debugfs_dir,
+ kim_gdata, &list_debugfs_fops);
+ return 0;
+
+err_sysfs_group:
+ st_core_exit(kim_gdata->core_data);
+
+err_core_init:
+ kfree(kim_gdata);
+
+ return err;
+}
+
+static int kim_remove(struct platform_device *pdev)
+{
+ /* free the GPIOs requested */
+ struct ti_st_plat_data *pdata = pdev->dev.platform_data;
+ struct kim_data_s *kim_gdata;
+
+ kim_gdata = platform_get_drvdata(pdev);
+
+ /* Free the Bluetooth/FM/GPIO
+ * nShutdown gpio from the system
+ */
+ gpio_free(pdata->nshutdown_gpio);
+ pr_info("nshutdown GPIO Freed");
+
+ debugfs_remove_recursive(kim_debugfs_dir);
+ sysfs_remove_group(&pdev->dev.kobj, &uim_attr_grp);
+ pr_info("sysfs entries removed");
+
+ kim_gdata->kim_pdev = NULL;
+ st_core_exit(kim_gdata->core_data);
+
+ kfree(kim_gdata);
+ kim_gdata = NULL;
+ return 0;
+}
+
+static int kim_suspend(struct platform_device *pdev, pm_message_t state)
+{
+ struct ti_st_plat_data *pdata = pdev->dev.platform_data;
+
+ if (pdata->suspend)
+ return pdata->suspend(pdev, state);
+
+ return 0;
+}
+
+static int kim_resume(struct platform_device *pdev)
+{
+ struct ti_st_plat_data *pdata = pdev->dev.platform_data;
+
+ if (pdata->resume)
+ return pdata->resume(pdev);
+
+ return 0;
+}
+
+/**********************************************************************/
+/* entry point for ST KIM module, called in from ST Core */
+static struct platform_driver kim_platform_driver = {
+ .probe = kim_probe,
+ .remove = kim_remove,
+ .suspend = kim_suspend,
+ .resume = kim_resume,
+ .driver = {
+ .name = "kim",
+ },
+};
+
+module_platform_driver(kim_platform_driver);
+
+MODULE_AUTHOR("Pavan Savoy <pavan_savoy@ti.com>");
+MODULE_DESCRIPTION("Shared Transport Driver for TI BT/FM/GPS combo chips ");
+MODULE_LICENSE("GPL");
diff --git a/drivers/misc/ti-st/st_ll.c b/drivers/misc/ti-st/st_ll.c
new file mode 100644
index 0000000..93b4d67
--- /dev/null
+++ b/drivers/misc/ti-st/st_ll.c
@@ -0,0 +1,169 @@
+/*
+ * Shared Transport driver
+ * HCI-LL module responsible for TI proprietary HCI_LL protocol
+ * Copyright (C) 2009-2010 Texas Instruments
+ * Author: Pavan Savoy <pavan_savoy@ti.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+
+#define pr_fmt(fmt) "(stll) :" fmt
+#include <linux/skbuff.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/ti_wilink_st.h>
+
+/**********************************************************************/
+/* internal functions */
+static void send_ll_cmd(struct st_data_s *st_data,
+ unsigned char cmd)
+{
+
+ pr_debug("%s: writing %x", __func__, cmd);
+ st_int_write(st_data, &cmd, 1);
+ return;
+}
+
+static void ll_device_want_to_sleep(struct st_data_s *st_data)
+{
+ struct kim_data_s *kim_data;
+ struct ti_st_plat_data *pdata;
+
+ pr_debug("%s", __func__);
+ /* sanity check */
+ if (st_data->ll_state != ST_LL_AWAKE)
+ pr_err("ERR hcill: ST_LL_GO_TO_SLEEP_IND"
+ "in state %ld", st_data->ll_state);
+
+ send_ll_cmd(st_data, LL_SLEEP_ACK);
+ /* update state */
+ st_data->ll_state = ST_LL_ASLEEP;
+
+ /* communicate to platform about chip asleep */
+ kim_data = st_data->kim_data;
+ pdata = kim_data->kim_pdev->dev.platform_data;
+ if (pdata->chip_asleep)
+ pdata->chip_asleep(NULL);
+}
+
+static void ll_device_want_to_wakeup(struct st_data_s *st_data)
+{
+ struct kim_data_s *kim_data;
+ struct ti_st_plat_data *pdata;
+
+ /* diff actions in diff states */
+ switch (st_data->ll_state) {
+ case ST_LL_ASLEEP:
+ send_ll_cmd(st_data, LL_WAKE_UP_ACK); /* send wake_ack */
+ break;
+ case ST_LL_ASLEEP_TO_AWAKE:
+ /* duplicate wake_ind */
+ pr_err("duplicate wake_ind while waiting for Wake ack");
+ break;
+ case ST_LL_AWAKE:
+ /* duplicate wake_ind */
+ pr_err("duplicate wake_ind already AWAKE");
+ break;
+ case ST_LL_AWAKE_TO_ASLEEP:
+ /* duplicate wake_ind */
+ pr_err("duplicate wake_ind");
+ break;
+ }
+ /* update state */
+ st_data->ll_state = ST_LL_AWAKE;
+
+ /* communicate to platform about chip wakeup */
+ kim_data = st_data->kim_data;
+ pdata = kim_data->kim_pdev->dev.platform_data;
+ if (pdata->chip_awake)
+ pdata->chip_awake(NULL);
+}
+
+/**********************************************************************/
+/* functions invoked by ST Core */
+
+/* called when ST Core wants to
+ * enable ST LL */
+void st_ll_enable(struct st_data_s *ll)
+{
+ ll->ll_state = ST_LL_AWAKE;
+}
+
+/* called when ST Core /local module wants to
+ * disable ST LL */
+void st_ll_disable(struct st_data_s *ll)
+{
+ ll->ll_state = ST_LL_INVALID;
+}
+
+/* called when ST Core wants to update the state */
+void st_ll_wakeup(struct st_data_s *ll)
+{
+ if (likely(ll->ll_state != ST_LL_AWAKE)) {
+ send_ll_cmd(ll, LL_WAKE_UP_IND); /* WAKE_IND */
+ ll->ll_state = ST_LL_ASLEEP_TO_AWAKE;
+ } else {
+ /* don't send the duplicate wake_indication */
+ pr_err(" Chip already AWAKE ");
+ }
+}
+
+/* called when ST Core wants the state */
+unsigned long st_ll_getstate(struct st_data_s *ll)
+{
+ pr_debug(" returning state %ld", ll->ll_state);
+ return ll->ll_state;
+}
+
+/* called from ST Core, when a PM related packet arrives */
+unsigned long st_ll_sleep_state(struct st_data_s *st_data,
+ unsigned char cmd)
+{
+ switch (cmd) {
+ case LL_SLEEP_IND: /* sleep ind */
+ pr_debug("sleep indication recvd");
+ ll_device_want_to_sleep(st_data);
+ break;
+ case LL_SLEEP_ACK: /* sleep ack */
+ pr_err("sleep ack rcvd: host shouldn't");
+ break;
+ case LL_WAKE_UP_IND: /* wake ind */
+ pr_debug("wake indication recvd");
+ ll_device_want_to_wakeup(st_data);
+ break;
+ case LL_WAKE_UP_ACK: /* wake ack */
+ pr_debug("wake ack rcvd");
+ st_data->ll_state = ST_LL_AWAKE;
+ break;
+ default:
+ pr_err(" unknown input/state ");
+ return -EINVAL;
+ }
+ return 0;
+}
+
+/* Called from ST CORE to initialize ST LL */
+long st_ll_init(struct st_data_s *ll)
+{
+ /* set state to invalid */
+ ll->ll_state = ST_LL_INVALID;
+ return 0;
+}
+
+/* Called from ST CORE to de-initialize ST LL */
+long st_ll_deinit(struct st_data_s *ll)
+{
+ return 0;
+}
diff --git a/drivers/misc/ti_dac7512.c b/drivers/misc/ti_dac7512.c
new file mode 100644
index 0000000..f5456fb
--- /dev/null
+++ b/drivers/misc/ti_dac7512.c
@@ -0,0 +1,103 @@
+/*
+ * dac7512.c - Linux kernel module for
+ * Texas Instruments DAC7512
+ *
+ * Copyright (c) 2009 Daniel Mack <daniel@caiaq.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/module.h>
+#include <linux/spi/spi.h>
+#include <linux/of.h>
+
+static ssize_t dac7512_store_val(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct spi_device *spi = to_spi_device(dev);
+ unsigned char tmp[2];
+ unsigned long val;
+ int ret;
+
+ ret = kstrtoul(buf, 10, &val);
+ if (ret)
+ return ret;
+
+ tmp[0] = val >> 8;
+ tmp[1] = val & 0xff;
+ spi_write(spi, tmp, sizeof(tmp));
+ return count;
+}
+
+static DEVICE_ATTR(value, S_IWUSR, NULL, dac7512_store_val);
+
+static struct attribute *dac7512_attributes[] = {
+ &dev_attr_value.attr,
+ NULL
+};
+
+static const struct attribute_group dac7512_attr_group = {
+ .attrs = dac7512_attributes,
+};
+
+static int dac7512_probe(struct spi_device *spi)
+{
+ int ret;
+
+ spi->bits_per_word = 8;
+ spi->mode = SPI_MODE_0;
+ ret = spi_setup(spi);
+ if (ret < 0)
+ return ret;
+
+ return sysfs_create_group(&spi->dev.kobj, &dac7512_attr_group);
+}
+
+static int dac7512_remove(struct spi_device *spi)
+{
+ sysfs_remove_group(&spi->dev.kobj, &dac7512_attr_group);
+ return 0;
+}
+
+static const struct spi_device_id dac7512_id_table[] = {
+ { "dac7512", 0 },
+ { }
+};
+MODULE_DEVICE_TABLE(spi, dac7512_id_table);
+
+#ifdef CONFIG_OF
+static const struct of_device_id dac7512_of_match[] = {
+ { .compatible = "ti,dac7512", },
+ { }
+};
+MODULE_DEVICE_TABLE(of, dac7512_of_match);
+#endif
+
+static struct spi_driver dac7512_driver = {
+ .driver = {
+ .name = "dac7512",
+ .of_match_table = of_match_ptr(dac7512_of_match),
+ },
+ .probe = dac7512_probe,
+ .remove = dac7512_remove,
+ .id_table = dac7512_id_table,
+};
+
+module_spi_driver(dac7512_driver);
+
+MODULE_AUTHOR("Daniel Mack <daniel@caiaq.de>");
+MODULE_DESCRIPTION("DAC7512 16-bit DAC");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/misc/tifm_7xx1.c b/drivers/misc/tifm_7xx1.c
new file mode 100644
index 0000000..a37a42f
--- /dev/null
+++ b/drivers/misc/tifm_7xx1.c
@@ -0,0 +1,442 @@
+/*
+ * tifm_7xx1.c - TI FlashMedia driver
+ *
+ * Copyright (C) 2006 Alex Dubov <oakad@yahoo.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/tifm.h>
+#include <linux/dma-mapping.h>
+#include <linux/module.h>
+
+#define DRIVER_NAME "tifm_7xx1"
+#define DRIVER_VERSION "0.8"
+
+#define TIFM_IRQ_ENABLE 0x80000000
+#define TIFM_IRQ_SOCKMASK(x) (x)
+#define TIFM_IRQ_CARDMASK(x) ((x) << 8)
+#define TIFM_IRQ_FIFOMASK(x) ((x) << 16)
+#define TIFM_IRQ_SETALL 0xffffffff
+
+static void tifm_7xx1_dummy_eject(struct tifm_adapter *fm,
+ struct tifm_dev *sock)
+{
+}
+
+static void tifm_7xx1_eject(struct tifm_adapter *fm, struct tifm_dev *sock)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&fm->lock, flags);
+ fm->socket_change_set |= 1 << sock->socket_id;
+ tifm_queue_work(&fm->media_switcher);
+ spin_unlock_irqrestore(&fm->lock, flags);
+}
+
+static irqreturn_t tifm_7xx1_isr(int irq, void *dev_id)
+{
+ struct tifm_adapter *fm = dev_id;
+ struct tifm_dev *sock;
+ unsigned int irq_status, cnt;
+
+ spin_lock(&fm->lock);
+ irq_status = readl(fm->addr + FM_INTERRUPT_STATUS);
+ if (irq_status == 0 || irq_status == (~0)) {
+ spin_unlock(&fm->lock);
+ return IRQ_NONE;
+ }
+
+ if (irq_status & TIFM_IRQ_ENABLE) {
+ writel(TIFM_IRQ_ENABLE, fm->addr + FM_CLEAR_INTERRUPT_ENABLE);
+
+ for (cnt = 0; cnt < fm->num_sockets; cnt++) {
+ sock = fm->sockets[cnt];
+ if (sock) {
+ if ((irq_status >> cnt) & TIFM_IRQ_FIFOMASK(1))
+ sock->data_event(sock);
+ if ((irq_status >> cnt) & TIFM_IRQ_CARDMASK(1))
+ sock->card_event(sock);
+ }
+ }
+
+ fm->socket_change_set |= irq_status
+ & ((1 << fm->num_sockets) - 1);
+ }
+ writel(irq_status, fm->addr + FM_INTERRUPT_STATUS);
+
+ if (fm->finish_me)
+ complete_all(fm->finish_me);
+ else if (!fm->socket_change_set)
+ writel(TIFM_IRQ_ENABLE, fm->addr + FM_SET_INTERRUPT_ENABLE);
+ else
+ tifm_queue_work(&fm->media_switcher);
+
+ spin_unlock(&fm->lock);
+ return IRQ_HANDLED;
+}
+
+static unsigned char tifm_7xx1_toggle_sock_power(char __iomem *sock_addr)
+{
+ unsigned int s_state;
+ int cnt;
+
+ writel(0x0e00, sock_addr + SOCK_CONTROL);
+
+ for (cnt = 16; cnt <= 256; cnt <<= 1) {
+ if (!(TIFM_SOCK_STATE_POWERED
+ & readl(sock_addr + SOCK_PRESENT_STATE)))
+ break;
+
+ msleep(cnt);
+ }
+
+ s_state = readl(sock_addr + SOCK_PRESENT_STATE);
+ if (!(TIFM_SOCK_STATE_OCCUPIED & s_state))
+ return 0;
+
+ writel(readl(sock_addr + SOCK_CONTROL) | TIFM_CTRL_LED,
+ sock_addr + SOCK_CONTROL);
+
+ /* xd needs some extra time before power on */
+ if (((readl(sock_addr + SOCK_PRESENT_STATE) >> 4) & 7)
+ == TIFM_TYPE_XD)
+ msleep(40);
+
+ writel((s_state & TIFM_CTRL_POWER_MASK) | 0x0c00,
+ sock_addr + SOCK_CONTROL);
+ /* wait for power to stabilize */
+ msleep(20);
+ for (cnt = 16; cnt <= 256; cnt <<= 1) {
+ if ((TIFM_SOCK_STATE_POWERED
+ & readl(sock_addr + SOCK_PRESENT_STATE)))
+ break;
+
+ msleep(cnt);
+ }
+
+ writel(readl(sock_addr + SOCK_CONTROL) & (~TIFM_CTRL_LED),
+ sock_addr + SOCK_CONTROL);
+
+ return (readl(sock_addr + SOCK_PRESENT_STATE) >> 4) & 7;
+}
+
+inline static void tifm_7xx1_sock_power_off(char __iomem *sock_addr)
+{
+ writel((~TIFM_CTRL_POWER_MASK) & readl(sock_addr + SOCK_CONTROL),
+ sock_addr + SOCK_CONTROL);
+}
+
+inline static char __iomem *
+tifm_7xx1_sock_addr(char __iomem *base_addr, unsigned int sock_num)
+{
+ return base_addr + ((sock_num + 1) << 10);
+}
+
+static void tifm_7xx1_switch_media(struct work_struct *work)
+{
+ struct tifm_adapter *fm = container_of(work, struct tifm_adapter,
+ media_switcher);
+ struct tifm_dev *sock;
+ char __iomem *sock_addr;
+ unsigned long flags;
+ unsigned char media_id;
+ unsigned int socket_change_set, cnt;
+
+ spin_lock_irqsave(&fm->lock, flags);
+ socket_change_set = fm->socket_change_set;
+ fm->socket_change_set = 0;
+
+ dev_dbg(fm->dev.parent, "checking media set %x\n",
+ socket_change_set);
+
+ if (!socket_change_set) {
+ spin_unlock_irqrestore(&fm->lock, flags);
+ return;
+ }
+
+ for (cnt = 0; cnt < fm->num_sockets; cnt++) {
+ if (!(socket_change_set & (1 << cnt)))
+ continue;
+ sock = fm->sockets[cnt];
+ if (sock) {
+ printk(KERN_INFO
+ "%s : demand removing card from socket %u:%u\n",
+ dev_name(&fm->dev), fm->id, cnt);
+ fm->sockets[cnt] = NULL;
+ sock_addr = sock->addr;
+ spin_unlock_irqrestore(&fm->lock, flags);
+ device_unregister(&sock->dev);
+ spin_lock_irqsave(&fm->lock, flags);
+ tifm_7xx1_sock_power_off(sock_addr);
+ writel(0x0e00, sock_addr + SOCK_CONTROL);
+ }
+
+ spin_unlock_irqrestore(&fm->lock, flags);
+
+ media_id = tifm_7xx1_toggle_sock_power(
+ tifm_7xx1_sock_addr(fm->addr, cnt));
+
+ // tifm_alloc_device will check if media_id is valid
+ sock = tifm_alloc_device(fm, cnt, media_id);
+ if (sock) {
+ sock->addr = tifm_7xx1_sock_addr(fm->addr, cnt);
+
+ if (!device_register(&sock->dev)) {
+ spin_lock_irqsave(&fm->lock, flags);
+ if (!fm->sockets[cnt]) {
+ fm->sockets[cnt] = sock;
+ sock = NULL;
+ }
+ spin_unlock_irqrestore(&fm->lock, flags);
+ }
+ if (sock)
+ tifm_free_device(&sock->dev);
+ }
+ spin_lock_irqsave(&fm->lock, flags);
+ }
+
+ writel(TIFM_IRQ_FIFOMASK(socket_change_set)
+ | TIFM_IRQ_CARDMASK(socket_change_set),
+ fm->addr + FM_CLEAR_INTERRUPT_ENABLE);
+
+ writel(TIFM_IRQ_FIFOMASK(socket_change_set)
+ | TIFM_IRQ_CARDMASK(socket_change_set),
+ fm->addr + FM_SET_INTERRUPT_ENABLE);
+
+ writel(TIFM_IRQ_ENABLE, fm->addr + FM_SET_INTERRUPT_ENABLE);
+ spin_unlock_irqrestore(&fm->lock, flags);
+}
+
+#ifdef CONFIG_PM
+
+static int tifm_7xx1_suspend(struct pci_dev *dev, pm_message_t state)
+{
+ struct tifm_adapter *fm = pci_get_drvdata(dev);
+ int cnt;
+
+ dev_dbg(&dev->dev, "suspending host\n");
+
+ for (cnt = 0; cnt < fm->num_sockets; cnt++) {
+ if (fm->sockets[cnt])
+ tifm_7xx1_sock_power_off(fm->sockets[cnt]->addr);
+ }
+
+ pci_save_state(dev);
+ pci_enable_wake(dev, pci_choose_state(dev, state), 0);
+ pci_disable_device(dev);
+ pci_set_power_state(dev, pci_choose_state(dev, state));
+ return 0;
+}
+
+static int tifm_7xx1_resume(struct pci_dev *dev)
+{
+ struct tifm_adapter *fm = pci_get_drvdata(dev);
+ int rc;
+ unsigned long timeout;
+ unsigned int good_sockets = 0, bad_sockets = 0;
+ unsigned long flags;
+ unsigned char new_ids[fm->num_sockets];
+ DECLARE_COMPLETION_ONSTACK(finish_resume);
+
+ pci_set_power_state(dev, PCI_D0);
+ pci_restore_state(dev);
+ rc = pci_enable_device(dev);
+ if (rc)
+ return rc;
+ pci_set_master(dev);
+
+ dev_dbg(&dev->dev, "resuming host\n");
+
+ for (rc = 0; rc < fm->num_sockets; rc++)
+ new_ids[rc] = tifm_7xx1_toggle_sock_power(
+ tifm_7xx1_sock_addr(fm->addr, rc));
+ spin_lock_irqsave(&fm->lock, flags);
+ for (rc = 0; rc < fm->num_sockets; rc++) {
+ if (fm->sockets[rc]) {
+ if (fm->sockets[rc]->type == new_ids[rc])
+ good_sockets |= 1 << rc;
+ else
+ bad_sockets |= 1 << rc;
+ }
+ }
+
+ writel(TIFM_IRQ_ENABLE | TIFM_IRQ_SOCKMASK((1 << fm->num_sockets) - 1),
+ fm->addr + FM_SET_INTERRUPT_ENABLE);
+ dev_dbg(&dev->dev, "change sets on resume: good %x, bad %x\n",
+ good_sockets, bad_sockets);
+
+ fm->socket_change_set = 0;
+ if (good_sockets) {
+ fm->finish_me = &finish_resume;
+ spin_unlock_irqrestore(&fm->lock, flags);
+ timeout = wait_for_completion_timeout(&finish_resume, HZ);
+ dev_dbg(&dev->dev, "wait returned %lu\n", timeout);
+ writel(TIFM_IRQ_FIFOMASK(good_sockets)
+ | TIFM_IRQ_CARDMASK(good_sockets),
+ fm->addr + FM_CLEAR_INTERRUPT_ENABLE);
+ writel(TIFM_IRQ_FIFOMASK(good_sockets)
+ | TIFM_IRQ_CARDMASK(good_sockets),
+ fm->addr + FM_SET_INTERRUPT_ENABLE);
+ spin_lock_irqsave(&fm->lock, flags);
+ fm->finish_me = NULL;
+ fm->socket_change_set ^= good_sockets & fm->socket_change_set;
+ }
+
+ fm->socket_change_set |= bad_sockets;
+ if (fm->socket_change_set)
+ tifm_queue_work(&fm->media_switcher);
+
+ spin_unlock_irqrestore(&fm->lock, flags);
+ writel(TIFM_IRQ_ENABLE,
+ fm->addr + FM_SET_INTERRUPT_ENABLE);
+
+ return 0;
+}
+
+#else
+
+#define tifm_7xx1_suspend NULL
+#define tifm_7xx1_resume NULL
+
+#endif /* CONFIG_PM */
+
+static int tifm_7xx1_dummy_has_ms_pif(struct tifm_adapter *fm,
+ struct tifm_dev *sock)
+{
+ return 0;
+}
+
+static int tifm_7xx1_has_ms_pif(struct tifm_adapter *fm, struct tifm_dev *sock)
+{
+ if (((fm->num_sockets == 4) && (sock->socket_id == 2))
+ || ((fm->num_sockets == 2) && (sock->socket_id == 0)))
+ return 1;
+
+ return 0;
+}
+
+static int tifm_7xx1_probe(struct pci_dev *dev,
+ const struct pci_device_id *dev_id)
+{
+ struct tifm_adapter *fm;
+ int pci_dev_busy = 0;
+ int rc;
+
+ rc = pci_set_dma_mask(dev, DMA_BIT_MASK(32));
+ if (rc)
+ return rc;
+
+ rc = pci_enable_device(dev);
+ if (rc)
+ return rc;
+
+ pci_set_master(dev);
+
+ rc = pci_request_regions(dev, DRIVER_NAME);
+ if (rc) {
+ pci_dev_busy = 1;
+ goto err_out;
+ }
+
+ pci_intx(dev, 1);
+
+ fm = tifm_alloc_adapter(dev->device == PCI_DEVICE_ID_TI_XX21_XX11_FM
+ ? 4 : 2, &dev->dev);
+ if (!fm) {
+ rc = -ENOMEM;
+ goto err_out_int;
+ }
+
+ INIT_WORK(&fm->media_switcher, tifm_7xx1_switch_media);
+ fm->eject = tifm_7xx1_eject;
+ fm->has_ms_pif = tifm_7xx1_has_ms_pif;
+ pci_set_drvdata(dev, fm);
+
+ fm->addr = pci_ioremap_bar(dev, 0);
+ if (!fm->addr) {
+ rc = -ENODEV;
+ goto err_out_free;
+ }
+
+ rc = request_irq(dev->irq, tifm_7xx1_isr, IRQF_SHARED, DRIVER_NAME, fm);
+ if (rc)
+ goto err_out_unmap;
+
+ rc = tifm_add_adapter(fm);
+ if (rc)
+ goto err_out_irq;
+
+ writel(TIFM_IRQ_ENABLE | TIFM_IRQ_SOCKMASK((1 << fm->num_sockets) - 1),
+ fm->addr + FM_CLEAR_INTERRUPT_ENABLE);
+ writel(TIFM_IRQ_ENABLE | TIFM_IRQ_SOCKMASK((1 << fm->num_sockets) - 1),
+ fm->addr + FM_SET_INTERRUPT_ENABLE);
+ return 0;
+
+err_out_irq:
+ free_irq(dev->irq, fm);
+err_out_unmap:
+ iounmap(fm->addr);
+err_out_free:
+ tifm_free_adapter(fm);
+err_out_int:
+ pci_intx(dev, 0);
+ pci_release_regions(dev);
+err_out:
+ if (!pci_dev_busy)
+ pci_disable_device(dev);
+ return rc;
+}
+
+static void tifm_7xx1_remove(struct pci_dev *dev)
+{
+ struct tifm_adapter *fm = pci_get_drvdata(dev);
+ int cnt;
+
+ fm->eject = tifm_7xx1_dummy_eject;
+ fm->has_ms_pif = tifm_7xx1_dummy_has_ms_pif;
+ writel(TIFM_IRQ_SETALL, fm->addr + FM_CLEAR_INTERRUPT_ENABLE);
+ mmiowb();
+ free_irq(dev->irq, fm);
+
+ tifm_remove_adapter(fm);
+
+ for (cnt = 0; cnt < fm->num_sockets; cnt++)
+ tifm_7xx1_sock_power_off(tifm_7xx1_sock_addr(fm->addr, cnt));
+
+ iounmap(fm->addr);
+ pci_intx(dev, 0);
+ pci_release_regions(dev);
+
+ pci_disable_device(dev);
+ tifm_free_adapter(fm);
+}
+
+static struct pci_device_id tifm_7xx1_pci_tbl [] = {
+ { PCI_VENDOR_ID_TI, PCI_DEVICE_ID_TI_XX21_XX11_FM, PCI_ANY_ID,
+ PCI_ANY_ID, 0, 0, 0 }, /* xx21 - the one I have */
+ { PCI_VENDOR_ID_TI, PCI_DEVICE_ID_TI_XX12_FM, PCI_ANY_ID,
+ PCI_ANY_ID, 0, 0, 0 },
+ { PCI_VENDOR_ID_TI, PCI_DEVICE_ID_TI_XX20_FM, PCI_ANY_ID,
+ PCI_ANY_ID, 0, 0, 0 },
+ { }
+};
+
+static struct pci_driver tifm_7xx1_driver = {
+ .name = DRIVER_NAME,
+ .id_table = tifm_7xx1_pci_tbl,
+ .probe = tifm_7xx1_probe,
+ .remove = tifm_7xx1_remove,
+ .suspend = tifm_7xx1_suspend,
+ .resume = tifm_7xx1_resume,
+};
+
+module_pci_driver(tifm_7xx1_driver);
+MODULE_AUTHOR("Alex Dubov");
+MODULE_DESCRIPTION("TI FlashMedia host driver");
+MODULE_LICENSE("GPL");
+MODULE_DEVICE_TABLE(pci, tifm_7xx1_pci_tbl);
+MODULE_VERSION(DRIVER_VERSION);
diff --git a/drivers/misc/tifm_core.c b/drivers/misc/tifm_core.c
new file mode 100644
index 0000000..a511b2a
--- /dev/null
+++ b/drivers/misc/tifm_core.c
@@ -0,0 +1,371 @@
+/*
+ * tifm_core.c - TI FlashMedia driver
+ *
+ * Copyright (C) 2006 Alex Dubov <oakad@yahoo.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/tifm.h>
+#include <linux/slab.h>
+#include <linux/init.h>
+#include <linux/idr.h>
+#include <linux/module.h>
+
+#define DRIVER_NAME "tifm_core"
+#define DRIVER_VERSION "0.8"
+
+static struct workqueue_struct *workqueue;
+static DEFINE_IDR(tifm_adapter_idr);
+static DEFINE_SPINLOCK(tifm_adapter_lock);
+
+static const char *tifm_media_type_name(unsigned char type, unsigned char nt)
+{
+ const char *card_type_name[3][3] = {
+ { "SmartMedia/xD", "MemoryStick", "MMC/SD" },
+ { "XD", "MS", "SD"},
+ { "xd", "ms", "sd"}
+ };
+
+ if (nt > 2 || type < 1 || type > 3)
+ return NULL;
+ return card_type_name[nt][type - 1];
+}
+
+static int tifm_dev_match(struct tifm_dev *sock, struct tifm_device_id *id)
+{
+ if (sock->type == id->type)
+ return 1;
+ return 0;
+}
+
+static int tifm_bus_match(struct device *dev, struct device_driver *drv)
+{
+ struct tifm_dev *sock = container_of(dev, struct tifm_dev, dev);
+ struct tifm_driver *fm_drv = container_of(drv, struct tifm_driver,
+ driver);
+ struct tifm_device_id *ids = fm_drv->id_table;
+
+ if (ids) {
+ while (ids->type) {
+ if (tifm_dev_match(sock, ids))
+ return 1;
+ ++ids;
+ }
+ }
+ return 0;
+}
+
+static int tifm_uevent(struct device *dev, struct kobj_uevent_env *env)
+{
+ struct tifm_dev *sock = container_of(dev, struct tifm_dev, dev);
+
+ if (add_uevent_var(env, "TIFM_CARD_TYPE=%s", tifm_media_type_name(sock->type, 1)))
+ return -ENOMEM;
+
+ return 0;
+}
+
+static int tifm_device_probe(struct device *dev)
+{
+ struct tifm_dev *sock = container_of(dev, struct tifm_dev, dev);
+ struct tifm_driver *drv = container_of(dev->driver, struct tifm_driver,
+ driver);
+ int rc = -ENODEV;
+
+ get_device(dev);
+ if (dev->driver && drv->probe) {
+ rc = drv->probe(sock);
+ if (!rc)
+ return 0;
+ }
+ put_device(dev);
+ return rc;
+}
+
+static void tifm_dummy_event(struct tifm_dev *sock)
+{
+ return;
+}
+
+static int tifm_device_remove(struct device *dev)
+{
+ struct tifm_dev *sock = container_of(dev, struct tifm_dev, dev);
+ struct tifm_driver *drv = container_of(dev->driver, struct tifm_driver,
+ driver);
+
+ if (dev->driver && drv->remove) {
+ sock->card_event = tifm_dummy_event;
+ sock->data_event = tifm_dummy_event;
+ drv->remove(sock);
+ sock->dev.driver = NULL;
+ }
+
+ put_device(dev);
+ return 0;
+}
+
+#ifdef CONFIG_PM
+
+static int tifm_device_suspend(struct device *dev, pm_message_t state)
+{
+ struct tifm_dev *sock = container_of(dev, struct tifm_dev, dev);
+ struct tifm_driver *drv = container_of(dev->driver, struct tifm_driver,
+ driver);
+
+ if (dev->driver && drv->suspend)
+ return drv->suspend(sock, state);
+ return 0;
+}
+
+static int tifm_device_resume(struct device *dev)
+{
+ struct tifm_dev *sock = container_of(dev, struct tifm_dev, dev);
+ struct tifm_driver *drv = container_of(dev->driver, struct tifm_driver,
+ driver);
+
+ if (dev->driver && drv->resume)
+ return drv->resume(sock);
+ return 0;
+}
+
+#else
+
+#define tifm_device_suspend NULL
+#define tifm_device_resume NULL
+
+#endif /* CONFIG_PM */
+
+static ssize_t type_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct tifm_dev *sock = container_of(dev, struct tifm_dev, dev);
+ return sprintf(buf, "%x", sock->type);
+}
+static DEVICE_ATTR_RO(type);
+
+static struct attribute *tifm_dev_attrs[] = {
+ &dev_attr_type.attr,
+ NULL,
+};
+ATTRIBUTE_GROUPS(tifm_dev);
+
+static struct bus_type tifm_bus_type = {
+ .name = "tifm",
+ .dev_groups = tifm_dev_groups,
+ .match = tifm_bus_match,
+ .uevent = tifm_uevent,
+ .probe = tifm_device_probe,
+ .remove = tifm_device_remove,
+ .suspend = tifm_device_suspend,
+ .resume = tifm_device_resume
+};
+
+static void tifm_free(struct device *dev)
+{
+ struct tifm_adapter *fm = container_of(dev, struct tifm_adapter, dev);
+
+ kfree(fm);
+}
+
+static struct class tifm_adapter_class = {
+ .name = "tifm_adapter",
+ .dev_release = tifm_free
+};
+
+struct tifm_adapter *tifm_alloc_adapter(unsigned int num_sockets,
+ struct device *dev)
+{
+ struct tifm_adapter *fm;
+
+ fm = kzalloc(sizeof(struct tifm_adapter)
+ + sizeof(struct tifm_dev*) * num_sockets, GFP_KERNEL);
+ if (fm) {
+ fm->dev.class = &tifm_adapter_class;
+ fm->dev.parent = dev;
+ device_initialize(&fm->dev);
+ spin_lock_init(&fm->lock);
+ fm->num_sockets = num_sockets;
+ }
+ return fm;
+}
+EXPORT_SYMBOL(tifm_alloc_adapter);
+
+int tifm_add_adapter(struct tifm_adapter *fm)
+{
+ int rc;
+
+ idr_preload(GFP_KERNEL);
+ spin_lock(&tifm_adapter_lock);
+ rc = idr_alloc(&tifm_adapter_idr, fm, 0, 0, GFP_NOWAIT);
+ if (rc >= 0)
+ fm->id = rc;
+ spin_unlock(&tifm_adapter_lock);
+ idr_preload_end();
+ if (rc < 0)
+ return rc;
+
+ dev_set_name(&fm->dev, "tifm%u", fm->id);
+ rc = device_add(&fm->dev);
+ if (rc) {
+ spin_lock(&tifm_adapter_lock);
+ idr_remove(&tifm_adapter_idr, fm->id);
+ spin_unlock(&tifm_adapter_lock);
+ }
+
+ return rc;
+}
+EXPORT_SYMBOL(tifm_add_adapter);
+
+void tifm_remove_adapter(struct tifm_adapter *fm)
+{
+ unsigned int cnt;
+
+ flush_workqueue(workqueue);
+ for (cnt = 0; cnt < fm->num_sockets; ++cnt) {
+ if (fm->sockets[cnt])
+ device_unregister(&fm->sockets[cnt]->dev);
+ }
+
+ spin_lock(&tifm_adapter_lock);
+ idr_remove(&tifm_adapter_idr, fm->id);
+ spin_unlock(&tifm_adapter_lock);
+ device_del(&fm->dev);
+}
+EXPORT_SYMBOL(tifm_remove_adapter);
+
+void tifm_free_adapter(struct tifm_adapter *fm)
+{
+ put_device(&fm->dev);
+}
+EXPORT_SYMBOL(tifm_free_adapter);
+
+void tifm_free_device(struct device *dev)
+{
+ struct tifm_dev *sock = container_of(dev, struct tifm_dev, dev);
+ kfree(sock);
+}
+EXPORT_SYMBOL(tifm_free_device);
+
+struct tifm_dev *tifm_alloc_device(struct tifm_adapter *fm, unsigned int id,
+ unsigned char type)
+{
+ struct tifm_dev *sock = NULL;
+
+ if (!tifm_media_type_name(type, 0))
+ return sock;
+
+ sock = kzalloc(sizeof(struct tifm_dev), GFP_KERNEL);
+ if (sock) {
+ spin_lock_init(&sock->lock);
+ sock->type = type;
+ sock->socket_id = id;
+ sock->card_event = tifm_dummy_event;
+ sock->data_event = tifm_dummy_event;
+
+ sock->dev.parent = fm->dev.parent;
+ sock->dev.bus = &tifm_bus_type;
+ sock->dev.dma_mask = fm->dev.parent->dma_mask;
+ sock->dev.release = tifm_free_device;
+
+ dev_set_name(&sock->dev, "tifm_%s%u:%u",
+ tifm_media_type_name(type, 2), fm->id, id);
+ printk(KERN_INFO DRIVER_NAME
+ ": %s card detected in socket %u:%u\n",
+ tifm_media_type_name(type, 0), fm->id, id);
+ }
+ return sock;
+}
+EXPORT_SYMBOL(tifm_alloc_device);
+
+void tifm_eject(struct tifm_dev *sock)
+{
+ struct tifm_adapter *fm = dev_get_drvdata(sock->dev.parent);
+ fm->eject(fm, sock);
+}
+EXPORT_SYMBOL(tifm_eject);
+
+int tifm_has_ms_pif(struct tifm_dev *sock)
+{
+ struct tifm_adapter *fm = dev_get_drvdata(sock->dev.parent);
+ return fm->has_ms_pif(fm, sock);
+}
+EXPORT_SYMBOL(tifm_has_ms_pif);
+
+int tifm_map_sg(struct tifm_dev *sock, struct scatterlist *sg, int nents,
+ int direction)
+{
+ return pci_map_sg(to_pci_dev(sock->dev.parent), sg, nents, direction);
+}
+EXPORT_SYMBOL(tifm_map_sg);
+
+void tifm_unmap_sg(struct tifm_dev *sock, struct scatterlist *sg, int nents,
+ int direction)
+{
+ pci_unmap_sg(to_pci_dev(sock->dev.parent), sg, nents, direction);
+}
+EXPORT_SYMBOL(tifm_unmap_sg);
+
+void tifm_queue_work(struct work_struct *work)
+{
+ queue_work(workqueue, work);
+}
+EXPORT_SYMBOL(tifm_queue_work);
+
+int tifm_register_driver(struct tifm_driver *drv)
+{
+ drv->driver.bus = &tifm_bus_type;
+
+ return driver_register(&drv->driver);
+}
+EXPORT_SYMBOL(tifm_register_driver);
+
+void tifm_unregister_driver(struct tifm_driver *drv)
+{
+ driver_unregister(&drv->driver);
+}
+EXPORT_SYMBOL(tifm_unregister_driver);
+
+static int __init tifm_init(void)
+{
+ int rc;
+
+ workqueue = create_freezable_workqueue("tifm");
+ if (!workqueue)
+ return -ENOMEM;
+
+ rc = bus_register(&tifm_bus_type);
+
+ if (rc)
+ goto err_out_wq;
+
+ rc = class_register(&tifm_adapter_class);
+ if (!rc)
+ return 0;
+
+ bus_unregister(&tifm_bus_type);
+
+err_out_wq:
+ destroy_workqueue(workqueue);
+
+ return rc;
+}
+
+static void __exit tifm_exit(void)
+{
+ class_unregister(&tifm_adapter_class);
+ bus_unregister(&tifm_bus_type);
+ destroy_workqueue(workqueue);
+}
+
+subsys_initcall(tifm_init);
+module_exit(tifm_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Alex Dubov");
+MODULE_DESCRIPTION("TI FlashMedia core driver");
+MODULE_LICENSE("GPL");
+MODULE_VERSION(DRIVER_VERSION);
diff --git a/drivers/misc/tsl2550.c b/drivers/misc/tsl2550.c
new file mode 100644
index 0000000..87a1337
--- /dev/null
+++ b/drivers/misc/tsl2550.c
@@ -0,0 +1,461 @@
+/*
+ * tsl2550.c - Linux kernel modules for ambient light sensor
+ *
+ * Copyright (C) 2007 Rodolfo Giometti <giometti@linux.it>
+ * Copyright (C) 2007 Eurotech S.p.A. <info@eurotech.it>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/i2c.h>
+#include <linux/mutex.h>
+
+#define TSL2550_DRV_NAME "tsl2550"
+#define DRIVER_VERSION "1.2"
+
+/*
+ * Defines
+ */
+
+#define TSL2550_POWER_DOWN 0x00
+#define TSL2550_POWER_UP 0x03
+#define TSL2550_STANDARD_RANGE 0x18
+#define TSL2550_EXTENDED_RANGE 0x1d
+#define TSL2550_READ_ADC0 0x43
+#define TSL2550_READ_ADC1 0x83
+
+/*
+ * Structs
+ */
+
+struct tsl2550_data {
+ struct i2c_client *client;
+ struct mutex update_lock;
+
+ unsigned int power_state:1;
+ unsigned int operating_mode:1;
+};
+
+/*
+ * Global data
+ */
+
+static const u8 TSL2550_MODE_RANGE[2] = {
+ TSL2550_STANDARD_RANGE, TSL2550_EXTENDED_RANGE,
+};
+
+/*
+ * Management functions
+ */
+
+static int tsl2550_set_operating_mode(struct i2c_client *client, int mode)
+{
+ struct tsl2550_data *data = i2c_get_clientdata(client);
+
+ int ret = i2c_smbus_write_byte(client, TSL2550_MODE_RANGE[mode]);
+
+ data->operating_mode = mode;
+
+ return ret;
+}
+
+static int tsl2550_set_power_state(struct i2c_client *client, int state)
+{
+ struct tsl2550_data *data = i2c_get_clientdata(client);
+ int ret;
+
+ if (state == 0)
+ ret = i2c_smbus_write_byte(client, TSL2550_POWER_DOWN);
+ else {
+ ret = i2c_smbus_write_byte(client, TSL2550_POWER_UP);
+
+ /* On power up we should reset operating mode also... */
+ tsl2550_set_operating_mode(client, data->operating_mode);
+ }
+
+ data->power_state = state;
+
+ return ret;
+}
+
+static int tsl2550_get_adc_value(struct i2c_client *client, u8 cmd)
+{
+ int ret;
+
+ ret = i2c_smbus_read_byte_data(client, cmd);
+ if (ret < 0)
+ return ret;
+ if (!(ret & 0x80))
+ return -EAGAIN;
+ return ret & 0x7f; /* remove the "valid" bit */
+}
+
+/*
+ * LUX calculation
+ */
+
+#define TSL2550_MAX_LUX 1846
+
+static const u8 ratio_lut[] = {
+ 100, 100, 100, 100, 100, 100, 100, 100,
+ 100, 100, 100, 100, 100, 100, 99, 99,
+ 99, 99, 99, 99, 99, 99, 99, 99,
+ 99, 99, 99, 98, 98, 98, 98, 98,
+ 98, 98, 97, 97, 97, 97, 97, 96,
+ 96, 96, 96, 95, 95, 95, 94, 94,
+ 93, 93, 93, 92, 92, 91, 91, 90,
+ 89, 89, 88, 87, 87, 86, 85, 84,
+ 83, 82, 81, 80, 79, 78, 77, 75,
+ 74, 73, 71, 69, 68, 66, 64, 62,
+ 60, 58, 56, 54, 52, 49, 47, 44,
+ 42, 41, 40, 40, 39, 39, 38, 38,
+ 37, 37, 37, 36, 36, 36, 35, 35,
+ 35, 35, 34, 34, 34, 34, 33, 33,
+ 33, 33, 32, 32, 32, 32, 32, 31,
+ 31, 31, 31, 31, 30, 30, 30, 30,
+ 30,
+};
+
+static const u16 count_lut[] = {
+ 0, 1, 2, 3, 4, 5, 6, 7,
+ 8, 9, 10, 11, 12, 13, 14, 15,
+ 16, 18, 20, 22, 24, 26, 28, 30,
+ 32, 34, 36, 38, 40, 42, 44, 46,
+ 49, 53, 57, 61, 65, 69, 73, 77,
+ 81, 85, 89, 93, 97, 101, 105, 109,
+ 115, 123, 131, 139, 147, 155, 163, 171,
+ 179, 187, 195, 203, 211, 219, 227, 235,
+ 247, 263, 279, 295, 311, 327, 343, 359,
+ 375, 391, 407, 423, 439, 455, 471, 487,
+ 511, 543, 575, 607, 639, 671, 703, 735,
+ 767, 799, 831, 863, 895, 927, 959, 991,
+ 1039, 1103, 1167, 1231, 1295, 1359, 1423, 1487,
+ 1551, 1615, 1679, 1743, 1807, 1871, 1935, 1999,
+ 2095, 2223, 2351, 2479, 2607, 2735, 2863, 2991,
+ 3119, 3247, 3375, 3503, 3631, 3759, 3887, 4015,
+};
+
+/*
+ * This function is described into Taos TSL2550 Designer's Notebook
+ * pages 2, 3.
+ */
+static int tsl2550_calculate_lux(u8 ch0, u8 ch1)
+{
+ unsigned int lux;
+
+ /* Look up count from channel values */
+ u16 c0 = count_lut[ch0];
+ u16 c1 = count_lut[ch1];
+
+ /*
+ * Calculate ratio.
+ * Note: the "128" is a scaling factor
+ */
+ u8 r = 128;
+
+ /* Avoid division by 0 and count 1 cannot be greater than count 0 */
+ if (c1 <= c0)
+ if (c0) {
+ r = c1 * 128 / c0;
+
+ /* Calculate LUX */
+ lux = ((c0 - c1) * ratio_lut[r]) / 256;
+ } else
+ lux = 0;
+ else
+ return -EAGAIN;
+
+ /* LUX range check */
+ return lux > TSL2550_MAX_LUX ? TSL2550_MAX_LUX : lux;
+}
+
+/*
+ * SysFS support
+ */
+
+static ssize_t tsl2550_show_power_state(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct tsl2550_data *data = i2c_get_clientdata(to_i2c_client(dev));
+
+ return sprintf(buf, "%u\n", data->power_state);
+}
+
+static ssize_t tsl2550_store_power_state(struct device *dev,
+ struct device_attribute *attr, const char *buf, size_t count)
+{
+ struct i2c_client *client = to_i2c_client(dev);
+ struct tsl2550_data *data = i2c_get_clientdata(client);
+ unsigned long val = simple_strtoul(buf, NULL, 10);
+ int ret;
+
+ if (val > 1)
+ return -EINVAL;
+
+ mutex_lock(&data->update_lock);
+ ret = tsl2550_set_power_state(client, val);
+ mutex_unlock(&data->update_lock);
+
+ if (ret < 0)
+ return ret;
+
+ return count;
+}
+
+static DEVICE_ATTR(power_state, S_IWUSR | S_IRUGO,
+ tsl2550_show_power_state, tsl2550_store_power_state);
+
+static ssize_t tsl2550_show_operating_mode(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct tsl2550_data *data = i2c_get_clientdata(to_i2c_client(dev));
+
+ return sprintf(buf, "%u\n", data->operating_mode);
+}
+
+static ssize_t tsl2550_store_operating_mode(struct device *dev,
+ struct device_attribute *attr, const char *buf, size_t count)
+{
+ struct i2c_client *client = to_i2c_client(dev);
+ struct tsl2550_data *data = i2c_get_clientdata(client);
+ unsigned long val = simple_strtoul(buf, NULL, 10);
+ int ret;
+
+ if (val > 1)
+ return -EINVAL;
+
+ if (data->power_state == 0)
+ return -EBUSY;
+
+ mutex_lock(&data->update_lock);
+ ret = tsl2550_set_operating_mode(client, val);
+ mutex_unlock(&data->update_lock);
+
+ if (ret < 0)
+ return ret;
+
+ return count;
+}
+
+static DEVICE_ATTR(operating_mode, S_IWUSR | S_IRUGO,
+ tsl2550_show_operating_mode, tsl2550_store_operating_mode);
+
+static ssize_t __tsl2550_show_lux(struct i2c_client *client, char *buf)
+{
+ struct tsl2550_data *data = i2c_get_clientdata(client);
+ u8 ch0, ch1;
+ int ret;
+
+ ret = tsl2550_get_adc_value(client, TSL2550_READ_ADC0);
+ if (ret < 0)
+ return ret;
+ ch0 = ret;
+
+ ret = tsl2550_get_adc_value(client, TSL2550_READ_ADC1);
+ if (ret < 0)
+ return ret;
+ ch1 = ret;
+
+ /* Do the job */
+ ret = tsl2550_calculate_lux(ch0, ch1);
+ if (ret < 0)
+ return ret;
+ if (data->operating_mode == 1)
+ ret *= 5;
+
+ return sprintf(buf, "%d\n", ret);
+}
+
+static ssize_t tsl2550_show_lux1_input(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct i2c_client *client = to_i2c_client(dev);
+ struct tsl2550_data *data = i2c_get_clientdata(client);
+ int ret;
+
+ /* No LUX data if not operational */
+ if (!data->power_state)
+ return -EBUSY;
+
+ mutex_lock(&data->update_lock);
+ ret = __tsl2550_show_lux(client, buf);
+ mutex_unlock(&data->update_lock);
+
+ return ret;
+}
+
+static DEVICE_ATTR(lux1_input, S_IRUGO,
+ tsl2550_show_lux1_input, NULL);
+
+static struct attribute *tsl2550_attributes[] = {
+ &dev_attr_power_state.attr,
+ &dev_attr_operating_mode.attr,
+ &dev_attr_lux1_input.attr,
+ NULL
+};
+
+static const struct attribute_group tsl2550_attr_group = {
+ .attrs = tsl2550_attributes,
+};
+
+/*
+ * Initialization function
+ */
+
+static int tsl2550_init_client(struct i2c_client *client)
+{
+ struct tsl2550_data *data = i2c_get_clientdata(client);
+ int err;
+
+ /*
+ * Probe the chip. To do so we try to power up the device and then to
+ * read back the 0x03 code
+ */
+ err = i2c_smbus_read_byte_data(client, TSL2550_POWER_UP);
+ if (err < 0)
+ return err;
+ if (err != TSL2550_POWER_UP)
+ return -ENODEV;
+ data->power_state = 1;
+
+ /* Set the default operating mode */
+ err = i2c_smbus_write_byte(client,
+ TSL2550_MODE_RANGE[data->operating_mode]);
+ if (err < 0)
+ return err;
+
+ return 0;
+}
+
+/*
+ * I2C init/probing/exit functions
+ */
+
+static struct i2c_driver tsl2550_driver;
+static int tsl2550_probe(struct i2c_client *client,
+ const struct i2c_device_id *id)
+{
+ struct i2c_adapter *adapter = to_i2c_adapter(client->dev.parent);
+ struct tsl2550_data *data;
+ int *opmode, err = 0;
+
+ if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_WRITE_BYTE
+ | I2C_FUNC_SMBUS_READ_BYTE_DATA)) {
+ err = -EIO;
+ goto exit;
+ }
+
+ data = kzalloc(sizeof(struct tsl2550_data), GFP_KERNEL);
+ if (!data) {
+ err = -ENOMEM;
+ goto exit;
+ }
+ data->client = client;
+ i2c_set_clientdata(client, data);
+
+ /* Check platform data */
+ opmode = client->dev.platform_data;
+ if (opmode) {
+ if (*opmode < 0 || *opmode > 1) {
+ dev_err(&client->dev, "invalid operating_mode (%d)\n",
+ *opmode);
+ err = -EINVAL;
+ goto exit_kfree;
+ }
+ data->operating_mode = *opmode;
+ } else
+ data->operating_mode = 0; /* default mode is standard */
+ dev_info(&client->dev, "%s operating mode\n",
+ data->operating_mode ? "extended" : "standard");
+
+ mutex_init(&data->update_lock);
+
+ /* Initialize the TSL2550 chip */
+ err = tsl2550_init_client(client);
+ if (err)
+ goto exit_kfree;
+
+ /* Register sysfs hooks */
+ err = sysfs_create_group(&client->dev.kobj, &tsl2550_attr_group);
+ if (err)
+ goto exit_kfree;
+
+ dev_info(&client->dev, "support ver. %s enabled\n", DRIVER_VERSION);
+
+ return 0;
+
+exit_kfree:
+ kfree(data);
+exit:
+ return err;
+}
+
+static int tsl2550_remove(struct i2c_client *client)
+{
+ sysfs_remove_group(&client->dev.kobj, &tsl2550_attr_group);
+
+ /* Power down the device */
+ tsl2550_set_power_state(client, 0);
+
+ kfree(i2c_get_clientdata(client));
+
+ return 0;
+}
+
+#ifdef CONFIG_PM_SLEEP
+
+static int tsl2550_suspend(struct device *dev)
+{
+ return tsl2550_set_power_state(to_i2c_client(dev), 0);
+}
+
+static int tsl2550_resume(struct device *dev)
+{
+ return tsl2550_set_power_state(to_i2c_client(dev), 1);
+}
+
+static SIMPLE_DEV_PM_OPS(tsl2550_pm_ops, tsl2550_suspend, tsl2550_resume);
+#define TSL2550_PM_OPS (&tsl2550_pm_ops)
+
+#else
+
+#define TSL2550_PM_OPS NULL
+
+#endif /* CONFIG_PM_SLEEP */
+
+static const struct i2c_device_id tsl2550_id[] = {
+ { "tsl2550", 0 },
+ { }
+};
+MODULE_DEVICE_TABLE(i2c, tsl2550_id);
+
+static struct i2c_driver tsl2550_driver = {
+ .driver = {
+ .name = TSL2550_DRV_NAME,
+ .pm = TSL2550_PM_OPS,
+ },
+ .probe = tsl2550_probe,
+ .remove = tsl2550_remove,
+ .id_table = tsl2550_id,
+};
+
+module_i2c_driver(tsl2550_driver);
+
+MODULE_AUTHOR("Rodolfo Giometti <giometti@linux.it>");
+MODULE_DESCRIPTION("TSL2550 ambient light sensor driver");
+MODULE_LICENSE("GPL");
+MODULE_VERSION(DRIVER_VERSION);
diff --git a/drivers/misc/vexpress-syscfg.c b/drivers/misc/vexpress-syscfg.c
new file mode 100644
index 0000000..c344483
--- /dev/null
+++ b/drivers/misc/vexpress-syscfg.c
@@ -0,0 +1,294 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * Copyright (C) 2014 ARM Limited
+ */
+
+#include <linux/delay.h>
+#include <linux/err.h>
+#include <linux/io.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/syscore_ops.h>
+#include <linux/vexpress.h>
+
+
+#define SYS_CFGDATA 0x0
+
+#define SYS_CFGCTRL 0x4
+#define SYS_CFGCTRL_START (1 << 31)
+#define SYS_CFGCTRL_WRITE (1 << 30)
+#define SYS_CFGCTRL_DCC(n) (((n) & 0xf) << 26)
+#define SYS_CFGCTRL_FUNC(n) (((n) & 0x3f) << 20)
+#define SYS_CFGCTRL_SITE(n) (((n) & 0x3) << 16)
+#define SYS_CFGCTRL_POSITION(n) (((n) & 0xf) << 12)
+#define SYS_CFGCTRL_DEVICE(n) (((n) & 0xfff) << 0)
+
+#define SYS_CFGSTAT 0x8
+#define SYS_CFGSTAT_ERR (1 << 1)
+#define SYS_CFGSTAT_COMPLETE (1 << 0)
+
+
+struct vexpress_syscfg {
+ struct device *dev;
+ void __iomem *base;
+ struct list_head funcs;
+};
+
+struct vexpress_syscfg_func {
+ struct list_head list;
+ struct vexpress_syscfg *syscfg;
+ struct regmap *regmap;
+ int num_templates;
+ u32 template[0]; /* Keep it last! */
+};
+
+
+static int vexpress_syscfg_exec(struct vexpress_syscfg_func *func,
+ int index, bool write, u32 *data)
+{
+ struct vexpress_syscfg *syscfg = func->syscfg;
+ u32 command, status;
+ int tries;
+ long timeout;
+
+ if (WARN_ON(index > func->num_templates))
+ return -EINVAL;
+
+ command = readl(syscfg->base + SYS_CFGCTRL);
+ if (WARN_ON(command & SYS_CFGCTRL_START))
+ return -EBUSY;
+
+ command = func->template[index];
+ command |= SYS_CFGCTRL_START;
+ command |= write ? SYS_CFGCTRL_WRITE : 0;
+
+ /* Use a canary for reads */
+ if (!write)
+ *data = 0xdeadbeef;
+
+ dev_dbg(syscfg->dev, "func %p, command %x, data %x\n",
+ func, command, *data);
+ writel(*data, syscfg->base + SYS_CFGDATA);
+ writel(0, syscfg->base + SYS_CFGSTAT);
+ writel(command, syscfg->base + SYS_CFGCTRL);
+ mb();
+
+ /* The operation can take ages... Go to sleep, 100us initially */
+ tries = 100;
+ timeout = 100;
+ do {
+ if (!irqs_disabled()) {
+ set_current_state(TASK_INTERRUPTIBLE);
+ schedule_timeout(usecs_to_jiffies(timeout));
+ if (signal_pending(current))
+ return -EINTR;
+ } else {
+ udelay(timeout);
+ }
+
+ status = readl(syscfg->base + SYS_CFGSTAT);
+ if (status & SYS_CFGSTAT_ERR)
+ return -EFAULT;
+
+ if (timeout > 20)
+ timeout -= 20;
+ } while (--tries && !(status & SYS_CFGSTAT_COMPLETE));
+ if (WARN_ON_ONCE(!tries))
+ return -ETIMEDOUT;
+
+ if (!write) {
+ *data = readl(syscfg->base + SYS_CFGDATA);
+ dev_dbg(syscfg->dev, "func %p, read data %x\n", func, *data);
+ }
+
+ return 0;
+}
+
+static int vexpress_syscfg_read(void *context, unsigned int index,
+ unsigned int *val)
+{
+ struct vexpress_syscfg_func *func = context;
+
+ return vexpress_syscfg_exec(func, index, false, val);
+}
+
+static int vexpress_syscfg_write(void *context, unsigned int index,
+ unsigned int val)
+{
+ struct vexpress_syscfg_func *func = context;
+
+ return vexpress_syscfg_exec(func, index, true, &val);
+}
+
+static struct regmap_config vexpress_syscfg_regmap_config = {
+ .lock = vexpress_config_lock,
+ .unlock = vexpress_config_unlock,
+ .reg_bits = 32,
+ .val_bits = 32,
+ .reg_read = vexpress_syscfg_read,
+ .reg_write = vexpress_syscfg_write,
+ .reg_format_endian = REGMAP_ENDIAN_LITTLE,
+ .val_format_endian = REGMAP_ENDIAN_LITTLE,
+};
+
+
+static struct regmap *vexpress_syscfg_regmap_init(struct device *dev,
+ void *context)
+{
+ int err;
+ struct vexpress_syscfg *syscfg = context;
+ struct vexpress_syscfg_func *func;
+ struct property *prop;
+ const __be32 *val = NULL;
+ __be32 energy_quirk[4];
+ int num;
+ u32 site, position, dcc;
+ int i;
+
+ err = vexpress_config_get_topo(dev->of_node, &site,
+ &position, &dcc);
+ if (err)
+ return ERR_PTR(err);
+
+ prop = of_find_property(dev->of_node,
+ "arm,vexpress-sysreg,func", NULL);
+ if (!prop)
+ return ERR_PTR(-EINVAL);
+
+ num = prop->length / sizeof(u32) / 2;
+ val = prop->value;
+
+ /*
+ * "arm,vexpress-energy" function used to be described
+ * by its first device only, now it requires both
+ */
+ if (num == 1 && of_device_is_compatible(dev->of_node,
+ "arm,vexpress-energy")) {
+ num = 2;
+ energy_quirk[0] = *val;
+ energy_quirk[2] = *val++;
+ energy_quirk[1] = *val;
+ energy_quirk[3] = cpu_to_be32(be32_to_cpup(val) + 1);
+ val = energy_quirk;
+ }
+
+ func = kzalloc(sizeof(*func) + sizeof(*func->template) * num,
+ GFP_KERNEL);
+ if (!func)
+ return ERR_PTR(-ENOMEM);
+
+ func->syscfg = syscfg;
+ func->num_templates = num;
+
+ for (i = 0; i < num; i++) {
+ u32 function, device;
+
+ function = be32_to_cpup(val++);
+ device = be32_to_cpup(val++);
+
+ dev_dbg(dev, "func %p: %u/%u/%u/%u/%u\n",
+ func, site, position, dcc,
+ function, device);
+
+ func->template[i] = SYS_CFGCTRL_DCC(dcc);
+ func->template[i] |= SYS_CFGCTRL_SITE(site);
+ func->template[i] |= SYS_CFGCTRL_POSITION(position);
+ func->template[i] |= SYS_CFGCTRL_FUNC(function);
+ func->template[i] |= SYS_CFGCTRL_DEVICE(device);
+ }
+
+ vexpress_syscfg_regmap_config.max_register = num - 1;
+
+ func->regmap = regmap_init(dev, NULL, func,
+ &vexpress_syscfg_regmap_config);
+
+ if (IS_ERR(func->regmap)) {
+ void *err = func->regmap;
+
+ kfree(func);
+ return err;
+ }
+
+ list_add(&func->list, &syscfg->funcs);
+
+ return func->regmap;
+}
+
+static void vexpress_syscfg_regmap_exit(struct regmap *regmap, void *context)
+{
+ struct vexpress_syscfg *syscfg = context;
+ struct vexpress_syscfg_func *func, *tmp;
+
+ regmap_exit(regmap);
+
+ list_for_each_entry_safe(func, tmp, &syscfg->funcs, list) {
+ if (func->regmap == regmap) {
+ list_del(&syscfg->funcs);
+ kfree(func);
+ break;
+ }
+ }
+}
+
+static struct vexpress_config_bridge_ops vexpress_syscfg_bridge_ops = {
+ .regmap_init = vexpress_syscfg_regmap_init,
+ .regmap_exit = vexpress_syscfg_regmap_exit,
+};
+
+
+static int vexpress_syscfg_probe(struct platform_device *pdev)
+{
+ struct vexpress_syscfg *syscfg;
+ struct resource *res;
+ struct device *bridge;
+
+ syscfg = devm_kzalloc(&pdev->dev, sizeof(*syscfg), GFP_KERNEL);
+ if (!syscfg)
+ return -ENOMEM;
+ syscfg->dev = &pdev->dev;
+ INIT_LIST_HEAD(&syscfg->funcs);
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ if (!devm_request_mem_region(&pdev->dev, res->start,
+ resource_size(res), pdev->name))
+ return -EBUSY;
+
+ syscfg->base = devm_ioremap(&pdev->dev, res->start, resource_size(res));
+ if (!syscfg->base)
+ return -EFAULT;
+
+ /* Must use dev.parent (MFD), as that's where DT phandle points at... */
+ bridge = vexpress_config_bridge_register(pdev->dev.parent,
+ &vexpress_syscfg_bridge_ops, syscfg);
+ if (IS_ERR(bridge))
+ return PTR_ERR(bridge);
+
+ return 0;
+}
+
+static const struct platform_device_id vexpress_syscfg_id_table[] = {
+ { "vexpress-syscfg", },
+ {},
+};
+
+static struct platform_driver vexpress_syscfg_driver = {
+ .driver.name = "vexpress-syscfg",
+ .id_table = vexpress_syscfg_id_table,
+ .probe = vexpress_syscfg_probe,
+};
+
+static int __init vexpress_syscfg_init(void)
+{
+ return platform_driver_register(&vexpress_syscfg_driver);
+}
+core_initcall(vexpress_syscfg_init);
diff --git a/drivers/misc/vmw_balloon.c b/drivers/misc/vmw_balloon.c
new file mode 100644
index 0000000..1e688bf
--- /dev/null
+++ b/drivers/misc/vmw_balloon.c
@@ -0,0 +1,1318 @@
+/*
+ * VMware Balloon driver.
+ *
+ * Copyright (C) 2000-2014, VMware, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; version 2 of the License and no later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Maintained by: Xavier Deguillard <xdeguillard@vmware.com>
+ * Philip Moltmann <moltmann@vmware.com>
+ */
+
+/*
+ * This is VMware physical memory management driver for Linux. The driver
+ * acts like a "balloon" that can be inflated to reclaim physical pages by
+ * reserving them in the guest and invalidating them in the monitor,
+ * freeing up the underlying machine pages so they can be allocated to
+ * other guests. The balloon can also be deflated to allow the guest to
+ * use more physical memory. Higher level policies can control the sizes
+ * of balloons in VMs in order to manage physical memory resources.
+ */
+
+//#define DEBUG
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/vmalloc.h>
+#include <linux/sched.h>
+#include <linux/module.h>
+#include <linux/workqueue.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+#include <linux/vmw_vmci_defs.h>
+#include <linux/vmw_vmci_api.h>
+#include <asm/hypervisor.h>
+
+MODULE_AUTHOR("VMware, Inc.");
+MODULE_DESCRIPTION("VMware Memory Control (Balloon) Driver");
+MODULE_VERSION("1.5.0.0-k");
+MODULE_ALIAS("dmi:*:svnVMware*:*");
+MODULE_ALIAS("vmware_vmmemctl");
+MODULE_LICENSE("GPL");
+
+/*
+ * Various constants controlling rate of inflaint/deflating balloon,
+ * measured in pages.
+ */
+
+/*
+ * Rates of memory allocaton when guest experiences memory pressure
+ * (driver performs sleeping allocations).
+ */
+#define VMW_BALLOON_RATE_ALLOC_MIN 512U
+#define VMW_BALLOON_RATE_ALLOC_MAX 2048U
+#define VMW_BALLOON_RATE_ALLOC_INC 16U
+
+/*
+ * When guest is under memory pressure, use a reduced page allocation
+ * rate for next several cycles.
+ */
+#define VMW_BALLOON_SLOW_CYCLES 4
+
+/*
+ * Use __GFP_HIGHMEM to allow pages from HIGHMEM zone. We don't
+ * allow wait (__GFP_RECLAIM) for NOSLEEP page allocations. Use
+ * __GFP_NOWARN, to suppress page allocation failure warnings.
+ */
+#define VMW_PAGE_ALLOC_NOSLEEP (__GFP_HIGHMEM|__GFP_NOWARN)
+
+/*
+ * Use GFP_HIGHUSER when executing in a separate kernel thread
+ * context and allocation can sleep. This is less stressful to
+ * the guest memory system, since it allows the thread to block
+ * while memory is reclaimed, and won't take pages from emergency
+ * low-memory pools.
+ */
+#define VMW_PAGE_ALLOC_CANSLEEP (GFP_HIGHUSER)
+
+/* Maximum number of refused pages we accumulate during inflation cycle */
+#define VMW_BALLOON_MAX_REFUSED 16
+
+/*
+ * Hypervisor communication port definitions.
+ */
+#define VMW_BALLOON_HV_PORT 0x5670
+#define VMW_BALLOON_HV_MAGIC 0x456c6d6f
+#define VMW_BALLOON_GUEST_ID 1 /* Linux */
+
+enum vmwballoon_capabilities {
+ /*
+ * Bit 0 is reserved and not associated to any capability.
+ */
+ VMW_BALLOON_BASIC_CMDS = (1 << 1),
+ VMW_BALLOON_BATCHED_CMDS = (1 << 2),
+ VMW_BALLOON_BATCHED_2M_CMDS = (1 << 3),
+ VMW_BALLOON_SIGNALLED_WAKEUP_CMD = (1 << 4),
+};
+
+#define VMW_BALLOON_CAPABILITIES (VMW_BALLOON_BASIC_CMDS \
+ | VMW_BALLOON_BATCHED_CMDS \
+ | VMW_BALLOON_BATCHED_2M_CMDS \
+ | VMW_BALLOON_SIGNALLED_WAKEUP_CMD)
+
+#define VMW_BALLOON_2M_SHIFT (9)
+#define VMW_BALLOON_NUM_PAGE_SIZES (2)
+
+/*
+ * Backdoor commands availability:
+ *
+ * START, GET_TARGET and GUEST_ID are always available,
+ *
+ * VMW_BALLOON_BASIC_CMDS:
+ * LOCK and UNLOCK commands,
+ * VMW_BALLOON_BATCHED_CMDS:
+ * BATCHED_LOCK and BATCHED_UNLOCK commands.
+ * VMW BALLOON_BATCHED_2M_CMDS:
+ * BATCHED_2M_LOCK and BATCHED_2M_UNLOCK commands,
+ * VMW VMW_BALLOON_SIGNALLED_WAKEUP_CMD:
+ * VMW_BALLOON_CMD_VMCI_DOORBELL_SET command.
+ */
+#define VMW_BALLOON_CMD_START 0
+#define VMW_BALLOON_CMD_GET_TARGET 1
+#define VMW_BALLOON_CMD_LOCK 2
+#define VMW_BALLOON_CMD_UNLOCK 3
+#define VMW_BALLOON_CMD_GUEST_ID 4
+#define VMW_BALLOON_CMD_BATCHED_LOCK 6
+#define VMW_BALLOON_CMD_BATCHED_UNLOCK 7
+#define VMW_BALLOON_CMD_BATCHED_2M_LOCK 8
+#define VMW_BALLOON_CMD_BATCHED_2M_UNLOCK 9
+#define VMW_BALLOON_CMD_VMCI_DOORBELL_SET 10
+
+
+/* error codes */
+#define VMW_BALLOON_SUCCESS 0
+#define VMW_BALLOON_FAILURE -1
+#define VMW_BALLOON_ERROR_CMD_INVALID 1
+#define VMW_BALLOON_ERROR_PPN_INVALID 2
+#define VMW_BALLOON_ERROR_PPN_LOCKED 3
+#define VMW_BALLOON_ERROR_PPN_UNLOCKED 4
+#define VMW_BALLOON_ERROR_PPN_PINNED 5
+#define VMW_BALLOON_ERROR_PPN_NOTNEEDED 6
+#define VMW_BALLOON_ERROR_RESET 7
+#define VMW_BALLOON_ERROR_BUSY 8
+
+#define VMW_BALLOON_SUCCESS_WITH_CAPABILITIES (0x03000000)
+
+/* Batch page description */
+
+/*
+ * Layout of a page in the batch page:
+ *
+ * +-------------+----------+--------+
+ * | | | |
+ * | Page number | Reserved | Status |
+ * | | | |
+ * +-------------+----------+--------+
+ * 64 PAGE_SHIFT 6 0
+ *
+ * The reserved field should be set to 0.
+ */
+#define VMW_BALLOON_BATCH_MAX_PAGES (PAGE_SIZE / sizeof(u64))
+#define VMW_BALLOON_BATCH_STATUS_MASK ((1UL << 5) - 1)
+#define VMW_BALLOON_BATCH_PAGE_MASK (~((1UL << PAGE_SHIFT) - 1))
+
+struct vmballoon_batch_page {
+ u64 pages[VMW_BALLOON_BATCH_MAX_PAGES];
+};
+
+static u64 vmballoon_batch_get_pa(struct vmballoon_batch_page *batch, int idx)
+{
+ return batch->pages[idx] & VMW_BALLOON_BATCH_PAGE_MASK;
+}
+
+static int vmballoon_batch_get_status(struct vmballoon_batch_page *batch,
+ int idx)
+{
+ return (int)(batch->pages[idx] & VMW_BALLOON_BATCH_STATUS_MASK);
+}
+
+static void vmballoon_batch_set_pa(struct vmballoon_batch_page *batch, int idx,
+ u64 pa)
+{
+ batch->pages[idx] = pa;
+}
+
+
+#define VMWARE_BALLOON_CMD(cmd, arg1, arg2, result) \
+({ \
+ unsigned long __status, __dummy1, __dummy2, __dummy3; \
+ __asm__ __volatile__ ("inl %%dx" : \
+ "=a"(__status), \
+ "=c"(__dummy1), \
+ "=d"(__dummy2), \
+ "=b"(result), \
+ "=S" (__dummy3) : \
+ "0"(VMW_BALLOON_HV_MAGIC), \
+ "1"(VMW_BALLOON_CMD_##cmd), \
+ "2"(VMW_BALLOON_HV_PORT), \
+ "3"(arg1), \
+ "4" (arg2) : \
+ "memory"); \
+ if (VMW_BALLOON_CMD_##cmd == VMW_BALLOON_CMD_START) \
+ result = __dummy1; \
+ result &= -1UL; \
+ __status & -1UL; \
+})
+
+#ifdef CONFIG_DEBUG_FS
+struct vmballoon_stats {
+ unsigned int timer;
+ unsigned int doorbell;
+
+ /* allocation statistics */
+ unsigned int alloc[VMW_BALLOON_NUM_PAGE_SIZES];
+ unsigned int alloc_fail[VMW_BALLOON_NUM_PAGE_SIZES];
+ unsigned int sleep_alloc;
+ unsigned int sleep_alloc_fail;
+ unsigned int refused_alloc[VMW_BALLOON_NUM_PAGE_SIZES];
+ unsigned int refused_free[VMW_BALLOON_NUM_PAGE_SIZES];
+ unsigned int free[VMW_BALLOON_NUM_PAGE_SIZES];
+
+ /* monitor operations */
+ unsigned int lock[VMW_BALLOON_NUM_PAGE_SIZES];
+ unsigned int lock_fail[VMW_BALLOON_NUM_PAGE_SIZES];
+ unsigned int unlock[VMW_BALLOON_NUM_PAGE_SIZES];
+ unsigned int unlock_fail[VMW_BALLOON_NUM_PAGE_SIZES];
+ unsigned int target;
+ unsigned int target_fail;
+ unsigned int start;
+ unsigned int start_fail;
+ unsigned int guest_type;
+ unsigned int guest_type_fail;
+ unsigned int doorbell_set;
+ unsigned int doorbell_unset;
+};
+
+#define STATS_INC(stat) (stat)++
+#else
+#define STATS_INC(stat)
+#endif
+
+struct vmballoon;
+
+struct vmballoon_ops {
+ void (*add_page)(struct vmballoon *b, int idx, struct page *p);
+ int (*lock)(struct vmballoon *b, unsigned int num_pages,
+ bool is_2m_pages, unsigned int *target);
+ int (*unlock)(struct vmballoon *b, unsigned int num_pages,
+ bool is_2m_pages, unsigned int *target);
+};
+
+struct vmballoon_page_size {
+ /* list of reserved physical pages */
+ struct list_head pages;
+
+ /* transient list of non-balloonable pages */
+ struct list_head refused_pages;
+ unsigned int n_refused_pages;
+};
+
+struct vmballoon {
+ struct vmballoon_page_size page_sizes[VMW_BALLOON_NUM_PAGE_SIZES];
+
+ /* supported page sizes. 1 == 4k pages only, 2 == 4k and 2m pages */
+ unsigned supported_page_sizes;
+
+ /* balloon size in pages */
+ unsigned int size;
+ unsigned int target;
+
+ /* reset flag */
+ bool reset_required;
+
+ /* adjustment rates (pages per second) */
+ unsigned int rate_alloc;
+
+ /* slowdown page allocations for next few cycles */
+ unsigned int slow_allocation_cycles;
+
+ unsigned long capabilities;
+
+ struct vmballoon_batch_page *batch_page;
+ unsigned int batch_max_pages;
+ struct page *page;
+
+ const struct vmballoon_ops *ops;
+
+#ifdef CONFIG_DEBUG_FS
+ /* statistics */
+ struct vmballoon_stats stats;
+
+ /* debugfs file exporting statistics */
+ struct dentry *dbg_entry;
+#endif
+
+ struct sysinfo sysinfo;
+
+ struct delayed_work dwork;
+
+ struct vmci_handle vmci_doorbell;
+};
+
+static struct vmballoon balloon;
+
+/*
+ * Send "start" command to the host, communicating supported version
+ * of the protocol.
+ */
+static bool vmballoon_send_start(struct vmballoon *b, unsigned long req_caps)
+{
+ unsigned long status, capabilities, dummy = 0;
+ bool success;
+
+ STATS_INC(b->stats.start);
+
+ status = VMWARE_BALLOON_CMD(START, req_caps, dummy, capabilities);
+
+ switch (status) {
+ case VMW_BALLOON_SUCCESS_WITH_CAPABILITIES:
+ b->capabilities = capabilities;
+ success = true;
+ break;
+ case VMW_BALLOON_SUCCESS:
+ b->capabilities = VMW_BALLOON_BASIC_CMDS;
+ success = true;
+ break;
+ default:
+ success = false;
+ }
+
+ if (b->capabilities & VMW_BALLOON_BATCHED_2M_CMDS)
+ b->supported_page_sizes = 2;
+ else
+ b->supported_page_sizes = 1;
+
+ if (!success) {
+ pr_debug("%s - failed, hv returns %ld\n", __func__, status);
+ STATS_INC(b->stats.start_fail);
+ }
+ return success;
+}
+
+static bool vmballoon_check_status(struct vmballoon *b, unsigned long status)
+{
+ switch (status) {
+ case VMW_BALLOON_SUCCESS:
+ return true;
+
+ case VMW_BALLOON_ERROR_RESET:
+ b->reset_required = true;
+ /* fall through */
+
+ default:
+ return false;
+ }
+}
+
+/*
+ * Communicate guest type to the host so that it can adjust ballooning
+ * algorithm to the one most appropriate for the guest. This command
+ * is normally issued after sending "start" command and is part of
+ * standard reset sequence.
+ */
+static bool vmballoon_send_guest_id(struct vmballoon *b)
+{
+ unsigned long status, dummy = 0;
+
+ status = VMWARE_BALLOON_CMD(GUEST_ID, VMW_BALLOON_GUEST_ID, dummy,
+ dummy);
+
+ STATS_INC(b->stats.guest_type);
+
+ if (vmballoon_check_status(b, status))
+ return true;
+
+ pr_debug("%s - failed, hv returns %ld\n", __func__, status);
+ STATS_INC(b->stats.guest_type_fail);
+ return false;
+}
+
+static u16 vmballoon_page_size(bool is_2m_page)
+{
+ if (is_2m_page)
+ return 1 << VMW_BALLOON_2M_SHIFT;
+
+ return 1;
+}
+
+/*
+ * Retrieve desired balloon size from the host.
+ */
+static bool vmballoon_send_get_target(struct vmballoon *b, u32 *new_target)
+{
+ unsigned long status;
+ unsigned long target;
+ unsigned long limit;
+ unsigned long dummy = 0;
+ u32 limit32;
+
+ /*
+ * si_meminfo() is cheap. Moreover, we want to provide dynamic
+ * max balloon size later. So let us call si_meminfo() every
+ * iteration.
+ */
+ si_meminfo(&b->sysinfo);
+ limit = b->sysinfo.totalram;
+
+ /* Ensure limit fits in 32-bits */
+ limit32 = (u32)limit;
+ if (limit != limit32)
+ return false;
+
+ /* update stats */
+ STATS_INC(b->stats.target);
+
+ status = VMWARE_BALLOON_CMD(GET_TARGET, limit, dummy, target);
+ if (vmballoon_check_status(b, status)) {
+ *new_target = target;
+ return true;
+ }
+
+ pr_debug("%s - failed, hv returns %ld\n", __func__, status);
+ STATS_INC(b->stats.target_fail);
+ return false;
+}
+
+/*
+ * Notify the host about allocated page so that host can use it without
+ * fear that guest will need it. Host may reject some pages, we need to
+ * check the return value and maybe submit a different page.
+ */
+static int vmballoon_send_lock_page(struct vmballoon *b, unsigned long pfn,
+ unsigned int *hv_status, unsigned int *target)
+{
+ unsigned long status, dummy = 0;
+ u32 pfn32;
+
+ pfn32 = (u32)pfn;
+ if (pfn32 != pfn)
+ return -1;
+
+ STATS_INC(b->stats.lock[false]);
+
+ *hv_status = status = VMWARE_BALLOON_CMD(LOCK, pfn, dummy, *target);
+ if (vmballoon_check_status(b, status))
+ return 0;
+
+ pr_debug("%s - ppn %lx, hv returns %ld\n", __func__, pfn, status);
+ STATS_INC(b->stats.lock_fail[false]);
+ return 1;
+}
+
+static int vmballoon_send_batched_lock(struct vmballoon *b,
+ unsigned int num_pages, bool is_2m_pages, unsigned int *target)
+{
+ unsigned long status;
+ unsigned long pfn = page_to_pfn(b->page);
+
+ STATS_INC(b->stats.lock[is_2m_pages]);
+
+ if (is_2m_pages)
+ status = VMWARE_BALLOON_CMD(BATCHED_2M_LOCK, pfn, num_pages,
+ *target);
+ else
+ status = VMWARE_BALLOON_CMD(BATCHED_LOCK, pfn, num_pages,
+ *target);
+
+ if (vmballoon_check_status(b, status))
+ return 0;
+
+ pr_debug("%s - batch ppn %lx, hv returns %ld\n", __func__, pfn, status);
+ STATS_INC(b->stats.lock_fail[is_2m_pages]);
+ return 1;
+}
+
+/*
+ * Notify the host that guest intends to release given page back into
+ * the pool of available (to the guest) pages.
+ */
+static bool vmballoon_send_unlock_page(struct vmballoon *b, unsigned long pfn,
+ unsigned int *target)
+{
+ unsigned long status, dummy = 0;
+ u32 pfn32;
+
+ pfn32 = (u32)pfn;
+ if (pfn32 != pfn)
+ return false;
+
+ STATS_INC(b->stats.unlock[false]);
+
+ status = VMWARE_BALLOON_CMD(UNLOCK, pfn, dummy, *target);
+ if (vmballoon_check_status(b, status))
+ return true;
+
+ pr_debug("%s - ppn %lx, hv returns %ld\n", __func__, pfn, status);
+ STATS_INC(b->stats.unlock_fail[false]);
+ return false;
+}
+
+static bool vmballoon_send_batched_unlock(struct vmballoon *b,
+ unsigned int num_pages, bool is_2m_pages, unsigned int *target)
+{
+ unsigned long status;
+ unsigned long pfn = page_to_pfn(b->page);
+
+ STATS_INC(b->stats.unlock[is_2m_pages]);
+
+ if (is_2m_pages)
+ status = VMWARE_BALLOON_CMD(BATCHED_2M_UNLOCK, pfn, num_pages,
+ *target);
+ else
+ status = VMWARE_BALLOON_CMD(BATCHED_UNLOCK, pfn, num_pages,
+ *target);
+
+ if (vmballoon_check_status(b, status))
+ return true;
+
+ pr_debug("%s - batch ppn %lx, hv returns %ld\n", __func__, pfn, status);
+ STATS_INC(b->stats.unlock_fail[is_2m_pages]);
+ return false;
+}
+
+static struct page *vmballoon_alloc_page(gfp_t flags, bool is_2m_page)
+{
+ if (is_2m_page)
+ return alloc_pages(flags, VMW_BALLOON_2M_SHIFT);
+
+ return alloc_page(flags);
+}
+
+static void vmballoon_free_page(struct page *page, bool is_2m_page)
+{
+ if (is_2m_page)
+ __free_pages(page, VMW_BALLOON_2M_SHIFT);
+ else
+ __free_page(page);
+}
+
+/*
+ * Quickly release all pages allocated for the balloon. This function is
+ * called when host decides to "reset" balloon for one reason or another.
+ * Unlike normal "deflate" we do not (shall not) notify host of the pages
+ * being released.
+ */
+static void vmballoon_pop(struct vmballoon *b)
+{
+ struct page *page, *next;
+ unsigned is_2m_pages;
+
+ for (is_2m_pages = 0; is_2m_pages < VMW_BALLOON_NUM_PAGE_SIZES;
+ is_2m_pages++) {
+ struct vmballoon_page_size *page_size =
+ &b->page_sizes[is_2m_pages];
+ u16 size_per_page = vmballoon_page_size(is_2m_pages);
+
+ list_for_each_entry_safe(page, next, &page_size->pages, lru) {
+ list_del(&page->lru);
+ vmballoon_free_page(page, is_2m_pages);
+ STATS_INC(b->stats.free[is_2m_pages]);
+ b->size -= size_per_page;
+ cond_resched();
+ }
+ }
+
+ if (b->batch_page) {
+ vunmap(b->batch_page);
+ b->batch_page = NULL;
+ }
+
+ if (b->page) {
+ __free_page(b->page);
+ b->page = NULL;
+ }
+}
+
+/*
+ * Notify the host of a ballooned page. If host rejects the page put it on the
+ * refuse list, those refused page are then released at the end of the
+ * inflation cycle.
+ */
+static int vmballoon_lock_page(struct vmballoon *b, unsigned int num_pages,
+ bool is_2m_pages, unsigned int *target)
+{
+ int locked, hv_status;
+ struct page *page = b->page;
+ struct vmballoon_page_size *page_size = &b->page_sizes[false];
+
+ /* is_2m_pages can never happen as 2m pages support implies batching */
+
+ locked = vmballoon_send_lock_page(b, page_to_pfn(page), &hv_status,
+ target);
+ if (locked > 0) {
+ STATS_INC(b->stats.refused_alloc[false]);
+
+ if (hv_status == VMW_BALLOON_ERROR_RESET ||
+ hv_status == VMW_BALLOON_ERROR_PPN_NOTNEEDED) {
+ vmballoon_free_page(page, false);
+ return -EIO;
+ }
+
+ /*
+ * Place page on the list of non-balloonable pages
+ * and retry allocation, unless we already accumulated
+ * too many of them, in which case take a breather.
+ */
+ if (page_size->n_refused_pages < VMW_BALLOON_MAX_REFUSED) {
+ page_size->n_refused_pages++;
+ list_add(&page->lru, &page_size->refused_pages);
+ } else {
+ vmballoon_free_page(page, false);
+ }
+ return -EIO;
+ }
+
+ /* track allocated page */
+ list_add(&page->lru, &page_size->pages);
+
+ /* update balloon size */
+ b->size++;
+
+ return 0;
+}
+
+static int vmballoon_lock_batched_page(struct vmballoon *b,
+ unsigned int num_pages, bool is_2m_pages, unsigned int *target)
+{
+ int locked, i;
+ u16 size_per_page = vmballoon_page_size(is_2m_pages);
+
+ locked = vmballoon_send_batched_lock(b, num_pages, is_2m_pages,
+ target);
+ if (locked > 0) {
+ for (i = 0; i < num_pages; i++) {
+ u64 pa = vmballoon_batch_get_pa(b->batch_page, i);
+ struct page *p = pfn_to_page(pa >> PAGE_SHIFT);
+
+ vmballoon_free_page(p, is_2m_pages);
+ }
+
+ return -EIO;
+ }
+
+ for (i = 0; i < num_pages; i++) {
+ u64 pa = vmballoon_batch_get_pa(b->batch_page, i);
+ struct page *p = pfn_to_page(pa >> PAGE_SHIFT);
+ struct vmballoon_page_size *page_size =
+ &b->page_sizes[is_2m_pages];
+
+ locked = vmballoon_batch_get_status(b->batch_page, i);
+
+ switch (locked) {
+ case VMW_BALLOON_SUCCESS:
+ list_add(&p->lru, &page_size->pages);
+ b->size += size_per_page;
+ break;
+ case VMW_BALLOON_ERROR_PPN_PINNED:
+ case VMW_BALLOON_ERROR_PPN_INVALID:
+ if (page_size->n_refused_pages
+ < VMW_BALLOON_MAX_REFUSED) {
+ list_add(&p->lru, &page_size->refused_pages);
+ page_size->n_refused_pages++;
+ break;
+ }
+ /* Fallthrough */
+ case VMW_BALLOON_ERROR_RESET:
+ case VMW_BALLOON_ERROR_PPN_NOTNEEDED:
+ vmballoon_free_page(p, is_2m_pages);
+ break;
+ default:
+ /* This should never happen */
+ WARN_ON_ONCE(true);
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * Release the page allocated for the balloon. Note that we first notify
+ * the host so it can make sure the page will be available for the guest
+ * to use, if needed.
+ */
+static int vmballoon_unlock_page(struct vmballoon *b, unsigned int num_pages,
+ bool is_2m_pages, unsigned int *target)
+{
+ struct page *page = b->page;
+ struct vmballoon_page_size *page_size = &b->page_sizes[false];
+
+ /* is_2m_pages can never happen as 2m pages support implies batching */
+
+ if (!vmballoon_send_unlock_page(b, page_to_pfn(page), target)) {
+ list_add(&page->lru, &page_size->pages);
+ return -EIO;
+ }
+
+ /* deallocate page */
+ vmballoon_free_page(page, false);
+ STATS_INC(b->stats.free[false]);
+
+ /* update balloon size */
+ b->size--;
+
+ return 0;
+}
+
+static int vmballoon_unlock_batched_page(struct vmballoon *b,
+ unsigned int num_pages, bool is_2m_pages,
+ unsigned int *target)
+{
+ int locked, i, ret = 0;
+ bool hv_success;
+ u16 size_per_page = vmballoon_page_size(is_2m_pages);
+
+ hv_success = vmballoon_send_batched_unlock(b, num_pages, is_2m_pages,
+ target);
+ if (!hv_success)
+ ret = -EIO;
+
+ for (i = 0; i < num_pages; i++) {
+ u64 pa = vmballoon_batch_get_pa(b->batch_page, i);
+ struct page *p = pfn_to_page(pa >> PAGE_SHIFT);
+ struct vmballoon_page_size *page_size =
+ &b->page_sizes[is_2m_pages];
+
+ locked = vmballoon_batch_get_status(b->batch_page, i);
+ if (!hv_success || locked != VMW_BALLOON_SUCCESS) {
+ /*
+ * That page wasn't successfully unlocked by the
+ * hypervisor, re-add it to the list of pages owned by
+ * the balloon driver.
+ */
+ list_add(&p->lru, &page_size->pages);
+ } else {
+ /* deallocate page */
+ vmballoon_free_page(p, is_2m_pages);
+ STATS_INC(b->stats.free[is_2m_pages]);
+
+ /* update balloon size */
+ b->size -= size_per_page;
+ }
+ }
+
+ return ret;
+}
+
+/*
+ * Release pages that were allocated while attempting to inflate the
+ * balloon but were refused by the host for one reason or another.
+ */
+static void vmballoon_release_refused_pages(struct vmballoon *b,
+ bool is_2m_pages)
+{
+ struct page *page, *next;
+ struct vmballoon_page_size *page_size =
+ &b->page_sizes[is_2m_pages];
+
+ list_for_each_entry_safe(page, next, &page_size->refused_pages, lru) {
+ list_del(&page->lru);
+ vmballoon_free_page(page, is_2m_pages);
+ STATS_INC(b->stats.refused_free[is_2m_pages]);
+ }
+
+ page_size->n_refused_pages = 0;
+}
+
+static void vmballoon_add_page(struct vmballoon *b, int idx, struct page *p)
+{
+ b->page = p;
+}
+
+static void vmballoon_add_batched_page(struct vmballoon *b, int idx,
+ struct page *p)
+{
+ vmballoon_batch_set_pa(b->batch_page, idx,
+ (u64)page_to_pfn(p) << PAGE_SHIFT);
+}
+
+/*
+ * Inflate the balloon towards its target size. Note that we try to limit
+ * the rate of allocation to make sure we are not choking the rest of the
+ * system.
+ */
+static void vmballoon_inflate(struct vmballoon *b)
+{
+ unsigned rate;
+ unsigned int allocations = 0;
+ unsigned int num_pages = 0;
+ int error = 0;
+ gfp_t flags = VMW_PAGE_ALLOC_NOSLEEP;
+ bool is_2m_pages;
+
+ pr_debug("%s - size: %d, target %d\n", __func__, b->size, b->target);
+
+ /*
+ * First try NOSLEEP page allocations to inflate balloon.
+ *
+ * If we do not throttle nosleep allocations, we can drain all
+ * free pages in the guest quickly (if the balloon target is high).
+ * As a side-effect, draining free pages helps to inform (force)
+ * the guest to start swapping if balloon target is not met yet,
+ * which is a desired behavior. However, balloon driver can consume
+ * all available CPU cycles if too many pages are allocated in a
+ * second. Therefore, we throttle nosleep allocations even when
+ * the guest is not under memory pressure. OTOH, if we have already
+ * predicted that the guest is under memory pressure, then we
+ * slowdown page allocations considerably.
+ */
+
+ /*
+ * Start with no sleep allocation rate which may be higher
+ * than sleeping allocation rate.
+ */
+ if (b->slow_allocation_cycles) {
+ rate = b->rate_alloc;
+ is_2m_pages = false;
+ } else {
+ rate = UINT_MAX;
+ is_2m_pages =
+ b->supported_page_sizes == VMW_BALLOON_NUM_PAGE_SIZES;
+ }
+
+ pr_debug("%s - goal: %d, no-sleep rate: %u, sleep rate: %d\n",
+ __func__, b->target - b->size, rate, b->rate_alloc);
+
+ while (!b->reset_required &&
+ b->size + num_pages * vmballoon_page_size(is_2m_pages)
+ < b->target) {
+ struct page *page;
+
+ if (flags == VMW_PAGE_ALLOC_NOSLEEP)
+ STATS_INC(b->stats.alloc[is_2m_pages]);
+ else
+ STATS_INC(b->stats.sleep_alloc);
+
+ page = vmballoon_alloc_page(flags, is_2m_pages);
+ if (!page) {
+ STATS_INC(b->stats.alloc_fail[is_2m_pages]);
+
+ if (is_2m_pages) {
+ b->ops->lock(b, num_pages, true, &b->target);
+
+ /*
+ * ignore errors from locking as we now switch
+ * to 4k pages and we might get different
+ * errors.
+ */
+
+ num_pages = 0;
+ is_2m_pages = false;
+ continue;
+ }
+
+ if (flags == VMW_PAGE_ALLOC_CANSLEEP) {
+ /*
+ * CANSLEEP page allocation failed, so guest
+ * is under severe memory pressure. Quickly
+ * decrease allocation rate.
+ */
+ b->rate_alloc = max(b->rate_alloc / 2,
+ VMW_BALLOON_RATE_ALLOC_MIN);
+ STATS_INC(b->stats.sleep_alloc_fail);
+ break;
+ }
+
+ /*
+ * NOSLEEP page allocation failed, so the guest is
+ * under memory pressure. Let us slow down page
+ * allocations for next few cycles so that the guest
+ * gets out of memory pressure. Also, if we already
+ * allocated b->rate_alloc pages, let's pause,
+ * otherwise switch to sleeping allocations.
+ */
+ b->slow_allocation_cycles = VMW_BALLOON_SLOW_CYCLES;
+
+ if (allocations >= b->rate_alloc)
+ break;
+
+ flags = VMW_PAGE_ALLOC_CANSLEEP;
+ /* Lower rate for sleeping allocations. */
+ rate = b->rate_alloc;
+ continue;
+ }
+
+ b->ops->add_page(b, num_pages++, page);
+ if (num_pages == b->batch_max_pages) {
+ error = b->ops->lock(b, num_pages, is_2m_pages,
+ &b->target);
+ num_pages = 0;
+ if (error)
+ break;
+ }
+
+ cond_resched();
+
+ if (allocations >= rate) {
+ /* We allocated enough pages, let's take a break. */
+ break;
+ }
+ }
+
+ if (num_pages > 0)
+ b->ops->lock(b, num_pages, is_2m_pages, &b->target);
+
+ /*
+ * We reached our goal without failures so try increasing
+ * allocation rate.
+ */
+ if (error == 0 && allocations >= b->rate_alloc) {
+ unsigned int mult = allocations / b->rate_alloc;
+
+ b->rate_alloc =
+ min(b->rate_alloc + mult * VMW_BALLOON_RATE_ALLOC_INC,
+ VMW_BALLOON_RATE_ALLOC_MAX);
+ }
+
+ vmballoon_release_refused_pages(b, true);
+ vmballoon_release_refused_pages(b, false);
+}
+
+/*
+ * Decrease the size of the balloon allowing guest to use more memory.
+ */
+static void vmballoon_deflate(struct vmballoon *b)
+{
+ unsigned is_2m_pages;
+
+ pr_debug("%s - size: %d, target %d\n", __func__, b->size, b->target);
+
+ /* free pages to reach target */
+ for (is_2m_pages = 0; is_2m_pages < b->supported_page_sizes;
+ is_2m_pages++) {
+ struct page *page, *next;
+ unsigned int num_pages = 0;
+ struct vmballoon_page_size *page_size =
+ &b->page_sizes[is_2m_pages];
+
+ list_for_each_entry_safe(page, next, &page_size->pages, lru) {
+ if (b->reset_required ||
+ (b->target > 0 &&
+ b->size - num_pages
+ * vmballoon_page_size(is_2m_pages)
+ < b->target + vmballoon_page_size(true)))
+ break;
+
+ list_del(&page->lru);
+ b->ops->add_page(b, num_pages++, page);
+
+ if (num_pages == b->batch_max_pages) {
+ int error;
+
+ error = b->ops->unlock(b, num_pages,
+ is_2m_pages, &b->target);
+ num_pages = 0;
+ if (error)
+ return;
+ }
+
+ cond_resched();
+ }
+
+ if (num_pages > 0)
+ b->ops->unlock(b, num_pages, is_2m_pages, &b->target);
+ }
+}
+
+static const struct vmballoon_ops vmballoon_basic_ops = {
+ .add_page = vmballoon_add_page,
+ .lock = vmballoon_lock_page,
+ .unlock = vmballoon_unlock_page
+};
+
+static const struct vmballoon_ops vmballoon_batched_ops = {
+ .add_page = vmballoon_add_batched_page,
+ .lock = vmballoon_lock_batched_page,
+ .unlock = vmballoon_unlock_batched_page
+};
+
+static bool vmballoon_init_batching(struct vmballoon *b)
+{
+ b->page = alloc_page(VMW_PAGE_ALLOC_NOSLEEP);
+ if (!b->page)
+ return false;
+
+ b->batch_page = vmap(&b->page, 1, VM_MAP, PAGE_KERNEL);
+ if (!b->batch_page) {
+ __free_page(b->page);
+ return false;
+ }
+
+ return true;
+}
+
+/*
+ * Receive notification and resize balloon
+ */
+static void vmballoon_doorbell(void *client_data)
+{
+ struct vmballoon *b = client_data;
+
+ STATS_INC(b->stats.doorbell);
+
+ mod_delayed_work(system_freezable_wq, &b->dwork, 0);
+}
+
+/*
+ * Clean up vmci doorbell
+ */
+static void vmballoon_vmci_cleanup(struct vmballoon *b)
+{
+ int error;
+
+ VMWARE_BALLOON_CMD(VMCI_DOORBELL_SET, VMCI_INVALID_ID,
+ VMCI_INVALID_ID, error);
+ STATS_INC(b->stats.doorbell_unset);
+
+ if (!vmci_handle_is_invalid(b->vmci_doorbell)) {
+ vmci_doorbell_destroy(b->vmci_doorbell);
+ b->vmci_doorbell = VMCI_INVALID_HANDLE;
+ }
+}
+
+/*
+ * Initialize vmci doorbell, to get notified as soon as balloon changes
+ */
+static int vmballoon_vmci_init(struct vmballoon *b)
+{
+ int error = 0;
+
+ if ((b->capabilities & VMW_BALLOON_SIGNALLED_WAKEUP_CMD) != 0) {
+ error = vmci_doorbell_create(&b->vmci_doorbell,
+ VMCI_FLAG_DELAYED_CB,
+ VMCI_PRIVILEGE_FLAG_RESTRICTED,
+ vmballoon_doorbell, b);
+
+ if (error == VMCI_SUCCESS) {
+ VMWARE_BALLOON_CMD(VMCI_DOORBELL_SET,
+ b->vmci_doorbell.context,
+ b->vmci_doorbell.resource, error);
+ STATS_INC(b->stats.doorbell_set);
+ }
+ }
+
+ if (error != 0) {
+ vmballoon_vmci_cleanup(b);
+
+ return -EIO;
+ }
+
+ return 0;
+}
+
+/*
+ * Perform standard reset sequence by popping the balloon (in case it
+ * is not empty) and then restarting protocol. This operation normally
+ * happens when host responds with VMW_BALLOON_ERROR_RESET to a command.
+ */
+static void vmballoon_reset(struct vmballoon *b)
+{
+ int error;
+
+ vmballoon_vmci_cleanup(b);
+
+ /* free all pages, skipping monitor unlock */
+ vmballoon_pop(b);
+
+ if (!vmballoon_send_start(b, VMW_BALLOON_CAPABILITIES))
+ return;
+
+ if ((b->capabilities & VMW_BALLOON_BATCHED_CMDS) != 0) {
+ b->ops = &vmballoon_batched_ops;
+ b->batch_max_pages = VMW_BALLOON_BATCH_MAX_PAGES;
+ if (!vmballoon_init_batching(b)) {
+ /*
+ * We failed to initialize batching, inform the monitor
+ * about it by sending a null capability.
+ *
+ * The guest will retry in one second.
+ */
+ vmballoon_send_start(b, 0);
+ return;
+ }
+ } else if ((b->capabilities & VMW_BALLOON_BASIC_CMDS) != 0) {
+ b->ops = &vmballoon_basic_ops;
+ b->batch_max_pages = 1;
+ }
+
+ b->reset_required = false;
+
+ error = vmballoon_vmci_init(b);
+ if (error)
+ pr_err("failed to initialize vmci doorbell\n");
+
+ if (!vmballoon_send_guest_id(b))
+ pr_err("failed to send guest ID to the host\n");
+}
+
+/*
+ * Balloon work function: reset protocol, if needed, get the new size and
+ * adjust balloon as needed. Repeat in 1 sec.
+ */
+static void vmballoon_work(struct work_struct *work)
+{
+ struct delayed_work *dwork = to_delayed_work(work);
+ struct vmballoon *b = container_of(dwork, struct vmballoon, dwork);
+ unsigned int target;
+
+ STATS_INC(b->stats.timer);
+
+ if (b->reset_required)
+ vmballoon_reset(b);
+
+ if (b->slow_allocation_cycles > 0)
+ b->slow_allocation_cycles--;
+
+ if (!b->reset_required && vmballoon_send_get_target(b, &target)) {
+ /* update target, adjust size */
+ b->target = target;
+
+ if (b->size < target)
+ vmballoon_inflate(b);
+ else if (target == 0 ||
+ b->size > target + vmballoon_page_size(true))
+ vmballoon_deflate(b);
+ }
+
+ /*
+ * We are using a freezable workqueue so that balloon operations are
+ * stopped while the system transitions to/from sleep/hibernation.
+ */
+ queue_delayed_work(system_freezable_wq,
+ dwork, round_jiffies_relative(HZ));
+}
+
+/*
+ * DEBUGFS Interface
+ */
+#ifdef CONFIG_DEBUG_FS
+
+static int vmballoon_debug_show(struct seq_file *f, void *offset)
+{
+ struct vmballoon *b = f->private;
+ struct vmballoon_stats *stats = &b->stats;
+
+ /* format capabilities info */
+ seq_printf(f,
+ "balloon capabilities: %#4x\n"
+ "used capabilities: %#4lx\n"
+ "is resetting: %c\n",
+ VMW_BALLOON_CAPABILITIES, b->capabilities,
+ b->reset_required ? 'y' : 'n');
+
+ /* format size info */
+ seq_printf(f,
+ "target: %8d pages\n"
+ "current: %8d pages\n",
+ b->target, b->size);
+
+ /* format rate info */
+ seq_printf(f,
+ "rateSleepAlloc: %8d pages/sec\n",
+ b->rate_alloc);
+
+ seq_printf(f,
+ "\n"
+ "timer: %8u\n"
+ "doorbell: %8u\n"
+ "start: %8u (%4u failed)\n"
+ "guestType: %8u (%4u failed)\n"
+ "2m-lock: %8u (%4u failed)\n"
+ "lock: %8u (%4u failed)\n"
+ "2m-unlock: %8u (%4u failed)\n"
+ "unlock: %8u (%4u failed)\n"
+ "target: %8u (%4u failed)\n"
+ "prim2mAlloc: %8u (%4u failed)\n"
+ "primNoSleepAlloc: %8u (%4u failed)\n"
+ "primCanSleepAlloc: %8u (%4u failed)\n"
+ "prim2mFree: %8u\n"
+ "primFree: %8u\n"
+ "err2mAlloc: %8u\n"
+ "errAlloc: %8u\n"
+ "err2mFree: %8u\n"
+ "errFree: %8u\n"
+ "doorbellSet: %8u\n"
+ "doorbellUnset: %8u\n",
+ stats->timer,
+ stats->doorbell,
+ stats->start, stats->start_fail,
+ stats->guest_type, stats->guest_type_fail,
+ stats->lock[true], stats->lock_fail[true],
+ stats->lock[false], stats->lock_fail[false],
+ stats->unlock[true], stats->unlock_fail[true],
+ stats->unlock[false], stats->unlock_fail[false],
+ stats->target, stats->target_fail,
+ stats->alloc[true], stats->alloc_fail[true],
+ stats->alloc[false], stats->alloc_fail[false],
+ stats->sleep_alloc, stats->sleep_alloc_fail,
+ stats->free[true],
+ stats->free[false],
+ stats->refused_alloc[true], stats->refused_alloc[false],
+ stats->refused_free[true], stats->refused_free[false],
+ stats->doorbell_set, stats->doorbell_unset);
+
+ return 0;
+}
+
+static int vmballoon_debug_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, vmballoon_debug_show, inode->i_private);
+}
+
+static const struct file_operations vmballoon_debug_fops = {
+ .owner = THIS_MODULE,
+ .open = vmballoon_debug_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+static int __init vmballoon_debugfs_init(struct vmballoon *b)
+{
+ int error;
+
+ b->dbg_entry = debugfs_create_file("vmmemctl", S_IRUGO, NULL, b,
+ &vmballoon_debug_fops);
+ if (IS_ERR(b->dbg_entry)) {
+ error = PTR_ERR(b->dbg_entry);
+ pr_err("failed to create debugfs entry, error: %d\n", error);
+ return error;
+ }
+
+ return 0;
+}
+
+static void __exit vmballoon_debugfs_exit(struct vmballoon *b)
+{
+ debugfs_remove(b->dbg_entry);
+}
+
+#else
+
+static inline int vmballoon_debugfs_init(struct vmballoon *b)
+{
+ return 0;
+}
+
+static inline void vmballoon_debugfs_exit(struct vmballoon *b)
+{
+}
+
+#endif /* CONFIG_DEBUG_FS */
+
+static int __init vmballoon_init(void)
+{
+ int error;
+ unsigned is_2m_pages;
+ /*
+ * Check if we are running on VMware's hypervisor and bail out
+ * if we are not.
+ */
+ if (x86_hyper != &x86_hyper_vmware)
+ return -ENODEV;
+
+ for (is_2m_pages = 0; is_2m_pages < VMW_BALLOON_NUM_PAGE_SIZES;
+ is_2m_pages++) {
+ INIT_LIST_HEAD(&balloon.page_sizes[is_2m_pages].pages);
+ INIT_LIST_HEAD(&balloon.page_sizes[is_2m_pages].refused_pages);
+ }
+
+ /* initialize rates */
+ balloon.rate_alloc = VMW_BALLOON_RATE_ALLOC_MAX;
+
+ INIT_DELAYED_WORK(&balloon.dwork, vmballoon_work);
+
+ error = vmballoon_debugfs_init(&balloon);
+ if (error)
+ return error;
+
+ balloon.vmci_doorbell = VMCI_INVALID_HANDLE;
+ balloon.batch_page = NULL;
+ balloon.page = NULL;
+ balloon.reset_required = true;
+
+ queue_delayed_work(system_freezable_wq, &balloon.dwork, 0);
+
+ return 0;
+}
+module_init(vmballoon_init);
+
+static void __exit vmballoon_exit(void)
+{
+ vmballoon_vmci_cleanup(&balloon);
+ cancel_delayed_work_sync(&balloon.dwork);
+
+ vmballoon_debugfs_exit(&balloon);
+
+ /*
+ * Deallocate all reserved memory, and reset connection with monitor.
+ * Reset connection before deallocating memory to avoid potential for
+ * additional spurious resets from guest touching deallocated pages.
+ */
+ vmballoon_send_start(&balloon, 0);
+ vmballoon_pop(&balloon);
+}
+module_exit(vmballoon_exit);
diff --git a/drivers/misc/vmw_vmci/Kconfig b/drivers/misc/vmw_vmci/Kconfig
new file mode 100644
index 0000000..39c2eca
--- /dev/null
+++ b/drivers/misc/vmw_vmci/Kconfig
@@ -0,0 +1,16 @@
+#
+# VMware VMCI device
+#
+
+config VMWARE_VMCI
+ tristate "VMware VMCI Driver"
+ depends on X86 && PCI
+ help
+ This is VMware's Virtual Machine Communication Interface. It enables
+ high-speed communication between host and guest in a virtual
+ environment via the VMCI virtual device.
+
+ If unsure, say N.
+
+ To compile this driver as a module, choose M here: the
+ module will be called vmw_vmci.
diff --git a/drivers/misc/vmw_vmci/Makefile b/drivers/misc/vmw_vmci/Makefile
new file mode 100644
index 0000000..4da9893
--- /dev/null
+++ b/drivers/misc/vmw_vmci/Makefile
@@ -0,0 +1,4 @@
+obj-$(CONFIG_VMWARE_VMCI) += vmw_vmci.o
+vmw_vmci-y += vmci_context.o vmci_datagram.o vmci_doorbell.o \
+ vmci_driver.o vmci_event.o vmci_guest.o vmci_handle_array.o \
+ vmci_host.o vmci_queue_pair.o vmci_resource.o vmci_route.o
diff --git a/drivers/misc/vmw_vmci/vmci_context.c b/drivers/misc/vmw_vmci/vmci_context.c
new file mode 100644
index 0000000..f866a4ba
--- /dev/null
+++ b/drivers/misc/vmw_vmci/vmci_context.c
@@ -0,0 +1,1214 @@
+/*
+ * VMware VMCI Driver
+ *
+ * Copyright (C) 2012 VMware, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation version 2 and no later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ */
+
+#include <linux/vmw_vmci_defs.h>
+#include <linux/vmw_vmci_api.h>
+#include <linux/highmem.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+
+#include "vmci_queue_pair.h"
+#include "vmci_datagram.h"
+#include "vmci_doorbell.h"
+#include "vmci_context.h"
+#include "vmci_driver.h"
+#include "vmci_event.h"
+
+/*
+ * List of current VMCI contexts. Contexts can be added by
+ * vmci_ctx_create() and removed via vmci_ctx_destroy().
+ * These, along with context lookup, are protected by the
+ * list structure's lock.
+ */
+static struct {
+ struct list_head head;
+ spinlock_t lock; /* Spinlock for context list operations */
+} ctx_list = {
+ .head = LIST_HEAD_INIT(ctx_list.head),
+ .lock = __SPIN_LOCK_UNLOCKED(ctx_list.lock),
+};
+
+/* Used by contexts that did not set up notify flag pointers */
+static bool ctx_dummy_notify;
+
+static void ctx_signal_notify(struct vmci_ctx *context)
+{
+ *context->notify = true;
+}
+
+static void ctx_clear_notify(struct vmci_ctx *context)
+{
+ *context->notify = false;
+}
+
+/*
+ * If nothing requires the attention of the guest, clears both
+ * notify flag and call.
+ */
+static void ctx_clear_notify_call(struct vmci_ctx *context)
+{
+ if (context->pending_datagrams == 0 &&
+ vmci_handle_arr_get_size(context->pending_doorbell_array) == 0)
+ ctx_clear_notify(context);
+}
+
+/*
+ * Sets the context's notify flag iff datagrams are pending for this
+ * context. Called from vmci_setup_notify().
+ */
+void vmci_ctx_check_signal_notify(struct vmci_ctx *context)
+{
+ spin_lock(&context->lock);
+ if (context->pending_datagrams)
+ ctx_signal_notify(context);
+ spin_unlock(&context->lock);
+}
+
+/*
+ * Allocates and initializes a VMCI context.
+ */
+struct vmci_ctx *vmci_ctx_create(u32 cid, u32 priv_flags,
+ uintptr_t event_hnd,
+ int user_version,
+ const struct cred *cred)
+{
+ struct vmci_ctx *context;
+ int error;
+
+ if (cid == VMCI_INVALID_ID) {
+ pr_devel("Invalid context ID for VMCI context\n");
+ error = -EINVAL;
+ goto err_out;
+ }
+
+ if (priv_flags & ~VMCI_PRIVILEGE_ALL_FLAGS) {
+ pr_devel("Invalid flag (flags=0x%x) for VMCI context\n",
+ priv_flags);
+ error = -EINVAL;
+ goto err_out;
+ }
+
+ if (user_version == 0) {
+ pr_devel("Invalid suer_version %d\n", user_version);
+ error = -EINVAL;
+ goto err_out;
+ }
+
+ context = kzalloc(sizeof(*context), GFP_KERNEL);
+ if (!context) {
+ pr_warn("Failed to allocate memory for VMCI context\n");
+ error = -EINVAL;
+ goto err_out;
+ }
+
+ kref_init(&context->kref);
+ spin_lock_init(&context->lock);
+ INIT_LIST_HEAD(&context->list_item);
+ INIT_LIST_HEAD(&context->datagram_queue);
+ INIT_LIST_HEAD(&context->notifier_list);
+
+ /* Initialize host-specific VMCI context. */
+ init_waitqueue_head(&context->host_context.wait_queue);
+
+ context->queue_pair_array = vmci_handle_arr_create(0);
+ if (!context->queue_pair_array) {
+ error = -ENOMEM;
+ goto err_free_ctx;
+ }
+
+ context->doorbell_array = vmci_handle_arr_create(0);
+ if (!context->doorbell_array) {
+ error = -ENOMEM;
+ goto err_free_qp_array;
+ }
+
+ context->pending_doorbell_array = vmci_handle_arr_create(0);
+ if (!context->pending_doorbell_array) {
+ error = -ENOMEM;
+ goto err_free_db_array;
+ }
+
+ context->user_version = user_version;
+
+ context->priv_flags = priv_flags;
+
+ if (cred)
+ context->cred = get_cred(cred);
+
+ context->notify = &ctx_dummy_notify;
+ context->notify_page = NULL;
+
+ /*
+ * If we collide with an existing context we generate a new
+ * and use it instead. The VMX will determine if regeneration
+ * is okay. Since there isn't 4B - 16 VMs running on a given
+ * host, the below loop will terminate.
+ */
+ spin_lock(&ctx_list.lock);
+
+ while (vmci_ctx_exists(cid)) {
+ /* We reserve the lowest 16 ids for fixed contexts. */
+ cid = max(cid, VMCI_RESERVED_CID_LIMIT - 1) + 1;
+ if (cid == VMCI_INVALID_ID)
+ cid = VMCI_RESERVED_CID_LIMIT;
+ }
+ context->cid = cid;
+
+ list_add_tail_rcu(&context->list_item, &ctx_list.head);
+ spin_unlock(&ctx_list.lock);
+
+ return context;
+
+ err_free_db_array:
+ vmci_handle_arr_destroy(context->doorbell_array);
+ err_free_qp_array:
+ vmci_handle_arr_destroy(context->queue_pair_array);
+ err_free_ctx:
+ kfree(context);
+ err_out:
+ return ERR_PTR(error);
+}
+
+/*
+ * Destroy VMCI context.
+ */
+void vmci_ctx_destroy(struct vmci_ctx *context)
+{
+ spin_lock(&ctx_list.lock);
+ list_del_rcu(&context->list_item);
+ spin_unlock(&ctx_list.lock);
+ synchronize_rcu();
+
+ vmci_ctx_put(context);
+}
+
+/*
+ * Fire notification for all contexts interested in given cid.
+ */
+static int ctx_fire_notification(u32 context_id, u32 priv_flags)
+{
+ u32 i, array_size;
+ struct vmci_ctx *sub_ctx;
+ struct vmci_handle_arr *subscriber_array;
+ struct vmci_handle context_handle =
+ vmci_make_handle(context_id, VMCI_EVENT_HANDLER);
+
+ /*
+ * We create an array to hold the subscribers we find when
+ * scanning through all contexts.
+ */
+ subscriber_array = vmci_handle_arr_create(0);
+ if (subscriber_array == NULL)
+ return VMCI_ERROR_NO_MEM;
+
+ /*
+ * Scan all contexts to find who is interested in being
+ * notified about given contextID.
+ */
+ rcu_read_lock();
+ list_for_each_entry_rcu(sub_ctx, &ctx_list.head, list_item) {
+ struct vmci_handle_list *node;
+
+ /*
+ * We only deliver notifications of the removal of
+ * contexts, if the two contexts are allowed to
+ * interact.
+ */
+ if (vmci_deny_interaction(priv_flags, sub_ctx->priv_flags))
+ continue;
+
+ list_for_each_entry_rcu(node, &sub_ctx->notifier_list, node) {
+ if (!vmci_handle_is_equal(node->handle, context_handle))
+ continue;
+
+ vmci_handle_arr_append_entry(&subscriber_array,
+ vmci_make_handle(sub_ctx->cid,
+ VMCI_EVENT_HANDLER));
+ }
+ }
+ rcu_read_unlock();
+
+ /* Fire event to all subscribers. */
+ array_size = vmci_handle_arr_get_size(subscriber_array);
+ for (i = 0; i < array_size; i++) {
+ int result;
+ struct vmci_event_ctx ev;
+
+ ev.msg.hdr.dst = vmci_handle_arr_get_entry(subscriber_array, i);
+ ev.msg.hdr.src = vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID,
+ VMCI_CONTEXT_RESOURCE_ID);
+ ev.msg.hdr.payload_size = sizeof(ev) - sizeof(ev.msg.hdr);
+ ev.msg.event_data.event = VMCI_EVENT_CTX_REMOVED;
+ ev.payload.context_id = context_id;
+
+ result = vmci_datagram_dispatch(VMCI_HYPERVISOR_CONTEXT_ID,
+ &ev.msg.hdr, false);
+ if (result < VMCI_SUCCESS) {
+ pr_devel("Failed to enqueue event datagram (type=%d) for context (ID=0x%x)\n",
+ ev.msg.event_data.event,
+ ev.msg.hdr.dst.context);
+ /* We continue to enqueue on next subscriber. */
+ }
+ }
+ vmci_handle_arr_destroy(subscriber_array);
+
+ return VMCI_SUCCESS;
+}
+
+/*
+ * Returns the current number of pending datagrams. The call may
+ * also serve as a synchronization point for the datagram queue,
+ * as no enqueue operations can occur concurrently.
+ */
+int vmci_ctx_pending_datagrams(u32 cid, u32 *pending)
+{
+ struct vmci_ctx *context;
+
+ context = vmci_ctx_get(cid);
+ if (context == NULL)
+ return VMCI_ERROR_INVALID_ARGS;
+
+ spin_lock(&context->lock);
+ if (pending)
+ *pending = context->pending_datagrams;
+ spin_unlock(&context->lock);
+ vmci_ctx_put(context);
+
+ return VMCI_SUCCESS;
+}
+
+/*
+ * Queues a VMCI datagram for the appropriate target VM context.
+ */
+int vmci_ctx_enqueue_datagram(u32 cid, struct vmci_datagram *dg)
+{
+ struct vmci_datagram_queue_entry *dq_entry;
+ struct vmci_ctx *context;
+ struct vmci_handle dg_src;
+ size_t vmci_dg_size;
+
+ vmci_dg_size = VMCI_DG_SIZE(dg);
+ if (vmci_dg_size > VMCI_MAX_DG_SIZE) {
+ pr_devel("Datagram too large (bytes=%Zu)\n", vmci_dg_size);
+ return VMCI_ERROR_INVALID_ARGS;
+ }
+
+ /* Get the target VM's VMCI context. */
+ context = vmci_ctx_get(cid);
+ if (!context) {
+ pr_devel("Invalid context (ID=0x%x)\n", cid);
+ return VMCI_ERROR_INVALID_ARGS;
+ }
+
+ /* Allocate guest call entry and add it to the target VM's queue. */
+ dq_entry = kmalloc(sizeof(*dq_entry), GFP_KERNEL);
+ if (dq_entry == NULL) {
+ pr_warn("Failed to allocate memory for datagram\n");
+ vmci_ctx_put(context);
+ return VMCI_ERROR_NO_MEM;
+ }
+ dq_entry->dg = dg;
+ dq_entry->dg_size = vmci_dg_size;
+ dg_src = dg->src;
+ INIT_LIST_HEAD(&dq_entry->list_item);
+
+ spin_lock(&context->lock);
+
+ /*
+ * We put a higher limit on datagrams from the hypervisor. If
+ * the pending datagram is not from hypervisor, then we check
+ * if enqueueing it would exceed the
+ * VMCI_MAX_DATAGRAM_QUEUE_SIZE limit on the destination. If
+ * the pending datagram is from hypervisor, we allow it to be
+ * queued at the destination side provided we don't reach the
+ * VMCI_MAX_DATAGRAM_AND_EVENT_QUEUE_SIZE limit.
+ */
+ if (context->datagram_queue_size + vmci_dg_size >=
+ VMCI_MAX_DATAGRAM_QUEUE_SIZE &&
+ (!vmci_handle_is_equal(dg_src,
+ vmci_make_handle
+ (VMCI_HYPERVISOR_CONTEXT_ID,
+ VMCI_CONTEXT_RESOURCE_ID)) ||
+ context->datagram_queue_size + vmci_dg_size >=
+ VMCI_MAX_DATAGRAM_AND_EVENT_QUEUE_SIZE)) {
+ spin_unlock(&context->lock);
+ vmci_ctx_put(context);
+ kfree(dq_entry);
+ pr_devel("Context (ID=0x%x) receive queue is full\n", cid);
+ return VMCI_ERROR_NO_RESOURCES;
+ }
+
+ list_add(&dq_entry->list_item, &context->datagram_queue);
+ context->pending_datagrams++;
+ context->datagram_queue_size += vmci_dg_size;
+ ctx_signal_notify(context);
+ wake_up(&context->host_context.wait_queue);
+ spin_unlock(&context->lock);
+ vmci_ctx_put(context);
+
+ return vmci_dg_size;
+}
+
+/*
+ * Verifies whether a context with the specified context ID exists.
+ * FIXME: utility is dubious as no decisions can be reliably made
+ * using this data as context can appear and disappear at any time.
+ */
+bool vmci_ctx_exists(u32 cid)
+{
+ struct vmci_ctx *context;
+ bool exists = false;
+
+ rcu_read_lock();
+
+ list_for_each_entry_rcu(context, &ctx_list.head, list_item) {
+ if (context->cid == cid) {
+ exists = true;
+ break;
+ }
+ }
+
+ rcu_read_unlock();
+ return exists;
+}
+
+/*
+ * Retrieves VMCI context corresponding to the given cid.
+ */
+struct vmci_ctx *vmci_ctx_get(u32 cid)
+{
+ struct vmci_ctx *c, *context = NULL;
+
+ if (cid == VMCI_INVALID_ID)
+ return NULL;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(c, &ctx_list.head, list_item) {
+ if (c->cid == cid) {
+ /*
+ * The context owner drops its own reference to the
+ * context only after removing it from the list and
+ * waiting for RCU grace period to expire. This
+ * means that we are not about to increase the
+ * reference count of something that is in the
+ * process of being destroyed.
+ */
+ context = c;
+ kref_get(&context->kref);
+ break;
+ }
+ }
+ rcu_read_unlock();
+
+ return context;
+}
+
+/*
+ * Deallocates all parts of a context data structure. This
+ * function doesn't lock the context, because it assumes that
+ * the caller was holding the last reference to context.
+ */
+static void ctx_free_ctx(struct kref *kref)
+{
+ struct vmci_ctx *context = container_of(kref, struct vmci_ctx, kref);
+ struct vmci_datagram_queue_entry *dq_entry, *dq_entry_tmp;
+ struct vmci_handle temp_handle;
+ struct vmci_handle_list *notifier, *tmp;
+
+ /*
+ * Fire event to all contexts interested in knowing this
+ * context is dying.
+ */
+ ctx_fire_notification(context->cid, context->priv_flags);
+
+ /*
+ * Cleanup all queue pair resources attached to context. If
+ * the VM dies without cleaning up, this code will make sure
+ * that no resources are leaked.
+ */
+ temp_handle = vmci_handle_arr_get_entry(context->queue_pair_array, 0);
+ while (!vmci_handle_is_equal(temp_handle, VMCI_INVALID_HANDLE)) {
+ if (vmci_qp_broker_detach(temp_handle,
+ context) < VMCI_SUCCESS) {
+ /*
+ * When vmci_qp_broker_detach() succeeds it
+ * removes the handle from the array. If
+ * detach fails, we must remove the handle
+ * ourselves.
+ */
+ vmci_handle_arr_remove_entry(context->queue_pair_array,
+ temp_handle);
+ }
+ temp_handle =
+ vmci_handle_arr_get_entry(context->queue_pair_array, 0);
+ }
+
+ /*
+ * It is fine to destroy this without locking the callQueue, as
+ * this is the only thread having a reference to the context.
+ */
+ list_for_each_entry_safe(dq_entry, dq_entry_tmp,
+ &context->datagram_queue, list_item) {
+ WARN_ON(dq_entry->dg_size != VMCI_DG_SIZE(dq_entry->dg));
+ list_del(&dq_entry->list_item);
+ kfree(dq_entry->dg);
+ kfree(dq_entry);
+ }
+
+ list_for_each_entry_safe(notifier, tmp,
+ &context->notifier_list, node) {
+ list_del(¬ifier->node);
+ kfree(notifier);
+ }
+
+ vmci_handle_arr_destroy(context->queue_pair_array);
+ vmci_handle_arr_destroy(context->doorbell_array);
+ vmci_handle_arr_destroy(context->pending_doorbell_array);
+ vmci_ctx_unset_notify(context);
+ if (context->cred)
+ put_cred(context->cred);
+ kfree(context);
+}
+
+/*
+ * Drops reference to VMCI context. If this is the last reference to
+ * the context it will be deallocated. A context is created with
+ * a reference count of one, and on destroy, it is removed from
+ * the context list before its reference count is decremented. Thus,
+ * if we reach zero, we are sure that nobody else are about to increment
+ * it (they need the entry in the context list for that), and so there
+ * is no need for locking.
+ */
+void vmci_ctx_put(struct vmci_ctx *context)
+{
+ kref_put(&context->kref, ctx_free_ctx);
+}
+
+/*
+ * Dequeues the next datagram and returns it to caller.
+ * The caller passes in a pointer to the max size datagram
+ * it can handle and the datagram is only unqueued if the
+ * size is less than max_size. If larger max_size is set to
+ * the size of the datagram to give the caller a chance to
+ * set up a larger buffer for the guestcall.
+ */
+int vmci_ctx_dequeue_datagram(struct vmci_ctx *context,
+ size_t *max_size,
+ struct vmci_datagram **dg)
+{
+ struct vmci_datagram_queue_entry *dq_entry;
+ struct list_head *list_item;
+ int rv;
+
+ /* Dequeue the next datagram entry. */
+ spin_lock(&context->lock);
+ if (context->pending_datagrams == 0) {
+ ctx_clear_notify_call(context);
+ spin_unlock(&context->lock);
+ pr_devel("No datagrams pending\n");
+ return VMCI_ERROR_NO_MORE_DATAGRAMS;
+ }
+
+ list_item = context->datagram_queue.next;
+
+ dq_entry =
+ list_entry(list_item, struct vmci_datagram_queue_entry, list_item);
+
+ /* Check size of caller's buffer. */
+ if (*max_size < dq_entry->dg_size) {
+ *max_size = dq_entry->dg_size;
+ spin_unlock(&context->lock);
+ pr_devel("Caller's buffer should be at least (size=%u bytes)\n",
+ (u32) *max_size);
+ return VMCI_ERROR_NO_MEM;
+ }
+
+ list_del(list_item);
+ context->pending_datagrams--;
+ context->datagram_queue_size -= dq_entry->dg_size;
+ if (context->pending_datagrams == 0) {
+ ctx_clear_notify_call(context);
+ rv = VMCI_SUCCESS;
+ } else {
+ /*
+ * Return the size of the next datagram.
+ */
+ struct vmci_datagram_queue_entry *next_entry;
+
+ list_item = context->datagram_queue.next;
+ next_entry =
+ list_entry(list_item, struct vmci_datagram_queue_entry,
+ list_item);
+
+ /*
+ * The following size_t -> int truncation is fine as
+ * the maximum size of a (routable) datagram is 68KB.
+ */
+ rv = (int)next_entry->dg_size;
+ }
+ spin_unlock(&context->lock);
+
+ /* Caller must free datagram. */
+ *dg = dq_entry->dg;
+ dq_entry->dg = NULL;
+ kfree(dq_entry);
+
+ return rv;
+}
+
+/*
+ * Reverts actions set up by vmci_setup_notify(). Unmaps and unlocks the
+ * page mapped/locked by vmci_setup_notify().
+ */
+void vmci_ctx_unset_notify(struct vmci_ctx *context)
+{
+ struct page *notify_page;
+
+ spin_lock(&context->lock);
+
+ notify_page = context->notify_page;
+ context->notify = &ctx_dummy_notify;
+ context->notify_page = NULL;
+
+ spin_unlock(&context->lock);
+
+ if (notify_page) {
+ kunmap(notify_page);
+ put_page(notify_page);
+ }
+}
+
+/*
+ * Add remote_cid to list of contexts current contexts wants
+ * notifications from/about.
+ */
+int vmci_ctx_add_notification(u32 context_id, u32 remote_cid)
+{
+ struct vmci_ctx *context;
+ struct vmci_handle_list *notifier, *n;
+ int result;
+ bool exists = false;
+
+ context = vmci_ctx_get(context_id);
+ if (!context)
+ return VMCI_ERROR_NOT_FOUND;
+
+ if (VMCI_CONTEXT_IS_VM(context_id) && VMCI_CONTEXT_IS_VM(remote_cid)) {
+ pr_devel("Context removed notifications for other VMs not supported (src=0x%x, remote=0x%x)\n",
+ context_id, remote_cid);
+ result = VMCI_ERROR_DST_UNREACHABLE;
+ goto out;
+ }
+
+ if (context->priv_flags & VMCI_PRIVILEGE_FLAG_RESTRICTED) {
+ result = VMCI_ERROR_NO_ACCESS;
+ goto out;
+ }
+
+ notifier = kmalloc(sizeof(struct vmci_handle_list), GFP_KERNEL);
+ if (!notifier) {
+ result = VMCI_ERROR_NO_MEM;
+ goto out;
+ }
+
+ INIT_LIST_HEAD(¬ifier->node);
+ notifier->handle = vmci_make_handle(remote_cid, VMCI_EVENT_HANDLER);
+
+ spin_lock(&context->lock);
+
+ list_for_each_entry(n, &context->notifier_list, node) {
+ if (vmci_handle_is_equal(n->handle, notifier->handle)) {
+ exists = true;
+ break;
+ }
+ }
+
+ if (exists) {
+ kfree(notifier);
+ result = VMCI_ERROR_ALREADY_EXISTS;
+ } else {
+ list_add_tail_rcu(¬ifier->node, &context->notifier_list);
+ context->n_notifiers++;
+ result = VMCI_SUCCESS;
+ }
+
+ spin_unlock(&context->lock);
+
+ out:
+ vmci_ctx_put(context);
+ return result;
+}
+
+/*
+ * Remove remote_cid from current context's list of contexts it is
+ * interested in getting notifications from/about.
+ */
+int vmci_ctx_remove_notification(u32 context_id, u32 remote_cid)
+{
+ struct vmci_ctx *context;
+ struct vmci_handle_list *notifier, *tmp;
+ struct vmci_handle handle;
+ bool found = false;
+
+ context = vmci_ctx_get(context_id);
+ if (!context)
+ return VMCI_ERROR_NOT_FOUND;
+
+ handle = vmci_make_handle(remote_cid, VMCI_EVENT_HANDLER);
+
+ spin_lock(&context->lock);
+ list_for_each_entry_safe(notifier, tmp,
+ &context->notifier_list, node) {
+ if (vmci_handle_is_equal(notifier->handle, handle)) {
+ list_del_rcu(¬ifier->node);
+ context->n_notifiers--;
+ found = true;
+ break;
+ }
+ }
+ spin_unlock(&context->lock);
+
+ if (found) {
+ synchronize_rcu();
+ kfree(notifier);
+ }
+
+ vmci_ctx_put(context);
+
+ return found ? VMCI_SUCCESS : VMCI_ERROR_NOT_FOUND;
+}
+
+static int vmci_ctx_get_chkpt_notifiers(struct vmci_ctx *context,
+ u32 *buf_size, void **pbuf)
+{
+ u32 *notifiers;
+ size_t data_size;
+ struct vmci_handle_list *entry;
+ int i = 0;
+
+ if (context->n_notifiers == 0) {
+ *buf_size = 0;
+ *pbuf = NULL;
+ return VMCI_SUCCESS;
+ }
+
+ data_size = context->n_notifiers * sizeof(*notifiers);
+ if (*buf_size < data_size) {
+ *buf_size = data_size;
+ return VMCI_ERROR_MORE_DATA;
+ }
+
+ notifiers = kmalloc(data_size, GFP_ATOMIC); /* FIXME: want GFP_KERNEL */
+ if (!notifiers)
+ return VMCI_ERROR_NO_MEM;
+
+ list_for_each_entry(entry, &context->notifier_list, node)
+ notifiers[i++] = entry->handle.context;
+
+ *buf_size = data_size;
+ *pbuf = notifiers;
+ return VMCI_SUCCESS;
+}
+
+static int vmci_ctx_get_chkpt_doorbells(struct vmci_ctx *context,
+ u32 *buf_size, void **pbuf)
+{
+ struct dbell_cpt_state *dbells;
+ size_t n_doorbells;
+ int i;
+
+ n_doorbells = vmci_handle_arr_get_size(context->doorbell_array);
+ if (n_doorbells > 0) {
+ size_t data_size = n_doorbells * sizeof(*dbells);
+ if (*buf_size < data_size) {
+ *buf_size = data_size;
+ return VMCI_ERROR_MORE_DATA;
+ }
+
+ dbells = kmalloc(data_size, GFP_ATOMIC);
+ if (!dbells)
+ return VMCI_ERROR_NO_MEM;
+
+ for (i = 0; i < n_doorbells; i++)
+ dbells[i].handle = vmci_handle_arr_get_entry(
+ context->doorbell_array, i);
+
+ *buf_size = data_size;
+ *pbuf = dbells;
+ } else {
+ *buf_size = 0;
+ *pbuf = NULL;
+ }
+
+ return VMCI_SUCCESS;
+}
+
+/*
+ * Get current context's checkpoint state of given type.
+ */
+int vmci_ctx_get_chkpt_state(u32 context_id,
+ u32 cpt_type,
+ u32 *buf_size,
+ void **pbuf)
+{
+ struct vmci_ctx *context;
+ int result;
+
+ context = vmci_ctx_get(context_id);
+ if (!context)
+ return VMCI_ERROR_NOT_FOUND;
+
+ spin_lock(&context->lock);
+
+ switch (cpt_type) {
+ case VMCI_NOTIFICATION_CPT_STATE:
+ result = vmci_ctx_get_chkpt_notifiers(context, buf_size, pbuf);
+ break;
+
+ case VMCI_WELLKNOWN_CPT_STATE:
+ /*
+ * For compatibility with VMX'en with VM to VM communication, we
+ * always return zero wellknown handles.
+ */
+
+ *buf_size = 0;
+ *pbuf = NULL;
+ result = VMCI_SUCCESS;
+ break;
+
+ case VMCI_DOORBELL_CPT_STATE:
+ result = vmci_ctx_get_chkpt_doorbells(context, buf_size, pbuf);
+ break;
+
+ default:
+ pr_devel("Invalid cpt state (type=%d)\n", cpt_type);
+ result = VMCI_ERROR_INVALID_ARGS;
+ break;
+ }
+
+ spin_unlock(&context->lock);
+ vmci_ctx_put(context);
+
+ return result;
+}
+
+/*
+ * Set current context's checkpoint state of given type.
+ */
+int vmci_ctx_set_chkpt_state(u32 context_id,
+ u32 cpt_type,
+ u32 buf_size,
+ void *cpt_buf)
+{
+ u32 i;
+ u32 current_id;
+ int result = VMCI_SUCCESS;
+ u32 num_ids = buf_size / sizeof(u32);
+
+ if (cpt_type == VMCI_WELLKNOWN_CPT_STATE && num_ids > 0) {
+ /*
+ * We would end up here if VMX with VM to VM communication
+ * attempts to restore a checkpoint with wellknown handles.
+ */
+ pr_warn("Attempt to restore checkpoint with obsolete wellknown handles\n");
+ return VMCI_ERROR_OBSOLETE;
+ }
+
+ if (cpt_type != VMCI_NOTIFICATION_CPT_STATE) {
+ pr_devel("Invalid cpt state (type=%d)\n", cpt_type);
+ return VMCI_ERROR_INVALID_ARGS;
+ }
+
+ for (i = 0; i < num_ids && result == VMCI_SUCCESS; i++) {
+ current_id = ((u32 *)cpt_buf)[i];
+ result = vmci_ctx_add_notification(context_id, current_id);
+ if (result != VMCI_SUCCESS)
+ break;
+ }
+ if (result != VMCI_SUCCESS)
+ pr_devel("Failed to set cpt state (type=%d) (error=%d)\n",
+ cpt_type, result);
+
+ return result;
+}
+
+/*
+ * Retrieves the specified context's pending notifications in the
+ * form of a handle array. The handle arrays returned are the
+ * actual data - not a copy and should not be modified by the
+ * caller. They must be released using
+ * vmci_ctx_rcv_notifications_release.
+ */
+int vmci_ctx_rcv_notifications_get(u32 context_id,
+ struct vmci_handle_arr **db_handle_array,
+ struct vmci_handle_arr **qp_handle_array)
+{
+ struct vmci_ctx *context;
+ int result = VMCI_SUCCESS;
+
+ context = vmci_ctx_get(context_id);
+ if (context == NULL)
+ return VMCI_ERROR_NOT_FOUND;
+
+ spin_lock(&context->lock);
+
+ *db_handle_array = context->pending_doorbell_array;
+ context->pending_doorbell_array = vmci_handle_arr_create(0);
+ if (!context->pending_doorbell_array) {
+ context->pending_doorbell_array = *db_handle_array;
+ *db_handle_array = NULL;
+ result = VMCI_ERROR_NO_MEM;
+ }
+ *qp_handle_array = NULL;
+
+ spin_unlock(&context->lock);
+ vmci_ctx_put(context);
+
+ return result;
+}
+
+/*
+ * Releases handle arrays with pending notifications previously
+ * retrieved using vmci_ctx_rcv_notifications_get. If the
+ * notifications were not successfully handed over to the guest,
+ * success must be false.
+ */
+void vmci_ctx_rcv_notifications_release(u32 context_id,
+ struct vmci_handle_arr *db_handle_array,
+ struct vmci_handle_arr *qp_handle_array,
+ bool success)
+{
+ struct vmci_ctx *context = vmci_ctx_get(context_id);
+
+ spin_lock(&context->lock);
+ if (!success) {
+ struct vmci_handle handle;
+
+ /*
+ * New notifications may have been added while we were not
+ * holding the context lock, so we transfer any new pending
+ * doorbell notifications to the old array, and reinstate the
+ * old array.
+ */
+
+ handle = vmci_handle_arr_remove_tail(
+ context->pending_doorbell_array);
+ while (!vmci_handle_is_invalid(handle)) {
+ if (!vmci_handle_arr_has_entry(db_handle_array,
+ handle)) {
+ vmci_handle_arr_append_entry(
+ &db_handle_array, handle);
+ }
+ handle = vmci_handle_arr_remove_tail(
+ context->pending_doorbell_array);
+ }
+ vmci_handle_arr_destroy(context->pending_doorbell_array);
+ context->pending_doorbell_array = db_handle_array;
+ db_handle_array = NULL;
+ } else {
+ ctx_clear_notify_call(context);
+ }
+ spin_unlock(&context->lock);
+ vmci_ctx_put(context);
+
+ if (db_handle_array)
+ vmci_handle_arr_destroy(db_handle_array);
+
+ if (qp_handle_array)
+ vmci_handle_arr_destroy(qp_handle_array);
+}
+
+/*
+ * Registers that a new doorbell handle has been allocated by the
+ * context. Only doorbell handles registered can be notified.
+ */
+int vmci_ctx_dbell_create(u32 context_id, struct vmci_handle handle)
+{
+ struct vmci_ctx *context;
+ int result;
+
+ if (context_id == VMCI_INVALID_ID || vmci_handle_is_invalid(handle))
+ return VMCI_ERROR_INVALID_ARGS;
+
+ context = vmci_ctx_get(context_id);
+ if (context == NULL)
+ return VMCI_ERROR_NOT_FOUND;
+
+ spin_lock(&context->lock);
+ if (!vmci_handle_arr_has_entry(context->doorbell_array, handle)) {
+ vmci_handle_arr_append_entry(&context->doorbell_array, handle);
+ result = VMCI_SUCCESS;
+ } else {
+ result = VMCI_ERROR_DUPLICATE_ENTRY;
+ }
+
+ spin_unlock(&context->lock);
+ vmci_ctx_put(context);
+
+ return result;
+}
+
+/*
+ * Unregisters a doorbell handle that was previously registered
+ * with vmci_ctx_dbell_create.
+ */
+int vmci_ctx_dbell_destroy(u32 context_id, struct vmci_handle handle)
+{
+ struct vmci_ctx *context;
+ struct vmci_handle removed_handle;
+
+ if (context_id == VMCI_INVALID_ID || vmci_handle_is_invalid(handle))
+ return VMCI_ERROR_INVALID_ARGS;
+
+ context = vmci_ctx_get(context_id);
+ if (context == NULL)
+ return VMCI_ERROR_NOT_FOUND;
+
+ spin_lock(&context->lock);
+ removed_handle =
+ vmci_handle_arr_remove_entry(context->doorbell_array, handle);
+ vmci_handle_arr_remove_entry(context->pending_doorbell_array, handle);
+ spin_unlock(&context->lock);
+
+ vmci_ctx_put(context);
+
+ return vmci_handle_is_invalid(removed_handle) ?
+ VMCI_ERROR_NOT_FOUND : VMCI_SUCCESS;
+}
+
+/*
+ * Unregisters all doorbell handles that were previously
+ * registered with vmci_ctx_dbell_create.
+ */
+int vmci_ctx_dbell_destroy_all(u32 context_id)
+{
+ struct vmci_ctx *context;
+ struct vmci_handle handle;
+
+ if (context_id == VMCI_INVALID_ID)
+ return VMCI_ERROR_INVALID_ARGS;
+
+ context = vmci_ctx_get(context_id);
+ if (context == NULL)
+ return VMCI_ERROR_NOT_FOUND;
+
+ spin_lock(&context->lock);
+ do {
+ struct vmci_handle_arr *arr = context->doorbell_array;
+ handle = vmci_handle_arr_remove_tail(arr);
+ } while (!vmci_handle_is_invalid(handle));
+ do {
+ struct vmci_handle_arr *arr = context->pending_doorbell_array;
+ handle = vmci_handle_arr_remove_tail(arr);
+ } while (!vmci_handle_is_invalid(handle));
+ spin_unlock(&context->lock);
+
+ vmci_ctx_put(context);
+
+ return VMCI_SUCCESS;
+}
+
+/*
+ * Registers a notification of a doorbell handle initiated by the
+ * specified source context. The notification of doorbells are
+ * subject to the same isolation rules as datagram delivery. To
+ * allow host side senders of notifications a finer granularity
+ * of sender rights than those assigned to the sending context
+ * itself, the host context is required to specify a different
+ * set of privilege flags that will override the privileges of
+ * the source context.
+ */
+int vmci_ctx_notify_dbell(u32 src_cid,
+ struct vmci_handle handle,
+ u32 src_priv_flags)
+{
+ struct vmci_ctx *dst_context;
+ int result;
+
+ if (vmci_handle_is_invalid(handle))
+ return VMCI_ERROR_INVALID_ARGS;
+
+ /* Get the target VM's VMCI context. */
+ dst_context = vmci_ctx_get(handle.context);
+ if (!dst_context) {
+ pr_devel("Invalid context (ID=0x%x)\n", handle.context);
+ return VMCI_ERROR_NOT_FOUND;
+ }
+
+ if (src_cid != handle.context) {
+ u32 dst_priv_flags;
+
+ if (VMCI_CONTEXT_IS_VM(src_cid) &&
+ VMCI_CONTEXT_IS_VM(handle.context)) {
+ pr_devel("Doorbell notification from VM to VM not supported (src=0x%x, dst=0x%x)\n",
+ src_cid, handle.context);
+ result = VMCI_ERROR_DST_UNREACHABLE;
+ goto out;
+ }
+
+ result = vmci_dbell_get_priv_flags(handle, &dst_priv_flags);
+ if (result < VMCI_SUCCESS) {
+ pr_warn("Failed to get privilege flags for destination (handle=0x%x:0x%x)\n",
+ handle.context, handle.resource);
+ goto out;
+ }
+
+ if (src_cid != VMCI_HOST_CONTEXT_ID ||
+ src_priv_flags == VMCI_NO_PRIVILEGE_FLAGS) {
+ src_priv_flags = vmci_context_get_priv_flags(src_cid);
+ }
+
+ if (vmci_deny_interaction(src_priv_flags, dst_priv_flags)) {
+ result = VMCI_ERROR_NO_ACCESS;
+ goto out;
+ }
+ }
+
+ if (handle.context == VMCI_HOST_CONTEXT_ID) {
+ result = vmci_dbell_host_context_notify(src_cid, handle);
+ } else {
+ spin_lock(&dst_context->lock);
+
+ if (!vmci_handle_arr_has_entry(dst_context->doorbell_array,
+ handle)) {
+ result = VMCI_ERROR_NOT_FOUND;
+ } else {
+ if (!vmci_handle_arr_has_entry(
+ dst_context->pending_doorbell_array,
+ handle)) {
+ vmci_handle_arr_append_entry(
+ &dst_context->pending_doorbell_array,
+ handle);
+
+ ctx_signal_notify(dst_context);
+ wake_up(&dst_context->host_context.wait_queue);
+
+ }
+ result = VMCI_SUCCESS;
+ }
+ spin_unlock(&dst_context->lock);
+ }
+
+ out:
+ vmci_ctx_put(dst_context);
+
+ return result;
+}
+
+bool vmci_ctx_supports_host_qp(struct vmci_ctx *context)
+{
+ return context && context->user_version >= VMCI_VERSION_HOSTQP;
+}
+
+/*
+ * Registers that a new queue pair handle has been allocated by
+ * the context.
+ */
+int vmci_ctx_qp_create(struct vmci_ctx *context, struct vmci_handle handle)
+{
+ int result;
+
+ if (context == NULL || vmci_handle_is_invalid(handle))
+ return VMCI_ERROR_INVALID_ARGS;
+
+ if (!vmci_handle_arr_has_entry(context->queue_pair_array, handle)) {
+ vmci_handle_arr_append_entry(&context->queue_pair_array,
+ handle);
+ result = VMCI_SUCCESS;
+ } else {
+ result = VMCI_ERROR_DUPLICATE_ENTRY;
+ }
+
+ return result;
+}
+
+/*
+ * Unregisters a queue pair handle that was previously registered
+ * with vmci_ctx_qp_create.
+ */
+int vmci_ctx_qp_destroy(struct vmci_ctx *context, struct vmci_handle handle)
+{
+ struct vmci_handle hndl;
+
+ if (context == NULL || vmci_handle_is_invalid(handle))
+ return VMCI_ERROR_INVALID_ARGS;
+
+ hndl = vmci_handle_arr_remove_entry(context->queue_pair_array, handle);
+
+ return vmci_handle_is_invalid(hndl) ?
+ VMCI_ERROR_NOT_FOUND : VMCI_SUCCESS;
+}
+
+/*
+ * Determines whether a given queue pair handle is registered
+ * with the given context.
+ */
+bool vmci_ctx_qp_exists(struct vmci_ctx *context, struct vmci_handle handle)
+{
+ if (context == NULL || vmci_handle_is_invalid(handle))
+ return false;
+
+ return vmci_handle_arr_has_entry(context->queue_pair_array, handle);
+}
+
+/*
+ * vmci_context_get_priv_flags() - Retrieve privilege flags.
+ * @context_id: The context ID of the VMCI context.
+ *
+ * Retrieves privilege flags of the given VMCI context ID.
+ */
+u32 vmci_context_get_priv_flags(u32 context_id)
+{
+ if (vmci_host_code_active()) {
+ u32 flags;
+ struct vmci_ctx *context;
+
+ context = vmci_ctx_get(context_id);
+ if (!context)
+ return VMCI_LEAST_PRIVILEGE_FLAGS;
+
+ flags = context->priv_flags;
+ vmci_ctx_put(context);
+ return flags;
+ }
+ return VMCI_NO_PRIVILEGE_FLAGS;
+}
+EXPORT_SYMBOL_GPL(vmci_context_get_priv_flags);
+
+/*
+ * vmci_is_context_owner() - Determimnes if user is the context owner
+ * @context_id: The context ID of the VMCI context.
+ * @uid: The host user id (real kernel value).
+ *
+ * Determines whether a given UID is the owner of given VMCI context.
+ */
+bool vmci_is_context_owner(u32 context_id, kuid_t uid)
+{
+ bool is_owner = false;
+
+ if (vmci_host_code_active()) {
+ struct vmci_ctx *context = vmci_ctx_get(context_id);
+ if (context) {
+ if (context->cred)
+ is_owner = uid_eq(context->cred->uid, uid);
+ vmci_ctx_put(context);
+ }
+ }
+
+ return is_owner;
+}
+EXPORT_SYMBOL_GPL(vmci_is_context_owner);
diff --git a/drivers/misc/vmw_vmci/vmci_context.h b/drivers/misc/vmw_vmci/vmci_context.h
new file mode 100644
index 0000000..24a88e6
--- /dev/null
+++ b/drivers/misc/vmw_vmci/vmci_context.h
@@ -0,0 +1,182 @@
+/*
+ * VMware VMCI driver (vmciContext.h)
+ *
+ * Copyright (C) 2012 VMware, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation version 2 and no later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ */
+
+#ifndef _VMCI_CONTEXT_H_
+#define _VMCI_CONTEXT_H_
+
+#include <linux/vmw_vmci_defs.h>
+#include <linux/atomic.h>
+#include <linux/kref.h>
+#include <linux/types.h>
+#include <linux/wait.h>
+
+#include "vmci_handle_array.h"
+#include "vmci_datagram.h"
+
+/* Used to determine what checkpoint state to get and set. */
+enum {
+ VMCI_NOTIFICATION_CPT_STATE = 1,
+ VMCI_WELLKNOWN_CPT_STATE = 2,
+ VMCI_DG_OUT_STATE = 3,
+ VMCI_DG_IN_STATE = 4,
+ VMCI_DG_IN_SIZE_STATE = 5,
+ VMCI_DOORBELL_CPT_STATE = 6,
+};
+
+/* Host specific struct used for signalling */
+struct vmci_host {
+ wait_queue_head_t wait_queue;
+};
+
+struct vmci_handle_list {
+ struct list_head node;
+ struct vmci_handle handle;
+};
+
+struct vmci_ctx {
+ struct list_head list_item; /* For global VMCI list. */
+ u32 cid;
+ struct kref kref;
+ struct list_head datagram_queue; /* Head of per VM queue. */
+ u32 pending_datagrams;
+ size_t datagram_queue_size; /* Size of datagram queue in bytes. */
+
+ /*
+ * Version of the code that created
+ * this context; e.g., VMX.
+ */
+ int user_version;
+ spinlock_t lock; /* Locks callQueue and handle_arrays. */
+
+ /*
+ * queue_pairs attached to. The array of
+ * handles for queue pairs is accessed
+ * from the code for QP API, and there
+ * it is protected by the QP lock. It
+ * is also accessed from the context
+ * clean up path, which does not
+ * require a lock. VMCILock is not
+ * used to protect the QP array field.
+ */
+ struct vmci_handle_arr *queue_pair_array;
+
+ /* Doorbells created by context. */
+ struct vmci_handle_arr *doorbell_array;
+
+ /* Doorbells pending for context. */
+ struct vmci_handle_arr *pending_doorbell_array;
+
+ /* Contexts current context is subscribing to. */
+ struct list_head notifier_list;
+ unsigned int n_notifiers;
+
+ struct vmci_host host_context;
+ u32 priv_flags;
+
+ const struct cred *cred;
+ bool *notify; /* Notify flag pointer - hosted only. */
+ struct page *notify_page; /* Page backing the notify UVA. */
+};
+
+/* VMCINotifyAddRemoveInfo: Used to add/remove remote context notifications. */
+struct vmci_ctx_info {
+ u32 remote_cid;
+ int result;
+};
+
+/* VMCICptBufInfo: Used to set/get current context's checkpoint state. */
+struct vmci_ctx_chkpt_buf_info {
+ u64 cpt_buf;
+ u32 cpt_type;
+ u32 buf_size;
+ s32 result;
+ u32 _pad;
+};
+
+/*
+ * VMCINotificationReceiveInfo: Used to recieve pending notifications
+ * for doorbells and queue pairs.
+ */
+struct vmci_ctx_notify_recv_info {
+ u64 db_handle_buf_uva;
+ u64 db_handle_buf_size;
+ u64 qp_handle_buf_uva;
+ u64 qp_handle_buf_size;
+ s32 result;
+ u32 _pad;
+};
+
+/*
+ * Utilility function that checks whether two entities are allowed
+ * to interact. If one of them is restricted, the other one must
+ * be trusted.
+ */
+static inline bool vmci_deny_interaction(u32 part_one, u32 part_two)
+{
+ return ((part_one & VMCI_PRIVILEGE_FLAG_RESTRICTED) &&
+ !(part_two & VMCI_PRIVILEGE_FLAG_TRUSTED)) ||
+ ((part_two & VMCI_PRIVILEGE_FLAG_RESTRICTED) &&
+ !(part_one & VMCI_PRIVILEGE_FLAG_TRUSTED));
+}
+
+struct vmci_ctx *vmci_ctx_create(u32 cid, u32 flags,
+ uintptr_t event_hnd, int version,
+ const struct cred *cred);
+void vmci_ctx_destroy(struct vmci_ctx *context);
+
+bool vmci_ctx_supports_host_qp(struct vmci_ctx *context);
+int vmci_ctx_enqueue_datagram(u32 cid, struct vmci_datagram *dg);
+int vmci_ctx_dequeue_datagram(struct vmci_ctx *context,
+ size_t *max_size, struct vmci_datagram **dg);
+int vmci_ctx_pending_datagrams(u32 cid, u32 *pending);
+struct vmci_ctx *vmci_ctx_get(u32 cid);
+void vmci_ctx_put(struct vmci_ctx *context);
+bool vmci_ctx_exists(u32 cid);
+
+int vmci_ctx_add_notification(u32 context_id, u32 remote_cid);
+int vmci_ctx_remove_notification(u32 context_id, u32 remote_cid);
+int vmci_ctx_get_chkpt_state(u32 context_id, u32 cpt_type,
+ u32 *num_cids, void **cpt_buf_ptr);
+int vmci_ctx_set_chkpt_state(u32 context_id, u32 cpt_type,
+ u32 num_cids, void *cpt_buf);
+
+int vmci_ctx_qp_create(struct vmci_ctx *context, struct vmci_handle handle);
+int vmci_ctx_qp_destroy(struct vmci_ctx *context, struct vmci_handle handle);
+bool vmci_ctx_qp_exists(struct vmci_ctx *context, struct vmci_handle handle);
+
+void vmci_ctx_check_signal_notify(struct vmci_ctx *context);
+void vmci_ctx_unset_notify(struct vmci_ctx *context);
+
+int vmci_ctx_dbell_create(u32 context_id, struct vmci_handle handle);
+int vmci_ctx_dbell_destroy(u32 context_id, struct vmci_handle handle);
+int vmci_ctx_dbell_destroy_all(u32 context_id);
+int vmci_ctx_notify_dbell(u32 cid, struct vmci_handle handle,
+ u32 src_priv_flags);
+
+int vmci_ctx_rcv_notifications_get(u32 context_id, struct vmci_handle_arr
+ **db_handle_array, struct vmci_handle_arr
+ **qp_handle_array);
+void vmci_ctx_rcv_notifications_release(u32 context_id, struct vmci_handle_arr
+ *db_handle_array, struct vmci_handle_arr
+ *qp_handle_array, bool success);
+
+static inline u32 vmci_ctx_get_id(struct vmci_ctx *context)
+{
+ if (!context)
+ return VMCI_INVALID_ID;
+ return context->cid;
+}
+
+#endif /* _VMCI_CONTEXT_H_ */
diff --git a/drivers/misc/vmw_vmci/vmci_datagram.c b/drivers/misc/vmw_vmci/vmci_datagram.c
new file mode 100644
index 0000000..8a4b6bb
--- /dev/null
+++ b/drivers/misc/vmw_vmci/vmci_datagram.c
@@ -0,0 +1,502 @@
+/*
+ * VMware VMCI Driver
+ *
+ * Copyright (C) 2012 VMware, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation version 2 and no later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ */
+
+#include <linux/vmw_vmci_defs.h>
+#include <linux/vmw_vmci_api.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/bug.h>
+
+#include "vmci_datagram.h"
+#include "vmci_resource.h"
+#include "vmci_context.h"
+#include "vmci_driver.h"
+#include "vmci_event.h"
+#include "vmci_route.h"
+
+/*
+ * struct datagram_entry describes the datagram entity. It is used for datagram
+ * entities created only on the host.
+ */
+struct datagram_entry {
+ struct vmci_resource resource;
+ u32 flags;
+ bool run_delayed;
+ vmci_datagram_recv_cb recv_cb;
+ void *client_data;
+ u32 priv_flags;
+};
+
+struct delayed_datagram_info {
+ struct datagram_entry *entry;
+ struct work_struct work;
+ bool in_dg_host_queue;
+ /* msg and msg_payload must be together. */
+ struct vmci_datagram msg;
+ u8 msg_payload[];
+};
+
+/* Number of in-flight host->host datagrams */
+static atomic_t delayed_dg_host_queue_size = ATOMIC_INIT(0);
+
+/*
+ * Create a datagram entry given a handle pointer.
+ */
+static int dg_create_handle(u32 resource_id,
+ u32 flags,
+ u32 priv_flags,
+ vmci_datagram_recv_cb recv_cb,
+ void *client_data, struct vmci_handle *out_handle)
+{
+ int result;
+ u32 context_id;
+ struct vmci_handle handle;
+ struct datagram_entry *entry;
+
+ if ((flags & VMCI_FLAG_WELLKNOWN_DG_HND) != 0)
+ return VMCI_ERROR_INVALID_ARGS;
+
+ if ((flags & VMCI_FLAG_ANYCID_DG_HND) != 0) {
+ context_id = VMCI_INVALID_ID;
+ } else {
+ context_id = vmci_get_context_id();
+ if (context_id == VMCI_INVALID_ID)
+ return VMCI_ERROR_NO_RESOURCES;
+ }
+
+ handle = vmci_make_handle(context_id, resource_id);
+
+ entry = kmalloc(sizeof(*entry), GFP_KERNEL);
+ if (!entry) {
+ pr_warn("Failed allocating memory for datagram entry\n");
+ return VMCI_ERROR_NO_MEM;
+ }
+
+ entry->run_delayed = (flags & VMCI_FLAG_DG_DELAYED_CB) ? true : false;
+ entry->flags = flags;
+ entry->recv_cb = recv_cb;
+ entry->client_data = client_data;
+ entry->priv_flags = priv_flags;
+
+ /* Make datagram resource live. */
+ result = vmci_resource_add(&entry->resource,
+ VMCI_RESOURCE_TYPE_DATAGRAM,
+ handle);
+ if (result != VMCI_SUCCESS) {
+ pr_warn("Failed to add new resource (handle=0x%x:0x%x), error: %d\n",
+ handle.context, handle.resource, result);
+ kfree(entry);
+ return result;
+ }
+
+ *out_handle = vmci_resource_handle(&entry->resource);
+ return VMCI_SUCCESS;
+}
+
+/*
+ * Internal utility function with the same purpose as
+ * vmci_datagram_get_priv_flags that also takes a context_id.
+ */
+static int vmci_datagram_get_priv_flags(u32 context_id,
+ struct vmci_handle handle,
+ u32 *priv_flags)
+{
+ if (context_id == VMCI_INVALID_ID)
+ return VMCI_ERROR_INVALID_ARGS;
+
+ if (context_id == VMCI_HOST_CONTEXT_ID) {
+ struct datagram_entry *src_entry;
+ struct vmci_resource *resource;
+
+ resource = vmci_resource_by_handle(handle,
+ VMCI_RESOURCE_TYPE_DATAGRAM);
+ if (!resource)
+ return VMCI_ERROR_INVALID_ARGS;
+
+ src_entry = container_of(resource, struct datagram_entry,
+ resource);
+ *priv_flags = src_entry->priv_flags;
+ vmci_resource_put(resource);
+ } else if (context_id == VMCI_HYPERVISOR_CONTEXT_ID)
+ *priv_flags = VMCI_MAX_PRIVILEGE_FLAGS;
+ else
+ *priv_flags = vmci_context_get_priv_flags(context_id);
+
+ return VMCI_SUCCESS;
+}
+
+/*
+ * Calls the specified callback in a delayed context.
+ */
+static void dg_delayed_dispatch(struct work_struct *work)
+{
+ struct delayed_datagram_info *dg_info =
+ container_of(work, struct delayed_datagram_info, work);
+
+ dg_info->entry->recv_cb(dg_info->entry->client_data, &dg_info->msg);
+
+ vmci_resource_put(&dg_info->entry->resource);
+
+ if (dg_info->in_dg_host_queue)
+ atomic_dec(&delayed_dg_host_queue_size);
+
+ kfree(dg_info);
+}
+
+/*
+ * Dispatch datagram as a host, to the host, or other vm context. This
+ * function cannot dispatch to hypervisor context handlers. This should
+ * have been handled before we get here by vmci_datagram_dispatch.
+ * Returns number of bytes sent on success, error code otherwise.
+ */
+static int dg_dispatch_as_host(u32 context_id, struct vmci_datagram *dg)
+{
+ int retval;
+ size_t dg_size;
+ u32 src_priv_flags;
+
+ dg_size = VMCI_DG_SIZE(dg);
+
+ /* Host cannot send to the hypervisor. */
+ if (dg->dst.context == VMCI_HYPERVISOR_CONTEXT_ID)
+ return VMCI_ERROR_DST_UNREACHABLE;
+
+ /* Check that source handle matches sending context. */
+ if (dg->src.context != context_id) {
+ pr_devel("Sender context (ID=0x%x) is not owner of src datagram entry (handle=0x%x:0x%x)\n",
+ context_id, dg->src.context, dg->src.resource);
+ return VMCI_ERROR_NO_ACCESS;
+ }
+
+ /* Get hold of privileges of sending endpoint. */
+ retval = vmci_datagram_get_priv_flags(context_id, dg->src,
+ &src_priv_flags);
+ if (retval != VMCI_SUCCESS) {
+ pr_warn("Couldn't get privileges (handle=0x%x:0x%x)\n",
+ dg->src.context, dg->src.resource);
+ return retval;
+ }
+
+ /* Determine if we should route to host or guest destination. */
+ if (dg->dst.context == VMCI_HOST_CONTEXT_ID) {
+ /* Route to host datagram entry. */
+ struct datagram_entry *dst_entry;
+ struct vmci_resource *resource;
+
+ if (dg->src.context == VMCI_HYPERVISOR_CONTEXT_ID &&
+ dg->dst.resource == VMCI_EVENT_HANDLER) {
+ return vmci_event_dispatch(dg);
+ }
+
+ resource = vmci_resource_by_handle(dg->dst,
+ VMCI_RESOURCE_TYPE_DATAGRAM);
+ if (!resource) {
+ pr_devel("Sending to invalid destination (handle=0x%x:0x%x)\n",
+ dg->dst.context, dg->dst.resource);
+ return VMCI_ERROR_INVALID_RESOURCE;
+ }
+ dst_entry = container_of(resource, struct datagram_entry,
+ resource);
+ if (vmci_deny_interaction(src_priv_flags,
+ dst_entry->priv_flags)) {
+ vmci_resource_put(resource);
+ return VMCI_ERROR_NO_ACCESS;
+ }
+
+ /*
+ * If a VMCI datagram destined for the host is also sent by the
+ * host, we always run it delayed. This ensures that no locks
+ * are held when the datagram callback runs.
+ */
+ if (dst_entry->run_delayed ||
+ dg->src.context == VMCI_HOST_CONTEXT_ID) {
+ struct delayed_datagram_info *dg_info;
+
+ if (atomic_add_return(1, &delayed_dg_host_queue_size)
+ == VMCI_MAX_DELAYED_DG_HOST_QUEUE_SIZE) {
+ atomic_dec(&delayed_dg_host_queue_size);
+ vmci_resource_put(resource);
+ return VMCI_ERROR_NO_MEM;
+ }
+
+ dg_info = kmalloc(sizeof(*dg_info) +
+ (size_t) dg->payload_size, GFP_ATOMIC);
+ if (!dg_info) {
+ atomic_dec(&delayed_dg_host_queue_size);
+ vmci_resource_put(resource);
+ return VMCI_ERROR_NO_MEM;
+ }
+
+ dg_info->in_dg_host_queue = true;
+ dg_info->entry = dst_entry;
+ memcpy(&dg_info->msg, dg, dg_size);
+
+ INIT_WORK(&dg_info->work, dg_delayed_dispatch);
+ schedule_work(&dg_info->work);
+ retval = VMCI_SUCCESS;
+
+ } else {
+ retval = dst_entry->recv_cb(dst_entry->client_data, dg);
+ vmci_resource_put(resource);
+ if (retval < VMCI_SUCCESS)
+ return retval;
+ }
+ } else {
+ /* Route to destination VM context. */
+ struct vmci_datagram *new_dg;
+
+ if (context_id != dg->dst.context) {
+ if (vmci_deny_interaction(src_priv_flags,
+ vmci_context_get_priv_flags
+ (dg->dst.context))) {
+ return VMCI_ERROR_NO_ACCESS;
+ } else if (VMCI_CONTEXT_IS_VM(context_id)) {
+ /*
+ * If the sending context is a VM, it
+ * cannot reach another VM.
+ */
+
+ pr_devel("Datagram communication between VMs not supported (src=0x%x, dst=0x%x)\n",
+ context_id, dg->dst.context);
+ return VMCI_ERROR_DST_UNREACHABLE;
+ }
+ }
+
+ /* We make a copy to enqueue. */
+ new_dg = kmemdup(dg, dg_size, GFP_KERNEL);
+ if (new_dg == NULL)
+ return VMCI_ERROR_NO_MEM;
+
+ retval = vmci_ctx_enqueue_datagram(dg->dst.context, new_dg);
+ if (retval < VMCI_SUCCESS) {
+ kfree(new_dg);
+ return retval;
+ }
+ }
+
+ /*
+ * We currently truncate the size to signed 32 bits. This doesn't
+ * matter for this handler as it only support 4Kb messages.
+ */
+ return (int)dg_size;
+}
+
+/*
+ * Dispatch datagram as a guest, down through the VMX and potentially to
+ * the host.
+ * Returns number of bytes sent on success, error code otherwise.
+ */
+static int dg_dispatch_as_guest(struct vmci_datagram *dg)
+{
+ int retval;
+ struct vmci_resource *resource;
+
+ resource = vmci_resource_by_handle(dg->src,
+ VMCI_RESOURCE_TYPE_DATAGRAM);
+ if (!resource)
+ return VMCI_ERROR_NO_HANDLE;
+
+ retval = vmci_send_datagram(dg);
+ vmci_resource_put(resource);
+ return retval;
+}
+
+/*
+ * Dispatch datagram. This will determine the routing for the datagram
+ * and dispatch it accordingly.
+ * Returns number of bytes sent on success, error code otherwise.
+ */
+int vmci_datagram_dispatch(u32 context_id,
+ struct vmci_datagram *dg, bool from_guest)
+{
+ int retval;
+ enum vmci_route route;
+
+ BUILD_BUG_ON(sizeof(struct vmci_datagram) != 24);
+
+ if (dg->payload_size > VMCI_MAX_DG_SIZE ||
+ VMCI_DG_SIZE(dg) > VMCI_MAX_DG_SIZE) {
+ pr_devel("Payload (size=%llu bytes) too big to send\n",
+ (unsigned long long)dg->payload_size);
+ return VMCI_ERROR_INVALID_ARGS;
+ }
+
+ retval = vmci_route(&dg->src, &dg->dst, from_guest, &route);
+ if (retval < VMCI_SUCCESS) {
+ pr_devel("Failed to route datagram (src=0x%x, dst=0x%x, err=%d)\n",
+ dg->src.context, dg->dst.context, retval);
+ return retval;
+ }
+
+ if (VMCI_ROUTE_AS_HOST == route) {
+ if (VMCI_INVALID_ID == context_id)
+ context_id = VMCI_HOST_CONTEXT_ID;
+ return dg_dispatch_as_host(context_id, dg);
+ }
+
+ if (VMCI_ROUTE_AS_GUEST == route)
+ return dg_dispatch_as_guest(dg);
+
+ pr_warn("Unknown route (%d) for datagram\n", route);
+ return VMCI_ERROR_DST_UNREACHABLE;
+}
+
+/*
+ * Invoke the handler for the given datagram. This is intended to be
+ * called only when acting as a guest and receiving a datagram from the
+ * virtual device.
+ */
+int vmci_datagram_invoke_guest_handler(struct vmci_datagram *dg)
+{
+ struct vmci_resource *resource;
+ struct datagram_entry *dst_entry;
+
+ resource = vmci_resource_by_handle(dg->dst,
+ VMCI_RESOURCE_TYPE_DATAGRAM);
+ if (!resource) {
+ pr_devel("destination (handle=0x%x:0x%x) doesn't exist\n",
+ dg->dst.context, dg->dst.resource);
+ return VMCI_ERROR_NO_HANDLE;
+ }
+
+ dst_entry = container_of(resource, struct datagram_entry, resource);
+ if (dst_entry->run_delayed) {
+ struct delayed_datagram_info *dg_info;
+
+ dg_info = kmalloc(sizeof(*dg_info) + (size_t)dg->payload_size,
+ GFP_ATOMIC);
+ if (!dg_info) {
+ vmci_resource_put(resource);
+ return VMCI_ERROR_NO_MEM;
+ }
+
+ dg_info->in_dg_host_queue = false;
+ dg_info->entry = dst_entry;
+ memcpy(&dg_info->msg, dg, VMCI_DG_SIZE(dg));
+
+ INIT_WORK(&dg_info->work, dg_delayed_dispatch);
+ schedule_work(&dg_info->work);
+ } else {
+ dst_entry->recv_cb(dst_entry->client_data, dg);
+ vmci_resource_put(resource);
+ }
+
+ return VMCI_SUCCESS;
+}
+
+/*
+ * vmci_datagram_create_handle_priv() - Create host context datagram endpoint
+ * @resource_id: The resource ID.
+ * @flags: Datagram Flags.
+ * @priv_flags: Privilege Flags.
+ * @recv_cb: Callback when receiving datagrams.
+ * @client_data: Pointer for a datagram_entry struct
+ * @out_handle: vmci_handle that is populated as a result of this function.
+ *
+ * Creates a host context datagram endpoint and returns a handle to it.
+ */
+int vmci_datagram_create_handle_priv(u32 resource_id,
+ u32 flags,
+ u32 priv_flags,
+ vmci_datagram_recv_cb recv_cb,
+ void *client_data,
+ struct vmci_handle *out_handle)
+{
+ if (out_handle == NULL)
+ return VMCI_ERROR_INVALID_ARGS;
+
+ if (recv_cb == NULL) {
+ pr_devel("Client callback needed when creating datagram\n");
+ return VMCI_ERROR_INVALID_ARGS;
+ }
+
+ if (priv_flags & ~VMCI_PRIVILEGE_ALL_FLAGS)
+ return VMCI_ERROR_INVALID_ARGS;
+
+ return dg_create_handle(resource_id, flags, priv_flags, recv_cb,
+ client_data, out_handle);
+}
+EXPORT_SYMBOL_GPL(vmci_datagram_create_handle_priv);
+
+/*
+ * vmci_datagram_create_handle() - Create host context datagram endpoint
+ * @resource_id: Resource ID.
+ * @flags: Datagram Flags.
+ * @recv_cb: Callback when receiving datagrams.
+ * @client_ata: Pointer for a datagram_entry struct
+ * @out_handle: vmci_handle that is populated as a result of this function.
+ *
+ * Creates a host context datagram endpoint and returns a handle to
+ * it. Same as vmci_datagram_create_handle_priv without the priviledge
+ * flags argument.
+ */
+int vmci_datagram_create_handle(u32 resource_id,
+ u32 flags,
+ vmci_datagram_recv_cb recv_cb,
+ void *client_data,
+ struct vmci_handle *out_handle)
+{
+ return vmci_datagram_create_handle_priv(
+ resource_id, flags,
+ VMCI_DEFAULT_PROC_PRIVILEGE_FLAGS,
+ recv_cb, client_data,
+ out_handle);
+}
+EXPORT_SYMBOL_GPL(vmci_datagram_create_handle);
+
+/*
+ * vmci_datagram_destroy_handle() - Destroys datagram handle
+ * @handle: vmci_handle to be destroyed and reaped.
+ *
+ * Use this function to destroy any datagram handles created by
+ * vmci_datagram_create_handle{,Priv} functions.
+ */
+int vmci_datagram_destroy_handle(struct vmci_handle handle)
+{
+ struct datagram_entry *entry;
+ struct vmci_resource *resource;
+
+ resource = vmci_resource_by_handle(handle, VMCI_RESOURCE_TYPE_DATAGRAM);
+ if (!resource) {
+ pr_devel("Failed to destroy datagram (handle=0x%x:0x%x)\n",
+ handle.context, handle.resource);
+ return VMCI_ERROR_NOT_FOUND;
+ }
+
+ entry = container_of(resource, struct datagram_entry, resource);
+
+ vmci_resource_put(&entry->resource);
+ vmci_resource_remove(&entry->resource);
+ kfree(entry);
+
+ return VMCI_SUCCESS;
+}
+EXPORT_SYMBOL_GPL(vmci_datagram_destroy_handle);
+
+/*
+ * vmci_datagram_send() - Send a datagram
+ * @msg: The datagram to send.
+ *
+ * Sends the provided datagram on its merry way.
+ */
+int vmci_datagram_send(struct vmci_datagram *msg)
+{
+ if (msg == NULL)
+ return VMCI_ERROR_INVALID_ARGS;
+
+ return vmci_datagram_dispatch(VMCI_INVALID_ID, msg, false);
+}
+EXPORT_SYMBOL_GPL(vmci_datagram_send);
diff --git a/drivers/misc/vmw_vmci/vmci_datagram.h b/drivers/misc/vmw_vmci/vmci_datagram.h
new file mode 100644
index 0000000..eb4aab7
--- /dev/null
+++ b/drivers/misc/vmw_vmci/vmci_datagram.h
@@ -0,0 +1,52 @@
+/*
+ * VMware VMCI Driver
+ *
+ * Copyright (C) 2012 VMware, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation version 2 and no later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ */
+
+#ifndef _VMCI_DATAGRAM_H_
+#define _VMCI_DATAGRAM_H_
+
+#include <linux/types.h>
+#include <linux/list.h>
+
+#include "vmci_context.h"
+
+#define VMCI_MAX_DELAYED_DG_HOST_QUEUE_SIZE 256
+
+/*
+ * The struct vmci_datagram_queue_entry is a queue header for the in-kernel VMCI
+ * datagram queues. It is allocated in non-paged memory, as the
+ * content is accessed while holding a spinlock. The pending datagram
+ * itself may be allocated from paged memory. We shadow the size of
+ * the datagram in the non-paged queue entry as this size is used
+ * while holding the same spinlock as above.
+ */
+struct vmci_datagram_queue_entry {
+ struct list_head list_item; /* For queuing. */
+ size_t dg_size; /* Size of datagram. */
+ struct vmci_datagram *dg; /* Pending datagram. */
+};
+
+/* VMCIDatagramSendRecvInfo */
+struct vmci_datagram_snd_rcv_info {
+ u64 addr;
+ u32 len;
+ s32 result;
+};
+
+/* Datagram API for non-public use. */
+int vmci_datagram_dispatch(u32 context_id, struct vmci_datagram *dg,
+ bool from_guest);
+int vmci_datagram_invoke_guest_handler(struct vmci_datagram *dg);
+
+#endif /* _VMCI_DATAGRAM_H_ */
diff --git a/drivers/misc/vmw_vmci/vmci_doorbell.c b/drivers/misc/vmw_vmci/vmci_doorbell.c
new file mode 100644
index 0000000..a8cee33
--- /dev/null
+++ b/drivers/misc/vmw_vmci/vmci_doorbell.c
@@ -0,0 +1,601 @@
+/*
+ * VMware VMCI Driver
+ *
+ * Copyright (C) 2012 VMware, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation version 2 and no later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ */
+
+#include <linux/vmw_vmci_defs.h>
+#include <linux/vmw_vmci_api.h>
+#include <linux/completion.h>
+#include <linux/hash.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+
+#include "vmci_datagram.h"
+#include "vmci_doorbell.h"
+#include "vmci_resource.h"
+#include "vmci_driver.h"
+#include "vmci_route.h"
+
+
+#define VMCI_DOORBELL_INDEX_BITS 6
+#define VMCI_DOORBELL_INDEX_TABLE_SIZE (1 << VMCI_DOORBELL_INDEX_BITS)
+#define VMCI_DOORBELL_HASH(_idx) hash_32(_idx, VMCI_DOORBELL_INDEX_BITS)
+
+/*
+ * DoorbellEntry describes the a doorbell notification handle allocated by the
+ * host.
+ */
+struct dbell_entry {
+ struct vmci_resource resource;
+ struct hlist_node node;
+ struct work_struct work;
+ vmci_callback notify_cb;
+ void *client_data;
+ u32 idx;
+ u32 priv_flags;
+ bool run_delayed;
+ atomic_t active; /* Only used by guest personality */
+};
+
+/* The VMCI index table keeps track of currently registered doorbells. */
+struct dbell_index_table {
+ spinlock_t lock; /* Index table lock */
+ struct hlist_head entries[VMCI_DOORBELL_INDEX_TABLE_SIZE];
+};
+
+static struct dbell_index_table vmci_doorbell_it = {
+ .lock = __SPIN_LOCK_UNLOCKED(vmci_doorbell_it.lock),
+};
+
+/*
+ * The max_notify_idx is one larger than the currently known bitmap index in
+ * use, and is used to determine how much of the bitmap needs to be scanned.
+ */
+static u32 max_notify_idx;
+
+/*
+ * The notify_idx_count is used for determining whether there are free entries
+ * within the bitmap (if notify_idx_count + 1 < max_notify_idx).
+ */
+static u32 notify_idx_count;
+
+/*
+ * The last_notify_idx_reserved is used to track the last index handed out - in
+ * the case where multiple handles share a notification index, we hand out
+ * indexes round robin based on last_notify_idx_reserved.
+ */
+static u32 last_notify_idx_reserved;
+
+/* This is a one entry cache used to by the index allocation. */
+static u32 last_notify_idx_released = PAGE_SIZE;
+
+
+/*
+ * Utility function that retrieves the privilege flags associated
+ * with a given doorbell handle. For guest endpoints, the
+ * privileges are determined by the context ID, but for host
+ * endpoints privileges are associated with the complete
+ * handle. Hypervisor endpoints are not yet supported.
+ */
+int vmci_dbell_get_priv_flags(struct vmci_handle handle, u32 *priv_flags)
+{
+ if (priv_flags == NULL || handle.context == VMCI_INVALID_ID)
+ return VMCI_ERROR_INVALID_ARGS;
+
+ if (handle.context == VMCI_HOST_CONTEXT_ID) {
+ struct dbell_entry *entry;
+ struct vmci_resource *resource;
+
+ resource = vmci_resource_by_handle(handle,
+ VMCI_RESOURCE_TYPE_DOORBELL);
+ if (!resource)
+ return VMCI_ERROR_NOT_FOUND;
+
+ entry = container_of(resource, struct dbell_entry, resource);
+ *priv_flags = entry->priv_flags;
+ vmci_resource_put(resource);
+ } else if (handle.context == VMCI_HYPERVISOR_CONTEXT_ID) {
+ /*
+ * Hypervisor endpoints for notifications are not
+ * supported (yet).
+ */
+ return VMCI_ERROR_INVALID_ARGS;
+ } else {
+ *priv_flags = vmci_context_get_priv_flags(handle.context);
+ }
+
+ return VMCI_SUCCESS;
+}
+
+/*
+ * Find doorbell entry by bitmap index.
+ */
+static struct dbell_entry *dbell_index_table_find(u32 idx)
+{
+ u32 bucket = VMCI_DOORBELL_HASH(idx);
+ struct dbell_entry *dbell;
+
+ hlist_for_each_entry(dbell, &vmci_doorbell_it.entries[bucket],
+ node) {
+ if (idx == dbell->idx)
+ return dbell;
+ }
+
+ return NULL;
+}
+
+/*
+ * Add the given entry to the index table. This willi take a reference to the
+ * entry's resource so that the entry is not deleted before it is removed from
+ * the * table.
+ */
+static void dbell_index_table_add(struct dbell_entry *entry)
+{
+ u32 bucket;
+ u32 new_notify_idx;
+
+ vmci_resource_get(&entry->resource);
+
+ spin_lock_bh(&vmci_doorbell_it.lock);
+
+ /*
+ * Below we try to allocate an index in the notification
+ * bitmap with "not too much" sharing between resources. If we
+ * use less that the full bitmap, we either add to the end if
+ * there are no unused flags within the currently used area,
+ * or we search for unused ones. If we use the full bitmap, we
+ * allocate the index round robin.
+ */
+ if (max_notify_idx < PAGE_SIZE || notify_idx_count < PAGE_SIZE) {
+ if (last_notify_idx_released < max_notify_idx &&
+ !dbell_index_table_find(last_notify_idx_released)) {
+ new_notify_idx = last_notify_idx_released;
+ last_notify_idx_released = PAGE_SIZE;
+ } else {
+ bool reused = false;
+ new_notify_idx = last_notify_idx_reserved;
+ if (notify_idx_count + 1 < max_notify_idx) {
+ do {
+ if (!dbell_index_table_find
+ (new_notify_idx)) {
+ reused = true;
+ break;
+ }
+ new_notify_idx = (new_notify_idx + 1) %
+ max_notify_idx;
+ } while (new_notify_idx !=
+ last_notify_idx_released);
+ }
+ if (!reused) {
+ new_notify_idx = max_notify_idx;
+ max_notify_idx++;
+ }
+ }
+ } else {
+ new_notify_idx = (last_notify_idx_reserved + 1) % PAGE_SIZE;
+ }
+
+ last_notify_idx_reserved = new_notify_idx;
+ notify_idx_count++;
+
+ entry->idx = new_notify_idx;
+ bucket = VMCI_DOORBELL_HASH(entry->idx);
+ hlist_add_head(&entry->node, &vmci_doorbell_it.entries[bucket]);
+
+ spin_unlock_bh(&vmci_doorbell_it.lock);
+}
+
+/*
+ * Remove the given entry from the index table. This will release() the
+ * entry's resource.
+ */
+static void dbell_index_table_remove(struct dbell_entry *entry)
+{
+ spin_lock_bh(&vmci_doorbell_it.lock);
+
+ hlist_del_init(&entry->node);
+
+ notify_idx_count--;
+ if (entry->idx == max_notify_idx - 1) {
+ /*
+ * If we delete an entry with the maximum known
+ * notification index, we take the opportunity to
+ * prune the current max. As there might be other
+ * unused indices immediately below, we lower the
+ * maximum until we hit an index in use.
+ */
+ while (max_notify_idx > 0 &&
+ !dbell_index_table_find(max_notify_idx - 1))
+ max_notify_idx--;
+ }
+
+ last_notify_idx_released = entry->idx;
+
+ spin_unlock_bh(&vmci_doorbell_it.lock);
+
+ vmci_resource_put(&entry->resource);
+}
+
+/*
+ * Creates a link between the given doorbell handle and the given
+ * index in the bitmap in the device backend. A notification state
+ * is created in hypervisor.
+ */
+static int dbell_link(struct vmci_handle handle, u32 notify_idx)
+{
+ struct vmci_doorbell_link_msg link_msg;
+
+ link_msg.hdr.dst = vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID,
+ VMCI_DOORBELL_LINK);
+ link_msg.hdr.src = VMCI_ANON_SRC_HANDLE;
+ link_msg.hdr.payload_size = sizeof(link_msg) - VMCI_DG_HEADERSIZE;
+ link_msg.handle = handle;
+ link_msg.notify_idx = notify_idx;
+
+ return vmci_send_datagram(&link_msg.hdr);
+}
+
+/*
+ * Unlinks the given doorbell handle from an index in the bitmap in
+ * the device backend. The notification state is destroyed in hypervisor.
+ */
+static int dbell_unlink(struct vmci_handle handle)
+{
+ struct vmci_doorbell_unlink_msg unlink_msg;
+
+ unlink_msg.hdr.dst = vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID,
+ VMCI_DOORBELL_UNLINK);
+ unlink_msg.hdr.src = VMCI_ANON_SRC_HANDLE;
+ unlink_msg.hdr.payload_size = sizeof(unlink_msg) - VMCI_DG_HEADERSIZE;
+ unlink_msg.handle = handle;
+
+ return vmci_send_datagram(&unlink_msg.hdr);
+}
+
+/*
+ * Notify another guest or the host. We send a datagram down to the
+ * host via the hypervisor with the notification info.
+ */
+static int dbell_notify_as_guest(struct vmci_handle handle, u32 priv_flags)
+{
+ struct vmci_doorbell_notify_msg notify_msg;
+
+ notify_msg.hdr.dst = vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID,
+ VMCI_DOORBELL_NOTIFY);
+ notify_msg.hdr.src = VMCI_ANON_SRC_HANDLE;
+ notify_msg.hdr.payload_size = sizeof(notify_msg) - VMCI_DG_HEADERSIZE;
+ notify_msg.handle = handle;
+
+ return vmci_send_datagram(¬ify_msg.hdr);
+}
+
+/*
+ * Calls the specified callback in a delayed context.
+ */
+static void dbell_delayed_dispatch(struct work_struct *work)
+{
+ struct dbell_entry *entry = container_of(work,
+ struct dbell_entry, work);
+
+ entry->notify_cb(entry->client_data);
+ vmci_resource_put(&entry->resource);
+}
+
+/*
+ * Dispatches a doorbell notification to the host context.
+ */
+int vmci_dbell_host_context_notify(u32 src_cid, struct vmci_handle handle)
+{
+ struct dbell_entry *entry;
+ struct vmci_resource *resource;
+
+ if (vmci_handle_is_invalid(handle)) {
+ pr_devel("Notifying an invalid doorbell (handle=0x%x:0x%x)\n",
+ handle.context, handle.resource);
+ return VMCI_ERROR_INVALID_ARGS;
+ }
+
+ resource = vmci_resource_by_handle(handle,
+ VMCI_RESOURCE_TYPE_DOORBELL);
+ if (!resource) {
+ pr_devel("Notifying an unknown doorbell (handle=0x%x:0x%x)\n",
+ handle.context, handle.resource);
+ return VMCI_ERROR_NOT_FOUND;
+ }
+
+ entry = container_of(resource, struct dbell_entry, resource);
+ if (entry->run_delayed) {
+ schedule_work(&entry->work);
+ } else {
+ entry->notify_cb(entry->client_data);
+ vmci_resource_put(resource);
+ }
+
+ return VMCI_SUCCESS;
+}
+
+/*
+ * Register the notification bitmap with the host.
+ */
+bool vmci_dbell_register_notification_bitmap(u32 bitmap_ppn)
+{
+ int result;
+ struct vmci_notify_bm_set_msg bitmap_set_msg;
+
+ bitmap_set_msg.hdr.dst = vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID,
+ VMCI_SET_NOTIFY_BITMAP);
+ bitmap_set_msg.hdr.src = VMCI_ANON_SRC_HANDLE;
+ bitmap_set_msg.hdr.payload_size = sizeof(bitmap_set_msg) -
+ VMCI_DG_HEADERSIZE;
+ bitmap_set_msg.bitmap_ppn = bitmap_ppn;
+
+ result = vmci_send_datagram(&bitmap_set_msg.hdr);
+ if (result != VMCI_SUCCESS) {
+ pr_devel("Failed to register (PPN=%u) as notification bitmap (error=%d)\n",
+ bitmap_ppn, result);
+ return false;
+ }
+ return true;
+}
+
+/*
+ * Executes or schedules the handlers for a given notify index.
+ */
+static void dbell_fire_entries(u32 notify_idx)
+{
+ u32 bucket = VMCI_DOORBELL_HASH(notify_idx);
+ struct dbell_entry *dbell;
+
+ spin_lock_bh(&vmci_doorbell_it.lock);
+
+ hlist_for_each_entry(dbell, &vmci_doorbell_it.entries[bucket], node) {
+ if (dbell->idx == notify_idx &&
+ atomic_read(&dbell->active) == 1) {
+ if (dbell->run_delayed) {
+ vmci_resource_get(&dbell->resource);
+ schedule_work(&dbell->work);
+ } else {
+ dbell->notify_cb(dbell->client_data);
+ }
+ }
+ }
+
+ spin_unlock_bh(&vmci_doorbell_it.lock);
+}
+
+/*
+ * Scans the notification bitmap, collects pending notifications,
+ * resets the bitmap and invokes appropriate callbacks.
+ */
+void vmci_dbell_scan_notification_entries(u8 *bitmap)
+{
+ u32 idx;
+
+ for (idx = 0; idx < max_notify_idx; idx++) {
+ if (bitmap[idx] & 0x1) {
+ bitmap[idx] &= ~1;
+ dbell_fire_entries(idx);
+ }
+ }
+}
+
+/*
+ * vmci_doorbell_create() - Creates a doorbell
+ * @handle: A handle used to track the resource. Can be invalid.
+ * @flags: Flag that determines context of callback.
+ * @priv_flags: Privileges flags.
+ * @notify_cb: The callback to be ivoked when the doorbell fires.
+ * @client_data: A parameter to be passed to the callback.
+ *
+ * Creates a doorbell with the given callback. If the handle is
+ * VMCI_INVALID_HANDLE, a free handle will be assigned, if
+ * possible. The callback can be run immediately (potentially with
+ * locks held - the default) or delayed (in a kernel thread) by
+ * specifying the flag VMCI_FLAG_DELAYED_CB. If delayed execution
+ * is selected, a given callback may not be run if the kernel is
+ * unable to allocate memory for the delayed execution (highly
+ * unlikely).
+ */
+int vmci_doorbell_create(struct vmci_handle *handle,
+ u32 flags,
+ u32 priv_flags,
+ vmci_callback notify_cb, void *client_data)
+{
+ struct dbell_entry *entry;
+ struct vmci_handle new_handle;
+ int result;
+
+ if (!handle || !notify_cb || flags & ~VMCI_FLAG_DELAYED_CB ||
+ priv_flags & ~VMCI_PRIVILEGE_ALL_FLAGS)
+ return VMCI_ERROR_INVALID_ARGS;
+
+ entry = kmalloc(sizeof(*entry), GFP_KERNEL);
+ if (entry == NULL) {
+ pr_warn("Failed allocating memory for datagram entry\n");
+ return VMCI_ERROR_NO_MEM;
+ }
+
+ if (vmci_handle_is_invalid(*handle)) {
+ u32 context_id = vmci_get_context_id();
+
+ /* Let resource code allocate a free ID for us */
+ new_handle = vmci_make_handle(context_id, VMCI_INVALID_ID);
+ } else {
+ bool valid_context = false;
+
+ /*
+ * Validate the handle. We must do both of the checks below
+ * because we can be acting as both a host and a guest at the
+ * same time. We always allow the host context ID, since the
+ * host functionality is in practice always there with the
+ * unified driver.
+ */
+ if (handle->context == VMCI_HOST_CONTEXT_ID ||
+ (vmci_guest_code_active() &&
+ vmci_get_context_id() == handle->context)) {
+ valid_context = true;
+ }
+
+ if (!valid_context || handle->resource == VMCI_INVALID_ID) {
+ pr_devel("Invalid argument (handle=0x%x:0x%x)\n",
+ handle->context, handle->resource);
+ result = VMCI_ERROR_INVALID_ARGS;
+ goto free_mem;
+ }
+
+ new_handle = *handle;
+ }
+
+ entry->idx = 0;
+ INIT_HLIST_NODE(&entry->node);
+ entry->priv_flags = priv_flags;
+ INIT_WORK(&entry->work, dbell_delayed_dispatch);
+ entry->run_delayed = flags & VMCI_FLAG_DELAYED_CB;
+ entry->notify_cb = notify_cb;
+ entry->client_data = client_data;
+ atomic_set(&entry->active, 0);
+
+ result = vmci_resource_add(&entry->resource,
+ VMCI_RESOURCE_TYPE_DOORBELL,
+ new_handle);
+ if (result != VMCI_SUCCESS) {
+ pr_warn("Failed to add new resource (handle=0x%x:0x%x), error: %d\n",
+ new_handle.context, new_handle.resource, result);
+ goto free_mem;
+ }
+
+ new_handle = vmci_resource_handle(&entry->resource);
+ if (vmci_guest_code_active()) {
+ dbell_index_table_add(entry);
+ result = dbell_link(new_handle, entry->idx);
+ if (VMCI_SUCCESS != result)
+ goto destroy_resource;
+
+ atomic_set(&entry->active, 1);
+ }
+
+ *handle = new_handle;
+
+ return result;
+
+ destroy_resource:
+ dbell_index_table_remove(entry);
+ vmci_resource_remove(&entry->resource);
+ free_mem:
+ kfree(entry);
+ return result;
+}
+EXPORT_SYMBOL_GPL(vmci_doorbell_create);
+
+/*
+ * vmci_doorbell_destroy() - Destroy a doorbell.
+ * @handle: The handle tracking the resource.
+ *
+ * Destroys a doorbell previously created with vmcii_doorbell_create. This
+ * operation may block waiting for a callback to finish.
+ */
+int vmci_doorbell_destroy(struct vmci_handle handle)
+{
+ struct dbell_entry *entry;
+ struct vmci_resource *resource;
+
+ if (vmci_handle_is_invalid(handle))
+ return VMCI_ERROR_INVALID_ARGS;
+
+ resource = vmci_resource_by_handle(handle,
+ VMCI_RESOURCE_TYPE_DOORBELL);
+ if (!resource) {
+ pr_devel("Failed to destroy doorbell (handle=0x%x:0x%x)\n",
+ handle.context, handle.resource);
+ return VMCI_ERROR_NOT_FOUND;
+ }
+
+ entry = container_of(resource, struct dbell_entry, resource);
+
+ if (vmci_guest_code_active()) {
+ int result;
+
+ dbell_index_table_remove(entry);
+
+ result = dbell_unlink(handle);
+ if (VMCI_SUCCESS != result) {
+
+ /*
+ * The only reason this should fail would be
+ * an inconsistency between guest and
+ * hypervisor state, where the guest believes
+ * it has an active registration whereas the
+ * hypervisor doesn't. One case where this may
+ * happen is if a doorbell is unregistered
+ * following a hibernation at a time where the
+ * doorbell state hasn't been restored on the
+ * hypervisor side yet. Since the handle has
+ * now been removed in the guest, we just
+ * print a warning and return success.
+ */
+ pr_devel("Unlink of doorbell (handle=0x%x:0x%x) unknown by hypervisor (error=%d)\n",
+ handle.context, handle.resource, result);
+ }
+ }
+
+ /*
+ * Now remove the resource from the table. It might still be in use
+ * after this, in a callback or still on the delayed work queue.
+ */
+ vmci_resource_put(&entry->resource);
+ vmci_resource_remove(&entry->resource);
+
+ kfree(entry);
+
+ return VMCI_SUCCESS;
+}
+EXPORT_SYMBOL_GPL(vmci_doorbell_destroy);
+
+/*
+ * vmci_doorbell_notify() - Ring the doorbell (and hide in the bushes).
+ * @dst: The handlle identifying the doorbell resource
+ * @priv_flags: Priviledge flags.
+ *
+ * Generates a notification on the doorbell identified by the
+ * handle. For host side generation of notifications, the caller
+ * can specify what the privilege of the calling side is.
+ */
+int vmci_doorbell_notify(struct vmci_handle dst, u32 priv_flags)
+{
+ int retval;
+ enum vmci_route route;
+ struct vmci_handle src;
+
+ if (vmci_handle_is_invalid(dst) ||
+ (priv_flags & ~VMCI_PRIVILEGE_ALL_FLAGS))
+ return VMCI_ERROR_INVALID_ARGS;
+
+ src = VMCI_INVALID_HANDLE;
+ retval = vmci_route(&src, &dst, false, &route);
+ if (retval < VMCI_SUCCESS)
+ return retval;
+
+ if (VMCI_ROUTE_AS_HOST == route)
+ return vmci_ctx_notify_dbell(VMCI_HOST_CONTEXT_ID,
+ dst, priv_flags);
+
+ if (VMCI_ROUTE_AS_GUEST == route)
+ return dbell_notify_as_guest(dst, priv_flags);
+
+ pr_warn("Unknown route (%d) for doorbell\n", route);
+ return VMCI_ERROR_DST_UNREACHABLE;
+}
+EXPORT_SYMBOL_GPL(vmci_doorbell_notify);
diff --git a/drivers/misc/vmw_vmci/vmci_doorbell.h b/drivers/misc/vmw_vmci/vmci_doorbell.h
new file mode 100644
index 0000000..e4c0b17
--- /dev/null
+++ b/drivers/misc/vmw_vmci/vmci_doorbell.h
@@ -0,0 +1,51 @@
+/*
+ * VMware VMCI Driver
+ *
+ * Copyright (C) 2012 VMware, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation version 2 and no later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ */
+
+#ifndef VMCI_DOORBELL_H
+#define VMCI_DOORBELL_H
+
+#include <linux/vmw_vmci_defs.h>
+#include <linux/types.h>
+
+#include "vmci_driver.h"
+
+/*
+ * VMCINotifyResourceInfo: Used to create and destroy doorbells, and
+ * generate a notification for a doorbell or queue pair.
+ */
+struct vmci_dbell_notify_resource_info {
+ struct vmci_handle handle;
+ u16 resource;
+ u16 action;
+ s32 result;
+};
+
+/*
+ * Structure used for checkpointing the doorbell mappings. It is
+ * written to the checkpoint as is, so changing this structure will
+ * break checkpoint compatibility.
+ */
+struct dbell_cpt_state {
+ struct vmci_handle handle;
+ u64 bitmap_idx;
+};
+
+int vmci_dbell_host_context_notify(u32 src_cid, struct vmci_handle handle);
+int vmci_dbell_get_priv_flags(struct vmci_handle handle, u32 *priv_flags);
+
+bool vmci_dbell_register_notification_bitmap(u32 bitmap_ppn);
+void vmci_dbell_scan_notification_entries(u8 *bitmap);
+
+#endif /* VMCI_DOORBELL_H */
diff --git a/drivers/misc/vmw_vmci/vmci_driver.c b/drivers/misc/vmw_vmci/vmci_driver.c
new file mode 100644
index 0000000..b823f9a
--- /dev/null
+++ b/drivers/misc/vmw_vmci/vmci_driver.c
@@ -0,0 +1,117 @@
+/*
+ * VMware VMCI Driver
+ *
+ * Copyright (C) 2012 VMware, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation version 2 and no later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ */
+
+#include <linux/vmw_vmci_defs.h>
+#include <linux/vmw_vmci_api.h>
+#include <linux/atomic.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+
+#include "vmci_driver.h"
+#include "vmci_event.h"
+
+static bool vmci_disable_host;
+module_param_named(disable_host, vmci_disable_host, bool, 0);
+MODULE_PARM_DESC(disable_host,
+ "Disable driver host personality (default=enabled)");
+
+static bool vmci_disable_guest;
+module_param_named(disable_guest, vmci_disable_guest, bool, 0);
+MODULE_PARM_DESC(disable_guest,
+ "Disable driver guest personality (default=enabled)");
+
+static bool vmci_guest_personality_initialized;
+static bool vmci_host_personality_initialized;
+
+/*
+ * vmci_get_context_id() - Gets the current context ID.
+ *
+ * Returns the current context ID. Note that since this is accessed only
+ * from code running in the host, this always returns the host context ID.
+ */
+u32 vmci_get_context_id(void)
+{
+ if (vmci_guest_code_active())
+ return vmci_get_vm_context_id();
+ else if (vmci_host_code_active())
+ return VMCI_HOST_CONTEXT_ID;
+
+ return VMCI_INVALID_ID;
+}
+EXPORT_SYMBOL_GPL(vmci_get_context_id);
+
+static int __init vmci_drv_init(void)
+{
+ int vmci_err;
+ int error;
+
+ vmci_err = vmci_event_init();
+ if (vmci_err < VMCI_SUCCESS) {
+ pr_err("Failed to initialize VMCIEvent (result=%d)\n",
+ vmci_err);
+ return -EINVAL;
+ }
+
+ if (!vmci_disable_guest) {
+ error = vmci_guest_init();
+ if (error) {
+ pr_warn("Failed to initialize guest personality (err=%d)\n",
+ error);
+ } else {
+ vmci_guest_personality_initialized = true;
+ pr_info("Guest personality initialized and is %s\n",
+ vmci_guest_code_active() ?
+ "active" : "inactive");
+ }
+ }
+
+ if (!vmci_disable_host) {
+ error = vmci_host_init();
+ if (error) {
+ pr_warn("Unable to initialize host personality (err=%d)\n",
+ error);
+ } else {
+ vmci_host_personality_initialized = true;
+ pr_info("Initialized host personality\n");
+ }
+ }
+
+ if (!vmci_guest_personality_initialized &&
+ !vmci_host_personality_initialized) {
+ vmci_event_exit();
+ return -ENODEV;
+ }
+
+ return 0;
+}
+module_init(vmci_drv_init);
+
+static void __exit vmci_drv_exit(void)
+{
+ if (vmci_guest_personality_initialized)
+ vmci_guest_exit();
+
+ if (vmci_host_personality_initialized)
+ vmci_host_exit();
+
+ vmci_event_exit();
+}
+module_exit(vmci_drv_exit);
+
+MODULE_AUTHOR("VMware, Inc.");
+MODULE_DESCRIPTION("VMware Virtual Machine Communication Interface.");
+MODULE_VERSION("1.1.3.0-k");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/misc/vmw_vmci/vmci_driver.h b/drivers/misc/vmw_vmci/vmci_driver.h
new file mode 100644
index 0000000..cee9e97
--- /dev/null
+++ b/drivers/misc/vmw_vmci/vmci_driver.h
@@ -0,0 +1,57 @@
+/*
+ * VMware VMCI Driver
+ *
+ * Copyright (C) 2012 VMware, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation version 2 and no later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ */
+
+#ifndef _VMCI_DRIVER_H_
+#define _VMCI_DRIVER_H_
+
+#include <linux/vmw_vmci_defs.h>
+#include <linux/wait.h>
+
+#include "vmci_queue_pair.h"
+#include "vmci_context.h"
+
+enum vmci_obj_type {
+ VMCIOBJ_VMX_VM = 10,
+ VMCIOBJ_CONTEXT,
+ VMCIOBJ_SOCKET,
+ VMCIOBJ_NOT_SET,
+};
+
+/* For storing VMCI structures in file handles. */
+struct vmci_obj {
+ void *ptr;
+ enum vmci_obj_type type;
+};
+
+/*
+ * Needed by other components of this module. It's okay to have one global
+ * instance of this because there can only ever be one VMCI device. Our
+ * virtual hardware enforces this.
+ */
+extern struct pci_dev *vmci_pdev;
+
+u32 vmci_get_context_id(void);
+int vmci_send_datagram(struct vmci_datagram *dg);
+
+int vmci_host_init(void);
+void vmci_host_exit(void);
+bool vmci_host_code_active(void);
+
+int vmci_guest_init(void);
+void vmci_guest_exit(void);
+bool vmci_guest_code_active(void);
+u32 vmci_get_vm_context_id(void);
+
+#endif /* _VMCI_DRIVER_H_ */
diff --git a/drivers/misc/vmw_vmci/vmci_event.c b/drivers/misc/vmw_vmci/vmci_event.c
new file mode 100644
index 0000000..8449516
--- /dev/null
+++ b/drivers/misc/vmw_vmci/vmci_event.c
@@ -0,0 +1,224 @@
+/*
+ * VMware VMCI Driver
+ *
+ * Copyright (C) 2012 VMware, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation version 2 and no later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ */
+
+#include <linux/vmw_vmci_defs.h>
+#include <linux/vmw_vmci_api.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+
+#include "vmci_driver.h"
+#include "vmci_event.h"
+
+#define EVENT_MAGIC 0xEABE0000
+#define VMCI_EVENT_MAX_ATTEMPTS 10
+
+struct vmci_subscription {
+ u32 id;
+ u32 event;
+ vmci_event_cb callback;
+ void *callback_data;
+ struct list_head node; /* on one of subscriber lists */
+};
+
+static struct list_head subscriber_array[VMCI_EVENT_MAX];
+static DEFINE_MUTEX(subscriber_mutex);
+
+int __init vmci_event_init(void)
+{
+ int i;
+
+ for (i = 0; i < VMCI_EVENT_MAX; i++)
+ INIT_LIST_HEAD(&subscriber_array[i]);
+
+ return VMCI_SUCCESS;
+}
+
+void vmci_event_exit(void)
+{
+ int e;
+
+ /* We free all memory at exit. */
+ for (e = 0; e < VMCI_EVENT_MAX; e++) {
+ struct vmci_subscription *cur, *p2;
+ list_for_each_entry_safe(cur, p2, &subscriber_array[e], node) {
+
+ /*
+ * We should never get here because all events
+ * should have been unregistered before we try
+ * to unload the driver module.
+ */
+ pr_warn("Unexpected free events occurring\n");
+ list_del(&cur->node);
+ kfree(cur);
+ }
+ }
+}
+
+/*
+ * Find entry. Assumes subscriber_mutex is held.
+ */
+static struct vmci_subscription *event_find(u32 sub_id)
+{
+ int e;
+
+ for (e = 0; e < VMCI_EVENT_MAX; e++) {
+ struct vmci_subscription *cur;
+ list_for_each_entry(cur, &subscriber_array[e], node) {
+ if (cur->id == sub_id)
+ return cur;
+ }
+ }
+ return NULL;
+}
+
+/*
+ * Actually delivers the events to the subscribers.
+ * The callback function for each subscriber is invoked.
+ */
+static void event_deliver(struct vmci_event_msg *event_msg)
+{
+ struct vmci_subscription *cur;
+ struct list_head *subscriber_list;
+
+ rcu_read_lock();
+ subscriber_list = &subscriber_array[event_msg->event_data.event];
+ list_for_each_entry_rcu(cur, subscriber_list, node) {
+ cur->callback(cur->id, &event_msg->event_data,
+ cur->callback_data);
+ }
+ rcu_read_unlock();
+}
+
+/*
+ * Dispatcher for the VMCI_EVENT_RECEIVE datagrams. Calls all
+ * subscribers for given event.
+ */
+int vmci_event_dispatch(struct vmci_datagram *msg)
+{
+ struct vmci_event_msg *event_msg = (struct vmci_event_msg *)msg;
+
+ if (msg->payload_size < sizeof(u32) ||
+ msg->payload_size > sizeof(struct vmci_event_data_max))
+ return VMCI_ERROR_INVALID_ARGS;
+
+ if (!VMCI_EVENT_VALID(event_msg->event_data.event))
+ return VMCI_ERROR_EVENT_UNKNOWN;
+
+ event_deliver(event_msg);
+ return VMCI_SUCCESS;
+}
+
+/*
+ * vmci_event_subscribe() - Subscribe to a given event.
+ * @event: The event to subscribe to.
+ * @callback: The callback to invoke upon the event.
+ * @callback_data: Data to pass to the callback.
+ * @subscription_id: ID used to track subscription. Used with
+ * vmci_event_unsubscribe()
+ *
+ * Subscribes to the provided event. The callback specified will be
+ * fired from RCU critical section and therefore must not sleep.
+ */
+int vmci_event_subscribe(u32 event,
+ vmci_event_cb callback,
+ void *callback_data,
+ u32 *new_subscription_id)
+{
+ struct vmci_subscription *sub;
+ int attempts;
+ int retval;
+ bool have_new_id = false;
+
+ if (!new_subscription_id) {
+ pr_devel("%s: Invalid subscription (NULL)\n", __func__);
+ return VMCI_ERROR_INVALID_ARGS;
+ }
+
+ if (!VMCI_EVENT_VALID(event) || !callback) {
+ pr_devel("%s: Failed to subscribe to event (type=%d) (callback=%p) (data=%p)\n",
+ __func__, event, callback, callback_data);
+ return VMCI_ERROR_INVALID_ARGS;
+ }
+
+ sub = kzalloc(sizeof(*sub), GFP_KERNEL);
+ if (!sub)
+ return VMCI_ERROR_NO_MEM;
+
+ sub->id = VMCI_EVENT_MAX;
+ sub->event = event;
+ sub->callback = callback;
+ sub->callback_data = callback_data;
+ INIT_LIST_HEAD(&sub->node);
+
+ mutex_lock(&subscriber_mutex);
+
+ /* Creation of a new event is always allowed. */
+ for (attempts = 0; attempts < VMCI_EVENT_MAX_ATTEMPTS; attempts++) {
+ static u32 subscription_id;
+ /*
+ * We try to get an id a couple of time before
+ * claiming we are out of resources.
+ */
+
+ /* Test for duplicate id. */
+ if (!event_find(++subscription_id)) {
+ sub->id = subscription_id;
+ have_new_id = true;
+ break;
+ }
+ }
+
+ if (have_new_id) {
+ list_add_rcu(&sub->node, &subscriber_array[event]);
+ retval = VMCI_SUCCESS;
+ } else {
+ retval = VMCI_ERROR_NO_RESOURCES;
+ }
+
+ mutex_unlock(&subscriber_mutex);
+
+ *new_subscription_id = sub->id;
+ return retval;
+}
+EXPORT_SYMBOL_GPL(vmci_event_subscribe);
+
+/*
+ * vmci_event_unsubscribe() - unsubscribe from an event.
+ * @sub_id: A subscription ID as provided by vmci_event_subscribe()
+ *
+ * Unsubscribe from given event. Removes it from list and frees it.
+ * Will return callback_data if requested by caller.
+ */
+int vmci_event_unsubscribe(u32 sub_id)
+{
+ struct vmci_subscription *s;
+
+ mutex_lock(&subscriber_mutex);
+ s = event_find(sub_id);
+ if (s)
+ list_del_rcu(&s->node);
+ mutex_unlock(&subscriber_mutex);
+
+ if (!s)
+ return VMCI_ERROR_NOT_FOUND;
+
+ synchronize_rcu();
+ kfree(s);
+
+ return VMCI_SUCCESS;
+}
+EXPORT_SYMBOL_GPL(vmci_event_unsubscribe);
diff --git a/drivers/misc/vmw_vmci/vmci_event.h b/drivers/misc/vmw_vmci/vmci_event.h
new file mode 100644
index 0000000..7df9b1c
--- /dev/null
+++ b/drivers/misc/vmw_vmci/vmci_event.h
@@ -0,0 +1,25 @@
+/*
+ * VMware VMCI Driver
+ *
+ * Copyright (C) 2012 VMware, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation version 2 and no later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ */
+
+#ifndef __VMCI_EVENT_H__
+#define __VMCI_EVENT_H__
+
+#include <linux/vmw_vmci_api.h>
+
+int vmci_event_init(void);
+void vmci_event_exit(void);
+int vmci_event_dispatch(struct vmci_datagram *msg);
+
+#endif /*__VMCI_EVENT_H__ */
diff --git a/drivers/misc/vmw_vmci/vmci_guest.c b/drivers/misc/vmw_vmci/vmci_guest.c
new file mode 100644
index 0000000..189b325
--- /dev/null
+++ b/drivers/misc/vmw_vmci/vmci_guest.c
@@ -0,0 +1,771 @@
+/*
+ * VMware VMCI Driver
+ *
+ * Copyright (C) 2012 VMware, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation version 2 and no later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ */
+
+#include <linux/vmw_vmci_defs.h>
+#include <linux/vmw_vmci_api.h>
+#include <linux/moduleparam.h>
+#include <linux/interrupt.h>
+#include <linux/highmem.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/init.h>
+#include <linux/pci.h>
+#include <linux/smp.h>
+#include <linux/io.h>
+#include <linux/vmalloc.h>
+
+#include "vmci_datagram.h"
+#include "vmci_doorbell.h"
+#include "vmci_context.h"
+#include "vmci_driver.h"
+#include "vmci_event.h"
+
+#define PCI_DEVICE_ID_VMWARE_VMCI 0x0740
+
+#define VMCI_UTIL_NUM_RESOURCES 1
+
+static bool vmci_disable_msi;
+module_param_named(disable_msi, vmci_disable_msi, bool, 0);
+MODULE_PARM_DESC(disable_msi, "Disable MSI use in driver - (default=0)");
+
+static bool vmci_disable_msix;
+module_param_named(disable_msix, vmci_disable_msix, bool, 0);
+MODULE_PARM_DESC(disable_msix, "Disable MSI-X use in driver - (default=0)");
+
+static u32 ctx_update_sub_id = VMCI_INVALID_ID;
+static u32 vm_context_id = VMCI_INVALID_ID;
+
+struct vmci_guest_device {
+ struct device *dev; /* PCI device we are attached to */
+ void __iomem *iobase;
+
+ unsigned int irq;
+ unsigned int intr_type;
+ bool exclusive_vectors;
+ struct msix_entry msix_entries[VMCI_MAX_INTRS];
+
+ struct tasklet_struct datagram_tasklet;
+ struct tasklet_struct bm_tasklet;
+
+ void *data_buffer;
+ void *notification_bitmap;
+ dma_addr_t notification_base;
+};
+
+/* vmci_dev singleton device and supporting data*/
+struct pci_dev *vmci_pdev;
+static struct vmci_guest_device *vmci_dev_g;
+static DEFINE_SPINLOCK(vmci_dev_spinlock);
+
+static atomic_t vmci_num_guest_devices = ATOMIC_INIT(0);
+
+bool vmci_guest_code_active(void)
+{
+ return atomic_read(&vmci_num_guest_devices) != 0;
+}
+
+u32 vmci_get_vm_context_id(void)
+{
+ if (vm_context_id == VMCI_INVALID_ID) {
+ struct vmci_datagram get_cid_msg;
+ get_cid_msg.dst =
+ vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID,
+ VMCI_GET_CONTEXT_ID);
+ get_cid_msg.src = VMCI_ANON_SRC_HANDLE;
+ get_cid_msg.payload_size = 0;
+ vm_context_id = vmci_send_datagram(&get_cid_msg);
+ }
+ return vm_context_id;
+}
+
+/*
+ * VM to hypervisor call mechanism. We use the standard VMware naming
+ * convention since shared code is calling this function as well.
+ */
+int vmci_send_datagram(struct vmci_datagram *dg)
+{
+ unsigned long flags;
+ int result;
+
+ /* Check args. */
+ if (dg == NULL)
+ return VMCI_ERROR_INVALID_ARGS;
+
+ /*
+ * Need to acquire spinlock on the device because the datagram
+ * data may be spread over multiple pages and the monitor may
+ * interleave device user rpc calls from multiple
+ * VCPUs. Acquiring the spinlock precludes that
+ * possibility. Disabling interrupts to avoid incoming
+ * datagrams during a "rep out" and possibly landing up in
+ * this function.
+ */
+ spin_lock_irqsave(&vmci_dev_spinlock, flags);
+
+ if (vmci_dev_g) {
+ iowrite8_rep(vmci_dev_g->iobase + VMCI_DATA_OUT_ADDR,
+ dg, VMCI_DG_SIZE(dg));
+ result = ioread32(vmci_dev_g->iobase + VMCI_RESULT_LOW_ADDR);
+ } else {
+ result = VMCI_ERROR_UNAVAILABLE;
+ }
+
+ spin_unlock_irqrestore(&vmci_dev_spinlock, flags);
+
+ return result;
+}
+EXPORT_SYMBOL_GPL(vmci_send_datagram);
+
+/*
+ * Gets called with the new context id if updated or resumed.
+ * Context id.
+ */
+static void vmci_guest_cid_update(u32 sub_id,
+ const struct vmci_event_data *event_data,
+ void *client_data)
+{
+ const struct vmci_event_payld_ctx *ev_payload =
+ vmci_event_data_const_payload(event_data);
+
+ if (sub_id != ctx_update_sub_id) {
+ pr_devel("Invalid subscriber (ID=0x%x)\n", sub_id);
+ return;
+ }
+
+ if (!event_data || ev_payload->context_id == VMCI_INVALID_ID) {
+ pr_devel("Invalid event data\n");
+ return;
+ }
+
+ pr_devel("Updating context from (ID=0x%x) to (ID=0x%x) on event (type=%d)\n",
+ vm_context_id, ev_payload->context_id, event_data->event);
+
+ vm_context_id = ev_payload->context_id;
+}
+
+/*
+ * Verify that the host supports the hypercalls we need. If it does not,
+ * try to find fallback hypercalls and use those instead. Returns
+ * true if required hypercalls (or fallback hypercalls) are
+ * supported by the host, false otherwise.
+ */
+static int vmci_check_host_caps(struct pci_dev *pdev)
+{
+ bool result;
+ struct vmci_resource_query_msg *msg;
+ u32 msg_size = sizeof(struct vmci_resource_query_hdr) +
+ VMCI_UTIL_NUM_RESOURCES * sizeof(u32);
+ struct vmci_datagram *check_msg;
+
+ check_msg = kmalloc(msg_size, GFP_KERNEL);
+ if (!check_msg) {
+ dev_err(&pdev->dev, "%s: Insufficient memory\n", __func__);
+ return -ENOMEM;
+ }
+
+ check_msg->dst = vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID,
+ VMCI_RESOURCES_QUERY);
+ check_msg->src = VMCI_ANON_SRC_HANDLE;
+ check_msg->payload_size = msg_size - VMCI_DG_HEADERSIZE;
+ msg = (struct vmci_resource_query_msg *)VMCI_DG_PAYLOAD(check_msg);
+
+ msg->num_resources = VMCI_UTIL_NUM_RESOURCES;
+ msg->resources[0] = VMCI_GET_CONTEXT_ID;
+
+ /* Checks that hyper calls are supported */
+ result = vmci_send_datagram(check_msg) == 0x01;
+ kfree(check_msg);
+
+ dev_dbg(&pdev->dev, "%s: Host capability check: %s\n",
+ __func__, result ? "PASSED" : "FAILED");
+
+ /* We need the vector. There are no fallbacks. */
+ return result ? 0 : -ENXIO;
+}
+
+/*
+ * Reads datagrams from the data in port and dispatches them. We
+ * always start reading datagrams into only the first page of the
+ * datagram buffer. If the datagrams don't fit into one page, we
+ * use the maximum datagram buffer size for the remainder of the
+ * invocation. This is a simple heuristic for not penalizing
+ * small datagrams.
+ *
+ * This function assumes that it has exclusive access to the data
+ * in port for the duration of the call.
+ */
+static void vmci_dispatch_dgs(unsigned long data)
+{
+ struct vmci_guest_device *vmci_dev = (struct vmci_guest_device *)data;
+ u8 *dg_in_buffer = vmci_dev->data_buffer;
+ struct vmci_datagram *dg;
+ size_t dg_in_buffer_size = VMCI_MAX_DG_SIZE;
+ size_t current_dg_in_buffer_size = PAGE_SIZE;
+ size_t remaining_bytes;
+
+ BUILD_BUG_ON(VMCI_MAX_DG_SIZE < PAGE_SIZE);
+
+ ioread8_rep(vmci_dev->iobase + VMCI_DATA_IN_ADDR,
+ vmci_dev->data_buffer, current_dg_in_buffer_size);
+ dg = (struct vmci_datagram *)dg_in_buffer;
+ remaining_bytes = current_dg_in_buffer_size;
+
+ while (dg->dst.resource != VMCI_INVALID_ID ||
+ remaining_bytes > PAGE_SIZE) {
+ unsigned dg_in_size;
+
+ /*
+ * When the input buffer spans multiple pages, a datagram can
+ * start on any page boundary in the buffer.
+ */
+ if (dg->dst.resource == VMCI_INVALID_ID) {
+ dg = (struct vmci_datagram *)roundup(
+ (uintptr_t)dg + 1, PAGE_SIZE);
+ remaining_bytes =
+ (size_t)(dg_in_buffer +
+ current_dg_in_buffer_size -
+ (u8 *)dg);
+ continue;
+ }
+
+ dg_in_size = VMCI_DG_SIZE_ALIGNED(dg);
+
+ if (dg_in_size <= dg_in_buffer_size) {
+ int result;
+
+ /*
+ * If the remaining bytes in the datagram
+ * buffer doesn't contain the complete
+ * datagram, we first make sure we have enough
+ * room for it and then we read the reminder
+ * of the datagram and possibly any following
+ * datagrams.
+ */
+ if (dg_in_size > remaining_bytes) {
+ if (remaining_bytes !=
+ current_dg_in_buffer_size) {
+
+ /*
+ * We move the partial
+ * datagram to the front and
+ * read the reminder of the
+ * datagram and possibly
+ * following calls into the
+ * following bytes.
+ */
+ memmove(dg_in_buffer, dg_in_buffer +
+ current_dg_in_buffer_size -
+ remaining_bytes,
+ remaining_bytes);
+ dg = (struct vmci_datagram *)
+ dg_in_buffer;
+ }
+
+ if (current_dg_in_buffer_size !=
+ dg_in_buffer_size)
+ current_dg_in_buffer_size =
+ dg_in_buffer_size;
+
+ ioread8_rep(vmci_dev->iobase +
+ VMCI_DATA_IN_ADDR,
+ vmci_dev->data_buffer +
+ remaining_bytes,
+ current_dg_in_buffer_size -
+ remaining_bytes);
+ }
+
+ /*
+ * We special case event datagrams from the
+ * hypervisor.
+ */
+ if (dg->src.context == VMCI_HYPERVISOR_CONTEXT_ID &&
+ dg->dst.resource == VMCI_EVENT_HANDLER) {
+ result = vmci_event_dispatch(dg);
+ } else {
+ result = vmci_datagram_invoke_guest_handler(dg);
+ }
+ if (result < VMCI_SUCCESS)
+ dev_dbg(vmci_dev->dev,
+ "Datagram with resource (ID=0x%x) failed (err=%d)\n",
+ dg->dst.resource, result);
+
+ /* On to the next datagram. */
+ dg = (struct vmci_datagram *)((u8 *)dg +
+ dg_in_size);
+ } else {
+ size_t bytes_to_skip;
+
+ /*
+ * Datagram doesn't fit in datagram buffer of maximal
+ * size. We drop it.
+ */
+ dev_dbg(vmci_dev->dev,
+ "Failed to receive datagram (size=%u bytes)\n",
+ dg_in_size);
+
+ bytes_to_skip = dg_in_size - remaining_bytes;
+ if (current_dg_in_buffer_size != dg_in_buffer_size)
+ current_dg_in_buffer_size = dg_in_buffer_size;
+
+ for (;;) {
+ ioread8_rep(vmci_dev->iobase +
+ VMCI_DATA_IN_ADDR,
+ vmci_dev->data_buffer,
+ current_dg_in_buffer_size);
+ if (bytes_to_skip <= current_dg_in_buffer_size)
+ break;
+
+ bytes_to_skip -= current_dg_in_buffer_size;
+ }
+ dg = (struct vmci_datagram *)(dg_in_buffer +
+ bytes_to_skip);
+ }
+
+ remaining_bytes =
+ (size_t) (dg_in_buffer + current_dg_in_buffer_size -
+ (u8 *)dg);
+
+ if (remaining_bytes < VMCI_DG_HEADERSIZE) {
+ /* Get the next batch of datagrams. */
+
+ ioread8_rep(vmci_dev->iobase + VMCI_DATA_IN_ADDR,
+ vmci_dev->data_buffer,
+ current_dg_in_buffer_size);
+ dg = (struct vmci_datagram *)dg_in_buffer;
+ remaining_bytes = current_dg_in_buffer_size;
+ }
+ }
+}
+
+/*
+ * Scans the notification bitmap for raised flags, clears them
+ * and handles the notifications.
+ */
+static void vmci_process_bitmap(unsigned long data)
+{
+ struct vmci_guest_device *dev = (struct vmci_guest_device *)data;
+
+ if (!dev->notification_bitmap) {
+ dev_dbg(dev->dev, "No bitmap present in %s\n", __func__);
+ return;
+ }
+
+ vmci_dbell_scan_notification_entries(dev->notification_bitmap);
+}
+
+/*
+ * Enable MSI-X. Try exclusive vectors first, then shared vectors.
+ */
+static int vmci_enable_msix(struct pci_dev *pdev,
+ struct vmci_guest_device *vmci_dev)
+{
+ int i;
+ int result;
+
+ for (i = 0; i < VMCI_MAX_INTRS; ++i) {
+ vmci_dev->msix_entries[i].entry = i;
+ vmci_dev->msix_entries[i].vector = i;
+ }
+
+ result = pci_enable_msix_exact(pdev,
+ vmci_dev->msix_entries, VMCI_MAX_INTRS);
+ if (result == 0)
+ vmci_dev->exclusive_vectors = true;
+ else if (result == -ENOSPC)
+ result = pci_enable_msix_exact(pdev, vmci_dev->msix_entries, 1);
+
+ return result;
+}
+
+/*
+ * Interrupt handler for legacy or MSI interrupt, or for first MSI-X
+ * interrupt (vector VMCI_INTR_DATAGRAM).
+ */
+static irqreturn_t vmci_interrupt(int irq, void *_dev)
+{
+ struct vmci_guest_device *dev = _dev;
+
+ /*
+ * If we are using MSI-X with exclusive vectors then we simply schedule
+ * the datagram tasklet, since we know the interrupt was meant for us.
+ * Otherwise we must read the ICR to determine what to do.
+ */
+
+ if (dev->intr_type == VMCI_INTR_TYPE_MSIX && dev->exclusive_vectors) {
+ tasklet_schedule(&dev->datagram_tasklet);
+ } else {
+ unsigned int icr;
+
+ /* Acknowledge interrupt and determine what needs doing. */
+ icr = ioread32(dev->iobase + VMCI_ICR_ADDR);
+ if (icr == 0 || icr == ~0)
+ return IRQ_NONE;
+
+ if (icr & VMCI_ICR_DATAGRAM) {
+ tasklet_schedule(&dev->datagram_tasklet);
+ icr &= ~VMCI_ICR_DATAGRAM;
+ }
+
+ if (icr & VMCI_ICR_NOTIFICATION) {
+ tasklet_schedule(&dev->bm_tasklet);
+ icr &= ~VMCI_ICR_NOTIFICATION;
+ }
+
+ if (icr != 0)
+ dev_warn(dev->dev,
+ "Ignoring unknown interrupt cause (%d)\n",
+ icr);
+ }
+
+ return IRQ_HANDLED;
+}
+
+/*
+ * Interrupt handler for MSI-X interrupt vector VMCI_INTR_NOTIFICATION,
+ * which is for the notification bitmap. Will only get called if we are
+ * using MSI-X with exclusive vectors.
+ */
+static irqreturn_t vmci_interrupt_bm(int irq, void *_dev)
+{
+ struct vmci_guest_device *dev = _dev;
+
+ /* For MSI-X we can just assume it was meant for us. */
+ tasklet_schedule(&dev->bm_tasklet);
+
+ return IRQ_HANDLED;
+}
+
+/*
+ * Most of the initialization at module load time is done here.
+ */
+static int vmci_guest_probe_device(struct pci_dev *pdev,
+ const struct pci_device_id *id)
+{
+ struct vmci_guest_device *vmci_dev;
+ void __iomem *iobase;
+ unsigned int capabilities;
+ unsigned long cmd;
+ int vmci_err;
+ int error;
+
+ dev_dbg(&pdev->dev, "Probing for vmci/PCI guest device\n");
+
+ error = pcim_enable_device(pdev);
+ if (error) {
+ dev_err(&pdev->dev,
+ "Failed to enable VMCI device: %d\n", error);
+ return error;
+ }
+
+ error = pcim_iomap_regions(pdev, 1 << 0, KBUILD_MODNAME);
+ if (error) {
+ dev_err(&pdev->dev, "Failed to reserve/map IO regions\n");
+ return error;
+ }
+
+ iobase = pcim_iomap_table(pdev)[0];
+
+ dev_info(&pdev->dev, "Found VMCI PCI device at %#lx, irq %u\n",
+ (unsigned long)iobase, pdev->irq);
+
+ vmci_dev = devm_kzalloc(&pdev->dev, sizeof(*vmci_dev), GFP_KERNEL);
+ if (!vmci_dev) {
+ dev_err(&pdev->dev,
+ "Can't allocate memory for VMCI device\n");
+ return -ENOMEM;
+ }
+
+ vmci_dev->dev = &pdev->dev;
+ vmci_dev->intr_type = VMCI_INTR_TYPE_INTX;
+ vmci_dev->exclusive_vectors = false;
+ vmci_dev->iobase = iobase;
+
+ tasklet_init(&vmci_dev->datagram_tasklet,
+ vmci_dispatch_dgs, (unsigned long)vmci_dev);
+ tasklet_init(&vmci_dev->bm_tasklet,
+ vmci_process_bitmap, (unsigned long)vmci_dev);
+
+ vmci_dev->data_buffer = vmalloc(VMCI_MAX_DG_SIZE);
+ if (!vmci_dev->data_buffer) {
+ dev_err(&pdev->dev,
+ "Can't allocate memory for datagram buffer\n");
+ return -ENOMEM;
+ }
+
+ pci_set_master(pdev); /* To enable queue_pair functionality. */
+
+ /*
+ * Verify that the VMCI Device supports the capabilities that
+ * we need. If the device is missing capabilities that we would
+ * like to use, check for fallback capabilities and use those
+ * instead (so we can run a new VM on old hosts). Fail the load if
+ * a required capability is missing and there is no fallback.
+ *
+ * Right now, we need datagrams. There are no fallbacks.
+ */
+ capabilities = ioread32(vmci_dev->iobase + VMCI_CAPS_ADDR);
+ if (!(capabilities & VMCI_CAPS_DATAGRAM)) {
+ dev_err(&pdev->dev, "Device does not support datagrams\n");
+ error = -ENXIO;
+ goto err_free_data_buffer;
+ }
+
+ /*
+ * If the hardware supports notifications, we will use that as
+ * well.
+ */
+ if (capabilities & VMCI_CAPS_NOTIFICATIONS) {
+ vmci_dev->notification_bitmap = dma_alloc_coherent(
+ &pdev->dev, PAGE_SIZE, &vmci_dev->notification_base,
+ GFP_KERNEL);
+ if (!vmci_dev->notification_bitmap) {
+ dev_warn(&pdev->dev,
+ "Unable to allocate notification bitmap\n");
+ } else {
+ memset(vmci_dev->notification_bitmap, 0, PAGE_SIZE);
+ capabilities |= VMCI_CAPS_NOTIFICATIONS;
+ }
+ }
+
+ dev_info(&pdev->dev, "Using capabilities 0x%x\n", capabilities);
+
+ /* Let the host know which capabilities we intend to use. */
+ iowrite32(capabilities, vmci_dev->iobase + VMCI_CAPS_ADDR);
+
+ /* Set up global device so that we can start sending datagrams */
+ spin_lock_irq(&vmci_dev_spinlock);
+ vmci_dev_g = vmci_dev;
+ vmci_pdev = pdev;
+ spin_unlock_irq(&vmci_dev_spinlock);
+
+ /*
+ * Register notification bitmap with device if that capability is
+ * used.
+ */
+ if (capabilities & VMCI_CAPS_NOTIFICATIONS) {
+ unsigned long bitmap_ppn =
+ vmci_dev->notification_base >> PAGE_SHIFT;
+ if (!vmci_dbell_register_notification_bitmap(bitmap_ppn)) {
+ dev_warn(&pdev->dev,
+ "VMCI device unable to register notification bitmap with PPN 0x%x\n",
+ (u32) bitmap_ppn);
+ error = -ENXIO;
+ goto err_remove_vmci_dev_g;
+ }
+ }
+
+ /* Check host capabilities. */
+ error = vmci_check_host_caps(pdev);
+ if (error)
+ goto err_remove_bitmap;
+
+ /* Enable device. */
+
+ /*
+ * We subscribe to the VMCI_EVENT_CTX_ID_UPDATE here so we can
+ * update the internal context id when needed.
+ */
+ vmci_err = vmci_event_subscribe(VMCI_EVENT_CTX_ID_UPDATE,
+ vmci_guest_cid_update, NULL,
+ &ctx_update_sub_id);
+ if (vmci_err < VMCI_SUCCESS)
+ dev_warn(&pdev->dev,
+ "Failed to subscribe to event (type=%d): %d\n",
+ VMCI_EVENT_CTX_ID_UPDATE, vmci_err);
+
+ /*
+ * Enable interrupts. Try MSI-X first, then MSI, and then fallback on
+ * legacy interrupts.
+ */
+ if (!vmci_disable_msix && !vmci_enable_msix(pdev, vmci_dev)) {
+ vmci_dev->intr_type = VMCI_INTR_TYPE_MSIX;
+ vmci_dev->irq = vmci_dev->msix_entries[0].vector;
+ } else if (!vmci_disable_msi && !pci_enable_msi(pdev)) {
+ vmci_dev->intr_type = VMCI_INTR_TYPE_MSI;
+ vmci_dev->irq = pdev->irq;
+ } else {
+ vmci_dev->intr_type = VMCI_INTR_TYPE_INTX;
+ vmci_dev->irq = pdev->irq;
+ }
+
+ /*
+ * Request IRQ for legacy or MSI interrupts, or for first
+ * MSI-X vector.
+ */
+ error = request_irq(vmci_dev->irq, vmci_interrupt, IRQF_SHARED,
+ KBUILD_MODNAME, vmci_dev);
+ if (error) {
+ dev_err(&pdev->dev, "Irq %u in use: %d\n",
+ vmci_dev->irq, error);
+ goto err_disable_msi;
+ }
+
+ /*
+ * For MSI-X with exclusive vectors we need to request an
+ * interrupt for each vector so that we get a separate
+ * interrupt handler routine. This allows us to distinguish
+ * between the vectors.
+ */
+ if (vmci_dev->exclusive_vectors) {
+ error = request_irq(vmci_dev->msix_entries[1].vector,
+ vmci_interrupt_bm, 0, KBUILD_MODNAME,
+ vmci_dev);
+ if (error) {
+ dev_err(&pdev->dev,
+ "Failed to allocate irq %u: %d\n",
+ vmci_dev->msix_entries[1].vector, error);
+ goto err_free_irq;
+ }
+ }
+
+ dev_dbg(&pdev->dev, "Registered device\n");
+
+ atomic_inc(&vmci_num_guest_devices);
+
+ /* Enable specific interrupt bits. */
+ cmd = VMCI_IMR_DATAGRAM;
+ if (capabilities & VMCI_CAPS_NOTIFICATIONS)
+ cmd |= VMCI_IMR_NOTIFICATION;
+ iowrite32(cmd, vmci_dev->iobase + VMCI_IMR_ADDR);
+
+ /* Enable interrupts. */
+ iowrite32(VMCI_CONTROL_INT_ENABLE,
+ vmci_dev->iobase + VMCI_CONTROL_ADDR);
+
+ pci_set_drvdata(pdev, vmci_dev);
+ return 0;
+
+err_free_irq:
+ free_irq(vmci_dev->irq, vmci_dev);
+ tasklet_kill(&vmci_dev->datagram_tasklet);
+ tasklet_kill(&vmci_dev->bm_tasklet);
+
+err_disable_msi:
+ if (vmci_dev->intr_type == VMCI_INTR_TYPE_MSIX)
+ pci_disable_msix(pdev);
+ else if (vmci_dev->intr_type == VMCI_INTR_TYPE_MSI)
+ pci_disable_msi(pdev);
+
+ vmci_err = vmci_event_unsubscribe(ctx_update_sub_id);
+ if (vmci_err < VMCI_SUCCESS)
+ dev_warn(&pdev->dev,
+ "Failed to unsubscribe from event (type=%d) with subscriber (ID=0x%x): %d\n",
+ VMCI_EVENT_CTX_ID_UPDATE, ctx_update_sub_id, vmci_err);
+
+err_remove_bitmap:
+ if (vmci_dev->notification_bitmap) {
+ iowrite32(VMCI_CONTROL_RESET,
+ vmci_dev->iobase + VMCI_CONTROL_ADDR);
+ dma_free_coherent(&pdev->dev, PAGE_SIZE,
+ vmci_dev->notification_bitmap,
+ vmci_dev->notification_base);
+ }
+
+err_remove_vmci_dev_g:
+ spin_lock_irq(&vmci_dev_spinlock);
+ vmci_pdev = NULL;
+ vmci_dev_g = NULL;
+ spin_unlock_irq(&vmci_dev_spinlock);
+
+err_free_data_buffer:
+ vfree(vmci_dev->data_buffer);
+
+ /* The rest are managed resources and will be freed by PCI core */
+ return error;
+}
+
+static void vmci_guest_remove_device(struct pci_dev *pdev)
+{
+ struct vmci_guest_device *vmci_dev = pci_get_drvdata(pdev);
+ int vmci_err;
+
+ dev_dbg(&pdev->dev, "Removing device\n");
+
+ atomic_dec(&vmci_num_guest_devices);
+
+ vmci_qp_guest_endpoints_exit();
+
+ vmci_err = vmci_event_unsubscribe(ctx_update_sub_id);
+ if (vmci_err < VMCI_SUCCESS)
+ dev_warn(&pdev->dev,
+ "Failed to unsubscribe from event (type=%d) with subscriber (ID=0x%x): %d\n",
+ VMCI_EVENT_CTX_ID_UPDATE, ctx_update_sub_id, vmci_err);
+
+ spin_lock_irq(&vmci_dev_spinlock);
+ vmci_dev_g = NULL;
+ vmci_pdev = NULL;
+ spin_unlock_irq(&vmci_dev_spinlock);
+
+ dev_dbg(&pdev->dev, "Resetting vmci device\n");
+ iowrite32(VMCI_CONTROL_RESET, vmci_dev->iobase + VMCI_CONTROL_ADDR);
+
+ /*
+ * Free IRQ and then disable MSI/MSI-X as appropriate. For
+ * MSI-X, we might have multiple vectors, each with their own
+ * IRQ, which we must free too.
+ */
+ free_irq(vmci_dev->irq, vmci_dev);
+ if (vmci_dev->intr_type == VMCI_INTR_TYPE_MSIX) {
+ if (vmci_dev->exclusive_vectors)
+ free_irq(vmci_dev->msix_entries[1].vector, vmci_dev);
+ pci_disable_msix(pdev);
+ } else if (vmci_dev->intr_type == VMCI_INTR_TYPE_MSI) {
+ pci_disable_msi(pdev);
+ }
+
+ tasklet_kill(&vmci_dev->datagram_tasklet);
+ tasklet_kill(&vmci_dev->bm_tasklet);
+
+ if (vmci_dev->notification_bitmap) {
+ /*
+ * The device reset above cleared the bitmap state of the
+ * device, so we can safely free it here.
+ */
+
+ dma_free_coherent(&pdev->dev, PAGE_SIZE,
+ vmci_dev->notification_bitmap,
+ vmci_dev->notification_base);
+ }
+
+ vfree(vmci_dev->data_buffer);
+
+ /* The rest are managed resources and will be freed by PCI core */
+}
+
+static const struct pci_device_id vmci_ids[] = {
+ { PCI_DEVICE(PCI_VENDOR_ID_VMWARE, PCI_DEVICE_ID_VMWARE_VMCI), },
+ { 0 },
+};
+MODULE_DEVICE_TABLE(pci, vmci_ids);
+
+static struct pci_driver vmci_guest_driver = {
+ .name = KBUILD_MODNAME,
+ .id_table = vmci_ids,
+ .probe = vmci_guest_probe_device,
+ .remove = vmci_guest_remove_device,
+};
+
+int __init vmci_guest_init(void)
+{
+ return pci_register_driver(&vmci_guest_driver);
+}
+
+void __exit vmci_guest_exit(void)
+{
+ pci_unregister_driver(&vmci_guest_driver);
+}
diff --git a/drivers/misc/vmw_vmci/vmci_handle_array.c b/drivers/misc/vmw_vmci/vmci_handle_array.c
new file mode 100644
index 0000000..344973a
--- /dev/null
+++ b/drivers/misc/vmw_vmci/vmci_handle_array.c
@@ -0,0 +1,142 @@
+/*
+ * VMware VMCI Driver
+ *
+ * Copyright (C) 2012 VMware, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation version 2 and no later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ */
+
+#include <linux/slab.h>
+#include "vmci_handle_array.h"
+
+static size_t handle_arr_calc_size(size_t capacity)
+{
+ return sizeof(struct vmci_handle_arr) +
+ capacity * sizeof(struct vmci_handle);
+}
+
+struct vmci_handle_arr *vmci_handle_arr_create(size_t capacity)
+{
+ struct vmci_handle_arr *array;
+
+ if (capacity == 0)
+ capacity = VMCI_HANDLE_ARRAY_DEFAULT_SIZE;
+
+ array = kmalloc(handle_arr_calc_size(capacity), GFP_ATOMIC);
+ if (!array)
+ return NULL;
+
+ array->capacity = capacity;
+ array->size = 0;
+
+ return array;
+}
+
+void vmci_handle_arr_destroy(struct vmci_handle_arr *array)
+{
+ kfree(array);
+}
+
+void vmci_handle_arr_append_entry(struct vmci_handle_arr **array_ptr,
+ struct vmci_handle handle)
+{
+ struct vmci_handle_arr *array = *array_ptr;
+
+ if (unlikely(array->size >= array->capacity)) {
+ /* reallocate. */
+ struct vmci_handle_arr *new_array;
+ size_t new_capacity = array->capacity * VMCI_ARR_CAP_MULT;
+ size_t new_size = handle_arr_calc_size(new_capacity);
+
+ new_array = krealloc(array, new_size, GFP_ATOMIC);
+ if (!new_array)
+ return;
+
+ new_array->capacity = new_capacity;
+ *array_ptr = array = new_array;
+ }
+
+ array->entries[array->size] = handle;
+ array->size++;
+}
+
+/*
+ * Handle that was removed, VMCI_INVALID_HANDLE if entry not found.
+ */
+struct vmci_handle vmci_handle_arr_remove_entry(struct vmci_handle_arr *array,
+ struct vmci_handle entry_handle)
+{
+ struct vmci_handle handle = VMCI_INVALID_HANDLE;
+ size_t i;
+
+ for (i = 0; i < array->size; i++) {
+ if (vmci_handle_is_equal(array->entries[i], entry_handle)) {
+ handle = array->entries[i];
+ array->size--;
+ array->entries[i] = array->entries[array->size];
+ array->entries[array->size] = VMCI_INVALID_HANDLE;
+ break;
+ }
+ }
+
+ return handle;
+}
+
+/*
+ * Handle that was removed, VMCI_INVALID_HANDLE if array was empty.
+ */
+struct vmci_handle vmci_handle_arr_remove_tail(struct vmci_handle_arr *array)
+{
+ struct vmci_handle handle = VMCI_INVALID_HANDLE;
+
+ if (array->size) {
+ array->size--;
+ handle = array->entries[array->size];
+ array->entries[array->size] = VMCI_INVALID_HANDLE;
+ }
+
+ return handle;
+}
+
+/*
+ * Handle at given index, VMCI_INVALID_HANDLE if invalid index.
+ */
+struct vmci_handle
+vmci_handle_arr_get_entry(const struct vmci_handle_arr *array, size_t index)
+{
+ if (unlikely(index >= array->size))
+ return VMCI_INVALID_HANDLE;
+
+ return array->entries[index];
+}
+
+bool vmci_handle_arr_has_entry(const struct vmci_handle_arr *array,
+ struct vmci_handle entry_handle)
+{
+ size_t i;
+
+ for (i = 0; i < array->size; i++)
+ if (vmci_handle_is_equal(array->entries[i], entry_handle))
+ return true;
+
+ return false;
+}
+
+/*
+ * NULL if the array is empty. Otherwise, a pointer to the array
+ * of VMCI handles in the handle array.
+ */
+struct vmci_handle *vmci_handle_arr_get_handles(struct vmci_handle_arr *array)
+{
+ if (array->size)
+ return array->entries;
+
+ return NULL;
+}
diff --git a/drivers/misc/vmw_vmci/vmci_handle_array.h b/drivers/misc/vmw_vmci/vmci_handle_array.h
new file mode 100644
index 0000000..b5f3a7f
--- /dev/null
+++ b/drivers/misc/vmw_vmci/vmci_handle_array.h
@@ -0,0 +1,52 @@
+/*
+ * VMware VMCI Driver
+ *
+ * Copyright (C) 2012 VMware, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation version 2 and no later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ */
+
+#ifndef _VMCI_HANDLE_ARRAY_H_
+#define _VMCI_HANDLE_ARRAY_H_
+
+#include <linux/vmw_vmci_defs.h>
+#include <linux/types.h>
+
+#define VMCI_HANDLE_ARRAY_DEFAULT_SIZE 4
+#define VMCI_ARR_CAP_MULT 2 /* Array capacity multiplier */
+
+struct vmci_handle_arr {
+ size_t capacity;
+ size_t size;
+ struct vmci_handle entries[];
+};
+
+struct vmci_handle_arr *vmci_handle_arr_create(size_t capacity);
+void vmci_handle_arr_destroy(struct vmci_handle_arr *array);
+void vmci_handle_arr_append_entry(struct vmci_handle_arr **array_ptr,
+ struct vmci_handle handle);
+struct vmci_handle vmci_handle_arr_remove_entry(struct vmci_handle_arr *array,
+ struct vmci_handle
+ entry_handle);
+struct vmci_handle vmci_handle_arr_remove_tail(struct vmci_handle_arr *array);
+struct vmci_handle
+vmci_handle_arr_get_entry(const struct vmci_handle_arr *array, size_t index);
+bool vmci_handle_arr_has_entry(const struct vmci_handle_arr *array,
+ struct vmci_handle entry_handle);
+struct vmci_handle *vmci_handle_arr_get_handles(struct vmci_handle_arr *array);
+
+static inline size_t vmci_handle_arr_get_size(
+ const struct vmci_handle_arr *array)
+{
+ return array->size;
+}
+
+
+#endif /* _VMCI_HANDLE_ARRAY_H_ */
diff --git a/drivers/misc/vmw_vmci/vmci_host.c b/drivers/misc/vmw_vmci/vmci_host.c
new file mode 100644
index 0000000..9ec262a
--- /dev/null
+++ b/drivers/misc/vmw_vmci/vmci_host.c
@@ -0,0 +1,1041 @@
+/*
+ * VMware VMCI Driver
+ *
+ * Copyright (C) 2012 VMware, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation version 2 and no later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ */
+
+#include <linux/vmw_vmci_defs.h>
+#include <linux/vmw_vmci_api.h>
+#include <linux/moduleparam.h>
+#include <linux/miscdevice.h>
+#include <linux/interrupt.h>
+#include <linux/highmem.h>
+#include <linux/atomic.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/init.h>
+#include <linux/poll.h>
+#include <linux/pci.h>
+#include <linux/smp.h>
+#include <linux/fs.h>
+#include <linux/io.h>
+
+#include "vmci_handle_array.h"
+#include "vmci_queue_pair.h"
+#include "vmci_datagram.h"
+#include "vmci_doorbell.h"
+#include "vmci_resource.h"
+#include "vmci_context.h"
+#include "vmci_driver.h"
+#include "vmci_event.h"
+
+#define VMCI_UTIL_NUM_RESOURCES 1
+
+enum {
+ VMCI_NOTIFY_RESOURCE_QUEUE_PAIR = 0,
+ VMCI_NOTIFY_RESOURCE_DOOR_BELL = 1,
+};
+
+enum {
+ VMCI_NOTIFY_RESOURCE_ACTION_NOTIFY = 0,
+ VMCI_NOTIFY_RESOURCE_ACTION_CREATE = 1,
+ VMCI_NOTIFY_RESOURCE_ACTION_DESTROY = 2,
+};
+
+/*
+ * VMCI driver initialization. This block can also be used to
+ * pass initial group membership etc.
+ */
+struct vmci_init_blk {
+ u32 cid;
+ u32 flags;
+};
+
+/* VMCIqueue_pairAllocInfo_VMToVM */
+struct vmci_qp_alloc_info_vmvm {
+ struct vmci_handle handle;
+ u32 peer;
+ u32 flags;
+ u64 produce_size;
+ u64 consume_size;
+ u64 produce_page_file; /* User VA. */
+ u64 consume_page_file; /* User VA. */
+ u64 produce_page_file_size; /* Size of the file name array. */
+ u64 consume_page_file_size; /* Size of the file name array. */
+ s32 result;
+ u32 _pad;
+};
+
+/* VMCISetNotifyInfo: Used to pass notify flag's address to the host driver. */
+struct vmci_set_notify_info {
+ u64 notify_uva;
+ s32 result;
+ u32 _pad;
+};
+
+/*
+ * Per-instance host state
+ */
+struct vmci_host_dev {
+ struct vmci_ctx *context;
+ int user_version;
+ enum vmci_obj_type ct_type;
+ struct mutex lock; /* Mutex lock for vmci context access */
+};
+
+static struct vmci_ctx *host_context;
+static bool vmci_host_device_initialized;
+static atomic_t vmci_host_active_users = ATOMIC_INIT(0);
+
+/*
+ * Determines whether the VMCI host personality is
+ * available. Since the core functionality of the host driver is
+ * always present, all guests could possibly use the host
+ * personality. However, to minimize the deviation from the
+ * pre-unified driver state of affairs, we only consider the host
+ * device active if there is no active guest device or if there
+ * are VMX'en with active VMCI contexts using the host device.
+ */
+bool vmci_host_code_active(void)
+{
+ return vmci_host_device_initialized &&
+ (!vmci_guest_code_active() ||
+ atomic_read(&vmci_host_active_users) > 0);
+}
+
+/*
+ * Called on open of /dev/vmci.
+ */
+static int vmci_host_open(struct inode *inode, struct file *filp)
+{
+ struct vmci_host_dev *vmci_host_dev;
+
+ vmci_host_dev = kzalloc(sizeof(struct vmci_host_dev), GFP_KERNEL);
+ if (vmci_host_dev == NULL)
+ return -ENOMEM;
+
+ vmci_host_dev->ct_type = VMCIOBJ_NOT_SET;
+ mutex_init(&vmci_host_dev->lock);
+ filp->private_data = vmci_host_dev;
+
+ return 0;
+}
+
+/*
+ * Called on close of /dev/vmci, most often when the process
+ * exits.
+ */
+static int vmci_host_close(struct inode *inode, struct file *filp)
+{
+ struct vmci_host_dev *vmci_host_dev = filp->private_data;
+
+ if (vmci_host_dev->ct_type == VMCIOBJ_CONTEXT) {
+ vmci_ctx_destroy(vmci_host_dev->context);
+ vmci_host_dev->context = NULL;
+
+ /*
+ * The number of active contexts is used to track whether any
+ * VMX'en are using the host personality. It is incremented when
+ * a context is created through the IOCTL_VMCI_INIT_CONTEXT
+ * ioctl.
+ */
+ atomic_dec(&vmci_host_active_users);
+ }
+ vmci_host_dev->ct_type = VMCIOBJ_NOT_SET;
+
+ kfree(vmci_host_dev);
+ filp->private_data = NULL;
+ return 0;
+}
+
+/*
+ * This is used to wake up the VMX when a VMCI call arrives, or
+ * to wake up select() or poll() at the next clock tick.
+ */
+static unsigned int vmci_host_poll(struct file *filp, poll_table *wait)
+{
+ struct vmci_host_dev *vmci_host_dev = filp->private_data;
+ struct vmci_ctx *context = vmci_host_dev->context;
+ unsigned int mask = 0;
+
+ if (vmci_host_dev->ct_type == VMCIOBJ_CONTEXT) {
+ /* Check for VMCI calls to this VM context. */
+ if (wait)
+ poll_wait(filp, &context->host_context.wait_queue,
+ wait);
+
+ spin_lock(&context->lock);
+ if (context->pending_datagrams > 0 ||
+ vmci_handle_arr_get_size(
+ context->pending_doorbell_array) > 0) {
+ mask = POLLIN;
+ }
+ spin_unlock(&context->lock);
+ }
+ return mask;
+}
+
+/*
+ * Copies the handles of a handle array into a user buffer, and
+ * returns the new length in userBufferSize. If the copy to the
+ * user buffer fails, the functions still returns VMCI_SUCCESS,
+ * but retval != 0.
+ */
+static int drv_cp_harray_to_user(void __user *user_buf_uva,
+ u64 *user_buf_size,
+ struct vmci_handle_arr *handle_array,
+ int *retval)
+{
+ u32 array_size = 0;
+ struct vmci_handle *handles;
+
+ if (handle_array)
+ array_size = vmci_handle_arr_get_size(handle_array);
+
+ if (array_size * sizeof(*handles) > *user_buf_size)
+ return VMCI_ERROR_MORE_DATA;
+
+ *user_buf_size = array_size * sizeof(*handles);
+ if (*user_buf_size)
+ *retval = copy_to_user(user_buf_uva,
+ vmci_handle_arr_get_handles
+ (handle_array), *user_buf_size);
+
+ return VMCI_SUCCESS;
+}
+
+/*
+ * Sets up a given context for notify to work. Maps the notify
+ * boolean in user VA into kernel space.
+ */
+static int vmci_host_setup_notify(struct vmci_ctx *context,
+ unsigned long uva)
+{
+ int retval;
+
+ if (context->notify_page) {
+ pr_devel("%s: Notify mechanism is already set up\n", __func__);
+ return VMCI_ERROR_DUPLICATE_ENTRY;
+ }
+
+ /*
+ * We are using 'bool' internally, but let's make sure we explicit
+ * about the size.
+ */
+ BUILD_BUG_ON(sizeof(bool) != sizeof(u8));
+ if (!access_ok(VERIFY_WRITE, (void __user *)uva, sizeof(u8)))
+ return VMCI_ERROR_GENERIC;
+
+ /*
+ * Lock physical page backing a given user VA.
+ */
+ retval = get_user_pages_fast(uva, 1, 1, &context->notify_page);
+ if (retval != 1) {
+ context->notify_page = NULL;
+ return VMCI_ERROR_GENERIC;
+ }
+
+ /*
+ * Map the locked page and set up notify pointer.
+ */
+ context->notify = kmap(context->notify_page) + (uva & (PAGE_SIZE - 1));
+ vmci_ctx_check_signal_notify(context);
+
+ return VMCI_SUCCESS;
+}
+
+static int vmci_host_get_version(struct vmci_host_dev *vmci_host_dev,
+ unsigned int cmd, void __user *uptr)
+{
+ if (cmd == IOCTL_VMCI_VERSION2) {
+ int __user *vptr = uptr;
+ if (get_user(vmci_host_dev->user_version, vptr))
+ return -EFAULT;
+ }
+
+ /*
+ * The basic logic here is:
+ *
+ * If the user sends in a version of 0 tell it our version.
+ * If the user didn't send in a version, tell it our version.
+ * If the user sent in an old version, tell it -its- version.
+ * If the user sent in an newer version, tell it our version.
+ *
+ * The rationale behind telling the caller its version is that
+ * Workstation 6.5 required that VMX and VMCI kernel module were
+ * version sync'd. All new VMX users will be programmed to
+ * handle the VMCI kernel module version.
+ */
+
+ if (vmci_host_dev->user_version > 0 &&
+ vmci_host_dev->user_version < VMCI_VERSION_HOSTQP) {
+ return vmci_host_dev->user_version;
+ }
+
+ return VMCI_VERSION;
+}
+
+#define vmci_ioctl_err(fmt, ...) \
+ pr_devel("%s: " fmt, ioctl_name, ##__VA_ARGS__)
+
+static int vmci_host_do_init_context(struct vmci_host_dev *vmci_host_dev,
+ const char *ioctl_name,
+ void __user *uptr)
+{
+ struct vmci_init_blk init_block;
+ const struct cred *cred;
+ int retval;
+
+ if (copy_from_user(&init_block, uptr, sizeof(init_block))) {
+ vmci_ioctl_err("error reading init block\n");
+ return -EFAULT;
+ }
+
+ mutex_lock(&vmci_host_dev->lock);
+
+ if (vmci_host_dev->ct_type != VMCIOBJ_NOT_SET) {
+ vmci_ioctl_err("received VMCI init on initialized handle\n");
+ retval = -EINVAL;
+ goto out;
+ }
+
+ if (init_block.flags & ~VMCI_PRIVILEGE_FLAG_RESTRICTED) {
+ vmci_ioctl_err("unsupported VMCI restriction flag\n");
+ retval = -EINVAL;
+ goto out;
+ }
+
+ cred = get_current_cred();
+ vmci_host_dev->context = vmci_ctx_create(init_block.cid,
+ init_block.flags, 0,
+ vmci_host_dev->user_version,
+ cred);
+ put_cred(cred);
+ if (IS_ERR(vmci_host_dev->context)) {
+ retval = PTR_ERR(vmci_host_dev->context);
+ vmci_ioctl_err("error initializing context\n");
+ goto out;
+ }
+
+ /*
+ * Copy cid to userlevel, we do this to allow the VMX
+ * to enforce its policy on cid generation.
+ */
+ init_block.cid = vmci_ctx_get_id(vmci_host_dev->context);
+ if (copy_to_user(uptr, &init_block, sizeof(init_block))) {
+ vmci_ctx_destroy(vmci_host_dev->context);
+ vmci_host_dev->context = NULL;
+ vmci_ioctl_err("error writing init block\n");
+ retval = -EFAULT;
+ goto out;
+ }
+
+ vmci_host_dev->ct_type = VMCIOBJ_CONTEXT;
+ atomic_inc(&vmci_host_active_users);
+
+ retval = 0;
+
+out:
+ mutex_unlock(&vmci_host_dev->lock);
+ return retval;
+}
+
+static int vmci_host_do_send_datagram(struct vmci_host_dev *vmci_host_dev,
+ const char *ioctl_name,
+ void __user *uptr)
+{
+ struct vmci_datagram_snd_rcv_info send_info;
+ struct vmci_datagram *dg = NULL;
+ u32 cid;
+
+ if (vmci_host_dev->ct_type != VMCIOBJ_CONTEXT) {
+ vmci_ioctl_err("only valid for contexts\n");
+ return -EINVAL;
+ }
+
+ if (copy_from_user(&send_info, uptr, sizeof(send_info)))
+ return -EFAULT;
+
+ if (send_info.len > VMCI_MAX_DG_SIZE) {
+ vmci_ioctl_err("datagram is too big (size=%d)\n",
+ send_info.len);
+ return -EINVAL;
+ }
+
+ if (send_info.len < sizeof(*dg)) {
+ vmci_ioctl_err("datagram is too small (size=%d)\n",
+ send_info.len);
+ return -EINVAL;
+ }
+
+ dg = kmalloc(send_info.len, GFP_KERNEL);
+ if (!dg) {
+ vmci_ioctl_err(
+ "cannot allocate memory to dispatch datagram\n");
+ return -ENOMEM;
+ }
+
+ if (copy_from_user(dg, (void __user *)(uintptr_t)send_info.addr,
+ send_info.len)) {
+ vmci_ioctl_err("error getting datagram\n");
+ kfree(dg);
+ return -EFAULT;
+ }
+
+ if (VMCI_DG_SIZE(dg) != send_info.len) {
+ vmci_ioctl_err("datagram size mismatch\n");
+ kfree(dg);
+ return -EINVAL;
+ }
+
+ pr_devel("Datagram dst (handle=0x%x:0x%x) src (handle=0x%x:0x%x), payload (size=%llu bytes)\n",
+ dg->dst.context, dg->dst.resource,
+ dg->src.context, dg->src.resource,
+ (unsigned long long)dg->payload_size);
+
+ /* Get source context id. */
+ cid = vmci_ctx_get_id(vmci_host_dev->context);
+ send_info.result = vmci_datagram_dispatch(cid, dg, true);
+ kfree(dg);
+
+ return copy_to_user(uptr, &send_info, sizeof(send_info)) ? -EFAULT : 0;
+}
+
+static int vmci_host_do_receive_datagram(struct vmci_host_dev *vmci_host_dev,
+ const char *ioctl_name,
+ void __user *uptr)
+{
+ struct vmci_datagram_snd_rcv_info recv_info;
+ struct vmci_datagram *dg = NULL;
+ int retval;
+ size_t size;
+
+ if (vmci_host_dev->ct_type != VMCIOBJ_CONTEXT) {
+ vmci_ioctl_err("only valid for contexts\n");
+ return -EINVAL;
+ }
+
+ if (copy_from_user(&recv_info, uptr, sizeof(recv_info)))
+ return -EFAULT;
+
+ size = recv_info.len;
+ recv_info.result = vmci_ctx_dequeue_datagram(vmci_host_dev->context,
+ &size, &dg);
+
+ if (recv_info.result >= VMCI_SUCCESS) {
+ void __user *ubuf = (void __user *)(uintptr_t)recv_info.addr;
+ retval = copy_to_user(ubuf, dg, VMCI_DG_SIZE(dg));
+ kfree(dg);
+ if (retval != 0)
+ return -EFAULT;
+ }
+
+ return copy_to_user(uptr, &recv_info, sizeof(recv_info)) ? -EFAULT : 0;
+}
+
+static int vmci_host_do_alloc_queuepair(struct vmci_host_dev *vmci_host_dev,
+ const char *ioctl_name,
+ void __user *uptr)
+{
+ struct vmci_handle handle;
+ int vmci_status;
+ int __user *retptr;
+ u32 cid;
+
+ if (vmci_host_dev->ct_type != VMCIOBJ_CONTEXT) {
+ vmci_ioctl_err("only valid for contexts\n");
+ return -EINVAL;
+ }
+
+ cid = vmci_ctx_get_id(vmci_host_dev->context);
+
+ if (vmci_host_dev->user_version < VMCI_VERSION_NOVMVM) {
+ struct vmci_qp_alloc_info_vmvm alloc_info;
+ struct vmci_qp_alloc_info_vmvm __user *info = uptr;
+
+ if (copy_from_user(&alloc_info, uptr, sizeof(alloc_info)))
+ return -EFAULT;
+
+ handle = alloc_info.handle;
+ retptr = &info->result;
+
+ vmci_status = vmci_qp_broker_alloc(alloc_info.handle,
+ alloc_info.peer,
+ alloc_info.flags,
+ VMCI_NO_PRIVILEGE_FLAGS,
+ alloc_info.produce_size,
+ alloc_info.consume_size,
+ NULL,
+ vmci_host_dev->context);
+
+ if (vmci_status == VMCI_SUCCESS)
+ vmci_status = VMCI_SUCCESS_QUEUEPAIR_CREATE;
+ } else {
+ struct vmci_qp_alloc_info alloc_info;
+ struct vmci_qp_alloc_info __user *info = uptr;
+ struct vmci_qp_page_store page_store;
+
+ if (copy_from_user(&alloc_info, uptr, sizeof(alloc_info)))
+ return -EFAULT;
+
+ handle = alloc_info.handle;
+ retptr = &info->result;
+
+ page_store.pages = alloc_info.ppn_va;
+ page_store.len = alloc_info.num_ppns;
+
+ vmci_status = vmci_qp_broker_alloc(alloc_info.handle,
+ alloc_info.peer,
+ alloc_info.flags,
+ VMCI_NO_PRIVILEGE_FLAGS,
+ alloc_info.produce_size,
+ alloc_info.consume_size,
+ &page_store,
+ vmci_host_dev->context);
+ }
+
+ if (put_user(vmci_status, retptr)) {
+ if (vmci_status >= VMCI_SUCCESS) {
+ vmci_status = vmci_qp_broker_detach(handle,
+ vmci_host_dev->context);
+ }
+ return -EFAULT;
+ }
+
+ return 0;
+}
+
+static int vmci_host_do_queuepair_setva(struct vmci_host_dev *vmci_host_dev,
+ const char *ioctl_name,
+ void __user *uptr)
+{
+ struct vmci_qp_set_va_info set_va_info;
+ struct vmci_qp_set_va_info __user *info = uptr;
+ s32 result;
+
+ if (vmci_host_dev->ct_type != VMCIOBJ_CONTEXT) {
+ vmci_ioctl_err("only valid for contexts\n");
+ return -EINVAL;
+ }
+
+ if (vmci_host_dev->user_version < VMCI_VERSION_NOVMVM) {
+ vmci_ioctl_err("is not allowed\n");
+ return -EINVAL;
+ }
+
+ if (copy_from_user(&set_va_info, uptr, sizeof(set_va_info)))
+ return -EFAULT;
+
+ if (set_va_info.va) {
+ /*
+ * VMX is passing down a new VA for the queue
+ * pair mapping.
+ */
+ result = vmci_qp_broker_map(set_va_info.handle,
+ vmci_host_dev->context,
+ set_va_info.va);
+ } else {
+ /*
+ * The queue pair is about to be unmapped by
+ * the VMX.
+ */
+ result = vmci_qp_broker_unmap(set_va_info.handle,
+ vmci_host_dev->context, 0);
+ }
+
+ return put_user(result, &info->result) ? -EFAULT : 0;
+}
+
+static int vmci_host_do_queuepair_setpf(struct vmci_host_dev *vmci_host_dev,
+ const char *ioctl_name,
+ void __user *uptr)
+{
+ struct vmci_qp_page_file_info page_file_info;
+ struct vmci_qp_page_file_info __user *info = uptr;
+ s32 result;
+
+ if (vmci_host_dev->user_version < VMCI_VERSION_HOSTQP ||
+ vmci_host_dev->user_version >= VMCI_VERSION_NOVMVM) {
+ vmci_ioctl_err("not supported on this VMX (version=%d)\n",
+ vmci_host_dev->user_version);
+ return -EINVAL;
+ }
+
+ if (vmci_host_dev->ct_type != VMCIOBJ_CONTEXT) {
+ vmci_ioctl_err("only valid for contexts\n");
+ return -EINVAL;
+ }
+
+ if (copy_from_user(&page_file_info, uptr, sizeof(*info)))
+ return -EFAULT;
+
+ /*
+ * Communicate success pre-emptively to the caller. Note that the
+ * basic premise is that it is incumbent upon the caller not to look at
+ * the info.result field until after the ioctl() returns. And then,
+ * only if the ioctl() result indicates no error. We send up the
+ * SUCCESS status before calling SetPageStore() store because failing
+ * to copy up the result code means unwinding the SetPageStore().
+ *
+ * It turns out the logic to unwind a SetPageStore() opens a can of
+ * worms. For example, if a host had created the queue_pair and a
+ * guest attaches and SetPageStore() is successful but writing success
+ * fails, then ... the host has to be stopped from writing (anymore)
+ * data into the queue_pair. That means an additional test in the
+ * VMCI_Enqueue() code path. Ugh.
+ */
+
+ if (put_user(VMCI_SUCCESS, &info->result)) {
+ /*
+ * In this case, we can't write a result field of the
+ * caller's info block. So, we don't even try to
+ * SetPageStore().
+ */
+ return -EFAULT;
+ }
+
+ result = vmci_qp_broker_set_page_store(page_file_info.handle,
+ page_file_info.produce_va,
+ page_file_info.consume_va,
+ vmci_host_dev->context);
+ if (result < VMCI_SUCCESS) {
+ if (put_user(result, &info->result)) {
+ /*
+ * Note that in this case the SetPageStore()
+ * call failed but we were unable to
+ * communicate that to the caller (because the
+ * copy_to_user() call failed). So, if we
+ * simply return an error (in this case
+ * -EFAULT) then the caller will know that the
+ * SetPageStore failed even though we couldn't
+ * put the result code in the result field and
+ * indicate exactly why it failed.
+ *
+ * That says nothing about the issue where we
+ * were once able to write to the caller's info
+ * memory and now can't. Something more
+ * serious is probably going on than the fact
+ * that SetPageStore() didn't work.
+ */
+ return -EFAULT;
+ }
+ }
+
+ return 0;
+}
+
+static int vmci_host_do_qp_detach(struct vmci_host_dev *vmci_host_dev,
+ const char *ioctl_name,
+ void __user *uptr)
+{
+ struct vmci_qp_dtch_info detach_info;
+ struct vmci_qp_dtch_info __user *info = uptr;
+ s32 result;
+
+ if (vmci_host_dev->ct_type != VMCIOBJ_CONTEXT) {
+ vmci_ioctl_err("only valid for contexts\n");
+ return -EINVAL;
+ }
+
+ if (copy_from_user(&detach_info, uptr, sizeof(detach_info)))
+ return -EFAULT;
+
+ result = vmci_qp_broker_detach(detach_info.handle,
+ vmci_host_dev->context);
+ if (result == VMCI_SUCCESS &&
+ vmci_host_dev->user_version < VMCI_VERSION_NOVMVM) {
+ result = VMCI_SUCCESS_LAST_DETACH;
+ }
+
+ return put_user(result, &info->result) ? -EFAULT : 0;
+}
+
+static int vmci_host_do_ctx_add_notify(struct vmci_host_dev *vmci_host_dev,
+ const char *ioctl_name,
+ void __user *uptr)
+{
+ struct vmci_ctx_info ar_info;
+ struct vmci_ctx_info __user *info = uptr;
+ s32 result;
+ u32 cid;
+
+ if (vmci_host_dev->ct_type != VMCIOBJ_CONTEXT) {
+ vmci_ioctl_err("only valid for contexts\n");
+ return -EINVAL;
+ }
+
+ if (copy_from_user(&ar_info, uptr, sizeof(ar_info)))
+ return -EFAULT;
+
+ cid = vmci_ctx_get_id(vmci_host_dev->context);
+ result = vmci_ctx_add_notification(cid, ar_info.remote_cid);
+
+ return put_user(result, &info->result) ? -EFAULT : 0;
+}
+
+static int vmci_host_do_ctx_remove_notify(struct vmci_host_dev *vmci_host_dev,
+ const char *ioctl_name,
+ void __user *uptr)
+{
+ struct vmci_ctx_info ar_info;
+ struct vmci_ctx_info __user *info = uptr;
+ u32 cid;
+ int result;
+
+ if (vmci_host_dev->ct_type != VMCIOBJ_CONTEXT) {
+ vmci_ioctl_err("only valid for contexts\n");
+ return -EINVAL;
+ }
+
+ if (copy_from_user(&ar_info, uptr, sizeof(ar_info)))
+ return -EFAULT;
+
+ cid = vmci_ctx_get_id(vmci_host_dev->context);
+ result = vmci_ctx_remove_notification(cid,
+ ar_info.remote_cid);
+
+ return put_user(result, &info->result) ? -EFAULT : 0;
+}
+
+static int vmci_host_do_ctx_get_cpt_state(struct vmci_host_dev *vmci_host_dev,
+ const char *ioctl_name,
+ void __user *uptr)
+{
+ struct vmci_ctx_chkpt_buf_info get_info;
+ u32 cid;
+ void *cpt_buf;
+ int retval;
+
+ if (vmci_host_dev->ct_type != VMCIOBJ_CONTEXT) {
+ vmci_ioctl_err("only valid for contexts\n");
+ return -EINVAL;
+ }
+
+ if (copy_from_user(&get_info, uptr, sizeof(get_info)))
+ return -EFAULT;
+
+ cid = vmci_ctx_get_id(vmci_host_dev->context);
+ get_info.result = vmci_ctx_get_chkpt_state(cid, get_info.cpt_type,
+ &get_info.buf_size, &cpt_buf);
+ if (get_info.result == VMCI_SUCCESS && get_info.buf_size) {
+ void __user *ubuf = (void __user *)(uintptr_t)get_info.cpt_buf;
+ retval = copy_to_user(ubuf, cpt_buf, get_info.buf_size);
+ kfree(cpt_buf);
+
+ if (retval)
+ return -EFAULT;
+ }
+
+ return copy_to_user(uptr, &get_info, sizeof(get_info)) ? -EFAULT : 0;
+}
+
+static int vmci_host_do_ctx_set_cpt_state(struct vmci_host_dev *vmci_host_dev,
+ const char *ioctl_name,
+ void __user *uptr)
+{
+ struct vmci_ctx_chkpt_buf_info set_info;
+ u32 cid;
+ void *cpt_buf;
+ int retval;
+
+ if (vmci_host_dev->ct_type != VMCIOBJ_CONTEXT) {
+ vmci_ioctl_err("only valid for contexts\n");
+ return -EINVAL;
+ }
+
+ if (copy_from_user(&set_info, uptr, sizeof(set_info)))
+ return -EFAULT;
+
+ cpt_buf = kmalloc(set_info.buf_size, GFP_KERNEL);
+ if (!cpt_buf) {
+ vmci_ioctl_err(
+ "cannot allocate memory to set cpt state (type=%d)\n",
+ set_info.cpt_type);
+ return -ENOMEM;
+ }
+
+ if (copy_from_user(cpt_buf, (void __user *)(uintptr_t)set_info.cpt_buf,
+ set_info.buf_size)) {
+ retval = -EFAULT;
+ goto out;
+ }
+
+ cid = vmci_ctx_get_id(vmci_host_dev->context);
+ set_info.result = vmci_ctx_set_chkpt_state(cid, set_info.cpt_type,
+ set_info.buf_size, cpt_buf);
+
+ retval = copy_to_user(uptr, &set_info, sizeof(set_info)) ? -EFAULT : 0;
+
+out:
+ kfree(cpt_buf);
+ return retval;
+}
+
+static int vmci_host_do_get_context_id(struct vmci_host_dev *vmci_host_dev,
+ const char *ioctl_name,
+ void __user *uptr)
+{
+ u32 __user *u32ptr = uptr;
+
+ return put_user(VMCI_HOST_CONTEXT_ID, u32ptr) ? -EFAULT : 0;
+}
+
+static int vmci_host_do_set_notify(struct vmci_host_dev *vmci_host_dev,
+ const char *ioctl_name,
+ void __user *uptr)
+{
+ struct vmci_set_notify_info notify_info;
+
+ if (vmci_host_dev->ct_type != VMCIOBJ_CONTEXT) {
+ vmci_ioctl_err("only valid for contexts\n");
+ return -EINVAL;
+ }
+
+ if (copy_from_user(¬ify_info, uptr, sizeof(notify_info)))
+ return -EFAULT;
+
+ if (notify_info.notify_uva) {
+ notify_info.result =
+ vmci_host_setup_notify(vmci_host_dev->context,
+ notify_info.notify_uva);
+ } else {
+ vmci_ctx_unset_notify(vmci_host_dev->context);
+ notify_info.result = VMCI_SUCCESS;
+ }
+
+ return copy_to_user(uptr, ¬ify_info, sizeof(notify_info)) ?
+ -EFAULT : 0;
+}
+
+static int vmci_host_do_notify_resource(struct vmci_host_dev *vmci_host_dev,
+ const char *ioctl_name,
+ void __user *uptr)
+{
+ struct vmci_dbell_notify_resource_info info;
+ u32 cid;
+
+ if (vmci_host_dev->user_version < VMCI_VERSION_NOTIFY) {
+ vmci_ioctl_err("invalid for current VMX versions\n");
+ return -EINVAL;
+ }
+
+ if (vmci_host_dev->ct_type != VMCIOBJ_CONTEXT) {
+ vmci_ioctl_err("only valid for contexts\n");
+ return -EINVAL;
+ }
+
+ if (copy_from_user(&info, uptr, sizeof(info)))
+ return -EFAULT;
+
+ cid = vmci_ctx_get_id(vmci_host_dev->context);
+
+ switch (info.action) {
+ case VMCI_NOTIFY_RESOURCE_ACTION_NOTIFY:
+ if (info.resource == VMCI_NOTIFY_RESOURCE_DOOR_BELL) {
+ u32 flags = VMCI_NO_PRIVILEGE_FLAGS;
+ info.result = vmci_ctx_notify_dbell(cid, info.handle,
+ flags);
+ } else {
+ info.result = VMCI_ERROR_UNAVAILABLE;
+ }
+ break;
+
+ case VMCI_NOTIFY_RESOURCE_ACTION_CREATE:
+ info.result = vmci_ctx_dbell_create(cid, info.handle);
+ break;
+
+ case VMCI_NOTIFY_RESOURCE_ACTION_DESTROY:
+ info.result = vmci_ctx_dbell_destroy(cid, info.handle);
+ break;
+
+ default:
+ vmci_ioctl_err("got unknown action (action=%d)\n",
+ info.action);
+ info.result = VMCI_ERROR_INVALID_ARGS;
+ }
+
+ return copy_to_user(uptr, &info, sizeof(info)) ? -EFAULT : 0;
+}
+
+static int vmci_host_do_recv_notifications(struct vmci_host_dev *vmci_host_dev,
+ const char *ioctl_name,
+ void __user *uptr)
+{
+ struct vmci_ctx_notify_recv_info info;
+ struct vmci_handle_arr *db_handle_array;
+ struct vmci_handle_arr *qp_handle_array;
+ void __user *ubuf;
+ u32 cid;
+ int retval = 0;
+
+ if (vmci_host_dev->ct_type != VMCIOBJ_CONTEXT) {
+ vmci_ioctl_err("only valid for contexts\n");
+ return -EINVAL;
+ }
+
+ if (vmci_host_dev->user_version < VMCI_VERSION_NOTIFY) {
+ vmci_ioctl_err("not supported for the current vmx version\n");
+ return -EINVAL;
+ }
+
+ if (copy_from_user(&info, uptr, sizeof(info)))
+ return -EFAULT;
+
+ if ((info.db_handle_buf_size && !info.db_handle_buf_uva) ||
+ (info.qp_handle_buf_size && !info.qp_handle_buf_uva)) {
+ return -EINVAL;
+ }
+
+ cid = vmci_ctx_get_id(vmci_host_dev->context);
+
+ info.result = vmci_ctx_rcv_notifications_get(cid,
+ &db_handle_array, &qp_handle_array);
+ if (info.result != VMCI_SUCCESS)
+ return copy_to_user(uptr, &info, sizeof(info)) ? -EFAULT : 0;
+
+ ubuf = (void __user *)(uintptr_t)info.db_handle_buf_uva;
+ info.result = drv_cp_harray_to_user(ubuf, &info.db_handle_buf_size,
+ db_handle_array, &retval);
+ if (info.result == VMCI_SUCCESS && !retval) {
+ ubuf = (void __user *)(uintptr_t)info.qp_handle_buf_uva;
+ info.result = drv_cp_harray_to_user(ubuf,
+ &info.qp_handle_buf_size,
+ qp_handle_array, &retval);
+ }
+
+ if (!retval && copy_to_user(uptr, &info, sizeof(info)))
+ retval = -EFAULT;
+
+ vmci_ctx_rcv_notifications_release(cid,
+ db_handle_array, qp_handle_array,
+ info.result == VMCI_SUCCESS && !retval);
+
+ return retval;
+}
+
+static long vmci_host_unlocked_ioctl(struct file *filp,
+ unsigned int iocmd, unsigned long ioarg)
+{
+#define VMCI_DO_IOCTL(ioctl_name, ioctl_fn) do { \
+ char *name = __stringify(IOCTL_VMCI_ ## ioctl_name); \
+ return vmci_host_do_ ## ioctl_fn( \
+ vmci_host_dev, name, uptr); \
+ } while (0)
+
+ struct vmci_host_dev *vmci_host_dev = filp->private_data;
+ void __user *uptr = (void __user *)ioarg;
+
+ switch (iocmd) {
+ case IOCTL_VMCI_INIT_CONTEXT:
+ VMCI_DO_IOCTL(INIT_CONTEXT, init_context);
+ case IOCTL_VMCI_DATAGRAM_SEND:
+ VMCI_DO_IOCTL(DATAGRAM_SEND, send_datagram);
+ case IOCTL_VMCI_DATAGRAM_RECEIVE:
+ VMCI_DO_IOCTL(DATAGRAM_RECEIVE, receive_datagram);
+ case IOCTL_VMCI_QUEUEPAIR_ALLOC:
+ VMCI_DO_IOCTL(QUEUEPAIR_ALLOC, alloc_queuepair);
+ case IOCTL_VMCI_QUEUEPAIR_SETVA:
+ VMCI_DO_IOCTL(QUEUEPAIR_SETVA, queuepair_setva);
+ case IOCTL_VMCI_QUEUEPAIR_SETPAGEFILE:
+ VMCI_DO_IOCTL(QUEUEPAIR_SETPAGEFILE, queuepair_setpf);
+ case IOCTL_VMCI_QUEUEPAIR_DETACH:
+ VMCI_DO_IOCTL(QUEUEPAIR_DETACH, qp_detach);
+ case IOCTL_VMCI_CTX_ADD_NOTIFICATION:
+ VMCI_DO_IOCTL(CTX_ADD_NOTIFICATION, ctx_add_notify);
+ case IOCTL_VMCI_CTX_REMOVE_NOTIFICATION:
+ VMCI_DO_IOCTL(CTX_REMOVE_NOTIFICATION, ctx_remove_notify);
+ case IOCTL_VMCI_CTX_GET_CPT_STATE:
+ VMCI_DO_IOCTL(CTX_GET_CPT_STATE, ctx_get_cpt_state);
+ case IOCTL_VMCI_CTX_SET_CPT_STATE:
+ VMCI_DO_IOCTL(CTX_SET_CPT_STATE, ctx_set_cpt_state);
+ case IOCTL_VMCI_GET_CONTEXT_ID:
+ VMCI_DO_IOCTL(GET_CONTEXT_ID, get_context_id);
+ case IOCTL_VMCI_SET_NOTIFY:
+ VMCI_DO_IOCTL(SET_NOTIFY, set_notify);
+ case IOCTL_VMCI_NOTIFY_RESOURCE:
+ VMCI_DO_IOCTL(NOTIFY_RESOURCE, notify_resource);
+ case IOCTL_VMCI_NOTIFICATIONS_RECEIVE:
+ VMCI_DO_IOCTL(NOTIFICATIONS_RECEIVE, recv_notifications);
+
+ case IOCTL_VMCI_VERSION:
+ case IOCTL_VMCI_VERSION2:
+ return vmci_host_get_version(vmci_host_dev, iocmd, uptr);
+
+ default:
+ pr_devel("%s: Unknown ioctl (iocmd=%d)\n", __func__, iocmd);
+ return -EINVAL;
+ }
+
+#undef VMCI_DO_IOCTL
+}
+
+static const struct file_operations vmuser_fops = {
+ .owner = THIS_MODULE,
+ .open = vmci_host_open,
+ .release = vmci_host_close,
+ .poll = vmci_host_poll,
+ .unlocked_ioctl = vmci_host_unlocked_ioctl,
+ .compat_ioctl = vmci_host_unlocked_ioctl,
+};
+
+static struct miscdevice vmci_host_miscdev = {
+ .name = "vmci",
+ .minor = MISC_DYNAMIC_MINOR,
+ .fops = &vmuser_fops,
+};
+
+int __init vmci_host_init(void)
+{
+ int error;
+
+ host_context = vmci_ctx_create(VMCI_HOST_CONTEXT_ID,
+ VMCI_DEFAULT_PROC_PRIVILEGE_FLAGS,
+ -1, VMCI_VERSION, NULL);
+ if (IS_ERR(host_context)) {
+ error = PTR_ERR(host_context);
+ pr_warn("Failed to initialize VMCIContext (error%d)\n",
+ error);
+ return error;
+ }
+
+ error = misc_register(&vmci_host_miscdev);
+ if (error) {
+ pr_warn("Module registration error (name=%s, major=%d, minor=%d, err=%d)\n",
+ vmci_host_miscdev.name,
+ MISC_MAJOR, vmci_host_miscdev.minor,
+ error);
+ pr_warn("Unable to initialize host personality\n");
+ vmci_ctx_destroy(host_context);
+ return error;
+ }
+
+ pr_info("VMCI host device registered (name=%s, major=%d, minor=%d)\n",
+ vmci_host_miscdev.name, MISC_MAJOR, vmci_host_miscdev.minor);
+
+ vmci_host_device_initialized = true;
+ return 0;
+}
+
+void __exit vmci_host_exit(void)
+{
+ vmci_host_device_initialized = false;
+
+ misc_deregister(&vmci_host_miscdev);
+ vmci_ctx_destroy(host_context);
+ vmci_qp_broker_exit();
+
+ pr_debug("VMCI host driver module unloaded\n");
+}
diff --git a/drivers/misc/vmw_vmci/vmci_queue_pair.c b/drivers/misc/vmw_vmci/vmci_queue_pair.c
new file mode 100644
index 0000000..f42d9c4
--- /dev/null
+++ b/drivers/misc/vmw_vmci/vmci_queue_pair.c
@@ -0,0 +1,3355 @@
+/*
+ * VMware VMCI Driver
+ *
+ * Copyright (C) 2012 VMware, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation version 2 and no later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ */
+
+#include <linux/vmw_vmci_defs.h>
+#include <linux/vmw_vmci_api.h>
+#include <linux/highmem.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/pagemap.h>
+#include <linux/pci.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/uio.h>
+#include <linux/wait.h>
+#include <linux/vmalloc.h>
+#include <linux/skbuff.h>
+
+#include "vmci_handle_array.h"
+#include "vmci_queue_pair.h"
+#include "vmci_datagram.h"
+#include "vmci_resource.h"
+#include "vmci_context.h"
+#include "vmci_driver.h"
+#include "vmci_event.h"
+#include "vmci_route.h"
+
+/*
+ * In the following, we will distinguish between two kinds of VMX processes -
+ * the ones with versions lower than VMCI_VERSION_NOVMVM that use specialized
+ * VMCI page files in the VMX and supporting VM to VM communication and the
+ * newer ones that use the guest memory directly. We will in the following
+ * refer to the older VMX versions as old-style VMX'en, and the newer ones as
+ * new-style VMX'en.
+ *
+ * The state transition datagram is as follows (the VMCIQPB_ prefix has been
+ * removed for readability) - see below for more details on the transtions:
+ *
+ * -------------- NEW -------------
+ * | |
+ * \_/ \_/
+ * CREATED_NO_MEM <-----------------> CREATED_MEM
+ * | | |
+ * | o-----------------------o |
+ * | | |
+ * \_/ \_/ \_/
+ * ATTACHED_NO_MEM <----------------> ATTACHED_MEM
+ * | | |
+ * | o----------------------o |
+ * | | |
+ * \_/ \_/ \_/
+ * SHUTDOWN_NO_MEM <----------------> SHUTDOWN_MEM
+ * | |
+ * | |
+ * -------------> gone <-------------
+ *
+ * In more detail. When a VMCI queue pair is first created, it will be in the
+ * VMCIQPB_NEW state. It will then move into one of the following states:
+ *
+ * - VMCIQPB_CREATED_NO_MEM: this state indicates that either:
+ *
+ * - the created was performed by a host endpoint, in which case there is
+ * no backing memory yet.
+ *
+ * - the create was initiated by an old-style VMX, that uses
+ * vmci_qp_broker_set_page_store to specify the UVAs of the queue pair at
+ * a later point in time. This state can be distinguished from the one
+ * above by the context ID of the creator. A host side is not allowed to
+ * attach until the page store has been set.
+ *
+ * - VMCIQPB_CREATED_MEM: this state is the result when the queue pair
+ * is created by a VMX using the queue pair device backend that
+ * sets the UVAs of the queue pair immediately and stores the
+ * information for later attachers. At this point, it is ready for
+ * the host side to attach to it.
+ *
+ * Once the queue pair is in one of the created states (with the exception of
+ * the case mentioned for older VMX'en above), it is possible to attach to the
+ * queue pair. Again we have two new states possible:
+ *
+ * - VMCIQPB_ATTACHED_MEM: this state can be reached through the following
+ * paths:
+ *
+ * - from VMCIQPB_CREATED_NO_MEM when a new-style VMX allocates a queue
+ * pair, and attaches to a queue pair previously created by the host side.
+ *
+ * - from VMCIQPB_CREATED_MEM when the host side attaches to a queue pair
+ * already created by a guest.
+ *
+ * - from VMCIQPB_ATTACHED_NO_MEM, when an old-style VMX calls
+ * vmci_qp_broker_set_page_store (see below).
+ *
+ * - VMCIQPB_ATTACHED_NO_MEM: If the queue pair already was in the
+ * VMCIQPB_CREATED_NO_MEM due to a host side create, an old-style VMX will
+ * bring the queue pair into this state. Once vmci_qp_broker_set_page_store
+ * is called to register the user memory, the VMCIQPB_ATTACH_MEM state
+ * will be entered.
+ *
+ * From the attached queue pair, the queue pair can enter the shutdown states
+ * when either side of the queue pair detaches. If the guest side detaches
+ * first, the queue pair will enter the VMCIQPB_SHUTDOWN_NO_MEM state, where
+ * the content of the queue pair will no longer be available. If the host
+ * side detaches first, the queue pair will either enter the
+ * VMCIQPB_SHUTDOWN_MEM, if the guest memory is currently mapped, or
+ * VMCIQPB_SHUTDOWN_NO_MEM, if the guest memory is not mapped
+ * (e.g., the host detaches while a guest is stunned).
+ *
+ * New-style VMX'en will also unmap guest memory, if the guest is
+ * quiesced, e.g., during a snapshot operation. In that case, the guest
+ * memory will no longer be available, and the queue pair will transition from
+ * *_MEM state to a *_NO_MEM state. The VMX may later map the memory once more,
+ * in which case the queue pair will transition from the *_NO_MEM state at that
+ * point back to the *_MEM state. Note that the *_NO_MEM state may have changed,
+ * since the peer may have either attached or detached in the meantime. The
+ * values are laid out such that ++ on a state will move from a *_NO_MEM to a
+ * *_MEM state, and vice versa.
+ */
+
+/*
+ * VMCIMemcpy{To,From}QueueFunc() prototypes. Functions of these
+ * types are passed around to enqueue and dequeue routines. Note that
+ * often the functions passed are simply wrappers around memcpy
+ * itself.
+ *
+ * Note: In order for the memcpy typedefs to be compatible with the VMKernel,
+ * there's an unused last parameter for the hosted side. In
+ * ESX, that parameter holds a buffer type.
+ */
+typedef int vmci_memcpy_to_queue_func(struct vmci_queue *queue,
+ u64 queue_offset, const void *src,
+ size_t src_offset, size_t size);
+typedef int vmci_memcpy_from_queue_func(void *dest, size_t dest_offset,
+ const struct vmci_queue *queue,
+ u64 queue_offset, size_t size);
+
+/* The Kernel specific component of the struct vmci_queue structure. */
+struct vmci_queue_kern_if {
+ struct mutex __mutex; /* Protects the queue. */
+ struct mutex *mutex; /* Shared by producer and consumer queues. */
+ size_t num_pages; /* Number of pages incl. header. */
+ bool host; /* Host or guest? */
+ union {
+ struct {
+ dma_addr_t *pas;
+ void **vas;
+ } g; /* Used by the guest. */
+ struct {
+ struct page **page;
+ struct page **header_page;
+ } h; /* Used by the host. */
+ } u;
+};
+
+/*
+ * This structure is opaque to the clients.
+ */
+struct vmci_qp {
+ struct vmci_handle handle;
+ struct vmci_queue *produce_q;
+ struct vmci_queue *consume_q;
+ u64 produce_q_size;
+ u64 consume_q_size;
+ u32 peer;
+ u32 flags;
+ u32 priv_flags;
+ bool guest_endpoint;
+ unsigned int blocked;
+ unsigned int generation;
+ wait_queue_head_t event;
+};
+
+enum qp_broker_state {
+ VMCIQPB_NEW,
+ VMCIQPB_CREATED_NO_MEM,
+ VMCIQPB_CREATED_MEM,
+ VMCIQPB_ATTACHED_NO_MEM,
+ VMCIQPB_ATTACHED_MEM,
+ VMCIQPB_SHUTDOWN_NO_MEM,
+ VMCIQPB_SHUTDOWN_MEM,
+ VMCIQPB_GONE
+};
+
+#define QPBROKERSTATE_HAS_MEM(_qpb) (_qpb->state == VMCIQPB_CREATED_MEM || \
+ _qpb->state == VMCIQPB_ATTACHED_MEM || \
+ _qpb->state == VMCIQPB_SHUTDOWN_MEM)
+
+/*
+ * In the queue pair broker, we always use the guest point of view for
+ * the produce and consume queue values and references, e.g., the
+ * produce queue size stored is the guests produce queue size. The
+ * host endpoint will need to swap these around. The only exception is
+ * the local queue pairs on the host, in which case the host endpoint
+ * that creates the queue pair will have the right orientation, and
+ * the attaching host endpoint will need to swap.
+ */
+struct qp_entry {
+ struct list_head list_item;
+ struct vmci_handle handle;
+ u32 peer;
+ u32 flags;
+ u64 produce_size;
+ u64 consume_size;
+ u32 ref_count;
+};
+
+struct qp_broker_entry {
+ struct vmci_resource resource;
+ struct qp_entry qp;
+ u32 create_id;
+ u32 attach_id;
+ enum qp_broker_state state;
+ bool require_trusted_attach;
+ bool created_by_trusted;
+ bool vmci_page_files; /* Created by VMX using VMCI page files */
+ struct vmci_queue *produce_q;
+ struct vmci_queue *consume_q;
+ struct vmci_queue_header saved_produce_q;
+ struct vmci_queue_header saved_consume_q;
+ vmci_event_release_cb wakeup_cb;
+ void *client_data;
+ void *local_mem; /* Kernel memory for local queue pair */
+};
+
+struct qp_guest_endpoint {
+ struct vmci_resource resource;
+ struct qp_entry qp;
+ u64 num_ppns;
+ void *produce_q;
+ void *consume_q;
+ struct ppn_set ppn_set;
+};
+
+struct qp_list {
+ struct list_head head;
+ struct mutex mutex; /* Protect queue list. */
+};
+
+static struct qp_list qp_broker_list = {
+ .head = LIST_HEAD_INIT(qp_broker_list.head),
+ .mutex = __MUTEX_INITIALIZER(qp_broker_list.mutex),
+};
+
+static struct qp_list qp_guest_endpoints = {
+ .head = LIST_HEAD_INIT(qp_guest_endpoints.head),
+ .mutex = __MUTEX_INITIALIZER(qp_guest_endpoints.mutex),
+};
+
+#define INVALID_VMCI_GUEST_MEM_ID 0
+#define QPE_NUM_PAGES(_QPE) ((u32) \
+ (DIV_ROUND_UP(_QPE.produce_size, PAGE_SIZE) + \
+ DIV_ROUND_UP(_QPE.consume_size, PAGE_SIZE) + 2))
+
+
+/*
+ * Frees kernel VA space for a given queue and its queue header, and
+ * frees physical data pages.
+ */
+static void qp_free_queue(void *q, u64 size)
+{
+ struct vmci_queue *queue = q;
+
+ if (queue) {
+ u64 i;
+
+ /* Given size does not include header, so add in a page here. */
+ for (i = 0; i < DIV_ROUND_UP(size, PAGE_SIZE) + 1; i++) {
+ dma_free_coherent(&vmci_pdev->dev, PAGE_SIZE,
+ queue->kernel_if->u.g.vas[i],
+ queue->kernel_if->u.g.pas[i]);
+ }
+
+ vfree(queue);
+ }
+}
+
+/*
+ * Allocates kernel queue pages of specified size with IOMMU mappings,
+ * plus space for the queue structure/kernel interface and the queue
+ * header.
+ */
+static void *qp_alloc_queue(u64 size, u32 flags)
+{
+ u64 i;
+ struct vmci_queue *queue;
+ size_t pas_size;
+ size_t vas_size;
+ size_t queue_size = sizeof(*queue) + sizeof(*queue->kernel_if);
+ const u64 num_pages = DIV_ROUND_UP(size, PAGE_SIZE) + 1;
+
+ if (num_pages >
+ (SIZE_MAX - queue_size) /
+ (sizeof(*queue->kernel_if->u.g.pas) +
+ sizeof(*queue->kernel_if->u.g.vas)))
+ return NULL;
+
+ pas_size = num_pages * sizeof(*queue->kernel_if->u.g.pas);
+ vas_size = num_pages * sizeof(*queue->kernel_if->u.g.vas);
+ queue_size += pas_size + vas_size;
+
+ queue = vmalloc(queue_size);
+ if (!queue)
+ return NULL;
+
+ queue->q_header = NULL;
+ queue->saved_header = NULL;
+ queue->kernel_if = (struct vmci_queue_kern_if *)(queue + 1);
+ queue->kernel_if->mutex = NULL;
+ queue->kernel_if->num_pages = num_pages;
+ queue->kernel_if->u.g.pas = (dma_addr_t *)(queue->kernel_if + 1);
+ queue->kernel_if->u.g.vas =
+ (void **)((u8 *)queue->kernel_if->u.g.pas + pas_size);
+ queue->kernel_if->host = false;
+
+ for (i = 0; i < num_pages; i++) {
+ queue->kernel_if->u.g.vas[i] =
+ dma_alloc_coherent(&vmci_pdev->dev, PAGE_SIZE,
+ &queue->kernel_if->u.g.pas[i],
+ GFP_KERNEL);
+ if (!queue->kernel_if->u.g.vas[i]) {
+ /* Size excl. the header. */
+ qp_free_queue(queue, i * PAGE_SIZE);
+ return NULL;
+ }
+ }
+
+ /* Queue header is the first page. */
+ queue->q_header = queue->kernel_if->u.g.vas[0];
+
+ return queue;
+}
+
+/*
+ * Copies from a given buffer or iovector to a VMCI Queue. Uses
+ * kmap()/kunmap() to dynamically map/unmap required portions of the queue
+ * by traversing the offset -> page translation structure for the queue.
+ * Assumes that offset + size does not wrap around in the queue.
+ */
+static int __qp_memcpy_to_queue(struct vmci_queue *queue,
+ u64 queue_offset,
+ const void *src,
+ size_t size,
+ bool is_iovec)
+{
+ struct vmci_queue_kern_if *kernel_if = queue->kernel_if;
+ size_t bytes_copied = 0;
+
+ while (bytes_copied < size) {
+ const u64 page_index =
+ (queue_offset + bytes_copied) / PAGE_SIZE;
+ const size_t page_offset =
+ (queue_offset + bytes_copied) & (PAGE_SIZE - 1);
+ void *va;
+ size_t to_copy;
+
+ if (kernel_if->host)
+ va = kmap(kernel_if->u.h.page[page_index]);
+ else
+ va = kernel_if->u.g.vas[page_index + 1];
+ /* Skip header. */
+
+ if (size - bytes_copied > PAGE_SIZE - page_offset)
+ /* Enough payload to fill up from this page. */
+ to_copy = PAGE_SIZE - page_offset;
+ else
+ to_copy = size - bytes_copied;
+
+ if (is_iovec) {
+ struct msghdr *msg = (struct msghdr *)src;
+ int err;
+
+ /* The iovec will track bytes_copied internally. */
+ err = memcpy_from_msg((u8 *)va + page_offset,
+ msg, to_copy);
+ if (err != 0) {
+ if (kernel_if->host)
+ kunmap(kernel_if->u.h.page[page_index]);
+ return VMCI_ERROR_INVALID_ARGS;
+ }
+ } else {
+ memcpy((u8 *)va + page_offset,
+ (u8 *)src + bytes_copied, to_copy);
+ }
+
+ bytes_copied += to_copy;
+ if (kernel_if->host)
+ kunmap(kernel_if->u.h.page[page_index]);
+ }
+
+ return VMCI_SUCCESS;
+}
+
+/*
+ * Copies to a given buffer or iovector from a VMCI Queue. Uses
+ * kmap()/kunmap() to dynamically map/unmap required portions of the queue
+ * by traversing the offset -> page translation structure for the queue.
+ * Assumes that offset + size does not wrap around in the queue.
+ */
+static int __qp_memcpy_from_queue(void *dest,
+ const struct vmci_queue *queue,
+ u64 queue_offset,
+ size_t size,
+ bool is_iovec)
+{
+ struct vmci_queue_kern_if *kernel_if = queue->kernel_if;
+ size_t bytes_copied = 0;
+
+ while (bytes_copied < size) {
+ const u64 page_index =
+ (queue_offset + bytes_copied) / PAGE_SIZE;
+ const size_t page_offset =
+ (queue_offset + bytes_copied) & (PAGE_SIZE - 1);
+ void *va;
+ size_t to_copy;
+
+ if (kernel_if->host)
+ va = kmap(kernel_if->u.h.page[page_index]);
+ else
+ va = kernel_if->u.g.vas[page_index + 1];
+ /* Skip header. */
+
+ if (size - bytes_copied > PAGE_SIZE - page_offset)
+ /* Enough payload to fill up this page. */
+ to_copy = PAGE_SIZE - page_offset;
+ else
+ to_copy = size - bytes_copied;
+
+ if (is_iovec) {
+ struct msghdr *msg = dest;
+ int err;
+
+ /* The iovec will track bytes_copied internally. */
+ err = memcpy_to_msg(msg, (u8 *)va + page_offset,
+ to_copy);
+ if (err != 0) {
+ if (kernel_if->host)
+ kunmap(kernel_if->u.h.page[page_index]);
+ return VMCI_ERROR_INVALID_ARGS;
+ }
+ } else {
+ memcpy((u8 *)dest + bytes_copied,
+ (u8 *)va + page_offset, to_copy);
+ }
+
+ bytes_copied += to_copy;
+ if (kernel_if->host)
+ kunmap(kernel_if->u.h.page[page_index]);
+ }
+
+ return VMCI_SUCCESS;
+}
+
+/*
+ * Allocates two list of PPNs --- one for the pages in the produce queue,
+ * and the other for the pages in the consume queue. Intializes the list
+ * of PPNs with the page frame numbers of the KVA for the two queues (and
+ * the queue headers).
+ */
+static int qp_alloc_ppn_set(void *prod_q,
+ u64 num_produce_pages,
+ void *cons_q,
+ u64 num_consume_pages, struct ppn_set *ppn_set)
+{
+ u32 *produce_ppns;
+ u32 *consume_ppns;
+ struct vmci_queue *produce_q = prod_q;
+ struct vmci_queue *consume_q = cons_q;
+ u64 i;
+
+ if (!produce_q || !num_produce_pages || !consume_q ||
+ !num_consume_pages || !ppn_set)
+ return VMCI_ERROR_INVALID_ARGS;
+
+ if (ppn_set->initialized)
+ return VMCI_ERROR_ALREADY_EXISTS;
+
+ produce_ppns =
+ kmalloc(num_produce_pages * sizeof(*produce_ppns), GFP_KERNEL);
+ if (!produce_ppns)
+ return VMCI_ERROR_NO_MEM;
+
+ consume_ppns =
+ kmalloc(num_consume_pages * sizeof(*consume_ppns), GFP_KERNEL);
+ if (!consume_ppns) {
+ kfree(produce_ppns);
+ return VMCI_ERROR_NO_MEM;
+ }
+
+ for (i = 0; i < num_produce_pages; i++) {
+ unsigned long pfn;
+
+ produce_ppns[i] =
+ produce_q->kernel_if->u.g.pas[i] >> PAGE_SHIFT;
+ pfn = produce_ppns[i];
+
+ /* Fail allocation if PFN isn't supported by hypervisor. */
+ if (sizeof(pfn) > sizeof(*produce_ppns)
+ && pfn != produce_ppns[i])
+ goto ppn_error;
+ }
+
+ for (i = 0; i < num_consume_pages; i++) {
+ unsigned long pfn;
+
+ consume_ppns[i] =
+ consume_q->kernel_if->u.g.pas[i] >> PAGE_SHIFT;
+ pfn = consume_ppns[i];
+
+ /* Fail allocation if PFN isn't supported by hypervisor. */
+ if (sizeof(pfn) > sizeof(*consume_ppns)
+ && pfn != consume_ppns[i])
+ goto ppn_error;
+ }
+
+ ppn_set->num_produce_pages = num_produce_pages;
+ ppn_set->num_consume_pages = num_consume_pages;
+ ppn_set->produce_ppns = produce_ppns;
+ ppn_set->consume_ppns = consume_ppns;
+ ppn_set->initialized = true;
+ return VMCI_SUCCESS;
+
+ ppn_error:
+ kfree(produce_ppns);
+ kfree(consume_ppns);
+ return VMCI_ERROR_INVALID_ARGS;
+}
+
+/*
+ * Frees the two list of PPNs for a queue pair.
+ */
+static void qp_free_ppn_set(struct ppn_set *ppn_set)
+{
+ if (ppn_set->initialized) {
+ /* Do not call these functions on NULL inputs. */
+ kfree(ppn_set->produce_ppns);
+ kfree(ppn_set->consume_ppns);
+ }
+ memset(ppn_set, 0, sizeof(*ppn_set));
+}
+
+/*
+ * Populates the list of PPNs in the hypercall structure with the PPNS
+ * of the produce queue and the consume queue.
+ */
+static int qp_populate_ppn_set(u8 *call_buf, const struct ppn_set *ppn_set)
+{
+ memcpy(call_buf, ppn_set->produce_ppns,
+ ppn_set->num_produce_pages * sizeof(*ppn_set->produce_ppns));
+ memcpy(call_buf +
+ ppn_set->num_produce_pages * sizeof(*ppn_set->produce_ppns),
+ ppn_set->consume_ppns,
+ ppn_set->num_consume_pages * sizeof(*ppn_set->consume_ppns));
+
+ return VMCI_SUCCESS;
+}
+
+static int qp_memcpy_to_queue(struct vmci_queue *queue,
+ u64 queue_offset,
+ const void *src, size_t src_offset, size_t size)
+{
+ return __qp_memcpy_to_queue(queue, queue_offset,
+ (u8 *)src + src_offset, size, false);
+}
+
+static int qp_memcpy_from_queue(void *dest,
+ size_t dest_offset,
+ const struct vmci_queue *queue,
+ u64 queue_offset, size_t size)
+{
+ return __qp_memcpy_from_queue((u8 *)dest + dest_offset,
+ queue, queue_offset, size, false);
+}
+
+/*
+ * Copies from a given iovec from a VMCI Queue.
+ */
+static int qp_memcpy_to_queue_iov(struct vmci_queue *queue,
+ u64 queue_offset,
+ const void *msg,
+ size_t src_offset, size_t size)
+{
+
+ /*
+ * We ignore src_offset because src is really a struct iovec * and will
+ * maintain offset internally.
+ */
+ return __qp_memcpy_to_queue(queue, queue_offset, msg, size, true);
+}
+
+/*
+ * Copies to a given iovec from a VMCI Queue.
+ */
+static int qp_memcpy_from_queue_iov(void *dest,
+ size_t dest_offset,
+ const struct vmci_queue *queue,
+ u64 queue_offset, size_t size)
+{
+ /*
+ * We ignore dest_offset because dest is really a struct iovec * and
+ * will maintain offset internally.
+ */
+ return __qp_memcpy_from_queue(dest, queue, queue_offset, size, true);
+}
+
+/*
+ * Allocates kernel VA space of specified size plus space for the queue
+ * and kernel interface. This is different from the guest queue allocator,
+ * because we do not allocate our own queue header/data pages here but
+ * share those of the guest.
+ */
+static struct vmci_queue *qp_host_alloc_queue(u64 size)
+{
+ struct vmci_queue *queue;
+ size_t queue_page_size;
+ const u64 num_pages = DIV_ROUND_UP(size, PAGE_SIZE) + 1;
+ const size_t queue_size = sizeof(*queue) + sizeof(*(queue->kernel_if));
+
+ if (num_pages > (SIZE_MAX - queue_size) /
+ sizeof(*queue->kernel_if->u.h.page))
+ return NULL;
+
+ queue_page_size = num_pages * sizeof(*queue->kernel_if->u.h.page);
+
+ queue = kzalloc(queue_size + queue_page_size, GFP_KERNEL);
+ if (queue) {
+ queue->q_header = NULL;
+ queue->saved_header = NULL;
+ queue->kernel_if = (struct vmci_queue_kern_if *)(queue + 1);
+ queue->kernel_if->host = true;
+ queue->kernel_if->mutex = NULL;
+ queue->kernel_if->num_pages = num_pages;
+ queue->kernel_if->u.h.header_page =
+ (struct page **)((u8 *)queue + queue_size);
+ queue->kernel_if->u.h.page =
+ &queue->kernel_if->u.h.header_page[1];
+ }
+
+ return queue;
+}
+
+/*
+ * Frees kernel memory for a given queue (header plus translation
+ * structure).
+ */
+static void qp_host_free_queue(struct vmci_queue *queue, u64 queue_size)
+{
+ kfree(queue);
+}
+
+/*
+ * Initialize the mutex for the pair of queues. This mutex is used to
+ * protect the q_header and the buffer from changing out from under any
+ * users of either queue. Of course, it's only any good if the mutexes
+ * are actually acquired. Queue structure must lie on non-paged memory
+ * or we cannot guarantee access to the mutex.
+ */
+static void qp_init_queue_mutex(struct vmci_queue *produce_q,
+ struct vmci_queue *consume_q)
+{
+ /*
+ * Only the host queue has shared state - the guest queues do not
+ * need to synchronize access using a queue mutex.
+ */
+
+ if (produce_q->kernel_if->host) {
+ produce_q->kernel_if->mutex = &produce_q->kernel_if->__mutex;
+ consume_q->kernel_if->mutex = &produce_q->kernel_if->__mutex;
+ mutex_init(produce_q->kernel_if->mutex);
+ }
+}
+
+/*
+ * Cleans up the mutex for the pair of queues.
+ */
+static void qp_cleanup_queue_mutex(struct vmci_queue *produce_q,
+ struct vmci_queue *consume_q)
+{
+ if (produce_q->kernel_if->host) {
+ produce_q->kernel_if->mutex = NULL;
+ consume_q->kernel_if->mutex = NULL;
+ }
+}
+
+/*
+ * Acquire the mutex for the queue. Note that the produce_q and
+ * the consume_q share a mutex. So, only one of the two need to
+ * be passed in to this routine. Either will work just fine.
+ */
+static void qp_acquire_queue_mutex(struct vmci_queue *queue)
+{
+ if (queue->kernel_if->host)
+ mutex_lock(queue->kernel_if->mutex);
+}
+
+/*
+ * Release the mutex for the queue. Note that the produce_q and
+ * the consume_q share a mutex. So, only one of the two need to
+ * be passed in to this routine. Either will work just fine.
+ */
+static void qp_release_queue_mutex(struct vmci_queue *queue)
+{
+ if (queue->kernel_if->host)
+ mutex_unlock(queue->kernel_if->mutex);
+}
+
+/*
+ * Helper function to release pages in the PageStoreAttachInfo
+ * previously obtained using get_user_pages.
+ */
+static void qp_release_pages(struct page **pages,
+ u64 num_pages, bool dirty)
+{
+ int i;
+
+ for (i = 0; i < num_pages; i++) {
+ if (dirty)
+ set_page_dirty(pages[i]);
+
+ page_cache_release(pages[i]);
+ pages[i] = NULL;
+ }
+}
+
+/*
+ * Lock the user pages referenced by the {produce,consume}Buffer
+ * struct into memory and populate the {produce,consume}Pages
+ * arrays in the attach structure with them.
+ */
+static int qp_host_get_user_memory(u64 produce_uva,
+ u64 consume_uva,
+ struct vmci_queue *produce_q,
+ struct vmci_queue *consume_q)
+{
+ int retval;
+ int err = VMCI_SUCCESS;
+
+ retval = get_user_pages_fast((uintptr_t) produce_uva,
+ produce_q->kernel_if->num_pages, 1,
+ produce_q->kernel_if->u.h.header_page);
+ if (retval < produce_q->kernel_if->num_pages) {
+ pr_debug("get_user_pages_fast(produce) failed (retval=%d)",
+ retval);
+ qp_release_pages(produce_q->kernel_if->u.h.header_page,
+ retval, false);
+ err = VMCI_ERROR_NO_MEM;
+ goto out;
+ }
+
+ retval = get_user_pages_fast((uintptr_t) consume_uva,
+ consume_q->kernel_if->num_pages, 1,
+ consume_q->kernel_if->u.h.header_page);
+ if (retval < consume_q->kernel_if->num_pages) {
+ pr_debug("get_user_pages_fast(consume) failed (retval=%d)",
+ retval);
+ qp_release_pages(consume_q->kernel_if->u.h.header_page,
+ retval, false);
+ qp_release_pages(produce_q->kernel_if->u.h.header_page,
+ produce_q->kernel_if->num_pages, false);
+ err = VMCI_ERROR_NO_MEM;
+ }
+
+ out:
+ return err;
+}
+
+/*
+ * Registers the specification of the user pages used for backing a queue
+ * pair. Enough information to map in pages is stored in the OS specific
+ * part of the struct vmci_queue structure.
+ */
+static int qp_host_register_user_memory(struct vmci_qp_page_store *page_store,
+ struct vmci_queue *produce_q,
+ struct vmci_queue *consume_q)
+{
+ u64 produce_uva;
+ u64 consume_uva;
+
+ /*
+ * The new style and the old style mapping only differs in
+ * that we either get a single or two UVAs, so we split the
+ * single UVA range at the appropriate spot.
+ */
+ produce_uva = page_store->pages;
+ consume_uva = page_store->pages +
+ produce_q->kernel_if->num_pages * PAGE_SIZE;
+ return qp_host_get_user_memory(produce_uva, consume_uva, produce_q,
+ consume_q);
+}
+
+/*
+ * Releases and removes the references to user pages stored in the attach
+ * struct. Pages are released from the page cache and may become
+ * swappable again.
+ */
+static void qp_host_unregister_user_memory(struct vmci_queue *produce_q,
+ struct vmci_queue *consume_q)
+{
+ qp_release_pages(produce_q->kernel_if->u.h.header_page,
+ produce_q->kernel_if->num_pages, true);
+ memset(produce_q->kernel_if->u.h.header_page, 0,
+ sizeof(*produce_q->kernel_if->u.h.header_page) *
+ produce_q->kernel_if->num_pages);
+ qp_release_pages(consume_q->kernel_if->u.h.header_page,
+ consume_q->kernel_if->num_pages, true);
+ memset(consume_q->kernel_if->u.h.header_page, 0,
+ sizeof(*consume_q->kernel_if->u.h.header_page) *
+ consume_q->kernel_if->num_pages);
+}
+
+/*
+ * Once qp_host_register_user_memory has been performed on a
+ * queue, the queue pair headers can be mapped into the
+ * kernel. Once mapped, they must be unmapped with
+ * qp_host_unmap_queues prior to calling
+ * qp_host_unregister_user_memory.
+ * Pages are pinned.
+ */
+static int qp_host_map_queues(struct vmci_queue *produce_q,
+ struct vmci_queue *consume_q)
+{
+ int result;
+
+ if (!produce_q->q_header || !consume_q->q_header) {
+ struct page *headers[2];
+
+ if (produce_q->q_header != consume_q->q_header)
+ return VMCI_ERROR_QUEUEPAIR_MISMATCH;
+
+ if (produce_q->kernel_if->u.h.header_page == NULL ||
+ *produce_q->kernel_if->u.h.header_page == NULL)
+ return VMCI_ERROR_UNAVAILABLE;
+
+ headers[0] = *produce_q->kernel_if->u.h.header_page;
+ headers[1] = *consume_q->kernel_if->u.h.header_page;
+
+ produce_q->q_header = vmap(headers, 2, VM_MAP, PAGE_KERNEL);
+ if (produce_q->q_header != NULL) {
+ consume_q->q_header =
+ (struct vmci_queue_header *)((u8 *)
+ produce_q->q_header +
+ PAGE_SIZE);
+ result = VMCI_SUCCESS;
+ } else {
+ pr_warn("vmap failed\n");
+ result = VMCI_ERROR_NO_MEM;
+ }
+ } else {
+ result = VMCI_SUCCESS;
+ }
+
+ return result;
+}
+
+/*
+ * Unmaps previously mapped queue pair headers from the kernel.
+ * Pages are unpinned.
+ */
+static int qp_host_unmap_queues(u32 gid,
+ struct vmci_queue *produce_q,
+ struct vmci_queue *consume_q)
+{
+ if (produce_q->q_header) {
+ if (produce_q->q_header < consume_q->q_header)
+ vunmap(produce_q->q_header);
+ else
+ vunmap(consume_q->q_header);
+
+ produce_q->q_header = NULL;
+ consume_q->q_header = NULL;
+ }
+
+ return VMCI_SUCCESS;
+}
+
+/*
+ * Finds the entry in the list corresponding to a given handle. Assumes
+ * that the list is locked.
+ */
+static struct qp_entry *qp_list_find(struct qp_list *qp_list,
+ struct vmci_handle handle)
+{
+ struct qp_entry *entry;
+
+ if (vmci_handle_is_invalid(handle))
+ return NULL;
+
+ list_for_each_entry(entry, &qp_list->head, list_item) {
+ if (vmci_handle_is_equal(entry->handle, handle))
+ return entry;
+ }
+
+ return NULL;
+}
+
+/*
+ * Finds the entry in the list corresponding to a given handle.
+ */
+static struct qp_guest_endpoint *
+qp_guest_handle_to_entry(struct vmci_handle handle)
+{
+ struct qp_guest_endpoint *entry;
+ struct qp_entry *qp = qp_list_find(&qp_guest_endpoints, handle);
+
+ entry = qp ? container_of(
+ qp, struct qp_guest_endpoint, qp) : NULL;
+ return entry;
+}
+
+/*
+ * Finds the entry in the list corresponding to a given handle.
+ */
+static struct qp_broker_entry *
+qp_broker_handle_to_entry(struct vmci_handle handle)
+{
+ struct qp_broker_entry *entry;
+ struct qp_entry *qp = qp_list_find(&qp_broker_list, handle);
+
+ entry = qp ? container_of(
+ qp, struct qp_broker_entry, qp) : NULL;
+ return entry;
+}
+
+/*
+ * Dispatches a queue pair event message directly into the local event
+ * queue.
+ */
+static int qp_notify_peer_local(bool attach, struct vmci_handle handle)
+{
+ u32 context_id = vmci_get_context_id();
+ struct vmci_event_qp ev;
+
+ ev.msg.hdr.dst = vmci_make_handle(context_id, VMCI_EVENT_HANDLER);
+ ev.msg.hdr.src = vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID,
+ VMCI_CONTEXT_RESOURCE_ID);
+ ev.msg.hdr.payload_size = sizeof(ev) - sizeof(ev.msg.hdr);
+ ev.msg.event_data.event =
+ attach ? VMCI_EVENT_QP_PEER_ATTACH : VMCI_EVENT_QP_PEER_DETACH;
+ ev.payload.peer_id = context_id;
+ ev.payload.handle = handle;
+
+ return vmci_event_dispatch(&ev.msg.hdr);
+}
+
+/*
+ * Allocates and initializes a qp_guest_endpoint structure.
+ * Allocates a queue_pair rid (and handle) iff the given entry has
+ * an invalid handle. 0 through VMCI_RESERVED_RESOURCE_ID_MAX
+ * are reserved handles. Assumes that the QP list mutex is held
+ * by the caller.
+ */
+static struct qp_guest_endpoint *
+qp_guest_endpoint_create(struct vmci_handle handle,
+ u32 peer,
+ u32 flags,
+ u64 produce_size,
+ u64 consume_size,
+ void *produce_q,
+ void *consume_q)
+{
+ int result;
+ struct qp_guest_endpoint *entry;
+ /* One page each for the queue headers. */
+ const u64 num_ppns = DIV_ROUND_UP(produce_size, PAGE_SIZE) +
+ DIV_ROUND_UP(consume_size, PAGE_SIZE) + 2;
+
+ if (vmci_handle_is_invalid(handle)) {
+ u32 context_id = vmci_get_context_id();
+
+ handle = vmci_make_handle(context_id, VMCI_INVALID_ID);
+ }
+
+ entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+ if (entry) {
+ entry->qp.peer = peer;
+ entry->qp.flags = flags;
+ entry->qp.produce_size = produce_size;
+ entry->qp.consume_size = consume_size;
+ entry->qp.ref_count = 0;
+ entry->num_ppns = num_ppns;
+ entry->produce_q = produce_q;
+ entry->consume_q = consume_q;
+ INIT_LIST_HEAD(&entry->qp.list_item);
+
+ /* Add resource obj */
+ result = vmci_resource_add(&entry->resource,
+ VMCI_RESOURCE_TYPE_QPAIR_GUEST,
+ handle);
+ entry->qp.handle = vmci_resource_handle(&entry->resource);
+ if ((result != VMCI_SUCCESS) ||
+ qp_list_find(&qp_guest_endpoints, entry->qp.handle)) {
+ pr_warn("Failed to add new resource (handle=0x%x:0x%x), error: %d",
+ handle.context, handle.resource, result);
+ kfree(entry);
+ entry = NULL;
+ }
+ }
+ return entry;
+}
+
+/*
+ * Frees a qp_guest_endpoint structure.
+ */
+static void qp_guest_endpoint_destroy(struct qp_guest_endpoint *entry)
+{
+ qp_free_ppn_set(&entry->ppn_set);
+ qp_cleanup_queue_mutex(entry->produce_q, entry->consume_q);
+ qp_free_queue(entry->produce_q, entry->qp.produce_size);
+ qp_free_queue(entry->consume_q, entry->qp.consume_size);
+ /* Unlink from resource hash table and free callback */
+ vmci_resource_remove(&entry->resource);
+
+ kfree(entry);
+}
+
+/*
+ * Helper to make a queue_pairAlloc hypercall when the driver is
+ * supporting a guest device.
+ */
+static int qp_alloc_hypercall(const struct qp_guest_endpoint *entry)
+{
+ struct vmci_qp_alloc_msg *alloc_msg;
+ size_t msg_size;
+ int result;
+
+ if (!entry || entry->num_ppns <= 2)
+ return VMCI_ERROR_INVALID_ARGS;
+
+ msg_size = sizeof(*alloc_msg) +
+ (size_t) entry->num_ppns * sizeof(u32);
+ alloc_msg = kmalloc(msg_size, GFP_KERNEL);
+ if (!alloc_msg)
+ return VMCI_ERROR_NO_MEM;
+
+ alloc_msg->hdr.dst = vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID,
+ VMCI_QUEUEPAIR_ALLOC);
+ alloc_msg->hdr.src = VMCI_ANON_SRC_HANDLE;
+ alloc_msg->hdr.payload_size = msg_size - VMCI_DG_HEADERSIZE;
+ alloc_msg->handle = entry->qp.handle;
+ alloc_msg->peer = entry->qp.peer;
+ alloc_msg->flags = entry->qp.flags;
+ alloc_msg->produce_size = entry->qp.produce_size;
+ alloc_msg->consume_size = entry->qp.consume_size;
+ alloc_msg->num_ppns = entry->num_ppns;
+
+ result = qp_populate_ppn_set((u8 *)alloc_msg + sizeof(*alloc_msg),
+ &entry->ppn_set);
+ if (result == VMCI_SUCCESS)
+ result = vmci_send_datagram(&alloc_msg->hdr);
+
+ kfree(alloc_msg);
+
+ return result;
+}
+
+/*
+ * Helper to make a queue_pairDetach hypercall when the driver is
+ * supporting a guest device.
+ */
+static int qp_detatch_hypercall(struct vmci_handle handle)
+{
+ struct vmci_qp_detach_msg detach_msg;
+
+ detach_msg.hdr.dst = vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID,
+ VMCI_QUEUEPAIR_DETACH);
+ detach_msg.hdr.src = VMCI_ANON_SRC_HANDLE;
+ detach_msg.hdr.payload_size = sizeof(handle);
+ detach_msg.handle = handle;
+
+ return vmci_send_datagram(&detach_msg.hdr);
+}
+
+/*
+ * Adds the given entry to the list. Assumes that the list is locked.
+ */
+static void qp_list_add_entry(struct qp_list *qp_list, struct qp_entry *entry)
+{
+ if (entry)
+ list_add(&entry->list_item, &qp_list->head);
+}
+
+/*
+ * Removes the given entry from the list. Assumes that the list is locked.
+ */
+static void qp_list_remove_entry(struct qp_list *qp_list,
+ struct qp_entry *entry)
+{
+ if (entry)
+ list_del(&entry->list_item);
+}
+
+/*
+ * Helper for VMCI queue_pair detach interface. Frees the physical
+ * pages for the queue pair.
+ */
+static int qp_detatch_guest_work(struct vmci_handle handle)
+{
+ int result;
+ struct qp_guest_endpoint *entry;
+ u32 ref_count = ~0; /* To avoid compiler warning below */
+
+ mutex_lock(&qp_guest_endpoints.mutex);
+
+ entry = qp_guest_handle_to_entry(handle);
+ if (!entry) {
+ mutex_unlock(&qp_guest_endpoints.mutex);
+ return VMCI_ERROR_NOT_FOUND;
+ }
+
+ if (entry->qp.flags & VMCI_QPFLAG_LOCAL) {
+ result = VMCI_SUCCESS;
+
+ if (entry->qp.ref_count > 1) {
+ result = qp_notify_peer_local(false, handle);
+ /*
+ * We can fail to notify a local queuepair
+ * because we can't allocate. We still want
+ * to release the entry if that happens, so
+ * don't bail out yet.
+ */
+ }
+ } else {
+ result = qp_detatch_hypercall(handle);
+ if (result < VMCI_SUCCESS) {
+ /*
+ * We failed to notify a non-local queuepair.
+ * That other queuepair might still be
+ * accessing the shared memory, so don't
+ * release the entry yet. It will get cleaned
+ * up by VMCIqueue_pair_Exit() if necessary
+ * (assuming we are going away, otherwise why
+ * did this fail?).
+ */
+
+ mutex_unlock(&qp_guest_endpoints.mutex);
+ return result;
+ }
+ }
+
+ /*
+ * If we get here then we either failed to notify a local queuepair, or
+ * we succeeded in all cases. Release the entry if required.
+ */
+
+ entry->qp.ref_count--;
+ if (entry->qp.ref_count == 0)
+ qp_list_remove_entry(&qp_guest_endpoints, &entry->qp);
+
+ /* If we didn't remove the entry, this could change once we unlock. */
+ if (entry)
+ ref_count = entry->qp.ref_count;
+
+ mutex_unlock(&qp_guest_endpoints.mutex);
+
+ if (ref_count == 0)
+ qp_guest_endpoint_destroy(entry);
+
+ return result;
+}
+
+/*
+ * This functions handles the actual allocation of a VMCI queue
+ * pair guest endpoint. Allocates physical pages for the queue
+ * pair. It makes OS dependent calls through generic wrappers.
+ */
+static int qp_alloc_guest_work(struct vmci_handle *handle,
+ struct vmci_queue **produce_q,
+ u64 produce_size,
+ struct vmci_queue **consume_q,
+ u64 consume_size,
+ u32 peer,
+ u32 flags,
+ u32 priv_flags)
+{
+ const u64 num_produce_pages =
+ DIV_ROUND_UP(produce_size, PAGE_SIZE) + 1;
+ const u64 num_consume_pages =
+ DIV_ROUND_UP(consume_size, PAGE_SIZE) + 1;
+ void *my_produce_q = NULL;
+ void *my_consume_q = NULL;
+ int result;
+ struct qp_guest_endpoint *queue_pair_entry = NULL;
+
+ if (priv_flags != VMCI_NO_PRIVILEGE_FLAGS)
+ return VMCI_ERROR_NO_ACCESS;
+
+ mutex_lock(&qp_guest_endpoints.mutex);
+
+ queue_pair_entry = qp_guest_handle_to_entry(*handle);
+ if (queue_pair_entry) {
+ if (queue_pair_entry->qp.flags & VMCI_QPFLAG_LOCAL) {
+ /* Local attach case. */
+ if (queue_pair_entry->qp.ref_count > 1) {
+ pr_devel("Error attempting to attach more than once\n");
+ result = VMCI_ERROR_UNAVAILABLE;
+ goto error_keep_entry;
+ }
+
+ if (queue_pair_entry->qp.produce_size != consume_size ||
+ queue_pair_entry->qp.consume_size !=
+ produce_size ||
+ queue_pair_entry->qp.flags !=
+ (flags & ~VMCI_QPFLAG_ATTACH_ONLY)) {
+ pr_devel("Error mismatched queue pair in local attach\n");
+ result = VMCI_ERROR_QUEUEPAIR_MISMATCH;
+ goto error_keep_entry;
+ }
+
+ /*
+ * Do a local attach. We swap the consume and
+ * produce queues for the attacher and deliver
+ * an attach event.
+ */
+ result = qp_notify_peer_local(true, *handle);
+ if (result < VMCI_SUCCESS)
+ goto error_keep_entry;
+
+ my_produce_q = queue_pair_entry->consume_q;
+ my_consume_q = queue_pair_entry->produce_q;
+ goto out;
+ }
+
+ result = VMCI_ERROR_ALREADY_EXISTS;
+ goto error_keep_entry;
+ }
+
+ my_produce_q = qp_alloc_queue(produce_size, flags);
+ if (!my_produce_q) {
+ pr_warn("Error allocating pages for produce queue\n");
+ result = VMCI_ERROR_NO_MEM;
+ goto error;
+ }
+
+ my_consume_q = qp_alloc_queue(consume_size, flags);
+ if (!my_consume_q) {
+ pr_warn("Error allocating pages for consume queue\n");
+ result = VMCI_ERROR_NO_MEM;
+ goto error;
+ }
+
+ queue_pair_entry = qp_guest_endpoint_create(*handle, peer, flags,
+ produce_size, consume_size,
+ my_produce_q, my_consume_q);
+ if (!queue_pair_entry) {
+ pr_warn("Error allocating memory in %s\n", __func__);
+ result = VMCI_ERROR_NO_MEM;
+ goto error;
+ }
+
+ result = qp_alloc_ppn_set(my_produce_q, num_produce_pages, my_consume_q,
+ num_consume_pages,
+ &queue_pair_entry->ppn_set);
+ if (result < VMCI_SUCCESS) {
+ pr_warn("qp_alloc_ppn_set failed\n");
+ goto error;
+ }
+
+ /*
+ * It's only necessary to notify the host if this queue pair will be
+ * attached to from another context.
+ */
+ if (queue_pair_entry->qp.flags & VMCI_QPFLAG_LOCAL) {
+ /* Local create case. */
+ u32 context_id = vmci_get_context_id();
+
+ /*
+ * Enforce similar checks on local queue pairs as we
+ * do for regular ones. The handle's context must
+ * match the creator or attacher context id (here they
+ * are both the current context id) and the
+ * attach-only flag cannot exist during create. We
+ * also ensure specified peer is this context or an
+ * invalid one.
+ */
+ if (queue_pair_entry->qp.handle.context != context_id ||
+ (queue_pair_entry->qp.peer != VMCI_INVALID_ID &&
+ queue_pair_entry->qp.peer != context_id)) {
+ result = VMCI_ERROR_NO_ACCESS;
+ goto error;
+ }
+
+ if (queue_pair_entry->qp.flags & VMCI_QPFLAG_ATTACH_ONLY) {
+ result = VMCI_ERROR_NOT_FOUND;
+ goto error;
+ }
+ } else {
+ result = qp_alloc_hypercall(queue_pair_entry);
+ if (result < VMCI_SUCCESS) {
+ pr_warn("qp_alloc_hypercall result = %d\n", result);
+ goto error;
+ }
+ }
+
+ qp_init_queue_mutex((struct vmci_queue *)my_produce_q,
+ (struct vmci_queue *)my_consume_q);
+
+ qp_list_add_entry(&qp_guest_endpoints, &queue_pair_entry->qp);
+
+ out:
+ queue_pair_entry->qp.ref_count++;
+ *handle = queue_pair_entry->qp.handle;
+ *produce_q = (struct vmci_queue *)my_produce_q;
+ *consume_q = (struct vmci_queue *)my_consume_q;
+
+ /*
+ * We should initialize the queue pair header pages on a local
+ * queue pair create. For non-local queue pairs, the
+ * hypervisor initializes the header pages in the create step.
+ */
+ if ((queue_pair_entry->qp.flags & VMCI_QPFLAG_LOCAL) &&
+ queue_pair_entry->qp.ref_count == 1) {
+ vmci_q_header_init((*produce_q)->q_header, *handle);
+ vmci_q_header_init((*consume_q)->q_header, *handle);
+ }
+
+ mutex_unlock(&qp_guest_endpoints.mutex);
+
+ return VMCI_SUCCESS;
+
+ error:
+ mutex_unlock(&qp_guest_endpoints.mutex);
+ if (queue_pair_entry) {
+ /* The queues will be freed inside the destroy routine. */
+ qp_guest_endpoint_destroy(queue_pair_entry);
+ } else {
+ qp_free_queue(my_produce_q, produce_size);
+ qp_free_queue(my_consume_q, consume_size);
+ }
+ return result;
+
+ error_keep_entry:
+ /* This path should only be used when an existing entry was found. */
+ mutex_unlock(&qp_guest_endpoints.mutex);
+ return result;
+}
+
+/*
+ * The first endpoint issuing a queue pair allocation will create the state
+ * of the queue pair in the queue pair broker.
+ *
+ * If the creator is a guest, it will associate a VMX virtual address range
+ * with the queue pair as specified by the page_store. For compatibility with
+ * older VMX'en, that would use a separate step to set the VMX virtual
+ * address range, the virtual address range can be registered later using
+ * vmci_qp_broker_set_page_store. In that case, a page_store of NULL should be
+ * used.
+ *
+ * If the creator is the host, a page_store of NULL should be used as well,
+ * since the host is not able to supply a page store for the queue pair.
+ *
+ * For older VMX and host callers, the queue pair will be created in the
+ * VMCIQPB_CREATED_NO_MEM state, and for current VMX callers, it will be
+ * created in VMCOQPB_CREATED_MEM state.
+ */
+static int qp_broker_create(struct vmci_handle handle,
+ u32 peer,
+ u32 flags,
+ u32 priv_flags,
+ u64 produce_size,
+ u64 consume_size,
+ struct vmci_qp_page_store *page_store,
+ struct vmci_ctx *context,
+ vmci_event_release_cb wakeup_cb,
+ void *client_data, struct qp_broker_entry **ent)
+{
+ struct qp_broker_entry *entry = NULL;
+ const u32 context_id = vmci_ctx_get_id(context);
+ bool is_local = flags & VMCI_QPFLAG_LOCAL;
+ int result;
+ u64 guest_produce_size;
+ u64 guest_consume_size;
+
+ /* Do not create if the caller asked not to. */
+ if (flags & VMCI_QPFLAG_ATTACH_ONLY)
+ return VMCI_ERROR_NOT_FOUND;
+
+ /*
+ * Creator's context ID should match handle's context ID or the creator
+ * must allow the context in handle's context ID as the "peer".
+ */
+ if (handle.context != context_id && handle.context != peer)
+ return VMCI_ERROR_NO_ACCESS;
+
+ if (VMCI_CONTEXT_IS_VM(context_id) && VMCI_CONTEXT_IS_VM(peer))
+ return VMCI_ERROR_DST_UNREACHABLE;
+
+ /*
+ * Creator's context ID for local queue pairs should match the
+ * peer, if a peer is specified.
+ */
+ if (is_local && peer != VMCI_INVALID_ID && context_id != peer)
+ return VMCI_ERROR_NO_ACCESS;
+
+ entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
+ if (!entry)
+ return VMCI_ERROR_NO_MEM;
+
+ if (vmci_ctx_get_id(context) == VMCI_HOST_CONTEXT_ID && !is_local) {
+ /*
+ * The queue pair broker entry stores values from the guest
+ * point of view, so a creating host side endpoint should swap
+ * produce and consume values -- unless it is a local queue
+ * pair, in which case no swapping is necessary, since the local
+ * attacher will swap queues.
+ */
+
+ guest_produce_size = consume_size;
+ guest_consume_size = produce_size;
+ } else {
+ guest_produce_size = produce_size;
+ guest_consume_size = consume_size;
+ }
+
+ entry->qp.handle = handle;
+ entry->qp.peer = peer;
+ entry->qp.flags = flags;
+ entry->qp.produce_size = guest_produce_size;
+ entry->qp.consume_size = guest_consume_size;
+ entry->qp.ref_count = 1;
+ entry->create_id = context_id;
+ entry->attach_id = VMCI_INVALID_ID;
+ entry->state = VMCIQPB_NEW;
+ entry->require_trusted_attach =
+ !!(context->priv_flags & VMCI_PRIVILEGE_FLAG_RESTRICTED);
+ entry->created_by_trusted =
+ !!(priv_flags & VMCI_PRIVILEGE_FLAG_TRUSTED);
+ entry->vmci_page_files = false;
+ entry->wakeup_cb = wakeup_cb;
+ entry->client_data = client_data;
+ entry->produce_q = qp_host_alloc_queue(guest_produce_size);
+ if (entry->produce_q == NULL) {
+ result = VMCI_ERROR_NO_MEM;
+ goto error;
+ }
+ entry->consume_q = qp_host_alloc_queue(guest_consume_size);
+ if (entry->consume_q == NULL) {
+ result = VMCI_ERROR_NO_MEM;
+ goto error;
+ }
+
+ qp_init_queue_mutex(entry->produce_q, entry->consume_q);
+
+ INIT_LIST_HEAD(&entry->qp.list_item);
+
+ if (is_local) {
+ u8 *tmp;
+
+ entry->local_mem = kcalloc(QPE_NUM_PAGES(entry->qp),
+ PAGE_SIZE, GFP_KERNEL);
+ if (entry->local_mem == NULL) {
+ result = VMCI_ERROR_NO_MEM;
+ goto error;
+ }
+ entry->state = VMCIQPB_CREATED_MEM;
+ entry->produce_q->q_header = entry->local_mem;
+ tmp = (u8 *)entry->local_mem + PAGE_SIZE *
+ (DIV_ROUND_UP(entry->qp.produce_size, PAGE_SIZE) + 1);
+ entry->consume_q->q_header = (struct vmci_queue_header *)tmp;
+ } else if (page_store) {
+ /*
+ * The VMX already initialized the queue pair headers, so no
+ * need for the kernel side to do that.
+ */
+ result = qp_host_register_user_memory(page_store,
+ entry->produce_q,
+ entry->consume_q);
+ if (result < VMCI_SUCCESS)
+ goto error;
+
+ entry->state = VMCIQPB_CREATED_MEM;
+ } else {
+ /*
+ * A create without a page_store may be either a host
+ * side create (in which case we are waiting for the
+ * guest side to supply the memory) or an old style
+ * queue pair create (in which case we will expect a
+ * set page store call as the next step).
+ */
+ entry->state = VMCIQPB_CREATED_NO_MEM;
+ }
+
+ qp_list_add_entry(&qp_broker_list, &entry->qp);
+ if (ent != NULL)
+ *ent = entry;
+
+ /* Add to resource obj */
+ result = vmci_resource_add(&entry->resource,
+ VMCI_RESOURCE_TYPE_QPAIR_HOST,
+ handle);
+ if (result != VMCI_SUCCESS) {
+ pr_warn("Failed to add new resource (handle=0x%x:0x%x), error: %d",
+ handle.context, handle.resource, result);
+ goto error;
+ }
+
+ entry->qp.handle = vmci_resource_handle(&entry->resource);
+ if (is_local) {
+ vmci_q_header_init(entry->produce_q->q_header,
+ entry->qp.handle);
+ vmci_q_header_init(entry->consume_q->q_header,
+ entry->qp.handle);
+ }
+
+ vmci_ctx_qp_create(context, entry->qp.handle);
+
+ return VMCI_SUCCESS;
+
+ error:
+ if (entry != NULL) {
+ qp_host_free_queue(entry->produce_q, guest_produce_size);
+ qp_host_free_queue(entry->consume_q, guest_consume_size);
+ kfree(entry);
+ }
+
+ return result;
+}
+
+/*
+ * Enqueues an event datagram to notify the peer VM attached to
+ * the given queue pair handle about attach/detach event by the
+ * given VM. Returns Payload size of datagram enqueued on
+ * success, error code otherwise.
+ */
+static int qp_notify_peer(bool attach,
+ struct vmci_handle handle,
+ u32 my_id,
+ u32 peer_id)
+{
+ int rv;
+ struct vmci_event_qp ev;
+
+ if (vmci_handle_is_invalid(handle) || my_id == VMCI_INVALID_ID ||
+ peer_id == VMCI_INVALID_ID)
+ return VMCI_ERROR_INVALID_ARGS;
+
+ /*
+ * In vmci_ctx_enqueue_datagram() we enforce the upper limit on
+ * number of pending events from the hypervisor to a given VM
+ * otherwise a rogue VM could do an arbitrary number of attach
+ * and detach operations causing memory pressure in the host
+ * kernel.
+ */
+
+ ev.msg.hdr.dst = vmci_make_handle(peer_id, VMCI_EVENT_HANDLER);
+ ev.msg.hdr.src = vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID,
+ VMCI_CONTEXT_RESOURCE_ID);
+ ev.msg.hdr.payload_size = sizeof(ev) - sizeof(ev.msg.hdr);
+ ev.msg.event_data.event = attach ?
+ VMCI_EVENT_QP_PEER_ATTACH : VMCI_EVENT_QP_PEER_DETACH;
+ ev.payload.handle = handle;
+ ev.payload.peer_id = my_id;
+
+ rv = vmci_datagram_dispatch(VMCI_HYPERVISOR_CONTEXT_ID,
+ &ev.msg.hdr, false);
+ if (rv < VMCI_SUCCESS)
+ pr_warn("Failed to enqueue queue_pair %s event datagram for context (ID=0x%x)\n",
+ attach ? "ATTACH" : "DETACH", peer_id);
+
+ return rv;
+}
+
+/*
+ * The second endpoint issuing a queue pair allocation will attach to
+ * the queue pair registered with the queue pair broker.
+ *
+ * If the attacher is a guest, it will associate a VMX virtual address
+ * range with the queue pair as specified by the page_store. At this
+ * point, the already attach host endpoint may start using the queue
+ * pair, and an attach event is sent to it. For compatibility with
+ * older VMX'en, that used a separate step to set the VMX virtual
+ * address range, the virtual address range can be registered later
+ * using vmci_qp_broker_set_page_store. In that case, a page_store of
+ * NULL should be used, and the attach event will be generated once
+ * the actual page store has been set.
+ *
+ * If the attacher is the host, a page_store of NULL should be used as
+ * well, since the page store information is already set by the guest.
+ *
+ * For new VMX and host callers, the queue pair will be moved to the
+ * VMCIQPB_ATTACHED_MEM state, and for older VMX callers, it will be
+ * moved to the VMCOQPB_ATTACHED_NO_MEM state.
+ */
+static int qp_broker_attach(struct qp_broker_entry *entry,
+ u32 peer,
+ u32 flags,
+ u32 priv_flags,
+ u64 produce_size,
+ u64 consume_size,
+ struct vmci_qp_page_store *page_store,
+ struct vmci_ctx *context,
+ vmci_event_release_cb wakeup_cb,
+ void *client_data,
+ struct qp_broker_entry **ent)
+{
+ const u32 context_id = vmci_ctx_get_id(context);
+ bool is_local = flags & VMCI_QPFLAG_LOCAL;
+ int result;
+
+ if (entry->state != VMCIQPB_CREATED_NO_MEM &&
+ entry->state != VMCIQPB_CREATED_MEM)
+ return VMCI_ERROR_UNAVAILABLE;
+
+ if (is_local) {
+ if (!(entry->qp.flags & VMCI_QPFLAG_LOCAL) ||
+ context_id != entry->create_id) {
+ return VMCI_ERROR_INVALID_ARGS;
+ }
+ } else if (context_id == entry->create_id ||
+ context_id == entry->attach_id) {
+ return VMCI_ERROR_ALREADY_EXISTS;
+ }
+
+ if (VMCI_CONTEXT_IS_VM(context_id) &&
+ VMCI_CONTEXT_IS_VM(entry->create_id))
+ return VMCI_ERROR_DST_UNREACHABLE;
+
+ /*
+ * If we are attaching from a restricted context then the queuepair
+ * must have been created by a trusted endpoint.
+ */
+ if ((context->priv_flags & VMCI_PRIVILEGE_FLAG_RESTRICTED) &&
+ !entry->created_by_trusted)
+ return VMCI_ERROR_NO_ACCESS;
+
+ /*
+ * If we are attaching to a queuepair that was created by a restricted
+ * context then we must be trusted.
+ */
+ if (entry->require_trusted_attach &&
+ (!(priv_flags & VMCI_PRIVILEGE_FLAG_TRUSTED)))
+ return VMCI_ERROR_NO_ACCESS;
+
+ /*
+ * If the creator specifies VMCI_INVALID_ID in "peer" field, access
+ * control check is not performed.
+ */
+ if (entry->qp.peer != VMCI_INVALID_ID && entry->qp.peer != context_id)
+ return VMCI_ERROR_NO_ACCESS;
+
+ if (entry->create_id == VMCI_HOST_CONTEXT_ID) {
+ /*
+ * Do not attach if the caller doesn't support Host Queue Pairs
+ * and a host created this queue pair.
+ */
+
+ if (!vmci_ctx_supports_host_qp(context))
+ return VMCI_ERROR_INVALID_RESOURCE;
+
+ } else if (context_id == VMCI_HOST_CONTEXT_ID) {
+ struct vmci_ctx *create_context;
+ bool supports_host_qp;
+
+ /*
+ * Do not attach a host to a user created queue pair if that
+ * user doesn't support host queue pair end points.
+ */
+
+ create_context = vmci_ctx_get(entry->create_id);
+ supports_host_qp = vmci_ctx_supports_host_qp(create_context);
+ vmci_ctx_put(create_context);
+
+ if (!supports_host_qp)
+ return VMCI_ERROR_INVALID_RESOURCE;
+ }
+
+ if ((entry->qp.flags & ~VMCI_QP_ASYMM) != (flags & ~VMCI_QP_ASYMM_PEER))
+ return VMCI_ERROR_QUEUEPAIR_MISMATCH;
+
+ if (context_id != VMCI_HOST_CONTEXT_ID) {
+ /*
+ * The queue pair broker entry stores values from the guest
+ * point of view, so an attaching guest should match the values
+ * stored in the entry.
+ */
+
+ if (entry->qp.produce_size != produce_size ||
+ entry->qp.consume_size != consume_size) {
+ return VMCI_ERROR_QUEUEPAIR_MISMATCH;
+ }
+ } else if (entry->qp.produce_size != consume_size ||
+ entry->qp.consume_size != produce_size) {
+ return VMCI_ERROR_QUEUEPAIR_MISMATCH;
+ }
+
+ if (context_id != VMCI_HOST_CONTEXT_ID) {
+ /*
+ * If a guest attached to a queue pair, it will supply
+ * the backing memory. If this is a pre NOVMVM vmx,
+ * the backing memory will be supplied by calling
+ * vmci_qp_broker_set_page_store() following the
+ * return of the vmci_qp_broker_alloc() call. If it is
+ * a vmx of version NOVMVM or later, the page store
+ * must be supplied as part of the
+ * vmci_qp_broker_alloc call. Under all circumstances
+ * must the initially created queue pair not have any
+ * memory associated with it already.
+ */
+
+ if (entry->state != VMCIQPB_CREATED_NO_MEM)
+ return VMCI_ERROR_INVALID_ARGS;
+
+ if (page_store != NULL) {
+ /*
+ * Patch up host state to point to guest
+ * supplied memory. The VMX already
+ * initialized the queue pair headers, so no
+ * need for the kernel side to do that.
+ */
+
+ result = qp_host_register_user_memory(page_store,
+ entry->produce_q,
+ entry->consume_q);
+ if (result < VMCI_SUCCESS)
+ return result;
+
+ entry->state = VMCIQPB_ATTACHED_MEM;
+ } else {
+ entry->state = VMCIQPB_ATTACHED_NO_MEM;
+ }
+ } else if (entry->state == VMCIQPB_CREATED_NO_MEM) {
+ /*
+ * The host side is attempting to attach to a queue
+ * pair that doesn't have any memory associated with
+ * it. This must be a pre NOVMVM vmx that hasn't set
+ * the page store information yet, or a quiesced VM.
+ */
+
+ return VMCI_ERROR_UNAVAILABLE;
+ } else {
+ /* The host side has successfully attached to a queue pair. */
+ entry->state = VMCIQPB_ATTACHED_MEM;
+ }
+
+ if (entry->state == VMCIQPB_ATTACHED_MEM) {
+ result =
+ qp_notify_peer(true, entry->qp.handle, context_id,
+ entry->create_id);
+ if (result < VMCI_SUCCESS)
+ pr_warn("Failed to notify peer (ID=0x%x) of attach to queue pair (handle=0x%x:0x%x)\n",
+ entry->create_id, entry->qp.handle.context,
+ entry->qp.handle.resource);
+ }
+
+ entry->attach_id = context_id;
+ entry->qp.ref_count++;
+ if (wakeup_cb) {
+ entry->wakeup_cb = wakeup_cb;
+ entry->client_data = client_data;
+ }
+
+ /*
+ * When attaching to local queue pairs, the context already has
+ * an entry tracking the queue pair, so don't add another one.
+ */
+ if (!is_local)
+ vmci_ctx_qp_create(context, entry->qp.handle);
+
+ if (ent != NULL)
+ *ent = entry;
+
+ return VMCI_SUCCESS;
+}
+
+/*
+ * queue_pair_Alloc for use when setting up queue pair endpoints
+ * on the host.
+ */
+static int qp_broker_alloc(struct vmci_handle handle,
+ u32 peer,
+ u32 flags,
+ u32 priv_flags,
+ u64 produce_size,
+ u64 consume_size,
+ struct vmci_qp_page_store *page_store,
+ struct vmci_ctx *context,
+ vmci_event_release_cb wakeup_cb,
+ void *client_data,
+ struct qp_broker_entry **ent,
+ bool *swap)
+{
+ const u32 context_id = vmci_ctx_get_id(context);
+ bool create;
+ struct qp_broker_entry *entry = NULL;
+ bool is_local = flags & VMCI_QPFLAG_LOCAL;
+ int result;
+
+ if (vmci_handle_is_invalid(handle) ||
+ (flags & ~VMCI_QP_ALL_FLAGS) || is_local ||
+ !(produce_size || consume_size) ||
+ !context || context_id == VMCI_INVALID_ID ||
+ handle.context == VMCI_INVALID_ID) {
+ return VMCI_ERROR_INVALID_ARGS;
+ }
+
+ if (page_store && !VMCI_QP_PAGESTORE_IS_WELLFORMED(page_store))
+ return VMCI_ERROR_INVALID_ARGS;
+
+ /*
+ * In the initial argument check, we ensure that non-vmkernel hosts
+ * are not allowed to create local queue pairs.
+ */
+
+ mutex_lock(&qp_broker_list.mutex);
+
+ if (!is_local && vmci_ctx_qp_exists(context, handle)) {
+ pr_devel("Context (ID=0x%x) already attached to queue pair (handle=0x%x:0x%x)\n",
+ context_id, handle.context, handle.resource);
+ mutex_unlock(&qp_broker_list.mutex);
+ return VMCI_ERROR_ALREADY_EXISTS;
+ }
+
+ if (handle.resource != VMCI_INVALID_ID)
+ entry = qp_broker_handle_to_entry(handle);
+
+ if (!entry) {
+ create = true;
+ result =
+ qp_broker_create(handle, peer, flags, priv_flags,
+ produce_size, consume_size, page_store,
+ context, wakeup_cb, client_data, ent);
+ } else {
+ create = false;
+ result =
+ qp_broker_attach(entry, peer, flags, priv_flags,
+ produce_size, consume_size, page_store,
+ context, wakeup_cb, client_data, ent);
+ }
+
+ mutex_unlock(&qp_broker_list.mutex);
+
+ if (swap)
+ *swap = (context_id == VMCI_HOST_CONTEXT_ID) &&
+ !(create && is_local);
+
+ return result;
+}
+
+/*
+ * This function implements the kernel API for allocating a queue
+ * pair.
+ */
+static int qp_alloc_host_work(struct vmci_handle *handle,
+ struct vmci_queue **produce_q,
+ u64 produce_size,
+ struct vmci_queue **consume_q,
+ u64 consume_size,
+ u32 peer,
+ u32 flags,
+ u32 priv_flags,
+ vmci_event_release_cb wakeup_cb,
+ void *client_data)
+{
+ struct vmci_handle new_handle;
+ struct vmci_ctx *context;
+ struct qp_broker_entry *entry;
+ int result;
+ bool swap;
+
+ if (vmci_handle_is_invalid(*handle)) {
+ new_handle = vmci_make_handle(
+ VMCI_HOST_CONTEXT_ID, VMCI_INVALID_ID);
+ } else
+ new_handle = *handle;
+
+ context = vmci_ctx_get(VMCI_HOST_CONTEXT_ID);
+ entry = NULL;
+ result =
+ qp_broker_alloc(new_handle, peer, flags, priv_flags,
+ produce_size, consume_size, NULL, context,
+ wakeup_cb, client_data, &entry, &swap);
+ if (result == VMCI_SUCCESS) {
+ if (swap) {
+ /*
+ * If this is a local queue pair, the attacher
+ * will swap around produce and consume
+ * queues.
+ */
+
+ *produce_q = entry->consume_q;
+ *consume_q = entry->produce_q;
+ } else {
+ *produce_q = entry->produce_q;
+ *consume_q = entry->consume_q;
+ }
+
+ *handle = vmci_resource_handle(&entry->resource);
+ } else {
+ *handle = VMCI_INVALID_HANDLE;
+ pr_devel("queue pair broker failed to alloc (result=%d)\n",
+ result);
+ }
+ vmci_ctx_put(context);
+ return result;
+}
+
+/*
+ * Allocates a VMCI queue_pair. Only checks validity of input
+ * arguments. The real work is done in the host or guest
+ * specific function.
+ */
+int vmci_qp_alloc(struct vmci_handle *handle,
+ struct vmci_queue **produce_q,
+ u64 produce_size,
+ struct vmci_queue **consume_q,
+ u64 consume_size,
+ u32 peer,
+ u32 flags,
+ u32 priv_flags,
+ bool guest_endpoint,
+ vmci_event_release_cb wakeup_cb,
+ void *client_data)
+{
+ if (!handle || !produce_q || !consume_q ||
+ (!produce_size && !consume_size) || (flags & ~VMCI_QP_ALL_FLAGS))
+ return VMCI_ERROR_INVALID_ARGS;
+
+ if (guest_endpoint) {
+ return qp_alloc_guest_work(handle, produce_q,
+ produce_size, consume_q,
+ consume_size, peer,
+ flags, priv_flags);
+ } else {
+ return qp_alloc_host_work(handle, produce_q,
+ produce_size, consume_q,
+ consume_size, peer, flags,
+ priv_flags, wakeup_cb, client_data);
+ }
+}
+
+/*
+ * This function implements the host kernel API for detaching from
+ * a queue pair.
+ */
+static int qp_detatch_host_work(struct vmci_handle handle)
+{
+ int result;
+ struct vmci_ctx *context;
+
+ context = vmci_ctx_get(VMCI_HOST_CONTEXT_ID);
+
+ result = vmci_qp_broker_detach(handle, context);
+
+ vmci_ctx_put(context);
+ return result;
+}
+
+/*
+ * Detaches from a VMCI queue_pair. Only checks validity of input argument.
+ * Real work is done in the host or guest specific function.
+ */
+static int qp_detatch(struct vmci_handle handle, bool guest_endpoint)
+{
+ if (vmci_handle_is_invalid(handle))
+ return VMCI_ERROR_INVALID_ARGS;
+
+ if (guest_endpoint)
+ return qp_detatch_guest_work(handle);
+ else
+ return qp_detatch_host_work(handle);
+}
+
+/*
+ * Returns the entry from the head of the list. Assumes that the list is
+ * locked.
+ */
+static struct qp_entry *qp_list_get_head(struct qp_list *qp_list)
+{
+ if (!list_empty(&qp_list->head)) {
+ struct qp_entry *entry =
+ list_first_entry(&qp_list->head, struct qp_entry,
+ list_item);
+ return entry;
+ }
+
+ return NULL;
+}
+
+void vmci_qp_broker_exit(void)
+{
+ struct qp_entry *entry;
+ struct qp_broker_entry *be;
+
+ mutex_lock(&qp_broker_list.mutex);
+
+ while ((entry = qp_list_get_head(&qp_broker_list))) {
+ be = (struct qp_broker_entry *)entry;
+
+ qp_list_remove_entry(&qp_broker_list, entry);
+ kfree(be);
+ }
+
+ mutex_unlock(&qp_broker_list.mutex);
+}
+
+/*
+ * Requests that a queue pair be allocated with the VMCI queue
+ * pair broker. Allocates a queue pair entry if one does not
+ * exist. Attaches to one if it exists, and retrieves the page
+ * files backing that queue_pair. Assumes that the queue pair
+ * broker lock is held.
+ */
+int vmci_qp_broker_alloc(struct vmci_handle handle,
+ u32 peer,
+ u32 flags,
+ u32 priv_flags,
+ u64 produce_size,
+ u64 consume_size,
+ struct vmci_qp_page_store *page_store,
+ struct vmci_ctx *context)
+{
+ return qp_broker_alloc(handle, peer, flags, priv_flags,
+ produce_size, consume_size,
+ page_store, context, NULL, NULL, NULL, NULL);
+}
+
+/*
+ * VMX'en with versions lower than VMCI_VERSION_NOVMVM use a separate
+ * step to add the UVAs of the VMX mapping of the queue pair. This function
+ * provides backwards compatibility with such VMX'en, and takes care of
+ * registering the page store for a queue pair previously allocated by the
+ * VMX during create or attach. This function will move the queue pair state
+ * to either from VMCIQBP_CREATED_NO_MEM to VMCIQBP_CREATED_MEM or
+ * VMCIQBP_ATTACHED_NO_MEM to VMCIQBP_ATTACHED_MEM. If moving to the
+ * attached state with memory, the queue pair is ready to be used by the
+ * host peer, and an attached event will be generated.
+ *
+ * Assumes that the queue pair broker lock is held.
+ *
+ * This function is only used by the hosted platform, since there is no
+ * issue with backwards compatibility for vmkernel.
+ */
+int vmci_qp_broker_set_page_store(struct vmci_handle handle,
+ u64 produce_uva,
+ u64 consume_uva,
+ struct vmci_ctx *context)
+{
+ struct qp_broker_entry *entry;
+ int result;
+ const u32 context_id = vmci_ctx_get_id(context);
+
+ if (vmci_handle_is_invalid(handle) || !context ||
+ context_id == VMCI_INVALID_ID)
+ return VMCI_ERROR_INVALID_ARGS;
+
+ /*
+ * We only support guest to host queue pairs, so the VMX must
+ * supply UVAs for the mapped page files.
+ */
+
+ if (produce_uva == 0 || consume_uva == 0)
+ return VMCI_ERROR_INVALID_ARGS;
+
+ mutex_lock(&qp_broker_list.mutex);
+
+ if (!vmci_ctx_qp_exists(context, handle)) {
+ pr_warn("Context (ID=0x%x) not attached to queue pair (handle=0x%x:0x%x)\n",
+ context_id, handle.context, handle.resource);
+ result = VMCI_ERROR_NOT_FOUND;
+ goto out;
+ }
+
+ entry = qp_broker_handle_to_entry(handle);
+ if (!entry) {
+ result = VMCI_ERROR_NOT_FOUND;
+ goto out;
+ }
+
+ /*
+ * If I'm the owner then I can set the page store.
+ *
+ * Or, if a host created the queue_pair and I'm the attached peer
+ * then I can set the page store.
+ */
+ if (entry->create_id != context_id &&
+ (entry->create_id != VMCI_HOST_CONTEXT_ID ||
+ entry->attach_id != context_id)) {
+ result = VMCI_ERROR_QUEUEPAIR_NOTOWNER;
+ goto out;
+ }
+
+ if (entry->state != VMCIQPB_CREATED_NO_MEM &&
+ entry->state != VMCIQPB_ATTACHED_NO_MEM) {
+ result = VMCI_ERROR_UNAVAILABLE;
+ goto out;
+ }
+
+ result = qp_host_get_user_memory(produce_uva, consume_uva,
+ entry->produce_q, entry->consume_q);
+ if (result < VMCI_SUCCESS)
+ goto out;
+
+ result = qp_host_map_queues(entry->produce_q, entry->consume_q);
+ if (result < VMCI_SUCCESS) {
+ qp_host_unregister_user_memory(entry->produce_q,
+ entry->consume_q);
+ goto out;
+ }
+
+ if (entry->state == VMCIQPB_CREATED_NO_MEM)
+ entry->state = VMCIQPB_CREATED_MEM;
+ else
+ entry->state = VMCIQPB_ATTACHED_MEM;
+
+ entry->vmci_page_files = true;
+
+ if (entry->state == VMCIQPB_ATTACHED_MEM) {
+ result =
+ qp_notify_peer(true, handle, context_id, entry->create_id);
+ if (result < VMCI_SUCCESS) {
+ pr_warn("Failed to notify peer (ID=0x%x) of attach to queue pair (handle=0x%x:0x%x)\n",
+ entry->create_id, entry->qp.handle.context,
+ entry->qp.handle.resource);
+ }
+ }
+
+ result = VMCI_SUCCESS;
+ out:
+ mutex_unlock(&qp_broker_list.mutex);
+ return result;
+}
+
+/*
+ * Resets saved queue headers for the given QP broker
+ * entry. Should be used when guest memory becomes available
+ * again, or the guest detaches.
+ */
+static void qp_reset_saved_headers(struct qp_broker_entry *entry)
+{
+ entry->produce_q->saved_header = NULL;
+ entry->consume_q->saved_header = NULL;
+}
+
+/*
+ * The main entry point for detaching from a queue pair registered with the
+ * queue pair broker. If more than one endpoint is attached to the queue
+ * pair, the first endpoint will mainly decrement a reference count and
+ * generate a notification to its peer. The last endpoint will clean up
+ * the queue pair state registered with the broker.
+ *
+ * When a guest endpoint detaches, it will unmap and unregister the guest
+ * memory backing the queue pair. If the host is still attached, it will
+ * no longer be able to access the queue pair content.
+ *
+ * If the queue pair is already in a state where there is no memory
+ * registered for the queue pair (any *_NO_MEM state), it will transition to
+ * the VMCIQPB_SHUTDOWN_NO_MEM state. This will also happen, if a guest
+ * endpoint is the first of two endpoints to detach. If the host endpoint is
+ * the first out of two to detach, the queue pair will move to the
+ * VMCIQPB_SHUTDOWN_MEM state.
+ */
+int vmci_qp_broker_detach(struct vmci_handle handle, struct vmci_ctx *context)
+{
+ struct qp_broker_entry *entry;
+ const u32 context_id = vmci_ctx_get_id(context);
+ u32 peer_id;
+ bool is_local = false;
+ int result;
+
+ if (vmci_handle_is_invalid(handle) || !context ||
+ context_id == VMCI_INVALID_ID) {
+ return VMCI_ERROR_INVALID_ARGS;
+ }
+
+ mutex_lock(&qp_broker_list.mutex);
+
+ if (!vmci_ctx_qp_exists(context, handle)) {
+ pr_devel("Context (ID=0x%x) not attached to queue pair (handle=0x%x:0x%x)\n",
+ context_id, handle.context, handle.resource);
+ result = VMCI_ERROR_NOT_FOUND;
+ goto out;
+ }
+
+ entry = qp_broker_handle_to_entry(handle);
+ if (!entry) {
+ pr_devel("Context (ID=0x%x) reports being attached to queue pair(handle=0x%x:0x%x) that isn't present in broker\n",
+ context_id, handle.context, handle.resource);
+ result = VMCI_ERROR_NOT_FOUND;
+ goto out;
+ }
+
+ if (context_id != entry->create_id && context_id != entry->attach_id) {
+ result = VMCI_ERROR_QUEUEPAIR_NOTATTACHED;
+ goto out;
+ }
+
+ if (context_id == entry->create_id) {
+ peer_id = entry->attach_id;
+ entry->create_id = VMCI_INVALID_ID;
+ } else {
+ peer_id = entry->create_id;
+ entry->attach_id = VMCI_INVALID_ID;
+ }
+ entry->qp.ref_count--;
+
+ is_local = entry->qp.flags & VMCI_QPFLAG_LOCAL;
+
+ if (context_id != VMCI_HOST_CONTEXT_ID) {
+ bool headers_mapped;
+
+ /*
+ * Pre NOVMVM vmx'en may detach from a queue pair
+ * before setting the page store, and in that case
+ * there is no user memory to detach from. Also, more
+ * recent VMX'en may detach from a queue pair in the
+ * quiesced state.
+ */
+
+ qp_acquire_queue_mutex(entry->produce_q);
+ headers_mapped = entry->produce_q->q_header ||
+ entry->consume_q->q_header;
+ if (QPBROKERSTATE_HAS_MEM(entry)) {
+ result =
+ qp_host_unmap_queues(INVALID_VMCI_GUEST_MEM_ID,
+ entry->produce_q,
+ entry->consume_q);
+ if (result < VMCI_SUCCESS)
+ pr_warn("Failed to unmap queue headers for queue pair (handle=0x%x:0x%x,result=%d)\n",
+ handle.context, handle.resource,
+ result);
+
+ if (entry->vmci_page_files)
+ qp_host_unregister_user_memory(entry->produce_q,
+ entry->
+ consume_q);
+ else
+ qp_host_unregister_user_memory(entry->produce_q,
+ entry->
+ consume_q);
+
+ }
+
+ if (!headers_mapped)
+ qp_reset_saved_headers(entry);
+
+ qp_release_queue_mutex(entry->produce_q);
+
+ if (!headers_mapped && entry->wakeup_cb)
+ entry->wakeup_cb(entry->client_data);
+
+ } else {
+ if (entry->wakeup_cb) {
+ entry->wakeup_cb = NULL;
+ entry->client_data = NULL;
+ }
+ }
+
+ if (entry->qp.ref_count == 0) {
+ qp_list_remove_entry(&qp_broker_list, &entry->qp);
+
+ if (is_local)
+ kfree(entry->local_mem);
+
+ qp_cleanup_queue_mutex(entry->produce_q, entry->consume_q);
+ qp_host_free_queue(entry->produce_q, entry->qp.produce_size);
+ qp_host_free_queue(entry->consume_q, entry->qp.consume_size);
+ /* Unlink from resource hash table and free callback */
+ vmci_resource_remove(&entry->resource);
+
+ kfree(entry);
+
+ vmci_ctx_qp_destroy(context, handle);
+ } else {
+ qp_notify_peer(false, handle, context_id, peer_id);
+ if (context_id == VMCI_HOST_CONTEXT_ID &&
+ QPBROKERSTATE_HAS_MEM(entry)) {
+ entry->state = VMCIQPB_SHUTDOWN_MEM;
+ } else {
+ entry->state = VMCIQPB_SHUTDOWN_NO_MEM;
+ }
+
+ if (!is_local)
+ vmci_ctx_qp_destroy(context, handle);
+
+ }
+ result = VMCI_SUCCESS;
+ out:
+ mutex_unlock(&qp_broker_list.mutex);
+ return result;
+}
+
+/*
+ * Establishes the necessary mappings for a queue pair given a
+ * reference to the queue pair guest memory. This is usually
+ * called when a guest is unquiesced and the VMX is allowed to
+ * map guest memory once again.
+ */
+int vmci_qp_broker_map(struct vmci_handle handle,
+ struct vmci_ctx *context,
+ u64 guest_mem)
+{
+ struct qp_broker_entry *entry;
+ const u32 context_id = vmci_ctx_get_id(context);
+ bool is_local = false;
+ int result;
+
+ if (vmci_handle_is_invalid(handle) || !context ||
+ context_id == VMCI_INVALID_ID)
+ return VMCI_ERROR_INVALID_ARGS;
+
+ mutex_lock(&qp_broker_list.mutex);
+
+ if (!vmci_ctx_qp_exists(context, handle)) {
+ pr_devel("Context (ID=0x%x) not attached to queue pair (handle=0x%x:0x%x)\n",
+ context_id, handle.context, handle.resource);
+ result = VMCI_ERROR_NOT_FOUND;
+ goto out;
+ }
+
+ entry = qp_broker_handle_to_entry(handle);
+ if (!entry) {
+ pr_devel("Context (ID=0x%x) reports being attached to queue pair (handle=0x%x:0x%x) that isn't present in broker\n",
+ context_id, handle.context, handle.resource);
+ result = VMCI_ERROR_NOT_FOUND;
+ goto out;
+ }
+
+ if (context_id != entry->create_id && context_id != entry->attach_id) {
+ result = VMCI_ERROR_QUEUEPAIR_NOTATTACHED;
+ goto out;
+ }
+
+ is_local = entry->qp.flags & VMCI_QPFLAG_LOCAL;
+ result = VMCI_SUCCESS;
+
+ if (context_id != VMCI_HOST_CONTEXT_ID) {
+ struct vmci_qp_page_store page_store;
+
+ page_store.pages = guest_mem;
+ page_store.len = QPE_NUM_PAGES(entry->qp);
+
+ qp_acquire_queue_mutex(entry->produce_q);
+ qp_reset_saved_headers(entry);
+ result =
+ qp_host_register_user_memory(&page_store,
+ entry->produce_q,
+ entry->consume_q);
+ qp_release_queue_mutex(entry->produce_q);
+ if (result == VMCI_SUCCESS) {
+ /* Move state from *_NO_MEM to *_MEM */
+
+ entry->state++;
+
+ if (entry->wakeup_cb)
+ entry->wakeup_cb(entry->client_data);
+ }
+ }
+
+ out:
+ mutex_unlock(&qp_broker_list.mutex);
+ return result;
+}
+
+/*
+ * Saves a snapshot of the queue headers for the given QP broker
+ * entry. Should be used when guest memory is unmapped.
+ * Results:
+ * VMCI_SUCCESS on success, appropriate error code if guest memory
+ * can't be accessed..
+ */
+static int qp_save_headers(struct qp_broker_entry *entry)
+{
+ int result;
+
+ if (entry->produce_q->saved_header != NULL &&
+ entry->consume_q->saved_header != NULL) {
+ /*
+ * If the headers have already been saved, we don't need to do
+ * it again, and we don't want to map in the headers
+ * unnecessarily.
+ */
+
+ return VMCI_SUCCESS;
+ }
+
+ if (NULL == entry->produce_q->q_header ||
+ NULL == entry->consume_q->q_header) {
+ result = qp_host_map_queues(entry->produce_q, entry->consume_q);
+ if (result < VMCI_SUCCESS)
+ return result;
+ }
+
+ memcpy(&entry->saved_produce_q, entry->produce_q->q_header,
+ sizeof(entry->saved_produce_q));
+ entry->produce_q->saved_header = &entry->saved_produce_q;
+ memcpy(&entry->saved_consume_q, entry->consume_q->q_header,
+ sizeof(entry->saved_consume_q));
+ entry->consume_q->saved_header = &entry->saved_consume_q;
+
+ return VMCI_SUCCESS;
+}
+
+/*
+ * Removes all references to the guest memory of a given queue pair, and
+ * will move the queue pair from state *_MEM to *_NO_MEM. It is usually
+ * called when a VM is being quiesced where access to guest memory should
+ * avoided.
+ */
+int vmci_qp_broker_unmap(struct vmci_handle handle,
+ struct vmci_ctx *context,
+ u32 gid)
+{
+ struct qp_broker_entry *entry;
+ const u32 context_id = vmci_ctx_get_id(context);
+ bool is_local = false;
+ int result;
+
+ if (vmci_handle_is_invalid(handle) || !context ||
+ context_id == VMCI_INVALID_ID)
+ return VMCI_ERROR_INVALID_ARGS;
+
+ mutex_lock(&qp_broker_list.mutex);
+
+ if (!vmci_ctx_qp_exists(context, handle)) {
+ pr_devel("Context (ID=0x%x) not attached to queue pair (handle=0x%x:0x%x)\n",
+ context_id, handle.context, handle.resource);
+ result = VMCI_ERROR_NOT_FOUND;
+ goto out;
+ }
+
+ entry = qp_broker_handle_to_entry(handle);
+ if (!entry) {
+ pr_devel("Context (ID=0x%x) reports being attached to queue pair (handle=0x%x:0x%x) that isn't present in broker\n",
+ context_id, handle.context, handle.resource);
+ result = VMCI_ERROR_NOT_FOUND;
+ goto out;
+ }
+
+ if (context_id != entry->create_id && context_id != entry->attach_id) {
+ result = VMCI_ERROR_QUEUEPAIR_NOTATTACHED;
+ goto out;
+ }
+
+ is_local = entry->qp.flags & VMCI_QPFLAG_LOCAL;
+
+ if (context_id != VMCI_HOST_CONTEXT_ID) {
+ qp_acquire_queue_mutex(entry->produce_q);
+ result = qp_save_headers(entry);
+ if (result < VMCI_SUCCESS)
+ pr_warn("Failed to save queue headers for queue pair (handle=0x%x:0x%x,result=%d)\n",
+ handle.context, handle.resource, result);
+
+ qp_host_unmap_queues(gid, entry->produce_q, entry->consume_q);
+
+ /*
+ * On hosted, when we unmap queue pairs, the VMX will also
+ * unmap the guest memory, so we invalidate the previously
+ * registered memory. If the queue pair is mapped again at a
+ * later point in time, we will need to reregister the user
+ * memory with a possibly new user VA.
+ */
+ qp_host_unregister_user_memory(entry->produce_q,
+ entry->consume_q);
+
+ /*
+ * Move state from *_MEM to *_NO_MEM.
+ */
+ entry->state--;
+
+ qp_release_queue_mutex(entry->produce_q);
+ }
+
+ result = VMCI_SUCCESS;
+
+ out:
+ mutex_unlock(&qp_broker_list.mutex);
+ return result;
+}
+
+/*
+ * Destroys all guest queue pair endpoints. If active guest queue
+ * pairs still exist, hypercalls to attempt detach from these
+ * queue pairs will be made. Any failure to detach is silently
+ * ignored.
+ */
+void vmci_qp_guest_endpoints_exit(void)
+{
+ struct qp_entry *entry;
+ struct qp_guest_endpoint *ep;
+
+ mutex_lock(&qp_guest_endpoints.mutex);
+
+ while ((entry = qp_list_get_head(&qp_guest_endpoints))) {
+ ep = (struct qp_guest_endpoint *)entry;
+
+ /* Don't make a hypercall for local queue_pairs. */
+ if (!(entry->flags & VMCI_QPFLAG_LOCAL))
+ qp_detatch_hypercall(entry->handle);
+
+ /* We cannot fail the exit, so let's reset ref_count. */
+ entry->ref_count = 0;
+ qp_list_remove_entry(&qp_guest_endpoints, entry);
+
+ qp_guest_endpoint_destroy(ep);
+ }
+
+ mutex_unlock(&qp_guest_endpoints.mutex);
+}
+
+/*
+ * Helper routine that will lock the queue pair before subsequent
+ * operations.
+ * Note: Non-blocking on the host side is currently only implemented in ESX.
+ * Since non-blocking isn't yet implemented on the host personality we
+ * have no reason to acquire a spin lock. So to avoid the use of an
+ * unnecessary lock only acquire the mutex if we can block.
+ */
+static void qp_lock(const struct vmci_qp *qpair)
+{
+ qp_acquire_queue_mutex(qpair->produce_q);
+}
+
+/*
+ * Helper routine that unlocks the queue pair after calling
+ * qp_lock.
+ */
+static void qp_unlock(const struct vmci_qp *qpair)
+{
+ qp_release_queue_mutex(qpair->produce_q);
+}
+
+/*
+ * The queue headers may not be mapped at all times. If a queue is
+ * currently not mapped, it will be attempted to do so.
+ */
+static int qp_map_queue_headers(struct vmci_queue *produce_q,
+ struct vmci_queue *consume_q)
+{
+ int result;
+
+ if (NULL == produce_q->q_header || NULL == consume_q->q_header) {
+ result = qp_host_map_queues(produce_q, consume_q);
+ if (result < VMCI_SUCCESS)
+ return (produce_q->saved_header &&
+ consume_q->saved_header) ?
+ VMCI_ERROR_QUEUEPAIR_NOT_READY :
+ VMCI_ERROR_QUEUEPAIR_NOTATTACHED;
+ }
+
+ return VMCI_SUCCESS;
+}
+
+/*
+ * Helper routine that will retrieve the produce and consume
+ * headers of a given queue pair. If the guest memory of the
+ * queue pair is currently not available, the saved queue headers
+ * will be returned, if these are available.
+ */
+static int qp_get_queue_headers(const struct vmci_qp *qpair,
+ struct vmci_queue_header **produce_q_header,
+ struct vmci_queue_header **consume_q_header)
+{
+ int result;
+
+ result = qp_map_queue_headers(qpair->produce_q, qpair->consume_q);
+ if (result == VMCI_SUCCESS) {
+ *produce_q_header = qpair->produce_q->q_header;
+ *consume_q_header = qpair->consume_q->q_header;
+ } else if (qpair->produce_q->saved_header &&
+ qpair->consume_q->saved_header) {
+ *produce_q_header = qpair->produce_q->saved_header;
+ *consume_q_header = qpair->consume_q->saved_header;
+ result = VMCI_SUCCESS;
+ }
+
+ return result;
+}
+
+/*
+ * Callback from VMCI queue pair broker indicating that a queue
+ * pair that was previously not ready, now either is ready or
+ * gone forever.
+ */
+static int qp_wakeup_cb(void *client_data)
+{
+ struct vmci_qp *qpair = (struct vmci_qp *)client_data;
+
+ qp_lock(qpair);
+ while (qpair->blocked > 0) {
+ qpair->blocked--;
+ qpair->generation++;
+ wake_up(&qpair->event);
+ }
+ qp_unlock(qpair);
+
+ return VMCI_SUCCESS;
+}
+
+/*
+ * Makes the calling thread wait for the queue pair to become
+ * ready for host side access. Returns true when thread is
+ * woken up after queue pair state change, false otherwise.
+ */
+static bool qp_wait_for_ready_queue(struct vmci_qp *qpair)
+{
+ unsigned int generation;
+
+ qpair->blocked++;
+ generation = qpair->generation;
+ qp_unlock(qpair);
+ wait_event(qpair->event, generation != qpair->generation);
+ qp_lock(qpair);
+
+ return true;
+}
+
+/*
+ * Enqueues a given buffer to the produce queue using the provided
+ * function. As many bytes as possible (space available in the queue)
+ * are enqueued. Assumes the queue->mutex has been acquired. Returns
+ * VMCI_ERROR_QUEUEPAIR_NOSPACE if no space was available to enqueue
+ * data, VMCI_ERROR_INVALID_SIZE, if any queue pointer is outside the
+ * queue (as defined by the queue size), VMCI_ERROR_INVALID_ARGS, if
+ * an error occured when accessing the buffer,
+ * VMCI_ERROR_QUEUEPAIR_NOTATTACHED, if the queue pair pages aren't
+ * available. Otherwise, the number of bytes written to the queue is
+ * returned. Updates the tail pointer of the produce queue.
+ */
+static ssize_t qp_enqueue_locked(struct vmci_queue *produce_q,
+ struct vmci_queue *consume_q,
+ const u64 produce_q_size,
+ const void *buf,
+ size_t buf_size,
+ vmci_memcpy_to_queue_func memcpy_to_queue)
+{
+ s64 free_space;
+ u64 tail;
+ size_t written;
+ ssize_t result;
+
+ result = qp_map_queue_headers(produce_q, consume_q);
+ if (unlikely(result != VMCI_SUCCESS))
+ return result;
+
+ free_space = vmci_q_header_free_space(produce_q->q_header,
+ consume_q->q_header,
+ produce_q_size);
+ if (free_space == 0)
+ return VMCI_ERROR_QUEUEPAIR_NOSPACE;
+
+ if (free_space < VMCI_SUCCESS)
+ return (ssize_t) free_space;
+
+ written = (size_t) (free_space > buf_size ? buf_size : free_space);
+ tail = vmci_q_header_producer_tail(produce_q->q_header);
+ if (likely(tail + written < produce_q_size)) {
+ result = memcpy_to_queue(produce_q, tail, buf, 0, written);
+ } else {
+ /* Tail pointer wraps around. */
+
+ const size_t tmp = (size_t) (produce_q_size - tail);
+
+ result = memcpy_to_queue(produce_q, tail, buf, 0, tmp);
+ if (result >= VMCI_SUCCESS)
+ result = memcpy_to_queue(produce_q, 0, buf, tmp,
+ written - tmp);
+ }
+
+ if (result < VMCI_SUCCESS)
+ return result;
+
+ vmci_q_header_add_producer_tail(produce_q->q_header, written,
+ produce_q_size);
+ return written;
+}
+
+/*
+ * Dequeues data (if available) from the given consume queue. Writes data
+ * to the user provided buffer using the provided function.
+ * Assumes the queue->mutex has been acquired.
+ * Results:
+ * VMCI_ERROR_QUEUEPAIR_NODATA if no data was available to dequeue.
+ * VMCI_ERROR_INVALID_SIZE, if any queue pointer is outside the queue
+ * (as defined by the queue size).
+ * VMCI_ERROR_INVALID_ARGS, if an error occured when accessing the buffer.
+ * Otherwise the number of bytes dequeued is returned.
+ * Side effects:
+ * Updates the head pointer of the consume queue.
+ */
+static ssize_t qp_dequeue_locked(struct vmci_queue *produce_q,
+ struct vmci_queue *consume_q,
+ const u64 consume_q_size,
+ void *buf,
+ size_t buf_size,
+ vmci_memcpy_from_queue_func memcpy_from_queue,
+ bool update_consumer)
+{
+ s64 buf_ready;
+ u64 head;
+ size_t read;
+ ssize_t result;
+
+ result = qp_map_queue_headers(produce_q, consume_q);
+ if (unlikely(result != VMCI_SUCCESS))
+ return result;
+
+ buf_ready = vmci_q_header_buf_ready(consume_q->q_header,
+ produce_q->q_header,
+ consume_q_size);
+ if (buf_ready == 0)
+ return VMCI_ERROR_QUEUEPAIR_NODATA;
+
+ if (buf_ready < VMCI_SUCCESS)
+ return (ssize_t) buf_ready;
+
+ read = (size_t) (buf_ready > buf_size ? buf_size : buf_ready);
+ head = vmci_q_header_consumer_head(produce_q->q_header);
+ if (likely(head + read < consume_q_size)) {
+ result = memcpy_from_queue(buf, 0, consume_q, head, read);
+ } else {
+ /* Head pointer wraps around. */
+
+ const size_t tmp = (size_t) (consume_q_size - head);
+
+ result = memcpy_from_queue(buf, 0, consume_q, head, tmp);
+ if (result >= VMCI_SUCCESS)
+ result = memcpy_from_queue(buf, tmp, consume_q, 0,
+ read - tmp);
+
+ }
+
+ if (result < VMCI_SUCCESS)
+ return result;
+
+ if (update_consumer)
+ vmci_q_header_add_consumer_head(produce_q->q_header,
+ read, consume_q_size);
+
+ return read;
+}
+
+/*
+ * vmci_qpair_alloc() - Allocates a queue pair.
+ * @qpair: Pointer for the new vmci_qp struct.
+ * @handle: Handle to track the resource.
+ * @produce_qsize: Desired size of the producer queue.
+ * @consume_qsize: Desired size of the consumer queue.
+ * @peer: ContextID of the peer.
+ * @flags: VMCI flags.
+ * @priv_flags: VMCI priviledge flags.
+ *
+ * This is the client interface for allocating the memory for a
+ * vmci_qp structure and then attaching to the underlying
+ * queue. If an error occurs allocating the memory for the
+ * vmci_qp structure no attempt is made to attach. If an
+ * error occurs attaching, then the structure is freed.
+ */
+int vmci_qpair_alloc(struct vmci_qp **qpair,
+ struct vmci_handle *handle,
+ u64 produce_qsize,
+ u64 consume_qsize,
+ u32 peer,
+ u32 flags,
+ u32 priv_flags)
+{
+ struct vmci_qp *my_qpair;
+ int retval;
+ struct vmci_handle src = VMCI_INVALID_HANDLE;
+ struct vmci_handle dst = vmci_make_handle(peer, VMCI_INVALID_ID);
+ enum vmci_route route;
+ vmci_event_release_cb wakeup_cb;
+ void *client_data;
+
+ /*
+ * Restrict the size of a queuepair. The device already
+ * enforces a limit on the total amount of memory that can be
+ * allocated to queuepairs for a guest. However, we try to
+ * allocate this memory before we make the queuepair
+ * allocation hypercall. On Linux, we allocate each page
+ * separately, which means rather than fail, the guest will
+ * thrash while it tries to allocate, and will become
+ * increasingly unresponsive to the point where it appears to
+ * be hung. So we place a limit on the size of an individual
+ * queuepair here, and leave the device to enforce the
+ * restriction on total queuepair memory. (Note that this
+ * doesn't prevent all cases; a user with only this much
+ * physical memory could still get into trouble.) The error
+ * used by the device is NO_RESOURCES, so use that here too.
+ */
+
+ if (produce_qsize + consume_qsize < max(produce_qsize, consume_qsize) ||
+ produce_qsize + consume_qsize > VMCI_MAX_GUEST_QP_MEMORY)
+ return VMCI_ERROR_NO_RESOURCES;
+
+ retval = vmci_route(&src, &dst, false, &route);
+ if (retval < VMCI_SUCCESS)
+ route = vmci_guest_code_active() ?
+ VMCI_ROUTE_AS_GUEST : VMCI_ROUTE_AS_HOST;
+
+ if (flags & (VMCI_QPFLAG_NONBLOCK | VMCI_QPFLAG_PINNED)) {
+ pr_devel("NONBLOCK OR PINNED set");
+ return VMCI_ERROR_INVALID_ARGS;
+ }
+
+ my_qpair = kzalloc(sizeof(*my_qpair), GFP_KERNEL);
+ if (!my_qpair)
+ return VMCI_ERROR_NO_MEM;
+
+ my_qpair->produce_q_size = produce_qsize;
+ my_qpair->consume_q_size = consume_qsize;
+ my_qpair->peer = peer;
+ my_qpair->flags = flags;
+ my_qpair->priv_flags = priv_flags;
+
+ wakeup_cb = NULL;
+ client_data = NULL;
+
+ if (VMCI_ROUTE_AS_HOST == route) {
+ my_qpair->guest_endpoint = false;
+ if (!(flags & VMCI_QPFLAG_LOCAL)) {
+ my_qpair->blocked = 0;
+ my_qpair->generation = 0;
+ init_waitqueue_head(&my_qpair->event);
+ wakeup_cb = qp_wakeup_cb;
+ client_data = (void *)my_qpair;
+ }
+ } else {
+ my_qpair->guest_endpoint = true;
+ }
+
+ retval = vmci_qp_alloc(handle,
+ &my_qpair->produce_q,
+ my_qpair->produce_q_size,
+ &my_qpair->consume_q,
+ my_qpair->consume_q_size,
+ my_qpair->peer,
+ my_qpair->flags,
+ my_qpair->priv_flags,
+ my_qpair->guest_endpoint,
+ wakeup_cb, client_data);
+
+ if (retval < VMCI_SUCCESS) {
+ kfree(my_qpair);
+ return retval;
+ }
+
+ *qpair = my_qpair;
+ my_qpair->handle = *handle;
+
+ return retval;
+}
+EXPORT_SYMBOL_GPL(vmci_qpair_alloc);
+
+/*
+ * vmci_qpair_detach() - Detatches the client from a queue pair.
+ * @qpair: Reference of a pointer to the qpair struct.
+ *
+ * This is the client interface for detaching from a VMCIQPair.
+ * Note that this routine will free the memory allocated for the
+ * vmci_qp structure too.
+ */
+int vmci_qpair_detach(struct vmci_qp **qpair)
+{
+ int result;
+ struct vmci_qp *old_qpair;
+
+ if (!qpair || !(*qpair))
+ return VMCI_ERROR_INVALID_ARGS;
+
+ old_qpair = *qpair;
+ result = qp_detatch(old_qpair->handle, old_qpair->guest_endpoint);
+
+ /*
+ * The guest can fail to detach for a number of reasons, and
+ * if it does so, it will cleanup the entry (if there is one).
+ * The host can fail too, but it won't cleanup the entry
+ * immediately, it will do that later when the context is
+ * freed. Either way, we need to release the qpair struct
+ * here; there isn't much the caller can do, and we don't want
+ * to leak.
+ */
+
+ memset(old_qpair, 0, sizeof(*old_qpair));
+ old_qpair->handle = VMCI_INVALID_HANDLE;
+ old_qpair->peer = VMCI_INVALID_ID;
+ kfree(old_qpair);
+ *qpair = NULL;
+
+ return result;
+}
+EXPORT_SYMBOL_GPL(vmci_qpair_detach);
+
+/*
+ * vmci_qpair_get_produce_indexes() - Retrieves the indexes of the producer.
+ * @qpair: Pointer to the queue pair struct.
+ * @producer_tail: Reference used for storing producer tail index.
+ * @consumer_head: Reference used for storing the consumer head index.
+ *
+ * This is the client interface for getting the current indexes of the
+ * QPair from the point of the view of the caller as the producer.
+ */
+int vmci_qpair_get_produce_indexes(const struct vmci_qp *qpair,
+ u64 *producer_tail,
+ u64 *consumer_head)
+{
+ struct vmci_queue_header *produce_q_header;
+ struct vmci_queue_header *consume_q_header;
+ int result;
+
+ if (!qpair)
+ return VMCI_ERROR_INVALID_ARGS;
+
+ qp_lock(qpair);
+ result =
+ qp_get_queue_headers(qpair, &produce_q_header, &consume_q_header);
+ if (result == VMCI_SUCCESS)
+ vmci_q_header_get_pointers(produce_q_header, consume_q_header,
+ producer_tail, consumer_head);
+ qp_unlock(qpair);
+
+ if (result == VMCI_SUCCESS &&
+ ((producer_tail && *producer_tail >= qpair->produce_q_size) ||
+ (consumer_head && *consumer_head >= qpair->produce_q_size)))
+ return VMCI_ERROR_INVALID_SIZE;
+
+ return result;
+}
+EXPORT_SYMBOL_GPL(vmci_qpair_get_produce_indexes);
+
+/*
+ * vmci_qpair_get_consume_indexes() - Retrieves the indexes of the comsumer.
+ * @qpair: Pointer to the queue pair struct.
+ * @consumer_tail: Reference used for storing consumer tail index.
+ * @producer_head: Reference used for storing the producer head index.
+ *
+ * This is the client interface for getting the current indexes of the
+ * QPair from the point of the view of the caller as the consumer.
+ */
+int vmci_qpair_get_consume_indexes(const struct vmci_qp *qpair,
+ u64 *consumer_tail,
+ u64 *producer_head)
+{
+ struct vmci_queue_header *produce_q_header;
+ struct vmci_queue_header *consume_q_header;
+ int result;
+
+ if (!qpair)
+ return VMCI_ERROR_INVALID_ARGS;
+
+ qp_lock(qpair);
+ result =
+ qp_get_queue_headers(qpair, &produce_q_header, &consume_q_header);
+ if (result == VMCI_SUCCESS)
+ vmci_q_header_get_pointers(consume_q_header, produce_q_header,
+ consumer_tail, producer_head);
+ qp_unlock(qpair);
+
+ if (result == VMCI_SUCCESS &&
+ ((consumer_tail && *consumer_tail >= qpair->consume_q_size) ||
+ (producer_head && *producer_head >= qpair->consume_q_size)))
+ return VMCI_ERROR_INVALID_SIZE;
+
+ return result;
+}
+EXPORT_SYMBOL_GPL(vmci_qpair_get_consume_indexes);
+
+/*
+ * vmci_qpair_produce_free_space() - Retrieves free space in producer queue.
+ * @qpair: Pointer to the queue pair struct.
+ *
+ * This is the client interface for getting the amount of free
+ * space in the QPair from the point of the view of the caller as
+ * the producer which is the common case. Returns < 0 if err, else
+ * available bytes into which data can be enqueued if > 0.
+ */
+s64 vmci_qpair_produce_free_space(const struct vmci_qp *qpair)
+{
+ struct vmci_queue_header *produce_q_header;
+ struct vmci_queue_header *consume_q_header;
+ s64 result;
+
+ if (!qpair)
+ return VMCI_ERROR_INVALID_ARGS;
+
+ qp_lock(qpair);
+ result =
+ qp_get_queue_headers(qpair, &produce_q_header, &consume_q_header);
+ if (result == VMCI_SUCCESS)
+ result = vmci_q_header_free_space(produce_q_header,
+ consume_q_header,
+ qpair->produce_q_size);
+ else
+ result = 0;
+
+ qp_unlock(qpair);
+
+ return result;
+}
+EXPORT_SYMBOL_GPL(vmci_qpair_produce_free_space);
+
+/*
+ * vmci_qpair_consume_free_space() - Retrieves free space in consumer queue.
+ * @qpair: Pointer to the queue pair struct.
+ *
+ * This is the client interface for getting the amount of free
+ * space in the QPair from the point of the view of the caller as
+ * the consumer which is not the common case. Returns < 0 if err, else
+ * available bytes into which data can be enqueued if > 0.
+ */
+s64 vmci_qpair_consume_free_space(const struct vmci_qp *qpair)
+{
+ struct vmci_queue_header *produce_q_header;
+ struct vmci_queue_header *consume_q_header;
+ s64 result;
+
+ if (!qpair)
+ return VMCI_ERROR_INVALID_ARGS;
+
+ qp_lock(qpair);
+ result =
+ qp_get_queue_headers(qpair, &produce_q_header, &consume_q_header);
+ if (result == VMCI_SUCCESS)
+ result = vmci_q_header_free_space(consume_q_header,
+ produce_q_header,
+ qpair->consume_q_size);
+ else
+ result = 0;
+
+ qp_unlock(qpair);
+
+ return result;
+}
+EXPORT_SYMBOL_GPL(vmci_qpair_consume_free_space);
+
+/*
+ * vmci_qpair_produce_buf_ready() - Gets bytes ready to read from
+ * producer queue.
+ * @qpair: Pointer to the queue pair struct.
+ *
+ * This is the client interface for getting the amount of
+ * enqueued data in the QPair from the point of the view of the
+ * caller as the producer which is not the common case. Returns < 0 if err,
+ * else available bytes that may be read.
+ */
+s64 vmci_qpair_produce_buf_ready(const struct vmci_qp *qpair)
+{
+ struct vmci_queue_header *produce_q_header;
+ struct vmci_queue_header *consume_q_header;
+ s64 result;
+
+ if (!qpair)
+ return VMCI_ERROR_INVALID_ARGS;
+
+ qp_lock(qpair);
+ result =
+ qp_get_queue_headers(qpair, &produce_q_header, &consume_q_header);
+ if (result == VMCI_SUCCESS)
+ result = vmci_q_header_buf_ready(produce_q_header,
+ consume_q_header,
+ qpair->produce_q_size);
+ else
+ result = 0;
+
+ qp_unlock(qpair);
+
+ return result;
+}
+EXPORT_SYMBOL_GPL(vmci_qpair_produce_buf_ready);
+
+/*
+ * vmci_qpair_consume_buf_ready() - Gets bytes ready to read from
+ * consumer queue.
+ * @qpair: Pointer to the queue pair struct.
+ *
+ * This is the client interface for getting the amount of
+ * enqueued data in the QPair from the point of the view of the
+ * caller as the consumer which is the normal case. Returns < 0 if err,
+ * else available bytes that may be read.
+ */
+s64 vmci_qpair_consume_buf_ready(const struct vmci_qp *qpair)
+{
+ struct vmci_queue_header *produce_q_header;
+ struct vmci_queue_header *consume_q_header;
+ s64 result;
+
+ if (!qpair)
+ return VMCI_ERROR_INVALID_ARGS;
+
+ qp_lock(qpair);
+ result =
+ qp_get_queue_headers(qpair, &produce_q_header, &consume_q_header);
+ if (result == VMCI_SUCCESS)
+ result = vmci_q_header_buf_ready(consume_q_header,
+ produce_q_header,
+ qpair->consume_q_size);
+ else
+ result = 0;
+
+ qp_unlock(qpair);
+
+ return result;
+}
+EXPORT_SYMBOL_GPL(vmci_qpair_consume_buf_ready);
+
+/*
+ * vmci_qpair_enqueue() - Throw data on the queue.
+ * @qpair: Pointer to the queue pair struct.
+ * @buf: Pointer to buffer containing data
+ * @buf_size: Length of buffer.
+ * @buf_type: Buffer type (Unused).
+ *
+ * This is the client interface for enqueueing data into the queue.
+ * Returns number of bytes enqueued or < 0 on error.
+ */
+ssize_t vmci_qpair_enqueue(struct vmci_qp *qpair,
+ const void *buf,
+ size_t buf_size,
+ int buf_type)
+{
+ ssize_t result;
+
+ if (!qpair || !buf)
+ return VMCI_ERROR_INVALID_ARGS;
+
+ qp_lock(qpair);
+
+ do {
+ result = qp_enqueue_locked(qpair->produce_q,
+ qpair->consume_q,
+ qpair->produce_q_size,
+ buf, buf_size,
+ qp_memcpy_to_queue);
+
+ if (result == VMCI_ERROR_QUEUEPAIR_NOT_READY &&
+ !qp_wait_for_ready_queue(qpair))
+ result = VMCI_ERROR_WOULD_BLOCK;
+
+ } while (result == VMCI_ERROR_QUEUEPAIR_NOT_READY);
+
+ qp_unlock(qpair);
+
+ return result;
+}
+EXPORT_SYMBOL_GPL(vmci_qpair_enqueue);
+
+/*
+ * vmci_qpair_dequeue() - Get data from the queue.
+ * @qpair: Pointer to the queue pair struct.
+ * @buf: Pointer to buffer for the data
+ * @buf_size: Length of buffer.
+ * @buf_type: Buffer type (Unused).
+ *
+ * This is the client interface for dequeueing data from the queue.
+ * Returns number of bytes dequeued or < 0 on error.
+ */
+ssize_t vmci_qpair_dequeue(struct vmci_qp *qpair,
+ void *buf,
+ size_t buf_size,
+ int buf_type)
+{
+ ssize_t result;
+
+ if (!qpair || !buf)
+ return VMCI_ERROR_INVALID_ARGS;
+
+ qp_lock(qpair);
+
+ do {
+ result = qp_dequeue_locked(qpair->produce_q,
+ qpair->consume_q,
+ qpair->consume_q_size,
+ buf, buf_size,
+ qp_memcpy_from_queue, true);
+
+ if (result == VMCI_ERROR_QUEUEPAIR_NOT_READY &&
+ !qp_wait_for_ready_queue(qpair))
+ result = VMCI_ERROR_WOULD_BLOCK;
+
+ } while (result == VMCI_ERROR_QUEUEPAIR_NOT_READY);
+
+ qp_unlock(qpair);
+
+ return result;
+}
+EXPORT_SYMBOL_GPL(vmci_qpair_dequeue);
+
+/*
+ * vmci_qpair_peek() - Peek at the data in the queue.
+ * @qpair: Pointer to the queue pair struct.
+ * @buf: Pointer to buffer for the data
+ * @buf_size: Length of buffer.
+ * @buf_type: Buffer type (Unused on Linux).
+ *
+ * This is the client interface for peeking into a queue. (I.e.,
+ * copy data from the queue without updating the head pointer.)
+ * Returns number of bytes dequeued or < 0 on error.
+ */
+ssize_t vmci_qpair_peek(struct vmci_qp *qpair,
+ void *buf,
+ size_t buf_size,
+ int buf_type)
+{
+ ssize_t result;
+
+ if (!qpair || !buf)
+ return VMCI_ERROR_INVALID_ARGS;
+
+ qp_lock(qpair);
+
+ do {
+ result = qp_dequeue_locked(qpair->produce_q,
+ qpair->consume_q,
+ qpair->consume_q_size,
+ buf, buf_size,
+ qp_memcpy_from_queue, false);
+
+ if (result == VMCI_ERROR_QUEUEPAIR_NOT_READY &&
+ !qp_wait_for_ready_queue(qpair))
+ result = VMCI_ERROR_WOULD_BLOCK;
+
+ } while (result == VMCI_ERROR_QUEUEPAIR_NOT_READY);
+
+ qp_unlock(qpair);
+
+ return result;
+}
+EXPORT_SYMBOL_GPL(vmci_qpair_peek);
+
+/*
+ * vmci_qpair_enquev() - Throw data on the queue using iov.
+ * @qpair: Pointer to the queue pair struct.
+ * @iov: Pointer to buffer containing data
+ * @iov_size: Length of buffer.
+ * @buf_type: Buffer type (Unused).
+ *
+ * This is the client interface for enqueueing data into the queue.
+ * This function uses IO vectors to handle the work. Returns number
+ * of bytes enqueued or < 0 on error.
+ */
+ssize_t vmci_qpair_enquev(struct vmci_qp *qpair,
+ struct msghdr *msg,
+ size_t iov_size,
+ int buf_type)
+{
+ ssize_t result;
+
+ if (!qpair)
+ return VMCI_ERROR_INVALID_ARGS;
+
+ qp_lock(qpair);
+
+ do {
+ result = qp_enqueue_locked(qpair->produce_q,
+ qpair->consume_q,
+ qpair->produce_q_size,
+ msg, iov_size,
+ qp_memcpy_to_queue_iov);
+
+ if (result == VMCI_ERROR_QUEUEPAIR_NOT_READY &&
+ !qp_wait_for_ready_queue(qpair))
+ result = VMCI_ERROR_WOULD_BLOCK;
+
+ } while (result == VMCI_ERROR_QUEUEPAIR_NOT_READY);
+
+ qp_unlock(qpair);
+
+ return result;
+}
+EXPORT_SYMBOL_GPL(vmci_qpair_enquev);
+
+/*
+ * vmci_qpair_dequev() - Get data from the queue using iov.
+ * @qpair: Pointer to the queue pair struct.
+ * @iov: Pointer to buffer for the data
+ * @iov_size: Length of buffer.
+ * @buf_type: Buffer type (Unused).
+ *
+ * This is the client interface for dequeueing data from the queue.
+ * This function uses IO vectors to handle the work. Returns number
+ * of bytes dequeued or < 0 on error.
+ */
+ssize_t vmci_qpair_dequev(struct vmci_qp *qpair,
+ struct msghdr *msg,
+ size_t iov_size,
+ int buf_type)
+{
+ ssize_t result;
+
+ if (!qpair)
+ return VMCI_ERROR_INVALID_ARGS;
+
+ qp_lock(qpair);
+
+ do {
+ result = qp_dequeue_locked(qpair->produce_q,
+ qpair->consume_q,
+ qpair->consume_q_size,
+ msg, iov_size,
+ qp_memcpy_from_queue_iov,
+ true);
+
+ if (result == VMCI_ERROR_QUEUEPAIR_NOT_READY &&
+ !qp_wait_for_ready_queue(qpair))
+ result = VMCI_ERROR_WOULD_BLOCK;
+
+ } while (result == VMCI_ERROR_QUEUEPAIR_NOT_READY);
+
+ qp_unlock(qpair);
+
+ return result;
+}
+EXPORT_SYMBOL_GPL(vmci_qpair_dequev);
+
+/*
+ * vmci_qpair_peekv() - Peek at the data in the queue using iov.
+ * @qpair: Pointer to the queue pair struct.
+ * @iov: Pointer to buffer for the data
+ * @iov_size: Length of buffer.
+ * @buf_type: Buffer type (Unused on Linux).
+ *
+ * This is the client interface for peeking into a queue. (I.e.,
+ * copy data from the queue without updating the head pointer.)
+ * This function uses IO vectors to handle the work. Returns number
+ * of bytes peeked or < 0 on error.
+ */
+ssize_t vmci_qpair_peekv(struct vmci_qp *qpair,
+ struct msghdr *msg,
+ size_t iov_size,
+ int buf_type)
+{
+ ssize_t result;
+
+ if (!qpair)
+ return VMCI_ERROR_INVALID_ARGS;
+
+ qp_lock(qpair);
+
+ do {
+ result = qp_dequeue_locked(qpair->produce_q,
+ qpair->consume_q,
+ qpair->consume_q_size,
+ msg, iov_size,
+ qp_memcpy_from_queue_iov,
+ false);
+
+ if (result == VMCI_ERROR_QUEUEPAIR_NOT_READY &&
+ !qp_wait_for_ready_queue(qpair))
+ result = VMCI_ERROR_WOULD_BLOCK;
+
+ } while (result == VMCI_ERROR_QUEUEPAIR_NOT_READY);
+
+ qp_unlock(qpair);
+ return result;
+}
+EXPORT_SYMBOL_GPL(vmci_qpair_peekv);
diff --git a/drivers/misc/vmw_vmci/vmci_queue_pair.h b/drivers/misc/vmw_vmci/vmci_queue_pair.h
new file mode 100644
index 0000000..ed177f0
--- /dev/null
+++ b/drivers/misc/vmw_vmci/vmci_queue_pair.h
@@ -0,0 +1,173 @@
+/*
+ * VMware VMCI Driver
+ *
+ * Copyright (C) 2012 VMware, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation version 2 and no later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ */
+
+#ifndef _VMCI_QUEUE_PAIR_H_
+#define _VMCI_QUEUE_PAIR_H_
+
+#include <linux/vmw_vmci_defs.h>
+#include <linux/types.h>
+
+#include "vmci_context.h"
+
+/* Callback needed for correctly waiting on events. */
+typedef int (*vmci_event_release_cb) (void *client_data);
+
+/* Guest device port I/O. */
+struct ppn_set {
+ u64 num_produce_pages;
+ u64 num_consume_pages;
+ u32 *produce_ppns;
+ u32 *consume_ppns;
+ bool initialized;
+};
+
+/* VMCIqueue_pairAllocInfo */
+struct vmci_qp_alloc_info {
+ struct vmci_handle handle;
+ u32 peer;
+ u32 flags;
+ u64 produce_size;
+ u64 consume_size;
+ u64 ppn_va; /* Start VA of queue pair PPNs. */
+ u64 num_ppns;
+ s32 result;
+ u32 version;
+};
+
+/* VMCIqueue_pairSetVAInfo */
+struct vmci_qp_set_va_info {
+ struct vmci_handle handle;
+ u64 va; /* Start VA of queue pair PPNs. */
+ u64 num_ppns;
+ u32 version;
+ s32 result;
+};
+
+/*
+ * For backwards compatibility, here is a version of the
+ * VMCIqueue_pairPageFileInfo before host support end-points was added.
+ * Note that the current version of that structure requires VMX to
+ * pass down the VA of the mapped file. Before host support was added
+ * there was nothing of the sort. So, when the driver sees the ioctl
+ * with a parameter that is the sizeof
+ * VMCIqueue_pairPageFileInfo_NoHostQP then it can infer that the version
+ * of VMX running can't attach to host end points because it doesn't
+ * provide the VA of the mapped files.
+ *
+ * The Linux driver doesn't get an indication of the size of the
+ * structure passed down from user space. So, to fix a long standing
+ * but unfiled bug, the _pad field has been renamed to version.
+ * Existing versions of VMX always initialize the PageFileInfo
+ * structure so that _pad, er, version is set to 0.
+ *
+ * A version value of 1 indicates that the size of the structure has
+ * been increased to include two UVA's: produce_uva and consume_uva.
+ * These UVA's are of the mmap()'d queue contents backing files.
+ *
+ * In addition, if when VMX is sending down the
+ * VMCIqueue_pairPageFileInfo structure it gets an error then it will
+ * try again with the _NoHostQP version of the file to see if an older
+ * VMCI kernel module is running.
+ */
+
+/* VMCIqueue_pairPageFileInfo */
+struct vmci_qp_page_file_info {
+ struct vmci_handle handle;
+ u64 produce_page_file; /* User VA. */
+ u64 consume_page_file; /* User VA. */
+ u64 produce_page_file_size; /* Size of the file name array. */
+ u64 consume_page_file_size; /* Size of the file name array. */
+ s32 result;
+ u32 version; /* Was _pad. */
+ u64 produce_va; /* User VA of the mapped file. */
+ u64 consume_va; /* User VA of the mapped file. */
+};
+
+/* vmci queuepair detach info */
+struct vmci_qp_dtch_info {
+ struct vmci_handle handle;
+ s32 result;
+ u32 _pad;
+};
+
+/*
+ * struct vmci_qp_page_store describes how the memory of a given queue pair
+ * is backed. When the queue pair is between the host and a guest, the
+ * page store consists of references to the guest pages. On vmkernel,
+ * this is a list of PPNs, and on hosted, it is a user VA where the
+ * queue pair is mapped into the VMX address space.
+ */
+struct vmci_qp_page_store {
+ /* Reference to pages backing the queue pair. */
+ u64 pages;
+ /* Length of pageList/virtual addres range (in pages). */
+ u32 len;
+};
+
+/*
+ * This data type contains the information about a queue.
+ * There are two queues (hence, queue pairs) per transaction model between a
+ * pair of end points, A & B. One queue is used by end point A to transmit
+ * commands and responses to B. The other queue is used by B to transmit
+ * commands and responses.
+ *
+ * struct vmci_queue_kern_if is a per-OS defined Queue structure. It contains
+ * either a direct pointer to the linear address of the buffer contents or a
+ * pointer to structures which help the OS locate those data pages. See
+ * vmciKernelIf.c for each platform for its definition.
+ */
+struct vmci_queue {
+ struct vmci_queue_header *q_header;
+ struct vmci_queue_header *saved_header;
+ struct vmci_queue_kern_if *kernel_if;
+};
+
+/*
+ * Utility function that checks whether the fields of the page
+ * store contain valid values.
+ * Result:
+ * true if the page store is wellformed. false otherwise.
+ */
+static inline bool
+VMCI_QP_PAGESTORE_IS_WELLFORMED(struct vmci_qp_page_store *page_store)
+{
+ return page_store->len >= 2;
+}
+
+void vmci_qp_broker_exit(void);
+int vmci_qp_broker_alloc(struct vmci_handle handle, u32 peer,
+ u32 flags, u32 priv_flags,
+ u64 produce_size, u64 consume_size,
+ struct vmci_qp_page_store *page_store,
+ struct vmci_ctx *context);
+int vmci_qp_broker_set_page_store(struct vmci_handle handle,
+ u64 produce_uva, u64 consume_uva,
+ struct vmci_ctx *context);
+int vmci_qp_broker_detach(struct vmci_handle handle, struct vmci_ctx *context);
+
+void vmci_qp_guest_endpoints_exit(void);
+
+int vmci_qp_alloc(struct vmci_handle *handle,
+ struct vmci_queue **produce_q, u64 produce_size,
+ struct vmci_queue **consume_q, u64 consume_size,
+ u32 peer, u32 flags, u32 priv_flags,
+ bool guest_endpoint, vmci_event_release_cb wakeup_cb,
+ void *client_data);
+int vmci_qp_broker_map(struct vmci_handle handle,
+ struct vmci_ctx *context, u64 guest_mem);
+int vmci_qp_broker_unmap(struct vmci_handle handle,
+ struct vmci_ctx *context, u32 gid);
+
+#endif /* _VMCI_QUEUE_PAIR_H_ */
diff --git a/drivers/misc/vmw_vmci/vmci_resource.c b/drivers/misc/vmw_vmci/vmci_resource.c
new file mode 100644
index 0000000..9a53a30
--- /dev/null
+++ b/drivers/misc/vmw_vmci/vmci_resource.c
@@ -0,0 +1,227 @@
+/*
+ * VMware VMCI Driver
+ *
+ * Copyright (C) 2012 VMware, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation version 2 and no later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ */
+
+#include <linux/vmw_vmci_defs.h>
+#include <linux/hash.h>
+#include <linux/types.h>
+#include <linux/rculist.h>
+
+#include "vmci_resource.h"
+#include "vmci_driver.h"
+
+
+#define VMCI_RESOURCE_HASH_BITS 7
+#define VMCI_RESOURCE_HASH_BUCKETS (1 << VMCI_RESOURCE_HASH_BITS)
+
+struct vmci_hash_table {
+ spinlock_t lock;
+ struct hlist_head entries[VMCI_RESOURCE_HASH_BUCKETS];
+};
+
+static struct vmci_hash_table vmci_resource_table = {
+ .lock = __SPIN_LOCK_UNLOCKED(vmci_resource_table.lock),
+};
+
+static unsigned int vmci_resource_hash(struct vmci_handle handle)
+{
+ return hash_32(handle.resource, VMCI_RESOURCE_HASH_BITS);
+}
+
+/*
+ * Gets a resource (if one exists) matching given handle from the hash table.
+ */
+static struct vmci_resource *vmci_resource_lookup(struct vmci_handle handle,
+ enum vmci_resource_type type)
+{
+ struct vmci_resource *r, *resource = NULL;
+ unsigned int idx = vmci_resource_hash(handle);
+
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(r,
+ &vmci_resource_table.entries[idx], node) {
+ u32 cid = r->handle.context;
+ u32 rid = r->handle.resource;
+
+ if (r->type == type &&
+ rid == handle.resource &&
+ (cid == handle.context || cid == VMCI_INVALID_ID)) {
+ resource = r;
+ break;
+ }
+ }
+ rcu_read_unlock();
+
+ return resource;
+}
+
+/*
+ * Find an unused resource ID and return it. The first
+ * VMCI_RESERVED_RESOURCE_ID_MAX are reserved so we start from
+ * its value + 1.
+ * Returns VMCI resource id on success, VMCI_INVALID_ID on failure.
+ */
+static u32 vmci_resource_find_id(u32 context_id,
+ enum vmci_resource_type resource_type)
+{
+ static u32 resource_id = VMCI_RESERVED_RESOURCE_ID_MAX + 1;
+ u32 old_rid = resource_id;
+ u32 current_rid;
+
+ /*
+ * Generate a unique resource ID. Keep on trying until we wrap around
+ * in the RID space.
+ */
+ do {
+ struct vmci_handle handle;
+
+ current_rid = resource_id;
+ resource_id++;
+ if (unlikely(resource_id == VMCI_INVALID_ID)) {
+ /* Skip the reserved rids. */
+ resource_id = VMCI_RESERVED_RESOURCE_ID_MAX + 1;
+ }
+
+ handle = vmci_make_handle(context_id, current_rid);
+ if (!vmci_resource_lookup(handle, resource_type))
+ return current_rid;
+ } while (resource_id != old_rid);
+
+ return VMCI_INVALID_ID;
+}
+
+
+int vmci_resource_add(struct vmci_resource *resource,
+ enum vmci_resource_type resource_type,
+ struct vmci_handle handle)
+
+{
+ unsigned int idx;
+ int result;
+
+ spin_lock(&vmci_resource_table.lock);
+
+ if (handle.resource == VMCI_INVALID_ID) {
+ handle.resource = vmci_resource_find_id(handle.context,
+ resource_type);
+ if (handle.resource == VMCI_INVALID_ID) {
+ result = VMCI_ERROR_NO_HANDLE;
+ goto out;
+ }
+ } else if (vmci_resource_lookup(handle, resource_type)) {
+ result = VMCI_ERROR_ALREADY_EXISTS;
+ goto out;
+ }
+
+ resource->handle = handle;
+ resource->type = resource_type;
+ INIT_HLIST_NODE(&resource->node);
+ kref_init(&resource->kref);
+ init_completion(&resource->done);
+
+ idx = vmci_resource_hash(resource->handle);
+ hlist_add_head_rcu(&resource->node, &vmci_resource_table.entries[idx]);
+
+ result = VMCI_SUCCESS;
+
+out:
+ spin_unlock(&vmci_resource_table.lock);
+ return result;
+}
+
+void vmci_resource_remove(struct vmci_resource *resource)
+{
+ struct vmci_handle handle = resource->handle;
+ unsigned int idx = vmci_resource_hash(handle);
+ struct vmci_resource *r;
+
+ /* Remove resource from hash table. */
+ spin_lock(&vmci_resource_table.lock);
+
+ hlist_for_each_entry(r, &vmci_resource_table.entries[idx], node) {
+ if (vmci_handle_is_equal(r->handle, resource->handle)) {
+ hlist_del_init_rcu(&r->node);
+ break;
+ }
+ }
+
+ spin_unlock(&vmci_resource_table.lock);
+ synchronize_rcu();
+
+ vmci_resource_put(resource);
+ wait_for_completion(&resource->done);
+}
+
+struct vmci_resource *
+vmci_resource_by_handle(struct vmci_handle resource_handle,
+ enum vmci_resource_type resource_type)
+{
+ struct vmci_resource *r, *resource = NULL;
+
+ rcu_read_lock();
+
+ r = vmci_resource_lookup(resource_handle, resource_type);
+ if (r &&
+ (resource_type == r->type ||
+ resource_type == VMCI_RESOURCE_TYPE_ANY)) {
+ resource = vmci_resource_get(r);
+ }
+
+ rcu_read_unlock();
+
+ return resource;
+}
+
+/*
+ * Get a reference to given resource.
+ */
+struct vmci_resource *vmci_resource_get(struct vmci_resource *resource)
+{
+ kref_get(&resource->kref);
+
+ return resource;
+}
+
+static void vmci_release_resource(struct kref *kref)
+{
+ struct vmci_resource *resource =
+ container_of(kref, struct vmci_resource, kref);
+
+ /* Verify the resource has been unlinked from hash table */
+ WARN_ON(!hlist_unhashed(&resource->node));
+
+ /* Signal that container of this resource can now be destroyed */
+ complete(&resource->done);
+}
+
+/*
+ * Resource's release function will get called if last reference.
+ * If it is the last reference, then we are sure that nobody else
+ * can increment the count again (it's gone from the resource hash
+ * table), so there's no need for locking here.
+ */
+int vmci_resource_put(struct vmci_resource *resource)
+{
+ /*
+ * We propagate the information back to caller in case it wants to know
+ * whether entry was freed.
+ */
+ return kref_put(&resource->kref, vmci_release_resource) ?
+ VMCI_SUCCESS_ENTRY_DEAD : VMCI_SUCCESS;
+}
+
+struct vmci_handle vmci_resource_handle(struct vmci_resource *resource)
+{
+ return resource->handle;
+}
diff --git a/drivers/misc/vmw_vmci/vmci_resource.h b/drivers/misc/vmw_vmci/vmci_resource.h
new file mode 100644
index 0000000..9190cd2
--- /dev/null
+++ b/drivers/misc/vmw_vmci/vmci_resource.h
@@ -0,0 +1,59 @@
+/*
+ * VMware VMCI Driver
+ *
+ * Copyright (C) 2012 VMware, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation version 2 and no later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ */
+
+#ifndef _VMCI_RESOURCE_H_
+#define _VMCI_RESOURCE_H_
+
+#include <linux/vmw_vmci_defs.h>
+#include <linux/types.h>
+
+#include "vmci_context.h"
+
+
+enum vmci_resource_type {
+ VMCI_RESOURCE_TYPE_ANY,
+ VMCI_RESOURCE_TYPE_API,
+ VMCI_RESOURCE_TYPE_GROUP,
+ VMCI_RESOURCE_TYPE_DATAGRAM,
+ VMCI_RESOURCE_TYPE_DOORBELL,
+ VMCI_RESOURCE_TYPE_QPAIR_GUEST,
+ VMCI_RESOURCE_TYPE_QPAIR_HOST
+};
+
+struct vmci_resource {
+ struct vmci_handle handle;
+ enum vmci_resource_type type;
+ struct hlist_node node;
+ struct kref kref;
+ struct completion done;
+};
+
+
+int vmci_resource_add(struct vmci_resource *resource,
+ enum vmci_resource_type resource_type,
+ struct vmci_handle handle);
+
+void vmci_resource_remove(struct vmci_resource *resource);
+
+struct vmci_resource *
+vmci_resource_by_handle(struct vmci_handle resource_handle,
+ enum vmci_resource_type resource_type);
+
+struct vmci_resource *vmci_resource_get(struct vmci_resource *resource);
+int vmci_resource_put(struct vmci_resource *resource);
+
+struct vmci_handle vmci_resource_handle(struct vmci_resource *resource);
+
+#endif /* _VMCI_RESOURCE_H_ */
diff --git a/drivers/misc/vmw_vmci/vmci_route.c b/drivers/misc/vmw_vmci/vmci_route.c
new file mode 100644
index 0000000..9109065
--- /dev/null
+++ b/drivers/misc/vmw_vmci/vmci_route.c
@@ -0,0 +1,226 @@
+/*
+ * VMware VMCI Driver
+ *
+ * Copyright (C) 2012 VMware, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation version 2 and no later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ */
+
+#include <linux/vmw_vmci_defs.h>
+#include <linux/vmw_vmci_api.h>
+
+#include "vmci_context.h"
+#include "vmci_driver.h"
+#include "vmci_route.h"
+
+/*
+ * Make a routing decision for the given source and destination handles.
+ * This will try to determine the route using the handles and the available
+ * devices. Will set the source context if it is invalid.
+ */
+int vmci_route(struct vmci_handle *src,
+ const struct vmci_handle *dst,
+ bool from_guest,
+ enum vmci_route *route)
+{
+ bool has_host_device = vmci_host_code_active();
+ bool has_guest_device = vmci_guest_code_active();
+
+ *route = VMCI_ROUTE_NONE;
+
+ /*
+ * "from_guest" is only ever set to true by
+ * IOCTL_VMCI_DATAGRAM_SEND (or by the vmkernel equivalent),
+ * which comes from the VMX, so we know it is coming from a
+ * guest.
+ *
+ * To avoid inconsistencies, test these once. We will test
+ * them again when we do the actual send to ensure that we do
+ * not touch a non-existent device.
+ */
+
+ /* Must have a valid destination context. */
+ if (VMCI_INVALID_ID == dst->context)
+ return VMCI_ERROR_INVALID_ARGS;
+
+ /* Anywhere to hypervisor. */
+ if (VMCI_HYPERVISOR_CONTEXT_ID == dst->context) {
+
+ /*
+ * If this message already came from a guest then we
+ * cannot send it to the hypervisor. It must come
+ * from a local client.
+ */
+ if (from_guest)
+ return VMCI_ERROR_DST_UNREACHABLE;
+
+ /*
+ * We must be acting as a guest in order to send to
+ * the hypervisor.
+ */
+ if (!has_guest_device)
+ return VMCI_ERROR_DEVICE_NOT_FOUND;
+
+ /* And we cannot send if the source is the host context. */
+ if (VMCI_HOST_CONTEXT_ID == src->context)
+ return VMCI_ERROR_INVALID_ARGS;
+
+ /*
+ * If the client passed the ANON source handle then
+ * respect it (both context and resource are invalid).
+ * However, if they passed only an invalid context,
+ * then they probably mean ANY, in which case we
+ * should set the real context here before passing it
+ * down.
+ */
+ if (VMCI_INVALID_ID == src->context &&
+ VMCI_INVALID_ID != src->resource)
+ src->context = vmci_get_context_id();
+
+ /* Send from local client down to the hypervisor. */
+ *route = VMCI_ROUTE_AS_GUEST;
+ return VMCI_SUCCESS;
+ }
+
+ /* Anywhere to local client on host. */
+ if (VMCI_HOST_CONTEXT_ID == dst->context) {
+ /*
+ * If it is not from a guest but we are acting as a
+ * guest, then we need to send it down to the host.
+ * Note that if we are also acting as a host then this
+ * will prevent us from sending from local client to
+ * local client, but we accept that restriction as a
+ * way to remove any ambiguity from the host context.
+ */
+ if (src->context == VMCI_HYPERVISOR_CONTEXT_ID) {
+ /*
+ * If the hypervisor is the source, this is
+ * host local communication. The hypervisor
+ * may send vmci event datagrams to the host
+ * itself, but it will never send datagrams to
+ * an "outer host" through the guest device.
+ */
+
+ if (has_host_device) {
+ *route = VMCI_ROUTE_AS_HOST;
+ return VMCI_SUCCESS;
+ } else {
+ return VMCI_ERROR_DEVICE_NOT_FOUND;
+ }
+ }
+
+ if (!from_guest && has_guest_device) {
+ /* If no source context then use the current. */
+ if (VMCI_INVALID_ID == src->context)
+ src->context = vmci_get_context_id();
+
+ /* Send it from local client down to the host. */
+ *route = VMCI_ROUTE_AS_GUEST;
+ return VMCI_SUCCESS;
+ }
+
+ /*
+ * Otherwise we already received it from a guest and
+ * it is destined for a local client on this host, or
+ * it is from another local client on this host. We
+ * must be acting as a host to service it.
+ */
+ if (!has_host_device)
+ return VMCI_ERROR_DEVICE_NOT_FOUND;
+
+ if (VMCI_INVALID_ID == src->context) {
+ /*
+ * If it came from a guest then it must have a
+ * valid context. Otherwise we can use the
+ * host context.
+ */
+ if (from_guest)
+ return VMCI_ERROR_INVALID_ARGS;
+
+ src->context = VMCI_HOST_CONTEXT_ID;
+ }
+
+ /* Route to local client. */
+ *route = VMCI_ROUTE_AS_HOST;
+ return VMCI_SUCCESS;
+ }
+
+ /*
+ * If we are acting as a host then this might be destined for
+ * a guest.
+ */
+ if (has_host_device) {
+ /* It will have a context if it is meant for a guest. */
+ if (vmci_ctx_exists(dst->context)) {
+ if (VMCI_INVALID_ID == src->context) {
+ /*
+ * If it came from a guest then it
+ * must have a valid context.
+ * Otherwise we can use the host
+ * context.
+ */
+
+ if (from_guest)
+ return VMCI_ERROR_INVALID_ARGS;
+
+ src->context = VMCI_HOST_CONTEXT_ID;
+ } else if (VMCI_CONTEXT_IS_VM(src->context) &&
+ src->context != dst->context) {
+ /*
+ * VM to VM communication is not
+ * allowed. Since we catch all
+ * communication destined for the host
+ * above, this must be destined for a
+ * VM since there is a valid context.
+ */
+
+ return VMCI_ERROR_DST_UNREACHABLE;
+ }
+
+ /* Pass it up to the guest. */
+ *route = VMCI_ROUTE_AS_HOST;
+ return VMCI_SUCCESS;
+ } else if (!has_guest_device) {
+ /*
+ * The host is attempting to reach a CID
+ * without an active context, and we can't
+ * send it down, since we have no guest
+ * device.
+ */
+
+ return VMCI_ERROR_DST_UNREACHABLE;
+ }
+ }
+
+ /*
+ * We must be a guest trying to send to another guest, which means
+ * we need to send it down to the host. We do not filter out VM to
+ * VM communication here, since we want to be able to use the guest
+ * driver on older versions that do support VM to VM communication.
+ */
+ if (!has_guest_device) {
+ /*
+ * Ending up here means we have neither guest nor host
+ * device.
+ */
+ return VMCI_ERROR_DEVICE_NOT_FOUND;
+ }
+
+ /* If no source context then use the current context. */
+ if (VMCI_INVALID_ID == src->context)
+ src->context = vmci_get_context_id();
+
+ /*
+ * Send it from local client down to the host, which will
+ * route it to the other guest for us.
+ */
+ *route = VMCI_ROUTE_AS_GUEST;
+ return VMCI_SUCCESS;
+}
diff --git a/drivers/misc/vmw_vmci/vmci_route.h b/drivers/misc/vmw_vmci/vmci_route.h
new file mode 100644
index 0000000..3b30e82
--- /dev/null
+++ b/drivers/misc/vmw_vmci/vmci_route.h
@@ -0,0 +1,30 @@
+/*
+ * VMware VMCI Driver
+ *
+ * Copyright (C) 2012 VMware, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation version 2 and no later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ */
+
+#ifndef _VMCI_ROUTE_H_
+#define _VMCI_ROUTE_H_
+
+#include <linux/vmw_vmci_defs.h>
+
+enum vmci_route {
+ VMCI_ROUTE_NONE,
+ VMCI_ROUTE_AS_HOST,
+ VMCI_ROUTE_AS_GUEST,
+};
+
+int vmci_route(struct vmci_handle *src, const struct vmci_handle *dst,
+ bool from_guest, enum vmci_route *route);
+
+#endif /* _VMCI_ROUTE_H_ */