Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net

Pull networking fixes from David Miller:
 "Just a small pile of fixes"

 1) Fix race conditions in IP fragmentation LRU list handling, from
    Konstantin Khlebnikov.

 2) vfree() is no longer verboten in interrupts, so deferring is
    pointless, from Al Viro.

 3) Conversion from mutex to semaphore in netpoll left trylock test
    inverted, caught by Dan Carpenter.

 4) 3c59x uses wrong base address when releasing regions, from Sergei
    Shtylyov.

 5) Bounds checking in TIPC from Dan Carpenter.

 6) Fastopen cookies should not be expired as aggressively as other TCP
    metrics.  From Eric Dumazet.

 7) Fix retrieval of MAC address in ibmveth, from Ben Herrenschmidt.

 8) Don't use "u16" in virtio user headers, from Stephen Hemminger.

* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net:
  tipc: potential divide by zero in tipc_link_recv_fragment()
  tipc: add a bounds check in link_recv_changeover_msg()
  net/usb: new driver for RTL8152
  3c59x: fix freeing nonexistent resource on driver unload
  netpoll: inverted down_trylock() test
  rps_dev_flow_table_release(): no need to delay vfree()
  fib_trie: no need to delay vfree()
  net: frag, fix race conditions in LRU list maintenance
  tcp: do not expire TCP fastopen cookies
  net/eth/ibmveth: Fixup retrieval of MAC address
  virtio: don't expose u16 in userspace api
diff --git a/Documentation/ABI/testing/sysfs-bus-rbd b/Documentation/ABI/testing/sysfs-bus-rbd
index cd9213c..0a30647 100644
--- a/Documentation/ABI/testing/sysfs-bus-rbd
+++ b/Documentation/ABI/testing/sysfs-bus-rbd
@@ -66,27 +66,7 @@
 
 	The current snapshot for which the device is mapped.
 
-snap_*
-
-	A directory per each snapshot
-
 parent
 
 	Information identifying the pool, image, and snapshot id for
 	the parent image in a layered rbd image (format 2 only).
-
-Entries under /sys/bus/rbd/devices/<dev-id>/snap_<snap-name>
--------------------------------------------------------------
-
-snap_id
-
-	The rados internal snapshot id assigned for this snapshot
-
-snap_size
-
-	The size of the image when this snapshot was taken.
-
-snap_features
-
-	A hexadecimal encoding of the feature bits for this snapshot.
-
diff --git a/Documentation/RCU/stallwarn.txt b/Documentation/RCU/stallwarn.txt
index e38b8df..8e9359d 100644
--- a/Documentation/RCU/stallwarn.txt
+++ b/Documentation/RCU/stallwarn.txt
@@ -191,7 +191,7 @@
 o	A hardware or software issue shuts off the scheduler-clock
 	interrupt on a CPU that is not in dyntick-idle mode.  This
 	problem really has happened, and seems to be most likely to
-	result in RCU CPU stall warnings for CONFIG_NO_HZ=n kernels.
+	result in RCU CPU stall warnings for CONFIG_NO_HZ_COMMON=n kernels.
 
 o	A bug in the RCU implementation.
 
diff --git a/Documentation/acpi/enumeration.txt b/Documentation/acpi/enumeration.txt
index 94a6561..b0d5410 100644
--- a/Documentation/acpi/enumeration.txt
+++ b/Documentation/acpi/enumeration.txt
@@ -199,6 +199,8 @@
 	{
 		Name (SBUF, ResourceTemplate()
 		{
+			...
+			// Used to power on/off the device
 			GpioIo (Exclusive, PullDefault, 0x0000, 0x0000,
 				IoRestrictionOutputOnly, "\\_SB.PCI0.GPI0",
 				0x00, ResourceConsumer,,)
@@ -206,10 +208,20 @@
 				// Pin List
 				0x0055
 			}
+
+			// Interrupt for the device
+			GpioInt (Edge, ActiveHigh, ExclusiveAndWake, PullNone,
+				 0x0000, "\\_SB.PCI0.GPI0", 0x00, ResourceConsumer,,)
+			{
+				// Pin list
+				0x0058
+			}
+
 			...
 
-			Return (SBUF)
 		}
+
+		Return (SBUF)
 	}
 
 These GPIO numbers are controller relative and path "\\_SB.PCI0.GPI0"
@@ -220,6 +232,24 @@
 acpi_get_gpio(path, gpio). This will return the Linux GPIO number or
 negative errno if there was no translation found.
 
+In the simple case of just getting the Linux GPIO number from device
+resources, one can use the acpi_get_gpio_by_index() helper function. It
+takes a pointer to the device and the index of the GpioIo/GpioInt
+descriptor in the device resources list. For example:
+
+	int gpio_irq, gpio_power;
+	int ret;
+
+	gpio_irq = acpi_get_gpio_by_index(dev, 1, NULL);
+	if (gpio_irq < 0)
+		/* handle error */
+
+	gpio_power = acpi_get_gpio_by_index(dev, 0, NULL);
+	if (gpio_power < 0)
+		/* handle error */
+
+	/* Now we can use the GPIO numbers */
+
 Other GpioIo parameters must be converted first by the driver to be
 suitable to the gpiolib before passing them.
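+
+Once the Linux GPIO number is known, it can be requested and driven like
+any other GPIO. A minimal sketch, continuing the example above (the
+"power" label and the initial-low flag are illustrative assumptions):
+
+	ret = gpio_request_one(gpio_power, GPIOF_OUT_INIT_LOW, "power");
+	if (ret)
+		/* handle error */
+
+	gpio_set_value(gpio_power, 1);	/* power on the device */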
 
diff --git a/Documentation/cpu-freq/governors.txt b/Documentation/cpu-freq/governors.txt
index 66f9cc3..219970b 100644
--- a/Documentation/cpu-freq/governors.txt
+++ b/Documentation/cpu-freq/governors.txt
@@ -131,8 +131,8 @@
 The sampling rate is limited by the HW transition latency:
 transition_latency * 100
 Or by kernel restrictions:
-If CONFIG_NO_HZ is set, the limit is 10ms fixed.
-If CONFIG_NO_HZ is not set or nohz=off boot parameter is used, the
+If CONFIG_NO_HZ_COMMON is set, the limit is 10ms fixed.
+If CONFIG_NO_HZ_COMMON is not set or nohz=off boot parameter is used, the
 limits depend on the CONFIG_HZ option:
 HZ=1000: min=20000us  (20ms)
 HZ=250:  min=80000us  (80ms)
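+
+Expressed as a formula (an illustrative sketch, not the governor's actual
+code):
+
+	min_sampling_rate = max(transition_latency * 100,  /* HW limit */
+				kernel_limit);             /* see above */
+
+where kernel_limit is 10ms when the fixed limit applies, and otherwise
+follows the HZ table above (both listed values correspond to 20 jiffies).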
diff --git a/Documentation/devicetree/bindings/gpio/gpio-grgpio.txt b/Documentation/devicetree/bindings/gpio/gpio-grgpio.txt
new file mode 100644
index 0000000..e466598
--- /dev/null
+++ b/Documentation/devicetree/bindings/gpio/gpio-grgpio.txt
@@ -0,0 +1,26 @@
+Aeroflex Gaisler GRGPIO General Purpose I/O cores.
+
+The GRGPIO GPIO core is available in the GRLIB VHDL IP core library.
+
+Note: In the ordinary environment for the GRGPIO core, a Leon SPARC system,
+these properties are built from information in the AMBA plug&play.
+
+Required properties:
+
+- name : Should be "GAISLER_GPIO" or "01_01a"
+
+- reg : Address and length of the register set for the device
+
+- interrupts : Interrupt numbers for this device
+
+Optional properties:
+
+- nbits : The number of gpio lines. If not present, the driver assumes 32 lines.
+
+- irqmap : An array with an index for each gpio line. An index is either a valid
+	index into the interrupts property array, or 0xffffffff, which indicates
+	no irq for that line. If not present, the driver provides no interrupt
+	support.
+
+For further information look in the documentation for the GRLIB IP core library:
+http://www.gaisler.com/products/grlib/grip.pdf
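+
+A sketch of how a driver might consume these properties (illustrative,
+not the actual grgpio driver code; np is the device's node):
+
+	u32 nbits = 32;	/* default when "nbits" is absent */
+
+	of_property_read_u32(np, "nbits", &nbits);
+	if (!of_find_property(np, "irqmap", NULL))
+		/* no irqmap: provide no interrupt support */;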
diff --git a/Documentation/devicetree/bindings/gpio/gpio-mcp23s08.txt b/Documentation/devicetree/bindings/gpio/gpio-mcp23s08.txt
new file mode 100644
index 0000000..629d0ef
--- /dev/null
+++ b/Documentation/devicetree/bindings/gpio/gpio-mcp23s08.txt
@@ -0,0 +1,47 @@
+Microchip MCP23008/MCP23S08/MCP23017/MCP23S17 driver for
+8-/16-bit I/O expander with serial interface (I2C/SPI)
+
+Required properties:
+- compatible : Should be
+    - "mcp,mcp23s08" for  8 GPIO SPI version
+    - "mcp,mcp23s17" for 16 GPIO SPI version
+    - "mcp,mcp23008" for  8 GPIO I2C version or
+    - "mcp,mcp23017" for 16 GPIO I2C version of the chip
+- #gpio-cells : Should be two.
+  - first cell is the pin number
+  - second cell is used to specify flags. Flags are currently unused.
+- gpio-controller : Marks the device node as a GPIO controller.
+- reg : For an address on its bus. I2C uses this as the I2C address of the chip.
+        SPI uses this to specify the chipselect line which the chip is
+        connected to. The driver and the SPI variant of the chip support
+        multiple chips on the same chipselect. Have a look at
+        mcp,spi-present-mask below.
+
+Required device specific properties (only for SPI chips):
+- mcp,spi-present-mask : As the name suggests, this is a presence flag that
+        only makes sense for SPI chips. Multiple SPI chips can share the same
+        SPI chipselect. Set a bit in bit0-7 of this mask to 1 if there is a
+        chip connected with the corresponding spi address set. For example, if
+        you have a chip with address 3 connected, you have to set bit3 to 1,
+        which is 0x08. The mcp23s08 chip variant only supports bits 0-3. It is
+        not possible to mix mcp23s08 and mcp23s17 on the same chipselect. Set
+        at least one bit to 1 for SPI chips.
+- spi-max-frequency : The maximum frequency this chip is able to handle
+
+Example I2C:
+gpiom1: gpio@20 {
+        compatible = "mcp,mcp23017";
+        gpio-controller;
+        #gpio-cells = <2>;
+        reg = <0x20>;
+};
+
+Example SPI:
+gpiom1: gpio@0 {
+        compatible = "mcp,mcp23s17";
+        gpio-controller;
+        #gpio-cells = <2>;
+        mcp,spi-present-mask = <0x01>;
+        reg = <0>;
+        spi-max-frequency = <1000000>;
+};
diff --git a/Documentation/devicetree/bindings/gpio/gpio-omap.txt b/Documentation/devicetree/bindings/gpio/gpio-omap.txt
index bff51a2..1b524c0 100644
--- a/Documentation/devicetree/bindings/gpio/gpio-omap.txt
+++ b/Documentation/devicetree/bindings/gpio/gpio-omap.txt
@@ -20,8 +20,11 @@
       8 = active low level-sensitive.
 
 OMAP specific properties:
-- ti,hwmods: Name of the hwmod associated to the GPIO:
-  "gpio<X>", <X> being the 1-based instance number from the HW spec
+- ti,hwmods:		Name of the hwmod associated to the GPIO:
+			"gpio<X>", <X> being the 1-based instance number
+			from the HW spec.
+- ti,gpio-always-on: 	Indicates if a GPIO bank is always powered and
+			so will never lose its logic state.
 
 
 Example:
diff --git a/Documentation/devicetree/bindings/input/cros-ec-keyb.txt b/Documentation/devicetree/bindings/input/cros-ec-keyb.txt
new file mode 100644
index 0000000..0f6355c
--- /dev/null
+++ b/Documentation/devicetree/bindings/input/cros-ec-keyb.txt
@@ -0,0 +1,72 @@
+ChromeOS EC Keyboard
+
+Google's ChromeOS EC Keyboard is a simple matrix keyboard implemented on
+a separate EC (Embedded Controller) device. It provides a message for reading
+key scans from the EC. These are then converted into keycodes for processing
+by the kernel.
+
+This binding is based on matrix-keymap.txt and extends/modifies it as follows:
+
+Required properties:
+- compatible: "google,cros-ec-keyb"
+
+Optional properties:
+- google,needs-ghost-filter: True to enable a ghost filter for the matrix
+keyboard. This is recommended if the EC does not have its own logic or
+hardware for this.
+
+
+Example:
+
+cros-ec-keyb {
+	compatible = "google,cros-ec-keyb";
+	keypad,num-rows = <8>;
+	keypad,num-columns = <13>;
+	google,needs-ghost-filter;
+	/*
+	 * Keymap entries take the form of 0xRRCCKKKK where
+	 * RR=Row CC=Column KKKK=Key Code
+	 * The values below are for a US keyboard layout and
+	 * are taken from the Linux driver. Note that the
+	 * 102ND key is not used for US keyboards.
+	 */
+	linux,keymap = <
+		/* CAPSLCK F1         B          F10     */
+		0x0001003a 0x0002003b 0x00030030 0x00040044
+		/* N       =          R_ALT      ESC     */
+		0x00060031 0x0008000d 0x000a0064 0x01010001
+		/* F4      G          F7         H       */
+		0x0102003e 0x01030022 0x01040041 0x01060023
+		/* '       F9         BKSPACE    L_CTRL  */
+		0x01080028 0x01090043 0x010b000e 0x0200001d
+		/* TAB     F3         T          F6      */
+		0x0201000f 0x0202003d 0x02030014 0x02040040
+		/* ]       Y          102ND      [       */
+		0x0205001b 0x02060015 0x02070056 0x0208001a
+		/* F8      GRAVE      F2         5       */
+		0x02090042 0x03010029 0x0302003c 0x03030006
+		/* F5      6          -          \       */
+		0x0304003f 0x03060007 0x0308000c 0x030b002b
+		/* R_CTRL  A          D          F       */
+		0x04000061 0x0401001e 0x04020020 0x04030021
+		/* S       K          J          ;       */
+		0x0404001f 0x04050025 0x04060024 0x04080027
+		/* L       ENTER      Z          C       */
+		0x04090026 0x040b001c 0x0501002c 0x0502002e
+		/* V       X          ,          M       */
+		0x0503002f 0x0504002d 0x05050033 0x05060032
+		/* L_SHIFT /          .          SPACE   */
+		0x0507002a 0x05080035 0x05090034 0x050B0039
+		/* 1       3          4          2       */
+		0x06010002 0x06020004 0x06030005 0x06040003
+		/* 8       7          0          9       */
+		0x06050009 0x06060008 0x0608000b 0x0609000a
+		/* L_ALT   DOWN       RIGHT      Q       */
+		0x060a0038 0x060b006c 0x060c006a 0x07010010
+		/* E       R          W          I       */
+		0x07020012 0x07030013 0x07040011 0x07050017
+		/* U       R_SHIFT    P          O       */
+		0x07060016 0x07070036 0x07080019 0x07090018
+		/* UP      LEFT    */
+		0x070b0067 0x070c0069>;
+};
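+
+The 0xRRCCKKKK packing above can be produced with a small helper macro
+(a sketch; compare the KEY() macro in include/linux/input/matrix_keypad.h,
+which uses the same layout):
+
+	#define MK_KEY(row, col, code) \
+		(((row) << 24) | ((col) << 16) | (code))
+
+	MK_KEY(0, 1, 0x3a)	/* CAPSLCK == 0x0001003a */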
diff --git a/Documentation/devicetree/bindings/mfd/as3711.txt b/Documentation/devicetree/bindings/mfd/as3711.txt
new file mode 100644
index 0000000..d98cf18
--- /dev/null
+++ b/Documentation/devicetree/bindings/mfd/as3711.txt
@@ -0,0 +1,73 @@
+AS3711 is an I2C PMIC from Austria MicroSystems with multiple DCDC and LDO power
+supplies, a battery charger and an RTC. So far only bindings for the two stepup
+DCDC converters are defined. Other DCDC and LDO supplies are configured using
+standard regulator properties; they must belong to a sub-node called
+"regulators" and be named "sd1" to "sd4" and "ldo1" to "ldo8". Stepup converter
+configuration should be placed in a sub-node called "backlight".
+
+Compulsory properties:
+- compatible		: must be "ams,as3711"
+- reg			: specifies the I2C address
+
+To use the SU1 converter as a backlight source, the following two properties
+must be provided:
+- su1-dev		: framebuffer phandle
+- su1-max-uA		: maximum current
+
+To use the SU2 converter as a backlight source, the following two properties
+must be provided:
+- su2-dev		: framebuffer phandle
+- su2-max-uA		: maximum current
+
+Additionally one of these properties must be provided to select the type of
+feedback used:
+- su2-feedback-voltage	: voltage feedback is used
+- su2-feedback-curr1	: CURR1 input used for current feedback
+- su2-feedback-curr2	: CURR2 input used for current feedback
+- su2-feedback-curr3	: CURR3 input used for current feedback
+- su2-feedback-curr-auto: automatic current feedback selection
+
+and one of these to select the over-voltage protection pin
+- su2-fbprot-lx-sd4	: LX_SD4 is used for over-voltage protection
+- su2-fbprot-gpio2	: GPIO2 is used for over-voltage protection
+- su2-fbprot-gpio3	: GPIO3 is used for over-voltage protection
+- su2-fbprot-gpio4	: GPIO4 is used for over-voltage protection
+
+If "su2-feedback-curr-auto" is selected, one or more of the following properties
+have to be specified:
+- su2-auto-curr1	: use CURR1 input for current feedback
+- su2-auto-curr2	: use CURR2 input for current feedback
+- su2-auto-curr3	: use CURR3 input for current feedback
+
+Example:
+
+as3711@40 {
+	compatible = "ams,as3711";
+	reg = <0x40>;
+
+	regulators {
+		sd4 {
+			regulator-name = "1.215V";
+			regulator-min-microvolt = <1215000>;
+			regulator-max-microvolt = <1235000>;
+		};
+		ldo2 {
+			regulator-name = "2.8V CPU";
+			regulator-min-microvolt = <2800000>;
+			regulator-max-microvolt = <2800000>;
+			regulator-always-on;
+			regulator-boot-on;
+		};
+	};
+
+	backlight {
+		compatible = "ams,as3711-bl";
+		su2-dev = <&lcdc>;
+		su2-max-uA = <36000>;
+		su2-feedback-curr-auto;
+		su2-fbprot-gpio4;
+		su2-auto-curr1;
+		su2-auto-curr2;
+		su2-auto-curr3;
+	};
+};
diff --git a/Documentation/devicetree/bindings/mfd/cros-ec.txt b/Documentation/devicetree/bindings/mfd/cros-ec.txt
new file mode 100644
index 0000000..e0e59c58
--- /dev/null
+++ b/Documentation/devicetree/bindings/mfd/cros-ec.txt
@@ -0,0 +1,56 @@
+ChromeOS Embedded Controller
+
+Google's ChromeOS EC is a Cortex-M device which talks to the AP and
+implements various functions such as keyboard and battery charging.
+
+The EC can be connected through various means (I2C, SPI, LPC) and the
+compatible string used depends on the interface. Each connection method has
+its own driver which connects to the top-level interface-agnostic EC driver.
+Other Linux drivers (such as cros-ec-keyb for the matrix keyboard) connect to
+the top-level driver.
+
+Required properties (I2C):
+- compatible: "google,cros-ec-i2c"
+- reg: I2C slave address
+
+Required properties (SPI):
+- compatible: "google,cros-ec-spi"
+- reg: SPI chip select
+
+Required properties (LPC):
+- compatible: "google,cros-ec-lpc"
+- reg: List of (IO address, size) pairs defining the IO regions the interface uses
+
+
+Example for I2C:
+
+i2c@12CA0000 {
+	cros-ec@1e {
+		reg = <0x1e>;
+		compatible = "google,cros-ec-i2c";
+		interrupts = <14 0>;
+		interrupt-parent = <&wakeup_eint>;
+		wakeup-source;
+	};
+};
+
+
+Example for SPI:
+
+spi@131b0000 {
+	ec@0 {
+		compatible = "google,cros-ec-spi";
+		reg = <0x0>;
+		interrupts = <14 0>;
+		interrupt-parent = <&wakeup_eint>;
+		wakeup-source;
+		spi-max-frequency = <5000000>;
+		controller-data {
+			cs-gpio = <&gpf0 3 4 3 0>;
+			samsung,spi-cs;
+			samsung,spi-feedback-delay = <2>;
+		};
+	};
+};
+
+
+Example for LPC is not supplied as it is not yet implemented.
diff --git a/Documentation/devicetree/bindings/mfd/omap-usb-host.txt b/Documentation/devicetree/bindings/mfd/omap-usb-host.txt
new file mode 100644
index 0000000..b381fa6
--- /dev/null
+++ b/Documentation/devicetree/bindings/mfd/omap-usb-host.txt
@@ -0,0 +1,80 @@
+OMAP HS USB Host
+
+Required properties:
+
+- compatible: should be "ti,usbhs-host"
+- reg: should contain one register range i.e. start and length
+- ti,hwmods: must contain "usb_host_hs"
+
+Optional properties:
+
+- num-ports: number of USB ports. Usually this is automatically detected
+  from the IP's revision register but can be overridden by specifying
+  this property. A maximum of 3 ports are supported at the moment.
+
+- portN-mode: String specifying the port mode for port N, where N can be
+  from 1 to 3. If the port mode is not specified, that port is treated
+  as unused. When specified, it must be one of the following:
+        "ehci-phy",
+        "ehci-tll",
+        "ehci-hsic",
+        "ohci-phy-6pin-datse0",
+        "ohci-phy-6pin-dpdm",
+        "ohci-phy-3pin-datse0",
+        "ohci-phy-4pin-dpdm",
+        "ohci-tll-6pin-datse0",
+        "ohci-tll-6pin-dpdm",
+        "ohci-tll-3pin-datse0",
+        "ohci-tll-4pin-dpdm",
+        "ohci-tll-2pin-datse0",
+        "ohci-tll-2pin-dpdm"
+
+- single-ulpi-bypass: Must be present if the controller contains a single
+  ULPI bypass control bit, e.g. OMAP3 silicon <= ES2.1.
+
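+A driver might look up the portN-mode strings described above like this
+(a minimal sketch, not the actual omap-usb-host code; np and the 0-based
+port index i are assumed locals):
+
+	char prop[16];
+	const char *mode;
+
+	snprintf(prop, sizeof(prop), "port%d-mode", i + 1);
+	if (of_property_read_string(np, prop, &mode))
+		mode = NULL;	/* not specified: treat the port as unused */
+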
+Required properties if child node exists:
+
+- #address-cells: Must be 1
+- #size-cells: Must be 1
+- ranges: must be present
+
+Properties for children:
+
+The OMAP HS USB Host subsystem contains EHCI and OHCI controllers.
+See Documentation/devicetree/bindings/usb/omap-ehci.txt and
+omap3-ohci.txt
+
+Example for OMAP4:
+
+usbhshost: usbhshost@4a064000 {
+	compatible = "ti,usbhs-host";
+	reg = <0x4a064000 0x800>;
+	ti,hwmods = "usb_host_hs";
+	#address-cells = <1>;
+	#size-cells = <1>;
+	ranges;
+
+	usbhsohci: ohci@4a064800 {
+		compatible = "ti,ohci-omap3", "usb-ohci";
+		reg = <0x4a064800 0x400>;
+		interrupt-parent = <&gic>;
+		interrupts = <0 76 0x4>;
+	};
+
+	usbhsehci: ehci@4a064c00 {
+		compatible = "ti,ehci-omap", "usb-ehci";
+		reg = <0x4a064c00 0x400>;
+		interrupt-parent = <&gic>;
+		interrupts = <0 77 0x4>;
+	};
+};
+
+&usbhshost {
+	port1-mode = "ehci-phy";
+	port2-mode = "ehci-tll";
+	port3-mode = "ehci-phy";
+};
+
+&usbhsehci {
+	phys = <&hsusb1_phy 0 &hsusb3_phy>;
+};
diff --git a/Documentation/devicetree/bindings/mfd/omap-usb-tll.txt b/Documentation/devicetree/bindings/mfd/omap-usb-tll.txt
new file mode 100644
index 0000000..62fe697
--- /dev/null
+++ b/Documentation/devicetree/bindings/mfd/omap-usb-tll.txt
@@ -0,0 +1,17 @@
+OMAP HS USB Host TLL (Transceiver-Less Interface)
+
+Required properties:
+
+- compatible : should be "ti,usbhs-tll"
+- reg : should contain one register range i.e. start and length
+- interrupts : should contain the TLL module's interrupt
+- ti,hwmods : must contain "usb_tll_hs"
+
+Example:
+
+	usbhstll: usbhstll@4a062000 {
+		compatible = "ti,usbhs-tll";
+		reg = <0x4a062000 0x1000>;
+		interrupts = <78>;
+		ti,hwmods = "usb_tll_hs";
+	  };
diff --git a/Documentation/devicetree/bindings/pwm/pwm-tiecap.txt b/Documentation/devicetree/bindings/pwm/pwm-tiecap.txt
index 131e8c11..681afad 100644
--- a/Documentation/devicetree/bindings/pwm/pwm-tiecap.txt
+++ b/Documentation/devicetree/bindings/pwm/pwm-tiecap.txt
@@ -1,7 +1,9 @@
 TI SOC ECAP based APWM controller
 
 Required properties:
-- compatible: Must be "ti,am33xx-ecap"
+- compatible: Must be "ti,<soc>-ecap".
+  for am33xx - compatible = "ti,am33xx-ecap";
+  for da850  - compatible = "ti,da850-ecap", "ti,am33xx-ecap";
 - #pwm-cells: Should be 3. Number of cells being used to specify PWM property.
   First cell specifies the per-chip index of the PWM to use, the second
   cell is the period in nanoseconds and bit 0 in the third cell is used to
@@ -15,9 +17,15 @@
 
 Example:
 
-ecap0: ecap@0 {
+ecap0: ecap@0 { /* ECAP on am33xx */
 	compatible = "ti,am33xx-ecap";
 	#pwm-cells = <3>;
 	reg = <0x48300100 0x80>;
 	ti,hwmods = "ecap0";
 };
+
+ecap0: ecap@0 { /* ECAP on da850 */
+	compatible = "ti,da850-ecap", "ti,am33xx-ecap";
+	#pwm-cells = <3>;
+	reg = <0x306000 0x80>;
+};
diff --git a/Documentation/devicetree/bindings/pwm/pwm-tiehrpwm.txt b/Documentation/devicetree/bindings/pwm/pwm-tiehrpwm.txt
index 4fc7079..337c6fc 100644
--- a/Documentation/devicetree/bindings/pwm/pwm-tiehrpwm.txt
+++ b/Documentation/devicetree/bindings/pwm/pwm-tiehrpwm.txt
@@ -1,7 +1,9 @@
 TI SOC EHRPWM based PWM controller
 
 Required properties:
-- compatible : Must be "ti,am33xx-ehrpwm"
+- compatible: Must be "ti,<soc>-ehrpwm".
+  for am33xx - compatible = "ti,am33xx-ehrpwm";
+  for da850  - compatible = "ti,da850-ehrpwm", "ti,am33xx-ehrpwm";
 - #pwm-cells: Should be 3. Number of cells being used to specify PWM property.
   First cell specifies the per-chip index of the PWM to use, the second
   cell is the period in nanoseconds and bit 0 in the third cell is used to
@@ -15,9 +17,15 @@
 
 Example:
 
-ehrpwm0: ehrpwm@0 {
+ehrpwm0: ehrpwm@0 { /* EHRPWM on am33xx */
 	compatible = "ti,am33xx-ehrpwm";
 	#pwm-cells = <3>;
 	reg = <0x48300200 0x100>;
 	ti,hwmods = "ehrpwm0";
 };
+
+ehrpwm0: ehrpwm@0 { /* EHRPWM on da850 */
+	compatible = "ti,da850-ehrpwm", "ti,am33xx-ehrpwm";
+	#pwm-cells = <3>;
+	reg = <0x300000 0x2000>;
+};
diff --git a/Documentation/devicetree/bindings/sound/wm8994.txt b/Documentation/devicetree/bindings/sound/wm8994.txt
index 7a7eb1e..f2f3e80 100644
--- a/Documentation/devicetree/bindings/sound/wm8994.txt
+++ b/Documentation/devicetree/bindings/sound/wm8994.txt
@@ -5,14 +5,70 @@
 
 Required properties:
 
-  - compatible : "wlf,wm1811", "wlf,wm8994", "wlf,wm8958"
+  - compatible : One of "wlf,wm1811", "wlf,wm8994" or "wlf,wm8958".
 
   - reg : the I2C address of the device for I2C, the chip select
           number for SPI.
 
+  - gpio-controller : Indicates this device is a GPIO controller.
+  - #gpio-cells : Must be 2. The first cell is the pin number and the
+    second cell is used to specify optional parameters (currently unused).
+
+  - AVDD2-supply, DBVDD1-supply, DBVDD2-supply, DBVDD3-supply, CPVDD-supply,
+    SPKVDD1-supply, SPKVDD2-supply : power supplies for the device, as covered
+    in Documentation/devicetree/bindings/regulator/regulator.txt
+
+Optional properties:
+
+  - interrupts : The interrupt line the IRQ signal for the device is
+    connected to.  This is optional; if it is not connected then none
+    of the interrupt related properties should be specified.
+  - interrupt-controller : These devices contain interrupt controllers
+    and may provide interrupt services to other devices if they have an
+    interrupt line connected.
+  - interrupt-parent : The parent interrupt controller.
+  - #interrupt-cells: the number of cells to describe an IRQ, this should be 2.
+    The first cell is the IRQ number.
+    The second cell is the flags, encoded as the trigger masks from
+    Documentation/devicetree/bindings/interrupts.txt
+
+  - wlf,gpio-cfg : A list of GPIO configuration register values. If absent,
+    no configuration of these registers is performed. If any value is
+    over 0xffff then the register will be left as default. If present, 11
+    values must be supplied.
+
+  - wlf,micbias-cfg : Two MICBIAS register values for WM1811 or
+    WM8958.  If absent the register defaults will be used.
+
+  - wlf,ldo1ena : GPIO specifier for control of LDO1ENA input to device.
+  - wlf,ldo2ena : GPIO specifier for control of LDO2ENA input to device.
+
+  - wlf,lineout1-se : If present LINEOUT1 is in single ended mode.
+  - wlf,lineout2-se : If present LINEOUT2 is in single ended mode.
+
+  - wlf,lineout1-feedback : If present LINEOUT1 has common mode feedback
+    connected.
+  - wlf,lineout2-feedback : If present LINEOUT2 has common mode feedback
+    connected.
+
+  - wlf,ldoena-always-driven : If present LDOENA is always driven.
+
 Example:
 
 codec: wm8994@1a {
 	compatible = "wlf,wm8994";
 	reg = <0x1a>;
+
+	gpio-controller;
+	#gpio-cells = <2>;
+
+	wlf,lineout1-se;
+
+	AVDD2-supply = <&regulator>;
+	CPVDD-supply = <&regulator>;
+	DBVDD1-supply = <&regulator>;
+	DBVDD2-supply = <&regulator>;
+	DBVDD3-supply = <&regulator>;
+	SPKVDD1-supply = <&regulator>;
+	SPKVDD2-supply = <&regulator>;
 };
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 9653cf2..c3bfacb 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1277,6 +1277,20 @@
 
 	iucv=		[HW,NET]
 
+	ivrs_ioapic	[HW,X86_64]
+			Provide an override to the IOAPIC-ID<->DEVICE-ID
+			mapping provided in the IVRS ACPI table. For
+			example, to map IOAPIC-ID decimal 10 to
+			PCI device 00:14.0, write the parameter as:
+				ivrs_ioapic[10]=00:14.0
+
+	ivrs_hpet	[HW,X86_64]
+			Provide an override to the HPET-ID<->DEVICE-ID
+			mapping provided in the IVRS ACPI table. For
+			example, to map HPET-ID decimal 0 to
+			PCI device 00:14.0, write the parameter as:
+				ivrs_hpet[0]=00:14.0
+
 	js=		[HW,JOY] Analog joystick
 			See Documentation/input/joystick.txt.
 
@@ -1964,6 +1978,14 @@
 			Valid arguments: on, off
 			Default: on
 
+	nohz_full=	[KNL,BOOT]
+			In kernels built with CONFIG_NO_HZ_FULL=y, set
+			the specified list of CPUs whose tick will be stopped
+			whenever possible. The boot CPU will be forced outside
+			the range to maintain the timekeeping.
+			The CPUs in this range must also be included in the
+			rcu_nocbs= set.
+
 	noiotrap	[SH] Disables trapped I/O port accesses.
 
 	noirqdebug	[X86-32] Disables the code which attempts to detect and
diff --git a/Documentation/leds/00-INDEX b/Documentation/leds/00-INDEX
index 5246090..1ecd159 100644
--- a/Documentation/leds/00-INDEX
+++ b/Documentation/leds/00-INDEX
@@ -6,6 +6,8 @@
 	- notes on how to use the leds-lp5521 driver.
 leds-lp5523.txt
 	- notes on how to use the leds-lp5523 driver.
+leds-lp5562.txt
+	- notes on how to use the leds-lp5562 driver.
 leds-lp55xx.txt
 	- description about lp55xx common driver.
 leds-lm3556.txt
diff --git a/Documentation/leds/leds-lp5521.txt b/Documentation/leds/leds-lp5521.txt
index 270f571..79e4c2e 100644
--- a/Documentation/leds/leds-lp5521.txt
+++ b/Documentation/leds/leds-lp5521.txt
@@ -81,22 +81,3 @@
 
 If the current is set to 0 in the platform data, that channel is
 disabled and it is not visible in the sysfs.
-
-The 'update_config' : CONFIG register (ADDR 08h)
-This value is platform-specific data.
-If update_config is not defined, the CONFIG register is set with
-'LP5521_PWRSAVE_EN | LP5521_CP_MODE_AUTO | LP5521_R_TO_BATT'.
-(Enable auto-powersave, set charge pump to auto, red to battery)
-
-example of update_config :
-
-#define LP5521_CONFIGS	(LP5521_PWM_HF | LP5521_PWRSAVE_EN | \
-			LP5521_CP_MODE_AUTO | LP5521_R_TO_BATT | \
-			LP5521_CLK_INT)
-
-static struct lp55xx_platform_data lp5521_pdata = {
-	.led_config = lp5521_led_config,
-	.num_channels = ARRAY_SIZE(lp5521_led_config),
-	.clock_mode = LP55XX_CLOCK_INT,
-	.update_config = LP5521_CONFIGS,
-};
diff --git a/Documentation/leds/leds-lp5562.txt b/Documentation/leds/leds-lp5562.txt
new file mode 100644
index 0000000..5a823ff
--- /dev/null
+++ b/Documentation/leds/leds-lp5562.txt
@@ -0,0 +1,120 @@
+Kernel driver for LP5562
+========================
+
+* TI LP5562 LED Driver
+
+Author: Milo(Woogyom) Kim <milo.kim@ti.com>
+
+Description
+
+  LP5562 can drive up to 4 channels: R/G/B and White.
+  LEDs can be controlled directly via the led class control interface.
+
+  All four channels can also be controlled using the engine micro programs.
+  LP5562 has internal program memory for running various LED patterns.
+  For details, please refer to the 'firmware' section in leds-lp55xx.txt.
+
+Device attribute: engine_mux
+
+  3 engines are allocated in LP5562, but the number of channels is 4.
+  Therefore each channel should be mapped to an engine number.
+  Value : RGB or W
+
+  This attribute is used for programming LED data with the firmware interface.
+  Unlike the LP5521/LP5523/55231, LP5562 has a unique feature for the engine
+  mux, so an additional sysfs attribute is required.
+
+  LED Map
+  Red   ... Engine 1 (fixed)
+  Green ... Engine 2 (fixed)
+  Blue  ... Engine 3 (fixed)
+  White ... Engine 1 or 2 or 3 (selective)
+
+How to load the program data using engine_mux
+
+  Before loading the LP5562 program data, engine_mux should be written
+  between selecting the engine and loading the firmware.
+  The engine mux has two different modes, RGB and W.
+  RGB is used for loading RGB program data, W is used for W program data.
+
+  For example, to run a blinking green channel pattern:
+  echo 2 > /sys/bus/i2c/devices/xxxx/select_engine     # 2 is for green channel
+  echo "RGB" > /sys/bus/i2c/devices/xxxx/engine_mux    # engine mux for RGB
+  echo 1 > /sys/class/firmware/lp5562/loading
+  echo "4000600040FF6000" > /sys/class/firmware/lp5562/data
+  echo 0 > /sys/class/firmware/lp5562/loading
+  echo 1 > /sys/bus/i2c/devices/xxxx/run_engine
+
+  To run a blinking white pattern:
+  echo 1 or 2 or 3 > /sys/bus/i2c/devices/xxxx/select_engine
+  echo "W" > /sys/bus/i2c/devices/xxxx/engine_mux
+  echo 1 > /sys/class/firmware/lp5562/loading
+  echo "4000600040FF6000" > /sys/class/firmware/lp5562/data
+  echo 0 > /sys/class/firmware/lp5562/loading
+  echo 1 > /sys/bus/i2c/devices/xxxx/run_engine
+
+How to load the predefined patterns
+
+  Please refer to 'leds-lp55xx.txt'.
+
+Setting Current of Each Channel
+
+  Like LP5521 and LP5523/55231, LP5562 provides LED current settings.
+  The 'led_current' and 'max_current' properties are used.
+
+(Example of Platform data)
+
+To configure the platform specific data, lp55xx_platform_data structure is used.
+
+static struct lp55xx_led_config lp5562_led_config[] = {
+	{
+		.name 		= "R",
+		.chan_nr	= 0,
+		.led_current	= 20,
+		.max_current	= 40,
+	},
+	{
+		.name 		= "G",
+		.chan_nr	= 1,
+		.led_current	= 20,
+		.max_current	= 40,
+	},
+	{
+		.name 		= "B",
+		.chan_nr	= 2,
+		.led_current	= 20,
+		.max_current	= 40,
+	},
+	{
+		.name 		= "W",
+		.chan_nr	= 3,
+		.led_current	= 20,
+		.max_current	= 40,
+	},
+};
+
+static int lp5562_setup(void)
+{
+	/* setup HW resources */
+}
+
+static void lp5562_release(void)
+{
+	/* Release HW resources */
+}
+
+static void lp5562_enable(bool state)
+{
+	/* Control of chip enable signal */
+}
+
+static struct lp55xx_platform_data lp5562_platform_data = {
+	.led_config        = lp5562_led_config,
+	.num_channels      = ARRAY_SIZE(lp5562_led_config),
+	.setup_resources   = lp5562_setup,
+	.release_resources = lp5562_release,
+	.enable            = lp5562_enable,
+};
+
+If the current is set to 0 in the platform data, that channel is
+disabled and it is not visible in the sysfs.
diff --git a/Documentation/leds/leds-lp55xx.txt b/Documentation/leds/leds-lp55xx.txt
index ced4186..eec8fa2 100644
--- a/Documentation/leds/leds-lp55xx.txt
+++ b/Documentation/leds/leds-lp55xx.txt
@@ -5,7 +5,7 @@
 
 Description
 -----------
-LP5521, LP5523/55231 have common features as below.
+LP5521, LP5523/55231 and LP5562 have common features as below.
 
   Register access via the I2C
   Device initialization/deinitialization
@@ -116,3 +116,47 @@
 run_engine  : Control the selected engine
 firmware_cb : The callback function after loading the firmware is done.
               Chip specific commands for loading and updating program memory.
+
+( Predefined pattern data )
+
+Without the firmware interface, the LP55xx driver provides another method for
+loading a LED pattern: the 'predefined' pattern.
+A predefined pattern is defined in the platform data and can be loaded (one or
+more of them) via the sysfs if needed.
+To use the predefined pattern concept, 'patterns' and 'num_patterns' should be
+configured.
+
+  Example of predefined pattern data:
+
+  /* mode_1: blinking data */
+  static const u8 mode_1[] = {
+		0x40, 0x00, 0x60, 0x00, 0x40, 0xFF, 0x60, 0x00,
+		};
+
+  /* mode_2: always on */
+  static const u8 mode_2[] = { 0x40, 0xFF, };
+
+  struct lp55xx_predef_pattern board_led_patterns[] = {
+	{
+		.r = mode_1,
+		.size_r = ARRAY_SIZE(mode_1),
+	},
+	{
+		.b = mode_2,
+		.size_b = ARRAY_SIZE(mode_2),
+	},
+  };
+
+  struct lp55xx_platform_data lp5562_pdata = {
+  ...
+	.patterns      = board_led_patterns,
+	.num_patterns  = ARRAY_SIZE(board_led_patterns),
+  };
+
+Then, mode_1 and mode_2 can be run via the sysfs.
+
+  echo 1 > /sys/bus/i2c/devices/xxxx/led_pattern    # red blinking LED pattern
+  echo 2 > /sys/bus/i2c/devices/xxxx/led_pattern    # blue LED always on
+
+To stop a running pattern:
+  echo 0 > /sys/bus/i2c/devices/xxxx/led_pattern
diff --git a/Documentation/s390/CommonIO b/Documentation/s390/CommonIO
index d378cba..6e0f63f3 100644
--- a/Documentation/s390/CommonIO
+++ b/Documentation/s390/CommonIO
@@ -8,9 +8,9 @@
 
   Enable logging of debug information in case of ccw device timeouts.
 
-* cio_ignore = {all} |
-	       {<device> | <range of devices>} |
-	       {!<device> | !<range of devices>}
+* cio_ignore = device[,device[,..]]
+
+	device := {all | [!]ipldev | [!]condev | [!]<devno> | [!]<devno>-<devno>}
 
   The given devices will be ignored by the common I/O-layer; no detection
   and device sensing will be done on any of those devices. The subchannel to 
@@ -24,8 +24,10 @@
   device numbers (0xabcd or abcd, for 2.4 backward compatibility). If you
   give a device number 0xabcd, it will be interpreted as 0.0.abcd.
 
-  You can use the 'all' keyword to ignore all devices.
-  The '!' operator will cause the I/O-layer to _not_ ignore a device.
+  You can use the 'all' keyword to ignore all devices. The 'ipldev' and 'condev'
+  keywords can be used to refer to the CCW based boot device and CCW console
+  device respectively (these are probably useful only when combined with the '!'
+  operator). The '!' operator will cause the I/O-layer to _not_ ignore a device.
   The command line is parsed from left to right.
 
   For example, 
diff --git a/Documentation/timers/NO_HZ.txt b/Documentation/timers/NO_HZ.txt
new file mode 100644
index 0000000..5b53220
--- /dev/null
+++ b/Documentation/timers/NO_HZ.txt
@@ -0,0 +1,273 @@
+		NO_HZ: Reducing Scheduling-Clock Ticks
+
+
+This document describes Kconfig options and boot parameters that can
+reduce the number of scheduling-clock interrupts, thereby improving energy
+efficiency and reducing OS jitter.  Reducing OS jitter is important for
+some types of computationally intensive high-performance computing (HPC)
+applications and for real-time applications.
+
+There are two main contexts in which the number of scheduling-clock
+interrupts can be reduced compared to the old-school approach of sending
+a scheduling-clock interrupt to all CPUs every jiffy whether they need
+it or not (CONFIG_HZ_PERIODIC=y or CONFIG_NO_HZ=n for older kernels):
+
+1.	Idle CPUs (CONFIG_NO_HZ_IDLE=y or CONFIG_NO_HZ=y for older kernels).
+
+2.	CPUs having only one runnable task (CONFIG_NO_HZ_FULL=y).
+
+These two cases are described in the following two sections, followed
+by a third section on RCU-specific considerations and a fourth and final
+section listing known issues.
+
+
+IDLE CPUs
+
+If a CPU is idle, there is little point in sending it a scheduling-clock
+interrupt.  After all, the primary purpose of a scheduling-clock interrupt
+is to force a busy CPU to shift its attention among multiple duties,
+and an idle CPU has no duties to shift its attention among.
+
+The CONFIG_NO_HZ_IDLE=y Kconfig option causes the kernel to avoid sending
+scheduling-clock interrupts to idle CPUs, which is critically important
+both to battery-powered devices and to highly virtualized mainframes.
+A battery-powered device running a CONFIG_HZ_PERIODIC=y kernel would
+drain its battery very quickly, easily 2-3 times as fast as would the
+same device running a CONFIG_NO_HZ_IDLE=y kernel.  A mainframe running
+1,500 OS instances might find that half of its CPU time was consumed by
+unnecessary scheduling-clock interrupts.  In these situations, there
+is strong motivation to avoid sending scheduling-clock interrupts to
+idle CPUs.  That said, dyntick-idle mode is not free:
+
+1.	It increases the number of instructions executed on the path
+	to and from the idle loop.
+
+2.	On many architectures, dyntick-idle mode also increases the
+	number of expensive clock-reprogramming operations.
+
+Therefore, systems with aggressive real-time response constraints often
+run CONFIG_HZ_PERIODIC=y kernels (or CONFIG_NO_HZ=n for older kernels)
+in order to avoid degrading from-idle transition latencies.
+
+An idle CPU that is not receiving scheduling-clock interrupts is said to
+be "dyntick-idle", "in dyntick-idle mode", "in nohz mode", or "running
+tickless".  The remainder of this document will use "dyntick-idle mode".
+
+There is also a boot parameter "nohz=" that can be used to disable
+dyntick-idle mode in CONFIG_NO_HZ_IDLE=y kernels by specifying "nohz=off".
+By default, CONFIG_NO_HZ_IDLE=y kernels boot with "nohz=on", enabling
+dyntick-idle mode.
+
+
+CPUs WITH ONLY ONE RUNNABLE TASK
+
+If a CPU has only one runnable task, there is little point in sending it
+a scheduling-clock interrupt because there is no other task to switch to.
+
+The CONFIG_NO_HZ_FULL=y Kconfig option causes the kernel to avoid
+sending scheduling-clock interrupts to CPUs with a single runnable task,
+and such CPUs are said to be "adaptive-ticks CPUs".  This is important
+for applications with aggressive real-time response constraints because
+it allows them to improve their worst-case response times by the maximum
+duration of a scheduling-clock interrupt.  It is also important for
+computationally intensive short-iteration workloads:  If any CPU is
+delayed during a given iteration, all the other CPUs will be forced to
+wait idle while the delayed CPU finishes.  Thus, the delay is multiplied
+by one less than the number of CPUs.  In these situations, there is
+again strong motivation to avoid sending scheduling-clock interrupts.
+
+By default, no CPU will be an adaptive-ticks CPU.  The "nohz_full="
+boot parameter specifies the adaptive-ticks CPUs.  For example,
+"nohz_full=1,6-8" says that CPUs 1, 6, 7, and 8 are to be adaptive-ticks
+CPUs.  Note that you are prohibited from marking all of the CPUs as
+adaptive-tick CPUs:  At least one non-adaptive-tick CPU must remain
+online to handle timekeeping tasks in order to ensure that system calls
+like gettimeofday() return accurate values on adaptive-tick CPUs.
+(This is not an issue for CONFIG_NO_HZ_IDLE=y because there are no
+running user processes to observe slight drifts in clock rate.)
+Therefore, the boot CPU is prohibited from entering adaptive-ticks
+mode.  Specifying a "nohz_full=" mask that includes the boot CPU will
+result in a boot-time error message, and the boot CPU will be removed
+from the mask.
+
+Alternatively, the CONFIG_NO_HZ_FULL_ALL=y Kconfig parameter specifies
+that all CPUs other than the boot CPU are adaptive-ticks CPUs.  This
+Kconfig parameter will be overridden by the "nohz_full=" boot parameter,
+so that if both the CONFIG_NO_HZ_FULL_ALL=y Kconfig parameter and
+the "nohz_full=1" boot parameter is specified, the boot parameter will
+prevail so that only CPU 1 will be an adaptive-ticks CPU.
+
+Finally, adaptive-ticks CPUs must have their RCU callbacks offloaded.
+This is covered in the "RCU IMPLICATIONS" section below.
+
+Normally, a CPU remains in adaptive-ticks mode as long as possible.
+In particular, transitioning to kernel mode does not automatically change
+the mode.  Instead, the CPU will exit adaptive-ticks mode only if needed,
+for example, if that CPU enqueues an RCU callback.
+
+Just as with dyntick-idle mode, the benefits of adaptive-tick mode do
+not come for free:
+
+1.	CONFIG_NO_HZ_FULL selects CONFIG_NO_HZ_COMMON, so you cannot run
+	adaptive ticks without also running dyntick idle.  This dependency
+	extends down into the implementation, so that all of the costs
+	of CONFIG_NO_HZ_IDLE are also incurred by CONFIG_NO_HZ_FULL.
+
+2.	The user/kernel transitions are slightly more expensive due
+	to the need to inform kernel subsystems (such as RCU) about
+	the change in mode.
+
+3.	POSIX CPU timers on adaptive-tick CPUs may miss their deadlines
+	(perhaps indefinitely) because they currently rely on
+	scheduling-tick interrupts.  This will likely be fixed in
+	one of two ways: (1) Prevent CPUs with POSIX CPU timers from
+	entering adaptive-tick mode, or (2) Use hrtimers or other
+	adaptive-ticks-immune mechanism to cause the POSIX CPU timer to
+	fire properly.
+
+4.	If there are more perf events pending than the hardware can
+	accommodate, they are normally round-robined so as to collect
+	all of them over time.  Adaptive-tick mode may prevent this
+	round-robining from happening.  This will likely be fixed by
+	preventing CPUs with large numbers of perf events pending from
+	entering adaptive-tick mode.
+
+5.	Scheduler statistics for adaptive-tick CPUs may be computed
+	slightly differently than those for non-adaptive-tick CPUs.
+	This might in turn perturb load-balancing of real-time tasks.
+
+6.	The LB_BIAS scheduler feature is disabled by adaptive ticks.
+
+Although improvements are expected over time, adaptive ticks is quite
+useful for many types of real-time and compute-intensive applications.
+However, the drawbacks listed above mean that adaptive ticks should not
+(yet) be enabled by default.
+
+
+RCU IMPLICATIONS
+
+There are situations in which idle CPUs cannot be permitted to
+enter either dyntick-idle mode or adaptive-tick mode, the most
+common being when that CPU has RCU callbacks pending.
+
+The CONFIG_RCU_FAST_NO_HZ=y Kconfig option may be used to cause such CPUs
+to enter dyntick-idle mode or adaptive-tick mode anyway.  In this case,
+a timer will awaken these CPUs every four jiffies in order to ensure
+that the RCU callbacks are processed in a timely fashion.
+
+Another approach is to offload RCU callback processing to "rcuo" kthreads
+using the CONFIG_RCU_NOCB_CPU=y Kconfig option.  The specific CPUs to
+offload may be selected via several methods:
+
+1.	One of three mutually exclusive Kconfig options specifies a
+	build-time default for the CPUs to offload:
+
+	a.	The CONFIG_RCU_NOCB_CPU_NONE=y Kconfig option results in
+		no CPUs being offloaded.
+
+	b.	The CONFIG_RCU_NOCB_CPU_ZERO=y Kconfig option causes
+		CPU 0 to be offloaded.
+
+	c.	The CONFIG_RCU_NOCB_CPU_ALL=y Kconfig option causes all
+		CPUs to be offloaded.  Note that the callbacks will be
+		offloaded to "rcuo" kthreads, and that those kthreads
+		will in fact run on some CPU.  However, this approach
+		gives fine-grained control on exactly which CPUs the
+		callbacks run on, along with their scheduling priority
+		(including the default of SCHED_OTHER), and it further
+		allows this control to be varied dynamically at runtime.
+
+2.	The "rcu_nocbs=" kernel boot parameter, which takes a comma-separated
+	list of CPUs and CPU ranges, for example, "1,3-5" selects CPUs 1,
+	3, 4, and 5.  The specified CPUs will be offloaded in addition to
+	any CPUs specified as offloaded by CONFIG_RCU_NOCB_CPU_ZERO=y or
+	CONFIG_RCU_NOCB_CPU_ALL=y.  This means that the "rcu_nocbs=" boot
+	parameter has no effect for kernels built with RCU_NOCB_CPU_ALL=y.
+
+The offloaded CPUs will never queue RCU callbacks, and therefore RCU
+never prevents offloaded CPUs from entering either dyntick-idle mode
+or adaptive-tick mode.  That said, note that it is up to userspace to
+pin the "rcuo" kthreads to specific CPUs if desired.  Otherwise, the
+scheduler will decide where to run them, which might or might not be
+where you want them to run.
+
+
+KNOWN ISSUES
+
+o	Dyntick-idle slows transitions to and from idle slightly.
+	In practice, this has not been a problem except for the most
+	aggressive real-time workloads, which have the option of disabling
+	dyntick-idle mode, an option that most of them take.  However,
+	some workloads will no doubt want to use adaptive ticks to
+	eliminate scheduling-clock interrupt latencies.  Here are some
+	options for these workloads:
+
+	a.	Use PMQOS from userspace to inform the kernel of your
+		latency requirements (preferred).
+
+	b.	On x86 systems, use the "idle=mwait" boot parameter.
+
+	c.	On x86 systems, use the "intel_idle.max_cstate=" boot
+		parameter to limit the maximum C-state depth.
+
+	d.	On x86 systems, use the "idle=poll" boot parameter.
+		However, please note that use of this parameter can cause
+		your CPU to overheat, which may cause thermal throttling
+		to degrade your latencies -- and that this degradation can
+		be even worse than that of dyntick-idle.  Furthermore,
+		this parameter effectively disables Turbo Mode on Intel
+		CPUs, which can significantly reduce maximum performance.
+
+o	Adaptive-ticks slows user/kernel transitions slightly.
+	This is not expected to be a problem for computationally intensive
+	workloads, which have few such transitions.  Careful benchmarking
+	will be required to determine whether or not other workloads
+	are significantly affected by this effect.
+
+o	Adaptive-ticks does not do anything unless there is only one
+	runnable task for a given CPU, even though there are a number
+	of other situations where the scheduling-clock tick is not
+	needed.  To give but one example, consider a CPU that has one
+	runnable high-priority SCHED_FIFO task and an arbitrary number
+	of low-priority SCHED_OTHER tasks.  In this case, the CPU is
+	required to run the SCHED_FIFO task until it either blocks or
+	some other higher-priority task awakens on (or is assigned to)
+	this CPU, so there is no point in sending a scheduling-clock
+	interrupt to this CPU.	However, the current implementation
+	nevertheless sends scheduling-clock interrupts to CPUs having a
+	single runnable SCHED_FIFO task and multiple runnable SCHED_OTHER
+	tasks, even though these interrupts are unnecessary.
+
+	Better handling of these sorts of situations is future work.
+
+o	A reboot is required to reconfigure both adaptive idle and RCU
+	callback offloading.  Runtime reconfiguration could be provided
+	if needed; however, due to the complexity of reconfiguring RCU at
+	runtime, there would need to be an earthshakingly good reason,
+	especially given that you have the straightforward option of
+	simply offloading RCU callbacks from all CPUs and pinning them
+	where you want them whenever you want them pinned.
+
+o	Additional configuration is required to deal with other sources
+	of OS jitter, including interrupts and system-utility tasks
+	and processes.  This configuration normally involves binding
+	interrupts and tasks to particular CPUs.
+
+o	Some sources of OS jitter can currently be eliminated only by
+	constraining the workload.  For example, the only way to eliminate
+	OS jitter due to global TLB shootdowns is to avoid the unmapping
+	operations (such as kernel module unload operations) that
+	result in these shootdowns.  For another example, page faults
+	and TLB misses can be reduced (and in some cases eliminated) by
+	using huge pages and by constraining the amount of memory used
+	by the application.  Pre-faulting the working set can also be
+	helpful, especially when combined with the mlock() and mlockall()
+	system calls.
+
+o	Unless all CPUs are idle, at least one CPU must keep the
+	scheduling-clock interrupt going in order to support accurate
+	timekeeping.
+
+o	If there are adaptive-ticks CPUs, there will be at least one
+	CPU keeping the scheduling-clock interrupt going, even if all
+	CPUs are otherwise idle.
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 119358d..5f91eda 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -1486,15 +1486,23 @@
 	__u8  pad[36];
 };
 
+For the special case of virtio-ccw devices on s390, the ioevent is matched
+to a subchannel/virtqueue tuple instead.
+
 The following flags are defined:
 
 #define KVM_IOEVENTFD_FLAG_DATAMATCH (1 << kvm_ioeventfd_flag_nr_datamatch)
 #define KVM_IOEVENTFD_FLAG_PIO       (1 << kvm_ioeventfd_flag_nr_pio)
 #define KVM_IOEVENTFD_FLAG_DEASSIGN  (1 << kvm_ioeventfd_flag_nr_deassign)
+#define KVM_IOEVENTFD_FLAG_VIRTIO_CCW_NOTIFY \
+	(1 << kvm_ioeventfd_flag_nr_virtio_ccw_notify)
 
 If datamatch flag is set, the event will be signaled only if the written value
 to the registered address is equal to datamatch in struct kvm_ioeventfd.
 
+For virtio-ccw devices, addr contains the subchannel id and datamatch the
+virtqueue index.
+
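+A minimal usage sketch for the virtio-ccw case (schid and vq_index are
+assumed locals; vm_fd is an open VM descriptor and error handling is
+omitted):
+
+struct kvm_ioeventfd ioevent = {
+	.addr      = schid,		/* subchannel id */
+	.datamatch = vq_index,		/* virtqueue index */
+	.fd        = eventfd(0, 0),
+	.flags     = KVM_IOEVENTFD_FLAG_DATAMATCH |
+		     KVM_IOEVENTFD_FLAG_VIRTIO_CCW_NOTIFY,
+};
+ioctl(vm_fd, KVM_IOEVENTFD, &ioevent);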
 
 4.60 KVM_DIRTY_TLB
 
@@ -1780,27 +1788,48 @@
   PPC   | KVM_REG_PPC_VPA_DTL   | 128
   PPC   | KVM_REG_PPC_EPCR	| 32
   PPC   | KVM_REG_PPC_EPR	| 32
+  PPC   | KVM_REG_PPC_TCR	| 32
+  PPC   | KVM_REG_PPC_TSR	| 32
+  PPC   | KVM_REG_PPC_OR_TSR	| 32
+  PPC   | KVM_REG_PPC_CLEAR_TSR	| 32
+  PPC   | KVM_REG_PPC_MAS0	| 32
+  PPC   | KVM_REG_PPC_MAS1	| 32
+  PPC   | KVM_REG_PPC_MAS2	| 64
+  PPC   | KVM_REG_PPC_MAS7_3	| 64
+  PPC   | KVM_REG_PPC_MAS4	| 32
+  PPC   | KVM_REG_PPC_MAS6	| 32
+  PPC   | KVM_REG_PPC_MMUCFG	| 32
+  PPC   | KVM_REG_PPC_TLB0CFG	| 32
+  PPC   | KVM_REG_PPC_TLB1CFG	| 32
+  PPC   | KVM_REG_PPC_TLB2CFG	| 32
+  PPC   | KVM_REG_PPC_TLB3CFG	| 32
+  PPC   | KVM_REG_PPC_TLB0PS	| 32
+  PPC   | KVM_REG_PPC_TLB1PS	| 32
+  PPC   | KVM_REG_PPC_TLB2PS	| 32
+  PPC   | KVM_REG_PPC_TLB3PS	| 32
+  PPC   | KVM_REG_PPC_EPTCFG	| 32
+  PPC   | KVM_REG_PPC_ICP_STATE | 64
 
 ARM registers are mapped using the lower 32 bits.  The upper 16 of that
 is the register group type, or coprocessor number:
 
 ARM core registers have the following id bit patterns:
-  0x4002 0000 0010 <index into the kvm_regs struct:16>
+  0x4020 0000 0010 <index into the kvm_regs struct:16>
 
 ARM 32-bit CP15 registers have the following id bit patterns:
-  0x4002 0000 000F <zero:1> <crn:4> <crm:4> <opc1:4> <opc2:3>
+  0x4020 0000 000F <zero:1> <crn:4> <crm:4> <opc1:4> <opc2:3>
 
 ARM 64-bit CP15 registers have the following id bit patterns:
-  0x4003 0000 000F <zero:1> <zero:4> <crm:4> <opc1:4> <zero:3>
+  0x4030 0000 000F <zero:1> <zero:4> <crm:4> <opc1:4> <zero:3>
 
 ARM CCSIDR registers are demultiplexed by CSSELR value:
-  0x4002 0000 0011 00 <csselr:8>
+  0x4020 0000 0011 00 <csselr:8>
 
 ARM 32-bit VFP control registers have the following id bit patterns:
-  0x4002 0000 0012 1 <regno:12>
+  0x4020 0000 0012 1 <regno:12>
 
 ARM 64-bit FP registers have the following id bit patterns:
-  0x4002 0000 0012 0 <regno:12>
+  0x4030 0000 0012 0 <regno:12>
 
 4.69 KVM_GET_ONE_REG
 
@@ -2161,6 +2190,76 @@
 written, then `n_invalid' invalid entries, invalidating any previously
 valid entries found.
 
+4.79 KVM_CREATE_DEVICE
+
+Capability: KVM_CAP_DEVICE_CTRL
+Type: vm ioctl
+Parameters: struct kvm_create_device (in/out)
+Returns: 0 on success, -1 on error
+Errors:
+  ENODEV: The device type is unknown or unsupported
+  EEXIST: Device already created, and this type of device may not
+          be instantiated multiple times
+
+  Other error conditions may be defined by individual device types or
+  have their standard meanings.
+
+Creates an emulated device in the kernel.  The file descriptor returned
+in fd can be used with KVM_SET/GET/HAS_DEVICE_ATTR.
+
+If the KVM_CREATE_DEVICE_TEST flag is set, only test whether the
+device type is supported (not necessarily whether it can be created
+in the current vm).
+
+Individual devices should not define flags.  Attributes should be used
+for specifying any behavior that is not implied by the device type
+number.
+
+struct kvm_create_device {
+	__u32	type;	/* in: KVM_DEV_TYPE_xxx */
+	__u32	fd;	/* out: device handle */
+	__u32	flags;	/* in: KVM_CREATE_DEVICE_xxx */
+};
+
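+A usage sketch (vm_fd is an assumed open VM descriptor; error handling
+omitted):
+
+struct kvm_create_device cd = {
+	.type = KVM_DEV_TYPE_FSL_MPIC_20,
+};
+if (ioctl(vm_fd, KVM_CREATE_DEVICE, &cd) == 0)
+	device_fd = cd.fd;	/* use with the *_DEVICE_ATTR ioctls */
+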
+4.80 KVM_SET_DEVICE_ATTR/KVM_GET_DEVICE_ATTR
+
+Capability: KVM_CAP_DEVICE_CTRL
+Type: device ioctl
+Parameters: struct kvm_device_attr
+Returns: 0 on success, -1 on error
+Errors:
+  ENXIO:  The group or attribute is unknown/unsupported for this device
+  EPERM:  The attribute cannot (currently) be accessed this way
+          (e.g. read-only attribute, or attribute that only makes
+          sense when the device is in a different state)
+
+  Other error conditions may be defined by individual device types.
+
+Gets/sets a specified piece of device configuration and/or state.  The
+semantics are device-specific.  See individual device documentation in
+the "devices" directory.  As with ONE_REG, the size of the data
+transferred is defined by the particular attribute.
+
+struct kvm_device_attr {
+	__u32	flags;		/* no flags currently defined */
+	__u32	group;		/* device-defined */
+	__u64	attr;		/* group-defined */
+	__u64	addr;		/* userspace address of attr data */
+};
+
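+For example, setting the base address of an in-kernel MPIC (a sketch
+using the attribute documented in devices/mpic.txt; the address value is
+an arbitrary assumption):
+
+__u64 base = 0xf8040000;
+struct kvm_device_attr attr = {
+	.group = KVM_DEV_MPIC_GRP_MISC,
+	.attr  = KVM_DEV_MPIC_BASE_ADDR,
+	.addr  = (__u64)(unsigned long)&base,
+};
+ioctl(device_fd, KVM_SET_DEVICE_ATTR, &attr);
+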
+4.81 KVM_HAS_DEVICE_ATTR
+
+Capability: KVM_CAP_DEVICE_CTRL
+Type: device ioctl
+Parameters: struct kvm_device_attr
+Returns: 0 on success, -1 on error
+Errors:
+  ENXIO:  The group or attribute is unknown/unsupported for this device
+
+Tests whether a device supports a particular attribute.  A successful
+return indicates the attribute is implemented.  It does not necessarily
+indicate that the attribute can be read or written in the device's
+current state.  "addr" is ignored.
 
 4.77 KVM_ARM_VCPU_INIT
 
@@ -2243,6 +2342,25 @@
 KVM_CREATE_IRQCHIP, but before calling KVM_RUN on any of the VCPUs.  Calling
 this ioctl twice for any of the base addresses will return -EEXIST.
 
+4.82 KVM_PPC_RTAS_DEFINE_TOKEN
+
+Capability: KVM_CAP_PPC_RTAS
+Architectures: ppc
+Type: vm ioctl
+Parameters: struct kvm_rtas_token_args
+Returns: 0 on success, -1 on error
+
+Defines a token value for a RTAS (Run Time Abstraction Services)
+service in order to allow it to be handled in the kernel.  The
+argument struct gives the name of the service, which must be the name
+of a service that has a kernel-side implementation.  If the token
+value is non-zero, it will be associated with that service, and
+subsequent RTAS calls by the guest specifying that token will be
+handled by the kernel.  If the token value is 0, then any token
+associated with the service will be forgotten, and subsequent RTAS
+calls by the guest for that service will be passed to userspace to be
+handled.
+
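+A sketch of enabling in-kernel handling of the "ibm,set-xive" service
+(the token value is an arbitrary assumption, and the struct layout is
+assumed from the ppc KVM ABI):
+
+struct kvm_rtas_token_args args = {
+	.name  = "ibm,set-xive",
+	.token = 0x2000,
+};
+ioctl(vm_fd, KVM_PPC_RTAS_DEFINE_TOKEN, &args);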
 
 5. The kvm_run structure
 ------------------------
@@ -2646,3 +2764,19 @@
 When disabled (args[0] == 0), behavior is as if this facility is unsupported.
 
 When this capability is enabled, KVM_EXIT_EPR can occur.
+
+6.6 KVM_CAP_IRQ_MPIC
+
+Architectures: ppc
+Parameters: args[0] is the MPIC device fd
+            args[1] is the MPIC CPU number for this vcpu
+
+This capability connects the vcpu to an in-kernel MPIC device.
+
+6.7 KVM_CAP_IRQ_XICS
+
+Architectures: ppc
+Parameters: args[0] is the XICS device fd
+            args[1] is the XICS CPU number (server ID) for this vcpu
+
+This capability connects the vcpu to an in-kernel XICS device.
diff --git a/Documentation/virtual/kvm/devices/README b/Documentation/virtual/kvm/devices/README
new file mode 100644
index 0000000..34a6983
--- /dev/null
+++ b/Documentation/virtual/kvm/devices/README
@@ -0,0 +1 @@
+This directory contains specific device bindings for KVM_CAP_DEVICE_CTRL.
diff --git a/Documentation/virtual/kvm/devices/mpic.txt b/Documentation/virtual/kvm/devices/mpic.txt
new file mode 100644
index 0000000..8257397
--- /dev/null
+++ b/Documentation/virtual/kvm/devices/mpic.txt
@@ -0,0 +1,53 @@
+MPIC interrupt controller
+=========================
+
+Device types supported:
+  KVM_DEV_TYPE_FSL_MPIC_20     Freescale MPIC v2.0
+  KVM_DEV_TYPE_FSL_MPIC_42     Freescale MPIC v4.2
+
+Only one MPIC instance, of any type, may be instantiated.  The created
+MPIC will act as the system interrupt controller, connecting to each
+vcpu's interrupt inputs.
+
+Groups:
+  KVM_DEV_MPIC_GRP_MISC
+  Attributes:
+    KVM_DEV_MPIC_BASE_ADDR (rw, 64-bit)
+      Base address of the 256 KiB MPIC register space.  Must be
+      naturally aligned.  A value of zero disables the mapping.
+      Reset value is zero.
+
+  KVM_DEV_MPIC_GRP_REGISTER (rw, 32-bit)
+    Access an MPIC register, as if the access were made from the guest.
+    "attr" is the byte offset into the MPIC register space.  Accesses
+    must be 4-byte aligned.
+
+    MSIs may be signaled by using this attribute group to write
+    to the relevant MSIIR.
+
+  KVM_DEV_MPIC_GRP_IRQ_ACTIVE (rw, 32-bit)
+    IRQ input line for each standard openpic source.  0 is inactive and 1
+    is active, regardless of interrupt sense.
+
+    For edge-triggered interrupts:  Writing 1 is considered an activating
+    edge, and writing 0 is ignored.  Reading returns 1 if a previously
+    signaled edge has not been acknowledged, and 0 otherwise.
+
+    "attr" is the IRQ number.  IRQ numbers for standard sources are the
+    byte offset of the relevant IVPR from EIVPR0, divided by 32.
+
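+    As a non-normative sketch, asserting source 5 from userspace could
+    use KVM_SET_DEVICE_ATTR on the MPIC device fd (mpic_fd is assumed
+    to come from KVM_CREATE_DEVICE):
+
+	__u32 active = 1;
+	struct kvm_device_attr attr = {
+		.group = KVM_DEV_MPIC_GRP_IRQ_ACTIVE,
+		.attr  = 5,			/* IRQ number */
+		.addr  = (__u64)(unsigned long)&active,
+	};
+
+	ioctl(mpic_fd, KVM_SET_DEVICE_ATTR, &attr);
+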
+IRQ Routing:
+
+  The MPIC emulation supports IRQ routing. Only a single MPIC device can
+  be instantiated. Once that device has been created, it's available as
+  irqchip id 0.
+
+  This irqchip 0 has 256 interrupt pins, which expose the interrupts in
+  the main array of interrupt sources (a.k.a. "SRC" interrupts).
+
+  The numbering is the same as the MPIC device tree binding -- based on
+  the register offset from the beginning of the sources array, without
+  regard to any subdivisions in chip documentation such as "internal"
+  or "external" interrupts.
+
+  Access to non-SRC interrupts is not implemented through IRQ routing mechanisms.
diff --git a/Documentation/virtual/kvm/devices/xics.txt b/Documentation/virtual/kvm/devices/xics.txt
new file mode 100644
index 0000000..4286493
--- /dev/null
+++ b/Documentation/virtual/kvm/devices/xics.txt
@@ -0,0 +1,66 @@
+XICS interrupt controller
+=========================
+
+Device type supported: KVM_DEV_TYPE_XICS
+
+Groups:
+  KVM_DEV_XICS_SOURCES
+  Attributes: One per interrupt source, indexed by the source number.
+
+This device emulates the XICS (eXternal Interrupt Controller
+Specification) defined in PAPR.  The XICS has a set of interrupt
+sources, each identified by a 20-bit source number, and a set of
+Interrupt Control Presentation (ICP) entities, also called "servers",
+each associated with a virtual CPU.
+
+The ICP entities are created by enabling the KVM_CAP_IRQ_XICS
+capability for each vcpu, specifying the XICS device fd in args[0] and
+the interrupt server number (i.e. the vcpu number from the XICS's
+point of view) in args[1] of the kvm_enable_cap struct.  Each ICP has
+64 bits of state which can be read and written using the
+KVM_GET_ONE_REG and KVM_SET_ONE_REG ioctls on the vcpu.  The 64 bit
+state word has the following bitfields, starting at the
+least-significant end of the word:
+
+* Unused, 16 bits
+
+* Pending interrupt priority, 8 bits
+  Zero is the highest priority, 255 means no interrupt is pending.
+
+* Pending IPI (inter-processor interrupt) priority, 8 bits
+  Zero is the highest priority, 255 means no IPI is pending.
+
+* Pending interrupt source number, 24 bits
+  Zero means no interrupt is pending, 2 means an IPI is pending.
+
+* Current processor priority, 8 bits
+  Zero is the highest priority, meaning no interrupts can be
+  delivered, and 255 is the lowest priority.
+
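+As a hedged sketch of the layout just listed (function and variable
+names are illustrative, not from the headers), a decode of the ICP
+state word obtained through KVM_GET_ONE_REG might be:
+
+  static void decode_icp(__u64 state)
+  {
+	__u8  pending_pri = (state >> 16) & 0xff;	/* pending irq priority */
+	__u8  ipi_pri     = (state >> 24) & 0xff;	/* pending IPI priority */
+	__u32 pending_src = (state >> 32) & 0xffffff;	/* pending source */
+	__u8  cppr        = (state >> 56) & 0xff;	/* processor priority */
+  }
+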
+Each source has 64 bits of state that can be read and written using
+the KVM_GET_DEVICE_ATTR and KVM_SET_DEVICE_ATTR ioctls, specifying the
+KVM_DEV_XICS_SOURCES attribute group, with the attribute number being
+the interrupt source number.  The 64 bit state word has the following
+bitfields, starting from the least-significant end of the word:
+
+* Destination (server number), 32 bits
+  This specifies where the interrupt should be sent, and is the
+  interrupt server number specified for the destination vcpu.
+
+* Priority, 8 bits
+  This is the priority specified for this interrupt source, where 0 is
+  the highest priority and 255 is the lowest.  An interrupt with a
+  priority of 255 will never be delivered.
+
+* Level sensitive flag, 1 bit
+  This bit is 1 for a level-sensitive interrupt source, or 0 for
+  edge-sensitive (or MSI).
+
+* Masked flag, 1 bit
+  This bit is set to 1 if the interrupt is masked (cannot be delivered
+  regardless of its priority), for example by the ibm,int-off RTAS
+  call, or 0 if it is not masked.
+
+* Pending flag, 1 bit
+  This bit is 1 if the source has a pending interrupt, otherwise 0.
+
+Only one XICS instance may be created per VM.
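+
+A non-normative sketch of reading and decoding one source's state
+(xics_fd is assumed to come from KVM_CREATE_DEVICE, irq is the source
+number chosen by userspace; the shifts follow the bit order listed
+above):
+
+  __u64 state;
+  struct kvm_device_attr attr = {
+	.group = KVM_DEV_XICS_SOURCES,
+	.attr  = irq,			/* 20-bit source number */
+	.addr  = (__u64)(unsigned long)&state,
+  };
+
+  ioctl(xics_fd, KVM_GET_DEVICE_ATTR, &attr);
+
+  __u32 server  = state & 0xffffffff;	/* destination server */
+  __u8  prio    = (state >> 32) & 0xff;	/* priority */
+  int   level   = (state >> 40) & 1;	/* level-sensitive */
+  int   masked  = (state >> 41) & 1;	/* masked */
+  int   pending = (state >> 42) & 1;	/* pending */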
diff --git a/arch/arm/include/asm/idmap.h b/arch/arm/include/asm/idmap.h
index 1a66f907..bf863ed 100644
--- a/arch/arm/include/asm/idmap.h
+++ b/arch/arm/include/asm/idmap.h
@@ -8,7 +8,6 @@
 #define __idmap __section(.idmap.text) noinline notrace
 
 extern pgd_t *idmap_pgd;
-extern pgd_t *hyp_pgd;
 
 void setup_mm_for_reboot(void);
 
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 0c4e643..57cb786 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -87,7 +87,7 @@
 	u32 hyp_pc;		/* PC when exception was taken from Hyp mode */
 };
 
-typedef struct vfp_hard_struct kvm_kernel_vfp_t;
+typedef struct vfp_hard_struct kvm_cpu_context_t;
 
 struct kvm_vcpu_arch {
 	struct kvm_regs regs;
@@ -105,8 +105,10 @@
 	struct kvm_vcpu_fault_info fault;
 
 	/* Floating point registers (VFP and Advanced SIMD/NEON) */
-	kvm_kernel_vfp_t vfp_guest;
-	kvm_kernel_vfp_t *vfp_host;
+	struct vfp_hard_struct vfp_guest;
+
+	/* Host FP context */
+	kvm_cpu_context_t *host_cpu_context;
 
 	/* VGIC state */
 	struct vgic_cpu vgic_cpu;
@@ -188,23 +190,38 @@
 int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
 		int exception_index);
 
-static inline void __cpu_init_hyp_mode(unsigned long long pgd_ptr,
+static inline void __cpu_init_hyp_mode(unsigned long long boot_pgd_ptr,
+				       unsigned long long pgd_ptr,
 				       unsigned long hyp_stack_ptr,
 				       unsigned long vector_ptr)
 {
-	unsigned long pgd_low, pgd_high;
-
-	pgd_low = (pgd_ptr & ((1ULL << 32) - 1));
-	pgd_high = (pgd_ptr >> 32ULL);
-
 	/*
-	 * Call initialization code, and switch to the full blown
-	 * HYP code. The init code doesn't need to preserve these registers as
-	 * r1-r3 and r12 are already callee save according to the AAPCS.
-	 * Note that we slightly misuse the prototype by casing the pgd_low to
-	 * a void *.
+	 * Call initialization code, and switch to the full blown HYP
+	 * code. The init code doesn't need to preserve these
+	 * registers as r0-r3 are caller-saved according to
+	 * the AAPCS.
+	 * Note that we slightly misuse the prototype by casting the
+	 * stack pointer to a void *.
+	 *
+	 * We don't have enough registers to perform the full init in
+	 * one go.  Install the boot PGD first, and then install the
+	 * runtime PGD, stack pointer and vectors. The PGDs are always
+	 * passed as the third argument, in order to be passed into
+	 * r2-r3 to the init code (yes, this is compliant with the
+	 * PCS!).
 	 */
-	kvm_call_hyp((void *)pgd_low, pgd_high, hyp_stack_ptr, vector_ptr);
+
+	kvm_call_hyp(NULL, 0, boot_pgd_ptr);
+
+	kvm_call_hyp((void *)hyp_stack_ptr, vector_ptr, pgd_ptr);
 }
 
+static inline int kvm_arch_dev_ioctl_check_extension(long ext)
+{
+	return 0;
+}
+
+int kvm_perf_init(void);
+int kvm_perf_teardown(void);
+
 #endif /* __ARM_KVM_HOST_H__ */
diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
index 970f3b5..472ac70 100644
--- a/arch/arm/include/asm/kvm_mmu.h
+++ b/arch/arm/include/asm/kvm_mmu.h
@@ -19,21 +19,33 @@
 #ifndef __ARM_KVM_MMU_H__
 #define __ARM_KVM_MMU_H__
 
-#include <asm/cacheflush.h>
-#include <asm/pgalloc.h>
-#include <asm/idmap.h>
+#include <asm/memory.h>
+#include <asm/page.h>
 
 /*
  * We directly use the kernel VA for the HYP, as we can directly share
  * the mapping (HTTBR "covers" TTBR1).
  */
-#define HYP_PAGE_OFFSET_MASK	(~0UL)
+#define HYP_PAGE_OFFSET_MASK	UL(~0)
 #define HYP_PAGE_OFFSET		PAGE_OFFSET
 #define KERN_TO_HYP(kva)	(kva)
 
+/*
+ * Our virtual mapping for the boot-time MMU-enable code. Must be
+ * shared across all the page-tables. Conveniently, we use the vectors
+ * page, where no kernel data will ever be shared with HYP.
+ */
+#define TRAMPOLINE_VA		UL(CONFIG_VECTORS_BASE)
+
+#ifndef __ASSEMBLY__
+
+#include <asm/cacheflush.h>
+#include <asm/pgalloc.h>
+
 int create_hyp_mappings(void *from, void *to);
 int create_hyp_io_mappings(void *from, void *to, phys_addr_t);
-void free_hyp_pmds(void);
+void free_boot_hyp_pgd(void);
+void free_hyp_pgds(void);
 
 int kvm_alloc_stage2_pgd(struct kvm *kvm);
 void kvm_free_stage2_pgd(struct kvm *kvm);
@@ -45,6 +57,8 @@
 void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu);
 
 phys_addr_t kvm_mmu_get_httbr(void);
+phys_addr_t kvm_mmu_get_boot_httbr(void);
+phys_addr_t kvm_get_idmap_vector(void);
 int kvm_mmu_init(void);
 void kvm_clear_hyp_idmap(void);
 
@@ -114,4 +128,8 @@
 	}
 }
 
+#define kvm_flush_dcache_to_poc(a,l)	__cpuc_flush_dcache_area((a), (l))
+
+#endif	/* !__ASSEMBLY__ */
+
 #endif /* __ARM_KVM_MMU_H__ */
diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c
index a53efa9..ee68cce 100644
--- a/arch/arm/kernel/asm-offsets.c
+++ b/arch/arm/kernel/asm-offsets.c
@@ -158,7 +158,7 @@
   DEFINE(VCPU_MIDR,		offsetof(struct kvm_vcpu, arch.midr));
   DEFINE(VCPU_CP15,		offsetof(struct kvm_vcpu, arch.cp15));
   DEFINE(VCPU_VFP_GUEST,	offsetof(struct kvm_vcpu, arch.vfp_guest));
-  DEFINE(VCPU_VFP_HOST,		offsetof(struct kvm_vcpu, arch.vfp_host));
+  DEFINE(VCPU_VFP_HOST,		offsetof(struct kvm_vcpu, arch.host_cpu_context));
   DEFINE(VCPU_REGS,		offsetof(struct kvm_vcpu, arch.regs));
   DEFINE(VCPU_USR_REGS,		offsetof(struct kvm_vcpu, arch.regs.usr_regs));
   DEFINE(VCPU_SVC_REGS,		offsetof(struct kvm_vcpu, arch.regs.svc_regs));
diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S
index b571484..a871b8e 100644
--- a/arch/arm/kernel/vmlinux.lds.S
+++ b/arch/arm/kernel/vmlinux.lds.S
@@ -20,7 +20,7 @@
 	VMLINUX_SYMBOL(__idmap_text_start) = .;				\
 	*(.idmap.text)							\
 	VMLINUX_SYMBOL(__idmap_text_end) = .;				\
-	ALIGN_FUNCTION();						\
+	. = ALIGN(32);							\
 	VMLINUX_SYMBOL(__hyp_idmap_text_start) = .;			\
 	*(.hyp.idmap.text)						\
 	VMLINUX_SYMBOL(__hyp_idmap_text_end) = .;
@@ -315,3 +315,8 @@
  */
 ASSERT((__proc_info_end - __proc_info_begin), "missing CPU support")
 ASSERT((__arch_info_end - __arch_info_begin), "no machine record defined")
+/*
+ * The HYP init code can't be more than a page long.
+ * The above comment applies as well.
+ */
+ASSERT(((__hyp_idmap_text_end - __hyp_idmap_text_start) <= PAGE_SIZE), "HYP init code too big")
diff --git a/arch/arm/kvm/Kconfig b/arch/arm/kvm/Kconfig
index 49dd64e..370e1a8 100644
--- a/arch/arm/kvm/Kconfig
+++ b/arch/arm/kvm/Kconfig
@@ -41,9 +41,9 @@
 	  Provides host support for ARM processors.
 
 config KVM_ARM_MAX_VCPUS
-	int "Number maximum supported virtual CPUs per VM"
-	depends on KVM_ARM_HOST
-	default 4
+	int "Number maximum supported virtual CPUs per VM" if KVM_ARM_HOST
+	default 4 if KVM_ARM_HOST
+	default 0
 	help
 	  Static number of max supported virtual CPUs per VM.
 
diff --git a/arch/arm/kvm/Makefile b/arch/arm/kvm/Makefile
index 8dc5e76..53c5ed8 100644
--- a/arch/arm/kvm/Makefile
+++ b/arch/arm/kvm/Makefile
@@ -18,6 +18,6 @@
 
 obj-y += kvm-arm.o init.o interrupts.o
 obj-y += arm.o handle_exit.o guest.o mmu.o emulate.o reset.o
-obj-y += coproc.o coproc_a15.o mmio.o psci.o
+obj-y += coproc.o coproc_a15.o mmio.o psci.o perf.o
 obj-$(CONFIG_KVM_ARM_VGIC) += vgic.o
 obj-$(CONFIG_KVM_ARM_TIMER) += arch_timer.o
diff --git a/arch/arm/kvm/arch_timer.c b/arch/arm/kvm/arch_timer.c
index 6ac938d..c55b608 100644
--- a/arch/arm/kvm/arch_timer.c
+++ b/arch/arm/kvm/arch_timer.c
@@ -22,6 +22,7 @@
 #include <linux/kvm_host.h>
 #include <linux/interrupt.h>
 
+#include <clocksource/arm_arch_timer.h>
 #include <asm/arch_timer.h>
 
 #include <asm/kvm_vgic.h>
@@ -64,7 +65,7 @@
 {
 	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
 
-	timer->cntv_ctl |= 1 << 1; /* Mask the interrupt in the guest */
+	timer->cntv_ctl |= ARCH_TIMER_CTRL_IT_MASK;
 	kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id,
 			    vcpu->arch.timer_cpu.irq->irq,
 			    vcpu->arch.timer_cpu.irq->level);
@@ -133,8 +134,8 @@
 	cycle_t cval, now;
 	u64 ns;
 
-	/* Check if the timer is enabled and unmasked first */
-	if ((timer->cntv_ctl & 3) != 1)
+	if ((timer->cntv_ctl & ARCH_TIMER_CTRL_IT_MASK) ||
+		!(timer->cntv_ctl & ARCH_TIMER_CTRL_ENABLE))
 		return;
 
 	cval = timer->cntv_cval;
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index a0dfc2a..37d216d8 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -16,6 +16,7 @@
  * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
  */
 
+#include <linux/cpu.h>
 #include <linux/errno.h>
 #include <linux/err.h>
 #include <linux/kvm_host.h>
@@ -48,7 +49,7 @@
 #endif
 
 static DEFINE_PER_CPU(unsigned long, kvm_arm_hyp_stack_page);
-static kvm_kernel_vfp_t __percpu *kvm_host_vfp_state;
+static kvm_cpu_context_t __percpu *kvm_host_cpu_state;
 static unsigned long hyp_default_vectors;
 
 /* Per-CPU variable containing the currently running vcpu. */
@@ -206,7 +207,7 @@
 		r = KVM_MAX_VCPUS;
 		break;
 	default:
-		r = 0;
+		r = kvm_arch_dev_ioctl_check_extension(ext);
 		break;
 	}
 	return r;
@@ -218,27 +219,18 @@
 	return -EINVAL;
 }
 
-int kvm_arch_set_memory_region(struct kvm *kvm,
-			       struct kvm_userspace_memory_region *mem,
-			       struct kvm_memory_slot old,
-			       int user_alloc)
-{
-	return 0;
-}
-
 int kvm_arch_prepare_memory_region(struct kvm *kvm,
 				   struct kvm_memory_slot *memslot,
-				   struct kvm_memory_slot old,
 				   struct kvm_userspace_memory_region *mem,
-				   bool user_alloc)
+				   enum kvm_mr_change change)
 {
 	return 0;
 }
 
 void kvm_arch_commit_memory_region(struct kvm *kvm,
 				   struct kvm_userspace_memory_region *mem,
-				   struct kvm_memory_slot old,
-				   bool user_alloc)
+				   const struct kvm_memory_slot *old,
+				   enum kvm_mr_change change)
 {
 }
 
@@ -326,7 +318,7 @@
 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
 	vcpu->cpu = cpu;
-	vcpu->arch.vfp_host = this_cpu_ptr(kvm_host_vfp_state);
+	vcpu->arch.host_cpu_context = this_cpu_ptr(kvm_host_cpu_state);
 
 	/*
 	 * Check whether this vcpu requires the cache to be flushed on
@@ -639,7 +631,8 @@
 	return 0;
 }
 
-int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level)
+int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level,
+			  bool line_status)
 {
 	u32 irq = irq_level->irq;
 	unsigned int irq_type, vcpu_idx, irq_num;
@@ -794,30 +787,48 @@
 	}
 }
 
-static void cpu_init_hyp_mode(void *vector)
+static void cpu_init_hyp_mode(void *dummy)
 {
+	unsigned long long boot_pgd_ptr;
 	unsigned long long pgd_ptr;
 	unsigned long hyp_stack_ptr;
 	unsigned long stack_page;
 	unsigned long vector_ptr;
 
 	/* Switch from the HYP stub to our own HYP init vector */
-	__hyp_set_vectors((unsigned long)vector);
+	__hyp_set_vectors(kvm_get_idmap_vector());
 
+	boot_pgd_ptr = (unsigned long long)kvm_mmu_get_boot_httbr();
 	pgd_ptr = (unsigned long long)kvm_mmu_get_httbr();
 	stack_page = __get_cpu_var(kvm_arm_hyp_stack_page);
 	hyp_stack_ptr = stack_page + PAGE_SIZE;
 	vector_ptr = (unsigned long)__kvm_hyp_vector;
 
-	__cpu_init_hyp_mode(pgd_ptr, hyp_stack_ptr, vector_ptr);
+	__cpu_init_hyp_mode(boot_pgd_ptr, pgd_ptr, hyp_stack_ptr, vector_ptr);
 }
 
+static int hyp_init_cpu_notify(struct notifier_block *self,
+			       unsigned long action, void *cpu)
+{
+	switch (action) {
+	case CPU_STARTING:
+	case CPU_STARTING_FROZEN:
+		cpu_init_hyp_mode(NULL);
+		break;
+	}
+
+	return NOTIFY_OK;
+}
+
+static struct notifier_block hyp_init_cpu_nb = {
+	.notifier_call = hyp_init_cpu_notify,
+};
+
 /**
  * Inits Hyp-mode on all online CPUs
  */
 static int init_hyp_mode(void)
 {
-	phys_addr_t init_phys_addr;
 	int cpu;
 	int err = 0;
 
@@ -850,24 +861,6 @@
 	}
 
 	/*
-	 * Execute the init code on each CPU.
-	 *
-	 * Note: The stack is not mapped yet, so don't do anything else than
-	 * initializing the hypervisor mode on each CPU using a local stack
-	 * space for temporary storage.
-	 */
-	init_phys_addr = virt_to_phys(__kvm_hyp_init);
-	for_each_online_cpu(cpu) {
-		smp_call_function_single(cpu, cpu_init_hyp_mode,
-					 (void *)(long)init_phys_addr, 1);
-	}
-
-	/*
-	 * Unmap the identity mapping
-	 */
-	kvm_clear_hyp_idmap();
-
-	/*
 	 * Map the Hyp-code called directly from the host
 	 */
 	err = create_hyp_mappings(__kvm_hyp_code_start, __kvm_hyp_code_end);
@@ -890,33 +883,38 @@
 	}
 
 	/*
-	 * Map the host VFP structures
+	 * Map the host CPU structures
 	 */
-	kvm_host_vfp_state = alloc_percpu(kvm_kernel_vfp_t);
-	if (!kvm_host_vfp_state) {
+	kvm_host_cpu_state = alloc_percpu(kvm_cpu_context_t);
+	if (!kvm_host_cpu_state) {
 		err = -ENOMEM;
-		kvm_err("Cannot allocate host VFP state\n");
+		kvm_err("Cannot allocate host CPU state\n");
 		goto out_free_mappings;
 	}
 
 	for_each_possible_cpu(cpu) {
-		kvm_kernel_vfp_t *vfp;
+		kvm_cpu_context_t *cpu_ctxt;
 
-		vfp = per_cpu_ptr(kvm_host_vfp_state, cpu);
-		err = create_hyp_mappings(vfp, vfp + 1);
+		cpu_ctxt = per_cpu_ptr(kvm_host_cpu_state, cpu);
+		err = create_hyp_mappings(cpu_ctxt, cpu_ctxt + 1);
 
 		if (err) {
-			kvm_err("Cannot map host VFP state: %d\n", err);
-			goto out_free_vfp;
+			kvm_err("Cannot map host CPU state: %d\n", err);
+			goto out_free_context;
 		}
 	}
 
 	/*
+	 * Execute the init code on each CPU.
+	 */
+	on_each_cpu(cpu_init_hyp_mode, NULL, 1);
+
+	/*
 	 * Init HYP view of VGIC
 	 */
 	err = kvm_vgic_hyp_init();
 	if (err)
-		goto out_free_vfp;
+		goto out_free_context;
 
 #ifdef CONFIG_KVM_ARM_VGIC
 		vgic_present = true;
@@ -929,12 +927,19 @@
 	if (err)
 		goto out_free_mappings;
 
+#ifndef CONFIG_HOTPLUG_CPU
+	free_boot_hyp_pgd();
+#endif
+
+	kvm_perf_init();
+
 	kvm_info("Hyp mode initialized successfully\n");
+
 	return 0;
-out_free_vfp:
-	free_percpu(kvm_host_vfp_state);
+out_free_context:
+	free_percpu(kvm_host_cpu_state);
 out_free_mappings:
-	free_hyp_pmds();
+	free_hyp_pgds();
 out_free_stack_pages:
 	for_each_possible_cpu(cpu)
 		free_page(per_cpu(kvm_arm_hyp_stack_page, cpu));
@@ -943,27 +948,42 @@
 	return err;
 }
 
+static void check_kvm_target_cpu(void *ret)
+{
+	*(int *)ret = kvm_target_cpu();
+}
+
 /**
  * Initialize Hyp-mode and memory mappings on all CPUs.
  */
 int kvm_arch_init(void *opaque)
 {
 	int err;
+	int ret, cpu;
 
 	if (!is_hyp_mode_available()) {
 		kvm_err("HYP mode not available\n");
 		return -ENODEV;
 	}
 
-	if (kvm_target_cpu() < 0) {
-		kvm_err("Target CPU not supported!\n");
-		return -ENODEV;
+	for_each_online_cpu(cpu) {
+		smp_call_function_single(cpu, check_kvm_target_cpu, &ret, 1);
+		if (ret < 0) {
+			kvm_err("Error, CPU %d not supported!\n", cpu);
+			return -ENODEV;
+		}
 	}
 
 	err = init_hyp_mode();
 	if (err)
 		goto out_err;
 
+	err = register_cpu_notifier(&hyp_init_cpu_nb);
+	if (err) {
+		kvm_err("Cannot register HYP init CPU notifier (%d)\n", err);
+		goto out_err;
+	}
+
 	kvm_coproc_table_init();
 	return 0;
 out_err:
@@ -973,6 +993,7 @@
 /* NOP: Compiling as a module not supported */
 void kvm_arch_exit(void)
 {
+	kvm_perf_teardown();
 }
 
 static int arm_init(void)
diff --git a/arch/arm/kvm/init.S b/arch/arm/kvm/init.S
index 9f37a79..f048338 100644
--- a/arch/arm/kvm/init.S
+++ b/arch/arm/kvm/init.S
@@ -21,13 +21,33 @@
 #include <asm/asm-offsets.h>
 #include <asm/kvm_asm.h>
 #include <asm/kvm_arm.h>
+#include <asm/kvm_mmu.h>
 
 /********************************************************************
  * Hypervisor initialization
  *   - should be called with:
- *       r0,r1 = Hypervisor pgd pointer
- *       r2 = top of Hyp stack (kernel VA)
- *       r3 = pointer to hyp vectors
+ *       r0 = top of Hyp stack (kernel VA)
+ *       r1 = pointer to hyp vectors
+ *       r2,r3 = Hypervisor pgd pointer
+ *
+ * The init scenario is:
+ * - We jump in HYP with four parameters: boot HYP pgd, runtime HYP pgd,
+ *   runtime stack, runtime vectors
+ * - Enable the MMU with the boot pgd
+ * - Jump to a target in the trampoline page (remember, this is the same
+ *   physical page!)
+ * - Now switch to the runtime pgd (same VA, and still the same physical
+ *   page!)
+ * - Invalidate TLBs
+ * - Set stack and vectors
+ * - Profit! (or eret, if you only care about the code).
+ *
+ * As we only have four registers available to pass parameters (and we
+ * need six), we split the init in two phases:
+ * - Phase 1: r0 = 0, r1 = 0, r2,r3 contain the boot PGD.
+ *   Provides the basic HYP init, and enables the MMU.
+ * - Phase 2: r0 = ToS, r1 = vectors, r2,r3 contain the runtime PGD.
+ *   Switches to the runtime PGD, sets the stack and vectors.
  */
 
 	.text
@@ -47,22 +67,25 @@
 	W(b)	.
 
 __do_hyp_init:
+	cmp	r0, #0			@ We have a SP?
+	bne	phase2			@ Yes, second stage init
+
 	@ Set the HTTBR to point to the hypervisor PGD pointer passed
-	mcrr	p15, 4, r0, r1, c2
+	mcrr	p15, 4, r2, r3, c2
 
 	@ Set the HTCR and VTCR to the same shareability and cacheability
 	@ settings as the non-secure TTBCR and with T0SZ == 0.
 	mrc	p15, 4, r0, c2, c0, 2	@ HTCR
-	ldr	r12, =HTCR_MASK
-	bic	r0, r0, r12
+	ldr	r2, =HTCR_MASK
+	bic	r0, r0, r2
 	mrc	p15, 0, r1, c2, c0, 2	@ TTBCR
 	and	r1, r1, #(HTCR_MASK & ~TTBCR_T0SZ)
 	orr	r0, r0, r1
 	mcr	p15, 4, r0, c2, c0, 2	@ HTCR
 
 	mrc	p15, 4, r1, c2, c1, 2	@ VTCR
-	ldr	r12, =VTCR_MASK
-	bic	r1, r1, r12
+	ldr	r2, =VTCR_MASK
+	bic	r1, r1, r2
 	bic	r0, r0, #(~VTCR_HTCR_SH)	@ clear non-reusable HTCR bits
 	orr	r1, r0, r1
 	orr	r1, r1, #(KVM_VTCR_SL0 | KVM_VTCR_T0SZ | KVM_VTCR_S)
@@ -85,24 +108,41 @@
 	@  - Memory alignment checks: enabled
 	@  - MMU: enabled (this code must be run from an identity mapping)
 	mrc	p15, 4, r0, c1, c0, 0	@ HSCR
-	ldr	r12, =HSCTLR_MASK
-	bic	r0, r0, r12
+	ldr	r2, =HSCTLR_MASK
+	bic	r0, r0, r2
 	mrc	p15, 0, r1, c1, c0, 0	@ SCTLR
-	ldr	r12, =(HSCTLR_EE | HSCTLR_FI | HSCTLR_I | HSCTLR_C)
-	and	r1, r1, r12
- ARM(	ldr	r12, =(HSCTLR_M | HSCTLR_A)			)
- THUMB(	ldr	r12, =(HSCTLR_M | HSCTLR_A | HSCTLR_TE)		)
-	orr	r1, r1, r12
+	ldr	r2, =(HSCTLR_EE | HSCTLR_FI | HSCTLR_I | HSCTLR_C)
+	and	r1, r1, r2
+ ARM(	ldr	r2, =(HSCTLR_M | HSCTLR_A)			)
+ THUMB(	ldr	r2, =(HSCTLR_M | HSCTLR_A | HSCTLR_TE)		)
+	orr	r1, r1, r2
 	orr	r0, r0, r1
 	isb
 	mcr	p15, 4, r0, c1, c0, 0	@ HSCR
-	isb
 
-	@ Set stack pointer and return to the kernel
-	mov	sp, r2
+	@ End of init phase-1
+	eret
+
+phase2:
+	@ Set stack pointer
+	mov	sp, r0
 
 	@ Set HVBAR to point to the HYP vectors
-	mcr	p15, 4, r3, c12, c0, 0	@ HVBAR
+	mcr	p15, 4, r1, c12, c0, 0	@ HVBAR
+
+	@ Jump to the trampoline page
+	ldr	r0, =TRAMPOLINE_VA
+	adr	r1, target
+	bfi	r0, r1, #0, #PAGE_SHIFT
+	mov	pc, r0
+
+target:	@ We're now in the trampoline code, switch page tables
+	mcrr	p15, 4, r2, r3, c2
+	isb
+
+	@ Invalidate the old TLBs
+	mcr	p15, 4, r0, c8, c7, 0	@ TLBIALLH
+	dsb
 
 	eret
 
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index 2f12e40..9657065 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -32,8 +32,15 @@
 
 extern char  __hyp_idmap_text_start[], __hyp_idmap_text_end[];
 
+static pgd_t *boot_hyp_pgd;
+static pgd_t *hyp_pgd;
 static DEFINE_MUTEX(kvm_hyp_pgd_mutex);
 
+static void *init_bounce_page;
+static unsigned long hyp_idmap_start;
+static unsigned long hyp_idmap_end;
+static phys_addr_t hyp_idmap_vector;
+
 static void kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)
 {
 	kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, kvm, ipa);
@@ -71,242 +78,6 @@
 	return p;
 }
 
-static void free_ptes(pmd_t *pmd, unsigned long addr)
-{
-	pte_t *pte;
-	unsigned int i;
-
-	for (i = 0; i < PTRS_PER_PMD; i++, addr += PMD_SIZE) {
-		if (!pmd_none(*pmd) && pmd_table(*pmd)) {
-			pte = pte_offset_kernel(pmd, addr);
-			pte_free_kernel(NULL, pte);
-		}
-		pmd++;
-	}
-}
-
-static void free_hyp_pgd_entry(unsigned long addr)
-{
-	pgd_t *pgd;
-	pud_t *pud;
-	pmd_t *pmd;
-	unsigned long hyp_addr = KERN_TO_HYP(addr);
-
-	pgd = hyp_pgd + pgd_index(hyp_addr);
-	pud = pud_offset(pgd, hyp_addr);
-
-	if (pud_none(*pud))
-		return;
-	BUG_ON(pud_bad(*pud));
-
-	pmd = pmd_offset(pud, hyp_addr);
-	free_ptes(pmd, addr);
-	pmd_free(NULL, pmd);
-	pud_clear(pud);
-}
-
-/**
- * free_hyp_pmds - free a Hyp-mode level-2 tables and child level-3 tables
- *
- * Assumes this is a page table used strictly in Hyp-mode and therefore contains
- * either mappings in the kernel memory area (above PAGE_OFFSET), or
- * device mappings in the vmalloc range (from VMALLOC_START to VMALLOC_END).
- */
-void free_hyp_pmds(void)
-{
-	unsigned long addr;
-
-	mutex_lock(&kvm_hyp_pgd_mutex);
-	for (addr = PAGE_OFFSET; virt_addr_valid(addr); addr += PGDIR_SIZE)
-		free_hyp_pgd_entry(addr);
-	for (addr = VMALLOC_START; is_vmalloc_addr((void*)addr); addr += PGDIR_SIZE)
-		free_hyp_pgd_entry(addr);
-	mutex_unlock(&kvm_hyp_pgd_mutex);
-}
-
-static void create_hyp_pte_mappings(pmd_t *pmd, unsigned long start,
-				    unsigned long end)
-{
-	pte_t *pte;
-	unsigned long addr;
-	struct page *page;
-
-	for (addr = start & PAGE_MASK; addr < end; addr += PAGE_SIZE) {
-		unsigned long hyp_addr = KERN_TO_HYP(addr);
-
-		pte = pte_offset_kernel(pmd, hyp_addr);
-		BUG_ON(!virt_addr_valid(addr));
-		page = virt_to_page(addr);
-		kvm_set_pte(pte, mk_pte(page, PAGE_HYP));
-	}
-}
-
-static void create_hyp_io_pte_mappings(pmd_t *pmd, unsigned long start,
-				       unsigned long end,
-				       unsigned long *pfn_base)
-{
-	pte_t *pte;
-	unsigned long addr;
-
-	for (addr = start & PAGE_MASK; addr < end; addr += PAGE_SIZE) {
-		unsigned long hyp_addr = KERN_TO_HYP(addr);
-
-		pte = pte_offset_kernel(pmd, hyp_addr);
-		BUG_ON(pfn_valid(*pfn_base));
-		kvm_set_pte(pte, pfn_pte(*pfn_base, PAGE_HYP_DEVICE));
-		(*pfn_base)++;
-	}
-}
-
-static int create_hyp_pmd_mappings(pud_t *pud, unsigned long start,
-				   unsigned long end, unsigned long *pfn_base)
-{
-	pmd_t *pmd;
-	pte_t *pte;
-	unsigned long addr, next;
-
-	for (addr = start; addr < end; addr = next) {
-		unsigned long hyp_addr = KERN_TO_HYP(addr);
-		pmd = pmd_offset(pud, hyp_addr);
-
-		BUG_ON(pmd_sect(*pmd));
-
-		if (pmd_none(*pmd)) {
-			pte = pte_alloc_one_kernel(NULL, hyp_addr);
-			if (!pte) {
-				kvm_err("Cannot allocate Hyp pte\n");
-				return -ENOMEM;
-			}
-			pmd_populate_kernel(NULL, pmd, pte);
-		}
-
-		next = pmd_addr_end(addr, end);
-
-		/*
-		 * If pfn_base is NULL, we map kernel pages into HYP with the
-		 * virtual address. Otherwise, this is considered an I/O
-		 * mapping and we map the physical region starting at
-		 * *pfn_base to [start, end[.
-		 */
-		if (!pfn_base)
-			create_hyp_pte_mappings(pmd, addr, next);
-		else
-			create_hyp_io_pte_mappings(pmd, addr, next, pfn_base);
-	}
-
-	return 0;
-}
-
-static int __create_hyp_mappings(void *from, void *to, unsigned long *pfn_base)
-{
-	unsigned long start = (unsigned long)from;
-	unsigned long end = (unsigned long)to;
-	pgd_t *pgd;
-	pud_t *pud;
-	pmd_t *pmd;
-	unsigned long addr, next;
-	int err = 0;
-
-	if (start >= end)
-		return -EINVAL;
-	/* Check for a valid kernel memory mapping */
-	if (!pfn_base && (!virt_addr_valid(from) || !virt_addr_valid(to - 1)))
-		return -EINVAL;
-	/* Check for a valid kernel IO mapping */
-	if (pfn_base && (!is_vmalloc_addr(from) || !is_vmalloc_addr(to - 1)))
-		return -EINVAL;
-
-	mutex_lock(&kvm_hyp_pgd_mutex);
-	for (addr = start; addr < end; addr = next) {
-		unsigned long hyp_addr = KERN_TO_HYP(addr);
-		pgd = hyp_pgd + pgd_index(hyp_addr);
-		pud = pud_offset(pgd, hyp_addr);
-
-		if (pud_none_or_clear_bad(pud)) {
-			pmd = pmd_alloc_one(NULL, hyp_addr);
-			if (!pmd) {
-				kvm_err("Cannot allocate Hyp pmd\n");
-				err = -ENOMEM;
-				goto out;
-			}
-			pud_populate(NULL, pud, pmd);
-		}
-
-		next = pgd_addr_end(addr, end);
-		err = create_hyp_pmd_mappings(pud, addr, next, pfn_base);
-		if (err)
-			goto out;
-	}
-out:
-	mutex_unlock(&kvm_hyp_pgd_mutex);
-	return err;
-}
-
-/**
- * create_hyp_mappings - duplicate a kernel virtual address range in Hyp mode
- * @from:	The virtual kernel start address of the range
- * @to:		The virtual kernel end address of the range (exclusive)
- *
- * The same virtual address as the kernel virtual address is also used
- * in Hyp-mode mapping (modulo HYP_PAGE_OFFSET) to the same underlying
- * physical pages.
- *
- * Note: Wrapping around zero in the "to" address is not supported.
- */
-int create_hyp_mappings(void *from, void *to)
-{
-	return __create_hyp_mappings(from, to, NULL);
-}
-
-/**
- * create_hyp_io_mappings - duplicate a kernel IO mapping into Hyp mode
- * @from:	The kernel start VA of the range
- * @to:		The kernel end VA of the range (exclusive)
- * @addr:	The physical start address which gets mapped
- *
- * The resulting HYP VA is the same as the kernel VA, modulo
- * HYP_PAGE_OFFSET.
- */
-int create_hyp_io_mappings(void *from, void *to, phys_addr_t addr)
-{
-	unsigned long pfn = __phys_to_pfn(addr);
-	return __create_hyp_mappings(from, to, &pfn);
-}
-
-/**
- * kvm_alloc_stage2_pgd - allocate level-1 table for stage-2 translation.
- * @kvm:	The KVM struct pointer for the VM.
- *
- * Allocates the 1st level table only of size defined by S2_PGD_ORDER (can
- * support either full 40-bit input addresses or limited to 32-bit input
- * addresses). Clears the allocated pages.
- *
- * Note we don't need locking here as this is only called when the VM is
- * created, which can only be done once.
- */
-int kvm_alloc_stage2_pgd(struct kvm *kvm)
-{
-	pgd_t *pgd;
-
-	if (kvm->arch.pgd != NULL) {
-		kvm_err("kvm_arch already initialized?\n");
-		return -EINVAL;
-	}
-
-	pgd = (pgd_t *)__get_free_pages(GFP_KERNEL, S2_PGD_ORDER);
-	if (!pgd)
-		return -ENOMEM;
-
-	/* stage-2 pgd must be aligned to its size */
-	VM_BUG_ON((unsigned long)pgd & (S2_PGD_SIZE - 1));
-
-	memset(pgd, 0, PTRS_PER_S2_PGD * sizeof(pgd_t));
-	kvm_clean_pgd(pgd);
-	kvm->arch.pgd = pgd;
-
-	return 0;
-}
-
 static void clear_pud_entry(pud_t *pud)
 {
 	pmd_t *pmd_table = pmd_offset(pud, 0);
@@ -343,28 +114,17 @@
 	return page_count(pte_page) == 1;
 }
 
-/**
- * unmap_stage2_range -- Clear stage2 page table entries to unmap a range
- * @kvm:   The VM pointer
- * @start: The intermediate physical base address of the range to unmap
- * @size:  The size of the area to unmap
- *
- * Clear a range of stage-2 mappings, lowering the various ref-counts.  Must
- * be called while holding mmu_lock (unless for freeing the stage2 pgd before
- * destroying the VM), otherwise another faulting VCPU may come in and mess
- * with things behind our backs.
- */
-static void unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size)
+static void unmap_range(pgd_t *pgdp, unsigned long long start, u64 size)
 {
 	pgd_t *pgd;
 	pud_t *pud;
 	pmd_t *pmd;
 	pte_t *pte;
-	phys_addr_t addr = start, end = start + size;
+	unsigned long long addr = start, end = start + size;
 	u64 range;
 
 	while (addr < end) {
-		pgd = kvm->arch.pgd + pgd_index(addr);
+		pgd = pgdp + pgd_index(addr);
 		pud = pud_offset(pgd, addr);
 		if (pud_none(*pud)) {
 			addr += PUD_SIZE;
@@ -396,6 +156,247 @@
 }
 
 /**
+ * free_boot_hyp_pgd - free HYP boot page tables
+ *
+ * Free the HYP boot page tables. The bounce page is also freed.
+ */
+void free_boot_hyp_pgd(void)
+{
+	mutex_lock(&kvm_hyp_pgd_mutex);
+
+	if (boot_hyp_pgd) {
+		unmap_range(boot_hyp_pgd, hyp_idmap_start, PAGE_SIZE);
+		unmap_range(boot_hyp_pgd, TRAMPOLINE_VA, PAGE_SIZE);
+		kfree(boot_hyp_pgd);
+		boot_hyp_pgd = NULL;
+	}
+
+	if (hyp_pgd)
+		unmap_range(hyp_pgd, TRAMPOLINE_VA, PAGE_SIZE);
+
+	kfree(init_bounce_page);
+	init_bounce_page = NULL;
+
+	mutex_unlock(&kvm_hyp_pgd_mutex);
+}
+
+/**
+ * free_hyp_pgds - free Hyp-mode page tables
+ *
+ * Assumes hyp_pgd is a page table used strictly in Hyp-mode and
+ * therefore contains either mappings in the kernel memory area (above
+ * PAGE_OFFSET), or device mappings in the vmalloc range (from
+ * VMALLOC_START to VMALLOC_END).
+ *
+ * boot_hyp_pgd should only map two pages for the init code.
+ */
+void free_hyp_pgds(void)
+{
+	unsigned long addr;
+
+	free_boot_hyp_pgd();
+
+	mutex_lock(&kvm_hyp_pgd_mutex);
+
+	if (hyp_pgd) {
+		for (addr = PAGE_OFFSET; virt_addr_valid(addr); addr += PGDIR_SIZE)
+			unmap_range(hyp_pgd, KERN_TO_HYP(addr), PGDIR_SIZE);
+		for (addr = VMALLOC_START; is_vmalloc_addr((void*)addr); addr += PGDIR_SIZE)
+			unmap_range(hyp_pgd, KERN_TO_HYP(addr), PGDIR_SIZE);
+		kfree(hyp_pgd);
+		hyp_pgd = NULL;
+	}
+
+	mutex_unlock(&kvm_hyp_pgd_mutex);
+}
+
+static void create_hyp_pte_mappings(pmd_t *pmd, unsigned long start,
+				    unsigned long end, unsigned long pfn,
+				    pgprot_t prot)
+{
+	pte_t *pte;
+	unsigned long addr;
+
+	addr = start;
+	do {
+		pte = pte_offset_kernel(pmd, addr);
+		kvm_set_pte(pte, pfn_pte(pfn, prot));
+		get_page(virt_to_page(pte));
+		kvm_flush_dcache_to_poc(pte, sizeof(*pte));
+		pfn++;
+	} while (addr += PAGE_SIZE, addr != end);
+}
+
+static int create_hyp_pmd_mappings(pud_t *pud, unsigned long start,
+				   unsigned long end, unsigned long pfn,
+				   pgprot_t prot)
+{
+	pmd_t *pmd;
+	pte_t *pte;
+	unsigned long addr, next;
+
+	addr = start;
+	do {
+		pmd = pmd_offset(pud, addr);
+
+		BUG_ON(pmd_sect(*pmd));
+
+		if (pmd_none(*pmd)) {
+			pte = pte_alloc_one_kernel(NULL, addr);
+			if (!pte) {
+				kvm_err("Cannot allocate Hyp pte\n");
+				return -ENOMEM;
+			}
+			pmd_populate_kernel(NULL, pmd, pte);
+			get_page(virt_to_page(pmd));
+			kvm_flush_dcache_to_poc(pmd, sizeof(*pmd));
+		}
+
+		next = pmd_addr_end(addr, end);
+
+		create_hyp_pte_mappings(pmd, addr, next, pfn, prot);
+		pfn += (next - addr) >> PAGE_SHIFT;
+	} while (addr = next, addr != end);
+
+	return 0;
+}
+
+static int __create_hyp_mappings(pgd_t *pgdp,
+				 unsigned long start, unsigned long end,
+				 unsigned long pfn, pgprot_t prot)
+{
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	unsigned long addr, next;
+	int err = 0;
+
+	mutex_lock(&kvm_hyp_pgd_mutex);
+	addr = start & PAGE_MASK;
+	end = PAGE_ALIGN(end);
+	do {
+		pgd = pgdp + pgd_index(addr);
+		pud = pud_offset(pgd, addr);
+
+		if (pud_none_or_clear_bad(pud)) {
+			pmd = pmd_alloc_one(NULL, addr);
+			if (!pmd) {
+				kvm_err("Cannot allocate Hyp pmd\n");
+				err = -ENOMEM;
+				goto out;
+			}
+			pud_populate(NULL, pud, pmd);
+			get_page(virt_to_page(pud));
+			kvm_flush_dcache_to_poc(pud, sizeof(*pud));
+		}
+
+		next = pgd_addr_end(addr, end);
+		err = create_hyp_pmd_mappings(pud, addr, next, pfn, prot);
+		if (err)
+			goto out;
+		pfn += (next - addr) >> PAGE_SHIFT;
+	} while (addr = next, addr != end);
+out:
+	mutex_unlock(&kvm_hyp_pgd_mutex);
+	return err;
+}
+
+/**
+ * create_hyp_mappings - duplicate a kernel virtual address range in Hyp mode
+ * @from:	The virtual kernel start address of the range
+ * @to:		The virtual kernel end address of the range (exclusive)
+ *
+ * The same virtual address as the kernel virtual address is also used
+ * in Hyp-mode mapping (modulo HYP_PAGE_OFFSET) to the same underlying
+ * physical pages.
+ */
+int create_hyp_mappings(void *from, void *to)
+{
+	unsigned long phys_addr = virt_to_phys(from);
+	unsigned long start = KERN_TO_HYP((unsigned long)from);
+	unsigned long end = KERN_TO_HYP((unsigned long)to);
+
+	/* Check for a valid kernel memory mapping */
+	if (!virt_addr_valid(from) || !virt_addr_valid(to - 1))
+		return -EINVAL;
+
+	return __create_hyp_mappings(hyp_pgd, start, end,
+				     __phys_to_pfn(phys_addr), PAGE_HYP);
+}
+
+/**
+ * create_hyp_io_mappings - duplicate a kernel IO mapping into Hyp mode
+ * @from:	The kernel start VA of the range
+ * @to:		The kernel end VA of the range (exclusive)
+ * @phys_addr:	The physical start address which gets mapped
+ *
+ * The resulting HYP VA is the same as the kernel VA, modulo
+ * HYP_PAGE_OFFSET.
+ */
+int create_hyp_io_mappings(void *from, void *to, phys_addr_t phys_addr)
+{
+	unsigned long start = KERN_TO_HYP((unsigned long)from);
+	unsigned long end = KERN_TO_HYP((unsigned long)to);
+
+	/* Check for a valid kernel IO mapping */
+	if (!is_vmalloc_addr(from) || !is_vmalloc_addr(to - 1))
+		return -EINVAL;
+
+	return __create_hyp_mappings(hyp_pgd, start, end,
+				     __phys_to_pfn(phys_addr), PAGE_HYP_DEVICE);
+}
+
+/**
+ * kvm_alloc_stage2_pgd - allocate level-1 table for stage-2 translation.
+ * @kvm:	The KVM struct pointer for the VM.
+ *
+ * Allocates the 1st level table only of size defined by S2_PGD_ORDER (can
+ * support either full 40-bit input addresses or limited to 32-bit input
+ * addresses). Clears the allocated pages.
+ *
+ * Note we don't need locking here as this is only called when the VM is
+ * created, which can only be done once.
+ */
+int kvm_alloc_stage2_pgd(struct kvm *kvm)
+{
+	pgd_t *pgd;
+
+	if (kvm->arch.pgd != NULL) {
+		kvm_err("kvm_arch already initialized?\n");
+		return -EINVAL;
+	}
+
+	pgd = (pgd_t *)__get_free_pages(GFP_KERNEL, S2_PGD_ORDER);
+	if (!pgd)
+		return -ENOMEM;
+
+	/* stage-2 pgd must be aligned to its size */
+	VM_BUG_ON((unsigned long)pgd & (S2_PGD_SIZE - 1));
+
+	memset(pgd, 0, PTRS_PER_S2_PGD * sizeof(pgd_t));
+	kvm_clean_pgd(pgd);
+	kvm->arch.pgd = pgd;
+
+	return 0;
+}
+
+/**
+ * unmap_stage2_range -- Clear stage2 page table entries to unmap a range
+ * @kvm:   The VM pointer
+ * @start: The intermediate physical base address of the range to unmap
+ * @size:  The size of the area to unmap
+ *
+ * Clear a range of stage-2 mappings, lowering the various ref-counts.  Must
+ * be called while holding mmu_lock (unless for freeing the stage2 pgd before
+ * destroying the VM), otherwise another faulting VCPU may come in and mess
+ * with things behind our backs.
+ */
+static void unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size)
+{
+	unmap_range(kvm->arch.pgd, start, size);
+}
+
+/**
  * kvm_free_stage2_pgd - free all stage-2 tables
  * @kvm:	The KVM struct pointer for the VM.
  *
@@ -728,47 +729,105 @@
 
 phys_addr_t kvm_mmu_get_httbr(void)
 {
-	VM_BUG_ON(!virt_addr_valid(hyp_pgd));
 	return virt_to_phys(hyp_pgd);
 }
 
+phys_addr_t kvm_mmu_get_boot_httbr(void)
+{
+	return virt_to_phys(boot_hyp_pgd);
+}
+
+phys_addr_t kvm_get_idmap_vector(void)
+{
+	return hyp_idmap_vector;
+}
+
 int kvm_mmu_init(void)
 {
-	if (!hyp_pgd) {
+	int err;
+
+	hyp_idmap_start = virt_to_phys(__hyp_idmap_text_start);
+	hyp_idmap_end = virt_to_phys(__hyp_idmap_text_end);
+	hyp_idmap_vector = virt_to_phys(__kvm_hyp_init);
+
+	if ((hyp_idmap_start ^ hyp_idmap_end) & PAGE_MASK) {
+		/*
+		 * Our init code is crossing a page boundary. Allocate
+		 * a bounce page, copy the code over and use that.
+		 */
+		size_t len = __hyp_idmap_text_end - __hyp_idmap_text_start;
+		phys_addr_t phys_base;
+
+		init_bounce_page = kmalloc(PAGE_SIZE, GFP_KERNEL);
+		if (!init_bounce_page) {
+			kvm_err("Couldn't allocate HYP init bounce page\n");
+			err = -ENOMEM;
+			goto out;
+		}
+
+		memcpy(init_bounce_page, __hyp_idmap_text_start, len);
+		/*
+		 * Warning: the code we just copied to the bounce page
+		 * must be flushed to the point of coherency.
+		 * Otherwise, the data may be sitting in L2, and HYP
+		 * mode won't be able to observe it as it runs with
+		 * caches off at that point.
+		 */
+		kvm_flush_dcache_to_poc(init_bounce_page, len);
+
+		phys_base = virt_to_phys(init_bounce_page);
+		hyp_idmap_vector += phys_base - hyp_idmap_start;
+		hyp_idmap_start = phys_base;
+		hyp_idmap_end = phys_base + len;
+
+		kvm_info("Using HYP init bounce page @%lx\n",
+			 (unsigned long)phys_base);
+	}
+
+	hyp_pgd = kzalloc(PTRS_PER_PGD * sizeof(pgd_t), GFP_KERNEL);
+	boot_hyp_pgd = kzalloc(PTRS_PER_PGD * sizeof(pgd_t), GFP_KERNEL);
+	if (!hyp_pgd || !boot_hyp_pgd) {
 		kvm_err("Hyp mode PGD not allocated\n");
-		return -ENOMEM;
+		err = -ENOMEM;
+		goto out;
+	}
+
+	/* Create the idmap in the boot page tables */
+	err = __create_hyp_mappings(boot_hyp_pgd,
+				      hyp_idmap_start, hyp_idmap_end,
+				      __phys_to_pfn(hyp_idmap_start),
+				      PAGE_HYP);
+
+	if (err) {
+		kvm_err("Failed to idmap %lx-%lx\n",
+			hyp_idmap_start, hyp_idmap_end);
+		goto out;
+	}
+
+	/* Map the very same page at the trampoline VA */
+	err = __create_hyp_mappings(boot_hyp_pgd,
+				      TRAMPOLINE_VA, TRAMPOLINE_VA + PAGE_SIZE,
+				      __phys_to_pfn(hyp_idmap_start),
+				      PAGE_HYP);
+	if (err) {
+		kvm_err("Failed to map trampoline @%lx into boot HYP pgd\n",
+			TRAMPOLINE_VA);
+		goto out;
+	}
+
+	/* Map the same page again into the runtime page tables */
+	err = __create_hyp_mappings(hyp_pgd,
+				      TRAMPOLINE_VA, TRAMPOLINE_VA + PAGE_SIZE,
+				      __phys_to_pfn(hyp_idmap_start),
+				      PAGE_HYP);
+	if (err) {
+		kvm_err("Failed to map trampoline @%lx into runtime HYP pgd\n",
+			TRAMPOLINE_VA);
+		goto out;
 	}
 
 	return 0;
-}
-
-/**
- * kvm_clear_idmap - remove all idmaps from the hyp pgd
- *
- * Free the underlying pmds for all pgds in range and clear the pgds (but
- * don't free them) afterwards.
- */
-void kvm_clear_hyp_idmap(void)
-{
-	unsigned long addr, end;
-	unsigned long next;
-	pgd_t *pgd = hyp_pgd;
-	pud_t *pud;
-	pmd_t *pmd;
-
-	addr = virt_to_phys(__hyp_idmap_text_start);
-	end = virt_to_phys(__hyp_idmap_text_end);
-
-	pgd += pgd_index(addr);
-	do {
-		next = pgd_addr_end(addr, end);
-		if (pgd_none_or_clear_bad(pgd))
-			continue;
-		pud = pud_offset(pgd, addr);
-		pmd = pmd_offset(pud, addr);
-
-		pud_clear(pud);
-		kvm_clean_pmd_entry(pmd);
-		pmd_free(NULL, (pmd_t *)((unsigned long)pmd & PAGE_MASK));
-	} while (pgd++, addr = next, addr < end);
+out:
+	free_hyp_pgds();
+	return err;
 }
diff --git a/arch/arm/kvm/perf.c b/arch/arm/kvm/perf.c
new file mode 100644
index 0000000..1a3849d
--- /dev/null
+++ b/arch/arm/kvm/perf.c
@@ -0,0 +1,68 @@
+/*
+ * Based on the x86 implementation.
+ *
+ * Copyright (C) 2012 ARM Ltd.
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/perf_event.h>
+#include <linux/kvm_host.h>
+
+#include <asm/kvm_emulate.h>
+
+static int kvm_is_in_guest(void)
+{
+	return kvm_arm_get_running_vcpu() != NULL;
+}
+
+static int kvm_is_user_mode(void)
+{
+	struct kvm_vcpu *vcpu;
+
+	vcpu = kvm_arm_get_running_vcpu();
+
+	if (vcpu)
+		return !vcpu_mode_priv(vcpu);
+
+	return 0;
+}
+
+static unsigned long kvm_get_guest_ip(void)
+{
+	struct kvm_vcpu *vcpu;
+
+	vcpu = kvm_arm_get_running_vcpu();
+
+	if (vcpu)
+		return *vcpu_pc(vcpu);
+
+	return 0;
+}
+
+static struct perf_guest_info_callbacks kvm_guest_cbs = {
+	.is_in_guest	= kvm_is_in_guest,
+	.is_user_mode	= kvm_is_user_mode,
+	.get_guest_ip	= kvm_get_guest_ip,
+};
+
+int kvm_perf_init(void)
+{
+	return perf_register_guest_info_callbacks(&kvm_guest_cbs);
+}
+
+int kvm_perf_teardown(void)
+{
+	return perf_unregister_guest_info_callbacks(&kvm_guest_cbs);
+}
diff --git a/arch/arm/mach-imx/Kconfig b/arch/arm/mach-imx/Kconfig
index d58ad4f..2ebc97e 100644
--- a/arch/arm/mach-imx/Kconfig
+++ b/arch/arm/mach-imx/Kconfig
@@ -466,8 +466,6 @@
 	depends on MACH_MX31ADS
 	depends on MFD_WM8350_I2C
 	depends on REGULATOR_WM8350 = y
-	select MFD_WM8350_CONFIG_MODE_0
-	select MFD_WM8352_CONFIG_MODE_0
 	help
 	  Include support for the Wolfson Microelectronics 1133-EV1 PMU
 	  and audio module for the MX31ADS platform.
diff --git a/arch/arm/mach-s3c64xx/Kconfig b/arch/arm/mach-s3c64xx/Kconfig
index 283cb77..2057853 100644
--- a/arch/arm/mach-s3c64xx/Kconfig
+++ b/arch/arm/mach-s3c64xx/Kconfig
@@ -200,10 +200,7 @@
 config SMDK6410_WM1190_EV1
 	bool "Support Wolfson Microelectronics 1190-EV1 PMIC card"
 	depends on MACH_SMDK6410
-	select MFD_WM8350_CONFIG_MODE_0
-	select MFD_WM8350_CONFIG_MODE_3
 	select MFD_WM8350_I2C
-	select MFD_WM8352_CONFIG_MODE_0
 	select REGULATOR
 	select REGULATOR_WM8350
 	select SAMSUNG_GPIO_EXTRA64
diff --git a/arch/arm/mach-s3c64xx/mach-crag6410-module.c b/arch/arm/mach-s3c64xx/mach-crag6410-module.c
index a946b75..7ccfef2 100644
--- a/arch/arm/mach-s3c64xx/mach-crag6410-module.c
+++ b/arch/arm/mach-s3c64xx/mach-crag6410-module.c
@@ -208,7 +208,7 @@
 static struct arizona_pdata wm5102_reva_pdata = {
 	.ldoena = S3C64XX_GPN(7),
 	.gpio_base = CODEC_GPIO_BASE,
-	.irq_active_high = true,
+	.irq_flags = IRQF_TRIGGER_HIGH,
 	.micd_pol_gpio = CODEC_GPIO_BASE + 4,
 	.micd_rate = 6,
 	.gpio_defaults = {
@@ -238,7 +238,7 @@
 static struct arizona_pdata wm5102_pdata = {
 	.ldoena = S3C64XX_GPN(7),
 	.gpio_base = CODEC_GPIO_BASE,
-	.irq_active_high = true,
+	.irq_flags = IRQF_TRIGGER_HIGH,
 	.micd_pol_gpio = CODEC_GPIO_BASE + 2,
 	.gpio_defaults = {
 		[2] = 0x10000, /* AIF3TXLRCLK */
diff --git a/arch/arm/mm/idmap.c b/arch/arm/mm/idmap.c
index 5ee505c..83cb3ac 100644
--- a/arch/arm/mm/idmap.c
+++ b/arch/arm/mm/idmap.c
@@ -8,7 +8,6 @@
 #include <asm/pgtable.h>
 #include <asm/sections.h>
 #include <asm/system_info.h>
-#include <asm/virt.h>
 
 pgd_t *idmap_pgd;
 
@@ -83,37 +82,10 @@
 	} while (pgd++, addr = next, addr != end);
 }
 
-#if defined(CONFIG_ARM_VIRT_EXT) && defined(CONFIG_ARM_LPAE)
-pgd_t *hyp_pgd;
-
-extern char  __hyp_idmap_text_start[], __hyp_idmap_text_end[];
-
-static int __init init_static_idmap_hyp(void)
-{
-	hyp_pgd = kzalloc(PTRS_PER_PGD * sizeof(pgd_t), GFP_KERNEL);
-	if (!hyp_pgd)
-		return -ENOMEM;
-
-	pr_info("Setting up static HYP identity map for 0x%p - 0x%p\n",
-		__hyp_idmap_text_start, __hyp_idmap_text_end);
-	identity_mapping_add(hyp_pgd, __hyp_idmap_text_start,
-			     __hyp_idmap_text_end, PMD_SECT_AP1);
-
-	return 0;
-}
-#else
-static int __init init_static_idmap_hyp(void)
-{
-	return 0;
-}
-#endif
-
 extern char  __idmap_text_start[], __idmap_text_end[];
 
 static int __init init_static_idmap(void)
 {
-	int ret;
-
 	idmap_pgd = pgd_alloc(&init_mm);
 	if (!idmap_pgd)
 		return -ENOMEM;
@@ -123,12 +95,10 @@
 	identity_mapping_add(idmap_pgd, __idmap_text_start,
 			     __idmap_text_end, 0);
 
-	ret = init_static_idmap_hyp();
-
 	/* Flush L1 for the hardware to see this page table content */
 	flush_cache_louis();
 
-	return ret;
+	return 0;
 }
 early_initcall(init_static_idmap);
 
diff --git a/arch/ia64/include/asm/kvm_host.h b/arch/ia64/include/asm/kvm_host.h
index cfa7498..989dd3f 100644
--- a/arch/ia64/include/asm/kvm_host.h
+++ b/arch/ia64/include/asm/kvm_host.h
@@ -26,6 +26,7 @@
 #define KVM_USER_MEM_SLOTS 32
 
 #define KVM_COALESCED_MMIO_PAGE_OFFSET 1
+#define KVM_IRQCHIP_NUM_PINS  KVM_IOAPIC_NUM_PINS
 
 /* define exit reasons from vmm to kvm*/
 #define EXIT_REASON_VM_PANIC		0
diff --git a/arch/ia64/include/uapi/asm/kvm.h b/arch/ia64/include/uapi/asm/kvm.h
index ec6c6b3..99503c2 100644
--- a/arch/ia64/include/uapi/asm/kvm.h
+++ b/arch/ia64/include/uapi/asm/kvm.h
@@ -27,7 +27,6 @@
 /* Select x86 specific features in <linux/kvm.h> */
 #define __KVM_HAVE_IOAPIC
 #define __KVM_HAVE_IRQ_LINE
-#define __KVM_HAVE_DEVICE_ASSIGNMENT
 
 /* Architectural interrupt line count. */
 #define KVM_NR_INTERRUPTS 256
diff --git a/arch/ia64/kvm/Kconfig b/arch/ia64/kvm/Kconfig
index 2cd225f..990b864 100644
--- a/arch/ia64/kvm/Kconfig
+++ b/arch/ia64/kvm/Kconfig
@@ -21,12 +21,11 @@
 	tristate "Kernel-based Virtual Machine (KVM) support"
 	depends on BROKEN
 	depends on HAVE_KVM && MODULES
-	# for device assignment:
-	depends on PCI
 	depends on BROKEN
 	select PREEMPT_NOTIFIERS
 	select ANON_INODES
 	select HAVE_KVM_IRQCHIP
+	select HAVE_KVM_IRQ_ROUTING
 	select KVM_APIC_ARCHITECTURE
 	select KVM_MMIO
 	---help---
@@ -50,6 +49,17 @@
 	  Provides support for KVM on Itanium 2 processors equipped with the VT
 	  extensions.
 
+config KVM_DEVICE_ASSIGNMENT
+	bool "KVM legacy PCI device assignment support"
+	depends on KVM && PCI && IOMMU_API
+	default y
+	---help---
+	  Provide support for legacy PCI device assignment through KVM.  The
+	  kernel now also supports a full featured userspace device driver
+	  framework through VFIO, which supersedes much of this support.
+
+	  If unsure, say Y.
+
 source drivers/vhost/Kconfig
 
 endif # VIRTUALIZATION
diff --git a/arch/ia64/kvm/Makefile b/arch/ia64/kvm/Makefile
index db3d7c5..1a40537 100644
--- a/arch/ia64/kvm/Makefile
+++ b/arch/ia64/kvm/Makefile
@@ -49,10 +49,10 @@
 asflags-y := -Ivirt/kvm -Iarch/ia64/kvm/
 
 common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \
-		coalesced_mmio.o irq_comm.o assigned-dev.o)
+		coalesced_mmio.o irq_comm.o)
 
-ifeq ($(CONFIG_IOMMU_API),y)
-common-objs += $(addprefix ../../../virt/kvm/, iommu.o)
+ifeq ($(CONFIG_KVM_DEVICE_ASSIGNMENT),y)
+common-objs += $(addprefix ../../../virt/kvm/, assigned-dev.o iommu.o)
 endif
 
 kvm-objs := $(common-objs) kvm-ia64.o kvm_fw.o
diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index ad3126a..5b2dc0d 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -204,9 +204,11 @@
 	case KVM_CAP_COALESCED_MMIO:
 		r = KVM_COALESCED_MMIO_PAGE_OFFSET;
 		break;
+#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
 	case KVM_CAP_IOMMU:
 		r = iommu_present(&pci_bus_type);
 		break;
+#endif
 	default:
 		r = 0;
 	}
@@ -924,13 +926,15 @@
 	return 0;
 }
 
-int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event)
+int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event,
+		bool line_status)
 {
 	if (!irqchip_in_kernel(kvm))
 		return -ENXIO;
 
 	irq_event->status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
-					irq_event->irq, irq_event->level);
+					irq_event->irq, irq_event->level,
+					line_status);
 	return 0;
 }
 
@@ -942,24 +946,6 @@
 	int r = -ENOTTY;
 
 	switch (ioctl) {
-	case KVM_SET_MEMORY_REGION: {
-		struct kvm_memory_region kvm_mem;
-		struct kvm_userspace_memory_region kvm_userspace_mem;
-
-		r = -EFAULT;
-		if (copy_from_user(&kvm_mem, argp, sizeof kvm_mem))
-			goto out;
-		kvm_userspace_mem.slot = kvm_mem.slot;
-		kvm_userspace_mem.flags = kvm_mem.flags;
-		kvm_userspace_mem.guest_phys_addr =
-					kvm_mem.guest_phys_addr;
-		kvm_userspace_mem.memory_size = kvm_mem.memory_size;
-		r = kvm_vm_ioctl_set_memory_region(kvm,
-					&kvm_userspace_mem, false);
-		if (r)
-			goto out;
-		break;
-		}
 	case KVM_CREATE_IRQCHIP:
 		r = -EFAULT;
 		r = kvm_ioapic_init(kvm);
@@ -1384,9 +1370,7 @@
 void kvm_arch_destroy_vm(struct kvm *kvm)
 {
 	kvm_iommu_unmap_guest(kvm);
-#ifdef  KVM_CAP_DEVICE_ASSIGNMENT
 	kvm_free_all_assigned_devices(kvm);
-#endif
 	kfree(kvm->arch.vioapic);
 	kvm_release_vm_pages(kvm);
 }
@@ -1578,9 +1562,8 @@
 
 int kvm_arch_prepare_memory_region(struct kvm *kvm,
 		struct kvm_memory_slot *memslot,
-		struct kvm_memory_slot old,
 		struct kvm_userspace_memory_region *mem,
-		bool user_alloc)
+		enum kvm_mr_change change)
 {
 	unsigned long i;
 	unsigned long pfn;
@@ -1610,8 +1593,8 @@
 
 void kvm_arch_commit_memory_region(struct kvm *kvm,
 		struct kvm_userspace_memory_region *mem,
-		struct kvm_memory_slot old,
-		bool user_alloc)
+		const struct kvm_memory_slot *old,
+		enum kvm_mr_change change)
 {
 	return;
 }
diff --git a/arch/ia64/kvm/lapic.h b/arch/ia64/kvm/lapic.h
index c3e2935..c5f92a9 100644
--- a/arch/ia64/kvm/lapic.h
+++ b/arch/ia64/kvm/lapic.h
@@ -27,10 +27,4 @@
 #define kvm_apic_present(x) (true)
 #define kvm_lapic_enabled(x) (true)
 
-static inline bool kvm_apic_vid_enabled(void)
-{
-	/* IA64 has no apicv supporting, do nothing here */
-	return false;
-}
-
 #endif
diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h
index 4bc2c3d..cf4df8e 100644
--- a/arch/powerpc/include/asm/hvcall.h
+++ b/arch/powerpc/include/asm/hvcall.h
@@ -270,6 +270,9 @@
 #define H_SET_MODE		0x31C
 #define MAX_HCALL_OPCODE	H_SET_MODE
 
+/* Platform specific hcalls, used by KVM */
+#define H_RTAS			0xf000
+
 #ifndef __ASSEMBLY__
 
 /**
diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index 5a56e1c..349ed85 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -142,6 +142,8 @@
 extern int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, bool data);
 extern int kvmppc_st(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, bool data);
 extern void kvmppc_book3s_queue_irqprio(struct kvm_vcpu *vcpu, unsigned int vec);
+extern void kvmppc_book3s_dequeue_irqprio(struct kvm_vcpu *vcpu,
+					  unsigned int vec);
 extern void kvmppc_inject_interrupt(struct kvm_vcpu *vcpu, int vec, u64 flags);
 extern void kvmppc_set_bat(struct kvm_vcpu *vcpu, struct kvmppc_bat *bat,
 			   bool upper, u32 val);
@@ -156,7 +158,8 @@
 			unsigned long pte_index);
 extern void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long addr,
 			unsigned long *nb_ret);
-extern void kvmppc_unpin_guest_page(struct kvm *kvm, void *addr);
+extern void kvmppc_unpin_guest_page(struct kvm *kvm, void *addr,
+			unsigned long gpa, bool dirty);
 extern long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
 			long pte_index, unsigned long pteh, unsigned long ptel);
 extern long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
@@ -458,6 +461,8 @@
 #define OSI_SC_MAGIC_R4			0x77810F9B
 
 #define INS_DCBZ			0x7c0007ec
+/* TO = 31 for unconditional trap */
+#define INS_TW				0x7fe00008
 
 /* LPIDs we support with this build -- runtime limit may be lower */
 #define KVMPPC_NR_LPIDS			(LPID_RSVD + 1)
diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
index 38bec1d..9c1ff33 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -268,4 +268,17 @@
 		(HPTE_V_1TB_SEG | (VRMA_VSID << (40 - 16)));
 }
 
+#ifdef CONFIG_KVM_BOOK3S_64_HV
+/*
+ * Note modification of an HPTE; set the HPTE modified bit
+ * if anyone is interested.
+ */
+static inline void note_hpte_modification(struct kvm *kvm,
+					  struct revmap_entry *rev)
+{
+	if (atomic_read(&kvm->arch.hpte_mod_interest))
+		rev->guest_rpte |= HPTE_GR_MODIFIED;
+}
+#endif /* CONFIG_KVM_BOOK3S_64_HV */
+
 #endif /* __ASM_KVM_BOOK3S_64_H__ */
diff --git a/arch/powerpc/include/asm/kvm_book3s_asm.h b/arch/powerpc/include/asm/kvm_book3s_asm.h
index cdc3d27..9039d3c 100644
--- a/arch/powerpc/include/asm/kvm_book3s_asm.h
+++ b/arch/powerpc/include/asm/kvm_book3s_asm.h
@@ -20,6 +20,11 @@
 #ifndef __ASM_KVM_BOOK3S_ASM_H__
 #define __ASM_KVM_BOOK3S_ASM_H__
 
+/* XICS ICP register offsets */
+#define XICS_XIRR		4
+#define XICS_MFRR		0xc
+#define XICS_IPI		2	/* interrupt source # for IPIs */
+
 #ifdef __ASSEMBLY__
 
 #ifdef CONFIG_KVM_BOOK3S_HANDLER
@@ -81,10 +86,11 @@
 #ifdef CONFIG_KVM_BOOK3S_64_HV
 	u8 hwthread_req;
 	u8 hwthread_state;
-
+	u8 host_ipi;
 	struct kvm_vcpu *kvm_vcpu;
 	struct kvmppc_vcore *kvm_vcore;
 	unsigned long xics_phys;
+	u32 saved_xirr;
 	u64 dabr;
 	u64 host_mmcr[3];
 	u32 host_pmc[8];
diff --git a/arch/powerpc/include/asm/kvm_booke.h b/arch/powerpc/include/asm/kvm_booke.h
index b7cd335..d3c1eb3 100644
--- a/arch/powerpc/include/asm/kvm_booke.h
+++ b/arch/powerpc/include/asm/kvm_booke.h
@@ -26,6 +26,8 @@
 /* LPIDs we support with this build -- runtime limit may be lower */
 #define KVMPPC_NR_LPIDS                        64
 
+#define KVMPPC_INST_EHPRIV	0x7c00021c
+
 static inline void kvmppc_set_gpr(struct kvm_vcpu *vcpu, int num, ulong val)
 {
 	vcpu->arch.gpr[num] = val;
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index d1bb860..af326cd 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -44,6 +44,10 @@
 #define KVM_COALESCED_MMIO_PAGE_OFFSET 1
 #endif
 
+/* These values are internal and can be increased later */
+#define KVM_NR_IRQCHIPS          1
+#define KVM_IRQCHIP_NUM_PINS     256
+
 #if !defined(CONFIG_KVM_440)
 #include <linux/mmu_notifier.h>
 
@@ -188,6 +192,10 @@
 	int		 type;
 };
 
+/* XICS components, defined in book3s_xics.c */
+struct kvmppc_xics;
+struct kvmppc_icp;
+
 /*
  * The reverse mapping array has one entry for each HPTE,
  * which stores the guest's view of the second word of the HPTE
@@ -255,6 +263,13 @@
 #endif /* CONFIG_KVM_BOOK3S_64_HV */
 #ifdef CONFIG_PPC_BOOK3S_64
 	struct list_head spapr_tce_tables;
+	struct list_head rtas_tokens;
+#endif
+#ifdef CONFIG_KVM_MPIC
+	struct openpic *mpic;
+#endif
+#ifdef CONFIG_KVM_XICS
+	struct kvmppc_xics *xics;
 #endif
 };
 
@@ -301,11 +316,13 @@
  * that a guest can register.
  */
 struct kvmppc_vpa {
+	unsigned long gpa;	/* Current guest phys addr */
 	void *pinned_addr;	/* Address in kernel linear mapping */
 	void *pinned_end;	/* End of region */
 	unsigned long next_gpa;	/* Guest phys addr for update */
 	unsigned long len;	/* Number of bytes required */
 	u8 update_pending;	/* 1 => update pinned_addr from next_gpa */
+	bool dirty;		/* true => area has been modified by kernel */
 };
 
 struct kvmppc_pte {
@@ -359,6 +376,11 @@
 #define KVMPPC_BOOKE_MAX_IAC	4
 #define KVMPPC_BOOKE_MAX_DAC	2
 
+/* KVMPPC_EPR_USER takes precedence over KVMPPC_EPR_KERNEL */
+#define KVMPPC_EPR_NONE		0 /* EPR not supported */
+#define KVMPPC_EPR_USER		1 /* exit to userspace to fill EPR */
+#define KVMPPC_EPR_KERNEL	2 /* in-kernel irqchip */
+
 struct kvmppc_booke_debug_reg {
 	u32 dbcr0;
 	u32 dbcr1;
@@ -370,6 +392,12 @@
 	u64 dac[KVMPPC_BOOKE_MAX_DAC];
 };
 
+#define KVMPPC_IRQ_DEFAULT	0
+#define KVMPPC_IRQ_MPIC		1
+#define KVMPPC_IRQ_XICS		2
+
+struct openpic;
+
 struct kvm_vcpu_arch {
 	ulong host_stack;
 	u32 host_pid;
@@ -502,8 +530,11 @@
 	spinlock_t wdt_lock;
 	struct timer_list wdt_timer;
 	u32 tlbcfg[4];
+	u32 tlbps[4];
 	u32 mmucfg;
+	u32 eptcfg;
 	u32 epr;
+	u32 crit_save;
 	struct kvmppc_booke_debug_reg dbg_reg;
 #endif
 	gpa_t paddr_accessed;
@@ -521,7 +552,7 @@
 	u8 sane;
 	u8 cpu_type;
 	u8 hcall_needed;
-	u8 epr_enabled;
+	u8 epr_flags; /* KVMPPC_EPR_xxx */
 	u8 epr_needed;
 
 	u32 cpr0_cfgaddr; /* holds the last set cpr0_cfgaddr */
@@ -548,6 +579,13 @@
 	unsigned long magic_page_pa; /* phys addr to map the magic page to */
 	unsigned long magic_page_ea; /* effect. addr to map the magic page to */
 
+	int irq_type;		/* one of KVMPPC_IRQ_* */
+	int irq_cpu_id;
+	struct openpic *mpic;	/* KVMPPC_IRQ_MPIC */
+#ifdef CONFIG_KVM_XICS
+	struct kvmppc_icp *icp; /* XICS presentation controller */
+#endif
+
 #ifdef CONFIG_KVM_BOOK3S_64_HV
 	struct kvm_vcpu_arch_shared shregs;
 
@@ -588,5 +626,6 @@
 #define KVM_MMIO_REG_FQPR	0x0060
 
 #define __KVM_HAVE_ARCH_WQP
+#define __KVM_HAVE_CREATE_DEVICE
 
 #endif /* __POWERPC_KVM_HOST_H__ */
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 44a657a..a5287fe 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -44,7 +44,7 @@
 	EMULATE_DO_DCR,       /* kvm_run filled with DCR request */
 	EMULATE_FAIL,         /* can't emulate this instruction */
 	EMULATE_AGAIN,        /* something went wrong. go again */
-	EMULATE_DO_PAPR,      /* kvm_run filled with PAPR request */
+	EMULATE_EXIT_USER,    /* emulation requires exit to user-space */
 };
 
 extern int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu);
@@ -104,8 +104,7 @@
 extern void kvmppc_core_dequeue_dec(struct kvm_vcpu *vcpu);
 extern void kvmppc_core_queue_external(struct kvm_vcpu *vcpu,
                                        struct kvm_interrupt *irq);
-extern void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu,
-                                         struct kvm_interrupt *irq);
+extern void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu);
 extern void kvmppc_core_flush_tlb(struct kvm_vcpu *vcpu);
 
 extern int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
@@ -131,6 +130,7 @@
 extern void kvmppc_map_vrma(struct kvm_vcpu *vcpu,
 			struct kvm_memory_slot *memslot, unsigned long porder);
 extern int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu);
+
 extern long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
 				struct kvm_create_spapr_tce *args);
 extern long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
@@ -152,7 +152,7 @@
 				struct kvm_userspace_memory_region *mem);
 extern void kvmppc_core_commit_memory_region(struct kvm *kvm,
 				struct kvm_userspace_memory_region *mem,
-				struct kvm_memory_slot old);
+				const struct kvm_memory_slot *old);
 extern int kvm_vm_ioctl_get_smmu_info(struct kvm *kvm,
 				      struct kvm_ppc_smmu_info *info);
 extern void kvmppc_core_flush_memslot(struct kvm *kvm,
@@ -165,6 +165,18 @@
 
 extern int kvm_vm_ioctl_get_htab_fd(struct kvm *kvm, struct kvm_get_htab_fd *);
 
+int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq);
+
+extern int kvm_vm_ioctl_rtas_define_token(struct kvm *kvm, void __user *argp);
+extern int kvmppc_rtas_hcall(struct kvm_vcpu *vcpu);
+extern void kvmppc_rtas_tokens_free(struct kvm *kvm);
+extern int kvmppc_xics_set_xive(struct kvm *kvm, u32 irq, u32 server,
+				u32 priority);
+extern int kvmppc_xics_get_xive(struct kvm *kvm, u32 irq, u32 *server,
+				u32 *priority);
+extern int kvmppc_xics_int_on(struct kvm *kvm, u32 irq);
+extern int kvmppc_xics_int_off(struct kvm *kvm, u32 irq);
+
 /*
  * Cuts out inst bits with ordering according to spec.
  * That means the leftmost bit is zero. All given bits are included.
@@ -246,12 +258,29 @@
 
 void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 pid);
 
+struct openpic;
+
 #ifdef CONFIG_KVM_BOOK3S_64_HV
 static inline void kvmppc_set_xics_phys(int cpu, unsigned long addr)
 {
 	paca[cpu].kvm_hstate.xics_phys = addr;
 }
 
+static inline u32 kvmppc_get_xics_latch(void)
+{
+	u32 xirr = get_paca()->kvm_hstate.saved_xirr;
+
+	get_paca()->kvm_hstate.saved_xirr = 0;
+
+	return xirr;
+}
+
+static inline void kvmppc_set_host_ipi(int cpu, u8 host_ipi)
+{
+	paca[cpu].kvm_hstate.host_ipi = host_ipi;
+}
+
+extern void kvmppc_fast_vcpu_kick(struct kvm_vcpu *vcpu);
 extern void kvm_linear_init(void);
 
 #else
@@ -260,6 +289,46 @@
 
 static inline void kvm_linear_init(void)
 {}
+
+static inline u32 kvmppc_get_xics_latch(void)
+{
+	return 0;
+}
+
+static inline void kvmppc_set_host_ipi(int cpu, u8 host_ipi)
+{}
+
+static inline void kvmppc_fast_vcpu_kick(struct kvm_vcpu *vcpu)
+{
+	kvm_vcpu_kick(vcpu);
+}
+#endif
+
+#ifdef CONFIG_KVM_XICS
+static inline int kvmppc_xics_enabled(struct kvm_vcpu *vcpu)
+{
+	return vcpu->arch.irq_type == KVMPPC_IRQ_XICS;
+}
+extern void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu);
+extern int kvmppc_xics_create_icp(struct kvm_vcpu *vcpu, unsigned long server);
+extern int kvm_vm_ioctl_xics_irq(struct kvm *kvm, struct kvm_irq_level *args);
+extern int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd);
+extern u64 kvmppc_xics_get_icp(struct kvm_vcpu *vcpu);
+extern int kvmppc_xics_set_icp(struct kvm_vcpu *vcpu, u64 icpval);
+extern int kvmppc_xics_connect_vcpu(struct kvm_device *dev,
+			struct kvm_vcpu *vcpu, u32 cpu);
+#else
+static inline int kvmppc_xics_enabled(struct kvm_vcpu *vcpu)
+	{ return 0; }
+static inline void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu) { }
+static inline int kvmppc_xics_create_icp(struct kvm_vcpu *vcpu,
+					 unsigned long server)
+	{ return -EINVAL; }
+static inline int kvm_vm_ioctl_xics_irq(struct kvm *kvm,
+					struct kvm_irq_level *args)
+	{ return -ENOTTY; }
+static inline int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd)
+	{ return 0; }
 #endif
 
 static inline void kvmppc_set_epr(struct kvm_vcpu *vcpu, u32 epr)
@@ -271,6 +340,32 @@
 #endif
 }
 
+#ifdef CONFIG_KVM_MPIC
+
+void kvmppc_mpic_set_epr(struct kvm_vcpu *vcpu);
+int kvmppc_mpic_connect_vcpu(struct kvm_device *dev, struct kvm_vcpu *vcpu,
+			     u32 cpu);
+void kvmppc_mpic_disconnect_vcpu(struct openpic *opp, struct kvm_vcpu *vcpu);
+
+#else
+
+static inline void kvmppc_mpic_set_epr(struct kvm_vcpu *vcpu)
+{
+}
+
+static inline int kvmppc_mpic_connect_vcpu(struct kvm_device *dev,
+		struct kvm_vcpu *vcpu, u32 cpu)
+{
+	return -EINVAL;
+}
+
+static inline void kvmppc_mpic_disconnect_vcpu(struct openpic *opp,
+		struct kvm_vcpu *vcpu)
+{
+}
+
+#endif /* CONFIG_KVM_MPIC */
+
 int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu,
 			      struct kvm_config_tlb *cfg);
 int kvm_vcpu_ioctl_dirty_tlb(struct kvm_vcpu *vcpu,
@@ -283,8 +378,15 @@
 
 static inline void kvmppc_mmu_flush_icache(pfn_t pfn)
 {
-	/* Clear i-cache for new pages */
 	struct page *page;
+	/*
+	 * We can only access pages that the kernel maps
+	 * as memory. Bail out for unmapped ones.
+	 */
+	if (!pfn_valid(pfn))
+		return;
+
+	/* Clear i-cache for new pages */
 	page = pfn_to_page(pfn);
 	if (!test_bit(PG_arch_1, &page->flags)) {
 		flush_dcache_icache_page(page);
@@ -324,4 +426,6 @@
 	return ea;
 }
 
+extern void xics_wake_cpu(int cpu);
+
 #endif /* __POWERPC_KVM_PPC_H__ */
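
The latch helpers above encode a simple contract: when the guest-exit path
pulls a non-IPI external interrupt out of the ICP, it stashes the raw XIRR
in the PACA, and the host interrupt driver is expected to drain that latch
before touching the hardware.  A minimal sketch of such a consumer, where
icp_read_xirr() is an assumed stand-in for the real load-with-ack MMIO
access:

unsigned int icp_read_xirr(void);	/* assumed MMIO load-with-ack */

static unsigned int host_get_xirr(void)
{
	/* Prefer an interrupt KVM already pulled out of the ICP */
	unsigned int xirr = kvmppc_get_xics_latch();

	if (xirr)
		return xirr;

	/* Otherwise do the usual load-with-ack from the hardware */
	return icp_read_xirr();
}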
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index 3d17427..a613651 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -300,6 +300,7 @@
 #define     LPCR_PECE1	0x00002000	/* decrementer can cause exit */
 #define     LPCR_PECE2	0x00001000	/* machine check etc can cause exit */
 #define   LPCR_MER	0x00000800	/* Mediated External Exception */
+#define   LPCR_MER_SH	11
 #define   LPCR_LPES    0x0000000c
 #define   LPCR_LPES0   0x00000008      /* LPAR Env selector 0 */
 #define   LPCR_LPES1   0x00000004      /* LPAR Env selector 1 */
diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h
index 16064d0..0fb1a6e 100644
--- a/arch/powerpc/include/uapi/asm/kvm.h
+++ b/arch/powerpc/include/uapi/asm/kvm.h
@@ -25,6 +25,8 @@
 /* Select powerpc specific features in <linux/kvm.h> */
 #define __KVM_HAVE_SPAPR_TCE
 #define __KVM_HAVE_PPC_SMT
+#define __KVM_HAVE_IRQCHIP
+#define __KVM_HAVE_IRQ_LINE
 
 struct kvm_regs {
 	__u64 pc;
@@ -272,8 +274,31 @@
 
 /* for KVM_SET_GUEST_DEBUG */
 struct kvm_guest_debug_arch {
+	struct {
+		/* H/W breakpoint/watchpoint address */
+		__u64 addr;
+		/*
+		 * Type denotes a h/w breakpoint, read watchpoint, write
+		 * watchpoint, or a combined read/write watchpoint.
+		 */
+#define KVMPPC_DEBUG_NONE		0x0
+#define KVMPPC_DEBUG_BREAKPOINT		(1UL << 1)
+#define KVMPPC_DEBUG_WATCH_WRITE	(1UL << 2)
+#define KVMPPC_DEBUG_WATCH_READ		(1UL << 3)
+		__u32 type;
+		__u32 reserved;
+	} bp[16];
 };
 
+/* Debug related defines */
+/*
+ * kvm_guest_debug->control is a 32 bit field. The lower 16 bits are generic
+ * and the upper 16 bits are architecture specific. The architecture specific
+ * bits define whether the ioctl sets a hardware breakpoint or a software
+ * breakpoint.
+ */
+#define KVM_GUESTDBG_USE_SW_BP		0x00010000
+#define KVM_GUESTDBG_USE_HW_BP		0x00020000
+
 /* definition of registers in kvm_run */
 struct kvm_sync_regs {
 };
@@ -299,6 +324,12 @@
 	__u64 rma_size;
 };
 
+/* for KVM_CAP_PPC_RTAS */
+struct kvm_rtas_token_args {
+	char name[120];
+	__u64 token;	/* Use a token of 0 to undefine a mapping */
+};
+
 struct kvm_book3e_206_tlb_entry {
 	__u32 mas8;
 	__u32 mas1;
@@ -359,6 +390,26 @@
 	__u16	n_invalid;
 };
 
+/* Per-vcpu XICS interrupt controller state */
+#define KVM_REG_PPC_ICP_STATE	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x8c)
+
+#define  KVM_REG_PPC_ICP_CPPR_SHIFT	56	/* current proc priority */
+#define  KVM_REG_PPC_ICP_CPPR_MASK	0xff
+#define  KVM_REG_PPC_ICP_XISR_SHIFT	32	/* interrupt status field */
+#define  KVM_REG_PPC_ICP_XISR_MASK	0xffffff
+#define  KVM_REG_PPC_ICP_MFRR_SHIFT	24	/* pending IPI priority */
+#define  KVM_REG_PPC_ICP_MFRR_MASK	0xff
+#define  KVM_REG_PPC_ICP_PPRI_SHIFT	16	/* pending irq priority */
+#define  KVM_REG_PPC_ICP_PPRI_MASK	0xff
+
+/* Device control API: PPC-specific devices */
+#define KVM_DEV_MPIC_GRP_MISC		1
+#define   KVM_DEV_MPIC_BASE_ADDR	0	/* 64-bit */
+
+#define KVM_DEV_MPIC_GRP_REGISTER	2	/* 32-bit */
+#define KVM_DEV_MPIC_GRP_IRQ_ACTIVE	3	/* 32-bit */
+
+/* One-Reg API: PPC-specific registers */
 #define KVM_REG_PPC_HIOR	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x1)
 #define KVM_REG_PPC_IAC1	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x2)
 #define KVM_REG_PPC_IAC2	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x3)
@@ -417,4 +468,47 @@
 #define KVM_REG_PPC_EPCR	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x85)
 #define KVM_REG_PPC_EPR		(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x86)
 
+/* Timer Status Register OR/CLEAR interface */
+#define KVM_REG_PPC_OR_TSR	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x87)
+#define KVM_REG_PPC_CLEAR_TSR	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x88)
+#define KVM_REG_PPC_TCR		(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x89)
+#define KVM_REG_PPC_TSR		(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x8a)
+
+/* Debugging: Special instruction for software breakpoint */
+#define KVM_REG_PPC_DEBUG_INST	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x8b)
+
+/* MMU registers */
+#define KVM_REG_PPC_MAS0	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x8c)
+#define KVM_REG_PPC_MAS1	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x8d)
+#define KVM_REG_PPC_MAS2	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x8e)
+#define KVM_REG_PPC_MAS7_3	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x8f)
+#define KVM_REG_PPC_MAS4	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x90)
+#define KVM_REG_PPC_MAS6	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x91)
+#define KVM_REG_PPC_MMUCFG	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x92)
+/*
+ * TLBnCFG fields TLBnCFG_N_ENTRY and TLBnCFG_ASSOC can be changed only using
+ * KVM_CAP_SW_TLB ioctl
+ */
+#define KVM_REG_PPC_TLB0CFG	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x93)
+#define KVM_REG_PPC_TLB1CFG	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x94)
+#define KVM_REG_PPC_TLB2CFG	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x95)
+#define KVM_REG_PPC_TLB3CFG	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x96)
+#define KVM_REG_PPC_TLB0PS	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x97)
+#define KVM_REG_PPC_TLB1PS	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x98)
+#define KVM_REG_PPC_TLB2PS	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x99)
+#define KVM_REG_PPC_TLB3PS	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x9a)
+#define KVM_REG_PPC_EPTCFG	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x9b)
+
+/* PPC64 eXternal Interrupt Controller Specification */
+#define KVM_DEV_XICS_GRP_SOURCES	1	/* 64-bit source attributes */
+
+/* Layout of 64-bit source attribute values */
+#define  KVM_XICS_DESTINATION_SHIFT	0
+#define  KVM_XICS_DESTINATION_MASK	0xffffffffULL
+#define  KVM_XICS_PRIORITY_SHIFT	32
+#define  KVM_XICS_PRIORITY_MASK		0xff
+#define  KVM_XICS_LEVEL_SENSITIVE	(1ULL << 40)
+#define  KVM_XICS_MASKED		(1ULL << 41)
+#define  KVM_XICS_PENDING		(1ULL << 42)
+
 #endif /* __LINUX_KVM_POWERPC_H */
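
The shift/mask pairs for KVM_REG_PPC_ICP_STATE pack four fields into the one
64-bit ONE_REG value.  A minimal userspace sketch of unpacking it, using the
defines above (the icp_state struct is illustrative, not part of the ABI):

#include <stdint.h>

struct icp_state {
	uint8_t  cppr;	/* current processor priority */
	uint32_t xisr;	/* pending interrupt source (24 bits) */
	uint8_t  mfrr;	/* pending IPI priority */
	uint8_t  ppri;	/* pending interrupt priority */
};

static struct icp_state decode_icp_state(uint64_t val)
{
	struct icp_state s = {
		.cppr = (val >> KVM_REG_PPC_ICP_CPPR_SHIFT) & KVM_REG_PPC_ICP_CPPR_MASK,
		.xisr = (val >> KVM_REG_PPC_ICP_XISR_SHIFT) & KVM_REG_PPC_ICP_XISR_MASK,
		.mfrr = (val >> KVM_REG_PPC_ICP_MFRR_SHIFT) & KVM_REG_PPC_ICP_MFRR_MASK,
		.ppri = (val >> KVM_REG_PPC_ICP_PPRI_SHIFT) & KVM_REG_PPC_ICP_PPRI_MASK,
	};

	return s;
}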
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 172233e..b51a97c 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -480,6 +480,7 @@
 	DEFINE(VCPU_DSISR, offsetof(struct kvm_vcpu, arch.shregs.dsisr));
 	DEFINE(VCPU_DAR, offsetof(struct kvm_vcpu, arch.shregs.dar));
 	DEFINE(VCPU_VPA, offsetof(struct kvm_vcpu, arch.vpa.pinned_addr));
+	DEFINE(VCPU_VPA_DIRTY, offsetof(struct kvm_vcpu, arch.vpa.dirty));
 #endif
 #ifdef CONFIG_PPC_BOOK3S
 	DEFINE(VCPU_VCPUID, offsetof(struct kvm_vcpu, vcpu_id));
@@ -576,6 +577,8 @@
 	HSTATE_FIELD(HSTATE_KVM_VCPU, kvm_vcpu);
 	HSTATE_FIELD(HSTATE_KVM_VCORE, kvm_vcore);
 	HSTATE_FIELD(HSTATE_XICS_PHYS, xics_phys);
+	HSTATE_FIELD(HSTATE_SAVED_XIRR, saved_xirr);
+	HSTATE_FIELD(HSTATE_HOST_IPI, host_ipi);
 	HSTATE_FIELD(HSTATE_MMCR, host_mmcr);
 	HSTATE_FIELD(HSTATE_PMC, host_pmc);
 	HSTATE_FIELD(HSTATE_PURR, host_purr);
@@ -599,6 +602,7 @@
 	DEFINE(VCPU_LAST_INST, offsetof(struct kvm_vcpu, arch.last_inst));
 	DEFINE(VCPU_FAULT_DEAR, offsetof(struct kvm_vcpu, arch.fault_dear));
 	DEFINE(VCPU_FAULT_ESR, offsetof(struct kvm_vcpu, arch.fault_esr));
+	DEFINE(VCPU_CRIT_SAVE, offsetof(struct kvm_vcpu, arch.crit_save));
 #endif /* CONFIG_PPC_BOOK3S */
 #endif /* CONFIG_KVM */
 
diff --git a/arch/powerpc/kvm/44x.c b/arch/powerpc/kvm/44x.c
index 3d7fd21..2f5c6b6 100644
--- a/arch/powerpc/kvm/44x.c
+++ b/arch/powerpc/kvm/44x.c
@@ -124,6 +124,18 @@
 	return kvmppc_set_sregs_ivor(vcpu, sregs);
 }
 
+int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id,
+			union kvmppc_one_reg *val)
+{
+	return -EINVAL;
+}
+
+int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id,
+		       union kvmppc_one_reg *val)
+{
+	return -EINVAL;
+}
+
 struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
 {
 	struct kvmppc_vcpu_44x *vcpu_44x;
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index 63c67ec..eb643f8 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -136,21 +136,41 @@
 	  If unsure, say N.
 
 config KVM_E500MC
-	bool "KVM support for PowerPC E500MC/E5500 processors"
+	bool "KVM support for PowerPC E500MC/E5500/E6500 processors"
 	depends on PPC_E500MC
 	select KVM
 	select KVM_MMIO
 	select KVM_BOOKE_HV
 	select MMU_NOTIFIER
 	---help---
-	  Support running unmodified E500MC/E5500 (32-bit) guest kernels in
-	  virtual machines on E500MC/E5500 host processors.
+	  Support running unmodified E500MC/E5500/E6500 guest kernels in
+	  virtual machines on E500MC/E5500/E6500 host processors.
 
 	  This module provides access to the hardware capabilities through
 	  a character device node named /dev/kvm.
 
 	  If unsure, say N.
 
+config KVM_MPIC
+	bool "KVM in-kernel MPIC emulation"
+	depends on KVM && E500
+	select HAVE_KVM_IRQCHIP
+	select HAVE_KVM_IRQ_ROUTING
+	select HAVE_KVM_MSI
+	help
+	  Enable support for emulating MPIC devices inside the
+	  host kernel, rather than relying on userspace to emulate it.
+	  Currently, support is limited to certain versions of
+	  Freescale's MPIC implementation.
+
+config KVM_XICS
+	bool "KVM in-kernel XICS emulation"
+	depends on KVM_BOOK3S_64 && !KVM_MPIC
+	---help---
+	  Include support for the XICS (eXternal Interrupt Controller
+	  Specification) interrupt controller architecture used on
+	  IBM POWER (pSeries) servers.
+
 source drivers/vhost/Kconfig
 
 endif # VIRTUALIZATION
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
index b772ede..422de3f 100644
--- a/arch/powerpc/kvm/Makefile
+++ b/arch/powerpc/kvm/Makefile
@@ -72,12 +72,18 @@
 	book3s_hv.o \
 	book3s_hv_interrupts.o \
 	book3s_64_mmu_hv.o
+kvm-book3s_64-builtin-xics-objs-$(CONFIG_KVM_XICS) := \
+	book3s_hv_rm_xics.o
 kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HV) := \
 	book3s_hv_rmhandlers.o \
 	book3s_hv_rm_mmu.o \
 	book3s_64_vio_hv.o \
 	book3s_hv_ras.o \
-	book3s_hv_builtin.o
+	book3s_hv_builtin.o \
+	$(kvm-book3s_64-builtin-xics-objs-y)
+
+kvm-book3s_64-objs-$(CONFIG_KVM_XICS) += \
+	book3s_xics.o
 
 kvm-book3s_64-module-objs := \
 	../../../virt/kvm/kvm_main.o \
@@ -86,6 +92,7 @@
 	emulate.o \
 	book3s.o \
 	book3s_64_vio.o \
+	book3s_rtas.o \
 	$(kvm-book3s_64-objs-y)
 
 kvm-objs-$(CONFIG_KVM_BOOK3S_64) := $(kvm-book3s_64-module-objs)
@@ -103,6 +110,9 @@
 	book3s_32_mmu.o
 kvm-objs-$(CONFIG_KVM_BOOK3S_32) := $(kvm-book3s_32-objs)
 
+kvm-objs-$(CONFIG_KVM_MPIC) += mpic.o
+kvm-objs-$(CONFIG_HAVE_KVM_IRQ_ROUTING) += $(addprefix ../../../virt/kvm/, irqchip.o)
+
 kvm-objs := $(kvm-objs-m) $(kvm-objs-y)
 
 obj-$(CONFIG_KVM_440) += kvm.o
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index a4b64528..700df6f 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -104,7 +104,7 @@
 	return prio;
 }
 
-static void kvmppc_book3s_dequeue_irqprio(struct kvm_vcpu *vcpu,
+void kvmppc_book3s_dequeue_irqprio(struct kvm_vcpu *vcpu,
 					  unsigned int vec)
 {
 	unsigned long old_pending = vcpu->arch.pending_exceptions;
@@ -160,8 +160,7 @@
 	kvmppc_book3s_queue_irqprio(vcpu, vec);
 }
 
-void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu,
-                                  struct kvm_interrupt *irq)
+void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu)
 {
 	kvmppc_book3s_dequeue_irqprio(vcpu, BOOK3S_INTERRUPT_EXTERNAL);
 	kvmppc_book3s_dequeue_irqprio(vcpu, BOOK3S_INTERRUPT_EXTERNAL_LEVEL);
@@ -530,6 +529,21 @@
 			val = get_reg_val(reg->id, vcpu->arch.vscr.u[3]);
 			break;
 #endif /* CONFIG_ALTIVEC */
+		case KVM_REG_PPC_DEBUG_INST: {
+			u32 opcode = INS_TW;
+			r = copy_to_user((u32 __user *)(long)reg->addr,
+					 &opcode, sizeof(u32));
+			break;
+		}
+#ifdef CONFIG_KVM_XICS
+		case KVM_REG_PPC_ICP_STATE:
+			if (!vcpu->arch.icp) {
+				r = -ENXIO;
+				break;
+			}
+			val = get_reg_val(reg->id, kvmppc_xics_get_icp(vcpu));
+			break;
+#endif /* CONFIG_KVM_XICS */
 		default:
 			r = -EINVAL;
 			break;
@@ -592,6 +606,16 @@
 			vcpu->arch.vscr.u[3] = set_reg_val(reg->id, val);
 			break;
 #endif /* CONFIG_ALTIVEC */
+#ifdef CONFIG_KVM_XICS
+		case KVM_REG_PPC_ICP_STATE:
+			if (!vcpu->arch.icp) {
+				r = -ENXIO;
+				break;
+			}
+			r = kvmppc_xics_set_icp(vcpu,
+						set_reg_val(reg->id, val));
+			break;
+#endif /* CONFIG_KVM_XICS */
 		default:
 			r = -EINVAL;
 			break;
@@ -607,6 +631,12 @@
 	return 0;
 }
 
+int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
+					struct kvm_guest_debug *dbg)
+{
+	return -EINVAL;
+}
+
 void kvmppc_decrementer_func(unsigned long data)
 {
 	struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data;
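
The new KVM_REG_PPC_ICP_STATE cases above are reached through the generic
ONE_REG ioctls.  A sketch of the userspace fetch (standard KVM_GET_ONE_REG
usage; error handling elided):

#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int get_icp_state(int vcpu_fd, uint64_t *val)
{
	struct kvm_one_reg reg = {
		.id   = KVM_REG_PPC_ICP_STATE,
		.addr = (uintptr_t)val,
	};

	/* fails with ENXIO if the vcpu has no ICP connected */
	return ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);
}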
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index da98e26..5880dfb 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -893,7 +893,10 @@
 			/* Harvest R and C */
 			rcbits = hptep[1] & (HPTE_R_R | HPTE_R_C);
 			*rmapp |= rcbits << KVMPPC_RMAP_RC_SHIFT;
-			rev[i].guest_rpte = ptel | rcbits;
+			if (rcbits & ~rev[i].guest_rpte) {
+				rev[i].guest_rpte = ptel | rcbits;
+				note_hpte_modification(kvm, &rev[i]);
+			}
 		}
 		unlock_rmap(rmapp);
 		hptep[0] &= ~HPTE_V_HVLOCK;
@@ -976,7 +979,10 @@
 		/* Now check and modify the HPTE */
 		if ((hptep[0] & HPTE_V_VALID) && (hptep[1] & HPTE_R_R)) {
 			kvmppc_clear_ref_hpte(kvm, hptep, i);
-			rev[i].guest_rpte |= HPTE_R_R;
+			if (!(rev[i].guest_rpte & HPTE_R_R)) {
+				rev[i].guest_rpte |= HPTE_R_R;
+				note_hpte_modification(kvm, &rev[i]);
+			}
 			ret = 1;
 		}
 		hptep[0] &= ~HPTE_V_HVLOCK;
@@ -1080,7 +1086,10 @@
 			hptep[1] &= ~HPTE_R_C;
 			eieio();
 			hptep[0] = (hptep[0] & ~HPTE_V_ABSENT) | HPTE_V_VALID;
-			rev[i].guest_rpte |= HPTE_R_C;
+			if (!(rev[i].guest_rpte & HPTE_R_C)) {
+				rev[i].guest_rpte |= HPTE_R_C;
+				note_hpte_modification(kvm, &rev[i]);
+			}
 			ret = 1;
 		}
 		hptep[0] &= ~HPTE_V_HVLOCK;
@@ -1090,11 +1099,30 @@
 	return ret;
 }
 
+static void harvest_vpa_dirty(struct kvmppc_vpa *vpa,
+			      struct kvm_memory_slot *memslot,
+			      unsigned long *map)
+{
+	unsigned long gfn;
+
+	if (!vpa->dirty || !vpa->pinned_addr)
+		return;
+	gfn = vpa->gpa >> PAGE_SHIFT;
+	if (gfn < memslot->base_gfn ||
+	    gfn >= memslot->base_gfn + memslot->npages)
+		return;
+
+	vpa->dirty = false;
+	if (map)
+		__set_bit_le(gfn - memslot->base_gfn, map);
+}
+
 long kvmppc_hv_get_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot,
 			     unsigned long *map)
 {
 	unsigned long i;
 	unsigned long *rmapp;
+	struct kvm_vcpu *vcpu;
 
 	preempt_disable();
 	rmapp = memslot->arch.rmap;
@@ -1103,6 +1131,15 @@
 			__set_bit_le(i, map);
 		++rmapp;
 	}
+
+	/* Harvest dirty bits from VPA and DTL updates */
+	/* Note: we never modify the SLB shadow buffer areas */
+	kvm_for_each_vcpu(i, vcpu, kvm) {
+		spin_lock(&vcpu->arch.vpa_update_lock);
+		harvest_vpa_dirty(&vcpu->arch.vpa, memslot, map);
+		harvest_vpa_dirty(&vcpu->arch.dtl, memslot, map);
+		spin_unlock(&vcpu->arch.vpa_update_lock);
+	}
 	preempt_enable();
 	return 0;
 }
@@ -1114,7 +1151,7 @@
 	unsigned long gfn = gpa >> PAGE_SHIFT;
 	struct page *page, *pages[1];
 	int npages;
-	unsigned long hva, psize, offset;
+	unsigned long hva, offset;
 	unsigned long pa;
 	unsigned long *physp;
 	int srcu_idx;
@@ -1146,14 +1183,9 @@
 	}
 	srcu_read_unlock(&kvm->srcu, srcu_idx);
 
-	psize = PAGE_SIZE;
-	if (PageHuge(page)) {
-		page = compound_head(page);
-		psize <<= compound_order(page);
-	}
-	offset = gpa & (psize - 1);
+	offset = gpa & (PAGE_SIZE - 1);
 	if (nb_ret)
-		*nb_ret = psize - offset;
+		*nb_ret = PAGE_SIZE - offset;
 	return page_address(page) + offset;
 
  err:
@@ -1161,11 +1193,31 @@
 	return NULL;
 }
 
-void kvmppc_unpin_guest_page(struct kvm *kvm, void *va)
+void kvmppc_unpin_guest_page(struct kvm *kvm, void *va, unsigned long gpa,
+			     bool dirty)
 {
 	struct page *page = virt_to_page(va);
+	struct kvm_memory_slot *memslot;
+	unsigned long gfn;
+	unsigned long *rmap;
+	int srcu_idx;
 
 	put_page(page);
+
+	if (!dirty || !kvm->arch.using_mmu_notifiers)
+		return;
+
+	/* We need to mark this page dirty in the rmap chain */
+	gfn = gpa >> PAGE_SHIFT;
+	srcu_idx = srcu_read_lock(&kvm->srcu);
+	memslot = gfn_to_memslot(kvm, gfn);
+	if (memslot) {
+		rmap = &memslot->arch.rmap[gfn - memslot->base_gfn];
+		lock_rmap(rmap);
+		*rmap |= KVMPPC_RMAP_CHANGED;
+		unlock_rmap(rmap);
+	}
+	srcu_read_unlock(&kvm->srcu, srcu_idx);
 }
 
 /*
@@ -1193,16 +1245,36 @@
 
 #define HPTE_SIZE	(2 * sizeof(unsigned long))
 
+/*
+ * Returns 1 if this HPT entry has been modified or has pending
+ * R/C bit changes.
+ */
+static int hpte_dirty(struct revmap_entry *revp, unsigned long *hptp)
+{
+	unsigned long rcbits_unset;
+
+	if (revp->guest_rpte & HPTE_GR_MODIFIED)
+		return 1;
+
+	/* Also need to consider changes in reference and changed bits */
+	rcbits_unset = ~revp->guest_rpte & (HPTE_R_R | HPTE_R_C);
+	if ((hptp[0] & HPTE_V_VALID) && (hptp[1] & rcbits_unset))
+		return 1;
+
+	return 0;
+}
+
 static long record_hpte(unsigned long flags, unsigned long *hptp,
 			unsigned long *hpte, struct revmap_entry *revp,
 			int want_valid, int first_pass)
 {
 	unsigned long v, r;
+	unsigned long rcbits_unset;
 	int ok = 1;
 	int valid, dirty;
 
 	/* Unmodified entries are uninteresting except on the first pass */
-	dirty = !!(revp->guest_rpte & HPTE_GR_MODIFIED);
+	dirty = hpte_dirty(revp, hptp);
 	if (!first_pass && !dirty)
 		return 0;
 
@@ -1223,16 +1295,28 @@
 		while (!try_lock_hpte(hptp, HPTE_V_HVLOCK))
 			cpu_relax();
 		v = hptp[0];
+
+		/* re-evaluate valid and dirty from synchronized HPTE value */
+		valid = !!(v & HPTE_V_VALID);
+		dirty = !!(revp->guest_rpte & HPTE_GR_MODIFIED);
+
+		/* Harvest R and C into guest view if necessary */
+		rcbits_unset = ~revp->guest_rpte & (HPTE_R_R | HPTE_R_C);
+		if (valid && (rcbits_unset & hptp[1])) {
+			revp->guest_rpte |= (hptp[1] & (HPTE_R_R | HPTE_R_C)) |
+				HPTE_GR_MODIFIED;
+			dirty = 1;
+		}
+
 		if (v & HPTE_V_ABSENT) {
 			v &= ~HPTE_V_ABSENT;
 			v |= HPTE_V_VALID;
+			valid = 1;
 		}
-		/* re-evaluate valid and dirty from synchronized HPTE value */
-		valid = !!(v & HPTE_V_VALID);
 		if ((flags & KVM_GET_HTAB_BOLTED_ONLY) && !(v & HPTE_V_BOLTED))
 			valid = 0;
-		r = revp->guest_rpte | (hptp[1] & (HPTE_R_R | HPTE_R_C));
-		dirty = !!(revp->guest_rpte & HPTE_GR_MODIFIED);
+
+		r = revp->guest_rpte;
 		/* only clear modified if this is the right sort of entry */
 		if (valid == want_valid && dirty) {
 			r &= ~HPTE_GR_MODIFIED;
@@ -1288,7 +1372,7 @@
 		/* Skip uninteresting entries, i.e. clean on not-first pass */
 		if (!first_pass) {
 			while (i < kvm->arch.hpt_npte &&
-			       !(revp->guest_rpte & HPTE_GR_MODIFIED)) {
+			       !hpte_dirty(revp, hptp)) {
 				++i;
 				hptp += 2;
 				++revp;
diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c
index 836c569..1f6344c 100644
--- a/arch/powerpc/kvm/book3s_emulate.c
+++ b/arch/powerpc/kvm/book3s_emulate.c
@@ -194,7 +194,9 @@
 				run->papr_hcall.args[i] = gpr;
 			}
 
-			emulated = EMULATE_DO_PAPR;
+			run->exit_reason = KVM_EXIT_PAPR_HCALL;
+			vcpu->arch.hcall_needed = 1;
+			emulated = EMULATE_EXIT_USER;
 			break;
 		}
 #endif
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index f541693..9de24f8 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -66,6 +66,31 @@
 static void kvmppc_end_cede(struct kvm_vcpu *vcpu);
 static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu);
 
+void kvmppc_fast_vcpu_kick(struct kvm_vcpu *vcpu)
+{
+	int me;
+	int cpu = vcpu->cpu;
+	wait_queue_head_t *wqp;
+
+	wqp = kvm_arch_vcpu_wq(vcpu);
+	if (waitqueue_active(wqp)) {
+		wake_up_interruptible(wqp);
+		++vcpu->stat.halt_wakeup;
+	}
+
+	me = get_cpu();
+
+	/* CPU points to the first thread of the core */
+	if (cpu != me && cpu >= 0 && cpu < nr_cpu_ids) {
+		int real_cpu = cpu + vcpu->arch.ptid;
+		if (paca[real_cpu].kvm_hstate.xics_phys)
+			xics_wake_cpu(real_cpu);
+		else if (cpu_online(cpu))
+			smp_send_reschedule(cpu);
+	}
+	put_cpu();
+}
+
 /*
  * We use the vcpu_load/put functions to measure stolen time.
  * Stolen time is counted as time when either the vcpu is able to
@@ -259,7 +284,7 @@
 			len = ((struct reg_vpa *)va)->length.hword;
 		else
 			len = ((struct reg_vpa *)va)->length.word;
-		kvmppc_unpin_guest_page(kvm, va);
+		kvmppc_unpin_guest_page(kvm, va, vpa, false);
 
 		/* Check length */
 		if (len > nb || len < sizeof(struct reg_vpa))
@@ -359,13 +384,13 @@
 		va = NULL;
 		nb = 0;
 		if (gpa)
-			va = kvmppc_pin_guest_page(kvm, vpap->next_gpa, &nb);
+			va = kvmppc_pin_guest_page(kvm, gpa, &nb);
 		spin_lock(&vcpu->arch.vpa_update_lock);
 		if (gpa == vpap->next_gpa)
 			break;
 		/* sigh... unpin that one and try again */
 		if (va)
-			kvmppc_unpin_guest_page(kvm, va);
+			kvmppc_unpin_guest_page(kvm, va, gpa, false);
 	}
 
 	vpap->update_pending = 0;
@@ -375,12 +400,15 @@
 		 * has changed the mappings underlying guest memory,
 		 * so unregister the region.
 		 */
-		kvmppc_unpin_guest_page(kvm, va);
+		kvmppc_unpin_guest_page(kvm, va, gpa, false);
 		va = NULL;
 	}
 	if (vpap->pinned_addr)
-		kvmppc_unpin_guest_page(kvm, vpap->pinned_addr);
+		kvmppc_unpin_guest_page(kvm, vpap->pinned_addr, vpap->gpa,
+					vpap->dirty);
+	vpap->gpa = gpa;
 	vpap->pinned_addr = va;
+	vpap->dirty = false;
 	if (va)
 		vpap->pinned_end = va + vpap->len;
 }
@@ -472,6 +500,7 @@
 	/* order writing *dt vs. writing vpa->dtl_idx */
 	smp_wmb();
 	vpa->dtl_idx = ++vcpu->arch.dtl_index;
+	vcpu->arch.dtl.dirty = true;
 }
 
 int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
@@ -479,7 +508,7 @@
 	unsigned long req = kvmppc_get_gpr(vcpu, 3);
 	unsigned long target, ret = H_SUCCESS;
 	struct kvm_vcpu *tvcpu;
-	int idx;
+	int idx, rc;
 
 	switch (req) {
 	case H_ENTER:
@@ -515,6 +544,28 @@
 					kvmppc_get_gpr(vcpu, 5),
 					kvmppc_get_gpr(vcpu, 6));
 		break;
+	case H_RTAS:
+		if (list_empty(&vcpu->kvm->arch.rtas_tokens))
+			return RESUME_HOST;
+
+		rc = kvmppc_rtas_hcall(vcpu);
+
+		if (rc == -ENOENT)
+			return RESUME_HOST;
+		else if (rc == 0)
+			break;
+
+		/* Send the error out to userspace via KVM_RUN */
+		return rc;
+
+	case H_XIRR:
+	case H_CPPR:
+	case H_EOI:
+	case H_IPI:
+		if (kvmppc_xics_enabled(vcpu)) {
+			ret = kvmppc_xics_hcall(vcpu, req);
+			break;
+		} /* fallthrough */
 	default:
 		return RESUME_HOST;
 	}
@@ -913,15 +964,19 @@
 	return ERR_PTR(err);
 }
 
+static void unpin_vpa(struct kvm *kvm, struct kvmppc_vpa *vpa)
+{
+	if (vpa->pinned_addr)
+		kvmppc_unpin_guest_page(kvm, vpa->pinned_addr, vpa->gpa,
+					vpa->dirty);
+}
+
 void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
 {
 	spin_lock(&vcpu->arch.vpa_update_lock);
-	if (vcpu->arch.dtl.pinned_addr)
-		kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.dtl.pinned_addr);
-	if (vcpu->arch.slb_shadow.pinned_addr)
-		kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.slb_shadow.pinned_addr);
-	if (vcpu->arch.vpa.pinned_addr)
-		kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.vpa.pinned_addr);
+	unpin_vpa(vcpu->kvm, &vcpu->arch.dtl);
+	unpin_vpa(vcpu->kvm, &vcpu->arch.slb_shadow);
+	unpin_vpa(vcpu->kvm, &vcpu->arch.vpa);
 	spin_unlock(&vcpu->arch.vpa_update_lock);
 	kvm_vcpu_uninit(vcpu);
 	kmem_cache_free(kvm_vcpu_cache, vcpu);
@@ -955,7 +1010,6 @@
 }
 
 extern int __kvmppc_vcore_entry(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu);
-extern void xics_wake_cpu(int cpu);
 
 static void kvmppc_remove_runnable(struct kvmppc_vcore *vc,
 				   struct kvm_vcpu *vcpu)
@@ -1330,9 +1384,12 @@
 			break;
 		vc->runner = vcpu;
 		n_ceded = 0;
-		list_for_each_entry(v, &vc->runnable_threads, arch.run_list)
+		list_for_each_entry(v, &vc->runnable_threads, arch.run_list) {
 			if (!v->arch.pending_exceptions)
 				n_ceded += v->arch.ceded;
+			else
+				v->arch.ceded = 0;
+		}
 		if (n_ceded == vc->n_runnable)
 			kvmppc_vcore_blocked(vc);
 		else
@@ -1645,12 +1702,12 @@
 
 void kvmppc_core_commit_memory_region(struct kvm *kvm,
 				      struct kvm_userspace_memory_region *mem,
-				      struct kvm_memory_slot old)
+				      const struct kvm_memory_slot *old)
 {
 	unsigned long npages = mem->memory_size >> PAGE_SHIFT;
 	struct kvm_memory_slot *memslot;
 
-	if (npages && old.npages) {
+	if (npages && old->npages) {
 		/*
 		 * If modifying a memslot, reset all the rmap dirty bits.
 		 * If this is a new memslot, we don't need to do anything
@@ -1827,6 +1884,7 @@
 	cpumask_setall(&kvm->arch.need_tlb_flush);
 
 	INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables);
+	INIT_LIST_HEAD(&kvm->arch.rtas_tokens);
 
 	kvm->arch.rma = NULL;
 
@@ -1872,6 +1930,8 @@
 		kvm->arch.rma = NULL;
 	}
 
+	kvmppc_rtas_tokens_free(kvm);
+
 	kvmppc_free_hpt(kvm);
 	WARN_ON(!list_empty(&kvm->arch.spapr_tce_tables));
 }
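
The H_RTAS path above only fires once userspace has populated
kvm->arch.rtas_tokens.  A sketch of the userspace side, assuming the
KVM_PPC_RTAS_DEFINE_TOKEN vm ioctl that this series wires up to
kvm_vm_ioctl_rtas_define_token() (the vm_fd plumbing is elided):

#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int define_set_xive_token(int vm_fd, uint64_t token)
{
	struct kvm_rtas_token_args args;

	memset(&args, 0, sizeof(args));
	strncpy(args.name, "ibm,set-xive", sizeof(args.name) - 1);
	args.token = token;	/* a token of 0 undefines the mapping */

	return ioctl(vm_fd, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
}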
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index 19c93ba..6dcbb49 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -97,17 +97,6 @@
 }
 EXPORT_SYMBOL_GPL(kvmppc_add_revmap_chain);
 
-/*
- * Note modification of an HPTE; set the HPTE modified bit
- * if anyone is interested.
- */
-static inline void note_hpte_modification(struct kvm *kvm,
-					  struct revmap_entry *rev)
-{
-	if (atomic_read(&kvm->arch.hpte_mod_interest))
-		rev->guest_rpte |= HPTE_GR_MODIFIED;
-}
-
 /* Remove this HPTE from the chain for a real page */
 static void remove_revmap_chain(struct kvm *kvm, long pte_index,
 				struct revmap_entry *rev,
diff --git a/arch/powerpc/kvm/book3s_hv_rm_xics.c b/arch/powerpc/kvm/book3s_hv_rm_xics.c
new file mode 100644
index 0000000..b4b0082
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_hv_rm_xics.c
@@ -0,0 +1,406 @@
+/*
+ * Copyright 2012 Michael Ellerman, IBM Corporation.
+ * Copyright 2012 Benjamin Herrenschmidt, IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/kvm_host.h>
+#include <linux/err.h>
+
+#include <asm/kvm_book3s.h>
+#include <asm/kvm_ppc.h>
+#include <asm/hvcall.h>
+#include <asm/xics.h>
+#include <asm/debug.h>
+#include <asm/synch.h>
+#include <asm/ppc-opcode.h>
+
+#include "book3s_xics.h"
+
+#define DEBUG_PASSUP
+
+static inline void rm_writeb(unsigned long paddr, u8 val)
+{
+	__asm__ __volatile__("sync; stbcix %0,0,%1"
+		: : "r" (val), "r" (paddr) : "memory");
+}
+
+static void icp_rm_set_vcpu_irq(struct kvm_vcpu *vcpu,
+				struct kvm_vcpu *this_vcpu)
+{
+	struct kvmppc_icp *this_icp = this_vcpu->arch.icp;
+	unsigned long xics_phys;
+	int cpu;
+
+	/* Mark the target VCPU as having an interrupt pending */
+	vcpu->stat.queue_intr++;
+	set_bit(BOOK3S_IRQPRIO_EXTERNAL_LEVEL, &vcpu->arch.pending_exceptions);
+
+	/* Kick self? Just set MER and return */
+	if (vcpu == this_vcpu) {
+		mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) | LPCR_MER);
+		return;
+	}
+
+	/* Check if the core is loaded, if not, too hard */
+	cpu = vcpu->cpu;
+	if (cpu < 0 || cpu >= nr_cpu_ids) {
+		this_icp->rm_action |= XICS_RM_KICK_VCPU;
+		this_icp->rm_kick_target = vcpu;
+		return;
+	}
+	/* In SMT mode, cpu always points to thread 0; adjust it */
+	cpu += vcpu->arch.ptid;
+
+	/* Not too hard, then poke the target */
+	xics_phys = paca[cpu].kvm_hstate.xics_phys;
+	rm_writeb(xics_phys + XICS_MFRR, IPI_PRIORITY);
+}
+
+static void icp_rm_clr_vcpu_irq(struct kvm_vcpu *vcpu)
+{
+	/* Note: Only called on self! */
+	clear_bit(BOOK3S_IRQPRIO_EXTERNAL_LEVEL,
+		  &vcpu->arch.pending_exceptions);
+	mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) & ~LPCR_MER);
+}
+
+static inline bool icp_rm_try_update(struct kvmppc_icp *icp,
+				     union kvmppc_icp_state old,
+				     union kvmppc_icp_state new)
+{
+	struct kvm_vcpu *this_vcpu = local_paca->kvm_hstate.kvm_vcpu;
+	bool success;
+
+	/* Calculate new output value */
+	new.out_ee = (new.xisr && (new.pending_pri < new.cppr));
+
+	/* Attempt atomic update */
+	success = cmpxchg64(&icp->state.raw, old.raw, new.raw) == old.raw;
+	if (!success)
+		goto bail;
+
+	/*
+	 * Check for output state update
+	 *
+	 * Note that this is racy since another processor could be updating
+	 * the state already. This is why we never clear the interrupt output
+	 * here, we only ever set it. The clear only happens prior to doing
+	 * an update and only by the processor itself. Currently we do it
+	 * in Accept (H_XIRR) and Up_Cppr (H_CPPR).
+	 *
+	 * We also do not try to figure out whether the EE state has changed,
+	 * we unconditionally set it if the new state calls for it. The reason
+	 * for that is that we opportunistically remove the pending interrupt
+	 * flag when raising CPPR, so we need to set it back here if an
+	 * interrupt is still pending.
+	 */
+	if (new.out_ee)
+		icp_rm_set_vcpu_irq(icp->vcpu, this_vcpu);
+
+	/* Expose the state change for debug purposes */
+	this_vcpu->arch.icp->rm_dbgstate = new;
+	this_vcpu->arch.icp->rm_dbgtgt = icp->vcpu;
+
+ bail:
+	return success;
+}
+
+static inline int check_too_hard(struct kvmppc_xics *xics,
+				 struct kvmppc_icp *icp)
+{
+	return (xics->real_mode_dbg || icp->rm_action) ? H_TOO_HARD : H_SUCCESS;
+}
+
+static void icp_rm_down_cppr(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
+			     u8 new_cppr)
+{
+	union kvmppc_icp_state old_state, new_state;
+	bool resend;
+
+	/*
+	 * This handles several related states in one operation:
+	 *
+	 * ICP State: Down_CPPR
+	 *
+	 * Load CPPR with new value and if the XISR is 0
+	 * then check for resends:
+	 *
+	 * ICP State: Resend
+	 *
+	 * If MFRR is more favored than CPPR, check for IPIs
+	 * and notify ICS of a potential resend. This is done
+	 * asynchronously (when used in real mode, we will have
+	 * to exit here).
+	 *
+	 * We do not handle the complete Check_IPI as documented
+	 * here. In the PAPR, this state will be used for both
+	 * Set_MFRR and Down_CPPR. However, we know that we aren't
+	 * changing the MFRR state here, so we don't need to handle
+	 * the case of an MFRR causing a reject of a pending irq;
+	 * that will have been handled when the MFRR was set in the
+	 * first place.
+	 *
+	 * Thus we don't have to handle rejects, only resends.
+	 *
+	 * When implementing real mode for HV KVM, resend will lead to
+	 * a H_TOO_HARD return and the whole transaction will be handled
+	 * in virtual mode.
+	 */
+	do {
+		old_state = new_state = ACCESS_ONCE(icp->state);
+
+		/* Down_CPPR */
+		new_state.cppr = new_cppr;
+
+		/*
+		 * Cut down Resend / Check_IPI / IPI
+		 *
+		 * The logic is that we cannot have a pending interrupt
+		 * trumped by an IPI at this point (see above), so we
+		 * know that either the pending interrupt is already an
+		 * IPI (in which case we don't care to override it), or
+		 * it's either more favored than us or non-existent.
+		 */
+		if (new_state.mfrr < new_cppr &&
+		    new_state.mfrr <= new_state.pending_pri) {
+			new_state.pending_pri = new_state.mfrr;
+			new_state.xisr = XICS_IPI;
+		}
+
+		/* Latch/clear resend bit */
+		resend = new_state.need_resend;
+		new_state.need_resend = 0;
+
+	} while (!icp_rm_try_update(icp, old_state, new_state));
+
+	/*
+	 * Now handle resend checks. Those are asynchronous to the ICP
+	 * state update in HW (ie bus transactions) so we can handle them
+	 * separately here as well.
+	 */
+	if (resend)
+		icp->rm_action |= XICS_RM_CHECK_RESEND;
+}
+
+
+unsigned long kvmppc_rm_h_xirr(struct kvm_vcpu *vcpu)
+{
+	union kvmppc_icp_state old_state, new_state;
+	struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
+	struct kvmppc_icp *icp = vcpu->arch.icp;
+	u32 xirr;
+
+	if (!xics || !xics->real_mode)
+		return H_TOO_HARD;
+
+	/* First clear the interrupt */
+	icp_rm_clr_vcpu_irq(icp->vcpu);
+
+	/*
+	 * ICP State: Accept_Interrupt
+	 *
+	 * Return the pending interrupt (if any) along with the
+	 * current CPPR, then clear the XISR & set CPPR to the
+	 * pending priority
+	 */
+	do {
+		old_state = new_state = ACCESS_ONCE(icp->state);
+
+		xirr = old_state.xisr | (((u32)old_state.cppr) << 24);
+		if (!old_state.xisr)
+			break;
+		new_state.cppr = new_state.pending_pri;
+		new_state.pending_pri = 0xff;
+		new_state.xisr = 0;
+
+	} while (!icp_rm_try_update(icp, old_state, new_state));
+
+	/* Return the result in GPR4 */
+	vcpu->arch.gpr[4] = xirr;
+
+	return check_too_hard(xics, icp);
+}
+
+int kvmppc_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
+		    unsigned long mfrr)
+{
+	union kvmppc_icp_state old_state, new_state;
+	struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
+	struct kvmppc_icp *icp, *this_icp = vcpu->arch.icp;
+	u32 reject;
+	bool resend;
+	bool local;
+
+	if (!xics || !xics->real_mode)
+		return H_TOO_HARD;
+
+	local = this_icp->server_num == server;
+	if (local)
+		icp = this_icp;
+	else
+		icp = kvmppc_xics_find_server(vcpu->kvm, server);
+	if (!icp)
+		return H_PARAMETER;
+
+	/*
+	 * ICP state: Set_MFRR
+	 *
+	 * If the CPPR is more favored than the new MFRR, then
+	 * nothing needs to be done as there can be no XISR to
+	 * reject.
+	 *
+	 * If the CPPR is less favored, then we might be replacing
+	 * an interrupt, and thus need to possibly reject it as in
+	 *
+	 * ICP state: Check_IPI
+	 */
+	do {
+		old_state = new_state = ACCESS_ONCE(icp->state);
+
+		/* Set_MFRR */
+		new_state.mfrr = mfrr;
+
+		/* Check_IPI */
+		reject = 0;
+		resend = false;
+		if (mfrr < new_state.cppr) {
+			/* Reject a pending interrupt if not an IPI */
+			if (mfrr <= new_state.pending_pri)
+				reject = new_state.xisr;
+			new_state.pending_pri = mfrr;
+			new_state.xisr = XICS_IPI;
+		}
+
+		if (mfrr > old_state.mfrr && mfrr > new_state.cppr) {
+			resend = new_state.need_resend;
+			new_state.need_resend = 0;
+		}
+	} while (!icp_rm_try_update(icp, old_state, new_state));
+
+	/* Pass rejects to virtual mode */
+	if (reject && reject != XICS_IPI) {
+		this_icp->rm_action |= XICS_RM_REJECT;
+		this_icp->rm_reject = reject;
+	}
+
+	/* Pass resends to virtual mode */
+	if (resend)
+		this_icp->rm_action |= XICS_RM_CHECK_RESEND;
+
+	return check_too_hard(xics, this_icp);
+}
+
+int kvmppc_rm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr)
+{
+	union kvmppc_icp_state old_state, new_state;
+	struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
+	struct kvmppc_icp *icp = vcpu->arch.icp;
+	u32 reject;
+
+	if (!xics || !xics->real_mode)
+		return H_TOO_HARD;
+
+	/*
+	 * ICP State: Set_CPPR
+	 *
+	 * We can safely compare the new value with the current
+	 * value outside of the transaction as the CPPR is only
+	 * ever changed by the processor on itself
+	 */
+	if (cppr > icp->state.cppr) {
+		icp_rm_down_cppr(xics, icp, cppr);
+		goto bail;
+	} else if (cppr == icp->state.cppr)
+		return H_SUCCESS;
+
+	/*
+	 * ICP State: Up_CPPR
+	 *
+	 * The processor is raising its priority, this can result
+	 * in a rejection of a pending interrupt:
+	 *
+	 * ICP State: Reject_Current
+	 *
+	 * We can remove EE from the current processor, the update
+	 * transaction will set it again if needed
+	 */
+	icp_rm_clr_vcpu_irq(icp->vcpu);
+
+	do {
+		old_state = new_state = ACCESS_ONCE(icp->state);
+
+		reject = 0;
+		new_state.cppr = cppr;
+
+		if (cppr <= new_state.pending_pri) {
+			reject = new_state.xisr;
+			new_state.xisr = 0;
+			new_state.pending_pri = 0xff;
+		}
+
+	} while (!icp_rm_try_update(icp, old_state, new_state));
+
+	/* Pass rejects to virtual mode */
+	if (reject && reject != XICS_IPI) {
+		icp->rm_action |= XICS_RM_REJECT;
+		icp->rm_reject = reject;
+	}
+ bail:
+	return check_too_hard(xics, icp);
+}
+
+int kvmppc_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
+{
+	struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
+	struct kvmppc_icp *icp = vcpu->arch.icp;
+	struct kvmppc_ics *ics;
+	struct ics_irq_state *state;
+	u32 irq = xirr & 0x00ffffff;
+	u16 src;
+
+	if (!xics || !xics->real_mode)
+		return H_TOO_HARD;
+
+	/*
+	 * ICP State: EOI
+	 *
+	 * Note: If EOI is incorrectly used by SW to lower the CPPR
+	 * value (i.e. to a more favored level), we do not check for
+	 * rejection of a pending interrupt; this is a SW error and
+	 * PAPR specifies that we don't have to deal with it.
+	 *
+	 * The sending of an EOI to the ICS is handled after the
+	 * CPPR update
+	 *
+	 * ICP State: Down_CPPR which we handle
+	 * in a separate function as it's shared with H_CPPR.
+	 */
+	icp_rm_down_cppr(xics, icp, xirr >> 24);
+
+	/* IPIs have no EOI */
+	if (irq == XICS_IPI)
+		goto bail;
+	/*
+	 * EOI handling: If the interrupt is still asserted, we need to
+	 * resend it. We can take a lockless "peek" at the ICS state here.
+	 *
+	 * "Message" interrupts will never have "asserted" set
+	 */
+	ics = kvmppc_xics_find_ics(xics, irq, &src);
+	if (!ics)
+		goto bail;
+	state = &ics->irq_state[src];
+
+	/* Still asserted: resend it; we make it look like a reject */
+	if (state->asserted) {
+		icp->rm_action |= XICS_RM_REJECT;
+		icp->rm_reject = irq;
+	}
+ bail:
+	return check_too_hard(xics, icp);
+}
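
Every handler in this file leans on the same lockless idiom: snapshot
icp->state, derive a new value, and let icp_rm_try_update() retry the whole
derivation if the cmpxchg loses a race.  Restated as a portable C11 sketch
(the field layout is an assumption; the real union kvmppc_icp_state lives in
book3s_xics.h):

#include <stdatomic.h>
#include <stdint.h>

/* Illustrative layout only */
union icp_state {
	uint64_t raw;
	struct {
		uint8_t  cppr;		/* current processor priority */
		uint8_t  pending_pri;	/* priority of pending interrupt */
		uint8_t  mfrr;		/* IPI priority */
		uint8_t  out_ee;	/* interrupt output asserted */
		uint32_t xisr;		/* pending source (24 bits used) */
	};
};

static void icp_set_cppr(_Atomic uint64_t *state, uint8_t new_cppr)
{
	union icp_state old, new;

	do {
		/* snapshot, then derive the new state from it */
		old.raw = new.raw = atomic_load(state);
		new.cppr = new_cppr;
		/* re-derive the output line, as icp_rm_try_update() does */
		new.out_ee = new.xisr && new.pending_pri < new.cppr;
	} while (!atomic_compare_exchange_weak(state, &old.raw, new.raw));
}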
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index e33d11f..b02f91e 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -79,10 +79,6 @@
  *                                                                            *
  *****************************************************************************/
 
-#define XICS_XIRR		4
-#define XICS_QIRR		0xc
-#define XICS_IPI		2	/* interrupt source # for IPIs */
-
 /*
  * We come in here when wakened from nap mode on a secondary hw thread.
  * Relocation is off and most register values are lost.
@@ -101,50 +97,51 @@
 	li	r0,1
 	stb	r0,PACA_NAPSTATELOST(r13)
 
-	/* get vcpu pointer, NULL if we have no vcpu to run */
-	ld	r4,HSTATE_KVM_VCPU(r13)
-	cmpdi	cr1,r4,0
+	/* were we napping due to cede? */
+	lbz	r0,HSTATE_NAPPING(r13)
+	cmpwi	r0,0
+	bne	kvm_end_cede
+
+	/*
+	 * We weren't napping due to cede, so this must be a secondary
+	 * thread being woken up to run a guest, or being woken up due
+	 * to a stray IPI.  (Or due to some machine check or hypervisor
+	 * maintenance interrupt while the core is in KVM.)
+	 */
 
 	/* Check the wake reason in SRR1 to see why we got here */
 	mfspr	r3,SPRN_SRR1
 	rlwinm	r3,r3,44-31,0x7		/* extract wake reason field */
 	cmpwi	r3,4			/* was it an external interrupt? */
-	bne	27f
-
-	/*
-	 * External interrupt - for now assume it is an IPI, since we
-	 * should never get any other interrupts sent to offline threads.
-	 * Only do this for secondary threads.
-	 */
-	beq	cr1,25f
-	lwz	r3,VCPU_PTID(r4)
-	cmpwi	r3,0
-	beq	27f
-25:	ld	r5,HSTATE_XICS_PHYS(r13)
-	li	r0,0xff
-	li	r6,XICS_QIRR
-	li	r7,XICS_XIRR
+	bne	27f			/* if not */
+	ld	r5,HSTATE_XICS_PHYS(r13)
+	li	r7,XICS_XIRR		/* if it was an external interrupt, */
 	lwzcix	r8,r5,r7		/* get and ack the interrupt */
 	sync
 	clrldi.	r9,r8,40		/* get interrupt source ID. */
-	beq	27f			/* none there? */
-	cmpwi	r9,XICS_IPI
-	bne	26f
+	beq	28f			/* none there? */
+	cmpwi	r9,XICS_IPI		/* was it an IPI? */
+	bne	29f
+	li	r0,0xff
+	li	r6,XICS_MFRR
 	stbcix	r0,r5,r6		/* clear IPI */
-26:	stwcix	r8,r5,r7		/* EOI the interrupt */
+	stwcix	r8,r5,r7		/* EOI the interrupt */
+	sync				/* order loading of vcpu after that */
 
-27:	/* XXX should handle hypervisor maintenance interrupts etc. here */
-
-	/* reload vcpu pointer after clearing the IPI */
+	/* get vcpu pointer, NULL if we have no vcpu to run */
 	ld	r4,HSTATE_KVM_VCPU(r13)
 	cmpdi	r4,0
 	/* if we have no vcpu to run, go back to sleep */
 	beq	kvm_no_guest
+	b	kvmppc_hv_entry
 
-	/* were we napping due to cede? */
-	lbz	r0,HSTATE_NAPPING(r13)
-	cmpwi	r0,0
-	bne	kvm_end_cede
+27:	/* XXX should handle hypervisor maintenance interrupts etc. here */
+	b	kvm_no_guest
+28:	/* SRR1 said external but ICP said nope?? */
+	b	kvm_no_guest
+29:	/* External non-IPI interrupt to offline secondary thread? help?? */
+	stw	r8,HSTATE_SAVED_XIRR(r13)
+	b	kvm_no_guest
 
 .global kvmppc_hv_entry
 kvmppc_hv_entry:
@@ -260,6 +257,8 @@
 	lwz	r5, LPPACA_YIELDCOUNT(r3)
 	addi	r5, r5, 1
 	stw	r5, LPPACA_YIELDCOUNT(r3)
+	li	r6, 1
+	stb	r6, VCPU_VPA_DIRTY(r4)
 25:
 	/* Load up DAR and DSISR */
 	ld	r5, VCPU_DAR(r4)
@@ -485,20 +484,20 @@
 	mtctr	r6
 	mtxer	r7
 
+	ld	r10, VCPU_PC(r4)
+	ld	r11, VCPU_MSR(r4)
 kvmppc_cede_reentry:		/* r4 = vcpu, r13 = paca */
 	ld	r6, VCPU_SRR0(r4)
 	ld	r7, VCPU_SRR1(r4)
-	ld	r10, VCPU_PC(r4)
-	ld	r11, VCPU_MSR(r4)	/* r11 = vcpu->arch.msr & ~MSR_HV */
 
+	/* r11 = vcpu->arch.msr & ~MSR_HV */
 	rldicl	r11, r11, 63 - MSR_HV_LG, 1
 	rotldi	r11, r11, 1 + MSR_HV_LG
 	ori	r11, r11, MSR_ME
 
 	/* Check if we can deliver an external or decrementer interrupt now */
 	ld	r0,VCPU_PENDING_EXC(r4)
-	li	r8,(1 << BOOK3S_IRQPRIO_EXTERNAL)
-	oris	r8,r8,(1 << BOOK3S_IRQPRIO_EXTERNAL_LEVEL)@h
+	lis	r8,(1 << BOOK3S_IRQPRIO_EXTERNAL_LEVEL)@h
 	and	r0,r0,r8
 	cmpdi	cr1,r0,0
 	andi.	r0,r11,MSR_EE
@@ -526,10 +525,10 @@
 	/* Move SRR0 and SRR1 into the respective regs */
 5:	mtspr	SPRN_SRR0, r6
 	mtspr	SPRN_SRR1, r7
-	li	r0,0
-	stb	r0,VCPU_CEDED(r4)	/* cancel cede */
 
 fast_guest_return:
+	li	r0,0
+	stb	r0,VCPU_CEDED(r4)	/* cancel cede */
 	mtspr	SPRN_HSRR0,r10
 	mtspr	SPRN_HSRR1,r11
 
@@ -676,17 +675,99 @@
 	cmpwi	r12,BOOK3S_INTERRUPT_SYSCALL
 	beq	hcall_try_real_mode
 
-	/* Check for mediated interrupts (could be done earlier really ...) */
+	/* Only handle external interrupts here on arch 206 and later */
 BEGIN_FTR_SECTION
-	cmpwi	r12,BOOK3S_INTERRUPT_EXTERNAL
-	bne+	1f
-	andi.	r0,r11,MSR_EE
-	beq	1f
-	mfspr	r5,SPRN_LPCR
-	andi.	r0,r5,LPCR_MER
-	bne	bounce_ext_interrupt
-1:
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
+	b	ext_interrupt_to_host
+END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_206)
+
+	/* External interrupt ? */
+	cmpwi	r12, BOOK3S_INTERRUPT_EXTERNAL
+	bne+	ext_interrupt_to_host
+
+	/* External interrupt: first check for host_ipi. If it is
+	 * set, we know the host wants us out, so let's do it now.
+	 */
+do_ext_interrupt:
+	lbz	r0, HSTATE_HOST_IPI(r13)
+	cmpwi	r0, 0
+	bne	ext_interrupt_to_host
+
+	/* Now read the interrupt from the ICP */
+	ld	r5, HSTATE_XICS_PHYS(r13)
+	li	r7, XICS_XIRR
+	cmpdi	r5, 0
+	beq-	ext_interrupt_to_host
+	lwzcix	r3, r5, r7
+	rlwinm.	r0, r3, 0, 0xffffff
+	sync
+	beq	3f		/* if nothing pending in the ICP */
+
+	/* We found something in the ICP...
+	 *
+	 * If it's not an IPI, stash it in the PACA and return to
+	 * the host; we don't (yet) handle directing real external
+	 * interrupts straight to the guest.
+	 */
+	cmpwi	r0, XICS_IPI
+	bne	ext_stash_for_host
+
+	/* It's an IPI, clear the MFRR and EOI it */
+	li	r0, 0xff
+	li	r6, XICS_MFRR
+	stbcix	r0, r5, r6		/* clear the IPI */
+	stwcix	r3, r5, r7		/* EOI it */
+	sync
+
+	/* We need to re-check host IPI now in case it got set in the
+	 * meantime. If it's clear, we bounce the interrupt to the
+	 * guest.
+	 */
+	lbz	r0, HSTATE_HOST_IPI(r13)
+	cmpwi	r0, 0
+	bne-	1f
+
+	/* All right, looks like an IPI for the guest; we need to set MER */
+3:
+	/* Check if any CPU is heading out to the host, if so head out too */
+	ld	r5, HSTATE_KVM_VCORE(r13)
+	lwz	r0, VCORE_ENTRY_EXIT(r5)
+	cmpwi	r0, 0x100
+	bge	ext_interrupt_to_host
+
+	/* See if there is a pending interrupt for the guest */
+	mfspr	r8, SPRN_LPCR
+	ld	r0, VCPU_PENDING_EXC(r9)
+	/* Insert EXTERNAL_LEVEL bit into LPCR at the MER bit position */
+	rldicl.	r0, r0, 64 - BOOK3S_IRQPRIO_EXTERNAL_LEVEL, 63
+	rldimi	r8, r0, LPCR_MER_SH, 63 - LPCR_MER_SH
+	beq	2f
+
+	/* And if the guest EE is set, we can deliver immediately; else
+	 * we return to the guest with MER set.
+	 */
+	andi.	r0, r11, MSR_EE
+	beq	2f
+	mtspr	SPRN_SRR0, r10
+	mtspr	SPRN_SRR1, r11
+	li	r10, BOOK3S_INTERRUPT_EXTERNAL
+	li	r11, (MSR_ME << 1) | 1	/* synthesize MSR_SF | MSR_ME */
+	rotldi	r11, r11, 63
+2:	mr	r4, r9
+	mtspr	SPRN_LPCR, r8
+	b	fast_guest_return
+
+	/* We raced with the host, we need to resend that IPI, bummer */
+1:	li	r0, IPI_PRIORITY
+	stbcix	r0, r5, r6		/* set the IPI */
+	sync
+	b	ext_interrupt_to_host
+
+ext_stash_for_host:
+	/* It's not an IPI and it's for the host; stash it in the PACA
+	 * before exiting.  It will be picked up by the host ICP driver.
+	 */
+	stw	r3, HSTATE_SAVED_XIRR(r13)
+ext_interrupt_to_host:
 
 guest_exit_cont:		/* r9 = vcpu, r12 = trap, r13 = paca */
 	/* Save DEC */
@@ -829,7 +910,7 @@
 	beq	44f
 	ld	r8,HSTATE_XICS_PHYS(r6)	/* get thread's XICS reg addr */
 	li	r0,IPI_PRIORITY
-	li	r7,XICS_QIRR
+	li	r7,XICS_MFRR
 	stbcix	r0,r7,r8		/* trigger the IPI */
 44:	srdi.	r3,r3,1
 	addi	r6,r6,PACA_SIZE
@@ -1018,6 +1099,8 @@
 	lwz	r3, LPPACA_YIELDCOUNT(r8)
 	addi	r3, r3, 1
 	stw	r3, LPPACA_YIELDCOUNT(r8)
+	li	r3, 1
+	stb	r3, VCPU_VPA_DIRTY(r9)
 25:
 	/* Save PMU registers if requested */
 	/* r8 and cr0.eq are live here */
@@ -1350,11 +1433,19 @@
 	.long	0		/* 0x58 */
 	.long	0		/* 0x5c */
 	.long	0		/* 0x60 */
-	.long	0		/* 0x64 */
-	.long	0		/* 0x68 */
-	.long	0		/* 0x6c */
-	.long	0		/* 0x70 */
-	.long	0		/* 0x74 */
+#ifdef CONFIG_KVM_XICS
+	.long	.kvmppc_rm_h_eoi - hcall_real_table
+	.long	.kvmppc_rm_h_cppr - hcall_real_table
+	.long	.kvmppc_rm_h_ipi - hcall_real_table
+	.long	0		/* 0x70 - H_IPOLL */
+	.long	.kvmppc_rm_h_xirr - hcall_real_table
+#else
+	.long	0		/* 0x64 - H_EOI */
+	.long	0		/* 0x68 - H_CPPR */
+	.long	0		/* 0x6c - H_IPI */
+	.long	0		/* 0x70 - H_IPOLL */
+	.long	0		/* 0x74 - H_XIRR */
+#endif
 	.long	0		/* 0x78 */
 	.long	0		/* 0x7c */
 	.long	0		/* 0x80 */
@@ -1405,15 +1496,6 @@
 	mr	r4,r9
 	b	fast_guest_return
 
-bounce_ext_interrupt:
-	mr	r4,r9
-	mtspr	SPRN_SRR0,r10
-	mtspr	SPRN_SRR1,r11
-	li	r10,BOOK3S_INTERRUPT_EXTERNAL
-	li	r11,(MSR_ME << 1) | 1	/* synthesize MSR_SF | MSR_ME */
-	rotldi	r11,r11,63
-	b	fast_guest_return
-
 _GLOBAL(kvmppc_h_set_dabr)
 	std	r4,VCPU_DABR(r3)
 	/* Work around P7 bug where DABR can get corrupted on mtspr */
@@ -1519,6 +1601,9 @@
 	b	.
 
 kvm_end_cede:
+	/* get vcpu pointer */
+	ld	r4, HSTATE_KVM_VCPU(r13)
+
 	/* Woken by external or decrementer interrupt */
 	ld	r1, HSTATE_HOST_R1(r13)
 
@@ -1558,6 +1643,16 @@
 	li	r0,0
 	stb	r0,HSTATE_NAPPING(r13)
 
+	/* Check the wake reason in SRR1 to see why we got here */
+	mfspr	r3, SPRN_SRR1
+	rlwinm	r3, r3, 44-31, 0x7	/* extract wake reason field */
+	cmpwi	r3, 4			/* was it an external interrupt? */
+	li	r12, BOOK3S_INTERRUPT_EXTERNAL
+	mr	r9, r4
+	ld	r10, VCPU_PC(r9)
+	ld	r11, VCPU_MSR(r9)
+	beq	do_ext_interrupt	/* if so */
+
 	/* see if any other thread is already exiting */
 	lwz	r0,VCORE_ENTRY_EXIT(r5)
 	cmpwi	r0,0x100
@@ -1577,8 +1672,7 @@
 
 	/* we've ceded but we want to give control to the host */
 kvm_cede_exit:
-	li	r3,H_TOO_HARD
-	blr
+	b	hcall_real_fallback
 
 	/* Try to handle a machine check in real mode */
 machine_check_realmode:
@@ -1626,7 +1720,7 @@
 	beq	37f
 	sync
 	li	r0, 0xff
-	li	r6, XICS_QIRR
+	li	r6, XICS_MFRR
 	stbcix	r0, r5, r6		/* clear the IPI */
 	stwcix	r3, r5, r7		/* EOI it */
 37:	sync
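
The do_ext_interrupt sequence above is easier to follow restated as C.  This
is a control-flow model only: the helpers stand in for the cache-inhibited
XICS loads/stores, and the struct fields mirror HSTATE_HOST_IPI and
HSTATE_SAVED_XIRR (the real code also bails to the host if the vcore is
already exiting, which is omitted here):

#include <stdint.h>

#define XICS_IPI	2	/* as defined in kvm_book3s_asm.h */

struct kvm_hstate {
	uint8_t  host_ipi;
	uint32_t saved_xirr;
};

/* assumed stand-ins for the raw lwzcix/stbcix/stwcix accesses */
uint32_t icp_read_xirr(struct kvm_hstate *hs);
void icp_clear_and_eoi_ipi(struct kvm_hstate *hs, uint32_t xirr);
void icp_resend_ipi(struct kvm_hstate *hs);

enum ext_action { TO_GUEST, TO_HOST };

static enum ext_action do_ext_interrupt(struct kvm_hstate *hs)
{
	uint32_t xirr, src;

	if (hs->host_ipi)
		return TO_HOST;		/* host wants us out */

	xirr = icp_read_xirr(hs);	/* load-with-ack */
	src = xirr & 0xffffff;
	if (!src)
		return TO_GUEST;	/* nothing pending in the ICP */

	if (src != XICS_IPI) {
		hs->saved_xirr = xirr;	/* host ICP driver picks it up */
		return TO_HOST;
	}

	icp_clear_and_eoi_ipi(hs, xirr);
	if (hs->host_ipi) {		/* raced with the host */
		icp_resend_ipi(hs);
		return TO_HOST;
	}
	return TO_GUEST;		/* deliver via MER / SRR0,1 */
}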
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index dbdc15a..bdc40b8 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -762,9 +762,7 @@
 			run->exit_reason = KVM_EXIT_MMIO;
 			r = RESUME_HOST_NV;
 			break;
-		case EMULATE_DO_PAPR:
-			run->exit_reason = KVM_EXIT_PAPR_HCALL;
-			vcpu->arch.hcall_needed = 1;
+		case EMULATE_EXIT_USER:
 			r = RESUME_HOST_NV;
 			break;
 		default:
@@ -1283,7 +1281,7 @@
 
 void kvmppc_core_commit_memory_region(struct kvm *kvm,
 				struct kvm_userspace_memory_region *mem,
-				struct kvm_memory_slot old)
+				const struct kvm_memory_slot *old)
 {
 }
 
@@ -1298,6 +1296,7 @@
 {
 #ifdef CONFIG_PPC64
 	INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables);
+	INIT_LIST_HEAD(&kvm->arch.rtas_tokens);
 #endif
 
 	if (firmware_has_feature(FW_FEATURE_SET_MODE)) {
diff --git a/arch/powerpc/kvm/book3s_pr_papr.c b/arch/powerpc/kvm/book3s_pr_papr.c
index ee02b30..b24309c 100644
--- a/arch/powerpc/kvm/book3s_pr_papr.c
+++ b/arch/powerpc/kvm/book3s_pr_papr.c
@@ -227,6 +227,13 @@
 	return EMULATE_DONE;
 }
 
+static int kvmppc_h_pr_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd)
+{
+	long rc = kvmppc_xics_hcall(vcpu, cmd);
+	kvmppc_set_gpr(vcpu, 3, rc);
+	return EMULATE_DONE;
+}
+
 int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd)
 {
 	switch (cmd) {
@@ -246,6 +253,20 @@
 		clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
 		vcpu->stat.halt_wakeup++;
 		return EMULATE_DONE;
+	case H_XIRR:
+	case H_CPPR:
+	case H_EOI:
+	case H_IPI:
+		if (kvmppc_xics_enabled(vcpu))
+			return kvmppc_h_pr_xics_hcall(vcpu, cmd);
+		break;
+	case H_RTAS:
+		if (list_empty(&vcpu->kvm->arch.rtas_tokens))
+			return RESUME_HOST;
+		if (kvmppc_rtas_hcall(vcpu))
+			break;
+		kvmppc_set_gpr(vcpu, 3, 0);
+		return EMULATE_DONE;
 	}
 
 	return EMULATE_FAIL;
diff --git a/arch/powerpc/kvm/book3s_rtas.c b/arch/powerpc/kvm/book3s_rtas.c
new file mode 100644
index 0000000..3219ba8
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_rtas.c
@@ -0,0 +1,274 @@
+/*
+ * Copyright 2012 Michael Ellerman, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/kvm_host.h>
+#include <linux/kvm.h>
+#include <linux/err.h>
+
+#include <asm/uaccess.h>
+#include <asm/kvm_book3s.h>
+#include <asm/kvm_ppc.h>
+#include <asm/hvcall.h>
+#include <asm/rtas.h>
+
+#ifdef CONFIG_KVM_XICS
+static void kvm_rtas_set_xive(struct kvm_vcpu *vcpu, struct rtas_args *args)
+{
+	u32 irq, server, priority;
+	int rc;
+
+	if (args->nargs != 3 || args->nret != 1) {
+		rc = -3;
+		goto out;
+	}
+
+	irq = args->args[0];
+	server = args->args[1];
+	priority = args->args[2];
+
+	rc = kvmppc_xics_set_xive(vcpu->kvm, irq, server, priority);
+	if (rc)
+		rc = -3;
+out:
+	args->rets[0] = rc;
+}
+
+static void kvm_rtas_get_xive(struct kvm_vcpu *vcpu, struct rtas_args *args)
+{
+	u32 irq, server, priority;
+	int rc;
+
+	if (args->nargs != 1 || args->nret != 3) {
+		rc = -3;
+		goto out;
+	}
+
+	irq = args->args[0];
+
+	server = priority = 0;
+	rc = kvmppc_xics_get_xive(vcpu->kvm, irq, &server, &priority);
+	if (rc) {
+		rc = -3;
+		goto out;
+	}
+
+	args->rets[1] = server;
+	args->rets[2] = priority;
+out:
+	args->rets[0] = rc;
+}
+
+static void kvm_rtas_int_off(struct kvm_vcpu *vcpu, struct rtas_args *args)
+{
+	u32 irq;
+	int rc;
+
+	if (args->nargs != 1 || args->nret != 1) {
+		rc = -3;
+		goto out;
+	}
+
+	irq = args->args[0];
+
+	rc = kvmppc_xics_int_off(vcpu->kvm, irq);
+	if (rc)
+		rc = -3;
+out:
+	args->rets[0] = rc;
+}
+
+static void kvm_rtas_int_on(struct kvm_vcpu *vcpu, struct rtas_args *args)
+{
+	u32 irq;
+	int rc;
+
+	if (args->nargs != 1 || args->nret != 1) {
+		rc = -3;
+		goto out;
+	}
+
+	irq = args->args[0];
+
+	rc = kvmppc_xics_int_on(vcpu->kvm, irq);
+	if (rc)
+		rc = -3;
+out:
+	args->rets[0] = rc;
+}
+#endif /* CONFIG_KVM_XICS */
+
+struct rtas_handler {
+	void (*handler)(struct kvm_vcpu *vcpu, struct rtas_args *args);
+	char *name;
+};
+
+static struct rtas_handler rtas_handlers[] = {
+#ifdef CONFIG_KVM_XICS
+	{ .name = "ibm,set-xive", .handler = kvm_rtas_set_xive },
+	{ .name = "ibm,get-xive", .handler = kvm_rtas_get_xive },
+	{ .name = "ibm,int-off",  .handler = kvm_rtas_int_off },
+	{ .name = "ibm,int-on",   .handler = kvm_rtas_int_on },
+#endif
+};
+
+struct rtas_token_definition {
+	struct list_head list;
+	struct rtas_handler *handler;
+	u64 token;
+};
+
+static int rtas_name_matches(char *s1, char *s2)
+{
+	struct kvm_rtas_token_args args;
+	return !strncmp(s1, s2, sizeof(args.name));
+}
+
+static int rtas_token_undefine(struct kvm *kvm, char *name)
+{
+	struct rtas_token_definition *d, *tmp;
+
+	lockdep_assert_held(&kvm->lock);
+
+	list_for_each_entry_safe(d, tmp, &kvm->arch.rtas_tokens, list) {
+		if (rtas_name_matches(d->handler->name, name)) {
+			list_del(&d->list);
+			kfree(d);
+			return 0;
+		}
+	}
+
+	/* It's not an error to undefine an undefined token */
+	return 0;
+}
+
+static int rtas_token_define(struct kvm *kvm, char *name, u64 token)
+{
+	struct rtas_token_definition *d;
+	struct rtas_handler *h = NULL;
+	bool found;
+	int i;
+
+	lockdep_assert_held(&kvm->lock);
+
+	list_for_each_entry(d, &kvm->arch.rtas_tokens, list) {
+		if (d->token == token)
+			return -EEXIST;
+	}
+
+	found = false;
+	for (i = 0; i < ARRAY_SIZE(rtas_handlers); i++) {
+		h = &rtas_handlers[i];
+		if (rtas_name_matches(h->name, name)) {
+			found = true;
+			break;
+		}
+	}
+
+	if (!found)
+		return -ENOENT;
+
+	d = kzalloc(sizeof(*d), GFP_KERNEL);
+	if (!d)
+		return -ENOMEM;
+
+	d->handler = h;
+	d->token = token;
+
+	list_add_tail(&d->list, &kvm->arch.rtas_tokens);
+
+	return 0;
+}
+
+int kvm_vm_ioctl_rtas_define_token(struct kvm *kvm, void __user *argp)
+{
+	struct kvm_rtas_token_args args;
+	int rc;
+
+	if (copy_from_user(&args, argp, sizeof(args)))
+		return -EFAULT;
+
+	mutex_lock(&kvm->lock);
+
+	if (args.token)
+		rc = rtas_token_define(kvm, args.name, args.token);
+	else
+		rc = rtas_token_undefine(kvm, args.name);
+
+	mutex_unlock(&kvm->lock);
+
+	return rc;
+}
+
+int kvmppc_rtas_hcall(struct kvm_vcpu *vcpu)
+{
+	struct rtas_token_definition *d;
+	struct rtas_args args;
+	rtas_arg_t *orig_rets;
+	gpa_t args_phys;
+	int rc;
+
+	/* r4 contains the guest physical address of the RTAS args */
+	args_phys = kvmppc_get_gpr(vcpu, 4);
+
+	rc = kvm_read_guest(vcpu->kvm, args_phys, &args, sizeof(args));
+	if (rc)
+		goto fail;
+
+	/*
+	 * args->rets is a pointer into args->args. Now that we've
+	 * copied args we need to fix it up to point into our copy,
+	 * not the guest args. We also need to save the original
+	 * value so we can restore it on the way out.
+	 */
+	orig_rets = args.rets;
+	args.rets = &args.args[args.nargs];
+
+	mutex_lock(&vcpu->kvm->lock);
+
+	rc = -ENOENT;
+	list_for_each_entry(d, &vcpu->kvm->arch.rtas_tokens, list) {
+		if (d->token == args.token) {
+			d->handler->handler(vcpu, &args);
+			rc = 0;
+			break;
+		}
+	}
+
+	mutex_unlock(&vcpu->kvm->lock);
+
+	if (rc == 0) {
+		args.rets = orig_rets;
+		rc = kvm_write_guest(vcpu->kvm, args_phys, &args, sizeof(args));
+		if (rc)
+			goto fail;
+	}
+
+	return rc;
+
+fail:
+	/*
+	 * We only get here if the guest has called RTAS with a bogus
+	 * args pointer. That means we can't get to the args, and so we
+	 * can't fail the RTAS call. So fail right out to userspace,
+	 * which should kill the guest.
+	 */
+	return rc;
+}
+
+void kvmppc_rtas_tokens_free(struct kvm *kvm)
+{
+	struct rtas_token_definition *d, *tmp;
+
+	lockdep_assert_held(&kvm->lock);
+
+	list_for_each_entry_safe(d, tmp, &kvm->arch.rtas_tokens, list) {
+		list_del(&d->list);
+		kfree(d);
+	}
+}
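
The subtle step in kvmppc_rtas_hcall() above is re-pointing args.rets after the
copy-in: in the guest's buffer, rets points at guest memory, but the handlers
must write into the host-side copy, and the original value has to be restored
before copying back out. A standalone sketch of that pointer dance (the struct
is shaped like rtas_args, but its size and field names here are illustrative):

#include <stdint.h>
#include <stdio.h>

typedef uint32_t rtas_arg_t;

struct demo_rtas_args {			/* shaped like struct rtas_args */
	uint32_t token, nargs, nret;
	rtas_arg_t args[16];		/* inputs, then return slots */
	rtas_arg_t *rets;		/* points into args[] */
};

int main(void)
{
	struct demo_rtas_args a = { .token = 1, .nargs = 3, .nret = 1 };

	rtas_arg_t *orig_rets = a.rets;	/* guest value, saved */
	a.rets = &a.args[a.nargs];	/* re-point into our copy */
	a.rets[0] = 0;			/* handler stores status here */
	a.rets = orig_rets;		/* restore before copy-out */

	printf("status written to args[%u]\n", a.nargs);
	return 0;
}
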
diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c
new file mode 100644
index 0000000..f7a1037
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_xics.c
@@ -0,0 +1,1270 @@
+/*
+ * Copyright 2012 Michael Ellerman, IBM Corporation.
+ * Copyright 2012 Benjamin Herrenschmidt, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/kvm_host.h>
+#include <linux/err.h>
+#include <linux/gfp.h>
+#include <linux/anon_inodes.h>
+
+#include <asm/uaccess.h>
+#include <asm/kvm_book3s.h>
+#include <asm/kvm_ppc.h>
+#include <asm/hvcall.h>
+#include <asm/xics.h>
+#include <asm/debug.h>
+
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+#include "book3s_xics.h"
+
+#if 1
+#define XICS_DBG(fmt...) do { } while (0)
+#else
+#define XICS_DBG(fmt...) trace_printk(fmt)
+#endif
+
+#define ENABLE_REALMODE	true
+#define DEBUG_REALMODE	false
+
+/*
+ * LOCKING
+ * =======
+ *
+ * Each ICS has a mutex protecting the information about the IRQ
+ * sources and avoiding simultaneous deliveries of the same interrupt.
+ *
+ * ICP operations are done via a single compare & swap transaction
+ * (most ICP state fits in the union kvmppc_icp_state)
+ */
+
+/*
+ * TODO
+ * ====
+ *
+ * - To speed up resends, keep a bitmap of "resend" set bits in the
+ *   ICS
+ *
+ * - Speed up server# -> ICP lookup (array? hash table?)
+ *
+ * - Make ICS lockless as well, or at least a per-interrupt lock or hashed
+ *   locks array to improve scalability
+ */
+
+/* -- ICS routines -- */
+
+static void icp_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
+			    u32 new_irq);
+
+static int ics_deliver_irq(struct kvmppc_xics *xics, u32 irq, u32 level,
+			   bool report_status)
+{
+	struct ics_irq_state *state;
+	struct kvmppc_ics *ics;
+	u16 src;
+
+	XICS_DBG("ics deliver %#x (level: %d)\n", irq, level);
+
+	ics = kvmppc_xics_find_ics(xics, irq, &src);
+	if (!ics) {
+		XICS_DBG("ics_deliver_irq: IRQ 0x%06x not found !\n", irq);
+		return -EINVAL;
+	}
+	state = &ics->irq_state[src];
+	if (!state->exists)
+		return -EINVAL;
+
+	if (report_status)
+		return state->asserted;
+
+	/*
+	 * We set state->asserted locklessly. This should be fine as
+	 * we are the only setter, thus concurrent access is undefined
+	 * to begin with.
+	 */
+	if (level == KVM_INTERRUPT_SET_LEVEL)
+		state->asserted = 1;
+	else if (level == KVM_INTERRUPT_UNSET) {
+		state->asserted = 0;
+		return 0;
+	}
+
+	/* Attempt delivery */
+	icp_deliver_irq(xics, NULL, irq);
+
+	return state->asserted;
+}
+
+static void ics_check_resend(struct kvmppc_xics *xics, struct kvmppc_ics *ics,
+			     struct kvmppc_icp *icp)
+{
+	int i;
+
+	mutex_lock(&ics->lock);
+
+	for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) {
+		struct ics_irq_state *state = &ics->irq_state[i];
+
+		if (!state->resend)
+			continue;
+
+		XICS_DBG("resend %#x prio %#x\n", state->number,
+			      state->priority);
+
+		mutex_unlock(&ics->lock);
+		icp_deliver_irq(xics, icp, state->number);
+		mutex_lock(&ics->lock);
+	}
+
+	mutex_unlock(&ics->lock);
+}
+
+static bool write_xive(struct kvmppc_xics *xics, struct kvmppc_ics *ics,
+		       struct ics_irq_state *state,
+		       u32 server, u32 priority, u32 saved_priority)
+{
+	bool deliver;
+
+	mutex_lock(&ics->lock);
+
+	state->server = server;
+	state->priority = priority;
+	state->saved_priority = saved_priority;
+	deliver = false;
+	if ((state->masked_pending || state->resend) && priority != MASKED) {
+		state->masked_pending = 0;
+		deliver = true;
+	}
+
+	mutex_unlock(&ics->lock);
+
+	return deliver;
+}
+
+int kvmppc_xics_set_xive(struct kvm *kvm, u32 irq, u32 server, u32 priority)
+{
+	struct kvmppc_xics *xics = kvm->arch.xics;
+	struct kvmppc_icp *icp;
+	struct kvmppc_ics *ics;
+	struct ics_irq_state *state;
+	u16 src;
+
+	if (!xics)
+		return -ENODEV;
+
+	ics = kvmppc_xics_find_ics(xics, irq, &src);
+	if (!ics)
+		return -EINVAL;
+	state = &ics->irq_state[src];
+
+	icp = kvmppc_xics_find_server(kvm, server);
+	if (!icp)
+		return -EINVAL;
+
+	XICS_DBG("set_xive %#x server %#x prio %#x MP:%d RS:%d\n",
+		 irq, server, priority,
+		 state->masked_pending, state->resend);
+
+	if (write_xive(xics, ics, state, server, priority, priority))
+		icp_deliver_irq(xics, icp, irq);
+
+	return 0;
+}
+
+int kvmppc_xics_get_xive(struct kvm *kvm, u32 irq, u32 *server, u32 *priority)
+{
+	struct kvmppc_xics *xics = kvm->arch.xics;
+	struct kvmppc_ics *ics;
+	struct ics_irq_state *state;
+	u16 src;
+
+	if (!xics)
+		return -ENODEV;
+
+	ics = kvmppc_xics_find_ics(xics, irq, &src);
+	if (!ics)
+		return -EINVAL;
+	state = &ics->irq_state[src];
+
+	mutex_lock(&ics->lock);
+	*server = state->server;
+	*priority = state->priority;
+	mutex_unlock(&ics->lock);
+
+	return 0;
+}
+
+int kvmppc_xics_int_on(struct kvm *kvm, u32 irq)
+{
+	struct kvmppc_xics *xics = kvm->arch.xics;
+	struct kvmppc_icp *icp;
+	struct kvmppc_ics *ics;
+	struct ics_irq_state *state;
+	u16 src;
+
+	if (!xics)
+		return -ENODEV;
+
+	ics = kvmppc_xics_find_ics(xics, irq, &src);
+	if (!ics)
+		return -EINVAL;
+	state = &ics->irq_state[src];
+
+	icp = kvmppc_xics_find_server(kvm, state->server);
+	if (!icp)
+		return -EINVAL;
+
+	if (write_xive(xics, ics, state, state->server, state->saved_priority,
+		       state->saved_priority))
+		icp_deliver_irq(xics, icp, irq);
+
+	return 0;
+}
+
+int kvmppc_xics_int_off(struct kvm *kvm, u32 irq)
+{
+	struct kvmppc_xics *xics = kvm->arch.xics;
+	struct kvmppc_ics *ics;
+	struct ics_irq_state *state;
+	u16 src;
+
+	if (!xics)
+		return -ENODEV;
+
+	ics = kvmppc_xics_find_ics(xics, irq, &src);
+	if (!ics)
+		return -EINVAL;
+	state = &ics->irq_state[src];
+
+	write_xive(xics, ics, state, state->server, MASKED, state->priority);
+
+	return 0;
+}
+
+/* -- ICP routines, including hcalls -- */
+
+static inline bool icp_try_update(struct kvmppc_icp *icp,
+				  union kvmppc_icp_state old,
+				  union kvmppc_icp_state new,
+				  bool change_self)
+{
+	bool success;
+
+	/* Calculate new output value */
+	new.out_ee = (new.xisr && (new.pending_pri < new.cppr));
+
+	/* Attempt atomic update */
+	success = cmpxchg64(&icp->state.raw, old.raw, new.raw) == old.raw;
+	if (!success)
+		goto bail;
+
+	XICS_DBG("UPD [%04x] - C:%02x M:%02x PP: %02x PI:%06x R:%d O:%d\n",
+		 icp->server_num,
+		 old.cppr, old.mfrr, old.pending_pri, old.xisr,
+		 old.need_resend, old.out_ee);
+	XICS_DBG("UPD        - C:%02x M:%02x PP: %02x PI:%06x R:%d O:%d\n",
+		 new.cppr, new.mfrr, new.pending_pri, new.xisr,
+		 new.need_resend, new.out_ee);
+	/*
+	 * Check for output state update
+	 *
+	 * Note that this is racy since another processor could be updating
+	 * the state already. This is why we never clear the interrupt output
+	 * here, we only ever set it. The clear only happens prior to doing
+	 * an update and only by the processor itself. Currently we do it
+	 * in Accept (H_XIRR) and Up_Cppr (H_CPPR).
+	 *
+	 * We also do not try to figure out whether the EE state has changed,
+	 * we unconditionally set it if the new state calls for it. The reason
+	 * for that is that we opportunistically remove the pending interrupt
+	 * flag when raising CPPR, so we need to set it back here if an
+	 * interrupt is still pending.
+	 */
+	if (new.out_ee) {
+		kvmppc_book3s_queue_irqprio(icp->vcpu,
+					    BOOK3S_INTERRUPT_EXTERNAL_LEVEL);
+		if (!change_self)
+			kvmppc_fast_vcpu_kick(icp->vcpu);
+	}
+ bail:
+	return success;
+}
+
+static void icp_check_resend(struct kvmppc_xics *xics,
+			     struct kvmppc_icp *icp)
+{
+	u32 icsid;
+
+	/* Order this load with the test for need_resend in the caller */
+	smp_rmb();
+	for_each_set_bit(icsid, icp->resend_map, xics->max_icsid + 1) {
+		struct kvmppc_ics *ics = xics->ics[icsid];
+
+		if (!test_and_clear_bit(icsid, icp->resend_map))
+			continue;
+		if (!ics)
+			continue;
+		ics_check_resend(xics, ics, icp);
+	}
+}
+
+static bool icp_try_to_deliver(struct kvmppc_icp *icp, u32 irq, u8 priority,
+			       u32 *reject)
+{
+	union kvmppc_icp_state old_state, new_state;
+	bool success;
+
+	XICS_DBG("try deliver %#x(P:%#x) to server %#x\n", irq, priority,
+		 icp->server_num);
+
+	do {
+		old_state = new_state = ACCESS_ONCE(icp->state);
+
+		*reject = 0;
+
+		/* See if we can deliver */
+		success = new_state.cppr > priority &&
+			new_state.mfrr > priority &&
+			new_state.pending_pri > priority;
+
+		/*
+		 * If we can, check for a rejection and perform the
+		 * delivery
+		 */
+		if (success) {
+			*reject = new_state.xisr;
+			new_state.xisr = irq;
+			new_state.pending_pri = priority;
+		} else {
+			/*
+			 * If we failed to deliver we set need_resend
+			 * so a subsequent CPPR state change causes us
+			 * to try a new delivery.
+			 */
+			new_state.need_resend = true;
+		}
+
+	} while (!icp_try_update(icp, old_state, new_state, false));
+
+	return success;
+}
+
+static void icp_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
+			    u32 new_irq)
+{
+	struct ics_irq_state *state;
+	struct kvmppc_ics *ics;
+	u32 reject;
+	u16 src;
+
+	/*
+	 * This is used both for initial delivery of an interrupt and
+	 * for subsequent rejection.
+	 *
+	 * Rejection can be racy vs. resends. We have evaluated the
+	 * rejection in an atomic ICP transaction which is now complete,
+	 * so potentially the ICP can already accept the interrupt again.
+	 *
+	 * So we need to retry the delivery. Essentially the reject path
+	 * boils down to a failed delivery. Always.
+	 *
+	 * Now the interrupt could also have moved to a different target,
+	 * thus we may need to re-do the ICP lookup as well
+	 */
+
+ again:
+	/* Get the ICS state and lock it */
+	ics = kvmppc_xics_find_ics(xics, new_irq, &src);
+	if (!ics) {
+		XICS_DBG("icp_deliver_irq: IRQ 0x%06x not found !\n", new_irq);
+		return;
+	}
+	state = &ics->irq_state[src];
+
+	/* Get a lock on the ICS */
+	mutex_lock(&ics->lock);
+
+	/* Get our server */
+	if (!icp || state->server != icp->server_num) {
+		icp = kvmppc_xics_find_server(xics->kvm, state->server);
+		if (!icp) {
+			pr_warn("icp_deliver_irq: IRQ 0x%06x server 0x%x not found !\n",
+				new_irq, state->server);
+			goto out;
+		}
+	}
+
+	/* Clear the resend bit of that interrupt */
+	state->resend = 0;
+
+	/*
+	 * If masked, bail out
+	 *
+	 * Note: PAPR doesn't mention anything about masked pending
+	 * when doing a resend, only when doing a delivery.
+	 *
+	 * However that would have the effect of losing a masked
+	 * interrupt that was rejected and isn't consistent with
+	 * the whole masked_pending business which is about not
+	 * losing interrupts that occur while masked.
+	 *
+	 * I don't differentiate between normal deliveries and resends; this
+	 * implementation will differ from PAPR and not lose such
+	 * interrupts.
+	 */
+	if (state->priority == MASKED) {
+		XICS_DBG("irq %#x masked pending\n", new_irq);
+		state->masked_pending = 1;
+		goto out;
+	}
+
+	/*
+	 * Try the delivery, this will set the need_resend flag
+	 * in the ICP as part of the atomic transaction if the
+	 * delivery is not possible.
+	 *
+	 * Note that if successful, the new delivery might have itself
+	 * rejected an interrupt that was "delivered" before we took the
+	 * icp mutex.
+	 *
+	 * In this case we do the whole sequence all over again for the
+	 * new guy. We cannot assume that the rejected interrupt is less
+	 * favored than the new one, and thus doesn't need to be delivered,
+	 * because by the time we exit icp_try_to_deliver() the target
+	 * processor may well have already consumed & completed it, and thus
+	 * the rejected interrupt might actually be already acceptable.
+	 */
+	if (icp_try_to_deliver(icp, new_irq, state->priority, &reject)) {
+		/*
+		 * Delivery was successful, did we reject somebody else ?
+		 */
+		if (reject && reject != XICS_IPI) {
+			mutex_unlock(&ics->lock);
+			new_irq = reject;
+			goto again;
+		}
+	} else {
+		/*
+		 * We failed to deliver the interrupt, so we need to set the
+		 * resend map bit and mark the ICS state as needing a resend
+		 */
+		set_bit(ics->icsid, icp->resend_map);
+		state->resend = 1;
+
+		/*
+		 * If the need_resend flag got cleared in the ICP some time
+		 * between icp_try_to_deliver() atomic update and now, then
+		 * we know it might have missed the resend_map bit. So we
+		 * retry
+		 */
+		smp_mb();
+		if (!icp->state.need_resend) {
+			mutex_unlock(&ics->lock);
+			goto again;
+		}
+	}
+ out:
+	mutex_unlock(&ics->lock);
+}
+
+static void icp_down_cppr(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
+			  u8 new_cppr)
+{
+	union kvmppc_icp_state old_state, new_state;
+	bool resend;
+
+	/*
+	 * This handles several related states in one operation:
+	 *
+	 * ICP State: Down_CPPR
+	 *
+	 * Load CPPR with new value and if the XISR is 0
+	 * then check for resends:
+	 *
+	 * ICP State: Resend
+	 *
+	 * If MFRR is more favored than CPPR, check for IPIs
+	 * and notify ICS of a potential resend. This is done
+	 * asynchronously (when used in real mode, we will have
+	 * to exit here).
+	 *
+	 * We do not handle the complete Check_IPI as documented
+	 * here. In the PAPR, this state will be used for both
+	 * Set_MFRR and Down_CPPR. However, we know that we aren't
+	 * changing the MFRR state here so we don't need to handle
+	 * the case of an MFRR causing a reject of a pending irq,
+	 * this will have been handled when the MFRR was set in the
+	 * first place.
+	 *
+	 * Thus we don't have to handle rejects, only resends.
+	 *
+	 * When implementing real mode for HV KVM, resend will lead to
+	 * a H_TOO_HARD return and the whole transaction will be handled
+	 * in virtual mode.
+	 */
+	do {
+		old_state = new_state = ACCESS_ONCE(icp->state);
+
+		/* Down_CPPR */
+		new_state.cppr = new_cppr;
+
+		/*
+		 * Cut down Resend / Check_IPI / IPI
+		 *
+		 * The logic is that we cannot have a pending interrupt
+		 * trumped by an IPI at this point (see above), so we
+		 * know that either the pending interrupt is already an
+		 * IPI (in which case we don't care to override it) or
+		 * it's either more favored than us or non-existent.
+		 */
+		if (new_state.mfrr < new_cppr &&
+		    new_state.mfrr <= new_state.pending_pri) {
+			WARN_ON(new_state.xisr != XICS_IPI &&
+				new_state.xisr != 0);
+			new_state.pending_pri = new_state.mfrr;
+			new_state.xisr = XICS_IPI;
+		}
+
+		/* Latch/clear resend bit */
+		resend = new_state.need_resend;
+		new_state.need_resend = 0;
+
+	} while (!icp_try_update(icp, old_state, new_state, true));
+
+	/*
+	 * Now handle resend checks. Those are asynchronous to the ICP
+	 * state update in HW (ie bus transactions) so we can handle them
+	 * separately here too
+	 */
+	if (resend)
+		icp_check_resend(xics, icp);
+}
+
+static noinline unsigned long kvmppc_h_xirr(struct kvm_vcpu *vcpu)
+{
+	union kvmppc_icp_state old_state, new_state;
+	struct kvmppc_icp *icp = vcpu->arch.icp;
+	u32 xirr;
+
+	/* First, remove EE from the processor */
+	kvmppc_book3s_dequeue_irqprio(icp->vcpu,
+				      BOOK3S_INTERRUPT_EXTERNAL_LEVEL);
+
+	/*
+	 * ICP State: Accept_Interrupt
+	 *
+	 * Return the pending interrupt (if any) along with the
+	 * current CPPR, then clear the XISR & set CPPR to the
+	 * pending priority
+	 */
+	do {
+		old_state = new_state = ACCESS_ONCE(icp->state);
+
+		xirr = old_state.xisr | (((u32)old_state.cppr) << 24);
+		if (!old_state.xisr)
+			break;
+		new_state.cppr = new_state.pending_pri;
+		new_state.pending_pri = 0xff;
+		new_state.xisr = 0;
+
+	} while (!icp_try_update(icp, old_state, new_state, true));
+
+	XICS_DBG("h_xirr vcpu %d xirr %#x\n", vcpu->vcpu_id, xirr);
+
+	return xirr;
+}
+
+static noinline int kvmppc_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
+				 unsigned long mfrr)
+{
+	union kvmppc_icp_state old_state, new_state;
+	struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
+	struct kvmppc_icp *icp;
+	u32 reject;
+	bool resend;
+	bool local;
+
+	XICS_DBG("h_ipi vcpu %d to server %lu mfrr %#lx\n",
+		 vcpu->vcpu_id, server, mfrr);
+
+	icp = vcpu->arch.icp;
+	local = icp->server_num == server;
+	if (!local) {
+		icp = kvmppc_xics_find_server(vcpu->kvm, server);
+		if (!icp)
+			return H_PARAMETER;
+	}
+
+	/*
+	 * ICP state: Set_MFRR
+	 *
+	 * If the CPPR is more favored than the new MFRR, then
+	 * nothing needs to be rejected as there can be no XISR to
+	 * reject.  If the MFRR is being made less favored then
+	 * there might be a previously-rejected interrupt needing
+	 * to be resent.
+	 *
+	 * If the CPPR is less favored, then we might be replacing
+	 * an interrupt, and thus need to possibly reject it as in
+	 *
+	 * ICP state: Check_IPI
+	 */
+	do {
+		old_state = new_state = ACCESS_ONCE(icp->state);
+
+		/* Set_MFRR */
+		new_state.mfrr = mfrr;
+
+		/* Check_IPI */
+		reject = 0;
+		resend = false;
+		if (mfrr < new_state.cppr) {
+			/* Reject a pending interrupt if not an IPI */
+			if (mfrr <= new_state.pending_pri)
+				reject = new_state.xisr;
+			new_state.pending_pri = mfrr;
+			new_state.xisr = XICS_IPI;
+		}
+
+		if (mfrr > old_state.mfrr && mfrr > new_state.cppr) {
+			resend = new_state.need_resend;
+			new_state.need_resend = 0;
+		}
+	} while (!icp_try_update(icp, old_state, new_state, local));
+
+	/* Handle reject */
+	if (reject && reject != XICS_IPI)
+		icp_deliver_irq(xics, icp, reject);
+
+	/* Handle resend */
+	if (resend)
+		icp_check_resend(xics, icp);
+
+	return H_SUCCESS;
+}
+
+static noinline void kvmppc_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr)
+{
+	union kvmppc_icp_state old_state, new_state;
+	struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
+	struct kvmppc_icp *icp = vcpu->arch.icp;
+	u32 reject;
+
+	XICS_DBG("h_cppr vcpu %d cppr %#lx\n", vcpu->vcpu_id, cppr);
+
+	/*
+	 * ICP State: Set_CPPR
+	 *
+	 * We can safely compare the new value with the current
+	 * value outside of the transaction as the CPPR is only
+	 * ever changed by the processor itself.
+	 */
+	if (cppr > icp->state.cppr)
+		icp_down_cppr(xics, icp, cppr);
+	else if (cppr == icp->state.cppr)
+		return;
+
+	/*
+	 * ICP State: Up_CPPR
+	 *
+	 * The processor is raising its priority, this can result
+	 * in a rejection of a pending interrupt:
+	 *
+	 * ICP State: Reject_Current
+	 *
+	 * We can remove EE from the current processor, the update
+	 * transaction will set it again if needed
+	 */
+	kvmppc_book3s_dequeue_irqprio(icp->vcpu,
+				      BOOK3S_INTERRUPT_EXTERNAL_LEVEL);
+
+	do {
+		old_state = new_state = ACCESS_ONCE(icp->state);
+
+		reject = 0;
+		new_state.cppr = cppr;
+
+		if (cppr <= new_state.pending_pri) {
+			reject = new_state.xisr;
+			new_state.xisr = 0;
+			new_state.pending_pri = 0xff;
+		}
+
+	} while (!icp_try_update(icp, old_state, new_state, true));
+
+	/*
+	 * Check for rejects. They are handled by doing a new delivery
+	 * attempt (see comments in icp_deliver_irq).
+	 */
+	if (reject && reject != XICS_IPI)
+		icp_deliver_irq(xics, icp, reject);
+}
+
+static noinline int kvmppc_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
+{
+	struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
+	struct kvmppc_icp *icp = vcpu->arch.icp;
+	struct kvmppc_ics *ics;
+	struct ics_irq_state *state;
+	u32 irq = xirr & 0x00ffffff;
+	u16 src;
+
+	XICS_DBG("h_eoi vcpu %d eoi %#lx\n", vcpu->vcpu_id, xirr);
+
+	/*
+	 * ICP State: EOI
+	 *
+	 * Note: If EOI is incorrectly used by SW to lower the CPPR
+	 * value (ie more favored), we do not check for rejection of
+	 * a pending interrupt; this is a SW error and PAPR specifies
+	 * that we don't have to deal with it.
+	 *
+	 * The sending of an EOI to the ICS is handled after the
+	 * CPPR update
+	 *
+	 * ICP State: Down_CPPR which we handle
+	 * in a separate function as it's shared with H_CPPR.
+	 */
+	icp_down_cppr(xics, icp, xirr >> 24);
+
+	/* IPIs have no EOI */
+	if (irq == XICS_IPI)
+		return H_SUCCESS;
+	/*
+	 * EOI handling: If the interrupt is still asserted, we need to
+	 * resend it. We can take a lockless "peek" at the ICS state here.
+	 *
+	 * "Message" interrupts will never have "asserted" set
+	 */
+	ics = kvmppc_xics_find_ics(xics, irq, &src);
+	if (!ics) {
+		XICS_DBG("h_eoi: IRQ 0x%06x not found !\n", irq);
+		return H_PARAMETER;
+	}
+	state = &ics->irq_state[src];
+
+	/* Still asserted, resend it */
+	if (state->asserted)
+		icp_deliver_irq(xics, icp, irq);
+
+	return H_SUCCESS;
+}
+
+static noinline int kvmppc_xics_rm_complete(struct kvm_vcpu *vcpu, u32 hcall)
+{
+	struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
+	struct kvmppc_icp *icp = vcpu->arch.icp;
+
+	XICS_DBG("XICS_RM: H_%x completing, act: %x state: %lx tgt: %p\n",
+		 hcall, icp->rm_action, icp->rm_dbgstate.raw, icp->rm_dbgtgt);
+
+	if (icp->rm_action & XICS_RM_KICK_VCPU)
+		kvmppc_fast_vcpu_kick(icp->rm_kick_target);
+	if (icp->rm_action & XICS_RM_CHECK_RESEND)
+		icp_check_resend(xics, icp);
+	if (icp->rm_action & XICS_RM_REJECT)
+		icp_deliver_irq(xics, icp, icp->rm_reject);
+
+	icp->rm_action = 0;
+
+	return H_SUCCESS;
+}
+
+int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 req)
+{
+	struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
+	unsigned long res;
+	int rc = H_SUCCESS;
+
+	/* Check if we have an ICP */
+	if (!xics || !vcpu->arch.icp)
+		return H_HARDWARE;
+
+	/* Check for real mode returning too hard */
+	if (xics->real_mode)
+		return kvmppc_xics_rm_complete(vcpu, req);
+
+	switch (req) {
+	case H_XIRR:
+		res = kvmppc_h_xirr(vcpu);
+		kvmppc_set_gpr(vcpu, 4, res);
+		break;
+	case H_CPPR:
+		kvmppc_h_cppr(vcpu, kvmppc_get_gpr(vcpu, 4));
+		break;
+	case H_EOI:
+		rc = kvmppc_h_eoi(vcpu, kvmppc_get_gpr(vcpu, 4));
+		break;
+	case H_IPI:
+		rc = kvmppc_h_ipi(vcpu, kvmppc_get_gpr(vcpu, 4),
+				  kvmppc_get_gpr(vcpu, 5));
+		break;
+	}
+
+	return rc;
+}
+
+
+/* -- Initialisation code etc. -- */
+
+static int xics_debug_show(struct seq_file *m, void *private)
+{
+	struct kvmppc_xics *xics = m->private;
+	struct kvm *kvm = xics->kvm;
+	struct kvm_vcpu *vcpu;
+	int icsid, i;
+
+	if (!kvm)
+		return 0;
+
+	seq_printf(m, "=========\nICP state\n=========\n");
+
+	kvm_for_each_vcpu(i, vcpu, kvm) {
+		struct kvmppc_icp *icp = vcpu->arch.icp;
+		union kvmppc_icp_state state;
+
+		if (!icp)
+			continue;
+
+		state.raw = ACCESS_ONCE(icp->state.raw);
+		seq_printf(m, "cpu server %#lx XIRR:%#x PPRI:%#x CPPR:%#x MFRR:%#x OUT:%d NR:%d\n",
+			   icp->server_num, state.xisr,
+			   state.pending_pri, state.cppr, state.mfrr,
+			   state.out_ee, state.need_resend);
+	}
+
+	for (icsid = 0; icsid <= KVMPPC_XICS_MAX_ICS_ID; icsid++) {
+		struct kvmppc_ics *ics = xics->ics[icsid];
+
+		if (!ics)
+			continue;
+
+		seq_printf(m, "=========\nICS state for ICS 0x%x\n=========\n",
+			   icsid);
+
+		mutex_lock(&ics->lock);
+
+		for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) {
+			struct ics_irq_state *irq = &ics->irq_state[i];
+
+			seq_printf(m, "irq 0x%06x: server %#x prio %#x save prio %#x asserted %d resend %d masked pending %d\n",
+				   irq->number, irq->server, irq->priority,
+				   irq->saved_priority, irq->asserted,
+				   irq->resend, irq->masked_pending);
+
+		}
+		mutex_unlock(&ics->lock);
+	}
+	return 0;
+}
+
+static int xics_debug_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, xics_debug_show, inode->i_private);
+}
+
+static const struct file_operations xics_debug_fops = {
+	.open = xics_debug_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+static void xics_debugfs_init(struct kvmppc_xics *xics)
+{
+	char *name;
+
+	name = kasprintf(GFP_KERNEL, "kvm-xics-%p", xics);
+	if (!name) {
+		pr_err("%s: no memory for name\n", __func__);
+		return;
+	}
+
+	xics->dentry = debugfs_create_file(name, S_IRUGO, powerpc_debugfs_root,
+					   xics, &xics_debug_fops);
+
+	pr_debug("%s: created %s\n", __func__, name);
+	kfree(name);
+}
+
+static struct kvmppc_ics *kvmppc_xics_create_ics(struct kvm *kvm,
+					struct kvmppc_xics *xics, int irq)
+{
+	struct kvmppc_ics *ics;
+	int i, icsid;
+
+	icsid = irq >> KVMPPC_XICS_ICS_SHIFT;
+
+	mutex_lock(&kvm->lock);
+
+	/* ICS already exists - somebody else got here first */
+	if (xics->ics[icsid])
+		goto out;
+
+	/* Create the ICS */
+	ics = kzalloc(sizeof(struct kvmppc_ics), GFP_KERNEL);
+	if (!ics)
+		goto out;
+
+	mutex_init(&ics->lock);
+	ics->icsid = icsid;
+
+	for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) {
+		ics->irq_state[i].number = (icsid << KVMPPC_XICS_ICS_SHIFT) | i;
+		ics->irq_state[i].priority = MASKED;
+		ics->irq_state[i].saved_priority = MASKED;
+	}
+	smp_wmb();
+	xics->ics[icsid] = ics;
+
+	if (icsid > xics->max_icsid)
+		xics->max_icsid = icsid;
+
+ out:
+	mutex_unlock(&kvm->lock);
+	return xics->ics[icsid];
+}
+
+int kvmppc_xics_create_icp(struct kvm_vcpu *vcpu, unsigned long server_num)
+{
+	struct kvmppc_icp *icp;
+
+	if (!vcpu->kvm->arch.xics)
+		return -ENODEV;
+
+	if (kvmppc_xics_find_server(vcpu->kvm, server_num))
+		return -EEXIST;
+
+	icp = kzalloc(sizeof(struct kvmppc_icp), GFP_KERNEL);
+	if (!icp)
+		return -ENOMEM;
+
+	icp->vcpu = vcpu;
+	icp->server_num = server_num;
+	icp->state.mfrr = MASKED;
+	icp->state.pending_pri = MASKED;
+	vcpu->arch.icp = icp;
+
+	XICS_DBG("created server for vcpu %d\n", vcpu->vcpu_id);
+
+	return 0;
+}
+
+u64 kvmppc_xics_get_icp(struct kvm_vcpu *vcpu)
+{
+	struct kvmppc_icp *icp = vcpu->arch.icp;
+	union kvmppc_icp_state state;
+
+	if (!icp)
+		return 0;
+	state = icp->state;
+	return ((u64)state.cppr << KVM_REG_PPC_ICP_CPPR_SHIFT) |
+		((u64)state.xisr << KVM_REG_PPC_ICP_XISR_SHIFT) |
+		((u64)state.mfrr << KVM_REG_PPC_ICP_MFRR_SHIFT) |
+		((u64)state.pending_pri << KVM_REG_PPC_ICP_PPRI_SHIFT);
+}
+
+int kvmppc_xics_set_icp(struct kvm_vcpu *vcpu, u64 icpval)
+{
+	struct kvmppc_icp *icp = vcpu->arch.icp;
+	struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
+	union kvmppc_icp_state old_state, new_state;
+	struct kvmppc_ics *ics;
+	u8 cppr, mfrr, pending_pri;
+	u32 xisr;
+	u16 src;
+	bool resend;
+
+	if (!icp || !xics)
+		return -ENOENT;
+
+	cppr = icpval >> KVM_REG_PPC_ICP_CPPR_SHIFT;
+	xisr = (icpval >> KVM_REG_PPC_ICP_XISR_SHIFT) &
+		KVM_REG_PPC_ICP_XISR_MASK;
+	mfrr = icpval >> KVM_REG_PPC_ICP_MFRR_SHIFT;
+	pending_pri = icpval >> KVM_REG_PPC_ICP_PPRI_SHIFT;
+
+	/* Require the new state to be internally consistent */
+	if (xisr == 0) {
+		if (pending_pri != 0xff)
+			return -EINVAL;
+	} else if (xisr == XICS_IPI) {
+		if (pending_pri != mfrr || pending_pri >= cppr)
+			return -EINVAL;
+	} else {
+		if (pending_pri >= mfrr || pending_pri >= cppr)
+			return -EINVAL;
+		ics = kvmppc_xics_find_ics(xics, xisr, &src);
+		if (!ics)
+			return -EINVAL;
+	}
+
+	new_state.raw = 0;
+	new_state.cppr = cppr;
+	new_state.xisr = xisr;
+	new_state.mfrr = mfrr;
+	new_state.pending_pri = pending_pri;
+
+	/*
+	 * Deassert the CPU interrupt request.
+	 * icp_try_update will reassert it if necessary.
+	 */
+	kvmppc_book3s_dequeue_irqprio(icp->vcpu,
+				      BOOK3S_INTERRUPT_EXTERNAL_LEVEL);
+
+	/*
+	 * Note that if we displace an interrupt from old_state.xisr,
+	 * we don't mark it as rejected.  We expect userspace to set
+	 * the state of the interrupt sources to be consistent with
+	 * the ICP states (either before or afterwards, which doesn't
+	 * matter).  We do handle resends due to CPPR becoming less
+	 * favoured because that is necessary to end up with a
+	 * consistent state in the situation where userspace restores
+	 * the ICS states before the ICP states.
+	 */
+	do {
+		old_state = ACCESS_ONCE(icp->state);
+
+		if (new_state.mfrr <= old_state.mfrr) {
+			resend = false;
+			new_state.need_resend = old_state.need_resend;
+		} else {
+			resend = old_state.need_resend;
+			new_state.need_resend = 0;
+		}
+	} while (!icp_try_update(icp, old_state, new_state, false));
+
+	if (resend)
+		icp_check_resend(xics, icp);
+
+	return 0;
+}
+
+static int xics_get_source(struct kvmppc_xics *xics, long irq, u64 addr)
+{
+	int ret;
+	struct kvmppc_ics *ics;
+	struct ics_irq_state *irqp;
+	u64 __user *ubufp = (u64 __user *) addr;
+	u16 idx;
+	u64 val, prio;
+
+	ics = kvmppc_xics_find_ics(xics, irq, &idx);
+	if (!ics)
+		return -ENOENT;
+
+	irqp = &ics->irq_state[idx];
+	mutex_lock(&ics->lock);
+	ret = -ENOENT;
+	if (irqp->exists) {
+		val = irqp->server;
+		prio = irqp->priority;
+		if (prio == MASKED) {
+			val |= KVM_XICS_MASKED;
+			prio = irqp->saved_priority;
+		}
+		val |= prio << KVM_XICS_PRIORITY_SHIFT;
+		if (irqp->asserted)
+			val |= KVM_XICS_LEVEL_SENSITIVE | KVM_XICS_PENDING;
+		else if (irqp->masked_pending || irqp->resend)
+			val |= KVM_XICS_PENDING;
+		ret = 0;
+	}
+	mutex_unlock(&ics->lock);
+
+	if (!ret && put_user(val, ubufp))
+		ret = -EFAULT;
+
+	return ret;
+}
+
+static int xics_set_source(struct kvmppc_xics *xics, long irq, u64 addr)
+{
+	struct kvmppc_ics *ics;
+	struct ics_irq_state *irqp;
+	u64 __user *ubufp = (u64 __user *) addr;
+	u16 idx;
+	u64 val;
+	u8 prio;
+	u32 server;
+
+	if (irq < KVMPPC_XICS_FIRST_IRQ || irq >= KVMPPC_XICS_NR_IRQS)
+		return -ENOENT;
+
+	ics = kvmppc_xics_find_ics(xics, irq, &idx);
+	if (!ics) {
+		ics = kvmppc_xics_create_ics(xics->kvm, xics, irq);
+		if (!ics)
+			return -ENOMEM;
+	}
+	irqp = &ics->irq_state[idx];
+	if (get_user(val, ubufp))
+		return -EFAULT;
+
+	server = val & KVM_XICS_DESTINATION_MASK;
+	prio = val >> KVM_XICS_PRIORITY_SHIFT;
+	if (prio != MASKED &&
+	    kvmppc_xics_find_server(xics->kvm, server) == NULL)
+		return -EINVAL;
+
+	mutex_lock(&ics->lock);
+	irqp->server = server;
+	irqp->saved_priority = prio;
+	if (val & KVM_XICS_MASKED)
+		prio = MASKED;
+	irqp->priority = prio;
+	irqp->resend = 0;
+	irqp->masked_pending = 0;
+	irqp->asserted = 0;
+	if ((val & KVM_XICS_PENDING) && (val & KVM_XICS_LEVEL_SENSITIVE))
+		irqp->asserted = 1;
+	irqp->exists = 1;
+	mutex_unlock(&ics->lock);
+
+	if (val & KVM_XICS_PENDING)
+		icp_deliver_irq(xics, NULL, irqp->number);
+
+	return 0;
+}
+
+int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level,
+		bool line_status)
+{
+	struct kvmppc_xics *xics = kvm->arch.xics;
+
+	return ics_deliver_irq(xics, irq, level, line_status);
+}
+
+static int xics_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
+{
+	struct kvmppc_xics *xics = dev->private;
+
+	switch (attr->group) {
+	case KVM_DEV_XICS_GRP_SOURCES:
+		return xics_set_source(xics, attr->attr, attr->addr);
+	}
+	return -ENXIO;
+}
+
+static int xics_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
+{
+	struct kvmppc_xics *xics = dev->private;
+
+	switch (attr->group) {
+	case KVM_DEV_XICS_GRP_SOURCES:
+		return xics_get_source(xics, attr->attr, attr->addr);
+	}
+	return -ENXIO;
+}
+
+static int xics_has_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
+{
+	switch (attr->group) {
+	case KVM_DEV_XICS_GRP_SOURCES:
+		if (attr->attr >= KVMPPC_XICS_FIRST_IRQ &&
+		    attr->attr < KVMPPC_XICS_NR_IRQS)
+			return 0;
+		break;
+	}
+	return -ENXIO;
+}
+
+static void kvmppc_xics_free(struct kvm_device *dev)
+{
+	struct kvmppc_xics *xics = dev->private;
+	int i;
+	struct kvm *kvm = xics->kvm;
+
+	debugfs_remove(xics->dentry);
+
+	if (kvm)
+		kvm->arch.xics = NULL;
+
+	for (i = 0; i <= xics->max_icsid; i++)
+		kfree(xics->ics[i]);
+	kfree(xics);
+	kfree(dev);
+}
+
+static int kvmppc_xics_create(struct kvm_device *dev, u32 type)
+{
+	struct kvmppc_xics *xics;
+	struct kvm *kvm = dev->kvm;
+	int ret = 0;
+
+	xics = kzalloc(sizeof(*xics), GFP_KERNEL);
+	if (!xics)
+		return -ENOMEM;
+
+	dev->private = xics;
+	xics->dev = dev;
+	xics->kvm = kvm;
+
+	/* Already there ? */
+	mutex_lock(&kvm->lock);
+	if (kvm->arch.xics)
+		ret = -EEXIST;
+	else
+		kvm->arch.xics = xics;
+	mutex_unlock(&kvm->lock);
+
+	if (ret)
+		return ret;
+
+	xics_debugfs_init(xics);
+
+#ifdef CONFIG_KVM_BOOK3S_64_HV
+	if (cpu_has_feature(CPU_FTR_ARCH_206)) {
+		/* Enable real mode support */
+		xics->real_mode = ENABLE_REALMODE;
+		xics->real_mode_dbg = DEBUG_REALMODE;
+	}
+#endif /* CONFIG_KVM_BOOK3S_64_HV */
+
+	return 0;
+}
+
+struct kvm_device_ops kvm_xics_ops = {
+	.name = "kvm-xics",
+	.create = kvmppc_xics_create,
+	.destroy = kvmppc_xics_free,
+	.set_attr = xics_set_attr,
+	.get_attr = xics_get_attr,
+	.has_attr = xics_has_attr,
+};
+
+int kvmppc_xics_connect_vcpu(struct kvm_device *dev, struct kvm_vcpu *vcpu,
+			     u32 xcpu)
+{
+	struct kvmppc_xics *xics = dev->private;
+	int r = -EBUSY;
+
+	if (dev->ops != &kvm_xics_ops)
+		return -EPERM;
+	if (xics->kvm != vcpu->kvm)
+		return -EPERM;
+	if (vcpu->arch.irq_type)
+		return -EBUSY;
+
+	r = kvmppc_xics_create_icp(vcpu, xcpu);
+	if (!r)
+		vcpu->arch.irq_type = KVMPPC_IRQ_XICS;
+
+	return r;
+}
+
+void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu)
+{
+	if (!vcpu->arch.icp)
+		return;
+	kfree(vcpu->arch.icp);
+	vcpu->arch.icp = NULL;
+	vcpu->arch.irq_type = KVMPPC_IRQ_DEFAULT;
+}
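
What makes icp_try_update() above safe without a lock is that the entire ICP
state fits in one word (union kvmppc_icp_state), so every state transition is a
snapshot/modify/compare-and-swap loop. A minimal standalone sketch of the same
pattern using C11 atomics; the field layout here is illustrative, not the
kernel's exact one:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

union demo_icp_state {			/* all state in one CAS-able word */
	uint64_t raw;
	struct {
		uint8_t  cppr;		/* current processor priority */
		uint8_t  pending_pri;	/* priority of pending interrupt */
		uint32_t xisr;		/* pending interrupt source */
	};
};

static _Atomic uint64_t icp_raw;

/* Deliver irq at prio iff it is more favoured (numerically lower) than CPPR. */
static bool try_deliver(uint32_t irq, uint8_t prio)
{
	union demo_icp_state old, new;

	do {
		old.raw = new.raw = atomic_load(&icp_raw);
		if (prio >= new.cppr)
			return false;		/* CPPR blocks delivery */
		new.xisr = irq;
		new.pending_pri = prio;
		/* lost a race with another CPU: re-snapshot and retry */
	} while (!atomic_compare_exchange_weak(&icp_raw, &old.raw, new.raw));

	return true;
}

On failure the loop simply re-snapshots and re-evaluates, which is exactly how
icp_try_to_deliver() and icp_down_cppr() retry in the code above.
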
diff --git a/arch/powerpc/kvm/book3s_xics.h b/arch/powerpc/kvm/book3s_xics.h
new file mode 100644
index 0000000..dd9326c
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_xics.h
@@ -0,0 +1,130 @@
+/*
+ * Copyright 2012 Michael Ellerman, IBM Corporation.
+ * Copyright 2012 Benjamin Herrenschmidt, IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef _KVM_PPC_BOOK3S_XICS_H
+#define _KVM_PPC_BOOK3S_XICS_H
+
+/*
+ * We use a two-level tree to store interrupt source information.
+ * There are up to 1024 ICS nodes, each of which can represent
+ * 1024 sources.
+ */
+#define KVMPPC_XICS_MAX_ICS_ID	1023
+#define KVMPPC_XICS_ICS_SHIFT	10
+#define KVMPPC_XICS_IRQ_PER_ICS	(1 << KVMPPC_XICS_ICS_SHIFT)
+#define KVMPPC_XICS_SRC_MASK	(KVMPPC_XICS_IRQ_PER_ICS - 1)
+
+/*
+ * Interrupt source numbers below this are reserved; for example,
+ * 0 is "no interrupt", and 2 is used for IPIs.
+ */
+#define KVMPPC_XICS_FIRST_IRQ	16
+#define KVMPPC_XICS_NR_IRQS	((KVMPPC_XICS_MAX_ICS_ID + 1) * \
+				 KVMPPC_XICS_IRQ_PER_ICS)
+
+/* Priority value to use for disabling an interrupt */
+#define MASKED	0xff
+
+/* State for one irq source */
+struct ics_irq_state {
+	u32 number;
+	u32 server;
+	u8  priority;
+	u8  saved_priority;
+	u8  resend;
+	u8  masked_pending;
+	u8  asserted; /* Only for LSI */
+	u8  exists;
+};
+
+/* Atomic ICP state, updated with a single compare & swap */
+union kvmppc_icp_state {
+	unsigned long raw;
+	struct {
+		u8 out_ee:1;
+		u8 need_resend:1;
+		u8 cppr;
+		u8 mfrr;
+		u8 pending_pri;
+		u32 xisr;
+	};
+};
+
+/* One bit per ICS */
+#define ICP_RESEND_MAP_SIZE	(KVMPPC_XICS_MAX_ICS_ID / BITS_PER_LONG + 1)
+
+struct kvmppc_icp {
+	struct kvm_vcpu *vcpu;
+	unsigned long server_num;
+	union kvmppc_icp_state state;
+	unsigned long resend_map[ICP_RESEND_MAP_SIZE];
+
+	/* Real mode might find something too hard; here's the action
+	 * it might request from virtual mode
+	 */
+#define XICS_RM_KICK_VCPU	0x1
+#define XICS_RM_CHECK_RESEND	0x2
+#define XICS_RM_REJECT		0x4
+	u32 rm_action;
+	struct kvm_vcpu *rm_kick_target;
+	u32  rm_reject;
+
+	/* Debug stuff for real mode */
+	union kvmppc_icp_state rm_dbgstate;
+	struct kvm_vcpu *rm_dbgtgt;
+};
+
+struct kvmppc_ics {
+	struct mutex lock;
+	u16 icsid;
+	struct ics_irq_state irq_state[KVMPPC_XICS_IRQ_PER_ICS];
+};
+
+struct kvmppc_xics {
+	struct kvm *kvm;
+	struct kvm_device *dev;
+	struct dentry *dentry;
+	u32 max_icsid;
+	bool real_mode;
+	bool real_mode_dbg;
+	struct kvmppc_ics *ics[KVMPPC_XICS_MAX_ICS_ID + 1];
+};
+
+static inline struct kvmppc_icp *kvmppc_xics_find_server(struct kvm *kvm,
+							 u32 nr)
+{
+	struct kvm_vcpu *vcpu = NULL;
+	int i;
+
+	kvm_for_each_vcpu(i, vcpu, kvm) {
+		if (vcpu->arch.icp && nr == vcpu->arch.icp->server_num)
+			return vcpu->arch.icp;
+	}
+	return NULL;
+}
+
+static inline struct kvmppc_ics *kvmppc_xics_find_ics(struct kvmppc_xics *xics,
+						      u32 irq, u16 *source)
+{
+	u32 icsid = irq >> KVMPPC_XICS_ICS_SHIFT;
+	u16 src = irq & KVMPPC_XICS_SRC_MASK;
+	struct kvmppc_ics *ics;
+
+	if (source)
+		*source = src;
+	if (icsid > KVMPPC_XICS_MAX_ICS_ID)
+		return NULL;
+	ics = xics->ics[icsid];
+	if (!ics)
+		return NULL;
+	return ics;
+}
+
+
+#endif /* _KVM_PPC_BOOK3S_XICS_H */
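
Given the shift and mask constants above, mapping a global interrupt number to
its ICS node and per-ICS source is pure bit arithmetic, as
kvmppc_xics_find_ics() does. An illustrative standalone sketch:

#include <stdio.h>

#define ICS_SHIFT 10				/* KVMPPC_XICS_ICS_SHIFT */
#define SRC_MASK  ((1u << ICS_SHIFT) - 1)	/* KVMPPC_XICS_SRC_MASK */

int main(void)
{
	unsigned int irq = 0x1234;
	unsigned int icsid = irq >> ICS_SHIFT;	/* which ICS node */
	unsigned int src = irq & SRC_MASK;	/* source within that ICS */

	printf("irq %#x -> ics %u, src %u\n", irq, icsid, src);
	return 0;
}
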
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 020923e..1020119 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -222,8 +222,7 @@
 	kvmppc_booke_queue_irqprio(vcpu, prio);
 }
 
-void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu,
-                                  struct kvm_interrupt *irq)
+void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu)
 {
 	clear_bit(BOOKE_IRQPRIO_EXTERNAL, &vcpu->arch.pending_exceptions);
 	clear_bit(BOOKE_IRQPRIO_EXTERNAL_LEVEL, &vcpu->arch.pending_exceptions);
@@ -347,7 +346,7 @@
 		keep_irq = true;
 	}
 
-	if ((priority == BOOKE_IRQPRIO_EXTERNAL) && vcpu->arch.epr_enabled)
+	if ((priority == BOOKE_IRQPRIO_EXTERNAL) && vcpu->arch.epr_flags)
 		update_epr = true;
 
 	switch (priority) {
@@ -428,8 +427,14 @@
 			set_guest_esr(vcpu, vcpu->arch.queued_esr);
 		if (update_dear == true)
 			set_guest_dear(vcpu, vcpu->arch.queued_dear);
-		if (update_epr == true)
-			kvm_make_request(KVM_REQ_EPR_EXIT, vcpu);
+		if (update_epr == true) {
+			if (vcpu->arch.epr_flags & KVMPPC_EPR_USER)
+				kvm_make_request(KVM_REQ_EPR_EXIT, vcpu);
+			else if (vcpu->arch.epr_flags & KVMPPC_EPR_KERNEL) {
+				BUG_ON(vcpu->arch.irq_type != KVMPPC_IRQ_MPIC);
+				kvmppc_mpic_set_epr(vcpu);
+			}
+		}
 
 		new_msr &= msr_mask;
 #if defined(CONFIG_64BIT)
@@ -746,6 +751,9 @@
 		kvmppc_core_queue_program(vcpu, ESR_PIL);
 		return RESUME_HOST;
 
+	case EMULATE_EXIT_USER:
+		return RESUME_HOST;
+
 	default:
 		BUG();
 	}
@@ -1148,6 +1156,18 @@
 	return r;
 }
 
+static void kvmppc_set_tsr(struct kvm_vcpu *vcpu, u32 new_tsr)
+{
+	u32 old_tsr = vcpu->arch.tsr;
+
+	vcpu->arch.tsr = new_tsr;
+
+	if ((old_tsr ^ vcpu->arch.tsr) & (TSR_ENW | TSR_WIS))
+		arm_next_watchdog(vcpu);
+
+	update_timer_ints(vcpu);
+}
+
 /* Initial guest state: 16MB mapping 0 -> 0, PC = 0, MSR = 0, R1 = 16MB */
 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
 {
@@ -1287,16 +1307,8 @@
 		kvmppc_emulate_dec(vcpu);
 	}
 
-	if (sregs->u.e.update_special & KVM_SREGS_E_UPDATE_TSR) {
-		u32 old_tsr = vcpu->arch.tsr;
-
-		vcpu->arch.tsr = sregs->u.e.tsr;
-
-		if ((old_tsr ^ vcpu->arch.tsr) & (TSR_ENW | TSR_WIS))
-			arm_next_watchdog(vcpu);
-
-		update_timer_ints(vcpu);
-	}
+	if (sregs->u.e.update_special & KVM_SREGS_E_UPDATE_TSR)
+		kvmppc_set_tsr(vcpu, sregs->u.e.tsr);
 
 	return 0;
 }
@@ -1409,84 +1421,134 @@
 
 int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
 {
-	int r = -EINVAL;
+	int r = 0;
+	union kvmppc_one_reg val;
+	int size;
+	long int i;
+
+	size = one_reg_size(reg->id);
+	if (size > sizeof(val))
+		return -EINVAL;
 
 	switch (reg->id) {
 	case KVM_REG_PPC_IAC1:
 	case KVM_REG_PPC_IAC2:
 	case KVM_REG_PPC_IAC3:
-	case KVM_REG_PPC_IAC4: {
-		int iac = reg->id - KVM_REG_PPC_IAC1;
-		r = copy_to_user((u64 __user *)(long)reg->addr,
-				 &vcpu->arch.dbg_reg.iac[iac], sizeof(u64));
+	case KVM_REG_PPC_IAC4:
+		i = reg->id - KVM_REG_PPC_IAC1;
+		val = get_reg_val(reg->id, vcpu->arch.dbg_reg.iac[i]);
 		break;
-	}
 	case KVM_REG_PPC_DAC1:
-	case KVM_REG_PPC_DAC2: {
-		int dac = reg->id - KVM_REG_PPC_DAC1;
-		r = copy_to_user((u64 __user *)(long)reg->addr,
-				 &vcpu->arch.dbg_reg.dac[dac], sizeof(u64));
+	case KVM_REG_PPC_DAC2:
+		i = reg->id - KVM_REG_PPC_DAC1;
+		val = get_reg_val(reg->id, vcpu->arch.dbg_reg.dac[i]);
 		break;
-	}
 	case KVM_REG_PPC_EPR: {
 		u32 epr = get_guest_epr(vcpu);
-		r = put_user(epr, (u32 __user *)(long)reg->addr);
+		val = get_reg_val(reg->id, epr);
 		break;
 	}
 #if defined(CONFIG_64BIT)
 	case KVM_REG_PPC_EPCR:
-		r = put_user(vcpu->arch.epcr, (u32 __user *)(long)reg->addr);
+		val = get_reg_val(reg->id, vcpu->arch.epcr);
 		break;
 #endif
+	case KVM_REG_PPC_TCR:
+		val = get_reg_val(reg->id, vcpu->arch.tcr);
+		break;
+	case KVM_REG_PPC_TSR:
+		val = get_reg_val(reg->id, vcpu->arch.tsr);
+		break;
+	case KVM_REG_PPC_DEBUG_INST:
+		val = get_reg_val(reg->id, KVMPPC_INST_EHPRIV);
+		break;
 	default:
+		r = kvmppc_get_one_reg(vcpu, reg->id, &val);
 		break;
 	}
+
+	if (r)
+		return r;
+
+	if (copy_to_user((char __user *)(unsigned long)reg->addr, &val, size))
+		r = -EFAULT;
+
 	return r;
 }
 
 int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
 {
-	int r = -EINVAL;
+	int r = 0;
+	union kvmppc_one_reg val;
+	int size;
+	long int i;
+
+	size = one_reg_size(reg->id);
+	if (size > sizeof(val))
+		return -EINVAL;
+
+	if (copy_from_user(&val, (char __user *)(unsigned long)reg->addr, size))
+		return -EFAULT;
 
 	switch (reg->id) {
 	case KVM_REG_PPC_IAC1:
 	case KVM_REG_PPC_IAC2:
 	case KVM_REG_PPC_IAC3:
-	case KVM_REG_PPC_IAC4: {
-		int iac = reg->id - KVM_REG_PPC_IAC1;
-		r = copy_from_user(&vcpu->arch.dbg_reg.iac[iac],
-			     (u64 __user *)(long)reg->addr, sizeof(u64));
+	case KVM_REG_PPC_IAC4:
+		i = reg->id - KVM_REG_PPC_IAC1;
+		vcpu->arch.dbg_reg.iac[i] = set_reg_val(reg->id, val);
 		break;
-	}
 	case KVM_REG_PPC_DAC1:
-	case KVM_REG_PPC_DAC2: {
-		int dac = reg->id - KVM_REG_PPC_DAC1;
-		r = copy_from_user(&vcpu->arch.dbg_reg.dac[dac],
-			     (u64 __user *)(long)reg->addr, sizeof(u64));
+	case KVM_REG_PPC_DAC2:
+		i = reg->id - KVM_REG_PPC_DAC1;
+		vcpu->arch.dbg_reg.dac[i] = set_reg_val(reg->id, val);
 		break;
-	}
 	case KVM_REG_PPC_EPR: {
-		u32 new_epr;
-		r = get_user(new_epr, (u32 __user *)(long)reg->addr);
-		if (!r)
-			kvmppc_set_epr(vcpu, new_epr);
+		u32 new_epr = set_reg_val(reg->id, val);
+		kvmppc_set_epr(vcpu, new_epr);
 		break;
 	}
 #if defined(CONFIG_64BIT)
 	case KVM_REG_PPC_EPCR: {
-		u32 new_epcr;
-		r = get_user(new_epcr, (u32 __user *)(long)reg->addr);
-		if (r == 0)
-			kvmppc_set_epcr(vcpu, new_epcr);
+		u32 new_epcr = set_reg_val(reg->id, val);
+		kvmppc_set_epcr(vcpu, new_epcr);
 		break;
 	}
 #endif
-	default:
+	case KVM_REG_PPC_OR_TSR: {
+		u32 tsr_bits = set_reg_val(reg->id, val);
+		kvmppc_set_tsr_bits(vcpu, tsr_bits);
 		break;
 	}
+	case KVM_REG_PPC_CLEAR_TSR: {
+		u32 tsr_bits = set_reg_val(reg->id, val);
+		kvmppc_clr_tsr_bits(vcpu, tsr_bits);
+		break;
+	}
+	case KVM_REG_PPC_TSR: {
+		u32 tsr = set_reg_val(reg->id, val);
+		kvmppc_set_tsr(vcpu, tsr);
+		break;
+	}
+	case KVM_REG_PPC_TCR: {
+		u32 tcr = set_reg_val(reg->id, val);
+		kvmppc_set_tcr(vcpu, tcr);
+		break;
+	}
+	default:
+		r = kvmppc_set_one_reg(vcpu, reg->id, &val);
+		break;
+	}
+
 	return r;
 }
 
+int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
+					 struct kvm_guest_debug *dbg)
+{
+	return -EINVAL;
+}
+
 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
 {
 	return -ENOTSUPP;
@@ -1531,7 +1593,7 @@
 
 void kvmppc_core_commit_memory_region(struct kvm *kvm,
 				struct kvm_userspace_memory_region *mem,
-				struct kvm_memory_slot old)
+				const struct kvm_memory_slot *old)
 {
 }
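
From userspace, the registers exposed above (TSR, TCR, the IAC/DAC debug
registers, and so on) are reached through the generic ONE_REG ioctls on the
vcpu fd. A hedged sketch of setting TSR, assuming the KVM_REG_PPC_TSR id from
the updated uapi header; error handling is elided and the fd is assumed to come
from KVM_CREATE_VCPU:

#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Set the timer status register on a vcpu. Returns the ioctl result. */
static int set_tsr(int vcpu_fd, uint32_t tsr)
{
	struct kvm_one_reg reg;

	memset(&reg, 0, sizeof(reg));
	reg.id = KVM_REG_PPC_TSR;	/* 32-bit reg id from the uapi header */
	reg.addr = (uintptr_t)&tsr;	/* kernel copies the value from here */

	return ioctl(vcpu_fd, KVM_SET_ONE_REG, &reg);
}
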
 
diff --git a/arch/powerpc/kvm/booke_interrupts.S b/arch/powerpc/kvm/booke_interrupts.S
index f4bb55c..2c6deb5ef 100644
--- a/arch/powerpc/kvm/booke_interrupts.S
+++ b/arch/powerpc/kvm/booke_interrupts.S
@@ -54,8 +54,7 @@
                        (1<<BOOKE_INTERRUPT_DTLB_MISS) | \
                        (1<<BOOKE_INTERRUPT_ALIGNMENT))
 
-.macro KVM_HANDLER ivor_nr scratch srr0
-_GLOBAL(kvmppc_handler_\ivor_nr)
+.macro __KVM_HANDLER ivor_nr scratch srr0
 	/* Get pointer to vcpu and record exit number. */
 	mtspr	\scratch , r4
 	mfspr   r4, SPRN_SPRG_THREAD
@@ -76,6 +75,43 @@
 	bctr
 .endm
 
+.macro KVM_HANDLER ivor_nr scratch srr0
+_GLOBAL(kvmppc_handler_\ivor_nr)
+	__KVM_HANDLER \ivor_nr \scratch \srr0
+.endm
+
+.macro KVM_DBG_HANDLER ivor_nr scratch srr0
+_GLOBAL(kvmppc_handler_\ivor_nr)
+	mtspr   \scratch, r4
+	mfspr	r4, SPRN_SPRG_THREAD
+	lwz	r4, THREAD_KVM_VCPU(r4)
+	stw	r3, VCPU_CRIT_SAVE(r4)
+	mfcr	r3
+	mfspr	r4, SPRN_CSRR1
+	andi.	r4, r4, MSR_PR
+	bne	1f
+	/* debug interrupt happened in enter/exit path */
+	mfspr   r4, SPRN_CSRR1
+	rlwinm  r4, r4, 0, ~MSR_DE
+	mtspr   SPRN_CSRR1, r4
+	lis	r4, 0xffff
+	ori	r4, r4, 0xffff
+	mtspr	SPRN_DBSR, r4
+	mfspr	r4, SPRN_SPRG_THREAD
+	lwz	r4, THREAD_KVM_VCPU(r4)
+	mtcr	r3
+	lwz     r3, VCPU_CRIT_SAVE(r4)
+	mfspr   r4, \scratch
+	rfci
+1:	/* debug interrupt happened in guest */
+	mtcr	r3
+	mfspr	r4, SPRN_SPRG_THREAD
+	lwz	r4, THREAD_KVM_VCPU(r4)
+	lwz     r3, VCPU_CRIT_SAVE(r4)
+	mfspr   r4, \scratch
+	__KVM_HANDLER \ivor_nr \scratch \srr0
+.endm
+
 .macro KVM_HANDLER_ADDR ivor_nr
 	.long	kvmppc_handler_\ivor_nr
 .endm
@@ -100,7 +136,7 @@
 KVM_HANDLER BOOKE_INTERRUPT_WATCHDOG SPRN_SPRG_RSCRATCH_CRIT SPRN_CSRR0
 KVM_HANDLER BOOKE_INTERRUPT_DTLB_MISS SPRN_SPRG_RSCRATCH0 SPRN_SRR0
 KVM_HANDLER BOOKE_INTERRUPT_ITLB_MISS SPRN_SPRG_RSCRATCH0 SPRN_SRR0
-KVM_HANDLER BOOKE_INTERRUPT_DEBUG SPRN_SPRG_RSCRATCH_CRIT SPRN_CSRR0
+KVM_DBG_HANDLER BOOKE_INTERRUPT_DEBUG SPRN_SPRG_RSCRATCH_CRIT SPRN_CSRR0
 KVM_HANDLER BOOKE_INTERRUPT_SPE_UNAVAIL SPRN_SPRG_RSCRATCH0 SPRN_SRR0
 KVM_HANDLER BOOKE_INTERRUPT_SPE_FP_DATA SPRN_SPRG_RSCRATCH0 SPRN_SRR0
 KVM_HANDLER BOOKE_INTERRUPT_SPE_FP_ROUND SPRN_SPRG_RSCRATCH0 SPRN_SRR0
diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c
index 6dd4de7..ce6b73c 100644
--- a/arch/powerpc/kvm/e500.c
+++ b/arch/powerpc/kvm/e500.c
@@ -425,6 +425,20 @@
 	return kvmppc_set_sregs_ivor(vcpu, sregs);
 }
 
+int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id,
+			union kvmppc_one_reg *val)
+{
+	int r = kvmppc_get_one_reg_e500_tlb(vcpu, id, val);
+	return r;
+}
+
+int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id,
+		       union kvmppc_one_reg *val)
+{
+	int r = kvmppc_set_one_reg_e500_tlb(vcpu, id, val);
+	return r;
+}
+
 struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
 {
 	struct kvmppc_vcpu_e500 *vcpu_e500;
diff --git a/arch/powerpc/kvm/e500.h b/arch/powerpc/kvm/e500.h
index 33db48a..c2e5e98 100644
--- a/arch/powerpc/kvm/e500.h
+++ b/arch/powerpc/kvm/e500.h
@@ -23,6 +23,10 @@
 #include <asm/mmu-book3e.h>
 #include <asm/tlb.h>
 
+enum vcpu_ftr {
+	VCPU_FTR_MMU_V2
+};
+
 #define E500_PID_NUM   3
 #define E500_TLB_NUM   2
 
@@ -131,6 +135,10 @@
 void kvmppc_get_sregs_e500_tlb(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs);
 int kvmppc_set_sregs_e500_tlb(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs);
 
+int kvmppc_get_one_reg_e500_tlb(struct kvm_vcpu *vcpu, u64 id,
+				union kvmppc_one_reg *val);
+int kvmppc_set_one_reg_e500_tlb(struct kvm_vcpu *vcpu, u64 id,
+			       union kvmppc_one_reg *val);
 
 #ifdef CONFIG_KVM_E500V2
 unsigned int kvmppc_e500_get_sid(struct kvmppc_vcpu_e500 *vcpu_e500,
@@ -295,4 +303,18 @@
 #define get_tlb_sts(gtlbe)              (MAS1_TS)
 #endif /* !BOOKE_HV */
 
+static inline bool has_feature(const struct kvm_vcpu *vcpu,
+			       enum vcpu_ftr ftr)
+{
+	bool has_ftr;
+	switch (ftr) {
+	case VCPU_FTR_MMU_V2:
+		has_ftr = ((vcpu->arch.mmucfg & MMUCFG_MAVN) == MMUCFG_MAVN_V2);
+		break;
+	default:
+		return false;
+	}
+	return has_ftr;
+}
+
 #endif /* KVM_E500_H */
diff --git a/arch/powerpc/kvm/e500_emulate.c b/arch/powerpc/kvm/e500_emulate.c
index e78f353..b10a012 100644
--- a/arch/powerpc/kvm/e500_emulate.c
+++ b/arch/powerpc/kvm/e500_emulate.c
@@ -284,6 +284,16 @@
 	case SPRN_TLB1CFG:
 		*spr_val = vcpu->arch.tlbcfg[1];
 		break;
+	case SPRN_TLB0PS:
+		if (!has_feature(vcpu, VCPU_FTR_MMU_V2))
+			return EMULATE_FAIL;
+		*spr_val = vcpu->arch.tlbps[0];
+		break;
+	case SPRN_TLB1PS:
+		if (!has_feature(vcpu, VCPU_FTR_MMU_V2))
+			return EMULATE_FAIL;
+		*spr_val = vcpu->arch.tlbps[1];
+		break;
 	case SPRN_L1CSR0:
 		*spr_val = vcpu_e500->l1csr0;
 		break;
@@ -307,6 +317,15 @@
 	case SPRN_MMUCFG:
 		*spr_val = vcpu->arch.mmucfg;
 		break;
+	case SPRN_EPTCFG:
+		if (!has_feature(vcpu, VCPU_FTR_MMU_V2))
+			return EMULATE_FAIL;
+		/*
+		 * Legacy Linux guests access EPTCFG register even if the E.PT
+		 * category is disabled in the VM. Give them a chance to live.
+		 */
+		*spr_val = vcpu->arch.eptcfg;
+		break;
 
 	/* extra exceptions */
 	case SPRN_IVOR32:
diff --git a/arch/powerpc/kvm/e500_mmu.c b/arch/powerpc/kvm/e500_mmu.c
index 5c44759..c41a5a96 100644
--- a/arch/powerpc/kvm/e500_mmu.c
+++ b/arch/powerpc/kvm/e500_mmu.c
@@ -596,6 +596,140 @@
 	return 0;
 }
 
+int kvmppc_get_one_reg_e500_tlb(struct kvm_vcpu *vcpu, u64 id,
+				union kvmppc_one_reg *val)
+{
+	int r = 0;
+	long int i;
+
+	switch (id) {
+	case KVM_REG_PPC_MAS0:
+		*val = get_reg_val(id, vcpu->arch.shared->mas0);
+		break;
+	case KVM_REG_PPC_MAS1:
+		*val = get_reg_val(id, vcpu->arch.shared->mas1);
+		break;
+	case KVM_REG_PPC_MAS2:
+		*val = get_reg_val(id, vcpu->arch.shared->mas2);
+		break;
+	case KVM_REG_PPC_MAS7_3:
+		*val = get_reg_val(id, vcpu->arch.shared->mas7_3);
+		break;
+	case KVM_REG_PPC_MAS4:
+		*val = get_reg_val(id, vcpu->arch.shared->mas4);
+		break;
+	case KVM_REG_PPC_MAS6:
+		*val = get_reg_val(id, vcpu->arch.shared->mas6);
+		break;
+	case KVM_REG_PPC_MMUCFG:
+		*val = get_reg_val(id, vcpu->arch.mmucfg);
+		break;
+	case KVM_REG_PPC_EPTCFG:
+		*val = get_reg_val(id, vcpu->arch.eptcfg);
+		break;
+	case KVM_REG_PPC_TLB0CFG:
+	case KVM_REG_PPC_TLB1CFG:
+	case KVM_REG_PPC_TLB2CFG:
+	case KVM_REG_PPC_TLB3CFG:
+		i = id - KVM_REG_PPC_TLB0CFG;
+		*val = get_reg_val(id, vcpu->arch.tlbcfg[i]);
+		break;
+	case KVM_REG_PPC_TLB0PS:
+	case KVM_REG_PPC_TLB1PS:
+	case KVM_REG_PPC_TLB2PS:
+	case KVM_REG_PPC_TLB3PS:
+		i = id - KVM_REG_PPC_TLB0PS;
+		*val = get_reg_val(id, vcpu->arch.tlbps[i]);
+		break;
+	default:
+		r = -EINVAL;
+		break;
+	}
+
+	return r;
+}
+
+int kvmppc_set_one_reg_e500_tlb(struct kvm_vcpu *vcpu, u64 id,
+			       union kvmppc_one_reg *val)
+{
+	int r = 0;
+	long int i;
+
+	switch (id) {
+	case KVM_REG_PPC_MAS0:
+		vcpu->arch.shared->mas0 = set_reg_val(id, *val);
+		break;
+	case KVM_REG_PPC_MAS1:
+		vcpu->arch.shared->mas1 = set_reg_val(id, *val);
+		break;
+	case KVM_REG_PPC_MAS2:
+		vcpu->arch.shared->mas2 = set_reg_val(id, *val);
+		break;
+	case KVM_REG_PPC_MAS7_3:
+		vcpu->arch.shared->mas7_3 = set_reg_val(id, *val);
+		break;
+	case KVM_REG_PPC_MAS4:
+		vcpu->arch.shared->mas4 = set_reg_val(id, *val);
+		break;
+	case KVM_REG_PPC_MAS6:
+		vcpu->arch.shared->mas6 = set_reg_val(id, *val);
+		break;
+	/* Only allow MMU registers to be set to the config supported by KVM */
+	case KVM_REG_PPC_MMUCFG: {
+		u32 reg = set_reg_val(id, *val);
+		if (reg != vcpu->arch.mmucfg)
+			r = -EINVAL;
+		break;
+	}
+	case KVM_REG_PPC_EPTCFG: {
+		u32 reg = set_reg_val(id, *val);
+		if (reg != vcpu->arch.eptcfg)
+			r = -EINVAL;
+		break;
+	}
+	case KVM_REG_PPC_TLB0CFG:
+	case KVM_REG_PPC_TLB1CFG:
+	case KVM_REG_PPC_TLB2CFG:
+	case KVM_REG_PPC_TLB3CFG: {
+		/* MMU geometry (N_ENTRY/ASSOC) can be set only using SW_TLB */
+		u32 reg = set_reg_val(id, *val);
+		i = id - KVM_REG_PPC_TLB0CFG;
+		if (reg != vcpu->arch.tlbcfg[i])
+			r = -EINVAL;
+		break;
+	}
+	case KVM_REG_PPC_TLB0PS:
+	case KVM_REG_PPC_TLB1PS:
+	case KVM_REG_PPC_TLB2PS:
+	case KVM_REG_PPC_TLB3PS: {
+		u32 reg = set_reg_val(id, *val);
+		i = id - KVM_REG_PPC_TLB0PS;
+		if (reg != vcpu->arch.tlbps[i])
+			r = -EINVAL;
+		break;
+	}
+	default:
+		r = -EINVAL;
+		break;
+	}
+
+	return r;
+}
+
+static int vcpu_mmu_geometry_update(struct kvm_vcpu *vcpu,
+		struct kvm_book3e_206_tlb_params *params)
+{
+	vcpu->arch.tlbcfg[0] &= ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
+	if (params->tlb_sizes[0] <= 2048)
+		vcpu->arch.tlbcfg[0] |= params->tlb_sizes[0];
+	vcpu->arch.tlbcfg[0] |= params->tlb_ways[0] << TLBnCFG_ASSOC_SHIFT;
+
+	vcpu->arch.tlbcfg[1] &= ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
+	vcpu->arch.tlbcfg[1] |= params->tlb_sizes[1];
+	vcpu->arch.tlbcfg[1] |= params->tlb_ways[1] << TLBnCFG_ASSOC_SHIFT;
+	return 0;
+}
+
 int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu,
 			      struct kvm_config_tlb *cfg)
 {
@@ -692,16 +826,8 @@
 	vcpu_e500->gtlb_offset[0] = 0;
 	vcpu_e500->gtlb_offset[1] = params.tlb_sizes[0];
 
-	vcpu->arch.mmucfg = mfspr(SPRN_MMUCFG) & ~MMUCFG_LPIDSIZE;
-
-	vcpu->arch.tlbcfg[0] &= ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
-	if (params.tlb_sizes[0] <= 2048)
-		vcpu->arch.tlbcfg[0] |= params.tlb_sizes[0];
-	vcpu->arch.tlbcfg[0] |= params.tlb_ways[0] << TLBnCFG_ASSOC_SHIFT;
-
-	vcpu->arch.tlbcfg[1] &= ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
-	vcpu->arch.tlbcfg[1] |= params.tlb_sizes[1];
-	vcpu->arch.tlbcfg[1] |= params.tlb_ways[1] << TLBnCFG_ASSOC_SHIFT;
+	/* Update vcpu's MMU geometry based on SW_TLB input */
+	vcpu_mmu_geometry_update(vcpu, &params);
 
 	vcpu_e500->shared_tlb_pages = pages;
 	vcpu_e500->num_shared_tlb_pages = num_pages;
@@ -737,6 +863,39 @@
 	return 0;
 }
 
+/* Vcpu's MMU default configuration */
+static int vcpu_mmu_init(struct kvm_vcpu *vcpu,
+		       struct kvmppc_e500_tlb_params *params)
+{
+	/* Initialize RASIZE, PIDSIZE, NTLBS and MAVN fields with host values */
+	vcpu->arch.mmucfg = mfspr(SPRN_MMUCFG) & ~MMUCFG_LPIDSIZE;
+
+	/* Initialize TLBnCFG fields with host values and SW_TLB geometry */
+	vcpu->arch.tlbcfg[0] = mfspr(SPRN_TLB0CFG) &
+			     ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
+	vcpu->arch.tlbcfg[0] |= params[0].entries;
+	vcpu->arch.tlbcfg[0] |= params[0].ways << TLBnCFG_ASSOC_SHIFT;
+
+	vcpu->arch.tlbcfg[1] = mfspr(SPRN_TLB1CFG) &
+			     ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
+	vcpu->arch.tlbcfg[1] |= params[1].entries;
+	vcpu->arch.tlbcfg[1] |= params[1].ways << TLBnCFG_ASSOC_SHIFT;
+
+	if (has_feature(vcpu, VCPU_FTR_MMU_V2)) {
+		vcpu->arch.tlbps[0] = mfspr(SPRN_TLB0PS);
+		vcpu->arch.tlbps[1] = mfspr(SPRN_TLB1PS);
+
+		vcpu->arch.mmucfg &= ~MMUCFG_LRAT;
+
+		/* Guest mmu emulation currently doesn't handle E.PT */
+		vcpu->arch.eptcfg = 0;
+		vcpu->arch.tlbcfg[0] &= ~TLBnCFG_PT;
+		vcpu->arch.tlbcfg[1] &= ~TLBnCFG_IND;
+	}
+
+	return 0;
+}
+
 int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500)
 {
 	struct kvm_vcpu *vcpu = &vcpu_e500->vcpu;
@@ -781,18 +940,7 @@
 	if (!vcpu_e500->g2h_tlb1_map)
 		goto err;
 
-	/* Init TLB configuration register */
-	vcpu->arch.tlbcfg[0] = mfspr(SPRN_TLB0CFG) &
-			     ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
-	vcpu->arch.tlbcfg[0] |= vcpu_e500->gtlb_params[0].entries;
-	vcpu->arch.tlbcfg[0] |=
-		vcpu_e500->gtlb_params[0].ways << TLBnCFG_ASSOC_SHIFT;
-
-	vcpu->arch.tlbcfg[1] = mfspr(SPRN_TLB1CFG) &
-			     ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
-	vcpu->arch.tlbcfg[1] |= vcpu_e500->gtlb_params[1].entries;
-	vcpu->arch.tlbcfg[1] |=
-		vcpu_e500->gtlb_params[1].ways << TLBnCFG_ASSOC_SHIFT;
+	vcpu_mmu_init(vcpu, vcpu_e500->gtlb_params);
 
 	kvmppc_recalc_tlb1map_range(vcpu_e500);
 	return 0;
diff --git a/arch/powerpc/kvm/e500mc.c b/arch/powerpc/kvm/e500mc.c
index 2f4baa0..753cc99 100644
--- a/arch/powerpc/kvm/e500mc.c
+++ b/arch/powerpc/kvm/e500mc.c
@@ -177,6 +177,8 @@
 		r = 0;
 	else if (strcmp(cur_cpu_spec->cpu_name, "e5500") == 0)
 		r = 0;
+	else if (strcmp(cur_cpu_spec->cpu_name, "e6500") == 0)
+		r = 0;
 	else
 		r = -ENOTSUPP;
 
@@ -260,6 +262,20 @@
 	return kvmppc_set_sregs_ivor(vcpu, sregs);
 }
 
+int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id,
+			union kvmppc_one_reg *val)
+{
+	return kvmppc_get_one_reg_e500_tlb(vcpu, id, val);
+}
+
+int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id,
+		       union kvmppc_one_reg *val)
+{
+	return kvmppc_set_one_reg_e500_tlb(vcpu, id, val);
+}
+
 struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
 {
 	struct kvmppc_vcpu_e500 *vcpu_e500;
diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c
index 7a73b6f..631a265 100644
--- a/arch/powerpc/kvm/emulate.c
+++ b/arch/powerpc/kvm/emulate.c
@@ -38,6 +38,7 @@
 
 #define OP_31_XOP_TRAP      4
 #define OP_31_XOP_LWZX      23
+#define OP_31_XOP_DCBST     54
 #define OP_31_XOP_TRAP_64   68
 #define OP_31_XOP_DCBF      86
 #define OP_31_XOP_LBZX      87
@@ -370,6 +371,7 @@
 			emulated = kvmppc_emulate_mtspr(vcpu, sprn, rs);
 			break;
 
+		case OP_31_XOP_DCBST:
 		case OP_31_XOP_DCBF:
 		case OP_31_XOP_DCBI:
 			/* Do nothing. The guest is performing dcbi because
diff --git a/arch/powerpc/kvm/irq.h b/arch/powerpc/kvm/irq.h
new file mode 100644
index 0000000..5a9a10b
--- /dev/null
+++ b/arch/powerpc/kvm/irq.h
@@ -0,0 +1,20 @@
+#ifndef __IRQ_H
+#define __IRQ_H
+
+#include <linux/kvm_host.h>
+
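+/*
+ * True if an in-kernel irqchip (MPIC or XICS) has been created for this
+ * VM.  The read barrier pairs with the smp_wmb() issued before the
+ * irqchip pointer is published when the device is created.
+ */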
+static inline int irqchip_in_kernel(struct kvm *kvm)
+{
+	int ret = 0;
+
+#ifdef CONFIG_KVM_MPIC
+	ret = ret || (kvm->arch.mpic != NULL);
+#endif
+#ifdef CONFIG_KVM_XICS
+	ret = ret || (kvm->arch.xics != NULL);
+#endif
+	smp_rmb();
+	return ret;
+}
+
+#endif
diff --git a/arch/powerpc/kvm/mpic.c b/arch/powerpc/kvm/mpic.c
new file mode 100644
index 0000000..2861ae9
--- /dev/null
+++ b/arch/powerpc/kvm/mpic.c
@@ -0,0 +1,1853 @@
+/*
+ * OpenPIC emulation
+ *
+ * Copyright (c) 2004 Jocelyn Mayer
+ *               2011 Alexander Graf
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <linux/slab.h>
+#include <linux/mutex.h>
+#include <linux/kvm_host.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/anon_inodes.h>
+#include <asm/uaccess.h>
+#include <asm/mpic.h>
+#include <asm/kvm_para.h>
+#include <asm/kvm_host.h>
+#include <asm/kvm_ppc.h>
+#include "iodev.h"
+
+#define MAX_CPU     32
+#define MAX_SRC     256
+#define MAX_TMR     4
+#define MAX_IPI     4
+#define MAX_MSI     8
+#define MAX_IRQ     (MAX_SRC + MAX_IPI + MAX_TMR)
+#define VID         0x03	/* MPIC version ID */
+
+/* OpenPIC capability flags */
+#define OPENPIC_FLAG_IDR_CRIT     (1 << 0)
+#define OPENPIC_FLAG_ILR          (1 << 1)
+
+/* OpenPIC address map */
+#define OPENPIC_REG_SIZE             0x40000
+#define OPENPIC_GLB_REG_START        0x0
+#define OPENPIC_GLB_REG_SIZE         0x10F0
+#define OPENPIC_TMR_REG_START        0x10F0
+#define OPENPIC_TMR_REG_SIZE         0x220
+#define OPENPIC_MSI_REG_START        0x1600
+#define OPENPIC_MSI_REG_SIZE         0x200
+#define OPENPIC_SUMMARY_REG_START    0x3800
+#define OPENPIC_SUMMARY_REG_SIZE     0x800
+#define OPENPIC_SRC_REG_START        0x10000
+#define OPENPIC_SRC_REG_SIZE         (MAX_SRC * 0x20)
+#define OPENPIC_CPU_REG_START        0x20000
+#define OPENPIC_CPU_REG_SIZE         (0x100 + ((MAX_CPU - 1) * 0x1000))
+
+struct fsl_mpic_info {
+	int max_ext;
+};
+
+static struct fsl_mpic_info fsl_mpic_20 = {
+	.max_ext = 12,
+};
+
+static struct fsl_mpic_info fsl_mpic_42 = {
+	.max_ext = 12,
+};
+
+#define FRR_NIRQ_SHIFT    16
+#define FRR_NCPU_SHIFT     8
+#define FRR_VID_SHIFT      0
+
+#define VID_REVISION_1_2   2
+#define VID_REVISION_1_3   3
+
+#define VIR_GENERIC      0x00000000	/* Generic Vendor ID */
+
+#define GCR_RESET        0x80000000
+#define GCR_MODE_PASS    0x00000000
+#define GCR_MODE_MIXED   0x20000000
+#define GCR_MODE_PROXY   0x60000000
+
+#define TBCR_CI           0x80000000	/* count inhibit */
+#define TCCR_TOG          0x80000000	/* toggles when decrement to zero */
+
+#define IDR_EP_SHIFT      31
+#define IDR_EP_MASK       (1 << IDR_EP_SHIFT)
+#define IDR_CI0_SHIFT     30
+#define IDR_CI1_SHIFT     29
+#define IDR_P1_SHIFT      1
+#define IDR_P0_SHIFT      0
+
+#define ILR_INTTGT_MASK   0x000000ff
+#define ILR_INTTGT_INT    0x00
+#define ILR_INTTGT_CINT   0x01	/* critical */
+#define ILR_INTTGT_MCP    0x02	/* machine check */
+#define NUM_OUTPUTS       3
+
+#define MSIIR_OFFSET       0x140
+#define MSIIR_SRS_SHIFT    29
+#define MSIIR_SRS_MASK     (0x7 << MSIIR_SRS_SHIFT)
+#define MSIIR_IBS_SHIFT    24
+#define MSIIR_IBS_MASK     (0x1f << MSIIR_IBS_SHIFT)
+
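+/*
+ * Map the current host thread to the MPIC CPU index of the vcpu it is
+ * running, or -1 if it is not running in vcpu context.
+ */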
+static int get_current_cpu(void)
+{
+#if defined(CONFIG_KVM) && defined(CONFIG_BOOKE)
+	struct kvm_vcpu *vcpu = current->thread.kvm_vcpu;
+	return vcpu ? vcpu->arch.irq_cpu_id : -1;
+#else
+	/* XXX */
+	return -1;
+#endif
+}
+
+static int openpic_cpu_write_internal(void *opaque, gpa_t addr,
+				      u32 val, int idx);
+static int openpic_cpu_read_internal(void *opaque, gpa_t addr,
+				     u32 *ptr, int idx);
+
+enum irq_type {
+	IRQ_TYPE_NORMAL = 0,
+	IRQ_TYPE_FSLINT,	/* FSL internal interrupt -- level only */
+	IRQ_TYPE_FSLSPECIAL,	/* FSL timer/IPI interrupt, edge, no polarity */
+};
+
+struct irq_queue {
+	/* Round up to the nearest 64 IRQs so that the queue length
+	 * won't change when moving between 32 and 64 bit hosts.
+	 */
+	unsigned long queue[BITS_TO_LONGS((MAX_IRQ + 63) & ~63)];
+	int next;
+	int priority;
+};
+
+struct irq_source {
+	uint32_t ivpr;		/* IRQ vector/priority register */
+	uint32_t idr;		/* IRQ destination register */
+	uint32_t destmask;	/* bitmap of CPU destinations */
+	int last_cpu;
+	int output;		/* IRQ level, e.g. ILR_INTTGT_INT */
+	int pending;		/* TRUE if IRQ is pending */
+	enum irq_type type;
+	bool level:1;		/* level-triggered */
+	bool nomask:1;	/* critical interrupts ignore mask on some FSL MPICs */
+};
+
+#define IVPR_MASK_SHIFT       31
+#define IVPR_MASK_MASK        (1 << IVPR_MASK_SHIFT)
+#define IVPR_ACTIVITY_SHIFT   30
+#define IVPR_ACTIVITY_MASK    (1 << IVPR_ACTIVITY_SHIFT)
+#define IVPR_MODE_SHIFT       29
+#define IVPR_MODE_MASK        (1 << IVPR_MODE_SHIFT)
+#define IVPR_POLARITY_SHIFT   23
+#define IVPR_POLARITY_MASK    (1 << IVPR_POLARITY_SHIFT)
+#define IVPR_SENSE_SHIFT      22
+#define IVPR_SENSE_MASK       (1 << IVPR_SENSE_SHIFT)
+
+#define IVPR_PRIORITY_MASK     (0xF << 16)
+#define IVPR_PRIORITY(_ivprr_) ((int)(((_ivprr_) & IVPR_PRIORITY_MASK) >> 16))
+#define IVPR_VECTOR(opp, _ivprr_) ((_ivprr_) & (opp)->vector_mask)
+
+/* IDR[EP/CI] are only for FSL MPIC prior to v4.0 */
+#define IDR_EP      0x80000000	/* external pin */
+#define IDR_CI      0x40000000	/* critical interrupt */
+
+struct irq_dest {
+	struct kvm_vcpu *vcpu;
+
+	int32_t ctpr;		/* CPU current task priority */
+	struct irq_queue raised;
+	struct irq_queue servicing;
+
+	/* Count of IRQ sources asserting on non-INT outputs */
+	uint32_t outputs_active[NUM_OUTPUTS];
+};
+
+#define MAX_MMIO_REGIONS 10
+
+struct openpic {
+	struct kvm *kvm;
+	struct kvm_device *dev;
+	struct kvm_io_device mmio;
+	const struct mem_reg *mmio_regions[MAX_MMIO_REGIONS];
+	int num_mmio_regions;
+
+	gpa_t reg_base;
+	spinlock_t lock;
+
+	/* Behavior control */
+	struct fsl_mpic_info *fsl;
+	uint32_t model;
+	uint32_t flags;
+	uint32_t nb_irqs;
+	uint32_t vid;
+	uint32_t vir;		/* Vendor identification register */
+	uint32_t vector_mask;
+	uint32_t tfrr_reset;
+	uint32_t ivpr_reset;
+	uint32_t idr_reset;
+	uint32_t brr1;
+	uint32_t mpic_mode_mask;
+
+	/* Global registers */
+	uint32_t frr;		/* Feature reporting register */
+	uint32_t gcr;		/* Global configuration register  */
+	uint32_t pir;		/* Processor initialization register */
+	uint32_t spve;		/* Spurious vector register */
+	uint32_t tfrr;		/* Timer frequency reporting register */
+	/* Source registers */
+	struct irq_source src[MAX_IRQ];
+	/* Local registers per output pin */
+	struct irq_dest dst[MAX_CPU];
+	uint32_t nb_cpus;
+	/* Timer registers */
+	struct {
+		uint32_t tccr;	/* Global timer current count register */
+		uint32_t tbcr;	/* Global timer base count register */
+	} timers[MAX_TMR];
+	/* Shared MSI registers */
+	struct {
+		uint32_t msir;	/* Shared Message Signaled Interrupt Register */
+	} msi[MAX_MSI];
+	uint32_t max_irq;
+	uint32_t irq_ipi0;
+	uint32_t irq_tim0;
+	uint32_t irq_msi;
+};
+
+
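+/*
+ * Assert an interrupt towards the given destination CPU.  Only the
+ * normal INT output is wired up to the vcpu so far; the critical and
+ * machine-check outputs are still TODO.
+ */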
+static void mpic_irq_raise(struct openpic *opp, struct irq_dest *dst,
+			   int output)
+{
+	struct kvm_interrupt irq = {
+		.irq = KVM_INTERRUPT_SET_LEVEL,
+	};
+
+	if (!dst->vcpu) {
+		pr_debug("%s: destination cpu %d does not exist\n",
+			 __func__, (int)(dst - &opp->dst[0]));
+		return;
+	}
+
+	pr_debug("%s: cpu %d output %d\n", __func__, dst->vcpu->arch.irq_cpu_id,
+		output);
+
+	if (output != ILR_INTTGT_INT)	/* TODO */
+		return;
+
+	kvm_vcpu_ioctl_interrupt(dst->vcpu, &irq);
+}
+
+static void mpic_irq_lower(struct openpic *opp, struct irq_dest *dst,
+			   int output)
+{
+	if (!dst->vcpu) {
+		pr_debug("%s: destination cpu %d does not exist\n",
+			 __func__, (int)(dst - &opp->dst[0]));
+		return;
+	}
+
+	pr_debug("%s: cpu %d output %d\n", __func__, dst->vcpu->arch.irq_cpu_id,
+		output);
+
+	if (output != ILR_INTTGT_INT)	/* TODO */
+		return;
+
+	kvmppc_core_dequeue_external(dst->vcpu);
+}
+
+static inline void IRQ_setbit(struct irq_queue *q, int n_IRQ)
+{
+	set_bit(n_IRQ, q->queue);
+}
+
+static inline void IRQ_resetbit(struct irq_queue *q, int n_IRQ)
+{
+	clear_bit(n_IRQ, q->queue);
+}
+
+static inline int IRQ_testbit(struct irq_queue *q, int n_IRQ)
+{
+	return test_bit(n_IRQ, q->queue);
+}
+
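+/*
+ * Scan a queue for the highest-priority pending IRQ and cache the
+ * result in q->next/q->priority.
+ */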
+static void IRQ_check(struct openpic *opp, struct irq_queue *q)
+{
+	int irq = -1;
+	int next = -1;
+	int priority = -1;
+
+	for (;;) {
+		irq = find_next_bit(q->queue, opp->max_irq, irq + 1);
+		if (irq == opp->max_irq)
+			break;
+
+		pr_debug("IRQ_check: irq %d set ivpr_pr=%d pr=%d\n",
+			irq, IVPR_PRIORITY(opp->src[irq].ivpr), priority);
+
+		if (IVPR_PRIORITY(opp->src[irq].ivpr) > priority) {
+			next = irq;
+			priority = IVPR_PRIORITY(opp->src[irq].ivpr);
+		}
+	}
+
+	q->next = next;
+	q->priority = priority;
+}
+
+static int IRQ_get_next(struct openpic *opp, struct irq_queue *q)
+{
+	/* XXX: optimize */
+	IRQ_check(opp, q);
+
+	return q->next;
+}
+
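+/*
+ * Propagate a single IRQ's state change to one destination CPU,
+ * raising or lowering the relevant output pin while honouring the
+ * CPU's current task priority and any interrupt already in service.
+ */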
+static void IRQ_local_pipe(struct openpic *opp, int n_CPU, int n_IRQ,
+			   bool active, bool was_active)
+{
+	struct irq_dest *dst;
+	struct irq_source *src;
+	int priority;
+
+	dst = &opp->dst[n_CPU];
+	src = &opp->src[n_IRQ];
+
+	pr_debug("%s: IRQ %d active %d was %d\n",
+		__func__, n_IRQ, active, was_active);
+
+	if (src->output != ILR_INTTGT_INT) {
+		pr_debug("%s: output %d irq %d active %d was %d count %d\n",
+			__func__, src->output, n_IRQ, active, was_active,
+			dst->outputs_active[src->output]);
+
+		/* On Freescale MPIC, critical interrupts ignore priority,
+		 * IACK, EOI, etc.  Before MPIC v4.1 they also ignore
+		 * masking.
+		 */
+		if (active) {
+			if (!was_active &&
+			    dst->outputs_active[src->output]++ == 0) {
+				pr_debug("%s: Raise OpenPIC output %d cpu %d irq %d\n",
+					__func__, src->output, n_CPU, n_IRQ);
+				mpic_irq_raise(opp, dst, src->output);
+			}
+		} else {
+			if (was_active &&
+			    --dst->outputs_active[src->output] == 0) {
+				pr_debug("%s: Lower OpenPIC output %d cpu %d irq %d\n",
+					__func__, src->output, n_CPU, n_IRQ);
+				mpic_irq_lower(opp, dst, src->output);
+			}
+		}
+
+		return;
+	}
+
+	priority = IVPR_PRIORITY(src->ivpr);
+
+	/* Even if the interrupt doesn't have enough priority,
+	 * it is still raised, in case ctpr is lowered later.
+	 */
+	if (active)
+		IRQ_setbit(&dst->raised, n_IRQ);
+	else
+		IRQ_resetbit(&dst->raised, n_IRQ);
+
+	IRQ_check(opp, &dst->raised);
+
+	if (active && priority <= dst->ctpr) {
+		pr_debug("%s: IRQ %d priority %d too low for ctpr %d on CPU %d\n",
+			__func__, n_IRQ, priority, dst->ctpr, n_CPU);
+		active = 0;
+	}
+
+	if (active) {
+		if (IRQ_get_next(opp, &dst->servicing) >= 0 &&
+		    priority <= dst->servicing.priority) {
+			pr_debug("%s: IRQ %d is hidden by servicing IRQ %d on CPU %d\n",
+				__func__, n_IRQ, dst->servicing.next, n_CPU);
+		} else {
+			pr_debug("%s: Raise OpenPIC INT output cpu %d irq %d/%d\n",
+				__func__, n_CPU, n_IRQ, dst->raised.next);
+			mpic_irq_raise(opp, dst, ILR_INTTGT_INT);
+		}
+	} else {
+		IRQ_get_next(opp, &dst->servicing);
+		if (dst->raised.priority > dst->ctpr &&
+		    dst->raised.priority > dst->servicing.priority) {
+			pr_debug("%s: IRQ %d inactive, IRQ %d prio %d above %d/%d, CPU %d\n",
+				__func__, n_IRQ, dst->raised.next,
+				dst->raised.priority, dst->ctpr,
+				dst->servicing.priority, n_CPU);
+			/* IRQ line stays asserted */
+		} else {
+			pr_debug("%s: IRQ %d inactive, current prio %d/%d, CPU %d\n",
+				__func__, n_IRQ, dst->ctpr,
+				dst->servicing.priority, n_CPU);
+			mpic_irq_lower(opp, dst, ILR_INTTGT_INT);
+		}
+	}
+}
+
+/* Update the PIC state because the registers for n_IRQ have changed value */
+static void openpic_update_irq(struct openpic *opp, int n_IRQ)
+{
+	struct irq_source *src;
+	bool active, was_active;
+	int i;
+
+	src = &opp->src[n_IRQ];
+	active = src->pending;
+
+	if ((src->ivpr & IVPR_MASK_MASK) && !src->nomask) {
+		/* Interrupt source is disabled */
+		pr_debug("%s: IRQ %d is disabled\n", __func__, n_IRQ);
+		active = false;
+	}
+
+	was_active = !!(src->ivpr & IVPR_ACTIVITY_MASK);
+
+	/*
+	 * We don't have a similar check for already-active because
+	 * ctpr may have changed and we need to withdraw the interrupt.
+	 */
+	if (!active && !was_active) {
+		pr_debug("%s: IRQ %d is already inactive\n", __func__, n_IRQ);
+		return;
+	}
+
+	if (active)
+		src->ivpr |= IVPR_ACTIVITY_MASK;
+	else
+		src->ivpr &= ~IVPR_ACTIVITY_MASK;
+
+	if (src->destmask == 0) {
+		/* No target */
+		pr_debug("%s: IRQ %d has no target\n", __func__, n_IRQ);
+		return;
+	}
+
+	if (src->destmask == (1 << src->last_cpu)) {
+		/* Only one CPU is allowed to receive this IRQ */
+		IRQ_local_pipe(opp, src->last_cpu, n_IRQ, active, was_active);
+	} else if (!(src->ivpr & IVPR_MODE_MASK)) {
+		/* Directed delivery mode */
+		for (i = 0; i < opp->nb_cpus; i++) {
+			if (src->destmask & (1 << i)) {
+				IRQ_local_pipe(opp, i, n_IRQ, active,
+					       was_active);
+			}
+		}
+	} else {
+		/* Distributed delivery mode */
+		for (i = src->last_cpu + 1; i != src->last_cpu; i++) {
+			if (i == opp->nb_cpus)
+				i = 0;
+
+			if (src->destmask & (1 << i)) {
+				IRQ_local_pipe(opp, i, n_IRQ, active,
+					       was_active);
+				src->last_cpu = i;
+				break;
+			}
+		}
+	}
+}
+
+static void openpic_set_irq(void *opaque, int n_IRQ, int level)
+{
+	struct openpic *opp = opaque;
+	struct irq_source *src;
+
+	if (n_IRQ >= MAX_IRQ) {
+		WARN_ONCE(1, "%s: IRQ %d out of range\n", __func__, n_IRQ);
+		return;
+	}
+
+	src = &opp->src[n_IRQ];
+	pr_debug("openpic: set irq %d = %d ivpr=0x%08x\n",
+		n_IRQ, level, src->ivpr);
+	if (src->level) {
+		/* level-sensitive irq */
+		src->pending = level;
+		openpic_update_irq(opp, n_IRQ);
+	} else {
+		/* edge-sensitive irq */
+		if (level) {
+			src->pending = 1;
+			openpic_update_irq(opp, n_IRQ);
+		}
+
+		if (src->output != ILR_INTTGT_INT) {
+			/* Edge-triggered interrupts shouldn't be used
+			 * with non-INT delivery, but just in case,
+			 * try to make it do something sane rather than
+			 * cause an interrupt storm.  This is close to
+			 * what you'd probably see happen in real hardware.
+			 */
+			src->pending = 0;
+			openpic_update_irq(opp, n_IRQ);
+		}
+	}
+}
+
+static void openpic_reset(struct openpic *opp)
+{
+	int i;
+
+	opp->gcr = GCR_RESET;
+	/* Initialise controller registers */
+	opp->frr = ((opp->nb_irqs - 1) << FRR_NIRQ_SHIFT) |
+	    (opp->vid << FRR_VID_SHIFT);
+
+	opp->pir = 0;
+	opp->spve = -1 & opp->vector_mask;
+	opp->tfrr = opp->tfrr_reset;
+	/* Initialise IRQ sources */
+	for (i = 0; i < opp->max_irq; i++) {
+		opp->src[i].ivpr = opp->ivpr_reset;
+		opp->src[i].idr = opp->idr_reset;
+
+		switch (opp->src[i].type) {
+		case IRQ_TYPE_NORMAL:
+			opp->src[i].level =
+			    !!(opp->ivpr_reset & IVPR_SENSE_MASK);
+			break;
+
+		case IRQ_TYPE_FSLINT:
+			opp->src[i].ivpr |= IVPR_POLARITY_MASK;
+			break;
+
+		case IRQ_TYPE_FSLSPECIAL:
+			break;
+		}
+	}
+	/* Initialise IRQ destinations */
+	for (i = 0; i < MAX_CPU; i++) {
+		opp->dst[i].ctpr = 15;
+		memset(&opp->dst[i].raised, 0, sizeof(struct irq_queue));
+		opp->dst[i].raised.next = -1;
+		memset(&opp->dst[i].servicing, 0, sizeof(struct irq_queue));
+		opp->dst[i].servicing.next = -1;
+	}
+	/* Initialise timers */
+	for (i = 0; i < MAX_TMR; i++) {
+		opp->timers[i].tccr = 0;
+		opp->timers[i].tbcr = TBCR_CI;
+	}
+	/* Go out of RESET state */
+	opp->gcr = 0;
+}
+
+static inline uint32_t read_IRQreg_idr(struct openpic *opp, int n_IRQ)
+{
+	return opp->src[n_IRQ].idr;
+}
+
+static inline uint32_t read_IRQreg_ilr(struct openpic *opp, int n_IRQ)
+{
+	if (opp->flags & OPENPIC_FLAG_ILR)
+		return opp->src[n_IRQ].output;
+
+	return 0xffffffff;
+}
+
+static inline uint32_t read_IRQreg_ivpr(struct openpic *opp, int n_IRQ)
+{
+	return opp->src[n_IRQ].ivpr;
+}
+
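+/*
+ * On FSL MPICs with the IDR_CRIT quirk, the IDR holds both the normal
+ * per-CPU destination bits and, shifted towards the top of the
+ * register, the critical-interrupt destination bits; decode both into
+ * destmask here.
+ */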
+static inline void write_IRQreg_idr(struct openpic *opp, int n_IRQ,
+				    uint32_t val)
+{
+	struct irq_source *src = &opp->src[n_IRQ];
+	uint32_t normal_mask = (1UL << opp->nb_cpus) - 1;
+	uint32_t crit_mask = 0;
+	uint32_t mask = normal_mask;
+	int crit_shift = IDR_EP_SHIFT - opp->nb_cpus;
+	int i;
+
+	if (opp->flags & OPENPIC_FLAG_IDR_CRIT) {
+		crit_mask = mask << crit_shift;
+		mask |= crit_mask | IDR_EP;
+	}
+
+	src->idr = val & mask;
+	pr_debug("Set IDR %d to 0x%08x\n", n_IRQ, src->idr);
+
+	if (opp->flags & OPENPIC_FLAG_IDR_CRIT) {
+		if (src->idr & crit_mask) {
+			if (src->idr & normal_mask) {
+				pr_debug("%s: IRQ configured for multiple output types, using critical\n",
+					__func__);
+			}
+
+			src->output = ILR_INTTGT_CINT;
+			src->nomask = true;
+			src->destmask = 0;
+
+			for (i = 0; i < opp->nb_cpus; i++) {
+				int n_ci = IDR_CI0_SHIFT - i;
+
+				if (src->idr & (1UL << n_ci))
+					src->destmask |= 1UL << i;
+			}
+		} else {
+			src->output = ILR_INTTGT_INT;
+			src->nomask = false;
+			src->destmask = src->idr & normal_mask;
+		}
+	} else {
+		src->destmask = src->idr;
+	}
+}
+
+static inline void write_IRQreg_ilr(struct openpic *opp, int n_IRQ,
+				    uint32_t val)
+{
+	if (opp->flags & OPENPIC_FLAG_ILR) {
+		struct irq_source *src = &opp->src[n_IRQ];
+
+		src->output = val & ILR_INTTGT_MASK;
+		pr_debug("Set ILR %d to 0x%08x, output %d\n", n_IRQ, src->idr,
+			src->output);
+
+		/* TODO: on MPIC v4.0 only, set nomask for non-INT */
+	}
+}
+
+static inline void write_IRQreg_ivpr(struct openpic *opp, int n_IRQ,
+				     uint32_t val)
+{
+	uint32_t mask;
+
+	/* NOTE when implementing newer FSL MPIC models: starting with v4.0,
+	 * the polarity bit is read-only on internal interrupts.
+	 */
+	mask = IVPR_MASK_MASK | IVPR_PRIORITY_MASK | IVPR_SENSE_MASK |
+	    IVPR_POLARITY_MASK | opp->vector_mask;
+
+	/* ACTIVITY bit is read-only */
+	opp->src[n_IRQ].ivpr =
+	    (opp->src[n_IRQ].ivpr & IVPR_ACTIVITY_MASK) | (val & mask);
+
+	/* For FSL internal interrupts, the sense bit is reserved and zero,
+	 * and the interrupt is always level-triggered.  Timers and IPIs
+	 * have no sense or polarity bits, and are edge-triggered.
+	 */
+	switch (opp->src[n_IRQ].type) {
+	case IRQ_TYPE_NORMAL:
+		opp->src[n_IRQ].level =
+		    !!(opp->src[n_IRQ].ivpr & IVPR_SENSE_MASK);
+		break;
+
+	case IRQ_TYPE_FSLINT:
+		opp->src[n_IRQ].ivpr &= ~IVPR_SENSE_MASK;
+		break;
+
+	case IRQ_TYPE_FSLSPECIAL:
+		opp->src[n_IRQ].ivpr &= ~(IVPR_POLARITY_MASK | IVPR_SENSE_MASK);
+		break;
+	}
+
+	openpic_update_irq(opp, n_IRQ);
+	pr_debug("Set IVPR %d to 0x%08x -> 0x%08x\n", n_IRQ, val,
+		opp->src[n_IRQ].ivpr);
+}
+
+static void openpic_gcr_write(struct openpic *opp, uint64_t val)
+{
+	if (val & GCR_RESET) {
+		openpic_reset(opp);
+		return;
+	}
+
+	opp->gcr &= ~opp->mpic_mode_mask;
+	opp->gcr |= val & opp->mpic_mode_mask;
+}
+
+static int openpic_gbl_write(void *opaque, gpa_t addr, u32 val)
+{
+	struct openpic *opp = opaque;
+	int err = 0;
+
+	pr_debug("%s: addr %#llx <= %08x\n", __func__, addr, val);
+	if (addr & 0xF)
+		return 0;
+
+	switch (addr) {
+	case 0x00:	/* Block Revision Register1 (BRR1) is Readonly */
+		break;
+	case 0x40:
+	case 0x50:
+	case 0x60:
+	case 0x70:
+	case 0x80:
+	case 0x90:
+	case 0xA0:
+	case 0xB0:
+		err = openpic_cpu_write_internal(opp, addr, val,
+						 get_current_cpu());
+		break;
+	case 0x1000:		/* FRR */
+		break;
+	case 0x1020:		/* GCR */
+		openpic_gcr_write(opp, val);
+		break;
+	case 0x1080:		/* VIR */
+		break;
+	case 0x1090:		/* PIR */
+		/*
+		 * This register is used to reset a CPU core --
+		 * let userspace handle it.
+		 */
+		err = -ENXIO;
+		break;
+	case 0x10A0:		/* IPI_IVPR */
+	case 0x10B0:
+	case 0x10C0:
+	case 0x10D0: {
+		int idx;
+		idx = (addr - 0x10A0) >> 4;
+		write_IRQreg_ivpr(opp, opp->irq_ipi0 + idx, val);
+		break;
+	}
+	case 0x10E0:		/* SPVE */
+		opp->spve = val & opp->vector_mask;
+		break;
+	default:
+		break;
+	}
+
+	return err;
+}
+
+static int openpic_gbl_read(void *opaque, gpa_t addr, u32 *ptr)
+{
+	struct openpic *opp = opaque;
+	u32 retval;
+	int err = 0;
+
+	pr_debug("%s: addr %#llx\n", __func__, addr);
+	retval = 0xFFFFFFFF;
+	if (addr & 0xF)
+		goto out;
+
+	switch (addr) {
+	case 0x1000:		/* FRR */
+		retval = opp->frr;
+		retval |= (opp->nb_cpus - 1) << FRR_NCPU_SHIFT;
+		break;
+	case 0x1020:		/* GCR */
+		retval = opp->gcr;
+		break;
+	case 0x1080:		/* VIR */
+		retval = opp->vir;
+		break;
+	case 0x1090:		/* PIR */
+		retval = 0x00000000;
+		break;
+	case 0x00:		/* Block Revision Register1 (BRR1) */
+		retval = opp->brr1;
+		break;
+	case 0x40:
+	case 0x50:
+	case 0x60:
+	case 0x70:
+	case 0x80:
+	case 0x90:
+	case 0xA0:
+	case 0xB0:
+		err = openpic_cpu_read_internal(opp, addr,
+			&retval, get_current_cpu());
+		break;
+	case 0x10A0:		/* IPI_IVPR */
+	case 0x10B0:
+	case 0x10C0:
+	case 0x10D0:
+		{
+			int idx;
+			idx = (addr - 0x10A0) >> 4;
+			retval = read_IRQreg_ivpr(opp, opp->irq_ipi0 + idx);
+		}
+		break;
+	case 0x10E0:		/* SPVE */
+		retval = opp->spve;
+		break;
+	default:
+		break;
+	}
+
+out:
+	pr_debug("%s: => 0x%08x\n", __func__, retval);
+	*ptr = retval;
+	return err;
+}
+
+static int openpic_tmr_write(void *opaque, gpa_t addr, u32 val)
+{
+	struct openpic *opp = opaque;
+	int idx;
+
+	addr += 0x10f0;
+
+	pr_debug("%s: addr %#llx <= %08x\n", __func__, addr, val);
+	if (addr & 0xF)
+		return 0;
+
+	if (addr == 0x10f0) {
+		/* TFRR */
+		opp->tfrr = val;
+		return 0;
+	}
+
+	idx = (addr >> 6) & 0x3;
+	addr = addr & 0x30;
+
+	switch (addr & 0x30) {
+	case 0x00:		/* TCCR */
+		break;
+	case 0x10:		/* TBCR */
+		if ((opp->timers[idx].tccr & TCCR_TOG) != 0 &&
+		    (val & TBCR_CI) == 0 &&
+		    (opp->timers[idx].tbcr & TBCR_CI) != 0)
+			opp->timers[idx].tccr &= ~TCCR_TOG;
+
+		opp->timers[idx].tbcr = val;
+		break;
+	case 0x20:		/* TVPR */
+		write_IRQreg_ivpr(opp, opp->irq_tim0 + idx, val);
+		break;
+	case 0x30:		/* TDR */
+		write_IRQreg_idr(opp, opp->irq_tim0 + idx, val);
+		break;
+	}
+
+	return 0;
+}
+
+static int openpic_tmr_read(void *opaque, gpa_t addr, u32 *ptr)
+{
+	struct openpic *opp = opaque;
+	uint32_t retval = -1;
+	int idx;
+
+	pr_debug("%s: addr %#llx\n", __func__, addr);
+	if (addr & 0xF)
+		goto out;
+
+	idx = (addr >> 6) & 0x3;
+	if (addr == 0x0) {
+		/* TFRR */
+		retval = opp->tfrr;
+		goto out;
+	}
+
+	switch (addr & 0x30) {
+	case 0x00:		/* TCCR */
+		retval = opp->timers[idx].tccr;
+		break;
+	case 0x10:		/* TBCR */
+		retval = opp->timers[idx].tbcr;
+		break;
+	case 0x20:		/* TVPR */
+		retval = read_IRQreg_ivpr(opp, opp->irq_tim0 + idx);
+		break;
+	case 0x30:		/* TDR */
+		retval = read_IRQreg_idr(opp, opp->irq_tim0 + idx);
+		break;
+	}
+
+out:
+	pr_debug("%s: => 0x%08x\n", __func__, retval);
+	*ptr = retval;
+	return 0;
+}
+
+static int openpic_src_write(void *opaque, gpa_t addr, u32 val)
+{
+	struct openpic *opp = opaque;
+	int idx;
+
+	pr_debug("%s: addr %#llx <= %08x\n", __func__, addr, val);
+
+	addr = addr & 0xffff;
+	idx = addr >> 5;
+
+	switch (addr & 0x1f) {
+	case 0x00:
+		write_IRQreg_ivpr(opp, idx, val);
+		break;
+	case 0x10:
+		write_IRQreg_idr(opp, idx, val);
+		break;
+	case 0x18:
+		write_IRQreg_ilr(opp, idx, val);
+		break;
+	}
+
+	return 0;
+}
+
+static int openpic_src_read(void *opaque, gpa_t addr, u32 *ptr)
+{
+	struct openpic *opp = opaque;
+	uint32_t retval;
+	int idx;
+
+	pr_debug("%s: addr %#llx\n", __func__, addr);
+	retval = 0xFFFFFFFF;
+
+	addr = addr & 0xffff;
+	idx = addr >> 5;
+
+	switch (addr & 0x1f) {
+	case 0x00:
+		retval = read_IRQreg_ivpr(opp, idx);
+		break;
+	case 0x10:
+		retval = read_IRQreg_idr(opp, idx);
+		break;
+	case 0x18:
+		retval = read_IRQreg_ilr(opp, idx);
+		break;
+	}
+
+	pr_debug("%s: => 0x%08x\n", __func__, retval);
+	*ptr = retval;
+	return 0;
+}
+
+static int openpic_msi_write(void *opaque, gpa_t addr, u32 val)
+{
+	struct openpic *opp = opaque;
+	int idx = opp->irq_msi;
+	int srs, ibs;
+
+	pr_debug("%s: addr %#llx <= 0x%08x\n", __func__, addr, val);
+	if (addr & 0xF)
+		return 0;
+
+	switch (addr) {
+	case MSIIR_OFFSET:
+		srs = val >> MSIIR_SRS_SHIFT;
+		idx += srs;
+		ibs = (val & MSIIR_IBS_MASK) >> MSIIR_IBS_SHIFT;
+		opp->msi[srs].msir |= 1 << ibs;
+		openpic_set_irq(opp, idx, 1);
+		break;
+	default:
+		/* most registers are read-only, thus ignored */
+		break;
+	}
+
+	return 0;
+}
+
+static int openpic_msi_read(void *opaque, gpa_t addr, u32 *ptr)
+{
+	struct openpic *opp = opaque;
+	uint32_t r = 0;
+	int i, srs;
+
+	pr_debug("%s: addr %#llx\n", __func__, addr);
+	if (addr & 0xF)
+		return -ENXIO;
+
+	srs = addr >> 4;
+
+	switch (addr) {
+	case 0x00:
+	case 0x10:
+	case 0x20:
+	case 0x30:
+	case 0x40:
+	case 0x50:
+	case 0x60:
+	case 0x70:		/* MSIRs */
+		r = opp->msi[srs].msir;
+		/* Clear on read */
+		opp->msi[srs].msir = 0;
+		openpic_set_irq(opp, opp->irq_msi + srs, 0);
+		break;
+	case 0x120:		/* MSISR */
+		for (i = 0; i < MAX_MSI; i++)
+			r |= (opp->msi[i].msir ? 1 : 0) << i;
+		break;
+	}
+
+	pr_debug("%s: => 0x%08x\n", __func__, r);
+	*ptr = r;
+	return 0;
+}
+
+static int openpic_summary_read(void *opaque, gpa_t addr, u32 *ptr)
+{
+	uint32_t r = 0;
+
+	pr_debug("%s: addr %#llx\n", __func__, addr);
+
+	/* TODO: EISR/EIMR */
+
+	*ptr = r;
+	return 0;
+}
+
+static int openpic_summary_write(void *opaque, gpa_t addr, u32 val)
+{
+	pr_debug("%s: addr %#llx <= 0x%08x\n", __func__, addr, val);
+
+	/* TODO: EISR/EIMR */
+	return 0;
+}
+
+static int openpic_cpu_write_internal(void *opaque, gpa_t addr,
+				      u32 val, int idx)
+{
+	struct openpic *opp = opaque;
+	struct irq_source *src;
+	struct irq_dest *dst;
+	int s_IRQ, n_IRQ;
+
+	pr_debug("%s: cpu %d addr %#llx <= 0x%08x\n", __func__, idx,
+		addr, val);
+
+	if (idx < 0)
+		return 0;
+
+	if (addr & 0xF)
+		return 0;
+
+	dst = &opp->dst[idx];
+	addr &= 0xFF0;
+	switch (addr) {
+	case 0x40:		/* IPIDR */
+	case 0x50:
+	case 0x60:
+	case 0x70:
+		idx = (addr - 0x40) >> 4;
+		/* We still use the IDE bits as a mask of which CPUs to deliver the IPI to. */
+		opp->src[opp->irq_ipi0 + idx].destmask |= val;
+		openpic_set_irq(opp, opp->irq_ipi0 + idx, 1);
+		openpic_set_irq(opp, opp->irq_ipi0 + idx, 0);
+		break;
+	case 0x80:		/* CTPR */
+		dst->ctpr = val & 0x0000000F;
+
+		pr_debug("%s: set CPU %d ctpr to %d, raised %d servicing %d\n",
+			__func__, idx, dst->ctpr, dst->raised.priority,
+			dst->servicing.priority);
+
+		if (dst->raised.priority <= dst->ctpr) {
+			pr_debug("%s: Lower OpenPIC INT output cpu %d due to ctpr\n",
+				__func__, idx);
+			mpic_irq_lower(opp, dst, ILR_INTTGT_INT);
+		} else if (dst->raised.priority > dst->servicing.priority) {
+			pr_debug("%s: Raise OpenPIC INT output cpu %d irq %d\n",
+				__func__, idx, dst->raised.next);
+			mpic_irq_raise(opp, dst, ILR_INTTGT_INT);
+		}
+
+		break;
+	case 0x90:		/* WHOAMI */
+		/* Read-only register */
+		break;
+	case 0xA0:		/* IACK */
+		/* Read-only register */
+		break;
+	case 0xB0: {		/* EOI */
+		int notify_eoi;
+
+		pr_debug("EOI\n");
+		s_IRQ = IRQ_get_next(opp, &dst->servicing);
+
+		if (s_IRQ < 0) {
+			pr_debug("%s: EOI with no interrupt in service\n",
+				__func__);
+			break;
+		}
+
+		IRQ_resetbit(&dst->servicing, s_IRQ);
+		/* Notify listeners that the IRQ is over */
+		notify_eoi = s_IRQ;
+		/* Set up next servicing IRQ */
+		s_IRQ = IRQ_get_next(opp, &dst->servicing);
+		/* Check queued interrupts. */
+		n_IRQ = IRQ_get_next(opp, &dst->raised);
+		src = &opp->src[n_IRQ];
+		if (n_IRQ != -1 &&
+		    (s_IRQ == -1 ||
+		     IVPR_PRIORITY(src->ivpr) > dst->servicing.priority)) {
+			pr_debug("Raise OpenPIC INT output cpu %d irq %d\n",
+				idx, n_IRQ);
+			mpic_irq_raise(opp, dst, ILR_INTTGT_INT);
+		}
+
+		spin_unlock(&opp->lock);
+		kvm_notify_acked_irq(opp->kvm, 0, notify_eoi);
+		spin_lock(&opp->lock);
+
+		break;
+	}
+	default:
+		break;
+	}
+
+	return 0;
+}
+
+static int openpic_cpu_write(void *opaque, gpa_t addr, u32 val)
+{
+	struct openpic *opp = opaque;
+
+	return openpic_cpu_write_internal(opp, addr, val,
+					 (addr & 0x1f000) >> 12);
+}
+
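+/*
+ * Interrupt acknowledge: lower the INT output, move the highest
+ * priority raised IRQ into the servicing queue and return its vector,
+ * or the spurious vector if nothing valid is pending.
+ */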
+static uint32_t openpic_iack(struct openpic *opp, struct irq_dest *dst,
+			     int cpu)
+{
+	struct irq_source *src;
+	int retval, irq;
+
+	pr_debug("Lower OpenPIC INT output\n");
+	mpic_irq_lower(opp, dst, ILR_INTTGT_INT);
+
+	irq = IRQ_get_next(opp, &dst->raised);
+	pr_debug("IACK: irq=%d\n", irq);
+
+	if (irq == -1)
+		/* No more interrupt pending */
+		return opp->spve;
+
+	src = &opp->src[irq];
+	if (!(src->ivpr & IVPR_ACTIVITY_MASK) ||
+	    !(IVPR_PRIORITY(src->ivpr) > dst->ctpr)) {
+		pr_err("%s: bad raised IRQ %d ctpr %d ivpr 0x%08x\n",
+			__func__, irq, dst->ctpr, src->ivpr);
+		openpic_update_irq(opp, irq);
+		retval = opp->spve;
+	} else {
+		/* IRQ enter servicing state */
+		IRQ_setbit(&dst->servicing, irq);
+		retval = IVPR_VECTOR(opp, src->ivpr);
+	}
+
+	if (!src->level) {
+		/* edge-sensitive IRQ */
+		src->ivpr &= ~IVPR_ACTIVITY_MASK;
+		src->pending = 0;
+		IRQ_resetbit(&dst->raised, irq);
+	}
+
+	if ((irq >= opp->irq_ipi0) && (irq < (opp->irq_ipi0 + MAX_IPI))) {
+		src->destmask &= ~(1 << cpu);
+		if (src->destmask && !src->level) {
+			/* trigger on CPUs that didn't know about it yet */
+			openpic_set_irq(opp, irq, 1);
+			openpic_set_irq(opp, irq, 0);
+			/* if all CPUs knew about it, set active bit again */
+			src->ivpr |= IVPR_ACTIVITY_MASK;
+		}
+	}
+
+	return retval;
+}
+
+void kvmppc_mpic_set_epr(struct kvm_vcpu *vcpu)
+{
+	struct openpic *opp = vcpu->arch.mpic;
+	int cpu = vcpu->arch.irq_cpu_id;
+	unsigned long flags;
+
+	spin_lock_irqsave(&opp->lock, flags);
+
+	if ((opp->gcr & opp->mpic_mode_mask) == GCR_MODE_PROXY)
+		kvmppc_set_epr(vcpu, openpic_iack(opp, &opp->dst[cpu], cpu));
+
+	spin_unlock_irqrestore(&opp->lock, flags);
+}
+
+static int openpic_cpu_read_internal(void *opaque, gpa_t addr,
+				     u32 *ptr, int idx)
+{
+	struct openpic *opp = opaque;
+	struct irq_dest *dst;
+	uint32_t retval;
+
+	pr_debug("%s: cpu %d addr %#llx\n", __func__, idx, addr);
+	retval = 0xFFFFFFFF;
+
+	if (idx < 0)
+		goto out;
+
+	if (addr & 0xF)
+		goto out;
+
+	dst = &opp->dst[idx];
+	addr &= 0xFF0;
+	switch (addr) {
+	case 0x80:		/* CTPR */
+		retval = dst->ctpr;
+		break;
+	case 0x90:		/* WHOAMI */
+		retval = idx;
+		break;
+	case 0xA0:		/* IACK */
+		retval = openpic_iack(opp, dst, idx);
+		break;
+	case 0xB0:		/* EOI */
+		retval = 0;
+		break;
+	default:
+		break;
+	}
+	pr_debug("%s: => 0x%08x\n", __func__, retval);
+
+out:
+	*ptr = retval;
+	return 0;
+}
+
+static int openpic_cpu_read(void *opaque, gpa_t addr, u32 *ptr)
+{
+	struct openpic *opp = opaque;
+
+	return openpic_cpu_read_internal(opp, addr, ptr,
+					 (addr & 0x1f000) >> 12);
+}
+
+struct mem_reg {
+	int (*read)(void *opaque, gpa_t addr, u32 *ptr);
+	int (*write)(void *opaque, gpa_t addr, u32 val);
+	gpa_t start_addr;
+	int size;
+};
+
+static const struct mem_reg openpic_gbl_mmio = {
+	.write = openpic_gbl_write,
+	.read = openpic_gbl_read,
+	.start_addr = OPENPIC_GLB_REG_START,
+	.size = OPENPIC_GLB_REG_SIZE,
+};
+
+static const struct mem_reg openpic_tmr_mmio = {
+	.write = openpic_tmr_write,
+	.read = openpic_tmr_read,
+	.start_addr = OPENPIC_TMR_REG_START,
+	.size = OPENPIC_TMR_REG_SIZE,
+};
+
+static const struct mem_reg openpic_cpu_mmio = {
+	.write = openpic_cpu_write,
+	.read = openpic_cpu_read,
+	.start_addr = OPENPIC_CPU_REG_START,
+	.size = OPENPIC_CPU_REG_SIZE,
+};
+
+static const struct mem_reg openpic_src_mmio = {
+	.write = openpic_src_write,
+	.read = openpic_src_read,
+	.start_addr = OPENPIC_SRC_REG_START,
+	.size = OPENPIC_SRC_REG_SIZE,
+};
+
+static const struct mem_reg openpic_msi_mmio = {
+	.read = openpic_msi_read,
+	.write = openpic_msi_write,
+	.start_addr = OPENPIC_MSI_REG_START,
+	.size = OPENPIC_MSI_REG_SIZE,
+};
+
+static const struct mem_reg openpic_summary_mmio = {
+	.read = openpic_summary_read,
+	.write = openpic_summary_write,
+	.start_addr = OPENPIC_SUMMARY_REG_START,
+	.size = OPENPIC_SUMMARY_REG_SIZE,
+};
+
+static void add_mmio_region(struct openpic *opp, const struct mem_reg *mr)
+{
+	if (opp->num_mmio_regions >= MAX_MMIO_REGIONS) {
+		WARN(1, "kvm mpic: too many mmio regions\n");
+		return;
+	}
+
+	opp->mmio_regions[opp->num_mmio_regions++] = mr;
+}
+
+static void fsl_common_init(struct openpic *opp)
+{
+	int i;
+	int virq = MAX_SRC;
+
+	add_mmio_region(opp, &openpic_msi_mmio);
+	add_mmio_region(opp, &openpic_summary_mmio);
+
+	opp->vid = VID_REVISION_1_2;
+	opp->vir = VIR_GENERIC;
+	opp->vector_mask = 0xFFFF;
+	opp->tfrr_reset = 0;
+	opp->ivpr_reset = IVPR_MASK_MASK;
+	opp->idr_reset = 1 << 0;
+	opp->max_irq = MAX_IRQ;
+
+	opp->irq_ipi0 = virq;
+	virq += MAX_IPI;
+	opp->irq_tim0 = virq;
+	virq += MAX_TMR;
+
+	BUG_ON(virq > MAX_IRQ);
+
+	opp->irq_msi = 224;
+
+	for (i = 0; i < opp->fsl->max_ext; i++)
+		opp->src[i].level = false;
+
+	/* Internal interrupts, including message and MSI */
+	for (i = 16; i < MAX_SRC; i++) {
+		opp->src[i].type = IRQ_TYPE_FSLINT;
+		opp->src[i].level = true;
+	}
+
+	/* timers and IPIs */
+	for (i = MAX_SRC; i < virq; i++) {
+		opp->src[i].type = IRQ_TYPE_FSLSPECIAL;
+		opp->src[i].level = false;
+	}
+}
+
+static int kvm_mpic_read_internal(struct openpic *opp, gpa_t addr, u32 *ptr)
+{
+	int i;
+
+	for (i = 0; i < opp->num_mmio_regions; i++) {
+		const struct mem_reg *mr = opp->mmio_regions[i];
+
+		if (mr->start_addr > addr || addr >= mr->start_addr + mr->size)
+			continue;
+
+		return mr->read(opp, addr - mr->start_addr, ptr);
+	}
+
+	return -ENXIO;
+}
+
+static int kvm_mpic_write_internal(struct openpic *opp, gpa_t addr, u32 val)
+{
+	int i;
+
+	for (i = 0; i < opp->num_mmio_regions; i++) {
+		const struct mem_reg *mr = opp->mmio_regions[i];
+
+		if (mr->start_addr > addr || addr >= mr->start_addr + mr->size)
+			continue;
+
+		return mr->write(opp, addr - mr->start_addr, val);
+	}
+
+	return -ENXIO;
+}
+
+static int kvm_mpic_read(struct kvm_io_device *this, gpa_t addr,
+			 int len, void *ptr)
+{
+	struct openpic *opp = container_of(this, struct openpic, mmio);
+	int ret;
+	union {
+		u32 val;
+		u8 bytes[4];
+	} u;
+
+	if (addr & (len - 1)) {
+		pr_debug("%s: bad alignment %llx/%d\n",
+			 __func__, addr, len);
+		return -EINVAL;
+	}
+
+	spin_lock_irq(&opp->lock);
+	ret = kvm_mpic_read_internal(opp, addr - opp->reg_base, &u.val);
+	spin_unlock_irq(&opp->lock);
+
+	/*
+	 * Technically only 32-bit accesses are allowed, but be nice to
+	 * people dumping registers a byte at a time -- it works in real
+	 * hardware (reads only, not writes).
+	 */
+	if (len == 4) {
+		*(u32 *)ptr = u.val;
+		pr_debug("%s: addr %llx ret %d len 4 val %x\n",
+			 __func__, addr, ret, u.val);
+	} else if (len == 1) {
+		*(u8 *)ptr = u.bytes[addr & 3];
+		pr_debug("%s: addr %llx ret %d len 1 val %x\n",
+			 __func__, addr, ret, u.bytes[addr & 3]);
+	} else {
+		pr_debug("%s: bad length %d\n", __func__, len);
+		return -EINVAL;
+	}
+
+	return ret;
+}
+
+static int kvm_mpic_write(struct kvm_io_device *this, gpa_t addr,
+			  int len, const void *ptr)
+{
+	struct openpic *opp = container_of(this, struct openpic, mmio);
+	int ret;
+
+	if (len != 4) {
+		pr_debug("%s: bad length %d\n", __func__, len);
+		return -EOPNOTSUPP;
+	}
+	if (addr & 3) {
+		pr_debug("%s: bad alignment %llx/%d\n", __func__, addr, len);
+		return -EOPNOTSUPP;
+	}
+
+	spin_lock_irq(&opp->lock);
+	ret = kvm_mpic_write_internal(opp, addr - opp->reg_base,
+				      *(const u32 *)ptr);
+	spin_unlock_irq(&opp->lock);
+
+	pr_debug("%s: addr %llx ret %d val %x\n",
+		 __func__, addr, ret, *(const u32 *)ptr);
+
+	return ret;
+}
+
+static const struct kvm_io_device_ops mpic_mmio_ops = {
+	.read = kvm_mpic_read,
+	.write = kvm_mpic_write,
+};
+
+static void map_mmio(struct openpic *opp)
+{
+	kvm_iodevice_init(&opp->mmio, &mpic_mmio_ops);
+
+	kvm_io_bus_register_dev(opp->kvm, KVM_MMIO_BUS,
+				opp->reg_base, OPENPIC_REG_SIZE,
+				&opp->mmio);
+}
+
+static void unmap_mmio(struct openpic *opp)
+{
+	kvm_io_bus_unregister_dev(opp->kvm, KVM_MMIO_BUS, &opp->mmio);
+}
+
+static int set_base_addr(struct openpic *opp, struct kvm_device_attr *attr)
+{
+	u64 base;
+
+	if (copy_from_user(&base, (u64 __user *)(long)attr->addr, sizeof(u64)))
+		return -EFAULT;
+
+	if (base & 0x3ffff) {
+		pr_debug("kvm mpic %s: KVM_DEV_MPIC_BASE_ADDR %08llx not aligned\n",
+			 __func__, base);
+		return -EINVAL;
+	}
+
+	if (base == opp->reg_base)
+		return 0;
+
+	mutex_lock(&opp->kvm->slots_lock);
+
+	unmap_mmio(opp);
+	opp->reg_base = base;
+
+	pr_debug("kvm mpic %s: KVM_DEV_MPIC_BASE_ADDR %08llx\n",
+		 __func__, base);
+
+	if (base == 0)
+		goto out;
+
+	map_mmio(opp);
+
+out:
+	mutex_unlock(&opp->kvm->slots_lock);
+	return 0;
+}
+
+#define ATTR_SET		0
+#define ATTR_GET		1
+
+static int access_reg(struct openpic *opp, gpa_t addr, u32 *val, int type)
+{
+	int ret;
+
+	if (addr & 3)
+		return -ENXIO;
+
+	spin_lock_irq(&opp->lock);
+
+	if (type == ATTR_SET)
+		ret = kvm_mpic_write_internal(opp, addr, *val);
+	else
+		ret = kvm_mpic_read_internal(opp, addr, val);
+
+	spin_unlock_irq(&opp->lock);
+
+	pr_debug("%s: type %d addr %llx val %x\n", __func__, type, addr, *val);
+
+	return ret;
+}
+
+static int mpic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
+{
+	struct openpic *opp = dev->private;
+	u32 attr32;
+
+	switch (attr->group) {
+	case KVM_DEV_MPIC_GRP_MISC:
+		switch (attr->attr) {
+		case KVM_DEV_MPIC_BASE_ADDR:
+			return set_base_addr(opp, attr);
+		}
+
+		break;
+
+	case KVM_DEV_MPIC_GRP_REGISTER:
+		if (get_user(attr32, (u32 __user *)(long)attr->addr))
+			return -EFAULT;
+
+		return access_reg(opp, attr->attr, &attr32, ATTR_SET);
+
+	case KVM_DEV_MPIC_GRP_IRQ_ACTIVE:
+		if (attr->attr > MAX_SRC)
+			return -EINVAL;
+
+		if (get_user(attr32, (u32 __user *)(long)attr->addr))
+			return -EFAULT;
+
+		if (attr32 != 0 && attr32 != 1)
+			return -EINVAL;
+
+		spin_lock_irq(&opp->lock);
+		openpic_set_irq(opp, attr->attr, attr32);
+		spin_unlock_irq(&opp->lock);
+		return 0;
+	}
+
+	return -ENXIO;
+}
+
+static int mpic_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
+{
+	struct openpic *opp = dev->private;
+	u64 attr64;
+	u32 attr32;
+	int ret;
+
+	switch (attr->group) {
+	case KVM_DEV_MPIC_GRP_MISC:
+		switch (attr->attr) {
+		case KVM_DEV_MPIC_BASE_ADDR:
+			mutex_lock(&opp->kvm->slots_lock);
+			attr64 = opp->reg_base;
+			mutex_unlock(&opp->kvm->slots_lock);
+
+			if (copy_to_user((u64 __user *)(long)attr->addr,
+					 &attr64, sizeof(u64)))
+				return -EFAULT;
+
+			return 0;
+		}
+
+		break;
+
+	case KVM_DEV_MPIC_GRP_REGISTER:
+		ret = access_reg(opp, attr->attr, &attr32, ATTR_GET);
+		if (ret)
+			return ret;
+
+		if (put_user(attr32, (u32 __user *)(long)attr->addr))
+			return -EFAULT;
+
+		return 0;
+
+	case KVM_DEV_MPIC_GRP_IRQ_ACTIVE:
+		if (attr->attr > MAX_SRC)
+			return -EINVAL;
+
+		spin_lock_irq(&opp->lock);
+		attr32 = opp->src[attr->attr].pending;
+		spin_unlock_irq(&opp->lock);
+
+		if (put_user(attr32, (u32 __user *)(long)attr->addr))
+			return -EFAULT;
+
+		return 0;
+	}
+
+	return -ENXIO;
+}
+
+static int mpic_has_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
+{
+	switch (attr->group) {
+	case KVM_DEV_MPIC_GRP_MISC:
+		switch (attr->attr) {
+		case KVM_DEV_MPIC_BASE_ADDR:
+			return 0;
+		}
+
+		break;
+
+	case KVM_DEV_MPIC_GRP_REGISTER:
+		return 0;
+
+	case KVM_DEV_MPIC_GRP_IRQ_ACTIVE:
+		if (attr->attr > MAX_SRC)
+			break;
+
+		return 0;
+	}
+
+	return -ENXIO;
+}
+
+static void mpic_destroy(struct kvm_device *dev)
+{
+	struct openpic *opp = dev->private;
+
+	dev->kvm->arch.mpic = NULL;
+	kfree(opp);
+}
+
+static int mpic_set_default_irq_routing(struct openpic *opp)
+{
+	struct kvm_irq_routing_entry *routing;
+
+	/* Create a nop default map, so that dereferencing it still works */
+	routing = kzalloc((sizeof(*routing)), GFP_KERNEL);
+	if (!routing)
+		return -ENOMEM;
+
+	kvm_set_irq_routing(opp->kvm, routing, 0, 0);
+
+	kfree(routing);
+	return 0;
+}
+
+static int mpic_create(struct kvm_device *dev, u32 type)
+{
+	struct openpic *opp;
+	int ret;
+
+	/* We only support one MPIC at a time for now */
+	if (dev->kvm->arch.mpic)
+		return -EINVAL;
+
+	opp = kzalloc(sizeof(struct openpic), GFP_KERNEL);
+	if (!opp)
+		return -ENOMEM;
+
+	dev->private = opp;
+	opp->kvm = dev->kvm;
+	opp->dev = dev;
+	opp->model = type;
+	spin_lock_init(&opp->lock);
+
+	add_mmio_region(opp, &openpic_gbl_mmio);
+	add_mmio_region(opp, &openpic_tmr_mmio);
+	add_mmio_region(opp, &openpic_src_mmio);
+	add_mmio_region(opp, &openpic_cpu_mmio);
+
+	switch (opp->model) {
+	case KVM_DEV_TYPE_FSL_MPIC_20:
+		opp->fsl = &fsl_mpic_20;
+		opp->brr1 = 0x00400200;
+		opp->flags |= OPENPIC_FLAG_IDR_CRIT;
+		opp->nb_irqs = 80;
+		opp->mpic_mode_mask = GCR_MODE_MIXED;
+
+		fsl_common_init(opp);
+
+		break;
+
+	case KVM_DEV_TYPE_FSL_MPIC_42:
+		opp->fsl = &fsl_mpic_42;
+		opp->brr1 = 0x00400402;
+		opp->flags |= OPENPIC_FLAG_ILR;
+		opp->nb_irqs = 196;
+		opp->mpic_mode_mask = GCR_MODE_PROXY;
+
+		fsl_common_init(opp);
+
+		break;
+
+	default:
+		ret = -ENODEV;
+		goto err;
+	}
+
+	ret = mpic_set_default_irq_routing(opp);
+	if (ret)
+		goto err;
+
+	openpic_reset(opp);
+
+	smp_wmb();
+	dev->kvm->arch.mpic = opp;
+
+	return 0;
+
+err:
+	kfree(opp);
+	return ret;
+}
+
+struct kvm_device_ops kvm_mpic_ops = {
+	.name = "kvm-mpic",
+	.create = mpic_create,
+	.destroy = mpic_destroy,
+	.set_attr = mpic_set_attr,
+	.get_attr = mpic_get_attr,
+	.has_attr = mpic_has_attr,
+};
+
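+/*
+ * A rough sketch of how userspace is expected to wire a vcpu to the
+ * in-kernel MPIC (error handling omitted):
+ *
+ *	struct kvm_create_device cd = { .type = KVM_DEV_TYPE_FSL_MPIC_20 };
+ *	ioctl(vm_fd, KVM_CREATE_DEVICE, &cd);
+ *
+ *	struct kvm_enable_cap cap = {
+ *		.cap = KVM_CAP_IRQ_MPIC,
+ *		.args = { cd.fd, cpu_index },
+ *	};
+ *	ioctl(vcpu_fd, KVM_ENABLE_CAP, &cap);
+ */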
+int kvmppc_mpic_connect_vcpu(struct kvm_device *dev, struct kvm_vcpu *vcpu,
+			     u32 cpu)
+{
+	struct openpic *opp = dev->private;
+	int ret = 0;
+
+	if (dev->ops != &kvm_mpic_ops)
+		return -EPERM;
+	if (opp->kvm != vcpu->kvm)
+		return -EPERM;
+	if (cpu >= MAX_CPU)
+		return -EPERM;
+
+	spin_lock_irq(&opp->lock);
+
+	if (opp->dst[cpu].vcpu) {
+		ret = -EEXIST;
+		goto out;
+	}
+	if (vcpu->arch.irq_type) {
+		ret = -EBUSY;
+		goto out;
+	}
+
+	opp->dst[cpu].vcpu = vcpu;
+	opp->nb_cpus = max(opp->nb_cpus, cpu + 1);
+
+	vcpu->arch.mpic = opp;
+	vcpu->arch.irq_cpu_id = cpu;
+	vcpu->arch.irq_type = KVMPPC_IRQ_MPIC;
+
+	/* This might need to be changed if GCR gets extended */
+	if (opp->mpic_mode_mask == GCR_MODE_PROXY)
+		vcpu->arch.epr_flags |= KVMPPC_EPR_KERNEL;
+
+out:
+	spin_unlock_irq(&opp->lock);
+	return ret;
+}
+
+/*
+ * This should only happen immediately before the mpic is destroyed,
+ * so we shouldn't need to worry about anything still trying to
+ * access the vcpu pointer.
+ */
+void kvmppc_mpic_disconnect_vcpu(struct openpic *opp, struct kvm_vcpu *vcpu)
+{
+	BUG_ON(!opp->dst[vcpu->arch.irq_cpu_id].vcpu);
+
+	opp->dst[vcpu->arch.irq_cpu_id].vcpu = NULL;
+}
+
+/*
+ * Return value:
+ *  < 0   Interrupt was ignored (masked or not delivered for other reasons)
+ *  = 0   Interrupt was coalesced (previous irq is still pending)
+ *  > 0   Number of CPUs interrupt was delivered to
+ */
+static int mpic_set_irq(struct kvm_kernel_irq_routing_entry *e,
+			struct kvm *kvm, int irq_source_id, int level,
+			bool line_status)
+{
+	u32 irq = e->irqchip.pin;
+	struct openpic *opp = kvm->arch.mpic;
+	unsigned long flags;
+
+	spin_lock_irqsave(&opp->lock, flags);
+	openpic_set_irq(opp, irq, level);
+	spin_unlock_irqrestore(&opp->lock, flags);
+
+	/* None of the code paths we care about checks the return value */
+	return 0;
+}
+
+int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
+		struct kvm *kvm, int irq_source_id, int level, bool line_status)
+{
+	struct openpic *opp = kvm->arch.mpic;
+	unsigned long flags;
+
+	spin_lock_irqsave(&opp->lock, flags);
+
+	/*
+	 * XXX We ignore the target address for now, as we only support
+	 *     a single MSI bank.
+	 */
+	openpic_msi_write(kvm->arch.mpic, MSIIR_OFFSET, e->msi.data);
+	spin_unlock_irqrestore(&opp->lock, flags);
+
+	/* None of the code paths we care about checks the return value */
+	return 0;
+}
+
+int kvm_set_routing_entry(struct kvm_irq_routing_table *rt,
+			  struct kvm_kernel_irq_routing_entry *e,
+			  const struct kvm_irq_routing_entry *ue)
+{
+	int r = -EINVAL;
+
+	switch (ue->type) {
+	case KVM_IRQ_ROUTING_IRQCHIP:
+		e->set = mpic_set_irq;
+		e->irqchip.irqchip = ue->u.irqchip.irqchip;
+		e->irqchip.pin = ue->u.irqchip.pin;
+		if (e->irqchip.pin >= KVM_IRQCHIP_NUM_PINS)
+			goto out;
+		rt->chip[ue->u.irqchip.irqchip][e->irqchip.pin] = ue->gsi;
+		break;
+	case KVM_IRQ_ROUTING_MSI:
+		e->set = kvm_set_msi;
+		e->msi.address_lo = ue->u.msi.address_lo;
+		e->msi.address_hi = ue->u.msi.address_hi;
+		e->msi.data = ue->u.msi.data;
+		break;
+	default:
+		goto out;
+	}
+
+	r = 0;
+out:
+	return r;
+}
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 934413c..6316ee3 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -25,6 +25,7 @@
 #include <linux/hrtimer.h>
 #include <linux/fs.h>
 #include <linux/slab.h>
+#include <linux/file.h>
 #include <asm/cputable.h>
 #include <asm/uaccess.h>
 #include <asm/kvm_ppc.h>
@@ -32,6 +33,7 @@
 #include <asm/cputhreads.h>
 #include <asm/irqflags.h>
 #include "timing.h"
+#include "irq.h"
 #include "../mm/mmu_decl.h"
 
 #define CREATE_TRACE_POINTS
@@ -317,6 +319,7 @@
 	case KVM_CAP_ENABLE_CAP:
 	case KVM_CAP_ONE_REG:
 	case KVM_CAP_IOEVENTFD:
+	case KVM_CAP_DEVICE_CTRL:
 		r = 1;
 		break;
 #ifndef CONFIG_KVM_BOOK3S_64_HV
@@ -326,6 +329,9 @@
 #if defined(CONFIG_KVM_E500V2) || defined(CONFIG_KVM_E500MC)
 	case KVM_CAP_SW_TLB:
 #endif
+#ifdef CONFIG_KVM_MPIC
+	case KVM_CAP_IRQ_MPIC:
+#endif
 		r = 1;
 		break;
 	case KVM_CAP_COALESCED_MMIO:
@@ -335,6 +341,10 @@
 #ifdef CONFIG_PPC_BOOK3S_64
 	case KVM_CAP_SPAPR_TCE:
 	case KVM_CAP_PPC_ALLOC_HTAB:
+	case KVM_CAP_PPC_RTAS:
+#ifdef CONFIG_KVM_XICS
+	case KVM_CAP_IRQ_XICS:
+#endif
 		r = 1;
 		break;
 #endif /* CONFIG_PPC_BOOK3S_64 */
@@ -411,18 +421,17 @@
 }
 
 int kvm_arch_prepare_memory_region(struct kvm *kvm,
-                                   struct kvm_memory_slot *memslot,
-                                   struct kvm_memory_slot old,
-                                   struct kvm_userspace_memory_region *mem,
-                                   bool user_alloc)
+				   struct kvm_memory_slot *memslot,
+				   struct kvm_userspace_memory_region *mem,
+				   enum kvm_mr_change change)
 {
 	return kvmppc_core_prepare_memory_region(kvm, memslot, mem);
 }
 
 void kvm_arch_commit_memory_region(struct kvm *kvm,
-               struct kvm_userspace_memory_region *mem,
-               struct kvm_memory_slot old,
-               bool user_alloc)
+				   struct kvm_userspace_memory_region *mem,
+				   const struct kvm_memory_slot *old,
+				   enum kvm_mr_change change)
 {
 	kvmppc_core_commit_memory_region(kvm, mem, old);
 }
@@ -460,6 +469,16 @@
 	tasklet_kill(&vcpu->arch.tasklet);
 
 	kvmppc_remove_vcpu_debugfs(vcpu);
+
+	switch (vcpu->arch.irq_type) {
+	case KVMPPC_IRQ_MPIC:
+		kvmppc_mpic_disconnect_vcpu(vcpu->arch.mpic, vcpu);
+		break;
+	case KVMPPC_IRQ_XICS:
+		kvmppc_xics_free_icp(vcpu);
+		break;
+	}
+
 	kvmppc_core_vcpu_free(vcpu);
 }
 
@@ -532,12 +551,6 @@
 #endif
 }
 
-int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
-                                        struct kvm_guest_debug *dbg)
-{
-	return -EINVAL;
-}
-
 static void kvmppc_complete_dcr_load(struct kvm_vcpu *vcpu,
                                      struct kvm_run *run)
 {
@@ -612,6 +625,8 @@
 int kvmppc_handle_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
                        unsigned int rt, unsigned int bytes, int is_bigendian)
 {
+	int idx, ret;
+
 	if (bytes > sizeof(run->mmio.data)) {
 		printk(KERN_ERR "%s: bad MMIO length: %d\n", __func__,
 		       run->mmio.len);
@@ -627,8 +642,14 @@
 	vcpu->mmio_is_write = 0;
 	vcpu->arch.mmio_sign_extend = 0;
 
-	if (!kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, run->mmio.phys_addr,
-			     bytes, &run->mmio.data)) {
+	idx = srcu_read_lock(&vcpu->kvm->srcu);
+
+	ret = kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, run->mmio.phys_addr,
+			      bytes, &run->mmio.data);
+
+	srcu_read_unlock(&vcpu->kvm->srcu, idx);
+
+	if (!ret) {
 		kvmppc_complete_mmio_load(vcpu, run);
 		vcpu->mmio_needed = 0;
 		return EMULATE_DONE;
@@ -653,6 +674,7 @@
                         u64 val, unsigned int bytes, int is_bigendian)
 {
 	void *data = run->mmio.data;
+	int idx, ret;
 
 	if (bytes > sizeof(run->mmio.data)) {
 		printk(KERN_ERR "%s: bad MMIO length: %d\n", __func__,
@@ -682,9 +704,14 @@
 		}
 	}
 
-	if (!kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, run->mmio.phys_addr,
-			      bytes, &run->mmio.data)) {
-		kvmppc_complete_mmio_load(vcpu, run);
+	idx = srcu_read_lock(&vcpu->kvm->srcu);
+
+	ret = kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, run->mmio.phys_addr,
+			       bytes, &run->mmio.data);
+
+	srcu_read_unlock(&vcpu->kvm->srcu, idx);
+
+	if (!ret) {
 		vcpu->mmio_needed = 0;
 		return EMULATE_DONE;
 	}
@@ -740,7 +767,7 @@
 int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq)
 {
 	if (irq->irq == KVM_INTERRUPT_UNSET) {
-		kvmppc_core_dequeue_external(vcpu, irq);
+		kvmppc_core_dequeue_external(vcpu);
 		return 0;
 	}
 
@@ -770,7 +797,10 @@
 		break;
 	case KVM_CAP_PPC_EPR:
 		r = 0;
-		vcpu->arch.epr_enabled = cap->args[0];
+		if (cap->args[0])
+			vcpu->arch.epr_flags |= KVMPPC_EPR_USER;
+		else
+			vcpu->arch.epr_flags &= ~KVMPPC_EPR_USER;
 		break;
 #ifdef CONFIG_BOOKE
 	case KVM_CAP_PPC_BOOKE_WATCHDOG:
@@ -791,6 +821,44 @@
 		break;
 	}
 #endif
+#ifdef CONFIG_KVM_MPIC
+	case KVM_CAP_IRQ_MPIC: {
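+		/*
+		 * args[0] is the file descriptor returned by
+		 * KVM_CREATE_DEVICE for the MPIC, args[1] the MPIC CPU
+		 * index to connect this vcpu to.
+		 */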
+		struct file *filp;
+		struct kvm_device *dev;
+
+		r = -EBADF;
+		filp = fget(cap->args[0]);
+		if (!filp)
+			break;
+
+		r = -EPERM;
+		dev = kvm_device_from_filp(filp);
+		if (dev)
+			r = kvmppc_mpic_connect_vcpu(dev, vcpu, cap->args[1]);
+
+		fput(filp);
+		break;
+	}
+#endif
+#ifdef CONFIG_KVM_XICS
+	case KVM_CAP_IRQ_XICS: {
+		struct file *filp;
+		struct kvm_device *dev;
+
+		r = -EBADF;
+		filp = fget(cap->args[0]);
+		if (!filp)
+			break;
+
+		r = -EPERM;
+		dev = kvm_device_from_filp(filp);
+		if (dev)
+			r = kvmppc_xics_connect_vcpu(dev, vcpu, cap->args[1]);
+
+		fput(filp);
+		break;
+	}
+#endif /* CONFIG_KVM_XICS */
 	default:
 		r = -EINVAL;
 		break;
@@ -913,9 +981,22 @@
 	return 0;
 }
 
+int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event,
+			  bool line_status)
+{
+	if (!irqchip_in_kernel(kvm))
+		return -ENXIO;
+
+	irq_event->status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
+					irq_event->irq, irq_event->level,
+					line_status);
+	return 0;
+}
+
 long kvm_arch_vm_ioctl(struct file *filp,
                        unsigned int ioctl, unsigned long arg)
 {
+	struct kvm *kvm __maybe_unused = filp->private_data;
 	void __user *argp = (void __user *)arg;
 	long r;
 
@@ -934,7 +1015,6 @@
 #ifdef CONFIG_PPC_BOOK3S_64
 	case KVM_CREATE_SPAPR_TCE: {
 		struct kvm_create_spapr_tce create_tce;
-		struct kvm *kvm = filp->private_data;
 
 		r = -EFAULT;
 		if (copy_from_user(&create_tce, argp, sizeof(create_tce)))
@@ -946,8 +1026,8 @@
 
 #ifdef CONFIG_KVM_BOOK3S_64_HV
 	case KVM_ALLOCATE_RMA: {
-		struct kvm *kvm = filp->private_data;
 		struct kvm_allocate_rma rma;
+		struct kvm *kvm = filp->private_data;
 
 		r = kvm_vm_ioctl_allocate_rma(kvm, &rma);
 		if (r >= 0 && copy_to_user(argp, &rma, sizeof(rma)))
@@ -956,7 +1036,6 @@
 	}
 
 	case KVM_PPC_ALLOCATE_HTAB: {
-		struct kvm *kvm = filp->private_data;
 		u32 htab_order;
 
 		r = -EFAULT;
@@ -973,7 +1052,6 @@
 	}
 
 	case KVM_PPC_GET_HTAB_FD: {
-		struct kvm *kvm = filp->private_data;
 		struct kvm_get_htab_fd ghf;
 
 		r = -EFAULT;
@@ -986,7 +1064,6 @@
 
 #ifdef CONFIG_PPC_BOOK3S_64
 	case KVM_PPC_GET_SMMU_INFO: {
-		struct kvm *kvm = filp->private_data;
 		struct kvm_ppc_smmu_info info;
 
 		memset(&info, 0, sizeof(info));
@@ -995,6 +1072,12 @@
 			r = -EFAULT;
 		break;
 	}
+	case KVM_PPC_RTAS_DEFINE_TOKEN: {
+		struct kvm *kvm = filp->private_data;
+
+		r = kvm_vm_ioctl_rtas_define_token(kvm, argp);
+		break;
+	}
 #endif /* CONFIG_PPC_BOOK3S_64 */
 	default:
 		r = -ENOTTY;
diff --git a/arch/powerpc/sysdev/xics/icp-native.c b/arch/powerpc/sysdev/xics/icp-native.c
index 89db29d..7cd728b 100644
--- a/arch/powerpc/sysdev/xics/icp-native.c
+++ b/arch/powerpc/sysdev/xics/icp-native.c
@@ -51,6 +51,12 @@
 static inline unsigned int icp_native_get_xirr(void)
 {
 	int cpu = smp_processor_id();
+	unsigned int xirr;
+
+	/* Handle an interrupt latched by KVM */
+	xirr = kvmppc_get_xics_latch();
+	if (xirr)
+		return xirr;
 
 	return in_be32(&icp_native_regs[cpu]->xirr.word);
 }
@@ -138,6 +144,7 @@
 
 static void icp_native_cause_ipi(int cpu, unsigned long data)
 {
+	kvmppc_set_host_ipi(cpu, 1);
 	icp_native_set_qirr(cpu, IPI_PRIORITY);
 }
 
@@ -151,6 +158,7 @@
 {
 	int cpu = smp_processor_id();
 
+	kvmppc_set_host_ipi(cpu, 0);
 	icp_native_set_qirr(cpu, 0xff);
 
 	return smp_ipi_demux();
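
The icp-native hooks above exist because KVM's real-mode XICS emulation can
leave an interrupt "latched" for the host to pick up, and host IPIs must be
flagged so they can be told apart from IPIs KVM raises for guests. A small
standalone model of the latch handshake (illustrative only; the real latch
lives in per-cpu kernel state):

	#include <stdio.h>

	static unsigned int xics_latch;			/* per-cpu latch stand-in */
	static unsigned int hw_xirr = 0x00ff0010;	/* pretend ICP MMIO register */

	static unsigned int get_xics_latch(void)
	{
		unsigned int xirr = xics_latch;

		xics_latch = 0;		/* the latch is consumed on read */
		return xirr;
	}

	static unsigned int icp_get_xirr(void)
	{
		unsigned int xirr = get_xics_latch();

		if (xirr)		/* interrupt left behind by KVM */
			return xirr;
		return hw_xirr;		/* otherwise read the real ICP */
	}

	int main(void)
	{
		xics_latch = 0x00ff0005;
		printf("first read:  %#x (from KVM latch)\n", icp_get_xirr());
		printf("second read: %#x (from hardware)\n", icp_get_xirr());
		return 0;
	}
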
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index b462291..4105b82 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -306,6 +306,7 @@
 #define RCP_HC_BIT	0x00200000UL
 #define RCP_GR_BIT	0x00040000UL
 #define RCP_GC_BIT	0x00020000UL
+#define RCP_IN_BIT	0x00008000UL	/* IPTE notify bit */
 
 /* User dirty / referenced bit for KVM's migration feature */
 #define KVM_UR_BIT	0x00008000UL
@@ -373,6 +374,7 @@
 #define RCP_HC_BIT	0x0020000000000000UL
 #define RCP_GR_BIT	0x0004000000000000UL
 #define RCP_GC_BIT	0x0002000000000000UL
+#define RCP_IN_BIT	0x0000800000000000UL	/* IPTE notify bit */
 
 /* User dirty / referenced bit for KVM's migration feature */
 #define KVM_UR_BIT	0x0000800000000000UL
@@ -746,30 +748,42 @@
 
 /**
  * struct gmap_rmap - reverse mapping for segment table entries
- * @next: pointer to the next gmap_rmap structure in the list
+ * @gmap: pointer to the gmap_struct
  * @entry: pointer to a segment table entry
+ * @vmaddr: virtual address in the guest address space
  */
 struct gmap_rmap {
 	struct list_head list;
+	struct gmap *gmap;
 	unsigned long *entry;
+	unsigned long vmaddr;
 };
 
 /**
  * struct gmap_pgtable - gmap information attached to a page table
  * @vmaddr: address of the 1MB segment in the process virtual memory
- * @mapper: list of segment table entries maping a page table
+ * @mapper: list of segment table entries mapping a page table
  */
 struct gmap_pgtable {
 	unsigned long vmaddr;
 	struct list_head mapper;
 };
 
+/**
+ * struct gmap_notifier - notify function block for page invalidation
+ * @notifier_call: address of callback function
+ */
+struct gmap_notifier {
+	struct list_head list;
+	void (*notifier_call)(struct gmap *gmap, unsigned long address);
+};
+
 struct gmap *gmap_alloc(struct mm_struct *mm);
 void gmap_free(struct gmap *gmap);
 void gmap_enable(struct gmap *gmap);
 void gmap_disable(struct gmap *gmap);
 int gmap_map_segment(struct gmap *gmap, unsigned long from,
-		     unsigned long to, unsigned long length);
+		     unsigned long to, unsigned long len);
 int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len);
 unsigned long __gmap_translate(unsigned long address, struct gmap *);
 unsigned long gmap_translate(unsigned long address, struct gmap *);
@@ -777,6 +791,24 @@
 unsigned long gmap_fault(unsigned long address, struct gmap *);
 void gmap_discard(unsigned long from, unsigned long to, struct gmap *);
 
+void gmap_register_ipte_notifier(struct gmap_notifier *);
+void gmap_unregister_ipte_notifier(struct gmap_notifier *);
+int gmap_ipte_notify(struct gmap *, unsigned long start, unsigned long len);
+void gmap_do_ipte_notify(struct mm_struct *, unsigned long addr, pte_t *);
+
+static inline pgste_t pgste_ipte_notify(struct mm_struct *mm,
+					unsigned long addr,
+					pte_t *ptep, pgste_t pgste)
+{
+#ifdef CONFIG_PGSTE
+	if (pgste_val(pgste) & RCP_IN_BIT) {
+		pgste_val(pgste) &= ~RCP_IN_BIT;
+		gmap_do_ipte_notify(mm, addr, ptep);
+	}
+#endif
+	return pgste;
+}
+
 /*
  * Certain architectures need to do special things when PTEs
  * within a page table are directly modified.  Thus, the following
@@ -1032,8 +1064,10 @@
 	pte_t pte;
 
 	mm->context.flush_mm = 1;
-	if (mm_has_pgste(mm))
+	if (mm_has_pgste(mm)) {
 		pgste = pgste_get_lock(ptep);
+		pgste = pgste_ipte_notify(mm, address, ptep, pgste);
+	}
 
 	pte = *ptep;
 	if (!mm_exclusive(mm))
@@ -1052,11 +1086,14 @@
 					   unsigned long address,
 					   pte_t *ptep)
 {
+	pgste_t pgste;
 	pte_t pte;
 
 	mm->context.flush_mm = 1;
-	if (mm_has_pgste(mm))
-		pgste_get_lock(ptep);
+	if (mm_has_pgste(mm)) {
+		pgste = pgste_get_lock(ptep);
+		pgste_ipte_notify(mm, address, ptep, pgste);
+	}
 
 	pte = *ptep;
 	if (!mm_exclusive(mm))
@@ -1082,8 +1119,10 @@
 	pgste_t pgste;
 	pte_t pte;
 
-	if (mm_has_pgste(vma->vm_mm))
+	if (mm_has_pgste(vma->vm_mm)) {
 		pgste = pgste_get_lock(ptep);
+		pgste = pgste_ipte_notify(vma->vm_mm, address, ptep, pgste);
+	}
 
 	pte = *ptep;
 	__ptep_ipte(address, ptep);
@@ -1111,8 +1150,11 @@
 	pgste_t pgste;
 	pte_t pte;
 
-	if (mm_has_pgste(mm))
+	if (mm_has_pgste(mm)) {
 		pgste = pgste_get_lock(ptep);
+		if (!full)
+			pgste = pgste_ipte_notify(mm, address, ptep, pgste);
+	}
 
 	pte = *ptep;
 	if (!full)
@@ -1135,8 +1177,10 @@
 
 	if (pte_write(pte)) {
 		mm->context.flush_mm = 1;
-		if (mm_has_pgste(mm))
+		if (mm_has_pgste(mm)) {
 			pgste = pgste_get_lock(ptep);
+			pgste = pgste_ipte_notify(mm, address, ptep, pgste);
+		}
 
 		if (!mm_exclusive(mm))
 			__ptep_ipte(address, ptep);
@@ -1160,8 +1204,10 @@
 
 	if (pte_same(*ptep, entry))
 		return 0;
-	if (mm_has_pgste(vma->vm_mm))
+	if (mm_has_pgste(vma->vm_mm)) {
 		pgste = pgste_get_lock(ptep);
+		pgste = pgste_ipte_notify(vma->vm_mm, address, ptep, pgste);
+	}
 
 	__ptep_ipte(address, ptep);
 
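
The IPTE notifier machinery added above lets a consumer watch guest page
table invalidations: gmap_ipte_notify() arms RCP_IN_BIT in the pgstes of a
range, and pgste_ipte_notify() fires gmap_do_ipte_notify() once such a pte
is invalidated. A hedged consumer sketch, modeled on how kvm-s390 is
expected to watch a vcpu's prefix pages (kernel context, error handling
trimmed):

	/* Called once a watched pte is invalidated; 'address' is the
	 * guest address that was hit. */
	static void prefix_notifier(struct gmap *gmap, unsigned long address)
	{
		/* kick the vcpu out of SIE and re-arm the notification */
	}

	static struct gmap_notifier prefix_gn = {
		.notifier_call = prefix_notifier,
	};

	static int watch_prefix(struct gmap *gmap, unsigned long prefix)
	{
		gmap_register_ipte_notifier(&prefix_gn);
		/* sets RCP_IN_BIT in the pgstes backing the 8KB prefix area */
		return gmap_ipte_notify(gmap, prefix, 2 * PAGE_SIZE);
	}
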
diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h
index ff67d73..59880dba 100644
--- a/arch/s390/include/asm/setup.h
+++ b/arch/s390/include/asm/setup.h
@@ -33,8 +33,6 @@
 
 #define CHUNK_READ_WRITE 0
 #define CHUNK_READ_ONLY  1
-#define CHUNK_OLDMEM	 4
-#define CHUNK_CRASHK	 5
 
 struct mem_chunk {
 	unsigned long addr;
@@ -43,13 +41,12 @@
 };
 
 extern struct mem_chunk memory_chunk[];
-extern unsigned long real_memory_size;
 extern int memory_end_set;
 extern unsigned long memory_end;
 
-void detect_memory_layout(struct mem_chunk chunk[]);
-void create_mem_hole(struct mem_chunk memory_chunk[], unsigned long addr,
-		     unsigned long size, int type);
+void detect_memory_layout(struct mem_chunk chunk[], unsigned long maxsize);
+void create_mem_hole(struct mem_chunk mem_chunk[], unsigned long addr,
+		     unsigned long size);
 
 #define PRIMARY_SPACE_MODE	0
 #define ACCESS_REGISTER_MODE	1
diff --git a/arch/s390/include/uapi/asm/Kbuild b/arch/s390/include/uapi/asm/Kbuild
index 7bf68ff..9ccd190 100644
--- a/arch/s390/include/uapi/asm/Kbuild
+++ b/arch/s390/include/uapi/asm/Kbuild
@@ -44,5 +44,6 @@
 header-y += types.h
 header-y += ucontext.h
 header-y += unistd.h
+header-y += virtio-ccw.h
 header-y += vtoc.h
 header-y += zcrypt.h
diff --git a/arch/s390/include/uapi/asm/virtio-ccw.h b/arch/s390/include/uapi/asm/virtio-ccw.h
new file mode 100644
index 0000000..a9a4ebf
--- /dev/null
+++ b/arch/s390/include/uapi/asm/virtio-ccw.h
@@ -0,0 +1,21 @@
+/*
+ * Definitions for virtio-ccw devices.
+ *
+ * Copyright IBM Corp. 2013
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ *  Author(s): Cornelia Huck <cornelia.huck@de.ibm.com>
+ */
+#ifndef __KVM_VIRTIO_CCW_H
+#define __KVM_VIRTIO_CCW_H
+
+/* Alignment of vring buffers. */
+#define KVM_VIRTIO_CCW_RING_ALIGN 4096
+
+/* Subcode for diagnose 500 (virtio hypercall). */
+#define KVM_S390_VIRTIO_CCW_NOTIFY 3
+
+#endif
diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile
index 1386fca..4bb2a46 100644
--- a/arch/s390/kernel/Makefile
+++ b/arch/s390/kernel/Makefile
@@ -30,7 +30,7 @@
 
 obj-y	:= bitmap.o traps.o time.o process.o base.o early.o setup.o vtime.o
 obj-y	+= processor.o sys_s390.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o
-obj-y	+= debug.o irq.o ipl.o dis.o diag.o mem_detect.o sclp.o vdso.o
+obj-y	+= debug.o irq.o ipl.o dis.o diag.o sclp.o vdso.o
 obj-y	+= sysinfo.o jump_label.o lgr.o os_info.o machine_kexec.o pgm_check.o
 obj-y	+= dumpstack.o
 
diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c
index fb8d878..f703d91 100644
--- a/arch/s390/kernel/crash_dump.c
+++ b/arch/s390/kernel/crash_dump.c
@@ -88,8 +88,8 @@
 	struct mem_chunk *chunk_array;
 
 	chunk_array = kzalloc_panic(MEMORY_CHUNKS * sizeof(struct mem_chunk));
-	detect_memory_layout(chunk_array);
-	create_mem_hole(chunk_array, OLDMEM_BASE, OLDMEM_SIZE, CHUNK_CRASHK);
+	detect_memory_layout(chunk_array, 0);
+	create_mem_hole(chunk_array, OLDMEM_BASE, OLDMEM_SIZE);
 	return chunk_array;
 }
 
@@ -344,7 +344,7 @@
 	for (i = 0; i < MEMORY_CHUNKS; i++) {
 		mem_chunk = &chunk_array[i];
 		if (mem_chunk->size == 0)
-			break;
+			continue;
 		if (chunk_array[i].type != CHUNK_READ_WRITE &&
 		    chunk_array[i].type != CHUNK_READ_ONLY)
 			continue;
diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c
index bda011e..dc8770d 100644
--- a/arch/s390/kernel/early.c
+++ b/arch/s390/kernel/early.c
@@ -482,7 +482,6 @@
 	detect_machine_facilities();
 	setup_topology();
 	sclp_facilities_detect();
-	detect_memory_layout(memory_chunk);
 #ifdef CONFIG_DYNAMIC_FTRACE
 	S390_lowcore.ftrace_func = (unsigned long)ftrace_caller;
 #endif
diff --git a/arch/s390/kernel/mem_detect.c b/arch/s390/kernel/mem_detect.c
deleted file mode 100644
index 22d502e..0000000
--- a/arch/s390/kernel/mem_detect.c
+++ /dev/null
@@ -1,145 +0,0 @@
-/*
- * Copyright IBM Corp. 2008, 2009
- *
- * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <asm/ipl.h>
-#include <asm/sclp.h>
-#include <asm/setup.h>
-
-#define ADDR2G (1ULL << 31)
-
-static void find_memory_chunks(struct mem_chunk chunk[])
-{
-	unsigned long long memsize, rnmax, rzm;
-	unsigned long addr = 0, size;
-	int i = 0, type;
-
-	rzm = sclp_get_rzm();
-	rnmax = sclp_get_rnmax();
-	memsize = rzm * rnmax;
-	if (!rzm)
-		rzm = 1ULL << 17;
-	if (sizeof(long) == 4) {
-		rzm = min(ADDR2G, rzm);
-		memsize = memsize ? min(ADDR2G, memsize) : ADDR2G;
-	}
-	do {
-		size = 0;
-		type = tprot(addr);
-		do {
-			size += rzm;
-			if (memsize && addr + size >= memsize)
-				break;
-		} while (type == tprot(addr + size));
-		if (type == CHUNK_READ_WRITE || type == CHUNK_READ_ONLY) {
-			chunk[i].addr = addr;
-			chunk[i].size = size;
-			chunk[i].type = type;
-			i++;
-		}
-		addr += size;
-	} while (addr < memsize && i < MEMORY_CHUNKS);
-}
-
-void detect_memory_layout(struct mem_chunk chunk[])
-{
-	unsigned long flags, cr0;
-
-	memset(chunk, 0, MEMORY_CHUNKS * sizeof(struct mem_chunk));
-	/* Disable IRQs, DAT and low address protection so tprot does the
-	 * right thing and we don't get scheduled away with low address
-	 * protection disabled.
-	 */
-	flags = __arch_local_irq_stnsm(0xf8);
-	__ctl_store(cr0, 0, 0);
-	__ctl_clear_bit(0, 28);
-	find_memory_chunks(chunk);
-	__ctl_load(cr0, 0, 0);
-	arch_local_irq_restore(flags);
-}
-EXPORT_SYMBOL(detect_memory_layout);
-
-/*
- * Move memory chunks array from index "from" to index "to"
- */
-static void mem_chunk_move(struct mem_chunk chunk[], int to, int from)
-{
-	int cnt = MEMORY_CHUNKS - to;
-
-	memmove(&chunk[to], &chunk[from], cnt * sizeof(struct mem_chunk));
-}
-
-/*
- * Initialize memory chunk
- */
-static void mem_chunk_init(struct mem_chunk *chunk, unsigned long addr,
-			   unsigned long size, int type)
-{
-	chunk->type = type;
-	chunk->addr = addr;
-	chunk->size = size;
-}
-
-/*
- * Create memory hole with given address, size, and type
- */
-void create_mem_hole(struct mem_chunk chunk[], unsigned long addr,
-		     unsigned long size, int type)
-{
-	unsigned long lh_start, lh_end, lh_size, ch_start, ch_end, ch_size;
-	int i, ch_type;
-
-	for (i = 0; i < MEMORY_CHUNKS; i++) {
-		if (chunk[i].size == 0)
-			continue;
-
-		/* Define chunk properties */
-		ch_start = chunk[i].addr;
-		ch_size = chunk[i].size;
-		ch_end = ch_start + ch_size - 1;
-		ch_type = chunk[i].type;
-
-		/* Is memory chunk hit by memory hole? */
-		if (addr + size <= ch_start)
-			continue; /* No: memory hole in front of chunk */
-		if (addr > ch_end)
-			continue; /* No: memory hole after chunk */
-
-		/* Yes: Define local hole properties */
-		lh_start = max(addr, chunk[i].addr);
-		lh_end = min(addr + size - 1, ch_end);
-		lh_size = lh_end - lh_start + 1;
-
-		if (lh_start == ch_start && lh_end == ch_end) {
-			/* Hole covers complete memory chunk */
-			mem_chunk_init(&chunk[i], lh_start, lh_size, type);
-		} else if (lh_end == ch_end) {
-			/* Hole starts in memory chunk and convers chunk end */
-			mem_chunk_move(chunk, i + 1, i);
-			mem_chunk_init(&chunk[i], ch_start, ch_size - lh_size,
-				       ch_type);
-			mem_chunk_init(&chunk[i + 1], lh_start, lh_size, type);
-			i += 1;
-		} else if (lh_start == ch_start) {
-			/* Hole ends in memory chunk */
-			mem_chunk_move(chunk, i + 1, i);
-			mem_chunk_init(&chunk[i], lh_start, lh_size, type);
-			mem_chunk_init(&chunk[i + 1], lh_end + 1,
-				       ch_size - lh_size, ch_type);
-			break;
-		} else {
-			/* Hole splits memory chunk */
-			mem_chunk_move(chunk, i + 2, i);
-			mem_chunk_init(&chunk[i], ch_start,
-				       lh_start - ch_start, ch_type);
-			mem_chunk_init(&chunk[i + 1], lh_start, lh_size, type);
-			mem_chunk_init(&chunk[i + 2], lh_end + 1,
-				       ch_end - lh_end, ch_type);
-			break;
-		}
-	}
-}
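
The detection code leaves arch/s390/kernel (see the Makefile hunk above); a
simplified, typeless create_mem_hole() survives per the new setup.h
prototype. The geometry is unchanged: a hole either misses a chunk, covers
it completely, trims one end, or splits it in two. A standalone model of
that classification:

	#include <stdio.h>

	struct chunk { unsigned long addr, size; };

	/* Classify how a hole [haddr, haddr+hsize) intersects one chunk,
	 * mirroring the four branches of the removed create_mem_hole(). */
	static const char *classify(struct chunk *c, unsigned long haddr,
				    unsigned long hsize)
	{
		unsigned long c_end = c->addr + c->size - 1;
		unsigned long h_end = haddr + hsize - 1;

		if (h_end < c->addr || haddr > c_end)
			return "no overlap";
		if (haddr <= c->addr && h_end >= c_end)
			return "hole covers whole chunk";
		if (h_end >= c_end)
			return "hole covers chunk end (chunk is trimmed)";
		if (haddr <= c->addr)
			return "hole covers chunk start (chunk is trimmed)";
		return "hole splits chunk in two";
	}

	int main(void)
	{
		struct chunk c = { .addr = 0x100000, .size = 0x100000 };

		printf("%s\n", classify(&c, 0x180000, 0x10000));
		return 0;
	}
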
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index 0f419c5..0a49095 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -226,25 +226,17 @@
 }
 
 #ifdef CONFIG_ZFCPDUMP
-static void __init setup_zfcpdump(unsigned int console_devno)
+static void __init setup_zfcpdump(void)
 {
-	static char str[41];
-
 	if (ipl_info.type != IPL_TYPE_FCP_DUMP)
 		return;
 	if (OLDMEM_BASE)
 		return;
-	if (console_devno != -1)
-		sprintf(str, " cio_ignore=all,!0.0.%04x,!0.0.%04x",
-			ipl_info.data.fcp.dev_id.devno, console_devno);
-	else
-		sprintf(str, " cio_ignore=all,!0.0.%04x",
-			ipl_info.data.fcp.dev_id.devno);
-	strcat(boot_command_line, str);
+	strcat(boot_command_line, " cio_ignore=all,!ipldev,!condev");
 	console_loglevel = 2;
 }
 #else
-static inline void setup_zfcpdump(unsigned int console_devno) {}
+static inline void setup_zfcpdump(void) {}
 #endif /* CONFIG_ZFCPDUMP */
 
  /*
@@ -471,14 +463,10 @@
 	for (i = 0; i < MEMORY_CHUNKS; i++) {
 		if (!memory_chunk[i].size)
 			continue;
-		if (memory_chunk[i].type == CHUNK_OLDMEM ||
-		    memory_chunk[i].type == CHUNK_CRASHK)
-			continue;
 		res = alloc_bootmem_low(sizeof(*res));
 		res->flags = IORESOURCE_BUSY | IORESOURCE_MEM;
 		switch (memory_chunk[i].type) {
 		case CHUNK_READ_WRITE:
-		case CHUNK_CRASHK:
 			res->name = "System RAM";
 			break;
 		case CHUNK_READ_ONLY:
@@ -510,12 +498,10 @@
 	}
 }
 
-unsigned long real_memory_size;
-EXPORT_SYMBOL_GPL(real_memory_size);
-
 static void __init setup_memory_end(void)
 {
 	unsigned long vmax, vmalloc_size, tmp;
+	unsigned long real_memory_size = 0;
 	int i;
 
 
@@ -525,7 +511,6 @@
 		memory_end_set = 1;
 	}
 #endif
-	real_memory_size = 0;
 	memory_end &= PAGE_MASK;
 
 	/*
@@ -538,6 +523,8 @@
 		unsigned long align;
 
 		chunk = &memory_chunk[i];
+		if (!chunk->size)
+			continue;
 		align = 1UL << (MAX_ORDER + PAGE_SHIFT - 1);
 		start = (chunk->addr + align - 1) & ~(align - 1);
 		end = (chunk->addr + chunk->size) & ~(align - 1);
@@ -588,6 +575,8 @@
 	for (i = 0; i < MEMORY_CHUNKS; i++) {
 		struct mem_chunk *chunk = &memory_chunk[i];
 
+		if (!chunk->size)
+			continue;
 		if (chunk->addr >= memory_end) {
 			memset(chunk, 0, sizeof(*chunk));
 			continue;
@@ -688,15 +677,6 @@
 }
 
 /*
- * Reserve kdump memory by creating a memory hole in the mem_chunk array
- */
-static void __init reserve_kdump_bootmem(unsigned long addr, unsigned long size,
-					 int type)
-{
-	create_mem_hole(memory_chunk, addr, size, type);
-}
-
-/*
  * When kdump is enabled, we have to ensure that no memory from
  * the area [0 - crashkernel memory size] and
  * [crashk_res.start - crashk_res.end] is set offline.
@@ -727,16 +707,22 @@
 static void reserve_oldmem(void)
 {
 #ifdef CONFIG_CRASH_DUMP
+	unsigned long real_size = 0;
+	int i;
+
 	if (!OLDMEM_BASE)
 		return;
+	for (i = 0; i < MEMORY_CHUNKS; i++) {
+		struct mem_chunk *chunk = &memory_chunk[i];
 
-	reserve_kdump_bootmem(OLDMEM_BASE, OLDMEM_SIZE, CHUNK_OLDMEM);
-	reserve_kdump_bootmem(OLDMEM_SIZE, memory_end - OLDMEM_SIZE,
-			      CHUNK_OLDMEM);
-	if (OLDMEM_BASE + OLDMEM_SIZE == real_memory_size)
+		real_size = max(real_size, chunk->addr + chunk->size);
+	}
+	create_mem_hole(memory_chunk, OLDMEM_BASE, OLDMEM_SIZE);
+	create_mem_hole(memory_chunk, OLDMEM_SIZE, real_size - OLDMEM_SIZE);
+	if (OLDMEM_BASE + OLDMEM_SIZE == real_size)
 		saved_max_pfn = PFN_DOWN(OLDMEM_BASE) - 1;
 	else
-		saved_max_pfn = PFN_DOWN(real_memory_size) - 1;
+		saved_max_pfn = PFN_DOWN(real_size) - 1;
 #endif
 }
 
@@ -775,7 +761,7 @@
 	crashk_res.start = crash_base;
 	crashk_res.end = crash_base + crash_size - 1;
 	insert_resource(&iomem_resource, &crashk_res);
-	reserve_kdump_bootmem(crash_base, crash_size, CHUNK_CRASHK);
+	create_mem_hole(memory_chunk, crash_base, crash_size);
 	pr_info("Reserving %lluMB of memory at %lluMB "
 		"for crashkernel (System RAM: %luMB)\n",
 		crash_size >> 20, crash_base >> 20, memory_end >> 20);
@@ -847,11 +833,10 @@
 	 * Register RAM areas with the bootmem allocator.
 	 */
 
-	for (i = 0; i < MEMORY_CHUNKS && memory_chunk[i].size > 0; i++) {
+	for (i = 0; i < MEMORY_CHUNKS; i++) {
 		unsigned long start_chunk, end_chunk, pfn;
 
-		if (memory_chunk[i].type != CHUNK_READ_WRITE &&
-		    memory_chunk[i].type != CHUNK_CRASHK)
+		if (!memory_chunk[i].size)
 			continue;
 		start_chunk = PFN_DOWN(memory_chunk[i].addr);
 		end_chunk = start_chunk + PFN_DOWN(memory_chunk[i].size);
@@ -1067,12 +1052,12 @@
 		memcpy(&uaccess, &uaccess_std, sizeof(uaccess));
 
 	parse_early_param();
-
+	detect_memory_layout(memory_chunk, memory_end);
 	os_info_init();
 	setup_ipl();
+	reserve_oldmem();
 	setup_memory_end();
 	setup_addressing_mode();
-	reserve_oldmem();
 	reserve_crashkernel();
 	setup_memory();
 	setup_resources();
@@ -1097,5 +1082,5 @@
 	set_preferred_console();
 
 	/* Setup zfcpdump support */
-	setup_zfcpdump(console_devno);
+	setup_zfcpdump();
 }
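
With the real_memory_size global gone, reserve_oldmem() now derives the real
memory size from the chunk array itself, as a standalone model makes clear:

	#include <stdio.h>

	struct mem_chunk { unsigned long addr, size; };

	/* Highest end address across all chunks, as reserve_oldmem() now
	 * computes it in place of the removed real_memory_size global. */
	static unsigned long real_size(struct mem_chunk *c, int n)
	{
		unsigned long max = 0;
		int i;

		for (i = 0; i < n; i++)
			if (c[i].addr + c[i].size > max)
				max = c[i].addr + c[i].size;
		return max;
	}

	int main(void)
	{
		struct mem_chunk chunks[] = {
			{ 0x00000000, 0x40000000 },
			{ 0x80000000, 0x40000000 },
		};

		printf("real size: %#lx\n", real_size(chunks, 2));
		return 0;
	}
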
diff --git a/arch/s390/kvm/Kconfig b/arch/s390/kvm/Kconfig
index 60f9f8a..70b46ea 100644
--- a/arch/s390/kvm/Kconfig
+++ b/arch/s390/kvm/Kconfig
@@ -22,6 +22,7 @@
 	select PREEMPT_NOTIFIERS
 	select ANON_INODES
 	select HAVE_KVM_CPU_RELAX_INTERCEPT
+	select HAVE_KVM_EVENTFD
 	---help---
 	  Support hosting paravirtualized guest machines using the SIE
 	  virtualization capability on the mainframe. This should work
diff --git a/arch/s390/kvm/Makefile b/arch/s390/kvm/Makefile
index 3975722..8fe9d65 100644
--- a/arch/s390/kvm/Makefile
+++ b/arch/s390/kvm/Makefile
@@ -6,7 +6,7 @@
 # it under the terms of the GNU General Public License (version 2 only)
 # as published by the Free Software Foundation.
 
-common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o)
+common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o eventfd.o)
 
 ccflags-y := -Ivirt/kvm -Iarch/s390/kvm
 
diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c
index a390687..1c01a99 100644
--- a/arch/s390/kvm/diag.c
+++ b/arch/s390/kvm/diag.c
@@ -13,6 +13,7 @@
 
 #include <linux/kvm.h>
 #include <linux/kvm_host.h>
+#include <asm/virtio-ccw.h>
 #include "kvm-s390.h"
 #include "trace.h"
 #include "trace-s390.h"
@@ -104,6 +105,29 @@
 	return -EREMOTE;
 }
 
+static int __diag_virtio_hypercall(struct kvm_vcpu *vcpu)
+{
+	int ret, idx;
+
+	/* No virtio-ccw notification? Get out quickly. */
+	if (!vcpu->kvm->arch.css_support ||
+	    (vcpu->run->s.regs.gprs[1] != KVM_S390_VIRTIO_CCW_NOTIFY))
+		return -EOPNOTSUPP;
+
+	idx = srcu_read_lock(&vcpu->kvm->srcu);
+	/*
+	 * The layout is as follows:
+	 * - gpr 2 contains the subchannel id (passed as addr)
+	 * - gpr 3 contains the virtqueue index (passed as datamatch)
+	 */
+	ret = kvm_io_bus_write(vcpu->kvm, KVM_VIRTIO_CCW_NOTIFY_BUS,
+				vcpu->run->s.regs.gprs[2],
+				8, &vcpu->run->s.regs.gprs[3]);
+	srcu_read_unlock(&vcpu->kvm->srcu, idx);
+	/* kvm_io_bus_write returns -EOPNOTSUPP if it found no match. */
+	return ret < 0 ? ret : 0;
+}
+
 int kvm_s390_handle_diag(struct kvm_vcpu *vcpu)
 {
 	int code = (vcpu->arch.sie_block->ipb & 0xfff0000) >> 16;
@@ -118,6 +142,8 @@
 		return __diag_time_slice_end_directed(vcpu);
 	case 0x308:
 		return __diag_ipl_functions(vcpu);
+	case 0x500:
+		return __diag_virtio_hypercall(vcpu);
 	default:
 		return -EOPNOTSUPP;
 	}
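
On the guest side, the register layout documented in the handler comment
becomes a diag 0x500 with subcode KVM_S390_VIRTIO_CCW_NOTIFY in gpr 1. A
sketch of the matching guest call, closely modeled on the s390 virtio-ccw
driver's notify helper (the function name here is an assumption):

	#include <asm/virtio-ccw.h>

	/* gpr 1 = subcode, gpr 2 = subchannel id, gpr 3 = virtqueue index;
	 * by the driver's convention gpr 2 doubles as the return register. */
	static inline long virtio_ccw_do_notify(unsigned long schid,
						unsigned long vq_index)
	{
		register unsigned long nr asm("1") = KVM_S390_VIRTIO_CCW_NOTIFY;
		register unsigned long sch asm("2") = schid;
		register unsigned long idx asm("3") = vq_index;

		asm volatile ("diag 2,4,0x500\n"
			      : "+d" (sch)
			      : "d" (nr), "d" (idx)
			      : "memory", "cc");
		return sch;
	}
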
diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h
index 4703f12..302e0e5 100644
--- a/arch/s390/kvm/gaccess.h
+++ b/arch/s390/kvm/gaccess.h
@@ -18,369 +18,86 @@
 #include <asm/uaccess.h>
 #include "kvm-s390.h"
 
-static inline void __user *__guestaddr_to_user(struct kvm_vcpu *vcpu,
-					       unsigned long guestaddr)
+static inline void __user *__gptr_to_uptr(struct kvm_vcpu *vcpu,
+					  void __user *gptr,
+					  int prefixing)
 {
 	unsigned long prefix  = vcpu->arch.sie_block->prefix;
+	unsigned long gaddr = (unsigned long) gptr;
+	unsigned long uaddr;
 
-	if (guestaddr < 2 * PAGE_SIZE)
-		guestaddr += prefix;
-	else if ((guestaddr >= prefix) && (guestaddr < prefix + 2 * PAGE_SIZE))
-		guestaddr -= prefix;
-
-	return (void __user *) gmap_fault(guestaddr, vcpu->arch.gmap);
+	if (prefixing) {
+		if (gaddr < 2 * PAGE_SIZE)
+			gaddr += prefix;
+		else if ((gaddr >= prefix) && (gaddr < prefix + 2 * PAGE_SIZE))
+			gaddr -= prefix;
+	}
+	uaddr = gmap_fault(gaddr, vcpu->arch.gmap);
+	if (IS_ERR_VALUE(uaddr))
+		uaddr = -EFAULT;
+	return (void __user *)uaddr;
 }
 
-static inline int get_guest_u64(struct kvm_vcpu *vcpu, unsigned long guestaddr,
-				u64 *result)
+#define get_guest(vcpu, x, gptr)				\
+({								\
+	__typeof__(gptr) __uptr = __gptr_to_uptr(vcpu, gptr, 1);\
+	int __mask = sizeof(__typeof__(*(gptr))) - 1;		\
+	int __ret = PTR_RET((void __force *)__uptr);		\
+								\
+	if (!__ret) {						\
+		BUG_ON((unsigned long)__uptr & __mask);		\
+		__ret = get_user(x, __uptr);			\
+	}							\
+	__ret;							\
+})
+
+#define put_guest(vcpu, x, gptr)				\
+({								\
+	__typeof__(gptr) __uptr = __gptr_to_uptr(vcpu, gptr, 1);\
+	int __mask = sizeof(__typeof__(*(gptr))) - 1;		\
+	int __ret = PTR_RET((void __force *)__uptr);		\
+								\
+	if (!__ret) {						\
+		BUG_ON((unsigned long)__uptr & __mask);		\
+		__ret = put_user(x, __uptr);			\
+	}							\
+	__ret;							\
+})
+
+static inline int __copy_guest(struct kvm_vcpu *vcpu, unsigned long to,
+			       unsigned long from, unsigned long len,
+			       int to_guest, int prefixing)
 {
-	void __user *uptr = __guestaddr_to_user(vcpu, guestaddr);
+	unsigned long _len, rc;
+	void __user *uptr;
 
-	BUG_ON(guestaddr & 7);
-
-	if (IS_ERR((void __force *) uptr))
-		return PTR_ERR((void __force *) uptr);
-
-	return get_user(*result, (unsigned long __user *) uptr);
-}
-
-static inline int get_guest_u32(struct kvm_vcpu *vcpu, unsigned long guestaddr,
-				u32 *result)
-{
-	void __user *uptr = __guestaddr_to_user(vcpu, guestaddr);
-
-	BUG_ON(guestaddr & 3);
-
-	if (IS_ERR((void __force *) uptr))
-		return PTR_ERR((void __force *) uptr);
-
-	return get_user(*result, (u32 __user *) uptr);
-}
-
-static inline int get_guest_u16(struct kvm_vcpu *vcpu, unsigned long guestaddr,
-				u16 *result)
-{
-	void __user *uptr = __guestaddr_to_user(vcpu, guestaddr);
-
-	BUG_ON(guestaddr & 1);
-
-	if (IS_ERR(uptr))
-		return PTR_ERR(uptr);
-
-	return get_user(*result, (u16 __user *) uptr);
-}
-
-static inline int get_guest_u8(struct kvm_vcpu *vcpu, unsigned long guestaddr,
-			       u8 *result)
-{
-	void __user *uptr = __guestaddr_to_user(vcpu, guestaddr);
-
-	if (IS_ERR((void __force *) uptr))
-		return PTR_ERR((void __force *) uptr);
-
-	return get_user(*result, (u8 __user *) uptr);
-}
-
-static inline int put_guest_u64(struct kvm_vcpu *vcpu, unsigned long guestaddr,
-				u64 value)
-{
-	void __user *uptr = __guestaddr_to_user(vcpu, guestaddr);
-
-	BUG_ON(guestaddr & 7);
-
-	if (IS_ERR((void __force *) uptr))
-		return PTR_ERR((void __force *) uptr);
-
-	return put_user(value, (u64 __user *) uptr);
-}
-
-static inline int put_guest_u32(struct kvm_vcpu *vcpu, unsigned long guestaddr,
-				u32 value)
-{
-	void __user *uptr = __guestaddr_to_user(vcpu, guestaddr);
-
-	BUG_ON(guestaddr & 3);
-
-	if (IS_ERR((void __force *) uptr))
-		return PTR_ERR((void __force *) uptr);
-
-	return put_user(value, (u32 __user *) uptr);
-}
-
-static inline int put_guest_u16(struct kvm_vcpu *vcpu, unsigned long guestaddr,
-				u16 value)
-{
-	void __user *uptr = __guestaddr_to_user(vcpu, guestaddr);
-
-	BUG_ON(guestaddr & 1);
-
-	if (IS_ERR((void __force *) uptr))
-		return PTR_ERR((void __force *) uptr);
-
-	return put_user(value, (u16 __user *) uptr);
-}
-
-static inline int put_guest_u8(struct kvm_vcpu *vcpu, unsigned long guestaddr,
-			       u8 value)
-{
-	void __user *uptr = __guestaddr_to_user(vcpu, guestaddr);
-
-	if (IS_ERR((void __force *) uptr))
-		return PTR_ERR((void __force *) uptr);
-
-	return put_user(value, (u8 __user *) uptr);
-}
-
-
-static inline int __copy_to_guest_slow(struct kvm_vcpu *vcpu,
-				       unsigned long guestdest,
-				       void *from, unsigned long n)
-{
-	int rc;
-	unsigned long i;
-	u8 *data = from;
-
-	for (i = 0; i < n; i++) {
-		rc = put_guest_u8(vcpu, guestdest++, *(data++));
-		if (rc < 0)
-			return rc;
+	while (len) {
+		uptr = to_guest ? (void __user *)to : (void __user *)from;
+		uptr = __gptr_to_uptr(vcpu, uptr, prefixing);
+		if (IS_ERR((void __force *)uptr))
+			return -EFAULT;
+		_len = PAGE_SIZE - ((unsigned long)uptr & (PAGE_SIZE - 1));
+		_len = min(_len, len);
+		if (to_guest)
+			rc = copy_to_user((void __user *) uptr, (void *)from, _len);
+		else
+			rc = copy_from_user((void *)to, (void __user *)uptr, _len);
+		if (rc)
+			return -EFAULT;
+		len -= _len;
+		from += _len;
+		to += _len;
 	}
 	return 0;
 }
 
-static inline int __copy_to_guest_fast(struct kvm_vcpu *vcpu,
-				       unsigned long guestdest,
-				       void *from, unsigned long n)
-{
-	int r;
-	void __user *uptr;
-	unsigned long size;
+#define copy_to_guest(vcpu, to, from, size) \
+	__copy_guest(vcpu, to, (unsigned long)from, size, 1, 1)
+#define copy_from_guest(vcpu, to, from, size) \
+	__copy_guest(vcpu, (unsigned long)to, from, size, 0, 1)
+#define copy_to_guest_absolute(vcpu, to, from, size) \
+	__copy_guest(vcpu, to, (unsigned long)from, size, 1, 0)
+#define copy_from_guest_absolute(vcpu, to, from, size) \
+	__copy_guest(vcpu, (unsigned long)to, from, size, 0, 0)
 
-	if (guestdest + n < guestdest)
-		return -EFAULT;
-
-	/* simple case: all within one segment table entry? */
-	if ((guestdest & PMD_MASK) == ((guestdest+n) & PMD_MASK)) {
-		uptr = (void __user *) gmap_fault(guestdest, vcpu->arch.gmap);
-
-		if (IS_ERR((void __force *) uptr))
-			return PTR_ERR((void __force *) uptr);
-
-		r = copy_to_user(uptr, from, n);
-
-		if (r)
-			r = -EFAULT;
-
-		goto out;
-	}
-
-	/* copy first segment */
-	uptr = (void __user *)gmap_fault(guestdest, vcpu->arch.gmap);
-
-	if (IS_ERR((void __force *) uptr))
-		return PTR_ERR((void __force *) uptr);
-
-	size = PMD_SIZE - (guestdest & ~PMD_MASK);
-
-	r = copy_to_user(uptr, from, size);
-
-	if (r) {
-		r = -EFAULT;
-		goto out;
-	}
-	from += size;
-	n -= size;
-	guestdest += size;
-
-	/* copy full segments */
-	while (n >= PMD_SIZE) {
-		uptr = (void __user *)gmap_fault(guestdest, vcpu->arch.gmap);
-
-		if (IS_ERR((void __force *) uptr))
-			return PTR_ERR((void __force *) uptr);
-
-		r = copy_to_user(uptr, from, PMD_SIZE);
-
-		if (r) {
-			r = -EFAULT;
-			goto out;
-		}
-		from += PMD_SIZE;
-		n -= PMD_SIZE;
-		guestdest += PMD_SIZE;
-	}
-
-	/* copy the tail segment */
-	if (n) {
-		uptr = (void __user *)gmap_fault(guestdest, vcpu->arch.gmap);
-
-		if (IS_ERR((void __force *) uptr))
-			return PTR_ERR((void __force *) uptr);
-
-		r = copy_to_user(uptr, from, n);
-
-		if (r)
-			r = -EFAULT;
-	}
-out:
-	return r;
-}
-
-static inline int copy_to_guest_absolute(struct kvm_vcpu *vcpu,
-					 unsigned long guestdest,
-					 void *from, unsigned long n)
-{
-	return __copy_to_guest_fast(vcpu, guestdest, from, n);
-}
-
-static inline int copy_to_guest(struct kvm_vcpu *vcpu, unsigned long guestdest,
-				void *from, unsigned long n)
-{
-	unsigned long prefix  = vcpu->arch.sie_block->prefix;
-
-	if ((guestdest < 2 * PAGE_SIZE) && (guestdest + n > 2 * PAGE_SIZE))
-		goto slowpath;
-
-	if ((guestdest < prefix) && (guestdest + n > prefix))
-		goto slowpath;
-
-	if ((guestdest < prefix + 2 * PAGE_SIZE)
-	    && (guestdest + n > prefix + 2 * PAGE_SIZE))
-		goto slowpath;
-
-	if (guestdest < 2 * PAGE_SIZE)
-		guestdest += prefix;
-	else if ((guestdest >= prefix) && (guestdest < prefix + 2 * PAGE_SIZE))
-		guestdest -= prefix;
-
-	return __copy_to_guest_fast(vcpu, guestdest, from, n);
-slowpath:
-	return __copy_to_guest_slow(vcpu, guestdest, from, n);
-}
-
-static inline int __copy_from_guest_slow(struct kvm_vcpu *vcpu, void *to,
-					 unsigned long guestsrc,
-					 unsigned long n)
-{
-	int rc;
-	unsigned long i;
-	u8 *data = to;
-
-	for (i = 0; i < n; i++) {
-		rc = get_guest_u8(vcpu, guestsrc++, data++);
-		if (rc < 0)
-			return rc;
-	}
-	return 0;
-}
-
-static inline int __copy_from_guest_fast(struct kvm_vcpu *vcpu, void *to,
-					 unsigned long guestsrc,
-					 unsigned long n)
-{
-	int r;
-	void __user *uptr;
-	unsigned long size;
-
-	if (guestsrc + n < guestsrc)
-		return -EFAULT;
-
-	/* simple case: all within one segment table entry? */
-	if ((guestsrc & PMD_MASK) == ((guestsrc+n) & PMD_MASK)) {
-		uptr = (void __user *) gmap_fault(guestsrc, vcpu->arch.gmap);
-
-		if (IS_ERR((void __force *) uptr))
-			return PTR_ERR((void __force *) uptr);
-
-		r = copy_from_user(to, uptr, n);
-
-		if (r)
-			r = -EFAULT;
-
-		goto out;
-	}
-
-	/* copy first segment */
-	uptr = (void __user *)gmap_fault(guestsrc, vcpu->arch.gmap);
-
-	if (IS_ERR((void __force *) uptr))
-		return PTR_ERR((void __force *) uptr);
-
-	size = PMD_SIZE - (guestsrc & ~PMD_MASK);
-
-	r = copy_from_user(to, uptr, size);
-
-	if (r) {
-		r = -EFAULT;
-		goto out;
-	}
-	to += size;
-	n -= size;
-	guestsrc += size;
-
-	/* copy full segments */
-	while (n >= PMD_SIZE) {
-		uptr = (void __user *)gmap_fault(guestsrc, vcpu->arch.gmap);
-
-		if (IS_ERR((void __force *) uptr))
-			return PTR_ERR((void __force *) uptr);
-
-		r = copy_from_user(to, uptr, PMD_SIZE);
-
-		if (r) {
-			r = -EFAULT;
-			goto out;
-		}
-		to += PMD_SIZE;
-		n -= PMD_SIZE;
-		guestsrc += PMD_SIZE;
-	}
-
-	/* copy the tail segment */
-	if (n) {
-		uptr = (void __user *)gmap_fault(guestsrc, vcpu->arch.gmap);
-
-		if (IS_ERR((void __force *) uptr))
-			return PTR_ERR((void __force *) uptr);
-
-		r = copy_from_user(to, uptr, n);
-
-		if (r)
-			r = -EFAULT;
-	}
-out:
-	return r;
-}
-
-static inline int copy_from_guest_absolute(struct kvm_vcpu *vcpu, void *to,
-					   unsigned long guestsrc,
-					   unsigned long n)
-{
-	return __copy_from_guest_fast(vcpu, to, guestsrc, n);
-}
-
-static inline int copy_from_guest(struct kvm_vcpu *vcpu, void *to,
-				  unsigned long guestsrc, unsigned long n)
-{
-	unsigned long prefix  = vcpu->arch.sie_block->prefix;
-
-	if ((guestsrc < 2 * PAGE_SIZE) && (guestsrc + n > 2 * PAGE_SIZE))
-		goto slowpath;
-
-	if ((guestsrc < prefix) && (guestsrc + n > prefix))
-		goto slowpath;
-
-	if ((guestsrc < prefix + 2 * PAGE_SIZE)
-	    && (guestsrc + n > prefix + 2 * PAGE_SIZE))
-		goto slowpath;
-
-	if (guestsrc < 2 * PAGE_SIZE)
-		guestsrc += prefix;
-	else if ((guestsrc >= prefix) && (guestsrc < prefix + 2 * PAGE_SIZE))
-		guestsrc -= prefix;
-
-	return __copy_from_guest_fast(vcpu, to, guestsrc, n);
-slowpath:
-	return __copy_from_guest_slow(vcpu, to, guestsrc, n);
-}
-#endif
+#endif /* __KVM_S390_GACCESS_H */
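
With this rewrite a single get_guest()/put_guest() pair replaces the eight
size-specific helpers: the access width comes from the pointed-to type, the
BUG_ON catches misaligned guest addresses, and the result is 0 or -EFAULT.
A short usage sketch against the new interface, using a lowcore offset that
appears elsewhere in this series:

	/* The (u16 __user *) cast selects a 2-byte guest access with
	 * prefix handling applied. */
	static int peek_ext_int_code(struct kvm_vcpu *vcpu, u16 *code)
	{
		return get_guest(vcpu, *code, (u16 __user *) __LC_EXT_INT_CODE);
	}

	static int clear_ext_int_code(struct kvm_vcpu *vcpu)
	{
		u16 zero = 0;

		return put_guest(vcpu, zero, (u16 __user *) __LC_EXT_INT_CODE);
	}
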
diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c
index f26ff1e..b7d1b2ed 100644
--- a/arch/s390/kvm/intercept.c
+++ b/arch/s390/kvm/intercept.c
@@ -43,12 +43,10 @@
 	trace_kvm_s390_handle_lctl(vcpu, 1, reg1, reg3, useraddr);
 
 	do {
-		rc = get_guest_u64(vcpu, useraddr,
-				   &vcpu->arch.sie_block->gcr[reg]);
-		if (rc == -EFAULT) {
-			kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
-			break;
-		}
+		rc = get_guest(vcpu, vcpu->arch.sie_block->gcr[reg],
+			       (u64 __user *) useraddr);
+		if (rc)
+			return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
 		useraddr += 8;
 		if (reg == reg3)
 			break;
@@ -78,11 +76,9 @@
 
 	reg = reg1;
 	do {
-		rc = get_guest_u32(vcpu, useraddr, &val);
-		if (rc == -EFAULT) {
-			kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
-			break;
-		}
+		rc = get_guest(vcpu, val, (u32 __user *) useraddr);
+		if (rc)
+			return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
 		vcpu->arch.sie_block->gcr[reg] &= 0xffffffff00000000ul;
 		vcpu->arch.sie_block->gcr[reg] |= val;
 		useraddr += 4;
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index 37116a7..5c94817 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -180,7 +180,7 @@
 				   struct kvm_s390_interrupt_info *inti)
 {
 	const unsigned short table[] = { 2, 4, 4, 6 };
-	int rc, exception = 0;
+	int rc = 0;
 
 	switch (inti->type) {
 	case KVM_S390_INT_EMERGENCY:
@@ -188,74 +188,41 @@
 		vcpu->stat.deliver_emergency_signal++;
 		trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
 						 inti->emerg.code, 0);
-		rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x1201);
-		if (rc == -EFAULT)
-			exception = 1;
-
-		rc = put_guest_u16(vcpu, __LC_EXT_CPU_ADDR, inti->emerg.code);
-		if (rc == -EFAULT)
-			exception = 1;
-
-		rc = copy_to_guest(vcpu, __LC_EXT_OLD_PSW,
-			 &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-		if (rc == -EFAULT)
-			exception = 1;
-
-		rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
-			__LC_EXT_NEW_PSW, sizeof(psw_t));
-		if (rc == -EFAULT)
-			exception = 1;
+		rc  = put_guest(vcpu, 0x1201, (u16 __user *)__LC_EXT_INT_CODE);
+		rc |= put_guest(vcpu, inti->emerg.code,
+				(u16 __user *)__LC_EXT_CPU_ADDR);
+		rc |= copy_to_guest(vcpu, __LC_EXT_OLD_PSW,
+				    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+		rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
+				      __LC_EXT_NEW_PSW, sizeof(psw_t));
 		break;
-
 	case KVM_S390_INT_EXTERNAL_CALL:
 		VCPU_EVENT(vcpu, 4, "%s", "interrupt: sigp ext call");
 		vcpu->stat.deliver_external_call++;
 		trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
 						 inti->extcall.code, 0);
-		rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x1202);
-		if (rc == -EFAULT)
-			exception = 1;
-
-		rc = put_guest_u16(vcpu, __LC_EXT_CPU_ADDR, inti->extcall.code);
-		if (rc == -EFAULT)
-			exception = 1;
-
-		rc = copy_to_guest(vcpu, __LC_EXT_OLD_PSW,
-			 &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-		if (rc == -EFAULT)
-			exception = 1;
-
-		rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
-			__LC_EXT_NEW_PSW, sizeof(psw_t));
-		if (rc == -EFAULT)
-			exception = 1;
+		rc  = put_guest(vcpu, 0x1202, (u16 __user *)__LC_EXT_INT_CODE);
+		rc |= put_guest(vcpu, inti->extcall.code,
+				(u16 __user *)__LC_EXT_CPU_ADDR);
+		rc |= copy_to_guest(vcpu, __LC_EXT_OLD_PSW,
+				    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+		rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
+				      __LC_EXT_NEW_PSW, sizeof(psw_t));
 		break;
-
 	case KVM_S390_INT_SERVICE:
 		VCPU_EVENT(vcpu, 4, "interrupt: sclp parm:%x",
 			   inti->ext.ext_params);
 		vcpu->stat.deliver_service_signal++;
 		trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
 						 inti->ext.ext_params, 0);
-		rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x2401);
-		if (rc == -EFAULT)
-			exception = 1;
-
-		rc = copy_to_guest(vcpu, __LC_EXT_OLD_PSW,
-			 &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-		if (rc == -EFAULT)
-			exception = 1;
-
-		rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
-			__LC_EXT_NEW_PSW, sizeof(psw_t));
-		if (rc == -EFAULT)
-			exception = 1;
-
-		rc = put_guest_u32(vcpu, __LC_EXT_PARAMS, inti->ext.ext_params);
-		if (rc == -EFAULT)
-			exception = 1;
+		rc  = put_guest(vcpu, 0x2401, (u16 __user *)__LC_EXT_INT_CODE);
+		rc |= copy_to_guest(vcpu, __LC_EXT_OLD_PSW,
+				    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+		rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
+				      __LC_EXT_NEW_PSW, sizeof(psw_t));
+		rc |= put_guest(vcpu, inti->ext.ext_params,
+				(u32 __user *)__LC_EXT_PARAMS);
 		break;
-
 	case KVM_S390_INT_VIRTIO:
 		VCPU_EVENT(vcpu, 4, "interrupt: virtio parm:%x,parm64:%llx",
 			   inti->ext.ext_params, inti->ext.ext_params2);
@@ -263,34 +230,17 @@
 		trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
 						 inti->ext.ext_params,
 						 inti->ext.ext_params2);
-		rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x2603);
-		if (rc == -EFAULT)
-			exception = 1;
-
-		rc = put_guest_u16(vcpu, __LC_EXT_CPU_ADDR, 0x0d00);
-		if (rc == -EFAULT)
-			exception = 1;
-
-		rc = copy_to_guest(vcpu, __LC_EXT_OLD_PSW,
-			 &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-		if (rc == -EFAULT)
-			exception = 1;
-
-		rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
-			__LC_EXT_NEW_PSW, sizeof(psw_t));
-		if (rc == -EFAULT)
-			exception = 1;
-
-		rc = put_guest_u32(vcpu, __LC_EXT_PARAMS, inti->ext.ext_params);
-		if (rc == -EFAULT)
-			exception = 1;
-
-		rc = put_guest_u64(vcpu, __LC_EXT_PARAMS2,
-				   inti->ext.ext_params2);
-		if (rc == -EFAULT)
-			exception = 1;
+		rc  = put_guest(vcpu, 0x2603, (u16 __user *)__LC_EXT_INT_CODE);
+		rc |= put_guest(vcpu, 0x0d00, (u16 __user *)__LC_EXT_CPU_ADDR);
+		rc |= copy_to_guest(vcpu, __LC_EXT_OLD_PSW,
+				    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+		rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
+				      __LC_EXT_NEW_PSW, sizeof(psw_t));
+		rc |= put_guest(vcpu, inti->ext.ext_params,
+				(u32 __user *)__LC_EXT_PARAMS);
+		rc |= put_guest(vcpu, inti->ext.ext_params2,
+				(u64 __user *)__LC_EXT_PARAMS2);
 		break;
-
 	case KVM_S390_SIGP_STOP:
 		VCPU_EVENT(vcpu, 4, "%s", "interrupt: cpu stop");
 		vcpu->stat.deliver_stop_signal++;
@@ -313,18 +263,14 @@
 		vcpu->stat.deliver_restart_signal++;
 		trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
 						 0, 0);
-		rc = copy_to_guest(vcpu, offsetof(struct _lowcore,
-		  restart_old_psw), &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-		if (rc == -EFAULT)
-			exception = 1;
-
-		rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
-			offsetof(struct _lowcore, restart_psw), sizeof(psw_t));
-		if (rc == -EFAULT)
-			exception = 1;
+		rc  = copy_to_guest(vcpu,
+				    offsetof(struct _lowcore, restart_old_psw),
+				    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+		rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
+				      offsetof(struct _lowcore, restart_psw),
+				      sizeof(psw_t));
 		atomic_clear_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
 		break;
-
 	case KVM_S390_PROGRAM_INT:
 		VCPU_EVENT(vcpu, 4, "interrupt: pgm check code:%x, ilc:%x",
 			   inti->pgm.code,
@@ -332,24 +278,13 @@
 		vcpu->stat.deliver_program_int++;
 		trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
 						 inti->pgm.code, 0);
-		rc = put_guest_u16(vcpu, __LC_PGM_INT_CODE, inti->pgm.code);
-		if (rc == -EFAULT)
-			exception = 1;
-
-		rc = put_guest_u16(vcpu, __LC_PGM_ILC,
-			table[vcpu->arch.sie_block->ipa >> 14]);
-		if (rc == -EFAULT)
-			exception = 1;
-
-		rc = copy_to_guest(vcpu, __LC_PGM_OLD_PSW,
-			 &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-		if (rc == -EFAULT)
-			exception = 1;
-
-		rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
-			__LC_PGM_NEW_PSW, sizeof(psw_t));
-		if (rc == -EFAULT)
-			exception = 1;
+		rc  = put_guest(vcpu, inti->pgm.code, (u16 __user *)__LC_PGM_INT_CODE);
+		rc |= put_guest(vcpu, table[vcpu->arch.sie_block->ipa >> 14],
+				(u16 __user *)__LC_PGM_ILC);
+		rc |= copy_to_guest(vcpu, __LC_PGM_OLD_PSW,
+				    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+		rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
+				      __LC_PGM_NEW_PSW, sizeof(psw_t));
 		break;
 
 	case KVM_S390_MCHK:
@@ -358,24 +293,13 @@
 		trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
 						 inti->mchk.cr14,
 						 inti->mchk.mcic);
-		rc = kvm_s390_vcpu_store_status(vcpu,
-						KVM_S390_STORE_STATUS_PREFIXED);
-		if (rc == -EFAULT)
-			exception = 1;
-
-		rc = put_guest_u64(vcpu, __LC_MCCK_CODE, inti->mchk.mcic);
-		if (rc == -EFAULT)
-			exception = 1;
-
-		rc = copy_to_guest(vcpu, __LC_MCK_OLD_PSW,
-				   &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-		if (rc == -EFAULT)
-			exception = 1;
-
-		rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
-				     __LC_MCK_NEW_PSW, sizeof(psw_t));
-		if (rc == -EFAULT)
-			exception = 1;
+		rc  = kvm_s390_vcpu_store_status(vcpu,
+						 KVM_S390_STORE_STATUS_PREFIXED);
+		rc |= put_guest(vcpu, inti->mchk.mcic, (u64 __user *) __LC_MCCK_CODE);
+		rc |= copy_to_guest(vcpu, __LC_MCK_OLD_PSW,
+				    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+		rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
+				      __LC_MCK_NEW_PSW, sizeof(psw_t));
 		break;
 
 	case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
@@ -388,67 +312,44 @@
 		vcpu->stat.deliver_io_int++;
 		trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
 						 param0, param1);
-		rc = put_guest_u16(vcpu, __LC_SUBCHANNEL_ID,
-				   inti->io.subchannel_id);
-		if (rc == -EFAULT)
-			exception = 1;
-
-		rc = put_guest_u16(vcpu, __LC_SUBCHANNEL_NR,
-				   inti->io.subchannel_nr);
-		if (rc == -EFAULT)
-			exception = 1;
-
-		rc = put_guest_u32(vcpu, __LC_IO_INT_PARM,
-				   inti->io.io_int_parm);
-		if (rc == -EFAULT)
-			exception = 1;
-
-		rc = put_guest_u32(vcpu, __LC_IO_INT_WORD,
-				   inti->io.io_int_word);
-		if (rc == -EFAULT)
-			exception = 1;
-
-		rc = copy_to_guest(vcpu, __LC_IO_OLD_PSW,
-				   &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-		if (rc == -EFAULT)
-			exception = 1;
-
-		rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
-				     __LC_IO_NEW_PSW, sizeof(psw_t));
-		if (rc == -EFAULT)
-			exception = 1;
+		rc  = put_guest(vcpu, inti->io.subchannel_id,
+				(u16 __user *) __LC_SUBCHANNEL_ID);
+		rc |= put_guest(vcpu, inti->io.subchannel_nr,
+				(u16 __user *) __LC_SUBCHANNEL_NR);
+		rc |= put_guest(vcpu, inti->io.io_int_parm,
+				(u32 __user *) __LC_IO_INT_PARM);
+		rc |= put_guest(vcpu, inti->io.io_int_word,
+				(u32 __user *) __LC_IO_INT_WORD);
+		rc |= copy_to_guest(vcpu, __LC_IO_OLD_PSW,
+				    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+		rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
+				      __LC_IO_NEW_PSW, sizeof(psw_t));
 		break;
 	}
 	default:
 		BUG();
 	}
-	if (exception) {
+	if (rc) {
 		printk("kvm: The guest lowcore is not mapped during interrupt "
-			"delivery, killing userspace\n");
+		       "delivery, killing userspace\n");
 		do_exit(SIGKILL);
 	}
 }
 
 static int __try_deliver_ckc_interrupt(struct kvm_vcpu *vcpu)
 {
-	int rc, exception = 0;
+	int rc;
 
 	if (psw_extint_disabled(vcpu))
 		return 0;
 	if (!(vcpu->arch.sie_block->gcr[0] & 0x800ul))
 		return 0;
-	rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x1004);
-	if (rc == -EFAULT)
-		exception = 1;
-	rc = copy_to_guest(vcpu, __LC_EXT_OLD_PSW,
-		 &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-	if (rc == -EFAULT)
-		exception = 1;
-	rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
-		__LC_EXT_NEW_PSW, sizeof(psw_t));
-	if (rc == -EFAULT)
-		exception = 1;
-	if (exception) {
+	rc  = put_guest(vcpu, 0x1004, (u16 __user *)__LC_EXT_INT_CODE);
+	rc |= copy_to_guest(vcpu, __LC_EXT_OLD_PSW,
+			    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+	rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
+			      __LC_EXT_NEW_PSW, sizeof(psw_t));
+	if (rc) {
 		printk("kvm: The guest lowcore is not mapped during interrupt "
 			"delivery, killing userspace\n");
 		do_exit(SIGKILL);
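
The conversion also changes the error-handling shape: instead of testing
every accessor for -EFAULT, the return codes are OR-ed together. That is
sound because each call returns either 0 or a negative errno and the
accumulated value is only ever tested for non-zero. A standalone
illustration of the idiom:

	#include <errno.h>
	#include <stdio.h>

	/* Stand-in for put_guest()/copy_to_guest(): 0 or a negative errno. */
	static int store(int fail) { return fail ? -EFAULT : 0; }

	int main(void)
	{
		int rc;

		rc  = store(0);
		rc |= store(1);		/* one failing store taints rc */
		rc |= store(0);
		printf("rc = %d -> %s\n", rc, rc ? "delivery failed" : "ok");
		return 0;
	}
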
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 4cf35a0..c1c7c68 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -142,12 +142,16 @@
 	case KVM_CAP_ONE_REG:
 	case KVM_CAP_ENABLE_CAP:
 	case KVM_CAP_S390_CSS_SUPPORT:
+	case KVM_CAP_IOEVENTFD:
 		r = 1;
 		break;
 	case KVM_CAP_NR_VCPUS:
 	case KVM_CAP_MAX_VCPUS:
 		r = KVM_MAX_VCPUS;
 		break;
+	case KVM_CAP_NR_MEMSLOTS:
+		r = KVM_USER_MEM_SLOTS;
+		break;
 	case KVM_CAP_S390_COW:
 		r = MACHINE_HAS_ESOP;
 		break;
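
Advertising KVM_CAP_IOEVENTFD pairs with the new KVM_VIRTIO_CCW_NOTIFY_BUS
used by the diag 0x500 handler: userspace can have the notify hypercall
matched in the kernel and signalled through an eventfd. A hedged sketch,
assuming the KVM_IOEVENTFD_FLAG_VIRTIO_CCW_NOTIFY flag from the companion
uapi update (not part of this hunk):

	#include <stdint.h>
	#include <string.h>
	#include <unistd.h>
	#include <sys/eventfd.h>
	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	/* Register an eventfd that fires when the guest notifies the given
	 * subchannel/virtqueue pair via diag 0x500. */
	static int wire_ccw_notify(int vm_fd, uint64_t schid, uint64_t vq)
	{
		struct kvm_ioeventfd ioev;
		int efd = eventfd(0, 0);

		if (efd < 0)
			return -1;
		memset(&ioev, 0, sizeof(ioev));
		ioev.addr = schid;	/* matched against gpr 2 */
		ioev.len = 8;
		ioev.datamatch = vq;	/* matched against gpr 3 */
		ioev.fd = efd;
		ioev.flags = KVM_IOEVENTFD_FLAG_DATAMATCH |
			     KVM_IOEVENTFD_FLAG_VIRTIO_CCW_NOTIFY;
		if (ioctl(vm_fd, KVM_IOEVENTFD, &ioev) < 0) {
			close(efd);
			return -1;
		}
		return efd;
	}
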
@@ -632,8 +636,7 @@
 		} else {
 			VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
 			trace_kvm_s390_sie_fault(vcpu);
-			kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
-			rc = 0;
+			rc = kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
 		}
 	}
 	VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
@@ -974,22 +977,13 @@
 /* Section: memory related */
 int kvm_arch_prepare_memory_region(struct kvm *kvm,
 				   struct kvm_memory_slot *memslot,
-				   struct kvm_memory_slot old,
 				   struct kvm_userspace_memory_region *mem,
-				   bool user_alloc)
+				   enum kvm_mr_change change)
 {
-	/* A few sanity checks. We can have exactly one memory slot which has
-	   to start at guest virtual zero and which has to be located at a
-	   page boundary in userland and which has to end at a page boundary.
-	   The memory in userland is ok to be fragmented into various different
-	   vmas. It is okay to mmap() and munmap() stuff in this slot after
-	   doing this call at any time */
-
-	if (mem->slot)
-		return -EINVAL;
-
-	if (mem->guest_phys_addr)
-		return -EINVAL;
+	/* A few sanity checks. We can have memory slots which have to start
+	   and end at a segment boundary (1MB). The memory in userland is ok
+	   to be fragmented into various different vmas. It is okay to mmap()
+	   and munmap() stuff in this slot after doing this call at any time */
 
 	if (mem->userspace_addr & 0xffffful)
 		return -EINVAL;
@@ -997,19 +991,26 @@
 	if (mem->memory_size & 0xffffful)
 		return -EINVAL;
 
-	if (!user_alloc)
-		return -EINVAL;
-
 	return 0;
 }
 
 void kvm_arch_commit_memory_region(struct kvm *kvm,
 				struct kvm_userspace_memory_region *mem,
-				struct kvm_memory_slot old,
-				bool user_alloc)
+				const struct kvm_memory_slot *old,
+				enum kvm_mr_change change)
 {
 	int rc;
 
+	/* If the basics of the memslot do not change, we do not want
+	 * to update the gmap. Every update causes several unnecessary
+	 * segment translation exceptions. This is usually handled just
+	 * fine by the normal fault handler + gmap, but it will also
+	 * cause faults on the prefix page of running guest CPUs.
+	 */
+	if (old->userspace_addr == mem->userspace_addr &&
+	    old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
+	    old->npages * PAGE_SIZE == mem->memory_size)
+		return;
 
 	rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
 		mem->guest_phys_addr, mem->memory_size);
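
The prepare/commit changes drop the old "exactly one slot, starting at guest
physical zero" restriction: any slot is now accepted as long as its size and
userspace address sit on 1MB segment boundaries. A userspace sketch of
registering such a slot, over-allocating to guarantee the segment alignment
that mmap() alone does not:

	#include <stdint.h>
	#include <sys/ioctl.h>
	#include <sys/mman.h>
	#include <linux/kvm.h>

	#define SEG_SIZE (1UL << 20)	/* s390 segment: 1MB */

	static int add_slot(int vm_fd, uint32_t slot, uint64_t gpa,
			    uint64_t size)
	{
		struct kvm_userspace_memory_region region;
		void *raw;

		raw = mmap(NULL, size + SEG_SIZE, PROT_READ | PROT_WRITE,
			   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
		if (raw == MAP_FAILED)
			return -1;

		region.slot = slot;
		region.flags = 0;
		region.guest_phys_addr = gpa;	/* may now be non-zero */
		region.memory_size = size;	/* must be 1MB aligned */
		region.userspace_addr = ((uintptr_t) raw + SEG_SIZE - 1)
					& ~(SEG_SIZE - 1);
		return ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION, &region);
	}
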
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index 4d89d64..efc14f6 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -110,12 +110,12 @@
 void kvm_s390_tasklet(unsigned long parm);
 void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu);
 void kvm_s390_deliver_pending_machine_checks(struct kvm_vcpu *vcpu);
-int kvm_s390_inject_vm(struct kvm *kvm,
-		struct kvm_s390_interrupt *s390int);
-int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
-		struct kvm_s390_interrupt *s390int);
-int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code);
-int kvm_s390_inject_sigp_stop(struct kvm_vcpu *vcpu, int action);
+int __must_check kvm_s390_inject_vm(struct kvm *kvm,
+				    struct kvm_s390_interrupt *s390int);
+int __must_check kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
+				      struct kvm_s390_interrupt *s390int);
+int __must_check kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code);
+int __must_check kvm_s390_inject_sigp_stop(struct kvm_vcpu *vcpu, int action);
 struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm,
 						    u64 cr6, u64 schid);
 
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index 0ef9894..6bbd7b5 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -14,6 +14,8 @@
 #include <linux/kvm.h>
 #include <linux/gfp.h>
 #include <linux/errno.h>
+#include <linux/compat.h>
+#include <asm/asm-offsets.h>
 #include <asm/current.h>
 #include <asm/debug.h>
 #include <asm/ebcdic.h>
@@ -35,31 +37,24 @@
 	operand2 = kvm_s390_get_base_disp_s(vcpu);
 
 	/* must be word boundary */
-	if (operand2 & 3) {
-		kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
-		goto out;
-	}
+	if (operand2 & 3)
+		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
 
 	/* get the value */
-	if (get_guest_u32(vcpu, operand2, &address)) {
-		kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
-		goto out;
-	}
+	if (get_guest(vcpu, address, (u32 __user *) operand2))
+		return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
 
 	address = address & 0x7fffe000u;
 
 	/* make sure that the new value is valid memory */
 	if (copy_from_guest_absolute(vcpu, &tmp, address, 1) ||
-	   (copy_from_guest_absolute(vcpu, &tmp, address + PAGE_SIZE, 1))) {
-		kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
-		goto out;
-	}
+	   (copy_from_guest_absolute(vcpu, &tmp, address + PAGE_SIZE, 1)))
+		return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
 
 	kvm_s390_set_prefix(vcpu, address);
 
 	VCPU_EVENT(vcpu, 5, "setting prefix to %x", address);
 	trace_kvm_s390_handle_prefix(vcpu, 1, address);
-out:
 	return 0;
 }
 
@@ -73,49 +68,37 @@
 	operand2 = kvm_s390_get_base_disp_s(vcpu);
 
 	/* must be word boundary */
-	if (operand2 & 3) {
-		kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
-		goto out;
-	}
+	if (operand2 & 3)
+		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
 
 	address = vcpu->arch.sie_block->prefix;
 	address = address & 0x7fffe000u;
 
 	/* get the value */
-	if (put_guest_u32(vcpu, operand2, address)) {
-		kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
-		goto out;
-	}
+	if (put_guest(vcpu, address, (u32 __user *)operand2))
+		return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
 
 	VCPU_EVENT(vcpu, 5, "storing prefix to %x", address);
 	trace_kvm_s390_handle_prefix(vcpu, 0, address);
-out:
 	return 0;
 }
 
 static int handle_store_cpu_address(struct kvm_vcpu *vcpu)
 {
 	u64 useraddr;
-	int rc;
 
 	vcpu->stat.instruction_stap++;
 
 	useraddr = kvm_s390_get_base_disp_s(vcpu);
 
-	if (useraddr & 1) {
-		kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
-		goto out;
-	}
+	if (useraddr & 1)
+		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
 
-	rc = put_guest_u16(vcpu, useraddr, vcpu->vcpu_id);
-	if (rc == -EFAULT) {
-		kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
-		goto out;
-	}
+	if (put_guest(vcpu, vcpu->vcpu_id, (u16 __user *)useraddr))
+		return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
 
 	VCPU_EVENT(vcpu, 5, "storing cpu address to %llx", useraddr);
 	trace_kvm_s390_handle_stap(vcpu, useraddr);
-out:
 	return 0;
 }
 
@@ -129,36 +112,38 @@
 
 static int handle_tpi(struct kvm_vcpu *vcpu)
 {
-	u64 addr;
 	struct kvm_s390_interrupt_info *inti;
+	u64 addr;
 	int cc;
 
 	addr = kvm_s390_get_base_disp_s(vcpu);
-
+	if (addr & 3)
+		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+	cc = 0;
 	inti = kvm_s390_get_io_int(vcpu->kvm, vcpu->run->s.regs.crs[6], 0);
-	if (inti) {
-		if (addr) {
-			/*
-			 * Store the two-word I/O interruption code into the
-			 * provided area.
-			 */
-			put_guest_u16(vcpu, addr, inti->io.subchannel_id);
-			put_guest_u16(vcpu, addr + 2, inti->io.subchannel_nr);
-			put_guest_u32(vcpu, addr + 4, inti->io.io_int_parm);
-		} else {
-			/*
-			 * Store the three-word I/O interruption code into
-			 * the appropriate lowcore area.
-			 */
-			put_guest_u16(vcpu, 184, inti->io.subchannel_id);
-			put_guest_u16(vcpu, 186, inti->io.subchannel_nr);
-			put_guest_u32(vcpu, 188, inti->io.io_int_parm);
-			put_guest_u32(vcpu, 192, inti->io.io_int_word);
-		}
-		cc = 1;
-	} else
-		cc = 0;
+	if (!inti)
+		goto no_interrupt;
+	cc = 1;
+	if (addr) {
+		/*
+		 * Store the two-word I/O interruption code into the
+		 * provided area.
+		 */
+		put_guest(vcpu, inti->io.subchannel_id, (u16 __user *) addr);
+		put_guest(vcpu, inti->io.subchannel_nr, (u16 __user *) (addr + 2));
+		put_guest(vcpu, inti->io.io_int_parm, (u32 __user *) (addr + 4));
+	} else {
+		/*
+		 * Store the three-word I/O interruption code into
+		 * the appropriate lowcore area.
+		 */
+		put_guest(vcpu, inti->io.subchannel_id, (u16 __user *) __LC_SUBCHANNEL_ID);
+		put_guest(vcpu, inti->io.subchannel_nr, (u16 __user *) __LC_SUBCHANNEL_NR);
+		put_guest(vcpu, inti->io.io_int_parm, (u32 __user *) __LC_IO_INT_PARM);
+		put_guest(vcpu, inti->io.io_int_word, (u32 __user *) __LC_IO_INT_WORD);
+	}
 	kfree(inti);
+no_interrupt:
 	/* Set condition code and we're done. */
 	vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44);
 	vcpu->arch.sie_block->gpsw.mask |= (cc & 3ul) << 44;
@@ -230,13 +215,10 @@
 
 	rc = copy_to_guest(vcpu, offsetof(struct _lowcore, stfl_fac_list),
 			   &facility_list, sizeof(facility_list));
-	if (rc == -EFAULT)
-		kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
-	else {
-		VCPU_EVENT(vcpu, 5, "store facility list value %x",
-			   facility_list);
-		trace_kvm_s390_handle_stfl(vcpu, facility_list);
-	}
+	if (rc)
+		return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+	VCPU_EVENT(vcpu, 5, "store facility list value %x", facility_list);
+	trace_kvm_s390_handle_stfl(vcpu, facility_list);
 	return 0;
 }
 
@@ -249,112 +231,80 @@
 
 #define PSW_MASK_ADDR_MODE (PSW_MASK_EA | PSW_MASK_BA)
 #define PSW_MASK_UNASSIGNED 0xb80800fe7fffffffUL
-#define PSW_ADDR_24 0x00000000000fffffUL
+#define PSW_ADDR_24 0x0000000000ffffffUL
 #define PSW_ADDR_31 0x000000007fffffffUL
 
+static int is_valid_psw(psw_t *psw) {
+	if (psw->mask & PSW_MASK_UNASSIGNED)
+		return 0;
+	if ((psw->mask & PSW_MASK_ADDR_MODE) == PSW_MASK_BA) {
+		if (psw->addr & ~PSW_ADDR_31)
+			return 0;
+	}
+	if (!(psw->mask & PSW_MASK_ADDR_MODE) && (psw->addr & ~PSW_ADDR_24))
+		return 0;
+	if ((psw->mask & PSW_MASK_ADDR_MODE) ==  PSW_MASK_EA)
+		return 0;
+	return 1;
+}
+
 int kvm_s390_handle_lpsw(struct kvm_vcpu *vcpu)
 {
-	u64 addr;
+	psw_t *gpsw = &vcpu->arch.sie_block->gpsw;
 	psw_compat_t new_psw;
+	u64 addr;
 
-	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+	if (gpsw->mask & PSW_MASK_PSTATE)
 		return kvm_s390_inject_program_int(vcpu,
 						   PGM_PRIVILEGED_OPERATION);
-
 	addr = kvm_s390_get_base_disp_s(vcpu);
-
-	if (addr & 7) {
-		kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
-		goto out;
-	}
-
-	if (copy_from_guest(vcpu, &new_psw, addr, sizeof(new_psw))) {
-		kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
-		goto out;
-	}
-
-	if (!(new_psw.mask & PSW32_MASK_BASE)) {
-		kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
-		goto out;
-	}
-
-	vcpu->arch.sie_block->gpsw.mask =
-		(new_psw.mask & ~PSW32_MASK_BASE) << 32;
-	vcpu->arch.sie_block->gpsw.addr = new_psw.addr;
-
-	if ((vcpu->arch.sie_block->gpsw.mask & PSW_MASK_UNASSIGNED) ||
-	    (!(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_ADDR_MODE) &&
-	     (vcpu->arch.sie_block->gpsw.addr & ~PSW_ADDR_24)) ||
-	    ((vcpu->arch.sie_block->gpsw.mask & PSW_MASK_ADDR_MODE) ==
-	     PSW_MASK_EA)) {
-		kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
-		goto out;
-	}
-
+	if (addr & 7)
+		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+	if (copy_from_guest(vcpu, &new_psw, addr, sizeof(new_psw)))
+		return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+	if (!(new_psw.mask & PSW32_MASK_BASE))
+		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+	gpsw->mask = (new_psw.mask & ~PSW32_MASK_BASE) << 32;
+	gpsw->mask |= new_psw.addr & PSW32_ADDR_AMODE;
+	gpsw->addr = new_psw.addr & ~PSW32_ADDR_AMODE;
+	if (!is_valid_psw(gpsw))
+		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
 	handle_new_psw(vcpu);
-out:
 	return 0;
 }
 
 static int handle_lpswe(struct kvm_vcpu *vcpu)
 {
-	u64 addr;
 	psw_t new_psw;
+	u64 addr;
 
 	addr = kvm_s390_get_base_disp_s(vcpu);
-
-	if (addr & 7) {
-		kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
-		goto out;
-	}
-
-	if (copy_from_guest(vcpu, &new_psw, addr, sizeof(new_psw))) {
-		kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
-		goto out;
-	}
-
-	vcpu->arch.sie_block->gpsw.mask = new_psw.mask;
-	vcpu->arch.sie_block->gpsw.addr = new_psw.addr;
-
-	if ((vcpu->arch.sie_block->gpsw.mask & PSW_MASK_UNASSIGNED) ||
-	    (((vcpu->arch.sie_block->gpsw.mask & PSW_MASK_ADDR_MODE) ==
-	      PSW_MASK_BA) &&
-	     (vcpu->arch.sie_block->gpsw.addr & ~PSW_ADDR_31)) ||
-	    (!(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_ADDR_MODE) &&
-	     (vcpu->arch.sie_block->gpsw.addr & ~PSW_ADDR_24)) ||
-	    ((vcpu->arch.sie_block->gpsw.mask & PSW_MASK_ADDR_MODE) ==
-	     PSW_MASK_EA)) {
-		kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
-		goto out;
-	}
-
+	if (addr & 7)
+		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+	if (copy_from_guest(vcpu, &new_psw, addr, sizeof(new_psw)))
+		return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+	vcpu->arch.sie_block->gpsw = new_psw;
+	if (!is_valid_psw(&vcpu->arch.sie_block->gpsw))
+		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
 	handle_new_psw(vcpu);
-out:
 	return 0;
 }
 
 static int handle_stidp(struct kvm_vcpu *vcpu)
 {
 	u64 operand2;
-	int rc;
 
 	vcpu->stat.instruction_stidp++;
 
 	operand2 = kvm_s390_get_base_disp_s(vcpu);
 
-	if (operand2 & 7) {
-		kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
-		goto out;
-	}
+	if (operand2 & 7)
+		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
 
-	rc = put_guest_u64(vcpu, operand2, vcpu->arch.stidp_data);
-	if (rc == -EFAULT) {
-		kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
-		goto out;
-	}
+	if (put_guest(vcpu, vcpu->arch.stidp_data, (u64 __user *)operand2))
+		return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
 
 	VCPU_EVENT(vcpu, 5, "%s", "store cpu id");
-out:
 	return 0;
 }
 
@@ -394,8 +344,9 @@
 	int fc = (vcpu->run->s.regs.gprs[0] & 0xf0000000) >> 28;
 	int sel1 = vcpu->run->s.regs.gprs[0] & 0xff;
 	int sel2 = vcpu->run->s.regs.gprs[1] & 0xffff;
+	unsigned long mem = 0;
 	u64 operand2;
-	unsigned long mem;
+	int rc = 0;
 
 	vcpu->stat.instruction_stsi++;
 	VCPU_EVENT(vcpu, 4, "stsi: fc: %x sel1: %x sel2: %x", fc, sel1, sel2);
@@ -414,37 +365,37 @@
 	case 2:
 		mem = get_zeroed_page(GFP_KERNEL);
 		if (!mem)
-			goto out_fail;
+			goto out_no_data;
 		if (stsi((void *) mem, fc, sel1, sel2))
-			goto out_mem;
+			goto out_no_data;
 		break;
 	case 3:
 		if (sel1 != 2 || sel2 != 2)
-			goto out_fail;
+			goto out_no_data;
 		mem = get_zeroed_page(GFP_KERNEL);
 		if (!mem)
-			goto out_fail;
+			goto out_no_data;
 		handle_stsi_3_2_2(vcpu, (void *) mem);
 		break;
 	default:
-		goto out_fail;
+		goto out_no_data;
 	}
 
 	if (copy_to_guest_absolute(vcpu, operand2, (void *) mem, PAGE_SIZE)) {
-		kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
-		goto out_mem;
+		rc = kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+		goto out_exception;
 	}
 	trace_kvm_s390_handle_stsi(vcpu, fc, sel1, sel2, operand2);
 	free_page(mem);
 	vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44);
 	vcpu->run->s.regs.gprs[0] = 0;
 	return 0;
-out_mem:
-	free_page(mem);
-out_fail:
+out_no_data:
 	/* condition code 3 */
 	vcpu->arch.sie_block->gpsw.mask |= 3ul << 44;
-	return 0;
+out_exception:
+	free_page(mem);
+	return rc;
 }
 
 static const intercept_handler_t b2_handlers[256] = {
@@ -575,20 +526,13 @@
 	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_DAT)
 		return -EOPNOTSUPP;
 
-
-	/* we must resolve the address without holding the mmap semaphore.
-	 * This is ok since the userspace hypervisor is not supposed to change
-	 * the mapping while the guest queries the memory. Otherwise the guest
-	 * might crash or get wrong info anyway. */
-	user_address = (unsigned long) __guestaddr_to_user(vcpu, address1);
-
 	down_read(&current->mm->mmap_sem);
+	user_address = __gmap_translate(address1, vcpu->arch.gmap);
+	if (IS_ERR_VALUE(user_address))
+		goto out_inject;
 	vma = find_vma(current->mm, user_address);
-	if (!vma) {
-		up_read(&current->mm->mmap_sem);
-		return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
-	}
-
+	if (!vma)
+		goto out_inject;
 	vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44);
 	if (!(vma->vm_flags & VM_WRITE) && (vma->vm_flags & VM_READ))
 		vcpu->arch.sie_block->gpsw.mask |= (1ul << 44);
@@ -597,6 +541,10 @@
 
 	up_read(&current->mm->mmap_sem);
 	return 0;
+
+out_inject:
+	up_read(&current->mm->mmap_sem);
+	return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
 }
 
 int kvm_s390_handle_e5(struct kvm_vcpu *vcpu)
diff --git a/arch/s390/lib/uaccess_pt.c b/arch/s390/lib/uaccess_pt.c
index 466fb33..50ea137 100644
--- a/arch/s390/lib/uaccess_pt.c
+++ b/arch/s390/lib/uaccess_pt.c
@@ -89,16 +89,19 @@
 		if (unlikely(*table & _REGION_ENTRY_INV))
 			return -0x39UL;
 		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
+		/* fallthrough */
 	case _ASCE_TYPE_REGION2:
 		table = table + ((address >> 42) & 0x7ff);
 		if (unlikely(*table & _REGION_ENTRY_INV))
 			return -0x3aUL;
 		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
+		/* fallthrough */
 	case _ASCE_TYPE_REGION3:
 		table = table + ((address >> 31) & 0x7ff);
 		if (unlikely(*table & _REGION_ENTRY_INV))
 			return -0x3bUL;
 		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
+		/* fallthrough */
 	case _ASCE_TYPE_SEGMENT:
 		table = table + ((address >> 20) & 0x7ff);
 		if (unlikely(*table & _SEGMENT_ENTRY_INV))
diff --git a/arch/s390/mm/Makefile b/arch/s390/mm/Makefile
index 640bea1..839592c 100644
--- a/arch/s390/mm/Makefile
+++ b/arch/s390/mm/Makefile
@@ -3,7 +3,7 @@
 #
 
 obj-y		:= init.o fault.o extmem.o mmap.o vmem.o pgtable.o maccess.o
-obj-y		+= page-states.o gup.o extable.o pageattr.o
+obj-y		+= page-states.o gup.o extable.o pageattr.o mem_detect.o
 
 obj-$(CONFIG_CMM)		+= cmm.o
 obj-$(CONFIG_HUGETLB_PAGE)	+= hugetlbpage.o
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index 0b09b23..89ebae4 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -21,6 +21,7 @@
 #include <linux/init.h>
 #include <linux/pagemap.h>
 #include <linux/bootmem.h>
+#include <linux/memory.h>
 #include <linux/pfn.h>
 #include <linux/poison.h>
 #include <linux/initrd.h>
@@ -36,6 +37,7 @@
 #include <asm/tlbflush.h>
 #include <asm/sections.h>
 #include <asm/ctl_reg.h>
+#include <asm/sclp.h>
 
 pgd_t swapper_pg_dir[PTRS_PER_PGD] __attribute__((__aligned__(PAGE_SIZE)));
 
@@ -214,6 +216,15 @@
 	return rc;
 }
 
+unsigned long memory_block_size_bytes(void)
+{
+	/*
+	 * Make sure the memory block size is always greater
+	 * than or equal to the memory increment size.
+	 */
+	return max_t(unsigned long, MIN_MEMORY_BLOCK_SIZE, sclp_get_rzm());
+}
+
 #ifdef CONFIG_MEMORY_HOTREMOVE
 int arch_remove_memory(u64 start, u64 size)
 {
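
The memory_block_size_bytes() override added above clamps the block size from below; a user-space sketch of that invariant, with sclp_get_rzm() stubbed to a hypothetical 128 MiB increment and the 256 MiB section size assumed:

#include <stdio.h>

#define MIN_MEMORY_BLOCK_SIZE	(1UL << 28)	/* assumed 256 MiB sections */

/* stub: pretend SCLP reported a 128 MiB storage increment */
static unsigned long sclp_get_rzm(void)
{
	return 1UL << 27;
}

/* never report memory blocks smaller than the hotplug increment */
static unsigned long memory_block_size_bytes(void)
{
	unsigned long rzm = sclp_get_rzm();

	return rzm > MIN_MEMORY_BLOCK_SIZE ? rzm : MIN_MEMORY_BLOCK_SIZE;
}

int main(void)
{
	printf("block size: %lu MiB\n", memory_block_size_bytes() >> 20);
	return 0;
}

Here the section size wins (256 MiB); on a machine with a larger increment, the increment would win.
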
diff --git a/arch/s390/mm/mem_detect.c b/arch/s390/mm/mem_detect.c
new file mode 100644
index 0000000..3cbd3b8
--- /dev/null
+++ b/arch/s390/mm/mem_detect.c
@@ -0,0 +1,134 @@
+/*
+ * Copyright IBM Corp. 2008, 2009
+ *
+ * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <asm/ipl.h>
+#include <asm/sclp.h>
+#include <asm/setup.h>
+
+#define ADDR2G (1ULL << 31)
+
+static void find_memory_chunks(struct mem_chunk chunk[], unsigned long maxsize)
+{
+	unsigned long long memsize, rnmax, rzm;
+	unsigned long addr = 0, size;
+	int i = 0, type;
+
+	rzm = sclp_get_rzm();
+	rnmax = sclp_get_rnmax();
+	memsize = rzm * rnmax;
+	if (!rzm)
+		rzm = 1ULL << 17;
+	if (sizeof(long) == 4) {
+		rzm = min(ADDR2G, rzm);
+		memsize = memsize ? min(ADDR2G, memsize) : ADDR2G;
+	}
+	if (maxsize)
+		memsize = memsize ? min((unsigned long)memsize, maxsize) : maxsize;
+	do {
+		size = 0;
+		type = tprot(addr);
+		do {
+			size += rzm;
+			if (memsize && addr + size >= memsize)
+				break;
+		} while (type == tprot(addr + size));
+		if (type == CHUNK_READ_WRITE || type == CHUNK_READ_ONLY) {
+			if (memsize && (addr + size > memsize))
+				size = memsize - addr;
+			chunk[i].addr = addr;
+			chunk[i].size = size;
+			chunk[i].type = type;
+			i++;
+		}
+		addr += size;
+	} while (addr < memsize && i < MEMORY_CHUNKS);
+}
+
+/**
+ * detect_memory_layout - fill mem_chunk array with memory layout data
+ * @chunk: mem_chunk array to be filled
+ * @maxsize: maximum address where memory detection should stop
+ *
+ * Fills the passed-in memory chunk array with the memory layout of the
+ * machine. The array must have a size of at least MEMORY_CHUNKS and will
+ * be fully initialized afterwards.
+ * If the maxsize parameter has a value > 0, memory detection will stop at
+ * that address. It is guaranteed that all chunks have an ending address
+ * that is smaller than maxsize.
+ * If maxsize is 0, all memory will be detected.
+ */
+void detect_memory_layout(struct mem_chunk chunk[], unsigned long maxsize)
+{
+	unsigned long flags, flags_dat, cr0;
+
+	memset(chunk, 0, MEMORY_CHUNKS * sizeof(struct mem_chunk));
+	/*
+	 * Disable IRQs, DAT and low address protection so tprot does the
+	 * right thing and we don't get scheduled away with low address
+	 * protection disabled.
+	 */
+	local_irq_save(flags);
+	flags_dat = __arch_local_irq_stnsm(0xfb);
+	/*
+	 * In case DAT was enabled, make sure chunk doesn't reside in vmalloc
+	 * space. We have just disabled DAT, so any access to the vmalloc
+	 * area would cause an exception.
+	 * If DAT was already disabled, we were called from early ipl code.
+	 */
+	if (test_bit(5, &flags_dat)) {
+		if (WARN_ON_ONCE(is_vmalloc_or_module_addr(chunk)))
+			goto out;
+	}
+	__ctl_store(cr0, 0, 0);
+	__ctl_clear_bit(0, 28);
+	find_memory_chunks(chunk, maxsize);
+	__ctl_load(cr0, 0, 0);
+out:
+	__arch_local_irq_ssm(flags_dat);
+	local_irq_restore(flags);
+}
+EXPORT_SYMBOL(detect_memory_layout);
+
+/*
+ * Create memory hole with given address and size.
+ */
+void create_mem_hole(struct mem_chunk mem_chunk[], unsigned long addr,
+		     unsigned long size)
+{
+	int i;
+
+	for (i = 0; i < MEMORY_CHUNKS; i++) {
+		struct mem_chunk *chunk = &mem_chunk[i];
+
+		if (chunk->size == 0)
+			continue;
+		if (addr > chunk->addr + chunk->size)
+			continue;
+		if (addr + size <= chunk->addr)
+			continue;
+		/* Split */
+		if ((addr > chunk->addr) &&
+		    (addr + size < chunk->addr + chunk->size)) {
+			struct mem_chunk *new = chunk + 1;
+
+			memmove(new, chunk, (MEMORY_CHUNKS-i-1) * sizeof(*new));
+			new->addr = addr + size;
+			new->size = chunk->addr + chunk->size - new->addr;
+			chunk->size = addr - chunk->addr;
+			continue;
+		} else if ((addr <= chunk->addr) &&
+			   (addr + size >= chunk->addr + chunk->size)) {
+			memset(chunk, 0, sizeof(*chunk));
+		} else if (addr + size < chunk->addr + chunk->size) {
+			chunk->size = chunk->addr + chunk->size - addr - size;
+			chunk->addr = addr + size;
+		} else if (addr > chunk->addr) {
+			chunk->size = addr - chunk->addr;
+		}
+	}
+}
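
For reference, the four cases in create_mem_hole() (split, swallow, trim front, trim back) can be exercised in user space; a sketch with invented chunk values and struct mem_chunk reduced to the fields used here:

#include <stdio.h>
#include <string.h>

#define MEMORY_CHUNKS 4

struct mem_chunk { unsigned long addr, size; };

/* same case analysis as the kernel version above */
static void create_mem_hole(struct mem_chunk mem_chunk[], unsigned long addr,
			    unsigned long size)
{
	int i;

	for (i = 0; i < MEMORY_CHUNKS; i++) {
		struct mem_chunk *chunk = &mem_chunk[i];

		if (chunk->size == 0)
			continue;
		if (addr > chunk->addr + chunk->size)
			continue;
		if (addr + size <= chunk->addr)
			continue;
		if ((addr > chunk->addr) &&
		    (addr + size < chunk->addr + chunk->size)) {
			struct mem_chunk *new = chunk + 1;

			/* split: shift the tail up, carve out two pieces */
			memmove(new, chunk, (MEMORY_CHUNKS-i-1) * sizeof(*new));
			new->addr = addr + size;
			new->size = chunk->addr + chunk->size - new->addr;
			chunk->size = addr - chunk->addr;
		} else if ((addr <= chunk->addr) &&
			   (addr + size >= chunk->addr + chunk->size)) {
			memset(chunk, 0, sizeof(*chunk));	/* swallowed */
		} else if (addr + size < chunk->addr + chunk->size) {
			chunk->size = chunk->addr + chunk->size - addr - size;
			chunk->addr = addr + size;		/* trim front */
		} else {
			chunk->size = addr - chunk->addr;	/* trim back */
		}
	}
}

int main(void)
{
	struct mem_chunk c[MEMORY_CHUNKS] = { { 0, 0x40000000UL } };
	int i;

	/* punch a 256 MiB hole at 512 MiB: [0,1G) splits into two chunks */
	create_mem_hole(c, 0x20000000UL, 0x10000000UL);
	for (i = 0; i < MEMORY_CHUNKS; i++)
		if (c[i].size)
			printf("chunk %d: %#lx +%#lx\n", i, c[i].addr, c[i].size);
	return 0;
}
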
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index bd954e9..7805ddc 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -454,9 +454,8 @@
 }
 EXPORT_SYMBOL_GPL(gmap_translate);
 
-static int gmap_connect_pgtable(unsigned long segment,
-				unsigned long *segment_ptr,
-				struct gmap *gmap)
+static int gmap_connect_pgtable(unsigned long address, unsigned long segment,
+				unsigned long *segment_ptr, struct gmap *gmap)
 {
 	unsigned long vmaddr;
 	struct vm_area_struct *vma;
@@ -491,7 +490,9 @@
 	/* Link gmap segment table entry location to page table. */
 	page = pmd_page(*pmd);
 	mp = (struct gmap_pgtable *) page->index;
+	rmap->gmap = gmap;
 	rmap->entry = segment_ptr;
+	rmap->vmaddr = address;
 	spin_lock(&mm->page_table_lock);
 	if (*segment_ptr == segment) {
 		list_add(&rmap->list, &mp->mapper);
@@ -553,7 +554,7 @@
 		if (!(segment & _SEGMENT_ENTRY_RO))
 			/* Nothing mapped in the gmap address space. */
 			break;
-		rc = gmap_connect_pgtable(segment, segment_ptr, gmap);
+		rc = gmap_connect_pgtable(address, segment, segment_ptr, gmap);
 		if (rc)
 			return rc;
 	}
@@ -619,6 +620,118 @@
 }
 EXPORT_SYMBOL_GPL(gmap_discard);
 
+static LIST_HEAD(gmap_notifier_list);
+static DEFINE_SPINLOCK(gmap_notifier_lock);
+
+/**
+ * gmap_register_ipte_notifier - register a pte invalidation callback
+ * @nb: pointer to the gmap notifier block
+ */
+void gmap_register_ipte_notifier(struct gmap_notifier *nb)
+{
+	spin_lock(&gmap_notifier_lock);
+	list_add(&nb->list, &gmap_notifier_list);
+	spin_unlock(&gmap_notifier_lock);
+}
+EXPORT_SYMBOL_GPL(gmap_register_ipte_notifier);
+
+/**
+ * gmap_unregister_ipte_notifier - remove a pte invalidation callback
+ * @nb: pointer to the gmap notifier block
+ */
+void gmap_unregister_ipte_notifier(struct gmap_notifier *nb)
+{
+	spin_lock(&gmap_notifier_lock);
+	list_del_init(&nb->list);
+	spin_unlock(&gmap_notifier_lock);
+}
+EXPORT_SYMBOL_GPL(gmap_unregister_ipte_notifier);
+
+/**
+ * gmap_ipte_notify - mark a range of ptes for invalidation notification
+ * @gmap: pointer to guest mapping meta data structure
+ * @start: virtual address in the guest address space
+ * @len: size of area
+ *
+ * Returns 0 if, for each page in the given range, a gmap mapping exists and
+ * the invalidation notification could be set. Returns -EFAULT if the gmap
+ * mapping is missing for one or more pages, and -ENOMEM if no memory could
+ * be allocated. This function establishes missing page table entries.
+ */
+int gmap_ipte_notify(struct gmap *gmap, unsigned long start, unsigned long len)
+{
+	unsigned long addr;
+	spinlock_t *ptl;
+	pte_t *ptep, entry;
+	pgste_t pgste;
+	int rc = 0;
+
+	if ((start & ~PAGE_MASK) || (len & ~PAGE_MASK))
+		return -EINVAL;
+	down_read(&gmap->mm->mmap_sem);
+	while (len) {
+		/* Convert gmap address and connect the page tables */
+		addr = __gmap_fault(start, gmap);
+		if (IS_ERR_VALUE(addr)) {
+			rc = addr;
+			break;
+		}
+		/* Get the page mapped */
+		if (get_user_pages(current, gmap->mm, addr, 1, 1, 0,
+				   NULL, NULL) != 1) {
+			rc = -EFAULT;
+			break;
+		}
+		/* Walk the process page table, lock and get pte pointer */
+		ptep = get_locked_pte(gmap->mm, addr, &ptl);
+		if (unlikely(!ptep))
+			continue;
+		/* Set notification bit in the pgste of the pte */
+		entry = *ptep;
+		if ((pte_val(entry) & (_PAGE_INVALID | _PAGE_RO)) == 0) {
+			pgste = pgste_get_lock(ptep);
+			pgste_val(pgste) |= RCP_IN_BIT;
+			pgste_set_unlock(ptep, pgste);
+			start += PAGE_SIZE;
+			len -= PAGE_SIZE;
+		}
+		spin_unlock(ptl);
+	}
+	up_read(&gmap->mm->mmap_sem);
+	return rc;
+}
+EXPORT_SYMBOL_GPL(gmap_ipte_notify);
+
+/**
+ * gmap_do_ipte_notify - call all invalidation callbacks for a specific pte.
+ * @mm: pointer to the process mm_struct
+ * @addr: virtual address in the process address space
+ * @pte: pointer to the page table entry
+ *
+ * This function is assumed to be called with the page table lock held
+ * for the pte to notify.
+ */
+void gmap_do_ipte_notify(struct mm_struct *mm, unsigned long addr, pte_t *pte)
+{
+	unsigned long segment_offset;
+	struct gmap_notifier *nb;
+	struct gmap_pgtable *mp;
+	struct gmap_rmap *rmap;
+	struct page *page;
+
+	segment_offset = ((unsigned long) pte) & (255 * sizeof(pte_t));
+	segment_offset = segment_offset * (4096 / sizeof(pte_t));
+	page = pfn_to_page(__pa(pte) >> PAGE_SHIFT);
+	mp = (struct gmap_pgtable *) page->index;
+	spin_lock(&gmap_notifier_lock);
+	list_for_each_entry(rmap, &mp->mapper, list) {
+		list_for_each_entry(nb, &gmap_notifier_list, list)
+			nb->notifier_call(rmap->gmap,
+					  rmap->vmaddr + segment_offset);
+	}
+	spin_unlock(&gmap_notifier_lock);
+}
+
 static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm,
 						    unsigned long vmaddr)
 {
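
A sketch of how a consumer of the new notifier API is expected to wire it up, inferred only from the signatures above; the callback body, names, and the watched range are illustrative:

/* illustrative consumer of the gmap ipte notifier API added above */
static void my_notifier_call(struct gmap *gmap, unsigned long address)
{
	/* a real handler would e.g. kick the vcpu backed by this gmap */
}

static struct gmap_notifier my_notifier = {
	.notifier_call = my_notifier_call,
};

static int my_watch_prefix_page(struct gmap *gmap)
{
	int rc;

	gmap_register_ipte_notifier(&my_notifier);
	/*
	 * Watch one page at guest address 0: any invalidation of its pte
	 * now reaches my_notifier_call() via gmap_do_ipte_notify().
	 */
	rc = gmap_ipte_notify(gmap, 0, PAGE_SIZE);
	if (rc)
		gmap_unregister_ipte_notifier(&my_notifier);
	return rc;
}
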
diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c
index 3583705..8b268fc 100644
--- a/arch/s390/mm/vmem.c
+++ b/arch/s390/mm/vmem.c
@@ -375,9 +375,8 @@
 
 	ro_start = PFN_ALIGN((unsigned long)&_stext);
 	ro_end = (unsigned long)&_eshared & PAGE_MASK;
-	for (i = 0; i < MEMORY_CHUNKS && memory_chunk[i].size > 0; i++) {
-		if (memory_chunk[i].type == CHUNK_CRASHK ||
-		    memory_chunk[i].type == CHUNK_OLDMEM)
+	for (i = 0; i < MEMORY_CHUNKS; i++) {
+		if (!memory_chunk[i].size)
 			continue;
 		start = memory_chunk[i].addr;
 		end = memory_chunk[i].addr + memory_chunk[i].size;
@@ -412,9 +411,6 @@
 	for (i = 0; i < MEMORY_CHUNKS; i++) {
 		if (!memory_chunk[i].size)
 			continue;
-		if (memory_chunk[i].type == CHUNK_CRASHK ||
-		    memory_chunk[i].type == CHUNK_OLDMEM)
-			continue;
 		seg = kzalloc(sizeof(*seg), GFP_KERNEL);
 		if (!seg)
 			panic("Out of memory...\n");
diff --git a/arch/um/include/shared/common-offsets.h b/arch/um/include/shared/common-offsets.h
index 2df313b..c923068 100644
--- a/arch/um/include/shared/common-offsets.h
+++ b/arch/um/include/shared/common-offsets.h
@@ -30,8 +30,8 @@
 #ifdef CONFIG_PRINTK
 DEFINE(UML_CONFIG_PRINTK, CONFIG_PRINTK);
 #endif
-#ifdef CONFIG_NO_HZ
-DEFINE(UML_CONFIG_NO_HZ, CONFIG_NO_HZ);
+#ifdef CONFIG_NO_HZ_COMMON
+DEFINE(UML_CONFIG_NO_HZ_COMMON, CONFIG_NO_HZ_COMMON);
 #endif
 #ifdef CONFIG_UML_X86
 DEFINE(UML_CONFIG_UML_X86, CONFIG_UML_X86);
diff --git a/arch/um/os-Linux/time.c b/arch/um/os-Linux/time.c
index fac388c..e9824d5 100644
--- a/arch/um/os-Linux/time.c
+++ b/arch/um/os-Linux/time.c
@@ -79,7 +79,7 @@
 	return timeval_to_ns(&tv);
 }
 
-#ifdef UML_CONFIG_NO_HZ
+#ifdef UML_CONFIG_NO_HZ_COMMON
 static int after_sleep_interval(struct timespec *ts)
 {
 	return 0;
diff --git a/arch/x86/include/asm/entry_arch.h b/arch/x86/include/asm/entry_arch.h
index 40afa00..9bd4ecac 100644
--- a/arch/x86/include/asm/entry_arch.h
+++ b/arch/x86/include/asm/entry_arch.h
@@ -19,6 +19,10 @@
 
 BUILD_INTERRUPT(x86_platform_ipi, X86_PLATFORM_IPI_VECTOR)
 
+#ifdef CONFIG_HAVE_KVM
+BUILD_INTERRUPT(kvm_posted_intr_ipi, POSTED_INTR_VECTOR)
+#endif
+
 /*
  * every pentium local APIC has two 'local interrupts', with a
  * soft-definable vector attached to both interrupts, one of
diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h
index 81f04ce..ab0ae1a 100644
--- a/arch/x86/include/asm/hardirq.h
+++ b/arch/x86/include/asm/hardirq.h
@@ -12,6 +12,9 @@
 	unsigned int irq_spurious_count;
 	unsigned int icr_read_retry_count;
 #endif
+#ifdef CONFIG_HAVE_KVM
+	unsigned int kvm_posted_intr_ipis;
+#endif
 	unsigned int x86_platform_ipis;	/* arch dependent */
 	unsigned int apic_perf_irqs;
 	unsigned int apic_irq_work_irqs;
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index 10a78c3..1da97ef 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -28,6 +28,7 @@
 /* Interrupt handlers registered during init_IRQ */
 extern void apic_timer_interrupt(void);
 extern void x86_platform_ipi(void);
+extern void kvm_posted_intr_ipi(void);
 extern void error_interrupt(void);
 extern void irq_work_interrupt(void);
 
diff --git a/arch/x86/include/asm/irq_remapping.h b/arch/x86/include/asm/irq_remapping.h
index 95fd352..d806b22 100644
--- a/arch/x86/include/asm/irq_remapping.h
+++ b/arch/x86/include/asm/irq_remapping.h
@@ -24,10 +24,18 @@
 
 #include <asm/io_apic.h>
 
+struct IO_APIC_route_entry;
+struct io_apic_irq_attr;
+struct irq_chip;
+struct msi_msg;
+struct pci_dev;
+struct irq_cfg;
+
 #ifdef CONFIG_IRQ_REMAP
 
 extern void setup_irq_remapping_ops(void);
 extern int irq_remapping_supported(void);
+extern void set_irq_remapping_broken(void);
 extern int irq_remapping_prepare(void);
 extern int irq_remapping_enable(void);
 extern void irq_remapping_disable(void);
@@ -54,6 +62,7 @@
 
 static inline void setup_irq_remapping_ops(void) { }
 static inline int irq_remapping_supported(void) { return 0; }
+static inline void set_irq_remapping_broken(void) { }
 static inline int irq_remapping_prepare(void) { return -ENODEV; }
 static inline int irq_remapping_enable(void) { return -ENODEV; }
 static inline void irq_remapping_disable(void) { }
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
index aac5fa6..5702d7e 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -102,6 +102,11 @@
  */
 #define X86_PLATFORM_IPI_VECTOR		0xf7
 
+/* Vector for KVM to deliver posted interrupt IPI */
+#ifdef CONFIG_HAVE_KVM
+#define POSTED_INTR_VECTOR		0xf2
+#endif
+
 /*
  * IRQ work vector:
  */
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 4979778..3741c65 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -31,7 +31,7 @@
 #include <asm/msr-index.h>
 #include <asm/asm.h>
 
-#define KVM_MAX_VCPUS 254
+#define KVM_MAX_VCPUS 255
 #define KVM_SOFT_MAX_VCPUS 160
 #define KVM_USER_MEM_SLOTS 125
 /* memory slots that are not exposed to userspace */
@@ -43,6 +43,8 @@
 #define KVM_PIO_PAGE_OFFSET 1
 #define KVM_COALESCED_MMIO_PAGE_OFFSET 2
 
+#define KVM_IRQCHIP_NUM_PINS  KVM_IOAPIC_NUM_PINS
+
 #define CR0_RESERVED_BITS                                               \
 	(~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \
 			  | X86_CR0_ET | X86_CR0_NE | X86_CR0_WP | X86_CR0_AM \
@@ -94,9 +96,6 @@
 
 #define ASYNC_PF_PER_VCPU 64
 
-extern raw_spinlock_t kvm_lock;
-extern struct list_head vm_list;
-
 struct kvm_vcpu;
 struct kvm;
 struct kvm_async_pf;
@@ -230,6 +229,7 @@
 #endif
 
 	int write_flooding_count;
+	bool mmio_cached;
 };
 
 struct kvm_pio_request {
@@ -345,7 +345,6 @@
 	unsigned long apic_attention;
 	int32_t apic_arb_prio;
 	int mp_state;
-	int sipi_vector;
 	u64 ia32_misc_enable_msr;
 	bool tpr_access_reporting;
 
@@ -643,7 +642,7 @@
 	/* Create, but do not attach this VCPU */
 	struct kvm_vcpu *(*vcpu_create)(struct kvm *kvm, unsigned id);
 	void (*vcpu_free)(struct kvm_vcpu *vcpu);
-	int (*vcpu_reset)(struct kvm_vcpu *vcpu);
+	void (*vcpu_reset)(struct kvm_vcpu *vcpu);
 
 	void (*prepare_guest_switch)(struct kvm_vcpu *vcpu);
 	void (*vcpu_load)(struct kvm_vcpu *vcpu, int cpu);
@@ -696,14 +695,16 @@
 	int (*nmi_allowed)(struct kvm_vcpu *vcpu);
 	bool (*get_nmi_mask)(struct kvm_vcpu *vcpu);
 	void (*set_nmi_mask)(struct kvm_vcpu *vcpu, bool masked);
-	void (*enable_nmi_window)(struct kvm_vcpu *vcpu);
-	void (*enable_irq_window)(struct kvm_vcpu *vcpu);
+	int (*enable_nmi_window)(struct kvm_vcpu *vcpu);
+	int (*enable_irq_window)(struct kvm_vcpu *vcpu);
 	void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr);
 	int (*vm_has_apicv)(struct kvm *kvm);
 	void (*hwapic_irr_update)(struct kvm_vcpu *vcpu, int max_irr);
 	void (*hwapic_isr_update)(struct kvm *kvm, int isr);
 	void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap);
 	void (*set_virtual_x2apic_mode)(struct kvm_vcpu *vcpu, bool set);
+	void (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector);
+	void (*sync_pir_to_irr)(struct kvm_vcpu *vcpu);
 	int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
 	int (*get_tdp_level)(void);
 	u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio);
@@ -730,6 +731,7 @@
 	int (*check_intercept)(struct kvm_vcpu *vcpu,
 			       struct x86_instruction_info *info,
 			       enum x86_intercept_stage stage);
+	void (*handle_external_intr)(struct kvm_vcpu *vcpu);
 };
 
 struct kvm_arch_async_pf {
@@ -767,6 +769,7 @@
 				     struct kvm_memory_slot *slot,
 				     gfn_t gfn_offset, unsigned long mask);
 void kvm_mmu_zap_all(struct kvm *kvm);
+void kvm_mmu_zap_mmio_sptes(struct kvm *kvm);
 unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm);
 void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages);
 
@@ -797,6 +800,7 @@
 #define EMULTYPE_TRAP_UD	    (1 << 1)
 #define EMULTYPE_SKIP		    (1 << 2)
 #define EMULTYPE_RETRY		    (1 << 3)
+#define EMULTYPE_NO_REEXECUTE	    (1 << 4)
 int x86_emulate_instruction(struct kvm_vcpu *vcpu, unsigned long cr2,
 			    int emulation_type, void *insn, int insn_len);
 
@@ -807,6 +811,7 @@
 }
 
 void kvm_enable_efer_bits(u64);
+bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer);
 int kvm_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *data);
 int kvm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr);
 
@@ -819,6 +824,7 @@
 
 void kvm_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg);
 int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, int seg);
+void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, unsigned int vector);
 
 int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index,
 		    int reason, bool has_error_code, u32 error_code);
@@ -973,7 +979,6 @@
  * Trap the fault and ignore the instruction if that happens.
  */
 asmlinkage void kvm_spurious_fault(void);
-extern bool kvm_rebooting;
 
 #define ____kvm_handle_fault_on_reboot(insn, cleanup_insn)	\
 	"666: " insn "\n\t" \
@@ -1002,6 +1007,7 @@
 int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu);
 int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu);
 int kvm_cpu_get_interrupt(struct kvm_vcpu *v);
+void kvm_vcpu_reset(struct kvm_vcpu *vcpu);
 
 void kvm_define_shared_msr(unsigned index, u32 msr);
 void kvm_set_shared_msr(unsigned index, u64 val, u64 mask);
@@ -1027,7 +1033,7 @@
 void kvm_pmu_cpuid_update(struct kvm_vcpu *vcpu);
 bool kvm_pmu_msr(struct kvm_vcpu *vcpu, u32 msr);
 int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *data);
-int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data);
+int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info);
 int kvm_pmu_read_pmc(struct kvm_vcpu *vcpu, unsigned pmc, u64 *data);
 void kvm_handle_pmu_event(struct kvm_vcpu *vcpu);
 void kvm_deliver_pmi(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index b6fbf86..f3e01a2 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -65,11 +65,16 @@
 #define SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY    0x00000200
 #define SECONDARY_EXEC_PAUSE_LOOP_EXITING	0x00000400
 #define SECONDARY_EXEC_ENABLE_INVPCID		0x00001000
+#define SECONDARY_EXEC_SHADOW_VMCS              0x00004000
 
 
 #define PIN_BASED_EXT_INTR_MASK                 0x00000001
 #define PIN_BASED_NMI_EXITING                   0x00000008
 #define PIN_BASED_VIRTUAL_NMIS                  0x00000020
+#define PIN_BASED_VMX_PREEMPTION_TIMER          0x00000040
+#define PIN_BASED_POSTED_INTR                   0x00000080
+
+#define PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR	0x00000016
 
 #define VM_EXIT_SAVE_DEBUG_CONTROLS             0x00000002
 #define VM_EXIT_HOST_ADDR_SPACE_SIZE            0x00000200
@@ -81,6 +86,8 @@
 #define VM_EXIT_LOAD_IA32_EFER                  0x00200000
 #define VM_EXIT_SAVE_VMX_PREEMPTION_TIMER       0x00400000
 
+#define VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR	0x00036dff
+
 #define VM_ENTRY_LOAD_DEBUG_CONTROLS            0x00000002
 #define VM_ENTRY_IA32E_MODE                     0x00000200
 #define VM_ENTRY_SMM                            0x00000400
@@ -89,9 +96,15 @@
 #define VM_ENTRY_LOAD_IA32_PAT			0x00004000
 #define VM_ENTRY_LOAD_IA32_EFER                 0x00008000
 
+#define VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR	0x000011ff
+
+#define VMX_MISC_PREEMPTION_TIMER_RATE_MASK	0x0000001f
+#define VMX_MISC_SAVE_EFER_LMA			0x00000020
+
 /* VMCS Encodings */
 enum vmcs_field {
 	VIRTUAL_PROCESSOR_ID            = 0x00000000,
+	POSTED_INTR_NV                  = 0x00000002,
 	GUEST_ES_SELECTOR               = 0x00000800,
 	GUEST_CS_SELECTOR               = 0x00000802,
 	GUEST_SS_SELECTOR               = 0x00000804,
@@ -126,6 +139,8 @@
 	VIRTUAL_APIC_PAGE_ADDR_HIGH     = 0x00002013,
 	APIC_ACCESS_ADDR		= 0x00002014,
 	APIC_ACCESS_ADDR_HIGH		= 0x00002015,
+	POSTED_INTR_DESC_ADDR           = 0x00002016,
+	POSTED_INTR_DESC_ADDR_HIGH      = 0x00002017,
 	EPT_POINTER                     = 0x0000201a,
 	EPT_POINTER_HIGH                = 0x0000201b,
 	EOI_EXIT_BITMAP0                = 0x0000201c,
@@ -136,6 +151,8 @@
 	EOI_EXIT_BITMAP2_HIGH           = 0x00002021,
 	EOI_EXIT_BITMAP3                = 0x00002022,
 	EOI_EXIT_BITMAP3_HIGH           = 0x00002023,
+	VMREAD_BITMAP                   = 0x00002026,
+	VMWRITE_BITMAP                  = 0x00002028,
 	GUEST_PHYSICAL_ADDRESS          = 0x00002400,
 	GUEST_PHYSICAL_ADDRESS_HIGH     = 0x00002401,
 	VMCS_LINK_POINTER               = 0x00002800,
@@ -209,6 +226,7 @@
 	GUEST_INTERRUPTIBILITY_INFO     = 0x00004824,
 	GUEST_ACTIVITY_STATE            = 0X00004826,
 	GUEST_SYSENTER_CS               = 0x0000482A,
+	VMX_PREEMPTION_TIMER_VALUE      = 0x0000482E,
 	HOST_IA32_SYSENTER_CS           = 0x00004c00,
 	CR0_GUEST_HOST_MASK             = 0x00006000,
 	CR4_GUEST_HOST_MASK             = 0x00006002,
diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
index a65ec29..5d9a303 100644
--- a/arch/x86/include/uapi/asm/kvm.h
+++ b/arch/x86/include/uapi/asm/kvm.h
@@ -29,7 +29,6 @@
 #define __KVM_HAVE_PIT
 #define __KVM_HAVE_IOAPIC
 #define __KVM_HAVE_IRQ_LINE
-#define __KVM_HAVE_DEVICE_ASSIGNMENT
 #define __KVM_HAVE_MSI
 #define __KVM_HAVE_USER_NMI
 #define __KVM_HAVE_GUEST_DEBUG
diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h
index b575788..b3a4866 100644
--- a/arch/x86/include/uapi/asm/msr-index.h
+++ b/arch/x86/include/uapi/asm/msr-index.h
@@ -528,6 +528,8 @@
 #define VMX_BASIC_MEM_TYPE_WB	6LLU
 #define VMX_BASIC_INOUT		0x0040000000000000LLU
 
+/* MSR_IA32_VMX_MISC bits */
+#define MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS (1ULL << 29)
 /* AMD-V MSRs */
 
 #define MSR_VM_CR                       0xc0010114
diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h
index 2871fcc..d651082 100644
--- a/arch/x86/include/uapi/asm/vmx.h
+++ b/arch/x86/include/uapi/asm/vmx.h
@@ -65,6 +65,7 @@
 #define EXIT_REASON_EOI_INDUCED         45
 #define EXIT_REASON_EPT_VIOLATION       48
 #define EXIT_REASON_EPT_MISCONFIG       49
+#define EXIT_REASON_PREEMPTION_TIMER    52
 #define EXIT_REASON_WBINVD              54
 #define EXIT_REASON_XSETBV              55
 #define EXIT_REASON_APIC_WRITE          56
@@ -110,7 +111,7 @@
 	{ EXIT_REASON_EOI_INDUCED,           "EOI_INDUCED" }, \
 	{ EXIT_REASON_INVALID_STATE,         "INVALID_STATE" }, \
 	{ EXIT_REASON_INVD,                  "INVD" }, \
-	{ EXIT_REASON_INVPCID,               "INVPCID" }
-
+	{ EXIT_REASON_INVPCID,               "INVPCID" }, \
+	{ EXIT_REASON_PREEMPTION_TIMER,      "PREEMPTION_TIMER" }
 
 #endif /* _UAPIVMX_H */
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index ffd6050..f60d41f 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -128,10 +128,15 @@
 	INTEL_UEVENT_CONSTRAINT(0x08a3, 0x4), /* CYCLE_ACTIVITY.CYCLES_L1D_PENDING */
 	INTEL_UEVENT_CONSTRAINT(0x0ca3, 0x4), /* CYCLE_ACTIVITY.STALLS_L1D_PENDING */
 	INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */
-	INTEL_EVENT_CONSTRAINT(0xd0, 0xf), /* MEM_UOPS_RETIRED.* */
-	INTEL_EVENT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */
-	INTEL_EVENT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
-	INTEL_EVENT_CONSTRAINT(0xd3, 0xf), /*  MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */
+	/*
+	 * Erratum BV98 -- MEM_*_RETIRED events can leak between counters of SMT
+	 * siblings; disable these events because they can corrupt unrelated
+	 * counters.
+	 */
+	INTEL_EVENT_CONSTRAINT(0xd0, 0x0), /* MEM_UOPS_RETIRED.* */
+	INTEL_EVENT_CONSTRAINT(0xd1, 0x0), /* MEM_LOAD_UOPS_RETIRED.* */
+	INTEL_EVENT_CONSTRAINT(0xd2, 0x0), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
+	INTEL_EVENT_CONSTRAINT(0xd3, 0x0), /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */
 	EVENT_CONSTRAINT_END
 };
 
diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
index da02e9c..d978353 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
@@ -310,7 +310,7 @@
  * - in case there is no HW filter
  * - in case the HW filter has errata or limitations
  */
-static void intel_pmu_setup_sw_lbr_filter(struct perf_event *event)
+static int intel_pmu_setup_sw_lbr_filter(struct perf_event *event)
 {
 	u64 br_type = event->attr.branch_sample_type;
 	int mask = 0;
@@ -318,8 +318,11 @@
 	if (br_type & PERF_SAMPLE_BRANCH_USER)
 		mask |= X86_BR_USER;
 
-	if (br_type & PERF_SAMPLE_BRANCH_KERNEL)
+	if (br_type & PERF_SAMPLE_BRANCH_KERNEL) {
+		if (perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN))
+			return -EACCES;
 		mask |= X86_BR_KERNEL;
+	}
 
 	/* we ignore BRANCH_HV here */
 
@@ -339,6 +342,8 @@
 	 * be used by fixup code for some CPU
 	 */
 	event->hw.branch_reg.reg = mask;
+
+	return 0;
 }
 
 /*
@@ -386,7 +391,9 @@
 	/*
 	 * setup SW LBR filter
 	 */
-	intel_pmu_setup_sw_lbr_filter(event);
+	ret = intel_pmu_setup_sw_lbr_filter(event);
+	if (ret)
+		return ret;
 
 	/*
 	 * setup HW LBR filter, if any
@@ -442,8 +449,18 @@
 			return X86_BR_NONE;
 
 		addr = buf;
-	} else
-		addr = (void *)from;
+	} else {
+		/*
+		 * The LBR logs any address in the IP, even if the IP just
+		 * faulted. This means userspace can control the from address.
+		 * Ensure we don't blindly read any address by validating it is
+		 * a known text address.
+		 */
+		if (kernel_text_address(from))
+			addr = (void *)from;
+		else
+			return X86_BR_NONE;
+	}
 
 	/*
 	 * decoder needs to know the ABI especially
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
index d0f9e5a..52441a2 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
@@ -3093,7 +3093,7 @@
 static int __init uncore_type_init(struct intel_uncore_type *type)
 {
 	struct intel_uncore_pmu *pmus;
-	struct attribute_group *events_group;
+	struct attribute_group *attr_group;
 	struct attribute **attrs;
 	int i, j;
 
@@ -3120,19 +3120,19 @@
 		while (type->event_descs[i].attr.attr.name)
 			i++;
 
-		events_group = kzalloc(sizeof(struct attribute *) * (i + 1) +
-					sizeof(*events_group), GFP_KERNEL);
-		if (!events_group)
+		attr_group = kzalloc(sizeof(struct attribute *) * (i + 1) +
+					sizeof(*attr_group), GFP_KERNEL);
+		if (!attr_group)
 			goto fail;
 
-		attrs = (struct attribute **)(events_group + 1);
-		events_group->name = "events";
-		events_group->attrs = attrs;
+		attrs = (struct attribute **)(attr_group + 1);
+		attr_group->name = "events";
+		attr_group->attrs = attrs;
 
 		for (j = 0; j < i; j++)
 			attrs[j] = &type->event_descs[j].attr.attr;
 
-		type->events_group = events_group;
+		type->events_group = attr_group;
 	}
 
 	type->pmu_group = &uncore_pmu_attr_group;
@@ -3545,11 +3545,12 @@
 		msr_uncores = nhm_msr_uncores;
 		break;
 	case 42: /* Sandy Bridge */
+	case 58: /* Ivy Bridge */
 		if (snb_uncore_cbox.num_boxes > max_cores)
 			snb_uncore_cbox.num_boxes = max_cores;
 		msr_uncores = snb_msr_uncores;
 		break;
-	case 45: /* Sandy Birdge-EP */
+	case 45: /* Sandy Bridge-EP */
 		if (snbep_uncore_cbox.num_boxes > max_cores)
 			snbep_uncore_cbox.num_boxes = max_cores;
 		msr_uncores = snbep_msr_uncores;
diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c
index 3755ef4..94ab6b9 100644
--- a/arch/x86/kernel/early-quirks.c
+++ b/arch/x86/kernel/early-quirks.c
@@ -18,6 +18,7 @@
 #include <asm/apic.h>
 #include <asm/iommu.h>
 #include <asm/gart.h>
+#include <asm/irq_remapping.h>
 
 static void __init fix_hypertransport_config(int num, int slot, int func)
 {
@@ -192,6 +193,21 @@
 }
 #endif
 
+static void __init intel_remapping_check(int num, int slot, int func)
+{
+	u8 revision;
+
+	revision = read_pci_config_byte(num, slot, func, PCI_REVISION_ID);
+
+	/*
+	 * Revision 0x13 of this chipset supports irq remapping,
+	 * but has an erratum that breaks its behavior; flag it as such.
+	 */
+	if (revision == 0x13)
+		set_irq_remapping_broken();
+
+}
+
 #define QFLAG_APPLY_ONCE 	0x1
 #define QFLAG_APPLIED		0x2
 #define QFLAG_DONE		(QFLAG_APPLY_ONCE|QFLAG_APPLIED)
@@ -221,6 +237,10 @@
 	  PCI_CLASS_SERIAL_SMBUS, PCI_ANY_ID, 0, ati_bugs },
 	{ PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_SBX00_SMBUS,
 	  PCI_CLASS_SERIAL_SMBUS, PCI_ANY_ID, 0, ati_bugs_contd },
+	{ PCI_VENDOR_ID_INTEL, 0x3403, PCI_CLASS_BRIDGE_HOST,
+	  PCI_BASE_CLASS_BRIDGE, 0, intel_remapping_check },
+	{ PCI_VENDOR_ID_INTEL, 0x3406, PCI_CLASS_BRIDGE_HOST,
+	  PCI_BASE_CLASS_BRIDGE, 0, intel_remapping_check },
 	{}
 };
 
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index c1d01e6..7272089 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -1166,6 +1166,11 @@
 apicinterrupt X86_PLATFORM_IPI_VECTOR \
 	x86_platform_ipi smp_x86_platform_ipi
 
+#ifdef CONFIG_HAVE_KVM
+apicinterrupt POSTED_INTR_VECTOR \
+	kvm_posted_intr_ipi smp_kvm_posted_intr_ipi
+#endif
+
 apicinterrupt THRESHOLD_APIC_VECTOR \
 	threshold_interrupt smp_threshold_interrupt
 apicinterrupt THERMAL_APIC_VECTOR \
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index e4595f1..ac0631d 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -165,10 +165,6 @@
 u64 arch_irq_stat(void)
 {
 	u64 sum = atomic_read(&irq_err_count);
-
-#ifdef CONFIG_X86_IO_APIC
-	sum += atomic_read(&irq_mis_count);
-#endif
 	return sum;
 }
 
@@ -228,6 +224,28 @@
 	set_irq_regs(old_regs);
 }
 
+#ifdef CONFIG_HAVE_KVM
+/*
+ * Handler for POSTED_INTR_VECTOR.
+ */
+void smp_kvm_posted_intr_ipi(struct pt_regs *regs)
+{
+	struct pt_regs *old_regs = set_irq_regs(regs);
+
+	ack_APIC_irq();
+
+	irq_enter();
+
+	exit_idle();
+
+	inc_irq_stat(kvm_posted_intr_ipis);
+
+	irq_exit();
+
+	set_irq_regs(old_regs);
+}
+#endif
+
 EXPORT_SYMBOL_GPL(vector_used_by_percpu_irq);
 
 #ifdef CONFIG_HOTPLUG_CPU
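
The handler above only acks and counts; the pending vectors themselves are transferred when KVM syncs the posted-interrupt request (PIR) bits into the APIC IRR (see kvm_apic_update_irr() further down). A user-space sketch of that xchg-based handoff, reduced to two bitmaps:

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

/* 256 vectors = 8 x 32-bit words, as in both the PIR and the IRR */
static _Atomic uint32_t pir[8];
static uint32_t irr[8];

/* producer: roughly what deliver_posted_interrupt() does before the IPI */
static void post_interrupt(int vector)
{
	atomic_fetch_or(&pir[vector / 32], UINT32_C(1) << (vector % 32));
	/* ...then send POSTED_INTR_VECTOR to the target cpu... */
}

/* consumer: the sync_pir_to_irr()/kvm_apic_update_irr() handoff */
static void sync_pir_to_irr(void)
{
	int i;

	for (i = 0; i < 8; i++) {
		uint32_t val = atomic_exchange(&pir[i], 0);

		if (val)
			irr[i] |= val;
	}
}

int main(void)
{
	post_interrupt(0x31);
	sync_pir_to_irr();
	printf("irr[1] = %#x\n", irr[1]);	/* 0x20000 */
	return 0;
}
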
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c
index 7dc4e45..a2a1fbc 100644
--- a/arch/x86/kernel/irqinit.c
+++ b/arch/x86/kernel/irqinit.c
@@ -172,6 +172,10 @@
 
 	/* IPI for X86 platform specific use */
 	alloc_intr_gate(X86_PLATFORM_IPI_VECTOR, x86_platform_ipi);
+#ifdef CONFIG_HAVE_KVM
+	/* IPI for KVM to deliver posted interrupt */
+	alloc_intr_gate(POSTED_INTR_VECTOR, kvm_posted_intr_ipi);
+#endif
 
 	/* IPI vectors for APIC spurious and error interrupts */
 	alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index 0732f00..d2c3812 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -160,8 +160,12 @@
 {
 	int cpu = smp_processor_id();
 	int low, high, ret;
-	struct pvclock_vcpu_time_info *src = &hv_clock[cpu].pvti;
+	struct pvclock_vcpu_time_info *src;
 
+	if (!hv_clock)
+		return 0;
+
+	src = &hv_clock[cpu].pvti;
 	low = (int)slow_virt_to_phys(src) | 1;
 	high = ((u64)slow_virt_to_phys(src) >> 32);
 	ret = native_write_msr_safe(msr_kvm_system_time, low, high);
@@ -276,6 +280,9 @@
 	struct pvclock_vcpu_time_info *vcpu_time;
 	unsigned int size;
 
+	if (!hv_clock)
+		return 0;
+
 	size = PAGE_ALIGN(sizeof(struct pvclock_vsyscall_time_info)*NR_CPUS);
 
 	preempt_disable();
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index 586f000..a47a3e5 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -21,14 +21,13 @@
 	tristate "Kernel-based Virtual Machine (KVM) support"
 	depends on HAVE_KVM
 	depends on HIGH_RES_TIMERS
-	# for device assignment:
-	depends on PCI
 	# for TASKSTATS/TASK_DELAY_ACCT:
 	depends on NET
 	select PREEMPT_NOTIFIERS
 	select MMU_NOTIFIER
 	select ANON_INODES
 	select HAVE_KVM_IRQCHIP
+	select HAVE_KVM_IRQ_ROUTING
 	select HAVE_KVM_EVENTFD
 	select KVM_APIC_ARCHITECTURE
 	select KVM_ASYNC_PF
@@ -82,6 +81,17 @@
 	 This option adds an R/W KVM module parameter 'mmu_audit', which
 	 allows auditing the KVM MMU at runtime.
 
+config KVM_DEVICE_ASSIGNMENT
+	bool "KVM legacy PCI device assignment support"
+	depends on KVM && PCI && IOMMU_API
+	default y
+	---help---
+	  Provide support for legacy PCI device assignment through KVM.  The
+	  kernel now also supports a full-featured userspace device driver
+	  framework through VFIO, which supersedes much of this support.
+
+	  If unsure, say Y.
+
 # OK, it's a little counter-intuitive to do this, but it puts it neatly under
 # the virtualization menu.
 source drivers/vhost/Kconfig
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index 04d3040..d609e1d 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -7,8 +7,9 @@
 
 kvm-y			+= $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \
 				coalesced_mmio.o irq_comm.o eventfd.o \
-				assigned-dev.o)
-kvm-$(CONFIG_IOMMU_API)	+= $(addprefix ../../../virt/kvm/, iommu.o)
+				irqchip.o)
+kvm-$(CONFIG_KVM_DEVICE_ASSIGNMENT)	+= $(addprefix ../../../virt/kvm/, \
+				assigned-dev.o iommu.o)
 kvm-$(CONFIG_KVM_ASYNC_PF)	+= $(addprefix ../../../virt/kvm/, async_pf.o)
 
 kvm-y			+= x86.o mmu.o emulate.o i8259.o irq.o lapic.o \
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index a335cc6..8e517bb 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -132,8 +132,9 @@
 #define Priv        (1<<27) /* instruction generates #GP if current CPL != 0 */
 #define No64	    (1<<28)
 #define PageTable   (1 << 29)   /* instruction used to write page table */
+#define NotImpl     (1 << 30)   /* instruction is not implemented */
 /* Source 2 operand type */
-#define Src2Shift   (30)
+#define Src2Shift   (31)
 #define Src2None    (OpNone << Src2Shift)
 #define Src2CL      (OpCL << Src2Shift)
 #define Src2ImmByte (OpImmByte << Src2Shift)
@@ -1578,12 +1579,21 @@
 
 	memset(&seg_desc, 0, sizeof seg_desc);
 
-	if ((seg <= VCPU_SREG_GS && ctxt->mode == X86EMUL_MODE_VM86)
-	    || ctxt->mode == X86EMUL_MODE_REAL) {
-		/* set real mode segment descriptor */
+	if (ctxt->mode == X86EMUL_MODE_REAL) {
+		/* set real mode segment descriptor (keep limit etc. for
+		 * unreal mode) */
 		ctxt->ops->get_segment(ctxt, &dummy, &seg_desc, NULL, seg);
 		set_desc_base(&seg_desc, selector << 4);
 		goto load;
+	} else if (seg <= VCPU_SREG_GS && ctxt->mode == X86EMUL_MODE_VM86) {
+		/* VM86 needs a clean new segment descriptor */
+		set_desc_base(&seg_desc, selector << 4);
+		set_desc_limit(&seg_desc, 0xffff);
+		seg_desc.type = 3;
+		seg_desc.p = 1;
+		seg_desc.s = 1;
+		seg_desc.dpl = 3;
+		goto load;
 	}
 
 	rpl = selector & 3;
@@ -3615,7 +3625,7 @@
 #define DI(_y, _i) { .flags = (_y), .intercept = x86_intercept_##_i }
 #define DIP(_y, _i, _p) { .flags = (_y), .intercept = x86_intercept_##_i, \
 		      .check_perm = (_p) }
-#define N    D(0)
+#define N    D(NotImpl)
 #define EXT(_f, _e) { .flags = ((_f) | RMExt), .u.group = (_e) }
 #define G(_f, _g) { .flags = ((_f) | Group | ModRM), .u.group = (_g) }
 #define GD(_f, _g) { .flags = ((_f) | GroupDual | ModRM), .u.gdual = (_g) }
@@ -3713,7 +3723,7 @@
 	I(SrcMemFAddr | ImplicitOps | Stack,	em_call_far),
 	I(SrcMem | Stack,			em_grp45),
 	I(SrcMemFAddr | ImplicitOps,		em_grp45),
-	I(SrcMem | Stack,			em_grp45), N,
+	I(SrcMem | Stack,			em_grp45), D(Undefined),
 };
 
 static const struct opcode group6[] = {
@@ -4162,6 +4172,10 @@
 		break;
 	case OpMem8:
 		ctxt->memop.bytes = 1;
+		if (ctxt->memop.type == OP_REG) {
+			ctxt->memop.addr.reg = decode_register(ctxt, ctxt->modrm_rm, 1);
+			fetch_register_operand(&ctxt->memop);
+		}
 		goto mem_common;
 	case OpMem16:
 		ctxt->memop.bytes = 2;
@@ -4373,7 +4387,7 @@
 	ctxt->intercept = opcode.intercept;
 
 	/* Unrecognised? */
-	if (ctxt->d == 0 || (ctxt->d & Undefined))
+	if (ctxt->d == 0 || (ctxt->d & NotImpl))
 		return EMULATION_FAILED;
 
 	if (!(ctxt->d & VendorSpecific) && ctxt->only_vendor_specific_insn)
@@ -4511,7 +4525,8 @@
 
 	ctxt->mem_read.pos = 0;
 
-	if (ctxt->mode == X86EMUL_MODE_PROT64 && (ctxt->d & No64)) {
+	if ((ctxt->mode == X86EMUL_MODE_PROT64 && (ctxt->d & No64)) ||
+			(ctxt->d & Undefined)) {
 		rc = emulate_ud(ctxt);
 		goto done;
 	}
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index c1d30b2..412a5aa 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -290,8 +290,8 @@
 	}
 	spin_unlock(&ps->inject_lock);
 	if (inject) {
-		kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 1);
-		kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 0);
+		kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 1, false);
+		kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 0, false);
 
 		/*
 		 * Provides NMI watchdog support via Virtual Wire mode.
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index f77df1c..e1adbb4 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -94,6 +94,14 @@
 	return test_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
 }
 
+bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector)
+{
+	struct kvm_lapic *apic = vcpu->arch.apic;
+
+	return apic_test_vector(vector, apic->regs + APIC_ISR) ||
+		apic_test_vector(vector, apic->regs + APIC_IRR);
+}
+
 static inline void apic_set_vector(int vec, void *bitmap)
 {
 	set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
@@ -145,53 +153,6 @@
 	return (kvm_apic_get_reg(apic, APIC_ID) >> 24) & 0xff;
 }
 
-void kvm_calculate_eoi_exitmap(struct kvm_vcpu *vcpu,
-				struct kvm_lapic_irq *irq,
-				u64 *eoi_exit_bitmap)
-{
-	struct kvm_lapic **dst;
-	struct kvm_apic_map *map;
-	unsigned long bitmap = 1;
-	int i;
-
-	rcu_read_lock();
-	map = rcu_dereference(vcpu->kvm->arch.apic_map);
-
-	if (unlikely(!map)) {
-		__set_bit(irq->vector, (unsigned long *)eoi_exit_bitmap);
-		goto out;
-	}
-
-	if (irq->dest_mode == 0) { /* physical mode */
-		if (irq->delivery_mode == APIC_DM_LOWEST ||
-				irq->dest_id == 0xff) {
-			__set_bit(irq->vector,
-				  (unsigned long *)eoi_exit_bitmap);
-			goto out;
-		}
-		dst = &map->phys_map[irq->dest_id & 0xff];
-	} else {
-		u32 mda = irq->dest_id << (32 - map->ldr_bits);
-
-		dst = map->logical_map[apic_cluster_id(map, mda)];
-
-		bitmap = apic_logical_id(map, mda);
-	}
-
-	for_each_set_bit(i, &bitmap, 16) {
-		if (!dst[i])
-			continue;
-		if (dst[i]->vcpu == vcpu) {
-			__set_bit(irq->vector,
-				  (unsigned long *)eoi_exit_bitmap);
-			break;
-		}
-	}
-
-out:
-	rcu_read_unlock();
-}
-
 static void recalculate_apic_map(struct kvm *kvm)
 {
 	struct kvm_apic_map *new, *old = NULL;
@@ -256,7 +217,7 @@
 	if (old)
 		kfree_rcu(old, rcu);
 
-	kvm_ioapic_make_eoibitmap_request(kvm);
+	kvm_vcpu_request_scan_ioapic(kvm);
 }
 
 static inline void kvm_apic_set_id(struct kvm_lapic *apic, u8 id)
@@ -357,6 +318,19 @@
 	return count;
 }
 
+void kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir)
+{
+	u32 i, pir_val;
+	struct kvm_lapic *apic = vcpu->arch.apic;
+
+	for (i = 0; i <= 7; i++) {
+		pir_val = xchg(&pir[i], 0);
+		if (pir_val)
+			*((u32 *)(apic->regs + APIC_IRR + i * 0x10)) |= pir_val;
+	}
+}
+EXPORT_SYMBOL_GPL(kvm_apic_update_irr);
+
 static inline int apic_test_and_set_irr(int vec, struct kvm_lapic *apic)
 {
 	apic->irr_pending = true;
@@ -379,6 +353,7 @@
 	if (!apic->irr_pending)
 		return -1;
 
+	kvm_x86_ops->sync_pir_to_irr(apic->vcpu);
 	result = apic_search_irr(apic);
 	ASSERT(result == -1 || result >= 16);
 
@@ -431,14 +406,16 @@
 }
 
 static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
-			     int vector, int level, int trig_mode);
+			     int vector, int level, int trig_mode,
+			     unsigned long *dest_map);
 
-int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq)
+int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq,
+		unsigned long *dest_map)
 {
 	struct kvm_lapic *apic = vcpu->arch.apic;
 
 	return __apic_accept_irq(apic, irq->delivery_mode, irq->vector,
-			irq->level, irq->trig_mode);
+			irq->level, irq->trig_mode, dest_map);
 }
 
 static int pv_eoi_put_user(struct kvm_vcpu *vcpu, u8 val)
@@ -505,6 +482,15 @@
 	return result;
 }
 
+void kvm_apic_update_tmr(struct kvm_vcpu *vcpu, u32 *tmr)
+{
+	struct kvm_lapic *apic = vcpu->arch.apic;
+	int i;
+
+	for (i = 0; i < 8; i++)
+		apic_set_reg(apic, APIC_TMR + 0x10 * i, tmr[i]);
+}
+
 static void apic_update_ppr(struct kvm_lapic *apic)
 {
 	u32 tpr, isrv, ppr, old_ppr;
@@ -611,7 +597,7 @@
 }
 
 bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
-		struct kvm_lapic_irq *irq, int *r)
+		struct kvm_lapic_irq *irq, int *r, unsigned long *dest_map)
 {
 	struct kvm_apic_map *map;
 	unsigned long bitmap = 1;
@@ -622,7 +608,7 @@
 	*r = -1;
 
 	if (irq->shorthand == APIC_DEST_SELF) {
-		*r = kvm_apic_set_irq(src->vcpu, irq);
+		*r = kvm_apic_set_irq(src->vcpu, irq, dest_map);
 		return true;
 	}
 
@@ -667,7 +653,7 @@
 			continue;
 		if (*r < 0)
 			*r = 0;
-		*r += kvm_apic_set_irq(dst[i]->vcpu, irq);
+		*r += kvm_apic_set_irq(dst[i]->vcpu, irq, dest_map);
 	}
 
 	ret = true;
@@ -681,7 +667,8 @@
  * Return 1 if successfully added and 0 if discarded.
  */
 static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
-			     int vector, int level, int trig_mode)
+			     int vector, int level, int trig_mode,
+			     unsigned long *dest_map)
 {
 	int result = 0;
 	struct kvm_vcpu *vcpu = apic->vcpu;
@@ -694,24 +681,28 @@
 		if (unlikely(!apic_enabled(apic)))
 			break;
 
-		if (trig_mode) {
-			apic_debug("level trig mode for vector %d", vector);
-			apic_set_vector(vector, apic->regs + APIC_TMR);
-		} else
-			apic_clear_vector(vector, apic->regs + APIC_TMR);
+		if (dest_map)
+			__set_bit(vcpu->vcpu_id, dest_map);
 
-		result = !apic_test_and_set_irr(vector, apic);
-		trace_kvm_apic_accept_irq(vcpu->vcpu_id, delivery_mode,
-					  trig_mode, vector, !result);
-		if (!result) {
-			if (trig_mode)
-				apic_debug("level trig mode repeatedly for "
-						"vector %d", vector);
-			break;
+		if (kvm_x86_ops->deliver_posted_interrupt) {
+			result = 1;
+			kvm_x86_ops->deliver_posted_interrupt(vcpu, vector);
+		} else {
+			result = !apic_test_and_set_irr(vector, apic);
+
+			if (!result) {
+				if (trig_mode)
+					apic_debug("level trig mode repeatedly "
+						"for vector %d", vector);
+				goto out;
+			}
+
+			kvm_make_request(KVM_REQ_EVENT, vcpu);
+			kvm_vcpu_kick(vcpu);
 		}
-
-		kvm_make_request(KVM_REQ_EVENT, vcpu);
-		kvm_vcpu_kick(vcpu);
+out:
+		trace_kvm_apic_accept_irq(vcpu->vcpu_id, delivery_mode,
+				trig_mode, vector, !result);
 		break;
 
 	case APIC_DM_REMRD:
@@ -731,7 +722,11 @@
 	case APIC_DM_INIT:
 		if (!trig_mode || level) {
 			result = 1;
-			vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED;
+			/* assumes that there are only KVM_APIC_INIT/SIPI */
+			apic->pending_events = (1UL << KVM_APIC_INIT);
+			/* make sure pending_events is visible before sending
+			 * the request */
+			smp_wmb();
 			kvm_make_request(KVM_REQ_EVENT, vcpu);
 			kvm_vcpu_kick(vcpu);
 		} else {
@@ -743,13 +738,13 @@
 	case APIC_DM_STARTUP:
 		apic_debug("SIPI to vcpu %d vector 0x%02x\n",
 			   vcpu->vcpu_id, vector);
-		if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) {
-			result = 1;
-			vcpu->arch.sipi_vector = vector;
-			vcpu->arch.mp_state = KVM_MP_STATE_SIPI_RECEIVED;
-			kvm_make_request(KVM_REQ_EVENT, vcpu);
-			kvm_vcpu_kick(vcpu);
-		}
+		result = 1;
+		apic->sipi_vector = vector;
+		/* make sure sipi_vector is visible for the receiver */
+		smp_wmb();
+		set_bit(KVM_APIC_SIPI, &apic->pending_events);
+		kvm_make_request(KVM_REQ_EVENT, vcpu);
+		kvm_vcpu_kick(vcpu);
 		break;
 
 	case APIC_DM_EXTINT:
@@ -782,7 +777,7 @@
 			trigger_mode = IOAPIC_LEVEL_TRIG;
 		else
 			trigger_mode = IOAPIC_EDGE_TRIG;
-		kvm_ioapic_update_eoi(apic->vcpu->kvm, vector, trigger_mode);
+		kvm_ioapic_update_eoi(apic->vcpu, vector, trigger_mode);
 	}
 }
 
@@ -848,7 +843,7 @@
 		   irq.trig_mode, irq.level, irq.dest_mode, irq.delivery_mode,
 		   irq.vector);
 
-	kvm_irq_delivery_to_apic(apic->vcpu->kvm, apic, &irq);
+	kvm_irq_delivery_to_apic(apic->vcpu->kvm, apic, &irq, NULL);
 }
 
 static u32 apic_get_tmcct(struct kvm_lapic *apic)
@@ -1484,7 +1479,8 @@
 		vector = reg & APIC_VECTOR_MASK;
 		mode = reg & APIC_MODE_MASK;
 		trig_mode = reg & APIC_LVT_LEVEL_TRIGGER;
-		return __apic_accept_irq(apic, mode, vector, 1, trig_mode);
+		return __apic_accept_irq(apic, mode, vector, 1, trig_mode,
+					NULL);
 	}
 	return 0;
 }
@@ -1654,6 +1650,7 @@
 	apic->highest_isr_cache = -1;
 	kvm_x86_ops->hwapic_isr_update(vcpu->kvm, apic_find_highest_isr(apic));
 	kvm_make_request(KVM_REQ_EVENT, vcpu);
+	kvm_rtc_eoi_tracking_restore_one(vcpu);
 }
 
 void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu)
@@ -1860,6 +1857,34 @@
 					 addr, sizeof(u8));
 }
 
+void kvm_apic_accept_events(struct kvm_vcpu *vcpu)
+{
+	struct kvm_lapic *apic = vcpu->arch.apic;
+	unsigned int sipi_vector;
+
+	if (!kvm_vcpu_has_lapic(vcpu))
+		return;
+
+	if (test_and_clear_bit(KVM_APIC_INIT, &apic->pending_events)) {
+		kvm_lapic_reset(vcpu);
+		kvm_vcpu_reset(vcpu);
+		if (kvm_vcpu_is_bsp(apic->vcpu))
+			vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
+		else
+			vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED;
+	}
+	if (test_and_clear_bit(KVM_APIC_SIPI, &apic->pending_events) &&
+	    vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) {
+		/* evaluate pending_events before reading the vector */
+		smp_rmb();
+		sipi_vector = apic->sipi_vector;
+		pr_debug("vcpu %d received sipi with vector # %x\n",
+			 vcpu->vcpu_id, sipi_vector);
+		kvm_vcpu_deliver_sipi_vector(vcpu, sipi_vector);
+		vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
+	}
+}
+
 void kvm_lapic_init(void)
 {
 	/* do not patch jump label more than once per second */
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 1676d34..c730ac9 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -5,6 +5,9 @@
 
 #include <linux/kvm_host.h>
 
+#define KVM_APIC_INIT		0
+#define KVM_APIC_SIPI		1
+
 struct kvm_timer {
 	struct hrtimer timer;
 	s64 period; 				/* unit: ns */
@@ -32,6 +35,8 @@
 	void *regs;
 	gpa_t vapic_addr;
 	struct page *vapic_page;
+	unsigned long pending_events;
+	unsigned int sipi_vector;
 };
 int kvm_create_lapic(struct kvm_vcpu *vcpu);
 void kvm_free_lapic(struct kvm_vcpu *vcpu);
@@ -39,6 +44,7 @@
 int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu);
 int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu);
 int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu);
+void kvm_apic_accept_events(struct kvm_vcpu *vcpu);
 void kvm_lapic_reset(struct kvm_vcpu *vcpu);
 u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu);
 void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8);
@@ -47,13 +53,16 @@
 u64 kvm_lapic_get_base(struct kvm_vcpu *vcpu);
 void kvm_apic_set_version(struct kvm_vcpu *vcpu);
 
+void kvm_apic_update_tmr(struct kvm_vcpu *vcpu, u32 *tmr);
+void kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir);
 int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest);
 int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda);
-int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq);
+int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq,
+		unsigned long *dest_map);
 int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type);
 
 bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
-		struct kvm_lapic_irq *irq, int *r);
+		struct kvm_lapic_irq *irq, int *r, unsigned long *dest_map);
 
 u64 kvm_get_apic_base(struct kvm_vcpu *vcpu);
 void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data);
@@ -154,8 +163,11 @@
 	return ldr & map->lid_mask;
 }
 
-void kvm_calculate_eoi_exitmap(struct kvm_vcpu *vcpu,
-				struct kvm_lapic_irq *irq,
-				u64 *eoi_bitmap);
+static inline bool kvm_apic_has_events(struct kvm_vcpu *vcpu)
+{
+	return vcpu->arch.apic->pending_events;
+}
+
+bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector);
 
 #endif
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 956ca35..004cc87 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -199,8 +199,11 @@
 
 static void mark_mmio_spte(u64 *sptep, u64 gfn, unsigned access)
 {
+	struct kvm_mmu_page *sp =  page_header(__pa(sptep));
+
 	access &= ACC_WRITE_MASK | ACC_USER_MASK;
 
+	sp->mmio_cached = true;
 	trace_mark_mmio_spte(sptep, gfn, access);
 	mmu_spte_set(sptep, shadow_mmio_mask | access | gfn << PAGE_SHIFT);
 }
@@ -1502,6 +1505,7 @@
 					       u64 *parent_pte, int direct)
 {
 	struct kvm_mmu_page *sp;
+
 	sp = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_header_cache);
 	sp->spt = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache);
 	if (!direct)
@@ -1644,16 +1648,14 @@
 static void kvm_mmu_commit_zap_page(struct kvm *kvm,
 				    struct list_head *invalid_list);
 
-#define for_each_gfn_sp(kvm, sp, gfn)					\
-  hlist_for_each_entry(sp,						\
-   &(kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(gfn)], hash_link)	\
-	if ((sp)->gfn != (gfn)) {} else
+#define for_each_gfn_sp(_kvm, _sp, _gfn)				\
+	hlist_for_each_entry(_sp,					\
+	  &(_kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(_gfn)], hash_link) \
+		if ((_sp)->gfn != (_gfn)) {} else
 
-#define for_each_gfn_indirect_valid_sp(kvm, sp, gfn)			\
-  hlist_for_each_entry(sp,						\
-   &(kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(gfn)], hash_link)	\
-		if ((sp)->gfn != (gfn) || (sp)->role.direct ||		\
-			(sp)->role.invalid) {} else
+#define for_each_gfn_indirect_valid_sp(_kvm, _sp, _gfn)			\
+	for_each_gfn_sp(_kvm, _sp, _gfn)				\
+		if ((_sp)->role.direct || (_sp)->role.invalid) {} else
 
 /* @sp->gfn should be write-protected at the call site */
 static int __kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
@@ -2089,7 +2091,7 @@
 static void kvm_mmu_commit_zap_page(struct kvm *kvm,
 				    struct list_head *invalid_list)
 {
-	struct kvm_mmu_page *sp;
+	struct kvm_mmu_page *sp, *nsp;
 
 	if (list_empty(invalid_list))
 		return;
@@ -2106,11 +2108,25 @@
 	 */
 	kvm_flush_remote_tlbs(kvm);
 
-	do {
-		sp = list_first_entry(invalid_list, struct kvm_mmu_page, link);
+	list_for_each_entry_safe(sp, nsp, invalid_list, link) {
 		WARN_ON(!sp->role.invalid || sp->root_count);
 		kvm_mmu_free_page(sp);
-	} while (!list_empty(invalid_list));
+	}
+}
+
+static bool prepare_zap_oldest_mmu_page(struct kvm *kvm,
+					struct list_head *invalid_list)
+{
+	struct kvm_mmu_page *sp;
+
+	if (list_empty(&kvm->arch.active_mmu_pages))
+		return false;
+
+	sp = list_entry(kvm->arch.active_mmu_pages.prev,
+			struct kvm_mmu_page, link);
+	kvm_mmu_prepare_zap_page(kvm, sp, invalid_list);
+
+	return true;
 }
 
 /*
@@ -2120,23 +2136,15 @@
 void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int goal_nr_mmu_pages)
 {
 	LIST_HEAD(invalid_list);
-	/*
-	 * If we set the number of mmu pages to be smaller be than the
-	 * number of actived pages , we must to free some mmu pages before we
-	 * change the value
-	 */
 
 	spin_lock(&kvm->mmu_lock);
 
 	if (kvm->arch.n_used_mmu_pages > goal_nr_mmu_pages) {
-		while (kvm->arch.n_used_mmu_pages > goal_nr_mmu_pages &&
-			!list_empty(&kvm->arch.active_mmu_pages)) {
-			struct kvm_mmu_page *page;
+		/* Need to free some mmu pages to achieve the goal. */
+		while (kvm->arch.n_used_mmu_pages > goal_nr_mmu_pages)
+			if (!prepare_zap_oldest_mmu_page(kvm, &invalid_list))
+				break;
 
-			page = container_of(kvm->arch.active_mmu_pages.prev,
-					    struct kvm_mmu_page, link);
-			kvm_mmu_prepare_zap_page(kvm, page, &invalid_list);
-		}
 		kvm_mmu_commit_zap_page(kvm, &invalid_list);
 		goal_nr_mmu_pages = kvm->arch.n_used_mmu_pages;
 	}
@@ -2794,6 +2802,7 @@
 
 static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn,
 			 gva_t gva, pfn_t *pfn, bool write, bool *writable);
+static void make_mmu_pages_available(struct kvm_vcpu *vcpu);
 
 static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code,
 			 gfn_t gfn, bool prefault)
@@ -2835,7 +2844,7 @@
 	spin_lock(&vcpu->kvm->mmu_lock);
 	if (mmu_notifier_retry(vcpu->kvm, mmu_seq))
 		goto out_unlock;
-	kvm_mmu_free_some_pages(vcpu);
+	make_mmu_pages_available(vcpu);
 	if (likely(!force_pt_level))
 		transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level);
 	r = __direct_map(vcpu, v, write, map_writable, level, gfn, pfn,
@@ -2913,7 +2922,7 @@
 
 	if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_LEVEL) {
 		spin_lock(&vcpu->kvm->mmu_lock);
-		kvm_mmu_free_some_pages(vcpu);
+		make_mmu_pages_available(vcpu);
 		sp = kvm_mmu_get_page(vcpu, 0, 0, PT64_ROOT_LEVEL,
 				      1, ACC_ALL, NULL);
 		++sp->root_count;
@@ -2925,7 +2934,7 @@
 
 			ASSERT(!VALID_PAGE(root));
 			spin_lock(&vcpu->kvm->mmu_lock);
-			kvm_mmu_free_some_pages(vcpu);
+			make_mmu_pages_available(vcpu);
 			sp = kvm_mmu_get_page(vcpu, i << (30 - PAGE_SHIFT),
 					      i << 30,
 					      PT32_ROOT_LEVEL, 1, ACC_ALL,
@@ -2964,7 +2973,7 @@
 		ASSERT(!VALID_PAGE(root));
 
 		spin_lock(&vcpu->kvm->mmu_lock);
-		kvm_mmu_free_some_pages(vcpu);
+		make_mmu_pages_available(vcpu);
 		sp = kvm_mmu_get_page(vcpu, root_gfn, 0, PT64_ROOT_LEVEL,
 				      0, ACC_ALL, NULL);
 		root = __pa(sp->spt);
@@ -2998,7 +3007,7 @@
 				return 1;
 		}
 		spin_lock(&vcpu->kvm->mmu_lock);
-		kvm_mmu_free_some_pages(vcpu);
+		make_mmu_pages_available(vcpu);
 		sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30,
 				      PT32_ROOT_LEVEL, 0,
 				      ACC_ALL, NULL);
@@ -3304,7 +3313,7 @@
 	spin_lock(&vcpu->kvm->mmu_lock);
 	if (mmu_notifier_retry(vcpu->kvm, mmu_seq))
 		goto out_unlock;
-	kvm_mmu_free_some_pages(vcpu);
+	make_mmu_pages_available(vcpu);
 	if (likely(!force_pt_level))
 		transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level);
 	r = __direct_map(vcpu, gpa, write, map_writable,
@@ -4006,17 +4015,17 @@
 }
 EXPORT_SYMBOL_GPL(kvm_mmu_unprotect_page_virt);
 
-void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu)
+static void make_mmu_pages_available(struct kvm_vcpu *vcpu)
 {
 	LIST_HEAD(invalid_list);
 
-	while (kvm_mmu_available_pages(vcpu->kvm) < KVM_REFILL_PAGES &&
-	       !list_empty(&vcpu->kvm->arch.active_mmu_pages)) {
-		struct kvm_mmu_page *sp;
+	if (likely(kvm_mmu_available_pages(vcpu->kvm) >= KVM_MIN_FREE_MMU_PAGES))
+		return;
 
-		sp = container_of(vcpu->kvm->arch.active_mmu_pages.prev,
-				  struct kvm_mmu_page, link);
-		kvm_mmu_prepare_zap_page(vcpu->kvm, sp, &invalid_list);
+	while (kvm_mmu_available_pages(vcpu->kvm) < KVM_REFILL_PAGES) {
+		if (!prepare_zap_oldest_mmu_page(vcpu->kvm, &invalid_list))
+			break;
+
 		++vcpu->kvm->stat.mmu_recycled;
 	}
 	kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list);
@@ -4185,17 +4194,22 @@
 	spin_unlock(&kvm->mmu_lock);
 }
 
-static void kvm_mmu_remove_some_alloc_mmu_pages(struct kvm *kvm,
-						struct list_head *invalid_list)
+void kvm_mmu_zap_mmio_sptes(struct kvm *kvm)
 {
-	struct kvm_mmu_page *page;
+	struct kvm_mmu_page *sp, *node;
+	LIST_HEAD(invalid_list);
 
-	if (list_empty(&kvm->arch.active_mmu_pages))
-		return;
+	spin_lock(&kvm->mmu_lock);
+restart:
+	list_for_each_entry_safe(sp, node, &kvm->arch.active_mmu_pages, link) {
+		if (!sp->mmio_cached)
+			continue;
+		if (kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list))
+			goto restart;
+	}
 
-	page = container_of(kvm->arch.active_mmu_pages.prev,
-			    struct kvm_mmu_page, link);
-	kvm_mmu_prepare_zap_page(kvm, page, invalid_list);
+	kvm_mmu_commit_zap_page(kvm, &invalid_list);
+	spin_unlock(&kvm->mmu_lock);
 }
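kvm_mmu_prepare_zap_page() can zap pages beyond the one it was handed (unsynced children, for instance), so even the _safe iterator's saved next pointer may be freed underneath it; hence the restart whenever a page was actually queued. A standalone model of the idiom, in plain user-space C rather than kernel code:

#include <stdlib.h>

struct node { struct node *next; int mmio; };

/* Models kvm_mmu_prepare_zap_page(): may free more than the node it
 * was handed (here, also its successor), so callers cannot trust a
 * saved next pointer across a call. */
static void zap_with_child(struct node **head, struct node *n)
{
	struct node **pp;

	for (pp = head; *pp != n; pp = &(*pp)->next)
		;
	*pp = n->next ? n->next->next : NULL;
	free(n->next);
	free(n);
}

static void zap_all_mmio(struct node **head)
{
	struct node *n;
restart:
	for (n = *head; n; n = n->next)
		if (n->mmio) {
			zap_with_child(head, n);
			goto restart;	/* n->next may be stale/freed */
		}
}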
 
 static int mmu_shrink(struct shrinker *shrink, struct shrink_control *sc)
@@ -4232,7 +4246,7 @@
 		idx = srcu_read_lock(&kvm->srcu);
 		spin_lock(&kvm->mmu_lock);
 
-		kvm_mmu_remove_some_alloc_mmu_pages(kvm, &invalid_list);
+		prepare_zap_oldest_mmu_page(kvm, &invalid_list);
 		kvm_mmu_commit_zap_page(kvm, &invalid_list);
 
 		spin_unlock(&kvm->mmu_lock);
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index 6987108..2adcbc2 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -57,14 +57,11 @@
 
 static inline unsigned int kvm_mmu_available_pages(struct kvm *kvm)
 {
-	return kvm->arch.n_max_mmu_pages -
-		kvm->arch.n_used_mmu_pages;
-}
+	if (kvm->arch.n_max_mmu_pages > kvm->arch.n_used_mmu_pages)
+		return kvm->arch.n_max_mmu_pages -
+			kvm->arch.n_used_mmu_pages;
 
-static inline void kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu)
-{
-	if (unlikely(kvm_mmu_available_pages(vcpu->kvm)< KVM_MIN_FREE_MMU_PAGES))
-		__kvm_mmu_free_some_pages(vcpu);
+	return 0;
 }
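n_used_mmu_pages can temporarily exceed n_max_mmu_pages (the limit may be lowered while root-counted pages cannot yet be freed), and the return type is unsigned, so the old unconditional subtraction could wrap to a huge value and defeat the KVM_MIN_FREE_MMU_PAGES test in make_mmu_pages_available(). A user-space demonstration of the wraparound the new guard avoids:

#include <stdio.h>

static unsigned int available(unsigned int max, unsigned int used)
{
	if (max > used)
		return max - used;
	return 0;
}

int main(void)
{
	printf("%u\n", available(400, 403));	/* 0 */
	printf("%u\n", 400u - 403u);		/* 4294967293: wrapped */
	return 0;
}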
 
 static inline int kvm_mmu_reload(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 105dd5b..da20860 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -627,7 +627,7 @@
 		goto out_unlock;
 
 	kvm_mmu_audit(vcpu, AUDIT_PRE_PAGE_FAULT);
-	kvm_mmu_free_some_pages(vcpu);
+	make_mmu_pages_available(vcpu);
 	if (!force_pt_level)
 		transparent_hugepage_adjust(vcpu, &walker.gfn, &pfn, &level);
 	r = FNAME(fetch)(vcpu, addr, &walker, write_fault,
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index cfc258a..c53e797 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -360,10 +360,12 @@
 	return 1;
 }
 
-int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data)
+int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 {
 	struct kvm_pmu *pmu = &vcpu->arch.pmu;
 	struct kvm_pmc *pmc;
+	u32 index = msr_info->index;
+	u64 data = msr_info->data;
 
 	switch (index) {
 	case MSR_CORE_PERF_FIXED_CTR_CTRL:
@@ -375,6 +377,10 @@
 		}
 		break;
 	case MSR_CORE_PERF_GLOBAL_STATUS:
+		if (msr_info->host_initiated) {
+			pmu->global_status = data;
+			return 0;
+		}
 		break; /* RO MSR */
 	case MSR_CORE_PERF_GLOBAL_CTRL:
 		if (pmu->global_ctrl == data)
@@ -386,7 +392,8 @@
 		break;
 	case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
 		if (!(data & (pmu->global_ctrl_mask & ~(3ull<<62)))) {
-			pmu->global_status &= ~data;
+			if (!msr_info->host_initiated)
+				pmu->global_status &= ~data;
 			pmu->global_ovf_ctrl = data;
 			return 0;
 		}
@@ -394,7 +401,8 @@
 	default:
 		if ((pmc = get_gp_pmc(pmu, index, MSR_IA32_PERFCTR0)) ||
 				(pmc = get_fixed_pmc(pmu, index))) {
-			data = (s64)(s32)data;
+			if (!msr_info->host_initiated)
+				data = (s64)(s32)data;
 			pmc->counter += data - read_pmc(pmc);
 			return 0;
 		} else if ((pmc = get_gp_pmc(pmu, index, MSR_P6_EVNTSEL0))) {
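The host_initiated cases above are migration plumbing: when userspace restores PMU MSR state it must be able to write values the guest itself cannot (MSR_CORE_PERF_GLOBAL_STATUS is read-only from the guest, and guest counter writes are sign-extended from 32 bits), so host-initiated writes bypass those guest-visible restrictions.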
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 7d39d70..a14a6eaf 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1131,17 +1131,11 @@
 	init_seg(&save->gs);
 
 	save->cs.selector = 0xf000;
+	save->cs.base = 0xffff0000;
 	/* Executable/Readable Code Segment */
 	save->cs.attrib = SVM_SELECTOR_READ_MASK | SVM_SELECTOR_P_MASK |
 		SVM_SELECTOR_S_MASK | SVM_SELECTOR_CODE_MASK;
 	save->cs.limit = 0xffff;
-	/*
-	 * cs.base should really be 0xffff0000, but vmx can't handle that, so
-	 * be consistent with it.
-	 *
-	 * Replace when we have real mode working for vmx.
-	 */
-	save->cs.base = 0xf0000;
 
 	save->gdtr.limit = 0xffff;
 	save->idtr.limit = 0xffff;
@@ -1191,7 +1185,7 @@
 	enable_gif(svm);
 }
 
-static int svm_vcpu_reset(struct kvm_vcpu *vcpu)
+static void svm_vcpu_reset(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
 	u32 dummy;
@@ -1199,16 +1193,8 @@
 
 	init_vmcb(svm);
 
-	if (!kvm_vcpu_is_bsp(vcpu)) {
-		kvm_rip_write(vcpu, 0);
-		svm->vmcb->save.cs.base = svm->vcpu.arch.sipi_vector << 12;
-		svm->vmcb->save.cs.selector = svm->vcpu.arch.sipi_vector << 8;
-	}
-
 	kvm_cpuid(vcpu, &eax, &dummy, &dummy, &dummy);
 	kvm_register_write(vcpu, VCPU_REGS_RDX, eax);
-
-	return 0;
 }
 
 static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
@@ -3487,7 +3473,7 @@
 	    exit_code != SVM_EXIT_EXCP_BASE + PF_VECTOR &&
 	    exit_code != SVM_EXIT_NPF && exit_code != SVM_EXIT_TASK_SWITCH &&
 	    exit_code != SVM_EXIT_INTR && exit_code != SVM_EXIT_NMI)
-		printk(KERN_ERR "%s: unexpected exit_ini_info 0x%x "
+		printk(KERN_ERR "%s: unexpected exit_int_info 0x%x "
 		       "exit_code 0x%x\n",
 		       __func__, svm->vmcb->control.exit_int_info,
 		       exit_code);
@@ -3591,6 +3577,11 @@
 	return;
 }
 
+static void svm_sync_pir_to_irr(struct kvm_vcpu *vcpu)
+{
+	return;
+}
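SVM has no posted-interrupt hardware, so svm_sync_pir_to_irr() is deliberately an empty stub; wiring it into svm_x86_ops below keeps the common x86 code free of NULL checks on the new callback.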
+
 static int svm_nmi_allowed(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
@@ -3641,7 +3632,7 @@
 	return ret;
 }
 
-static void enable_irq_window(struct kvm_vcpu *vcpu)
+static int enable_irq_window(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
 
@@ -3655,15 +3646,16 @@
 		svm_set_vintr(svm);
 		svm_inject_irq(svm, 0x0);
 	}
+	return 0;
 }
 
-static void enable_nmi_window(struct kvm_vcpu *vcpu)
+static int enable_nmi_window(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
 
 	if ((svm->vcpu.arch.hflags & (HF_NMI_MASK | HF_IRET_MASK))
 	    == HF_NMI_MASK)
-		return; /* IRET will cause a vm exit */
+		return 0; /* IRET will cause a vm exit */
 
 	/*
 	 * Something prevents NMI from been injected. Single step over possible
@@ -3672,6 +3664,7 @@
 	svm->nmi_singlestep = true;
 	svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF);
 	update_db_bp_intercept(vcpu);
+	return 0;
 }
 
 static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr)
@@ -4247,6 +4240,11 @@
 	return ret;
 }
 
+static void svm_handle_external_intr(struct kvm_vcpu *vcpu)
+{
+	local_irq_enable();
+}
+
 static struct kvm_x86_ops svm_x86_ops = {
 	.cpu_has_kvm_support = has_svm,
 	.disabled_by_bios = is_disabled,
@@ -4314,6 +4312,7 @@
 	.vm_has_apicv = svm_vm_has_apicv,
 	.load_eoi_exitmap = svm_load_eoi_exitmap,
 	.hwapic_isr_update = svm_hwapic_isr_update,
+	.sync_pir_to_irr = svm_sync_pir_to_irr,
 
 	.set_tss_addr = svm_set_tss_addr,
 	.get_tdp_level = get_npt_level,
@@ -4342,6 +4341,7 @@
 	.set_tdp_cr3 = set_tdp_cr3,
 
 	.check_intercept = svm_check_intercept,
+	.handle_external_intr = svm_handle_external_intr,
 };
 
 static int __init svm_init(void)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 867b810..25a791e 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -84,8 +84,11 @@
 static bool __read_mostly fasteoi = 1;
 module_param(fasteoi, bool, S_IRUGO);
 
-static bool __read_mostly enable_apicv_reg_vid;
+static bool __read_mostly enable_apicv = 1;
+module_param(enable_apicv, bool, S_IRUGO);
 
+static bool __read_mostly enable_shadow_vmcs = 1;
+module_param_named(enable_shadow_vmcs, enable_shadow_vmcs, bool, S_IRUGO);
 /*
  * If nested=1, nested virtualization is supported, i.e., guests may use
  * VMX and be a hypervisor for its own guests. If nested=0, guests may not
@@ -298,7 +301,8 @@
 	u32 guest_activity_state;
 	u32 guest_sysenter_cs;
 	u32 host_ia32_sysenter_cs;
-	u32 padding32[8]; /* room for future expansion */
+	u32 vmx_preemption_timer_value;
+	u32 padding32[7]; /* room for future expansion */
 	u16 virtual_processor_id;
 	u16 guest_es_selector;
 	u16 guest_cs_selector;
@@ -351,6 +355,12 @@
 	/* The host-usable pointer to the above */
 	struct page *current_vmcs12_page;
 	struct vmcs12 *current_vmcs12;
+	struct vmcs *current_shadow_vmcs;
+	/*
+	 * Indicates whether the shadow vmcs must be updated with the
+	 * data held by vmcs12.
+	 */
+	bool sync_shadow_vmcs;
 
 	/* vmcs02_list cache of VMCSs recently used to run L2 guests */
 	struct list_head vmcs02_pool;
@@ -365,6 +375,31 @@
 	struct page *apic_access_page;
 };
 
+#define POSTED_INTR_ON  0
+/* Posted-Interrupt Descriptor */
+struct pi_desc {
+	u32 pir[8];     /* Posted interrupt requested */
+	u32 control;	/* bit 0 of control is outstanding notification bit */
+	u32 rsvd[7];
+} __aligned(64);
+
+static bool pi_test_and_set_on(struct pi_desc *pi_desc)
+{
+	return test_and_set_bit(POSTED_INTR_ON,
+			(unsigned long *)&pi_desc->control);
+}
+
+static bool pi_test_and_clear_on(struct pi_desc *pi_desc)
+{
+	return test_and_clear_bit(POSTED_INTR_ON,
+			(unsigned long *)&pi_desc->control);
+}
+
+static int pi_test_and_set_pir(int vector, struct pi_desc *pi_desc)
+{
+	return test_and_set_bit(vector, (unsigned long *)pi_desc->pir);
+}
+
 struct vcpu_vmx {
 	struct kvm_vcpu       vcpu;
 	unsigned long         host_rsp;
@@ -377,6 +412,7 @@
 	struct shared_msr_entry *guest_msrs;
 	int                   nmsrs;
 	int                   save_nmsrs;
+	unsigned long	      host_idt_base;
 #ifdef CONFIG_X86_64
 	u64 		      msr_host_kernel_gs_base;
 	u64 		      msr_guest_kernel_gs_base;
@@ -428,6 +464,9 @@
 
 	bool rdtscp_enabled;
 
+	/* Posted interrupt descriptor */
+	struct pi_desc pi_desc;
+
 	/* Support for a guest hypervisor (nested VMX) */
 	struct nested_vmx nested;
 };
@@ -451,6 +490,64 @@
 #define FIELD64(number, name)	[number] = VMCS12_OFFSET(name), \
 				[number##_HIGH] = VMCS12_OFFSET(name)+4
 
+
+static const unsigned long shadow_read_only_fields[] = {
+	/*
+	 * We do NOT shadow fields that are modified when L0
+	 * traps and emulates any vmx instruction (e.g. VMPTRLD,
+	 * VMXON...) executed by L1.
+	 * For example, VM_INSTRUCTION_ERROR is read
+	 * by L1 if a vmx instruction fails (part of the error path).
+	 * Note the code assumes this logic. If for some reason
+	 * we start shadowing these fields then we need to
+	 * force a shadow sync when L0 emulates vmx instructions
+	 * (e.g. force a sync if VM_INSTRUCTION_ERROR is modified
+	 * by nested_vmx_failValid)
+	 */
+	VM_EXIT_REASON,
+	VM_EXIT_INTR_INFO,
+	VM_EXIT_INSTRUCTION_LEN,
+	IDT_VECTORING_INFO_FIELD,
+	IDT_VECTORING_ERROR_CODE,
+	VM_EXIT_INTR_ERROR_CODE,
+	EXIT_QUALIFICATION,
+	GUEST_LINEAR_ADDRESS,
+	GUEST_PHYSICAL_ADDRESS
+};
+static const int max_shadow_read_only_fields =
+	ARRAY_SIZE(shadow_read_only_fields);
+
+static const unsigned long shadow_read_write_fields[] = {
+	GUEST_RIP,
+	GUEST_RSP,
+	GUEST_CR0,
+	GUEST_CR3,
+	GUEST_CR4,
+	GUEST_INTERRUPTIBILITY_INFO,
+	GUEST_RFLAGS,
+	GUEST_CS_SELECTOR,
+	GUEST_CS_AR_BYTES,
+	GUEST_CS_LIMIT,
+	GUEST_CS_BASE,
+	GUEST_ES_BASE,
+	CR0_GUEST_HOST_MASK,
+	CR0_READ_SHADOW,
+	CR4_READ_SHADOW,
+	TSC_OFFSET,
+	EXCEPTION_BITMAP,
+	CPU_BASED_VM_EXEC_CONTROL,
+	VM_ENTRY_EXCEPTION_ERROR_CODE,
+	VM_ENTRY_INTR_INFO_FIELD,
+	VM_ENTRY_INSTRUCTION_LEN,
+	HOST_FS_BASE,
+	HOST_GS_BASE,
+	HOST_FS_SELECTOR,
+	HOST_GS_SELECTOR
+};
+static const int max_shadow_read_write_fields =
+	ARRAY_SIZE(shadow_read_write_fields);
+
 static const unsigned short vmcs_field_to_offset_table[] = {
 	FIELD(VIRTUAL_PROCESSOR_ID, virtual_processor_id),
 	FIELD(GUEST_ES_SELECTOR, guest_es_selector),
@@ -537,6 +634,7 @@
 	FIELD(GUEST_ACTIVITY_STATE, guest_activity_state),
 	FIELD(GUEST_SYSENTER_CS, guest_sysenter_cs),
 	FIELD(HOST_IA32_SYSENTER_CS, host_ia32_sysenter_cs),
+	FIELD(VMX_PREEMPTION_TIMER_VALUE, vmx_preemption_timer_value),
 	FIELD(CR0_GUEST_HOST_MASK, cr0_guest_host_mask),
 	FIELD(CR4_GUEST_HOST_MASK, cr4_guest_host_mask),
 	FIELD(CR0_READ_SHADOW, cr0_read_shadow),
@@ -624,6 +722,9 @@
 			    struct kvm_segment *var, int seg);
 static bool guest_state_valid(struct kvm_vcpu *vcpu);
 static u32 vmx_segment_access_rights(struct kvm_segment *var);
+static void vmx_sync_pir_to_irr_dummy(struct kvm_vcpu *vcpu);
+static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx);
+static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx);
 
 static DEFINE_PER_CPU(struct vmcs *, vmxarea);
 static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
@@ -640,6 +741,8 @@
 static unsigned long *vmx_msr_bitmap_longmode;
 static unsigned long *vmx_msr_bitmap_legacy_x2apic;
 static unsigned long *vmx_msr_bitmap_longmode_x2apic;
+static unsigned long *vmx_vmread_bitmap;
+static unsigned long *vmx_vmwrite_bitmap;
 
 static bool cpu_has_load_ia32_efer;
 static bool cpu_has_load_perf_global_ctrl;
@@ -782,6 +885,18 @@
 		SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY;
 }
 
+static inline bool cpu_has_vmx_posted_intr(void)
+{
+	return vmcs_config.pin_based_exec_ctrl & PIN_BASED_POSTED_INTR;
+}
+
+static inline bool cpu_has_vmx_apicv(void)
+{
+	return cpu_has_vmx_apic_register_virt() &&
+		cpu_has_vmx_virtual_intr_delivery() &&
+		cpu_has_vmx_posted_intr();
+}
+
 static inline bool cpu_has_vmx_flexpriority(void)
 {
 	return cpu_has_vmx_tpr_shadow() &&
@@ -895,6 +1010,18 @@
 		SECONDARY_EXEC_WBINVD_EXITING;
 }
 
+static inline bool cpu_has_vmx_shadow_vmcs(void)
+{
+	u64 vmx_msr;
+	rdmsrl(MSR_IA32_VMX_MISC, vmx_msr);
+	/* check if the cpu supports writing r/o exit information fields */
+	if (!(vmx_msr & MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS))
+		return false;
+
+	return vmcs_config.cpu_based_2nd_exec_ctrl &
+		SECONDARY_EXEC_SHADOW_VMCS;
+}
+
 static inline bool report_flexpriority(void)
 {
 	return flexpriority_enabled;
@@ -1790,7 +1917,7 @@
 	u32 intr_info = nr | INTR_INFO_VALID_MASK;
 
 	if (nr == PF_VECTOR && is_guest_mode(vcpu) &&
-		nested_pf_handled(vcpu))
+	    !vmx->nested.nested_run_pending && nested_pf_handled(vcpu))
 		return;
 
 	if (has_error_code) {
@@ -2022,6 +2149,7 @@
 static u32 nested_vmx_pinbased_ctls_low, nested_vmx_pinbased_ctls_high;
 static u32 nested_vmx_exit_ctls_low, nested_vmx_exit_ctls_high;
 static u32 nested_vmx_entry_ctls_low, nested_vmx_entry_ctls_high;
+static u32 nested_vmx_misc_low, nested_vmx_misc_high;
 static __init void nested_vmx_setup_ctls_msrs(void)
 {
 	/*
@@ -2040,30 +2168,40 @@
 	 */
 
 	/* pin-based controls */
+	rdmsr(MSR_IA32_VMX_PINBASED_CTLS,
+	      nested_vmx_pinbased_ctls_low, nested_vmx_pinbased_ctls_high);
 	/*
 	 * According to the Intel spec, if bit 55 of VMX_BASIC is off (as it is
 	 * in our case), bits 1, 2 and 4 (i.e., 0x16) must be 1 in this MSR.
 	 */
-	nested_vmx_pinbased_ctls_low = 0x16 ;
-	nested_vmx_pinbased_ctls_high = 0x16 |
-		PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING |
-		PIN_BASED_VIRTUAL_NMIS;
+	nested_vmx_pinbased_ctls_low |= PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR;
+	nested_vmx_pinbased_ctls_high &= PIN_BASED_EXT_INTR_MASK |
+		PIN_BASED_NMI_EXITING | PIN_BASED_VIRTUAL_NMIS |
+		PIN_BASED_VMX_PREEMPTION_TIMER;
+	nested_vmx_pinbased_ctls_high |= PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR;
 
-	/* exit controls */
-	nested_vmx_exit_ctls_low = 0;
+	/*
+	 * Exit controls
+	 * If bit 55 of VMX_BASIC is off, bits 0-8 and 10, 11, 13, 14, 16 and
+	 * 17 must be 1.
+	 */
+	nested_vmx_exit_ctls_low = VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR;
 	/* Note that guest use of VM_EXIT_ACK_INTR_ON_EXIT is not supported. */
 #ifdef CONFIG_X86_64
 	nested_vmx_exit_ctls_high = VM_EXIT_HOST_ADDR_SPACE_SIZE;
 #else
 	nested_vmx_exit_ctls_high = 0;
 #endif
+	nested_vmx_exit_ctls_high |= VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR;
 
 	/* entry controls */
 	rdmsr(MSR_IA32_VMX_ENTRY_CTLS,
 		nested_vmx_entry_ctls_low, nested_vmx_entry_ctls_high);
-	nested_vmx_entry_ctls_low = 0;
+	/* If bit 55 of VMX_BASIC is off, bits 0-8 and 12 must be 1. */
+	nested_vmx_entry_ctls_low = VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR;
 	nested_vmx_entry_ctls_high &=
 		VM_ENTRY_LOAD_IA32_PAT | VM_ENTRY_IA32E_MODE;
+	nested_vmx_entry_ctls_high |= VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR;
 
 	/* cpu-based controls */
 	rdmsr(MSR_IA32_VMX_PROCBASED_CTLS,
@@ -2080,6 +2218,7 @@
 		CPU_BASED_MOV_DR_EXITING | CPU_BASED_UNCOND_IO_EXITING |
 		CPU_BASED_USE_IO_BITMAPS | CPU_BASED_MONITOR_EXITING |
 		CPU_BASED_RDPMC_EXITING | CPU_BASED_RDTSC_EXITING |
+		CPU_BASED_PAUSE_EXITING |
 		CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
 	/*
 	 * We can allow some features even when not supported by the
@@ -2094,7 +2233,14 @@
 		nested_vmx_secondary_ctls_low, nested_vmx_secondary_ctls_high);
 	nested_vmx_secondary_ctls_low = 0;
 	nested_vmx_secondary_ctls_high &=
-		SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
+		SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
+		SECONDARY_EXEC_WBINVD_EXITING;
+
+	/* miscellaneous data */
+	rdmsr(MSR_IA32_VMX_MISC, nested_vmx_misc_low, nested_vmx_misc_high);
+	nested_vmx_misc_low &= VMX_MISC_PREEMPTION_TIMER_RATE_MASK |
+		VMX_MISC_SAVE_EFER_LMA;
+	nested_vmx_misc_high = 0;
 }
 
 static inline bool vmx_control_verify(u32 control, u32 low, u32 high)
@@ -2165,7 +2311,8 @@
 					nested_vmx_entry_ctls_high);
 		break;
 	case MSR_IA32_VMX_MISC:
-		*pdata = 0;
+		*pdata = vmx_control_msr(nested_vmx_misc_low,
+					 nested_vmx_misc_high);
 		break;
 	/*
 	 * These MSRs specify bits which the guest must keep fixed (on or off)
@@ -2529,12 +2676,6 @@
 	u32 _vmexit_control = 0;
 	u32 _vmentry_control = 0;
 
-	min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING;
-	opt = PIN_BASED_VIRTUAL_NMIS;
-	if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PINBASED_CTLS,
-				&_pin_based_exec_control) < 0)
-		return -EIO;
-
 	min = CPU_BASED_HLT_EXITING |
 #ifdef CONFIG_X86_64
 	      CPU_BASED_CR8_LOAD_EXITING |
@@ -2573,7 +2714,8 @@
 			SECONDARY_EXEC_RDTSCP |
 			SECONDARY_EXEC_ENABLE_INVPCID |
 			SECONDARY_EXEC_APIC_REGISTER_VIRT |
-			SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY;
+			SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
+			SECONDARY_EXEC_SHADOW_VMCS;
 		if (adjust_vmx_controls(min2, opt2,
 					MSR_IA32_VMX_PROCBASED_CTLS2,
 					&_cpu_based_2nd_exec_control) < 0)
@@ -2605,11 +2747,23 @@
 #ifdef CONFIG_X86_64
 	min |= VM_EXIT_HOST_ADDR_SPACE_SIZE;
 #endif
-	opt = VM_EXIT_SAVE_IA32_PAT | VM_EXIT_LOAD_IA32_PAT;
+	opt = VM_EXIT_SAVE_IA32_PAT | VM_EXIT_LOAD_IA32_PAT |
+		VM_EXIT_ACK_INTR_ON_EXIT;
 	if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_EXIT_CTLS,
 				&_vmexit_control) < 0)
 		return -EIO;
 
+	min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING;
+	opt = PIN_BASED_VIRTUAL_NMIS | PIN_BASED_POSTED_INTR;
+	if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PINBASED_CTLS,
+				&_pin_based_exec_control) < 0)
+		return -EIO;
+
+	if (!(_cpu_based_2nd_exec_control &
+		SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY) ||
+		!(_vmexit_control & VM_EXIT_ACK_INTR_ON_EXIT))
+		_pin_based_exec_control &= ~PIN_BASED_POSTED_INTR;
+
 	min = 0;
 	opt = VM_ENTRY_LOAD_IA32_PAT;
 	if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_ENTRY_CTLS,
@@ -2762,6 +2916,8 @@
 
 	if (!cpu_has_vmx_vpid())
 		enable_vpid = 0;
+	if (!cpu_has_vmx_shadow_vmcs())
+		enable_shadow_vmcs = 0;
 
 	if (!cpu_has_vmx_ept() ||
 	    !cpu_has_vmx_ept_4levels()) {
@@ -2788,14 +2944,16 @@
 	if (!cpu_has_vmx_ple())
 		ple_gap = 0;
 
-	if (!cpu_has_vmx_apic_register_virt() ||
-				!cpu_has_vmx_virtual_intr_delivery())
-		enable_apicv_reg_vid = 0;
+	if (!cpu_has_vmx_apicv())
+		enable_apicv = 0;
 
-	if (enable_apicv_reg_vid)
+	if (enable_apicv)
 		kvm_x86_ops->update_cr8_intercept = NULL;
-	else
+	else {
 		kvm_x86_ops->hwapic_irr_update = NULL;
+		kvm_x86_ops->deliver_posted_interrupt = NULL;
+		kvm_x86_ops->sync_pir_to_irr = vmx_sync_pir_to_irr_dummy;
+	}
 
 	if (nested)
 		nested_vmx_setup_ctls_msrs();
@@ -2876,22 +3034,6 @@
 	vmx->cpl = 0;
 }
 
-static gva_t rmode_tss_base(struct kvm *kvm)
-{
-	if (!kvm->arch.tss_addr) {
-		struct kvm_memslots *slots;
-		struct kvm_memory_slot *slot;
-		gfn_t base_gfn;
-
-		slots = kvm_memslots(kvm);
-		slot = id_to_memslot(slots, 0);
-		base_gfn = slot->base_gfn + slot->npages - 3;
-
-		return base_gfn << PAGE_SHIFT;
-	}
-	return kvm->arch.tss_addr;
-}
-
 static void fix_rmode_seg(int seg, struct kvm_segment *save)
 {
 	const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
@@ -2942,19 +3084,15 @@
 
 	/*
 	 * Very old userspace does not call KVM_SET_TSS_ADDR before entering
-	 * vcpu. Call it here with phys address pointing 16M below 4G.
+	 * vcpu. Warn the user that an update is overdue.
 	 */
-	if (!vcpu->kvm->arch.tss_addr) {
+	if (!vcpu->kvm->arch.tss_addr)
 		printk_once(KERN_WARNING "kvm: KVM_SET_TSS_ADDR need to be "
 			     "called before entering vcpu\n");
-		srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
-		vmx_set_tss_addr(vcpu->kvm, 0xfeffd000);
-		vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
-	}
 
 	vmx_segment_cache_clear(vmx);
 
-	vmcs_writel(GUEST_TR_BASE, rmode_tss_base(vcpu->kvm));
+	vmcs_writel(GUEST_TR_BASE, vcpu->kvm->arch.tss_addr);
 	vmcs_write32(GUEST_TR_LIMIT, RMODE_TSS_SIZE - 1);
 	vmcs_write32(GUEST_TR_AR_BYTES, 0x008b);
 
@@ -3214,7 +3352,9 @@
 		 */
 		if (!nested_vmx_allowed(vcpu))
 			return 1;
-	} else if (to_vmx(vcpu)->nested.vmxon)
+	}
+	if (to_vmx(vcpu)->nested.vmxon &&
+	    ((cr4 & VMXON_CR4_ALWAYSON) != VMXON_CR4_ALWAYSON))
 		return 1;
 
 	vcpu->arch.cr4 = cr4;
@@ -3550,7 +3690,7 @@
 		return true;
 
 	/* real mode guest state checks */
-	if (!is_protmode(vcpu)) {
+	if (!is_protmode(vcpu) || (vmx_get_rflags(vcpu) & X86_EFLAGS_VM)) {
 		if (!rmode_segment_valid(vcpu, VCPU_SREG_CS))
 			return false;
 		if (!rmode_segment_valid(vcpu, VCPU_SREG_SS))
@@ -3599,7 +3739,7 @@
 	int r, idx, ret = 0;
 
 	idx = srcu_read_lock(&kvm->srcu);
-	fn = rmode_tss_base(kvm) >> PAGE_SHIFT;
+	fn = kvm->arch.tss_addr >> PAGE_SHIFT;
 	r = kvm_clear_guest_page(kvm, fn, 0, PAGE_SIZE);
 	if (r < 0)
 		goto out;
@@ -3692,7 +3832,7 @@
 	kvm_userspace_mem.flags = 0;
 	kvm_userspace_mem.guest_phys_addr = 0xfee00000ULL;
 	kvm_userspace_mem.memory_size = PAGE_SIZE;
-	r = __kvm_set_memory_region(kvm, &kvm_userspace_mem, false);
+	r = __kvm_set_memory_region(kvm, &kvm_userspace_mem);
 	if (r)
 		goto out;
 
@@ -3722,7 +3862,7 @@
 	kvm_userspace_mem.guest_phys_addr =
 		kvm->arch.ept_identity_map_addr;
 	kvm_userspace_mem.memory_size = PAGE_SIZE;
-	r = __kvm_set_memory_region(kvm, &kvm_userspace_mem, false);
+	r = __kvm_set_memory_region(kvm, &kvm_userspace_mem);
 	if (r)
 		goto out;
 
@@ -3869,13 +4009,59 @@
 			msr, MSR_TYPE_W);
 }
 
+static int vmx_vm_has_apicv(struct kvm *kvm)
+{
+	return enable_apicv && irqchip_in_kernel(kvm);
+}
+
+/*
+ * Send an interrupt to a vcpu via the posted-interrupt mechanism.
+ * 1. If the target vcpu is running (non-root mode), send a posted-interrupt
+ * notification and the hardware will sync PIR to vIRR atomically.
+ * 2. If the target vcpu isn't running (root mode), kick it so that it picks
+ * up the interrupt from PIR on the next vmentry.
+ */
+static void vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu, int vector)
+{
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+	int r;
+
+	if (pi_test_and_set_pir(vector, &vmx->pi_desc))
+		return;
+
+	r = pi_test_and_set_on(&vmx->pi_desc);
+	kvm_make_request(KVM_REQ_EVENT, vcpu);
+#ifdef CONFIG_SMP
+	if (!r && (vcpu->mode == IN_GUEST_MODE))
+		apic->send_IPI_mask(get_cpu_mask(vcpu->cpu),
+				POSTED_INTR_VECTOR);
+	else
+#endif
+		kvm_vcpu_kick(vcpu);
+}
+
+static void vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu)
+{
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+	if (!pi_test_and_clear_on(&vmx->pi_desc))
+		return;
+
+	kvm_apic_update_irr(vcpu, vmx->pi_desc.pir);
+}
+
+static void vmx_sync_pir_to_irr_dummy(struct kvm_vcpu *vcpu)
+{
+	return;
+}
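A minimal user-space model of the coalescing these two functions implement (C11 atomics; the 64-bit unsigned long and the helper names are assumptions of the sketch): any number of senders may set PIR bits, but only the one that flips ON from 0 to 1 pays for a notification, and the receiver consumes PIR only when it observes ON set:

#include <stdatomic.h>
#include <stdbool.h>

static _Atomic unsigned long pir[4];	/* 256 vectors, one bit each */
static atomic_bool on;

/* Sender side, cf. vmx_deliver_posted_interrupt(). Returns true when
 * the caller must notify the target (IPI or kick in the kernel code). */
static bool post(unsigned int vector)
{
	unsigned long mask = 1UL << (vector % 64);

	if (atomic_fetch_or(&pir[vector / 64], mask) & mask)
		return false;	/* already pending: nothing to do */
	return !atomic_exchange(&on, true);
}

/* Receiver side, cf. vmx_sync_pir_to_irr(): consume only if ON was set. */
static void sync(unsigned long irr[4])
{
	int i;

	if (!atomic_exchange(&on, false))
		return;
	for (i = 0; i < 4; i++)
		irr[i] |= atomic_exchange(&pir[i], 0);
}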
+
 /*
  * Set up the vmcs's constant host-state fields, i.e., host-state fields that
  * will not change in the lifetime of the guest.
  * Note that host-state that does change is set elsewhere. E.g., host-state
  * that is set differently for each CPU is set in vmx_vcpu_load(), not here.
  */
-static void vmx_set_constant_host_state(void)
+static void vmx_set_constant_host_state(struct vcpu_vmx *vmx)
 {
 	u32 low32, high32;
 	unsigned long tmpl;
@@ -3903,6 +4089,7 @@
 
 	native_store_idt(&dt);
 	vmcs_writel(HOST_IDTR_BASE, dt.address);   /* 22.2.4 */
+	vmx->host_idt_base = dt.address;
 
 	vmcs_writel(HOST_RIP, vmx_return); /* 22.2.5 */
 
@@ -3928,6 +4115,15 @@
 	vmcs_writel(CR4_GUEST_HOST_MASK, ~vmx->vcpu.arch.cr4_guest_owned_bits);
 }
 
+static u32 vmx_pin_based_exec_ctrl(struct vcpu_vmx *vmx)
+{
+	u32 pin_based_exec_ctrl = vmcs_config.pin_based_exec_ctrl;
+
+	if (!vmx_vm_has_apicv(vmx->vcpu.kvm))
+		pin_based_exec_ctrl &= ~PIN_BASED_POSTED_INTR;
+	return pin_based_exec_ctrl;
+}
+
 static u32 vmx_exec_control(struct vcpu_vmx *vmx)
 {
 	u32 exec_control = vmcs_config.cpu_based_exec_ctrl;
@@ -3945,11 +4141,6 @@
 	return exec_control;
 }
 
-static int vmx_vm_has_apicv(struct kvm *kvm)
-{
-	return enable_apicv_reg_vid && irqchip_in_kernel(kvm);
-}
-
 static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx)
 {
 	u32 exec_control = vmcs_config.cpu_based_2nd_exec_ctrl;
@@ -3971,6 +4162,12 @@
 		exec_control &= ~(SECONDARY_EXEC_APIC_REGISTER_VIRT |
 				  SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
 	exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
+	/*
+	 * SECONDARY_EXEC_SHADOW_VMCS is enabled when L1 executes VMPTRLD
+	 * (handle_vmptrld). We cannot enable shadow VMCS here because we
+	 * don't yet have a current VMCS12.
+	 */
+	exec_control &= ~SECONDARY_EXEC_SHADOW_VMCS;
 	return exec_control;
 }
 
@@ -3999,14 +4196,17 @@
 	vmcs_write64(IO_BITMAP_A, __pa(vmx_io_bitmap_a));
 	vmcs_write64(IO_BITMAP_B, __pa(vmx_io_bitmap_b));
 
+	if (enable_shadow_vmcs) {
+		vmcs_write64(VMREAD_BITMAP, __pa(vmx_vmread_bitmap));
+		vmcs_write64(VMWRITE_BITMAP, __pa(vmx_vmwrite_bitmap));
+	}
 	if (cpu_has_vmx_msr_bitmap())
 		vmcs_write64(MSR_BITMAP, __pa(vmx_msr_bitmap_legacy));
 
 	vmcs_write64(VMCS_LINK_POINTER, -1ull); /* 22.3.1.5 */
 
 	/* Control */
-	vmcs_write32(PIN_BASED_VM_EXEC_CONTROL,
-		vmcs_config.pin_based_exec_ctrl);
+	vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, vmx_pin_based_exec_ctrl(vmx));
 
 	vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, vmx_exec_control(vmx));
 
@@ -4015,13 +4215,16 @@
 				vmx_secondary_exec_control(vmx));
 	}
 
-	if (enable_apicv_reg_vid) {
+	if (vmx_vm_has_apicv(vmx->vcpu.kvm)) {
 		vmcs_write64(EOI_EXIT_BITMAP0, 0);
 		vmcs_write64(EOI_EXIT_BITMAP1, 0);
 		vmcs_write64(EOI_EXIT_BITMAP2, 0);
 		vmcs_write64(EOI_EXIT_BITMAP3, 0);
 
 		vmcs_write16(GUEST_INTR_STATUS, 0);
+
+		vmcs_write64(POSTED_INTR_NV, POSTED_INTR_VECTOR);
+		vmcs_write64(POSTED_INTR_DESC_ADDR, __pa(&vmx->pi_desc));
 	}
 
 	if (ple_gap) {
@@ -4035,7 +4238,7 @@
 
 	vmcs_write16(HOST_FS_SELECTOR, 0);            /* 22.2.4 */
 	vmcs_write16(HOST_GS_SELECTOR, 0);            /* 22.2.4 */
-	vmx_set_constant_host_state();
+	vmx_set_constant_host_state(vmx);
 #ifdef CONFIG_X86_64
 	rdmsrl(MSR_FS_BASE, a);
 	vmcs_writel(HOST_FS_BASE, a); /* 22.2.4 */
@@ -4089,11 +4292,10 @@
 	return 0;
 }
 
-static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
+static void vmx_vcpu_reset(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	u64 msr;
-	int ret;
 
 	vmx->rmode.vm86_active = 0;
 
@@ -4109,12 +4311,8 @@
 	vmx_segment_cache_clear(vmx);
 
 	seg_setup(VCPU_SREG_CS);
-	if (kvm_vcpu_is_bsp(&vmx->vcpu))
-		vmcs_write16(GUEST_CS_SELECTOR, 0xf000);
-	else {
-		vmcs_write16(GUEST_CS_SELECTOR, vmx->vcpu.arch.sipi_vector << 8);
-		vmcs_writel(GUEST_CS_BASE, vmx->vcpu.arch.sipi_vector << 12);
-	}
+	vmcs_write16(GUEST_CS_SELECTOR, 0xf000);
+	vmcs_write32(GUEST_CS_BASE, 0xffff0000);
 
 	seg_setup(VCPU_SREG_DS);
 	seg_setup(VCPU_SREG_ES);
@@ -4137,10 +4335,7 @@
 	vmcs_writel(GUEST_SYSENTER_EIP, 0);
 
 	vmcs_writel(GUEST_RFLAGS, 0x02);
-	if (kvm_vcpu_is_bsp(&vmx->vcpu))
-		kvm_rip_write(vcpu, 0xfff0);
-	else
-		kvm_rip_write(vcpu, 0);
+	kvm_rip_write(vcpu, 0xfff0);
 
 	vmcs_writel(GUEST_GDTR_BASE, 0);
 	vmcs_write32(GUEST_GDTR_LIMIT, 0xffff);
@@ -4171,23 +4366,20 @@
 		vmcs_write64(APIC_ACCESS_ADDR,
 			     page_to_phys(vmx->vcpu.kvm->arch.apic_access_page));
 
+	if (vmx_vm_has_apicv(vcpu->kvm))
+		memset(&vmx->pi_desc, 0, sizeof(struct pi_desc));
+
 	if (vmx->vpid != 0)
 		vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid);
 
 	vmx->vcpu.arch.cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET;
-	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
 	vmx_set_cr0(&vmx->vcpu, kvm_read_cr0(vcpu)); /* enter rmode */
-	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
 	vmx_set_cr4(&vmx->vcpu, 0);
 	vmx_set_efer(&vmx->vcpu, 0);
 	vmx_fpu_activate(&vmx->vcpu);
 	update_exception_bitmap(&vmx->vcpu);
 
 	vpid_sync_context(vmx);
-
-	ret = 0;
-
-	return ret;
 }
 
 /*
@@ -4200,40 +4392,45 @@
 		PIN_BASED_EXT_INTR_MASK;
 }
 
-static void enable_irq_window(struct kvm_vcpu *vcpu)
+static bool nested_exit_on_nmi(struct kvm_vcpu *vcpu)
+{
+	return get_vmcs12(vcpu)->pin_based_vm_exec_control &
+		PIN_BASED_NMI_EXITING;
+}
+
+static int enable_irq_window(struct kvm_vcpu *vcpu)
 {
 	u32 cpu_based_vm_exec_control;
-	if (is_guest_mode(vcpu) && nested_exit_on_intr(vcpu)) {
+
+	if (is_guest_mode(vcpu) && nested_exit_on_intr(vcpu))
 		/*
 		 * We get here if vmx_interrupt_allowed() said we can't
-		 * inject to L1 now because L2 must run. Ask L2 to exit
-		 * right after entry, so we can inject to L1 more promptly.
+		 * inject to L1 now because L2 must run. The caller will have
+		 * to make L2 exit right after entry, so we can inject to L1
+		 * more promptly.
 		 */
-		kvm_make_request(KVM_REQ_IMMEDIATE_EXIT, vcpu);
-		return;
-	}
+		return -EBUSY;
 
 	cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
 	cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_INTR_PENDING;
 	vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
+	return 0;
 }
 
-static void enable_nmi_window(struct kvm_vcpu *vcpu)
+static int enable_nmi_window(struct kvm_vcpu *vcpu)
 {
 	u32 cpu_based_vm_exec_control;
 
-	if (!cpu_has_virtual_nmis()) {
-		enable_irq_window(vcpu);
-		return;
-	}
+	if (!cpu_has_virtual_nmis())
+		return enable_irq_window(vcpu);
 
-	if (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_STI) {
-		enable_irq_window(vcpu);
-		return;
-	}
+	if (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_STI)
+		return enable_irq_window(vcpu);
+
 	cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
 	cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_NMI_PENDING;
 	vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
+	return 0;
 }
 
 static void vmx_inject_irq(struct kvm_vcpu *vcpu)
@@ -4294,16 +4491,6 @@
 			INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR);
 }
 
-static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
-{
-	if (!cpu_has_virtual_nmis() && to_vmx(vcpu)->soft_vnmi_blocked)
-		return 0;
-
-	return	!(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
-		  (GUEST_INTR_STATE_MOV_SS | GUEST_INTR_STATE_STI
-		   | GUEST_INTR_STATE_NMI));
-}
-
 static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu)
 {
 	if (!cpu_has_virtual_nmis())
@@ -4333,18 +4520,52 @@
 	}
 }
 
+static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
+{
+	if (is_guest_mode(vcpu)) {
+		struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
+
+		if (to_vmx(vcpu)->nested.nested_run_pending)
+			return 0;
+		if (nested_exit_on_nmi(vcpu)) {
+			nested_vmx_vmexit(vcpu);
+			vmcs12->vm_exit_reason = EXIT_REASON_EXCEPTION_NMI;
+			vmcs12->vm_exit_intr_info = NMI_VECTOR |
+				INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK;
+			/*
+			 * The NMI-triggered VM exit counts as injection:
+			 * clear this one and block further NMIs.
+			 */
+			vcpu->arch.nmi_pending = 0;
+			vmx_set_nmi_mask(vcpu, true);
+			return 0;
+		}
+	}
+
+	if (!cpu_has_virtual_nmis() && to_vmx(vcpu)->soft_vnmi_blocked)
+		return 0;
+
+	return	!(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
+		  (GUEST_INTR_STATE_MOV_SS | GUEST_INTR_STATE_STI
+		   | GUEST_INTR_STATE_NMI));
+}
+
 static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu)
 {
-	if (is_guest_mode(vcpu) && nested_exit_on_intr(vcpu)) {
+	if (is_guest_mode(vcpu)) {
 		struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
-		if (to_vmx(vcpu)->nested.nested_run_pending ||
-		    (vmcs12->idt_vectoring_info_field &
-		     VECTORING_INFO_VALID_MASK))
+
+		if (to_vmx(vcpu)->nested.nested_run_pending)
 			return 0;
-		nested_vmx_vmexit(vcpu);
-		vmcs12->vm_exit_reason = EXIT_REASON_EXTERNAL_INTERRUPT;
-		vmcs12->vm_exit_intr_info = 0;
-		/* fall through to normal code, but now in L1, not L2 */
+		if (nested_exit_on_intr(vcpu)) {
+			nested_vmx_vmexit(vcpu);
+			vmcs12->vm_exit_reason =
+				EXIT_REASON_EXTERNAL_INTERRUPT;
+			vmcs12->vm_exit_intr_info = 0;
+			/*
+			 * fall through to normal code, but now in L1, not L2
+			 */
+		}
 	}
 
 	return (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) &&
@@ -4362,7 +4583,7 @@
 		.flags = 0,
 	};
 
-	ret = kvm_set_memory_region(kvm, &tss_mem, false);
+	ret = kvm_set_memory_region(kvm, &tss_mem);
 	if (ret)
 		return ret;
 	kvm->arch.tss_addr = addr;
@@ -4603,34 +4824,50 @@
 /* called to set cr0 as appropriate for a mov-to-cr0 exit. */
 static int handle_set_cr0(struct kvm_vcpu *vcpu, unsigned long val)
 {
-	if (to_vmx(vcpu)->nested.vmxon &&
-	    ((val & VMXON_CR0_ALWAYSON) != VMXON_CR0_ALWAYSON))
-		return 1;
-
 	if (is_guest_mode(vcpu)) {
+		struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
+		unsigned long orig_val = val;
+
 		/*
 		 * We get here when L2 changed cr0 in a way that did not change
 		 * any of L1's shadowed bits (see nested_vmx_exit_handled_cr),
-		 * but did change L0 shadowed bits. This can currently happen
-		 * with the TS bit: L0 may want to leave TS on (for lazy fpu
-		 * loading) while pretending to allow the guest to change it.
+		 * but did change L0 shadowed bits. So we first calculate the
+		 * effective cr0 value that L1 would like to write into the
+		 * hardware. It consists of the L2-owned bits from the new
+		 * value combined with the L1-owned bits from L1's guest_cr0.
 		 */
-		if (kvm_set_cr0(vcpu, (val & vcpu->arch.cr0_guest_owned_bits) |
-			 (vcpu->arch.cr0 & ~vcpu->arch.cr0_guest_owned_bits)))
+		val = (val & ~vmcs12->cr0_guest_host_mask) |
+			(vmcs12->guest_cr0 & vmcs12->cr0_guest_host_mask);
+
+		/*
+		 * TODO: will have to take unrestricted guest mode into
+		 * account.
+		 */
+		if ((val & VMXON_CR0_ALWAYSON) != VMXON_CR0_ALWAYSON)
 			return 1;
-		vmcs_writel(CR0_READ_SHADOW, val);
+
+		if (kvm_set_cr0(vcpu, val))
+			return 1;
+		vmcs_writel(CR0_READ_SHADOW, orig_val);
 		return 0;
-	} else
+	} else {
+		if (to_vmx(vcpu)->nested.vmxon &&
+		    ((val & VMXON_CR0_ALWAYSON) != VMXON_CR0_ALWAYSON))
+			return 1;
 		return kvm_set_cr0(vcpu, val);
+	}
 }
 
 static int handle_set_cr4(struct kvm_vcpu *vcpu, unsigned long val)
 {
 	if (is_guest_mode(vcpu)) {
-		if (kvm_set_cr4(vcpu, (val & vcpu->arch.cr4_guest_owned_bits) |
-			 (vcpu->arch.cr4 & ~vcpu->arch.cr4_guest_owned_bits)))
+		struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
+		unsigned long orig_val = val;
+
+		/* analogously to handle_set_cr0 */
+		val = (val & ~vmcs12->cr4_guest_host_mask) |
+			(vmcs12->guest_cr4 & vmcs12->cr4_guest_host_mask);
+		if (kvm_set_cr4(vcpu, val))
 			return 1;
-		vmcs_writel(CR4_READ_SHADOW, val);
+		vmcs_writel(CR4_READ_SHADOW, orig_val);
 		return 0;
 	} else
 		return kvm_set_cr4(vcpu, val);
@@ -5183,7 +5420,7 @@
 		if (test_bit(KVM_REQ_EVENT, &vcpu->requests))
 			return 1;
 
-		err = emulate_instruction(vcpu, 0);
+		err = emulate_instruction(vcpu, EMULTYPE_NO_REEXECUTE);
 
 		if (err == EMULATE_DO_MMIO) {
 			ret = 0;
@@ -5259,8 +5496,7 @@
 	}
 
 	/* Create a new VMCS */
-	item = (struct vmcs02_list *)
-		kmalloc(sizeof(struct vmcs02_list), GFP_KERNEL);
+	item = kmalloc(sizeof(struct vmcs02_list), GFP_KERNEL);
 	if (!item)
 		return NULL;
 	item->vmcs02.vmcs = alloc_vmcs();
@@ -5309,6 +5545,9 @@
 		free_loaded_vmcs(&vmx->vmcs01);
 }
 
+static void nested_vmx_failValid(struct kvm_vcpu *vcpu,
+				 u32 vm_instruction_error);
+
 /*
  * Emulate the VMXON instruction.
  * Currently, we just remember that VMX is active, and do not save or even
@@ -5321,6 +5560,7 @@
 {
 	struct kvm_segment cs;
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
+	struct vmcs *shadow_vmcs;
 
 	/* The Intel VMX Instruction Reference lists a bunch of bits that
 	 * are prerequisite to running VMXON, most notably cr4.VMXE must be
@@ -5344,6 +5584,21 @@
 		kvm_inject_gp(vcpu, 0);
 		return 1;
 	}
+	if (vmx->nested.vmxon) {
+		nested_vmx_failValid(vcpu, VMXERR_VMXON_IN_VMX_ROOT_OPERATION);
+		skip_emulated_instruction(vcpu);
+		return 1;
+	}
+	if (enable_shadow_vmcs) {
+		shadow_vmcs = alloc_vmcs();
+		if (!shadow_vmcs)
+			return -ENOMEM;
+		/* mark vmcs as shadow */
+		shadow_vmcs->revision_id |= (1u << 31);
+		/* init shadow vmcs */
+		vmcs_clear(shadow_vmcs);
+		vmx->nested.current_shadow_vmcs = shadow_vmcs;
+	}
 
 	INIT_LIST_HEAD(&(vmx->nested.vmcs02_pool));
 	vmx->nested.vmcs02_num = 0;
@@ -5384,6 +5639,25 @@
 	return 1;
 }
 
+static inline void nested_release_vmcs12(struct vcpu_vmx *vmx)
+{
+	u32 exec_control;
+	if (enable_shadow_vmcs) {
+		if (vmx->nested.current_vmcs12 != NULL) {
+			/*
+			 * Copy to memory all shadowed fields in case
+			 * they were modified.
+			 */
+			copy_shadow_to_vmcs12(vmx);
+			vmx->nested.sync_shadow_vmcs = false;
+			exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
+			exec_control &= ~SECONDARY_EXEC_SHADOW_VMCS;
+			vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control);
+			vmcs_write64(VMCS_LINK_POINTER, -1ull);
+		}
+	}
+	kunmap(vmx->nested.current_vmcs12_page);
+	nested_release_page(vmx->nested.current_vmcs12_page);
+}
+
 /*
  * Free whatever needs to be freed from vmx->nested when L1 goes down, or
  * just stops using VMX.
@@ -5394,11 +5668,12 @@
 		return;
 	vmx->nested.vmxon = false;
 	if (vmx->nested.current_vmptr != -1ull) {
-		kunmap(vmx->nested.current_vmcs12_page);
-		nested_release_page(vmx->nested.current_vmcs12_page);
+		nested_release_vmcs12(vmx);
 		vmx->nested.current_vmptr = -1ull;
 		vmx->nested.current_vmcs12 = NULL;
 	}
+	if (enable_shadow_vmcs)
+		free_vmcs(vmx->nested.current_shadow_vmcs);
 	/* Unpin physical memory we referred to in current vmcs02 */
 	if (vmx->nested.apic_access_page) {
 		nested_release_page(vmx->nested.apic_access_page);
@@ -5507,6 +5782,10 @@
 			    X86_EFLAGS_SF | X86_EFLAGS_OF))
 			| X86_EFLAGS_ZF);
 	get_vmcs12(vcpu)->vm_instruction_error = vm_instruction_error;
+	/*
+	 * We don't need to force a shadow sync because
+	 * VM_INSTRUCTION_ERROR is not shadowed
+	 */
 }
 
 /* Emulate the VMCLEAR instruction */
@@ -5539,8 +5818,7 @@
 	}
 
 	if (vmptr == vmx->nested.current_vmptr) {
-		kunmap(vmx->nested.current_vmcs12_page);
-		nested_release_page(vmx->nested.current_vmcs12_page);
+		nested_release_vmcs12(vmx);
 		vmx->nested.current_vmptr = -1ull;
 		vmx->nested.current_vmcs12 = NULL;
 	}
@@ -5639,6 +5917,111 @@
 	}
 }
 
+
+static inline bool vmcs12_write_any(struct kvm_vcpu *vcpu,
+				    unsigned long field, u64 field_value)
+{
+	short offset = vmcs_field_to_offset(field);
+	char *p;
+
+	if (offset < 0)
+		return false;
+	p = ((char *)get_vmcs12(vcpu)) + offset;
+
+	switch (vmcs_field_type(field)) {
+	case VMCS_FIELD_TYPE_U16:
+		*(u16 *)p = field_value;
+		return true;
+	case VMCS_FIELD_TYPE_U32:
+		*(u32 *)p = field_value;
+		return true;
+	case VMCS_FIELD_TYPE_U64:
+		*(u64 *)p = field_value;
+		return true;
+	case VMCS_FIELD_TYPE_NATURAL_WIDTH:
+		*(natural_width *)p = field_value;
+		return true;
+	default:
+		return false; /* can never happen. */
+	}
+}
+
+static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx)
+{
+	int i;
+	unsigned long field;
+	u64 field_value;
+	struct vmcs *shadow_vmcs = vmx->nested.current_shadow_vmcs;
+	unsigned long *fields = (unsigned long *)shadow_read_write_fields;
+	int num_fields = max_shadow_read_write_fields;
+
+	vmcs_load(shadow_vmcs);
+
+	for (i = 0; i < num_fields; i++) {
+		field = fields[i];
+		switch (vmcs_field_type(field)) {
+		case VMCS_FIELD_TYPE_U16:
+			field_value = vmcs_read16(field);
+			break;
+		case VMCS_FIELD_TYPE_U32:
+			field_value = vmcs_read32(field);
+			break;
+		case VMCS_FIELD_TYPE_U64:
+			field_value = vmcs_read64(field);
+			break;
+		case VMCS_FIELD_TYPE_NATURAL_WIDTH:
+			field_value = vmcs_readl(field);
+			break;
+		}
+		vmcs12_write_any(&vmx->vcpu, field, field_value);
+	}
+
+	vmcs_clear(shadow_vmcs);
+	vmcs_load(vmx->loaded_vmcs->vmcs);
+}
+
+static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx)
+{
+	unsigned long *fields[] = {
+		(unsigned long *)shadow_read_write_fields,
+		(unsigned long *)shadow_read_only_fields
+	};
+	int num_lists = ARRAY_SIZE(fields);
+	int max_fields[] = {
+		max_shadow_read_write_fields,
+		max_shadow_read_only_fields
+	};
+	int i, q;
+	unsigned long field;
+	u64 field_value = 0;
+	struct vmcs *shadow_vmcs = vmx->nested.current_shadow_vmcs;
+
+	vmcs_load(shadow_vmcs);
+
+	for (q = 0; q < num_lists; q++) {
+		for (i = 0; i < max_fields[q]; i++) {
+			field = fields[q][i];
+			vmcs12_read_any(&vmx->vcpu, field, &field_value);
+
+			switch (vmcs_field_type(field)) {
+			case VMCS_FIELD_TYPE_U16:
+				vmcs_write16(field, (u16)field_value);
+				break;
+			case VMCS_FIELD_TYPE_U32:
+				vmcs_write32(field, (u32)field_value);
+				break;
+			case VMCS_FIELD_TYPE_U64:
+				vmcs_write64(field, (u64)field_value);
+				break;
+			case VMCS_FIELD_TYPE_NATURAL_WIDTH:
+				vmcs_writel(field, (long)field_value);
+				break;
+			}
+		}
+	}
+
+	vmcs_clear(shadow_vmcs);
+	vmcs_load(vmx->loaded_vmcs->vmcs);
+}
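Both copy loops dispatch on vmcs_field_type(), which, per the SDM's field-encoding layout, is just bits 14:13 of the field number. A sketch of that decode, assuming the standard encoding:

/* VMCS field encoding, bits 14:13: 0 = 16-bit, 1 = 64-bit,
 * 2 = 32-bit, 3 = natural width. */
static inline int vmcs_field_width(unsigned long field)
{
	return (field >> 13) & 0x3;
}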
+
 /*
  * VMX instructions which assume a current vmcs12 (i.e., that VMPTRLD was
  * used before) all generate the same failure when it is missing.
@@ -5703,8 +6086,6 @@
 	gva_t gva;
 	unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
 	u32 vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
-	char *p;
-	short offset;
 	/* The value to write might be 32 or 64 bits, depending on L1's long
 	 * mode, and eventually we need to write that into a field of several
 	 * possible lengths. The code below first zero-extends the value to 64
@@ -5741,28 +6122,7 @@
 		return 1;
 	}
 
-	offset = vmcs_field_to_offset(field);
-	if (offset < 0) {
-		nested_vmx_failValid(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT);
-		skip_emulated_instruction(vcpu);
-		return 1;
-	}
-	p = ((char *) get_vmcs12(vcpu)) + offset;
-
-	switch (vmcs_field_type(field)) {
-	case VMCS_FIELD_TYPE_U16:
-		*(u16 *)p = field_value;
-		break;
-	case VMCS_FIELD_TYPE_U32:
-		*(u32 *)p = field_value;
-		break;
-	case VMCS_FIELD_TYPE_U64:
-		*(u64 *)p = field_value;
-		break;
-	case VMCS_FIELD_TYPE_NATURAL_WIDTH:
-		*(natural_width *)p = field_value;
-		break;
-	default:
+	if (!vmcs12_write_any(vcpu, field, field_value)) {
 		nested_vmx_failValid(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT);
 		skip_emulated_instruction(vcpu);
 		return 1;
@@ -5780,6 +6140,7 @@
 	gva_t gva;
 	gpa_t vmptr;
 	struct x86_exception e;
+	u32 exec_control;
 
 	if (!nested_vmx_check_permission(vcpu))
 		return 1;
@@ -5818,14 +6179,20 @@
 			skip_emulated_instruction(vcpu);
 			return 1;
 		}
-		if (vmx->nested.current_vmptr != -1ull) {
-			kunmap(vmx->nested.current_vmcs12_page);
-			nested_release_page(vmx->nested.current_vmcs12_page);
-		}
+		if (vmx->nested.current_vmptr != -1ull)
+			nested_release_vmcs12(vmx);
 
 		vmx->nested.current_vmptr = vmptr;
 		vmx->nested.current_vmcs12 = new_vmcs12;
 		vmx->nested.current_vmcs12_page = page;
+		if (enable_shadow_vmcs) {
+			exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
+			exec_control |= SECONDARY_EXEC_SHADOW_VMCS;
+			vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control);
+			vmcs_write64(VMCS_LINK_POINTER,
+				     __pa(vmx->nested.current_shadow_vmcs));
+			vmx->nested.sync_shadow_vmcs = true;
+		}
 	}
 
 	nested_vmx_succeed(vcpu);
@@ -5908,6 +6275,52 @@
 static const int kvm_vmx_max_exit_handlers =
 	ARRAY_SIZE(kvm_vmx_exit_handlers);
 
+static bool nested_vmx_exit_handled_io(struct kvm_vcpu *vcpu,
+				       struct vmcs12 *vmcs12)
+{
+	unsigned long exit_qualification;
+	gpa_t bitmap, last_bitmap;
+	unsigned int port;
+	int size;
+	u8 b;
+
+	if (nested_cpu_has(vmcs12, CPU_BASED_UNCOND_IO_EXITING))
+		return 1;
+
+	if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS))
+		return 0;
+
+	exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
+
+	port = exit_qualification >> 16;
+	size = (exit_qualification & 7) + 1;
+
+	last_bitmap = (gpa_t)-1;
+	b = -1;
+
+	while (size > 0) {
+		if (port < 0x8000)
+			bitmap = vmcs12->io_bitmap_a;
+		else if (port < 0x10000)
+			bitmap = vmcs12->io_bitmap_b;
+		else
+			return 1;
+		bitmap += (port & 0x7fff) / 8;
+
+		if (last_bitmap != bitmap)
+			if (kvm_read_guest(vcpu->kvm, bitmap, &b, 1))
+				return 1;
+		if (b & (1 << (port & 7)))
+			return 1;
+
+		port++;
+		size--;
+		last_bitmap = bitmap;
+	}
+
+	return 0;
+}
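The loop above follows the architectural I/O-bitmap layout: two 4 KiB bitmaps, A covering ports 0x0000-0x7fff and B covering 0x8000-0xffff, one bit per port, with a multi-byte access checked port by port. The same single-port test in stand-alone form:

#include <stdbool.h>
#include <stdint.h>

/* bitmap_a/bitmap_b: the two 4 KiB VMX I/O bitmaps, one bit per port. */
static bool port_intercepted(const uint8_t *bitmap_a,
			     const uint8_t *bitmap_b, uint16_t port)
{
	const uint8_t *bm = port < 0x8000 ? bitmap_a : bitmap_b;

	return bm[(port & 0x7fff) / 8] & (1u << (port % 8));
}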
+
 /*
 * Return 1 if we should exit from L2 to L1 to handle an MSR access,
  * rather than handle it ourselves in L0. I.e., check whether L1 expressed
@@ -5939,7 +6352,8 @@
 	/* Then read the msr_index'th bit from this bitmap: */
 	if (msr_index < 1024*8) {
 		unsigned char b;
-		kvm_read_guest(vcpu->kvm, bitmap + msr_index/8, &b, 1);
+		if (kvm_read_guest(vcpu->kvm, bitmap + msr_index/8, &b, 1))
+			return 1;
 		return 1 & (b >> (msr_index & 7));
 	} else
 		return 1; /* let L1 handle the wrong parameter */
@@ -6033,10 +6447,10 @@
  */
 static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
 {
-	u32 exit_reason = vmcs_read32(VM_EXIT_REASON);
 	u32 intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
+	u32 exit_reason = vmx->exit_reason;
 
 	if (vmx->nested.nested_run_pending)
 		return 0;
@@ -6060,14 +6474,9 @@
 	case EXIT_REASON_TRIPLE_FAULT:
 		return 1;
 	case EXIT_REASON_PENDING_INTERRUPT:
+		return nested_cpu_has(vmcs12, CPU_BASED_VIRTUAL_INTR_PENDING);
 	case EXIT_REASON_NMI_WINDOW:
-		/*
-		 * prepare_vmcs02() set the CPU_BASED_VIRTUAL_INTR_PENDING bit
-		 * (aka Interrupt Window Exiting) only when L1 turned it on,
-		 * so if we got a PENDING_INTERRUPT exit, this must be for L1.
-		 * Same for NMI Window Exiting.
-		 */
-		return 1;
+		return nested_cpu_has(vmcs12, CPU_BASED_VIRTUAL_NMI_PENDING);
 	case EXIT_REASON_TASK_SWITCH:
 		return 1;
 	case EXIT_REASON_CPUID:
@@ -6097,8 +6506,7 @@
 	case EXIT_REASON_DR_ACCESS:
 		return nested_cpu_has(vmcs12, CPU_BASED_MOV_DR_EXITING);
 	case EXIT_REASON_IO_INSTRUCTION:
-		/* TODO: support IO bitmaps */
-		return 1;
+		return nested_vmx_exit_handled_io(vcpu, vmcs12);
 	case EXIT_REASON_MSR_READ:
 	case EXIT_REASON_MSR_WRITE:
 		return nested_vmx_exit_handled_msr(vcpu, vmcs12, exit_reason);
@@ -6122,6 +6530,9 @@
 	case EXIT_REASON_EPT_VIOLATION:
 	case EXIT_REASON_EPT_MISCONFIG:
 		return 0;
+	case EXIT_REASON_PREEMPTION_TIMER:
+		return vmcs12->pin_based_vm_exec_control &
+			PIN_BASED_VMX_PREEMPTION_TIMER;
 	case EXIT_REASON_WBINVD:
 		return nested_cpu_has2(vmcs12, SECONDARY_EXEC_WBINVD_EXITING);
 	case EXIT_REASON_XSETBV:
@@ -6316,6 +6727,9 @@
 
 static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
 {
+	if (!vmx_vm_has_apicv(vcpu->kvm))
+		return;
+
 	vmcs_write64(EOI_EXIT_BITMAP0, eoi_exit_bitmap[0]);
 	vmcs_write64(EOI_EXIT_BITMAP1, eoi_exit_bitmap[1]);
 	vmcs_write64(EOI_EXIT_BITMAP2, eoi_exit_bitmap[2]);
@@ -6346,6 +6760,52 @@
 	}
 }
 
+static void vmx_handle_external_intr(struct kvm_vcpu *vcpu)
+{
+	u32 exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
+
+	/*
+	 * If an external interrupt is pending, the IF bit is set in the
+	 * rflags/eflags image on the interrupt stack frame, so interrupts
+	 * will be re-enabled on return from the interrupt handler.
+	 */
+	if ((exit_intr_info & (INTR_INFO_VALID_MASK | INTR_INFO_INTR_TYPE_MASK))
+			== (INTR_INFO_VALID_MASK | INTR_TYPE_EXT_INTR)) {
+		unsigned int vector;
+		unsigned long entry;
+		gate_desc *desc;
+		struct vcpu_vmx *vmx = to_vmx(vcpu);
+#ifdef CONFIG_X86_64
+		unsigned long tmp;
+#endif
+
+		vector = exit_intr_info & INTR_INFO_VECTOR_MASK;
+		desc = (gate_desc *)vmx->host_idt_base + vector;
+		entry = gate_offset(*desc);
+		asm volatile(
+#ifdef CONFIG_X86_64
+			"mov %%" _ASM_SP ", %[sp]\n\t"
+			"and $0xfffffffffffffff0, %%" _ASM_SP "\n\t"
+			"push $%c[ss]\n\t"
+			"push %[sp]\n\t"
+#endif
+			"pushf\n\t"
+			"orl $0x200, (%%" _ASM_SP ")\n\t"
+			__ASM_SIZE(push) " $%c[cs]\n\t"
+			"call *%[entry]\n\t"
+			:
+#ifdef CONFIG_X86_64
+			[sp]"=&r"(tmp)
+#endif
+			:
+			[entry]"r"(entry),
+			[ss]"i"(__KERNEL_DS),
+			[cs]"i"(__KERNEL_CS)
+			);
+	} else
+		local_irq_enable();
+}
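The asm block rebuilds exactly the stack frame a hardware-delivered external interrupt would produce: on 64-bit, an aligned stack holding SS and the saved RSP, then an EFLAGS image with IF (0x200) forced on, then CS, then the return address pushed by the call through the host IDT entry for the pending vector. The handler's iret therefore re-enables interrupts just as it would after a real interrupt, instead of the plain local_irq_enable() fallback.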
+
 static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx)
 {
 	u32 exit_intr_info;
@@ -6388,7 +6848,7 @@
 			ktime_to_ns(ktime_sub(ktime_get(), vmx->entry_time));
 }
 
-static void __vmx_complete_interrupts(struct vcpu_vmx *vmx,
+static void __vmx_complete_interrupts(struct kvm_vcpu *vcpu,
 				      u32 idt_vectoring_info,
 				      int instr_len_field,
 				      int error_code_field)
@@ -6399,46 +6859,43 @@
 
 	idtv_info_valid = idt_vectoring_info & VECTORING_INFO_VALID_MASK;
 
-	vmx->vcpu.arch.nmi_injected = false;
-	kvm_clear_exception_queue(&vmx->vcpu);
-	kvm_clear_interrupt_queue(&vmx->vcpu);
+	vcpu->arch.nmi_injected = false;
+	kvm_clear_exception_queue(vcpu);
+	kvm_clear_interrupt_queue(vcpu);
 
 	if (!idtv_info_valid)
 		return;
 
-	kvm_make_request(KVM_REQ_EVENT, &vmx->vcpu);
+	kvm_make_request(KVM_REQ_EVENT, vcpu);
 
 	vector = idt_vectoring_info & VECTORING_INFO_VECTOR_MASK;
 	type = idt_vectoring_info & VECTORING_INFO_TYPE_MASK;
 
 	switch (type) {
 	case INTR_TYPE_NMI_INTR:
-		vmx->vcpu.arch.nmi_injected = true;
+		vcpu->arch.nmi_injected = true;
 		/*
 		 * SDM 3: 27.7.1.2 (September 2008)
 		 * Clear bit "block by NMI" before VM entry if a NMI
 		 * delivery faulted.
 		 */
-		vmx_set_nmi_mask(&vmx->vcpu, false);
+		vmx_set_nmi_mask(vcpu, false);
 		break;
 	case INTR_TYPE_SOFT_EXCEPTION:
-		vmx->vcpu.arch.event_exit_inst_len =
-			vmcs_read32(instr_len_field);
+		vcpu->arch.event_exit_inst_len = vmcs_read32(instr_len_field);
 		/* fall through */
 	case INTR_TYPE_HARD_EXCEPTION:
 		if (idt_vectoring_info & VECTORING_INFO_DELIVER_CODE_MASK) {
 			u32 err = vmcs_read32(error_code_field);
-			kvm_queue_exception_e(&vmx->vcpu, vector, err);
+			kvm_queue_exception_e(vcpu, vector, err);
 		} else
-			kvm_queue_exception(&vmx->vcpu, vector);
+			kvm_queue_exception(vcpu, vector);
 		break;
 	case INTR_TYPE_SOFT_INTR:
-		vmx->vcpu.arch.event_exit_inst_len =
-			vmcs_read32(instr_len_field);
+		vcpu->arch.event_exit_inst_len = vmcs_read32(instr_len_field);
 		/* fall through */
 	case INTR_TYPE_EXT_INTR:
-		kvm_queue_interrupt(&vmx->vcpu, vector,
-			type == INTR_TYPE_SOFT_INTR);
+		kvm_queue_interrupt(vcpu, vector, type == INTR_TYPE_SOFT_INTR);
 		break;
 	default:
 		break;
@@ -6447,18 +6904,14 @@
 
 static void vmx_complete_interrupts(struct vcpu_vmx *vmx)
 {
-	if (is_guest_mode(&vmx->vcpu))
-		return;
-	__vmx_complete_interrupts(vmx, vmx->idt_vectoring_info,
+	__vmx_complete_interrupts(&vmx->vcpu, vmx->idt_vectoring_info,
 				  VM_EXIT_INSTRUCTION_LEN,
 				  IDT_VECTORING_ERROR_CODE);
 }
 
 static void vmx_cancel_injection(struct kvm_vcpu *vcpu)
 {
-	if (is_guest_mode(vcpu))
-		return;
-	__vmx_complete_interrupts(to_vmx(vcpu),
+	__vmx_complete_interrupts(vcpu,
 				  vmcs_read32(VM_ENTRY_INTR_INFO_FIELD),
 				  VM_ENTRY_INSTRUCTION_LEN,
 				  VM_ENTRY_EXCEPTION_ERROR_CODE);
@@ -6489,21 +6942,6 @@
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	unsigned long debugctlmsr;
 
-	if (is_guest_mode(vcpu) && !vmx->nested.nested_run_pending) {
-		struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
-		if (vmcs12->idt_vectoring_info_field &
-				VECTORING_INFO_VALID_MASK) {
-			vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
-				vmcs12->idt_vectoring_info_field);
-			vmcs_write32(VM_ENTRY_INSTRUCTION_LEN,
-				vmcs12->vm_exit_instruction_len);
-			if (vmcs12->idt_vectoring_info_field &
-					VECTORING_INFO_DELIVER_CODE_MASK)
-				vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE,
-					vmcs12->idt_vectoring_error_code);
-		}
-	}
-
 	/* Record the guest's net vcpu time for enforced NMI injections. */
 	if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked))
 		vmx->entry_time = ktime_get();
@@ -6513,6 +6951,11 @@
 	if (vmx->emulation_required)
 		return;
 
+	if (vmx->nested.sync_shadow_vmcs) {
+		copy_vmcs12_to_shadow(vmx);
+		vmx->nested.sync_shadow_vmcs = false;
+	}
+
 	if (test_bit(VCPU_REGS_RSP, (unsigned long *)&vcpu->arch.regs_dirty))
 		vmcs_writel(GUEST_RSP, vcpu->arch.regs[VCPU_REGS_RSP]);
 	if (test_bit(VCPU_REGS_RIP, (unsigned long *)&vcpu->arch.regs_dirty))
@@ -6662,17 +7105,6 @@
 
 	vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD);
 
-	if (is_guest_mode(vcpu)) {
-		struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
-		vmcs12->idt_vectoring_info_field = vmx->idt_vectoring_info;
-		if (vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK) {
-			vmcs12->idt_vectoring_error_code =
-				vmcs_read32(IDT_VECTORING_ERROR_CODE);
-			vmcs12->vm_exit_instruction_len =
-				vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
-		}
-	}
-
 	vmx->loaded_vmcs->launched = 1;
 
 	vmx->exit_reason = vmcs_read32(VM_EXIT_REASON);
@@ -6734,10 +7166,11 @@
 	put_cpu();
 	if (err)
 		goto free_vmcs;
-	if (vm_need_virtualize_apic_accesses(kvm))
+	if (vm_need_virtualize_apic_accesses(kvm)) {
 		err = alloc_apic_access_page(kvm);
 		if (err)
 			goto free_vmcs;
+	}
 
 	if (enable_ept) {
 		if (!kvm->arch.ept_identity_map_addr)
@@ -6931,9 +7364,8 @@
 		vmcs12->vm_entry_instruction_len);
 	vmcs_write32(GUEST_INTERRUPTIBILITY_INFO,
 		vmcs12->guest_interruptibility_info);
-	vmcs_write32(GUEST_ACTIVITY_STATE, vmcs12->guest_activity_state);
 	vmcs_write32(GUEST_SYSENTER_CS, vmcs12->guest_sysenter_cs);
-	vmcs_writel(GUEST_DR7, vmcs12->guest_dr7);
+	kvm_set_dr(vcpu, 7, vmcs12->guest_dr7);
 	vmcs_writel(GUEST_RFLAGS, vmcs12->guest_rflags);
 	vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS,
 		vmcs12->guest_pending_dbg_exceptions);
@@ -6946,6 +7378,10 @@
 		(vmcs_config.pin_based_exec_ctrl |
 		 vmcs12->pin_based_vm_exec_control));
 
+	if (vmcs12->pin_based_vm_exec_control & PIN_BASED_VMX_PREEMPTION_TIMER)
+		vmcs_write32(VMX_PREEMPTION_TIMER_VALUE,
+			     vmcs12->vmx_preemption_timer_value);
+
 	/*
 	 * Whether page-faults are trapped is determined by a combination of
 	 * 3 settings: PFEC_MASK, PFEC_MATCH and EXCEPTION_BITMAP.PF.
@@ -7016,7 +7452,7 @@
 	 * Other fields are different per CPU, and will be set later when
 	 * vmx_vcpu_load() is called, and when vmx_save_host_state() is called.
 	 */
-	vmx_set_constant_host_state();
+	vmx_set_constant_host_state(vmx);
 
 	/*
 	 * HOST_RSP is normally set correctly in vmx_vcpu_run() just before
@@ -7082,7 +7518,7 @@
 
 	if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_EFER)
 		vcpu->arch.efer = vmcs12->guest_ia32_efer;
-	if (vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE)
+	else if (vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE)
 		vcpu->arch.efer |= (EFER_LMA | EFER_LME);
 	else
 		vcpu->arch.efer &= ~(EFER_LMA | EFER_LME);
@@ -7121,6 +7557,7 @@
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	int cpu;
 	struct loaded_vmcs *vmcs02;
+	bool ia32e;
 
 	if (!nested_vmx_check_permission(vcpu) ||
 	    !nested_vmx_check_vmcs12(vcpu))
@@ -7129,6 +7566,9 @@
 	skip_emulated_instruction(vcpu);
 	vmcs12 = get_vmcs12(vcpu);
 
+	if (enable_shadow_vmcs)
+		copy_shadow_to_vmcs12(vmx);
+
 	/*
 	 * The nested entry process starts with enforcing various prerequisites
 	 * on vmcs12 as required by the Intel SDM, and act appropriately when
@@ -7146,6 +7586,11 @@
 		return 1;
 	}
 
+	if (vmcs12->guest_activity_state != GUEST_ACTIVITY_ACTIVE) {
+		nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
+		return 1;
+	}
+
 	if ((vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_MSR_BITMAPS) &&
 			!IS_ALIGNED(vmcs12->msr_bitmap, PAGE_SIZE)) {
 		/*TODO: Also verify bits beyond physical address width are 0*/
@@ -7204,6 +7649,45 @@
 	}
 
 	/*
+	 * If the load IA32_EFER VM-entry control is 1, the following checks
+	 * are performed on the field for the IA32_EFER MSR:
+	 * - Bits reserved in the IA32_EFER MSR must be 0.
+	 * - Bit 10 (corresponding to IA32_EFER.LMA) must equal the value of
+	 *   the IA-32e mode guest VM-entry control. It must also be identical
+	 *   to bit 8 (LME) if bit 31 in the CR0 field (corresponding to
+	 *   CR0.PG) is 1.
+	 */
+	if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_EFER) {
+		ia32e = (vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE) != 0;
+		if (!kvm_valid_efer(vcpu, vmcs12->guest_ia32_efer) ||
+		    ia32e != !!(vmcs12->guest_ia32_efer & EFER_LMA) ||
+		    ((vmcs12->guest_cr0 & X86_CR0_PG) &&
+		     ia32e != !!(vmcs12->guest_ia32_efer & EFER_LME))) {
+			nested_vmx_entry_failure(vcpu, vmcs12,
+				EXIT_REASON_INVALID_STATE, ENTRY_FAIL_DEFAULT);
+			return 1;
+		}
+	}
+
+	/*
+	 * If the load IA32_EFER VM-exit control is 1, bits reserved in the
+	 * IA32_EFER MSR must be 0 in the field for that register. In addition,
+	 * the values of the LMA and LME bits in the field must each be that of
+	 * the host address-space size VM-exit control.
+	 */
+	if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_EFER) {
+		ia32e = (vmcs12->vm_exit_controls &
+			 VM_EXIT_HOST_ADDR_SPACE_SIZE) != 0;
+		if (!kvm_valid_efer(vcpu, vmcs12->host_ia32_efer) ||
+		    ia32e != !!(vmcs12->host_ia32_efer & EFER_LMA) ||
+		    ia32e != !!(vmcs12->host_ia32_efer & EFER_LME)) {
+			nested_vmx_entry_failure(vcpu, vmcs12,
+				EXIT_REASON_INVALID_STATE, ENTRY_FAIL_DEFAULT);
+			return 1;
+		}
+	}
+
+	/*
 	 * We're finally done with prerequisite checking, and can start with
 	 * the nested entry.
 	 */
@@ -7223,6 +7707,8 @@
 	vcpu->cpu = cpu;
 	put_cpu();
 
+	vmx_segment_cache_clear(vmx);
+
 	vmcs12->launch_state = 1;
 
 	prepare_vmcs02(vcpu, vmcs12);
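
Both EFER checks added above reduce to one predicate from the SDM: EFER.LMA
must equal the relevant IA-32e control bit, and EFER.LME must equal it too
whenever paging applies (for the guest, only if CR0.PG is set in the vmcs12
CR0 field; for the host, always, since the host runs with paging enabled).
A hedged standalone restatement, with illustrative constants and names
rather than the kernel's:

#include <stdint.h>
#include <stdbool.h>

#define SK_EFER_LME	(1ull << 8)	/* long mode enable */
#define SK_EFER_LMA	(1ull << 10)	/* long mode active */

static bool efer_consistent(uint64_t efer, bool ia32e, bool check_lme)
{
	if (ia32e != !!(efer & SK_EFER_LMA))
		return false;
	if (check_lme && ia32e != !!(efer & SK_EFER_LME))
		return false;
	return true;
}

The guest-state check above applies the LME half only when vmcs12->guest_cr0
has CR0.PG set; the host-state check always applies both halves.
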
@@ -7273,6 +7759,48 @@
 			vcpu->arch.cr4_guest_owned_bits));
 }
 
+static void vmcs12_save_pending_event(struct kvm_vcpu *vcpu,
+				       struct vmcs12 *vmcs12)
+{
+	u32 idt_vectoring;
+	unsigned int nr;
+
+	if (vcpu->arch.exception.pending) {
+		nr = vcpu->arch.exception.nr;
+		idt_vectoring = nr | VECTORING_INFO_VALID_MASK;
+
+		if (kvm_exception_is_soft(nr)) {
+			vmcs12->vm_exit_instruction_len =
+				vcpu->arch.event_exit_inst_len;
+			idt_vectoring |= INTR_TYPE_SOFT_EXCEPTION;
+		} else
+			idt_vectoring |= INTR_TYPE_HARD_EXCEPTION;
+
+		if (vcpu->arch.exception.has_error_code) {
+			idt_vectoring |= VECTORING_INFO_DELIVER_CODE_MASK;
+			vmcs12->idt_vectoring_error_code =
+				vcpu->arch.exception.error_code;
+		}
+
+		vmcs12->idt_vectoring_info_field = idt_vectoring;
+	} else if (vcpu->arch.nmi_pending) {
+		vmcs12->idt_vectoring_info_field =
+			INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR;
+	} else if (vcpu->arch.interrupt.pending) {
+		nr = vcpu->arch.interrupt.nr;
+		idt_vectoring = nr | VECTORING_INFO_VALID_MASK;
+
+		if (vcpu->arch.interrupt.soft) {
+			idt_vectoring |= INTR_TYPE_SOFT_INTR;
+			vmcs12->vm_entry_instruction_len =
+				vcpu->arch.event_exit_inst_len;
+		} else
+			idt_vectoring |= INTR_TYPE_EXT_INTR;
+
+		vmcs12->idt_vectoring_info_field = idt_vectoring;
+	}
+}
+
 /*
  * prepare_vmcs12 is part of what we need to do when the nested L2 guest exits
  * and we want to prepare to run its L1 parent. L1 keeps a vmcs for L2 (vmcs12),
@@ -7284,7 +7812,7 @@
  * exit-information fields only. Other fields are modified by L1 with VMWRITE,
  * which already writes to vmcs12 directly.
  */
-void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
+static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
 {
 	/* update guest state fields: */
 	vmcs12->guest_cr0 = vmcs12_guest_cr0(vcpu, vmcs12);
@@ -7332,16 +7860,19 @@
 	vmcs12->guest_gdtr_base = vmcs_readl(GUEST_GDTR_BASE);
 	vmcs12->guest_idtr_base = vmcs_readl(GUEST_IDTR_BASE);
 
-	vmcs12->guest_activity_state = vmcs_read32(GUEST_ACTIVITY_STATE);
 	vmcs12->guest_interruptibility_info =
 		vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
 	vmcs12->guest_pending_dbg_exceptions =
 		vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS);
 
+	vmcs12->vm_entry_controls =
+		(vmcs12->vm_entry_controls & ~VM_ENTRY_IA32E_MODE) |
+		(vmcs_read32(VM_ENTRY_CONTROLS) & VM_ENTRY_IA32E_MODE);
+
 	/* TODO: These cannot have changed unless we have MSR bitmaps and
 	 * the relevant bit asks not to trap the change */
 	vmcs12->guest_ia32_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL);
-	if (vmcs12->vm_entry_controls & VM_EXIT_SAVE_IA32_PAT)
+	if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_IA32_PAT)
 		vmcs12->guest_ia32_pat = vmcs_read64(GUEST_IA32_PAT);
 	vmcs12->guest_sysenter_cs = vmcs_read32(GUEST_SYSENTER_CS);
 	vmcs12->guest_sysenter_esp = vmcs_readl(GUEST_SYSENTER_ESP);
@@ -7349,21 +7880,38 @@
 
 	/* update exit information fields: */
 
-	vmcs12->vm_exit_reason  = vmcs_read32(VM_EXIT_REASON);
+	vmcs12->vm_exit_reason  = to_vmx(vcpu)->exit_reason;
 	vmcs12->exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
 
 	vmcs12->vm_exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
-	vmcs12->vm_exit_intr_error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE);
-	vmcs12->idt_vectoring_info_field =
-		vmcs_read32(IDT_VECTORING_INFO_FIELD);
-	vmcs12->idt_vectoring_error_code =
-		vmcs_read32(IDT_VECTORING_ERROR_CODE);
+	if ((vmcs12->vm_exit_intr_info &
+	     (INTR_INFO_VALID_MASK | INTR_INFO_DELIVER_CODE_MASK)) ==
+	    (INTR_INFO_VALID_MASK | INTR_INFO_DELIVER_CODE_MASK))
+		vmcs12->vm_exit_intr_error_code =
+			vmcs_read32(VM_EXIT_INTR_ERROR_CODE);
+	vmcs12->idt_vectoring_info_field = 0;
 	vmcs12->vm_exit_instruction_len = vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
 	vmcs12->vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
 
-	/* clear vm-entry fields which are to be cleared on exit */
-	if (!(vmcs12->vm_exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY))
+	if (!(vmcs12->vm_exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY)) {
+		/* vm_entry_intr_info_field is cleared on exit. Emulate this
+		 * instead of reading the real value. */
 		vmcs12->vm_entry_intr_info_field &= ~INTR_INFO_VALID_MASK;
+
+		/*
+	 * Transfer the event that L0 or L1 may have wanted to inject into
+		 * L2 to IDT_VECTORING_INFO_FIELD.
+		 */
+		vmcs12_save_pending_event(vcpu, vmcs12);
+	}
+
+	/*
+	 * Drop what we picked up for L2 via vmx_complete_interrupts. It is
+	 * preserved above and would only end up incorrectly in L1.
+	 */
+	vcpu->arch.nmi_injected = false;
+	kvm_clear_exception_queue(vcpu);
+	kvm_clear_interrupt_queue(vcpu);
 }
 
 /*
@@ -7375,11 +7923,12 @@
  * Failures During or After Loading Guest State").
  * This function should be called when the active VMCS is L1's (vmcs01).
  */
-void load_vmcs12_host_state(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
+static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
+				   struct vmcs12 *vmcs12)
 {
 	if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_EFER)
 		vcpu->arch.efer = vmcs12->host_ia32_efer;
-	if (vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE)
+	else if (vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE)
 		vcpu->arch.efer |= (EFER_LMA | EFER_LME);
 	else
 		vcpu->arch.efer &= ~(EFER_LMA | EFER_LME);
@@ -7387,6 +7936,7 @@
 
 	kvm_register_write(vcpu, VCPU_REGS_RSP, vmcs12->host_rsp);
 	kvm_register_write(vcpu, VCPU_REGS_RIP, vmcs12->host_rip);
+	vmx_set_rflags(vcpu, X86_EFLAGS_BIT1);
 	/*
 	 * Note that calling vmx_set_cr0 is important, even if cr0 hasn't
 	 * actually changed, because it depends on the current state of
@@ -7445,6 +7995,9 @@
 	if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL)
 		vmcs_write64(GUEST_IA32_PERF_GLOBAL_CTRL,
 			vmcs12->host_ia32_perf_global_ctrl);
+
+	kvm_set_dr(vcpu, 7, 0x400);
+	vmcs_write64(GUEST_IA32_DEBUGCTL, 0);
 }
 
 /*
@@ -7458,6 +8011,9 @@
 	int cpu;
 	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
 
+	/* trying to cancel vmlaunch/vmresume is a bug */
+	WARN_ON_ONCE(vmx->nested.nested_run_pending);
+
 	leave_guest_mode(vcpu);
 	prepare_vmcs12(vcpu, vmcs12);
 
@@ -7468,6 +8024,8 @@
 	vcpu->cpu = cpu;
 	put_cpu();
 
+	vmx_segment_cache_clear(vmx);
+
 	/* if no vmcs02 cache requested, remove the one we used */
 	if (VMCS02_POOL_SIZE == 0)
 		nested_free_vmcs02(vmx, vmx->nested.current_vmptr);
@@ -7496,6 +8054,8 @@
 		nested_vmx_failValid(vcpu, vmcs_read32(VM_INSTRUCTION_ERROR));
 	} else
 		nested_vmx_succeed(vcpu);
+	if (enable_shadow_vmcs)
+		vmx->nested.sync_shadow_vmcs = true;
 }
 
 /*
@@ -7513,6 +8073,8 @@
 	vmcs12->vm_exit_reason = reason | VMX_EXIT_REASONS_FAILED_VMENTRY;
 	vmcs12->exit_qualification = qualification;
 	nested_vmx_succeed(vcpu);
+	if (enable_shadow_vmcs)
+		to_vmx(vcpu)->nested.sync_shadow_vmcs = true;
 }
 
 static int vmx_check_intercept(struct kvm_vcpu *vcpu,
@@ -7590,6 +8152,8 @@
 	.load_eoi_exitmap = vmx_load_eoi_exitmap,
 	.hwapic_irr_update = vmx_hwapic_irr_update,
 	.hwapic_isr_update = vmx_hwapic_isr_update,
+	.sync_pir_to_irr = vmx_sync_pir_to_irr,
+	.deliver_posted_interrupt = vmx_deliver_posted_interrupt,
 
 	.set_tss_addr = vmx_set_tss_addr,
 	.get_tdp_level = get_ept_level,
@@ -7618,6 +8182,7 @@
 	.set_tdp_cr3 = vmx_set_cr3,
 
 	.check_intercept = vmx_check_intercept,
+	.handle_external_intr = vmx_handle_external_intr,
 };
 
 static int __init vmx_init(void)
@@ -7656,6 +8221,24 @@
 				(unsigned long *)__get_free_page(GFP_KERNEL);
 	if (!vmx_msr_bitmap_longmode_x2apic)
 		goto out4;
+	vmx_vmread_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
+	if (!vmx_vmread_bitmap)
+		goto out5;
+
+	vmx_vmwrite_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
+	if (!vmx_vmwrite_bitmap)
+		goto out6;
+
+	memset(vmx_vmread_bitmap, 0xff, PAGE_SIZE);
+	memset(vmx_vmwrite_bitmap, 0xff, PAGE_SIZE);
+	/* shadowed read/write fields */
+	for (i = 0; i < max_shadow_read_write_fields; i++) {
+		clear_bit(shadow_read_write_fields[i], vmx_vmwrite_bitmap);
+		clear_bit(shadow_read_write_fields[i], vmx_vmread_bitmap);
+	}
+	/* shadowed read only fields */
+	for (i = 0; i < max_shadow_read_only_fields; i++)
+		clear_bit(shadow_read_only_fields[i], vmx_vmread_bitmap);
 
 	/*
 	 * Allow direct access to the PC debug port (it is often used for I/O
@@ -7674,7 +8257,7 @@
 	r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx),
 		     __alignof__(struct vcpu_vmx), THIS_MODULE);
 	if (r)
-		goto out3;
+		goto out7;
 
 #ifdef CONFIG_KEXEC
 	rcu_assign_pointer(crash_vmclear_loaded_vmcss,
@@ -7692,7 +8275,7 @@
 	memcpy(vmx_msr_bitmap_longmode_x2apic,
 			vmx_msr_bitmap_longmode, PAGE_SIZE);
 
-	if (enable_apicv_reg_vid) {
+	if (enable_apicv) {
 		for (msr = 0x800; msr <= 0x8ff; msr++)
 			vmx_disable_intercept_msr_read_x2apic(msr);
 
@@ -7722,6 +8305,12 @@
 
 	return 0;
 
+out7:
+	free_page((unsigned long)vmx_vmwrite_bitmap);
+out6:
+	free_page((unsigned long)vmx_vmread_bitmap);
+out5:
+	free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic);
 out4:
 	free_page((unsigned long)vmx_msr_bitmap_longmode);
 out3:
@@ -7743,6 +8332,8 @@
 	free_page((unsigned long)vmx_msr_bitmap_longmode);
 	free_page((unsigned long)vmx_io_bitmap_b);
 	free_page((unsigned long)vmx_io_bitmap_a);
+	free_page((unsigned long)vmx_vmwrite_bitmap);
+	free_page((unsigned long)vmx_vmread_bitmap);
 
 #ifdef CONFIG_KEXEC
 	rcu_assign_pointer(crash_vmclear_loaded_vmcss, NULL);
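
The vmread/vmwrite bitmap setup added to vmx_init() above is a "trap by
default, then allowlist" pattern: every bit starts set, so a VMREAD/VMWRITE
of that field causes an exit, and bits are cleared only for the shadowed
fields.  A condensed sketch of the same pattern, assuming the kernel's page
and bitmap helpers; the field list here is hypothetical:

static int build_shadow_bitmap(const unsigned long *allowed_fields,
			       int nr_allowed, unsigned long **out)
{
	unsigned long *bitmap;
	int i;

	bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
	if (!bitmap)
		return -ENOMEM;

	memset(bitmap, 0xff, PAGE_SIZE);	/* every field traps by default */
	for (i = 0; i < nr_allowed; i++)	/* punch holes for shadowed fields */
		clear_bit(allowed_fields[i], bitmap);

	*out = bitmap;
	return 0;
}
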
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index e172132..05a8b1a 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -162,8 +162,6 @@
 
 static int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt);
 
-static int kvm_vcpu_reset(struct kvm_vcpu *vcpu);
-
 static inline void kvm_async_pf_hash_reset(struct kvm_vcpu *vcpu)
 {
 	int i;
@@ -263,6 +261,13 @@
 }
 EXPORT_SYMBOL_GPL(kvm_set_apic_base);
 
+asmlinkage void kvm_spurious_fault(void)
+{
+	/* Fault while not rebooting.  We want the trace. */
+	BUG();
+}
+EXPORT_SYMBOL_GPL(kvm_spurious_fault);
+
 #define EXCPT_BENIGN		0
 #define EXCPT_CONTRIBUTORY	1
 #define EXCPT_PF		2
@@ -840,23 +845,17 @@
 	MSR_IA32_MCG_CTL,
 };
 
-static int set_efer(struct kvm_vcpu *vcpu, u64 efer)
+bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer)
 {
-	u64 old_efer = vcpu->arch.efer;
-
 	if (efer & efer_reserved_bits)
-		return 1;
-
-	if (is_paging(vcpu)
-	    && (vcpu->arch.efer & EFER_LME) != (efer & EFER_LME))
-		return 1;
+		return false;
 
 	if (efer & EFER_FFXSR) {
 		struct kvm_cpuid_entry2 *feat;
 
 		feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
 		if (!feat || !(feat->edx & bit(X86_FEATURE_FXSR_OPT)))
-			return 1;
+			return false;
 	}
 
 	if (efer & EFER_SVME) {
@@ -864,9 +863,24 @@
 
 		feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
 		if (!feat || !(feat->ecx & bit(X86_FEATURE_SVM)))
-			return 1;
+			return false;
 	}
 
+	return true;
+}
+EXPORT_SYMBOL_GPL(kvm_valid_efer);
+
+static int set_efer(struct kvm_vcpu *vcpu, u64 efer)
+{
+	u64 old_efer = vcpu->arch.efer;
+
+	if (!kvm_valid_efer(vcpu, efer))
+		return 1;
+
+	if (is_paging(vcpu)
+	    && (vcpu->arch.efer & EFER_LME) != (efer & EFER_LME))
+		return 1;
+
 	efer &= ~EFER_LMA;
 	efer |= vcpu->arch.efer & EFER_LMA;
 
@@ -1079,6 +1093,10 @@
 	u32 thresh_lo, thresh_hi;
 	int use_scaling = 0;
 
+	/* tsc_khz can be zero if TSC calibration fails */
+	if (this_tsc_khz == 0)
+		return;
+
 	/* Compute a scale to convert nanoseconds in TSC cycles */
 	kvm_get_time_scale(this_tsc_khz, NSEC_PER_SEC / 1000,
 			   &vcpu->arch.virtual_tsc_shift,
@@ -1156,20 +1174,23 @@
 	ns = get_kernel_ns();
 	elapsed = ns - kvm->arch.last_tsc_nsec;
 
-	/* n.b - signed multiplication and division required */
-	usdiff = data - kvm->arch.last_tsc_write;
+	if (vcpu->arch.virtual_tsc_khz) {
+		/* n.b - signed multiplication and division required */
+		usdiff = data - kvm->arch.last_tsc_write;
 #ifdef CONFIG_X86_64
-	usdiff = (usdiff * 1000) / vcpu->arch.virtual_tsc_khz;
+		usdiff = (usdiff * 1000) / vcpu->arch.virtual_tsc_khz;
 #else
-	/* do_div() only does unsigned */
-	asm("idivl %2; xor %%edx, %%edx"
-	    : "=A"(usdiff)
-	    : "A"(usdiff * 1000), "rm"(vcpu->arch.virtual_tsc_khz));
+		/* do_div() only does unsigned */
+		asm("idivl %2; xor %%edx, %%edx"
+		: "=A"(usdiff)
+		: "A"(usdiff * 1000), "rm"(vcpu->arch.virtual_tsc_khz));
 #endif
-	do_div(elapsed, 1000);
-	usdiff -= elapsed;
-	if (usdiff < 0)
-		usdiff = -usdiff;
+		do_div(elapsed, 1000);
+		usdiff -= elapsed;
+		if (usdiff < 0)
+			usdiff = -usdiff;
+	} else
+		usdiff = USEC_PER_SEC; /* disable TSC match window below */
 
 	/*
 	 * Special case: TSC write with a small delta (1 second) of virtual
@@ -2034,7 +2055,7 @@
 	case MSR_P6_EVNTSEL0:
 	case MSR_P6_EVNTSEL1:
 		if (kvm_pmu_msr(vcpu, msr))
-			return kvm_pmu_set_msr(vcpu, msr, data);
+			return kvm_pmu_set_msr(vcpu, msr_info);
 
 		if (pr || data != 0)
 			vcpu_unimpl(vcpu, "disabled perfctr wrmsr: "
@@ -2080,7 +2101,7 @@
 		if (msr && (msr == vcpu->kvm->arch.xen_hvm_config.msr))
 			return xen_hvm_config(vcpu, data);
 		if (kvm_pmu_msr(vcpu, msr))
-			return kvm_pmu_set_msr(vcpu, msr, data);
+			return kvm_pmu_set_msr(vcpu, msr_info);
 		if (!ignore_msrs) {
 			vcpu_unimpl(vcpu, "unhandled wrmsr: 0x%x data %llx\n",
 				    msr, data);
@@ -2479,7 +2500,6 @@
 	case KVM_CAP_USER_NMI:
 	case KVM_CAP_REINJECT_CONTROL:
 	case KVM_CAP_IRQ_INJECT_STATUS:
-	case KVM_CAP_ASSIGN_DEV_IRQ:
 	case KVM_CAP_IRQFD:
 	case KVM_CAP_IOEVENTFD:
 	case KVM_CAP_PIT2:
@@ -2497,10 +2517,12 @@
 	case KVM_CAP_XSAVE:
 	case KVM_CAP_ASYNC_PF:
 	case KVM_CAP_GET_TSC_KHZ:
-	case KVM_CAP_PCI_2_3:
 	case KVM_CAP_KVMCLOCK_CTRL:
 	case KVM_CAP_READONLY_MEM:
-	case KVM_CAP_IRQFD_RESAMPLE:
+#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
+	case KVM_CAP_ASSIGN_DEV_IRQ:
+	case KVM_CAP_PCI_2_3:
+#endif
 		r = 1;
 		break;
 	case KVM_CAP_COALESCED_MMIO:
@@ -2521,9 +2543,11 @@
 	case KVM_CAP_PV_MMU:	/* obsolete */
 		r = 0;
 		break;
+#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
 	case KVM_CAP_IOMMU:
 		r = iommu_present(&pci_bus_type);
 		break;
+#endif
 	case KVM_CAP_MCE:
 		r = KVM_MAX_MCE_BANKS;
 		break;
@@ -2679,6 +2703,7 @@
 static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu,
 				    struct kvm_lapic_state *s)
 {
+	kvm_x86_ops->sync_pir_to_irr(vcpu);
 	memcpy(s->regs, vcpu->arch.apic->regs, sizeof *s);
 
 	return 0;
@@ -2696,7 +2721,7 @@
 static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu,
 				    struct kvm_interrupt *irq)
 {
-	if (irq->irq < 0 || irq->irq >= KVM_NR_INTERRUPTS)
+	if (irq->irq >= KVM_NR_INTERRUPTS)
 		return -EINVAL;
 	if (irqchip_in_kernel(vcpu->kvm))
 		return -ENXIO;
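
The dropped "irq->irq < 0" test above was dead code: the field is a __u32,
so the comparison can never be true (and compilers tend to warn about it).
A single upper-bound compare covers the whole valid range; schematically:

#include <stdint.h>
#include <stdbool.h>

/* irq is unsigned, so values "below zero" cannot exist; one compare
 * checks the entire range [0, nr_interrupts). */
static bool irq_in_range(uint32_t irq, uint32_t nr_interrupts)
{
	return irq < nr_interrupts;
}
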
@@ -2819,10 +2844,9 @@
 	events->nmi.masked = kvm_x86_ops->get_nmi_mask(vcpu);
 	events->nmi.pad = 0;
 
-	events->sipi_vector = vcpu->arch.sipi_vector;
+	events->sipi_vector = 0; /* never valid when reporting to user space */
 
 	events->flags = (KVM_VCPUEVENT_VALID_NMI_PENDING
-			 | KVM_VCPUEVENT_VALID_SIPI_VECTOR
 			 | KVM_VCPUEVENT_VALID_SHADOW);
 	memset(&events->reserved, 0, sizeof(events->reserved));
 }
@@ -2853,8 +2877,9 @@
 		vcpu->arch.nmi_pending = events->nmi.pending;
 	kvm_x86_ops->set_nmi_mask(vcpu, events->nmi.masked);
 
-	if (events->flags & KVM_VCPUEVENT_VALID_SIPI_VECTOR)
-		vcpu->arch.sipi_vector = events->sipi_vector;
+	if (events->flags & KVM_VCPUEVENT_VALID_SIPI_VECTOR &&
+	    kvm_vcpu_has_lapic(vcpu))
+		vcpu->arch.apic->sipi_vector = events->sipi_vector;
 
 	kvm_make_request(KVM_REQ_EVENT, vcpu);
 
@@ -3478,13 +3503,15 @@
 	return r;
 }
 
-int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event)
+int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event,
+			bool line_status)
 {
 	if (!irqchip_in_kernel(kvm))
 		return -ENXIO;
 
 	irq_event->status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
-					irq_event->irq, irq_event->level);
+					irq_event->irq, irq_event->level,
+					line_status);
 	return 0;
 }
 
@@ -4752,11 +4779,15 @@
 }
 
 static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t cr2,
-				  bool write_fault_to_shadow_pgtable)
+				  bool write_fault_to_shadow_pgtable,
+				  int emulation_type)
 {
 	gpa_t gpa = cr2;
 	pfn_t pfn;
 
+	if (emulation_type & EMULTYPE_NO_REEXECUTE)
+		return false;
+
 	if (!vcpu->arch.mmu.direct_map) {
 		/*
 		 * Write permission should be allowed since only
@@ -4899,8 +4930,8 @@
 		if (r != EMULATION_OK)  {
 			if (emulation_type & EMULTYPE_TRAP_UD)
 				return EMULATE_FAIL;
-			if (reexecute_instruction(vcpu, cr2,
-						  write_fault_to_spt))
+			if (reexecute_instruction(vcpu, cr2, write_fault_to_spt,
+						emulation_type))
 				return EMULATE_DONE;
 			if (emulation_type & EMULTYPE_SKIP)
 				return EMULATE_FAIL;
@@ -4930,7 +4961,8 @@
 		return EMULATE_DONE;
 
 	if (r == EMULATION_FAILED) {
-		if (reexecute_instruction(vcpu, cr2, write_fault_to_spt))
+		if (reexecute_instruction(vcpu, cr2, write_fault_to_spt,
+					emulation_type))
 			return EMULATE_DONE;
 
 		return handle_emulation_failure(vcpu);
@@ -5641,14 +5673,20 @@
 #endif
 }
 
-static void update_eoi_exitmap(struct kvm_vcpu *vcpu)
+static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu)
 {
 	u64 eoi_exit_bitmap[4];
+	u32 tmr[8];
+
+	if (!kvm_apic_hw_enabled(vcpu->arch.apic))
+		return;
 
 	memset(eoi_exit_bitmap, 0, 32);
+	memset(tmr, 0, 32);
 
-	kvm_ioapic_calculate_eoi_exitmap(vcpu, eoi_exit_bitmap);
+	kvm_ioapic_scan_entry(vcpu, eoi_exit_bitmap, tmr);
 	kvm_x86_ops->load_eoi_exitmap(vcpu, eoi_exit_bitmap);
+	kvm_apic_update_tmr(vcpu, tmr);
 }
 
 static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
@@ -5656,7 +5694,7 @@
 	int r;
 	bool req_int_win = !irqchip_in_kernel(vcpu->kvm) &&
 		vcpu->run->request_interrupt_window;
-	bool req_immediate_exit = 0;
+	bool req_immediate_exit = false;
 
 	if (vcpu->requests) {
 		if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu))
@@ -5698,24 +5736,30 @@
 			record_steal_time(vcpu);
 		if (kvm_check_request(KVM_REQ_NMI, vcpu))
 			process_nmi(vcpu);
-		req_immediate_exit =
-			kvm_check_request(KVM_REQ_IMMEDIATE_EXIT, vcpu);
 		if (kvm_check_request(KVM_REQ_PMU, vcpu))
 			kvm_handle_pmu_event(vcpu);
 		if (kvm_check_request(KVM_REQ_PMI, vcpu))
 			kvm_deliver_pmi(vcpu);
-		if (kvm_check_request(KVM_REQ_EOIBITMAP, vcpu))
-			update_eoi_exitmap(vcpu);
+		if (kvm_check_request(KVM_REQ_SCAN_IOAPIC, vcpu))
+			vcpu_scan_ioapic(vcpu);
 	}
 
 	if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
+		kvm_apic_accept_events(vcpu);
+		if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) {
+			r = 1;
+			goto out;
+		}
+
 		inject_pending_event(vcpu);
 
 		/* enable NMI/IRQ window open exits if needed */
 		if (vcpu->arch.nmi_pending)
-			kvm_x86_ops->enable_nmi_window(vcpu);
+			req_immediate_exit =
+				kvm_x86_ops->enable_nmi_window(vcpu) != 0;
 		else if (kvm_cpu_has_injectable_intr(vcpu) || req_int_win)
-			kvm_x86_ops->enable_irq_window(vcpu);
+			req_immediate_exit =
+				kvm_x86_ops->enable_irq_window(vcpu) != 0;
 
 		if (kvm_lapic_enabled(vcpu)) {
 			/*
@@ -5794,7 +5838,9 @@
 
 	vcpu->mode = OUTSIDE_GUEST_MODE;
 	smp_wmb();
-	local_irq_enable();
+
+	/* Interrupts are enabled by handle_external_intr() */
+	kvm_x86_ops->handle_external_intr(vcpu);
 
 	++vcpu->stat.exits;
 
@@ -5843,16 +5889,6 @@
 	int r;
 	struct kvm *kvm = vcpu->kvm;
 
-	if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED)) {
-		pr_debug("vcpu %d received sipi with vector # %x\n",
-			 vcpu->vcpu_id, vcpu->arch.sipi_vector);
-		kvm_lapic_reset(vcpu);
-		r = kvm_vcpu_reset(vcpu);
-		if (r)
-			return r;
-		vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
-	}
-
 	vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
 	r = vapic_enter(vcpu);
 	if (r) {
@@ -5869,8 +5905,8 @@
 			srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
 			kvm_vcpu_block(vcpu);
 			vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
-			if (kvm_check_request(KVM_REQ_UNHALT, vcpu))
-			{
+			if (kvm_check_request(KVM_REQ_UNHALT, vcpu)) {
+				kvm_apic_accept_events(vcpu);
 				switch(vcpu->arch.mp_state) {
 				case KVM_MP_STATE_HALTED:
 					vcpu->arch.mp_state =
@@ -5878,7 +5914,8 @@
 				case KVM_MP_STATE_RUNNABLE:
 					vcpu->arch.apf.halted = false;
 					break;
-				case KVM_MP_STATE_SIPI_RECEIVED:
+				case KVM_MP_STATE_INIT_RECEIVED:
+					break;
 				default:
 					r = -EINTR;
 					break;
@@ -6013,6 +6050,7 @@
 
 	if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) {
 		kvm_vcpu_block(vcpu);
+		kvm_apic_accept_events(vcpu);
 		clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
 		r = -EAGAIN;
 		goto out;
@@ -6169,6 +6207,7 @@
 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
 				    struct kvm_mp_state *mp_state)
 {
+	kvm_apic_accept_events(vcpu);
 	mp_state->mp_state = vcpu->arch.mp_state;
 	return 0;
 }
@@ -6176,7 +6215,15 @@
 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
 				    struct kvm_mp_state *mp_state)
 {
-	vcpu->arch.mp_state = mp_state->mp_state;
+	if (!kvm_vcpu_has_lapic(vcpu) &&
+	    mp_state->mp_state != KVM_MP_STATE_RUNNABLE)
+		return -EINVAL;
+
+	if (mp_state->mp_state == KVM_MP_STATE_SIPI_RECEIVED) {
+		vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED;
+		set_bit(KVM_APIC_SIPI, &vcpu->arch.apic->pending_events);
+	} else
+		vcpu->arch.mp_state = mp_state->mp_state;
 	kvm_make_request(KVM_REQ_EVENT, vcpu);
 	return 0;
 }
@@ -6475,9 +6522,8 @@
 	r = vcpu_load(vcpu);
 	if (r)
 		return r;
-	r = kvm_vcpu_reset(vcpu);
-	if (r == 0)
-		r = kvm_mmu_setup(vcpu);
+	kvm_vcpu_reset(vcpu);
+	r = kvm_mmu_setup(vcpu);
 	vcpu_put(vcpu);
 
 	return r;
@@ -6514,7 +6560,7 @@
 	kvm_x86_ops->vcpu_free(vcpu);
 }
 
-static int kvm_vcpu_reset(struct kvm_vcpu *vcpu)
+void kvm_vcpu_reset(struct kvm_vcpu *vcpu)
 {
 	atomic_set(&vcpu->arch.nmi_queued, 0);
 	vcpu->arch.nmi_pending = 0;
@@ -6541,7 +6587,18 @@
 	vcpu->arch.regs_avail = ~0;
 	vcpu->arch.regs_dirty = ~0;
 
-	return kvm_x86_ops->vcpu_reset(vcpu);
+	kvm_x86_ops->vcpu_reset(vcpu);
+}
+
+void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, unsigned int vector)
+{
+	struct kvm_segment cs;
+
+	kvm_get_segment(vcpu, &cs, VCPU_SREG_CS);
+	cs.selector = vector << 8;
+	cs.base = vector << 12;
+	kvm_set_segment(vcpu, &cs, VCPU_SREG_CS);
+	kvm_rip_write(vcpu, 0);
 }
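
kvm_vcpu_deliver_sipi_vector() above encodes the real-mode startup
convention: the SIPI vector selects a 4 KiB-aligned entry point.  In real
mode the segment base is selector << 4, so selector = vector << 8 yields
base = vector << 12, and with RIP zeroed execution begins exactly at the
base.  An illustrative helper (not a kernel API):

#include <stdint.h>

static uint32_t sipi_entry_phys(uint8_t vector)
{
	uint16_t selector = (uint16_t)vector << 8;	/* CS selector           */
	uint32_t base = (uint32_t)selector << 4;	/* == (u32)vector << 12  */

	return base;	/* IP is 0, so the first fetch happens at the base */
}

/* Example: vector 0x9a -> CS selector 0x9a00, base 0x9a000. */
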
 
 int kvm_arch_hardware_enable(void *garbage)
@@ -6706,8 +6763,10 @@
 	}
 	vcpu->arch.mcg_cap = KVM_MAX_MCE_BANKS;
 
-	if (!zalloc_cpumask_var(&vcpu->arch.wbinvd_dirty_mask, GFP_KERNEL))
+	if (!zalloc_cpumask_var(&vcpu->arch.wbinvd_dirty_mask, GFP_KERNEL)) {
+		r = -ENOMEM;
 		goto fail_free_mce_banks;
+	}
 
 	r = fx_init(vcpu);
 	if (r)
@@ -6811,6 +6870,23 @@
 
 void kvm_arch_destroy_vm(struct kvm *kvm)
 {
+	if (current->mm == kvm->mm) {
+		/*
+		 * Free memory regions allocated on behalf of userspace,
+		 * unless the memory map has changed due to process exit
+		 * or fd copying.
+		 */
+		struct kvm_userspace_memory_region mem;
+		memset(&mem, 0, sizeof(mem));
+		mem.slot = APIC_ACCESS_PAGE_PRIVATE_MEMSLOT;
+		kvm_set_memory_region(kvm, &mem);
+
+		mem.slot = IDENTITY_PAGETABLE_PRIVATE_MEMSLOT;
+		kvm_set_memory_region(kvm, &mem);
+
+		mem.slot = TSS_PRIVATE_MEMSLOT;
+		kvm_set_memory_region(kvm, &mem);
+	}
 	kvm_iommu_unmap_guest(kvm);
 	kfree(kvm->arch.vpic);
 	kfree(kvm->arch.vioapic);
@@ -6903,24 +6979,21 @@
 
 int kvm_arch_prepare_memory_region(struct kvm *kvm,
 				struct kvm_memory_slot *memslot,
-				struct kvm_memory_slot old,
 				struct kvm_userspace_memory_region *mem,
-				bool user_alloc)
+				enum kvm_mr_change change)
 {
-	int npages = memslot->npages;
-
 	/*
 	 * Only private memory slots need to be mapped here since
 	 * KVM_SET_MEMORY_REGION ioctl is no longer supported.
 	 */
-	if ((memslot->id >= KVM_USER_MEM_SLOTS) && npages && !old.npages) {
+	if ((memslot->id >= KVM_USER_MEM_SLOTS) && (change == KVM_MR_CREATE)) {
 		unsigned long userspace_addr;
 
 		/*
 		 * MAP_SHARED to prevent internal slot pages from being moved
 		 * by fork()/COW.
 		 */
-		userspace_addr = vm_mmap(NULL, 0, npages * PAGE_SIZE,
+		userspace_addr = vm_mmap(NULL, 0, memslot->npages * PAGE_SIZE,
 					 PROT_READ | PROT_WRITE,
 					 MAP_SHARED | MAP_ANONYMOUS, 0);
 
@@ -6935,17 +7008,17 @@
 
 void kvm_arch_commit_memory_region(struct kvm *kvm,
 				struct kvm_userspace_memory_region *mem,
-				struct kvm_memory_slot old,
-				bool user_alloc)
+				const struct kvm_memory_slot *old,
+				enum kvm_mr_change change)
 {
 
-	int nr_mmu_pages = 0, npages = mem->memory_size >> PAGE_SHIFT;
+	int nr_mmu_pages = 0;
 
-	if ((mem->slot >= KVM_USER_MEM_SLOTS) && old.npages && !npages) {
+	if ((mem->slot >= KVM_USER_MEM_SLOTS) && (change == KVM_MR_DELETE)) {
 		int ret;
 
-		ret = vm_munmap(old.userspace_addr,
-				old.npages * PAGE_SIZE);
+		ret = vm_munmap(old->userspace_addr,
+				old->npages * PAGE_SIZE);
 		if (ret < 0)
 			printk(KERN_WARNING
 			       "kvm_vm_ioctl_set_memory_region: "
@@ -6962,14 +7035,14 @@
 	 * Existing largepage mappings are destroyed here and new ones will
 	 * not be created until the end of the logging.
 	 */
-	if (npages && (mem->flags & KVM_MEM_LOG_DIRTY_PAGES))
+	if ((change != KVM_MR_DELETE) && (mem->flags & KVM_MEM_LOG_DIRTY_PAGES))
 		kvm_mmu_slot_remove_write_access(kvm, mem->slot);
 	/*
 	 * If a memory slot is created or moved, we need to clear all
 	 * mmio sptes.
 	 */
-	if (npages && old.base_gfn != mem->guest_phys_addr >> PAGE_SHIFT) {
-		kvm_mmu_zap_all(kvm);
+	if ((change == KVM_MR_CREATE) || (change == KVM_MR_MOVE)) {
+		kvm_mmu_zap_mmio_sptes(kvm);
 		kvm_reload_remote_mmus(kvm);
 	}
 }
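
With the old.npages/npages heuristics replaced by an explicit
enum kvm_mr_change, the commit path above becomes a plain dispatch on the
kind of slot change.  A condensed sketch of the resulting shape, using
local names and hypothetical helpers rather than the kernel's:

#include <stdbool.h>

enum mr_change { MR_CREATE, MR_DELETE, MR_MOVE, MR_FLAGS_ONLY };

static void unmap_old_range(void) { /* hypothetical */ }
static void write_protect_slot(void) { /* hypothetical */ }
static void zap_mmio_sptes(void) { /* hypothetical */ }

static void commit_region(enum mr_change change, bool log_dirty_pages)
{
	switch (change) {
	case MR_DELETE:
		unmap_old_range();
		break;
	case MR_CREATE:
	case MR_MOVE:
		zap_mmio_sptes();	/* old mmio gfns may now alias real memory */
		break;
	case MR_FLAGS_ONLY:
		break;
	}

	if (change != MR_DELETE && log_dirty_pages)
		write_protect_slot();	/* force faults so dirty logging sees writes */
}
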
@@ -6991,7 +7064,7 @@
 	return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE &&
 		!vcpu->arch.apf.halted)
 		|| !list_empty_careful(&vcpu->async_pf.done)
-		|| vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED
+		|| kvm_apic_has_events(vcpu)
 		|| atomic_read(&vcpu->arch.nmi_queued) ||
 		(kvm_arch_interrupt_allowed(vcpu) &&
 		 kvm_cpu_has_interrupt(vcpu));
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index b7b7a88..c2ca181 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -1,3 +1,4 @@
+
 /*
    rbd.c -- Export ceph rados objects as a Linux block device
 
@@ -32,12 +33,14 @@
 #include <linux/ceph/mon_client.h>
 #include <linux/ceph/decode.h>
 #include <linux/parser.h>
+#include <linux/bsearch.h>
 
 #include <linux/kernel.h>
 #include <linux/device.h>
 #include <linux/module.h>
 #include <linux/fs.h>
 #include <linux/blkdev.h>
+#include <linux/slab.h>
 
 #include "rbd_types.h"
 
@@ -52,13 +55,6 @@
 #define	SECTOR_SHIFT	9
 #define	SECTOR_SIZE	(1ULL << SECTOR_SHIFT)
 
-/* It might be useful to have these defined elsewhere */
-
-#define	U8_MAX	((u8)	(~0U))
-#define	U16_MAX	((u16)	(~0U))
-#define	U32_MAX	((u32)	(~0U))
-#define	U64_MAX	((u64)	(~0ULL))
-
 #define RBD_DRV_NAME "rbd"
 #define RBD_DRV_NAME_LONG "rbd (rados block device)"
 
@@ -72,6 +68,8 @@
 
 #define RBD_SNAP_HEAD_NAME	"-"
 
+#define	BAD_SNAP_INDEX	U32_MAX		/* invalid index into snap array */
+
 /* This allows a single page to hold an image name sent by OSD */
 #define RBD_IMAGE_NAME_LEN_MAX	(PAGE_SIZE - sizeof (__le32) - 1)
 #define RBD_IMAGE_ID_LEN_MAX	64
@@ -80,11 +78,14 @@
 
 /* Feature bits */
 
-#define RBD_FEATURE_LAYERING      1
+#define RBD_FEATURE_LAYERING	(1<<0)
+#define RBD_FEATURE_STRIPINGV2	(1<<1)
+#define RBD_FEATURES_ALL \
+	    (RBD_FEATURE_LAYERING | RBD_FEATURE_STRIPINGV2)
 
 /* Features supported by this (client software) implementation. */
 
-#define RBD_FEATURES_ALL          (0)
+#define RBD_FEATURES_SUPPORTED	(RBD_FEATURES_ALL)
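
With the feature bits now a real mask and RBD_FEATURES_SUPPORTED naming what
this client implements, the usual negotiation step is to refuse an image
whose features include anything outside the supported set.  Roughly (an
illustrative helper, not code taken from the driver):

static int check_features(u64 features)
{
	u64 unsupported = features & ~RBD_FEATURES_SUPPORTED;

	if (unsupported)
		return -ENXIO;	/* image needs bits this client lacks */

	return 0;
}
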
 
 /*
  * An RBD device name will be "rbd#", where the "rbd" comes from
@@ -112,7 +113,8 @@
 	char *snap_names;
 	u64 *snap_sizes;
 
-	u64 obj_version;
+	u64 stripe_unit;
+	u64 stripe_count;
 };
 
 /*
@@ -142,13 +144,13 @@
  */
 struct rbd_spec {
 	u64		pool_id;
-	char		*pool_name;
+	const char	*pool_name;
 
-	char		*image_id;
-	char		*image_name;
+	const char	*image_id;
+	const char	*image_name;
 
 	u64		snap_id;
-	char		*snap_name;
+	const char	*snap_name;
 
 	struct kref	kref;
 };
@@ -174,13 +176,44 @@
 	OBJ_REQUEST_NODATA, OBJ_REQUEST_BIO, OBJ_REQUEST_PAGES
 };
 
+enum obj_req_flags {
+	OBJ_REQ_DONE,		/* completion flag: not done = 0, done = 1 */
+	OBJ_REQ_IMG_DATA,	/* object usage: standalone = 0, image = 1 */
+	OBJ_REQ_KNOWN,		/* EXISTS flag valid: no = 0, yes = 1 */
+	OBJ_REQ_EXISTS,		/* target exists: no = 0, yes = 1 */
+};
+
 struct rbd_obj_request {
 	const char		*object_name;
 	u64			offset;		/* object start byte */
 	u64			length;		/* bytes from offset */
+	unsigned long		flags;
 
-	struct rbd_img_request	*img_request;
-	struct list_head	links;		/* img_request->obj_requests */
+	/*
+	 * An object request associated with an image will have its
+	 * img_data flag set; a standalone object request will not.
+	 *
+	 * A standalone object request will have which == BAD_WHICH
+	 * and a null obj_request pointer.
+	 *
+	 * An object request initiated in support of a layered image
+	 * object (to check for its existence before a write) will
+	 * have which == BAD_WHICH and a non-null obj_request pointer.
+	 *
+	 * Finally, an object request for rbd image data will have
+	 * which != BAD_WHICH, and will have a non-null img_request
+	 * pointer.  The value of which will be in the range
+	 * 0..(img_request->obj_request_count-1).
+	 */
+	union {
+		struct rbd_obj_request	*obj_request;	/* STAT op */
+		struct {
+			struct rbd_img_request	*img_request;
+			u64			img_offset;
+			/* links for img_request->obj_requests list */
+			struct list_head	links;
+		};
+	};
 	u32			which;		/* posn image request list */
 
 	enum obj_request_type	type;
@@ -191,13 +224,12 @@
 			u32		page_count;
 		};
 	};
+	struct page		**copyup_pages;
 
 	struct ceph_osd_request	*osd_req;
 
 	u64			xferred;	/* bytes transferred */
-	u64			version;
 	int			result;
-	atomic_t		done;
 
 	rbd_obj_callback_t	callback;
 	struct completion	completion;
@@ -205,19 +237,31 @@
 	struct kref		kref;
 };
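
The comment block above defines three mutually exclusive shapes for an
rbd_obj_request (standalone, existence-check, image data).  Those
invariants can be written down as a predicate; an illustrative, non-kernel
sketch with a stand-in struct:

#include <stdbool.h>
#include <stddef.h>

#define SK_BAD_WHICH	0xffffffffu	/* sketch-local sentinel */

struct sk_obj_request {
	bool		img_data;	/* stands in for the OBJ_REQ_IMG_DATA flag */
	unsigned int	which;
	void		*img_request;	/* or the obj_request pointer (STAT case)  */
};

static bool sk_obj_request_shape_ok(const struct sk_obj_request *req)
{
	if (req->img_data)	/* image data: indexed and owned by an image */
		return req->which != SK_BAD_WHICH && req->img_request != NULL;

	/* standalone or existence-check: never indexed into an image list */
	return req->which == SK_BAD_WHICH;
}
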
 
+enum img_req_flags {
+	IMG_REQ_WRITE,		/* I/O direction: read = 0, write = 1 */
+	IMG_REQ_CHILD,		/* initiator: block = 0, child image = 1 */
+	IMG_REQ_LAYERED,	/* ENOENT handling: normal = 0, layered = 1 */
+};
+
 struct rbd_img_request {
-	struct request		*rq;
 	struct rbd_device	*rbd_dev;
 	u64			offset;	/* starting image byte offset */
 	u64			length;	/* byte count from offset */
-	bool			write_request;	/* false for read */
+	unsigned long		flags;
 	union {
+		u64			snap_id;	/* for reads */
 		struct ceph_snap_context *snapc;	/* for writes */
-		u64		snap_id;		/* for reads */
 	};
+	union {
+		struct request		*rq;		/* block request */
+		struct rbd_obj_request	*obj_request;	/* obj req initiator */
+	};
+	struct page		**copyup_pages;
 	spinlock_t		completion_lock;/* protects next_completion */
 	u32			next_completion;
 	rbd_img_callback_t	callback;
+	u64			xferred;/* aggregate bytes transferred */
+	int			result;	/* first nonzero obj_request result */
 
 	u32			obj_request_count;
 	struct list_head	obj_requests;	/* rbd_obj_request structs */
@@ -232,15 +276,6 @@
 #define for_each_obj_request_safe(ireq, oreq, n) \
 	list_for_each_entry_safe_reverse(oreq, n, &(ireq)->obj_requests, links)
 
-struct rbd_snap {
-	struct	device		dev;
-	const char		*name;
-	u64			size;
-	struct list_head	node;
-	u64			id;
-	u64			features;
-};
-
 struct rbd_mapping {
 	u64                     size;
 	u64                     features;
@@ -276,6 +311,7 @@
 
 	struct rbd_spec		*parent_spec;
 	u64			parent_overlap;
+	struct rbd_device	*parent;
 
 	/* protects updating the header */
 	struct rw_semaphore     header_rwsem;
@@ -284,9 +320,6 @@
 
 	struct list_head	node;
 
-	/* list of snapshots */
-	struct list_head	snaps;
-
 	/* sysfs related */
 	struct device		dev;
 	unsigned long		open_count;	/* protected by lock */
@@ -312,16 +345,21 @@
 static LIST_HEAD(rbd_client_list);		/* clients */
 static DEFINE_SPINLOCK(rbd_client_list_lock);
 
-static int rbd_dev_snaps_update(struct rbd_device *rbd_dev);
-static int rbd_dev_snaps_register(struct rbd_device *rbd_dev);
+/* Slab caches for frequently-allocated structures */
 
-static void rbd_dev_release(struct device *dev);
-static void rbd_remove_snap_dev(struct rbd_snap *snap);
+static struct kmem_cache	*rbd_img_request_cache;
+static struct kmem_cache	*rbd_obj_request_cache;
+static struct kmem_cache	*rbd_segment_name_cache;
+
+static int rbd_img_request_submit(struct rbd_img_request *img_request);
+
+static void rbd_dev_device_release(struct device *dev);
 
 static ssize_t rbd_add(struct bus_type *bus, const char *buf,
 		       size_t count);
 static ssize_t rbd_remove(struct bus_type *bus, const char *buf,
 			  size_t count);
+static int rbd_dev_image_probe(struct rbd_device *rbd_dev);
 
 static struct bus_attribute rbd_bus_attrs[] = {
 	__ATTR(add, S_IWUSR, NULL, rbd_add),
@@ -383,8 +421,19 @@
 #  define rbd_assert(expr)	((void) 0)
 #endif /* !RBD_DEBUG */
 
-static int rbd_dev_refresh(struct rbd_device *rbd_dev, u64 *hver);
-static int rbd_dev_v2_refresh(struct rbd_device *rbd_dev, u64 *hver);
+static int rbd_img_obj_request_submit(struct rbd_obj_request *obj_request);
+static void rbd_img_parent_read(struct rbd_obj_request *obj_request);
+static void rbd_dev_remove_parent(struct rbd_device *rbd_dev);
+
+static int rbd_dev_refresh(struct rbd_device *rbd_dev);
+static int rbd_dev_v2_refresh(struct rbd_device *rbd_dev);
+static const char *rbd_dev_v2_snap_name(struct rbd_device *rbd_dev,
+					u64 snap_id);
+static int _rbd_dev_v2_snap_size(struct rbd_device *rbd_dev, u64 snap_id,
+				u8 *order, u64 *snap_size);
+static int _rbd_dev_v2_snap_features(struct rbd_device *rbd_dev, u64 snap_id,
+		u64 *snap_features);
+static u64 rbd_snap_id_by_name(struct rbd_device *rbd_dev, const char *name);
 
 static int rbd_open(struct block_device *bdev, fmode_t mode)
 {
@@ -484,6 +533,13 @@
 	return ERR_PTR(ret);
 }
 
+static struct rbd_client *__rbd_get_client(struct rbd_client *rbdc)
+{
+	kref_get(&rbdc->kref);
+
+	return rbdc;
+}
+
 /*
  * Find a ceph client with specific addr and configuration.  If
  * found, bump its reference count.
@@ -499,7 +555,8 @@
 	spin_lock(&rbd_client_list_lock);
 	list_for_each_entry(client_node, &rbd_client_list, node) {
 		if (!ceph_compare_options(ceph_opts, client_node->client)) {
-			kref_get(&client_node->kref);
+			__rbd_get_client(client_node);
+
 			found = true;
 			break;
 		}
@@ -722,7 +779,6 @@
 			header->snap_sizes[i] =
 				le64_to_cpu(ondisk->snaps[i].image_size);
 	} else {
-		WARN_ON(ondisk->snap_names_len);
 		header->snap_names = NULL;
 		header->snap_sizes = NULL;
 	}
@@ -735,18 +791,13 @@
 	/* Allocate and fill in the snapshot context */
 
 	header->image_size = le64_to_cpu(ondisk->image_size);
-	size = sizeof (struct ceph_snap_context);
-	size += snap_count * sizeof (header->snapc->snaps[0]);
-	header->snapc = kzalloc(size, GFP_KERNEL);
+
+	header->snapc = ceph_create_snap_context(snap_count, GFP_KERNEL);
 	if (!header->snapc)
 		goto out_err;
-
-	atomic_set(&header->snapc->nref, 1);
 	header->snapc->seq = le64_to_cpu(ondisk->snap_seq);
-	header->snapc->num_snaps = snap_count;
 	for (i = 0; i < snap_count; i++)
-		header->snapc->snaps[i] =
-			le64_to_cpu(ondisk->snaps[i].id);
+		header->snapc->snaps[i] = le64_to_cpu(ondisk->snaps[i].id);
 
 	return 0;
 
@@ -761,70 +812,174 @@
 	return -ENOMEM;
 }
 
+static const char *_rbd_dev_v1_snap_name(struct rbd_device *rbd_dev, u32 which)
+{
+	const char *snap_name;
+
+	rbd_assert(which < rbd_dev->header.snapc->num_snaps);
+
+	/* Skip over names until we find the one we are looking for */
+
+	snap_name = rbd_dev->header.snap_names;
+	while (which--)
+		snap_name += strlen(snap_name) + 1;
+
+	return kstrdup(snap_name, GFP_KERNEL);
+}
+
+/*
+ * Snapshot id comparison function for use with qsort()/bsearch().
+ * Note that result is for snapshots in *descending* order.
+ */
+static int snapid_compare_reverse(const void *s1, const void *s2)
+{
+	u64 snap_id1 = *(u64 *)s1;
+	u64 snap_id2 = *(u64 *)s2;
+
+	if (snap_id1 < snap_id2)
+		return 1;
+	return snap_id1 == snap_id2 ? 0 : -1;
+}
+
+/*
+ * Search a snapshot context to see if the given snapshot id is
+ * present.
+ *
+ * Returns the position of the snapshot id in the array if it's found,
+ * or BAD_SNAP_INDEX otherwise.
+ *
+ * Note: The snapshot array is kept sorted (by the osd) in
+ * reverse order, highest snapshot id first.
+ */
+static u32 rbd_dev_snap_index(struct rbd_device *rbd_dev, u64 snap_id)
+{
+	struct ceph_snap_context *snapc = rbd_dev->header.snapc;
+	u64 *found;
+
+	found = bsearch(&snap_id, &snapc->snaps, snapc->num_snaps,
+				sizeof (snap_id), snapid_compare_reverse);
+
+	return found ? (u32)(found - &snapc->snaps[0]) : BAD_SNAP_INDEX;
+}
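
rbd_dev_snap_index() works because bsearch() only requires that the
comparator and the array agree on an ordering: inverting the comparator
handles the descending snapshot array without re-sorting it.  A
self-contained illustration of the same trick:

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>

static int cmp_desc(const void *a, const void *b)
{
	uint64_t x = *(const uint64_t *)a;
	uint64_t y = *(const uint64_t *)b;

	if (x < y)
		return 1;	/* inverted: the array is highest-first */
	return x == y ? 0 : -1;
}

int main(void)
{
	uint64_t snaps[] = { 90, 42, 17, 3 };	/* descending, as the osd keeps them */
	uint64_t key = 17;
	uint64_t *found = bsearch(&key, snaps, 4, sizeof(key), cmp_desc);

	if (found)
		printf("index %zu\n", (size_t)(found - snaps));	/* index 2 */

	return 0;
}
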
+
+static const char *rbd_dev_v1_snap_name(struct rbd_device *rbd_dev,
+					u64 snap_id)
+{
+	u32 which;
+
+	which = rbd_dev_snap_index(rbd_dev, snap_id);
+	if (which == BAD_SNAP_INDEX)
+		return NULL;
+
+	return _rbd_dev_v1_snap_name(rbd_dev, which);
+}
+
 static const char *rbd_snap_name(struct rbd_device *rbd_dev, u64 snap_id)
 {
-	struct rbd_snap *snap;
-
 	if (snap_id == CEPH_NOSNAP)
 		return RBD_SNAP_HEAD_NAME;
 
-	list_for_each_entry(snap, &rbd_dev->snaps, node)
-		if (snap_id == snap->id)
-			return snap->name;
+	rbd_assert(rbd_image_format_valid(rbd_dev->image_format));
+	if (rbd_dev->image_format == 1)
+		return rbd_dev_v1_snap_name(rbd_dev, snap_id);
 
-	return NULL;
+	return rbd_dev_v2_snap_name(rbd_dev, snap_id);
 }
 
-static int snap_by_name(struct rbd_device *rbd_dev, const char *snap_name)
+static int rbd_snap_size(struct rbd_device *rbd_dev, u64 snap_id,
+				u64 *snap_size)
 {
+	rbd_assert(rbd_image_format_valid(rbd_dev->image_format));
+	if (snap_id == CEPH_NOSNAP) {
+		*snap_size = rbd_dev->header.image_size;
+	} else if (rbd_dev->image_format == 1) {
+		u32 which;
 
-	struct rbd_snap *snap;
+		which = rbd_dev_snap_index(rbd_dev, snap_id);
+		if (which == BAD_SNAP_INDEX)
+			return -ENOENT;
 
-	list_for_each_entry(snap, &rbd_dev->snaps, node) {
-		if (!strcmp(snap_name, snap->name)) {
-			rbd_dev->spec->snap_id = snap->id;
-			rbd_dev->mapping.size = snap->size;
-			rbd_dev->mapping.features = snap->features;
+		*snap_size = rbd_dev->header.snap_sizes[which];
+	} else {
+		u64 size = 0;
+		int ret;
 
-			return 0;
-		}
+		ret = _rbd_dev_v2_snap_size(rbd_dev, snap_id, NULL, &size);
+		if (ret)
+			return ret;
+
+		*snap_size = size;
 	}
-
-	return -ENOENT;
+	return 0;
 }
 
-static int rbd_dev_set_mapping(struct rbd_device *rbd_dev)
+static int rbd_snap_features(struct rbd_device *rbd_dev, u64 snap_id,
+			u64 *snap_features)
 {
+	rbd_assert(rbd_image_format_valid(rbd_dev->image_format));
+	if (snap_id == CEPH_NOSNAP) {
+		*snap_features = rbd_dev->header.features;
+	} else if (rbd_dev->image_format == 1) {
+		*snap_features = 0;	/* No features for format 1 */
+	} else {
+		u64 features = 0;
+		int ret;
+
+		ret = _rbd_dev_v2_snap_features(rbd_dev, snap_id, &features);
+		if (ret)
+			return ret;
+
+		*snap_features = features;
+	}
+	return 0;
+}
+
+static int rbd_dev_mapping_set(struct rbd_device *rbd_dev)
+{
+	const char *snap_name = rbd_dev->spec->snap_name;
+	u64 snap_id;
+	u64 size = 0;
+	u64 features = 0;
 	int ret;
 
-	if (!memcmp(rbd_dev->spec->snap_name, RBD_SNAP_HEAD_NAME,
-		    sizeof (RBD_SNAP_HEAD_NAME))) {
-		rbd_dev->spec->snap_id = CEPH_NOSNAP;
-		rbd_dev->mapping.size = rbd_dev->header.image_size;
-		rbd_dev->mapping.features = rbd_dev->header.features;
-		ret = 0;
+	if (strcmp(snap_name, RBD_SNAP_HEAD_NAME)) {
+		snap_id = rbd_snap_id_by_name(rbd_dev, snap_name);
+		if (snap_id == CEPH_NOSNAP)
+			return -ENOENT;
 	} else {
-		ret = snap_by_name(rbd_dev, rbd_dev->spec->snap_name);
-		if (ret < 0)
-			goto done;
-		rbd_dev->mapping.read_only = true;
+		snap_id = CEPH_NOSNAP;
 	}
-	set_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags);
 
-done:
-	return ret;
+	ret = rbd_snap_size(rbd_dev, snap_id, &size);
+	if (ret)
+		return ret;
+	ret = rbd_snap_features(rbd_dev, snap_id, &features);
+	if (ret)
+		return ret;
+
+	rbd_dev->mapping.size = size;
+	rbd_dev->mapping.features = features;
+
+	/* If we are mapping a snapshot it must be marked read-only */
+
+	if (snap_id != CEPH_NOSNAP)
+		rbd_dev->mapping.read_only = true;
+
+	return 0;
 }
 
-static void rbd_header_free(struct rbd_image_header *header)
+static void rbd_dev_mapping_clear(struct rbd_device *rbd_dev)
 {
-	kfree(header->object_prefix);
-	header->object_prefix = NULL;
-	kfree(header->snap_sizes);
-	header->snap_sizes = NULL;
-	kfree(header->snap_names);
-	header->snap_names = NULL;
-	ceph_put_snap_context(header->snapc);
-	header->snapc = NULL;
+	rbd_dev->mapping.size = 0;
+	rbd_dev->mapping.features = 0;
+	rbd_dev->mapping.read_only = true;
+}
+
+static void rbd_dev_clear_mapping(struct rbd_device *rbd_dev)
+{
+	rbd_dev->mapping.size = 0;
+	rbd_dev->mapping.features = 0;
+	rbd_dev->mapping.read_only = true;
 }
 
 static const char *rbd_segment_name(struct rbd_device *rbd_dev, u64 offset)
@@ -833,7 +988,7 @@
 	u64 segment;
 	int ret;
 
-	name = kmalloc(MAX_OBJ_NAME_SIZE + 1, GFP_NOIO);
+	name = kmem_cache_alloc(rbd_segment_name_cache, GFP_NOIO);
 	if (!name)
 		return NULL;
 	segment = offset >> rbd_dev->header.obj_order;
@@ -849,6 +1004,13 @@
 	return name;
 }
 
+static void rbd_segment_name_free(const char *name)
+{
+	/* The explicit cast here is needed to drop the const qualifier */
+
+	kmem_cache_free(rbd_segment_name_cache, (void *)name);
+}
+
 static u64 rbd_segment_offset(struct rbd_device *rbd_dev, u64 offset)
 {
 	u64 segment_size = (u64) 1 << rbd_dev->header.obj_order;
@@ -921,6 +1083,37 @@
 }
 
 /*
+ * Similar to zero_bio_chain(), this zeros data defined by a page array,
+ * starting at the given byte offset from the start of the array and
+ * continuing up to the given end offset.  The pages array is
+ * assumed to be big enough to hold all bytes up to the end.
+ */
+static void zero_pages(struct page **pages, u64 offset, u64 end)
+{
+	struct page **page = &pages[offset >> PAGE_SHIFT];
+
+	rbd_assert(end > offset);
+	rbd_assert(end - offset <= (u64)SIZE_MAX);
+	while (offset < end) {
+		size_t page_offset;
+		size_t length;
+		unsigned long flags;
+		void *kaddr;
+
+		page_offset = (size_t)(offset & ~PAGE_MASK);
+		length = min(PAGE_SIZE - page_offset, (size_t)(end - offset));
+		local_irq_save(flags);
+		kaddr = kmap_atomic(*page);
+		memset(kaddr + page_offset, 0, length);
+		kunmap_atomic(kaddr);
+		local_irq_restore(flags);
+
+		offset += length;
+		page++;
+	}
+}
+
+/*
  * Clone a portion of a bio, starting at the given byte offset
  * and continuing for the number of bytes indicated.
  */
@@ -1064,6 +1257,77 @@
 	return NULL;
 }
 
+/*
+ * The default/initial value for all object request flags is 0.  For
+ * each flag, once its value is set to 1 it is never reset to 0
+ * again.
+ */
+static void obj_request_img_data_set(struct rbd_obj_request *obj_request)
+{
+	if (test_and_set_bit(OBJ_REQ_IMG_DATA, &obj_request->flags)) {
+		struct rbd_device *rbd_dev;
+
+		rbd_dev = obj_request->img_request->rbd_dev;
+		rbd_warn(rbd_dev, "obj_request %p already marked img_data\n",
+			obj_request);
+	}
+}
+
+static bool obj_request_img_data_test(struct rbd_obj_request *obj_request)
+{
+	smp_mb();
+	return test_bit(OBJ_REQ_IMG_DATA, &obj_request->flags) != 0;
+}
+
+static void obj_request_done_set(struct rbd_obj_request *obj_request)
+{
+	if (test_and_set_bit(OBJ_REQ_DONE, &obj_request->flags)) {
+		struct rbd_device *rbd_dev = NULL;
+
+		if (obj_request_img_data_test(obj_request))
+			rbd_dev = obj_request->img_request->rbd_dev;
+		rbd_warn(rbd_dev, "obj_request %p already marked done\n",
+			obj_request);
+	}
+}
+
+static bool obj_request_done_test(struct rbd_obj_request *obj_request)
+{
+	smp_mb();
+	return test_bit(OBJ_REQ_DONE, &obj_request->flags) != 0;
+}
+
+/*
+ * This sets the KNOWN flag after (possibly) setting the EXISTS
+ * flag.  The latter is set based on the "exists" value provided.
+ *
+ * Note that for our purposes once an object exists it never goes
+ * away again.  It's possible that the responses from two existence
+ * checks are separated by the creation of the target object, and
+ * the first ("doesn't exist") response arrives *after* the second
+ * ("does exist").  In that case we ignore the second one.
+ */
+static void obj_request_existence_set(struct rbd_obj_request *obj_request,
+				bool exists)
+{
+	if (exists)
+		set_bit(OBJ_REQ_EXISTS, &obj_request->flags);
+	set_bit(OBJ_REQ_KNOWN, &obj_request->flags);
+	smp_mb();
+}
+
+static bool obj_request_known_test(struct rbd_obj_request *obj_request)
+{
+	smp_mb();
+	return test_bit(OBJ_REQ_KNOWN, &obj_request->flags) != 0;
+}
+
+static bool obj_request_exists_test(struct rbd_obj_request *obj_request)
+{
+	smp_mb();
+	return test_bit(OBJ_REQ_EXISTS, &obj_request->flags) != 0;
+}
+
 static void rbd_obj_request_get(struct rbd_obj_request *obj_request)
 {
 	dout("%s: obj %p (was %d)\n", __func__, obj_request,
@@ -1101,9 +1365,11 @@
 {
 	rbd_assert(obj_request->img_request == NULL);
 
-	rbd_obj_request_get(obj_request);
+	/* Image request now owns object's original reference */
 	obj_request->img_request = img_request;
 	obj_request->which = img_request->obj_request_count;
+	rbd_assert(!obj_request_img_data_test(obj_request));
+	obj_request_img_data_set(obj_request);
 	rbd_assert(obj_request->which != BAD_WHICH);
 	img_request->obj_request_count++;
 	list_add_tail(&obj_request->links, &img_request->obj_requests);
@@ -1123,6 +1389,7 @@
 	img_request->obj_request_count--;
 	rbd_assert(obj_request->which == img_request->obj_request_count);
 	obj_request->which = BAD_WHICH;
+	rbd_assert(obj_request_img_data_test(obj_request));
 	rbd_assert(obj_request->img_request == img_request);
 	obj_request->img_request = NULL;
 	obj_request->callback = NULL;
@@ -1141,76 +1408,6 @@
 	}
 }
 
-static struct ceph_osd_req_op *rbd_osd_req_op_create(u16 opcode, ...)
-{
-	struct ceph_osd_req_op *op;
-	va_list args;
-	size_t size;
-
-	op = kzalloc(sizeof (*op), GFP_NOIO);
-	if (!op)
-		return NULL;
-	op->op = opcode;
-	va_start(args, opcode);
-	switch (opcode) {
-	case CEPH_OSD_OP_READ:
-	case CEPH_OSD_OP_WRITE:
-		/* rbd_osd_req_op_create(READ, offset, length) */
-		/* rbd_osd_req_op_create(WRITE, offset, length) */
-		op->extent.offset = va_arg(args, u64);
-		op->extent.length = va_arg(args, u64);
-		if (opcode == CEPH_OSD_OP_WRITE)
-			op->payload_len = op->extent.length;
-		break;
-	case CEPH_OSD_OP_STAT:
-		break;
-	case CEPH_OSD_OP_CALL:
-		/* rbd_osd_req_op_create(CALL, class, method, data, datalen) */
-		op->cls.class_name = va_arg(args, char *);
-		size = strlen(op->cls.class_name);
-		rbd_assert(size <= (size_t) U8_MAX);
-		op->cls.class_len = size;
-		op->payload_len = size;
-
-		op->cls.method_name = va_arg(args, char *);
-		size = strlen(op->cls.method_name);
-		rbd_assert(size <= (size_t) U8_MAX);
-		op->cls.method_len = size;
-		op->payload_len += size;
-
-		op->cls.argc = 0;
-		op->cls.indata = va_arg(args, void *);
-		size = va_arg(args, size_t);
-		rbd_assert(size <= (size_t) U32_MAX);
-		op->cls.indata_len = (u32) size;
-		op->payload_len += size;
-		break;
-	case CEPH_OSD_OP_NOTIFY_ACK:
-	case CEPH_OSD_OP_WATCH:
-		/* rbd_osd_req_op_create(NOTIFY_ACK, cookie, version) */
-		/* rbd_osd_req_op_create(WATCH, cookie, version, flag) */
-		op->watch.cookie = va_arg(args, u64);
-		op->watch.ver = va_arg(args, u64);
-		op->watch.ver = cpu_to_le64(op->watch.ver);
-		if (opcode == CEPH_OSD_OP_WATCH && va_arg(args, int))
-			op->watch.flag = (u8) 1;
-		break;
-	default:
-		rbd_warn(NULL, "unsupported opcode %hu\n", opcode);
-		kfree(op);
-		op = NULL;
-		break;
-	}
-	va_end(args);
-
-	return op;
-}
-
-static void rbd_osd_req_op_destroy(struct ceph_osd_req_op *op)
-{
-	kfree(op);
-}
-
 static int rbd_obj_request_submit(struct ceph_osd_client *osdc,
 				struct rbd_obj_request *obj_request)
 {
@@ -1221,7 +1418,24 @@
 
 static void rbd_img_request_complete(struct rbd_img_request *img_request)
 {
+
 	dout("%s: img %p\n", __func__, img_request);
+
+	/*
+	 * If no error occurred, compute the aggregate transfer
+	 * count for the image request.  We could instead use
+	 * atomic64_cmpxchg() to update it as each object request
+	 * completes; not clear offhand which way is better.
+	 */
+	if (!img_request->result) {
+		struct rbd_obj_request *obj_request;
+		u64 xferred = 0;
+
+		for_each_obj_request(img_request, obj_request)
+			xferred += obj_request->xferred;
+		img_request->xferred = xferred;
+	}
+
 	if (img_request->callback)
 		img_request->callback(img_request);
 	else
@@ -1237,39 +1451,56 @@
 	return wait_for_completion_interruptible(&obj_request->completion);
 }
 
-static void obj_request_done_init(struct rbd_obj_request *obj_request)
+/*
+ * The default/initial value for all image request flags is 0.  Each
+ * is conditionally set to 1 at image request initialization time
+ * and currently never changes thereafter.
+ */
+static void img_request_write_set(struct rbd_img_request *img_request)
 {
-	atomic_set(&obj_request->done, 0);
-	smp_wmb();
+	set_bit(IMG_REQ_WRITE, &img_request->flags);
+	smp_mb();
 }
 
-static void obj_request_done_set(struct rbd_obj_request *obj_request)
-{
-	int done;
-
-	done = atomic_inc_return(&obj_request->done);
-	if (done > 1) {
-		struct rbd_img_request *img_request = obj_request->img_request;
-		struct rbd_device *rbd_dev;
-
-		rbd_dev = img_request ? img_request->rbd_dev : NULL;
-		rbd_warn(rbd_dev, "obj_request %p was already done\n",
-			obj_request);
-	}
-}
-
-static bool obj_request_done_test(struct rbd_obj_request *obj_request)
+static bool img_request_write_test(struct rbd_img_request *img_request)
 {
 	smp_mb();
-	return atomic_read(&obj_request->done) != 0;
+	return test_bit(IMG_REQ_WRITE, &img_request->flags) != 0;
+}
+
+static void img_request_child_set(struct rbd_img_request *img_request)
+{
+	set_bit(IMG_REQ_CHILD, &img_request->flags);
+	smp_mb();
+}
+
+static bool img_request_child_test(struct rbd_img_request *img_request)
+{
+	smp_mb();
+	return test_bit(IMG_REQ_CHILD, &img_request->flags) != 0;
+}
+
+static void img_request_layered_set(struct rbd_img_request *img_request)
+{
+	set_bit(IMG_REQ_LAYERED, &img_request->flags);
+	smp_mb();
+}
+
+static bool img_request_layered_test(struct rbd_img_request *img_request)
+{
+	smp_mb();
+	return test_bit(IMG_REQ_LAYERED, &img_request->flags) != 0;
 }
 
 static void
 rbd_img_obj_request_read_callback(struct rbd_obj_request *obj_request)
 {
+	u64 xferred = obj_request->xferred;
+	u64 length = obj_request->length;
+
 	dout("%s: obj %p img %p result %d %llu/%llu\n", __func__,
 		obj_request, obj_request->img_request, obj_request->result,
-		obj_request->xferred, obj_request->length);
+		xferred, length);
 	/*
 	 * ENOENT means a hole in the image.  We zero-fill the
 	 * entire length of the request.  A short read also implies
@@ -1277,15 +1508,20 @@
 	 * update the xferred count to indicate the whole request
 	 * was satisfied.
 	 */
-	BUG_ON(obj_request->type != OBJ_REQUEST_BIO);
+	rbd_assert(obj_request->type != OBJ_REQUEST_NODATA);
 	if (obj_request->result == -ENOENT) {
-		zero_bio_chain(obj_request->bio_list, 0);
+		if (obj_request->type == OBJ_REQUEST_BIO)
+			zero_bio_chain(obj_request->bio_list, 0);
+		else
+			zero_pages(obj_request->pages, 0, length);
 		obj_request->result = 0;
-		obj_request->xferred = obj_request->length;
-	} else if (obj_request->xferred < obj_request->length &&
-			!obj_request->result) {
-		zero_bio_chain(obj_request->bio_list, obj_request->xferred);
-		obj_request->xferred = obj_request->length;
+		obj_request->xferred = length;
+	} else if (xferred < length && !obj_request->result) {
+		if (obj_request->type == OBJ_REQUEST_BIO)
+			zero_bio_chain(obj_request->bio_list, xferred);
+		else
+			zero_pages(obj_request->pages, xferred, length);
+		obj_request->xferred = length;
 	}
 	obj_request_done_set(obj_request);
 }
@@ -1308,9 +1544,23 @@
 
 static void rbd_osd_read_callback(struct rbd_obj_request *obj_request)
 {
-	dout("%s: obj %p result %d %llu/%llu\n", __func__, obj_request,
-		obj_request->result, obj_request->xferred, obj_request->length);
-	if (obj_request->img_request)
+	struct rbd_img_request *img_request = NULL;
+	struct rbd_device *rbd_dev = NULL;
+	bool layered = false;
+
+	if (obj_request_img_data_test(obj_request)) {
+		img_request = obj_request->img_request;
+		layered = img_request && img_request_layered_test(img_request);
+		rbd_dev = img_request->rbd_dev;
+	}
+
+	dout("%s: obj %p img %p result %d %llu/%llu\n", __func__,
+		obj_request, img_request, obj_request->result,
+		obj_request->xferred, obj_request->length);
+	if (layered && obj_request->result == -ENOENT &&
+			obj_request->img_offset < rbd_dev->parent_overlap)
+		rbd_img_parent_read(obj_request);
+	else if (img_request)
 		rbd_img_obj_request_read_callback(obj_request);
 	else
 		obj_request_done_set(obj_request);
@@ -1321,9 +1571,8 @@
 	dout("%s: obj %p result %d %llu\n", __func__, obj_request,
 		obj_request->result, obj_request->length);
 	/*
-	 * There is no such thing as a successful short write.
-	 * Our xferred value is the number of bytes transferred
-	 * back.  Set it to our originally-requested length.
+	 * There is no such thing as a successful short write.  Set
+	 * the xferred count to our originally-requested length.
 	 */
 	obj_request->xferred = obj_request->length;
 	obj_request_done_set(obj_request);
@@ -1347,22 +1596,25 @@
 
 	dout("%s: osd_req %p msg %p\n", __func__, osd_req, msg);
 	rbd_assert(osd_req == obj_request->osd_req);
-	rbd_assert(!!obj_request->img_request ^
-				(obj_request->which == BAD_WHICH));
+	if (obj_request_img_data_test(obj_request)) {
+		rbd_assert(obj_request->img_request);
+		rbd_assert(obj_request->which != BAD_WHICH);
+	} else {
+		rbd_assert(obj_request->which == BAD_WHICH);
+	}
 
 	if (osd_req->r_result < 0)
 		obj_request->result = osd_req->r_result;
-	obj_request->version = le64_to_cpu(osd_req->r_reassert_version.version);
 
-	WARN_ON(osd_req->r_num_ops != 1);	/* For now */
+	BUG_ON(osd_req->r_num_ops > 2);
 
 	/*
 	 * We support a 64-bit length, but ultimately it has to be
 	 * passed to blk_end_request(), which takes an unsigned int.
 	 */
 	obj_request->xferred = osd_req->r_reply_op_len[0];
-	rbd_assert(obj_request->xferred < (u64) UINT_MAX);
-	opcode = osd_req->r_request_ops[0].op;
+	rbd_assert(obj_request->xferred < (u64)UINT_MAX);
+	opcode = osd_req->r_ops[0].op;
 	switch (opcode) {
 	case CEPH_OSD_OP_READ:
 		rbd_osd_read_callback(obj_request);
@@ -1388,28 +1640,49 @@
 		rbd_obj_request_complete(obj_request);
 }
 
+static void rbd_osd_req_format_read(struct rbd_obj_request *obj_request)
+{
+	struct rbd_img_request *img_request = obj_request->img_request;
+	struct ceph_osd_request *osd_req = obj_request->osd_req;
+	u64 snap_id;
+
+	rbd_assert(osd_req != NULL);
+
+	snap_id = img_request ? img_request->snap_id : CEPH_NOSNAP;
+	ceph_osdc_build_request(osd_req, obj_request->offset,
+			NULL, snap_id, NULL);
+}
+
+static void rbd_osd_req_format_write(struct rbd_obj_request *obj_request)
+{
+	struct rbd_img_request *img_request = obj_request->img_request;
+	struct ceph_osd_request *osd_req = obj_request->osd_req;
+	struct ceph_snap_context *snapc;
+	struct timespec mtime = CURRENT_TIME;
+
+	rbd_assert(osd_req != NULL);
+
+	snapc = img_request ? img_request->snapc : NULL;
+	ceph_osdc_build_request(osd_req, obj_request->offset,
+			snapc, CEPH_NOSNAP, &mtime);
+}
+
 static struct ceph_osd_request *rbd_osd_req_create(
 					struct rbd_device *rbd_dev,
 					bool write_request,
-					struct rbd_obj_request *obj_request,
-					struct ceph_osd_req_op *op)
+					struct rbd_obj_request *obj_request)
 {
-	struct rbd_img_request *img_request = obj_request->img_request;
 	struct ceph_snap_context *snapc = NULL;
 	struct ceph_osd_client *osdc;
 	struct ceph_osd_request *osd_req;
-	struct timespec now;
-	struct timespec *mtime;
-	u64 snap_id = CEPH_NOSNAP;
-	u64 offset = obj_request->offset;
-	u64 length = obj_request->length;
 
-	if (img_request) {
-		rbd_assert(img_request->write_request == write_request);
-		if (img_request->write_request)
+	if (obj_request_img_data_test(obj_request)) {
+		struct rbd_img_request *img_request = obj_request->img_request;
+
+		rbd_assert(write_request ==
+				img_request_write_test(img_request));
+		if (write_request)
 			snapc = img_request->snapc;
-		else
-			snap_id = img_request->snap_id;
 	}
 
 	/* Allocate and initialize the request, for the single op */
@@ -1419,31 +1692,10 @@
 	if (!osd_req)
 		return NULL;	/* ENOMEM */
 
-	rbd_assert(obj_request_type_valid(obj_request->type));
-	switch (obj_request->type) {
-	case OBJ_REQUEST_NODATA:
-		break;		/* Nothing to do */
-	case OBJ_REQUEST_BIO:
-		rbd_assert(obj_request->bio_list != NULL);
-		osd_req->r_bio = obj_request->bio_list;
-		break;
-	case OBJ_REQUEST_PAGES:
-		osd_req->r_pages = obj_request->pages;
-		osd_req->r_num_pages = obj_request->page_count;
-		osd_req->r_page_alignment = offset & ~PAGE_MASK;
-		break;
-	}
-
-	if (write_request) {
+	if (write_request)
 		osd_req->r_flags = CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK;
-		now = CURRENT_TIME;
-		mtime = &now;
-	} else {
+	else
 		osd_req->r_flags = CEPH_OSD_FLAG_READ;
-		mtime = NULL;	/* not needed for reads */
-		offset = 0;	/* These are not used... */
-		length = 0;	/* ...for osd read requests */
-	}
 
 	osd_req->r_callback = rbd_osd_req_callback;
 	osd_req->r_priv = obj_request;
@@ -1454,14 +1706,51 @@
 
 	osd_req->r_file_layout = rbd_dev->layout;	/* struct */
 
-	/* osd_req will get its own reference to snapc (if non-null) */
+	return osd_req;
+}
 
-	ceph_osdc_build_request(osd_req, offset, length, 1, op,
-				snapc, snap_id, mtime);
+/*
+ * Create a copyup osd request based on the information in the
+ * object request supplied.  A copyup request has two osd ops,
+ * a copyup method call, and a "normal" write request.
+ */
+static struct ceph_osd_request *
+rbd_osd_req_create_copyup(struct rbd_obj_request *obj_request)
+{
+	struct rbd_img_request *img_request;
+	struct ceph_snap_context *snapc;
+	struct rbd_device *rbd_dev;
+	struct ceph_osd_client *osdc;
+	struct ceph_osd_request *osd_req;
+
+	rbd_assert(obj_request_img_data_test(obj_request));
+	img_request = obj_request->img_request;
+	rbd_assert(img_request);
+	rbd_assert(img_request_write_test(img_request));
+
+	/* Allocate and initialize the request, for the two ops */
+
+	snapc = img_request->snapc;
+	rbd_dev = img_request->rbd_dev;
+	osdc = &rbd_dev->rbd_client->client->osdc;
+	osd_req = ceph_osdc_alloc_request(osdc, snapc, 2, false, GFP_ATOMIC);
+	if (!osd_req)
+		return NULL;	/* ENOMEM */
+
+	osd_req->r_flags = CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK;
+	osd_req->r_callback = rbd_osd_req_callback;
+	osd_req->r_priv = obj_request;
+
+	osd_req->r_oid_len = strlen(obj_request->object_name);
+	rbd_assert(osd_req->r_oid_len < sizeof (osd_req->r_oid));
+	memcpy(osd_req->r_oid, obj_request->object_name, osd_req->r_oid_len);
+
+	osd_req->r_file_layout = rbd_dev->layout;	/* struct */
 
 	return osd_req;
 }
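
The two ops themselves are filled in later, by
rbd_img_obj_parent_read_full_callback() below; roughly, the finished request
looks like this sketch (a copyup method call carrying the parent data,
followed by the original write):

	osd_req_op_cls_init(osd_req, 0, CEPH_OSD_OP_CALL, "rbd", "copyup");
	osd_req_op_cls_request_data_pages(osd_req, 0, pages, obj_size, 0,
						false, false);
	osd_req_op_extent_init(osd_req, 1, CEPH_OSD_OP_WRITE,
					offset, length, 0, 0);
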
 
 static void rbd_osd_req_destroy(struct ceph_osd_request *osd_req)
 {
 	ceph_osdc_put_request(osd_req);
@@ -1480,18 +1769,23 @@
 	rbd_assert(obj_request_type_valid(type));
 
 	size = strlen(object_name) + 1;
-	obj_request = kzalloc(sizeof (*obj_request) + size, GFP_KERNEL);
-	if (!obj_request)
+	name = kmalloc(size, GFP_KERNEL);
+	if (!name)
 		return NULL;
 
-	name = (char *)(obj_request + 1);
+	obj_request = kmem_cache_zalloc(rbd_obj_request_cache, GFP_KERNEL);
+	if (!obj_request) {
+		kfree(name);
+		return NULL;
+	}
+
 	obj_request->object_name = memcpy(name, object_name, size);
 	obj_request->offset = offset;
 	obj_request->length = length;
+	obj_request->flags = 0;
 	obj_request->which = BAD_WHICH;
 	obj_request->type = type;
 	INIT_LIST_HEAD(&obj_request->links);
-	obj_request_done_init(obj_request);
 	init_completion(&obj_request->completion);
 	kref_init(&obj_request->kref);
 
@@ -1530,7 +1824,9 @@
 		break;
 	}
 
-	kfree(obj_request);
+	kfree(obj_request->object_name);
+	obj_request->object_name = NULL;
+	kmem_cache_free(rbd_obj_request_cache, obj_request);
 }
 
 /*
@@ -1541,37 +1837,40 @@
 static struct rbd_img_request *rbd_img_request_create(
 					struct rbd_device *rbd_dev,
 					u64 offset, u64 length,
-					bool write_request)
+					bool write_request,
+					bool child_request)
 {
 	struct rbd_img_request *img_request;
-	struct ceph_snap_context *snapc = NULL;
 
-	img_request = kmalloc(sizeof (*img_request), GFP_ATOMIC);
+	img_request = kmem_cache_alloc(rbd_img_request_cache, GFP_ATOMIC);
 	if (!img_request)
 		return NULL;
 
 	if (write_request) {
 		down_read(&rbd_dev->header_rwsem);
-		snapc = ceph_get_snap_context(rbd_dev->header.snapc);
+		ceph_get_snap_context(rbd_dev->header.snapc);
 		up_read(&rbd_dev->header_rwsem);
-		if (WARN_ON(!snapc)) {
-			kfree(img_request);
-			return NULL;	/* Shouldn't happen */
-		}
 	}
 
 	img_request->rq = NULL;
 	img_request->rbd_dev = rbd_dev;
 	img_request->offset = offset;
 	img_request->length = length;
-	img_request->write_request = write_request;
-	if (write_request)
-		img_request->snapc = snapc;
-	else
+	img_request->flags = 0;
+	if (write_request) {
+		img_request_write_set(img_request);
+		img_request->snapc = rbd_dev->header.snapc;
+	} else {
 		img_request->snap_id = rbd_dev->spec->snap_id;
+	}
+	if (child_request)
+		img_request_child_set(img_request);
+	if (rbd_dev->parent_spec)
+		img_request_layered_set(img_request);
 	spin_lock_init(&img_request->completion_lock);
 	img_request->next_completion = 0;
 	img_request->callback = NULL;
+	img_request->result = 0;
 	img_request->obj_request_count = 0;
 	INIT_LIST_HEAD(&img_request->obj_requests);
 	kref_init(&img_request->kref);
@@ -1600,78 +1899,204 @@
 		rbd_img_obj_request_del(img_request, obj_request);
 	rbd_assert(img_request->obj_request_count == 0);
 
-	if (img_request->write_request)
+	if (img_request_write_test(img_request))
 		ceph_put_snap_context(img_request->snapc);
 
-	kfree(img_request);
+	if (img_request_child_test(img_request))
+		rbd_obj_request_put(img_request->obj_request);
+
+	kmem_cache_free(rbd_img_request_cache, img_request);
 }
 
-static int rbd_img_request_fill_bio(struct rbd_img_request *img_request,
-					struct bio *bio_list)
+static bool rbd_img_obj_end_request(struct rbd_obj_request *obj_request)
+{
+	struct rbd_img_request *img_request;
+	unsigned int xferred;
+	int result;
+	bool more;
+
+	rbd_assert(obj_request_img_data_test(obj_request));
+	img_request = obj_request->img_request;
+
+	rbd_assert(obj_request->xferred <= (u64)UINT_MAX);
+	xferred = (unsigned int)obj_request->xferred;
+	result = obj_request->result;
+	if (result) {
+		struct rbd_device *rbd_dev = img_request->rbd_dev;
+
+		rbd_warn(rbd_dev, "%s %llx at %llx (%llx)\n",
+			img_request_write_test(img_request) ? "write" : "read",
+			obj_request->length, obj_request->img_offset,
+			obj_request->offset);
+		rbd_warn(rbd_dev, "  result %d xferred %x\n",
+			result, xferred);
+		if (!img_request->result)
+			img_request->result = result;
+	}
+
+	/* Image object requests don't own their page array */
+
+	if (obj_request->type == OBJ_REQUEST_PAGES) {
+		obj_request->pages = NULL;
+		obj_request->page_count = 0;
+	}
+
+	if (img_request_child_test(img_request)) {
+		rbd_assert(img_request->obj_request != NULL);
+		more = obj_request->which < img_request->obj_request_count - 1;
+	} else {
+		rbd_assert(img_request->rq != NULL);
+		more = blk_end_request(img_request->rq, result, xferred);
+	}
+
+	return more;
+}
+
+static void rbd_img_obj_callback(struct rbd_obj_request *obj_request)
+{
+	struct rbd_img_request *img_request;
+	u32 which = obj_request->which;
+	bool more = true;
+
+	rbd_assert(obj_request_img_data_test(obj_request));
+	img_request = obj_request->img_request;
+
+	dout("%s: img %p obj %p\n", __func__, img_request, obj_request);
+	rbd_assert(img_request != NULL);
+	rbd_assert(img_request->obj_request_count > 0);
+	rbd_assert(which != BAD_WHICH);
+	rbd_assert(which < img_request->obj_request_count);
+	rbd_assert(which >= img_request->next_completion);
+
+	spin_lock_irq(&img_request->completion_lock);
+	if (which != img_request->next_completion)
+		goto out;
+
+	for_each_obj_request_from(img_request, obj_request) {
+		rbd_assert(more);
+		rbd_assert(which < img_request->obj_request_count);
+
+		if (!obj_request_done_test(obj_request))
+			break;
+		more = rbd_img_obj_end_request(obj_request);
+		which++;
+	}
+
+	rbd_assert(more ^ (which == img_request->obj_request_count));
+	img_request->next_completion = which;
+out:
+	spin_unlock_irq(&img_request->completion_lock);
+
+	if (!more)
+		rbd_img_request_complete(img_request);
+}
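
Object requests can complete in any order, but blk_end_request() must be fed
bytes in order, so completions are only consumed from next_completion
forward.  A worked sequence, assuming three object requests whose
completions arrive as 2, 0, 1:

	/* callback(which=2): 2 != next_completion (0) -> record nothing
	 * callback(which=0): end 0; 1 not done yet    -> next_completion = 1
	 * callback(which=1): end 1, then end 2        -> next_completion = 3,
	 *                    more is false, the image request completes
	 */
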
+
+/*
+ * Split up an image request into one or more object requests, each
+ * to a different object.  The "type" parameter indicates whether
+ * "data_desc" is the pointer to the head of a list of bio
+ * structures, or the base of a page array.  In either case this
+ * function assumes data_desc describes memory sufficient to hold
+ * all data described by the image request.
+ */
+static int rbd_img_request_fill(struct rbd_img_request *img_request,
+					enum obj_request_type type,
+					void *data_desc)
 {
 	struct rbd_device *rbd_dev = img_request->rbd_dev;
 	struct rbd_obj_request *obj_request = NULL;
 	struct rbd_obj_request *next_obj_request;
-	unsigned int bio_offset;
-	u64 image_offset;
+	bool write_request = img_request_write_test(img_request);
+	struct bio *bio_list;
+	unsigned int bio_offset = 0;
+	struct page **pages;
+	u64 img_offset;
 	u64 resid;
 	u16 opcode;
 
-	dout("%s: img %p bio %p\n", __func__, img_request, bio_list);
+	dout("%s: img %p type %d data_desc %p\n", __func__, img_request,
+		(int)type, data_desc);
 
-	opcode = img_request->write_request ? CEPH_OSD_OP_WRITE
-					      : CEPH_OSD_OP_READ;
-	bio_offset = 0;
-	image_offset = img_request->offset;
-	rbd_assert(image_offset == bio_list->bi_sector << SECTOR_SHIFT);
+	opcode = write_request ? CEPH_OSD_OP_WRITE : CEPH_OSD_OP_READ;
+	img_offset = img_request->offset;
 	resid = img_request->length;
 	rbd_assert(resid > 0);
+
+	if (type == OBJ_REQUEST_BIO) {
+		bio_list = data_desc;
+		rbd_assert(img_offset == bio_list->bi_sector << SECTOR_SHIFT);
+	} else {
+		rbd_assert(type == OBJ_REQUEST_PAGES);
+		pages = data_desc;
+	}
+
 	while (resid) {
+		struct ceph_osd_request *osd_req;
 		const char *object_name;
-		unsigned int clone_size;
-		struct ceph_osd_req_op *op;
 		u64 offset;
 		u64 length;
 
-		object_name = rbd_segment_name(rbd_dev, image_offset);
+		object_name = rbd_segment_name(rbd_dev, img_offset);
 		if (!object_name)
 			goto out_unwind;
-		offset = rbd_segment_offset(rbd_dev, image_offset);
-		length = rbd_segment_length(rbd_dev, image_offset, resid);
+		offset = rbd_segment_offset(rbd_dev, img_offset);
+		length = rbd_segment_length(rbd_dev, img_offset, resid);
 		obj_request = rbd_obj_request_create(object_name,
-						offset, length,
-						OBJ_REQUEST_BIO);
-		kfree(object_name);	/* object request has its own copy */
+						offset, length, type);
+		/* object request has its own copy of the object name */
+		rbd_segment_name_free(object_name);
 		if (!obj_request)
 			goto out_unwind;
 
-		rbd_assert(length <= (u64) UINT_MAX);
-		clone_size = (unsigned int) length;
-		obj_request->bio_list = bio_chain_clone_range(&bio_list,
-						&bio_offset, clone_size,
-						GFP_ATOMIC);
-		if (!obj_request->bio_list)
-			goto out_partial;
+		if (type == OBJ_REQUEST_BIO) {
+			unsigned int clone_size;
 
-		/*
-		 * Build up the op to use in building the osd
-		 * request.  Note that the contents of the op are
-		 * copied by rbd_osd_req_create().
-		 */
-		op = rbd_osd_req_op_create(opcode, offset, length);
-		if (!op)
-			goto out_partial;
-		obj_request->osd_req = rbd_osd_req_create(rbd_dev,
-						img_request->write_request,
-						obj_request, op);
-		rbd_osd_req_op_destroy(op);
-		if (!obj_request->osd_req)
-			goto out_partial;
-		/* status and version are initially zero-filled */
+			rbd_assert(length <= (u64)UINT_MAX);
+			clone_size = (unsigned int)length;
+			obj_request->bio_list =
+					bio_chain_clone_range(&bio_list,
+								&bio_offset,
+								clone_size,
+								GFP_ATOMIC);
+			if (!obj_request->bio_list)
+				goto out_partial;
+		} else {
+			unsigned int page_count;
 
+			obj_request->pages = pages;
+			page_count = (u32)calc_pages_for(offset, length);
+			obj_request->page_count = page_count;
+			if ((offset + length) & ~PAGE_MASK)
+				page_count--;	/* more on last page */
+			pages += page_count;
+		}
+
+		osd_req = rbd_osd_req_create(rbd_dev, write_request,
+						obj_request);
+		if (!osd_req)
+			goto out_partial;
+		obj_request->osd_req = osd_req;
+		obj_request->callback = rbd_img_obj_callback;
+
+		osd_req_op_extent_init(osd_req, 0, opcode, offset, length,
+						0, 0);
+		if (type == OBJ_REQUEST_BIO)
+			osd_req_op_extent_osd_data_bio(osd_req, 0,
+					obj_request->bio_list, length);
+		else
+			osd_req_op_extent_osd_data_pages(osd_req, 0,
+					obj_request->pages, length,
+					offset & ~PAGE_MASK, false, false);
+
+		if (write_request)
+			rbd_osd_req_format_write(obj_request);
+		else
+			rbd_osd_req_format_read(obj_request);
+
+		obj_request->img_offset = img_offset;
 		rbd_img_obj_request_add(img_request, obj_request);
 
-		image_offset += length;
+		img_offset += length;
 		resid -= length;
 	}
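
As an illustration of the splitting this loop performs, assuming default
4 MiB objects (hypothetical numbers): a 4 MiB image request starting at
image offset 6 MiB becomes two object requests:

	/* object covering [4 MiB, 8 MiB):  offset = 2 MiB, length = 2 MiB
	 * object covering [8 MiB, 12 MiB): offset = 0,     length = 2 MiB
	 */
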
 
@@ -1686,61 +2111,389 @@
 	return -ENOMEM;
 }
 
-static void rbd_img_obj_callback(struct rbd_obj_request *obj_request)
+static void
+rbd_img_obj_copyup_callback(struct rbd_obj_request *obj_request)
 {
 	struct rbd_img_request *img_request;
-	u32 which = obj_request->which;
-	bool more = true;
+	struct rbd_device *rbd_dev;
+	u64 length;
+	u32 page_count;
+
+	rbd_assert(obj_request->type == OBJ_REQUEST_BIO);
+	rbd_assert(obj_request_img_data_test(obj_request));
+	img_request = obj_request->img_request;
+	rbd_assert(img_request);
+
+	rbd_dev = img_request->rbd_dev;
+	rbd_assert(rbd_dev);
+	length = (u64)1 << rbd_dev->header.obj_order;
+	page_count = (u32)calc_pages_for(0, length);
+
+	rbd_assert(obj_request->copyup_pages);
+	ceph_release_page_vector(obj_request->copyup_pages, page_count);
+	obj_request->copyup_pages = NULL;
+
+	/*
+	 * We want the transfer count to reflect the size of the
+	 * original write request.  There is no such thing as a
+	 * successful short write, so if the request was successful
+	 * we can just set it to the originally-requested length.
+	 */
+	if (!obj_request->result)
+		obj_request->xferred = obj_request->length;
+
+	/* Finish up with the normal image object callback */
+
+	rbd_img_obj_callback(obj_request);
+}
+
+static void
+rbd_img_obj_parent_read_full_callback(struct rbd_img_request *img_request)
+{
+	struct rbd_obj_request *orig_request;
+	struct ceph_osd_request *osd_req;
+	struct ceph_osd_client *osdc;
+	struct rbd_device *rbd_dev;
+	struct page **pages;
+	int result;
+	u64 obj_size;
+	u64 xferred;
+
+	rbd_assert(img_request_child_test(img_request));
+
+	/* First get what we need from the image request */
+
+	pages = img_request->copyup_pages;
+	rbd_assert(pages != NULL);
+	img_request->copyup_pages = NULL;
+
+	orig_request = img_request->obj_request;
+	rbd_assert(orig_request != NULL);
+	rbd_assert(orig_request->type == OBJ_REQUEST_BIO);
+	result = img_request->result;
+	obj_size = img_request->length;
+	xferred = img_request->xferred;
+
+	rbd_dev = img_request->rbd_dev;
+	rbd_assert(rbd_dev);
+	rbd_assert(obj_size == (u64)1 << rbd_dev->header.obj_order);
+
+	rbd_img_request_put(img_request);
+
+	if (result)
+		goto out_err;
+
+	/* Allocate the new copyup osd request for the original request */
+
+	result = -ENOMEM;
+	rbd_assert(!orig_request->osd_req);
+	osd_req = rbd_osd_req_create_copyup(orig_request);
+	if (!osd_req)
+		goto out_err;
+	orig_request->osd_req = osd_req;
+	orig_request->copyup_pages = pages;
+
+	/* Initialize the copyup op */
+
+	osd_req_op_cls_init(osd_req, 0, CEPH_OSD_OP_CALL, "rbd", "copyup");
+	osd_req_op_cls_request_data_pages(osd_req, 0, pages, obj_size, 0,
+						false, false);
+
+	/* Then the original write request op */
+
+	osd_req_op_extent_init(osd_req, 1, CEPH_OSD_OP_WRITE,
+					orig_request->offset,
+					orig_request->length, 0, 0);
+	osd_req_op_extent_osd_data_bio(osd_req, 1, orig_request->bio_list,
+					orig_request->length);
+
+	rbd_osd_req_format_write(orig_request);
+
+	/* All set, send it off. */
+
+	orig_request->callback = rbd_img_obj_copyup_callback;
+	osdc = &rbd_dev->rbd_client->client->osdc;
+	result = rbd_obj_request_submit(osdc, orig_request);
+	if (!result)
+		return;
+out_err:
+	/* Record the error code and complete the request */
+
+	orig_request->result = result;
+	orig_request->xferred = 0;
+	obj_request_done_set(orig_request);
+	rbd_obj_request_complete(orig_request);
+}
+
+/*
+ * Read from the parent image the range of data that covers the
+ * entire target of the given object request.  This is used for
+ * satisfying a layered image write request when the target of an
+ * object request from the image request does not exist.
+ *
+ * A page array big enough to hold the returned data is allocated
+ * and supplied to rbd_img_request_fill() as the "data descriptor."
+ * When the read completes, this page array will be transferred to
+ * the original object request for the copyup operation.
+ *
+ * If an error occurs, record it as the result of the original
+ * object request and mark it done so it gets completed.
+ */
+static int rbd_img_obj_parent_read_full(struct rbd_obj_request *obj_request)
+{
+	struct rbd_img_request *img_request = NULL;
+	struct rbd_img_request *parent_request = NULL;
+	struct rbd_device *rbd_dev;
+	u64 img_offset;
+	u64 length;
+	struct page **pages = NULL;
+	u32 page_count;
+	int result;
+
+	rbd_assert(obj_request_img_data_test(obj_request));
+	rbd_assert(obj_request->type == OBJ_REQUEST_BIO);
 
 	img_request = obj_request->img_request;
-
-	dout("%s: img %p obj %p\n", __func__, img_request, obj_request);
 	rbd_assert(img_request != NULL);
-	rbd_assert(img_request->rq != NULL);
-	rbd_assert(img_request->obj_request_count > 0);
-	rbd_assert(which != BAD_WHICH);
-	rbd_assert(which < img_request->obj_request_count);
-	rbd_assert(which >= img_request->next_completion);
+	rbd_dev = img_request->rbd_dev;
+	rbd_assert(rbd_dev->parent != NULL);
 
-	spin_lock_irq(&img_request->completion_lock);
-	if (which != img_request->next_completion)
-		goto out;
+	/*
+	 * First things first.  The original osd request is of no
+	 * use to us any more; we'll need a new one that can hold
+	 * the two ops in a copyup request.  We'll get that later,
+	 * but for now we can release the old one.
+	 */
+	rbd_osd_req_destroy(obj_request->osd_req);
+	obj_request->osd_req = NULL;
 
-	for_each_obj_request_from(img_request, obj_request) {
-		unsigned int xferred;
-		int result;
+	/*
+	 * Determine the byte range covered by the object in the
+	 * child image to which the original request was to be sent.
+	 */
+	img_offset = obj_request->img_offset - obj_request->offset;
+	length = (u64)1 << rbd_dev->header.obj_order;
 
-		rbd_assert(more);
-		rbd_assert(which < img_request->obj_request_count);
-
-		if (!obj_request_done_test(obj_request))
-			break;
-
-		rbd_assert(obj_request->xferred <= (u64) UINT_MAX);
-		xferred = (unsigned int) obj_request->xferred;
-		result = (int) obj_request->result;
-		if (result)
-			rbd_warn(NULL, "obj_request %s result %d xferred %u\n",
-				img_request->write_request ? "write" : "read",
-				result, xferred);
-
-		more = blk_end_request(img_request->rq, result, xferred);
-		which++;
+	/*
+	 * There is no defined parent data beyond the parent
+	 * overlap, so limit what we read at that boundary if
+	 * necessary.
+	 */
+	if (img_offset + length > rbd_dev->parent_overlap) {
+		rbd_assert(img_offset < rbd_dev->parent_overlap);
+		length = rbd_dev->parent_overlap - img_offset;
 	}
 
-	rbd_assert(more ^ (which == img_request->obj_request_count));
-	img_request->next_completion = which;
-out:
-	spin_unlock_irq(&img_request->completion_lock);
+	/*
+	 * Allocate a page array big enough to receive the data read
+	 * from the parent.
+	 */
+	page_count = (u32)calc_pages_for(0, length);
+	pages = ceph_alloc_page_vector(page_count, GFP_KERNEL);
+	if (IS_ERR(pages)) {
+		result = PTR_ERR(pages);
+		pages = NULL;
+		goto out_err;
+	}
 
-	if (!more)
-		rbd_img_request_complete(img_request);
+	result = -ENOMEM;
+	parent_request = rbd_img_request_create(rbd_dev->parent,
+						img_offset, length,
+						false, true);
+	if (!parent_request)
+		goto out_err;
+	rbd_obj_request_get(obj_request);
+	parent_request->obj_request = obj_request;
+
+	result = rbd_img_request_fill(parent_request, OBJ_REQUEST_PAGES, pages);
+	if (result)
+		goto out_err;
+	parent_request->copyup_pages = pages;
+
+	parent_request->callback = rbd_img_obj_parent_read_full_callback;
+	result = rbd_img_request_submit(parent_request);
+	if (!result)
+		return 0;
+
+	parent_request->copyup_pages = NULL;
+	parent_request->obj_request = NULL;
+	rbd_obj_request_put(obj_request);
+out_err:
+	if (pages)
+		ceph_release_page_vector(pages, page_count);
+	if (parent_request)
+		rbd_img_request_put(parent_request);
+	obj_request->result = result;
+	obj_request->xferred = 0;
+	obj_request_done_set(obj_request);
+
+	return result;
+}
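
For example, with 4 MiB objects and a parent overlap of 22 MiB
(hypothetical numbers), a write landing in the object that covers image
bytes [20 MiB, 24 MiB) reads from the parent as follows:

	/* img_offset = obj_request->img_offset - obj_request->offset = 20 MiB
	 * length     = min(4 MiB, 22 MiB - 20 MiB) = 2 MiB
	 */
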
+
+static void rbd_img_obj_exists_callback(struct rbd_obj_request *obj_request)
+{
+	struct rbd_obj_request *orig_request;
+	int result;
+
+	rbd_assert(!obj_request_img_data_test(obj_request));
+
+	/*
+	 * All we need from the object request is the original
+	 * request and the result of the STAT op.  Grab those, then
+	 * we're done with the request.
+	 */
+	orig_request = obj_request->obj_request;
+	obj_request->obj_request = NULL;
+	rbd_assert(orig_request);
+	rbd_assert(orig_request->img_request);
+
+	result = obj_request->result;
+	obj_request->result = 0;
+
+	dout("%s: obj %p for obj %p result %d %llu/%llu\n", __func__,
+		obj_request, orig_request, result,
+		obj_request->xferred, obj_request->length);
+	rbd_obj_request_put(obj_request);
+
+	/*
+	 * Our only purpose here is to determine whether the object
+	 * exists, and we don't want to treat the non-existence as
+	 * an error.  If something else comes back, transfer the
+	 * error to the original request and complete it now.
+	 */
+	if (!result) {
+		obj_request_existence_set(orig_request, true);
+	} else if (result == -ENOENT) {
+		obj_request_existence_set(orig_request, false);
+	} else {
+		orig_request->result = result;
+		goto out;
+	}
+
+	/*
+	 * Resubmit the original request now that we have recorded
+	 * whether the target object exists.
+	 */
+	orig_request->result = rbd_img_obj_request_submit(orig_request);
+out:
+	if (orig_request->result)
+		rbd_obj_request_complete(orig_request);
+	rbd_obj_request_put(orig_request);
+}
+
+static int rbd_img_obj_exists_submit(struct rbd_obj_request *obj_request)
+{
+	struct rbd_obj_request *stat_request;
+	struct rbd_device *rbd_dev;
+	struct ceph_osd_client *osdc;
+	struct page **pages = NULL;
+	u32 page_count;
+	size_t size;
+	int ret;
+
+	/*
+	 * The response data for a STAT call consists of:
+	 *     le64 length;
+	 *     struct {
+	 *         le32 tv_sec;
+	 *         le32 tv_nsec;
+	 *     } mtime;
+	 */
+	size = sizeof (__le64) + sizeof (__le32) + sizeof (__le32);
+	page_count = (u32)calc_pages_for(0, size);
+	pages = ceph_alloc_page_vector(page_count, GFP_KERNEL);
+	if (IS_ERR(pages))
+		return PTR_ERR(pages);
+
+	ret = -ENOMEM;
+	stat_request = rbd_obj_request_create(obj_request->object_name, 0, 0,
+							OBJ_REQUEST_PAGES);
+	if (!stat_request)
+		goto out;
+
+	rbd_obj_request_get(obj_request);
+	stat_request->obj_request = obj_request;
+	stat_request->pages = pages;
+	stat_request->page_count = page_count;
+
+	rbd_assert(obj_request->img_request);
+	rbd_dev = obj_request->img_request->rbd_dev;
+	stat_request->osd_req = rbd_osd_req_create(rbd_dev, false,
+						stat_request);
+	if (!stat_request->osd_req)
+		goto out;
+	stat_request->callback = rbd_img_obj_exists_callback;
+
+	osd_req_op_init(stat_request->osd_req, 0, CEPH_OSD_OP_STAT);
+	osd_req_op_raw_data_in_pages(stat_request->osd_req, 0, pages, size, 0,
+					false, false);
+	rbd_osd_req_format_read(stat_request);
+
+	osdc = &rbd_dev->rbd_client->client->osdc;
+	ret = rbd_obj_request_submit(osdc, stat_request);
+out:
+	if (ret)
+		rbd_obj_request_put(obj_request);
+
+	return ret;
+}
+
+static int rbd_img_obj_request_submit(struct rbd_obj_request *obj_request)
+{
+	struct rbd_img_request *img_request;
+	struct rbd_device *rbd_dev;
+	bool known;
+
+	rbd_assert(obj_request_img_data_test(obj_request));
+
+	img_request = obj_request->img_request;
+	rbd_assert(img_request);
+	rbd_dev = img_request->rbd_dev;
+
+	/*
+	 * Only writes to layered images need special handling.
+	 * Reads and non-layered writes are simple object requests.
+	 * Layered writes that start beyond the end of the overlap
+	 * with the parent have no parent data, so they too are
+	 * simple object requests.  Finally, if the target object is
+	 * known to already exist, its parent data has already been
+	 * copied, so a write to the object can also be handled as a
+	 * simple object request.
+	 */
+	if (!img_request_write_test(img_request) ||
+		!img_request_layered_test(img_request) ||
+		rbd_dev->parent_overlap <= obj_request->img_offset ||
+		((known = obj_request_known_test(obj_request)) &&
+			obj_request_exists_test(obj_request))) {
+
+		struct ceph_osd_client *osdc;
+
+		osdc = &rbd_dev->rbd_client->client->osdc;
+
+		return rbd_obj_request_submit(osdc, obj_request);
+	}
+
+	/*
+	 * It's a layered write.  The target object might exist but
+	 * we may not know that yet.  If we know it doesn't exist,
+	 * start by reading the data for the full target object from
+	 * the parent so we can use it for a copyup to the target.
+	 */
+	if (known)
+		return rbd_img_obj_parent_read_full(obj_request);
+
+	/* We don't know whether the target exists.  Go find out. */
+
+	return rbd_img_obj_exists_submit(obj_request);
 }
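
The cases the test above encodes, shown as a table (paths named
informally):

	/* write? layered? offset<overlap? target object?  -> path
	 *   no      -           -              -             plain submit
	 *   yes     no          -              -             plain submit
	 *   yes     yes         no             -             plain submit
	 *   yes     yes         yes      known to exist      plain submit
	 *   yes     yes         yes      known, missing      parent read + copyup
	 *   yes     yes         yes        unknown           STAT, then resubmit
	 */
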
 
 static int rbd_img_request_submit(struct rbd_img_request *img_request)
 {
-	struct rbd_device *rbd_dev = img_request->rbd_dev;
-	struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
 	struct rbd_obj_request *obj_request;
 	struct rbd_obj_request *next_obj_request;
 
@@ -1748,27 +2501,105 @@
 	for_each_obj_request_safe(img_request, obj_request, next_obj_request) {
 		int ret;
 
-		obj_request->callback = rbd_img_obj_callback;
-		ret = rbd_obj_request_submit(osdc, obj_request);
+		ret = rbd_img_obj_request_submit(obj_request);
 		if (ret)
 			return ret;
-		/*
-		 * The image request has its own reference to each
-		 * of its object requests, so we can safely drop the
-		 * initial one here.
-		 */
-		rbd_obj_request_put(obj_request);
 	}
 
 	return 0;
 }
 
-static int rbd_obj_notify_ack(struct rbd_device *rbd_dev,
-				   u64 ver, u64 notify_id)
+static void rbd_img_parent_read_callback(struct rbd_img_request *img_request)
 {
 	struct rbd_obj_request *obj_request;
-	struct ceph_osd_req_op *op;
-	struct ceph_osd_client *osdc;
+	struct rbd_device *rbd_dev;
+	u64 obj_end;
+
+	rbd_assert(img_request_child_test(img_request));
+
+	obj_request = img_request->obj_request;
+	rbd_assert(obj_request);
+	rbd_assert(obj_request->img_request);
+
+	obj_request->result = img_request->result;
+	if (obj_request->result)
+		goto out;
+
+	/*
+	 * We need to zero anything beyond the parent overlap
+	 * boundary.  Since rbd_img_obj_request_read_callback()
+	 * will zero anything beyond the end of a short read, an
+	 * easy way to do this is to pretend the data from the
+	 * parent came up short--ending at the overlap boundary.
+	 */
+	rbd_assert(obj_request->img_offset < U64_MAX - obj_request->length);
+	obj_end = obj_request->img_offset + obj_request->length;
+	rbd_dev = obj_request->img_request->rbd_dev;
+	if (obj_end > rbd_dev->parent_overlap) {
+		u64 xferred = 0;
+
+		if (obj_request->img_offset < rbd_dev->parent_overlap)
+			xferred = rbd_dev->parent_overlap -
+					obj_request->img_offset;
+
+		obj_request->xferred = min(img_request->xferred, xferred);
+	} else {
+		obj_request->xferred = img_request->xferred;
+	}
+out:
+	rbd_img_request_put(img_request);
+	rbd_img_obj_request_read_callback(obj_request);
+	rbd_obj_request_complete(obj_request);
+}
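
A worked example of the clamp above (hypothetical numbers): for an object
request covering image bytes [8 MiB, 12 MiB) with parent_overlap = 10 MiB:

	/* obj_end (12 MiB) > parent_overlap (10 MiB)
	 * xferred = parent_overlap - img_offset = 2 MiB
	 * rbd_img_obj_request_read_callback() then zero-fills the remaining
	 * [2 MiB, 4 MiB) of the request, as for any short read
	 */
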
+
+static void rbd_img_parent_read(struct rbd_obj_request *obj_request)
+{
+	struct rbd_device *rbd_dev;
+	struct rbd_img_request *img_request;
+	int result;
+
+	rbd_assert(obj_request_img_data_test(obj_request));
+	rbd_assert(obj_request->img_request != NULL);
+	rbd_assert(obj_request->result == (s32) -ENOENT);
+	rbd_assert(obj_request->type == OBJ_REQUEST_BIO);
+
+	rbd_dev = obj_request->img_request->rbd_dev;
+	rbd_assert(rbd_dev->parent != NULL);
+	/* rbd_read_finish(obj_request, obj_request->length); */
+	img_request = rbd_img_request_create(rbd_dev->parent,
+						obj_request->img_offset,
+						obj_request->length,
+						false, true);
+	result = -ENOMEM;
+	if (!img_request)
+		goto out_err;
+
+	rbd_obj_request_get(obj_request);
+	img_request->obj_request = obj_request;
+
+	result = rbd_img_request_fill(img_request, OBJ_REQUEST_BIO,
+					obj_request->bio_list);
+	if (result)
+		goto out_err;
+
+	img_request->callback = rbd_img_parent_read_callback;
+	result = rbd_img_request_submit(img_request);
+	if (result)
+		goto out_err;
+
+	return;
+out_err:
+	if (img_request)
+		rbd_img_request_put(img_request);
+	obj_request->result = result;
+	obj_request->xferred = 0;
+	obj_request_done_set(obj_request);
+}
+
+static int rbd_obj_notify_ack(struct rbd_device *rbd_dev, u64 notify_id)
+{
+	struct rbd_obj_request *obj_request;
+	struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
 	int ret;
 
 	obj_request = rbd_obj_request_create(rbd_dev->header_name, 0, 0,
@@ -1777,17 +2608,15 @@
 		return -ENOMEM;
 
 	ret = -ENOMEM;
-	op = rbd_osd_req_op_create(CEPH_OSD_OP_NOTIFY_ACK, notify_id, ver);
-	if (!op)
-		goto out;
-	obj_request->osd_req = rbd_osd_req_create(rbd_dev, false,
-						obj_request, op);
-	rbd_osd_req_op_destroy(op);
+	obj_request->osd_req = rbd_osd_req_create(rbd_dev, false, obj_request);
 	if (!obj_request->osd_req)
 		goto out;
-
-	osdc = &rbd_dev->rbd_client->client->osdc;
 	obj_request->callback = rbd_obj_request_put;
+
+	osd_req_op_watch_init(obj_request->osd_req, 0, CEPH_OSD_OP_NOTIFY_ACK,
+					notify_id, 0, 0);
+	rbd_osd_req_format_read(obj_request);
+
 	ret = rbd_obj_request_submit(osdc, obj_request);
 out:
 	if (ret)
@@ -1799,21 +2628,16 @@
 static void rbd_watch_cb(u64 ver, u64 notify_id, u8 opcode, void *data)
 {
 	struct rbd_device *rbd_dev = (struct rbd_device *)data;
-	u64 hver;
-	int rc;
 
 	if (!rbd_dev)
 		return;
 
 	dout("%s: \"%s\" notify_id %llu opcode %u\n", __func__,
-		rbd_dev->header_name, (unsigned long long) notify_id,
-		(unsigned int) opcode);
-	rc = rbd_dev_refresh(rbd_dev, &hver);
-	if (rc)
-		rbd_warn(rbd_dev, "got notification but failed to "
-			   " update snaps: %d\n", rc);
+		rbd_dev->header_name, (unsigned long long)notify_id,
+		(unsigned int)opcode);
+	(void)rbd_dev_refresh(rbd_dev);
 
-	rbd_obj_notify_ack(rbd_dev, hver, notify_id);
+	rbd_obj_notify_ack(rbd_dev, notify_id);
 }
 
 /*
@@ -1824,7 +2648,6 @@
 {
 	struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
 	struct rbd_obj_request *obj_request;
-	struct ceph_osd_req_op *op;
 	int ret;
 
 	rbd_assert(start ^ !!rbd_dev->watch_event);
@@ -1844,14 +2667,7 @@
 	if (!obj_request)
 		goto out_cancel;
 
-	op = rbd_osd_req_op_create(CEPH_OSD_OP_WATCH,
-				rbd_dev->watch_event->cookie,
-				rbd_dev->header.obj_version, start);
-	if (!op)
-		goto out_cancel;
-	obj_request->osd_req = rbd_osd_req_create(rbd_dev, true,
-							obj_request, op);
-	rbd_osd_req_op_destroy(op);
+	obj_request->osd_req = rbd_osd_req_create(rbd_dev, true, obj_request);
 	if (!obj_request->osd_req)
 		goto out_cancel;
 
@@ -1860,6 +2676,11 @@
 	else
 		ceph_osdc_unregister_linger_request(osdc,
 					rbd_dev->watch_request->osd_req);
+
+	osd_req_op_watch_init(obj_request->osd_req, 0, CEPH_OSD_OP_WATCH,
+				rbd_dev->watch_event->cookie, 0, start);
+	rbd_osd_req_format_write(obj_request);
+
 	ret = rbd_obj_request_submit(osdc, obj_request);
 	if (ret)
 		goto out_cancel;
@@ -1899,40 +2720,38 @@
 }
 
 /*
- * Synchronous osd object method call
+ * Synchronous osd object method call.  Returns the number of bytes
+ * returned in the inbound buffer, or a negative error code.
  */
 static int rbd_obj_method_sync(struct rbd_device *rbd_dev,
 			     const char *object_name,
 			     const char *class_name,
 			     const char *method_name,
-			     const char *outbound,
+			     const void *outbound,
 			     size_t outbound_size,
-			     char *inbound,
-			     size_t inbound_size,
-			     u64 *version)
+			     void *inbound,
+			     size_t inbound_size)
 {
+	struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
 	struct rbd_obj_request *obj_request;
-	struct ceph_osd_client *osdc;
-	struct ceph_osd_req_op *op;
 	struct page **pages;
 	u32 page_count;
 	int ret;
 
 	/*
-	 * Method calls are ultimately read operations but they
-	 * don't involve object data (so no offset or length).
-	 * The result should placed into the inbound buffer
-	 * provided.  They also supply outbound data--parameters for
-	 * the object method.  Currently if this is present it will
-	 * be a snapshot id.
+	 * Method calls are ultimately read operations.  The result
+	 * should be placed into the inbound buffer provided.  They
+	 * also supply outbound data--parameters for the object
+	 * method.  Currently if this is present it will be a
+	 * snapshot id.
 	 */
-	page_count = (u32) calc_pages_for(0, inbound_size);
+	page_count = (u32)calc_pages_for(0, inbound_size);
 	pages = ceph_alloc_page_vector(page_count, GFP_KERNEL);
 	if (IS_ERR(pages))
 		return PTR_ERR(pages);
 
 	ret = -ENOMEM;
-	obj_request = rbd_obj_request_create(object_name, 0, 0,
+	obj_request = rbd_obj_request_create(object_name, 0, inbound_size,
 							OBJ_REQUEST_PAGES);
 	if (!obj_request)
 		goto out;
@@ -1940,17 +2759,29 @@
 	obj_request->pages = pages;
 	obj_request->page_count = page_count;
 
-	op = rbd_osd_req_op_create(CEPH_OSD_OP_CALL, class_name,
-					method_name, outbound, outbound_size);
-	if (!op)
-		goto out;
-	obj_request->osd_req = rbd_osd_req_create(rbd_dev, false,
-						obj_request, op);
-	rbd_osd_req_op_destroy(op);
+	obj_request->osd_req = rbd_osd_req_create(rbd_dev, false, obj_request);
 	if (!obj_request->osd_req)
 		goto out;
 
-	osdc = &rbd_dev->rbd_client->client->osdc;
+	osd_req_op_cls_init(obj_request->osd_req, 0, CEPH_OSD_OP_CALL,
+					class_name, method_name);
+	if (outbound_size) {
+		struct ceph_pagelist *pagelist;
+
+		pagelist = kmalloc(sizeof (*pagelist), GFP_NOFS);
+		if (!pagelist)
+			goto out;
+
+		ceph_pagelist_init(pagelist);
+		ceph_pagelist_append(pagelist, outbound, outbound_size);
+		osd_req_op_cls_request_data_pagelist(obj_request->osd_req, 0,
+						pagelist);
+	}
+	osd_req_op_cls_response_data_pages(obj_request->osd_req, 0,
+					obj_request->pages, inbound_size,
+					0, false, false);
+	rbd_osd_req_format_read(obj_request);
+
 	ret = rbd_obj_request_submit(osdc, obj_request);
 	if (ret)
 		goto out;
@@ -1961,10 +2792,10 @@
 	ret = obj_request->result;
 	if (ret < 0)
 		goto out;
-	ret = 0;
+
+	rbd_assert(obj_request->xferred < (u64)INT_MAX);
+	ret = (int)obj_request->xferred;
 	ceph_copy_from_page_vector(pages, inbound, 0, obj_request->xferred);
-	if (version)
-		*version = obj_request->version;
 out:
 	if (obj_request)
 		rbd_obj_request_put(obj_request);
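
With the byte count now coming back from the method call, callers can
detect truncated replies explicitly.  A hedged usage sketch, mirroring the
get_size caller updated later in this patch (snapid and size_buf are that
caller's locals):

	ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_name,
				"rbd", "get_size",
				&snapid, sizeof (snapid),
				&size_buf, sizeof (size_buf));
	if (ret < 0)
		return ret;
	if (ret < sizeof (size_buf))
		return -ERANGE;		/* short (truncated) reply */
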
@@ -2034,18 +2865,22 @@
 		}
 
 		result = -EINVAL;
-		if (WARN_ON(offset && length > U64_MAX - offset + 1))
+		if (offset && length > U64_MAX - offset + 1) {
+			rbd_warn(rbd_dev, "bad request range (%llu~%llu)\n",
+				offset, length);
 			goto end_request;	/* Shouldn't happen */
+		}
 
 		result = -ENOMEM;
 		img_request = rbd_img_request_create(rbd_dev, offset, length,
-							write_request);
+							write_request, false);
 		if (!img_request)
 			goto end_request;
 
 		img_request->rq = rq;
 
-		result = rbd_img_request_fill_bio(img_request, rq->bio);
+		result = rbd_img_request_fill(img_request, OBJ_REQUEST_BIO,
+						rq->bio);
 		if (!result)
 			result = rbd_img_request_submit(img_request);
 		if (result)
@@ -2053,8 +2888,10 @@
 end_request:
 		spin_lock_irq(q->queue_lock);
 		if (result < 0) {
-			rbd_warn(rbd_dev, "obj_request %s result %d\n",
-				write_request ? "write" : "read", result);
+			rbd_warn(rbd_dev, "%s %llx at %llx result %d\n",
+				write_request ? "write" : "read",
+				length, offset, result);
+
 			__blk_end_request_all(rq, result);
 		}
 	}
@@ -2113,22 +2950,22 @@
 	if (!disk)
 		return;
 
-	if (disk->flags & GENHD_FL_UP)
+	rbd_dev->disk = NULL;
+	if (disk->flags & GENHD_FL_UP) {
 		del_gendisk(disk);
-	if (disk->queue)
-		blk_cleanup_queue(disk->queue);
+		if (disk->queue)
+			blk_cleanup_queue(disk->queue);
+	}
 	put_disk(disk);
 }
 
 static int rbd_obj_read_sync(struct rbd_device *rbd_dev,
 				const char *object_name,
-				u64 offset, u64 length,
-				char *buf, u64 *version)
+				u64 offset, u64 length, void *buf)
 
 {
-	struct ceph_osd_req_op *op;
+	struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
 	struct rbd_obj_request *obj_request;
-	struct ceph_osd_client *osdc;
 	struct page **pages = NULL;
 	u32 page_count;
 	size_t size;
@@ -2148,16 +2985,19 @@
 	obj_request->pages = pages;
 	obj_request->page_count = page_count;
 
-	op = rbd_osd_req_op_create(CEPH_OSD_OP_READ, offset, length);
-	if (!op)
-		goto out;
-	obj_request->osd_req = rbd_osd_req_create(rbd_dev, false,
-						obj_request, op);
-	rbd_osd_req_op_destroy(op);
+	obj_request->osd_req = rbd_osd_req_create(rbd_dev, false, obj_request);
 	if (!obj_request->osd_req)
 		goto out;
 
-	osdc = &rbd_dev->rbd_client->client->osdc;
+	osd_req_op_extent_init(obj_request->osd_req, 0, CEPH_OSD_OP_READ,
+					offset, length, 0, 0);
+	osd_req_op_extent_osd_data_pages(obj_request->osd_req, 0,
+					obj_request->pages,
+					obj_request->length,
+					obj_request->offset & ~PAGE_MASK,
+					false, false);
+	rbd_osd_req_format_read(obj_request);
+
 	ret = rbd_obj_request_submit(osdc, obj_request);
 	if (ret)
 		goto out;
@@ -2172,10 +3012,8 @@
 	rbd_assert(obj_request->xferred <= (u64) SIZE_MAX);
 	size = (size_t) obj_request->xferred;
 	ceph_copy_from_page_vector(pages, buf, 0, size);
-	rbd_assert(size <= (size_t) INT_MAX);
-	ret = (int) size;
-	if (version)
-		*version = obj_request->version;
+	rbd_assert(size <= (size_t)INT_MAX);
+	ret = (int)size;
 out:
 	if (obj_request)
 		rbd_obj_request_put(obj_request);
@@ -2196,7 +3034,7 @@
  * Returns a pointer-coded errno if a failure occurs.
  */
 static struct rbd_image_header_ondisk *
-rbd_dev_v1_header_read(struct rbd_device *rbd_dev, u64 *version)
+rbd_dev_v1_header_read(struct rbd_device *rbd_dev)
 {
 	struct rbd_image_header_ondisk *ondisk = NULL;
 	u32 snap_count = 0;
@@ -2224,11 +3062,10 @@
 			return ERR_PTR(-ENOMEM);
 
 		ret = rbd_obj_read_sync(rbd_dev, rbd_dev->header_name,
-				       0, size,
-				       (char *) ondisk, version);
+				       0, size, ondisk);
 		if (ret < 0)
 			goto out_err;
-		if (WARN_ON((size_t) ret < size)) {
+		if ((size_t)ret < size) {
 			ret = -ENXIO;
 			rbd_warn(rbd_dev, "short header read (want %zd got %d)",
 				size, ret);
@@ -2260,46 +3097,36 @@
 			   struct rbd_image_header *header)
 {
 	struct rbd_image_header_ondisk *ondisk;
-	u64 ver = 0;
 	int ret;
 
-	ondisk = rbd_dev_v1_header_read(rbd_dev, &ver);
+	ondisk = rbd_dev_v1_header_read(rbd_dev);
 	if (IS_ERR(ondisk))
 		return PTR_ERR(ondisk);
 	ret = rbd_header_from_disk(header, ondisk);
-	if (ret >= 0)
-		header->obj_version = ver;
 	kfree(ondisk);
 
 	return ret;
 }
 
-static void rbd_remove_all_snaps(struct rbd_device *rbd_dev)
-{
-	struct rbd_snap *snap;
-	struct rbd_snap *next;
-
-	list_for_each_entry_safe(snap, next, &rbd_dev->snaps, node)
-		rbd_remove_snap_dev(snap);
-}
-
 static void rbd_update_mapping_size(struct rbd_device *rbd_dev)
 {
-	sector_t size;
-
 	if (rbd_dev->spec->snap_id != CEPH_NOSNAP)
 		return;
 
-	size = (sector_t) rbd_dev->header.image_size / SECTOR_SIZE;
-	dout("setting size to %llu sectors", (unsigned long long) size);
-	rbd_dev->mapping.size = (u64) size;
-	set_capacity(rbd_dev->disk, size);
+	if (rbd_dev->mapping.size != rbd_dev->header.image_size) {
+		sector_t size;
+
+		rbd_dev->mapping.size = rbd_dev->header.image_size;
+		size = (sector_t)rbd_dev->mapping.size / SECTOR_SIZE;
+		dout("setting size to %llu sectors", (unsigned long long)size);
+		set_capacity(rbd_dev->disk, size);
+	}
 }
 
 /*
  * only read the first part of the ondisk header, without the snaps info
  */
-static int rbd_dev_v1_refresh(struct rbd_device *rbd_dev, u64 *hver)
+static int rbd_dev_v1_refresh(struct rbd_device *rbd_dev)
 {
 	int ret;
 	struct rbd_image_header h;
@@ -2320,37 +3147,61 @@
 	/* osd requests may still refer to snapc */
 	ceph_put_snap_context(rbd_dev->header.snapc);
 
-	if (hver)
-		*hver = h.obj_version;
-	rbd_dev->header.obj_version = h.obj_version;
 	rbd_dev->header.image_size = h.image_size;
 	rbd_dev->header.snapc = h.snapc;
 	rbd_dev->header.snap_names = h.snap_names;
 	rbd_dev->header.snap_sizes = h.snap_sizes;
 	/* Free the extra copy of the object prefix */
-	WARN_ON(strcmp(rbd_dev->header.object_prefix, h.object_prefix));
+	if (strcmp(rbd_dev->header.object_prefix, h.object_prefix))
+		rbd_warn(rbd_dev, "object prefix changed (ignoring)");
 	kfree(h.object_prefix);
 
-	ret = rbd_dev_snaps_update(rbd_dev);
-	if (!ret)
-		ret = rbd_dev_snaps_register(rbd_dev);
-
 	up_write(&rbd_dev->header_rwsem);
 
 	return ret;
 }
 
-static int rbd_dev_refresh(struct rbd_device *rbd_dev, u64 *hver)
+/*
+ * Clear the rbd device's EXISTS flag if the snapshot it's mapped to
+ * has disappeared from the (just updated) snapshot context.
+ */
+static void rbd_exists_validate(struct rbd_device *rbd_dev)
 {
+	u64 snap_id;
+
+	if (!test_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags))
+		return;
+
+	snap_id = rbd_dev->spec->snap_id;
+	if (snap_id == CEPH_NOSNAP)
+		return;
+
+	if (rbd_dev_snap_index(rbd_dev, snap_id) == BAD_SNAP_INDEX)
+		clear_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags);
+}
+
+static int rbd_dev_refresh(struct rbd_device *rbd_dev)
+{
+	u64 image_size;
 	int ret;
 
 	rbd_assert(rbd_image_format_valid(rbd_dev->image_format));
+	image_size = rbd_dev->header.image_size;
 	mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
 	if (rbd_dev->image_format == 1)
-		ret = rbd_dev_v1_refresh(rbd_dev, hver);
+		ret = rbd_dev_v1_refresh(rbd_dev);
 	else
-		ret = rbd_dev_v2_refresh(rbd_dev, hver);
+		ret = rbd_dev_v2_refresh(rbd_dev);
+
+	/* If it's a mapped snapshot, validate its EXISTS flag */
+
+	rbd_exists_validate(rbd_dev);
 	mutex_unlock(&ctl_mutex);
+	if (ret)
+		rbd_warn(rbd_dev, "got notification but failed to "
+			   " update snaps: %d\n", ret);
+	if (image_size != rbd_dev->header.image_size)
+		revalidate_disk(rbd_dev->disk);
 
 	return ret;
 }
@@ -2394,8 +3245,6 @@
 
 	rbd_dev->disk = disk;
 
-	set_capacity(rbd_dev->disk, rbd_dev->mapping.size / SECTOR_SIZE);
-
 	return 0;
 out_disk:
 	put_disk(disk);
@@ -2416,13 +3265,9 @@
 			     struct device_attribute *attr, char *buf)
 {
 	struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
-	sector_t size;
 
-	down_read(&rbd_dev->header_rwsem);
-	size = get_capacity(rbd_dev->disk);
-	up_read(&rbd_dev->header_rwsem);
-
-	return sprintf(buf, "%llu\n", (unsigned long long) size * SECTOR_SIZE);
+	return sprintf(buf, "%llu\n",
+		(unsigned long long)rbd_dev->mapping.size);
 }
 
 /*
@@ -2435,7 +3280,7 @@
 	struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
 
 	return sprintf(buf, "0x%016llx\n",
-			(unsigned long long) rbd_dev->mapping.features);
+			(unsigned long long)rbd_dev->mapping.features);
 }
 
 static ssize_t rbd_major_show(struct device *dev,
@@ -2443,7 +3288,11 @@
 {
 	struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
 
-	return sprintf(buf, "%d\n", rbd_dev->major);
+	if (rbd_dev->major)
+		return sprintf(buf, "%d\n", rbd_dev->major);
+
+	return sprintf(buf, "(none)\n");
 }
 
 static ssize_t rbd_client_id_show(struct device *dev,
@@ -2469,7 +3318,7 @@
 	struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
 
 	return sprintf(buf, "%llu\n",
-		(unsigned long long) rbd_dev->spec->pool_id);
+			(unsigned long long) rbd_dev->spec->pool_id);
 }
 
 static ssize_t rbd_name_show(struct device *dev,
@@ -2555,7 +3404,7 @@
 	struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
 	int ret;
 
-	ret = rbd_dev_refresh(rbd_dev, NULL);
+	ret = rbd_dev_refresh(rbd_dev);
 
 	return ret < 0 ? ret : size;
 }
@@ -2606,71 +3455,6 @@
 	.release	= rbd_sysfs_dev_release,
 };
 
-
-/*
-  sysfs - snapshots
-*/
-
-static ssize_t rbd_snap_size_show(struct device *dev,
-				  struct device_attribute *attr,
-				  char *buf)
-{
-	struct rbd_snap *snap = container_of(dev, struct rbd_snap, dev);
-
-	return sprintf(buf, "%llu\n", (unsigned long long)snap->size);
-}
-
-static ssize_t rbd_snap_id_show(struct device *dev,
-				struct device_attribute *attr,
-				char *buf)
-{
-	struct rbd_snap *snap = container_of(dev, struct rbd_snap, dev);
-
-	return sprintf(buf, "%llu\n", (unsigned long long)snap->id);
-}
-
-static ssize_t rbd_snap_features_show(struct device *dev,
-				struct device_attribute *attr,
-				char *buf)
-{
-	struct rbd_snap *snap = container_of(dev, struct rbd_snap, dev);
-
-	return sprintf(buf, "0x%016llx\n",
-			(unsigned long long) snap->features);
-}
-
-static DEVICE_ATTR(snap_size, S_IRUGO, rbd_snap_size_show, NULL);
-static DEVICE_ATTR(snap_id, S_IRUGO, rbd_snap_id_show, NULL);
-static DEVICE_ATTR(snap_features, S_IRUGO, rbd_snap_features_show, NULL);
-
-static struct attribute *rbd_snap_attrs[] = {
-	&dev_attr_snap_size.attr,
-	&dev_attr_snap_id.attr,
-	&dev_attr_snap_features.attr,
-	NULL,
-};
-
-static struct attribute_group rbd_snap_attr_group = {
-	.attrs = rbd_snap_attrs,
-};
-
-static void rbd_snap_dev_release(struct device *dev)
-{
-	struct rbd_snap *snap = container_of(dev, struct rbd_snap, dev);
-	kfree(snap->name);
-	kfree(snap);
-}
-
-static const struct attribute_group *rbd_snap_attr_groups[] = {
-	&rbd_snap_attr_group,
-	NULL
-};
-
-static struct device_type rbd_snap_device_type = {
-	.groups		= rbd_snap_attr_groups,
-	.release	= rbd_snap_dev_release,
-};
-
 static struct rbd_spec *rbd_spec_get(struct rbd_spec *spec)
 {
 	kref_get(&spec->kref);
@@ -2694,8 +3478,6 @@
 		return NULL;
 	kref_init(&spec->kref);
 
-	rbd_spec_put(rbd_spec_get(spec));	/* TEMPORARY */
-
 	return spec;
 }
 
@@ -2722,7 +3504,6 @@
 	spin_lock_init(&rbd_dev->lock);
 	rbd_dev->flags = 0;
 	INIT_LIST_HEAD(&rbd_dev->node);
-	INIT_LIST_HEAD(&rbd_dev->snaps);
 	init_rwsem(&rbd_dev->header_rwsem);
 
 	rbd_dev->spec = spec;
@@ -2740,96 +3521,11 @@
 
 static void rbd_dev_destroy(struct rbd_device *rbd_dev)
 {
-	rbd_spec_put(rbd_dev->parent_spec);
-	kfree(rbd_dev->header_name);
 	rbd_put_client(rbd_dev->rbd_client);
 	rbd_spec_put(rbd_dev->spec);
 	kfree(rbd_dev);
 }
 
-static bool rbd_snap_registered(struct rbd_snap *snap)
-{
-	bool ret = snap->dev.type == &rbd_snap_device_type;
-	bool reg = device_is_registered(&snap->dev);
-
-	rbd_assert(!ret ^ reg);
-
-	return ret;
-}
-
-static void rbd_remove_snap_dev(struct rbd_snap *snap)
-{
-	list_del(&snap->node);
-	if (device_is_registered(&snap->dev))
-		device_unregister(&snap->dev);
-}
-
-static int rbd_register_snap_dev(struct rbd_snap *snap,
-				  struct device *parent)
-{
-	struct device *dev = &snap->dev;
-	int ret;
-
-	dev->type = &rbd_snap_device_type;
-	dev->parent = parent;
-	dev->release = rbd_snap_dev_release;
-	dev_set_name(dev, "%s%s", RBD_SNAP_DEV_NAME_PREFIX, snap->name);
-	dout("%s: registering device for snapshot %s\n", __func__, snap->name);
-
-	ret = device_register(dev);
-
-	return ret;
-}
-
-static struct rbd_snap *__rbd_add_snap_dev(struct rbd_device *rbd_dev,
-						const char *snap_name,
-						u64 snap_id, u64 snap_size,
-						u64 snap_features)
-{
-	struct rbd_snap *snap;
-	int ret;
-
-	snap = kzalloc(sizeof (*snap), GFP_KERNEL);
-	if (!snap)
-		return ERR_PTR(-ENOMEM);
-
-	ret = -ENOMEM;
-	snap->name = kstrdup(snap_name, GFP_KERNEL);
-	if (!snap->name)
-		goto err;
-
-	snap->id = snap_id;
-	snap->size = snap_size;
-	snap->features = snap_features;
-
-	return snap;
-
-err:
-	kfree(snap->name);
-	kfree(snap);
-
-	return ERR_PTR(ret);
-}
-
-static char *rbd_dev_v1_snap_info(struct rbd_device *rbd_dev, u32 which,
-		u64 *snap_size, u64 *snap_features)
-{
-	char *snap_name;
-
-	rbd_assert(which < rbd_dev->header.snapc->num_snaps);
-
-	*snap_size = rbd_dev->header.snap_sizes[which];
-	*snap_features = 0;	/* No features for v1 */
-
-	/* Skip over names until we find the one we are looking for */
-
-	snap_name = rbd_dev->header.snap_names;
-	while (which--)
-		snap_name += strlen(snap_name) + 1;
-
-	return snap_name;
-}
-
 /*
  * Get the size and object order for an image snapshot, or if
  * snap_id is CEPH_NOSNAP, gets this information for the base
@@ -2847,18 +3543,21 @@
 
 	ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_name,
 				"rbd", "get_size",
-				(char *) &snapid, sizeof (snapid),
-				(char *) &size_buf, sizeof (size_buf), NULL);
+				&snapid, sizeof (snapid),
+				&size_buf, sizeof (size_buf));
 	dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret);
 	if (ret < 0)
 		return ret;
+	if (ret < sizeof (size_buf))
+		return -ERANGE;
 
-	*order = size_buf.order;
+	if (order)
+		*order = size_buf.order;
 	*snap_size = le64_to_cpu(size_buf.size);
 
 	dout("  snap_id 0x%016llx order = %u, snap_size = %llu\n",
-		(unsigned long long) snap_id, (unsigned int) *order,
-		(unsigned long long) *snap_size);
+		(unsigned long long)snap_id, (unsigned int)*order,
+		(unsigned long long)*snap_size);
 
 	return 0;
 }
@@ -2881,17 +3580,16 @@
 		return -ENOMEM;
 
 	ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_name,
-				"rbd", "get_object_prefix",
-				NULL, 0,
-				reply_buf, RBD_OBJ_PREFIX_LEN_MAX, NULL);
+				"rbd", "get_object_prefix", NULL, 0,
+				reply_buf, RBD_OBJ_PREFIX_LEN_MAX);
 	dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret);
 	if (ret < 0)
 		goto out;
 
 	p = reply_buf;
 	rbd_dev->header.object_prefix = ceph_extract_encoded_string(&p,
-						p + RBD_OBJ_PREFIX_LEN_MAX,
-						NULL, GFP_NOIO);
+						p + ret, NULL, GFP_NOIO);
+	ret = 0;
 
 	if (IS_ERR(rbd_dev->header.object_prefix)) {
 		ret = PTR_ERR(rbd_dev->header.object_prefix);
@@ -2899,7 +3597,6 @@
 	} else {
 		dout("  object_prefix = %s\n", rbd_dev->header.object_prefix);
 	}
-
 out:
 	kfree(reply_buf);
 
@@ -2913,29 +3610,30 @@
 	struct {
 		__le64 features;
 		__le64 incompat;
-	} features_buf = { 0 };
+	} __attribute__ ((packed)) features_buf = { 0 };
 	u64 incompat;
 	int ret;
 
 	ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_name,
 				"rbd", "get_features",
-				(char *) &snapid, sizeof (snapid),
-				(char *) &features_buf, sizeof (features_buf),
-				NULL);
+				&snapid, sizeof (snapid),
+				&features_buf, sizeof (features_buf));
 	dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret);
 	if (ret < 0)
 		return ret;
+	if (ret < sizeof (features_buf))
+		return -ERANGE;
 
 	incompat = le64_to_cpu(features_buf.incompat);
-	if (incompat & ~RBD_FEATURES_ALL)
+	if (incompat & ~RBD_FEATURES_SUPPORTED)
 		return -ENXIO;
 
 	*snap_features = le64_to_cpu(features_buf.features);
 
 	dout("  snap_id 0x%016llx features = 0x%016llx incompat = 0x%016llx\n",
-		(unsigned long long) snap_id,
-		(unsigned long long) *snap_features,
-		(unsigned long long) le64_to_cpu(features_buf.incompat));
+		(unsigned long long)snap_id,
+		(unsigned long long)*snap_features,
+		(unsigned long long)le64_to_cpu(features_buf.incompat));
 
 	return 0;
 }
@@ -2975,15 +3673,15 @@
 	snapid = cpu_to_le64(CEPH_NOSNAP);
 	ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_name,
 				"rbd", "get_parent",
-				(char *) &snapid, sizeof (snapid),
-				(char *) reply_buf, size, NULL);
+				&snapid, sizeof (snapid),
+				reply_buf, size);
 	dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret);
 	if (ret < 0)
 		goto out_err;
 
-	ret = -ERANGE;
 	p = reply_buf;
-	end = (char *) reply_buf + size;
+	end = reply_buf + ret;
+	ret = -ERANGE;
 	ceph_decode_64_safe(&p, end, parent_spec->pool_id, out_err);
 	if (parent_spec->pool_id == CEPH_NOPOOL)
 		goto out;	/* No parent?  No problem. */
@@ -2991,8 +3689,11 @@
 	/* The ceph file layout needs to fit pool id in 32 bits */
 
 	ret = -EIO;
-	if (WARN_ON(parent_spec->pool_id > (u64) U32_MAX))
-		goto out;
+	if (parent_spec->pool_id > (u64)U32_MAX) {
+		rbd_warn(NULL, "parent pool id too large (%llu > %u)\n",
+			(unsigned long long)parent_spec->pool_id, U32_MAX);
+		goto out_err;
+	}
 
 	image_id = ceph_extract_encoded_string(&p, end, NULL, GFP_KERNEL);
 	if (IS_ERR(image_id)) {
@@ -3015,6 +3716,56 @@
 	return ret;
 }
 
+static int rbd_dev_v2_striping_info(struct rbd_device *rbd_dev)
+{
+	struct {
+		__le64 stripe_unit;
+		__le64 stripe_count;
+	} __attribute__ ((packed)) striping_info_buf = { 0 };
+	size_t size = sizeof (striping_info_buf);
+	void *p;
+	u64 obj_size;
+	u64 stripe_unit;
+	u64 stripe_count;
+	int ret;
+
+	ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_name,
+				"rbd", "get_stripe_unit_count", NULL, 0,
+				(char *)&striping_info_buf, size);
+	dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret);
+	if (ret < 0)
+		return ret;
+	if (ret < size)
+		return -ERANGE;
+
+	/*
+	 * We don't actually support the "fancy striping" feature
+	 * (STRIPINGV2) yet, but if the striping sizes are the
+	 * defaults the behavior is the same as before.  So find
+	 * out, and only fail if the image has non-default values.
+	 */
+	ret = -EINVAL;
+	obj_size = (u64)1 << rbd_dev->header.obj_order;
+	p = &striping_info_buf;
+	stripe_unit = ceph_decode_64(&p);
+	if (stripe_unit != obj_size) {
+		rbd_warn(rbd_dev, "unsupported stripe unit "
+				"(got %llu want %llu)",
+				stripe_unit, obj_size);
+		return -EINVAL;
+	}
+	stripe_count = ceph_decode_64(&p);
+	if (stripe_count != 1) {
+		rbd_warn(rbd_dev, "unsupported stripe count "
+				"(got %llu want 1)", stripe_count);
+		return -EINVAL;
+	}
+	rbd_dev->header.stripe_unit = stripe_unit;
+	rbd_dev->header.stripe_count = stripe_count;
+
+	return 0;
+}
+
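
Since obj_order drives the check above, a small standalone example of the arithmetic: with the common default object order of 22, the only striping parameters the driver accepts are a 4 MiB stripe unit and a stripe count of one. This is a self-contained illustration, not driver code:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint8_t obj_order = 22;				/* a typical rbd object order */
	uint64_t obj_size = (uint64_t)1 << obj_order;	/* 4194304 bytes */
	uint64_t stripe_unit = obj_size;		/* default striping ... */
	uint64_t stripe_count = 1;			/* ... is one object per stripe */

	if (stripe_unit != obj_size || stripe_count != 1) {
		fprintf(stderr, "fancy striping (STRIPINGV2) unsupported\n");
		return 1;
	}
	printf("object size %llu, default striping accepted\n",
	       (unsigned long long)obj_size);
	return 0;
}
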
 static char *rbd_dev_image_name(struct rbd_device *rbd_dev)
 {
 	size_t image_id_size;
@@ -3036,8 +3787,8 @@
 		return NULL;
 
 	p = image_id;
-	end = (char *) image_id + image_id_size;
-	ceph_encode_string(&p, end, rbd_dev->spec->image_id, (u32) len);
+	end = image_id + image_id_size;
+	ceph_encode_string(&p, end, rbd_dev->spec->image_id, (u32)len);
 
 	size = sizeof (__le32) + RBD_IMAGE_NAME_LEN_MAX;
 	reply_buf = kmalloc(size, GFP_KERNEL);
@@ -3047,11 +3798,12 @@
 	ret = rbd_obj_method_sync(rbd_dev, RBD_DIRECTORY,
 				"rbd", "dir_get_name",
 				image_id, image_id_size,
-				(char *) reply_buf, size, NULL);
+				reply_buf, size);
 	if (ret < 0)
 		goto out;
 	p = reply_buf;
-	end = (char *) reply_buf + size;
+	end = reply_buf + ret;
+
 	image_name = ceph_extract_encoded_string(&p, end, &len, GFP_KERNEL);
 	if (IS_ERR(image_name))
 		image_name = NULL;
@@ -3064,69 +3816,134 @@
 	return image_name;
 }
 
-/*
- * When a parent image gets probed, we only have the pool, image,
- * and snapshot ids but not the names of any of them.  This call
- * is made later to fill in those names.  It has to be done after
- * rbd_dev_snaps_update() has completed because some of the
- * information (in particular, snapshot name) is not available
- * until then.
- */
-static int rbd_dev_probe_update_spec(struct rbd_device *rbd_dev)
+static u64 rbd_v1_snap_id_by_name(struct rbd_device *rbd_dev, const char *name)
 {
-	struct ceph_osd_client *osdc;
-	const char *name;
-	void *reply_buf = NULL;
+	struct ceph_snap_context *snapc = rbd_dev->header.snapc;
+	const char *snap_name;
+	u32 which = 0;
+
+	/* Skip over names until we find the one we are looking for */
+
+	snap_name = rbd_dev->header.snap_names;
+	while (which < snapc->num_snaps) {
+		if (!strcmp(name, snap_name))
+			return snapc->snaps[which];
+		snap_name += strlen(snap_name) + 1;
+		which++;
+	}
+	return CEPH_NOSNAP;
+}
+
+static u64 rbd_v2_snap_id_by_name(struct rbd_device *rbd_dev, const char *name)
+{
+	struct ceph_snap_context *snapc = rbd_dev->header.snapc;
+	u32 which;
+	bool found = false;
+	u64 snap_id;
+
+	for (which = 0; !found && which < snapc->num_snaps; which++) {
+		const char *snap_name;
+
+		snap_id = snapc->snaps[which];
+		snap_name = rbd_dev_v2_snap_name(rbd_dev, snap_id);
+		if (IS_ERR(snap_name))
+			break;
+		found = !strcmp(name, snap_name);
+		kfree(snap_name);
+	}
+	return found ? snap_id : CEPH_NOSNAP;
+}
+
+/*
+ * Assumes name is never RBD_SNAP_HEAD_NAME; returns CEPH_NOSNAP if
+ * no snapshot by that name is found, or if an error occurs.
+ */
+static u64 rbd_snap_id_by_name(struct rbd_device *rbd_dev, const char *name)
+{
+	if (rbd_dev->image_format == 1)
+		return rbd_v1_snap_id_by_name(rbd_dev, name);
+
+	return rbd_v2_snap_id_by_name(rbd_dev, name);
+}
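
The format 1 lookup above leans on the v1 header layout, where snapshot names are packed as consecutive NUL-terminated strings running parallel to the snapshot id array. A standalone model of that walk, with ids and names invented for the example:

#include <stdio.h>
#include <string.h>

int main(void)
{
	const unsigned long long snaps[] = { 12, 7, 3 };
	const char names[] = "third\0second\0first";
	const char *want = "second";
	const char *p = names;
	unsigned which;

	for (which = 0; which < 3; which++) {
		if (!strcmp(want, p)) {
			printf("snap \"%s\" has id %llu\n", want, snaps[which]);
			return 0;
		}
		p += strlen(p) + 1;	/* skip to the next packed name */
	}
	printf("no snapshot named \"%s\"\n", want);
	return 1;
}
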
+
+/*
+ * When an rbd image has a parent image, it is identified by the
+ * pool, image, and snapshot ids (not names).  This function fills
+ * in the names for those ids.  (It's OK if we can't figure out the
+ * name for an image id, but the pool and snapshot ids should always
+ * exist and have names.)  All names in an rbd spec are dynamically
+ * allocated.
+ *
+ * When an image being mapped (not a parent) is probed, we have the
+ * pool name and pool id, image name and image id, and the snapshot
+ * name.  The only thing we're missing is the snapshot id.
+ */
+static int rbd_dev_spec_update(struct rbd_device *rbd_dev)
+{
+	struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
+	struct rbd_spec *spec = rbd_dev->spec;
+	const char *pool_name;
+	const char *image_name;
+	const char *snap_name;
 	int ret;
 
-	if (rbd_dev->spec->pool_name)
-		return 0;	/* Already have the names */
+	/*
+	 * An image being mapped will have the pool name (etc.), but
+	 * we need to look up the snapshot id.
+	 */
+	if (spec->pool_name) {
+		if (strcmp(spec->snap_name, RBD_SNAP_HEAD_NAME)) {
+			u64 snap_id;
 
-	/* Look up the pool name */
+			snap_id = rbd_snap_id_by_name(rbd_dev, spec->snap_name);
+			if (snap_id == CEPH_NOSNAP)
+				return -ENOENT;
+			spec->snap_id = snap_id;
+		} else {
+			spec->snap_id = CEPH_NOSNAP;
+		}
 
-	osdc = &rbd_dev->rbd_client->client->osdc;
-	name = ceph_pg_pool_name_by_id(osdc->osdmap, rbd_dev->spec->pool_id);
-	if (!name) {
-		rbd_warn(rbd_dev, "there is no pool with id %llu",
-			rbd_dev->spec->pool_id);	/* Really a BUG() */
-		return -EIO;
+		return 0;
 	}
 
-	rbd_dev->spec->pool_name = kstrdup(name, GFP_KERNEL);
-	if (!rbd_dev->spec->pool_name)
+	/* Get the pool name; we have to make our own copy of this */
+
+	pool_name = ceph_pg_pool_name_by_id(osdc->osdmap, spec->pool_id);
+	if (!pool_name) {
+		rbd_warn(rbd_dev, "no pool with id %llu", spec->pool_id);
+		return -EIO;
+	}
+	pool_name = kstrdup(pool_name, GFP_KERNEL);
+	if (!pool_name)
 		return -ENOMEM;
 
 	/* Fetch the image name; tolerate failure here */
 
-	name = rbd_dev_image_name(rbd_dev);
-	if (name)
-		rbd_dev->spec->image_name = (char *) name;
-	else
+	image_name = rbd_dev_image_name(rbd_dev);
+	if (!image_name)
 		rbd_warn(rbd_dev, "unable to get image name");
 
-	/* Look up the snapshot name. */
+	/* Look up the snapshot name, and make a copy */
 
-	name = rbd_snap_name(rbd_dev, rbd_dev->spec->snap_id);
-	if (!name) {
-		rbd_warn(rbd_dev, "no snapshot with id %llu",
-			rbd_dev->spec->snap_id);	/* Really a BUG() */
-		ret = -EIO;
+	snap_name = rbd_snap_name(rbd_dev, spec->snap_id);
+	if (!snap_name) {
+		ret = -ENOMEM;
 		goto out_err;
 	}
-	rbd_dev->spec->snap_name = kstrdup(name, GFP_KERNEL);
-	if(!rbd_dev->spec->snap_name)
-		goto out_err;
+
+	spec->pool_name = pool_name;
+	spec->image_name = image_name;
+	spec->snap_name = snap_name;
 
 	return 0;
 out_err:
-	kfree(reply_buf);
-	kfree(rbd_dev->spec->pool_name);
-	rbd_dev->spec->pool_name = NULL;
+	kfree(image_name);
+	kfree(pool_name);
 
 	return ret;
 }
 
-static int rbd_dev_v2_snap_context(struct rbd_device *rbd_dev, u64 *ver)
+static int rbd_dev_v2_snap_context(struct rbd_device *rbd_dev)
 {
 	size_t size;
 	int ret;
@@ -3151,16 +3968,15 @@
 		return -ENOMEM;
 
 	ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_name,
-				"rbd", "get_snapcontext",
-				NULL, 0,
-				reply_buf, size, ver);
+				"rbd", "get_snapcontext", NULL, 0,
+				reply_buf, size);
 	dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret);
 	if (ret < 0)
 		goto out;
 
-	ret = -ERANGE;
 	p = reply_buf;
-	end = (char *) reply_buf + size;
+	end = reply_buf + ret;
+	ret = -ERANGE;
 	ceph_decode_64_safe(&p, end, seq, out);
 	ceph_decode_32_safe(&p, end, snap_count, out);
 
@@ -3177,37 +3993,33 @@
 	}
 	if (!ceph_has_room(&p, end, snap_count * sizeof (__le64)))
 		goto out;
+	ret = 0;
 
-	size = sizeof (struct ceph_snap_context) +
-				snap_count * sizeof (snapc->snaps[0]);
-	snapc = kmalloc(size, GFP_KERNEL);
+	snapc = ceph_create_snap_context(snap_count, GFP_KERNEL);
 	if (!snapc) {
 		ret = -ENOMEM;
 		goto out;
 	}
-
-	atomic_set(&snapc->nref, 1);
 	snapc->seq = seq;
-	snapc->num_snaps = snap_count;
 	for (i = 0; i < snap_count; i++)
 		snapc->snaps[i] = ceph_decode_64(&p);
 
 	rbd_dev->header.snapc = snapc;
 
 	dout("  snap context seq = %llu, snap_count = %u\n",
-		(unsigned long long) seq, (unsigned int) snap_count);
-
+		(unsigned long long)seq, (unsigned int)snap_count);
 out:
 	kfree(reply_buf);
 
-	return 0;
+	return ret;
 }
 
-static char *rbd_dev_v2_snap_name(struct rbd_device *rbd_dev, u32 which)
+static const char *rbd_dev_v2_snap_name(struct rbd_device *rbd_dev,
+					u64 snap_id)
 {
 	size_t size;
 	void *reply_buf;
-	__le64 snap_id;
+	__le64 snapid;
 	int ret;
 	void *p;
 	void *end;
@@ -3218,236 +4030,52 @@
 	if (!reply_buf)
 		return ERR_PTR(-ENOMEM);
 
-	snap_id = cpu_to_le64(rbd_dev->header.snapc->snaps[which]);
+	snapid = cpu_to_le64(snap_id);
 	ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_name,
 				"rbd", "get_snapshot_name",
-				(char *) &snap_id, sizeof (snap_id),
-				reply_buf, size, NULL);
+				&snapid, sizeof (snapid),
+				reply_buf, size);
 	dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret);
-	if (ret < 0)
+	if (ret < 0) {
+		snap_name = ERR_PTR(ret);
 		goto out;
+	}
 
 	p = reply_buf;
-	end = (char *) reply_buf + size;
+	end = reply_buf + ret;
 	snap_name = ceph_extract_encoded_string(&p, end, NULL, GFP_KERNEL);
-	if (IS_ERR(snap_name)) {
-		ret = PTR_ERR(snap_name);
+	if (IS_ERR(snap_name))
 		goto out;
-	} else {
-		dout("  snap_id 0x%016llx snap_name = %s\n",
-			(unsigned long long) le64_to_cpu(snap_id), snap_name);
-	}
+
+	dout("  snap_id 0x%016llx snap_name = %s\n",
+		(unsigned long long)snap_id, snap_name);
+out:
 	kfree(reply_buf);
 
 	return snap_name;
-out:
-	kfree(reply_buf);
-
-	return ERR_PTR(ret);
 }
 
-static char *rbd_dev_v2_snap_info(struct rbd_device *rbd_dev, u32 which,
-		u64 *snap_size, u64 *snap_features)
-{
-	u64 snap_id;
-	u8 order;
-	int ret;
-
-	snap_id = rbd_dev->header.snapc->snaps[which];
-	ret = _rbd_dev_v2_snap_size(rbd_dev, snap_id, &order, snap_size);
-	if (ret)
-		return ERR_PTR(ret);
-	ret = _rbd_dev_v2_snap_features(rbd_dev, snap_id, snap_features);
-	if (ret)
-		return ERR_PTR(ret);
-
-	return rbd_dev_v2_snap_name(rbd_dev, which);
-}
-
-static char *rbd_dev_snap_info(struct rbd_device *rbd_dev, u32 which,
-		u64 *snap_size, u64 *snap_features)
-{
-	if (rbd_dev->image_format == 1)
-		return rbd_dev_v1_snap_info(rbd_dev, which,
-					snap_size, snap_features);
-	if (rbd_dev->image_format == 2)
-		return rbd_dev_v2_snap_info(rbd_dev, which,
-					snap_size, snap_features);
-	return ERR_PTR(-EINVAL);
-}
-
-static int rbd_dev_v2_refresh(struct rbd_device *rbd_dev, u64 *hver)
+static int rbd_dev_v2_refresh(struct rbd_device *rbd_dev)
 {
 	int ret;
-	__u8 obj_order;
 
 	down_write(&rbd_dev->header_rwsem);
 
-	/* Grab old order first, to see if it changes */
-
-	obj_order = rbd_dev->header.obj_order,
 	ret = rbd_dev_v2_image_size(rbd_dev);
 	if (ret)
 		goto out;
-	if (rbd_dev->header.obj_order != obj_order) {
-		ret = -EIO;
-		goto out;
-	}
 	rbd_update_mapping_size(rbd_dev);
 
-	ret = rbd_dev_v2_snap_context(rbd_dev, hver);
+	ret = rbd_dev_v2_snap_context(rbd_dev);
 	dout("rbd_dev_v2_snap_context returned %d\n", ret);
 	if (ret)
 		goto out;
-	ret = rbd_dev_snaps_update(rbd_dev);
-	dout("rbd_dev_snaps_update returned %d\n", ret);
-	if (ret)
-		goto out;
-	ret = rbd_dev_snaps_register(rbd_dev);
-	dout("rbd_dev_snaps_register returned %d\n", ret);
 out:
 	up_write(&rbd_dev->header_rwsem);
 
 	return ret;
 }
 
-/*
- * Scan the rbd device's current snapshot list and compare it to the
- * newly-received snapshot context.  Remove any existing snapshots
- * not present in the new snapshot context.  Add a new snapshot for
- * any snaphots in the snapshot context not in the current list.
- * And verify there are no changes to snapshots we already know
- * about.
- *
- * Assumes the snapshots in the snapshot context are sorted by
- * snapshot id, highest id first.  (Snapshots in the rbd_dev's list
- * are also maintained in that order.)
- */
-static int rbd_dev_snaps_update(struct rbd_device *rbd_dev)
-{
-	struct ceph_snap_context *snapc = rbd_dev->header.snapc;
-	const u32 snap_count = snapc->num_snaps;
-	struct list_head *head = &rbd_dev->snaps;
-	struct list_head *links = head->next;
-	u32 index = 0;
-
-	dout("%s: snap count is %u\n", __func__, (unsigned int) snap_count);
-	while (index < snap_count || links != head) {
-		u64 snap_id;
-		struct rbd_snap *snap;
-		char *snap_name;
-		u64 snap_size = 0;
-		u64 snap_features = 0;
-
-		snap_id = index < snap_count ? snapc->snaps[index]
-					     : CEPH_NOSNAP;
-		snap = links != head ? list_entry(links, struct rbd_snap, node)
-				     : NULL;
-		rbd_assert(!snap || snap->id != CEPH_NOSNAP);
-
-		if (snap_id == CEPH_NOSNAP || (snap && snap->id > snap_id)) {
-			struct list_head *next = links->next;
-
-			/*
-			 * A previously-existing snapshot is not in
-			 * the new snap context.
-			 *
-			 * If the now missing snapshot is the one the
-			 * image is mapped to, clear its exists flag
-			 * so we can avoid sending any more requests
-			 * to it.
-			 */
-			if (rbd_dev->spec->snap_id == snap->id)
-				clear_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags);
-			rbd_remove_snap_dev(snap);
-			dout("%ssnap id %llu has been removed\n",
-				rbd_dev->spec->snap_id == snap->id ?
-							"mapped " : "",
-				(unsigned long long) snap->id);
-
-			/* Done with this list entry; advance */
-
-			links = next;
-			continue;
-		}
-
-		snap_name = rbd_dev_snap_info(rbd_dev, index,
-					&snap_size, &snap_features);
-		if (IS_ERR(snap_name))
-			return PTR_ERR(snap_name);
-
-		dout("entry %u: snap_id = %llu\n", (unsigned int) snap_count,
-			(unsigned long long) snap_id);
-		if (!snap || (snap_id != CEPH_NOSNAP && snap->id < snap_id)) {
-			struct rbd_snap *new_snap;
-
-			/* We haven't seen this snapshot before */
-
-			new_snap = __rbd_add_snap_dev(rbd_dev, snap_name,
-					snap_id, snap_size, snap_features);
-			if (IS_ERR(new_snap)) {
-				int err = PTR_ERR(new_snap);
-
-				dout("  failed to add dev, error %d\n", err);
-
-				return err;
-			}
-
-			/* New goes before existing, or at end of list */
-
-			dout("  added dev%s\n", snap ? "" : " at end\n");
-			if (snap)
-				list_add_tail(&new_snap->node, &snap->node);
-			else
-				list_add_tail(&new_snap->node, head);
-		} else {
-			/* Already have this one */
-
-			dout("  already present\n");
-
-			rbd_assert(snap->size == snap_size);
-			rbd_assert(!strcmp(snap->name, snap_name));
-			rbd_assert(snap->features == snap_features);
-
-			/* Done with this list entry; advance */
-
-			links = links->next;
-		}
-
-		/* Advance to the next entry in the snapshot context */
-
-		index++;
-	}
-	dout("%s: done\n", __func__);
-
-	return 0;
-}
-
-/*
- * Scan the list of snapshots and register the devices for any that
- * have not already been registered.
- */
-static int rbd_dev_snaps_register(struct rbd_device *rbd_dev)
-{
-	struct rbd_snap *snap;
-	int ret = 0;
-
-	dout("%s:\n", __func__);
-	if (WARN_ON(!device_is_registered(&rbd_dev->dev)))
-		return -EIO;
-
-	list_for_each_entry(snap, &rbd_dev->snaps, node) {
-		if (!rbd_snap_registered(snap)) {
-			ret = rbd_register_snap_dev(snap, &rbd_dev->dev);
-			if (ret < 0)
-				break;
-		}
-	}
-	dout("%s: returning %d\n", __func__, ret);
-
-	return ret;
-}
-
 static int rbd_bus_add_dev(struct rbd_device *rbd_dev)
 {
 	struct device *dev;
@@ -3459,7 +4087,7 @@
 	dev->bus = &rbd_bus_type;
 	dev->type = &rbd_device_type;
 	dev->parent = &rbd_root_dev;
-	dev->release = rbd_dev_release;
+	dev->release = rbd_dev_device_release;
 	dev_set_name(dev, "%d", rbd_dev->dev_id);
 	ret = device_register(dev);
 
@@ -3673,6 +4301,7 @@
 	size_t len;
 	char *options;
 	const char *mon_addrs;
+	char *snap_name;
 	size_t mon_addrs_size;
 	struct rbd_spec *spec = NULL;
 	struct rbd_options *rbd_opts = NULL;
@@ -3731,10 +4360,11 @@
 		ret = -ENAMETOOLONG;
 		goto out_err;
 	}
-	spec->snap_name = kmemdup(buf, len + 1, GFP_KERNEL);
-	if (!spec->snap_name)
+	snap_name = kmemdup(buf, len + 1, GFP_KERNEL);
+	if (!snap_name)
 		goto out_mem;
-	*(spec->snap_name + len) = '\0';
+	*(snap_name + len) = '\0';
+	spec->snap_name = snap_name;
 
 	/* Initialize all rbd options to the defaults */
 
@@ -3788,15 +4418,19 @@
 	size_t size;
 	char *object_name;
 	void *response;
-	void *p;
+	char *image_id;
 
 	/*
 	 * When probing a parent image, the image id is already
 	 * known (and the image name likely is not).  There's no
-	 * need to fetch the image id again in this case.
+	 * need to fetch the image id again in this case.  We
+	 * do still need to set the image format though.
 	 */
-	if (rbd_dev->spec->image_id)
+	if (rbd_dev->spec->image_id) {
+		rbd_dev->image_format = *rbd_dev->spec->image_id ? 2 : 1;
+
 		return 0;
+	}
 
 	/*
 	 * First, see if the format 2 image id file exists, and if
@@ -3818,23 +4452,32 @@
 		goto out;
 	}
 
-	ret = rbd_obj_method_sync(rbd_dev, object_name,
-				"rbd", "get_id",
-				NULL, 0,
-				response, RBD_IMAGE_ID_LEN_MAX, NULL);
-	dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret);
-	if (ret < 0)
-		goto out;
+	/* If it doesn't exist we'll assume it's a format 1 image */
 
-	p = response;
-	rbd_dev->spec->image_id = ceph_extract_encoded_string(&p,
-						p + RBD_IMAGE_ID_LEN_MAX,
+	ret = rbd_obj_method_sync(rbd_dev, object_name,
+				"rbd", "get_id", NULL, 0,
+				response, RBD_IMAGE_ID_LEN_MAX);
+	dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret);
+	if (ret == -ENOENT) {
+		image_id = kstrdup("", GFP_KERNEL);
+		ret = image_id ? 0 : -ENOMEM;
+		if (!ret)
+			rbd_dev->image_format = 1;
+	} else if (ret > sizeof (__le32)) {
+		void *p = response;
+
+		image_id = ceph_extract_encoded_string(&p, p + ret,
 						NULL, GFP_NOIO);
-	if (IS_ERR(rbd_dev->spec->image_id)) {
-		ret = PTR_ERR(rbd_dev->spec->image_id);
-		rbd_dev->spec->image_id = NULL;
+		ret = IS_ERR(image_id) ? PTR_ERR(image_id) : 0;
+		if (!ret)
+			rbd_dev->image_format = 2;
 	} else {
-		dout("image_id is %s\n", rbd_dev->spec->image_id);
+		ret = -EINVAL;
+	}
+
+	if (!ret) {
+		rbd_dev->spec->image_id = image_id;
+		dout("image_id is %s\n", image_id);
 	}
 out:
 	kfree(response);
@@ -3843,27 +4486,30 @@
 	return ret;
 }
 
+/* Undo whatever state changes are made by v1 or v2 image probe */
+
+static void rbd_dev_unprobe(struct rbd_device *rbd_dev)
+{
+	struct rbd_image_header	*header;
+
+	rbd_dev_remove_parent(rbd_dev);
+	rbd_spec_put(rbd_dev->parent_spec);
+	rbd_dev->parent_spec = NULL;
+	rbd_dev->parent_overlap = 0;
+
+	/* Free dynamic fields from the header, then zero it out */
+
+	header = &rbd_dev->header;
+	ceph_put_snap_context(header->snapc);
+	kfree(header->snap_sizes);
+	kfree(header->snap_names);
+	kfree(header->object_prefix);
+	memset(header, 0, sizeof (*header));
+}
+
 static int rbd_dev_v1_probe(struct rbd_device *rbd_dev)
 {
 	int ret;
-	size_t size;
-
-	/* Version 1 images have no id; empty string is used */
-
-	rbd_dev->spec->image_id = kstrdup("", GFP_KERNEL);
-	if (!rbd_dev->spec->image_id)
-		return -ENOMEM;
-
-	/* Record the header object name for this rbd image. */
-
-	size = strlen(rbd_dev->spec->image_name) + sizeof (RBD_SUFFIX);
-	rbd_dev->header_name = kmalloc(size, GFP_KERNEL);
-	if (!rbd_dev->header_name) {
-		ret = -ENOMEM;
-		goto out_err;
-	}
-	sprintf(rbd_dev->header_name, "%s%s",
-		rbd_dev->spec->image_name, RBD_SUFFIX);
 
 	/* Populate rbd image metadata */
 
@@ -3876,8 +4522,6 @@
 	rbd_dev->parent_spec = NULL;
 	rbd_dev->parent_overlap = 0;
 
-	rbd_dev->image_format = 1;
-
 	dout("discovered version 1 image, header name is %s\n",
 		rbd_dev->header_name);
 
@@ -3894,43 +4538,45 @@
 
 static int rbd_dev_v2_probe(struct rbd_device *rbd_dev)
 {
-	size_t size;
 	int ret;
-	u64 ver = 0;
-
-	/*
-	 * Image id was filled in by the caller.  Record the header
-	 * object name for this rbd image.
-	 */
-	size = sizeof (RBD_HEADER_PREFIX) + strlen(rbd_dev->spec->image_id);
-	rbd_dev->header_name = kmalloc(size, GFP_KERNEL);
-	if (!rbd_dev->header_name)
-		return -ENOMEM;
-	sprintf(rbd_dev->header_name, "%s%s",
-			RBD_HEADER_PREFIX, rbd_dev->spec->image_id);
-
-	/* Get the size and object order for the image */
 
 	ret = rbd_dev_v2_image_size(rbd_dev);
-	if (ret < 0)
+	if (ret)
 		goto out_err;
 
 	/* Get the object prefix (a.k.a. block_name) for the image */
 
 	ret = rbd_dev_v2_object_prefix(rbd_dev);
-	if (ret < 0)
+	if (ret)
 		goto out_err;
 
 	/* Get and check the features for the image */
 
 	ret = rbd_dev_v2_features(rbd_dev);
-	if (ret < 0)
+	if (ret)
 		goto out_err;
 
 	/* If the image supports layering, get the parent info */
 
 	if (rbd_dev->header.features & RBD_FEATURE_LAYERING) {
 		ret = rbd_dev_v2_parent_info(rbd_dev);
+		if (ret)
+			goto out_err;
+
+		/*
+		 * Don't print a warning for parent images.  We can
+		 * tell at this point because we won't know its pool
+		 * name yet (just its pool id).
+		 */
+		if (rbd_dev->spec->pool_name)
+			rbd_warn(rbd_dev, "WARNING: kernel layering "
+					"is EXPERIMENTAL!");
+	}
+
+	/* If the image supports fancy striping, get its parameters */
+
+	if (rbd_dev->header.features & RBD_FEATURE_STRIPINGV2) {
+		ret = rbd_dev_v2_striping_info(rbd_dev);
 		if (ret < 0)
 			goto out_err;
 	}
@@ -3942,12 +4588,9 @@
 
 	/* Get the snapshot context, plus the header version */
 
-	ret = rbd_dev_v2_snap_context(rbd_dev, &ver);
+	ret = rbd_dev_v2_snap_context(rbd_dev);
 	if (ret)
 		goto out_err;
-	rbd_dev->header.obj_version = ver;
-
-	rbd_dev->image_format = 2;
 
 	dout("discovered version 2 image, header name is %s\n",
 		rbd_dev->header_name);
@@ -3965,23 +4608,55 @@
 	return ret;
 }
 
-static int rbd_dev_probe_finish(struct rbd_device *rbd_dev)
+static int rbd_dev_probe_parent(struct rbd_device *rbd_dev)
+{
+	struct rbd_device *parent = NULL;
+	struct rbd_spec *parent_spec;
+	struct rbd_client *rbdc;
+	int ret;
+
+	if (!rbd_dev->parent_spec)
+		return 0;
+	/*
+	 * We need to pass a reference to the client and the parent
+	 * spec when creating the parent rbd_dev.  Images related by
+	 * parent/child relationships always share both.
+	 */
+	parent_spec = rbd_spec_get(rbd_dev->parent_spec);
+	rbdc = __rbd_get_client(rbd_dev->rbd_client);
+
+	ret = -ENOMEM;
+	parent = rbd_dev_create(rbdc, parent_spec);
+	if (!parent)
+		goto out_err;
+
+	ret = rbd_dev_image_probe(parent);
+	if (ret < 0)
+		goto out_err;
+	rbd_dev->parent = parent;
+
+	return 0;
+out_err:
+	if (parent) {
+		rbd_spec_put(rbd_dev->parent_spec);
+		kfree(rbd_dev->header_name);
+		rbd_dev_destroy(parent);
+	} else {
+		rbd_put_client(rbdc);
+		rbd_spec_put(parent_spec);
+	}
+
+	return ret;
+}
+
+static int rbd_dev_device_setup(struct rbd_device *rbd_dev)
 {
 	int ret;
 
-	/* no need to lock here, as rbd_dev is not registered yet */
-	ret = rbd_dev_snaps_update(rbd_dev);
+	ret = rbd_dev_mapping_set(rbd_dev);
 	if (ret)
 		return ret;
 
-	ret = rbd_dev_probe_update_spec(rbd_dev);
-	if (ret)
-		goto err_out_snaps;
-
-	ret = rbd_dev_set_mapping(rbd_dev);
-	if (ret)
-		goto err_out_snaps;
-
 	/* generate unique id: find highest unique id, add one */
 	rbd_dev_id_get(rbd_dev);
 
@@ -4007,54 +4682,81 @@
 	if (ret)
 		goto err_out_disk;
 
-	/*
-	 * At this point cleanup in the event of an error is the job
-	 * of the sysfs code (initiated by rbd_bus_del_dev()).
-	 */
-	down_write(&rbd_dev->header_rwsem);
-	ret = rbd_dev_snaps_register(rbd_dev);
-	up_write(&rbd_dev->header_rwsem);
-	if (ret)
-		goto err_out_bus;
-
-	ret = rbd_dev_header_watch_sync(rbd_dev, 1);
-	if (ret)
-		goto err_out_bus;
-
 	/* Everything's ready.  Announce the disk to the world. */
 
+	set_capacity(rbd_dev->disk, rbd_dev->mapping.size / SECTOR_SIZE);
+	set_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags);
 	add_disk(rbd_dev->disk);
 
 	pr_info("%s: added with size 0x%llx\n", rbd_dev->disk->disk_name,
 		(unsigned long long) rbd_dev->mapping.size);
 
 	return ret;
-err_out_bus:
-	/* this will also clean up rest of rbd_dev stuff */
 
-	rbd_bus_del_dev(rbd_dev);
-
-	return ret;
 err_out_disk:
 	rbd_free_disk(rbd_dev);
 err_out_blkdev:
 	unregister_blkdev(rbd_dev->major, rbd_dev->name);
 err_out_id:
 	rbd_dev_id_put(rbd_dev);
-err_out_snaps:
-	rbd_remove_all_snaps(rbd_dev);
+	rbd_dev_mapping_clear(rbd_dev);
 
 	return ret;
 }
 
+static int rbd_dev_header_name(struct rbd_device *rbd_dev)
+{
+	struct rbd_spec *spec = rbd_dev->spec;
+	size_t size;
+
+	/* Record the header object name for this rbd image. */
+
+	rbd_assert(rbd_image_format_valid(rbd_dev->image_format));
+
+	if (rbd_dev->image_format == 1)
+		size = strlen(spec->image_name) + sizeof (RBD_SUFFIX);
+	else
+		size = sizeof (RBD_HEADER_PREFIX) + strlen(spec->image_id);
+
+	rbd_dev->header_name = kmalloc(size, GFP_KERNEL);
+	if (!rbd_dev->header_name)
+		return -ENOMEM;
+
+	if (rbd_dev->image_format == 1)
+		sprintf(rbd_dev->header_name, "%s%s",
+			spec->image_name, RBD_SUFFIX);
+	else
+		sprintf(rbd_dev->header_name, "%s%s",
+			RBD_HEADER_PREFIX, spec->image_id);
+	return 0;
+}
+
+static void rbd_dev_image_release(struct rbd_device *rbd_dev)
+{
+	int ret;
+
+	rbd_dev_unprobe(rbd_dev);
+	ret = rbd_dev_header_watch_sync(rbd_dev, 0);
+	if (ret)
+		rbd_warn(rbd_dev, "failed to cancel watch event (%d)\n", ret);
+	kfree(rbd_dev->header_name);
+	rbd_dev->header_name = NULL;
+	rbd_dev->image_format = 0;
+	kfree(rbd_dev->spec->image_id);
+	rbd_dev->spec->image_id = NULL;
+
+	rbd_dev_destroy(rbd_dev);
+}
+
 /*
  * Probe for the existence of the header object for the given rbd
  * device.  For format 2 images this includes determining the image
  * id.
  */
-static int rbd_dev_probe(struct rbd_device *rbd_dev)
+static int rbd_dev_image_probe(struct rbd_device *rbd_dev)
 {
 	int ret;
+	int tmp;
 
 	/*
 	 * Get the id from the image id object.  If it's not a
@@ -4063,18 +4765,48 @@
 	 */
 	ret = rbd_dev_image_id(rbd_dev);
 	if (ret)
+		return ret;
+	rbd_assert(rbd_dev->spec->image_id);
+	rbd_assert(rbd_image_format_valid(rbd_dev->image_format));
+
+	ret = rbd_dev_header_name(rbd_dev);
+	if (ret)
+		goto err_out_format;
+
+	ret = rbd_dev_header_watch_sync(rbd_dev, 1);
+	if (ret)
+		goto out_header_name;
+
+	if (rbd_dev->image_format == 1)
 		ret = rbd_dev_v1_probe(rbd_dev);
 	else
 		ret = rbd_dev_v2_probe(rbd_dev);
-	if (ret) {
-		dout("probe failed, returning %d\n", ret);
-
-		return ret;
-	}
-
-	ret = rbd_dev_probe_finish(rbd_dev);
 	if (ret)
-		rbd_header_free(&rbd_dev->header);
+		goto err_out_watch;
+
+	ret = rbd_dev_spec_update(rbd_dev);
+	if (ret)
+		goto err_out_probe;
+
+	ret = rbd_dev_probe_parent(rbd_dev);
+	if (!ret)
+		return 0;
+
+err_out_probe:
+	rbd_dev_unprobe(rbd_dev);
+err_out_watch:
+	tmp = rbd_dev_header_watch_sync(rbd_dev, 0);
+	if (tmp)
+		rbd_warn(rbd_dev, "unable to tear down watch request\n");
+out_header_name:
+	kfree(rbd_dev->header_name);
+	rbd_dev->header_name = NULL;
+err_out_format:
+	rbd_dev->image_format = 0;
+	kfree(rbd_dev->spec->image_id);
+	rbd_dev->spec->image_id = NULL;
+
+	dout("probe failed, returning %d\n", ret);
 
 	return ret;
 }
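
The probe rewrite above is mostly about its unwind ladder: each acquisition gets a goto label that releases everything obtained before it, in reverse order. A skeletal sketch of the shape, with hypothetical step functions standing in for the rbd ones:

struct ctx;				/* opaque device state */
int get_id(struct ctx *c);		/* hypothetical step functions */
int set_header_name(struct ctx *c);
int start_watch(struct ctx *c);
int probe_header(struct ctx *c);
void stop_watch(struct ctx *c);
void free_header_name(struct ctx *c);
void clear_format(struct ctx *c);

static int demo_probe(struct ctx *c)
{
	int ret;

	ret = get_id(c);
	if (ret)
		return ret;		/* nothing acquired yet */

	ret = set_header_name(c);
	if (ret)
		goto err_out_format;

	ret = start_watch(c);
	if (ret)
		goto out_header_name;

	ret = probe_header(c);
	if (ret)
		goto err_out_watch;

	return 0;

err_out_watch:				/* labels undo in reverse order */
	stop_watch(c);
out_header_name:
	free_header_name(c);
err_out_format:
	clear_format(c);
	return ret;
}
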
@@ -4111,11 +4843,13 @@
 	rc = ceph_pg_poolid_by_name(osdc->osdmap, spec->pool_name);
 	if (rc < 0)
 		goto err_out_client;
-	spec->pool_id = (u64) rc;
+	spec->pool_id = (u64)rc;
 
 	/* The ceph file layout needs to fit pool id in 32 bits */
 
-	if (WARN_ON(spec->pool_id > (u64) U32_MAX)) {
+	if (spec->pool_id > (u64)U32_MAX) {
+		rbd_warn(NULL, "pool id too large (%llu > %u)\n",
+				(unsigned long long)spec->pool_id, U32_MAX);
 		rc = -EIO;
 		goto err_out_client;
 	}
@@ -4130,11 +4864,15 @@
 	kfree(rbd_opts);
 	rbd_opts = NULL;	/* done with this */
 
-	rc = rbd_dev_probe(rbd_dev);
+	rc = rbd_dev_image_probe(rbd_dev);
 	if (rc < 0)
 		goto err_out_rbd_dev;
 
-	return count;
+	rc = rbd_dev_device_setup(rbd_dev);
+	if (!rc)
+		return count;
+
+	rbd_dev_image_release(rbd_dev);
 err_out_rbd_dev:
 	rbd_dev_destroy(rbd_dev);
 err_out_client:
@@ -4149,7 +4887,7 @@
 
 	dout("Error adding device %s\n", buf);
 
-	return (ssize_t) rc;
+	return (ssize_t)rc;
 }
 
 static struct rbd_device *__rbd_get_dev(unsigned long dev_id)
@@ -4169,27 +4907,43 @@
 	return NULL;
 }
 
-static void rbd_dev_release(struct device *dev)
+static void rbd_dev_device_release(struct device *dev)
 {
 	struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
 
-	if (rbd_dev->watch_event)
-		rbd_dev_header_watch_sync(rbd_dev, 0);
-
-	/* clean up and free blkdev */
 	rbd_free_disk(rbd_dev);
+	clear_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags);
+	rbd_dev_clear_mapping(rbd_dev);
 	unregister_blkdev(rbd_dev->major, rbd_dev->name);
-
-	/* release allocated disk header fields */
-	rbd_header_free(&rbd_dev->header);
-
-	/* done with the id, and with the rbd_dev */
+	rbd_dev->major = 0;
 	rbd_dev_id_put(rbd_dev);
-	rbd_assert(rbd_dev->rbd_client != NULL);
-	rbd_dev_destroy(rbd_dev);
+	rbd_dev_mapping_clear(rbd_dev);
+}
 
-	/* release module ref */
-	module_put(THIS_MODULE);
+static void rbd_dev_remove_parent(struct rbd_device *rbd_dev)
+{
+	while (rbd_dev->parent) {
+		struct rbd_device *first = rbd_dev;
+		struct rbd_device *second = first->parent;
+		struct rbd_device *third;
+
+		/*
+		 * Follow to the parent with no grandparent and
+		 * remove it.
+		 */
+		while (second && (third = second->parent)) {
+			first = second;
+			second = third;
+		}
+		rbd_assert(second);
+		rbd_dev_image_release(second);
+		first->parent = NULL;
+		first->parent_overlap = 0;
+
+		rbd_assert(first->parent_spec);
+		rbd_spec_put(first->parent_spec);
+		first->parent_spec = NULL;
+	}
 }
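
rbd_dev_remove_parent() tears a layered chain down from the top: it repeatedly descends to the ancestor with no grandparent, releases it, and unlinks it from its child. A userspace model of the same walk, using made-up singly linked nodes in place of struct rbd_device (error checks omitted for brevity):

#include <stdio.h>
#include <stdlib.h>

struct dev {
	struct dev *parent;
	int id;
};

static void remove_parents(struct dev *d)
{
	while (d->parent) {
		struct dev *first = d;
		struct dev *second = first->parent;

		while (second->parent) {	/* descend to the deepest */
			first = second;
			second = second->parent;
		}
		printf("releasing ancestor %d\n", second->id);
		free(second);
		first->parent = NULL;		/* unlink from its child */
	}
}

int main(void)
{
	struct dev *gp = malloc(sizeof(*gp));
	struct dev *p = malloc(sizeof(*p));
	struct dev child = { .parent = p, .id = 0 };

	gp->parent = NULL; gp->id = 2;
	p->parent = gp;    p->id = 1;

	remove_parents(&child);	/* prints ancestor 2, then ancestor 1 */
	return 0;
}
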
 
 static ssize_t rbd_remove(struct bus_type *bus,
@@ -4197,13 +4951,13 @@
 			  size_t count)
 {
 	struct rbd_device *rbd_dev = NULL;
-	int target_id, rc;
+	int target_id;
 	unsigned long ul;
-	int ret = count;
+	int ret;
 
-	rc = strict_strtoul(buf, 10, &ul);
-	if (rc)
-		return rc;
+	ret = strict_strtoul(buf, 10, &ul);
+	if (ret)
+		return ret;
 
 	/* convert to int; abort if we lost anything in the conversion */
 	target_id = (int) ul;
@@ -4226,10 +4980,10 @@
 	spin_unlock_irq(&rbd_dev->lock);
 	if (ret < 0)
 		goto done;
-
-	rbd_remove_all_snaps(rbd_dev);
+	ret = count;
 	rbd_bus_del_dev(rbd_dev);
-
+	rbd_dev_image_release(rbd_dev);
+	module_put(THIS_MODULE);
 done:
 	mutex_unlock(&ctl_mutex);
 
@@ -4261,6 +5015,56 @@
 	device_unregister(&rbd_root_dev);
 }
 
+static int rbd_slab_init(void)
+{
+	rbd_assert(!rbd_img_request_cache);
+	rbd_img_request_cache = kmem_cache_create("rbd_img_request",
+					sizeof (struct rbd_img_request),
+					__alignof__(struct rbd_img_request),
+					0, NULL);
+	if (!rbd_img_request_cache)
+		return -ENOMEM;
+
+	rbd_assert(!rbd_obj_request_cache);
+	rbd_obj_request_cache = kmem_cache_create("rbd_obj_request",
+					sizeof (struct rbd_obj_request),
+					__alignof__(struct rbd_obj_request),
+					0, NULL);
+	if (!rbd_obj_request_cache)
+		goto out_err;
+
+	rbd_assert(!rbd_segment_name_cache);
+	rbd_segment_name_cache = kmem_cache_create("rbd_segment_name",
+					MAX_OBJ_NAME_SIZE + 1, 1, 0, NULL);
+	if (rbd_segment_name_cache)
+		return 0;
+out_err:
+	if (rbd_obj_request_cache) {
+		kmem_cache_destroy(rbd_obj_request_cache);
+		rbd_obj_request_cache = NULL;
+	}
+
+	kmem_cache_destroy(rbd_img_request_cache);
+	rbd_img_request_cache = NULL;
+
+	return -ENOMEM;
+}
+
+static void rbd_slab_exit(void)
+{
+	rbd_assert(rbd_segment_name_cache);
+	kmem_cache_destroy(rbd_segment_name_cache);
+	rbd_segment_name_cache = NULL;
+
+	rbd_assert(rbd_obj_request_cache);
+	kmem_cache_destroy(rbd_obj_request_cache);
+	rbd_obj_request_cache = NULL;
+
+	rbd_assert(rbd_img_request_cache);
+	kmem_cache_destroy(rbd_img_request_cache);
+	rbd_img_request_cache = NULL;
+}
+
 static int __init rbd_init(void)
 {
 	int rc;
@@ -4270,16 +5074,22 @@
 
 		return -EINVAL;
 	}
-	rc = rbd_sysfs_init();
+	rc = rbd_slab_init();
 	if (rc)
 		return rc;
-	pr_info("loaded " RBD_DRV_NAME_LONG "\n");
-	return 0;
+	rc = rbd_sysfs_init();
+	if (rc)
+		rbd_slab_exit();
+	else
+		pr_info("loaded " RBD_DRV_NAME_LONG "\n");
+
+	return rc;
 }
 
 static void __exit rbd_exit(void)
 {
 	rbd_sysfs_cleanup();
+	rbd_slab_exit();
 }
 
 module_init(rbd_init);
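
The rbd_init() change above is the classic two-step bring-up: set up the slab caches first, then sysfs, and unwind the caches if sysfs registration fails; rbd_exit() releases in reverse. A minimal module skeleton showing the ordering, with an illustrative cache and stubbed sysfs hooks:

#include <linux/module.h>
#include <linux/slab.h>

static struct kmem_cache *demo_cache;

static int demo_sysfs_init(void)
{
	return 0;			/* stub for the real sysfs setup */
}

static void demo_sysfs_cleanup(void)
{
}

static int __init demo_init(void)
{
	int rc;

	demo_cache = kmem_cache_create("demo", 64, 0, 0, NULL);
	if (!demo_cache)
		return -ENOMEM;

	rc = demo_sysfs_init();
	if (rc) {
		/* a later step failed: release what is already held */
		kmem_cache_destroy(demo_cache);
		demo_cache = NULL;
	}
	return rc;
}

static void __exit demo_exit(void)
{
	demo_sysfs_cleanup();		/* reverse order of demo_init() */
	kmem_cache_destroy(demo_cache);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");
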
diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig
index b166e30b..ff7f0c8 100644
--- a/drivers/gpio/Kconfig
+++ b/drivers/gpio/Kconfig
@@ -297,12 +297,21 @@
 
 config GPIO_LYNXPOINT
 	bool "Intel Lynxpoint GPIO support"
-	depends on ACPI
+	depends on ACPI && X86
 	select IRQ_DOMAIN
 	help
 	  driver for GPIO functionality on Intel Lynxpoint PCH chipset
 	  Requires ACPI device enumeration code to set up a platform device.
 
+config GPIO_GRGPIO
+	tristate "Aeroflex Gaisler GRGPIO support"
+	depends on OF
+	select GPIO_GENERIC
+	select IRQ_DOMAIN
+	help
+	  Select this to support Aeroflex Gaisler GRGPIO cores from the GRLIB
+	  VHDL IP core library.
+
 comment "I2C GPIO expanders:"
 
 config GPIO_ARIZONA
diff --git a/drivers/gpio/Makefile b/drivers/gpio/Makefile
index a274d7d..6aab73d 100644
--- a/drivers/gpio/Makefile
+++ b/drivers/gpio/Makefile
@@ -25,6 +25,7 @@
 obj-$(CONFIG_GPIO_EM)		+= gpio-em.o
 obj-$(CONFIG_GPIO_EP93XX)	+= gpio-ep93xx.o
 obj-$(CONFIG_GPIO_GE_FPGA)	+= gpio-ge.o
+obj-$(CONFIG_GPIO_GRGPIO)	+= gpio-grgpio.o
 obj-$(CONFIG_GPIO_ICH)		+= gpio-ich.o
 obj-$(CONFIG_GPIO_IT8761E)	+= gpio-it8761e.o
 obj-$(CONFIG_GPIO_JANZ_TTL)	+= gpio-janz-ttl.o
diff --git a/drivers/gpio/gpio-74x164.c b/drivers/gpio/gpio-74x164.c
index 464be96..7216079 100644
--- a/drivers/gpio/gpio-74x164.c
+++ b/drivers/gpio/gpio-74x164.c
@@ -137,7 +137,7 @@
 
 	mutex_init(&chip->lock);
 
-	dev_set_drvdata(&spi->dev, chip);
+	spi_set_drvdata(spi, chip);
 
 	chip->spi = spi;
 
@@ -176,7 +176,7 @@
 	return ret;
 
 exit_destroy:
-	dev_set_drvdata(&spi->dev, NULL);
+	spi_set_drvdata(spi, NULL);
 	mutex_destroy(&chip->lock);
 	return ret;
 }
@@ -186,11 +186,11 @@
 	struct gen_74x164_chip *chip;
 	int ret;
 
-	chip = dev_get_drvdata(&spi->dev);
+	chip = spi_get_drvdata(spi);
 	if (chip == NULL)
 		return -ENODEV;
 
-	dev_set_drvdata(&spi->dev, NULL);
+	spi_set_drvdata(spi, NULL);
 
 	ret = gpiochip_remove(&chip->gpio_chip);
 	if (!ret)
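
The 74x164 conversion above is mechanical: spi_set_drvdata() and spi_get_drvdata() are thin wrappers around dev_set_drvdata()/dev_get_drvdata() on &spi->dev, so behavior is unchanged and the driver just stops poking at spi->dev directly. A sketch with a made-up chip struct:

#include <linux/spi/spi.h>

struct demo_chip {
	struct spi_device *spi;
};

static void demo_attach(struct spi_device *spi, struct demo_chip *chip)
{
	chip->spi = spi;
	spi_set_drvdata(spi, chip);	/* was dev_set_drvdata(&spi->dev, ...) */
}

static struct demo_chip *demo_state(struct spi_device *spi)
{
	return spi_get_drvdata(spi);	/* was dev_get_drvdata(&spi->dev) */
}
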
diff --git a/drivers/gpio/gpio-adp5520.c b/drivers/gpio/gpio-adp5520.c
index 8afa95f..f33f78d 100644
--- a/drivers/gpio/gpio-adp5520.c
+++ b/drivers/gpio/gpio-adp5520.c
@@ -105,7 +105,7 @@
 		return -ENODEV;
 	}
 
-	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
+	dev = devm_kzalloc(&pdev->dev, sizeof(*dev), GFP_KERNEL);
 	if (dev == NULL) {
 		dev_err(&pdev->dev, "failed to alloc memory\n");
 		return -ENOMEM;
@@ -163,7 +163,6 @@
 	return 0;
 
 err:
-	kfree(dev);
 	return ret;
 }
 
@@ -180,7 +179,6 @@
 		return ret;
 	}
 
-	kfree(dev);
 	return 0;
 }
 
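
The adp5520 change shows the payoff of devm_kzalloc(): the allocation is owned by the device, so it is freed automatically both when probe fails and after remove, and the explicit kfree() calls simply disappear. A minimal platform-driver sketch of the same pattern, with invented names:

#include <linux/module.h>
#include <linux/platform_device.h>
#include <linux/slab.h>

struct demo_state {
	int value;
};

static int demo_probe(struct platform_device *pdev)
{
	struct demo_state *st;

	st = devm_kzalloc(&pdev->dev, sizeof(*st), GFP_KERNEL);
	if (!st)
		return -ENOMEM;

	platform_set_drvdata(pdev, st);
	return 0;			/* no error path needs kfree(st) */
}

static int demo_remove(struct platform_device *pdev)
{
	/* nothing to free: devres releases st after this returns */
	return 0;
}

static struct platform_driver demo_driver = {
	.probe	= demo_probe,
	.remove	= demo_remove,
	.driver	= {
		.name	= "demo-gpio",
	},
};
module_platform_driver(demo_driver);
MODULE_LICENSE("GPL");
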
diff --git a/drivers/gpio/gpio-em.c b/drivers/gpio/gpio-em.c
index deca78f..5cba855 100644
--- a/drivers/gpio/gpio-em.c
+++ b/drivers/gpio/gpio-em.c
@@ -231,10 +231,12 @@
 
 static struct irq_domain_ops em_gio_irq_domain_ops = {
 	.map	= em_gio_irq_domain_map,
+	.xlate	= irq_domain_xlate_twocell,
 };
 
 static int em_gio_probe(struct platform_device *pdev)
 {
+	struct gpio_em_config pdata_dt;
 	struct gpio_em_config *pdata = pdev->dev.platform_data;
 	struct em_gio_priv *p;
 	struct resource *io[2], *irq[2];
@@ -243,7 +245,7 @@
 	const char *name = dev_name(&pdev->dev);
 	int ret;
 
-	p = kzalloc(sizeof(*p), GFP_KERNEL);
+	p = devm_kzalloc(&pdev->dev, sizeof(*p), GFP_KERNEL);
 	if (!p) {
 		dev_err(&pdev->dev, "failed to allocate driver data\n");
 		ret = -ENOMEM;
@@ -259,24 +261,45 @@
 	irq[0] = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
 	irq[1] = platform_get_resource(pdev, IORESOURCE_IRQ, 1);
 
-	if (!io[0] || !io[1] || !irq[0] || !irq[1] || !pdata) {
-		dev_err(&pdev->dev, "missing IRQ, IOMEM or configuration\n");
+	if (!io[0] || !io[1] || !irq[0] || !irq[1]) {
+		dev_err(&pdev->dev, "missing IRQ or IOMEM\n");
 		ret = -EINVAL;
-		goto err1;
+		goto err0;
 	}
 
-	p->base0 = ioremap_nocache(io[0]->start, resource_size(io[0]));
+	p->base0 = devm_ioremap_nocache(&pdev->dev, io[0]->start,
+					resource_size(io[0]));
 	if (!p->base0) {
 		dev_err(&pdev->dev, "failed to remap low I/O memory\n");
 		ret = -ENXIO;
-		goto err1;
+		goto err0;
 	}
 
-	p->base1 = ioremap_nocache(io[1]->start, resource_size(io[1]));
+	p->base1 = devm_ioremap_nocache(&pdev->dev, io[1]->start,
+				   resource_size(io[1]));
 	if (!p->base1) {
 		dev_err(&pdev->dev, "failed to remap high I/O memory\n");
 		ret = -ENXIO;
-		goto err2;
+		goto err0;
+	}
+
+	if (!pdata) {
+		memset(&pdata_dt, 0, sizeof(pdata_dt));
+		pdata = &pdata_dt;
+
+		if (of_property_read_u32(pdev->dev.of_node, "ngpios",
+					 &pdata->number_of_pins)) {
+			dev_err(&pdev->dev, "Missing ngpios OF property\n");
+			ret = -EINVAL;
+			goto err0;
+		}
+
+		ret = of_alias_get_id(pdev->dev.of_node, "gpio");
+		if (ret < 0) {
+			dev_err(&pdev->dev, "Couldn't get OF id\n");
+			goto err0;
+		}
+		pdata->gpio_base = ret * 32; /* 32 GPIOs per instance */
 	}
 
 	gpio_chip = &p->gpio_chip;
@@ -306,40 +329,32 @@
 	if (!p->irq_domain) {
 		ret = -ENXIO;
 		dev_err(&pdev->dev, "cannot initialize irq domain\n");
-		goto err3;
+		goto err0;
 	}
 
-	if (request_irq(irq[0]->start, em_gio_irq_handler, 0, name, p)) {
+	if (devm_request_irq(&pdev->dev, irq[0]->start,
+			     em_gio_irq_handler, 0, name, p)) {
 		dev_err(&pdev->dev, "failed to request low IRQ\n");
 		ret = -ENOENT;
-		goto err4;
+		goto err1;
 	}
 
-	if (request_irq(irq[1]->start, em_gio_irq_handler, 0, name, p)) {
+	if (devm_request_irq(&pdev->dev, irq[1]->start,
+			     em_gio_irq_handler, 0, name, p)) {
 		dev_err(&pdev->dev, "failed to request high IRQ\n");
 		ret = -ENOENT;
-		goto err5;
+		goto err1;
 	}
 
 	ret = gpiochip_add(gpio_chip);
 	if (ret) {
 		dev_err(&pdev->dev, "failed to add GPIO controller\n");
-		goto err6;
+		goto err1;
 	}
 	return 0;
 
-err6:
-	free_irq(irq[1]->start, pdev);
-err5:
-	free_irq(irq[0]->start, pdev);
-err4:
-	irq_domain_remove(p->irq_domain);
-err3:
-	iounmap(p->base1);
-err2:
-	iounmap(p->base0);
 err1:
-	kfree(p);
+	irq_domain_remove(p->irq_domain);
 err0:
 	return ret;
 }
@@ -347,34 +362,43 @@
 static int em_gio_remove(struct platform_device *pdev)
 {
 	struct em_gio_priv *p = platform_get_drvdata(pdev);
-	struct resource *irq[2];
 	int ret;
 
 	ret = gpiochip_remove(&p->gpio_chip);
 	if (ret)
 		return ret;
 
-	irq[0] = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
-	irq[1] = platform_get_resource(pdev, IORESOURCE_IRQ, 1);
-
-	free_irq(irq[1]->start, pdev);
-	free_irq(irq[0]->start, pdev);
 	irq_domain_remove(p->irq_domain);
-	iounmap(p->base1);
-	iounmap(p->base0);
-	kfree(p);
 	return 0;
 }
 
+static const struct of_device_id em_gio_dt_ids[] = {
+	{ .compatible = "renesas,em-gio", },
+	{},
+};
+MODULE_DEVICE_TABLE(of, em_gio_dt_ids);
+
 static struct platform_driver em_gio_device_driver = {
 	.probe		= em_gio_probe,
 	.remove		= em_gio_remove,
 	.driver		= {
 		.name	= "em_gio",
+		.of_match_table = em_gio_dt_ids,
+		.owner		= THIS_MODULE,
 	}
 };
 
-module_platform_driver(em_gio_device_driver);
+static int __init em_gio_init(void)
+{
+	return platform_driver_register(&em_gio_device_driver);
+}
+postcore_initcall(em_gio_init);
+
+static void __exit em_gio_exit(void)
+{
+	platform_driver_unregister(&em_gio_device_driver);
+}
+module_exit(em_gio_exit);
 
 MODULE_AUTHOR("Magnus Damm");
 MODULE_DESCRIPTION("Renesas Emma Mobile GIO Driver");
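
The gpio-em rework adds a device-tree fallback: when no board platform data is supplied, a local config is filled from the "ngpios" property and the "gpio" alias, at 32 GPIOs per controller instance. A condensed sketch of that fallback, with a hypothetical config struct mirroring gpio_em_config:

#include <linux/of.h>
#include <linux/platform_device.h>

struct demo_config {
	u32 number_of_pins;
	u32 gpio_base;
};

static int demo_get_config(struct platform_device *pdev,
			   struct demo_config *cfg)
{
	struct demo_config *pdata = pdev->dev.platform_data;
	int id;

	if (pdata) {			/* board platform data wins */
		*cfg = *pdata;
		return 0;
	}

	if (of_property_read_u32(pdev->dev.of_node, "ngpios",
				 &cfg->number_of_pins))
		return -EINVAL;		/* required in the DT case */

	id = of_alias_get_id(pdev->dev.of_node, "gpio");
	if (id < 0)
		return id;
	cfg->gpio_base = id * 32;	/* 32 GPIOs per controller instance */

	return 0;
}
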
diff --git a/drivers/gpio/gpio-generic.c b/drivers/gpio/gpio-generic.c
index 05fcc0f..d2196bf 100644
--- a/drivers/gpio/gpio-generic.c
+++ b/drivers/gpio/gpio-generic.c
@@ -104,6 +104,26 @@
 }
 #endif /* BITS_PER_LONG >= 64 */
 
+static void bgpio_write16be(void __iomem *reg, unsigned long data)
+{
+	iowrite16be(data, reg);
+}
+
+static unsigned long bgpio_read16be(void __iomem *reg)
+{
+	return ioread16be(reg);
+}
+
+static void bgpio_write32be(void __iomem *reg, unsigned long data)
+{
+	iowrite32be(data, reg);
+}
+
+static unsigned long bgpio_read32be(void __iomem *reg)
+{
+	return ioread32be(reg);
+}
+
 static unsigned long bgpio_pin2mask(struct bgpio_chip *bgc, unsigned int pin)
 {
 	return 1 << pin;
@@ -249,7 +269,8 @@
 
 static int bgpio_setup_accessors(struct device *dev,
 				 struct bgpio_chip *bgc,
-				 bool be)
+				 bool bit_be,
+				 bool byte_be)
 {
 
 	switch (bgc->bits) {
@@ -258,17 +279,33 @@
 		bgc->write_reg	= bgpio_write8;
 		break;
 	case 16:
-		bgc->read_reg	= bgpio_read16;
-		bgc->write_reg	= bgpio_write16;
+		if (byte_be) {
+			bgc->read_reg	= bgpio_read16be;
+			bgc->write_reg	= bgpio_write16be;
+		} else {
+			bgc->read_reg	= bgpio_read16;
+			bgc->write_reg	= bgpio_write16;
+		}
 		break;
 	case 32:
-		bgc->read_reg	= bgpio_read32;
-		bgc->write_reg	= bgpio_write32;
+		if (byte_be) {
+			bgc->read_reg	= bgpio_read32be;
+			bgc->write_reg	= bgpio_write32be;
+		} else {
+			bgc->read_reg	= bgpio_read32;
+			bgc->write_reg	= bgpio_write32;
+		}
 		break;
 #if BITS_PER_LONG >= 64
 	case 64:
-		bgc->read_reg	= bgpio_read64;
-		bgc->write_reg	= bgpio_write64;
+		if (byte_be) {
+			dev_err(dev,
+				"64 bit big endian byte order unsupported\n");
+			return -EINVAL;
+		} else {
+			bgc->read_reg	= bgpio_read64;
+			bgc->write_reg	= bgpio_write64;
+		}
 		break;
 #endif /* BITS_PER_LONG >= 64 */
 	default:
@@ -276,7 +313,7 @@
 		return -EINVAL;
 	}
 
-	bgc->pin2mask = be ? bgpio_pin2mask_be : bgpio_pin2mask;
+	bgc->pin2mask = bit_be ? bgpio_pin2mask_be : bgpio_pin2mask;
 
 	return 0;
 }
@@ -353,11 +390,7 @@
 
 int bgpio_remove(struct bgpio_chip *bgc)
 {
-	int err = gpiochip_remove(&bgc->gc);
-
-	kfree(bgc);
-
-	return err;
+	return gpiochip_remove(&bgc->gc);
 }
 EXPORT_SYMBOL_GPL(bgpio_remove);
 
@@ -385,7 +418,8 @@
 	if (ret)
 		return ret;
 
-	ret = bgpio_setup_accessors(dev, bgc, flags & BGPIOF_BIG_ENDIAN);
+	ret = bgpio_setup_accessors(dev, bgc, flags & BGPIOF_BIG_ENDIAN,
+				    flags & BGPIOF_BIG_ENDIAN_BYTE_ORDER);
 	if (ret)
 		return ret;
 
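
gpio-generic now distinguishes two independent endianness knobs: BGPIOF_BIG_ENDIAN flips the bit numbering within a register, while the new BGPIOF_BIG_ENDIAN_BYTE_ORDER selects byte-swapped register accessors. A 32-bit-only sketch of the difference (the demo_* names are illustrative):

#include <linux/io.h>
#include <linux/types.h>

#define DEMO_BITS 32

static unsigned long demo_pin2mask(unsigned int pin, bool bit_be)
{
	/* big-endian *bit* order counts pins down from the MSB */
	return bit_be ? 1UL << (DEMO_BITS - 1 - pin) : 1UL << pin;
}

static void demo_write(void __iomem *reg, u32 val, bool byte_be)
{
	if (byte_be)
		iowrite32be(val, reg);	/* big-endian *byte* order */
	else
		iowrite32(val, reg);	/* native little-endian access */
}
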
diff --git a/drivers/gpio/gpio-grgpio.c b/drivers/gpio/gpio-grgpio.c
new file mode 100644
index 0000000..8e08b86
--- /dev/null
+++ b/drivers/gpio/gpio-grgpio.c
@@ -0,0 +1,505 @@
+/*
+ * Driver for Aeroflex Gaisler GRGPIO General Purpose I/O cores.
+ *
+ * 2013 (c) Aeroflex Gaisler AB
+ *
+ * This driver supports the GRGPIO GPIO core available in the GRLIB VHDL
+ * IP core library.
+ *
+ * Full documentation of the GRGPIO core can be found here:
+ * http://www.gaisler.com/products/grlib/grip.pdf
+ *
+ * See "Documentation/devicetree/bindings/gpio/gpio-grgpio.txt" for
+ * information on open firmware properties.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * Contributors: Andreas Larsson <andreas@gaisler.com>
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/spinlock.h>
+#include <linux/io.h>
+#include <linux/of.h>
+#include <linux/of_gpio.h>
+#include <linux/of_platform.h>
+#include <linux/gpio.h>
+#include <linux/slab.h>
+#include <linux/err.h>
+#include <linux/basic_mmio_gpio.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/irqdomain.h>
+
+#define GRGPIO_MAX_NGPIO 32
+
+#define GRGPIO_DATA		0x00
+#define GRGPIO_OUTPUT		0x04
+#define GRGPIO_DIR		0x08
+#define GRGPIO_IMASK		0x0c
+#define GRGPIO_IPOL		0x10
+#define GRGPIO_IEDGE		0x14
+#define GRGPIO_BYPASS		0x18
+#define GRGPIO_IMAP_BASE	0x20
+
+/* Structure for an irq of the core - called an underlying irq */
+struct grgpio_uirq {
+	u8 refcnt; /* Reference counter to manage requesting/freeing of uirq */
+	u8 uirq; /* Underlying irq of the gpio driver */
+};
+
+/*
+ * Structure for an irq of a gpio line handed out by this driver. The index is
+ * used to map to the corresponding underlying irq.
+ */
+struct grgpio_lirq {
+	s8 index; /* Index into struct grgpio_priv's uirqs, or -1 */
+	u8 irq; /* irq for the gpio line */
+};
+
+struct grgpio_priv {
+	struct bgpio_chip bgc;
+	void __iomem *regs;
+	struct device *dev;
+
+	u32 imask; /* irq mask shadow register */
+
+	/*
+	 * The grgpio core can have multiple "underlying" irqs. The gpio lines
+	 * can be mapped to any one or none of these underlying irqs
+	 * independently of each other. This driver sets up an irq domain and
+	 * hands out separate irqs to each gpio line.
+	 */
+	struct irq_domain *domain;
+
+	/*
+	 * This array contains information on each underlying irq, each
+	 * irq of the grgpio core itself.
+	 */
+	struct grgpio_uirq uirqs[GRGPIO_MAX_NGPIO];
+
+	/*
+	 * This array contains information for each gpio line on the irqs
+	 * obtained from this driver. An index value of -1 for a certain gpio
+	 * line indicates that the line has no irq. Otherwise the index connects
+	 * the irq to the underlying irq by pointing into the uirqs array.
+	 */
+	struct grgpio_lirq lirqs[GRGPIO_MAX_NGPIO];
+};
+
+static inline struct grgpio_priv *grgpio_gc_to_priv(struct gpio_chip *gc)
+{
+	struct bgpio_chip *bgc = to_bgpio_chip(gc);
+
+	return container_of(bgc, struct grgpio_priv, bgc);
+}
+
+static void grgpio_set_imask(struct grgpio_priv *priv, unsigned int offset,
+			     int val)
+{
+	struct bgpio_chip *bgc = &priv->bgc;
+	unsigned long mask = bgc->pin2mask(bgc, offset);
+	unsigned long flags;
+
+	spin_lock_irqsave(&bgc->lock, flags);
+
+	if (val)
+		priv->imask |= mask;
+	else
+		priv->imask &= ~mask;
+	bgc->write_reg(priv->regs + GRGPIO_IMASK, priv->imask);
+
+	spin_unlock_irqrestore(&bgc->lock, flags);
+}
+
+static int grgpio_to_irq(struct gpio_chip *gc, unsigned offset)
+{
+	struct grgpio_priv *priv = grgpio_gc_to_priv(gc);
+
+	if (offset > gc->ngpio)
+		return -ENXIO;
+
+	if (priv->lirqs[offset].index < 0)
+		return -ENXIO;
+
+	return irq_create_mapping(priv->domain, offset);
+}
+
+/* -------------------- IRQ chip functions -------------------- */
+
+static int grgpio_irq_set_type(struct irq_data *d, unsigned int type)
+{
+	struct grgpio_priv *priv = irq_data_get_irq_chip_data(d);
+	unsigned long flags;
+	u32 mask = BIT(d->hwirq);
+	u32 ipol;
+	u32 iedge;
+	u32 pol;
+	u32 edge;
+
+	switch (type) {
+	case IRQ_TYPE_LEVEL_LOW:
+		pol = 0;
+		edge = 0;
+		break;
+	case IRQ_TYPE_LEVEL_HIGH:
+		pol = mask;
+		edge = 0;
+		break;
+	case IRQ_TYPE_EDGE_FALLING:
+		pol = 0;
+		edge = mask;
+		break;
+	case IRQ_TYPE_EDGE_RISING:
+		pol = mask;
+		edge = mask;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	spin_lock_irqsave(&priv->bgc.lock, flags);
+
+	ipol = priv->bgc.read_reg(priv->regs + GRGPIO_IPOL) & ~mask;
+	iedge = priv->bgc.read_reg(priv->regs + GRGPIO_IEDGE) & ~mask;
+
+	priv->bgc.write_reg(priv->regs + GRGPIO_IPOL, ipol | pol);
+	priv->bgc.write_reg(priv->regs + GRGPIO_IEDGE, iedge | edge);
+
+	spin_unlock_irqrestore(&priv->bgc.lock, flags);
+
+	return 0;
+}
+
+static void grgpio_irq_mask(struct irq_data *d)
+{
+	struct grgpio_priv *priv = irq_data_get_irq_chip_data(d);
+	int offset = d->hwirq;
+
+	grgpio_set_imask(priv, offset, 0);
+}
+
+static void grgpio_irq_unmask(struct irq_data *d)
+{
+	struct grgpio_priv *priv = irq_data_get_irq_chip_data(d);
+	int offset = d->hwirq;
+
+	grgpio_set_imask(priv, offset, 1);
+}
+
+static struct irq_chip grgpio_irq_chip = {
+	.name			= "grgpio",
+	.irq_mask		= grgpio_irq_mask,
+	.irq_unmask		= grgpio_irq_unmask,
+	.irq_set_type		= grgpio_irq_set_type,
+};
+
+static irqreturn_t grgpio_irq_handler(int irq, void *dev)
+{
+	struct grgpio_priv *priv = dev;
+	int ngpio = priv->bgc.gc.ngpio;
+	unsigned long flags;
+	int i;
+	int match = 0;
+
+	spin_lock_irqsave(&priv->bgc.lock, flags);
+
+	/*
+	 * For each gpio line, call its interrupt handler if its underlying
+	 * irq matches the current irq that is handled.
+	 */
+	for (i = 0; i < ngpio; i++) {
+		struct grgpio_lirq *lirq = &priv->lirqs[i];
+
+		if (priv->imask & BIT(i) && lirq->index >= 0 &&
+		    priv->uirqs[lirq->index].uirq == irq) {
+			generic_handle_irq(lirq->irq);
+			match = 1;
+		}
+	}
+
+	spin_unlock_irqrestore(&priv->bgc.lock, flags);
+
+	if (!match)
+		dev_warn(priv->dev, "No gpio line matched irq %d\n", irq);
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * This function will be called as a consequence of the call to
+ * irq_create_mapping in grgpio_to_irq.
+ */
+int grgpio_irq_map(struct irq_domain *d, unsigned int irq,
+		   irq_hw_number_t hwirq)
+{
+	struct grgpio_priv *priv = d->host_data;
+	struct grgpio_lirq *lirq;
+	struct grgpio_uirq *uirq;
+	unsigned long flags;
+	int offset = hwirq;
+	int ret = 0;
+
+	if (!priv)
+		return -EINVAL;
+
+	lirq = &priv->lirqs[offset];
+	if (lirq->index < 0)
+		return -EINVAL;
+
+	dev_dbg(priv->dev, "Mapping irq %d for gpio line %d\n",
+		irq, offset);
+
+	spin_lock_irqsave(&priv->bgc.lock, flags);
+
+	/* Request underlying irq if not already requested */
+	lirq->irq = irq;
+	uirq = &priv->uirqs[lirq->index];
+	if (uirq->refcnt == 0) {
+		ret = request_irq(uirq->uirq, grgpio_irq_handler, 0,
+				  dev_name(priv->dev), priv);
+		if (ret) {
+			dev_err(priv->dev,
+				"Could not request underlying irq %d\n",
+				uirq->uirq);
+
+			spin_unlock_irqrestore(&priv->bgc.lock, flags);
+
+			return ret;
+		}
+	}
+	uirq->refcnt++;
+
+	spin_unlock_irqrestore(&priv->bgc.lock, flags);
+
+	/* Set up the irq */
+	irq_set_chip_data(irq, priv);
+	irq_set_chip_and_handler(irq, &grgpio_irq_chip,
+				 handle_simple_irq);
+	irq_clear_status_flags(irq, IRQ_NOREQUEST);
+#ifdef CONFIG_ARM
+	set_irq_flags(irq, IRQF_VALID);
+#else
+	irq_set_noprobe(irq);
+#endif
+
+	return ret;
+}
+
+void grgpio_irq_unmap(struct irq_domain *d, unsigned int irq)
+{
+	struct grgpio_priv *priv = d->host_data;
+	int index;
+	struct grgpio_lirq *lirq;
+	struct grgpio_uirq *uirq;
+	unsigned long flags;
+	int ngpio = priv->bgc.gc.ngpio;
+	int i;
+
+#ifdef CONFIG_ARM
+	set_irq_flags(irq, 0);
+#endif
+	irq_set_chip_and_handler(irq, NULL, NULL);
+	irq_set_chip_data(irq, NULL);
+
+	spin_lock_irqsave(&priv->bgc.lock, flags);
+
+	/* Free underlying irq if last user unmapped */
+	index = -1;
+	for (i = 0; i < ngpio; i++) {
+		lirq = &priv->lirqs[i];
+		if (lirq->irq == irq) {
+			grgpio_set_imask(priv, i, 0);
+			lirq->irq = 0;
+			index = lirq->index;
+			break;
+		}
+	}
+	WARN_ON(index < 0);
+
+	if (index >= 0) {
+		uirq = &priv->uirqs[lirq->index];
+		uirq->refcnt--;
+		if (uirq->refcnt == 0)
+			free_irq(uirq->uirq, priv);
+	}
+
+	spin_unlock_irqrestore(&priv->bgc.lock, flags);
+}
+
+static struct irq_domain_ops grgpio_irq_domain_ops = {
+	.map	= grgpio_irq_map,
+	.unmap	= grgpio_irq_unmap,
+};
+
+/* ------------------------------------------------------------ */
+
+static int grgpio_probe(struct platform_device *ofdev)
+{
+	struct device_node *np = ofdev->dev.of_node;
+	void  __iomem *regs;
+	struct gpio_chip *gc;
+	struct bgpio_chip *bgc;
+	struct grgpio_priv *priv;
+	struct resource *res;
+	int err;
+	u32 prop;
+	s32 *irqmap;
+	int size;
+	int i;
+
+	priv = devm_kzalloc(&ofdev->dev, sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	res = platform_get_resource(ofdev, IORESOURCE_MEM, 0);
+	regs = devm_ioremap_resource(&ofdev->dev, res);
+	if (IS_ERR(regs))
+		return PTR_ERR(regs);
+
+	bgc = &priv->bgc;
+	err = bgpio_init(bgc, &ofdev->dev, 4, regs + GRGPIO_DATA,
+			 regs + GRGPIO_OUTPUT, NULL, regs + GRGPIO_DIR, NULL,
+			 BGPIOF_BIG_ENDIAN_BYTE_ORDER);
+	if (err) {
+		dev_err(&ofdev->dev, "bgpio_init() failed\n");
+		return err;
+	}
+
+	priv->regs = regs;
+	priv->imask = bgc->read_reg(regs + GRGPIO_IMASK);
+	priv->dev = &ofdev->dev;
+
+	gc = &bgc->gc;
+	gc->of_node = np;
+	gc->owner = THIS_MODULE;
+	gc->to_irq = grgpio_to_irq;
+	gc->label = np->full_name;
+	gc->base = -1;
+
+	err = of_property_read_u32(np, "nbits", &prop);
+	if (err || prop <= 0 || prop > GRGPIO_MAX_NGPIO) {
+		gc->ngpio = GRGPIO_MAX_NGPIO;
+		dev_dbg(&ofdev->dev,
+			"No or invalid nbits property: assume %d\n", gc->ngpio);
+	} else {
+		gc->ngpio = prop;
+	}
+
+	/*
+	 * The irqmap contains the index values indicating which underlying irq,
+	 * if any, is connected to that line.
+	 */
+	irqmap = (s32 *)of_get_property(np, "irqmap", &size);
+	if (irqmap) {
+		if (size < gc->ngpio) {
+			dev_err(&ofdev->dev,
+				"irqmap shorter than ngpio (%d < %d)\n",
+				size, gc->ngpio);
+			return -EINVAL;
+		}
+
+		priv->domain = irq_domain_add_linear(np, gc->ngpio,
+						     &grgpio_irq_domain_ops,
+						     priv);
+		if (!priv->domain) {
+			dev_err(&ofdev->dev, "Could not add irq domain\n");
+			return -EINVAL;
+		}
+
+		for (i = 0; i < gc->ngpio; i++) {
+			struct grgpio_lirq *lirq;
+			int ret;
+
+			lirq = &priv->lirqs[i];
+			lirq->index = irqmap[i];
+
+			if (lirq->index < 0)
+				continue;
+
+			ret = platform_get_irq(ofdev, lirq->index);
+			if (ret <= 0) {
+				/*
+				 * Continue without irq functionality for that
+				 * gpio line
+				 */
+				dev_err(priv->dev,
+					"Failed to get irq for offset %d\n", i);
+				continue;
+			}
+			priv->uirqs[lirq->index].uirq = ret;
+		}
+	}
+
+	platform_set_drvdata(ofdev, priv);
+
+	err = gpiochip_add(gc);
+	if (err) {
+		dev_err(&ofdev->dev, "Could not add gpiochip\n");
+		return err;
+	}
+
+	dev_info(&ofdev->dev, "regs=0x%p, base=%d, ngpio=%d, irqs=%s\n",
+		 priv->regs, gc->base, gc->ngpio, priv->domain ? "on" : "off");
+
+	return 0;
+}
+
+static int grgpio_remove(struct platform_device *ofdev)
+{
+	struct grgpio_priv *priv = platform_get_drvdata(ofdev);
+	unsigned long flags;
+	int i;
+	int ret = 0;
+
+	spin_lock_irqsave(&priv->bgc.lock, flags);
+
+	if (priv->domain) {
+		for (i = 0; i < GRGPIO_MAX_NGPIO; i++) {
+			if (priv->uirqs[i].refcnt != 0) {
+				ret = -EBUSY;
+				goto out;
+			}
+		}
+	}
+
+	ret = gpiochip_remove(&priv->bgc.gc);
+	if (ret)
+		goto out;
+
+	if (priv->domain)
+		irq_domain_remove(priv->domain);
+
+out:
+	spin_unlock_irqrestore(&priv->bgc.lock, flags);
+
+	return ret;
+}
+
+static struct of_device_id grgpio_match[] = {
+	{.name = "GAISLER_GPIO"},
+	{.name = "01_01a"},
+	{},
+};
+
+MODULE_DEVICE_TABLE(of, grgpio_match);
+
+static struct platform_driver grgpio_driver = {
+	.driver = {
+		.name = "grgpio",
+		.owner = THIS_MODULE,
+		.of_match_table = grgpio_match,
+	},
+	.probe = grgpio_probe,
+	.remove = grgpio_remove,
+};
+module_platform_driver(grgpio_driver);
+
+MODULE_AUTHOR("Aeroflex Gaisler AB.");
+MODULE_DESCRIPTION("Driver for Aeroflex Gaisler GRGPIO");
+MODULE_LICENSE("GPL");
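
The most interesting part of the new grgpio driver is its refcounted handling of shared parent interrupts: several gpio lines can map to one underlying irq, which is requested when the first line maps it and freed when the last line unmaps. A stripped-down sketch of just that bookkeeping, without the locking or irq-domain plumbing:

#include <linux/interrupt.h>

struct demo_uirq {
	unsigned int irq;	/* parent interrupt number */
	u8 refcnt;		/* gpio lines currently mapped to it */
};

static int demo_map_line(struct demo_uirq *u, irq_handler_t handler,
			 void *data)
{
	int ret = 0;

	if (u->refcnt == 0)	/* first user requests the parent irq */
		ret = request_irq(u->irq, handler, 0, "demo-gpio", data);
	if (!ret)
		u->refcnt++;
	return ret;
}

static void demo_unmap_line(struct demo_uirq *u, void *data)
{
	if (--u->refcnt == 0)	/* last user releases it again */
		free_irq(u->irq, data);
}
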
diff --git a/drivers/gpio/gpio-ich.c b/drivers/gpio/gpio-ich.c
index de3c317..e16d932 100644
--- a/drivers/gpio/gpio-ich.c
+++ b/drivers/gpio/gpio-ich.c
@@ -130,14 +130,11 @@
 
 static bool ichx_gpio_check_available(struct gpio_chip *gpio, unsigned nr)
 {
-	return ichx_priv.use_gpio & (1 << (nr / 32));
+	return !!(ichx_priv.use_gpio & (1 << (nr / 32)));
 }
 
 static int ichx_gpio_direction_input(struct gpio_chip *gpio, unsigned nr)
 {
-	if (!ichx_gpio_check_available(gpio, nr))
-		return -ENXIO;
-
 	/*
 	 * Try setting pin as an input and verify it worked since many pins
 	 * are output-only.
@@ -151,9 +148,6 @@
 static int ichx_gpio_direction_output(struct gpio_chip *gpio, unsigned nr,
 					int val)
 {
-	if (!ichx_gpio_check_available(gpio, nr))
-		return -ENXIO;
-
 	/* Set GPIO output value. */
 	ichx_write_bit(GPIO_LVL, nr, val, 0);
 
@@ -169,9 +163,6 @@
 
 static int ichx_gpio_get(struct gpio_chip *chip, unsigned nr)
 {
-	if (!ichx_gpio_check_available(chip, nr))
-		return -ENXIO;
-
 	return ichx_read_bit(GPIO_LVL, nr);
 }
 
@@ -180,9 +171,6 @@
 	unsigned long flags;
 	u32 data;
 
-	if (!ichx_gpio_check_available(chip, nr))
-		return -ENXIO;
-
 	/*
 	 * GPI 0 - 15 need to be read from the power management registers on
 	 * a ICH6/3100 bridge.
@@ -207,6 +195,9 @@
 
 static int ichx_gpio_request(struct gpio_chip *chip, unsigned nr)
 {
+	if (!ichx_gpio_check_available(chip, nr))
+		return -ENXIO;
+
 	/*
 	 * Note we assume the BIOS properly set a bridge's USE value.  Some
 	 * chips (eg Intel 3100) have bogus USE values though, so first see if
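
The gpio-ich change moves the availability test out of every get/set/direction hook and into the chip's .request callback, which gpiolib invokes once per pin before any other operation is allowed, so the later hooks can trust the pin number. A sketch of such a request hook, with a made-up availability bitmap:

#include <linux/gpio.h>
#include <linux/errno.h>

static u32 demo_use_gpio;	/* made-up bitmap, one bit per group of
				 * 32 pins, as in gpio-ich's use_gpio */

static int demo_gpio_request(struct gpio_chip *chip, unsigned nr)
{
	if (!(demo_use_gpio & (1 << (nr / 32))))
		return -ENXIO;	/* pin group unusable on this board */
	return 0;		/* every later hook may now trust nr */
}
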
diff --git a/drivers/gpio/gpio-lpc32xx.c b/drivers/gpio/gpio-lpc32xx.c
index 36d7dee..dda6a75 100644
--- a/drivers/gpio/gpio-lpc32xx.c
+++ b/drivers/gpio/gpio-lpc32xx.c
@@ -533,7 +533,7 @@
 {
 	/* Is this the correct bank? */
 	u32 bank = gpiospec->args[0];
-	if ((bank > ARRAY_SIZE(lpc32xx_gpiochip) ||
+	if ((bank >= ARRAY_SIZE(lpc32xx_gpiochip) ||
 	    (gc != &lpc32xx_gpiochip[bank].chip)))
 		return -EINVAL;
 
diff --git a/drivers/gpio/gpio-lynxpoint.c b/drivers/gpio/gpio-lynxpoint.c
index 3472b05..86c17de 100644
--- a/drivers/gpio/gpio-lynxpoint.c
+++ b/drivers/gpio/gpio-lynxpoint.c
@@ -32,6 +32,7 @@
 #include <linux/acpi.h>
 #include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
+#include <linux/io.h>
 
 /* LynxPoint chipset has support for 94 gpio pins */
 
diff --git a/drivers/gpio/gpio-max7300.c b/drivers/gpio/gpio-max7300.c
index 4b6b9a0..40ab6df 100644
--- a/drivers/gpio/gpio-max7300.c
+++ b/drivers/gpio/gpio-max7300.c
@@ -41,7 +41,7 @@
 			I2C_FUNC_SMBUS_BYTE_DATA))
 		return -EIO;
 
-	ts = kzalloc(sizeof(struct max7301), GFP_KERNEL);
+	ts = devm_kzalloc(&client->dev, sizeof(struct max7301), GFP_KERNEL);
 	if (!ts)
 		return -ENOMEM;
 
@@ -50,8 +50,6 @@
 	ts->dev = &client->dev;
 
 	ret = __max730x_probe(ts);
-	if (ret)
-		kfree(ts);
 	return ret;
 }
 
diff --git a/drivers/gpio/gpio-max7301.c b/drivers/gpio/gpio-max7301.c
index c6c535c..3b16ab7 100644
--- a/drivers/gpio/gpio-max7301.c
+++ b/drivers/gpio/gpio-max7301.c
@@ -56,12 +56,13 @@
 	int ret;
 
 	/* bits_per_word cannot be configured in platform data */
-	spi->bits_per_word = 16;
+	if (spi->dev.platform_data)
+		spi->bits_per_word = 16;
 	ret = spi_setup(spi);
 	if (ret < 0)
 		return ret;
 
-	ts = kzalloc(sizeof(struct max7301), GFP_KERNEL);
+	ts = devm_kzalloc(&spi->dev, sizeof(struct max7301), GFP_KERNEL);
 	if (!ts)
 		return -ENOMEM;
 
@@ -70,8 +71,6 @@
 	ts->dev = &spi->dev;
 
 	ret = __max730x_probe(ts);
-	if (ret)
-		kfree(ts);
 	return ret;
 }
 
diff --git a/drivers/gpio/gpio-max732x.c b/drivers/gpio/gpio-max732x.c
index 1e0467c..d4b51b1 100644
--- a/drivers/gpio/gpio-max732x.c
+++ b/drivers/gpio/gpio-max732x.c
@@ -589,7 +589,8 @@
 		return -EINVAL;
 	}
 
-	chip = kzalloc(sizeof(struct max732x_chip), GFP_KERNEL);
+	chip = devm_kzalloc(&client->dev, sizeof(struct max732x_chip),
+			GFP_KERNEL);
 	if (chip == NULL)
 		return -ENOMEM;
 	chip->client = client;
@@ -647,7 +648,6 @@
 
 out_failed:
 	max732x_irq_teardown(chip);
-	kfree(chip);
 	return ret;
 }
 
@@ -680,7 +680,6 @@
 	if (chip->client_dummy)
 		i2c_unregister_device(chip->client_dummy);
 
-	kfree(chip);
 	return 0;
 }
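
The max7300/max7301/max732x conversions above all follow the same devm pattern: a
devm_kzalloc() allocation is freed automatically when probe fails or the device is
unbound, so every kfree() on the error and remove paths can simply be dropped. A
minimal sketch with hypothetical names:

	#include <linux/device.h>
	#include <linux/slab.h>

	struct demo_chip {
		int base;
	};

	static int demo_probe(struct device *dev)
	{
		struct demo_chip *chip;

		chip = devm_kzalloc(dev, sizeof(*chip), GFP_KERNEL);
		if (!chip)
			return -ENOMEM;

		dev_set_drvdata(dev, chip);
		return 0;	/* on any later failure, the core frees chip */
	}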
 
diff --git a/drivers/gpio/gpio-mc33880.c b/drivers/gpio/gpio-mc33880.c
index 6a8fdc2..63a7a1b 100644
--- a/drivers/gpio/gpio-mc33880.c
+++ b/drivers/gpio/gpio-mc33880.c
@@ -101,13 +101,13 @@
 	if (ret < 0)
 		return ret;
 
-	mc = kzalloc(sizeof(struct mc33880), GFP_KERNEL);
+	mc = devm_kzalloc(&spi->dev, sizeof(struct mc33880), GFP_KERNEL);
 	if (!mc)
 		return -ENOMEM;
 
 	mutex_init(&mc->lock);
 
-	dev_set_drvdata(&spi->dev, mc);
+	spi_set_drvdata(spi, mc);
 
 	mc->spi = spi;
 
@@ -130,7 +130,8 @@
 		ret = mc33880_write_config(mc);
 
 	if (ret) {
-		printk(KERN_ERR "Failed writing to " DRIVER_NAME ": %d\n", ret);
+		dev_err(&spi->dev, "Failed writing to " DRIVER_NAME ": %d\n",
+			ret);
 		goto exit_destroy;
 	}
 
@@ -141,9 +142,8 @@
 	return ret;
 
 exit_destroy:
-	dev_set_drvdata(&spi->dev, NULL);
+	spi_set_drvdata(spi, NULL);
 	mutex_destroy(&mc->lock);
-	kfree(mc);
 	return ret;
 }
 
@@ -152,17 +152,16 @@
 	struct mc33880 *mc;
 	int ret;
 
-	mc = dev_get_drvdata(&spi->dev);
+	mc = spi_get_drvdata(spi);
 	if (mc == NULL)
 		return -ENODEV;
 
-	dev_set_drvdata(&spi->dev, NULL);
+	spi_set_drvdata(spi, NULL);
 
 	ret = gpiochip_remove(&mc->chip);
-	if (!ret) {
+	if (!ret)
 		mutex_destroy(&mc->lock);
-		kfree(mc);
-	} else
+	else
 		dev_err(&spi->dev, "Failed to remove the GPIO controller: %d\n",
 			ret);
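
The dev_set_drvdata(&spi->dev, ...) to spi_set_drvdata(spi, ...) changes above are
purely cosmetic: the spi helpers are thin inline wrappers over the generic device
ones, roughly as follows (simplified; the real definitions live in
<linux/spi/spi.h>):

	static inline void spi_set_drvdata(struct spi_device *spi, void *data)
	{
		dev_set_drvdata(&spi->dev, data);
	}

	static inline void *spi_get_drvdata(struct spi_device *spi)
	{
		return dev_get_drvdata(&spi->dev);
	}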
 
diff --git a/drivers/gpio/gpio-mcp23s08.c b/drivers/gpio/gpio-mcp23s08.c
index 3cea0ea..6a4470b 100644
--- a/drivers/gpio/gpio-mcp23s08.c
+++ b/drivers/gpio/gpio-mcp23s08.c
@@ -12,6 +12,8 @@
 #include <linux/spi/mcp23s08.h>
 #include <linux/slab.h>
 #include <asm/byteorder.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
 
 /**
  * MCP types supported by driver
@@ -383,6 +385,10 @@
 	mcp->chip.direction_output = mcp23s08_direction_output;
 	mcp->chip.set = mcp23s08_set;
 	mcp->chip.dbg_show = mcp23s08_dbg_show;
+#ifdef CONFIG_OF
+	mcp->chip.of_gpio_n_cells = 2;
+	mcp->chip.of_node = dev->of_node;
+#endif
 
 	switch (type) {
 #ifdef CONFIG_SPI_MASTER
@@ -473,6 +479,35 @@
 
 /*----------------------------------------------------------------------*/
 
+#ifdef CONFIG_OF
+#ifdef CONFIG_SPI_MASTER
+static struct of_device_id mcp23s08_spi_of_match[] = {
+	{
+		.compatible = "mcp,mcp23s08", .data = (void *) MCP_TYPE_S08,
+	},
+	{
+		.compatible = "mcp,mcp23s17", .data = (void *) MCP_TYPE_S17,
+	},
+	{ },
+};
+MODULE_DEVICE_TABLE(of, mcp23s08_spi_of_match);
+#endif
+
+#if IS_ENABLED(CONFIG_I2C)
+static struct of_device_id mcp23s08_i2c_of_match[] = {
+	{
+		.compatible = "mcp,mcp23008", .data = (void *) MCP_TYPE_008,
+	},
+	{
+		.compatible = "mcp,mcp23017", .data = (void *) MCP_TYPE_017,
+	},
+	{ },
+};
+MODULE_DEVICE_TABLE(of, mcp23s08_i2c_of_match);
+#endif
+#endif /* CONFIG_OF */
+
+
 #if IS_ENABLED(CONFIG_I2C)
 
 static int mcp230xx_probe(struct i2c_client *client,
@@ -480,12 +515,23 @@
 {
 	struct mcp23s08_platform_data *pdata;
 	struct mcp23s08 *mcp;
-	int status;
+	int status, base, pullups;
+	const struct of_device_id *match;
 
-	pdata = client->dev.platform_data;
-	if (!pdata || !gpio_is_valid(pdata->base)) {
-		dev_dbg(&client->dev, "invalid or missing platform data\n");
-		return -EINVAL;
+	match = of_match_device(of_match_ptr(mcp23s08_i2c_of_match),
+					&client->dev);
+	if (match) {
+		base = -1;
+		pullups = 0;
+	} else {
+		pdata = client->dev.platform_data;
+		if (!pdata || !gpio_is_valid(pdata->base)) {
+			dev_dbg(&client->dev,
+					"invalid or missing platform data\n");
+			return -EINVAL;
+		}
+		base = pdata->base;
+		pullups = pdata->chip[0].pullups;
 	}
 
 	mcp = kzalloc(sizeof *mcp, GFP_KERNEL);
@@ -493,8 +539,7 @@
 		return -ENOMEM;
 
 	status = mcp23s08_probe_one(mcp, &client->dev, client, client->addr,
-				    id->driver_data, pdata->base,
-				    pdata->chip[0].pullups);
+				    id->driver_data, base, pullups);
 	if (status)
 		goto fail;
 
@@ -531,6 +576,7 @@
 	.driver = {
 		.name	= "mcp230xx",
 		.owner	= THIS_MODULE,
+		.of_match_table = of_match_ptr(mcp23s08_i2c_of_match),
 	},
 	.probe		= mcp230xx_probe,
 	.remove		= mcp230xx_remove,
@@ -565,28 +611,55 @@
 	unsigned			chips = 0;
 	struct mcp23s08_driver_data	*data;
 	int				status, type;
-	unsigned			base;
+	unsigned			base = -1,
+					ngpio = 0,
+					pullups[ARRAY_SIZE(pdata->chip)];
+	const struct			of_device_id *match;
+	u32				spi_present_mask = 0;
 
-	type = spi_get_device_id(spi)->driver_data;
+	match = of_match_device(of_match_ptr(mcp23s08_spi_of_match), &spi->dev);
+	if (match) {
+		type = (int)match->data;
+		status = of_property_read_u32(spi->dev.of_node,
+				"mcp,spi-present-mask", &spi_present_mask);
+		if (status) {
+			dev_err(&spi->dev, "DT has no spi-present-mask\n");
+			return -ENODEV;
+		}
+		if ((spi_present_mask <= 0) || (spi_present_mask >= 256)) {
+			dev_err(&spi->dev, "invalid spi-present-mask\n");
+			return -ENODEV;
+		}
 
-	pdata = spi->dev.platform_data;
-	if (!pdata || !gpio_is_valid(pdata->base)) {
-		dev_dbg(&spi->dev, "invalid or missing platform data\n");
-		return -EINVAL;
-	}
-
-	for (addr = 0; addr < ARRAY_SIZE(pdata->chip); addr++) {
-		if (!pdata->chip[addr].is_present)
-			continue;
-		chips++;
-		if ((type == MCP_TYPE_S08) && (addr > 3)) {
-			dev_err(&spi->dev,
-				"mcp23s08 only supports address 0..3\n");
+		for (addr = 0; addr < ARRAY_SIZE(pdata->chip); addr++)
+			pullups[addr] = 0;
+	} else {
+		type = spi_get_device_id(spi)->driver_data;
+		pdata = spi->dev.platform_data;
+		if (!pdata || !gpio_is_valid(pdata->base)) {
+			dev_dbg(&spi->dev,
+					"invalid or missing platform data\n");
 			return -EINVAL;
 		}
+
+		for (addr = 0; addr < ARRAY_SIZE(pdata->chip); addr++) {
+			if (!pdata->chip[addr].is_present)
+				continue;
+			chips++;
+			if ((type == MCP_TYPE_S08) && (addr > 3)) {
+				dev_err(&spi->dev,
+					"mcp23s08 only supports address 0..3\n");
+				return -EINVAL;
+			}
+			spi_present_mask |= 1 << addr;
+			pullups[addr] = pdata->chip[addr].pullups;
+		}
+
+		if (!chips)
+			return -ENODEV;
+
+		base = pdata->base;
 	}
-	if (!chips)
-		return -ENODEV;
 
 	data = kzalloc(sizeof *data + chips * sizeof(struct mcp23s08),
 			GFP_KERNEL);
@@ -594,21 +667,22 @@
 		return -ENOMEM;
 	spi_set_drvdata(spi, data);
 
-	base = pdata->base;
 	for (addr = 0; addr < ARRAY_SIZE(pdata->chip); addr++) {
-		if (!pdata->chip[addr].is_present)
+		if (!(spi_present_mask & (1 << addr)))
 			continue;
 		chips--;
 		data->mcp[addr] = &data->chip[chips];
 		status = mcp23s08_probe_one(data->mcp[addr], &spi->dev, spi,
 					    0x40 | (addr << 1), type, base,
-					    pdata->chip[addr].pullups);
+					    pullups[addr]);
 		if (status < 0)
 			goto fail;
 
-		base += (type == MCP_TYPE_S17) ? 16 : 8;
+		if (base != -1)
+			base += (type == MCP_TYPE_S17) ? 16 : 8;
+		ngpio += (type == MCP_TYPE_S17) ? 16 : 8;
 	}
-	data->ngpio = base - pdata->base;
+	data->ngpio = ngpio;
 
 	/* NOTE:  these chips have a relatively sane IRQ framework, with
 	 * per-signal masking and level/edge triggering.  It's not yet
@@ -668,6 +742,7 @@
 	.driver = {
 		.name	= "mcp23s08",
 		.owner	= THIS_MODULE,
+		.of_match_table = of_match_ptr(mcp23s08_spi_of_match),
 	},
 };
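
The DT support added to mcp23s08 above keys the chip type off the match table's
.data pointer, falling back to the SPI device-id table on non-DT boots. A
condensed sketch of that dual path (error handling trimmed, "demo" names
hypothetical):

	#include <linux/of_device.h>
	#include <linux/spi/spi.h>

	enum { DEMO_TYPE_S08 = 1, DEMO_TYPE_S17 }; /* stand-ins for MCP_TYPE_* */

	static struct of_device_id demo_of_match[] = {
		{ .compatible = "mcp,mcp23s08", .data = (void *) DEMO_TYPE_S08 },
		{ .compatible = "mcp,mcp23s17", .data = (void *) DEMO_TYPE_S17 },
		{ },
	};

	static int demo_get_type(struct spi_device *spi)
	{
		const struct of_device_id *match;

		match = of_match_device(of_match_ptr(demo_of_match), &spi->dev);
		if (match)
			return (int)(unsigned long)match->data;	/* DT boot */
		return spi_get_device_id(spi)->driver_data; /* board-file boot */
	}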
 
diff --git a/drivers/gpio/gpio-mvebu.c b/drivers/gpio/gpio-mvebu.c
index 61a6fde..bf69a7e 100644
--- a/drivers/gpio/gpio-mvebu.c
+++ b/drivers/gpio/gpio-mvebu.c
@@ -117,7 +117,7 @@
 {
 	int cpu;
 
-	switch(mvchip->soc_variant) {
+	switch (mvchip->soc_variant) {
 	case MVEBU_GPIO_SOC_VARIANT_ORION:
 	case MVEBU_GPIO_SOC_VARIANT_MV78200:
 		return mvchip->membase + GPIO_EDGE_CAUSE_OFF;
@@ -133,7 +133,7 @@
 {
 	int cpu;
 
-	switch(mvchip->soc_variant) {
+	switch (mvchip->soc_variant) {
 	case MVEBU_GPIO_SOC_VARIANT_ORION:
 		return mvchip->membase + GPIO_EDGE_MASK_OFF;
 	case MVEBU_GPIO_SOC_VARIANT_MV78200:
@@ -151,7 +151,7 @@
 {
 	int cpu;
 
-	switch(mvchip->soc_variant) {
+	switch (mvchip->soc_variant) {
 	case MVEBU_GPIO_SOC_VARIANT_ORION:
 		return mvchip->membase + GPIO_LEVEL_MASK_OFF;
 	case MVEBU_GPIO_SOC_VARIANT_MV78200:
@@ -401,7 +401,7 @@
 	/*
 	 * Configure interrupt polarity.
 	 */
-	switch(type) {
+	switch (type) {
 	case IRQ_TYPE_EDGE_RISING:
 	case IRQ_TYPE_LEVEL_HIGH:
 		u = readl_relaxed(mvebu_gpioreg_in_pol(mvchip));
@@ -470,18 +470,76 @@
 	}
 }
 
+#ifdef CONFIG_DEBUG_FS
+#include <linux/seq_file.h>
+
+static void mvebu_gpio_dbg_show(struct seq_file *s, struct gpio_chip *chip)
+{
+	struct mvebu_gpio_chip *mvchip =
+		container_of(chip, struct mvebu_gpio_chip, chip);
+	u32 out, io_conf, blink, in_pol, data_in, cause, edg_msk, lvl_msk;
+	int i;
+
+	out	= readl_relaxed(mvebu_gpioreg_out(mvchip));
+	io_conf	= readl_relaxed(mvebu_gpioreg_io_conf(mvchip));
+	blink	= readl_relaxed(mvebu_gpioreg_blink(mvchip));
+	in_pol	= readl_relaxed(mvebu_gpioreg_in_pol(mvchip));
+	data_in	= readl_relaxed(mvebu_gpioreg_data_in(mvchip));
+	cause	= readl_relaxed(mvebu_gpioreg_edge_cause(mvchip));
+	edg_msk	= readl_relaxed(mvebu_gpioreg_edge_mask(mvchip));
+	lvl_msk	= readl_relaxed(mvebu_gpioreg_level_mask(mvchip));
+
+	for (i = 0; i < chip->ngpio; i++) {
+		const char *label;
+		u32 msk;
+		bool is_out;
+
+		label = gpiochip_is_requested(chip, i);
+		if (!label)
+			continue;
+
+		msk = 1 << i;
+		is_out = !(io_conf & msk);
+
+		seq_printf(s, " gpio-%-3d (%-20.20s)", chip->base + i, label);
+
+		if (is_out) {
+			seq_printf(s, " out %s %s\n",
+				   out & msk ? "hi" : "lo",
+				   blink & msk ? "(blink )" : "");
+			continue;
+		}
+
+		seq_printf(s, " in  %s (act %s) - IRQ",
+			   (data_in ^ in_pol) & msk  ? "hi" : "lo",
+			   in_pol & msk ? "lo" : "hi");
+		if (!((edg_msk | lvl_msk) & msk)) {
+			seq_printf(s, " disabled\n");
+			continue;
+		}
+		if (edg_msk & msk)
+			seq_printf(s, " edge ");
+		if (lvl_msk & msk)
+			seq_printf(s, " level");
+		seq_printf(s, " (%s)\n", cause & msk ? "pending" : "clear  ");
+	}
+}
+#else
+#define mvebu_gpio_dbg_show NULL
+#endif
+
 static struct of_device_id mvebu_gpio_of_match[] = {
 	{
 		.compatible = "marvell,orion-gpio",
-		.data       = (void*) MVEBU_GPIO_SOC_VARIANT_ORION,
+		.data       = (void *) MVEBU_GPIO_SOC_VARIANT_ORION,
 	},
 	{
 		.compatible = "marvell,mv78200-gpio",
-		.data       = (void*) MVEBU_GPIO_SOC_VARIANT_MV78200,
+		.data       = (void *) MVEBU_GPIO_SOC_VARIANT_MV78200,
 	},
 	{
 		.compatible = "marvell,armadaxp-gpio",
-		.data       = (void*) MVEBU_GPIO_SOC_VARIANT_ARMADAXP,
+		.data       = (void *) MVEBU_GPIO_SOC_VARIANT_ARMADAXP,
 	},
 	{
 		/* sentinel */
@@ -509,13 +567,13 @@
 		soc_variant = MVEBU_GPIO_SOC_VARIANT_ORION;
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (! res) {
+	if (!res) {
 		dev_err(&pdev->dev, "Cannot get memory resource\n");
 		return -ENODEV;
 	}
 
 	mvchip = devm_kzalloc(&pdev->dev, sizeof(struct mvebu_gpio_chip), GFP_KERNEL);
-	if (! mvchip){
+	if (!mvchip) {
 		dev_err(&pdev->dev, "Cannot allocate memory\n");
 		return -ENOMEM;
 	}
@@ -550,6 +608,7 @@
 	mvchip->chip.ngpio = ngpios;
 	mvchip->chip.can_sleep = 0;
 	mvchip->chip.of_node = np;
+	mvchip->chip.dbg_show = mvebu_gpio_dbg_show;
 
 	spin_lock_init(&mvchip->lock);
 	mvchip->membase = devm_ioremap_resource(&pdev->dev, res);
@@ -560,21 +619,21 @@
 	 * per-CPU registers */
 	if (soc_variant == MVEBU_GPIO_SOC_VARIANT_ARMADAXP) {
 		res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
-		if (! res) {
+		if (!res) {
 			dev_err(&pdev->dev, "Cannot get memory resource\n");
 			return -ENODEV;
 		}
 
 		mvchip->percpu_membase = devm_ioremap_resource(&pdev->dev,
 							       res);
-		if (IS_ERR(mvchip->percpu_membase)) 
+		if (IS_ERR(mvchip->percpu_membase))
 			return PTR_ERR(mvchip->percpu_membase);
 	}
 
 	/*
 	 * Mask and clear GPIO interrupts.
 	 */
-	switch(soc_variant) {
+	switch (soc_variant) {
 	case MVEBU_GPIO_SOC_VARIANT_ORION:
 		writel_relaxed(0, mvchip->membase + GPIO_EDGE_CAUSE_OFF);
 		writel_relaxed(0, mvchip->membase + GPIO_EDGE_MASK_OFF);
@@ -632,7 +691,7 @@
 
 	gc = irq_alloc_generic_chip("mvebu_gpio_irq", 2, mvchip->irqbase,
 				    mvchip->membase, handle_level_irq);
-	if (! gc) {
+	if (!gc) {
 		dev_err(&pdev->dev, "Cannot allocate generic irq_chip\n");
 		return -ENOMEM;
 	}
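
The new .dbg_show callback added to gpio-mvebu above feeds /sys/kernel/debug/gpio;
gpiolib calls it in place of its default per-line dump. A bare-bones version of
the same hook, printing only requested lines as the mvebu code does:

	#include <linux/gpio.h>
	#include <linux/seq_file.h>

	static void demo_gpio_dbg_show(struct seq_file *s, struct gpio_chip *chip)
	{
		int i;

		for (i = 0; i < chip->ngpio; i++) {
			const char *label = gpiochip_is_requested(chip, i);

			if (!label)
				continue;	/* skip unclaimed lines */

			seq_printf(s, " gpio-%-3d (%-20.20s)\n",
				   chip->base + i, label);
		}
	}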
diff --git a/drivers/gpio/gpio-omap.c b/drivers/gpio/gpio-omap.c
index a612ea1..2050891 100644
--- a/drivers/gpio/gpio-omap.c
+++ b/drivers/gpio/gpio-omap.c
@@ -52,7 +52,6 @@
 	struct list_head node;
 	void __iomem *base;
 	u16 irq;
-	int irq_base;
 	struct irq_domain *domain;
 	u32 non_wakeup_gpios;
 	u32 enabled_non_wakeup_gpios;
@@ -88,7 +87,14 @@
 
 static int irq_to_gpio(struct gpio_bank *bank, unsigned int gpio_irq)
 {
-	return gpio_irq - bank->irq_base + bank->chip.base;
+	return bank->chip.base + gpio_irq;
+}
+
+static int omap_gpio_to_irq(struct gpio_chip *chip, unsigned offset)
+{
+	struct gpio_bank *bank = container_of(chip, struct gpio_bank, chip);
+
+	return irq_find_mapping(bank->domain, offset);
 }
 
 static void _set_gpio_direction(struct gpio_bank *bank, int gpio, int is_input)
@@ -420,13 +426,16 @@
 	int retval;
 	unsigned long flags;
 
+	if (WARN_ON(!bank->mod_usage))
+		return -EINVAL;
+
 #ifdef CONFIG_ARCH_OMAP1
 	if (d->irq > IH_MPUIO_BASE)
 		gpio = OMAP_MPUIO(d->irq - IH_MPUIO_BASE);
 #endif
 
 	if (!gpio)
-		gpio = irq_to_gpio(bank, d->irq);
+		gpio = irq_to_gpio(bank, d->hwirq);
 
 	if (type & ~IRQ_TYPE_SENSE_MASK)
 		return -EINVAL;
@@ -579,7 +588,7 @@
 static int gpio_wake_enable(struct irq_data *d, unsigned int enable)
 {
 	struct gpio_bank *bank = irq_data_get_irq_chip_data(d);
-	unsigned int gpio = irq_to_gpio(bank, d->irq);
+	unsigned int gpio = irq_to_gpio(bank, d->hwirq);
 
 	return _set_gpio_wakeup(bank, gpio, enable);
 }
@@ -679,7 +688,7 @@
 {
 	void __iomem *isr_reg = NULL;
 	u32 isr;
-	unsigned int gpio_irq, gpio_index;
+	unsigned int bit;
 	struct gpio_bank *bank;
 	int unmasked = 0;
 	struct irq_chip *chip = irq_desc_get_chip(desc);
@@ -693,7 +702,7 @@
 	if (WARN_ON(!isr_reg))
 		goto exit;
 
-	while(1) {
+	while (1) {
 		u32 isr_saved, level_mask = 0;
 		u32 enabled;
 
@@ -720,14 +729,9 @@
 		if (!isr)
 			break;
 
-		gpio_irq = bank->irq_base;
-		for (; isr != 0; isr >>= 1, gpio_irq++) {
-			int gpio = irq_to_gpio(bank, gpio_irq);
-
-			if (!(isr & 1))
-				continue;
-
-			gpio_index = GPIO_INDEX(bank, gpio);
+		while (isr) {
+			bit = __ffs(isr);
+			isr &= ~(1 << bit);
 
 			/*
 			 * Some chips can't respond to both rising and falling
@@ -736,10 +740,10 @@
 			 * to respond to the IRQ for the opposite direction.
 			 * This will be indicated in the bank toggle_mask.
 			 */
-			if (bank->toggle_mask & (1 << gpio_index))
-				_toggle_gpio_edge_triggering(bank, gpio_index);
+			if (bank->toggle_mask & (1 << bit))
+				_toggle_gpio_edge_triggering(bank, bit);
 
-			generic_handle_irq(gpio_irq);
+			generic_handle_irq(irq_find_mapping(bank->domain, bit));
 		}
 	}
 	/* if bank has any level sensitive GPIO pin interrupt
@@ -755,7 +759,7 @@
 static void gpio_irq_shutdown(struct irq_data *d)
 {
 	struct gpio_bank *bank = irq_data_get_irq_chip_data(d);
-	unsigned int gpio = irq_to_gpio(bank, d->irq);
+	unsigned int gpio = irq_to_gpio(bank, d->hwirq);
 	unsigned long flags;
 
 	spin_lock_irqsave(&bank->lock, flags);
@@ -766,7 +770,7 @@
 static void gpio_ack_irq(struct irq_data *d)
 {
 	struct gpio_bank *bank = irq_data_get_irq_chip_data(d);
-	unsigned int gpio = irq_to_gpio(bank, d->irq);
+	unsigned int gpio = irq_to_gpio(bank, d->hwirq);
 
 	_clear_gpio_irqstatus(bank, gpio);
 }
@@ -774,7 +778,7 @@
 static void gpio_mask_irq(struct irq_data *d)
 {
 	struct gpio_bank *bank = irq_data_get_irq_chip_data(d);
-	unsigned int gpio = irq_to_gpio(bank, d->irq);
+	unsigned int gpio = irq_to_gpio(bank, d->hwirq);
 	unsigned long flags;
 
 	spin_lock_irqsave(&bank->lock, flags);
@@ -786,7 +790,7 @@
 static void gpio_unmask_irq(struct irq_data *d)
 {
 	struct gpio_bank *bank = irq_data_get_irq_chip_data(d);
-	unsigned int gpio = irq_to_gpio(bank, d->irq);
+	unsigned int gpio = irq_to_gpio(bank, d->hwirq);
 	unsigned int irq_mask = GPIO_BIT(bank, gpio);
 	u32 trigger = irqd_get_trigger_type(d);
 	unsigned long flags;
@@ -952,14 +956,6 @@
 	spin_unlock_irqrestore(&bank->lock, flags);
 }
 
-static int gpio_2irq(struct gpio_chip *chip, unsigned offset)
-{
-	struct gpio_bank *bank;
-
-	bank = container_of(chip, struct gpio_bank, chip);
-	return bank->irq_base + offset;
-}
-
 /*---------------------------------------------------------------------*/
 
 static void __init omap_gpio_show_rev(struct gpio_bank *bank)
@@ -1056,7 +1052,7 @@
 	bank->chip.direction_output = gpio_output;
 	bank->chip.set_debounce = gpio_debounce;
 	bank->chip.set = gpio_set;
-	bank->chip.to_irq = gpio_2irq;
+	bank->chip.to_irq = omap_gpio_to_irq;
 	if (bank->is_mpuio) {
 		bank->chip.label = "mpuio";
 		if (bank->regs->wkup_en)
@@ -1071,15 +1067,16 @@
 
 	gpiochip_add(&bank->chip);
 
-	for (j = bank->irq_base; j < bank->irq_base + bank->width; j++) {
-		irq_set_lockdep_class(j, &gpio_lock_class);
-		irq_set_chip_data(j, bank);
+	for (j = 0; j < bank->width; j++) {
+		int irq = irq_create_mapping(bank->domain, j);
+		irq_set_lockdep_class(irq, &gpio_lock_class);
+		irq_set_chip_data(irq, bank);
 		if (bank->is_mpuio) {
-			omap_mpuio_alloc_gc(bank, j, bank->width);
+			omap_mpuio_alloc_gc(bank, irq, bank->width);
 		} else {
-			irq_set_chip(j, &gpio_irq_chip);
-			irq_set_handler(j, handle_simple_irq);
-			set_irq_flags(j, IRQF_VALID);
+			irq_set_chip_and_handler(irq, &gpio_irq_chip,
+						 handle_simple_irq);
+			set_irq_flags(irq, IRQF_VALID);
 		}
 	}
 	irq_set_chained_handler(bank->irq, gpio_irq_handler);
@@ -1096,7 +1093,6 @@
 	const struct omap_gpio_platform_data *pdata;
 	struct resource *res;
 	struct gpio_bank *bank;
-	int ret = 0;
 
 	match = of_match_device(of_match_ptr(omap_gpio_match), dev);
 
@@ -1123,20 +1119,22 @@
 	bank->width = pdata->bank_width;
 	bank->is_mpuio = pdata->is_mpuio;
 	bank->non_wakeup_gpios = pdata->non_wakeup_gpios;
-	bank->loses_context = pdata->loses_context;
 	bank->regs = pdata->regs;
 #ifdef CONFIG_OF_GPIO
 	bank->chip.of_node = of_node_get(node);
 #endif
-
-	bank->irq_base = irq_alloc_descs(-1, 0, bank->width, 0);
-	if (bank->irq_base < 0) {
-		dev_err(dev, "Couldn't allocate IRQ numbers\n");
-		return -ENODEV;
+	if (node) {
+		if (!of_property_read_bool(node, "ti,gpio-always-on"))
+			bank->loses_context = true;
+	} else {
+		bank->loses_context = pdata->loses_context;
 	}
 
-	bank->domain = irq_domain_add_legacy(node, bank->width, bank->irq_base,
-					     0, &irq_domain_simple_ops, NULL);
+
+	bank->domain = irq_domain_add_linear(node, bank->width,
+					     &irq_domain_simple_ops, NULL);
+	if (!bank->domain)
+		return -ENODEV;
 
 	if (bank->regs->set_dataout && bank->regs->clr_dataout)
 		bank->set_dataout = _set_gpio_dataout_reg;
@@ -1149,18 +1147,21 @@
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	if (unlikely(!res)) {
 		dev_err(dev, "Invalid mem resource\n");
+		irq_domain_remove(bank->domain);
 		return -ENODEV;
 	}
 
 	if (!devm_request_mem_region(dev, res->start, resource_size(res),
 				     pdev->name)) {
 		dev_err(dev, "Region already claimed\n");
+		irq_domain_remove(bank->domain);
 		return -EBUSY;
 	}
 
 	bank->base = devm_ioremap(dev, res->start, resource_size(res));
 	if (!bank->base) {
 		dev_err(dev, "Could not ioremap\n");
+		irq_domain_remove(bank->domain);
 		return -ENOMEM;
 	}
 
@@ -1184,7 +1185,7 @@
 
 	list_add_tail(&bank->node, &omap_gpio_list);
 
-	return ret;
+	return 0;
 }
 
 #ifdef CONFIG_ARCH_OMAP2PLUS
@@ -1262,9 +1263,9 @@
 {
 	struct platform_device *pdev = to_platform_device(dev);
 	struct gpio_bank *bank = platform_get_drvdata(pdev);
-	int context_lost_cnt_after;
 	u32 l = 0, gen, gen0, gen1;
 	unsigned long flags;
+	int c;
 
 	spin_lock_irqsave(&bank->lock, flags);
 	_gpio_dbck_enable(bank);
@@ -1280,14 +1281,17 @@
 	__raw_writel(bank->context.risingdetect,
 		     bank->base + bank->regs->risingdetect);
 
-	if (bank->get_context_loss_count) {
-		context_lost_cnt_after =
-			bank->get_context_loss_count(bank->dev);
-		if (context_lost_cnt_after != bank->context_loss_count) {
+	if (bank->loses_context) {
+		if (!bank->get_context_loss_count) {
 			omap_gpio_restore_context(bank);
 		} else {
-			spin_unlock_irqrestore(&bank->lock, flags);
-			return 0;
+			c = bank->get_context_loss_count(bank->dev);
+			if (c != bank->context_loss_count) {
+				omap_gpio_restore_context(bank);
+			} else {
+				spin_unlock_irqrestore(&bank->lock, flags);
+				return 0;
+			}
 		}
 	}
 
@@ -1296,10 +1300,6 @@
 		return 0;
 	}
 
-	__raw_writel(bank->context.fallingdetect,
-			bank->base + bank->regs->fallingdetect);
-	__raw_writel(bank->context.risingdetect,
-			bank->base + bank->regs->risingdetect);
 	l = __raw_readl(bank->base + bank->regs->datain);
 
 	/*
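
The OMAP rework above replaces a legacy (preallocated) IRQ domain with a linear
one: descriptors are created per bit with irq_create_mapping() at probe time, and
every place that used IRQ-number arithmetic now goes through irq_find_mapping() or
d->hwirq. The pattern in isolation ("demo" names hypothetical):

	#include <linux/irq.h>
	#include <linux/irqdomain.h>

	static int demo_setup(struct device_node *node, unsigned int width)
	{
		struct irq_domain *domain;
		unsigned int hwirq;

		domain = irq_domain_add_linear(node, width,
					       &irq_domain_simple_ops, NULL);
		if (!domain)
			return -ENODEV;

		for (hwirq = 0; hwirq < width; hwirq++)
			irq_create_mapping(domain, hwirq); /* allocate virq now */
		return 0;
	}

	/* Chained handler: walk pending bits lowest-first, as the patch does. */
	static void demo_handle_bank(struct irq_domain *domain, u32 isr)
	{
		while (isr) {
			unsigned int bit = __ffs(isr);

			isr &= ~(1 << bit);
			generic_handle_irq(irq_find_mapping(domain, bit));
		}
	}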
diff --git a/drivers/gpio/gpio-pca953x.c b/drivers/gpio/gpio-pca953x.c
index 9391cf1..426c51d 100644
--- a/drivers/gpio/gpio-pca953x.c
+++ b/drivers/gpio/gpio-pca953x.c
@@ -146,8 +146,7 @@
 		ret = i2c_smbus_write_i2c_block_data(chip->client,
 					(reg << bank_shift) | REG_ADDR_AI,
 					NBANK(chip), val);
-	}
-	else {
+	} else {
 		switch (chip->chip_type) {
 		case PCA953X_TYPE:
 			ret = i2c_smbus_write_word_data(chip->client,
diff --git a/drivers/gpio/gpio-pcf857x.c b/drivers/gpio/gpio-pcf857x.c
index a19b745..e8faf53 100644
--- a/drivers/gpio/gpio-pcf857x.c
+++ b/drivers/gpio/gpio-pcf857x.c
@@ -45,6 +45,7 @@
 	{ "pca9675", 16 },
 	{ "max7328", 8 },
 	{ "max7329", 8 },
+	{ "tca9554", 8 },
 	{ }
 };
 MODULE_DEVICE_TABLE(i2c, pcf857x_id);
@@ -267,7 +268,7 @@
 	}
 
 	/* Allocate, initialize, and register this gpio_chip. */
-	gpio = kzalloc(sizeof *gpio, GFP_KERNEL);
+	gpio = devm_kzalloc(&client->dev, sizeof(*gpio), GFP_KERNEL);
 	if (!gpio)
 		return -ENOMEM;
 
@@ -390,7 +391,6 @@
 	if (pdata && client->irq)
 		pcf857x_irq_domain_cleanup(gpio);
 
-	kfree(gpio);
 	return status;
 }
 
@@ -415,9 +415,7 @@
 		pcf857x_irq_domain_cleanup(gpio);
 
 	status = gpiochip_remove(&gpio->chip);
-	if (status == 0)
-		kfree(gpio);
-	else
+	if (status)
 		dev_err(&client->dev, "%s --> %d\n", "remove", status);
 	return status;
 }
diff --git a/drivers/gpio/gpio-pxa.c b/drivers/gpio/gpio-pxa.c
index d7a5c9d7..df2199d 100644
--- a/drivers/gpio/gpio-pxa.c
+++ b/drivers/gpio/gpio-pxa.c
@@ -628,7 +628,7 @@
 	for_each_gpio_chip(gpio, c) {
 		writel_relaxed(0, c->regbase + GFER_OFFSET);
 		writel_relaxed(0, c->regbase + GRER_OFFSET);
-		writel_relaxed(~0,c->regbase + GEDR_OFFSET);
+		writel_relaxed(~0, c->regbase + GEDR_OFFSET);
 		/* unmask GPIO edge detect for AP side */
 		if (gpio_is_mmp_type(gpio_type))
 			writel_relaxed(~0, c->regbase + ED_MASK_OFFSET);
@@ -712,7 +712,7 @@
 
 	for_each_gpio_chip(gpio, c) {
 		/* restore level with set/clear */
-		writel_relaxed( c->saved_gplr, c->regbase + GPSR_OFFSET);
+		writel_relaxed(c->saved_gplr, c->regbase + GPSR_OFFSET);
 		writel_relaxed(~c->saved_gplr, c->regbase + GPCR_OFFSET);
 
 		writel_relaxed(c->saved_grer, c->regbase + GRER_OFFSET);
diff --git a/drivers/gpio/gpio-samsung.c b/drivers/gpio/gpio-samsung.c
index 99e0fa4..b22ca79 100644
--- a/drivers/gpio/gpio-samsung.c
+++ b/drivers/gpio/gpio-samsung.c
@@ -3030,6 +3030,7 @@
 		{ .compatible = "samsung,exynos4x12-pinctrl", },
 		{ .compatible = "samsung,exynos5250-pinctrl", },
 		{ .compatible = "samsung,exynos5440-pinctrl", },
+		{ }
 	};
 	for_each_matching_node(pctrl_np, exynos_pinctrl_ids)
 		if (pctrl_np && of_device_is_available(pctrl_np))
diff --git a/drivers/gpio/gpio-sch.c b/drivers/gpio/gpio-sch.c
index edae963..1e4de16 100644
--- a/drivers/gpio/gpio-sch.c
+++ b/drivers/gpio/gpio-sch.c
@@ -125,13 +125,17 @@
 					unsigned gpio_num)
 {
 	u8 curr_dirs;
+	unsigned short offset, bit;
 
 	spin_lock(&gpio_lock);
 
-	curr_dirs = inb(gpio_ba + RGIO);
+	offset = RGIO + gpio_num / 8;
+	bit = gpio_num % 8;
 
-	if (!(curr_dirs & (1 << gpio_num)))
-		outb(curr_dirs | (1 << gpio_num) , gpio_ba + RGIO);
+	curr_dirs = inb(gpio_ba + offset);
+
+	if (!(curr_dirs & (1 << bit)))
+		outb(curr_dirs | (1 << bit), gpio_ba + offset);
 
 	spin_unlock(&gpio_lock);
 	return 0;
@@ -139,22 +143,31 @@
 
 static int sch_gpio_resume_get(struct gpio_chip *gc, unsigned gpio_num)
 {
-	return !!(inb(gpio_ba + RGLV) & (1 << gpio_num));
+	unsigned short offset, bit;
+
+	offset = RGLV + gpio_num / 8;
+	bit = gpio_num % 8;
+
+	return !!(inb(gpio_ba + offset) & (1 << bit));
 }
 
 static void sch_gpio_resume_set(struct gpio_chip *gc,
 				unsigned gpio_num, int val)
 {
 	u8 curr_vals;
+	unsigned short offset, bit;
 
 	spin_lock(&gpio_lock);
 
-	curr_vals = inb(gpio_ba + RGLV);
+	offset = RGLV + gpio_num / 8;
+	bit = gpio_num % 8;
+
+	curr_vals = inb(gpio_ba + offset);
 
 	if (val)
-		outb(curr_vals | (1 << gpio_num), gpio_ba + RGLV);
+		outb(curr_vals | (1 << bit), gpio_ba + offset);
 	else
-		outb((curr_vals & ~(1 << gpio_num)), gpio_ba + RGLV);
+		outb((curr_vals & ~(1 << bit)), gpio_ba + offset);
 
 	spin_unlock(&gpio_lock);
 }
@@ -163,14 +176,18 @@
 					unsigned gpio_num, int val)
 {
 	u8 curr_dirs;
+	unsigned short offset, bit;
 
 	sch_gpio_resume_set(gc, gpio_num, val);
 
+	offset = RGIO + gpio_num / 8;
+	bit = gpio_num % 8;
+
 	spin_lock(&gpio_lock);
 
-	curr_dirs = inb(gpio_ba + RGIO);
-	if (curr_dirs & (1 << gpio_num))
-		outb(curr_dirs & ~(1 << gpio_num), gpio_ba + RGIO);
+	curr_dirs = inb(gpio_ba + offset);
+	if (curr_dirs & (1 << bit))
+		outb(curr_dirs & ~(1 << bit), gpio_ba + offset);
 
 	spin_unlock(&gpio_lock);
 	return 0;
@@ -204,45 +221,41 @@
 	gpio_ba = res->start;
 
 	switch (id) {
-		case PCI_DEVICE_ID_INTEL_SCH_LPC:
-			sch_gpio_core.base = 0;
-			sch_gpio_core.ngpio = 10;
+	case PCI_DEVICE_ID_INTEL_SCH_LPC:
+		sch_gpio_core.base = 0;
+		sch_gpio_core.ngpio = 10;
+		sch_gpio_resume.base = 10;
+		sch_gpio_resume.ngpio = 4;
+		/*
+		 * GPIO[6:0] enabled by default
+		 * GPIO7 is configured by the CMC as SLPIOVR
+		 * Enable GPIO[9:8] core powered gpios explicitly
+		 */
+		outb(0x3, gpio_ba + CGEN + 1);
+		/*
+		 * SUS_GPIO[2:0] enabled by default
+		 * Enable SUS_GPIO3 resume powered gpio explicitly
+		 */
+		outb(0x8, gpio_ba + RGEN);
+		break;
 
-			sch_gpio_resume.base = 10;
-			sch_gpio_resume.ngpio = 4;
+	case PCI_DEVICE_ID_INTEL_ITC_LPC:
+		sch_gpio_core.base = 0;
+		sch_gpio_core.ngpio = 5;
+		sch_gpio_resume.base = 5;
+		sch_gpio_resume.ngpio = 9;
+		break;
 
-			/*
-			 * GPIO[6:0] enabled by default
-			 * GPIO7 is configured by the CMC as SLPIOVR
-			 * Enable GPIO[9:8] core powered gpios explicitly
-			 */
-			outb(0x3, gpio_ba + CGEN + 1);
-			/*
-			 * SUS_GPIO[2:0] enabled by default
-			 * Enable SUS_GPIO3 resume powered gpio explicitly
-			 */
-			outb(0x8, gpio_ba + RGEN);
-			break;
+	case PCI_DEVICE_ID_INTEL_CENTERTON_ILB:
+		sch_gpio_core.base = 0;
+		sch_gpio_core.ngpio = 21;
+		sch_gpio_resume.base = 21;
+		sch_gpio_resume.ngpio = 9;
+		break;
 
-		case PCI_DEVICE_ID_INTEL_ITC_LPC:
-			sch_gpio_core.base = 0;
-			sch_gpio_core.ngpio = 5;
-
-			sch_gpio_resume.base = 5;
-			sch_gpio_resume.ngpio = 9;
-			break;
-
-		case PCI_DEVICE_ID_INTEL_CENTERTON_ILB:
-			sch_gpio_core.base = 0;
-			sch_gpio_core.ngpio = 21;
-
-			sch_gpio_resume.base = 21;
-			sch_gpio_resume.ngpio = 9;
-			break;
-
-		default:
-			err = -ENODEV;
-			goto err_sch_gpio_core;
+	default:
+		err = -ENODEV;
+		goto err_sch_gpio_core;
 	}
 
 	sch_gpio_core.dev = &pdev->dev;
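
The gpio-sch rework above stops assuming all GPIOs fit in one 8-bit register:
GPIO n now lives at byte offset n / 8 within the register bank and bit n % 8
within that byte. A quick worked example of the arithmetic (plain C, no port I/O):

	#include <stdio.h>

	int main(void)
	{
		unsigned int gpio_num = 13;
		unsigned short offset = gpio_num / 8; /* second byte of the bank */
		unsigned short bit = gpio_num % 8;

		/* gpio 13 -> reg+1, mask 0x20 */
		printf("gpio %u -> reg+%u, mask 0x%02x\n",
		       gpio_num, offset, 1u << bit);
		return 0;
	}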
diff --git a/drivers/gpio/gpio-stp-xway.c b/drivers/gpio/gpio-stp-xway.c
index c20e051..04882a9 100644
--- a/drivers/gpio/gpio-stp-xway.c
+++ b/drivers/gpio/gpio-stp-xway.c
@@ -217,7 +217,7 @@
 	chip->virt = devm_ioremap_resource(&pdev->dev, res);
 	if (IS_ERR(chip->virt))
 		return PTR_ERR(chip->virt);
-	
+
 	chip->gc.dev = &pdev->dev;
 	chip->gc.label = "stp-xway";
 	chip->gc.direction_output = xway_stp_dir_out;
diff --git a/drivers/gpio/gpio-tc3589x.c b/drivers/gpio/gpio-tc3589x.c
index c0595bb..d34d80d 100644
--- a/drivers/gpio/gpio-tc3589x.c
+++ b/drivers/gpio/gpio-tc3589x.c
@@ -282,9 +282,9 @@
 }
 
 static struct irq_domain_ops tc3589x_irq_ops = {
-        .map    = tc3589x_gpio_irq_map,
-        .unmap  = tc3589x_gpio_irq_unmap,
-        .xlate  = irq_domain_xlate_twocell,
+	.map    = tc3589x_gpio_irq_map,
+	.unmap  = tc3589x_gpio_irq_unmap,
+	.xlate  = irq_domain_xlate_twocell,
 };
 
 static int tc3589x_gpio_irq_init(struct tc3589x_gpio *tc3589x_gpio,
@@ -344,7 +344,7 @@
 	tc3589x_gpio->chip.base = (pdata) ? pdata->gpio_base : -1;
 
 #ifdef CONFIG_OF_GPIO
-        tc3589x_gpio->chip.of_node = np;
+	tc3589x_gpio->chip.of_node = np;
 #endif
 
 	tc3589x_gpio->irq_base = tc3589x->irq_base ?
diff --git a/drivers/gpio/gpio-tegra.c b/drivers/gpio/gpio-tegra.c
index dde0656..da4cb5b 100644
--- a/drivers/gpio/gpio-tegra.c
+++ b/drivers/gpio/gpio-tegra.c
@@ -414,10 +414,11 @@
 	int j;
 
 	match = of_match_device(tegra_gpio_of_match, &pdev->dev);
-	if (match)
-		config = (struct tegra_gpio_soc_config *)match->data;
-	else
-		config = &tegra20_gpio_config;
+	if (!match) {
+		dev_err(&pdev->dev, "Error: No device match found\n");
+		return -ENODEV;
+	}
+	config = (struct tegra_gpio_soc_config *)match->data;
 
 	tegra_gpio_bank_stride = config->bank_stride;
 	tegra_gpio_upper_offset = config->upper_offset;
@@ -478,9 +479,7 @@
 		}
 	}
 
-#ifdef CONFIG_OF_GPIO
 	tegra_gpio_chip.of_node = pdev->dev.of_node;
-#endif
 
 	gpiochip_add(&tegra_gpio_chip);
 
diff --git a/drivers/gpio/gpio-timberdale.c b/drivers/gpio/gpio-timberdale.c
index 702cca9..4377405 100644
--- a/drivers/gpio/gpio-timberdale.c
+++ b/drivers/gpio/gpio-timberdale.c
@@ -167,8 +167,7 @@
 		if (ver < 3) {
 			ret = -EINVAL;
 			goto out;
-		}
-		else {
+		} else {
 			flr |= 1 << offset;
 			bflr |= 1 << offset;
 		}
diff --git a/drivers/gpio/gpio-tps65910.c b/drivers/gpio/gpio-tps65910.c
index 5083825..06146219 100644
--- a/drivers/gpio/gpio-tps65910.c
+++ b/drivers/gpio/gpio-tps65910.c
@@ -133,7 +133,7 @@
 	tps65910_gpio->gpio_chip.owner = THIS_MODULE;
 	tps65910_gpio->gpio_chip.label = tps65910->i2c_client->name;
 
-	switch(tps65910_chip_id(tps65910)) {
+	switch (tps65910_chip_id(tps65910)) {
 	case TPS65910:
 		tps65910_gpio->gpio_chip.ngpio = TPS65910_NUM_GPIO;
 		break;
diff --git a/drivers/gpio/gpio-ucb1400.c b/drivers/gpio/gpio-ucb1400.c
index 26405ef..6d0feb2 100644
--- a/drivers/gpio/gpio-ucb1400.c
+++ b/drivers/gpio/gpio-ucb1400.c
@@ -12,8 +12,6 @@
 #include <linux/module.h>
 #include <linux/ucb1400.h>
 
-struct ucb1400_gpio_data *ucbdata;
-
 static int ucb1400_gpio_dir_in(struct gpio_chip *gc, unsigned off)
 {
 	struct ucb1400_gpio *gpio;
@@ -50,7 +48,7 @@
 	struct ucb1400_gpio *ucb = dev->dev.platform_data;
 	int err = 0;
 
-	if (!(ucbdata && ucbdata->gpio_offset)) {
+	if (!(ucb && ucb->gpio_offset)) {
 		err = -EINVAL;
 		goto err;
 	}
@@ -58,7 +56,7 @@
 	platform_set_drvdata(dev, ucb);
 
 	ucb->gc.label = "ucb1400_gpio";
-	ucb->gc.base = ucbdata->gpio_offset;
+	ucb->gc.base = ucb->gpio_offset;
 	ucb->gc.ngpio = 10;
 	ucb->gc.owner = THIS_MODULE;
 
@@ -72,8 +70,8 @@
 	if (err)
 		goto err;
 
-	if (ucbdata && ucbdata->gpio_setup)
-		err = ucbdata->gpio_setup(&dev->dev, ucb->gc.ngpio);
+	if (ucb && ucb->gpio_setup)
+		err = ucb->gpio_setup(&dev->dev, ucb->gc.ngpio);
 
 err:
 	return err;
@@ -85,8 +83,8 @@
 	int err = 0;
 	struct ucb1400_gpio *ucb = platform_get_drvdata(dev);
 
-	if (ucbdata && ucbdata->gpio_teardown) {
-		err = ucbdata->gpio_teardown(&dev->dev, ucb->gc.ngpio);
+	if (ucb && ucb->gpio_teardown) {
+		err = ucb->gpio_teardown(&dev->dev, ucb->gc.ngpio);
 		if (err)
 			return err;
 	}
@@ -103,11 +101,6 @@
 	},
 };
 
-void __init ucb1400_gpio_set_data(struct ucb1400_gpio_data *data)
-{
-	ucbdata = data;
-}
-
 module_platform_driver(ucb1400_gpio_driver);
 
 MODULE_DESCRIPTION("Philips UCB1400 GPIO driver");
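
The ucb1400 cleanup above drops the file-scope ucbdata pointer (and the setter
exported to fill it) in favour of the platform data already attached to the
device, so each driver instance carries its own configuration. The resulting
flow, condensed with hypothetical names:

	#include <linux/platform_device.h>

	struct demo_pdata {
		int gpio_offset;	/* stand-in for the ucb1400_gpio fields */
	};

	static int demo_probe(struct platform_device *pdev)
	{
		struct demo_pdata *pd = pdev->dev.platform_data;

		if (!pd || !pd->gpio_offset)
			return -EINVAL;	/* same guard the patch applies */

		platform_set_drvdata(pdev, pd);
		return 0;
	}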
diff --git a/drivers/gpio/gpio-viperboard.c b/drivers/gpio/gpio-viperboard.c
index 59d7239..095ab14 100644
--- a/drivers/gpio/gpio-viperboard.c
+++ b/drivers/gpio/gpio-viperboard.c
@@ -380,10 +380,6 @@
 	struct vprbrd *vb = gpio->vb;
 
 	gpio->gpiob_out |= (1 << offset);
-	if (value)
-		gpio->gpiob_val |= (1 << offset);
-	else
-		gpio->gpiob_val &= ~(1 << offset);
 
 	mutex_lock(&vb->lock);
 
diff --git a/drivers/gpio/gpiolib-acpi.c b/drivers/gpio/gpiolib-acpi.c
index a063eb0..5c1ef2b 100644
--- a/drivers/gpio/gpiolib-acpi.c
+++ b/drivers/gpio/gpiolib-acpi.c
@@ -17,6 +17,13 @@
 #include <linux/acpi.h>
 #include <linux/interrupt.h>
 
+struct acpi_gpio_evt_pin {
+	struct list_head node;
+	acpi_handle *evt_handle;
+	unsigned int pin;
+	unsigned int irq;
+};
+
 static int acpi_gpiochip_find(struct gpio_chip *gc, void *data)
 {
 	if (!gc->dev)
@@ -54,7 +61,6 @@
 }
 EXPORT_SYMBOL_GPL(acpi_get_gpio);
 
-
 static irqreturn_t acpi_gpio_irq_handler(int irq, void *data)
 {
 	acpi_handle handle = data;
@@ -64,6 +70,27 @@
 	return IRQ_HANDLED;
 }
 
+static irqreturn_t acpi_gpio_irq_handler_evt(int irq, void *data)
+{
+	struct acpi_gpio_evt_pin *evt_pin = data;
+	struct acpi_object_list args;
+	union acpi_object arg;
+
+	arg.type = ACPI_TYPE_INTEGER;
+	arg.integer.value = evt_pin->pin;
+	args.count = 1;
+	args.pointer = &arg;
+
+	acpi_evaluate_object(evt_pin->evt_handle, NULL, &args, NULL);
+
+	return IRQ_HANDLED;
+}
+
+static void acpi_gpio_evt_dh(acpi_handle handle, void *data)
+{
+	/* The address of this function is used as a key. */
+}
+
 /**
  * acpi_gpiochip_request_interrupts() - Register isr for gpio chip ACPI events
  * @chip:      gpio chip
@@ -73,15 +100,13 @@
  * chip's interrupt handler. acpi_gpiochip_request_interrupts finds out which
  * gpio pins have acpi event methods and assigns interrupt handlers that call
  * the acpi event methods for those pins.
- *
- * Interrupts are automatically freed on driver detach
  */
-
 void acpi_gpiochip_request_interrupts(struct gpio_chip *chip)
 {
 	struct acpi_buffer buf = {ACPI_ALLOCATE_BUFFER, NULL};
 	struct acpi_resource *res;
-	acpi_handle handle, ev_handle;
+	acpi_handle handle, evt_handle;
+	struct list_head *evt_pins = NULL;
 	acpi_status status;
 	unsigned int pin;
 	int irq, ret;
@@ -98,13 +123,30 @@
 	if (ACPI_FAILURE(status))
 		return;
 
-	/* If a gpio interrupt has an acpi event handler method, then
-	 * set up an interrupt handler that calls the acpi event handler
-	 */
+	status = acpi_get_handle(handle, "_EVT", &evt_handle);
+	if (ACPI_SUCCESS(status)) {
+		evt_pins = kzalloc(sizeof(*evt_pins), GFP_KERNEL);
+		if (evt_pins) {
+			INIT_LIST_HEAD(evt_pins);
+			status = acpi_attach_data(handle, acpi_gpio_evt_dh,
+						  evt_pins);
+			if (ACPI_FAILURE(status)) {
+				kfree(evt_pins);
+				evt_pins = NULL;
+			}
+		}
+	}
 
+	/*
+	 * If a GPIO interrupt has an ACPI event handler method, or _EVT is
+	 * present, set up an interrupt handler that calls the ACPI event
+	 * handler.
+	 */
 	for (res = buf.pointer;
 	     res && (res->type != ACPI_RESOURCE_TYPE_END_TAG);
 	     res = ACPI_NEXT_RESOURCE(res)) {
+		irq_handler_t handler = NULL;
+		void *data;
 
 		if (res->type != ACPI_RESOURCE_TYPE_GPIO ||
 		    res->data.gpio.connection_type !=
@@ -115,23 +157,42 @@
 		if (pin > chip->ngpio)
 			continue;
 
-		sprintf(ev_name, "_%c%02X",
-		res->data.gpio.triggering ? 'E' : 'L', pin);
-
-		status = acpi_get_handle(handle, ev_name, &ev_handle);
-		if (ACPI_FAILURE(status))
-			continue;
-
 		irq = chip->to_irq(chip, pin);
 		if (irq < 0)
 			continue;
 
+		if (pin <= 255) {
+			acpi_handle ev_handle;
+
+			sprintf(ev_name, "_%c%02X",
+				res->data.gpio.triggering ? 'E' : 'L', pin);
+			status = acpi_get_handle(handle, ev_name, &ev_handle);
+			if (ACPI_SUCCESS(status)) {
+				handler = acpi_gpio_irq_handler;
+				data = ev_handle;
+			}
+		}
+		if (!handler && evt_pins) {
+			struct acpi_gpio_evt_pin *evt_pin;
+
+			evt_pin = kzalloc(sizeof(*evt_pin), GFP_KERNEL);
+			if (!evt_pin)
+				continue;
+
+			list_add_tail(&evt_pin->node, evt_pins);
+			evt_pin->evt_handle = evt_handle;
+			evt_pin->pin = pin;
+			evt_pin->irq = irq;
+			handler = acpi_gpio_irq_handler_evt;
+			data = evt_pin;
+		}
+		if (!handler)
+			continue;
+
 		/* Assume BIOS sets the triggering, so no flags */
-		ret = devm_request_threaded_irq(chip->dev, irq, NULL,
-					  acpi_gpio_irq_handler,
-					  0,
-					  "GPIO-signaled-ACPI-event",
-					  ev_handle);
+		ret = devm_request_threaded_irq(chip->dev, irq, NULL, handler,
+						0, "GPIO-signaled-ACPI-event",
+						data);
 		if (ret)
 			dev_err(chip->dev,
 				"Failed to request IRQ %d ACPI event handler\n",
@@ -139,3 +200,119 @@
 	}
 }
 EXPORT_SYMBOL(acpi_gpiochip_request_interrupts);
+
+struct acpi_gpio_lookup {
+	struct acpi_gpio_info info;
+	int index;
+	int gpio;
+	int n;
+};
+
+static int acpi_find_gpio(struct acpi_resource *ares, void *data)
+{
+	struct acpi_gpio_lookup *lookup = data;
+
+	if (ares->type != ACPI_RESOURCE_TYPE_GPIO)
+		return 1;
+
+	if (lookup->n++ == lookup->index && lookup->gpio < 0) {
+		const struct acpi_resource_gpio *agpio = &ares->data.gpio;
+
+		lookup->gpio = acpi_get_gpio(agpio->resource_source.string_ptr,
+					     agpio->pin_table[0]);
+		lookup->info.gpioint =
+			agpio->connection_type == ACPI_RESOURCE_GPIO_TYPE_INT;
+	}
+
+	return 1;
+}
+
+/**
+ * acpi_get_gpio_by_index() - get a GPIO number from device resources
+ * @dev: pointer to a device to get GPIO from
+ * @index: index of GpioIo/GpioInt resource (starting from %0)
+ * @info: info pointer to fill in (optional)
+ *
+ * Function goes through ACPI resources for @dev and based on @index looks
+ * up a GpioIo/GpioInt resource, translates it to the Linux GPIO number,
+ * and returns it. @index matches GpioIo/GpioInt resources only, so if there
+ * are %3 GPIO resources in total, the index goes from %0 to %2.
+ *
+ * If the GPIO cannot be translated or there is an error, negative errno is
+ * returned.
+ *
+ * Note: if the GPIO resource has multiple entries in the pin list, this
+ * function only returns the first.
+ */
+int acpi_get_gpio_by_index(struct device *dev, int index,
+			   struct acpi_gpio_info *info)
+{
+	struct acpi_gpio_lookup lookup;
+	struct list_head resource_list;
+	struct acpi_device *adev;
+	acpi_handle handle;
+	int ret;
+
+	if (!dev)
+		return -EINVAL;
+
+	handle = ACPI_HANDLE(dev);
+	if (!handle || acpi_bus_get_device(handle, &adev))
+		return -ENODEV;
+
+	memset(&lookup, 0, sizeof(lookup));
+	lookup.index = index;
+	lookup.gpio = -ENODEV;
+
+	INIT_LIST_HEAD(&resource_list);
+	ret = acpi_dev_get_resources(adev, &resource_list, acpi_find_gpio,
+				     &lookup);
+	if (ret < 0)
+		return ret;
+
+	acpi_dev_free_resource_list(&resource_list);
+
+	if (lookup.gpio >= 0 && info)
+		*info = lookup.info;
+
+	return lookup.gpio;
+}
+EXPORT_SYMBOL_GPL(acpi_get_gpio_by_index);
+
+/**
+ * acpi_gpiochip_free_interrupts() - Free GPIO _EVT ACPI event interrupts.
+ * @chip:      gpio chip
+ *
+ * Free interrupts associated with the _EVT method for the given GPIO chip.
+ *
+ * The remaining ACPI event interrupts associated with the chip are freed
+ * automatically.
+ */
+void acpi_gpiochip_free_interrupts(struct gpio_chip *chip)
+{
+	acpi_handle handle;
+	acpi_status status;
+	struct list_head *evt_pins;
+	struct acpi_gpio_evt_pin *evt_pin, *ep;
+
+	if (!chip->dev || !chip->to_irq)
+		return;
+
+	handle = ACPI_HANDLE(chip->dev);
+	if (!handle)
+		return;
+
+	status = acpi_get_data(handle, acpi_gpio_evt_dh, (void **)&evt_pins);
+	if (ACPI_FAILURE(status))
+		return;
+
+	list_for_each_entry_safe_reverse(evt_pin, ep, evt_pins, node) {
+		devm_free_irq(chip->dev, evt_pin->irq, evt_pin);
+		list_del(&evt_pin->node);
+		kfree(evt_pin);
+	}
+
+	acpi_detach_data(handle, acpi_gpio_evt_dh);
+	kfree(evt_pins);
+}
+EXPORT_SYMBOL(acpi_gpiochip_free_interrupts);
diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c
index 465f4ca..665f953 100644
--- a/drivers/gpio/gpiolib-of.c
+++ b/drivers/gpio/gpiolib-of.c
@@ -61,7 +61,7 @@
  * in flags for the GPIO.
  */
 int of_get_named_gpio_flags(struct device_node *np, const char *propname,
-                           int index, enum of_gpio_flags *flags)
+			   int index, enum of_gpio_flags *flags)
 {
 	/* Return -EPROBE_DEFER so that probe() functions can be called
 	 * later, when the GPIO actually becomes available
diff --git a/drivers/input/keyboard/Kconfig b/drivers/input/keyboard/Kconfig
index ac05006..6a195d5 100644
--- a/drivers/input/keyboard/Kconfig
+++ b/drivers/input/keyboard/Kconfig
@@ -628,4 +628,16 @@
 	  To compile this driver as a module, choose M here: the
 	  module will be called w90p910_keypad.
 
+config KEYBOARD_CROS_EC
+	tristate "ChromeOS EC keyboard"
+	select INPUT_MATRIXKMAP
+	depends on MFD_CROS_EC
+	help
+	  Say Y here to enable the matrix keyboard used by ChromeOS devices
+	  and implemented on the ChromeOS EC. You must enable one bus option
+	  (MFD_CROS_EC_I2C or MFD_CROS_EC_SPI) to use this.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called cros_ec_keyb.
+
 endif
diff --git a/drivers/input/keyboard/Makefile b/drivers/input/keyboard/Makefile
index 49b1645..0c43e8c 100644
--- a/drivers/input/keyboard/Makefile
+++ b/drivers/input/keyboard/Makefile
@@ -11,6 +11,7 @@
 obj-$(CONFIG_KEYBOARD_ATARI)		+= atakbd.o
 obj-$(CONFIG_KEYBOARD_ATKBD)		+= atkbd.o
 obj-$(CONFIG_KEYBOARD_BFIN)		+= bf54x-keys.o
+obj-$(CONFIG_KEYBOARD_CROS_EC)		+= cros_ec_keyb.o
 obj-$(CONFIG_KEYBOARD_DAVINCI)		+= davinci_keyscan.o
 obj-$(CONFIG_KEYBOARD_EP93XX)		+= ep93xx_keypad.o
 obj-$(CONFIG_KEYBOARD_GOLDFISH_EVENTS)	+= goldfish_events.o
diff --git a/drivers/input/keyboard/cros_ec_keyb.c b/drivers/input/keyboard/cros_ec_keyb.c
new file mode 100644
index 0000000..49557f2
--- /dev/null
+++ b/drivers/input/keyboard/cros_ec_keyb.c
@@ -0,0 +1,334 @@
+/*
+ * ChromeOS EC keyboard driver
+ *
+ * Copyright (C) 2012 Google, Inc
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * This driver uses the Chrome OS EC byte-level message-based protocol for
+ * communicating the keyboard state (which keys are pressed) from a keyboard EC
+ * to the AP over some bus (such as i2c, lpc, spi).  The EC does debouncing,
+ * but everything else (including deghosting) is done here.  The main
+ * motivation for this is to keep the EC firmware as simple as possible, since
+ * it cannot be easily upgraded and EC flash/IRAM space is relatively
+ * expensive.
+ */
+
+#include <linux/module.h>
+#include <linux/i2c.h>
+#include <linux/input.h>
+#include <linux/kernel.h>
+#include <linux/notifier.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/input/matrix_keypad.h>
+#include <linux/mfd/cros_ec.h>
+#include <linux/mfd/cros_ec_commands.h>
+
+/*
+ * @rows: Number of rows in the keypad
+ * @cols: Number of columns in the keypad
+ * @row_shift: log2 of number of rows, rounded up
+ * @keymap_data: Matrix keymap data used to convert to keyscan values
+ * @ghost_filter: true to enable the matrix key-ghosting filter
+ * @dev: Device pointer
+ * @idev: Input device
+ * @ec: Top level ChromeOS device to use to talk to EC
+ * @notifier: interrupt event notifier for transport devices
+ */
+struct cros_ec_keyb {
+	unsigned int rows;
+	unsigned int cols;
+	int row_shift;
+	const struct matrix_keymap_data *keymap_data;
+	bool ghost_filter;
+
+	struct device *dev;
+	struct input_dev *idev;
+	struct cros_ec_device *ec;
+	struct notifier_block notifier;
+};
+
+
+static bool cros_ec_keyb_row_has_ghosting(struct cros_ec_keyb *ckdev,
+					  uint8_t *buf, int row)
+{
+	int pressed_in_row = 0;
+	int row_has_teeth = 0;
+	int col, mask;
+
+	mask = 1 << row;
+	for (col = 0; col < ckdev->cols; col++) {
+		if (buf[col] & mask) {
+			pressed_in_row++;
+			row_has_teeth |= buf[col] & ~mask;
+			if (pressed_in_row > 1 && row_has_teeth) {
+				/* ghosting */
+				dev_dbg(ckdev->dev,
+					"ghost found at: r%d c%d, pressed %d, teeth 0x%x\n",
+					row, col, pressed_in_row,
+					row_has_teeth);
+				return true;
+			}
+		}
+	}
+
+	return false;
+}
+
+/*
+ * Returns true when there is at least one combination of pressed keys that
+ * results in ghosting.
+ */
+static bool cros_ec_keyb_has_ghosting(struct cros_ec_keyb *ckdev, uint8_t *buf)
+{
+	int row;
+
+	/*
+	 * Ghosting happens if for any pressed key X there are other keys
+	 * pressed both in the same row and column of X as, for instance,
+	 * in the following diagram:
+	 *
+	 * . . Y . g .
+	 * . . . . . .
+	 * . . . . . .
+	 * . . X . Z .
+	 *
+	 * In this case only X, Y, and Z are pressed, but g appears to be
+	 * pressed too (see Wikipedia).
+	 *
+	 * We can detect ghosting in a single pass (*) over the keyboard state
+	 * by maintaining two arrays.  pressed_in_row counts how many pressed
+	 * keys we have found in a row.  row_has_teeth is true if any of the
+	 * pressed keys for this row has other pressed keys in its column.  If
+	 * at any point of the scan we find that a row has multiple pressed
+	 * keys, and at least one of them is at the intersection with a column
+	 * with multiple pressed keys, we're sure there is ghosting.
+	 * Conversely, if there is ghosting, we will detect such situation for
+	 * at least one key during the pass.
+	 *
+	 * (*) This looks linear in the number of keys, but it's not.  We can
+	 * cheat because the number of rows is small.
+	 */
+	for (row = 0; row < ckdev->rows; row++)
+		if (cros_ec_keyb_row_has_ghosting(ckdev, buf, row))
+			return true;
+
+	return false;
+}
+
+/*
+ * Compares the new keyboard state to the old one and produces key
+ * press/release events accordingly.  The keyboard state is 13 bytes (one byte
+ * per column).
+ */
+static void cros_ec_keyb_process(struct cros_ec_keyb *ckdev,
+			 uint8_t *kb_state, int len)
+{
+	struct input_dev *idev = ckdev->idev;
+	int col, row;
+	int new_state;
+	int num_cols;
+
+	num_cols = len;
+
+	if (ckdev->ghost_filter && cros_ec_keyb_has_ghosting(ckdev, kb_state)) {
+		/*
+		 * Simple-minded solution: ignore this state. The obvious
+		 * improvement is to only ignore changes to keys involved in
+		 * the ghosting, but process the other changes.
+		 */
+		dev_dbg(ckdev->dev, "ghosting found\n");
+		return;
+	}
+
+	for (col = 0; col < ckdev->cols; col++) {
+		for (row = 0; row < ckdev->rows; row++) {
+			int pos = MATRIX_SCAN_CODE(row, col, ckdev->row_shift);
+			const unsigned short *keycodes = idev->keycode;
+			int code;
+
+			code = keycodes[pos];
+			new_state = kb_state[col] & (1 << row);
+			if (!!new_state != test_bit(code, idev->key)) {
+				dev_dbg(ckdev->dev,
+					"changed: [r%d c%d]: byte %02x\n",
+					row, col, new_state);
+
+				input_report_key(idev, code, new_state);
+			}
+		}
+	}
+	input_sync(ckdev->idev);
+}
+
+static int cros_ec_keyb_open(struct input_dev *dev)
+{
+	struct cros_ec_keyb *ckdev = input_get_drvdata(dev);
+
+	return blocking_notifier_chain_register(&ckdev->ec->event_notifier,
+						&ckdev->notifier);
+}
+
+static void cros_ec_keyb_close(struct input_dev *dev)
+{
+	struct cros_ec_keyb *ckdev = input_get_drvdata(dev);
+
+	blocking_notifier_chain_unregister(&ckdev->ec->event_notifier,
+					   &ckdev->notifier);
+}
+
+static int cros_ec_keyb_get_state(struct cros_ec_keyb *ckdev, uint8_t *kb_state)
+{
+	return ckdev->ec->command_recv(ckdev->ec, EC_CMD_MKBP_STATE,
+					  kb_state, ckdev->cols);
+}
+
+static int cros_ec_keyb_work(struct notifier_block *nb,
+		     unsigned long state, void *_notify)
+{
+	int ret;
+	struct cros_ec_keyb *ckdev = container_of(nb, struct cros_ec_keyb,
+						    notifier);
+	uint8_t kb_state[ckdev->cols];
+
+	ret = cros_ec_keyb_get_state(ckdev, kb_state);
+	if (ret >= 0)
+		cros_ec_keyb_process(ckdev, kb_state, ret);
+
+	return NOTIFY_DONE;
+}
+
+/* Clear any keys in the buffer */
+static void cros_ec_keyb_clear_keyboard(struct cros_ec_keyb *ckdev)
+{
+	uint8_t old_state[ckdev->cols];
+	uint8_t new_state[ckdev->cols];
+	unsigned long duration;
+	int i, ret;
+
+	/*
+	 * Keep reading until we see that the scan state does not change.
+	 * That indicates that we are done.
+	 *
+	 * Assume that the EC keyscan buffer is at most 32 deep.
+	 */
+	duration = jiffies;
+	ret = cros_ec_keyb_get_state(ckdev, new_state);
+	for (i = 1; !ret && i < 32; i++) {
+		memcpy(old_state, new_state, sizeof(old_state));
+		ret = cros_ec_keyb_get_state(ckdev, new_state);
+		if (0 == memcmp(old_state, new_state, sizeof(old_state)))
+			break;
+	}
+	duration = jiffies - duration;
+	dev_info(ckdev->dev, "Discarded %d keyscan(s) in %dus\n", i,
+		jiffies_to_usecs(duration));
+}
+
+static int cros_ec_keyb_probe(struct platform_device *pdev)
+{
+	struct cros_ec_device *ec = dev_get_drvdata(pdev->dev.parent);
+	struct device *dev = ec->dev;
+	struct cros_ec_keyb *ckdev;
+	struct input_dev *idev;
+	struct device_node *np;
+	int err;
+
+	np = pdev->dev.of_node;
+	if (!np)
+		return -ENODEV;
+
+	ckdev = devm_kzalloc(&pdev->dev, sizeof(*ckdev), GFP_KERNEL);
+	if (!ckdev)
+		return -ENOMEM;
+	err = matrix_keypad_parse_of_params(&pdev->dev, &ckdev->rows,
+					    &ckdev->cols);
+	if (err)
+		return err;
+
+	idev = devm_input_allocate_device(&pdev->dev);
+	if (!idev)
+		return -ENOMEM;
+
+	ckdev->ec = ec;
+	ckdev->notifier.notifier_call = cros_ec_keyb_work;
+	ckdev->dev = dev;
+	dev_set_drvdata(&pdev->dev, ckdev);
+
+	idev->name = ec->ec_name;
+	idev->phys = ec->phys_name;
+	__set_bit(EV_REP, idev->evbit);
+
+	idev->id.bustype = BUS_VIRTUAL;
+	idev->id.version = 1;
+	idev->id.product = 0;
+	idev->dev.parent = &pdev->dev;
+	idev->open = cros_ec_keyb_open;
+	idev->close = cros_ec_keyb_close;
+
+	ckdev->ghost_filter = of_property_read_bool(np,
+					"google,needs-ghost-filter");
+
+	err = matrix_keypad_build_keymap(NULL, NULL, ckdev->rows, ckdev->cols,
+					 NULL, idev);
+	if (err) {
+		dev_err(dev, "cannot build key matrix\n");
+		return err;
+	}
+
+	ckdev->row_shift = get_count_order(ckdev->cols);
+
+	input_set_capability(idev, EV_MSC, MSC_SCAN);
+	input_set_drvdata(idev, ckdev);
+	ckdev->idev = idev;
+	err = input_register_device(ckdev->idev);
+	if (err) {
+		dev_err(dev, "cannot register input device\n");
+		return err;
+	}
+
+	return 0;
+}
+
+#ifdef CONFIG_PM_SLEEP
+static int cros_ec_keyb_resume(struct device *dev)
+{
+	struct cros_ec_keyb *ckdev = dev_get_drvdata(dev);
+
+	/*
+	 * When the EC is not a wake source, it could not have caused the
+	 * resume, so we clear the EC's key scan buffer. If the EC was a
+	 * wake source (e.g. the lid is open and the user might press a key to
+	 * wake) then the key scan buffer should be preserved.
+	 */
+	if (ckdev->ec->was_wake_device)
+		cros_ec_keyb_clear_keyboard(ckdev);
+
+	return 0;
+}
+
+#endif
+
+static SIMPLE_DEV_PM_OPS(cros_ec_keyb_pm_ops, NULL, cros_ec_keyb_resume);
+
+static struct platform_driver cros_ec_keyb_driver = {
+	.probe = cros_ec_keyb_probe,
+	.driver = {
+		.name = "cros-ec-keyb",
+		.pm	= &cros_ec_keyb_pm_ops,
+	},
+};
+
+module_platform_driver(cros_ec_keyb_driver);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("ChromeOS EC keyboard driver");
+MODULE_ALIAS("platform:cros-ec-keyb");
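
To make the ghost-detection comment in cros_ec_keyb_has_ghosting() concrete, here
is the row scan run standalone on made-up data: keys at (r0,c2), (r3,c2) and
(r3,c4) would make the phantom key (r0,c4) appear, and the scan flags row 3.

	#include <stdio.h>
	#include <stdint.h>

	#define COLS 6

	/* Same logic as cros_ec_keyb_row_has_ghosting(), outside the kernel. */
	static int row_has_ghosting(const uint8_t *buf, int row)
	{
		int pressed_in_row = 0, row_has_teeth = 0, col;
		int mask = 1 << row;

		for (col = 0; col < COLS; col++) {
			if (buf[col] & mask) {
				pressed_in_row++;
				row_has_teeth |= buf[col] & ~mask;
				if (pressed_in_row > 1 && row_has_teeth)
					return 1;
			}
		}
		return 0;
	}

	int main(void)
	{
		/* one byte per column; bit r set = key (row r, this col) down */
		uint8_t buf[COLS] = { 0x00, 0x00, 0x09, 0x00, 0x08, 0x00 };
		int row;

		for (row = 0; row < 8; row++)
			if (row_has_ghosting(buf, row))
				printf("ghosting in row %d\n", row); /* row 3 */
		return 0;
	}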
diff --git a/drivers/input/keyboard/lpc32xx-keys.c b/drivers/input/keyboard/lpc32xx-keys.c
index 1b8add6..4218143 100644
--- a/drivers/input/keyboard/lpc32xx-keys.c
+++ b/drivers/input/keyboard/lpc32xx-keys.c
@@ -144,12 +144,13 @@
 {
 	struct device_node *np = dev->of_node;
 	u32 rows = 0, columns = 0;
+	int err;
 
-	of_property_read_u32(np, "keypad,num-rows", &rows);
-	of_property_read_u32(np, "keypad,num-columns", &columns);
-	if (!rows || rows != columns) {
-		dev_err(dev,
-			"rows and columns must be specified and be equal!\n");
+	err = matrix_keypad_parse_of_params(dev, &rows, &columns);
+	if (err)
+		return err;
+	if (rows != columns) {
+		dev_err(dev, "rows and columns must be equal!\n");
 		return -EINVAL;
 	}
 
diff --git a/drivers/input/keyboard/omap4-keypad.c b/drivers/input/keyboard/omap4-keypad.c
index e25b022..1b28909 100644
--- a/drivers/input/keyboard/omap4-keypad.c
+++ b/drivers/input/keyboard/omap4-keypad.c
@@ -215,18 +215,12 @@
 				 struct omap4_keypad *keypad_data)
 {
 	struct device_node *np = dev->of_node;
+	int err;
 
-	if (!np) {
-		dev_err(dev, "missing DT data");
-		return -EINVAL;
-	}
-
-	of_property_read_u32(np, "keypad,num-rows", &keypad_data->rows);
-	of_property_read_u32(np, "keypad,num-columns", &keypad_data->cols);
-	if (!keypad_data->rows || !keypad_data->cols) {
-		dev_err(dev, "number of keypad rows/columns not specified\n");
-		return -EINVAL;
-	}
+	err = matrix_keypad_parse_of_params(dev, &keypad_data->rows,
+					    &keypad_data->cols);
+	if (err)
+		return err;
 
 	if (of_get_property(np, "linux,input-no-autorepeat", NULL))
 		keypad_data->no_autorepeat = true;
diff --git a/drivers/input/keyboard/tca8418_keypad.c b/drivers/input/keyboard/tca8418_keypad.c
index a34cc67..55c1530 100644
--- a/drivers/input/keyboard/tca8418_keypad.c
+++ b/drivers/input/keyboard/tca8418_keypad.c
@@ -288,8 +288,11 @@
 		irq_is_gpio = pdata->irq_is_gpio;
 	} else {
 		struct device_node *np = dev->of_node;
-		of_property_read_u32(np, "keypad,num-rows", &rows);
-		of_property_read_u32(np, "keypad,num-columns", &cols);
+		int err;
+
+		err = matrix_keypad_parse_of_params(dev, &rows, &cols);
+		if (err)
+			return err;
 		rep = of_property_read_bool(np, "keypad,autorepeat");
 	}
 
diff --git a/drivers/input/matrix-keymap.c b/drivers/input/matrix-keymap.c
index 3ae496e..08b61f5 100644
--- a/drivers/input/matrix-keymap.c
+++ b/drivers/input/matrix-keymap.c
@@ -50,6 +50,26 @@
 }
 
 #ifdef CONFIG_OF
+int matrix_keypad_parse_of_params(struct device *dev,
+				  unsigned int *rows, unsigned int *cols)
+{
+	struct device_node *np = dev->of_node;
+
+	if (!np) {
+		dev_err(dev, "missing DT data");
+		return -EINVAL;
+	}
+	of_property_read_u32(np, "keypad,num-rows", rows);
+	of_property_read_u32(np, "keypad,num-columns", cols);
+	if (!*rows || !*cols) {
+		dev_err(dev, "number of keypad rows/columns not specified\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(matrix_keypad_parse_of_params);
+
 static int matrix_keypad_parse_of_keymap(const char *propname,
 					 unsigned int rows, unsigned int cols,
 					 struct input_dev *input_dev)
diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 8301837..21d02b0 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -46,6 +46,7 @@
 #include "amd_iommu_proto.h"
 #include "amd_iommu_types.h"
 #include "irq_remapping.h"
+#include "pci.h"
 
 #define CMD_SET_TYPE(cmd, t) ((cmd)->data[1] |= ((t) << 28))
 
@@ -263,12 +264,6 @@
 	return true;
 }
 
-static void swap_pci_ref(struct pci_dev **from, struct pci_dev *to)
-{
-	pci_dev_put(*from);
-	*from = to;
-}
-
 static struct pci_bus *find_hosted_bus(struct pci_bus *bus)
 {
 	while (!bus->self) {
@@ -701,9 +696,6 @@
 static void iommu_poll_events(struct amd_iommu *iommu)
 {
 	u32 head, tail;
-	unsigned long flags;
-
-	spin_lock_irqsave(&iommu->lock, flags);
 
 	head = readl(iommu->mmio_base + MMIO_EVT_HEAD_OFFSET);
 	tail = readl(iommu->mmio_base + MMIO_EVT_TAIL_OFFSET);
@@ -714,8 +706,6 @@
 	}
 
 	writel(head, iommu->mmio_base + MMIO_EVT_HEAD_OFFSET);
-
-	spin_unlock_irqrestore(&iommu->lock, flags);
 }
 
 static void iommu_handle_ppr_entry(struct amd_iommu *iommu, u64 *raw)
@@ -740,17 +730,11 @@
 
 static void iommu_poll_ppr_log(struct amd_iommu *iommu)
 {
-	unsigned long flags;
 	u32 head, tail;
 
 	if (iommu->ppr_log == NULL)
 		return;
 
-	/* enable ppr interrupts again */
-	writel(MMIO_STATUS_PPR_INT_MASK, iommu->mmio_base + MMIO_STATUS_OFFSET);
-
-	spin_lock_irqsave(&iommu->lock, flags);
-
 	head = readl(iommu->mmio_base + MMIO_PPR_HEAD_OFFSET);
 	tail = readl(iommu->mmio_base + MMIO_PPR_TAIL_OFFSET);
 
@@ -786,34 +770,50 @@
 		head = (head + PPR_ENTRY_SIZE) % PPR_LOG_SIZE;
 		writel(head, iommu->mmio_base + MMIO_PPR_HEAD_OFFSET);
 
-		/*
-		 * Release iommu->lock because ppr-handling might need to
-		 * re-acquire it
-		 */
-		spin_unlock_irqrestore(&iommu->lock, flags);
-
 		/* Handle PPR entry */
 		iommu_handle_ppr_entry(iommu, entry);
 
-		spin_lock_irqsave(&iommu->lock, flags);
-
 		/* Refresh ring-buffer information */
 		head = readl(iommu->mmio_base + MMIO_PPR_HEAD_OFFSET);
 		tail = readl(iommu->mmio_base + MMIO_PPR_TAIL_OFFSET);
 	}
-
-	spin_unlock_irqrestore(&iommu->lock, flags);
 }
 
 irqreturn_t amd_iommu_int_thread(int irq, void *data)
 {
-	struct amd_iommu *iommu;
+	struct amd_iommu *iommu = (struct amd_iommu *) data;
+	u32 status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET);
 
-	for_each_iommu(iommu) {
-		iommu_poll_events(iommu);
-		iommu_poll_ppr_log(iommu);
+	while (status & (MMIO_STATUS_EVT_INT_MASK | MMIO_STATUS_PPR_INT_MASK)) {
+		/* Enable EVT and PPR interrupts again */
+		writel((MMIO_STATUS_EVT_INT_MASK | MMIO_STATUS_PPR_INT_MASK),
+			iommu->mmio_base + MMIO_STATUS_OFFSET);
+
+		if (status & MMIO_STATUS_EVT_INT_MASK) {
+			pr_devel("AMD-Vi: Processing IOMMU Event Log\n");
+			iommu_poll_events(iommu);
+		}
+
+		if (status & MMIO_STATUS_PPR_INT_MASK) {
+			pr_devel("AMD-Vi: Processing IOMMU PPR Log\n");
+			iommu_poll_ppr_log(iommu);
+		}
+
+		/*
+		 * Hardware bug: ERBT1312
+		 * When re-enabling an interrupt (by writing 1
+		 * to clear the bit), the hardware might also try to set
+		 * the interrupt bit in the event status register.
+		 * In this scenario, the bit stays set and disables
+		 * subsequent interrupts.
+		 *
+		 * Workaround: The IOMMU driver should read back the
+		 * status register and check if the interrupt bits are cleared.
+		 * If not, the driver must go through the interrupt handler
+		 * again and re-clear the bits.
+		 */
+		status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET);
 	}
-
 	return IRQ_HANDLED;
 }
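The interrupt thread now services the single IOMMU passed in *data (see the matching request_threaded_irq() change in amd_iommu_init.c below) and loops on the status register to cope with erratum ERBT1312. A generic sketch of that read-ack-recheck pattern, with hypothetical example_* names:

	static irqreturn_t example_int_thread(int irq, void *data)
	{
		struct example_dev *ed = data;
		u32 status = readl(ed->mmio + EXAMPLE_STATUS);

		while (status & EXAMPLE_PENDING_MASK) {
			/* write-1-to-clear ack, then do the real work */
			writel(status & EXAMPLE_PENDING_MASK,
			       ed->mmio + EXAMPLE_STATUS);
			example_handle_events(ed);

			/* re-read: hardware may have set the bit again */
			status = readl(ed->mmio + EXAMPLE_STATUS);
		}

		return IRQ_HANDLED;
	}
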
 
@@ -2839,24 +2839,6 @@
 }
 
 /*
- * This is a special map_sg function which is used if we should map a
- * device which is not handled by an AMD IOMMU in the system.
- */
-static int map_sg_no_iommu(struct device *dev, struct scatterlist *sglist,
-			   int nelems, int dir)
-{
-	struct scatterlist *s;
-	int i;
-
-	for_each_sg(sglist, s, nelems, i) {
-		s->dma_address = (dma_addr_t)sg_phys(s);
-		s->dma_length  = s->length;
-	}
-
-	return nelems;
-}
-
-/*
  * The exported map_sg function for dma_ops (handles scatter-gather
  * lists).
  */
@@ -2875,9 +2857,7 @@
 	INC_STATS_COUNTER(cnt_map_sg);
 
 	domain = get_domain(dev);
-	if (PTR_ERR(domain) == -EINVAL)
-		return map_sg_no_iommu(dev, sglist, nelems, dir);
-	else if (IS_ERR(domain))
+	if (IS_ERR(domain))
 		return 0;
 
 	dma_mask = *dev->dma_mask;
@@ -3410,7 +3390,7 @@
 }
 
 static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom,
-					  unsigned long iova)
+					  dma_addr_t iova)
 {
 	struct protection_domain *domain = dom->priv;
 	unsigned long offset_mask;
@@ -3947,6 +3927,9 @@
 	if (!table)
 		goto out;
 
+	/* Initialize table spin-lock */
+	spin_lock_init(&table->lock);
+
 	if (ioapic)
 		/* Keep the first 32 indexes free for IOAPIC interrupts */
 		table->min_index = 32;
@@ -4007,7 +3990,7 @@
 			c = 0;
 
 		if (c == count)	{
-			struct irq_2_iommu *irte_info;
+			struct irq_2_irte *irte_info;
 
 			for (; c != 0; --c)
 				table->table[index - c + 1] = IRTE_ALLOCATED;
@@ -4015,9 +3998,9 @@
 			index -= count - 1;
 
 			cfg->remapped	      = 1;
-			irte_info             = &cfg->irq_2_iommu;
-			irte_info->sub_handle = devid;
-			irte_info->irte_index = index;
+			irte_info             = &cfg->irq_2_irte;
+			irte_info->devid      = devid;
+			irte_info->index      = index;
 
 			goto out;
 		}
@@ -4098,7 +4081,7 @@
 			      struct io_apic_irq_attr *attr)
 {
 	struct irq_remap_table *table;
-	struct irq_2_iommu *irte_info;
+	struct irq_2_irte *irte_info;
 	struct irq_cfg *cfg;
 	union irte irte;
 	int ioapic_id;
@@ -4110,7 +4093,7 @@
 	if (!cfg)
 		return -EINVAL;
 
-	irte_info = &cfg->irq_2_iommu;
+	irte_info = &cfg->irq_2_irte;
 	ioapic_id = mpc_ioapic_id(attr->ioapic);
 	devid     = get_ioapic_devid(ioapic_id);
 
@@ -4125,8 +4108,8 @@
 
 	/* Setup IRQ remapping info */
 	cfg->remapped	      = 1;
-	irte_info->sub_handle = devid;
-	irte_info->irte_index = index;
+	irte_info->devid      = devid;
+	irte_info->index      = index;
 
 	/* Setup IRTE for IOMMU */
 	irte.val		= 0;
@@ -4160,7 +4143,7 @@
 static int set_affinity(struct irq_data *data, const struct cpumask *mask,
 			bool force)
 {
-	struct irq_2_iommu *irte_info;
+	struct irq_2_irte *irte_info;
 	unsigned int dest, irq;
 	struct irq_cfg *cfg;
 	union irte irte;
@@ -4171,12 +4154,12 @@
 
 	cfg       = data->chip_data;
 	irq       = data->irq;
-	irte_info = &cfg->irq_2_iommu;
+	irte_info = &cfg->irq_2_irte;
 
 	if (!cpumask_intersects(mask, cpu_online_mask))
 		return -EINVAL;
 
-	if (get_irte(irte_info->sub_handle, irte_info->irte_index, &irte))
+	if (get_irte(irte_info->devid, irte_info->index, &irte))
 		return -EBUSY;
 
 	if (assign_irq_vector(irq, cfg, mask))
@@ -4192,7 +4175,7 @@
 	irte.fields.vector      = cfg->vector;
 	irte.fields.destination = dest;
 
-	modify_irte(irte_info->sub_handle, irte_info->irte_index, irte);
+	modify_irte(irte_info->devid, irte_info->index, irte);
 
 	if (cfg->move_in_progress)
 		send_cleanup_vector(cfg);
@@ -4204,16 +4187,16 @@
 
 static int free_irq(int irq)
 {
-	struct irq_2_iommu *irte_info;
+	struct irq_2_irte *irte_info;
 	struct irq_cfg *cfg;
 
 	cfg = irq_get_chip_data(irq);
 	if (!cfg)
 		return -EINVAL;
 
-	irte_info = &cfg->irq_2_iommu;
+	irte_info = &cfg->irq_2_irte;
 
-	free_irte(irte_info->sub_handle, irte_info->irte_index);
+	free_irte(irte_info->devid, irte_info->index);
 
 	return 0;
 }
@@ -4222,7 +4205,7 @@
 			    unsigned int irq, unsigned int dest,
 			    struct msi_msg *msg, u8 hpet_id)
 {
-	struct irq_2_iommu *irte_info;
+	struct irq_2_irte *irte_info;
 	struct irq_cfg *cfg;
 	union irte irte;
 
@@ -4230,7 +4213,7 @@
 	if (!cfg)
 		return;
 
-	irte_info = &cfg->irq_2_iommu;
+	irte_info = &cfg->irq_2_irte;
 
 	irte.val		= 0;
 	irte.fields.vector	= cfg->vector;
@@ -4239,11 +4222,11 @@
 	irte.fields.dm		= apic->irq_dest_mode;
 	irte.fields.valid	= 1;
 
-	modify_irte(irte_info->sub_handle, irte_info->irte_index, irte);
+	modify_irte(irte_info->devid, irte_info->index, irte);
 
 	msg->address_hi = MSI_ADDR_BASE_HI;
 	msg->address_lo = MSI_ADDR_BASE_LO;
-	msg->data       = irte_info->irte_index;
+	msg->data       = irte_info->index;
 }
 
 static int msi_alloc_irq(struct pci_dev *pdev, int irq, int nvec)
@@ -4268,7 +4251,7 @@
 static int msi_setup_irq(struct pci_dev *pdev, unsigned int irq,
 			 int index, int offset)
 {
-	struct irq_2_iommu *irte_info;
+	struct irq_2_irte *irte_info;
 	struct irq_cfg *cfg;
 	u16 devid;
 
@@ -4283,18 +4266,18 @@
 		return 0;
 
 	devid		= get_device_id(&pdev->dev);
-	irte_info	= &cfg->irq_2_iommu;
+	irte_info	= &cfg->irq_2_irte;
 
 	cfg->remapped	      = 1;
-	irte_info->sub_handle = devid;
-	irte_info->irte_index = index + offset;
+	irte_info->devid      = devid;
+	irte_info->index      = index + offset;
 
 	return 0;
 }
 
 static int setup_hpet_msi(unsigned int irq, unsigned int id)
 {
-	struct irq_2_iommu *irte_info;
+	struct irq_2_irte *irte_info;
 	struct irq_cfg *cfg;
 	int index, devid;
 
@@ -4302,7 +4285,7 @@
 	if (!cfg)
 		return -EINVAL;
 
-	irte_info = &cfg->irq_2_iommu;
+	irte_info = &cfg->irq_2_irte;
 	devid     = get_hpet_devid(id);
 	if (devid < 0)
 		return devid;
@@ -4312,8 +4295,8 @@
 		return index;
 
 	cfg->remapped	      = 1;
-	irte_info->sub_handle = devid;
-	irte_info->irte_index = index;
+	irte_info->devid      = devid;
+	irte_info->index      = index;
 
 	return 0;
 }
diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
index 2f46881..bf51abb 100644
--- a/drivers/iommu/amd_iommu_init.c
+++ b/drivers/iommu/amd_iommu_init.c
@@ -213,6 +213,14 @@
 	IOMMU_INIT_ERROR,
 };
 
+/* Early ioapic and hpet maps from kernel command line */
+#define EARLY_MAP_SIZE		4
+static struct devid_map __initdata early_ioapic_map[EARLY_MAP_SIZE];
+static struct devid_map __initdata early_hpet_map[EARLY_MAP_SIZE];
+static int __initdata early_ioapic_map_size;
+static int __initdata early_hpet_map_size;
+static bool __initdata cmdline_maps;
+
 static enum iommu_init_state init_state = IOMMU_START_STATE;
 
 static int amd_iommu_enable_interrupts(void);
@@ -703,31 +711,66 @@
 	set_iommu_for_device(iommu, devid);
 }
 
-static int add_special_device(u8 type, u8 id, u16 devid)
+static int __init add_special_device(u8 type, u8 id, u16 devid, bool cmd_line)
 {
 	struct devid_map *entry;
 	struct list_head *list;
 
-	if (type != IVHD_SPECIAL_IOAPIC && type != IVHD_SPECIAL_HPET)
+	if (type == IVHD_SPECIAL_IOAPIC)
+		list = &ioapic_map;
+	else if (type == IVHD_SPECIAL_HPET)
+		list = &hpet_map;
+	else
 		return -EINVAL;
 
+	list_for_each_entry(entry, list, list) {
+		if (!(entry->id == id && entry->cmd_line))
+			continue;
+
+		pr_info("AMD-Vi: Command-line override present for %s id %d - ignoring\n",
+			type == IVHD_SPECIAL_IOAPIC ? "IOAPIC" : "HPET", id);
+
+		return 0;
+	}
+
 	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
 	if (!entry)
 		return -ENOMEM;
 
-	entry->id    = id;
-	entry->devid = devid;
-
-	if (type == IVHD_SPECIAL_IOAPIC)
-		list = &ioapic_map;
-	else
-		list = &hpet_map;
+	entry->id	= id;
+	entry->devid	= devid;
+	entry->cmd_line	= cmd_line;
 
 	list_add_tail(&entry->list, list);
 
 	return 0;
 }
 
+static int __init add_early_maps(void)
+{
+	int i, ret;
+
+	for (i = 0; i < early_ioapic_map_size; ++i) {
+		ret = add_special_device(IVHD_SPECIAL_IOAPIC,
+					 early_ioapic_map[i].id,
+					 early_ioapic_map[i].devid,
+					 early_ioapic_map[i].cmd_line);
+		if (ret)
+			return ret;
+	}
+
+	for (i = 0; i < early_hpet_map_size; ++i) {
+		ret = add_special_device(IVHD_SPECIAL_HPET,
+					 early_hpet_map[i].id,
+					 early_hpet_map[i].devid,
+					 early_hpet_map[i].cmd_line);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
 /*
  * Reads the device exclusion range from ACPI and initializes the IOMMU with
  * it
@@ -764,6 +807,12 @@
 	u32 dev_i, ext_flags = 0;
 	bool alias = false;
 	struct ivhd_entry *e;
+	int ret;
+
+
+	ret = add_early_maps();
+	if (ret)
+		return ret;
 
 	/*
 	 * First save the recommended feature enable bits from ACPI
@@ -929,7 +978,7 @@
 				    PCI_FUNC(devid));
 
 			set_dev_entry_from_acpi(iommu, devid, e->flags, 0);
-			ret = add_special_device(type, handle, devid);
+			ret = add_special_device(type, handle, devid, false);
 			if (ret)
 				return ret;
 			break;
@@ -1275,7 +1324,7 @@
 				 amd_iommu_int_handler,
 				 amd_iommu_int_thread,
 				 0, "AMD-Vi",
-				 iommu->dev);
+				 iommu);
 
 	if (r) {
 		pci_disable_msi(iommu->dev);
@@ -1638,18 +1687,28 @@
 
 static bool __init check_ioapic_information(void)
 {
+	const char *fw_bug = FW_BUG;
 	bool ret, has_sb_ioapic;
 	int idx;
 
 	has_sb_ioapic = false;
 	ret           = false;
 
+	/*
+	 * If we have map overrides on the kernel command line the
+	 * messages in this function might not describe firmware bugs
+	 * anymore - so be careful
+	 */
+	if (cmdline_maps)
+		fw_bug = "";
+
 	for (idx = 0; idx < nr_ioapics; idx++) {
 		int devid, id = mpc_ioapic_id(idx);
 
 		devid = get_ioapic_devid(id);
 		if (devid < 0) {
-			pr_err(FW_BUG "AMD-Vi: IOAPIC[%d] not in IVRS table\n", id);
+			pr_err("%sAMD-Vi: IOAPIC[%d] not in IVRS table\n",
+				fw_bug, id);
 			ret = false;
 		} else if (devid == IOAPIC_SB_DEVID) {
 			has_sb_ioapic = true;
@@ -1666,11 +1725,11 @@
 		 * when the BIOS is buggy and provides us the wrong
 		 * device id for the IOAPIC in the system.
 		 */
-		pr_err(FW_BUG "AMD-Vi: No southbridge IOAPIC found in IVRS table\n");
+		pr_err("%sAMD-Vi: No southbridge IOAPIC found\n", fw_bug);
 	}
 
 	if (!ret)
-		pr_err("AMD-Vi: Disabling interrupt remapping due to BIOS Bug(s)\n");
+		pr_err("AMD-Vi: Disabling interrupt remapping\n");
 
 	return ret;
 }
@@ -1801,6 +1860,7 @@
 		 * Interrupt remapping enabled, create kmem_cache for the
 		 * remapping tables.
 		 */
+		ret = -ENOMEM;
 		amd_iommu_irq_cache = kmem_cache_create("irq_remap_cache",
 				MAX_IRQS_PER_TABLE * sizeof(u32),
 				IRQ_TABLE_ALIGNMENT,
@@ -2097,8 +2157,70 @@
 	return 1;
 }
 
-__setup("amd_iommu_dump", parse_amd_iommu_dump);
-__setup("amd_iommu=", parse_amd_iommu_options);
+static int __init parse_ivrs_ioapic(char *str)
+{
+	unsigned int bus, dev, fn;
+	int ret, id, i;
+	u16 devid;
+
+	ret = sscanf(str, "[%d]=%x:%x.%x", &id, &bus, &dev, &fn);
+
+	if (ret != 4) {
+		pr_err("AMD-Vi: Invalid command line: ivrs_ioapic%s\n", str);
+		return 1;
+	}
+
+	if (early_ioapic_map_size == EARLY_MAP_SIZE) {
+		pr_err("AMD-Vi: Early IOAPIC map overflow - ignoring ivrs_ioapic%s\n",
+			str);
+		return 1;
+	}
+
+	devid = ((bus & 0xff) << 8) | ((dev & 0x1f) << 3) | (fn & 0x7);
+
+	cmdline_maps			= true;
+	i				= early_ioapic_map_size++;
+	early_ioapic_map[i].id		= id;
+	early_ioapic_map[i].devid	= devid;
+	early_ioapic_map[i].cmd_line	= true;
+
+	return 1;
+}
+
+static int __init parse_ivrs_hpet(char *str)
+{
+	unsigned int bus, dev, fn;
+	int ret, id, i;
+	u16 devid;
+
+	ret = sscanf(str, "[%d]=%x:%x.%x", &id, &bus, &dev, &fn);
+
+	if (ret != 4) {
+		pr_err("AMD-Vi: Invalid command line: ivrs_hpet%s\n", str);
+		return 1;
+	}
+
+	if (early_hpet_map_size == EARLY_MAP_SIZE) {
+		pr_err("AMD-Vi: Early HPET map overflow - ignoring ivrs_hpet%s\n",
+			str);
+		return 1;
+	}
+
+	devid = ((bus & 0xff) << 8) | ((dev & 0x1f) << 3) | (fn & 0x7);
+
+	cmdline_maps			= true;
+	i				= early_hpet_map_size++;
+	early_hpet_map[i].id		= id;
+	early_hpet_map[i].devid		= devid;
+	early_hpet_map[i].cmd_line	= true;
+
+	return 1;
+}
+
+__setup("amd_iommu_dump",	parse_amd_iommu_dump);
+__setup("amd_iommu=",		parse_amd_iommu_options);
+__setup("ivrs_ioapic",		parse_ivrs_ioapic);
+__setup("ivrs_hpet",		parse_ivrs_hpet);
 
 IOMMU_INIT_FINISH(amd_iommu_detect,
 		  gart_iommu_hole_init,
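The two new parameters let broken IVRS tables be overridden from the kernel command line. Matching the sscanf pattern above, the accepted form is, for example:

	ivrs_ioapic[4]=00:14.0
	ivrs_hpet[0]=00:14.0

Each entry maps a special device id to a PCI bus:dev.fn requester id, packed as (bus << 8) | (dev << 3) | fn, so 00:14.0 above yields devid 0x00a0. Command-line entries take precedence over matching IVRS-provided ones; see the early return in add_special_device() when entry->cmd_line is set.
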
diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h
index ec36cf6..0285a21 100644
--- a/drivers/iommu/amd_iommu_types.h
+++ b/drivers/iommu/amd_iommu_types.h
@@ -100,6 +100,7 @@
 #define PASID_MASK		0x000fffff
 
 /* MMIO status bits */
+#define MMIO_STATUS_EVT_INT_MASK	(1 << 1)
 #define MMIO_STATUS_COM_WAIT_INT_MASK	(1 << 2)
 #define MMIO_STATUS_PPR_INT_MASK	(1 << 6)
 
@@ -589,6 +590,7 @@
 	struct list_head list;
 	u8 id;
 	u16 devid;
+	bool cmd_line;
 };
 
 /* Map HPET and IOAPIC ids to the devid used by the IOMMU */
diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c
index b8008f6..a7967ce 100644
--- a/drivers/iommu/dmar.c
+++ b/drivers/iommu/dmar.c
@@ -646,7 +646,7 @@
 int alloc_iommu(struct dmar_drhd_unit *drhd)
 {
 	struct intel_iommu *iommu;
-	u32 ver;
+	u32 ver, sts;
 	static int iommu_allocated = 0;
 	int agaw = 0;
 	int msagaw = 0;
@@ -696,6 +696,15 @@
 		(unsigned long long)iommu->cap,
 		(unsigned long long)iommu->ecap);
 
+	/* Reflect status in gcmd */
+	sts = readl(iommu->reg + DMAR_GSTS_REG);
+	if (sts & DMA_GSTS_IRES)
+		iommu->gcmd |= DMA_GCMD_IRE;
+	if (sts & DMA_GSTS_TES)
+		iommu->gcmd |= DMA_GCMD_TE;
+	if (sts & DMA_GSTS_QIES)
+		iommu->gcmd |= DMA_GCMD_QIE;
+
 	raw_spin_lock_init(&iommu->register_lock);
 
 	drhd->iommu = iommu;
@@ -1205,7 +1214,7 @@
 
 	/* TBD: ignore advanced fault log currently */
 	if (!(fault_status & DMA_FSTS_PPF))
-		goto clear_rest;
+		goto unlock_exit;
 
 	fault_index = dma_fsts_fault_record_index(fault_status);
 	reg = cap_fault_reg_offset(iommu->cap);
@@ -1246,11 +1255,10 @@
 			fault_index = 0;
 		raw_spin_lock_irqsave(&iommu->register_lock, flag);
 	}
-clear_rest:
-	/* clear all the other faults */
-	fault_status = readl(iommu->reg + DMAR_FSTS_REG);
-	writel(fault_status, iommu->reg + DMAR_FSTS_REG);
 
+	writel(DMA_FSTS_PFO | DMA_FSTS_PPF, iommu->reg + DMAR_FSTS_REG);
+
+unlock_exit:
 	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
 	return IRQ_HANDLED;
 }
@@ -1298,6 +1306,7 @@
 	for_each_drhd_unit(drhd) {
 		int ret;
 		struct intel_iommu *iommu = drhd->iommu;
+		u32 fault_status;
 		ret = dmar_set_interrupt(iommu);
 
 		if (ret) {
@@ -1310,6 +1319,8 @@
 		 * Clear any previous faults.
 		 */
 		dmar_fault(iommu->irq, iommu);
+		fault_status = readl(iommu->reg + DMAR_FSTS_REG);
+		writel(fault_status, iommu->reg + DMAR_FSTS_REG);
 	}
 
 	return 0;
diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c
index 238a3ca..3f32d64 100644
--- a/drivers/iommu/exynos-iommu.c
+++ b/drivers/iommu/exynos-iommu.c
@@ -1027,7 +1027,7 @@
 }
 
 static phys_addr_t exynos_iommu_iova_to_phys(struct iommu_domain *domain,
-					  unsigned long iova)
+					  dma_addr_t iova)
 {
 	struct exynos_iommu_domain *priv = domain->priv;
 	unsigned long *entry;
diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index 0099667..b4f0e28 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -47,6 +47,7 @@
 #include <asm/iommu.h>
 
 #include "irq_remapping.h"
+#include "pci.h"
 
 #define ROOT_SIZE		VTD_PAGE_SIZE
 #define CONTEXT_SIZE		VTD_PAGE_SIZE
@@ -3665,6 +3666,7 @@
 int __init intel_iommu_init(void)
 {
 	int ret = 0;
+	struct dmar_drhd_unit *drhd;
 
 	/* VT-d is required for a TXT/tboot launch, so enforce that */
 	force_on = tboot_force_iommu();
@@ -3675,6 +3677,20 @@
 		return 	-ENODEV;
 	}
 
+	/*
+	 * Disable translation if already enabled prior to OS handover.
+	 */
+	for_each_drhd_unit(drhd) {
+		struct intel_iommu *iommu;
+
+		if (drhd->ignored)
+			continue;
+
+		iommu = drhd->iommu;
+		if (iommu->gcmd & DMA_GCMD_TE)
+			iommu_disable_translation(iommu);
+	}
+
 	if (dmar_dev_scope_init() < 0) {
 		if (force_on)
 			panic("tboot: Failed to initialize DMAR device scope\n");
@@ -4111,7 +4127,7 @@
 }
 
 static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
-					    unsigned long iova)
+					    dma_addr_t iova)
 {
 	struct dmar_domain *dmar_domain = domain->priv;
 	struct dma_pte *pte;
@@ -4137,12 +4153,6 @@
 	return 0;
 }
 
-static void swap_pci_ref(struct pci_dev **from, struct pci_dev *to)
-{
-	pci_dev_put(*from);
-	*from = to;
-}
-
 #define REQ_ACS_FLAGS	(PCI_ACS_SV | PCI_ACS_RR | PCI_ACS_CR | PCI_ACS_UF)
 
 static int intel_iommu_add_device(struct device *dev)
diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c
index f3b8f23..5b19b2d 100644
--- a/drivers/iommu/intel_irq_remapping.c
+++ b/drivers/iommu/intel_irq_remapping.c
@@ -524,6 +524,16 @@
 
 	if (disable_irq_remap)
 		return 0;
+	if (irq_remap_broken) {
+		WARN_TAINT(1, TAINT_FIRMWARE_WORKAROUND,
+			   "This system BIOS has enabled interrupt remapping\n"
+			   "on a chipset that contains an erratum making that\n"
+			   "feature unstable.  To maintain system stability\n"
+			   "interrupt remapping is being disabled.  Please\n"
+			   "contact your BIOS vendor for an update\n");
+		disable_irq_remap = 1;
+		return 0;
+	}
 
 	if (!dmar_ir_support())
 		return 0;
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index b972d43..d8f98b1 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -204,6 +204,35 @@
 }
 EXPORT_SYMBOL_GPL(iommu_group_alloc);
 
+struct iommu_group *iommu_group_get_by_id(int id)
+{
+	struct kobject *group_kobj;
+	struct iommu_group *group;
+	const char *name;
+
+	if (!iommu_group_kset)
+		return NULL;
+
+	name = kasprintf(GFP_KERNEL, "%d", id);
+	if (!name)
+		return NULL;
+
+	group_kobj = kset_find_obj(iommu_group_kset, name);
+	kfree(name);
+
+	if (!group_kobj)
+		return NULL;
+
+	group = container_of(group_kobj, struct iommu_group, kobj);
+	BUG_ON(group->id != id);
+
+	kobject_get(group->devices_kobj);
+	kobject_put(&group->kobj);
+
+	return group;
+}
+EXPORT_SYMBOL_GPL(iommu_group_get_by_id);
+
 /**
  * iommu_group_get_iommudata - retrieve iommu_data registered for a group
  * @group: the group
@@ -706,8 +735,7 @@
 }
 EXPORT_SYMBOL_GPL(iommu_detach_group);
 
-phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain,
-			       unsigned long iova)
+phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova)
 {
 	if (unlikely(domain->ops->iova_to_phys == NULL))
 		return 0;
@@ -854,12 +882,13 @@
 
 
 int iommu_domain_window_enable(struct iommu_domain *domain, u32 wnd_nr,
-			       phys_addr_t paddr, u64 size)
+			       phys_addr_t paddr, u64 size, int prot)
 {
 	if (unlikely(domain->ops->domain_window_enable == NULL))
 		return -ENODEV;
 
-	return domain->ops->domain_window_enable(domain, wnd_nr, paddr, size);
+	return domain->ops->domain_window_enable(domain, wnd_nr, paddr, size,
+						 prot);
 }
 EXPORT_SYMBOL_GPL(iommu_domain_window_enable);
 
diff --git a/drivers/iommu/irq_remapping.c b/drivers/iommu/irq_remapping.c
index 7c11ff3..dcfea4e 100644
--- a/drivers/iommu/irq_remapping.c
+++ b/drivers/iommu/irq_remapping.c
@@ -18,6 +18,7 @@
 int irq_remapping_enabled;
 
 int disable_irq_remap;
+int irq_remap_broken;
 int disable_sourceid_checking;
 int no_x2apic_optout;
 
@@ -210,6 +211,11 @@
 #endif
 }
 
+void set_irq_remapping_broken(void)
+{
+	irq_remap_broken = 1;
+}
+
 int irq_remapping_supported(void)
 {
 	if (disable_irq_remap)
diff --git a/drivers/iommu/irq_remapping.h b/drivers/iommu/irq_remapping.h
index ecb6376..90c4dae 100644
--- a/drivers/iommu/irq_remapping.h
+++ b/drivers/iommu/irq_remapping.h
@@ -32,6 +32,7 @@
 struct msi_msg;
 
 extern int disable_irq_remap;
+extern int irq_remap_broken;
 extern int disable_sourceid_checking;
 extern int no_x2apic_optout;
 extern int irq_remapping_enabled;
@@ -89,6 +90,7 @@
 
 #define irq_remapping_enabled 0
 #define disable_irq_remap     1
+#define irq_remap_broken      0
 
 #endif /* CONFIG_IRQ_REMAP */
 
diff --git a/drivers/iommu/msm_iommu.c b/drivers/iommu/msm_iommu.c
index 6a8870a..8ab4f41 100644
--- a/drivers/iommu/msm_iommu.c
+++ b/drivers/iommu/msm_iommu.c
@@ -554,7 +554,7 @@
 }
 
 static phys_addr_t msm_iommu_iova_to_phys(struct iommu_domain *domain,
-					  unsigned long va)
+					  dma_addr_t va)
 {
 	struct msm_priv *priv;
 	struct msm_iommu_drvdata *iommu_drvdata;
diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c
index 6ac02fa..e02e5d7 100644
--- a/drivers/iommu/omap-iommu.c
+++ b/drivers/iommu/omap-iommu.c
@@ -1219,7 +1219,7 @@
 }
 
 static phys_addr_t omap_iommu_iova_to_phys(struct iommu_domain *domain,
-					  unsigned long da)
+					  dma_addr_t da)
 {
 	struct omap_iommu_domain *omap_domain = domain->priv;
 	struct omap_iommu *oiommu = omap_domain->iommu_dev;
diff --git a/drivers/iommu/pci.h b/drivers/iommu/pci.h
new file mode 100644
index 0000000..352d80a
--- /dev/null
+++ b/drivers/iommu/pci.h
@@ -0,0 +1,29 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ *
+ * Copyright (C) 2013 Red Hat, Inc.
+ * Copyright (C) 2013 Freescale Semiconductor, Inc.
+ *
+ */
+#ifndef __IOMMU_PCI_H
+#define __IOMMU_PCI_H
+
+/* Helper function for swapping pci device reference */
+static inline void swap_pci_ref(struct pci_dev **from, struct pci_dev *to)
+{
+	pci_dev_put(*from);
+	*from = to;
+}
+
+#endif  /* __IOMMU_PCI_H */
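swap_pci_ref() was previously duplicated in amd_iommu.c and intel-iommu.c (both private copies are removed in this series); the shared header keeps the refcounting idiom in one place. Illustrative use, walking up the bridge chain while holding exactly one reference (pdev and the loop are a hypothetical example, not code from this series):

	struct pci_dev *dev = pci_dev_get(pdev);

	while (dev->bus->self)	/* stop at the root bus */
		swap_pci_ref(&dev, pci_dev_get(dev->bus->self));

	pci_dev_put(dev);
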
diff --git a/drivers/iommu/shmobile-iommu.c b/drivers/iommu/shmobile-iommu.c
index b6e8b57..d572863 100644
--- a/drivers/iommu/shmobile-iommu.c
+++ b/drivers/iommu/shmobile-iommu.c
@@ -296,7 +296,7 @@
 }
 
 static phys_addr_t shmobile_iommu_iova_to_phys(struct iommu_domain *domain,
-					       unsigned long iova)
+					       dma_addr_t iova)
 {
 	struct shmobile_iommu_domain *sh_domain = domain->priv;
 	uint32_t l1entry = 0, l2entry = 0;
diff --git a/drivers/iommu/tegra-gart.c b/drivers/iommu/tegra-gart.c
index 8643757..108c0e9 100644
--- a/drivers/iommu/tegra-gart.c
+++ b/drivers/iommu/tegra-gart.c
@@ -279,7 +279,7 @@
 }
 
 static phys_addr_t gart_iommu_iova_to_phys(struct iommu_domain *domain,
-					   unsigned long iova)
+					   dma_addr_t iova)
 {
 	struct gart_device *gart = domain->priv;
 	unsigned long pte;
@@ -295,7 +295,8 @@
 
 	pa = (pte & GART_PAGE_MASK);
 	if (!pfn_valid(__phys_to_pfn(pa))) {
-		dev_err(gart->dev, "No entry for %08lx:%08x\n", iova, pa);
+		dev_err(gart->dev, "No entry for %08llx:%08x\n",
+			 (unsigned long long)iova, pa);
 		gart_dump_table(gart);
 		return -EINVAL;
 	}
diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c
index b34e5fd..f6f120e 100644
--- a/drivers/iommu/tegra-smmu.c
+++ b/drivers/iommu/tegra-smmu.c
@@ -757,7 +757,7 @@
 }
 
 static phys_addr_t smmu_iommu_iova_to_phys(struct iommu_domain *domain,
-					   unsigned long iova)
+					   dma_addr_t iova)
 {
 	struct smmu_as *as = domain->priv;
 	unsigned long *pte;
@@ -772,7 +772,8 @@
 	pfn = *pte & SMMU_PFN_MASK;
 	WARN_ON(!pfn_valid(pfn));
 	dev_dbg(as->smmu->dev,
-		"iova:%08lx pfn:%08lx asid:%d\n", iova, pfn, as->asid);
+		"iova:%08llx pfn:%08lx asid:%d\n", (unsigned long long)iova,
+		 pfn, as->asid);
 
 	spin_unlock_irqrestore(&as->lock, flags);
 	return PFN_PHYS(pfn);
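The format-string fixes in the two Tegra drivers follow from the prototype change: dma_addr_t is 32 or 64 bits wide depending on the architecture (CONFIG_ARCH_DMA_ADDR_T_64BIT), so it cannot portably be printed with %lx. The safe idiom, as used above:

	/* cast to the widest type the format can express */
	pr_debug("iova:%08llx\n", (unsigned long long)iova);
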
diff --git a/drivers/leds/Kconfig b/drivers/leds/Kconfig
index ec50824..d44806d 100644
--- a/drivers/leds/Kconfig
+++ b/drivers/leds/Kconfig
@@ -194,8 +194,8 @@
 	  module will be called leds-lp3944.
 
 config LEDS_LP55XX_COMMON
-	tristate "Common Driver for TI/National LP5521 and LP5523/55231"
-	depends on LEDS_LP5521 || LEDS_LP5523
+	tristate "Common Driver for TI/National LP5521, LP5523/55231 and LP5562"
+	depends on LEDS_LP5521 || LEDS_LP5523 || LEDS_LP5562
 	select FW_LOADER
 	help
 	  This option supports common operations for LP5521 and LP5523/55231
@@ -222,6 +222,16 @@
 	  Driver provides direct control via LED class and interface for
 	  programming the engines.
 
+config LEDS_LP5562
+	tristate "LED Support for TI LP5562 LED driver chip"
+	depends on LEDS_CLASS && I2C
+	select LEDS_LP55XX_COMMON
+	help
+	  If you say yes here you get support for the TI LP5562 LED driver.
+	  It is a 4-channel chip with programmable engines.
+	  The driver provides direct control via the LED class and an
+	  interface for programming the engines.
+
 config LEDS_LP8788
 	tristate "LED support for the TI LP8788 PMIC"
 	depends on LEDS_CLASS
@@ -469,106 +479,7 @@
 	  This option enables support for the BlinkM RGB LED connected
 	  through I2C. Say Y to enable support for the BlinkM LED.
 
-config LEDS_TRIGGERS
-	bool "LED Trigger support"
-	depends on LEDS_CLASS
-	help
-	  This option enables trigger support for the leds class.
-	  These triggers allow kernel events to drive the LEDs and can
-	  be configured via sysfs. If unsure, say Y.
-
 comment "LED Triggers"
-
-config LEDS_TRIGGER_TIMER
-	tristate "LED Timer Trigger"
-	depends on LEDS_TRIGGERS
-	help
-	  This allows LEDs to be controlled by a programmable timer
-	  via sysfs. Some LED hardware can be programmed to start
-	  blinking the LED without any further software interaction.
-	  For more details read Documentation/leds/leds-class.txt.
-
-	  If unsure, say Y.
-
-config LEDS_TRIGGER_ONESHOT
-	tristate "LED One-shot Trigger"
-	depends on LEDS_TRIGGERS
-	help
-	  This allows LEDs to blink in one-shot pulses with parameters
-	  controlled via sysfs.  It's useful to notify the user on
-	  sporadic events, when there are no clear begin and end trap points,
-	  or on dense events, where this blinks the LED at constant rate if
-	  rearmed continuously.
-
-	  It also shows how to use the led_blink_set_oneshot() function.
-
-	  If unsure, say Y.
-
-config LEDS_TRIGGER_IDE_DISK
-	bool "LED IDE Disk Trigger"
-	depends on IDE_GD_ATA
-	depends on LEDS_TRIGGERS
-	help
-	  This allows LEDs to be controlled by IDE disk activity.
-	  If unsure, say Y.
-
-config LEDS_TRIGGER_HEARTBEAT
-	tristate "LED Heartbeat Trigger"
-	depends on LEDS_TRIGGERS
-	help
-	  This allows LEDs to be controlled by a CPU load average.
-	  The flash frequency is a hyperbolic function of the 1-minute
-	  load average.
-	  If unsure, say Y.
-
-config LEDS_TRIGGER_BACKLIGHT
-	tristate "LED backlight Trigger"
-	depends on LEDS_TRIGGERS
-	help
-	  This allows LEDs to be controlled as a backlight device: they
-	  turn off and on when the display is blanked and unblanked.
-
-	  If unsure, say N.
-
-config LEDS_TRIGGER_CPU
-	bool "LED CPU Trigger"
-	depends on LEDS_TRIGGERS
-	help
-	  This allows LEDs to be controlled by active CPUs. This shows
-	  the active CPUs across an array of LEDs so you can see which
-	  CPUs are active on the system at any given moment.
-
-	  If unsure, say N.
-
-config LEDS_TRIGGER_GPIO
-	tristate "LED GPIO Trigger"
-	depends on LEDS_TRIGGERS
-	depends on GPIOLIB
-	help
-	  This allows LEDs to be controlled by gpio events. It's good
-	  when using gpios as switches and triggering the needed LEDs
-	  from there. One use case is n810's keypad LEDs that could
-	  be triggered by this trigger when user slides up to show
-	  keypad.
-
-	  If unsure, say N.
-
-config LEDS_TRIGGER_DEFAULT_ON
-	tristate "LED Default ON Trigger"
-	depends on LEDS_TRIGGERS
-	help
-	  This allows LEDs to be initialised in the ON state.
-	  If unsure, say Y.
-
-comment "iptables trigger is under Netfilter config (LED target)"
-	depends on LEDS_TRIGGERS
-
-config LEDS_TRIGGER_TRANSIENT
-	tristate "LED Transient Trigger"
-	depends on LEDS_TRIGGERS
-	help
-	  This allows one time activation of a transient state on
-	  GPIO/PWM based hardware.
-	  If unsure, say Y.
+source "drivers/leds/trigger/Kconfig"
 
 endif # NEW_LEDS
diff --git a/drivers/leds/Makefile b/drivers/leds/Makefile
index 215e7e3..ac28977 100644
--- a/drivers/leds/Makefile
+++ b/drivers/leds/Makefile
@@ -26,6 +26,7 @@
 obj-$(CONFIG_LEDS_LP55XX_COMMON)	+= leds-lp55xx-common.o
 obj-$(CONFIG_LEDS_LP5521)		+= leds-lp5521.o
 obj-$(CONFIG_LEDS_LP5523)		+= leds-lp5523.o
+obj-$(CONFIG_LEDS_LP5562)		+= leds-lp5562.o
 obj-$(CONFIG_LEDS_LP8788)		+= leds-lp8788.o
 obj-$(CONFIG_LEDS_TCA6507)		+= leds-tca6507.o
 obj-$(CONFIG_LEDS_CLEVO_MAIL)		+= leds-clevo-mail.o
@@ -57,12 +58,4 @@
 obj-$(CONFIG_LEDS_DAC124S085)		+= leds-dac124s085.o
 
 # LED Triggers
-obj-$(CONFIG_LEDS_TRIGGER_TIMER)	+= ledtrig-timer.o
-obj-$(CONFIG_LEDS_TRIGGER_ONESHOT)	+= ledtrig-oneshot.o
-obj-$(CONFIG_LEDS_TRIGGER_IDE_DISK)	+= ledtrig-ide-disk.o
-obj-$(CONFIG_LEDS_TRIGGER_HEARTBEAT)	+= ledtrig-heartbeat.o
-obj-$(CONFIG_LEDS_TRIGGER_BACKLIGHT)	+= ledtrig-backlight.o
-obj-$(CONFIG_LEDS_TRIGGER_GPIO)		+= ledtrig-gpio.o
-obj-$(CONFIG_LEDS_TRIGGER_CPU)		+= ledtrig-cpu.o
-obj-$(CONFIG_LEDS_TRIGGER_DEFAULT_ON)	+= ledtrig-default-on.o
-obj-$(CONFIG_LEDS_TRIGGER_TRANSIENT)	+= ledtrig-transient.o
+obj-$(CONFIG_LEDS_TRIGGERS)		+= trigger/
diff --git a/drivers/leds/leds-asic3.c b/drivers/leds/leds-asic3.c
index b474745..cf9efe4 100644
--- a/drivers/leds/leds-asic3.c
+++ b/drivers/leds/leds-asic3.c
@@ -134,6 +134,7 @@
 	return mfd_cell_disable(pdev);
 }
 
+#ifdef CONFIG_PM_SLEEP
 static int asic3_led_suspend(struct device *dev)
 {
 	struct platform_device *pdev = to_platform_device(dev);
@@ -159,11 +160,9 @@
 
 	return ret;
 }
+#endif
 
-static const struct dev_pm_ops asic3_led_pm_ops = {
-	.suspend	= asic3_led_suspend,
-	.resume		= asic3_led_resume,
-};
+static SIMPLE_DEV_PM_OPS(asic3_led_pm_ops, asic3_led_suspend, asic3_led_resume);
 
 static struct platform_driver asic3_led_driver = {
 	.probe		= asic3_led_probe,
diff --git a/drivers/leds/leds-atmel-pwm.c b/drivers/leds/leds-atmel-pwm.c
index 3867735..8a39c5b 100644
--- a/drivers/leds/leds-atmel-pwm.c
+++ b/drivers/leds/leds-atmel-pwm.c
@@ -113,7 +113,7 @@
 	return status;
 }
 
-static int __exit pwmled_remove(struct platform_device *pdev)
+static int pwmled_remove(struct platform_device *pdev)
 {
 	const struct gpio_led_platform_data	*pdata;
 	struct pwmled				*leds;
@@ -140,7 +140,7 @@
 	},
 	/* REVISIT add suspend() and resume() methods */
 	.probe =	pwmled_probe,
-	.remove =	__exit_p(pwmled_remove),
+	.remove =	pwmled_remove,
 };
 
 module_platform_driver(pwmled_driver);
diff --git a/drivers/leds/leds-bd2802.c b/drivers/leds/leds-bd2802.c
index 8515170..2db0423 100644
--- a/drivers/leds/leds-bd2802.c
+++ b/drivers/leds/leds-bd2802.c
@@ -732,7 +732,7 @@
 	return ret;
 }
 
-static int __exit bd2802_remove(struct i2c_client *client)
+static int bd2802_remove(struct i2c_client *client)
 {
 	struct bd2802_led *led = i2c_get_clientdata(client);
 	int i;
@@ -747,8 +747,7 @@
 	return 0;
 }
 
-#ifdef CONFIG_PM
-
+#ifdef CONFIG_PM_SLEEP
 static void bd2802_restore_state(struct bd2802_led *led)
 {
 	int i;
@@ -785,12 +784,9 @@
 
 	return 0;
 }
+#endif
 
 static SIMPLE_DEV_PM_OPS(bd2802_pm, bd2802_suspend, bd2802_resume);
-#define BD2802_PM (&bd2802_pm)
-#else		/* CONFIG_PM */
-#define BD2802_PM NULL
-#endif
 
 static const struct i2c_device_id bd2802_id[] = {
 	{ "BD2802", 0 },
@@ -801,10 +797,10 @@
 static struct i2c_driver bd2802_i2c_driver = {
 	.driver	= {
 		.name	= "BD2802",
-		.pm	= BD2802_PM,
+		.pm	= &bd2802_pm,
 	},
 	.probe		= bd2802_probe,
-	.remove		= __exit_p(bd2802_remove),
+	.remove		= bd2802_remove,
 	.id_table	= bd2802_id,
 };
 
diff --git a/drivers/leds/leds-lm355x.c b/drivers/leds/leds-lm355x.c
index 4117235..d81a8e7 100644
--- a/drivers/leds/leds-lm355x.c
+++ b/drivers/leds/leds-lm355x.c
@@ -477,6 +477,7 @@
 	chip->cdev_flash.name = "flash";
 	chip->cdev_flash.max_brightness = 16;
 	chip->cdev_flash.brightness_set = lm355x_strobe_brightness_set;
+	chip->cdev_flash.default_trigger = "flash";
 	err = led_classdev_register((struct device *)
 				    &client->dev, &chip->cdev_flash);
 	if (err < 0)
@@ -486,6 +487,7 @@
 	chip->cdev_torch.name = "torch";
 	chip->cdev_torch.max_brightness = 8;
 	chip->cdev_torch.brightness_set = lm355x_torch_brightness_set;
+	chip->cdev_torch.default_trigger = "torch";
 	err = led_classdev_register((struct device *)
 				    &client->dev, &chip->cdev_torch);
 	if (err < 0)
diff --git a/drivers/leds/leds-lm3642.c b/drivers/leds/leds-lm3642.c
index 9f428d9..f361bbe 100644
--- a/drivers/leds/leds-lm3642.c
+++ b/drivers/leds/leds-lm3642.c
@@ -363,6 +363,7 @@
 	chip->cdev_flash.name = "flash";
 	chip->cdev_flash.max_brightness = 16;
 	chip->cdev_flash.brightness_set = lm3642_strobe_brightness_set;
+	chip->cdev_flash.default_trigger = "flash";
 	err = led_classdev_register((struct device *)
 				    &client->dev, &chip->cdev_flash);
 	if (err < 0) {
@@ -380,6 +381,7 @@
 	chip->cdev_torch.name = "torch";
 	chip->cdev_torch.max_brightness = 8;
 	chip->cdev_torch.brightness_set = lm3642_torch_brightness_set;
+	chip->cdev_torch.default_trigger = "torch";
 	err = led_classdev_register((struct device *)
 				    &client->dev, &chip->cdev_torch);
 	if (err < 0) {
diff --git a/drivers/leds/leds-lp5521.c b/drivers/leds/leds-lp5521.c
index 1001347..19752c9 100644
--- a/drivers/leds/leds-lp5521.c
+++ b/drivers/leds/leds-lp5521.c
@@ -68,6 +68,18 @@
 #define LP5521_ENABLE_RUN_PROGRAM	\
 	(LP5521_ENABLE_DEFAULT | LP5521_EXEC_RUN)
 
+/* CONFIG register */
+#define LP5521_PWM_HF			0x40	/* PWM: 0 = 256Hz, 1 = 558Hz */
+#define LP5521_PWRSAVE_EN		0x20	/* 1 = Power save mode */
+#define LP5521_CP_MODE_OFF		0	/* Charge pump (CP) off */
+#define LP5521_CP_MODE_BYPASS		8	/* CP forced to bypass mode */
+#define LP5521_CP_MODE_1X5		0x10	/* CP forced to 1.5x mode */
+#define LP5521_CP_MODE_AUTO		0x18	/* Automatic mode selection */
+#define LP5521_R_TO_BATT		0x04	/* R out: 0 = CP, 1 = Vbat */
+#define LP5521_CLK_INT			0x01	/* Internal clock */
+#define LP5521_DEFAULT_CFG		\
+	(LP5521_PWM_HF | LP5521_PWRSAVE_EN | LP5521_CP_MODE_AUTO)
+
 /* Status */
 #define LP5521_EXT_CLK_USED		0x08
 
@@ -296,8 +308,11 @@
 	/* Set all PWMs to direct control mode */
 	ret = lp55xx_write(chip, LP5521_REG_OP_MODE, LP5521_CMD_DIRECT);
 
-	val = chip->pdata->update_config ?
-		: (LP5521_PWRSAVE_EN | LP5521_CP_MODE_AUTO | LP5521_R_TO_BATT);
+	/* Update configuration for the clock setting */
+	val = LP5521_DEFAULT_CFG;
+	if (!lp55xx_is_extclk_used(chip))
+		val |= LP5521_CLK_INT;
+
 	ret = lp55xx_write(chip, LP5521_REG_CONFIG, val);
 	if (ret)
 		return ret;
@@ -360,7 +375,8 @@
 	mutex_lock(&chip->lock);
 	ret = lp5521_run_selftest(chip, buf);
 	mutex_unlock(&chip->lock);
-	return sprintf(buf, "%s\n", ret ? "FAIL" : "OK");
+
+	return scnprintf(buf, PAGE_SIZE, "%s\n", ret ? "FAIL" : "OK");
 }
 
 /* device attributes */
diff --git a/drivers/leds/leds-lp5562.c b/drivers/leds/leds-lp5562.c
new file mode 100644
index 0000000..513f239
--- /dev/null
+++ b/drivers/leds/leds-lp5562.c
@@ -0,0 +1,599 @@
+/*
+ * LP5562 LED driver
+ *
+ * Copyright (C) 2013 Texas Instruments
+ *
+ * Author: Milo(Woogyom) Kim <milo.kim@ti.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/delay.h>
+#include <linux/firmware.h>
+#include <linux/i2c.h>
+#include <linux/init.h>
+#include <linux/leds.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/platform_data/leds-lp55xx.h>
+#include <linux/slab.h>
+
+#include "leds-lp55xx-common.h"
+
+#define LP5562_PROGRAM_LENGTH		32
+#define LP5562_MAX_LEDS			4
+
+/* ENABLE Register 00h */
+#define LP5562_REG_ENABLE		0x00
+#define LP5562_EXEC_ENG1_M		0x30
+#define LP5562_EXEC_ENG2_M		0x0C
+#define LP5562_EXEC_ENG3_M		0x03
+#define LP5562_EXEC_M			0x3F
+#define LP5562_MASTER_ENABLE		0x40	/* Chip master enable */
+#define LP5562_LOGARITHMIC_PWM		0x80	/* Logarithmic PWM adjustment */
+#define LP5562_EXEC_RUN			0x2A
+#define LP5562_ENABLE_DEFAULT	\
+	(LP5562_MASTER_ENABLE | LP5562_LOGARITHMIC_PWM)
+#define LP5562_ENABLE_RUN_PROGRAM	\
+	(LP5562_ENABLE_DEFAULT | LP5562_EXEC_RUN)
+
+/* OPMODE Register 01h */
+#define LP5562_REG_OP_MODE		0x01
+#define LP5562_MODE_ENG1_M		0x30
+#define LP5562_MODE_ENG2_M		0x0C
+#define LP5562_MODE_ENG3_M		0x03
+#define LP5562_LOAD_ENG1		0x10
+#define LP5562_LOAD_ENG2		0x04
+#define LP5562_LOAD_ENG3		0x01
+#define LP5562_RUN_ENG1			0x20
+#define LP5562_RUN_ENG2			0x08
+#define LP5562_RUN_ENG3			0x02
+#define LP5562_ENG1_IS_LOADING(mode)	\
+	((mode & LP5562_MODE_ENG1_M) == LP5562_LOAD_ENG1)
+#define LP5562_ENG2_IS_LOADING(mode)	\
+	((mode & LP5562_MODE_ENG2_M) == LP5562_LOAD_ENG2)
+#define LP5562_ENG3_IS_LOADING(mode)	\
+	((mode & LP5562_MODE_ENG3_M) == LP5562_LOAD_ENG3)
+
+/* BRIGHTNESS Registers */
+#define LP5562_REG_R_PWM		0x04
+#define LP5562_REG_G_PWM		0x03
+#define LP5562_REG_B_PWM		0x02
+#define LP5562_REG_W_PWM		0x0E
+
+/* CURRENT Registers */
+#define LP5562_REG_R_CURRENT		0x07
+#define LP5562_REG_G_CURRENT		0x06
+#define LP5562_REG_B_CURRENT		0x05
+#define LP5562_REG_W_CURRENT		0x0F
+
+/* CONFIG Register 08h */
+#define LP5562_REG_CONFIG		0x08
+#define LP5562_PWM_HF			0x40
+#define LP5562_PWRSAVE_EN		0x20
+#define LP5562_CLK_INT			0x01	/* Internal clock */
+#define LP5562_DEFAULT_CFG		(LP5562_PWM_HF | LP5562_PWRSAVE_EN)
+
+/* RESET Register 0Dh */
+#define LP5562_REG_RESET		0x0D
+#define LP5562_RESET			0xFF
+
+/* PROGRAM ENGINE Registers */
+#define LP5562_REG_PROG_MEM_ENG1	0x10
+#define LP5562_REG_PROG_MEM_ENG2	0x30
+#define LP5562_REG_PROG_MEM_ENG3	0x50
+
+/* LEDMAP Register 70h */
+#define LP5562_REG_ENG_SEL		0x70
+#define LP5562_ENG_SEL_PWM		0
+#define LP5562_ENG_FOR_RGB_M		0x3F
+#define LP5562_ENG_SEL_RGB		0x1B	/* R:ENG1, G:ENG2, B:ENG3 */
+#define LP5562_ENG_FOR_W_M		0xC0
+#define LP5562_ENG1_FOR_W		0x40	/* W:ENG1 */
+#define LP5562_ENG2_FOR_W		0x80	/* W:ENG2 */
+#define LP5562_ENG3_FOR_W		0xC0	/* W:ENG3 */
+
+/* Program Commands */
+#define LP5562_CMD_DISABLE		0x00
+#define LP5562_CMD_LOAD			0x15
+#define LP5562_CMD_RUN			0x2A
+#define LP5562_CMD_DIRECT		0x3F
+#define LP5562_PATTERN_OFF		0
+
+static inline void lp5562_wait_opmode_done(void)
+{
+	/* an operation mode change needs more than 153 us to complete */
+	usleep_range(200, 300);
+}
+
+static inline void lp5562_wait_enable_done(void)
+{
+	/* it takes more than 488 us to update the ENABLE register */
+	usleep_range(500, 600);
+}
+
+static void lp5562_set_led_current(struct lp55xx_led *led, u8 led_current)
+{
+	u8 addr[] = {
+		LP5562_REG_R_CURRENT,
+		LP5562_REG_G_CURRENT,
+		LP5562_REG_B_CURRENT,
+		LP5562_REG_W_CURRENT,
+	};
+
+	led->led_current = led_current;
+	lp55xx_write(led->chip, addr[led->chan_nr], led_current);
+}
+
+static void lp5562_load_engine(struct lp55xx_chip *chip)
+{
+	enum lp55xx_engine_index idx = chip->engine_idx;
+	u8 mask[] = {
+		[LP55XX_ENGINE_1] = LP5562_MODE_ENG1_M,
+		[LP55XX_ENGINE_2] = LP5562_MODE_ENG2_M,
+		[LP55XX_ENGINE_3] = LP5562_MODE_ENG3_M,
+	};
+
+	u8 val[] = {
+		[LP55XX_ENGINE_1] = LP5562_LOAD_ENG1,
+		[LP55XX_ENGINE_2] = LP5562_LOAD_ENG2,
+		[LP55XX_ENGINE_3] = LP5562_LOAD_ENG3,
+	};
+
+	lp55xx_update_bits(chip, LP5562_REG_OP_MODE, mask[idx], val[idx]);
+
+	lp5562_wait_opmode_done();
+}
+
+static void lp5562_stop_engine(struct lp55xx_chip *chip)
+{
+	lp55xx_write(chip, LP5562_REG_OP_MODE, LP5562_CMD_DISABLE);
+	lp5562_wait_opmode_done();
+}
+
+static void lp5562_run_engine(struct lp55xx_chip *chip, bool start)
+{
+	int ret;
+	u8 mode;
+	u8 exec;
+
+	/* stop engine */
+	if (!start) {
+		lp55xx_write(chip, LP5562_REG_ENABLE, LP5562_ENABLE_DEFAULT);
+		lp5562_wait_enable_done();
+		lp5562_stop_engine(chip);
+		lp55xx_write(chip, LP5562_REG_ENG_SEL, LP5562_ENG_SEL_PWM);
+		lp55xx_write(chip, LP5562_REG_OP_MODE, LP5562_CMD_DIRECT);
+		lp5562_wait_opmode_done();
+		return;
+	}
+
+	/*
+	 * To run the engine, the operation mode and the enable
+	 * register should be updated at the same time.
+	 */
+
+	ret = lp55xx_read(chip, LP5562_REG_OP_MODE, &mode);
+	if (ret)
+		return;
+
+	ret = lp55xx_read(chip, LP5562_REG_ENABLE, &exec);
+	if (ret)
+		return;
+
+	/* change operation mode to RUN only when each engine is loading */
+	if (LP5562_ENG1_IS_LOADING(mode)) {
+		mode = (mode & ~LP5562_MODE_ENG1_M) | LP5562_RUN_ENG1;
+		exec = (exec & ~LP5562_EXEC_ENG1_M) | LP5562_RUN_ENG1;
+	}
+
+	if (LP5562_ENG2_IS_LOADING(mode)) {
+		mode = (mode & ~LP5562_MODE_ENG2_M) | LP5562_RUN_ENG2;
+		exec = (exec & ~LP5562_EXEC_ENG2_M) | LP5562_RUN_ENG2;
+	}
+
+	if (LP5562_ENG3_IS_LOADING(mode)) {
+		mode = (mode & ~LP5562_MODE_ENG3_M) | LP5562_RUN_ENG3;
+		exec = (exec & ~LP5562_EXEC_ENG3_M) | LP5562_RUN_ENG3;
+	}
+
+	lp55xx_write(chip, LP5562_REG_OP_MODE, mode);
+	lp5562_wait_opmode_done();
+
+	lp55xx_update_bits(chip, LP5562_REG_ENABLE, LP5562_EXEC_M, exec);
+	lp5562_wait_enable_done();
+}
+
+static int lp5562_update_firmware(struct lp55xx_chip *chip,
+					const u8 *data, size_t size)
+{
+	enum lp55xx_engine_index idx = chip->engine_idx;
+	u8 pattern[LP5562_PROGRAM_LENGTH] = {0};
+	u8 addr[] = {
+		[LP55XX_ENGINE_1] = LP5562_REG_PROG_MEM_ENG1,
+		[LP55XX_ENGINE_2] = LP5562_REG_PROG_MEM_ENG2,
+		[LP55XX_ENGINE_3] = LP5562_REG_PROG_MEM_ENG3,
+	};
+	unsigned cmd;
+	char c[3];
+	int program_size;
+	int nrchars;
+	int offset = 0;
+	int ret;
+	int i;
+
+	/* clear program memory before updating */
+	for (i = 0; i < LP5562_PROGRAM_LENGTH; i++)
+		lp55xx_write(chip, addr[idx] + i, 0);
+
+	i = 0;
+	while ((offset < size - 1) && (i < LP5562_PROGRAM_LENGTH)) {
+		/* use two sscanf calls, since the length limit only works for %s */
+		ret = sscanf(data + offset, "%2s%n ", c, &nrchars);
+		if (ret != 1)
+			goto err;
+
+		ret = sscanf(c, "%2x", &cmd);
+		if (ret != 1)
+			goto err;
+
+		pattern[i] = (u8)cmd;
+		offset += nrchars;
+		i++;
+	}
+
+	/* Each instruction is 16bit long. Check that length is even */
+	if (i % 2)
+		goto err;
+
+	program_size = i;
+	for (i = 0; i < program_size; i++)
+		lp55xx_write(chip, addr[idx] + i, pattern[i]);
+
+	return 0;
+
+err:
+	dev_err(&chip->cl->dev, "wrong pattern format\n");
+	return -EINVAL;
+}
+
+static void lp5562_firmware_loaded(struct lp55xx_chip *chip)
+{
+	const struct firmware *fw = chip->fw;
+
+	if (fw->size > LP5562_PROGRAM_LENGTH) {
+		dev_err(&chip->cl->dev, "firmware data size overflow: %zu\n",
+			fw->size);
+		return;
+	}
+
+	/*
+	 * Program memory sequence
+	 *  1) set engine mode to "LOAD"
+	 *  2) write firmware data into program memory
+	 */
+
+	lp5562_load_engine(chip);
+	lp5562_update_firmware(chip, fw->data, fw->size);
+}
+
+static int lp5562_post_init_device(struct lp55xx_chip *chip)
+{
+	int ret;
+	u8 cfg = LP5562_DEFAULT_CFG;
+
+	/* Set all PWMs to direct control mode */
+	ret = lp55xx_write(chip, LP5562_REG_OP_MODE, LP5562_CMD_DIRECT);
+	if (ret)
+		return ret;
+
+	lp5562_wait_opmode_done();
+
+	/* Update configuration for the clock setting */
+	if (!lp55xx_is_extclk_used(chip))
+		cfg |= LP5562_CLK_INT;
+
+	ret = lp55xx_write(chip, LP5562_REG_CONFIG, cfg);
+	if (ret)
+		return ret;
+
+	/* Initialize all channels PWM to zero -> leds off */
+	lp55xx_write(chip, LP5562_REG_R_PWM, 0);
+	lp55xx_write(chip, LP5562_REG_G_PWM, 0);
+	lp55xx_write(chip, LP5562_REG_B_PWM, 0);
+	lp55xx_write(chip, LP5562_REG_W_PWM, 0);
+
+	/* Set LED map as register PWM by default */
+	lp55xx_write(chip, LP5562_REG_ENG_SEL, LP5562_ENG_SEL_PWM);
+
+	return 0;
+}
+
+static void lp5562_led_brightness_work(struct work_struct *work)
+{
+	struct lp55xx_led *led = container_of(work, struct lp55xx_led,
+					      brightness_work);
+	struct lp55xx_chip *chip = led->chip;
+	u8 addr[] = {
+		LP5562_REG_R_PWM,
+		LP5562_REG_G_PWM,
+		LP5562_REG_B_PWM,
+		LP5562_REG_W_PWM,
+	};
+
+	mutex_lock(&chip->lock);
+	lp55xx_write(chip, addr[led->chan_nr], led->brightness);
+	mutex_unlock(&chip->lock);
+}
+
+static void lp5562_write_program_memory(struct lp55xx_chip *chip,
+					u8 base, const u8 *rgb, int size)
+{
+	int i;
+
+	if (!rgb || size <= 0)
+		return;
+
+	for (i = 0; i < size; i++)
+		lp55xx_write(chip, base + i, *(rgb + i));
+
+	lp55xx_write(chip, base + i, 0);
+	lp55xx_write(chip, base + i + 1, 0);
+}
+
+/* check that the pattern sizes fit in the engine program memory */
+static inline bool _is_pc_overflow(struct lp55xx_predef_pattern *ptn)
+{
+	return (ptn->size_r >= LP5562_PROGRAM_LENGTH ||
+		ptn->size_g >= LP5562_PROGRAM_LENGTH ||
+		ptn->size_b >= LP5562_PROGRAM_LENGTH);
+}
+
+static int lp5562_run_predef_led_pattern(struct lp55xx_chip *chip, int mode)
+{
+	struct lp55xx_predef_pattern *ptn;
+	int i;
+
+	if (mode == LP5562_PATTERN_OFF) {
+		lp5562_run_engine(chip, false);
+		return 0;
+	}
+
+	ptn = chip->pdata->patterns + (mode - 1);
+	if (!ptn || _is_pc_overflow(ptn)) {
+		dev_err(&chip->cl->dev, "invalid pattern data\n");
+		return -EINVAL;
+	}
+
+	lp5562_stop_engine(chip);
+
+	/* Set LED map as RGB */
+	lp55xx_write(chip, LP5562_REG_ENG_SEL, LP5562_ENG_SEL_RGB);
+
+	/* Load engines */
+	for (i = LP55XX_ENGINE_1; i <= LP55XX_ENGINE_3; i++) {
+		chip->engine_idx = i;
+		lp5562_load_engine(chip);
+	}
+
+	/* Clear program registers */
+	lp55xx_write(chip, LP5562_REG_PROG_MEM_ENG1, 0);
+	lp55xx_write(chip, LP5562_REG_PROG_MEM_ENG1 + 1, 0);
+	lp55xx_write(chip, LP5562_REG_PROG_MEM_ENG2, 0);
+	lp55xx_write(chip, LP5562_REG_PROG_MEM_ENG2 + 1, 0);
+	lp55xx_write(chip, LP5562_REG_PROG_MEM_ENG3, 0);
+	lp55xx_write(chip, LP5562_REG_PROG_MEM_ENG3 + 1, 0);
+
+	/* Program engines */
+	lp5562_write_program_memory(chip, LP5562_REG_PROG_MEM_ENG1,
+				ptn->r, ptn->size_r);
+	lp5562_write_program_memory(chip, LP5562_REG_PROG_MEM_ENG2,
+				ptn->g, ptn->size_g);
+	lp5562_write_program_memory(chip, LP5562_REG_PROG_MEM_ENG3,
+				ptn->b, ptn->size_b);
+
+	/* Run engines */
+	lp5562_run_engine(chip, true);
+
+	return 0;
+}
+
+static ssize_t lp5562_store_pattern(struct device *dev,
+				struct device_attribute *attr,
+				const char *buf, size_t len)
+{
+	struct lp55xx_led *led = i2c_get_clientdata(to_i2c_client(dev));
+	struct lp55xx_chip *chip = led->chip;
+	struct lp55xx_predef_pattern *ptn = chip->pdata->patterns;
+	int num_patterns = chip->pdata->num_patterns;
+	unsigned long mode;
+	int ret;
+
+	ret = kstrtoul(buf, 0, &mode);
+	if (ret)
+		return ret;
+
+	if (mode > num_patterns || !ptn)
+		return -EINVAL;
+
+	mutex_lock(&chip->lock);
+	ret = lp5562_run_predef_led_pattern(chip, mode);
+	mutex_unlock(&chip->lock);
+
+	if (ret)
+		return ret;
+
+	return len;
+}
+
+static ssize_t lp5562_store_engine_mux(struct device *dev,
+				     struct device_attribute *attr,
+				     const char *buf, size_t len)
+{
+	struct lp55xx_led *led = i2c_get_clientdata(to_i2c_client(dev));
+	struct lp55xx_chip *chip = led->chip;
+	u8 mask;
+	u8 val;
+
+	/* LED map
+	 * R ... Engine 1 (fixed)
+	 * G ... Engine 2 (fixed)
+	 * B ... Engine 3 (fixed)
+	 * W ... Engine 1 or 2 or 3
+	 */
+
+	if (sysfs_streq(buf, "RGB")) {
+		mask = LP5562_ENG_FOR_RGB_M;
+		val = LP5562_ENG_SEL_RGB;
+	} else if (sysfs_streq(buf, "W")) {
+		enum lp55xx_engine_index idx = chip->engine_idx;
+
+		mask = LP5562_ENG_FOR_W_M;
+		switch (idx) {
+		case LP55XX_ENGINE_1:
+			val = LP5562_ENG1_FOR_W;
+			break;
+		case LP55XX_ENGINE_2:
+			val = LP5562_ENG2_FOR_W;
+			break;
+		case LP55XX_ENGINE_3:
+			val = LP5562_ENG3_FOR_W;
+			break;
+		default:
+			return -EINVAL;
+		}
+
+	} else {
+		dev_err(dev, "choose RGB or W\n");
+		return -EINVAL;
+	}
+
+	mutex_lock(&chip->lock);
+	lp55xx_update_bits(chip, LP5562_REG_ENG_SEL, mask, val);
+	mutex_unlock(&chip->lock);
+
+	return len;
+}
+
+static DEVICE_ATTR(led_pattern, S_IWUSR, NULL, lp5562_store_pattern);
+static DEVICE_ATTR(engine_mux, S_IWUSR, NULL, lp5562_store_engine_mux);
+
+static struct attribute *lp5562_attributes[] = {
+	&dev_attr_led_pattern.attr,
+	&dev_attr_engine_mux.attr,
+	NULL,
+};
+
+static const struct attribute_group lp5562_group = {
+	.attrs = lp5562_attributes,
+};
+
+/* Chip specific configurations */
+static struct lp55xx_device_config lp5562_cfg = {
+	.max_channel  = LP5562_MAX_LEDS,
+	.reset = {
+		.addr = LP5562_REG_RESET,
+		.val  = LP5562_RESET,
+	},
+	.enable = {
+		.addr = LP5562_REG_ENABLE,
+		.val  = LP5562_ENABLE_DEFAULT,
+	},
+	.post_init_device   = lp5562_post_init_device,
+	.set_led_current    = lp5562_set_led_current,
+	.brightness_work_fn = lp5562_led_brightness_work,
+	.run_engine         = lp5562_run_engine,
+	.firmware_cb        = lp5562_firmware_loaded,
+	.dev_attr_group     = &lp5562_group,
+};
+
+static int lp5562_probe(struct i2c_client *client,
+			const struct i2c_device_id *id)
+{
+	int ret;
+	struct lp55xx_chip *chip;
+	struct lp55xx_led *led;
+	struct lp55xx_platform_data *pdata = client->dev.platform_data;
+
+	if (!pdata) {
+		dev_err(&client->dev, "no platform data\n");
+		return -EINVAL;
+	}
+
+	chip = devm_kzalloc(&client->dev, sizeof(*chip), GFP_KERNEL);
+	if (!chip)
+		return -ENOMEM;
+
+	led = devm_kzalloc(&client->dev,
+			sizeof(*led) * pdata->num_channels, GFP_KERNEL);
+	if (!led)
+		return -ENOMEM;
+
+	chip->cl = client;
+	chip->pdata = pdata;
+	chip->cfg = &lp5562_cfg;
+
+	mutex_init(&chip->lock);
+
+	i2c_set_clientdata(client, led);
+
+	ret = lp55xx_init_device(chip);
+	if (ret)
+		goto err_init;
+
+	ret = lp55xx_register_leds(led, chip);
+	if (ret)
+		goto err_register_leds;
+
+	ret = lp55xx_register_sysfs(chip);
+	if (ret) {
+		dev_err(&client->dev, "registering sysfs failed\n");
+		goto err_register_sysfs;
+	}
+
+	return 0;
+
+err_register_sysfs:
+	lp55xx_unregister_leds(led, chip);
+err_register_leds:
+	lp55xx_deinit_device(chip);
+err_init:
+	return ret;
+}
+
+static int lp5562_remove(struct i2c_client *client)
+{
+	struct lp55xx_led *led = i2c_get_clientdata(client);
+	struct lp55xx_chip *chip = led->chip;
+
+	lp5562_stop_engine(chip);
+
+	lp55xx_unregister_sysfs(chip);
+	lp55xx_unregister_leds(led, chip);
+	lp55xx_deinit_device(chip);
+
+	return 0;
+}
+
+static const struct i2c_device_id lp5562_id[] = {
+	{ "lp5562", 0 },
+	{ }
+};
+MODULE_DEVICE_TABLE(i2c, lp5562_id);
+
+static struct i2c_driver lp5562_driver = {
+	.driver = {
+		.name	= "lp5562",
+	},
+	.probe		= lp5562_probe,
+	.remove		= lp5562_remove,
+	.id_table	= lp5562_id,
+};
+
+module_i2c_driver(lp5562_driver);
+
+MODULE_DESCRIPTION("Texas Instruments LP5562 LED Driver");
+MODULE_AUTHOR("Milo Kim");
+MODULE_LICENSE("GPL");
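Predefined patterns arrive through platform data and are selected at runtime by writing a 1-based pattern number to the new led_pattern attribute (0 stops the engines). A hedged sketch of the board-side wiring; the program bytes are placeholders, not a real LP5562 engine program:

	static u8 pattern_red[] = { 0x40, 0xff, 0x7f, 0x00 };	/* placeholder */

	static struct lp55xx_predef_pattern board_patterns[] = {
		{
			.r      = pattern_red,
			.size_r = ARRAY_SIZE(pattern_red),
		},
	};

	static struct lp55xx_platform_data lp5562_pdata = {
		/* led_config/num_channels setup omitted */
		.patterns     = board_patterns,
		.num_patterns = ARRAY_SIZE(board_patterns),
	};

Writing 1 to led_pattern then runs board_patterns[0] on the RGB engines.
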
diff --git a/drivers/leds/leds-lp55xx-common.c b/drivers/leds/leds-lp55xx-common.c
index d9eb841..ba34199 100644
--- a/drivers/leds/leds-lp55xx-common.c
+++ b/drivers/leds/leds-lp55xx-common.c
@@ -1,5 +1,5 @@
 /*
- * LP5521/LP5523/LP55231 Common Driver
+ * LP5521/LP5523/LP55231/LP5562 Common Driver
  *
  * Copyright 2012 Texas Instruments
  *
@@ -12,6 +12,7 @@
  * Derived from leds-lp5521.c, leds-lp5523.c
  */
 
+#include <linux/clk.h>
 #include <linux/delay.h>
 #include <linux/firmware.h>
 #include <linux/i2c.h>
@@ -21,6 +22,9 @@
 
 #include "leds-lp55xx-common.h"
 
+/* External clock rate */
+#define LP55XX_CLK_32K			32768
+
 static struct lp55xx_led *cdev_to_lp55xx_led(struct led_classdev *cdev)
 {
 	return container_of(cdev, struct lp55xx_led, cdev);
@@ -80,7 +84,7 @@
 {
 	struct lp55xx_led *led = dev_to_lp55xx_led(dev);
 
-	return sprintf(buf, "%d\n", led->led_current);
+	return scnprintf(buf, PAGE_SIZE, "%d\n", led->led_current);
 }
 
 static ssize_t lp55xx_store_current(struct device *dev,
@@ -113,7 +117,7 @@
 {
 	struct lp55xx_led *led = dev_to_lp55xx_led(dev);
 
-	return sprintf(buf, "%d\n", led->max_current);
+	return scnprintf(buf, PAGE_SIZE, "%d\n", led->max_current);
 }
 
 static DEVICE_ATTR(led_current, S_IRUGO | S_IWUSR, lp55xx_show_current,
@@ -357,6 +361,35 @@
 }
 EXPORT_SYMBOL_GPL(lp55xx_update_bits);
 
+bool lp55xx_is_extclk_used(struct lp55xx_chip *chip)
+{
+	struct clk *clk;
+	int err;
+
+	clk = devm_clk_get(&chip->cl->dev, "32k_clk");
+	if (IS_ERR(clk))
+		goto use_internal_clk;
+
+	err = clk_prepare_enable(clk);
+	if (err)
+		goto use_internal_clk;
+
+	if (clk_get_rate(clk) != LP55XX_CLK_32K) {
+		clk_disable_unprepare(clk);
+		goto use_internal_clk;
+	}
+
+	dev_info(&chip->cl->dev, "%dHz external clock used\n", LP55XX_CLK_32K);
+
+	chip->clk = clk;
+	return true;
+
+use_internal_clk:
+	dev_info(&chip->cl->dev, "internal clock used\n");
+	return false;
+}
+EXPORT_SYMBOL_GPL(lp55xx_is_extclk_used);
+
 int lp55xx_init_device(struct lp55xx_chip *chip)
 {
 	struct lp55xx_platform_data *pdata;
@@ -421,6 +454,9 @@
 {
 	struct lp55xx_platform_data *pdata = chip->pdata;
 
+	if (chip->clk)
+		clk_disable_unprepare(chip->clk);
+
 	if (pdata->enable)
 		pdata->enable(0);
 
diff --git a/drivers/leds/leds-lp55xx-common.h b/drivers/leds/leds-lp55xx-common.h
index ece4761..fa6a078 100644
--- a/drivers/leds/leds-lp55xx-common.h
+++ b/drivers/leds/leds-lp55xx-common.h
@@ -83,6 +83,7 @@
  */
 struct lp55xx_chip {
 	struct i2c_client *cl;
+	struct clk *clk;
 	struct lp55xx_platform_data *pdata;
 	struct mutex lock;	/* lock for user-space interface */
 	int num_leds;
@@ -117,6 +118,9 @@
 extern int lp55xx_update_bits(struct lp55xx_chip *chip, u8 reg,
 			u8 mask, u8 val);
 
+/* external clock detection */
+extern bool lp55xx_is_extclk_used(struct lp55xx_chip *chip);
+
 /* common device init/deinit functions */
 extern int lp55xx_init_device(struct lp55xx_chip *chip);
 extern void lp55xx_deinit_device(struct lp55xx_chip *chip);
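lp55xx_is_extclk_used() only reports true when a clock named "32k_clk" can be acquired, prepared, and runs at exactly 32768 Hz; otherwise the chip falls back to its internal oscillator and chip->clk (disabled in lp55xx_deinit_device()) stays NULL. Chip drivers consume the result as in the lp5521/lp5562 hunks above; a sketch with hypothetical CHIP_* register names:

	u8 cfg = CHIP_DEFAULT_CFG;

	/* no usable external 32k clock: select the internal one */
	if (!lp55xx_is_extclk_used(chip))
		cfg |= CHIP_CLK_INT;

	ret = lp55xx_write(chip, CHIP_REG_CONFIG, cfg);
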
diff --git a/drivers/leds/leds-lt3593.c b/drivers/leds/leds-lt3593.c
index c9b9e1f..ca48a7d5 100644
--- a/drivers/leds/leds-lt3593.c
+++ b/drivers/leds/leds-lt3593.c
@@ -106,8 +106,9 @@
 	if (!template->retain_state_suspended)
 		led_dat->cdev.flags |= LED_CORE_SUSPENDRESUME;
 
-	ret = devm_gpio_request_one(parent, template->gpio,
-				    GPIOF_DIR_OUT | state, template->name);
+	ret = devm_gpio_request_one(parent, template->gpio, state ?
+				    GPIOF_OUT_INIT_HIGH : GPIOF_OUT_INIT_LOW,
+				    template->name);
 	if (ret < 0)
 		return ret;
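This change (and the matching leds-ns2 one below) fixes a real flag-encoding bug: in this tree GPIOF_DIR_OUT is defined as 0 and GPIOF_DIR_IN as bit 0, so OR-ing a raw 0/1 state into the flags turns a logical-high request into GPIOF_DIR_IN, configuring the LED GPIO as an input. Direction and initial level have to be encoded together:

	/* wrong: "GPIOF_DIR_OUT | 1" equals GPIOF_DIR_IN */
	devm_gpio_request_one(dev, gpio, GPIOF_DIR_OUT | state, name);

	/* right: dedicated flags carry both direction and initial level */
	devm_gpio_request_one(dev, gpio,
			      state ? GPIOF_OUT_INIT_HIGH : GPIOF_OUT_INIT_LOW,
			      name);
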
 
diff --git a/drivers/leds/leds-ns2.c b/drivers/leds/leds-ns2.c
index d978171..70137b1 100644
--- a/drivers/leds/leds-ns2.c
+++ b/drivers/leds/leds-ns2.c
@@ -193,7 +193,8 @@
 	enum ns2_led_modes mode;
 
 	ret = devm_gpio_request_one(&pdev->dev, template->cmd,
-			GPIOF_DIR_OUT | gpio_get_value(template->cmd),
+			gpio_get_value(template->cmd) ?
+			GPIOF_OUT_INIT_HIGH : GPIOF_OUT_INIT_LOW,
 			template->name);
 	if (ret) {
 		dev_err(&pdev->dev, "%s: failed to setup command GPIO\n",
@@ -202,7 +203,8 @@
 	}
 
 	ret = devm_gpio_request_one(&pdev->dev, template->slow,
-			GPIOF_DIR_OUT | gpio_get_value(template->slow),
+			gpio_get_value(template->slow) ?
+			GPIOF_OUT_INIT_HIGH : GPIOF_OUT_INIT_LOW,
 			template->name);
 	if (ret) {
 		dev_err(&pdev->dev, "%s: failed to setup slow GPIO\n",
@@ -306,10 +308,21 @@
 };
 #endif /* CONFIG_OF_GPIO */
 
+struct ns2_led_priv {
+	int num_leds;
+	struct ns2_led_data leds_data[];
+};
+
+static inline int sizeof_ns2_led_priv(int num_leds)
+{
+	return sizeof(struct ns2_led_priv) +
+		      (sizeof(struct ns2_led_data) * num_leds);
+}
+
 static int ns2_led_probe(struct platform_device *pdev)
 {
 	struct ns2_led_platform_data *pdata = pdev->dev.platform_data;
-	struct ns2_led_data *leds_data;
+	struct ns2_led_priv *priv;
 	int i;
 	int ret;
 
@@ -330,21 +343,23 @@
 		return -EINVAL;
 #endif /* CONFIG_OF_GPIO */
 
-	leds_data = devm_kzalloc(&pdev->dev, sizeof(struct ns2_led_data) *
-				 pdata->num_leds, GFP_KERNEL);
-	if (!leds_data)
+	priv = devm_kzalloc(&pdev->dev,
+			    sizeof_ns2_led_priv(pdata->num_leds), GFP_KERNEL);
+	if (!priv)
 		return -ENOMEM;
+	priv->num_leds = pdata->num_leds;
 
-	for (i = 0; i < pdata->num_leds; i++) {
-		ret = create_ns2_led(pdev, &leds_data[i], &pdata->leds[i]);
+	for (i = 0; i < priv->num_leds; i++) {
+		ret = create_ns2_led(pdev, &priv->leds_data[i],
+				     &pdata->leds[i]);
 		if (ret < 0) {
 			for (i = i - 1; i >= 0; i--)
-				delete_ns2_led(&leds_data[i]);
+				delete_ns2_led(&priv->leds_data[i]);
 			return ret;
 		}
 	}
 
-	platform_set_drvdata(pdev, leds_data);
+	platform_set_drvdata(pdev, priv);
 
 	return 0;
 }
@@ -352,13 +367,12 @@
 static int ns2_led_remove(struct platform_device *pdev)
 {
 	int i;
-	struct ns2_led_platform_data *pdata = pdev->dev.platform_data;
-	struct ns2_led_data *leds_data;
+	struct ns2_led_priv *priv;
 
-	leds_data = platform_get_drvdata(pdev);
+	priv = platform_get_drvdata(pdev);
 
-	for (i = 0; i < pdata->num_leds; i++)
-		delete_ns2_led(&leds_data[i]);
+	for (i = 0; i < priv->num_leds; i++)
+		delete_ns2_led(&priv->leds_data[i]);
 
 	platform_set_drvdata(pdev, NULL);
 
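The ns2 rework is an instance of a common pattern: carry the element count next to a C99 flexible array member so that remove() no longer has to reach into platform data. A generic sketch of the pattern, with hypothetical names:

	#include <linux/device.h>
	#include <linux/slab.h>

	struct example_item {
		int id;
	};

	struct example_priv {
		int num_items;
		struct example_item items[];	/* flexible array, must be last */
	};

	static struct example_priv *example_alloc(struct device *dev, int n)
	{
		struct example_priv *priv;

		/* One devm allocation covers the header plus n trailing items. */
		priv = devm_kzalloc(dev, sizeof(*priv) +
				    n * sizeof(struct example_item), GFP_KERNEL);
		if (priv)
			priv->num_items = n;
		return priv;
	}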
diff --git a/drivers/leds/leds-pwm.c b/drivers/leds/leds-pwm.c
index a1ea5f6..faf52c0 100644
--- a/drivers/leds/leds-pwm.c
+++ b/drivers/leds/leds-pwm.c
@@ -23,12 +23,16 @@
 #include <linux/pwm.h>
 #include <linux/leds_pwm.h>
 #include <linux/slab.h>
+#include <linux/workqueue.h>
 
 struct led_pwm_data {
 	struct led_classdev	cdev;
 	struct pwm_device	*pwm;
+	struct work_struct	work;
 	unsigned int		active_low;
 	unsigned int		period;
+	int			duty;
+	bool			can_sleep;
 };
 
 struct led_pwm_priv {
@@ -36,6 +40,26 @@
 	struct led_pwm_data leds[0];
 };
 
+static void __led_pwm_set(struct led_pwm_data *led_dat)
+{
+	int new_duty = led_dat->duty;
+
+	pwm_config(led_dat->pwm, new_duty, led_dat->period);
+
+	if (new_duty == 0)
+		pwm_disable(led_dat->pwm);
+	else
+		pwm_enable(led_dat->pwm);
+}
+
+static void led_pwm_work(struct work_struct *work)
+{
+	struct led_pwm_data *led_dat =
+		container_of(work, struct led_pwm_data, work);
+
+	__led_pwm_set(led_dat);
+}
+
 static void led_pwm_set(struct led_classdev *led_cdev,
 	enum led_brightness brightness)
 {
@@ -44,13 +68,12 @@
 	unsigned int max = led_dat->cdev.max_brightness;
 	unsigned int period =  led_dat->period;
 
-	if (brightness == 0) {
-		pwm_config(led_dat->pwm, 0, period);
-		pwm_disable(led_dat->pwm);
-	} else {
-		pwm_config(led_dat->pwm, brightness * period / max, period);
-		pwm_enable(led_dat->pwm);
-	}
+	led_dat->duty = brightness * period / max;
+
+	if (led_dat->can_sleep)
+		schedule_work(&led_dat->work);
+	else
+		__led_pwm_set(led_dat);
 }
 
 static inline size_t sizeof_pwm_leds_priv(int num_leds)
@@ -100,6 +123,10 @@
 		led_dat->cdev.brightness = LED_OFF;
 		led_dat->cdev.flags |= LED_CORE_SUSPENDRESUME;
 
+		led_dat->can_sleep = pwm_can_sleep(led_dat->pwm);
+		if (led_dat->can_sleep)
+			INIT_WORK(&led_dat->work, led_pwm_work);
+
 		ret = led_classdev_register(&pdev->dev, &led_dat->cdev);
 		if (ret < 0) {
 			dev_err(&pdev->dev, "failed to register for %s\n",
@@ -153,6 +180,10 @@
 			led_dat->cdev.max_brightness = cur_led->max_brightness;
 			led_dat->cdev.flags |= LED_CORE_SUSPENDRESUME;
 
+			led_dat->can_sleep = pwm_can_sleep(led_dat->pwm);
+			if (led_dat->can_sleep)
+				INIT_WORK(&led_dat->work, led_pwm_work);
+
 			ret = led_classdev_register(&pdev->dev, &led_dat->cdev);
 			if (ret < 0)
 				goto err;
@@ -180,8 +211,11 @@
 	struct led_pwm_priv *priv = platform_get_drvdata(pdev);
 	int i;
 
-	for (i = 0; i < priv->num_leds; i++)
+	for (i = 0; i < priv->num_leds; i++) {
 		led_classdev_unregister(&priv->leds[i].cdev);
+		if (priv->leds[i].can_sleep)
+			cancel_work_sync(&priv->leds[i].work);
+	}
 
 	return 0;
 }
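The point of this change: .brightness_set may be called from contexts that cannot sleep, while pwm_config() on e.g. I2C-attached PWM controllers does sleep; pwm_can_sleep() lets the driver pay the workqueue detour only when the backend needs it. The shape of the pattern, condensed with hypothetical names:

	#include <linux/workqueue.h>

	struct example_out {
		struct work_struct work;
		int pending;		/* last value requested */
		bool can_sleep;		/* cached from pwm_can_sleep() at probe */
	};

	static void example_apply(struct example_out *out)
	{
		/* ... pwm_config()/pwm_enable(), which may sleep ... */
	}

	static void example_work(struct work_struct *work)
	{
		example_apply(container_of(work, struct example_out, work));
	}

	static void example_set(struct example_out *out, int value)
	{
		out->pending = value;
		if (out->can_sleep)
			schedule_work(&out->work);	/* defer to process context */
		else
			example_apply(out);		/* safe to apply inline */
	}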
diff --git a/drivers/leds/leds-renesas-tpu.c b/drivers/leds/leds-renesas-tpu.c
index d3c2b7e..9483f1c 100644
--- a/drivers/leds/leds-renesas-tpu.c
+++ b/drivers/leds/leds-renesas-tpu.c
@@ -205,7 +205,8 @@
 		gpio_free(cfg->pin_gpio_fn);
 
 	if (new_state == R_TPU_PIN_GPIO)
-		gpio_request_one(cfg->pin_gpio, GPIOF_DIR_OUT | !!brightness,
+		gpio_request_one(cfg->pin_gpio, !!brightness ?
+				GPIOF_OUT_INIT_HIGH : GPIOF_OUT_INIT_LOW,
 				cfg->name);
 
 	if (new_state == R_TPU_PIN_GPIO_FN)
diff --git a/drivers/leds/leds-tca6507.c b/drivers/leds/leds-tca6507.c
index 070ba07..98fe021 100644
--- a/drivers/leds/leds-tca6507.c
+++ b/drivers/leds/leds-tca6507.c
@@ -85,6 +85,7 @@
 #include <linux/gpio.h>
 #include <linux/workqueue.h>
 #include <linux/leds-tca6507.h>
+#include <linux/of.h>
 
 /* LED select registers determine the source that drives LED outputs */
 #define TCA6507_LS_LED_OFF	0x0	/* Output HI-Z (off) */
@@ -724,7 +725,6 @@
 	return ERR_PTR(-ENODEV);
 }
 
-#define of_tca6507_leds_match NULL
 #endif
 
 static int tca6507_probe(struct i2c_client *client,
@@ -813,7 +813,7 @@
 	.driver   = {
 		.name    = "leds-tca6507",
 		.owner   = THIS_MODULE,
-		.of_match_table = of_tca6507_leds_match,
+		.of_match_table = of_match_ptr(of_tca6507_leds_match),
 	},
 	.probe    = tca6507_probe,
 	.remove   = tca6507_remove,
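of_match_ptr() already provides the NULL-when-!CONFIG_OF behaviour that the removed #define was emulating by hand; its definition in include/linux/of.h is simply:

	#ifdef CONFIG_OF
	#define of_match_ptr(_ptr)	(_ptr)
	#else
	#define of_match_ptr(_ptr)	NULL
	#endif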
diff --git a/drivers/leds/leds-wm8350.c b/drivers/leds/leds-wm8350.c
index ed15157..8a181d5 100644
--- a/drivers/leds/leds-wm8350.c
+++ b/drivers/leds/leds-wm8350.c
@@ -129,7 +129,10 @@
 	ret = regulator_disable(led->isink);
 	if (ret != 0) {
 		dev_err(led->cdev.dev, "Failed to disable ISINK: %d\n", ret);
-		regulator_enable(led->dcdc);
+		ret = regulator_enable(led->dcdc);
+		if (ret != 0)
+			dev_err(led->cdev.dev, "Failed to reenable DCDC: %d\n",
+				ret);
 		return;
 	}
 
diff --git a/drivers/leds/trigger/Kconfig b/drivers/leds/trigger/Kconfig
new file mode 100644
index 0000000..49794b4
--- /dev/null
+++ b/drivers/leds/trigger/Kconfig
@@ -0,0 +1,111 @@
+menuconfig LEDS_TRIGGERS
+	bool "LED Trigger support"
+	depends on LEDS_CLASS
+	help
+	  This option enables trigger support for the leds class.
+	  These triggers allow kernel events to drive the LEDs and can
+	  be configured via sysfs. If unsure, say Y.
+
+if LEDS_TRIGGERS
+
+config LEDS_TRIGGER_TIMER
+	tristate "LED Timer Trigger"
+	depends on LEDS_TRIGGERS
+	help
+	  This allows LEDs to be controlled by a programmable timer
+	  via sysfs. Some LED hardware can be programmed to start
+	  blinking the LED without any further software interaction.
+	  For more details read Documentation/leds/leds-class.txt.
+
+	  If unsure, say Y.
+
+config LEDS_TRIGGER_ONESHOT
+	tristate "LED One-shot Trigger"
+	depends on LEDS_TRIGGERS
+	help
+	  This allows LEDs to blink in one-shot pulses with parameters
+	  controlled via sysfs.  It's useful to notify the user on
+	  sporadic events, when there are no clear begin and end trap points,
+	  or on dense events, where this blinks the LED at a constant rate if
+	  rearmed continuously.
+
+	  It also shows how to use the led_blink_set_oneshot() function.
+
+	  If unsure, say Y.
+
+config LEDS_TRIGGER_IDE_DISK
+	bool "LED IDE Disk Trigger"
+	depends on IDE_GD_ATA
+	depends on LEDS_TRIGGERS
+	help
+	  This allows LEDs to be controlled by IDE disk activity.
+	  If unsure, say Y.
+
+config LEDS_TRIGGER_HEARTBEAT
+	tristate "LED Heartbeat Trigger"
+	depends on LEDS_TRIGGERS
+	help
+	  This allows LEDs to be controlled by a CPU load average.
+	  The flash frequency is a hyperbolic function of the 1-minute
+	  load average.
+	  If unsure, say Y.
+
+config LEDS_TRIGGER_BACKLIGHT
+	tristate "LED backlight Trigger"
+	depends on LEDS_TRIGGERS
+	help
+	  This allows LEDs to be controlled as a backlight device: they
+	  turn off and on when the display is blanked and unblanked.
+
+	  If unsure, say N.
+
+config LEDS_TRIGGER_CPU
+	bool "LED CPU Trigger"
+	depends on LEDS_TRIGGERS
+	help
+	  This allows LEDs to be controlled by active CPUs. This shows
+	  the active CPUs across an array of LEDs so you can see which
+	  CPUs are active on the system at any given moment.
+
+	  If unsure, say N.
+
+config LEDS_TRIGGER_GPIO
+	tristate "LED GPIO Trigger"
+	depends on LEDS_TRIGGERS
+	depends on GPIOLIB
+	help
+	  This allows LEDs to be controlled by gpio events. It's good
+	  when using gpios as switches and triggering the needed LEDs
+	  from there. One use case is n810's keypad LEDs that could
+	  be triggered by this trigger when the user slides up to show
+	  the keypad.
+
+	  If unsure, say N.
+
+config LEDS_TRIGGER_DEFAULT_ON
+	tristate "LED Default ON Trigger"
+	depends on LEDS_TRIGGERS
+	help
+	  This allows LEDs to be initialised in the ON state.
+	  If unsure, say Y.
+
+comment "iptables trigger is under Netfilter config (LED target)"
+	depends on LEDS_TRIGGERS
+
+config LEDS_TRIGGER_TRANSIENT
+	tristate "LED Transient Trigger"
+	depends on LEDS_TRIGGERS
+	help
+	  This allows one time activation of a transient state on
+	  GPIO/PWM based hardware.
+	  If unsure, say Y.
+
+config LEDS_TRIGGER_CAMERA
+	tristate "LED Camera Flash/Torch Trigger"
+	depends on LEDS_TRIGGERS
+	help
+	  This allows LEDs to be controlled as a camera flash/torch device.
+	  This enables direct flash/torch on/off control by the driver from
+	  kernel space.
+	  If unsure, say Y.
+
+endif # LEDS_TRIGGERS
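All of these triggers are bound from userspace through the LED's sysfs "trigger" attribute. As an illustration only (the LED name "status" is an assumption; see /sys/class/leds for real devices), selecting the one-shot trigger and firing a pulse from C:

	#include <stdio.h>

	int main(void)
	{
		FILE *f;

		/* Bind the oneshot trigger to the (assumed) "status" LED. */
		f = fopen("/sys/class/leds/status/trigger", "w");
		if (!f)
			return 1;
		fprintf(f, "oneshot\n");
		fclose(f);

		/* Fire one delay_on/delay_off pulse via the trigger's attribute. */
		f = fopen("/sys/class/leds/status/shoot", "w");
		if (!f)
			return 1;
		fprintf(f, "1\n");
		fclose(f);
		return 0;
	}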
diff --git a/drivers/leds/trigger/Makefile b/drivers/leds/trigger/Makefile
new file mode 100644
index 0000000..1abf48d
--- /dev/null
+++ b/drivers/leds/trigger/Makefile
@@ -0,0 +1,10 @@
+obj-$(CONFIG_LEDS_TRIGGER_TIMER)	+= ledtrig-timer.o
+obj-$(CONFIG_LEDS_TRIGGER_ONESHOT)	+= ledtrig-oneshot.o
+obj-$(CONFIG_LEDS_TRIGGER_IDE_DISK)	+= ledtrig-ide-disk.o
+obj-$(CONFIG_LEDS_TRIGGER_HEARTBEAT)	+= ledtrig-heartbeat.o
+obj-$(CONFIG_LEDS_TRIGGER_BACKLIGHT)	+= ledtrig-backlight.o
+obj-$(CONFIG_LEDS_TRIGGER_GPIO)		+= ledtrig-gpio.o
+obj-$(CONFIG_LEDS_TRIGGER_CPU)		+= ledtrig-cpu.o
+obj-$(CONFIG_LEDS_TRIGGER_DEFAULT_ON)	+= ledtrig-default-on.o
+obj-$(CONFIG_LEDS_TRIGGER_TRANSIENT)	+= ledtrig-transient.o
+obj-$(CONFIG_LEDS_TRIGGER_CAMERA)	+= ledtrig-camera.o
diff --git a/drivers/leds/ledtrig-backlight.c b/drivers/leds/trigger/ledtrig-backlight.c
similarity index 99%
rename from drivers/leds/ledtrig-backlight.c
rename to drivers/leds/trigger/ledtrig-backlight.c
index 027a2b1..3c9c88a 100644
--- a/drivers/leds/ledtrig-backlight.c
+++ b/drivers/leds/trigger/ledtrig-backlight.c
@@ -16,7 +16,7 @@
 #include <linux/init.h>
 #include <linux/fb.h>
 #include <linux/leds.h>
-#include "leds.h"
+#include "../leds.h"
 
 #define BLANK		1
 #define UNBLANK		0
diff --git a/drivers/leds/trigger/ledtrig-camera.c b/drivers/leds/trigger/ledtrig-camera.c
new file mode 100644
index 0000000..9bd73a8
--- /dev/null
+++ b/drivers/leds/trigger/ledtrig-camera.c
@@ -0,0 +1,57 @@
+/*
+ * Camera Flash and Torch On/Off Trigger
+ *
+ * based on ledtrig-ide-disk.c
+ *
+ * Copyright 2013 Texas Instruments
+ *
+ * Author: Milo(Woogyom) Kim <milo.kim@ti.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/leds.h>
+
+DEFINE_LED_TRIGGER(ledtrig_flash);
+DEFINE_LED_TRIGGER(ledtrig_torch);
+
+void ledtrig_flash_ctrl(bool on)
+{
+	enum led_brightness brt = on ? LED_FULL : LED_OFF;
+
+	led_trigger_event(ledtrig_flash, brt);
+}
+EXPORT_SYMBOL_GPL(ledtrig_flash_ctrl);
+
+void ledtrig_torch_ctrl(bool on)
+{
+	enum led_brightness brt = on ? LED_FULL : LED_OFF;
+
+	led_trigger_event(ledtrig_torch, brt);
+}
+EXPORT_SYMBOL_GPL(ledtrig_torch_ctrl);
+
+static int __init ledtrig_camera_init(void)
+{
+	led_trigger_register_simple("flash", &ledtrig_flash);
+	led_trigger_register_simple("torch", &ledtrig_torch);
+	return 0;
+}
+module_init(ledtrig_camera_init);
+
+static void __exit ledtrig_camera_exit(void)
+{
+	led_trigger_unregister_simple(ledtrig_torch);
+	led_trigger_unregister_simple(ledtrig_flash);
+}
+module_exit(ledtrig_camera_exit);
+
+MODULE_DESCRIPTION("LED Trigger for Camera Flash/Torch Control");
+MODULE_AUTHOR("Milo Kim");
+MODULE_LICENSE("GPL");
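A camera or flash chip driver would wrap its strobe path with these helpers; any LED whose sysfs trigger is set to "flash" or "torch" then follows the hardware state. A minimal sketch, with the surrounding driver function being hypothetical:

	#include <linux/leds.h>

	/* Hypothetical strobe path in a camera driver. */
	static void example_set_flash(bool on)
	{
		/* Drives every LED bound to the "flash" trigger to
		 * LED_FULL (on) or LED_OFF (off). */
		ledtrig_flash_ctrl(on);
	}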
diff --git a/drivers/leds/ledtrig-cpu.c b/drivers/leds/trigger/ledtrig-cpu.c
similarity index 99%
rename from drivers/leds/ledtrig-cpu.c
rename to drivers/leds/trigger/ledtrig-cpu.c
index 4239b39..118335e 100644
--- a/drivers/leds/ledtrig-cpu.c
+++ b/drivers/leds/trigger/ledtrig-cpu.c
@@ -26,7 +26,7 @@
 #include <linux/percpu.h>
 #include <linux/syscore_ops.h>
 #include <linux/rwsem.h>
-#include "leds.h"
+#include "../leds.h"
 
 #define MAX_NAME_LEN	8
 
diff --git a/drivers/leds/ledtrig-default-on.c b/drivers/leds/trigger/ledtrig-default-on.c
similarity index 97%
rename from drivers/leds/ledtrig-default-on.c
rename to drivers/leds/trigger/ledtrig-default-on.c
index eac1f1b..81a91be 100644
--- a/drivers/leds/ledtrig-default-on.c
+++ b/drivers/leds/trigger/ledtrig-default-on.c
@@ -15,7 +15,7 @@
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/leds.h>
-#include "leds.h"
+#include "../leds.h"
 
 static void defon_trig_activate(struct led_classdev *led_cdev)
 {
diff --git a/drivers/leds/ledtrig-gpio.c b/drivers/leds/trigger/ledtrig-gpio.c
similarity index 99%
rename from drivers/leds/ledtrig-gpio.c
rename to drivers/leds/trigger/ledtrig-gpio.c
index 72e3ebf..35812e3 100644
--- a/drivers/leds/ledtrig-gpio.c
+++ b/drivers/leds/trigger/ledtrig-gpio.c
@@ -17,7 +17,7 @@
 #include <linux/workqueue.h>
 #include <linux/leds.h>
 #include <linux/slab.h>
-#include "leds.h"
+#include "../leds.h"
 
 struct gpio_trig_data {
 	struct led_classdev *led;
diff --git a/drivers/leds/ledtrig-heartbeat.c b/drivers/leds/trigger/ledtrig-heartbeat.c
similarity index 99%
rename from drivers/leds/ledtrig-heartbeat.c
rename to drivers/leds/trigger/ledtrig-heartbeat.c
index 1edc746..5c8464a 100644
--- a/drivers/leds/ledtrig-heartbeat.c
+++ b/drivers/leds/trigger/ledtrig-heartbeat.c
@@ -19,7 +19,7 @@
 #include <linux/sched.h>
 #include <linux/leds.h>
 #include <linux/reboot.h>
-#include "leds.h"
+#include "../leds.h"
 
 static int panic_heartbeats;
 
diff --git a/drivers/leds/ledtrig-ide-disk.c b/drivers/leds/trigger/ledtrig-ide-disk.c
similarity index 100%
rename from drivers/leds/ledtrig-ide-disk.c
rename to drivers/leds/trigger/ledtrig-ide-disk.c
diff --git a/drivers/leds/ledtrig-oneshot.c b/drivers/leds/trigger/ledtrig-oneshot.c
similarity index 99%
rename from drivers/leds/ledtrig-oneshot.c
rename to drivers/leds/trigger/ledtrig-oneshot.c
index 2c029aa..cb4c746 100644
--- a/drivers/leds/ledtrig-oneshot.c
+++ b/drivers/leds/trigger/ledtrig-oneshot.c
@@ -18,7 +18,7 @@
 #include <linux/ctype.h>
 #include <linux/slab.h>
 #include <linux/leds.h>
-#include "leds.h"
+#include "../leds.h"
 
 #define DEFAULT_DELAY 100
 
diff --git a/drivers/leds/ledtrig-timer.c b/drivers/leds/trigger/ledtrig-timer.c
similarity index 99%
rename from drivers/leds/ledtrig-timer.c
rename to drivers/leds/trigger/ledtrig-timer.c
index f774d05..8d09327 100644
--- a/drivers/leds/ledtrig-timer.c
+++ b/drivers/leds/trigger/ledtrig-timer.c
@@ -17,7 +17,6 @@
 #include <linux/device.h>
 #include <linux/ctype.h>
 #include <linux/leds.h>
-#include "leds.h"
 
 static ssize_t led_delay_on_show(struct device *dev,
 		struct device_attribute *attr, char *buf)
diff --git a/drivers/leds/ledtrig-transient.c b/drivers/leds/trigger/ledtrig-transient.c
similarity index 99%
rename from drivers/leds/ledtrig-transient.c
rename to drivers/leds/trigger/ledtrig-transient.c
index 398f104..e5abc00 100644
--- a/drivers/leds/ledtrig-transient.c
+++ b/drivers/leds/trigger/ledtrig-transient.c
@@ -25,7 +25,7 @@
 #include <linux/slab.h>
 #include <linux/timer.h>
 #include <linux/leds.h>
-#include "leds.h"
+#include "../leds.h"
 
 struct transient_trig_data {
 	int activate;
diff --git a/drivers/mfd/88pm860x-core.c b/drivers/mfd/88pm860x-core.c
index 893fc1b..31ca555 100644
--- a/drivers/mfd/88pm860x-core.c
+++ b/drivers/mfd/88pm860x-core.c
@@ -1144,17 +1144,15 @@
 			return -ENOMEM;
 		ret = pm860x_dt_init(node, &client->dev, pdata);
 		if (ret)
-			goto err;
+			return ret;
 	} else if (!pdata) {
 		pr_info("No platform data in %s!\n", __func__);
 		return -EINVAL;
 	}
 
 	chip = kzalloc(sizeof(struct pm860x_chip), GFP_KERNEL);
-	if (chip == NULL) {
-		ret = -ENOMEM;
-		goto err;
-	}
+	if (chip == NULL)
+		return -ENOMEM;
 
 	chip->id = verify_addr(client);
 	chip->regmap = regmap_init_i2c(client, &pm860x_regmap_config);
@@ -1194,10 +1192,6 @@
 
 	pm860x_device_init(chip, pdata);
 	return 0;
-err:
-	if (node)
-		devm_kfree(&client->dev, pdata);
-	return ret;
 }
 
 static int pm860x_remove(struct i2c_client *client)
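The dropped err label was hand-freeing a devm_kzalloc() allocation; memory obtained through devm_* is released by the driver core whenever probe fails or the device is unbound, so plain early returns suffice. A sketch with hypothetical names:

	#include <linux/device.h>
	#include <linux/slab.h>

	static int example_hw_init(void *buf)
	{
		return 0;	/* placeholder for real hardware setup */
	}

	static int example_probe(struct device *dev)
	{
		void *buf = devm_kzalloc(dev, 64, GFP_KERNEL);

		if (!buf)
			return -ENOMEM;

		/*
		 * Returning an error is enough: the driver core frees every
		 * devm_* allocation tied to @dev on probe failure, so no
		 * devm_kfree() unwind label is needed.
		 */
		if (example_hw_init(buf) < 0)
			return -EIO;

		return 0;
	}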
diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
index ca86581..d9aed15 100644
--- a/drivers/mfd/Kconfig
+++ b/drivers/mfd/Kconfig
@@ -10,111 +10,164 @@
 	select IRQ_DOMAIN
 	default n
 
-config MFD_88PM860X
-	bool "Support Marvell 88PM8606/88PM8607"
-	depends on I2C=y && GENERIC_HARDIRQS
-	select REGMAP_I2C
+config MFD_CS5535
+	tristate "AMD CS5535 and CS5536 southbridge core functions"
 	select MFD_CORE
-	help
-	  This supports for Marvell 88PM8606/88PM8607 Power Management IC.
-	  This includes the I2C driver and the core APIs _only_, you have to
-	  select individual components like voltage regulators, RTC and
-	  battery-charger under the corresponding menus.
+	depends on PCI && X86
+	---help---
+	  This is the core driver for CS5535/CS5536 MFD functions.  This is
+          necessary for using the board's GPIO and MFGPT functionality.
 
-config MFD_88PM800
-	tristate "Support Marvell 88PM800"
-	depends on I2C=y && GENERIC_HARDIRQS
+config MFD_AS3711
+	bool "AMS AS3711"
+	select MFD_CORE
 	select REGMAP_I2C
 	select REGMAP_IRQ
-	select MFD_CORE
-	help
-	  This supports for Marvell 88PM800 Power Management IC.
-	  This includes the I2C driver and the core APIs _only_, you have to
-	  select individual components like voltage regulators, RTC and
-	  battery-charger under the corresponding menus.
-
-config MFD_88PM805
-	tristate "Support Marvell 88PM805"
 	depends on I2C=y && GENERIC_HARDIRQS
-	select REGMAP_I2C
-	select REGMAP_IRQ
+	help
+	  Support for the AS3711 PMIC from AMS.
+
+config PMIC_ADP5520
+	bool "Analog Devices ADP5520/01 MFD PMIC Core Support"
+	depends on I2C=y
+	help
+	  Say yes here to add support for Analog Devices ADP5520 and ADP5501,
+	  Multifunction Power Management IC. This includes
+	  the I2C driver and the core APIs _only_, you have to select
+	  individual components like LCD backlight, LEDs, GPIOs and Keypad
+	  under the corresponding menus.
+
+config MFD_AAT2870_CORE
+	bool "AnalogicTech AAT2870"
+	select MFD_CORE
+	depends on I2C=y && GPIOLIB && GENERIC_HARDIRQS
+	help
+	  If you say yes here you get support for the AAT2870.
+	  This driver provides common support for accessing the device,
+	  additional drivers must be enabled in order to use the
+	  functionality of the device.
+
+config MFD_CROS_EC
+	tristate "ChromeOS Embedded Controller"
 	select MFD_CORE
 	help
-	  This supports for Marvell 88PM805 Power Management IC. This includes
-	  the I2C driver and the core APIs _only_, you have to select individual
-	  components like codec device, headset/Mic device under the
-	  corresponding menus.
+	  If you say Y here you get support for the ChromeOS Embedded
+	  Controller (EC) providing keyboard, battery and power services.
+	  You also need to enable the driver for the bus you are using. The
+	  protocol for talking to the EC is defined by the bus driver.
 
-config MFD_SM501
-	tristate "Support for Silicon Motion SM501"
-	 ---help---
-	  This is the core driver for the Silicon Motion SM501 multimedia
-	  companion chip. This device is a multifunction device which may
-	  provide numerous interfaces including USB host controller, USB gadget,
-	  asynchronous serial ports, audio functions, and a dual display video
-	  interface. The device may be connected by PCI or local bus with
-	  varying functions enabled.
+config MFD_CROS_EC_I2C
+	tristate "ChromeOS Embedded Controller (I2C)"
+	depends on MFD_CROS_EC && I2C
 
-config MFD_SM501_GPIO
-	bool "Export GPIO via GPIO layer"
-	depends on MFD_SM501 && GPIOLIB
-	 ---help---
-	 This option uses the gpio library layer to export the 64 GPIO
-	 lines on the SM501. The platform data is used to supply the
-	 base number for the first GPIO line to register.
-
-config MFD_RTSX_PCI
-	tristate "Support for Realtek PCI-E card reader"
-	depends on PCI && GENERIC_HARDIRQS
-	select MFD_CORE
 	help
-	  This supports for Realtek PCI-Express card reader including rts5209,
-	  rts5229, rtl8411, etc. Realtek card reader supports access to many
-	  types of memory cards, such as Memory Stick, Memory Stick Pro,
-	  Secure Digital and MultiMediaCard.
+	  If you say Y here, you get support for talking to the ChromeOS
+	  EC through an I2C bus. This uses a simple byte-level protocol with
+	  a checksum. Failing accesses will be retried three times to
+	  improve reliability.
+
+config MFD_CROS_EC_SPI
+	tristate "ChromeOS Embedded Controller (SPI)"
+	depends on MFD_CROS_EC && SPI
+
+	---help---
+	  If you say Y here, you get support for talking to the ChromeOS EC
+	  through a SPI bus, using a byte-level protocol. Since the EC's
+	  response time cannot be guaranteed, we support ignoring
+	  'pre-amble' bytes before the response actually starts.
 
 config MFD_ASIC3
-	bool "Support for Compaq ASIC3"
+	bool "Compaq ASIC3"
 	depends on GENERIC_HARDIRQS && GPIOLIB && ARM
 	select MFD_CORE
 	 ---help---
 	  This driver supports the ASIC3 multifunction chip found on many
 	  PDAs (mainly iPAQ and HTC based ones)
 
-config MFD_DAVINCI_VOICECODEC
+config PMIC_DA903X
+	bool "Dialog Semiconductor DA9030/DA9034 PMIC Support"
+	depends on I2C=y
+	help
+	  Say yes here to add support for the Dialog Semiconductor DA9030 (a.k.a
+	  ARAVA) and DA9034 (a.k.a MICCO) Power Management ICs, which are
+	  usually found on PXA processor-based platforms. This includes
+	  the I2C driver and the core APIs _only_, you have to select
+	  individual components like LCD backlight, voltage regulators,
+	  LEDs and battery-charger under the corresponding menus.
+
+config PMIC_DA9052
+	bool
+	select MFD_CORE
+
+config MFD_DA9052_SPI
+	bool "Dialog Semiconductor DA9052/53 PMIC variants with SPI"
+	select REGMAP_SPI
+	select REGMAP_IRQ
+	select PMIC_DA9052
+	depends on SPI_MASTER=y && GENERIC_HARDIRQS
+	help
+	  Support for the Dialog Semiconductor DA9052 PMIC
+	  when controlled using SPI. This driver provides common support
+	  for accessing the device, additional drivers must be enabled in
+	  order to use the functionality of the device.
+
+config MFD_DA9052_I2C
+	bool "Dialog Semiconductor DA9052/53 PMIC variants with I2C"
+	select REGMAP_I2C
+	select REGMAP_IRQ
+	select PMIC_DA9052
+	depends on I2C=y && GENERIC_HARDIRQS
+	help
+	  Support for the Dialog Semiconductor DA9052 PMIC
+	  when controlled using I2C. This driver provides common support
+	  for accessing the device, additional drivers must be enabled in
+	  order to use the functionality of the device.
+
+config MFD_DA9055
+	bool "Dialog Semiconductor DA9055 PMIC Support"
+	select REGMAP_I2C
+	select REGMAP_IRQ
+	select MFD_CORE
+	depends on I2C=y && GENERIC_HARDIRQS
+	help
+	  Say yes here for support of Dialog Semiconductor DA9055. This is
+	  a Power Management IC. This driver provides common support for
+	  accessing the device as well as the I2C interface to the chip itself.
+	  Additional drivers must be enabled in order to use the functionality
+	  of the device.
+
+	  This driver can be built as a module. If built as a module it will be
+	  called "da9055"
+
+config MFD_MC13783
 	tristate
-	select MFD_CORE
 
-config MFD_DM355EVM_MSP
-	bool "DaVinci DM355 EVM microcontroller"
-	depends on I2C=y && MACH_DAVINCI_DM355_EVM
+config MFD_MC13XXX
+	tristate
+	depends on (SPI_MASTER || I2C) && GENERIC_HARDIRQS
+	select MFD_CORE
+	select MFD_MC13783
 	help
-	  This driver supports the MSP430 microcontroller used on these
-	  boards.  MSP430 firmware manages resets and power sequencing,
-	  inputs from buttons and the IR remote, LEDs, an RTC, and more.
+	  Enable support for the Freescale MC13783 and MC13892 PMICs.
+	  This driver provides common support for accessing the device,
+	  additional drivers must be enabled in order to use the
+	  functionality of the device.
 
-config MFD_TI_SSP
-	tristate "TI Sequencer Serial Port support"
-	depends on ARCH_DAVINCI_TNETV107X && GENERIC_HARDIRQS
-	select MFD_CORE
-	---help---
-	  Say Y here if you want support for the Sequencer Serial Port
-	  in a Texas Instruments TNETV107X SoC.
-
-	  To compile this driver as a module, choose M here: the
-	  module will be called ti-ssp.
-
-config MFD_TI_AM335X_TSCADC
-	tristate "TI ADC / Touch Screen chip support"
-	select MFD_CORE
-	select REGMAP
-	select REGMAP_MMIO
-	depends on GENERIC_HARDIRQS
+config MFD_MC13XXX_SPI
+	tristate "Freescale MC13783 and MC13892 SPI interface"
+	depends on SPI_MASTER && GENERIC_HARDIRQS
+	select REGMAP_SPI
+	select MFD_MC13XXX
 	help
-	  If you say yes here you get support for Texas Instruments series
-	  of Touch Screen /ADC chips.
-	  To compile this driver as a module, choose M here: the
-	  module will be called ti_am335x_tscadc.
+	  Select this if your MC13xxx is connected via an SPI bus.
+
+config MFD_MC13XXX_I2C
+	tristate "Freescale MC13892 I2C interface"
+	depends on I2C && GENERIC_HARDIRQS
+	select REGMAP_I2C
+	select MFD_MC13XXX
+	help
+	  Select this if your MC13xxx is connected via an I2C bus.
 
 config HTC_EGPIO
 	bool "HTC EGPIO support"
@@ -143,382 +196,86 @@
 	  This device provides input and output GPIOs through an I2C
 	  interface to one or more sub-chips.
 
-config UCB1400_CORE
-	tristate "Philips UCB1400 Core driver"
-	depends on AC97_BUS
-	depends on GPIOLIB
-	help
-	  This enables support for the Philips UCB1400 core functions.
-	  The UCB1400 is an AC97 audio codec.
-
-	  To compile this driver as a module, choose M here: the
-	  module will be called ucb1400_core.
-
-config MFD_LM3533
-	tristate "LM3533 Lighting Power chip"
-	depends on I2C
+config LPC_ICH
+	tristate "Intel ICH LPC"
+	depends on PCI && GENERIC_HARDIRQS
 	select MFD_CORE
-	select REGMAP_I2C
-	depends on GENERIC_HARDIRQS
 	help
-	  Say yes here to enable support for National Semiconductor / TI
-	  LM3533 Lighting Power chips.
+	  The LPC bridge function of the Intel ICH provides support for
+	  many functional units. This driver provides needed support for
+	  other drivers to control these functions, currently GPIO and
+	  watchdog.
 
-	  This driver provides common support for accessing the device;
-	  additional drivers must be enabled in order to use the LED,
-	  backlight or ambient-light-sensor functionality of the device.
-
-config TPS6105X
-	tristate "TPS61050/61052 Boost Converters"
-	depends on I2C
-	select REGULATOR
+config LPC_SCH
+	tristate "Intel SCH LPC"
+	depends on PCI && GENERIC_HARDIRQS
 	select MFD_CORE
-	select REGULATOR_FIXED_VOLTAGE
-	depends on GENERIC_HARDIRQS
 	help
-	  This option enables a driver for the TP61050/TPS61052
-	  high-power "white LED driver". This boost converter is
-	  sometimes used for other things than white LEDs, and
-	  also contains a GPIO pin.
+	  LPC bridge function of the Intel SCH provides support for
+	  System Management Bus and General Purpose I/O.
 
-config TPS65010
-	tristate "TPS6501x Power Management chips"
-	depends on I2C && GPIOLIB
-	default y if MACH_OMAP_H2 || MACH_OMAP_H3 || MACH_OMAP_OSK
-	help
-	  If you say yes here you get support for the TPS6501x series of
-	  Power Management chips.  These include voltage regulators,
-	  lithium ion/polymer battery charging, and other features that
-	  are often used in portable devices like cell phones and cameras.
-
-	  This driver can also be built as a module.  If so, the module
-	  will be called tps65010.
-
-config TPS6507X
-	tristate "TPS6507x Power Management / Touch Screen chips"
+config MFD_INTEL_MSIC
+	bool "Intel MSIC"
+	depends on INTEL_SCU_IPC
 	select MFD_CORE
-	depends on I2C && GENERIC_HARDIRQS
 	help
-	  If you say yes here you get support for the TPS6507x series of
-	  Power Management / Touch Screen chips.  These include voltage
-	  regulators, lithium ion/polymer battery charging, touch screen
-	  and other features that are often used in portable devices.
-	  This driver can also be built as a module.  If so, the module
-	  will be called tps6507x.
+	  Select this option to enable access to Intel MSIC (Avatele
+	  Passage) chip. This chip embeds audio, battery, GPIO, etc.
+	  devices used in Intel Medfield platforms.
 
-config MFD_TPS65217
-	tristate "TPS65217 Power Management / White LED chips"
-	depends on I2C && GENERIC_HARDIRQS
+config MFD_JANZ_CMODIO
+	tristate "Janz CMOD-IO PCI MODULbus Carrier Board"
 	select MFD_CORE
-	select REGMAP_I2C
+	depends on PCI && GENERIC_HARDIRQS
 	help
-	  If you say yes here you get support for the TPS65217 series of
-	  Power Management / White LED chips.
-	  These include voltage regulators, lithium ion/polymer battery
-	  charger, wled and other features that are often used in portable
-	  devices.
+	  This is the core driver for the Janz CMOD-IO PCI MODULbus
+	  carrier board. This device is a PCI to MODULbus bridge which may
+	  host many different types of MODULbus daughterboards, including
+	  CAN and GPIO controllers.
 
-	  This driver can also be built as a module.  If so, the module
-	  will be called tps65217.
+config MFD_JZ4740_ADC
+	bool "Janz JZ4740 ADC core"
+	select MFD_CORE
+	select GENERIC_IRQ_CHIP
+	depends on MACH_JZ4740
+	help
+	  Say yes here if you want support for the ADC unit in the JZ4740 SoC.
+	  This driver is necessary for the jz4740-battery and jz4740-hwmon drivers.
 
-config MFD_TPS6586X
-	bool "TPS6586x Power Management chips"
+config MFD_88PM800
+	tristate "Marvell 88PM800"
 	depends on I2C=y && GENERIC_HARDIRQS
-	select MFD_CORE
-	select REGMAP_I2C
-	help
-	  If you say yes here you get support for the TPS6586X series of
-	  Power Management chips.
-	  This driver provides common support for accessing the device,
-	  additional drivers must be enabled in order to use the
-	  functionality of the device.
-
-	  This driver can also be built as a module.  If so, the module
-	  will be called tps6586x.
-
-config MFD_TPS65910
-	bool "TPS65910 Power Management chip"
-	depends on I2C=y && GPIOLIB && GENERIC_HARDIRQS
-	select MFD_CORE
 	select REGMAP_I2C
 	select REGMAP_IRQ
-	select IRQ_DOMAIN
-	help
-	  if you say yes here you get support for the TPS65910 series of
-	  Power Management chips.
-
-config MFD_TPS65912
-	bool
-	depends on GPIOLIB
-
-config MFD_TPS65912_I2C
-	bool "TPS65912 Power Management chip with I2C"
 	select MFD_CORE
-	select MFD_TPS65912
-	depends on I2C=y && GPIOLIB && GENERIC_HARDIRQS
 	help
-	  If you say yes here you get support for the TPS65912 series of
-	  PM chips with I2C interface.
+	  This adds support for the Marvell 88PM800 Power Management IC.
+	  This includes the I2C driver and the core APIs _only_, you have to
+	  select individual components like voltage regulators, RTC and
+	  battery-charger under the corresponding menus.
 
-config MFD_TPS65912_SPI
-	bool "TPS65912 Power Management chip with SPI"
-	select MFD_CORE
-	select MFD_TPS65912
-	depends on SPI_MASTER && GPIOLIB && GENERIC_HARDIRQS
-	help
-	  If you say yes here you get support for the TPS65912 series of
-	  PM chips with SPI interface.
-
-config MFD_TPS80031
-	bool "TI TPS80031/TPS80032 Power Management chips"
+config MFD_88PM805
+	tristate "Marvell 88PM805"
 	depends on I2C=y && GENERIC_HARDIRQS
-	select MFD_CORE
 	select REGMAP_I2C
 	select REGMAP_IRQ
+	select MFD_CORE
 	help
-	  If you say yes here you get support for the Texas Instruments
-	  TPS80031/ TPS80032 Fully Integrated Power Management with Power
-	  Path and Battery Charger. The device provides five configurable
-	  step-down converters, 11 general purpose LDOs, USB OTG Module,
-	  ADC, RTC, 2 PWM, System Voltage Regulator/Battery Charger with
-	  Power Path from USB, 32K clock generator.
+	  This adds support for the Marvell 88PM805 Power Management IC. This includes
+	  the I2C driver and the core APIs _only_, you have to select individual
+	  components like codec device, headset/Mic device under the
+	  corresponding menus.
 
-config MENELAUS
-	bool "Texas Instruments TWL92330/Menelaus PM chip"
-	depends on I2C=y && ARCH_OMAP2
-	help
-	  If you say yes here you get support for the Texas Instruments
-	  TWL92330/Menelaus Power Management chip. This include voltage
-	  regulators, Dual slot memory card transceivers, real-time clock
-	  and other features that are often used in portable devices like
-	  cell phones and PDAs.
-
-config TWL4030_CORE
-	bool "Texas Instruments TWL4030/TWL5030/TWL6030/TPS659x0 Support"
+config MFD_88PM860X
+	bool "Marvell 88PM8606/88PM8607"
 	depends on I2C=y && GENERIC_HARDIRQS
-	select IRQ_DOMAIN
 	select REGMAP_I2C
-	help
-	  Say yes here if you have TWL4030 / TWL6030 family chip on your board.
-	  This core driver provides register access and IRQ handling
-	  facilities, and registers devices for the various functions
-	  so that function-specific drivers can bind to them.
-
-	  These multi-function chips are found on many OMAP2 and OMAP3
-	  boards, providing power management, RTC, GPIO, keypad, a
-	  high speed USB OTG transceiver, an audio codec (on most
-	  versions) and many other features.
-
-config TWL4030_MADC
-	tristate "Texas Instruments TWL4030 MADC"
-	depends on TWL4030_CORE
-	help
-	This driver provides support for triton TWL4030-MADC. The
-	driver supports both RT and SW conversion methods.
-
-	This driver can be built as a module. If so it will be
-	named twl4030-madc
-
-config TWL4030_POWER
-	bool "Support power resources on TWL4030 family chips"
-	depends on TWL4030_CORE && ARM
-	help
-	  Say yes here if you want to use the power resources on the
-	  TWL4030 family chips.  Most of these resources are regulators,
-	  which have a separate driver; some are control signals, such
-	  as clock request handshaking.
-
-	  This driver uses board-specific data to initialize the resources
-	  and load scripts controlling which resources are switched off/on
-	  or reset when a sleep, wakeup or warm reset event occurs.
-
-config MFD_TWL4030_AUDIO
-	bool
-	depends on TWL4030_CORE && GENERIC_HARDIRQS
-	select MFD_CORE
-	default n
-
-config TWL6040_CORE
-	bool "Support for TWL6040 audio codec"
-	depends on I2C=y && GENERIC_HARDIRQS
-	select MFD_CORE
-	select REGMAP_I2C
-	select REGMAP_IRQ
-	default n
-	help
-	  Say yes here if you want support for Texas Instruments TWL6040 audio
-	  codec.
-	  This driver provides common support for accessing the device,
-	  additional drivers must be enabled in order to use the
-	  functionality of the device (audio, vibra).
-
-config MFD_STMPE
-	bool "Support STMicroelectronics STMPE"
-	depends on (I2C=y || SPI_MASTER=y) && GENERIC_HARDIRQS
 	select MFD_CORE
 	help
-	  Support for the STMPE family of I/O Expanders from
-	  STMicroelectronics.
-
-	  Currently supported devices are:
-
-		STMPE811: GPIO, Touchscreen
-		STMPE1601: GPIO, Keypad
-		STMPE2401: GPIO, Keypad
-		STMPE2403: GPIO, Keypad
-
-	  This driver provides common support for accessing the device,
-	  additional drivers must be enabled in order to use the functionality
-	  of the device.  Currently available sub drivers are:
-
-		GPIO: stmpe-gpio
-		Keypad: stmpe-keypad
-		Touchscreen: stmpe-ts
-
-menu "STMPE Interface Drivers"
-depends on MFD_STMPE
-
-config STMPE_I2C
-	bool "STMPE I2C Inteface"
-	depends on I2C=y
-	default y
-	help
-	  This is used to enable I2C interface of STMPE
-
-config STMPE_SPI
-	bool "STMPE SPI Inteface"
-	depends on SPI_MASTER
-	help
-	  This is used to enable SPI interface of STMPE
-endmenu
-
-config MFD_TC3589X
-	bool "Support Toshiba TC35892 and variants"
-	depends on I2C=y && GENERIC_HARDIRQS
-	select MFD_CORE
-	help
-	  Support for the Toshiba TC35892 and variants I/O Expander.
-
-	  This driver provides common support for accessing the device,
-	  additional drivers must be enabled in order to use the
-	  functionality of the device.
-
-config MFD_TMIO
-	bool
-	default n
-
-config MFD_T7L66XB
-	bool "Support Toshiba T7L66XB"
-	depends on ARM && HAVE_CLK && GENERIC_HARDIRQS
-	select MFD_CORE
-	select MFD_TMIO
-	help
-	  Support for Toshiba Mobile IO Controller T7L66XB
-
-config MFD_SMSC
-       bool "Support for the SMSC ECE1099 series chips"
-       depends on I2C=y && GENERIC_HARDIRQS
-       select MFD_CORE
-       select REGMAP_I2C
-       help
-        If you say yes here you get support for the
-        ece1099 chips from SMSC.
-
-        To compile this driver as a module, choose M here: the
-        module will be called smsc.
-
-config MFD_TC6387XB
-	bool "Support Toshiba TC6387XB"
-	depends on ARM && HAVE_CLK
-	select MFD_CORE
-	select MFD_TMIO
-	help
-	  Support for Toshiba Mobile IO Controller TC6387XB
-
-config MFD_TC6393XB
-	bool "Support Toshiba TC6393XB"
-	depends on ARM && HAVE_CLK
-	select GPIOLIB
-	select MFD_CORE
-	select MFD_TMIO
-	help
-	  Support for Toshiba Mobile IO Controller TC6393XB
-
-config PMIC_DA903X
-	bool "Dialog Semiconductor DA9030/DA9034 PMIC Support"
-	depends on I2C=y
-	help
-	  Say yes here to support for Dialog Semiconductor DA9030 (a.k.a
-	  ARAVA) and DA9034 (a.k.a MICCO), these are Power Management IC
-	  usually found on PXA processors-based platforms. This includes
-	  the I2C driver and the core APIs _only_, you have to select
-	  individual components like LCD backlight, voltage regulators,
-	  LEDs and battery-charger under the corresponding menus.
-
-config PMIC_DA9052
-	bool
-	select MFD_CORE
-
-config MFD_DA9052_SPI
-	bool "Support Dialog Semiconductor DA9052/53 PMIC variants with SPI"
-	select REGMAP_SPI
-	select REGMAP_IRQ
-	select PMIC_DA9052
-	depends on SPI_MASTER=y && GENERIC_HARDIRQS
-	help
-	  Support for the Dialog Semiconductor DA9052 PMIC
-	  when controlled using SPI. This driver provides common support
-	  for accessing the device, additional drivers must be enabled in
-	  order to use the functionality of the device.
-
-config MFD_DA9052_I2C
-	bool "Support Dialog Semiconductor DA9052/53 PMIC variants with I2C"
-	select REGMAP_I2C
-	select REGMAP_IRQ
-	select PMIC_DA9052
-	depends on I2C=y && GENERIC_HARDIRQS
-	help
-	  Support for the Dialog Semiconductor DA9052 PMIC
-	  when controlled using I2C. This driver provides common support
-	  for accessing the device, additional drivers must be enabled in
-	  order to use the functionality of the device.
-
-config MFD_DA9055
-	bool "Dialog Semiconductor DA9055 PMIC Support"
-	select REGMAP_I2C
-	select REGMAP_IRQ
-	select PMIC_DA9055
-	select MFD_CORE
-	depends on I2C=y && GENERIC_HARDIRQS
-	help
-	  Say yes here for support of Dialog Semiconductor DA9055. This is
-	  a Power Management IC. This driver provides common support for
-	  accessing the device as well as the I2C interface to the chip itself.
-	  Additional drivers must be enabled in order to use the functionality
-	  of the device.
-
-	  This driver can be built as a module. If built as a module it will be
-	  called "da9055"
-
-config PMIC_ADP5520
-	bool "Analog Devices ADP5520/01 MFD PMIC Core Support"
-	depends on I2C=y
-	help
-	  Say yes here to add support for Analog Devices AD5520 and ADP5501,
-	  Multifunction Power Management IC. This includes
-	  the I2C driver and the core APIs _only_, you have to select
-	  individual components like LCD backlight, LEDs, GPIOs and Kepad
-	  under the corresponding menus.
-
-config MFD_LP8788
-	bool "Texas Instruments LP8788 Power Management Unit Driver"
-	depends on I2C=y && GENERIC_HARDIRQS
-	select MFD_CORE
-	select REGMAP_I2C
-	select IRQ_DOMAIN
-	help
-	  TI LP8788 PMU supports regulators, battery charger, RTC,
-	  ADC, backlight driver and current sinks.
+	  This adds support for the Marvell 88PM8606/88PM8607 Power Management IC.
+	  This includes the I2C driver and the core APIs _only_, you have to
+	  select individual components like voltage regulators, RTC and
+	  battery-charger under the corresponding menus.
 
 config MFD_MAX77686
 	bool "Maxim Semiconductor MAX77686 PMIC Support"
@@ -592,6 +349,132 @@
 	  additional drivers must be enabled in order to use the functionality
 	  of the device.
 
+config EZX_PCAP
+	bool "Motorola EZXPCAP Support"
+	depends on GENERIC_HARDIRQS && SPI_MASTER
+	help
+	  This enables the PCAP ASIC present on EZX Phones. This is
+	  needed for MMC, TouchScreen, Sound, USB, etc..
+
+config MFD_VIPERBOARD
+        tristate "Nano River Technologies Viperboard"
+	select MFD_CORE
+	depends on USB && GENERIC_HARDIRQS
+	default n
+	help
+	  Say yes here if you want support for Nano River Technologies
+	  Viperboard.
+	  There are mfd cell drivers available for i2c master, adc and
+	  both gpios found on the board. The spi part does not yet
+	  have a driver.
+	  You need to select the mfd cell drivers separately.
+	  The drivers do not support all features the board exposes.
+
+config MFD_RETU
+	tristate "Nokia Retu and Tahvo multi-function device"
+	select MFD_CORE
+	depends on I2C && GENERIC_HARDIRQS
+	select REGMAP_IRQ
+	help
+	  Retu and Tahvo are multi-function devices found on Nokia
+	  Internet Tablets (770, N800 and N810).
+
+config MFD_PCF50633
+	tristate "NXP PCF50633"
+	depends on I2C
+	select REGMAP_I2C
+	help
+	  Say yes here if you have NXP PCF50633 chip on your board.
+	  This core driver provides register access and IRQ handling
+	  facilities, and registers devices for the various functions
+	  so that function-specific drivers can bind to them.
+
+config PCF50633_ADC
+	tristate "NXP PCF50633 ADC"
+	depends on MFD_PCF50633
+	help
+	 Say yes here if you want to include support for ADC in the
+	 NXP PCF50633 chip.
+
+config PCF50633_GPIO
+	tristate "NXP PCF50633 GPIO"
+	depends on MFD_PCF50633
+	help
+	 Say yes here if you want to include GPIO support for pins on
+	 the PCF50633 chip.
+
+config UCB1400_CORE
+	tristate "Philips UCB1400 Core driver"
+	depends on AC97_BUS
+	depends on GPIOLIB
+	help
+	  This enables support for the Philips UCB1400 core functions.
+	  The UCB1400 is an AC97 audio codec.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called ucb1400_core.
+
+config MFD_PM8XXX
+	tristate
+
+config MFD_PM8921_CORE
+	tristate "Qualcomm PM8921 PMIC chip"
+	depends on SSBI && BROKEN
+	select MFD_CORE
+	select MFD_PM8XXX
+	help
+	  If you say yes to this option, support will be included for the
+	  built-in PM8921 PMIC chip.
+
+	  This is required if your board has a PM8921 and uses its features,
+	  such as: MPPs, GPIOs, regulators, interrupts, and PWM.
+
+	  Say M here if you want to include support for PM8921 chip as a module.
+	  This will build a module called "pm8921-core".
+
+config MFD_PM8XXX_IRQ
+	bool "Qualcomm PM8xxx IRQ features"
+	depends on MFD_PM8XXX
+	default y if MFD_PM8XXX
+	help
+	  This is the IRQ driver for Qualcomm PM 8xxx PMIC chips.
+
+	  This is required to use certain other PM 8xxx features, such as GPIO
+	  and MPP.
+
+config MFD_RDC321X
+	tristate "RDC R-321x southbridge"
+	select MFD_CORE
+	depends on PCI && GENERIC_HARDIRQS
+	help
+	  Say yes here if you want to have support for the RDC R-321x SoC
+	  southbridge which provides access to GPIOs and Watchdog using the
+	  southbridge PCI device configuration space.
+
+config MFD_RTSX_PCI
+	tristate "Realtek PCI-E card reader"
+	depends on PCI && GENERIC_HARDIRQS
+	select MFD_CORE
+	help
+	  This adds support for Realtek PCI-Express card readers including
+	  rts5209, rts5229, rtl8411, etc. Realtek card readers support many
+	  types of memory cards, such as Memory Stick, Memory Stick Pro,
+	  Secure Digital and MultiMediaCard.
+
+config MFD_RC5T583
+	bool "Ricoh RC5T583 Power Management system device"
+	depends on I2C=y && GENERIC_HARDIRQS
+	select MFD_CORE
+	select REGMAP_I2C
+	help
+	  Select this option to get support for the RICOH583 Power
+	  Management system device.
+	  This driver provides common support for accessing the device
+	  through i2c interface. The device supports multiple sub-devices
+	  like GPIO, interrupts, RTC, LDO and DCDC regulators, onkey.
+	  Additional drivers must be enabled in order to use the
+	  different functionality of the device.
+
 config MFD_SEC_CORE
 	bool "SAMSUNG Electronics PMIC Series Support"
 	depends on I2C=y && GENERIC_HARDIRQS
@@ -604,215 +487,48 @@
 	 additional drivers must be enabled in order to use the functionality
 	 of the device
 
-config MFD_ARIZONA
-	select REGMAP
-	select REGMAP_IRQ
-	select MFD_CORE
-	bool
-
-config MFD_ARIZONA_I2C
-	tristate "Support Wolfson Microelectronics Arizona platform with I2C"
-	select MFD_ARIZONA
-	select MFD_CORE
-	select REGMAP_I2C
-	depends on I2C && GENERIC_HARDIRQS
-	help
-	  Support for the Wolfson Microelectronics Arizona platform audio SoC
-	  core functionality controlled via I2C.
-
-config MFD_ARIZONA_SPI
-	tristate "Support Wolfson Microelectronics Arizona platform with SPI"
-	select MFD_ARIZONA
-	select MFD_CORE
-	select REGMAP_SPI
-	depends on SPI_MASTER && GENERIC_HARDIRQS
-	help
-	  Support for the Wolfson Microelectronics Arizona platform audio SoC
-	  core functionality controlled via I2C.
-
-config MFD_WM5102
-	bool "Support Wolfson Microelectronics WM5102"
-	depends on MFD_ARIZONA
-	help
-	  Support for Wolfson Microelectronics WM5102 low power audio SoC
-
-config MFD_WM5110
-	bool "Support Wolfson Microelectronics WM5110"
-	depends on MFD_ARIZONA
-	help
-	  Support for Wolfson Microelectronics WM5110 low power audio SoC
-
-config MFD_WM8400
-	bool "Support Wolfson Microelectronics WM8400"
-	select MFD_CORE
-	depends on I2C=y && GENERIC_HARDIRQS
-	select REGMAP_I2C
-	help
-	  Support for the Wolfson Microelecronics WM8400 PMIC and audio
-	  CODEC.  This driver provides common support for accessing
-	  the device, additional drivers must be enabled in order to use
-	  the functionality of the device.
-
-config MFD_WM831X
-	bool
-	depends on GENERIC_HARDIRQS
-
-config MFD_WM831X_I2C
-	bool "Support Wolfson Microelectronics WM831x/2x PMICs with I2C"
-	select MFD_CORE
-	select MFD_WM831X
-	select REGMAP_I2C
-	select IRQ_DOMAIN
-	depends on I2C=y && GENERIC_HARDIRQS
-	help
-	  Support for the Wolfson Microelecronics WM831x and WM832x PMICs
-	  when controlled using I2C.  This driver provides common support
-	  for accessing the device, additional drivers must be enabled in
-	  order to use the functionality of the device.
-
-config MFD_WM831X_SPI
-	bool "Support Wolfson Microelectronics WM831x/2x PMICs with SPI"
-	select MFD_CORE
-	select MFD_WM831X
-	select REGMAP_SPI
-	select IRQ_DOMAIN
-	depends on SPI_MASTER && GENERIC_HARDIRQS
-	help
-	  Support for the Wolfson Microelecronics WM831x and WM832x PMICs
-	  when controlled using SPI.  This driver provides common support
-	  for accessing the device, additional drivers must be enabled in
-	  order to use the functionality of the device.
-
-config MFD_WM8350
-	bool
-	depends on GENERIC_HARDIRQS
-
-config MFD_WM8350_CONFIG_MODE_0
-	bool
-	depends on MFD_WM8350
-
-config MFD_WM8350_CONFIG_MODE_1
-	bool
-	depends on MFD_WM8350
-
-config MFD_WM8350_CONFIG_MODE_2
-	bool
-	depends on MFD_WM8350
-
-config MFD_WM8350_CONFIG_MODE_3
-	bool
-	depends on MFD_WM8350
-
-config MFD_WM8351_CONFIG_MODE_0
-	bool
-	depends on MFD_WM8350
-
-config MFD_WM8351_CONFIG_MODE_1
-	bool
-	depends on MFD_WM8350
-
-config MFD_WM8351_CONFIG_MODE_2
-	bool
-	depends on MFD_WM8350
-
-config MFD_WM8351_CONFIG_MODE_3
-	bool
-	depends on MFD_WM8350
-
-config MFD_WM8352_CONFIG_MODE_0
-	bool
-	depends on MFD_WM8350
-
-config MFD_WM8352_CONFIG_MODE_1
-	bool
-	depends on MFD_WM8350
-
-config MFD_WM8352_CONFIG_MODE_2
-	bool
-	depends on MFD_WM8350
-
-config MFD_WM8352_CONFIG_MODE_3
-	bool
-	depends on MFD_WM8350
-
-config MFD_WM8350_I2C
-	bool "Support Wolfson Microelectronics WM8350 with I2C"
-	select MFD_WM8350
-	depends on I2C=y && GENERIC_HARDIRQS
-	help
-	  The WM8350 is an integrated audio and power management
-	  subsystem with watchdog and RTC functionality for embedded
-	  systems.  This option enables core support for the WM8350 with
-	  I2C as the control interface.  Additional options must be
-	  selected to enable support for the functionality of the chip.
-
-config MFD_WM8994
-	bool "Support Wolfson Microelectronics WM8994"
-	select MFD_CORE
-	select REGMAP_I2C
-	select REGMAP_IRQ
-	depends on I2C=y && GENERIC_HARDIRQS
-	help
-	  The WM8994 is a highly integrated hi-fi CODEC designed for
-	  smartphone applicatiosn.  As well as audio functionality it
-	  has on board GPIO and regulator functionality which is
-	  supported via the relevant subsystems.  This driver provides
-	  core support for the WM8994, in order to use the actual
-	  functionaltiy of the device other drivers must be enabled.
-
-config MFD_PCF50633
-	tristate "Support for NXP PCF50633"
+config MFD_SI476X_CORE
+	tristate "Silicon Laboratories 4761/64/68 AM/FM radio."
 	depends on I2C
-	select REGMAP_I2C
-	help
-	  Say yes here if you have NXP PCF50633 chip on your board.
-	  This core driver provides register access and IRQ handling
-	  facilities, and registers devices for the various functions
-	  so that function-specific drivers can bind to them.
-
-config PCF50633_ADC
-	tristate "Support for NXP PCF50633 ADC"
-	depends on MFD_PCF50633
-	help
-	 Say yes here if you want to include support for ADC in the
-	 NXP PCF50633 chip.
-
-config PCF50633_GPIO
-	tristate "Support for NXP PCF50633 GPIO"
-	depends on MFD_PCF50633
-	help
-	 Say yes here if you want to include support GPIO for pins on
-	 the PCF50633 chip.
-
-config MFD_MC13783
-	tristate
-
-config MFD_MC13XXX
-	tristate
-	depends on (SPI_MASTER || I2C) && GENERIC_HARDIRQS
 	select MFD_CORE
-	select MFD_MC13783
-	help
-	  Enable support for the Freescale MC13783 and MC13892 PMICs.
-	  This driver provides common support for accessing the device,
-	  additional drivers must be enabled in order to use the
-	  functionality of the device.
-
-config MFD_MC13XXX_SPI
-	tristate "Freescale MC13783 and MC13892 SPI interface"
-	depends on SPI_MASTER && GENERIC_HARDIRQS
-	select REGMAP_SPI
-	select MFD_MC13XXX
-	help
-	  Select this if your MC13xxx is connected via an SPI bus.
-
-config MFD_MC13XXX_I2C
-	tristate "Freescale MC13892 I2C interface"
-	depends on I2C && GENERIC_HARDIRQS
 	select REGMAP_I2C
-	select MFD_MC13XXX
 	help
-	  Select this if your MC13xxx is connected via an I2C bus.
+	  This is the core driver for the SI476x series of AM/FM
+	  radios. This MFD driver connects the radio-si476x V4L2 module
+	  and the si476x audio codec.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called si476x-core.
+
+config MFD_SM501
+	tristate "Silicon Motion SM501"
+	 ---help---
+	  This is the core driver for the Silicon Motion SM501 multimedia
+	  companion chip. This device is a multifunction device which may
+	  provide numerous interfaces including USB host controller, USB gadget,
+	  asynchronous serial ports, audio functions, and a dual display video
+	  interface. The device may be connected by PCI or local bus with
+	  varying functions enabled.
+
+config MFD_SM501_GPIO
+	bool "Export GPIO via GPIO layer"
+	depends on MFD_SM501 && GPIOLIB
+	 ---help---
+	 This option uses the gpio library layer to export the 64 GPIO
+	 lines on the SM501. The platform data is used to supply the
+	 base number for the first GPIO line to register.
+
+config MFD_SMSC
+       bool "SMSC ECE1099 series chips"
+       depends on I2C=y && GENERIC_HARDIRQS
+       select MFD_CORE
+       select REGMAP_I2C
+       help
+        If you say yes here you get support for the
+        ece1099 chips from SMSC.
+
+        To compile this driver as a module, choose M here: the
+        module will be called smsc.
 
 config ABX500_CORE
 	bool "ST-Ericsson ABX500 Mixed Signal Circuit register functions"
@@ -848,13 +564,6 @@
 	  programmable memory) support. This exposes a sysfs file to read
 	  out OTP values.
 
-config EZX_PCAP
-	bool "PCAP Support"
-	depends on GENERIC_HARDIRQS && SPI_MASTER
-	help
-	  This enables the PCAP ASIC present on EZX Phones. This is
-	  needed for MMC, TouchScreen, Sound, USB, etc..
-
 config AB8500_CORE
 	bool "ST-Ericsson AB8500 Mixed Signal Power Management chip"
 	depends on GENERIC_HARDIRQS && ABX500_CORE && MFD_DB8500_PRCMU
@@ -877,7 +586,7 @@
          filesystem, debugfs.
 
 config AB8500_GPADC
-	bool "AB8500 GPADC driver"
+	bool "ST-Ericsson AB8500 GPADC driver"
 	depends on AB8500_CORE && REGULATOR_AB8500
 	default y
 	help
@@ -893,92 +602,96 @@
 	  system controller running an XP70 microprocessor, which is accessed
 	  through a register map.
 
-config MFD_CS5535
-	tristate "Support for CS5535 and CS5536 southbridge core functions"
-	select MFD_CORE
-	depends on PCI && X86
-	---help---
-	  This is the core driver for CS5535/CS5536 MFD functions.  This is
-          necessary for using the board's GPIO and MFGPT functionality.
-
-config MFD_TIMBERDALE
-	tristate "Support for the Timberdale FPGA"
-	select MFD_CORE
-	depends on PCI && GPIOLIB
-	---help---
-	This is the core driver for the timberdale FPGA. This device is a
-	multifunction device which exposes numerous platform devices.
-
-	The timberdale FPGA can be found on the Intel Atom development board
-	for in-vehicle infontainment, called Russellville.
-
-config LPC_SCH
-	tristate "Intel SCH LPC"
-	depends on PCI && GENERIC_HARDIRQS
+config MFD_STMPE
+	bool "STMicroelectronics STMPE"
+	depends on (I2C=y || SPI_MASTER=y) && GENERIC_HARDIRQS
 	select MFD_CORE
 	help
-	  LPC bridge function of the Intel SCH provides support for
-	  System Management Bus and General Purpose I/O.
+	  Support for the STMPE family of I/O Expanders from
+	  STMicroelectronics.
 
-config LPC_ICH
-	tristate "Intel ICH LPC"
-	depends on PCI && GENERIC_HARDIRQS
-	select MFD_CORE
-	help
-	  The LPC bridge function of the Intel ICH provides support for
-	  many functional units. This driver provides needed support for
-	  other drivers to control these functions, currently GPIO and
-	  watchdog.
+	  Currently supported devices are:
 
-config MFD_RDC321X
-	tristate "Support for RDC-R321x southbridge"
-	select MFD_CORE
-	depends on PCI && GENERIC_HARDIRQS
-	help
-	  Say yes here if you want to have support for the RDC R-321x SoC
-	  southbridge which provides access to GPIOs and Watchdog using the
-	  southbridge PCI device configuration space.
+		STMPE811: GPIO, Touchscreen
+		STMPE1601: GPIO, Keypad
+		STMPE1801: GPIO, Keypad
+		STMPE2401: GPIO, Keypad
+		STMPE2403: GPIO, Keypad
 
-config MFD_JANZ_CMODIO
-	tristate "Support for Janz CMOD-IO PCI MODULbus Carrier Board"
-	select MFD_CORE
-	depends on PCI && GENERIC_HARDIRQS
-	help
-	  This is the core driver for the Janz CMOD-IO PCI MODULbus
-	  carrier board. This device is a PCI to MODULbus bridge which may
-	  host many different types of MODULbus daughterboards, including
-	  CAN and GPIO controllers.
+	  This driver provides common support for accessing the device,
+	  additional drivers must be enabled in order to use the functionality
+	  of the device.  Currently available sub drivers are:
 
-config MFD_JZ4740_ADC
-	bool "Support for the JZ4740 SoC ADC core"
-	select MFD_CORE
-	select GENERIC_IRQ_CHIP
-	depends on MACH_JZ4740
-	help
-	  Say yes here if you want support for the ADC unit in the JZ4740 SoC.
-	  This driver is necessary for jz4740-battery and jz4740-hwmon driver.
+		GPIO: stmpe-gpio
+		Keypad: stmpe-keypad
+		Touchscreen: stmpe-ts
 
-config MFD_VX855
-	tristate "Support for VIA VX855/VX875 integrated south bridge"
-	depends on PCI && GENERIC_HARDIRQS
-	select MFD_CORE
-	help
-	  Say yes here to enable support for various functions of the
-	  VIA VX855/VX875 south bridge. You will need to enable the vx855_spi
-	  and/or vx855_gpio drivers for this to do anything useful.
+menu "STMicroelectronics STMPE Interface Drivers"
+depends on MFD_STMPE
 
-config MFD_WL1273_CORE
-	tristate "Support for TI WL1273 FM radio."
-	depends on I2C && GENERIC_HARDIRQS
-	select MFD_CORE
-	default n
+config STMPE_I2C
+	bool "STMicroelectronics STMPE I2C Inteface"
+	depends on I2C=y
+	default y
 	help
-	  This is the core driver for the TI WL1273 FM radio. This MFD
-	  driver connects the radio-wl1273 V4L2 module and the wl1273
-	  audio codec.
+	  This is used to enable the I2C interface of STMPE.
+
+config STMPE_SPI
+	bool "STMicroelectronics STMPE SPI Inteface"
+	depends on SPI_MASTER
+	help
+	  This is used to enable the SPI interface of STMPE.
+endmenu
+
+config MFD_STA2X11
+	bool "STMicroelectronics STA2X11"
+	depends on STA2X11 && GENERIC_HARDIRQS
+	select MFD_CORE
+	select REGMAP_MMIO
+
+config MFD_SYSCON
+	bool "System Controller Register R/W Based on Regmap"
+	select REGMAP_MMIO
+	help
+	  Select this option to enable accessing system control registers
+	  via regmap.
+
+config MFD_DAVINCI_VOICECODEC
+	tristate
+	select MFD_CORE
+
+config MFD_TI_AM335X_TSCADC
+	tristate "TI ADC / Touch Screen chip support"
+	select MFD_CORE
+	select REGMAP
+	select REGMAP_MMIO
+	depends on GENERIC_HARDIRQS
+	help
+	  If you say yes here you get support for the Texas Instruments
+	  series of Touch Screen / ADC chips.
+	  To compile this driver as a module, choose M here: the
+	  module will be called ti_am335x_tscadc.
+
+config MFD_DM355EVM_MSP
+	bool "TI DaVinci DM355 EVM microcontroller"
+	depends on I2C=y && MACH_DAVINCI_DM355_EVM
+	help
+	  This driver supports the MSP430 microcontroller used on these
+	  boards.  MSP430 firmware manages resets and power sequencing,
+	  inputs from buttons and the IR remote, LEDs, an RTC, and more.
+
+config MFD_LP8788
+	bool "TI LP8788 Power Management Unit Driver"
+	depends on I2C=y && GENERIC_HARDIRQS
+	select MFD_CORE
+	select REGMAP_I2C
+	select IRQ_DOMAIN
+	help
+	  TI LP8788 PMU supports regulators, battery charger, RTC,
+	  ADC, backlight driver and current sinks.
 
 config MFD_OMAP_USB_HOST
-	bool "Support OMAP USBHS core and TLL driver"
+	bool "TI OMAP USBHS core and TLL driver"
 	depends on USB_EHCI_HCD_OMAP || USB_OHCI_HCD_OMAP3
 	default y
 	help
@@ -986,39 +699,70 @@
 	  This MFD driver does the required setup functionalities for
 	  OMAP USB Host drivers.
 
-config MFD_PM8XXX
-	tristate
-
-config MFD_PM8921_CORE
-	tristate "Qualcomm PM8921 PMIC chip"
-	depends on SSBI && BROKEN
+config MFD_PALMAS
+	bool "TI Palmas series chips"
 	select MFD_CORE
-	select MFD_PM8XXX
+	select REGMAP_I2C
+	select REGMAP_IRQ
+	depends on I2C=y && GENERIC_HARDIRQS
 	help
-	  If you say yes to this option, support will be included for the
-	  built-in PM8921 PMIC chip.
+	  If you say yes here you get support for the Palmas
+	  series of PMIC chips from Texas Instruments.
 
-	  This is required if your board has a PM8921 and uses its features,
-	  such as: MPPs, GPIOs, regulators, interrupts, and PWM.
+config MFD_TI_SSP
+	tristate "TI Sequencer Serial Port support"
+	depends on ARCH_DAVINCI_TNETV107X && GENERIC_HARDIRQS
+	select MFD_CORE
+	---help---
+	  Say Y here if you want support for the Sequencer Serial Port
+	  in a Texas Instruments TNETV107X SoC.
 
-	  Say M here if you want to include support for PM8921 chip as a module.
-	  This will build a module called "pm8921-core".
+	  To compile this driver as a module, choose M here: the
+	  module will be called ti-ssp.
 
-config MFD_PM8XXX_IRQ
-	bool "Support for Qualcomm PM8xxx IRQ features"
-	depends on MFD_PM8XXX
-	default y if MFD_PM8XXX
+config TPS6105X
+	tristate "TI TPS61050/61052 Boost Converters"
+	depends on I2C
+	select REGULATOR
+	select MFD_CORE
+	select REGULATOR_FIXED_VOLTAGE
+	depends on GENERIC_HARDIRQS
 	help
-	  This is the IRQ driver for Qualcomm PM 8xxx PMIC chips.
+	  This option enables a driver for the TP61050/TPS61052
+	  high-power "white LED driver". This boost converter is
+	  sometimes used for other things than white LEDs, and
+	  also contains a GPIO pin.
 
-	  This is required to use certain other PM 8xxx features, such as GPIO
-	  and MPP.
+config TPS65010
+	tristate "TI TPS6501x Power Management chips"
+	depends on I2C && GPIOLIB
+	default y if MACH_OMAP_H2 || MACH_OMAP_H3 || MACH_OMAP_OSK
+	help
+	  If you say yes here you get support for the TPS6501x series of
+	  Power Management chips.  These include voltage regulators,
+	  lithium ion/polymer battery charging, and other features that
+	  are often used in portable devices like cell phones and cameras.
+
+	  This driver can also be built as a module.  If so, the module
+	  will be called tps65010.
+
+config TPS6507X
+	tristate "TI TPS6507x Power Management / Touch Screen chips"
+	select MFD_CORE
+	depends on I2C && GENERIC_HARDIRQS
+	help
+	  If you say yes here you get support for the TPS6507x series of
+	  Power Management / Touch Screen chips.  These include voltage
+	  regulators, lithium ion/polymer battery charging, touch screen
+	  and other features that are often used in portable devices.
+	  This driver can also be built as a module.  If so, the module
+	  will be called tps6507x.
 
 config TPS65911_COMPARATOR
 	tristate
 
 config MFD_TPS65090
-	bool "TPS65090 Power Management chips"
+	bool "TI TPS65090 Power Management chips"
 	depends on I2C=y && GENERIC_HARDIRQS
 	select MFD_CORE
 	select REGMAP_I2C
@@ -1030,94 +774,346 @@
 	  additional drivers must be enabled in order to use the
 	  functionality of the device.
 
-config MFD_AAT2870_CORE
-	bool "Support for the AnalogicTech AAT2870"
+config MFD_TPS65217
+	tristate "TI TPS65217 Power Management / White LED chips"
+	depends on I2C && GENERIC_HARDIRQS
 	select MFD_CORE
-	depends on I2C=y && GPIOLIB && GENERIC_HARDIRQS
+	select REGMAP_I2C
 	help
-	  If you say yes here you get support for the AAT2870.
+	  If you say yes here you get support for the TPS65217 series of
+	  Power Management / White LED chips.
+	  These include voltage regulators, lithium ion/polymer battery
+	  charger, wled and other features that are often used in portable
+	  devices.
+
+	  This driver can also be built as a module.  If so, the module
+	  will be called tps65217.
+
+config MFD_TPS6586X
+	bool "TI TPS6586x Power Management chips"
+	depends on I2C=y && GENERIC_HARDIRQS
+	select MFD_CORE
+	select REGMAP_I2C
+	help
+	  If you say yes here you get support for the TPS6586X series of
+	  Power Management chips.
 	  This driver provides common support for accessing the device,
 	  additional drivers must be enabled in order to use the
 	  functionality of the device.
 
-config MFD_INTEL_MSIC
-	bool "Support for Intel MSIC"
-	depends on INTEL_SCU_IPC
-	select MFD_CORE
-	help
-	  Select this option to enable access to Intel MSIC (Avatele
-	  Passage) chip. This chip embeds audio, battery, GPIO, etc.
-	  devices used in Intel Medfield platforms.
+	  This driver can also be built as a module.  If so, the module
+	  will be called tps6586x.
 
-config MFD_RC5T583
-	bool "Ricoh RC5T583 Power Management system device"
-	depends on I2C=y && GENERIC_HARDIRQS
-	select MFD_CORE
-	select REGMAP_I2C
-	help
-	  Select this option to get support for the RICOH583 Power
-	  Management system device.
-	  This driver provides common support for accessing the device
-	  through i2c interface. The device supports multiple sub-devices
-	  like GPIO, interrupts, RTC, LDO and DCDC regulators, onkey.
-	  Additional drivers must be enabled in order to use the
-	  different functionality of the device.
-
-config MFD_STA2X11
-	bool "STA2X11 multi function device support"
-	depends on STA2X11 && GENERIC_HARDIRQS
-	select MFD_CORE
-	select REGMAP_MMIO
-
-config MFD_SYSCON
-	bool "System Controller Register R/W Based on Regmap"
-	depends on OF
-	select REGMAP_MMIO
-	help
-	  Select this option to enable accessing system control registers
-	  via regmap.
-
-config MFD_PALMAS
-	bool "Support for the TI Palmas series chips"
+config MFD_TPS65910
+	bool "TI TPS65910 Power Management chip"
+	depends on I2C=y && GPIOLIB && GENERIC_HARDIRQS
 	select MFD_CORE
 	select REGMAP_I2C
 	select REGMAP_IRQ
-	depends on I2C=y && GENERIC_HARDIRQS
+	select IRQ_DOMAIN
 	help
-	  If you say yes here you get support for the Palmas
-	  series of PMIC chips from Texas Instruments.
+	  If you say yes here you get support for the TPS65910 series of
+	  Power Management chips.
 
-config MFD_VIPERBOARD
-        tristate "Support for Nano River Technologies Viperboard"
+config MFD_TPS65912
+	bool "TI TPS65912 Power Management chip"
+	depends on GPIOLIB
+	help
+	  If you say yes here you get support for the TPS65912 series of
+	  PM chips.
+
+config MFD_TPS65912_I2C
+	bool "TI TPS65912 Power Management chip with I2C"
 	select MFD_CORE
-	depends on USB && GENERIC_HARDIRQS
+	select MFD_TPS65912
+	depends on I2C=y && GPIOLIB && GENERIC_HARDIRQS
+	help
+	  If you say yes here you get support for the TPS65912 series of
+	  PM chips with I2C interface.
+
+config MFD_TPS65912_SPI
+	bool "TI TPS65912 Power Management chip with SPI"
+	select MFD_CORE
+	select MFD_TPS65912
+	depends on SPI_MASTER && GPIOLIB && GENERIC_HARDIRQS
+	help
+	  If you say yes here you get support for the TPS65912 series of
+	  PM chips with SPI interface.
+
+config MFD_TPS80031
+	bool "TI TPS80031/TPS80032 Power Management chips"
+	depends on I2C=y && GENERIC_HARDIRQS
+	select MFD_CORE
+	select REGMAP_I2C
+	select REGMAP_IRQ
+	help
+	  If you say yes here you get support for the Texas Instruments
+	  TPS80031/TPS80032 Fully Integrated Power Management with Power
+	  Path and Battery Charger. The device provides five configurable
+	  step-down converters, 11 general purpose LDOs, USB OTG Module,
+	  ADC, RTC, 2 PWM, System Voltage Regulator/Battery Charger with
+	  Power Path from USB, 32K clock generator.
+
+config TWL4030_CORE
+	bool "TI TWL4030/TWL5030/TWL6030/TPS659x0 Support"
+	depends on I2C=y && GENERIC_HARDIRQS
+	select IRQ_DOMAIN
+	select REGMAP_I2C
+	help
+	  Say yes here if you have a TWL4030 / TWL6030 family chip on your board.
+	  This core driver provides register access and IRQ handling
+	  facilities, and registers devices for the various functions
+	  so that function-specific drivers can bind to them.
+
+	  These multi-function chips are found on many OMAP2 and OMAP3
+	  boards, providing power management, RTC, GPIO, keypad, a
+	  high speed USB OTG transceiver, an audio codec (on most
+	  versions) and many other features.
+
+config TWL4030_MADC
+	tristate "TI TWL4030 MADC"
+	depends on TWL4030_CORE
+	help
+	  This driver provides support for the Triton TWL4030-MADC. The
+	  driver supports both RT and SW conversion methods.
+
+	  This driver can be built as a module. If so it will be
+	  named twl4030-madc.
+
+config TWL4030_POWER
+	bool "TI TWL4030 power resources"
+	depends on TWL4030_CORE && ARM
+	help
+	  Say yes here if you want to use the power resources on the
+	  TWL4030 family chips.  Most of these resources are regulators,
+	  which have a separate driver; some are control signals, such
+	  as clock request handshaking.
+
+	  This driver uses board-specific data to initialize the resources
+	  and load scripts controlling which resources are switched off/on
+	  or reset when a sleep, wakeup or warm reset event occurs.
+
+config MFD_TWL4030_AUDIO
+	bool "TI TWL4030 Audio"
+	depends on TWL4030_CORE && GENERIC_HARDIRQS
+	select MFD_CORE
+	default n
+
+config TWL6040_CORE
+	bool "TI TWL6040 audio codec"
+	depends on I2C=y && GENERIC_HARDIRQS
+	select MFD_CORE
+	select REGMAP_I2C
+	select REGMAP_IRQ
 	default n
 	help
-	  Say yes here if you want support for Nano River Technologies
-	  Viperboard.
-	  There are mfd cell drivers available for i2c master, adc and
-	  both gpios found on the board. The spi part does not yet
-	  have a driver.
-	  You need to select the mfd cell drivers separately.
-	  The drivers do not support all features the board exposes.
+	  Say yes here if you want support for Texas Instruments TWL6040 audio
+	  codec.
+	  This driver provides common support for accessing the device,
+	  additional drivers must be enabled in order to use the
+	  functionality of the device (audio, vibra).
 
-config MFD_RETU
-	tristate "Support for Retu multi-function device"
-	select MFD_CORE
-	depends on I2C && GENERIC_HARDIRQS
-	select REGMAP_IRQ
+config MENELAUS
+	bool "TI TWL92330/Menelaus PM chip"
+	depends on I2C=y && ARCH_OMAP2
 	help
-	  Retu is a multi-function device found on Nokia Internet Tablets
-	  (770, N800 and N810).
+	  If you say yes here you get support for the Texas Instruments
+	  TWL92330/Menelaus Power Management chip. This includes voltage
+	  regulators, dual slot memory card transceivers, real-time clock
+	  and other features that are often used in portable devices like
+	  cell phones and PDAs.
 
-config MFD_AS3711
-	bool "Support for AS3711"
+config MFD_WL1273_CORE
+	tristate "TI WL1273 FM radio"
+	depends on I2C && GENERIC_HARDIRQS
+	select MFD_CORE
+	default n
+	help
+	  This is the core driver for the TI WL1273 FM radio. This MFD
+	  driver connects the radio-wl1273 V4L2 module and the wl1273
+	  audio codec.
+
+config MFD_LM3533
+	tristate "TI/National Semiconductor LM3533 Lighting Power chip"
+	depends on I2C
+	select MFD_CORE
+	select REGMAP_I2C
+	depends on GENERIC_HARDIRQS
+	help
+	  Say yes here to enable support for National Semiconductor / TI
+	  LM3533 Lighting Power chips.
+
+	  This driver provides common support for accessing the device;
+	  additional drivers must be enabled in order to use the LED,
+	  backlight or ambient-light-sensor functionality of the device.
+
+config MFD_TIMBERDALE
+	tristate "Timberdale FPGA"
+	select MFD_CORE
+	depends on PCI && GPIOLIB
+	---help---
+	This is the core driver for the timberdale FPGA. This device is a
+	multifunction device which exposes numerous platform devices.
+
+	The timberdale FPGA can be found on the Intel Atom development board
+	for in-vehicle infotainment, called Russellville.
+
+config MFD_TC3589X
+	bool "Toshiba TC35892 and variants"
+	depends on I2C=y && GENERIC_HARDIRQS
+	select MFD_CORE
+	help
+	  Support for the Toshiba TC35892 I/O Expander and its variants.
+
+	  This driver provides common support for accessing the device,
+	  additional drivers must be enabled in order to use the
+	  functionality of the device.
+
+config MFD_TMIO
+	bool
+	default n
+
+config MFD_T7L66XB
+	bool "Toshiba T7L66XB"
+	depends on ARM && HAVE_CLK && GENERIC_HARDIRQS
+	select MFD_CORE
+	select MFD_TMIO
+	help
+	  Support for Toshiba Mobile IO Controller T7L66XB
+
+config MFD_TC6387XB
+	bool "Toshiba TC6387XB"
+	depends on ARM && HAVE_CLK
+	select MFD_CORE
+	select MFD_TMIO
+	help
+	  Support for Toshiba Mobile IO Controller TC6387XB
+
+config MFD_TC6393XB
+	bool "Toshiba TC6393XB"
+	depends on ARM && HAVE_CLK
+	select GPIOLIB
+	select MFD_CORE
+	select MFD_TMIO
+	help
+	  Support for Toshiba Mobile IO Controller TC6393XB
+
+config MFD_VX855
+	tristate "VIA VX855/VX875 integrated south bridge"
+	depends on PCI && GENERIC_HARDIRQS
+	select MFD_CORE
+	help
+	  Say yes here to enable support for various functions of the
+	  VIA VX855/VX875 south bridge. You will need to enable the vx855_spi
+	  and/or vx855_gpio drivers for this to do anything useful.
+
+config MFD_ARIZONA
+	select REGMAP
+	select REGMAP_IRQ
+	select MFD_CORE
+	bool
+
+config MFD_ARIZONA_I2C
+	tristate "Wolfson Microelectronics Arizona platform with I2C"
+	select MFD_ARIZONA
+	select MFD_CORE
+	select REGMAP_I2C
+	depends on I2C && GENERIC_HARDIRQS
+	help
+	  Support for the Wolfson Microelectronics Arizona platform audio SoC
+	  core functionality controlled via I2C.
+
+config MFD_ARIZONA_SPI
+	tristate "Wolfson Microelectronics Arizona platform with SPI"
+	select MFD_ARIZONA
+	select MFD_CORE
+	select REGMAP_SPI
+	depends on SPI_MASTER && GENERIC_HARDIRQS
+	help
+	  Support for the Wolfson Microelectronics Arizona platform audio SoC
+	  core functionality controlled via SPI.
+
+config MFD_WM5102
+	bool "Wolfson Microelectronics WM5102"
+	depends on MFD_ARIZONA
+	help
+	  Support for Wolfson Microelectronics WM5102 low power audio SoC
+
+config MFD_WM5110
+	bool "Wolfson Microelectronics WM5110"
+	depends on MFD_ARIZONA
+	help
+	  Support for Wolfson Microelectronics WM5110 low power audio SoC
+
+config MFD_WM8400
+	bool "Wolfson Microelectronics WM8400"
+	select MFD_CORE
+	depends on I2C=y && GENERIC_HARDIRQS
+	select REGMAP_I2C
+	help
+	  Support for the Wolfson Microelectronics WM8400 PMIC and audio
+	  CODEC.  This driver provides common support for accessing
+	  the device, additional drivers must be enabled in order to use
+	  the functionality of the device.
+
+config MFD_WM831X
+	bool
+	depends on GENERIC_HARDIRQS
+
+config MFD_WM831X_I2C
+	bool "Wolfson Microelectronics WM831x/2x PMICs with I2C"
+	select MFD_CORE
+	select MFD_WM831X
+	select REGMAP_I2C
+	select IRQ_DOMAIN
+	depends on I2C=y && GENERIC_HARDIRQS
+	help
+	  Support for the Wolfson Microelectronics WM831x and WM832x PMICs
+	  when controlled using I2C.  This driver provides common support
+	  for accessing the device, additional drivers must be enabled in
+	  order to use the functionality of the device.
+
+config MFD_WM831X_SPI
+	bool "Wolfson Microelectronics WM831x/2x PMICs with SPI"
+	select MFD_CORE
+	select MFD_WM831X
+	select REGMAP_SPI
+	select IRQ_DOMAIN
+	depends on SPI_MASTER && GENERIC_HARDIRQS
+	help
+	  Support for the Wolfson Microelectronics WM831x and WM832x PMICs
+	  when controlled using SPI.  This driver provides common support
+	  for accessing the device, additional drivers must be enabled in
+	  order to use the functionality of the device.
+
+config MFD_WM8350
+	bool
+	depends on GENERIC_HARDIRQS
+
+config MFD_WM8350_I2C
+	bool "Wolfson Microelectronics WM8350 with I2C"
+	select MFD_WM8350
+	depends on I2C=y && GENERIC_HARDIRQS
+	help
+	  The WM8350 is an integrated audio and power management
+	  subsystem with watchdog and RTC functionality for embedded
+	  systems.  This option enables core support for the WM8350 with
+	  I2C as the control interface.  Additional options must be
+	  selected to enable support for the functionality of the chip.
+
+config MFD_WM8994
+	bool "Wolfson Microelectronics WM8994"
 	select MFD_CORE
 	select REGMAP_I2C
 	select REGMAP_IRQ
 	depends on I2C=y && GENERIC_HARDIRQS
 	help
-	  Support for the AS3711 PMIC from AMS
+	  The WM8994 is a highly integrated hi-fi CODEC designed for
+	  smartphone applications.  As well as audio functionality it
+	  has on-board GPIO and regulator functionality which is
+	  supported via the relevant subsystems.  This driver provides
+	  core support for the WM8994; in order to use the actual
+	  functionality of the device other drivers must be enabled.
 
 endmenu
 endif
diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile
index b90409c..718e94a 100644
--- a/drivers/mfd/Makefile
+++ b/drivers/mfd/Makefile
@@ -8,8 +8,11 @@
 obj-$(CONFIG_MFD_88PM805)	+= 88pm805.o 88pm80x.o
 obj-$(CONFIG_MFD_SM501)		+= sm501.o
 obj-$(CONFIG_MFD_ASIC3)		+= asic3.o tmio_core.o
+obj-$(CONFIG_MFD_CROS_EC)	+= cros_ec.o
+obj-$(CONFIG_MFD_CROS_EC_I2C)	+= cros_ec_i2c.o
+obj-$(CONFIG_MFD_CROS_EC_SPI)	+= cros_ec_spi.o
 
-rtsx_pci-objs			:= rtsx_pcr.o rts5209.o rts5229.o rtl8411.o rts5227.o
+rtsx_pci-objs			:= rtsx_pcr.o rts5209.o rts5229.o rtl8411.o rts5227.o rts5249.o
 obj-$(CONFIG_MFD_RTSX_PCI)	+= rtsx_pci.o
 
 obj-$(CONFIG_HTC_EGPIO)		+= htc-egpio.o
@@ -131,6 +134,10 @@
 obj-$(CONFIG_MFD_TPS6586X)	+= tps6586x.o
 obj-$(CONFIG_MFD_VX855)		+= vx855.o
 obj-$(CONFIG_MFD_WL1273_CORE)	+= wl1273-core.o
+
+si476x-core-y := si476x-cmd.o si476x-prop.o si476x-i2c.o
+obj-$(CONFIG_MFD_SI476X_CORE)	+= si476x-core.o
+
 obj-$(CONFIG_MFD_CS5535)	+= cs5535-mfd.o
 obj-$(CONFIG_MFD_OMAP_USB_HOST)	+= omap-usb-host.o omap-usb-tll.o
 obj-$(CONFIG_MFD_PM8921_CORE) 	+= pm8921-core.o
diff --git a/drivers/mfd/aat2870-core.c b/drivers/mfd/aat2870-core.c
index f1beb49..dfdb0a2 100644
--- a/drivers/mfd/aat2870-core.c
+++ b/drivers/mfd/aat2870-core.c
@@ -367,12 +367,12 @@
 	int i, j;
 	int ret = 0;
 
-	aat2870 = kzalloc(sizeof(struct aat2870_data), GFP_KERNEL);
+	aat2870 = devm_kzalloc(&client->dev, sizeof(struct aat2870_data),
+				GFP_KERNEL);
 	if (!aat2870) {
 		dev_err(&client->dev,
 			"Failed to allocate memory for aat2870\n");
-		ret = -ENOMEM;
-		goto out;
+		return -ENOMEM;
 	}
 
 	aat2870->dev = &client->dev;
@@ -400,12 +400,12 @@
 		aat2870->init(aat2870);
 
 	if (aat2870->en_pin >= 0) {
-		ret = gpio_request_one(aat2870->en_pin, GPIOF_OUT_INIT_HIGH,
-				       "aat2870-en");
+		ret = devm_gpio_request_one(&client->dev, aat2870->en_pin,
+					GPIOF_OUT_INIT_HIGH, "aat2870-en");
 		if (ret < 0) {
 			dev_err(&client->dev,
 				"Failed to request GPIO %d\n", aat2870->en_pin);
-			goto out_kfree;
+			return ret;
 		}
 	}
 
@@ -436,11 +436,6 @@
 
 out_disable:
 	aat2870_disable(aat2870);
-	if (aat2870->en_pin >= 0)
-		gpio_free(aat2870->en_pin);
-out_kfree:
-	kfree(aat2870);
-out:
 	return ret;
 }
 
@@ -452,11 +447,8 @@
 
 	mfd_remove_devices(aat2870->dev);
 	aat2870_disable(aat2870);
-	if (aat2870->en_pin >= 0)
-		gpio_free(aat2870->en_pin);
 	if (aat2870->uninit)
 		aat2870->uninit(aat2870);
-	kfree(aat2870);
 
 	return 0;
 }
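
The conversion above is the standard devres pattern: devm_kzalloc() and
devm_gpio_request_one() tie the allocation and the GPIO to the device's
lifetime, so the manual kfree()/gpio_free() unwind labels disappear. A
minimal sketch of the resulting probe shape (hypothetical driver and GPIO
number, not the aat2870's actual code):

	#include <linux/gpio.h>
	#include <linux/i2c.h>
	#include <linux/slab.h>

	struct example_data {
		int en_pin;
	};

	static int example_probe(struct i2c_client *client,
				 const struct i2c_device_id *id)
	{
		struct example_data *data;
		int ret;

		/* freed automatically on probe failure or unbind */
		data = devm_kzalloc(&client->dev, sizeof(*data), GFP_KERNEL);
		if (!data)
			return -ENOMEM;

		data->en_pin = 42;	/* hypothetical GPIO number */

		/* released by the devres core as well, so a failure here
		 * needs no goto-based cleanup of the earlier allocation */
		ret = devm_gpio_request_one(&client->dev, data->en_pin,
					    GPIOF_OUT_INIT_HIGH, "example-en");
		if (ret < 0)
			return ret;

		return 0;
	}

The same property is what lets the remove path above drop its frees: the
devres core releases everything in reverse order after remove() returns.
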
diff --git a/drivers/mfd/ab3100-otp.c b/drivers/mfd/ab3100-otp.c
index 8440010..d7ce016 100644
--- a/drivers/mfd/ab3100-otp.c
+++ b/drivers/mfd/ab3100-otp.c
@@ -248,19 +248,7 @@
 	.remove	 = __exit_p(ab3100_otp_remove),
 };
 
-static int __init ab3100_otp_init(void)
-{
-	return platform_driver_probe(&ab3100_otp_driver,
-				     ab3100_otp_probe);
-}
-
-static void __exit ab3100_otp_exit(void)
-{
-	platform_driver_unregister(&ab3100_otp_driver);
-}
-
-module_init(ab3100_otp_init);
-module_exit(ab3100_otp_exit);
+module_platform_driver_probe(ab3100_otp_driver, ab3100_otp_probe);
 
 MODULE_AUTHOR("Linus Walleij <linus.walleij@stericsson.com>");
 MODULE_DESCRIPTION("AB3100 OTP Readout Driver");
diff --git a/drivers/mfd/ab8500-core.c b/drivers/mfd/ab8500-core.c
index f276352..8e8a016 100644
--- a/drivers/mfd/ab8500-core.c
+++ b/drivers/mfd/ab8500-core.c
@@ -458,22 +458,23 @@
 static int ab8500_handle_hierarchical_line(struct ab8500 *ab8500,
 					int latch_offset, u8 latch_val)
 {
-	int int_bit = __ffs(latch_val);
-	int line, i;
+	int int_bit, line, i;
 
-	do {
+	for (i = 0; i < ab8500->mask_size; i++)
+		if (ab8500->irq_reg_offset[i] == latch_offset)
+			break;
+
+	if (i >= ab8500->mask_size) {
+		dev_err(ab8500->dev, "Register offset 0x%2x not declared\n",
+				latch_offset);
+		return -ENXIO;
+	}
+
+	/* ignore masked out interrupts */
+	latch_val &= ~ab8500->mask[i];
+
+	while (latch_val) {
 		int_bit = __ffs(latch_val);
-
-		for (i = 0; i < ab8500->mask_size; i++)
-			if (ab8500->irq_reg_offset[i] == latch_offset)
-				break;
-
-		if (i >= ab8500->mask_size) {
-			dev_err(ab8500->dev, "Register offset 0x%2x not declared\n",
-					latch_offset);
-			return -ENXIO;
-		}
-
 		line = (i << 3) + int_bit;
 		latch_val &= ~(1 << int_bit);
 
@@ -491,7 +492,7 @@
 			line += 1;
 
 		handle_nested_irq(ab8500->irq_base + line);
-	} while (latch_val);
+	}
 
 	return 0;
 }
@@ -1107,6 +1108,7 @@
 	},
 	{
 		.name = "ab8500-usb",
+		.of_compatible = "stericsson,ab8500-usb",
 		.num_resources = ARRAY_SIZE(ab8500_usb_resources),
 		.resources = ab8500_usb_resources,
 	},
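
The reworked handler above hoists the register-offset lookup out of the
loop, masks off disabled sources once, and then walks the remaining latch
bits lowest-first with __ffs(). The walk itself is the usual set-bit
iteration idiom; a standalone sketch (illustrative only, using
__builtin_ctz() as the userspace stand-in for __ffs()):

	#include <stdio.h>

	/* Dispatch each set bit of 'latched', lowest first, skipping
	 * sources that are masked out -- the shape of the ab8500 loop. */
	static void handle_latched(unsigned int latched, unsigned int mask)
	{
		latched &= ~mask;	/* ignore masked out interrupts */
		while (latched) {
			int bit = __builtin_ctz(latched);

			latched &= ~(1u << bit);
			printf("dispatch line %d\n", bit);
		}
	}

	int main(void)
	{
		handle_latched(0xb2, 0x02);	/* dispatches bits 4, 5, 7 */
		return 0;
	}
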
diff --git a/drivers/mfd/ab8500-gpadc.c b/drivers/mfd/ab8500-gpadc.c
index 65f7228..5e65b28 100644
--- a/drivers/mfd/ab8500-gpadc.c
+++ b/drivers/mfd/ab8500-gpadc.c
@@ -332,7 +332,7 @@
 
 	return voltage;
 }
-EXPORT_SYMBOL(ab8500_gpadc_convert);
+EXPORT_SYMBOL(ab8500_gpadc_sw_hw_convert);
 
 /**
  * ab8500_gpadc_read_raw() - gpadc read
diff --git a/drivers/mfd/ab8500-sysctrl.c b/drivers/mfd/ab8500-sysctrl.c
index 272479c..fbca1ce 100644
--- a/drivers/mfd/ab8500-sysctrl.c
+++ b/drivers/mfd/ab8500-sysctrl.c
@@ -242,7 +242,7 @@
 {
 	return platform_driver_register(&ab8500_sysctrl_driver);
 }
-subsys_initcall(ab8500_sysctrl_init);
+arch_initcall(ab8500_sysctrl_init);
 
 MODULE_AUTHOR("Mattias Nilsson <mattias.i.nilsson@stericsson.com");
 MODULE_DESCRIPTION("AB8500 system control driver");
diff --git a/drivers/mfd/adp5520.c b/drivers/mfd/adp5520.c
index 210dd03..0d2eba0 100644
--- a/drivers/mfd/adp5520.c
+++ b/drivers/mfd/adp5520.c
@@ -36,6 +36,7 @@
 	struct blocking_notifier_head notifier_list;
 	int irq;
 	unsigned long id;
+	uint8_t mode;
 };
 
 static int __adp5520_read(struct i2c_client *client,
@@ -326,7 +327,10 @@
 	struct i2c_client *client = to_i2c_client(dev);
 	struct adp5520_chip *chip = dev_get_drvdata(&client->dev);
 
-	adp5520_clr_bits(chip->dev, ADP5520_MODE_STATUS, ADP5520_nSTNBY);
+	adp5520_read(chip->dev, ADP5520_MODE_STATUS, &chip->mode);
+	/* All other bits are W1C */
+	chip->mode &= ADP5520_BL_EN | ADP5520_DIM_EN | ADP5520_nSTNBY;
+	adp5520_write(chip->dev, ADP5520_MODE_STATUS, 0);
 	return 0;
 }
 
@@ -335,7 +339,7 @@
 	struct i2c_client *client = to_i2c_client(dev);
 	struct adp5520_chip *chip = dev_get_drvdata(&client->dev);
 
-	adp5520_set_bits(chip->dev, ADP5520_MODE_STATUS, ADP5520_nSTNBY);
+	adp5520_write(chip->dev, ADP5520_MODE_STATUS, chip->mode);
 	return 0;
 }
 #endif
@@ -360,17 +364,7 @@
 	.id_table 	= adp5520_id,
 };
 
-static int __init adp5520_init(void)
-{
-	return i2c_add_driver(&adp5520_driver);
-}
-module_init(adp5520_init);
-
-static void __exit adp5520_exit(void)
-{
-	i2c_del_driver(&adp5520_driver);
-}
-module_exit(adp5520_exit);
+module_i2c_driver(adp5520_driver);
 
 MODULE_AUTHOR("Michael Hennerich <hennerich@blackfin.uclinux.org>");
 MODULE_DESCRIPTION("ADP5520(01) PMIC-MFD Driver");
diff --git a/drivers/mfd/arizona-core.c b/drivers/mfd/arizona-core.c
index b562c7b..6ab0304 100644
--- a/drivers/mfd/arizona-core.c
+++ b/drivers/mfd/arizona-core.c
@@ -39,11 +39,21 @@
 
 	arizona->clk32k_ref++;
 
-	if (arizona->clk32k_ref == 1)
+	if (arizona->clk32k_ref == 1) {
+		switch (arizona->pdata.clk32k_src) {
+		case ARIZONA_32KZ_MCLK1:
+			ret = pm_runtime_get_sync(arizona->dev);
+			if (ret != 0)
+				goto out;
+			break;
+		}
+
 		ret = regmap_update_bits(arizona->regmap, ARIZONA_CLOCK_32K_1,
 					 ARIZONA_CLK_32K_ENA,
 					 ARIZONA_CLK_32K_ENA);
+	}
 
+out:
 	if (ret != 0)
 		arizona->clk32k_ref--;
 
@@ -63,10 +73,17 @@
 
 	arizona->clk32k_ref--;
 
-	if (arizona->clk32k_ref == 0)
+	if (arizona->clk32k_ref == 0) {
 		regmap_update_bits(arizona->regmap, ARIZONA_CLOCK_32K_1,
 				   ARIZONA_CLK_32K_ENA, 0);
 
+		switch (arizona->pdata.clk32k_src) {
+		case ARIZONA_32KZ_MCLK1:
+			pm_runtime_put_sync(arizona->dev);
+			break;
+		}
+	}
+
 	mutex_unlock(&arizona->clk_lock);
 
 	return ret;
@@ -179,42 +196,134 @@
 	return IRQ_HANDLED;
 }
 
+static int arizona_poll_reg(struct arizona *arizona,
+			    int timeout, unsigned int reg,
+			    unsigned int mask, unsigned int target)
+{
+	unsigned int val = 0;
+	int ret, i;
+
+	for (i = 0; i < timeout; i++) {
+		ret = regmap_read(arizona->regmap, reg, &val);
+		if (ret != 0) {
+			dev_err(arizona->dev, "Failed to read reg %u: %d\n",
+				reg, ret);
+			continue;
+		}
+
+		if ((val & mask) == target)
+			return 0;
+
+		msleep(1);
+	}
+
+	dev_err(arizona->dev, "Polling reg %u timed out: %x\n", reg, val);
+	return -ETIMEDOUT;
+}
+
 static int arizona_wait_for_boot(struct arizona *arizona)
 {
-	unsigned int reg;
-	int ret, i;
+	int ret;
 
 	/*
 	 * We can't use an interrupt as we need to runtime resume to do so,
 	 * we won't race with the interrupt handler as it'll be blocked on
 	 * runtime resume.
 	 */
-	for (i = 0; i < 5; i++) {
-		msleep(1);
+	ret = arizona_poll_reg(arizona, 5, ARIZONA_INTERRUPT_RAW_STATUS_5,
+			       ARIZONA_BOOT_DONE_STS, ARIZONA_BOOT_DONE_STS);
 
-		ret = regmap_read(arizona->regmap,
-				  ARIZONA_INTERRUPT_RAW_STATUS_5, &reg);
-		if (ret != 0) {
-			dev_err(arizona->dev, "Failed to read boot state: %d\n",
-				ret);
-			continue;
-		}
-
-		if (reg & ARIZONA_BOOT_DONE_STS)
-			break;
-	}
-
-	if (reg & ARIZONA_BOOT_DONE_STS) {
+	if (!ret)
 		regmap_write(arizona->regmap, ARIZONA_INTERRUPT_STATUS_5,
 			     ARIZONA_BOOT_DONE_STS);
-	} else {
-		dev_err(arizona->dev, "Device boot timed out: %x\n", reg);
-		return -ETIMEDOUT;
-	}
 
 	pm_runtime_mark_last_busy(arizona->dev);
 
-	return 0;
+	return ret;
+}
+
+static int arizona_apply_hardware_patch(struct arizona *arizona)
+{
+	unsigned int fll, sysclk;
+	int ret, err;
+
+	regcache_cache_bypass(arizona->regmap, true);
+
+	/* Cache existing FLL and SYSCLK settings */
+	ret = regmap_read(arizona->regmap, ARIZONA_FLL1_CONTROL_1, &fll);
+	if (ret != 0) {
+		dev_err(arizona->dev, "Failed to cache FLL settings: %d\n",
+			ret);
+		return ret;
+	}
+	ret = regmap_read(arizona->regmap, ARIZONA_SYSTEM_CLOCK_1, &sysclk);
+	if (ret != 0) {
+		dev_err(arizona->dev, "Failed to cache SYSCLK settings: %d\n",
+			ret);
+		return ret;
+	}
+
+	/* Start up SYSCLK using the FLL in free running mode */
+	ret = regmap_write(arizona->regmap, ARIZONA_FLL1_CONTROL_1,
+			ARIZONA_FLL1_ENA | ARIZONA_FLL1_FREERUN);
+	if (ret != 0) {
+		dev_err(arizona->dev,
+			"Failed to start FLL in freerunning mode: %d\n",
+			ret);
+		return ret;
+	}
+	ret = arizona_poll_reg(arizona, 25, ARIZONA_INTERRUPT_RAW_STATUS_5,
+			       ARIZONA_FLL1_CLOCK_OK_STS,
+			       ARIZONA_FLL1_CLOCK_OK_STS);
+	if (ret != 0) {
+		ret = -ETIMEDOUT;
+		goto err_fll;
+	}
+
+	ret = regmap_write(arizona->regmap, ARIZONA_SYSTEM_CLOCK_1, 0x0144);
+	if (ret != 0) {
+		dev_err(arizona->dev, "Failed to start SYSCLK: %d\n", ret);
+		goto err_fll;
+	}
+
+	/* Start the write sequencer and wait for it to finish */
+	ret = regmap_write(arizona->regmap, ARIZONA_WRITE_SEQUENCER_CTRL_0,
+			ARIZONA_WSEQ_ENA | ARIZONA_WSEQ_START | 160);
+	if (ret != 0) {
+		dev_err(arizona->dev, "Failed to start write sequencer: %d\n",
+			ret);
+		goto err_sysclk;
+	}
+	ret = arizona_poll_reg(arizona, 5, ARIZONA_WRITE_SEQUENCER_CTRL_1,
+			       ARIZONA_WSEQ_BUSY, 0);
+	if (ret != 0) {
+		regmap_write(arizona->regmap, ARIZONA_WRITE_SEQUENCER_CTRL_0,
+				ARIZONA_WSEQ_ABORT);
+		ret = -ETIMEDOUT;
+	}
+
+err_sysclk:
+	err = regmap_write(arizona->regmap, ARIZONA_SYSTEM_CLOCK_1, sysclk);
+	if (err != 0) {
+		dev_err(arizona->dev,
+			"Failed to re-apply old SYSCLK settings: %d\n",
+			err);
+	}
+
+err_fll:
+	err = regmap_write(arizona->regmap, ARIZONA_FLL1_CONTROL_1, fll);
+	if (err != 0) {
+		dev_err(arizona->dev,
+			"Failed to re-apply old FLL settings: %d\n",
+			err);
+	}
+
+	regcache_cache_bypass(arizona->regmap, false);
+
+	if (ret != 0)
+		return ret;
+	else
+		return err;
 }
 
 #ifdef CONFIG_PM_RUNTIME
@@ -233,20 +342,44 @@
 
 	regcache_cache_only(arizona->regmap, false);
 
-	ret = arizona_wait_for_boot(arizona);
-	if (ret != 0) {
-		regulator_disable(arizona->dcvdd);
-		return ret;
+	switch (arizona->type) {
+	case WM5102:
+		ret = wm5102_patch(arizona);
+		if (ret != 0) {
+			dev_err(arizona->dev, "Failed to apply patch: %d\n",
+				ret);
+			goto err;
+		}
+
+		ret = arizona_apply_hardware_patch(arizona);
+		if (ret != 0) {
+			dev_err(arizona->dev,
+				"Failed to apply hardware patch: %d\n",
+				ret);
+			goto err;
+		}
+		break;
+	default:
+		ret = arizona_wait_for_boot(arizona);
+		if (ret != 0)
+			goto err;
+
+		break;
 	}
 
 	ret = regcache_sync(arizona->regmap);
 	if (ret != 0) {
 		dev_err(arizona->dev, "Failed to restore register cache\n");
-		regulator_disable(arizona->dcvdd);
-		return ret;
+		goto err;
 	}
 
 	return 0;
+
+err:
+	regcache_cache_only(arizona->regmap, true);
+	regulator_disable(arizona->dcvdd);
+	return ret;
 }
 
 static int arizona_runtime_suspend(struct device *dev)
@@ -371,6 +504,17 @@
 		goto err_early;
 	}
 
+	if (arizona->pdata.reset) {
+		/* Start out with /RESET low to put the chip into reset */
+		ret = gpio_request_one(arizona->pdata.reset,
+				       GPIOF_DIR_OUT | GPIOF_INIT_LOW,
+				       "arizona /RESET");
+		if (ret != 0) {
+			dev_err(dev, "Failed to request /RESET: %d\n", ret);
+			goto err_early;
+		}
+	}
+
 	ret = regulator_bulk_enable(arizona->num_core_supplies,
 				    arizona->core_supplies);
 	if (ret != 0) {
@@ -386,16 +530,8 @@
 	}
 
 	if (arizona->pdata.reset) {
-		/* Start out with /RESET low to put the chip into reset */
-		ret = gpio_request_one(arizona->pdata.reset,
-				       GPIOF_DIR_OUT | GPIOF_INIT_LOW,
-				       "arizona /RESET");
-		if (ret != 0) {
-			dev_err(dev, "Failed to request /RESET: %d\n", ret);
-			goto err_dcvdd;
-		}
-
 		gpio_set_value_cansleep(arizona->pdata.reset, 1);
+		msleep(1);
 	}
 
 	regcache_cache_only(arizona->regmap, false);
@@ -424,6 +560,7 @@
 			arizona->type = WM5102;
 		}
 		apply_patch = wm5102_patch;
+		arizona->rev &= 0x7;
 		break;
 #endif
 #ifdef CONFIG_MFD_WM5110
@@ -454,6 +591,8 @@
 			goto err_reset;
 		}
 
+		msleep(1);
+
 		ret = regcache_sync(arizona->regmap);
 		if (ret != 0) {
 			dev_err(dev, "Failed to sync device: %d\n", ret);
@@ -461,10 +600,24 @@
 		}
 	}
 
-	ret = arizona_wait_for_boot(arizona);
-	if (ret != 0) {
-		dev_err(arizona->dev, "Device failed initial boot: %d\n", ret);
-		goto err_reset;
+	switch (arizona->type) {
+	case WM5102:
+		ret = regmap_read(arizona->regmap, 0x19, &val);
+		if (ret != 0)
+			dev_err(dev,
+				"Failed to check write sequencer state: %d\n",
+				ret);
+		else if (val & 0x01)
+			break;
+		/* Fall through */
+	default:
+		ret = arizona_wait_for_boot(arizona);
+		if (ret != 0) {
+			dev_err(arizona->dev,
+				"Device failed initial boot: %d\n", ret);
+			goto err_reset;
+		}
+		break;
 	}
 
 	if (apply_patch) {
@@ -474,6 +627,20 @@
 				ret);
 			goto err_reset;
 		}
+
+		switch (arizona->type) {
+		case WM5102:
+			ret = arizona_apply_hardware_patch(arizona);
+			if (ret != 0) {
+				dev_err(arizona->dev,
+					"Failed to apply hardware patch: %d\n",
+					ret);
+				goto err_reset;
+			}
+			break;
+		default:
+			break;
+		}
 	}
 
 	for (i = 0; i < ARRAY_SIZE(arizona->pdata.gpio_defaults); i++) {
@@ -498,6 +665,7 @@
 		regmap_update_bits(arizona->regmap, ARIZONA_CLOCK_32K_1,
 				   ARIZONA_CLK_32K_SRC_MASK,
 				   arizona->pdata.clk32k_src - 1);
+		arizona_clk32k_enable(arizona);
 		break;
 	case ARIZONA_32KZ_NONE:
 		regmap_update_bits(arizona->regmap, ARIZONA_CLOCK_32K_1,
@@ -511,10 +679,16 @@
 	}
 
 	for (i = 0; i < ARIZONA_MAX_MICBIAS; i++) {
-		if (!arizona->pdata.micbias[i].mV)
+		if (!arizona->pdata.micbias[i].mV &&
+		    !arizona->pdata.micbias[i].bypass)
 			continue;
 
+		/* Apply default for bypass mode */
+		if (!arizona->pdata.micbias[i].mV)
+			arizona->pdata.micbias[i].mV = 2800;
+
 		val = (arizona->pdata.micbias[i].mV - 1500) / 100;
+
 		val <<= ARIZONA_MICB1_LVL_SHIFT;
 
 		if (arizona->pdata.micbias[i].ext_cap)
@@ -526,10 +700,14 @@
 		if (arizona->pdata.micbias[i].fast_start)
 			val |= ARIZONA_MICB1_RATE;
 
+		if (arizona->pdata.micbias[i].bypass)
+			val |= ARIZONA_MICB1_BYPASS;
+
 		regmap_update_bits(arizona->regmap,
 				   ARIZONA_MIC_BIAS_CTRL_1 + i,
 				   ARIZONA_MICB1_LVL_MASK |
 				   ARIZONA_MICB1_DISCH |
+				   ARIZONA_MICB1_BYPASS |
 				   ARIZONA_MICB1_RATE, val);
 	}
 
@@ -610,10 +788,9 @@
 	arizona_irq_exit(arizona);
 err_reset:
 	if (arizona->pdata.reset) {
-		gpio_set_value_cansleep(arizona->pdata.reset, 1);
+		gpio_set_value_cansleep(arizona->pdata.reset, 0);
 		gpio_free(arizona->pdata.reset);
 	}
-err_dcvdd:
 	regulator_disable(arizona->dcvdd);
 err_enable:
 	regulator_bulk_disable(arizona->num_core_supplies,
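
Much of the arizona-core rework hangs off the new arizona_poll_reg()
helper, which replaces the open-coded "sleep and re-read" loops with one
read-compare-timeout primitive. The shape in miniature (self-contained
sketch with a fake register file standing in for regmap):

	#include <stdbool.h>
	#include <stdio.h>

	static unsigned int regs[256];	/* fake register file */

	/* Poll regs[reg] until (val & mask) == target, giving up after
	 * 'tries' reads; the real helper msleep(1)s between reads. */
	static bool poll_reg(unsigned int reg, unsigned int mask,
			     unsigned int target, int tries)
	{
		while (tries-- > 0) {
			if ((regs[reg] & mask) == target)
				return true;
		}
		return false;	/* timed out */
	}

	int main(void)
	{
		regs[5] = 0x100;	/* pretend BOOT_DONE_STS is latched */
		printf("%d\n", poll_reg(5, 0x100, 0x100, 5));	/* 1 */
		return 0;
	}
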
diff --git a/drivers/mfd/arizona-irq.c b/drivers/mfd/arizona-irq.c
index 2bec5f0..64cd9b6 100644
--- a/drivers/mfd/arizona-irq.c
+++ b/drivers/mfd/arizona-irq.c
@@ -94,6 +94,7 @@
 static irqreturn_t arizona_irq_thread(int irq, void *data)
 {
 	struct arizona *arizona = data;
+	bool poll;
 	unsigned int val;
 	int ret;
 
@@ -103,20 +104,39 @@
 		return IRQ_NONE;
 	}
 
-	/* Always handle the AoD domain */
-	handle_nested_irq(irq_find_mapping(arizona->virq, 0));
+	do {
+		poll = false;
 
-	/*
-	 * Check if one of the main interrupts is asserted and only
-	 * check that domain if it is.
-	 */
-	ret = regmap_read(arizona->regmap, ARIZONA_IRQ_PIN_STATUS, &val);
-	if (ret == 0 && val & ARIZONA_IRQ1_STS) {
-		handle_nested_irq(irq_find_mapping(arizona->virq, 1));
-	} else if (ret != 0) {
-		dev_err(arizona->dev, "Failed to read main IRQ status: %d\n",
-			ret);
-	}
+		/* Always handle the AoD domain */
+		handle_nested_irq(irq_find_mapping(arizona->virq, 0));
+
+		/*
+		 * Check if one of the main interrupts is asserted and only
+		 * check that domain if it is.
+		 */
+		ret = regmap_read(arizona->regmap, ARIZONA_IRQ_PIN_STATUS,
+				  &val);
+		if (ret == 0 && val & ARIZONA_IRQ1_STS) {
+			handle_nested_irq(irq_find_mapping(arizona->virq, 1));
+		} else if (ret != 0) {
+			dev_err(arizona->dev,
+				"Failed to read main IRQ status: %d\n", ret);
+		}
+
+		/*
+		 * Poll the IRQ pin status to see if we're really done
+		 * if the interrupt controller can't do it for us.
+		 */
+		if (!arizona->pdata.irq_gpio) {
+			break;
+		} else if (arizona->pdata.irq_flags & IRQF_TRIGGER_RISING &&
+			   gpio_get_value_cansleep(arizona->pdata.irq_gpio)) {
+			poll = true;
+		} else if (arizona->pdata.irq_flags & IRQF_TRIGGER_FALLING &&
+			   !gpio_get_value_cansleep(arizona->pdata.irq_gpio)) {
+			poll = true;
+		}
+	} while (poll);
 
 	pm_runtime_mark_last_busy(arizona->dev);
 	pm_runtime_put_autosuspend(arizona->dev);
@@ -169,6 +189,7 @@
 	int ret, i;
 	const struct regmap_irq_chip *aod, *irq;
 	bool ctrlif_error = true;
+	struct irq_data *irq_data;
 
 	switch (arizona->type) {
 #ifdef CONFIG_MFD_WM5102
@@ -192,7 +213,36 @@
 		return -EINVAL;
 	}
 
-	if (arizona->pdata.irq_active_high) {
+	/* Disable all wake sources by default */
+	regmap_write(arizona->regmap, ARIZONA_WAKE_CONTROL, 0);
+
+	/* Read the flags from the interrupt controller if not specified */
+	if (!arizona->pdata.irq_flags) {
+		irq_data = irq_get_irq_data(arizona->irq);
+		if (!irq_data) {
+			dev_err(arizona->dev, "Invalid IRQ: %d\n",
+				arizona->irq);
+			return -EINVAL;
+		}
+
+		arizona->pdata.irq_flags = irqd_get_trigger_type(irq_data);
+		switch (arizona->pdata.irq_flags) {
+		case IRQF_TRIGGER_LOW:
+		case IRQF_TRIGGER_HIGH:
+		case IRQF_TRIGGER_RISING:
+		case IRQF_TRIGGER_FALLING:
+			break;
+
+		case IRQ_TYPE_NONE:
+		default:
+			/* Device default */
+			arizona->pdata.irq_flags = IRQF_TRIGGER_LOW;
+			break;
+		}
+	}
+
+	if (arizona->pdata.irq_flags & (IRQF_TRIGGER_HIGH |
+					IRQF_TRIGGER_RISING)) {
 		ret = regmap_update_bits(arizona->regmap, ARIZONA_IRQ_CTRL_1,
 					 ARIZONA_IRQ_POL, 0);
 		if (ret != 0) {
@@ -200,12 +250,10 @@
 				ret);
 			goto err;
 		}
-
-		flags |= IRQF_TRIGGER_HIGH;
-	} else {
-		flags |= IRQF_TRIGGER_LOW;
 	}
 
+	flags |= arizona->pdata.irq_flags;
+
 	/* Allocate a virtual IRQ domain to distribute to the regmap domains */
 	arizona->virq = irq_domain_add_linear(NULL, 2, &arizona_domain_ops,
 					      arizona);
@@ -257,11 +305,31 @@
 		}
 	}
 
+	/* Used to emulate edge trigger and to work around broken pinmux */
+	if (arizona->pdata.irq_gpio) {
+		if (gpio_to_irq(arizona->pdata.irq_gpio) != arizona->irq) {
+			dev_warn(arizona->dev, "IRQ %d is not GPIO %d (%d)\n",
+				 arizona->irq, arizona->pdata.irq_gpio,
+				 gpio_to_irq(arizona->pdata.irq_gpio));
+			arizona->irq = gpio_to_irq(arizona->pdata.irq_gpio);
+		}
+
+		ret = devm_gpio_request_one(arizona->dev,
+					    arizona->pdata.irq_gpio,
+					    GPIOF_IN, "arizona IRQ");
+		if (ret != 0) {
+			dev_err(arizona->dev,
+				"Failed to request IRQ GPIO %d:: %d\n",
+				arizona->pdata.irq_gpio, ret);
+			arizona->pdata.irq_gpio = 0;
+		}
+	}
+
 	ret = request_threaded_irq(arizona->irq, NULL, arizona_irq_thread,
 				   flags, "arizona", arizona);
 
 	if (ret != 0) {
-		dev_err(arizona->dev, "Failed to request IRQ %d: %d\n",
+		dev_err(arizona->dev, "Failed to request primary IRQ %d: %d\n",
 			arizona->irq, ret);
 		goto err_main_irq;
 	}
diff --git a/drivers/mfd/arizona-spi.c b/drivers/mfd/arizona-spi.c
index 1b9fdd6..b57e642 100644
--- a/drivers/mfd/arizona-spi.c
+++ b/drivers/mfd/arizona-spi.c
@@ -67,7 +67,7 @@
 
 static int arizona_spi_remove(struct spi_device *spi)
 {
-	struct arizona *arizona = dev_get_drvdata(&spi->dev);
+	struct arizona *arizona = spi_get_drvdata(spi);
 	arizona_dev_exit(arizona);
 	return 0;
 }
diff --git a/drivers/mfd/as3711.c b/drivers/mfd/as3711.c
index e994c96..01e4141 100644
--- a/drivers/mfd/as3711.c
+++ b/drivers/mfd/as3711.c
@@ -112,16 +112,34 @@
 	.cache_type = REGCACHE_RBTREE,
 };
 
+#ifdef CONFIG_OF
+static struct of_device_id as3711_of_match[] = {
+	{.compatible = "ams,as3711",},
+	{}
+};
+MODULE_DEVICE_TABLE(of, as3711_of_match);
+#endif
+
 static int as3711_i2c_probe(struct i2c_client *client,
 			    const struct i2c_device_id *id)
 {
 	struct as3711 *as3711;
-	struct as3711_platform_data *pdata = client->dev.platform_data;
+	struct as3711_platform_data *pdata;
 	unsigned int id1, id2;
 	int ret;
 
-	if (!pdata)
-		dev_dbg(&client->dev, "Platform data not found\n");
+	if (!client->dev.of_node) {
+		pdata = client->dev.platform_data;
+		if (!pdata)
+			dev_dbg(&client->dev, "Platform data not found\n");
+	} else {
+		pdata = devm_kzalloc(&client->dev,
+				     sizeof(*pdata), GFP_KERNEL);
+		if (!pdata) {
+			dev_err(&client->dev, "Failed to allocate pdata\n");
+			return -ENOMEM;
+		}
+	}
 
 	as3711 = devm_kzalloc(&client->dev, sizeof(struct as3711), GFP_KERNEL);
 	if (!as3711) {
@@ -193,7 +211,8 @@
 	.driver = {
 		   .name = "as3711",
 		   .owner = THIS_MODULE,
-		   },
+		   .of_match_table = of_match_ptr(as3711_of_match),
+	},
 	.probe = as3711_i2c_probe,
 	.remove = as3711_i2c_remove,
 	.id_table = as3711_i2c_id,
diff --git a/drivers/mfd/cros_ec.c b/drivers/mfd/cros_ec.c
new file mode 100644
index 0000000..10cd14e
--- /dev/null
+++ b/drivers/mfd/cros_ec.c
@@ -0,0 +1,196 @@
+/*
+ * ChromeOS EC multi-function device
+ *
+ * Copyright (C) 2012 Google, Inc
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * The ChromeOS EC multi function device is used to mux all the requests
+ * to the EC device for its multiple features: keyboard controller,
+ * battery charging and regulator control, firmware update.
+ */
+
+#include <linux/interrupt.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/mfd/core.h>
+#include <linux/mfd/cros_ec.h>
+#include <linux/mfd/cros_ec_commands.h>
+
+int cros_ec_prepare_tx(struct cros_ec_device *ec_dev,
+		       struct cros_ec_msg *msg)
+{
+	uint8_t *out;
+	int csum, i;
+
+	BUG_ON(msg->out_len > EC_HOST_PARAM_SIZE);
+	out = ec_dev->dout;
+	out[0] = EC_CMD_VERSION0 + msg->version;
+	out[1] = msg->cmd;
+	out[2] = msg->out_len;
+	csum = out[0] + out[1] + out[2];
+	for (i = 0; i < msg->out_len; i++)
+		csum += out[EC_MSG_TX_HEADER_BYTES + i] = msg->out_buf[i];
+	out[EC_MSG_TX_HEADER_BYTES + msg->out_len] = (uint8_t)(csum & 0xff);
+
+	return EC_MSG_TX_PROTO_BYTES + msg->out_len;
+}
+EXPORT_SYMBOL(cros_ec_prepare_tx);
+
+static int cros_ec_command_sendrecv(struct cros_ec_device *ec_dev,
+		uint16_t cmd, void *out_buf, int out_len,
+		void *in_buf, int in_len)
+{
+	struct cros_ec_msg msg;
+
+	msg.version = cmd >> 8;
+	msg.cmd = cmd & 0xff;
+	msg.out_buf = out_buf;
+	msg.out_len = out_len;
+	msg.in_buf = in_buf;
+	msg.in_len = in_len;
+
+	return ec_dev->command_xfer(ec_dev, &msg);
+}
+
+static int cros_ec_command_recv(struct cros_ec_device *ec_dev,
+		uint16_t cmd, void *buf, int buf_len)
+{
+	return cros_ec_command_sendrecv(ec_dev, cmd, NULL, 0, buf, buf_len);
+}
+
+static int cros_ec_command_send(struct cros_ec_device *ec_dev,
+		uint16_t cmd, void *buf, int buf_len)
+{
+	return cros_ec_command_sendrecv(ec_dev, cmd, buf, buf_len, NULL, 0);
+}
+
+static irqreturn_t ec_irq_thread(int irq, void *data)
+{
+	struct cros_ec_device *ec_dev = data;
+
+	if (device_may_wakeup(ec_dev->dev))
+		pm_wakeup_event(ec_dev->dev, 0);
+
+	blocking_notifier_call_chain(&ec_dev->event_notifier, 1, ec_dev);
+
+	return IRQ_HANDLED;
+}
+
+static struct mfd_cell cros_devs[] = {
+	{
+		.name = "cros-ec-keyb",
+		.id = 1,
+		.of_compatible = "google,cros-ec-keyb",
+	},
+};
+
+int cros_ec_register(struct cros_ec_device *ec_dev)
+{
+	struct device *dev = ec_dev->dev;
+	int err = 0;
+
+	BLOCKING_INIT_NOTIFIER_HEAD(&ec_dev->event_notifier);
+
+	ec_dev->command_send = cros_ec_command_send;
+	ec_dev->command_recv = cros_ec_command_recv;
+	ec_dev->command_sendrecv = cros_ec_command_sendrecv;
+
+	if (ec_dev->din_size) {
+		ec_dev->din = kmalloc(ec_dev->din_size, GFP_KERNEL);
+		if (!ec_dev->din) {
+			err = -ENOMEM;
+			goto fail_din;
+		}
+	}
+	if (ec_dev->dout_size) {
+		ec_dev->dout = kmalloc(ec_dev->dout_size, GFP_KERNEL);
+		if (!ec_dev->dout) {
+			err = -ENOMEM;
+			goto fail_dout;
+		}
+	}
+
+	if (!ec_dev->irq) {
+		dev_dbg(dev, "no valid IRQ: %d\n", ec_dev->irq);
+		goto fail_irq;
+	}
+
+	err = request_threaded_irq(ec_dev->irq, NULL, ec_irq_thread,
+				   IRQF_TRIGGER_LOW | IRQF_ONESHOT,
+				   "chromeos-ec", ec_dev);
+	if (err) {
+		dev_err(dev, "request irq %d: error %d\n", ec_dev->irq, err);
+		goto fail_irq;
+	}
+
+	err = mfd_add_devices(dev, 0, cros_devs,
+			      ARRAY_SIZE(cros_devs),
+			      NULL, ec_dev->irq, NULL);
+	if (err) {
+		dev_err(dev, "failed to add mfd devices\n");
+		goto fail_mfd;
+	}
+
+	dev_info(dev, "Chrome EC (%s)\n", ec_dev->name);
+
+	return 0;
+
+fail_mfd:
+	free_irq(ec_dev->irq, ec_dev);
+fail_irq:
+	kfree(ec_dev->dout);
+fail_dout:
+	kfree(ec_dev->din);
+fail_din:
+	return err;
+}
+EXPORT_SYMBOL(cros_ec_register);
+
+int cros_ec_remove(struct cros_ec_device *ec_dev)
+{
+	mfd_remove_devices(ec_dev->dev);
+	free_irq(ec_dev->irq, ec_dev);
+	kfree(ec_dev->dout);
+	kfree(ec_dev->din);
+
+	return 0;
+}
+EXPORT_SYMBOL(cros_ec_remove);
+
+#ifdef CONFIG_PM_SLEEP
+int cros_ec_suspend(struct cros_ec_device *ec_dev)
+{
+	struct device *dev = ec_dev->dev;
+
+	if (device_may_wakeup(dev))
+		ec_dev->wake_enabled = !enable_irq_wake(ec_dev->irq);
+
+	disable_irq(ec_dev->irq);
+	ec_dev->was_wake_device = ec_dev->wake_enabled;
+
+	return 0;
+}
+EXPORT_SYMBOL(cros_ec_suspend);
+
+int cros_ec_resume(struct cros_ec_device *ec_dev)
+{
+	enable_irq(ec_dev->irq);
+
+	if (ec_dev->wake_enabled) {
+		disable_irq_wake(ec_dev->irq);
+		ec_dev->wake_enabled = 0;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(cros_ec_resume);
+
+#endif
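
cros_ec_prepare_tx() above frames every host command the same way: a
three-byte header (version byte, command, payload length), the payload,
then an 8-bit sum of everything before it. A standalone encoder showing
the framing (plain C; the EC_CMD_VERSION0 value and the 0x4c command are
assumptions made for the example):

	#include <stdint.h>
	#include <stdio.h>

	#define EC_CMD_VERSION0	0xdc	/* assumed base version byte */
	#define HEADER_BYTES	3	/* version, command, length */

	static int prepare_tx(uint8_t *out, uint8_t version, uint8_t cmd,
			      const uint8_t *payload, uint8_t len)
	{
		int csum, i;

		out[0] = EC_CMD_VERSION0 + version;
		out[1] = cmd;
		out[2] = len;
		csum = out[0] + out[1] + out[2];
		for (i = 0; i < len; i++)
			csum += out[HEADER_BYTES + i] = payload[i];
		out[HEADER_BYTES + len] = (uint8_t)(csum & 0xff);

		return HEADER_BYTES + len + 1;	/* bytes on the wire */
	}

	int main(void)
	{
		uint8_t buf[64];
		const uint8_t payload[] = { 0x01, 0x02 };
		int i, n = prepare_tx(buf, 0, 0x4c, payload, 2);

		for (i = 0; i < n; i++)
			printf("%02x ", buf[i]);	/* dc 4c 02 01 02 2d */
		printf("\n");
		return 0;
	}
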
diff --git a/drivers/mfd/cros_ec_i2c.c b/drivers/mfd/cros_ec_i2c.c
new file mode 100644
index 0000000..1230446
--- /dev/null
+++ b/drivers/mfd/cros_ec_i2c.c
@@ -0,0 +1,201 @@
+/*
+ * ChromeOS EC multi-function device (I2C)
+ *
+ * Copyright (C) 2012 Google, Inc
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/i2c.h>
+#include <linux/interrupt.h>
+#include <linux/mfd/cros_ec.h>
+#include <linux/mfd/cros_ec_commands.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+
+static inline struct cros_ec_device *to_ec_dev(struct device *dev)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+
+	return i2c_get_clientdata(client);
+}
+
+static int cros_ec_command_xfer(struct cros_ec_device *ec_dev,
+				struct cros_ec_msg *msg)
+{
+	struct i2c_client *client = ec_dev->priv;
+	int ret = -ENOMEM;
+	int i;
+	int packet_len;
+	u8 *out_buf = NULL;
+	u8 *in_buf = NULL;
+	u8 sum;
+	struct i2c_msg i2c_msg[2];
+
+	i2c_msg[0].addr = client->addr;
+	i2c_msg[0].flags = 0;
+	i2c_msg[1].addr = client->addr;
+	i2c_msg[1].flags = I2C_M_RD;
+
+	/*
+	 * allocate larger packet (one byte for checksum, one byte for
+	 * length, and one for result code)
+	 */
+	packet_len = msg->in_len + 3;
+	in_buf = kzalloc(packet_len, GFP_KERNEL);
+	if (!in_buf)
+		goto done;
+	i2c_msg[1].len = packet_len;
+	i2c_msg[1].buf = (char *)in_buf;
+
+	/*
+	 * allocate larger packet (one byte for checksum, one for
+	 * command code, one for length, and one for command version)
+	 */
+	packet_len = msg->out_len + 4;
+	out_buf = kzalloc(packet_len, GFP_KERNEL);
+	if (!out_buf)
+		goto done;
+	i2c_msg[0].len = packet_len;
+	i2c_msg[0].buf = (char *)out_buf;
+
+	out_buf[0] = EC_CMD_VERSION0 + msg->version;
+	out_buf[1] = msg->cmd;
+	out_buf[2] = msg->out_len;
+
+	/* copy message payload and compute checksum */
+	sum = out_buf[0] + out_buf[1] + out_buf[2];
+	for (i = 0; i < msg->out_len; i++) {
+		out_buf[3 + i] = msg->out_buf[i];
+		sum += out_buf[3 + i];
+	}
+	out_buf[3 + msg->out_len] = sum;
+
+	/* send command to EC and read answer */
+	ret = i2c_transfer(client->adapter, i2c_msg, 2);
+	if (ret < 0) {
+		dev_err(ec_dev->dev, "i2c transfer failed: %d\n", ret);
+		goto done;
+	} else if (ret != 2) {
+		dev_err(ec_dev->dev, "failed to get response: %d\n", ret);
+		ret = -EIO;
+		goto done;
+	}
+
+	/* check response error code */
+	if (i2c_msg[1].buf[0]) {
+		dev_warn(ec_dev->dev, "command 0x%02x returned an error %d\n",
+			 msg->cmd, i2c_msg[1].buf[0]);
+		ret = -EINVAL;
+		goto done;
+	}
+
+	/* copy response packet payload and compute checksum */
+	sum = in_buf[0] + in_buf[1];
+	for (i = 0; i < msg->in_len; i++) {
+		msg->in_buf[i] = in_buf[2 + i];
+		sum += in_buf[2 + i];
+	}
+	dev_dbg(ec_dev->dev, "packet: %*ph, sum = %02x\n",
+		i2c_msg[1].len, in_buf, sum);
+	if (sum != in_buf[2 + msg->in_len]) {
+		dev_err(ec_dev->dev, "bad packet checksum\n");
+		ret = -EBADMSG;
+		goto done;
+	}
+
+	ret = 0;
+ done:
+	kfree(in_buf);
+	kfree(out_buf);
+	return ret;
+}
+
+static int cros_ec_probe_i2c(struct i2c_client *client,
+			     const struct i2c_device_id *dev_id)
+{
+	struct device *dev = &client->dev;
+	struct cros_ec_device *ec_dev = NULL;
+	int err;
+
+	ec_dev = devm_kzalloc(dev, sizeof(*ec_dev), GFP_KERNEL);
+	if (!ec_dev)
+		return -ENOMEM;
+
+	i2c_set_clientdata(client, ec_dev);
+	ec_dev->name = "I2C";
+	ec_dev->dev = dev;
+	ec_dev->priv = client;
+	ec_dev->irq = client->irq;
+	ec_dev->command_xfer = cros_ec_command_xfer;
+	ec_dev->ec_name = client->name;
+	ec_dev->phys_name = client->adapter->name;
+	ec_dev->parent = &client->dev;
+
+	err = cros_ec_register(ec_dev);
+	if (err) {
+		dev_err(dev, "cannot register EC\n");
+		return err;
+	}
+
+	return 0;
+}
+
+static int cros_ec_remove_i2c(struct i2c_client *client)
+{
+	struct cros_ec_device *ec_dev = i2c_get_clientdata(client);
+
+	cros_ec_remove(ec_dev);
+
+	return 0;
+}
+
+#ifdef CONFIG_PM_SLEEP
+static int cros_ec_i2c_suspend(struct device *dev)
+{
+	struct cros_ec_device *ec_dev = to_ec_dev(dev);
+
+	return cros_ec_suspend(ec_dev);
+}
+
+static int cros_ec_i2c_resume(struct device *dev)
+{
+	struct cros_ec_device *ec_dev = to_ec_dev(dev);
+
+	return cros_ec_resume(ec_dev);
+}
+#endif
+
+static SIMPLE_DEV_PM_OPS(cros_ec_i2c_pm_ops, cros_ec_i2c_suspend,
+			  cros_ec_i2c_resume);
+
+static const struct i2c_device_id cros_ec_i2c_id[] = {
+	{ "cros-ec-i2c", 0 },
+	{ }
+};
+MODULE_DEVICE_TABLE(i2c, cros_ec_i2c_id);
+
+static struct i2c_driver cros_ec_driver = {
+	.driver	= {
+		.name	= "cros-ec-i2c",
+		.owner	= THIS_MODULE,
+		.pm	= &cros_ec_i2c_pm_ops,
+	},
+	.probe		= cros_ec_probe_i2c,
+	.remove		= cros_ec_remove_i2c,
+	.id_table	= cros_ec_i2c_id,
+};
+
+module_i2c_driver(cros_ec_driver);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("ChromeOS EC multi function device");
diff --git a/drivers/mfd/cros_ec_spi.c b/drivers/mfd/cros_ec_spi.c
new file mode 100644
index 0000000..19193cf
--- /dev/null
+++ b/drivers/mfd/cros_ec_spi.c
@@ -0,0 +1,375 @@
+/*
+ * ChromeOS EC multi-function device (SPI)
+ *
+ * Copyright (C) 2012 Google, Inc
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/delay.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/mfd/cros_ec.h>
+#include <linux/mfd/cros_ec_commands.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/spi/spi.h>
+
+
+/* The header byte, which follows the preamble */
+#define EC_MSG_HEADER			0xec
+
+/*
+ * Number of EC preamble bytes we read at a time. Since it takes
+ * about 400-500us for the EC to respond there is not a lot of
+ * point in tuning this. If the EC could respond faster, then
+ * we could increase this so that we might expect the preamble and
+ * message to occur in a single transaction. However, the maximum
+ * SPI transfer size is 256 bytes, so at 5MHz we need a response
+ * time of perhaps <320us (200 bytes / 1600 bits).
+ */
+#define EC_MSG_PREAMBLE_COUNT		32
+
+/*
+ * We must get a response from the EC in 5ms. This is a very long
+ * time, but the flash write command can take 2-3ms. The EC command
+ * processing is currently not very fast (about 500us). We could
+ * look at speeding this up and making the flash write command a
+ * 'slow' command, requiring a GET_STATUS wait loop, like flash
+ * erase.
+ */
+#define EC_MSG_DEADLINE_MS		5
+
+/*
+ * Time between raising the SPI chip select (for the end of a
+ * transaction) and dropping it again (for the next transaction).
+ * If we go too fast, the EC will miss the transaction. It seems
+ * that 50us is enough with the 16MHz STM32 EC.
+ */
+
+/**
+ * struct cros_ec_spi - information about a SPI-connected EC
+ *
+ * @spi: SPI device we are connected to
+ * @last_transfer_ns: time that we last finished a transfer, or 0 if there
+ *	is no record
+ */
+struct cros_ec_spi {
+	struct spi_device *spi;
+	s64 last_transfer_ns;
+};
+
+static void debug_packet(struct device *dev, const char *name, u8 *ptr,
+			  int len)
+{
+#ifdef DEBUG
+	int i;
+
+	dev_dbg(dev, "%s: ", name);
+	for (i = 0; i < len; i++)
+		dev_cont(dev, " %02x", ptr[i]);
+#endif
+}
+
+/**
+ * cros_ec_spi_receive_response - Receive a response from the EC.
+ *
+ * This function has two phases: reading the preamble bytes (since if we read
+ * data from the EC before it is ready to send, we just get preamble) and
+ * reading the actual message.
+ *
+ * The received data is placed into ec_dev->din.
+ *
+ * @ec_dev: ChromeOS EC device
+ * @need_len: Number of message bytes we need to read
+ */
+static int cros_ec_spi_receive_response(struct cros_ec_device *ec_dev,
+					int need_len)
+{
+	struct cros_ec_spi *ec_spi = ec_dev->priv;
+	struct spi_transfer trans;
+	struct spi_message msg;
+	u8 *ptr, *end;
+	int ret;
+	unsigned long deadline;
+	int todo;
+
+	/* Receive data until we see the header byte */
+	deadline = jiffies + msecs_to_jiffies(EC_MSG_DEADLINE_MS);
+	do {
+		memset(&trans, '\0', sizeof(trans));
+		trans.cs_change = 1;
+		trans.rx_buf = ptr = ec_dev->din;
+		trans.len = EC_MSG_PREAMBLE_COUNT;
+
+		spi_message_init(&msg);
+		spi_message_add_tail(&trans, &msg);
+		ret = spi_sync(ec_spi->spi, &msg);
+		if (ret < 0) {
+			dev_err(ec_dev->dev, "spi transfer failed: %d\n", ret);
+			return ret;
+		}
+
+		for (end = ptr + EC_MSG_PREAMBLE_COUNT; ptr != end; ptr++) {
+			if (*ptr == EC_MSG_HEADER) {
+				dev_dbg(ec_dev->dev, "msg found at %ld\n",
+					ptr - ec_dev->din);
+				break;
+			}
+		}
+
+		if (ptr == end && time_after(jiffies, deadline)) {
+			dev_warn(ec_dev->dev, "EC failed to respond in time\n");
+			return -ETIMEDOUT;
+		}
+	} while (ptr == end);
+
+	/*
+	 * ptr now points to the header byte. Copy any valid data to the
+	 * start of our buffer
+	 */
+	todo = end - ++ptr;
+	BUG_ON(todo < 0 || todo > ec_dev->din_size);
+	todo = min(todo, need_len);
+	memmove(ec_dev->din, ptr, todo);
+	ptr = ec_dev->din + todo;
+	dev_dbg(ec_dev->dev, "need %d, got %d bytes from preamble\n",
+		 need_len, todo);
+	need_len -= todo;
+
+	/* Receive data until we have it all */
+	while (need_len > 0) {
+		/*
+		 * We can't support transfers larger than the SPI FIFO size
+		 * unless we have DMA. We don't have DMA on the ISP SPI ports
+		 * for Exynos. We need a way of asking SPI driver for
+		 * maximum-supported transfer size.
+		 */
+		todo = min(need_len, 256);
+		dev_dbg(ec_dev->dev, "loop, todo=%d, need_len=%d, ptr=%ld\n",
+			todo, need_len, ptr - ec_dev->din);
+
+		memset(&trans, '\0', sizeof(trans));
+		trans.cs_change = 1;
+		trans.rx_buf = ptr;
+		trans.len = todo;
+		spi_message_init(&msg);
+		spi_message_add_tail(&trans, &msg);
+
+		/* send command to EC and read answer */
+		BUG_ON((u8 *)trans.rx_buf - ec_dev->din + todo >
+				ec_dev->din_size);
+		ret = spi_sync(ec_spi->spi, &msg);
+		if (ret < 0) {
+			dev_err(ec_dev->dev, "spi transfer failed: %d\n", ret);
+			return ret;
+		}
+
+		debug_packet(ec_dev->dev, "interim", ptr, todo);
+		ptr += todo;
+		need_len -= todo;
+	}
+
+	dev_dbg(ec_dev->dev, "loop done, ptr=%ld\n", ptr - ec_dev->din);
+
+	return 0;
+}
+
+/**
+ * cros_ec_command_spi_xfer - Transfer a message over SPI and receive the reply
+ *
+ * @ec_dev: ChromeOS EC device
+ * @ec_msg: Message to transfer
+ */
+static int cros_ec_command_spi_xfer(struct cros_ec_device *ec_dev,
+				    struct cros_ec_msg *ec_msg)
+{
+	struct cros_ec_spi *ec_spi = ec_dev->priv;
+	struct spi_transfer trans;
+	struct spi_message msg;
+	int i, len;
+	u8 *ptr;
+	int sum;
+	int ret = 0, final_ret;
+	struct timespec ts;
+
+	len = cros_ec_prepare_tx(ec_dev, ec_msg);
+	dev_dbg(ec_dev->dev, "prepared, len=%d\n", len);
+
+	/* If it's too soon to do another transaction, wait */
+	if (ec_spi->last_transfer_ns) {
+		unsigned long delay;	/* The delay completed so far */
+
+		ktime_get_ts(&ts);
+		delay = timespec_to_ns(&ts) - ec_spi->last_transfer_ns;
+		if (delay < EC_SPI_RECOVERY_TIME_NS)
+			ndelay(EC_SPI_RECOVERY_TIME_NS - delay);
+	}
+
+	/* Transmit phase - send our message */
+	debug_packet(ec_dev->dev, "out", ec_dev->dout, len);
+	memset(&trans, '\0', sizeof(trans));
+	trans.tx_buf = ec_dev->dout;
+	trans.len = len;
+	trans.cs_change = 1;
+	spi_message_init(&msg);
+	spi_message_add_tail(&trans, &msg);
+	ret = spi_sync(ec_spi->spi, &msg);
+
+	/* Get the response */
+	if (!ret) {
+		ret = cros_ec_spi_receive_response(ec_dev,
+				ec_msg->in_len + EC_MSG_TX_PROTO_BYTES);
+	} else {
+		dev_err(ec_dev->dev, "spi transfer failed: %d\n", ret);
+	}
+
+	/* turn off CS */
+	spi_message_init(&msg);
+	final_ret = spi_sync(ec_spi->spi, &msg);
+	ktime_get_ts(&ts);
+	ec_spi->last_transfer_ns = timespec_to_ns(&ts);
+	if (!ret)
+		ret = final_ret;
+	if (ret < 0) {
+		dev_err(ec_dev->dev, "spi transfer failed: %d\n", ret);
+		return ret;
+	}
+
+	/* check response error code */
+	ptr = ec_dev->din;
+	if (ptr[0]) {
+		dev_warn(ec_dev->dev, "command 0x%02x returned an error %d\n",
+			 ec_msg->cmd, ptr[0]);
+		debug_packet(ec_dev->dev, "in_err", ptr, len);
+		return -EINVAL;
+	}
+	len = ptr[1];
+	sum = ptr[0] + ptr[1];
+	if (len > ec_msg->in_len) {
+		dev_err(ec_dev->dev, "packet too long (%d bytes, expected %d)\n",
+			len, ec_msg->in_len);
+		return -ENOSPC;
+	}
+
+	/* copy response packet payload and compute checksum */
+	for (i = 0; i < len; i++) {
+		sum += ptr[i + 2];
+		if (ec_msg->in_len)
+			ec_msg->in_buf[i] = ptr[i + 2];
+	}
+	sum &= 0xff;
+
+	debug_packet(ec_dev->dev, "in", ptr, len + 3);
+
+	if (sum != ptr[len + 2]) {
+		dev_err(ec_dev->dev,
+			"bad packet checksum, expected %02x, got %02x\n",
+			sum, ptr[len + 2]);
+		return -EBADMSG;
+	}
+
+	return 0;
+}
+
+static int cros_ec_probe_spi(struct spi_device *spi)
+{
+	struct device *dev = &spi->dev;
+	struct cros_ec_device *ec_dev;
+	struct cros_ec_spi *ec_spi;
+	int err;
+
+	spi->bits_per_word = 8;
+	spi->mode = SPI_MODE_0;
+	err = spi_setup(spi);
+	if (err < 0)
+		return err;
+
+	ec_spi = devm_kzalloc(dev, sizeof(*ec_spi), GFP_KERNEL);
+	if (ec_spi == NULL)
+		return -ENOMEM;
+	ec_spi->spi = spi;
+	ec_dev = devm_kzalloc(dev, sizeof(*ec_dev), GFP_KERNEL);
+	if (!ec_dev)
+		return -ENOMEM;
+
+	spi_set_drvdata(spi, ec_dev);
+	ec_dev->name = "SPI";
+	ec_dev->dev = dev;
+	ec_dev->priv = ec_spi;
+	ec_dev->irq = spi->irq;
+	ec_dev->command_xfer = cros_ec_command_spi_xfer;
+	ec_dev->ec_name = ec_spi->spi->modalias;
+	ec_dev->phys_name = dev_name(&ec_spi->spi->dev);
+	ec_dev->parent = &ec_spi->spi->dev;
+	ec_dev->din_size = EC_MSG_BYTES + EC_MSG_PREAMBLE_COUNT;
+	ec_dev->dout_size = EC_MSG_BYTES;
+
+	err = cros_ec_register(ec_dev);
+	if (err) {
+		dev_err(dev, "cannot register EC\n");
+		return err;
+	}
+
+	return 0;
+}
+
+static int cros_ec_remove_spi(struct spi_device *spi)
+{
+	struct cros_ec_device *ec_dev;
+
+	ec_dev = spi_get_drvdata(spi);
+	cros_ec_remove(ec_dev);
+
+	return 0;
+}
+
+#ifdef CONFIG_PM_SLEEP
+static int cros_ec_spi_suspend(struct device *dev)
+{
+	struct cros_ec_device *ec_dev = dev_get_drvdata(dev);
+
+	return cros_ec_suspend(ec_dev);
+}
+
+static int cros_ec_spi_resume(struct device *dev)
+{
+	struct cros_ec_device *ec_dev = dev_get_drvdata(dev);
+
+	return cros_ec_resume(ec_dev);
+}
+#endif
+
+static SIMPLE_DEV_PM_OPS(cros_ec_spi_pm_ops, cros_ec_spi_suspend,
+			 cros_ec_spi_resume);
+
+static const struct spi_device_id cros_ec_spi_id[] = {
+	{ "cros-ec-spi", 0 },
+	{ }
+};
+MODULE_DEVICE_TABLE(spi, cros_ec_spi_id);
+
+static struct spi_driver cros_ec_driver_spi = {
+	.driver	= {
+		.name	= "cros-ec-spi",
+		.owner	= THIS_MODULE,
+		.pm	= &cros_ec_spi_pm_ops,
+	},
+	.probe		= cros_ec_probe_spi,
+	.remove		= cros_ec_remove_spi,
+	.id_table	= cros_ec_spi_id,
+};
+
+module_spi_driver(cros_ec_driver_spi);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("ChromeOS EC multi function device (SPI)");
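
The response parsing in cros_ec_command_spi_xfer() above implies a simple
frame layout: a result byte, a length byte, that many payload bytes, and a
trailing 8-bit checksum over everything before it. A minimal sketch of that
check (ec_frame_ok() is a hypothetical helper, not part of the driver):

	/* frame = [result][len][payload...][checksum]; the checksum is
	 * the sum of all preceding bytes modulo 256, as computed above. */
	static bool ec_frame_ok(const u8 *frame, int max_payload)
	{
		int i, sum = frame[0] + frame[1];
		int len = frame[1];

		if (frame[0] || len > max_payload)
			return false;	/* EC error code or oversized reply */
		for (i = 0; i < len; i++)
			sum += frame[2 + i];
		return (sum & 0xff) == frame[len + 2];
	}
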
diff --git a/drivers/mfd/da903x.c b/drivers/mfd/da903x.c
index 05176cd..f1a316e 100644
--- a/drivers/mfd/da903x.c
+++ b/drivers/mfd/da903x.c
@@ -499,7 +499,8 @@
 	unsigned int tmp;
 	int ret;
 
-	chip = kzalloc(sizeof(struct da903x_chip), GFP_KERNEL);
+	chip = devm_kzalloc(&client->dev, sizeof(struct da903x_chip),
+				GFP_KERNEL);
 	if (chip == NULL)
 		return -ENOMEM;
 
@@ -515,33 +516,27 @@
 
 	ret = chip->ops->init_chip(chip);
 	if (ret)
-		goto out_free_chip;
+		return ret;
 
 	/* mask and clear all IRQs */
 	chip->events_mask = 0xffffffff;
 	chip->ops->mask_events(chip, chip->events_mask);
 	chip->ops->read_events(chip, &tmp);
 
-	ret = request_irq(client->irq, da903x_irq_handler,
+	ret = devm_request_irq(&client->dev, client->irq, da903x_irq_handler,
 			IRQF_TRIGGER_FALLING,
 			"da903x", chip);
 	if (ret) {
 		dev_err(&client->dev, "failed to request irq %d\n",
 				client->irq);
-		goto out_free_chip;
+		return ret;
 	}
 
 	ret = da903x_add_subdevs(chip, pdata);
 	if (ret)
-		goto out_free_irq;
+		return ret;
 
 	return 0;
-
-out_free_irq:
-	free_irq(client->irq, chip);
-out_free_chip:
-	kfree(chip);
-	return ret;
 }
 
 static int da903x_remove(struct i2c_client *client)
@@ -549,8 +544,6 @@
 	struct da903x_chip *chip = i2c_get_clientdata(client);
 
 	da903x_remove_subdevs(chip);
-	free_irq(client->irq, chip);
-	kfree(chip);
 	return 0;
 }
 
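
The da903x conversion above is the standard managed-resource (devm) pattern:
the allocation and the IRQ are tied to the device's lifetime, so the probe
error paths and remove() no longer free them by hand. The general shape,
sketched with hypothetical "foo" names:

	static int foo_probe(struct i2c_client *client,
			     const struct i2c_device_id *id)
	{
		struct foo_chip *chip;
		int ret;

		chip = devm_kzalloc(&client->dev, sizeof(*chip), GFP_KERNEL);
		if (chip == NULL)
			return -ENOMEM;

		ret = devm_request_irq(&client->dev, client->irq,
				       foo_irq_handler, IRQF_TRIGGER_FALLING,
				       "foo", chip);
		if (ret)
			return ret;	/* chip is freed automatically */

		return 0;
	}
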
diff --git a/drivers/mfd/da9052-spi.c b/drivers/mfd/da9052-spi.c
index 61d63b9..0680bcb 100644
--- a/drivers/mfd/da9052-spi.c
+++ b/drivers/mfd/da9052-spi.c
@@ -38,7 +38,7 @@
 	da9052->dev = &spi->dev;
 	da9052->chip_irq = spi->irq;
 
-	dev_set_drvdata(&spi->dev, da9052);
+	spi_set_drvdata(spi, da9052);
 
 	da9052_regmap_config.read_flag_mask = 1;
 	da9052_regmap_config.write_flag_mask = 0;
@@ -60,7 +60,7 @@
 
 static int da9052_spi_remove(struct spi_device *spi)
 {
-	struct da9052 *da9052 = dev_get_drvdata(&spi->dev);
+	struct da9052 *da9052 = spi_get_drvdata(spi);
 
 	da9052_device_exit(da9052);
 	return 0;
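
The dev_set_drvdata() -> spi_set_drvdata() changes here and in the drivers
below are behavior-neutral: the spi accessors are thin wrappers around the
generic driver-data helpers, roughly (paraphrased from <linux/spi/spi.h>,
not quoted verbatim):

	static inline void spi_set_drvdata(struct spi_device *spi, void *data)
	{
		dev_set_drvdata(&spi->dev, data);
	}

	static inline void *spi_get_drvdata(struct spi_device *spi)
	{
		return dev_get_drvdata(&spi->dev);
	}
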
diff --git a/drivers/mfd/da9055-core.c b/drivers/mfd/da9055-core.c
index f56a1a9..49cb23d 100644
--- a/drivers/mfd/da9055-core.c
+++ b/drivers/mfd/da9055-core.c
@@ -391,7 +391,7 @@
 		da9055->irq_base = pdata->irq_base;
 
 	ret = regmap_add_irq_chip(da9055->regmap, da9055->chip_irq,
-				  IRQF_TRIGGER_HIGH | IRQF_ONESHOT,
+				  IRQF_TRIGGER_LOW | IRQF_ONESHOT,
 				  da9055->irq_base, &da9055_regmap_irq_chip,
 				  &da9055->irq_data);
 	if (ret < 0)
diff --git a/drivers/mfd/davinci_voicecodec.c b/drivers/mfd/davinci_voicecodec.c
index c0bcc87..c60ab0c 100644
--- a/drivers/mfd/davinci_voicecodec.c
+++ b/drivers/mfd/davinci_voicecodec.c
@@ -177,17 +177,7 @@
 	.remove	= davinci_vc_remove,
 };
 
-static int __init davinci_vc_init(void)
-{
-	return platform_driver_probe(&davinci_vc_driver, davinci_vc_probe);
-}
-module_init(davinci_vc_init);
-
-static void __exit davinci_vc_exit(void)
-{
-	platform_driver_unregister(&davinci_vc_driver);
-}
-module_exit(davinci_vc_exit);
+module_platform_driver_probe(davinci_vc_driver, davinci_vc_probe);
 
 MODULE_AUTHOR("Miguel Aguilar");
 MODULE_DESCRIPTION("Texas Instruments DaVinci Voice Codec Core Interface");
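
module_platform_driver_probe() generates essentially the boilerplate deleted
above; roughly (illustrative expansion, not the exact macro text from
<linux/platform_device.h>):

	static int __init davinci_vc_driver_init(void)
	{
		return platform_driver_probe(&davinci_vc_driver,
					     davinci_vc_probe);
	}
	module_init(davinci_vc_driver_init);

	static void __exit davinci_vc_driver_exit(void)
	{
		platform_driver_unregister(&davinci_vc_driver);
	}
	module_exit(davinci_vc_driver_exit);
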
diff --git a/drivers/mfd/db8500-prcmu.c b/drivers/mfd/db8500-prcmu.c
index 21434be..319b8ab 100644
--- a/drivers/mfd/db8500-prcmu.c
+++ b/drivers/mfd/db8500-prcmu.c
@@ -24,6 +24,7 @@
 #include <linux/jiffies.h>
 #include <linux/bitops.h>
 #include <linux/fs.h>
+#include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/uaccess.h>
 #include <linux/mfd/core.h>
@@ -2704,6 +2705,7 @@
 {
 	struct resource *res;
 	void __iomem *tcpm_base;
+	u32 version;
 
 	res = platform_get_resource_byname(pdev, IORESOURCE_MEM,
 					   "prcmu-tcpm");
@@ -2713,26 +2715,27 @@
 		return;
 	}
 	tcpm_base = ioremap(res->start, resource_size(res));
-	if (tcpm_base != NULL) {
-		u32 version;
-
-		version = readl(tcpm_base + version_offset);
-		fw_info.version.project = (version & 0xFF);
-		fw_info.version.api_version = (version >> 8) & 0xFF;
-		fw_info.version.func_version = (version >> 16) & 0xFF;
-		fw_info.version.errata = (version >> 24) & 0xFF;
-		strncpy(fw_info.version.project_name,
-			fw_project_name(fw_info.version.project),
-			PRCMU_FW_PROJECT_NAME_LEN);
-		fw_info.valid = true;
-		pr_info("PRCMU firmware: %s(%d), version %d.%d.%d\n",
-			fw_info.version.project_name,
-			fw_info.version.project,
-			fw_info.version.api_version,
-			fw_info.version.func_version,
-			fw_info.version.errata);
-		iounmap(tcpm_base);
+	if (!tcpm_base) {
+		dev_err(&pdev->dev, "failed to map prcmu tcpm region\n");
+		return;
 	}
+
+	version = readl(tcpm_base + version_offset);
+	fw_info.version.project = (version & 0xFF);
+	fw_info.version.api_version = (version >> 8) & 0xFF;
+	fw_info.version.func_version = (version >> 16) & 0xFF;
+	fw_info.version.errata = (version >> 24) & 0xFF;
+	strncpy(fw_info.version.project_name,
+		fw_project_name(fw_info.version.project),
+		PRCMU_FW_PROJECT_NAME_LEN);
+	fw_info.valid = true;
+	pr_info("PRCMU firmware: %s(%d), version %d.%d.%d\n",
+		fw_info.version.project_name,
+		fw_info.version.project,
+		fw_info.version.api_version,
+		fw_info.version.func_version,
+		fw_info.version.errata);
+	iounmap(tcpm_base);
 }
 
 void __init db8500_prcmu_early_init(u32 phy_base, u32 size)
@@ -3065,6 +3068,15 @@
 	.num_trips = 4,
 };
 
+static struct mfd_cell common_prcmu_devs[] = {
+	{
+		.name = "ux500_wdt",
+		.platform_data = &db8500_wdt_pdata,
+		.pdata_size = sizeof(db8500_wdt_pdata),
+		.id = -1,
+	},
+};
+
 static struct mfd_cell db8500_prcmu_devs[] = {
 	{
 		.name = "db8500-prcmu-regulators",
@@ -3079,12 +3091,6 @@
 		.pdata_size = sizeof(db8500_cpufreq_table),
 	},
 	{
-		.name = "ux500_wdt",
-		.platform_data = &db8500_wdt_pdata,
-		.pdata_size = sizeof(db8500_wdt_pdata),
-		.id = -1,
-	},
-	{
 		.name = "db8500-thermal",
 		.num_resources = ARRAY_SIZE(db8500_thsens_resources),
 		.resources = db8500_thsens_resources,
@@ -3173,13 +3179,25 @@
 
 	db8500_prcmu_update_cpufreq();
 
-	err = mfd_add_devices(&pdev->dev, 0, db8500_prcmu_devs,
-			      ARRAY_SIZE(db8500_prcmu_devs), NULL, 0, db8500_irq_domain);
+	err = mfd_add_devices(&pdev->dev, 0, common_prcmu_devs,
+			      ARRAY_SIZE(common_prcmu_devs), NULL, 0, db8500_irq_domain);
 	if (err) {
 		pr_err("prcmu: Failed to add subdevices\n");
 		return err;
 	}
 
+	/* TODO: Remove restriction when clk definitions are available. */
+	if (!of_machine_is_compatible("st-ericsson,u8540")) {
+		err = mfd_add_devices(&pdev->dev, 0, db8500_prcmu_devs,
+				      ARRAY_SIZE(db8500_prcmu_devs), NULL, 0,
+				      db8500_irq_domain);
+		if (err) {
+			mfd_remove_devices(&pdev->dev);
+			pr_err("prcmu: Failed to add subdevices\n");
+			goto no_irq_return;
+		}
+	}
+
 	err = db8500_prcmu_register_ab8500(&pdev->dev, pdata->ab_platdata,
 					   pdata->ab_irq);
 	if (err) {
diff --git a/drivers/mfd/ezx-pcap.c b/drivers/mfd/ezx-pcap.c
index b7a61f0..5502106 100644
--- a/drivers/mfd/ezx-pcap.c
+++ b/drivers/mfd/ezx-pcap.c
@@ -393,7 +393,7 @@
 
 static int ezx_pcap_remove(struct spi_device *spi)
 {
-	struct pcap_chip *pcap = dev_get_drvdata(&spi->dev);
+	struct pcap_chip *pcap = spi_get_drvdata(spi);
 	struct pcap_platform_data *pdata = spi->dev.platform_data;
 	int i, adc_irq;
 
@@ -403,7 +403,7 @@
 	/* cleanup ADC */
 	adc_irq = pcap_to_irq(pcap, (pdata->config & PCAP_SECOND_PORT) ?
 				PCAP_IRQ_ADCDONE2 : PCAP_IRQ_ADCDONE);
-	free_irq(adc_irq, pcap);
+	devm_free_irq(&spi->dev, adc_irq, pcap);
 	mutex_lock(&pcap->adc_mutex);
 	for (i = 0; i < PCAP_ADC_MAXQ; i++)
 		kfree(pcap->adc_queue[i]);
@@ -415,8 +415,6 @@
 
 	destroy_workqueue(pcap->workqueue);
 
-	kfree(pcap);
-
 	return 0;
 }
 
@@ -431,7 +429,7 @@
 	if (!pdata)
 		goto ret;
 
-	pcap = kzalloc(sizeof(*pcap), GFP_KERNEL);
+	pcap = devm_kzalloc(&spi->dev, sizeof(*pcap), GFP_KERNEL);
 	if (!pcap) {
 		ret = -ENOMEM;
 		goto ret;
@@ -441,14 +439,14 @@
 	mutex_init(&pcap->adc_mutex);
 	INIT_WORK(&pcap->isr_work, pcap_isr_work);
 	INIT_WORK(&pcap->msr_work, pcap_msr_work);
-	dev_set_drvdata(&spi->dev, pcap);
+	spi_set_drvdata(spi, pcap);
 
 	/* setup spi */
 	spi->bits_per_word = 32;
 	spi->mode = SPI_MODE_0 | (pdata->config & PCAP_CS_AH ? SPI_CS_HIGH : 0);
 	ret = spi_setup(spi);
 	if (ret)
-		goto free_pcap;
+		goto ret;
 
 	pcap->spi = spi;
 
@@ -458,7 +456,7 @@
 	if (!pcap->workqueue) {
 		ret = -ENOMEM;
 		dev_err(&spi->dev, "can't create pcap thread\n");
-		goto free_pcap;
+		goto ret;
 	}
 
 	/* redirect interrupts to AP, except adcdone2 */
@@ -491,7 +489,8 @@
 	adc_irq = pcap_to_irq(pcap, (pdata->config & PCAP_SECOND_PORT) ?
 					PCAP_IRQ_ADCDONE2 : PCAP_IRQ_ADCDONE);
 
-	ret = request_irq(adc_irq, pcap_adc_irq, 0, "ADC", pcap);
+	ret = devm_request_irq(&spi->dev, adc_irq, pcap_adc_irq, 0, "ADC",
+				pcap);
 	if (ret)
 		goto free_irqchip;
 
@@ -511,14 +510,12 @@
 remove_subdevs:
 	device_for_each_child(&spi->dev, NULL, pcap_remove_subdev);
 /* free_adc: */
-	free_irq(adc_irq, pcap);
+	devm_free_irq(&spi->dev, adc_irq, pcap);
 free_irqchip:
 	for (i = pcap->irq_base; i < (pcap->irq_base + PCAP_NIRQS); i++)
 		irq_set_chip_and_handler(i, NULL, NULL);
 /* destroy_workqueue: */
 	destroy_workqueue(pcap->workqueue);
-free_pcap:
-	kfree(pcap);
 ret:
 	return ret;
 }
diff --git a/drivers/mfd/htc-pasic3.c b/drivers/mfd/htc-pasic3.c
index 9e5453d..0285fce 100644
--- a/drivers/mfd/htc-pasic3.c
+++ b/drivers/mfd/htc-pasic3.c
@@ -208,18 +208,7 @@
 	.remove		= pasic3_remove,
 };
 
-static int __init pasic3_base_init(void)
-{
-	return platform_driver_probe(&pasic3_driver, pasic3_probe);
-}
-
-static void __exit pasic3_base_exit(void)
-{
-	platform_driver_unregister(&pasic3_driver);
-}
-
-module_init(pasic3_base_init);
-module_exit(pasic3_base_exit);
+module_platform_driver_probe(pasic3_driver, pasic3_probe);
 
 MODULE_AUTHOR("Philipp Zabel <philipp.zabel@gmail.com>");
 MODULE_DESCRIPTION("Core driver for HTC PASIC3");
diff --git a/drivers/mfd/intel_msic.c b/drivers/mfd/intel_msic.c
index 1804331..5be3b5e 100644
--- a/drivers/mfd/intel_msic.c
+++ b/drivers/mfd/intel_msic.c
@@ -323,7 +323,8 @@
 	if (pdata->ocd) {
 		unsigned gpio = pdata->ocd->gpio;
 
-		ret = gpio_request_one(gpio, GPIOF_IN, "ocd_gpio");
+		ret = devm_gpio_request_one(&pdev->dev, gpio,
+					GPIOF_IN, "ocd_gpio");
 		if (ret) {
 			dev_err(&pdev->dev, "failed to register OCD GPIO\n");
 			return ret;
@@ -332,7 +333,6 @@
 		ret = gpio_to_irq(gpio);
 		if (ret < 0) {
 			dev_err(&pdev->dev, "no IRQ number for OCD GPIO\n");
-			gpio_free(gpio);
 			return ret;
 		}
 
@@ -359,8 +359,6 @@
 
 fail:
 	mfd_remove_devices(&pdev->dev);
-	if (pdata->ocd)
-		gpio_free(pdata->ocd->gpio);
 
 	return ret;
 }
@@ -368,12 +366,8 @@
 static void intel_msic_remove_devices(struct intel_msic *msic)
 {
 	struct platform_device *pdev = msic->pdev;
-	struct intel_msic_platform_data *pdata = pdev->dev.platform_data;
 
 	mfd_remove_devices(&pdev->dev);
-
-	if (pdata->ocd)
-		gpio_free(pdata->ocd->gpio);
 }
 
 static int intel_msic_probe(struct platform_device *pdev)
diff --git a/drivers/mfd/lm3533-core.c b/drivers/mfd/lm3533-core.c
index ceebf2c..4b7e6da 100644
--- a/drivers/mfd/lm3533-core.c
+++ b/drivers/mfd/lm3533-core.c
@@ -496,8 +496,8 @@
 	dev_set_drvdata(lm3533->dev, lm3533);
 
 	if (gpio_is_valid(lm3533->gpio_hwen)) {
-		ret = gpio_request_one(lm3533->gpio_hwen, GPIOF_OUT_INIT_LOW,
-								"lm3533-hwen");
+		ret = devm_gpio_request_one(lm3533->dev, lm3533->gpio_hwen,
+					GPIOF_OUT_INIT_LOW, "lm3533-hwen");
 		if (ret < 0) {
 			dev_err(lm3533->dev,
 				"failed to request HWEN GPIO %d\n",
@@ -528,8 +528,6 @@
 	mfd_remove_devices(lm3533->dev);
 err_disable:
 	lm3533_disable(lm3533);
-	if (gpio_is_valid(lm3533->gpio_hwen))
-		gpio_free(lm3533->gpio_hwen);
 
 	return ret;
 }
@@ -542,8 +540,6 @@
 
 	mfd_remove_devices(lm3533->dev);
 	lm3533_disable(lm3533);
-	if (gpio_is_valid(lm3533->gpio_hwen))
-		gpio_free(lm3533->gpio_hwen);
 }
 
 static bool lm3533_readable_register(struct device *dev, unsigned int reg)
diff --git a/drivers/mfd/max77686.c b/drivers/mfd/max77686.c
index 4d73963..1cbb176 100644
--- a/drivers/mfd/max77686.c
+++ b/drivers/mfd/max77686.c
@@ -46,7 +46,7 @@
 
 #ifdef CONFIG_OF
 static struct of_device_id max77686_pmic_dt_match[] = {
-	{.compatible = "maxim,max77686",        .data = 0},
+	{.compatible = "maxim,max77686", .data = NULL},
 	{},
 };
 
diff --git a/drivers/mfd/mc13xxx-spi.c b/drivers/mfd/mc13xxx-spi.c
index 3032bae..77189da 100644
--- a/drivers/mfd/mc13xxx-spi.c
+++ b/drivers/mfd/mc13xxx-spi.c
@@ -131,7 +131,7 @@
 	if (!mc13xxx)
 		return -ENOMEM;
 
-	dev_set_drvdata(&spi->dev, mc13xxx);
+	spi_set_drvdata(spi, mc13xxx);
 	spi->mode = SPI_MODE_0 | SPI_CS_HIGH;
 
 	mc13xxx->dev = &spi->dev;
@@ -144,7 +144,7 @@
 		ret = PTR_ERR(mc13xxx->regmap);
 		dev_err(mc13xxx->dev, "Failed to initialize register map: %d\n",
 				ret);
-		dev_set_drvdata(&spi->dev, NULL);
+		spi_set_drvdata(spi, NULL);
 		return ret;
 	}
 
@@ -164,7 +164,7 @@
 
 static int mc13xxx_spi_remove(struct spi_device *spi)
 {
-	struct mc13xxx *mc13xxx = dev_get_drvdata(&spi->dev);
+	struct mc13xxx *mc13xxx = spi_get_drvdata(spi);
 
 	mc13xxx_common_cleanup(mc13xxx);
 
diff --git a/drivers/mfd/omap-usb-host.c b/drivers/mfd/omap-usb-host.c
index 4febc5c..759fae3 100644
--- a/drivers/mfd/omap-usb-host.c
+++ b/drivers/mfd/omap-usb-host.c
@@ -1,8 +1,9 @@
 /**
  * omap-usb-host.c - The USBHS core driver for OMAP EHCI & OHCI
  *
- * Copyright (C) 2011 Texas Instruments Incorporated - http://www.ti.com
+ * Copyright (C) 2011-2013 Texas Instruments Incorporated - http://www.ti.com
  * Author: Keshava Munegowda <keshava_mgowda@ti.com>
+ * Author: Roger Quadros <rogerq@ti.com>
  *
  * This program is free software: you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2  of
@@ -27,6 +28,9 @@
 #include <linux/platform_device.h>
 #include <linux/platform_data/usb-omap.h>
 #include <linux/pm_runtime.h>
+#include <linux/of.h>
+#include <linux/of_platform.h>
+#include <linux/err.h>
 
 #include "omap-usb.h"
 
@@ -137,6 +141,49 @@
 
 /*-------------------------------------------------------------------------*/
 
+/*
+ * Map 'enum usbhs_omap_port_mode' found in <linux/platform_data/usb-omap.h>
+ * to the device tree binding portN-mode found in
+ * 'Documentation/devicetree/bindings/mfd/omap-usb-host.txt'
+ */
+static const char * const port_modes[] = {
+	[OMAP_USBHS_PORT_MODE_UNUSED]	= "",
+	[OMAP_EHCI_PORT_MODE_PHY]	= "ehci-phy",
+	[OMAP_EHCI_PORT_MODE_TLL]	= "ehci-tll",
+	[OMAP_EHCI_PORT_MODE_HSIC]	= "ehci-hsic",
+	[OMAP_OHCI_PORT_MODE_PHY_6PIN_DATSE0]	= "ohci-phy-6pin-datse0",
+	[OMAP_OHCI_PORT_MODE_PHY_6PIN_DPDM]	= "ohci-phy-6pin-dpdm",
+	[OMAP_OHCI_PORT_MODE_PHY_3PIN_DATSE0]	= "ohci-phy-3pin-datse0",
+	[OMAP_OHCI_PORT_MODE_PHY_4PIN_DPDM]	= "ohci-phy-4pin-dpdm",
+	[OMAP_OHCI_PORT_MODE_TLL_6PIN_DATSE0]	= "ohci-tll-6pin-datse0",
+	[OMAP_OHCI_PORT_MODE_TLL_6PIN_DPDM]	= "ohci-tll-6pin-dpdm",
+	[OMAP_OHCI_PORT_MODE_TLL_3PIN_DATSE0]	= "ohci-tll-3pin-datse0",
+	[OMAP_OHCI_PORT_MODE_TLL_4PIN_DPDM]	= "ohci-tll-4pin-dpdm",
+	[OMAP_OHCI_PORT_MODE_TLL_2PIN_DATSE0]	= "ohci-tll-2pin-datse0",
+	[OMAP_OHCI_PORT_MODE_TLL_2PIN_DPDM]	= "ohci-tll-2pin-dpdm",
+};
+
+/**
+ * omap_usbhs_get_dt_port_mode - Get the 'enum usbhs_omap_port_mode'
+ * from the port mode string.
+ * @mode: The port mode string, usually obtained from device tree.
+ *
+ * The function returns the 'enum usbhs_omap_port_mode' that matches the
+ * provided port mode string as per the port_modes table.
+ * If no match is found it returns -ENODEV
+ */
+static int omap_usbhs_get_dt_port_mode(const char *mode)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(port_modes); i++) {
+		if (!strcmp(mode, port_modes[i]))
+			return i;
+	}
+
+	return -ENODEV;
+}
+
 static struct platform_device *omap_usbhs_alloc_child(const char *name,
 			struct resource	*res, int num_resources, void *pdata,
 			size_t pdata_size, struct device *dev)
@@ -278,7 +325,7 @@
 
 	dev_dbg(dev, "usbhs_runtime_resume\n");
 
-	omap_tll_enable();
+	omap_tll_enable(pdata);
 
 	if (!IS_ERR(omap->ehci_logic_fck))
 		clk_enable(omap->ehci_logic_fck);
@@ -353,7 +400,7 @@
 	if (!IS_ERR(omap->ehci_logic_fck))
 		clk_disable(omap->ehci_logic_fck);
 
-	omap_tll_disable();
+	omap_tll_disable(pdata);
 
 	return 0;
 }
@@ -430,24 +477,10 @@
 static void omap_usbhs_init(struct device *dev)
 {
 	struct usbhs_hcd_omap		*omap = dev_get_drvdata(dev);
-	struct usbhs_omap_platform_data	*pdata = omap->pdata;
 	unsigned			reg;
 
 	dev_dbg(dev, "starting TI HSUSB Controller\n");
 
-	if (pdata->phy_reset) {
-		if (gpio_is_valid(pdata->reset_gpio_port[0]))
-			gpio_request_one(pdata->reset_gpio_port[0],
-					 GPIOF_OUT_INIT_LOW, "USB1 PHY reset");
-
-		if (gpio_is_valid(pdata->reset_gpio_port[1]))
-			gpio_request_one(pdata->reset_gpio_port[1],
-					 GPIOF_OUT_INIT_LOW, "USB2 PHY reset");
-
-		/* Hold the PHY in RESET for enough time till DIR is high */
-		udelay(10);
-	}
-
 	pm_runtime_get_sync(dev);
 
 	reg = usbhs_read(omap->uhh_base, OMAP_UHH_HOSTCONFIG);
@@ -476,36 +509,59 @@
 	dev_dbg(dev, "UHH setup done, uhh_hostconfig=%x\n", reg);
 
 	pm_runtime_put_sync(dev);
-	if (pdata->phy_reset) {
-		/* Hold the PHY in RESET for enough time till
-		 * PHY is settled and ready
-		 */
-		udelay(10);
-
-		if (gpio_is_valid(pdata->reset_gpio_port[0]))
-			gpio_set_value_cansleep
-				(pdata->reset_gpio_port[0], 1);
-
-		if (gpio_is_valid(pdata->reset_gpio_port[1]))
-			gpio_set_value_cansleep
-				(pdata->reset_gpio_port[1], 1);
-	}
 }
 
-static void omap_usbhs_deinit(struct device *dev)
+static int usbhs_omap_get_dt_pdata(struct device *dev,
+					struct usbhs_omap_platform_data *pdata)
 {
-	struct usbhs_hcd_omap		*omap = dev_get_drvdata(dev);
-	struct usbhs_omap_platform_data	*pdata = omap->pdata;
+	int ret, i;
+	struct device_node *node = dev->of_node;
 
-	if (pdata->phy_reset) {
-		if (gpio_is_valid(pdata->reset_gpio_port[0]))
-			gpio_free(pdata->reset_gpio_port[0]);
+	ret = of_property_read_u32(node, "num-ports", &pdata->nports);
+	if (ret)
+		pdata->nports = 0;
 
-		if (gpio_is_valid(pdata->reset_gpio_port[1]))
-			gpio_free(pdata->reset_gpio_port[1]);
+	if (pdata->nports > OMAP3_HS_USB_PORTS) {
+		dev_warn(dev, "Too many num_ports <%d> in device tree. Max %d\n",
+				pdata->nports, OMAP3_HS_USB_PORTS);
+		return -ENODEV;
 	}
+
+	/* get port modes */
+	for (i = 0; i < OMAP3_HS_USB_PORTS; i++) {
+		char prop[11];
+		const char *mode;
+
+		pdata->port_mode[i] = OMAP_USBHS_PORT_MODE_UNUSED;
+
+		snprintf(prop, sizeof(prop), "port%d-mode", i + 1);
+		ret = of_property_read_string(node, prop, &mode);
+		if (ret < 0)
+			continue;
+
+		ret = omap_usbhs_get_dt_port_mode(mode);
+		if (ret < 0) {
+			dev_warn(dev, "Invalid port%d-mode \"%s\" in device tree\n",
+					i + 1, mode);
+			return -ENODEV;
+		}
+
+		dev_dbg(dev, "port%d-mode: %s -> %d\n", i + 1, mode, ret);
+		pdata->port_mode[i] = ret;
+	}
+
+	/* get flags */
+	pdata->single_ulpi_bypass = of_property_read_bool(node,
+						"single-ulpi-bypass");
+
+	return 0;
 }
 
+static const struct of_device_id usbhs_child_match_table[] = {
+	{ .compatible = "ti,omap-ehci", },
+	{ .compatible = "ti,omap-ohci", },
+	{ }
+};
 
 /**
  * usbhs_omap_probe - initialize TI-based HCDs
@@ -522,26 +578,46 @@
 	int				i;
 	bool				need_logic_fck;
 
+	if (dev->of_node) {
+		/* For DT boot we populate platform data from OF node */
+		pdata = devm_kzalloc(dev, sizeof(*pdata), GFP_KERNEL);
+		if (!pdata)
+			return -ENOMEM;
+
+		ret = usbhs_omap_get_dt_pdata(dev, pdata);
+		if (ret)
+			return ret;
+
+		dev->platform_data = pdata;
+	}
+
 	if (!pdata) {
 		dev_err(dev, "Missing platform data\n");
 		return -ENODEV;
 	}
 
+	if (pdata->nports > OMAP3_HS_USB_PORTS) {
+		dev_info(dev, "Too many num_ports <%d> in platform_data. Max %d\n",
+				pdata->nports, OMAP3_HS_USB_PORTS);
+		return -ENODEV;
+	}
+
 	omap = devm_kzalloc(dev, sizeof(*omap), GFP_KERNEL);
 	if (!omap) {
 		dev_err(dev, "Memory allocation failed\n");
 		return -ENOMEM;
 	}
 
-	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "uhh");
-	omap->uhh_base = devm_request_and_ioremap(dev, res);
-	if (!omap->uhh_base) {
-		dev_err(dev, "Resource request/ioremap failed\n");
-		return -EADDRNOTAVAIL;
-	}
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	omap->uhh_base = devm_ioremap_resource(dev, res);
+	if (IS_ERR(omap->uhh_base))
+		return PTR_ERR(omap->uhh_base);
 
 	omap->pdata = pdata;
 
+	/* Initialize the TLL subsystem */
+	omap_tll_init(pdata);
+
 	pm_runtime_enable(dev);
 
 	platform_set_drvdata(pdev, omap);
@@ -575,6 +651,7 @@
 			 omap->usbhs_rev, omap->nports);
 			break;
 		}
+		pdata->nports = omap->nports;
 	}
 
 	i = sizeof(struct clk *) * omap->nports;
@@ -700,17 +777,28 @@
 	}
 
 	omap_usbhs_init(dev);
-	ret = omap_usbhs_alloc_children(pdev);
-	if (ret) {
-		dev_err(dev, "omap_usbhs_alloc_children failed\n");
-		goto err_alloc;
+
+	if (dev->of_node) {
+		ret = of_platform_populate(dev->of_node,
+				usbhs_child_match_table, NULL, dev);
+
+		if (ret) {
+			dev_err(dev, "Failed to create DT children: %d\n", ret);
+			goto err_alloc;
+		}
+
+	} else {
+		ret = omap_usbhs_alloc_children(pdev);
+		if (ret) {
+			dev_err(dev, "omap_usbhs_alloc_children failed: %d\n",
+						ret);
+			goto err_alloc;
+		}
 	}
 
 	return 0;
 
 err_alloc:
-	omap_usbhs_deinit(&pdev->dev);
-
 	for (i = 0; i < omap->nports; i++) {
 		if (!IS_ERR(omap->utmi_clk[i]))
 			clk_put(omap->utmi_clk[i]);
@@ -744,6 +832,13 @@
 	return ret;
 }
 
+static int usbhs_omap_remove_child(struct device *dev, void *data)
+{
+	dev_info(dev, "unregistering\n");
+	platform_device_unregister(to_platform_device(dev));
+	return 0;
+}
+
 /**
  * usbhs_omap_remove - shutdown processing for UHH & TLL HCDs
  * @pdev: USB Host Controller being removed
@@ -755,8 +850,6 @@
 	struct usbhs_hcd_omap *omap = platform_get_drvdata(pdev);
 	int i;
 
-	omap_usbhs_deinit(&pdev->dev);
-
 	for (i = 0; i < omap->nports; i++) {
 		if (!IS_ERR(omap->utmi_clk[i]))
 			clk_put(omap->utmi_clk[i]);
@@ -777,6 +870,8 @@
 
 	pm_runtime_disable(&pdev->dev);
 
+	/* remove children */
+	device_for_each_child(&pdev->dev, NULL, usbhs_omap_remove_child);
 	return 0;
 }
 
@@ -785,16 +880,26 @@
 	.runtime_resume		= usbhs_runtime_resume,
 };
 
+static const struct of_device_id usbhs_omap_dt_ids[] = {
+	{ .compatible = "ti,usbhs-host" },
+	{ }
+};
+
+MODULE_DEVICE_TABLE(of, usbhs_omap_dt_ids);
+
 static struct platform_driver usbhs_omap_driver = {
 	.driver = {
 		.name		= (char *)usbhs_driver_name,
 		.owner		= THIS_MODULE,
 		.pm		= &usbhsomap_dev_pm_ops,
+		.of_match_table = of_match_ptr(usbhs_omap_dt_ids),
 	},
 	.remove		= usbhs_omap_remove,
 };
 
 MODULE_AUTHOR("Keshava Munegowda <keshava_mgowda@ti.com>");
+MODULE_AUTHOR("Roger Quadros <rogerq@ti.com>");
 MODULE_ALIAS("platform:" USBHS_DRIVER_NAME);
 MODULE_LICENSE("GPL v2");
 MODULE_DESCRIPTION("usb host common core driver for omap EHCI and OHCI");
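
For reference, a device tree node matching the binding parsed above might
look like this (hypothetical example; the authoritative binding is
Documentation/devicetree/bindings/mfd/omap-usb-host.txt):

	usbhshost {
		compatible = "ti,usbhs-host";
		num-ports = <2>;
		port1-mode = "ehci-phy";	/* OMAP_EHCI_PORT_MODE_PHY */
		port2-mode = "ehci-tll";	/* OMAP_EHCI_PORT_MODE_TLL */
	};
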
diff --git a/drivers/mfd/omap-usb-tll.c b/drivers/mfd/omap-usb-tll.c
index 0aef1a7..e59ac4c 100644
--- a/drivers/mfd/omap-usb-tll.c
+++ b/drivers/mfd/omap-usb-tll.c
@@ -1,8 +1,9 @@
 /**
  * omap-usb-tll.c - The USB TLL driver for OMAP EHCI & OHCI
  *
- * Copyright (C) 2012 Texas Instruments Incorporated - http://www.ti.com
+ * Copyright (C) 2012-2013 Texas Instruments Incorporated - http://www.ti.com
  * Author: Keshava Munegowda <keshava_mgowda@ti.com>
+ * Author: Roger Quadros <rogerq@ti.com>
  *
  * This program is free software: you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2  of
@@ -27,6 +28,7 @@
 #include <linux/err.h>
 #include <linux/pm_runtime.h>
 #include <linux/platform_data/usb-omap.h>
+#include <linux/of.h>
 
 #define USBTLL_DRIVER_NAME	"usbhs_tll"
 
@@ -105,8 +107,8 @@
 
 struct usbtll_omap {
 	int					nch;	/* num. of channels */
-	struct usbhs_omap_platform_data		*pdata;
 	struct clk				**ch_clk;
+	void __iomem				*base;
 };
 
 /*-------------------------------------------------------------------------*/
@@ -210,14 +212,10 @@
 static int usbtll_omap_probe(struct platform_device *pdev)
 {
 	struct device				*dev =  &pdev->dev;
-	struct usbhs_omap_platform_data		*pdata = dev->platform_data;
-	void __iomem				*base;
 	struct resource				*res;
 	struct usbtll_omap			*tll;
-	unsigned				reg;
 	int					ret = 0;
 	int					i, ver;
-	bool needs_tll;
 
 	dev_dbg(dev, "starting TI HSUSB TLL Controller\n");
 
@@ -227,26 +225,16 @@
 		return -ENOMEM;
 	}
 
-	if (!pdata) {
-		dev_err(dev, "Platform data missing\n");
-		return -ENODEV;
-	}
-
-	tll->pdata = pdata;
-
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	base = devm_request_and_ioremap(dev, res);
-	if (!base) {
-		ret = -EADDRNOTAVAIL;
-		dev_err(dev, "Resource request/ioremap failed:%d\n", ret);
-		return ret;
-	}
+	tll->base = devm_ioremap_resource(dev, res);
+	if (IS_ERR(tll->base))
+		return PTR_ERR(tll->base);
 
 	platform_set_drvdata(pdev, tll);
 	pm_runtime_enable(dev);
 	pm_runtime_get_sync(dev);
 
-	ver =  usbtll_read(base, OMAP_USBTLL_REVISION);
+	ver =  usbtll_read(tll->base, OMAP_USBTLL_REVISION);
 	switch (ver) {
 	case OMAP_USBTLL_REV1:
 	case OMAP_USBTLL_REV4:
@@ -283,11 +271,85 @@
 			dev_dbg(dev, "can't get clock : %s\n", clkname);
 	}
 
+	pm_runtime_put_sync(dev);
+	/* only after this can omap_tll_enable/disable work */
+	spin_lock(&tll_lock);
+	tll_dev = dev;
+	spin_unlock(&tll_lock);
+
+	return 0;
+
+err_clk_alloc:
+	pm_runtime_put_sync(dev);
+	pm_runtime_disable(dev);
+
+	return ret;
+}
+
+/**
+ * usbtll_omap_remove - shutdown processing for UHH & TLL HCDs
+ * @pdev: USB Host Controller being removed
+ *
+ * Reverses the effect of usbtll_omap_probe().
+ */
+static int usbtll_omap_remove(struct platform_device *pdev)
+{
+	struct usbtll_omap *tll = platform_get_drvdata(pdev);
+	int i;
+
+	spin_lock(&tll_lock);
+	tll_dev = NULL;
+	spin_unlock(&tll_lock);
+
+	for (i = 0; i < tll->nch; i++)
+		if (!IS_ERR(tll->ch_clk[i]))
+			clk_put(tll->ch_clk[i]);
+
+	pm_runtime_disable(&pdev->dev);
+	return 0;
+}
+
+static const struct of_device_id usbtll_omap_dt_ids[] = {
+	{ .compatible = "ti,usbhs-tll" },
+	{ }
+};
+
+MODULE_DEVICE_TABLE(of, usbtll_omap_dt_ids);
+
+static struct platform_driver usbtll_omap_driver = {
+	.driver = {
+		.name		= (char *)usbtll_driver_name,
+		.owner		= THIS_MODULE,
+		.of_match_table = of_match_ptr(usbtll_omap_dt_ids),
+	},
+	.probe		= usbtll_omap_probe,
+	.remove		= usbtll_omap_remove,
+};
+
+int omap_tll_init(struct usbhs_omap_platform_data *pdata)
+{
+	int i;
+	bool needs_tll;
+	unsigned reg;
+	struct usbtll_omap *tll;
+
+	spin_lock(&tll_lock);
+
+	if (!tll_dev) {
+		spin_unlock(&tll_lock);
+		return -ENODEV;
+	}
+
+	tll = dev_get_drvdata(tll_dev);
+
 	needs_tll = false;
 	for (i = 0; i < tll->nch; i++)
 		needs_tll |= omap_usb_mode_needs_tll(pdata->port_mode[i]);
 
+	pm_runtime_get_sync(tll_dev);
+
 	if (needs_tll) {
+		void __iomem *base = tll->base;
 
 		/* Program Common TLL register */
 		reg = usbtll_read(base, OMAP_TLL_SHARED_CONF);
@@ -336,51 +398,29 @@
 		}
 	}
 
-	pm_runtime_put_sync(dev);
-	/* only after this can omap_tll_enable/disable work */
-	spin_lock(&tll_lock);
-	tll_dev = dev;
+	pm_runtime_put_sync(tll_dev);
+
 	spin_unlock(&tll_lock);
 
 	return 0;
-
-err_clk_alloc:
-	pm_runtime_put_sync(dev);
-	pm_runtime_disable(dev);
-
-	return ret;
 }
+EXPORT_SYMBOL_GPL(omap_tll_init);
 
-/**
- * usbtll_omap_remove - shutdown processing for UHH & TLL HCDs
- * @pdev: USB Host Controller being removed
- *
- * Reverses the effect of usbtll_omap_probe().
- */
-static int usbtll_omap_remove(struct platform_device *pdev)
+int omap_tll_enable(struct usbhs_omap_platform_data *pdata)
 {
-	struct usbtll_omap *tll = platform_get_drvdata(pdev);
 	int i;
+	struct usbtll_omap *tll;
 
 	spin_lock(&tll_lock);
-	tll_dev = NULL;
-	spin_unlock(&tll_lock);
 
-	for (i = 0; i < tll->nch; i++)
-		if (!IS_ERR(tll->ch_clk[i]))
-			clk_put(tll->ch_clk[i]);
+	if (!tll_dev) {
+		spin_unlock(&tll_lock);
+		return -ENODEV;
+	}
 
-	pm_runtime_disable(&pdev->dev);
-	return 0;
-}
+	tll = dev_get_drvdata(tll_dev);
 
-static int usbtll_runtime_resume(struct device *dev)
-{
-	struct usbtll_omap			*tll = dev_get_drvdata(dev);
-	struct usbhs_omap_platform_data		*pdata = tll->pdata;
-	int i;
-
-	dev_dbg(dev, "usbtll_runtime_resume\n");
+	pm_runtime_get_sync(tll_dev);
 
 	for (i = 0; i < tll->nch; i++) {
 		if (omap_usb_mode_needs_tll(pdata->port_mode[i])) {
@@ -391,22 +431,31 @@
 
 			r = clk_enable(tll->ch_clk[i]);
 			if (r) {
-				dev_err(dev,
+				dev_err(tll_dev,
 				 "Error enabling ch %d clock: %d\n", i, r);
 			}
 		}
 	}
 
+	spin_unlock(&tll_lock);
+
 	return 0;
 }
+EXPORT_SYMBOL_GPL(omap_tll_enable);
 
-static int usbtll_runtime_suspend(struct device *dev)
+int omap_tll_disable(struct usbhs_omap_platform_data *pdata)
 {
-	struct usbtll_omap			*tll = dev_get_drvdata(dev);
-	struct usbhs_omap_platform_data		*pdata = tll->pdata;
 	int i;
+	struct usbtll_omap *tll;
 
-	dev_dbg(dev, "usbtll_runtime_suspend\n");
+	spin_lock(&tll_lock);
+
+	if (!tll_dev) {
+		spin_unlock(&tll_lock);
+		return -ENODEV;
+	}
+
+	tll = dev_get_drvdata(tll_dev);
 
 	for (i = 0; i < tll->nch; i++) {
 		if (omap_usb_mode_needs_tll(pdata->port_mode[i])) {
@@ -415,64 +464,16 @@
 		}
 	}
 
+	pm_runtime_put_sync(tll_dev);
+
+	spin_unlock(&tll_lock);
+
 	return 0;
 }
-
-static const struct dev_pm_ops usbtllomap_dev_pm_ops = {
-	SET_RUNTIME_PM_OPS(usbtll_runtime_suspend,
-			   usbtll_runtime_resume,
-			   NULL)
-};
-
-static struct platform_driver usbtll_omap_driver = {
-	.driver = {
-		.name		= (char *)usbtll_driver_name,
-		.owner		= THIS_MODULE,
-		.pm		= &usbtllomap_dev_pm_ops,
-	},
-	.probe		= usbtll_omap_probe,
-	.remove		= usbtll_omap_remove,
-};
-
-int omap_tll_enable(void)
-{
-	int ret;
-
-	spin_lock(&tll_lock);
-
-	if (!tll_dev) {
-		pr_err("%s: OMAP USB TLL not initialized\n", __func__);
-		ret = -ENODEV;
-	} else {
-		ret = pm_runtime_get_sync(tll_dev);
-	}
-
-	spin_unlock(&tll_lock);
-
-	return ret;
-}
-EXPORT_SYMBOL_GPL(omap_tll_enable);
-
-int omap_tll_disable(void)
-{
-	int ret;
-
-	spin_lock(&tll_lock);
-
-	if (!tll_dev) {
-		pr_err("%s: OMAP USB TLL not initialized\n", __func__);
-		ret = -ENODEV;
-	} else {
-		ret = pm_runtime_put_sync(tll_dev);
-	}
-
-	spin_unlock(&tll_lock);
-
-	return ret;
-}
 EXPORT_SYMBOL_GPL(omap_tll_disable);
 
 MODULE_AUTHOR("Keshava Munegowda <keshava_mgowda@ti.com>");
+MODULE_AUTHOR("Roger Quadros <rogerq@ti.com>");
 MODULE_ALIAS("platform:" USBHS_DRIVER_NAME);
 MODULE_LICENSE("GPL v2");
 MODULE_DESCRIPTION("usb tll driver for TI OMAP EHCI and OHCI controllers");
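
After this refactor the TLL is no longer driven through runtime-PM
callbacks; the usb-host driver calls the three exported entry points
directly, passing the shared platform data. A sketch of the expected
sequence (usbhs_bringup_sketch() is hypothetical; error handling
abbreviated):

	static int usbhs_bringup_sketch(struct usbhs_omap_platform_data *pdata)
	{
		int ret;

		ret = omap_tll_init(pdata);	/* once, from usbhs_omap_probe() */
		if (ret)
			return ret;	/* -ENODEV until the TLL has probed */

		omap_tll_enable(pdata);		/* runtime resume: channel clocks on */
		omap_tll_disable(pdata);	/* runtime suspend: clocks off */
		return 0;
	}
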
diff --git a/drivers/mfd/omap-usb.h b/drivers/mfd/omap-usb.h
index 972aa96..2a508b6 100644
--- a/drivers/mfd/omap-usb.h
+++ b/drivers/mfd/omap-usb.h
@@ -1,2 +1,3 @@
-extern int omap_tll_enable(void);
-extern int omap_tll_disable(void);
+extern int omap_tll_init(struct usbhs_omap_platform_data *pdata);
+extern int omap_tll_enable(struct usbhs_omap_platform_data *pdata);
+extern int omap_tll_disable(struct usbhs_omap_platform_data *pdata);
diff --git a/drivers/mfd/palmas.c b/drivers/mfd/palmas.c
index 73bf76d..53e9fe6 100644
--- a/drivers/mfd/palmas.c
+++ b/drivers/mfd/palmas.c
@@ -278,20 +278,20 @@
 	int ret;
 	u32 prop;
 
-	ret = of_property_read_u32(node, "ti,mux_pad1", &prop);
+	ret = of_property_read_u32(node, "ti,mux-pad1", &prop);
 	if (!ret) {
 		pdata->mux_from_pdata = 1;
 		pdata->pad1 = prop;
 	}
 
-	ret = of_property_read_u32(node, "ti,mux_pad2", &prop);
+	ret = of_property_read_u32(node, "ti,mux-pad2", &prop);
 	if (!ret) {
 		pdata->mux_from_pdata = 1;
 		pdata->pad2 = prop;
 	}
 
 	/* The default for this register is all masked */
-	ret = of_property_read_u32(node, "ti,power_ctrl", &prop);
+	ret = of_property_read_u32(node, "ti,power-ctrl", &prop);
 	if (!ret)
 		pdata->power_ctrl = prop;
 	else
@@ -349,6 +349,7 @@
 				ret = -ENOMEM;
 				goto err;
 			}
+			palmas->i2c_clients[i]->dev.of_node = of_node_get(node);
 		}
 		palmas->regmap[i] = devm_regmap_init_i2c(palmas->i2c_clients[i],
 				&palmas_regmap_config[i]);
diff --git a/drivers/mfd/retu-mfd.c b/drivers/mfd/retu-mfd.c
index 3ba0486..a183098 100644
--- a/drivers/mfd/retu-mfd.c
+++ b/drivers/mfd/retu-mfd.c
@@ -1,5 +1,5 @@
 /*
- * Retu MFD driver
+ * Retu/Tahvo MFD driver
  *
  * Copyright (C) 2004, 2005 Nokia Corporation
  *
@@ -33,7 +33,8 @@
 #define RETU_REG_ASICR		0x00		/* ASIC ID and revision */
 #define RETU_REG_ASICR_VILMA	(1 << 7)	/* Bit indicating Vilma */
 #define RETU_REG_IDR		0x01		/* Interrupt ID */
-#define RETU_REG_IMR		0x02		/* Interrupt mask */
+#define RETU_REG_IMR		0x02		/* Interrupt mask (Retu) */
+#define TAHVO_REG_IMR		0x03		/* Interrupt mask (Tahvo) */
 
 /* Interrupt sources */
 #define RETU_INT_PWR		0		/* Power button */
@@ -84,6 +85,62 @@
 /* Retu device registered for the power off. */
 static struct retu_dev *retu_pm_power_off;
 
+static struct resource tahvo_usb_res[] = {
+	{
+		.name	= "tahvo-usb",
+		.start	= TAHVO_INT_VBUS,
+		.end	= TAHVO_INT_VBUS,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct mfd_cell tahvo_devs[] = {
+	{
+		.name		= "tahvo-usb",
+		.resources	= tahvo_usb_res,
+		.num_resources	= ARRAY_SIZE(tahvo_usb_res),
+	},
+};
+
+static struct regmap_irq tahvo_irqs[] = {
+	[TAHVO_INT_VBUS] = {
+		.mask = 1 << TAHVO_INT_VBUS,
+	}
+};
+
+static struct regmap_irq_chip tahvo_irq_chip = {
+	.name		= "TAHVO",
+	.irqs		= tahvo_irqs,
+	.num_irqs	= ARRAY_SIZE(tahvo_irqs),
+	.num_regs	= 1,
+	.status_base	= RETU_REG_IDR,
+	.mask_base	= TAHVO_REG_IMR,
+	.ack_base	= RETU_REG_IDR,
+};
+
+static const struct retu_data {
+	char			*chip_name;
+	char			*companion_name;
+	struct regmap_irq_chip	*irq_chip;
+	struct mfd_cell		*children;
+	int			nchildren;
+} retu_data[] = {
+	[0] = {
+		.chip_name	= "Retu",
+		.companion_name	= "Vilma",
+		.irq_chip	= &retu_irq_chip,
+		.children	= retu_devs,
+		.nchildren	= ARRAY_SIZE(retu_devs),
+	},
+	[1] = {
+		.chip_name	= "Tahvo",
+		.companion_name	= "Betty",
+		.irq_chip	= &tahvo_irq_chip,
+		.children	= tahvo_devs,
+		.nchildren	= ARRAY_SIZE(tahvo_devs),
+	}
+};
+
 int retu_read(struct retu_dev *rdev, u8 reg)
 {
 	int ret;
@@ -173,9 +230,14 @@
 
 static int retu_probe(struct i2c_client *i2c, const struct i2c_device_id *id)
 {
+	struct retu_data const *rdat;
 	struct retu_dev *rdev;
 	int ret;
 
+	if (i2c->addr < 1 || i2c->addr > ARRAY_SIZE(retu_data))
+		return -ENODEV;
+	rdat = &retu_data[i2c->addr - 1];
+
 	rdev = devm_kzalloc(&i2c->dev, sizeof(*rdev), GFP_KERNEL);
 	if (rdev == NULL)
 		return -ENOMEM;
@@ -190,25 +252,27 @@
 
 	ret = retu_read(rdev, RETU_REG_ASICR);
 	if (ret < 0) {
-		dev_err(rdev->dev, "could not read Retu revision: %d\n", ret);
+		dev_err(rdev->dev, "could not read %s revision: %d\n",
+			rdat->chip_name, ret);
 		return ret;
 	}
 
-	dev_info(rdev->dev, "Retu%s v%d.%d found\n",
-		 (ret & RETU_REG_ASICR_VILMA) ? " & Vilma" : "",
+	dev_info(rdev->dev, "%s%s%s v%d.%d found\n", rdat->chip_name,
+		 (ret & RETU_REG_ASICR_VILMA) ? " & " : "",
+		 (ret & RETU_REG_ASICR_VILMA) ? rdat->companion_name : "",
 		 (ret >> 4) & 0x7, ret & 0xf);
 
-	/* Mask all RETU interrupts. */
-	ret = retu_write(rdev, RETU_REG_IMR, 0xffff);
+	/* Mask all interrupts. */
+	ret = retu_write(rdev, rdat->irq_chip->mask_base, 0xffff);
 	if (ret < 0)
 		return ret;
 
 	ret = regmap_add_irq_chip(rdev->regmap, i2c->irq, IRQF_ONESHOT, -1,
-				  &retu_irq_chip, &rdev->irq_data);
+				  rdat->irq_chip, &rdev->irq_data);
 	if (ret < 0)
 		return ret;
 
-	ret = mfd_add_devices(rdev->dev, -1, retu_devs, ARRAY_SIZE(retu_devs),
+	ret = mfd_add_devices(rdev->dev, -1, rdat->children, rdat->nchildren,
 			      NULL, regmap_irq_chip_get_base(rdev->irq_data),
 			      NULL);
 	if (ret < 0) {
@@ -216,7 +280,7 @@
 		return ret;
 	}
 
-	if (!pm_power_off) {
+	if (i2c->addr == 1 && !pm_power_off) {
 		retu_pm_power_off = rdev;
 		pm_power_off	  = retu_power_off;
 	}
@@ -240,6 +304,7 @@
 
 static const struct i2c_device_id retu_id[] = {
 	{ "retu-mfd", 0 },
+	{ "tahvo-mfd", 0 },
 	{ }
 };
 MODULE_DEVICE_TABLE(i2c, retu_id);
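
The variant selection in retu_probe() leans on the CBUS addressing
convention: Retu answers at I2C address 0x01 and Tahvo at 0x02, so the
address minus one indexes retu_data[]. As a standalone sketch
(retu_variant() is a hypothetical helper):

	static const struct retu_data *retu_variant(unsigned short addr)
	{
		if (addr < 1 || addr > ARRAY_SIZE(retu_data))
			return NULL;
		return &retu_data[addr - 1];	/* [0] Retu, [1] Tahvo */
	}
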
diff --git a/drivers/mfd/rts5249.c b/drivers/mfd/rts5249.c
new file mode 100644
index 0000000..15dc848
--- /dev/null
+++ b/drivers/mfd/rts5249.c
@@ -0,0 +1,241 @@
+/* Driver for Realtek PCI-Express card reader
+ *
+ * Copyright(c) 2009-2013 Realtek Semiconductor Corp. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2, or (at your option) any
+ * later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author:
+ *   Wei WANG <wei_wang@realsil.com.cn>
+ *   No. 128, West Shenhu Road, Suzhou Industry Park, Suzhou, China
+ */
+
+#include <linux/module.h>
+#include <linux/delay.h>
+#include <linux/mfd/rtsx_pci.h>
+
+#include "rtsx_pcr.h"
+
+static u8 rts5249_get_ic_version(struct rtsx_pcr *pcr)
+{
+	u8 val;
+
+	rtsx_pci_read_register(pcr, DUMMY_REG_RESET_0, &val);
+	return val & 0x0F;
+}
+
+static int rts5249_extra_init_hw(struct rtsx_pcr *pcr)
+{
+	rtsx_pci_init_cmd(pcr);
+
+	/* Configure GPIO as output */
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, GPIO_CTL, 0x02, 0x02);
+	/* Switch LDO3318 source from DV33 to card_3v3 */
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, LDO_PWR_SEL, 0x03, 0x00);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, LDO_PWR_SEL, 0x03, 0x01);
+	/* LED shine disabled, set initial shine cycle period */
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, OLT_LED_CTL, 0x0F, 0x02);
+	/* Correct driving */
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD,
+			SD30_CLK_DRIVE_SEL, 0xFF, 0x99);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD,
+			SD30_CMD_DRIVE_SEL, 0xFF, 0x99);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD,
+			SD30_DAT_DRIVE_SEL, 0xFF, 0x92);
+
+	return rtsx_pci_send_cmd(pcr, 100);
+}
+
+static int rts5249_optimize_phy(struct rtsx_pcr *pcr)
+{
+	int err;
+
+	err = rtsx_pci_write_phy_register(pcr, PHY_REG_REV, 0xFE46);
+	if (err < 0)
+		return err;
+
+	msleep(1);
+
+	return rtsx_pci_write_phy_register(pcr, PHY_BPCR, 0x05C0);
+}
+
+static int rts5249_turn_on_led(struct rtsx_pcr *pcr)
+{
+	return rtsx_pci_write_register(pcr, GPIO_CTL, 0x02, 0x02);
+}
+
+static int rts5249_turn_off_led(struct rtsx_pcr *pcr)
+{
+	return rtsx_pci_write_register(pcr, GPIO_CTL, 0x02, 0x00);
+}
+
+static int rts5249_enable_auto_blink(struct rtsx_pcr *pcr)
+{
+	return rtsx_pci_write_register(pcr, OLT_LED_CTL, 0x08, 0x08);
+}
+
+static int rts5249_disable_auto_blink(struct rtsx_pcr *pcr)
+{
+	return rtsx_pci_write_register(pcr, OLT_LED_CTL, 0x08, 0x00);
+}
+
+static int rts5249_card_power_on(struct rtsx_pcr *pcr, int card)
+{
+	int err;
+
+	rtsx_pci_init_cmd(pcr);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CARD_PWR_CTL,
+			SD_POWER_MASK, SD_VCC_PARTIAL_POWER_ON);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PWR_GATE_CTRL,
+			LDO3318_PWR_MASK, 0x02);
+	err = rtsx_pci_send_cmd(pcr, 100);
+	if (err < 0)
+		return err;
+
+	msleep(5);
+
+	rtsx_pci_init_cmd(pcr);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CARD_PWR_CTL,
+			SD_POWER_MASK, SD_VCC_POWER_ON);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PWR_GATE_CTRL,
+			LDO3318_PWR_MASK, 0x06);
+	err = rtsx_pci_send_cmd(pcr, 100);
+	if (err < 0)
+		return err;
+
+	return 0;
+}
+
+static int rts5249_card_power_off(struct rtsx_pcr *pcr, int card)
+{
+	rtsx_pci_init_cmd(pcr);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CARD_PWR_CTL,
+			SD_POWER_MASK, SD_POWER_OFF);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PWR_GATE_CTRL,
+			LDO3318_PWR_MASK, 0x00);
+	return rtsx_pci_send_cmd(pcr, 100);
+}
+
+static int rts5249_switch_output_voltage(struct rtsx_pcr *pcr, u8 voltage)
+{
+	int err;
+	u8 clk_drive, cmd_drive, dat_drive;
+
+	if (voltage == OUTPUT_3V3) {
+		err = rtsx_pci_write_phy_register(pcr, PHY_TUNE, 0x4FC0 | 0x24);
+		if (err < 0)
+			return err;
+		clk_drive = 0x99;
+		cmd_drive = 0x99;
+		dat_drive = 0x92;
+	} else if (voltage == OUTPUT_1V8) {
+		err = rtsx_pci_write_phy_register(pcr, PHY_BACR, 0x3C02);
+		if (err < 0)
+			return err;
+		err = rtsx_pci_write_phy_register(pcr, PHY_TUNE, 0x4C40 | 0x24);
+		if (err < 0)
+			return err;
+		clk_drive = 0xb3;
+		cmd_drive = 0xb3;
+		dat_drive = 0xb3;
+	} else {
+		return -EINVAL;
+	}
+
+	/* set pad drive */
+	rtsx_pci_init_cmd(pcr);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SD30_CLK_DRIVE_SEL,
+			0xFF, clk_drive);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SD30_CMD_DRIVE_SEL,
+			0xFF, cmd_drive);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SD30_DAT_DRIVE_SEL,
+			0xFF, dat_drive);
+	return rtsx_pci_send_cmd(pcr, 100);
+}
+
+static const struct pcr_ops rts5249_pcr_ops = {
+	.extra_init_hw = rts5249_extra_init_hw,
+	.optimize_phy = rts5249_optimize_phy,
+	.turn_on_led = rts5249_turn_on_led,
+	.turn_off_led = rts5249_turn_off_led,
+	.enable_auto_blink = rts5249_enable_auto_blink,
+	.disable_auto_blink = rts5249_disable_auto_blink,
+	.card_power_on = rts5249_card_power_on,
+	.card_power_off = rts5249_card_power_off,
+	.switch_output_voltage = rts5249_switch_output_voltage,
+};
+
+/* SD Pull Control Enable:
+ *     SD_DAT[3:0] ==> pull up
+ *     SD_CD       ==> pull up
+ *     SD_WP       ==> pull up
+ *     SD_CMD      ==> pull up
+ *     SD_CLK      ==> pull down
+ */
+static const u32 rts5249_sd_pull_ctl_enable_tbl[] = {
+	RTSX_REG_PAIR(CARD_PULL_CTL1, 0x66),
+	RTSX_REG_PAIR(CARD_PULL_CTL2, 0xAA),
+	RTSX_REG_PAIR(CARD_PULL_CTL3, 0xE9),
+	RTSX_REG_PAIR(CARD_PULL_CTL4, 0xAA),
+	0,
+};
+
+/* SD Pull Control Disable:
+ *     SD_DAT[3:0] ==> pull down
+ *     SD_CD       ==> pull up
+ *     SD_WP       ==> pull down
+ *     SD_CMD      ==> pull down
+ *     SD_CLK      ==> pull down
+ */
+static const u32 rts5249_sd_pull_ctl_disable_tbl[] = {
+	RTSX_REG_PAIR(CARD_PULL_CTL1, 0x66),
+	RTSX_REG_PAIR(CARD_PULL_CTL2, 0x55),
+	RTSX_REG_PAIR(CARD_PULL_CTL3, 0xD5),
+	RTSX_REG_PAIR(CARD_PULL_CTL4, 0x55),
+	0,
+};
+
+/* MS Pull Control Enable:
+ *     MS CD       ==> pull up
+ *     others      ==> pull down
+ */
+static const u32 rts5249_ms_pull_ctl_enable_tbl[] = {
+	RTSX_REG_PAIR(CARD_PULL_CTL4, 0x55),
+	RTSX_REG_PAIR(CARD_PULL_CTL5, 0x55),
+	RTSX_REG_PAIR(CARD_PULL_CTL6, 0x15),
+	0,
+};
+
+/* MS Pull Control Disable:
+ *     MS CD       ==> pull up
+ *     others      ==> pull down
+ */
+static const u32 rts5249_ms_pull_ctl_disable_tbl[] = {
+	RTSX_REG_PAIR(CARD_PULL_CTL4, 0x55),
+	RTSX_REG_PAIR(CARD_PULL_CTL5, 0x55),
+	RTSX_REG_PAIR(CARD_PULL_CTL6, 0x15),
+	0,
+};
+
+void rts5249_init_params(struct rtsx_pcr *pcr)
+{
+	pcr->extra_caps = EXTRA_CAPS_SD_SDR50 | EXTRA_CAPS_SD_SDR104;
+	pcr->num_slots = 2;
+	pcr->ops = &rts5249_pcr_ops;
+
+	pcr->ic_version = rts5249_get_ic_version(pcr);
+	pcr->sd_pull_ctl_enable_tbl = rts5249_sd_pull_ctl_enable_tbl;
+	pcr->sd_pull_ctl_disable_tbl = rts5249_sd_pull_ctl_disable_tbl;
+	pcr->ms_pull_ctl_enable_tbl = rts5249_ms_pull_ctl_enable_tbl;
+	pcr->ms_pull_ctl_disable_tbl = rts5249_ms_pull_ctl_disable_tbl;
+}
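
The pull-control tables above are zero-terminated lists of packed
register/value words that the rtsx core replays on card insertion and
removal. Assuming RTSX_REG_PAIR() packs the register address into the high
16 bits and the value into the low 8 (per <linux/mfd/rtsx_pci.h>; treat the
exact layout as an assumption here), a table walker would look roughly
like:

	static int apply_pull_ctl(struct rtsx_pcr *pcr, const u32 *tbl)
	{
		rtsx_pci_init_cmd(pcr);
		for (; *tbl; tbl++)
			rtsx_pci_add_cmd(pcr, WRITE_REG_CMD,
					 (u16)(*tbl >> 16), 0xFF, (u8)*tbl);
		return rtsx_pci_send_cmd(pcr, 100);
	}
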
diff --git a/drivers/mfd/rtsx_pcr.c b/drivers/mfd/rtsx_pcr.c
index 2f12cc1..e968c01 100644
--- a/drivers/mfd/rtsx_pcr.c
+++ b/drivers/mfd/rtsx_pcr.c
@@ -56,6 +56,7 @@
 	{ PCI_DEVICE(0x10EC, 0x5229), PCI_CLASS_OTHERS << 16, 0xFF0000 },
 	{ PCI_DEVICE(0x10EC, 0x5289), PCI_CLASS_OTHERS << 16, 0xFF0000 },
 	{ PCI_DEVICE(0x10EC, 0x5227), PCI_CLASS_OTHERS << 16, 0xFF0000 },
+	{ PCI_DEVICE(0x10EC, 0x5249), PCI_CLASS_OTHERS << 16, 0xFF0000 },
 	{ 0, }
 };
 
@@ -1033,6 +1034,10 @@
 	case 0x5227:
 		rts5227_init_params(pcr);
 		break;
+
+	case 0x5249:
+		rts5249_init_params(pcr);
+		break;
 	}
 
 	dev_dbg(&(pcr->pci->dev), "PID: 0x%04x, IC version: 0x%02x\n",
@@ -1138,7 +1143,7 @@
 
 	ret = rtsx_pci_acquire_irq(pcr);
 	if (ret < 0)
-		goto free_dma;
+		goto disable_msi;
 
 	pci_set_master(pcidev);
 	synchronize_irq(pcr->irq);
@@ -1162,7 +1167,9 @@
 
 disable_irq:
 	free_irq(pcr->irq, (void *)pcr);
-free_dma:
+disable_msi:
+	if (pcr->msi_en)
+		pci_disable_msi(pcr->pci);
 	dma_free_coherent(&(pcr->pci->dev), RTSX_RESV_BUF_LEN,
 			pcr->rtsx_resv_buf, pcr->rtsx_resv_buf_addr);
 unmap:
diff --git a/drivers/mfd/rtsx_pcr.h b/drivers/mfd/rtsx_pcr.h
index 2b3ab8a..55fcfc2 100644
--- a/drivers/mfd/rtsx_pcr.h
+++ b/drivers/mfd/rtsx_pcr.h
@@ -32,5 +32,6 @@
 void rts5229_init_params(struct rtsx_pcr *pcr);
 void rtl8411_init_params(struct rtsx_pcr *pcr);
 void rts5227_init_params(struct rtsx_pcr *pcr);
+void rts5249_init_params(struct rtsx_pcr *pcr);
 
 #endif
diff --git a/drivers/mfd/si476x-cmd.c b/drivers/mfd/si476x-cmd.c
new file mode 100644
index 0000000..de48b4e
--- /dev/null
+++ b/drivers/mfd/si476x-cmd.c
@@ -0,0 +1,1553 @@
+/*
+ * drivers/mfd/si476x-cmd.c -- Subroutines implementing command
+ * protocol of si476x series of chips
+ *
+ * Copyright (C) 2012 Innovative Converged Devices(ICD)
+ * Copyright (C) 2013 Andrey Smirnov
+ *
+ * Author: Andrey Smirnov <andrew.smirnov@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/completion.h>
+#include <linux/delay.h>
+#include <linux/atomic.h>
+#include <linux/i2c.h>
+#include <linux/device.h>
+#include <linux/gpio.h>
+#include <linux/videodev2.h>
+
+#include <linux/mfd/si476x-core.h>
+
+#define msb(x)                  ((u8)((u16)(x) >> 8))
+#define lsb(x)                  ((u8)((u16)(x) & 0x00FF))
+
+#define CMD_POWER_UP				0x01
+#define CMD_POWER_UP_A10_NRESP			1
+#define CMD_POWER_UP_A10_NARGS			5
+
+#define CMD_POWER_UP_A20_NRESP			1
+#define CMD_POWER_UP_A20_NARGS			5
+
+#define POWER_UP_DELAY_MS			110
+
+#define CMD_POWER_DOWN				0x11
+#define CMD_POWER_DOWN_A10_NRESP		1
+
+#define CMD_POWER_DOWN_A20_NRESP		1
+#define CMD_POWER_DOWN_A20_NARGS		1
+
+#define CMD_FUNC_INFO				0x12
+#define CMD_FUNC_INFO_NRESP			7
+
+#define CMD_SET_PROPERTY			0x13
+#define CMD_SET_PROPERTY_NARGS			5
+#define CMD_SET_PROPERTY_NRESP			1
+
+#define CMD_GET_PROPERTY			0x14
+#define CMD_GET_PROPERTY_NARGS			3
+#define CMD_GET_PROPERTY_NRESP			4
+
+#define CMD_AGC_STATUS				0x17
+#define CMD_AGC_STATUS_NRESP_A10		2
+#define CMD_AGC_STATUS_NRESP_A20                6
+
+#define PIN_CFG_BYTE(x) (0x7F & (x))
+#define CMD_DIG_AUDIO_PIN_CFG			0x18
+#define CMD_DIG_AUDIO_PIN_CFG_NARGS		4
+#define CMD_DIG_AUDIO_PIN_CFG_NRESP		5
+
+#define CMD_ZIF_PIN_CFG				0x19
+#define CMD_ZIF_PIN_CFG_NARGS			4
+#define CMD_ZIF_PIN_CFG_NRESP			5
+
+#define CMD_IC_LINK_GPO_CTL_PIN_CFG		0x1A
+#define CMD_IC_LINK_GPO_CTL_PIN_CFG_NARGS	4
+#define CMD_IC_LINK_GPO_CTL_PIN_CFG_NRESP	5
+
+#define CMD_ANA_AUDIO_PIN_CFG			0x1B
+#define CMD_ANA_AUDIO_PIN_CFG_NARGS		1
+#define CMD_ANA_AUDIO_PIN_CFG_NRESP		2
+
+#define CMD_INTB_PIN_CFG			0x1C
+#define CMD_INTB_PIN_CFG_NARGS			2
+#define CMD_INTB_PIN_CFG_A10_NRESP		6
+#define CMD_INTB_PIN_CFG_A20_NRESP		3
+
+#define CMD_FM_TUNE_FREQ			0x30
+#define CMD_FM_TUNE_FREQ_A10_NARGS		5
+#define CMD_FM_TUNE_FREQ_A20_NARGS		3
+#define CMD_FM_TUNE_FREQ_NRESP			1
+
+#define CMD_FM_RSQ_STATUS			0x32
+
+#define CMD_FM_RSQ_STATUS_A10_NARGS		1
+#define CMD_FM_RSQ_STATUS_A10_NRESP		17
+#define CMD_FM_RSQ_STATUS_A30_NARGS		1
+#define CMD_FM_RSQ_STATUS_A30_NRESP		23
+
+#define CMD_FM_SEEK_START			0x31
+#define CMD_FM_SEEK_START_NARGS			1
+#define CMD_FM_SEEK_START_NRESP			1
+
+#define CMD_FM_RDS_STATUS			0x36
+#define CMD_FM_RDS_STATUS_NARGS			1
+#define CMD_FM_RDS_STATUS_NRESP			16
+
+#define CMD_FM_RDS_BLOCKCOUNT			0x37
+#define CMD_FM_RDS_BLOCKCOUNT_NARGS		1
+#define CMD_FM_RDS_BLOCKCOUNT_NRESP		8
+
+#define CMD_FM_PHASE_DIVERSITY			0x38
+#define CMD_FM_PHASE_DIVERSITY_NARGS		1
+#define CMD_FM_PHASE_DIVERSITY_NRESP		1
+
+#define CMD_FM_PHASE_DIV_STATUS			0x39
+#define CMD_FM_PHASE_DIV_STATUS_NRESP		2
+
+#define CMD_AM_TUNE_FREQ			0x40
+#define CMD_AM_TUNE_FREQ_NARGS			3
+#define CMD_AM_TUNE_FREQ_NRESP			1
+
+#define CMD_AM_RSQ_STATUS			0x42
+#define CMD_AM_RSQ_STATUS_NARGS			1
+#define CMD_AM_RSQ_STATUS_NRESP			13
+
+#define CMD_AM_SEEK_START			0x41
+#define CMD_AM_SEEK_START_NARGS			1
+#define CMD_AM_SEEK_START_NRESP			1
+
+#define CMD_AM_ACF_STATUS			0x45
+#define CMD_AM_ACF_STATUS_NRESP			6
+#define CMD_AM_ACF_STATUS_NARGS			1
+
+#define CMD_FM_ACF_STATUS			0x35
+#define CMD_FM_ACF_STATUS_NRESP			8
+#define CMD_FM_ACF_STATUS_NARGS			1
+
+#define CMD_MAX_ARGS_COUNT			(10)
+
+enum si476x_acf_status_report_bits {
+	SI476X_ACF_BLEND_INT	= (1 << 4),
+	SI476X_ACF_HIBLEND_INT	= (1 << 3),
+	SI476X_ACF_HICUT_INT	= (1 << 2),
+	SI476X_ACF_CHBW_INT	= (1 << 1),
+	SI476X_ACF_SOFTMUTE_INT	= (1 << 0),
+
+	SI476X_ACF_SMUTE	= (1 << 0),
+	SI476X_ACF_SMATTN	= 0x1f,
+	SI476X_ACF_PILOT	= (1 << 7),
+	SI476X_ACF_STBLEND	= ~SI476X_ACF_PILOT,
+};
+
+enum si476x_agc_status_report_bits {
+	SI476X_AGC_MXHI		= (1 << 5),
+	SI476X_AGC_MXLO		= (1 << 4),
+	SI476X_AGC_LNAHI	= (1 << 3),
+	SI476X_AGC_LNALO	= (1 << 2),
+};
+
+enum si476x_errors {
+	SI476X_ERR_BAD_COMMAND		= 0x10,
+	SI476X_ERR_BAD_ARG1		= 0x11,
+	SI476X_ERR_BAD_ARG2		= 0x12,
+	SI476X_ERR_BAD_ARG3		= 0x13,
+	SI476X_ERR_BAD_ARG4		= 0x14,
+	SI476X_ERR_BUSY			= 0x18,
+	SI476X_ERR_BAD_INTERNAL_MEMORY  = 0x20,
+	SI476X_ERR_BAD_PATCH		= 0x30,
+	SI476X_ERR_BAD_BOOT_MODE	= 0x31,
+	SI476X_ERR_BAD_PROPERTY		= 0x40,
+};
+
+static int si476x_core_parse_and_nag_about_error(struct si476x_core *core)
+{
+	int err;
+	char *cause;
+	u8 buffer[2];
+
+	if (core->revision != SI476X_REVISION_A10) {
+		err = si476x_core_i2c_xfer(core, SI476X_I2C_RECV,
+					   buffer, sizeof(buffer));
+		if (err == sizeof(buffer)) {
+			switch (buffer[1]) {
+			case SI476X_ERR_BAD_COMMAND:
+				cause = "Bad command";
+				err = -EINVAL;
+				break;
+			case SI476X_ERR_BAD_ARG1:
+				cause = "Bad argument #1";
+				err = -EINVAL;
+				break;
+			case SI476X_ERR_BAD_ARG2:
+				cause = "Bad argument #2";
+				err = -EINVAL;
+				break;
+			case SI476X_ERR_BAD_ARG3:
+				cause = "Bad argument #3";
+				err = -EINVAL;
+				break;
+			case SI476X_ERR_BAD_ARG4:
+				cause = "Bad argument #4";
+				err = -EINVAL;
+				break;
+			case SI476X_ERR_BUSY:
+				cause = "Chip is busy";
+				err = -EBUSY;
+				break;
+			case SI476X_ERR_BAD_INTERNAL_MEMORY:
+				cause = "Bad internal memory";
+				err = -EIO;
+				break;
+			case SI476X_ERR_BAD_PATCH:
+				cause = "Bad patch";
+				err = -EINVAL;
+				break;
+			case SI476X_ERR_BAD_BOOT_MODE:
+				cause = "Bad boot mode";
+				err = -EINVAL;
+				break;
+			case SI476X_ERR_BAD_PROPERTY:
+				cause = "Bad property";
+				err = -EINVAL;
+				break;
+			default:
+				cause = "Unknown";
+				err = -EIO;
+			}
+
+			dev_err(&core->client->dev,
+				"[Chip error status]: %s\n", cause);
+		} else {
+			dev_err(&core->client->dev,
+				"Failed to fetch error code\n");
+			err = (err >= 0) ? -EIO : err;
+		}
+	} else {
+		err = -EIO;
+	}
+
+	return err;
+}
+
+/**
+ * si476x_core_send_command() - sends a command to si476x and waits for its
+ * response
+ * @core:    si476x_device structure for the device we are
+ *            communicating with
+ * @command:  command id
+ * @args:     command arguments we are sending
+ * @argn:     actual size of @args
+ * @response: buffer to place the expected response from the device
+ * @respn:    actual size of @response
+ * @usecs:    amount of time to wait before reading the response (in
+ *            usecs)
+ *
+ * Function returns 0 on success and a negative error code on
+ * failure
+ */
+static int si476x_core_send_command(struct si476x_core *core,
+				    const u8 command,
+				    const u8 args[],
+				    const int argn,
+				    u8 resp[],
+				    const int respn,
+				    const int usecs)
+{
+	struct i2c_client *client = core->client;
+	int err;
+	u8  data[CMD_MAX_ARGS_COUNT + 1];
+
+	if (argn > CMD_MAX_ARGS_COUNT) {
+		err = -ENOMEM;
+		goto exit;
+	}
+
+	if (!client->adapter) {
+		err = -ENODEV;
+		goto exit;
+	}
+
+	/* First send the command and its arguments */
+	data[0] = command;
+	memcpy(&data[1], args, argn);
+	dev_dbg(&client->dev, "Command:\n %*ph\n", argn + 1, data);
+
+	err = si476x_core_i2c_xfer(core, SI476X_I2C_SEND,
+				   (char *) data, argn + 1);
+	if (err != argn + 1) {
+		dev_err(&core->client->dev,
+			"Error while sending command 0x%02x\n",
+			command);
+		err = (err >= 0) ? -EIO : err;
+		goto exit;
+	}
+	/* Set CTS to zero only after the command is sent to avoid
+	 * possible race conditions when working in polling mode */
+	atomic_set(&core->cts, 0);
+
+	if (!wait_event_timeout(core->command,
+				atomic_read(&core->cts),
+				usecs_to_jiffies(usecs) + 1))
+		dev_warn(&core->client->dev,
+			 "(%s) [CMD 0x%02x] Answer timeout.\n",
+			 __func__, command);
+
+	/*
+	 * When working in polling mode, for some reason the tuner will
+	 * report the CTS bit as set in the first status byte read, but
+	 * all the consecutive ones will return zeros until the tuner
+	 * has actually completed the POWER_UP command. To work around
+	 * that we wait for a second CTS to be reported.
+	 */
+	if (unlikely(!core->client->irq && command == CMD_POWER_UP)) {
+		if (!wait_event_timeout(core->command,
+					atomic_read(&core->cts),
+					usecs_to_jiffies(usecs) + 1))
+			dev_warn(&core->client->dev,
+				 "(%s) Power up took too much time.\n",
+				 __func__);
+	}
+
+	/* Then get the response */
+	err = si476x_core_i2c_xfer(core, SI476X_I2C_RECV, resp, respn);
+	if (err != respn) {
+		dev_err(&core->client->dev,
+			"Error while reading response for command 0x%02x\n",
+			command);
+		err = (err >= 0) ? -EIO : err;
+		goto exit;
+	}
+	dev_dbg(&client->dev, "Response:\n %*ph\n", respn, resp);
+
+	err = 0;
+
+	if (resp[0] & SI476X_ERR) {
+		dev_err(&core->client->dev,
+			"[CMD 0x%02x] Chip set error flag\n", command);
+		err = si476x_core_parse_and_nag_about_error(core);
+		goto exit;
+	}
+
+	if (!(resp[0] & SI476X_CTS))
+		err = -EBUSY;
+exit:
+	return err;
+}
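+
+/*
+ * Illustrative sketch (kept as a comment, not compiled into the
+ * driver): the command helpers below all wrap si476x_core_send_command()
+ * the same way -- argument and response buffers are sized with the
+ * matching CMD_*_NARGS/CMD_*_NRESP constants and passed via
+ * ARRAY_SIZE(), e.g.:
+ *
+ *	u8 resp[CMD_FUNC_INFO_NRESP];
+ *
+ *	err = si476x_core_send_command(core, CMD_FUNC_INFO,
+ *				       NULL, 0,
+ *				       resp, ARRAY_SIZE(resp),
+ *				       SI476X_DEFAULT_TIMEOUT);
+ */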
+
+static int si476x_cmd_clear_stc(struct si476x_core *core)
+{
+	int err;
+	struct si476x_rsq_status_args args = {
+		.primary	= false,
+		.rsqack		= false,
+		.attune		= false,
+		.cancel		= false,
+		.stcack		= true,
+	};
+
+	switch (core->power_up_parameters.func) {
+	case SI476X_FUNC_FM_RECEIVER:
+		err = si476x_core_cmd_fm_rsq_status(core, &args, NULL);
+		break;
+	case SI476X_FUNC_AM_RECEIVER:
+		err = si476x_core_cmd_am_rsq_status(core, &args, NULL);
+		break;
+	default:
+		err = -EINVAL;
+	}
+
+	return err;
+}
+
+static int si476x_cmd_tune_seek_freq(struct si476x_core *core,
+				     uint8_t cmd,
+				     const uint8_t args[], size_t argn,
+				     uint8_t *resp, size_t respn)
+{
+	int err;
+
+
+	atomic_set(&core->stc, 0);
+	err = si476x_core_send_command(core, cmd, args, argn, resp, respn,
+				       SI476X_TIMEOUT_TUNE);
+	if (!err) {
+		wait_event_killable(core->tuning,
+				    atomic_read(&core->stc));
+		si476x_cmd_clear_stc(core);
+	}
+
+	return err;
+}
+
+/**
+ * si476x_core_cmd_func_info() - send 'FUNC_INFO' command to the device
+ * @core: device to send the command to
+ * @info: struct si476x_func_info to fill with all the information
+ *        returned by the command
+ *
+ * The command requests the firmware and patch version for the
+ * currently loaded firmware (dependent on the function of the device
+ * FM/AM/WB)
+ *
+ * Function returns 0 on success and negative error code on
+ * failure
+ */
+int si476x_core_cmd_func_info(struct si476x_core *core,
+			      struct si476x_func_info *info)
+{
+	int err;
+	u8  resp[CMD_FUNC_INFO_NRESP];
+
+	err = si476x_core_send_command(core, CMD_FUNC_INFO,
+				       NULL, 0,
+				       resp, ARRAY_SIZE(resp),
+				       SI476X_DEFAULT_TIMEOUT);
+	if (err < 0)
+		return err;
+
+	info->firmware.major    = resp[1];
+	info->firmware.minor[0] = resp[2];
+	info->firmware.minor[1] = resp[3];
+
+	info->patch_id = ((u16) resp[4] << 8) | resp[5];
+	info->func     = resp[6];
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(si476x_core_cmd_func_info);
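+
+/*
+ * Usage sketch (illustrative only): reading the version of the loaded
+ * firmware from a powered-up tuner; 'core' is assumed to be a valid
+ * handle with the core lock held.
+ *
+ *	struct si476x_func_info info;
+ *
+ *	err = si476x_core_cmd_func_info(core, &info);
+ *	if (!err)
+ *		dev_info(&core->client->dev, "firmware: %d.%d.%d\n",
+ *			 info.firmware.major, info.firmware.minor[0],
+ *			 info.firmware.minor[1]);
+ */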
+
+/**
+ * si476x_core_cmd_set_property() - send 'SET_PROPERTY' command to the device
+ * @core:     device to send the command to
+ * @property: property address
+ * @value:    property value
+ *
+ * Function returns 0 on success and negative error code on
+ * failure
+ */
+int si476x_core_cmd_set_property(struct si476x_core *core,
+				 u16 property, u16 value)
+{
+	u8       resp[CMD_SET_PROPERTY_NRESP];
+	const u8 args[CMD_SET_PROPERTY_NARGS] = {
+		0x00,
+		msb(property),
+		lsb(property),
+		msb(value),
+		lsb(value),
+	};
+
+	return si476x_core_send_command(core, CMD_SET_PROPERTY,
+					args, ARRAY_SIZE(args),
+					resp, ARRAY_SIZE(resp),
+					SI476X_DEFAULT_TIMEOUT);
+}
+EXPORT_SYMBOL_GPL(si476x_core_cmd_set_property);
+
+/**
+ * si476x_core_cmd_get_property() - send 'GET_PROPERTY' command to the device
+ * @core:     device to send the command to
+ * @property: property address
+ *
+ * Function returns the value of the property as u16 on success or a
+ * negative error code on failure
+ */
+int si476x_core_cmd_get_property(struct si476x_core *core, u16 property)
+{
+	int err;
+	u8       resp[CMD_GET_PROPERTY_NRESP];
+	const u8 args[CMD_GET_PROPERTY_NARGS] = {
+		0x00,
+		msb(property),
+		lsb(property),
+	};
+
+	err = si476x_core_send_command(core, CMD_GET_PROPERTY,
+				       args, ARRAY_SIZE(args),
+				       resp, ARRAY_SIZE(resp),
+				       SI476X_DEFAULT_TIMEOUT);
+	if (err < 0)
+		return err;
+	else
+		return be16_to_cpup((__be16 *)(resp + 2));
+}
+EXPORT_SYMBOL_GPL(si476x_core_cmd_get_property);
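+
+/*
+ * Usage sketch (illustrative only): a set/read-back round trip. The
+ * property address 0x0000 and value 0x0001 are placeholders, not a
+ * real register of the chip.
+ *
+ *	err = si476x_core_cmd_set_property(core, 0x0000, 0x0001);
+ *	if (!err) {
+ *		int val = si476x_core_cmd_get_property(core, 0x0000);
+ *		if (val >= 0)
+ *			dev_dbg(&core->client->dev, "prop: 0x%04x\n", val);
+ *	}
+ */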
+
+/**
+ * si476x_core_cmd_dig_audio_pin_cfg() - send 'DIG_AUDIO_PIN_CFG' command
+ * to the device
+ * @core: device to send the command to
+ * @dclk: DCLK pin function configuration:
+ *        SI476X_DCLK_NOOP     - do not modify the behaviour
+ *        SI476X_DCLK_TRISTATE - put the pin in tristate condition,
+ *                               enable 1MOhm pulldown
+ *        SI476X_DCLK_DAUDIO   - set the pin to be a part of digital
+ *                               audio interface
+ * @dfs:  DFS pin function configuration:
+ *        SI476X_DFS_NOOP      - do not modify the behaviour
+ *        SI476X_DFS_TRISTATE  - put the pin in tristate condition,
+ *                               enable 1MOhm pulldown
+ *        SI476X_DFS_DAUDIO    - set the pin to be a part of digital
+ *                               audio interface
+ * @dout: DOUT pin function configuration:
+ *        SI476X_DOUT_NOOP       - do not modify the behaviour
+ *        SI476X_DOUT_TRISTATE   - put the pin in tristate condition,
+ *                                 enable 1MOhm pulldown
+ *        SI476X_DOUT_I2S_OUTPUT - set this pin to be digital out on I2S
+ *                                 port 1
+ *        SI476X_DOUT_I2S_INPUT  - set this pin to be digital in on I2S
+ *                                 port 1
+ * @xout: XOUT pin function configuration:
+ *        SI476X_XOUT_NOOP        - do not modify the behaviour
+ *        SI476X_XOUT_TRISTATE    - put the pin in tristate condition,
+ *                                  enable 1MOhm pulldown
+ *        SI476X_XOUT_I2S_INPUT   - set this pin to be digital in on I2S
+ *                                  port 1
+ *        SI476X_XOUT_MODE_SELECT - set this pin to be the input that
+ *                                  selects the mode of the I2S audio
+ *                                  combiner (analog or HD)
+ *                                  [SI4761/63/65/67 Only]
+ *
+ * Function returns 0 on success and negative error code on failure
+ */
+int si476x_core_cmd_dig_audio_pin_cfg(struct  si476x_core *core,
+				      enum si476x_dclk_config dclk,
+				      enum si476x_dfs_config  dfs,
+				      enum si476x_dout_config dout,
+				      enum si476x_xout_config xout)
+{
+	u8       resp[CMD_DIG_AUDIO_PIN_CFG_NRESP];
+	const u8 args[CMD_DIG_AUDIO_PIN_CFG_NARGS] = {
+		PIN_CFG_BYTE(dclk),
+		PIN_CFG_BYTE(dfs),
+		PIN_CFG_BYTE(dout),
+		PIN_CFG_BYTE(xout),
+	};
+
+	return si476x_core_send_command(core, CMD_DIG_AUDIO_PIN_CFG,
+					args, ARRAY_SIZE(args),
+					resp, ARRAY_SIZE(resp),
+					SI476X_DEFAULT_TIMEOUT);
+}
+EXPORT_SYMBOL_GPL(si476x_core_cmd_dig_audio_pin_cfg);
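+
+/*
+ * Usage sketch (illustrative only): routing DCLK, DFS and DOUT to the
+ * digital audio interface while leaving XOUT untouched, using the
+ * enum values documented above.
+ *
+ *	err = si476x_core_cmd_dig_audio_pin_cfg(core,
+ *						SI476X_DCLK_DAUDIO,
+ *						SI476X_DFS_DAUDIO,
+ *						SI476X_DOUT_I2S_OUTPUT,
+ *						SI476X_XOUT_NOOP);
+ */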
+
+/**
+ * si476x_core_cmd_zif_pin_cfg() - send 'ZIF_PIN_CFG' command to the
+ * device
+ * @core:  device to send the command to
+ * @iqclk: IQCLK pin function configuration:
+ *       SI476X_IQCLK_NOOP     - do not modify the behaviour
+ *       SI476X_IQCLK_TRISTATE - put the pin in tristate condition,
+ *                               enable 1MOhm pulldown
+ *       SI476X_IQCLK_IQ       - set pin to be a part of I/Q interface
+ *                               in master mode
+ * @iqfs:  IQFS pin function configuration:
+ *       SI476X_IQFS_NOOP     - do not modify the behaviour
+ *       SI476X_IQFS_TRISTATE - put the pin in tristate condition,
+ *                              enable 1MOhm pulldown
+ *       SI476X_IQFS_IQ       - set pin to be a part of I/Q interface
+ *                              in master mode
+ * @iout:  IOUT pin function configuration:
+ *       SI476X_IOUT_NOOP     - do not modify the behaviour
+ *       SI476X_IOUT_TRISTATE - put the pin in tristate condition,
+ *                              enable 1MOhm pulldown
+ *       SI476X_IOUT_OUTPUT   - set pin to be I out
+ * @qout:  QOUT pin function configuration:
+ *       SI476X_QOUT_NOOP     - do not modify the behaviour
+ *       SI476X_QOUT_TRISTATE - put the pin in tristate condition,
+ *                              enable 1MOhm pulldown
+ *       SI476X_QOUT_OUTPUT   - set pin to be Q out
+ *
+ * Function returns 0 on success and negative error code on failure
+ */
+int si476x_core_cmd_zif_pin_cfg(struct si476x_core *core,
+				enum si476x_iqclk_config iqclk,
+				enum si476x_iqfs_config iqfs,
+				enum si476x_iout_config iout,
+				enum si476x_qout_config qout)
+{
+	u8       resp[CMD_ZIF_PIN_CFG_NRESP];
+	const u8 args[CMD_ZIF_PIN_CFG_NARGS] = {
+		PIN_CFG_BYTE(iqclk),
+		PIN_CFG_BYTE(iqfs),
+		PIN_CFG_BYTE(iout),
+		PIN_CFG_BYTE(qout),
+	};
+
+	return si476x_core_send_command(core, CMD_ZIF_PIN_CFG,
+					args, ARRAY_SIZE(args),
+					resp, ARRAY_SIZE(resp),
+					SI476X_DEFAULT_TIMEOUT);
+}
+EXPORT_SYMBOL_GPL(si476x_core_cmd_zif_pin_cfg);
+
+/**
+ * si476x_core_cmd_ic_link_gpo_ctl_pin_cfg() - send
+ * 'IC_LINK_GPO_CTL_PIN_CFG' command to the device
+ * @core: device to send the command to
+ * @icin: ICIN pin function configuration:
+ *      SI476X_ICIN_NOOP      - do not modify the behaviour
+ *      SI476X_ICIN_TRISTATE  - put the pin in tristate condition,
+ *                              enable 1MOhm pulldown
+ *      SI476X_ICIN_GPO1_HIGH - set pin to be an output, drive it high
+ *      SI476X_ICIN_GPO1_LOW  - set pin to be an output, drive it low
+ *      SI476X_ICIN_IC_LINK   - set the pin to be a part of Inter-Chip link
+ * @icip: ICIP pin function configuration:
+ *      SI476X_ICIP_NOOP      - do not modify the behaviour
+ *      SI476X_ICIP_TRISTATE  - put the pin in tristate condition,
+ *                              enable 1MOhm pulldown
+ *      SI476X_ICIP_GPO1_HIGH - set pin to be an output, drive it high
+ *      SI476X_ICIP_GPO1_LOW  - set pin to be an output, drive it low
+ *      SI476X_ICIP_IC_LINK   - set the pin to be a part of Inter-Chip link
+ * @icon: ICON pin function configuration:
+ *      SI476X_ICON_NOOP     - do not modify the behaviour
+ *      SI476X_ICON_TRISTATE - put the pin in tristate condition,
+ *                             enable 1MOhm pulldown
+ *      SI476X_ICON_I2S      - set the pin to be a part of audio
+ *                             interface in slave mode (DCLK)
+ *      SI476X_ICON_IC_LINK  - set the pin to be a part of Inter-Chip link
+ * @icop: ICOP pin function configuration:
+ *      SI476X_ICOP_NOOP     - do not modify the behaviour
+ *      SI476X_ICOP_TRISTATE - put the pin in tristate condition,
+ *                             enable 1MOhm pulldown
+ *      SI476X_ICOP_I2S      - set the pin to be a part of audio
+ *                             interface in slave mode (DOUT)
+ *                             [Si4761/63/65/67 Only]
+ *      SI476X_ICOP_IC_LINK  - set the pin to be a part of Inter-Chip link
+ *
+ * Function returns 0 on success and negative error code on failure
+ */
+int si476x_core_cmd_ic_link_gpo_ctl_pin_cfg(struct si476x_core *core,
+					    enum si476x_icin_config icin,
+					    enum si476x_icip_config icip,
+					    enum si476x_icon_config icon,
+					    enum si476x_icop_config icop)
+{
+	u8       resp[CMD_IC_LINK_GPO_CTL_PIN_CFG_NRESP];
+	const u8 args[CMD_IC_LINK_GPO_CTL_PIN_CFG_NARGS] = {
+		PIN_CFG_BYTE(icin),
+		PIN_CFG_BYTE(icip),
+		PIN_CFG_BYTE(icon),
+		PIN_CFG_BYTE(icop),
+	};
+
+	return si476x_core_send_command(core, CMD_IC_LINK_GPO_CTL_PIN_CFG,
+					args, ARRAY_SIZE(args),
+					resp, ARRAY_SIZE(resp),
+					SI476X_DEFAULT_TIMEOUT);
+}
+EXPORT_SYMBOL_GPL(si476x_core_cmd_ic_link_gpo_ctl_pin_cfg);
+
+/**
+ * si476x_core_cmd_ana_audio_pin_cfg() - send 'ANA_AUDIO_PIN_CFG' to the
+ * device
+ * @core:  device to send the command to
+ * @lrout: LROUT pin function configuration:
+ *       SI476X_LROUT_NOOP     - do not modify the behaviour
+ *       SI476X_LROUT_TRISTATE - put the pin in tristate condition,
+ *                               enable 1MOhm pulldown
+ *       SI476X_LROUT_AUDIO    - set pin to be audio output
+ *       SI476X_LROUT_MPX      - set pin to be MPX output
+ *
+ * Function returns 0 on success and negative error code on failure
+ */
+int si476x_core_cmd_ana_audio_pin_cfg(struct si476x_core *core,
+				      enum si476x_lrout_config lrout)
+{
+	u8       resp[CMD_ANA_AUDIO_PIN_CFG_NRESP];
+	const u8 args[CMD_ANA_AUDIO_PIN_CFG_NARGS] = {
+		PIN_CFG_BYTE(lrout),
+	};
+
+	return si476x_core_send_command(core, CMD_ANA_AUDIO_PIN_CFG,
+					args, ARRAY_SIZE(args),
+					resp, ARRAY_SIZE(resp),
+					SI476X_DEFAULT_TIMEOUT);
+}
+EXPORT_SYMBOL_GPL(si476x_core_cmd_ana_audio_pin_cfg);
+
+
+/**
+ * si476x_core_cmd_intb_pin_cfg_a10() - send 'INTB_PIN_CFG' command to
+ * the device
+ * @core: device to send the command to
+ * @intb: INTB pin function configuration:
+ *      SI476X_INTB_NOOP     - do not modify the behaviour
+ *      SI476X_INTB_TRISTATE - put the pin in tristate condition,
+ *                             enable 1MOhm pulldown
+ *      SI476X_INTB_DAUDIO   - set pin to be a part of digital
+ *                             audio interface in slave mode
+ *      SI476X_INTB_IRQ      - set pin to be an interrupt request line
+ * @a1:   A1 pin function configuration:
+ *      SI476X_A1_NOOP     - do not modify the behaviour
+ *      SI476X_A1_TRISTATE - put the pin in tristate condition,
+ *                           enable 1MOhm pulldown
+ *      SI476X_A1_IRQ      - set pin to be an interrupt request line
+ *
+ * Function returns 0 on success and negative error code on failure
+ */
+static int si476x_core_cmd_intb_pin_cfg_a10(struct si476x_core *core,
+					    enum si476x_intb_config intb,
+					    enum si476x_a1_config a1)
+{
+	u8       resp[CMD_INTB_PIN_CFG_A10_NRESP];
+	const u8 args[CMD_INTB_PIN_CFG_NARGS] = {
+		PIN_CFG_BYTE(intb),
+		PIN_CFG_BYTE(a1),
+	};
+
+	return si476x_core_send_command(core, CMD_INTB_PIN_CFG,
+					args, ARRAY_SIZE(args),
+					resp, ARRAY_SIZE(resp),
+					SI476X_DEFAULT_TIMEOUT);
+}
+
+static int si476x_core_cmd_intb_pin_cfg_a20(struct si476x_core *core,
+					    enum si476x_intb_config intb,
+					    enum si476x_a1_config a1)
+{
+	u8       resp[CMD_INTB_PIN_CFG_A20_NRESP];
+	const u8 args[CMD_INTB_PIN_CFG_NARGS] = {
+		PIN_CFG_BYTE(intb),
+		PIN_CFG_BYTE(a1),
+	};
+
+	return si476x_core_send_command(core, CMD_INTB_PIN_CFG,
+					args, ARRAY_SIZE(args),
+					resp, ARRAY_SIZE(resp),
+					SI476X_DEFAULT_TIMEOUT);
+}
+
+
+
+/**
+ * si476x_core_cmd_am_rsq_status() - send 'AM_RSQ_STATUS' command to
+ * the device
+ * @core:    device to send the command to
+ * @rsqargs: structure containing the flags for the command:
+ *           rsqack - if set, the command clears RSQINT, SNRINT,
+ *                    SNRLINT, RSSIHINT, RSSILINT, BLENDINT, MULTHINT
+ *                    and MULTLINT
+ *           attune - when set, the values in the status report are
+ *                    the values that were calculated at tune
+ *           cancel - abort the ongoing seek/tune operation
+ *           stcack - clear the STCINT bit in the status register
+ * @report:  all signal quality information returned by the command
+ *           (if NULL then the output of the command is ignored)
+ *
+ * Function returns 0 on success and negative error code on failure
+ */
+int si476x_core_cmd_am_rsq_status(struct si476x_core *core,
+				  struct si476x_rsq_status_args *rsqargs,
+				  struct si476x_rsq_status_report *report)
+{
+	int err;
+	u8       resp[CMD_AM_RSQ_STATUS_NRESP];
+	const u8 args[CMD_AM_RSQ_STATUS_NARGS] = {
+		rsqargs->rsqack << 3 | rsqargs->attune << 2 |
+		rsqargs->cancel << 1 | rsqargs->stcack,
+	};
+
+	err = si476x_core_send_command(core, CMD_AM_RSQ_STATUS,
+				       args, ARRAY_SIZE(args),
+				       resp, ARRAY_SIZE(resp),
+				       SI476X_DEFAULT_TIMEOUT);
+	/*
+	 * Besides getting received signal quality information, this
+	 * command can be used to just acknowledge different interrupt
+	 * flags; in those cases it is useless to copy and parse the
+	 * received data, so the user can pass NULL and thus avoid
+	 * unnecessary copying.
+	 */
+	if (!report)
+		return err;
+
+	report->snrhint		= 0b00001000 & resp[1];
+	report->snrlint		= 0b00000100 & resp[1];
+	report->rssihint	= 0b00000010 & resp[1];
+	report->rssilint	= 0b00000001 & resp[1];
+
+	report->bltf		= 0b10000000 & resp[2];
+	report->snr_ready	= 0b00100000 & resp[2];
+	report->rssiready	= 0b00001000 & resp[2];
+	report->afcrl		= 0b00000010 & resp[2];
+	report->valid		= 0b00000001 & resp[2];
+
+	report->readfreq	= be16_to_cpup((__be16 *)(resp + 3));
+	report->freqoff		= resp[5];
+	report->rssi		= resp[6];
+	report->snr		= resp[7];
+	report->lassi		= resp[9];
+	report->hassi		= resp[10];
+	report->mult		= resp[11];
+	report->dev		= resp[12];
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(si476x_core_cmd_am_rsq_status);
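+
+/*
+ * Usage sketch (illustrative only): acknowledging the STC interrupt
+ * without copying back any signal quality data, exactly as
+ * si476x_cmd_clear_stc() above does.
+ *
+ *	struct si476x_rsq_status_args args = {
+ *		.stcack = true,
+ *	};
+ *
+ *	err = si476x_core_cmd_am_rsq_status(core, &args, NULL);
+ */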
+
+int si476x_core_cmd_fm_acf_status(struct si476x_core *core,
+			     struct si476x_acf_status_report *report)
+{
+	int err;
+	u8       resp[CMD_FM_ACF_STATUS_NRESP];
+	const u8 args[CMD_FM_ACF_STATUS_NARGS] = {
+		0x0,
+	};
+
+	if (!report)
+		return -EINVAL;
+
+	err = si476x_core_send_command(core, CMD_FM_ACF_STATUS,
+				       args, ARRAY_SIZE(args),
+				       resp, ARRAY_SIZE(resp),
+				       SI476X_DEFAULT_TIMEOUT);
+	if (err < 0)
+		return err;
+
+	report->blend_int	= resp[1] & SI476X_ACF_BLEND_INT;
+	report->hblend_int	= resp[1] & SI476X_ACF_HIBLEND_INT;
+	report->hicut_int	= resp[1] & SI476X_ACF_HICUT_INT;
+	report->chbw_int	= resp[1] & SI476X_ACF_CHBW_INT;
+	report->softmute_int	= resp[1] & SI476X_ACF_SOFTMUTE_INT;
+	report->smute		= resp[2] & SI476X_ACF_SMUTE;
+	report->smattn		= resp[3] & SI476X_ACF_SMATTN;
+	report->chbw		= resp[4];
+	report->hicut		= resp[5];
+	report->hiblend		= resp[6];
+	report->pilot		= resp[7] & SI476X_ACF_PILOT;
+	report->stblend		= resp[7] & SI476X_ACF_STBLEND;
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(si476x_core_cmd_fm_acf_status);
+
+int si476x_core_cmd_am_acf_status(struct si476x_core *core,
+				  struct si476x_acf_status_report *report)
+{
+	int err;
+	u8       resp[CMD_AM_ACF_STATUS_NRESP];
+	const u8 args[CMD_AM_ACF_STATUS_NARGS] = {
+		0x0,
+	};
+
+	if (!report)
+		return -EINVAL;
+
+	err = si476x_core_send_command(core, CMD_AM_ACF_STATUS,
+				       args, ARRAY_SIZE(args),
+				       resp, ARRAY_SIZE(resp),
+				       SI476X_DEFAULT_TIMEOUT);
+	if (err < 0)
+		return err;
+
+	report->blend_int	= resp[1] & SI476X_ACF_BLEND_INT;
+	report->hblend_int	= resp[1] & SI476X_ACF_HIBLEND_INT;
+	report->hicut_int	= resp[1] & SI476X_ACF_HICUT_INT;
+	report->chbw_int	= resp[1] & SI476X_ACF_CHBW_INT;
+	report->softmute_int	= resp[1] & SI476X_ACF_SOFTMUTE_INT;
+	report->smute		= resp[2] & SI476X_ACF_SMUTE;
+	report->smattn		= resp[3] & SI476X_ACF_SMATTN;
+	report->chbw		= resp[4];
+	report->hicut		= resp[5];
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(si476x_core_cmd_am_acf_status);
+
+
+/**
+ * si476x_core_cmd_fm_seek_start() - send 'FM_SEEK_START' command to
+ * the device
+ * @core:   device to send the command to
+ * @seekup: if set, the direction of the search is 'up'
+ * @wrap:   if set, seek wraps when hitting the band limit
+ *
+ * This function begins the search for a valid station. The station is
+ * considered valid when the 'FM_VALID_SNR_THRESHOLD',
+ * 'FM_VALID_RSSI_THRESHOLD' and 'FM_VALID_MAX_TUNE_ERROR' criteria
+ * are met.
+ *
+ * Function returns 0 on success and negative error code on failure
+ */
+int si476x_core_cmd_fm_seek_start(struct si476x_core *core,
+				  bool seekup, bool wrap)
+{
+	u8       resp[CMD_FM_SEEK_START_NRESP];
+	const u8 args[CMD_FM_SEEK_START_NARGS] = {
+		seekup << 3 | wrap << 2,
+	};
+
+	return si476x_cmd_tune_seek_freq(core, CMD_FM_SEEK_START,
+					 args, sizeof(args),
+					 resp, sizeof(resp));
+}
+EXPORT_SYMBOL_GPL(si476x_core_cmd_fm_seek_start);
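+
+/*
+ * Usage sketch (illustrative only): seeking upwards with wrap-around
+ * at the band limit; the call blocks in si476x_cmd_tune_seek_freq()
+ * until STC is signalled.
+ *
+ *	err = si476x_core_cmd_fm_seek_start(core, true, true);
+ */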
+
+/**
+ * si476x_core_cmd_fm_rds_status() - send 'FM_RDS_STATUS' command to
+ * the device
+ * @core:        device to send the command to
+ * @status_only: if set, the data is not removed from the RDSFIFO,
+ *               RDSFIFOUSED is not decremented and all the other RDS
+ *               data contains the last valid info received
+ * @mtfifo:      if set, the command clears the RDS receive FIFO
+ * @intack:      if set, the command clears the RDSINT bit
+ * @report:      buffer to store the parsed status report in (if NULL
+ *               then the output of the command is ignored)
+ *
+ * Function returns 0 on success and negative error code on failure
+ */
+int si476x_core_cmd_fm_rds_status(struct si476x_core *core,
+				  bool status_only,
+				  bool mtfifo,
+				  bool intack,
+				  struct si476x_rds_status_report *report)
+{
+	int err;
+	u8       resp[CMD_FM_RDS_STATUS_NRESP];
+	const u8 args[CMD_FM_RDS_STATUS_NARGS] = {
+		status_only << 2 | mtfifo << 1 | intack,
+	};
+
+	err = si476x_core_send_command(core, CMD_FM_RDS_STATUS,
+				       args, ARRAY_SIZE(args),
+				       resp, ARRAY_SIZE(resp),
+				       SI476X_DEFAULT_TIMEOUT);
+	/*
+	 * Besides getting RDS status information, this command can be
+	 * used to just acknowledge different interrupt flags; in those
+	 * cases it is useless to copy and parse the received data, so
+	 * the user can pass NULL and thus avoid unnecessary copying.
+	 */
+	if (err < 0 || report == NULL)
+		return err;
+
+	report->rdstpptyint	= 0b00010000 & resp[1];
+	report->rdspiint	= 0b00001000 & resp[1];
+	report->rdssyncint	= 0b00000010 & resp[1];
+	report->rdsfifoint	= 0b00000001 & resp[1];
+
+	report->tpptyvalid	= 0b00010000 & resp[2];
+	report->pivalid		= 0b00001000 & resp[2];
+	report->rdssync		= 0b00000010 & resp[2];
+	report->rdsfifolost	= 0b00000001 & resp[2];
+
+	report->tp		= 0b00100000 & resp[3];
+	report->pty		= 0b00011111 & resp[3];
+
+	report->pi		= be16_to_cpup((__be16 *)(resp + 4));
+	report->rdsfifoused	= resp[6];
+
+	report->ble[V4L2_RDS_BLOCK_A]	= 0b11000000 & resp[7];
+	report->ble[V4L2_RDS_BLOCK_B]	= 0b00110000 & resp[7];
+	report->ble[V4L2_RDS_BLOCK_C]	= 0b00001100 & resp[7];
+	report->ble[V4L2_RDS_BLOCK_D]	= 0b00000011 & resp[7];
+
+	report->rds[V4L2_RDS_BLOCK_A].block = V4L2_RDS_BLOCK_A;
+	report->rds[V4L2_RDS_BLOCK_A].msb = resp[8];
+	report->rds[V4L2_RDS_BLOCK_A].lsb = resp[9];
+
+	report->rds[V4L2_RDS_BLOCK_B].block = V4L2_RDS_BLOCK_B;
+	report->rds[V4L2_RDS_BLOCK_B].msb = resp[10];
+	report->rds[V4L2_RDS_BLOCK_B].lsb = resp[11];
+
+	report->rds[V4L2_RDS_BLOCK_C].block = V4L2_RDS_BLOCK_C;
+	report->rds[V4L2_RDS_BLOCK_C].msb = resp[12];
+	report->rds[V4L2_RDS_BLOCK_C].lsb = resp[13];
+
+	report->rds[V4L2_RDS_BLOCK_D].block = V4L2_RDS_BLOCK_D;
+	report->rds[V4L2_RDS_BLOCK_D].msb = resp[14];
+	report->rds[V4L2_RDS_BLOCK_D].lsb = resp[15];
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(si476x_core_cmd_fm_rds_status);
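+
+/*
+ * Usage sketch (illustrative only): peeking at the RDS FIFO without
+ * consuming it, then printing block A of the last received group.
+ *
+ *	struct si476x_rds_status_report report;
+ *
+ *	err = si476x_core_cmd_fm_rds_status(core, true, false, false,
+ *					    &report);
+ *	if (!err && report.rdsfifoused)
+ *		dev_dbg(&core->client->dev, "block A: %02x%02x\n",
+ *			report.rds[V4L2_RDS_BLOCK_A].msb,
+ *			report.rds[V4L2_RDS_BLOCK_A].lsb);
+ */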
+
+int si476x_core_cmd_fm_rds_blockcount(struct si476x_core *core,
+				bool clear,
+				struct si476x_rds_blockcount_report *report)
+{
+	int err;
+	u8       resp[CMD_FM_RDS_BLOCKCOUNT_NRESP];
+	const u8 args[CMD_FM_RDS_BLOCKCOUNT_NARGS] = {
+		clear,
+	};
+
+	if (!report)
+		return -EINVAL;
+
+	err = si476x_core_send_command(core, CMD_FM_RDS_BLOCKCOUNT,
+				       args, ARRAY_SIZE(args),
+				       resp, ARRAY_SIZE(resp),
+				       SI476X_DEFAULT_TIMEOUT);
+
+	if (!err) {
+		report->expected	= be16_to_cpup((__be16 *)(resp + 2));
+		report->received	= be16_to_cpup((__be16 *)(resp + 4));
+		report->uncorrectable	= be16_to_cpup((__be16 *)(resp + 6));
+	}
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(si476x_core_cmd_fm_rds_blockcount);
+
+int si476x_core_cmd_fm_phase_diversity(struct si476x_core *core,
+				       enum si476x_phase_diversity_mode mode)
+{
+	u8       resp[CMD_FM_PHASE_DIVERSITY_NRESP];
+	const u8 args[CMD_FM_PHASE_DIVERSITY_NARGS] = {
+		mode & 0b111,
+	};
+
+	return si476x_core_send_command(core, CMD_FM_PHASE_DIVERSITY,
+					args, ARRAY_SIZE(args),
+					resp, ARRAY_SIZE(resp),
+					SI476X_DEFAULT_TIMEOUT);
+}
+EXPORT_SYMBOL_GPL(si476x_core_cmd_fm_phase_diversity);
+/**
+ * si476x_core_cmd_fm_phase_div_status() - get the phase diversity
+ * status
+ *
+ * @core: si476x device
+ *
+ * NOTE caller must hold core lock
+ *
+ * Function returns the value of the status bit in case of success and
+ * negative error code in case of failure.
+ */
+int si476x_core_cmd_fm_phase_div_status(struct si476x_core *core)
+{
+	int err;
+	u8 resp[CMD_FM_PHASE_DIV_STATUS_NRESP];
+
+	err = si476x_core_send_command(core, CMD_FM_PHASE_DIV_STATUS,
+				       NULL, 0,
+				       resp, ARRAY_SIZE(resp),
+				       SI476X_DEFAULT_TIMEOUT);
+
+	return (err < 0) ? err : resp[1];
+}
+EXPORT_SYMBOL_GPL(si476x_core_cmd_fm_phase_div_status);
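+
+/*
+ * Usage sketch (illustrative only): switching a diversity-capable
+ * tuner to the primary-combining role and reading the status bit
+ * back; the caller is assumed to hold the core lock.
+ *
+ *	err = si476x_core_cmd_fm_phase_diversity(core,
+ *					SI476X_PHDIV_PRIMARY_COMBINING);
+ *	if (!err)
+ *		err = si476x_core_cmd_fm_phase_div_status(core);
+ */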
+
+
+/**
+ * si476x_core_cmd_am_seek_start() - send 'AM_SEEK_START' command to
+ * the device
+ * @core:   device to send the command to
+ * @seekup: if set, the direction of the search is 'up'
+ * @wrap:   if set, seek wraps when hitting the band limit
+ *
+ * This function begins the search for a valid station. The station is
+ * considered valid when the 'AM_VALID_SNR_THRESHOLD',
+ * 'AM_VALID_RSSI_THRESHOLD' and 'AM_VALID_MAX_TUNE_ERROR' criteria
+ * are met.
+ *
+ * Function returns 0 on success and negative error code on failure
+ */
+int si476x_core_cmd_am_seek_start(struct si476x_core *core,
+				  bool seekup, bool wrap)
+{
+	u8       resp[CMD_AM_SEEK_START_NRESP];
+	const u8 args[CMD_AM_SEEK_START_NARGS] = {
+		seekup << 3 | wrap << 2,
+	};
+
+	return si476x_cmd_tune_seek_freq(core,  CMD_AM_SEEK_START,
+					 args, sizeof(args),
+					 resp, sizeof(resp));
+}
+EXPORT_SYMBOL_GPL(si476x_core_cmd_am_seek_start);
+
+
+
+static int si476x_core_cmd_power_up_a10(struct si476x_core *core,
+					struct si476x_power_up_args *puargs)
+{
+	u8       resp[CMD_POWER_UP_A10_NRESP];
+	const bool intsel = (core->pinmux.a1 == SI476X_A1_IRQ);
+	const bool ctsen  = (core->client->irq != 0);
+	const u8 args[CMD_POWER_UP_A10_NARGS] = {
+		0xF7,		/* Reserved, always 0xF7 */
+		0x3F & puargs->xcload,	/* First two bits are reserved to be
+				 * zeros */
+		ctsen << 7 | intsel << 6 | 0x07, /* Last five bits
+						   * are reserved to
+						   * be written as 0x7 */
+		puargs->func << 4 | puargs->freq,
+		0x11,		/* Reserved, always 0x11 */
+	};
+
+	return si476x_core_send_command(core, CMD_POWER_UP,
+					args, ARRAY_SIZE(args),
+					resp, ARRAY_SIZE(resp),
+					SI476X_TIMEOUT_POWER_UP);
+}
+
+static int si476x_core_cmd_power_up_a20(struct si476x_core *core,
+				 struct si476x_power_up_args *puargs)
+{
+	u8       resp[CMD_POWER_UP_A20_NRESP];
+	const bool intsel = (core->pinmux.a1 == SI476X_A1_IRQ);
+	const bool ctsen  = (core->client->irq != 0);
+	const u8 args[CMD_POWER_UP_A20_NARGS] = {
+		puargs->ibias6x << 7 | puargs->xstart,
+		0x3F & puargs->xcload,	/* First two bits are reserved to be
+					 * zeros */
+		ctsen << 7 | intsel << 6 | puargs->fastboot << 5 |
+		puargs->xbiashc << 3 | puargs->xbias,
+		puargs->func << 4 | puargs->freq,
+		0x10 | puargs->xmode,
+	};
+
+	return si476x_core_send_command(core, CMD_POWER_UP,
+					args, ARRAY_SIZE(args),
+					resp, ARRAY_SIZE(resp),
+					SI476X_TIMEOUT_POWER_UP);
+}
+
+static int si476x_core_cmd_power_down_a10(struct si476x_core *core,
+					  struct si476x_power_down_args *pdargs)
+{
+	u8 resp[CMD_POWER_DOWN_A10_NRESP];
+
+	return si476x_core_send_command(core, CMD_POWER_DOWN,
+					NULL, 0,
+					resp, ARRAY_SIZE(resp),
+					SI476X_DEFAULT_TIMEOUT);
+}
+
+static int si476x_core_cmd_power_down_a20(struct si476x_core *core,
+					  struct si476x_power_down_args *pdargs)
+{
+	u8 resp[CMD_POWER_DOWN_A20_NRESP];
+	const u8 args[CMD_POWER_DOWN_A20_NARGS] = {
+		pdargs->xosc,
+	};
+	return si476x_core_send_command(core, CMD_POWER_DOWN,
+					args, ARRAY_SIZE(args),
+					resp, ARRAY_SIZE(resp),
+					SI476X_DEFAULT_TIMEOUT);
+}
+
+static int si476x_core_cmd_am_tune_freq_a10(struct si476x_core *core,
+					struct si476x_tune_freq_args *tuneargs)
+{
+
+	const int am_freq = tuneargs->freq;
+	u8       resp[CMD_AM_TUNE_FREQ_NRESP];
+	const u8 args[CMD_AM_TUNE_FREQ_NARGS] = {
+		(tuneargs->hd << 6),
+		msb(am_freq),
+		lsb(am_freq),
+	};
+
+	return si476x_cmd_tune_seek_freq(core, CMD_AM_TUNE_FREQ, args,
+					 sizeof(args),
+					 resp, sizeof(resp));
+}
+
+static int si476x_core_cmd_am_tune_freq_a20(struct si476x_core *core,
+					struct si476x_tune_freq_args *tuneargs)
+{
+	const int am_freq = tuneargs->freq;
+	u8       resp[CMD_AM_TUNE_FREQ_NRESP];
+	const u8 args[CMD_AM_TUNE_FREQ_NARGS] = {
+		(tuneargs->zifsr << 6) | (tuneargs->injside & 0b11),
+		msb(am_freq),
+		lsb(am_freq),
+	};
+
+	return si476x_cmd_tune_seek_freq(core, CMD_AM_TUNE_FREQ,
+					 args, sizeof(args),
+					 resp, sizeof(resp));
+}
+
+static int si476x_core_cmd_fm_rsq_status_a10(struct si476x_core *core,
+					struct si476x_rsq_status_args *rsqargs,
+					struct si476x_rsq_status_report *report)
+{
+	int err;
+	u8       resp[CMD_FM_RSQ_STATUS_A10_NRESP];
+	const u8 args[CMD_FM_RSQ_STATUS_A10_NARGS] = {
+		rsqargs->rsqack << 3 | rsqargs->attune << 2 |
+		rsqargs->cancel << 1 | rsqargs->stcack,
+	};
+
+	err = si476x_core_send_command(core, CMD_FM_RSQ_STATUS,
+				       args, ARRAY_SIZE(args),
+				       resp, ARRAY_SIZE(resp),
+				       SI476X_DEFAULT_TIMEOUT);
+	/*
+	 * Besides getting received signal quality information, this
+	 * command can be used to just acknowledge different interrupt
+	 * flags; in those cases it is useless to copy and parse the
+	 * received data, so the user can pass NULL and thus avoid
+	 * unnecessary copying.
+	 */
+	if (err < 0 || report == NULL)
+		return err;
+
+	report->multhint	= 0b10000000 & resp[1];
+	report->multlint	= 0b01000000 & resp[1];
+	report->snrhint		= 0b00001000 & resp[1];
+	report->snrlint		= 0b00000100 & resp[1];
+	report->rssihint	= 0b00000010 & resp[1];
+	report->rssilint	= 0b00000001 & resp[1];
+
+	report->bltf		= 0b10000000 & resp[2];
+	report->snr_ready	= 0b00100000 & resp[2];
+	report->rssiready	= 0b00001000 & resp[2];
+	report->afcrl		= 0b00000010 & resp[2];
+	report->valid		= 0b00000001 & resp[2];
+
+	report->readfreq	= be16_to_cpup((__be16 *)(resp + 3));
+	report->freqoff		= resp[5];
+	report->rssi		= resp[6];
+	report->snr		= resp[7];
+	report->lassi		= resp[9];
+	report->hassi		= resp[10];
+	report->mult		= resp[11];
+	report->dev		= resp[12];
+	report->readantcap	= be16_to_cpup((__be16 *)(resp + 13));
+	report->assi		= resp[15];
+	report->usn		= resp[16];
+
+	return err;
+}
+
+static int si476x_core_cmd_fm_rsq_status_a20(struct si476x_core *core,
+					     struct si476x_rsq_status_args *rsqargs,
+					     struct si476x_rsq_status_report *report)
+{
+	int err;
+	u8       resp[CMD_FM_RSQ_STATUS_A10_NRESP];
+	const u8 args[CMD_FM_RSQ_STATUS_A30_NARGS] = {
+		rsqargs->primary << 4 | rsqargs->rsqack << 3 |
+		rsqargs->attune  << 2 | rsqargs->cancel << 1 |
+		rsqargs->stcack,
+	};
+
+	err = si476x_core_send_command(core, CMD_FM_RSQ_STATUS,
+				       args, ARRAY_SIZE(args),
+				       resp, ARRAY_SIZE(resp),
+				       SI476X_DEFAULT_TIMEOUT);
+	/*
+	 * Besides getting received signal quality information, this
+	 * command can be used to just acknowledge different interrupt
+	 * flags; in those cases it is useless to copy and parse the
+	 * received data, so the user can pass NULL and thus avoid
+	 * unnecessary copying.
+	 */
+	if (err < 0 || report == NULL)
+		return err;
+
+	report->multhint	= 0b10000000 & resp[1];
+	report->multlint	= 0b01000000 & resp[1];
+	report->snrhint		= 0b00001000 & resp[1];
+	report->snrlint		= 0b00000100 & resp[1];
+	report->rssihint	= 0b00000010 & resp[1];
+	report->rssilint	= 0b00000001 & resp[1];
+
+	report->bltf		= 0b10000000 & resp[2];
+	report->snr_ready	= 0b00100000 & resp[2];
+	report->rssiready	= 0b00001000 & resp[2];
+	report->afcrl		= 0b00000010 & resp[2];
+	report->valid		= 0b00000001 & resp[2];
+
+	report->readfreq	= be16_to_cpup((__be16 *)(resp + 3));
+	report->freqoff		= resp[5];
+	report->rssi		= resp[6];
+	report->snr		= resp[7];
+	report->lassi		= resp[9];
+	report->hassi		= resp[10];
+	report->mult		= resp[11];
+	report->dev		= resp[12];
+	report->readantcap	= be16_to_cpup((__be16 *)(resp + 13));
+	report->assi		= resp[15];
+	report->usn		= resp[16];
+
+	return err;
+}
+
+
+static int si476x_core_cmd_fm_rsq_status_a30(struct si476x_core *core,
+					struct si476x_rsq_status_args *rsqargs,
+					struct si476x_rsq_status_report *report)
+{
+	int err;
+	u8       resp[CMD_FM_RSQ_STATUS_A30_NRESP];
+	const u8 args[CMD_FM_RSQ_STATUS_A30_NARGS] = {
+		rsqargs->primary << 4 | rsqargs->rsqack << 3 |
+		rsqargs->attune << 2 | rsqargs->cancel << 1 |
+		rsqargs->stcack,
+	};
+
+	err = si476x_core_send_command(core, CMD_FM_RSQ_STATUS,
+				       args, ARRAY_SIZE(args),
+				       resp, ARRAY_SIZE(resp),
+				       SI476X_DEFAULT_TIMEOUT);
+	/*
+	 * Besides getting received signal quality information, this
+	 * command can be used to just acknowledge different interrupt
+	 * flags; in those cases it is useless to copy and parse the
+	 * received data, so the user can pass NULL and thus avoid
+	 * unnecessary copying.
+	 */
+	if (err < 0 || report == NULL)
+		return err;
+
+	report->multhint	= 0b10000000 & resp[1];
+	report->multlint	= 0b01000000 & resp[1];
+	report->snrhint		= 0b00001000 & resp[1];
+	report->snrlint		= 0b00000100 & resp[1];
+	report->rssihint	= 0b00000010 & resp[1];
+	report->rssilint	= 0b00000001 & resp[1];
+
+	report->bltf		= 0b10000000 & resp[2];
+	report->snr_ready	= 0b00100000 & resp[2];
+	report->rssiready	= 0b00001000 & resp[2];
+	report->injside         = 0b00000100 & resp[2];
+	report->afcrl		= 0b00000010 & resp[2];
+	report->valid		= 0b00000001 & resp[2];
+
+	report->readfreq	= be16_to_cpup((__be16 *)(resp + 3));
+	report->freqoff		= resp[5];
+	report->rssi		= resp[6];
+	report->snr		= resp[7];
+	report->issi		= resp[8];
+	report->lassi		= resp[9];
+	report->hassi		= resp[10];
+	report->mult		= resp[11];
+	report->dev		= resp[12];
+	report->readantcap	= be16_to_cpup((__be16 *)(resp + 13));
+	report->assi		= resp[15];
+	report->usn		= resp[16];
+
+	report->pilotdev	= resp[17];
+	report->rdsdev		= resp[18];
+	report->assidev		= resp[19];
+	report->strongdev	= resp[20];
+	report->rdspi		= be16_to_cpup((__be16 *)(resp + 21));
+
+	return err;
+}
+
+static int si476x_core_cmd_fm_tune_freq_a10(struct si476x_core *core,
+					struct si476x_tune_freq_args *tuneargs)
+{
+	u8       resp[CMD_FM_TUNE_FREQ_NRESP];
+	const u8 args[CMD_FM_TUNE_FREQ_A10_NARGS] = {
+		(tuneargs->hd << 6) | (tuneargs->tunemode << 4)
+		| (tuneargs->smoothmetrics << 2),
+		msb(tuneargs->freq),
+		lsb(tuneargs->freq),
+		msb(tuneargs->antcap),
+		lsb(tuneargs->antcap)
+	};
+
+	return si476x_cmd_tune_seek_freq(core, CMD_FM_TUNE_FREQ,
+					 args, sizeof(args),
+					 resp, sizeof(resp));
+}
+
+static int si476x_core_cmd_fm_tune_freq_a20(struct si476x_core *core,
+					struct si476x_tune_freq_args *tuneargs)
+{
+	u8       resp[CMD_FM_TUNE_FREQ_NRESP];
+	const u8 args[CMD_FM_TUNE_FREQ_A20_NARGS] = {
+		(tuneargs->hd << 6) | (tuneargs->tunemode << 4)
+		|  (tuneargs->smoothmetrics << 2) | (tuneargs->injside),
+		msb(tuneargs->freq),
+		lsb(tuneargs->freq),
+	};
+
+	return si476x_cmd_tune_seek_freq(core, CMD_FM_TUNE_FREQ,
+					 args, sizeof(args),
+					 resp, sizeof(resp));
+}
+
+static int si476x_core_cmd_agc_status_a20(struct si476x_core *core,
+					struct si476x_agc_status_report *report)
+{
+	int err;
+	u8 resp[CMD_AGC_STATUS_NRESP_A20];
+
+	if (!report)
+		return -EINVAL;
+
+	err = si476x_core_send_command(core, CMD_AGC_STATUS,
+				       NULL, 0,
+				       resp, ARRAY_SIZE(resp),
+				       SI476X_DEFAULT_TIMEOUT);
+	if (err < 0)
+		return err;
+
+	report->mxhi		= resp[1] & SI476X_AGC_MXHI;
+	report->mxlo		= resp[1] & SI476X_AGC_MXLO;
+	report->lnahi		= resp[1] & SI476X_AGC_LNAHI;
+	report->lnalo		= resp[1] & SI476X_AGC_LNALO;
+	report->fmagc1		= resp[2];
+	report->fmagc2		= resp[3];
+	report->pgagain		= resp[4];
+	report->fmwblang	= resp[5];
+
+	return err;
+}
+
+static int si476x_core_cmd_agc_status_a10(struct si476x_core *core,
+					struct si476x_agc_status_report *report)
+{
+	int err;
+	u8 resp[CMD_AGC_STATUS_NRESP_A10];
+
+	if (!report)
+		return -EINVAL;
+
+	err = si476x_core_send_command(core, CMD_AGC_STATUS,
+				       NULL, 0,
+				       resp, ARRAY_SIZE(resp),
+				       SI476X_DEFAULT_TIMEOUT);
+	if (err < 0)
+		return err;
+
+	report->mxhi	= resp[1] & SI476X_AGC_MXHI;
+	report->mxlo	= resp[1] & SI476X_AGC_MXLO;
+	report->lnahi	= resp[1] & SI476X_AGC_LNAHI;
+	report->lnalo	= resp[1] & SI476X_AGC_LNALO;
+
+	return err;
+}
+
+typedef int (*tune_freq_func_t) (struct si476x_core *core,
+				 struct si476x_tune_freq_args *tuneargs);
+
+static struct {
+	int (*power_up) (struct si476x_core *,
+			 struct si476x_power_up_args *);
+	int (*power_down) (struct si476x_core *,
+			   struct si476x_power_down_args *);
+
+	tune_freq_func_t fm_tune_freq;
+	tune_freq_func_t am_tune_freq;
+
+	int (*fm_rsq_status)(struct si476x_core *,
+			     struct si476x_rsq_status_args *,
+			     struct si476x_rsq_status_report *);
+
+	int (*agc_status)(struct si476x_core *,
+			  struct si476x_agc_status_report *);
+	int (*intb_pin_cfg)(struct si476x_core *core,
+			    enum si476x_intb_config intb,
+			    enum si476x_a1_config a1);
+} si476x_cmds_vtable[] = {
+	[SI476X_REVISION_A10] = {
+		.power_up	= si476x_core_cmd_power_up_a10,
+		.power_down	= si476x_core_cmd_power_down_a10,
+		.fm_tune_freq	= si476x_core_cmd_fm_tune_freq_a10,
+		.am_tune_freq	= si476x_core_cmd_am_tune_freq_a10,
+		.fm_rsq_status	= si476x_core_cmd_fm_rsq_status_a10,
+		.agc_status	= si476x_core_cmd_agc_status_a10,
+		.intb_pin_cfg   = si476x_core_cmd_intb_pin_cfg_a10,
+	},
+	[SI476X_REVISION_A20] = {
+		.power_up	= si476x_core_cmd_power_up_a20,
+		.power_down	= si476x_core_cmd_power_down_a20,
+		.fm_tune_freq	= si476x_core_cmd_fm_tune_freq_a20,
+		.am_tune_freq	= si476x_core_cmd_am_tune_freq_a20,
+		.fm_rsq_status	= si476x_core_cmd_fm_rsq_status_a20,
+		.agc_status	= si476x_core_cmd_agc_status_a20,
+		.intb_pin_cfg   = si476x_core_cmd_intb_pin_cfg_a20,
+	},
+	[SI476X_REVISION_A30] = {
+		.power_up	= si476x_core_cmd_power_up_a20,
+		.power_down	= si476x_core_cmd_power_down_a20,
+		.fm_tune_freq	= si476x_core_cmd_fm_tune_freq_a20,
+		.am_tune_freq	= si476x_core_cmd_am_tune_freq_a20,
+		.fm_rsq_status	= si476x_core_cmd_fm_rsq_status_a30,
+		.agc_status	= si476x_core_cmd_agc_status_a20,
+		.intb_pin_cfg   = si476x_core_cmd_intb_pin_cfg_a20,
+	},
+};
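+
+/*
+ * The exported wrappers below dispatch through the vtable to the
+ * implementation matching the chip revision detected at probe time,
+ * e.g.:
+ *
+ *	si476x_cmds_vtable[core->revision].power_up(core, args);
+ */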
+
+int si476x_core_cmd_power_up(struct si476x_core *core,
+			     struct si476x_power_up_args *args)
+{
+	BUG_ON(core->revision > SI476X_REVISION_A30 ||
+	       core->revision == -1);
+	return si476x_cmds_vtable[core->revision].power_up(core, args);
+}
+EXPORT_SYMBOL_GPL(si476x_core_cmd_power_up);
+
+int si476x_core_cmd_power_down(struct si476x_core *core,
+			       struct si476x_power_down_args *args)
+{
+	BUG_ON(core->revision > SI476X_REVISION_A30 ||
+	       core->revision == -1);
+	return si476x_cmds_vtable[core->revision].power_down(core, args);
+}
+EXPORT_SYMBOL_GPL(si476x_core_cmd_power_down);
+
+int si476x_core_cmd_fm_tune_freq(struct si476x_core *core,
+				 struct si476x_tune_freq_args *args)
+{
+	BUG_ON(core->revision > SI476X_REVISION_A30 ||
+	       core->revision == -1);
+	return si476x_cmds_vtable[core->revision].fm_tune_freq(core, args);
+}
+EXPORT_SYMBOL_GPL(si476x_core_cmd_fm_tune_freq);
+
+int si476x_core_cmd_am_tune_freq(struct si476x_core *core,
+				 struct si476x_tune_freq_args *args)
+{
+	BUG_ON(core->revision > SI476X_REVISION_A30 ||
+	       core->revision == -1);
+	return si476x_cmds_vtable[core->revision].am_tune_freq(core, args);
+}
+EXPORT_SYMBOL_GPL(si476x_core_cmd_am_tune_freq);
+
+int si476x_core_cmd_fm_rsq_status(struct si476x_core *core,
+				  struct si476x_rsq_status_args *args,
+				  struct si476x_rsq_status_report *report)
+
+{
+	BUG_ON(core->revision > SI476X_REVISION_A30 ||
+	       core->revision == -1);
+	return si476x_cmds_vtable[core->revision].fm_rsq_status(core, args,
+								report);
+}
+EXPORT_SYMBOL_GPL(si476x_core_cmd_fm_rsq_status);
+
+int si476x_core_cmd_agc_status(struct si476x_core *core,
+				  struct si476x_agc_status_report *report)
+
+{
+	BUG_ON(core->revision > SI476X_REVISION_A30 ||
+	       core->revision == -1);
+	return si476x_cmds_vtable[core->revision].agc_status(core, report);
+}
+EXPORT_SYMBOL_GPL(si476x_core_cmd_agc_status);
+
+int si476x_core_cmd_intb_pin_cfg(struct si476x_core *core,
+			    enum si476x_intb_config intb,
+			    enum si476x_a1_config a1)
+{
+	BUG_ON(core->revision > SI476X_REVISION_A30 ||
+	       core->revision == -1);
+
+	return si476x_cmds_vtable[core->revision].intb_pin_cfg(core, intb, a1);
+}
+EXPORT_SYMBOL_GPL(si476x_core_cmd_intb_pin_cfg);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Andrey Smirnov <andrew.smirnov@gmail.com>");
+MODULE_DESCRIPTION("API for command exchange for si476x");
diff --git a/drivers/mfd/si476x-i2c.c b/drivers/mfd/si476x-i2c.c
new file mode 100644
index 0000000..f5bc8e4
--- /dev/null
+++ b/drivers/mfd/si476x-i2c.c
@@ -0,0 +1,886 @@
+/*
+ * drivers/mfd/si476x-i2c.c -- Core device driver for si476x MFD
+ * device
+ *
+ * Copyright (C) 2012 Innovative Converged Devices(ICD)
+ * Copyright (C) 2013 Andrey Smirnov
+ *
+ * Author: Andrey Smirnov <andrew.smirnov@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ */
+#include <linux/module.h>
+
+#include <linux/slab.h>
+#include <linux/interrupt.h>
+#include <linux/delay.h>
+#include <linux/gpio.h>
+#include <linux/regulator/consumer.h>
+#include <linux/i2c.h>
+#include <linux/err.h>
+
+#include <linux/mfd/si476x-core.h>
+
+#define SI476X_MAX_IO_ERRORS		10
+#define SI476X_DRIVER_RDS_FIFO_DEPTH	128
+
+/**
+ * si476x_core_config_pinmux() - pin function configuration function
+ *
+ * @core: Core device structure
+ *
+ * Configure the functions of the pins of the radio chip.
+ *
+ * The function returns zero in case of success or negative error code
+ * otherwise.
+ */
+static int si476x_core_config_pinmux(struct si476x_core *core)
+{
+	int err;
+	dev_dbg(&core->client->dev, "Configuring pinmux\n");
+	err = si476x_core_cmd_dig_audio_pin_cfg(core,
+						core->pinmux.dclk,
+						core->pinmux.dfs,
+						core->pinmux.dout,
+						core->pinmux.xout);
+	if (err < 0) {
+		dev_err(&core->client->dev,
+			"Failed to configure digital audio pins(err = %d)\n",
+			err);
+		return err;
+	}
+
+	err = si476x_core_cmd_zif_pin_cfg(core,
+					  core->pinmux.iqclk,
+					  core->pinmux.iqfs,
+					  core->pinmux.iout,
+					  core->pinmux.qout);
+	if (err < 0) {
+		dev_err(&core->client->dev,
+			"Failed to configure ZIF pins(err = %d)\n",
+			err);
+		return err;
+	}
+
+	err = si476x_core_cmd_ic_link_gpo_ctl_pin_cfg(core,
+						      core->pinmux.icin,
+						      core->pinmux.icip,
+						      core->pinmux.icon,
+						      core->pinmux.icop);
+	if (err < 0) {
+		dev_err(&core->client->dev,
+			"Failed to configure IC-Link/GPO pins(err = %d)\n",
+			err);
+		return err;
+	}
+
+	err = si476x_core_cmd_ana_audio_pin_cfg(core,
+						core->pinmux.lrout);
+	if (err < 0) {
+		dev_err(&core->client->dev,
+			"Failed to configure analog audio pins(err = %d)\n",
+			err);
+		return err;
+	}
+
+	err = si476x_core_cmd_intb_pin_cfg(core,
+					   core->pinmux.intb,
+					   core->pinmux.a1);
+	if (err < 0) {
+		dev_err(&core->client->dev,
+			"Failed to configure interrupt pins(err = %d)\n",
+			err);
+		return err;
+	}
+
+	return 0;
+}
+
+static inline void si476x_core_schedule_polling_work(struct si476x_core *core)
+{
+	schedule_delayed_work(&core->status_monitor,
+			      usecs_to_jiffies(SI476X_STATUS_POLL_US));
+}
+
+/**
+ * si476x_core_start() - early chip startup function
+ * @core: Core device structure
+ * @soft: When set, this flag forces a "soft" startup, i.e. one that
+ * does not touch the reset pin of the tuner and is used after a
+ * "soft" power down done by sending the appropriate command instead
+ * of using the reset pin
+ *
+ * Perform the startup sequence required to correctly power
+ * up the chip and perform its initial configuration. It does the
+ * following sequence of actions:
+ *       1. Claims and enables the power supplies VD and VIO1 required
+ *          for the operation of the chip's I2C interface.
+ *       2. Waits for 100us, pulls the reset line up, enables the irq,
+ *          and waits for another 100us as specified by the
+ *          datasheet.
+ *       3. Sends the 'POWER_UP' command to the device with all the
+ *          provided information about the power-up parameters.
+ *       4. Configures the pin multiplexer, disables digital audio and
+ *          configures the interrupt sources.
+ *
+ * The function returns zero in case of success or negative error code
+ * otherwise.
+ */
+int si476x_core_start(struct si476x_core *core, bool soft)
+{
+	struct i2c_client *client = core->client;
+	int err;
+
+	if (!soft) {
+		if (gpio_is_valid(core->gpio_reset))
+			gpio_set_value_cansleep(core->gpio_reset, 1);
+
+		if (client->irq)
+			enable_irq(client->irq);
+
+		udelay(100);
+
+		if (!client->irq) {
+			atomic_set(&core->is_alive, 1);
+			si476x_core_schedule_polling_work(core);
+		}
+	} else {
+		if (client->irq)
+			enable_irq(client->irq);
+		else {
+			atomic_set(&core->is_alive, 1);
+			si476x_core_schedule_polling_work(core);
+		}
+	}
+
+	err = si476x_core_cmd_power_up(core,
+				       &core->power_up_parameters);
+
+	if (err < 0) {
+		dev_err(&core->client->dev,
+			"Power up failure(err = %d)\n",
+			err);
+		goto disable_irq;
+	}
+
+	if (client->irq)
+		atomic_set(&core->is_alive, 1);
+
+	err = si476x_core_config_pinmux(core);
+	if (err < 0) {
+		dev_err(&core->client->dev,
+			"Failed to configure pinmux(err = %d)\n",
+			err);
+		goto disable_irq;
+	}
+
+	if (client->irq) {
+		err = regmap_write(core->regmap,
+				   SI476X_PROP_INT_CTL_ENABLE,
+				   SI476X_RDSIEN |
+				   SI476X_STCIEN |
+				   SI476X_CTSIEN);
+		if (err < 0) {
+			dev_err(&core->client->dev,
+				"Failed to configure interrupt sources"
+				"(err = %d)\n", err);
+			goto disable_irq;
+		}
+	}
+
+	return 0;
+
+disable_irq:
+	if (err == -ENODEV)
+		atomic_set(&core->is_alive, 0);
+
+	if (client->irq)
+		disable_irq(client->irq);
+	else
+		cancel_delayed_work_sync(&core->status_monitor);
+
+	if (gpio_is_valid(core->gpio_reset))
+		gpio_set_value_cansleep(core->gpio_reset, 0);
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(si476x_core_start);
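+
+/*
+ * Usage sketch (illustrative only): a full "hard" start followed by a
+ * "soft" stop (see si476x_core_stop() below), serialized with the
+ * core lock like the rest of the driver.
+ *
+ *	si476x_core_lock(core);
+ *	err = si476x_core_start(core, false);
+ *	if (!err)
+ *		err = si476x_core_stop(core, true);
+ *	si476x_core_unlock(core);
+ */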
+
+/**
+ * si476x_core_stop() - chip power-down function
+ * @core: Core device structure
+ * @soft: When set, function sends a POWER_DOWN command instead of
+ * bringing reset line low
+ *
+ * Power down the chip by performing the following actions:
+ * 1. Disable IRQ or stop the polling worker
+ * 2. Send the POWER_DOWN command if the power down is soft or bring
+ *    reset line low if not.
+ *
+ * The function returns zero in case of success or negative error code
+ * otherwise.
+ */
+int si476x_core_stop(struct si476x_core *core, bool soft)
+{
+	int err = 0;
+	atomic_set(&core->is_alive, 0);
+
+	if (soft) {
+	/* TODO: This probably should be a configurable option,
+		 * so it is possible to have the chips keep their
+		 * oscillators running
+		 */
+		struct si476x_power_down_args args = {
+			.xosc = false,
+		};
+		err = si476x_core_cmd_power_down(core, &args);
+	}
+
+	/*
+	 * We couldn't disable those before
+	 * 'si476x_core_cmd_power_down' since we expect to get a CTS
+	 * interrupt
+	 */
+	if (core->client->irq)
+		disable_irq(core->client->irq);
+	else
+		cancel_delayed_work_sync(&core->status_monitor);
+
+	if (!soft) {
+		if (gpio_is_valid(core->gpio_reset))
+			gpio_set_value_cansleep(core->gpio_reset, 0);
+	}
+	return err;
+}
+EXPORT_SYMBOL_GPL(si476x_core_stop);
+
+/**
+ * si476x_core_set_power_state() - set the power state of the chip
+ * @core: Core device structure
+ * @next_state: enum si476x_power_state describing power state to
+ *              switch to.
+ *
+ * Switch on all the required power supplies
+ *
+ * This function returns 0 in case of success and negative error code
+ * otherwise.
+ */
+int si476x_core_set_power_state(struct si476x_core *core,
+				enum si476x_power_state next_state)
+{
+	/*
+	   It is not clear from the datasheet if it is possible to
+	   work with the device if not all power domains are operational.
+	   So for now the power-up policy is "power-up all the things!"
+	 */
+	int err = 0;
+
+	if (core->power_state == SI476X_POWER_INCONSISTENT) {
+		dev_err(&core->client->dev,
+			"The device is in an inconsistent power state\n");
+		return -EINVAL;
+	}
+
+	if (next_state != core->power_state) {
+		switch (next_state) {
+		case SI476X_POWER_UP_FULL:
+			err = regulator_bulk_enable(ARRAY_SIZE(core->supplies),
+						    core->supplies);
+			if (err < 0) {
+				core->power_state = SI476X_POWER_INCONSISTENT;
+				break;
+			}
+			/*
+			 * The startup timing diagram recommends a
+			 * 100 us delay between enabling the power
+			 * supplies and turning the tuner on.
+			 */
+			udelay(100);
+
+			err = si476x_core_start(core, false);
+			if (err < 0)
+				goto disable_regulators;
+
+			core->power_state = next_state;
+			break;
+
+		case SI476X_POWER_DOWN:
+			core->power_state = next_state;
+			err = si476x_core_stop(core, false);
+			if (err < 0)
+				core->power_state = SI476X_POWER_INCONSISTENT;
+disable_regulators:
+			err = regulator_bulk_disable(ARRAY_SIZE(core->supplies),
+						     core->supplies);
+			if (err < 0)
+				core->power_state = SI476X_POWER_INCONSISTENT;
+			break;
+		default:
+			BUG();
+		}
+	}
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(si476x_core_set_power_state);
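+
+/*
+ * Usage sketch (illustrative only): the power-up/power-down cycle
+ * used by si476x_core_get_revision_info() below.
+ *
+ *	si476x_core_lock(core);
+ *	err = si476x_core_set_power_state(core, SI476X_POWER_UP_FULL);
+ *	if (!err)
+ *		si476x_core_set_power_state(core, SI476X_POWER_DOWN);
+ *	si476x_core_unlock(core);
+ */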
+
+/**
+ * si476x_core_report_drainer_stop() - mark the completion of the RDS
+ * buffer drain process by the worker.
+ *
+ * @core: Core device structure
+ */
+static inline void si476x_core_report_drainer_stop(struct si476x_core *core)
+{
+	mutex_lock(&core->rds_drainer_status_lock);
+	core->rds_drainer_is_working = false;
+	mutex_unlock(&core->rds_drainer_status_lock);
+}
+
+/**
+ * si476x_core_start_rds_drainer_once() - start RDS drainer worker if
+ * there is none working, do nothing otherwise
+ *
+ * @core: Datastructure corresponding to the chip.
+ */
+static inline void si476x_core_start_rds_drainer_once(struct si476x_core *core)
+{
+	mutex_lock(&core->rds_drainer_status_lock);
+	if (!core->rds_drainer_is_working) {
+		core->rds_drainer_is_working = true;
+		schedule_work(&core->rds_fifo_drainer);
+	}
+	mutex_unlock(&core->rds_drainer_status_lock);
+}
+
+/**
+ * si476x_core_drain_rds_fifo() - RDS buffer drainer.
+ * @work: struct work_struct being passed to the function by the
+ * kernel.
+ *
+ * Drain the contents of the chip's RDS FIFO into the driver's
+ * software FIFO.
+ */
+static void si476x_core_drain_rds_fifo(struct work_struct *work)
+{
+	int err;
+
+	struct si476x_core *core = container_of(work, struct si476x_core,
+						rds_fifo_drainer);
+
+	struct si476x_rds_status_report report;
+
+	si476x_core_lock(core);
+	err = si476x_core_cmd_fm_rds_status(core, true, false, false, &report);
+	if (!err) {
+		int i = report.rdsfifoused;
+		dev_dbg(&core->client->dev,
+			"%d elements in RDS FIFO. Draining.\n", i);
+		for (; i > 0; --i) {
+			err = si476x_core_cmd_fm_rds_status(core, false, false,
+							    (i == 1), &report);
+			if (err < 0)
+				goto unlock;
+
+			kfifo_in(&core->rds_fifo, report.rds,
+				 sizeof(report.rds));
+			dev_dbg(&core->client->dev, "RDS data:\n %*ph\n",
+				(int)sizeof(report.rds), report.rds);
+		}
+		dev_dbg(&core->client->dev, "Drrrrained!\n");
+		wake_up_interruptible(&core->rds_read_queue);
+	}
+
+unlock:
+	si476x_core_unlock(core);
+	si476x_core_report_drainer_stop(core);
+}
+
+/**
+ * si476x_core_pronounce_dead()
+ *
+ * @core: Core device structure
+ *
+ * Mark the device as being dead and wake up all potentially waiting
+ * threads of execution.
+ *
+ */
+static void si476x_core_pronounce_dead(struct si476x_core *core)
+{
+	dev_info(&core->client->dev, "Core device is dead.\n");
+
+	atomic_set(&core->is_alive, 0);
+
+	/* Wake up all possible waiting processes */
+	wake_up_interruptible(&core->rds_read_queue);
+
+	atomic_set(&core->cts, 1);
+	wake_up(&core->command);
+
+	atomic_set(&core->stc, 1);
+	wake_up(&core->tuning);
+}
+
+/**
+ * si476x_core_i2c_xfer()
+ *
+ * @core: Core device structure
+ * @type: Transfer type
+ * @buf: Transfer buffer for/with data
+ * @count: Transfer buffer size
+ *
+ * Perform an I2C transfer (either read or write) and keep a counter
+ * of I/O errors. If the error counter rises above the threshold,
+ * pronounce the device dead.
+ *
+ * The function returns the number of transferred bytes on success or
+ * a negative error code on failure.
+ */
+int si476x_core_i2c_xfer(struct si476x_core *core,
+		    enum si476x_i2c_type type,
+		    char *buf, int count)
+{
+	static int io_errors_count;
+	int err;
+	if (type == SI476X_I2C_SEND)
+		err = i2c_master_send(core->client, buf, count);
+	else
+		err = i2c_master_recv(core->client, buf, count);
+
+	if (err < 0) {
+		if (io_errors_count++ > SI476X_MAX_IO_ERRORS)
+			si476x_core_pronounce_dead(core);
+	} else {
+		io_errors_count = 0;
+	}
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(si476x_core_i2c_xfer);
+
+/**
+ * si476x_core_get_status()
+ * @core: Core device structure
+ *
+ * Get the status byte of the core device by performing a one byte I2C
+ * read.
+ *
+ * The function returns a status value or a negative error code on
+ * error.
+ */
+static int si476x_core_get_status(struct si476x_core *core)
+{
+	u8 response;
+	int err = si476x_core_i2c_xfer(core, SI476X_I2C_RECV,
+				  &response, sizeof(response));
+
+	return (err < 0) ? err : response;
+}
+
+/**
+ * si476x_core_get_and_signal_status() - IRQ dispatcher
+ * @core: Core device structure
+ *
+ * Dispatch the arrived interrupt request based on the value of the
+ * status byte reported by the tuner.
+ *
+ */
+static void si476x_core_get_and_signal_status(struct si476x_core *core)
+{
+	int status = si476x_core_get_status(core);
+	if (status < 0) {
+		dev_err(&core->client->dev, "Failed to get status\n");
+		return;
+	}
+
+	if (status & SI476X_CTS) {
+		/*
+		 * Unfortunately completions could not be used for
+		 * signalling CTS since this flag cannot be cleared
+		 * in the status byte, and therefore once it becomes true
+		 * multiple calls to 'complete' would cause the
+		 * commands following the current one to be completed
+		 * before they actually are.
+		 */
+		dev_dbg(&core->client->dev, "[interrupt] CTSINT\n");
+		atomic_set(&core->cts, 1);
+		wake_up(&core->command);
+	}
+
+	if (status & SI476X_FM_RDS_INT) {
+		dev_dbg(&core->client->dev, "[interrupt] RDSINT\n");
+		si476x_core_start_rds_drainer_once(core);
+	}
+
+	if (status & SI476X_STC_INT) {
+		dev_dbg(&core->client->dev, "[interrupt] STCINT\n");
+		atomic_set(&core->stc, 1);
+		wake_up(&core->tuning);
+	}
+}
+
+static void si476x_core_poll_loop(struct work_struct *work)
+{
+	struct si476x_core *core = SI476X_WORK_TO_CORE(work);
+
+	si476x_core_get_and_signal_status(core);
+
+	if (atomic_read(&core->is_alive))
+		si476x_core_schedule_polling_work(core);
+}
+
+static irqreturn_t si476x_core_interrupt(int irq, void *dev)
+{
+	struct si476x_core *core = dev;
+
+	si476x_core_get_and_signal_status(core);
+
+	return IRQ_HANDLED;
+}
+
+/**
+ * si476x_core_fwver_to_revision()
+ * @core:   Core device structure
+ * @func:   Function of the loaded firmware (FM/AM/WB)
+ * @major:  Firmware major number
+ * @minor1: Firmware first minor number
+ * @minor2: Firmware second minor number
+ *
+ * Convert a chip's firmware version number into an offset that later
+ * will be used as an offset into the "vtable" of tuner functions
+ *
+ * This function returns a positive offset in case of success and -1
+ * in case of failure.
+ */
+static int si476x_core_fwver_to_revision(struct si476x_core *core,
+					 int func, int major,
+					 int minor1, int minor2)
+{
+	switch (func) {
+	case SI476X_FUNC_FM_RECEIVER:
+		switch (major) {
+		case 5:
+			return SI476X_REVISION_A10;
+		case 8:
+			return SI476X_REVISION_A20;
+		case 10:
+			return SI476X_REVISION_A30;
+		default:
+			goto unknown_revision;
+		}
+	case SI476X_FUNC_AM_RECEIVER:
+		switch (major) {
+		case 5:
+			return SI476X_REVISION_A10;
+		case 7:
+			return SI476X_REVISION_A20;
+		case 9:
+			return SI476X_REVISION_A30;
+		default:
+			goto unknown_revision;
+		}
+	case SI476X_FUNC_WB_RECEIVER:
+		switch (major) {
+		case 3:
+			return SI476X_REVISION_A10;
+		case 5:
+			return SI476X_REVISION_A20;
+		case 7:
+			return SI476X_REVISION_A30;
+		default:
+			goto unknown_revision;
+		}
+	case SI476X_FUNC_BOOTLOADER:
+	default:		/* FALLTHROUGH */
+		BUG();
+		return -1;
+	}
+
+unknown_revision:
+	dev_err(&core->client->dev,
+		"Unsupported version of the firmware: %d.%d.%d, "
+		"reverting to A10 comptible functions\n",
+		major, minor1, minor2);
+
+	return SI476X_REVISION_A10;
+}
+
+/**
+ * si476x_core_get_revision_info()
+ * @core: Core device structure
+ *
+ * Get the firmware version number of the device. It is done in the
+ * following three steps:
+ *    1. Power up the device
+ *    2. Send the 'FUNC_INFO' command
+ *    3. Power down the device
+ *
+ * The function returns zero on success and a negative error code on
+ * failure.
+ */
+static int si476x_core_get_revision_info(struct si476x_core *core)
+{
+	int rval;
+	struct si476x_func_info info;
+
+	si476x_core_lock(core);
+	rval = si476x_core_set_power_state(core, SI476X_POWER_UP_FULL);
+	if (rval < 0)
+		goto exit;
+
+	rval = si476x_core_cmd_func_info(core, &info);
+	if (rval < 0)
+		goto power_down;
+
+	core->revision = si476x_core_fwver_to_revision(core, info.func,
+						       info.firmware.major,
+						       info.firmware.minor[0],
+						       info.firmware.minor[1]);
+power_down:
+	si476x_core_set_power_state(core, SI476X_POWER_DOWN);
+exit:
+	si476x_core_unlock(core);
+
+	return rval;
+}
+
+bool si476x_core_has_am(struct si476x_core *core)
+{
+	return core->chip_id == SI476X_CHIP_SI4761 ||
+		core->chip_id == SI476X_CHIP_SI4764;
+}
+EXPORT_SYMBOL_GPL(si476x_core_has_am);
+
+bool si476x_core_has_diversity(struct si476x_core *core)
+{
+	return core->chip_id == SI476X_CHIP_SI4764;
+}
+EXPORT_SYMBOL_GPL(si476x_core_has_diversity);
+
+bool si476x_core_is_a_secondary_tuner(struct si476x_core *core)
+{
+	return si476x_core_has_diversity(core) &&
+		(core->diversity_mode == SI476X_PHDIV_SECONDARY_ANTENNA ||
+		 core->diversity_mode == SI476X_PHDIV_SECONDARY_COMBINING);
+}
+EXPORT_SYMBOL_GPL(si476x_core_is_a_secondary_tuner);
+
+bool si476x_core_is_a_primary_tuner(struct si476x_core *core)
+{
+	return si476x_core_has_diversity(core) &&
+		(core->diversity_mode == SI476X_PHDIV_PRIMARY_ANTENNA ||
+		 core->diversity_mode == SI476X_PHDIV_PRIMARY_COMBINING);
+}
+EXPORT_SYMBOL_GPL(si476x_core_is_a_primary_tuner);
+
+bool si476x_core_is_in_am_receiver_mode(struct si476x_core *core)
+{
+	return si476x_core_has_am(core) &&
+		(core->power_up_parameters.func == SI476X_FUNC_AM_RECEIVER);
+}
+EXPORT_SYMBOL_GPL(si476x_core_is_in_am_receiver_mode);
+
+bool si476x_core_is_powered_up(struct si476x_core *core)
+{
+	return core->power_state == SI476X_POWER_UP_FULL;
+}
+EXPORT_SYMBOL_GPL(si476x_core_is_powered_up);
+
+static int si476x_core_probe(struct i2c_client *client,
+			     const struct i2c_device_id *id)
+{
+	int rval;
+	struct si476x_core          *core;
+	struct si476x_platform_data *pdata;
+	struct mfd_cell *cell;
+	int              cell_num;
+
+	core = devm_kzalloc(&client->dev, sizeof(*core), GFP_KERNEL);
+	if (!core) {
+		dev_err(&client->dev,
+			"failed to allocate 'struct si476x_core'\n");
+		return -ENOMEM;
+	}
+	core->client = client;
+
+	core->regmap = devm_regmap_init_si476x(core);
+	if (IS_ERR(core->regmap)) {
+		rval = PTR_ERR(core->regmap);
+		dev_err(&client->dev,
+			"Failed to allocate register map: %d\n",
+			rval);
+		return rval;
+	}
+
+	i2c_set_clientdata(client, core);
+
+	atomic_set(&core->is_alive, 0);
+	core->power_state = SI476X_POWER_DOWN;
+
+	pdata = client->dev.platform_data;
+	if (pdata) {
+		memcpy(&core->power_up_parameters,
+		       &pdata->power_up_parameters,
+		       sizeof(core->power_up_parameters));
+
+		core->gpio_reset = -1;
+		if (gpio_is_valid(pdata->gpio_reset)) {
+			rval = gpio_request(pdata->gpio_reset, "si476x reset");
+			if (rval) {
+				dev_err(&client->dev,
+					"Failed to request gpio: %d\n", rval);
+				return rval;
+			}
+			core->gpio_reset = pdata->gpio_reset;
+			gpio_direction_output(core->gpio_reset, 0);
+		}
+
+		core->diversity_mode = pdata->diversity_mode;
+		memcpy(&core->pinmux, &pdata->pinmux,
+		       sizeof(struct si476x_pinmux));
+	} else {
+		dev_err(&client->dev, "No platform data provided\n");
+		return -EINVAL;
+	}
+
+	core->supplies[0].supply = "vd";
+	core->supplies[1].supply = "va";
+	core->supplies[2].supply = "vio1";
+	core->supplies[3].supply = "vio2";
+
+	rval = devm_regulator_bulk_get(&client->dev,
+				       ARRAY_SIZE(core->supplies),
+				       core->supplies);
+	if (rval) {
+		dev_err(&client->dev, "Failet to gett all of the regulators\n");
+		goto free_gpio;
+	}
+
+	mutex_init(&core->cmd_lock);
+	init_waitqueue_head(&core->command);
+	init_waitqueue_head(&core->tuning);
+
+	rval = kfifo_alloc(&core->rds_fifo,
+			   SI476X_DRIVER_RDS_FIFO_DEPTH *
+			   sizeof(struct v4l2_rds_data),
+			   GFP_KERNEL);
+	if (rval) {
+		dev_err(&client->dev, "Could not alloate the FIFO\n");
+		goto free_gpio;
+	}
+	mutex_init(&core->rds_drainer_status_lock);
+	init_waitqueue_head(&core->rds_read_queue);
+	INIT_WORK(&core->rds_fifo_drainer, si476x_core_drain_rds_fifo);
+
+	if (client->irq) {
+		rval = devm_request_threaded_irq(&client->dev,
+						 client->irq, NULL,
+						 si476x_core_interrupt,
+						 IRQF_TRIGGER_FALLING,
+						 client->name, core);
+		if (rval < 0) {
+			dev_err(&client->dev, "Could not request IRQ %d\n",
+				client->irq);
+			goto free_kfifo;
+		}
+		disable_irq(client->irq);
+		dev_dbg(&client->dev, "IRQ requested.\n");
+
+		core->rds_fifo_depth = 20;
+	} else {
+		INIT_DELAYED_WORK(&core->status_monitor,
+				  si476x_core_poll_loop);
+		dev_info(&client->dev,
+			 "No IRQ number specified, will use polling\n");
+
+		core->rds_fifo_depth = 5;
+	}
+
+	core->chip_id = id->driver_data;
+
+	rval = si476x_core_get_revision_info(core);
+	if (rval < 0) {
+		rval = -ENODEV;
+		goto free_kfifo;
+	}
+
+	cell_num = 0;
+
+	cell = &core->cells[SI476X_RADIO_CELL];
+	cell->name = "si476x-radio";
+	cell_num++;
+
+#ifdef CONFIG_SND_SOC_SI476X
+	if ((core->chip_id == SI476X_CHIP_SI4761 ||
+	     core->chip_id == SI476X_CHIP_SI4764)	&&
+	    core->pinmux.dclk == SI476X_DCLK_DAUDIO     &&
+	    core->pinmux.dfs  == SI476X_DFS_DAUDIO      &&
+	    core->pinmux.dout == SI476X_DOUT_I2S_OUTPUT &&
+	    core->pinmux.xout == SI476X_XOUT_TRISTATE) {
+		cell = &core->cells[SI476X_CODEC_CELL];
+		cell->name          = "si476x-codec";
+		cell_num++;
+	}
+#endif
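+	/* Compose a unique MFD device id from the I2C bus number and address */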
+	rval = mfd_add_devices(&client->dev,
+			       (client->adapter->nr << 8) + client->addr,
+			       core->cells, cell_num,
+			       NULL, 0, NULL);
+	if (!rval)
+		return 0;
+
+free_kfifo:
+	kfifo_free(&core->rds_fifo);
+
+free_gpio:
+	if (gpio_is_valid(core->gpio_reset))
+		gpio_free(core->gpio_reset);
+
+	return rval;
+}
+
+static int si476x_core_remove(struct i2c_client *client)
+{
+	struct si476x_core *core = i2c_get_clientdata(client);
+
+	si476x_core_pronounce_dead(core);
+	mfd_remove_devices(&client->dev);
+
+	if (client->irq)
+		disable_irq(client->irq);
+	else
+		cancel_delayed_work_sync(&core->status_monitor);
+
+	kfifo_free(&core->rds_fifo);
+
+	if (gpio_is_valid(core->gpio_reset))
+		gpio_free(core->gpio_reset);
+
+	return 0;
+}
+
+static const struct i2c_device_id si476x_id[] = {
+	{ "si4761", SI476X_CHIP_SI4761 },
+	{ "si4764", SI476X_CHIP_SI4764 },
+	{ "si4768", SI476X_CHIP_SI4768 },
+	{ },
+};
+MODULE_DEVICE_TABLE(i2c, si476x_id);
+
+static struct i2c_driver si476x_core_driver = {
+	.driver		= {
+		.name	= "si476x-core",
+		.owner  = THIS_MODULE,
+	},
+	.probe		= si476x_core_probe,
+	.remove         = si476x_core_remove,
+	.id_table       = si476x_id,
+};
+module_i2c_driver(si476x_core_driver);
+
+MODULE_AUTHOR("Andrey Smirnov <andrew.smirnov@gmail.com>");
+MODULE_DESCRIPTION("Si4761/64/68 AM/FM MFD core device driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/mfd/si476x-prop.c b/drivers/mfd/si476x-prop.c
new file mode 100644
index 0000000..cfeffa6
--- /dev/null
+++ b/drivers/mfd/si476x-prop.c
@@ -0,0 +1,241 @@
+/*
+ * drivers/mfd/si476x-prop.c -- Subroutines to access
+ * properties of si476x chips
+ *
+ * Copyright (C) 2012 Innovative Converged Devices (ICD)
+ * Copyright (C) 2013 Andrey Smirnov
+ *
+ * Author: Andrey Smirnov <andrew.smirnov@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+#include <linux/module.h>
+
+#include <linux/mfd/si476x-core.h>
+
+struct si476x_property_range {
+	u16 low, high;
+};
+
+static bool si476x_core_element_is_in_array(u16 element,
+					    const u16 array[],
+					    size_t size)
+{
+	int i;
+
+	for (i = 0; i < size; i++)
+		if (element == array[i])
+			return true;
+
+	return false;
+}
+
+static bool si476x_core_element_is_in_range(u16 element,
+					    const struct si476x_property_range range[],
+					    size_t size)
+{
+	int i;
+
+	for (i = 0; i < size; i++)
+		if (element <= range[i].high && element >= range[i].low)
+			return true;
+
+	return false;
+}
+
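+/*
+ * Each chip revision accepts a superset of the previous revision's
+ * properties, so the A20 and A30 predicates below chain back to the
+ * A10 one.
+ */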
+static bool si476x_core_is_valid_property_a10(struct si476x_core *core,
+					      u16 property)
+{
+	static const u16 valid_properties[] = {
+		0x0000,
+		0x0500, 0x0501,
+		0x0600,
+		0x0709, 0x070C, 0x070D, 0x070E, 0x0710,
+		0x0718,
+		0x1207, 0x1208,
+		0x2007,
+		0x2300,
+	};
+
+	static const struct si476x_property_range valid_ranges[] = {
+		{ 0x0200, 0x0203 },
+		{ 0x0300, 0x0303 },
+		{ 0x0400, 0x0404 },
+		{ 0x0700, 0x0707 },
+		{ 0x1100, 0x1102 },
+		{ 0x1200, 0x1204 },
+		{ 0x1300, 0x1306 },
+		{ 0x2000, 0x2005 },
+		{ 0x2100, 0x2104 },
+		{ 0x2106, 0x2106 },
+		{ 0x2200, 0x220E },
+		{ 0x3100, 0x3104 },
+		{ 0x3207, 0x320F },
+		{ 0x3300, 0x3304 },
+		{ 0x3500, 0x3517 },
+		{ 0x3600, 0x3617 },
+		{ 0x3700, 0x3717 },
+		{ 0x4000, 0x4003 },
+	};
+
+	return	si476x_core_element_is_in_range(property, valid_ranges,
+						ARRAY_SIZE(valid_ranges)) ||
+		si476x_core_element_is_in_array(property, valid_properties,
+						ARRAY_SIZE(valid_properties));
+}
+
+static bool si476x_core_is_valid_property_a20(struct si476x_core *core,
+					      u16 property)
+{
+	static const u16 valid_properties[] = {
+		0x071B,
+		0x1006,
+		0x2210,
+		0x3401,
+	};
+
+	static const struct si476x_property_range valid_ranges[] = {
+		{ 0x2215, 0x2219 },
+	};
+
+	return	si476x_core_is_valid_property_a10(core, property) ||
+		si476x_core_element_is_in_range(property, valid_ranges,
+						ARRAY_SIZE(valid_ranges))  ||
+		si476x_core_element_is_in_array(property, valid_properties,
+						ARRAY_SIZE(valid_properties));
+}
+
+static bool si476x_core_is_valid_property_a30(struct si476x_core *core,
+					      u16 property)
+{
+	static const u16 valid_properties[] = {
+		0x071C, 0x071D,
+		0x1007, 0x1008,
+		0x220F, 0x2214,
+		0x2301,
+		0x3105, 0x3106,
+		0x3402,
+	};
+
+	static const struct si476x_property_range valid_ranges[] = {
+		{ 0x0405, 0x0411 },
+		{ 0x2008, 0x200B },
+		{ 0x2220, 0x2223 },
+		{ 0x3100, 0x3106 },
+	};
+
+	return	si476x_core_is_valid_property_a20(core, property) ||
+		si476x_core_element_is_in_range(property, valid_ranges,
+						ARRAY_SIZE(valid_ranges)) ||
+		si476x_core_element_is_in_array(property, valid_properties,
+						ARRAY_SIZE(valid_properties));
+}
+
+typedef bool (*valid_property_pred_t) (struct si476x_core *, u16);
+
+static bool si476x_core_is_valid_property(struct si476x_core *core,
+					  u16 property)
+{
+	static const valid_property_pred_t is_valid_property[] = {
+		[SI476X_REVISION_A10] = si476x_core_is_valid_property_a10,
+		[SI476X_REVISION_A20] = si476x_core_is_valid_property_a20,
+		[SI476X_REVISION_A30] = si476x_core_is_valid_property_a30,
+	};
+
+	BUG_ON(core->revision > SI476X_REVISION_A30 ||
+	       core->revision == -1);
+	return is_valid_property[core->revision](core, property);
+}
+
+static bool si476x_core_is_readonly_property(struct si476x_core *core,
+					     u16 property)
+{
+	BUG_ON(core->revision > SI476X_REVISION_A30 ||
+	       core->revision == -1);
+
+	switch (core->revision) {
+	case SI476X_REVISION_A10:
+		return (property == 0x3200);
+	case SI476X_REVISION_A20:
+		return (property == 0x1006 ||
+			property == 0x2210 ||
+			property == 0x3200);
+	case SI476X_REVISION_A30:
+		return false;
+	}
+
+	return false;
+}
+
+static bool si476x_core_regmap_readable_register(struct device *dev,
+						 unsigned int reg)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+	struct si476x_core *core = i2c_get_clientdata(client);
+
+	return si476x_core_is_valid_property(core, (u16) reg);
+}
+
+static bool si476x_core_regmap_writable_register(struct device *dev,
+						 unsigned int reg)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+	struct si476x_core *core = i2c_get_clientdata(client);
+
+	return si476x_core_is_valid_property(core, (u16) reg) &&
+		!si476x_core_is_readonly_property(core, (u16) reg);
+}
+
+static int si476x_core_regmap_write(void *context, unsigned int reg,
+				    unsigned int val)
+{
+	return si476x_core_cmd_set_property(context, reg, val);
+}
+
+static int si476x_core_regmap_read(void *context, unsigned int reg,
+				   unsigned int *val)
+{
+	struct si476x_core *core = context;
+	int err;
+
+	err = si476x_core_cmd_get_property(core, reg);
+	if (err < 0)
+		return err;
+
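+	/* A non-negative return value is the property value itself */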
+	*val = err;
+
+	return 0;
+}
+
+static const struct regmap_config si476x_regmap_config = {
+	.reg_bits = 16,
+	.val_bits = 16,
+
+	.max_register = 0x4003,
+
+	.writeable_reg = si476x_core_regmap_writable_register,
+	.readable_reg = si476x_core_regmap_readable_register,
+
+	.reg_read = si476x_core_regmap_read,
+	.reg_write = si476x_core_regmap_write,
+
+	.cache_type = REGCACHE_RBTREE,
+};
+
+struct regmap *devm_regmap_init_si476x(struct si476x_core *core)
+{
+	return devm_regmap_init(&core->client->dev, NULL,
+				core, &si476x_regmap_config);
+}
+EXPORT_SYMBOL_GPL(devm_regmap_init_si476x);
diff --git a/drivers/mfd/sta2x11-mfd.c b/drivers/mfd/sta2x11-mfd.c
index 9bd3316..d70a3430 100644
--- a/drivers/mfd/sta2x11-mfd.c
+++ b/drivers/mfd/sta2x11-mfd.c
@@ -98,17 +98,6 @@
 	return 0;
 }
 
-static int mfd_remove(struct pci_dev *pdev)
-{
-	struct sta2x11_mfd *mfd = sta2x11_mfd_find(pdev);
-
-	if (!mfd)
-		return -ENODEV;
-	list_del(&mfd->list);
-	kfree(mfd);
-	return 0;
-}
-
 /* This function is exported and is not expected to fail */
 u32 __sta2x11_mfd_mask(struct pci_dev *pdev, u32 reg, u32 mask, u32 val,
 		       enum sta2x11_mfd_plat_dev index)
diff --git a/drivers/mfd/stmpe-i2c.c b/drivers/mfd/stmpe-i2c.c
index fd5fcb6..0da02e1 100644
--- a/drivers/mfd/stmpe-i2c.c
+++ b/drivers/mfd/stmpe-i2c.c
@@ -75,6 +75,7 @@
 	{ "stmpe801", STMPE801 },
 	{ "stmpe811", STMPE811 },
 	{ "stmpe1601", STMPE1601 },
+	{ "stmpe1801", STMPE1801 },
 	{ "stmpe2401", STMPE2401 },
 	{ "stmpe2403", STMPE2403 },
 	{ }
diff --git a/drivers/mfd/stmpe-spi.c b/drivers/mfd/stmpe-spi.c
index 973659f..a81badb 100644
--- a/drivers/mfd/stmpe-spi.c
+++ b/drivers/mfd/stmpe-spi.c
@@ -103,7 +103,7 @@
 
 static int stmpe_spi_remove(struct spi_device *spi)
 {
-	struct stmpe *stmpe = dev_get_drvdata(&spi->dev);
+	struct stmpe *stmpe = spi_get_drvdata(spi);
 
 	return stmpe_remove(stmpe);
 }
diff --git a/drivers/mfd/stmpe.c b/drivers/mfd/stmpe.c
index 4b11202..bbccd51 100644
--- a/drivers/mfd/stmpe.c
+++ b/drivers/mfd/stmpe.c
@@ -19,6 +19,7 @@
 #include <linux/pm.h>
 #include <linux/slab.h>
 #include <linux/mfd/core.h>
+#include <linux/delay.h>
 #include "stmpe.h"
 
 static int __stmpe_enable(struct stmpe *stmpe, unsigned int blocks)
@@ -643,6 +644,88 @@
 };
 
 /*
+ * STMPE1801
+ */
+static const u8 stmpe1801_regs[] = {
+	[STMPE_IDX_CHIP_ID]	= STMPE1801_REG_CHIP_ID,
+	[STMPE_IDX_ICR_LSB]	= STMPE1801_REG_INT_CTRL_LOW,
+	[STMPE_IDX_IER_LSB]	= STMPE1801_REG_INT_EN_MASK_LOW,
+	[STMPE_IDX_ISR_LSB]	= STMPE1801_REG_INT_STA_LOW,
+	[STMPE_IDX_GPMR_LSB]	= STMPE1801_REG_GPIO_MP_LOW,
+	[STMPE_IDX_GPSR_LSB]	= STMPE1801_REG_GPIO_SET_LOW,
+	[STMPE_IDX_GPCR_LSB]	= STMPE1801_REG_GPIO_CLR_LOW,
+	[STMPE_IDX_GPDR_LSB]	= STMPE1801_REG_GPIO_SET_DIR_LOW,
+	[STMPE_IDX_GPRER_LSB]	= STMPE1801_REG_GPIO_RE_LOW,
+	[STMPE_IDX_GPFER_LSB]	= STMPE1801_REG_GPIO_FE_LOW,
+	[STMPE_IDX_IEGPIOR_LSB]	= STMPE1801_REG_INT_EN_GPIO_MASK_LOW,
+	[STMPE_IDX_ISGPIOR_LSB]	= STMPE1801_REG_INT_STA_GPIO_LOW,
+};
+
+static struct stmpe_variant_block stmpe1801_blocks[] = {
+	{
+		.cell	= &stmpe_gpio_cell,
+		.irq	= STMPE1801_IRQ_GPIOC,
+		.block	= STMPE_BLOCK_GPIO,
+	},
+	{
+		.cell	= &stmpe_keypad_cell,
+		.irq	= STMPE1801_IRQ_KEYPAD,
+		.block	= STMPE_BLOCK_KEYPAD,
+	},
+};
+
+static int stmpe1801_enable(struct stmpe *stmpe, unsigned int blocks,
+			    bool enable)
+{
+	unsigned int mask = 0;
+	if (blocks & STMPE_BLOCK_GPIO)
+		mask |= STMPE1801_MSK_INT_EN_GPIO;
+
+	if (blocks & STMPE_BLOCK_KEYPAD)
+		mask |= STMPE1801_MSK_INT_EN_KPC;
+
+	return __stmpe_set_bits(stmpe, STMPE1801_REG_INT_EN_MASK_LOW, mask,
+				enable ? mask : 0);
+}
+
+static int stmpe1801_reset(struct stmpe *stmpe)
+{
+	unsigned long timeout;
+	int ret = 0;
+
+	ret = __stmpe_set_bits(stmpe, STMPE1801_REG_SYS_CTRL,
+		STMPE1801_MSK_SYS_CTRL_RESET, STMPE1801_MSK_SYS_CTRL_RESET);
+	if (ret < 0)
+		return ret;
+
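+	/* The reset bit self-clears; poll for it for at most 100 ms */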
+	timeout = jiffies + msecs_to_jiffies(100);
+	while (time_before(jiffies, timeout)) {
+		ret = __stmpe_reg_read(stmpe, STMPE1801_REG_SYS_CTRL);
+		if (ret < 0)
+			return ret;
+		if (!(ret & STMPE1801_MSK_SYS_CTRL_RESET))
+			return 0;
+		usleep_range(100, 200);
+	}
+	return -EIO;
+}
+
+static struct stmpe_variant_info stmpe1801 = {
+	.name		= "stmpe1801",
+	.id_val		= STMPE1801_ID,
+	.id_mask	= 0xfff0,
+	.num_gpios	= 18,
+	.af_bits	= 0,
+	.regs		= stmpe1801_regs,
+	.blocks		= stmpe1801_blocks,
+	.num_blocks	= ARRAY_SIZE(stmpe1801_blocks),
+	.num_irqs	= STMPE1801_NR_INTERNAL_IRQS,
+	.enable		= stmpe1801_enable,
+	/* stmpe1801 does not have any GPIO alternate functions */
+	.get_altfunc	= NULL,
+};
+
+/*
  * STMPE24XX
  */
 
@@ -740,6 +823,7 @@
 	[STMPE801]	= &stmpe801,
 	[STMPE811]	= &stmpe811,
 	[STMPE1601]	= &stmpe1601,
+	[STMPE1801]	= &stmpe1801,
 	[STMPE2401]	= &stmpe2401,
 	[STMPE2403]	= &stmpe2403,
 };
@@ -759,7 +843,7 @@
 	struct stmpe *stmpe = data;
 	struct stmpe_variant_info *variant = stmpe->variant;
 	int num = DIV_ROUND_UP(variant->num_irqs, 8);
-	u8 israddr = stmpe->regs[STMPE_IDX_ISR_MSB];
+	u8 israddr;
 	u8 isr[num];
 	int ret;
 	int i;
@@ -771,6 +855,11 @@
 		return IRQ_HANDLED;
 	}
 
+	if (variant->id_val == STMPE1801_ID)
+		israddr = stmpe->regs[STMPE_IDX_ISR_LSB];
+	else
+		israddr = stmpe->regs[STMPE_IDX_ISR_MSB];
+
 	ret = stmpe_block_read(stmpe, israddr, num, isr);
 	if (ret < 0)
 		return IRQ_NONE;
@@ -938,6 +1027,12 @@
 	if (ret)
 		return ret;
 
+	if (id == STMPE1801_ID) {
+		ret = stmpe1801_reset(stmpe);
+		if (ret < 0)
+			return ret;
+	}
+
 	if (stmpe->irq >= 0) {
 		if (id == STMPE801_ID)
 			icr = STMPE801_REG_SYS_CTRL_INT_EN;
@@ -1015,7 +1110,10 @@
 {
 	struct device_node *child;
 
-	pdata->id = -1;
+	pdata->id = of_alias_get_id(np, "stmpe-i2c");
+	if (pdata->id < 0)
+		pdata->id = -1;
+
 	pdata->irq_trigger = IRQF_TRIGGER_NONE;
 
 	of_property_read_u32(np, "st,autosleep-timeout",
@@ -1057,6 +1155,9 @@
 			return -ENOMEM;
 
 		stmpe_of_probe(pdata, np);
+
+		if (of_find_property(np, "interrupts", NULL) == NULL)
+			ci->irq = -1;
 	}
 
 	stmpe = devm_kzalloc(ci->dev, sizeof(struct stmpe), GFP_KERNEL);
diff --git a/drivers/mfd/stmpe.h b/drivers/mfd/stmpe.h
index 7b8e13f..ff2b09b 100644
--- a/drivers/mfd/stmpe.h
+++ b/drivers/mfd/stmpe.h
@@ -199,6 +199,55 @@
 #define STPME1601_AUTOSLEEP_ENABLE		(1 << 3)
 
 /*
+ * STMPE1801
+ */
+#define STMPE1801_ID			0xc110
+#define STMPE1801_NR_INTERNAL_IRQS	5
+#define STMPE1801_IRQ_KEYPAD_COMBI	4
+#define STMPE1801_IRQ_GPIOC		3
+#define STMPE1801_IRQ_KEYPAD_OVER	2
+#define STMPE1801_IRQ_KEYPAD		1
+#define STMPE1801_IRQ_WAKEUP		0
+
+#define STMPE1801_REG_CHIP_ID			0x00
+#define STMPE1801_REG_SYS_CTRL			0x02
+#define STMPE1801_REG_INT_CTRL_LOW		0x04
+#define STMPE1801_REG_INT_EN_MASK_LOW		0x06
+#define STMPE1801_REG_INT_STA_LOW		0x08
+#define STMPE1801_REG_INT_EN_GPIO_MASK_LOW	0x0A
+#define STMPE1801_REG_INT_EN_GPIO_MASK_MID	0x0B
+#define STMPE1801_REG_INT_EN_GPIO_MASK_HIGH	0x0C
+#define STMPE1801_REG_INT_STA_GPIO_LOW		0x0D
+#define STMPE1801_REG_INT_STA_GPIO_MID		0x0E
+#define STMPE1801_REG_INT_STA_GPIO_HIGH		0x0F
+#define STMPE1801_REG_GPIO_SET_LOW		0x10
+#define STMPE1801_REG_GPIO_SET_MID		0x11
+#define STMPE1801_REG_GPIO_SET_HIGH		0x12
+#define STMPE1801_REG_GPIO_CLR_LOW		0x13
+#define STMPE1801_REG_GPIO_CLR_MID		0x14
+#define STMPE1801_REG_GPIO_CLR_HIGH		0x15
+#define STMPE1801_REG_GPIO_MP_LOW		0x16
+#define STMPE1801_REG_GPIO_MP_MID		0x17
+#define STMPE1801_REG_GPIO_MP_HIGH		0x18
+#define STMPE1801_REG_GPIO_SET_DIR_LOW		0x19
+#define STMPE1801_REG_GPIO_SET_DIR_MID		0x1A
+#define STMPE1801_REG_GPIO_SET_DIR_HIGH		0x1B
+#define STMPE1801_REG_GPIO_RE_LOW		0x1C
+#define STMPE1801_REG_GPIO_RE_MID		0x1D
+#define STMPE1801_REG_GPIO_RE_HIGH		0x1E
+#define STMPE1801_REG_GPIO_FE_LOW		0x1F
+#define STMPE1801_REG_GPIO_FE_MID		0x20
+#define STMPE1801_REG_GPIO_FE_HIGH		0x21
+#define STMPE1801_REG_GPIO_PULL_UP_LOW		0x22
+#define STMPE1801_REG_GPIO_PULL_UP_MID		0x23
+#define STMPE1801_REG_GPIO_PULL_UP_HIGH		0x24
+
+#define STMPE1801_MSK_SYS_CTRL_RESET		(1 << 7)
+
+#define STMPE1801_MSK_INT_EN_KPC		(1 << 1)
+#define STMPE1801_MSK_INT_EN_GPIO		(1 << 3)
+
+/*
  * STMPE24xx
  */
 
diff --git a/drivers/mfd/syscon.c b/drivers/mfd/syscon.c
index 61aea63..962a6e1 100644
--- a/drivers/mfd/syscon.c
+++ b/drivers/mfd/syscon.c
@@ -25,17 +25,15 @@
 static struct platform_driver syscon_driver;
 
 struct syscon {
-	struct device *dev;
 	void __iomem *base;
 	struct regmap *regmap;
 };
 
-static int syscon_match(struct device *dev, void *data)
+static int syscon_match_node(struct device *dev, void *data)
 {
-	struct syscon *syscon = dev_get_drvdata(dev);
 	struct device_node *dn = data;
 
-	return (syscon->dev->of_node == dn) ? 1 : 0;
+	return (dev->of_node == dn) ? 1 : 0;
 }
 
 struct regmap *syscon_node_to_regmap(struct device_node *np)
@@ -44,7 +42,7 @@
 	struct device *dev;
 
 	dev = driver_find_device(&syscon_driver.driver, NULL, np,
-				 syscon_match);
+				 syscon_match_node);
 	if (!dev)
 		return ERR_PTR(-EPROBE_DEFER);
 
@@ -70,6 +68,34 @@
 }
 EXPORT_SYMBOL_GPL(syscon_regmap_lookup_by_compatible);
 
+static int syscon_match_pdevname(struct device *dev, void *data)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	const struct platform_device_id *id = platform_get_device_id(pdev);
+
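+	/* Prefer a match on the id-table name, then fall back to dev_name() */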
+	if (id)
+		if (!strcmp(id->name, (const char *)data))
+			return 1;
+
+	return !strcmp(dev_name(dev), (const char *)data);
+}
+
+struct regmap *syscon_regmap_lookup_by_pdevname(const char *s)
+{
+	struct device *dev;
+	struct syscon *syscon;
+
+	dev = driver_find_device(&syscon_driver.driver, NULL, (void *)s,
+				 syscon_match_pdevname);
+	if (!dev)
+		return ERR_PTR(-EPROBE_DEFER);
+
+	syscon = dev_get_drvdata(dev);
+
+	return syscon->regmap;
+}
+EXPORT_SYMBOL_GPL(syscon_regmap_lookup_by_pdevname);
+
 struct regmap *syscon_regmap_lookup_by_phandle(struct device_node *np,
 					const char *property)
 {
@@ -101,28 +127,22 @@
 static int syscon_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
-	struct device_node *np = dev->of_node;
 	struct syscon *syscon;
-	struct resource res;
-	int ret;
+	struct resource *res;
 
-	if (!np)
-		return -ENOENT;
-
-	syscon = devm_kzalloc(dev, sizeof(struct syscon),
-			    GFP_KERNEL);
+	syscon = devm_kzalloc(dev, sizeof(*syscon), GFP_KERNEL);
 	if (!syscon)
 		return -ENOMEM;
 
-	syscon->base = of_iomap(np, 0);
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!res)
+		return -ENOENT;
+
+	syscon->base = devm_ioremap(dev, res->start, resource_size(res));
 	if (!syscon->base)
-		return -EADDRNOTAVAIL;
+		return -ENOMEM;
 
-	ret = of_address_to_resource(np, 0, &res);
-	if (ret)
-		return ret;
-
-	syscon_regmap_config.max_register = res.end - res.start - 3;
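+	/* Registers are 32 bits wide, so the last valid offset is size - 4 */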
+	syscon_regmap_config.max_register = res->end - res->start - 3;
 	syscon->regmap = devm_regmap_init_mmio(dev, syscon->base,
 					&syscon_regmap_config);
 	if (IS_ERR(syscon->regmap)) {
@@ -130,25 +150,17 @@
 		return PTR_ERR(syscon->regmap);
 	}
 
-	syscon->dev = dev;
 	platform_set_drvdata(pdev, syscon);
 
-	dev_info(dev, "syscon regmap start 0x%x end 0x%x registered\n",
-		res.start, res.end);
+	dev_info(dev, "regmap %pR registered\n", res);
 
 	return 0;
 }
 
-static int syscon_remove(struct platform_device *pdev)
-{
-	struct syscon *syscon;
-
-	syscon = platform_get_drvdata(pdev);
-	iounmap(syscon->base);
-	platform_set_drvdata(pdev, NULL);
-
-	return 0;
-}
+static const struct platform_device_id syscon_ids[] = {
+	{ "syscon", },
+	{ }
+};
 
 static struct platform_driver syscon_driver = {
 	.driver = {
@@ -157,7 +169,7 @@
 		.of_match_table = of_syscon_match,
 	},
 	.probe		= syscon_probe,
-	.remove		= syscon_remove,
+	.id_table	= syscon_ids,
 };
 
 static int __init syscon_init(void)
diff --git a/drivers/mfd/tc3589x.c b/drivers/mfd/tc3589x.c
index ecc092c..4cb92bb 100644
--- a/drivers/mfd/tc3589x.c
+++ b/drivers/mfd/tc3589x.c
@@ -350,7 +350,8 @@
 				     | I2C_FUNC_SMBUS_I2C_BLOCK))
 		return -EIO;
 
-	tc3589x = kzalloc(sizeof(struct tc3589x), GFP_KERNEL);
+	tc3589x = devm_kzalloc(&i2c->dev, sizeof(struct tc3589x),
+				GFP_KERNEL);
 	if (!tc3589x)
 		return -ENOMEM;
 
@@ -366,33 +367,27 @@
 
 	ret = tc3589x_chip_init(tc3589x);
 	if (ret)
-		goto out_free;
+		return ret;
 
 	ret = tc3589x_irq_init(tc3589x, np);
 	if (ret)
-		goto out_free;
+		return ret;
 
 	ret = request_threaded_irq(tc3589x->i2c->irq, NULL, tc3589x_irq,
 				   IRQF_TRIGGER_FALLING | IRQF_ONESHOT,
 				   "tc3589x", tc3589x);
 	if (ret) {
 		dev_err(tc3589x->dev, "failed to request IRQ: %d\n", ret);
-		goto out_free;
+		return ret;
 	}
 
 	ret = tc3589x_device_init(tc3589x);
 	if (ret) {
 		dev_err(tc3589x->dev, "failed to add child devices\n");
-		goto out_freeirq;
+		return ret;
 	}
 
 	return 0;
-
-out_freeirq:
-	free_irq(tc3589x->i2c->irq, tc3589x);
-out_free:
-	kfree(tc3589x);
-	return ret;
 }
 
 static int tc3589x_remove(struct i2c_client *client)
@@ -401,10 +396,6 @@
 
 	mfd_remove_devices(tc3589x->dev);
 
-	free_irq(tc3589x->i2c->irq, tc3589x);
-
-	kfree(tc3589x);
-
 	return 0;
 }
 
diff --git a/drivers/mfd/tps65090.c b/drivers/mfd/tps65090.c
index 98edb5be..fbd6ee6 100644
--- a/drivers/mfd/tps65090.c
+++ b/drivers/mfd/tps65090.c
@@ -56,12 +56,23 @@
 #define TPS65090_INT2_MASK_OVERLOAD_FET6		6
 #define TPS65090_INT2_MASK_OVERLOAD_FET7		7
 
+static struct resource charger_resources[] = {
+	{
+		.start  = TPS65090_IRQ_VAC_STATUS_CHANGE,
+		.end    = TPS65090_IRQ_VAC_STATUS_CHANGE,
+		.flags  = IORESOURCE_IRQ,
+	}
+};
+
 static struct mfd_cell tps65090s[] = {
 	{
 		.name = "tps65090-pmic",
 	},
 	{
 		.name = "tps65090-charger",
+		.num_resources = ARRAY_SIZE(charger_resources),
+		.resources = &charger_resources[0],
+		.of_compatible = "ti,tps65090-charger",
 	},
 };
 
diff --git a/drivers/mfd/twl4030-madc.c b/drivers/mfd/twl4030-madc.c
index 942b666..42bd3ea 100644
--- a/drivers/mfd/twl4030-madc.c
+++ b/drivers/mfd/twl4030-madc.c
@@ -211,12 +211,14 @@
  * @reg_base - Base address of the first channel
  * @Channels - 16 bit bitmap. If the bit is set, channel value is read
  * @buf - The channel values are stored here. if read fails error
+ * @raw - Return raw values without conversion
  * value is stored
  * Returns the number of successfully read channels.
  */
 static int twl4030_madc_read_channels(struct twl4030_madc_data *madc,
 				      u8 reg_base, unsigned
-						long channels, int *buf)
+				      long channels, int *buf,
+				      bool raw)
 {
 	int count = 0, count_req = 0, i;
 	u8 reg;
@@ -230,6 +232,10 @@
 			count_req++;
 			continue;
 		}
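+		/* In raw mode, skip unit conversion and keep the raw ADC value */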
+		if (raw) {
+			count++;
+			continue;
+		}
 		switch (i) {
 		case 10:
 			buf[i] = twl4030battery_current(buf[i]);
@@ -371,7 +377,7 @@
 		method = &twl4030_conversion_methods[r->method];
 		/* Read results */
 		len = twl4030_madc_read_channels(madc, method->rbase,
-						 r->channels, r->rbuf);
+						 r->channels, r->rbuf, r->raw);
 		/* Return results to caller */
 		if (r->func_cb != NULL) {
 			r->func_cb(len, r->channels, r->rbuf);
@@ -397,7 +403,7 @@
 		method = &twl4030_conversion_methods[r->method];
 		/* Read results */
 		len = twl4030_madc_read_channels(madc, method->rbase,
-						 r->channels, r->rbuf);
+						 r->channels, r->rbuf, r->raw);
 		/* Return results to caller */
 		if (r->func_cb != NULL) {
 			r->func_cb(len, r->channels, r->rbuf);
@@ -585,7 +591,7 @@
 		goto out;
 	}
 	ret = twl4030_madc_read_channels(twl4030_madc, method->rbase,
-					 req->channels, req->rbuf);
+					 req->channels, req->rbuf, req->raw);
 	twl4030_madc->requests[req->method].active = 0;
 
 out:
diff --git a/drivers/mfd/twl6040.c b/drivers/mfd/twl6040.c
index f361bf3..492ee2c 100644
--- a/drivers/mfd/twl6040.c
+++ b/drivers/mfd/twl6040.c
@@ -554,7 +554,7 @@
 
 	twl6040->supplies[0].supply = "vio";
 	twl6040->supplies[1].supply = "v2v1";
-	ret = regulator_bulk_get(&client->dev, TWL6040_NUM_SUPPLIES,
+	ret = devm_regulator_bulk_get(&client->dev, TWL6040_NUM_SUPPLIES,
 				 twl6040->supplies);
 	if (ret != 0) {
 		dev_err(&client->dev, "Failed to get supplies: %d\n", ret);
@@ -564,7 +564,7 @@
 	ret = regulator_bulk_enable(TWL6040_NUM_SUPPLIES, twl6040->supplies);
 	if (ret != 0) {
 		dev_err(&client->dev, "Failed to enable supplies: %d\n", ret);
-		goto power_err;
+		goto regulator_get_err;
 	}
 
 	twl6040->dev = &client->dev;
@@ -586,8 +586,8 @@
 		twl6040->audpwron = -EINVAL;
 
 	if (gpio_is_valid(twl6040->audpwron)) {
-		ret = gpio_request_one(twl6040->audpwron, GPIOF_OUT_INIT_LOW,
-				       "audpwron");
+		ret = devm_gpio_request_one(&client->dev, twl6040->audpwron,
+					GPIOF_OUT_INIT_LOW, "audpwron");
 		if (ret)
 			goto gpio_err;
 	}
@@ -596,14 +596,14 @@
 			IRQF_ONESHOT, 0, &twl6040_irq_chip,
 			&twl6040->irq_data);
 	if (ret < 0)
-		goto irq_init_err;
+		goto gpio_err;
 
 	twl6040->irq_ready = regmap_irq_get_virq(twl6040->irq_data,
 					       TWL6040_IRQ_READY);
 	twl6040->irq_th = regmap_irq_get_virq(twl6040->irq_data,
 					       TWL6040_IRQ_TH);
 
-	ret = request_threaded_irq(twl6040->irq_ready, NULL,
+	ret = devm_request_threaded_irq(twl6040->dev, twl6040->irq_ready, NULL,
 				   twl6040_readyint_handler, IRQF_ONESHOT,
 				   "twl6040_irq_ready", twl6040);
 	if (ret) {
@@ -611,7 +611,7 @@
 		goto readyirq_err;
 	}
 
-	ret = request_threaded_irq(twl6040->irq_th, NULL,
+	ret = devm_request_threaded_irq(twl6040->dev, twl6040->irq_th, NULL,
 				   twl6040_thint_handler, IRQF_ONESHOT,
 				   "twl6040_irq_th", twl6040);
 	if (ret) {
@@ -681,18 +681,13 @@
 	return 0;
 
 mfd_err:
-	free_irq(twl6040->irq_th, twl6040);
+	devm_free_irq(&client->dev, twl6040->irq_th, twl6040);
 thirq_err:
-	free_irq(twl6040->irq_ready, twl6040);
+	devm_free_irq(&client->dev, twl6040->irq_ready, twl6040);
 readyirq_err:
 	regmap_del_irq_chip(twl6040->irq, twl6040->irq_data);
-irq_init_err:
-	if (gpio_is_valid(twl6040->audpwron))
-		gpio_free(twl6040->audpwron);
 gpio_err:
 	regulator_bulk_disable(TWL6040_NUM_SUPPLIES, twl6040->supplies);
-power_err:
-	regulator_bulk_free(TWL6040_NUM_SUPPLIES, twl6040->supplies);
 regulator_get_err:
 	i2c_set_clientdata(client, NULL);
 err:
@@ -706,18 +701,14 @@
 	if (twl6040->power_count)
 		twl6040_power(twl6040, 0);
 
-	if (gpio_is_valid(twl6040->audpwron))
-		gpio_free(twl6040->audpwron);
-
-	free_irq(twl6040->irq_ready, twl6040);
-	free_irq(twl6040->irq_th, twl6040);
+	devm_free_irq(&client->dev, twl6040->irq_ready, twl6040);
+	devm_free_irq(&client->dev, twl6040->irq_th, twl6040);
 	regmap_del_irq_chip(twl6040->irq, twl6040->irq_data);
 
 	mfd_remove_devices(&client->dev);
 	i2c_set_clientdata(client, NULL);
 
 	regulator_bulk_disable(TWL6040_NUM_SUPPLIES, twl6040->supplies);
-	regulator_bulk_free(TWL6040_NUM_SUPPLIES, twl6040->supplies);
 
 	return 0;
 }
diff --git a/drivers/mfd/ucb1400_core.c b/drivers/mfd/ucb1400_core.c
index daf6952..e9031fa 100644
--- a/drivers/mfd/ucb1400_core.c
+++ b/drivers/mfd/ucb1400_core.c
@@ -75,6 +75,11 @@
 
 	/* GPIO */
 	ucb_gpio.ac97 = ac97;
+	if (pdata) {
+		ucb_gpio.gpio_setup = pdata->gpio_setup;
+		ucb_gpio.gpio_teardown = pdata->gpio_teardown;
+		ucb_gpio.gpio_offset = pdata->gpio_offset;
+	}
 	ucb->ucb1400_gpio = platform_device_alloc("ucb1400_gpio", -1);
 	if (!ucb->ucb1400_gpio) {
 		err = -ENOMEM;
diff --git a/drivers/mfd/vexpress-config.c b/drivers/mfd/vexpress-config.c
index 3c1723aa..84ce6b9 100644
--- a/drivers/mfd/vexpress-config.c
+++ b/drivers/mfd/vexpress-config.c
@@ -184,13 +184,14 @@
 
 	spin_lock_irqsave(&bridge->transactions_lock, flags);
 
-	vexpress_config_dump_trans("Executing", trans);
-
-	if (list_empty(&bridge->transactions))
+	if (list_empty(&bridge->transactions)) {
+		vexpress_config_dump_trans("Executing", trans);
 		status = bridge->info->func_exec(trans->func->func,
 				trans->offset, trans->write, trans->data);
-	else
+	} else {
+		vexpress_config_dump_trans("Queuing", trans);
 		status = VEXPRESS_CONFIG_STATUS_WAIT;
+	}
 
 	switch (status) {
 	case VEXPRESS_CONFIG_STATUS_DONE:
@@ -212,25 +213,31 @@
 {
 	struct vexpress_config_trans *trans;
 	unsigned long flags;
+	const char *message = "Completed";
 
 	spin_lock_irqsave(&bridge->transactions_lock, flags);
 
 	trans = list_first_entry(&bridge->transactions,
 			struct vexpress_config_trans, list);
-	vexpress_config_dump_trans("Completed", trans);
-
 	trans->status = status;
-	list_del(&trans->list);
 
-	if (!list_empty(&bridge->transactions)) {
-		vexpress_config_dump_trans("Pending", trans);
+	do {
+		vexpress_config_dump_trans(message, trans);
+		list_del(&trans->list);
+		complete(&trans->completion);
 
-		bridge->info->func_exec(trans->func->func, trans->offset,
-				trans->write, trans->data);
-	}
+		if (list_empty(&bridge->transactions))
+			break;
+
+		trans = list_first_entry(&bridge->transactions,
+				struct vexpress_config_trans, list);
+		vexpress_config_dump_trans("Executing pending", trans);
+		trans->status = bridge->info->func_exec(trans->func->func,
+				trans->offset, trans->write, trans->data);
+		message = "Finished pending";
+	} while (trans->status == VEXPRESS_CONFIG_STATUS_DONE);
+
 	spin_unlock_irqrestore(&bridge->transactions_lock, flags);
-
-	complete(&trans->completion);
 }
 EXPORT_SYMBOL(vexpress_config_complete);
 
diff --git a/drivers/mfd/vexpress-sysreg.c b/drivers/mfd/vexpress-sysreg.c
index bf75e96..96a020b 100644
--- a/drivers/mfd/vexpress-sysreg.c
+++ b/drivers/mfd/vexpress-sysreg.c
@@ -490,12 +490,12 @@
 		return err;
 	}
 
+	vexpress_sysreg_dev = &pdev->dev;
+
 	platform_device_register_data(vexpress_sysreg_dev, "leds-gpio",
 			PLATFORM_DEVID_AUTO, &vexpress_sysreg_leds_pdata,
 			sizeof(vexpress_sysreg_leds_pdata));
 
-	vexpress_sysreg_dev = &pdev->dev;
-
 	device_create_file(vexpress_sysreg_dev, &dev_attr_sys_id);
 
 	return 0;
diff --git a/drivers/mfd/wm5102-tables.c b/drivers/mfd/wm5102-tables.c
index f70c495..155c4a1 100644
--- a/drivers/mfd/wm5102-tables.c
+++ b/drivers/mfd/wm5102-tables.c
@@ -10,6 +10,7 @@
  * published by the Free Software Foundation.
  */
 
+#include <linux/device.h>
 #include <linux/module.h>
 
 #include <linux/mfd/arizona/core.h>
@@ -57,31 +58,54 @@
 };
 
 static const struct reg_default wm5102_revb_patch[] = {
+	{ 0x19, 0x0001 },
 	{ 0x80, 0x0003 },
 	{ 0x081, 0xE022 },
-	{ 0x410, 0x4080 },
-	{ 0x418, 0x4080 },
-	{ 0x420, 0x4080 },
-	{ 0x428, 0xC000 },
+	{ 0x410, 0x6080 },
+	{ 0x418, 0xa080 },
+	{ 0x420, 0xa080 },
+	{ 0x428, 0xe000 },
+	{ 0x443, 0xDC1A },
 	{ 0x4B0, 0x0066 },
 	{ 0x458, 0x000b },
 	{ 0x212, 0x0000 },
+	{ 0x171, 0x0000 },
+	{ 0x35E, 0x000C },
+	{ 0x2D4, 0x0000 },
 	{ 0x80, 0x0000 },
 };
 
 /* We use a function so we can use ARRAY_SIZE() */
 int wm5102_patch(struct arizona *arizona)
 {
+	const struct reg_default *wm5102_patch;
+	int ret = 0;
+	int i, patch_size;
+
 	switch (arizona->rev) {
 	case 0:
-		return regmap_register_patch(arizona->regmap,
-					     wm5102_reva_patch,
-					     ARRAY_SIZE(wm5102_reva_patch));
+		wm5102_patch = wm5102_reva_patch;
+		patch_size = ARRAY_SIZE(wm5102_reva_patch);
+		break;
 	default:
-		return regmap_register_patch(arizona->regmap,
-					     wm5102_revb_patch,
-					     ARRAY_SIZE(wm5102_revb_patch));
+		wm5102_patch = wm5102_revb_patch;
+		patch_size = ARRAY_SIZE(wm5102_revb_patch);
 	}
+
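+	/* Write the patch straight to the hardware, bypassing the register cache */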
+	regcache_cache_bypass(arizona->regmap, true);
+
+	for (i = 0; i < patch_size; i++) {
+		ret = regmap_write(arizona->regmap, wm5102_patch[i].reg,
+				   wm5102_patch[i].def);
+		if (ret != 0) {
+			dev_err(arizona->dev, "Failed to write %x = %x: %d\n",
+				wm5102_patch[i].reg, wm5102_patch[i].def, ret);
+			goto out;
+		}
+	}
+
+out:
+	regcache_cache_bypass(arizona->regmap, false);
+	return ret;
 }
 
 static const struct regmap_irq wm5102_aod_irqs[ARIZONA_NUM_IRQ] = {
@@ -282,7 +306,7 @@
 	{ 0x00000155, 0x0000 },   /* R341   - Rate Estimator 4 */ 
 	{ 0x00000156, 0x0000 },   /* R342   - Rate Estimator 5 */ 
 	{ 0x00000161, 0x0000 },   /* R353   - Dynamic Frequency Scaling 1 */ 
-	{ 0x00000171, 0x0002 },   /* R369   - FLL1 Control 1 */
+	{ 0x00000171, 0x0000 },   /* R369   - FLL1 Control 1 */
 	{ 0x00000172, 0x0008 },   /* R370   - FLL1 Control 2 */ 
 	{ 0x00000173, 0x0018 },   /* R371   - FLL1 Control 3 */ 
 	{ 0x00000174, 0x007D },   /* R372   - FLL1 Control 4 */ 
@@ -366,7 +390,7 @@
 	{ 0x00000400, 0x0000 },   /* R1024  - Output Enables 1 */ 
 	{ 0x00000408, 0x0000 },   /* R1032  - Output Rate 1 */ 
 	{ 0x00000409, 0x0022 },   /* R1033  - Output Volume Ramp */ 
-	{ 0x00000410, 0x4080 },   /* R1040  - Output Path Config 1L */
+	{ 0x00000410, 0x6080 },   /* R1040  - Output Path Config 1L */
 	{ 0x00000411, 0x0180 },   /* R1041  - DAC Digital Volume 1L */ 
 	{ 0x00000412, 0x0081 },   /* R1042  - DAC Volume Limit 1L */
 	{ 0x00000413, 0x0001 },   /* R1043  - Noise Gate Select 1L */ 
@@ -374,7 +398,7 @@
 	{ 0x00000415, 0x0180 },   /* R1045  - DAC Digital Volume 1R */ 
 	{ 0x00000416, 0x0081 },   /* R1046  - DAC Volume Limit 1R */
 	{ 0x00000417, 0x0002 },   /* R1047  - Noise Gate Select 1R */ 
-	{ 0x00000418, 0x4080 },   /* R1048  - Output Path Config 2L */
+	{ 0x00000418, 0xA080 },   /* R1048  - Output Path Config 2L */
 	{ 0x00000419, 0x0180 },   /* R1049  - DAC Digital Volume 2L */ 
 	{ 0x0000041A, 0x0081 },   /* R1050  - DAC Volume Limit 2L */
 	{ 0x0000041B, 0x0004 },   /* R1051  - Noise Gate Select 2L */ 
@@ -382,11 +406,11 @@
 	{ 0x0000041D, 0x0180 },   /* R1053  - DAC Digital Volume 2R */ 
 	{ 0x0000041E, 0x0081 },   /* R1054  - DAC Volume Limit 2R */
 	{ 0x0000041F, 0x0008 },   /* R1055  - Noise Gate Select 2R */ 
-	{ 0x00000420, 0x4080 },   /* R1056  - Output Path Config 3L */
+	{ 0x00000420, 0xA080 },   /* R1056  - Output Path Config 3L */
 	{ 0x00000421, 0x0180 },   /* R1057  - DAC Digital Volume 3L */ 
 	{ 0x00000422, 0x0081 },   /* R1058  - DAC Volume Limit 3L */
 	{ 0x00000423, 0x0010 },   /* R1059  - Noise Gate Select 3L */ 
-	{ 0x00000428, 0xC000 },   /* R1064  - Output Path Config 4L */
+	{ 0x00000428, 0xE000 },   /* R1064  - Output Path Config 4L */
 	{ 0x00000429, 0x0180 },   /* R1065  - DAC Digital Volume 4L */ 
 	{ 0x0000042A, 0x0081 },   /* R1066  - Out Volume 4L */
 	{ 0x0000042B, 0x0040 },   /* R1067  - Noise Gate Select 4L */ 
@@ -401,7 +425,7 @@
 	{ 0x00000436, 0x0081 },   /* R1078  - DAC Volume Limit 5R */
 	{ 0x00000437, 0x0200 },   /* R1079  - Noise Gate Select 5R */
 	{ 0x00000450, 0x0000 },   /* R1104  - DAC AEC Control 1 */ 
-	{ 0x00000458, 0x0001 },   /* R1112  - Noise Gate Control */ 
+	{ 0x00000458, 0x000B },   /* R1112  - Noise Gate Control */
 	{ 0x00000490, 0x0069 },   /* R1168  - PDM SPK1 CTRL 1 */ 
 	{ 0x00000491, 0x0000 },   /* R1169  - PDM SPK1 CTRL 2 */ 
 	{ 0x00000500, 0x000C },   /* R1280  - AIF1 BCLK Ctrl */ 
diff --git a/drivers/mfd/wm831x-spi.c b/drivers/mfd/wm831x-spi.c
index 4e70e15..e7ed14f66 100644
--- a/drivers/mfd/wm831x-spi.c
+++ b/drivers/mfd/wm831x-spi.c
@@ -37,7 +37,7 @@
 	spi->bits_per_word = 16;
 	spi->mode = SPI_MODE_0;
 
-	dev_set_drvdata(&spi->dev, wm831x);
+	spi_set_drvdata(spi, wm831x);
 	wm831x->dev = &spi->dev;
 
 	wm831x->regmap = devm_regmap_init_spi(spi, &wm831x_regmap_config);
@@ -53,7 +53,7 @@
 
 static int wm831x_spi_remove(struct spi_device *spi)
 {
-	struct wm831x *wm831x = dev_get_drvdata(&spi->dev);
+	struct wm831x *wm831x = spi_get_drvdata(spi);
 
 	wm831x_device_exit(wm831x);
 
@@ -69,7 +69,7 @@
 
 static void wm831x_spi_shutdown(struct spi_device *spi)
 {
-	struct wm831x *wm831x = dev_get_drvdata(&spi->dev);
+	struct wm831x *wm831x = spi_get_drvdata(spi);
 
 	wm831x_device_shutdown(wm831x);
 }
diff --git a/drivers/mfd/wm8994-core.c b/drivers/mfd/wm8994-core.c
index 803e93f..00e4fe2 100644
--- a/drivers/mfd/wm8994-core.c
+++ b/drivers/mfd/wm8994-core.c
@@ -19,6 +19,9 @@
 #include <linux/err.h>
 #include <linux/delay.h>
 #include <linux/mfd/core.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/of_gpio.h>
 #include <linux/pm_runtime.h>
 #include <linux/regmap.h>
 #include <linux/regulator/consumer.h>
@@ -191,7 +194,7 @@
 	"SPKVDD2",
 };
 
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_RUNTIME
 static int wm8994_suspend(struct device *dev)
 {
 	struct wm8994 *wm8994 = dev_get_drvdata(dev);
@@ -396,6 +399,60 @@
 	{ 0x102, 0x0 },
 };
 
+#ifdef CONFIG_OF
+static int wm8994_set_pdata_from_of(struct wm8994 *wm8994)
+{
+	struct device_node *np = wm8994->dev->of_node;
+	struct wm8994_pdata *pdata = &wm8994->pdata;
+	int i;
+
+	if (!np)
+		return 0;
+
+	if (of_property_read_u32_array(np, "wlf,gpio-cfg", pdata->gpio_defaults,
+				       ARRAY_SIZE(pdata->gpio_defaults)) >= 0) {
+		for (i = 0; i < ARRAY_SIZE(pdata->gpio_defaults); i++) {
+			if (wm8994->pdata.gpio_defaults[i] == 0)
+				pdata->gpio_defaults[i]
+					= WM8994_CONFIGURE_GPIO;
+		}
+	}
+
+	of_property_read_u32_array(np, "wlf,micbias-cfg", pdata->micbias,
+				   ARRAY_SIZE(pdata->micbias));
+
+	pdata->lineout1_diff = true;
+	pdata->lineout2_diff = true;
+	if (of_find_property(np, "wlf,lineout1-se", NULL))
+		pdata->lineout1_diff = false;
+	if (of_find_property(np, "wlf,lineout2-se", NULL))
+		pdata->lineout2_diff = false;
+
+	if (of_find_property(np, "wlf,lineout1-feedback", NULL))
+		pdata->lineout1fb = true;
+	if (of_find_property(np, "wlf,lineout2-feedback", NULL))
+		pdata->lineout2fb = true;
+
+	if (of_find_property(np, "wlf,ldoena-always-driven", NULL))
+		pdata->lineout2fb = true;
+
+	pdata->ldo[0].enable = of_get_named_gpio(np, "wlf,ldo1ena", 0);
+	if (pdata->ldo[0].enable < 0)
+		pdata->ldo[0].enable = 0;
+
+	pdata->ldo[1].enable = of_get_named_gpio(np, "wlf,ldo2ena", 0);
+	if (pdata->ldo[1].enable < 0)
+		pdata->ldo[1].enable = 0;
+
+	return 0;
+}
+#else
+static int wm8994_set_pdata_from_of(struct wm8994 *wm8994)
+{
+	return 0;
+}
+#endif
+
 /*
  * Instantiate the generic non-control parts of the device.
  */
@@ -405,7 +462,7 @@
 	struct regmap_config *regmap_config;
 	const struct reg_default *regmap_patch = NULL;
 	const char *devname;
-	int ret, i, patch_regs;
+	int ret, i, patch_regs = 0;
 	int pulls = 0;
 
 	if (dev_get_platdata(wm8994->dev)) {
@@ -414,6 +471,10 @@
 	}
 	pdata = &wm8994->pdata;
 
+	ret = wm8994_set_pdata_from_of(wm8994);
+	if (ret != 0)
+		return ret;
+
 	dev_set_drvdata(wm8994->dev, wm8994);
 
 	/* Add the on-chip regulators first for bootstrapping */
@@ -673,9 +734,9 @@
 }
 
 static const struct of_device_id wm8994_of_match[] = {
-	{ .compatible = "wlf,wm1811", },
-	{ .compatible = "wlf,wm8994", },
-	{ .compatible = "wlf,wm8958", },
+	{ .compatible = "wlf,wm1811", .data = (void *)WM1811 },
+	{ .compatible = "wlf,wm8994", .data = (void *)WM8994 },
+	{ .compatible = "wlf,wm8958", .data = (void *)WM8958 },
 	{ }
 };
 MODULE_DEVICE_TABLE(of, wm8994_of_match);
@@ -683,6 +744,7 @@
 static int wm8994_i2c_probe(struct i2c_client *i2c,
 				      const struct i2c_device_id *id)
 {
+	const struct of_device_id *of_id;
 	struct wm8994 *wm8994;
 	int ret;
 
@@ -693,7 +755,14 @@
 	i2c_set_clientdata(i2c, wm8994);
 	wm8994->dev = &i2c->dev;
 	wm8994->irq = i2c->irq;
-	wm8994->type = id->driver_data;
+
+	if (i2c->dev.of_node) {
+		of_id = of_match_device(wm8994_of_match, &i2c->dev);
+		if (of_id)
+			wm8994->type = (int)of_id->data;
+	} else {
+		wm8994->type = id->driver_data;
+	}
 
 	wm8994->regmap = devm_regmap_init_i2c(i2c, &wm8994_base_regmap_config);
 	if (IS_ERR(wm8994->regmap)) {
@@ -724,15 +793,16 @@
 };
 MODULE_DEVICE_TABLE(i2c, wm8994_i2c_id);
 
-static UNIVERSAL_DEV_PM_OPS(wm8994_pm_ops, wm8994_suspend, wm8994_resume,
-			    NULL);
+static const struct dev_pm_ops wm8994_pm_ops = {
+	SET_RUNTIME_PM_OPS(wm8994_suspend, wm8994_resume, NULL)
+};
 
 static struct i2c_driver wm8994_i2c_driver = {
 	.driver = {
 		.name = "wm8994",
 		.owner = THIS_MODULE,
 		.pm = &wm8994_pm_ops,
-		.of_match_table = wm8994_of_match,
+		.of_match_table = of_match_ptr(wm8994_of_match),
 	},
 	.probe = wm8994_i2c_probe,
 	.remove = wm8994_i2c_remove,
diff --git a/drivers/power/rx51_battery.c b/drivers/power/rx51_battery.c
index 1a1dcb8..cbde1d6 100644
--- a/drivers/power/rx51_battery.c
+++ b/drivers/power/rx51_battery.c
@@ -42,6 +42,7 @@
 	req.method = TWL4030_MADC_SW1;
 	req.func_cb = NULL;
 	req.type = TWL4030_MADC_WAIT;
+	req.raw = true;
 
 	if (twl4030_madc_conversion(&req) <= 0)
 		return -ENODATA;
diff --git a/drivers/pwm/Kconfig b/drivers/pwm/Kconfig
index 0e0bfa0..115b644 100644
--- a/drivers/pwm/Kconfig
+++ b/drivers/pwm/Kconfig
@@ -147,8 +147,7 @@
 
 config  PWM_TIECAP
 	tristate "ECAP PWM support"
-	depends on SOC_AM33XX
-	select PWM_TIPWMSS
+	depends on SOC_AM33XX || ARCH_DAVINCI_DA8XX
 	help
 	  PWM driver support for the ECAP APWM controller found on AM33XX
 	  TI SOC
@@ -158,8 +157,7 @@
 
 config  PWM_TIEHRPWM
 	tristate "EHRPWM PWM support"
-	depends on SOC_AM33XX
-	select PWM_TIPWMSS
+	depends on SOC_AM33XX || ARCH_DAVINCI_DA8XX
 	help
 	  PWM driver support for the EHRPWM controller found on AM33XX
 	  TI SOC
@@ -169,7 +167,7 @@
 
 config  PWM_TIPWMSS
 	bool
-	depends on SOC_AM33XX && (PWM_TIEHRPWM || PWM_TIECAP)
+	default y if SOC_AM33XX && (PWM_TIECAP || PWM_TIEHRPWM)
 	help
 	  PWM Subsystem driver support for AM33xx SOC.
 
diff --git a/drivers/pwm/pwm-ab8500.c b/drivers/pwm/pwm-ab8500.c
index 4248d041..1d07a6f 100644
--- a/drivers/pwm/pwm-ab8500.c
+++ b/drivers/pwm/pwm-ab8500.c
@@ -66,7 +66,7 @@
 				AB8500_MISC, AB8500_PWM_OUT_CTRL7_REG,
 				1 << (chip->base - 1), ENABLE_PWM);
 	if (ret < 0)
-		dev_err(chip->dev, "%s: Failed to disable PWM, Error %d\n",
+		dev_err(chip->dev, "%s: Failed to enable PWM, Error %d\n",
 							pwm->label, ret);
 	return ret;
 }
@@ -88,6 +88,7 @@
 	.config = ab8500_pwm_config,
 	.enable = ab8500_pwm_enable,
 	.disable = ab8500_pwm_disable,
+	.owner = THIS_MODULE,
 };
 
 static int ab8500_pwm_probe(struct platform_device *pdev)
@@ -99,7 +100,7 @@
 	 * Nothing to be done in probe, this is required to get the
 	 * device which is required for ab8500 read and write
 	 */
-	ab8500 = kzalloc(sizeof(*ab8500), GFP_KERNEL);
+	ab8500 = devm_kzalloc(&pdev->dev, sizeof(*ab8500), GFP_KERNEL);
 	if (ab8500 == NULL) {
 		dev_err(&pdev->dev, "failed to allocate memory\n");
 		return -ENOMEM;
@@ -111,10 +112,8 @@
 	ab8500->chip.npwm = 1;
 
 	err = pwmchip_add(&ab8500->chip);
-	if (err < 0) {
-		kfree(ab8500);
+	if (err < 0)
 		return err;
-	}
 
 	dev_dbg(&pdev->dev, "pwm probe successful\n");
 	platform_set_drvdata(pdev, ab8500);
@@ -132,7 +131,6 @@
 		return err;
 
 	dev_dbg(&pdev->dev, "pwm driver removed\n");
-	kfree(ab8500);
 
 	return 0;
 }
diff --git a/drivers/pwm/pwm-atmel-tcb.c b/drivers/pwm/pwm-atmel-tcb.c
index 16cb530..0a7b658 100644
--- a/drivers/pwm/pwm-atmel-tcb.c
+++ b/drivers/pwm/pwm-atmel-tcb.c
@@ -358,6 +358,7 @@
 	.set_polarity = atmel_tcb_pwm_set_polarity,
 	.enable = atmel_tcb_pwm_enable,
 	.disable = atmel_tcb_pwm_disable,
+	.owner = THIS_MODULE,
 };
 
 static int atmel_tcb_pwm_probe(struct platform_device *pdev)
diff --git a/drivers/pwm/pwm-imx.c b/drivers/pwm/pwm-imx.c
index 3f5677b..ec28798 100644
--- a/drivers/pwm/pwm-imx.c
+++ b/drivers/pwm/pwm-imx.c
@@ -43,7 +43,6 @@
 	struct clk	*clk_per;
 	struct clk	*clk_ipg;
 
-	int		enabled;
 	void __iomem	*mmio_base;
 
 	struct pwm_chip	chip;
@@ -135,7 +134,7 @@
 		MX3_PWMCR_DOZEEN | MX3_PWMCR_WAITEN |
 		MX3_PWMCR_DBGEN | MX3_PWMCR_CLKSRC_IPG_HIGH;
 
-	if (imx->enabled)
+	if (test_bit(PWMF_ENABLED, &pwm->flags))
 		cr |= MX3_PWMCR_EN;
 
 	writel(cr, imx->mmio_base + MX3_PWMCR);
@@ -186,8 +185,6 @@
 
 	imx->set_enable(chip, true);
 
-	imx->enabled = 1;
-
 	return 0;
 }
 
@@ -198,7 +195,6 @@
 	imx->set_enable(chip, false);
 
 	clk_disable_unprepare(imx->clk_per);
-	imx->enabled = 0;
 }
 
 static struct pwm_ops imx_pwm_ops = {
diff --git a/drivers/pwm/pwm-lpc32xx.c b/drivers/pwm/pwm-lpc32xx.c
index b3f0d0d..8272883 100644
--- a/drivers/pwm/pwm-lpc32xx.c
+++ b/drivers/pwm/pwm-lpc32xx.c
@@ -37,6 +37,7 @@
 	struct lpc32xx_pwm_chip *lpc32xx = to_lpc32xx_pwm_chip(chip);
 	unsigned long long c;
 	int period_cycles, duty_cycles;
+	u32 val;
 
 	c = clk_get_rate(lpc32xx->clk) / 256;
 	c = c * period_ns;
@@ -68,8 +69,10 @@
 		c = 255;
 	duty_cycles = 256 - c;
 
-	writel(PWM_ENABLE | PWM_RELOADV(period_cycles) | PWM_DUTY(duty_cycles),
-		lpc32xx->base + (pwm->hwpwm << 2));
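+	/* Update only the reload and duty fields, preserving the enable bit */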
+	val = readl(lpc32xx->base + (pwm->hwpwm << 2));
+	val &= ~0xFFFF;
+	val |= PWM_RELOADV(period_cycles) | PWM_DUTY(duty_cycles);
+	writel(val, lpc32xx->base + (pwm->hwpwm << 2));
 
 	return 0;
 }
@@ -77,15 +80,29 @@
 static int lpc32xx_pwm_enable(struct pwm_chip *chip, struct pwm_device *pwm)
 {
 	struct lpc32xx_pwm_chip *lpc32xx = to_lpc32xx_pwm_chip(chip);
+	u32 val;
+	int ret;
 
-	return clk_enable(lpc32xx->clk);
+	ret = clk_enable(lpc32xx->clk);
+	if (ret)
+		return ret;
+
+	val = readl(lpc32xx->base + (pwm->hwpwm << 2));
+	val |= PWM_ENABLE;
+	writel(val, lpc32xx->base + (pwm->hwpwm << 2));
+
+	return 0;
 }
 
 static void lpc32xx_pwm_disable(struct pwm_chip *chip, struct pwm_device *pwm)
 {
 	struct lpc32xx_pwm_chip *lpc32xx = to_lpc32xx_pwm_chip(chip);
+	u32 val;
 
-	writel(0, lpc32xx->base + (pwm->hwpwm << 2));
+	val = readl(lpc32xx->base + (pwm->hwpwm << 2));
+	val &= ~PWM_ENABLE;
+	writel(val, lpc32xx->base + (pwm->hwpwm << 2));
+
 	clk_disable(lpc32xx->clk);
 }
 
@@ -145,7 +162,7 @@
 	return pwmchip_remove(&lpc32xx->chip);
 }
 
-static struct of_device_id lpc32xx_pwm_dt_ids[] = {
+static const struct of_device_id lpc32xx_pwm_dt_ids[] = {
 	{ .compatible = "nxp,lpc3220-pwm", },
 	{ /* sentinel */ }
 };
diff --git a/drivers/pwm/pwm-mxs.c b/drivers/pwm/pwm-mxs.c
index a53d309..3febddd 100644
--- a/drivers/pwm/pwm-mxs.c
+++ b/drivers/pwm/pwm-mxs.c
@@ -38,7 +38,6 @@
 
 struct mxs_pwm_chip {
 	struct pwm_chip chip;
-	struct device *dev;
 	struct clk *clk;
 	void __iomem *base;
 };
@@ -166,7 +165,6 @@
 		return ret;
 	}
 
-	mxs->dev = &pdev->dev;
 	platform_set_drvdata(pdev, mxs);
 
 	stmp_reset_block(mxs->base);
@@ -181,7 +179,7 @@
 	return pwmchip_remove(&mxs->chip);
 }
 
-static struct of_device_id mxs_pwm_dt_ids[] = {
+static const struct of_device_id mxs_pwm_dt_ids[] = {
 	{ .compatible = "fsl,imx23-pwm", },
 	{ /* sentinel */ }
 };
diff --git a/drivers/pwm/pwm-puv3.c b/drivers/pwm/pwm-puv3.c
index db964e6..d1eb499 100644
--- a/drivers/pwm/pwm-puv3.c
+++ b/drivers/pwm/pwm-puv3.c
@@ -27,7 +27,6 @@
 	struct pwm_chip chip;
 	void __iomem *base;
 	struct clk *clk;
-	bool enabled;
 };
 
 static inline struct puv3_pwm_chip *to_puv3(struct pwm_chip *chip)
diff --git a/drivers/pwm/pwm-pxa.c b/drivers/pwm/pwm-pxa.c
index 20370e6..dee6ab55 100644
--- a/drivers/pwm/pwm-pxa.c
+++ b/drivers/pwm/pwm-pxa.c
@@ -23,14 +23,13 @@
 #include <asm/div64.h>
 
 #define HAS_SECONDARY_PWM	0x10
-#define PWM_ID_BASE(d)		((d) & 0xf)
 
 static const struct platform_device_id pwm_id_table[] = {
 	/*   PWM    has_secondary_pwm? */
 	{ "pxa25x-pwm", 0 },
-	{ "pxa27x-pwm", 0 | HAS_SECONDARY_PWM },
-	{ "pxa168-pwm", 1 },
-	{ "pxa910-pwm", 1 },
+	{ "pxa27x-pwm", HAS_SECONDARY_PWM },
+	{ "pxa168-pwm", 0 },
+	{ "pxa910-pwm", 0 },
 	{ },
 };
 MODULE_DEVICE_TABLE(platform, pwm_id_table);
@@ -48,7 +47,6 @@
 	struct device	*dev;
 
 	struct clk	*clk;
-	int		clk_enabled;
 	void __iomem	*mmio_base;
 };
 
@@ -108,24 +106,15 @@
 static int pxa_pwm_enable(struct pwm_chip *chip, struct pwm_device *pwm)
 {
 	struct pxa_pwm_chip *pc = to_pxa_pwm_chip(chip);
-	int rc = 0;
 
-	if (!pc->clk_enabled) {
-		rc = clk_prepare_enable(pc->clk);
-		if (!rc)
-			pc->clk_enabled++;
-	}
-	return rc;
+	return clk_prepare_enable(pc->clk);
 }
 
 static void pxa_pwm_disable(struct pwm_chip *chip, struct pwm_device *pwm)
 {
 	struct pxa_pwm_chip *pc = to_pxa_pwm_chip(chip);
 
-	if (pc->clk_enabled) {
-		clk_disable_unprepare(pc->clk);
-		pc->clk_enabled--;
-	}
+	clk_disable_unprepare(pc->clk);
 }
 
 static struct pwm_ops pxa_pwm_ops = {
@@ -152,8 +141,6 @@
 	if (IS_ERR(pwm->clk))
 		return PTR_ERR(pwm->clk);
 
-	pwm->clk_enabled = 0;
-
 	pwm->chip.dev = &pdev->dev;
 	pwm->chip.ops = &pxa_pwm_ops;
 	pwm->chip.base = -1;
diff --git a/drivers/pwm/pwm-samsung.c b/drivers/pwm/pwm-samsung.c
index 5207e6c..a0ece50 100644
--- a/drivers/pwm/pwm-samsung.c
+++ b/drivers/pwm/pwm-samsung.c
@@ -289,10 +289,10 @@
 	return 0;
 }
 
-#ifdef CONFIG_PM
-static int s3c_pwm_suspend(struct platform_device *pdev, pm_message_t state)
+#ifdef CONFIG_PM_SLEEP
+static int s3c_pwm_suspend(struct device *dev)
 {
-	struct s3c_chip *s3c = platform_get_drvdata(pdev);
+	struct s3c_chip *s3c = dev_get_drvdata(dev);
 
 	/* No one preserve these values during suspend so reset them
 	 * Otherwise driver leaves PWM unconfigured if same values
@@ -304,9 +304,9 @@
 	return 0;
 }
 
-static int s3c_pwm_resume(struct platform_device *pdev)
+static int s3c_pwm_resume(struct device *dev)
 {
-	struct s3c_chip *s3c = platform_get_drvdata(pdev);
+	struct s3c_chip *s3c = dev_get_drvdata(dev);
 	unsigned long tcon;
 
 	/* Restore invertion */
@@ -316,21 +316,19 @@
 
 	return 0;
 }
-
-#else
-#define s3c_pwm_suspend NULL
-#define s3c_pwm_resume NULL
 #endif
 
+static SIMPLE_DEV_PM_OPS(s3c_pwm_pm_ops, s3c_pwm_suspend,
+			s3c_pwm_resume);
+
 static struct platform_driver s3c_pwm_driver = {
 	.driver		= {
 		.name	= "s3c24xx-pwm",
 		.owner	= THIS_MODULE,
+		.pm	= &s3c_pwm_pm_ops,
 	},
 	.probe		= s3c_pwm_probe,
 	.remove		= s3c_pwm_remove,
-	.suspend	= s3c_pwm_suspend,
-	.resume		= s3c_pwm_resume,
 };
 
 static int __init pwm_init(void)
diff --git a/drivers/pwm/pwm-spear.c b/drivers/pwm/pwm-spear.c
index 69a2d9e..6d99e2c 100644
--- a/drivers/pwm/pwm-spear.c
+++ b/drivers/pwm/pwm-spear.c
@@ -49,13 +49,11 @@
  * @mmio_base: base address of pwm chip
  * @clk: pointer to clk structure of pwm chip
  * @chip: linux pwm chip representation
- * @dev: pointer to device structure of pwm chip
  */
 struct spear_pwm_chip {
 	void __iomem *mmio_base;
 	struct clk *clk;
 	struct pwm_chip chip;
-	struct device *dev;
 };
 
 static inline struct spear_pwm_chip *to_spear_pwm_chip(struct pwm_chip *chip)
@@ -143,7 +141,7 @@
 	u32 val;
 
 	rc = clk_enable(pc->clk);
-	if (!rc)
+	if (rc)
 		return rc;
 
 	val = spear_pwm_readl(pc, pwm->hwpwm, PWMCR);
@@ -200,7 +198,6 @@
 	if (IS_ERR(pc->clk))
 		return PTR_ERR(pc->clk);
 
-	pc->dev = &pdev->dev;
 	platform_set_drvdata(pdev, pc);
 
 	pc->chip.dev = &pdev->dev;
@@ -209,12 +206,12 @@
 	pc->chip.npwm = NUM_PWM;
 
 	ret = clk_prepare(pc->clk);
-	if (!ret)
+	if (ret)
 		return ret;
 
 	if (of_device_is_compatible(np, "st,spear1340-pwm")) {
 		ret = clk_enable(pc->clk);
-		if (!ret) {
+		if (ret) {
 			clk_unprepare(pc->clk);
 			return ret;
 		}
@@ -251,7 +248,7 @@
 	return pwmchip_remove(&pc->chip);
 }
 
-static struct of_device_id spear_pwm_of_match[] = {
+static const struct of_device_id spear_pwm_of_match[] = {
 	{ .compatible = "st,spear320-pwm" },
 	{ .compatible = "st,spear1340-pwm" },
 	{ }
diff --git a/drivers/pwm/pwm-tegra.c b/drivers/pwm/pwm-tegra.c
index af3ab48..3d75f4a 100644
--- a/drivers/pwm/pwm-tegra.c
+++ b/drivers/pwm/pwm-tegra.c
@@ -233,7 +233,7 @@
 	return pwmchip_remove(&pc->chip);
 }
 
-static struct of_device_id tegra_pwm_of_match[] = {
+static const struct of_device_id tegra_pwm_of_match[] = {
 	{ .compatible = "nvidia,tegra20-pwm" },
 	{ .compatible = "nvidia,tegra30-pwm" },
 	{ }
diff --git a/drivers/pwm/pwm-tiecap.c b/drivers/pwm/pwm-tiecap.c
index 22e96e2..0d65fb2 100644
--- a/drivers/pwm/pwm-tiecap.c
+++ b/drivers/pwm/pwm-tiecap.c
@@ -295,7 +295,7 @@
 	return pwmchip_remove(&pc->chip);
 }
 
-void ecap_pwm_save_context(struct ecap_pwm_chip *pc)
+static void ecap_pwm_save_context(struct ecap_pwm_chip *pc)
 {
 	pm_runtime_get_sync(pc->chip.dev);
 	pc->ctx.ecctl2 = readw(pc->mmio_base + ECCTL2);
@@ -304,13 +304,14 @@
 	pm_runtime_put_sync(pc->chip.dev);
 }
 
-void ecap_pwm_restore_context(struct ecap_pwm_chip *pc)
+static void ecap_pwm_restore_context(struct ecap_pwm_chip *pc)
 {
 	writel(pc->ctx.cap3, pc->mmio_base + CAP3);
 	writel(pc->ctx.cap4, pc->mmio_base + CAP4);
 	writew(pc->ctx.ecctl2, pc->mmio_base + ECCTL2);
 }
 
+#ifdef CONFIG_PM_SLEEP
 static int ecap_pwm_suspend(struct device *dev)
 {
 	struct ecap_pwm_chip *pc = dev_get_drvdata(dev);
@@ -337,6 +338,7 @@
 	ecap_pwm_restore_context(pc);
 	return 0;
 }
+#endif
 
 static SIMPLE_DEV_PM_OPS(ecap_pwm_pm_ops, ecap_pwm_suspend, ecap_pwm_resume);
 
diff --git a/drivers/pwm/pwm-tiehrpwm.c b/drivers/pwm/pwm-tiehrpwm.c
index 8b4c86f..6a21759 100644
--- a/drivers/pwm/pwm-tiehrpwm.c
+++ b/drivers/pwm/pwm-tiehrpwm.c
@@ -533,7 +533,7 @@
 	return pwmchip_remove(&pc->chip);
 }
 
-void ehrpwm_pwm_save_context(struct ehrpwm_pwm_chip *pc)
+static void ehrpwm_pwm_save_context(struct ehrpwm_pwm_chip *pc)
 {
 	pm_runtime_get_sync(pc->chip.dev);
 	pc->ctx.tbctl = ehrpwm_read(pc->mmio_base, TBCTL);
@@ -547,7 +547,7 @@
 	pm_runtime_put_sync(pc->chip.dev);
 }
 
-void ehrpwm_pwm_restore_context(struct ehrpwm_pwm_chip *pc)
+static void ehrpwm_pwm_restore_context(struct ehrpwm_pwm_chip *pc)
 {
 	ehrpwm_write(pc->mmio_base, TBPRD, pc->ctx.tbprd);
 	ehrpwm_write(pc->mmio_base, CMPA, pc->ctx.cmpa);
@@ -559,6 +559,7 @@
 	ehrpwm_write(pc->mmio_base, TBCTL, pc->ctx.tbctl);
 }
 
+#ifdef CONFIG_PM_SLEEP
 static int ehrpwm_pwm_suspend(struct device *dev)
 {
 	struct ehrpwm_pwm_chip *pc = dev_get_drvdata(dev);
@@ -594,6 +595,7 @@
 	ehrpwm_pwm_restore_context(pc);
 	return 0;
 }
+#endif
 
 static SIMPLE_DEV_PM_OPS(ehrpwm_pwm_pm_ops, ehrpwm_pwm_suspend,
 		ehrpwm_pwm_resume);
diff --git a/drivers/pwm/pwm-tipwmss.c b/drivers/pwm/pwm-tipwmss.c
index 17cbc59..c9c3d3a 100644
--- a/drivers/pwm/pwm-tipwmss.c
+++ b/drivers/pwm/pwm-tipwmss.c
@@ -101,6 +101,7 @@
 	return 0;
 }
 
+#ifdef CONFIG_PM_SLEEP
 static int pwmss_suspend(struct device *dev)
 {
 	struct pwmss_info *info = dev_get_drvdata(dev);
@@ -118,6 +119,7 @@
 	writew(info->pwmss_clkconfig, info->mmio_base + PWMSS_CLKCONFIG);
 	return 0;
 }
+#endif
 
 static SIMPLE_DEV_PM_OPS(pwmss_pm_ops, pwmss_suspend, pwmss_resume);
 
diff --git a/drivers/pwm/pwm-tipwmss.h b/drivers/pwm/pwm-tipwmss.h
index 11f76a1..10ad804 100644
--- a/drivers/pwm/pwm-tipwmss.h
+++ b/drivers/pwm/pwm-tipwmss.h
@@ -18,7 +18,6 @@
 #ifndef __TIPWMSS_H
 #define __TIPWMSS_H
 
-#ifdef CONFIG_PWM_TIPWMSS
 /* PWM subsystem clock gating */
 #define PWMSS_ECAPCLK_EN	BIT(0)
 #define PWMSS_ECAPCLK_STOP_REQ	BIT(1)
@@ -28,6 +27,7 @@
 #define PWMSS_ECAPCLK_EN_ACK	BIT(0)
 #define PWMSS_EPWMCLK_EN_ACK	BIT(8)
 
+#ifdef CONFIG_PWM_TIPWMSS
 extern u16 pwmss_submodule_state_change(struct device *dev, int set);
 #else
 static inline u16 pwmss_submodule_state_change(struct device *dev, int set)
diff --git a/drivers/pwm/pwm-twl-led.c b/drivers/pwm/pwm-twl-led.c
index 83e25d4..29d1bba 100644
--- a/drivers/pwm/pwm-twl-led.c
+++ b/drivers/pwm/pwm-twl-led.c
@@ -271,6 +271,7 @@
 	.enable = twl4030_pwmled_enable,
 	.disable = twl4030_pwmled_disable,
 	.config = twl4030_pwmled_config,
+	.owner = THIS_MODULE,
 };
 
 static const struct pwm_ops twl6030_pwmled_ops = {
@@ -279,6 +280,7 @@
 	.config = twl6030_pwmled_config,
 	.request = twl6030_pwmled_request,
 	.free = twl6030_pwmled_free,
+	.owner = THIS_MODULE,
 };
 
 static int twl_pwmled_probe(struct platform_device *pdev)
@@ -321,7 +323,7 @@
 }
 
 #ifdef CONFIG_OF
-static struct of_device_id twl_pwmled_of_match[] = {
+static const struct of_device_id twl_pwmled_of_match[] = {
 	{ .compatible = "ti,twl4030-pwmled" },
 	{ .compatible = "ti,twl6030-pwmled" },
 	{ },
diff --git a/drivers/pwm/pwm-twl.c b/drivers/pwm/pwm-twl.c
index bf3fda2..eef9105 100644
--- a/drivers/pwm/pwm-twl.c
+++ b/drivers/pwm/pwm-twl.c
@@ -248,7 +248,7 @@
 	twl->twl6030_toggle3 = val;
 out:
 	mutex_unlock(&twl->mutex);
-	return 0;
+	return ret;
 }
 
 static void twl6030_pwm_disable(struct pwm_chip *chip, struct pwm_device *pwm)
@@ -287,12 +287,14 @@
 	.disable = twl4030_pwm_disable,
 	.request = twl4030_pwm_request,
 	.free = twl4030_pwm_free,
+	.owner = THIS_MODULE,
 };
 
 static const struct pwm_ops twl6030_pwm_ops = {
 	.config = twl_pwm_config,
 	.enable = twl6030_pwm_enable,
 	.disable = twl6030_pwm_disable,
+	.owner = THIS_MODULE,
 };
 
 static int twl_pwm_probe(struct platform_device *pdev)
@@ -333,7 +335,7 @@
 }
 
 #ifdef CONFIG_OF
-static struct of_device_id twl_pwm_of_match[] = {
+static const struct of_device_id twl_pwm_of_match[] = {
 	{ .compatible = "ti,twl4030-pwm" },
 	{ .compatible = "ti,twl6030-pwm" },
 	{ },
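
The .owner = THIS_MODULE additions to the twl pwm_ops above are not
cosmetic: the PWM core takes a module reference on the ops' owner when a
consumer requests a PWM, so leaving it unset allows the provider module to
be unloaded while one of its PWMs is still in use.  A sketch with
illustrative callback names:

	static const struct pwm_ops foo_pwm_ops = {
		.config		= foo_pwm_config,
		.enable		= foo_pwm_enable,
		.disable	= foo_pwm_disable,
		.owner		= THIS_MODULE,	/* pins the module while a PWM is held */
	};
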
diff --git a/drivers/s390/char/sclp_cmd.c b/drivers/s390/char/sclp_cmd.c
index 178836e..bf07c3a 100644
--- a/drivers/s390/char/sclp_cmd.c
+++ b/drivers/s390/char/sclp_cmd.c
@@ -345,7 +345,6 @@
 	struct list_head list;
 	u16 rn;
 	int standby;
-	int usecount;
 };
 
 struct assign_storage_sccb {
@@ -463,21 +462,10 @@
 			break;
 		if (start > istart + rzm - 1)
 			continue;
-		if (online) {
-			if (incr->usecount++)
-				continue;
-			/*
-			 * Don't break the loop if one assign fails. Loop may
-			 * be walked again on CANCEL and we can't save
-			 * information if state changed before or not.
-			 * So continue and increase usecount for all increments.
-			 */
+		if (online)
 			rc |= sclp_assign_storage(incr->rn);
-		} else {
-			if (--incr->usecount)
-				continue;
+		else
 			sclp_unassign_storage(incr->rn);
-		}
 	}
 	return rc ? -EIO : 0;
 }
@@ -561,8 +549,6 @@
 	add_memory_merged(0);
 }
 
-#define MEM_SCT_SIZE (1UL << SECTION_SIZE_BITS)
-
 static void __init insert_increment(u16 rn, int standby, int assigned)
 {
 	struct memory_increment *incr, *new_incr;
@@ -574,8 +560,6 @@
 		return;
 	new_incr->rn = rn;
 	new_incr->standby = standby;
-	if (!standby)
-		new_incr->usecount = rzm > MEM_SCT_SIZE ? rzm/MEM_SCT_SIZE : 1;
 	last_rn = 0;
 	prev = &sclp_mem_list;
 	list_for_each_entry(incr, &sclp_mem_list, list) {
diff --git a/drivers/s390/char/zcore.c b/drivers/s390/char/zcore.c
index 2282061..9e5e146 100644
--- a/drivers/s390/char/zcore.c
+++ b/drivers/s390/char/zcore.c
@@ -426,7 +426,7 @@
 			      GFP_KERNEL);
 	if (!chunk_array)
 		return -ENOMEM;
-	detect_memory_layout(chunk_array);
+	detect_memory_layout(chunk_array, 0);
 	buf = kzalloc(MEMORY_CHUNKS * CHUNK_INFO_SIZE, GFP_KERNEL);
 	if (!buf) {
 		kfree(chunk_array);
@@ -557,7 +557,7 @@
 /*
  * Initialize dump globals for a given architecture
  */
-static int __init sys_info_init(enum arch_id arch)
+static int __init sys_info_init(enum arch_id arch, unsigned long mem_end)
 {
 	int rc;
 
@@ -579,7 +579,7 @@
 	rc = init_cpu_info(arch);
 	if (rc)
 		return rc;
-	sys_info.mem_size = real_memory_size;
+	sys_info.mem_size = mem_end;
 
 	return 0;
 }
@@ -601,7 +601,7 @@
 	return 0;
 }
 
-static int __init get_mem_size(unsigned long *mem)
+static int __init get_mem_info(unsigned long *mem, unsigned long *end)
 {
 	int i;
 	struct mem_chunk *chunk_array;
@@ -610,33 +610,31 @@
 			      GFP_KERNEL);
 	if (!chunk_array)
 		return -ENOMEM;
-	detect_memory_layout(chunk_array);
+	detect_memory_layout(chunk_array, 0);
 	for (i = 0; i < MEMORY_CHUNKS; i++) {
 		if (chunk_array[i].size == 0)
 			break;
 		*mem += chunk_array[i].size;
+		*end = max(*end, chunk_array[i].addr + chunk_array[i].size);
 	}
 	kfree(chunk_array);
 	return 0;
 }
 
-static int __init zcore_header_init(int arch, struct zcore_header *hdr)
+static void __init zcore_header_init(int arch, struct zcore_header *hdr,
+				     unsigned long mem_size)
 {
-	int rc, i;
-	unsigned long memory = 0;
 	u32 prefix;
+	int i;
 
 	if (arch == ARCH_S390X)
 		hdr->arch_id = DUMP_ARCH_S390X;
 	else
 		hdr->arch_id = DUMP_ARCH_S390;
-	rc = get_mem_size(&memory);
-	if (rc)
-		return rc;
-	hdr->mem_size = memory;
-	hdr->rmem_size = memory;
+	hdr->mem_size = mem_size;
+	hdr->rmem_size = mem_size;
 	hdr->mem_end = sys_info.mem_size;
-	hdr->num_pages = memory / PAGE_SIZE;
+	hdr->num_pages = mem_size / PAGE_SIZE;
 	hdr->tod = get_tod_clock();
 	get_cpu_id(&hdr->cpu_id);
 	for (i = 0; zfcpdump_save_areas[i]; i++) {
@@ -647,7 +645,6 @@
 		hdr->lc_vec[hdr->cpu_cnt] = prefix;
 		hdr->cpu_cnt++;
 	}
-	return 0;
 }
 
 /*
@@ -682,9 +679,11 @@
 
 static int __init zcore_init(void)
 {
+	unsigned long mem_size, mem_end;
 	unsigned char arch;
 	int rc;
 
+	mem_size = mem_end = 0;
 	if (ipl_info.type != IPL_TYPE_FCP_DUMP)
 		return -ENODATA;
 	if (OLDMEM_BASE)
@@ -727,13 +726,14 @@
 	}
 #endif /* CONFIG_64BIT */
 
-	rc = sys_info_init(arch);
+	rc = get_mem_info(&mem_size, &mem_end);
 	if (rc)
 		goto fail;
 
-	rc = zcore_header_init(arch, &zcore_header);
+	rc = sys_info_init(arch, mem_end);
 	if (rc)
 		goto fail;
+	zcore_header_init(arch, &zcore_header, mem_size);
 
 	rc = zcore_reipl_init();
 	if (rc)
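
get_mem_info() above now reports two distinct quantities: the sum of all
chunk sizes (mem_size) and the highest chunk end address (mem_end).  With
standby memory the layout may contain holes, so the two can differ.  A
self-contained sketch of the computation (the struct here is illustrative,
mirroring struct mem_chunk):

	#include <linux/kernel.h>	/* max() */

	struct chunk {
		unsigned long addr;
		unsigned long size;
	};

	static void mem_sum_and_end(const struct chunk *c, int n,
				    unsigned long *size, unsigned long *end)
	{
		int i;

		*size = *end = 0;
		for (i = 0; i < n; i++) {
			if (c[i].size == 0)
				break;	/* list ends at the first empty chunk */
			*size += c[i].size;
			*end = max(*end, c[i].addr + c[i].size);
		}
	}
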
diff --git a/drivers/s390/cio/blacklist.c b/drivers/s390/cio/blacklist.c
index 2d2a966..a9fe3de 100644
--- a/drivers/s390/cio/blacklist.c
+++ b/drivers/s390/cio/blacklist.c
@@ -1,7 +1,7 @@
 /*
  *   S/390 common I/O routines -- blacklisting of specific devices
  *
- *    Copyright IBM Corp. 1999, 2002
+ *    Copyright IBM Corp. 1999, 2013
  *    Author(s): Ingo Adlung (adlung@de.ibm.com)
  *		 Cornelia Huck (cornelia.huck@de.ibm.com)
  *		 Arnd Bergmann (arndb@de.ibm.com)
@@ -17,8 +17,9 @@
 #include <linux/ctype.h>
 #include <linux/device.h>
 
-#include <asm/cio.h>
 #include <asm/uaccess.h>
+#include <asm/cio.h>
+#include <asm/ipl.h>
 
 #include "blacklist.h"
 #include "cio.h"
@@ -172,6 +173,29 @@
 			to_cssid = __MAX_CSSID;
 			to_ssid = __MAX_SSID;
 			to = __MAX_SUBCHANNEL;
+		} else if (strcmp(parm, "ipldev") == 0) {
+			if (ipl_info.type == IPL_TYPE_CCW) {
+				from_cssid = 0;
+				from_ssid = ipl_info.data.ccw.dev_id.ssid;
+				from = ipl_info.data.ccw.dev_id.devno;
+			} else if (ipl_info.type == IPL_TYPE_FCP ||
+				   ipl_info.type == IPL_TYPE_FCP_DUMP) {
+				from_cssid = 0;
+				from_ssid = ipl_info.data.fcp.dev_id.ssid;
+				from = ipl_info.data.fcp.dev_id.devno;
+			} else {
+				continue;
+			}
+			to_cssid = from_cssid;
+			to_ssid = from_ssid;
+			to = from;
+		} else if (strcmp(parm, "condev") == 0) {
+			if (console_devno == -1)
+				continue;
+
+			from_cssid = to_cssid = 0;
+			from_ssid = to_ssid = 0;
+			from = to = console_devno;
 		} else {
 			rc = parse_busid(strsep(&parm, "-"), &from_cssid,
 					 &from_ssid, &from, msgtrigger);
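
The new "ipldev" and "condev" keywords above are resolved to the IPL (boot)
device and the console device while the parameter is parsed.  Presumably
the intended use is on the kernel command line alongside the existing busid
syntax, e.g. to blacklist everything except those two devices:

	cio_ignore=all,!ipldev,!condev
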
diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c
index b8b340a..9de41aa 100644
--- a/drivers/s390/crypto/ap_bus.c
+++ b/drivers/s390/crypto/ap_bus.c
@@ -954,15 +954,11 @@
 
 void ap_bus_force_rescan(void)
 {
-	/* Delete the AP bus rescan timer. */
-	del_timer(&ap_config_timer);
-
-	/* processing a synchonuous bus rescan */
-	ap_scan_bus(NULL);
-
-	/* Setup the AP bus rescan timer again. */
-	ap_config_timer.expires = jiffies + ap_config_time * HZ;
-	add_timer(&ap_config_timer);
+	/* reconfigure the AP bus rescan timer. */
+	mod_timer(&ap_config_timer, jiffies + ap_config_time * HZ);
+	/* processing an asynchronous bus rescan */
+	queue_work(ap_work_queue, &ap_config_work);
+	flush_work(&ap_config_work);
 }
 EXPORT_SYMBOL(ap_bus_force_rescan);
 
@@ -1305,8 +1301,9 @@
 	int rc, i;
 
 	ap_query_configuration();
-	if (ap_select_domain() != 0)
+	if (ap_select_domain() != 0) {
 		return;
+	}
 	for (i = 0; i < AP_DEVICES; i++) {
 		qid = AP_MKQID(i, ap_domain_index);
 		dev = bus_find_device(&ap_bus_type, NULL,
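
The ap_bus_force_rescan() rewrite above swaps the del_timer()/add_timer()
pair for mod_timer(), which re-arms a timer in one step whether or not it
is pending, and pushes the scan onto the bus workqueue; flush_work() keeps
the call synchronous from the caller's point of view.  The general shape of
the idiom (names illustrative):

	#include <linux/timer.h>
	#include <linux/workqueue.h>

	static struct timer_list scan_timer;
	static struct work_struct scan_work;
	static struct workqueue_struct *scan_wq;

	static void force_rescan(unsigned long interval)
	{
		/* atomically (re)arm the periodic timer */
		mod_timer(&scan_timer, jiffies + interval);
		/* run the scan on the workqueue, but wait for it to finish */
		queue_work(scan_wq, &scan_work);
		flush_work(&scan_work);
	}
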
diff --git a/drivers/s390/kvm/kvm_virtio.c b/drivers/s390/kvm/kvm_virtio.c
index 6711e65..2ea6165 100644
--- a/drivers/s390/kvm/kvm_virtio.c
+++ b/drivers/s390/kvm/kvm_virtio.c
@@ -443,29 +443,30 @@
 }
 /*
  * Init function for virtio
- * devices are in a single page above top of "normal" mem
+ * devices are in a single page above top of "normal" + standby mem
  */
 static int __init kvm_devices_init(void)
 {
 	int rc;
+	unsigned long total_memory_size = sclp_get_rzm() * sclp_get_rnmax();
 
 	if (!MACHINE_IS_KVM)
 		return -ENODEV;
 
-	if (test_devices_support(real_memory_size) < 0)
+	if (test_devices_support(total_memory_size) < 0)
 		return -ENODEV;
 
-	rc = vmem_add_mapping(real_memory_size, PAGE_SIZE);
+	rc = vmem_add_mapping(total_memory_size, PAGE_SIZE);
 	if (rc)
 		return rc;
 
-	kvm_devices = (void *) real_memory_size;
+	kvm_devices = (void *) total_memory_size;
 
 	kvm_root = root_device_register("kvm_s390");
 	if (IS_ERR(kvm_root)) {
 		rc = PTR_ERR(kvm_root);
 		printk(KERN_ERR "Could not register kvm_s390 root device");
-		vmem_remove_mapping(real_memory_size, PAGE_SIZE);
+		vmem_remove_mapping(total_memory_size, PAGE_SIZE);
 		return rc;
 	}
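
kvm_devices_init() above stops relying on real_memory_size and instead
derives the upper bound from SCLP data: the memory increment size
(sclp_get_rzm()) times the maximum number of increments (sclp_get_rnmax())
covers standby memory as well, so the virtio descriptor page is mapped
above the largest memory size the guest can ever see.  In essence:

	/* one increment is rzm bytes; at most rnmax increments */
	unsigned long total = sclp_get_rzm() * sclp_get_rnmax();

	rc = vmem_add_mapping(total, PAGE_SIZE);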
 
diff --git a/drivers/s390/kvm/virtio_ccw.c b/drivers/s390/kvm/virtio_ccw.c
index fb877b59..779dc51 100644
--- a/drivers/s390/kvm/virtio_ccw.c
+++ b/drivers/s390/kvm/virtio_ccw.c
@@ -31,6 +31,7 @@
 #include <asm/irq.h>
 #include <asm/cio.h>
 #include <asm/ccwdev.h>
+#include <asm/virtio-ccw.h>
 
 /*
  * virtio related functions
@@ -77,12 +78,9 @@
 	void *queue;
 	struct vq_info_block *info_block;
 	struct list_head node;
+	long cookie;
 };
 
-#define KVM_VIRTIO_CCW_RING_ALIGN 4096
-
-#define KVM_S390_VIRTIO_CCW_NOTIFY 3
-
 #define CCW_CMD_SET_VQ 0x13
 #define CCW_CMD_VDEV_RESET 0x33
 #define CCW_CMD_SET_IND 0x43
@@ -135,8 +133,11 @@
 	do {
 		spin_lock_irqsave(get_ccwdev_lock(vcdev->cdev), flags);
 		ret = ccw_device_start(vcdev->cdev, ccw, intparm, 0, 0);
-		if (!ret)
+		if (!ret) {
+			if (!vcdev->curr_io)
+				vcdev->err = 0;
 			vcdev->curr_io |= flag;
+		}
 		spin_unlock_irqrestore(get_ccwdev_lock(vcdev->cdev), flags);
 		cpu_relax();
 	} while (ret == -EBUSY);
@@ -145,15 +146,18 @@
 }
 
 static inline long do_kvm_notify(struct subchannel_id schid,
-				 unsigned long queue_index)
+				 unsigned long queue_index,
+				 long cookie)
 {
 	register unsigned long __nr asm("1") = KVM_S390_VIRTIO_CCW_NOTIFY;
 	register struct subchannel_id __schid asm("2") = schid;
 	register unsigned long __index asm("3") = queue_index;
 	register long __rc asm("2");
+	register long __cookie asm("4") = cookie;
 
 	asm volatile ("diag 2,4,0x500\n"
-		      : "=d" (__rc) : "d" (__nr), "d" (__schid), "d" (__index)
+		      : "=d" (__rc) : "d" (__nr), "d" (__schid), "d" (__index),
+		      "d"(__cookie)
 		      : "memory", "cc");
 	return __rc;
 }
@@ -166,7 +170,7 @@
 
 	vcdev = to_vc_device(info->vq->vdev);
 	ccw_device_get_schid(vcdev->cdev, &schid);
-	do_kvm_notify(schid, vq->index);
+	info->cookie = do_kvm_notify(schid, vq->index, info->cookie);
 }
 
 static int virtio_ccw_read_vq_conf(struct virtio_ccw_device *vcdev,
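
The cookie threaded through do_kvm_notify() above implements the virtio
host-notification handshake: the hypervisor's return value from one
notification is handed back as the cookie of the next notification on the
same queue.  Stripped of the diag register setup, the round trip is simply:

	/* the return value of this notify seeds the next one */
	info->cookie = do_kvm_notify(schid, vq->index, info->cookie);
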
diff --git a/drivers/video/backlight/pwm_bl.c b/drivers/video/backlight/pwm_bl.c
index fa00304..1fea627 100644
--- a/drivers/video/backlight/pwm_bl.c
+++ b/drivers/video/backlight/pwm_bl.c
@@ -274,7 +274,7 @@
 	return 0;
 }
 
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
 static int pwm_backlight_suspend(struct device *dev)
 {
 	struct backlight_device *bl = dev_get_drvdata(dev);
@@ -296,19 +296,16 @@
 	backlight_update_status(bl);
 	return 0;
 }
+#endif
 
 static SIMPLE_DEV_PM_OPS(pwm_backlight_pm_ops, pwm_backlight_suspend,
 			 pwm_backlight_resume);
 
-#endif
-
 static struct platform_driver pwm_backlight_driver = {
 	.driver		= {
 		.name		= "pwm-backlight",
 		.owner		= THIS_MODULE,
-#ifdef CONFIG_PM
 		.pm		= &pwm_backlight_pm_ops,
-#endif
 		.of_match_table	= of_match_ptr(pwm_backlight_of_match),
 	},
 	.probe		= pwm_backlight_probe,
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index 01443ce..13ddec9 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -61,15 +61,6 @@
 		/* This is an autofs submount, we can't expire it */
 		if (autofs_type_indirect(sbi->type))
 			goto done;
-
-		/*
-		 * Otherwise it's an offset mount and we need to check
-		 * if we can umount its mount, if there is one.
-		 */
-		if (!d_mountpoint(path.dentry)) {
-			status = 0;
-			goto done;
-		}
 	}
 
 	/* Update the expiry counter if fs is busy */
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index 9bd1625..085da86 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -408,7 +408,7 @@
 	return NULL;
 }
 
-int autofs4_d_manage(struct dentry *dentry, bool rcu_walk)
+static int autofs4_d_manage(struct dentry *dentry, bool rcu_walk)
 {
 	struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
 	struct autofs_info *ino = autofs4_dentry_ino(dentry);
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index a60ea97..3e68ac1 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -236,15 +236,21 @@
 static void finish_read(struct ceph_osd_request *req, struct ceph_msg *msg)
 {
 	struct inode *inode = req->r_inode;
+	struct ceph_osd_data *osd_data;
 	int rc = req->r_result;
 	int bytes = le32_to_cpu(msg->hdr.data_len);
+	int num_pages;
 	int i;
 
 	dout("finish_read %p req %p rc %d bytes %d\n", inode, req, rc, bytes);
 
 	/* unlock all pages, zeroing any data we didn't read */
-	for (i = 0; i < req->r_num_pages; i++, bytes -= PAGE_CACHE_SIZE) {
-		struct page *page = req->r_pages[i];
+	osd_data = osd_req_op_extent_osd_data(req, 0);
+	BUG_ON(osd_data->type != CEPH_OSD_DATA_TYPE_PAGES);
+	num_pages = calc_pages_for((u64)osd_data->alignment,
+					(u64)osd_data->length);
+	for (i = 0; i < num_pages; i++) {
+		struct page *page = osd_data->pages[i];
 
 		if (bytes < (int)PAGE_CACHE_SIZE) {
 			/* zero (remainder of) page */
@@ -257,8 +263,9 @@
 		SetPageUptodate(page);
 		unlock_page(page);
 		page_cache_release(page);
+		bytes -= PAGE_CACHE_SIZE;
 	}
-	kfree(req->r_pages);
+	kfree(osd_data->pages);
 }
 
 static void ceph_unlock_page_vector(struct page **pages, int num_pages)
@@ -279,6 +286,7 @@
 		&ceph_inode_to_client(inode)->client->osdc;
 	struct ceph_inode_info *ci = ceph_inode(inode);
 	struct page *page = list_entry(page_list->prev, struct page, lru);
+	struct ceph_vino vino;
 	struct ceph_osd_request *req;
 	u64 off;
 	u64 len;
@@ -303,18 +311,17 @@
 	len = nr_pages << PAGE_CACHE_SHIFT;
 	dout("start_read %p nr_pages %d is %lld~%lld\n", inode, nr_pages,
 	     off, len);
-
-	req = ceph_osdc_new_request(osdc, &ci->i_layout, ceph_vino(inode),
-				    off, &len,
-				    CEPH_OSD_OP_READ, CEPH_OSD_FLAG_READ,
-				    NULL, 0,
+	vino = ceph_vino(inode);
+	req = ceph_osdc_new_request(osdc, &ci->i_layout, vino, off, &len,
+				    1, CEPH_OSD_OP_READ,
+				    CEPH_OSD_FLAG_READ, NULL,
 				    ci->i_truncate_seq, ci->i_truncate_size,
-				    NULL, false, 0);
+				    false);
 	if (IS_ERR(req))
 		return PTR_ERR(req);
 
 	/* build page vector */
-	nr_pages = len >> PAGE_CACHE_SHIFT;
+	nr_pages = calc_pages_for(0, len);
 	pages = kmalloc(sizeof(*pages) * nr_pages, GFP_NOFS);
 	ret = -ENOMEM;
 	if (!pages)
@@ -336,11 +343,12 @@
 		}
 		pages[i] = page;
 	}
-	req->r_pages = pages;
-	req->r_num_pages = nr_pages;
+	osd_req_op_extent_osd_data_pages(req, 0, pages, len, 0, false, false);
 	req->r_callback = finish_read;
 	req->r_inode = inode;
 
+	ceph_osdc_build_request(req, off, NULL, vino.snap, NULL);
+
 	dout("start_read %p starting %p %lld~%lld\n", inode, req, off, len);
 	ret = ceph_osdc_start_request(osdc, req, false);
 	if (ret < 0)
@@ -373,7 +381,8 @@
 		max = (fsc->mount_options->rsize + PAGE_CACHE_SIZE - 1)
 			>> PAGE_SHIFT;
 
-	dout("readpages %p file %p nr_pages %d max %d\n", inode, file, nr_pages,
+	dout("readpages %p file %p nr_pages %d max %d\n", inode,
+		file, nr_pages,
 	     max);
 	while (!list_empty(page_list)) {
 		rc = start_read(inode, page_list, max);
@@ -548,17 +557,23 @@
 {
 	struct inode *inode = req->r_inode;
 	struct ceph_inode_info *ci = ceph_inode(inode);
+	struct ceph_osd_data *osd_data;
 	unsigned wrote;
 	struct page *page;
+	int num_pages;
 	int i;
 	struct ceph_snap_context *snapc = req->r_snapc;
 	struct address_space *mapping = inode->i_mapping;
 	int rc = req->r_result;
-	u64 bytes = le64_to_cpu(req->r_request_ops[0].extent.length);
+	u64 bytes = req->r_ops[0].extent.length;
 	struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
 	long writeback_stat;
 	unsigned issued = ceph_caps_issued(ci);
 
+	osd_data = osd_req_op_extent_osd_data(req, 0);
+	BUG_ON(osd_data->type != CEPH_OSD_DATA_TYPE_PAGES);
+	num_pages = calc_pages_for((u64)osd_data->alignment,
+					(u64)osd_data->length);
 	if (rc >= 0) {
 		/*
 		 * Assume we wrote the pages we originally sent.  The
@@ -566,7 +581,7 @@
 		 * raced with a truncation and was adjusted at the osd,
 		 * so don't believe the reply.
 		 */
-		wrote = req->r_num_pages;
+		wrote = num_pages;
 	} else {
 		wrote = 0;
 		mapping_set_error(mapping, rc);
@@ -575,8 +590,8 @@
 	     inode, rc, bytes, wrote);
 
 	/* clean all pages */
-	for (i = 0; i < req->r_num_pages; i++) {
-		page = req->r_pages[i];
+	for (i = 0; i < num_pages; i++) {
+		page = osd_data->pages[i];
 		BUG_ON(!page);
 		WARN_ON(!PageUptodate(page));
 
@@ -605,32 +620,34 @@
 		unlock_page(page);
 	}
 	dout("%p wrote+cleaned %d pages\n", inode, wrote);
-	ceph_put_wrbuffer_cap_refs(ci, req->r_num_pages, snapc);
+	ceph_put_wrbuffer_cap_refs(ci, num_pages, snapc);
 
-	ceph_release_pages(req->r_pages, req->r_num_pages);
-	if (req->r_pages_from_pool)
-		mempool_free(req->r_pages,
+	ceph_release_pages(osd_data->pages, num_pages);
+	if (osd_data->pages_from_pool)
+		mempool_free(osd_data->pages,
 			     ceph_sb_to_client(inode->i_sb)->wb_pagevec_pool);
 	else
-		kfree(req->r_pages);
+		kfree(osd_data->pages);
 	ceph_osdc_put_request(req);
 }
 
-/*
- * allocate a page vec, either directly, or if necessary, via a the
- * mempool.  we avoid the mempool if we can because req->r_num_pages
- * may be less than the maximum write size.
- */
-static void alloc_page_vec(struct ceph_fs_client *fsc,
-			   struct ceph_osd_request *req)
+static struct ceph_osd_request *
+ceph_writepages_osd_request(struct inode *inode, u64 offset, u64 *len,
+				struct ceph_snap_context *snapc, int num_ops)
 {
-	req->r_pages = kmalloc(sizeof(struct page *) * req->r_num_pages,
-			       GFP_NOFS);
-	if (!req->r_pages) {
-		req->r_pages = mempool_alloc(fsc->wb_pagevec_pool, GFP_NOFS);
-		req->r_pages_from_pool = 1;
-		WARN_ON(!req->r_pages);
-	}
+	struct ceph_fs_client *fsc;
+	struct ceph_inode_info *ci;
+	struct ceph_vino vino;
+
+	fsc = ceph_inode_to_client(inode);
+	ci = ceph_inode(inode);
+	vino = ceph_vino(inode);
+	/* BUG_ON(vino.snap != CEPH_NOSNAP); */
+
+	return ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout,
+			vino, offset, len, num_ops, CEPH_OSD_OP_WRITE,
+			CEPH_OSD_FLAG_WRITE|CEPH_OSD_FLAG_ONDISK,
+			snapc, ci->i_truncate_seq, ci->i_truncate_size, true);
 }
 
 /*
@@ -653,7 +670,7 @@
 	unsigned wsize = 1 << inode->i_blkbits;
 	struct ceph_osd_request *req = NULL;
 	int do_sync;
-	u64 snap_size = 0;
+	u64 snap_size;
 
 	/*
 	 * Include a 'sync' in the OSD request if this is a data
@@ -699,6 +716,7 @@
 retry:
 	/* find oldest snap context with dirty data */
 	ceph_put_snap_context(snapc);
+	snap_size = 0;
 	snapc = get_oldest_context(inode, &snap_size);
 	if (!snapc) {
 		/* hmm, why does writepages get called when there
@@ -706,6 +724,8 @@
 		dout(" no snap context with dirty data?\n");
 		goto out;
 	}
+	if (snap_size == 0)
+		snap_size = i_size_read(inode);
 	dout(" oldest snapc is %p seq %lld (%d snaps)\n",
 	     snapc, snapc->seq, snapc->num_snaps);
 	if (last_snapc && snapc != last_snapc) {
@@ -718,10 +738,14 @@
 	last_snapc = snapc;
 
 	while (!done && index <= end) {
+		int num_ops = do_sync ? 2 : 1;
+		struct ceph_vino vino;
 		unsigned i;
 		int first;
 		pgoff_t next;
 		int pvec_pages, locked_pages;
+		struct page **pages = NULL;
+		mempool_t *pool = NULL;	/* Becomes non-null if mempool used */
 		struct page *page;
 		int want;
 		u64 offset, len;
@@ -773,11 +797,8 @@
 				dout("waiting on writeback %p\n", page);
 				wait_on_page_writeback(page);
 			}
-			if ((snap_size && page_offset(page) > snap_size) ||
-			    (!snap_size &&
-			     page_offset(page) > i_size_read(inode))) {
-				dout("%p page eof %llu\n", page, snap_size ?
-				     snap_size : i_size_read(inode));
+			if (page_offset(page) >= snap_size) {
+				dout("%p page eof %llu\n", page, snap_size);
 				done = 1;
 				unlock_page(page);
 				break;
@@ -805,22 +826,23 @@
 				break;
 			}
 
-			/* ok */
+			/*
+			 * We have something to write.  If this is
+			 * the first locked page this time through,
+			 * allocate an osd request and a page array
+			 * that it will use.
+			 */
 			if (locked_pages == 0) {
+				size_t size;
+
+				BUG_ON(pages);
+
 				/* prepare async write request */
-				offset = (u64) page_offset(page);
+				offset = (u64)page_offset(page);
 				len = wsize;
-				req = ceph_osdc_new_request(&fsc->client->osdc,
-					    &ci->i_layout,
-					    ceph_vino(inode),
-					    offset, &len,
-					    CEPH_OSD_OP_WRITE,
-					    CEPH_OSD_FLAG_WRITE |
-						    CEPH_OSD_FLAG_ONDISK,
-					    snapc, do_sync,
-					    ci->i_truncate_seq,
-					    ci->i_truncate_size,
-					    &inode->i_mtime, true, 0);
+				req = ceph_writepages_osd_request(inode,
+							offset, &len, snapc,
+							num_ops);
 
 				if (IS_ERR(req)) {
 					rc = PTR_ERR(req);
@@ -828,11 +850,17 @@
 					break;
 				}
 
-				max_pages = req->r_num_pages;
-
-				alloc_page_vec(fsc, req);
 				req->r_callback = writepages_finish;
 				req->r_inode = inode;
+
+				max_pages = calc_pages_for(0, (u64)len);
+				size = max_pages * sizeof (*pages);
+				pages = kmalloc(size, GFP_NOFS);
+				if (!pages) {
+					pool = fsc->wb_pagevec_pool;
+					pages = mempool_alloc(pool, GFP_NOFS);
+					BUG_ON(!pages);
+				}
 			}
 
 			/* note position of first page in pvec */
@@ -850,7 +878,7 @@
 			}
 
 			set_page_writeback(page);
-			req->r_pages[locked_pages] = page;
+			pages[locked_pages] = page;
 			locked_pages++;
 			next = page->index + 1;
 		}
@@ -879,18 +907,27 @@
 			pvec.nr -= i-first;
 		}
 
-		/* submit the write */
-		offset = req->r_pages[0]->index << PAGE_CACHE_SHIFT;
-		len = min((snap_size ? snap_size : i_size_read(inode)) - offset,
+		/* Format the osd request message and submit the write */
+
+		offset = page_offset(pages[0]);
+		len = min(snap_size - offset,
 			  (u64)locked_pages << PAGE_CACHE_SHIFT);
 		dout("writepages got %d pages at %llu~%llu\n",
 		     locked_pages, offset, len);
 
-		/* revise final length, page count */
-		req->r_num_pages = locked_pages;
-		req->r_request_ops[0].extent.length = cpu_to_le64(len);
-		req->r_request_ops[0].payload_len = cpu_to_le32(len);
-		req->r_request->hdr.data_len = cpu_to_le32(len);
+		osd_req_op_extent_osd_data_pages(req, 0, pages, len, 0,
+							!!pool, false);
+
+		pages = NULL;	/* request message now owns the pages array */
+		pool = NULL;
+
+		/* Update the write op length in case we changed it */
+
+		osd_req_op_extent_update(req, 0, len);
+
+		vino = ceph_vino(inode);
+		ceph_osdc_build_request(req, offset, snapc, vino.snap,
+					&inode->i_mtime);
 
 		rc = ceph_osdc_start_request(&fsc->client->osdc, req, true);
 		BUG_ON(rc);
@@ -1067,51 +1104,23 @@
 			    struct page **pagep, void **fsdata)
 {
 	struct inode *inode = file_inode(file);
-	struct ceph_inode_info *ci = ceph_inode(inode);
-	struct ceph_file_info *fi = file->private_data;
 	struct page *page;
 	pgoff_t index = pos >> PAGE_CACHE_SHIFT;
-	int r, want, got = 0;
-
-	if (fi->fmode & CEPH_FILE_MODE_LAZY)
-		want = CEPH_CAP_FILE_BUFFER | CEPH_CAP_FILE_LAZYIO;
-	else
-		want = CEPH_CAP_FILE_BUFFER;
-
-	dout("write_begin %p %llx.%llx %llu~%u getting caps. i_size %llu\n",
-	     inode, ceph_vinop(inode), pos, len, inode->i_size);
-	r = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, &got, pos+len);
-	if (r < 0)
-		return r;
-	dout("write_begin %p %llx.%llx %llu~%u  got cap refs on %s\n",
-	     inode, ceph_vinop(inode), pos, len, ceph_cap_string(got));
-	if (!(got & (CEPH_CAP_FILE_BUFFER|CEPH_CAP_FILE_LAZYIO))) {
-		ceph_put_cap_refs(ci, got);
-		return -EAGAIN;
-	}
+	int r;
 
 	do {
 		/* get a page */
 		page = grab_cache_page_write_begin(mapping, index, 0);
-		if (!page) {
-			r = -ENOMEM;
-			break;
-		}
+		if (!page)
+			return -ENOMEM;
+		*pagep = page;
 
 		dout("write_begin file %p inode %p page %p %d~%d\n", file,
 		     inode, page, (int)pos, (int)len);
 
 		r = ceph_update_writeable_page(file, pos, len, page);
-		if (r)
-			page_cache_release(page);
 	} while (r == -EAGAIN);
 
-	if (r) {
-		ceph_put_cap_refs(ci, got);
-	} else {
-		*pagep = page;
-		*(int *)fsdata = got;
-	}
 	return r;
 }
 
@@ -1125,12 +1134,10 @@
 			  struct page *page, void *fsdata)
 {
 	struct inode *inode = file_inode(file);
-	struct ceph_inode_info *ci = ceph_inode(inode);
 	struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
 	struct ceph_mds_client *mdsc = fsc->mdsc;
 	unsigned from = pos & (PAGE_CACHE_SIZE - 1);
 	int check_cap = 0;
-	int got = (unsigned long)fsdata;
 
 	dout("write_end file %p inode %p page %p %d~%d (%d)\n", file,
 	     inode, page, (int)pos, (int)copied, (int)len);
@@ -1153,19 +1160,6 @@
 	up_read(&mdsc->snap_rwsem);
 	page_cache_release(page);
 
-	if (copied > 0) {
-		int dirty;
-		spin_lock(&ci->i_ceph_lock);
-		dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR);
-		spin_unlock(&ci->i_ceph_lock);
-		if (dirty)
-			__mark_inode_dirty(inode, dirty);
-	}
-
-	dout("write_end %p %llx.%llx %llu~%u  dropping cap refs on %s\n",
-	     inode, ceph_vinop(inode), pos, len, ceph_cap_string(got));
-	ceph_put_cap_refs(ci, got);
-
 	if (check_cap)
 		ceph_check_caps(ceph_inode(inode), CHECK_CAPS_AUTHONLY, NULL);
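
Taken together, the addr.c hunks above move page ownership out of the
request itself (r_pages/r_num_pages are gone) and into per-op osd_data,
with the request message now built explicitly before submission.  The new
lifecycle, using the calls exactly as they appear above (error handling
elided):

	req = ceph_osdc_new_request(osdc, &ci->i_layout, vino, off, &len,
				    1, CEPH_OSD_OP_READ, CEPH_OSD_FLAG_READ,
				    NULL, ci->i_truncate_seq,
				    ci->i_truncate_size, false);
	osd_req_op_extent_osd_data_pages(req, 0, pages, len, 0, false, false);
	ceph_osdc_build_request(req, off, NULL, vino.snap, NULL);
	ret = ceph_osdc_start_request(osdc, req, false);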
 
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 78e2f57..da0f9b8 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -490,15 +490,17 @@
 		ci->i_rdcache_gen++;
 
 	/*
-	 * if we are newly issued FILE_SHARED, clear D_COMPLETE; we
+	 * if we are newly issued FILE_SHARED, mark dir not complete; we
 	 * don't know what happened to this directory while we didn't
 	 * have the cap.
 	 */
 	if ((issued & CEPH_CAP_FILE_SHARED) &&
 	    (had & CEPH_CAP_FILE_SHARED) == 0) {
 		ci->i_shared_gen++;
-		if (S_ISDIR(ci->vfs_inode.i_mode))
-			ceph_dir_clear_complete(&ci->vfs_inode);
+		if (S_ISDIR(ci->vfs_inode.i_mode)) {
+			dout(" marking %p NOT complete\n", &ci->vfs_inode);
+			__ceph_dir_clear_complete(ci);
+		}
 	}
 }
 
@@ -553,6 +555,7 @@
 		cap->implemented = 0;
 		cap->mds = mds;
 		cap->mds_wanted = 0;
+		cap->mseq = 0;
 
 		cap->ci = ci;
 		__insert_cap_node(ci, cap);
@@ -628,7 +631,10 @@
 	cap->cap_id = cap_id;
 	cap->issued = issued;
 	cap->implemented |= issued;
-	cap->mds_wanted |= wanted;
+	if (mseq > cap->mseq)
+		cap->mds_wanted = wanted;
+	else
+		cap->mds_wanted |= wanted;
 	cap->seq = seq;
 	cap->issue_seq = seq;
 	cap->mseq = mseq;
@@ -997,9 +1003,9 @@
 	return 0;
 }
 
-static void __queue_cap_release(struct ceph_mds_session *session,
-				u64 ino, u64 cap_id, u32 migrate_seq,
-				u32 issue_seq)
+void __queue_cap_release(struct ceph_mds_session *session,
+			 u64 ino, u64 cap_id, u32 migrate_seq,
+			 u32 issue_seq)
 {
 	struct ceph_msg *msg;
 	struct ceph_mds_cap_release *head;
@@ -2046,6 +2052,13 @@
 		goto out;
 	}
 
+	/* finish pending truncate */
+	while (ci->i_truncate_pending) {
+		spin_unlock(&ci->i_ceph_lock);
+		__ceph_do_pending_vmtruncate(inode, !(need & CEPH_CAP_FILE_WR));
+		spin_lock(&ci->i_ceph_lock);
+	}
+
 	if (need & CEPH_CAP_FILE_WR) {
 		if (endoff >= 0 && endoff > (loff_t)ci->i_max_size) {
 			dout("get_cap_refs %p endoff %llu > maxsize %llu\n",
@@ -2067,12 +2080,6 @@
 	}
 	have = __ceph_caps_issued(ci, &implemented);
 
-	/*
-	 * disallow writes while a truncate is pending
-	 */
-	if (ci->i_truncate_pending)
-		have &= ~CEPH_CAP_FILE_WR;
-
 	if ((have & need) == need) {
 		/*
 		 * Look at (implemented & ~have & not) so that we keep waiting
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 6d797f4..f02d82b 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -107,7 +107,7 @@
  * falling back to a "normal" sync readdir if any dentries in the dir
  * are dropped.
  *
- * D_COMPLETE tells indicates we have all dentries in the dir.  It is
+ * Complete dir indicates that we have all dentries in the dir.  It is
  * defined IFF we hold CEPH_CAP_FILE_SHARED (which will be revoked by
  * the MDS if/when the directory is modified).
  */
@@ -198,8 +198,8 @@
 	filp->f_pos++;
 
 	/* make sure a dentry wasn't dropped while we didn't have parent lock */
-	if (!ceph_dir_test_complete(dir)) {
-		dout(" lost D_COMPLETE on %p; falling back to mds\n", dir);
+	if (!ceph_dir_is_complete(dir)) {
+		dout(" lost dir complete on %p; falling back to mds\n", dir);
 		err = -EAGAIN;
 		goto out;
 	}
@@ -258,7 +258,7 @@
 	if (filp->f_pos == 0) {
 		/* note dir version at start of readdir so we can tell
 		 * if any dentries get dropped */
-		fi->dir_release_count = ci->i_release_count;
+		fi->dir_release_count = atomic_read(&ci->i_release_count);
 
 		dout("readdir off 0 -> '.'\n");
 		if (filldir(dirent, ".", 1, ceph_make_fpos(0, 0),
@@ -284,7 +284,7 @@
 	if ((filp->f_pos == 2 || fi->dentry) &&
 	    !ceph_test_mount_opt(fsc, NOASYNCREADDIR) &&
 	    ceph_snap(inode) != CEPH_SNAPDIR &&
-	    ceph_dir_test_complete(inode) &&
+	    __ceph_dir_is_complete(ci) &&
 	    __ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1)) {
 		spin_unlock(&ci->i_ceph_lock);
 		err = __dcache_readdir(filp, dirent, filldir);
@@ -350,7 +350,8 @@
 
 		if (!req->r_did_prepopulate) {
 			dout("readdir !did_prepopulate");
-			fi->dir_release_count--;    /* preclude D_COMPLETE */
+			/* preclude from marking dir complete */
+			fi->dir_release_count--;
 		}
 
 		/* note next offset and last dentry name */
@@ -428,8 +429,9 @@
 	 * the complete dir contents in our cache.
 	 */
 	spin_lock(&ci->i_ceph_lock);
-	if (ci->i_release_count == fi->dir_release_count) {
-		ceph_dir_set_complete(inode);
+	if (atomic_read(&ci->i_release_count) == fi->dir_release_count) {
+		dout(" marking %p complete\n", inode);
+		__ceph_dir_set_complete(ci, fi->dir_release_count);
 		ci->i_max_offset = filp->f_pos;
 	}
 	spin_unlock(&ci->i_ceph_lock);
@@ -604,7 +606,7 @@
 			    fsc->mount_options->snapdir_name,
 			    dentry->d_name.len) &&
 		    !is_root_ceph_dentry(dir, dentry) &&
-		    ceph_dir_test_complete(dir) &&
+		    __ceph_dir_is_complete(ci) &&
 		    (__ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1))) {
 			spin_unlock(&ci->i_ceph_lock);
 			dout(" dir %p complete, -ENOENT\n", dir);
@@ -1065,44 +1067,6 @@
 }
 
 /*
- * Set/clear/test dir complete flag on the dir's dentry.
- */
-void ceph_dir_set_complete(struct inode *inode)
-{
-	struct dentry *dentry = d_find_any_alias(inode);
-	
-	if (dentry && ceph_dentry(dentry) &&
-	    ceph_test_mount_opt(ceph_sb_to_client(dentry->d_sb), DCACHE)) {
-		dout(" marking %p (%p) complete\n", inode, dentry);
-		set_bit(CEPH_D_COMPLETE, &ceph_dentry(dentry)->flags);
-	}
-	dput(dentry);
-}
-
-void ceph_dir_clear_complete(struct inode *inode)
-{
-	struct dentry *dentry = d_find_any_alias(inode);
-
-	if (dentry && ceph_dentry(dentry)) {
-		dout(" marking %p (%p) complete\n", inode, dentry);
-		set_bit(CEPH_D_COMPLETE, &ceph_dentry(dentry)->flags);
-	}
-	dput(dentry);
-}
-
-bool ceph_dir_test_complete(struct inode *inode)
-{
-	struct dentry *dentry = d_find_any_alias(inode);
-
-	if (dentry && ceph_dentry(dentry)) {
-		dout(" marking %p (%p) NOT complete\n", inode, dentry);
-		clear_bit(CEPH_D_COMPLETE, &ceph_dentry(dentry)->flags);
-	}
-	dput(dentry);
-	return false;
-}
-
-/*
  * When the VFS prunes a dentry from the cache, we need to clear the
  * complete flag on the parent directory.
  *
@@ -1110,15 +1074,13 @@
  */
 static void ceph_d_prune(struct dentry *dentry)
 {
-	struct ceph_dentry_info *di;
-
 	dout("ceph_d_prune %p\n", dentry);
 
 	/* do we have a valid parent? */
 	if (IS_ROOT(dentry))
 		return;
 
-	/* if we are not hashed, we don't affect D_COMPLETE */
+	/* if we are not hashed, we don't affect dir's completeness */
 	if (d_unhashed(dentry))
 		return;
 
@@ -1126,8 +1088,7 @@
 	 * we hold d_lock, so d_parent is stable, and d_fsdata is never
 	 * cleared until d_release
 	 */
-	di = ceph_dentry(dentry->d_parent);
-	clear_bit(CEPH_D_COMPLETE, &di->flags);
+	ceph_dir_clear_complete(dentry->d_parent->d_inode);
 }
 
 /*
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index bf338d9..d70830c 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -446,19 +446,35 @@
 }
 
 /*
- * Write commit callback, called if we requested both an ACK and
- * ONDISK commit reply from the OSD.
+ * Write commit request unsafe callback, called to tell us when a
+ * request is unsafe (that is, in flight--has been handed to the
+ * messenger to send to its target osd).  It is called again when
+ * we've received a response message indicating the request is
+ * "safe" (its CEPH_OSD_FLAG_ONDISK flag is set), or when a request
+ * is completed early (and unsuccessfully) due to a timeout or
+ * interrupt.
+ *
+ * This is used if we requested both an ACK and ONDISK commit reply
+ * from the OSD.
  */
-static void sync_write_commit(struct ceph_osd_request *req,
-			      struct ceph_msg *msg)
+static void ceph_sync_write_unsafe(struct ceph_osd_request *req, bool unsafe)
 {
 	struct ceph_inode_info *ci = ceph_inode(req->r_inode);
 
-	dout("sync_write_commit %p tid %llu\n", req, req->r_tid);
-	spin_lock(&ci->i_unsafe_lock);
-	list_del_init(&req->r_unsafe_item);
-	spin_unlock(&ci->i_unsafe_lock);
-	ceph_put_cap_refs(ci, CEPH_CAP_FILE_WR);
+	dout("%s %p tid %llu %ssafe\n", __func__, req, req->r_tid,
+		unsafe ? "un" : "");
+	if (unsafe) {
+		ceph_get_cap_refs(ci, CEPH_CAP_FILE_WR);
+		spin_lock(&ci->i_unsafe_lock);
+		list_add_tail(&req->r_unsafe_item,
+			      &ci->i_unsafe_writes);
+		spin_unlock(&ci->i_unsafe_lock);
+	} else {
+		spin_lock(&ci->i_unsafe_lock);
+		list_del_init(&req->r_unsafe_item);
+		spin_unlock(&ci->i_unsafe_lock);
+		ceph_put_cap_refs(ci, CEPH_CAP_FILE_WR);
+	}
 }
 
 /*
@@ -470,36 +486,33 @@
  * objects, rollback on failure, etc.)
  */
 static ssize_t ceph_sync_write(struct file *file, const char __user *data,
-			       size_t left, loff_t *offset)
+			       size_t left, loff_t pos, loff_t *ppos)
 {
 	struct inode *inode = file_inode(file);
 	struct ceph_inode_info *ci = ceph_inode(inode);
 	struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
+	struct ceph_snap_context *snapc;
+	struct ceph_vino vino;
 	struct ceph_osd_request *req;
+	int num_ops = 1;
 	struct page **pages;
 	int num_pages;
-	long long unsigned pos;
 	u64 len;
 	int written = 0;
 	int flags;
-	int do_sync = 0;
 	int check_caps = 0;
 	int page_align, io_align;
 	unsigned long buf_align;
 	int ret;
 	struct timespec mtime = CURRENT_TIME;
+	bool own_pages = false;
 
 	if (ceph_snap(file_inode(file)) != CEPH_NOSNAP)
 		return -EROFS;
 
-	dout("sync_write on file %p %lld~%u %s\n", file, *offset,
+	dout("sync_write on file %p %lld~%u %s\n", file, pos,
 	     (unsigned)left, (file->f_flags & O_DIRECT) ? "O_DIRECT" : "");
 
-	if (file->f_flags & O_APPEND)
-		pos = i_size_read(inode);
-	else
-		pos = *offset;
-
 	ret = filemap_write_and_wait_range(inode->i_mapping, pos, pos + left);
 	if (ret < 0)
 		return ret;
@@ -516,7 +529,7 @@
 	if ((file->f_flags & (O_SYNC|O_DIRECT)) == 0)
 		flags |= CEPH_OSD_FLAG_ACK;
 	else
-		do_sync = 1;
+		num_ops++;	/* Also include a 'startsync' command. */
 
 	/*
 	 * we may need to do multiple writes here if we span an object
@@ -526,25 +539,20 @@
 	io_align = pos & ~PAGE_MASK;
 	buf_align = (unsigned long)data & ~PAGE_MASK;
 	len = left;
-	if (file->f_flags & O_DIRECT) {
-		/* write from beginning of first page, regardless of
-		   io alignment */
-		page_align = (pos - io_align + buf_align) & ~PAGE_MASK;
-		num_pages = calc_pages_for((unsigned long)data, len);
-	} else {
-		page_align = pos & ~PAGE_MASK;
-		num_pages = calc_pages_for(pos, len);
-	}
+
+	snapc = ci->i_snap_realm->cached_context;
+	vino = ceph_vino(inode);
 	req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout,
-				    ceph_vino(inode), pos, &len,
-				    CEPH_OSD_OP_WRITE, flags,
-				    ci->i_snap_realm->cached_context,
-				    do_sync,
+				    vino, pos, &len, num_ops,
+				    CEPH_OSD_OP_WRITE, flags, snapc,
 				    ci->i_truncate_seq, ci->i_truncate_size,
-				    &mtime, false, page_align);
+				    false);
 	if (IS_ERR(req))
 		return PTR_ERR(req);
 
+	/* write from beginning of first page, regardless of io alignment */
+	page_align = file->f_flags & O_DIRECT ? buf_align : io_align;
+	num_pages = calc_pages_for(page_align, len);
 	if (file->f_flags & O_DIRECT) {
 		pages = ceph_get_direct_page_vector(data, num_pages, false);
 		if (IS_ERR(pages)) {
@@ -572,36 +580,20 @@
 
 		if ((file->f_flags & O_SYNC) == 0) {
 			/* get a second commit callback */
-			req->r_safe_callback = sync_write_commit;
-			req->r_own_pages = 1;
+			req->r_unsafe_callback = ceph_sync_write_unsafe;
+			req->r_inode = inode;
+			own_pages = true;
 		}
 	}
-	req->r_pages = pages;
-	req->r_num_pages = num_pages;
-	req->r_inode = inode;
+	osd_req_op_extent_osd_data_pages(req, 0, pages, len, page_align,
+					false, own_pages);
+
+	/* BUG_ON(vino.snap != CEPH_NOSNAP); */
+	ceph_osdc_build_request(req, pos, snapc, vino.snap, &mtime);
 
 	ret = ceph_osdc_start_request(&fsc->client->osdc, req, false);
-	if (!ret) {
-		if (req->r_safe_callback) {
-			/*
-			 * Add to inode unsafe list only after we
-			 * start_request so that a tid has been assigned.
-			 */
-			spin_lock(&ci->i_unsafe_lock);
-			list_add_tail(&req->r_unsafe_item,
-				      &ci->i_unsafe_writes);
-			spin_unlock(&ci->i_unsafe_lock);
-			ceph_get_cap_refs(ci, CEPH_CAP_FILE_WR);
-		}
-		
+	if (!ret)
 		ret = ceph_osdc_wait_request(&fsc->client->osdc, req);
-		if (ret < 0 && req->r_safe_callback) {
-			spin_lock(&ci->i_unsafe_lock);
-			list_del_init(&req->r_unsafe_item);
-			spin_unlock(&ci->i_unsafe_lock);
-			ceph_put_cap_refs(ci, CEPH_CAP_FILE_WR);
-		}
-	}
 
 	if (file->f_flags & O_DIRECT)
 		ceph_put_page_vector(pages, num_pages, false);
@@ -614,12 +606,12 @@
 		pos += len;
 		written += len;
 		left -= len;
-		data += written;
+		data += len;
 		if (left)
 			goto more;
 
 		ret = written;
-		*offset = pos;
+		*ppos = pos;
 		if (pos > i_size_read(inode))
 			check_caps = ceph_inode_set_size(inode, pos);
 		if (check_caps)
@@ -653,7 +645,6 @@
 	dout("aio_read %p %llx.%llx %llu~%u trying to get caps on %p\n",
 	     inode, ceph_vinop(inode), pos, (unsigned)len, inode);
 again:
-	__ceph_do_pending_vmtruncate(inode);
 	if (fi->fmode & CEPH_FILE_MODE_LAZY)
 		want = CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO;
 	else
@@ -717,55 +708,75 @@
 	struct ceph_inode_info *ci = ceph_inode(inode);
 	struct ceph_osd_client *osdc =
 		&ceph_sb_to_client(inode->i_sb)->client->osdc;
-	loff_t endoff = pos + iov->iov_len;
-	int got = 0;
-	int ret, err, written;
+	ssize_t count, written = 0;
+	int err, want, got;
+	bool hold_mutex;
 
 	if (ceph_snap(inode) != CEPH_NOSNAP)
 		return -EROFS;
 
-retry_snap:
-	written = 0;
-	if (ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_FULL))
-		return -ENOSPC;
-	__ceph_do_pending_vmtruncate(inode);
+	sb_start_write(inode->i_sb);
+	mutex_lock(&inode->i_mutex);
+	hold_mutex = true;
 
-	/*
-	 * try to do a buffered write.  if we don't have sufficient
-	 * caps, we'll get -EAGAIN from generic_file_aio_write, or a
-	 * short write if we only get caps for some pages.
-	 */
-	if (!(iocb->ki_filp->f_flags & O_DIRECT) &&
-	    !(inode->i_sb->s_flags & MS_SYNCHRONOUS) &&
-	    !(fi->flags & CEPH_F_SYNC)) {
-		ret = generic_file_aio_write(iocb, iov, nr_segs, pos);
-		if (ret >= 0)
-			written = ret;
-
-		if ((ret >= 0 || ret == -EIOCBQUEUED) &&
-		    ((file->f_flags & O_SYNC) || IS_SYNC(file->f_mapping->host)
-		     || ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_NEARFULL))) {
-			err = vfs_fsync_range(file, pos, pos + written - 1, 1);
-			if (err < 0)
-				ret = err;
-		}
-		if ((ret < 0 && ret != -EAGAIN) || pos + written >= endoff)
-			goto out;
-	}
-
-	dout("aio_write %p %llx.%llx %llu~%u getting caps. i_size %llu\n",
-	     inode, ceph_vinop(inode), pos + written,
-	     (unsigned)iov->iov_len - written, inode->i_size);
-	ret = ceph_get_caps(ci, CEPH_CAP_FILE_WR, 0, &got, endoff);
-	if (ret < 0)
+	err = generic_segment_checks(iov, &nr_segs, &count, VERIFY_READ);
+	if (err)
 		goto out;
 
-	dout("aio_write %p %llx.%llx %llu~%u  got cap refs on %s\n",
-	     inode, ceph_vinop(inode), pos + written,
-	     (unsigned)iov->iov_len - written, ceph_cap_string(got));
-	ret = ceph_sync_write(file, iov->iov_base + written,
-			      iov->iov_len - written, &iocb->ki_pos);
-	if (ret >= 0) {
+	/* We can write back this queue in page reclaim */
+	current->backing_dev_info = file->f_mapping->backing_dev_info;
+
+	err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode));
+	if (err)
+		goto out;
+
+	if (count == 0)
+		goto out;
+
+	err = file_remove_suid(file);
+	if (err)
+		goto out;
+
+	err = file_update_time(file);
+	if (err)
+		goto out;
+
+retry_snap:
+	if (ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_FULL)) {
+		err = -ENOSPC;
+		goto out;
+	}
+
+	dout("aio_write %p %llx.%llx %llu~%zd getting caps. i_size %llu\n",
+	     inode, ceph_vinop(inode), pos, count, inode->i_size);
+	if (fi->fmode & CEPH_FILE_MODE_LAZY)
+		want = CEPH_CAP_FILE_BUFFER | CEPH_CAP_FILE_LAZYIO;
+	else
+		want = CEPH_CAP_FILE_BUFFER;
+	got = 0;
+	err = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, &got, pos + count);
+	if (err < 0)
+		goto out;
+
+	dout("aio_write %p %llx.%llx %llu~%zd got cap refs on %s\n",
+	     inode, ceph_vinop(inode), pos, count, ceph_cap_string(got));
+
+	if ((got & (CEPH_CAP_FILE_BUFFER|CEPH_CAP_FILE_LAZYIO)) == 0 ||
+	    (iocb->ki_filp->f_flags & O_DIRECT) ||
+	    (inode->i_sb->s_flags & MS_SYNCHRONOUS) ||
+	    (fi->flags & CEPH_F_SYNC)) {
+		mutex_unlock(&inode->i_mutex);
+		written = ceph_sync_write(file, iov->iov_base, count,
+					  pos, &iocb->ki_pos);
+	} else {
+		written = generic_file_buffered_write(iocb, iov, nr_segs,
+						      pos, &iocb->ki_pos,
+						      count, 0);
+		mutex_unlock(&inode->i_mutex);
+	}
+	hold_mutex = false;
+
+	if (written >= 0) {
 		int dirty;
 		spin_lock(&ci->i_ceph_lock);
 		dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR);
@@ -773,18 +784,34 @@
 		if (dirty)
 			__mark_inode_dirty(inode, dirty);
 	}
+
 	dout("aio_write %p %llx.%llx %llu~%u  dropping cap refs on %s\n",
-	     inode, ceph_vinop(inode), pos + written,
-	     (unsigned)iov->iov_len - written, ceph_cap_string(got));
+	     inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len,
+	     ceph_cap_string(got));
 	ceph_put_cap_refs(ci, got);
-out:
-	if (ret == -EOLDSNAPC) {
-		dout("aio_write %p %llx.%llx %llu~%u got EOLDSNAPC, retrying\n",
-		     inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len);
-		goto retry_snap;
+
+	if (written >= 0 &&
+	    ((file->f_flags & O_SYNC) || IS_SYNC(file->f_mapping->host) ||
+	     ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_NEARFULL))) {
+		err = vfs_fsync_range(file, pos, pos + written - 1, 1);
+		if (err < 0)
+			written = err;
 	}
 
-	return ret;
+	if (written == -EOLDSNAPC) {
+		dout("aio_write %p %llx.%llx %llu~%u got EOLDSNAPC, retrying\n",
+		     inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len);
+		mutex_lock(&inode->i_mutex);
+		hold_mutex = true;
+		goto retry_snap;
+	}
+out:
+	if (hold_mutex)
+		mutex_unlock(&inode->i_mutex);
+	sb_end_write(inode->i_sb);
+	current->backing_dev_info = NULL;
+
+	return written ? written : err;
 }
 
 /*
@@ -796,7 +823,7 @@
 	int ret;
 
 	mutex_lock(&inode->i_mutex);
-	__ceph_do_pending_vmtruncate(inode);
+	__ceph_do_pending_vmtruncate(inode, false);
 
 	if (whence == SEEK_END || whence == SEEK_DATA || whence == SEEK_HOLE) {
 		ret = ceph_do_getattr(inode, CEPH_STAT_CAP_SIZE);
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 851814d..be0f7e2 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -302,7 +302,8 @@
 	ci->i_version = 0;
 	ci->i_time_warp_seq = 0;
 	ci->i_ceph_flags = 0;
-	ci->i_release_count = 0;
+	atomic_set(&ci->i_release_count, 1);
+	atomic_set(&ci->i_complete_count, 0);
 	ci->i_symlink = NULL;
 
 	memset(&ci->i_dir_layout, 0, sizeof(ci->i_dir_layout));
@@ -561,7 +562,6 @@
 	struct ceph_inode_info *ci = ceph_inode(inode);
 	int i;
 	int issued = 0, implemented;
-	int updating_inode = 0;
 	struct timespec mtime, atime, ctime;
 	u32 nsplits;
 	struct ceph_buffer *xattr_blob = NULL;
@@ -601,7 +601,6 @@
 	    (ci->i_version & ~1) >= le64_to_cpu(info->version))
 		goto no_change;
 	
-	updating_inode = 1;
 	issued = __ceph_caps_issued(ci, &implemented);
 	issued |= implemented | __ceph_caps_dirty(ci);
 
@@ -717,6 +716,17 @@
 		       ceph_vinop(inode), inode->i_mode);
 	}
 
+	/* set dir completion flag? */
+	if (S_ISDIR(inode->i_mode) &&
+	    ci->i_files == 0 && ci->i_subdirs == 0 &&
+	    ceph_snap(inode) == CEPH_NOSNAP &&
+	    (le32_to_cpu(info->cap.caps) & CEPH_CAP_FILE_SHARED) &&
+	    (issued & CEPH_CAP_FILE_EXCL) == 0 &&
+	    !__ceph_dir_is_complete(ci)) {
+		dout(" marking %p complete (empty)\n", inode);
+		__ceph_dir_set_complete(ci, atomic_read(&ci->i_release_count));
+		ci->i_max_offset = 2;
+	}
 no_change:
 	spin_unlock(&ci->i_ceph_lock);
 
@@ -767,19 +777,6 @@
 		__ceph_get_fmode(ci, cap_fmode);
 	}
 
-	/* set dir completion flag? */
-	if (S_ISDIR(inode->i_mode) &&
-	    updating_inode &&                 /* didn't jump to no_change */
-	    ci->i_files == 0 && ci->i_subdirs == 0 &&
-	    ceph_snap(inode) == CEPH_NOSNAP &&
-	    (le32_to_cpu(info->cap.caps) & CEPH_CAP_FILE_SHARED) &&
-	    (issued & CEPH_CAP_FILE_EXCL) == 0 &&
-	    !ceph_dir_test_complete(inode)) {
-		dout(" marking %p complete (empty)\n", inode);
-		ceph_dir_set_complete(inode);
-		ci->i_max_offset = 2;
-	}
-
 	/* update delegation info? */
 	if (dirinfo)
 		ceph_fill_dirfrag(inode, dirinfo);
@@ -861,7 +858,7 @@
 	di = ceph_dentry(dn);
 
 	spin_lock(&ci->i_ceph_lock);
-	if (!ceph_dir_test_complete(inode)) {
+	if (!__ceph_dir_is_complete(ci)) {
 		spin_unlock(&ci->i_ceph_lock);
 		return;
 	}
@@ -1065,8 +1062,8 @@
 			/*
 			 * d_move() puts the renamed dentry at the end of
 			 * d_subdirs.  We need to assign it an appropriate
-			 * directory offset so we can behave when holding
-			 * D_COMPLETE.
+			 * directory offset so we can behave when dir is
+			 * complete.
 			 */
 			ceph_set_dentry_offset(req->r_old_dentry);
 			dout("dn %p gets new offset %lld\n", req->r_old_dentry, 
@@ -1457,7 +1454,7 @@
 
 
 /*
- * called by trunc_wq; take i_mutex ourselves
+ * called by trunc_wq;
  *
  * We also truncate in a separate thread.
  */
@@ -1468,9 +1465,7 @@
 	struct inode *inode = &ci->vfs_inode;
 
 	dout("vmtruncate_work %p\n", inode);
-	mutex_lock(&inode->i_mutex);
-	__ceph_do_pending_vmtruncate(inode);
-	mutex_unlock(&inode->i_mutex);
+	__ceph_do_pending_vmtruncate(inode, true);
 	iput(inode);
 }
 
@@ -1494,12 +1489,10 @@
 }
 
 /*
- * called with i_mutex held.
- *
  * Make sure any pending truncation is applied before doing anything
  * that may depend on it.
  */
-void __ceph_do_pending_vmtruncate(struct inode *inode)
+void __ceph_do_pending_vmtruncate(struct inode *inode, bool needlock)
 {
 	struct ceph_inode_info *ci = ceph_inode(inode);
 	u64 to;
@@ -1532,7 +1525,11 @@
 	     ci->i_truncate_pending, to);
 	spin_unlock(&ci->i_ceph_lock);
 
+	if (needlock)
+		mutex_lock(&inode->i_mutex);
 	truncate_inode_pages(inode->i_mapping, to);
+	if (needlock)
+		mutex_unlock(&inode->i_mutex);
 
 	spin_lock(&ci->i_ceph_lock);
 	if (to == ci->i_truncate_size) {
@@ -1563,6 +1560,12 @@
 static const struct inode_operations ceph_symlink_iops = {
 	.readlink = generic_readlink,
 	.follow_link = ceph_sym_follow_link,
+	.setattr = ceph_setattr,
+	.getattr = ceph_getattr,
+	.setxattr = ceph_setxattr,
+	.getxattr = ceph_getxattr,
+	.listxattr = ceph_listxattr,
+	.removexattr = ceph_removexattr,
 };
 
 /*
@@ -1585,7 +1588,7 @@
 	if (ceph_snap(inode) != CEPH_NOSNAP)
 		return -EROFS;
 
-	__ceph_do_pending_vmtruncate(inode);
+	__ceph_do_pending_vmtruncate(inode, false);
 
 	err = inode_change_ok(inode, attr);
 	if (err != 0)
@@ -1767,7 +1770,7 @@
 	     ceph_cap_string(dirtied), mask);
 
 	ceph_mdsc_put_request(req);
-	__ceph_do_pending_vmtruncate(inode);
+	__ceph_do_pending_vmtruncate(inode, false);
 	return err;
 out:
 	spin_unlock(&ci->i_ceph_lock);
diff --git a/fs/ceph/ioctl.c b/fs/ceph/ioctl.c
index 4a98934..e0b4ef3 100644
--- a/fs/ceph/ioctl.c
+++ b/fs/ceph/ioctl.c
@@ -208,8 +208,9 @@
 
 	snprintf(dl.object_name, sizeof(dl.object_name), "%llx.%08llx",
 		 ceph_ino(inode), dl.object_no);
-	ceph_calc_object_layout(&pgid, dl.object_name, &ci->i_layout,
-				osdc->osdmap);
+
+	ceph_calc_ceph_pg(&pgid, dl.object_name, osdc->osdmap,
+		ceph_file_layout_pg_pool(ci->i_layout));
 
 	dl.osd = ceph_calc_pg_primary(osdc->osdmap, pgid);
 	if (dl.osd >= 0) {
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 442880d..4f22671 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -265,7 +265,8 @@
 {
 	if (info->head->op == CEPH_MDS_OP_GETFILELOCK)
 		return parse_reply_info_filelock(p, end, info, features);
-	else if (info->head->op == CEPH_MDS_OP_READDIR)
+	else if (info->head->op == CEPH_MDS_OP_READDIR ||
+		 info->head->op == CEPH_MDS_OP_LSSNAP)
 		return parse_reply_info_dir(p, end, info, features);
 	else if (info->head->op == CEPH_MDS_OP_CREATE)
 		return parse_reply_info_create(p, end, info, features);
@@ -364,9 +365,9 @@
 	     atomic_read(&s->s_ref), atomic_read(&s->s_ref)-1);
 	if (atomic_dec_and_test(&s->s_ref)) {
 		if (s->s_auth.authorizer)
-		     s->s_mdsc->fsc->client->monc.auth->ops->destroy_authorizer(
-			     s->s_mdsc->fsc->client->monc.auth,
-			     s->s_auth.authorizer);
+			ceph_auth_destroy_authorizer(
+				s->s_mdsc->fsc->client->monc.auth,
+				s->s_auth.authorizer);
 		kfree(s);
 	}
 }
@@ -1196,6 +1197,8 @@
 	session->s_trim_caps--;
 	if (oissued) {
 		/* we aren't the only cap.. just remove us */
+		__queue_cap_release(session, ceph_ino(inode), cap->cap_id,
+				    cap->mseq, cap->issue_seq);
 		__ceph_remove_cap(cap);
 	} else {
 		/* try to drop referring dentries */
@@ -1718,8 +1721,12 @@
 	msg->front.iov_len = p - msg->front.iov_base;
 	msg->hdr.front_len = cpu_to_le32(msg->front.iov_len);
 
-	msg->pages = req->r_pages;
-	msg->nr_pages = req->r_num_pages;
+	if (req->r_data_len) {
+		/* outbound data set only by ceph_sync_setxattr() */
+		BUG_ON(!req->r_pages);
+		ceph_msg_data_add_pages(msg, req->r_pages, req->r_data_len, 0);
+	}
+
 	msg->hdr.data_len = cpu_to_le32(req->r_data_len);
 	msg->hdr.data_off = cpu_to_le16(0);
 
@@ -1913,6 +1920,7 @@
 		req = list_entry(tmp_list.next,
 				 struct ceph_mds_request, r_wait);
 		list_del_init(&req->r_wait);
+		dout(" wake request %p tid %llu\n", req, req->r_tid);
 		__do_request(mdsc, req);
 	}
 }
@@ -2026,20 +2034,16 @@
 }
 
 /*
- * Invalidate dir D_COMPLETE, dentry lease state on an aborted MDS
+ * Invalidate dir's completeness, dentry lease state on an aborted MDS
  * namespace request.
  */
 void ceph_invalidate_dir_request(struct ceph_mds_request *req)
 {
 	struct inode *inode = req->r_locked_dir;
-	struct ceph_inode_info *ci = ceph_inode(inode);
 
-	dout("invalidate_dir_request %p (D_COMPLETE, lease(s))\n", inode);
-	spin_lock(&ci->i_ceph_lock);
+	dout("invalidate_dir_request %p (complete, lease(s))\n", inode);
+
 	ceph_dir_clear_complete(inode);
-	ci->i_release_count++;
-	spin_unlock(&ci->i_ceph_lock);
-
 	if (req->r_dentry)
 		ceph_invalidate_dentry_lease(req->r_dentry);
 	if (req->r_old_dentry)
@@ -2599,11 +2603,13 @@
 			goto fail;
 	}
 
-	reply->pagelist = pagelist;
 	if (recon_state.flock)
 		reply->hdr.version = cpu_to_le16(2);
-	reply->hdr.data_len = cpu_to_le32(pagelist->length);
-	reply->nr_pages = calc_pages_for(0, pagelist->length);
+	if (pagelist->length) {
+		/* set up outbound data if we have any */
+		reply->hdr.data_len = cpu_to_le32(pagelist->length);
+		ceph_msg_data_add_pagelist(reply, pagelist);
+	}
 	ceph_con_send(&session->s_con, reply);
 
 	mutex_unlock(&session->s_mutex);
@@ -3433,13 +3439,17 @@
 	struct ceph_auth_handshake *auth = &s->s_auth;
 
 	if (force_new && auth->authorizer) {
-		if (ac->ops && ac->ops->destroy_authorizer)
-			ac->ops->destroy_authorizer(ac, auth->authorizer);
+		ceph_auth_destroy_authorizer(ac, auth->authorizer);
 		auth->authorizer = NULL;
 	}
-	if (!auth->authorizer && ac->ops && ac->ops->create_authorizer) {
-		int ret = ac->ops->create_authorizer(ac, CEPH_ENTITY_TYPE_MDS,
-							auth);
+	if (!auth->authorizer) {
+		int ret = ceph_auth_create_authorizer(ac, CEPH_ENTITY_TYPE_MDS,
+						      auth);
+		if (ret)
+			return ERR_PTR(ret);
+	} else {
+		int ret = ceph_auth_update_authorizer(ac, CEPH_ENTITY_TYPE_MDS,
+						      auth);
 		if (ret)
 			return ERR_PTR(ret);
 	}
@@ -3455,7 +3465,7 @@
 	struct ceph_mds_client *mdsc = s->s_mdsc;
 	struct ceph_auth_client *ac = mdsc->fsc->client->monc.auth;
 
-	return ac->ops->verify_authorizer_reply(ac, s->s_auth.authorizer, len);
+	return ceph_auth_verify_authorizer_reply(ac, s->s_auth.authorizer, len);
 }
 
 static int invalidate_authorizer(struct ceph_connection *con)
@@ -3464,12 +3474,32 @@
 	struct ceph_mds_client *mdsc = s->s_mdsc;
 	struct ceph_auth_client *ac = mdsc->fsc->client->monc.auth;
 
-	if (ac->ops->invalidate_authorizer)
-		ac->ops->invalidate_authorizer(ac, CEPH_ENTITY_TYPE_MDS);
+	ceph_auth_invalidate_authorizer(ac, CEPH_ENTITY_TYPE_MDS);
 
 	return ceph_monc_validate_auth(&mdsc->fsc->client->monc);
 }
 
+static struct ceph_msg *mds_alloc_msg(struct ceph_connection *con,
+				struct ceph_msg_header *hdr, int *skip)
+{
+	struct ceph_msg *msg;
+	int type = (int) le16_to_cpu(hdr->type);
+	int front_len = (int) le32_to_cpu(hdr->front_len);
+
+	if (con->in_msg)
+		return con->in_msg;
+
+	*skip = 0;
+	msg = ceph_msg_new(type, front_len, GFP_NOFS, false);
+	if (!msg) {
+		pr_err("unable to allocate msg type %d len %d\n",
+		       type, front_len);
+		return NULL;
+	}
+
+	return msg;
+}
+
 static const struct ceph_connection_operations mds_con_ops = {
 	.get = con_get,
 	.put = con_put,
@@ -3478,6 +3508,7 @@
 	.verify_authorizer_reply = verify_authorizer_reply,
 	.invalidate_authorizer = invalidate_authorizer,
 	.peer_reset = peer_reset,
+	.alloc_msg = mds_alloc_msg,
 };
 
 /* eof */
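Note: the mds_client.c hunks above fold the open-coded ac->ops->... indirection into
ceph_auth_* helpers (ceph_auth_destroy_authorizer, ceph_auth_create_authorizer,
ceph_auth_update_authorizer, ceph_auth_verify_authorizer_reply,
ceph_auth_invalidate_authorizer), so the "is there an ops table, does it have this
hook" checks live in one place instead of at every call site. A minimal userspace
sketch of that wrapper pattern, with illustrative names rather than the kernel's:

    #include <stddef.h>
    #include <stdio.h>

    struct auth_ops {
        void (*destroy_authorizer)(void *authorizer);
    };

    struct auth_client {
        const struct auth_ops *ops;
    };

    /* One helper instead of "if (ac->ops && ac->ops->destroy...)"
     * repeated at each caller. */
    static void auth_destroy_authorizer(struct auth_client *ac, void *authorizer)
    {
        if (ac->ops && ac->ops->destroy_authorizer)
            ac->ops->destroy_authorizer(authorizer);
    }

    static void destroy_impl(void *authorizer)
    {
        printf("destroyed %p\n", authorizer);
    }

    int main(void)
    {
        const struct auth_ops ops = { .destroy_authorizer = destroy_impl };
        struct auth_client with_ops = { .ops = &ops };
        struct auth_client no_ops = { .ops = NULL };
        int token = 0;

        auth_destroy_authorizer(&with_ops, &token); /* dispatches */
        auth_destroy_authorizer(&no_ops, &token);   /* safe no-op */
        return 0;
    }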
diff --git a/fs/ceph/mdsmap.c b/fs/ceph/mdsmap.c
index 0d3c924..9278dec 100644
--- a/fs/ceph/mdsmap.c
+++ b/fs/ceph/mdsmap.c
@@ -20,7 +20,10 @@
 {
 	int n = 0;
 	int i;
-	char r;
+
+	/* special case for one mds */
+	if (1 == m->m_max_mds && m->m_info[0].state > 0)
+		return 0;
 
 	/* count */
 	for (i = 0; i < m->m_max_mds; i++)
@@ -30,8 +33,7 @@
 		return -1;
 
 	/* pick */
-	get_random_bytes(&r, 1);
-	n = r % n;
+	n = prandom_u32() % n;
 	i = 0;
 	for (i = 0; n > 0; i++, n--)
 		while (m->m_info[i].state <= 0)
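Note: the old mdsmap.c picker drew one byte of entropy into a plain char and took
"r % n"; char is signed on most architectures, so the remainder could be negative
and index before m_info[]. prandom_u32() % n always lands in [0, n), which is
enough for spreading load across MDSes (the small modulo bias is harmless here).
A userspace demonstration, assuming C99 truncating division:

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
        signed char r = -37;      /* a byte of entropy, read as signed */
        uint32_t u = 0xffffffdb;  /* the same idea, unsigned source */
        int n = 5;

        printf("signed:   %d\n", r % n);           /* -2: invalid index */
        printf("unsigned: %u\n", u % (uint32_t)n); /* always 0..4 */
        return 0;
    }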
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index cbb2f54..f01645a 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -332,10 +332,9 @@
 	err = -ENOMEM;
 	if (num > (SIZE_MAX - sizeof(*snapc)) / sizeof(u64))
 		goto fail;
-	snapc = kzalloc(sizeof(*snapc) + num*sizeof(u64), GFP_NOFS);
+	snapc = ceph_create_snap_context(num, GFP_NOFS);
 	if (!snapc)
 		goto fail;
-	atomic_set(&snapc->nref, 1);
 
 	/* build (reverse sorted) snap vector */
 	num = 0;
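Note: the snap.c hunk swaps the open-coded kzalloc + atomic_set(&snapc->nref, 1)
pair for a single ceph_create_snap_context() constructor, so the flexible-array
sizing and the initial reference count cannot drift apart across callers. A
userspace model of that constructor pattern (names illustrative, a plain int
standing in for the kernel's atomic_t):

    #include <stdlib.h>
    #include <stdint.h>

    struct snap_context {
        int nref;            /* stands in for atomic_t */
        uint32_t num_snaps;
        uint64_t snaps[];    /* flexible array member */
    };

    static struct snap_context *create_snap_context(uint32_t num)
    {
        struct snap_context *snapc;

        if (num > (SIZE_MAX - sizeof(*snapc)) / sizeof(uint64_t))
            return NULL;     /* overflow guard, as in the hunk above */

        snapc = calloc(1, sizeof(*snapc) + num * sizeof(uint64_t));
        if (!snapc)
            return NULL;

        snapc->nref = 1;     /* caller starts with one reference */
        snapc->num_snaps = num;
        return snapc;
    }

    int main(void)
    {
        struct snap_context *snapc = create_snap_context(4);

        free(snapc);
        return 0;
    }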
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index 6ddc0bc..7d377c9 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -479,6 +479,8 @@
 		CEPH_FEATURE_FLOCK |
 		CEPH_FEATURE_DIRLAYOUTHASH;
 	const unsigned required_features = 0;
+	int page_count;
+	size_t size;
 	int err = -ENOMEM;
 
 	fsc = kzalloc(sizeof(*fsc), GFP_KERNEL);
@@ -522,8 +524,9 @@
 
 	/* set up mempools */
 	err = -ENOMEM;
-	fsc->wb_pagevec_pool = mempool_create_kmalloc_pool(10,
-			      fsc->mount_options->wsize >> PAGE_CACHE_SHIFT);
+	page_count = fsc->mount_options->wsize >> PAGE_CACHE_SHIFT;
+	size = sizeof (struct page *) * (page_count ? page_count : 1);
+	fsc->wb_pagevec_pool = mempool_create_kmalloc_pool(10, size);
 	if (!fsc->wb_pagevec_pool)
 		goto fail_trunc_wq;
 
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index c7b3097..8696be2f 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -204,7 +204,6 @@
  * Ceph dentry state
  */
 struct ceph_dentry_info {
-	unsigned long flags;
 	struct ceph_mds_session *lease_session;
 	u32 lease_gen, lease_shared_gen;
 	u32 lease_seq;
@@ -215,18 +214,6 @@
 	u64 offset;
 };
 
-/*
- * dentry flags
- *
- * The locking for D_COMPLETE is a bit odd:
- *  - we can clear it at almost any time (see ceph_d_prune)
- *  - it is only meaningful if:
- *    - we hold dir inode i_ceph_lock
- *    - we hold dir FILE_SHARED caps
- *    - the dentry D_COMPLETE is set
- */
-#define CEPH_D_COMPLETE 1  /* if set, d_u.d_subdirs is complete directory */
-
 struct ceph_inode_xattrs_info {
 	/*
 	 * (still encoded) xattr blob. we avoid the overhead of parsing
@@ -257,7 +244,8 @@
 	u32 i_time_warp_seq;
 
 	unsigned i_ceph_flags;
-	unsigned long i_release_count;
+	atomic_t i_release_count;
+	atomic_t i_complete_count;
 
 	struct ceph_dir_layout i_dir_layout;
 	struct ceph_file_layout i_layout;
@@ -267,7 +255,7 @@
 	struct timespec i_rctime;
 	u64 i_rbytes, i_rfiles, i_rsubdirs;
 	u64 i_files, i_subdirs;
-	u64 i_max_offset;  /* largest readdir offset, set with D_COMPLETE */
+	u64 i_max_offset;  /* largest readdir offset, set with complete dir */
 
 	struct rb_root i_fragtree;
 	struct mutex i_fragtree_mutex;
@@ -436,33 +424,31 @@
 #define CEPH_I_FLUSH     8  /* do not delay flush of dirty metadata */
 #define CEPH_I_NOFLUSH  16  /* do not flush dirty caps */
 
-static inline void ceph_i_clear(struct inode *inode, unsigned mask)
+static inline void __ceph_dir_set_complete(struct ceph_inode_info *ci,
+					   int release_count)
 {
-	struct ceph_inode_info *ci = ceph_inode(inode);
-
-	spin_lock(&ci->i_ceph_lock);
-	ci->i_ceph_flags &= ~mask;
-	spin_unlock(&ci->i_ceph_lock);
+	atomic_set(&ci->i_complete_count, release_count);
 }
 
-static inline void ceph_i_set(struct inode *inode, unsigned mask)
+static inline void __ceph_dir_clear_complete(struct ceph_inode_info *ci)
 {
-	struct ceph_inode_info *ci = ceph_inode(inode);
-
-	spin_lock(&ci->i_ceph_lock);
-	ci->i_ceph_flags |= mask;
-	spin_unlock(&ci->i_ceph_lock);
+	atomic_inc(&ci->i_release_count);
 }
 
-static inline bool ceph_i_test(struct inode *inode, unsigned mask)
+static inline bool __ceph_dir_is_complete(struct ceph_inode_info *ci)
 {
-	struct ceph_inode_info *ci = ceph_inode(inode);
-	bool r;
+	return atomic_read(&ci->i_complete_count) ==
+		atomic_read(&ci->i_release_count);
+}
 
-	spin_lock(&ci->i_ceph_lock);
-	r = (ci->i_ceph_flags & mask) == mask;
-	spin_unlock(&ci->i_ceph_lock);
-	return r;
+static inline void ceph_dir_clear_complete(struct inode *inode)
+{
+	__ceph_dir_clear_complete(ceph_inode(inode));
+}
+
+static inline bool ceph_dir_is_complete(struct inode *inode)
+{
+	return __ceph_dir_is_complete(ceph_inode(inode));
 }
 
 
@@ -489,13 +475,6 @@
 }
 
 /*
- * set/clear directory D_COMPLETE flag
- */
-void ceph_dir_set_complete(struct inode *inode);
-void ceph_dir_clear_complete(struct inode *inode);
-bool ceph_dir_test_complete(struct inode *inode);
-
-/*
  * caps helpers
  */
 static inline bool __ceph_is_any_real_caps(struct ceph_inode_info *ci)
@@ -584,7 +563,7 @@
 	u64 next_offset;       /* offset of next chunk (last_name's + 1) */
 	char *last_name;       /* last entry in previous chunk */
 	struct dentry *dentry; /* next dentry (for dcache readdir) */
-	unsigned long dir_release_count;
+	int dir_release_count;
 
 	/* used for -o dirstat read() on directory thing */
 	char *dir_info;
@@ -713,7 +692,7 @@
 extern int ceph_inode_holds_cap(struct inode *inode, int mask);
 
 extern int ceph_inode_set_size(struct inode *inode, loff_t size);
-extern void __ceph_do_pending_vmtruncate(struct inode *inode);
+extern void __ceph_do_pending_vmtruncate(struct inode *inode, bool needlock);
 extern void ceph_queue_vmtruncate(struct inode *inode);
 
 extern void ceph_queue_invalidate(struct inode *inode);
@@ -755,6 +734,8 @@
 extern void ceph_put_cap(struct ceph_mds_client *mdsc,
 			 struct ceph_cap *cap);
 
+extern void __queue_cap_release(struct ceph_mds_session *session, u64 ino,
+				u64 cap_id, u32 migrate_seq, u32 issue_seq);
 extern void ceph_queue_caps_release(struct inode *inode);
 extern int ceph_write_inode(struct inode *inode, struct writeback_control *wbc);
 extern int ceph_fsync(struct file *file, loff_t start, loff_t end,
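Note: the super.h hunks retire the spinlock-protected CEPH_D_COMPLETE/ceph_i_*
flag helpers in favor of two atomics: __ceph_dir_set_complete() snapshots the
current release count, any invalidating event bumps i_release_count, and the
directory counts as complete only while the two counters still match. A userspace
model of that generation-counter scheme, using C11 atomics in place of the
kernel's atomic_t:

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdio.h>

    struct dir_state {
        atomic_int release_count;   /* bumped on any invalidating event */
        atomic_int complete_count;  /* generation captured at set time */
    };

    static void dir_set_complete(struct dir_state *d, int release_count)
    {
        atomic_store(&d->complete_count, release_count);
    }

    static void dir_clear_complete(struct dir_state *d)
    {
        atomic_fetch_add(&d->release_count, 1);
    }

    static bool dir_is_complete(struct dir_state *d)
    {
        return atomic_load(&d->complete_count) ==
               atomic_load(&d->release_count);
    }

    int main(void)
    {
        struct dir_state d = { 0 };
        int gen = atomic_load(&d.release_count);

        dir_set_complete(&d, gen);
        printf("complete: %d\n", dir_is_complete(&d)); /* 1 */
        dir_clear_complete(&d);                        /* e.g. new dentry */
        printf("complete: %d\n", dir_is_complete(&d)); /* 0 */
        return 0;
    }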
diff --git a/fs/cifs/asn1.c b/fs/cifs/asn1.c
index 1d36db1..a3b5654 100644
--- a/fs/cifs/asn1.c
+++ b/fs/cifs/asn1.c
@@ -506,11 +506,11 @@
 
 	/* GSSAPI header */
 	if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
-		cFYI(1, "Error decoding negTokenInit header");
+		cifs_dbg(FYI, "Error decoding negTokenInit header\n");
 		return 0;
 	} else if ((cls != ASN1_APL) || (con != ASN1_CON)
 		   || (tag != ASN1_EOC)) {
-		cFYI(1, "cls = %d con = %d tag = %d", cls, con, tag);
+		cifs_dbg(FYI, "cls = %d con = %d tag = %d\n", cls, con, tag);
 		return 0;
 	}
 
@@ -531,52 +531,52 @@
 
 	/* SPNEGO OID not present or garbled -- bail out */
 	if (!rc) {
-		cFYI(1, "Error decoding negTokenInit header");
+		cifs_dbg(FYI, "Error decoding negTokenInit header\n");
 		return 0;
 	}
 
 	/* SPNEGO */
 	if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
-		cFYI(1, "Error decoding negTokenInit");
+		cifs_dbg(FYI, "Error decoding negTokenInit\n");
 		return 0;
 	} else if ((cls != ASN1_CTX) || (con != ASN1_CON)
 		   || (tag != ASN1_EOC)) {
-		cFYI(1, "cls = %d con = %d tag = %d end = %p (%d) exit 0",
-		     cls, con, tag, end, *end);
+		cifs_dbg(FYI, "cls = %d con = %d tag = %d end = %p (%d) exit 0\n",
+			 cls, con, tag, end, *end);
 		return 0;
 	}
 
 	/* negTokenInit */
 	if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
-		cFYI(1, "Error decoding negTokenInit");
+		cifs_dbg(FYI, "Error decoding negTokenInit\n");
 		return 0;
 	} else if ((cls != ASN1_UNI) || (con != ASN1_CON)
 		   || (tag != ASN1_SEQ)) {
-		cFYI(1, "cls = %d con = %d tag = %d end = %p (%d) exit 1",
-		     cls, con, tag, end, *end);
+		cifs_dbg(FYI, "cls = %d con = %d tag = %d end = %p (%d) exit 1\n",
+			 cls, con, tag, end, *end);
 		return 0;
 	}
 
 	/* sequence */
 	if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
-		cFYI(1, "Error decoding 2nd part of negTokenInit");
+		cifs_dbg(FYI, "Error decoding 2nd part of negTokenInit\n");
 		return 0;
 	} else if ((cls != ASN1_CTX) || (con != ASN1_CON)
 		   || (tag != ASN1_EOC)) {
-		cFYI(1, "cls = %d con = %d tag = %d end = %p (%d) exit 0",
-		     cls, con, tag, end, *end);
+		cifs_dbg(FYI, "cls = %d con = %d tag = %d end = %p (%d) exit 0\n",
+			 cls, con, tag, end, *end);
 		return 0;
 	}
 
 	/* sequence of */
 	if (asn1_header_decode
 	    (&ctx, &sequence_end, &cls, &con, &tag) == 0) {
-		cFYI(1, "Error decoding 2nd part of negTokenInit");
+		cifs_dbg(FYI, "Error decoding 2nd part of negTokenInit\n");
 		return 0;
 	} else if ((cls != ASN1_UNI) || (con != ASN1_CON)
 		   || (tag != ASN1_SEQ)) {
-		cFYI(1, "cls = %d con = %d tag = %d end = %p (%d) exit 1",
-		     cls, con, tag, end, *end);
+		cifs_dbg(FYI, "cls = %d con = %d tag = %d end = %p (%d) exit 1\n",
+			 cls, con, tag, end, *end);
 		return 0;
 	}
 
@@ -584,15 +584,15 @@
 	while (!asn1_eoc_decode(&ctx, sequence_end)) {
 		rc = asn1_header_decode(&ctx, &end, &cls, &con, &tag);
 		if (!rc) {
-			cFYI(1, "Error decoding negTokenInit hdr exit2");
+			cifs_dbg(FYI, "Error decoding negTokenInit hdr exit2\n");
 			return 0;
 		}
 		if ((tag == ASN1_OJI) && (con == ASN1_PRI)) {
 			if (asn1_oid_decode(&ctx, end, &oid, &oidlen)) {
 
-				cFYI(1, "OID len = %d oid = 0x%lx 0x%lx "
-					"0x%lx 0x%lx", oidlen, *oid,
-					*(oid + 1), *(oid + 2), *(oid + 3));
+				cifs_dbg(FYI, "OID len = %d oid = 0x%lx 0x%lx 0x%lx 0x%lx\n",
+					 oidlen, *oid, *(oid + 1), *(oid + 2),
+					 *(oid + 3));
 
 				if (compare_oid(oid, oidlen, MSKRB5_OID,
 						MSKRB5_OID_LEN))
@@ -610,7 +610,7 @@
 				kfree(oid);
 			}
 		} else {
-			cFYI(1, "Should be an oid what is going on?");
+			cifs_dbg(FYI, "Should be an oid what is going on?\n");
 		}
 	}
 
diff --git a/fs/cifs/cache.c b/fs/cifs/cache.c
index 282d6de..6c665bf 100644
--- a/fs/cifs/cache.c
+++ b/fs/cifs/cache.c
@@ -92,7 +92,7 @@
 		break;
 
 	default:
-		cERROR(1, "Unknown network family '%d'", sa->sa_family);
+		cifs_dbg(VFS, "Unknown network family '%d'\n", sa->sa_family);
 		key_len = 0;
 		break;
 	}
@@ -152,7 +152,7 @@
 
 	sharename = extract_sharename(tcon->treeName);
 	if (IS_ERR(sharename)) {
-		cFYI(1, "%s: couldn't extract sharename", __func__);
+		cifs_dbg(FYI, "%s: couldn't extract sharename\n", __func__);
 		sharename = NULL;
 		return 0;
 	}
@@ -302,7 +302,7 @@
 	pagevec_init(&pvec, 0);
 	first = 0;
 
-	cFYI(1, "%s: cifs inode 0x%p now uncached", __func__, cifsi);
+	cifs_dbg(FYI, "%s: cifs inode 0x%p now uncached\n", __func__, cifsi);
 
 	for (;;) {
 		nr_pages = pagevec_lookup(&pvec,
diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c
index d9ea6ed..d597483 100644
--- a/fs/cifs/cifs_debug.c
+++ b/fs/cifs/cifs_debug.c
@@ -57,15 +57,32 @@
 	}
 }
 
+#ifdef CONFIG_CIFS_DEBUG
+void cifs_vfs_err(const char *fmt, ...)
+{
+	struct va_format vaf;
+	va_list args;
+
+	va_start(args, fmt);
+
+	vaf.fmt = fmt;
+	vaf.va = &args;
+
+	printk(KERN_ERR "CIFS VFS: %pV", &vaf);
+
+	va_end(args);
+}
+#endif
+
 void cifs_dump_detail(void *buf)
 {
 #ifdef CONFIG_CIFS_DEBUG2
 	struct smb_hdr *smb = (struct smb_hdr *)buf;
 
-	cERROR(1, "Cmd: %d Err: 0x%x Flags: 0x%x Flgs2: 0x%x Mid: %d Pid: %d",
-		  smb->Command, smb->Status.CifsError,
-		  smb->Flags, smb->Flags2, smb->Mid, smb->Pid);
-	cERROR(1, "smb buf %p len %u", smb, smbCalcSize(smb));
+	cifs_dbg(VFS, "Cmd: %d Err: 0x%x Flags: 0x%x Flgs2: 0x%x Mid: %d Pid: %d\n",
+		 smb->Command, smb->Status.CifsError,
+		 smb->Flags, smb->Flags2, smb->Mid, smb->Pid);
+	cifs_dbg(VFS, "smb buf %p len %u\n", smb, smbCalcSize(smb));
 #endif /* CONFIG_CIFS_DEBUG2 */
 }
 
@@ -78,25 +95,25 @@
 	if (server == NULL)
 		return;
 
-	cERROR(1, "Dump pending requests:");
+	cifs_dbg(VFS, "Dump pending requests:\n");
 	spin_lock(&GlobalMid_Lock);
 	list_for_each(tmp, &server->pending_mid_q) {
 		mid_entry = list_entry(tmp, struct mid_q_entry, qhead);
-		cERROR(1, "State: %d Cmd: %d Pid: %d Cbdata: %p Mid %llu",
-			mid_entry->mid_state,
-			le16_to_cpu(mid_entry->command),
-			mid_entry->pid,
-			mid_entry->callback_data,
-			mid_entry->mid);
+		cifs_dbg(VFS, "State: %d Cmd: %d Pid: %d Cbdata: %p Mid %llu\n",
+			 mid_entry->mid_state,
+			 le16_to_cpu(mid_entry->command),
+			 mid_entry->pid,
+			 mid_entry->callback_data,
+			 mid_entry->mid);
 #ifdef CONFIG_CIFS_STATS2
-		cERROR(1, "IsLarge: %d buf: %p time rcv: %ld now: %ld",
-			mid_entry->large_buf,
-			mid_entry->resp_buf,
-			mid_entry->when_received,
-			jiffies);
+		cifs_dbg(VFS, "IsLarge: %d buf: %p time rcv: %ld now: %ld\n",
+			 mid_entry->large_buf,
+			 mid_entry->resp_buf,
+			 mid_entry->when_received,
+			 jiffies);
 #endif /* STATS2 */
-		cERROR(1, "IsMult: %d IsEnd: %d", mid_entry->multiRsp,
-			  mid_entry->multiEnd);
+		cifs_dbg(VFS, "IsMult: %d IsEnd: %d\n",
+			 mid_entry->multiRsp, mid_entry->multiEnd);
 		if (mid_entry->resp_buf) {
 			cifs_dump_detail(mid_entry->resp_buf);
 			cifs_dump_mem("existing buf: ",
@@ -603,7 +620,7 @@
 			global_secflags = CIFSSEC_MAX;
 			return count;
 		} else if (!isdigit(c)) {
-			cERROR(1, "invalid flag %c", c);
+			cifs_dbg(VFS, "invalid flag %c\n", c);
 			return -EINVAL;
 		}
 	}
@@ -611,16 +628,16 @@
 
 	flags = simple_strtoul(flags_string, NULL, 0);
 
-	cFYI(1, "sec flags 0x%x", flags);
+	cifs_dbg(FYI, "sec flags 0x%x\n", flags);
 
 	if (flags <= 0)  {
-		cERROR(1, "invalid security flags %s", flags_string);
+		cifs_dbg(VFS, "invalid security flags %s\n", flags_string);
 		return -EINVAL;
 	}
 
 	if (flags & ~CIFSSEC_MASK) {
-		cERROR(1, "attempt to set unsupported security flags 0x%x",
-			flags & ~CIFSSEC_MASK);
+		cifs_dbg(VFS, "attempt to set unsupported security flags 0x%x\n",
+			 flags & ~CIFSSEC_MASK);
 		return -EINVAL;
 	}
 	/* flags look ok - update the global security flags for cifs module */
@@ -628,9 +645,9 @@
 	if (global_secflags & CIFSSEC_MUST_SIGN) {
 		/* requiring signing implies signing is allowed */
 		global_secflags |= CIFSSEC_MAY_SIGN;
-		cFYI(1, "packet signing now required");
+		cifs_dbg(FYI, "packet signing now required\n");
 	} else if ((global_secflags & CIFSSEC_MAY_SIGN) == 0) {
-		cFYI(1, "packet signing disabled");
+		cifs_dbg(FYI, "packet signing disabled\n");
 	}
 	/* BB should we turn on MAY flags for other MUST options? */
 	return count;
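Note: cifs_vfs_err() relies on printk's %pV extension, which expands a nested
format string plus captured va_list (struct va_format), so the "CIFS VFS: "
prefix is prepended in a single printk call with no intermediate buffer. A
userspace analog using vfprintf (the format attribute mirrors the kernel's
__printf(1, 2) annotation):

    #include <stdarg.h>
    #include <stdio.h>

    __attribute__((format(printf, 1, 2)))
    static void vfs_err(const char *fmt, ...)
    {
        va_list args;

        va_start(args, fmt);
        fputs("CIFS VFS: ", stderr);   /* single, consistent prefix */
        vfprintf(stderr, fmt, args);
        va_end(args);
    }

    int main(void)
    {
        vfs_err("mount failed w/return code = %d\n", -22);
        return 0;
    }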
diff --git a/fs/cifs/cifs_debug.h b/fs/cifs/cifs_debug.h
index 69ae3d3..c99b40f 100644
--- a/fs/cifs/cifs_debug.h
+++ b/fs/cifs/cifs_debug.h
@@ -25,18 +25,20 @@
 void cifs_dump_mem(char *label, void *data, int length);
 void cifs_dump_detail(void *);
 void cifs_dump_mids(struct TCP_Server_Info *);
-#ifdef CONFIG_CIFS_DEBUG2
-#define DBG2 2
-#else
-#define DBG2 0
-#endif
 extern int traceSMB;		/* flag which enables the function below */
 void dump_smb(void *, int);
 #define CIFS_INFO	0x01
 #define CIFS_RC		0x02
 #define CIFS_TIMER	0x04
 
+#define VFS 1
+#define FYI 2
 extern int cifsFYI;
+#ifdef CONFIG_CIFS_DEBUG2
+#define NOISY 4
+#else
+#define NOISY 0
+#endif
 
 /*
  *	debug ON
@@ -44,31 +46,21 @@
  */
 #ifdef CONFIG_CIFS_DEBUG
 
+__printf(1, 2) void cifs_vfs_err(const char *fmt, ...);
+
 /* information message: e.g., configuration, major event */
-#define cifsfyi(fmt, ...)						\
+#define cifs_dbg(type, fmt, ...)					\
 do {									\
-	if (cifsFYI & CIFS_INFO)					\
-		printk(KERN_DEBUG "%s: " fmt "\n",			\
-		       __FILE__, ##__VA_ARGS__);			\
-} while (0)
-
-#define cFYI(set, fmt, ...)						\
-do {									\
-	if (set)							\
-		cifsfyi(fmt, ##__VA_ARGS__);				\
-} while (0)
-
-#define cifswarn(fmt, ...)						\
-	printk(KERN_WARNING fmt "\n", ##__VA_ARGS__)
-
-/* error event message: e.g., i/o error */
-#define cifserror(fmt, ...)						\
-	printk(KERN_ERR "CIFS VFS: " fmt "\n", ##__VA_ARGS__);		\
-
-#define cERROR(set, fmt, ...)						\
-do {									\
-	if (set)							\
-		cifserror(fmt, ##__VA_ARGS__);				\
+	if (type == FYI) {						\
+		if (cifsFYI & CIFS_INFO) {				\
+			printk(KERN_DEBUG "%s: " fmt,			\
+			       __FILE__, ##__VA_ARGS__);		\
+		}							\
+	} else if (type == VFS) {					\
+		cifs_vfs_err(fmt, ##__VA_ARGS__);			\
+	} else if (type == NOISY && type != 0) {			\
+		printk(KERN_DEBUG fmt, ##__VA_ARGS__);			\
+	}								\
 } while (0)
 
 /*
@@ -76,27 +68,11 @@
  *	---------
  */
 #else		/* _CIFS_DEBUG */
-#define cifsfyi(fmt, ...)						\
+#define cifs_dbg(type, fmt, ...)					\
 do {									\
 	if (0)								\
-		printk(KERN_DEBUG "%s: " fmt "\n",			\
-		       __FILE__, ##__VA_ARGS__);			\
+		printk(KERN_DEBUG fmt, ##__VA_ARGS__);			\
 } while (0)
-#define cFYI(set, fmt, ...)						\
-do {									\
-	if (0 && set)							\
-		cifsfyi(fmt, ##__VA_ARGS__);				\
-} while (0)
-#define cifserror(fmt, ...)						\
-do {									\
-	if (0)								\
-		printk(KERN_ERR "CIFS VFS: " fmt "\n", ##__VA_ARGS__);	\
-} while (0)
-#define cERROR(set, fmt, ...)						\
-do {									\
-	if (0 && set)							\
-		cifserror(fmt, ##__VA_ARGS__);				\
-} while (0)
-#endif		/* _CIFS_DEBUG */
+#endif
 
 #endif				/* _H_CIFS_DEBUG */
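Note: the rewritten header collapses cFYI/cERROR/cifsfyi/cifserror into one
cifs_dbg(type, fmt, ...) entry point with VFS, FYI, and NOISY levels; callers now
supply their own trailing "\n", and the CONFIG_CIFS_DEBUG=n variant keeps an
"if (0)" so the compiler still type-checks every format string it compiles out.
A userspace model of the same shape (GNU ##__VA_ARGS__, as in the kernel):

    #include <stdio.h>

    #define VFS   1
    #define FYI   2
    #define NOISY 4

    #define DEBUG_BUILD 1   /* remove for the compiled-out variant */

    static int debug_fyi = 1;  /* stands in for the cifsFYI knob */

    #ifdef DEBUG_BUILD
    #define dbg(type, fmt, ...)                                     \
    do {                                                            \
        if ((type) == FYI) {                                        \
            if (debug_fyi)                                          \
                fprintf(stderr, "%s: " fmt, __FILE__,               \
                        ##__VA_ARGS__);                             \
        } else if ((type) == VFS) {                                 \
            fprintf(stderr, "VFS: " fmt, ##__VA_ARGS__);            \
        } else if ((type) == NOISY) {                               \
            fprintf(stderr, fmt, ##__VA_ARGS__);                    \
        }                                                           \
    } while (0)
    #else
    #define dbg(type, fmt, ...)                                     \
    do {                                                            \
        if (0)  /* still type-checks fmt against the arguments */   \
            fprintf(stderr, fmt, ##__VA_ARGS__);                    \
    } while (0)
    #endif

    int main(void)
    {
        dbg(FYI, "sec flags 0x%x\n", 0x7);
        dbg(VFS, "invalid security flags %s\n", "0xzz");
        return 0;
    }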
diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c
index 210fce2..8e33ec6 100644
--- a/fs/cifs/cifs_dfs_ref.c
+++ b/fs/cifs/cifs_dfs_ref.c
@@ -84,8 +84,8 @@
 	/* find server name end */
 	pSep = memchr(UNC+2, '\\', len-2);
 	if (!pSep) {
-		cERROR(1, "%s: no server name end in node name: %s",
-			__func__, node_name);
+		cifs_dbg(VFS, "%s: no server name end in node name: %s\n",
+			 __func__, node_name);
 		kfree(UNC);
 		return ERR_PTR(-EINVAL);
 	}
@@ -141,8 +141,8 @@
 
 	rc = dns_resolve_server_name_to_ip(*devname, &srvIP);
 	if (rc < 0) {
-		cFYI(1, "%s: Failed to resolve server part of %s to IP: %d",
-			__func__, *devname, rc);
+		cifs_dbg(FYI, "%s: Failed to resolve server part of %s to IP: %d\n",
+			 __func__, *devname, rc);
 		goto compose_mount_options_err;
 	}
 
@@ -216,8 +216,8 @@
 		strcat(mountdata, fullpath + ref->path_consumed);
 	}
 
-	/*cFYI(1, "%s: parent mountdata: %s", __func__,sb_mountdata);*/
-	/*cFYI(1, "%s: submount mountdata: %s", __func__, mountdata );*/
+	/*cifs_dbg(FYI, "%s: parent mountdata: %s\n", __func__, sb_mountdata);*/
+	/*cifs_dbg(FYI, "%s: submount mountdata: %s\n", __func__, mountdata );*/
 
 compose_mount_options_out:
 	kfree(srvIP);
@@ -260,11 +260,12 @@
 
 static void dump_referral(const struct dfs_info3_param *ref)
 {
-	cFYI(1, "DFS: ref path: %s", ref->path_name);
-	cFYI(1, "DFS: node path: %s", ref->node_name);
-	cFYI(1, "DFS: fl: %hd, srv_type: %hd", ref->flags, ref->server_type);
-	cFYI(1, "DFS: ref_flags: %hd, path_consumed: %hd", ref->ref_flag,
-				ref->path_consumed);
+	cifs_dbg(FYI, "DFS: ref path: %s\n", ref->path_name);
+	cifs_dbg(FYI, "DFS: node path: %s\n", ref->node_name);
+	cifs_dbg(FYI, "DFS: fl: %hd, srv_type: %hd\n",
+		 ref->flags, ref->server_type);
+	cifs_dbg(FYI, "DFS: ref_flags: %hd, path_consumed: %hd\n",
+		 ref->ref_flag, ref->path_consumed);
 }
 
 /*
@@ -283,7 +284,7 @@
 	struct vfsmount *mnt;
 	struct tcon_link *tlink;
 
-	cFYI(1, "in %s", __func__);
+	cifs_dbg(FYI, "in %s\n", __func__);
 	BUG_ON(IS_ROOT(mntpt));
 
 	/*
@@ -320,15 +321,15 @@
 		/* connect to a node */
 		len = strlen(referrals[i].node_name);
 		if (len < 2) {
-			cERROR(1, "%s: Net Address path too short: %s",
-					__func__, referrals[i].node_name);
+			cifs_dbg(VFS, "%s: Net Address path too short: %s\n",
+				 __func__, referrals[i].node_name);
 			mnt = ERR_PTR(-EINVAL);
 			break;
 		}
 		mnt = cifs_dfs_do_refmount(cifs_sb,
 				full_path, referrals + i);
-		cFYI(1, "%s: cifs_dfs_do_refmount:%s , mnt:%p", __func__,
-					referrals[i].node_name, mnt);
+		cifs_dbg(FYI, "%s: cifs_dfs_do_refmount:%s , mnt:%p\n",
+			 __func__, referrals[i].node_name, mnt);
 		if (!IS_ERR(mnt))
 			goto success;
 	}
@@ -343,7 +344,7 @@
 free_full_path:
 	kfree(full_path);
 cdda_exit:
-	cFYI(1, "leaving %s" , __func__);
+	cifs_dbg(FYI, "leaving %s\n" , __func__);
 	return mnt;
 }
 
@@ -354,11 +355,11 @@
 {
 	struct vfsmount *newmnt;
 
-	cFYI(1, "in %s", __func__);
+	cifs_dbg(FYI, "in %s\n", __func__);
 
 	newmnt = cifs_dfs_do_automount(path->dentry);
 	if (IS_ERR(newmnt)) {
-		cFYI(1, "leaving %s [automount failed]" , __func__);
+		cifs_dbg(FYI, "leaving %s [automount failed]\n" , __func__);
 		return newmnt;
 	}
 
@@ -366,7 +367,7 @@
 	mnt_set_expiry(newmnt, &cifs_dfs_automount_list);
 	schedule_delayed_work(&cifs_dfs_automount_task,
 			      cifs_dfs_mountpoint_expiry_timeout);
-	cFYI(1, "leaving %s [ok]" , __func__);
+	cifs_dbg(FYI, "leaving %s [ok]\n" , __func__);
 	return newmnt;
 }
 
diff --git a/fs/cifs/cifs_spnego.c b/fs/cifs/cifs_spnego.c
index 10e7747..a3e9325 100644
--- a/fs/cifs/cifs_spnego.c
+++ b/fs/cifs/cifs_spnego.c
@@ -37,12 +37,11 @@
 	int ret;
 
 	ret = -ENOMEM;
-	payload = kmalloc(prep->datalen, GFP_KERNEL);
+	payload = kmemdup(prep->data, prep->datalen, GFP_KERNEL);
 	if (!payload)
 		goto error;
 
 	/* attach the data */
-	memcpy(payload, prep->data, prep->datalen);
 	key->payload.data = payload;
 	ret = 0;
 
@@ -164,7 +163,7 @@
 	dp = description + strlen(description);
 	sprintf(dp, ";pid=0x%x", current->pid);
 
-	cFYI(1, "key description = %s", description);
+	cifs_dbg(FYI, "key description = %s\n", description);
 	spnego_key = request_key(&cifs_spnego_key_type, description, "");
 
 #ifdef CONFIG_CIFS_DEBUG2
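Note: the cifs_spnego.c hunk is the stock kmemdup() conversion -- allocate and
copy the key payload in one call rather than a kmalloc()/memcpy() pair, which
also removes the window where the buffer exists uninitialized. A userspace
analog of the helper:

    #include <stdlib.h>
    #include <string.h>
    #include <stdio.h>

    static void *memdup(const void *src, size_t len)
    {
        void *p = malloc(len);

        if (p)
            memcpy(p, src, len);
        return p;
    }

    int main(void)
    {
        const char blob[] = { 0x60, 0x28, 0x06, 0x06 }; /* fake token */
        void *payload = memdup(blob, sizeof(blob));

        if (!payload)
            return 1;
        printf("duplicated %zu bytes\n", sizeof(blob));
        free(payload);
        return 0;
    }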
diff --git a/fs/cifs/cifs_unicode.c b/fs/cifs/cifs_unicode.c
index 71d5d0a..0227b45 100644
--- a/fs/cifs/cifs_unicode.c
+++ b/fs/cifs/cifs_unicode.c
@@ -227,8 +227,8 @@
 	for (i = 0; len && *from; i++, from += charlen, len -= charlen) {
 		charlen = codepage->char2uni(from, len, &wchar_to);
 		if (charlen < 1) {
-			cERROR(1, "strtoUTF16: char2uni of 0x%x returned %d",
-				*from, charlen);
+			cifs_dbg(VFS, "strtoUTF16: char2uni of 0x%x returned %d\n",
+				 *from, charlen);
 			/* A question mark */
 			wchar_to = 0x003f;
 			charlen = 1;
diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c
index f1e3f25..51f5e0e 100644
--- a/fs/cifs/cifsacl.c
+++ b/fs/cifs/cifsacl.c
@@ -63,11 +63,10 @@
 		key->datalen = prep->datalen;
 		return 0;
 	}
-	payload = kmalloc(prep->datalen, GFP_KERNEL);
+	payload = kmemdup(prep->data, prep->datalen, GFP_KERNEL);
 	if (!payload)
 		return -ENOMEM;
 
-	memcpy(payload, prep->data, prep->datalen);
 	key->payload.data = payload;
 	key->datalen = prep->datalen;
 	return 0;
@@ -219,13 +218,13 @@
 	sidkey = request_key(&cifs_idmap_key_type, desc, "");
 	if (IS_ERR(sidkey)) {
 		rc = -EINVAL;
-		cFYI(1, "%s: Can't map %cid %u to a SID", __func__,
-			sidtype == SIDOWNER ? 'u' : 'g', cid);
+		cifs_dbg(FYI, "%s: Can't map %cid %u to a SID\n",
+			 __func__, sidtype == SIDOWNER ? 'u' : 'g', cid);
 		goto out_revert_creds;
 	} else if (sidkey->datalen < CIFS_SID_BASE_SIZE) {
 		rc = -EIO;
-		cFYI(1, "%s: Downcall contained malformed key "
-			"(datalen=%hu)", __func__, sidkey->datalen);
+		cifs_dbg(FYI, "%s: Downcall contained malformed key (datalen=%hu)\n",
+			 __func__, sidkey->datalen);
 		goto invalidate_key;
 	}
 
@@ -241,8 +240,8 @@
 	ksid_size = CIFS_SID_BASE_SIZE + (ksid->num_subauth * sizeof(__le32));
 	if (ksid_size > sidkey->datalen) {
 		rc = -EIO;
-		cFYI(1, "%s: Downcall contained malformed key (datalen=%hu, "
-			"ksid_size=%u)", __func__, sidkey->datalen, ksid_size);
+		cifs_dbg(FYI, "%s: Downcall contained malformed key (datalen=%hu, ksid_size=%u)\n",
+			 __func__, sidkey->datalen, ksid_size);
 		goto invalidate_key;
 	}
 
@@ -274,8 +273,8 @@
 	 * Just return an error.
 	 */
 	if (unlikely(psid->num_subauth > SID_MAX_SUB_AUTHORITIES)) {
-		cFYI(1, "%s: %u subauthorities is too many!", __func__,
-			psid->num_subauth);
+		cifs_dbg(FYI, "%s: %u subauthorities is too many!\n",
+			 __func__, psid->num_subauth);
 		return -EIO;
 	}
 
@@ -287,8 +286,8 @@
 	sidkey = request_key(&cifs_idmap_key_type, sidstr, "");
 	if (IS_ERR(sidkey)) {
 		rc = -EINVAL;
-		cFYI(1, "%s: Can't map SID %s to a %cid", __func__, sidstr,
-			sidtype == SIDOWNER ? 'u' : 'g');
+		cifs_dbg(FYI, "%s: Can't map SID %s to a %cid\n",
+			 __func__, sidstr, sidtype == SIDOWNER ? 'u' : 'g');
 		goto out_revert_creds;
 	}
 
@@ -300,8 +299,8 @@
 	BUILD_BUG_ON(sizeof(uid_t) != sizeof(gid_t));
 	if (sidkey->datalen != sizeof(uid_t)) {
 		rc = -EIO;
-		cFYI(1, "%s: Downcall contained malformed key "
-			"(datalen=%hu)", __func__, sidkey->datalen);
+		cifs_dbg(FYI, "%s: Downcall contained malformed key (datalen=%hu)\n",
+			 __func__, sidkey->datalen);
 		key_invalidate(sidkey);
 		goto out_key_put;
 	}
@@ -346,7 +345,8 @@
 	struct key *keyring;
 	int ret;
 
-	cFYI(1, "Registering the %s key type", cifs_idmap_key_type.name);
+	cifs_dbg(FYI, "Registering the %s key type\n",
+		 cifs_idmap_key_type.name);
 
 	/* create an override credential set with a special thread keyring in
 	 * which requests are cached
@@ -379,7 +379,7 @@
 	cred->jit_keyring = KEY_REQKEY_DEFL_THREAD_KEYRING;
 	root_cred = cred;
 
-	cFYI(1, "cifs idmap keyring: %d", key_serial(keyring));
+	cifs_dbg(FYI, "cifs idmap keyring: %d\n", key_serial(keyring));
 	return 0;
 
 failed_put_key:
@@ -395,7 +395,7 @@
 	key_revoke(root_cred->thread_keyring);
 	unregister_key_type(&cifs_idmap_key_type);
 	put_cred(root_cred);
-	cFYI(1, "Unregistered %s key type", cifs_idmap_key_type.name);
+	cifs_dbg(FYI, "Unregistered %s key type\n", cifs_idmap_key_type.name);
 }
 
 /* copy ntsd, owner sid, and group sid from a security descriptor to another */
@@ -462,14 +462,14 @@
 			*pbits_to_set &= ~S_IXUGO;
 		return;
 	} else if (type != ACCESS_ALLOWED) {
-		cERROR(1, "unknown access control type %d", type);
+		cifs_dbg(VFS, "unknown access control type %d\n", type);
 		return;
 	}
 	/* else ACCESS_ALLOWED type */
 
 	if (flags & GENERIC_ALL) {
 		*pmode |= (S_IRWXUGO & (*pbits_to_set));
-		cFYI(DBG2, "all perms");
+		cifs_dbg(NOISY, "all perms\n");
 		return;
 	}
 	if ((flags & GENERIC_WRITE) ||
@@ -482,7 +482,7 @@
 			((flags & FILE_EXEC_RIGHTS) == FILE_EXEC_RIGHTS))
 		*pmode |= (S_IXUGO & (*pbits_to_set));
 
-	cFYI(DBG2, "access flags 0x%x mode now 0x%x", flags, *pmode);
+	cifs_dbg(NOISY, "access flags 0x%x mode now 0x%x\n", flags, *pmode);
 	return;
 }
 
@@ -511,7 +511,8 @@
 	if (mode & S_IXUGO)
 		*pace_flags |= SET_FILE_EXEC_RIGHTS;
 
-	cFYI(DBG2, "mode: 0x%x, access flags now 0x%x", mode, *pace_flags);
+	cifs_dbg(NOISY, "mode: 0x%x, access flags now 0x%x\n",
+		 mode, *pace_flags);
 	return;
 }
 
@@ -551,24 +552,24 @@
 	/* validate that we do not go past end of acl */
 
 	if (le16_to_cpu(pace->size) < 16) {
-		cERROR(1, "ACE too small %d", le16_to_cpu(pace->size));
+		cifs_dbg(VFS, "ACE too small %d\n", le16_to_cpu(pace->size));
 		return;
 	}
 
 	if (end_of_acl < (char *)pace + le16_to_cpu(pace->size)) {
-		cERROR(1, "ACL too small to parse ACE");
+		cifs_dbg(VFS, "ACL too small to parse ACE\n");
 		return;
 	}
 
 	num_subauth = pace->sid.num_subauth;
 	if (num_subauth) {
 		int i;
-		cFYI(1, "ACE revision %d num_auth %d type %d flags %d size %d",
-			pace->sid.revision, pace->sid.num_subauth, pace->type,
-			pace->flags, le16_to_cpu(pace->size));
+		cifs_dbg(FYI, "ACE revision %d num_auth %d type %d flags %d size %d\n",
+			 pace->sid.revision, pace->sid.num_subauth, pace->type,
+			 pace->flags, le16_to_cpu(pace->size));
 		for (i = 0; i < num_subauth; ++i) {
-			cFYI(1, "ACE sub_auth[%d]: 0x%x", i,
-				le32_to_cpu(pace->sid.sub_auth[i]));
+			cifs_dbg(FYI, "ACE sub_auth[%d]: 0x%x\n",
+				 i, le32_to_cpu(pace->sid.sub_auth[i]));
 		}
 
 		/* BB add length check to make sure that we do not have huge
@@ -601,13 +602,13 @@
 
 	/* validate that we do not go past end of acl */
 	if (end_of_acl < (char *)pdacl + le16_to_cpu(pdacl->size)) {
-		cERROR(1, "ACL too small to parse DACL");
+		cifs_dbg(VFS, "ACL too small to parse DACL\n");
 		return;
 	}
 
-	cFYI(DBG2, "DACL revision %d size %d num aces %d",
-		le16_to_cpu(pdacl->revision), le16_to_cpu(pdacl->size),
-		le32_to_cpu(pdacl->num_aces));
+	cifs_dbg(NOISY, "DACL revision %d size %d num aces %d\n",
+		 le16_to_cpu(pdacl->revision), le16_to_cpu(pdacl->size),
+		 le32_to_cpu(pdacl->num_aces));
 
 	/* reset rwx permissions for user/group/other.
 	   Also, if num_aces is 0 i.e. DACL has no ACEs,
@@ -627,10 +628,8 @@
 			return;
 		ppace = kmalloc(num_aces * sizeof(struct cifs_ace *),
 				GFP_KERNEL);
-		if (!ppace) {
-			cERROR(1, "DACL memory allocation error");
+		if (!ppace)
 			return;
-		}
 
 		for (i = 0; i < num_aces; ++i) {
 			ppace[i] = (struct cifs_ace *) (acl_base + acl_size);
@@ -703,25 +702,25 @@
 	/* validate that we do not go past end of ACL - sid must be at least 8
 	   bytes long (assuming no sub-auths - e.g. the null SID */
 	if (end_of_acl < (char *)psid + 8) {
-		cERROR(1, "ACL too small to parse SID %p", psid);
+		cifs_dbg(VFS, "ACL too small to parse SID %p\n", psid);
 		return -EINVAL;
 	}
 
 #ifdef CONFIG_CIFS_DEBUG2
 	if (psid->num_subauth) {
 		int i;
-		cFYI(1, "SID revision %d num_auth %d",
-			psid->revision, psid->num_subauth);
+		cifs_dbg(FYI, "SID revision %d num_auth %d\n",
+			 psid->revision, psid->num_subauth);
 
 		for (i = 0; i < psid->num_subauth; i++) {
-			cFYI(1, "SID sub_auth[%d]: 0x%x ", i,
-				le32_to_cpu(psid->sub_auth[i]));
+			cifs_dbg(FYI, "SID sub_auth[%d]: 0x%x\n",
+				 i, le32_to_cpu(psid->sub_auth[i]));
 		}
 
 		/* BB add length check to make sure that we do not have huge
 			num auths and therefore go off the end */
-		cFYI(1, "RID 0x%x",
-			le32_to_cpu(psid->sub_auth[psid->num_subauth-1]));
+		cifs_dbg(FYI, "RID 0x%x\n",
+			 le32_to_cpu(psid->sub_auth[psid->num_subauth-1]));
 	}
 #endif
 
@@ -748,31 +747,33 @@
 				le32_to_cpu(pntsd->gsidoffset));
 	dacloffset = le32_to_cpu(pntsd->dacloffset);
 	dacl_ptr = (struct cifs_acl *)((char *)pntsd + dacloffset);
-	cFYI(DBG2, "revision %d type 0x%x ooffset 0x%x goffset 0x%x "
-		 "sacloffset 0x%x dacloffset 0x%x",
+	cifs_dbg(NOISY, "revision %d type 0x%x ooffset 0x%x goffset 0x%x sacloffset 0x%x dacloffset 0x%x\n",
 		 pntsd->revision, pntsd->type, le32_to_cpu(pntsd->osidoffset),
 		 le32_to_cpu(pntsd->gsidoffset),
 		 le32_to_cpu(pntsd->sacloffset), dacloffset);
 /*	cifs_dump_mem("owner_sid: ", owner_sid_ptr, 64); */
 	rc = parse_sid(owner_sid_ptr, end_of_acl);
 	if (rc) {
-		cFYI(1, "%s: Error %d parsing Owner SID", __func__, rc);
+		cifs_dbg(FYI, "%s: Error %d parsing Owner SID\n", __func__, rc);
 		return rc;
 	}
 	rc = sid_to_id(cifs_sb, owner_sid_ptr, fattr, SIDOWNER);
 	if (rc) {
-		cFYI(1, "%s: Error %d mapping Owner SID to uid", __func__, rc);
+		cifs_dbg(FYI, "%s: Error %d mapping Owner SID to uid\n",
+			 __func__, rc);
 		return rc;
 	}
 
 	rc = parse_sid(group_sid_ptr, end_of_acl);
 	if (rc) {
-		cFYI(1, "%s: Error %d mapping Owner SID to gid", __func__, rc);
+		cifs_dbg(FYI, "%s: Error %d mapping Owner SID to gid\n",
+			 __func__, rc);
 		return rc;
 	}
 	rc = sid_to_id(cifs_sb, group_sid_ptr, fattr, SIDGROUP);
 	if (rc) {
-		cFYI(1, "%s: Error %d mapping Group SID to gid", __func__, rc);
+		cifs_dbg(FYI, "%s: Error %d mapping Group SID to gid\n",
+			 __func__, rc);
 		return rc;
 	}
 
@@ -780,7 +781,7 @@
 		parse_dacl(dacl_ptr, end_of_acl, owner_sid_ptr,
 			   group_sid_ptr, fattr);
 	else
-		cFYI(1, "no ACL"); /* BB grant all or default perms? */
+		cifs_dbg(FYI, "no ACL\n"); /* BB grant all or default perms? */
 
 	return rc;
 }
@@ -830,8 +831,8 @@
 			id = from_kuid(&init_user_ns, uid);
 			rc = id_to_sid(id, SIDOWNER, nowner_sid_ptr);
 			if (rc) {
-				cFYI(1, "%s: Mapping error %d for owner id %d",
-						__func__, rc, id);
+				cifs_dbg(FYI, "%s: Mapping error %d for owner id %d\n",
+					 __func__, rc, id);
 				kfree(nowner_sid_ptr);
 				return rc;
 			}
@@ -850,8 +851,8 @@
 			id = from_kgid(&init_user_ns, gid);
 			rc = id_to_sid(id, SIDGROUP, ngroup_sid_ptr);
 			if (rc) {
-				cFYI(1, "%s: Mapping error %d for group id %d",
-						__func__, rc, id);
+				cifs_dbg(FYI, "%s: Mapping error %d for group id %d\n",
+					 __func__, rc, id);
 				kfree(ngroup_sid_ptr);
 				return rc;
 			}
@@ -881,7 +882,7 @@
 
 	cifs_put_tlink(tlink);
 
-	cFYI(1, "%s: rc = %d ACL len %d", __func__, rc, *pacllen);
+	cifs_dbg(FYI, "%s: rc = %d ACL len %d\n", __func__, rc, *pacllen);
 	if (rc)
 		return ERR_PTR(rc);
 	return pntsd;
@@ -918,7 +919,7 @@
 	cifs_put_tlink(tlink);
 	free_xid(xid);
 
-	cFYI(1, "%s: rc = %d ACL len %d", __func__, rc, *pacllen);
+	cifs_dbg(FYI, "%s: rc = %d ACL len %d\n", __func__, rc, *pacllen);
 	if (rc)
 		return ERR_PTR(rc);
 	return pntsd;
@@ -972,12 +973,12 @@
 			create_options, &fid, &oplock, NULL, cifs_sb->local_nls,
 			cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
 	if (rc) {
-		cERROR(1, "Unable to open file to set ACL");
+		cifs_dbg(VFS, "Unable to open file to set ACL\n");
 		goto out;
 	}
 
 	rc = CIFSSMBSetCIFSACL(xid, tcon, fid, pnntsd, acllen, aclflag);
-	cFYI(DBG2, "SetCIFSACL rc = %d", rc);
+	cifs_dbg(NOISY, "SetCIFSACL rc = %d\n", rc);
 
 	CIFSSMBClose(xid, tcon, fid);
 out:
@@ -995,7 +996,7 @@
 	u32 acllen = 0;
 	int rc = 0;
 
-	cFYI(DBG2, "converting ACL to mode for %s", path);
+	cifs_dbg(NOISY, "converting ACL to mode for %s\n", path);
 
 	if (pfid)
 		pntsd = get_cifs_acl_by_fid(cifs_sb, *pfid, &acllen);
@@ -1005,12 +1006,12 @@
 	/* if we can retrieve the ACL, now parse Access Control Entries, ACEs */
 	if (IS_ERR(pntsd)) {
 		rc = PTR_ERR(pntsd);
-		cERROR(1, "%s: error %d getting sec desc", __func__, rc);
+		cifs_dbg(VFS, "%s: error %d getting sec desc\n", __func__, rc);
 	} else {
 		rc = parse_sec_desc(cifs_sb, pntsd, acllen, fattr);
 		kfree(pntsd);
 		if (rc)
-			cERROR(1, "parse sec desc failed rc = %d", rc);
+			cifs_dbg(VFS, "parse sec desc failed rc = %d\n", rc);
 	}
 
 	return rc;
@@ -1027,13 +1028,13 @@
 	struct cifs_ntsd *pntsd = NULL; /* acl obtained from server */
 	struct cifs_ntsd *pnntsd = NULL; /* modified acl to be sent to server */
 
-	cFYI(DBG2, "set ACL from mode for %s", path);
+	cifs_dbg(NOISY, "set ACL from mode for %s\n", path);
 
 	/* Get the security descriptor */
 	pntsd = get_cifs_acl(CIFS_SB(inode->i_sb), inode, path, &secdesclen);
 	if (IS_ERR(pntsd)) {
 		rc = PTR_ERR(pntsd);
-		cERROR(1, "%s: error %d getting sec desc", __func__, rc);
+		cifs_dbg(VFS, "%s: error %d getting sec desc\n", __func__, rc);
 		goto out;
 	}
 
@@ -1046,7 +1047,6 @@
 	secdesclen = max_t(u32, secdesclen, DEFAULT_SEC_DESC_LEN);
 	pnntsd = kmalloc(secdesclen, GFP_KERNEL);
 	if (!pnntsd) {
-		cERROR(1, "Unable to allocate security descriptor");
 		kfree(pntsd);
 		return -ENOMEM;
 	}
@@ -1054,12 +1054,12 @@
 	rc = build_sec_desc(pntsd, pnntsd, secdesclen, nmode, uid, gid,
 				&aclflag);
 
-	cFYI(DBG2, "build_sec_desc rc: %d", rc);
+	cifs_dbg(NOISY, "build_sec_desc rc: %d\n", rc);
 
 	if (!rc) {
 		/* Set the security descriptor */
 		rc = set_cifs_acl(pnntsd, secdesclen, inode, path, aclflag);
-		cFYI(DBG2, "set_cifs_acl rc: %d", rc);
+		cifs_dbg(NOISY, "set_cifs_acl rc: %d\n", rc);
 	}
 
 	kfree(pnntsd);
diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c
index 652f505..71436d1 100644
--- a/fs/cifs/cifsencrypt.c
+++ b/fs/cifs/cifsencrypt.c
@@ -50,20 +50,20 @@
 		return -EINVAL;
 
 	if (!server->secmech.sdescmd5) {
-		cERROR(1, "%s: Can't generate signature", __func__);
+		cifs_dbg(VFS, "%s: Can't generate signature\n", __func__);
 		return -1;
 	}
 
 	rc = crypto_shash_init(&server->secmech.sdescmd5->shash);
 	if (rc) {
-		cERROR(1, "%s: Could not init md5", __func__);
+		cifs_dbg(VFS, "%s: Could not init md5\n", __func__);
 		return rc;
 	}
 
 	rc = crypto_shash_update(&server->secmech.sdescmd5->shash,
 		server->session_key.response, server->session_key.len);
 	if (rc) {
-		cERROR(1, "%s: Could not update with response", __func__);
+		cifs_dbg(VFS, "%s: Could not update with response\n", __func__);
 		return rc;
 	}
 
@@ -71,7 +71,7 @@
 		if (iov[i].iov_len == 0)
 			continue;
 		if (iov[i].iov_base == NULL) {
-			cERROR(1, "null iovec entry");
+			cifs_dbg(VFS, "null iovec entry\n");
 			return -EIO;
 		}
 		/* The first entry includes a length field (which does not get
@@ -88,8 +88,8 @@
 				iov[i].iov_base, iov[i].iov_len);
 		}
 		if (rc) {
-			cERROR(1, "%s: Could not update with payload",
-							__func__);
+			cifs_dbg(VFS, "%s: Could not update with payload\n",
+				 __func__);
 			return rc;
 		}
 	}
@@ -106,7 +106,7 @@
 
 	rc = crypto_shash_final(&server->secmech.sdescmd5->shash, signature);
 	if (rc)
-		cERROR(1, "%s: Could not generate md5 hash", __func__);
+		cifs_dbg(VFS, "%s: Could not generate md5 hash\n", __func__);
 
 	return rc;
 }
@@ -135,8 +135,8 @@
 				cpu_to_le32(server->sequence_number);
 	cifs_pdu->Signature.Sequence.Reserved = 0;
 
-	*pexpected_response_sequence_number = server->sequence_number++;
-	server->sequence_number++;
+	*pexpected_response_sequence_number = ++server->sequence_number;
+	++server->sequence_number;
 
 	rc = cifs_calc_signature(rqst, server, smb_signature);
 	if (rc)
@@ -196,8 +196,8 @@
 
 	/* Do not need to verify session setups with signature "BSRSPYL "  */
 	if (memcmp(cifs_pdu->Signature.SecuritySignature, "BSRSPYL ", 8) == 0)
-		cFYI(1, "dummy signature received for smb command 0x%x",
-			cifs_pdu->Command);
+		cifs_dbg(FYI, "dummy signature received for smb command 0x%x\n",
+			 cifs_pdu->Command);
 
 	/* save off the origiginal signature so we can modify the smb and check
 		its signature against what the server sent */
@@ -235,30 +235,30 @@
 		return -EINVAL;
 
 	ses->auth_key.response = kmalloc(temp_len, GFP_KERNEL);
-	if (!ses->auth_key.response) {
-		cERROR(1, "NTLM can't allocate (%u bytes) memory", temp_len);
+	if (!ses->auth_key.response)
 		return -ENOMEM;
-	}
+
 	ses->auth_key.len = temp_len;
 
 	rc = SMBNTencrypt(ses->password, ses->server->cryptkey,
 			ses->auth_key.response + CIFS_SESS_KEY_SIZE, nls_cp);
 	if (rc) {
-		cFYI(1, "%s Can't generate NTLM response, error: %d",
-			__func__, rc);
+		cifs_dbg(FYI, "%s Can't generate NTLM response, error: %d\n",
+			 __func__, rc);
 		return rc;
 	}
 
 	rc = E_md4hash(ses->password, temp_key, nls_cp);
 	if (rc) {
-		cFYI(1, "%s Can't generate NT hash, error: %d", __func__, rc);
+		cifs_dbg(FYI, "%s Can't generate NT hash, error: %d\n",
+			 __func__, rc);
 		return rc;
 	}
 
 	rc = mdfour(ses->auth_key.response, temp_key, CIFS_SESS_KEY_SIZE);
 	if (rc)
-		cFYI(1, "%s Can't generate NTLM session key, error: %d",
-			__func__, rc);
+		cifs_dbg(FYI, "%s Can't generate NTLM session key, error: %d\n",
+			 __func__, rc);
 
 	return rc;
 }
@@ -334,7 +334,6 @@
 	ses->auth_key.response = kzalloc(ses->auth_key.len, GFP_KERNEL);
 	if (!ses->auth_key.response) {
 		ses->auth_key.len = 0;
-		cERROR(1, "Challenge target info allocation failure");
 		return -ENOMEM;
 	}
 
@@ -420,7 +419,7 @@
 	wchar_t *server;
 
 	if (!ses->server->secmech.sdeschmacmd5) {
-		cERROR(1, "calc_ntlmv2_hash: can't generate ntlmv2 hash");
+		cifs_dbg(VFS, "%s: can't generate ntlmv2 hash\n", __func__);
 		return -1;
 	}
 
@@ -430,13 +429,13 @@
 	rc = crypto_shash_setkey(ses->server->secmech.hmacmd5, nt_hash,
 				CIFS_NTHASH_SIZE);
 	if (rc) {
-		cERROR(1, "%s: Could not set NT Hash as a key", __func__);
+		cifs_dbg(VFS, "%s: Could not set NT Hash as a key\n", __func__);
 		return rc;
 	}
 
 	rc = crypto_shash_init(&ses->server->secmech.sdeschmacmd5->shash);
 	if (rc) {
-		cERROR(1, "calc_ntlmv2_hash: could not init hmacmd5");
+		cifs_dbg(VFS, "%s: could not init hmacmd5\n", __func__);
 		return rc;
 	}
 
@@ -444,7 +443,6 @@
 	len = ses->user_name ? strlen(ses->user_name) : 0;
 	user = kmalloc(2 + (len * 2), GFP_KERNEL);
 	if (user == NULL) {
-		cERROR(1, "calc_ntlmv2_hash: user mem alloc failure");
 		rc = -ENOMEM;
 		return rc;
 	}
@@ -460,7 +458,7 @@
 				(char *)user, 2 * len);
 	kfree(user);
 	if (rc) {
-		cERROR(1, "%s: Could not update with user", __func__);
+		cifs_dbg(VFS, "%s: Could not update with user\n", __func__);
 		return rc;
 	}
 
@@ -470,7 +468,6 @@
 
 		domain = kmalloc(2 + (len * 2), GFP_KERNEL);
 		if (domain == NULL) {
-			cERROR(1, "calc_ntlmv2_hash: domain mem alloc failure");
 			rc = -ENOMEM;
 			return rc;
 		}
@@ -481,8 +478,8 @@
 					(char *)domain, 2 * len);
 		kfree(domain);
 		if (rc) {
-			cERROR(1, "%s: Could not update with domain",
-								__func__);
+			cifs_dbg(VFS, "%s: Could not update with domain\n",
+				 __func__);
 			return rc;
 		}
 	} else if (ses->serverName) {
@@ -490,7 +487,6 @@
 
 		server = kmalloc(2 + (len * 2), GFP_KERNEL);
 		if (server == NULL) {
-			cERROR(1, "calc_ntlmv2_hash: server mem alloc failure");
 			rc = -ENOMEM;
 			return rc;
 		}
@@ -501,8 +497,8 @@
 					(char *)server, 2 * len);
 		kfree(server);
 		if (rc) {
-			cERROR(1, "%s: Could not update with server",
-								__func__);
+			cifs_dbg(VFS, "%s: Could not update with server\n",
+				 __func__);
 			return rc;
 		}
 	}
@@ -510,7 +506,7 @@
 	rc = crypto_shash_final(&ses->server->secmech.sdeschmacmd5->shash,
 					ntlmv2_hash);
 	if (rc)
-		cERROR(1, "%s: Could not generate md5 hash", __func__);
+		cifs_dbg(VFS, "%s: Could not generate md5 hash\n", __func__);
 
 	return rc;
 }
@@ -522,20 +518,21 @@
 	unsigned int offset = CIFS_SESS_KEY_SIZE + 8;
 
 	if (!ses->server->secmech.sdeschmacmd5) {
-		cERROR(1, "calc_ntlmv2_hash: can't generate ntlmv2 hash");
+		cifs_dbg(VFS, "%s: can't generate ntlmv2 hash\n", __func__);
 		return -1;
 	}
 
 	rc = crypto_shash_setkey(ses->server->secmech.hmacmd5,
 				ntlmv2_hash, CIFS_HMAC_MD5_HASH_SIZE);
 	if (rc) {
-		cERROR(1, "%s: Could not set NTLMV2 Hash as a key", __func__);
+		cifs_dbg(VFS, "%s: Could not set NTLMV2 Hash as a key\n",
+			 __func__);
 		return rc;
 	}
 
 	rc = crypto_shash_init(&ses->server->secmech.sdeschmacmd5->shash);
 	if (rc) {
-		cERROR(1, "CalcNTLMv2_response: could not init hmacmd5");
+		cifs_dbg(VFS, "%s: could not init hmacmd5\n", __func__);
 		return rc;
 	}
 
@@ -548,14 +545,14 @@
 	rc = crypto_shash_update(&ses->server->secmech.sdeschmacmd5->shash,
 		ses->auth_key.response + offset, ses->auth_key.len - offset);
 	if (rc) {
-		cERROR(1, "%s: Could not update with response", __func__);
+		cifs_dbg(VFS, "%s: Could not update with response\n", __func__);
 		return rc;
 	}
 
 	rc = crypto_shash_final(&ses->server->secmech.sdeschmacmd5->shash,
 		ses->auth_key.response + CIFS_SESS_KEY_SIZE);
 	if (rc)
-		cERROR(1, "%s: Could not generate md5 hash", __func__);
+		cifs_dbg(VFS, "%s: Could not generate md5 hash\n", __func__);
 
 	return rc;
 }
@@ -575,14 +572,15 @@
 		if (!ses->domainName) {
 			rc = find_domain_name(ses, nls_cp);
 			if (rc) {
-				cERROR(1, "error %d finding domain name", rc);
+				cifs_dbg(VFS, "error %d finding domain name\n",
+					 rc);
 				goto setup_ntlmv2_rsp_ret;
 			}
 		}
 	} else {
 		rc = build_avpair_blob(ses, nls_cp);
 		if (rc) {
-			cERROR(1, "error %d building av pair blob", rc);
+			cifs_dbg(VFS, "error %d building av pair blob\n", rc);
 			goto setup_ntlmv2_rsp_ret;
 		}
 	}
@@ -595,7 +593,6 @@
 	if (!ses->auth_key.response) {
 		rc = ENOMEM;
 		ses->auth_key.len = 0;
-		cERROR(1, "%s: Can't allocate auth blob", __func__);
 		goto setup_ntlmv2_rsp_ret;
 	}
 	ses->auth_key.len += baselen;
@@ -613,14 +610,14 @@
 	/* calculate ntlmv2_hash */
 	rc = calc_ntlmv2_hash(ses, ntlmv2_hash, nls_cp);
 	if (rc) {
-		cERROR(1, "could not get v2 hash rc %d", rc);
+		cifs_dbg(VFS, "could not get v2 hash rc %d\n", rc);
 		goto setup_ntlmv2_rsp_ret;
 	}
 
 	/* calculate first part of the client response (CR1) */
 	rc = CalcNTLMv2_response(ses, ntlmv2_hash);
 	if (rc) {
-		cERROR(1, "Could not calculate CR1  rc: %d", rc);
+		cifs_dbg(VFS, "Could not calculate CR1 rc: %d\n", rc);
 		goto setup_ntlmv2_rsp_ret;
 	}
 
@@ -628,13 +625,14 @@
 	rc = crypto_shash_setkey(ses->server->secmech.hmacmd5,
 		ntlmv2_hash, CIFS_HMAC_MD5_HASH_SIZE);
 	if (rc) {
-		cERROR(1, "%s: Could not set NTLMV2 Hash as a key", __func__);
+		cifs_dbg(VFS, "%s: Could not set NTLMV2 Hash as a key\n",
+			 __func__);
 		goto setup_ntlmv2_rsp_ret;
 	}
 
 	rc = crypto_shash_init(&ses->server->secmech.sdeschmacmd5->shash);
 	if (rc) {
-		cERROR(1, "%s: Could not init hmacmd5", __func__);
+		cifs_dbg(VFS, "%s: Could not init hmacmd5\n", __func__);
 		goto setup_ntlmv2_rsp_ret;
 	}
 
@@ -642,14 +640,14 @@
 		ses->auth_key.response + CIFS_SESS_KEY_SIZE,
 		CIFS_HMAC_MD5_HASH_SIZE);
 	if (rc) {
-		cERROR(1, "%s: Could not update with response", __func__);
+		cifs_dbg(VFS, "%s: Could not update with response\n", __func__);
 		goto setup_ntlmv2_rsp_ret;
 	}
 
 	rc = crypto_shash_final(&ses->server->secmech.sdeschmacmd5->shash,
 		ses->auth_key.response);
 	if (rc)
-		cERROR(1, "%s: Could not generate md5 hash", __func__);
+		cifs_dbg(VFS, "%s: Could not generate md5 hash\n", __func__);
 
 setup_ntlmv2_rsp_ret:
 	kfree(tiblob);
@@ -671,7 +669,7 @@
 	tfm_arc4 = crypto_alloc_blkcipher("ecb(arc4)", 0, CRYPTO_ALG_ASYNC);
 	if (IS_ERR(tfm_arc4)) {
 		rc = PTR_ERR(tfm_arc4);
-		cERROR(1, "could not allocate crypto API arc4");
+		cifs_dbg(VFS, "could not allocate crypto API arc4\n");
 		return rc;
 	}
 
@@ -680,7 +678,8 @@
 	rc = crypto_blkcipher_setkey(tfm_arc4, ses->auth_key.response,
 					CIFS_SESS_KEY_SIZE);
 	if (rc) {
-		cERROR(1, "%s: Could not set response as a key", __func__);
+		cifs_dbg(VFS, "%s: Could not set response as a key\n",
+			 __func__);
 		return rc;
 	}
 
@@ -689,7 +688,7 @@
 
 	rc = crypto_blkcipher_encrypt(&desc, &sgout, &sgin, CIFS_CPHTXT_SIZE);
 	if (rc) {
-		cERROR(1, "could not encrypt session key rc: %d", rc);
+		cifs_dbg(VFS, "could not encrypt session key rc: %d\n", rc);
 		crypto_free_blkcipher(tfm_arc4);
 		return rc;
 	}
@@ -731,20 +730,20 @@
 
 	server->secmech.hmacmd5 = crypto_alloc_shash("hmac(md5)", 0, 0);
 	if (IS_ERR(server->secmech.hmacmd5)) {
-		cERROR(1, "could not allocate crypto hmacmd5");
+		cifs_dbg(VFS, "could not allocate crypto hmacmd5\n");
 		return PTR_ERR(server->secmech.hmacmd5);
 	}
 
 	server->secmech.md5 = crypto_alloc_shash("md5", 0, 0);
 	if (IS_ERR(server->secmech.md5)) {
-		cERROR(1, "could not allocate crypto md5");
+		cifs_dbg(VFS, "could not allocate crypto md5\n");
 		rc = PTR_ERR(server->secmech.md5);
 		goto crypto_allocate_md5_fail;
 	}
 
 	server->secmech.hmacsha256 = crypto_alloc_shash("hmac(sha256)", 0, 0);
 	if (IS_ERR(server->secmech.hmacsha256)) {
-		cERROR(1, "could not allocate crypto hmacsha256\n");
+		cifs_dbg(VFS, "could not allocate crypto hmacsha256\n");
 		rc = PTR_ERR(server->secmech.hmacsha256);
 		goto crypto_allocate_hmacsha256_fail;
 	}
@@ -753,7 +752,6 @@
 			crypto_shash_descsize(server->secmech.hmacmd5);
 	server->secmech.sdeschmacmd5 = kmalloc(size, GFP_KERNEL);
 	if (!server->secmech.sdeschmacmd5) {
-		cERROR(1, "cifs_crypto_shash_allocate: can't alloc hmacmd5");
 		rc = -ENOMEM;
 		goto crypto_allocate_hmacmd5_sdesc_fail;
 	}
@@ -764,7 +762,6 @@
 			crypto_shash_descsize(server->secmech.md5);
 	server->secmech.sdescmd5 = kmalloc(size, GFP_KERNEL);
 	if (!server->secmech.sdescmd5) {
-		cERROR(1, "cifs_crypto_shash_allocate: can't alloc md5");
 		rc = -ENOMEM;
 		goto crypto_allocate_md5_sdesc_fail;
 	}
@@ -775,7 +772,6 @@
 			crypto_shash_descsize(server->secmech.hmacsha256);
 	server->secmech.sdeschmacsha256 = kmalloc(size, GFP_KERNEL);
 	if (!server->secmech.sdeschmacsha256) {
-		cERROR(1, "%s: Can't alloc hmacsha256\n", __func__);
 		rc = -ENOMEM;
 		goto crypto_allocate_hmacsha256_sdesc_fail;
 	}
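Note: the one subtle change in cifsencrypt.c besides the message conversions is
the switch from post- to pre-increment when recording the expected response
sequence number for signature checking: the expected value shifts up by one,
while the net advance per request/response pair stays two. A userspace
illustration of the semantics:

    #include <stdio.h>

    int main(void)
    {
        unsigned int seq, expected;

        seq = 4;
        expected = seq++;   /* old behavior: expected = 4, seq = 5 */
        printf("post: expected=%u seq=%u\n", expected, seq);

        seq = 4;
        expected = ++seq;   /* new behavior: expected = 5, seq = 5 */
        printf("pre:  expected=%u seq=%u\n", expected, seq);
        return 0;
    }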
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 345fc89..72e4efe 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -161,7 +161,7 @@
 
 #ifdef CONFIG_CIFS_NFSD_EXPORT
 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) {
-		cFYI(1, "export ops supported");
+		cifs_dbg(FYI, "export ops supported\n");
 		sb->s_export_op = &cifs_export_ops;
 	}
 #endif /* CONFIG_CIFS_NFSD_EXPORT */
@@ -169,7 +169,7 @@
 	return 0;
 
 out_no_root:
-	cERROR(1, "cifs_read_super: get root inode failed");
+	cifs_dbg(VFS, "%s: get root inode failed\n", __func__);
 	return rc;
 }
 
@@ -502,7 +502,7 @@
 	/* cancel_brl_requests(tcon); */ /* BB mark all brl mids as exiting */
 	/* cancel_notify_requests(tcon); */
 	if (tcon->ses && tcon->ses->server) {
-		cFYI(1, "wake up tasks now - umount begin not complete");
+		cifs_dbg(FYI, "wake up tasks now - umount begin not complete\n");
 		wake_up_all(&tcon->ses->server->request_q);
 		wake_up_all(&tcon->ses->server->response_q);
 		msleep(1); /* yield */
@@ -573,7 +573,7 @@
 	if (full_path == NULL)
 		return ERR_PTR(-ENOMEM);
 
-	cFYI(1, "Get root dentry for %s", full_path);
+	cifs_dbg(FYI, "Get root dentry for %s\n", full_path);
 
 	sep = CIFS_DIR_SEP(cifs_sb);
 	dentry = dget(sb->s_root);
@@ -632,7 +632,7 @@
 	struct cifs_mnt_data mnt_data;
 	struct dentry *root;
 
-	cFYI(1, "Devname: %s flags: %d ", dev_name, flags);
+	cifs_dbg(FYI, "Devname: %s flags: %d\n", dev_name, flags);
 
 	volume_info = cifs_get_volume_info((char *)data, dev_name);
 	if (IS_ERR(volume_info))
@@ -655,7 +655,8 @@
 	rc = cifs_mount(cifs_sb, volume_info);
 	if (rc) {
 		if (!(flags & MS_SILENT))
-			cERROR(1, "cifs_mount failed w/return code = %d", rc);
+			cifs_dbg(VFS, "cifs_mount failed w/return code = %d\n",
+				 rc);
 		root = ERR_PTR(rc);
 		goto out_mountdata;
 	}
@@ -675,7 +676,7 @@
 	}
 
 	if (sb->s_root) {
-		cFYI(1, "Use existing superblock");
+		cifs_dbg(FYI, "Use existing superblock\n");
 		cifs_umount(cifs_sb);
 	} else {
 		rc = cifs_read_super(sb);
@@ -691,7 +692,7 @@
 	if (IS_ERR(root))
 		goto out_super;
 
-	cFYI(1, "dentry root is: %p", root);
+	cifs_dbg(FYI, "dentry root is: %p\n", root);
 	goto out;
 
 out_super:
@@ -723,7 +724,8 @@
 
 	rc = filemap_fdatawrite(inode->i_mapping);
 	if (rc)
-		cFYI(1, "cifs_file_aio_write: %d rc on %p inode", rc, inode);
+		cifs_dbg(FYI, "cifs_file_aio_write: %d rc on %p inode\n",
+			 rc, inode);
 
 	return written;
 }
@@ -1030,7 +1032,10 @@
 	} else {
 		CIFSMaxBufSize &= 0x1FE00; /* Round size to even 512 byte mult*/
 	}
-/*	cERROR(1, "CIFSMaxBufSize %d 0x%x",CIFSMaxBufSize,CIFSMaxBufSize); */
+/*
+	cifs_dbg(VFS, "CIFSMaxBufSize %d 0x%x\n",
+		 CIFSMaxBufSize, CIFSMaxBufSize);
+*/
 	cifs_req_cachep = kmem_cache_create("cifs_request",
 					    CIFSMaxBufSize + max_hdr_size, 0,
 					    SLAB_HWCACHE_ALIGN, NULL);
@@ -1041,7 +1046,7 @@
 		cifs_min_rcv = 1;
 	else if (cifs_min_rcv > 64) {
 		cifs_min_rcv = 64;
-		cERROR(1, "cifs_min_rcv set to maximum (64)");
+		cifs_dbg(VFS, "cifs_min_rcv set to maximum (64)\n");
 	}
 
 	cifs_req_poolp = mempool_create_slab_pool(cifs_min_rcv,
@@ -1072,7 +1077,7 @@
 		cifs_min_small = 2;
 	else if (cifs_min_small > 256) {
 		cifs_min_small = 256;
-		cFYI(1, "cifs_min_small set to maximum (256)");
+		cifs_dbg(FYI, "cifs_min_small set to maximum (256)\n");
 	}
 
 	cifs_sm_req_poolp = mempool_create_slab_pool(cifs_min_small,
@@ -1163,10 +1168,11 @@
 
 	if (cifs_max_pending < 2) {
 		cifs_max_pending = 2;
-		cFYI(1, "cifs_max_pending set to min of 2");
+		cifs_dbg(FYI, "cifs_max_pending set to min of 2\n");
 	} else if (cifs_max_pending > CIFS_MAX_REQ) {
 		cifs_max_pending = CIFS_MAX_REQ;
-		cFYI(1, "cifs_max_pending set to max of %u", CIFS_MAX_REQ);
+		cifs_dbg(FYI, "cifs_max_pending set to max of %u\n",
+			 CIFS_MAX_REQ);
 	}
 
 	cifsiod_wq = alloc_workqueue("cifsiod", WQ_FREEZABLE|WQ_MEM_RECLAIM, 0);
@@ -1235,7 +1241,7 @@
 static void __exit
 exit_cifs(void)
 {
-	cFYI(DBG2, "exit_cifs");
+	cifs_dbg(NOISY, "exit_cifs\n");
 	unregister_filesystem(&cifs_fs_type);
 	cifs_dfs_release_automount_timer();
 #ifdef CONFIG_CIFS_ACL
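Note: among the cifsfs.c context lines, CIFSMaxBufSize &= 0x1FE00 rounds the
buffer size down to an even 512-byte multiple: the mask keeps only bits 9
through 16, clearing the low 9 bits (anything under 512). A userspace check
with a few sample values:

    #include <stdio.h>

    int main(void)
    {
        unsigned int sizes[] = { 16644, 65536, 130560 };
        unsigned int i;

        /* prints: 16644 -> 16384, 65536 -> 65536, 130560 -> 130560 */
        for (i = 0; i < 3; i++)
            printf("%u -> %u\n", sizes[i], sizes[i] & 0x1FE00);
        return 0;
    }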
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index f450f06..dda188a 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -45,17 +45,17 @@
 #define get_xid()						\
 ({								\
 	unsigned int __xid = _get_xid();				\
-	cFYI(1, "CIFS VFS: in %s as Xid: %u with uid: %d",	\
-	     __func__, __xid,					\
-	     from_kuid(&init_user_ns, current_fsuid()));	\
+	cifs_dbg(FYI, "CIFS VFS: in %s as Xid: %u with uid: %d\n",	\
+		 __func__, __xid,					\
+		 from_kuid(&init_user_ns, current_fsuid()));		\
 	__xid;							\
 })
 
 #define free_xid(curr_xid)					\
 do {								\
 	_free_xid(curr_xid);					\
-	cFYI(1, "CIFS VFS: leaving %s (xid = %u) rc = %d",	\
-	     __func__, curr_xid, (int)rc);			\
+	cifs_dbg(FYI, "CIFS VFS: leaving %s (xid = %u) rc = %d\n",	\
+		 __func__, curr_xid, (int)rc);				\
 } while (0)
 extern int init_cifs_idmap(void);
 extern void exit_cifs_idmap(void);
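Note: get_xid() above is a GNU statement expression -- the ({ ... }) block logs
entry and then yields __xid as the macro's value -- and free_xid() quietly
references a local rc at the call site, so both macros assume surrounding-scope
conventions. A userspace sketch of the statement-expression half (GCC/clang
extension, illustrative names):

    #include <stdio.h>

    static unsigned int next_xid;

    #define get_xid()                                               \
    ({                                                              \
        unsigned int __xid = ++next_xid;                            \
        fprintf(stderr, "in %s as xid: %u\n", __func__, __xid);     \
        __xid;                                                      \
    })

    int main(void)
    {
        unsigned int xid = get_xid();   /* logs, then returns the id */

        printf("got xid %u\n", xid);
        return 0;
    }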
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 8e2e799..a58dc77 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -139,8 +139,8 @@
 		if (smb_command != SMB_COM_WRITE_ANDX &&
 		    smb_command != SMB_COM_OPEN_ANDX &&
 		    smb_command != SMB_COM_TREE_DISCONNECT) {
-			cFYI(1, "can not send cmd %d while umounting",
-				smb_command);
+			cifs_dbg(FYI, "can not send cmd %d while umounting\n",
+				 smb_command);
 			return -ENODEV;
 		}
 	}
@@ -163,7 +163,7 @@
 		 * back on-line
 		 */
 		if (!tcon->retry) {
-			cFYI(1, "gave up waiting on reconnect in smb_init");
+			cifs_dbg(FYI, "gave up waiting on reconnect in smb_init\n");
 			return -EHOSTDOWN;
 		}
 	}
@@ -191,7 +191,7 @@
 	cifs_mark_open_files_invalid(tcon);
 	rc = CIFSTCon(0, ses, tcon->treeName, tcon, nls_codepage);
 	mutex_unlock(&ses->session_mutex);
-	cFYI(1, "reconnect tcon rc = %d", rc);
+	cifs_dbg(FYI, "reconnect tcon rc = %d\n", rc);
 
 	if (rc)
 		goto out;
@@ -396,7 +396,7 @@
 	else /* if override flags set only sign/seal OR them with global auth */
 		secFlags = global_secflags | ses->overrideSecFlg;
 
-	cFYI(1, "secFlags 0x%x", secFlags);
+	cifs_dbg(FYI, "secFlags 0x%x\n", secFlags);
 
 	pSMB->hdr.Mid = get_next_mid(server);
 	pSMB->hdr.Flags2 |= (SMBFLG2_UNICODE | SMBFLG2_ERR_STATUS);
@@ -404,12 +404,12 @@
 	if ((secFlags & CIFSSEC_MUST_KRB5) == CIFSSEC_MUST_KRB5)
 		pSMB->hdr.Flags2 |= SMBFLG2_EXT_SEC;
 	else if ((secFlags & CIFSSEC_AUTH_MASK) == CIFSSEC_MAY_KRB5) {
-		cFYI(1, "Kerberos only mechanism, enable extended security");
+		cifs_dbg(FYI, "Kerberos only mechanism, enable extended security\n");
 		pSMB->hdr.Flags2 |= SMBFLG2_EXT_SEC;
 	} else if ((secFlags & CIFSSEC_MUST_NTLMSSP) == CIFSSEC_MUST_NTLMSSP)
 		pSMB->hdr.Flags2 |= SMBFLG2_EXT_SEC;
 	else if ((secFlags & CIFSSEC_AUTH_MASK) == CIFSSEC_MAY_NTLMSSP) {
-		cFYI(1, "NTLMSSP only mechanism, enable extended security");
+		cifs_dbg(FYI, "NTLMSSP only mechanism, enable extended security\n");
 		pSMB->hdr.Flags2 |= SMBFLG2_EXT_SEC;
 	}
 
@@ -428,7 +428,7 @@
 		goto neg_err_exit;
 
 	server->dialect = le16_to_cpu(pSMBr->DialectIndex);
-	cFYI(1, "Dialect: %d", server->dialect);
+	cifs_dbg(FYI, "Dialect: %d\n", server->dialect);
 	/* Check wct = 1 error case */
 	if ((pSMBr->hdr.WordCount < 13) || (server->dialect == BAD_PROT)) {
 		/* core returns wct = 1, but we do not ask for core - otherwise
@@ -447,8 +447,7 @@
 			(secFlags & CIFSSEC_MAY_PLNTXT))
 			server->secType = LANMAN;
 		else {
-			cERROR(1, "mount failed weak security disabled"
-				   " in /proc/fs/cifs/SecurityFlags");
+			cifs_dbg(VFS, "mount failed weak security disabled in /proc/fs/cifs/SecurityFlags\n");
 			rc = -EOPNOTSUPP;
 			goto neg_err_exit;
 		}
@@ -482,9 +481,9 @@
 			utc = CURRENT_TIME;
 			ts = cnvrtDosUnixTm(rsp->SrvTime.Date,
 					    rsp->SrvTime.Time, 0);
-			cFYI(1, "SrvTime %d sec since 1970 (utc: %d) diff: %d",
-				(int)ts.tv_sec, (int)utc.tv_sec,
-				(int)(utc.tv_sec - ts.tv_sec));
+			cifs_dbg(FYI, "SrvTime %d sec since 1970 (utc: %d) diff: %d\n",
+				 (int)ts.tv_sec, (int)utc.tv_sec,
+				 (int)(utc.tv_sec - ts.tv_sec));
 			val = (int)(utc.tv_sec - ts.tv_sec);
 			seconds = abs(val);
 			result = (seconds / MIN_TZ_ADJ) * MIN_TZ_ADJ;
@@ -498,7 +497,7 @@
 			server->timeAdj = (int)tmp;
 			server->timeAdj *= 60; /* also in seconds */
 		}
-		cFYI(1, "server->timeAdj: %d seconds", server->timeAdj);
+		cifs_dbg(FYI, "server->timeAdj: %d seconds\n", server->timeAdj);
 
 
 		/* BB get server time for time conversions and add
@@ -513,14 +512,13 @@
 			goto neg_err_exit;
 		}
 
-		cFYI(1, "LANMAN negotiated");
+		cifs_dbg(FYI, "LANMAN negotiated\n");
 		/* we will not end up setting signing flags - as no signing
 		was in LANMAN and server did not return the flags on */
 		goto signing_check;
 #else /* weak security disabled */
 	} else if (pSMBr->hdr.WordCount == 13) {
-		cERROR(1, "mount failed, cifs module not built "
-			  "with CIFS_WEAK_PW_HASH support");
+		cifs_dbg(VFS, "mount failed, cifs module not built with CIFS_WEAK_PW_HASH support\n");
 		rc = -EOPNOTSUPP;
 #endif /* WEAK_PW_HASH */
 		goto neg_err_exit;
@@ -532,14 +530,13 @@
 	/* else wct == 17 NTLM */
 	server->sec_mode = pSMBr->SecurityMode;
 	if ((server->sec_mode & SECMODE_USER) == 0)
-		cFYI(1, "share mode security");
+		cifs_dbg(FYI, "share mode security\n");
 
 	if ((server->sec_mode & SECMODE_PW_ENCRYPT) == 0)
 #ifdef CONFIG_CIFS_WEAK_PW_HASH
 		if ((secFlags & CIFSSEC_MAY_PLNTXT) == 0)
 #endif /* CIFS_WEAK_PW_HASH */
-			cERROR(1, "Server requests plain text password"
-				  " but client support disabled");
+			cifs_dbg(VFS, "Server requests plain text password but client support disabled\n");
 
 	if ((secFlags & CIFSSEC_MUST_NTLMV2) == CIFSSEC_MUST_NTLMV2)
 		server->secType = NTLMv2;
@@ -555,7 +552,7 @@
 		server->secType = LANMAN;
 	else {
 		rc = -EOPNOTSUPP;
-		cERROR(1, "Invalid security type");
+		cifs_dbg(VFS, "Invalid security type\n");
 		goto neg_err_exit;
 	}
 	/* else ... any others ...? */
@@ -568,7 +565,7 @@
 	/* probably no need to store and check maxvcs */
 	server->maxBuf = le32_to_cpu(pSMBr->MaxBufferSize);
 	server->max_rw = le32_to_cpu(pSMBr->MaxRawSize);
-	cFYI(DBG2, "Max buf = %d", ses->server->maxBuf);
+	cifs_dbg(NOISY, "Max buf = %d\n", ses->server->maxBuf);
 	server->capabilities = le32_to_cpu(pSMBr->Capabilities);
 	server->timeAdj = (int)(__s16)le16_to_cpu(pSMBr->ServerTimeZone);
 	server->timeAdj *= 60;
@@ -590,7 +587,7 @@
 			if (memcmp(server->server_GUID,
 				   pSMBr->u.extended_response.
 				   GUID, 16) != 0) {
-				cFYI(1, "server UID changed");
+				cifs_dbg(FYI, "server UID changed\n");
 				memcpy(server->server_GUID,
 					pSMBr->u.extended_response.GUID,
 					16);
@@ -633,21 +630,19 @@
 	if ((secFlags & CIFSSEC_MAY_SIGN) == 0) {
 		/* MUST_SIGN already includes the MAY_SIGN FLAG
 		   so if this is zero it means that signing is disabled */
-		cFYI(1, "Signing disabled");
+		cifs_dbg(FYI, "Signing disabled\n");
 		if (server->sec_mode & SECMODE_SIGN_REQUIRED) {
-			cERROR(1, "Server requires "
-				   "packet signing to be enabled in "
-				   "/proc/fs/cifs/SecurityFlags.");
+			cifs_dbg(VFS, "Server requires packet signing to be enabled in /proc/fs/cifs/SecurityFlags\n");
 			rc = -EOPNOTSUPP;
 		}
 		server->sec_mode &=
 			~(SECMODE_SIGN_ENABLED | SECMODE_SIGN_REQUIRED);
 	} else if ((secFlags & CIFSSEC_MUST_SIGN) == CIFSSEC_MUST_SIGN) {
 		/* signing required */
-		cFYI(1, "Must sign - secFlags 0x%x", secFlags);
+		cifs_dbg(FYI, "Must sign - secFlags 0x%x\n", secFlags);
 		if ((server->sec_mode &
 			(SECMODE_SIGN_ENABLED | SECMODE_SIGN_REQUIRED)) == 0) {
-			cERROR(1, "signing required but server lacks support");
+			cifs_dbg(VFS, "signing required but server lacks support\n");
 			rc = -EOPNOTSUPP;
 		} else
 			server->sec_mode |= SECMODE_SIGN_REQUIRED;
@@ -661,7 +656,7 @@
 neg_err_exit:
 	cifs_buf_release(pSMB);
 
-	cFYI(1, "negprot rc %d", rc);
+	cifs_dbg(FYI, "negprot rc %d\n", rc);
 	return rc;
 }
 
@@ -671,7 +666,7 @@
 	struct smb_hdr *smb_buffer;
 	int rc = 0;
 
-	cFYI(1, "In tree disconnect");
+	cifs_dbg(FYI, "In tree disconnect\n");
 
 	/* BB: do we need to check this? These should never be NULL. */
 	if ((tcon->ses == NULL) || (tcon->ses->server == NULL))
@@ -693,7 +688,7 @@
 
 	rc = SendReceiveNoRsp(xid, tcon->ses, (char *)smb_buffer, 0);
 	if (rc)
-		cFYI(1, "Tree disconnect failed %d", rc);
+		cifs_dbg(FYI, "Tree disconnect failed %d\n", rc);
 
 	/* No need to return error on this operation if tid invalidated and
 	   closed on server already e.g. due to tcp session crashing */
@@ -728,7 +723,7 @@
 	struct smb_rqst rqst = { .rq_iov = &iov,
 				 .rq_nvec = 1 };
 
-	cFYI(1, "In echo request");
+	cifs_dbg(FYI, "In echo request\n");
 
 	rc = small_smb_init(SMB_COM_ECHO, 0, NULL, (void **)&smb);
 	if (rc)
@@ -747,7 +742,7 @@
 	rc = cifs_call_async(server, &rqst, NULL, cifs_echo_callback,
 			     server, CIFS_ASYNC_OP | CIFS_ECHO_OP);
 	if (rc)
-		cFYI(1, "Echo request failed: %d", rc);
+		cifs_dbg(FYI, "Echo request failed: %d\n", rc);
 
 	cifs_small_buf_release(smb);
 
@@ -760,7 +755,7 @@
 	LOGOFF_ANDX_REQ *pSMB;
 	int rc = 0;
 
-	cFYI(1, "In SMBLogoff for session disconnect");
+	cifs_dbg(FYI, "In SMBLogoff for session disconnect\n");
 
 	/*
 	 * BB: do we need to check validity of ses and server? They should
@@ -814,7 +809,7 @@
 	int bytes_returned = 0;
 	__u16 params, param_offset, offset, byte_count;
 
-	cFYI(1, "In POSIX delete");
+	cifs_dbg(FYI, "In POSIX delete\n");
 PsxDelete:
 	rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB,
 		      (void **) &pSMBr);
@@ -866,7 +861,7 @@
 	rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
 			 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
 	if (rc)
-		cFYI(1, "Posix delete returned %d", rc);
+		cifs_dbg(FYI, "Posix delete returned %d\n", rc);
 	cifs_buf_release(pSMB);
 
 	cifs_stats_inc(&tcon->stats.cifs_stats.num_deletes);
@@ -914,7 +909,7 @@
 			 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
 	cifs_stats_inc(&tcon->stats.cifs_stats.num_deletes);
 	if (rc)
-		cFYI(1, "Error in RMFile = %d", rc);
+		cifs_dbg(FYI, "Error in RMFile = %d\n", rc);
 
 	cifs_buf_release(pSMB);
 	if (rc == -EAGAIN)
@@ -934,7 +929,7 @@
 	int name_len;
 	int remap = cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR;
 
-	cFYI(1, "In CIFSSMBRmDir");
+	cifs_dbg(FYI, "In CIFSSMBRmDir\n");
 RmDirRetry:
 	rc = smb_init(SMB_COM_DELETE_DIRECTORY, 0, tcon, (void **) &pSMB,
 		      (void **) &pSMBr);
@@ -960,7 +955,7 @@
 			 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
 	cifs_stats_inc(&tcon->stats.cifs_stats.num_rmdirs);
 	if (rc)
-		cFYI(1, "Error in RMDir = %d", rc);
+		cifs_dbg(FYI, "Error in RMDir = %d\n", rc);
 
 	cifs_buf_release(pSMB);
 	if (rc == -EAGAIN)
@@ -979,7 +974,7 @@
 	int name_len;
 	int remap = cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR;
 
-	cFYI(1, "In CIFSSMBMkDir");
+	cifs_dbg(FYI, "In CIFSSMBMkDir\n");
 MkDirRetry:
 	rc = smb_init(SMB_COM_CREATE_DIRECTORY, 0, tcon, (void **) &pSMB,
 		      (void **) &pSMBr);
@@ -1005,7 +1000,7 @@
 			 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
 	cifs_stats_inc(&tcon->stats.cifs_stats.num_mkdirs);
 	if (rc)
-		cFYI(1, "Error in Mkdir = %d", rc);
+		cifs_dbg(FYI, "Error in Mkdir = %d\n", rc);
 
 	cifs_buf_release(pSMB);
 	if (rc == -EAGAIN)
@@ -1029,7 +1024,7 @@
 	OPEN_PSX_REQ *pdata;
 	OPEN_PSX_RSP *psx_rsp;
 
-	cFYI(1, "In POSIX Create");
+	cifs_dbg(FYI, "In POSIX Create\n");
 PsxCreat:
 	rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB,
 		      (void **) &pSMBr);
@@ -1083,11 +1078,11 @@
 	rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
 			 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
 	if (rc) {
-		cFYI(1, "Posix create returned %d", rc);
+		cifs_dbg(FYI, "Posix create returned %d\n", rc);
 		goto psx_create_err;
 	}
 
-	cFYI(1, "copying inode info");
+	cifs_dbg(FYI, "copying inode info\n");
 	rc = validate_t2((struct smb_t2_rsp *)pSMBr);
 
 	if (rc || get_bcc(&pSMBr->hdr) < sizeof(OPEN_PSX_RSP)) {
@@ -1109,11 +1104,11 @@
 	/* check to make sure response data is there */
 	if (psx_rsp->ReturnedLevel != cpu_to_le16(SMB_QUERY_FILE_UNIX_BASIC)) {
 		pRetData->Type = cpu_to_le32(-1); /* unknown */
-		cFYI(DBG2, "unknown type");
+		cifs_dbg(NOISY, "unknown type\n");
 	} else {
 		if (get_bcc(&pSMBr->hdr) < sizeof(OPEN_PSX_RSP)
 					+ sizeof(FILE_UNIX_BASIC_INFO)) {
-			cERROR(1, "Open response data too small");
+			cifs_dbg(VFS, "Open response data too small\n");
 			pRetData->Type = cpu_to_le32(-1);
 			goto psx_create_err;
 		}
@@ -1160,7 +1155,7 @@
 			ofun = SMBOPEN_OCREATE | SMBOPEN_OTRUNC;
 			break;
 		default:
-			cFYI(1, "unknown disposition %d", disposition);
+			cifs_dbg(FYI, "unknown disposition %d\n", disposition);
 			ofun =  SMBOPEN_OAPPEND; /* regular open */
 	}
 	return ofun;
@@ -1251,7 +1246,7 @@
 			(struct smb_hdr *)pSMBr, &bytes_returned, 0);
 	cifs_stats_inc(&tcon->stats.cifs_stats.num_opens);
 	if (rc) {
-		cFYI(1, "Error in Open = %d", rc);
+		cifs_dbg(FYI, "Error in Open = %d\n", rc);
 	} else {
 	/* BB verify if wct == 15 */
 
@@ -1364,7 +1359,7 @@
 			(struct smb_hdr *)pSMBr, &bytes_returned, 0);
 	cifs_stats_inc(&tcon->stats.cifs_stats.num_opens);
 	if (rc) {
-		cFYI(1, "Error in Open = %d", rc);
+		cifs_dbg(FYI, "Error in Open = %d\n", rc);
 	} else {
 		*pOplock = pSMBr->OplockLevel; /* 1 byte no need to le_to_cpu */
 		*netfid = pSMBr->Fid;	/* cifs fid stays in le */
@@ -1425,8 +1420,8 @@
 	char *buf = server->smallbuf;
 	unsigned int buflen = get_rfc1002_length(buf) + 4;
 
-	cFYI(1, "%s: mid=%llu offset=%llu bytes=%u", __func__,
-		mid->mid, rdata->offset, rdata->bytes);
+	cifs_dbg(FYI, "%s: mid=%llu offset=%llu bytes=%u\n",
+		 __func__, mid->mid, rdata->offset, rdata->bytes);
 
 	/*
 	 * read the rest of READ_RSP header (sans Data array), or whatever we
@@ -1447,16 +1442,16 @@
 	/* Was the SMB read successful? */
 	rdata->result = server->ops->map_error(buf, false);
 	if (rdata->result != 0) {
-		cFYI(1, "%s: server returned error %d", __func__,
-			rdata->result);
+		cifs_dbg(FYI, "%s: server returned error %d\n",
+			 __func__, rdata->result);
 		return cifs_readv_discard(server, mid);
 	}
 
 	/* Is there enough to get to the rest of the READ_RSP header? */
 	if (server->total_read < server->vals->read_rsp_size) {
-		cFYI(1, "%s: server returned short header. got=%u expected=%zu",
-			__func__, server->total_read,
-			server->vals->read_rsp_size);
+		cifs_dbg(FYI, "%s: server returned short header. got=%u expected=%zu\n",
+			 __func__, server->total_read,
+			 server->vals->read_rsp_size);
 		rdata->result = -EIO;
 		return cifs_readv_discard(server, mid);
 	}
@@ -1468,19 +1463,19 @@
 		 * is beyond the EOF. Treat it as if the data starts just after
 		 * the header.
 		 */
-		cFYI(1, "%s: data offset (%u) inside read response header",
-			__func__, data_offset);
+		cifs_dbg(FYI, "%s: data offset (%u) inside read response header\n",
+			 __func__, data_offset);
 		data_offset = server->total_read;
 	} else if (data_offset > MAX_CIFS_SMALL_BUFFER_SIZE) {
 		/* data_offset is beyond the end of smallbuf */
-		cFYI(1, "%s: data offset (%u) beyond end of smallbuf",
-			__func__, data_offset);
+		cifs_dbg(FYI, "%s: data offset (%u) beyond end of smallbuf\n",
+			 __func__, data_offset);
 		rdata->result = -EIO;
 		return cifs_readv_discard(server, mid);
 	}
 
-	cFYI(1, "%s: total_read=%u data_offset=%u", __func__,
-		server->total_read, data_offset);
+	cifs_dbg(FYI, "%s: total_read=%u data_offset=%u\n",
+		 __func__, server->total_read, data_offset);
 
 	len = data_offset - server->total_read;
 	if (len > 0) {
@@ -1496,8 +1491,8 @@
 	/* set up first iov for signature check */
 	rdata->iov.iov_base = buf;
 	rdata->iov.iov_len = server->total_read;
-	cFYI(1, "0: iov_base=%p iov_len=%zu",
-		rdata->iov.iov_base, rdata->iov.iov_len);
+	cifs_dbg(FYI, "0: iov_base=%p iov_len=%zu\n",
+		 rdata->iov.iov_base, rdata->iov.iov_len);
 
 	/* how much data is in the response? */
 	data_len = server->ops->read_data_length(buf);
@@ -1514,8 +1509,8 @@
 	server->total_read += length;
 	rdata->bytes = length;
 
-	cFYI(1, "total_read=%u buflen=%u remaining=%u", server->total_read,
-		buflen, data_len);
+	cifs_dbg(FYI, "total_read=%u buflen=%u remaining=%u\n",
+		 server->total_read, buflen, data_len);
 
 	/* discard anything left over */
 	if (server->total_read < buflen)
@@ -1538,8 +1533,9 @@
 				 .rq_pagesz = rdata->pagesz,
 				 .rq_tailsz = rdata->tailsz };
 
-	cFYI(1, "%s: mid=%llu state=%d result=%d bytes=%u", __func__,
-		mid->mid, mid->mid_state, rdata->result, rdata->bytes);
+	cifs_dbg(FYI, "%s: mid=%llu state=%d result=%d bytes=%u\n",
+		 __func__, mid->mid, mid->mid_state, rdata->result,
+		 rdata->bytes);
 
 	switch (mid->mid_state) {
 	case MID_RESPONSE_RECEIVED:
@@ -1549,10 +1545,10 @@
 			int rc = 0;
 
 			rc = cifs_verify_signature(&rqst, server,
-						  mid->sequence_number + 1);
+						  mid->sequence_number);
 			if (rc)
-				cERROR(1, "SMB signature verification returned "
-				       "error = %d", rc);
+				cifs_dbg(VFS, "SMB signature verification returned error = %d\n",
+					 rc);
 		}
 		/* FIXME: should this be counted toward the initiating task? */
 		task_io_account_read(rdata->bytes);
@@ -1582,8 +1578,8 @@
 	struct smb_rqst rqst = { .rq_iov = &rdata->iov,
 				 .rq_nvec = 1 };
 
-	cFYI(1, "%s: offset=%llu bytes=%u", __func__,
-		rdata->offset, rdata->bytes);
+	cifs_dbg(FYI, "%s: offset=%llu bytes=%u\n",
+		 __func__, rdata->offset, rdata->bytes);
 
 	if (tcon->ses->capabilities & CAP_LARGE_FILES)
 		wct = 12;
@@ -1653,7 +1649,7 @@
 	struct cifs_tcon *tcon = io_parms->tcon;
 	unsigned int count = io_parms->length;
 
-	cFYI(1, "Reading %d bytes on fid %d", count, netfid);
+	cifs_dbg(FYI, "Reading %d bytes on fid %d\n", count, netfid);
 	if (tcon->ses->capabilities & CAP_LARGE_FILES)
 		wct = 12;
 	else {
@@ -1701,7 +1697,7 @@
 	cifs_stats_inc(&tcon->stats.cifs_stats.num_reads);
 	pSMBr = (READ_RSP *)iov[0].iov_base;
 	if (rc) {
-		cERROR(1, "Send error in read = %d", rc);
+		cifs_dbg(VFS, "Send error in read = %d\n", rc);
 	} else {
 		int data_length = le16_to_cpu(pSMBr->DataLengthHigh);
 		data_length = data_length << 16;
@@ -1711,7 +1707,7 @@
 		/*check that DataLength would not go beyond end of SMB */
 		if ((data_length > CIFSMaxBufSize)
 				|| (data_length > count)) {
-			cFYI(1, "bad length %d for count %d",
+			cifs_dbg(FYI, "bad length %d for count %d\n",
 				 data_length, count);
 			rc = -EIO;
 			*nbytes = 0;
@@ -1719,7 +1715,7 @@
 			pReadData = (char *) (&pSMBr->hdr.Protocol) +
 					le16_to_cpu(pSMBr->DataOffset);
 /*			if (rc = copy_to_user(buf, pReadData, data_length)) {
-				cERROR(1, "Faulting on read rc = %d",rc);
+				cifs_dbg(VFS, "Faulting on read rc = %d\n",rc);
 				rc = -EFAULT;
 			}*/ /* can not use copy_to_user when using page cache*/
 			if (*buf)
@@ -1767,7 +1763,7 @@
 
 	*nbytes = 0;
 
-	/* cFYI(1, "write at %lld %d bytes", offset, count);*/
+	/* cifs_dbg(FYI, "write at %lld %d bytes\n", offset, count);*/
 	if (tcon->ses == NULL)
 		return -ECONNABORTED;
 
@@ -1852,7 +1848,7 @@
 			 (struct smb_hdr *) pSMBr, &bytes_returned, long_op);
 	cifs_stats_inc(&tcon->stats.cifs_stats.num_writes);
 	if (rc) {
-		cFYI(1, "Send error in write = %d", rc);
+		cifs_dbg(FYI, "Send error in write = %d\n", rc);
 	} else {
 		*nbytes = le16_to_cpu(pSMBr->CountHigh);
 		*nbytes = (*nbytes) << 16;
@@ -1959,7 +1955,7 @@
 
 	/* this would overflow */
 	if (nr_pages == 0) {
-		cERROR(1, "%s: called with nr_pages == 0!", __func__);
+		cifs_dbg(VFS, "%s: called with nr_pages == 0!\n", __func__);
 		return NULL;
 	}
 
@@ -2075,7 +2071,8 @@
 	rqst.rq_pagesz = wdata->pagesz;
 	rqst.rq_tailsz = wdata->tailsz;
 
-	cFYI(1, "async write at %llu %u bytes", wdata->offset, wdata->bytes);
+	cifs_dbg(FYI, "async write at %llu %u bytes\n",
+		 wdata->offset, wdata->bytes);
 
 	smb->DataLengthLow = cpu_to_le16(wdata->bytes & 0xFFFF);
 	smb->DataLengthHigh = cpu_to_le16(wdata->bytes >> 16);
@@ -2123,7 +2120,7 @@
 
 	*nbytes = 0;
 
-	cFYI(1, "write2 at %lld %d bytes", (long long)offset, count);
+	cifs_dbg(FYI, "write2 at %lld %d bytes\n", (long long)offset, count);
 
 	if (tcon->ses->capabilities & CAP_LARGE_FILES) {
 		wct = 14;
@@ -2182,7 +2179,7 @@
 	rc = SendReceive2(xid, tcon->ses, iov, n_vec + 1, &resp_buf_type, 0);
 	cifs_stats_inc(&tcon->stats.cifs_stats.num_writes);
 	if (rc) {
-		cFYI(1, "Send error Write2 = %d", rc);
+		cifs_dbg(FYI, "Send error Write2 = %d\n", rc);
 	} else if (resp_buf_type == 0) {
 		/* presumably this can not happen, but best to be safe */
 		rc = -EIO;
@@ -2223,7 +2220,8 @@
 	int resp_buf_type;
 	__u16 count;
 
-	cFYI(1, "cifs_lockv num lock %d num unlock %d", num_lock, num_unlock);
+	cifs_dbg(FYI, "cifs_lockv num lock %d num unlock %d\n",
+		 num_lock, num_unlock);
 
 	rc = small_smb_init(SMB_COM_LOCKING_ANDX, 8, tcon, (void **) &pSMB);
 	if (rc)
@@ -2249,7 +2247,7 @@
 	cifs_stats_inc(&tcon->stats.cifs_stats.num_locks);
 	rc = SendReceive2(xid, tcon->ses, iov, 2, &resp_buf_type, CIFS_NO_RESP);
 	if (rc)
-		cFYI(1, "Send error in cifs_lockv = %d", rc);
+		cifs_dbg(FYI, "Send error in cifs_lockv = %d\n", rc);
 
 	return rc;
 }
@@ -2268,7 +2266,8 @@
 	int flags = 0;
 	__u16 count;
 
-	cFYI(1, "CIFSSMBLock timeout %d numLock %d", (int)waitFlag, numLock);
+	cifs_dbg(FYI, "CIFSSMBLock timeout %d numLock %d\n",
+		 (int)waitFlag, numLock);
 	rc = small_smb_init(SMB_COM_LOCKING_ANDX, 8, tcon, (void **) &pSMB);
 
 	if (rc)
@@ -2317,7 +2316,7 @@
 	}
 	cifs_stats_inc(&tcon->stats.cifs_stats.num_locks);
 	if (rc)
-		cFYI(1, "Send error in Lock = %d", rc);
+		cifs_dbg(FYI, "Send error in Lock = %d\n", rc);
 
 	/* Note: On -EAGAIN error only caller can retry on handle based calls
 	since file handle passed in no longer valid */
@@ -2341,7 +2340,7 @@
 	__u16 params, param_offset, offset, byte_count, count;
 	struct kvec iov[1];
 
-	cFYI(1, "Posix Lock");
+	cifs_dbg(FYI, "Posix Lock\n");
 
 	rc = small_smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB);
 
@@ -2408,7 +2407,7 @@
 	}
 
 	if (rc) {
-		cFYI(1, "Send error in Posix Lock = %d", rc);
+		cifs_dbg(FYI, "Send error in Posix Lock = %d\n", rc);
 	} else if (pLockData) {
 		/* lock structure can be returned on get */
 		__u16 data_offset;
@@ -2465,7 +2464,7 @@
 {
 	int rc = 0;
 	CLOSE_REQ *pSMB = NULL;
-	cFYI(1, "In CIFSSMBClose");
+	cifs_dbg(FYI, "In CIFSSMBClose\n");
 
 /* do not retry on dead session on close */
 	rc = small_smb_init(SMB_COM_CLOSE, 3, tcon, (void **) &pSMB);
@@ -2482,7 +2481,7 @@
 	if (rc) {
 		if (rc != -EINTR) {
 			/* EINTR is expected when user ctl-c to kill app */
-			cERROR(1, "Send error in Close = %d", rc);
+			cifs_dbg(VFS, "Send error in Close = %d\n", rc);
 		}
 	}
 
@@ -2498,7 +2497,7 @@
 {
 	int rc = 0;
 	FLUSH_REQ *pSMB = NULL;
-	cFYI(1, "In CIFSSMBFlush");
+	cifs_dbg(FYI, "In CIFSSMBFlush\n");
 
 	rc = small_smb_init(SMB_COM_FLUSH, 1, tcon, (void **) &pSMB);
 	if (rc)
@@ -2509,7 +2508,7 @@
 	rc = SendReceiveNoRsp(xid, tcon->ses, (char *) pSMB, 0);
 	cifs_stats_inc(&tcon->stats.cifs_stats.num_flushes);
 	if (rc)
-		cERROR(1, "Send error in Flush = %d", rc);
+		cifs_dbg(VFS, "Send error in Flush = %d\n", rc);
 
 	return rc;
 }
@@ -2527,7 +2526,7 @@
 	__u16 count;
 	int remap = cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR;
 
-	cFYI(1, "In CIFSSMBRename");
+	cifs_dbg(FYI, "In CIFSSMBRename\n");
 renameRetry:
 	rc = smb_init(SMB_COM_RENAME, 1, tcon, (void **) &pSMB,
 		      (void **) &pSMBr);
@@ -2574,7 +2573,7 @@
 			 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
 	cifs_stats_inc(&tcon->stats.cifs_stats.num_renames);
 	if (rc)
-		cFYI(1, "Send error in rename = %d", rc);
+		cifs_dbg(FYI, "Send error in rename = %d\n", rc);
 
 	cifs_buf_release(pSMB);
 
@@ -2598,7 +2597,7 @@
 	int len_of_str;
 	__u16 params, param_offset, offset, count, byte_count;
 
-	cFYI(1, "Rename to File by handle");
+	cifs_dbg(FYI, "Rename to File by handle\n");
 	rc = smb_init(SMB_COM_TRANSACTION2, 15, pTcon, (void **) &pSMB,
 			(void **) &pSMBr);
 	if (rc)
@@ -2655,7 +2654,8 @@
 			 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
 	cifs_stats_inc(&pTcon->stats.cifs_stats.num_t2renames);
 	if (rc)
-		cFYI(1, "Send error in Rename (by file handle) = %d", rc);
+		cifs_dbg(FYI, "Send error in Rename (by file handle) = %d\n",
+			 rc);
 
 	cifs_buf_release(pSMB);
 
@@ -2677,7 +2677,7 @@
 	int name_len, name_len2;
 	__u16 count;
 
-	cFYI(1, "In CIFSSMBCopy");
+	cifs_dbg(FYI, "In CIFSSMBCopy\n");
 copyRetry:
 	rc = smb_init(SMB_COM_COPY, 1, tcon, (void **) &pSMB,
 			(void **) &pSMBr);
@@ -2722,8 +2722,8 @@
 	rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
 		(struct smb_hdr *) pSMBr, &bytes_returned, 0);
 	if (rc) {
-		cFYI(1, "Send error in copy = %d with %d files copied",
-			rc, le16_to_cpu(pSMBr->CopyCount));
+		cifs_dbg(FYI, "Send error in copy = %d with %d files copied\n",
+			 rc, le16_to_cpu(pSMBr->CopyCount));
 	}
 	cifs_buf_release(pSMB);
 
@@ -2747,7 +2747,7 @@
 	int bytes_returned = 0;
 	__u16 params, param_offset, offset, byte_count;
 
-	cFYI(1, "In Symlink Unix style");
+	cifs_dbg(FYI, "In Symlink Unix style\n");
 createSymLinkRetry:
 	rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB,
 		      (void **) &pSMBr);
@@ -2812,7 +2812,8 @@
 			 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
 	cifs_stats_inc(&tcon->stats.cifs_stats.num_symlinks);
 	if (rc)
-		cFYI(1, "Send error in SetPathInfo create symlink = %d", rc);
+		cifs_dbg(FYI, "Send error in SetPathInfo create symlink = %d\n",
+			 rc);
 
 	cifs_buf_release(pSMB);
 
@@ -2836,7 +2837,7 @@
 	int bytes_returned = 0;
 	__u16 params, param_offset, offset, byte_count;
 
-	cFYI(1, "In Create Hard link Unix style");
+	cifs_dbg(FYI, "In Create Hard link Unix style\n");
 createHardLinkRetry:
 	rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB,
 		      (void **) &pSMBr);
@@ -2898,7 +2899,8 @@
 			 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
 	cifs_stats_inc(&tcon->stats.cifs_stats.num_hardlinks);
 	if (rc)
-		cFYI(1, "Send error in SetPathInfo (hard link) = %d", rc);
+		cifs_dbg(FYI, "Send error in SetPathInfo (hard link) = %d\n",
+			 rc);
 
 	cifs_buf_release(pSMB);
 	if (rc == -EAGAIN)
@@ -2920,7 +2922,7 @@
 	__u16 count;
 	int remap = cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR;
 
-	cFYI(1, "In CIFSCreateHardLink");
+	cifs_dbg(FYI, "In CIFSCreateHardLink\n");
 winCreateHardLinkRetry:
 
 	rc = smb_init(SMB_COM_NT_RENAME, 4, tcon, (void **) &pSMB,
@@ -2972,7 +2974,7 @@
 			 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
 	cifs_stats_inc(&tcon->stats.cifs_stats.num_hardlinks);
 	if (rc)
-		cFYI(1, "Send error in hard link (NT rename) = %d", rc);
+		cifs_dbg(FYI, "Send error in hard link (NT rename) = %d\n", rc);
 
 	cifs_buf_release(pSMB);
 	if (rc == -EAGAIN)
@@ -2995,7 +2997,7 @@
 	__u16 params, byte_count;
 	char *data_start;
 
-	cFYI(1, "In QPathSymLinkInfo (Unix) for path %s", searchName);
+	cifs_dbg(FYI, "In QPathSymLinkInfo (Unix) for path %s\n", searchName);
 
 querySymLinkRetry:
 	rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB,
@@ -3042,7 +3044,7 @@
 	rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
 			 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
 	if (rc) {
-		cFYI(1, "Send error in QuerySymLinkInfo = %d", rc);
+		cifs_dbg(FYI, "Send error in QuerySymLinkInfo = %d\n", rc);
 	} else {
 		/* decode response */
 
@@ -3097,7 +3099,8 @@
 	struct smb_com_transaction_ioctl_req *pSMB;
 	struct smb_com_transaction_ioctl_rsp *pSMBr;
 
-	cFYI(1, "In Windows reparse style QueryLink for path %s", searchName);
+	cifs_dbg(FYI, "In Windows reparse style QueryLink for path %s\n",
+		 searchName);
 	rc = smb_init(SMB_COM_NT_TRANSACT, 23, tcon, (void **) &pSMB,
 		      (void **) &pSMBr);
 	if (rc)
@@ -3125,7 +3128,7 @@
 	rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
 			 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
 	if (rc) {
-		cFYI(1, "Send error in QueryReparseLinkInfo = %d", rc);
+		cifs_dbg(FYI, "Send error in QueryReparseLinkInfo = %d\n", rc);
 	} else {		/* decode response */
 		__u32 data_offset = le32_to_cpu(pSMBr->DataOffset);
 		__u32 data_count = le32_to_cpu(pSMBr->DataCount);
@@ -3149,7 +3152,7 @@
 			if ((reparse_buf->LinkNamesBuf +
 				reparse_buf->TargetNameOffset +
 				reparse_buf->TargetNameLen) > end_of_smb) {
-				cFYI(1, "reparse buf beyond SMB");
+				cifs_dbg(FYI, "reparse buf beyond SMB\n");
 				rc = -EIO;
 				goto qreparse_out;
 			}
@@ -3170,12 +3173,11 @@
 			}
 		} else {
 			rc = -EIO;
-			cFYI(1, "Invalid return data count on "
-				 "get reparse info ioctl");
+			cifs_dbg(FYI, "Invalid return data count on get reparse info ioctl\n");
 		}
 		symlinkinfo[buflen] = 0; /* just in case so the caller
 					does not go off the end of the buffer */
-		cFYI(1, "readlink result - %s", symlinkinfo);
+		cifs_dbg(FYI, "readlink result - %s\n", symlinkinfo);
 	}
 
 qreparse_out:
@@ -3198,7 +3200,10 @@
 	ace->e_perm = cpu_to_le16(cifs_ace->cifs_e_perm);
 	ace->e_tag  = cpu_to_le16(cifs_ace->cifs_e_tag);
 	ace->e_id   = cpu_to_le32(le64_to_cpu(cifs_ace->cifs_uid));
-	/* cFYI(1, "perm %d tag %d id %d",ace->e_perm,ace->e_tag,ace->e_id); */
+/*
+	cifs_dbg(FYI, "perm %d tag %d id %d\n",
+		 ace->e_perm, ace->e_tag, ace->e_id);
+*/
 
 	return;
 }
@@ -3224,8 +3229,8 @@
 		size += sizeof(struct cifs_posix_ace) * count;
 		/* check if we would go beyond end of SMB */
 		if (size_of_data_area < size) {
-			cFYI(1, "bad CIFS POSIX ACL size %d vs. %d",
-				size_of_data_area, size);
+			cifs_dbg(FYI, "bad CIFS POSIX ACL size %d vs. %d\n",
+				 size_of_data_area, size);
 			return -EINVAL;
 		}
 	} else if (acl_type & ACL_TYPE_DEFAULT) {
@@ -3272,7 +3277,10 @@
 		cifs_ace->cifs_uid = cpu_to_le64(-1);
 	} else
 		cifs_ace->cifs_uid = cpu_to_le64(le32_to_cpu(local_ace->e_id));
-	/*cFYI(1, "perm %d tag %d id %d",ace->e_perm,ace->e_tag,ace->e_id);*/
+/*
+	cifs_dbg(FYI, "perm %d tag %d id %d\n",
+		 ace->e_perm, ace->e_tag, ace->e_id);
+*/
 	return rc;
 }
 
@@ -3290,12 +3298,11 @@
 		return 0;
 
 	count = posix_acl_xattr_count((size_t)buflen);
-	cFYI(1, "setting acl with %d entries from buf of length %d and "
-		"version of %d",
-		count, buflen, le32_to_cpu(local_acl->a_version));
+	cifs_dbg(FYI, "setting acl with %d entries from buf of length %d and version of %d\n",
+		 count, buflen, le32_to_cpu(local_acl->a_version));
 	if (le32_to_cpu(local_acl->a_version) != 2) {
-		cFYI(1, "unknown POSIX ACL version %d",
-		     le32_to_cpu(local_acl->a_version));
+		cifs_dbg(FYI, "unknown POSIX ACL version %d\n",
+			 le32_to_cpu(local_acl->a_version));
 		return 0;
 	}
 	cifs_acl->version = cpu_to_le16(1);
@@ -3304,7 +3311,7 @@
 	else if (acl_type == ACL_TYPE_DEFAULT)
 		cifs_acl->default_entry_count = cpu_to_le16(count);
 	else {
-		cFYI(1, "unknown ACL type %d", acl_type);
+		cifs_dbg(FYI, "unknown ACL type %d\n", acl_type);
 		return 0;
 	}
 	for (i = 0; i < count; i++) {
@@ -3337,7 +3344,7 @@
 	int name_len;
 	__u16 params, byte_count;
 
-	cFYI(1, "In GetPosixACL (Unix) for path %s", searchName);
+	cifs_dbg(FYI, "In GetPosixACL (Unix) for path %s\n", searchName);
 
 queryAclRetry:
 	rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB,
@@ -3390,7 +3397,7 @@
 		(struct smb_hdr *) pSMBr, &bytes_returned, 0);
 	cifs_stats_inc(&tcon->stats.cifs_stats.num_acl_get);
 	if (rc) {
-		cFYI(1, "Send error in Query POSIX ACL = %d", rc);
+		cifs_dbg(FYI, "Send error in Query POSIX ACL = %d\n", rc);
 	} else {
 		/* decode response */
 
@@ -3427,7 +3434,7 @@
 	int bytes_returned = 0;
 	__u16 params, byte_count, data_count, param_offset, offset;
 
-	cFYI(1, "In SetPosixACL (Unix) for path %s", fileName);
+	cifs_dbg(FYI, "In SetPosixACL (Unix) for path %s\n", fileName);
 setAclRetry:
 	rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB,
 		      (void **) &pSMBr);
@@ -3482,7 +3489,7 @@
 	rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
 			 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
 	if (rc)
-		cFYI(1, "Set POSIX ACL returned %d", rc);
+		cifs_dbg(FYI, "Set POSIX ACL returned %d\n", rc);
 
 setACLerrorExit:
 	cifs_buf_release(pSMB);
@@ -3502,7 +3509,7 @@
 	int bytes_returned;
 	__u16 params, byte_count;
 
-	cFYI(1, "In GetExtAttr");
+	cifs_dbg(FYI, "In GetExtAttr\n");
 	if (tcon == NULL)
 		return -ENODEV;
 
@@ -3541,7 +3548,7 @@
 	rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
 			 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
 	if (rc) {
-		cFYI(1, "error %d in GetExtAttr", rc);
+		cifs_dbg(FYI, "error %d in GetExtAttr\n", rc);
 	} else {
 		/* decode response */
 		rc = validate_t2((struct smb_t2_rsp *)pSMBr);
@@ -3556,7 +3563,7 @@
 			struct file_chattr_info *pfinfo;
 			/* BB Do we need a cast or hash here ? */
 			if (count != 16) {
-				cFYI(1, "Illegal size ret in GetExtAttr");
+				cifs_dbg(FYI, "Illegal size ret in GetExtAttr\n");
 				rc = -EIO;
 				goto GetExtAttrOut;
 			}
@@ -3644,21 +3651,21 @@
 
 	/* should we also check that parm and data areas do not overlap? */
 	if (*ppparm > end_of_smb) {
-		cFYI(1, "parms start after end of smb");
+		cifs_dbg(FYI, "parms start after end of smb\n");
 		return -EINVAL;
 	} else if (parm_count + *ppparm > end_of_smb) {
-		cFYI(1, "parm end after end of smb");
+		cifs_dbg(FYI, "parm end after end of smb\n");
 		return -EINVAL;
 	} else if (*ppdata > end_of_smb) {
-		cFYI(1, "data starts after end of smb");
+		cifs_dbg(FYI, "data starts after end of smb\n");
 		return -EINVAL;
 	} else if (data_count + *ppdata > end_of_smb) {
-		cFYI(1, "data %p + count %d (%p) past smb end %p start %p",
-			*ppdata, data_count, (data_count + *ppdata),
-			end_of_smb, pSMBr);
+		cifs_dbg(FYI, "data %p + count %d (%p) past smb end %p start %p\n",
+			 *ppdata, data_count, (data_count + *ppdata),
+			 end_of_smb, pSMBr);
 		return -EINVAL;
 	} else if (parm_count + data_count > bcc) {
-		cFYI(1, "parm count and data count larger than SMB");
+		cifs_dbg(FYI, "parm count and data count larger than SMB\n");
 		return -EINVAL;
 	}
 	*pdatalen = data_count;
@@ -3676,7 +3683,7 @@
 	QUERY_SEC_DESC_REQ *pSMB;
 	struct kvec iov[1];
 
-	cFYI(1, "GetCifsACL");
+	cifs_dbg(FYI, "GetCifsACL\n");
 
 	*pbuflen = 0;
 	*acl_inf = NULL;
@@ -3701,7 +3708,7 @@
 			 0);
 	cifs_stats_inc(&tcon->stats.cifs_stats.num_acl_get);
 	if (rc) {
-		cFYI(1, "Send error in QuerySecDesc = %d", rc);
+		cifs_dbg(FYI, "Send error in QuerySecDesc = %d\n", rc);
 	} else {                /* decode response */
 		__le32 *parm;
 		__u32 parm_len;
@@ -3716,7 +3723,8 @@
 			goto qsec_out;
 		pSMBr = (struct smb_com_ntransact_rsp *)iov[0].iov_base;
 
-		cFYI(1, "smb %p parm %p data %p", pSMBr, parm, *acl_inf);
+		cifs_dbg(FYI, "smb %p parm %p data %p\n",
+			 pSMBr, parm, *acl_inf);
 
 		if (le32_to_cpu(pSMBr->ParameterCount) != 4) {
 			rc = -EIO;      /* bad smb */
@@ -3728,8 +3736,8 @@
 
 		acl_len = le32_to_cpu(*parm);
 		if (acl_len != *pbuflen) {
-			cERROR(1, "acl length %d does not match %d",
-				   acl_len, *pbuflen);
+			cifs_dbg(VFS, "acl length %d does not match %d\n",
+				 acl_len, *pbuflen);
 			if (*pbuflen > acl_len)
 				*pbuflen = acl_len;
 		}
@@ -3738,16 +3746,15 @@
 		   header followed by the smallest SID */
 		if ((*pbuflen < sizeof(struct cifs_ntsd) + 8) ||
 		    (*pbuflen >= 64 * 1024)) {
-			cERROR(1, "bad acl length %d", *pbuflen);
+			cifs_dbg(VFS, "bad acl length %d\n", *pbuflen);
 			rc = -EINVAL;
 			*pbuflen = 0;
 		} else {
-			*acl_inf = kmalloc(*pbuflen, GFP_KERNEL);
+			*acl_inf = kmemdup(pdata, *pbuflen, GFP_KERNEL);
 			if (*acl_inf == NULL) {
 				*pbuflen = 0;
 				rc = -ENOMEM;
 			}
-			memcpy(*acl_inf, pdata, *pbuflen);
 		}
 	}
 qsec_out:
@@ -3809,9 +3816,10 @@
 	rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
 		(struct smb_hdr *) pSMBr, &bytes_returned, 0);
 
-	cFYI(1, "SetCIFSACL bytes_returned: %d, rc: %d", bytes_returned, rc);
+	cifs_dbg(FYI, "SetCIFSACL bytes_returned: %d, rc: %d\n",
+		 bytes_returned, rc);
 	if (rc)
-		cFYI(1, "Set CIFS ACL returned %d", rc);
+		cifs_dbg(FYI, "Set CIFS ACL returned %d\n", rc);
 	cifs_buf_release(pSMB);
 
 	if (rc == -EAGAIN)
@@ -3835,7 +3843,7 @@
 	int bytes_returned;
 	int name_len;
 
-	cFYI(1, "In SMBQPath path %s", search_name);
+	cifs_dbg(FYI, "In SMBQPath path %s\n", search_name);
 QInfRetry:
 	rc = smb_init(SMB_COM_QUERY_INFORMATION, 0, tcon, (void **) &pSMB,
 		      (void **) &pSMBr);
@@ -3862,7 +3870,7 @@
 	rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
 			 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
 	if (rc) {
-		cFYI(1, "Send error in QueryInfo = %d", rc);
+		cifs_dbg(FYI, "Send error in QueryInfo = %d\n", rc);
 	} else if (data) {
 		struct timespec ts;
 		__u32 time = le32_to_cpu(pSMBr->last_write_time);
@@ -3936,7 +3944,7 @@
 	rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
 			 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
 	if (rc) {
-		cFYI(1, "Send error in QPathInfo = %d", rc);
+		cifs_dbg(FYI, "Send error in QPathInfo = %d\n", rc);
 	} else {		/* decode response */
 		rc = validate_t2((struct smb_t2_rsp *)pSMBr);
 
@@ -3973,7 +3981,7 @@
 	int name_len;
 	__u16 params, byte_count;
 
-	/* cFYI(1, "In QPathInfo path %s", search_name); */
+	/* cifs_dbg(FYI, "In QPathInfo path %s\n", search_name); */
 QPathInfoRetry:
 	rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB,
 		      (void **) &pSMBr);
@@ -4023,7 +4031,7 @@
 	rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
 			 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
 	if (rc) {
-		cFYI(1, "Send error in QPathInfo = %d", rc);
+		cifs_dbg(FYI, "Send error in QPathInfo = %d\n", rc);
 	} else {		/* decode response */
 		rc = validate_t2((struct smb_t2_rsp *)pSMBr);
 
@@ -4104,14 +4112,12 @@
 	rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
 			 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
 	if (rc) {
-		cFYI(1, "Send error in QPathInfo = %d", rc);
+		cifs_dbg(FYI, "Send error in QPathInfo = %d\n", rc);
 	} else {		/* decode response */
 		rc = validate_t2((struct smb_t2_rsp *)pSMBr);
 
 		if (rc || get_bcc(&pSMBr->hdr) < sizeof(FILE_UNIX_BASIC_INFO)) {
-			cERROR(1, "Malformed FILE_UNIX_BASIC_INFO response. "
-				   "Unix Extensions can be disabled on mount "
-				   "by specifying the nosfu mount option.");
+			cifs_dbg(VFS, "Malformed FILE_UNIX_BASIC_INFO response. Unix Extensions can be disabled on mount by specifying the nosfu mount option.\n");
 			rc = -EIO;	/* bad smb */
 		} else {
 			__u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset);
@@ -4143,7 +4149,7 @@
 	int name_len;
 	__u16 params, byte_count;
 
-	cFYI(1, "In QPathInfo (Unix) the path %s", searchName);
+	cifs_dbg(FYI, "In QPathInfo (Unix) the path %s\n", searchName);
 UnixQPathInfoRetry:
 	rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB,
 		      (void **) &pSMBr);
@@ -4190,14 +4196,12 @@
 	rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
 			 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
 	if (rc) {
-		cFYI(1, "Send error in QPathInfo = %d", rc);
+		cifs_dbg(FYI, "Send error in QPathInfo = %d\n", rc);
 	} else {		/* decode response */
 		rc = validate_t2((struct smb_t2_rsp *)pSMBr);
 
 		if (rc || get_bcc(&pSMBr->hdr) < sizeof(FILE_UNIX_BASIC_INFO)) {
-			cERROR(1, "Malformed FILE_UNIX_BASIC_INFO response. "
-				   "Unix Extensions can be disabled on mount "
-				   "by specifying the nosfu mount option.");
+			cifs_dbg(VFS, "Malformed FILE_UNIX_BASIC_INFO response. Unix Extensions can be disabled on mount by specifying the nosfu mount option.\n");
 			rc = -EIO;	/* bad smb */
 		} else {
 			__u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset);
@@ -4231,7 +4235,7 @@
 	__u16 params, byte_count;
 	struct nls_table *nls_codepage;
 
-	cFYI(1, "In FindFirst for %s", searchName);
+	cifs_dbg(FYI, "In FindFirst for %s\n", searchName);
 
 findFirstRetry:
 	rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB,
@@ -4314,7 +4318,7 @@
 	if (rc) {/* BB add logic to retry regular search if Unix search
 			rejected unexpectedly by server */
 		/* BB Add code to handle unsupported level rc */
-		cFYI(1, "Error in FindFirst = %d", rc);
+		cifs_dbg(FYI, "Error in FindFirst = %d\n", rc);
 
 		cifs_buf_release(pSMB);
 
@@ -4352,7 +4356,7 @@
 				psrch_inf->entries_in_buffer;
 			lnoff = le16_to_cpu(parms->LastNameOffset);
 			if (CIFSMaxBufSize < lnoff) {
-				cERROR(1, "ignoring corrupt resume name");
+				cifs_dbg(VFS, "ignoring corrupt resume name\n");
 				psrch_inf->last_entry = NULL;
 				return rc;
 			}
@@ -4383,7 +4387,7 @@
 	unsigned int name_len;
 	__u16 params, byte_count;
 
-	cFYI(1, "In FindNext");
+	cifs_dbg(FYI, "In FindNext\n");
 
 	if (psrch_inf->endOfSearch)
 		return -ENOENT;
@@ -4444,7 +4448,7 @@
 			cifs_buf_release(pSMB);
 			rc = 0; /* search probably was closed at end of search*/
 		} else
-			cFYI(1, "FindNext returned = %d", rc);
+			cifs_dbg(FYI, "FindNext returned = %d\n", rc);
 	} else {                /* decode response */
 		rc = validate_t2((struct smb_t2_rsp *)pSMBr);
 
@@ -4479,15 +4483,15 @@
 				psrch_inf->entries_in_buffer;
 			lnoff = le16_to_cpu(parms->LastNameOffset);
 			if (CIFSMaxBufSize < lnoff) {
-				cERROR(1, "ignoring corrupt resume name");
+				cifs_dbg(VFS, "ignoring corrupt resume name\n");
 				psrch_inf->last_entry = NULL;
 				return rc;
 			} else
 				psrch_inf->last_entry =
 					psrch_inf->srch_entries_start + lnoff;
 
-/*  cFYI(1, "fnxt2 entries in buf %d index_of_last %d",
-	    psrch_inf->entries_in_buffer, psrch_inf->index_of_last_entry); */
+/*  cifs_dbg(FYI, "fnxt2 entries in buf %d index_of_last %d\n",
+    psrch_inf->entries_in_buffer, psrch_inf->index_of_last_entry); */
 
 			/* BB fixme add unlock here */
 		}
@@ -4512,7 +4516,7 @@
 	int rc = 0;
 	FINDCLOSE_REQ *pSMB = NULL;
 
-	cFYI(1, "In CIFSSMBFindClose");
+	cifs_dbg(FYI, "In CIFSSMBFindClose\n");
 	rc = small_smb_init(SMB_COM_FIND_CLOSE2, 1, tcon, (void **)&pSMB);
 
 	/* no sense returning error if session restarted
@@ -4526,7 +4530,7 @@
 	pSMB->ByteCount = 0;
 	rc = SendReceiveNoRsp(xid, tcon->ses, (char *) pSMB, 0);
 	if (rc)
-		cERROR(1, "Send error in FindClose = %d", rc);
+		cifs_dbg(VFS, "Send error in FindClose = %d\n", rc);
 
 	cifs_stats_inc(&tcon->stats.cifs_stats.num_fclose);
 
@@ -4548,7 +4552,7 @@
 	int name_len, bytes_returned;
 	__u16 params, byte_count;
 
-	cFYI(1, "In GetSrvInodeNum for %s", search_name);
+	cifs_dbg(FYI, "In GetSrvInodeNum for %s\n", search_name);
 	if (tcon == NULL)
 		return -ENODEV;
 
@@ -4599,7 +4603,7 @@
 	rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
 		(struct smb_hdr *) pSMBr, &bytes_returned, 0);
 	if (rc) {
-		cFYI(1, "error %d in QueryInternalInfo", rc);
+		cifs_dbg(FYI, "error %d in QueryInternalInfo\n", rc);
 	} else {
 		/* decode response */
 		rc = validate_t2((struct smb_t2_rsp *)pSMBr);
@@ -4614,7 +4618,7 @@
 			struct file_internal_info *pfinfo;
 			/* BB Do we need a cast or hash here ? */
 			if (count < 8) {
-				cFYI(1, "Illegal size ret in QryIntrnlInf");
+				cifs_dbg(FYI, "Illegal size ret in QryIntrnlInf\n");
 				rc = -EIO;
 				goto GetInodeNumOut;
 			}
@@ -4655,16 +4659,16 @@
 	*num_of_nodes = le16_to_cpu(pSMBr->NumberOfReferrals);
 
 	if (*num_of_nodes < 1) {
-		cERROR(1, "num_referrals: must be at least > 0,"
-			"but we get num_referrals = %d", *num_of_nodes);
+		cifs_dbg(VFS, "num_referrals must be greater than 0, but we got num_referrals = %d\n",
+			 *num_of_nodes);
 		rc = -EINVAL;
 		goto parse_DFS_referrals_exit;
 	}
 
 	ref = (struct dfs_referral_level_3 *) &(pSMBr->referrals);
 	if (ref->VersionNumber != cpu_to_le16(3)) {
-		cERROR(1, "Referrals of V%d version are not supported,"
-			"should be V3", le16_to_cpu(ref->VersionNumber));
+		cifs_dbg(VFS, "Referrals of V%d version are not supported, should be V3\n",
+			 le16_to_cpu(ref->VersionNumber));
 		rc = -EINVAL;
 		goto parse_DFS_referrals_exit;
 	}
@@ -4673,14 +4677,12 @@
 	data_end = (char *)(&(pSMBr->PathConsumed)) +
 				le16_to_cpu(pSMBr->t2.DataCount);
 
-	cFYI(1, "num_referrals: %d dfs flags: 0x%x ...",
-			*num_of_nodes,
-			le32_to_cpu(pSMBr->DFSFlags));
+	cifs_dbg(FYI, "num_referrals: %d dfs flags: 0x%x ...\n",
+		 *num_of_nodes, le32_to_cpu(pSMBr->DFSFlags));
 
-	*target_nodes = kzalloc(sizeof(struct dfs_info3_param) *
-			*num_of_nodes, GFP_KERNEL);
+	*target_nodes = kcalloc(*num_of_nodes, sizeof(struct dfs_info3_param),
+				GFP_KERNEL);
 	if (*target_nodes == NULL) {
-		cERROR(1, "Failed to allocate buffer for target_nodes");
 		rc = -ENOMEM;
 		goto parse_DFS_referrals_exit;
 	}
@@ -4759,7 +4761,7 @@
 	*num_of_nodes = 0;
 	*target_nodes = NULL;
 
-	cFYI(1, "In GetDFSRefer the path %s", search_name);
+	cifs_dbg(FYI, "In GetDFSRefer the path %s\n", search_name);
 	if (ses == NULL)
 		return -ENODEV;
 getDFSRetry:
@@ -4827,7 +4829,7 @@
 	rc = SendReceive(xid, ses, (struct smb_hdr *) pSMB,
 			 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
 	if (rc) {
-		cFYI(1, "Send error in GetDFSRefer = %d", rc);
+		cifs_dbg(FYI, "Send error in GetDFSRefer = %d\n", rc);
 		goto GetDFSRefExit;
 	}
 	rc = validate_t2((struct smb_t2_rsp *)pSMBr);
@@ -4838,9 +4840,8 @@
 		goto GetDFSRefExit;
 	}
 
-	cFYI(1, "Decoding GetDFSRefer response BCC: %d  Offset %d",
-				get_bcc(&pSMBr->hdr),
-				le16_to_cpu(pSMBr->t2.DataOffset));
+	cifs_dbg(FYI, "Decoding GetDFSRefer response BCC: %d  Offset %d\n",
+		 get_bcc(&pSMBr->hdr), le16_to_cpu(pSMBr->t2.DataOffset));
 
 	/* parse returned result into more usable form */
 	rc = parse_DFS_referrals(pSMBr, num_of_nodes,
@@ -4869,7 +4870,7 @@
 	int bytes_returned = 0;
 	__u16 params, byte_count;
 
-	cFYI(1, "OldQFSInfo");
+	cifs_dbg(FYI, "OldQFSInfo\n");
 oldQFSInfoRetry:
 	rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB,
 		(void **) &pSMBr);
@@ -4902,7 +4903,7 @@
 	rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
 		(struct smb_hdr *) pSMBr, &bytes_returned, 0);
 	if (rc) {
-		cFYI(1, "Send error in QFSInfo = %d", rc);
+		cifs_dbg(FYI, "Send error in QFSInfo = %d\n", rc);
 	} else {                /* decode response */
 		rc = validate_t2((struct smb_t2_rsp *)pSMBr);
 
@@ -4910,7 +4911,7 @@
 			rc = -EIO;      /* bad smb */
 		else {
 			__u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset);
-			cFYI(1, "qfsinf resp BCC: %d  Offset %d",
+			cifs_dbg(FYI, "qfsinf resp BCC: %d  Offset %d\n",
 				 get_bcc(&pSMBr->hdr), data_offset);
 
 			response_data = (FILE_SYSTEM_ALLOC_INFO *)
@@ -4923,10 +4924,10 @@
 			       le32_to_cpu(response_data->TotalAllocationUnits);
 			FSData->f_bfree = FSData->f_bavail =
 				le32_to_cpu(response_data->FreeAllocationUnits);
-			cFYI(1, "Blocks: %lld  Free: %lld Block size %ld",
-			     (unsigned long long)FSData->f_blocks,
-			     (unsigned long long)FSData->f_bfree,
-			     FSData->f_bsize);
+			cifs_dbg(FYI, "Blocks: %lld  Free: %lld Block size %ld\n",
+				 (unsigned long long)FSData->f_blocks,
+				 (unsigned long long)FSData->f_bfree,
+				 FSData->f_bsize);
 		}
 	}
 	cifs_buf_release(pSMB);
@@ -4949,7 +4950,7 @@
 	int bytes_returned = 0;
 	__u16 params, byte_count;
 
-	cFYI(1, "In QFSInfo");
+	cifs_dbg(FYI, "In QFSInfo\n");
 QFSInfoRetry:
 	rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB,
 		      (void **) &pSMBr);
@@ -4982,7 +4983,7 @@
 	rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
 			 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
 	if (rc) {
-		cFYI(1, "Send error in QFSInfo = %d", rc);
+		cifs_dbg(FYI, "Send error in QFSInfo = %d\n", rc);
 	} else {		/* decode response */
 		rc = validate_t2((struct smb_t2_rsp *)pSMBr);
 
@@ -5003,10 +5004,10 @@
 			    le64_to_cpu(response_data->TotalAllocationUnits);
 			FSData->f_bfree = FSData->f_bavail =
 			    le64_to_cpu(response_data->FreeAllocationUnits);
-			cFYI(1, "Blocks: %lld  Free: %lld Block size %ld",
-			     (unsigned long long)FSData->f_blocks,
-			     (unsigned long long)FSData->f_bfree,
-			     FSData->f_bsize);
+			cifs_dbg(FYI, "Blocks: %lld  Free: %lld Block size %ld\n",
+				 (unsigned long long)FSData->f_blocks,
+				 (unsigned long long)FSData->f_bfree,
+				 FSData->f_bsize);
 		}
 	}
 	cifs_buf_release(pSMB);
@@ -5028,7 +5029,7 @@
 	int bytes_returned = 0;
 	__u16 params, byte_count;
 
-	cFYI(1, "In QFSAttributeInfo");
+	cifs_dbg(FYI, "In QFSAttributeInfo\n");
 QFSAttributeRetry:
 	rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB,
 		      (void **) &pSMBr);
@@ -5062,7 +5063,7 @@
 	rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
 			 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
 	if (rc) {
-		cERROR(1, "Send error in QFSAttributeInfo = %d", rc);
+		cifs_dbg(VFS, "Send error in QFSAttributeInfo = %d\n", rc);
 	} else {		/* decode response */
 		rc = validate_t2((struct smb_t2_rsp *)pSMBr);
 
@@ -5098,7 +5099,7 @@
 	int bytes_returned = 0;
 	__u16 params, byte_count;
 
-	cFYI(1, "In QFSDeviceInfo");
+	cifs_dbg(FYI, "In QFSDeviceInfo\n");
 QFSDeviceRetry:
 	rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB,
 		      (void **) &pSMBr);
@@ -5133,7 +5134,7 @@
 	rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
 			 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
 	if (rc) {
-		cFYI(1, "Send error in QFSDeviceInfo = %d", rc);
+		cifs_dbg(FYI, "Send error in QFSDeviceInfo = %d\n", rc);
 	} else {		/* decode response */
 		rc = validate_t2((struct smb_t2_rsp *)pSMBr);
 
@@ -5169,7 +5170,7 @@
 	int bytes_returned = 0;
 	__u16 params, byte_count;
 
-	cFYI(1, "In QFSUnixInfo");
+	cifs_dbg(FYI, "In QFSUnixInfo\n");
 QFSUnixRetry:
 	rc = smb_init_no_reconnect(SMB_COM_TRANSACTION2, 15, tcon,
 				   (void **) &pSMB, (void **) &pSMBr);
@@ -5203,7 +5204,7 @@
 	rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
 			 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
 	if (rc) {
-		cERROR(1, "Send error in QFSUnixInfo = %d", rc);
+		cifs_dbg(VFS, "Send error in QFSUnixInfo = %d\n", rc);
 	} else {		/* decode response */
 		rc = validate_t2((struct smb_t2_rsp *)pSMBr);
 
@@ -5238,7 +5239,7 @@
 	int bytes_returned = 0;
 	__u16 params, param_offset, offset, byte_count;
 
-	cFYI(1, "In SETFSUnixInfo");
+	cifs_dbg(FYI, "In SETFSUnixInfo\n");
 SETFSUnixRetry:
 	/* BB switch to small buf init to save memory */
 	rc = smb_init_no_reconnect(SMB_COM_TRANSACTION2, 15, tcon,
@@ -5286,7 +5287,7 @@
 	rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
 			 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
 	if (rc) {
-		cERROR(1, "Send error in SETFSUnixInfo = %d", rc);
+		cifs_dbg(VFS, "Send error in SETFSUnixInfo = %d\n", rc);
 	} else {		/* decode response */
 		rc = validate_t2((struct smb_t2_rsp *)pSMBr);
 		if (rc)
@@ -5314,7 +5315,7 @@
 	int bytes_returned = 0;
 	__u16 params, byte_count;
 
-	cFYI(1, "In QFSPosixInfo");
+	cifs_dbg(FYI, "In QFSPosixInfo\n");
 QFSPosixRetry:
 	rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB,
 		      (void **) &pSMBr);
@@ -5348,7 +5349,7 @@
 	rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
 			 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
 	if (rc) {
-		cFYI(1, "Send error in QFSUnixInfo = %d", rc);
+		cifs_dbg(FYI, "Send error in QFSPosixInfo = %d\n", rc);
 	} else {		/* decode response */
 		rc = validate_t2((struct smb_t2_rsp *)pSMBr);
 
@@ -5410,7 +5411,7 @@
 
 	__u16 params, byte_count, data_count, param_offset, offset;
 
-	cFYI(1, "In SetEOF");
+	cifs_dbg(FYI, "In SetEOF\n");
 SetEOFRetry:
 	rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB,
 		      (void **) &pSMBr);
@@ -5476,7 +5477,7 @@
 	rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
 			 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
 	if (rc)
-		cFYI(1, "SetPathInfo (file size) returned %d", rc);
+		cifs_dbg(FYI, "SetPathInfo (file size) returned %d\n", rc);
 
 	cifs_buf_release(pSMB);
 
@@ -5495,8 +5496,8 @@
 	int rc = 0;
 	__u16 params, param_offset, offset, byte_count, count;
 
-	cFYI(1, "SetFileSize (via SetFileInfo) %lld",
-			(long long)size);
+	cifs_dbg(FYI, "SetFileSize (via SetFileInfo) %lld\n",
+		 (long long)size);
 	rc = small_smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB);
 
 	if (rc)
@@ -5553,7 +5554,8 @@
 	pSMB->ByteCount = cpu_to_le16(byte_count);
 	rc = SendReceiveNoRsp(xid, tcon->ses, (char *) pSMB, 0);
 	if (rc) {
-		cFYI(1, "Send error in SetFileInfo (SetFileSize) = %d", rc);
+		cifs_dbg(FYI, "Send error in SetFileInfo (SetFileSize) = %d\n",
+			 rc);
 	}
 
 	/* Note: On -EAGAIN error only caller can retry on handle based calls
@@ -5577,7 +5579,7 @@
 	int rc = 0;
 	__u16 params, param_offset, offset, byte_count, count;
 
-	cFYI(1, "Set Times (via SetFileInfo)");
+	cifs_dbg(FYI, "Set Times (via SetFileInfo)\n");
 	rc = small_smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB);
 
 	if (rc)
@@ -5623,7 +5625,8 @@
 	memcpy(data_offset, data, sizeof(FILE_BASIC_INFO));
 	rc = SendReceiveNoRsp(xid, tcon->ses, (char *) pSMB, 0);
 	if (rc)
-		cFYI(1, "Send error in Set Time (SetFileInfo) = %d", rc);
+		cifs_dbg(FYI, "Send error in Set Time (SetFileInfo) = %d\n",
+			 rc);
 
 	/* Note: On -EAGAIN error only caller can retry on handle based calls
 		since file handle passed in no longer valid */
@@ -5640,7 +5643,7 @@
 	int rc = 0;
 	__u16 params, param_offset, offset, byte_count, count;
 
-	cFYI(1, "Set File Disposition (via SetFileInfo)");
+	cifs_dbg(FYI, "Set File Disposition (via SetFileInfo)\n");
 	rc = small_smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB);
 
 	if (rc)
@@ -5682,7 +5685,7 @@
 	*data_offset = delete_file ? 1 : 0;
 	rc = SendReceiveNoRsp(xid, tcon->ses, (char *) pSMB, 0);
 	if (rc)
-		cFYI(1, "Send error in SetFileDisposition = %d", rc);
+		cifs_dbg(FYI, "Send error in SetFileDisposition = %d\n", rc);
 
 	return rc;
 }
@@ -5700,7 +5703,7 @@
 	char *data_offset;
 	__u16 params, param_offset, offset, byte_count, count;
 
-	cFYI(1, "In SetTimes");
+	cifs_dbg(FYI, "In SetTimes\n");
 
 SetTimesRetry:
 	rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB,
@@ -5756,7 +5759,7 @@
 	rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
 			 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
 	if (rc)
-		cFYI(1, "SetPathInfo (times) returned %d", rc);
+		cifs_dbg(FYI, "SetPathInfo (times) returned %d\n", rc);
 
 	cifs_buf_release(pSMB);
 
@@ -5781,7 +5784,7 @@
 	int bytes_returned;
 	int name_len;
 
-	cFYI(1, "In SetAttrLegacy");
+	cifs_dbg(FYI, "In SetAttrLegacy\n");
 
 SetAttrLgcyRetry:
 	rc = smb_init(SMB_COM_SETATTR, 8, tcon, (void **) &pSMB,
@@ -5807,7 +5810,7 @@
 	rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
 			 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
 	if (rc)
-		cFYI(1, "Error in LegacySetAttr = %d", rc);
+		cifs_dbg(FYI, "Error in LegacySetAttr = %d\n", rc);
 
 	cifs_buf_release(pSMB);
 
@@ -5875,7 +5878,7 @@
 	int rc = 0;
 	u16 params, param_offset, offset, byte_count, count;
 
-	cFYI(1, "Set Unix Info (via SetFileInfo)");
+	cifs_dbg(FYI, "Set Unix Info (via SetFileInfo)\n");
 	rc = small_smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB);
 
 	if (rc)
@@ -5921,7 +5924,8 @@
 
 	rc = SendReceiveNoRsp(xid, tcon->ses, (char *) pSMB, 0);
 	if (rc)
-		cFYI(1, "Send error in Set Time (SetFileInfo) = %d", rc);
+		cifs_dbg(FYI, "Send error in Set Time (SetFileInfo) = %d\n",
+			 rc);
 
 	/* Note: On -EAGAIN error only caller can retry on handle based calls
 		since file handle passed in no longer valid */
@@ -5943,7 +5947,7 @@
 	FILE_UNIX_BASIC_INFO *data_offset;
 	__u16 params, param_offset, offset, count, byte_count;
 
-	cFYI(1, "In SetUID/GID/Mode");
+	cifs_dbg(FYI, "In SetUID/GID/Mode\n");
 setPermsRetry:
 	rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB,
 		      (void **) &pSMBr);
@@ -5999,7 +6003,7 @@
 	rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
 			 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
 	if (rc)
-		cFYI(1, "SetPathInfo (perms) returned %d", rc);
+		cifs_dbg(FYI, "SetPathInfo (perms) returned %d\n", rc);
 
 	cifs_buf_release(pSMB);
 	if (rc == -EAGAIN)
@@ -6036,7 +6040,7 @@
 	__u16 params, byte_count, data_offset;
 	unsigned int ea_name_len = ea_name ? strlen(ea_name) : 0;
 
-	cFYI(1, "In Query All EAs path %s", searchName);
+	cifs_dbg(FYI, "In Query All EAs path %s\n", searchName);
 QAllEAsRetry:
 	rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB,
 		      (void **) &pSMBr);
@@ -6083,7 +6087,7 @@
 	rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
 			 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
 	if (rc) {
-		cFYI(1, "Send error in QueryAllEAs = %d", rc);
+		cifs_dbg(FYI, "Send error in QueryAllEAs = %d\n", rc);
 		goto QAllEAsOut;
 	}
 
@@ -6111,16 +6115,16 @@
 				(((char *) &pSMBr->hdr.Protocol) + data_offset);
 
 	list_len = le32_to_cpu(ea_response_data->list_len);
-	cFYI(1, "ea length %d", list_len);
+	cifs_dbg(FYI, "ea length %d\n", list_len);
 	if (list_len <= 8) {
-		cFYI(1, "empty EA list returned from server");
+		cifs_dbg(FYI, "empty EA list returned from server\n");
 		goto QAllEAsOut;
 	}
 
 	/* make sure list_len doesn't go past end of SMB */
 	end_of_smb = (char *)pByteArea(&pSMBr->hdr) + get_bcc(&pSMBr->hdr);
 	if ((char *)ea_response_data + list_len > end_of_smb) {
-		cFYI(1, "EA list appears to go beyond SMB");
+		cifs_dbg(FYI, "EA list appears to go beyond SMB\n");
 		rc = -EIO;
 		goto QAllEAsOut;
 	}
@@ -6137,7 +6141,7 @@
 		temp_ptr += 4;
 		/* make sure we can read name_len and value_len */
 		if (list_len < 0) {
-			cFYI(1, "EA entry goes beyond length of list");
+			cifs_dbg(FYI, "EA entry goes beyond length of list\n");
 			rc = -EIO;
 			goto QAllEAsOut;
 		}
@@ -6146,7 +6150,7 @@
 		value_len = le16_to_cpu(temp_fea->value_len);
 		list_len -= name_len + 1 + value_len;
 		if (list_len < 0) {
-			cFYI(1, "EA entry goes beyond length of list");
+			cifs_dbg(FYI, "EA entry goes beyond length of list\n");
 			rc = -EIO;
 			goto QAllEAsOut;
 		}
@@ -6214,7 +6218,7 @@
 	int bytes_returned = 0;
 	__u16 params, param_offset, byte_count, offset, count;
 
-	cFYI(1, "In SetEA");
+	cifs_dbg(FYI, "In SetEA\n");
 SetEARetry:
 	rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB,
 		      (void **) &pSMBr);
@@ -6296,7 +6300,7 @@
 	rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
 			 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
 	if (rc)
-		cFYI(1, "SetPathInfo (EA) returned %d", rc);
+		cifs_dbg(FYI, "SetPathInfo (EA) returned %d\n", rc);
 
 	cifs_buf_release(pSMB);
 
@@ -6339,7 +6343,7 @@
 	struct dir_notify_req *dnotify_req;
 	int bytes_returned;
 
-	cFYI(1, "In CIFSSMBNotify for file handle %d", (int)netfid);
+	cifs_dbg(FYI, "In CIFSSMBNotify for file handle %d\n", (int)netfid);
 	rc = smb_init(SMB_COM_NT_TRANSACT, 23, tcon, (void **) &pSMB,
 		      (void **) &pSMBr);
 	if (rc)
@@ -6368,7 +6372,7 @@
 			 (struct smb_hdr *)pSMBr, &bytes_returned,
 			 CIFS_ASYNC_OP);
 	if (rc) {
-		cFYI(1, "Error in Notify = %d", rc);
+		cifs_dbg(FYI, "Error in Notify = %d\n", rc);
 	} else {
 		/* Add file to outstanding requests */
 		/* BB change to kmem cache alloc */
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 21b3a29..99eeaa1 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -95,9 +95,7 @@
 
 	/* Mount options which take string value */
 	Opt_user, Opt_pass, Opt_ip,
-	Opt_unc, Opt_domain,
-	Opt_srcaddr, Opt_prefixpath,
-	Opt_iocharset,
+	Opt_domain, Opt_srcaddr, Opt_iocharset,
 	Opt_netbiosname, Opt_servern,
 	Opt_ver, Opt_vers, Opt_sec, Opt_cache,
 
@@ -193,14 +191,14 @@
 	{ Opt_blank_ip, "addr=" },
 	{ Opt_ip, "ip=%s" },
 	{ Opt_ip, "addr=%s" },
-	{ Opt_unc, "unc=%s" },
-	{ Opt_unc, "target=%s" },
-	{ Opt_unc, "path=%s" },
+	{ Opt_ignore, "unc=%s" },
+	{ Opt_ignore, "target=%s" },
+	{ Opt_ignore, "path=%s" },
 	{ Opt_domain, "dom=%s" },
 	{ Opt_domain, "domain=%s" },
 	{ Opt_domain, "workgroup=%s" },
 	{ Opt_srcaddr, "srcaddr=%s" },
-	{ Opt_prefixpath, "prefixpath=%s" },
+	{ Opt_ignore, "prefixpath=%s" },
 	{ Opt_iocharset, "iocharset=%s" },
 	{ Opt_netbiosname, "netbiosname=%s" },
 	{ Opt_servern, "servern=%s" },
@@ -318,11 +316,12 @@
 	server->max_read = 0;
 #endif
 
-	cFYI(1, "Reconnecting tcp session");
+	cifs_dbg(FYI, "Reconnecting tcp session\n");
 
 	/* before reconnecting the tcp session, mark the smb session (uid)
 		and the tid bad so they are not used until reconnected */
-	cFYI(1, "%s: marking sessions and tcons for reconnect", __func__);
+	cifs_dbg(FYI, "%s: marking sessions and tcons for reconnect\n",
+		 __func__);
 	spin_lock(&cifs_tcp_ses_lock);
 	list_for_each(tmp, &server->smb_ses_list) {
 		ses = list_entry(tmp, struct cifs_ses, smb_ses_list);
@@ -336,15 +335,14 @@
 	spin_unlock(&cifs_tcp_ses_lock);
 
 	/* do not want to be sending data on a socket we are freeing */
-	cFYI(1, "%s: tearing down socket", __func__);
+	cifs_dbg(FYI, "%s: tearing down socket\n", __func__);
 	mutex_lock(&server->srv_mutex);
 	if (server->ssocket) {
-		cFYI(1, "State: 0x%x Flags: 0x%lx", server->ssocket->state,
-			server->ssocket->flags);
+		cifs_dbg(FYI, "State: 0x%x Flags: 0x%lx\n",
+			 server->ssocket->state, server->ssocket->flags);
 		kernel_sock_shutdown(server->ssocket, SHUT_WR);
-		cFYI(1, "Post shutdown state: 0x%x Flags: 0x%lx",
-			server->ssocket->state,
-			server->ssocket->flags);
+		cifs_dbg(FYI, "Post shutdown state: 0x%x Flags: 0x%lx\n",
+			 server->ssocket->state, server->ssocket->flags);
 		sock_release(server->ssocket);
 		server->ssocket = NULL;
 	}
@@ -358,7 +356,7 @@
 
 	/* mark submitted MIDs for retry and issue callback */
 	INIT_LIST_HEAD(&retry_list);
-	cFYI(1, "%s: moving mids to private list", __func__);
+	cifs_dbg(FYI, "%s: moving mids to private list\n", __func__);
 	spin_lock(&GlobalMid_Lock);
 	list_for_each_safe(tmp, tmp2, &server->pending_mid_q) {
 		mid_entry = list_entry(tmp, struct mid_q_entry, qhead);
@@ -368,7 +366,7 @@
 	}
 	spin_unlock(&GlobalMid_Lock);
 
-	cFYI(1, "%s: issuing mid callbacks", __func__);
+	cifs_dbg(FYI, "%s: issuing mid callbacks\n", __func__);
 	list_for_each_safe(tmp, tmp2, &retry_list) {
 		mid_entry = list_entry(tmp, struct mid_q_entry, qhead);
 		list_del_init(&mid_entry->qhead);
@@ -381,7 +379,7 @@
 		/* we should try only the port we connected to before */
 		rc = generic_ip_connect(server);
 		if (rc) {
-			cFYI(1, "reconnect error %d", rc);
+			cifs_dbg(FYI, "reconnect error %d\n", rc);
 			msleep(3000);
 		} else {
 			atomic_inc(&tcpSesReconnectCount);
@@ -415,8 +413,8 @@
 
 	rc = server->ops->echo ? server->ops->echo(server) : -ENOSYS;
 	if (rc)
-		cFYI(1, "Unable to send echo request to server: %s",
-			server->hostname);
+		cifs_dbg(FYI, "Unable to send echo request to server: %s\n",
+			 server->hostname);
 
 requeue_echo:
 	queue_delayed_work(cifsiod_wq, &server->echo, SMB_ECHO_INTERVAL);
@@ -428,7 +426,7 @@
 	if (!server->bigbuf) {
 		server->bigbuf = (char *)cifs_buf_get();
 		if (!server->bigbuf) {
-			cERROR(1, "No memory for large SMB response");
+			cifs_dbg(VFS, "No memory for large SMB response\n");
 			msleep(3000);
 			/* retry will check if exiting */
 			return false;
@@ -441,7 +439,7 @@
 	if (!server->smallbuf) {
 		server->smallbuf = (char *)cifs_small_buf_get();
 		if (!server->smallbuf) {
-			cERROR(1, "No memory for SMB response");
+			cifs_dbg(VFS, "No memory for SMB response\n");
 			msleep(1000);
 			/* retry will check if exiting */
 			return false;
@@ -471,9 +469,8 @@
 	 */
 	if (server->tcpStatus == CifsGood &&
 	    time_after(jiffies, server->lstrp + 2 * SMB_ECHO_INTERVAL)) {
-		cERROR(1, "Server %s has not responded in %d seconds. "
-			  "Reconnecting...", server->hostname,
-			  (2 * SMB_ECHO_INTERVAL) / HZ);
+		cifs_dbg(VFS, "Server %s has not responded in %d seconds. Reconnecting...\n",
+			 server->hostname, (2 * SMB_ECHO_INTERVAL) / HZ);
 		cifs_reconnect(server);
 		wake_up(&server->response_q);
 		return true;
@@ -584,8 +581,8 @@
 			length = 0;
 			continue;
 		} else if (length <= 0) {
-			cFYI(1, "Received no data or error: expecting %d "
-				"got %d", to_read, length);
+			cifs_dbg(FYI, "Received no data or error: expecting %d, got %d\n",
+				 to_read, length);
 			cifs_reconnect(server);
 			total_read = -EAGAIN;
 			break;
@@ -619,17 +616,17 @@
 		/* Regular SMB response */
 		return true;
 	case RFC1002_SESSION_KEEP_ALIVE:
-		cFYI(1, "RFC 1002 session keep alive");
+		cifs_dbg(FYI, "RFC 1002 session keep alive\n");
 		break;
 	case RFC1002_POSITIVE_SESSION_RESPONSE:
-		cFYI(1, "RFC 1002 positive session response");
+		cifs_dbg(FYI, "RFC 1002 positive session response\n");
 		break;
 	case RFC1002_NEGATIVE_SESSION_RESPONSE:
 		/*
 		 * We get this from Windows 98 instead of an error on
 		 * SMB negprot response.
 		 */
-		cFYI(1, "RFC 1002 negative session response");
+		cifs_dbg(FYI, "RFC 1002 negative session response\n");
 		/* give server a second to clean up */
 		msleep(1000);
 		/*
@@ -643,7 +640,7 @@
 		wake_up(&server->response_q);
 		break;
 	default:
-		cERROR(1, "RFC 1002 unknown response type 0x%x", type);
+		cifs_dbg(VFS, "RFC 1002 unknown response type 0x%x\n", type);
 		cifs_reconnect(server);
 	}
 
@@ -729,7 +726,7 @@
 		spin_lock(&GlobalMid_Lock);
 		list_for_each_safe(tmp, tmp2, &server->pending_mid_q) {
 			mid_entry = list_entry(tmp, struct mid_q_entry, qhead);
-			cFYI(1, "Clearing mid 0x%llx", mid_entry->mid);
+			cifs_dbg(FYI, "Clearing mid 0x%llx\n", mid_entry->mid);
 			mid_entry->mid_state = MID_SHUTDOWN;
 			list_move(&mid_entry->qhead, &dispose_list);
 		}
@@ -738,7 +735,7 @@
 		/* now walk dispose list and issue callbacks */
 		list_for_each_safe(tmp, tmp2, &dispose_list) {
 			mid_entry = list_entry(tmp, struct mid_q_entry, qhead);
-			cFYI(1, "Callback mid 0x%llx", mid_entry->mid);
+			cifs_dbg(FYI, "Callback mid 0x%llx\n", mid_entry->mid);
 			list_del_init(&mid_entry->qhead);
 			mid_entry->callback(mid_entry);
 		}
@@ -755,7 +752,7 @@
 		 * least 45 seconds before giving up on a request getting a
 		 * response and going ahead and killing cifsd.
 		 */
-		cFYI(1, "Wait for exit from demultiplex thread");
+		cifs_dbg(FYI, "Wait for exit from demultiplex thread\n");
 		msleep(46000);
 		/*
 		 * If threads still have not exited they are probably never
@@ -782,8 +779,7 @@
 
 	/* make sure this will fit in a large buffer */
 	if (pdu_length > CIFSMaxBufSize + MAX_HEADER_SIZE(server) - 4) {
-		cERROR(1, "SMB response too long (%u bytes)",
-			pdu_length);
+		cifs_dbg(VFS, "SMB response too long (%u bytes)\n", pdu_length);
 		cifs_reconnect(server);
 		wake_up(&server->response_q);
 		return -EAGAIN;
@@ -841,7 +837,7 @@
 	struct mid_q_entry *mid_entry;
 
 	current->flags |= PF_MEMALLOC;
-	cFYI(1, "Demultiplex PID: %d", task_pid_nr(current));
+	cifs_dbg(FYI, "Demultiplex PID: %d\n", task_pid_nr(current));
 
 	length = atomic_inc_return(&tcpSesAllocCount);
 	if (length > 1)
@@ -871,14 +867,14 @@
 		 */
 		pdu_length = get_rfc1002_length(buf);
 
-		cFYI(1, "RFC1002 header 0x%x", pdu_length);
+		cifs_dbg(FYI, "RFC1002 header 0x%x\n", pdu_length);
 		if (!is_smb_response(server, buf[0]))
 			continue;
 
 		/* make sure we have enough to get to the MID */
 		if (pdu_length < HEADER_SIZE(server) - 1 - 4) {
-			cERROR(1, "SMB response too short (%u bytes)",
-				pdu_length);
+			cifs_dbg(VFS, "SMB response too short (%u bytes)\n",
+				 pdu_length);
 			cifs_reconnect(server);
 			wake_up(&server->response_q);
 			continue;
@@ -910,8 +906,8 @@
 				mid_entry->callback(mid_entry);
 		} else if (!server->ops->is_oplock_break ||
 			   !server->ops->is_oplock_break(buf, server)) {
-			cERROR(1, "No task to wake, unknown frame received! "
-				   "NumMids %d", atomic_read(&midCount));
+			cifs_dbg(VFS, "No task to wake, unknown frame received! NumMids %d\n",
+				 atomic_read(&midCount));
 			cifs_dump_mem("Received Data is: ", buf,
 				      HEADER_SIZE(server));
 #ifdef CONFIG_CIFS_DEBUG2
@@ -1037,7 +1033,7 @@
 		break;
 	case Opt_sec_krb5p:
 		/* vol->secFlg |= CIFSSEC_MUST_SEAL | CIFSSEC_MAY_KRB5; */
-		cERROR(1, "Krb5 cifs privacy not supported");
+		cifs_dbg(VFS, "Krb5 cifs privacy not supported\n");
 		break;
 	case Opt_sec_ntlmssp:
 		vol->secFlg |= CIFSSEC_MAY_NTLMSSP;
@@ -1067,7 +1063,7 @@
 		vol->nullauth = 1;
 		break;
 	default:
-		cERROR(1, "bad security option: %s", value);
+		cifs_dbg(VFS, "bad security option: %s\n", value);
 		return 1;
 	}
 
@@ -1093,7 +1089,7 @@
 		vol->strict_io = false;
 		break;
 	default:
-		cERROR(1, "bad cache= option: %s", value);
+		cifs_dbg(VFS, "bad cache= option: %s\n", value);
 		return 1;
 	}
 	return 0;
@@ -1124,7 +1120,7 @@
 		break;
 #endif
 	default:
-		cERROR(1, "Unknown vers= option specified: %s", value);
+		cifs_dbg(VFS, "Unknown vers= option specified: %s\n", value);
 		return 1;
 	}
 	return 0;
@@ -1255,7 +1251,7 @@
 			separator[0] = options[4];
 			options += 5;
 		} else {
-			cFYI(1, "Null separator not allowed");
+			cifs_dbg(FYI, "Null separator not allowed\n");
 		}
 	}
 	vol->backupuid_specified = false; /* no backup intent for a user */
@@ -1440,8 +1436,7 @@
 			break;
 		case Opt_fsc:
 #ifndef CONFIG_CIFS_FSCACHE
-			cERROR(1, "FS-Cache support needs CONFIG_CIFS_FSCACHE "
-				  "kernel config option set");
+			cifs_dbg(VFS, "FS-Cache support needs CONFIG_CIFS_FSCACHE kernel config option set\n");
 			goto cifs_parse_mount_err;
 #endif
 			vol->fsc = true;
@@ -1459,55 +1454,55 @@
 		/* Numeric Values */
 		case Opt_backupuid:
 			if (get_option_uid(args, &vol->backupuid)) {
-				cERROR(1, "%s: Invalid backupuid value",
-					__func__);
+				cifs_dbg(VFS, "%s: Invalid backupuid value\n",
+					 __func__);
 				goto cifs_parse_mount_err;
 			}
 			vol->backupuid_specified = true;
 			break;
 		case Opt_backupgid:
 			if (get_option_gid(args, &vol->backupgid)) {
-				cERROR(1, "%s: Invalid backupgid value",
-					__func__);
+				cifs_dbg(VFS, "%s: Invalid backupgid value\n",
+					 __func__);
 				goto cifs_parse_mount_err;
 			}
 			vol->backupgid_specified = true;
 			break;
 		case Opt_uid:
 			if (get_option_uid(args, &vol->linux_uid)) {
-				cERROR(1, "%s: Invalid uid value",
-					__func__);
+				cifs_dbg(VFS, "%s: Invalid uid value\n",
+					 __func__);
 				goto cifs_parse_mount_err;
 			}
 			uid_specified = true;
 			break;
 		case Opt_cruid:
 			if (get_option_uid(args, &vol->cred_uid)) {
-				cERROR(1, "%s: Invalid cruid value",
-					__func__);
+				cifs_dbg(VFS, "%s: Invalid cruid value\n",
+					 __func__);
 				goto cifs_parse_mount_err;
 			}
 			break;
 		case Opt_gid:
 			if (get_option_gid(args, &vol->linux_gid)) {
-				cERROR(1, "%s: Invalid gid value",
-						__func__);
+				cifs_dbg(VFS, "%s: Invalid gid value\n",
+					 __func__);
 				goto cifs_parse_mount_err;
 			}
 			gid_specified = true;
 			break;
 		case Opt_file_mode:
 			if (get_option_ul(args, &option)) {
-				cERROR(1, "%s: Invalid file_mode value",
-					__func__);
+				cifs_dbg(VFS, "%s: Invalid file_mode value\n",
+					 __func__);
 				goto cifs_parse_mount_err;
 			}
 			vol->file_mode = option;
 			break;
 		case Opt_dirmode:
 			if (get_option_ul(args, &option)) {
-				cERROR(1, "%s: Invalid dir_mode value",
-					__func__);
+				cifs_dbg(VFS, "%s: Invalid dir_mode value\n",
+					 __func__);
 				goto cifs_parse_mount_err;
 			}
 			vol->dir_mode = option;
@@ -1515,37 +1510,37 @@
 		case Opt_port:
 			if (get_option_ul(args, &option) ||
 			    option > USHRT_MAX) {
-				cERROR(1, "%s: Invalid port value", __func__);
+				cifs_dbg(VFS, "%s: Invalid port value\n",
+					 __func__);
 				goto cifs_parse_mount_err;
 			}
 			port = (unsigned short)option;
 			break;
 		case Opt_rsize:
 			if (get_option_ul(args, &option)) {
-				cERROR(1, "%s: Invalid rsize value",
-					__func__);
+				cifs_dbg(VFS, "%s: Invalid rsize value\n",
+					 __func__);
 				goto cifs_parse_mount_err;
 			}
 			vol->rsize = option;
 			break;
 		case Opt_wsize:
 			if (get_option_ul(args, &option)) {
-				cERROR(1, "%s: Invalid wsize value",
-					__func__);
+				cifs_dbg(VFS, "%s: Invalid wsize value\n",
+					 __func__);
 				goto cifs_parse_mount_err;
 			}
 			vol->wsize = option;
 			break;
 		case Opt_actimeo:
 			if (get_option_ul(args, &option)) {
-				cERROR(1, "%s: Invalid actimeo value",
-					__func__);
+				cifs_dbg(VFS, "%s: Invalid actimeo value\n",
+					 __func__);
 				goto cifs_parse_mount_err;
 			}
 			vol->actimeo = HZ * option;
 			if (vol->actimeo > CIFS_MAX_ACTIMEO) {
-				cERROR(1, "CIFS: attribute cache"
-					  "timeout too large");
+				cifs_dbg(VFS, "attribute cache timeout too large\n");
 				goto cifs_parse_mount_err;
 			}
 			break;
@@ -1568,11 +1563,8 @@
 				goto cifs_parse_mount_err;
 			}
 			vol->username = kstrdup(string, GFP_KERNEL);
-			if (!vol->username) {
-				printk(KERN_WARNING "CIFS: no memory "
-						    "for username\n");
+			if (!vol->username)
 				goto cifs_parse_mount_err;
-			}
 			break;
 		case Opt_blank_pass:
 			/* passwords have to be handled differently
@@ -1660,30 +1652,6 @@
 			}
 			got_ip = true;
 			break;
-		case Opt_unc:
-			string = vol->UNC;
-			vol->UNC = match_strdup(args);
-			if (vol->UNC == NULL)
-				goto out_nomem;
-
-			convert_delimiter(vol->UNC, '\\');
-			if (vol->UNC[0] != '\\' || vol->UNC[1] != '\\') {
-				printk(KERN_ERR "CIFS: UNC Path does not "
-						"begin with // or \\\\\n");
-				goto cifs_parse_mount_err;
-			}
-
-			/* Compare old unc= option to new one */
-			if (!string || strcmp(string, vol->UNC))
-				printk(KERN_WARNING "CIFS: the value of the "
-					"unc= mount option does not match the "
-					"device string. Using the unc= option "
-					"for now. In 3.10, that option will "
-					"be ignored and the contents of the "
-					"device string will be used "
-					"instead. (%s != %s)\n", string,
-					vol->UNC);
-			break;
 		case Opt_domain:
 			string = match_strdup(args);
 			if (string == NULL)
@@ -1701,7 +1669,7 @@
 						    "for domainname\n");
 				goto cifs_parse_mount_err;
 			}
-			cFYI(1, "Domain name set");
+			cifs_dbg(FYI, "Domain name set\n");
 			break;
 		case Opt_srcaddr:
 			string = match_strdup(args);
@@ -1716,26 +1684,6 @@
 				goto cifs_parse_mount_err;
 			}
 			break;
-		case Opt_prefixpath:
-			/* skip over any leading delimiter */
-			if (*args[0].from == '/' || *args[0].from == '\\')
-				args[0].from++;
-
-			string = vol->prepath;
-			vol->prepath = match_strdup(args);
-			if (vol->prepath == NULL)
-				goto out_nomem;
-			/* Compare old prefixpath= option to new one */
-			if (!string || strcmp(string, vol->prepath))
-				printk(KERN_WARNING "CIFS: the value of the "
-					"prefixpath= mount option does not "
-					"match the device string. Using the "
-					"prefixpath= option for now. In 3.10, "
-					"that option will be ignored and the "
-					"contents of the device string will be "
-					"used instead.(%s != %s)\n", string,
-					vol->prepath);
-			break;
 		case Opt_iocharset:
 			string = match_strdup(args);
 			if (string == NULL)
@@ -1759,7 +1707,7 @@
 			/* if iocharset not set then load_nls_default
 			 * is used by caller
 			 */
-			cFYI(1, "iocharset set to %s", string);
+			cifs_dbg(FYI, "iocharset set to %s\n", string);
 			break;
 		case Opt_netbiosname:
 			string = match_strdup(args);
@@ -1873,20 +1821,18 @@
 #ifndef CONFIG_KEYS
 	/* Muliuser mounts require CONFIG_KEYS support */
 	if (vol->multiuser) {
-		cERROR(1, "Multiuser mounts require kernels with "
-			  "CONFIG_KEYS enabled.");
+		cifs_dbg(VFS, "Multiuser mounts require kernels with CONFIG_KEYS enabled\n");
 		goto cifs_parse_mount_err;
 	}
 #endif
 	if (!vol->UNC) {
-		cERROR(1, "CIFS mount error: No usable UNC path provided in "
-			  "device string or in unc= option!");
+		cifs_dbg(VFS, "CIFS mount error: No usable UNC path provided in device string or in unc= option!\n");
 		goto cifs_parse_mount_err;
 	}
 
 	/* make sure UNC has a share name */
 	if (!strchr(vol->UNC + 3, '\\')) {
-		cERROR(1, "Malformed UNC. Unable to find share name.");
+		cifs_dbg(VFS, "Malformed UNC. Unable to find share name.\n");
 		goto cifs_parse_mount_err;
 	}
 
@@ -2107,7 +2053,7 @@
 
 		++server->srv_count;
 		spin_unlock(&cifs_tcp_ses_lock);
-		cFYI(1, "Existing tcp session with server found");
+		cifs_dbg(FYI, "Existing tcp session with server found\n");
 		return server;
 	}
 	spin_unlock(&cifs_tcp_ses_lock);
@@ -2154,7 +2100,7 @@
 	struct TCP_Server_Info *tcp_ses = NULL;
 	int rc;
 
-	cFYI(1, "UNC: %s", volume_info->UNC);
+	cifs_dbg(FYI, "UNC: %s\n", volume_info->UNC);
 
 	/* see if we already have a matching tcp_ses */
 	tcp_ses = cifs_find_tcp_session(volume_info);
@@ -2169,7 +2115,7 @@
 
 	rc = cifs_crypto_shash_allocate(tcp_ses);
 	if (rc) {
-		cERROR(1, "could not setup hash structures rc %d", rc);
+		cifs_dbg(VFS, "could not setup hash structures rc %d\n", rc);
 		goto out_err;
 	}
 
@@ -2216,7 +2162,7 @@
 
 	rc = ip_connect(tcp_ses);
 	if (rc < 0) {
-		cERROR(1, "Error connecting to socket. Aborting operation");
+		cifs_dbg(VFS, "Error connecting to socket. Aborting operation.\n");
 		goto out_err_crypto_release;
 	}
 
@@ -2229,7 +2175,7 @@
 				  tcp_ses, "cifsd");
 	if (IS_ERR(tcp_ses->tsk)) {
 		rc = PTR_ERR(tcp_ses->tsk);
-		cERROR(1, "error %d create cifsd thread", rc);
+		cifs_dbg(VFS, "error %d create cifsd thread\n", rc);
 		module_put(THIS_MODULE);
 		goto out_err_crypto_release;
 	}
@@ -2316,7 +2262,7 @@
 	unsigned int xid;
 	struct TCP_Server_Info *server = ses->server;
 
-	cFYI(1, "%s: ses_count=%d", __func__, ses->ses_count);
+	cifs_dbg(FYI, "%s: ses_count=%d\n", __func__, ses->ses_count);
 	spin_lock(&cifs_tcp_ses_lock);
 	if (--ses->ses_count > 0) {
 		spin_unlock(&cifs_tcp_ses_lock);
@@ -2368,23 +2314,24 @@
 		sprintf(desc, "cifs:a:%pI6c", &sa6->sin6_addr.s6_addr);
 		break;
 	default:
-		cFYI(1, "Bad ss_family (%hu)", server->dstaddr.ss_family);
+		cifs_dbg(FYI, "Bad ss_family (%hu)\n",
+			 server->dstaddr.ss_family);
 		rc = -EINVAL;
 		goto out_err;
 	}
 
-	cFYI(1, "%s: desc=%s", __func__, desc);
+	cifs_dbg(FYI, "%s: desc=%s\n", __func__, desc);
 	key = request_key(&key_type_logon, desc, "");
 	if (IS_ERR(key)) {
 		if (!ses->domainName) {
-			cFYI(1, "domainName is NULL");
+			cifs_dbg(FYI, "domainName is NULL\n");
 			rc = PTR_ERR(key);
 			goto out_err;
 		}
 
 		/* didn't work, try to find a domain key */
 		sprintf(desc, "cifs:d:%s", ses->domainName);
-		cFYI(1, "%s: desc=%s", __func__, desc);
+		cifs_dbg(FYI, "%s: desc=%s\n", __func__, desc);
 		key = request_key(&key_type_logon, desc, "");
 		if (IS_ERR(key)) {
 			rc = PTR_ERR(key);
@@ -2402,32 +2349,34 @@
 	/* find first : in payload */
 	payload = (char *)upayload->data;
 	delim = strnchr(payload, upayload->datalen, ':');
-	cFYI(1, "payload=%s", payload);
+	cifs_dbg(FYI, "payload=%s\n", payload);
 	if (!delim) {
-		cFYI(1, "Unable to find ':' in payload (datalen=%d)",
-				upayload->datalen);
+		cifs_dbg(FYI, "Unable to find ':' in payload (datalen=%d)\n",
+			 upayload->datalen);
 		rc = -EINVAL;
 		goto out_key_put;
 	}
 
 	len = delim - payload;
 	if (len > MAX_USERNAME_SIZE || len <= 0) {
-		cFYI(1, "Bad value from username search (len=%zd)", len);
+		cifs_dbg(FYI, "Bad value from username search (len=%zd)\n",
+			 len);
 		rc = -EINVAL;
 		goto out_key_put;
 	}
 
 	vol->username = kstrndup(payload, len, GFP_KERNEL);
 	if (!vol->username) {
-		cFYI(1, "Unable to allocate %zd bytes for username", len);
+		cifs_dbg(FYI, "Unable to allocate %zd bytes for username\n",
+			 len);
 		rc = -ENOMEM;
 		goto out_key_put;
 	}
-	cFYI(1, "%s: username=%s", __func__, vol->username);
+	cifs_dbg(FYI, "%s: username=%s\n", __func__, vol->username);
 
 	len = key->datalen - (len + 1);
 	if (len > MAX_PASSWORD_SIZE || len <= 0) {
-		cFYI(1, "Bad len for password search (len=%zd)", len);
+		cifs_dbg(FYI, "Bad len for password search (len=%zd)\n", len);
 		rc = -EINVAL;
 		kfree(vol->username);
 		vol->username = NULL;
@@ -2437,7 +2386,8 @@
 	++delim;
 	vol->password = kstrndup(delim, len, GFP_KERNEL);
 	if (!vol->password) {
-		cFYI(1, "Unable to allocate %zd bytes for password", len);
+		cifs_dbg(FYI, "Unable to allocate %zd bytes for password\n",
+			 len);
 		rc = -ENOMEM;
 		kfree(vol->username);
 		vol->username = NULL;
@@ -2449,7 +2399,7 @@
 	key_put(key);
 out_err:
 	kfree(desc);
-	cFYI(1, "%s: returning %d", __func__, rc);
+	cifs_dbg(FYI, "%s: returning %d\n", __func__, rc);
 	return rc;
 }
 #else /* ! CONFIG_KEYS */
@@ -2474,7 +2424,8 @@
 
 	ses = cifs_find_smb_ses(server, volume_info);
 	if (ses) {
-		cFYI(1, "Existing smb sess found (status=%d)", ses->status);
+		cifs_dbg(FYI, "Existing smb sess found (status=%d)\n",
+			 ses->status);
 
 		mutex_lock(&ses->session_mutex);
 		rc = cifs_negotiate_protocol(xid, ses);
@@ -2486,7 +2437,7 @@
 			return ERR_PTR(rc);
 		}
 		if (ses->need_reconnect) {
-			cFYI(1, "Session needs reconnect");
+			cifs_dbg(FYI, "Session needs reconnect\n");
 			rc = cifs_setup_session(xid, ses,
 						volume_info->local_nls);
 			if (rc) {
@@ -2505,7 +2456,7 @@
 		return ses;
 	}
 
-	cFYI(1, "Existing smb sess not found");
+	cifs_dbg(FYI, "Existing smb sess not found\n");
 	ses = sesInfoAlloc();
 	if (ses == NULL)
 		goto get_ses_fail;
@@ -2595,7 +2546,7 @@
 	unsigned int xid;
 	struct cifs_ses *ses = tcon->ses;
 
-	cFYI(1, "%s: tc_count=%d", __func__, tcon->tc_count);
+	cifs_dbg(FYI, "%s: tc_count=%d\n", __func__, tcon->tc_count);
 	spin_lock(&cifs_tcp_ses_lock);
 	if (--tcon->tc_count > 0) {
 		spin_unlock(&cifs_tcp_ses_lock);
@@ -2623,12 +2574,11 @@
 
 	tcon = cifs_find_tcon(ses, volume_info->UNC);
 	if (tcon) {
-		cFYI(1, "Found match on UNC path");
+		cifs_dbg(FYI, "Found match on UNC path\n");
 		/* existing tcon already has a reference */
 		cifs_put_smb_ses(ses);
 		if (tcon->seal != volume_info->seal)
-			cERROR(1, "transport encryption setting "
-				   "conflicts with existing tid");
+			cifs_dbg(VFS, "transport encryption setting conflicts with existing tid\n");
 		return tcon;
 	}
 
@@ -2660,13 +2610,13 @@
 	rc = ses->server->ops->tree_connect(xid, ses, volume_info->UNC, tcon,
 					    volume_info->local_nls);
 	free_xid(xid);
-	cFYI(1, "Tcon rc = %d", rc);
+	cifs_dbg(FYI, "Tcon rc = %d\n", rc);
 	if (rc)
 		goto out_fail;
 
 	if (volume_info->nodfs) {
 		tcon->Flags &= ~SMB_SHARE_IS_IN_DFS;
-		cFYI(1, "DFS disabled (%d)", tcon->Flags);
+		cifs_dbg(FYI, "DFS disabled (%d)\n", tcon->Flags);
 	}
 	tcon->seal = volume_info->seal;
 	/*
@@ -2820,7 +2770,7 @@
 		strcpy(temp_unc + 2 + strlen(ses->serverName), "\\IPC$");
 		rc = ses->server->ops->tree_connect(xid, ses, temp_unc, NULL,
 						    nls_codepage);
-		cFYI(1, "Tcon rc = %d ipc_tid = %d", rc, ses->ipc_tid);
+		cifs_dbg(FYI, "Tcon rc = %d ipc_tid = %d\n", rc, ses->ipc_tid);
 		kfree(temp_unc);
 	}
 	if (rc == 0)
@@ -2898,13 +2848,11 @@
 			saddr4 = (struct sockaddr_in *)&server->srcaddr;
 			saddr6 = (struct sockaddr_in6 *)&server->srcaddr;
 			if (saddr6->sin6_family == AF_INET6)
-				cERROR(1, "cifs: "
-				       "Failed to bind to: %pI6c, error: %d",
-				       &saddr6->sin6_addr, rc);
+				cifs_dbg(VFS, "Failed to bind to: %pI6c, error: %d\n",
+					 &saddr6->sin6_addr, rc);
 			else
-				cERROR(1, "cifs: "
-				       "Failed to bind to: %pI4, error: %d",
-				       &saddr4->sin_addr.s_addr, rc);
+				cifs_dbg(VFS, "Failed to bind to: %pI4, error: %d\n",
+					 &saddr4->sin_addr.s_addr, rc);
 		}
 	}
 	return rc;
@@ -3009,13 +2957,13 @@
 		rc = __sock_create(cifs_net_ns(server), sfamily, SOCK_STREAM,
 				   IPPROTO_TCP, &socket, 1);
 		if (rc < 0) {
-			cERROR(1, "Error %d creating socket", rc);
+			cifs_dbg(VFS, "Error %d creating socket\n", rc);
 			server->ssocket = NULL;
 			return rc;
 		}
 
 		/* BB other socket options to set KEEPALIVE, NODELAY? */
-		cFYI(1, "Socket created");
+		cifs_dbg(FYI, "Socket created\n");
 		server->ssocket = socket;
 		socket->sk->sk_allocation = GFP_NOFS;
 		if (sfamily == AF_INET6)
@@ -3049,16 +2997,17 @@
 		rc = kernel_setsockopt(socket, SOL_TCP, TCP_NODELAY,
 				(char *)&val, sizeof(val));
 		if (rc)
-			cFYI(1, "set TCP_NODELAY socket option error %d", rc);
+			cifs_dbg(FYI, "set TCP_NODELAY socket option error %d\n",
+				 rc);
 	}
 
-	 cFYI(1, "sndbuf %d rcvbuf %d rcvtimeo 0x%lx",
+	cifs_dbg(FYI, "sndbuf %d rcvbuf %d rcvtimeo 0x%lx\n",
 		 socket->sk->sk_sndbuf,
 		 socket->sk->sk_rcvbuf, socket->sk->sk_rcvtimeo);
 
 	rc = socket->ops->connect(socket, saddr, slen, 0);
 	if (rc < 0) {
-		cFYI(1, "Error %d connecting to server", rc);
+		cifs_dbg(FYI, "Error %d connecting to server\n", rc);
 		sock_release(socket);
 		server->ssocket = NULL;
 		return rc;
@@ -3116,19 +3065,19 @@
 	if (vol_info && vol_info->no_linux_ext) {
 		tcon->fsUnixInfo.Capability = 0;
 		tcon->unix_ext = 0; /* Unix Extensions disabled */
-		cFYI(1, "Linux protocol extensions disabled");
+		cifs_dbg(FYI, "Linux protocol extensions disabled\n");
 		return;
 	} else if (vol_info)
 		tcon->unix_ext = 1; /* Unix Extensions supported */
 
 	if (tcon->unix_ext == 0) {
-		cFYI(1, "Unix extensions disabled so not set on reconnect");
+		cifs_dbg(FYI, "Unix extensions disabled so not set on reconnect\n");
 		return;
 	}
 
 	if (!CIFSSMBQFSUnixInfo(xid, tcon)) {
 		__u64 cap = le64_to_cpu(tcon->fsUnixInfo.Capability);
-		cFYI(1, "unix caps which server supports %lld", cap);
+		cifs_dbg(FYI, "unix caps which server supports %lld\n", cap);
 		/* check for reconnect case in which we do not
 		   want to change the mount behavior if we can avoid it */
 		if (vol_info == NULL) {
@@ -3138,22 +3087,22 @@
 				cap &= ~CIFS_UNIX_POSIX_ACL_CAP;
 			if ((saved_cap & CIFS_UNIX_POSIX_PATHNAMES_CAP) == 0) {
 				if (cap & CIFS_UNIX_POSIX_PATHNAMES_CAP)
-					cERROR(1, "POSIXPATH support change");
+					cifs_dbg(VFS, "POSIXPATH support change\n");
 				cap &= ~CIFS_UNIX_POSIX_PATHNAMES_CAP;
 			} else if ((cap & CIFS_UNIX_POSIX_PATHNAMES_CAP) == 0) {
-				cERROR(1, "possible reconnect error");
-				cERROR(1, "server disabled POSIX path support");
+				cifs_dbg(VFS, "possible reconnect error\n");
+				cifs_dbg(VFS, "server disabled POSIX path support\n");
 			}
 		}
 
 		if (cap & CIFS_UNIX_TRANSPORT_ENCRYPTION_MANDATORY_CAP)
-			cERROR(1, "per-share encryption not supported yet");
+			cifs_dbg(VFS, "per-share encryption not supported yet\n");
 
 		cap &= CIFS_UNIX_CAP_MASK;
 		if (vol_info && vol_info->no_psx_acl)
 			cap &= ~CIFS_UNIX_POSIX_ACL_CAP;
 		else if (CIFS_UNIX_POSIX_ACL_CAP & cap) {
-			cFYI(1, "negotiated posix acl support");
+			cifs_dbg(FYI, "negotiated posix acl support\n");
 			if (cifs_sb)
 				cifs_sb->mnt_cifs_flags |=
 					CIFS_MOUNT_POSIXACL;
@@ -3162,43 +3111,38 @@
 		if (vol_info && vol_info->posix_paths == 0)
 			cap &= ~CIFS_UNIX_POSIX_PATHNAMES_CAP;
 		else if (cap & CIFS_UNIX_POSIX_PATHNAMES_CAP) {
-			cFYI(1, "negotiate posix pathnames");
+			cifs_dbg(FYI, "negotiate posix pathnames\n");
 			if (cifs_sb)
 				cifs_sb->mnt_cifs_flags |=
 					CIFS_MOUNT_POSIX_PATHS;
 		}
 
-		cFYI(1, "Negotiate caps 0x%x", (int)cap);
+		cifs_dbg(FYI, "Negotiate caps 0x%x\n", (int)cap);
 #ifdef CONFIG_CIFS_DEBUG2
 		if (cap & CIFS_UNIX_FCNTL_CAP)
-			cFYI(1, "FCNTL cap");
+			cifs_dbg(FYI, "FCNTL cap\n");
 		if (cap & CIFS_UNIX_EXTATTR_CAP)
-			cFYI(1, "EXTATTR cap");
+			cifs_dbg(FYI, "EXTATTR cap\n");
 		if (cap & CIFS_UNIX_POSIX_PATHNAMES_CAP)
-			cFYI(1, "POSIX path cap");
+			cifs_dbg(FYI, "POSIX path cap\n");
 		if (cap & CIFS_UNIX_XATTR_CAP)
-			cFYI(1, "XATTR cap");
+			cifs_dbg(FYI, "XATTR cap\n");
 		if (cap & CIFS_UNIX_POSIX_ACL_CAP)
-			cFYI(1, "POSIX ACL cap");
+			cifs_dbg(FYI, "POSIX ACL cap\n");
 		if (cap & CIFS_UNIX_LARGE_READ_CAP)
-			cFYI(1, "very large read cap");
+			cifs_dbg(FYI, "very large read cap\n");
 		if (cap & CIFS_UNIX_LARGE_WRITE_CAP)
-			cFYI(1, "very large write cap");
+			cifs_dbg(FYI, "very large write cap\n");
 		if (cap & CIFS_UNIX_TRANSPORT_ENCRYPTION_CAP)
-			cFYI(1, "transport encryption cap");
+			cifs_dbg(FYI, "transport encryption cap\n");
 		if (cap & CIFS_UNIX_TRANSPORT_ENCRYPTION_MANDATORY_CAP)
-			cFYI(1, "mandatory transport encryption cap");
+			cifs_dbg(FYI, "mandatory transport encryption cap\n");
 #endif /* CIFS_DEBUG2 */
 		if (CIFSSMBSetFSUnixInfo(xid, tcon, cap)) {
 			if (vol_info == NULL) {
-				cFYI(1, "resetting capabilities failed");
+				cifs_dbg(FYI, "resetting capabilities failed\n");
 			} else
-				cERROR(1, "Negotiating Unix capabilities "
-					   "with the server failed. Consider "
-					   "mounting with the Unix Extensions "
-					   "disabled if problems are found "
-					   "by specifying the nounix mount "
-					   "option.");
+				cifs_dbg(VFS, "Negotiating Unix capabilities with the server failed. Consider mounting with the Unix Extensions disabled if problems are found by specifying the nounix mount option.\n");
 
 		}
 	}
@@ -3223,8 +3167,8 @@
 	cifs_sb->mnt_gid = pvolume_info->linux_gid;
 	cifs_sb->mnt_file_mode = pvolume_info->file_mode;
 	cifs_sb->mnt_dir_mode = pvolume_info->dir_mode;
-	cFYI(1, "file mode: 0x%hx  dir mode: 0x%hx",
-		cifs_sb->mnt_file_mode, cifs_sb->mnt_dir_mode);
+	cifs_dbg(FYI, "file mode: 0x%hx  dir mode: 0x%hx\n",
+		 cifs_sb->mnt_file_mode, cifs_sb->mnt_dir_mode);
 
 	cifs_sb->actimeo = pvolume_info->actimeo;
 	cifs_sb->local_nls = pvolume_info->local_nls;
@@ -3273,21 +3217,19 @@
 	if (pvolume_info->strict_io)
 		cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_STRICT_IO;
 	if (pvolume_info->direct_io) {
-		cFYI(1, "mounting share using direct i/o");
+		cifs_dbg(FYI, "mounting share using direct i/o\n");
 		cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_DIRECT_IO;
 	}
 	if (pvolume_info->mfsymlinks) {
 		if (pvolume_info->sfu_emul) {
-			cERROR(1,  "mount option mfsymlinks ignored if sfu "
-				   "mount option is used");
+			cifs_dbg(VFS, "mount option mfsymlinks ignored if sfu mount option is used\n");
 		} else {
 			cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_MF_SYMLINKS;
 		}
 	}
 
 	if ((pvolume_info->cifs_acl) && (pvolume_info->dynperm))
-		cERROR(1, "mount option dynperm ignored if cifsacl "
-			   "mount option supported");
+		cifs_dbg(VFS, "mount option dynperm ignored if cifsacl mount option supported\n");
 }
 
 static void
@@ -3339,7 +3281,7 @@
 
 	*pos = '\0'; /* add trailing null */
 	convert_delimiter(full_path, CIFS_DIR_SEP(cifs_sb));
-	cFYI(1, "%s: full_path=%s", __func__, full_path);
+	cifs_dbg(FYI, "%s: full_path=%s\n", __func__, full_path);
 	return full_path;
 }
 
@@ -3410,14 +3352,14 @@
 		return -EINVAL;
 
 	if (volume_info->nullauth) {
-		cFYI(1, "Anonymous login");
+		cifs_dbg(FYI, "Anonymous login\n");
 		kfree(volume_info->username);
 		volume_info->username = NULL;
 	} else if (volume_info->username) {
 		/* BB fixme parse for domain name here */
-		cFYI(1, "Username: %s", volume_info->username);
+		cifs_dbg(FYI, "Username: %s\n", volume_info->username);
 	} else {
-		cifserror("No username specified");
+		cifs_dbg(VFS, "No username specified\n");
 	/* In userspace mount helper we can get user name from alternate
 	   locations such as env variables and files on disk */
 		return -EINVAL;
@@ -3430,7 +3372,7 @@
 	} else {
 		volume_info->local_nls = load_nls(volume_info->iocharset);
 		if (volume_info->local_nls == NULL) {
-			cERROR(1, "CIFS mount error: iocharset %s not found",
+			cifs_dbg(VFS, "CIFS mount error: iocharset %s not found\n",
 				 volume_info->iocharset);
 			return -ELIBACC;
 		}
@@ -3780,13 +3722,13 @@
 		if (length == 3) {
 			if ((bcc_ptr[0] == 'I') && (bcc_ptr[1] == 'P') &&
 			    (bcc_ptr[2] == 'C')) {
-				cFYI(1, "IPC connection");
+				cifs_dbg(FYI, "IPC connection\n");
 				tcon->ipc = 1;
 			}
 		} else if (length == 2) {
 			if ((bcc_ptr[0] == 'A') && (bcc_ptr[1] == ':')) {
 				/* the most common case */
-				cFYI(1, "disk share connection");
+				cifs_dbg(FYI, "disk share connection\n");
 			}
 		}
 		bcc_ptr += length + 1;
@@ -3799,7 +3741,7 @@
 						      bytes_left, is_unicode,
 						      nls_codepage);
 
-		cFYI(1, "nativeFileSystem=%s", tcon->nativeFileSystem);
+		cifs_dbg(FYI, "nativeFileSystem=%s\n", tcon->nativeFileSystem);
 
 		if ((smb_buffer_response->WordCount == 3) ||
 			 (smb_buffer_response->WordCount == 7))
@@ -3807,7 +3749,7 @@
 			tcon->Flags = le16_to_cpu(pSMBr->OptionalSupport);
 		else
 			tcon->Flags = 0;
-		cFYI(1, "Tcon flags: 0x%x ", tcon->Flags);
+		cifs_dbg(FYI, "Tcon flags: 0x%x\n", tcon->Flags);
 	} else if ((rc == 0) && tcon == NULL) {
 		/* all we need to save for IPC$ connection */
 		ses->ipc_tid = smb_buffer_response->Tid;
@@ -3885,16 +3827,16 @@
 	if (linuxExtEnabled == 0)
 		ses->capabilities &= (~server->vals->cap_unix);
 
-	cFYI(1, "Security Mode: 0x%x Capabilities: 0x%x TimeAdjust: %d",
+	cifs_dbg(FYI, "Security Mode: 0x%x Capabilities: 0x%x TimeAdjust: %d\n",
 		 server->sec_mode, server->capabilities, server->timeAdj);
 
 	if (server->ops->sess_setup)
 		rc = server->ops->sess_setup(xid, ses, nls_info);
 
 	if (rc) {
-		cERROR(1, "Send error in SessSetup = %d", rc);
+		cifs_dbg(VFS, "Send error in SessSetup = %d\n", rc);
 	} else {
-		mutex_lock(&ses->server->srv_mutex);
+		mutex_lock(&server->srv_mutex);
 		if (!server->session_estab) {
 			server->session_key.response = ses->auth_key.response;
 			server->session_key.len = ses->auth_key.len;
@@ -3904,7 +3846,7 @@
 		}
 		mutex_unlock(&server->srv_mutex);
 
-		cFYI(1, "CIFS Session Established successfully");
+		cifs_dbg(FYI, "CIFS Session Established successfully\n");
 		spin_lock(&GlobalMid_Lock);
 		ses->status = CifsGood;
 		ses->need_reconnect = false;
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 1cd0162..5699b50 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -102,7 +102,7 @@
 		namelen += (1 + temp->d_name.len);
 		temp = temp->d_parent;
 		if (temp == NULL) {
-			cERROR(1, "corrupt dentry");
+			cifs_dbg(VFS, "corrupt dentry\n");
 			rcu_read_unlock();
 			return NULL;
 		}
@@ -124,12 +124,12 @@
 			full_path[namelen] = dirsep;
 			strncpy(full_path + namelen + 1, temp->d_name.name,
 				temp->d_name.len);
-			cFYI(0, "name: %s", full_path + namelen);
+			cifs_dbg(FYI, "name: %s\n", full_path + namelen);
 		}
 		spin_unlock(&temp->d_lock);
 		temp = temp->d_parent;
 		if (temp == NULL) {
-			cERROR(1, "corrupt dentry");
+			cifs_dbg(VFS, "corrupt dentry\n");
 			rcu_read_unlock();
 			kfree(full_path);
 			return NULL;
@@ -137,8 +137,8 @@
 	}
 	rcu_read_unlock();
 	if (namelen != dfsplen || read_seqretry(&rename_lock, seq)) {
-		cFYI(1, "did not end path lookup where expected. namelen=%d "
-			"dfsplen=%d", namelen, dfsplen);
+		cifs_dbg(FYI, "did not end path lookup where expected. namelen=%d dfsplen=%d\n",
+			 namelen, dfsplen);
 		/* presumably this is only possible if racing with a rename
 		of one of the parent directories  (we can not lock the dentries
 		above us to prevent this, but retrying should be harmless) */
@@ -178,7 +178,7 @@
 	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS)) {
 		for (i = 0; i < direntry->d_name.len; i++) {
 			if (direntry->d_name.name[i] == '\\') {
-				cFYI(1, "Invalid file name");
+				cifs_dbg(FYI, "Invalid file name\n");
 				return -EINVAL;
 			}
 		}
@@ -291,7 +291,7 @@
 	else if ((oflags & O_CREAT) == O_CREAT)
 		disposition = FILE_OPEN_IF;
 	else
-		cFYI(1, "Create flag not set in create function");
+		cifs_dbg(FYI, "Create flag not set in create function\n");
 
 	/*
 	 * BB add processing to set equivalent of mode - e.g. via CreateX with
@@ -323,7 +323,7 @@
 			       desired_access, create_options, fid, oplock,
 			       buf, cifs_sb);
 	if (rc) {
-		cFYI(1, "cifs_create returned 0x%x", rc);
+		cifs_dbg(FYI, "cifs_create returned 0x%x\n", rc);
 		goto out;
 	}
 
@@ -389,7 +389,8 @@
 
 cifs_create_set_dentry:
 	if (rc != 0) {
-		cFYI(1, "Create worked, get_inode_info failed rc = %d", rc);
+		cifs_dbg(FYI, "Create worked, get_inode_info failed rc = %d\n",
+			 rc);
 		if (server->ops->close)
 			server->ops->close(xid, tcon, fid);
 		goto out;
@@ -452,12 +453,14 @@
 
 	xid = get_xid();
 
-	cFYI(1, "parent inode = 0x%p name is: %s and dentry = 0x%p",
-	     inode, direntry->d_name.name, direntry);
+	cifs_dbg(FYI, "parent inode = 0x%p name is: %s and dentry = 0x%p\n",
+		 inode, direntry->d_name.name, direntry);
 
 	tlink = cifs_sb_tlink(CIFS_SB(inode->i_sb));
-	if (IS_ERR(tlink))
+	if (IS_ERR(tlink)) {
+		rc = PTR_ERR(tlink);
 		goto out_free_xid;
+	}
 
 	tcon = tlink_tcon(tlink);
 	server = tcon->ses->server;
@@ -518,8 +521,8 @@
 	__u32 oplock;
 	int created = FILE_CREATED;
 
-	cFYI(1, "cifs_create parent inode = 0x%p name is: %s and dentry = 0x%p",
-	     inode, direntry->d_name.name, direntry);
+	cifs_dbg(FYI, "cifs_create parent inode = 0x%p name is: %s and dentry = 0x%p\n",
+		 inode, direntry->d_name.name, direntry);
 
 	tlink = cifs_sb_tlink(CIFS_SB(inode->i_sb));
 	rc = PTR_ERR(tlink);
@@ -613,7 +616,7 @@
 		goto mknod_out;
 
 
-	cFYI(1, "sfu compat create special file");
+	cifs_dbg(FYI, "sfu compat create special file\n");
 
 	buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
 	if (buf == NULL) {
@@ -688,8 +691,8 @@
 
 	xid = get_xid();
 
-	cFYI(1, "parent inode = 0x%p name is: %s and dentry = 0x%p",
-	      parent_dir_inode, direntry->d_name.name, direntry);
+	cifs_dbg(FYI, "parent inode = 0x%p name is: %s and dentry = 0x%p\n",
+		 parent_dir_inode, direntry->d_name.name, direntry);
 
 	/* check whether path exists */
 
@@ -715,11 +718,12 @@
 	}
 
 	if (direntry->d_inode != NULL) {
-		cFYI(1, "non-NULL inode in lookup");
+		cifs_dbg(FYI, "non-NULL inode in lookup\n");
 	} else {
-		cFYI(1, "NULL inode in lookup");
+		cifs_dbg(FYI, "NULL inode in lookup\n");
 	}
-	cFYI(1, "Full path: %s inode = 0x%p", full_path, direntry->d_inode);
+	cifs_dbg(FYI, "Full path: %s inode = 0x%p\n",
+		 full_path, direntry->d_inode);
 
 	if (pTcon->unix_ext) {
 		rc = cifs_get_inode_info_unix(&newInode, full_path,
@@ -742,7 +746,7 @@
 	/*	if it was once a directory (but how can we tell?) we could do
 		shrink_dcache_parent(direntry); */
 	} else if (rc != -EACCES) {
-		cERROR(1, "Unexpected lookup error %d", rc);
+		cifs_dbg(VFS, "Unexpected lookup error %d\n", rc);
 		/* We special case check for Access Denied - since that
 		is a common return code */
 	}
@@ -807,7 +811,7 @@
 {
 	int rc = 0;
 
-	cFYI(1, "In cifs d_delete, name = %s", direntry->d_name.name);
+	cifs_dbg(FYI, "In cifs d_delete, name = %s\n", direntry->d_name.name);
 
 	return rc;
 }     */
diff --git a/fs/cifs/dns_resolve.c b/fs/cifs/dns_resolve.c
index 1d2d91d..e7512e4 100644
--- a/fs/cifs/dns_resolve.c
+++ b/fs/cifs/dns_resolve.c
@@ -55,7 +55,7 @@
 
 	len = strlen(unc);
 	if (len < 3) {
-		cFYI(1, "%s: unc is too short: %s", __func__, unc);
+		cifs_dbg(FYI, "%s: unc is too short: %s\n", __func__, unc);
 		return -EINVAL;
 	}
 
@@ -68,8 +68,8 @@
 	if (sep)
 		len = sep - hostname;
 	else
-		cFYI(1, "%s: probably server name is whole unc: %s",
-		     __func__, unc);
+		cifs_dbg(FYI, "%s: probably server name is whole unc: %s\n",
+			 __func__, unc);
 
 	/* Try to interpret hostname as an IPv4 or IPv6 address */
 	rc = cifs_convert_address((struct sockaddr *)&ss, hostname, len);
@@ -79,11 +79,11 @@
 	/* Perform the upcall */
 	rc = dns_query(NULL, hostname, len, NULL, ip_addr, NULL);
 	if (rc < 0)
-		cFYI(1, "%s: unable to resolve: %*.*s",
-			__func__, len, len, hostname);
+		cifs_dbg(FYI, "%s: unable to resolve: %*.*s\n",
+			 __func__, len, len, hostname);
 	else
-		cFYI(1, "%s: resolved: %*.*s to %s",
-		     __func__, len, len, hostname, *ip_addr);
+		cifs_dbg(FYI, "%s: resolved: %*.*s to %s\n",
+			 __func__, len, len, hostname, *ip_addr);
 	return rc;
 
 name_is_IP_address:
@@ -92,7 +92,8 @@
 		return -ENOMEM;
 	memcpy(name, hostname, len);
 	name[len] = 0;
-	cFYI(1, "%s: unc is IP, skipping dns upcall: %s", __func__, name);
+	cifs_dbg(FYI, "%s: unc is IP, skipping dns upcall: %s\n",
+		 __func__, name);
 	*ip_addr = name;
 	return 0;
 }
diff --git a/fs/cifs/export.c b/fs/cifs/export.c
index 9c7ecdc..ce8b7f6 100644
--- a/fs/cifs/export.c
+++ b/fs/cifs/export.c
@@ -49,7 +49,7 @@
 static struct dentry *cifs_get_parent(struct dentry *dentry)
 {
 	/* BB need to add code here eventually to enable export via NFSD */
-	cFYI(1, "get parent for %p", dentry);
+	cifs_dbg(FYI, "get parent for %p\n", dentry);
 	return ERR_PTR(-EACCES);
 }
 
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 2d4a231..48b29d2 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -78,9 +78,8 @@
 		if (flags & O_EXCL)
 			posix_flags |= SMB_O_EXCL;
 	} else if (flags & O_EXCL)
-		cFYI(1, "Application %s pid %d has incorrectly set O_EXCL flag"
-			"but not O_CREAT on file open. Ignoring O_EXCL",
-			current->comm, current->tgid);
+		cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. Ignoring O_EXCL\n",
+			 current->comm, current->tgid);
 
 	if (flags & O_TRUNC)
 		posix_flags |= SMB_O_TRUNC;
@@ -123,7 +122,7 @@
 	struct tcon_link *tlink;
 	struct cifs_tcon *tcon;
 
-	cFYI(1, "posix open %s", full_path);
+	cifs_dbg(FYI, "posix open %s\n", full_path);
 
 	presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
 	if (presp_data == NULL)
@@ -308,7 +307,7 @@
 	 */
 	if (oplock == server->vals->oplock_read &&
 						cifs_has_mand_locks(cinode)) {
-		cFYI(1, "Reset oplock val from read to None due to mand locks");
+		cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
 		oplock = 0;
 	}
 
@@ -374,8 +373,8 @@
 	list_del(&cifs_file->tlist);
 
 	if (list_empty(&cifsi->openFileList)) {
-		cFYI(1, "closing last open instance for inode %p",
-			cifs_file->dentry->d_inode);
+		cifs_dbg(FYI, "closing last open instance for inode %p\n",
+			 cifs_file->dentry->d_inode);
 		/*
 		 * In strict cache mode we need invalidate mapping on the last
 		 * close  because it may cause a error when we open this file
@@ -454,7 +453,7 @@
 		goto out;
 	}
 
-	cFYI(1, "inode = 0x%p file flags are 0x%x for %s",
+	cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n",
 		 inode, file->f_flags, full_path);
 
 	if (server->oplocks)
@@ -470,16 +469,13 @@
 				cifs_sb->mnt_file_mode /* ignored */,
 				file->f_flags, &oplock, &fid.netfid, xid);
 		if (rc == 0) {
-			cFYI(1, "posix open succeeded");
+			cifs_dbg(FYI, "posix open succeeded\n");
 			posix_open_ok = true;
 		} else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
 			if (tcon->ses->serverNOS)
-				cERROR(1, "server %s of type %s returned"
-					   " unexpected error on SMB posix open"
-					   ", disabling posix open support."
-					   " Check if server update available.",
-					   tcon->ses->serverName,
-					   tcon->ses->serverNOS);
+				cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n",
+					 tcon->ses->serverName,
+					 tcon->ses->serverNOS);
 			tcon->broken_posix_open = true;
 		} else if ((rc != -EIO) && (rc != -EREMOTE) &&
 			 (rc != -EOPNOTSUPP)) /* path not found or net err */
@@ -621,8 +617,8 @@
 		return rc;
 	}
 
-	cFYI(1, "inode = 0x%p file flags 0x%x for %s", inode, cfile->f_flags,
-	     full_path);
+	cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n",
+		 inode, cfile->f_flags, full_path);
 
 	if (tcon->ses->server->oplocks)
 		oplock = REQ_OPLOCK;
@@ -643,7 +639,7 @@
 				     cifs_sb->mnt_file_mode /* ignored */,
 				     oflags, &oplock, &fid.netfid, xid);
 		if (rc == 0) {
-			cFYI(1, "posix reopen succeeded");
+			cifs_dbg(FYI, "posix reopen succeeded\n");
 			goto reopen_success;
 		}
 		/*
@@ -672,8 +668,8 @@
 			       NULL, cifs_sb);
 	if (rc) {
 		mutex_unlock(&cfile->fh_mutex);
-		cFYI(1, "cifs_reopen returned 0x%x", rc);
-		cFYI(1, "oplock: %d", oplock);
+		cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc);
+		cifs_dbg(FYI, "oplock: %d\n", oplock);
 		goto reopen_error_exit;
 	}
 
@@ -729,7 +725,7 @@
 	struct TCP_Server_Info *server;
 	char *buf;
 
-	cFYI(1, "Closedir inode = 0x%p", inode);
+	cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode);
 
 	if (cfile == NULL)
 		return rc;
@@ -738,7 +734,7 @@
 	tcon = tlink_tcon(cfile->tlink);
 	server = tcon->ses->server;
 
-	cFYI(1, "Freeing private data in close dir");
+	cifs_dbg(FYI, "Freeing private data in close dir\n");
 	spin_lock(&cifs_file_list_lock);
 	if (!cfile->srch_inf.endOfSearch && !cfile->invalidHandle) {
 		cfile->invalidHandle = true;
@@ -747,7 +743,7 @@
 			rc = server->ops->close_dir(xid, tcon, &cfile->fid);
 		else
 			rc = -ENOSYS;
-		cFYI(1, "Closing uncompleted readdir with rc %d", rc);
+		cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc);
 		/* not much we can do if it fails anyway, ignore rc */
 		rc = 0;
 	} else
@@ -755,7 +751,7 @@
 
 	buf = cfile->srch_inf.ntwrk_buf_start;
 	if (buf) {
-		cFYI(1, "closedir free smb buf in srch struct");
+		cifs_dbg(FYI, "closedir free smb buf in srch struct\n");
 		cfile->srch_inf.ntwrk_buf_start = NULL;
 		if (cfile->srch_inf.smallBuf)
 			cifs_small_buf_release(buf);
@@ -1140,7 +1136,7 @@
 			 * The list ended. We don't have enough allocated
 			 * structures - something is really wrong.
 			 */
-			cERROR(1, "Can't push all brlocks!");
+			cifs_dbg(VFS, "Can't push all brlocks!\n");
 			break;
 		}
 		length = 1 + flock->fl_end - flock->fl_start;
@@ -1213,47 +1209,46 @@
 		bool *wait_flag, struct TCP_Server_Info *server)
 {
 	if (flock->fl_flags & FL_POSIX)
-		cFYI(1, "Posix");
+		cifs_dbg(FYI, "Posix\n");
 	if (flock->fl_flags & FL_FLOCK)
-		cFYI(1, "Flock");
+		cifs_dbg(FYI, "Flock\n");
 	if (flock->fl_flags & FL_SLEEP) {
-		cFYI(1, "Blocking lock");
+		cifs_dbg(FYI, "Blocking lock\n");
 		*wait_flag = true;
 	}
 	if (flock->fl_flags & FL_ACCESS)
-		cFYI(1, "Process suspended by mandatory locking - "
-			"not implemented yet");
+		cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n");
 	if (flock->fl_flags & FL_LEASE)
-		cFYI(1, "Lease on file - not implemented yet");
+		cifs_dbg(FYI, "Lease on file - not implemented yet\n");
 	if (flock->fl_flags &
 	    (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
 	       FL_ACCESS | FL_LEASE | FL_CLOSE)))
-		cFYI(1, "Unknown lock flags 0x%x", flock->fl_flags);
+		cifs_dbg(FYI, "Unknown lock flags 0x%x\n", flock->fl_flags);
 
 	*type = server->vals->large_lock_type;
 	if (flock->fl_type == F_WRLCK) {
-		cFYI(1, "F_WRLCK ");
+		cifs_dbg(FYI, "F_WRLCK\n");
 		*type |= server->vals->exclusive_lock_type;
 		*lock = 1;
 	} else if (flock->fl_type == F_UNLCK) {
-		cFYI(1, "F_UNLCK");
+		cifs_dbg(FYI, "F_UNLCK\n");
 		*type |= server->vals->unlock_lock_type;
 		*unlock = 1;
 		/* Check if unlock includes more than one lock range */
 	} else if (flock->fl_type == F_RDLCK) {
-		cFYI(1, "F_RDLCK");
+		cifs_dbg(FYI, "F_RDLCK\n");
 		*type |= server->vals->shared_lock_type;
 		*lock = 1;
 	} else if (flock->fl_type == F_EXLCK) {
-		cFYI(1, "F_EXLCK");
+		cifs_dbg(FYI, "F_EXLCK\n");
 		*type |= server->vals->exclusive_lock_type;
 		*lock = 1;
 	} else if (flock->fl_type == F_SHLCK) {
-		cFYI(1, "F_SHLCK");
+		cifs_dbg(FYI, "F_SHLCK\n");
 		*type |= server->vals->shared_lock_type;
 		*lock = 1;
 	} else
-		cFYI(1, "Unknown type of lock");
+		cifs_dbg(FYI, "Unknown type of lock\n");
 }
 
 static int
@@ -1296,8 +1291,8 @@
 					    type, 0, 1, false);
 		flock->fl_type = F_UNLCK;
 		if (rc != 0)
-			cERROR(1, "Error unlocking previously locked "
-				  "range %d during test of lock", rc);
+			cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
+				 rc);
 		return 0;
 	}
 
@@ -1316,8 +1311,8 @@
 			type | server->vals->shared_lock_type, 0, 1, false);
 		flock->fl_type = F_RDLCK;
 		if (rc != 0)
-			cERROR(1, "Error unlocking previously locked "
-				  "range %d during test of lock", rc);
+			cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
+				 rc);
 	} else
 		flock->fl_type = F_WRLCK;
 
@@ -1508,8 +1503,8 @@
 		if (!CIFS_I(inode)->clientCanCacheAll &&
 					CIFS_I(inode)->clientCanCacheRead) {
 			cifs_invalidate_mapping(inode);
-			cFYI(1, "Set no oplock for inode=%p due to mand locks",
-			     inode);
+			cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n",
+				 inode);
 			CIFS_I(inode)->clientCanCacheRead = false;
 		}
 
@@ -1546,9 +1541,9 @@
 	rc = -EACCES;
 	xid = get_xid();
 
-	cFYI(1, "Lock parm: 0x%x flockflags: 0x%x flocktype: 0x%x start: %lld "
-		"end: %lld", cmd, flock->fl_flags, flock->fl_type,
-		flock->fl_start, flock->fl_end);
+	cifs_dbg(FYI, "Lock parm: 0x%x flockflags: 0x%x flocktype: 0x%x start: %lld end: %lld\n",
+		 cmd, flock->fl_flags, flock->fl_type,
+		 flock->fl_start, flock->fl_end);
 
 	cfile = (struct cifsFileInfo *)file->private_data;
 	tcon = tlink_tcon(cfile->tlink);
@@ -1620,8 +1615,8 @@
 
 	cifs_sb = CIFS_SB(dentry->d_sb);
 
-	cFYI(1, "write %zd bytes to offset %lld of %s", write_size,
-	     *offset, dentry->d_name.name);
+	cifs_dbg(FYI, "write %zd bytes to offset %lld of %s\n",
+		 write_size, *offset, dentry->d_name.name);
 
 	tcon = tlink_tcon(open_file->tlink);
 	server = tcon->ses->server;
@@ -1736,7 +1731,7 @@
 	it being zero) during stress testcases so we need to check for it */
 
 	if (cifs_inode == NULL) {
-		cERROR(1, "Null inode passed to cifs_writeable_file");
+		cifs_dbg(VFS, "Null inode passed to cifs_writeable_file\n");
 		dump_stack();
 		return NULL;
 	}
@@ -1848,7 +1843,7 @@
 		else if (bytes_written < 0)
 			rc = bytes_written;
 	} else {
-		cFYI(1, "No writeable filehandles for inode");
+		cifs_dbg(FYI, "No writeable filehandles for inode\n");
 		rc = -EIO;
 	}
 
@@ -2015,7 +2010,7 @@
 			wdata->cfile = find_writable_file(CIFS_I(mapping->host),
 							  false);
 			if (!wdata->cfile) {
-				cERROR(1, "No writable handles for inode");
+				cifs_dbg(VFS, "No writable handles for inode\n");
 				rc = -EBADF;
 				break;
 			}
@@ -2076,7 +2071,7 @@
 /* BB add check for wbc flags */
 	page_cache_get(page);
 	if (!PageUptodate(page))
-		cFYI(1, "ppw - page not up to date");
+		cifs_dbg(FYI, "ppw - page not up to date\n");
 
 	/*
 	 * Set the "writeback" flag, and clear "dirty" in the radix tree.
@@ -2127,7 +2122,7 @@
 	else
 		pid = current->tgid;
 
-	cFYI(1, "write_end for page %p from pos %lld with %d bytes",
+	cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n",
 		 page, pos, copied);
 
 	if (PageChecked(page)) {
@@ -2191,13 +2186,13 @@
 
 	xid = get_xid();
 
-	cFYI(1, "Sync file - name: %s datasync: 0x%x",
-		file->f_path.dentry->d_name.name, datasync);
+	cifs_dbg(FYI, "Sync file - name: %s datasync: 0x%x\n",
+		 file->f_path.dentry->d_name.name, datasync);
 
 	if (!CIFS_I(inode)->clientCanCacheRead) {
 		rc = cifs_invalidate_mapping(inode);
 		if (rc) {
-			cFYI(1, "rc: %d during invalidate phase", rc);
+			cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc);
 			rc = 0; /* don't care about it in fsync */
 		}
 	}
@@ -2233,8 +2228,8 @@
 
 	xid = get_xid();
 
-	cFYI(1, "Sync file - name: %s datasync: 0x%x",
-		file->f_path.dentry->d_name.name, datasync);
+	cifs_dbg(FYI, "Sync file - name: %s datasync: 0x%x\n",
+		 file->f_path.dentry->d_name.name, datasync);
 
 	tcon = tlink_tcon(smbfile->tlink);
 	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
@@ -2262,7 +2257,7 @@
 	if (file->f_mode & FMODE_WRITE)
 		rc = filemap_write_and_wait(inode->i_mapping);
 
-	cFYI(1, "Flush inode %p file %p rc %d", inode, file, rc);
+	cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc);
 
 	return rc;
 }
@@ -2579,8 +2574,8 @@
 		 * an old data.
 		 */
 		cifs_invalidate_mapping(inode);
-		cFYI(1, "Set no oplock for inode=%p after a write operation",
-		     inode);
+		cifs_dbg(FYI, "Set no oplock for inode=%p after a write operation\n",
+			 inode);
 		cinode->clientCanCacheRead = false;
 	}
 	return written;
@@ -2756,15 +2751,15 @@
 			/* enough data to fill the page */
 			iov.iov_base = kmap(page);
 			iov.iov_len = PAGE_SIZE;
-			cFYI(1, "%u: iov_base=%p iov_len=%zu",
-				i, iov.iov_base, iov.iov_len);
+			cifs_dbg(FYI, "%u: iov_base=%p iov_len=%zu\n",
+				 i, iov.iov_base, iov.iov_len);
 			len -= PAGE_SIZE;
 		} else if (len > 0) {
 			/* enough for partial page, fill and zero the rest */
 			iov.iov_base = kmap(page);
 			iov.iov_len = len;
-			cFYI(1, "%u: iov_base=%p iov_len=%zu",
-				i, iov.iov_base, iov.iov_len);
+			cifs_dbg(FYI, "%u: iov_base=%p iov_len=%zu\n",
+				 i, iov.iov_base, iov.iov_len);
 			memset(iov.iov_base + len, '\0', PAGE_SIZE - len);
 			rdata->tailsz = len;
 			len = 0;
@@ -2824,7 +2819,7 @@
 		pid = current->tgid;
 
 	if ((file->f_flags & O_ACCMODE) == O_WRONLY)
-		cFYI(1, "attempting read on write only file instance");
+		cifs_dbg(FYI, "attempting read on write only file instance\n");
 
 	do {
 		cur_len = min_t(const size_t, len - total_read, cifs_sb->rsize);
@@ -3003,7 +2998,7 @@
 		pid = current->tgid;
 
 	if ((file->f_flags & O_ACCMODE) == O_WRONLY)
-		cFYI(1, "attempting read on write only file instance");
+		cifs_dbg(FYI, "attempting read on write only file instance\n");
 
 	for (total_read = 0, cur_offset = read_data; read_size > total_read;
 	     total_read += bytes_read, cur_offset += bytes_read) {
@@ -3094,7 +3089,8 @@
 	xid = get_xid();
 	rc = cifs_revalidate_file(file);
 	if (rc) {
-		cFYI(1, "Validation prior to mmap failed, error=%d", rc);
+		cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n",
+			 rc);
 		free_xid(xid);
 		return rc;
 	}
@@ -3147,7 +3143,7 @@
 	/* determine the eof that the server (probably) has */
 	eof = CIFS_I(rdata->mapping->host)->server_eof;
 	eof_index = eof ? (eof - 1) >> PAGE_CACHE_SHIFT : 0;
-	cFYI(1, "eof=%llu eof_index=%lu", eof, eof_index);
+	cifs_dbg(FYI, "eof=%llu eof_index=%lu\n", eof, eof_index);
 
 	rdata->tailsz = PAGE_CACHE_SIZE;
 	for (i = 0; i < nr_pages; i++) {
@@ -3157,15 +3153,15 @@
 			/* enough data to fill the page */
 			iov.iov_base = kmap(page);
 			iov.iov_len = PAGE_CACHE_SIZE;
-			cFYI(1, "%u: idx=%lu iov_base=%p iov_len=%zu",
-				i, page->index, iov.iov_base, iov.iov_len);
+			cifs_dbg(FYI, "%u: idx=%lu iov_base=%p iov_len=%zu\n",
+				 i, page->index, iov.iov_base, iov.iov_len);
 			len -= PAGE_CACHE_SIZE;
 		} else if (len > 0) {
 			/* enough for partial page, fill and zero the rest */
 			iov.iov_base = kmap(page);
 			iov.iov_len = len;
-			cFYI(1, "%u: idx=%lu iov_base=%p iov_len=%zu",
-				i, page->index, iov.iov_base, iov.iov_len);
+			cifs_dbg(FYI, "%u: idx=%lu iov_base=%p iov_len=%zu\n",
+				 i, page->index, iov.iov_base, iov.iov_len);
 			memset(iov.iov_base + len,
 				'\0', PAGE_CACHE_SIZE - len);
 			rdata->tailsz = len;
@@ -3245,8 +3241,8 @@
 	rc = 0;
 	INIT_LIST_HEAD(&tmplist);
 
-	cFYI(1, "%s: file=%p mapping=%p num_pages=%u", __func__, file,
-		mapping, num_pages);
+	cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
+		 __func__, file, mapping, num_pages);
 
 	/*
 	 * Start with the page at end of list and move it to private
@@ -3376,7 +3372,7 @@
 	if (rc < 0)
 		goto io_error;
 	else
-		cFYI(1, "Bytes read %d", rc);
+		cifs_dbg(FYI, "Bytes read %d\n", rc);
 
 	file_inode(file)->i_atime =
 		current_fs_time(file_inode(file)->i_sb);
@@ -3414,7 +3410,7 @@
 		return rc;
 	}
 
-	cFYI(1, "readpage %p at offset %d 0x%x",
+	cifs_dbg(FYI, "readpage %p at offset %d 0x%x\n",
 		 page, (int)offset, (int)offset);
 
 	rc = cifs_readpage_worker(file, page, &offset);
@@ -3481,7 +3477,7 @@
 	struct page *page;
 	int rc = 0;
 
-	cFYI(1, "write_begin from %lld len %d", (long long)pos, len);
+	cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len);
 
 	page = grab_cache_page_write_begin(mapping, index, flags);
 	if (!page) {
@@ -3570,7 +3566,7 @@
 		.range_end = range_end,
 	};
 
-	cFYI(1, "Launder page: %p", page);
+	cifs_dbg(FYI, "Launder page: %p\n", page);
 
 	if (clear_page_dirty_for_io(page))
 		rc = cifs_writepage_locked(page, &wbc);
@@ -3590,8 +3586,8 @@
 
 	if (!cinode->clientCanCacheAll && cinode->clientCanCacheRead &&
 						cifs_has_mand_locks(cinode)) {
-		cFYI(1, "Reset oplock to None for inode=%p due to mand locks",
-		     inode);
+		cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
+			 inode);
 		cinode->clientCanCacheRead = false;
 	}
 
@@ -3606,12 +3602,12 @@
 			mapping_set_error(inode->i_mapping, rc);
 			cifs_invalidate_mapping(inode);
 		}
-		cFYI(1, "Oplock flush inode %p rc %d", inode, rc);
+		cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc);
 	}
 
 	rc = cifs_push_locks(cfile);
 	if (rc)
-		cERROR(1, "Push locks rc = %d", rc);
+		cifs_dbg(VFS, "Push locks rc = %d\n", rc);
 
 	/*
 	 * releasing stale oplock after recent reconnect of smb session using
@@ -3622,7 +3618,7 @@
 	if (!cfile->oplock_break_cancelled) {
 		rc = tcon->ses->server->ops->oplock_response(tcon, &cfile->fid,
 							     cinode);
-		cFYI(1, "Oplock release rc = %d", rc);
+		cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
 	}
 }
 
diff --git a/fs/cifs/fscache.c b/fs/cifs/fscache.c
index 42e5363..2f4bc5a 100644
--- a/fs/cifs/fscache.c
+++ b/fs/cifs/fscache.c
@@ -28,14 +28,14 @@
 	server->fscache =
 		fscache_acquire_cookie(cifs_fscache_netfs.primary_index,
 				&cifs_fscache_server_index_def, server);
-	cFYI(1, "%s: (0x%p/0x%p)", __func__, server,
-			server->fscache);
+	cifs_dbg(FYI, "%s: (0x%p/0x%p)\n",
+		 __func__, server, server->fscache);
 }
 
 void cifs_fscache_release_client_cookie(struct TCP_Server_Info *server)
 {
-	cFYI(1, "%s: (0x%p/0x%p)", __func__, server,
-			server->fscache);
+	cifs_dbg(FYI, "%s: (0x%p/0x%p)\n",
+		 __func__, server, server->fscache);
 	fscache_relinquish_cookie(server->fscache, 0);
 	server->fscache = NULL;
 }
@@ -47,13 +47,13 @@
 	tcon->fscache =
 		fscache_acquire_cookie(server->fscache,
 				&cifs_fscache_super_index_def, tcon);
-	cFYI(1, "%s: (0x%p/0x%p)", __func__, server->fscache,
-			tcon->fscache);
+	cifs_dbg(FYI, "%s: (0x%p/0x%p)\n",
+		 __func__, server->fscache, tcon->fscache);
 }
 
 void cifs_fscache_release_super_cookie(struct cifs_tcon *tcon)
 {
-	cFYI(1, "%s: (0x%p)", __func__, tcon->fscache);
+	cifs_dbg(FYI, "%s: (0x%p)\n", __func__, tcon->fscache);
 	fscache_relinquish_cookie(tcon->fscache, 0);
 	tcon->fscache = NULL;
 }
@@ -70,8 +70,8 @@
 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_FSCACHE) {
 		cifsi->fscache = fscache_acquire_cookie(tcon->fscache,
 				&cifs_fscache_inode_object_def, cifsi);
-		cFYI(1, "%s: got FH cookie (0x%p/0x%p)", __func__,
-				tcon->fscache, cifsi->fscache);
+		cifs_dbg(FYI, "%s: got FH cookie (0x%p/0x%p)\n",
+			 __func__, tcon->fscache, cifsi->fscache);
 	}
 }
 
@@ -80,7 +80,7 @@
 	struct cifsInodeInfo *cifsi = CIFS_I(inode);
 
 	if (cifsi->fscache) {
-		cFYI(1, "%s: (0x%p)", __func__, cifsi->fscache);
+		cifs_dbg(FYI, "%s: (0x%p)\n", __func__, cifsi->fscache);
 		fscache_relinquish_cookie(cifsi->fscache, 0);
 		cifsi->fscache = NULL;
 	}
@@ -91,7 +91,7 @@
 	struct cifsInodeInfo *cifsi = CIFS_I(inode);
 
 	if (cifsi->fscache) {
-		cFYI(1, "%s: (0x%p)", __func__, cifsi->fscache);
+		cifs_dbg(FYI, "%s: (0x%p)\n", __func__, cifsi->fscache);
 		fscache_uncache_all_inode_pages(cifsi->fscache, inode);
 		fscache_relinquish_cookie(cifsi->fscache, 1);
 		cifsi->fscache = NULL;
@@ -120,8 +120,8 @@
 					cifs_sb_master_tcon(cifs_sb)->fscache,
 					&cifs_fscache_inode_object_def,
 					cifsi);
-		cFYI(1, "%s: new cookie 0x%p oldcookie 0x%p",
-				__func__, cifsi->fscache, old);
+		cifs_dbg(FYI, "%s: new cookie 0x%p oldcookie 0x%p\n",
+			 __func__, cifsi->fscache, old);
 	}
 }
 
@@ -131,8 +131,8 @@
 		struct inode *inode = page->mapping->host;
 		struct cifsInodeInfo *cifsi = CIFS_I(inode);
 
-		cFYI(1, "%s: (0x%p/0x%p)", __func__, page,
-				cifsi->fscache);
+		cifs_dbg(FYI, "%s: (0x%p/0x%p)\n",
+			 __func__, page, cifsi->fscache);
 		if (!fscache_maybe_release_page(cifsi->fscache, page, gfp))
 			return 0;
 	}
@@ -143,7 +143,7 @@
 static void cifs_readpage_from_fscache_complete(struct page *page, void *ctx,
 						int error)
 {
-	cFYI(1, "%s: (0x%p/%d)", __func__, page, error);
+	cifs_dbg(FYI, "%s: (0x%p/%d)\n", __func__, page, error);
 	if (!error)
 		SetPageUptodate(page);
 	unlock_page(page);
@@ -156,8 +156,8 @@
 {
 	int ret;
 
-	cFYI(1, "%s: (fsc:%p, p:%p, i:0x%p", __func__,
-			CIFS_I(inode)->fscache, page, inode);
+	cifs_dbg(FYI, "%s: (fsc:%p, p:%p, i:0x%p\n",
+		 __func__, CIFS_I(inode)->fscache, page, inode);
 	ret = fscache_read_or_alloc_page(CIFS_I(inode)->fscache, page,
 					 cifs_readpage_from_fscache_complete,
 					 NULL,
@@ -165,15 +165,15 @@
 	switch (ret) {
 
 	case 0: /* page found in fscache, read submitted */
-		cFYI(1, "%s: submitted", __func__);
+		cifs_dbg(FYI, "%s: submitted\n", __func__);
 		return ret;
 	case -ENOBUFS:	/* page won't be cached */
 	case -ENODATA:	/* page not in cache */
-		cFYI(1, "%s: %d", __func__, ret);
+		cifs_dbg(FYI, "%s: %d\n", __func__, ret);
 		return 1;
 
 	default:
-		cERROR(1, "unknown error ret = %d", ret);
+		cifs_dbg(VFS, "unknown error ret = %d\n", ret);
 	}
 	return ret;
 }
@@ -188,8 +188,8 @@
 {
 	int ret;
 
-	cFYI(1, "%s: (0x%p/%u/0x%p)", __func__,
-			CIFS_I(inode)->fscache, *nr_pages, inode);
+	cifs_dbg(FYI, "%s: (0x%p/%u/0x%p)\n",
+		 __func__, CIFS_I(inode)->fscache, *nr_pages, inode);
 	ret = fscache_read_or_alloc_pages(CIFS_I(inode)->fscache, mapping,
 					  pages, nr_pages,
 					  cifs_readpage_from_fscache_complete,
@@ -197,16 +197,16 @@
 					  mapping_gfp_mask(mapping));
 	switch (ret) {
 	case 0:	/* read submitted to the cache for all pages */
-		cFYI(1, "%s: submitted", __func__);
+		cifs_dbg(FYI, "%s: submitted\n", __func__);
 		return ret;
 
 	case -ENOBUFS:	/* some pages are not cached and can't be */
 	case -ENODATA:	/* some pages are not cached */
-		cFYI(1, "%s: no page", __func__);
+		cifs_dbg(FYI, "%s: no page\n", __func__);
 		return 1;
 
 	default:
-		cFYI(1, "unknown error ret = %d", ret);
+		cifs_dbg(FYI, "unknown error ret = %d\n", ret);
 	}
 
 	return ret;
@@ -216,8 +216,8 @@
 {
 	int ret;
 
-	cFYI(1, "%s: (fsc: %p, p: %p, i: %p)", __func__,
-			CIFS_I(inode)->fscache, page, inode);
+	cifs_dbg(FYI, "%s: (fsc: %p, p: %p, i: %p)\n",
+		 __func__, CIFS_I(inode)->fscache, page, inode);
 	ret = fscache_write_page(CIFS_I(inode)->fscache, page, GFP_KERNEL);
 	if (ret != 0)
 		fscache_uncache_page(CIFS_I(inode)->fscache, page);
@@ -228,7 +228,7 @@
 	struct cifsInodeInfo *cifsi = CIFS_I(inode);
 	struct fscache_cookie *cookie = cifsi->fscache;
 
-	cFYI(1, "%s: (0x%p/0x%p)", __func__, page, cookie);
+	cifs_dbg(FYI, "%s: (0x%p/0x%p)\n", __func__, page, cookie);
 	fscache_wait_on_page_write(cookie, page);
 	fscache_uncache_page(cookie, page);
 }
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 20887bf..fc30251 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -91,30 +91,32 @@
 {
 	struct cifsInodeInfo *cifs_i = CIFS_I(inode);
 
-	cFYI(1, "%s: revalidating inode %llu", __func__, cifs_i->uniqueid);
+	cifs_dbg(FYI, "%s: revalidating inode %llu\n",
+		 __func__, cifs_i->uniqueid);
 
 	if (inode->i_state & I_NEW) {
-		cFYI(1, "%s: inode %llu is new", __func__, cifs_i->uniqueid);
+		cifs_dbg(FYI, "%s: inode %llu is new\n",
+			 __func__, cifs_i->uniqueid);
 		return;
 	}
 
 	/* don't bother with revalidation if we have an oplock */
 	if (cifs_i->clientCanCacheRead) {
-		cFYI(1, "%s: inode %llu is oplocked", __func__,
-			 cifs_i->uniqueid);
+		cifs_dbg(FYI, "%s: inode %llu is oplocked\n",
+			 __func__, cifs_i->uniqueid);
 		return;
 	}
 
 	 /* revalidate if mtime or size have changed */
 	if (timespec_equal(&inode->i_mtime, &fattr->cf_mtime) &&
 	    cifs_i->server_eof == fattr->cf_eof) {
-		cFYI(1, "%s: inode %llu is unchanged", __func__,
-			 cifs_i->uniqueid);
+		cifs_dbg(FYI, "%s: inode %llu is unchanged\n",
+			 __func__, cifs_i->uniqueid);
 		return;
 	}
 
-	cFYI(1, "%s: invalidating inode %llu mapping", __func__,
-		 cifs_i->uniqueid);
+	cifs_dbg(FYI, "%s: invalidating inode %llu mapping\n",
+		 __func__, cifs_i->uniqueid);
 	cifs_i->invalid_mapping = true;
 }
 
@@ -240,7 +242,7 @@
 		/* safest to call it a file if we do not know */
 		fattr->cf_mode |= S_IFREG;
 		fattr->cf_dtype = DT_REG;
-		cFYI(1, "unknown type %d", le32_to_cpu(info->Type));
+		cifs_dbg(FYI, "unknown type %d\n", le32_to_cpu(info->Type));
 		break;
 	}
 
@@ -279,7 +281,7 @@
 {
 	struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
 
-	cFYI(1, "creating fake fattr for DFS referral");
+	cifs_dbg(FYI, "creating fake fattr for DFS referral\n");
 
 	memset(fattr, 0, sizeof(*fattr));
 	fattr->cf_mode = S_IFDIR | S_IXUGO | S_IRWXU;
@@ -329,7 +331,7 @@
 	struct tcon_link *tlink;
 	struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
 
-	cFYI(1, "Getting info on %s", full_path);
+	cifs_dbg(FYI, "Getting info on %s\n", full_path);
 
 	tlink = cifs_sb_tlink(cifs_sb);
 	if (IS_ERR(tlink))
@@ -355,7 +357,7 @@
 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MF_SYMLINKS) {
 		int tmprc = CIFSCheckMFSymlink(&fattr, full_path, cifs_sb, xid);
 		if (tmprc)
-			cFYI(1, "CIFSCheckMFSymlink: %d", tmprc);
+			cifs_dbg(FYI, "CIFSCheckMFSymlink: %d\n", tmprc);
 	}
 
 	if (*pinode == NULL) {
@@ -422,7 +424,7 @@
 				 &buf_type);
 		if ((rc == 0) && (bytes_read >= 8)) {
 			if (memcmp("IntxBLK", pbuf, 8) == 0) {
-				cFYI(1, "Block device");
+				cifs_dbg(FYI, "Block device\n");
 				fattr->cf_mode |= S_IFBLK;
 				fattr->cf_dtype = DT_BLK;
 				if (bytes_read == 24) {
@@ -434,7 +436,7 @@
 					fattr->cf_rdev = MKDEV(mjr, mnr);
 				}
 			} else if (memcmp("IntxCHR", pbuf, 8) == 0) {
-				cFYI(1, "Char device");
+				cifs_dbg(FYI, "Char device\n");
 				fattr->cf_mode |= S_IFCHR;
 				fattr->cf_dtype = DT_CHR;
 				if (bytes_read == 24) {
@@ -446,7 +448,7 @@
 					fattr->cf_rdev = MKDEV(mjr, mnr);
 				}
 			} else if (memcmp("IntxLNK", pbuf, 7) == 0) {
-				cFYI(1, "Symlink");
+				cifs_dbg(FYI, "Symlink\n");
 				fattr->cf_mode |= S_IFLNK;
 				fattr->cf_dtype = DT_LNK;
 			} else {
@@ -497,10 +499,10 @@
 	else if (rc > 3) {
 		mode = le32_to_cpu(*((__le32 *)ea_value));
 		fattr->cf_mode &= ~SFBITS_MASK;
-		cFYI(1, "special bits 0%o org mode 0%o", mode,
-			 fattr->cf_mode);
+		cifs_dbg(FYI, "special bits 0%o org mode 0%o\n",
+			 mode, fattr->cf_mode);
 		fattr->cf_mode = (mode & SFBITS_MASK) | fattr->cf_mode;
-		cFYI(1, "special mode bits 0%o", mode);
+		cifs_dbg(FYI, "special mode bits 0%o\n", mode);
 	}
 
 	return 0;
@@ -635,11 +637,11 @@
 	tcon = tlink_tcon(tlink);
 	server = tcon->ses->server;
 
-	cFYI(1, "Getting info on %s", full_path);
+	cifs_dbg(FYI, "Getting info on %s\n", full_path);
 
 	if ((data == NULL) && (*inode != NULL)) {
 		if (CIFS_I(*inode)->clientCanCacheRead) {
-			cFYI(1, "No need to revalidate cached inode sizes");
+			cifs_dbg(FYI, "No need to revalidate cached inode sizes\n");
 			goto cgii_exit;
 		}
 	}
@@ -714,7 +716,8 @@
 						tcon, cifs_sb, full_path,
 						&fattr.cf_uniqueid, data);
 				if (tmprc) {
-					cFYI(1, "GetSrvInodeNum rc %d", tmprc);
+					cifs_dbg(FYI, "GetSrvInodeNum rc %d\n",
+						 tmprc);
 					fattr.cf_uniqueid = iunique(sb, ROOT_I);
 					cifs_autodisable_serverino(cifs_sb);
 				}
@@ -729,7 +732,7 @@
 	    cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL) {
 		tmprc = cifs_sfu_type(&fattr, full_path, cifs_sb, xid);
 		if (tmprc)
-			cFYI(1, "cifs_sfu_type failed: %d", tmprc);
+			cifs_dbg(FYI, "cifs_sfu_type failed: %d\n", tmprc);
 	}
 
 #ifdef CONFIG_CIFS_ACL
@@ -737,8 +740,8 @@
 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) {
 		rc = cifs_acl_to_fattr(cifs_sb, &fattr, *inode, full_path, fid);
 		if (rc) {
-			cFYI(1, "%s: Getting ACL failed with error: %d",
-				__func__, rc);
+			cifs_dbg(FYI, "%s: Getting ACL failed with error: %d\n",
+				 __func__, rc);
 			goto cgii_exit;
 		}
 	}
@@ -752,7 +755,7 @@
 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MF_SYMLINKS) {
 		tmprc = CIFSCheckMFSymlink(&fattr, full_path, cifs_sb, xid);
 		if (tmprc)
-			cFYI(1, "CIFSCheckMFSymlink: %d", tmprc);
+			cifs_dbg(FYI, "CIFSCheckMFSymlink: %d\n", tmprc);
 	}
 
 	if (!*inode) {
@@ -836,7 +839,7 @@
 	struct inode *inode;
 
 retry_iget5_locked:
-	cFYI(1, "looking for uniqueid=%llu", fattr->cf_uniqueid);
+	cifs_dbg(FYI, "looking for uniqueid=%llu\n", fattr->cf_uniqueid);
 
 	/* hash down to 32-bits on 32-bit arch */
 	hash = cifs_uniqueid_to_ino_t(fattr->cf_uniqueid);
@@ -899,7 +902,7 @@
 #endif
 
 	if (rc && tcon->ipc) {
-		cFYI(1, "ipc connection - fake read inode");
+		cifs_dbg(FYI, "ipc connection - fake read inode\n");
 		spin_lock(&inode->i_lock);
 		inode->i_mode |= S_IFDIR;
 		set_nlink(inode, 2);
@@ -958,7 +961,7 @@
 	 * server times.
 	 */
 	if (set_time && (attrs->ia_valid & ATTR_CTIME)) {
-		cFYI(1, "CIFS - CTIME changed");
+		cifs_dbg(FYI, "CIFS - CTIME changed\n");
 		info_buf.ChangeTime =
 		    cpu_to_le64(cifs_UnixTimeToNT(attrs->ia_ctime));
 	} else
@@ -1127,7 +1130,7 @@
 	struct iattr *attrs = NULL;
 	__u32 dosattr = 0, origattr = 0;
 
-	cFYI(1, "cifs_unlink, dir=0x%p, dentry=0x%p", dir, dentry);
+	cifs_dbg(FYI, "cifs_unlink, dir=0x%p, dentry=0x%p\n", dir, dentry);
 
 	tlink = cifs_sb_tlink(cifs_sb);
 	if (IS_ERR(tlink))
@@ -1150,7 +1153,7 @@
 		rc = CIFSPOSIXDelFile(xid, tcon, full_path,
 			SMB_POSIX_UNLINK_FILE_TARGET, cifs_sb->local_nls,
 			cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
-		cFYI(1, "posix del rc %d", rc);
+		cifs_dbg(FYI, "posix del rc %d\n", rc);
 		if ((rc == 0) || (rc == -ENOENT))
 			goto psx_del_no_retry;
 	}
@@ -1320,7 +1323,7 @@
 	if (rc == -EOPNOTSUPP)
 		goto posix_mkdir_out;
 	else if (rc) {
-		cFYI(1, "posix mkdir returned 0x%x", rc);
+		cifs_dbg(FYI, "posix mkdir returned 0x%x\n", rc);
 		d_drop(dentry);
 		goto posix_mkdir_out;
 	}
@@ -1342,11 +1345,12 @@
 	d_instantiate(dentry, newinode);
 
 #ifdef CONFIG_CIFS_DEBUG2
-	cFYI(1, "instantiated dentry %p %s to inode %p", dentry,
-	     dentry->d_name.name, newinode);
+	cifs_dbg(FYI, "instantiated dentry %p %s to inode %p\n",
+		 dentry, dentry->d_name.name, newinode);
 
 	if (newinode->i_nlink != 2)
-		cFYI(1, "unexpected number of links %d", newinode->i_nlink);
+		cifs_dbg(FYI, "unexpected number of links %d\n",
+			 newinode->i_nlink);
 #endif
 
 posix_mkdir_out:
@@ -1368,7 +1372,8 @@
 	struct TCP_Server_Info *server;
 	char *full_path;
 
-	cFYI(1, "In cifs_mkdir, mode = 0x%hx inode = 0x%p", mode, inode);
+	cifs_dbg(FYI, "In cifs_mkdir, mode = 0x%hx inode = 0x%p\n",
+		 mode, inode);
 
 	cifs_sb = CIFS_SB(inode->i_sb);
 	tlink = cifs_sb_tlink(cifs_sb);
@@ -1402,7 +1407,7 @@
 	/* BB add setting the equivalent of mode via CreateX w/ACLs */
 	rc = server->ops->mkdir(xid, tcon, full_path, cifs_sb);
 	if (rc) {
-		cFYI(1, "cifs_mkdir returned 0x%x", rc);
+		cifs_dbg(FYI, "cifs_mkdir returned 0x%x\n", rc);
 		d_drop(direntry);
 		goto mkdir_out;
 	}
@@ -1432,7 +1437,7 @@
 	char *full_path = NULL;
 	struct cifsInodeInfo *cifsInode;
 
-	cFYI(1, "cifs_rmdir, inode = 0x%p", inode);
+	cifs_dbg(FYI, "cifs_rmdir, inode = 0x%p\n", inode);
 
 	xid = get_xid();
 
@@ -1681,8 +1686,8 @@
 	if (inode->i_mapping && inode->i_mapping->nrpages != 0) {
 		rc = invalidate_inode_pages2(inode->i_mapping);
 		if (rc) {
-			cERROR(1, "%s: could not invalidate inode %p", __func__,
-			       inode);
+			cifs_dbg(VFS, "%s: could not invalidate inode %p\n",
+				 __func__, inode);
 			cifs_i->invalid_mapping = true;
 		}
 	}
@@ -1732,8 +1737,8 @@
 		goto out;
 	}
 
-	cFYI(1, "Update attributes: %s inode 0x%p count %d dentry: 0x%p d_time "
-		 "%ld jiffies %ld", full_path, inode, inode->i_count.counter,
+	cifs_dbg(FYI, "Update attributes: %s inode 0x%p count %d dentry: 0x%p d_time %ld jiffies %ld\n",
+		 full_path, inode, inode->i_count.counter,
 		 dentry, dentry->d_time, jiffies);
 
 	if (cifs_sb_master_tcon(CIFS_SB(sb))->unix_ext)
@@ -1883,7 +1888,7 @@
 		else
 			rc = -ENOSYS;
 		cifsFileInfo_put(open_file);
-		cFYI(1, "SetFSize for attrs rc = %d", rc);
+		cifs_dbg(FYI, "SetFSize for attrs rc = %d\n", rc);
 		if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
 			unsigned int bytes_written;
 
@@ -1894,7 +1899,7 @@
 			io_parms.length = attrs->ia_size;
 			rc = CIFSSMBWrite(xid, &io_parms, &bytes_written,
 					  NULL, NULL, 1);
-			cFYI(1, "Wrt seteof rc %d", rc);
+			cifs_dbg(FYI, "Wrt seteof rc %d\n", rc);
 		}
 	} else
 		rc = -EINVAL;
@@ -1920,7 +1925,7 @@
 						attrs->ia_size, cifs_sb, false);
 	else
 		rc = -ENOSYS;
-	cFYI(1, "SetEOF by path (setattrs) rc = %d", rc);
+	cifs_dbg(FYI, "SetEOF by path (setattrs) rc = %d\n", rc);
 	if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
 		__u16 netfid;
 		int oplock = 0;
@@ -1940,7 +1945,7 @@
 			io_parms.length = attrs->ia_size;
 			rc = CIFSSMBWrite(xid, &io_parms, &bytes_written, NULL,
 					  NULL,  1);
-			cFYI(1, "wrt seteof rc %d", rc);
+			cifs_dbg(FYI, "wrt seteof rc %d\n", rc);
 			CIFSSMBClose(xid, tcon, netfid);
 		}
 	}
@@ -1971,7 +1976,7 @@
 	struct cifs_unix_set_info_args *args = NULL;
 	struct cifsFileInfo *open_file;
 
-	cFYI(1, "setattr_unix on file %s attrs->ia_valid=0x%x",
+	cifs_dbg(FYI, "setattr_unix on file %s attrs->ia_valid=0x%x\n",
 		 direntry->d_name.name, attrs->ia_valid);
 
 	xid = get_xid();
@@ -2114,7 +2119,7 @@
 
 	xid = get_xid();
 
-	cFYI(1, "setattr on file %s attrs->iavalid 0x%x",
+	cifs_dbg(FYI, "setattr on file %s attrs->iavalid 0x%x\n",
 		 direntry->d_name.name, attrs->ia_valid);
 
 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM)
@@ -2166,8 +2171,8 @@
 			rc = id_mode_to_cifs_acl(inode, full_path, NO_CHANGE_64,
 							uid, gid);
 			if (rc) {
-				cFYI(1, "%s: Setting id failed with error: %d",
-					__func__, rc);
+				cifs_dbg(FYI, "%s: Setting id failed with error: %d\n",
+					 __func__, rc);
 				goto cifs_setattr_exit;
 			}
 		}
@@ -2188,8 +2193,8 @@
 			rc = id_mode_to_cifs_acl(inode, full_path, mode,
 						INVALID_UID, INVALID_GID);
 			if (rc) {
-				cFYI(1, "%s: Setting ACL failed with error: %d",
-					__func__, rc);
+				cifs_dbg(FYI, "%s: Setting ACL failed with error: %d\n",
+					 __func__, rc);
 				goto cifs_setattr_exit;
 			}
 		} else
@@ -2277,7 +2282,7 @@
 #if 0
 void cifs_delete_inode(struct inode *inode)
 {
-	cFYI(1, "In cifs_delete_inode, inode = 0x%p", inode);
+	cifs_dbg(FYI, "In cifs_delete_inode, inode = 0x%p\n", inode);
 	/* may have to add back in if and when safe distributed caching of
 	   directories added e.g. via FindNotify */
 }
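
The cifs_revalidate_cache() hunks near the top of this file keep a
three-step decision: a brand-new inode needs no work, a read oplock lets
the client trust its cache outright, and otherwise the mapping is
invalidated only when the server's mtime or end-of-file moved. A
compressed model of that decision, with invented stand-in types:

	#include <stdbool.h>
	#include <time.h>

	struct cache_view {			/* simplified stand-in */
		struct timespec mtime;
		long long eof;
		bool read_oplock;
	};

	static bool needs_invalidate(const struct cache_view *cached,
				     const struct cache_view *server)
	{
		if (cached->read_oplock)
			return false;	/* oplock: cache is authoritative */
		if (cached->mtime.tv_sec == server->mtime.tv_sec &&
		    cached->mtime.tv_nsec == server->mtime.tv_nsec &&
		    cached->eof == server->eof)
			return false;	/* nothing changed on the server */
		return true;
	}
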
diff --git a/fs/cifs/ioctl.c b/fs/cifs/ioctl.c
index 6c9f121..3e08455 100644
--- a/fs/cifs/ioctl.c
+++ b/fs/cifs/ioctl.c
@@ -44,7 +44,7 @@
 
 	xid = get_xid();
 
-	cFYI(1, "ioctl file %p  cmd %u  arg %lu", filep, command, arg);
+	cifs_dbg(FYI, "ioctl file %p  cmd %u  arg %lu\n", filep, command, arg);
 
 	cifs_sb = CIFS_SB(inode->i_sb);
 
@@ -83,11 +83,11 @@
 				 *		       &ExtAttrMask);
 				 */
 			}
-			cFYI(1, "set flags not implemented yet");
+			cifs_dbg(FYI, "set flags not implemented yet\n");
 			break;
 #endif /* CONFIG_CIFS_POSIX */
 		default:
-			cFYI(1, "unsupported ioctl");
+			cifs_dbg(FYI, "unsupported ioctl\n");
 			break;
 	}
 
diff --git a/fs/cifs/link.c b/fs/cifs/link.c
index 9f6c4c4..b83c3f5 100644
--- a/fs/cifs/link.c
+++ b/fs/cifs/link.c
@@ -56,14 +56,14 @@
 	md5 = crypto_alloc_shash("md5", 0, 0);
 	if (IS_ERR(md5)) {
 		rc = PTR_ERR(md5);
-		cERROR(1, "%s: Crypto md5 allocation error %d", __func__, rc);
+		cifs_dbg(VFS, "%s: Crypto md5 allocation error %d\n",
+			 __func__, rc);
 		return rc;
 	}
 	size = sizeof(struct shash_desc) + crypto_shash_descsize(md5);
 	sdescmd5 = kmalloc(size, GFP_KERNEL);
 	if (!sdescmd5) {
 		rc = -ENOMEM;
-		cERROR(1, "%s: Memory allocation failure", __func__);
 		goto symlink_hash_err;
 	}
 	sdescmd5->shash.tfm = md5;
@@ -71,17 +71,17 @@
 
 	rc = crypto_shash_init(&sdescmd5->shash);
 	if (rc) {
-		cERROR(1, "%s: Could not init md5 shash", __func__);
+		cifs_dbg(VFS, "%s: Could not init md5 shash\n", __func__);
 		goto symlink_hash_err;
 	}
 	rc = crypto_shash_update(&sdescmd5->shash, link_str, link_len);
 	if (rc) {
-		cERROR(1, "%s: Could not update with link_str", __func__);
+		cifs_dbg(VFS, "%s: Could not update with link_str\n", __func__);
 		goto symlink_hash_err;
 	}
 	rc = crypto_shash_final(&sdescmd5->shash, md5_hash);
 	if (rc)
-		cERROR(1, "%s: Could not generate md5 hash", __func__);
+		cifs_dbg(VFS, "%s: Could not generate md5 hash\n", __func__);
 
 symlink_hash_err:
 	crypto_free_shash(md5);
@@ -115,7 +115,7 @@
 
 	rc = symlink_hash(link_len, link_str, md5_hash);
 	if (rc) {
-		cFYI(1, "%s: MD5 hash failure: %d", __func__, rc);
+		cifs_dbg(FYI, "%s: MD5 hash failure: %d\n", __func__, rc);
 		return rc;
 	}
 
@@ -154,7 +154,7 @@
 
 	rc = symlink_hash(link_len, link_str, md5_hash);
 	if (rc) {
-		cFYI(1, "%s: MD5 hash failure: %d", __func__, rc);
+		cifs_dbg(FYI, "%s: MD5 hash failure: %d\n", __func__, rc);
 		return rc;
 	}
 
@@ -521,7 +521,7 @@
 	if (!full_path)
 		goto out;
 
-	cFYI(1, "Full path: %s inode = 0x%p", full_path, inode);
+	cifs_dbg(FYI, "Full path: %s inode = 0x%p\n", full_path, inode);
 
 	rc = -EACCES;
 	/*
@@ -578,8 +578,8 @@
 		goto symlink_exit;
 	}
 
-	cFYI(1, "Full path: %s", full_path);
-	cFYI(1, "symname is %s", symname);
+	cifs_dbg(FYI, "Full path: %s\n", full_path);
+	cifs_dbg(FYI, "symname is %s\n", symname);
 
 	/* BB what if DFS and this volume is on different share? BB */
 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MF_SYMLINKS)
@@ -601,8 +601,8 @@
 						 inode->i_sb, xid, NULL);
 
 		if (rc != 0) {
-			cFYI(1, "Create symlink ok, getinodeinfo fail rc = %d",
-			      rc);
+			cifs_dbg(FYI, "Create symlink ok, getinodeinfo fail rc = %d\n",
+				 rc);
 		} else {
 			d_instantiate(direntry, newinode);
 		}
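
symlink_hash(), updated above, follows the usual one-shot shash sequence:
allocate a tfm, size and fill a shash_desc, then init/update/final. A
kernel-style condensation of that pattern follows; the function name is
invented, error paths are trimmed, and the separate allocation-failure
message disappears here as elsewhere in the series, presumably because the
allocator already warns on failure.

	#include <crypto/hash.h>
	#include <linux/err.h>
	#include <linux/slab.h>

	/* Sketch of the pattern used by symlink_hash() above. */
	static int md5_digest(const void *data, unsigned int len, u8 *out)
	{
		struct crypto_shash *md5;
		struct shash_desc *desc;
		int rc;

		md5 = crypto_alloc_shash("md5", 0, 0);
		if (IS_ERR(md5))
			return PTR_ERR(md5);

		desc = kzalloc(sizeof(*desc) + crypto_shash_descsize(md5),
			       GFP_KERNEL);
		if (!desc) {
			rc = -ENOMEM;
			goto out_free_tfm;
		}
		desc->tfm = md5;

		rc = crypto_shash_init(desc);
		if (!rc)
			rc = crypto_shash_update(desc, data, len);
		if (!rc)
			rc = crypto_shash_final(desc, out);

		kfree(desc);
	out_free_tfm:
		crypto_free_shash(md5);
		return rc;
	}
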
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index 1b15bf8..1bec014 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -54,7 +54,7 @@
 	if (GlobalTotalActiveXid > GlobalMaxActiveXid)
 		GlobalMaxActiveXid = GlobalTotalActiveXid;
 	if (GlobalTotalActiveXid > 65000)
-		cFYI(1, "warning: more than 65000 requests active");
+		cifs_dbg(FYI, "warning: more than 65000 requests active\n");
 	xid = GlobalCurrentXid++;
 	spin_unlock(&GlobalMid_Lock);
 	return xid;
@@ -91,7 +91,7 @@
 sesInfoFree(struct cifs_ses *buf_to_free)
 {
 	if (buf_to_free == NULL) {
-		cFYI(1, "Null buffer passed to sesInfoFree");
+		cifs_dbg(FYI, "Null buffer passed to sesInfoFree\n");
 		return;
 	}
 
@@ -130,7 +130,7 @@
 tconInfoFree(struct cifs_tcon *buf_to_free)
 {
 	if (buf_to_free == NULL) {
-		cFYI(1, "Null buffer passed to tconInfoFree");
+		cifs_dbg(FYI, "Null buffer passed to tconInfoFree\n");
 		return;
 	}
 	atomic_dec(&tconInfoAllocCount);
@@ -180,7 +180,7 @@
 cifs_buf_release(void *buf_to_free)
 {
 	if (buf_to_free == NULL) {
-		/* cFYI(1, "Null buffer passed to cifs_buf_release");*/
+		/* cifs_dbg(FYI, "Null buffer passed to cifs_buf_release\n");*/
 		return;
 	}
 	mempool_free(buf_to_free, cifs_req_poolp);
@@ -216,7 +216,7 @@
 {
 
 	if (buf_to_free == NULL) {
-		cFYI(1, "Null buffer passed to cifs_small_buf_release");
+		cifs_dbg(FYI, "Null buffer passed to cifs_small_buf_release\n");
 		return;
 	}
 	mempool_free(buf_to_free, cifs_sm_req_poolp);
@@ -282,15 +282,15 @@
 {
 	/* does it have the right SMB "signature" ? */
 	if (*(__le32 *) smb->Protocol != cpu_to_le32(0x424d53ff)) {
-		cERROR(1, "Bad protocol string signature header 0x%x",
-			*(unsigned int *)smb->Protocol);
+		cifs_dbg(VFS, "Bad protocol string signature header 0x%x\n",
+			 *(unsigned int *)smb->Protocol);
 		return 1;
 	}
 
 	/* Make sure that message ids match */
 	if (mid != smb->Mid) {
-		cERROR(1, "Mids do not match. received=%u expected=%u",
-			smb->Mid, mid);
+		cifs_dbg(VFS, "Mids do not match. received=%u expected=%u\n",
+			 smb->Mid, mid);
 		return 1;
 	}
 
@@ -302,7 +302,7 @@
 	if (smb->Command == SMB_COM_LOCKING_ANDX)
 		return 0;
 
-	cERROR(1, "Server sent request, not response. mid=%u", smb->Mid);
+	cifs_dbg(VFS, "Server sent request, not response. mid=%u\n", smb->Mid);
 	return 1;
 }
 
@@ -313,8 +313,8 @@
 	__u16 mid = smb->Mid;
 	__u32 rfclen = be32_to_cpu(smb->smb_buf_length);
 	__u32 clc_len;  /* calculated length */
-	cFYI(0, "checkSMB Length: 0x%x, smb_buf_length: 0x%x",
-		total_read, rfclen);
+	cifs_dbg(FYI, "checkSMB Length: 0x%x, smb_buf_length: 0x%x\n",
+		 total_read, rfclen);
 
 	/* is this frame too small to even get to a BCC? */
 	if (total_read < 2 + sizeof(struct smb_hdr)) {
@@ -340,9 +340,9 @@
 				tmp[sizeof(struct smb_hdr)+1] = 0;
 				return 0;
 			}
-			cERROR(1, "rcvd invalid byte count (bcc)");
+			cifs_dbg(VFS, "rcvd invalid byte count (bcc)\n");
 		} else {
-			cERROR(1, "Length less than smb header size");
+			cifs_dbg(VFS, "Length less than smb header size\n");
 		}
 		return -EIO;
 	}
@@ -353,8 +353,8 @@
 	clc_len = smbCalcSize(smb);
 
 	if (4 + rfclen != total_read) {
-		cERROR(1, "Length read does not match RFC1001 length %d",
-				rfclen);
+		cifs_dbg(VFS, "Length read does not match RFC1001 length %d\n",
+			 rfclen);
 		return -EIO;
 	}
 
@@ -365,12 +365,12 @@
 			if (((4 + rfclen) & 0xFFFF) == (clc_len & 0xFFFF))
 				return 0; /* bcc wrapped */
 		}
-		cFYI(1, "Calculated size %u vs length %u mismatch for mid=%u",
-				clc_len, 4 + rfclen, smb->Mid);
+		cifs_dbg(FYI, "Calculated size %u vs length %u mismatch for mid=%u\n",
+			 clc_len, 4 + rfclen, smb->Mid);
 
 		if (4 + rfclen < clc_len) {
-			cERROR(1, "RFC1001 size %u smaller than SMB for mid=%u",
-					rfclen, smb->Mid);
+			cifs_dbg(VFS, "RFC1001 size %u smaller than SMB for mid=%u\n",
+				 rfclen, smb->Mid);
 			return -EIO;
 		} else if (rfclen > clc_len + 512) {
 			/*
@@ -382,8 +382,8 @@
 			 * trailing data, we choose limit the amount of extra
 			 * data to 512 bytes.
 			 */
-			cERROR(1, "RFC1001 size %u more than 512 bytes larger "
-				  "than SMB for mid=%u", rfclen, smb->Mid);
+			cifs_dbg(VFS, "RFC1001 size %u more than 512 bytes larger than SMB for mid=%u\n",
+				 rfclen, smb->Mid);
 			return -EIO;
 		}
 	}
@@ -401,7 +401,7 @@
 	struct cifsInodeInfo *pCifsInode;
 	struct cifsFileInfo *netfile;
 
-	cFYI(1, "Checking for oplock break or dnotify response");
+	cifs_dbg(FYI, "Checking for oplock break or dnotify response\n");
 	if ((pSMB->hdr.Command == SMB_COM_NT_TRANSACT) &&
 	   (pSMB->hdr.Flags & SMBFLG_RESPONSE)) {
 		struct smb_com_transaction_change_notify_rsp *pSMBr =
@@ -413,15 +413,15 @@
 
 			pnotify = (struct file_notify_information *)
 				((char *)&pSMBr->hdr.Protocol + data_offset);
-			cFYI(1, "dnotify on %s Action: 0x%x",
+			cifs_dbg(FYI, "dnotify on %s Action: 0x%x\n",
 				 pnotify->FileName, pnotify->Action);
 			/*   cifs_dump_mem("Rcvd notify Data: ",buf,
 				sizeof(struct smb_hdr)+60); */
 			return true;
 		}
 		if (pSMBr->hdr.Status.CifsError) {
-			cFYI(1, "notify err 0x%d",
-				pSMBr->hdr.Status.CifsError);
+			cifs_dbg(FYI, "notify err 0x%d\n",
+				 pSMBr->hdr.Status.CifsError);
 			return true;
 		}
 		return false;
@@ -435,7 +435,7 @@
 		   large dirty files cached on the client */
 		if ((NT_STATUS_INVALID_HANDLE) ==
 		   le32_to_cpu(pSMB->hdr.Status.CifsError)) {
-			cFYI(1, "invalid handle on oplock break");
+			cifs_dbg(FYI, "invalid handle on oplock break\n");
 			return true;
 		} else if (ERRbadfid ==
 		   le16_to_cpu(pSMB->hdr.Status.DosError.Error)) {
@@ -447,7 +447,7 @@
 	if (pSMB->hdr.WordCount != 8)
 		return false;
 
-	cFYI(1, "oplock type 0x%d level 0x%d",
+	cifs_dbg(FYI, "oplock type 0x%d level 0x%d\n",
 		 pSMB->LockType, pSMB->OplockLevel);
 	if (!(pSMB->LockType & LOCKING_ANDX_OPLOCK_RELEASE))
 		return false;
@@ -469,7 +469,7 @@
 				if (pSMB->Fid != netfile->fid.netfid)
 					continue;
 
-				cFYI(1, "file id match, oplock break");
+				cifs_dbg(FYI, "file id match, oplock break\n");
 				pCifsInode = CIFS_I(netfile->dentry->d_inode);
 
 				cifs_set_oplock_level(pCifsInode,
@@ -484,12 +484,12 @@
 			}
 			spin_unlock(&cifs_file_list_lock);
 			spin_unlock(&cifs_tcp_ses_lock);
-			cFYI(1, "No matching file for oplock break");
+			cifs_dbg(FYI, "No matching file for oplock break\n");
 			return true;
 		}
 	}
 	spin_unlock(&cifs_tcp_ses_lock);
-	cFYI(1, "Can not process oplock break for non-existent connection");
+	cifs_dbg(FYI, "Can not process oplock break for non-existent connection\n");
 	return true;
 }
 
@@ -536,12 +536,8 @@
 {
 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) {
 		cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_SERVER_INUM;
-		cERROR(1, "Autodisabling the use of server inode numbers on "
-			   "%s. This server doesn't seem to support them "
-			   "properly. Hardlinks will not be recognized on this "
-			   "mount. Consider mounting with the \"noserverino\" "
-			   "option to silence this message.",
-			   cifs_sb_master_tcon(cifs_sb)->treeName);
+		cifs_dbg(VFS, "Autodisabling the use of server inode numbers on %s. This server doesn't seem to support them properly. Hardlinks will not be recognized on this mount. Consider mounting with the \"noserverino\" option to silence this message.\n",
+			 cifs_sb_master_tcon(cifs_sb)->treeName);
 	}
 }
 
@@ -552,13 +548,13 @@
 	if (oplock == OPLOCK_EXCLUSIVE) {
 		cinode->clientCanCacheAll = true;
 		cinode->clientCanCacheRead = true;
-		cFYI(1, "Exclusive Oplock granted on inode %p",
-		     &cinode->vfs_inode);
+		cifs_dbg(FYI, "Exclusive Oplock granted on inode %p\n",
+			 &cinode->vfs_inode);
 	} else if (oplock == OPLOCK_READ) {
 		cinode->clientCanCacheAll = false;
 		cinode->clientCanCacheRead = true;
-		cFYI(1, "Level II Oplock granted on inode %p",
-		    &cinode->vfs_inode);
+		cifs_dbg(FYI, "Level II Oplock granted on inode %p\n",
+			 &cinode->vfs_inode);
 	} else {
 		cinode->clientCanCacheAll = false;
 		cinode->clientCanCacheRead = false;
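
The checkSMB() hunks above preserve three length invariants: the RFC1001
transport length must account for exactly what was read off the wire, it
may never be smaller than the size calculated from the SMB fields, and it
may exceed that calculated size only by a small slack that some servers
are known to pad. A minimal model of those checks, with simplified names
and a bare -1 standing in for -EIO:

	/* Model of the frame-length sanity checks in checkSMB() above. */
	static int check_frame_len(unsigned int total_read,	/* bytes received */
				   unsigned int rfclen,		/* RFC1001 length */
				   unsigned int clc_len)	/* size from SMB fields */
	{
		if (4 + rfclen != total_read)
			return -1;	/* short or long read */
		if (4 + rfclen < clc_len)
			return -1;	/* transport smaller than SMB claims */
		if (rfclen > clc_len + 512)
			return -1;	/* too much trailing padding */
		return 0;
	}
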
diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c
index c0b25b2..af847e1 100644
--- a/fs/cifs/netmisc.c
+++ b/fs/cifs/netmisc.c
@@ -150,8 +150,8 @@
 	else if (address_family == AF_INET6)
 		ret = in6_pton(cp, len, dst , '\\', NULL);
 
-	cFYI(DBG2, "address conversion returned %d for %*.*s",
-	     ret, len, len, cp);
+	cifs_dbg(NOISY, "address conversion returned %d for %*.*s\n",
+		 ret, len, len, cp);
 	if (ret > 0)
 		ret = 1;
 	return ret;
@@ -887,7 +887,7 @@
 	}
 	/* else ERRHRD class errors or junk  - return EIO */
 
-	cFYI(1, "Mapping smb error code 0x%x to POSIX err %d",
+	cifs_dbg(FYI, "Mapping smb error code 0x%x to POSIX err %d\n",
 		 le32_to_cpu(smb->Status.CifsError), rc);
 
 	/* generic corrective action e.g. reconnect SMB session on
@@ -951,20 +951,20 @@
 	SMB_TIME *st = (SMB_TIME *)&time;
 	SMB_DATE *sd = (SMB_DATE *)&date;
 
-	cFYI(1, "date %d time %d", date, time);
+	cifs_dbg(FYI, "date %d time %d\n", date, time);
 
 	sec = 2 * st->TwoSeconds;
 	min = st->Minutes;
 	if ((sec > 59) || (min > 59))
-		cERROR(1, "illegal time min %d sec %d", min, sec);
+		cifs_dbg(VFS, "illegal time min %d sec %d\n", min, sec);
 	sec += (min * 60);
 	sec += 60 * 60 * st->Hours;
 	if (st->Hours > 24)
-		cERROR(1, "illegal hours %d", st->Hours);
+		cifs_dbg(VFS, "illegal hours %d\n", st->Hours);
 	days = sd->Day;
 	month = sd->Month;
 	if ((days > 31) || (month > 12)) {
-		cERROR(1, "illegal date, month %d day: %d", month, days);
+		cifs_dbg(VFS, "illegal date, month %d day: %d\n", month, days);
 		if (month > 12)
 			month = 12;
 	}
@@ -990,7 +990,7 @@
 
 	ts.tv_sec = sec + offset;
 
-	/* cFYI(1, "sec after cnvrt dos to unix time %d",sec); */
+	/* cifs_dbg(FYI, "sec after cnvrt dos to unix time %d\n",sec); */
 
 	ts.tv_nsec = 0;
 	return ts;
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index df40cc5..770d5a9 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -48,15 +48,15 @@
 	if (file) {
 		cf = file->private_data;
 		if (cf == NULL) {
-			cFYI(1, "empty cifs private file data");
+			cifs_dbg(FYI, "empty cifs private file data\n");
 			return;
 		}
 		if (cf->invalidHandle)
-			cFYI(1, "invalid handle");
+			cifs_dbg(FYI, "invalid handle\n");
 		if (cf->srch_inf.endOfSearch)
-			cFYI(1, "end of search");
+			cifs_dbg(FYI, "end of search\n");
 		if (cf->srch_inf.emptyDir)
-			cFYI(1, "empty dir");
+			cifs_dbg(FYI, "empty dir\n");
 	}
 }
 #else
@@ -80,7 +80,7 @@
 	struct super_block *sb = parent->d_inode->i_sb;
 	struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
 
-	cFYI(1, "%s: for %s", __func__, name->name);
+	cifs_dbg(FYI, "%s: for %s\n", __func__, name->name);
 
 	dentry = d_hash_and_lookup(parent, name);
 	if (unlikely(IS_ERR(dentry)))
@@ -233,7 +233,7 @@
 				fid,
 				cifs_sb->local_nls);
 		if (CIFSSMBClose(xid, ptcon, fid)) {
-			cFYI(1, "Error closing temporary reparsepoint open");
+			cifs_dbg(FYI, "Error closing temporary reparsepoint open\n");
 		}
 	}
 }
@@ -285,7 +285,7 @@
 		goto error_exit;
 	}
 
-	cFYI(1, "Full path: %s start at: %lld", full_path, file->f_pos);
+	cifs_dbg(FYI, "Full path: %s start at: %lld\n", full_path, file->f_pos);
 
 ffirst_retry:
 	/* test for Unix extensions */
@@ -336,7 +336,7 @@
 		if (ustr[len] == 0)
 			return len << 1;
 	}
-	cFYI(1, "Unicode string longer than PATH_MAX found");
+	cifs_dbg(FYI, "Unicode string longer than PATH_MAX found\n");
 	return len << 1;
 }
 
@@ -353,18 +353,18 @@
 				pfData->FileNameLength;
 	} else
 		new_entry = old_entry + le32_to_cpu(pDirInfo->NextEntryOffset);
-	cFYI(1, "new entry %p old entry %p", new_entry, old_entry);
+	cifs_dbg(FYI, "new entry %p old entry %p\n", new_entry, old_entry);
 	/* validate that new_entry is not past end of SMB */
 	if (new_entry >= end_of_smb) {
-		cERROR(1, "search entry %p began after end of SMB %p old entry %p",
-			new_entry, end_of_smb, old_entry);
+		cifs_dbg(VFS, "search entry %p began after end of SMB %p old entry %p\n",
+			 new_entry, end_of_smb, old_entry);
 		return NULL;
 	} else if (((level == SMB_FIND_FILE_INFO_STANDARD) &&
 		    (new_entry + sizeof(FIND_FILE_STANDARD_INFO) > end_of_smb))
 		  || ((level != SMB_FIND_FILE_INFO_STANDARD) &&
 		   (new_entry + sizeof(FILE_DIRECTORY_INFO) > end_of_smb)))  {
-		cERROR(1, "search entry %p extends after end of SMB %p",
-			new_entry, end_of_smb);
+		cifs_dbg(VFS, "search entry %p extends after end of SMB %p\n",
+			 new_entry, end_of_smb);
 		return NULL;
 	} else
 		return new_entry;
@@ -457,7 +457,7 @@
 		cifs_fill_dirent_std(de, info);
 		break;
 	default:
-		cFYI(1, "Unknown findfirst level %d", level);
+		cifs_dbg(FYI, "Unknown findfirst level %d\n", level);
 		return -EINVAL;
 	}
 
@@ -572,7 +572,7 @@
 	if (((index_to_find < cfile->srch_inf.index_of_last_entry) &&
 	     is_dir_changed(file)) || (index_to_find < first_entry_in_buffer)) {
 		/* close and restart search */
-		cFYI(1, "search backing up - close and restart search");
+		cifs_dbg(FYI, "search backing up - close and restart search\n");
 		spin_lock(&cifs_file_list_lock);
 		if (!cfile->srch_inf.endOfSearch && !cfile->invalidHandle) {
 			cfile->invalidHandle = true;
@@ -582,7 +582,7 @@
 		} else
 			spin_unlock(&cifs_file_list_lock);
 		if (cfile->srch_inf.ntwrk_buf_start) {
-			cFYI(1, "freeing SMB ff cache buf on search rewind");
+			cifs_dbg(FYI, "freeing SMB ff cache buf on search rewind\n");
 			if (cfile->srch_inf.smallBuf)
 				cifs_small_buf_release(cfile->srch_inf.
 						ntwrk_buf_start);
@@ -593,7 +593,7 @@
 		}
 		rc = initiate_cifs_search(xid, file);
 		if (rc) {
-			cFYI(1, "error %d reinitiating a search on rewind",
+			cifs_dbg(FYI, "error %d reinitiating a search on rewind\n",
 				 rc);
 			return rc;
 		}
@@ -608,7 +608,7 @@
 
 	while ((index_to_find >= cfile->srch_inf.index_of_last_entry) &&
 	       (rc == 0) && !cfile->srch_inf.endOfSearch) {
-		cFYI(1, "calling findnext2");
+		cifs_dbg(FYI, "calling findnext2\n");
 		rc = server->ops->query_dir_next(xid, tcon, &cfile->fid,
 						 search_flags,
 						 &cfile->srch_inf);
@@ -631,7 +631,7 @@
 		first_entry_in_buffer = cfile->srch_inf.index_of_last_entry
 					- cfile->srch_inf.entries_in_buffer;
 		pos_in_buf = index_to_find - first_entry_in_buffer;
-		cFYI(1, "found entry - pos_in_buf %d", pos_in_buf);
+		cifs_dbg(FYI, "found entry - pos_in_buf %d\n", pos_in_buf);
 
 		for (i = 0; (i < (pos_in_buf)) && (cur_ent != NULL); i++) {
 			/* go entry by entry figuring out which is first */
@@ -640,19 +640,18 @@
 		}
 		if ((cur_ent == NULL) && (i < pos_in_buf)) {
 			/* BB fixme - check if we should flag this error */
-			cERROR(1, "reached end of buf searching for pos in buf"
-				  " %d index to find %lld rc %d", pos_in_buf,
-				  index_to_find, rc);
+			cifs_dbg(VFS, "reached end of buf searching for pos in buf %d index to find %lld rc %d\n",
+				 pos_in_buf, index_to_find, rc);
 		}
 		rc = 0;
 		*current_entry = cur_ent;
 	} else {
-		cFYI(1, "index not in buffer - could not findnext into it");
+		cifs_dbg(FYI, "index not in buffer - could not findnext into it\n");
 		return 0;
 	}
 
 	if (pos_in_buf >= cfile->srch_inf.entries_in_buffer) {
-		cFYI(1, "can not return entries pos_in_buf beyond last");
+		cifs_dbg(FYI, "can not return entries pos_in_buf beyond last\n");
 		*num_to_ret = 0;
 	} else
 		*num_to_ret = cfile->srch_inf.entries_in_buffer - pos_in_buf;
@@ -678,8 +677,8 @@
 		return rc;
 
 	if (de.namelen > max_len) {
-		cERROR(1, "bad search response length %zd past smb end",
-			  de.namelen);
+		cifs_dbg(VFS, "bad search response length %zd past smb end\n",
+			 de.namelen);
 		return -EINVAL;
 	}
 
@@ -768,7 +767,7 @@
 	 */
 	if (file->private_data == NULL) {
 		rc = initiate_cifs_search(xid, file);
-		cFYI(1, "initiate cifs search rc %d", rc);
+		cifs_dbg(FYI, "initiate cifs search rc %d\n", rc);
 		if (rc)
 			goto rddir2_exit;
 	}
@@ -777,7 +776,7 @@
 	case 0:
 		if (filldir(direntry, ".", 1, file->f_pos,
 		     file_inode(file)->i_ino, DT_DIR) < 0) {
-			cERROR(1, "Filldir for current dir failed");
+			cifs_dbg(VFS, "Filldir for current dir failed\n");
 			rc = -ENOMEM;
 			break;
 		}
@@ -785,7 +784,7 @@
 	case 1:
 		if (filldir(direntry, "..", 2, file->f_pos,
 		     parent_ino(file->f_path.dentry), DT_DIR) < 0) {
-			cERROR(1, "Filldir for parent dir failed");
+			cifs_dbg(VFS, "Filldir for parent dir failed\n");
 			rc = -ENOMEM;
 			break;
 		}
@@ -804,7 +803,7 @@
 		cifsFile = file->private_data;
 		if (cifsFile->srch_inf.endOfSearch) {
 			if (cifsFile->srch_inf.emptyDir) {
-				cFYI(1, "End of search, empty dir");
+				cifs_dbg(FYI, "End of search, empty dir\n");
 				rc = 0;
 				break;
 			}
@@ -817,16 +816,16 @@
 		rc = find_cifs_entry(xid, tcon, file, &current_entry,
 				     &num_to_fill);
 		if (rc) {
-			cFYI(1, "fce error %d", rc);
+			cifs_dbg(FYI, "fce error %d\n", rc);
 			goto rddir2_exit;
 		} else if (current_entry != NULL) {
-			cFYI(1, "entry %lld found", file->f_pos);
+			cifs_dbg(FYI, "entry %lld found\n", file->f_pos);
 		} else {
-			cFYI(1, "could not find entry");
+			cifs_dbg(FYI, "could not find entry\n");
 			goto rddir2_exit;
 		}
-		cFYI(1, "loop through %d times filling dir for net buf %p",
-			num_to_fill, cifsFile->srch_inf.ntwrk_buf_start);
+		cifs_dbg(FYI, "loop through %d times filling dir for net buf %p\n",
+			 num_to_fill, cifsFile->srch_inf.ntwrk_buf_start);
 		max_len = tcon->ses->server->ops->calc_smb_size(
 				cifsFile->srch_inf.ntwrk_buf_start);
 		end_of_smb = cifsFile->srch_inf.ntwrk_buf_start + max_len;
@@ -840,8 +839,8 @@
 		for (i = 0; (i < num_to_fill) && (rc == 0); i++) {
 			if (current_entry == NULL) {
 				/* evaluate whether this case is an error */
-				cERROR(1, "past SMB end,  num to fill %d i %d",
-					  num_to_fill, i);
+				cifs_dbg(VFS, "past SMB end,  num to fill %d i %d\n",
+					 num_to_fill, i);
 				break;
 			}
 			/*
@@ -858,8 +857,8 @@
 			file->f_pos++;
 			if (file->f_pos ==
 				cifsFile->srch_inf.index_of_last_entry) {
-				cFYI(1, "last entry in buf at pos %lld %s",
-					file->f_pos, tmp_buf);
+				cifs_dbg(FYI, "last entry in buf at pos %lld %s\n",
+					 file->f_pos, tmp_buf);
 				cifs_save_resume_key(current_entry, cifsFile);
 				break;
 			} else
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index 76809f4..f230571 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -283,11 +283,11 @@
 	int len;
 	char *data = *pbcc_area;
 
-	cFYI(1, "bleft %d", bleft);
+	cifs_dbg(FYI, "bleft %d\n", bleft);
 
 	kfree(ses->serverOS);
 	ses->serverOS = cifs_strndup_from_utf16(data, bleft, true, nls_cp);
-	cFYI(1, "serverOS=%s", ses->serverOS);
+	cifs_dbg(FYI, "serverOS=%s\n", ses->serverOS);
 	len = (UniStrnlen((wchar_t *) data, bleft / 2) * 2) + 2;
 	data += len;
 	bleft -= len;
@@ -296,7 +296,7 @@
 
 	kfree(ses->serverNOS);
 	ses->serverNOS = cifs_strndup_from_utf16(data, bleft, true, nls_cp);
-	cFYI(1, "serverNOS=%s", ses->serverNOS);
+	cifs_dbg(FYI, "serverNOS=%s\n", ses->serverNOS);
 	len = (UniStrnlen((wchar_t *) data, bleft / 2) * 2) + 2;
 	data += len;
 	bleft -= len;
@@ -305,7 +305,7 @@
 
 	kfree(ses->serverDomain);
 	ses->serverDomain = cifs_strndup_from_utf16(data, bleft, true, nls_cp);
-	cFYI(1, "serverDomain=%s", ses->serverDomain);
+	cifs_dbg(FYI, "serverDomain=%s\n", ses->serverDomain);
 
 	return;
 }
@@ -318,7 +318,7 @@
 	int len;
 	char *bcc_ptr = *pbcc_area;
 
-	cFYI(1, "decode sessetup ascii. bleft %d", bleft);
+	cifs_dbg(FYI, "decode sessetup ascii. bleft %d\n", bleft);
 
 	len = strnlen(bcc_ptr, bleft);
 	if (len >= bleft)
@@ -330,7 +330,7 @@
 	if (ses->serverOS)
 		strncpy(ses->serverOS, bcc_ptr, len);
 	if (strncmp(ses->serverOS, "OS/2", 4) == 0) {
-			cFYI(1, "OS/2 server");
+		cifs_dbg(FYI, "OS/2 server\n");
 			ses->flags |= CIFS_SES_OS2;
 	}
 
@@ -359,7 +359,7 @@
 	/* BB For newer servers which do not support Unicode,
 	   but thus do return domain here we could add parsing
 	   for it later, but it is not very important */
-	cFYI(1, "ascii: bytes left %d", bleft);
+	cifs_dbg(FYI, "ascii: bytes left %d\n", bleft);
 
 	return rc;
 }
@@ -373,16 +373,18 @@
 	CHALLENGE_MESSAGE *pblob = (CHALLENGE_MESSAGE *)bcc_ptr;
 
 	if (blob_len < sizeof(CHALLENGE_MESSAGE)) {
-		cERROR(1, "challenge blob len %d too small", blob_len);
+		cifs_dbg(VFS, "challenge blob len %d too small\n", blob_len);
 		return -EINVAL;
 	}
 
 	if (memcmp(pblob->Signature, "NTLMSSP", 8)) {
-		cERROR(1, "blob signature incorrect %s", pblob->Signature);
+		cifs_dbg(VFS, "blob signature incorrect %s\n",
+			 pblob->Signature);
 		return -EINVAL;
 	}
 	if (pblob->MessageType != NtLmChallenge) {
-		cERROR(1, "Incorrect message type %d", pblob->MessageType);
+		cifs_dbg(VFS, "Incorrect message type %d\n",
+			 pblob->MessageType);
 		return -EINVAL;
 	}
 
@@ -395,16 +397,17 @@
 	tioffset = le32_to_cpu(pblob->TargetInfoArray.BufferOffset);
 	tilen = le16_to_cpu(pblob->TargetInfoArray.Length);
 	if (tioffset > blob_len || tioffset + tilen > blob_len) {
-		cERROR(1, "tioffset + tilen too high %u + %u", tioffset, tilen);
+		cifs_dbg(VFS, "tioffset + tilen too high %u + %u",
+			tioffset, tilen);
 		return -EINVAL;
 	}
 	if (tilen) {
-		ses->auth_key.response = kmalloc(tilen, GFP_KERNEL);
+		ses->auth_key.response = kmemdup(bcc_ptr + tioffset, tilen,
+						 GFP_KERNEL);
 		if (!ses->auth_key.response) {
-			cERROR(1, "Challenge target info allocation failure");
+			cifs_dbg(VFS, "Challenge target info alloc failure");
 			return -ENOMEM;
 		}
-		memcpy(ses->auth_key.response, bcc_ptr + tioffset, tilen);
 		ses->auth_key.len = tilen;
 	}
 
@@ -486,7 +489,7 @@
 	sec_blob->NtChallengeResponse.BufferOffset = cpu_to_le32(tmp - pbuffer);
 	rc = setup_ntlmv2_rsp(ses, nls_cp);
 	if (rc) {
-		cERROR(1, "Error %d during NTLMSSP authentication", rc);
+		cifs_dbg(VFS, "Error %d during NTLMSSP authentication\n", rc);
 		goto setup_ntlmv2_ret;
 	}
 	memcpy(tmp, ses->auth_key.response + CIFS_SESS_KEY_SIZE,
@@ -580,7 +583,7 @@
 		return -EINVAL;
 
 	type = ses->server->secType;
-	cFYI(1, "sess setup type %d", type);
+	cifs_dbg(FYI, "sess setup type %d\n", type);
 	if (type == RawNTLMSSP) {
 		/* if memory allocation is successful, caller of this function
 		 * frees it.
@@ -674,7 +677,7 @@
 		changed to do higher than lanman dialect and
 		we reconnected would we ever calc signing_key? */
 
-		cFYI(1, "Negotiating LANMAN setting up strings");
+		cifs_dbg(FYI, "Negotiating LANMAN setting up strings\n");
 		/* Unicode not allowed for LANMAN dialects */
 		ascii_ssetup_strings(&bcc_ptr, ses, nls_cp);
 #endif
@@ -688,7 +691,8 @@
 		/* calculate ntlm response and session key */
 		rc = setup_ntlm_response(ses, nls_cp);
 		if (rc) {
-			cERROR(1, "Error %d during NTLM authentication", rc);
+			cifs_dbg(VFS, "Error %d during NTLM authentication\n",
+				 rc);
 			goto ssetup_exit;
 		}
 
@@ -718,7 +722,8 @@
 		/* calculate nlmv2 response and session key */
 		rc = setup_ntlmv2_rsp(ses, nls_cp);
 		if (rc) {
-			cERROR(1, "Error %d during NTLMv2 authentication", rc);
+			cifs_dbg(VFS, "Error %d during NTLMv2 authentication\n",
+				 rc);
 			goto ssetup_exit;
 		}
 		memcpy(bcc_ptr, ses->auth_key.response + CIFS_SESS_KEY_SIZE,
@@ -754,21 +759,21 @@
 		/* check version field to make sure that cifs.upcall is
 		   sending us a response in an expected form */
 		if (msg->version != CIFS_SPNEGO_UPCALL_VERSION) {
-			cERROR(1, "incorrect version of cifs.upcall (expected"
-				   " %d but got %d)",
+			cifs_dbg(VFS, "incorrect version of cifs.upcall "
+				   "expected %d but got %d)",
 				   CIFS_SPNEGO_UPCALL_VERSION, msg->version);
 			rc = -EKEYREJECTED;
 			goto ssetup_exit;
 		}
 
-		ses->auth_key.response = kmalloc(msg->sesskey_len, GFP_KERNEL);
+		ses->auth_key.response = kmemdup(msg->data, msg->sesskey_len,
+						 GFP_KERNEL);
 		if (!ses->auth_key.response) {
-			cERROR(1, "Kerberos can't allocate (%u bytes) memory",
+			cifs_dbg(VFS, "Kerberos can't allocate (%u bytes) memory",
 					msg->sesskey_len);
 			rc = -ENOMEM;
 			goto ssetup_exit;
 		}
-		memcpy(ses->auth_key.response, msg->data, msg->sesskey_len);
 		ses->auth_key.len = msg->sesskey_len;
 
 		pSMB->req.hdr.Flags2 |= SMBFLG2_EXT_SEC;
@@ -790,18 +795,18 @@
 		/* BB: is this right? */
 			ascii_ssetup_strings(&bcc_ptr, ses, nls_cp);
 #else /* ! CONFIG_CIFS_UPCALL */
-		cERROR(1, "Kerberos negotiated but upcall support disabled!");
+		cifs_dbg(VFS, "Kerberos negotiated but upcall support disabled!\n");
 		rc = -ENOSYS;
 		goto ssetup_exit;
 #endif /* CONFIG_CIFS_UPCALL */
 	} else if (type == RawNTLMSSP) {
 		if ((pSMB->req.hdr.Flags2 & SMBFLG2_UNICODE) == 0) {
-			cERROR(1, "NTLMSSP requires Unicode support");
+			cifs_dbg(VFS, "NTLMSSP requires Unicode support\n");
 			rc = -ENOSYS;
 			goto ssetup_exit;
 		}
 
-		cFYI(1, "ntlmssp session setup phase %d", phase);
+		cifs_dbg(FYI, "ntlmssp session setup phase %d\n", phase);
 		pSMB->req.hdr.Flags2 |= SMBFLG2_EXT_SEC;
 		capabilities |= CAP_EXTENDED_SECURITY;
 		pSMB->req.Capabilities |= cpu_to_le32(capabilities);
@@ -824,7 +829,6 @@
 				5*sizeof(struct _AUTHENTICATE_MESSAGE),
 				GFP_KERNEL);
 			if (!ntlmsspblob) {
-				cERROR(1, "Can't allocate NTLMSSP blob");
 				rc = -ENOMEM;
 				goto ssetup_exit;
 			}
@@ -844,7 +848,7 @@
 			smb_buf->Uid = ses->Suid;
 			break;
 		default:
-			cERROR(1, "invalid phase %d", phase);
+			cifs_dbg(VFS, "invalid phase %d\n", phase);
 			rc = -ENOSYS;
 			goto ssetup_exit;
 		}
@@ -855,7 +859,7 @@
 		}
 		unicode_oslm_strings(&bcc_ptr, nls_cp);
 	} else {
-		cERROR(1, "secType %d not supported!", type);
+		cifs_dbg(VFS, "secType %d not supported!\n", type);
 		rc = -ENOSYS;
 		goto ssetup_exit;
 	}
@@ -880,7 +884,7 @@
 	    (smb_buf->Status.CifsError ==
 			cpu_to_le32(NT_STATUS_MORE_PROCESSING_REQUIRED))) {
 		if (phase != NtLmNegotiate) {
-			cERROR(1, "Unexpected more processing error");
+			cifs_dbg(VFS, "Unexpected more processing error\n");
 			goto ssetup_exit;
 		}
 		/* NTLMSSP Negotiate sent now processing challenge (response) */
@@ -892,14 +896,14 @@
 
 	if ((smb_buf->WordCount != 3) && (smb_buf->WordCount != 4)) {
 		rc = -EIO;
-		cERROR(1, "bad word count %d", smb_buf->WordCount);
+		cifs_dbg(VFS, "bad word count %d\n", smb_buf->WordCount);
 		goto ssetup_exit;
 	}
 	action = le16_to_cpu(pSMB->resp.Action);
 	if (action & GUEST_LOGIN)
-		cFYI(1, "Guest login"); /* BB mark SesInfo struct? */
+		cifs_dbg(FYI, "Guest login\n"); /* BB mark SesInfo struct? */
 	ses->Suid = smb_buf->Uid;   /* UID left in wire format (le) */
-	cFYI(1, "UID = %llu ", ses->Suid);
+	cifs_dbg(FYI, "UID = %llu\n", ses->Suid);
 	/* response can have either 3 or 4 word count - Samba sends 3 */
 	/* and lanman response is 3 */
 	bytes_remaining = get_bcc(smb_buf);
@@ -908,7 +912,8 @@
 	if (smb_buf->WordCount == 4) {
 		blob_len = le16_to_cpu(pSMB->resp.SecurityBlobLength);
 		if (blob_len > bytes_remaining) {
-			cERROR(1, "bad security blob length %d", blob_len);
+			cifs_dbg(VFS, "bad security blob length %d\n",
+				 blob_len);
 			rc = -EINVAL;
 			goto ssetup_exit;
 		}
@@ -946,7 +951,7 @@
 	kfree(ntlmsspblob);
 	ntlmsspblob = NULL;
 	if (resp_buf_type == CIFS_SMALL_BUFFER) {
-		cFYI(1, "ssetup freeing small buf %p", iov[0].iov_base);
+		cifs_dbg(FYI, "ssetup freeing small buf %p\n", iov[0].iov_base);
 		cifs_small_buf_release(iov[0].iov_base);
 	} else if (resp_buf_type == CIFS_LARGE_BUFFER)
 		cifs_buf_release(iov[0].iov_base);
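
Two hunks above, the NTLMSSP target-info copy and the Kerberos session-key
copy, replace a kmalloc() followed by memcpy() with a single kmemdup().
In the kernel, kmemdup() is exactly that allocate-and-copy pair (plus GFP
flags); folding it into one call shortens the error handling and removes
the window where the buffer exists but still holds uninitialized data. A
userspace equivalent for illustration, name invented:

	#include <stdlib.h>
	#include <string.h>

	/* Userspace stand-in for the kernel's kmemdup(src, len, gfp). */
	static void *kmemdup_like(const void *src, size_t len)
	{
		void *p = malloc(len);

		if (p)
			memcpy(p, src, len);
		return p;
	}
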
diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c
index 47bc5a8..3efdb9d 100644
--- a/fs/cifs/smb1ops.c
+++ b/fs/cifs/smb1ops.c
@@ -61,10 +61,13 @@
 	 */
 	--server->sequence_number;
 	rc = smb_send(server, in_buf, be32_to_cpu(in_buf->smb_buf_length));
+	if (rc < 0)
+		server->sequence_number--;
+
 	mutex_unlock(&server->srv_mutex);
 
-	cFYI(1, "issued NT_CANCEL for mid %u, rc = %d",
-		in_buf->Mid, rc);
+	cifs_dbg(FYI, "issued NT_CANCEL for mid %u, rc = %d\n",
+		 in_buf->Mid, rc);
 
 	return rc;
 }
@@ -249,7 +252,7 @@
 	/* check for plausible wct, bcc and t2 data and parm sizes */
 	/* check for parm and data offset going beyond end of smb */
 	if (pSMB->WordCount != 10) { /* coalesce_t2 depends on this */
-		cFYI(1, "invalid transact2 word count");
+		cifs_dbg(FYI, "invalid transact2 word count\n");
 		return -EINVAL;
 	}
 
@@ -261,18 +264,18 @@
 	if (total_data_size == data_in_this_rsp)
 		return 0;
 	else if (total_data_size < data_in_this_rsp) {
-		cFYI(1, "total data %d smaller than data in frame %d",
-			total_data_size, data_in_this_rsp);
+		cifs_dbg(FYI, "total data %d smaller than data in frame %d\n",
+			 total_data_size, data_in_this_rsp);
 		return -EINVAL;
 	}
 
 	remaining = total_data_size - data_in_this_rsp;
 
-	cFYI(1, "missing %d bytes from transact2, check next response",
-		remaining);
+	cifs_dbg(FYI, "missing %d bytes from transact2, check next response\n",
+		 remaining);
 	if (total_data_size > CIFSMaxBufSize) {
-		cERROR(1, "TotalDataSize %d is over maximum buffer %d",
-			total_data_size, CIFSMaxBufSize);
+		cifs_dbg(VFS, "TotalDataSize %d is over maximum buffer %d\n",
+			 total_data_size, CIFSMaxBufSize);
 		return -EINVAL;
 	}
 	return remaining;
@@ -293,28 +296,28 @@
 	tgt_total_cnt = get_unaligned_le16(&pSMBt->t2_rsp.TotalDataCount);
 
 	if (tgt_total_cnt != src_total_cnt)
-		cFYI(1, "total data count of primary and secondary t2 differ "
-			"source=%hu target=%hu", src_total_cnt, tgt_total_cnt);
+		cifs_dbg(FYI, "total data count of primary and secondary t2 differ source=%hu target=%hu\n",
+			 src_total_cnt, tgt_total_cnt);
 
 	total_in_tgt = get_unaligned_le16(&pSMBt->t2_rsp.DataCount);
 
 	remaining = tgt_total_cnt - total_in_tgt;
 
 	if (remaining < 0) {
-		cFYI(1, "Server sent too much data. tgt_total_cnt=%hu "
-			"total_in_tgt=%hu", tgt_total_cnt, total_in_tgt);
+		cifs_dbg(FYI, "Server sent too much data. tgt_total_cnt=%hu total_in_tgt=%hu\n",
+			 tgt_total_cnt, total_in_tgt);
 		return -EPROTO;
 	}
 
 	if (remaining == 0) {
 		/* nothing to do, ignore */
-		cFYI(1, "no more data remains");
+		cifs_dbg(FYI, "no more data remains\n");
 		return 0;
 	}
 
 	total_in_src = get_unaligned_le16(&pSMBs->t2_rsp.DataCount);
 	if (remaining < total_in_src)
-		cFYI(1, "transact2 2nd response contains too much data");
+		cifs_dbg(FYI, "transact2 2nd response contains too much data\n");
 
 	/* find end of first SMB data area */
 	data_area_of_tgt = (char *)&pSMBt->hdr.Protocol +
@@ -329,7 +332,8 @@
 	total_in_tgt += total_in_src;
 	/* is the result too big for the field? */
 	if (total_in_tgt > USHRT_MAX) {
-		cFYI(1, "coalesced DataCount too large (%u)", total_in_tgt);
+		cifs_dbg(FYI, "coalesced DataCount too large (%u)\n",
+			 total_in_tgt);
 		return -EPROTO;
 	}
 	put_unaligned_le16(total_in_tgt, &pSMBt->t2_rsp.DataCount);
@@ -339,7 +343,7 @@
 	byte_count += total_in_src;
 	/* is the result too big for the field? */
 	if (byte_count > USHRT_MAX) {
-		cFYI(1, "coalesced BCC too large (%u)", byte_count);
+		cifs_dbg(FYI, "coalesced BCC too large (%u)\n", byte_count);
 		return -EPROTO;
 	}
 	put_bcc(byte_count, target_hdr);
@@ -348,7 +352,8 @@
 	byte_count += total_in_src;
 	/* don't allow buffer to overflow */
 	if (byte_count > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE - 4) {
-		cFYI(1, "coalesced BCC exceeds buffer size (%u)", byte_count);
+		cifs_dbg(FYI, "coalesced BCC exceeds buffer size (%u)\n",
+			 byte_count);
 		return -ENOBUFS;
 	}
 	target_hdr->smb_buf_length = cpu_to_be32(byte_count);
@@ -358,12 +363,12 @@
 
 	if (remaining != total_in_src) {
 		/* more responses to go */
-		cFYI(1, "waiting for more secondary responses");
+		cifs_dbg(FYI, "waiting for more secondary responses\n");
 		return 1;
 	}
 
 	/* we are done */
-	cFYI(1, "found the last secondary response");
+	cifs_dbg(FYI, "found the last secondary response\n");
 	return 0;
 }
 
@@ -388,7 +393,7 @@
 	}
 	if (!server->large_buf) {
 		/*FIXME: switch to already allocated largebuf?*/
-		cERROR(1, "1st trans2 resp needs bigbuf");
+		cifs_dbg(VFS, "1st trans2 resp needs bigbuf\n");
 	} else {
 		/* Have first buffer */
 		mid->resp_buf = buf;
@@ -776,8 +781,7 @@
 			goto out;
 	}
 
-	cFYI(1, "calling SetFileInfo since SetPathInfo for times not supported "
-		"by this server");
+	cifs_dbg(FYI, "calling SetFileInfo since SetPathInfo for times not supported by this server\n");
 	rc = CIFSSMBOpen(xid, tcon, full_path, FILE_OPEN,
 			 SYNCHRONIZE | FILE_WRITE_ATTRIBUTES, CREATE_NOT_DIR,
 			 &netfid, &oplock, NULL, cifs_sb->local_nls,
diff --git a/fs/cifs/smb2file.c b/fs/cifs/smb2file.c
index 71e6aed..5da1b55 100644
--- a/fs/cifs/smb2file.c
+++ b/fs/cifs/smb2file.c
@@ -43,13 +43,13 @@
 	if (oplock == SMB2_OPLOCK_LEVEL_EXCLUSIVE) {
 		cinode->clientCanCacheAll = true;
 		cinode->clientCanCacheRead = true;
-		cFYI(1, "Exclusive Oplock granted on inode %p",
-		     &cinode->vfs_inode);
+		cifs_dbg(FYI, "Exclusive Oplock granted on inode %p\n",
+			 &cinode->vfs_inode);
 	} else if (oplock == SMB2_OPLOCK_LEVEL_II) {
 		cinode->clientCanCacheAll = false;
 		cinode->clientCanCacheRead = true;
-		cFYI(1, "Level II Oplock granted on inode %p",
-		    &cinode->vfs_inode);
+		cifs_dbg(FYI, "Level II Oplock granted on inode %p\n",
+			 &cinode->vfs_inode);
 	} else {
 		cinode->clientCanCacheAll = false;
 		cinode->clientCanCacheRead = false;
diff --git a/fs/cifs/smb2inode.c b/fs/cifs/smb2inode.c
index 7064824..fff6dfb 100644
--- a/fs/cifs/smb2inode.c
+++ b/fs/cifs/smb2inode.c
@@ -92,7 +92,7 @@
 				      (FILE_BASIC_INFO *)data);
 		break;
 	default:
-		cERROR(1, "Invalid command");
+		cifs_dbg(VFS, "Invalid command\n");
 		break;
 	}
 
diff --git a/fs/cifs/smb2maperror.c b/fs/cifs/smb2maperror.c
index 494c912..7c2f45c 100644
--- a/fs/cifs/smb2maperror.c
+++ b/fs/cifs/smb2maperror.c
@@ -2472,7 +2472,7 @@
 
 	/* on error mapping not found  - return EIO */
 
-	cFYI(1, "Mapping SMB2 status code %d to POSIX err %d",
+	cifs_dbg(FYI, "Mapping SMB2 status code %d to POSIX err %d\n",
 		 smb2err, rc);
 
 	return rc;
diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c
index 7b1c5e3..10383d8 100644
--- a/fs/cifs/smb2misc.c
+++ b/fs/cifs/smb2misc.c
@@ -45,17 +45,17 @@
 			if (hdr->Command == SMB2_OPLOCK_BREAK)
 				return 0;
 			else
-				cERROR(1, "Received Request not response");
+				cifs_dbg(VFS, "Received Request not response\n");
 		}
 	} else { /* bad signature or mid */
 		if (*(__le32 *)hdr->ProtocolId != SMB2_PROTO_NUMBER)
-			cERROR(1, "Bad protocol string signature header %x",
-				  *(unsigned int *) hdr->ProtocolId);
+			cifs_dbg(VFS, "Bad protocol string signature header %x\n",
+				 *(unsigned int *) hdr->ProtocolId);
 		if (mid != hdr->MessageId)
-			cERROR(1, "Mids do not match: %llu and %llu", mid,
-				  hdr->MessageId);
+			cifs_dbg(VFS, "Mids do not match: %llu and %llu\n",
+				 mid, hdr->MessageId);
 	}
-	cERROR(1, "Bad SMB detected. The Mid=%llu", hdr->MessageId);
+	cifs_dbg(VFS, "Bad SMB detected. The Mid=%llu\n", hdr->MessageId);
 	return 1;
 }
 
@@ -101,7 +101,8 @@
 	int command;
 
 	/* BB disable following printk later */
-	cFYI(1, "%s length: 0x%x, smb_buf_length: 0x%x", __func__, length, len);
+	cifs_dbg(FYI, "%s length: 0x%x, smb_buf_length: 0x%x\n",
+		 __func__, length, len);
 
 	/*
 	 * Add function to do table lookup of StructureSize by command
@@ -117,12 +118,13 @@
 			 */
 			return 0;
 		} else {
-			cERROR(1, "Length less than SMB header size");
+			cifs_dbg(VFS, "Length less than SMB header size\n");
 		}
 		return 1;
 	}
 	if (len > CIFSMaxBufSize + MAX_SMB2_HDR_SIZE - 4) {
-		cERROR(1, "SMB length greater than maximum, mid=%llu", mid);
+		cifs_dbg(VFS, "SMB length greater than maximum, mid=%llu\n",
+			 mid);
 		return 1;
 	}
 
@@ -130,14 +132,14 @@
 		return 1;
 
 	if (hdr->StructureSize != SMB2_HEADER_STRUCTURE_SIZE) {
-		cERROR(1, "Illegal structure size %u",
-			  le16_to_cpu(hdr->StructureSize));
+		cifs_dbg(VFS, "Illegal structure size %u\n",
+			 le16_to_cpu(hdr->StructureSize));
 		return 1;
 	}
 
 	command = le16_to_cpu(hdr->Command);
 	if (command >= NUMBER_OF_SMB2_COMMANDS) {
-		cERROR(1, "Illegal SMB2 command %d", command);
+		cifs_dbg(VFS, "Illegal SMB2 command %d\n", command);
 		return 1;
 	}
 
@@ -145,30 +147,30 @@
 		if (command != SMB2_OPLOCK_BREAK_HE && (hdr->Status == 0 ||
 		    pdu->StructureSize2 != SMB2_ERROR_STRUCTURE_SIZE2)) {
 			/* error packets have 9 byte structure size */
-			cERROR(1, "Illegal response size %u for command %d",
-				   le16_to_cpu(pdu->StructureSize2), command);
+			cifs_dbg(VFS, "Illegal response size %u for command %d\n",
+				 le16_to_cpu(pdu->StructureSize2), command);
 			return 1;
 		} else if (command == SMB2_OPLOCK_BREAK_HE && (hdr->Status == 0)
 			   && (le16_to_cpu(pdu->StructureSize2) != 44)
 			   && (le16_to_cpu(pdu->StructureSize2) != 36)) {
 			/* special case for SMB2.1 lease break message */
-			cERROR(1, "Illegal response size %d for oplock break",
-				   le16_to_cpu(pdu->StructureSize2));
+			cifs_dbg(VFS, "Illegal response size %d for oplock break\n",
+				 le16_to_cpu(pdu->StructureSize2));
 			return 1;
 		}
 	}
 
 	if (4 + len != length) {
-		cERROR(1, "Total length %u RFC1002 length %u mismatch mid %llu",
-			  length, 4 + len, mid);
+		cifs_dbg(VFS, "Total length %u RFC1002 length %u mismatch mid %llu\n",
+			 length, 4 + len, mid);
 		return 1;
 	}
 
 	clc_len = smb2_calc_size(hdr);
 
 	if (4 + len != clc_len) {
-		cFYI(1, "Calculated size %u length %u mismatch mid %llu",
-			clc_len, 4 + len, mid);
+		cifs_dbg(FYI, "Calculated size %u length %u mismatch mid %llu\n",
+			 clc_len, 4 + len, mid);
 		/* Windows 7 server returns 24 bytes more */
 		if (clc_len + 20 == len && command == SMB2_OPLOCK_BREAK_HE)
 			return 0;
@@ -267,7 +269,7 @@
 	case SMB2_CHANGE_NOTIFY:
 	default:
 		/* BB FIXME for unimplemented cases above */
-		cERROR(1, "no length check for command");
+		cifs_dbg(VFS, "no length check for command\n");
 		break;
 	}
 
@@ -276,20 +278,20 @@
 	 * we have little choice but to ignore the data area in this case.
 	 */
 	if (*off > 4096) {
-		cERROR(1, "offset %d too large, data area ignored", *off);
+		cifs_dbg(VFS, "offset %d too large, data area ignored\n", *off);
 		*len = 0;
 		*off = 0;
 	} else if (*off < 0) {
-		cERROR(1, "negative offset %d to data invalid ignore data area",
-			  *off);
+		cifs_dbg(VFS, "negative offset %d to data invalid ignore data area\n",
+			 *off);
 		*off = 0;
 		*len = 0;
 	} else if (*len < 0) {
-		cERROR(1, "negative data length %d invalid, data area ignored",
-			  *len);
+		cifs_dbg(VFS, "negative data length %d invalid, data area ignored\n",
+			 *len);
 		*len = 0;
 	} else if (*len > 128 * 1024) {
-		cERROR(1, "data area larger than 128K: %d", *len);
+		cifs_dbg(VFS, "data area larger than 128K: %d\n", *len);
 		*len = 0;
 	}
 
@@ -324,7 +326,7 @@
 		goto calc_size_exit;
 
 	smb2_get_data_area_len(&offset, &data_length, hdr);
-	cFYI(1, "SMB2 data length %d offset %d", data_length, offset);
+	cifs_dbg(FYI, "SMB2 data length %d offset %d\n", data_length, offset);
 
 	if (data_length > 0) {
 		/*
@@ -335,15 +337,15 @@
 		 * the size of the RFC1001 hdr.
 		 */
 		if (offset + 4 + 1 < len) {
-			cERROR(1, "data area offset %d overlaps SMB2 header %d",
-				  offset + 4 + 1, len);
+			cifs_dbg(VFS, "data area offset %d overlaps SMB2 header %d\n",
+				 offset + 4 + 1, len);
 			data_length = 0;
 		} else {
 			len = 4 + offset + data_length;
 		}
 	}
 calc_size_exit:
-	cFYI(1, "SMB2 len %d", len);
+	cifs_dbg(FYI, "SMB2 len %d\n", len);
 	return len;
 }
 
@@ -405,7 +407,7 @@
 
 	rc = SMB2_lease_break(0, tlink_tcon(lw->tlink), lw->lease_key,
 			      lw->lease_state);
-	cFYI(1, "Lease release rc %d", rc);
+	cifs_dbg(FYI, "Lease release rc %d\n", rc);
 	cifs_put_tlink(lw->tlink);
 	kfree(lw);
 }
@@ -426,15 +428,13 @@
 				  SMB2_NOTIFY_BREAK_LEASE_FLAG_ACK_REQUIRED);
 
 	lw = kmalloc(sizeof(struct smb2_lease_break_work), GFP_KERNEL);
-	if (!lw) {
-		cERROR(1, "Memory allocation failed during lease break check");
+	if (!lw)
 		return false;
-	}
 
 	INIT_WORK(&lw->lease_break, cifs_ses_oplock_break);
 	lw->lease_state = rsp->NewLeaseState;
 
-	cFYI(1, "Checking for lease break");
+	cifs_dbg(FYI, "Checking for lease break\n");
 
 	/* look up tcon based on tid & uid */
 	spin_lock(&cifs_tcp_ses_lock);
@@ -455,9 +455,9 @@
 					   SMB2_LEASE_KEY_SIZE))
 					continue;
 
-				cFYI(1, "found in the open list");
-				cFYI(1, "lease key match, lease break 0x%d",
-				     le32_to_cpu(rsp->NewLeaseState));
+				cifs_dbg(FYI, "found in the open list\n");
+				cifs_dbg(FYI, "lease key match, lease break 0x%d\n",
+					 le32_to_cpu(rsp->NewLeaseState));
 
 				smb2_set_oplock_level(cinode,
 				  smb2_map_lease_to_oplock(rsp->NewLeaseState));
@@ -489,9 +489,9 @@
 						   &lw->lease_break);
 				}
 
-				cFYI(1, "found in the pending open list");
-				cFYI(1, "lease key match, lease break 0x%d",
-				     le32_to_cpu(rsp->NewLeaseState));
+				cifs_dbg(FYI, "found in the pending open list\n");
+				cifs_dbg(FYI, "lease key match, lease break 0x%d\n",
+					 le32_to_cpu(rsp->NewLeaseState));
 
 				open->oplock =
 				  smb2_map_lease_to_oplock(rsp->NewLeaseState);
@@ -506,7 +506,7 @@
 	}
 	spin_unlock(&cifs_tcp_ses_lock);
 	kfree(lw);
-	cFYI(1, "Can not process lease break - no lease matched");
+	cifs_dbg(FYI, "Can not process lease break - no lease matched\n");
 	return false;
 }
 
@@ -520,7 +520,7 @@
 	struct cifsInodeInfo *cinode;
 	struct cifsFileInfo *cfile;
 
-	cFYI(1, "Checking for oplock break");
+	cifs_dbg(FYI, "Checking for oplock break\n");
 
 	if (rsp->hdr.Command != SMB2_OPLOCK_BREAK)
 		return false;
@@ -533,7 +533,7 @@
 			return false;
 	}
 
-	cFYI(1, "oplock level 0x%d", rsp->OplockLevel);
+	cifs_dbg(FYI, "oplock level 0x%d\n", rsp->OplockLevel);
 
 	/* look up tcon based on tid & uid */
 	spin_lock(&cifs_tcp_ses_lock);
@@ -553,7 +553,7 @@
 				    cfile->fid.volatile_fid)
 					continue;
 
-				cFYI(1, "file id match, oplock break");
+				cifs_dbg(FYI, "file id match, oplock break\n");
 				cinode = CIFS_I(cfile->dentry->d_inode);
 
 				if (!cinode->clientCanCacheAll &&
@@ -573,11 +573,11 @@
 			}
 			spin_unlock(&cifs_file_list_lock);
 			spin_unlock(&cifs_tcp_ses_lock);
-			cFYI(1, "No matching file for oplock break");
+			cifs_dbg(FYI, "No matching file for oplock break\n");
 			return true;
 		}
 	}
 	spin_unlock(&cifs_tcp_ses_lock);
-	cFYI(1, "Can not process oplock break for non-existent connection");
+	cifs_dbg(FYI, "Can not process oplock break for non-existent connection\n");
 	return false;
 }
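
The smb2misc.c hunks above are part of a tree-wide switch from the old
cERROR(1, ...)/cFYI(1, ...) macros to a single cifs_dbg(type, fmt, ...) that
takes an explicit class: VFS for errors, FYI for debug chatter, NOISY for
extra-verbose output. A minimal sketch of the pattern, simplified from what
fs/cifs/cifs_debug.h actually defines (the real macro also handles NOISY and
the CONFIG_CIFS_DEBUG=n case, where FYI messages compile away):

	#define cifs_dbg(type, fmt, ...)					\
	do {									\
		if (type == FYI && (cifsFYI & CIFS_INFO))			\
			printk(KERN_DEBUG "%s: " fmt,				\
			       __FILE__, ##__VA_ARGS__);			\
		else if (type == VFS)						\
			printk(KERN_ERR "CIFS VFS: " fmt, ##__VA_ARGS__);	\
	} while (0)

Because the newline now lives in the format string rather than in the macro,
every converted call site gains an explicit "\n".
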
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index bceffe7..f2e76f3 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -38,13 +38,13 @@
 	case 1:
 		server->echoes = false;
 		server->oplocks = false;
-		cERROR(1, "disabling echoes and oplocks");
+		cifs_dbg(VFS, "disabling echoes and oplocks\n");
 		break;
 	case 2:
 		server->echoes = true;
 		server->oplocks = false;
 		server->echo_credits = 1;
-		cFYI(1, "disabling oplocks");
+		cifs_dbg(FYI, "disabling oplocks\n");
 		break;
 	default:
 		server->echoes = true;
@@ -147,10 +147,10 @@
 #ifdef CONFIG_CIFS_DEBUG2
 	struct smb2_hdr *smb = (struct smb2_hdr *)buf;
 
-	cERROR(1, "Cmd: %d Err: 0x%x Flags: 0x%x Mid: %llu Pid: %d",
-		  smb->Command, smb->Status, smb->Flags, smb->MessageId,
-		  smb->ProcessId);
-	cERROR(1, "smb buf %p len %u", smb, smb2_calc_size(smb));
+	cifs_dbg(VFS, "Cmd: %d Err: 0x%x Flags: 0x%x Mid: %llu Pid: %d\n",
+		 smb->Command, smb->Status, smb->Flags, smb->MessageId,
+		 smb->ProcessId);
+	cifs_dbg(VFS, "smb buf %p len %u\n", smb, smb2_calc_size(smb));
 #endif
 }
 
@@ -436,7 +436,7 @@
 		       &oplock, NULL);
 	kfree(utf16_path);
 	if (rc) {
-		cERROR(1, "open dir failed");
+		cifs_dbg(VFS, "open dir failed\n");
 		return rc;
 	}
 
@@ -448,7 +448,7 @@
 	rc = SMB2_query_directory(xid, tcon, persistent_fid, volatile_fid, 0,
 				  srch_inf);
 	if (rc) {
-		cERROR(1, "query directory failed");
+		cifs_dbg(VFS, "query directory failed\n");
 		SMB2_close(xid, tcon, persistent_fid, volatile_fid);
 	}
 	return rc;
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 41d9d07..2b95ce2b 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -155,8 +155,8 @@
 		if ((smb2_command != SMB2_WRITE) &&
 		   (smb2_command != SMB2_CREATE) &&
 		   (smb2_command != SMB2_TREE_DISCONNECT)) {
-			cFYI(1, "can not send cmd %d while umounting",
-				smb2_command);
+			cifs_dbg(FYI, "can not send cmd %d while umounting\n",
+				 smb2_command);
 			return -ENODEV;
 		}
 	}
@@ -200,7 +200,7 @@
 		 * back on-line
 		 */
 		if (!tcon->retry) {
-			cFYI(1, "gave up waiting on reconnect in smb_init");
+			cifs_dbg(FYI, "gave up waiting on reconnect in smb_init\n");
 			return -EHOSTDOWN;
 		}
 	}
@@ -227,7 +227,7 @@
 	cifs_mark_open_files_invalid(tcon);
 	rc = SMB2_tcon(0, tcon->ses, tcon->treeName, tcon, nls_codepage);
 	mutex_unlock(&tcon->ses->session_mutex);
-	cFYI(1, "reconnect tcon rc = %d", rc);
+	cifs_dbg(FYI, "reconnect tcon rc = %d\n", rc);
 	if (rc)
 		goto out;
 	atomic_inc(&tconInfoReconnectCount);
@@ -335,7 +335,7 @@
 	char *security_blob;
 	int flags = CIFS_NEG_OP;
 
-	cFYI(1, "Negotiate protocol");
+	cifs_dbg(FYI, "Negotiate protocol\n");
 
 	if (ses->server)
 		server = ses->server;
@@ -354,7 +354,7 @@
 	else /* if override flags set only sign/seal OR them with global auth */
 		sec_flags = global_secflags | ses->overrideSecFlg;
 
-	cFYI(1, "sec_flags 0x%x", sec_flags);
+	cifs_dbg(FYI, "sec_flags 0x%x\n", sec_flags);
 
 	req->hdr.SessionId = 0;
 
@@ -389,19 +389,19 @@
 	if (rc != 0)
 		goto neg_exit;
 
-	cFYI(1, "mode 0x%x", rsp->SecurityMode);
+	cifs_dbg(FYI, "mode 0x%x\n", rsp->SecurityMode);
 
 	/* BB we may eventually want to match the negotiated vs. requested
 	   dialect, even though we are only requesting one at a time */
 	if (rsp->DialectRevision == cpu_to_le16(SMB20_PROT_ID))
-		cFYI(1, "negotiated smb2.0 dialect");
+		cifs_dbg(FYI, "negotiated smb2.0 dialect\n");
 	else if (rsp->DialectRevision == cpu_to_le16(SMB21_PROT_ID))
-		cFYI(1, "negotiated smb2.1 dialect");
+		cifs_dbg(FYI, "negotiated smb2.1 dialect\n");
 	else if (rsp->DialectRevision == cpu_to_le16(SMB30_PROT_ID))
-		cFYI(1, "negotiated smb3.0 dialect");
+		cifs_dbg(FYI, "negotiated smb3.0 dialect\n");
 	else {
-		cERROR(1, "Illegal dialect returned by server %d",
-			   le16_to_cpu(rsp->DialectRevision));
+		cifs_dbg(VFS, "Illegal dialect returned by server %d\n",
+			 le16_to_cpu(rsp->DialectRevision));
 		rc = -EIO;
 		goto neg_exit;
 	}
@@ -419,35 +419,34 @@
 	security_blob = smb2_get_data_area_len(&blob_offset, &blob_length,
 					       &rsp->hdr);
 	if (blob_length == 0) {
-		cERROR(1, "missing security blob on negprot");
+		cifs_dbg(VFS, "missing security blob on negprot\n");
 		rc = -EIO;
 		goto neg_exit;
 	}
 
-	cFYI(1, "sec_flags 0x%x", sec_flags);
+	cifs_dbg(FYI, "sec_flags 0x%x\n", sec_flags);
 	if ((sec_flags & CIFSSEC_MUST_SIGN) == CIFSSEC_MUST_SIGN) {
-		cFYI(1, "Signing required");
+		cifs_dbg(FYI, "Signing required\n");
 		if (!(server->sec_mode & (SMB2_NEGOTIATE_SIGNING_REQUIRED |
 		      SMB2_NEGOTIATE_SIGNING_ENABLED))) {
-			cERROR(1, "signing required but server lacks support");
+			cifs_dbg(VFS, "signing required but server lacks support\n");
 			rc = -EOPNOTSUPP;
 			goto neg_exit;
 		}
 		server->sec_mode |= SECMODE_SIGN_REQUIRED;
 	} else if (sec_flags & CIFSSEC_MAY_SIGN) {
-		cFYI(1, "Signing optional");
+		cifs_dbg(FYI, "Signing optional\n");
 		if (server->sec_mode & SMB2_NEGOTIATE_SIGNING_REQUIRED) {
-			cFYI(1, "Server requires signing");
+			cifs_dbg(FYI, "Server requires signing\n");
 			server->sec_mode |= SECMODE_SIGN_REQUIRED;
 		} else {
 			server->sec_mode &=
 				~(SECMODE_SIGN_ENABLED | SECMODE_SIGN_REQUIRED);
 		}
 	} else {
-		cFYI(1, "Signing disabled");
+		cifs_dbg(FYI, "Signing disabled\n");
 		if (server->sec_mode & SMB2_NEGOTIATE_SIGNING_REQUIRED) {
-			cERROR(1, "Server requires packet signing to be enabled"
-				  " in /proc/fs/cifs/SecurityFlags.");
+			cifs_dbg(VFS, "Server requires packet signing to be enabled in /proc/fs/cifs/SecurityFlags\n");
 			rc = -EOPNOTSUPP;
 			goto neg_exit;
 		}
@@ -489,7 +488,7 @@
 	char *ntlmssp_blob = NULL;
 	bool use_spnego = false; /* else use raw ntlmssp */
 
-	cFYI(1, "Session Setup");
+	cifs_dbg(FYI, "Session Setup\n");
 
 	if (ses->server)
 		server = ses->server;
@@ -522,7 +521,7 @@
 	else /* if override flags set only sign/seal OR them with global auth */
 		sec_flags = global_secflags | ses->overrideSecFlg;
 
-	cFYI(1, "sec_flags 0x%x", sec_flags);
+	cifs_dbg(FYI, "sec_flags 0x%x\n", sec_flags);
 
 	req->hdr.SessionId = 0; /* First session, not a reauthenticate */
 	req->VcNumber = 0; /* MBZ */
@@ -558,7 +557,7 @@
 					sizeof(struct _NEGOTIATE_MESSAGE),
 					ntlmssp_blob); */
 			/* BB eventually need to add this */
-			cERROR(1, "spnego not supported for SMB2 yet");
+			cifs_dbg(VFS, "spnego not supported for SMB2 yet\n");
 			rc = -EOPNOTSUPP;
 			kfree(ntlmssp_blob);
 			goto ssetup_exit;
@@ -572,14 +571,14 @@
 		ntlmssp_blob = kzalloc(sizeof(struct _NEGOTIATE_MESSAGE) + 500,
 				       GFP_KERNEL);
 		if (ntlmssp_blob == NULL) {
-			cERROR(1, "failed to malloc ntlmssp blob");
 			rc = -ENOMEM;
 			goto ssetup_exit;
 		}
 		rc = build_ntlmssp_auth_blob(ntlmssp_blob, &blob_length, ses,
 					     nls_cp);
 		if (rc) {
-			cFYI(1, "build_ntlmssp_auth_blob failed %d", rc);
+			cifs_dbg(FYI, "build_ntlmssp_auth_blob failed %d\n",
+				 rc);
 			goto ssetup_exit; /* BB double check error handling */
 		}
 		if (use_spnego) {
@@ -587,7 +586,7 @@
 							&security_blob,
 							blob_length,
 							ntlmssp_blob); */
-			cERROR(1, "spnego not supported for SMB2 yet");
+			cifs_dbg(VFS, "spnego not supported for SMB2 yet\n");
 			rc = -EOPNOTSUPP;
 			kfree(ntlmssp_blob);
 			goto ssetup_exit;
@@ -595,7 +594,7 @@
 			security_blob = ntlmssp_blob;
 		}
 	} else {
-		cERROR(1, "illegal ntlmssp phase");
+		cifs_dbg(VFS, "illegal ntlmssp phase\n");
 		rc = -EIO;
 		goto ssetup_exit;
 	}
@@ -620,13 +619,13 @@
 	if (resp_buftype != CIFS_NO_BUFFER &&
 	    rsp->hdr.Status == STATUS_MORE_PROCESSING_REQUIRED) {
 		if (phase != NtLmNegotiate) {
-			cERROR(1, "Unexpected more processing error");
+			cifs_dbg(VFS, "Unexpected more processing error\n");
 			goto ssetup_exit;
 		}
 		if (offsetof(struct smb2_sess_setup_rsp, Buffer) - 4 !=
 				le16_to_cpu(rsp->SecurityBufferOffset)) {
-			cERROR(1, "Invalid security buffer offset %d",
-				  le16_to_cpu(rsp->SecurityBufferOffset));
+			cifs_dbg(VFS, "Invalid security buffer offset %d\n",
+				 le16_to_cpu(rsp->SecurityBufferOffset));
 			rc = -EIO;
 			goto ssetup_exit;
 		}
@@ -667,7 +666,7 @@
 	int rc = 0;
 	struct TCP_Server_Info *server;
 
-	cFYI(1, "disconnect session %p", ses);
+	cifs_dbg(FYI, "disconnect session %p\n", ses);
 
 	if (ses && (ses->server))
 		server = ses->server;
@@ -711,7 +710,7 @@
 	struct TCP_Server_Info *server;
 	__le16 *unc_path = NULL;
 
-	cFYI(1, "TCON");
+	cifs_dbg(FYI, "TCON\n");
 
 	if ((ses->server) && tree)
 		server = ses->server;
@@ -775,15 +774,15 @@
 	}
 
 	if (rsp->ShareType & SMB2_SHARE_TYPE_DISK)
-		cFYI(1, "connection to disk share");
+		cifs_dbg(FYI, "connection to disk share\n");
 	else if (rsp->ShareType & SMB2_SHARE_TYPE_PIPE) {
 		tcon->ipc = true;
-		cFYI(1, "connection to pipe share");
+		cifs_dbg(FYI, "connection to pipe share\n");
 	} else if (rsp->ShareType & SMB2_SHARE_TYPE_PRINT) {
 		tcon->print = true;
-		cFYI(1, "connection to printer");
+		cifs_dbg(FYI, "connection to printer\n");
 	} else {
-		cERROR(1, "unknown share type %d", rsp->ShareType);
+		cifs_dbg(VFS, "unknown share type %d\n", rsp->ShareType);
 		rc = -EOPNOTSUPP;
 		goto tcon_error_exit;
 	}
@@ -797,7 +796,7 @@
 
 	if ((rsp->Capabilities & SMB2_SHARE_CAP_DFS) &&
 	    ((tcon->share_flags & SHI1005_FLAGS_DFS) == 0))
-		cERROR(1, "DFS capability contradicts DFS flag");
+		cifs_dbg(VFS, "DFS capability contradicts DFS flag\n");
 
 tcon_exit:
 	free_rsp_buf(resp_buftype, rsp);
@@ -806,7 +805,7 @@
 
 tcon_error_exit:
 	if (rsp->hdr.Status == STATUS_BAD_NETWORK_NAME) {
-		cERROR(1, "BAD_NETWORK_NAME: %s", tree);
+		cifs_dbg(VFS, "BAD_NETWORK_NAME: %s\n", tree);
 		tcon->bad_network_name = true;
 	}
 	goto tcon_exit;
@@ -820,7 +819,7 @@
 	struct TCP_Server_Info *server;
 	struct cifs_ses *ses = tcon->ses;
 
-	cFYI(1, "Tree Disconnect");
+	cifs_dbg(FYI, "Tree Disconnect\n");
 
 	if (ses && (ses->server))
 		server = ses->server;
@@ -846,12 +845,10 @@
 {
 	struct create_lease *buf;
 
-	buf = kmalloc(sizeof(struct create_lease), GFP_KERNEL);
+	buf = kzalloc(sizeof(struct create_lease), GFP_KERNEL);
 	if (!buf)
 		return NULL;
 
-	memset(buf, 0, sizeof(struct create_lease));
-
 	buf->lcontext.LeaseKeyLow = cpu_to_le64(*((u64 *)lease_key));
 	buf->lcontext.LeaseKeyHigh = cpu_to_le64(*((u64 *)(lease_key + 8)));
 	if (oplock == SMB2_OPLOCK_LEVEL_EXCLUSIVE)
@@ -925,7 +922,7 @@
 	int rc = 0;
 	int num_iovecs = 2;
 
-	cFYI(1, "create/open");
+	cifs_dbg(FYI, "create/open\n");
 
 	if (ses && (ses->server))
 		server = ses->server;
@@ -1051,7 +1048,7 @@
 	int resp_buftype;
 	int rc = 0;
 
-	cFYI(1, "Close");
+	cifs_dbg(FYI, "Close\n");
 
 	if (ses && (ses->server))
 		server = ses->server;
@@ -1097,20 +1094,20 @@
 
 
 	if (buffer_length < min_buf_size) {
-		cERROR(1, "buffer length %d smaller than minimum size %d",
-			   buffer_length, min_buf_size);
+		cifs_dbg(VFS, "buffer length %d smaller than minimum size %d\n",
+			 buffer_length, min_buf_size);
 		return -EINVAL;
 	}
 
 	/* check if beyond RFC1001 maximum length */
 	if ((smb_len > 0x7FFFFF) || (buffer_length > 0x7FFFFF)) {
-		cERROR(1, "buffer length %d or smb length %d too large",
-			   buffer_length, smb_len);
+		cifs_dbg(VFS, "buffer length %d or smb length %d too large\n",
+			 buffer_length, smb_len);
 		return -EINVAL;
 	}
 
 	if ((begin_of_buf > end_of_smb) || (end_of_buf > end_of_smb)) {
-		cERROR(1, "illegal server response, bad offset to data");
+		cifs_dbg(VFS, "illegal server response, bad offset to data\n");
 		return -EINVAL;
 	}
 
@@ -1155,7 +1152,7 @@
 	struct TCP_Server_Info *server;
 	struct cifs_ses *ses = tcon->ses;
 
-	cFYI(1, "Query Info");
+	cifs_dbg(FYI, "Query Info\n");
 
 	if (ses && (ses->server))
 		server = ses->server;
@@ -1247,7 +1244,7 @@
 	struct smb_rqst rqst = { .rq_iov = &iov,
 				 .rq_nvec = 1 };
 
-	cFYI(1, "In echo request");
+	cifs_dbg(FYI, "In echo request\n");
 
 	rc = small_smb2_init(SMB2_ECHO, NULL, (void **)&req);
 	if (rc)
@@ -1262,7 +1259,7 @@
 	rc = cifs_call_async(server, &rqst, NULL, smb2_echo_callback, server,
 			     CIFS_ECHO_OP);
 	if (rc)
-		cFYI(1, "Echo request failed: %d", rc);
+		cifs_dbg(FYI, "Echo request failed: %d\n", rc);
 
 	cifs_small_buf_release(req);
 	return rc;
@@ -1279,7 +1276,7 @@
 	int resp_buftype;
 	int rc = 0;
 
-	cFYI(1, "Flush");
+	cifs_dbg(FYI, "Flush\n");
 
 	if (ses && (ses->server))
 		server = ses->server;
@@ -1379,8 +1376,9 @@
 				 .rq_pagesz = rdata->pagesz,
 				 .rq_tailsz = rdata->tailsz };
 
-	cFYI(1, "%s: mid=%llu state=%d result=%d bytes=%u", __func__,
-		mid->mid, mid->mid_state, rdata->result, rdata->bytes);
+	cifs_dbg(FYI, "%s: mid=%llu state=%d result=%d bytes=%u\n",
+		 __func__, mid->mid, mid->mid_state, rdata->result,
+		 rdata->bytes);
 
 	switch (mid->mid_state) {
 	case MID_RESPONSE_RECEIVED:
@@ -1392,8 +1390,8 @@
 
 			rc = smb2_verify_signature(&rqst, server);
 			if (rc)
-				cERROR(1, "SMB signature verification returned "
-				       "error = %d", rc);
+				cifs_dbg(VFS, "SMB signature verification returned error = %d\n",
+					 rc);
 		}
 		/* FIXME: should this be counted toward the initiating task? */
 		task_io_account_read(rdata->bytes);
@@ -1426,8 +1424,8 @@
 	struct smb_rqst rqst = { .rq_iov = &rdata->iov,
 				 .rq_nvec = 1 };
 
-	cFYI(1, "%s: offset=%llu bytes=%u", __func__,
-		rdata->offset, rdata->bytes);
+	cifs_dbg(FYI, "%s: offset=%llu bytes=%u\n",
+		 __func__, rdata->offset, rdata->bytes);
 
 	io_parms.tcon = tlink_tcon(rdata->cfile->tlink);
 	io_parms.offset = rdata->offset;
@@ -1481,13 +1479,13 @@
 
 	if (rc) {
 		cifs_stats_fail_inc(io_parms->tcon, SMB2_READ_HE);
-		cERROR(1, "Send error in read = %d", rc);
+		cifs_dbg(VFS, "Send error in read = %d\n", rc);
 	} else {
 		*nbytes = le32_to_cpu(rsp->DataLength);
 		if ((*nbytes > CIFS_MAX_MSGSIZE) ||
 		    (*nbytes > io_parms->length)) {
-			cFYI(1, "bad length %d for count %d", *nbytes,
-				io_parms->length);
+			cifs_dbg(FYI, "bad length %d for count %d\n",
+				 *nbytes, io_parms->length);
 			rc = -EIO;
 			*nbytes = 0;
 		}
@@ -1597,7 +1595,8 @@
 	rqst.rq_pagesz = wdata->pagesz;
 	rqst.rq_tailsz = wdata->tailsz;
 
-	cFYI(1, "async write at %llu %u bytes", wdata->offset, wdata->bytes);
+	cifs_dbg(FYI, "async write at %llu %u bytes\n",
+		 wdata->offset, wdata->bytes);
 
 	req->Length = cpu_to_le32(wdata->bytes);
 
@@ -1670,7 +1669,7 @@
 
 	if (rc) {
 		cifs_stats_fail_inc(io_parms->tcon, SMB2_WRITE_HE);
-		cERROR(1, "Send error in write = %d", rc);
+		cifs_dbg(VFS, "Send error in write = %d\n", rc);
 	} else
 		*nbytes = le32_to_cpu(rsp->DataLength);
 
@@ -1696,14 +1695,14 @@
 					((char *)entryptr + next_offset);
 
 		if ((char *)entryptr + size > end_of_buf) {
-			cERROR(1, "malformed search entry would overflow");
+			cifs_dbg(VFS, "malformed search entry would overflow\n");
 			break;
 		}
 
 		len = le32_to_cpu(entryptr->FileNameLength);
 		if ((char *)entryptr + len + size > end_of_buf) {
-			cERROR(1, "directory entry name would overflow frame "
-				  "end of buf %p", end_of_buf);
+			cifs_dbg(VFS, "directory entry name would overflow frame end of buf %p\n",
+				 end_of_buf);
 			break;
 		}
 
@@ -1759,8 +1758,8 @@
 		info_buf_size = sizeof(SEARCH_ID_FULL_DIR_INFO) - 1;
 		break;
 	default:
-		cERROR(1, "info level %u isn't supported",
-		       srch_inf->info_level);
+		cifs_dbg(VFS, "info level %u isn't supported\n",
+			 srch_inf->info_level);
 		rc = -EINVAL;
 		goto qdir_exit;
 	}
@@ -1824,15 +1823,15 @@
 			num_entries(srch_inf->srch_entries_start, end_of_smb,
 				    &srch_inf->last_entry, info_buf_size);
 	srch_inf->index_of_last_entry += srch_inf->entries_in_buffer;
-	cFYI(1, "num entries %d last_index %lld srch start %p srch end %p",
-		srch_inf->entries_in_buffer, srch_inf->index_of_last_entry,
-		srch_inf->srch_entries_start, srch_inf->last_entry);
+	cifs_dbg(FYI, "num entries %d last_index %lld srch start %p srch end %p\n",
+		 srch_inf->entries_in_buffer, srch_inf->index_of_last_entry,
+		 srch_inf->srch_entries_start, srch_inf->last_entry);
 	if (resp_buftype == CIFS_LARGE_BUFFER)
 		srch_inf->smallBuf = false;
 	else if (resp_buftype == CIFS_SMALL_BUFFER)
 		srch_inf->smallBuf = true;
 	else
-		cERROR(1, "illegal search buffer type");
+		cifs_dbg(VFS, "illegal search buffer type\n");
 
 	if (rsp->hdr.Status == STATUS_NO_MORE_FILES)
 		srch_inf->endOfSearch = 1;
@@ -2017,7 +2016,7 @@
 	int rc;
 	struct smb2_oplock_break *req = NULL;
 
-	cFYI(1, "SMB2_oplock_break");
+	cifs_dbg(FYI, "SMB2_oplock_break\n");
 	rc = small_smb2_init(SMB2_OPLOCK_BREAK, tcon, (void **) &req);
 
 	if (rc)
@@ -2033,7 +2032,7 @@
 
 	if (rc) {
 		cifs_stats_fail_inc(tcon, SMB2_OPLOCK_BREAK_HE);
-		cFYI(1, "Send error in Oplock Break = %d", rc);
+		cifs_dbg(FYI, "Send error in Oplock Break = %d\n", rc);
 	}
 
 	return rc;
@@ -2058,7 +2057,7 @@
 	int rc;
 	struct smb2_query_info_req *req;
 
-	cFYI(1, "Query FSInfo level %d", level);
+	cifs_dbg(FYI, "Query FSInfo level %d\n", level);
 
 	if ((tcon->ses == NULL) || (tcon->ses->server == NULL))
 		return -EIO;
@@ -2131,7 +2130,7 @@
 	int resp_buf_type;
 	unsigned int count;
 
-	cFYI(1, "smb2_lockv num lock %d", num_lock);
+	cifs_dbg(FYI, "smb2_lockv num lock %d\n", num_lock);
 
 	rc = small_smb2_init(SMB2_LOCK, tcon, (void **) &req);
 	if (rc)
@@ -2155,7 +2154,7 @@
 	cifs_stats_inc(&tcon->stats.cifs_stats.num_locks);
 	rc = SendReceive2(xid, tcon->ses, iov, 2, &resp_buf_type, CIFS_NO_RESP);
 	if (rc) {
-		cFYI(1, "Send error in smb2_lockv = %d", rc);
+		cifs_dbg(FYI, "Send error in smb2_lockv = %d\n", rc);
 		cifs_stats_fail_inc(tcon, SMB2_LOCK_HE);
 	}
 
@@ -2186,7 +2185,7 @@
 	int rc;
 	struct smb2_lease_ack *req = NULL;
 
-	cFYI(1, "SMB2_lease_break");
+	cifs_dbg(FYI, "SMB2_lease_break\n");
 	rc = small_smb2_init(SMB2_OPLOCK_BREAK, tcon, (void **) &req);
 
 	if (rc)
@@ -2204,7 +2203,7 @@
 
 	if (rc) {
 		cifs_stats_fail_inc(tcon, SMB2_OPLOCK_BREAK_HE);
-		cFYI(1, "Send error in Lease Break = %d", rc);
+		cifs_dbg(FYI, "Send error in Lease Break = %d\n", rc);
 	}
 
 	return rc;
diff --git a/fs/cifs/smb2transport.c b/fs/cifs/smb2transport.c
index 8dd73e6..01f0ac8 100644
--- a/fs/cifs/smb2transport.c
+++ b/fs/cifs/smb2transport.c
@@ -55,13 +55,13 @@
 	rc = crypto_shash_setkey(server->secmech.hmacsha256,
 		server->session_key.response, SMB2_NTLMV2_SESSKEY_SIZE);
 	if (rc) {
-		cERROR(1, "%s: Could not update with response\n", __func__);
+		cifs_dbg(VFS, "%s: Could not update with response\n", __func__);
 		return rc;
 	}
 
 	rc = crypto_shash_init(&server->secmech.sdeschmacsha256->shash);
 	if (rc) {
-		cERROR(1, "%s: Could not init md5\n", __func__);
+		cifs_dbg(VFS, "%s: Could not init md5\n", __func__);
 		return rc;
 	}
 
@@ -69,7 +69,7 @@
 		if (iov[i].iov_len == 0)
 			continue;
 		if (iov[i].iov_base == NULL) {
-			cERROR(1, "null iovec entry");
+			cifs_dbg(VFS, "null iovec entry\n");
 			return -EIO;
 		}
 		/*
@@ -90,8 +90,8 @@
 				iov[i].iov_base, iov[i].iov_len);
 		}
 		if (rc) {
-			cERROR(1, "%s: Could not update with payload\n",
-							__func__);
+			cifs_dbg(VFS, "%s: Could not update with payload\n",
+				 __func__);
 			return rc;
 		}
 	}
@@ -109,7 +109,7 @@
 	rc = crypto_shash_final(&server->secmech.sdeschmacsha256->shash,
 				sigptr);
 	if (rc)
-		cERROR(1, "%s: Could not generate sha256 hash\n", __func__);
+		cifs_dbg(VFS, "%s: Could not generate sha256 hash\n", __func__);
 
 	memcpy(smb2_pdu->Signature, sigptr, SMB2_SIGNATURE_SIZE);
 
@@ -119,7 +119,7 @@
 int
 smb3_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server)
 {
-	cFYI(1, "smb3 signatures not supported yet");
+	cifs_dbg(FYI, "smb3 signatures not supported yet\n");
 	return -EOPNOTSUPP;
 }
 
@@ -163,8 +163,8 @@
 
 	/* Do not need to verify session setups with signature "BSRSPYL " */
 	if (memcmp(smb2_pdu->Signature, "BSRSPYL ", 8) == 0)
-		cFYI(1, "dummy signature received for smb command 0x%x",
-			smb2_pdu->Command);
+		cifs_dbg(FYI, "dummy signature received for smb command 0x%x\n",
+			 smb2_pdu->Command);
 
 	/*
 	 * Save off the original signature so we can modify the smb and check
@@ -205,7 +205,7 @@
 	struct mid_q_entry *temp;
 
 	if (server == NULL) {
-		cERROR(1, "Null TCP session in smb2_mid_entry_alloc");
+		cifs_dbg(VFS, "Null TCP session in smb2_mid_entry_alloc\n");
 		return NULL;
 	}
 
@@ -241,7 +241,7 @@
 		return -ENOENT;
 
 	if (ses->server->tcpStatus == CifsNeedReconnect) {
-		cFYI(1, "tcp session dead - return to caller to retry");
+		cifs_dbg(FYI, "tcp session dead - return to caller to retry\n");
 		return -EAGAIN;
 	}
 
@@ -281,8 +281,8 @@
 
 		rc = smb2_verify_signature(&rqst, server);
 		if (rc)
-			cERROR(1, "SMB signature verification returned error = "
-			       "%d", rc);
+			cifs_dbg(VFS, "SMB signature verification returned error = %d\n",
+				 rc);
 	}
 
 	return map_smb2_to_linux_error(mid->resp_buf, log_error);
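
The smb2transport.c signing code above follows the standard kernel crypto
shash sequence: setkey on the tfm, then init/update/final on a shash_desc.
A self-contained sketch of that call sequence for HMAC-SHA256
(hmac_sha256_digest is an illustrative helper, not a function from this
series; a real caller would allocate the tfm once and reuse it):

	#include <crypto/hash.h>
	#include <linux/err.h>
	#include <linux/slab.h>

	static int hmac_sha256_digest(const u8 *key, unsigned int keylen,
				      const u8 *data, unsigned int len, u8 *out)
	{
		struct crypto_shash *tfm;
		struct shash_desc *desc;
		int rc;

		tfm = crypto_alloc_shash("hmac(sha256)", 0, 0);
		if (IS_ERR(tfm))
			return PTR_ERR(tfm);

		rc = crypto_shash_setkey(tfm, key, keylen);
		if (rc)
			goto out_free_tfm;

		/* descriptor is sized per-algorithm, hence descsize() */
		desc = kzalloc(sizeof(*desc) + crypto_shash_descsize(tfm),
			       GFP_KERNEL);
		if (!desc) {
			rc = -ENOMEM;
			goto out_free_tfm;
		}
		desc->tfm = tfm;

		rc = crypto_shash_init(desc);
		if (!rc)
			rc = crypto_shash_update(desc, data, len);
		if (!rc)
			rc = crypto_shash_final(desc, out);	/* 32-byte digest */

		kfree(desc);
	out_free_tfm:
		crypto_free_shash(tfm);
		return rc;
	}
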
diff --git a/fs/cifs/smbencrypt.c b/fs/cifs/smbencrypt.c
index a0a58fb..43eb136 100644
--- a/fs/cifs/smbencrypt.c
+++ b/fs/cifs/smbencrypt.c
@@ -78,7 +78,7 @@
 	tfm_des = crypto_alloc_blkcipher("ecb(des)", 0, CRYPTO_ALG_ASYNC);
 	if (IS_ERR(tfm_des)) {
 		rc = PTR_ERR(tfm_des);
-		cERROR(1, "could not allocate des crypto API");
+		cifs_dbg(VFS, "could not allocate des crypto API\n");
 		goto smbhash_err;
 	}
 
@@ -91,7 +91,7 @@
 
 	rc = crypto_blkcipher_encrypt(&desc, &sgout, &sgin, 8);
 	if (rc)
-		cERROR(1, "could not encrypt crypt key rc: %d", rc);
+		cifs_dbg(VFS, "could not encrypt crypt key rc: %d\n", rc);
 
 	crypto_free_blkcipher(tfm_des);
 smbhash_err:
@@ -139,14 +139,14 @@
 	md4 = crypto_alloc_shash("md4", 0, 0);
 	if (IS_ERR(md4)) {
 		rc = PTR_ERR(md4);
-		cERROR(1, "%s: Crypto md4 allocation error %d", __func__, rc);
+		cifs_dbg(VFS, "%s: Crypto md4 allocation error %d\n",
+			 __func__, rc);
 		return rc;
 	}
 	size = sizeof(struct shash_desc) + crypto_shash_descsize(md4);
 	sdescmd4 = kmalloc(size, GFP_KERNEL);
 	if (!sdescmd4) {
 		rc = -ENOMEM;
-		cERROR(1, "%s: Memory allocation failure", __func__);
 		goto mdfour_err;
 	}
 	sdescmd4->shash.tfm = md4;
@@ -154,17 +154,17 @@
 
 	rc = crypto_shash_init(&sdescmd4->shash);
 	if (rc) {
-		cERROR(1, "%s: Could not init md4 shash", __func__);
+		cifs_dbg(VFS, "%s: Could not init md4 shash\n", __func__);
 		goto mdfour_err;
 	}
 	rc = crypto_shash_update(&sdescmd4->shash, link_str, link_len);
 	if (rc) {
-		cERROR(1, "%s: Could not update with link_str", __func__);
+		cifs_dbg(VFS, "%s: Could not update with link_str\n", __func__);
 		goto mdfour_err;
 	}
 	rc = crypto_shash_final(&sdescmd4->shash, md4_hash);
 	if (rc)
-		cERROR(1, "%s: Could not genereate md4 hash", __func__);
+		cifs_dbg(VFS, "%s: Could not generate md4 hash\n", __func__);
 
 mdfour_err:
 	crypto_free_shash(md4);
@@ -238,7 +238,8 @@
 
 	rc = E_md4hash(passwd, p16, codepage);
 	if (rc) {
-		cFYI(1, "%s Can't generate NT hash, error: %d", __func__, rc);
+		cifs_dbg(FYI, "%s Can't generate NT hash, error: %d\n",
+			 __func__, rc);
 		return rc;
 	}
 	memcpy(p21, p16, 16);
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index 1a52868..bfbf470 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -49,7 +49,7 @@
 	struct mid_q_entry *temp;
 
 	if (server == NULL) {
-		cERROR(1, "Null TCP session in AllocMidQEntry");
+		cifs_dbg(VFS, "Null TCP session in AllocMidQEntry\n");
 		return NULL;
 	}
 
@@ -61,7 +61,7 @@
 		temp->mid = smb_buffer->Mid;	/* always LE */
 		temp->pid = current->pid;
 		temp->command = cpu_to_le16(smb_buffer->Command);
-		cFYI(1, "For smb_command %d", smb_buffer->Command);
+		cifs_dbg(FYI, "For smb_command %d\n", smb_buffer->Command);
 	/*	do_gettimeofday(&temp->when_sent);*/ /* easier to use jiffies */
 		/* when mid allocated can be before when sent */
 		temp->when_alloc = jiffies;
@@ -179,17 +179,11 @@
 		 */
 		rc = kernel_sendmsg(ssocket, &smb_msg, &iov[first_vec],
 				    n_vec - first_vec, remaining);
-		if (rc == -ENOSPC || rc == -EAGAIN) {
-			/*
-			 * Catch if a low level driver returns -ENOSPC. This
-			 * WARN_ON will be removed by 3.10 if no one reports
-			 * seeing this.
-			 */
-			WARN_ON_ONCE(rc == -ENOSPC);
+		if (rc == -EAGAIN) {
 			i++;
 			if (i >= 14 || (!server->noblocksnd && (i > 2))) {
-				cERROR(1, "sends on sock %p stuck for 15 "
-					  "seconds", ssocket);
+				cifs_dbg(VFS, "sends on sock %p stuck for 15 seconds\n",
+					 ssocket);
 				rc = -EAGAIN;
 				break;
 			}
@@ -209,14 +203,14 @@
 		}
 
 		if (rc > remaining) {
-			cERROR(1, "sent %d requested %d", rc, remaining);
+			cifs_dbg(VFS, "sent %d requested %d\n", rc, remaining);
 			break;
 		}
 
 		if (rc == 0) {
 			/* should never happen, letting socket clear before
 			   retrying is our only obvious option here */
-			cERROR(1, "tcp sent no data");
+			cifs_dbg(VFS, "tcp sent no data\n");
 			msleep(500);
 			continue;
 		}
@@ -291,7 +285,7 @@
 	if (ssocket == NULL)
 		return -ENOTSOCK;
 
-	cFYI(1, "Sending smb: smb_len=%u", smb_buf_length);
+	cifs_dbg(FYI, "Sending smb: smb_len=%u\n", smb_buf_length);
 	dump_smb(iov[0].iov_base, iov[0].iov_len);
 
 	/* cork the socket */
@@ -324,8 +318,8 @@
 				(char *)&val, sizeof(val));
 
 	if ((total_len > 0) && (total_len != smb_buf_length + 4)) {
-		cFYI(1, "partial send (wanted=%u sent=%zu): terminating "
-			"session", smb_buf_length + 4, total_len);
+		cifs_dbg(FYI, "partial send (wanted=%u sent=%zu): terminating session\n",
+			 smb_buf_length + 4, total_len);
 		/*
 		 * If we have only sent part of an SMB then the next SMB could
 		 * be taken as the remainder of this one. We need to kill the
@@ -335,7 +329,8 @@
 	}
 
 	if (rc < 0 && rc != -EINTR)
-		cERROR(1, "Error %d sending data on socket to server", rc);
+		cifs_dbg(VFS, "Error %d sending data on socket to server\n",
+			 rc);
 	else
 		rc = 0;
 
@@ -427,7 +422,7 @@
 	}
 
 	if (ses->server->tcpStatus == CifsNeedReconnect) {
-		cFYI(1, "tcp session dead - return to caller to retry");
+		cifs_dbg(FYI, "tcp session dead - return to caller to retry\n");
 		return -EAGAIN;
 	}
 
@@ -527,6 +522,9 @@
 	rc = smb_send_rqst(server, rqst);
 	cifs_in_send_dec(server);
 	cifs_save_when_sent(mid);
+
+	if (rc < 0)
+		server->sequence_number -= 2;
 	mutex_unlock(&server->srv_mutex);
 
 	if (rc == 0)
@@ -559,7 +557,7 @@
 	iov[0].iov_len = get_rfc1002_length(in_buf) + 4;
 	flags |= CIFS_NO_RESP;
 	rc = SendReceive2(xid, ses, iov, 1, &resp_buf_type, flags);
-	cFYI(DBG2, "SendRcvNoRsp flags %d rc %d", flags, rc);
+	cifs_dbg(NOISY, "SendRcvNoRsp flags %d rc %d\n", flags, rc);
 
 	return rc;
 }
@@ -569,8 +567,8 @@
 {
 	int rc = 0;
 
-	cFYI(1, "%s: cmd=%d mid=%llu state=%d", __func__,
-	     le16_to_cpu(mid->command), mid->mid, mid->mid_state);
+	cifs_dbg(FYI, "%s: cmd=%d mid=%llu state=%d\n",
+		 __func__, le16_to_cpu(mid->command), mid->mid, mid->mid_state);
 
 	spin_lock(&GlobalMid_Lock);
 	switch (mid->mid_state) {
@@ -588,8 +586,8 @@
 		break;
 	default:
 		list_del_init(&mid->qhead);
-		cERROR(1, "%s: invalid mid state mid=%llu state=%d", __func__,
-		       mid->mid, mid->mid_state);
+		cifs_dbg(VFS, "%s: invalid mid state mid=%llu state=%d\n",
+			 __func__, mid->mid, mid->mid_state);
 		rc = -EIO;
 	}
 	spin_unlock(&GlobalMid_Lock);
@@ -624,10 +622,10 @@
 		iov.iov_len = len;
 		/* FIXME: add code to kill session */
 		rc = cifs_verify_signature(&rqst, server,
-					   mid->sequence_number + 1);
+					   mid->sequence_number);
 		if (rc)
-			cERROR(1, "SMB signature verification returned error = "
-			       "%d", rc);
+			cifs_dbg(VFS, "SMB signature verification returned error = %d\n",
+				 rc);
 	}
 
 	/* BB special case reconnect tid and uid here? */
@@ -672,7 +670,7 @@
 
 	if ((ses == NULL) || (ses->server == NULL)) {
 		cifs_small_buf_release(buf);
-		cERROR(1, "Null session");
+		cifs_dbg(VFS, "Null session\n");
 		return -EIO;
 	}
 
@@ -716,6 +714,8 @@
 	cifs_in_send_dec(ses->server);
 	cifs_save_when_sent(midQ);
 
+	if (rc < 0)
+		ses->server->sequence_number -= 2;
 	mutex_unlock(&ses->server->srv_mutex);
 
 	if (rc < 0) {
@@ -752,7 +752,7 @@
 
 	if (!midQ->resp_buf || midQ->mid_state != MID_RESPONSE_RECEIVED) {
 		rc = -EIO;
-		cFYI(1, "Bad MID state?");
+		cifs_dbg(FYI, "Bad MID state?\n");
 		goto out;
 	}
 
@@ -788,11 +788,11 @@
 	struct mid_q_entry *midQ;
 
 	if (ses == NULL) {
-		cERROR(1, "Null smb session");
+		cifs_dbg(VFS, "Null smb session\n");
 		return -EIO;
 	}
 	if (ses->server == NULL) {
-		cERROR(1, "Null tcp session");
+		cifs_dbg(VFS, "Null tcp session\n");
 		return -EIO;
 	}
 
@@ -805,8 +805,8 @@
 
 	if (be32_to_cpu(in_buf->smb_buf_length) > CIFSMaxBufSize +
 			MAX_CIFS_HDR_SIZE - 4) {
-		cERROR(1, "Illegal length, greater than maximum frame, %d",
-			   be32_to_cpu(in_buf->smb_buf_length));
+		cifs_dbg(VFS, "Illegal length, greater than maximum frame, %d\n",
+			 be32_to_cpu(in_buf->smb_buf_length));
 		return -EIO;
 	}
 
@@ -840,6 +840,10 @@
 	rc = smb_send(ses->server, in_buf, be32_to_cpu(in_buf->smb_buf_length));
 	cifs_in_send_dec(ses->server);
 	cifs_save_when_sent(midQ);
+
+	if (rc < 0)
+		ses->server->sequence_number -= 2;
+
 	mutex_unlock(&ses->server->srv_mutex);
 
 	if (rc < 0)
@@ -871,7 +875,7 @@
 	if (!midQ->resp_buf || !out_buf ||
 	    midQ->mid_state != MID_RESPONSE_RECEIVED) {
 		rc = -EIO;
-		cERROR(1, "Bad MID state?");
+		cifs_dbg(VFS, "Bad MID state?\n");
 		goto out;
 	}
 
@@ -921,13 +925,13 @@
 	struct cifs_ses *ses;
 
 	if (tcon == NULL || tcon->ses == NULL) {
-		cERROR(1, "Null smb session");
+		cifs_dbg(VFS, "Null smb session\n");
 		return -EIO;
 	}
 	ses = tcon->ses;
 
 	if (ses->server == NULL) {
-		cERROR(1, "Null tcp session");
+		cifs_dbg(VFS, "Null tcp session\n");
 		return -EIO;
 	}
 
@@ -940,8 +944,8 @@
 
 	if (be32_to_cpu(in_buf->smb_buf_length) > CIFSMaxBufSize +
 			MAX_CIFS_HDR_SIZE - 4) {
-		cERROR(1, "Illegal length, greater than maximum frame, %d",
-			   be32_to_cpu(in_buf->smb_buf_length));
+		cifs_dbg(VFS, "Illegal length, greater than maximum frame, %d\n",
+			 be32_to_cpu(in_buf->smb_buf_length));
 		return -EIO;
 	}
 
@@ -973,6 +977,10 @@
 	rc = smb_send(ses->server, in_buf, be32_to_cpu(in_buf->smb_buf_length));
 	cifs_in_send_dec(ses->server);
 	cifs_save_when_sent(midQ);
+
+	if (rc < 0)
+		ses->server->sequence_number -= 2;
+
 	mutex_unlock(&ses->server->srv_mutex);
 
 	if (rc < 0) {
@@ -1038,7 +1046,7 @@
 	/* rcvd frame is ok */
 	if (out_buf == NULL || midQ->mid_state != MID_RESPONSE_RECEIVED) {
 		rc = -EIO;
-		cERROR(1, "Bad MID state?");
+		cifs_dbg(VFS, "Bad MID state?\n");
 		goto out;
 	}
 
diff --git a/fs/cifs/xattr.c b/fs/cifs/xattr.c
index 5142f2c..09afda4 100644
--- a/fs/cifs/xattr.c
+++ b/fs/cifs/xattr.c
@@ -68,12 +68,12 @@
 		goto remove_ea_exit;
 	}
 	if (ea_name == NULL) {
-		cFYI(1, "Null xattr names not supported");
+		cifs_dbg(FYI, "Null xattr names not supported\n");
 	} else if (strncmp(ea_name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN)
 		&& (strncmp(ea_name, XATTR_OS2_PREFIX, XATTR_OS2_PREFIX_LEN))) {
-		cFYI(1,
-		     "illegal xattr request %s (only user namespace supported)",
-		     ea_name);
+		cifs_dbg(FYI,
+			 "illegal xattr request %s (only user namespace supported)\n",
+			 ea_name);
 		/* BB what if no namespace prefix? */
 		/* Should we just pass them to server, except for
 		system and perhaps security prefixes? */
@@ -134,19 +134,19 @@
 		search server for EAs or streams to
 		return as xattrs */
 	if (value_size > MAX_EA_VALUE_SIZE) {
-		cFYI(1, "size of EA value too large");
+		cifs_dbg(FYI, "size of EA value too large\n");
 		rc = -EOPNOTSUPP;
 		goto set_ea_exit;
 	}
 
 	if (ea_name == NULL) {
-		cFYI(1, "Null xattr names not supported");
+		cifs_dbg(FYI, "Null xattr names not supported\n");
 	} else if (strncmp(ea_name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN)
 		   == 0) {
 		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_XATTR)
 			goto set_ea_exit;
 		if (strncmp(ea_name, CIFS_XATTR_DOS_ATTRIB, 14) == 0)
-			cFYI(1, "attempt to set cifs inode metadata");
+			cifs_dbg(FYI, "attempt to set cifs inode metadata\n");
 
 		ea_name += XATTR_USER_PREFIX_LEN; /* skip past user. prefix */
 		rc = CIFSSMBSetEA(xid, pTcon, full_path, ea_name, ea_value,
@@ -167,8 +167,6 @@
 		struct cifs_ntsd *pacl;
 		pacl = kmalloc(value_size, GFP_KERNEL);
 		if (!pacl) {
-			cFYI(1, "%s: Can't allocate memory for ACL",
-					__func__);
 			rc = -ENOMEM;
 		} else {
 			memcpy(pacl, ea_value, value_size);
@@ -179,7 +177,7 @@
 			kfree(pacl);
 		}
 #else
-			cFYI(1, "Set CIFS ACL not supported yet");
+		cifs_dbg(FYI, "Set CIFS ACL not supported yet\n");
 #endif /* CONFIG_CIFS_ACL */
 	} else {
 		int temp;
@@ -193,9 +191,9 @@
 					ACL_TYPE_ACCESS, cifs_sb->local_nls,
 					cifs_sb->mnt_cifs_flags &
 						CIFS_MOUNT_MAP_SPECIAL_CHR);
-			cFYI(1, "set POSIX ACL rc %d", rc);
+			cifs_dbg(FYI, "set POSIX ACL rc %d\n", rc);
 #else
-			cFYI(1, "set POSIX ACL not supported");
+			cifs_dbg(FYI, "set POSIX ACL not supported\n");
 #endif
 		} else if (strncmp(ea_name, POSIX_ACL_XATTR_DEFAULT,
 				   strlen(POSIX_ACL_XATTR_DEFAULT)) == 0) {
@@ -206,13 +204,13 @@
 					ACL_TYPE_DEFAULT, cifs_sb->local_nls,
 					cifs_sb->mnt_cifs_flags &
 						CIFS_MOUNT_MAP_SPECIAL_CHR);
-			cFYI(1, "set POSIX default ACL rc %d", rc);
+			cifs_dbg(FYI, "set POSIX default ACL rc %d\n", rc);
 #else
-			cFYI(1, "set default POSIX ACL not supported");
+			cifs_dbg(FYI, "set default POSIX ACL not supported\n");
 #endif
 		} else {
-			cFYI(1, "illegal xattr request %s (only user namespace"
-				" supported)", ea_name);
+			cifs_dbg(FYI, "illegal xattr request %s (only user namespace supported)\n",
+				 ea_name);
 		  /* BB what if no namespace prefix? */
 		  /* Should we just pass them to server, except for
 		  system and perhaps security prefixes? */
@@ -263,14 +261,14 @@
 	/* return dos attributes as pseudo xattr */
 	/* return alt name if available as pseudo attr */
 	if (ea_name == NULL) {
-		cFYI(1, "Null xattr names not supported");
+		cifs_dbg(FYI, "Null xattr names not supported\n");
 	} else if (strncmp(ea_name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN)
 		   == 0) {
 		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_XATTR)
 			goto get_ea_exit;
 
 		if (strncmp(ea_name, CIFS_XATTR_DOS_ATTRIB, 14) == 0) {
-			cFYI(1, "attempt to query cifs inode metadata");
+			cifs_dbg(FYI, "attempt to query cifs inode metadata\n");
 			/* revalidate/getattr then populate from inode */
 		} /* BB add else when above is implemented */
 		ea_name += XATTR_USER_PREFIX_LEN; /* skip past user. prefix */
@@ -295,7 +293,7 @@
 				cifs_sb->mnt_cifs_flags &
 					CIFS_MOUNT_MAP_SPECIAL_CHR);
 #else
-		cFYI(1, "Query POSIX ACL not supported yet");
+		cifs_dbg(FYI, "Query POSIX ACL not supported yet\n");
 #endif /* CONFIG_CIFS_POSIX */
 	} else if (strncmp(ea_name, POSIX_ACL_XATTR_DEFAULT,
 			  strlen(POSIX_ACL_XATTR_DEFAULT)) == 0) {
@@ -307,7 +305,7 @@
 				cifs_sb->mnt_cifs_flags &
 					CIFS_MOUNT_MAP_SPECIAL_CHR);
 #else
-		cFYI(1, "Query POSIX default ACL not supported yet");
+		cifs_dbg(FYI, "Query POSIX default ACL not supported yet\n");
 #endif /* CONFIG_CIFS_POSIX */
 	} else if (strncmp(ea_name, CIFS_XATTR_CIFS_ACL,
 				strlen(CIFS_XATTR_CIFS_ACL)) == 0) {
@@ -319,8 +317,8 @@
 						full_path, &acllen);
 			if (IS_ERR(pacl)) {
 				rc = PTR_ERR(pacl);
-				cERROR(1, "%s: error %zd getting sec desc",
-						__func__, rc);
+				cifs_dbg(VFS, "%s: error %zd getting sec desc\n",
+					 __func__, rc);
 			} else {
 				if (ea_value) {
 					if (acllen > buf_size)
@@ -332,18 +330,18 @@
 				kfree(pacl);
 			}
 #else
-		cFYI(1, "Query CIFS ACL not supported yet");
+			cifs_dbg(FYI, "Query CIFS ACL not supported yet\n");
 #endif /* CONFIG_CIFS_ACL */
 	} else if (strncmp(ea_name,
 		  XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) == 0) {
-		cFYI(1, "Trusted xattr namespace not supported yet");
+		cifs_dbg(FYI, "Trusted xattr namespace not supported yet\n");
 	} else if (strncmp(ea_name,
 		  XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN) == 0) {
-		cFYI(1, "Security xattr namespace not supported yet");
+		cifs_dbg(FYI, "Security xattr namespace not supported yet\n");
 	} else
-		cFYI(1,
-		    "illegal xattr request %s (only user namespace supported)",
-		     ea_name);
+		cifs_dbg(FYI,
+			 "illegal xattr request %s (only user namespace supported)\n",
+			 ea_name);
 
 	/* We could add an additional check for streams ie
 	    if proc/fs/cifs/streamstoxattr is set then
diff --git a/include/asm-generic/cputime_nsecs.h b/include/asm-generic/cputime_nsecs.h
index a8ece9a..2c9e62c 100644
--- a/include/asm-generic/cputime_nsecs.h
+++ b/include/asm-generic/cputime_nsecs.h
@@ -16,21 +16,27 @@
 #ifndef _ASM_GENERIC_CPUTIME_NSECS_H
 #define _ASM_GENERIC_CPUTIME_NSECS_H
 
+#include <linux/math64.h>
+
 typedef u64 __nocast cputime_t;
 typedef u64 __nocast cputime64_t;
 
 #define cputime_one_jiffy		jiffies_to_cputime(1)
 
+#define cputime_div(__ct, divisor)  div_u64((__force u64)__ct, divisor)
+#define cputime_div_rem(__ct, divisor, remainder) \
+	div_u64_rem((__force u64)__ct, divisor, remainder);
+
 /*
  * Convert cputime <-> jiffies (HZ)
  */
 #define cputime_to_jiffies(__ct)	\
-	((__force u64)(__ct) / (NSEC_PER_SEC / HZ))
+	cputime_div(__ct, NSEC_PER_SEC / HZ)
 #define cputime_to_scaled(__ct)		(__ct)
 #define jiffies_to_cputime(__jif)	\
 	(__force cputime_t)((__jif) * (NSEC_PER_SEC / HZ))
 #define cputime64_to_jiffies64(__ct)	\
-	((__force u64)(__ct) / (NSEC_PER_SEC / HZ))
+	cputime_div(__ct, NSEC_PER_SEC / HZ)
 #define jiffies64_to_cputime64(__jif)	\
 	(__force cputime64_t)((__jif) * (NSEC_PER_SEC / HZ))
 
@@ -45,7 +51,7 @@
  * Convert cputime <-> microseconds
  */
 #define cputime_to_usecs(__ct)		\
-	((__force u64)(__ct) / NSEC_PER_USEC)
+	cputime_div(__ct, NSEC_PER_USEC)
 #define usecs_to_cputime(__usecs)	\
 	(__force cputime_t)((__usecs) * NSEC_PER_USEC)
 #define usecs_to_cputime64(__usecs)	\
@@ -55,7 +61,7 @@
  * Convert cputime <-> seconds
  */
 #define cputime_to_secs(__ct)		\
-	((__force u64)(__ct) / NSEC_PER_SEC)
+	cputime_div(__ct, NSEC_PER_SEC)
 #define secs_to_cputime(__secs)		\
 	(__force cputime_t)((__secs) * NSEC_PER_SEC)
 
@@ -69,8 +75,10 @@
 }
 static inline void cputime_to_timespec(const cputime_t ct, struct timespec *val)
 {
-	val->tv_sec  = (__force u64) ct / NSEC_PER_SEC;
-	val->tv_nsec = (__force u64) ct % NSEC_PER_SEC;
+	u32 rem;
+
+	val->tv_sec = cputime_div_rem(ct, NSEC_PER_SEC, &rem);
+	val->tv_nsec = rem;
 }
 
 /*
@@ -83,15 +91,17 @@
 }
 static inline void cputime_to_timeval(const cputime_t ct, struct timeval *val)
 {
-	val->tv_sec = (__force u64) ct / NSEC_PER_SEC;
-	val->tv_usec = ((__force u64) ct % NSEC_PER_SEC) / NSEC_PER_USEC;
+	u32 rem;
+
+	val->tv_sec = cputime_div_rem(ct, NSEC_PER_SEC, &rem);
+	val->tv_usec = rem / NSEC_PER_USEC;
 }
 
 /*
  * Convert cputime <-> clock (USER_HZ)
  */
 #define cputime_to_clock_t(__ct)	\
-	((__force u64)(__ct) / (NSEC_PER_SEC / USER_HZ))
+	cputime_div(__ct, (NSEC_PER_SEC / USER_HZ))
 #define clock_t_to_cputime(__x)		\
 	(__force cputime_t)((__x) * (NSEC_PER_SEC / USER_HZ))
 
diff --git a/include/linux/acpi_gpio.h b/include/linux/acpi_gpio.h
index b76ebd0..4c120a1 100644
--- a/include/linux/acpi_gpio.h
+++ b/include/linux/acpi_gpio.h
@@ -1,13 +1,25 @@
 #ifndef _LINUX_ACPI_GPIO_H_
 #define _LINUX_ACPI_GPIO_H_
 
+#include <linux/device.h>
 #include <linux/errno.h>
 #include <linux/gpio.h>
 
+/**
+ * struct acpi_gpio_info - ACPI GPIO specific information
+ * @gpioint: if %true this GPIO is of type GpioInt otherwise type is GpioIo
+ */
+struct acpi_gpio_info {
+	bool gpioint;
+};
+
 #ifdef CONFIG_GPIO_ACPI
 
 int acpi_get_gpio(char *path, int pin);
+int acpi_get_gpio_by_index(struct device *dev, int index,
+			   struct acpi_gpio_info *info);
 void acpi_gpiochip_request_interrupts(struct gpio_chip *chip);
+void acpi_gpiochip_free_interrupts(struct gpio_chip *chip);
 
 #else /* CONFIG_GPIO_ACPI */
 
@@ -16,7 +28,14 @@
 	return -ENODEV;
 }
 
+static inline int acpi_get_gpio_by_index(struct device *dev, int index,
+					 struct acpi_gpio_info *info)
+{
+	return -ENODEV;
+}
+
 static inline void acpi_gpiochip_request_interrupts(struct gpio_chip *chip) { }
+static inline void acpi_gpiochip_free_interrupts(struct gpio_chip *chip) { }
 
 #endif /* CONFIG_GPIO_ACPI */
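
The new acpi_get_gpio_by_index() resolves the Nth GpioIo/GpioInt resource in
a device's _CRS (as in the ASL example earlier in this merge) into a global
GPIO number, with info->gpioint telling the driver which flavor it got. A
hedged sketch of how a driver might consume it (example_probe and the
"example" label are illustrative, not from this series):

	#include <linux/acpi_gpio.h>
	#include <linux/gpio.h>

	static int example_probe(struct device *dev)
	{
		struct acpi_gpio_info info;
		int gpio;

		gpio = acpi_get_gpio_by_index(dev, 0, &info);
		if (gpio < 0)
			return gpio;	/* -ENODEV when CONFIG_GPIO_ACPI=n */

		if (info.gpioint)
			dev_info(dev, "GPIO %d is a GpioInt resource\n", gpio);

		return devm_gpio_request(dev, gpio, "example");
	}
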
 
diff --git a/include/linux/basic_mmio_gpio.h b/include/linux/basic_mmio_gpio.h
index 1c504ca..d8a97ec 100644
--- a/include/linux/basic_mmio_gpio.h
+++ b/include/linux/basic_mmio_gpio.h
@@ -72,5 +72,6 @@
 #define BGPIOF_BIG_ENDIAN		BIT(0)
 #define BGPIOF_UNREADABLE_REG_SET	BIT(1) /* reg_set is unreadable */
 #define BGPIOF_UNREADABLE_REG_DIR	BIT(2) /* reg_dir is unreadable */
+#define BGPIOF_BIG_ENDIAN_BYTE_ORDER	BIT(3)
 
 #endif /* __BASIC_MMIO_GPIO_H */
diff --git a/include/linux/ceph/auth.h b/include/linux/ceph/auth.h
index d4080f3..5f338684 100644
--- a/include/linux/ceph/auth.h
+++ b/include/linux/ceph/auth.h
@@ -52,6 +52,9 @@
 	 */
 	int (*create_authorizer)(struct ceph_auth_client *ac, int peer_type,
 				 struct ceph_auth_handshake *auth);
+	/* ensure that an existing authorizer is up to date */
+	int (*update_authorizer)(struct ceph_auth_client *ac, int peer_type,
+				 struct ceph_auth_handshake *auth);
 	int (*verify_authorizer_reply)(struct ceph_auth_client *ac,
 				       struct ceph_authorizer *a, size_t len);
 	void (*destroy_authorizer)(struct ceph_auth_client *ac,
@@ -75,6 +78,8 @@
 	u64 global_id;          /* our unique id in system */
 	const struct ceph_crypto_key *key;     /* our secret key */
 	unsigned want_keys;     /* which services we want */
+
+	struct mutex mutex;
 };
 
 extern struct ceph_auth_client *ceph_auth_init(const char *name,
@@ -94,5 +99,18 @@
 		    void *msg_buf, size_t msg_len);
 
 extern int ceph_auth_is_authenticated(struct ceph_auth_client *ac);
+extern int ceph_auth_create_authorizer(struct ceph_auth_client *ac,
+				       int peer_type,
+				       struct ceph_auth_handshake *auth);
+extern void ceph_auth_destroy_authorizer(struct ceph_auth_client *ac,
+					 struct ceph_authorizer *a);
+extern int ceph_auth_update_authorizer(struct ceph_auth_client *ac,
+				       int peer_type,
+				       struct ceph_auth_handshake *a);
+extern int ceph_auth_verify_authorizer_reply(struct ceph_auth_client *ac,
+					     struct ceph_authorizer *a,
+					     size_t len);
+extern void ceph_auth_invalidate_authorizer(struct ceph_auth_client *ac,
+					    int peer_type);
 
 #endif
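
The new ceph_auth_*() externs pair with the struct mutex added to
ceph_auth_client above: instead of callers poking ac->ops directly, each
operation gets a wrapper that serializes on ac->mutex. The likely shape of
such a wrapper (a sketch inferred from the declarations, not the actual
net/ceph/auth.c bodies):

	int ceph_auth_create_authorizer(struct ceph_auth_client *ac, int peer_type,
					struct ceph_auth_handshake *auth)
	{
		int ret = 0;

		mutex_lock(&ac->mutex);
		if (ac->ops && ac->ops->create_authorizer)
			ret = ac->ops->create_authorizer(ac, peer_type, auth);
		mutex_unlock(&ac->mutex);

		return ret;
	}
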
diff --git a/include/linux/ceph/ceph_features.h b/include/linux/ceph/ceph_features.h
index 76554ce..4c420803 100644
--- a/include/linux/ceph/ceph_features.h
+++ b/include/linux/ceph/ceph_features.h
@@ -41,6 +41,7 @@
  */
 #define CEPH_FEATURES_SUPPORTED_DEFAULT  \
 	(CEPH_FEATURE_NOSRCADDR |		\
+	 CEPH_FEATURE_RECONNECT_SEQ |		\
 	 CEPH_FEATURE_PGID64 |			\
 	 CEPH_FEATURE_PGPOOL3 |			\
 	 CEPH_FEATURE_OSDENC |			\
@@ -51,6 +52,7 @@
 
 #define CEPH_FEATURES_REQUIRED_DEFAULT   \
 	(CEPH_FEATURE_NOSRCADDR |	 \
+	 CEPH_FEATURE_RECONNECT_SEQ |	 \
 	 CEPH_FEATURE_PGID64 |		 \
 	 CEPH_FEATURE_PGPOOL3 |		 \
 	 CEPH_FEATURE_OSDENC)
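
CEPH_FEATURE_RECONNECT_SEQ is a single bit in the 64-bit feature mask; adding
it to both SUPPORTED and REQUIRED means this client advertises the seq-aware
reconnect handshake (see the CEPH_MSGR_TAG_SEQ tag added below) and refuses
peers that lack it. Negotiation is plain mask arithmetic; an illustrative
check, with peer_features standing in for whatever the peer advertised:

	u64 missing = CEPH_FEATURES_REQUIRED_DEFAULT & ~peer_features;

	if (missing) {
		pr_err("peer lacks required features %llx\n", missing);
		return -EPROTO;	/* e.g. no CEPH_FEATURE_RECONNECT_SEQ */
	}
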
diff --git a/include/linux/ceph/decode.h b/include/linux/ceph/decode.h
index 360d9d0..379f715 100644
--- a/include/linux/ceph/decode.h
+++ b/include/linux/ceph/decode.h
@@ -8,6 +8,23 @@
 
 #include <linux/ceph/types.h>
 
+/* This seemed to be the easiest place to define these */
+
+#define	U8_MAX	((u8)(~0U))
+#define	U16_MAX	((u16)(~0U))
+#define	U32_MAX	((u32)(~0U))
+#define	U64_MAX	((u64)(~0ULL))
+
+#define	S8_MAX	((s8)(U8_MAX >> 1))
+#define	S16_MAX	((s16)(U16_MAX >> 1))
+#define	S32_MAX	((s32)(U32_MAX >> 1))
+#define	S64_MAX	((s64)(U64_MAX >> 1LL))
+
+#define	S8_MIN	((s8)(-S8_MAX - 1))
+#define	S16_MIN	((s16)(-S16_MAX - 1))
+#define	S32_MIN	((s32)(-S32_MAX - 1))
+#define	S64_MIN	((s64)(-S64_MAX - 1LL))
+
 /*
  * in all cases,
  *   void **p     pointer to position pointer
@@ -137,14 +154,19 @@
 static inline void ceph_decode_timespec(struct timespec *ts,
 					const struct ceph_timespec *tv)
 {
-	ts->tv_sec = le32_to_cpu(tv->tv_sec);
-	ts->tv_nsec = le32_to_cpu(tv->tv_nsec);
+	ts->tv_sec = (__kernel_time_t)le32_to_cpu(tv->tv_sec);
+	ts->tv_nsec = (long)le32_to_cpu(tv->tv_nsec);
 }
 static inline void ceph_encode_timespec(struct ceph_timespec *tv,
 					const struct timespec *ts)
 {
-	tv->tv_sec = cpu_to_le32(ts->tv_sec);
-	tv->tv_nsec = cpu_to_le32(ts->tv_nsec);
+	BUG_ON(ts->tv_sec < 0);
+	BUG_ON(ts->tv_sec > (__kernel_time_t)U32_MAX);
+	BUG_ON(ts->tv_nsec < 0);
+	BUG_ON(ts->tv_nsec > (long)U32_MAX);
+
+	tv->tv_sec = cpu_to_le32((u32)ts->tv_sec);
+	tv->tv_nsec = cpu_to_le32((u32)ts->tv_nsec);
 }
 
 /*
diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h
index 29818fc..2e30248 100644
--- a/include/linux/ceph/libceph.h
+++ b/include/linux/ceph/libceph.h
@@ -66,6 +66,7 @@
 #define CEPH_OSD_IDLE_TTL_DEFAULT    60
 
 #define CEPH_MSG_MAX_FRONT_LEN	(16*1024*1024)
+#define CEPH_MSG_MAX_MIDDLE_LEN	(16*1024*1024)
 #define CEPH_MSG_MAX_DATA_LEN	(16*1024*1024)
 
 #define CEPH_AUTH_NAME_DEFAULT   "guest"
@@ -156,31 +157,11 @@
 	u64 snaps[];
 };
 
-static inline struct ceph_snap_context *
-ceph_get_snap_context(struct ceph_snap_context *sc)
-{
-	/*
-	printk("get_snap_context %p %d -> %d\n", sc, atomic_read(&sc->nref),
-	       atomic_read(&sc->nref)+1);
-	*/
-	if (sc)
-		atomic_inc(&sc->nref);
-	return sc;
-}
-
-static inline void ceph_put_snap_context(struct ceph_snap_context *sc)
-{
-	if (!sc)
-		return;
-	/*
-	printk("put_snap_context %p %d -> %d\n", sc, atomic_read(&sc->nref),
-	       atomic_read(&sc->nref)-1);
-	*/
-	if (atomic_dec_and_test(&sc->nref)) {
-		/*printk(" deleting snap_context %p\n", sc);*/
-		kfree(sc);
-	}
-}
+extern struct ceph_snap_context *ceph_create_snap_context(u32 snap_count,
+					gfp_t gfp_flags);
+extern struct ceph_snap_context *ceph_get_snap_context(
+					struct ceph_snap_context *sc);
+extern void ceph_put_snap_context(struct ceph_snap_context *sc);
 
 /*
  * calculate the number of pages a given length and offset map onto,
diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h
index 60903e0..7c1420b 100644
--- a/include/linux/ceph/messenger.h
+++ b/include/linux/ceph/messenger.h
@@ -64,6 +64,77 @@
 	u32 required_features;
 };
 
+enum ceph_msg_data_type {
+	CEPH_MSG_DATA_NONE,	/* message contains no data payload */
+	CEPH_MSG_DATA_PAGES,	/* data source/destination is a page array */
+	CEPH_MSG_DATA_PAGELIST,	/* data source/destination is a pagelist */
+#ifdef CONFIG_BLOCK
+	CEPH_MSG_DATA_BIO,	/* data source/destination is a bio list */
+#endif /* CONFIG_BLOCK */
+};
+
+static __inline__ bool ceph_msg_data_type_valid(enum ceph_msg_data_type type)
+{
+	switch (type) {
+	case CEPH_MSG_DATA_NONE:
+	case CEPH_MSG_DATA_PAGES:
+	case CEPH_MSG_DATA_PAGELIST:
+#ifdef CONFIG_BLOCK
+	case CEPH_MSG_DATA_BIO:
+#endif /* CONFIG_BLOCK */
+		return true;
+	default:
+		return false;
+	}
+}
+
+struct ceph_msg_data {
+	struct list_head		links;	/* ceph_msg->data */
+	enum ceph_msg_data_type		type;
+	union {
+#ifdef CONFIG_BLOCK
+		struct {
+			struct bio	*bio;
+			size_t		bio_length;
+		};
+#endif /* CONFIG_BLOCK */
+		struct {
+			struct page	**pages;	/* NOT OWNER. */
+			size_t		length;		/* total # bytes */
+			unsigned int	alignment;	/* first page */
+		};
+		struct ceph_pagelist	*pagelist;
+	};
+};
+
+struct ceph_msg_data_cursor {
+	size_t			total_resid;	/* across all data items */
+	struct list_head	*data_head;	/* = &ceph_msg->data */
+
+	struct ceph_msg_data	*data;		/* current data item */
+	size_t			resid;		/* bytes not yet consumed */
+	bool			last_piece;	/* current is last piece */
+	bool			need_crc;	/* crc update needed */
+	union {
+#ifdef CONFIG_BLOCK
+		struct {				/* bio */
+			struct bio	*bio;		/* bio from list */
+			unsigned int	vector_index;	/* vector from bio */
+			unsigned int	vector_offset;	/* bytes from vector */
+		};
+#endif /* CONFIG_BLOCK */
+		struct {				/* pages */
+			unsigned int	page_offset;	/* offset in page */
+			unsigned short	page_index;	/* index in array */
+			unsigned short	page_count;	/* pages in array */
+		};
+		struct {				/* pagelist */
+			struct page	*page;		/* page from list */
+			size_t		offset;		/* bytes from list */
+		};
+	};
+};
+
 /*
  * a single message.  it contains a header (src, dest, message type, etc.),
  * footer (crc values, mainly), a "front" message body, and possibly a
@@ -74,21 +145,15 @@
 	struct ceph_msg_footer footer;	/* footer */
 	struct kvec front;              /* unaligned blobs of message */
 	struct ceph_buffer *middle;
-	struct page **pages;            /* data payload.  NOT OWNER. */
-	unsigned nr_pages;              /* size of page array */
-	unsigned page_alignment;        /* io offset in first page */
-	struct ceph_pagelist *pagelist; /* instead of pages */
+
+	size_t				data_length;
+	struct list_head		data;
+	struct ceph_msg_data_cursor	cursor;
 
 	struct ceph_connection *con;
-	struct list_head list_head;
+	struct list_head list_head;	/* links for connection lists */
 
 	struct kref kref;
-#ifdef CONFIG_BLOCK
-	struct bio  *bio;		/* instead of pages/pagelist */
-	struct bio  *bio_iter;		/* bio iterator */
-	int bio_seg;			/* current bio segment */
-#endif /* CONFIG_BLOCK */
-	struct ceph_pagelist *trail;	/* the trailing part of the data */
 	bool front_is_vmalloc;
 	bool more_to_follow;
 	bool needs_out_seq;
@@ -98,12 +163,6 @@
 	struct ceph_msgpool *pool;
 };
 
-struct ceph_msg_pos {
-	int page, page_pos;  /* which page; offset in page */
-	int data_pos;        /* offset in data payload */
-	bool did_page_crc;   /* true if we've calculated crc for current page */
-};
-
 /* ceph connection fault delay defaults, for exponential backoff */
 #define BASE_DELAY_INTERVAL	(HZ/2)
 #define MAX_DELAY_INTERVAL	(5 * 60 * HZ)
@@ -161,7 +220,6 @@
 	struct ceph_msg *out_msg;        /* sending message (== tail of
 					    out_sent) */
 	bool out_msg_done;
-	struct ceph_msg_pos out_msg_pos;
 
 	struct kvec out_kvec[8],         /* sending header/footer data */
 		*out_kvec_cur;
@@ -175,7 +233,6 @@
 	/* message in temps */
 	struct ceph_msg_header in_hdr;
 	struct ceph_msg *in_msg;
-	struct ceph_msg_pos in_msg_pos;
 	u32 in_front_crc, in_middle_crc, in_data_crc;  /* calculated crc */
 
 	char in_tag;         /* protocol control byte */
@@ -218,6 +275,15 @@
 
 extern void ceph_con_keepalive(struct ceph_connection *con);
 
+extern void ceph_msg_data_add_pages(struct ceph_msg *msg, struct page **pages,
+				size_t length, size_t alignment);
+extern void ceph_msg_data_add_pagelist(struct ceph_msg *msg,
+				struct ceph_pagelist *pagelist);
+#ifdef CONFIG_BLOCK
+extern void ceph_msg_data_add_bio(struct ceph_msg *msg, struct bio *bio,
+				size_t length);
+#endif /* CONFIG_BLOCK */
+
 extern struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags,
 				     bool can_fail);
 extern void ceph_msg_kfree(struct ceph_msg *m);
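
The messenger rework above replaces the fixed pages/pagelist/bio fields (and
the in/out ceph_msg_pos trackers) on struct ceph_msg with a list of typed
ceph_msg_data items plus a ceph_msg_data_cursor that walks the current item
during send and receive. Callers attach payloads through the new helpers; a
sketch of the typical sequence, where front_len, pages, length and
page_alignment are assumed caller state:

	struct ceph_msg *msg;

	msg = ceph_msg_new(CEPH_MSG_OSD_OP, front_len, GFP_NOFS, false);
	if (!msg)
		return -ENOMEM;

	/* The page array stays caller-owned (type CEPH_MSG_DATA_PAGES);
	 * length/alignment describe the I/O extent within it. */
	ceph_msg_data_add_pages(msg, pages, length, page_alignment);
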
diff --git a/include/linux/ceph/msgr.h b/include/linux/ceph/msgr.h
index 680d3d6..3d94a73 100644
--- a/include/linux/ceph/msgr.h
+++ b/include/linux/ceph/msgr.h
@@ -87,6 +87,7 @@
 #define CEPH_MSGR_TAG_BADPROTOVER  10  /* bad protocol version */
 #define CEPH_MSGR_TAG_BADAUTHORIZER 11 /* bad authorizer */
 #define CEPH_MSGR_TAG_FEATURES      12 /* insufficient features */
+#define CEPH_MSGR_TAG_SEQ           13 /* 64-bit int follows with seen seq number */
 
 
 /*
diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
index 1dd5d46..186db0b 100644
--- a/include/linux/ceph/osd_client.h
+++ b/include/linux/ceph/osd_client.h
@@ -29,6 +29,7 @@
  */
 typedef void (*ceph_osdc_callback_t)(struct ceph_osd_request *,
 				     struct ceph_msg *);
+typedef void (*ceph_osdc_unsafe_callback_t)(struct ceph_osd_request *, bool);
 
 /* a given osd we're communicating with */
 struct ceph_osd {
@@ -48,7 +49,67 @@
 };
 
 
-#define CEPH_OSD_MAX_OP 10
+#define CEPH_OSD_MAX_OP	2
+
+enum ceph_osd_data_type {
+	CEPH_OSD_DATA_TYPE_NONE = 0,
+	CEPH_OSD_DATA_TYPE_PAGES,
+	CEPH_OSD_DATA_TYPE_PAGELIST,
+#ifdef CONFIG_BLOCK
+	CEPH_OSD_DATA_TYPE_BIO,
+#endif /* CONFIG_BLOCK */
+};
+
+struct ceph_osd_data {
+	enum ceph_osd_data_type	type;
+	union {
+		struct {
+			struct page	**pages;
+			u64		length;
+			u32		alignment;
+			bool		pages_from_pool;
+			bool		own_pages;
+		};
+		struct ceph_pagelist	*pagelist;
+#ifdef CONFIG_BLOCK
+		struct {
+			struct bio	*bio;		/* list of bios */
+			size_t		bio_length;	/* total in list */
+		};
+#endif /* CONFIG_BLOCK */
+	};
+};
+
+struct ceph_osd_req_op {
+	u16 op;           /* CEPH_OSD_OP_* */
+	u32 payload_len;
+	union {
+		struct ceph_osd_data raw_data_in;
+		struct {
+			u64 offset, length;
+			u64 truncate_size;
+			u32 truncate_seq;
+			struct ceph_osd_data osd_data;
+		} extent;
+		struct {
+			const char *class_name;
+			const char *method_name;
+			struct ceph_osd_data request_info;
+			struct ceph_osd_data request_data;
+			struct ceph_osd_data response_data;
+			__u8 class_len;
+			__u8 method_len;
+			__u8 argc;
+		} cls;
+		struct {
+			u64 cookie;
+			u64 ver;
+			u32 prot_ver;
+			u32 timeout;
+			__u8 flag;
+		} watch;
+	};
+};
 
 /* an in-flight request */
 struct ceph_osd_request {
@@ -63,15 +124,14 @@
 	int              r_pg_osds[CEPH_PG_MAX_SIZE];
 	int              r_num_pg_osds;
 
-	struct ceph_connection *r_con_filling_msg;
-
 	struct ceph_msg  *r_request, *r_reply;
 	int               r_flags;     /* any additional flags for the osd */
 	u32               r_sent;      /* >0 if r_request is sending/sent */
-	int               r_num_ops;
 
-	/* encoded message content */
-	struct ceph_osd_op *r_request_ops;
+	/* request osd ops array  */
+	unsigned int		r_num_ops;
+	struct ceph_osd_req_op	r_ops[CEPH_OSD_MAX_OP];
+
 	/* these are updated on each send */
 	__le32           *r_request_osdmap_epoch;
 	__le32           *r_request_flags;
@@ -85,12 +145,14 @@
 	s32               r_reply_op_result[CEPH_OSD_MAX_OP];
 	int               r_got_reply;
 	int		  r_linger;
+	int		  r_completed;
 
 	struct ceph_osd_client *r_osdc;
 	struct kref       r_kref;
 	bool              r_mempool;
 	struct completion r_completion, r_safe_completion;
-	ceph_osdc_callback_t r_callback, r_safe_callback;
+	ceph_osdc_callback_t r_callback;
+	ceph_osdc_unsafe_callback_t r_unsafe_callback;
 	struct ceph_eversion r_reassert_version;
 	struct list_head  r_unsafe_item;
 
@@ -104,16 +166,6 @@
 
 	struct ceph_file_layout r_file_layout;
 	struct ceph_snap_context *r_snapc;    /* snap context for writes */
-	unsigned          r_num_pages;        /* size of page array (follows) */
-	unsigned          r_page_alignment;   /* io offset in first page */
-	struct page     **r_pages;            /* pages for data payload */
-	int               r_pages_from_pool;
-	int               r_own_pages;        /* if true, i own page list */
-#ifdef CONFIG_BLOCK
-	struct bio       *r_bio;	      /* instead of pages */
-#endif
-
-	struct ceph_pagelist r_trail;	      /* trailing part of the data */
 };
 
 struct ceph_osd_event {
@@ -172,48 +224,8 @@
 	struct workqueue_struct	*notify_wq;
 };
 
-struct ceph_osd_req_op {
-	u16 op;           /* CEPH_OSD_OP_* */
-	u32 payload_len;
-	union {
-		struct {
-			u64 offset, length;
-			u64 truncate_size;
-			u32 truncate_seq;
-		} extent;
-		struct {
-			const char *name;
-			const char  *val;
-			u32 name_len;
-			u32 value_len;
-			__u8 cmp_op;       /* CEPH_OSD_CMPXATTR_OP_* */
-			__u8 cmp_mode;     /* CEPH_OSD_CMPXATTR_MODE_* */
-		} xattr;
-		struct {
-			const char *class_name;
-			const char *method_name;
-			const char *indata;
-			u32 indata_len;
-			__u8 class_len;
-			__u8 method_len;
-			__u8 argc;
-		} cls;
-		struct {
-			u64 cookie;
-			u64 count;
-		} pgls;
-	        struct {
-		        u64 snapid;
-	        } snap;
-		struct {
-			u64 cookie;
-			u64 ver;
-			u32 prot_ver;
-			u32 timeout;
-			__u8 flag;
-		} watch;
-	};
-};
+extern int ceph_osdc_setup(void);
+extern void ceph_osdc_cleanup(void);
 
 extern int ceph_osdc_init(struct ceph_osd_client *osdc,
 			  struct ceph_client *client);
@@ -224,16 +236,71 @@
 extern void ceph_osdc_handle_map(struct ceph_osd_client *osdc,
 				 struct ceph_msg *msg);
 
+extern void osd_req_op_init(struct ceph_osd_request *osd_req,
+					unsigned int which, u16 opcode);
+
+extern void osd_req_op_raw_data_in_pages(struct ceph_osd_request *,
+					unsigned int which,
+					struct page **pages, u64 length,
+					u32 alignment, bool pages_from_pool,
+					bool own_pages);
+
+extern void osd_req_op_extent_init(struct ceph_osd_request *osd_req,
+					unsigned int which, u16 opcode,
+					u64 offset, u64 length,
+					u64 truncate_size, u32 truncate_seq);
+extern void osd_req_op_extent_update(struct ceph_osd_request *osd_req,
+					unsigned int which, u64 length);
+
+extern struct ceph_osd_data *osd_req_op_extent_osd_data(
+					struct ceph_osd_request *osd_req,
+					unsigned int which);
+extern struct ceph_osd_data *osd_req_op_cls_response_data(
+					struct ceph_osd_request *osd_req,
+					unsigned int which);
+
+extern void osd_req_op_extent_osd_data_pages(struct ceph_osd_request *,
+					unsigned int which,
+					struct page **pages, u64 length,
+					u32 alignment, bool pages_from_pool,
+					bool own_pages);
+extern void osd_req_op_extent_osd_data_pagelist(struct ceph_osd_request *,
+					unsigned int which,
+					struct ceph_pagelist *pagelist);
+#ifdef CONFIG_BLOCK
+extern void osd_req_op_extent_osd_data_bio(struct ceph_osd_request *,
+					unsigned int which,
+					struct bio *bio, size_t bio_length);
+#endif /* CONFIG_BLOCK */
+
+extern void osd_req_op_cls_request_data_pagelist(struct ceph_osd_request *,
+					unsigned int which,
+					struct ceph_pagelist *pagelist);
+extern void osd_req_op_cls_request_data_pages(struct ceph_osd_request *,
+					unsigned int which,
+					struct page **pages, u64 length,
+					u32 alignment, bool pages_from_pool,
+					bool own_pages);
+extern void osd_req_op_cls_response_data_pages(struct ceph_osd_request *,
+					unsigned int which,
+					struct page **pages, u64 length,
+					u32 alignment, bool pages_from_pool,
+					bool own_pages);
+
+extern void osd_req_op_cls_init(struct ceph_osd_request *osd_req,
+					unsigned int which, u16 opcode,
+					const char *class, const char *method);
+extern void osd_req_op_watch_init(struct ceph_osd_request *osd_req,
+					unsigned int which, u16 opcode,
+					u64 cookie, u64 version, int flag);
+
 extern struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
 					       struct ceph_snap_context *snapc,
-					       unsigned int num_op,
+					       unsigned int num_ops,
 					       bool use_mempool,
 					       gfp_t gfp_flags);
 
-extern void ceph_osdc_build_request(struct ceph_osd_request *req,
-				    u64 off, u64 len,
-				    unsigned int num_op,
-				    struct ceph_osd_req_op *src_ops,
+extern void ceph_osdc_build_request(struct ceph_osd_request *req, u64 off,
 				    struct ceph_snap_context *snapc,
 				    u64 snap_id,
 				    struct timespec *mtime);
@@ -241,12 +308,11 @@
 extern struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *,
 				      struct ceph_file_layout *layout,
 				      struct ceph_vino vino,
-				      u64 offset, u64 *len, int op, int flags,
+				      u64 offset, u64 *len,
+				      int num_ops, int opcode, int flags,
 				      struct ceph_snap_context *snapc,
-				      int do_sync, u32 truncate_seq,
-				      u64 truncate_size,
-				      struct timespec *mtime,
-				      bool use_mempool, int page_align);
+				      u32 truncate_seq, u64 truncate_size,
+				      bool use_mempool);
 
 extern void ceph_osdc_set_request_linger(struct ceph_osd_client *osdc,
 					 struct ceph_osd_request *req);
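[Editor's note: putting the new per-op interface together, each op is described by index with an osd_req_op_*_init() call, its data buffer attached with an osd_req_op_*_osd_data_*() helper, and the request encoded once at the end. A hedged sketch of a single-op read, with the client, pages, offset and length assumed to come from the caller; submission is unchanged.]

    /* Sketch only: one-op OSD read with the per-op data API. */
    static int example_read(struct ceph_osd_client *osdc,
                            struct page **pages, u64 off, u64 len)
    {
            struct ceph_osd_request *req;

            req = ceph_osdc_alloc_request(osdc, NULL, 1, false, GFP_NOFS);
            if (!req)
                    return -ENOMEM;

            osd_req_op_extent_init(req, 0, CEPH_OSD_OP_READ, off, len, 0, 0);
            osd_req_op_extent_osd_data_pages(req, 0, pages, len, 0,
                                             false, false);
            ceph_osdc_build_request(req, off, NULL, CEPH_NOSNAP, NULL);
            /* submit via ceph_osdc_start_request() as before */
            return 0;
    }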
diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h
index c819190..d05cc44 100644
--- a/include/linux/ceph/osdmap.h
+++ b/include/linux/ceph/osdmap.h
@@ -3,6 +3,7 @@
 
 #include <linux/rbtree.h>
 #include <linux/ceph/types.h>
+#include <linux/ceph/decode.h>
 #include <linux/ceph/ceph_fs.h>
 #include <linux/crush/crush.h>
 
@@ -119,6 +120,29 @@
 	return &map->osd_addr[osd];
 }
 
+static inline int ceph_decode_pgid(void **p, void *end, struct ceph_pg *pgid)
+{
+	__u8 version;
+
+	if (!ceph_has_room(p, end, 1 + 8 + 4 + 4)) {
+		pr_warning("incomplete pg encoding");
+
+		return -EINVAL;
+	}
+	version = ceph_decode_8(p);
+	if (version > 1) {
+		pr_warning("do not understand pg encoding %d > 1",
+			(int)version);
+		return -EINVAL;
+	}
+
+	pgid->pool = ceph_decode_64(p);
+	pgid->seed = ceph_decode_32(p);
+	*p += 4;	/* skip deprecated preferred value */
+
+	return 0;
+}
+
 extern struct ceph_osdmap *osdmap_decode(void **p, void *end);
 extern struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
 					    struct ceph_osdmap *map,
@@ -131,10 +155,8 @@
 					 u64 *bno, u64 *oxoff, u64 *oxlen);
 
 /* calculate mapping of object to a placement group */
-extern int ceph_calc_object_layout(struct ceph_pg *pg,
-				   const char *oid,
-				   struct ceph_file_layout *fl,
-				   struct ceph_osdmap *osdmap);
+extern int ceph_calc_ceph_pg(struct ceph_pg *pg, const char *oid,
+			  struct ceph_osdmap *osdmap, uint64_t pool);
 extern int ceph_calc_pg_acting(struct ceph_osdmap *osdmap,
 			       struct ceph_pg pgid,
 			       int *acting);
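[Editor's note: for completeness, a short sketch of the new inline decoder in use; the cursor/end pointers follow the usual ceph_decode_* convention of advancing *p past what was consumed.]

    /* Sketch: pull one pg id out of a received buffer. */
    static int example_parse_pgid(void **p, void *end)
    {
            struct ceph_pg pgid;
            int ret;

            ret = ceph_decode_pgid(p, end, &pgid);
            if (ret)
                    return ret;  /* -EINVAL on short or unknown encoding */
            pr_debug("pg %llu.%x\n",
                     (unsigned long long)pgid.pool, pgid.seed);
            return 0;
    }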
diff --git a/include/linux/i2c/twl4030-madc.h b/include/linux/i2c/twl4030-madc.h
index 530e11b..01f5951 100644
--- a/include/linux/i2c/twl4030-madc.h
+++ b/include/linux/i2c/twl4030-madc.h
@@ -39,6 +39,7 @@
  * @do_avgP:	sample the input channel for 4 consecutive cycles
  * @method:	RT, SW1, SW2
  * @type:	Polling or interrupt based method
+ * @raw:	Return raw value, do not convert it
  */
 
 struct twl4030_madc_request {
@@ -48,6 +49,7 @@
 	u16 type;
 	bool active;
 	bool result_pending;
+	bool raw;
 	int rbuf[TWL4030_MADC_MAX_CHANNELS];
 	void (*func_cb)(int len, int channels, int *buf);
 };
diff --git a/include/linux/input/matrix_keypad.h b/include/linux/input/matrix_keypad.h
index 5f3aa6b..27e06ac 100644
--- a/include/linux/input/matrix_keypad.h
+++ b/include/linux/input/matrix_keypad.h
@@ -81,4 +81,23 @@
 			       unsigned short *keymap,
 			       struct input_dev *input_dev);
 
+#ifdef CONFIG_OF
+/**
+ * matrix_keypad_parse_of_params() - Read parameters from matrix-keypad node
+ *
+ * @dev: Device containing of_node
+ * @rows: Returns number of matrix rows
+ * @cols: Returns number of matrix columns
+ * @return 0 if OK, <0 on error
+ */
+int matrix_keypad_parse_of_params(struct device *dev,
+				  unsigned int *rows, unsigned int *cols);
+#else
+static inline int matrix_keypad_parse_of_params(struct device *dev,
+				  unsigned int *rows, unsigned int *cols)
+{
+	return -ENOSYS;
+}
+#endif /* CONFIG_OF */
+
 #endif /* _MATRIX_KEYPAD_H */
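[Editor's note: a sketch of how an OF-aware keypad driver might call this helper from probe; because the !CONFIG_OF stub returns -ENOSYS, the same code builds everywhere. The probe function and its device are illustrative.]

    static int example_keypad_probe(struct platform_device *pdev)
    {
            unsigned int rows, cols;
            int err;

            err = matrix_keypad_parse_of_params(&pdev->dev, &rows, &cols);
            if (err)        /* -ENOSYS without CONFIG_OF */
                    return err;

            dev_info(&pdev->dev, "%ux%u key matrix\n", rows, cols);
            return 0;
    }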
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index ba3b8a9..3aeb730 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -20,6 +20,7 @@
 #define __LINUX_IOMMU_H
 
 #include <linux/errno.h>
+#include <linux/err.h>
 #include <linux/types.h>
 
 #define IOMMU_READ	(1)
@@ -91,8 +92,7 @@
 		   phys_addr_t paddr, size_t size, int prot);
 	size_t (*unmap)(struct iommu_domain *domain, unsigned long iova,
 		     size_t size);
-	phys_addr_t (*iova_to_phys)(struct iommu_domain *domain,
-				    unsigned long iova);
+	phys_addr_t (*iova_to_phys)(struct iommu_domain *domain, dma_addr_t iova);
 	int (*domain_has_cap)(struct iommu_domain *domain,
 			      unsigned long cap);
 	int (*add_device)(struct device *dev);
@@ -105,7 +105,7 @@
 
 	/* Window handling functions */
 	int (*domain_window_enable)(struct iommu_domain *domain, u32 wnd_nr,
-				    phys_addr_t paddr, u64 size);
+				    phys_addr_t paddr, u64 size, int prot);
 	void (*domain_window_disable)(struct iommu_domain *domain, u32 wnd_nr);
 	/* Set the number of windows per domain */
 	int (*domain_set_windows)(struct iommu_domain *domain, u32 w_count);
@@ -125,6 +125,7 @@
 extern int bus_set_iommu(struct bus_type *bus, struct iommu_ops *ops);
 extern bool iommu_present(struct bus_type *bus);
 extern struct iommu_domain *iommu_domain_alloc(struct bus_type *bus);
+extern struct iommu_group *iommu_group_get_by_id(int id);
 extern void iommu_domain_free(struct iommu_domain *domain);
 extern int iommu_attach_device(struct iommu_domain *domain,
 			       struct device *dev);
@@ -134,8 +135,7 @@
 		     phys_addr_t paddr, size_t size, int prot);
 extern size_t iommu_unmap(struct iommu_domain *domain, unsigned long iova,
 		       size_t size);
-extern phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain,
-				      unsigned long iova);
+extern phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova);
 extern int iommu_domain_has_cap(struct iommu_domain *domain,
 				unsigned long cap);
 extern void iommu_set_fault_handler(struct iommu_domain *domain,
@@ -171,7 +171,8 @@
 
 /* Window handling function prototypes */
 extern int iommu_domain_window_enable(struct iommu_domain *domain, u32 wnd_nr,
-				      phys_addr_t offset, u64 size);
+				      phys_addr_t offset, u64 size,
+				      int prot);
 extern void iommu_domain_window_disable(struct iommu_domain *domain, u32 wnd_nr);
 /**
  * report_iommu_fault() - report about an IOMMU fault to the IOMMU framework
@@ -257,7 +258,7 @@
 
 static inline int iommu_domain_window_enable(struct iommu_domain *domain,
 					     u32 wnd_nr, phys_addr_t paddr,
-					     u64 size)
+					     u64 size, int prot)
 {
 	return -ENODEV;
 }
@@ -267,8 +268,7 @@
 {
 }
 
-static inline phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain,
-					     unsigned long iova)
+static inline phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova)
 {
 	return 0;
 }
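[Editor's note: two caller-visible changes above: window mappings now carry an explicit prot argument, and the iova parameter of iova_to_phys is a dma_addr_t. A hedged sketch using the IOMMU_READ/IOMMU_WRITE flags this header already defines; domain, paddr and iova are assumed to be set up by the caller.]

    /* Sketch: map window 0 read/write, then translate an iova back. */
    static phys_addr_t example_window(struct iommu_domain *domain,
                                      phys_addr_t paddr, dma_addr_t iova)
    {
            if (iommu_domain_window_enable(domain, 0, paddr, SZ_1M,
                                           IOMMU_READ | IOMMU_WRITE))
                    return 0;
            return iommu_iova_to_phys(domain, iova);
    }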
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index c139582..f0eea07 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -117,14 +117,13 @@
 #define KVM_REQ_APF_HALT          12
 #define KVM_REQ_STEAL_UPDATE      13
 #define KVM_REQ_NMI               14
-#define KVM_REQ_IMMEDIATE_EXIT    15
-#define KVM_REQ_PMU               16
-#define KVM_REQ_PMI               17
-#define KVM_REQ_WATCHDOG          18
-#define KVM_REQ_MASTERCLOCK_UPDATE 19
-#define KVM_REQ_MCLOCK_INPROGRESS 20
-#define KVM_REQ_EPR_EXIT          21
-#define KVM_REQ_EOIBITMAP         22
+#define KVM_REQ_PMU               15
+#define KVM_REQ_PMI               16
+#define KVM_REQ_WATCHDOG          17
+#define KVM_REQ_MASTERCLOCK_UPDATE 18
+#define KVM_REQ_MCLOCK_INPROGRESS 19
+#define KVM_REQ_EPR_EXIT          20
+#define KVM_REQ_SCAN_IOAPIC       21
 
 #define KVM_USERSPACE_IRQ_SOURCE_ID		0
 #define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID	1
@@ -133,6 +132,9 @@
 struct kvm_vcpu;
 extern struct kmem_cache *kvm_vcpu_cache;
 
+extern raw_spinlock_t kvm_lock;
+extern struct list_head vm_list;
+
 struct kvm_io_range {
 	gpa_t addr;
 	int len;
@@ -149,6 +151,7 @@
 enum kvm_bus {
 	KVM_MMIO_BUS,
 	KVM_PIO_BUS,
+	KVM_VIRTIO_CCW_NOTIFY_BUS,
 	KVM_NR_BUSES
 };
 
@@ -252,6 +255,7 @@
 		bool dy_eligible;
 	} spin_loop;
 #endif
+	bool preempted;
 	struct kvm_vcpu_arch arch;
 };
 
@@ -285,7 +289,8 @@
 	u32 gsi;
 	u32 type;
 	int (*set)(struct kvm_kernel_irq_routing_entry *e,
-		   struct kvm *kvm, int irq_source_id, int level);
+		   struct kvm *kvm, int irq_source_id, int level,
+		   bool line_status);
 	union {
 		struct {
 			unsigned irqchip;
@@ -296,10 +301,10 @@
 	struct hlist_node link;
 };
 
-#ifdef __KVM_HAVE_IOAPIC
+#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
 
 struct kvm_irq_routing_table {
-	int chip[KVM_NR_IRQCHIPS][KVM_IOAPIC_NUM_PINS];
+	int chip[KVM_NR_IRQCHIPS][KVM_IRQCHIP_NUM_PINS];
 	struct kvm_kernel_irq_routing_entry *rt_entries;
 	u32 nr_rt_entries;
 	/*
@@ -385,6 +390,7 @@
 	long mmu_notifier_count;
 #endif
 	long tlbs_dirty;
+	struct list_head devices;
 };
 
 #define kvm_err(fmt, ...) \
@@ -424,6 +430,19 @@
 int __must_check vcpu_load(struct kvm_vcpu *vcpu);
 void vcpu_put(struct kvm_vcpu *vcpu);
 
+#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
+int kvm_irqfd_init(void);
+void kvm_irqfd_exit(void);
+#else
+static inline int kvm_irqfd_init(void)
+{
+	return 0;
+}
+
+static inline void kvm_irqfd_exit(void)
+{
+}
+#endif
 int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
 		  struct module *module);
 void kvm_exit(void);
@@ -452,24 +471,39 @@
 	return slot;
 }
 
+/*
+ * KVM_SET_USER_MEMORY_REGION ioctl allows the following operations:
+ * - create a new memory slot
+ * - delete an existing memory slot
+ * - modify an existing memory slot
+ *   -- move it in the guest physical memory space
+ *   -- just change its flags
+ *
+ * Since flags can be changed by some of these operations, the following
+ * differentiation is the best we can do for __kvm_set_memory_region():
+ */
+enum kvm_mr_change {
+	KVM_MR_CREATE,
+	KVM_MR_DELETE,
+	KVM_MR_MOVE,
+	KVM_MR_FLAGS_ONLY,
+};
+
 int kvm_set_memory_region(struct kvm *kvm,
-			  struct kvm_userspace_memory_region *mem,
-			  bool user_alloc);
+			  struct kvm_userspace_memory_region *mem);
 int __kvm_set_memory_region(struct kvm *kvm,
-			    struct kvm_userspace_memory_region *mem,
-			    bool user_alloc);
+			    struct kvm_userspace_memory_region *mem);
 void kvm_arch_free_memslot(struct kvm_memory_slot *free,
 			   struct kvm_memory_slot *dont);
 int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages);
 int kvm_arch_prepare_memory_region(struct kvm *kvm,
 				struct kvm_memory_slot *memslot,
-				struct kvm_memory_slot old,
 				struct kvm_userspace_memory_region *mem,
-				bool user_alloc);
+				enum kvm_mr_change change);
 void kvm_arch_commit_memory_region(struct kvm *kvm,
 				struct kvm_userspace_memory_region *mem,
-				struct kvm_memory_slot old,
-				bool user_alloc);
+				const struct kvm_memory_slot *old,
+				enum kvm_mr_change change);
 bool kvm_largepages_enabled(void);
 void kvm_disable_largepages(void);
 /* flush all memory translations */
@@ -539,7 +573,7 @@
 void kvm_flush_remote_tlbs(struct kvm *kvm);
 void kvm_reload_remote_mmus(struct kvm *kvm);
 void kvm_make_mclock_inprogress_request(struct kvm *kvm);
-void kvm_make_update_eoibitmap_request(struct kvm *kvm);
+void kvm_make_scan_ioapic_request(struct kvm *kvm);
 
 long kvm_arch_dev_ioctl(struct file *filp,
 			unsigned int ioctl, unsigned long arg);
@@ -555,10 +589,9 @@
 				struct kvm_dirty_log *log);
 
 int kvm_vm_ioctl_set_memory_region(struct kvm *kvm,
-				   struct
-				   kvm_userspace_memory_region *mem,
-				   bool user_alloc);
-int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level);
+				   struct kvm_userspace_memory_region *mem);
+int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level,
+			bool line_status);
 long kvm_arch_vm_ioctl(struct file *filp,
 		       unsigned int ioctl, unsigned long arg);
 
@@ -632,7 +665,6 @@
 
 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type);
 void kvm_arch_destroy_vm(struct kvm *kvm);
-void kvm_free_all_assigned_devices(struct kvm *kvm);
 void kvm_arch_sync_events(struct kvm *kvm);
 
 int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu);
@@ -684,15 +716,11 @@
 void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin,
 			     bool mask);
 
-#ifdef __KVM_HAVE_IOAPIC
-void kvm_get_intr_delivery_bitmask(struct kvm_ioapic *ioapic,
-				   union kvm_ioapic_redirect_entry *entry,
-				   unsigned long *deliver_bitmask);
-#endif
-int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level);
+int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level,
+		bool line_status);
 int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level);
 int kvm_set_msi(struct kvm_kernel_irq_routing_entry *irq_entry, struct kvm *kvm,
-		int irq_source_id, int level);
+		int irq_source_id, int level, bool line_status);
 bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin);
 void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin);
 void kvm_register_irq_ack_notifier(struct kvm *kvm,
@@ -705,7 +733,7 @@
 /* For vcpu->arch.iommu_flags */
 #define KVM_IOMMU_CACHE_COHERENCY	0x1
 
-#ifdef CONFIG_IOMMU_API
+#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
 int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot);
 void kvm_iommu_unmap_pages(struct kvm *kvm, struct kvm_memory_slot *slot);
 int kvm_iommu_map_guest(struct kvm *kvm);
@@ -714,7 +742,7 @@
 		      struct kvm_assigned_dev_kernel *assigned_dev);
 int kvm_deassign_device(struct kvm *kvm,
 			struct kvm_assigned_dev_kernel *assigned_dev);
-#else /* CONFIG_IOMMU_API */
+#else
 static inline int kvm_iommu_map_pages(struct kvm *kvm,
 				      struct kvm_memory_slot *slot)
 {
@@ -726,28 +754,11 @@
 {
 }
 
-static inline int kvm_iommu_map_guest(struct kvm *kvm)
-{
-	return -ENODEV;
-}
-
 static inline int kvm_iommu_unmap_guest(struct kvm *kvm)
 {
 	return 0;
 }
-
-static inline int kvm_assign_device(struct kvm *kvm,
-		struct kvm_assigned_dev_kernel *assigned_dev)
-{
-	return 0;
-}
-
-static inline int kvm_deassign_device(struct kvm *kvm,
-		struct kvm_assigned_dev_kernel *assigned_dev)
-{
-	return 0;
-}
-#endif /* CONFIG_IOMMU_API */
+#endif
 
 static inline void __guest_enter(void)
 {
@@ -921,7 +932,7 @@
 }
 #endif
 
-#ifdef KVM_CAP_IRQ_ROUTING
+#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
 
 #define KVM_MAX_IRQ_ROUTES 1024
 
@@ -930,6 +941,9 @@
 			const struct kvm_irq_routing_entry *entries,
 			unsigned nr,
 			unsigned flags);
+int kvm_set_routing_entry(struct kvm_irq_routing_table *rt,
+			  struct kvm_kernel_irq_routing_entry *e,
+			  const struct kvm_irq_routing_entry *ue);
 void kvm_free_irq_routing(struct kvm *kvm);
 
 int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi);
@@ -998,11 +1012,13 @@
 
 #endif
 
-#ifdef __KVM_HAVE_DEVICE_ASSIGNMENT
+#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
 
 long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl,
 				  unsigned long arg);
 
+void kvm_free_all_assigned_devices(struct kvm *kvm);
+
 #else
 
 static inline long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl,
@@ -1011,6 +1027,8 @@
 	return -ENOTTY;
 }
 
+static inline void kvm_free_all_assigned_devices(struct kvm *kvm) {}
+
 #endif
 
 static inline void kvm_make_request(int req, struct kvm_vcpu *vcpu)
@@ -1028,6 +1046,46 @@
 	}
 }
 
+extern bool kvm_rebooting;
+
+struct kvm_device_ops;
+
+struct kvm_device {
+	struct kvm_device_ops *ops;
+	struct kvm *kvm;
+	void *private;
+	struct list_head vm_node;
+};
+
+/* create, destroy, and name are mandatory */
+struct kvm_device_ops {
+	const char *name;
+	int (*create)(struct kvm_device *dev, u32 type);
+
+	/*
+	 * Destroy is responsible for freeing dev.
+	 *
+	 * Destroy may be called before or after destructors are called
+	 * on emulated I/O regions, depending on whether a reference is
+	 * held by a vcpu or other kvm component that gets destroyed
+	 * after the emulated I/O.
+	 */
+	void (*destroy)(struct kvm_device *dev);
+
+	int (*set_attr)(struct kvm_device *dev, struct kvm_device_attr *attr);
+	int (*get_attr)(struct kvm_device *dev, struct kvm_device_attr *attr);
+	int (*has_attr)(struct kvm_device *dev, struct kvm_device_attr *attr);
+	long (*ioctl)(struct kvm_device *dev, unsigned int ioctl,
+		      unsigned long arg);
+};
+
+void kvm_device_get(struct kvm_device *dev);
+void kvm_device_put(struct kvm_device *dev);
+struct kvm_device *kvm_device_from_filp(struct file *filp);
+
+extern struct kvm_device_ops kvm_mpic_ops;
+extern struct kvm_device_ops kvm_xics_ops;
+
 #ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT
 
 static inline void kvm_vcpu_set_in_spin_loop(struct kvm_vcpu *vcpu, bool val)
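[Editor's note: the kvm_device plumbing above expects each backend to supply ops where create, destroy and name are mandatory, per the comment. A purely hypothetical minimal backend for illustration; the in-tree users are the mpic/xics ops declared above.]

    static int example_dev_create(struct kvm_device *dev, u32 type)
    {
            dev->private = NULL;    /* backend state would be set up here */
            return 0;
    }

    static void example_dev_destroy(struct kvm_device *dev)
    {
            kfree(dev);             /* destroy is responsible for freeing dev */
    }

    static struct kvm_device_ops example_dev_ops = {
            .name    = "example",
            .create  = example_dev_create,
            .destroy = example_dev_destroy,
    };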
diff --git a/include/linux/leds.h b/include/linux/leds.h
index 0d9b5ee..0287ab2 100644
--- a/include/linux/leds.h
+++ b/include/linux/leds.h
@@ -142,6 +142,10 @@
 /*
  * LED Triggers
  */
+/* Registration functions for simple triggers */
+#define DEFINE_LED_TRIGGER(x)		static struct led_trigger *x;
+#define DEFINE_LED_TRIGGER_GLOBAL(x)	struct led_trigger *x;
+
 #ifdef CONFIG_LEDS_TRIGGERS
 
 #define TRIG_NAME_MAX 50
@@ -164,9 +168,6 @@
 extern int led_trigger_register(struct led_trigger *trigger);
 extern void led_trigger_unregister(struct led_trigger *trigger);
 
-/* Registration functions for simple triggers */
-#define DEFINE_LED_TRIGGER(x)		static struct led_trigger *x;
-#define DEFINE_LED_TRIGGER_GLOBAL(x)	struct led_trigger *x;
 extern void led_trigger_register_simple(const char *name,
 				struct led_trigger **trigger);
 extern void led_trigger_unregister_simple(struct led_trigger *trigger);
@@ -199,20 +200,30 @@
 
 #else
 
-/* Triggers aren't active - null macros */
-#define DEFINE_LED_TRIGGER(x)
-#define DEFINE_LED_TRIGGER_GLOBAL(x)
-#define led_trigger_register_simple(x, y) do {} while(0)
-#define led_trigger_unregister_simple(x) do {} while(0)
-#define led_trigger_event(x, y) do {} while(0)
+/* Trigger has no members */
+struct led_trigger {};
 
-#endif
+/* Trigger inline empty functions */
+static inline void led_trigger_register_simple(const char *name,
+					struct led_trigger **trigger) {}
+static inline void led_trigger_unregister_simple(struct led_trigger *trigger) {}
+static inline void led_trigger_event(struct led_trigger *trigger,
+				enum led_brightness event) {}
+#endif /* CONFIG_LEDS_TRIGGERS */
 
 /* Trigger specific functions */
 #ifdef CONFIG_LEDS_TRIGGER_IDE_DISK
 extern void ledtrig_ide_activity(void);
 #else
-#define ledtrig_ide_activity() do {} while(0)
+static inline void ledtrig_ide_activity(void) {}
+#endif
+
+#if defined(CONFIG_LEDS_TRIGGER_CAMERA) || defined(CONFIG_LEDS_TRIGGER_CAMERA_MODULE)
+extern void ledtrig_flash_ctrl(bool on);
+extern void ledtrig_torch_ctrl(bool on);
+#else
+static inline void ledtrig_flash_ctrl(bool on) {}
+static inline void ledtrig_torch_ctrl(bool on) {}
 #endif
 
 /*
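[Editor's note: since the !CONFIG_LEDS_TRIGGERS stubs are now typed inline functions rather than empty macros, trigger users get type checking in both configurations and compile unchanged. A small usage sketch; the names are illustrative.]

    DEFINE_LED_TRIGGER(example_trig);

    static int __init example_init(void)
    {
            led_trigger_register_simple("example", &example_trig);
            led_trigger_event(example_trig, LED_FULL);  /* full brightness */
            return 0;
    }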
diff --git a/include/linux/mfd/arizona/pdata.h b/include/linux/mfd/arizona/pdata.h
index a0f9409..80dead1 100644
--- a/include/linux/mfd/arizona/pdata.h
+++ b/include/linux/mfd/arizona/pdata.h
@@ -78,6 +78,7 @@
 	unsigned int ext_cap:1;    /** External capacitor fitted */
 	unsigned int discharge:1;  /** Actively discharge */
 	unsigned int fast_start:1; /** Enable aggressive startup ramp rate */
+	unsigned int bypass:1;     /** Use bypass mode */
 };
 
 struct arizona_micd_config {
@@ -104,7 +105,8 @@
 	/** If a direct 32kHz clock is provided on an MCLK specify it here */
 	int clk32k_src;
 
-	bool irq_active_high; /** IRQ polarity */
+	/** Mode for primary IRQ (defaults to active low) */
+	unsigned int irq_flags;
 
 	/* Base GPIO */
 	int gpio_base;
@@ -183,6 +185,9 @@
 
 	/** Haptic actuator type */
 	unsigned int hap_act;
+
+	/** GPIO for primary IRQ (used for edge triggered emulation) */
+	int irq_gpio;
 };
 
 #endif
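[Editor's note: board code that previously set irq_active_high now describes the primary IRQ via irq_flags and can name a GPIO for edge-trigger emulation. A hedged sketch, on the assumption that the field takes the usual IRQF_TRIGGER_* values; the header only says it defaults to active low.]

    static struct arizona_pdata example_pdata = {
            .irq_flags = IRQF_TRIGGER_HIGH, /* assumption: IRQF_* flags */
            .irq_gpio  = -1,                /* illustrative: no edge GPIO */
    };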
diff --git a/include/linux/mfd/cros_ec.h b/include/linux/mfd/cros_ec.h
new file mode 100644
index 0000000..032af7f
--- /dev/null
+++ b/include/linux/mfd/cros_ec.h
@@ -0,0 +1,170 @@
+/*
+ * ChromeOS EC multi-function device
+ *
+ * Copyright (C) 2012 Google, Inc
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef __LINUX_MFD_CROS_EC_H
+#define __LINUX_MFD_CROS_EC_H
+
+#include <linux/mfd/cros_ec_commands.h>
+
+/*
+ * Command interface between EC and AP, for LPC, I2C and SPI interfaces.
+ */
+enum {
+	EC_MSG_TX_HEADER_BYTES	= 3,
+	EC_MSG_TX_TRAILER_BYTES	= 1,
+	EC_MSG_TX_PROTO_BYTES	= EC_MSG_TX_HEADER_BYTES +
+					EC_MSG_TX_TRAILER_BYTES,
+	EC_MSG_RX_PROTO_BYTES	= 3,
+
+	/* Max length of messages */
+	EC_MSG_BYTES		= EC_HOST_PARAM_SIZE + EC_MSG_TX_PROTO_BYTES,
+
+};
+
+/**
+ * struct cros_ec_msg - A message sent to the EC, and its reply
+ *
+ * @version: Command version number (often 0)
+ * @cmd: Command to send (EC_CMD_...)
+ * @out_buf: Outgoing payload (to EC)
+ * @out_len: Outgoing length
+ * @in_buf: Incoming payload (from EC)
+ * @in_len: Incoming length
+ */
+struct cros_ec_msg {
+	u8 version;
+	u8 cmd;
+	uint8_t *out_buf;
+	int out_len;
+	uint8_t *in_buf;
+	int in_len;
+};
+
+/**
+ * struct cros_ec_device - Information about a ChromeOS EC device
+ *
+ * @name: Name of this EC interface
+ * @priv: Private data
+ * @irq: Interrupt to use
+ * @din: input buffer (from EC)
+ * @dout: output buffer (to EC)
+ * \note
+ * These two buffers will always be dword-aligned and include enough
+ * space for up to 7 word-alignment bytes also, so we can ensure that
+ * the body of the message is always dword-aligned (64-bit).
+ *
+ * We use this alignment to keep ARM and x86 happy. Probably word
+ * alignment would be OK, there might be a small performance advantage
+ * to using dword.
+ * @din_size: size of din buffer
+ * @dout_size: size of dout buffer
+ * @command_send: send a command
+ * @command_recv: receive a command
+ * @ec_name: name of EC device (e.g. 'chromeos-ec')
+ * @phys_name: name of physical comms layer (e.g. 'i2c-4')
+ * @parent: pointer to parent device (e.g. i2c or spi device)
+ * @dev: Device pointer
+ * @dev_lock: Lock to prevent concurrent access
+ * @wake_enabled: true if this device can wake the system from sleep
+ * @was_wake_device: true if this device was set to wake the system from
+ * sleep at the last suspend
+ * @event_notifier: interrupt event notifier for transport devices
+ */
+struct cros_ec_device {
+	const char *name;
+	void *priv;
+	int irq;
+	uint8_t *din;
+	uint8_t *dout;
+	int din_size;
+	int dout_size;
+	int (*command_send)(struct cros_ec_device *ec,
+			uint16_t cmd, void *out_buf, int out_len);
+	int (*command_recv)(struct cros_ec_device *ec,
+			uint16_t cmd, void *in_buf, int in_len);
+	int (*command_sendrecv)(struct cros_ec_device *ec,
+			uint16_t cmd, void *out_buf, int out_len,
+			void *in_buf, int in_len);
+	int (*command_xfer)(struct cros_ec_device *ec,
+			struct cros_ec_msg *msg);
+
+	const char *ec_name;
+	const char *phys_name;
+	struct device *parent;
+
+	/* These are --private-- fields - do not assign */
+	struct device *dev;
+	struct mutex dev_lock;
+	bool wake_enabled;
+	bool was_wake_device;
+	struct blocking_notifier_head event_notifier;
+};
+
+/**
+ * cros_ec_suspend - Handle a suspend operation for the ChromeOS EC device
+ *
+ * This can be called by drivers to handle a suspend event.
+ *
+ * @ec_dev: Device to suspend
+ * @return 0 if ok, -ve on error
+ */
+int cros_ec_suspend(struct cros_ec_device *ec_dev);
+
+/**
+ * cros_ec_resume - Handle a resume operation for the ChromeOS EC device
+ *
+ * This can be called by drivers to handle a resume event.
+ *
+ * @ec_dev: Device to resume
+ * @return 0 if ok, -ve on error
+ */
+int cros_ec_resume(struct cros_ec_device *ec_dev);
+
+/**
+ * cros_ec_prepare_tx - Prepare an outgoing message in the output buffer
+ *
+ * This is intended to be used by all ChromeOS EC drivers, but at present
+ * only SPI uses it. Once LPC uses the same protocol it can start using it.
+ * I2C could use it now, with a refactor of the existing code.
+ *
+ * @ec_dev: Device to register
+ * @msg: Message to write
+ */
+int cros_ec_prepare_tx(struct cros_ec_device *ec_dev,
+		       struct cros_ec_msg *msg);
+
+/**
+ * cros_ec_remove - Remove a ChromeOS EC
+ *
+ * Call this to deregister a ChromeOS EC. After this you should call
+ * cros_ec_free().
+ *
+ * @ec_dev: Device to unregister
+ * @return 0 if ok, -ve on error
+ */
+int cros_ec_remove(struct cros_ec_device *ec_dev);
+
+/**
+ * cros_ec_register - Register a new ChromeOS EC, using the provided info
+ *
+ * Before calling this, allocate a pointer to a new device and then fill
+ * in all the fields up to the --private-- marker.
+ *
+ * @ec_dev: Device to register
+ * @return 0 if ok, -ve on error
+ */
+int cros_ec_register(struct cros_ec_device *ec_dev);
+
+#endif /* __LINUX_MFD_CROS_EC_H */
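[Editor's note: a hedged sketch of how a transport driver might fill the public fields before registering. The header does not say who allocates din/dout, so only their sizes are set here, and example_xfer is a hypothetical transport callback.]

    static int example_ec_probe(struct device *dev)
    {
            struct cros_ec_device *ec_dev;

            ec_dev = devm_kzalloc(dev, sizeof(*ec_dev), GFP_KERNEL);
            if (!ec_dev)
                    return -ENOMEM;

            ec_dev->name = "example";
            ec_dev->ec_name = "chromeos-ec";
            ec_dev->phys_name = dev_name(dev);
            ec_dev->parent = dev;
            ec_dev->command_xfer = example_xfer;    /* hypothetical */
            ec_dev->din_size = EC_MSG_BYTES;
            ec_dev->dout_size = EC_MSG_BYTES;

            return cros_ec_register(ec_dev);
    }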
diff --git a/include/linux/mfd/cros_ec_commands.h b/include/linux/mfd/cros_ec_commands.h
new file mode 100644
index 0000000..86fd069
--- /dev/null
+++ b/include/linux/mfd/cros_ec_commands.h
@@ -0,0 +1,1369 @@
+/*
+ * Host communication command constants for ChromeOS EC
+ *
+ * Copyright (C) 2012 Google, Inc
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * The ChromeOS EC multi function device is used to mux all the requests
+ * to the EC device for its multiple features: keyboard controller,
+ * battery charging and regulator control, firmware update.
+ *
+ * NOTE: This file is copied verbatim from the ChromeOS EC Open Source
+ * project in an attempt to make future updates easy to make.
+ */
+
+#ifndef __CROS_EC_COMMANDS_H
+#define __CROS_EC_COMMANDS_H
+
+/*
+ * Protocol overview
+ *
+ * request:  CMD [ P0 P1 P2 ... Pn S ]
+ * response: ERR [ P0 P1 P2 ... Pn S ]
+ *
+ * where the bytes are defined as follows:
+ *      - CMD is the command code. (defined by EC_CMD_ constants)
+ *      - ERR is the error code. (defined by EC_RES_ constants)
+ *      - Px is the optional payload.
+ *        it is not sent if the error code is not success.
+ *        (defined by ec_params_ and ec_response_ structures)
+ *      - S is the checksum which is the sum of all payload bytes.
+ *
+ * On LPC, CMD and ERR are sent/received at EC_LPC_ADDR_KERNEL|USER_CMD
+ * and the payloads are sent/received at EC_LPC_ADDR_KERNEL|USER_PARAM.
+ * On I2C, all bytes are sent serially in the same message.
+ */
+
+/* Current version of this protocol */
+#define EC_PROTO_VERSION          0x00000002
+
+/* Command version mask */
+#define EC_VER_MASK(version) (1UL << (version))
+
+/* I/O addresses for ACPI commands */
+#define EC_LPC_ADDR_ACPI_DATA  0x62
+#define EC_LPC_ADDR_ACPI_CMD   0x66
+
+/* I/O addresses for host command */
+#define EC_LPC_ADDR_HOST_DATA  0x200
+#define EC_LPC_ADDR_HOST_CMD   0x204
+
+/* I/O addresses for host command args and params */
+#define EC_LPC_ADDR_HOST_ARGS  0x800
+#define EC_LPC_ADDR_HOST_PARAM 0x804
+#define EC_HOST_PARAM_SIZE     0x0fc  /* Size of param area in bytes */
+
+/* I/O addresses for host command params, old interface */
+#define EC_LPC_ADDR_OLD_PARAM  0x880
+#define EC_OLD_PARAM_SIZE      0x080  /* Size of param area in bytes */
+
+/* EC command register bit functions */
+#define EC_LPC_CMDR_DATA	(1 << 0)  /* Data ready for host to read */
+#define EC_LPC_CMDR_PENDING	(1 << 1)  /* Write pending to EC */
+#define EC_LPC_CMDR_BUSY	(1 << 2)  /* EC is busy processing a command */
+#define EC_LPC_CMDR_CMD		(1 << 3)  /* Last host write was a command */
+#define EC_LPC_CMDR_ACPI_BRST	(1 << 4)  /* Burst mode (not used) */
+#define EC_LPC_CMDR_SCI		(1 << 5)  /* SCI event is pending */
+#define EC_LPC_CMDR_SMI		(1 << 6)  /* SMI event is pending */
+
+#define EC_LPC_ADDR_MEMMAP       0x900
+#define EC_MEMMAP_SIZE         255 /* ACPI IO buffer max is 255 bytes */
+#define EC_MEMMAP_TEXT_MAX     8   /* Size of a string in the memory map */
+
+/* The offset address of each type of data in mapped memory. */
+#define EC_MEMMAP_TEMP_SENSOR      0x00 /* Temp sensors */
+#define EC_MEMMAP_FAN              0x10 /* Fan speeds */
+#define EC_MEMMAP_TEMP_SENSOR_B    0x18 /* Temp sensors (second set) */
+#define EC_MEMMAP_ID               0x20 /* 'E' 'C' */
+#define EC_MEMMAP_ID_VERSION       0x22 /* Version of data in 0x20 - 0x2f */
+#define EC_MEMMAP_THERMAL_VERSION  0x23 /* Version of data in 0x00 - 0x1f */
+#define EC_MEMMAP_BATTERY_VERSION  0x24 /* Version of data in 0x40 - 0x7f */
+#define EC_MEMMAP_SWITCHES_VERSION 0x25 /* Version of data in 0x30 - 0x33 */
+#define EC_MEMMAP_EVENTS_VERSION   0x26 /* Version of data in 0x34 - 0x3f */
+#define EC_MEMMAP_HOST_CMD_FLAGS   0x27 /* Host command interface flags */
+#define EC_MEMMAP_SWITCHES         0x30
+#define EC_MEMMAP_HOST_EVENTS      0x34
+#define EC_MEMMAP_BATT_VOLT        0x40 /* Battery Present Voltage */
+#define EC_MEMMAP_BATT_RATE        0x44 /* Battery Present Rate */
+#define EC_MEMMAP_BATT_CAP         0x48 /* Battery Remaining Capacity */
+#define EC_MEMMAP_BATT_FLAG        0x4c /* Battery State, defined below */
+#define EC_MEMMAP_BATT_DCAP        0x50 /* Battery Design Capacity */
+#define EC_MEMMAP_BATT_DVLT        0x54 /* Battery Design Voltage */
+#define EC_MEMMAP_BATT_LFCC        0x58 /* Battery Last Full Charge Capacity */
+#define EC_MEMMAP_BATT_CCNT        0x5c /* Battery Cycle Count */
+#define EC_MEMMAP_BATT_MFGR        0x60 /* Battery Manufacturer String */
+#define EC_MEMMAP_BATT_MODEL       0x68 /* Battery Model Number String */
+#define EC_MEMMAP_BATT_SERIAL      0x70 /* Battery Serial Number String */
+#define EC_MEMMAP_BATT_TYPE        0x78 /* Battery Type String */
+
+/* Number of temp sensors at EC_MEMMAP_TEMP_SENSOR */
+#define EC_TEMP_SENSOR_ENTRIES     16
+/*
+ * Number of temp sensors at EC_MEMMAP_TEMP_SENSOR_B.
+ *
+ * Valid only if EC_MEMMAP_THERMAL_VERSION returns >= 2.
+ */
+#define EC_TEMP_SENSOR_B_ENTRIES      8
+#define EC_TEMP_SENSOR_NOT_PRESENT    0xff
+#define EC_TEMP_SENSOR_ERROR          0xfe
+#define EC_TEMP_SENSOR_NOT_POWERED    0xfd
+#define EC_TEMP_SENSOR_NOT_CALIBRATED 0xfc
+/*
+ * The offset of temperature value stored in mapped memory.  This allows
+ * reporting a temperature range of 200K to 454K = -73C to 181C.
+ */
+#define EC_TEMP_SENSOR_OFFSET      200
+
+#define EC_FAN_SPEED_ENTRIES       4       /* Number of fans at EC_MEMMAP_FAN */
+#define EC_FAN_SPEED_NOT_PRESENT   0xffff  /* Entry not present */
+#define EC_FAN_SPEED_STALLED       0xfffe  /* Fan stalled */
+
+/* Battery bit flags at EC_MEMMAP_BATT_FLAG. */
+#define EC_BATT_FLAG_AC_PRESENT   0x01
+#define EC_BATT_FLAG_BATT_PRESENT 0x02
+#define EC_BATT_FLAG_DISCHARGING  0x04
+#define EC_BATT_FLAG_CHARGING     0x08
+#define EC_BATT_FLAG_LEVEL_CRITICAL 0x10
+
+/* Switch flags at EC_MEMMAP_SWITCHES */
+#define EC_SWITCH_LID_OPEN               0x01
+#define EC_SWITCH_POWER_BUTTON_PRESSED   0x02
+#define EC_SWITCH_WRITE_PROTECT_DISABLED 0x04
+/* Recovery requested via keyboard */
+#define EC_SWITCH_KEYBOARD_RECOVERY      0x08
+/* Recovery requested via dedicated signal (from servo board) */
+#define EC_SWITCH_DEDICATED_RECOVERY     0x10
+/* Was fake developer mode switch; now unused.  Remove in next refactor. */
+#define EC_SWITCH_IGNORE0                0x20
+
+/* Host command interface flags */
+/* Host command interface supports LPC args (LPC interface only) */
+#define EC_HOST_CMD_FLAG_LPC_ARGS_SUPPORTED  0x01
+
+/* Wireless switch flags */
+#define EC_WIRELESS_SWITCH_WLAN      0x01
+#define EC_WIRELESS_SWITCH_BLUETOOTH 0x02
+
+/*
+ * This header file is used in coreboot both in C and ACPI code.  The ACPI code
+ * is pre-processed to handle constants but the ASL compiler is unable to
+ * handle actual C code so keep it separate.
+ */
+#ifndef __ACPI__
+
+/* LPC command status byte masks */
+/* EC has written a byte in the data register and host hasn't read it yet */
+#define EC_LPC_STATUS_TO_HOST     0x01
+/* Host has written a command/data byte and the EC hasn't read it yet */
+#define EC_LPC_STATUS_FROM_HOST   0x02
+/* EC is processing a command */
+#define EC_LPC_STATUS_PROCESSING  0x04
+/* Last write to EC was a command, not data */
+#define EC_LPC_STATUS_LAST_CMD    0x08
+/* EC is in burst mode.  Unsupported by Chrome EC, so this bit is never set */
+#define EC_LPC_STATUS_BURST_MODE  0x10
+/* SCI event is pending (requesting SCI query) */
+#define EC_LPC_STATUS_SCI_PENDING 0x20
+/* SMI event is pending (requesting SMI query) */
+#define EC_LPC_STATUS_SMI_PENDING 0x40
+/* (reserved) */
+#define EC_LPC_STATUS_RESERVED    0x80
+
+/*
+ * EC is busy.  This covers both the EC processing a command, and the host has
+ * written a new command but the EC hasn't picked it up yet.
+ */
+#define EC_LPC_STATUS_BUSY_MASK \
+	(EC_LPC_STATUS_FROM_HOST | EC_LPC_STATUS_PROCESSING)
+
+/* Host command response codes */
+enum ec_status {
+	EC_RES_SUCCESS = 0,
+	EC_RES_INVALID_COMMAND = 1,
+	EC_RES_ERROR = 2,
+	EC_RES_INVALID_PARAM = 3,
+	EC_RES_ACCESS_DENIED = 4,
+	EC_RES_INVALID_RESPONSE = 5,
+	EC_RES_INVALID_VERSION = 6,
+	EC_RES_INVALID_CHECKSUM = 7,
+	EC_RES_IN_PROGRESS = 8,		/* Accepted, command in progress */
+	EC_RES_UNAVAILABLE = 9,		/* No response available */
+	EC_RES_TIMEOUT = 10,		/* We got a timeout */
+	EC_RES_OVERFLOW = 11,		/* Table / data overflow */
+};
+
+/*
+ * Host event codes.  Note these are 1-based, not 0-based, because ACPI query
+ * EC command uses code 0 to mean "no event pending".  We explicitly specify
+ * each value in the enum listing so they won't change if we delete/insert an
+ * item or rearrange the list (it needs to be stable across platforms, not
+ * just within a single compiled instance).
+ */
+enum host_event_code {
+	EC_HOST_EVENT_LID_CLOSED = 1,
+	EC_HOST_EVENT_LID_OPEN = 2,
+	EC_HOST_EVENT_POWER_BUTTON = 3,
+	EC_HOST_EVENT_AC_CONNECTED = 4,
+	EC_HOST_EVENT_AC_DISCONNECTED = 5,
+	EC_HOST_EVENT_BATTERY_LOW = 6,
+	EC_HOST_EVENT_BATTERY_CRITICAL = 7,
+	EC_HOST_EVENT_BATTERY = 8,
+	EC_HOST_EVENT_THERMAL_THRESHOLD = 9,
+	EC_HOST_EVENT_THERMAL_OVERLOAD = 10,
+	EC_HOST_EVENT_THERMAL = 11,
+	EC_HOST_EVENT_USB_CHARGER = 12,
+	EC_HOST_EVENT_KEY_PRESSED = 13,
+	/*
+	 * EC has finished initializing the host interface.  The host can check
+	 * for this event after sending an EC_CMD_REBOOT_EC command to
+	 * determine when the EC is ready to accept subsequent commands.
+	 */
+	EC_HOST_EVENT_INTERFACE_READY = 14,
+	/* Keyboard recovery combo has been pressed */
+	EC_HOST_EVENT_KEYBOARD_RECOVERY = 15,
+
+	/* Shutdown due to thermal overload */
+	EC_HOST_EVENT_THERMAL_SHUTDOWN = 16,
+	/* Shutdown due to battery level too low */
+	EC_HOST_EVENT_BATTERY_SHUTDOWN = 17,
+
+	/*
+	 * The high bit of the event mask is not used as a host event code.  If
+	 * it reads back as set, then the entire event mask should be
+	 * considered invalid by the host.  This can happen when reading the
+	 * raw event status via EC_MEMMAP_HOST_EVENTS but the LPC interface is
+	 * not initialized on the EC, or improperly configured on the host.
+	 */
+	EC_HOST_EVENT_INVALID = 32
+};
+/* Host event mask */
+#define EC_HOST_EVENT_MASK(event_code) (1UL << ((event_code) - 1))
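[Editor's note: because event codes are 1-based, the macro shifts by (event_code - 1), so bit 0 corresponds to EC_HOST_EVENT_LID_CLOSED. A one-function illustration:]

    /* Sketch: events is the raw value read from EC_MEMMAP_HOST_EVENTS. */
    static inline int example_lid_is_open(uint32_t events)
    {
            return !!(events & EC_HOST_EVENT_MASK(EC_HOST_EVENT_LID_OPEN));
    }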
+
+/* Arguments at EC_LPC_ADDR_HOST_ARGS */
+struct ec_lpc_host_args {
+	uint8_t flags;
+	uint8_t command_version;
+	uint8_t data_size;
+	/*
+	 * Checksum; sum of command + flags + command_version + data_size +
+	 * all params/response data bytes.
+	 */
+	uint8_t checksum;
+} __packed;
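[Editor's note: a sketch of the checksum rule spelled out in the struct comment, the 8-bit sum of the command byte, the three header fields, and every data byte. The command and data are parameters here because they live outside the args block.]

    static uint8_t example_lpc_checksum(uint8_t command,
                                        const struct ec_lpc_host_args *args,
                                        const uint8_t *data)
    {
            uint8_t csum = command + args->flags + args->command_version +
                           args->data_size;
            int i;

            for (i = 0; i < args->data_size; i++)
                    csum += data[i];        /* natural 8-bit wraparound */
            return csum;
    }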
+
+/* Flags for ec_lpc_host_args.flags */
+/*
+ * Args are from host.  Data area at EC_LPC_ADDR_HOST_PARAM contains command
+ * params.
+ *
+ * If EC gets a command and this flag is not set, this is an old-style command.
+ * Command version is 0 and params from host are at EC_LPC_ADDR_OLD_PARAM with
+ * unknown length.  EC must respond with an old-style response (that is,
+ * without setting EC_HOST_ARGS_FLAG_TO_HOST).
+ */
+#define EC_HOST_ARGS_FLAG_FROM_HOST 0x01
+/*
+ * Args are from EC.  Data area at EC_LPC_ADDR_HOST_PARAM contains response.
+ *
+ * If EC responds to a command and this flag is not set, this is an old-style
+ * response.  Command version is 0 and response data from EC is at
+ * EC_LPC_ADDR_OLD_PARAM with unknown length.
+ */
+#define EC_HOST_ARGS_FLAG_TO_HOST   0x02
+
+/*
+ * Notes on commands:
+ *
+ * Each command is an 8-byte command value.  Commands which take params or
+ * return response data specify structs for that data.  If no struct is
+ * specified, the command does not input or output data, respectively.
+ * Parameter/response length is implicit in the structs.  Some underlying
+ * communication protocols (I2C, SPI) may add length or checksum headers, but
+ * those are implementation-dependent and not defined here.
+ */
+
+/*****************************************************************************/
+/* General / test commands */
+
+/*
+ * Get protocol version, used to deal with non-backward compatible protocol
+ * changes.
+ */
+#define EC_CMD_PROTO_VERSION 0x00
+
+struct ec_response_proto_version {
+	uint32_t version;
+} __packed;
+
+/*
+ * Hello.  This is a simple command to test that the EC is responsive to
+ * commands.
+ */
+#define EC_CMD_HELLO 0x01
+
+struct ec_params_hello {
+	uint32_t in_data;  /* Pass anything here */
+} __packed;
+
+struct ec_response_hello {
+	uint32_t out_data;  /* Output will be in_data + 0x01020304 */
+} __packed;
+
+/* Get version number */
+#define EC_CMD_GET_VERSION 0x02
+
+enum ec_current_image {
+	EC_IMAGE_UNKNOWN = 0,
+	EC_IMAGE_RO,
+	EC_IMAGE_RW
+};
+
+struct ec_response_get_version {
+	/* Null-terminated version strings for RO, RW */
+	char version_string_ro[32];
+	char version_string_rw[32];
+	char reserved[32];       /* Was previously RW-B string */
+	uint32_t current_image;  /* One of ec_current_image */
+} __packed;
+
+/* Read test */
+#define EC_CMD_READ_TEST 0x03
+
+struct ec_params_read_test {
+	uint32_t offset;   /* Starting value for read buffer */
+	uint32_t size;     /* Size to read in bytes */
+} __packed;
+
+struct ec_response_read_test {
+	uint32_t data[32];
+} __packed;
+
+/*
+ * Get build information
+ *
+ * Response is null-terminated string.
+ */
+#define EC_CMD_GET_BUILD_INFO 0x04
+
+/* Get chip info */
+#define EC_CMD_GET_CHIP_INFO 0x05
+
+struct ec_response_get_chip_info {
+	/* Null-terminated strings */
+	char vendor[32];
+	char name[32];
+	char revision[32];  /* Mask version */
+} __packed;
+
+/* Get board HW version */
+#define EC_CMD_GET_BOARD_VERSION 0x06
+
+struct ec_response_board_version {
+	uint16_t board_version;  /* A monotonically increasing number. */
+} __packed;
+
+/*
+ * Read memory-mapped data.
+ *
+ * This is an alternate interface to memory-mapped data for bus protocols
+ * which don't support direct-mapped memory - I2C, SPI, etc.
+ *
+ * Response is params.size bytes of data.
+ */
+#define EC_CMD_READ_MEMMAP 0x07
+
+struct ec_params_read_memmap {
+	uint8_t offset;   /* Offset in memmap (EC_MEMMAP_*) */
+	uint8_t size;     /* Size to read in bytes */
+} __packed;
+
+/* Read versions supported for a command */
+#define EC_CMD_GET_CMD_VERSIONS 0x08
+
+struct ec_params_get_cmd_versions {
+	uint8_t cmd;      /* Command to check */
+} __packed;
+
+struct ec_response_get_cmd_versions {
+	/*
+	 * Mask of supported versions; use EC_VER_MASK() to compare with a
+	 * desired version.
+	 */
+	uint32_t version_mask;
+} __packed;
+
+/*
+ * Check EC communications status (busy). This is needed on i2c/spi but not
+ * on lpc since it has its own out-of-band busy indicator.
+ *
+ * lpc must read the status from the command register. Attempting this on
+ * lpc will overwrite the args/parameter space and corrupt its data.
+ */
+#define EC_CMD_GET_COMMS_STATUS		0x09
+
+/* Avoid using ec_status which is for return values */
+enum ec_comms_status {
+	EC_COMMS_STATUS_PROCESSING	= 1 << 0,	/* Processing cmd */
+};
+
+struct ec_response_get_comms_status {
+	uint32_t flags;		/* Mask of enum ec_comms_status */
+} __packed;
+
+
+/*****************************************************************************/
+/* Flash commands */
+
+/* Get flash info */
+#define EC_CMD_FLASH_INFO 0x10
+
+struct ec_response_flash_info {
+	/* Usable flash size, in bytes */
+	uint32_t flash_size;
+	/*
+	 * Write block size.  Write offset and size must be a multiple
+	 * of this.
+	 */
+	uint32_t write_block_size;
+	/*
+	 * Erase block size.  Erase offset and size must be a multiple
+	 * of this.
+	 */
+	uint32_t erase_block_size;
+	/*
+	 * Protection block size.  Protection offset and size must be a
+	 * multiple of this.
+	 */
+	uint32_t protect_block_size;
+} __packed;
+
+/*
+ * Read flash
+ *
+ * Response is params.size bytes of data.
+ */
+#define EC_CMD_FLASH_READ 0x11
+
+struct ec_params_flash_read {
+	uint32_t offset;   /* Byte offset to read */
+	uint32_t size;     /* Size to read in bytes */
+} __packed;
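[Editor's note: to show the command structs in context, a hedged sketch of issuing EC_CMD_FLASH_READ through the generic cros_ec_msg interface from cros_ec.h, assuming the transport's command_xfer hook is the entry point.]

    static int example_flash_read(struct cros_ec_device *ec, uint32_t offset,
                                  uint8_t *buf, uint8_t size)
    {
            struct ec_params_flash_read params = {
                    .offset = offset,
                    .size = size,
            };
            struct cros_ec_msg msg = {
                    .cmd = EC_CMD_FLASH_READ,
                    .out_buf = (uint8_t *)&params,
                    .out_len = sizeof(params),
                    .in_buf = buf,
                    .in_len = size,
            };

            return ec->command_xfer(ec, &msg);
    }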
+
+/* Write flash */
+#define EC_CMD_FLASH_WRITE 0x12
+
+struct ec_params_flash_write {
+	uint32_t offset;   /* Byte offset to write */
+	uint32_t size;     /* Size to write in bytes */
+	/*
+	 * Data to write.  Could really use EC_PARAM_SIZE - 8, but tidiest to
+	 * use a power of 2 so writes stay aligned.
+	 */
+	uint8_t data[64];
+} __packed;
+
+/* Erase flash */
+#define EC_CMD_FLASH_ERASE 0x13
+
+struct ec_params_flash_erase {
+	uint32_t offset;   /* Byte offset to erase */
+	uint32_t size;     /* Size to erase in bytes */
+} __packed;
+
+/*
+ * Get/set flash protection.
+ *
+ * If mask!=0, sets/clears the requested bits of flags.  Depending on the
+ * firmware write protect GPIO, not all flags will take effect immediately;
+ * some flags require a subsequent hard reset to take effect.  Check the
+ * returned flags bits to see what actually happened.
+ *
+ * If mask=0, simply returns the current flags state.
+ */
+#define EC_CMD_FLASH_PROTECT 0x15
+#define EC_VER_FLASH_PROTECT 1  /* Command version 1 */
+
+/* Flags for flash protection */
+/* RO flash code protected when the EC boots */
+#define EC_FLASH_PROTECT_RO_AT_BOOT         (1 << 0)
+/*
+ * RO flash code protected now.  If this bit is set, at-boot status cannot
+ * be changed.
+ */
+#define EC_FLASH_PROTECT_RO_NOW             (1 << 1)
+/* Entire flash code protected now, until reboot. */
+#define EC_FLASH_PROTECT_ALL_NOW            (1 << 2)
+/* Flash write protect GPIO is asserted now */
+#define EC_FLASH_PROTECT_GPIO_ASSERTED      (1 << 3)
+/* Error - at least one bank of flash is stuck locked, and cannot be unlocked */
+#define EC_FLASH_PROTECT_ERROR_STUCK        (1 << 4)
+/*
+ * Error - flash protection is in inconsistent state.  At least one bank of
+ * flash which should be protected is not protected.  Usually fixed by
+ * re-requesting the desired flags, or by a hard reset if that fails.
+ */
+#define EC_FLASH_PROTECT_ERROR_INCONSISTENT (1 << 5)
+/* Entire flash code protected when the EC boots */
+#define EC_FLASH_PROTECT_ALL_AT_BOOT        (1 << 6)
+
+struct ec_params_flash_protect {
+	uint32_t mask;   /* Bits in flags to apply */
+	uint32_t flags;  /* New flags to apply */
+} __packed;
+
+struct ec_response_flash_protect {
+	/* Current value of flash protect flags */
+	uint32_t flags;
+	/*
+	 * Flags which are valid on this platform.  This allows the caller
+	 * to distinguish between flags which aren't set vs. flags which can't
+	 * be set on this platform.
+	 */
+	uint32_t valid_flags;
+	/* Flags which can be changed given the current protection state */
+	uint32_t writable_flags;
+} __packed;
+
+/*
+ * Note: commands 0x14 - 0x19 version 0 were old commands to get/set flash
+ * write protect.  These commands may be reused with version > 0.
+ */
+
+/* Get the region offset/size */
+#define EC_CMD_FLASH_REGION_INFO 0x16
+#define EC_VER_FLASH_REGION_INFO 1
+
+enum ec_flash_region {
+	/* Region which holds read-only EC image */
+	EC_FLASH_REGION_RO,
+	/* Region which holds rewritable EC image */
+	EC_FLASH_REGION_RW,
+	/*
+	 * Region which should be write-protected in the factory (a superset of
+	 * EC_FLASH_REGION_RO)
+	 */
+	EC_FLASH_REGION_WP_RO,
+};
+
+struct ec_params_flash_region_info {
+	uint32_t region;  /* enum ec_flash_region */
+} __packed;
+
+struct ec_response_flash_region_info {
+	uint32_t offset;
+	uint32_t size;
+} __packed;
+
+/* Read/write VbNvContext */
+#define EC_CMD_VBNV_CONTEXT 0x17
+#define EC_VER_VBNV_CONTEXT 1
+#define EC_VBNV_BLOCK_SIZE 16
+
+enum ec_vbnvcontext_op {
+	EC_VBNV_CONTEXT_OP_READ,
+	EC_VBNV_CONTEXT_OP_WRITE,
+};
+
+struct ec_params_vbnvcontext {
+	uint32_t op;
+	uint8_t block[EC_VBNV_BLOCK_SIZE];
+} __packed;
+
+struct ec_response_vbnvcontext {
+	uint8_t block[EC_VBNV_BLOCK_SIZE];
+} __packed;
+
+/*****************************************************************************/
+/* PWM commands */
+
+/* Get fan target RPM */
+#define EC_CMD_PWM_GET_FAN_TARGET_RPM 0x20
+
+struct ec_response_pwm_get_fan_rpm {
+	uint32_t rpm;
+} __packed;
+
+/* Set target fan RPM */
+#define EC_CMD_PWM_SET_FAN_TARGET_RPM 0x21
+
+struct ec_params_pwm_set_fan_target_rpm {
+	uint32_t rpm;
+} __packed;
+
+/* Get keyboard backlight */
+#define EC_CMD_PWM_GET_KEYBOARD_BACKLIGHT 0x22
+
+struct ec_response_pwm_get_keyboard_backlight {
+	uint8_t percent;
+	uint8_t enabled;
+} __packed;
+
+/* Set keyboard backlight */
+#define EC_CMD_PWM_SET_KEYBOARD_BACKLIGHT 0x23
+
+struct ec_params_pwm_set_keyboard_backlight {
+	uint8_t percent;
+} __packed;
+
+/* Set target fan PWM duty cycle */
+#define EC_CMD_PWM_SET_FAN_DUTY 0x24
+
+struct ec_params_pwm_set_fan_duty {
+	uint32_t percent;
+} __packed;
+
+/*****************************************************************************/
+/*
+ * Lightbar commands. This looks worse than it is. Since we only use one HOST
+ * command to say "talk to the lightbar", we put the "and tell it to do X" part
+ * into a subcommand. We'll make separate structs for subcommands with
+ * different input args, so that we know how much to expect.
+ */
+#define EC_CMD_LIGHTBAR_CMD 0x28
+
+struct rgb_s {
+	uint8_t r, g, b;
+};
+
+#define LB_BATTERY_LEVELS 4
+/*
+ * List of tweakable parameters. NOTE: It's __packed so it can be sent in a
+ * host command, but the alignment is the same regardless. Keep it that way.
+ */
+struct lightbar_params {
+	/* Timing */
+	int google_ramp_up;
+	int google_ramp_down;
+	int s3s0_ramp_up;
+	int s0_tick_delay[2];			/* AC=0/1 */
+	int s0a_tick_delay[2];			/* AC=0/1 */
+	int s0s3_ramp_down;
+	int s3_sleep_for;
+	int s3_ramp_up;
+	int s3_ramp_down;
+
+	/* Oscillation */
+	uint8_t new_s0;
+	uint8_t osc_min[2];			/* AC=0/1 */
+	uint8_t osc_max[2];			/* AC=0/1 */
+	uint8_t w_ofs[2];			/* AC=0/1 */
+
+	/* Brightness limits based on the backlight and AC. */
+	uint8_t bright_bl_off_fixed[2];		/* AC=0/1 */
+	uint8_t bright_bl_on_min[2];		/* AC=0/1 */
+	uint8_t bright_bl_on_max[2];		/* AC=0/1 */
+
+	/* Battery level thresholds */
+	uint8_t battery_threshold[LB_BATTERY_LEVELS - 1];
+
+	/* Map [AC][battery_level] to color index */
+	uint8_t s0_idx[2][LB_BATTERY_LEVELS];	/* AP is running */
+	uint8_t s3_idx[2][LB_BATTERY_LEVELS];	/* AP is sleeping */
+
+	/* Color palette */
+	struct rgb_s color[8];			/* 0-3 are Google colors */
+} __packed;
+
+struct ec_params_lightbar {
+	uint8_t cmd;		      /* Command (see enum lightbar_command) */
+	union {
+		struct {
+			/* no args */
+		} dump, off, on, init, get_seq, get_params;
+
+		struct num {
+			uint8_t num;
+		} brightness, seq, demo;
+
+		struct reg {
+			uint8_t ctrl, reg, value;
+		} reg;
+
+		struct rgb {
+			uint8_t led, red, green, blue;
+		} rgb;
+
+		struct lightbar_params set_params;
+	};
+} __packed;
+
+struct ec_response_lightbar {
+	union {
+		struct dump {
+			struct {
+				uint8_t reg;
+				uint8_t ic0;
+				uint8_t ic1;
+			} vals[23];
+		} dump;
+
+		struct get_seq {
+			uint8_t num;
+		} get_seq;
+
+		struct lightbar_params get_params;
+
+		struct {
+			/* no return params */
+		} off, on, init, brightness, seq, reg, rgb, demo, set_params;
+	};
+} __packed;
+
+/* Lightbar commands */
+enum lightbar_command {
+	LIGHTBAR_CMD_DUMP = 0,
+	LIGHTBAR_CMD_OFF = 1,
+	LIGHTBAR_CMD_ON = 2,
+	LIGHTBAR_CMD_INIT = 3,
+	LIGHTBAR_CMD_BRIGHTNESS = 4,
+	LIGHTBAR_CMD_SEQ = 5,
+	LIGHTBAR_CMD_REG = 6,
+	LIGHTBAR_CMD_RGB = 7,
+	LIGHTBAR_CMD_GET_SEQ = 8,
+	LIGHTBAR_CMD_DEMO = 9,
+	LIGHTBAR_CMD_GET_PARAMS = 10,
+	LIGHTBAR_CMD_SET_PARAMS = 11,
+	LIGHTBAR_NUM_CMDS
+};
+
+/*****************************************************************************/
+/* Verified boot commands */
+
+/*
+ * Note: command code 0x29 version 0 was VBOOT_CMD in Link EVT; it may be
+ * reused for other purposes with version > 0.
+ */
+
+/* Verified boot hash command */
+#define EC_CMD_VBOOT_HASH 0x2A
+
+struct ec_params_vboot_hash {
+	uint8_t cmd;             /* enum ec_vboot_hash_cmd */
+	uint8_t hash_type;       /* enum ec_vboot_hash_type */
+	uint8_t nonce_size;      /* Nonce size; may be 0 */
+	uint8_t reserved0;       /* Reserved; set 0 */
+	uint32_t offset;         /* Offset in flash to hash */
+	uint32_t size;           /* Number of bytes to hash */
+	uint8_t nonce_data[64];  /* Nonce data; ignored if nonce_size=0 */
+} __packed;
+
+struct ec_response_vboot_hash {
+	uint8_t status;          /* enum ec_vboot_hash_status */
+	uint8_t hash_type;       /* enum ec_vboot_hash_type */
+	uint8_t digest_size;     /* Size of hash digest in bytes */
+	uint8_t reserved0;       /* Ignore; will be 0 */
+	uint32_t offset;         /* Offset in flash which was hashed */
+	uint32_t size;           /* Number of bytes hashed */
+	uint8_t hash_digest[64]; /* Hash digest data */
+} __packed;
+
+enum ec_vboot_hash_cmd {
+	EC_VBOOT_HASH_GET = 0,       /* Get current hash status */
+	EC_VBOOT_HASH_ABORT = 1,     /* Abort calculating current hash */
+	EC_VBOOT_HASH_START = 2,     /* Start computing a new hash */
+	EC_VBOOT_HASH_RECALC = 3,    /* Synchronously compute a new hash */
+};
+
+enum ec_vboot_hash_type {
+	EC_VBOOT_HASH_TYPE_SHA256 = 0, /* SHA-256 */
+};
+
+enum ec_vboot_hash_status {
+	EC_VBOOT_HASH_STATUS_NONE = 0, /* No hash (not started, or aborted) */
+	EC_VBOOT_HASH_STATUS_DONE = 1, /* Finished computing a hash */
+	EC_VBOOT_HASH_STATUS_BUSY = 2, /* Busy computing a hash */
+};
+
+/*
+ * Special values for offset for EC_VBOOT_HASH_START and EC_VBOOT_HASH_RECALC.
+ * If one of these is specified, the EC will automatically update offset and
+ * size to the correct values for the specified image (RO or RW).
+ */
+#define EC_VBOOT_HASH_OFFSET_RO 0xfffffffe
+#define EC_VBOOT_HASH_OFFSET_RW 0xfffffffd
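+
+/*
+ * Illustrative only: starting a background SHA-256 hash of the whole
+ * RO image, letting the EC substitute the real offset/size (a sketch;
+ * transport to the EC is not shown).
+ *
+ *	struct ec_params_vboot_hash p = {
+ *		.cmd = EC_VBOOT_HASH_START,
+ *		.hash_type = EC_VBOOT_HASH_TYPE_SHA256,
+ *		.offset = EC_VBOOT_HASH_OFFSET_RO,
+ *	};
+ */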
+
+/*****************************************************************************/
+/* USB charging control commands */
+
+/* Set USB port charging mode */
+#define EC_CMD_USB_CHARGE_SET_MODE 0x30
+
+struct ec_params_usb_charge_set_mode {
+	uint8_t usb_port_id;
+	uint8_t mode;
+} __packed;
+
+/*****************************************************************************/
+/* Persistent storage for host */
+
+/* Maximum bytes that can be read/written in a single command */
+#define EC_PSTORE_SIZE_MAX 64
+
+/* Get persistent storage info */
+#define EC_CMD_PSTORE_INFO 0x40
+
+struct ec_response_pstore_info {
+	/* Persistent storage size, in bytes */
+	uint32_t pstore_size;
+	/* Access size; read/write offset and size must be a multiple of this */
+	uint32_t access_size;
+} __packed;
+
+/*
+ * Read persistent storage
+ *
+ * Response is params.size bytes of data.
+ */
+#define EC_CMD_PSTORE_READ 0x41
+
+struct ec_params_pstore_read {
+	uint32_t offset;   /* Byte offset to read */
+	uint32_t size;     /* Size to read in bytes */
+} __packed;
+
+/* Write persistent storage */
+#define EC_CMD_PSTORE_WRITE 0x42
+
+struct ec_params_pstore_write {
+	uint32_t offset;   /* Byte offset to write */
+	uint32_t size;     /* Size to write in bytes */
+	uint8_t data[EC_PSTORE_SIZE_MAX];
+} __packed;
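+
+/*
+ * Illustrative only: with access_size == 4 reported by
+ * EC_CMD_PSTORE_INFO, offset and size must both be multiples of 4, so
+ * an 8-byte write at offset 4 is legal while offset 2 or size 6 would
+ * be rejected.  A minimal request (sketch):
+ *
+ *	struct ec_params_pstore_write p = { .offset = 4, .size = 8 };
+ *	memcpy(p.data, src, p.size);
+ */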
+
+/*****************************************************************************/
+/* Real-time clock */
+
+/* RTC params and response structures */
+struct ec_params_rtc {
+	uint32_t time;
+} __packed;
+
+struct ec_response_rtc {
+	uint32_t time;
+} __packed;
+
+/* These use ec_response_rtc */
+#define EC_CMD_RTC_GET_VALUE 0x44
+#define EC_CMD_RTC_GET_ALARM 0x45
+
+/* These all use ec_params_rtc */
+#define EC_CMD_RTC_SET_VALUE 0x46
+#define EC_CMD_RTC_SET_ALARM 0x47
+
+/*****************************************************************************/
+/* Port80 log access */
+
+/* Get last port80 code from previous boot */
+#define EC_CMD_PORT80_LAST_BOOT 0x48
+
+struct ec_response_port80_last_boot {
+	uint16_t code;
+} __packed;
+
+/*****************************************************************************/
+/* Thermal engine commands */
+
+/* Set threshold value */
+#define EC_CMD_THERMAL_SET_THRESHOLD 0x50
+
+struct ec_params_thermal_set_threshold {
+	uint8_t sensor_type;
+	uint8_t threshold_id;
+	uint16_t value;
+} __packed;
+
+/* Get threshold value */
+#define EC_CMD_THERMAL_GET_THRESHOLD 0x51
+
+struct ec_params_thermal_get_threshold {
+	uint8_t sensor_type;
+	uint8_t threshold_id;
+} __packed;
+
+struct ec_response_thermal_get_threshold {
+	uint16_t value;
+} __packed;
+
+/* Toggle automatic fan control */
+#define EC_CMD_THERMAL_AUTO_FAN_CTRL 0x52
+
+/* Get TMP006 calibration data */
+#define EC_CMD_TMP006_GET_CALIBRATION 0x53
+
+struct ec_params_tmp006_get_calibration {
+	uint8_t index;
+} __packed;
+
+struct ec_response_tmp006_get_calibration {
+	float s0;
+	float b0;
+	float b1;
+	float b2;
+} __packed;
+
+/* Set TMP006 calibration data */
+#define EC_CMD_TMP006_SET_CALIBRATION 0x54
+
+struct ec_params_tmp006_set_calibration {
+	uint8_t index;
+	uint8_t reserved[3];  /* Reserved; set 0 */
+	float s0;
+	float b0;
+	float b1;
+	float b2;
+} __packed;
+
+/*****************************************************************************/
+/* MKBP - Matrix KeyBoard Protocol */
+
+/*
+ * Read key state
+ *
+ * Returns raw data for keyboard cols; see ec_response_mkbp_info.cols for
+ * expected response size.
+ */
+#define EC_CMD_MKBP_STATE 0x60
+
+/* Provide information about the matrix: number of rows and columns */
+#define EC_CMD_MKBP_INFO 0x61
+
+struct ec_response_mkbp_info {
+	uint32_t rows;
+	uint32_t cols;
+	uint8_t switches;
+} __packed;
+
+/* Simulate key press */
+#define EC_CMD_MKBP_SIMULATE_KEY 0x62
+
+struct ec_params_mkbp_simulate_key {
+	uint8_t col;
+	uint8_t row;
+	uint8_t pressed;
+} __packed;
+
+/* Configure keyboard scanning */
+#define EC_CMD_MKBP_SET_CONFIG 0x64
+#define EC_CMD_MKBP_GET_CONFIG 0x65
+
+/* flags */
+enum mkbp_config_flags {
+	EC_MKBP_FLAGS_ENABLE = 1,	/* Enable keyboard scanning */
+};
+
+enum mkbp_config_valid {
+	EC_MKBP_VALID_SCAN_PERIOD		= 1 << 0,
+	EC_MKBP_VALID_POLL_TIMEOUT		= 1 << 1,
+	EC_MKBP_VALID_MIN_POST_SCAN_DELAY	= 1 << 3,
+	EC_MKBP_VALID_OUTPUT_SETTLE		= 1 << 4,
+	EC_MKBP_VALID_DEBOUNCE_DOWN		= 1 << 5,
+	EC_MKBP_VALID_DEBOUNCE_UP		= 1 << 6,
+	EC_MKBP_VALID_FIFO_MAX_DEPTH		= 1 << 7,
+};
+
+/* Configuration for our key scanning algorithm */
+struct ec_mkbp_config {
+	uint32_t valid_mask;		/* valid fields */
+	uint8_t flags;		/* some flags (enum mkbp_config_flags) */
+	uint8_t valid_flags;		/* which flags are valid */
+	uint16_t scan_period_us;	/* period between start of scans */
+	/* revert to interrupt mode after no activity for this long */
+	uint32_t poll_timeout_us;
+	/*
+	 * Minimum post-scan relax time. Once we finish a scan we check
+	 * the time until we are due to start the next one. If this time is
+	 * shorter than this field, we use this field instead.
+	 */
+	uint16_t min_post_scan_delay_us;
+	/* delay between setting up output and waiting for it to settle */
+	uint16_t output_settle_us;
+	uint16_t debounce_down_us;	/* time for debounce on key down */
+	uint16_t debounce_up_us;	/* time for debounce on key up */
+	/* maximum depth to allow for fifo (0 = no keyscan output) */
+	uint8_t fifo_max_depth;
+} __packed;
+
+struct ec_params_mkbp_set_config {
+	struct ec_mkbp_config config;
+} __packed;
+
+struct ec_response_mkbp_get_config {
+	struct ec_mkbp_config config;
+} __packed;
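+
+/*
+ * Illustrative only: enabling keyboard scanning while leaving every
+ * timing parameter untouched sets no bits in valid_mask and marks only
+ * the ENABLE flag as valid (a sketch):
+ *
+ *	struct ec_params_mkbp_set_config p = {
+ *		.config = {
+ *			.valid_mask = 0,
+ *			.flags = EC_MKBP_FLAGS_ENABLE,
+ *			.valid_flags = EC_MKBP_FLAGS_ENABLE,
+ *		},
+ *	};
+ */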
+
+/* Run the key scan emulation */
+#define EC_CMD_KEYSCAN_SEQ_CTRL 0x66
+
+enum ec_keyscan_seq_cmd {
+	EC_KEYSCAN_SEQ_STATUS = 0,	/* Get status information */
+	EC_KEYSCAN_SEQ_CLEAR = 1,	/* Clear sequence */
+	EC_KEYSCAN_SEQ_ADD = 2,		/* Add item to sequence */
+	EC_KEYSCAN_SEQ_START = 3,	/* Start running sequence */
+	EC_KEYSCAN_SEQ_COLLECT = 4,	/* Collect sequence summary data */
+};
+
+enum ec_collect_flags {
+	/*
+	 * Indicates this scan was processed by the EC. Due to timing, some
+	 * scans may be skipped.
+	 */
+	EC_KEYSCAN_SEQ_FLAG_DONE	= 1 << 0,
+};
+
+struct ec_collect_item {
+	uint8_t flags;		/* some flags (enum ec_collect_flags) */
+};
+
+struct ec_params_keyscan_seq_ctrl {
+	uint8_t cmd;	/* Command to send (enum ec_keyscan_seq_cmd) */
+	union {
+		struct {
+			uint8_t active;		/* still active */
+			uint8_t num_items;	/* number of items */
+			/* Current item being presented */
+			uint8_t cur_item;
+		} status;
+		struct {
+			/*
+			 * Absolute time for this scan, measured from the
+			 * start of the sequence.
+			 */
+			uint32_t time_us;
+			uint8_t scan[0];	/* keyscan data */
+		} add;
+		struct {
+			uint8_t start_item;	/* First item to return */
+			uint8_t num_items;	/* Number of items to return */
+		} collect;
+	};
+} __packed;
+
+struct ec_result_keyscan_seq_ctrl {
+	union {
+		struct {
+			uint8_t num_items;	/* Number of items */
+			/* Data for each item */
+			struct ec_collect_item item[0];
+		} collect;
+	};
+} __packed;
+
+/*****************************************************************************/
+/* Temperature sensor commands */
+
+/* Read temperature sensor info */
+#define EC_CMD_TEMP_SENSOR_GET_INFO 0x70
+
+struct ec_params_temp_sensor_get_info {
+	uint8_t id;
+} __packed;
+
+struct ec_response_temp_sensor_get_info {
+	char sensor_name[32];
+	uint8_t sensor_type;
+} __packed;
+
+/*****************************************************************************/
+
+/*
+ * Note: host commands 0x80 - 0x87 are reserved to avoid conflict with ACPI
+ * commands accidentally sent to the wrong interface.  See the ACPI section
+ * below.
+ */
+
+/*****************************************************************************/
+/* Host event commands */
+
+/*
+ * Host event mask params and response structures, shared by all of the host
+ * event commands below.
+ */
+struct ec_params_host_event_mask {
+	uint32_t mask;
+} __packed;
+
+struct ec_response_host_event_mask {
+	uint32_t mask;
+} __packed;
+
+/* These all use ec_response_host_event_mask */
+#define EC_CMD_HOST_EVENT_GET_B         0x87
+#define EC_CMD_HOST_EVENT_GET_SMI_MASK  0x88
+#define EC_CMD_HOST_EVENT_GET_SCI_MASK  0x89
+#define EC_CMD_HOST_EVENT_GET_WAKE_MASK 0x8d
+
+/* These all use ec_params_host_event_mask */
+#define EC_CMD_HOST_EVENT_SET_SMI_MASK  0x8a
+#define EC_CMD_HOST_EVENT_SET_SCI_MASK  0x8b
+#define EC_CMD_HOST_EVENT_CLEAR         0x8c
+#define EC_CMD_HOST_EVENT_SET_WAKE_MASK 0x8e
+#define EC_CMD_HOST_EVENT_CLEAR_B       0x8f
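+
+/*
+ * Illustrative only: clearing every pending host event would send
+ * EC_CMD_HOST_EVENT_CLEAR with all mask bits set (a sketch):
+ *
+ *	struct ec_params_host_event_mask p = { .mask = 0xffffffff };
+ */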
+
+/*****************************************************************************/
+/* Switch commands */
+
+/* Enable/disable LCD backlight */
+#define EC_CMD_SWITCH_ENABLE_BKLIGHT 0x90
+
+struct ec_params_switch_enable_backlight {
+	uint8_t enabled;
+} __packed;
+
+/* Enable/disable WLAN/Bluetooth */
+#define EC_CMD_SWITCH_ENABLE_WIRELESS 0x91
+
+struct ec_params_switch_enable_wireless {
+	uint8_t enabled;
+} __packed;
+
+/*****************************************************************************/
+/* GPIO commands. Only available on EC if write protect has been disabled. */
+
+/* Set GPIO output value */
+#define EC_CMD_GPIO_SET 0x92
+
+struct ec_params_gpio_set {
+	char name[32];
+	uint8_t val;
+} __packed;
+
+/* Get GPIO value */
+#define EC_CMD_GPIO_GET 0x93
+
+struct ec_params_gpio_get {
+	char name[32];
+} __packed;
+struct ec_response_gpio_get {
+	uint8_t val;
+} __packed;
+
+/*****************************************************************************/
+/* I2C commands. Only available when flash write protect is unlocked. */
+
+/* Read I2C bus */
+#define EC_CMD_I2C_READ 0x94
+
+struct ec_params_i2c_read {
+	uint16_t addr;
+	uint8_t read_size; /* Either 8 or 16. */
+	uint8_t port;
+	uint8_t offset;
+} __packed;
+struct ec_response_i2c_read {
+	uint16_t data;
+} __packed;
+
+/* Write I2C bus */
+#define EC_CMD_I2C_WRITE 0x95
+
+struct ec_params_i2c_write {
+	uint16_t data;
+	uint16_t addr;
+	uint8_t write_size; /* Either 8 or 16. */
+	uint8_t port;
+	uint8_t offset;
+} __packed;
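+
+/*
+ * Illustrative only: an 8-bit read of register 0x00 from a device at
+ * address 0x48 on EC I2C port 0 (a sketch; whether addr takes the
+ * 7-bit or shifted 8-bit form is EC-firmware specific and the raw
+ * value is assumed here):
+ *
+ *	struct ec_params_i2c_read p = {
+ *		.addr = 0x48,
+ *		.read_size = 8,
+ *		.port = 0,
+ *		.offset = 0x00,
+ *	};
+ */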
+
+/*****************************************************************************/
+/* Charge state commands. Only available when flash write protect unlocked. */
+
+/* Force charge state machine to stop in idle mode */
+#define EC_CMD_CHARGE_FORCE_IDLE 0x96
+
+struct ec_params_force_idle {
+	uint8_t enabled;
+} __packed;
+
+/*****************************************************************************/
+/* Console commands. Only available when flash write protect is unlocked. */
+
+/* Snapshot console output buffer for use by EC_CMD_CONSOLE_READ. */
+#define EC_CMD_CONSOLE_SNAPSHOT 0x97
+
+/*
+ * Read next chunk of data from saved snapshot.
+ *
+ * Response is a null-terminated string; an empty string means there is
+ * no more output remaining.
+ */
+#define EC_CMD_CONSOLE_READ 0x98
+
+/*****************************************************************************/
+
+/*
+ * Cut off battery power output if the battery supports it.
+ *
+ * For an unsupported battery, simply don't implement this command and let
+ * the EC return EC_RES_INVALID_COMMAND.
+ */
+#define EC_CMD_BATTERY_CUT_OFF 0x99
+
+/*****************************************************************************/
+/* Temporary debug commands. TODO: remove this crosbug.com/p/13849 */
+
+/*
+ * Dump charge state machine context.
+ *
+ * Response is a binary dump of charge state machine context.
+ */
+#define EC_CMD_CHARGE_DUMP 0xa0
+
+/*
+ * Set maximum battery charging current.
+ */
+#define EC_CMD_CHARGE_CURRENT_LIMIT 0xa1
+
+struct ec_params_current_limit {
+	uint32_t limit;
+} __packed;
+
+/*****************************************************************************/
+/* System commands */
+
+/*
+ * TODO: this is a confusing name, since it doesn't necessarily reboot the EC.
+ * Rename to "set image" or something similar.
+ */
+#define EC_CMD_REBOOT_EC 0xd2
+
+/* Command */
+enum ec_reboot_cmd {
+	EC_REBOOT_CANCEL = 0,        /* Cancel a pending reboot */
+	EC_REBOOT_JUMP_RO = 1,       /* Jump to RO without rebooting */
+	EC_REBOOT_JUMP_RW = 2,       /* Jump to RW without rebooting */
+	/* (command 3 was jump to RW-B) */
+	EC_REBOOT_COLD = 4,          /* Cold-reboot */
+	EC_REBOOT_DISABLE_JUMP = 5,  /* Disable jump until next reboot */
+	EC_REBOOT_HIBERNATE = 6      /* Hibernate EC */
+};
+
+/* Flags for ec_params_reboot_ec.reboot_flags */
+#define EC_REBOOT_FLAG_RESERVED0      (1 << 0)  /* Was recovery request */
+#define EC_REBOOT_FLAG_ON_AP_SHUTDOWN (1 << 1)  /* Reboot after AP shutdown */
+
+struct ec_params_reboot_ec {
+	uint8_t cmd;           /* enum ec_reboot_cmd */
+	uint8_t flags;         /* See EC_REBOOT_FLAG_* */
+} __packed;
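+
+/*
+ * Illustrative only: asking the EC to cold-reboot itself once the AP
+ * has shut down (a sketch):
+ *
+ *	struct ec_params_reboot_ec p = {
+ *		.cmd = EC_REBOOT_COLD,
+ *		.flags = EC_REBOOT_FLAG_ON_AP_SHUTDOWN,
+ *	};
+ */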
+
+/*
+ * Get information on last EC panic.
+ *
+ * Returns variable-length platform-dependent panic information.  See panic.h
+ * for details.
+ */
+#define EC_CMD_GET_PANIC_INFO 0xd3
+
+/*****************************************************************************/
+/*
+ * ACPI commands
+ *
+ * These are valid ONLY on the ACPI command/data port.
+ */
+
+/*
+ * ACPI Read Embedded Controller
+ *
+ * This reads from ACPI memory space on the EC (EC_ACPI_MEM_*).
+ *
+ * Use the following sequence:
+ *
+ *    - Write EC_CMD_ACPI_READ to EC_LPC_ADDR_ACPI_CMD
+ *    - Wait for EC_LPC_CMDR_PENDING bit to clear
+ *    - Write address to EC_LPC_ADDR_ACPI_DATA
+ *    - Wait for EC_LPC_CMDR_DATA bit to set
+ *    - Read value from EC_LPC_ADDR_ACPI_DATA
+ */
+#define EC_CMD_ACPI_READ 0x80
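+
+/*
+ * Illustrative only: the sequence above as x86 port I/O, using the
+ * EC_LPC_ADDR_ACPI_* and EC_LPC_CMDR_* constants referenced above and
+ * assuming the status bits are read back from the command port.  A
+ * real driver would bound both busy-wait loops with a timeout; this is
+ * a sketch.
+ *
+ *	static uint8_t ec_acpi_read(uint8_t addr)
+ *	{
+ *		outb(EC_CMD_ACPI_READ, EC_LPC_ADDR_ACPI_CMD);
+ *		while (inb(EC_LPC_ADDR_ACPI_CMD) & EC_LPC_CMDR_PENDING)
+ *			cpu_relax();
+ *		outb(addr, EC_LPC_ADDR_ACPI_DATA);
+ *		while (!(inb(EC_LPC_ADDR_ACPI_CMD) & EC_LPC_CMDR_DATA))
+ *			cpu_relax();
+ *		return inb(EC_LPC_ADDR_ACPI_DATA);
+ *	}
+ */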
+
+/*
+ * ACPI Write Embedded Controller
+ *
+ * This writes to ACPI memory space on the EC (EC_ACPI_MEM_*).
+ *
+ * Use the following sequence:
+ *
+ *    - Write EC_CMD_ACPI_WRITE to EC_LPC_ADDR_ACPI_CMD
+ *    - Wait for EC_LPC_CMDR_PENDING bit to clear
+ *    - Write address to EC_LPC_ADDR_ACPI_DATA
+ *    - Wait for EC_LPC_CMDR_PENDING bit to clear
+ *    - Write value to EC_LPC_ADDR_ACPI_DATA
+ */
+#define EC_CMD_ACPI_WRITE 0x81
+
+/*
+ * ACPI Query Embedded Controller
+ *
+ * This clears the lowest-order bit in the currently pending host events, and
+ * sets the result code to the 1-based index of the bit (event 0x00000001 = 1,
+ * event 0x80000000 = 32), or 0 if no event was pending.
+ */
+#define EC_CMD_ACPI_QUERY_EVENT 0x84
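+
+/*
+ * For example, a result of 5 from this command means host event bit
+ * (1 << 4) was pending and has now been cleared.
+ */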
+
+/* Valid addresses in ACPI memory space, for read/write commands */
+/* Memory space version; set to EC_ACPI_MEM_VERSION_CURRENT */
+#define EC_ACPI_MEM_VERSION            0x00
+/*
+ * Test location; writing a value here updates the test complement byte
+ * to (0xff - value).
+ */
+#define EC_ACPI_MEM_TEST               0x01
+/* Test complement; writes here are ignored. */
+#define EC_ACPI_MEM_TEST_COMPLIMENT    0x02
+/* Keyboard backlight brightness percent (0 - 100) */
+#define EC_ACPI_MEM_KEYBOARD_BACKLIGHT 0x03
+
+/* Current version of ACPI memory address space */
+#define EC_ACPI_MEM_VERSION_CURRENT 1
+
+
+/*****************************************************************************/
+/*
+ * Special commands
+ *
+ * These do not follow the normal rules for commands.  See each command for
+ * details.
+ */
+
+/*
+ * Reboot NOW
+ *
+ * This command will work even when the EC LPC interface is busy, because the
+ * reboot command is processed at interrupt level.  Note that when the EC
+ * reboots, the host will reboot too, so there is no response to this command.
+ *
+ * Use EC_CMD_REBOOT_EC to reboot the EC more politely.
+ */
+#define EC_CMD_REBOOT 0xd1  /* Think "die" */
+
+/*
+ * Resend last response (not supported on LPC).
+ *
+ * Returns EC_RES_UNAVAILABLE if there is no response available - for example,
+ * there was no previous command, or the previous command's response was too
+ * big to save.
+ */
+#define EC_CMD_RESEND_RESPONSE 0xdb
+
+/*
+ * This header byte on a command indicates version 0. Any header byte less
+ * than this means that we are talking to an old EC which doesn't support
+ * versioning. In that case, we assume version 0.
+ *
+ * Header bytes greater than this indicate a later version. For example,
+ * EC_CMD_VERSION0 + 1 means we are using version 1.
+ *
+ * The old EC interface must not use commands 0xdc or higher.
+ */
+#define EC_CMD_VERSION0 0xdc
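+
+/*
+ * For example, a version-2 command would use a header byte of
+ * EC_CMD_VERSION0 + 2 = 0xde.
+ */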
+
+#endif  /* !__ACPI__ */
+
+#endif  /* __CROS_EC_COMMANDS_H */
diff --git a/include/linux/mfd/palmas.h b/include/linux/mfd/palmas.h
index ecddc51..8f21daf 100644
--- a/include/linux/mfd/palmas.h
+++ b/include/linux/mfd/palmas.h
@@ -1,9 +1,10 @@
 /*
  * TI Palmas
  *
- * Copyright 2011 Texas Instruments Inc.
+ * Copyright 2011-2013 Texas Instruments Inc.
  *
  * Author: Graeme Gregory <gg@slimlogic.co.uk>
+ * Author: Ian Lartey <ian@slimlogic.co.uk>
  *
  *  This program is free software; you can redistribute it and/or modify it
  *  under  the terms of the GNU General  Public License as published by the
@@ -22,6 +23,15 @@
 
 #define PALMAS_NUM_CLIENTS		3
 
+/* The ID_REVISION NUMBERS */
+#define PALMAS_CHIP_OLD_ID		0x0000
+#define PALMAS_CHIP_ID			0xC035
+#define PALMAS_CHIP_CHARGER_ID		0xC036
+
+#define is_palmas(a)	(((a) == PALMAS_CHIP_OLD_ID) || \
+			((a) == PALMAS_CHIP_ID))
+#define is_palmas_charger(a) ((a) == PALMAS_CHIP_CHARGER_ID)
+
 struct palmas_pmic;
 struct palmas_gpadc;
 struct palmas_resource;
diff --git a/include/linux/mfd/retu.h b/include/linux/mfd/retu.h
index 1e2715d..65471c4 100644
--- a/include/linux/mfd/retu.h
+++ b/include/linux/mfd/retu.h
@@ -1,5 +1,5 @@
 /*
- * Retu MFD driver interface
+ * Retu/Tahvo MFD driver interface
  *
  * This file is subject to the terms and conditions of the GNU General
  * Public License. See the file "COPYING" in the main directory of this
@@ -19,4 +19,10 @@
 #define RETU_REG_CC1		0x0d		/* Common control register 1 */
 #define RETU_REG_STATUS		0x16		/* Status register */
 
+/* Interrupt sources */
+#define TAHVO_INT_VBUS		0		/* VBUS state */
+
+/* Interrupt status */
+#define TAHVO_STAT_VBUS		(1 << TAHVO_INT_VBUS)
+
 #endif /* __LINUX_MFD_RETU_H */
diff --git a/include/linux/mfd/rtsx_pci.h b/include/linux/mfd/rtsx_pci.h
index 26ea7f1..86bc635 100644
--- a/include/linux/mfd/rtsx_pci.h
+++ b/include/linux/mfd/rtsx_pci.h
@@ -500,6 +500,8 @@
 #define BPP_POWER_15_PERCENT_ON		0x08
 #define BPP_POWER_ON			0x00
 #define BPP_POWER_MASK			0x0F
+#define SD_VCC_PARTIAL_POWER_ON		0x02
+#define SD_VCC_POWER_ON			0x00
 
 /* PWR_GATE_CTRL */
 #define PWR_GATE_EN			0x01
@@ -689,6 +691,40 @@
 #define IMAGE_FLAG_ADDR0		0xCE80
 #define IMAGE_FLAG_ADDR1		0xCE81
 
+/* Phy register */
+#define PHY_PCR				0x00
+#define PHY_RCR0			0x01
+#define PHY_RCR1			0x02
+#define PHY_RCR2			0x03
+#define PHY_RTCR			0x04
+#define PHY_RDR				0x05
+#define PHY_TCR0			0x06
+#define PHY_TCR1			0x07
+#define PHY_TUNE			0x08
+#define PHY_IMR				0x09
+#define PHY_BPCR			0x0A
+#define PHY_BIST			0x0B
+#define PHY_RAW_L			0x0C
+#define PHY_RAW_H			0x0D
+#define PHY_RAW_DATA			0x0E
+#define PHY_HOST_CLK_CTRL		0x0F
+#define PHY_DMR				0x10
+#define PHY_BACR			0x11
+#define PHY_IER				0x12
+#define PHY_BCSR			0x13
+#define PHY_BPR				0x14
+#define PHY_BPNR2			0x15
+#define PHY_BPNR			0x16
+#define PHY_BRNR2			0x17
+#define PHY_BENR			0x18
+#define PHY_REG_REV			0x19
+#define PHY_FLD0			0x1A
+#define PHY_FLD1			0x1B
+#define PHY_FLD2			0x1C
+#define PHY_FLD3			0x1D
+#define PHY_FLD4			0x1E
+#define PHY_DUM_REG			0x1F
+
 #define rtsx_pci_init_cmd(pcr)		((pcr)->ci = 0)
 
 struct rtsx_pcr;
diff --git a/include/linux/mfd/si476x-core.h b/include/linux/mfd/si476x-core.h
new file mode 100644
index 0000000..ba89b94
--- /dev/null
+++ b/include/linux/mfd/si476x-core.h
@@ -0,0 +1,533 @@
+/*
+ * include/linux/mfd/si476x-core.h -- Common definitions for si476x core
+ * device
+ *
+ * Copyright (C) 2012 Innovative Converged Devices(ICD)
+ * Copyright (C) 2013 Andrey Smirnov
+ *
+ * Author: Andrey Smirnov <andrew.smirnov@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ */
+
+#ifndef SI476X_CORE_H
+#define SI476X_CORE_H
+
+#include <linux/kfifo.h>
+#include <linux/atomic.h>
+#include <linux/i2c.h>
+#include <linux/regmap.h>
+#include <linux/mutex.h>
+#include <linux/mfd/core.h>
+#include <linux/videodev2.h>
+#include <linux/regulator/consumer.h>
+
+#include <linux/mfd/si476x-platform.h>
+#include <linux/mfd/si476x-reports.h>
+
+/* Command Timeouts */
+#define SI476X_DEFAULT_TIMEOUT	100000
+#define SI476X_TIMEOUT_TUNE	700000
+#define SI476X_TIMEOUT_POWER_UP	330000
+#define SI476X_STATUS_POLL_US	0
+
+/* -------------------- si476x-i2c.c ----------------------- */
+
+enum si476x_freq_supported_chips {
+	SI476X_CHIP_SI4761 = 1,
+	SI476X_CHIP_SI4764,
+	SI476X_CHIP_SI4768,
+};
+
+enum si476x_part_revisions {
+	SI476X_REVISION_A10 = 0,
+	SI476X_REVISION_A20 = 1,
+	SI476X_REVISION_A30 = 2,
+};
+
+enum si476x_mfd_cells {
+	SI476X_RADIO_CELL = 0,
+	SI476X_CODEC_CELL,
+	SI476X_MFD_CELLS,
+};
+
+/**
+ * enum si476x_power_state - possible power state of the si476x
+ * device.
+ *
+ * @SI476X_POWER_DOWN: In this state all regulators are turned off
+ * and the reset line is pulled low. The device is completely
+ * inactive.
+ * @SI476X_POWER_UP_FULL: In this state all the power regulators are
+ * turned on, the reset line is pulled high, the IRQ line is enabled
+ * (polling is active in the polling use scenario) and the device is
+ * turned on with the POWER_UP command. The device is ready to be used.
+ * @SI476X_POWER_INCONSISTENT: This state indicates that the previous
+ * power down was inconsistent, meaning some of the regulators were
+ * not turned off, so the device cannot be used again without
+ * power-cycling.
+ */
+enum si476x_power_state {
+	SI476X_POWER_DOWN		= 0,
+	SI476X_POWER_UP_FULL		= 1,
+	SI476X_POWER_INCONSISTENT	= 2,
+};
+
+/**
+ * struct si476x_core - internal data structure representing the
+ * underlying "core" device which all the MFD cell-devices use.
+ *
+ * @client: Actual I2C client used to transfer commands to the chip.
+ * @chip_id: Last digit of the chip model (e.g. "1" for SI4761)
+ * @cells: MFD cell devices created by this driver.
+ * @cmd_lock: Mutex used to serialize all the requests to the core
+ * device. This field should not be used directly. Instead,
+ * si476x_core_lock()/si476x_core_unlock() should be used to get
+ * exclusive access to the "core" device.
+ * @users: Active users counter (used by the radio cell)
+ * @rds_read_queue: Wait queue used to wait for RDS data.
+ * @rds_fifo: FIFO in which all the RDS data received from the chip is
+ * placed.
+ * @rds_fifo_drainer: Worker that drains on-chip RDS FIFO.
+ * @rds_drainer_is_working: Flag used for launching only one instance
+ * of the @rds_fifo_drainer.
+ * @rds_drainer_status_lock: Lock used to guard access to the
+ * @rds_drainer_is_working variable.
+ * @command: Wait queue for waiting on command completion.
+ * @cts: Clear To Send flag set upon receiving first status with CTS
+ * set.
+ * @tuning: Wait queue used for waiting for tune/seek command
+ * completion.
+ * @stc: Similar to @cts, but for the STC bit of the status value.
+ * @power_up_parameters: Parameters used as argument for POWER_UP
+ * command when the device is started.
+ * @state: Current power state of the device.
+ * @supplies: Structure containing handles to all power supplies used
+ * by the device (NULL ones are ignored).
+ * @gpio_reset: GPIO pin connected to the RSTB pin of the chip.
+ * @pinmux: Chip's configurable pins configuration.
+ * @diversity_mode: Chip's role when functioning in diversity mode.
+ * @status_monitor: Polling worker used in the polling use-case scenario
+ * (when the IRQ is not available).
+ * @revision: Chip's running firmware revision number (used for correct
+ * command set support).
+ */
+
+struct si476x_core {
+	struct i2c_client *client;
+	struct regmap *regmap;
+	int chip_id;
+	struct mfd_cell cells[SI476X_MFD_CELLS];
+
+	struct mutex cmd_lock; /* for serializing fm radio operations */
+	atomic_t users;
+
+	wait_queue_head_t  rds_read_queue;
+	struct kfifo       rds_fifo;
+	struct work_struct rds_fifo_drainer;
+	bool               rds_drainer_is_working;
+	struct mutex       rds_drainer_status_lock;
+
+	wait_queue_head_t command;
+	atomic_t          cts;
+
+	wait_queue_head_t tuning;
+	atomic_t          stc;
+
+	struct si476x_power_up_args power_up_parameters;
+
+	enum si476x_power_state power_state;
+
+	struct regulator_bulk_data supplies[4];
+
+	int gpio_reset;
+
+	struct si476x_pinmux pinmux;
+	enum si476x_phase_diversity_mode diversity_mode;
+
+	atomic_t is_alive;
+
+	struct delayed_work status_monitor;
+#define SI476X_WORK_TO_CORE(w) container_of(to_delayed_work(w),	\
+					    struct si476x_core,	\
+					    status_monitor)
+
+	int revision;
+
+	int rds_fifo_depth;
+};
+
+static inline struct si476x_core *i2c_mfd_cell_to_core(struct device *dev)
+{
+	struct i2c_client *client = to_i2c_client(dev->parent);
+	return i2c_get_clientdata(client);
+}
+
+
+/**
+ * si476x_core_lock() - lock the core device to get an exclusive access
+ * to it.
+ */
+static inline void si476x_core_lock(struct si476x_core *core)
+{
+	mutex_lock(&core->cmd_lock);
+}
+
+/**
+ * si476x_core_unlock() - unlock the core device to relinquish an
+ * exclusive access to it.
+ */
+static inline void si476x_core_unlock(struct si476x_core *core)
+{
+	mutex_unlock(&core->cmd_lock);
+}
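+
+/*
+ * Illustrative only: the command helpers declared below are normally
+ * issued with the core lock held, e.g. (a sketch):
+ *
+ *	int err;
+ *
+ *	si476x_core_lock(core);
+ *	err = si476x_core_cmd_set_property(core, SI476X_PROP_INT_CTL_ENABLE,
+ *					   SI476X_STCIEN | SI476X_CTSIEN);
+ *	si476x_core_unlock(core);
+ */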
+
+/*
+ * The *_TUNE_FREQ family of commands accepts frequency in multiples of
+ * 10 kHz for FM and 1 kHz for AM, as implemented below.
+ */
+static inline u16 hz_to_si476x(struct si476x_core *core, int freq)
+{
+	u16 result;
+
+	switch (core->power_up_parameters.func) {
+	default:
+	case SI476X_FUNC_FM_RECEIVER:
+		result = freq / 10000;
+		break;
+	case SI476X_FUNC_AM_RECEIVER:
+		result = freq / 1000;
+		break;
+	}
+
+	return result;
+}
+
+static inline int si476x_to_hz(struct si476x_core *core, u16 freq)
+{
+	int result;
+
+	switch (core->power_up_parameters.func) {
+	default:
+	case SI476X_FUNC_FM_RECEIVER:
+		result = freq * 10000;
+		break;
+	case SI476X_FUNC_AM_RECEIVER:
+		result = freq * 1000;
+		break;
+	}
+
+	return result;
+}
+
+/* Since the V4L2_TUNER_CAP_LOW flag is supplied, the V4L2 subsystem
+ * measures frequency in 62.5 Hz units */
+
+static inline int hz_to_v4l2(int freq)
+{
+	return (freq * 10) / 625;
+}
+
+static inline int v4l2_to_hz(int freq)
+{
+	return (freq * 625) / 10;
+}
+
+static inline u16 v4l2_to_si476x(struct si476x_core *core, int freq)
+{
+	return hz_to_si476x(core, v4l2_to_hz(freq));
+}
+
+static inline int si476x_to_v4l2(struct si476x_core *core, u16 freq)
+{
+	return hz_to_v4l2(si476x_to_hz(core, freq));
+}
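+
+/*
+ * Worked example: for an FM frequency of 100 MHz, hz_to_si476x()
+ * yields 100000000 / 10000 = 10000 chip units, and hz_to_v4l2()
+ * yields 100000000 * 10 / 625 = 1600000 V4L2 units.
+ */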
+
+
+
+/**
+ * struct si476x_func_info - structure containing result of the
+ * FUNC_INFO command.
+ *
+ * @firmware.major: Firmware major number.
+ * @firmware.minor[...]: Firmware minor numbers.
+ * @patch_id: Firmware patch ID.
+ * @func: Mode tuner is working in.
+ */
+struct si476x_func_info {
+	struct {
+		u8 major, minor[2];
+	} firmware;
+	u16 patch_id;
+	enum si476x_func func;
+};
+
+/**
+ * struct si476x_power_down_args - structure used to pass parameters
+ * to POWER_DOWN command
+ *
+ * @xosc: true - Power down, but leave the oscillator running.
+ *        false - Full power down.
+ */
+struct si476x_power_down_args {
+	bool xosc;
+};
+
+/**
+ * enum si476x_tunemode - enum representing possible tune modes for
+ * the chip.
+ * @SI476X_TM_VALIDATED_NORMAL_TUNE: Unconditionally stay on the new
+ * channel after tune, tune status is valid.
+ * @SI476X_TM_INVALIDATED_FAST_TUNE: Unconditionally stay in the new
+ * channel after tune, tune status invalid.
+ * @SI476X_TM_VALIDATED_AF_TUNE: Jump back to previous channel if
+ * metric thresholds are not met.
+ * @SI476X_TM_VALIDATED_AF_CHECK: Unconditionally jump back to the
+ * previous channel.
+ */
+enum si476x_tunemode {
+	SI476X_TM_VALIDATED_NORMAL_TUNE = 0,
+	SI476X_TM_INVALIDATED_FAST_TUNE = 1,
+	SI476X_TM_VALIDATED_AF_TUNE     = 2,
+	SI476X_TM_VALIDATED_AF_CHECK    = 3,
+};
+
+/**
+ * enum si476x_smoothmetrics - enum containing the possible settings
+ * for audio transitioning of the chip
+ * @SI476X_SM_INITIALIZE_AUDIO: Initialize audio state to match this
+ * new channel
+ * @SI476X_SM_TRANSITION_AUDIO: Transition audio state from previous
+ * channel values to the new values
+ */
+enum si476x_smoothmetrics {
+	SI476X_SM_INITIALIZE_AUDIO = 0,
+	SI476X_SM_TRANSITION_AUDIO = 1,
+};
+
+/**
+ * struct si476x_rds_status_report - the structure representing the
+ * response to 'FM_RD_STATUS' command
+ * @rdstpptyint: Traffic program (TP) flag and/or program type (PTY)
+ * code has changed.
+ * @rdspiint: Program identification (PI) code has changed.
+ * @rdssyncint: RDS synchronization has changed.
+ * @rdsfifoint: RDS was received and the RDS FIFO has at least
+ * 'FM_RDS_INTERRUPT_FIFO_COUNT' elements in it.
+ * @tpptyvalid: TP flag and PTY code are valid flag.
+ * @pivalid: PI code is valid flag.
+ * @rdssync: RDS is currently synchronized.
+ * @rdsfifolost: One or more RDS groups have been lost/discarded flag.
+ * @tp: Current channel's TP flag.
+ * @pty: Current channel's PTY code.
+ * @pi: Current channel's PI code.
+ * @rdsfifoused: Number of blocks remaining in the RDS FIFO (0 if
+ * empty).
+ */
+struct si476x_rds_status_report {
+	bool rdstpptyint, rdspiint, rdssyncint, rdsfifoint;
+	bool tpptyvalid, pivalid, rdssync, rdsfifolost;
+	bool tp;
+
+	u8 pty;
+	u16 pi;
+
+	u8 rdsfifoused;
+	u8 ble[4];
+
+	struct v4l2_rds_data rds[4];
+};
+
+struct si476x_rsq_status_args {
+	bool primary;
+	bool rsqack;
+	bool attune;
+	bool cancel;
+	bool stcack;
+};
+
+enum si476x_injside {
+	SI476X_INJSIDE_AUTO	= 0,
+	SI476X_INJSIDE_LOW	= 1,
+	SI476X_INJSIDE_HIGH	= 2,
+};
+
+struct si476x_tune_freq_args {
+	bool zifsr;
+	bool hd;
+	enum si476x_injside injside;
+	int freq;
+	enum si476x_tunemode tunemode;
+	enum si476x_smoothmetrics smoothmetrics;
+	int antcap;
+};
+
+int  si476x_core_stop(struct si476x_core *, bool);
+int  si476x_core_start(struct si476x_core *, bool);
+int  si476x_core_set_power_state(struct si476x_core *, enum si476x_power_state);
+bool si476x_core_has_am(struct si476x_core *);
+bool si476x_core_has_diversity(struct si476x_core *);
+bool si476x_core_is_a_secondary_tuner(struct si476x_core *);
+bool si476x_core_is_a_primary_tuner(struct si476x_core *);
+bool si476x_core_is_in_am_receiver_mode(struct si476x_core *core);
+bool si476x_core_is_powered_up(struct si476x_core *core);
+
+enum si476x_i2c_type {
+	SI476X_I2C_SEND,
+	SI476X_I2C_RECV
+};
+
+int si476x_core_i2c_xfer(struct si476x_core *,
+			 enum si476x_i2c_type,
+			 char *, int);
+
+
+/* -------------------- si476x-cmd.c ----------------------- */
+
+int si476x_core_cmd_func_info(struct si476x_core *, struct si476x_func_info *);
+int si476x_core_cmd_set_property(struct si476x_core *, u16, u16);
+int si476x_core_cmd_get_property(struct si476x_core *, u16);
+int si476x_core_cmd_dig_audio_pin_cfg(struct si476x_core *,
+				      enum si476x_dclk_config,
+				      enum si476x_dfs_config,
+				      enum si476x_dout_config,
+				      enum si476x_xout_config);
+int si476x_core_cmd_zif_pin_cfg(struct si476x_core *,
+				enum si476x_iqclk_config,
+				enum si476x_iqfs_config,
+				enum si476x_iout_config,
+				enum si476x_qout_config);
+int si476x_core_cmd_ic_link_gpo_ctl_pin_cfg(struct si476x_core *,
+					    enum si476x_icin_config,
+					    enum si476x_icip_config,
+					    enum si476x_icon_config,
+					    enum si476x_icop_config);
+int si476x_core_cmd_ana_audio_pin_cfg(struct si476x_core *,
+				      enum si476x_lrout_config);
+int si476x_core_cmd_intb_pin_cfg(struct si476x_core *, enum si476x_intb_config,
+				 enum si476x_a1_config);
+int si476x_core_cmd_fm_seek_start(struct si476x_core *, bool, bool);
+int si476x_core_cmd_am_seek_start(struct si476x_core *, bool, bool);
+int si476x_core_cmd_fm_rds_status(struct si476x_core *, bool, bool, bool,
+				  struct si476x_rds_status_report *);
+int si476x_core_cmd_fm_rds_blockcount(struct si476x_core *, bool,
+				      struct si476x_rds_blockcount_report *);
+int si476x_core_cmd_fm_tune_freq(struct si476x_core *,
+				 struct si476x_tune_freq_args *);
+int si476x_core_cmd_am_tune_freq(struct si476x_core *,
+				 struct si476x_tune_freq_args *);
+int si476x_core_cmd_am_rsq_status(struct si476x_core *,
+				  struct si476x_rsq_status_args *,
+				  struct si476x_rsq_status_report *);
+int si476x_core_cmd_fm_rsq_status(struct si476x_core *,
+				  struct si476x_rsq_status_args *,
+				  struct si476x_rsq_status_report *);
+int si476x_core_cmd_power_up(struct si476x_core *,
+			     struct si476x_power_up_args *);
+int si476x_core_cmd_power_down(struct si476x_core *,
+			       struct si476x_power_down_args *);
+int si476x_core_cmd_fm_phase_div_status(struct si476x_core *);
+int si476x_core_cmd_fm_phase_diversity(struct si476x_core *,
+				       enum si476x_phase_diversity_mode);
+
+int si476x_core_cmd_fm_acf_status(struct si476x_core *,
+				  struct si476x_acf_status_report *);
+int si476x_core_cmd_am_acf_status(struct si476x_core *,
+				  struct si476x_acf_status_report *);
+int si476x_core_cmd_agc_status(struct si476x_core *,
+			       struct si476x_agc_status_report *);
+
+enum si476x_power_grid_type {
+	SI476X_POWER_GRID_50HZ = 0,
+	SI476X_POWER_GRID_60HZ,
+};
+
+/* Properties  */
+
+enum si476x_interrupt_flags {
+	SI476X_STCIEN = (1 << 0),
+	SI476X_ACFIEN = (1 << 1),
+	SI476X_RDSIEN = (1 << 2),
+	SI476X_RSQIEN = (1 << 3),
+
+	SI476X_ERRIEN = (1 << 6),
+	SI476X_CTSIEN = (1 << 7),
+
+	SI476X_STCREP = (1 << 8),
+	SI476X_ACFREP = (1 << 9),
+	SI476X_RDSREP = (1 << 10),
+	SI476X_RSQREP = (1 << 11),
+};
+
+enum si476x_rdsint_sources {
+	SI476X_RDSTPPTY = (1 << 4),
+	SI476X_RDSPI    = (1 << 3),
+	SI476X_RDSSYNC	= (1 << 1),
+	SI476X_RDSRECV	= (1 << 0),
+};
+
+enum si476x_status_response_bits {
+	SI476X_CTS	  = (1 << 7),
+	SI476X_ERR	  = (1 << 6),
+	/* Status response for WB receiver */
+	SI476X_WB_ASQ_INT = (1 << 4),
+	SI476X_RSQ_INT    = (1 << 3),
+	/* Status response for FM receiver */
+	SI476X_FM_RDS_INT = (1 << 2),
+	SI476X_ACF_INT    = (1 << 1),
+	SI476X_STC_INT    = (1 << 0),
+};
+
+/* -------------------- si476x-prop.c ----------------------- */
+
+enum si476x_common_receiver_properties {
+	SI476X_PROP_INT_CTL_ENABLE			= 0x0000,
+	SI476X_PROP_DIGITAL_IO_INPUT_SAMPLE_RATE	= 0x0200,
+	SI476X_PROP_DIGITAL_IO_INPUT_FORMAT		= 0x0201,
+	SI476X_PROP_DIGITAL_IO_OUTPUT_SAMPLE_RATE	= 0x0202,
+	SI476X_PROP_DIGITAL_IO_OUTPUT_FORMAT		= 0x0203,
+
+	SI476X_PROP_SEEK_BAND_BOTTOM			= 0x1100,
+	SI476X_PROP_SEEK_BAND_TOP			= 0x1101,
+	SI476X_PROP_SEEK_FREQUENCY_SPACING		= 0x1102,
+
+	SI476X_PROP_VALID_MAX_TUNE_ERROR		= 0x2000,
+	SI476X_PROP_VALID_SNR_THRESHOLD			= 0x2003,
+	SI476X_PROP_VALID_RSSI_THRESHOLD		= 0x2004,
+};
+
+enum si476x_am_receiver_properties {
+	SI476X_PROP_AUDIO_PWR_LINE_FILTER		= 0x0303,
+};
+
+enum si476x_fm_receiver_properties {
+	SI476X_PROP_AUDIO_DEEMPHASIS			= 0x0302,
+
+	SI476X_PROP_FM_RDS_INTERRUPT_SOURCE		= 0x4000,
+	SI476X_PROP_FM_RDS_INTERRUPT_FIFO_COUNT		= 0x4001,
+	SI476X_PROP_FM_RDS_CONFIG			= 0x4002,
+};
+
+enum si476x_prop_audio_pwr_line_filter_bits {
+	SI476X_PROP_PWR_HARMONICS_MASK	= 0x001f,
+	SI476X_PROP_PWR_GRID_MASK	= 0x0100,
+	SI476X_PROP_PWR_ENABLE_MASK	= 0x0200,
+	SI476X_PROP_PWR_GRID_50HZ	= 0x0000,
+	SI476X_PROP_PWR_GRID_60HZ	= 0x0100,
+};
+
+enum si476x_prop_fm_rds_config_bits {
+	SI476X_PROP_RDSEN_MASK	= 0x1,
+	SI476X_PROP_RDSEN	= 0x1,
+};
+
+
+struct regmap *devm_regmap_init_si476x(struct si476x_core *);
+
+#endif	/* SI476X_CORE_H */
diff --git a/include/linux/mfd/si476x-platform.h b/include/linux/mfd/si476x-platform.h
new file mode 100644
index 0000000..88bb93b
--- /dev/null
+++ b/include/linux/mfd/si476x-platform.h
@@ -0,0 +1,267 @@
+/*
+ * include/linux/mfd/si476x-platform.h -- Platform data specific definitions
+ *
+ * Copyright (C) 2013 Andrey Smirnov
+ *
+ * Author: Andrey Smirnov <andrew.smirnov@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ */
+
+#ifndef __SI476X_PLATFORM_H__
+#define __SI476X_PLATFORM_H__
+
+/* It is possible to select one of the four addresses using pins A0
+ * and A1 on SI476x */
+#define SI476X_I2C_ADDR_1	0x60
+#define SI476X_I2C_ADDR_2	0x61
+#define SI476X_I2C_ADDR_3	0x62
+#define SI476X_I2C_ADDR_4	0x63
+
+enum si476x_iqclk_config {
+	SI476X_IQCLK_NOOP = 0,
+	SI476X_IQCLK_TRISTATE = 1,
+	SI476X_IQCLK_IQ = 21,
+};
+enum si476x_iqfs_config {
+	SI476X_IQFS_NOOP = 0,
+	SI476X_IQFS_TRISTATE = 1,
+	SI476X_IQFS_IQ = 21,
+};
+enum si476x_iout_config {
+	SI476X_IOUT_NOOP = 0,
+	SI476X_IOUT_TRISTATE = 1,
+	SI476X_IOUT_OUTPUT = 22,
+};
+enum si476x_qout_config {
+	SI476X_QOUT_NOOP = 0,
+	SI476X_QOUT_TRISTATE = 1,
+	SI476X_QOUT_OUTPUT = 22,
+};
+
+enum si476x_dclk_config {
+	SI476X_DCLK_NOOP      = 0,
+	SI476X_DCLK_TRISTATE  = 1,
+	SI476X_DCLK_DAUDIO    = 10,
+};
+
+enum si476x_dfs_config {
+	SI476X_DFS_NOOP      = 0,
+	SI476X_DFS_TRISTATE  = 1,
+	SI476X_DFS_DAUDIO    = 10,
+};
+
+enum si476x_dout_config {
+	SI476X_DOUT_NOOP       = 0,
+	SI476X_DOUT_TRISTATE   = 1,
+	SI476X_DOUT_I2S_OUTPUT = 12,
+	SI476X_DOUT_I2S_INPUT  = 13,
+};
+
+enum si476x_xout_config {
+	SI476X_XOUT_NOOP        = 0,
+	SI476X_XOUT_TRISTATE    = 1,
+	SI476X_XOUT_I2S_INPUT   = 13,
+	SI476X_XOUT_MODE_SELECT = 23,
+};
+
+enum si476x_icin_config {
+	SI476X_ICIN_NOOP	= 0,
+	SI476X_ICIN_TRISTATE	= 1,
+	SI476X_ICIN_GPO1_HIGH	= 2,
+	SI476X_ICIN_GPO1_LOW	= 3,
+	SI476X_ICIN_IC_LINK	= 30,
+};
+
+enum si476x_icip_config {
+	SI476X_ICIP_NOOP	= 0,
+	SI476X_ICIP_TRISTATE	= 1,
+	SI476X_ICIP_GPO2_HIGH	= 2,
+	SI476X_ICIP_GPO2_LOW	= 3,
+	SI476X_ICIP_IC_LINK	= 30,
+};
+
+enum si476x_icon_config {
+	SI476X_ICON_NOOP	= 0,
+	SI476X_ICON_TRISTATE	= 1,
+	SI476X_ICON_I2S		= 10,
+	SI476X_ICON_IC_LINK	= 30,
+};
+
+enum si476x_icop_config {
+	SI476X_ICOP_NOOP	= 0,
+	SI476X_ICOP_TRISTATE	= 1,
+	SI476X_ICOP_I2S		= 10,
+	SI476X_ICOP_IC_LINK	= 30,
+};
+
+
+enum si476x_lrout_config {
+	SI476X_LROUT_NOOP	= 0,
+	SI476X_LROUT_TRISTATE	= 1,
+	SI476X_LROUT_AUDIO	= 2,
+	SI476X_LROUT_MPX	= 3,
+};
+
+
+enum si476x_intb_config {
+	SI476X_INTB_NOOP     = 0,
+	SI476X_INTB_TRISTATE = 1,
+	SI476X_INTB_DAUDIO   = 10,
+	SI476X_INTB_IRQ      = 40,
+};
+
+enum si476x_a1_config {
+	SI476X_A1_NOOP     = 0,
+	SI476X_A1_TRISTATE = 1,
+	SI476X_A1_IRQ      = 40,
+};
+
+
+struct si476x_pinmux {
+	enum si476x_dclk_config  dclk;
+	enum si476x_dfs_config   dfs;
+	enum si476x_dout_config  dout;
+	enum si476x_xout_config  xout;
+
+	enum si476x_iqclk_config iqclk;
+	enum si476x_iqfs_config  iqfs;
+	enum si476x_iout_config  iout;
+	enum si476x_qout_config  qout;
+
+	enum si476x_icin_config  icin;
+	enum si476x_icip_config  icip;
+	enum si476x_icon_config  icon;
+	enum si476x_icop_config  icop;
+
+	enum si476x_lrout_config lrout;
+
+	enum si476x_intb_config  intb;
+	enum si476x_a1_config    a1;
+};
+
+enum si476x_ibias6x {
+	SI476X_IBIAS6X_OTHER			= 0,
+	SI476X_IBIAS6X_RCVR1_NON_4MHZ_CLK	= 1,
+};
+
+enum si476x_xstart {
+	SI476X_XSTART_MULTIPLE_TUNER	= 0x11,
+	SI476X_XSTART_NORMAL		= 0x77,
+};
+
+enum si476x_freq {
+	SI476X_FREQ_4_MHZ		= 0,
+	SI476X_FREQ_37P209375_MHZ	= 1,
+	SI476X_FREQ_36P4_MHZ		= 2,
+	SI476X_FREQ_37P8_MHZ		= 3,
+};
+
+enum si476x_xmode {
+	SI476X_XMODE_CRYSTAL_RCVR1	= 1,
+	SI476X_XMODE_EXT_CLOCK		= 2,
+	SI476X_XMODE_CRYSTAL_RCVR2_3	= 3,
+};
+
+enum si476x_xbiashc {
+	SI476X_XBIASHC_SINGLE_RECEIVER = 0,
+	SI476X_XBIASHC_MULTIPLE_RECEIVER = 1,
+};
+
+enum si476x_xbias {
+	SI476X_XBIAS_RCVR2_3	= 0,
+	SI476X_XBIAS_4MHZ_RCVR1 = 3,
+	SI476X_XBIAS_RCVR1	= 7,
+};
+
+enum si476x_func {
+	SI476X_FUNC_BOOTLOADER	= 0,
+	SI476X_FUNC_FM_RECEIVER = 1,
+	SI476X_FUNC_AM_RECEIVER = 2,
+	SI476X_FUNC_WB_RECEIVER = 3,
+};
+
+
+/**
+ * @xcload: Selects the amount of additional on-chip capacitance to
+ *          be connected between XTAL1 and GND and between XTAL2 and
+ *          GND. One half of the capacitance value shown here is the
+ *          additional load capacitance presented to the xtal. The
+ *          minimum step size is 0.277 pF. Recommended value is 0x28
+ *          but it will be layout dependent. Range is 0–0x3F i.e.
+ *          (0–16.33 pF)
+ * @ctsien: enable CTSINT (interrupt request when CTS condition
+ *          arises) when set
+ * @intsel: when set A1 pin becomes the interrupt pin; otherwise,
+ *          INTB is the interrupt pin
+ * @func:   selects the boot function of the device, i.e.
+ *          SI476X_FUNC_BOOTLOADER  - Boot loader
+ *          SI476X_FUNC_FM_RECEIVER - FM receiver
+ *          SI476X_FUNC_AM_RECEIVER - AM receiver
+ *          SI476X_FUNC_WB_RECEIVER - Weatherband receiver
+ * @freq:   oscillator's crystal frequency (enum si476x_freq):
+ *          SI476X_FREQ_4_MHZ         - 4 MHz
+ *          SI476X_FREQ_37P209375_MHZ - 37.209375 MHz
+ *          SI476X_FREQ_36P4_MHZ      - 36.4 MHz
+ *          SI476X_FREQ_37P8_MHZ      - 37.8 MHz
+ */
+struct si476x_power_up_args {
+	enum si476x_ibias6x ibias6x;
+	enum si476x_xstart  xstart;
+	u8   xcload;
+	bool fastboot;
+	enum si476x_xbiashc xbiashc;
+	enum si476x_xbias   xbias;
+	enum si476x_func    func;
+	enum si476x_freq    freq;
+	enum si476x_xmode   xmode;
+};
+
+
+/**
+ * enum si476x_phase_diversity_mode - possible phase diversity modes
+ * for SI4764/5/6/7 chips.
+ *
+ * @SI476X_PHDIV_DISABLED:		Phase diversity feature is
+ *					disabled.
+ * @SI476X_PHDIV_PRIMARY_COMBINING:	Tuner works as a primary tuner
+ *					in combination with a
+ *					secondary one.
+ * @SI476X_PHDIV_PRIMARY_ANTENNA:	Tuner works as a primary tuner
+ *					using only its own antenna.
+ * @SI476X_PHDIV_SECONDARY_ANTENNA:	Tuner works as a primary tuner
+ *					using the secondary tuner's antenna.
+ * @SI476X_PHDIV_SECONDARY_COMBINING:	Tuner works as a secondary
+ *					tuner in combination with the
+ *					primary one.
+ */
+enum si476x_phase_diversity_mode {
+	SI476X_PHDIV_DISABLED			= 0,
+	SI476X_PHDIV_PRIMARY_COMBINING		= 1,
+	SI476X_PHDIV_PRIMARY_ANTENNA		= 2,
+	SI476X_PHDIV_SECONDARY_ANTENNA		= 3,
+	SI476X_PHDIV_SECONDARY_COMBINING	= 5,
+};
+
+
+/*
+ * Platform dependent definition
+ */
+struct si476x_platform_data {
+	int gpio_reset; /* < 0 if not used */
+
+	struct si476x_power_up_args power_up_parameters;
+	enum si476x_phase_diversity_mode diversity_mode;
+
+	struct si476x_pinmux pinmux;
+};
+
+
+#endif /* __SI476X_PLATFORM_H__ */
diff --git a/include/linux/mfd/si476x-reports.h b/include/linux/mfd/si476x-reports.h
new file mode 100644
index 0000000..e0b9455
--- /dev/null
+++ b/include/linux/mfd/si476x-reports.h
@@ -0,0 +1,163 @@
+/*
+ * include/linux/mfd/si476x-reports.h -- Definitions of the data formats
+ * returned by debugfs hooks
+ *
+ * Copyright (C) 2013 Andrey Smirnov
+ *
+ * Author: Andrey Smirnov <andrew.smirnov@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ */
+
+#ifndef __SI476X_REPORTS_H__
+#define __SI476X_REPORTS_H__
+
+/**
+ * struct si476x_rsq_status - structure containing received signal
+ * quality
+ * @multhint:   Multipath Detect High.
+ *              true  - Indicates that the value is below
+ *                      FM_RSQ_MULTIPATH_HIGH_THRESHOLD
+ *              false - Indicates that the value is above
+ *                      FM_RSQ_MULTIPATH_HIGH_THRESHOLD
+ * @multlint:   Multipath Detect Low.
+ *              true  - Indicates that the value is below
+ *                      FM_RSQ_MULTIPATH_LOW_THRESHOLD
+ *              false - Indicates that the value is above
+ *                      FM_RSQ_MULTIPATH_LOW_THRESHOLD
+ * @snrhint:    SNR Detect High.
+ *              true  - Indicates that the value is below
+ *                      FM_RSQ_SNR_HIGH_THRESHOLD
+ *              false - Indicates that the value is above
+ *                      FM_RSQ_SNR_HIGH_THRESHOLD
+ * @snrlint:    SNR Detect Low.
+ *              true  - Indicates that the value is below
+ *                      FM_RSQ_SNR_LOW_THRESHOLD
+ *              false - Indicates that the value is above
+ *                      FM_RSQ_SNR_LOW_THRESHOLD
+ * @rssihint:   RSSI Detect High.
+ *              true  - Indicates that the value is below
+ *                      FM_RSQ_RSSI_HIGH_THRESHOLD
+ *              false - Indicates that the value is above
+ *                      FM_RSQ_RSSI_HIGH_THRESHOLD
+ * @rssilint:   RSSI Detect Low.
+ *              true  - Indicates that the value is below
+ *                      FM_RSQ_RSSI_LOW_THRESHOLD
+ *              false - Indicates that the value is above
+ *                      FM_RSQ_RSSI_LOW_THRESHOLD
+ * @bltf:       Band Limit.
+ *              Set if the seek command hit the band limit or wrapped
+ *              to the original frequency.
+ * @snr_ready:  SNR measurement in progress.
+ * @rssiready:  RSSI measurement in progress.
+ * @afcrl:      Set if FREQOFF >= MAX_TUNE_ERROR
+ * @valid:      Set if the channel is valid
+ *               rssi < FM_VALID_RSSI_THRESHOLD
+ *               snr  < FM_VALID_SNR_THRESHOLD
+ *               tune_error < FM_VALID_MAX_TUNE_ERROR
+ * @readfreq:   Current tuned frequency.
+ * @freqoff:    Signed frequency offset.
+ * @rssi:       Received Signal Strength Indicator (dBuV).
+ * @snr:        RF SNR Indicator (dB).
+ * @lassi:
+ * @hassi:      Low/High side Adjacent (100 kHz) Channel Strength Indicator
+ * @mult:       Multipath indicator
+ * @dev:        Who knows? But values may vary.
+ * @readantcap: Antenna tuning capacity value.
+ * @assi:       Adjacent Channel (+/- 200 kHz) Strength Indicator
+ * @usn:        Ultrasonic Noise Indicator in -dBFS
+ */
+struct si476x_rsq_status_report {
+	__u8 multhint, multlint;
+	__u8 snrhint,  snrlint;
+	__u8 rssihint, rssilint;
+	__u8 bltf;
+	__u8 snr_ready;
+	__u8 rssiready;
+	__u8 injside;
+	__u8 afcrl;
+	__u8 valid;
+
+	__u16 readfreq;
+	__s8  freqoff;
+	__s8  rssi;
+	__s8  snr;
+	__s8  issi;
+	__s8  lassi, hassi;
+	__s8  mult;
+	__u8  dev;
+	__u16 readantcap;
+	__s8  assi;
+	__s8  usn;
+
+	__u8 pilotdev;
+	__u8 rdsdev;
+	__u8 assidev;
+	__u8 strongdev;
+	__u16 rdspi;
+} __packed;
+
+/**
+ * si476x_acf_status_report - ACF report results
+ *
+ * @blend_int: If set, indicates that stereo separation has crossed
+ * below the blend threshold as set by FM_ACF_BLEND_THRESHOLD
+ * @hblend_int: If set, indicates that HiBlend cutoff frequency is
+ * lower than threshold as set by FM_ACF_HBLEND_THRESHOLD
+ * @hicut_int:  If set, indicates that HiCut cutoff frequency is lower
+ * than the threshold set by ACF_
+ *
+ */
+struct si476x_acf_status_report {
+	__u8 blend_int;
+	__u8 hblend_int;
+	__u8 hicut_int;
+	__u8 chbw_int;
+	__u8 softmute_int;
+	__u8 smute;
+	__u8 smattn;
+	__u8 chbw;
+	__u8 hicut;
+	__u8 hiblend;
+	__u8 pilot;
+	__u8 stblend;
+} __packed;
+
+enum si476x_fmagc {
+	SI476X_FMAGC_10K_OHM	= 0,
+	SI476X_FMAGC_800_OHM	= 1,
+	SI476X_FMAGC_400_OHM	= 2,
+	SI476X_FMAGC_200_OHM	= 4,
+	SI476X_FMAGC_100_OHM	= 8,
+	SI476X_FMAGC_50_OHM	= 16,
+	SI476X_FMAGC_25_OHM	= 32,
+	SI476X_FMAGC_12P5_OHM	= 64,
+	SI476X_FMAGC_6P25_OHM	= 128,
+};
+
+struct si476x_agc_status_report {
+	__u8 mxhi;
+	__u8 mxlo;
+	__u8 lnahi;
+	__u8 lnalo;
+	__u8 fmagc1;
+	__u8 fmagc2;
+	__u8 pgagain;
+	__u8 fmwblang;
+} __packed;
+
+struct si476x_rds_blockcount_report {
+	__u16 expected;
+	__u16 received;
+	__u16 uncorrectable;
+} __packed;
+
+#endif  /* __SI476X_REPORTS_H__ */
diff --git a/include/linux/mfd/stmpe.h b/include/linux/mfd/stmpe.h
index 383ac15..48395a6 100644
--- a/include/linux/mfd/stmpe.h
+++ b/include/linux/mfd/stmpe.h
@@ -26,6 +26,7 @@
 	STMPE801,
 	STMPE811,
 	STMPE1601,
+	STMPE1801,
 	STMPE2401,
 	STMPE2403,
 	STMPE_NBR_PARTS
@@ -39,6 +40,7 @@
 	STMPE_IDX_CHIP_ID,
 	STMPE_IDX_ICR_LSB,
 	STMPE_IDX_IER_LSB,
+	STMPE_IDX_ISR_LSB,
 	STMPE_IDX_ISR_MSB,
 	STMPE_IDX_GPMR_LSB,
 	STMPE_IDX_GPSR_LSB,
@@ -49,6 +51,7 @@
 	STMPE_IDX_GPFER_LSB,
 	STMPE_IDX_GPAFR_U_MSB,
 	STMPE_IDX_IEGPIOR_LSB,
+	STMPE_IDX_ISGPIOR_LSB,
 	STMPE_IDX_ISGPIOR_MSB,
 	STMPE_IDX_MAX,
 };
diff --git a/include/linux/mfd/syscon.h b/include/linux/mfd/syscon.h
index 6aeb6b8..b473577f 100644
--- a/include/linux/mfd/syscon.h
+++ b/include/linux/mfd/syscon.h
@@ -15,8 +15,11 @@
 #ifndef __LINUX_MFD_SYSCON_H__
 #define __LINUX_MFD_SYSCON_H__
 
+struct device_node;
+
 extern struct regmap *syscon_node_to_regmap(struct device_node *np);
 extern struct regmap *syscon_regmap_lookup_by_compatible(const char *s);
+extern struct regmap *syscon_regmap_lookup_by_pdevname(const char *s);
 extern struct regmap *syscon_regmap_lookup_by_phandle(
 					struct device_node *np,
 					const char *property);
diff --git a/include/linux/mfd/tps65090.h b/include/linux/mfd/tps65090.h
index 998628a..3f43069 100644
--- a/include/linux/mfd/tps65090.h
+++ b/include/linux/mfd/tps65090.h
@@ -27,6 +27,7 @@
 
 /* TPS65090 IRQs */
 enum {
+	TPS65090_IRQ_INTERRUPT,
 	TPS65090_IRQ_VAC_STATUS_CHANGE,
 	TPS65090_IRQ_VSYS_STATUS_CHANGE,
 	TPS65090_IRQ_BAT_STATUS_CHANGE,
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index 8873e83..e326ae2 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -342,9 +342,7 @@
 
 	mmc_pm_flag_t		pm_flags;	/* requested pm features */
 
-#ifdef CONFIG_LEDS_TRIGGERS
 	struct led_trigger	*led;		/* activity led */
-#endif
 
 #ifdef CONFIG_REGULATOR
 	bool			regulator_enabled; /* regulator state */
diff --git a/include/linux/of.h b/include/linux/of.h
index fb2002f..1b671c3 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -356,6 +356,11 @@
 	return NULL;
 }
 
+static inline struct device_node *of_get_parent(const struct device_node *node)
+{
+	return NULL;
+}
+
 static inline bool of_have_populated_dt(void)
 {
 	return false;
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index e0373d2..f463a46 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -788,6 +788,12 @@
 static inline void perf_event_task_tick(void)				{ }
 #endif
 
+#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_NO_HZ_FULL)
+extern bool perf_event_can_stop_tick(void);
+#else
+static inline bool perf_event_can_stop_tick(void)			{ return true; }
+#endif
+
 #if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_INTEL)
 extern void perf_restore_debug_store(void);
 #else
diff --git a/include/linux/platform_data/leds-lp55xx.h b/include/linux/platform_data/leds-lp55xx.h
index 1509570d..202e290 100644
--- a/include/linux/platform_data/leds-lp55xx.h
+++ b/include/linux/platform_data/leds-lp55xx.h
@@ -20,18 +20,6 @@
 #define LP55XX_CLOCK_INT	1
 #define LP55XX_CLOCK_EXT	2
 
-/* Bits in LP5521 CONFIG register. 'update_config' in lp55xx_platform_data */
-#define LP5521_PWM_HF			0x40	/* PWM: 0 = 256Hz, 1 = 558Hz */
-#define LP5521_PWRSAVE_EN		0x20	/* 1 = Power save mode */
-#define LP5521_CP_MODE_OFF		0	/* Charge pump (CP) off */
-#define LP5521_CP_MODE_BYPASS		8	/* CP forced to bypass mode */
-#define LP5521_CP_MODE_1X5		0x10	/* CP forced to 1.5x mode */
-#define LP5521_CP_MODE_AUTO		0x18	/* Automatic mode selection */
-#define LP5521_R_TO_BATT		4	/* R out: 0 = CP, 1 = Vbat */
-#define LP5521_CLK_SRC_EXT		0	/* Ext-clk source (CLK_32K) */
-#define LP5521_CLK_INT			1	/* Internal clock */
-#define LP5521_CLK_AUTO			2	/* Automatic clock selection */
-
 struct lp55xx_led_config {
 	const char *name;
 	u8 chan_nr;
@@ -40,9 +28,9 @@
 };
 
 struct lp55xx_predef_pattern {
-	u8 *r;
-	u8 *g;
-	u8 *b;
+	const u8 *r;
+	const u8 *g;
+	const u8 *b;
 	u8 size_r;
 	u8 size_g;
 	u8 size_b;
@@ -79,9 +67,6 @@
 	/* Predefined pattern data */
 	struct lp55xx_predef_pattern *patterns;
 	unsigned int num_patterns;
-
-	/* _CONFIG register */
-	u8 update_config;
 };
 
 #endif /* _LEDS_LP55XX_H */
diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h
index 60bac69..7794d75e 100644
--- a/include/linux/posix-timers.h
+++ b/include/linux/posix-timers.h
@@ -123,6 +123,8 @@
 void posix_cpu_timers_exit(struct task_struct *task);
 void posix_cpu_timers_exit_group(struct task_struct *task);
 
+bool posix_cpu_timers_can_stop_tick(struct task_struct *tsk);
+
 void set_process_cpu_timer(struct task_struct *task, unsigned int clock_idx,
 			   cputime_t *newval, cputime_t *oldval);
 
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 9ed2c9a..4ccd68e 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -1000,4 +1000,11 @@
 #define kfree_rcu(ptr, rcu_head)					\
 	__kfree_rcu(&((ptr)->rcu_head), offsetof(typeof(*(ptr)), rcu_head))
 
+#ifdef CONFIG_RCU_NOCB_CPU
+extern bool rcu_is_nocb_cpu(int cpu);
+#else
+static inline bool rcu_is_nocb_cpu(int cpu) { return false; }
+#endif /* #else #ifdef CONFIG_RCU_NOCB_CPU */
+
+
 #endif /* __LINUX_RCUPDATE_H */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 6f95004..4800e9d 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -231,7 +231,7 @@
 
 extern int runqueue_is_locked(int cpu);
 
-#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ)
+#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
 extern void nohz_balance_enter_idle(int cpu);
 extern void set_cpu_sd_state_idle(void);
 extern int get_nohz_timer_target(void);
@@ -1764,13 +1764,13 @@
 }
 #endif
 
-#ifdef CONFIG_NO_HZ
+#ifdef CONFIG_NO_HZ_COMMON
 void calc_load_enter_idle(void);
 void calc_load_exit_idle(void);
 #else
 static inline void calc_load_enter_idle(void) { }
 static inline void calc_load_exit_idle(void) { }
-#endif /* CONFIG_NO_HZ */
+#endif /* CONFIG_NO_HZ_COMMON */
 
 #ifndef CONFIG_CPUMASK_OFFSTACK
 static inline int set_cpus_allowed(struct task_struct *p, cpumask_t new_mask)
@@ -1856,10 +1856,17 @@
 static inline void idle_task_exit(void) {}
 #endif
 
-#if defined(CONFIG_NO_HZ) && defined(CONFIG_SMP)
-extern void wake_up_idle_cpu(int cpu);
+#if defined(CONFIG_NO_HZ_COMMON) && defined(CONFIG_SMP)
+extern void wake_up_nohz_cpu(int cpu);
 #else
-static inline void wake_up_idle_cpu(int cpu) { }
+static inline void wake_up_nohz_cpu(int cpu) { }
+#endif
+
+#ifdef CONFIG_NO_HZ_FULL
+extern bool sched_can_stop_tick(void);
+extern u64 scheduler_tick_max_deferment(void);
+#else
+static inline bool sched_can_stop_tick(void) { return false; }
 #endif
 
 #ifdef CONFIG_SCHED_AUTOGROUP
diff --git a/include/linux/tick.h b/include/linux/tick.h
index 553272e..9180f4b 100644
--- a/include/linux/tick.h
+++ b/include/linux/tick.h
@@ -82,7 +82,7 @@
 extern void tick_setup_sched_timer(void);
 # endif
 
-# if defined CONFIG_NO_HZ || defined CONFIG_HIGH_RES_TIMERS
+# if defined CONFIG_NO_HZ_COMMON || defined CONFIG_HIGH_RES_TIMERS
 extern void tick_cancel_sched_timer(int cpu);
 # else
 static inline void tick_cancel_sched_timer(int cpu) { }
@@ -123,7 +123,7 @@
 static inline int tick_oneshot_mode_active(void) { return 0; }
 #endif /* !CONFIG_GENERIC_CLOCKEVENTS */
 
-# ifdef CONFIG_NO_HZ
+# ifdef CONFIG_NO_HZ_COMMON
 DECLARE_PER_CPU(struct tick_sched, tick_cpu_sched);
 
 static inline int tick_nohz_tick_stopped(void)
@@ -138,7 +138,7 @@
 extern u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time);
 extern u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time);
 
-# else /* !CONFIG_NO_HZ */
+# else /* !CONFIG_NO_HZ_COMMON */
 static inline int tick_nohz_tick_stopped(void)
 {
 	return 0;
@@ -155,7 +155,24 @@
 }
 static inline u64 get_cpu_idle_time_us(int cpu, u64 *unused) { return -1; }
 static inline u64 get_cpu_iowait_time_us(int cpu, u64 *unused) { return -1; }
-# endif /* !NO_HZ */
+# endif /* !CONFIG_NO_HZ_COMMON */
+
+#ifdef CONFIG_NO_HZ_FULL
+extern void tick_nohz_init(void);
+extern int tick_nohz_full_cpu(int cpu);
+extern void tick_nohz_full_check(void);
+extern void tick_nohz_full_kick(void);
+extern void tick_nohz_full_kick_all(void);
+extern void tick_nohz_task_switch(struct task_struct *tsk);
+#else
+static inline void tick_nohz_init(void) { }
+static inline int tick_nohz_full_cpu(int cpu) { return 0; }
+static inline void tick_nohz_full_check(void) { }
+static inline void tick_nohz_full_kick(void) { }
+static inline void tick_nohz_full_kick_all(void) { }
+static inline void tick_nohz_task_switch(struct task_struct *tsk) { }
+#endif
+
 
 # ifdef CONFIG_CPU_IDLE_GOV_MENU
 extern void menu_hrtimer_cancel(void);
diff --git a/include/linux/ucb1400.h b/include/linux/ucb1400.h
index d21b33c..2e9ee4d 100644
--- a/include/linux/ucb1400.h
+++ b/include/linux/ucb1400.h
@@ -83,15 +83,12 @@
 #define UCB_ID			0x7e
 #define UCB_ID_1400             0x4304
 
-struct ucb1400_gpio_data {
-	int gpio_offset;
-	int (*gpio_setup)(struct device *dev, int ngpio);
-	int (*gpio_teardown)(struct device *dev, int ngpio);
-};
-
 struct ucb1400_gpio {
 	struct gpio_chip	gc;
 	struct snd_ac97		*ac97;
+	int			gpio_offset;
+	int			(*gpio_setup)(struct device *dev, int ngpio);
+	int			(*gpio_teardown)(struct device *dev, int ngpio);
 };
 
 struct ucb1400_ts {
@@ -110,6 +107,9 @@
 
 struct ucb1400_pdata {
 	int	irq;
+	int	gpio_offset;
+	int	(*gpio_setup)(struct device *dev, int ngpio);
+	int	(*gpio_teardown)(struct device *dev, int ngpio);
 };
 
 static inline u16 ucb1400_reg_read(struct snd_ac97 *ac97, u16 reg)
@@ -162,10 +162,4 @@
 unsigned int ucb1400_adc_read(struct snd_ac97 *ac97, u16 adc_channel,
 			      int adcsync);
 
-#ifdef CONFIG_GPIO_UCB1400
-void __init ucb1400_gpio_set_data(struct ucb1400_gpio_data *data);
-#else
-static inline void ucb1400_gpio_set_data(struct ucb1400_gpio_data *data) {}
-#endif
-
 #endif
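
For board code, the change above folds the GPIO hooks of the removed
struct ucb1400_gpio_data (previously installed with the deleted
ucb1400_gpio_set_data()) directly into struct ucb1400_pdata. A
hypothetical platform definition after the change, with example values:

	static struct ucb1400_pdata myboard_ucb1400_pdata = {
		.irq		= 32,	/* example IRQ number */
		.gpio_offset	= 16,	/* example GPIO base */
		.gpio_setup	= myboard_gpio_setup,
		.gpio_teardown	= myboard_gpio_teardown,
	};
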
diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h
index 19911dd..7005d11 100644
--- a/include/trace/events/kvm.h
+++ b/include/trace/events/kvm.h
@@ -37,7 +37,7 @@
 		  __entry->errno < 0 ? -__entry->errno : __entry->reason)
 );
 
-#if defined(__KVM_HAVE_IRQ_LINE)
+#if defined(CONFIG_HAVE_KVM_IRQCHIP)
 TRACE_EVENT(kvm_set_irq,
 	TP_PROTO(unsigned int gsi, int level, int irq_source_id),
 	TP_ARGS(gsi, level, irq_source_id),
@@ -122,6 +122,10 @@
 	{KVM_IRQCHIP_PIC_SLAVE,		"PIC slave"},		\
 	{KVM_IRQCHIP_IOAPIC,		"IOAPIC"}
 
+#endif /* defined(__KVM_HAVE_IOAPIC) */
+
+#if defined(CONFIG_HAVE_KVM_IRQCHIP)
+
 TRACE_EVENT(kvm_ack_irq,
 	TP_PROTO(unsigned int irqchip, unsigned int pin),
 	TP_ARGS(irqchip, pin),
@@ -136,14 +140,18 @@
 		__entry->pin		= pin;
 	),
 
+#ifdef kvm_irqchips
 	TP_printk("irqchip %s pin %u",
 		  __print_symbolic(__entry->irqchip, kvm_irqchips),
 		 __entry->pin)
+#else
+	TP_printk("irqchip %d pin %u", __entry->irqchip, __entry->pin)
+#endif
 );
 
+#endif /* defined(CONFIG_HAVE_KVM_IRQCHIP) */
 
 
-#endif /* defined(__KVM_HAVE_IOAPIC) */
 
 #define KVM_TRACE_MMIO_READ_UNSATISFIED 0
 #define KVM_TRACE_MMIO_READ 1
diff --git a/include/trace/events/timer.h b/include/trace/events/timer.h
index 8d21947..68c2c20 100644
--- a/include/trace/events/timer.h
+++ b/include/trace/events/timer.h
@@ -323,6 +323,27 @@
 		  (int) __entry->pid, (unsigned long long)__entry->now)
 );
 
+#ifdef CONFIG_NO_HZ_COMMON
+TRACE_EVENT(tick_stop,
+
+	TP_PROTO(int success, char *error_msg),
+
+	TP_ARGS(success, error_msg),
+
+	TP_STRUCT__entry(
+		__field( int ,		success	)
+		__string( msg, 		error_msg )
+	),
+
+	TP_fast_assign(
+		__entry->success	= success;
+		__assign_str(msg, error_msg);
+	),
+
+	TP_printk("success=%s msg=%s",  __entry->success ? "yes" : "no", __get_str(msg))
+);
+#endif
+
 #endif /*  _TRACE_TIMER_H */
 
 /* This part must be outside protection */
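
The new tick_stop event records both outcomes: success=1 with an empty
message when the tick is actually stopped, and success=0 with a reason
string when a subsystem vetoes it. An illustrative emitter (the real call
sites are added in kernel/time/tick-sched.c below; the veto condition here
is hypothetical):

	if (subsystem_needs_tick())
		trace_tick_stop(0, "subsystem requires the tick");
	else
		trace_tick_stop(1, " ");
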
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 3c56ba3..a5c86fc 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -449,12 +449,15 @@
 	kvm_ioeventfd_flag_nr_datamatch,
 	kvm_ioeventfd_flag_nr_pio,
 	kvm_ioeventfd_flag_nr_deassign,
+	kvm_ioeventfd_flag_nr_virtio_ccw_notify,
 	kvm_ioeventfd_flag_nr_max,
 };
 
 #define KVM_IOEVENTFD_FLAG_DATAMATCH (1 << kvm_ioeventfd_flag_nr_datamatch)
 #define KVM_IOEVENTFD_FLAG_PIO       (1 << kvm_ioeventfd_flag_nr_pio)
 #define KVM_IOEVENTFD_FLAG_DEASSIGN  (1 << kvm_ioeventfd_flag_nr_deassign)
+#define KVM_IOEVENTFD_FLAG_VIRTIO_CCW_NOTIFY \
+	(1 << kvm_ioeventfd_flag_nr_virtio_ccw_notify)
 
 #define KVM_IOEVENTFD_VALID_FLAG_MASK  ((1 << kvm_ioeventfd_flag_nr_max) - 1)
 
@@ -558,9 +561,7 @@
 #define KVM_CAP_MP_STATE 14
 #define KVM_CAP_COALESCED_MMIO 15
 #define KVM_CAP_SYNC_MMU 16  /* Changes to host mmap are reflected in guest */
-#ifdef __KVM_HAVE_DEVICE_ASSIGNMENT
 #define KVM_CAP_DEVICE_ASSIGNMENT 17
-#endif
 #define KVM_CAP_IOMMU 18
 #ifdef __KVM_HAVE_MSI
 #define KVM_CAP_DEVICE_MSI 20
@@ -576,13 +577,9 @@
 #ifdef __KVM_HAVE_PIT
 #define KVM_CAP_REINJECT_CONTROL 24
 #endif
-#ifdef __KVM_HAVE_IOAPIC
 #define KVM_CAP_IRQ_ROUTING 25
-#endif
 #define KVM_CAP_IRQ_INJECT_STATUS 26
-#ifdef __KVM_HAVE_DEVICE_ASSIGNMENT
 #define KVM_CAP_DEVICE_DEASSIGNMENT 27
-#endif
 #ifdef __KVM_HAVE_MSIX
 #define KVM_CAP_DEVICE_MSIX 28
 #endif
@@ -665,6 +662,10 @@
 #define KVM_CAP_PPC_EPR 86
 #define KVM_CAP_ARM_PSCI 87
 #define KVM_CAP_ARM_SET_DEVICE_ADDR 88
+#define KVM_CAP_DEVICE_CTRL 89
+#define KVM_CAP_IRQ_MPIC 90
+#define KVM_CAP_PPC_RTAS 91
+#define KVM_CAP_IRQ_XICS 92
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
@@ -818,6 +819,28 @@
 };
 
 /*
+ * Device control API, available with KVM_CAP_DEVICE_CTRL
+ */
+#define KVM_CREATE_DEVICE_TEST		1
+
+struct kvm_create_device {
+	__u32	type;	/* in: KVM_DEV_TYPE_xxx */
+	__u32	fd;	/* out: device handle */
+	__u32	flags;	/* in: KVM_CREATE_DEVICE_xxx */
+};
+
+struct kvm_device_attr {
+	__u32	flags;		/* no flags currently defined */
+	__u32	group;		/* device-defined */
+	__u64	attr;		/* group-defined */
+	__u64	addr;		/* userspace address of attr data */
+};
+
+#define KVM_DEV_TYPE_FSL_MPIC_20	1
+#define KVM_DEV_TYPE_FSL_MPIC_42	2
+#define KVM_DEV_TYPE_XICS		3
+
+/*
  * ioctls for VM fds
  */
 #define KVM_SET_MEMORY_REGION     _IOW(KVMIO,  0x40, struct kvm_memory_region)
@@ -904,6 +927,16 @@
 #define KVM_PPC_GET_HTAB_FD	  _IOW(KVMIO,  0xaa, struct kvm_get_htab_fd)
 /* Available with KVM_CAP_ARM_SET_DEVICE_ADDR */
 #define KVM_ARM_SET_DEVICE_ADDR	  _IOW(KVMIO,  0xab, struct kvm_arm_device_addr)
+/* Available with KVM_CAP_PPC_RTAS */
+#define KVM_PPC_RTAS_DEFINE_TOKEN _IOW(KVMIO,  0xac, struct kvm_rtas_token_args)
+
+/* ioctl for vm fd */
+#define KVM_CREATE_DEVICE	  _IOWR(KVMIO,  0xe0, struct kvm_create_device)
+
+/* ioctls for fds returned by KVM_CREATE_DEVICE */
+#define KVM_SET_DEVICE_ATTR	  _IOW(KVMIO,  0xe1, struct kvm_device_attr)
+#define KVM_GET_DEVICE_ATTR	  _IOW(KVMIO,  0xe2, struct kvm_device_attr)
+#define KVM_HAS_DEVICE_ATTR	  _IOW(KVMIO,  0xe3, struct kvm_device_attr)
 
 /*
  * ioctls for vcpu fds
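
Userspace drives the new device control API in two steps: KVM_CREATE_DEVICE
on the VM fd returns a device fd, and KVM_SET/GET/HAS_DEVICE_ATTR then
operate on that fd. A minimal userspace sketch, assuming a vm_fd obtained
via KVM_CREATE_VM and a kernel advertising KVM_CAP_DEVICE_CTRL:

	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	/* Sketch only: create an in-kernel MPIC, return its device fd. */
	static int create_mpic(int vm_fd)
	{
		struct kvm_create_device cd = {
			.type	= KVM_DEV_TYPE_FSL_MPIC_20,	/* in */
			.flags	= 0,				/* in */
		};

		if (ioctl(vm_fd, KVM_CREATE_DEVICE, &cd) < 0)
			return -1;
		return cd.fd;	/* out: use with KVM_*_DEVICE_ATTR */
	}
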
diff --git a/init/Kconfig b/init/Kconfig
index a76d131..9d3a788 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -302,7 +302,7 @@
 # Kind of a stub config for the pure tick based cputime accounting
 config TICK_CPU_ACCOUNTING
 	bool "Simple tick based cputime accounting"
-	depends on !S390
+	depends on !S390 && !NO_HZ_FULL
 	help
 	  This is the basic tick based cputime accounting that maintains
 	  statistics about user, system and idle time spent on per jiffies
@@ -312,7 +312,7 @@
 
 config VIRT_CPU_ACCOUNTING_NATIVE
 	bool "Deterministic task and CPU time accounting"
-	depends on HAVE_VIRT_CPU_ACCOUNTING
+	depends on HAVE_VIRT_CPU_ACCOUNTING && !NO_HZ_FULL
 	select VIRT_CPU_ACCOUNTING
 	help
 	  Select this option to enable more accurate task and CPU time
@@ -342,7 +342,7 @@
 
 config IRQ_TIME_ACCOUNTING
 	bool "Fine granularity task level IRQ time accounting"
-	depends on HAVE_IRQ_TIME_ACCOUNTING
+	depends on HAVE_IRQ_TIME_ACCOUNTING && !NO_HZ_FULL
 	help
 	  Select this option to enable fine granularity task irq time
 	  accounting. This is done by reading a timestamp on each
@@ -576,7 +576,7 @@
 
 config RCU_FAST_NO_HZ
 	bool "Accelerate last non-dyntick-idle CPU's grace periods"
-	depends on NO_HZ && SMP
+	depends on NO_HZ_COMMON && SMP
 	default n
 	help
 	  This option permits CPUs to enter dynticks-idle state even if
@@ -687,7 +687,7 @@
 
 config RCU_NOCB_CPU_NONE
 	bool "No build_forced no-CBs CPUs"
-	depends on RCU_NOCB_CPU
+	depends on RCU_NOCB_CPU && !NO_HZ_FULL
 	help
 	  This option does not force any of the CPUs to be no-CBs CPUs.
 	  Only CPUs designated by the rcu_nocbs= boot parameter will be
@@ -695,7 +695,7 @@
 
 config RCU_NOCB_CPU_ZERO
 	bool "CPU 0 is a build_forced no-CBs CPU"
-	depends on RCU_NOCB_CPU
+	depends on RCU_NOCB_CPU && !NO_HZ_FULL
 	help
 	  This option forces CPU 0 to be a no-CBs CPU.  Additional CPUs
 	  may be designated as no-CBs CPUs using the rcu_nocbs= boot
diff --git a/init/main.c b/init/main.c
index ceed17a..9484f4b 100644
--- a/init/main.c
+++ b/init/main.c
@@ -544,6 +544,7 @@
 	idr_init_cache();
 	perf_event_init();
 	rcu_init();
+	tick_nohz_init();
 	radix_tree_init();
 	/* init some links before init_ISA_irqs() */
 	early_irq_init();
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 3820e3c..6b41c18 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -18,6 +18,7 @@
 #include <linux/poll.h>
 #include <linux/slab.h>
 #include <linux/hash.h>
+#include <linux/tick.h>
 #include <linux/sysfs.h>
 #include <linux/dcache.h>
 #include <linux/percpu.h>
@@ -685,8 +686,12 @@
 
 	WARN_ON(!irqs_disabled());
 
-	if (list_empty(&cpuctx->rotation_list))
+	if (list_empty(&cpuctx->rotation_list)) {
+		int was_empty = list_empty(head);
 		list_add(&cpuctx->rotation_list, head);
+		if (was_empty)
+			tick_nohz_full_kick();
+	}
 }
 
 static void get_ctx(struct perf_event_context *ctx)
@@ -2591,6 +2596,16 @@
 		list_del_init(&cpuctx->rotation_list);
 }
 
+#ifdef CONFIG_NO_HZ_FULL
+bool perf_event_can_stop_tick(void)
+{
+	if (list_empty(&__get_cpu_var(rotation_list)))
+		return true;
+	else
+		return false;
+}
+#endif
+
 void perf_event_task_tick(void)
 {
 	struct list_head *head = &__get_cpu_var(rotation_list);
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index 97fddb0..cd55144 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -326,11 +326,16 @@
 }
 
 #else
+static int data_page_nr(struct ring_buffer *rb)
+{
+	return rb->nr_pages << page_order(rb);
+}
 
 struct page *
 perf_mmap_to_page(struct ring_buffer *rb, unsigned long pgoff)
 {
-	if (pgoff > (1UL << page_order(rb)))
+	/* The '>' accounts for the user page. */
+	if (pgoff > data_page_nr(rb))
 		return NULL;
 
 	return vmalloc_to_page((void *)rb->user_page + pgoff * PAGE_SIZE);
@@ -350,10 +355,11 @@
 	int i, nr;
 
 	rb = container_of(work, struct ring_buffer, work);
-	nr = 1 << page_order(rb);
+	nr = data_page_nr(rb);
 
 	base = rb->user_page;
-	for (i = 0; i < nr + 1; i++)
+	/* The '<=' accounts for the user page. */
+	for (i = 0; i <= nr; i++)
 		perf_mmap_unmark_page(base + (i * PAGE_SIZE));
 
 	vfree(base);
@@ -387,7 +393,7 @@
 	rb->user_page = all_buf;
 	rb->data_pages[0] = all_buf + PAGE_SIZE;
 	rb->page_order = ilog2(nr_pages);
-	rb->nr_pages = 1;
+	rb->nr_pages = !!nr_pages;
 
 	ring_buffer_init(rb, watermark, flags);
 
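
Worked through for the corner case the two hunks above fix: the vmalloc'ed
buffer is one user page followed by data_page_nr() data pages.

	/* With nr_pages == 0:
	 *   rb->nr_pages   = !!0        = 0
	 *   data_page_nr() = 0 << order = 0
	 * so perf_mmap_to_page() rejects any pgoff > 0, and the free
	 * loop (i <= 0) unmarks exactly the user page instead of also
	 * touching a data page that was never allocated. */
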
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 609d8ff..fd4b13b 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -172,7 +172,7 @@
  */
 static int hrtimer_get_target(int this_cpu, int pinned)
 {
-#ifdef CONFIG_NO_HZ
+#ifdef CONFIG_NO_HZ_COMMON
 	if (!pinned && get_sysctl_timer_migration() && idle_cpu(this_cpu))
 		return get_nohz_timer_target();
 #endif
@@ -1125,7 +1125,7 @@
 }
 EXPORT_SYMBOL_GPL(hrtimer_get_remaining);
 
-#ifdef CONFIG_NO_HZ
+#ifdef CONFIG_NO_HZ_COMMON
 /**
  * hrtimer_get_next_event - get the time until next expiry event
  *
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index 8fd709c..42670e9 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -10,6 +10,8 @@
 #include <linux/kernel_stat.h>
 #include <trace/events/timer.h>
 #include <linux/random.h>
+#include <linux/tick.h>
+#include <linux/workqueue.h>
 
 /*
  * Called after updating RLIMIT_CPU to run cpu timer and update
@@ -153,6 +155,21 @@
 	}
 }
 
+/**
+ * task_cputime_zero - Check a task_cputime struct for all zero fields.
+ *
+ * @cputime:	The struct to compare.
+ *
+ * Checks @cputime to see if all fields are zero.  Returns true if all fields
+ * are zero, false if any field is nonzero.
+ */
+static inline int task_cputime_zero(const struct task_cputime *cputime)
+{
+	if (!cputime->utime && !cputime->stime && !cputime->sum_exec_runtime)
+		return 1;
+	return 0;
+}
+
 static inline cputime_t prof_ticks(struct task_struct *p)
 {
 	cputime_t utime, stime;
@@ -636,6 +653,37 @@
 	return 0;
 }
 
+#ifdef CONFIG_NO_HZ_FULL
+static void nohz_kick_work_fn(struct work_struct *work)
+{
+	tick_nohz_full_kick_all();
+}
+
+static DECLARE_WORK(nohz_kick_work, nohz_kick_work_fn);
+
+/*
+ * We need the IPIs to be sent from sane process context.
+ * The posix cpu timers are always set with irqs disabled.
+ */
+static void posix_cpu_timer_kick_nohz(void)
+{
+	schedule_work(&nohz_kick_work);
+}
+
+bool posix_cpu_timers_can_stop_tick(struct task_struct *tsk)
+{
+	if (!task_cputime_zero(&tsk->cputime_expires))
+		return false;
+
+	if (tsk->signal->cputimer.running)
+		return false;
+
+	return true;
+}
+#else
+static inline void posix_cpu_timer_kick_nohz(void) { }
+#endif
+
 /*
  * Guts of sys_timer_settime for CPU timers.
  * This is called with the timer locked and interrupts disabled.
@@ -794,6 +842,8 @@
 		sample_to_timespec(timer->it_clock,
 				   old_incr, &old->it_interval);
 	}
+	if (!ret)
+		posix_cpu_timer_kick_nohz();
 	return ret;
 }
 
@@ -1008,21 +1058,6 @@
 	}
 }
 
-/**
- * task_cputime_zero - Check a task_cputime struct for all zero fields.
- *
- * @cputime:	The struct to compare.
- *
- * Checks @cputime to see if all fields are zero.  Returns true if all fields
- * are zero, false if any field is nonzero.
- */
-static inline int task_cputime_zero(const struct task_cputime *cputime)
-{
-	if (!cputime->utime && !cputime->stime && !cputime->sum_exec_runtime)
-		return 1;
-	return 0;
-}
-
 /*
  * Check for any per-thread CPU timers that have fired and move them
  * off the tsk->*_timers list onto the firing list.  Per-thread timers
@@ -1336,6 +1371,13 @@
 			cpu_timer_fire(timer);
 		spin_unlock(&timer->it_lock);
 	}
+
+	/*
+	 * In case some timers were rescheduled after the queue got emptied,
+	 * wake up full dynticks CPUs.
+	 */
+	if (tsk->signal->cputimer.running)
+		posix_cpu_timer_kick_nohz();
 }
 
 /*
@@ -1366,7 +1408,7 @@
 		}
 
 		if (!*newval)
-			return;
+			goto out;
 		*newval += now.cpu;
 	}
 
@@ -1384,6 +1426,8 @@
 			tsk->signal->cputime_expires.virt_exp = *newval;
 		break;
 	}
+out:
+	posix_cpu_timer_kick_nohz();
 }
 
 static int do_cpu_nanosleep(const clockid_t which_clock, int flags,
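
The workqueue bounce above exists because posix CPU timers are armed with
IRQs disabled, while tick_nohz_full_kick_all() sends IPIs through
smp_call_function_many(), which must run from process context. The idiom
in isolation, with hypothetical names:

	static void kick_fn(struct work_struct *work)
	{
		tick_nohz_full_kick_all();	/* may send IPIs */
	}
	static DECLARE_WORK(kick_work, kick_fn);

	static void called_with_irqs_disabled(void)
	{
		schedule_work(&kick_work);	/* safe in atomic context */
	}
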
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index d853430..16ea679 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -799,6 +799,16 @@
 		rdp->offline_fqs++;
 		return 1;
 	}
+
+	/*
+	 * There is a possibility that a CPU in adaptive-ticks state
+	 * might run in the kernel with the scheduling-clock tick disabled
+	 * for an extended time period.  Invoke rcu_kick_nohz_cpu() to
+	 * force the CPU to restart the scheduling-clock tick if this
+	 * CPU is in this state.
+	 */
+	rcu_kick_nohz_cpu(rdp->cpu);
+
 	return 0;
 }
 
@@ -1820,7 +1830,7 @@
 			  struct rcu_node *rnp, struct rcu_data *rdp)
 {
 	/* No-CBs CPUs do not have orphanable callbacks. */
-	if (is_nocb_cpu(rdp->cpu))
+	if (rcu_is_nocb_cpu(rdp->cpu))
 		return;
 
 	/*
@@ -2892,10 +2902,10 @@
 	 * corresponding CPU's preceding callbacks have been invoked.
 	 */
 	for_each_possible_cpu(cpu) {
-		if (!cpu_online(cpu) && !is_nocb_cpu(cpu))
+		if (!cpu_online(cpu) && !rcu_is_nocb_cpu(cpu))
 			continue;
 		rdp = per_cpu_ptr(rsp->rda, cpu);
-		if (is_nocb_cpu(cpu)) {
+		if (rcu_is_nocb_cpu(cpu)) {
 			_rcu_barrier_trace(rsp, "OnlineNoCB", cpu,
 					   rsp->n_barrier_done);
 			atomic_inc(&rsp->barrier_cpu_count);
diff --git a/kernel/rcutree.h b/kernel/rcutree.h
index 14ee407..da77a8f 100644
--- a/kernel/rcutree.h
+++ b/kernel/rcutree.h
@@ -530,13 +530,13 @@
 static void rcu_nocb_gp_set(struct rcu_node *rnp, int nrq);
 static void rcu_nocb_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp);
 static void rcu_init_one_nocb(struct rcu_node *rnp);
-static bool is_nocb_cpu(int cpu);
 static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp,
 			    bool lazy);
 static bool rcu_nocb_adopt_orphan_cbs(struct rcu_state *rsp,
 				      struct rcu_data *rdp);
 static void rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp);
 static void rcu_spawn_nocb_kthreads(struct rcu_state *rsp);
+static void rcu_kick_nohz_cpu(int cpu);
 static bool init_nocb_callback_list(struct rcu_data *rdp);
 
 #endif /* #ifndef RCU_TREE_NONCORE */
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index d084ae3..170814d 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -28,6 +28,7 @@
 #include <linux/gfp.h>
 #include <linux/oom.h>
 #include <linux/smpboot.h>
+#include <linux/tick.h>
 
 #define RCU_KTHREAD_PRIO 1
 
@@ -1705,7 +1706,7 @@
 		return;
 
 	/* If this is a no-CBs CPU, no callbacks, just return. */
-	if (is_nocb_cpu(cpu))
+	if (rcu_is_nocb_cpu(cpu))
 		return;
 
 	/*
@@ -1747,7 +1748,7 @@
 	struct rcu_data *rdp;
 	struct rcu_state *rsp;
 
-	if (is_nocb_cpu(cpu))
+	if (rcu_is_nocb_cpu(cpu))
 		return;
 	rcu_try_advance_all_cbs();
 	for_each_rcu_flavor(rsp) {
@@ -2052,7 +2053,7 @@
 }
 
 /* Is the specified CPU a no-CBs CPU? */
-static bool is_nocb_cpu(int cpu)
+bool rcu_is_nocb_cpu(int cpu)
 {
 	if (have_rcu_nocb_mask)
 		return cpumask_test_cpu(cpu, rcu_nocb_mask);
@@ -2110,7 +2111,7 @@
 			    bool lazy)
 {
 
-	if (!is_nocb_cpu(rdp->cpu))
+	if (!rcu_is_nocb_cpu(rdp->cpu))
 		return 0;
 	__call_rcu_nocb_enqueue(rdp, rhp, &rhp->next, 1, lazy);
 	if (__is_kfree_rcu_offset((unsigned long)rhp->func))
@@ -2134,7 +2135,7 @@
 	long qll = rsp->qlen_lazy;
 
 	/* If this is not a no-CBs CPU, tell the caller to do it the old way. */
-	if (!is_nocb_cpu(smp_processor_id()))
+	if (!rcu_is_nocb_cpu(smp_processor_id()))
 		return 0;
 	rsp->qlen = 0;
 	rsp->qlen_lazy = 0;
@@ -2306,11 +2307,6 @@
 {
 }
 
-static bool is_nocb_cpu(int cpu)
-{
-	return false;
-}
-
 static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp,
 			    bool lazy)
 {
@@ -2337,3 +2333,20 @@
 }
 
 #endif /* #else #ifdef CONFIG_RCU_NOCB_CPU */
+
+/*
+ * An adaptive-ticks CPU can potentially execute in kernel mode for an
+ * arbitrarily long period of time with the scheduling-clock tick turned
+ * off.  RCU will be paying attention to this CPU because it is in the
+ * kernel, but the CPU cannot be guaranteed to be executing the RCU state
+ * machine because the scheduling-clock tick has been disabled.  Therefore,
+ * if an adaptive-ticks CPU is failing to respond to the current grace
+ * period and has not been idle from an RCU perspective, kick it.
+ */
+static void rcu_kick_nohz_cpu(int cpu)
+{
+#ifdef CONFIG_NO_HZ_FULL
+	if (tick_nohz_full_cpu(cpu))
+		smp_send_reschedule(cpu);
+#endif /* #ifdef CONFIG_NO_HZ_FULL */
+}
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 5662f58..58453b8 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -544,7 +544,7 @@
 	raw_spin_unlock_irqrestore(&rq->lock, flags);
 }
 
-#ifdef CONFIG_NO_HZ
+#ifdef CONFIG_NO_HZ_COMMON
 /*
  * In the semi idle case, use the nearest busy cpu for migrating timers
  * from an idle cpu.  This is good for power-savings.
@@ -582,7 +582,7 @@
  * account when the CPU goes back to idle and evaluates the timer
  * wheel for the next timer event.
  */
-void wake_up_idle_cpu(int cpu)
+static void wake_up_idle_cpu(int cpu)
 {
 	struct rq *rq = cpu_rq(cpu);
 
@@ -612,20 +612,56 @@
 		smp_send_reschedule(cpu);
 }
 
+static bool wake_up_full_nohz_cpu(int cpu)
+{
+	if (tick_nohz_full_cpu(cpu)) {
+		if (cpu != smp_processor_id() ||
+		    tick_nohz_tick_stopped())
+			smp_send_reschedule(cpu);
+		return true;
+	}
+
+	return false;
+}
+
+void wake_up_nohz_cpu(int cpu)
+{
+	if (!wake_up_full_nohz_cpu(cpu))
+		wake_up_idle_cpu(cpu);
+}
+
 static inline bool got_nohz_idle_kick(void)
 {
 	int cpu = smp_processor_id();
 	return idle_cpu(cpu) && test_bit(NOHZ_BALANCE_KICK, nohz_flags(cpu));
 }
 
-#else /* CONFIG_NO_HZ */
+#else /* CONFIG_NO_HZ_COMMON */
 
 static inline bool got_nohz_idle_kick(void)
 {
 	return false;
 }
 
-#endif /* CONFIG_NO_HZ */
+#endif /* CONFIG_NO_HZ_COMMON */
+
+#ifdef CONFIG_NO_HZ_FULL
+bool sched_can_stop_tick(void)
+{
+	struct rq *rq;
+
+	rq = this_rq();
+
+	/* Make sure rq->nr_running update is visible after the IPI */
+	smp_rmb();
+
+	/* More than one running task needs preemption */
+	if (rq->nr_running > 1)
+		return false;
+
+	return true;
+}
+#endif /* CONFIG_NO_HZ_FULL */
 
 void sched_avg_update(struct rq *rq)
 {
@@ -1357,7 +1393,8 @@
 
 void scheduler_ipi(void)
 {
-	if (llist_empty(&this_rq()->wake_list) && !got_nohz_idle_kick())
+	if (llist_empty(&this_rq()->wake_list) && !got_nohz_idle_kick()
+	    && !tick_nohz_full_cpu(smp_processor_id()))
 		return;
 
 	/*
@@ -1374,6 +1411,7 @@
 	 * somewhat pessimize the simple resched case.
 	 */
 	irq_enter();
+	tick_nohz_full_check();
 	sched_ttwu_pending();
 
 	/*
@@ -1855,6 +1893,8 @@
 		kprobe_flush_task(prev);
 		put_task_struct(prev);
 	}
+
+	tick_nohz_task_switch(current);
 }
 
 #ifdef CONFIG_SMP
@@ -2118,7 +2158,7 @@
 	return load >> FSHIFT;
 }
 
-#ifdef CONFIG_NO_HZ
+#ifdef CONFIG_NO_HZ_COMMON
 /*
  * Handle NO_HZ for the global load-average.
  *
@@ -2344,12 +2384,12 @@
 	smp_wmb();
 	calc_load_idx++;
 }
-#else /* !CONFIG_NO_HZ */
+#else /* !CONFIG_NO_HZ_COMMON */
 
 static inline long calc_load_fold_idle(void) { return 0; }
 static inline void calc_global_nohz(void) { }
 
-#endif /* CONFIG_NO_HZ */
+#endif /* CONFIG_NO_HZ_COMMON */
 
 /*
  * calc_load - update the avenrun load estimates 10 ticks after the
@@ -2509,7 +2549,7 @@
 	sched_avg_update(this_rq);
 }
 
-#ifdef CONFIG_NO_HZ
+#ifdef CONFIG_NO_HZ_COMMON
 /*
  * There is no sane way to deal with nohz on smp when using jiffies because the
  * cpu doing the jiffies update might drift wrt the cpu doing the jiffy reading
@@ -2569,7 +2609,7 @@
 	}
 	raw_spin_unlock(&this_rq->lock);
 }
-#endif /* CONFIG_NO_HZ */
+#endif /* CONFIG_NO_HZ_COMMON */
 
 /*
  * Called from scheduler_tick()
@@ -2696,8 +2736,35 @@
 	rq->idle_balance = idle_cpu(cpu);
 	trigger_load_balance(rq, cpu);
 #endif
+	rq_last_tick_reset(rq);
 }
 
+#ifdef CONFIG_NO_HZ_FULL
+/**
+ * scheduler_tick_max_deferment
+ *
+ * Keep at least one tick per second when a single
+ * active task is running because the scheduler doesn't
+ * yet completely support a full dynticks environment.
+ *
+ * This makes sure that uptime, CFS vruntime, load
+ * balancing, etc... continue to move forward, even
+ * with a very low granularity.
+ */
+u64 scheduler_tick_max_deferment(void)
+{
+	struct rq *rq = this_rq();
+	unsigned long next, now = ACCESS_ONCE(jiffies);
+
+	next = rq->last_sched_tick + HZ;
+
+	if (time_before_eq(next, now))
+		return 0;
+
+	return jiffies_to_usecs(next - now) * NSEC_PER_USEC;
+}
+#endif
+
 notrace unsigned long get_parent_ip(unsigned long addr)
 {
 	if (in_lock_functions(addr)) {
@@ -6951,9 +7018,12 @@
 		INIT_LIST_HEAD(&rq->cfs_tasks);
 
 		rq_attach_root(rq, &def_root_domain);
-#ifdef CONFIG_NO_HZ
+#ifdef CONFIG_NO_HZ_COMMON
 		rq->nohz_flags = 0;
 #endif
+#ifdef CONFIG_NO_HZ_FULL
+		rq->last_sched_tick = 0;
+#endif
 #endif
 		init_rq_hrtick(rq);
 		atomic_set(&rq->nr_iowait, 0);
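
A worked example for scheduler_tick_max_deferment(), assuming HZ=1000:

	/* If the last tick on this runqueue was 250 jiffies ago:
	 *   next = rq->last_sched_tick + HZ    (750 jiffies ahead)
	 *   returned deferment = jiffies_to_usecs(750) * NSEC_PER_USEC
	 *                      = 750 ms
	 * If a full second has already elapsed (next <= now), 0 is
	 * returned and the tick cannot be deferred at all. */
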
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 8bf7081..c61a614 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -5355,7 +5355,7 @@
 	return 0;
 }
 
-#ifdef CONFIG_NO_HZ
+#ifdef CONFIG_NO_HZ_COMMON
 /*
  * idle load balancing details
  * - When one of the busy CPUs notice that there may be an idle rebalancing
@@ -5572,9 +5572,9 @@
 		rq->next_balance = next_balance;
 }
 
-#ifdef CONFIG_NO_HZ
+#ifdef CONFIG_NO_HZ_COMMON
 /*
- * In CONFIG_NO_HZ case, the idle balance kickee will do the
+ * In CONFIG_NO_HZ_COMMON case, the idle balance kickee will do the
  * rebalancing for all the cpus for whom scheduler ticks are stopped.
  */
 static void nohz_idle_balance(int this_cpu, enum cpu_idle_type idle)
@@ -5717,7 +5717,7 @@
 	if (time_after_eq(jiffies, rq->next_balance) &&
 	    likely(!on_null_domain(cpu)))
 		raise_softirq(SCHED_SOFTIRQ);
-#ifdef CONFIG_NO_HZ
+#ifdef CONFIG_NO_HZ_COMMON
 	if (nohz_kick_needed(rq, cpu) && likely(!on_null_domain(cpu)))
 		nohz_balancer_kick(cpu);
 #endif
@@ -6187,7 +6187,7 @@
 #ifdef CONFIG_SMP
 	open_softirq(SCHED_SOFTIRQ, run_rebalance_domains);
 
-#ifdef CONFIG_NO_HZ
+#ifdef CONFIG_NO_HZ_COMMON
 	nohz.next_balance = jiffies;
 	zalloc_cpumask_var(&nohz.idle_cpus_mask, GFP_NOWAIT);
 	cpu_notifier(sched_ilb_notifier, 0);
diff --git a/kernel/sched/idle_task.c b/kernel/sched/idle_task.c
index b8ce773..d8da010 100644
--- a/kernel/sched/idle_task.c
+++ b/kernel/sched/idle_task.c
@@ -17,6 +17,7 @@
 static void pre_schedule_idle(struct rq *rq, struct task_struct *prev)
 {
 	idle_exit_fair(rq);
+	rq_last_tick_reset(rq);
 }
 
 static void post_schedule_idle(struct rq *rq)
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 4c225c4..ce39224d 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -5,6 +5,7 @@
 #include <linux/mutex.h>
 #include <linux/spinlock.h>
 #include <linux/stop_machine.h>
+#include <linux/tick.h>
 
 #include "cpupri.h"
 #include "cpuacct.h"
@@ -405,10 +406,13 @@
 	#define CPU_LOAD_IDX_MAX 5
 	unsigned long cpu_load[CPU_LOAD_IDX_MAX];
 	unsigned long last_load_update_tick;
-#ifdef CONFIG_NO_HZ
+#ifdef CONFIG_NO_HZ_COMMON
 	u64 nohz_stamp;
 	unsigned long nohz_flags;
 #endif
+#ifdef CONFIG_NO_HZ_FULL
+	unsigned long last_sched_tick;
+#endif
 	int skip_clock_update;
 
 	/* capture load from *all* tasks on this cpu: */
@@ -1072,6 +1076,16 @@
 static inline void inc_nr_running(struct rq *rq)
 {
 	rq->nr_running++;
+
+#ifdef CONFIG_NO_HZ_FULL
+	if (rq->nr_running == 2) {
+		if (tick_nohz_full_cpu(rq->cpu)) {
+			/* Order rq->nr_running write against the IPI */
+			smp_wmb();
+			smp_send_reschedule(rq->cpu);
+		}
+	}
+#endif
 }
 
 static inline void dec_nr_running(struct rq *rq)
@@ -1079,6 +1093,13 @@
 	rq->nr_running--;
 }
 
+static inline void rq_last_tick_reset(struct rq *rq)
+{
+#ifdef CONFIG_NO_HZ_FULL
+	rq->last_sched_tick = jiffies;
+#endif
+}
+
 extern void update_rq_clock(struct rq *rq);
 
 extern void activate_task(struct rq *rq, struct task_struct *p, int flags);
@@ -1299,7 +1320,7 @@
 
 extern void account_cfs_bandwidth_used(int enabled, int was_enabled);
 
-#ifdef CONFIG_NO_HZ
+#ifdef CONFIG_NO_HZ_COMMON
 enum rq_nohz_flag_bits {
 	NOHZ_TICK_STOPPED,
 	NOHZ_BALANCE_KICK,
diff --git a/kernel/softirq.c b/kernel/softirq.c
index aa82723..b5197dc 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -329,6 +329,19 @@
 		wakeup_softirqd();
 }
 
+static inline void tick_irq_exit(void)
+{
+#ifdef CONFIG_NO_HZ_COMMON
+	int cpu = smp_processor_id();
+
+	/* Make sure that timer wheel updates are propagated */
+	if ((idle_cpu(cpu) && !need_resched()) || tick_nohz_full_cpu(cpu)) {
+		if (!in_interrupt())
+			tick_nohz_irq_exit();
+	}
+#endif
+}
+
 /*
  * Exit an interrupt context. Process softirqs if needed and possible:
  */
@@ -346,11 +359,7 @@
 	if (!in_interrupt() && local_softirq_pending())
 		invoke_softirq();
 
-#ifdef CONFIG_NO_HZ
-	/* Make sure that timer wheel updates are propagated */
-	if (idle_cpu(smp_processor_id()) && !in_interrupt() && !need_resched())
-		tick_nohz_irq_exit();
-#endif
+	tick_irq_exit();
 	rcu_irq_exit();
 }
 
diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig
index 24510d8..e4c07b0 100644
--- a/kernel/time/Kconfig
+++ b/kernel/time/Kconfig
@@ -64,20 +64,88 @@
 if GENERIC_CLOCKEVENTS
 menu "Timers subsystem"
 
-# Core internal switch. Selected by NO_HZ / HIGH_RES_TIMERS. This is
+# Core internal switch. Selected by NO_HZ_COMMON / HIGH_RES_TIMERS. This is
 # only related to the tick functionality. Oneshot clockevent devices
 # are supported independently of this.
 config TICK_ONESHOT
 	bool
 
-config NO_HZ
-	bool "Tickless System (Dynamic Ticks)"
+config NO_HZ_COMMON
+	bool
 	depends on !ARCH_USES_GETTIMEOFFSET && GENERIC_CLOCKEVENTS
 	select TICK_ONESHOT
+
+choice
+	prompt "Timer tick handling"
+	default NO_HZ_IDLE if NO_HZ
+
+config HZ_PERIODIC
+	bool "Periodic timer ticks (constant rate, no dynticks)"
 	help
-	  This option enables a tickless system: timer interrupts will
-	  only trigger on an as-needed basis both when the system is
-	  busy and when the system is idle.
+	  This option keeps the tick running periodically at a constant
+	  rate, even when the CPU doesn't need it.
+
+config NO_HZ_IDLE
+	bool "Idle dynticks system (tickless idle)"
+	depends on !ARCH_USES_GETTIMEOFFSET && GENERIC_CLOCKEVENTS
+	select NO_HZ_COMMON
+	help
+	  This option enables a tickless idle system: timer interrupts
+	  will only trigger on an as-needed basis when the system is idle.
+	  This is usually interesting for energy saving.
+
+	  Most of the time you want to say Y here.
+
+config NO_HZ_FULL
+	bool "Full dynticks system (tickless)"
+	# NO_HZ_COMMON dependency
+	depends on !ARCH_USES_GETTIMEOFFSET && GENERIC_CLOCKEVENTS
+	# We need at least one periodic CPU for timekeeping
+	depends on SMP
+	# RCU_USER_QS dependency
+	depends on HAVE_CONTEXT_TRACKING
+	# VIRT_CPU_ACCOUNTING_GEN dependency
+	depends on 64BIT
+	select NO_HZ_COMMON
+	select RCU_USER_QS
+	select RCU_NOCB_CPU
+	select VIRT_CPU_ACCOUNTING_GEN
+	select CONTEXT_TRACKING_FORCE
+	select IRQ_WORK
+	help
+	 Adaptively try to shut down the tick whenever possible, even when
+	 the CPU is running tasks. Typically this requires running a single
+	 task on the CPU. Chances for running tickless are maximized when
+	 the task mostly runs in userspace and has little kernel activity.
+
+	 You need to pass the desired range of dynticks CPUs in
+	 the nohz_full boot parameter.
+
+	 This is implemented at the expense of some overhead in user <-> kernel
+	 transitions: syscalls, exceptions and interrupts, even when the
+	 tick is dynamically off.
+
+	 Say N.
+
+endchoice
+
+config NO_HZ_FULL_ALL
+       bool "Full dynticks system on all CPUs by default"
+       depends on NO_HZ_FULL
+       help
+         If the user doesn't pass the nohz_full boot option to
+	 define the range of full dynticks CPUs, consider that all
+	 CPUs in the system are full dynticks by default.
+	 Note the boot CPU will still be kept outside the range to
+	 handle the timekeeping duty.
+
+config NO_HZ
+	bool "Old Idle dynticks config"
+	depends on !ARCH_USES_GETTIMEOFFSET && GENERIC_CLOCKEVENTS
+	help
+	  This is the old config entry that enables dynticks idle.
+	  We keep it around for a little while to preserve backward
+	  compatibility with older config files.
 
 config HIGH_RES_TIMERS
 	bool "High Resolution Timer Support"
diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c
index 61d00a8..206bbfb 100644
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@ -693,7 +693,8 @@
 		bc->event_handler = tick_handle_oneshot_broadcast;
 
 		/* Take the do_timer update */
-		tick_do_timer_cpu = cpu;
+		if (!tick_nohz_full_cpu(cpu))
+			tick_do_timer_cpu = cpu;
 
 		/*
 		 * We must be careful here. There might be other CPUs
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index 6176a3e..5d3fb10 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -163,7 +163,10 @@
 		 * this cpu:
 		 */
 		if (tick_do_timer_cpu == TICK_DO_TIMER_BOOT) {
-			tick_do_timer_cpu = cpu;
+			if (!tick_nohz_full_cpu(cpu))
+				tick_do_timer_cpu = cpu;
+			else
+				tick_do_timer_cpu = TICK_DO_TIMER_NONE;
 			tick_next_period = ktime_get();
 			tick_period = ktime_set(0, NSEC_PER_SEC / HZ);
 		}
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 225f8bf..bc67d42 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -21,11 +21,15 @@
 #include <linux/sched.h>
 #include <linux/module.h>
 #include <linux/irq_work.h>
+#include <linux/posix-timers.h>
+#include <linux/perf_event.h>
 
 #include <asm/irq_regs.h>
 
 #include "tick-internal.h"
 
+#include <trace/events/timer.h>
+
 /*
  * Per cpu nohz control structure
  */
@@ -104,7 +108,7 @@
 {
 	int cpu = smp_processor_id();
 
-#ifdef CONFIG_NO_HZ
+#ifdef CONFIG_NO_HZ_COMMON
 	/*
 	 * Check if the do_timer duty was dropped. We don't care about
 	 * concurrency: This happens only when the cpu in charge went
@@ -112,7 +116,8 @@
 	 * this duty, then the jiffies update is still serialized by
 	 * jiffies_lock.
 	 */
-	if (unlikely(tick_do_timer_cpu == TICK_DO_TIMER_NONE))
+	if (unlikely(tick_do_timer_cpu == TICK_DO_TIMER_NONE)
+	    && !tick_nohz_full_cpu(cpu))
 		tick_do_timer_cpu = cpu;
 #endif
 
@@ -123,7 +128,7 @@
 
 static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs)
 {
-#ifdef CONFIG_NO_HZ
+#ifdef CONFIG_NO_HZ_COMMON
 	/*
 	 * When we are idle and the tick is stopped, we have to touch
 	 * the watchdog as we might not schedule for a really long
@@ -142,10 +147,226 @@
 	profile_tick(CPU_PROFILING);
 }
 
+#ifdef CONFIG_NO_HZ_FULL
+static cpumask_var_t nohz_full_mask;
+bool have_nohz_full_mask;
+
+static bool can_stop_full_tick(void)
+{
+	WARN_ON_ONCE(!irqs_disabled());
+
+	if (!sched_can_stop_tick()) {
+		trace_tick_stop(0, "more than 1 task in runqueue\n");
+		return false;
+	}
+
+	if (!posix_cpu_timers_can_stop_tick(current)) {
+		trace_tick_stop(0, "posix timers running\n");
+		return false;
+	}
+
+	if (!perf_event_can_stop_tick()) {
+		trace_tick_stop(0, "perf events running\n");
+		return false;
+	}
+
+	/* sched_clock_tick() needs us? */
+#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
+	/*
+	 * TODO: kick full dynticks CPUs when
+	 * sched_clock_stable is set.
+	 */
+	if (!sched_clock_stable) {
+		trace_tick_stop(0, "unstable sched clock\n");
+		return false;
+	}
+#endif
+
+	return true;
+}
+
+static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now);
+
+/*
+ * Re-evaluate the need for the tick on the current CPU
+ * and restart it if necessary.
+ */
+void tick_nohz_full_check(void)
+{
+	struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched);
+
+	if (tick_nohz_full_cpu(smp_processor_id())) {
+		if (ts->tick_stopped && !is_idle_task(current)) {
+			if (!can_stop_full_tick())
+				tick_nohz_restart_sched_tick(ts, ktime_get());
+		}
+	}
+}
+
+static void nohz_full_kick_work_func(struct irq_work *work)
+{
+	tick_nohz_full_check();
+}
+
+static DEFINE_PER_CPU(struct irq_work, nohz_full_kick_work) = {
+	.func = nohz_full_kick_work_func,
+};
+
+/*
+ * Kick the current CPU if it's full dynticks in order to force it to
+ * re-evaluate its dependency on the tick and restart it if necessary.
+ */
+void tick_nohz_full_kick(void)
+{
+	if (tick_nohz_full_cpu(smp_processor_id()))
+		irq_work_queue(&__get_cpu_var(nohz_full_kick_work));
+}
+
+static void nohz_full_kick_ipi(void *info)
+{
+	tick_nohz_full_check();
+}
+
+/*
+ * Kick all full dynticks CPUs in order to force them to re-evaluate
+ * their dependency on the tick and restart it if necessary.
+ */
+void tick_nohz_full_kick_all(void)
+{
+	if (!have_nohz_full_mask)
+		return;
+
+	preempt_disable();
+	smp_call_function_many(nohz_full_mask,
+			       nohz_full_kick_ipi, NULL, false);
+	preempt_enable();
+}
+
+/*
+ * Re-evaluate the need for the tick as we switch the current task.
+ * It might need the tick due to per task/process properties:
+ * perf events, posix cpu timers, ...
+ */
+void tick_nohz_task_switch(struct task_struct *tsk)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);
+
+	if (!tick_nohz_full_cpu(smp_processor_id()))
+		goto out;
+
+	if (tick_nohz_tick_stopped() && !can_stop_full_tick())
+		tick_nohz_full_kick();
+
+out:
+	local_irq_restore(flags);
+}
+
+int tick_nohz_full_cpu(int cpu)
+{
+	if (!have_nohz_full_mask)
+		return 0;
+
+	return cpumask_test_cpu(cpu, nohz_full_mask);
+}
+
+/* Parse the boot-time nohz CPU list from the kernel parameters. */
+static int __init tick_nohz_full_setup(char *str)
+{
+	int cpu;
+
+	alloc_bootmem_cpumask_var(&nohz_full_mask);
+	if (cpulist_parse(str, nohz_full_mask) < 0) {
+		pr_warning("NOHZ: Incorrect nohz_full cpumask\n");
+		return 1;
+	}
+
+	cpu = smp_processor_id();
+	if (cpumask_test_cpu(cpu, nohz_full_mask)) {
+		pr_warning("NO_HZ: Clearing %d from nohz_full range for timekeeping\n", cpu);
+		cpumask_clear_cpu(cpu, nohz_full_mask);
+	}
+	have_nohz_full_mask = true;
+
+	return 1;
+}
+__setup("nohz_full=", tick_nohz_full_setup);
+
+static int __cpuinit tick_nohz_cpu_down_callback(struct notifier_block *nfb,
+						 unsigned long action,
+						 void *hcpu)
+{
+	unsigned int cpu = (unsigned long)hcpu;
+
+	switch (action & ~CPU_TASKS_FROZEN) {
+	case CPU_DOWN_PREPARE:
+		/*
+		 * If we handle the timekeeping duty for full dynticks CPUs,
+		 * we can't safely shut that CPU down.
+		 */
+		if (have_nohz_full_mask && tick_do_timer_cpu == cpu)
+			return -EINVAL;
+		break;
+	}
+	return NOTIFY_OK;
+}
+
+/*
+ * The worst case string length occurs with CPU ranges separated
+ * in steps of two: 0,2,4,6,...
+ * That takes NR_CPUS characters plus a trailing '\0'.
+ */
+static char __initdata nohz_full_buf[NR_CPUS + 1];
+
+static int tick_nohz_init_all(void)
+{
+	int err = -1;
+
+#ifdef CONFIG_NO_HZ_FULL_ALL
+	if (!alloc_cpumask_var(&nohz_full_mask, GFP_KERNEL)) {
+		pr_err("NO_HZ: Can't allocate full dynticks cpumask\n");
+		return err;
+	}
+	err = 0;
+	cpumask_setall(nohz_full_mask);
+	cpumask_clear_cpu(smp_processor_id(), nohz_full_mask);
+	have_nohz_full_mask = true;
+#endif
+	return err;
+}
+
+void __init tick_nohz_init(void)
+{
+	int cpu;
+
+	if (!have_nohz_full_mask) {
+		if (tick_nohz_init_all() < 0)
+			return;
+	}
+
+	cpu_notifier(tick_nohz_cpu_down_callback, 0);
+
+	/* Make sure full dynticks CPUs are also RCU nocbs */
+	for_each_cpu(cpu, nohz_full_mask) {
+		if (!rcu_is_nocb_cpu(cpu)) {
+			pr_warning("NO_HZ: CPU %d is not RCU nocb: "
+				   "cleared from nohz_full range", cpu);
+			cpumask_clear_cpu(cpu, nohz_full_mask);
+		}
+	}
+
+	cpulist_scnprintf(nohz_full_buf, sizeof(nohz_full_buf), nohz_full_mask);
+	pr_info("NO_HZ: Full dynticks CPUs: %s.\n", nohz_full_buf);
+}
+#else
+#define have_nohz_full_mask (0)
+#endif
+
 /*
  * NOHZ - aka dynamic tick functionality
  */
-#ifdef CONFIG_NO_HZ
+#ifdef CONFIG_NO_HZ_COMMON
 /*
  * NO HZ enabled ?
  */
@@ -345,11 +566,12 @@
 			delta_jiffies = rcu_delta_jiffies;
 		}
 	}
+
 	/*
-	 * Do not stop the tick, if we are only one off
-	 * or if the cpu is required for rcu
+	 * Do not stop the tick if we are only one jiffy off (or less),
+	 * or if the cpu is required for RCU:
 	 */
-	if (!ts->tick_stopped && delta_jiffies == 1)
+	if (!ts->tick_stopped && delta_jiffies <= 1)
 		goto out;
 
 	/* Schedule the tick, if we are at least one jiffie off */
@@ -378,6 +600,13 @@
 			time_delta = KTIME_MAX;
 		}
 
+#ifdef CONFIG_NO_HZ_FULL
+		if (!ts->inidle) {
+			time_delta = min(time_delta,
+					 scheduler_tick_max_deferment());
+		}
+#endif
+
 		/*
 		 * calculate the expiry time for the next timer wheel
 		 * timer. delta_jiffies >= NEXT_TIMER_MAX_DELTA signals
@@ -421,6 +650,7 @@
 
 			ts->last_tick = hrtimer_get_expires(&ts->sched_timer);
 			ts->tick_stopped = 1;
+			trace_tick_stop(1, " ");
 		}
 
 		/*
@@ -457,6 +687,24 @@
 	return ret;
 }
 
+static void tick_nohz_full_stop_tick(struct tick_sched *ts)
+{
+#ifdef CONFIG_NO_HZ_FULL
+	int cpu = smp_processor_id();
+
+	if (!tick_nohz_full_cpu(cpu) || is_idle_task(current))
+		return;
+
+	if (!ts->tick_stopped && ts->nohz_mode == NOHZ_MODE_INACTIVE)
+		return;
+
+	if (!can_stop_full_tick())
+		return;
+
+	tick_nohz_stop_sched_tick(ts, ktime_get(), cpu);
+#endif
+}
+
 static bool can_stop_idle_tick(int cpu, struct tick_sched *ts)
 {
 	/*
@@ -489,6 +737,21 @@
 		return false;
 	}
 
+	if (have_nohz_full_mask) {
+		/*
+		 * Keep the tick alive to guarantee timekeeping progression
+		 * if there are full dynticks CPUs around
+		 */
+		if (tick_do_timer_cpu == cpu)
+			return false;
+		/*
+		 * Boot safety: make sure the timekeeping duty has been
+		 * assigned before entering dyntick-idle mode,
+		 */
+		if (tick_do_timer_cpu == TICK_DO_TIMER_NONE)
+			return false;
+	}
+
 	return true;
 }
 
@@ -568,12 +831,13 @@
 {
 	struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched);
 
-	if (!ts->inidle)
-		return;
-
-	/* Cancel the timer because CPU already waken up from the C-states*/
-	menu_hrtimer_cancel();
-	__tick_nohz_idle_enter(ts);
+	if (ts->inidle) {
+		/* Cancel the timer because the CPU has already woken up from the C-states */
+		menu_hrtimer_cancel();
+		__tick_nohz_idle_enter(ts);
+	} else {
+		tick_nohz_full_stop_tick(ts);
+	}
 }
 
 /**
@@ -802,7 +1066,7 @@
 static inline void tick_nohz_switch_to_nohz(void) { }
 static inline void tick_check_nohz(int cpu) { }
 
-#endif /* NO_HZ */
+#endif /* CONFIG_NO_HZ_COMMON */
 
 /*
  * Called from irq_enter to notify about the possible interruption of idle()
@@ -887,14 +1151,14 @@
 		now = ktime_get();
 	}
 
-#ifdef CONFIG_NO_HZ
+#ifdef CONFIG_NO_HZ_COMMON
 	if (tick_nohz_enabled)
 		ts->nohz_mode = NOHZ_MODE_HIGHRES;
 #endif
 }
 #endif /* HIGH_RES_TIMERS */
 
-#if defined CONFIG_NO_HZ || defined CONFIG_HIGH_RES_TIMERS
+#if defined CONFIG_NO_HZ_COMMON || defined CONFIG_HIGH_RES_TIMERS
 void tick_cancel_sched_timer(int cpu)
 {
 	struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
diff --git a/kernel/timer.c b/kernel/timer.c
index 09bca8c..a860bba 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -739,7 +739,7 @@
 
 	cpu = smp_processor_id();
 
-#if defined(CONFIG_NO_HZ) && defined(CONFIG_SMP)
+#if defined(CONFIG_NO_HZ_COMMON) && defined(CONFIG_SMP)
 	if (!pinned && get_sysctl_timer_migration() && idle_cpu(cpu))
 		cpu = get_nohz_timer_target();
 #endif
@@ -931,14 +931,14 @@
 	debug_activate(timer, timer->expires);
 	internal_add_timer(base, timer);
 	/*
-	 * Check whether the other CPU is idle and needs to be
-	 * triggered to reevaluate the timer wheel when nohz is
-	 * active. We are protected against the other CPU fiddling
+	 * Check whether the other CPU is in dynticks mode and needs
+	 * to be triggered to reevaluate the timer wheel.
+	 * We are protected against the other CPU fiddling
 	 * with the timer by holding the timer base lock. This also
-	 * makes sure that a CPU on the way to idle can not evaluate
-	 * the timer wheel.
+	 * makes sure that a CPU on the way to stop its tick can not
+	 * evaluate the timer wheel.
 	 */
-	wake_up_idle_cpu(cpu);
+	wake_up_nohz_cpu(cpu);
 	spin_unlock_irqrestore(&base->lock, flags);
 }
 EXPORT_SYMBOL_GPL(add_timer_on);
@@ -1189,7 +1189,7 @@
 	spin_unlock_irq(&base->lock);
 }
 
-#ifdef CONFIG_NO_HZ
+#ifdef CONFIG_NO_HZ_COMMON
 /*
  * Find out when the next timer event is due to happen. This
  * is used on S/390 to stop all activity when a CPU is idle.
diff --git a/lib/oid_registry.c b/lib/oid_registry.c
index d8de11f..318f382 100644
--- a/lib/oid_registry.c
+++ b/lib/oid_registry.c
@@ -9,6 +9,7 @@
  * 2 of the Licence, or (at your option) any later version.
  */
 
+#include <linux/module.h>
 #include <linux/export.h>
 #include <linux/oid_registry.h>
 #include <linux/kernel.h>
@@ -16,6 +17,10 @@
 #include <linux/bug.h>
 #include "oid_registry_data.c"
 
+MODULE_DESCRIPTION("OID Registry");
+MODULE_AUTHOR("Red Hat, Inc.");
+MODULE_LICENSE("GPL");
+
 /**
  * look_up_OID - Find an OID registration for the specified data
  * @data: Binary representation of the OID
diff --git a/net/ceph/Makefile b/net/ceph/Makefile
index e87ef43..958d9856 100644
--- a/net/ceph/Makefile
+++ b/net/ceph/Makefile
@@ -11,5 +11,5 @@
 	crypto.o armor.o \
 	auth_x.o \
 	ceph_fs.o ceph_strings.o ceph_hash.o \
-	pagevec.o
+	pagevec.o snapshot.o
 
diff --git a/net/ceph/auth.c b/net/ceph/auth.c
index b4bf4ac..6b923bc 100644
--- a/net/ceph/auth.c
+++ b/net/ceph/auth.c
@@ -47,6 +47,7 @@
 	if (!ac)
 		goto out;
 
+	mutex_init(&ac->mutex);
 	ac->negotiating = true;
 	if (name)
 		ac->name = name;
@@ -73,10 +74,12 @@
  */
 void ceph_auth_reset(struct ceph_auth_client *ac)
 {
+	mutex_lock(&ac->mutex);
 	dout("auth_reset %p\n", ac);
 	if (ac->ops && !ac->negotiating)
 		ac->ops->reset(ac);
 	ac->negotiating = true;
+	mutex_unlock(&ac->mutex);
 }
 
 int ceph_entity_name_encode(const char *name, void **p, void *end)
@@ -102,6 +105,7 @@
 	int i, num;
 	int ret;
 
+	mutex_lock(&ac->mutex);
 	dout("auth_build_hello\n");
 	monhdr->have_version = 0;
 	monhdr->session_mon = cpu_to_le16(-1);
@@ -122,15 +126,19 @@
 
 	ret = ceph_entity_name_encode(ac->name, &p, end);
 	if (ret < 0)
-		return ret;
+		goto out;
 	ceph_decode_need(&p, end, sizeof(u64), bad);
 	ceph_encode_64(&p, ac->global_id);
 
 	ceph_encode_32(&lenp, p - lenp - sizeof(u32));
-	return p - buf;
+	ret = p - buf;
+out:
+	mutex_unlock(&ac->mutex);
+	return ret;
 
 bad:
-	return -ERANGE;
+	ret = -ERANGE;
+	goto out;
 }
 
 static int ceph_build_auth_request(struct ceph_auth_client *ac,
@@ -151,11 +159,13 @@
 	if (ret < 0) {
 		pr_err("error %d building auth method %s request\n", ret,
 		       ac->ops->name);
-		return ret;
+		goto out;
 	}
 	dout(" built request %d bytes\n", ret);
 	ceph_encode_32(&p, ret);
-	return p + ret - msg_buf;
+	ret = p + ret - msg_buf;
+out:
+	return ret;
 }
 
 /*
@@ -176,6 +186,7 @@
 	int result_msg_len;
 	int ret = -EINVAL;
 
+	mutex_lock(&ac->mutex);
 	dout("handle_auth_reply %p %p\n", p, end);
 	ceph_decode_need(&p, end, sizeof(u32) * 3 + sizeof(u64), bad);
 	protocol = ceph_decode_32(&p);
@@ -227,33 +238,103 @@
 
 	ret = ac->ops->handle_reply(ac, result, payload, payload_end);
 	if (ret == -EAGAIN) {
-		return ceph_build_auth_request(ac, reply_buf, reply_len);
+		ret = ceph_build_auth_request(ac, reply_buf, reply_len);
 	} else if (ret) {
 		pr_err("auth method '%s' error %d\n", ac->ops->name, ret);
-		return ret;
 	}
-	return 0;
+
+out:
+	mutex_unlock(&ac->mutex);
+	return ret;
 
 bad:
 	pr_err("failed to decode auth msg\n");
-out:
-	return ret;
+	ret = -EINVAL;
+	goto out;
 }
 
 int ceph_build_auth(struct ceph_auth_client *ac,
 		    void *msg_buf, size_t msg_len)
 {
+	int ret = 0;
+
+	mutex_lock(&ac->mutex);
 	if (!ac->protocol)
-		return ceph_auth_build_hello(ac, msg_buf, msg_len);
-	BUG_ON(!ac->ops);
-	if (ac->ops->should_authenticate(ac))
-		return ceph_build_auth_request(ac, msg_buf, msg_len);
-	return 0;
+		ret = ceph_auth_build_hello(ac, msg_buf, msg_len);
+	else if (ac->ops->should_authenticate(ac))
+		ret = ceph_build_auth_request(ac, msg_buf, msg_len);
+	mutex_unlock(&ac->mutex);
+	return ret;
 }
 
 int ceph_auth_is_authenticated(struct ceph_auth_client *ac)
 {
-	if (!ac->ops)
-		return 0;
-	return ac->ops->is_authenticated(ac);
+	int ret = 0;
+
+	mutex_lock(&ac->mutex);
+	if (ac->ops)
+		ret = ac->ops->is_authenticated(ac);
+	mutex_unlock(&ac->mutex);
+	return ret;
 }
+EXPORT_SYMBOL(ceph_auth_is_authenticated);
+
+int ceph_auth_create_authorizer(struct ceph_auth_client *ac,
+				int peer_type,
+				struct ceph_auth_handshake *auth)
+{
+	int ret = 0;
+
+	mutex_lock(&ac->mutex);
+	if (ac->ops && ac->ops->create_authorizer)
+		ret = ac->ops->create_authorizer(ac, peer_type, auth);
+	mutex_unlock(&ac->mutex);
+	return ret;
+}
+EXPORT_SYMBOL(ceph_auth_create_authorizer);
+
+void ceph_auth_destroy_authorizer(struct ceph_auth_client *ac,
+				  struct ceph_authorizer *a)
+{
+	mutex_lock(&ac->mutex);
+	if (ac->ops && ac->ops->destroy_authorizer)
+		ac->ops->destroy_authorizer(ac, a);
+	mutex_unlock(&ac->mutex);
+}
+EXPORT_SYMBOL(ceph_auth_destroy_authorizer);
+
+int ceph_auth_update_authorizer(struct ceph_auth_client *ac,
+				int peer_type,
+				struct ceph_auth_handshake *a)
+{
+	int ret = 0;
+
+	mutex_lock(&ac->mutex);
+	if (ac->ops && ac->ops->update_authorizer)
+		ret = ac->ops->update_authorizer(ac, peer_type, a);
+	mutex_unlock(&ac->mutex);
+	return ret;
+}
+EXPORT_SYMBOL(ceph_auth_update_authorizer);
+
+int ceph_auth_verify_authorizer_reply(struct ceph_auth_client *ac,
+				      struct ceph_authorizer *a, size_t len)
+{
+	int ret = 0;
+
+	mutex_lock(&ac->mutex);
+	if (ac->ops && ac->ops->verify_authorizer_reply)
+		ret = ac->ops->verify_authorizer_reply(ac, a, len);
+	mutex_unlock(&ac->mutex);
+	return ret;
+}
+EXPORT_SYMBOL(ceph_auth_verify_authorizer_reply);
+
+void ceph_auth_invalidate_authorizer(struct ceph_auth_client *ac, int peer_type)
+{
+	mutex_lock(&ac->mutex);
+	if (ac->ops && ac->ops->invalidate_authorizer)
+		ac->ops->invalidate_authorizer(ac, peer_type);
+	mutex_unlock(&ac->mutex);
+}
+EXPORT_SYMBOL(ceph_auth_invalidate_authorizer);
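
The net effect of the auth.c changes is an API shift: consumers now go
through exported wrappers that take ac->mutex internally, instead of
checking and dereferencing ac->ops themselves, which closes races against
ceph_auth_reset() flipping the negotiation state. A hypothetical call site
after the change:

	/* Illustrative caller: no ac->ops peeking, locking is internal. */
	if (ceph_auth_create_authorizer(ac, CEPH_ENTITY_TYPE_OSD, auth) < 0)
		pr_err("failed to create authorizer\n");
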
diff --git a/net/ceph/auth_x.c b/net/ceph/auth_x.c
index a16bf14..96238ba 100644
--- a/net/ceph/auth_x.c
+++ b/net/ceph/auth_x.c
@@ -298,6 +298,7 @@
 			return -ENOMEM;
 	}
 	au->service = th->service;
+	au->secret_id = th->secret_id;
 
 	msg_a = au->buf->vec.iov_base;
 	msg_a->struct_v = 1;
@@ -555,6 +556,26 @@
 	return 0;
 }
 
+static int ceph_x_update_authorizer(
+	struct ceph_auth_client *ac, int peer_type,
+	struct ceph_auth_handshake *auth)
+{
+	struct ceph_x_authorizer *au;
+	struct ceph_x_ticket_handler *th;
+
+	th = get_ticket_handler(ac, peer_type);
+	if (IS_ERR(th))
+		return PTR_ERR(th);
+
+	au = (struct ceph_x_authorizer *)auth->authorizer;
+	if (au->secret_id < th->secret_id) {
+		dout("ceph_x_update_authorizer service %u secret %llu < %llu\n",
+		     au->service, au->secret_id, th->secret_id);
+		return ceph_x_build_authorizer(ac, th, au);
+	}
+	return 0;
+}
+
 static int ceph_x_verify_authorizer_reply(struct ceph_auth_client *ac,
 					  struct ceph_authorizer *a, size_t len)
 {
@@ -630,7 +651,7 @@
 
 	th = get_ticket_handler(ac, peer_type);
 	if (!IS_ERR(th))
-		remove_ticket_handler(ac, th);
+		memset(&th->validity, 0, sizeof(th->validity));
 }
 
 
@@ -641,6 +662,7 @@
 	.build_request = ceph_x_build_request,
 	.handle_reply = ceph_x_handle_reply,
 	.create_authorizer = ceph_x_create_authorizer,
+	.update_authorizer = ceph_x_update_authorizer,
 	.verify_authorizer_reply = ceph_x_verify_authorizer_reply,
 	.destroy_authorizer = ceph_x_destroy_authorizer,
 	.invalidate_authorizer = ceph_x_invalidate_authorizer,
diff --git a/net/ceph/auth_x.h b/net/ceph/auth_x.h
index f459e93..c5a058da 100644
--- a/net/ceph/auth_x.h
+++ b/net/ceph/auth_x.h
@@ -29,6 +29,7 @@
 	struct ceph_buffer *buf;
 	unsigned int service;
 	u64 nonce;
+	u64 secret_id;
 	char reply_buf[128];  /* big enough for encrypted blob */
 };
 
diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
index e65e6e4..34b11ee 100644
--- a/net/ceph/ceph_common.c
+++ b/net/ceph/ceph_common.c
@@ -606,11 +606,17 @@
 	if (ret < 0)
 		goto out_crypto;
 
+	ret = ceph_osdc_setup();
+	if (ret < 0)
+		goto out_msgr;
+
 	pr_info("loaded (mon/osd proto %d/%d)\n",
 		CEPH_MONC_PROTOCOL, CEPH_OSDC_PROTOCOL);
 
 	return 0;
 
+out_msgr:
+	ceph_msgr_exit();
 out_crypto:
 	ceph_crypto_shutdown();
 out_debugfs:
@@ -622,6 +628,7 @@
 static void __exit exit_ceph_lib(void)
 {
 	dout("exit_ceph_lib\n");
+	ceph_osdc_cleanup();
 	ceph_msgr_exit();
 	ceph_crypto_shutdown();
 	ceph_debugfs_cleanup();
diff --git a/net/ceph/debugfs.c b/net/ceph/debugfs.c
index 00d051f..83661cd 100644
--- a/net/ceph/debugfs.c
+++ b/net/ceph/debugfs.c
@@ -123,8 +123,8 @@
 	mutex_lock(&osdc->request_mutex);
 	for (p = rb_first(&osdc->requests); p; p = rb_next(p)) {
 		struct ceph_osd_request *req;
+		unsigned int i;
 		int opcode;
-		int i;
 
 		req = rb_entry(p, struct ceph_osd_request, r_node);
 
@@ -142,7 +142,7 @@
 			seq_printf(s, "\t");
 
 		for (i = 0; i < req->r_num_ops; i++) {
-			opcode = le16_to_cpu(req->r_request_ops[i].op);
+			opcode = req->r_ops[i].op;
 			seq_printf(s, "\t%s", ceph_osd_op_name(opcode));
 		}
 
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index 2c0669f..eb0a46a 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -21,6 +21,9 @@
 #include <linux/ceph/pagelist.h>
 #include <linux/export.h>
 
+#define list_entry_next(pos, member)					\
+	list_entry(pos->member.next, typeof(*pos), member)
+
 /*
  * Ceph uses the messenger to exchange ceph_msg messages with other
  * hosts in the system.  The messenger provides ordered and reliable
@@ -149,6 +152,11 @@
 	return test_and_set_bit(con_flag, &con->flags);
 }
 
+/* Slab caches for frequently-allocated structures */
+
+static struct kmem_cache	*ceph_msg_cache;
+static struct kmem_cache	*ceph_msg_data_cache;
+
 /* static tag bytes (protocol control messages) */
 static char tag_msg = CEPH_MSGR_TAG_MSG;
 static char tag_ack = CEPH_MSGR_TAG_ACK;
@@ -223,6 +231,41 @@
  */
 static struct workqueue_struct *ceph_msgr_wq;
 
+static int ceph_msgr_slab_init(void)
+{
+	BUG_ON(ceph_msg_cache);
+	ceph_msg_cache = kmem_cache_create("ceph_msg",
+					sizeof (struct ceph_msg),
+					__alignof__(struct ceph_msg), 0, NULL);
+
+	if (!ceph_msg_cache)
+		return -ENOMEM;
+
+	BUG_ON(ceph_msg_data_cache);
+	ceph_msg_data_cache = kmem_cache_create("ceph_msg_data",
+					sizeof (struct ceph_msg_data),
+					__alignof__(struct ceph_msg_data),
+					0, NULL);
+	if (ceph_msg_data_cache)
+		return 0;
+
+	kmem_cache_destroy(ceph_msg_cache);
+	ceph_msg_cache = NULL;
+
+	return -ENOMEM;
+}
+
+static void ceph_msgr_slab_exit(void)
+{
+	BUG_ON(!ceph_msg_data_cache);
+	kmem_cache_destroy(ceph_msg_data_cache);
+	ceph_msg_data_cache = NULL;
+
+	BUG_ON(!ceph_msg_cache);
+	kmem_cache_destroy(ceph_msg_cache);
+	ceph_msg_cache = NULL;
+}
+
 static void _ceph_msgr_exit(void)
 {
 	if (ceph_msgr_wq) {
@@ -230,6 +273,8 @@
 		ceph_msgr_wq = NULL;
 	}
 
+	ceph_msgr_slab_exit();
+
 	BUG_ON(zero_page == NULL);
 	kunmap(zero_page);
 	page_cache_release(zero_page);
@@ -242,6 +287,9 @@
 	zero_page = ZERO_PAGE(0);
 	page_cache_get(zero_page);
 
+	if (ceph_msgr_slab_init())
+		return -ENOMEM;
+
 	ceph_msgr_wq = alloc_workqueue("ceph-msgr", WQ_NON_REENTRANT, 0);
 	if (ceph_msgr_wq)
 		return 0;
@@ -471,6 +519,22 @@
 	return r;
 }
 
+static int ceph_tcp_recvpage(struct socket *sock, struct page *page,
+		     int page_offset, size_t length)
+{
+	void *kaddr;
+	int ret;
+
+	BUG_ON(page_offset + length > PAGE_SIZE);
+
+	kaddr = kmap(page);
+	BUG_ON(!kaddr);
+	ret = ceph_tcp_recvmsg(sock, kaddr + page_offset, length);
+	kunmap(page);
+
+	return ret;
+}
+
 /*
  * write something.  @more is true if caller will be sending more data
  * shortly.
@@ -493,7 +557,7 @@
 }
 
 static int ceph_tcp_sendpage(struct socket *sock, struct page *page,
-		     int offset, size_t size, int more)
+		     int offset, size_t size, bool more)
 {
 	int flags = MSG_DONTWAIT | MSG_NOSIGNAL | (more ? MSG_MORE : MSG_EOR);
 	int ret;
@@ -697,50 +761,397 @@
 }
 
 #ifdef CONFIG_BLOCK
-static void init_bio_iter(struct bio *bio, struct bio **iter, int *seg)
+
+/*
+ * For a bio data item, a piece is whatever remains of the next
+ * entry in the current bio iovec, or the first entry in the next
+ * bio in the list.
+ */
+static void ceph_msg_data_bio_cursor_init(struct ceph_msg_data_cursor *cursor,
+					size_t length)
 {
-	if (!bio) {
-		*iter = NULL;
-		*seg = 0;
-		return;
-	}
-	*iter = bio;
-	*seg = bio->bi_idx;
+	struct ceph_msg_data *data = cursor->data;
+	struct bio *bio;
+
+	BUG_ON(data->type != CEPH_MSG_DATA_BIO);
+
+	bio = data->bio;
+	BUG_ON(!bio);
+	BUG_ON(!bio->bi_vcnt);
+
+	cursor->resid = min(length, data->bio_length);
+	cursor->bio = bio;
+	cursor->vector_index = 0;
+	cursor->vector_offset = 0;
+	cursor->last_piece = length <= bio->bi_io_vec[0].bv_len;
 }
 
-static void iter_bio_next(struct bio **bio_iter, int *seg)
+static struct page *ceph_msg_data_bio_next(struct ceph_msg_data_cursor *cursor,
+						size_t *page_offset,
+						size_t *length)
 {
-	if (*bio_iter == NULL)
-		return;
+	struct ceph_msg_data *data = cursor->data;
+	struct bio *bio;
+	struct bio_vec *bio_vec;
+	unsigned int index;
 
-	BUG_ON(*seg >= (*bio_iter)->bi_vcnt);
+	BUG_ON(data->type != CEPH_MSG_DATA_BIO);
 
-	(*seg)++;
-	if (*seg == (*bio_iter)->bi_vcnt)
-		init_bio_iter((*bio_iter)->bi_next, bio_iter, seg);
-}
-#endif
+	bio = cursor->bio;
+	BUG_ON(!bio);
 
-static void prepare_write_message_data(struct ceph_connection *con)
-{
-	struct ceph_msg *msg = con->out_msg;
+	index = cursor->vector_index;
+	BUG_ON(index >= (unsigned int) bio->bi_vcnt);
 
-	BUG_ON(!msg);
-	BUG_ON(!msg->hdr.data_len);
-
-	/* initialize page iterator */
-	con->out_msg_pos.page = 0;
-	if (msg->pages)
-		con->out_msg_pos.page_pos = msg->page_alignment;
+	bio_vec = &bio->bi_io_vec[index];
+	BUG_ON(cursor->vector_offset >= bio_vec->bv_len);
+	*page_offset = (size_t) (bio_vec->bv_offset + cursor->vector_offset);
+	BUG_ON(*page_offset >= PAGE_SIZE);
+	if (cursor->last_piece) /* pagelist offset is always 0 */
+		*length = cursor->resid;
 	else
-		con->out_msg_pos.page_pos = 0;
+		*length = (size_t) (bio_vec->bv_len - cursor->vector_offset);
+	BUG_ON(*length > cursor->resid);
+	BUG_ON(*page_offset + *length > PAGE_SIZE);
+
+	return bio_vec->bv_page;
+}
+
+static bool ceph_msg_data_bio_advance(struct ceph_msg_data_cursor *cursor,
+					size_t bytes)
+{
+	struct bio *bio;
+	struct bio_vec *bio_vec;
+	unsigned int index;
+
+	BUG_ON(cursor->data->type != CEPH_MSG_DATA_BIO);
+
+	bio = cursor->bio;
+	BUG_ON(!bio);
+
+	index = cursor->vector_index;
+	BUG_ON(index >= (unsigned int) bio->bi_vcnt);
+	bio_vec = &bio->bi_io_vec[index];
+
+	/* Advance the cursor offset */
+
+	BUG_ON(cursor->resid < bytes);
+	cursor->resid -= bytes;
+	cursor->vector_offset += bytes;
+	if (cursor->vector_offset < bio_vec->bv_len)
+		return false;	/* more bytes to process in this segment */
+	BUG_ON(cursor->vector_offset != bio_vec->bv_len);
+
+	/* Move on to the next segment, and possibly the next bio */
+
+	if (++index == (unsigned int) bio->bi_vcnt) {
+		bio = bio->bi_next;
+		index = 0;
+	}
+	cursor->bio = bio;
+	cursor->vector_index = index;
+	cursor->vector_offset = 0;
+
+	if (!cursor->last_piece) {
+		BUG_ON(!cursor->resid);
+		BUG_ON(!bio);
+		/* A short read is OK, so use <= rather than == */
+		if (cursor->resid <= bio->bi_io_vec[index].bv_len)
+			cursor->last_piece = true;
+	}
+
+	return true;
+}
+#endif /* CONFIG_BLOCK */
+
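The bio cursor above steps through two levels: within one bio it walks the
bi_io_vec[] entries by index, and when the index reaches bi_vcnt it hops to
bi_next and starts over at entry zero. A toy traversal in the same shape
(the structs are illustrative stand-ins, not the kernel's struct bio):

	#include <stdio.h>

	struct vec { int len; };
	struct toy_bio {
		struct vec io_vec[2];
		int vcnt;
		struct toy_bio *next;
	};

	int main(void)
	{
		struct toy_bio b2 = { { { 512 }, { 0 } }, 1, NULL };
		struct toy_bio b1 = { { { 4096 }, { 1024 } }, 2, &b2 };
		struct toy_bio *bio = &b1;
		int index = 0;

		/* Same rollover rule as ceph_msg_data_bio_advance() */
		while (bio) {
			printf("piece: vec %d, %d bytes\n",
			       index, bio->io_vec[index].len);
			if (++index == bio->vcnt) {
				bio = bio->next;	/* next bio in chain */
				index = 0;
			}
		}
		return 0;
	}
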
+/*
+ * For a page array, a piece comes from the first page in the array
+ * that has not already been fully consumed.
+ */
+static void ceph_msg_data_pages_cursor_init(struct ceph_msg_data_cursor *cursor,
+					size_t length)
+{
+	struct ceph_msg_data *data = cursor->data;
+	int page_count;
+
+	BUG_ON(data->type != CEPH_MSG_DATA_PAGES);
+
+	BUG_ON(!data->pages);
+	BUG_ON(!data->length);
+
+	cursor->resid = min(length, data->length);
+	page_count = calc_pages_for(data->alignment, (u64)data->length);
+	cursor->page_offset = data->alignment & ~PAGE_MASK;
+	cursor->page_index = 0;
+	BUG_ON(page_count > (int)USHRT_MAX);
+	cursor->page_count = (unsigned short)page_count;
+	BUG_ON(length > SIZE_MAX - cursor->page_offset);
+	cursor->last_piece = (size_t)cursor->page_offset + length <= PAGE_SIZE;
+}
+
+static struct page *
+ceph_msg_data_pages_next(struct ceph_msg_data_cursor *cursor,
+					size_t *page_offset, size_t *length)
+{
+	struct ceph_msg_data *data = cursor->data;
+
+	BUG_ON(data->type != CEPH_MSG_DATA_PAGES);
+
+	BUG_ON(cursor->page_index >= cursor->page_count);
+	BUG_ON(cursor->page_offset >= PAGE_SIZE);
+
+	*page_offset = cursor->page_offset;
+	if (cursor->last_piece)
+		*length = cursor->resid;
+	else
+		*length = PAGE_SIZE - *page_offset;
+
+	return data->pages[cursor->page_index];
+}
+
+static bool ceph_msg_data_pages_advance(struct ceph_msg_data_cursor *cursor,
+						size_t bytes)
+{
+	BUG_ON(cursor->data->type != CEPH_MSG_DATA_PAGES);
+
+	BUG_ON(cursor->page_offset + bytes > PAGE_SIZE);
+
+	/* Advance the cursor page offset */
+
+	cursor->resid -= bytes;
+	cursor->page_offset = (cursor->page_offset + bytes) & ~PAGE_MASK;
+	if (!bytes || cursor->page_offset)
+		return false;	/* more bytes to process in the current page */
+
+	/* Move on to the next page; offset is already at 0 */
+
+	BUG_ON(cursor->page_index >= cursor->page_count);
+	cursor->page_index++;
+	cursor->last_piece = cursor->resid <= PAGE_SIZE;
+
+	return true;
+}
+
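The (page_offset + bytes) & ~PAGE_MASK step above is what makes the offset
wrap to zero exactly when a page boundary is crossed; any partial advance
leaves a nonzero offset and the cursor stays on the current page. With
4 KiB pages:

	#include <stdio.h>

	#define PAGE_SIZE 4096UL
	#define PAGE_MASK (~(PAGE_SIZE - 1))

	int main(void)
	{
		unsigned long off = 1024;

		off = (off + 1024) & ~PAGE_MASK;
		printf("%lu\n", off);	/* 2048: still in this page */
		off = (off + 2048) & ~PAGE_MASK;
		printf("%lu\n", off);	/* 0: page consumed, move on */
		return 0;
	}
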
+/*
+ * For a pagelist, a piece is whatever remains to be consumed in the
+ * first page in the list, or the front of the next page.
+ */
+static void
+ceph_msg_data_pagelist_cursor_init(struct ceph_msg_data_cursor *cursor,
+					size_t length)
+{
+	struct ceph_msg_data *data = cursor->data;
+	struct ceph_pagelist *pagelist;
+	struct page *page;
+
+	BUG_ON(data->type != CEPH_MSG_DATA_PAGELIST);
+
+	pagelist = data->pagelist;
+	BUG_ON(!pagelist);
+
+	if (!length)
+		return;		/* pagelist can be assigned but empty */
+
+	BUG_ON(list_empty(&pagelist->head));
+	page = list_first_entry(&pagelist->head, struct page, lru);
+
+	cursor->resid = min(length, pagelist->length);
+	cursor->page = page;
+	cursor->offset = 0;
+	cursor->last_piece = cursor->resid <= PAGE_SIZE;
+}
+
+static struct page *
+ceph_msg_data_pagelist_next(struct ceph_msg_data_cursor *cursor,
+				size_t *page_offset, size_t *length)
+{
+	struct ceph_msg_data *data = cursor->data;
+	struct ceph_pagelist *pagelist;
+
+	BUG_ON(data->type != CEPH_MSG_DATA_PAGELIST);
+
+	pagelist = data->pagelist;
+	BUG_ON(!pagelist);
+
+	BUG_ON(!cursor->page);
+	BUG_ON(cursor->offset + cursor->resid != pagelist->length);
+
+	/* offset of first page in pagelist is always 0 */
+	*page_offset = cursor->offset & ~PAGE_MASK;
+	if (cursor->last_piece)
+		*length = cursor->resid;
+	else
+		*length = PAGE_SIZE - *page_offset;
+
+	return cursor->page;
+}
+
+static bool ceph_msg_data_pagelist_advance(struct ceph_msg_data_cursor *cursor,
+						size_t bytes)
+{
+	struct ceph_msg_data *data = cursor->data;
+	struct ceph_pagelist *pagelist;
+
+	BUG_ON(data->type != CEPH_MSG_DATA_PAGELIST);
+
+	pagelist = data->pagelist;
+	BUG_ON(!pagelist);
+
+	BUG_ON(cursor->offset + cursor->resid != pagelist->length);
+	BUG_ON((cursor->offset & ~PAGE_MASK) + bytes > PAGE_SIZE);
+
+	/* Advance the cursor offset */
+
+	cursor->resid -= bytes;
+	cursor->offset += bytes;
+	/* offset of first page in pagelist is always 0 */
+	if (!bytes || cursor->offset & ~PAGE_MASK)
+		return false;	/* more bytes to process in the current page */
+
+	/* Move on to the next page */
+
+	BUG_ON(list_is_last(&cursor->page->lru, &pagelist->head));
+	cursor->page = list_entry_next(cursor->page, lru);
+	cursor->last_piece = cursor->resid <= PAGE_SIZE;
+
+	return true;
+}
+
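Both pagelist helpers assert the same invariant: cursor->offset counts bytes
already consumed, cursor->resid counts bytes still to go, and their sum must
equal pagelist->length on every call. The piece geometry then falls out of
the byte offset, as in this sketch (illustrative only, fixed 4 KiB page):

	#include <stddef.h>

	#define PAGE_SIZE 4096UL
	#define PAGE_MASK (~(PAGE_SIZE - 1))

	/* Piece length at a pagelist cursor position; mirrors the
	 * last_piece test in ceph_msg_data_pagelist_next(). */
	static size_t pagelist_piece_len(size_t consumed, size_t resid,
					 int last_piece)
	{
		size_t page_offset = consumed & ~PAGE_MASK;

		return last_piece ? resid : PAGE_SIZE - page_offset;
	}
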
+/*
+ * Message data is handled (sent or received) in pieces, where each
+ * piece resides on a single page.  The network layer might not
+ * consume an entire piece at once.  A data item's cursor keeps
+ * track of which piece is next to process and how much remains to
+ * be processed in that piece.  It also tracks whether the current
+ * piece is the last one in the data item.
+ */
+static void __ceph_msg_data_cursor_init(struct ceph_msg_data_cursor *cursor)
+{
+	size_t length = cursor->total_resid;
+
+	switch (cursor->data->type) {
+	case CEPH_MSG_DATA_PAGELIST:
+		ceph_msg_data_pagelist_cursor_init(cursor, length);
+		break;
+	case CEPH_MSG_DATA_PAGES:
+		ceph_msg_data_pages_cursor_init(cursor, length);
+		break;
 #ifdef CONFIG_BLOCK
-	if (msg->bio)
-		init_bio_iter(msg->bio, &msg->bio_iter, &msg->bio_seg);
-#endif
-	con->out_msg_pos.data_pos = 0;
-	con->out_msg_pos.did_page_crc = false;
-	con->out_more = 1;  /* data + footer will follow */
+	case CEPH_MSG_DATA_BIO:
+		ceph_msg_data_bio_cursor_init(cursor, length);
+		break;
+#endif /* CONFIG_BLOCK */
+	case CEPH_MSG_DATA_NONE:
+	default:
+		/* BUG(); */
+		break;
+	}
+	cursor->need_crc = true;
+}
+
+static void ceph_msg_data_cursor_init(struct ceph_msg *msg, size_t length)
+{
+	struct ceph_msg_data_cursor *cursor = &msg->cursor;
+	struct ceph_msg_data *data;
+
+	BUG_ON(!length);
+	BUG_ON(length > msg->data_length);
+	BUG_ON(list_empty(&msg->data));
+
+	cursor->data_head = &msg->data;
+	cursor->total_resid = length;
+	data = list_first_entry(&msg->data, struct ceph_msg_data, links);
+	cursor->data = data;
+
+	__ceph_msg_data_cursor_init(cursor);
+}
+
+/*
+ * Return the page containing the next piece to process for a given
+ * data item, and supply the page offset and length of that piece.
+ * Indicate whether this is the last piece in this data item.
+ */
+static struct page *ceph_msg_data_next(struct ceph_msg_data_cursor *cursor,
+					size_t *page_offset, size_t *length,
+					bool *last_piece)
+{
+	struct page *page;
+
+	switch (cursor->data->type) {
+	case CEPH_MSG_DATA_PAGELIST:
+		page = ceph_msg_data_pagelist_next(cursor, page_offset, length);
+		break;
+	case CEPH_MSG_DATA_PAGES:
+		page = ceph_msg_data_pages_next(cursor, page_offset, length);
+		break;
+#ifdef CONFIG_BLOCK
+	case CEPH_MSG_DATA_BIO:
+		page = ceph_msg_data_bio_next(cursor, page_offset, length);
+		break;
+#endif /* CONFIG_BLOCK */
+	case CEPH_MSG_DATA_NONE:
+	default:
+		page = NULL;
+		break;
+	}
+	BUG_ON(!page);
+	BUG_ON(*page_offset + *length > PAGE_SIZE);
+	BUG_ON(!*length);
+	if (last_piece)
+		*last_piece = cursor->last_piece;
+
+	return page;
+}
+
+/*
+ * Returns true if the result moves the cursor on to the next piece
+ * of the data item.
+ */
+static bool ceph_msg_data_advance(struct ceph_msg_data_cursor *cursor,
+				size_t bytes)
+{
+	bool new_piece;
+
+	BUG_ON(bytes > cursor->resid);
+	switch (cursor->data->type) {
+	case CEPH_MSG_DATA_PAGELIST:
+		new_piece = ceph_msg_data_pagelist_advance(cursor, bytes);
+		break;
+	case CEPH_MSG_DATA_PAGES:
+		new_piece = ceph_msg_data_pages_advance(cursor, bytes);
+		break;
+#ifdef CONFIG_BLOCK
+	case CEPH_MSG_DATA_BIO:
+		new_piece = ceph_msg_data_bio_advance(cursor, bytes);
+		break;
+#endif /* CONFIG_BLOCK */
+	case CEPH_MSG_DATA_NONE:
+	default:
+		BUG();
+		break;
+	}
+	cursor->total_resid -= bytes;
+
+	if (!cursor->resid && cursor->total_resid) {
+		WARN_ON(!cursor->last_piece);
+		BUG_ON(list_is_last(&cursor->data->links, cursor->data_head));
+		cursor->data = list_entry_next(cursor->data, links);
+		__ceph_msg_data_cursor_init(cursor);
+		new_piece = true;
+	}
+	cursor->need_crc = new_piece;
+
+	return new_piece;
+}
+
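Taken together, ceph_msg_data_next() and ceph_msg_data_advance() give both
the send and receive paths one uniform loop, independent of whether the data
item is a page array, a pagelist, or a bio. A condensed sketch of that
pattern, mirroring write_partial_message_data() below; xfer() and its
argument list are placeholders standing in for ceph_tcp_sendpage() or
ceph_tcp_recvpage():

	while (cursor->resid) {
		struct page *page;
		size_t page_offset;
		size_t length;
		bool last_piece;
		int ret;

		page = ceph_msg_data_next(cursor, &page_offset, &length,
					  &last_piece);
		ret = xfer(sock, page, page_offset, length, last_piece);
		if (ret <= 0)
			return ret;	/* 0: socket full; <0: error */

		/* Short transfers are fine; advance by what moved. */
		ceph_msg_data_advance(cursor, (size_t)ret);
	}
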
+static void prepare_message_data(struct ceph_msg *msg, u32 data_len)
+{
+	BUG_ON(!msg);
+	BUG_ON(!data_len);
+
+	/* Initialize data cursor */
+
+	ceph_msg_data_cursor_init(msg, (size_t)data_len);
 }
 
 /*
@@ -803,16 +1214,12 @@
 		m->hdr.seq = cpu_to_le64(++con->out_seq);
 		m->needs_out_seq = false;
 	}
-#ifdef CONFIG_BLOCK
-	else
-		m->bio_iter = NULL;
-#endif
+	WARN_ON(m->data_length != le32_to_cpu(m->hdr.data_len));
 
-	dout("prepare_write_message %p seq %lld type %d len %d+%d+%d %d pgs\n",
+	dout("prepare_write_message %p seq %lld type %d len %d+%d+%zd\n",
 	     m, con->out_seq, le16_to_cpu(m->hdr.type),
 	     le32_to_cpu(m->hdr.front_len), le32_to_cpu(m->hdr.middle_len),
-	     le32_to_cpu(m->hdr.data_len),
-	     m->nr_pages);
+	     m->data_length);
 	BUG_ON(le32_to_cpu(m->hdr.front_len) != m->front.iov_len);
 
 	/* tag + hdr + front + middle */
@@ -843,11 +1250,13 @@
 
 	/* is there a data payload? */
 	con->out_msg->footer.data_crc = 0;
-	if (m->hdr.data_len)
-		prepare_write_message_data(con);
-	else
+	if (m->data_length) {
+		prepare_message_data(con->out_msg, m->data_length);
+		con->out_more = 1;  /* data + footer will follow */
+	} else {
 		/* no, queue up footer too and be done */
 		prepare_write_message_footer(con);
+	}
 
 	con_flag_set(con, CON_FLAG_WRITE_PENDING);
 }
@@ -874,6 +1283,24 @@
 }
 
 /*
+ * Prepare to share the seq during handshake
+ */
+static void prepare_write_seq(struct ceph_connection *con)
+{
+	dout("prepare_write_seq %p %llu -> %llu\n", con,
+	     con->in_seq_acked, con->in_seq);
+	con->in_seq_acked = con->in_seq;
+
+	con_out_kvec_reset(con);
+
+	con->out_temp_ack = cpu_to_le64(con->in_seq_acked);
+	con_out_kvec_add(con, sizeof (con->out_temp_ack),
+			 &con->out_temp_ack);
+
+	con_flag_set(con, CON_FLAG_WRITE_PENDING);
+}
+
+/*
  * Prepare to write keepalive byte.
  */
 static void prepare_write_keepalive(struct ceph_connection *con)
@@ -1022,35 +1449,19 @@
 	return ret;  /* done! */
 }
 
-static void out_msg_pos_next(struct ceph_connection *con, struct page *page,
-			size_t len, size_t sent, bool in_trail)
+static u32 ceph_crc32c_page(u32 crc, struct page *page,
+				unsigned int page_offset,
+				unsigned int length)
 {
-	struct ceph_msg *msg = con->out_msg;
+	char *kaddr;
 
-	BUG_ON(!msg);
-	BUG_ON(!sent);
+	kaddr = kmap(page);
+	BUG_ON(kaddr == NULL);
+	crc = crc32c(crc, kaddr + page_offset, length);
+	kunmap(page);
 
-	con->out_msg_pos.data_pos += sent;
-	con->out_msg_pos.page_pos += sent;
-	if (sent < len)
-		return;
-
-	BUG_ON(sent != len);
-	con->out_msg_pos.page_pos = 0;
-	con->out_msg_pos.page++;
-	con->out_msg_pos.did_page_crc = false;
-	if (in_trail)
-		list_move_tail(&page->lru,
-			       &msg->trail->head);
-	else if (msg->pagelist)
-		list_move_tail(&page->lru,
-			       &msg->pagelist->head);
-#ifdef CONFIG_BLOCK
-	else if (msg->bio)
-		iter_bio_next(&msg->bio_iter, &msg->bio_seg);
-#endif
+	return crc;
 }
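ceph_crc32c_page() relies on crc32c composing incrementally: seeding the
next chunk with the previous CRC gives the same result as one pass over the
whole buffer, which is why the send and receive loops below can checksum the
payload piece by piece. A small kernel-style illustration of the property
(assumes linux/crc32c.h):

	#include <linux/crc32c.h>
	#include <linux/types.h>

	/* Checksumming two adjacent pieces equals checksumming the
	 * concatenation in one call; this always returns true. */
	static bool crc_pieces_match(const u8 *buf, unsigned int a,
				     unsigned int b)
	{
		u32 piecewise = crc32c(crc32c(0, buf, a), buf + a, b);
		u32 whole = crc32c(0, buf, a + b);

		return piecewise == whole;
	}
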
-
 /*
  * Write as much message data payload as we can.  If we finish, queue
  * up the footer.
@@ -1058,21 +1469,17 @@
  *  0 -> socket full, but more to do
  * <0 -> error
  */
-static int write_partial_msg_pages(struct ceph_connection *con)
+static int write_partial_message_data(struct ceph_connection *con)
 {
 	struct ceph_msg *msg = con->out_msg;
-	unsigned int data_len = le32_to_cpu(msg->hdr.data_len);
-	size_t len;
+	struct ceph_msg_data_cursor *cursor = &msg->cursor;
 	bool do_datacrc = !con->msgr->nocrc;
-	int ret;
-	int total_max_write;
-	bool in_trail = false;
-	const size_t trail_len = (msg->trail ? msg->trail->length : 0);
-	const size_t trail_off = data_len - trail_len;
+	u32 crc;
 
-	dout("write_partial_msg_pages %p msg %p page %d/%d offset %d\n",
-	     con, msg, con->out_msg_pos.page, msg->nr_pages,
-	     con->out_msg_pos.page_pos);
+	dout("%s %p msg %p\n", __func__, con, msg);
+
+	if (list_empty(&msg->data))
+		return -EINVAL;
 
 	/*
 	 * Iterate through each page that contains data to be
@@ -1082,72 +1489,41 @@
 	 * need to map the page.  If we have no pages, they have
 	 * been revoked, so use the zero page.
 	 */
-	while (data_len > con->out_msg_pos.data_pos) {
-		struct page *page = NULL;
-		int max_write = PAGE_SIZE;
-		int bio_offset = 0;
+	crc = do_datacrc ? le32_to_cpu(msg->footer.data_crc) : 0;
+	while (cursor->resid) {
+		struct page *page;
+		size_t page_offset;
+		size_t length;
+		bool last_piece;
+		bool need_crc;
+		int ret;
 
-		in_trail = in_trail || con->out_msg_pos.data_pos >= trail_off;
-		if (!in_trail)
-			total_max_write = trail_off - con->out_msg_pos.data_pos;
+		page = ceph_msg_data_next(&msg->cursor, &page_offset, &length,
+							&last_piece);
+		ret = ceph_tcp_sendpage(con->sock, page, page_offset,
+				      length, last_piece);
+		if (ret <= 0) {
+			if (do_datacrc)
+				msg->footer.data_crc = cpu_to_le32(crc);
 
-		if (in_trail) {
-			total_max_write = data_len - con->out_msg_pos.data_pos;
-
-			page = list_first_entry(&msg->trail->head,
-						struct page, lru);
-		} else if (msg->pages) {
-			page = msg->pages[con->out_msg_pos.page];
-		} else if (msg->pagelist) {
-			page = list_first_entry(&msg->pagelist->head,
-						struct page, lru);
-#ifdef CONFIG_BLOCK
-		} else if (msg->bio) {
-			struct bio_vec *bv;
-
-			bv = bio_iovec_idx(msg->bio_iter, msg->bio_seg);
-			page = bv->bv_page;
-			bio_offset = bv->bv_offset;
-			max_write = bv->bv_len;
-#endif
-		} else {
-			page = zero_page;
+			return ret;
 		}
-		len = min_t(int, max_write - con->out_msg_pos.page_pos,
-			    total_max_write);
-
-		if (do_datacrc && !con->out_msg_pos.did_page_crc) {
-			void *base;
-			u32 crc = le32_to_cpu(msg->footer.data_crc);
-			char *kaddr;
-
-			kaddr = kmap(page);
-			BUG_ON(kaddr == NULL);
-			base = kaddr + con->out_msg_pos.page_pos + bio_offset;
-			crc = crc32c(crc, base, len);
-			kunmap(page);
-			msg->footer.data_crc = cpu_to_le32(crc);
-			con->out_msg_pos.did_page_crc = true;
-		}
-		ret = ceph_tcp_sendpage(con->sock, page,
-				      con->out_msg_pos.page_pos + bio_offset,
-				      len, 1);
-		if (ret <= 0)
-			goto out;
-
-		out_msg_pos_next(con, page, len, (size_t) ret, in_trail);
+		if (do_datacrc && cursor->need_crc)
+			crc = ceph_crc32c_page(crc, page, page_offset, length);
+		need_crc = ceph_msg_data_advance(&msg->cursor, (size_t)ret);
 	}
 
-	dout("write_partial_msg_pages %p msg %p done\n", con, msg);
+	dout("%s %p msg %p done\n", __func__, con, msg);
 
 	/* prepare and queue up footer, too */
-	if (!do_datacrc)
+	if (do_datacrc)
+		msg->footer.data_crc = cpu_to_le32(crc);
+	else
 		msg->footer.flags |= CEPH_MSG_FOOTER_NOCRC;
 	con_out_kvec_reset(con);
 	prepare_write_message_footer(con);
-	ret = 1;
-out:
-	return ret;
+
+	return 1;	/* must return > 0 to indicate success */
 }
 
 /*
@@ -1160,7 +1536,7 @@
 	while (con->out_skip > 0) {
 		size_t size = min(con->out_skip, (int) PAGE_CACHE_SIZE);
 
-		ret = ceph_tcp_sendpage(con->sock, zero_page, 0, size, 1);
+		ret = ceph_tcp_sendpage(con->sock, zero_page, 0, size, true);
 		if (ret <= 0)
 			goto out;
 		con->out_skip -= ret;
@@ -1191,6 +1567,13 @@
 	con->in_base_pos = 0;
 }
 
+static void prepare_read_seq(struct ceph_connection *con)
+{
+	dout("prepare_read_seq %p\n", con);
+	con->in_base_pos = 0;
+	con->in_tag = CEPH_MSGR_TAG_SEQ;
+}
+
 static void prepare_read_tag(struct ceph_connection *con)
 {
 	dout("prepare_read_tag %p\n", con);
@@ -1597,7 +1980,6 @@
 			con->error_msg = "connect authorization failure";
 			return -1;
 		}
-		con->auth_retry = 1;
 		con_out_kvec_reset(con);
 		ret = prepare_write_connect(con);
 		if (ret < 0)
@@ -1668,6 +2050,7 @@
 		prepare_read_connect(con);
 		break;
 
+	case CEPH_MSGR_TAG_SEQ:
 	case CEPH_MSGR_TAG_READY:
 		if (req_feat & ~server_feat) {
 			pr_err("%s%lld %s protocol feature mismatch,"
@@ -1682,7 +2065,7 @@
 
 		WARN_ON(con->state != CON_STATE_NEGOTIATING);
 		con->state = CON_STATE_OPEN;
-
+		con->auth_retry = 0;    /* we authenticated; clear flag */
 		con->peer_global_seq = le32_to_cpu(con->in_reply.global_seq);
 		con->connect_seq++;
 		con->peer_features = server_feat;
@@ -1698,7 +2081,12 @@
 
 		con->delay = 0;      /* reset backoff memory */
 
-		prepare_read_tag(con);
+		if (con->in_reply.tag == CEPH_MSGR_TAG_SEQ) {
+			prepare_write_seq(con);
+			prepare_read_seq(con);
+		} else {
+			prepare_read_tag(con);
+		}
 		break;
 
 	case CEPH_MSGR_TAG_WAIT:
@@ -1732,7 +2120,6 @@
 	return read_partial(con, end, size, &con->in_temp_ack);
 }
 
-
 /*
  * We can finally discard anything that's been acked.
  */
@@ -1757,8 +2144,6 @@
 }
 
 
-
-
 static int read_partial_message_section(struct ceph_connection *con,
 					struct kvec *section,
 					unsigned int sec_len, u32 *crc)
@@ -1782,77 +2167,49 @@
 	return 1;
 }
 
-static int ceph_con_in_msg_alloc(struct ceph_connection *con, int *skip);
-
-static int read_partial_message_pages(struct ceph_connection *con,
-				      struct page **pages,
-				      unsigned int data_len, bool do_datacrc)
+static int read_partial_msg_data(struct ceph_connection *con)
 {
-	void *p;
+	struct ceph_msg *msg = con->in_msg;
+	struct ceph_msg_data_cursor *cursor = &msg->cursor;
+	const bool do_datacrc = !con->msgr->nocrc;
+	struct page *page;
+	size_t page_offset;
+	size_t length;
+	u32 crc = 0;
 	int ret;
-	int left;
 
-	left = min((int)(data_len - con->in_msg_pos.data_pos),
-		   (int)(PAGE_SIZE - con->in_msg_pos.page_pos));
-	/* (page) data */
-	BUG_ON(pages == NULL);
-	p = kmap(pages[con->in_msg_pos.page]);
-	ret = ceph_tcp_recvmsg(con->sock, p + con->in_msg_pos.page_pos,
-			       left);
-	if (ret > 0 && do_datacrc)
-		con->in_data_crc =
-			crc32c(con->in_data_crc,
-				  p + con->in_msg_pos.page_pos, ret);
-	kunmap(pages[con->in_msg_pos.page]);
-	if (ret <= 0)
-		return ret;
-	con->in_msg_pos.data_pos += ret;
-	con->in_msg_pos.page_pos += ret;
-	if (con->in_msg_pos.page_pos == PAGE_SIZE) {
-		con->in_msg_pos.page_pos = 0;
-		con->in_msg_pos.page++;
+	BUG_ON(!msg);
+	if (list_empty(&msg->data))
+		return -EIO;
+
+	if (do_datacrc)
+		crc = con->in_data_crc;
+	while (cursor->resid) {
+		page = ceph_msg_data_next(&msg->cursor, &page_offset, &length,
+							NULL);
+		ret = ceph_tcp_recvpage(con->sock, page, page_offset, length);
+		if (ret <= 0) {
+			if (do_datacrc)
+				con->in_data_crc = crc;
+
+			return ret;
+		}
+
+		if (do_datacrc)
+			crc = ceph_crc32c_page(crc, page, page_offset, ret);
+		(void) ceph_msg_data_advance(&msg->cursor, (size_t)ret);
 	}
+	if (do_datacrc)
+		con->in_data_crc = crc;
 
-	return ret;
+	return 1;	/* must return > 0 to indicate success */
 }
 
-#ifdef CONFIG_BLOCK
-static int read_partial_message_bio(struct ceph_connection *con,
-				    struct bio **bio_iter, int *bio_seg,
-				    unsigned int data_len, bool do_datacrc)
-{
-	struct bio_vec *bv = bio_iovec_idx(*bio_iter, *bio_seg);
-	void *p;
-	int ret, left;
-
-	left = min((int)(data_len - con->in_msg_pos.data_pos),
-		   (int)(bv->bv_len - con->in_msg_pos.page_pos));
-
-	p = kmap(bv->bv_page) + bv->bv_offset;
-
-	ret = ceph_tcp_recvmsg(con->sock, p + con->in_msg_pos.page_pos,
-			       left);
-	if (ret > 0 && do_datacrc)
-		con->in_data_crc =
-			crc32c(con->in_data_crc,
-				  p + con->in_msg_pos.page_pos, ret);
-	kunmap(bv->bv_page);
-	if (ret <= 0)
-		return ret;
-	con->in_msg_pos.data_pos += ret;
-	con->in_msg_pos.page_pos += ret;
-	if (con->in_msg_pos.page_pos == bv->bv_len) {
-		con->in_msg_pos.page_pos = 0;
-		iter_bio_next(bio_iter, bio_seg);
-	}
-
-	return ret;
-}
-#endif
-
 /*
  * read (part of) a message.
  */
+static int ceph_con_in_msg_alloc(struct ceph_connection *con, int *skip);
+
 static int read_partial_message(struct ceph_connection *con)
 {
 	struct ceph_msg *m = con->in_msg;
@@ -1885,7 +2242,7 @@
 	if (front_len > CEPH_MSG_MAX_FRONT_LEN)
 		return -EIO;
 	middle_len = le32_to_cpu(con->in_hdr.middle_len);
-	if (middle_len > CEPH_MSG_MAX_DATA_LEN)
+	if (middle_len > CEPH_MSG_MAX_MIDDLE_LEN)
 		return -EIO;
 	data_len = le32_to_cpu(con->in_hdr.data_len);
 	if (data_len > CEPH_MSG_MAX_DATA_LEN)
@@ -1914,14 +2271,22 @@
 		int skip = 0;
 
 		dout("got hdr type %d front %d data %d\n", con->in_hdr.type,
-		     con->in_hdr.front_len, con->in_hdr.data_len);
+		     front_len, data_len);
 		ret = ceph_con_in_msg_alloc(con, &skip);
 		if (ret < 0)
 			return ret;
+
+		BUG_ON(!con->in_msg ^ skip);
+		if (con->in_msg && data_len > con->in_msg->data_length) {
+			pr_warning("%s skipping long message (%u > %zd)\n",
+				__func__, data_len, con->in_msg->data_length);
+			ceph_msg_put(con->in_msg);
+			con->in_msg = NULL;
+			skip = 1;
+		}
 		if (skip) {
 			/* skip this message */
 			dout("alloc_msg said skip message\n");
-			BUG_ON(con->in_msg);
 			con->in_base_pos = -front_len - middle_len - data_len -
 				sizeof(m->footer);
 			con->in_tag = CEPH_MSGR_TAG_READY;
@@ -1936,17 +2301,10 @@
 		if (m->middle)
 			m->middle->vec.iov_len = 0;
 
-		con->in_msg_pos.page = 0;
-		if (m->pages)
-			con->in_msg_pos.page_pos = m->page_alignment;
-		else
-			con->in_msg_pos.page_pos = 0;
-		con->in_msg_pos.data_pos = 0;
+		/* prepare for data payload, if any */
 
-#ifdef CONFIG_BLOCK
-		if (m->bio)
-			init_bio_iter(m->bio, &m->bio_iter, &m->bio_seg);
-#endif
+		if (data_len)
+			prepare_message_data(con->in_msg, data_len);
 	}
 
 	/* front */
@@ -1965,24 +2323,10 @@
 	}
 
 	/* (page) data */
-	while (con->in_msg_pos.data_pos < data_len) {
-		if (m->pages) {
-			ret = read_partial_message_pages(con, m->pages,
-						 data_len, do_datacrc);
-			if (ret <= 0)
-				return ret;
-#ifdef CONFIG_BLOCK
-		} else if (m->bio) {
-			BUG_ON(!m->bio_iter);
-			ret = read_partial_message_bio(con,
-						 &m->bio_iter, &m->bio_seg,
-						 data_len, do_datacrc);
-			if (ret <= 0)
-				return ret;
-#endif
-		} else {
-			BUG_ON(1);
-		}
+	if (data_len) {
+		ret = read_partial_msg_data(con);
+		if (ret <= 0)
+			return ret;
 	}
 
 	/* footer */
@@ -2108,13 +2452,13 @@
 			goto do_next;
 		}
 
-		ret = write_partial_msg_pages(con);
+		ret = write_partial_message_data(con);
 		if (ret == 1)
 			goto more_kvec;  /* we need to send the footer, too! */
 		if (ret == 0)
 			goto out;
 		if (ret < 0) {
-			dout("try_write write_partial_msg_pages err %d\n",
+			dout("try_write write_partial_message_data err %d\n",
 			     ret);
 			goto out;
 		}
@@ -2266,7 +2610,12 @@
 			prepare_read_tag(con);
 		goto more;
 	}
-	if (con->in_tag == CEPH_MSGR_TAG_ACK) {
+	if (con->in_tag == CEPH_MSGR_TAG_ACK ||
+	    con->in_tag == CEPH_MSGR_TAG_SEQ) {
+		/*
+		 * the final handshake seq exchange is semantically
+		 * equivalent to an ACK
+		 */
 		ret = read_partial_ack(con);
 		if (ret <= 0)
 			goto out;
@@ -2672,6 +3021,88 @@
 }
 EXPORT_SYMBOL(ceph_con_keepalive);
 
+static struct ceph_msg_data *ceph_msg_data_create(enum ceph_msg_data_type type)
+{
+	struct ceph_msg_data *data;
+
+	if (WARN_ON(!ceph_msg_data_type_valid(type)))
+		return NULL;
+
+	data = kmem_cache_zalloc(ceph_msg_data_cache, GFP_NOFS);
+	if (data)
+		data->type = type;
+	INIT_LIST_HEAD(&data->links);
+
+	return data;
+}
+
+static void ceph_msg_data_destroy(struct ceph_msg_data *data)
+{
+	if (!data)
+		return;
+
+	WARN_ON(!list_empty(&data->links));
+	if (data->type == CEPH_MSG_DATA_PAGELIST) {
+		ceph_pagelist_release(data->pagelist);
+		kfree(data->pagelist);
+	}
+	kmem_cache_free(ceph_msg_data_cache, data);
+}
+
+void ceph_msg_data_add_pages(struct ceph_msg *msg, struct page **pages,
+		size_t length, size_t alignment)
+{
+	struct ceph_msg_data *data;
+
+	BUG_ON(!pages);
+	BUG_ON(!length);
+
+	data = ceph_msg_data_create(CEPH_MSG_DATA_PAGES);
+	BUG_ON(!data);
+	data->pages = pages;
+	data->length = length;
+	data->alignment = alignment & ~PAGE_MASK;
+
+	list_add_tail(&data->links, &msg->data);
+	msg->data_length += length;
+}
+EXPORT_SYMBOL(ceph_msg_data_add_pages);
+
+void ceph_msg_data_add_pagelist(struct ceph_msg *msg,
+				struct ceph_pagelist *pagelist)
+{
+	struct ceph_msg_data *data;
+
+	BUG_ON(!pagelist);
+	BUG_ON(!pagelist->length);
+
+	data = ceph_msg_data_create(CEPH_MSG_DATA_PAGELIST);
+	BUG_ON(!data);
+	data->pagelist = pagelist;
+
+	list_add_tail(&data->links, &msg->data);
+	msg->data_length += pagelist->length;
+}
+EXPORT_SYMBOL(ceph_msg_data_add_pagelist);
+
+#ifdef	CONFIG_BLOCK
+void ceph_msg_data_add_bio(struct ceph_msg *msg, struct bio *bio,
+		size_t length)
+{
+	struct ceph_msg_data *data;
+
+	BUG_ON(!bio);
+
+	data = ceph_msg_data_create(CEPH_MSG_DATA_BIO);
+	BUG_ON(!data);
+	data->bio = bio;
+	data->bio_length = length;
+
+	list_add_tail(&data->links, &msg->data);
+	msg->data_length += length;
+}
+EXPORT_SYMBOL(ceph_msg_data_add_bio);
+#endif	/* CONFIG_BLOCK */
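
Each of these helpers appends one ceph_msg_data item to msg->data and grows
msg->data_length; the cursor later walks the items in the order they were
added, so a single message can mix data sources. A hypothetical caller
(msg, pages, page_off, len, and pagelist are all made-up names):

	/* Attach a page vector and then a pagelist to one message. */
	ceph_msg_data_add_pages(msg, pages, len, page_off);
	ceph_msg_data_add_pagelist(msg, pagelist);

	/* msg->data_length is now len + pagelist->length; the
	 * hdr.data_len the caller encodes must match (see the
	 * WARN_ON in prepare_write_message() above). */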
 
 /*
  * construct a new message with given type, size
@@ -2682,49 +3113,20 @@
 {
 	struct ceph_msg *m;
 
-	m = kmalloc(sizeof(*m), flags);
+	m = kmem_cache_zalloc(ceph_msg_cache, flags);
 	if (m == NULL)
 		goto out;
-	kref_init(&m->kref);
 
-	m->con = NULL;
-	INIT_LIST_HEAD(&m->list_head);
-
-	m->hdr.tid = 0;
 	m->hdr.type = cpu_to_le16(type);
 	m->hdr.priority = cpu_to_le16(CEPH_MSG_PRIO_DEFAULT);
-	m->hdr.version = 0;
 	m->hdr.front_len = cpu_to_le32(front_len);
-	m->hdr.middle_len = 0;
-	m->hdr.data_len = 0;
-	m->hdr.data_off = 0;
-	m->hdr.reserved = 0;
-	m->footer.front_crc = 0;
-	m->footer.middle_crc = 0;
-	m->footer.data_crc = 0;
-	m->footer.flags = 0;
-	m->front_max = front_len;
-	m->front_is_vmalloc = false;
-	m->more_to_follow = false;
-	m->ack_stamp = 0;
-	m->pool = NULL;
 
-	/* middle */
-	m->middle = NULL;
-
-	/* data */
-	m->nr_pages = 0;
-	m->page_alignment = 0;
-	m->pages = NULL;
-	m->pagelist = NULL;
-#ifdef	CONFIG_BLOCK
-	m->bio = NULL;
-	m->bio_iter = NULL;
-	m->bio_seg = 0;
-#endif	/* CONFIG_BLOCK */
-	m->trail = NULL;
+	INIT_LIST_HEAD(&m->list_head);
+	kref_init(&m->kref);
+	INIT_LIST_HEAD(&m->data);
 
 	/* front */
+	m->front_max = front_len;
 	if (front_len) {
 		if (front_len > PAGE_CACHE_SIZE) {
 			m->front.iov_base = __vmalloc(front_len, flags,
@@ -2802,49 +3204,37 @@
 static int ceph_con_in_msg_alloc(struct ceph_connection *con, int *skip)
 {
 	struct ceph_msg_header *hdr = &con->in_hdr;
-	int type = le16_to_cpu(hdr->type);
-	int front_len = le32_to_cpu(hdr->front_len);
 	int middle_len = le32_to_cpu(hdr->middle_len);
+	struct ceph_msg *msg;
 	int ret = 0;
 
 	BUG_ON(con->in_msg != NULL);
+	BUG_ON(!con->ops->alloc_msg);
 
-	if (con->ops->alloc_msg) {
-		struct ceph_msg *msg;
-
-		mutex_unlock(&con->mutex);
-		msg = con->ops->alloc_msg(con, hdr, skip);
-		mutex_lock(&con->mutex);
-		if (con->state != CON_STATE_OPEN) {
-			if (msg)
-				ceph_msg_put(msg);
-			return -EAGAIN;
-		}
-		con->in_msg = msg;
-		if (con->in_msg) {
-			con->in_msg->con = con->ops->get(con);
-			BUG_ON(con->in_msg->con == NULL);
-		}
-		if (*skip) {
-			con->in_msg = NULL;
-			return 0;
-		}
-		if (!con->in_msg) {
-			con->error_msg =
-				"error allocating memory for incoming message";
-			return -ENOMEM;
-		}
+	mutex_unlock(&con->mutex);
+	msg = con->ops->alloc_msg(con, hdr, skip);
+	mutex_lock(&con->mutex);
+	if (con->state != CON_STATE_OPEN) {
+		if (msg)
+			ceph_msg_put(msg);
+		return -EAGAIN;
 	}
-	if (!con->in_msg) {
-		con->in_msg = ceph_msg_new(type, front_len, GFP_NOFS, false);
-		if (!con->in_msg) {
-			pr_err("unable to allocate msg type %d len %d\n",
-			       type, front_len);
-			return -ENOMEM;
-		}
+	if (msg) {
+		BUG_ON(*skip);
+		con->in_msg = msg;
 		con->in_msg->con = con->ops->get(con);
 		BUG_ON(con->in_msg->con == NULL);
-		con->in_msg->page_alignment = le16_to_cpu(hdr->data_off);
+	} else {
+		/*
+		 * Null message pointer means either we should skip
+		 * this message or we couldn't allocate memory.  The
+		 * former is not an error.
+		 */
+		if (*skip)
+			return 0;
+		con->error_msg = "error allocating memory for incoming message";
+
+		return -ENOMEM;
 	}
 	memcpy(&con->in_msg->hdr, &con->in_hdr, sizeof(con->in_hdr));
 
@@ -2870,7 +3260,7 @@
 		vfree(m->front.iov_base);
 	else
 		kfree(m->front.iov_base);
-	kfree(m);
+	kmem_cache_free(ceph_msg_cache, m);
 }
 
 /*
@@ -2879,6 +3269,9 @@
 void ceph_msg_last_put(struct kref *kref)
 {
 	struct ceph_msg *m = container_of(kref, struct ceph_msg, kref);
+	LIST_HEAD(data);
+	struct list_head *links;
+	struct list_head *next;
 
 	dout("ceph_msg_put last one on %p\n", m);
 	WARN_ON(!list_empty(&m->list_head));
@@ -2888,16 +3281,16 @@
 		ceph_buffer_put(m->middle);
 		m->middle = NULL;
 	}
-	m->nr_pages = 0;
-	m->pages = NULL;
 
-	if (m->pagelist) {
-		ceph_pagelist_release(m->pagelist);
-		kfree(m->pagelist);
-		m->pagelist = NULL;
+	list_splice_init(&m->data, &data);
+	list_for_each_safe(links, next, &data) {
+		struct ceph_msg_data *data;
+
+		data = list_entry(links, struct ceph_msg_data, links);
+		list_del_init(links);
+		ceph_msg_data_destroy(data);
 	}
-
-	m->trail = NULL;
+	m->data_length = 0;
 
 	if (m->pool)
 		ceph_msgpool_put(m->pool, m);
@@ -2908,8 +3301,8 @@
 
 void ceph_msg_dump(struct ceph_msg *msg)
 {
-	pr_debug("msg_dump %p (front_max %d nr_pages %d)\n", msg,
-		 msg->front_max, msg->nr_pages);
+	pr_debug("msg_dump %p (front_max %d length %zd)\n", msg,
+		 msg->front_max, msg->data_length);
 	print_hex_dump(KERN_DEBUG, "header: ",
 		       DUMP_PREFIX_OFFSET, 16, 1,
 		       &msg->hdr, sizeof(msg->hdr), true);
diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c
index aef5b10..1fe25cd 100644
--- a/net/ceph/mon_client.c
+++ b/net/ceph/mon_client.c
@@ -737,7 +737,7 @@
 
 		__validate_auth(monc);
 
-		if (monc->auth->ops->is_authenticated(monc->auth))
+		if (ceph_auth_is_authenticated(monc->auth))
 			__send_subscribe(monc);
 	}
 	__schedule_delayed(monc);
@@ -892,8 +892,7 @@
 
 	mutex_lock(&monc->mutex);
 	had_debugfs_info = have_debugfs_info(monc);
-	if (monc->auth->ops)
-		was_auth = monc->auth->ops->is_authenticated(monc->auth);
+	was_auth = ceph_auth_is_authenticated(monc->auth);
 	monc->pending_auth = 0;
 	ret = ceph_handle_auth_reply(monc->auth, msg->front.iov_base,
 				     msg->front.iov_len,
@@ -904,7 +903,7 @@
 		wake_up_all(&monc->client->auth_wq);
 	} else if (ret > 0) {
 		__send_prepared_auth_request(monc, ret);
-	} else if (!was_auth && monc->auth->ops->is_authenticated(monc->auth)) {
+	} else if (!was_auth && ceph_auth_is_authenticated(monc->auth)) {
 		dout("authenticated, starting session\n");
 
 		monc->client->msgr.inst.name.type = CEPH_ENTITY_TYPE_CLIENT;
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index d730dd4..a3395fd 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -1,3 +1,4 @@
+
 #include <linux/ceph/ceph_debug.h>
 
 #include <linux/module.h>
@@ -21,6 +22,8 @@
 #define OSD_OP_FRONT_LEN	4096
 #define OSD_OPREPLY_FRONT_LEN	512
 
+static struct kmem_cache	*ceph_osd_request_cache;
+
 static const struct ceph_connection_operations osd_con_ops;
 
 static void __send_queued(struct ceph_osd_client *osdc);
@@ -32,12 +35,6 @@
 static void __send_request(struct ceph_osd_client *osdc,
 			   struct ceph_osd_request *req);
 
-static int op_has_extent(int op)
-{
-	return (op == CEPH_OSD_OP_READ ||
-		op == CEPH_OSD_OP_WRITE);
-}
-
 /*
  * Implement client access to distributed object storage cluster.
  *
@@ -63,53 +60,238 @@
  *
  * fill osd op in request message.
  */
-static int calc_layout(struct ceph_vino vino,
-		       struct ceph_file_layout *layout,
-		       u64 off, u64 *plen,
-		       struct ceph_osd_request *req,
-		       struct ceph_osd_req_op *op)
+static int calc_layout(struct ceph_file_layout *layout, u64 off, u64 *plen,
+			u64 *objnum, u64 *objoff, u64 *objlen)
 {
 	u64 orig_len = *plen;
-	u64 bno = 0;
-	u64 objoff = 0;
-	u64 objlen = 0;
 	int r;
 
 	/* object extent? */
-	r = ceph_calc_file_object_mapping(layout, off, orig_len, &bno,
-					  &objoff, &objlen);
+	r = ceph_calc_file_object_mapping(layout, off, orig_len, objnum,
+					  objoff, objlen);
 	if (r < 0)
 		return r;
-	if (objlen < orig_len) {
-		*plen = objlen;
+	if (*objlen < orig_len) {
+		*plen = *objlen;
 		dout(" skipping last %llu, final file extent %llu~%llu\n",
 		     orig_len - *plen, off, *plen);
 	}
 
-	if (op_has_extent(op->op)) {
-		u32 osize = le32_to_cpu(layout->fl_object_size);
-		op->extent.offset = objoff;
-		op->extent.length = objlen;
-		if (op->extent.truncate_size <= off - objoff) {
-			op->extent.truncate_size = 0;
-		} else {
-			op->extent.truncate_size -= off - objoff;
-			if (op->extent.truncate_size > osize)
-				op->extent.truncate_size = osize;
-		}
+	dout("calc_layout objnum=%llx %llu~%llu\n", *objnum, *objoff, *objlen);
+
+	return 0;
+}
+
+static void ceph_osd_data_init(struct ceph_osd_data *osd_data)
+{
+	memset(osd_data, 0, sizeof (*osd_data));
+	osd_data->type = CEPH_OSD_DATA_TYPE_NONE;
+}
+
+static void ceph_osd_data_pages_init(struct ceph_osd_data *osd_data,
+			struct page **pages, u64 length, u32 alignment,
+			bool pages_from_pool, bool own_pages)
+{
+	osd_data->type = CEPH_OSD_DATA_TYPE_PAGES;
+	osd_data->pages = pages;
+	osd_data->length = length;
+	osd_data->alignment = alignment;
+	osd_data->pages_from_pool = pages_from_pool;
+	osd_data->own_pages = own_pages;
+}
+
+static void ceph_osd_data_pagelist_init(struct ceph_osd_data *osd_data,
+			struct ceph_pagelist *pagelist)
+{
+	osd_data->type = CEPH_OSD_DATA_TYPE_PAGELIST;
+	osd_data->pagelist = pagelist;
+}
+
+#ifdef CONFIG_BLOCK
+static void ceph_osd_data_bio_init(struct ceph_osd_data *osd_data,
+			struct bio *bio, size_t bio_length)
+{
+	osd_data->type = CEPH_OSD_DATA_TYPE_BIO;
+	osd_data->bio = bio;
+	osd_data->bio_length = bio_length;
+}
+#endif /* CONFIG_BLOCK */
+
+#define osd_req_op_data(oreq, whch, typ, fld)	\
+	({						\
+		BUG_ON(whch >= (oreq)->r_num_ops);	\
+		&(oreq)->r_ops[whch].typ.fld;		\
+	})
+
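The osd_req_op_data() macro only bounds-checks the op index and then names
one field of the selected op, which keeps the accessors below at one line
each. Written out, the extent accessor's use of it expands to roughly:

	/* osd_req_op_data(osd_req, which, extent, osd_data) becomes: */
	({
		BUG_ON(which >= (osd_req)->r_num_ops);
		&(osd_req)->r_ops[which].extent.osd_data;
	})
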
+static struct ceph_osd_data *
+osd_req_op_raw_data_in(struct ceph_osd_request *osd_req, unsigned int which)
+{
+	BUG_ON(which >= osd_req->r_num_ops);
+
+	return &osd_req->r_ops[which].raw_data_in;
+}
+
+struct ceph_osd_data *
+osd_req_op_extent_osd_data(struct ceph_osd_request *osd_req,
+			unsigned int which)
+{
+	return osd_req_op_data(osd_req, which, extent, osd_data);
+}
+EXPORT_SYMBOL(osd_req_op_extent_osd_data);
+
+struct ceph_osd_data *
+osd_req_op_cls_response_data(struct ceph_osd_request *osd_req,
+			unsigned int which)
+{
+	return osd_req_op_data(osd_req, which, cls, response_data);
+}
+EXPORT_SYMBOL(osd_req_op_cls_response_data);	/* ??? */
+
+void osd_req_op_raw_data_in_pages(struct ceph_osd_request *osd_req,
+			unsigned int which, struct page **pages,
+			u64 length, u32 alignment,
+			bool pages_from_pool, bool own_pages)
+{
+	struct ceph_osd_data *osd_data;
+
+	osd_data = osd_req_op_raw_data_in(osd_req, which);
+	ceph_osd_data_pages_init(osd_data, pages, length, alignment,
+				pages_from_pool, own_pages);
+}
+EXPORT_SYMBOL(osd_req_op_raw_data_in_pages);
+
+void osd_req_op_extent_osd_data_pages(struct ceph_osd_request *osd_req,
+			unsigned int which, struct page **pages,
+			u64 length, u32 alignment,
+			bool pages_from_pool, bool own_pages)
+{
+	struct ceph_osd_data *osd_data;
+
+	osd_data = osd_req_op_data(osd_req, which, extent, osd_data);
+	ceph_osd_data_pages_init(osd_data, pages, length, alignment,
+				pages_from_pool, own_pages);
+}
+EXPORT_SYMBOL(osd_req_op_extent_osd_data_pages);
+
+void osd_req_op_extent_osd_data_pagelist(struct ceph_osd_request *osd_req,
+			unsigned int which, struct ceph_pagelist *pagelist)
+{
+	struct ceph_osd_data *osd_data;
+
+	osd_data = osd_req_op_data(osd_req, which, extent, osd_data);
+	ceph_osd_data_pagelist_init(osd_data, pagelist);
+}
+EXPORT_SYMBOL(osd_req_op_extent_osd_data_pagelist);
+
+#ifdef CONFIG_BLOCK
+void osd_req_op_extent_osd_data_bio(struct ceph_osd_request *osd_req,
+			unsigned int which, struct bio *bio, size_t bio_length)
+{
+	struct ceph_osd_data *osd_data;
+
+	osd_data = osd_req_op_data(osd_req, which, extent, osd_data);
+	ceph_osd_data_bio_init(osd_data, bio, bio_length);
+}
+EXPORT_SYMBOL(osd_req_op_extent_osd_data_bio);
+#endif /* CONFIG_BLOCK */
+
+static void osd_req_op_cls_request_info_pagelist(
+			struct ceph_osd_request *osd_req,
+			unsigned int which, struct ceph_pagelist *pagelist)
+{
+	struct ceph_osd_data *osd_data;
+
+	osd_data = osd_req_op_data(osd_req, which, cls, request_info);
+	ceph_osd_data_pagelist_init(osd_data, pagelist);
+}
+
+void osd_req_op_cls_request_data_pagelist(
+			struct ceph_osd_request *osd_req,
+			unsigned int which, struct ceph_pagelist *pagelist)
+{
+	struct ceph_osd_data *osd_data;
+
+	osd_data = osd_req_op_data(osd_req, which, cls, request_data);
+	ceph_osd_data_pagelist_init(osd_data, pagelist);
+}
+EXPORT_SYMBOL(osd_req_op_cls_request_data_pagelist);
+
+void osd_req_op_cls_request_data_pages(struct ceph_osd_request *osd_req,
+			unsigned int which, struct page **pages, u64 length,
+			u32 alignment, bool pages_from_pool, bool own_pages)
+{
+	struct ceph_osd_data *osd_data;
+
+	osd_data = osd_req_op_data(osd_req, which, cls, request_data);
+	ceph_osd_data_pages_init(osd_data, pages, length, alignment,
+				pages_from_pool, own_pages);
+}
+EXPORT_SYMBOL(osd_req_op_cls_request_data_pages);
+
+void osd_req_op_cls_response_data_pages(struct ceph_osd_request *osd_req,
+			unsigned int which, struct page **pages, u64 length,
+			u32 alignment, bool pages_from_pool, bool own_pages)
+{
+	struct ceph_osd_data *osd_data;
+
+	osd_data = osd_req_op_data(osd_req, which, cls, response_data);
+	ceph_osd_data_pages_init(osd_data, pages, length, alignment,
+				pages_from_pool, own_pages);
+}
+EXPORT_SYMBOL(osd_req_op_cls_response_data_pages);
+
+static u64 ceph_osd_data_length(struct ceph_osd_data *osd_data)
+{
+	switch (osd_data->type) {
+	case CEPH_OSD_DATA_TYPE_NONE:
+		return 0;
+	case CEPH_OSD_DATA_TYPE_PAGES:
+		return osd_data->length;
+	case CEPH_OSD_DATA_TYPE_PAGELIST:
+		return (u64)osd_data->pagelist->length;
+#ifdef CONFIG_BLOCK
+	case CEPH_OSD_DATA_TYPE_BIO:
+		return (u64)osd_data->bio_length;
+#endif /* CONFIG_BLOCK */
+	default:
+		WARN(true, "unrecognized data type %d\n", (int)osd_data->type);
+		return 0;
 	}
-	req->r_num_pages = calc_pages_for(off, *plen);
-	req->r_page_alignment = off & ~PAGE_MASK;
-	if (op->op == CEPH_OSD_OP_WRITE)
-		op->payload_len = *plen;
+}
 
-	dout("calc_layout bno=%llx %llu~%llu (%d pages)\n",
-	     bno, objoff, objlen, req->r_num_pages);
+static void ceph_osd_data_release(struct ceph_osd_data *osd_data)
+{
+	if (osd_data->type == CEPH_OSD_DATA_TYPE_PAGES && osd_data->own_pages) {
+		int num_pages;
 
-	snprintf(req->r_oid, sizeof(req->r_oid), "%llx.%08llx", vino.ino, bno);
-	req->r_oid_len = strlen(req->r_oid);
+		num_pages = calc_pages_for((u64)osd_data->alignment,
+						(u64)osd_data->length);
+		ceph_release_page_vector(osd_data->pages, num_pages);
+	}
+	ceph_osd_data_init(osd_data);
+}
 
-	return r;
+static void osd_req_op_data_release(struct ceph_osd_request *osd_req,
+			unsigned int which)
+{
+	struct ceph_osd_req_op *op;
+
+	BUG_ON(which >= osd_req->r_num_ops);
+	op = &osd_req->r_ops[which];
+
+	switch (op->op) {
+	case CEPH_OSD_OP_READ:
+	case CEPH_OSD_OP_WRITE:
+		ceph_osd_data_release(&op->extent.osd_data);
+		break;
+	case CEPH_OSD_OP_CALL:
+		ceph_osd_data_release(&op->cls.request_info);
+		ceph_osd_data_release(&op->cls.request_data);
+		ceph_osd_data_release(&op->cls.response_data);
+		break;
+	default:
+		break;
+	}
 }
 
 /*
@@ -117,30 +299,26 @@
  */
 void ceph_osdc_release_request(struct kref *kref)
 {
-	struct ceph_osd_request *req = container_of(kref,
-						    struct ceph_osd_request,
-						    r_kref);
+	struct ceph_osd_request *req;
+	unsigned int which;
 
+	req = container_of(kref, struct ceph_osd_request, r_kref);
 	if (req->r_request)
 		ceph_msg_put(req->r_request);
-	if (req->r_con_filling_msg) {
-		dout("%s revoking msg %p from con %p\n", __func__,
-		     req->r_reply, req->r_con_filling_msg);
+	if (req->r_reply) {
 		ceph_msg_revoke_incoming(req->r_reply);
-		req->r_con_filling_msg->ops->put(req->r_con_filling_msg);
-		req->r_con_filling_msg = NULL;
-	}
-	if (req->r_reply)
 		ceph_msg_put(req->r_reply);
-	if (req->r_own_pages)
-		ceph_release_page_vector(req->r_pages,
-					 req->r_num_pages);
+	}
+
+	for (which = 0; which < req->r_num_ops; which++)
+		osd_req_op_data_release(req, which);
+
 	ceph_put_snap_context(req->r_snapc);
-	ceph_pagelist_release(&req->r_trail);
 	if (req->r_mempool)
 		mempool_free(req, req->r_osdc->req_mempool);
 	else
-		kfree(req);
+		kmem_cache_free(ceph_osd_request_cache, req);
+
 }
 EXPORT_SYMBOL(ceph_osdc_release_request);
 
@@ -154,6 +332,9 @@
 	struct ceph_msg *msg;
 	size_t msg_size;
 
+	BUILD_BUG_ON(CEPH_OSD_MAX_OP > U16_MAX);
+	BUG_ON(num_ops > CEPH_OSD_MAX_OP);
+
 	msg_size = 4 + 4 + 8 + 8 + 4+8;
 	msg_size += 2 + 4 + 8 + 4 + 4; /* oloc */
 	msg_size += 1 + 8 + 4 + 4;     /* pg_t */
@@ -168,13 +349,14 @@
 		req = mempool_alloc(osdc->req_mempool, gfp_flags);
 		memset(req, 0, sizeof(*req));
 	} else {
-		req = kzalloc(sizeof(*req), gfp_flags);
+		req = kmem_cache_zalloc(ceph_osd_request_cache, gfp_flags);
 	}
 	if (req == NULL)
 		return NULL;
 
 	req->r_osdc = osdc;
 	req->r_mempool = use_mempool;
+	req->r_num_ops = num_ops;
 
 	kref_init(&req->r_kref);
 	init_completion(&req->r_completion);
@@ -198,8 +380,6 @@
 	}
 	req->r_reply = msg;
 
-	ceph_pagelist_init(&req->r_trail);
-
 	/* create request message; allow space for oid */
 	if (use_mempool)
 		msg = ceph_msgpool_get(&osdc->msgpool_op, 0);
@@ -218,60 +398,24 @@
 }
 EXPORT_SYMBOL(ceph_osdc_alloc_request);
 
-static void osd_req_encode_op(struct ceph_osd_request *req,
-			      struct ceph_osd_op *dst,
-			      struct ceph_osd_req_op *src)
+static bool osd_req_opcode_valid(u16 opcode)
 {
-	dst->op = cpu_to_le16(src->op);
-
-	switch (src->op) {
-	case CEPH_OSD_OP_STAT:
-		break;
+	switch (opcode) {
 	case CEPH_OSD_OP_READ:
-	case CEPH_OSD_OP_WRITE:
-		dst->extent.offset =
-			cpu_to_le64(src->extent.offset);
-		dst->extent.length =
-			cpu_to_le64(src->extent.length);
-		dst->extent.truncate_size =
-			cpu_to_le64(src->extent.truncate_size);
-		dst->extent.truncate_seq =
-			cpu_to_le32(src->extent.truncate_seq);
-		break;
-	case CEPH_OSD_OP_CALL:
-		dst->cls.class_len = src->cls.class_len;
-		dst->cls.method_len = src->cls.method_len;
-		dst->cls.indata_len = cpu_to_le32(src->cls.indata_len);
-
-		ceph_pagelist_append(&req->r_trail, src->cls.class_name,
-				     src->cls.class_len);
-		ceph_pagelist_append(&req->r_trail, src->cls.method_name,
-				     src->cls.method_len);
-		ceph_pagelist_append(&req->r_trail, src->cls.indata,
-				     src->cls.indata_len);
-		break;
-	case CEPH_OSD_OP_STARTSYNC:
-		break;
-	case CEPH_OSD_OP_NOTIFY_ACK:
-	case CEPH_OSD_OP_WATCH:
-		dst->watch.cookie = cpu_to_le64(src->watch.cookie);
-		dst->watch.ver = cpu_to_le64(src->watch.ver);
-		dst->watch.flag = src->watch.flag;
-		break;
-	default:
-		pr_err("unrecognized osd opcode %d\n", dst->op);
-		WARN_ON(1);
-		break;
+	case CEPH_OSD_OP_STAT:
 	case CEPH_OSD_OP_MAPEXT:
 	case CEPH_OSD_OP_MASKTRUNC:
 	case CEPH_OSD_OP_SPARSE_READ:
 	case CEPH_OSD_OP_NOTIFY:
+	case CEPH_OSD_OP_NOTIFY_ACK:
 	case CEPH_OSD_OP_ASSERT_VER:
+	case CEPH_OSD_OP_WRITE:
 	case CEPH_OSD_OP_WRITEFULL:
 	case CEPH_OSD_OP_TRUNCATE:
 	case CEPH_OSD_OP_ZERO:
 	case CEPH_OSD_OP_DELETE:
 	case CEPH_OSD_OP_APPEND:
+	case CEPH_OSD_OP_STARTSYNC:
 	case CEPH_OSD_OP_SETTRUNC:
 	case CEPH_OSD_OP_TRIMTRUNC:
 	case CEPH_OSD_OP_TMAPUP:
@@ -279,11 +423,11 @@
 	case CEPH_OSD_OP_TMAPGET:
 	case CEPH_OSD_OP_CREATE:
 	case CEPH_OSD_OP_ROLLBACK:
+	case CEPH_OSD_OP_WATCH:
 	case CEPH_OSD_OP_OMAPGETKEYS:
 	case CEPH_OSD_OP_OMAPGETVALS:
 	case CEPH_OSD_OP_OMAPGETHEADER:
 	case CEPH_OSD_OP_OMAPGETVALSBYKEYS:
-	case CEPH_OSD_OP_MODE_RD:
 	case CEPH_OSD_OP_OMAPSETVALS:
 	case CEPH_OSD_OP_OMAPSETHEADER:
 	case CEPH_OSD_OP_OMAPCLEAR:
@@ -314,113 +458,233 @@
 	case CEPH_OSD_OP_RDUNLOCK:
 	case CEPH_OSD_OP_UPLOCK:
 	case CEPH_OSD_OP_DNLOCK:
+	case CEPH_OSD_OP_CALL:
 	case CEPH_OSD_OP_PGLS:
 	case CEPH_OSD_OP_PGLS_FILTER:
-		pr_err("unsupported osd opcode %s\n",
-			ceph_osd_op_name(dst->op));
-		WARN_ON(1);
-		break;
+		return true;
+	default:
+		return false;
 	}
-	dst->payload_len = cpu_to_le32(src->payload_len);
 }
 
 /*
- * build new request AND message
- *
+ * This is an osd op init function for opcodes that have no data or
+ * other information associated with them.  It also serves as a
+ * common init routine for all the other init functions, below.
  */
-void ceph_osdc_build_request(struct ceph_osd_request *req,
-			     u64 off, u64 len, unsigned int num_ops,
-			     struct ceph_osd_req_op *src_ops,
-			     struct ceph_snap_context *snapc, u64 snap_id,
-			     struct timespec *mtime)
+static struct ceph_osd_req_op *
+_osd_req_op_init(struct ceph_osd_request *osd_req, unsigned int which,
+				u16 opcode)
 {
-	struct ceph_msg *msg = req->r_request;
-	struct ceph_osd_req_op *src_op;
-	void *p;
-	size_t msg_size;
-	int flags = req->r_flags;
-	u64 data_len;
-	int i;
+	struct ceph_osd_req_op *op;
 
-	req->r_num_ops = num_ops;
-	req->r_snapid = snap_id;
-	req->r_snapc = ceph_get_snap_context(snapc);
+	BUG_ON(which >= osd_req->r_num_ops);
+	BUG_ON(!osd_req_opcode_valid(opcode));
 
-	/* encode request */
-	msg->hdr.version = cpu_to_le16(4);
+	op = &osd_req->r_ops[which];
+	memset(op, 0, sizeof (*op));
+	op->op = opcode;
 
-	p = msg->front.iov_base;
-	ceph_encode_32(&p, 1);   /* client_inc  is always 1 */
-	req->r_request_osdmap_epoch = p;
-	p += 4;
-	req->r_request_flags = p;
-	p += 4;
-	if (req->r_flags & CEPH_OSD_FLAG_WRITE)
-		ceph_encode_timespec(p, mtime);
-	p += sizeof(struct ceph_timespec);
-	req->r_request_reassert_version = p;
-	p += sizeof(struct ceph_eversion); /* will get filled in */
-
-	/* oloc */
-	ceph_encode_8(&p, 4);
-	ceph_encode_8(&p, 4);
-	ceph_encode_32(&p, 8 + 4 + 4);
-	req->r_request_pool = p;
-	p += 8;
-	ceph_encode_32(&p, -1);  /* preferred */
-	ceph_encode_32(&p, 0);   /* key len */
-
-	ceph_encode_8(&p, 1);
-	req->r_request_pgid = p;
-	p += 8 + 4;
-	ceph_encode_32(&p, -1);  /* preferred */
-
-	/* oid */
-	ceph_encode_32(&p, req->r_oid_len);
-	memcpy(p, req->r_oid, req->r_oid_len);
-	dout("oid '%.*s' len %d\n", req->r_oid_len, req->r_oid, req->r_oid_len);
-	p += req->r_oid_len;
-
-	/* ops */
-	ceph_encode_16(&p, num_ops);
-	src_op = src_ops;
-	req->r_request_ops = p;
-	for (i = 0; i < num_ops; i++, src_op++) {
-		osd_req_encode_op(req, p, src_op);
-		p += sizeof(struct ceph_osd_op);
-	}
-
-	/* snaps */
-	ceph_encode_64(&p, req->r_snapid);
-	ceph_encode_64(&p, req->r_snapc ? req->r_snapc->seq : 0);
-	ceph_encode_32(&p, req->r_snapc ? req->r_snapc->num_snaps : 0);
-	if (req->r_snapc) {
-		for (i = 0; i < snapc->num_snaps; i++) {
-			ceph_encode_64(&p, req->r_snapc->snaps[i]);
-		}
-	}
-
-	req->r_request_attempts = p;
-	p += 4;
-
-	data_len = req->r_trail.length;
-	if (flags & CEPH_OSD_FLAG_WRITE) {
-		req->r_request->hdr.data_off = cpu_to_le16(off);
-		data_len += len;
-	}
-	req->r_request->hdr.data_len = cpu_to_le32(data_len);
-	req->r_request->page_alignment = req->r_page_alignment;
-
-	BUG_ON(p > msg->front.iov_base + msg->front.iov_len);
-	msg_size = p - msg->front.iov_base;
-	msg->front.iov_len = msg_size;
-	msg->hdr.front_len = cpu_to_le32(msg_size);
-
-	dout("build_request msg_size was %d num_ops %d\n", (int)msg_size,
-	     num_ops);
-	return;
+	return op;
 }
-EXPORT_SYMBOL(ceph_osdc_build_request);
+
+void osd_req_op_init(struct ceph_osd_request *osd_req,
+				unsigned int which, u16 opcode)
+{
+	(void)_osd_req_op_init(osd_req, which, opcode);
+}
+EXPORT_SYMBOL(osd_req_op_init);
+
+void osd_req_op_extent_init(struct ceph_osd_request *osd_req,
+				unsigned int which, u16 opcode,
+				u64 offset, u64 length,
+				u64 truncate_size, u32 truncate_seq)
+{
+	struct ceph_osd_req_op *op = _osd_req_op_init(osd_req, which, opcode);
+	size_t payload_len = 0;
+
+	BUG_ON(opcode != CEPH_OSD_OP_READ && opcode != CEPH_OSD_OP_WRITE);
+
+	op->extent.offset = offset;
+	op->extent.length = length;
+	op->extent.truncate_size = truncate_size;
+	op->extent.truncate_seq = truncate_seq;
+	if (opcode == CEPH_OSD_OP_WRITE)
+		payload_len += length;
+
+	op->payload_len = payload_len;
+}
+EXPORT_SYMBOL(osd_req_op_extent_init);
+
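A typical caller now sets up an extent op in two steps: describe the extent
itself, then attach the buffer it reads into or writes from. A hedged sketch
for a read (req, off, len, pages, and the truncate values are illustrative;
the final two arguments say the pages are neither pool-owned nor owned by
the request):

	osd_req_op_extent_init(req, 0, CEPH_OSD_OP_READ, off, len,
			       truncate_size, truncate_seq);
	osd_req_op_extent_osd_data_pages(req, 0, pages, len,
					 off & ~PAGE_MASK, false, false);
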
+void osd_req_op_extent_update(struct ceph_osd_request *osd_req,
+				unsigned int which, u64 length)
+{
+	struct ceph_osd_req_op *op;
+	u64 previous;
+
+	BUG_ON(which >= osd_req->r_num_ops);
+	op = &osd_req->r_ops[which];
+	previous = op->extent.length;
+
+	if (length == previous)
+		return;		/* Nothing to do */
+	BUG_ON(length > previous);
+
+	op->extent.length = length;
+	op->payload_len -= previous - length;
+}
+EXPORT_SYMBOL(osd_req_op_extent_update);
+
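osd_req_op_extent_update() only ever shrinks an extent (note the
BUG_ON(length > previous)); it lets a caller trim an op after discovering
less data than planned while keeping payload_len consistent. For example
(values made up):

	/* A 4096-byte write turns out to carry only 1024 bytes. */
	osd_req_op_extent_init(req, 0, CEPH_OSD_OP_WRITE, off, 4096, 0, 0);
	osd_req_op_extent_update(req, 0, 1024);
	/* op->extent.length is now 1024; payload_len shrank by 3072. */
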
+void osd_req_op_cls_init(struct ceph_osd_request *osd_req, unsigned int which,
+			u16 opcode, const char *class, const char *method)
+{
+	struct ceph_osd_req_op *op = _osd_req_op_init(osd_req, which, opcode);
+	struct ceph_pagelist *pagelist;
+	size_t payload_len = 0;
+	size_t size;
+
+	BUG_ON(opcode != CEPH_OSD_OP_CALL);
+
+	pagelist = kmalloc(sizeof (*pagelist), GFP_NOFS);
+	BUG_ON(!pagelist);
+	ceph_pagelist_init(pagelist);
+
+	op->cls.class_name = class;
+	size = strlen(class);
+	BUG_ON(size > (size_t) U8_MAX);
+	op->cls.class_len = size;
+	ceph_pagelist_append(pagelist, class, size);
+	payload_len += size;
+
+	op->cls.method_name = method;
+	size = strlen(method);
+	BUG_ON(size > (size_t) U8_MAX);
+	op->cls.method_len = size;
+	ceph_pagelist_append(pagelist, method, size);
+	payload_len += size;
+
+	osd_req_op_cls_request_info_pagelist(osd_req, which, pagelist);
+
+	op->cls.argc = 0;	/* currently unused */
+
+	op->payload_len = payload_len;
+}
+EXPORT_SYMBOL(osd_req_op_cls_init);
+
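osd_req_op_cls_init() packs the class and method names into a freshly
allocated request_info pagelist; input and output buffers for the call are
attached separately through the data accessors above. A hypothetical class
call (the class/method names and buffers are illustrative):

	osd_req_op_cls_init(req, 0, CEPH_OSD_OP_CALL, "lock", "get_info");
	osd_req_op_cls_request_data_pagelist(req, 0, indata_pagelist);
	osd_req_op_cls_response_data_pages(req, 0, reply_pages, reply_len,
					   0, false, false);
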
+void osd_req_op_watch_init(struct ceph_osd_request *osd_req,
+				unsigned int which, u16 opcode,
+				u64 cookie, u64 version, int flag)
+{
+	struct ceph_osd_req_op *op = _osd_req_op_init(osd_req, which, opcode);
+
+	BUG_ON(opcode != CEPH_OSD_OP_NOTIFY_ACK && opcode != CEPH_OSD_OP_WATCH);
+
+	op->watch.cookie = cookie;
+	op->watch.ver = version;
+	if (opcode == CEPH_OSD_OP_WATCH && flag)
+		op->watch.flag = (u8)1;
+}
+EXPORT_SYMBOL(osd_req_op_watch_init);
+
+static void ceph_osdc_msg_data_add(struct ceph_msg *msg,
+				struct ceph_osd_data *osd_data)
+{
+	u64 length = ceph_osd_data_length(osd_data);
+
+	if (osd_data->type == CEPH_OSD_DATA_TYPE_PAGES) {
+		BUG_ON(length > (u64) SIZE_MAX);
+		if (length)
+			ceph_msg_data_add_pages(msg, osd_data->pages,
+					length, osd_data->alignment);
+	} else if (osd_data->type == CEPH_OSD_DATA_TYPE_PAGELIST) {
+		BUG_ON(!length);
+		ceph_msg_data_add_pagelist(msg, osd_data->pagelist);
+#ifdef CONFIG_BLOCK
+	} else if (osd_data->type == CEPH_OSD_DATA_TYPE_BIO) {
+		ceph_msg_data_add_bio(msg, osd_data->bio, length);
+#endif
+	} else {
+		BUG_ON(osd_data->type != CEPH_OSD_DATA_TYPE_NONE);
+	}
+}
+
+static u64 osd_req_encode_op(struct ceph_osd_request *req,
+			      struct ceph_osd_op *dst, unsigned int which)
+{
+	struct ceph_osd_req_op *src;
+	struct ceph_osd_data *osd_data;
+	u64 request_data_len = 0;
+	u64 data_length;
+
+	BUG_ON(which >= req->r_num_ops);
+	src = &req->r_ops[which];
+	if (WARN_ON(!osd_req_opcode_valid(src->op))) {
+		pr_err("unrecognized osd opcode %d\n", src->op);
+
+		return 0;
+	}
+
+	switch (src->op) {
+	case CEPH_OSD_OP_STAT:
+		osd_data = &src->raw_data_in;
+		ceph_osdc_msg_data_add(req->r_reply, osd_data);
+		break;
+	case CEPH_OSD_OP_READ:
+	case CEPH_OSD_OP_WRITE:
+		if (src->op == CEPH_OSD_OP_WRITE)
+			request_data_len = src->extent.length;
+		dst->extent.offset = cpu_to_le64(src->extent.offset);
+		dst->extent.length = cpu_to_le64(src->extent.length);
+		dst->extent.truncate_size =
+			cpu_to_le64(src->extent.truncate_size);
+		dst->extent.truncate_seq =
+			cpu_to_le32(src->extent.truncate_seq);
+		osd_data = &src->extent.osd_data;
+		if (src->op == CEPH_OSD_OP_WRITE)
+			ceph_osdc_msg_data_add(req->r_request, osd_data);
+		else
+			ceph_osdc_msg_data_add(req->r_reply, osd_data);
+		break;
+	case CEPH_OSD_OP_CALL:
+		dst->cls.class_len = src->cls.class_len;
+		dst->cls.method_len = src->cls.method_len;
+		osd_data = &src->cls.request_info;
+		ceph_osdc_msg_data_add(req->r_request, osd_data);
+		BUG_ON(osd_data->type != CEPH_OSD_DATA_TYPE_PAGELIST);
+		request_data_len = osd_data->pagelist->length;
+
+		osd_data = &src->cls.request_data;
+		data_length = ceph_osd_data_length(osd_data);
+		if (data_length) {
+			BUG_ON(osd_data->type == CEPH_OSD_DATA_TYPE_NONE);
+			dst->cls.indata_len = cpu_to_le32(data_length);
+			ceph_osdc_msg_data_add(req->r_request, osd_data);
+			src->payload_len += data_length;
+			request_data_len += data_length;
+		}
+		osd_data = &src->cls.response_data;
+		ceph_osdc_msg_data_add(req->r_reply, osd_data);
+		break;
+	case CEPH_OSD_OP_STARTSYNC:
+		break;
+	case CEPH_OSD_OP_NOTIFY_ACK:
+	case CEPH_OSD_OP_WATCH:
+		dst->watch.cookie = cpu_to_le64(src->watch.cookie);
+		dst->watch.ver = cpu_to_le64(src->watch.ver);
+		dst->watch.flag = src->watch.flag;
+		break;
+	default:
+		pr_err("unsupported osd opcode %s\n",
+			ceph_osd_op_name(src->op));
+		WARN_ON(1);
+
+		return 0;
+	}
+	dst->op = cpu_to_le16(src->op);
+	dst->payload_len = cpu_to_le32(src->payload_len);
+
+	return request_data_len;
+}
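osd_req_encode_op() now returns the number of outbound (request-side) bytes
the op contributes; only writes and the class-call info/input data count,
since read buffers and call responses travel in the reply. The caller is
expected to sum these while encoding to fill hdr.data_len. A sketch of that
accumulation (the updated ceph_osdc_build_request() is not part of this
hunk, so the loop shape is assumed; dst points at the encoded op array):

	u64 data_len = 0;
	unsigned int which;

	for (which = 0; which < req->r_num_ops; which++)
		data_len += osd_req_encode_op(req, &dst[which], which);
	req->r_request->hdr.data_len = cpu_to_le32(data_len);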
 
 /*
  * build new request AND message, calculate layout, and adjust file
@@ -436,51 +700,63 @@
 struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc,
 					       struct ceph_file_layout *layout,
 					       struct ceph_vino vino,
-					       u64 off, u64 *plen,
+					       u64 off, u64 *plen, int num_ops,
 					       int opcode, int flags,
 					       struct ceph_snap_context *snapc,
-					       int do_sync,
 					       u32 truncate_seq,
 					       u64 truncate_size,
-					       struct timespec *mtime,
-					       bool use_mempool,
-					       int page_align)
+					       bool use_mempool)
 {
-	struct ceph_osd_req_op ops[2];
 	struct ceph_osd_request *req;
-	unsigned int num_op = 1;
+	u64 objnum = 0;
+	u64 objoff = 0;
+	u64 objlen = 0;
+	u32 object_size;
+	u64 object_base;
 	int r;
 
-	memset(&ops, 0, sizeof ops);
+	BUG_ON(opcode != CEPH_OSD_OP_READ && opcode != CEPH_OSD_OP_WRITE);
 
-	ops[0].op = opcode;
-	ops[0].extent.truncate_seq = truncate_seq;
-	ops[0].extent.truncate_size = truncate_size;
-
-	if (do_sync) {
-		ops[1].op = CEPH_OSD_OP_STARTSYNC;
-		num_op++;
-	}
-
-	req = ceph_osdc_alloc_request(osdc, snapc, num_op, use_mempool,
+	req = ceph_osdc_alloc_request(osdc, snapc, num_ops, use_mempool,
 					GFP_NOFS);
 	if (!req)
 		return ERR_PTR(-ENOMEM);
+
 	req->r_flags = flags;
 
 	/* calculate max write size */
-	r = calc_layout(vino, layout, off, plen, req, ops);
-	if (r < 0)
+	r = calc_layout(layout, off, plen, &objnum, &objoff, &objlen);
+	if (r < 0) {
+		ceph_osdc_put_request(req);
 		return ERR_PTR(r);
+	}
+
+	object_size = le32_to_cpu(layout->fl_object_size);
+	object_base = off - objoff;
+	if (truncate_size <= object_base) {
+		truncate_size = 0;
+	} else {
+		truncate_size -= object_base;
+		if (truncate_size > object_size)
+			truncate_size = object_size;
+	}
+
+	osd_req_op_extent_init(req, 0, opcode, objoff, objlen,
+				truncate_size, truncate_seq);
+
+	/*
+	 * A second op in the ops array means the caller wants to
+	 * also include a 'startsync' command so that the
+	 * osd will flush data quickly.
+	 */
+	if (num_ops > 1)
+		osd_req_op_init(req, 1, CEPH_OSD_OP_STARTSYNC);
+
 	req->r_file_layout = *layout;  /* keep a copy */
 
-	/* in case it differs from natural (file) alignment that
-	   calc_layout filled in for us */
-	req->r_num_pages = calc_pages_for(page_align, *plen);
-	req->r_page_alignment = page_align;
-
-	ceph_osdc_build_request(req, off, *plen, num_op, ops,
-				snapc, vino.snap, mtime);
+	snprintf(req->r_oid, sizeof(req->r_oid), "%llx.%08llx",
+		vino.ino, objnum);
+	req->r_oid_len = strlen(req->r_oid);
 
 	return req;
 }
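The slimmed-down signature drops the mtime, do_sync, and page-alignment
parameters; callers choose the op count up front and attach data buffers
afterwards. A hedged example of a one-op read under the new interface
(osdc, layout, vino, and the remaining variables are illustrative):

	req = ceph_osdc_new_request(osdc, layout, vino, off, &len, 1,
				    CEPH_OSD_OP_READ, CEPH_OSD_FLAG_READ,
				    NULL, truncate_seq, truncate_size,
				    false);
	if (IS_ERR(req))
		return PTR_ERR(req);

	osd_req_op_extent_osd_data_pages(req, 0, pages, len,
					 page_align, false, false);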
@@ -558,21 +834,46 @@
 				struct ceph_osd *osd)
 {
 	struct ceph_osd_request *req, *nreq;
+	LIST_HEAD(resend);
 	int err;
 
 	dout("__kick_osd_requests osd%d\n", osd->o_osd);
 	err = __reset_osd(osdc, osd);
 	if (err)
 		return;
-
+	/*
+	 * Build up a list of requests to resend by traversing the
+	 * osd's list of requests.  Requests for a given object are
+	 * sent in tid order, and that is also the order they're
+	 * kept on this list.  Therefore all requests that are in
+	 * flight will be found first, followed by all requests that
+	 * have not yet been sent.  And to resend requests while
+	 * preserving this order we will want to put any sent
+	 * requests back on the front of the osd client's unsent
+	 * list.
+	 *
+	 * So we build a separate ordered list of already-sent
+	 * requests for the affected osd and splice it onto the
+	 * front of the osd client's unsent list.  Once we've seen a
+	 * request that has not yet been sent we're done.  Those
+	 * requests are already sitting right where they belong.
+	 */
 	list_for_each_entry(req, &osd->o_requests, r_osd_item) {
-		list_move(&req->r_req_lru_item, &osdc->req_unsent);
-		dout("requeued %p tid %llu osd%d\n", req, req->r_tid,
+		if (!req->r_sent)
+			break;
+		list_move_tail(&req->r_req_lru_item, &resend);
+		dout("requeueing %p tid %llu osd%d\n", req, req->r_tid,
 		     osd->o_osd);
 		if (!req->r_linger)
 			req->r_flags |= CEPH_OSD_FLAG_RETRY;
 	}
+	list_splice(&resend, &osdc->req_unsent);
 
+	/*
+	 * Linger requests are re-registered before sending, which
+	 * sets up a new tid for each.  We add them to the unsent
+	 * list at the end to keep things in tid order.
+	 */
 	list_for_each_entry_safe(req, nreq, &osd->o_linger_requests,
 				 r_linger_osd) {
 		/*
@@ -581,8 +882,8 @@
 		 */
 		BUG_ON(!list_empty(&req->r_req_lru_item));
 		__register_request(osdc, req);
-		list_add(&req->r_req_lru_item, &osdc->req_unsent);
-		list_add(&req->r_osd_item, &req->r_osd->o_requests);
+		list_add_tail(&req->r_req_lru_item, &osdc->req_unsent);
+		list_add_tail(&req->r_osd_item, &req->r_osd->o_requests);
 		__unregister_linger_request(osdc, req);
 		dout("requeued lingering %p tid %llu osd%d\n", req, req->r_tid,
 		     osd->o_osd);
@@ -654,8 +955,7 @@
 	if (atomic_dec_and_test(&osd->o_ref) && osd->o_auth.authorizer) {
 		struct ceph_auth_client *ac = osd->o_osdc->client->monc.auth;
 
-		if (ac->ops && ac->ops->destroy_authorizer)
-			ac->ops->destroy_authorizer(ac, osd->o_auth.authorizer);
+		ceph_auth_destroy_authorizer(ac, osd->o_auth.authorizer);
 		kfree(osd);
 	}
 }
@@ -820,14 +1120,6 @@
 	}
 }
 
-static void register_request(struct ceph_osd_client *osdc,
-			     struct ceph_osd_request *req)
-{
-	mutex_lock(&osdc->request_mutex);
-	__register_request(osdc, req);
-	mutex_unlock(&osdc->request_mutex);
-}
-
 /*
  * called under osdc->request_mutex
  */
@@ -952,8 +1244,8 @@
 	int err;
 
 	dout("map_request %p tid %lld\n", req, req->r_tid);
-	err = ceph_calc_object_layout(&pgid, req->r_oid,
-				      &req->r_file_layout, osdc->osdmap);
+	err = ceph_calc_ceph_pg(&pgid, req->r_oid, osdc->osdmap,
+				ceph_file_layout_pg_pool(req->r_file_layout));
 	if (err) {
 		list_move(&req->r_req_lru_item, &osdc->req_notarget);
 		return err;
@@ -1007,10 +1299,10 @@
 
 	if (req->r_osd) {
 		__remove_osd_from_lru(req->r_osd);
-		list_add(&req->r_osd_item, &req->r_osd->o_requests);
-		list_move(&req->r_req_lru_item, &osdc->req_unsent);
+		list_add_tail(&req->r_osd_item, &req->r_osd->o_requests);
+		list_move_tail(&req->r_req_lru_item, &osdc->req_unsent);
 	} else {
-		list_move(&req->r_req_lru_item, &osdc->req_notarget);
+		list_move_tail(&req->r_req_lru_item, &osdc->req_notarget);
 	}
 	err = 1;   /* osd or pg changed */
 
@@ -1045,8 +1337,14 @@
 	list_move_tail(&req->r_req_lru_item, &osdc->req_lru);
 
 	ceph_msg_get(req->r_request); /* send consumes a ref */
-	ceph_con_send(&req->r_osd->o_con, req->r_request);
+
+	/* Mark the request unsafe if this is the first time it's being sent. */
+
+	if (!req->r_sent && req->r_unsafe_callback)
+		req->r_unsafe_callback(req, true);
 	req->r_sent = req->r_osd->o_incarnation;
+
+	ceph_con_send(&req->r_osd->o_con, req->r_request);
 }
 
 /*
@@ -1134,31 +1432,11 @@
 
 static void complete_request(struct ceph_osd_request *req)
 {
-	if (req->r_safe_callback)
-		req->r_safe_callback(req, NULL);
+	if (req->r_unsafe_callback)
+		req->r_unsafe_callback(req, false);
 	complete_all(&req->r_safe_completion);  /* fsync waiter */
 }
 
-static int __decode_pgid(void **p, void *end, struct ceph_pg *pgid)
-{
-	__u8 v;
-
-	ceph_decode_need(p, end, 1 + 8 + 4 + 4, bad);
-	v = ceph_decode_8(p);
-	if (v > 1) {
-		pr_warning("do not understand pg encoding %d > 1", v);
-		return -EINVAL;
-	}
-	pgid->pool = ceph_decode_64(p);
-	pgid->seed = ceph_decode_32(p);
-	*p += 4;
-	return 0;
-
-bad:
-	pr_warning("incomplete pg encoding");
-	return -EINVAL;
-}
-
 /*
  * handle osd op reply.  either call the callback if it is specified,
  * or do the completion to wake up the waiting thread.
@@ -1170,7 +1448,8 @@
 	struct ceph_osd_request *req;
 	u64 tid;
 	int object_len;
-	int numops, payload_len, flags;
+	unsigned int numops;
+	int payload_len, flags;
 	s32 result;
 	s32 retry_attempt;
 	struct ceph_pg pg;
@@ -1178,7 +1457,9 @@
 	u32 reassert_epoch;
 	u64 reassert_version;
 	u32 osdmap_epoch;
-	int i;
+	int already_completed;
+	u32 bytes;
+	unsigned int i;
 
 	tid = le64_to_cpu(msg->hdr.tid);
 	dout("handle_reply %p tid %llu\n", msg, tid);
@@ -1191,7 +1472,7 @@
 	ceph_decode_need(&p, end, object_len, bad);
 	p += object_len;
 
-	err = __decode_pgid(&p, end, &pg);
+	err = ceph_decode_pgid(&p, end, &pg);
 	if (err)
 		goto bad;
 
@@ -1207,8 +1488,7 @@
 	req = __lookup_request(osdc, tid);
 	if (req == NULL) {
 		dout("handle_reply tid %llu dne\n", tid);
-		mutex_unlock(&osdc->request_mutex);
-		return;
+		goto bad_mutex;
 	}
 	ceph_osdc_get_request(req);
 
@@ -1233,9 +1513,10 @@
 		payload_len += len;
 		p += sizeof(*op);
 	}
-	if (payload_len != le32_to_cpu(msg->hdr.data_len)) {
+	bytes = le32_to_cpu(msg->hdr.data_len);
+	if (payload_len != bytes) {
 		pr_warning("sum of op payload lens %d != data_len %d",
-			   payload_len, le32_to_cpu(msg->hdr.data_len));
+			   payload_len, bytes);
 		goto bad_put;
 	}
 
@@ -1244,21 +1525,9 @@
 	for (i = 0; i < numops; i++)
 		req->r_reply_op_result[i] = ceph_decode_32(&p);
 
-	/*
-	 * if this connection filled our message, drop our reference now, to
-	 * avoid a (safe but slower) revoke later.
-	 */
-	if (req->r_con_filling_msg == con && req->r_reply == msg) {
-		dout(" dropping con_filling_msg ref %p\n", con);
-		req->r_con_filling_msg = NULL;
-		con->ops->put(con);
-	}
-
 	if (!req->r_got_reply) {
-		unsigned int bytes;
 
 		req->r_result = result;
-		bytes = le32_to_cpu(msg->hdr.data_len);
 		dout("handle_reply result %d bytes %d\n", req->r_result,
 		     bytes);
 		if (req->r_result == 0)
@@ -1286,7 +1555,11 @@
 	    ((flags & CEPH_OSD_FLAG_WRITE) == 0))
 		__unregister_request(osdc, req);
 
+	already_completed = req->r_completed;
+	req->r_completed = 1;
 	mutex_unlock(&osdc->request_mutex);
+	if (already_completed)
+		goto done;
 
 	if (req->r_callback)
 		req->r_callback(req, msg);
@@ -1303,6 +1576,8 @@
 
 bad_put:
 	ceph_osdc_put_request(req);
+bad_mutex:
+	mutex_unlock(&osdc->request_mutex);
 bad:
 	pr_err("corrupt osd_op_reply got %d %d\n",
 	       (int)msg->front.iov_len, le32_to_cpu(msg->hdr.front_len));
@@ -1736,6 +2011,104 @@
 }
 
 /*
+ * build new request AND message
+ *
+ */
+void ceph_osdc_build_request(struct ceph_osd_request *req, u64 off,
+				struct ceph_snap_context *snapc, u64 snap_id,
+				struct timespec *mtime)
+{
+	struct ceph_msg *msg = req->r_request;
+	void *p;
+	size_t msg_size;
+	int flags = req->r_flags;
+	u64 data_len;
+	unsigned int i;
+
+	req->r_snapid = snap_id;
+	req->r_snapc = ceph_get_snap_context(snapc);
+
+	/* encode request */
+	msg->hdr.version = cpu_to_le16(4);
+
+	p = msg->front.iov_base;
+	ceph_encode_32(&p, 1);   /* client_inc  is always 1 */
+	req->r_request_osdmap_epoch = p;
+	p += 4;
+	req->r_request_flags = p;
+	p += 4;
+	if (req->r_flags & CEPH_OSD_FLAG_WRITE)
+		ceph_encode_timespec(p, mtime);
+	p += sizeof(struct ceph_timespec);
+	req->r_request_reassert_version = p;
+	p += sizeof(struct ceph_eversion); /* will get filled in */
+
+	/* oloc */
+	ceph_encode_8(&p, 4);
+	ceph_encode_8(&p, 4);
+	ceph_encode_32(&p, 8 + 4 + 4);
+	req->r_request_pool = p;
+	p += 8;
+	ceph_encode_32(&p, -1);  /* preferred */
+	ceph_encode_32(&p, 0);   /* key len */
+
+	ceph_encode_8(&p, 1);
+	req->r_request_pgid = p;
+	p += 8 + 4;
+	ceph_encode_32(&p, -1);  /* preferred */
+
+	/* oid */
+	ceph_encode_32(&p, req->r_oid_len);
+	memcpy(p, req->r_oid, req->r_oid_len);
+	dout("oid '%.*s' len %d\n", req->r_oid_len, req->r_oid, req->r_oid_len);
+	p += req->r_oid_len;
+
+	/* ops--can imply data */
+	ceph_encode_16(&p, (u16)req->r_num_ops);
+	data_len = 0;
+	for (i = 0; i < req->r_num_ops; i++) {
+		data_len += osd_req_encode_op(req, p, i);
+		p += sizeof(struct ceph_osd_op);
+	}
+
+	/* snaps */
+	ceph_encode_64(&p, req->r_snapid);
+	ceph_encode_64(&p, req->r_snapc ? req->r_snapc->seq : 0);
+	ceph_encode_32(&p, req->r_snapc ? req->r_snapc->num_snaps : 0);
+	if (req->r_snapc) {
+		for (i = 0; i < snapc->num_snaps; i++) {
+			ceph_encode_64(&p, req->r_snapc->snaps[i]);
+		}
+	}
+
+	req->r_request_attempts = p;
+	p += 4;
+
+	/* data */
+	if (flags & CEPH_OSD_FLAG_WRITE) {
+		u16 data_off;
+
+		/*
+		 * The header "data_off" is a hint to the receiver
+		 * allowing it to align received data into its
+		 * buffers such that there's no need to re-copy
+		 * it before writing it to disk (direct I/O).
+		 */
+		data_off = (u16) (off & 0xffff);
+		req->r_request->hdr.data_off = cpu_to_le16(data_off);
+	}
+	req->r_request->hdr.data_len = cpu_to_le32(data_len);
+
+	BUG_ON(p > msg->front.iov_base + msg->front.iov_len);
+	msg_size = p - msg->front.iov_base;
+	msg->front.iov_len = msg_size;
+	msg->hdr.front_len = cpu_to_le32(msg_size);
+
+	dout("build_request msg_size was %d\n", (int)msg_size);
+}
+EXPORT_SYMBOL(ceph_osdc_build_request);
+
+/*
  * Register request, send initial attempt.
  */
 int ceph_osdc_start_request(struct ceph_osd_client *osdc,
@@ -1744,41 +2117,26 @@
 {
 	int rc = 0;
 
-	req->r_request->pages = req->r_pages;
-	req->r_request->nr_pages = req->r_num_pages;
-#ifdef CONFIG_BLOCK
-	req->r_request->bio = req->r_bio;
-#endif
-	req->r_request->trail = &req->r_trail;
-
-	register_request(osdc, req);
-
 	down_read(&osdc->map_sem);
 	mutex_lock(&osdc->request_mutex);
-	/*
-	 * a racing kick_requests() may have sent the message for us
-	 * while we dropped request_mutex above, so only send now if
-	 * the request still han't been touched yet.
-	 */
-	if (req->r_sent == 0) {
-		rc = __map_request(osdc, req, 0);
-		if (rc < 0) {
-			if (nofail) {
-				dout("osdc_start_request failed map, "
-				     " will retry %lld\n", req->r_tid);
-				rc = 0;
-			}
-			goto out_unlock;
+	__register_request(osdc, req);
+	WARN_ON(req->r_sent);
+	rc = __map_request(osdc, req, 0);
+	if (rc < 0) {
+		if (nofail) {
+			dout("osdc_start_request failed map, "
+				" will retry %lld\n", req->r_tid);
+			rc = 0;
 		}
-		if (req->r_osd == NULL) {
-			dout("send_request %p no up osds in pg\n", req);
-			ceph_monc_request_next_osdmap(&osdc->client->monc);
-		} else {
-			__send_request(osdc, req);
-		}
-		rc = 0;
+		goto out_unlock;
 	}
-
+	if (req->r_osd == NULL) {
+		dout("send_request %p no up osds in pg\n", req);
+		ceph_monc_request_next_osdmap(&osdc->client->monc);
+	} else {
+		__send_queued(osdc);
+	}
+	rc = 0;
 out_unlock:
 	mutex_unlock(&osdc->request_mutex);
 	up_read(&osdc->map_sem);
@@ -1940,18 +2298,22 @@
 
 	dout("readpages on ino %llx.%llx on %llu~%llu\n", vino.ino,
 	     vino.snap, off, *plen);
-	req = ceph_osdc_new_request(osdc, layout, vino, off, plen,
+	req = ceph_osdc_new_request(osdc, layout, vino, off, plen, 1,
 				    CEPH_OSD_OP_READ, CEPH_OSD_FLAG_READ,
-				    NULL, 0, truncate_seq, truncate_size, NULL,
-				    false, page_align);
+				    NULL, truncate_seq, truncate_size,
+				    false);
 	if (IS_ERR(req))
 		return PTR_ERR(req);
 
 	/* it may be a short read due to an object boundary */
-	req->r_pages = pages;
 
-	dout("readpages  final extent is %llu~%llu (%d pages align %d)\n",
-	     off, *plen, req->r_num_pages, page_align);
+	osd_req_op_extent_osd_data_pages(req, 0,
+				pages, *plen, page_align, false, false);
+
+	dout("readpages  final extent is %llu~%llu (%llu bytes align %d)\n",
+	     off, *plen, *plen, page_align);
+
+	ceph_osdc_build_request(req, off, NULL, vino.snap, NULL);
 
 	rc = ceph_osdc_start_request(osdc, req, false);
 	if (!rc)
@@ -1978,20 +2340,21 @@
 	int rc = 0;
 	int page_align = off & ~PAGE_MASK;
 
-	BUG_ON(vino.snap != CEPH_NOSNAP);
-	req = ceph_osdc_new_request(osdc, layout, vino, off, &len,
+	BUG_ON(vino.snap != CEPH_NOSNAP);	/* snapshots aren't writeable */
+	req = ceph_osdc_new_request(osdc, layout, vino, off, &len, 1,
 				    CEPH_OSD_OP_WRITE,
 				    CEPH_OSD_FLAG_ONDISK | CEPH_OSD_FLAG_WRITE,
-				    snapc, 0,
-				    truncate_seq, truncate_size, mtime,
-				    true, page_align);
+				    snapc, truncate_seq, truncate_size,
+				    true);
 	if (IS_ERR(req))
 		return PTR_ERR(req);
 
 	/* it may be a short write due to an object boundary */
-	req->r_pages = pages;
-	dout("writepages %llu~%llu (%d pages)\n", off, len,
-	     req->r_num_pages);
+	osd_req_op_extent_osd_data_pages(req, 0, pages, len, page_align,
+				false, false);
+	dout("writepages %llu~%llu (%llu bytes)\n", off, len, len);
+
+	ceph_osdc_build_request(req, off, snapc, CEPH_NOSNAP, mtime);
 
 	rc = ceph_osdc_start_request(osdc, req, true);
 	if (!rc)
@@ -2005,6 +2368,26 @@
 }
 EXPORT_SYMBOL(ceph_osdc_writepages);
 
+int ceph_osdc_setup(void)
+{
+	BUG_ON(ceph_osd_request_cache);
+	ceph_osd_request_cache = kmem_cache_create("ceph_osd_request",
+					sizeof (struct ceph_osd_request),
+					__alignof__(struct ceph_osd_request),
+					0, NULL);
+
+	return ceph_osd_request_cache ? 0 : -ENOMEM;
+}
+EXPORT_SYMBOL(ceph_osdc_setup);
+
+void ceph_osdc_cleanup(void)
+{
+	BUG_ON(!ceph_osd_request_cache);
+	kmem_cache_destroy(ceph_osd_request_cache);
+	ceph_osd_request_cache = NULL;
+}
+EXPORT_SYMBOL(ceph_osdc_cleanup);
+
 /*
  * handle incoming message
  */
@@ -2064,13 +2447,10 @@
 		goto out;
 	}
 
-	if (req->r_con_filling_msg) {
+	if (req->r_reply->con)
 		dout("%s revoking msg %p from old con %p\n", __func__,
-		     req->r_reply, req->r_con_filling_msg);
-		ceph_msg_revoke_incoming(req->r_reply);
-		req->r_con_filling_msg->ops->put(req->r_con_filling_msg);
-		req->r_con_filling_msg = NULL;
-	}
+		     req->r_reply, req->r_reply->con);
+	ceph_msg_revoke_incoming(req->r_reply);
 
 	if (front > req->r_reply->front.iov_len) {
 		pr_warning("get_reply front %d > preallocated %d\n",
@@ -2084,26 +2464,29 @@
 	m = ceph_msg_get(req->r_reply);
 
 	if (data_len > 0) {
-		int want = calc_pages_for(req->r_page_alignment, data_len);
+		struct ceph_osd_data *osd_data;
 
-		if (req->r_pages && unlikely(req->r_num_pages < want)) {
-			pr_warning("tid %lld reply has %d bytes %d pages, we"
-				   " had only %d pages ready\n", tid, data_len,
-				   want, req->r_num_pages);
-			*skip = 1;
-			ceph_msg_put(m);
-			m = NULL;
-			goto out;
+		/*
+		 * XXX This is assuming there is only one op containing
+		 * XXX page data.  Probably OK for reads, but this
+		 * XXX ought to be done more generally.
+		 */
+		osd_data = osd_req_op_extent_osd_data(req, 0);
+		if (osd_data->type == CEPH_OSD_DATA_TYPE_PAGES) {
+			if (osd_data->pages &&
+				unlikely(osd_data->length < data_len)) {
+
+				pr_warning("tid %lld reply has %d bytes "
+					"we had only %llu bytes ready\n",
+					tid, data_len, osd_data->length);
+				*skip = 1;
+				ceph_msg_put(m);
+				m = NULL;
+				goto out;
+			}
 		}
-		m->pages = req->r_pages;
-		m->nr_pages = req->r_num_pages;
-		m->page_alignment = req->r_page_alignment;
-#ifdef CONFIG_BLOCK
-		m->bio = req->r_bio;
-#endif
 	}
 	*skip = 0;
-	req->r_con_filling_msg = con->ops->get(con);
 	dout("get_reply tid %lld %p\n", tid, m);
 
 out:
@@ -2168,13 +2551,17 @@
 	struct ceph_auth_handshake *auth = &o->o_auth;
 
 	if (force_new && auth->authorizer) {
-		if (ac->ops && ac->ops->destroy_authorizer)
-			ac->ops->destroy_authorizer(ac, auth->authorizer);
+		ceph_auth_destroy_authorizer(ac, auth->authorizer);
 		auth->authorizer = NULL;
 	}
-	if (!auth->authorizer && ac->ops && ac->ops->create_authorizer) {
-		int ret = ac->ops->create_authorizer(ac, CEPH_ENTITY_TYPE_OSD,
-							auth);
+	if (!auth->authorizer) {
+		int ret = ceph_auth_create_authorizer(ac, CEPH_ENTITY_TYPE_OSD,
+						      auth);
+		if (ret)
+			return ERR_PTR(ret);
+	} else {
+		int ret = ceph_auth_update_authorizer(ac, CEPH_ENTITY_TYPE_OSD,
+						     auth);
 		if (ret)
 			return ERR_PTR(ret);
 	}
@@ -2190,11 +2577,7 @@
 	struct ceph_osd_client *osdc = o->o_osdc;
 	struct ceph_auth_client *ac = osdc->client->monc.auth;
 
-	/*
-	 * XXX If ac->ops or ac->ops->verify_authorizer_reply is null,
-	 * XXX which do we do:  succeed or fail?
-	 */
-	return ac->ops->verify_authorizer_reply(ac, o->o_auth.authorizer, len);
+	return ceph_auth_verify_authorizer_reply(ac, o->o_auth.authorizer, len);
 }
 
 static int invalidate_authorizer(struct ceph_connection *con)
@@ -2203,9 +2586,7 @@
 	struct ceph_osd_client *osdc = o->o_osdc;
 	struct ceph_auth_client *ac = osdc->client->monc.auth;
 
-	if (ac->ops && ac->ops->invalidate_authorizer)
-		ac->ops->invalidate_authorizer(ac, CEPH_ENTITY_TYPE_OSD);
-
+	ceph_auth_invalidate_authorizer(ac, CEPH_ENTITY_TYPE_OSD);
 	return ceph_monc_validate_auth(&osdc->client->monc);
 }
 
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index 4543b9a..603ddd9 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -654,24 +654,6 @@
 	return 0;
 }
 
-static int __decode_pgid(void **p, void *end, struct ceph_pg *pg)
-{
-	u8 v;
-
-	ceph_decode_need(p, end, 1+8+4+4, bad);
-	v = ceph_decode_8(p);
-	if (v != 1)
-		goto bad;
-	pg->pool = ceph_decode_64(p);
-	pg->seed = ceph_decode_32(p);
-	*p += 4; /* skip preferred */
-	return 0;
-
-bad:
-	dout("error decoding pgid\n");
-	return -EINVAL;
-}
-
 /*
  * decode a full map.
  */
@@ -765,7 +747,7 @@
 		struct ceph_pg pgid;
 		struct ceph_pg_mapping *pg;
 
-		err = __decode_pgid(p, end, &pgid);
+		err = ceph_decode_pgid(p, end, &pgid);
 		if (err)
 			goto bad;
 		ceph_decode_need(p, end, sizeof(u32), bad);
@@ -983,7 +965,7 @@
 		struct ceph_pg pgid;
 		u32 pglen;
 
-		err = __decode_pgid(p, end, &pgid);
+		err = ceph_decode_pgid(p, end, &pgid);
 		if (err)
 			goto bad;
 		ceph_decode_need(p, end, sizeof(u32), bad);
@@ -1111,27 +1093,22 @@
  * calculate an object layout (i.e. pgid) from an oid,
  * file_layout, and osdmap
  */
-int ceph_calc_object_layout(struct ceph_pg *pg,
-			    const char *oid,
-			    struct ceph_file_layout *fl,
-			    struct ceph_osdmap *osdmap)
+int ceph_calc_ceph_pg(struct ceph_pg *pg, const char *oid,
+			struct ceph_osdmap *osdmap, uint64_t pool)
 {
-	unsigned int num, num_mask;
-	struct ceph_pg_pool_info *pool;
+	struct ceph_pg_pool_info *pool_info;
 
 	BUG_ON(!osdmap);
-	pg->pool = le32_to_cpu(fl->fl_pg_pool);
-	pool = __lookup_pg_pool(&osdmap->pg_pools, pg->pool);
-	if (!pool)
+	pool_info = __lookup_pg_pool(&osdmap->pg_pools, pool);
+	if (!pool_info)
 		return -EIO;
-	pg->seed = ceph_str_hash(pool->object_hash, oid, strlen(oid));
-	num = pool->pg_num;
-	num_mask = pool->pg_num_mask;
+	pg->pool = pool;
+	pg->seed = ceph_str_hash(pool_info->object_hash, oid, strlen(oid));
 
-	dout("calc_object_layout '%s' pgid %lld.%x\n", oid, pg->pool, pg->seed);
+	dout("%s '%s' pgid %lld.%x\n", __func__, oid, pg->pool, pg->seed);
 	return 0;
 }
-EXPORT_SYMBOL(ceph_calc_object_layout);
+EXPORT_SYMBOL(ceph_calc_ceph_pg);
 
 /*
  * Calculate raw osd vector for the given pgid.  Return pointer to osd
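
ceph_calc_ceph_pg() above reduces placement to hashing the object name
under the pool's configured hash and pairing the result with the pool id.
A small userspace sketch of that shape; FNV-1a stands in for
ceph_str_hash() purely so the example runs, and the oid string mirrors the
"%llx.%08llx" format built in ceph_osdc_new_request().

#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct ceph_pg { uint64_t pool; uint32_t seed; };

/* Stand-in hash; the real pool object_hash is rjenkins or similar. */
static uint32_t fnv1a(const char *s, size_t len)
{
	uint32_t h = 2166136261u;

	while (len--)
		h = (h ^ (uint8_t)*s++) * 16777619u;
	return h;
}

static int calc_pg(struct ceph_pg *pg, const char *oid, uint64_t pool)
{
	pg->pool = pool;
	pg->seed = fnv1a(oid, strlen(oid));
	return 0;
}

int main(void)
{
	struct ceph_pg pg;

	calc_pg(&pg, "100.00000000", 2);
	printf("pgid %llu.%x\n", (unsigned long long)pg.pool, pg.seed);
	return 0;
}
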
diff --git a/net/ceph/snapshot.c b/net/ceph/snapshot.c
new file mode 100644
index 0000000..154683f
--- /dev/null
+++ b/net/ceph/snapshot.c
@@ -0,0 +1,78 @@
+/*
+ * snapshot.c    Ceph snapshot context utility routines (part of libceph)
+ *
+ * Copyright (C) 2013 Inktank Storage, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <stddef.h>
+
+#include <linux/types.h>
+#include <linux/export.h>
+#include <linux/ceph/libceph.h>
+
+/*
+ * Ceph snapshot contexts are reference counted objects, and the
+ * returned structure holds a single reference.  Acquire additional
+ * references with ceph_get_snap_context(), and release them with
+ * ceph_put_snap_context().  When the reference count reaches zero
+ * the entire structure is freed.
+ */
+
+/*
+ * Create a new ceph snapshot context large enough to hold the
+ * indicated number of snapshot ids (which can be 0).  Caller has
+ * to fill in snapc->seq and snapc->snaps[0..snap_count-1].
+ *
+ * Returns a null pointer if an error occurs.
+ */
+struct ceph_snap_context *ceph_create_snap_context(u32 snap_count,
+						gfp_t gfp_flags)
+{
+	struct ceph_snap_context *snapc;
+	size_t size;
+
+	size = sizeof (struct ceph_snap_context);
+	size += snap_count * sizeof (snapc->snaps[0]);
+	snapc = kzalloc(size, gfp_flags);
+	if (!snapc)
+		return NULL;
+
+	atomic_set(&snapc->nref, 1);
+	snapc->num_snaps = snap_count;
+
+	return snapc;
+}
+EXPORT_SYMBOL(ceph_create_snap_context);
+
+struct ceph_snap_context *ceph_get_snap_context(struct ceph_snap_context *sc)
+{
+	if (sc)
+		atomic_inc(&sc->nref);
+	return sc;
+}
+EXPORT_SYMBOL(ceph_get_snap_context);
+
+void ceph_put_snap_context(struct ceph_snap_context *sc)
+{
+	if (!sc)
+		return;
+	if (atomic_dec_and_test(&sc->nref)) {
+		/*printk(" deleting snap_context %p\n", sc);*/
+		kfree(sc);
+	}
+}
+EXPORT_SYMBOL(ceph_put_snap_context);
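
The comment block above fixes the refcounting contract: the creator owns
one reference, ceph_get_snap_context() takes another, and the final
ceph_put_snap_context() frees the structure, snaps array included. A
self-contained userspace model of that lifecycle; the names mirror the
kernel's, but this is an illustration, not libceph.

#include <stdatomic.h>
#include <stdlib.h>

struct snap_context {
	atomic_int nref;
	unsigned int num_snaps;
	unsigned long long snaps[];	/* flexible array, like snapc->snaps */
};

static struct snap_context *create_snap_context(unsigned int snap_count)
{
	struct snap_context *sc;

	sc = calloc(1, sizeof(*sc) + snap_count * sizeof(sc->snaps[0]));
	if (!sc)
		return NULL;
	atomic_init(&sc->nref, 1);	/* caller owns the first reference */
	sc->num_snaps = snap_count;
	return sc;
}

static struct snap_context *get_snap_context(struct snap_context *sc)
{
	if (sc)
		atomic_fetch_add(&sc->nref, 1);
	return sc;
}

static void put_snap_context(struct snap_context *sc)
{
	if (sc && atomic_fetch_sub(&sc->nref, 1) == 1)
		free(sc);	/* last reference dropped */
}

int main(void)
{
	struct snap_context *sc = create_snap_context(2);
	struct snap_context *extra = get_snap_context(sc);	/* nref == 2 */

	put_snap_context(extra);	/* nref == 1 */
	put_snap_context(sc);		/* frees */
	return 0;
}
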
diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig
index d01b24b..779262f 100644
--- a/virt/kvm/Kconfig
+++ b/virt/kvm/Kconfig
@@ -6,6 +6,9 @@
 config HAVE_KVM_IRQCHIP
        bool
 
+config HAVE_KVM_IRQ_ROUTING
+       bool
+
 config HAVE_KVM_EVENTFD
        bool
        select EVENTFD
diff --git a/virt/kvm/assigned-dev.c b/virt/kvm/assigned-dev.c
index 3642239..8db4370 100644
--- a/virt/kvm/assigned-dev.c
+++ b/virt/kvm/assigned-dev.c
@@ -80,11 +80,12 @@
 		spin_lock(&assigned_dev->intx_mask_lock);
 		if (!(assigned_dev->flags & KVM_DEV_ASSIGN_MASK_INTX))
 			kvm_set_irq(assigned_dev->kvm,
-				    assigned_dev->irq_source_id, vector, 1);
+				    assigned_dev->irq_source_id, vector, 1,
+				    false);
 		spin_unlock(&assigned_dev->intx_mask_lock);
 	} else
 		kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id,
-			    vector, 1);
+			    vector, 1, false);
 }
 
 static irqreturn_t kvm_assigned_dev_thread_intx(int irq, void *dev_id)
@@ -165,7 +166,7 @@
 		container_of(kian, struct kvm_assigned_dev_kernel,
 			     ack_notifier);
 
-	kvm_set_irq(dev->kvm, dev->irq_source_id, dev->guest_irq, 0);
+	kvm_set_irq(dev->kvm, dev->irq_source_id, dev->guest_irq, 0, false);
 
 	spin_lock(&dev->intx_mask_lock);
 
@@ -188,7 +189,7 @@
 
 		if (reassert)
 			kvm_set_irq(dev->kvm, dev->irq_source_id,
-				    dev->guest_irq, 1);
+				    dev->guest_irq, 1, false);
 	}
 
 	spin_unlock(&dev->intx_mask_lock);
@@ -202,7 +203,7 @@
 						&assigned_dev->ack_notifier);
 
 	kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id,
-		    assigned_dev->guest_irq, 0);
+		    assigned_dev->guest_irq, 0, false);
 
 	if (assigned_dev->irq_source_id != -1)
 		kvm_free_irq_source_id(kvm, assigned_dev->irq_source_id);
@@ -901,7 +902,7 @@
 	if (match->irq_requested_type & KVM_DEV_IRQ_GUEST_INTX) {
 		if (assigned_dev->flags & KVM_DEV_ASSIGN_MASK_INTX) {
 			kvm_set_irq(match->kvm, match->irq_source_id,
-				    match->guest_irq, 0);
+				    match->guest_irq, 0, false);
 			/*
 			 * Masking at hardware-level is performed on demand,
 			 * i.e. when an IRQ actually arrives at the host.
@@ -982,36 +983,6 @@
 			goto out;
 		break;
 	}
-#ifdef KVM_CAP_IRQ_ROUTING
-	case KVM_SET_GSI_ROUTING: {
-		struct kvm_irq_routing routing;
-		struct kvm_irq_routing __user *urouting;
-		struct kvm_irq_routing_entry *entries;
-
-		r = -EFAULT;
-		if (copy_from_user(&routing, argp, sizeof(routing)))
-			goto out;
-		r = -EINVAL;
-		if (routing.nr >= KVM_MAX_IRQ_ROUTES)
-			goto out;
-		if (routing.flags)
-			goto out;
-		r = -ENOMEM;
-		entries = vmalloc(routing.nr * sizeof(*entries));
-		if (!entries)
-			goto out;
-		r = -EFAULT;
-		urouting = argp;
-		if (copy_from_user(entries, urouting->entries,
-				   routing.nr * sizeof(*entries)))
-			goto out_free_irq_routing;
-		r = kvm_set_irq_routing(kvm, entries, routing.nr,
-					routing.flags);
-	out_free_irq_routing:
-		vfree(entries);
-		break;
-	}
-#endif /* KVM_CAP_IRQ_ROUTING */
 #ifdef __KVM_HAVE_MSIX
 	case KVM_ASSIGN_SET_MSIX_NR: {
 		struct kvm_assigned_msix_nr entry_nr;
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index adb17f2..64ee720 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -35,7 +35,7 @@
 
 #include "iodev.h"
 
-#ifdef __KVM_HAVE_IOAPIC
+#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
 /*
  * --------------------------------------------------------------------
  * irqfd: Allows an fd to be used to inject an interrupt to the guest
@@ -100,11 +100,13 @@
 	struct kvm *kvm = irqfd->kvm;
 
 	if (!irqfd->resampler) {
-		kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 1);
-		kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 0);
+		kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 1,
+				false);
+		kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 0,
+				false);
 	} else
 		kvm_set_irq(kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID,
-			    irqfd->gsi, 1);
+			    irqfd->gsi, 1, false);
 }
 
 /*
@@ -121,7 +123,7 @@
 	resampler = container_of(kian, struct _irqfd_resampler, notifier);
 
 	kvm_set_irq(resampler->kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID,
-		    resampler->notifier.gsi, 0);
+		    resampler->notifier.gsi, 0, false);
 
 	rcu_read_lock();
 
@@ -146,7 +148,7 @@
 		list_del(&resampler->link);
 		kvm_unregister_irq_ack_notifier(kvm, &resampler->notifier);
 		kvm_set_irq(kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID,
-			    resampler->notifier.gsi, 0);
+			    resampler->notifier.gsi, 0, false);
 		kfree(resampler);
 	}
 
@@ -225,7 +227,8 @@
 		irq = rcu_dereference(irqfd->irq_entry);
 		/* An event has been signaled, inject an interrupt */
 		if (irq)
-			kvm_set_msi(irq, kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1);
+			kvm_set_msi(irq, kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1,
+					false);
 		else
 			schedule_work(&irqfd->inject);
 		rcu_read_unlock();
@@ -430,7 +433,7 @@
 void
 kvm_eventfd_init(struct kvm *kvm)
 {
-#ifdef __KVM_HAVE_IOAPIC
+#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
 	spin_lock_init(&kvm->irqfds.lock);
 	INIT_LIST_HEAD(&kvm->irqfds.items);
 	INIT_LIST_HEAD(&kvm->irqfds.resampler_list);
@@ -439,7 +442,7 @@
 	INIT_LIST_HEAD(&kvm->ioeventfds);
 }
 
-#ifdef __KVM_HAVE_IOAPIC
+#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
 /*
  * shutdown any irqfd's that match fd+gsi
  */
@@ -543,7 +546,7 @@
  * aggregated from all vm* instances. We need our own isolated single-thread
  * queue to prevent deadlock against flushing the normal work-queue.
  */
-static int __init irqfd_module_init(void)
+int kvm_irqfd_init(void)
 {
 	irqfd_cleanup_wq = create_singlethread_workqueue("kvm-irqfd-cleanup");
 	if (!irqfd_cleanup_wq)
@@ -552,13 +555,10 @@
 	return 0;
 }
 
-static void __exit irqfd_module_exit(void)
+void kvm_irqfd_exit(void)
 {
 	destroy_workqueue(irqfd_cleanup_wq);
 }
-
-module_init(irqfd_module_init);
-module_exit(irqfd_module_exit);
 #endif
 
 /*
@@ -577,6 +577,7 @@
 	struct eventfd_ctx  *eventfd;
 	u64                  datamatch;
 	struct kvm_io_device dev;
+	u8                   bus_idx;
 	bool                 wildcard;
 };
 
@@ -669,7 +670,8 @@
 	struct _ioeventfd *_p;
 
 	list_for_each_entry(_p, &kvm->ioeventfds, list)
-		if (_p->addr == p->addr && _p->length == p->length &&
+		if (_p->bus_idx == p->bus_idx &&
+		    _p->addr == p->addr && _p->length == p->length &&
 		    (_p->wildcard || p->wildcard ||
 		     _p->datamatch == p->datamatch))
 			return true;
@@ -677,15 +679,24 @@
 	return false;
 }
 
+static enum kvm_bus ioeventfd_bus_from_flags(__u32 flags)
+{
+	if (flags & KVM_IOEVENTFD_FLAG_PIO)
+		return KVM_PIO_BUS;
+	if (flags & KVM_IOEVENTFD_FLAG_VIRTIO_CCW_NOTIFY)
+		return KVM_VIRTIO_CCW_NOTIFY_BUS;
+	return KVM_MMIO_BUS;
+}
+
 static int
 kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
 {
-	int                       pio = args->flags & KVM_IOEVENTFD_FLAG_PIO;
-	enum kvm_bus              bus_idx = pio ? KVM_PIO_BUS : KVM_MMIO_BUS;
+	enum kvm_bus              bus_idx;
 	struct _ioeventfd        *p;
 	struct eventfd_ctx       *eventfd;
 	int                       ret;
 
+	bus_idx = ioeventfd_bus_from_flags(args->flags);
 	/* must be natural-word sized */
 	switch (args->len) {
 	case 1:
@@ -717,6 +728,7 @@
 
 	INIT_LIST_HEAD(&p->list);
 	p->addr    = args->addr;
+	p->bus_idx = bus_idx;
 	p->length  = args->len;
 	p->eventfd = eventfd;
 
@@ -760,12 +772,12 @@
 static int
 kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
 {
-	int                       pio = args->flags & KVM_IOEVENTFD_FLAG_PIO;
-	enum kvm_bus              bus_idx = pio ? KVM_PIO_BUS : KVM_MMIO_BUS;
+	enum kvm_bus              bus_idx;
 	struct _ioeventfd        *p, *tmp;
 	struct eventfd_ctx       *eventfd;
 	int                       ret = -ENOENT;
 
+	bus_idx = ioeventfd_bus_from_flags(args->flags);
 	eventfd = eventfd_ctx_fdget(args->fd);
 	if (IS_ERR(eventfd))
 		return PTR_ERR(eventfd);
@@ -775,7 +787,8 @@
 	list_for_each_entry_safe(p, tmp, &kvm->ioeventfds, list) {
 		bool wildcard = !(args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH);
 
-		if (p->eventfd != eventfd  ||
+		if (p->bus_idx != bus_idx ||
+		    p->eventfd != eventfd  ||
 		    p->addr != args->addr  ||
 		    p->length != args->len ||
 		    p->wildcard != wildcard)
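
With bus_idx now part of both the duplicate check and the deassign match,
two ioeventfds may share an address as long as they live on different
buses, and ioeventfd_bus_from_flags() is the one place the flag-to-bus
decision is made. A sketch of that mapping; the flag bit positions below
are assumed for illustration and are not the real KVM UAPI values.

#include <stdio.h>

enum kvm_bus { KVM_MMIO_BUS, KVM_PIO_BUS, KVM_VIRTIO_CCW_NOTIFY_BUS };

#define FLAG_PIO		(1u << 0)	/* assumed bit positions */
#define FLAG_VIRTIO_CCW_NOTIFY	(1u << 1)

static enum kvm_bus bus_from_flags(unsigned int flags)
{
	if (flags & FLAG_PIO)
		return KVM_PIO_BUS;
	if (flags & FLAG_VIRTIO_CCW_NOTIFY)
		return KVM_VIRTIO_CCW_NOTIFY_BUS;
	return KVM_MMIO_BUS;	/* the default bus */
}

int main(void)
{
	printf("%d %d %d\n", bus_from_flags(0), bus_from_flags(FLAG_PIO),
	       bus_from_flags(FLAG_VIRTIO_CCW_NOTIFY));
	return 0;
}
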
diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index 5ba005c..2d68297 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -50,7 +50,8 @@
 #else
 #define ioapic_debug(fmt, arg...)
 #endif
-static int ioapic_deliver(struct kvm_ioapic *vioapic, int irq);
+static int ioapic_deliver(struct kvm_ioapic *vioapic, int irq,
+		bool line_status);
 
 static unsigned long ioapic_read_indirect(struct kvm_ioapic *ioapic,
 					  unsigned long addr,
@@ -90,7 +91,80 @@
 	return result;
 }
 
-static int ioapic_service(struct kvm_ioapic *ioapic, unsigned int idx)
+static void rtc_irq_eoi_tracking_reset(struct kvm_ioapic *ioapic)
+{
+	ioapic->rtc_status.pending_eoi = 0;
+	bitmap_zero(ioapic->rtc_status.dest_map, KVM_MAX_VCPUS);
+}
+
+static void __rtc_irq_eoi_tracking_restore_one(struct kvm_vcpu *vcpu)
+{
+	bool new_val, old_val;
+	struct kvm_ioapic *ioapic = vcpu->kvm->arch.vioapic;
+	union kvm_ioapic_redirect_entry *e;
+
+	e = &ioapic->redirtbl[RTC_GSI];
+	if (!kvm_apic_match_dest(vcpu, NULL, 0,	e->fields.dest_id,
+				e->fields.dest_mode))
+		return;
+
+	new_val = kvm_apic_pending_eoi(vcpu, e->fields.vector);
+	old_val = test_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map);
+
+	if (new_val == old_val)
+		return;
+
+	if (new_val) {
+		__set_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map);
+		ioapic->rtc_status.pending_eoi++;
+	} else {
+		__clear_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map);
+		ioapic->rtc_status.pending_eoi--;
+	}
+
+	WARN_ON(ioapic->rtc_status.pending_eoi < 0);
+}
+
+void kvm_rtc_eoi_tracking_restore_one(struct kvm_vcpu *vcpu)
+{
+	struct kvm_ioapic *ioapic = vcpu->kvm->arch.vioapic;
+
+	spin_lock(&ioapic->lock);
+	__rtc_irq_eoi_tracking_restore_one(vcpu);
+	spin_unlock(&ioapic->lock);
+}
+
+static void kvm_rtc_eoi_tracking_restore_all(struct kvm_ioapic *ioapic)
+{
+	struct kvm_vcpu *vcpu;
+	int i;
+
+	if (RTC_GSI >= IOAPIC_NUM_PINS)
+		return;
+
+	rtc_irq_eoi_tracking_reset(ioapic);
+	kvm_for_each_vcpu(i, vcpu, ioapic->kvm)
+	    __rtc_irq_eoi_tracking_restore_one(vcpu);
+}
+
+static void rtc_irq_eoi(struct kvm_ioapic *ioapic, struct kvm_vcpu *vcpu)
+{
+	if (test_and_clear_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map))
+		--ioapic->rtc_status.pending_eoi;
+
+	WARN_ON(ioapic->rtc_status.pending_eoi < 0);
+}
+
+static bool rtc_irq_check_coalesced(struct kvm_ioapic *ioapic)
+{
+	if (ioapic->rtc_status.pending_eoi > 0)
+		return true; /* coalesced */
+
+	return false;
+}
+
+static int ioapic_service(struct kvm_ioapic *ioapic, unsigned int idx,
+		bool line_status)
 {
 	union kvm_ioapic_redirect_entry *pent;
 	int injected = -1;
@@ -98,7 +172,7 @@
 	pent = &ioapic->redirtbl[idx];
 
 	if (!pent->fields.mask) {
-		injected = ioapic_deliver(ioapic, idx);
+		injected = ioapic_deliver(ioapic, idx, line_status);
 		if (injected && pent->fields.trig_mode == IOAPIC_LEVEL_TRIG)
 			pent->fields.remote_irr = 1;
 	}
@@ -119,41 +193,48 @@
 	smp_wmb();
 }
 
-void kvm_ioapic_calculate_eoi_exitmap(struct kvm_vcpu *vcpu,
-					u64 *eoi_exit_bitmap)
+void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap,
+			u32 *tmr)
 {
 	struct kvm_ioapic *ioapic = vcpu->kvm->arch.vioapic;
 	union kvm_ioapic_redirect_entry *e;
-	struct kvm_lapic_irq irqe;
 	int index;
 
 	spin_lock(&ioapic->lock);
-	/* traverse ioapic entry to set eoi exit bitmap*/
 	for (index = 0; index < IOAPIC_NUM_PINS; index++) {
 		e = &ioapic->redirtbl[index];
 		if (!e->fields.mask &&
 			(e->fields.trig_mode == IOAPIC_LEVEL_TRIG ||
 			 kvm_irq_has_notifier(ioapic->kvm, KVM_IRQCHIP_IOAPIC,
-				 index))) {
-			irqe.dest_id = e->fields.dest_id;
-			irqe.vector = e->fields.vector;
-			irqe.dest_mode = e->fields.dest_mode;
-			irqe.delivery_mode = e->fields.delivery_mode << 8;
-			kvm_calculate_eoi_exitmap(vcpu, &irqe, eoi_exit_bitmap);
+				 index) || index == RTC_GSI)) {
+			if (kvm_apic_match_dest(vcpu, NULL, 0,
+				e->fields.dest_id, e->fields.dest_mode)) {
+				__set_bit(e->fields.vector,
+					(unsigned long *)eoi_exit_bitmap);
+				if (e->fields.trig_mode == IOAPIC_LEVEL_TRIG)
+					__set_bit(e->fields.vector,
+						(unsigned long *)tmr);
+			}
 		}
 	}
 	spin_unlock(&ioapic->lock);
 }
-EXPORT_SYMBOL_GPL(kvm_ioapic_calculate_eoi_exitmap);
 
-void kvm_ioapic_make_eoibitmap_request(struct kvm *kvm)
+#ifdef CONFIG_X86
+void kvm_vcpu_request_scan_ioapic(struct kvm *kvm)
 {
 	struct kvm_ioapic *ioapic = kvm->arch.vioapic;
 
-	if (!kvm_apic_vid_enabled(kvm) || !ioapic)
+	if (!ioapic)
 		return;
-	kvm_make_update_eoibitmap_request(kvm);
+	kvm_make_scan_ioapic_request(kvm);
 }
+#else
+void kvm_vcpu_request_scan_ioapic(struct kvm *kvm)
+{
+	return;
+}
+#endif
 
 static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
 {
@@ -195,16 +276,17 @@
 			kvm_fire_mask_notifiers(ioapic->kvm, KVM_IRQCHIP_IOAPIC, index, mask_after);
 		if (e->fields.trig_mode == IOAPIC_LEVEL_TRIG
 		    && ioapic->irr & (1 << index))
-			ioapic_service(ioapic, index);
-		kvm_ioapic_make_eoibitmap_request(ioapic->kvm);
+			ioapic_service(ioapic, index, false);
+		kvm_vcpu_request_scan_ioapic(ioapic->kvm);
 		break;
 	}
 }
 
-static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq)
+static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq, bool line_status)
 {
 	union kvm_ioapic_redirect_entry *entry = &ioapic->redirtbl[irq];
 	struct kvm_lapic_irq irqe;
+	int ret;
 
 	ioapic_debug("dest=%x dest_mode=%x delivery_mode=%x "
 		     "vector=%x trig_mode=%x\n",
@@ -220,11 +302,19 @@
 	irqe.level = 1;
 	irqe.shorthand = 0;
 
-	return kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe);
+	if (irq == RTC_GSI && line_status) {
+		BUG_ON(ioapic->rtc_status.pending_eoi != 0);
+		ret = kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe,
+				ioapic->rtc_status.dest_map);
+		ioapic->rtc_status.pending_eoi = ret;
+	} else
+		ret = kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe, NULL);
+
+	return ret;
 }
 
 int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int irq_source_id,
-		       int level)
+		       int level, bool line_status)
 {
 	u32 old_irr;
 	u32 mask = 1 << irq;
@@ -244,13 +334,20 @@
 		ret = 1;
 	} else {
 		int edge = (entry.fields.trig_mode == IOAPIC_EDGE_TRIG);
+
+		if (irq == RTC_GSI && line_status &&
+			rtc_irq_check_coalesced(ioapic)) {
+			ret = 0; /* coalesced */
+			goto out;
+		}
 		ioapic->irr |= mask;
 		if ((edge && old_irr != ioapic->irr) ||
 		    (!edge && !entry.fields.remote_irr))
-			ret = ioapic_service(ioapic, irq);
+			ret = ioapic_service(ioapic, irq, line_status);
 		else
 			ret = 0; /* report coalesced interrupt */
 	}
+out:
 	trace_kvm_ioapic_set_irq(entry.bits, irq, ret == 0);
 	spin_unlock(&ioapic->lock);
 
@@ -267,8 +364,8 @@
 	spin_unlock(&ioapic->lock);
 }
 
-static void __kvm_ioapic_update_eoi(struct kvm_ioapic *ioapic, int vector,
-				     int trigger_mode)
+static void __kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu,
+			struct kvm_ioapic *ioapic, int vector, int trigger_mode)
 {
 	int i;
 
@@ -278,6 +375,8 @@
 		if (ent->fields.vector != vector)
 			continue;
 
+		if (i == RTC_GSI)
+			rtc_irq_eoi(ioapic, vcpu);
 		/*
 		 * We are dropping lock while calling ack notifiers because ack
 		 * notifier callbacks for assigned devices call into IOAPIC
@@ -296,7 +395,7 @@
 		ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG);
 		ent->fields.remote_irr = 0;
 		if (!ent->fields.mask && (ioapic->irr & (1 << i)))
-			ioapic_service(ioapic, i);
+			ioapic_service(ioapic, i, false);
 	}
 }
 
@@ -307,12 +406,12 @@
 	return test_bit(vector, ioapic->handled_vectors);
 }
 
-void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int trigger_mode)
+void kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu, int vector, int trigger_mode)
 {
-	struct kvm_ioapic *ioapic = kvm->arch.vioapic;
+	struct kvm_ioapic *ioapic = vcpu->kvm->arch.vioapic;
 
 	spin_lock(&ioapic->lock);
-	__kvm_ioapic_update_eoi(ioapic, vector, trigger_mode);
+	__kvm_ioapic_update_eoi(vcpu, ioapic, vector, trigger_mode);
 	spin_unlock(&ioapic->lock);
 }
 
@@ -410,7 +509,7 @@
 		break;
 #ifdef	CONFIG_IA64
 	case IOAPIC_REG_EOI:
-		__kvm_ioapic_update_eoi(ioapic, data, IOAPIC_LEVEL_TRIG);
+		__kvm_ioapic_update_eoi(NULL, ioapic, data, IOAPIC_LEVEL_TRIG);
 		break;
 #endif
 
@@ -431,6 +530,7 @@
 	ioapic->ioregsel = 0;
 	ioapic->irr = 0;
 	ioapic->id = 0;
+	rtc_irq_eoi_tracking_reset(ioapic);
 	update_handled_vectors(ioapic);
 }
 
@@ -496,7 +596,8 @@
 	spin_lock(&ioapic->lock);
 	memcpy(ioapic, state, sizeof(struct kvm_ioapic_state));
 	update_handled_vectors(ioapic);
-	kvm_ioapic_make_eoibitmap_request(kvm);
+	kvm_vcpu_request_scan_ioapic(kvm);
+	kvm_rtc_eoi_tracking_restore_all(ioapic);
 	spin_unlock(&ioapic->lock);
 	return 0;
 }
diff --git a/virt/kvm/ioapic.h b/virt/kvm/ioapic.h
index 0400a46..615d8c9 100644
--- a/virt/kvm/ioapic.h
+++ b/virt/kvm/ioapic.h
@@ -34,6 +34,17 @@
 #define	IOAPIC_INIT			0x5
 #define	IOAPIC_EXTINT			0x7
 
+#ifdef CONFIG_X86
+#define RTC_GSI 8
+#else
+#define RTC_GSI -1U
+#endif
+
+struct rtc_status {
+	int pending_eoi;
+	DECLARE_BITMAP(dest_map, KVM_MAX_VCPUS);
+};
+
 struct kvm_ioapic {
 	u64 base_address;
 	u32 ioregsel;
@@ -47,6 +58,7 @@
 	void (*ack_notifier)(void *opaque, int irq);
 	spinlock_t lock;
 	DECLARE_BITMAP(handled_vectors, 256);
+	struct rtc_status rtc_status;
 };
 
 #ifdef DEBUG
@@ -67,24 +79,25 @@
 	return kvm->arch.vioapic;
 }
 
+void kvm_rtc_eoi_tracking_restore_one(struct kvm_vcpu *vcpu);
 int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
 		int short_hand, int dest, int dest_mode);
 int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2);
-void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int trigger_mode);
+void kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu, int vector,
+			int trigger_mode);
 bool kvm_ioapic_handles_vector(struct kvm *kvm, int vector);
 int kvm_ioapic_init(struct kvm *kvm);
 void kvm_ioapic_destroy(struct kvm *kvm);
 int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int irq_source_id,
-		       int level);
+		       int level, bool line_status);
 void kvm_ioapic_clear_all(struct kvm_ioapic *ioapic, int irq_source_id);
 void kvm_ioapic_reset(struct kvm_ioapic *ioapic);
 int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
-		struct kvm_lapic_irq *irq);
+		struct kvm_lapic_irq *irq, unsigned long *dest_map);
 int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state);
 int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state);
-void kvm_ioapic_make_eoibitmap_request(struct kvm *kvm);
-void kvm_ioapic_calculate_eoi_exitmap(struct kvm_vcpu *vcpu,
-					u64 *eoi_exit_bitmap);
-
+void kvm_vcpu_request_scan_ioapic(struct kvm *kvm);
+void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap,
+			u32 *tmr);
 
 #endif
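
The rtc_status bookkeeping declared above maintains a simple invariant:
dest_map records which vcpus the RTC interrupt was delivered to,
pending_eoi counts the EOIs those vcpus still owe, and a new RTC interrupt
is reported as coalesced while pending_eoi is nonzero. A toy model of that
accounting, with illustrative sizes and names:

#include <stdbool.h>
#include <stdio.h>

struct rtc_status {
	int pending_eoi;
	unsigned long dest_map;		/* one bit per vcpu, up to 64 here */
};

static void rtc_deliver(struct rtc_status *rtc, int vcpu_id)
{
	rtc->dest_map |= 1ul << vcpu_id;
	rtc->pending_eoi++;
}

static void rtc_eoi(struct rtc_status *rtc, int vcpu_id)
{
	if (rtc->dest_map & (1ul << vcpu_id)) {
		rtc->dest_map &= ~(1ul << vcpu_id);
		rtc->pending_eoi--;
	}
}

static bool rtc_coalesced(const struct rtc_status *rtc)
{
	return rtc->pending_eoi > 0;
}

int main(void)
{
	struct rtc_status rtc = { 0, 0 };

	rtc_deliver(&rtc, 0);
	rtc_deliver(&rtc, 1);
	printf("coalesced: %d\n", rtc_coalesced(&rtc));	/* 1 */
	rtc_eoi(&rtc, 0);
	rtc_eoi(&rtc, 1);
	printf("coalesced: %d\n", rtc_coalesced(&rtc));	/* 0 */
	return 0;
}
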
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index e9073cf..e2e6b44 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -35,7 +35,8 @@
 #include "ioapic.h"
 
 static int kvm_set_pic_irq(struct kvm_kernel_irq_routing_entry *e,
-			   struct kvm *kvm, int irq_source_id, int level)
+			   struct kvm *kvm, int irq_source_id, int level,
+			   bool line_status)
 {
 #ifdef CONFIG_X86
 	struct kvm_pic *pic = pic_irqchip(kvm);
@@ -46,10 +47,12 @@
 }
 
 static int kvm_set_ioapic_irq(struct kvm_kernel_irq_routing_entry *e,
-			      struct kvm *kvm, int irq_source_id, int level)
+			      struct kvm *kvm, int irq_source_id, int level,
+			      bool line_status)
 {
 	struct kvm_ioapic *ioapic = kvm->arch.vioapic;
-	return kvm_ioapic_set_irq(ioapic, e->irqchip.pin, irq_source_id, level);
+	return kvm_ioapic_set_irq(ioapic, e->irqchip.pin, irq_source_id, level,
+				line_status);
 }
 
 inline static bool kvm_is_dm_lowest_prio(struct kvm_lapic_irq *irq)
@@ -63,7 +66,7 @@
 }
 
 int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
-		struct kvm_lapic_irq *irq)
+		struct kvm_lapic_irq *irq, unsigned long *dest_map)
 {
 	int i, r = -1;
 	struct kvm_vcpu *vcpu, *lowest = NULL;
@@ -74,7 +77,7 @@
 		irq->delivery_mode = APIC_DM_FIXED;
 	}
 
-	if (kvm_irq_delivery_to_apic_fast(kvm, src, irq, &r))
+	if (kvm_irq_delivery_to_apic_fast(kvm, src, irq, &r, dest_map))
 		return r;
 
 	kvm_for_each_vcpu(i, vcpu, kvm) {
@@ -88,7 +91,7 @@
 		if (!kvm_is_dm_lowest_prio(irq)) {
 			if (r < 0)
 				r = 0;
-			r += kvm_apic_set_irq(vcpu, irq);
+			r += kvm_apic_set_irq(vcpu, irq, dest_map);
 		} else if (kvm_lapic_enabled(vcpu)) {
 			if (!lowest)
 				lowest = vcpu;
@@ -98,7 +101,7 @@
 	}
 
 	if (lowest)
-		r = kvm_apic_set_irq(lowest, irq);
+		r = kvm_apic_set_irq(lowest, irq, dest_map);
 
 	return r;
 }
@@ -121,7 +124,7 @@
 }
 
 int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
-		struct kvm *kvm, int irq_source_id, int level)
+		struct kvm *kvm, int irq_source_id, int level, bool line_status)
 {
 	struct kvm_lapic_irq irq;
 
@@ -130,7 +133,7 @@
 
 	kvm_set_msi_irq(e, &irq);
 
-	return kvm_irq_delivery_to_apic(kvm, NULL, &irq);
+	return kvm_irq_delivery_to_apic(kvm, NULL, &irq, NULL);
 }
 
 
@@ -142,63 +145,12 @@
 
 	kvm_set_msi_irq(e, &irq);
 
-	if (kvm_irq_delivery_to_apic_fast(kvm, NULL, &irq, &r))
+	if (kvm_irq_delivery_to_apic_fast(kvm, NULL, &irq, &r, NULL))
 		return r;
 	else
 		return -EWOULDBLOCK;
 }
 
-int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi)
-{
-	struct kvm_kernel_irq_routing_entry route;
-
-	if (!irqchip_in_kernel(kvm) || msi->flags != 0)
-		return -EINVAL;
-
-	route.msi.address_lo = msi->address_lo;
-	route.msi.address_hi = msi->address_hi;
-	route.msi.data = msi->data;
-
-	return kvm_set_msi(&route, kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1);
-}
-
-/*
- * Return value:
- *  < 0   Interrupt was ignored (masked or not delivered for other reasons)
- *  = 0   Interrupt was coalesced (previous irq is still pending)
- *  > 0   Number of CPUs interrupt was delivered to
- */
-int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level)
-{
-	struct kvm_kernel_irq_routing_entry *e, irq_set[KVM_NR_IRQCHIPS];
-	int ret = -1, i = 0;
-	struct kvm_irq_routing_table *irq_rt;
-
-	trace_kvm_set_irq(irq, level, irq_source_id);
-
-	/* Not possible to detect if the guest uses the PIC or the
-	 * IOAPIC.  So set the bit in both. The guest will ignore
-	 * writes to the unused one.
-	 */
-	rcu_read_lock();
-	irq_rt = rcu_dereference(kvm->irq_routing);
-	if (irq < irq_rt->nr_rt_entries)
-		hlist_for_each_entry(e, &irq_rt->map[irq], link)
-			irq_set[i++] = *e;
-	rcu_read_unlock();
-
-	while(i--) {
-		int r;
-		r = irq_set[i].set(&irq_set[i], kvm, irq_source_id, level);
-		if (r < 0)
-			continue;
-
-		ret = r + ((ret < 0) ? 0 : ret);
-	}
-
-	return ret;
-}
-
 /*
  * Deliver an IRQ in an atomic context if we can, or return a failure,
  * user can retry in a process context.
@@ -236,63 +188,6 @@
 	return ret;
 }
 
-bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin)
-{
-	struct kvm_irq_ack_notifier *kian;
-	int gsi;
-
-	rcu_read_lock();
-	gsi = rcu_dereference(kvm->irq_routing)->chip[irqchip][pin];
-	if (gsi != -1)
-		hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list,
-					 link)
-			if (kian->gsi == gsi) {
-				rcu_read_unlock();
-				return true;
-			}
-
-	rcu_read_unlock();
-
-	return false;
-}
-EXPORT_SYMBOL_GPL(kvm_irq_has_notifier);
-
-void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
-{
-	struct kvm_irq_ack_notifier *kian;
-	int gsi;
-
-	trace_kvm_ack_irq(irqchip, pin);
-
-	rcu_read_lock();
-	gsi = rcu_dereference(kvm->irq_routing)->chip[irqchip][pin];
-	if (gsi != -1)
-		hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list,
-					 link)
-			if (kian->gsi == gsi)
-				kian->irq_acked(kian);
-	rcu_read_unlock();
-}
-
-void kvm_register_irq_ack_notifier(struct kvm *kvm,
-				   struct kvm_irq_ack_notifier *kian)
-{
-	mutex_lock(&kvm->irq_lock);
-	hlist_add_head_rcu(&kian->link, &kvm->irq_ack_notifier_list);
-	mutex_unlock(&kvm->irq_lock);
-	kvm_ioapic_make_eoibitmap_request(kvm);
-}
-
-void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
-				    struct kvm_irq_ack_notifier *kian)
-{
-	mutex_lock(&kvm->irq_lock);
-	hlist_del_init_rcu(&kian->link);
-	mutex_unlock(&kvm->irq_lock);
-	synchronize_rcu();
-	kvm_ioapic_make_eoibitmap_request(kvm);
-}
-
 int kvm_request_irq_source_id(struct kvm *kvm)
 {
 	unsigned long *bitmap = &kvm->arch.irq_sources_bitmap;
@@ -376,34 +271,14 @@
 	rcu_read_unlock();
 }
 
-void kvm_free_irq_routing(struct kvm *kvm)
-{
-	/* Called only during vm destruction. Nobody can use the pointer
-	   at this stage */
-	kfree(kvm->irq_routing);
-}
-
-static int setup_routing_entry(struct kvm_irq_routing_table *rt,
-			       struct kvm_kernel_irq_routing_entry *e,
-			       const struct kvm_irq_routing_entry *ue)
+int kvm_set_routing_entry(struct kvm_irq_routing_table *rt,
+			  struct kvm_kernel_irq_routing_entry *e,
+			  const struct kvm_irq_routing_entry *ue)
 {
 	int r = -EINVAL;
 	int delta;
 	unsigned max_pin;
-	struct kvm_kernel_irq_routing_entry *ei;
 
-	/*
-	 * Do not allow GSI to be mapped to the same irqchip more than once.
-	 * Allow only one to one mapping between GSI and MSI.
-	 */
-	hlist_for_each_entry(ei, &rt->map[ue->gsi], link)
-		if (ei->type == KVM_IRQ_ROUTING_MSI ||
-		    ue->type == KVM_IRQ_ROUTING_MSI ||
-		    ue->u.irqchip.irqchip == ei->irqchip.irqchip)
-			return r;
-
-	e->gsi = ue->gsi;
-	e->type = ue->type;
 	switch (ue->type) {
 	case KVM_IRQ_ROUTING_IRQCHIP:
 		delta = 0;
@@ -440,69 +315,11 @@
 		goto out;
 	}
 
-	hlist_add_head(&e->link, &rt->map[e->gsi]);
 	r = 0;
 out:
 	return r;
 }
 
-
-int kvm_set_irq_routing(struct kvm *kvm,
-			const struct kvm_irq_routing_entry *ue,
-			unsigned nr,
-			unsigned flags)
-{
-	struct kvm_irq_routing_table *new, *old;
-	u32 i, j, nr_rt_entries = 0;
-	int r;
-
-	for (i = 0; i < nr; ++i) {
-		if (ue[i].gsi >= KVM_MAX_IRQ_ROUTES)
-			return -EINVAL;
-		nr_rt_entries = max(nr_rt_entries, ue[i].gsi);
-	}
-
-	nr_rt_entries += 1;
-
-	new = kzalloc(sizeof(*new) + (nr_rt_entries * sizeof(struct hlist_head))
-		      + (nr * sizeof(struct kvm_kernel_irq_routing_entry)),
-		      GFP_KERNEL);
-
-	if (!new)
-		return -ENOMEM;
-
-	new->rt_entries = (void *)&new->map[nr_rt_entries];
-
-	new->nr_rt_entries = nr_rt_entries;
-	for (i = 0; i < 3; i++)
-		for (j = 0; j < KVM_IOAPIC_NUM_PINS; j++)
-			new->chip[i][j] = -1;
-
-	for (i = 0; i < nr; ++i) {
-		r = -EINVAL;
-		if (ue->flags)
-			goto out;
-		r = setup_routing_entry(new, &new->rt_entries[i], ue);
-		if (r)
-			goto out;
-		++ue;
-	}
-
-	mutex_lock(&kvm->irq_lock);
-	old = kvm->irq_routing;
-	kvm_irq_routing_update(kvm, new);
-	mutex_unlock(&kvm->irq_lock);
-
-	synchronize_rcu();
-
-	new = old;
-	r = 0;
-
-out:
-	kfree(new);
-	return r;
-}
-
 #define IOAPIC_ROUTING_ENTRY(irq) \
 	{ .gsi = irq, .type = KVM_IRQ_ROUTING_IRQCHIP,	\
 	  .u.irqchip.irqchip = KVM_IRQCHIP_IOAPIC, .u.irqchip.pin = (irq) }
diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c
new file mode 100644
index 0000000..20dc9e4
--- /dev/null
+++ b/virt/kvm/irqchip.c
@@ -0,0 +1,237 @@
+/*
+ * irqchip.c: Common API for in kernel interrupt controllers
+ * Copyright (c) 2007, Intel Corporation.
+ * Copyright 2010 Red Hat, Inc. and/or its affiliates.
+ * Copyright (c) 2013, Alexander Graf <agraf@suse.de>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * This file is derived from virt/kvm/irq_comm.c.
+ *
+ * Authors:
+ *   Yaozu (Eddie) Dong <Eddie.dong@intel.com>
+ *   Alexander Graf <agraf@suse.de>
+ */
+
+#include <linux/kvm_host.h>
+#include <linux/slab.h>
+#include <linux/export.h>
+#include <trace/events/kvm.h>
+#include "irq.h"
+
+bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin)
+{
+	struct kvm_irq_ack_notifier *kian;
+	int gsi;
+
+	rcu_read_lock();
+	gsi = rcu_dereference(kvm->irq_routing)->chip[irqchip][pin];
+	if (gsi != -1)
+		hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list,
+					 link)
+			if (kian->gsi == gsi) {
+				rcu_read_unlock();
+				return true;
+			}
+
+	rcu_read_unlock();
+
+	return false;
+}
+EXPORT_SYMBOL_GPL(kvm_irq_has_notifier);
+
+void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
+{
+	struct kvm_irq_ack_notifier *kian;
+	int gsi;
+
+	trace_kvm_ack_irq(irqchip, pin);
+
+	rcu_read_lock();
+	gsi = rcu_dereference(kvm->irq_routing)->chip[irqchip][pin];
+	if (gsi != -1)
+		hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list,
+					 link)
+			if (kian->gsi == gsi)
+				kian->irq_acked(kian);
+	rcu_read_unlock();
+}
+
+void kvm_register_irq_ack_notifier(struct kvm *kvm,
+				   struct kvm_irq_ack_notifier *kian)
+{
+	mutex_lock(&kvm->irq_lock);
+	hlist_add_head_rcu(&kian->link, &kvm->irq_ack_notifier_list);
+	mutex_unlock(&kvm->irq_lock);
+#ifdef __KVM_HAVE_IOAPIC
+	kvm_vcpu_request_scan_ioapic(kvm);
+#endif
+}
+
+void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
+				    struct kvm_irq_ack_notifier *kian)
+{
+	mutex_lock(&kvm->irq_lock);
+	hlist_del_init_rcu(&kian->link);
+	mutex_unlock(&kvm->irq_lock);
+	synchronize_rcu();
+#ifdef __KVM_HAVE_IOAPIC
+	kvm_vcpu_request_scan_ioapic(kvm);
+#endif
+}
+
+int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi)
+{
+	struct kvm_kernel_irq_routing_entry route;
+
+	if (!irqchip_in_kernel(kvm) || msi->flags != 0)
+		return -EINVAL;
+
+	route.msi.address_lo = msi->address_lo;
+	route.msi.address_hi = msi->address_hi;
+	route.msi.data = msi->data;
+
+	return kvm_set_msi(&route, kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1, false);
+}
+
+/*
+ * Return value:
+ *  < 0   Interrupt was ignored (masked or not delivered for other reasons)
+ *  = 0   Interrupt was coalesced (previous irq is still pending)
+ *  > 0   Number of CPUs interrupt was delivered to
+ */
+int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level,
+		bool line_status)
+{
+	struct kvm_kernel_irq_routing_entry *e, irq_set[KVM_NR_IRQCHIPS];
+	int ret = -1, i = 0;
+	struct kvm_irq_routing_table *irq_rt;
+
+	trace_kvm_set_irq(irq, level, irq_source_id);
+
+	/* Not possible to detect if the guest uses the PIC or the
+	 * IOAPIC.  So set the bit in both. The guest will ignore
+	 * writes to the unused one.
+	 */
+	rcu_read_lock();
+	irq_rt = rcu_dereference(kvm->irq_routing);
+	if (irq < irq_rt->nr_rt_entries)
+		hlist_for_each_entry(e, &irq_rt->map[irq], link)
+			irq_set[i++] = *e;
+	rcu_read_unlock();
+
+	while(i--) {
+		int r;
+		r = irq_set[i].set(&irq_set[i], kvm, irq_source_id, level,
+				   line_status);
+		if (r < 0)
+			continue;
+
+		ret = r + ((ret < 0) ? 0 : ret);
+	}
+
+	return ret;
+}
+
+void kvm_free_irq_routing(struct kvm *kvm)
+{
+	/* Called only during vm destruction. Nobody can use the pointer
+	   at this stage */
+	kfree(kvm->irq_routing);
+}
+
+static int setup_routing_entry(struct kvm_irq_routing_table *rt,
+			       struct kvm_kernel_irq_routing_entry *e,
+			       const struct kvm_irq_routing_entry *ue)
+{
+	int r = -EINVAL;
+	struct kvm_kernel_irq_routing_entry *ei;
+
+	/*
+	 * Do not allow GSI to be mapped to the same irqchip more than once.
+	 * Allow only one to one mapping between GSI and MSI.
+	 */
+	hlist_for_each_entry(ei, &rt->map[ue->gsi], link)
+		if (ei->type == KVM_IRQ_ROUTING_MSI ||
+		    ue->type == KVM_IRQ_ROUTING_MSI ||
+		    ue->u.irqchip.irqchip == ei->irqchip.irqchip)
+			return r;
+
+	e->gsi = ue->gsi;
+	e->type = ue->type;
+	r = kvm_set_routing_entry(rt, e, ue);
+	if (r)
+		goto out;
+
+	hlist_add_head(&e->link, &rt->map[e->gsi]);
+	r = 0;
+out:
+	return r;
+}
+
+int kvm_set_irq_routing(struct kvm *kvm,
+			const struct kvm_irq_routing_entry *ue,
+			unsigned nr,
+			unsigned flags)
+{
+	struct kvm_irq_routing_table *new, *old;
+	u32 i, j, nr_rt_entries = 0;
+	int r;
+
+	for (i = 0; i < nr; ++i) {
+		if (ue[i].gsi >= KVM_MAX_IRQ_ROUTES)
+			return -EINVAL;
+		nr_rt_entries = max(nr_rt_entries, ue[i].gsi);
+	}
+
+	nr_rt_entries += 1;
+
+	new = kzalloc(sizeof(*new) + (nr_rt_entries * sizeof(struct hlist_head))
+		      + (nr * sizeof(struct kvm_kernel_irq_routing_entry)),
+		      GFP_KERNEL);
+
+	if (!new)
+		return -ENOMEM;
+
+	new->rt_entries = (void *)&new->map[nr_rt_entries];
+
+	new->nr_rt_entries = nr_rt_entries;
+	for (i = 0; i < KVM_NR_IRQCHIPS; i++)
+		for (j = 0; j < KVM_IRQCHIP_NUM_PINS; j++)
+			new->chip[i][j] = -1;
+
+	for (i = 0; i < nr; ++i) {
+		r = -EINVAL;
+		if (ue->flags)
+			goto out;
+		r = setup_routing_entry(new, &new->rt_entries[i], ue);
+		if (r)
+			goto out;
+		++ue;
+	}
+
+	mutex_lock(&kvm->irq_lock);
+	old = kvm->irq_routing;
+	kvm_irq_routing_update(kvm, new);
+	mutex_unlock(&kvm->irq_lock);
+
+	synchronize_rcu();
+
+	new = old;
+	r = 0;
+
+out:
+	kfree(new);
+	return r;
+}
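
The return-value convention documented above kvm_set_irq() falls straight
out of its aggregation loop: per-chip results below zero are skipped and
the rest are summed, so the caller sees a negative value only when every
chip ignored the interrupt, zero when it was coalesced everywhere, and
otherwise the number of CPUs hit. The loop in isolation, as a hedged
userspace sketch:

#include <stdio.h>

static int aggregate(const int *results, int n)
{
	int ret = -1;

	while (n--) {
		int r = results[n];

		if (r < 0)
			continue;	/* ignored by this chip */
		ret = r + ((ret < 0) ? 0 : ret);
	}
	return ret;
}

int main(void)
{
	int pic_ignored[] = { -1, 2 };	/* PIC ignored, IOAPIC hit 2 CPUs */
	int coalesced[] = { 0, 0 };

	printf("%d\n", aggregate(pic_ignored, 2));	/* 2 */
	printf("%d\n", aggregate(coalesced, 2));	/* 0 */
	return 0;
}
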
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index f18013f..45f0936 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -217,9 +217,9 @@
 	make_all_cpus_request(kvm, KVM_REQ_MCLOCK_INPROGRESS);
 }
 
-void kvm_make_update_eoibitmap_request(struct kvm *kvm)
+void kvm_make_scan_ioapic_request(struct kvm *kvm)
 {
-	make_all_cpus_request(kvm, KVM_REQ_EOIBITMAP);
+	make_all_cpus_request(kvm, KVM_REQ_SCAN_IOAPIC);
 }
 
 int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
@@ -244,6 +244,7 @@
 
 	kvm_vcpu_set_in_spin_loop(vcpu, false);
 	kvm_vcpu_set_dy_eligible(vcpu, false);
+	vcpu->preempted = false;
 
 	r = kvm_arch_vcpu_init(vcpu);
 	if (r < 0)
@@ -503,6 +504,7 @@
 	mutex_init(&kvm->irq_lock);
 	mutex_init(&kvm->slots_lock);
 	atomic_set(&kvm->users_count, 1);
+	INIT_LIST_HEAD(&kvm->devices);
 
 	r = kvm_init_mmu_notifier(kvm);
 	if (r)
@@ -580,6 +582,19 @@
 	kfree(kvm->memslots);
 }
 
+static void kvm_destroy_devices(struct kvm *kvm)
+{
+	struct list_head *node, *tmp;
+
+	list_for_each_safe(node, tmp, &kvm->devices) {
+		struct kvm_device *dev =
+			list_entry(node, struct kvm_device, vm_node);
+
+		list_del(node);
+		dev->ops->destroy(dev);
+	}
+}
+
 static void kvm_destroy_vm(struct kvm *kvm)
 {
 	int i;
@@ -599,6 +614,7 @@
 	kvm_arch_flush_shadow_all(kvm);
 #endif
 	kvm_arch_destroy_vm(kvm);
+	kvm_destroy_devices(kvm);
 	kvm_free_physmem(kvm);
 	cleanup_srcu_struct(&kvm->srcu);
 	kvm_arch_free_vm(kvm);
@@ -719,24 +735,6 @@
 }
 
 /*
- * KVM_SET_USER_MEMORY_REGION ioctl allows the following operations:
- * - create a new memory slot
- * - delete an existing memory slot
- * - modify an existing memory slot
- *   -- move it in the guest physical memory space
- *   -- just change its flags
- *
- * Since flags can be changed by some of these operations, the following
- * differentiation is the best we can do for __kvm_set_memory_region():
- */
-enum kvm_mr_change {
-	KVM_MR_CREATE,
-	KVM_MR_DELETE,
-	KVM_MR_MOVE,
-	KVM_MR_FLAGS_ONLY,
-};
-
-/*
  * Allocate some memory and give it an address in the guest physical address
  * space.
  *
@@ -745,8 +743,7 @@
  * Must be called holding mmap_sem for write.
  */
 int __kvm_set_memory_region(struct kvm *kvm,
-			    struct kvm_userspace_memory_region *mem,
-			    bool user_alloc)
+			    struct kvm_userspace_memory_region *mem)
 {
 	int r;
 	gfn_t base_gfn;
@@ -767,7 +764,7 @@
 	if (mem->guest_phys_addr & (PAGE_SIZE - 1))
 		goto out;
 	/* We can read the guest memory with __xxx_user() later on. */
-	if (user_alloc &&
+	if ((mem->slot < KVM_USER_MEM_SLOTS) &&
 	    ((mem->userspace_addr & (PAGE_SIZE - 1)) ||
 	     !access_ok(VERIFY_WRITE,
 			(void __user *)(unsigned long)mem->userspace_addr,
@@ -875,7 +872,7 @@
 		slots = old_memslots;
 	}
 
-	r = kvm_arch_prepare_memory_region(kvm, &new, old, mem, user_alloc);
+	r = kvm_arch_prepare_memory_region(kvm, &new, mem, change);
 	if (r)
 		goto out_slots;
 
@@ -915,7 +912,7 @@
 
 	old_memslots = install_new_memslots(kvm, slots, &new);
 
-	kvm_arch_commit_memory_region(kvm, mem, old, user_alloc);
+	kvm_arch_commit_memory_region(kvm, mem, &old, change);
 
 	kvm_free_physmem_slot(&old, &new);
 	kfree(old_memslots);
@@ -932,26 +929,23 @@
 EXPORT_SYMBOL_GPL(__kvm_set_memory_region);
 
 int kvm_set_memory_region(struct kvm *kvm,
-			  struct kvm_userspace_memory_region *mem,
-			  bool user_alloc)
+			  struct kvm_userspace_memory_region *mem)
 {
 	int r;
 
 	mutex_lock(&kvm->slots_lock);
-	r = __kvm_set_memory_region(kvm, mem, user_alloc);
+	r = __kvm_set_memory_region(kvm, mem);
 	mutex_unlock(&kvm->slots_lock);
 	return r;
 }
 EXPORT_SYMBOL_GPL(kvm_set_memory_region);
 
 int kvm_vm_ioctl_set_memory_region(struct kvm *kvm,
-				   struct
-				   kvm_userspace_memory_region *mem,
-				   bool user_alloc)
+				   struct kvm_userspace_memory_region *mem)
 {
 	if (mem->slot >= KVM_USER_MEM_SLOTS)
 		return -EINVAL;
-	return kvm_set_memory_region(kvm, mem, user_alloc);
+	return kvm_set_memory_region(kvm, mem);
 }
 
 int kvm_get_dirty_log(struct kvm *kvm,
@@ -1099,7 +1093,7 @@
 	return __copy_from_user_inatomic(data, hva, len);
 }
 
-int get_user_page_nowait(struct task_struct *tsk, struct mm_struct *mm,
+static int get_user_page_nowait(struct task_struct *tsk, struct mm_struct *mm,
 	unsigned long start, int write, struct page **page)
 {
 	int flags = FOLL_TOUCH | FOLL_NOWAIT | FOLL_HWPOISON | FOLL_GET;
@@ -1719,6 +1713,7 @@
 			smp_send_reschedule(cpu);
 	put_cpu();
 }
+EXPORT_SYMBOL_GPL(kvm_vcpu_kick);
 #endif /* !CONFIG_S390 */
 
 void kvm_resched(struct kvm_vcpu *vcpu)
@@ -1816,6 +1811,8 @@
 				continue;
 			} else if (pass && i > last_boosted_vcpu)
 				break;
+			/* Only yield to vcpus preempted while runnable. */
+			if (!ACCESS_ONCE(vcpu->preempted))
+				continue;
 			if (vcpu == me)
 				continue;
 			if (waitqueue_active(&vcpu->wq))
@@ -2204,6 +2201,119 @@
 }
 #endif
 
+static int kvm_device_ioctl_attr(struct kvm_device *dev,
+				 int (*accessor)(struct kvm_device *dev,
+						 struct kvm_device_attr *attr),
+				 unsigned long arg)
+{
+	struct kvm_device_attr attr;
+
+	if (!accessor)
+		return -EPERM;
+
+	if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
+		return -EFAULT;
+
+	return accessor(dev, &attr);
+}
+
+static long kvm_device_ioctl(struct file *filp, unsigned int ioctl,
+			     unsigned long arg)
+{
+	struct kvm_device *dev = filp->private_data;
+
+	switch (ioctl) {
+	case KVM_SET_DEVICE_ATTR:
+		return kvm_device_ioctl_attr(dev, dev->ops->set_attr, arg);
+	case KVM_GET_DEVICE_ATTR:
+		return kvm_device_ioctl_attr(dev, dev->ops->get_attr, arg);
+	case KVM_HAS_DEVICE_ATTR:
+		return kvm_device_ioctl_attr(dev, dev->ops->has_attr, arg);
+	default:
+		if (dev->ops->ioctl)
+			return dev->ops->ioctl(dev, ioctl, arg);
+
+		return -ENOTTY;
+	}
+}
+
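Every attribute ioctl funnels through kvm_device_ioctl_attr() above, so a
backend only implements the set_attr/get_attr/has_attr callbacks it
supports; a missing callback fails with -EPERM.  From userspace all three
take the same struct kvm_device_attr.  A hedged sketch (group and attr
values are device-defined, so the parameters here are placeholders):

	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	static int set_dev_attr(int dev_fd, __u32 group, __u64 id, void *val)
	{
		struct kvm_device_attr attr = {
			.group = group,	/* device-defined attribute group */
			.attr  = id,	/* attribute within that group */
			.addr  = (__u64)(unsigned long)val,	/* payload */
		};

		return ioctl(dev_fd, KVM_SET_DEVICE_ATTR, &attr);
	}
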
+static int kvm_device_release(struct inode *inode, struct file *filp)
+{
+	struct kvm_device *dev = filp->private_data;
+	struct kvm *kvm = dev->kvm;
+
+	kvm_put_kvm(kvm);
+	return 0;
+}
+
+static const struct file_operations kvm_device_fops = {
+	.unlocked_ioctl = kvm_device_ioctl,
+#ifdef CONFIG_COMPAT
+	.compat_ioctl = kvm_device_ioctl,
+#endif
+	.release = kvm_device_release,
+};
+
+struct kvm_device *kvm_device_from_filp(struct file *filp)
+{
+	if (filp->f_op != &kvm_device_fops)
+		return NULL;
+
+	return filp->private_data;
+}
+
+static int kvm_ioctl_create_device(struct kvm *kvm,
+				   struct kvm_create_device *cd)
+{
+	struct kvm_device_ops *ops = NULL;
+	struct kvm_device *dev;
+	bool test = cd->flags & KVM_CREATE_DEVICE_TEST;
+	int ret;
+
+	switch (cd->type) {
+#ifdef CONFIG_KVM_MPIC
+	case KVM_DEV_TYPE_FSL_MPIC_20:
+	case KVM_DEV_TYPE_FSL_MPIC_42:
+		ops = &kvm_mpic_ops;
+		break;
+#endif
+#ifdef CONFIG_KVM_XICS
+	case KVM_DEV_TYPE_XICS:
+		ops = &kvm_xics_ops;
+		break;
+#endif
+	default:
+		return -ENODEV;
+	}
+
+	if (test)
+		return 0;
+
+	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
+	if (!dev)
+		return -ENOMEM;
+
+	dev->ops = ops;
+	dev->kvm = kvm;
+
+	ret = ops->create(dev, cd->type);
+	if (ret < 0) {
+		kfree(dev);
+		return ret;
+	}
+
+	ret = anon_inode_getfd(ops->name, &kvm_device_fops, dev, O_RDWR);
+	if (ret < 0) {
+		ops->destroy(dev);
+		return ret;
+	}
+
+	list_add(&dev->vm_node, &kvm->devices);
+	kvm_get_kvm(kvm);
+	cd->fd = ret;
+	return 0;
+}
+
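For reference, creating one of these in-kernel devices from userspace is a
single vm ioctl; the returned fd is then driven with the attribute ioctls
sketched above.  A sketch against the uapi added by this series (error
handling elided):

	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	static int create_mpic(int vm_fd)
	{
		struct kvm_create_device cd = {
			.type = KVM_DEV_TYPE_FSL_MPIC_20,
			/* .flags = KVM_CREATE_DEVICE_TEST probes support
			   without actually creating the device */
		};

		if (ioctl(vm_fd, KVM_CREATE_DEVICE, &cd) < 0)
			return -1;
		return cd.fd;	/* anon inode fd backed by kvm_device_fops */
	}
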
 static long kvm_vm_ioctl(struct file *filp,
 			   unsigned int ioctl, unsigned long arg)
 {
@@ -2225,7 +2335,7 @@
 						sizeof kvm_userspace_mem))
 			goto out;
 
-		r = kvm_vm_ioctl_set_memory_region(kvm, &kvm_userspace_mem, true);
+		r = kvm_vm_ioctl_set_memory_region(kvm, &kvm_userspace_mem);
 		break;
 	}
 	case KVM_GET_DIRTY_LOG: {
@@ -2304,7 +2414,8 @@
 		if (copy_from_user(&irq_event, argp, sizeof irq_event))
 			goto out;
 
-		r = kvm_vm_ioctl_irq_line(kvm, &irq_event);
+		r = kvm_vm_ioctl_irq_line(kvm, &irq_event,
+					ioctl == KVM_IRQ_LINE_STATUS);
 		if (r)
 			goto out;
 
@@ -2318,6 +2429,54 @@
 		break;
 	}
 #endif
+#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
+	case KVM_SET_GSI_ROUTING: {
+		struct kvm_irq_routing routing;
+		struct kvm_irq_routing __user *urouting;
+		struct kvm_irq_routing_entry *entries;
+
+		r = -EFAULT;
+		if (copy_from_user(&routing, argp, sizeof(routing)))
+			goto out;
+		r = -EINVAL;
+		if (routing.nr >= KVM_MAX_IRQ_ROUTES)
+			goto out;
+		if (routing.flags)
+			goto out;
+		r = -ENOMEM;
+		entries = vmalloc(routing.nr * sizeof(*entries));
+		if (!entries)
+			goto out;
+		r = -EFAULT;
+		urouting = argp;
+		if (copy_from_user(entries, urouting->entries,
+				   routing.nr * sizeof(*entries)))
+			goto out_free_irq_routing;
+		r = kvm_set_irq_routing(kvm, entries, routing.nr,
+					routing.flags);
+	out_free_irq_routing:
+		vfree(entries);
+		break;
+	}
+#endif /* CONFIG_HAVE_KVM_IRQ_ROUTING */
+	case KVM_CREATE_DEVICE: {
+		struct kvm_create_device cd;
+
+		r = -EFAULT;
+		if (copy_from_user(&cd, argp, sizeof(cd)))
+			goto out;
+
+		r = kvm_ioctl_create_device(kvm, &cd);
+		if (r)
+			goto out;
+
+		r = -EFAULT;
+		if (copy_to_user(argp, &cd, sizeof(cd)))
+			goto out;
+
+		r = 0;
+		break;
+	}
 	default:
 		r = kvm_arch_vm_ioctl(filp, ioctl, arg);
 		if (r == -ENOTTY)
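
Userspace drives the KVM_SET_GSI_ROUTING handler above by passing a struct
kvm_irq_routing with routing.nr entries appended to it.  A hedged sketch
that installs a single irqchip route (the GSI and pin numbers are arbitrary
examples):

	#include <stdlib.h>
	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	static int route_gsi_to_ioapic(int vm_fd, unsigned gsi, unsigned pin)
	{
		struct kvm_irq_routing *r;
		int ret;

		r = calloc(1, sizeof(*r) + sizeof(r->entries[0]));
		if (!r)
			return -1;
		r->nr = 1;
		r->entries[0].gsi = gsi;
		r->entries[0].type = KVM_IRQ_ROUTING_IRQCHIP;
		r->entries[0].u.irqchip.irqchip = KVM_IRQCHIP_IOAPIC;	/* x86 */
		r->entries[0].u.irqchip.pin = pin;

		ret = ioctl(vm_fd, KVM_SET_GSI_ROUTING, r);
		free(r);
		return ret;
	}

Note that setup_routing_entry() lets the same GSI appear once per irqchip,
so a GSI can fan out to several chips, while an MSI route must be the only
entry for its GSI.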
@@ -2447,8 +2606,11 @@
 #ifdef CONFIG_HAVE_KVM_MSI
 	case KVM_CAP_SIGNAL_MSI:
 #endif
+#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
+	case KVM_CAP_IRQFD_RESAMPLE:
+#endif
 		return 1;
-#ifdef KVM_CAP_IRQ_ROUTING
+#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
 	case KVM_CAP_IRQ_ROUTING:
 		return KVM_MAX_IRQ_ROUTES;
 #endif
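
Userspace can discover both capabilities at runtime with
KVM_CHECK_EXTENSION on the /dev/kvm fd; for KVM_CAP_IRQ_ROUTING the return
value doubles as the route limit.  A minimal sketch:

	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	/* Returns 0 if routing is unsupported, else the max route count. */
	static int max_irq_routes(int kvm_fd)
	{
		return ioctl(kvm_fd, KVM_CHECK_EXTENSION, KVM_CAP_IRQ_ROUTING);
	}
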
@@ -2618,14 +2780,6 @@
 	return NOTIFY_OK;
 }
 
-
-asmlinkage void kvm_spurious_fault(void)
-{
-	/* Fault while not rebooting.  We want the trace. */
-	BUG();
-}
-EXPORT_SYMBOL_GPL(kvm_spurious_fault);
-
 static int kvm_reboot(struct notifier_block *notifier, unsigned long val,
 		      void *v)
 {
@@ -2658,7 +2812,7 @@
 	kfree(bus);
 }
 
-int kvm_io_bus_sort_cmp(const void *p1, const void *p2)
+static int kvm_io_bus_sort_cmp(const void *p1, const void *p2)
 {
 	const struct kvm_io_range *r1 = p1;
 	const struct kvm_io_range *r2 = p2;
@@ -2670,7 +2824,7 @@
 	return 0;
 }
 
-int kvm_io_bus_insert_dev(struct kvm_io_bus *bus, struct kvm_io_device *dev,
+static int kvm_io_bus_insert_dev(struct kvm_io_bus *bus, struct kvm_io_device *dev,
 			  gpa_t addr, int len)
 {
 	bus->range[bus->dev_count++] = (struct kvm_io_range) {
@@ -2685,7 +2839,7 @@
 	return 0;
 }
 
-int kvm_io_bus_get_first_dev(struct kvm_io_bus *bus,
+static int kvm_io_bus_get_first_dev(struct kvm_io_bus *bus,
 			     gpa_t addr, int len)
 {
 	struct kvm_io_range *range, key;
@@ -2929,6 +3083,8 @@
 static void kvm_sched_in(struct preempt_notifier *pn, int cpu)
 {
 	struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn);
+
+	if (vcpu->preempted)
+		vcpu->preempted = false;
 
 	kvm_arch_vcpu_load(vcpu, cpu);
 }
@@ -2938,6 +3094,8 @@
 {
 	struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn);
 
+	/* Still runnable at sched-out: preempted rather than blocked. */
+	if (current->state == TASK_RUNNING)
+		vcpu->preempted = true;
 	kvm_arch_vcpu_put(vcpu);
 }
 
@@ -2947,6 +3105,9 @@
 	int r;
 	int cpu;
 
+	r = kvm_irqfd_init();
+	if (r)
+		goto out_irqfd;
 	r = kvm_arch_init(opaque);
 	if (r)
 		goto out_fail;
@@ -3027,6 +3188,8 @@
 out_free_0:
 	kvm_arch_exit();
 out_fail:
+	kvm_irqfd_exit();
+out_irqfd:
 	return r;
 }
 EXPORT_SYMBOL_GPL(kvm_init);
@@ -3043,6 +3206,7 @@
 	on_each_cpu(hardware_disable_nolock, NULL, 1);
 	kvm_arch_hardware_unsetup();
 	kvm_arch_exit();
+	kvm_irqfd_exit();
 	free_cpumask_var(cpus_hardware_enabled);
 }
 EXPORT_SYMBOL_GPL(kvm_exit);