Merge tag 'dmaengine-4.5-rc1' of git://git.infradead.org/users/vkoul/slave-dma

Pull dmaengine updates from Vinod Koul:
 "This round we have few new features, new driver and updates to few
  drivers.

  The new features to dmaengine core are:
   - Synchronized transfer termination API to terminate the dmaengine
     transfers in synchronized and async fashion as required by users.
     We have its user now in ALSA dmaengine lib, img, at_xdma, axi_dmac
     drivers.
   - Universal API for channel request and start consolidation of
     request flows.  Its user is the omap-dma driver.
   - Introduce reuse of descriptors and use in pxa_dma driver

  Add/Remove:
   - New STM32 DMA driver
   - Removal of unused R-Car HPB-DMAC driver

  Updates:
   - ti-dma-crossbar updates for supporting eDMA
   - tegra-apb pm updates
   - idma64
   - mv_xor updates
   - ste_dma updates"

* tag 'dmaengine-4.5-rc1' of git://git.infradead.org/users/vkoul/slave-dma: (54 commits)
  dmaengine: mv_xor: add suspend/resume support
  dmaengine: mv_xor: de-duplicate mv_chan_set_mode*()
  dmaengine: mv_xor: remove mv_xor_chan->current_type field
  dmaengine: omap-dma: Add support for DMA filter mapping to slave devices
  dmaengine: edma: Add support for DMA filter mapping to slave devices
  dmaengine: core: Introduce new, universal API to request a channel
  dmaengine: core: Move and merge the code paths using private_candidate
  dmaengine: core: Skip mask matching when it is not provided to private_candidate
  dmaengine: mdc: Correct terminate_all handling
  dmaengine: edma: Add probe callback to edma_tptc_driver
  dmaengine: dw: fix potential memory leak in dw_dma_parse_dt()
  dmaengine: stm32-dma: Fix unchecked deference of chan->desc
  dmaengine: sh: Remove unused R-Car HPB-DMAC driver
  dmaengine: usb-dmac: Document SoC specific compatibility strings
  ste_dma40: Delete an unnecessary variable initialisation in d40_probe()
  ste_dma40: Delete another unnecessary check in d40_probe()
  ste_dma40: Delete an unnecessary check before the function call "kmem_cache_destroy"
  dmaengine: tegra-apb: Free interrupts before killing tasklets
  dmaengine: tegra-apb: Update driver to use GFP_NOWAIT
  dmaengine: tegra-apb: Only save channel state for those in use
  ...
diff --git a/Documentation/devicetree/bindings/dma/renesas,usb-dmac.txt b/Documentation/devicetree/bindings/dma/renesas,usb-dmac.txt
index 040f365..e7780a1 100644
--- a/Documentation/devicetree/bindings/dma/renesas,usb-dmac.txt
+++ b/Documentation/devicetree/bindings/dma/renesas,usb-dmac.txt
@@ -1,7 +1,13 @@
 * Renesas USB DMA Controller Device Tree bindings
 
 Required Properties:
-- compatible: must contain "renesas,usb-dmac"
+- compatible: "renesas,<soctype>-usb-dmac", "renesas,usb-dmac" as fallback.
+	Examples with soctypes are:
+	  - "renesas,r8a7790-usb-dmac" (R-Car H2)
+	  - "renesas,r8a7791-usb-dmac" (R-Car M2-W)
+	  - "renesas,r8a7793-usb-dmac" (R-Car M2-N)
+	  - "renesas,r8a7794-usb-dmac" (R-Car E2)
+	  - "renesas,r8a7795-usb-dmac" (R-Car H3)
 - reg: base address and length of the registers block for the DMAC
 - interrupts: interrupt specifiers for the DMAC, one for each entry in
   interrupt-names.
@@ -15,7 +21,7 @@
 Example: R8A7790 (R-Car H2) USB-DMACs
 
 	usb_dmac0: dma-controller@e65a0000 {
-		compatible = "renesas,usb-dmac";
+		compatible = "renesas,r8a7790-usb-dmac", "renesas,usb-dmac";
 		reg = <0 0xe65a0000 0 0x100>;
 		interrupts = <0 109 IRQ_TYPE_LEVEL_HIGH
 			      0 109 IRQ_TYPE_LEVEL_HIGH>;
diff --git a/Documentation/devicetree/bindings/dma/stm32-dma.txt b/Documentation/devicetree/bindings/dma/stm32-dma.txt
new file mode 100644
index 0000000..70cd13f
--- /dev/null
+++ b/Documentation/devicetree/bindings/dma/stm32-dma.txt
@@ -0,0 +1,82 @@
+* STMicroelectronics STM32 DMA controller
+
+The STM32 DMA is a general-purpose direct memory access controller capable of
+supporting 8 independent DMA channels. Each channel can have up to 8 requests.
+
+Required properties:
+- compatible: Should be "st,stm32-dma"
+- reg: Should contain DMA registers location and length. This should include
+  all of the per-channel registers.
+- interrupts: Should contain all of the per-channel DMA interrupts in
+  ascending order with respect to the DMA channel index.
+- clocks: Should contain the input clock of the DMA instance.
+- #dma-cells : Must be <4>. See DMA client paragraph for more details.
+
+Optional properties:
+- resets: Reference to a reset controller asserting the DMA controller
+- st,mem2mem: boolean; if defined, it indicates that the controller supports
+  memory-to-memory transfer
+
+Example:
+
+	dma2: dma-controller@40026400 {
+		compatible = "st,stm32-dma";
+		reg = <0x40026400 0x400>;
+		interrupts = <56>,
+			     <57>,
+			     <58>,
+			     <59>,
+			     <60>,
+			     <68>,
+			     <69>,
+			     <70>;
+		clocks = <&clk_hclk>;
+		#dma-cells = <4>;
+		st,mem2mem;
+		resets = <&rcc 150>;
+	};
+
+* DMA client
+
+DMA clients connected to the STM32 DMA controller must use the format
+described in the dma.txt file, using a five-cell specifier for each
+channel: a phandle plus four integer cells.
+The four cells in order are:
+
+1. The channel id
+2. The request line number
+3. A 32bit mask specifying the DMA channel configuration, which is device
+   dependent:
+ -bit 9: Peripheral Increment Address
+	0x0: no address increment between transfers
+	0x1: increment address between transfers
+ -bit 10: Memory Increment Address
+	0x0: no address increment between transfers
+	0x1: increment address between transfers
+ -bit 15: Peripheral Increment Offset Size
+	0x0: offset size is linked to the peripheral bus width
+	0x1: offset size is fixed to 4 (32-bit alignment)
+ -bit 16-17: Priority level
+	0x0: low
+	0x1: medium
+	0x2: high
+	0x3: very high
+4. A 32bit mask specifying the DMA FIFO threshold configuration, which is
+   device dependent:
+ -bit 0-1: Fifo threshold
+	0x0: 1/4 full FIFO
+	0x1: 1/2 full FIFO
+	0x2: 3/4 full FIFO
+	0x3: full FIFO
+
+Example:
+
+	usart1: serial@40011000 {
+		compatible = "st,stm32-usart", "st,stm32-uart";
+		reg = <0x40011000 0x400>;
+		interrupts = <37>;
+		clocks = <&clk_pclk2>;
+		dmas = <&dma2 2 4 0x10400 0x3>,
+		       <&dma2 7 5 0x10200 0x3>;
+		dma-names = "rx", "tx";
+	};
diff --git a/Documentation/devicetree/bindings/dma/ti-dma-crossbar.txt b/Documentation/devicetree/bindings/dma/ti-dma-crossbar.txt
index b152a75..aead586 100644
--- a/Documentation/devicetree/bindings/dma/ti-dma-crossbar.txt
+++ b/Documentation/devicetree/bindings/dma/ti-dma-crossbar.txt
@@ -14,6 +14,10 @@
 
 Optional properties:
 - ti,dma-safe-map: Safe routing value for unused request lines
+- ti,reserved-dma-request-ranges: DMA request ranges which should not be used
+		when mapping xbar input to DMA request, they are either
+		allocated to be used by for example the DSP or they are used as
+		memcpy channels in eDMA.
 
 Notes:
 When requesting channel via ti,dra7-dma-crossbar, the DMA clinet must request
@@ -46,6 +50,8 @@
 	#dma-cells = <1>;
 	dma-requests = <205>;
 	ti,dma-safe-map = <0>;
+	/* Protect the sDMA request ranges: 10-14 and 100-126 */
+	ti,reserved-dma-request-ranges = <10 5>, <100 27>;
 	dma-masters = <&sdma>;
 };
 
diff --git a/Documentation/dmaengine/client.txt b/Documentation/dmaengine/client.txt
index 11fb87f..9e33189 100644
--- a/Documentation/dmaengine/client.txt
+++ b/Documentation/dmaengine/client.txt
@@ -22,25 +22,14 @@
    Channel allocation is slightly different in the slave DMA context,
    client drivers typically need a channel from a particular DMA
    controller only and even in some cases a specific channel is desired.
-   To request a channel dma_request_channel() API is used.
+   To request a channel dma_request_chan() API is used.
 
    Interface:
-	struct dma_chan *dma_request_channel(dma_cap_mask_t mask,
-			dma_filter_fn filter_fn,
-			void *filter_param);
-   where dma_filter_fn is defined as:
-	typedef bool (*dma_filter_fn)(struct dma_chan *chan, void *filter_param);
+	struct dma_chan *dma_request_chan(struct device *dev, const char *name);
 
-   The 'filter_fn' parameter is optional, but highly recommended for
-   slave and cyclic channels as they typically need to obtain a specific
-   DMA channel.
-
-   When the optional 'filter_fn' parameter is NULL, dma_request_channel()
-   simply returns the first channel that satisfies the capability mask.
-
-   Otherwise, the 'filter_fn' routine will be called once for each free
-   channel which has a capability in 'mask'.  'filter_fn' is expected to
-   return 'true' when the desired DMA channel is found.
+   This will find and return the 'name' DMA channel associated with the 'dev'
+   device. The association is done via DT, ACPI or board file based
+   dma_slave_map matching table.
 
    A channel allocated via this interface is exclusive to the caller,
    until dma_release_channel() is called.
@@ -128,7 +117,7 @@
 	transaction.
 
 	For cyclic DMA, a callback function may wish to terminate the
-	DMA via dmaengine_terminate_all().
+	DMA via dmaengine_terminate_async().
 
 	Therefore, it is important that DMA engine drivers drop any
 	locks before calling the callback function which may cause a
@@ -166,12 +155,29 @@
 
 Further APIs:
 
-1. int dmaengine_terminate_all(struct dma_chan *chan)
+1. int dmaengine_terminate_sync(struct dma_chan *chan)
+   int dmaengine_terminate_async(struct dma_chan *chan)
+   int dmaengine_terminate_all(struct dma_chan *chan) /* DEPRECATED */
 
    This causes all activity for the DMA channel to be stopped, and may
    discard data in the DMA FIFO which hasn't been fully transferred.
    No callback functions will be called for any incomplete transfers.
 
+   Two variants of this function are available.
+
+   dmaengine_terminate_async() might not wait until the DMA has been fully
+   stopped or until any running complete callbacks have finished. But it is
+   possible to call dmaengine_terminate_async() from atomic context or from
+   within a complete callback. dmaengine_synchronize() must be called before it
+   is safe to free the memory accessed by the DMA transfer or free resources
+   accessed from within the complete callback.
+
+   dmaengine_terminate_sync() will wait for the transfer and any running
+   complete callbacks to finish before it returns. But the function must not be
+   called from atomic context or from within a complete callback.
+
+   dmaengine_terminate_all() is deprecated and should not be used in new code.
+
 2. int dmaengine_pause(struct dma_chan *chan)
 
    This pauses activity on the DMA channel without data loss.
@@ -197,3 +203,20 @@
 	a running DMA channel.  It is recommended that DMA engine users
 	pause or stop (via dmaengine_terminate_all()) the channel before
 	using this API.
+
+5. void dmaengine_synchronize(struct dma_chan *chan)
+
+  Synchronize the termination of the DMA channel to the current context.
+
+  This function should be used after dmaengine_terminate_async() to synchronize
+  the termination of the DMA channel to the current context. The function will
+  wait for the transfer and any running complete callbacks to finish before it
+  returns.
+
+  If dmaengine_terminate_async() is used to stop the DMA channel this function
+  must be called before it is safe to free memory accessed by previously
+  submitted descriptors or to free any resources accessed within the complete
+  callback of previously submitted descriptors.
+
+  The behavior of this function is undefined if dma_async_issue_pending() has
+  been called between dmaengine_terminate_async() and this function.
diff --git a/Documentation/dmaengine/provider.txt b/Documentation/dmaengine/provider.txt
index 67d4ce4..122b7f4 100644
--- a/Documentation/dmaengine/provider.txt
+++ b/Documentation/dmaengine/provider.txt
@@ -327,8 +327,24 @@
 
    * device_terminate_all
      - Aborts all the pending and ongoing transfers on the channel
-     - This command should operate synchronously on the channel,
-       terminating right away all the channels
+     - For aborted transfers the complete callback should not be called
+     - Can be called from atomic context or from within a complete
+       callback of a descriptor. Must not sleep. Drivers must be able
+       to handle this correctly.
+     - Termination may be asynchronous. The driver does not have to
+       wait until the currently active transfer has completely stopped.
+       See device_synchronize.
+
+   * device_synchronize
+     - Must synchronize the termination of a channel to the current
+       context.
+     - Must make sure that memory for previously submitted
+       descriptors is no longer accessed by the DMA controller.
+     - Must make sure that all complete callbacks for previously
+       submitted descriptors have finished running and none are
+       scheduled to run.
+     - May sleep.
+
 
 Misc notes (stuff that should be documented, but don't really know
 where to put them)
diff --git a/arch/arm/configs/stm32_defconfig b/arch/arm/configs/stm32_defconfig
index 4725fab..ec52505 100644
--- a/arch/arm/configs/stm32_defconfig
+++ b/arch/arm/configs/stm32_defconfig
@@ -54,6 +54,8 @@
 CONFIG_LEDS_CLASS=y
 CONFIG_LEDS_TRIGGERS=y
 CONFIG_LEDS_TRIGGER_HEARTBEAT=y
+CONFIG_DMADEVICES=y
+CONFIG_STM32_DMA=y
 # CONFIG_FILE_LOCKING is not set
 # CONFIG_DNOTIFY is not set
 # CONFIG_INOTIFY_USER is not set
diff --git a/drivers/dca/dca-core.c b/drivers/dca/dca-core.c
index 819dfda..7afbb28 100644
--- a/drivers/dca/dca-core.c
+++ b/drivers/dca/dca-core.c
@@ -321,7 +321,8 @@
  * @ops - pointer to struct of dca operation function pointers
  * @priv_size - size of extra mem to be added for provider's needs
  */
-struct dca_provider *alloc_dca_provider(struct dca_ops *ops, int priv_size)
+struct dca_provider *alloc_dca_provider(const struct dca_ops *ops,
+					int priv_size)
 {
 	struct dca_provider *dca;
 	int alloc_size;
diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig
index e6cd1a3..3a8ce67 100644
--- a/drivers/dma/Kconfig
+++ b/drivers/dma/Kconfig
@@ -431,6 +431,18 @@
 	help
 	  Support for ST-Ericsson DMA40 controller
 
+config STM32_DMA
+	bool "STMicroelectronics STM32 DMA support"
+	depends on ARCH_STM32
+	select DMA_ENGINE
+	select DMA_OF
+	select DMA_VIRTUAL_CHANNELS
+	help
+	  Enable support for the on-chip DMA controller on STMicroelectronics
+	  STM32 MCUs.
+	  If you have a board based on such a MCU and wish to use DMA say Y
+	  here.
+
 config S3C24XX_DMAC
 	tristate "Samsung S3C24XX DMA support"
 	depends on ARCH_S3C24XX
diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile
index ef9c099..2dd0a067 100644
--- a/drivers/dma/Makefile
+++ b/drivers/dma/Makefile
@@ -56,6 +56,7 @@
 obj-$(CONFIG_RENESAS_DMA) += sh/
 obj-$(CONFIG_SIRF_DMA) += sirf-dma.o
 obj-$(CONFIG_STE_DMA40) += ste_dma40.o ste_dma40_ll.o
+obj-$(CONFIG_STM32_DMA) += stm32-dma.o
 obj-$(CONFIG_S3C24XX_DMAC) += s3c24xx-dma.o
 obj-$(CONFIG_TXX9_DMAC) += txx9dmac.o
 obj-$(CONFIG_TEGRA20_APB_DMA) += tegra20-apb-dma.o
diff --git a/drivers/dma/acpi-dma.c b/drivers/dma/acpi-dma.c
index 16d0daa..eed6bda 100644
--- a/drivers/dma/acpi-dma.c
+++ b/drivers/dma/acpi-dma.c
@@ -15,6 +15,7 @@
 #include <linux/device.h>
 #include <linux/err.h>
 #include <linux/module.h>
+#include <linux/kernel.h>
 #include <linux/list.h>
 #include <linux/mutex.h>
 #include <linux/slab.h>
@@ -72,7 +73,9 @@
 	si = (const struct acpi_csrt_shared_info *)&grp[1];
 
 	/* Match device by MMIO and IRQ */
-	if (si->mmio_base_low != mem || si->gsi_interrupt != irq)
+	if (si->mmio_base_low != lower_32_bits(mem) ||
+	    si->mmio_base_high != upper_32_bits(mem) ||
+	    si->gsi_interrupt != irq)
 		return 0;
 
 	dev_dbg(&adev->dev, "matches with %.4s%04X (rev %u)\n",
diff --git a/drivers/dma/at_xdmac.c b/drivers/dma/at_xdmac.c
index 370c661..39f5966 100644
--- a/drivers/dma/at_xdmac.c
+++ b/drivers/dma/at_xdmac.c
@@ -863,8 +863,12 @@
 	 * access. Hopefully we can access DDR through both ports (at least on
 	 * SAMA5D4x), so we can use the same interface for source and dest,
 	 * that solves the fact we don't know the direction.
+	 * ERRATA: Even if useless for memory transfers, the PERID must not
+	 * match that of another channel. Otherwise, it could lead to spurious
+	 * flag status.
 	 */
-	u32			chan_cc = AT_XDMAC_CC_DIF(0)
+	u32			chan_cc = AT_XDMAC_CC_PERID(0x3f)
+					| AT_XDMAC_CC_DIF(0)
 					| AT_XDMAC_CC_SIF(0)
 					| AT_XDMAC_CC_MBSIZE_SIXTEEN
 					| AT_XDMAC_CC_TYPE_MEM_TRAN;
@@ -1041,8 +1045,12 @@
 	 * access DDR through both ports (at least on SAMA5D4x), so we can use
 	 * the same interface for source and dest, that solves the fact we
 	 * don't know the direction.
+	 * ERRATA: Even if useless for memory transfers, the PERID must not
+	 * match that of another channel. Otherwise, it could lead to spurious
+	 * flag status.
 	 */
-	u32			chan_cc = AT_XDMAC_CC_DAM_INCREMENTED_AM
+	u32			chan_cc = AT_XDMAC_CC_PERID(0x3f)
+					| AT_XDMAC_CC_DAM_INCREMENTED_AM
 					| AT_XDMAC_CC_SAM_INCREMENTED_AM
 					| AT_XDMAC_CC_DIF(0)
 					| AT_XDMAC_CC_SIF(0)
@@ -1143,8 +1151,12 @@
 	 * access. Hopefully we can access DDR through both ports (at least on
 	 * SAMA5D4x), so we can use the same interface for source and dest,
 	 * that solves the fact we don't know the direction.
+	 * ERRATA: Even if useless for memory transfers, the PERID must not
+	 * match that of another channel. Otherwise, it could lead to spurious
+	 * flag status.
 	 */
-	u32			chan_cc = AT_XDMAC_CC_DAM_UBS_AM
+	u32			chan_cc = AT_XDMAC_CC_PERID(0x3f)
+					| AT_XDMAC_CC_DAM_UBS_AM
 					| AT_XDMAC_CC_SAM_INCREMENTED_AM
 					| AT_XDMAC_CC_DIF(0)
 					| AT_XDMAC_CC_SIF(0)
@@ -1998,8 +2010,6 @@
 	dma_async_device_unregister(&atxdmac->dma);
 	clk_disable_unprepare(atxdmac->clk);
 
-	synchronize_irq(atxdmac->irq);
-
 	free_irq(atxdmac->irq, atxdmac->dma.dev);
 
 	for (i = 0; i < atxdmac->dma.chancnt; i++) {
diff --git a/drivers/dma/dma-axi-dmac.c b/drivers/dma/dma-axi-dmac.c
index 5b2395e..c346809 100644
--- a/drivers/dma/dma-axi-dmac.c
+++ b/drivers/dma/dma-axi-dmac.c
@@ -307,6 +307,13 @@
 	return 0;
 }
 
+static void axi_dmac_synchronize(struct dma_chan *c)
+{
+	struct axi_dmac_chan *chan = to_axi_dmac_chan(c);
+
+	vchan_synchronize(&chan->vchan);
+}
+
 static void axi_dmac_issue_pending(struct dma_chan *c)
 {
 	struct axi_dmac_chan *chan = to_axi_dmac_chan(c);
@@ -613,6 +620,7 @@
 	dma_dev->device_prep_dma_cyclic = axi_dmac_prep_dma_cyclic;
 	dma_dev->device_prep_interleaved_dma = axi_dmac_prep_interleaved;
 	dma_dev->device_terminate_all = axi_dmac_terminate_all;
+	dma_dev->device_synchronize = axi_dmac_synchronize;
 	dma_dev->dev = &pdev->dev;
 	dma_dev->chancnt = 1;
 	dma_dev->src_addr_widths = BIT(dmac->chan.src_width);
diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c
index 3ecec14..c50a247 100644
--- a/drivers/dma/dmaengine.c
+++ b/drivers/dma/dmaengine.c
@@ -43,6 +43,7 @@
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
+#include <linux/platform_device.h>
 #include <linux/dma-mapping.h>
 #include <linux/init.h>
 #include <linux/module.h>
@@ -265,8 +266,11 @@
 	module_put(dma_chan_to_owner(chan));
 
 	/* This channel is not in use anymore, free it */
-	if (!chan->client_count && chan->device->device_free_chan_resources)
+	if (!chan->client_count && chan->device->device_free_chan_resources) {
+		/* Make sure all operations have completed */
+		dmaengine_synchronize(chan);
 		chan->device->device_free_chan_resources(chan);
+	}
 
 	/* If the channel is used via a DMA request router, free the mapping */
 	if (chan->router && chan->router->route_free) {
@@ -493,6 +497,7 @@
 	caps->dst_addr_widths = device->dst_addr_widths;
 	caps->directions = device->directions;
 	caps->residue_granularity = device->residue_granularity;
+	caps->descriptor_reuse = device->descriptor_reuse;
 
 	/*
 	 * Some devices implement only pause (e.g. to get residuum) but no
@@ -511,7 +516,7 @@
 {
 	struct dma_chan *chan;
 
-	if (!__dma_device_satisfies_mask(dev, mask)) {
+	if (mask && !__dma_device_satisfies_mask(dev, mask)) {
 		pr_debug("%s: wrong capabilities\n", __func__);
 		return NULL;
 	}
@@ -542,6 +547,42 @@
 	return NULL;
 }
 
+static struct dma_chan *find_candidate(struct dma_device *device,
+				       const dma_cap_mask_t *mask,
+				       dma_filter_fn fn, void *fn_param)
+{
+	struct dma_chan *chan = private_candidate(mask, device, fn, fn_param);
+	int err;
+
+	if (chan) {
+		/* Found a suitable channel, try to grab, prep, and return it.
+		 * We first set DMA_PRIVATE to disable balance_ref_count as this
+		 * channel will not be published in the general-purpose
+		 * allocator
+		 */
+		dma_cap_set(DMA_PRIVATE, device->cap_mask);
+		device->privatecnt++;
+		err = dma_chan_get(chan);
+
+		if (err) {
+			if (err == -ENODEV) {
+				pr_debug("%s: %s module removed\n", __func__,
+					 dma_chan_name(chan));
+				list_del_rcu(&device->global_node);
+			} else
+				pr_debug("%s: failed to get %s: (%d)\n",
+					 __func__, dma_chan_name(chan), err);
+
+			if (--device->privatecnt == 0)
+				dma_cap_clear(DMA_PRIVATE, device->cap_mask);
+
+			chan = ERR_PTR(err);
+		}
+	}
+
+	return chan ? chan : ERR_PTR(-EPROBE_DEFER);
+}
+
 /**
  * dma_get_slave_channel - try to get specific channel exclusively
  * @chan: target channel
@@ -580,7 +621,6 @@
 {
 	dma_cap_mask_t mask;
 	struct dma_chan *chan;
-	int err;
 
 	dma_cap_zero(mask);
 	dma_cap_set(DMA_SLAVE, mask);
@@ -588,23 +628,11 @@
 	/* lock against __dma_request_channel */
 	mutex_lock(&dma_list_mutex);
 
-	chan = private_candidate(&mask, device, NULL, NULL);
-	if (chan) {
-		dma_cap_set(DMA_PRIVATE, device->cap_mask);
-		device->privatecnt++;
-		err = dma_chan_get(chan);
-		if (err) {
-			pr_debug("%s: failed to get %s: (%d)\n",
-				__func__, dma_chan_name(chan), err);
-			chan = NULL;
-			if (--device->privatecnt == 0)
-				dma_cap_clear(DMA_PRIVATE, device->cap_mask);
-		}
-	}
+	chan = find_candidate(device, &mask, NULL, NULL);
 
 	mutex_unlock(&dma_list_mutex);
 
-	return chan;
+	return IS_ERR(chan) ? NULL : chan;
 }
 EXPORT_SYMBOL_GPL(dma_get_any_slave_channel);
 
@@ -621,35 +649,15 @@
 {
 	struct dma_device *device, *_d;
 	struct dma_chan *chan = NULL;
-	int err;
 
 	/* Find a channel */
 	mutex_lock(&dma_list_mutex);
 	list_for_each_entry_safe(device, _d, &dma_device_list, global_node) {
-		chan = private_candidate(mask, device, fn, fn_param);
-		if (chan) {
-			/* Found a suitable channel, try to grab, prep, and
-			 * return it.  We first set DMA_PRIVATE to disable
-			 * balance_ref_count as this channel will not be
-			 * published in the general-purpose allocator
-			 */
-			dma_cap_set(DMA_PRIVATE, device->cap_mask);
-			device->privatecnt++;
-			err = dma_chan_get(chan);
+		chan = find_candidate(device, mask, fn, fn_param);
+		if (!IS_ERR(chan))
+			break;
 
-			if (err == -ENODEV) {
-				pr_debug("%s: %s module removed\n",
-					 __func__, dma_chan_name(chan));
-				list_del_rcu(&device->global_node);
-			} else if (err)
-				pr_debug("%s: failed to get %s: (%d)\n",
-					 __func__, dma_chan_name(chan), err);
-			else
-				break;
-			if (--device->privatecnt == 0)
-				dma_cap_clear(DMA_PRIVATE, device->cap_mask);
-			chan = NULL;
-		}
+		chan = NULL;
 	}
 	mutex_unlock(&dma_list_mutex);
 
@@ -662,27 +670,73 @@
 }
 EXPORT_SYMBOL_GPL(__dma_request_channel);
 
+static const struct dma_slave_map *dma_filter_match(struct dma_device *device,
+						    const char *name,
+						    struct device *dev)
+{
+	int i;
+
+	if (!device->filter.mapcnt)
+		return NULL;
+
+	for (i = 0; i < device->filter.mapcnt; i++) {
+		const struct dma_slave_map *map = &device->filter.map[i];
+
+		if (!strcmp(map->devname, dev_name(dev)) &&
+		    !strcmp(map->slave, name))
+			return map;
+	}
+
+	return NULL;
+}
+
 /**
- * dma_request_slave_channel_reason - try to allocate an exclusive slave channel
+ * dma_request_chan - try to allocate an exclusive slave channel
  * @dev:	pointer to client device structure
  * @name:	slave channel name
  *
  * Returns pointer to appropriate DMA channel on success or an error pointer.
  */
-struct dma_chan *dma_request_slave_channel_reason(struct device *dev,
-						  const char *name)
+struct dma_chan *dma_request_chan(struct device *dev, const char *name)
 {
+	struct dma_device *d, *_d;
+	struct dma_chan *chan = NULL;
+
 	/* If device-tree is present get slave info from here */
 	if (dev->of_node)
-		return of_dma_request_slave_channel(dev->of_node, name);
+		chan = of_dma_request_slave_channel(dev->of_node, name);
 
 	/* If device was enumerated by ACPI get slave info from here */
-	if (ACPI_HANDLE(dev))
-		return acpi_dma_request_slave_chan_by_name(dev, name);
+	if (has_acpi_companion(dev) && !chan)
+		chan = acpi_dma_request_slave_chan_by_name(dev, name);
 
-	return ERR_PTR(-ENODEV);
+	if (chan) {
+		/* Valid channel found or requester needs to be deferred */
+		if (!IS_ERR(chan) || PTR_ERR(chan) == -EPROBE_DEFER)
+			return chan;
+	}
+
+	/* Try to find the channel via the DMA filter map(s) */
+	mutex_lock(&dma_list_mutex);
+	list_for_each_entry_safe(d, _d, &dma_device_list, global_node) {
+		dma_cap_mask_t mask;
+		const struct dma_slave_map *map = dma_filter_match(d, name, dev);
+
+		if (!map)
+			continue;
+
+		dma_cap_zero(mask);
+		dma_cap_set(DMA_SLAVE, mask);
+
+		chan = find_candidate(d, &mask, d->filter.fn, map->param);
+		if (!IS_ERR(chan))
+			break;
+	}
+	mutex_unlock(&dma_list_mutex);
+
+	return chan ? chan : ERR_PTR(-EPROBE_DEFER);
 }
-EXPORT_SYMBOL_GPL(dma_request_slave_channel_reason);
+EXPORT_SYMBOL_GPL(dma_request_chan);
 
 /**
  * dma_request_slave_channel - try to allocate an exclusive slave channel
@@ -694,17 +748,35 @@
 struct dma_chan *dma_request_slave_channel(struct device *dev,
 					   const char *name)
 {
-	struct dma_chan *ch = dma_request_slave_channel_reason(dev, name);
+	struct dma_chan *ch = dma_request_chan(dev, name);
 	if (IS_ERR(ch))
 		return NULL;
 
-	dma_cap_set(DMA_PRIVATE, ch->device->cap_mask);
-	ch->device->privatecnt++;
-
 	return ch;
 }
 EXPORT_SYMBOL_GPL(dma_request_slave_channel);
 
+/**
+ * dma_request_chan_by_mask - allocate a channel satisfying certain capabilities
+ * @mask: capabilities that the channel must satisfy
+ *
+ * Returns pointer to appropriate DMA channel on success or an error pointer.
+ */
+struct dma_chan *dma_request_chan_by_mask(const dma_cap_mask_t *mask)
+{
+	struct dma_chan *chan;
+
+	if (!mask)
+		return ERR_PTR(-ENODEV);
+
+	chan = __dma_request_channel(mask, NULL, NULL);
+	if (!chan)
+		chan = ERR_PTR(-ENODEV);
+
+	return chan;
+}
+EXPORT_SYMBOL_GPL(dma_request_chan_by_mask);
+
 void dma_release_channel(struct dma_chan *chan)
 {
 	mutex_lock(&dma_list_mutex);
diff --git a/drivers/dma/dw/platform.c b/drivers/dma/dw/platform.c
index 127093a..26edbe3 100644
--- a/drivers/dma/dw/platform.c
+++ b/drivers/dma/dw/platform.c
@@ -103,18 +103,21 @@
 	struct device_node *np = pdev->dev.of_node;
 	struct dw_dma_platform_data *pdata;
 	u32 tmp, arr[DW_DMA_MAX_NR_MASTERS];
+	u32 nr_channels;
 
 	if (!np) {
 		dev_err(&pdev->dev, "Missing DT data\n");
 		return NULL;
 	}
 
+	if (of_property_read_u32(np, "dma-channels", &nr_channels))
+		return NULL;
+
 	pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL);
 	if (!pdata)
 		return NULL;
 
-	if (of_property_read_u32(np, "dma-channels", &pdata->nr_channels))
-		return NULL;
+	pdata->nr_channels = nr_channels;
 
 	if (of_property_read_bool(np, "is_private"))
 		pdata->is_private = true;
diff --git a/drivers/dma/edma.c b/drivers/dma/edma.c
index 16fe773..5058401 100644
--- a/drivers/dma/edma.c
+++ b/drivers/dma/edma.c
@@ -2314,6 +2314,10 @@
 		edma_set_chmap(&ecc->slave_chans[i], ecc->dummy_slot);
 	}
 
+	ecc->dma_slave.filter.map = info->slave_map;
+	ecc->dma_slave.filter.mapcnt = info->slavecnt;
+	ecc->dma_slave.filter.fn = edma_filter_fn;
+
 	ret = dma_async_device_register(&ecc->dma_slave);
 	if (ret) {
 		dev_err(dev, "slave ddev registration failed (%d)\n", ret);
@@ -2421,7 +2425,13 @@
 	},
 };
 
+static int edma_tptc_probe(struct platform_device *pdev)
+{
+	return 0;
+}
+
 static struct platform_driver edma_tptc_driver = {
+	.probe		= edma_tptc_probe,
 	.driver = {
 		.name	= "edma3-tptc",
 		.of_match_table = edma_tptc_of_ids,
diff --git a/drivers/dma/fsl-edma.c b/drivers/dma/fsl-edma.c
index 915eec3..be2e62b 100644
--- a/drivers/dma/fsl-edma.c
+++ b/drivers/dma/fsl-edma.c
@@ -116,6 +116,10 @@
 				BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) | \
 				BIT(DMA_SLAVE_BUSWIDTH_4_BYTES) | \
 				BIT(DMA_SLAVE_BUSWIDTH_8_BYTES)
+enum fsl_edma_pm_state {
+	RUNNING = 0,
+	SUSPENDED,
+};
 
 struct fsl_edma_hw_tcd {
 	__le32	saddr;
@@ -147,6 +151,9 @@
 struct fsl_edma_chan {
 	struct virt_dma_chan		vchan;
 	enum dma_status			status;
+	enum fsl_edma_pm_state		pm_state;
+	bool				idle;
+	u32				slave_id;
 	struct fsl_edma_engine		*edma;
 	struct fsl_edma_desc		*edesc;
 	struct fsl_edma_slave_config	fsc;
@@ -298,6 +305,7 @@
 	spin_lock_irqsave(&fsl_chan->vchan.lock, flags);
 	fsl_edma_disable_request(fsl_chan);
 	fsl_chan->edesc = NULL;
+	fsl_chan->idle = true;
 	vchan_get_all_descriptors(&fsl_chan->vchan, &head);
 	spin_unlock_irqrestore(&fsl_chan->vchan.lock, flags);
 	vchan_dma_desc_free_list(&fsl_chan->vchan, &head);
@@ -313,6 +321,7 @@
 	if (fsl_chan->edesc) {
 		fsl_edma_disable_request(fsl_chan);
 		fsl_chan->status = DMA_PAUSED;
+		fsl_chan->idle = true;
 	}
 	spin_unlock_irqrestore(&fsl_chan->vchan.lock, flags);
 	return 0;
@@ -327,6 +336,7 @@
 	if (fsl_chan->edesc) {
 		fsl_edma_enable_request(fsl_chan);
 		fsl_chan->status = DMA_IN_PROGRESS;
+		fsl_chan->idle = false;
 	}
 	spin_unlock_irqrestore(&fsl_chan->vchan.lock, flags);
 	return 0;
@@ -648,6 +658,7 @@
 	fsl_edma_set_tcd_regs(fsl_chan, fsl_chan->edesc->tcd[0].vtcd);
 	fsl_edma_enable_request(fsl_chan);
 	fsl_chan->status = DMA_IN_PROGRESS;
+	fsl_chan->idle = false;
 }
 
 static irqreturn_t fsl_edma_tx_handler(int irq, void *dev_id)
@@ -676,6 +687,7 @@
 				vchan_cookie_complete(&fsl_chan->edesc->vdesc);
 				fsl_chan->edesc = NULL;
 				fsl_chan->status = DMA_COMPLETE;
+				fsl_chan->idle = true;
 			} else {
 				vchan_cyclic_callback(&fsl_chan->edesc->vdesc);
 			}
@@ -704,6 +716,7 @@
 			edma_writeb(fsl_edma, EDMA_CERR_CERR(ch),
 				fsl_edma->membase + EDMA_CERR);
 			fsl_edma->chans[ch].status = DMA_ERROR;
+			fsl_edma->chans[ch].idle = true;
 		}
 	}
 	return IRQ_HANDLED;
@@ -724,6 +737,12 @@
 
 	spin_lock_irqsave(&fsl_chan->vchan.lock, flags);
 
+	if (unlikely(fsl_chan->pm_state != RUNNING)) {
+		spin_unlock_irqrestore(&fsl_chan->vchan.lock, flags);
+		/* cannot submit due to suspend */
+		return;
+	}
+
 	if (vchan_issue_pending(&fsl_chan->vchan) && !fsl_chan->edesc)
 		fsl_edma_xfer_desc(fsl_chan);
 
@@ -735,6 +754,7 @@
 {
 	struct fsl_edma_engine *fsl_edma = ofdma->of_dma_data;
 	struct dma_chan *chan, *_chan;
+	struct fsl_edma_chan *fsl_chan;
 	unsigned long chans_per_mux = fsl_edma->n_chans / DMAMUX_NR;
 
 	if (dma_spec->args_count != 2)
@@ -748,8 +768,10 @@
 			chan = dma_get_slave_channel(chan);
 			if (chan) {
 				chan->device->privatecnt++;
-				fsl_edma_chan_mux(to_fsl_edma_chan(chan),
-					dma_spec->args[1], true);
+				fsl_chan = to_fsl_edma_chan(chan);
+				fsl_chan->slave_id = dma_spec->args[1];
+				fsl_edma_chan_mux(fsl_chan, fsl_chan->slave_id,
+						true);
 				mutex_unlock(&fsl_edma->fsl_edma_mutex);
 				return chan;
 			}
@@ -888,7 +910,9 @@
 		struct fsl_edma_chan *fsl_chan = &fsl_edma->chans[i];
 
 		fsl_chan->edma = fsl_edma;
-
+		fsl_chan->pm_state = RUNNING;
+		fsl_chan->slave_id = 0;
+		fsl_chan->idle = true;
 		fsl_chan->vchan.desc_free = fsl_edma_free_desc;
 		vchan_init(&fsl_chan->vchan, &fsl_edma->dma_dev);
 
@@ -959,6 +983,60 @@
 	return 0;
 }
 
+static int fsl_edma_suspend_late(struct device *dev)
+{
+	struct fsl_edma_engine *fsl_edma = dev_get_drvdata(dev);
+	struct fsl_edma_chan *fsl_chan;
+	unsigned long flags;
+	int i;
+
+	for (i = 0; i < fsl_edma->n_chans; i++) {
+		fsl_chan = &fsl_edma->chans[i];
+		spin_lock_irqsave(&fsl_chan->vchan.lock, flags);
+		/* The channel should be idle by now; force-disable it if not. */
+		if (unlikely(!fsl_chan->idle)) {
+			dev_warn(dev, "WARN: There is non-idle channel.");
+			fsl_edma_disable_request(fsl_chan);
+			fsl_edma_chan_mux(fsl_chan, 0, false);
+		}
+
+		fsl_chan->pm_state = SUSPENDED;
+		spin_unlock_irqrestore(&fsl_chan->vchan.lock, flags);
+	}
+
+	return 0;
+}
+
+static int fsl_edma_resume_early(struct device *dev)
+{
+	struct fsl_edma_engine *fsl_edma = dev_get_drvdata(dev);
+	struct fsl_edma_chan *fsl_chan;
+	int i;
+
+	for (i = 0; i < fsl_edma->n_chans; i++) {
+		fsl_chan = &fsl_edma->chans[i];
+		fsl_chan->pm_state = RUNNING;
+		edma_writew(fsl_edma, 0x0, fsl_edma->membase + EDMA_TCD_CSR(i));
+		if (fsl_chan->slave_id != 0)
+			fsl_edma_chan_mux(fsl_chan, fsl_chan->slave_id, true);
+	}
+
+	edma_writel(fsl_edma, EDMA_CR_ERGA | EDMA_CR_ERCA,
+			fsl_edma->membase + EDMA_CR);
+
+	return 0;
+}
+
+/*
+ * eDMA provides a service to other devices, so it should be suspended
+ * late and resumed early. When eDMA suspends, all of its clients should
+ * have stopped DMA data transmission and left their channels idle.
+ */
+static const struct dev_pm_ops fsl_edma_pm_ops = {
+	.suspend_late   = fsl_edma_suspend_late,
+	.resume_early   = fsl_edma_resume_early,
+};
+
 static const struct of_device_id fsl_edma_dt_ids[] = {
 	{ .compatible = "fsl,vf610-edma", },
 	{ /* sentinel */ }
@@ -969,6 +1047,7 @@
 	.driver		= {
 		.name	= "fsl-edma",
 		.of_match_table = fsl_edma_dt_ids,
+		.pm     = &fsl_edma_pm_ops,
 	},
 	.probe          = fsl_edma_probe,
 	.remove		= fsl_edma_remove,
diff --git a/drivers/dma/hsu/hsu.c b/drivers/dma/hsu/hsu.c
index 823ad72..eef145e 100644
--- a/drivers/dma/hsu/hsu.c
+++ b/drivers/dma/hsu/hsu.c
@@ -228,6 +228,8 @@
 	for_each_sg(sgl, sg, sg_len, i) {
 		desc->sg[i].addr = sg_dma_address(sg);
 		desc->sg[i].len = sg_dma_len(sg);
+
+		desc->length += sg_dma_len(sg);
 	}
 
 	desc->nents = sg_len;
@@ -249,21 +251,10 @@
 	spin_unlock_irqrestore(&hsuc->vchan.lock, flags);
 }
 
-static size_t hsu_dma_desc_size(struct hsu_dma_desc *desc)
-{
-	size_t bytes = 0;
-	unsigned int i;
-
-	for (i = desc->active; i < desc->nents; i++)
-		bytes += desc->sg[i].len;
-
-	return bytes;
-}
-
 static size_t hsu_dma_active_desc_size(struct hsu_dma_chan *hsuc)
 {
 	struct hsu_dma_desc *desc = hsuc->desc;
-	size_t bytes = hsu_dma_desc_size(desc);
+	size_t bytes = desc->length;
 	int i;
 
 	i = desc->active % HSU_DMA_CHAN_NR_DESC;
@@ -294,7 +285,7 @@
 		dma_set_residue(state, bytes);
 		status = hsuc->desc->status;
 	} else if (vdesc) {
-		bytes = hsu_dma_desc_size(to_hsu_dma_desc(vdesc));
+		bytes = to_hsu_dma_desc(vdesc)->length;
 		dma_set_residue(state, bytes);
 	}
 	spin_unlock_irqrestore(&hsuc->vchan.lock, flags);
diff --git a/drivers/dma/hsu/hsu.h b/drivers/dma/hsu/hsu.h
index f06579c..578a8ee 100644
--- a/drivers/dma/hsu/hsu.h
+++ b/drivers/dma/hsu/hsu.h
@@ -65,6 +65,7 @@
 	enum dma_transfer_direction direction;
 	struct hsu_dma_sg *sg;
 	unsigned int nents;
+	size_t length;
 	unsigned int active;
 	enum dma_status status;
 };
diff --git a/drivers/dma/idma64.c b/drivers/dma/idma64.c
index 7d56b47..3cb7b2c 100644
--- a/drivers/dma/idma64.c
+++ b/drivers/dma/idma64.c
@@ -178,20 +178,12 @@
 	if (!status)
 		return IRQ_NONE;
 
-	/* Disable interrupts */
-	channel_clear_bit(idma64, MASK(XFER), idma64->all_chan_mask);
-	channel_clear_bit(idma64, MASK(ERROR), idma64->all_chan_mask);
-
 	status_xfer = dma_readl(idma64, RAW(XFER));
 	status_err = dma_readl(idma64, RAW(ERROR));
 
 	for (i = 0; i < idma64->dma.chancnt; i++)
 		idma64_chan_irq(idma64, i, status_err, status_xfer);
 
-	/* Re-enable interrupts */
-	channel_set_bit(idma64, MASK(XFER), idma64->all_chan_mask);
-	channel_set_bit(idma64, MASK(ERROR), idma64->all_chan_mask);
-
 	return IRQ_HANDLED;
 }
 
@@ -239,7 +231,7 @@
 	idma64_desc_free(idma64c, to_idma64_desc(vdesc));
 }
 
-static u64 idma64_hw_desc_fill(struct idma64_hw_desc *hw,
+static void idma64_hw_desc_fill(struct idma64_hw_desc *hw,
 		struct dma_slave_config *config,
 		enum dma_transfer_direction direction, u64 llp)
 {
@@ -276,26 +268,26 @@
 		     IDMA64C_CTLL_SRC_WIDTH(src_width);
 
 	lli->llp = llp;
-	return hw->llp;
 }
 
 static void idma64_desc_fill(struct idma64_chan *idma64c,
 		struct idma64_desc *desc)
 {
 	struct dma_slave_config *config = &idma64c->config;
-	struct idma64_hw_desc *hw = &desc->hw[desc->ndesc - 1];
+	unsigned int i = desc->ndesc;
+	struct idma64_hw_desc *hw = &desc->hw[i - 1];
 	struct idma64_lli *lli = hw->lli;
 	u64 llp = 0;
-	unsigned int i = desc->ndesc;
 
 	/* Fill the hardware descriptors and link them to a list */
 	do {
 		hw = &desc->hw[--i];
-		llp = idma64_hw_desc_fill(hw, config, desc->direction, llp);
+		idma64_hw_desc_fill(hw, config, desc->direction, llp);
+		llp = hw->llp;
 		desc->length += hw->len;
 	} while (i);
 
-	/* Trigger interrupt after last block */
+	/* Trigger an interrupt after the last block is transferred */
 	lli->ctllo |= IDMA64C_CTLL_INT_EN;
 }
 
@@ -596,6 +588,8 @@
 
 	idma64->dma.dev = chip->dev;
 
+	dma_set_max_seg_size(idma64->dma.dev, IDMA64C_CTLH_BLOCK_TS_MASK);
+
 	ret = dma_async_device_register(&idma64->dma);
 	if (ret)
 		return ret;
diff --git a/drivers/dma/idma64.h b/drivers/dma/idma64.h
index f6aeff0..8423f13 100644
--- a/drivers/dma/idma64.h
+++ b/drivers/dma/idma64.h
@@ -54,7 +54,8 @@
 #define IDMA64C_CTLL_LLP_S_EN		(1 << 28)	/* src block chain */
 
 /* Bitfields in CTL_HI */
-#define IDMA64C_CTLH_BLOCK_TS(x)	((x) & ((1 << 17) - 1))
+#define IDMA64C_CTLH_BLOCK_TS_MASK	((1 << 17) - 1)
+#define IDMA64C_CTLH_BLOCK_TS(x)	((x) & IDMA64C_CTLH_BLOCK_TS_MASK)
 #define IDMA64C_CTLH_DONE		(1 << 17)
 
 /* Bitfields in CFG_LO */
diff --git a/drivers/dma/img-mdc-dma.c b/drivers/dma/img-mdc-dma.c
index 9ca5683..a4c53be 100644
--- a/drivers/dma/img-mdc-dma.c
+++ b/drivers/dma/img-mdc-dma.c
@@ -651,6 +651,48 @@
 	return ret;
 }
 
+static unsigned int mdc_get_new_events(struct mdc_chan *mchan)
+{
+	u32 val, processed, done1, done2;
+	unsigned int ret;
+
+	val = mdc_chan_readl(mchan, MDC_CMDS_PROCESSED);
+	processed = (val >> MDC_CMDS_PROCESSED_CMDS_PROCESSED_SHIFT) &
+				MDC_CMDS_PROCESSED_CMDS_PROCESSED_MASK;
+	/*
+	 * CMDS_DONE may have incremented between reading CMDS_PROCESSED
+	 * and clearing INT_ACTIVE.  Re-read CMDS_PROCESSED to ensure we
+	 * didn't miss a command completion.
+	 */
+	do {
+		val = mdc_chan_readl(mchan, MDC_CMDS_PROCESSED);
+
+		done1 = (val >> MDC_CMDS_PROCESSED_CMDS_DONE_SHIFT) &
+			MDC_CMDS_PROCESSED_CMDS_DONE_MASK;
+
+		val &= ~((MDC_CMDS_PROCESSED_CMDS_PROCESSED_MASK <<
+			  MDC_CMDS_PROCESSED_CMDS_PROCESSED_SHIFT) |
+			 MDC_CMDS_PROCESSED_INT_ACTIVE);
+
+		val |= done1 << MDC_CMDS_PROCESSED_CMDS_PROCESSED_SHIFT;
+
+		mdc_chan_writel(mchan, val, MDC_CMDS_PROCESSED);
+
+		val = mdc_chan_readl(mchan, MDC_CMDS_PROCESSED);
+
+		done2 = (val >> MDC_CMDS_PROCESSED_CMDS_DONE_SHIFT) &
+			MDC_CMDS_PROCESSED_CMDS_DONE_MASK;
+	} while (done1 != done2);
+
+	if (done1 >= processed)
+		ret = done1 - processed;
+	else
+		ret = ((MDC_CMDS_PROCESSED_CMDS_PROCESSED_MASK + 1) -
+			processed) + done1;
+
+	return ret;
+}
+
 static int mdc_terminate_all(struct dma_chan *chan)
 {
 	struct mdc_chan *mchan = to_mdc_chan(chan);
@@ -667,6 +709,8 @@
 	mchan->desc = NULL;
 	vchan_get_all_descriptors(&mchan->vc, &head);
 
+	mdc_get_new_events(mchan);
+
 	spin_unlock_irqrestore(&mchan->vc.lock, flags);
 
 	if (mdesc)
@@ -703,35 +747,17 @@
 {
 	struct mdc_chan *mchan = (struct mdc_chan *)dev_id;
 	struct mdc_tx_desc *mdesc;
-	u32 val, processed, done1, done2;
-	unsigned int i;
+	unsigned int i, new_events;
 
 	spin_lock(&mchan->vc.lock);
 
-	val = mdc_chan_readl(mchan, MDC_CMDS_PROCESSED);
-	processed = (val >> MDC_CMDS_PROCESSED_CMDS_PROCESSED_SHIFT) &
-		MDC_CMDS_PROCESSED_CMDS_PROCESSED_MASK;
-	/*
-	 * CMDS_DONE may have incremented between reading CMDS_PROCESSED
-	 * and clearing INT_ACTIVE.  Re-read CMDS_PROCESSED to ensure we
-	 * didn't miss a command completion.
-	 */
-	do {
-		val = mdc_chan_readl(mchan, MDC_CMDS_PROCESSED);
-		done1 = (val >> MDC_CMDS_PROCESSED_CMDS_DONE_SHIFT) &
-			MDC_CMDS_PROCESSED_CMDS_DONE_MASK;
-		val &= ~((MDC_CMDS_PROCESSED_CMDS_PROCESSED_MASK <<
-			  MDC_CMDS_PROCESSED_CMDS_PROCESSED_SHIFT) |
-			 MDC_CMDS_PROCESSED_INT_ACTIVE);
-		val |= done1 << MDC_CMDS_PROCESSED_CMDS_PROCESSED_SHIFT;
-		mdc_chan_writel(mchan, val, MDC_CMDS_PROCESSED);
-		val = mdc_chan_readl(mchan, MDC_CMDS_PROCESSED);
-		done2 = (val >> MDC_CMDS_PROCESSED_CMDS_DONE_SHIFT) &
-			MDC_CMDS_PROCESSED_CMDS_DONE_MASK;
-	} while (done1 != done2);
-
 	dev_dbg(mdma2dev(mchan->mdma), "IRQ on channel %d\n", mchan->chan_nr);
 
+	new_events = mdc_get_new_events(mchan);
+
+	if (!new_events)
+		goto out;
+
 	mdesc = mchan->desc;
 	if (!mdesc) {
 		dev_warn(mdma2dev(mchan->mdma),
@@ -740,8 +766,7 @@
 		goto out;
 	}
 
-	for (i = processed; i != done1;
-	     i = (i + 1) % (MDC_CMDS_PROCESSED_CMDS_PROCESSED_MASK + 1)) {
+	for (i = 0; i < new_events; i++) {
 		/*
 		 * The first interrupt in a transfer indicates that the
 		 * command list has been loaded, not that a command has
@@ -979,7 +1004,6 @@
 				 vc.chan.device_node) {
 		list_del(&mchan->vc.chan.device_node);
 
-		synchronize_irq(mchan->irq);
 		devm_free_irq(&pdev->dev, mchan->irq, mchan);
 
 		tasklet_kill(&mchan->vc.task);
diff --git a/drivers/dma/ioat/dca.c b/drivers/dma/ioat/dca.c
index 2cb7c30..0b9b6b0 100644
--- a/drivers/dma/ioat/dca.c
+++ b/drivers/dma/ioat/dca.c
@@ -224,7 +224,7 @@
 	return tag;
 }
 
-static struct dca_ops ioat_dca_ops = {
+static const struct dca_ops ioat_dca_ops = {
 	.add_requester		= ioat_dca_add_requester,
 	.remove_requester	= ioat_dca_remove_requester,
 	.get_tag		= ioat_dca_get_tag,
diff --git a/drivers/dma/ioat/dma.h b/drivers/dma/ioat/dma.h
index 8f4e607..b8f4807 100644
--- a/drivers/dma/ioat/dma.h
+++ b/drivers/dma/ioat/dma.h
@@ -235,43 +235,11 @@
 	return ioat_dma->idx[index];
 }
 
-static inline u64 ioat_chansts_32(struct ioatdma_chan *ioat_chan)
-{
-	u8 ver = ioat_chan->ioat_dma->version;
-	u64 status;
-	u32 status_lo;
-
-	/* We need to read the low address first as this causes the
-	 * chipset to latch the upper bits for the subsequent read
-	 */
-	status_lo = readl(ioat_chan->reg_base + IOAT_CHANSTS_OFFSET_LOW(ver));
-	status = readl(ioat_chan->reg_base + IOAT_CHANSTS_OFFSET_HIGH(ver));
-	status <<= 32;
-	status |= status_lo;
-
-	return status;
-}
-
-#if BITS_PER_LONG == 64
-
 static inline u64 ioat_chansts(struct ioatdma_chan *ioat_chan)
 {
-	u8 ver = ioat_chan->ioat_dma->version;
-	u64 status;
-
-	 /* With IOAT v3.3 the status register is 64bit.  */
-	if (ver >= IOAT_VER_3_3)
-		status = readq(ioat_chan->reg_base + IOAT_CHANSTS_OFFSET(ver));
-	else
-		status = ioat_chansts_32(ioat_chan);
-
-	return status;
+	return readq(ioat_chan->reg_base + IOAT_CHANSTS_OFFSET);
 }
 
-#else
-#define ioat_chansts ioat_chansts_32
-#endif
-
 static inline u64 ioat_chansts_to_addr(u64 status)
 {
 	return status & IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR;
diff --git a/drivers/dma/ioat/registers.h b/drivers/dma/ioat/registers.h
index 909352f..4994a36 100644
--- a/drivers/dma/ioat/registers.h
+++ b/drivers/dma/ioat/registers.h
@@ -99,19 +99,9 @@
 #define IOAT_DMA_COMP_V1			0x0001	/* Compatibility with DMA version 1 */
 #define IOAT_DMA_COMP_V2			0x0002	/* Compatibility with DMA version 2 */
 
-
-#define IOAT1_CHANSTS_OFFSET		0x04	/* 64-bit Channel Status Register */
-#define IOAT2_CHANSTS_OFFSET		0x08	/* 64-bit Channel Status Register */
-#define IOAT_CHANSTS_OFFSET(ver)		((ver) < IOAT_VER_2_0 \
-						? IOAT1_CHANSTS_OFFSET : IOAT2_CHANSTS_OFFSET)
-#define IOAT1_CHANSTS_OFFSET_LOW	0x04
-#define IOAT2_CHANSTS_OFFSET_LOW	0x08
-#define IOAT_CHANSTS_OFFSET_LOW(ver)		((ver) < IOAT_VER_2_0 \
-						? IOAT1_CHANSTS_OFFSET_LOW : IOAT2_CHANSTS_OFFSET_LOW)
-#define IOAT1_CHANSTS_OFFSET_HIGH	0x08
-#define IOAT2_CHANSTS_OFFSET_HIGH	0x0C
-#define IOAT_CHANSTS_OFFSET_HIGH(ver)		((ver) < IOAT_VER_2_0 \
-						? IOAT1_CHANSTS_OFFSET_HIGH : IOAT2_CHANSTS_OFFSET_HIGH)
+/* IOAT1 define kept so that the i7300_idle driver still compiles */
+#define IOAT1_CHANSTS_OFFSET		0x04
+#define IOAT_CHANSTS_OFFSET		0x08	/* 64-bit Channel Status Register */
 #define IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR	(~0x3fULL)
 #define IOAT_CHANSTS_SOFT_ERR			0x10ULL
 #define IOAT_CHANSTS_UNAFFILIATED_ERR		0x8ULL
diff --git a/drivers/dma/mv_xor.c b/drivers/dma/mv_xor.c
index 1c2de9a..14091f8 100644
--- a/drivers/dma/mv_xor.c
+++ b/drivers/dma/mv_xor.c
@@ -139,46 +139,10 @@
 }
 
 static void mv_chan_set_mode(struct mv_xor_chan *chan,
-			     enum dma_transaction_type type)
+			     u32 op_mode)
 {
-	u32 op_mode;
 	u32 config = readl_relaxed(XOR_CONFIG(chan));
 
-	switch (type) {
-	case DMA_XOR:
-		op_mode = XOR_OPERATION_MODE_XOR;
-		break;
-	case DMA_MEMCPY:
-		op_mode = XOR_OPERATION_MODE_MEMCPY;
-		break;
-	default:
-		dev_err(mv_chan_to_devp(chan),
-			"error: unsupported operation %d\n",
-			type);
-		BUG();
-		return;
-	}
-
-	config &= ~0x7;
-	config |= op_mode;
-
-#if defined(__BIG_ENDIAN)
-	config |= XOR_DESCRIPTOR_SWAP;
-#else
-	config &= ~XOR_DESCRIPTOR_SWAP;
-#endif
-
-	writel_relaxed(config, XOR_CONFIG(chan));
-	chan->current_type = type;
-}
-
-static void mv_chan_set_mode_to_desc(struct mv_xor_chan *chan)
-{
-	u32 op_mode;
-	u32 config = readl_relaxed(XOR_CONFIG(chan));
-
-	op_mode = XOR_OPERATION_MODE_IN_DESC;
-
 	config &= ~0x7;
 	config |= op_mode;
 
@@ -1043,9 +1007,9 @@
 	mv_chan_unmask_interrupts(mv_chan);
 
 	if (mv_chan->op_in_desc == XOR_MODE_IN_DESC)
-		mv_chan_set_mode_to_desc(mv_chan);
+		mv_chan_set_mode(mv_chan, XOR_OPERATION_MODE_IN_DESC);
 	else
-		mv_chan_set_mode(mv_chan, DMA_XOR);
+		mv_chan_set_mode(mv_chan, XOR_OPERATION_MODE_XOR);
 
 	spin_lock_init(&mv_chan->lock);
 	INIT_LIST_HEAD(&mv_chan->chain);
@@ -1121,6 +1085,57 @@
 	writel(0, base + WINDOW_OVERRIDE_CTRL(1));
 }
 
+/*
+ * Since this XOR driver is basically used only for RAID5, we don't
+ * need to care about synchronizing ->suspend with DMA activity,
+ * because the DMA engine will naturally be quiet due to the block
+ * devices being suspended.
+ */
+static int mv_xor_suspend(struct platform_device *pdev, pm_message_t state)
+{
+	struct mv_xor_device *xordev = platform_get_drvdata(pdev);
+	int i;
+
+	for (i = 0; i < MV_XOR_MAX_CHANNELS; i++) {
+		struct mv_xor_chan *mv_chan = xordev->channels[i];
+
+		if (!mv_chan)
+			continue;
+
+		mv_chan->saved_config_reg =
+			readl_relaxed(XOR_CONFIG(mv_chan));
+		mv_chan->saved_int_mask_reg =
+			readl_relaxed(XOR_INTR_MASK(mv_chan));
+	}
+
+	return 0;
+}
+
+static int mv_xor_resume(struct platform_device *dev)
+{
+	struct mv_xor_device *xordev = platform_get_drvdata(dev);
+	const struct mbus_dram_target_info *dram;
+	int i;
+
+	for (i = 0; i < MV_XOR_MAX_CHANNELS; i++) {
+		struct mv_xor_chan *mv_chan = xordev->channels[i];
+
+		if (!mv_chan)
+			continue;
+
+		writel_relaxed(mv_chan->saved_config_reg,
+			       XOR_CONFIG(mv_chan));
+		writel_relaxed(mv_chan->saved_int_mask_reg,
+			       XOR_INTR_MASK(mv_chan));
+	}
+
+	dram = mv_mbus_dram_info();
+	if (dram)
+		mv_xor_conf_mbus_windows(xordev, dram);
+
+	return 0;
+}
+
 static const struct of_device_id mv_xor_dt_ids[] = {
 	{ .compatible = "marvell,orion-xor", .data = (void *)XOR_MODE_IN_REG },
 	{ .compatible = "marvell,armada-380-xor", .data = (void *)XOR_MODE_IN_DESC },
@@ -1282,6 +1297,8 @@
 
 static struct platform_driver mv_xor_driver = {
 	.probe		= mv_xor_probe,
+	.suspend        = mv_xor_suspend,
+	.resume         = mv_xor_resume,
 	.driver		= {
 		.name	        = MV_XOR_NAME,
 		.of_match_table = of_match_ptr(mv_xor_dt_ids),
diff --git a/drivers/dma/mv_xor.h b/drivers/dma/mv_xor.h
index b7455b4..c19fe30 100644
--- a/drivers/dma/mv_xor.h
+++ b/drivers/dma/mv_xor.h
@@ -110,7 +110,6 @@
 	void __iomem		*mmr_high_base;
 	unsigned int		idx;
 	int                     irq;
-	enum dma_transaction_type	current_type;
 	struct list_head	chain;
 	struct list_head	free_slots;
 	struct list_head	allocated_slots;
@@ -126,6 +125,7 @@
 	char			dummy_src[MV_XOR_MIN_BYTE_COUNT];
 	char			dummy_dst[MV_XOR_MIN_BYTE_COUNT];
 	dma_addr_t		dummy_src_addr, dummy_dst_addr;
+	u32                     saved_config_reg, saved_int_mask_reg;
 };
 
 /**
diff --git a/drivers/dma/omap-dma.c b/drivers/dma/omap-dma.c
index 1dfc71c..9794b07 100644
--- a/drivers/dma/omap-dma.c
+++ b/drivers/dma/omap-dma.c
@@ -28,8 +28,6 @@
 struct omap_dmadev {
 	struct dma_device ddev;
 	spinlock_t lock;
-	struct tasklet_struct task;
-	struct list_head pending;
 	void __iomem *base;
 	const struct omap_dma_reg *reg_map;
 	struct omap_system_dma_plat_info *plat;
@@ -42,7 +40,6 @@
 
 struct omap_chan {
 	struct virt_dma_chan vc;
-	struct list_head node;
 	void __iomem *channel_base;
 	const struct omap_dma_reg *reg_map;
 	uint32_t ccr;
@@ -454,33 +451,6 @@
 	spin_unlock_irqrestore(&c->vc.lock, flags);
 }
 
-/*
- * This callback schedules all pending channels.  We could be more
- * clever here by postponing allocation of the real DMA channels to
- * this point, and freeing them when our virtual channel becomes idle.
- *
- * We would then need to deal with 'all channels in-use'
- */
-static void omap_dma_sched(unsigned long data)
-{
-	struct omap_dmadev *d = (struct omap_dmadev *)data;
-	LIST_HEAD(head);
-
-	spin_lock_irq(&d->lock);
-	list_splice_tail_init(&d->pending, &head);
-	spin_unlock_irq(&d->lock);
-
-	while (!list_empty(&head)) {
-		struct omap_chan *c = list_first_entry(&head,
-			struct omap_chan, node);
-
-		spin_lock_irq(&c->vc.lock);
-		list_del_init(&c->node);
-		omap_dma_start_desc(c);
-		spin_unlock_irq(&c->vc.lock);
-	}
-}
-
 static irqreturn_t omap_dma_irq(int irq, void *devid)
 {
 	struct omap_dmadev *od = devid;
@@ -703,8 +673,14 @@
 	struct omap_chan *c = to_omap_dma_chan(chan);
 	struct virt_dma_desc *vd;
 	enum dma_status ret;
+	uint32_t ccr;
 	unsigned long flags;
 
+	ccr = omap_dma_chan_read(c, CCR);
+	/* The channel is no longer active, handle the completion right away */
+	if (!(ccr & CCR_ENABLE))
+		omap_dma_callback(c->dma_ch, 0, c);
+
 	ret = dma_cookie_status(chan, cookie, txstate);
 	if (ret == DMA_COMPLETE || !txstate)
 		return ret;
@@ -719,7 +695,7 @@
 
 		if (d->dir == DMA_MEM_TO_DEV)
 			pos = omap_dma_get_src_pos(c);
-		else if (d->dir == DMA_DEV_TO_MEM)
+		else if (d->dir == DMA_DEV_TO_MEM  || d->dir == DMA_MEM_TO_MEM)
 			pos = omap_dma_get_dst_pos(c);
 		else
 			pos = 0;
@@ -739,22 +715,8 @@
 	unsigned long flags;
 
 	spin_lock_irqsave(&c->vc.lock, flags);
-	if (vchan_issue_pending(&c->vc) && !c->desc) {
-		/*
-		 * c->cyclic is used only by audio and in this case the DMA need
-		 * to be started without delay.
-		 */
-		if (!c->cyclic) {
-			struct omap_dmadev *d = to_omap_dma_dev(chan->device);
-			spin_lock(&d->lock);
-			if (list_empty(&c->node))
-				list_add_tail(&c->node, &d->pending);
-			spin_unlock(&d->lock);
-			tasklet_schedule(&d->task);
-		} else {
-			omap_dma_start_desc(c);
-		}
-	}
+	if (vchan_issue_pending(&c->vc) && !c->desc)
+		omap_dma_start_desc(c);
 	spin_unlock_irqrestore(&c->vc.lock, flags);
 }
 
@@ -768,7 +730,7 @@
 	struct scatterlist *sgent;
 	struct omap_desc *d;
 	dma_addr_t dev_addr;
-	unsigned i, j = 0, es, en, frame_bytes;
+	unsigned i, es, en, frame_bytes;
 	u32 burst;
 
 	if (dir == DMA_DEV_TO_MEM) {
@@ -845,13 +807,12 @@
 	en = burst;
 	frame_bytes = es_bytes[es] * en;
 	for_each_sg(sgl, sgent, sglen, i) {
-		d->sg[j].addr = sg_dma_address(sgent);
-		d->sg[j].en = en;
-		d->sg[j].fn = sg_dma_len(sgent) / frame_bytes;
-		j++;
+		d->sg[i].addr = sg_dma_address(sgent);
+		d->sg[i].en = en;
+		d->sg[i].fn = sg_dma_len(sgent) / frame_bytes;
 	}
 
-	d->sglen = j;
+	d->sglen = sglen;
 
 	return vchan_tx_prep(&c->vc, &d->vd, tx_flags);
 }
@@ -1018,17 +979,11 @@
 static int omap_dma_terminate_all(struct dma_chan *chan)
 {
 	struct omap_chan *c = to_omap_dma_chan(chan);
-	struct omap_dmadev *d = to_omap_dma_dev(c->vc.chan.device);
 	unsigned long flags;
 	LIST_HEAD(head);
 
 	spin_lock_irqsave(&c->vc.lock, flags);
 
-	/* Prevent this channel being scheduled */
-	spin_lock(&d->lock);
-	list_del_init(&c->node);
-	spin_unlock(&d->lock);
-
 	/*
 	 * Stop DMA activity: we assume the callback will not be called
 	 * after omap_dma_stop() returns (even if it does, it will see
@@ -1102,14 +1057,12 @@
 	c->reg_map = od->reg_map;
 	c->vc.desc_free = omap_dma_desc_free;
 	vchan_init(&c->vc, &od->ddev);
-	INIT_LIST_HEAD(&c->node);
 
 	return 0;
 }
 
 static void omap_dma_free(struct omap_dmadev *od)
 {
-	tasklet_kill(&od->task);
 	while (!list_empty(&od->ddev.channels)) {
 		struct omap_chan *c = list_first_entry(&od->ddev.channels,
 			struct omap_chan, vc.chan.device_node);
@@ -1165,12 +1118,9 @@
 	od->ddev.residue_granularity = DMA_RESIDUE_GRANULARITY_BURST;
 	od->ddev.dev = &pdev->dev;
 	INIT_LIST_HEAD(&od->ddev.channels);
-	INIT_LIST_HEAD(&od->pending);
 	spin_lock_init(&od->lock);
 	spin_lock_init(&od->irq_lock);
 
-	tasklet_init(&od->task, omap_dma_sched, (unsigned long)od);
-
 	od->dma_requests = OMAP_SDMA_REQUESTS;
 	if (pdev->dev.of_node && of_property_read_u32(pdev->dev.of_node,
 						      "dma-requests",
@@ -1203,6 +1153,10 @@
 			return rc;
 	}
 
+	od->ddev.filter.map = od->plat->slave_map;
+	od->ddev.filter.mapcnt = od->plat->slavecnt;
+	od->ddev.filter.fn = omap_dma_filter_fn;
+
 	rc = dma_async_device_register(&od->ddev);
 	if (rc) {
 		pr_warn("OMAP-DMA: failed to register slave DMA engine device: %d\n",
diff --git a/drivers/dma/pxa_dma.c b/drivers/dma/pxa_dma.c
index fc4156a..f2a0310 100644
--- a/drivers/dma/pxa_dma.c
+++ b/drivers/dma/pxa_dma.c
@@ -1414,6 +1414,7 @@
 	pdev->slave.dst_addr_widths = widths;
 	pdev->slave.directions = BIT(DMA_MEM_TO_DEV) | BIT(DMA_DEV_TO_MEM);
 	pdev->slave.residue_granularity = DMA_RESIDUE_GRANULARITY_DESCRIPTOR;
+	pdev->slave.descriptor_reuse = true;
 
 	pdev->slave.dev = &op->dev;
 	ret = pxad_init_dmadev(op, pdev, dma_channels);
diff --git a/drivers/dma/sh/Kconfig b/drivers/dma/sh/Kconfig
index 9fda65a..f32c430 100644
--- a/drivers/dma/sh/Kconfig
+++ b/drivers/dma/sh/Kconfig
@@ -47,12 +47,6 @@
 	  This driver supports the general purpose DMA controller found in the
 	  Renesas R-Car second generation SoCs.
 
-config RCAR_HPB_DMAE
-	tristate "Renesas R-Car HPB DMAC support"
-	depends on SH_DMAE_BASE
-	help
-	  Enable support for the Renesas R-Car series DMA controllers.
-
 config RENESAS_USB_DMAC
 	tristate "Renesas USB-DMA Controller"
 	depends on ARCH_SHMOBILE || COMPILE_TEST
diff --git a/drivers/dma/sh/Makefile b/drivers/dma/sh/Makefile
index 0133e46..f1e2fd6 100644
--- a/drivers/dma/sh/Makefile
+++ b/drivers/dma/sh/Makefile
@@ -14,6 +14,5 @@
 obj-$(CONFIG_SH_DMAE) += shdma.o
 
 obj-$(CONFIG_RCAR_DMAC) += rcar-dmac.o
-obj-$(CONFIG_RCAR_HPB_DMAE) += rcar-hpbdma.o
 obj-$(CONFIG_RENESAS_USB_DMAC) += usb-dmac.o
 obj-$(CONFIG_SUDMAC) += sudmac.o
diff --git a/drivers/dma/sh/rcar-hpbdma.c b/drivers/dma/sh/rcar-hpbdma.c
deleted file mode 100644
index 749f26e..0000000
--- a/drivers/dma/sh/rcar-hpbdma.c
+++ /dev/null
@@ -1,669 +0,0 @@
-/*
- * Copyright (C) 2011-2013 Renesas Electronics Corporation
- * Copyright (C) 2013 Cogent Embedded, Inc.
- *
- * This file is based on the drivers/dma/sh/shdma.c
- *
- * Renesas SuperH DMA Engine support
- *
- * This is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * - DMA of SuperH does not have Hardware DMA chain mode.
- * - max DMA size is 16MB.
- *
- */
-
-#include <linux/dmaengine.h>
-#include <linux/delay.h>
-#include <linux/err.h>
-#include <linux/init.h>
-#include <linux/interrupt.h>
-#include <linux/module.h>
-#include <linux/platform_data/dma-rcar-hpbdma.h>
-#include <linux/platform_device.h>
-#include <linux/pm_runtime.h>
-#include <linux/shdma-base.h>
-#include <linux/slab.h>
-
-/* DMA channel registers */
-#define HPB_DMAE_DSAR0	0x00
-#define HPB_DMAE_DDAR0	0x04
-#define HPB_DMAE_DTCR0	0x08
-#define HPB_DMAE_DSAR1	0x0C
-#define HPB_DMAE_DDAR1	0x10
-#define HPB_DMAE_DTCR1	0x14
-#define HPB_DMAE_DSASR	0x18
-#define HPB_DMAE_DDASR	0x1C
-#define HPB_DMAE_DTCSR	0x20
-#define HPB_DMAE_DPTR	0x24
-#define HPB_DMAE_DCR	0x28
-#define HPB_DMAE_DCMDR	0x2C
-#define HPB_DMAE_DSTPR	0x30
-#define HPB_DMAE_DSTSR	0x34
-#define HPB_DMAE_DDBGR	0x38
-#define HPB_DMAE_DDBGR2	0x3C
-#define HPB_DMAE_CHAN(n)	(0x40 * (n))
-
-/* DMA command register (DCMDR) bits */
-#define HPB_DMAE_DCMDR_BDOUT	BIT(7)
-#define HPB_DMAE_DCMDR_DQSPD	BIT(6)
-#define HPB_DMAE_DCMDR_DQSPC	BIT(5)
-#define HPB_DMAE_DCMDR_DMSPD	BIT(4)
-#define HPB_DMAE_DCMDR_DMSPC	BIT(3)
-#define HPB_DMAE_DCMDR_DQEND	BIT(2)
-#define HPB_DMAE_DCMDR_DNXT	BIT(1)
-#define HPB_DMAE_DCMDR_DMEN	BIT(0)
-
-/* DMA forced stop register (DSTPR) bits */
-#define HPB_DMAE_DSTPR_DMSTP	BIT(0)
-
-/* DMA status register (DSTSR) bits */
-#define HPB_DMAE_DSTSR_DQSTS	BIT(2)
-#define HPB_DMAE_DSTSR_DMSTS	BIT(0)
-
-/* DMA common registers */
-#define HPB_DMAE_DTIMR		0x00
-#define HPB_DMAE_DINTSR0		0x0C
-#define HPB_DMAE_DINTSR1		0x10
-#define HPB_DMAE_DINTCR0		0x14
-#define HPB_DMAE_DINTCR1		0x18
-#define HPB_DMAE_DINTMR0		0x1C
-#define HPB_DMAE_DINTMR1		0x20
-#define HPB_DMAE_DACTSR0		0x24
-#define HPB_DMAE_DACTSR1		0x28
-#define HPB_DMAE_HSRSTR(n)	(0x40 + (n) * 4)
-#define HPB_DMAE_HPB_DMASPR(n)	(0x140 + (n) * 4)
-#define HPB_DMAE_HPB_DMLVLR0	0x160
-#define HPB_DMAE_HPB_DMLVLR1	0x164
-#define HPB_DMAE_HPB_DMSHPT0	0x168
-#define HPB_DMAE_HPB_DMSHPT1	0x16C
-
-#define HPB_DMA_SLAVE_NUMBER 256
-#define HPB_DMA_TCR_MAX 0x01000000	/* 16 MiB */
-
-struct hpb_dmae_chan {
-	struct shdma_chan shdma_chan;
-	int xfer_mode;			/* DMA transfer mode */
-#define XFER_SINGLE	1
-#define XFER_DOUBLE	2
-	unsigned plane_idx;		/* current DMA information set */
-	bool first_desc;		/* first/next transfer */
-	int xmit_shift;			/* log_2(bytes_per_xfer) */
-	void __iomem *base;
-	const struct hpb_dmae_slave_config *cfg;
-	char dev_id[16];		/* unique name per DMAC of channel */
-	dma_addr_t slave_addr;
-};
-
-struct hpb_dmae_device {
-	struct shdma_dev shdma_dev;
-	spinlock_t reg_lock;		/* comm_reg operation lock */
-	struct hpb_dmae_pdata *pdata;
-	void __iomem *chan_reg;
-	void __iomem *comm_reg;
-	void __iomem *reset_reg;
-	void __iomem *mode_reg;
-};
-
-struct hpb_dmae_regs {
-	u32 sar; /* SAR / source address */
-	u32 dar; /* DAR / destination address */
-	u32 tcr; /* TCR / transfer count */
-};
-
-struct hpb_desc {
-	struct shdma_desc shdma_desc;
-	struct hpb_dmae_regs hw;
-	unsigned plane_idx;
-};
-
-#define to_chan(schan) container_of(schan, struct hpb_dmae_chan, shdma_chan)
-#define to_desc(sdesc) container_of(sdesc, struct hpb_desc, shdma_desc)
-#define to_dev(sc) container_of(sc->shdma_chan.dma_chan.device, \
-				struct hpb_dmae_device, shdma_dev.dma_dev)
-
-static void ch_reg_write(struct hpb_dmae_chan *hpb_dc, u32 data, u32 reg)
-{
-	iowrite32(data, hpb_dc->base + reg);
-}
-
-static u32 ch_reg_read(struct hpb_dmae_chan *hpb_dc, u32 reg)
-{
-	return ioread32(hpb_dc->base + reg);
-}
-
-static void dcmdr_write(struct hpb_dmae_device *hpbdev, u32 data)
-{
-	iowrite32(data, hpbdev->chan_reg + HPB_DMAE_DCMDR);
-}
-
-static void hsrstr_write(struct hpb_dmae_device *hpbdev, u32 ch)
-{
-	iowrite32(0x1, hpbdev->comm_reg + HPB_DMAE_HSRSTR(ch));
-}
-
-static u32 dintsr_read(struct hpb_dmae_device *hpbdev, u32 ch)
-{
-	u32 v;
-
-	if (ch < 32)
-		v = ioread32(hpbdev->comm_reg + HPB_DMAE_DINTSR0) >> ch;
-	else
-		v = ioread32(hpbdev->comm_reg + HPB_DMAE_DINTSR1) >> (ch - 32);
-	return v & 0x1;
-}
-
-static void dintcr_write(struct hpb_dmae_device *hpbdev, u32 ch)
-{
-	if (ch < 32)
-		iowrite32((0x1 << ch), hpbdev->comm_reg + HPB_DMAE_DINTCR0);
-	else
-		iowrite32((0x1 << (ch - 32)),
-			  hpbdev->comm_reg + HPB_DMAE_DINTCR1);
-}
-
-static void asyncmdr_write(struct hpb_dmae_device *hpbdev, u32 data)
-{
-	iowrite32(data, hpbdev->mode_reg);
-}
-
-static u32 asyncmdr_read(struct hpb_dmae_device *hpbdev)
-{
-	return ioread32(hpbdev->mode_reg);
-}
-
-static void hpb_dmae_enable_int(struct hpb_dmae_device *hpbdev, u32 ch)
-{
-	u32 intreg;
-
-	spin_lock_irq(&hpbdev->reg_lock);
-	if (ch < 32) {
-		intreg = ioread32(hpbdev->comm_reg + HPB_DMAE_DINTMR0);
-		iowrite32(BIT(ch) | intreg,
-			  hpbdev->comm_reg + HPB_DMAE_DINTMR0);
-	} else {
-		intreg = ioread32(hpbdev->comm_reg + HPB_DMAE_DINTMR1);
-		iowrite32(BIT(ch - 32) | intreg,
-			  hpbdev->comm_reg + HPB_DMAE_DINTMR1);
-	}
-	spin_unlock_irq(&hpbdev->reg_lock);
-}
-
-static void hpb_dmae_async_reset(struct hpb_dmae_device *hpbdev, u32 data)
-{
-	u32 rstr;
-	int timeout = 10000;	/* 100 ms */
-
-	spin_lock(&hpbdev->reg_lock);
-	rstr = ioread32(hpbdev->reset_reg);
-	rstr |= data;
-	iowrite32(rstr, hpbdev->reset_reg);
-	do {
-		rstr = ioread32(hpbdev->reset_reg);
-		if ((rstr & data) == data)
-			break;
-		udelay(10);
-	} while (timeout--);
-
-	if (timeout < 0)
-		dev_err(hpbdev->shdma_dev.dma_dev.dev,
-			"%s timeout\n", __func__);
-
-	rstr &= ~data;
-	iowrite32(rstr, hpbdev->reset_reg);
-	spin_unlock(&hpbdev->reg_lock);
-}
-
-static void hpb_dmae_set_async_mode(struct hpb_dmae_device *hpbdev,
-				    u32 mask, u32 data)
-{
-	u32 mode;
-
-	spin_lock_irq(&hpbdev->reg_lock);
-	mode = asyncmdr_read(hpbdev);
-	mode &= ~mask;
-	mode |= data;
-	asyncmdr_write(hpbdev, mode);
-	spin_unlock_irq(&hpbdev->reg_lock);
-}
-
-static void hpb_dmae_ctl_stop(struct hpb_dmae_device *hpbdev)
-{
-	dcmdr_write(hpbdev, HPB_DMAE_DCMDR_DQSPD);
-}
-
-static void hpb_dmae_reset(struct hpb_dmae_device *hpbdev)
-{
-	u32 ch;
-
-	for (ch = 0; ch < hpbdev->pdata->num_hw_channels; ch++)
-		hsrstr_write(hpbdev, ch);
-}
-
-static unsigned int calc_xmit_shift(struct hpb_dmae_chan *hpb_chan)
-{
-	struct hpb_dmae_device *hpbdev = to_dev(hpb_chan);
-	struct hpb_dmae_pdata *pdata = hpbdev->pdata;
-	int width = ch_reg_read(hpb_chan, HPB_DMAE_DCR);
-	int i;
-
-	switch (width & (HPB_DMAE_DCR_SPDS_MASK | HPB_DMAE_DCR_DPDS_MASK)) {
-	case HPB_DMAE_DCR_SPDS_8BIT | HPB_DMAE_DCR_DPDS_8BIT:
-	default:
-		i = XMIT_SZ_8BIT;
-		break;
-	case HPB_DMAE_DCR_SPDS_16BIT | HPB_DMAE_DCR_DPDS_16BIT:
-		i = XMIT_SZ_16BIT;
-		break;
-	case HPB_DMAE_DCR_SPDS_32BIT | HPB_DMAE_DCR_DPDS_32BIT:
-		i = XMIT_SZ_32BIT;
-		break;
-	}
-	return pdata->ts_shift[i];
-}
-
-static void hpb_dmae_set_reg(struct hpb_dmae_chan *hpb_chan,
-			     struct hpb_dmae_regs *hw, unsigned plane)
-{
-	ch_reg_write(hpb_chan, hw->sar,
-		     plane ? HPB_DMAE_DSAR1 : HPB_DMAE_DSAR0);
-	ch_reg_write(hpb_chan, hw->dar,
-		     plane ? HPB_DMAE_DDAR1 : HPB_DMAE_DDAR0);
-	ch_reg_write(hpb_chan, hw->tcr >> hpb_chan->xmit_shift,
-		     plane ? HPB_DMAE_DTCR1 : HPB_DMAE_DTCR0);
-}
-
-static void hpb_dmae_start(struct hpb_dmae_chan *hpb_chan, bool next)
-{
-	ch_reg_write(hpb_chan, (next ? HPB_DMAE_DCMDR_DNXT : 0) |
-		     HPB_DMAE_DCMDR_DMEN, HPB_DMAE_DCMDR);
-}
-
-static void hpb_dmae_halt(struct shdma_chan *schan)
-{
-	struct hpb_dmae_chan *chan = to_chan(schan);
-
-	ch_reg_write(chan, HPB_DMAE_DCMDR_DQEND, HPB_DMAE_DCMDR);
-	ch_reg_write(chan, HPB_DMAE_DSTPR_DMSTP, HPB_DMAE_DSTPR);
-
-	chan->plane_idx = 0;
-	chan->first_desc = true;
-}
-
-static const struct hpb_dmae_slave_config *
-hpb_dmae_find_slave(struct hpb_dmae_chan *hpb_chan, int slave_id)
-{
-	struct hpb_dmae_device *hpbdev = to_dev(hpb_chan);
-	struct hpb_dmae_pdata *pdata = hpbdev->pdata;
-	int i;
-
-	if (slave_id >= HPB_DMA_SLAVE_NUMBER)
-		return NULL;
-
-	for (i = 0; i < pdata->num_slaves; i++)
-		if (pdata->slaves[i].id == slave_id)
-			return pdata->slaves + i;
-
-	return NULL;
-}
-
-static void hpb_dmae_start_xfer(struct shdma_chan *schan,
-				struct shdma_desc *sdesc)
-{
-	struct hpb_dmae_chan *chan = to_chan(schan);
-	struct hpb_dmae_device *hpbdev = to_dev(chan);
-	struct hpb_desc *desc = to_desc(sdesc);
-
-	if (chan->cfg->flags & HPB_DMAE_SET_ASYNC_RESET)
-		hpb_dmae_async_reset(hpbdev, chan->cfg->rstr);
-
-	desc->plane_idx = chan->plane_idx;
-	hpb_dmae_set_reg(chan, &desc->hw, chan->plane_idx);
-	hpb_dmae_start(chan, !chan->first_desc);
-
-	if (chan->xfer_mode == XFER_DOUBLE) {
-		chan->plane_idx ^= 1;
-		chan->first_desc = false;
-	}
-}
-
-static bool hpb_dmae_desc_completed(struct shdma_chan *schan,
-				    struct shdma_desc *sdesc)
-{
-	/*
-	 * This is correct since we always have at most single
-	 * outstanding DMA transfer per channel, and by the time
-	 * we get completion interrupt the transfer is completed.
-	 * This will change if we ever use alternating DMA
-	 * information sets and submit two descriptors at once.
-	 */
-	return true;
-}
-
-static bool hpb_dmae_chan_irq(struct shdma_chan *schan, int irq)
-{
-	struct hpb_dmae_chan *chan = to_chan(schan);
-	struct hpb_dmae_device *hpbdev = to_dev(chan);
-	int ch = chan->cfg->dma_ch;
-
-	/* Check Complete DMA Transfer */
-	if (dintsr_read(hpbdev, ch)) {
-		/* Clear Interrupt status */
-		dintcr_write(hpbdev, ch);
-		return true;
-	}
-	return false;
-}
-
-static int hpb_dmae_desc_setup(struct shdma_chan *schan,
-			       struct shdma_desc *sdesc,
-			       dma_addr_t src, dma_addr_t dst, size_t *len)
-{
-	struct hpb_desc *desc = to_desc(sdesc);
-
-	if (*len > (size_t)HPB_DMA_TCR_MAX)
-		*len = (size_t)HPB_DMA_TCR_MAX;
-
-	desc->hw.sar = src;
-	desc->hw.dar = dst;
-	desc->hw.tcr = *len;
-
-	return 0;
-}
-
-static size_t hpb_dmae_get_partial(struct shdma_chan *schan,
-				   struct shdma_desc *sdesc)
-{
-	struct hpb_desc *desc = to_desc(sdesc);
-	struct hpb_dmae_chan *chan = to_chan(schan);
-	u32 tcr = ch_reg_read(chan, desc->plane_idx ?
-			      HPB_DMAE_DTCR1 : HPB_DMAE_DTCR0);
-
-	return (desc->hw.tcr - tcr) << chan->xmit_shift;
-}
-
-static bool hpb_dmae_channel_busy(struct shdma_chan *schan)
-{
-	struct hpb_dmae_chan *chan = to_chan(schan);
-	u32 dstsr = ch_reg_read(chan, HPB_DMAE_DSTSR);
-
-	if (chan->xfer_mode == XFER_DOUBLE)
-		return dstsr & HPB_DMAE_DSTSR_DQSTS;
-	else
-		return dstsr & HPB_DMAE_DSTSR_DMSTS;
-}
-
-static int
-hpb_dmae_alloc_chan_resources(struct hpb_dmae_chan *hpb_chan,
-			      const struct hpb_dmae_slave_config *cfg)
-{
-	struct hpb_dmae_device *hpbdev = to_dev(hpb_chan);
-	struct hpb_dmae_pdata *pdata = hpbdev->pdata;
-	const struct hpb_dmae_channel *channel = pdata->channels;
-	int slave_id = cfg->id;
-	int i, err;
-
-	for (i = 0; i < pdata->num_channels; i++, channel++) {
-		if (channel->s_id == slave_id) {
-			struct device *dev = hpb_chan->shdma_chan.dev;
-
-			hpb_chan->base = hpbdev->chan_reg +
-				HPB_DMAE_CHAN(cfg->dma_ch);
-
-			dev_dbg(dev, "Detected Slave device\n");
-			dev_dbg(dev, " -- slave_id       : 0x%x\n", slave_id);
-			dev_dbg(dev, " -- cfg->dma_ch    : %d\n", cfg->dma_ch);
-			dev_dbg(dev, " -- channel->ch_irq: %d\n",
-				channel->ch_irq);
-			break;
-		}
-	}
-
-	err = shdma_request_irq(&hpb_chan->shdma_chan, channel->ch_irq,
-				IRQF_SHARED, hpb_chan->dev_id);
-	if (err) {
-		dev_err(hpb_chan->shdma_chan.dev,
-			"DMA channel request_irq %d failed with error %d\n",
-			channel->ch_irq, err);
-		return err;
-	}
-
-	hpb_chan->plane_idx = 0;
-	hpb_chan->first_desc = true;
-
-	if ((cfg->dcr & (HPB_DMAE_DCR_CT | HPB_DMAE_DCR_DIP)) == 0) {
-		hpb_chan->xfer_mode = XFER_SINGLE;
-	} else if ((cfg->dcr & (HPB_DMAE_DCR_CT | HPB_DMAE_DCR_DIP)) ==
-		   (HPB_DMAE_DCR_CT | HPB_DMAE_DCR_DIP)) {
-		hpb_chan->xfer_mode = XFER_DOUBLE;
-	} else {
-		dev_err(hpb_chan->shdma_chan.dev, "DCR setting error");
-		return -EINVAL;
-	}
-
-	if (cfg->flags & HPB_DMAE_SET_ASYNC_MODE)
-		hpb_dmae_set_async_mode(hpbdev, cfg->mdm, cfg->mdr);
-	ch_reg_write(hpb_chan, cfg->dcr, HPB_DMAE_DCR);
-	ch_reg_write(hpb_chan, cfg->port, HPB_DMAE_DPTR);
-	hpb_chan->xmit_shift = calc_xmit_shift(hpb_chan);
-	hpb_dmae_enable_int(hpbdev, cfg->dma_ch);
-
-	return 0;
-}
-
-static int hpb_dmae_set_slave(struct shdma_chan *schan, int slave_id,
-			      dma_addr_t slave_addr, bool try)
-{
-	struct hpb_dmae_chan *chan = to_chan(schan);
-	const struct hpb_dmae_slave_config *sc =
-		hpb_dmae_find_slave(chan, slave_id);
-
-	if (!sc)
-		return -ENODEV;
-	if (try)
-		return 0;
-	chan->cfg = sc;
-	chan->slave_addr = slave_addr ? : sc->addr;
-	return hpb_dmae_alloc_chan_resources(chan, sc);
-}
-
-static void hpb_dmae_setup_xfer(struct shdma_chan *schan, int slave_id)
-{
-}
-
-static dma_addr_t hpb_dmae_slave_addr(struct shdma_chan *schan)
-{
-	struct hpb_dmae_chan *chan = to_chan(schan);
-
-	return chan->slave_addr;
-}
-
-static struct shdma_desc *hpb_dmae_embedded_desc(void *buf, int i)
-{
-	return &((struct hpb_desc *)buf)[i].shdma_desc;
-}
-
-static const struct shdma_ops hpb_dmae_ops = {
-	.desc_completed = hpb_dmae_desc_completed,
-	.halt_channel = hpb_dmae_halt,
-	.channel_busy = hpb_dmae_channel_busy,
-	.slave_addr = hpb_dmae_slave_addr,
-	.desc_setup = hpb_dmae_desc_setup,
-	.set_slave = hpb_dmae_set_slave,
-	.setup_xfer = hpb_dmae_setup_xfer,
-	.start_xfer = hpb_dmae_start_xfer,
-	.embedded_desc = hpb_dmae_embedded_desc,
-	.chan_irq = hpb_dmae_chan_irq,
-	.get_partial = hpb_dmae_get_partial,
-};
-
-static int hpb_dmae_chan_probe(struct hpb_dmae_device *hpbdev, int id)
-{
-	struct shdma_dev *sdev = &hpbdev->shdma_dev;
-	struct platform_device *pdev =
-		to_platform_device(hpbdev->shdma_dev.dma_dev.dev);
-	struct hpb_dmae_chan *new_hpb_chan;
-	struct shdma_chan *schan;
-
-	/* Alloc channel */
-	new_hpb_chan = devm_kzalloc(&pdev->dev,
-				    sizeof(struct hpb_dmae_chan), GFP_KERNEL);
-	if (!new_hpb_chan) {
-		dev_err(hpbdev->shdma_dev.dma_dev.dev,
-			"No free memory for allocating DMA channels!\n");
-		return -ENOMEM;
-	}
-
-	schan = &new_hpb_chan->shdma_chan;
-	schan->max_xfer_len = HPB_DMA_TCR_MAX;
-
-	shdma_chan_probe(sdev, schan, id);
-
-	if (pdev->id >= 0)
-		snprintf(new_hpb_chan->dev_id, sizeof(new_hpb_chan->dev_id),
-			 "hpb-dmae%d.%d", pdev->id, id);
-	else
-		snprintf(new_hpb_chan->dev_id, sizeof(new_hpb_chan->dev_id),
-			 "hpb-dma.%d", id);
-
-	return 0;
-}
-
-static int hpb_dmae_probe(struct platform_device *pdev)
-{
-	const enum dma_slave_buswidth widths = DMA_SLAVE_BUSWIDTH_1_BYTE |
-		DMA_SLAVE_BUSWIDTH_2_BYTES | DMA_SLAVE_BUSWIDTH_4_BYTES;
-	struct hpb_dmae_pdata *pdata = pdev->dev.platform_data;
-	struct hpb_dmae_device *hpbdev;
-	struct dma_device *dma_dev;
-	struct resource *chan, *comm, *rest, *mode, *irq_res;
-	int err, i;
-
-	/* Get platform data */
-	if (!pdata || !pdata->num_channels)
-		return -ENODEV;
-
-	chan = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	comm = platform_get_resource(pdev, IORESOURCE_MEM, 1);
-	rest = platform_get_resource(pdev, IORESOURCE_MEM, 2);
-	mode = platform_get_resource(pdev, IORESOURCE_MEM, 3);
-
-	irq_res = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
-	if (!irq_res)
-		return -ENODEV;
-
-	hpbdev = devm_kzalloc(&pdev->dev, sizeof(struct hpb_dmae_device),
-			      GFP_KERNEL);
-	if (!hpbdev) {
-		dev_err(&pdev->dev, "Not enough memory\n");
-		return -ENOMEM;
-	}
-
-	hpbdev->chan_reg = devm_ioremap_resource(&pdev->dev, chan);
-	if (IS_ERR(hpbdev->chan_reg))
-		return PTR_ERR(hpbdev->chan_reg);
-
-	hpbdev->comm_reg = devm_ioremap_resource(&pdev->dev, comm);
-	if (IS_ERR(hpbdev->comm_reg))
-		return PTR_ERR(hpbdev->comm_reg);
-
-	hpbdev->reset_reg = devm_ioremap_resource(&pdev->dev, rest);
-	if (IS_ERR(hpbdev->reset_reg))
-		return PTR_ERR(hpbdev->reset_reg);
-
-	hpbdev->mode_reg = devm_ioremap_resource(&pdev->dev, mode);
-	if (IS_ERR(hpbdev->mode_reg))
-		return PTR_ERR(hpbdev->mode_reg);
-
-	dma_dev = &hpbdev->shdma_dev.dma_dev;
-
-	spin_lock_init(&hpbdev->reg_lock);
-
-	/* Platform data */
-	hpbdev->pdata = pdata;
-
-	pm_runtime_enable(&pdev->dev);
-	err = pm_runtime_get_sync(&pdev->dev);
-	if (err < 0)
-		dev_err(&pdev->dev, "%s(): GET = %d\n", __func__, err);
-
-	/* Reset DMA controller */
-	hpb_dmae_reset(hpbdev);
-
-	pm_runtime_put(&pdev->dev);
-
-	dma_cap_set(DMA_MEMCPY, dma_dev->cap_mask);
-	dma_cap_set(DMA_SLAVE, dma_dev->cap_mask);
-	dma_dev->src_addr_widths = widths;
-	dma_dev->dst_addr_widths = widths;
-	dma_dev->directions = BIT(DMA_MEM_TO_DEV) | BIT(DMA_DEV_TO_MEM);
-	dma_dev->residue_granularity = DMA_RESIDUE_GRANULARITY_DESCRIPTOR;
-
-	hpbdev->shdma_dev.ops = &hpb_dmae_ops;
-	hpbdev->shdma_dev.desc_size = sizeof(struct hpb_desc);
-	err = shdma_init(&pdev->dev, &hpbdev->shdma_dev, pdata->num_channels);
-	if (err < 0)
-		goto error;
-
-	/* Create DMA channels */
-	for (i = 0; i < pdata->num_channels; i++)
-		hpb_dmae_chan_probe(hpbdev, i);
-
-	platform_set_drvdata(pdev, hpbdev);
-	err = dma_async_device_register(dma_dev);
-	if (!err)
-		return 0;
-
-	shdma_cleanup(&hpbdev->shdma_dev);
-error:
-	pm_runtime_disable(&pdev->dev);
-	return err;
-}
-
-static void hpb_dmae_chan_remove(struct hpb_dmae_device *hpbdev)
-{
-	struct shdma_chan *schan;
-	int i;
-
-	shdma_for_each_chan(schan, &hpbdev->shdma_dev, i) {
-		BUG_ON(!schan);
-
-		shdma_chan_remove(schan);
-	}
-}
-
-static int hpb_dmae_remove(struct platform_device *pdev)
-{
-	struct hpb_dmae_device *hpbdev = platform_get_drvdata(pdev);
-
-	dma_async_device_unregister(&hpbdev->shdma_dev.dma_dev);
-
-	pm_runtime_disable(&pdev->dev);
-
-	hpb_dmae_chan_remove(hpbdev);
-
-	return 0;
-}
-
-static void hpb_dmae_shutdown(struct platform_device *pdev)
-{
-	struct hpb_dmae_device *hpbdev = platform_get_drvdata(pdev);
-	hpb_dmae_ctl_stop(hpbdev);
-}
-
-static struct platform_driver hpb_dmae_driver = {
-	.probe		= hpb_dmae_probe,
-	.remove		= hpb_dmae_remove,
-	.shutdown	= hpb_dmae_shutdown,
-	.driver = {
-		.name	= "hpb-dma-engine",
-	},
-};
-module_platform_driver(hpb_dmae_driver);
-
-MODULE_AUTHOR("Max Filippov <max.filippov@cogentembedded.com>");
-MODULE_DESCRIPTION("Renesas HPB DMA Engine driver");
-MODULE_LICENSE("GPL");
diff --git a/drivers/dma/sh/usb-dmac.c b/drivers/dma/sh/usb-dmac.c
index f1bcc2a..749f1bd 100644
--- a/drivers/dma/sh/usb-dmac.c
+++ b/drivers/dma/sh/usb-dmac.c
@@ -448,7 +448,7 @@
 static int usb_dmac_chan_terminate_all(struct dma_chan *chan)
 {
 	struct usb_dmac_chan *uchan = to_usb_dmac_chan(chan);
-	struct usb_dmac_desc *desc;
+	struct usb_dmac_desc *desc, *_desc;
 	unsigned long flags;
 	LIST_HEAD(head);
 	LIST_HEAD(list);
@@ -459,7 +459,7 @@
 	if (uchan->desc)
 		uchan->desc = NULL;
 	list_splice_init(&uchan->desc_got, &list);
-	list_for_each_entry(desc, &list, node)
+	list_for_each_entry_safe(desc, _desc, &list, node)
 		list_move_tail(&desc->node, &uchan->desc_freed);
 	spin_unlock_irqrestore(&uchan->vc.lock, flags);
 	vchan_dma_desc_free_list(&uchan->vc, &head);
diff --git a/drivers/dma/ste_dma40.c b/drivers/dma/ste_dma40.c
index dd3e7ba..6fb8307 100644
--- a/drivers/dma/ste_dma40.c
+++ b/drivers/dma/ste_dma40.c
@@ -3543,8 +3543,8 @@
 	struct stedma40_platform_data *plat_data = dev_get_platdata(&pdev->dev);
 	struct device_node *np = pdev->dev.of_node;
 	int ret = -ENOENT;
-	struct d40_base *base = NULL;
-	struct resource *res = NULL;
+	struct d40_base *base;
+	struct resource *res;
 	int num_reserved_chans;
 	u32 val;
 
@@ -3552,17 +3552,17 @@
 		if (np) {
 			if (d40_of_probe(pdev, np)) {
 				ret = -ENOMEM;
-				goto failure;
+				goto report_failure;
 			}
 		} else {
 			d40_err(&pdev->dev, "No pdata or Device Tree provided\n");
-			goto failure;
+			goto report_failure;
 		}
 	}
 
 	base = d40_hw_detect_init(pdev);
 	if (!base)
-		goto failure;
+		goto report_failure;
 
 	num_reserved_chans = d40_phy_res_init(base);
 
@@ -3693,51 +3693,48 @@
 	return 0;
 
 failure:
-	if (base) {
-		if (base->desc_slab)
-			kmem_cache_destroy(base->desc_slab);
-		if (base->virtbase)
-			iounmap(base->virtbase);
+	kmem_cache_destroy(base->desc_slab);
+	if (base->virtbase)
+		iounmap(base->virtbase);
 
-		if (base->lcla_pool.base && base->plat_data->use_esram_lcla) {
-			iounmap(base->lcla_pool.base);
-			base->lcla_pool.base = NULL;
-		}
-
-		if (base->lcla_pool.dma_addr)
-			dma_unmap_single(base->dev, base->lcla_pool.dma_addr,
-					 SZ_1K * base->num_phy_chans,
-					 DMA_TO_DEVICE);
-
-		if (!base->lcla_pool.base_unaligned && base->lcla_pool.base)
-			free_pages((unsigned long)base->lcla_pool.base,
-				   base->lcla_pool.pages);
-
-		kfree(base->lcla_pool.base_unaligned);
-
-		if (base->phy_lcpa)
-			release_mem_region(base->phy_lcpa,
-					   base->lcpa_size);
-		if (base->phy_start)
-			release_mem_region(base->phy_start,
-					   base->phy_size);
-		if (base->clk) {
-			clk_disable_unprepare(base->clk);
-			clk_put(base->clk);
-		}
-
-		if (base->lcpa_regulator) {
-			regulator_disable(base->lcpa_regulator);
-			regulator_put(base->lcpa_regulator);
-		}
-
-		kfree(base->lcla_pool.alloc_map);
-		kfree(base->lookup_log_chans);
-		kfree(base->lookup_phy_chans);
-		kfree(base->phy_res);
-		kfree(base);
+	if (base->lcla_pool.base && base->plat_data->use_esram_lcla) {
+		iounmap(base->lcla_pool.base);
+		base->lcla_pool.base = NULL;
 	}
 
+	if (base->lcla_pool.dma_addr)
+		dma_unmap_single(base->dev, base->lcla_pool.dma_addr,
+				 SZ_1K * base->num_phy_chans,
+				 DMA_TO_DEVICE);
+
+	if (!base->lcla_pool.base_unaligned && base->lcla_pool.base)
+		free_pages((unsigned long)base->lcla_pool.base,
+			   base->lcla_pool.pages);
+
+	kfree(base->lcla_pool.base_unaligned);
+
+	if (base->phy_lcpa)
+		release_mem_region(base->phy_lcpa,
+				   base->lcpa_size);
+	if (base->phy_start)
+		release_mem_region(base->phy_start,
+				   base->phy_size);
+	if (base->clk) {
+		clk_disable_unprepare(base->clk);
+		clk_put(base->clk);
+	}
+
+	if (base->lcpa_regulator) {
+		regulator_disable(base->lcpa_regulator);
+		regulator_put(base->lcpa_regulator);
+	}
+
+	kfree(base->lcla_pool.alloc_map);
+	kfree(base->lookup_log_chans);
+	kfree(base->lookup_phy_chans);
+	kfree(base->phy_res);
+	kfree(base);
+report_failure:
 	d40_err(&pdev->dev, "probe failed\n");
 	return ret;
 }
diff --git a/drivers/dma/stm32-dma.c b/drivers/dma/stm32-dma.c
new file mode 100644
index 0000000..047476a
--- /dev/null
+++ b/drivers/dma/stm32-dma.c
@@ -0,0 +1,1141 @@
+/*
+ * Driver for STM32 DMA controller
+ *
+ * Inspired by dma-jz4740.c and tegra20-apb-dma.c
+ *
+ * Copyright (C) M'boumba Cedric Madianga 2015
+ * Author: M'boumba Cedric Madianga <cedric.madianga@gmail.com>
+ *
+ * License terms:  GNU General Public License (GPL), version 2
+ */
+
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/dmaengine.h>
+#include <linux/dma-mapping.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/jiffies.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/of_dma.h>
+#include <linux/platform_device.h>
+#include <linux/reset.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+
+#include "virt-dma.h"
+
+#define STM32_DMA_LISR			0x0000 /* DMA Low Int Status Reg */
+#define STM32_DMA_HISR			0x0004 /* DMA High Int Status Reg */
+#define STM32_DMA_LIFCR			0x0008 /* DMA Low Int Flag Clear Reg */
+#define STM32_DMA_HIFCR			0x000c /* DMA High Int Flag Clear Reg */
+#define STM32_DMA_TCI			BIT(5) /* Transfer Complete Interrupt */
+#define STM32_DMA_TEI			BIT(3) /* Transfer Error Interrupt */
+#define STM32_DMA_DMEI			BIT(2) /* Direct Mode Error Interrupt */
+#define STM32_DMA_FEI			BIT(0) /* FIFO Error Interrupt */
+
+/* DMA Stream x Configuration Register; field helpers parenthesize (n) */
+#define STM32_DMA_SCR(x)		(0x0010 + 0x18 * (x)) /* x = 0..7 */
+#define STM32_DMA_SCR_REQ(n)		(((n) & 0x7) << 25)
+#define STM32_DMA_SCR_MBURST_MASK	GENMASK(24, 23)
+#define STM32_DMA_SCR_MBURST(n)	        (((n) & 0x3) << 23)
+#define STM32_DMA_SCR_PBURST_MASK	GENMASK(22, 21)
+#define STM32_DMA_SCR_PBURST(n)	        (((n) & 0x3) << 21)
+#define STM32_DMA_SCR_PL_MASK		GENMASK(17, 16)
+#define STM32_DMA_SCR_PL(n)		(((n) & 0x3) << 16)
+#define STM32_DMA_SCR_MSIZE_MASK	GENMASK(14, 13)
+#define STM32_DMA_SCR_MSIZE(n)		(((n) & 0x3) << 13)
+#define STM32_DMA_SCR_PSIZE_MASK	GENMASK(12, 11)
+#define STM32_DMA_SCR_PSIZE(n)		(((n) & 0x3) << 11)
+#define STM32_DMA_SCR_PSIZE_GET(n)	(((n) & STM32_DMA_SCR_PSIZE_MASK) >> 11)
+#define STM32_DMA_SCR_DIR_MASK		GENMASK(7, 6)
+#define STM32_DMA_SCR_DIR(n)		(((n) & 0x3) << 6)
+#define STM32_DMA_SCR_CT		BIT(19) /* Target in double buffer */
+#define STM32_DMA_SCR_DBM		BIT(18) /* Double Buffer Mode */
+#define STM32_DMA_SCR_PINCOS		BIT(15) /* Peripheral inc offset size */
+#define STM32_DMA_SCR_MINC		BIT(10) /* Memory increment mode */
+#define STM32_DMA_SCR_PINC		BIT(9) /* Peripheral increment mode */
+#define STM32_DMA_SCR_CIRC		BIT(8) /* Circular mode */
+#define STM32_DMA_SCR_PFCTRL		BIT(5) /* Peripheral Flow Controller */
+#define STM32_DMA_SCR_TCIE		BIT(4) /* Transfer Complete Int Enable */
+#define STM32_DMA_SCR_TEIE		BIT(2) /* Transfer Error Int Enable */
+#define STM32_DMA_SCR_DMEIE		BIT(1) /* Direct Mode Err Int Enable */
+#define STM32_DMA_SCR_EN		BIT(0) /* Stream Enable */
+#define STM32_DMA_SCR_CFG_MASK		(STM32_DMA_SCR_PINC \
+					| STM32_DMA_SCR_MINC \
+					| STM32_DMA_SCR_PINCOS \
+					| STM32_DMA_SCR_PL_MASK)
+#define STM32_DMA_SCR_IRQ_MASK		(STM32_DMA_SCR_TCIE \
+					| STM32_DMA_SCR_TEIE \
+					| STM32_DMA_SCR_DMEIE)
+
+/* DMA Stream x number of data register */
+#define STM32_DMA_SNDTR(x)		(0x0014 + 0x18 * (x))
+
+/* DMA stream peripheral address register */
+#define STM32_DMA_SPAR(x)		(0x0018 + 0x18 * (x))
+
+/* DMA stream x memory 0 address register */
+#define STM32_DMA_SM0AR(x)		(0x001c + 0x18 * (x))
+
+/* DMA stream x memory 1 address register */
+#define STM32_DMA_SM1AR(x)		(0x0020 + 0x18 * (x))
+
+/* DMA stream x FIFO control register */
+#define STM32_DMA_SFCR(x)		(0x0024 + 0x18 * (x))
+#define STM32_DMA_SFCR_FTH_MASK		GENMASK(1, 0)
+#define STM32_DMA_SFCR_FTH(n)		((n) & STM32_DMA_SFCR_FTH_MASK)
+#define STM32_DMA_SFCR_FEIE		BIT(7) /* FIFO error interrupt enable */
+#define STM32_DMA_SFCR_DMDIS		BIT(2) /* Direct mode disable */
+#define STM32_DMA_SFCR_MASK		(STM32_DMA_SFCR_FEIE \
+					| STM32_DMA_SFCR_DMDIS)
+
+/* DMA direction, as encoded by STM32_DMA_SCR_DIR() */
+#define STM32_DMA_DEV_TO_MEM		0x00
+#define STM32_DMA_MEM_TO_DEV		0x01
+#define STM32_DMA_MEM_TO_MEM		0x02
+
+/* DMA priority level */
+#define STM32_DMA_PRIORITY_LOW		0x00
+#define STM32_DMA_PRIORITY_MEDIUM	0x01
+#define STM32_DMA_PRIORITY_HIGH		0x02
+#define STM32_DMA_PRIORITY_VERY_HIGH	0x03
+
+/* DMA FIFO threshold selection */
+#define STM32_DMA_FIFO_THRESHOLD_1QUARTERFULL		0x00
+#define STM32_DMA_FIFO_THRESHOLD_HALFFULL		0x01
+#define STM32_DMA_FIFO_THRESHOLD_3QUARTERSFULL		0x02
+#define STM32_DMA_FIFO_THRESHOLD_FULL			0x03
+
+#define STM32_DMA_MAX_DATA_ITEMS	0xffff
+#define STM32_DMA_MAX_CHANNELS		0x08
+#define STM32_DMA_MAX_REQUEST_ID	0x08
+#define STM32_DMA_MAX_DATA_PARAM	0x03
+
+enum stm32_dma_width {
+	STM32_DMA_BYTE = 0,		/* DMA_SLAVE_BUSWIDTH_1_BYTE */
+	STM32_DMA_HALF_WORD = 1,	/* DMA_SLAVE_BUSWIDTH_2_BYTES */
+	STM32_DMA_WORD = 2,		/* DMA_SLAVE_BUSWIDTH_4_BYTES */
+};
+
+enum stm32_dma_burst_size {
+	STM32_DMA_BURST_SINGLE = 0,	/* maxburst of 0 or 1 */
+	STM32_DMA_BURST_INCR4 = 1,	/* maxburst of 4 */
+	STM32_DMA_BURST_INCR8 = 2,	/* maxburst of 8 */
+	STM32_DMA_BURST_INCR16 = 3,	/* maxburst of 16 */
+};
+
+struct stm32_dma_cfg { /* NOTE(review): users are outside this chunk */
+	u32 channel_id;
+	u32 request_line;	/* presumably fed to STM32_DMA_SCR_REQ() - TODO confirm */
+	u32 stream_config;
+	u32 threshold;		/* presumably a STM32_DMA_FIFO_THRESHOLD_* - TODO confirm */
+};
+
+struct stm32_dma_chan_reg { /* software image of one stream's registers */
+	u32 dma_lisr;
+	u32 dma_hisr;
+	u32 dma_lifcr;
+	u32 dma_hifcr;
+	u32 dma_scr;	/* written to STM32_DMA_SCR(id) when a transfer starts */
+	u32 dma_sndtr;	/* written to STM32_DMA_SNDTR(id) */
+	u32 dma_spar;	/* written to STM32_DMA_SPAR(id) */
+	u32 dma_sm0ar;	/* written to STM32_DMA_SM0AR(id) */
+	u32 dma_sm1ar;	/* written to STM32_DMA_SM1AR(id) */
+	u32 dma_sfcr;	/* written to STM32_DMA_SFCR(id) */
+};
+
+struct stm32_dma_sg_req {
+	u32 len;	/* NOTE(review): presumably the request length; set elsewhere */
+	struct stm32_dma_chan_reg chan_reg; /* loaded by stm32_dma_start_transfer() */
+};
+
+struct stm32_dma_desc {
+	struct virt_dma_desc vdesc; /* recovered with to_stm32_dma_desc() */
+	bool cyclic;	/* completion goes through vchan_cyclic_callback() */
+	u32 num_sgs;	/* number of entries in sg_req[] */
+	struct stm32_dma_sg_req sg_req[]; /* allocated along with the descriptor */
+};
+
+struct stm32_dma_chan {
+	struct virt_dma_chan vchan; /* vchan.lock is taken around channel updates */
+	bool config_init;	/* set by stm32_dma_slave_config() */
+	bool busy;		/* set when a transfer starts, cleared on stop/done */
+	u32 id;			/* stream index used to select registers and flags */
+	u32 irq;
+	struct stm32_dma_desc *desc; /* descriptor being processed, or NULL */
+	u32 next_sg;		/* index of the next desc->sg_req[] to program */
+	struct dma_slave_config	dma_sconfig; /* copy taken by stm32_dma_slave_config() */
+	struct stm32_dma_chan_reg chan_reg; /* filled by stm32_dma_set_xfer_param() */
+};
+
+struct stm32_dma_device {
+	struct dma_device ddev;	/* container_of() anchor, see stm32_dma_get_dev() */
+	void __iomem *base;	/* MMIO base for stm32_dma_read()/stm32_dma_write() */
+	struct clk *clk;
+	struct reset_control *rst;
+	bool mem2mem;
+	struct stm32_dma_chan chan[STM32_DMA_MAX_CHANNELS];
+};
+
+static struct stm32_dma_device *stm32_dma_get_dev(struct stm32_dma_chan *chan)
+{
+	return container_of(chan->vchan.chan.device, struct stm32_dma_device,
+			    ddev); /* device pointer -> enclosing driver state */
+}
+
+static struct stm32_dma_chan *to_stm32_dma_chan(struct dma_chan *c)
+{
+	return container_of(c, struct stm32_dma_chan, vchan.chan); /* downcast */
+}
+
+static struct stm32_dma_desc *to_stm32_dma_desc(struct virt_dma_desc *vdesc)
+{
+	return container_of(vdesc, struct stm32_dma_desc, vdesc); /* downcast */
+}
+
+static struct device *chan2dev(struct stm32_dma_chan *chan)
+{
+	return &chan->vchan.chan.dev->device; /* for dev_err()/dev_dbg() */
+}
+
+static u32 stm32_dma_read(struct stm32_dma_device *dmadev, u32 reg)
+{
+	return readl_relaxed(dmadev->base + reg); /* reg: offset from base */
+}
+
+static void stm32_dma_write(struct stm32_dma_device *dmadev, u32 reg, u32 val)
+{
+	writel_relaxed(val, dmadev->base + reg); /* reg: offset from base */
+}
+
+static struct stm32_dma_desc *stm32_dma_alloc_desc(u32 num_sgs)
+{
+	return kzalloc(sizeof(struct stm32_dma_desc) + /* header + sg_req[num_sgs] */
+		       sizeof(struct stm32_dma_sg_req) * num_sgs, GFP_NOWAIT);
+}
+
+static int stm32_dma_get_width(struct stm32_dma_chan *chan,
+			       enum dma_slave_buswidth width)
+{
+	/* Translate a dmaengine bus width into enum stm32_dma_width. */
+	if (width == DMA_SLAVE_BUSWIDTH_1_BYTE)
+		return STM32_DMA_BYTE;
+	if (width == DMA_SLAVE_BUSWIDTH_2_BYTES)
+		return STM32_DMA_HALF_WORD;
+	if (width == DMA_SLAVE_BUSWIDTH_4_BYTES)
+		return STM32_DMA_WORD;
+
+	/* Every other width is rejected with -EINVAL. */
+	dev_err(chan2dev(chan), "Dma bus width not supported\n");
+	return -EINVAL;
+}
+
+static int stm32_dma_get_burst(struct stm32_dma_chan *chan, u32 maxburst)
+{
+	/*
+	 * Translate maxburst into enum stm32_dma_burst_size; 0 and 1 are single.
+	 */
+	if (maxburst <= 1)
+		return STM32_DMA_BURST_SINGLE;
+	if (maxburst == 4)
+		return STM32_DMA_BURST_INCR4;
+	if (maxburst == 8)
+		return STM32_DMA_BURST_INCR8;
+	if (maxburst == 16)
+		return STM32_DMA_BURST_INCR16;
+
+	dev_err(chan2dev(chan), "Dma burst size not supported\n");
+	return -EINVAL;
+}
+
+static void stm32_dma_set_fifo_config(struct stm32_dma_chan *chan,
+				      u32 src_maxburst, u32 dst_maxburst)
+{
+	struct stm32_dma_chan_reg *regs = &chan->chan_reg;
+
+	/* Drop both mode selections first, then enable exactly one of them. */
+	regs->dma_sfcr &= ~STM32_DMA_SFCR_MASK;
+	regs->dma_scr &= ~STM32_DMA_SCR_DMEIE;
+
+	if (src_maxburst || dst_maxburst) /* FIFO mode: FEIE + DMDIS */
+		regs->dma_sfcr |= STM32_DMA_SFCR_MASK;
+	else /* direct mode: no burst requested on either side */
+		regs->dma_scr |= STM32_DMA_SCR_DMEIE;
+}
+
+static int stm32_dma_slave_config(struct dma_chan *c,
+				  struct dma_slave_config *config)
+{
+	struct stm32_dma_chan *chan = to_stm32_dma_chan(c);
+
+	/* Keep a private copy and remember that the channel was configured. */
+	chan->dma_sconfig = *config;
+	chan->config_init = true;
+
+	return 0;
+}
+
+static u32 stm32_dma_irq_status(struct stm32_dma_chan *chan)
+{
+	struct stm32_dma_device *dmadev = stm32_dma_get_dev(chan);
+	u32 flags, dma_isr;
+
+	/*
+	 * Return this channel's interrupt flags, right-aligned so that they
+	 * can be tested against STM32_DMA_TCI/TEI/DMEI/FEI.
+	 *
+	 * Channels 4-7 are in HISR, 0-3 in LISR.  If (ch % 4) is 2 or 3 the
+	 * field sits 16 bits up; if it is 1 or 3, a further 6 bits up.  Only
+	 * the channel's own 6-bit field is kept, so that callers feeding the
+	 * result to stm32_dma_irq_clear() never touch a neighbouring channel.
+	 */
+	if (chan->id & 4)
+		dma_isr = stm32_dma_read(dmadev, STM32_DMA_HISR);
+	else
+		dma_isr = stm32_dma_read(dmadev, STM32_DMA_LISR);
+
+	flags = dma_isr >> (((chan->id & 2) << 3) | ((chan->id & 1) * 6));
+	return flags & GENMASK(5, 0);
+}
+
+static void stm32_dma_irq_clear(struct stm32_dma_chan *chan, u32 flags)
+{
+	struct stm32_dma_device *dmadev = stm32_dma_get_dev(chan);
+	u32 dma_ifcr;
+
+	/*
+	 * Write "flags" to the DMA_xIFCR register of this channel, keeping only
+	 * the channel's own 6-bit field so stray upper bits cannot acknowledge
+	 * interrupts of a neighbouring channel.  If (ch % 4) is 2 or 3 the field
+	 * is shifted left by 16 bits; if it is 1 or 3, by a further 6 bits.
+	 */
+	flags &= GENMASK(5, 0);
+	dma_ifcr = flags << (((chan->id & 2) << 3) | ((chan->id & 1) * 6));
+
+	if (chan->id & 4)
+		stm32_dma_write(dmadev, STM32_DMA_HIFCR, dma_ifcr);
+	else
+		stm32_dma_write(dmadev, STM32_DMA_LIFCR, dma_ifcr);
+}
+
+static int stm32_dma_disable_chan(struct stm32_dma_chan *chan)
+{
+	struct stm32_dma_device *dmadev = stm32_dma_get_dev(chan);
+	u32 reg = STM32_DMA_SCR(chan->id);
+	u32 dma_scr = stm32_dma_read(dmadev, reg);
+	unsigned int tries;
+
+	/* Nothing to do when the stream is not enabled. */
+	if (!(dma_scr & STM32_DMA_SCR_EN))
+		return 0;
+
+	stm32_dma_write(dmadev, reg, dma_scr & ~STM32_DMA_SCR_EN);
+
+	/*
+	 * Wait for the hardware to report EN cleared.  Callers hold
+	 * vchan.lock with interrupts off or run in the channel interrupt
+	 * handler, so sleeping is not allowed and jiffies may not advance:
+	 * poll with udelay() instead, giving up after 500000 * 10us, i.e.
+	 * roughly 5 seconds.  Returns 0 on success, -EBUSY on timeout.
+	 */
+	for (tries = 0; tries < 500000; tries++) {
+		if (!(stm32_dma_read(dmadev, reg) & STM32_DMA_SCR_EN))
+			return 0;
+		udelay(10);
+	}
+
+	dev_err(chan2dev(chan), "%s: timeout!\n", __func__);
+
+	return -EBUSY;
+}
+
+static void stm32_dma_stop(struct stm32_dma_chan *chan)
+{
+	struct stm32_dma_device *dmadev = stm32_dma_get_dev(chan);
+	u32 id = chan->id;
+	u32 val, pending;
+
+	/* Mask the stream interrupts (TCIE/TEIE/DMEIE)... */
+	val = stm32_dma_read(dmadev, STM32_DMA_SCR(id));
+	stm32_dma_write(dmadev, STM32_DMA_SCR(id),
+			val & ~STM32_DMA_SCR_IRQ_MASK);
+
+	/* ...and the FIFO error interrupt. */
+	val = stm32_dma_read(dmadev, STM32_DMA_SFCR(id));
+	stm32_dma_write(dmadev, STM32_DMA_SFCR(id), val & ~STM32_DMA_SFCR_FEIE);
+
+	/* Disable the stream; on failure chan->busy is left untouched. */
+	if (stm32_dma_disable_chan(chan) < 0)
+		return;
+
+	/* Acknowledge whatever is still flagged for this channel. */
+	pending = stm32_dma_irq_status(chan);
+	if (pending) {
+		dev_dbg(chan2dev(chan), "%s(): clearing interrupt: 0x%08x\n",
+			__func__, pending);
+		stm32_dma_irq_clear(chan, pending);
+	}
+
+	chan->busy = false;
+}
+
+static int stm32_dma_terminate_all(struct dma_chan *c)
+{
+	struct stm32_dma_chan *chan = to_stm32_dma_chan(c);
+	struct virt_dma_chan *vc = &chan->vchan;
+	unsigned long irqflags;
+	LIST_HEAD(doomed);
+
+	spin_lock_irqsave(&vc->lock, irqflags);
+	if (chan->busy) {
+		stm32_dma_stop(chan);
+		chan->desc = NULL;
+	}
+	/* Gather all descriptors under the lock, free them after dropping it. */
+	vchan_get_all_descriptors(vc, &doomed);
+	spin_unlock_irqrestore(&vc->lock, irqflags);
+	vchan_dma_desc_free_list(vc, &doomed);
+
+	return 0;
+}
+
+static void stm32_dma_dump_reg(struct stm32_dma_chan *chan)
+{
+	struct stm32_dma_device *dmadev = stm32_dma_get_dev(chan);
+	u32 scr = stm32_dma_read(dmadev, STM32_DMA_SCR(chan->id));
+	u32 ndtr = stm32_dma_read(dmadev, STM32_DMA_SNDTR(chan->id));
+	u32 spar = stm32_dma_read(dmadev, STM32_DMA_SPAR(chan->id));
+	u32 sm0ar = stm32_dma_read(dmadev, STM32_DMA_SM0AR(chan->id));
+	u32 sm1ar = stm32_dma_read(dmadev, STM32_DMA_SM1AR(chan->id));
+	u32 sfcr = stm32_dma_read(dmadev, STM32_DMA_SFCR(chan->id));
+	/* Debug aid: print the six stream registers sampled above. */
+	dev_dbg(chan2dev(chan), "SCR:   0x%08x\n", scr);
+	dev_dbg(chan2dev(chan), "NDTR:  0x%08x\n", ndtr);
+	dev_dbg(chan2dev(chan), "SPAR:  0x%08x\n", spar);
+	dev_dbg(chan2dev(chan), "SM0AR: 0x%08x\n", sm0ar);
+	dev_dbg(chan2dev(chan), "SM1AR: 0x%08x\n", sm1ar);
+	dev_dbg(chan2dev(chan), "SFCR:  0x%08x\n", sfcr);
+}
+
+static int stm32_dma_start_transfer(struct stm32_dma_chan *chan)
+{
+	struct stm32_dma_device *dmadev = stm32_dma_get_dev(chan);
+	struct stm32_dma_chan_reg *reg;
+	struct virt_dma_desc *vdesc;
+	u32 id = chan->id;
+	u32 pending;
+	int err;
+
+	/* Make sure the stream is disabled before reprogramming it. */
+	err = stm32_dma_disable_chan(chan);
+	if (err < 0)
+		return err;
+
+	/* No descriptor in flight: fetch the next issued one, if there is any. */
+	if (!chan->desc) {
+		vdesc = vchan_next_desc(&chan->vchan);
+		if (!vdesc)
+			return -EPERM;
+
+		chan->desc = to_stm32_dma_desc(vdesc);
+		chan->next_sg = 0;
+	}
+
+	/* Past the last request: wrap around to the first one. */
+	if (chan->next_sg == chan->desc->num_sgs)
+		chan->next_sg = 0;
+
+	/* Load the register image stored for this request. */
+	reg = &chan->desc->sg_req[chan->next_sg].chan_reg;
+	stm32_dma_write(dmadev, STM32_DMA_SCR(id), reg->dma_scr);
+	stm32_dma_write(dmadev, STM32_DMA_SPAR(id), reg->dma_spar);
+	stm32_dma_write(dmadev, STM32_DMA_SM0AR(id), reg->dma_sm0ar);
+	stm32_dma_write(dmadev, STM32_DMA_SFCR(id), reg->dma_sfcr);
+	stm32_dma_write(dmadev, STM32_DMA_SM1AR(id), reg->dma_sm1ar);
+	stm32_dma_write(dmadev, STM32_DMA_SNDTR(id), reg->dma_sndtr);
+	chan->next_sg++;
+
+	/* Acknowledge any interrupt flag still raised for this channel. */
+	pending = stm32_dma_irq_status(chan);
+	if (pending)
+		stm32_dma_irq_clear(chan, pending);
+
+	stm32_dma_dump_reg(chan);
+
+	/* Enable last; EN is also recorded in the stored register image. */
+	reg->dma_scr |= STM32_DMA_SCR_EN;
+	stm32_dma_write(dmadev, STM32_DMA_SCR(id), reg->dma_scr);
+	chan->busy = true;
+
+	return 0;
+}
+
+static void stm32_dma_configure_next_sg(struct stm32_dma_chan *chan)
+{
+	struct stm32_dma_device *dmadev = stm32_dma_get_dev(chan);
+	struct stm32_dma_chan_reg *next;
+	u32 id = chan->id;
+	u32 scr;
+
+	/* Only streams running in double-buffer mode (DBM) are touched. */
+	scr = stm32_dma_read(dmadev, STM32_DMA_SCR(id));
+	if (!(scr & STM32_DMA_SCR_DBM))
+		return;
+
+	if (chan->next_sg == chan->desc->num_sgs)
+		chan->next_sg = 0;
+
+	next = &chan->desc->sg_req[chan->next_sg].chan_reg;
+
+	/* With CT set the memory 0 address is rewritten, otherwise memory 1. */
+	if (scr & STM32_DMA_SCR_CT) {
+		stm32_dma_write(dmadev, STM32_DMA_SM0AR(id), next->dma_sm0ar);
+		dev_dbg(chan2dev(chan), "CT=1 <=> SM0AR: 0x%08x\n",
+			stm32_dma_read(dmadev, STM32_DMA_SM0AR(id)));
+	} else {
+		stm32_dma_write(dmadev, STM32_DMA_SM1AR(id), next->dma_sm1ar);
+		dev_dbg(chan2dev(chan), "CT=0 <=> SM1AR: 0x%08x\n",
+			stm32_dma_read(dmadev, STM32_DMA_SM1AR(id)));
+	}
+
+	chan->next_sg++;
+}
+
+static void stm32_dma_handle_chan_done(struct stm32_dma_chan *chan)
+{
+	struct stm32_dma_desc *desc = chan->desc;
+
+	if (desc && desc->cyclic) { /* notify the client, queue the next buffer */
+		vchan_cyclic_callback(&desc->vdesc);
+		stm32_dma_configure_next_sg(chan);
+	} else if (desc) { /* one request done: retire or continue the descriptor */
+		chan->busy = false;
+		if (chan->next_sg == desc->num_sgs) {
+			list_del(&desc->vdesc.node);
+			vchan_cookie_complete(&desc->vdesc);
+			chan->desc = NULL;
+		}
+		stm32_dma_start_transfer(chan);
+	}
+}
+
+static irqreturn_t stm32_dma_chan_irq(int irq, void *devid)
+{
+	struct stm32_dma_chan *chan = devid;
+	struct stm32_dma_device *dmadev = stm32_dma_get_dev(chan);
+	u32 status, scr, sfcr;
+
+	spin_lock(&chan->vchan.lock);
+
+	status = stm32_dma_irq_status(chan);
+	scr = stm32_dma_read(dmadev, STM32_DMA_SCR(chan->id));
+	sfcr = stm32_dma_read(dmadev, STM32_DMA_SFCR(chan->id)); /* not used below */
+
+	/* Anything but an enabled transfer-complete event is logged as an error. */
+	if (!(status & STM32_DMA_TCI) || !(scr & STM32_DMA_SCR_TCIE)) {
+		stm32_dma_irq_clear(chan, status);
+		dev_err(chan2dev(chan), "DMA error: status=0x%08x\n", status);
+	} else {
+		stm32_dma_irq_clear(chan, STM32_DMA_TCI);
+		stm32_dma_handle_chan_done(chan);
+	}
+
+	spin_unlock(&chan->vchan.lock);
+
+	return IRQ_HANDLED;
+}
+
+static void stm32_dma_issue_pending(struct dma_chan *c)
+{
+	struct stm32_dma_chan *chan = to_stm32_dma_chan(c);
+	unsigned long irqflags;
+
+	spin_lock_irqsave(&chan->vchan.lock, irqflags);
+	/*
+	 * Start the hardware only when the channel is idle, virt-dma reports
+	 * issued work and no descriptor is already being processed.
+	 */
+	if (!chan->busy && vchan_issue_pending(&chan->vchan) && !chan->desc) {
+		if (!stm32_dma_start_transfer(chan) && chan->desc->cyclic)
+			stm32_dma_configure_next_sg(chan);
+	}
+	spin_unlock_irqrestore(&chan->vchan.lock, irqflags);
+}
+
+static int stm32_dma_set_xfer_param(struct stm32_dma_chan *chan,
+				    enum dma_transfer_direction direction,
+				    enum dma_slave_buswidth *buswidth)
+{
+	enum dma_slave_buswidth src_addr_width, dst_addr_width;
+	int src_bus_width, dst_bus_width;
+	int src_burst_size, dst_burst_size;
+	u32 src_maxburst, dst_maxburst;
+	dma_addr_t src_addr, dst_addr;
+	u32 dma_scr = 0;
+
+	src_addr_width = chan->dma_sconfig.src_addr_width;
+	dst_addr_width = chan->dma_sconfig.dst_addr_width;
+	src_maxburst = chan->dma_sconfig.src_maxburst;
+	dst_maxburst = chan->dma_sconfig.dst_maxburst;
+	src_addr = chan->dma_sconfig.src_addr;
+	dst_addr = chan->dma_sconfig.dst_addr;
+	/* Device side feeds PSIZE/PBURST/SPAR, memory side MSIZE/MBURST. */
+	switch (direction) {
+	case DMA_MEM_TO_DEV:
+		dst_bus_width = stm32_dma_get_width(chan, dst_addr_width);
+		if (dst_bus_width < 0)
+			return dst_bus_width;
+
+		dst_burst_size = stm32_dma_get_burst(chan, dst_maxburst);
+		if (dst_burst_size < 0)
+			return dst_burst_size;
+		/* An unset memory width defaults to the device width. */
+		if (!src_addr_width)
+			src_addr_width = dst_addr_width;
+
+		src_bus_width = stm32_dma_get_width(chan, src_addr_width);
+		if (src_bus_width < 0)
+			return src_bus_width;
+
+		src_burst_size = stm32_dma_get_burst(chan, src_maxburst);
+		if (src_burst_size < 0)
+			return src_burst_size;
+
+		dma_scr = STM32_DMA_SCR_DIR(STM32_DMA_MEM_TO_DEV) |
+			STM32_DMA_SCR_PSIZE(dst_bus_width) |
+			STM32_DMA_SCR_MSIZE(src_bus_width) |
+			STM32_DMA_SCR_PBURST(dst_burst_size) |
+			STM32_DMA_SCR_MBURST(src_burst_size);
+
+		chan->chan_reg.dma_spar = dst_addr;
+		*buswidth = dst_addr_width;
+		break;
+
+	case DMA_DEV_TO_MEM:
+		src_bus_width = stm32_dma_get_width(chan, src_addr_width);
+		if (src_bus_width < 0)
+			return src_bus_width;
+
+		src_burst_size = stm32_dma_get_burst(chan, src_maxburst);
+		if (src_burst_size < 0)
+			return src_burst_size;
+		/* An unset memory width defaults to the device width. */
+		if (!dst_addr_width)
+			dst_addr_width = src_addr_width;
+
+		dst_bus_width = stm32_dma_get_width(chan, dst_addr_width);
+		if (dst_bus_width < 0)
+			return dst_bus_width;
+
+		dst_burst_size = stm32_dma_get_burst(chan, dst_maxburst);
+		if (dst_burst_size < 0)
+			return dst_burst_size;
+
+		dma_scr = STM32_DMA_SCR_DIR(STM32_DMA_DEV_TO_MEM) |
+			STM32_DMA_SCR_PSIZE(src_bus_width) |
+			STM32_DMA_SCR_MSIZE(dst_bus_width) |
+			STM32_DMA_SCR_PBURST(src_burst_size) |
+			STM32_DMA_SCR_MBURST(dst_burst_size);
+
+		chan->chan_reg.dma_spar = src_addr;
+		*buswidth = src_addr_width;
+		break;
+
+	default:
+		dev_err(chan2dev(chan), "Dma direction is not supported\n");
+		return -EINVAL;
+	}
+	/* Direct or FIFO mode depends on whether any burst was requested. */
+	stm32_dma_set_fifo_config(chan, src_maxburst, dst_maxburst);
+	/* Replace only the fields computed above; other SCR bits are kept. */
+	chan->chan_reg.dma_scr &= ~(STM32_DMA_SCR_DIR_MASK |
+			STM32_DMA_SCR_PSIZE_MASK | STM32_DMA_SCR_MSIZE_MASK |
+			STM32_DMA_SCR_PBURST_MASK | STM32_DMA_SCR_MBURST_MASK);
+	chan->chan_reg.dma_scr |= dma_scr;
+
+	return 0;
+}
+
+static void stm32_dma_clear_reg(struct stm32_dma_chan_reg *regs)
+{
+	memset(regs, 0, sizeof(struct stm32_dma_chan_reg));
+}
+
+static struct dma_async_tx_descriptor *stm32_dma_prep_slave_sg(
+	struct dma_chan *c, struct scatterlist *sgl,
+	u32 sg_len, enum dma_transfer_direction direction,
+	unsigned long flags, void *context)
+{
+	struct stm32_dma_chan *chan = to_stm32_dma_chan(c);
+	struct stm32_dma_desc *desc;
+	struct scatterlist *sg;
+	enum dma_slave_buswidth buswidth;
+	u32 nb_data_items;
+	int i, ret;
+
+	if (!chan->config_init) {
+		dev_err(chan2dev(chan), "dma channel is not configured\n");
+		return NULL;
+	}
+
+	if (sg_len < 1) {
+		dev_err(chan2dev(chan), "Invalid segment length %d\n", sg_len);
+		return NULL;
+	}
+
+	desc = stm32_dma_alloc_desc(sg_len);
+	if (!desc)
+		return NULL;
+
+	ret = stm32_dma_set_xfer_param(chan, direction, &buswidth);
+	if (ret < 0)
+		goto err;
+
+	/* Set peripheral flow controller */
+	if (chan->dma_sconfig.device_fc)
+		chan->chan_reg.dma_scr |= STM32_DMA_SCR_PFCTRL;
+	else
+		chan->chan_reg.dma_scr &= ~STM32_DMA_SCR_PFCTRL;
+
+	for_each_sg(sgl, sg, sg_len, i) {
+		desc->sg_req[i].len = sg_dma_len(sg);
+
+		nb_data_items = desc->sg_req[i].len / buswidth;
+		if (nb_data_items > STM32_DMA_MAX_DATA_ITEMS) {
+			dev_err(chan2dev(chan), "nb items not supported\n");
+			goto err;
+		}
+
+		stm32_dma_clear_reg(&desc->sg_req[i].chan_reg);
+		desc->sg_req[i].chan_reg.dma_scr = chan->chan_reg.dma_scr;
+		desc->sg_req[i].chan_reg.dma_sfcr = chan->chan_reg.dma_sfcr;
+		desc->sg_req[i].chan_reg.dma_spar = chan->chan_reg.dma_spar;
+		desc->sg_req[i].chan_reg.dma_sm0ar = sg_dma_address(sg);
+		desc->sg_req[i].chan_reg.dma_sm1ar = sg_dma_address(sg);
+		desc->sg_req[i].chan_reg.dma_sndtr = nb_data_items;
+	}
+
+	desc->num_sgs = sg_len;
+	desc->cyclic = false;
+
+	return vchan_tx_prep(&chan->vchan, &desc->vdesc, flags);
+
+err:
+	kfree(desc);
+	return NULL;
+}
+
+static struct dma_async_tx_descriptor *stm32_dma_prep_dma_cyclic(
+	struct dma_chan *c, dma_addr_t buf_addr, size_t buf_len,
+	size_t period_len, enum dma_transfer_direction direction,
+	unsigned long flags)
+{
+	struct stm32_dma_chan *chan = to_stm32_dma_chan(c);
+	struct stm32_dma_desc *desc;
+	enum dma_slave_buswidth buswidth;
+	u32 num_periods, nb_data_items;
+	int i, ret;
+
+	if (!buf_len || !period_len) {
+		dev_err(chan2dev(chan), "Invalid buffer/period len\n");
+		return NULL;
+	}
+
+	if (!chan->config_init) {
+		dev_err(chan2dev(chan), "dma channel is not configured\n");
+		return NULL;
+	}
+
+	if (buf_len % period_len) {
+		dev_err(chan2dev(chan), "buf_len not multiple of period_len\n");
+		return NULL;
+	}
+
+	/*
+	 * More requests may be queued as long as the DMA has not been
+	 * started; the driver will loop over all of them.
+	 * Once the DMA is started, new requests can only be queued after
+	 * terminating the DMA.
+	 */
+	if (chan->busy) {
+		dev_err(chan2dev(chan), "Request not allowed when dma busy\n");
+		return NULL;
+	}
+
+	ret = stm32_dma_set_xfer_param(chan, direction, &buswidth);
+	if (ret < 0)
+		return NULL;
+
+	nb_data_items = period_len / buswidth;
+	if (nb_data_items > STM32_DMA_MAX_DATA_ITEMS) {
+		dev_err(chan2dev(chan), "number of items not supported\n");
+		return NULL;
+	}
+
+	/*  Enable Circular mode or double buffer mode */
+	if (buf_len == period_len)
+		chan->chan_reg.dma_scr |= STM32_DMA_SCR_CIRC;
+	else
+		chan->chan_reg.dma_scr |= STM32_DMA_SCR_DBM;
+
+	/* Clear periph ctrl if client set it */
+	chan->chan_reg.dma_scr &= ~STM32_DMA_SCR_PFCTRL;
+
+	num_periods = buf_len / period_len;
+
+	desc = stm32_dma_alloc_desc(num_periods);
+	if (!desc)
+		return NULL;
+
+	for (i = 0; i < num_periods; i++) {
+		desc->sg_req[i].len = period_len;
+
+		stm32_dma_clear_reg(&desc->sg_req[i].chan_reg);
+		desc->sg_req[i].chan_reg.dma_scr = chan->chan_reg.dma_scr;
+		desc->sg_req[i].chan_reg.dma_sfcr = chan->chan_reg.dma_sfcr;
+		desc->sg_req[i].chan_reg.dma_spar = chan->chan_reg.dma_spar;
+		desc->sg_req[i].chan_reg.dma_sm0ar = buf_addr;
+		desc->sg_req[i].chan_reg.dma_sm1ar = buf_addr;
+		desc->sg_req[i].chan_reg.dma_sndtr = nb_data_items;
+		buf_addr += period_len;
+	}
+
+	desc->num_sgs = num_periods;
+	desc->cyclic = true;
+
+	return vchan_tx_prep(&chan->vchan, &desc->vdesc, flags);
+}
+
+static struct dma_async_tx_descriptor *stm32_dma_prep_dma_memcpy(
+	struct dma_chan *c, dma_addr_t dest,
+	dma_addr_t src, size_t len, unsigned long flags)
+{
+	struct stm32_dma_chan *chan = to_stm32_dma_chan(c);
+	u32 num_sgs;
+	struct stm32_dma_desc *desc;
+	size_t xfer_count, offset;
+	int i;
+
+	num_sgs = DIV_ROUND_UP(len, STM32_DMA_MAX_DATA_ITEMS);
+	desc = stm32_dma_alloc_desc(num_sgs);
+	if (!desc)
+		return NULL;
+
+	for (offset = 0, i = 0; offset < len; offset += xfer_count, i++) {
+		xfer_count = min_t(size_t, len - offset,
+				   STM32_DMA_MAX_DATA_ITEMS);
+
+		desc->sg_req[i].len = xfer_count;
+
+		stm32_dma_clear_reg(&desc->sg_req[i].chan_reg);
+		desc->sg_req[i].chan_reg.dma_scr =
+			STM32_DMA_SCR_DIR(STM32_DMA_MEM_TO_MEM) |
+			STM32_DMA_SCR_MINC |
+			STM32_DMA_SCR_PINC |
+			STM32_DMA_SCR_TCIE |
+			STM32_DMA_SCR_TEIE;
+		desc->sg_req[i].chan_reg.dma_sfcr = STM32_DMA_SFCR_DMDIS |
+			STM32_DMA_SFCR_FTH(STM32_DMA_FIFO_THRESHOLD_FULL) |
+			STM32_DMA_SFCR_FEIE;
+		desc->sg_req[i].chan_reg.dma_spar = src + offset;
+		desc->sg_req[i].chan_reg.dma_sm0ar = dest + offset;
+		desc->sg_req[i].chan_reg.dma_sndtr = xfer_count;
+	}
+
+	desc->num_sgs = num_sgs;
+	desc->cyclic = false;
+
+	return vchan_tx_prep(&chan->vchan, &desc->vdesc, flags);
+}
+
+static size_t stm32_dma_desc_residue(struct stm32_dma_chan *chan,
+				     struct stm32_dma_desc *desc,
+				     u32 next_sg)
+{
+	struct stm32_dma_device *dmadev = stm32_dma_get_dev(chan);
+	u32 scr, ndtr, bytes_left = 0;
+	int sg;
+
+	/* Sum the lengths of the sg requests from next_sg to the last one */
+	for (sg = next_sg; sg < desc->num_sgs; sg++)
+		bytes_left += desc->sg_req[sg].len;
+
+	if (next_sg == 0)
+		return bytes_left;
+
+	/*
+	 * Add the item count read from SNDTR, shifted by the PSIZE field of SCR
+	 */
+	scr = stm32_dma_read(dmadev, STM32_DMA_SCR(chan->id));
+	ndtr = stm32_dma_read(dmadev, STM32_DMA_SNDTR(chan->id));
+
+	return bytes_left + (ndtr << STM32_DMA_SCR_PSIZE_GET(scr));
+}
+
+static enum dma_status stm32_dma_tx_status(struct dma_chan *c,
+					   dma_cookie_t cookie,
+					   struct dma_tx_state *state)
+{
+	struct stm32_dma_chan *chan = to_stm32_dma_chan(c);
+	struct virt_dma_desc *vdesc;
+	enum dma_status status;
+	unsigned long flags;
+	u32 residue;
+
+	status = dma_cookie_status(c, cookie, state);
+	if ((status == DMA_COMPLETE) || (!state))
+		return status;
+
+	spin_lock_irqsave(&chan->vchan.lock, flags);
+	vdesc = vchan_find_desc(&chan->vchan, cookie);
+	/* chan->desc may be NULL (no transfer in flight): test it first */
+	if (chan->desc && cookie == chan->desc->vdesc.tx.cookie) {
+		residue = stm32_dma_desc_residue(chan, chan->desc,
+						 chan->next_sg);
+	} else if (vdesc) {
+		residue = stm32_dma_desc_residue(chan,
+						 to_stm32_dma_desc(vdesc), 0);
+	} else {
+		residue = 0;
+	}
+
+	dma_set_residue(state, residue);
+	spin_unlock_irqrestore(&chan->vchan.lock, flags);
+
+	return status;
+}
+
+static int stm32_dma_alloc_chan_resources(struct dma_chan *c)
+{
+	struct stm32_dma_chan *chan = to_stm32_dma_chan(c);
+	struct stm32_dma_device *dmadev = stm32_dma_get_dev(chan);
+	int ret;
+
+	chan->config_init = false;
+	ret = clk_prepare_enable(dmadev->clk);
+	if (ret < 0) {
+		dev_err(chan2dev(chan), "clk_prepare_enable failed: %d\n", ret);
+		return ret;
+	}
+
+	ret = stm32_dma_disable_chan(chan);
+	if (ret < 0)
+		clk_disable_unprepare(dmadev->clk);
+
+	return ret;
+}
+
+static void stm32_dma_free_chan_resources(struct dma_chan *c)
+{
+	struct stm32_dma_chan *chan = to_stm32_dma_chan(c);
+	struct stm32_dma_device *dmadev = stm32_dma_get_dev(chan);
+	unsigned long flags;
+
+	dev_dbg(chan2dev(chan), "Freeing channel %d\n", chan->id);
+
+	if (chan->busy) {
+		spin_lock_irqsave(&chan->vchan.lock, flags);
+		stm32_dma_stop(chan);
+		chan->desc = NULL;
+		spin_unlock_irqrestore(&chan->vchan.lock, flags);
+	}
+
+	clk_disable_unprepare(dmadev->clk);
+
+	vchan_free_chan_resources(to_virt_chan(c));
+}
+
+static void stm32_dma_desc_free(struct virt_dma_desc *vdesc)
+{
+	kfree(container_of(vdesc, struct stm32_dma_desc, vdesc));
+}
+
+static void stm32_dma_set_config(struct stm32_dma_chan *chan,
+				 struct stm32_dma_cfg *cfg)
+{
+	/* Rebuild the cached channel registers from scratch out of cfg */
+	stm32_dma_clear_reg(&chan->chan_reg);
+	chan->chan_reg.dma_scr = cfg->stream_config & STM32_DMA_SCR_CFG_MASK;
+	chan->chan_reg.dma_scr |= STM32_DMA_SCR_REQ(cfg->request_line);
+
+	/* Enable Interrupts */
+	chan->chan_reg.dma_scr |= STM32_DMA_SCR_TEIE | STM32_DMA_SCR_TCIE;
+
+	chan->chan_reg.dma_sfcr = cfg->threshold & STM32_DMA_SFCR_FTH_MASK;
+}
+
+static struct dma_chan *stm32_dma_of_xlate(struct of_phandle_args *dma_spec,
+					   struct of_dma *ofdma)
+{
+	struct stm32_dma_device *dmadev = ofdma->of_dma_data;
+	struct stm32_dma_cfg cfg;
+	struct stm32_dma_chan *chan;
+	struct dma_chan *c;
+
+	if (dma_spec->args_count < 3)
+		return NULL;
+
+	cfg.channel_id = dma_spec->args[0];
+	cfg.request_line = dma_spec->args[1];
+	cfg.stream_config = dma_spec->args[2];
+	cfg.threshold = 0;
+
+	if ((cfg.channel_id >= STM32_DMA_MAX_CHANNELS) || (cfg.request_line >=
+				STM32_DMA_MAX_REQUEST_ID))
+		return NULL;
+
+	if (dma_spec->args_count > 3)
+		cfg.threshold = dma_spec->args[3];
+
+	chan = &dmadev->chan[cfg.channel_id];
+
+	c = dma_get_slave_channel(&chan->vchan.chan);
+	if (c)
+		stm32_dma_set_config(chan, &cfg);
+
+	return c;
+}
+
+static const struct of_device_id stm32_dma_of_match[] = {
+	{ .compatible = "st,stm32-dma", },
+	{ /* sentinel */ },
+};
+MODULE_DEVICE_TABLE(of, stm32_dma_of_match);
+
+static int stm32_dma_probe(struct platform_device *pdev)
+{
+	struct stm32_dma_chan *chan;
+	struct stm32_dma_device *dmadev;
+	struct dma_device *dd;
+	const struct of_device_id *match;
+	struct resource *res;
+	int i, ret;
+
+	match = of_match_device(stm32_dma_of_match, &pdev->dev);
+	if (!match) {
+		dev_err(&pdev->dev, "Error: No device match found\n");
+		return -ENODEV;
+	}
+
+	dmadev = devm_kzalloc(&pdev->dev, sizeof(*dmadev), GFP_KERNEL);
+	if (!dmadev)
+		return -ENOMEM;
+
+	dd = &dmadev->ddev;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	dmadev->base = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(dmadev->base))
+		return PTR_ERR(dmadev->base);
+
+	dmadev->clk = devm_clk_get(&pdev->dev, NULL);
+	if (IS_ERR(dmadev->clk)) {
+		dev_err(&pdev->dev, "Error: Missing controller clock\n");
+		return PTR_ERR(dmadev->clk);
+	}
+
+	dmadev->mem2mem = of_property_read_bool(pdev->dev.of_node,
+						"st,mem2mem");
+
+	dmadev->rst = devm_reset_control_get(&pdev->dev, NULL);
+	if (!IS_ERR(dmadev->rst)) {
+		reset_control_assert(dmadev->rst);
+		udelay(2);
+		reset_control_deassert(dmadev->rst);
+	}
+
+	dma_cap_set(DMA_SLAVE, dd->cap_mask);
+	dma_cap_set(DMA_PRIVATE, dd->cap_mask);
+	dma_cap_set(DMA_CYCLIC, dd->cap_mask);
+	dd->device_alloc_chan_resources = stm32_dma_alloc_chan_resources;
+	dd->device_free_chan_resources = stm32_dma_free_chan_resources;
+	dd->device_tx_status = stm32_dma_tx_status;
+	dd->device_issue_pending = stm32_dma_issue_pending;
+	dd->device_prep_slave_sg = stm32_dma_prep_slave_sg;
+	dd->device_prep_dma_cyclic = stm32_dma_prep_dma_cyclic;
+	dd->device_config = stm32_dma_slave_config;
+	dd->device_terminate_all = stm32_dma_terminate_all;
+	dd->src_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) |
+		BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) |
+		BIT(DMA_SLAVE_BUSWIDTH_4_BYTES);
+	dd->dst_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) |
+		BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) |
+		BIT(DMA_SLAVE_BUSWIDTH_4_BYTES);
+	dd->directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV);
+	dd->residue_granularity = DMA_RESIDUE_GRANULARITY_BURST;
+	dd->dev = &pdev->dev;
+	INIT_LIST_HEAD(&dd->channels);
+
+	if (dmadev->mem2mem) {
+		dma_cap_set(DMA_MEMCPY, dd->cap_mask);
+		dd->device_prep_dma_memcpy = stm32_dma_prep_dma_memcpy;
+		dd->directions |= BIT(DMA_MEM_TO_MEM);
+	}
+
+	for (i = 0; i < STM32_DMA_MAX_CHANNELS; i++) {
+		chan = &dmadev->chan[i];
+		chan->id = i;
+		chan->vchan.desc_free = stm32_dma_desc_free;
+		vchan_init(&chan->vchan, dd);
+	}
+
+	ret = dma_async_device_register(dd);
+	if (ret)
+		return ret;
+
+	for (i = 0; i < STM32_DMA_MAX_CHANNELS; i++) {
+		chan = &dmadev->chan[i];
+		res = platform_get_resource(pdev, IORESOURCE_IRQ, i);
+		if (!res) {
+			ret = -EINVAL;
+			dev_err(&pdev->dev, "No irq resource for chan %d\n", i);
+			goto err_unregister;
+		}
+		chan->irq = res->start;
+		ret = devm_request_irq(&pdev->dev, chan->irq,
+				       stm32_dma_chan_irq, 0,
+				       dev_name(chan2dev(chan)), chan);
+		if (ret) {
+			dev_err(&pdev->dev,
+				"request_irq failed with err %d channel %d\n",
+				ret, i);
+			goto err_unregister;
+		}
+	}
+
+	ret = of_dma_controller_register(pdev->dev.of_node,
+					 stm32_dma_of_xlate, dmadev);
+	if (ret < 0) {
+		dev_err(&pdev->dev,
+			"STM32 DMA DMA OF registration failed %d\n", ret);
+		goto err_unregister;
+	}
+
+	platform_set_drvdata(pdev, dmadev);
+
+	dev_info(&pdev->dev, "STM32 DMA driver registered\n");
+
+	return 0;
+
+err_unregister:
+	dma_async_device_unregister(dd);
+
+	return ret;
+}
+
+static struct platform_driver stm32_dma_driver = {
+	.driver = {
+		.name = "stm32-dma",
+		.of_match_table = stm32_dma_of_match,
+	},
+};
+
+static int __init stm32_dma_init(void)
+{
+	return platform_driver_probe(&stm32_dma_driver, stm32_dma_probe);
+}
+subsys_initcall(stm32_dma_init);
diff --git a/drivers/dma/tegra20-apb-dma.c b/drivers/dma/tegra20-apb-dma.c
index c8f79dc..935da81 100644
--- a/drivers/dma/tegra20-apb-dma.c
+++ b/drivers/dma/tegra20-apb-dma.c
@@ -296,7 +296,7 @@
 	spin_unlock_irqrestore(&tdc->lock, flags);
 
 	/* Allocate DMA desc */
-	dma_desc = kzalloc(sizeof(*dma_desc), GFP_ATOMIC);
+	dma_desc = kzalloc(sizeof(*dma_desc), GFP_NOWAIT);
 	if (!dma_desc) {
 		dev_err(tdc2dev(tdc), "dma_desc alloc failed\n");
 		return NULL;
@@ -336,7 +336,7 @@
 	}
 	spin_unlock_irqrestore(&tdc->lock, flags);
 
-	sg_req = kzalloc(sizeof(struct tegra_dma_sg_req), GFP_ATOMIC);
+	sg_req = kzalloc(sizeof(struct tegra_dma_sg_req), GFP_NOWAIT);
 	if (!sg_req)
 		dev_err(tdc2dev(tdc), "sg_req alloc failed\n");
 	return sg_req;
@@ -1186,10 +1186,12 @@
 
 	dma_cookie_init(&tdc->dma_chan);
 	tdc->config_init = false;
-	ret = clk_prepare_enable(tdma->dma_clk);
+
+	ret = pm_runtime_get_sync(tdma->dev);
 	if (ret < 0)
-		dev_err(tdc2dev(tdc), "clk_prepare_enable failed: %d\n", ret);
-	return ret;
+		return ret;
+
+	return 0;
 }
 
 static void tegra_dma_free_chan_resources(struct dma_chan *dc)
@@ -1232,7 +1234,7 @@
 		list_del(&sg_req->node);
 		kfree(sg_req);
 	}
-	clk_disable_unprepare(tdma->dma_clk);
+	pm_runtime_put(tdma->dev);
 
 	tdc->slave_id = 0;
 }
@@ -1356,20 +1358,14 @@
 	spin_lock_init(&tdma->global_lock);
 
 	pm_runtime_enable(&pdev->dev);
-	if (!pm_runtime_enabled(&pdev->dev)) {
+	if (!pm_runtime_enabled(&pdev->dev))
 		ret = tegra_dma_runtime_resume(&pdev->dev);
-		if (ret) {
-			dev_err(&pdev->dev, "dma_runtime_resume failed %d\n",
-				ret);
-			goto err_pm_disable;
-		}
-	}
+	else
+		ret = pm_runtime_get_sync(&pdev->dev);
 
-	/* Enable clock before accessing registers */
-	ret = clk_prepare_enable(tdma->dma_clk);
 	if (ret < 0) {
-		dev_err(&pdev->dev, "clk_prepare_enable failed: %d\n", ret);
-		goto err_pm_disable;
+		pm_runtime_disable(&pdev->dev);
+		return ret;
 	}
 
 	/* Reset DMA controller */
@@ -1382,7 +1378,7 @@
 	tdma_write(tdma, TEGRA_APBDMA_CONTROL, 0);
 	tdma_write(tdma, TEGRA_APBDMA_IRQ_MASK_SET, 0xFFFFFFFFul);
 
-	clk_disable_unprepare(tdma->dma_clk);
+	pm_runtime_put(&pdev->dev);
 
 	INIT_LIST_HEAD(&tdma->dma_dev.channels);
 	for (i = 0; i < cdata->nr_channels; i++) {
@@ -1400,8 +1396,7 @@
 		}
 		tdc->irq = res->start;
 		snprintf(tdc->name, sizeof(tdc->name), "apbdma.%d", i);
-		ret = devm_request_irq(&pdev->dev, tdc->irq,
-				tegra_dma_isr, 0, tdc->name, tdc);
+		ret = request_irq(tdc->irq, tegra_dma_isr, 0, tdc->name, tdc);
 		if (ret) {
 			dev_err(&pdev->dev,
 				"request_irq failed with err %d channel %d\n",
@@ -1482,10 +1477,11 @@
 err_irq:
 	while (--i >= 0) {
 		struct tegra_dma_channel *tdc = &tdma->channels[i];
+
+		free_irq(tdc->irq, tdc);
 		tasklet_kill(&tdc->tasklet);
 	}
 
-err_pm_disable:
 	pm_runtime_disable(&pdev->dev);
 	if (!pm_runtime_status_suspended(&pdev->dev))
 		tegra_dma_runtime_suspend(&pdev->dev);
@@ -1502,6 +1498,7 @@
 
 	for (i = 0; i < tdma->chip_data->nr_channels; ++i) {
 		tdc = &tdma->channels[i];
+		free_irq(tdc->irq, tdc);
 		tasklet_kill(&tdc->tasklet);
 	}
 
@@ -1514,8 +1511,7 @@
 
 static int tegra_dma_runtime_suspend(struct device *dev)
 {
-	struct platform_device *pdev = to_platform_device(dev);
-	struct tegra_dma *tdma = platform_get_drvdata(pdev);
+	struct tegra_dma *tdma = dev_get_drvdata(dev);
 
 	clk_disable_unprepare(tdma->dma_clk);
 	return 0;
@@ -1523,8 +1519,7 @@
 
 static int tegra_dma_runtime_resume(struct device *dev)
 {
-	struct platform_device *pdev = to_platform_device(dev);
-	struct tegra_dma *tdma = platform_get_drvdata(pdev);
+	struct tegra_dma *tdma = dev_get_drvdata(dev);
 	int ret;
 
 	ret = clk_prepare_enable(tdma->dma_clk);
@@ -1543,7 +1538,7 @@
 	int ret;
 
 	/* Enable clock before accessing register */
-	ret = tegra_dma_runtime_resume(dev);
+	ret = pm_runtime_get_sync(dev);
 	if (ret < 0)
 		return ret;
 
@@ -1552,15 +1547,22 @@
 		struct tegra_dma_channel *tdc = &tdma->channels[i];
 		struct tegra_dma_channel_regs *ch_reg = &tdc->channel_reg;
 
+		/* Only save the state of DMA channels that are in use */
+		if (!tdc->config_init)
+			continue;
+
 		ch_reg->csr = tdc_read(tdc, TEGRA_APBDMA_CHAN_CSR);
 		ch_reg->ahb_ptr = tdc_read(tdc, TEGRA_APBDMA_CHAN_AHBPTR);
 		ch_reg->apb_ptr = tdc_read(tdc, TEGRA_APBDMA_CHAN_APBPTR);
 		ch_reg->ahb_seq = tdc_read(tdc, TEGRA_APBDMA_CHAN_AHBSEQ);
 		ch_reg->apb_seq = tdc_read(tdc, TEGRA_APBDMA_CHAN_APBSEQ);
+		if (tdma->chip_data->support_separate_wcount_reg)
+			ch_reg->wcount = tdc_read(tdc,
+						  TEGRA_APBDMA_CHAN_WCOUNT);
 	}
 
 	/* Disable clock */
-	tegra_dma_runtime_suspend(dev);
+	pm_runtime_put(dev);
 	return 0;
 }
 
@@ -1571,7 +1573,7 @@
 	int ret;
 
 	/* Enable clock before accessing register */
-	ret = tegra_dma_runtime_resume(dev);
+	ret = pm_runtime_get_sync(dev);
 	if (ret < 0)
 		return ret;
 
@@ -1583,6 +1585,13 @@
 		struct tegra_dma_channel *tdc = &tdma->channels[i];
 		struct tegra_dma_channel_regs *ch_reg = &tdc->channel_reg;
 
+		/* Only restore the state of DMA channels that are in use */
+		if (!tdc->config_init)
+			continue;
+
+		if (tdma->chip_data->support_separate_wcount_reg)
+			tdc_write(tdc, TEGRA_APBDMA_CHAN_WCOUNT,
+				  ch_reg->wcount);
 		tdc_write(tdc, TEGRA_APBDMA_CHAN_APBSEQ, ch_reg->apb_seq);
 		tdc_write(tdc, TEGRA_APBDMA_CHAN_APBPTR, ch_reg->apb_ptr);
 		tdc_write(tdc, TEGRA_APBDMA_CHAN_AHBSEQ, ch_reg->ahb_seq);
@@ -1592,16 +1601,14 @@
 	}
 
 	/* Disable clock */
-	tegra_dma_runtime_suspend(dev);
+	pm_runtime_put(dev);
 	return 0;
 }
 #endif
 
 static const struct dev_pm_ops tegra_dma_dev_pm_ops = {
-#ifdef CONFIG_PM
-	.runtime_suspend = tegra_dma_runtime_suspend,
-	.runtime_resume = tegra_dma_runtime_resume,
-#endif
+	SET_RUNTIME_PM_OPS(tegra_dma_runtime_suspend, tegra_dma_runtime_resume,
+			   NULL)
 	SET_SYSTEM_SLEEP_PM_OPS(tegra_dma_pm_suspend, tegra_dma_pm_resume)
 };
 
diff --git a/drivers/dma/ti-dma-crossbar.c b/drivers/dma/ti-dma-crossbar.c
index a415edb..e107779 100644
--- a/drivers/dma/ti-dma-crossbar.c
+++ b/drivers/dma/ti-dma-crossbar.c
@@ -12,7 +12,6 @@
 #include <linux/init.h>
 #include <linux/list.h>
 #include <linux/io.h>
-#include <linux/idr.h>
 #include <linux/of_address.h>
 #include <linux/of_device.h>
 #include <linux/of_dma.h>
@@ -198,7 +197,8 @@
 	void __iomem *iomem;
 
 	struct dma_router dmarouter;
-	struct idr map_idr;
+	struct mutex mutex;
+	unsigned long *dma_inuse;
 
 	u16 safe_val; /* Value to rest the crossbar lines */
 	u32 xbar_requests; /* number of DMA requests connected to XBAR */
@@ -225,7 +225,9 @@
 		map->xbar_in, map->xbar_out);
 
 	ti_dra7_xbar_write(xbar->iomem, map->xbar_out, xbar->safe_val);
-	idr_remove(&xbar->map_idr, map->xbar_out);
+	mutex_lock(&xbar->mutex);
+	clear_bit(map->xbar_out, xbar->dma_inuse);
+	mutex_unlock(&xbar->mutex);
 	kfree(map);
 }
 
@@ -255,8 +257,17 @@
 		return ERR_PTR(-ENOMEM);
 	}
 
-	map->xbar_out = idr_alloc(&xbar->map_idr, NULL, 0, xbar->dma_requests,
-				  GFP_KERNEL);
+	mutex_lock(&xbar->mutex);
+	map->xbar_out = find_first_zero_bit(xbar->dma_inuse,
+					    xbar->dma_requests);
+	mutex_unlock(&xbar->mutex);
+	if (map->xbar_out == xbar->dma_requests) {
+		dev_err(&pdev->dev, "Run out of free DMA requests\n");
+		kfree(map);
+		return ERR_PTR(-ENOMEM);
+	}
+	set_bit(map->xbar_out, xbar->dma_inuse);
+
 	map->xbar_in = (u16)dma_spec->args[0];
 
 	dma_spec->args[0] = map->xbar_out + xbar->dma_offset;
@@ -278,17 +289,29 @@
 		.compatible = "ti,edma3",
 		.data = (void *)TI_XBAR_EDMA_OFFSET,
 	},
+	{
+		.compatible = "ti,edma3-tpcc",
+		.data = (void *)TI_XBAR_EDMA_OFFSET,
+	},
 	{},
 };
 
+static inline void ti_dra7_xbar_reserve(int offset, int len, unsigned long *p)
+{
+	for (; len > 0; len--)	/* reserved == marked in use in the bitmap */
+		set_bit(offset + (len - 1), p);
+}
+
 static int ti_dra7_xbar_probe(struct platform_device *pdev)
 {
 	struct device_node *node = pdev->dev.of_node;
 	const struct of_device_id *match;
 	struct device_node *dma_node;
 	struct ti_dra7_xbar_data *xbar;
+	struct property *prop;
 	struct resource *res;
 	u32 safe_val;
+	size_t sz;
 	void __iomem *iomem;
 	int i, ret;
 
@@ -299,8 +322,6 @@
 	if (!xbar)
 		return -ENOMEM;
 
-	idr_init(&xbar->map_idr);
-
 	dma_node = of_parse_phandle(node, "dma-masters", 0);
 	if (!dma_node) {
 		dev_err(&pdev->dev, "Can't get DMA master node\n");
@@ -322,6 +343,12 @@
 	}
 	of_node_put(dma_node);
 
+	xbar->dma_inuse = devm_kcalloc(&pdev->dev,
+				       BITS_TO_LONGS(xbar->dma_requests),
+				       sizeof(unsigned long), GFP_KERNEL);
+	if (!xbar->dma_inuse)
+		return -ENOMEM;
+
 	if (of_property_read_u32(node, "dma-requests", &xbar->xbar_requests)) {
 		dev_info(&pdev->dev,
 			 "Missing XBAR input information, using %u.\n",
@@ -332,6 +359,33 @@
 	if (!of_property_read_u32(node, "ti,dma-safe-map", &safe_val))
 		xbar->safe_val = (u16)safe_val;
 
+
+	prop = of_find_property(node, "ti,reserved-dma-request-ranges", &sz);
+	if (prop) {
+		const char pname[] = "ti,reserved-dma-request-ranges";
+		u32 (*rsv_events)[2];
+		size_t nelm = sz / sizeof(*rsv_events);
+		int i;
+
+		if (!nelm)
+			return -EINVAL;
+
+		rsv_events = kcalloc(nelm, sizeof(*rsv_events), GFP_KERNEL);
+		if (!rsv_events)
+			return -ENOMEM;
+
+		ret = of_property_read_u32_array(node, pname, (u32 *)rsv_events,
+						 nelm * 2);
+		if (ret)
+			return ret;
+
+		for (i = 0; i < nelm; i++) {
+			ti_dra7_xbar_reserve(rsv_events[i][0], rsv_events[i][1],
+					     xbar->dma_inuse);
+		}
+		kfree(rsv_events);
+	}
+
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	iomem = devm_ioremap_resource(&pdev->dev, res);
 	if (IS_ERR(iomem))
@@ -343,18 +397,23 @@
 	xbar->dmarouter.route_free = ti_dra7_xbar_free;
 	xbar->dma_offset = (u32)match->data;
 
+	mutex_init(&xbar->mutex);
 	platform_set_drvdata(pdev, xbar);
 
 	/* Reset the crossbar */
-	for (i = 0; i < xbar->dma_requests; i++)
-		ti_dra7_xbar_write(xbar->iomem, i, xbar->safe_val);
+	for (i = 0; i < xbar->dma_requests; i++) {
+		if (!test_bit(i, xbar->dma_inuse))
+			ti_dra7_xbar_write(xbar->iomem, i, xbar->safe_val);
+	}
 
 	ret = of_dma_router_register(node, ti_dra7_xbar_route_allocate,
 				     &xbar->dmarouter);
 	if (ret) {
 		/* Restore the defaults for the crossbar */
-		for (i = 0; i < xbar->dma_requests; i++)
-			ti_dra7_xbar_write(xbar->iomem, i, i);
+		for (i = 0; i < xbar->dma_requests; i++) {
+			if (!test_bit(i, xbar->dma_inuse))
+				ti_dra7_xbar_write(xbar->iomem, i, i);
+		}
 	}
 
 	return ret;
diff --git a/drivers/dma/virt-dma.c b/drivers/dma/virt-dma.c
index 6f80432..a35c211 100644
--- a/drivers/dma/virt-dma.c
+++ b/drivers/dma/virt-dma.c
@@ -29,7 +29,7 @@
 	spin_lock_irqsave(&vc->lock, flags);
 	cookie = dma_cookie_assign(tx);
 
-	list_add_tail(&vd->node, &vc->desc_submitted);
+	list_move_tail(&vd->node, &vc->desc_submitted);
 	spin_unlock_irqrestore(&vc->lock, flags);
 
 	dev_dbg(vc->chan.device->dev, "vchan %p: txd %p[%x]: submitted\n",
@@ -39,6 +39,33 @@
 }
 EXPORT_SYMBOL_GPL(vchan_tx_submit);
 
+/**
+ * vchan_tx_desc_free - free a reusable descriptor
+ * @tx: the transfer
+ *
+ * This function frees a previously allocated reusable descriptor. The only
+ * other way is to clear the DMA_CTRL_REUSE flag and submit the transfer one
+ * last time.
+ *
+ * Returns 0 upon success
+ */
+int vchan_tx_desc_free(struct dma_async_tx_descriptor *tx)
+{
+	struct virt_dma_chan *vc = to_virt_chan(tx->chan);
+	struct virt_dma_desc *vd = to_virt_desc(tx);
+	unsigned long flags;
+
+	spin_lock_irqsave(&vc->lock, flags);
+	list_del(&vd->node);
+	spin_unlock_irqrestore(&vc->lock, flags);
+
+	dev_dbg(vc->chan.device->dev, "vchan %p: txd %p[%x]: freeing\n",
+		vc, vd, vd->tx.cookie);
+	vc->desc_free(vd);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(vchan_tx_desc_free);
+
 struct virt_dma_desc *vchan_find_desc(struct virt_dma_chan *vc,
 	dma_cookie_t cookie)
 {
@@ -83,8 +110,10 @@
 		cb_data = vd->tx.callback_param;
 
 		list_del(&vd->node);
-
-		vc->desc_free(vd);
+		if (dmaengine_desc_test_reuse(&vd->tx))
+			list_add(&vd->node, &vc->desc_allocated);
+		else
+			vc->desc_free(vd);
 
 		if (cb)
 			cb(cb_data);
@@ -96,9 +125,13 @@
 	while (!list_empty(head)) {
 		struct virt_dma_desc *vd = list_first_entry(head,
 			struct virt_dma_desc, node);
-		list_del(&vd->node);
-		dev_dbg(vc->chan.device->dev, "txd %p: freeing\n", vd);
-		vc->desc_free(vd);
+		if (dmaengine_desc_test_reuse(&vd->tx)) {
+			list_move_tail(&vd->node, &vc->desc_allocated);
+		} else {
+			dev_dbg(vc->chan.device->dev, "txd %p: freeing\n", vd);
+			list_del(&vd->node);
+			vc->desc_free(vd);
+		}
 	}
 }
 EXPORT_SYMBOL_GPL(vchan_dma_desc_free_list);
@@ -108,6 +141,7 @@
 	dma_cookie_init(&vc->chan);
 
 	spin_lock_init(&vc->lock);
+	INIT_LIST_HEAD(&vc->desc_allocated);
 	INIT_LIST_HEAD(&vc->desc_submitted);
 	INIT_LIST_HEAD(&vc->desc_issued);
 	INIT_LIST_HEAD(&vc->desc_completed);
diff --git a/drivers/dma/virt-dma.h b/drivers/dma/virt-dma.h
index 2fa4774..d9731ca 100644
--- a/drivers/dma/virt-dma.h
+++ b/drivers/dma/virt-dma.h
@@ -29,6 +29,7 @@
 	spinlock_t lock;
 
 	/* protected by vc.lock */
+	struct list_head desc_allocated;
 	struct list_head desc_submitted;
 	struct list_head desc_issued;
 	struct list_head desc_completed;
@@ -55,10 +56,17 @@
 	struct virt_dma_desc *vd, unsigned long tx_flags)
 {
 	extern dma_cookie_t vchan_tx_submit(struct dma_async_tx_descriptor *);
+	extern int vchan_tx_desc_free(struct dma_async_tx_descriptor *);
+	unsigned long flags;
 
 	dma_async_tx_descriptor_init(&vd->tx, &vc->chan);
 	vd->tx.flags = tx_flags;
 	vd->tx.tx_submit = vchan_tx_submit;
+	vd->tx.desc_free = vchan_tx_desc_free;
+
+	spin_lock_irqsave(&vc->lock, flags);
+	list_add_tail(&vd->node, &vc->desc_allocated);
+	spin_unlock_irqrestore(&vc->lock, flags);
 
 	return &vd->tx;
 }
@@ -134,6 +142,7 @@
 static inline void vchan_get_all_descriptors(struct virt_dma_chan *vc,
 	struct list_head *head)
 {
+	list_splice_tail_init(&vc->desc_allocated, head);
 	list_splice_tail_init(&vc->desc_submitted, head);
 	list_splice_tail_init(&vc->desc_issued, head);
 	list_splice_tail_init(&vc->desc_completed, head);
@@ -141,14 +150,30 @@
 
 static inline void vchan_free_chan_resources(struct virt_dma_chan *vc)
 {
+	struct virt_dma_desc *vd;
 	unsigned long flags;
 	LIST_HEAD(head);
 
 	spin_lock_irqsave(&vc->lock, flags);
 	vchan_get_all_descriptors(vc, &head);
+	list_for_each_entry(vd, &head, node)
+		dmaengine_desc_clear_reuse(&vd->tx);
 	spin_unlock_irqrestore(&vc->lock, flags);
 
 	vchan_dma_desc_free_list(vc, &head);
 }
 
+/**
+ * vchan_synchronize() - synchronize callback execution to the current context
+ * @vc: virtual channel to synchronize
+ *
+ * Makes sure that all scheduled or active callbacks have finished running. For
+ * proper operation the caller has to ensure that no new callbacks are scheduled
+ * after the invocation of this function started.
+ */
+static inline void vchan_synchronize(struct virt_dma_chan *vc)
+{
+	tasklet_kill(&vc->task);
+}
+
 #endif
diff --git a/include/linux/dca.h b/include/linux/dca.h
index d27a7a0..ad956c2 100644
--- a/include/linux/dca.h
+++ b/include/linux/dca.h
@@ -34,7 +34,7 @@
 
 struct dca_provider {
 	struct list_head	node;
-	struct dca_ops		*ops;
+	const struct dca_ops	*ops;
 	struct device 		*cd;
 	int			 id;
 };
@@ -53,7 +53,8 @@
 	int	(*dev_managed)      (struct dca_provider *, struct device *);
 };
 
-struct dca_provider *alloc_dca_provider(struct dca_ops *ops, int priv_size);
+struct dca_provider *alloc_dca_provider(const struct dca_ops *ops,
+					int priv_size);
 void free_dca_provider(struct dca_provider *dca);
 int register_dca_provider(struct dca_provider *dca, struct device *dev);
 void unregister_dca_provider(struct dca_provider *dca, struct device *dev);
diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index c47c68e..16a1cad 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -607,11 +607,38 @@
 };
 
 /**
+ * struct dma_slave_map - associates slave device and its slave channel with
+ * parameter to be used by a filter function
+ * @devname: name of the device
+ * @slave: slave channel name
+ * @param: opaque parameter to pass to struct dma_filter.fn
+ */
+struct dma_slave_map {
+	const char *devname;
+	const char *slave;
+	void *param;
+};
+
+/**
+ * struct dma_filter - information for slave device/channel to filter_fn/param
+ * mapping
+ * @fn: filter function callback
+ * @mapcnt: number of slave device/channel in the map
+ * @map: array of channel to filter mapping data
+ */
+struct dma_filter {
+	dma_filter_fn fn;
+	int mapcnt;
+	const struct dma_slave_map *map;
+};
+
+/**
  * struct dma_device - info on the entity supplying DMA services
  * @chancnt: how many DMA channels are supported
  * @privatecnt: how many DMA channels are requested by dma_request_channel
  * @channels: the list of struct dma_chan
  * @global_node: list_head for global dma_device_list
+ * @filter: information for device/slave to filter function/param mapping
  * @cap_mask: one or more dma_capability flags
  * @max_xor: maximum number of xor sources, 0 if no capability
  * @max_pq: maximum number of PQ sources and PQ-continue capability
@@ -654,11 +681,14 @@
  *	paused. Returns 0 or an error code
  * @device_terminate_all: Aborts all transfers on a channel. Returns 0
  *	or an error code
+ * @device_synchronize: Synchronizes the termination of transfers to the
+ *  current context.
  * @device_tx_status: poll for transaction completion, the optional
  *	txstate parameter can be supplied with a pointer to get a
  *	struct with auxiliary transfer status information, otherwise the call
  *	will just return a simple status code
  * @device_issue_pending: push pending transactions to hardware
+ * @descriptor_reuse: a submitted transfer can be resubmitted after completion
  */
 struct dma_device {
 
@@ -666,6 +696,7 @@
 	unsigned int privatecnt;
 	struct list_head channels;
 	struct list_head global_node;
+	struct dma_filter filter;
 	dma_cap_mask_t  cap_mask;
 	unsigned short max_xor;
 	unsigned short max_pq;
@@ -681,6 +712,7 @@
 	u32 src_addr_widths;
 	u32 dst_addr_widths;
 	u32 directions;
+	bool descriptor_reuse;
 	enum dma_residue_granularity residue_granularity;
 
 	int (*device_alloc_chan_resources)(struct dma_chan *chan);
@@ -737,6 +769,7 @@
 	int (*device_pause)(struct dma_chan *chan);
 	int (*device_resume)(struct dma_chan *chan);
 	int (*device_terminate_all)(struct dma_chan *chan);
+	void (*device_synchronize)(struct dma_chan *chan);
 
 	enum dma_status (*device_tx_status)(struct dma_chan *chan,
 					    dma_cookie_t cookie,
@@ -828,6 +861,13 @@
 			src_sg, src_nents, flags);
 }
 
+/**
+ * dmaengine_terminate_all() - Terminate all active DMA transfers
+ * @chan: The channel for which to terminate the transfers
+ *
+ * This function is DEPRECATED; use either dmaengine_terminate_sync() or
+ * dmaengine_terminate_async() instead.
+ */
 static inline int dmaengine_terminate_all(struct dma_chan *chan)
 {
 	if (chan->device->device_terminate_all)
@@ -836,6 +876,88 @@
 	return -ENOSYS;
 }
 
+/**
+ * dmaengine_terminate_async() - Terminate all active DMA transfers
+ * @chan: The channel for which to terminate the transfers
+ *
+ * Calling this function will terminate all active and pending descriptors
+ * that have previously been submitted to the channel. It is not guaranteed
+ * though that the transfer for the active descriptor has stopped when the
+ * function returns. Furthermore it is possible the complete callback of a
+ * submitted transfer is still running when this function returns.
+ *
+ * dmaengine_synchronize() needs to be called before it is safe to free
+ * any memory that is accessed by previously submitted descriptors or before
+ * freeing any resources accessed from within the completion callback of any
+ * previously submitted descriptors.
+ *
+ * This function can be called from atomic context as well as from within a
+ * complete callback of a descriptor submitted on the same channel.
+ *
+ * If neither of the two conditions above applies, consider using
+ * dmaengine_terminate_sync() instead.
+ */
+static inline int dmaengine_terminate_async(struct dma_chan *chan)
+{
+	if (chan->device->device_terminate_all)
+		return chan->device->device_terminate_all(chan);
+
+	return -EINVAL;
+}
+
+/**
+ * dmaengine_synchronize() - Synchronize DMA channel termination
+ * @chan: The channel to synchronize
+ *
+ * Synchronizes the DMA channel termination to the current context. When this
+ * function returns it is guaranteed that all transfers for previously issued
+ * descriptors have stopped and it is safe to free the memory associated
+ * with them. Furthermore it is guaranteed that all complete callback functions
+ * for a previously submitted descriptor have finished running and it is safe to
+ * free resources accessed from within the complete callbacks.
+ *
+ * The behavior of this function is undefined if dma_async_issue_pending() has
+ * been called between dmaengine_terminate_async() and this function.
+ *
+ * This function must only be called from non-atomic context and must not be
+ * called from within a complete callback of a descriptor submitted on the same
+ * channel.
+ */
+static inline void dmaengine_synchronize(struct dma_chan *chan)
+{
+	might_sleep();
+
+	if (chan->device->device_synchronize)
+		chan->device->device_synchronize(chan);
+}
+
+/**
+ * dmaengine_terminate_sync() - Terminate all active DMA transfers
+ * @chan: The channel for which to terminate the transfers
+ *
+ * Calling this function will terminate all active and pending transfers
+ * that have previously been submitted to the channel. It is similar to
+ * dmaengine_terminate_async() but guarantees that the DMA transfer has actually
+ * stopped and that all complete callbacks have finished running when the
+ * function returns.
+ *
+ * This function must only be called from non-atomic context and must not be
+ * called from within a complete callback of a descriptor submitted on the same
+ * channel.
+ */
+static inline int dmaengine_terminate_sync(struct dma_chan *chan)
+{
+	int ret;
+
+	ret = dmaengine_terminate_async(chan);
+	if (ret)
+		return ret;
+
+	dmaengine_synchronize(chan);
+
+	return 0;
+}
+
 static inline int dmaengine_pause(struct dma_chan *chan)
 {
 	if (chan->device->device_pause)
@@ -1140,9 +1262,11 @@
 void dma_issue_pending_all(void);
 struct dma_chan *__dma_request_channel(const dma_cap_mask_t *mask,
 					dma_filter_fn fn, void *fn_param);
-struct dma_chan *dma_request_slave_channel_reason(struct device *dev,
-						  const char *name);
 struct dma_chan *dma_request_slave_channel(struct device *dev, const char *name);
+
+struct dma_chan *dma_request_chan(struct device *dev, const char *name);
+struct dma_chan *dma_request_chan_by_mask(const dma_cap_mask_t *mask);
+
 void dma_release_channel(struct dma_chan *chan);
 int dma_get_slave_caps(struct dma_chan *chan, struct dma_slave_caps *caps);
 #else
@@ -1166,16 +1290,21 @@
 {
 	return NULL;
 }
-static inline struct dma_chan *dma_request_slave_channel_reason(
-					struct device *dev, const char *name)
-{
-	return ERR_PTR(-ENODEV);
-}
 static inline struct dma_chan *dma_request_slave_channel(struct device *dev,
 							 const char *name)
 {
 	return NULL;
 }
+static inline struct dma_chan *dma_request_chan(struct device *dev,
+						const char *name)
+{
+	return ERR_PTR(-ENODEV);
+}
+static inline struct dma_chan *dma_request_chan_by_mask(
+						const dma_cap_mask_t *mask)
+{
+	return ERR_PTR(-ENODEV);
+}
 static inline void dma_release_channel(struct dma_chan *chan)
 {
 }
@@ -1186,6 +1315,8 @@
 }
 #endif
 
+#define dma_request_slave_channel_reason(dev, name) dma_request_chan(dev, name)
+
 static inline int dmaengine_desc_set_reuse(struct dma_async_tx_descriptor *tx)
 {
 	struct dma_slave_caps caps;
diff --git a/include/linux/omap-dma.h b/include/linux/omap-dma.h
index 88fa8af..1d99b61 100644
--- a/include/linux/omap-dma.h
+++ b/include/linux/omap-dma.h
@@ -267,6 +267,9 @@
 	u8	type;
 };
 
+#define SDMA_FILTER_PARAM(hw_req)	((int[]) { (hw_req) })
+struct dma_slave_map;
+
 /* System DMA platform data structure */
 struct omap_system_dma_plat_info {
 	const struct omap_dma_reg *reg_map;
@@ -278,6 +281,9 @@
 	void (*clear_dma)(int lch);
 	void (*dma_write)(u32 val, int reg, int lch);
 	u32 (*dma_read)(int reg, int lch);
+
+	const struct dma_slave_map *slave_map;
+	int slavecnt;
 };
 
 #ifdef CONFIG_ARCH_OMAP2PLUS
diff --git a/include/linux/platform_data/dma-rcar-hpbdma.h b/include/linux/platform_data/dma-rcar-hpbdma.h
deleted file mode 100644
index 648b8ea..0000000
--- a/include/linux/platform_data/dma-rcar-hpbdma.h
+++ /dev/null
@@ -1,103 +0,0 @@
-/*
- * Copyright (C) 2011-2013 Renesas Electronics Corporation
- * Copyright (C) 2013 Cogent Embedded, Inc.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2
- * as published by the Free Software Foundation.
- */
-
-#ifndef __DMA_RCAR_HPBDMA_H
-#define __DMA_RCAR_HPBDMA_H
-
-#include <linux/bitops.h>
-#include <linux/types.h>
-
-/* Transmit sizes and respective register values */
-enum {
-	XMIT_SZ_8BIT	= 0,
-	XMIT_SZ_16BIT	= 1,
-	XMIT_SZ_32BIT	= 2,
-	XMIT_SZ_MAX
-};
-
-/* DMA control register (DCR) bits */
-#define HPB_DMAE_DCR_DTAMD		(1u << 26)
-#define HPB_DMAE_DCR_DTAC		(1u << 25)
-#define HPB_DMAE_DCR_DTAU		(1u << 24)
-#define HPB_DMAE_DCR_DTAU1		(1u << 23)
-#define HPB_DMAE_DCR_SWMD		(1u << 22)
-#define HPB_DMAE_DCR_BTMD		(1u << 21)
-#define HPB_DMAE_DCR_PKMD		(1u << 20)
-#define HPB_DMAE_DCR_CT			(1u << 18)
-#define HPB_DMAE_DCR_ACMD		(1u << 17)
-#define HPB_DMAE_DCR_DIP		(1u << 16)
-#define HPB_DMAE_DCR_SMDL		(1u << 13)
-#define HPB_DMAE_DCR_SPDAM		(1u << 12)
-#define HPB_DMAE_DCR_SDRMD_MASK		(3u << 10)
-#define HPB_DMAE_DCR_SDRMD_MOD		(0u << 10)
-#define HPB_DMAE_DCR_SDRMD_AUTO		(1u << 10)
-#define HPB_DMAE_DCR_SDRMD_TIMER	(2u << 10)
-#define HPB_DMAE_DCR_SPDS_MASK		(3u << 8)
-#define HPB_DMAE_DCR_SPDS_8BIT		(0u << 8)
-#define HPB_DMAE_DCR_SPDS_16BIT		(1u << 8)
-#define HPB_DMAE_DCR_SPDS_32BIT		(2u << 8)
-#define HPB_DMAE_DCR_DMDL		(1u << 5)
-#define HPB_DMAE_DCR_DPDAM		(1u << 4)
-#define HPB_DMAE_DCR_DDRMD_MASK		(3u << 2)
-#define HPB_DMAE_DCR_DDRMD_MOD		(0u << 2)
-#define HPB_DMAE_DCR_DDRMD_AUTO		(1u << 2)
-#define HPB_DMAE_DCR_DDRMD_TIMER	(2u << 2)
-#define HPB_DMAE_DCR_DPDS_MASK		(3u << 0)
-#define HPB_DMAE_DCR_DPDS_8BIT		(0u << 0)
-#define HPB_DMAE_DCR_DPDS_16BIT		(1u << 0)
-#define HPB_DMAE_DCR_DPDS_32BIT		(2u << 0)
-
-/* Asynchronous reset register (ASYNCRSTR) bits */
-#define HPB_DMAE_ASYNCRSTR_ASRST41	BIT(10)
-#define HPB_DMAE_ASYNCRSTR_ASRST40	BIT(9)
-#define HPB_DMAE_ASYNCRSTR_ASRST39	BIT(8)
-#define HPB_DMAE_ASYNCRSTR_ASRST27	BIT(7)
-#define HPB_DMAE_ASYNCRSTR_ASRST26	BIT(6)
-#define HPB_DMAE_ASYNCRSTR_ASRST25	BIT(5)
-#define HPB_DMAE_ASYNCRSTR_ASRST24	BIT(4)
-#define HPB_DMAE_ASYNCRSTR_ASRST23	BIT(3)
-#define HPB_DMAE_ASYNCRSTR_ASRST22	BIT(2)
-#define HPB_DMAE_ASYNCRSTR_ASRST21	BIT(1)
-#define HPB_DMAE_ASYNCRSTR_ASRST20	BIT(0)
-
-struct hpb_dmae_slave_config {
-	unsigned int	id;
-	dma_addr_t	addr;
-	u32		dcr;
-	u32		port;
-	u32		rstr;
-	u32		mdr;
-	u32		mdm;
-	u32		flags;
-#define	HPB_DMAE_SET_ASYNC_RESET	BIT(0)
-#define	HPB_DMAE_SET_ASYNC_MODE		BIT(1)
-	u32		dma_ch;
-};
-
-#define HPB_DMAE_CHANNEL(_irq, _s_id)	\
-{					\
-	.ch_irq		= _irq,		\
-	.s_id		= _s_id,	\
-}
-
-struct hpb_dmae_channel {
-	unsigned int	ch_irq;
-	unsigned int	s_id;
-};
-
-struct hpb_dmae_pdata {
-	const struct hpb_dmae_slave_config *slaves;
-	int num_slaves;
-	const struct hpb_dmae_channel *channels;
-	int num_channels;
-	const unsigned int ts_shift[XMIT_SZ_MAX];
-	int num_hw_channels;
-};
-
-#endif
diff --git a/include/linux/platform_data/edma.h b/include/linux/platform_data/edma.h
index 4299f4b..0a533f9 100644
--- a/include/linux/platform_data/edma.h
+++ b/include/linux/platform_data/edma.h
@@ -53,12 +53,16 @@
 #define EDMA_CTLR(i)			((i) >> 16)
 #define EDMA_CHAN_SLOT(i)		((i) & 0xffff)
 
+#define EDMA_FILTER_PARAM(ctlr, chan)	((int[]) { EDMA_CTLR_CHAN(ctlr, chan) })
+
 struct edma_rsv_info {
 
 	const s16	(*rsv_chans)[2];
 	const s16	(*rsv_slots)[2];
 };
 
+struct dma_slave_map;
+
 /* platform_data for EDMA driver */
 struct edma_soc_info {
 	/*
@@ -76,6 +80,9 @@
 
 	s8	(*queue_priority_mapping)[2];
 	const s16	(*xbar_chans)[2];
+
+	const struct dma_slave_map *slave_map;
+	int slavecnt;
 };
 
 #endif
diff --git a/sound/core/pcm_dmaengine.c b/sound/core/pcm_dmaengine.c
index fba365a..697c166 100644
--- a/sound/core/pcm_dmaengine.c
+++ b/sound/core/pcm_dmaengine.c
@@ -202,13 +202,13 @@
 		if (runtime->info & SNDRV_PCM_INFO_PAUSE)
 			dmaengine_pause(prtd->dma_chan);
 		else
-			dmaengine_terminate_all(prtd->dma_chan);
+			dmaengine_terminate_async(prtd->dma_chan);
 		break;
 	case SNDRV_PCM_TRIGGER_PAUSE_PUSH:
 		dmaengine_pause(prtd->dma_chan);
 		break;
 	case SNDRV_PCM_TRIGGER_STOP:
-		dmaengine_terminate_all(prtd->dma_chan);
+		dmaengine_terminate_async(prtd->dma_chan);
 		break;
 	default:
 		return -EINVAL;
@@ -346,6 +346,7 @@
 {
 	struct dmaengine_pcm_runtime_data *prtd = substream_to_prtd(substream);
 
+	dmaengine_synchronize(prtd->dma_chan);
 	kfree(prtd);
 
 	return 0;
@@ -362,9 +363,11 @@
 {
 	struct dmaengine_pcm_runtime_data *prtd = substream_to_prtd(substream);
 
+	dmaengine_synchronize(prtd->dma_chan);
 	dma_release_channel(prtd->dma_chan);
+	kfree(prtd);
 
-	return snd_dmaengine_pcm_close(substream);
+	return 0;
 }
 EXPORT_SYMBOL_GPL(snd_dmaengine_pcm_close_release_chan);