diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2016-03-17 12:34:54 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2016-03-17 12:34:54 -0700 |
commit | b5b131c7473e17275debcdf1c226f452dc3876ed (patch) | |
tree | a272e947c38213d4ee989bb3f863a8091d50426b /drivers | |
parent | c7eec380e85a427983782df744f0fb745d867170 (diff) | |
parent | 896e041e8e8efb34520d033a693ef25391f9c9f0 (diff) |
Merge tag 'dmaengine-4.6-rc1' of git://git.infradead.org/users/vkoul/slave-dma
Pull dmaengine updates from Vinod Koul:
"This is smallish update with minor changes to core and new driver and
usual updates. Nothing super exciting here..
- We have made slave address as physical to enable driver to do the
mapping.
- We now expose the maxburst for slave dma as new capability so
clients can know this and program accordingly
- addition of device synchronize callbacks on omap and edma.
- pl330 updates to support DMAFLUSHP for Rockchip platforms.
- Updates and improved sg handling in Xilinx VDMA driver.
- New hidma qualcomm dma driver, though some bits are still in
progress"
* tag 'dmaengine-4.6-rc1' of git://git.infradead.org/users/vkoul/slave-dma: (40 commits)
dmaengine: IOATDMA: revise channel reset workaround on CB3.3 platforms
dmaengine: add Qualcomm Technologies HIDMA channel driver
dmaengine: add Qualcomm Technologies HIDMA management driver
dmaengine: hidma: Add Device Tree binding
dmaengine: qcom_bam_dma: move to qcom directory
dmaengine: tegra: Move of_device_id table near to its user
dmaengine: xilinx_vdma: Remove unnecessary variable initializations
dmaengine: sirf: use __maybe_unused to hide pm functions
dmaengine: rcar-dmac: clear pertinence number of channels
dmaengine: sh: shdmac: don't open code of_device_get_match_data()
dmaengine: tegra: don't open code of_device_get_match_data()
dmaengine: qcom_bam_dma: Make driver work for BE
dmaengine: sun4i: support module autoloading
dma/mic_x100_dma: IS_ERR() vs PTR_ERR() typo
dmaengine: xilinx_vdma: Use readl_poll_timeout instead of do while loop's
dmaengine: xilinx_vdma: Simplify spin lock handling
dmaengine: xilinx_vdma: Fix issues with non-parking mode
dmaengine: xilinx_vdma: Improve SG engine handling
dmaengine: pl330: fix to support the burst mode
dmaengine: make slave address physical
...
Diffstat (limited to 'drivers')
33 files changed, 1953 insertions, 462 deletions
diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig index 79b1390f2016..d96d87c56f2e 100644 --- a/drivers/dma/Kconfig +++ b/drivers/dma/Kconfig @@ -341,12 +341,13 @@ config MV_XOR config MXS_DMA bool "MXS DMA support" - depends on SOC_IMX23 || SOC_IMX28 || SOC_IMX6Q + depends on SOC_IMX23 || SOC_IMX28 || SOC_IMX6Q || SOC_IMX6UL select STMP_DEVICE select DMA_ENGINE help Support the MXS DMA engine. This engine including APBH-DMA - and APBX-DMA is integrated into Freescale i.MX23/28/MX6Q/MX6DL chips. + and APBX-DMA is integrated into Freescale + i.MX23/28/MX6Q/MX6DL/MX6UL chips. config MX3_IPU bool "MX3x Image Processing Unit support" @@ -408,15 +409,6 @@ config PXA_DMA 16 to 32 channels for peripheral to memory or memory to memory transfers. -config QCOM_BAM_DMA - tristate "QCOM BAM DMA support" - depends on ARCH_QCOM || (COMPILE_TEST && OF && ARM) - select DMA_ENGINE - select DMA_VIRTUAL_CHANNELS - ---help--- - Enable support for the QCOM BAM DMA controller. This controller - provides DMA capabilities for a variety of on-chip devices. - config SIRF_DMA tristate "CSR SiRFprimaII/SiRFmarco DMA support" depends on ARCH_SIRF @@ -539,6 +531,8 @@ config ZX_DMA # driver files source "drivers/dma/bestcomm/Kconfig" +source "drivers/dma/qcom/Kconfig" + source "drivers/dma/dw/Kconfig" source "drivers/dma/hsu/Kconfig" diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile index 2dd0a067a0ca..6084127c1486 100644 --- a/drivers/dma/Makefile +++ b/drivers/dma/Makefile @@ -52,7 +52,6 @@ obj-$(CONFIG_PCH_DMA) += pch_dma.o obj-$(CONFIG_PL330_DMA) += pl330.o obj-$(CONFIG_PPC_BESTCOMM) += bestcomm/ obj-$(CONFIG_PXA_DMA) += pxa_dma.o -obj-$(CONFIG_QCOM_BAM_DMA) += qcom_bam_dma.o obj-$(CONFIG_RENESAS_DMA) += sh/ obj-$(CONFIG_SIRF_DMA) += sirf-dma.o obj-$(CONFIG_STE_DMA40) += ste_dma40.o ste_dma40_ll.o @@ -67,4 +66,5 @@ obj-$(CONFIG_TI_EDMA) += edma.o obj-$(CONFIG_XGENE_DMA) += xgene-dma.o obj-$(CONFIG_ZX_DMA) += zx296702_dma.o +obj-y += qcom/ obj-y += xilinx/ diff --git a/drivers/dma/acpi-dma.c b/drivers/dma/acpi-dma.c index eed6bda01790..4a748c3435d7 100644 --- a/drivers/dma/acpi-dma.c +++ b/drivers/dma/acpi-dma.c @@ -438,7 +438,7 @@ struct dma_chan *acpi_dma_request_slave_chan_by_name(struct device *dev, return ERR_PTR(-ENODEV); } - dev_dbg(dev, "found DMA channel \"%s\" at index %d\n", name, index); + dev_dbg(dev, "Looking for DMA channel \"%s\" at index %d...\n", name, index); return acpi_dma_request_slave_chan_by_index(dev, index); } EXPORT_SYMBOL_GPL(acpi_dma_request_slave_chan_by_name); diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c index c50a247be2e0..0cb259c59916 100644 --- a/drivers/dma/dmaengine.c +++ b/drivers/dma/dmaengine.c @@ -496,6 +496,7 @@ int dma_get_slave_caps(struct dma_chan *chan, struct dma_slave_caps *caps) caps->src_addr_widths = device->src_addr_widths; caps->dst_addr_widths = device->dst_addr_widths; caps->directions = device->directions; + caps->max_burst = device->max_burst; caps->residue_granularity = device->residue_granularity; caps->descriptor_reuse = device->descriptor_reuse; diff --git a/drivers/dma/dw/regs.h b/drivers/dma/dw/regs.h index 241ff2b1402b..0a50c18d85b8 100644 --- a/drivers/dma/dw/regs.h +++ b/drivers/dma/dw/regs.h @@ -150,7 +150,7 @@ enum dw_dma_msize { #define DWC_CTLL_DST_INC (0<<7) /* DAR update/not */ #define DWC_CTLL_DST_DEC (1<<7) #define DWC_CTLL_DST_FIX (2<<7) -#define DWC_CTLL_SRC_INC (0<<7) /* SAR update/not */ +#define DWC_CTLL_SRC_INC (0<<9) /* SAR update/not */ #define DWC_CTLL_SRC_DEC (1<<9) #define DWC_CTLL_SRC_FIX (2<<9) #define DWC_CTLL_DST_MSIZE(n) ((n)<<11) /* burst, #elements */ diff --git a/drivers/dma/edma.c b/drivers/dma/edma.c index e3d7fcb69b4c..ee3463e774f8 100644 --- a/drivers/dma/edma.c +++ b/drivers/dma/edma.c @@ -869,6 +869,13 @@ static int edma_terminate_all(struct dma_chan *chan) return 0; } +static void edma_synchronize(struct dma_chan *chan) +{ + struct edma_chan *echan = to_edma_chan(chan); + + vchan_synchronize(&echan->vchan); +} + static int edma_slave_config(struct dma_chan *chan, struct dma_slave_config *cfg) { @@ -1365,36 +1372,36 @@ static struct dma_async_tx_descriptor *edma_prep_dma_cyclic( static void edma_completion_handler(struct edma_chan *echan) { struct device *dev = echan->vchan.chan.device->dev; - struct edma_desc *edesc = echan->edesc; - - if (!edesc) - return; + struct edma_desc *edesc; spin_lock(&echan->vchan.lock); - if (edesc->cyclic) { - vchan_cyclic_callback(&edesc->vdesc); - spin_unlock(&echan->vchan.lock); - return; - } else if (edesc->processed == edesc->pset_nr) { - edesc->residue = 0; - edma_stop(echan); - vchan_cookie_complete(&edesc->vdesc); - echan->edesc = NULL; - - dev_dbg(dev, "Transfer completed on channel %d\n", - echan->ch_num); - } else { - dev_dbg(dev, "Sub transfer completed on channel %d\n", - echan->ch_num); - - edma_pause(echan); - - /* Update statistics for tx_status */ - edesc->residue -= edesc->sg_len; - edesc->residue_stat = edesc->residue; - edesc->processed_stat = edesc->processed; + edesc = echan->edesc; + if (edesc) { + if (edesc->cyclic) { + vchan_cyclic_callback(&edesc->vdesc); + spin_unlock(&echan->vchan.lock); + return; + } else if (edesc->processed == edesc->pset_nr) { + edesc->residue = 0; + edma_stop(echan); + vchan_cookie_complete(&edesc->vdesc); + echan->edesc = NULL; + + dev_dbg(dev, "Transfer completed on channel %d\n", + echan->ch_num); + } else { + dev_dbg(dev, "Sub transfer completed on channel %d\n", + echan->ch_num); + + edma_pause(echan); + + /* Update statistics for tx_status */ + edesc->residue -= edesc->sg_len; + edesc->residue_stat = edesc->residue; + edesc->processed_stat = edesc->processed; + } + edma_execute(echan); } - edma_execute(echan); spin_unlock(&echan->vchan.lock); } @@ -1837,6 +1844,7 @@ static void edma_dma_init(struct edma_cc *ecc, bool legacy_mode) s_ddev->device_pause = edma_dma_pause; s_ddev->device_resume = edma_dma_resume; s_ddev->device_terminate_all = edma_terminate_all; + s_ddev->device_synchronize = edma_synchronize; s_ddev->src_addr_widths = EDMA_DMA_BUSWIDTHS; s_ddev->dst_addr_widths = EDMA_DMA_BUSWIDTHS; @@ -1862,6 +1870,7 @@ static void edma_dma_init(struct edma_cc *ecc, bool legacy_mode) m_ddev->device_pause = edma_dma_pause; m_ddev->device_resume = edma_dma_resume; m_ddev->device_terminate_all = edma_terminate_all; + m_ddev->device_synchronize = edma_synchronize; m_ddev->src_addr_widths = EDMA_DMA_BUSWIDTHS; m_ddev->dst_addr_widths = EDMA_DMA_BUSWIDTHS; diff --git a/drivers/dma/ep93xx_dma.c b/drivers/dma/ep93xx_dma.c index 57ff46284f15..21f08cc3352b 100644 --- a/drivers/dma/ep93xx_dma.c +++ b/drivers/dma/ep93xx_dma.c @@ -421,23 +421,25 @@ static int m2p_hw_interrupt(struct ep93xx_dma_chan *edmac) desc->size); } - switch (irq_status & (M2P_INTERRUPT_STALL | M2P_INTERRUPT_NFB)) { - case M2P_INTERRUPT_STALL: - /* Disable interrupts */ - control = readl(edmac->regs + M2P_CONTROL); - control &= ~(M2P_CONTROL_STALLINT | M2P_CONTROL_NFBINT); - m2p_set_control(edmac, control); - - return INTERRUPT_DONE; - - case M2P_INTERRUPT_NFB: - if (ep93xx_dma_advance_active(edmac)) - m2p_fill_desc(edmac); + /* + * Even latest E2 silicon revision sometimes assert STALL interrupt + * instead of NFB. Therefore we treat them equally, basing on the + * amount of data we still have to transfer. + */ + if (!(irq_status & (M2P_INTERRUPT_STALL | M2P_INTERRUPT_NFB))) + return INTERRUPT_UNKNOWN; + if (ep93xx_dma_advance_active(edmac)) { + m2p_fill_desc(edmac); return INTERRUPT_NEXT_BUFFER; } - return INTERRUPT_UNKNOWN; + /* Disable interrupts */ + control = readl(edmac->regs + M2P_CONTROL); + control &= ~(M2P_CONTROL_STALLINT | M2P_CONTROL_NFBINT); + m2p_set_control(edmac, control); + + return INTERRUPT_DONE; } /* diff --git a/drivers/dma/idma64.c b/drivers/dma/idma64.c index 3cb7b2c78197..1953e57505f4 100644 --- a/drivers/dma/idma64.c +++ b/drivers/dma/idma64.c @@ -289,6 +289,9 @@ static void idma64_desc_fill(struct idma64_chan *idma64c, /* Trigger an interrupt after the last block is transfered */ lli->ctllo |= IDMA64C_CTLL_INT_EN; + + /* Disable LLP transfer in the last block */ + lli->ctllo &= ~(IDMA64C_CTLL_LLP_S_EN | IDMA64C_CTLL_LLP_D_EN); } static struct dma_async_tx_descriptor *idma64_prep_slave_sg( diff --git a/drivers/dma/idma64.h b/drivers/dma/idma64.h index 8423f13ed0da..dc6874424188 100644 --- a/drivers/dma/idma64.h +++ b/drivers/dma/idma64.h @@ -71,7 +71,7 @@ #define IDMA64C_CFGH_SRC_PER(x) ((x) << 0) /* src peripheral */ #define IDMA64C_CFGH_DST_PER(x) ((x) << 4) /* dst peripheral */ #define IDMA64C_CFGH_RD_ISSUE_THD(x) ((x) << 8) -#define IDMA64C_CFGH_RW_ISSUE_THD(x) ((x) << 18) +#define IDMA64C_CFGH_WR_ISSUE_THD(x) ((x) << 18) /* Interrupt registers */ diff --git a/drivers/dma/ioat/dma.c b/drivers/dma/ioat/dma.c index 21539d5c54c3..bd09961443b1 100644 --- a/drivers/dma/ioat/dma.c +++ b/drivers/dma/ioat/dma.c @@ -31,6 +31,7 @@ #include <linux/dma-mapping.h> #include <linux/workqueue.h> #include <linux/prefetch.h> +#include <linux/sizes.h> #include "dma.h" #include "registers.h" #include "hw.h" @@ -290,24 +291,30 @@ static dma_cookie_t ioat_tx_submit_unlock(struct dma_async_tx_descriptor *tx) } static struct ioat_ring_ent * -ioat_alloc_ring_ent(struct dma_chan *chan, gfp_t flags) +ioat_alloc_ring_ent(struct dma_chan *chan, int idx, gfp_t flags) { struct ioat_dma_descriptor *hw; struct ioat_ring_ent *desc; struct ioatdma_device *ioat_dma; + struct ioatdma_chan *ioat_chan = to_ioat_chan(chan); + int chunk; dma_addr_t phys; + u8 *pos; + off_t offs; ioat_dma = to_ioatdma_device(chan->device); - hw = pci_pool_alloc(ioat_dma->dma_pool, flags, &phys); - if (!hw) - return NULL; + + chunk = idx / IOAT_DESCS_PER_2M; + idx &= (IOAT_DESCS_PER_2M - 1); + offs = idx * IOAT_DESC_SZ; + pos = (u8 *)ioat_chan->descs[chunk].virt + offs; + phys = ioat_chan->descs[chunk].hw + offs; + hw = (struct ioat_dma_descriptor *)pos; memset(hw, 0, sizeof(*hw)); desc = kmem_cache_zalloc(ioat_cache, flags); - if (!desc) { - pci_pool_free(ioat_dma->dma_pool, hw, phys); + if (!desc) return NULL; - } dma_async_tx_descriptor_init(&desc->txd, chan); desc->txd.tx_submit = ioat_tx_submit_unlock; @@ -318,32 +325,63 @@ ioat_alloc_ring_ent(struct dma_chan *chan, gfp_t flags) void ioat_free_ring_ent(struct ioat_ring_ent *desc, struct dma_chan *chan) { - struct ioatdma_device *ioat_dma; - - ioat_dma = to_ioatdma_device(chan->device); - pci_pool_free(ioat_dma->dma_pool, desc->hw, desc->txd.phys); kmem_cache_free(ioat_cache, desc); } struct ioat_ring_ent ** ioat_alloc_ring(struct dma_chan *c, int order, gfp_t flags) { + struct ioatdma_chan *ioat_chan = to_ioat_chan(c); struct ioat_ring_ent **ring; - int descs = 1 << order; - int i; - - if (order > ioat_get_max_alloc_order()) - return NULL; + int total_descs = 1 << order; + int i, chunks; /* allocate the array to hold the software ring */ - ring = kcalloc(descs, sizeof(*ring), flags); + ring = kcalloc(total_descs, sizeof(*ring), flags); if (!ring) return NULL; - for (i = 0; i < descs; i++) { - ring[i] = ioat_alloc_ring_ent(c, flags); + + ioat_chan->desc_chunks = chunks = (total_descs * IOAT_DESC_SZ) / SZ_2M; + + for (i = 0; i < chunks; i++) { + struct ioat_descs *descs = &ioat_chan->descs[i]; + + descs->virt = dma_alloc_coherent(to_dev(ioat_chan), + SZ_2M, &descs->hw, flags); + if (!descs->virt && (i > 0)) { + int idx; + + for (idx = 0; idx < i; idx++) { + dma_free_coherent(to_dev(ioat_chan), SZ_2M, + descs->virt, descs->hw); + descs->virt = NULL; + descs->hw = 0; + } + + ioat_chan->desc_chunks = 0; + kfree(ring); + return NULL; + } + } + + for (i = 0; i < total_descs; i++) { + ring[i] = ioat_alloc_ring_ent(c, i, flags); if (!ring[i]) { + int idx; + while (i--) ioat_free_ring_ent(ring[i], c); + + for (idx = 0; idx < ioat_chan->desc_chunks; idx++) { + dma_free_coherent(to_dev(ioat_chan), + SZ_2M, + ioat_chan->descs[idx].virt, + ioat_chan->descs[idx].hw); + ioat_chan->descs[idx].virt = NULL; + ioat_chan->descs[idx].hw = 0; + } + + ioat_chan->desc_chunks = 0; kfree(ring); return NULL; } @@ -351,7 +389,7 @@ ioat_alloc_ring(struct dma_chan *c, int order, gfp_t flags) } /* link descs */ - for (i = 0; i < descs-1; i++) { + for (i = 0; i < total_descs-1; i++) { struct ioat_ring_ent *next = ring[i+1]; struct ioat_dma_descriptor *hw = ring[i]->hw; @@ -362,114 +400,6 @@ ioat_alloc_ring(struct dma_chan *c, int order, gfp_t flags) return ring; } -static bool reshape_ring(struct ioatdma_chan *ioat_chan, int order) -{ - /* reshape differs from normal ring allocation in that we want - * to allocate a new software ring while only - * extending/truncating the hardware ring - */ - struct dma_chan *c = &ioat_chan->dma_chan; - const u32 curr_size = ioat_ring_size(ioat_chan); - const u16 active = ioat_ring_active(ioat_chan); - const u32 new_size = 1 << order; - struct ioat_ring_ent **ring; - u32 i; - - if (order > ioat_get_max_alloc_order()) - return false; - - /* double check that we have at least 1 free descriptor */ - if (active == curr_size) - return false; - - /* when shrinking, verify that we can hold the current active - * set in the new ring - */ - if (active >= new_size) - return false; - - /* allocate the array to hold the software ring */ - ring = kcalloc(new_size, sizeof(*ring), GFP_NOWAIT); - if (!ring) - return false; - - /* allocate/trim descriptors as needed */ - if (new_size > curr_size) { - /* copy current descriptors to the new ring */ - for (i = 0; i < curr_size; i++) { - u16 curr_idx = (ioat_chan->tail+i) & (curr_size-1); - u16 new_idx = (ioat_chan->tail+i) & (new_size-1); - - ring[new_idx] = ioat_chan->ring[curr_idx]; - set_desc_id(ring[new_idx], new_idx); - } - - /* add new descriptors to the ring */ - for (i = curr_size; i < new_size; i++) { - u16 new_idx = (ioat_chan->tail+i) & (new_size-1); - - ring[new_idx] = ioat_alloc_ring_ent(c, GFP_NOWAIT); - if (!ring[new_idx]) { - while (i--) { - u16 new_idx = (ioat_chan->tail+i) & - (new_size-1); - - ioat_free_ring_ent(ring[new_idx], c); - } - kfree(ring); - return false; - } - set_desc_id(ring[new_idx], new_idx); - } - - /* hw link new descriptors */ - for (i = curr_size-1; i < new_size; i++) { - u16 new_idx = (ioat_chan->tail+i) & (new_size-1); - struct ioat_ring_ent *next = - ring[(new_idx+1) & (new_size-1)]; - struct ioat_dma_descriptor *hw = ring[new_idx]->hw; - - hw->next = next->txd.phys; - } - } else { - struct ioat_dma_descriptor *hw; - struct ioat_ring_ent *next; - - /* copy current descriptors to the new ring, dropping the - * removed descriptors - */ - for (i = 0; i < new_size; i++) { - u16 curr_idx = (ioat_chan->tail+i) & (curr_size-1); - u16 new_idx = (ioat_chan->tail+i) & (new_size-1); - - ring[new_idx] = ioat_chan->ring[curr_idx]; - set_desc_id(ring[new_idx], new_idx); - } - - /* free deleted descriptors */ - for (i = new_size; i < curr_size; i++) { - struct ioat_ring_ent *ent; - - ent = ioat_get_ring_ent(ioat_chan, ioat_chan->tail+i); - ioat_free_ring_ent(ent, c); - } - - /* fix up hardware ring */ - hw = ring[(ioat_chan->tail+new_size-1) & (new_size-1)]->hw; - next = ring[(ioat_chan->tail+new_size) & (new_size-1)]; - hw->next = next->txd.phys; - } - - dev_dbg(to_dev(ioat_chan), "%s: allocated %d descriptors\n", - __func__, new_size); - - kfree(ioat_chan->ring); - ioat_chan->ring = ring; - ioat_chan->alloc_order = order; - - return true; -} - /** * ioat_check_space_lock - verify space and grab ring producer lock * @ioat: ioat,3 channel (ring) to operate on @@ -478,9 +408,6 @@ static bool reshape_ring(struct ioatdma_chan *ioat_chan, int order) int ioat_check_space_lock(struct ioatdma_chan *ioat_chan, int num_descs) __acquires(&ioat_chan->prep_lock) { - bool retry; - - retry: spin_lock_bh(&ioat_chan->prep_lock); /* never allow the last descriptor to be consumed, we need at * least one free at all times to allow for on-the-fly ring @@ -493,24 +420,8 @@ int ioat_check_space_lock(struct ioatdma_chan *ioat_chan, int num_descs) ioat_chan->produce = num_descs; return 0; /* with ioat->prep_lock held */ } - retry = test_and_set_bit(IOAT_RESHAPE_PENDING, &ioat_chan->state); spin_unlock_bh(&ioat_chan->prep_lock); - /* is another cpu already trying to expand the ring? */ - if (retry) - goto retry; - - spin_lock_bh(&ioat_chan->cleanup_lock); - spin_lock_bh(&ioat_chan->prep_lock); - retry = reshape_ring(ioat_chan, ioat_chan->alloc_order + 1); - clear_bit(IOAT_RESHAPE_PENDING, &ioat_chan->state); - spin_unlock_bh(&ioat_chan->prep_lock); - spin_unlock_bh(&ioat_chan->cleanup_lock); - - /* if we were able to expand the ring retry the allocation */ - if (retry) - goto retry; - dev_dbg_ratelimited(to_dev(ioat_chan), "%s: ring full! num_descs: %d (%x:%x:%x)\n", __func__, num_descs, ioat_chan->head, @@ -823,19 +734,6 @@ static void check_active(struct ioatdma_chan *ioat_chan) if (test_and_clear_bit(IOAT_CHAN_ACTIVE, &ioat_chan->state)) mod_timer(&ioat_chan->timer, jiffies + IDLE_TIMEOUT); - else if (ioat_chan->alloc_order > ioat_get_alloc_order()) { - /* if the ring is idle, empty, and oversized try to step - * down the size - */ - reshape_ring(ioat_chan, ioat_chan->alloc_order - 1); - - /* keep shrinking until we get back to our minimum - * default size - */ - if (ioat_chan->alloc_order > ioat_get_alloc_order()) - mod_timer(&ioat_chan->timer, jiffies + IDLE_TIMEOUT); - } - } void ioat_timer_event(unsigned long data) @@ -916,40 +814,6 @@ ioat_tx_status(struct dma_chan *c, dma_cookie_t cookie, return dma_cookie_status(c, cookie, txstate); } -static int ioat_irq_reinit(struct ioatdma_device *ioat_dma) -{ - struct pci_dev *pdev = ioat_dma->pdev; - int irq = pdev->irq, i; - - if (!is_bwd_ioat(pdev)) - return 0; - - switch (ioat_dma->irq_mode) { - case IOAT_MSIX: - for (i = 0; i < ioat_dma->dma_dev.chancnt; i++) { - struct msix_entry *msix = &ioat_dma->msix_entries[i]; - struct ioatdma_chan *ioat_chan; - - ioat_chan = ioat_chan_by_index(ioat_dma, i); - devm_free_irq(&pdev->dev, msix->vector, ioat_chan); - } - - pci_disable_msix(pdev); - break; - case IOAT_MSI: - pci_disable_msi(pdev); - /* fall through */ - case IOAT_INTX: - devm_free_irq(&pdev->dev, irq, ioat_dma); - break; - default: - return 0; - } - ioat_dma->irq_mode = IOAT_NOIRQ; - - return ioat_dma_setup_interrupts(ioat_dma); -} - int ioat_reset_hw(struct ioatdma_chan *ioat_chan) { /* throw away whatever the channel was doing and get it @@ -989,9 +853,21 @@ int ioat_reset_hw(struct ioatdma_chan *ioat_chan) } } + if (is_bwd_ioat(pdev) && (ioat_dma->irq_mode == IOAT_MSIX)) { + ioat_dma->msixtba0 = readq(ioat_dma->reg_base + 0x1000); + ioat_dma->msixdata0 = readq(ioat_dma->reg_base + 0x1008); + ioat_dma->msixpba = readq(ioat_dma->reg_base + 0x1800); + } + + err = ioat_reset_sync(ioat_chan, msecs_to_jiffies(200)); - if (!err) - err = ioat_irq_reinit(ioat_dma); + if (!err) { + if (is_bwd_ioat(pdev) && (ioat_dma->irq_mode == IOAT_MSIX)) { + writeq(ioat_dma->msixtba0, ioat_dma->reg_base + 0x1000); + writeq(ioat_dma->msixdata0, ioat_dma->reg_base + 0x1008); + writeq(ioat_dma->msixpba, ioat_dma->reg_base + 0x1800); + } + } if (err) dev_err(&pdev->dev, "Failed to reset: %d\n", err); diff --git a/drivers/dma/ioat/dma.h b/drivers/dma/ioat/dma.h index b8f48074789f..a9bc1a15b0d1 100644 --- a/drivers/dma/ioat/dma.h +++ b/drivers/dma/ioat/dma.h @@ -62,7 +62,6 @@ enum ioat_irq_mode { * struct ioatdma_device - internal representation of a IOAT device * @pdev: PCI-Express device * @reg_base: MMIO register space base address - * @dma_pool: for allocating DMA descriptors * @completion_pool: DMA buffers for completion ops * @sed_hw_pool: DMA super descriptor pools * @dma_dev: embedded struct dma_device @@ -76,8 +75,7 @@ enum ioat_irq_mode { struct ioatdma_device { struct pci_dev *pdev; void __iomem *reg_base; - struct pci_pool *dma_pool; - struct pci_pool *completion_pool; + struct dma_pool *completion_pool; #define MAX_SED_POOLS 5 struct dma_pool *sed_hw_pool[MAX_SED_POOLS]; struct dma_device dma_dev; @@ -88,6 +86,16 @@ struct ioatdma_device { struct dca_provider *dca; enum ioat_irq_mode irq_mode; u32 cap; + + /* shadow version for CB3.3 chan reset errata workaround */ + u64 msixtba0; + u64 msixdata0; + u32 msixpba; +}; + +struct ioat_descs { + void *virt; + dma_addr_t hw; }; struct ioatdma_chan { @@ -100,7 +108,6 @@ struct ioatdma_chan { #define IOAT_COMPLETION_ACK 1 #define IOAT_RESET_PENDING 2 #define IOAT_KOBJ_INIT_FAIL 3 - #define IOAT_RESHAPE_PENDING 4 #define IOAT_RUN 5 #define IOAT_CHAN_ACTIVE 6 struct timer_list timer; @@ -133,6 +140,8 @@ struct ioatdma_chan { u16 produce; struct ioat_ring_ent **ring; spinlock_t prep_lock; + struct ioat_descs descs[2]; + int desc_chunks; }; struct ioat_sysfs_entry { @@ -302,10 +311,8 @@ static inline bool is_ioat_bug(unsigned long err) } #define IOAT_MAX_ORDER 16 -#define ioat_get_alloc_order() \ - (min(ioat_ring_alloc_order, IOAT_MAX_ORDER)) -#define ioat_get_max_alloc_order() \ - (min(ioat_ring_max_alloc_order, IOAT_MAX_ORDER)) +#define IOAT_MAX_DESCS 65536 +#define IOAT_DESCS_PER_2M 32768 static inline u32 ioat_ring_size(struct ioatdma_chan *ioat_chan) { diff --git a/drivers/dma/ioat/hw.h b/drivers/dma/ioat/hw.h index 690e3b4f8202..8e67895bcca3 100644 --- a/drivers/dma/ioat/hw.h +++ b/drivers/dma/ioat/hw.h @@ -73,6 +73,8 @@ int system_has_dca_enabled(struct pci_dev *pdev); +#define IOAT_DESC_SZ 64 + struct ioat_dma_descriptor { uint32_t size; union { diff --git a/drivers/dma/ioat/init.c b/drivers/dma/ioat/init.c index 4ef0c5e07912..efdee1a69fc4 100644 --- a/drivers/dma/ioat/init.c +++ b/drivers/dma/ioat/init.c @@ -28,6 +28,7 @@ #include <linux/prefetch.h> #include <linux/dca.h> #include <linux/aer.h> +#include <linux/sizes.h> #include "dma.h" #include "registers.h" #include "hw.h" @@ -136,14 +137,6 @@ int ioat_pending_level = 4; module_param(ioat_pending_level, int, 0644); MODULE_PARM_DESC(ioat_pending_level, "high-water mark for pushing ioat descriptors (default: 4)"); -int ioat_ring_alloc_order = 8; -module_param(ioat_ring_alloc_order, int, 0644); -MODULE_PARM_DESC(ioat_ring_alloc_order, - "ioat+: allocate 2^n descriptors per channel (default: 8 max: 16)"); -int ioat_ring_max_alloc_order = IOAT_MAX_ORDER; -module_param(ioat_ring_max_alloc_order, int, 0644); -MODULE_PARM_DESC(ioat_ring_max_alloc_order, - "ioat+: upper limit for ring size (default: 16)"); static char ioat_interrupt_style[32] = "msix"; module_param_string(ioat_interrupt_style, ioat_interrupt_style, sizeof(ioat_interrupt_style), 0644); @@ -504,23 +497,14 @@ static int ioat_probe(struct ioatdma_device *ioat_dma) struct pci_dev *pdev = ioat_dma->pdev; struct device *dev = &pdev->dev; - /* DMA coherent memory pool for DMA descriptor allocations */ - ioat_dma->dma_pool = pci_pool_create("dma_desc_pool", pdev, - sizeof(struct ioat_dma_descriptor), - 64, 0); - if (!ioat_dma->dma_pool) { - err = -ENOMEM; - goto err_dma_pool; - } - - ioat_dma->completion_pool = pci_pool_create("completion_pool", pdev, + ioat_dma->completion_pool = dma_pool_create("completion_pool", dev, sizeof(u64), SMP_CACHE_BYTES, SMP_CACHE_BYTES); if (!ioat_dma->completion_pool) { err = -ENOMEM; - goto err_completion_pool; + goto err_out; } ioat_enumerate_channels(ioat_dma); @@ -546,10 +530,8 @@ static int ioat_probe(struct ioatdma_device *ioat_dma) err_self_test: ioat_disable_interrupts(ioat_dma); err_setup_interrupts: - pci_pool_destroy(ioat_dma->completion_pool); -err_completion_pool: - pci_pool_destroy(ioat_dma->dma_pool); -err_dma_pool: + dma_pool_destroy(ioat_dma->complet |