diff options
35 files changed, 771 insertions, 425 deletions
diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt index 7ee2ae6d5451..70a09f8a0383 100644 --- a/Documentation/memory-barriers.txt +++ b/Documentation/memory-barriers.txt @@ -1633,6 +1633,48 @@ There are some more advanced barrier functions: operations" subsection for information on where to use these. + (*) dma_wmb(); + (*) dma_rmb(); + + These are for use with consistent memory to guarantee the ordering + of writes or reads of shared memory accessible to both the CPU and a + DMA capable device. + + For example, consider a device driver that shares memory with a device + and uses a descriptor status value to indicate if the descriptor belongs + to the device or the CPU, and a doorbell to notify it when new + descriptors are available: + + if (desc->status != DEVICE_OWN) { + /* do not read data until we own descriptor */ + dma_rmb(); + + /* read/modify data */ + read_data = desc->data; + desc->data = write_data; + + /* flush modifications before status update */ + dma_wmb(); + + /* assign ownership */ + desc->status = DEVICE_OWN; + + /* force memory to sync before notifying device via MMIO */ + wmb(); + + /* notify device of new descriptors */ + writel(DESC_NOTIFY, doorbell); + } + + The dma_rmb() allows us guarantee the device has released ownership + before we read the data from the descriptor, and he dma_wmb() allows + us to guarantee the data is written to the descriptor before the device + can see it now has ownership. The wmb() is needed to guarantee that the + cache coherent memory writes have completed before attempting a write to + the cache incoherent MMIO region. + + See Documentation/DMA-API.txt for more information on consistent memory. + MMIO WRITE BARRIER ------------------ diff --git a/arch/alpha/include/asm/barrier.h b/arch/alpha/include/asm/barrier.h index 3832bdb794fe..77516c87255d 100644 --- a/arch/alpha/include/asm/barrier.h +++ b/arch/alpha/include/asm/barrier.h @@ -7,6 +7,57 @@ #define rmb() __asm__ __volatile__("mb": : :"memory") #define wmb() __asm__ __volatile__("wmb": : :"memory") +/** + * read_barrier_depends - Flush all pending reads that subsequents reads + * depend on. + * + * No data-dependent reads from memory-like regions are ever reordered + * over this barrier. All reads preceding this primitive are guaranteed + * to access memory (but not necessarily other CPUs' caches) before any + * reads following this primitive that depend on the data return by + * any of the preceding reads. This primitive is much lighter weight than + * rmb() on most CPUs, and is never heavier weight than is + * rmb(). + * + * These ordering constraints are respected by both the local CPU + * and the compiler. + * + * Ordering is not guaranteed by anything other than these primitives, + * not even by data dependencies. See the documentation for + * memory_barrier() for examples and URLs to more information. + * + * For example, the following code would force ordering (the initial + * value of "a" is zero, "b" is one, and "p" is "&a"): + * + * <programlisting> + * CPU 0 CPU 1 + * + * b = 2; + * memory_barrier(); + * p = &b; q = p; + * read_barrier_depends(); + * d = *q; + * </programlisting> + * + * because the read of "*q" depends on the read of "p" and these + * two reads are separated by a read_barrier_depends(). However, + * the following code, with the same initial values for "a" and "b": + * + * <programlisting> + * CPU 0 CPU 1 + * + * a = 2; + * memory_barrier(); + * b = 3; y = b; + * read_barrier_depends(); + * x = a; + * </programlisting> + * + * does not enforce ordering, since there is no data dependency between + * the read of "a" and the read of "b". Therefore, on some CPUs, such + * as Alpha, "y" could be set to 3 and "x" to 0. Use rmb() + * in cases like this where there are no data dependencies. + */ #define read_barrier_depends() __asm__ __volatile__("mb": : :"memory") #ifdef CONFIG_SMP diff --git a/arch/arm/include/asm/barrier.h b/arch/arm/include/asm/barrier.h index c6a3e73a6e24..d2f81e6b8c1c 100644 --- a/arch/arm/include/asm/barrier.h +++ b/arch/arm/include/asm/barrier.h @@ -43,10 +43,14 @@ #define mb() do { dsb(); outer_sync(); } while (0) #define rmb() dsb() #define wmb() do { dsb(st); outer_sync(); } while (0) +#define dma_rmb() dmb(osh) +#define dma_wmb() dmb(oshst) #else #define mb() barrier() #define rmb() barrier() #define wmb() barrier() +#define dma_rmb() barrier() +#define dma_wmb() barrier() #endif #ifndef CONFIG_SMP diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h index 6389d60574d9..a5abb0062d6e 100644 --- a/arch/arm64/include/asm/barrier.h +++ b/arch/arm64/include/asm/barrier.h @@ -32,6 +32,9 @@ #define rmb() dsb(ld) #define wmb() dsb(st) +#define dma_rmb() dmb(oshld) +#define dma_wmb() dmb(oshst) + #ifndef CONFIG_SMP #define smp_mb() barrier() #define smp_rmb() barrier() diff --git a/arch/blackfin/include/asm/barrier.h b/arch/blackfin/include/asm/barrier.h index 420006877998..dfb66fe88b34 100644 --- a/arch/blackfin/include/asm/barrier.h +++ b/arch/blackfin/include/asm/barrier.h @@ -22,6 +22,57 @@ # define mb() do { barrier(); smp_check_barrier(); smp_mark_barrier(); } while (0) # define rmb() do { barrier(); smp_check_barrier(); } while (0) # define wmb() do { barrier(); smp_mark_barrier(); } while (0) +/* + * read_barrier_depends - Flush all pending reads that subsequents reads + * depend on. + * + * No data-dependent reads from memory-like regions are ever reordered + * over this barrier. All reads preceding this primitive are guaranteed + * to access memory (but not necessarily other CPUs' caches) before any + * reads following this primitive that depend on the data return by + * any of the preceding reads. This primitive is much lighter weight than + * rmb() on most CPUs, and is never heavier weight than is + * rmb(). + * + * These ordering constraints are respected by both the local CPU + * and the compiler. + * + * Ordering is not guaranteed by anything other than these primitives, + * not even by data dependencies. See the documentation for + * memory_barrier() for examples and URLs to more information. + * + * For example, the following code would force ordering (the initial + * value of "a" is zero, "b" is one, and "p" is "&a"): + * + * <programlisting> + * CPU 0 CPU 1 + * + * b = 2; + * memory_barrier(); + * p = &b; q = p; + * read_barrier_depends(); + * d = *q; + * </programlisting> + * + * because the read of "*q" depends on the read of "p" and these + * two reads are separated by a read_barrier_depends(). However, + * the following code, with the same initial values for "a" and "b": + * + * <programlisting> + * CPU 0 CPU 1 + * + * a = 2; + * memory_barrier(); + * b = 3; y = b; + * read_barrier_depends(); + * x = a; + * </programlisting> + * + * does not enforce ordering, since there is no data dependency between + * the read of "a" and the read of "b". Therefore, on some CPUs, such + * as Alpha, "y" could be set to 3 and "x" to 0. Use rmb() + * in cases like this where there are no data dependencies. + */ # define read_barrier_depends() do { barrier(); smp_check_barrier(); } while (0) #endif diff --git a/arch/ia64/include/asm/barrier.h b/arch/ia64/include/asm/barrier.h index a48957c7b445..f6769eb2bbf9 100644 --- a/arch/ia64/include/asm/barrier.h +++ b/arch/ia64/include/asm/barrier.h @@ -35,26 +35,25 @@ * it's (presumably) much slower than mf and (b) mf.a is supported for * sequential memory pages only. */ -#define mb() ia64_mf() -#define rmb() mb() -#define wmb() mb() -#define read_barrier_depends() do { } while(0) +#define mb() ia64_mf() +#define rmb() mb() +#define wmb() mb() + +#define dma_rmb() mb() +#define dma_wmb() mb() #ifdef CONFIG_SMP # define smp_mb() mb() -# define smp_rmb() rmb() -# define smp_wmb() wmb() -# define smp_read_barrier_depends() read_barrier_depends() - #else - # define smp_mb() barrier() -# define smp_rmb() barrier() -# define smp_wmb() barrier() -# define smp_read_barrier_depends() do { } while(0) - #endif +#define smp_rmb() smp_mb() +#define smp_wmb() smp_mb() + +#define read_barrier_depends() do { } while (0) +#define smp_read_barrier_depends() do { } while (0) + #define smp_mb__before_atomic() barrier() #define smp_mb__after_atomic() barrier() diff --git a/arch/metag/include/asm/barrier.h b/arch/metag/include/asm/barrier.h index c7591e80067c..d703d8e26a65 100644 --- a/arch/metag/include/asm/barrier.h +++ b/arch/metag/include/asm/barrier.h @@ -4,8 +4,6 @@ #include <asm/metag_mem.h> #define nop() asm volatile ("NOP") -#define mb() wmb() -#define rmb() barrier() #ifdef CONFIG_METAG_META21 @@ -41,13 +39,13 @@ static inline void wr_fence(void) #endif /* !CONFIG_METAG_META21 */ -static inline void wmb(void) -{ - /* flush writes through the write combiner */ - wr_fence(); -} +/* flush writes through the write combiner */ +#define mb() wr_fence() +#define rmb() barrier() +#define wmb() mb() -#define read_barrier_depends() do { } while (0) +#define dma_rmb() rmb() +#define dma_wmb() wmb() #ifndef CONFIG_SMP #define fence() do { } while (0) @@ -82,7 +80,10 @@ static inline void fence(void) #define smp_wmb() barrier() #endif #endif -#define smp_read_barrier_depends() do { } while (0) + +#define read_barrier_depends() do { } while (0) +#define smp_read_barrier_depends() do { } while (0) + #define set_mb(var, value) do { var = value; smp_mb(); } while (0) #define smp_store_release(p, v) \ diff --git a/arch/mips/include/asm/barrier.h b/arch/mips/include/asm/barrier.h index d0101dd0575e..2b8bbbcb9be0 100644 --- a/arch/mips/include/asm/barrier.h +++ b/arch/mips/include/asm/barrier.h @@ -10,58 +10,6 @@ #include <asm/addrspace.h> -/* - * read_barrier_depends - Flush all pending reads that subsequents reads - * depend on. - * - * No data-dependent reads from memory-like regions are ever reordered - * over this barrier. All reads preceding this primitive are guaranteed - * to access memory (but not necessarily other CPUs' caches) before any - * reads following this primitive that depend on the data return by - * any of the preceding reads. This primitive is much lighter weight than - * rmb() on most CPUs, and is never heavier weight than is - * rmb(). - * - * These ordering constraints are respected by both the local CPU - * and the compiler. - * - * Ordering is not guaranteed by anything other than these primitives, - * not even by data dependencies. See the documentation for - * memory_barrier() for examples and URLs to more information. - * - * For example, the following code would force ordering (the initial - * value of "a" is zero, "b" is one, and "p" is "&a"): - * - * <programlisting> - * CPU 0 CPU 1 - * - * b = 2; - * memory_barrier(); - * p = &b; q = p; - * read_barrier_depends(); - * d = *q; - * </programlisting> - * - * because the read of "*q" depends on the read of "p" and these - * two reads are separated by a read_barrier_depends(). However, - * the following code, with the same initial values for "a" and "b": - * - * <programlisting> - * CPU 0 CPU 1 - * - * a = 2; - * memory_barrier(); - * b = 3; y = b; - * read_barrier_depends(); - * x = a; - * </programlisting> - * - * does not enforce ordering, since there is no data dependency between - * the read of "a" and the read of "b". Therefore, on some CPUs, such - * as Alpha, "y" could be set to 3 and "x" to 0. Use rmb() - * in cases like this where there are no data dependencies. - */ - #define read_barrier_depends() do { } while(0) #define smp_read_barrier_depends() do { } while(0) @@ -127,20 +75,21 @@ #include <asm/wbflush.h> -#define wmb() fast_wmb() -#define rmb() fast_rmb() #define mb() wbflush() #define iob() wbflush() #else /* !CONFIG_CPU_HAS_WB */ -#define wmb() fast_wmb() -#define rmb() fast_rmb() #define mb() fast_mb() #define iob() fast_iob() #endif /* !CONFIG_CPU_HAS_WB */ +#define wmb() fast_wmb() +#define rmb() fast_rmb() +#define dma_wmb() fast_wmb() +#define dma_rmb() fast_rmb() + #if defined(CONFIG_WEAK_ORDERING) && defined(CONFIG_SMP) # ifdef CONFIG_CPU_CAVIUM_OCTEON # define smp_mb() __sync() diff --git a/arch/powerpc/include/asm/barrier.h b/arch/powerpc/include/asm/barrier.h index bab79a110c7b..a3bf5be111ff 100644 --- a/arch/powerpc/include/asm/barrier.h +++ b/arch/powerpc/include/asm/barrier.h @@ -33,12 +33,9 @@ #define mb() __asm__ __volatile__ ("sync" : : : "memory") #define rmb() __asm__ __volatile__ ("sync" : : : "memory") #define wmb() __asm__ __volatile__ ("sync" : : : "memory") -#define read_barrier_depends() do { } while(0) #define set_mb(var, value) do { var = value; mb(); } while (0) -#ifdef CONFIG_SMP - #ifdef __SUBARCH_HAS_LWSYNC # define SMPWMB LWSYNC #else @@ -46,20 +43,26 @@ #endif #define __lwsync() __asm__ __volatile__ (stringify_in_c(LWSYNC) : : :"memory") +#define dma_rmb() __lwsync() +#define dma_wmb() __asm__ __volatile__ (stringify_in_c(SMPWMB) : : :"memory") + +#ifdef CONFIG_SMP +#define smp_lwsync() __lwsync() #define smp_mb() mb() #define smp_rmb() __lwsync() #define smp_wmb() __asm__ __volatile__ (stringify_in_c(SMPWMB) : : :"memory") -#define smp_read_barrier_depends() read_barrier_depends() #else -#define __lwsync() barrier() +#define smp_lwsync() barrier() #define smp_mb() barrier() #define smp_rmb() barrier() #define smp_wmb() barrier() -#define smp_read_barrier_depends() do { } while(0) #endif /* CONFIG_SMP */ +#define read_barrier_depends() do { } while (0) +#define smp_read_barrier_depends() do { } while (0) + /* * This is a barrier which prevents following instructions from being * started until the value of the argument x is known. For example, if @@ -72,7 +75,7 @@ #define smp_store_release(p, v) \ do { \ compiletime_assert_atomic_type(*p); \ - __lwsync(); \ + smp_lwsync(); \ ACCESS_ONCE(*p) = (v); \ } while (0) @@ -80,7 +83,7 @@ do { \ ({ \ typeof(*p) ___p1 = ACCESS_ONCE(*p); \ compiletime_assert_atomic_type(*p); \ - __lwsync(); \ + smp_lwsync(); \ ___p1; \ }) diff --git a/arch/s390/include/asm/barrier.h b/arch/s390/include/asm/barrier.h index b5dce6544d76..8d724718ec21 100644 --- a/arch/s390/include/asm/barrier.h +++ b/arch/s390/include/asm/barrier.h @@ -24,11 +24,14 @@ #define rmb() mb() #define wmb() mb() -#define read_barrier_depends() do { } while(0) +#define dma_rmb() rmb() +#define dma_wmb() wmb() #define smp_mb() mb() #define smp_rmb() rmb() #define smp_wmb() wmb() -#define smp_read_barrier_depends() read_barrier_depends() + +#define read_barrier_depends() do { } while (0) +#define smp_read_barrier_depends() do { } while (0) #define smp_mb__before_atomic() smp_mb() #define smp_mb__after_atomic() smp_mb() diff --git a/arch/sparc/include/asm/barrier_64.h b/arch/sparc/include/asm/barrier_64.h index 305dcc3dc721..76648941fea7 100644 --- a/arch/sparc/include/asm/barrier_64.h +++ b/arch/sparc/include/asm/barrier_64.h @@ -37,7 +37,9 @@ do { __asm__ __volatile__("ba,pt %%xcc, 1f\n\t" \ #define rmb() __asm__ __volatile__("":::"memory") #define wmb() __asm__ __volatile__("":::"memory") -#define read_barrier_depends() do { } while(0) +#define dma_rmb() rmb() +#define dma_wmb() wmb() + #define set_mb(__var, __value) \ do { __var = __value; membar_safe("#StoreLoad"); } while(0) @@ -51,7 +53,8 @@ do { __asm__ __volatile__("ba,pt %%xcc, 1f\n\t" \ #define smp_wmb() __asm__ __volatile__("":::"memory") #endif -#define smp_read_barrier_depends() do { } while(0) +#define read_barrier_depends() do { } while (0) +#define smp_read_barrier_depends() do { } while (0) #define smp_store_release(p, v) \ do { \ diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h index 0f4460b5636d..2ab1eb33106e 100644 --- a/arch/x86/include/asm/barrier.h +++ b/arch/x86/include/asm/barrier.h @@ -24,78 +24,28 @@ #define wmb() asm volatile("sfence" ::: "memory") #endif -/** - * read_barrier_depends - Flush all pending reads that subsequents reads - * depend on. - * - * No data-dependent reads from memory-like regions are ever reordered - * over this barrier. All reads preceding this primitive are guaranteed - * to access memory (but not necessarily other CPUs' caches) before any - * reads following this primitive that depend on the data return by - * any of the preceding reads. This primitive is much lighter weight than - * rmb() on most CPUs, and is never heavier weight than is - * rmb(). - * - * These ordering constraints are respected by both the local CPU - * and the compiler. - * - * Ordering is not guaranteed by anything other than these primitives, - * not even by data dependencies. See the documentation for - * memory_barrier() for examples and URLs to more information. - * - * For example, the following code would force ordering (the initial - * value of "a" is zero, "b" is one, and "p" is "&a"): - * - * <programlisting> - * CPU 0 CPU 1 - * - * b = 2; - * memory_barrier(); - * p = &b; q = p; - * read_barrier_depends(); - * d = *q; - * </programlisting> - * - * because the read of "*q" depends on the read of "p" and these - * two reads are separated by a read_barrier_depends(). However, - * the following code, with the same initial values for "a" and "b": - * - * <programlisting> - * CPU 0 CPU 1 - * - * a = 2; - * memory_barrier(); - * b = 3; y = b; - * read_barrier_depends(); - * x = a; - * </programlisting> - * - * does not enforce ordering, since there is no data dependency between - * the read of "a" and the read of "b". Therefore, on some CPUs, such - * as Alpha, "y" could be set to 3 and "x" to 0. Use rmb() - * in cases like this where there are no data dependencies. - **/ - -#define read_barrier_depends() do { } while (0) - -#ifdef CONFIG_SMP -#define smp_mb() mb() #ifdef CONFIG_X86_PPRO_FENCE -# define smp_rmb() rmb() +#define dma_rmb() rmb() #else -# define smp_rmb() barrier() +#define dma_rmb() barrier() #endif +#define dma_wmb() barrier() + +#ifdef CONFIG_SMP +#define smp_mb() mb() +#define smp_rmb() dma_rmb() #define smp_wmb() barrier() -#define smp_read_barrier_depends() read_barrier_depends() #define set_mb(var, value) do { (void)xchg(&var, value); } while (0) #else /* !SMP */ #define smp_mb() barrier() #define smp_rmb() barrier() #define smp_wmb() barrier() -#define smp_read_barrier_depends() do { } while (0) #define set_mb(var, value) do { var = value; barrier(); } while (0) #endif /* SMP */ +#define read_barrier_depends() do { } while (0) +#define smp_read_barrier_depends() do { } while (0) + #if defined(CONFIG_X86_PPRO_FENCE) /* diff --git a/arch/x86/um/asm/barrier.h b/arch/x86/um/asm/barrier.h index cc04e67bfd05..2d7d9a1f5b53 100644 --- a/arch/x86/um/asm/barrier.h +++ b/arch/x86/um/asm/barrier.h @@ -29,20 +29,18 @@ #endif /* CONFIG_X86_32 */ -#define read_barrier_depends() do { } while (0) - -#ifdef CONFIG_SMP - -#define smp_mb() mb() #ifdef CONFIG_X86_PPRO_FENCE -#define smp_rmb() rmb() +#define dma_rmb() rmb() #else /* CONFIG_X86_PPRO_FENCE */ -#define smp_rmb() barrier() +#define dma_rmb() barrier() #endif /* CONFIG_X86_PPRO_FENCE */ +#define dma_wmb() barrier() -#define smp_wmb() barrier() +#ifdef CONFIG_SMP -#define smp_read_barrier_depends() read_barrier_depends() +#define smp_mb() mb() +#define smp_rmb() dma_rmb() +#define smp_wmb() barrier() #define set_mb(var, value) do { (void)xchg(&var, value); } while (0) #else /* CONFIG_SMP */ @@ -50,11 +48,13 @@ #define smp_mb() barrier() #define smp_rmb() barrier() #define smp_wmb() barrier() -#define smp_read_barrier_depends() do { } while (0) #define set_mb(var, value) do { var = value; barrier(); } while (0) #endif /* CONFIG_SMP */ +#define read_barrier_depends() do { } while (0) +#define smp_read_barrier_depends() do { } while (0) + /* * Stop RDTSC speculation. This is needed when you need to use RDTSC * (or get_cycles or vread that possibly accesses the TSC) in a defined diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index 4f4c2a7888e5..feb29c4526f7 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -684,10 +684,9 @@ static void bcm_sf2_sw_fixed_link_update(struct dsa_switch *ds, int port, struct fixed_phy_status *status) { struct bcm_sf2_priv *priv = ds_to_priv(ds); - u32 link, duplex, pause, speed; + u32 duplex, pause, speed; u32 reg; - link = core_readl(priv, CORE_LNKSTS); duplex = core_readl(priv, CORE_DUPSTS); pause = core_readl(priv, CORE_PAUSESTS); speed = core_readl(priv, CORE_SPDSTS); @@ -701,22 +700,26 @@ static void bcm_sf2_sw_fixed_link_update(struct dsa_switch *ds, int port, * which means that we need to force the link at the port override * level to get the data to flow. We do use what the interrupt handler * did determine before. + * + * For the other ports, we just force the link status, since this is + * a fixed PHY device. */ if (port == 7) { status->link = priv->port_sts[port].link; - reg = core_readl(priv, CORE_STS_OVERRIDE_GMIIP_PORT(7)); - reg |= SW_OVERRIDE; - if (status->link) - reg |= LINK_STS; - else - reg &= ~LINK_STS; - core_writel(priv, reg, CORE_STS_OVERRIDE_GMIIP_PORT(7)); status->duplex = 1; } else { - status->link = !!(link & (1 << port)); + status->link = 1; status->duplex = !!(duplex & (1 << port)); } + reg = core_readl(priv, CORE_STS_OVERRIDE_GMIIP_PORT(port)); + reg |= SW_OVERRIDE; + if (status->link) + reg |= LINK_STS; + else + reg &= ~LINK_STS; + core_writel(priv, reg, CORE_STS_OVERRIDE_GMIIP_PORT(port)); + switch (speed) { case SPDSTS_10: status->speed = SPEED_10; diff --git a/drivers/net/ethernet/cadence/macb.c b/drivers/net/ethernet/cadence/macb.c index b6bc318b148e..0987d2a77f9f 100644 --- a/drivers/net/ethernet/cadence/macb.c +++ b/drivers/net/ethernet/cadence/macb.c @@ -66,23 +66,25 @@ static unsigned int macb_tx_ring_wrap(unsigned int index) return index & (TX_RING_SIZE - 1); } -static struct macb_dma_desc *macb_tx_desc(struct macb *bp, unsigned int index) +static struct macb_dma_desc *macb_tx_desc(struct macb_queue *queue, + unsigned int index) { - return &bp->tx_ring[macb_tx_ring_wrap(index)]; + return &queue->tx_ring[macb_tx_ring_wrap(index)]; } -static struct macb_tx_skb *macb_tx_skb(struct macb *bp, unsigned int index) +static struct macb_tx_skb *macb_tx_skb(struct macb_queue *queue, + unsigned int index) { - return &bp->tx_skb[macb_tx_ring_wrap(index)]; + return &queue->tx_skb[macb_tx_ring_wrap(index)]; } -static dma_addr_t macb_tx_dma(struct macb *bp, unsigned int index) +static dma_addr_t macb_tx_dma(struct macb_queue *queue, unsigned int index) { dma_addr_t offset; offset = macb_tx_ring_wrap(index) * sizeof(struct macb_dma_desc); - return bp->tx_ring_dma + offset; + return queue->tx_ring_dma + offset; } static unsigned int macb_rx_ring_wrap(unsigned int index) @@ -490,38 +492,49 @@ static void macb_tx_unmap(struct macb *bp, struct macb_tx_skb *tx_skb) static void macb_tx_error_task(struct work_struct *work) { - struct macb *bp = container_of(work, struct macb, tx_error_task); + struct macb_queue *queue = container_of(work, struct macb_queue, + tx_error_task); + struct macb *bp = queue->bp; struct macb_tx_skb *tx_skb; + struct macb_dma_desc *desc; struct sk_buff *skb; unsigned int tail; + unsigned long flags; + + netdev_vdbg(bp->dev, "macb_tx_error_task: q = %u, t = %u, h = %u\n", + (unsigned int)(queue - bp->queues), + queue->tx_tail, queue->tx_head); - netdev_vdbg(bp->dev, "macb_tx_error_task: t = %u, h = %u\n", - bp->tx_tail, bp->tx_head); + /* Prevent the queue IRQ handlers from running: each of them may call + * macb_tx_interrupt(), which in turn may call netif_wake_subqueue(). + * As explained below, we have to halt the transmission before updating + * TBQP registers so we call netif_tx_stop_all_queues() to notify the + * network engine about the macb/gem being halted. + */ + spin_lock_irqsave(&bp->lock, flags); /* Make sure nobody is trying to queue up new packets */ - netif_stop_queue(bp->dev); + netif_tx_stop_all_queues(bp->dev); /* * Stop transmission now * (in case we have just queued new packets) + * macb/gem must be halted to write TBQP register */ if (macb_halt_tx(bp)) /* Just complain for now, reinitializing TX path can be good */ netdev_err(bp->dev, "BUG: halt tx timed out\n"); - /* No need for the lock here as nobody will interrupt us anymore */ - /* * Treat frames in TX queue including the ones that caused the error. * Free transmit buffers in upper layer. */ - for (tail = bp->tx_tail; tail != bp->tx_head; tail++) { - struct macb_dma_desc *desc; - u32 ctrl; + for (tail = queue->tx_tail; tail != queue->tx_head; tail++) { + u32 ctrl; - desc = macb_tx_desc(bp, tail); + desc = macb_tx_desc(queue, tail); ctrl = desc->ctrl; - tx_skb = macb_tx_skb(bp, tail); + tx_skb = macb_tx_skb(queue, tail); skb = tx_skb->skb; if (ctrl & MACB_BIT(TX_USED)) { @@ -529,7 +542,7 @@ static void macb_tx_error_task(struct work_struct *work) while (!skb) { macb_tx_unmap(bp, tx_skb); tail++; - tx_skb = macb_tx_skb(bp, tail); + tx_skb = macb_tx_skb(queue, tail); skb = tx_skb->skb; } @@ -558,45 +571,56 @@ static void macb_tx_error_task(struct work_struct *work) macb_tx_unmap(bp, tx_skb); } + /* Set end of TX queue */ + desc = macb_tx_desc(queue, 0); + desc->addr = 0; + desc->ctrl = MACB_BIT(TX_USED); + /* Make descriptor updates visible to hardware */ wmb(); /* Reinitialize the TX desc queue */ - macb_writel(bp, TBQP, bp->tx_ring_dma); + queue_writel(queue, TBQP, queue->tx_ring_dma); |