net-next: mediatek: add support for MT7623 ethernet

Add ethernet support for MediaTek SoCs from the MT7623 family. These have dual GMAC. Depending on the exact version, there might be a built-in Gigabit switch (MT7530). The core does not have the typical DMA ring setup. Instead there is a linked list that we add descriptors to. There is only one linked list that both MACs use together. There is a special field inside the TX descriptors called the VQID. This allows us to assign packets to different internal queues. By using a separate id for each MAC we are able to get deterministic results for BQL. Additionally we need to provide the core with a block of scratch memory that is the same size as the RX ring and data buffer. This is really needed to make the HW datapath work. Although the driver does not support this yet, we still need to assign the memory and tell the core about it for RX to work. Signed-off-by: Felix Fietkau <nbd@openwrt.org> Signed-off-by: Michael Lee <igvtee@gmail.com> Signed-off-by: John Crispin <blogic@openwrt.org> Signed-off-by: David S. Miller <davem@davemloft.net>
author: John Crispin <blogic@openwrt.org> 2016-03-08 11:29:55 +0100
committer: David S. Miller <davem@davemloft.net> 2016-03-10 16:22:12 -0500
commit: 656e705243fd0c2864b89634ea16ed444ef64dc6 (patch)
tree: 91b5f18702da809741c5da88db0c6987ef901fec
parent: 58ff9865b75dab73ccfae89bc8313ca2497b4c8f (diff)
2 files changed, 2228 insertions, 0 deletions
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
new file mode 100644
index 000000000000..ba3afa5d4640
--- /dev/null
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -0,0 +1,1807 @@
+/*   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation; version 2 of the License
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   Copyright (C) 2009-2016 John Crispin <blogic@openwrt.org>
+ *   Copyright (C) 2009-2016 Felix Fietkau <nbd@openwrt.org>
+ *   Copyright (C) 2013-2016 Michael Lee <igvtee@gmail.com>
+ */
+
+#include <linux/of_device.h>
+#include <linux/of_mdio.h>
+#include <linux/of_net.h>
+#include <linux/mfd/syscon.h>
+#include <linux/regmap.h>
+#include <linux/clk.h>
+#include <linux/if_vlan.h>
+#include <linux/reset.h>
+#include <linux/tcp.h>
+
+#include "mtk_eth_soc.h"
+
+static int mtk_msg_level = -1;
+module_param_named(msg_level, mtk_msg_level, int, 0);
+MODULE_PARM_DESC(msg_level, "Message level (-1=defaults,0=none,...,16=all)");
+
+#define MTK_ETHTOOL_STAT(x) { #x, \
+			      offsetof(struct mtk_hw_stats, x) / sizeof(u64) }
+
+/* strings used by ethtool */
+static const struct mtk_ethtool_stats {
+	char str[ETH_GSTRING_LEN];
+	u32 offset;
+} mtk_ethtool_stats[] = {
+	MTK_ETHTOOL_STAT(tx_bytes),
+	MTK_ETHTOOL_STAT(tx_packets),
+	MTK_ETHTOOL_STAT(tx_skip),
+	MTK_ETHTOOL_STAT(tx_collisions),
+	MTK_ETHTOOL_STAT(rx_bytes),
+	MTK_ETHTOOL_STAT(rx_packets),
+	MTK_ETHTOOL_STAT(rx_overflow),
+	MTK_ETHTOOL_STAT(rx_fcs_errors),
+	MTK_ETHTOOL_STAT(rx_short_errors),
+	MTK_ETHTOOL_STAT(rx_long_errors),
+	MTK_ETHTOOL_STAT(rx_checksum_errors),
+	MTK_ETHTOOL_STAT(rx_flow_control_packets),
+};
+
+void mtk_w32(struct mtk_eth *eth, u32 val, unsigned reg)
+{
+	__raw_writel(val, eth->base + reg);
+}
+
+u32 mtk_r32(struct mtk_eth *eth, unsigned reg)
+{
+	return __raw_readl(eth->base + reg);
+}
+
+static int mtk_mdio_busy_wait(struct mtk_eth *eth)
+{
+	unsigned long t_start = jiffies;
+
+	while (1) {
+		if (!(mtk_r32(eth, MTK_PHY_IAC) & PHY_IAC_ACCESS))
+			return 0;
+		if (time_after(jiffies, t_start + PHY_IAC_TIMEOUT))
+			break;
+		usleep_range(10, 20);
+	}
+
+	dev_err(eth->dev, "mdio: MDIO timeout\n");
+	return -1;
+}
+
+u32 _mtk_mdio_write(struct mtk_eth *eth, u32 phy_addr,
+		    u32 phy_register, u32 write_data)
+{
+	if (mtk_mdio_busy_wait(eth))
+		return -1;
+
+	write_data &= 0xffff;
+
+	mtk_w32(eth, PHY_IAC_ACCESS | PHY_IAC_START | PHY_IAC_WRITE |
+		(phy_register << PHY_IAC_REG_SHIFT) |
+		(phy_addr << PHY_IAC_ADDR_SHIFT) | write_data,
+		MTK_PHY_IAC);
+
+	if (mtk_mdio_busy_wait(eth))
+		return -1;
+
+	return 0;
+}
+
+u32 _mtk_mdio_read(struct mtk_eth *eth, int phy_addr, int phy_reg)
+{
+	u32 d;
+
+	if (mtk_mdio_busy_wait(eth))
+		return 0xffff;
+
+	mtk_w32(eth, PHY_IAC_ACCESS | PHY_IAC_START | PHY_IAC_READ |
+		(phy_reg << PHY_IAC_REG_SHIFT) |
+		(phy_addr << PHY_IAC_ADDR_SHIFT),
+		MTK_PHY_IAC);
+
+	if (mtk_mdio_busy_wait(eth))
+		return 0xffff;
+
+	d = mtk_r32(eth, MTK_PHY_IAC) & 0xffff;
+
+	return d;
+}
+
+static int mtk_mdio_write(struct mii_bus *bus, int phy_addr,
+			  int phy_reg, u16 val)
+{
+	struct mtk_eth *eth = bus->priv;
+
+	return _mtk_mdio_write(eth, phy_addr, phy_reg, val);
+}
+
+static int mtk_mdio_read(struct mii_bus *bus, int phy_addr, int phy_reg)
+{
+	struct mtk_eth *eth = bus->priv;
+
+	return _mtk_mdio_read(eth, phy_addr, phy_reg);
+}
+
+static void mtk_phy_link_adjust(struct net_device *dev)
+{
+	struct mtk_mac *mac = netdev_priv(dev);
+	u32 mcr = MAC_MCR_MAX_RX_1536 | MAC_MCR_IPG_CFG |
+		  MAC_MCR_FORCE_MODE | MAC_MCR_TX_EN |
+		  MAC_MCR_RX_EN | MAC_MCR_BACKOFF_EN |
+		  MAC_MCR_BACKPR_EN;
+
+	switch (mac->phy_dev->speed) {
+	case SPEED_1000:
+		mcr |= MAC_MCR_SPEED_1000;
+		break;
+	case SPEED_100:
+		mcr |= MAC_MCR_SPEED_100;
+		break;
+	};
+
+	if (mac->phy_dev->link)
+		mcr |= MAC_MCR_FORCE_LINK;
+
+	if (mac->phy_dev->duplex)
+		mcr |= MAC_MCR_FORCE_DPX;
+
+	if (mac->phy_dev->pause)
+		mcr |= MAC_MCR_FORCE_RX_FC | MAC_MCR_FORCE_TX_FC;
+
+	mtk_w32(mac->hw, mcr, MTK_MAC_MCR(mac->id));
+
+	if (mac->phy_dev->link)
+		netif_carrier_on(dev);
+	else
+		netif_carrier_off(dev);
+}
+
+static int mtk_phy_connect_node(struct mtk_eth *eth, struct mtk_mac *mac,
+				struct device_node *phy_node)
+{
+	const __be32 *_addr = NULL;
+	struct phy_device *phydev;
+	int phy_mode, addr;
+
+	_addr = of_get_property(phy_node, "reg", NULL);
+
+	if (!_addr || (be32_to_cpu(*_addr) >= 0x20)) {
+		pr_err("%s: invalid phy address\n", phy_node->name);
+		return -EINVAL;
+	}
+	addr = be32_to_cpu(*_addr);
+	phy_mode = of_get_phy_mode(phy_node);
+	if (phy_mode < 0) {
+		dev_err(eth->dev, "incorrect phy-mode %d\n", phy_mode);
+		return -EINVAL;
+	}
+
+	phydev = of_phy_connect(eth->netdev[mac->id], phy_node,
+				mtk_phy_link_adjust, 0, phy_mode);
+	if (IS_ERR(phydev)) {
+		dev_err(eth->dev, "could not connect to PHY\n");
+		return PTR_ERR(phydev);
+	}
+
+	dev_info(eth->dev,
+		 "connected mac %d to PHY at %s [uid=%08x, driver=%s]\n",
+		 mac->id, phydev_name(phydev), phydev->phy_id,
+		 phydev->drv->name);
+
+	mac->phy_dev = phydev;
+
+	return 0;
+}
+
+static int mtk_phy_connect(struct mtk_mac *mac)
+{
+	struct mtk_eth *eth = mac->hw;
+	struct device_node *np;
+	u32 val, ge_mode;
+
+	np = of_parse_phandle(mac->of_node, "phy-handle", 0);
+	if (!np)
+		return -ENODEV;
+
+	switch (of_get_phy_mode(np)) {
+	case PHY_INTERFACE_MODE_RGMII:
+		ge_mode = 0;
+		break;
+	case PHY_INTERFACE_MODE_MII:
+		ge_mode = 1;
+		break;
+	case PHY_INTERFACE_MODE_RMII:
+		ge_mode = 2;
+		break;
+	default:
+		dev_err(eth->dev, "invalid phy_mode\n");
+		return -1;
+	}
+
+	/* put the gmac into the right mode */
+	regmap_read(eth->ethsys, ETHSYS_SYSCFG0, &val);
+	val &= ~SYSCFG0_GE_MODE(SYSCFG0_GE_MASK, mac->id);
+	val |= SYSCFG0_GE_MODE(ge_mode, mac->id);
+	regmap_write(eth->ethsys, ETHSYS_SYSCFG0, val);
+
+	mtk_phy_connect_node(eth, mac, np);
+	mac->phy_dev->autoneg = AUTONEG_ENABLE;
+	mac->phy_dev->speed = 0;
+	mac->phy_dev->duplex = 0;
+	mac->phy_dev->supported &= PHY_BASIC_FEATURES;
+	mac->phy_dev->advertising = mac->phy_dev->supported |
+				    ADVERTISED_Autoneg;
+	phy_start_aneg(mac->phy_dev);
+
+	return 0;
+}
+
+static int mtk_mdio_init(struct mtk_eth *eth)
+{
+	struct device_node *mii_np;
+	int err;
+
+	mii_np = of_get_child_by_name(eth->dev->of_node, "mdio-bus");
+	if (!mii_np) {
+		dev_err(eth->dev, "no %s child node found", "mdio-bus");
+		return -ENODEV;
+	}
+
+	if (!of_device_is_available(mii_np)) {
+		err = 0;
+		goto err_put_node;
+	}
+
+	eth->mii_bus = mdiobus_alloc();
+	if (!eth->mii_bus) {
+		err = -ENOMEM;
+		goto err_put_node;
+	}
+
+	eth->mii_bus->name = "mdio";
+	eth->mii_bus->read = mtk_mdio_read;
+	eth->mii_bus->write = mtk_mdio_write;
+	eth->mii_bus->priv = eth;
+	eth->mii_bus->parent = eth->dev;
+
+	snprintf(eth->mii_bus->id, MII_BUS_ID_SIZE, "%s", mii_np->name);
+	err = of_mdiobus_register(eth->mii_bus, mii_np);
+	if (err)
+		goto err_free_bus;
+
+	return 0;
+
+err_free_bus:
+	kfree(eth->mii_bus);
+
+err_put_node:
+	of_node_put(mii_np);
+	eth->mii_bus = NULL;
+	return err;
+}
+
+static void mtk_mdio_cleanup(struct mtk_eth *eth)
+{
+	if (!eth->mii_bus)
+		return;
+
+	mdiobus_unregister(eth->mii_bus);
+	of_node_put(eth->mii_bus->dev.of_node);
+	kfree(eth->mii_bus);
+}
+
+static inline void mtk_irq_disable(struct mtk_eth *eth, u32 mask)
+{
+	u32 val;
+
+	val = mtk_r32(eth, MTK_QDMA_INT_MASK);
+	mtk_w32(eth, val & ~mask, MTK_QDMA_INT_MASK);
+	/* flush write */
+	mtk_r32(eth, MTK_QDMA_INT_MASK);
+}
+
+static inline void mtk_irq_enable(struct mtk_eth *eth, u32 mask)
+{
+	u32 val;
+
+	val = mtk_r32(eth, MTK_QDMA_INT_MASK);
+	mtk_w32(eth, val | mask, MTK_QDMA_INT_MASK);
+	/* flush write */
+	mtk_r32(eth, MTK_QDMA_INT_MASK);
+}
+
+static int mtk_set_mac_address(struct net_device *dev, void *p)
+{
+	int ret = eth_mac_addr(dev, p);
+	struct mtk_mac *mac = netdev_priv(dev);
+	const char *macaddr = dev->dev_addr;
+	unsigned long flags;
+
+	if (ret)
+		return ret;
+
+	spin_lock_irqsave(&mac->hw->page_lock, flags);
+	mtk_w32(mac->hw, (macaddr[0] << 8) | macaddr[1],
+		MTK_GDMA_MAC_ADRH(mac->id));
+	mtk_w32(mac->hw, (macaddr[2] << 24) | (macaddr[3] << 16) |
+		(macaddr[4] << 8) | macaddr[5],
+		MTK_GDMA_MAC_ADRL(mac->id));
+	spin_unlock_irqrestore(&mac->hw->page_lock, flags);
+
+	return 0;
+}
+
+void mtk_stats_update_mac(struct mtk_mac *mac)
+{
+	struct mtk_hw_stats *hw_stats = mac->hw_stats;
+	unsigned int base = MTK_GDM1_TX_GBCNT;
+	u64 stats;
+
+	base += hw_stats->reg_offset;
+
+	u64_stats_update_begin(&hw_stats->syncp);
+
+	hw_stats->rx_bytes += mtk_r32(mac->hw, base);
+	stats =  mtk_r32(mac->hw, base + 0x04);
+	if (stats)
+		hw_stats->rx_bytes += (stats << 32);
+	hw_stats->rx_packets += mtk_r32(mac->hw, base + 0x08);
+	hw_stats->rx_overflow += mtk_r32(mac->hw, base + 0x10);
+	hw_stats->rx_fcs_errors += mtk_r32(mac->hw, base + 0x14);
+	hw_stats->rx_short_errors += mtk_r32(mac->hw, base + 0x18);
+	hw_stats->rx_long_errors += mtk_r32(mac->hw, base + 0x1c);
+	hw_stats->rx_checksum_errors += mtk_r32(mac->hw, base + 0x20);
+	hw_stats->rx_flow_control_packets +=
+					mtk_r32(mac->hw, base + 0x24);
+	hw_stats->tx_skip += mtk_r32(mac->hw, base + 0x28);
+	hw_stats->tx_collisions += mtk_r32(mac->hw, base + 0x2c);
+	hw_stats->tx_bytes += mtk_r32(mac->hw, base + 0x30);
+	stats =  mtk_r32(mac->hw, base + 0x34);
+	if (stats)
+		hw_stats->tx_bytes += (stats << 32);
+	hw_stats->tx_packets += mtk_r32(mac->hw, base + 0x38);
+	u64_stats_update_end(&hw_stats->syncp);
+}
+
+static void mtk_stats_update(struct mtk_eth *eth)
+{
+	int i;
+
+	for (i = 0; i < MTK_MAC_COUNT; i++) {
+		if (!eth->mac[i] || !eth->mac[i]->hw_stats)
+			continue;
+		if (spin_trylock(&eth->mac[i]->hw_stats->stats_lock)) {
+			mtk_stats_update_mac(eth->mac[i]);
+			spin_unlock(&eth->mac[i]->hw_stats->stats_lock);
+		}
+	}
+}
+
+static struct rtnl_link_stats64 *mtk_get_stats64(struct net_device *dev,
+					struct rtnl_link_stats64 *storage)
+{
+	struct mtk_mac *mac = netdev_priv(dev);
+	struct mtk_hw_stats *hw_stats = mac->hw_stats;
+	unsigned int start;
+
+	if (netif_running(dev) && netif_device_present(dev)) {
+		if (spin_trylock(&hw_stats->stats_lock)) {
+			mtk_stats_update_mac(mac);
+			spin_unlock(&hw_stats->stats_lock);
+		}
+	}
+
+	do {
+		start = u64_stats_fetch_begin_irq(&hw_stats->syncp);
+		storage->rx_packets = hw_stats->rx_packets;
+		storage->tx_packets = hw_stats->tx_packets;
+		storage->rx_bytes = hw_stats->rx_bytes;
+		storage->tx_bytes = hw_stats->tx_bytes;
+		storage->collisions = hw_stats->tx_collisions;
+		storage->rx_length_errors = hw_stats->rx_short_errors +
+			hw_stats->rx_long_errors;
+		storage->rx_over_errors = hw_stats->rx_overflow;
+		storage->rx_crc_errors = hw_stats->rx_fcs_errors;
+		storage->rx_errors = hw_stats->rx_checksum_errors;
+		storage->tx_aborted_errors = hw_stats->tx_skip;
+	} while (u64_stats_fetch_retry_irq(&hw_stats->syncp, start));
+
+	storage->tx_errors = dev->stats.tx_errors;
+	storage->rx_dropped = dev->stats.rx_dropped;
+	storage->tx_dropped = dev->stats.tx_dropped;
+
+	return storage;
+}
+
+static inline int mtk_max_frag_size(int mtu)
+{
+	/* make sure buf_size will be at least MTK_MAX_RX_LENGTH */
+	if (mtu + MTK_RX_ETH_HLEN < MTK_MAX_RX_LENGTH)
+		mtu = MTK_MAX_RX_LENGTH - MTK_RX_ETH_HLEN;
+
+	return SKB_DATA_ALIGN(MTK_RX_HLEN + mtu) +
+		SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+}
+
+static inline int mtk_max_buf_size(int frag_size)
+{
+	int buf_size = frag_size - NET_SKB_PAD - NET_IP_ALIGN -
+		       SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+
+	WARN_ON(buf_size < MTK_MAX_RX_LENGTH);
+
+	return buf_size;
+}
+
+static inline void mtk_rx_get_desc(struct mtk_rx_dma *rxd,
+				   struct mtk_rx_dma *dma_rxd)
+{
+	rxd->rxd1 = READ_ONCE(dma_rxd->rxd1);
+	rxd->rxd2 = READ_ONCE(dma_rxd->rxd2);
+	rxd->rxd3 = READ_ONCE(dma_rxd->rxd3);
+	rxd->rxd4 = READ_ONCE(dma_rxd->rxd4);
+}
+
+/* the qdma core needs scratch memory to be setup */
+static int mtk_init_fq_dma(struct mtk_eth *eth)
+{
+	unsigned int phy_ring_head, phy_ring_tail;
+	int cnt = MTK_DMA_SIZE;
+	dma_addr_t dma_addr;
+	int i;
+
+	eth->scratch_ring = dma_alloc_coherent(eth->dev,
+					       cnt * sizeof(struct mtk_tx_dma),
+					       &phy_ring_head,
+					       GFP_ATOMIC | __GFP_ZERO);
+	if (unlikely(!eth->scratch_ring))
+		return -ENOMEM;
+
+	eth->scratch_head = kcalloc(cnt, MTK_QDMA_PAGE_SIZE,
+				    GFP_KERNEL);
+	dma_addr = dma_map_single(eth->dev,
+				  eth->scratch_head, cnt * MTK_QDMA_PAGE_SIZE,
+				  DMA_FROM_DEVICE);
+	if (unlikely(dma_mapping_error(eth->dev, dma_addr)))
+		return -ENOMEM;
+
+	memset(eth->scratch_ring, 0x0, sizeof(struct mtk_tx_dma) * cnt);
+	phy_ring_tail = phy_ring_head +
+			(sizeof(struct mtk_tx_dma) * (cnt - 1));
+
+	for (i = 0; i < cnt; i++) {
+		eth->scratch_ring[i].txd1 =
+					(dma_addr + (i * MTK_QDMA_PAGE_SIZE));
+		if (i < cnt - 1)
+			eth->scratch_ring[i].txd2 = (phy_ring_head +
+				((i + 1) * sizeof(struct mtk_tx_dma)));
+		eth->scratch_ring[i].txd3 = TX_DMA_SDL(MTK_QDMA_PAGE_SIZE);
+	}
+
+	mtk_w32(eth, phy_ring_head, MTK_QDMA_FQ_HEAD);
+	mtk_w32(eth, phy_ring_tail, MTK_QDMA_FQ_TAIL);
+	mtk_w32(eth, (cnt << 16) | cnt, MTK_QDMA_FQ_CNT);
+	mtk_w32(eth, MTK_QDMA_PAGE_SIZE << 16, MTK_QDMA_FQ_BLEN);
+
+	return 0;
+}
+
+static inline void *mtk_qdma_phys_to_virt(struct mtk_tx_ring *ring, u32 desc)
+{
+	void *ret = ring->dma;
+
+	return ret + (desc - ring->phys);
+}
+
+static inline struct mtk_tx_buf *mtk_desc_to_tx_buf(struct mtk_tx_ring *ring,
+						    struct mtk_tx_dma *txd)
+{
+	int idx = txd - ring->dma;
+
+	return &ring->buf[idx];
+}
+
+static void mtk_tx_unmap(struct device *dev, struct mtk_tx_buf *tx_buf)
+{
+	if (tx_buf->flags & MTK_TX_FLAGS_SINGLE0) {
+		dma_unmap_single(dev,
+				 dma_unmap_addr(tx_buf, dma_addr0),
+				 dma_unmap_len(tx_buf, dma_len0),
+				 DMA_TO_DEVICE);
+	} else if (tx_buf->flags & MTK_TX_FLAGS_PAGE0) {
+		dma_unmap_page(dev,
+			       dma_unmap_addr(tx_buf, dma_addr0),
+			       dma_unmap_len(tx_buf, dma_len0),
+			       DMA_TO_DEVICE);
+	}
+	tx_buf->flags = 0;
+	if (tx_buf->skb &&
+	    (tx_buf->skb != (struct sk_buff *)MTK_DMA_DUMMY_DESC))
+		dev_kfree_skb_any(tx_buf->skb);
+	tx_buf->skb = NULL;
+}
+
+static int mtk_tx_map(struct sk_buff *skb, struct net_device *dev,
+		      int tx_num, struct mtk_tx_ring *ring, bool gso)
+{
+	struct mtk_mac *mac = netdev_priv(dev);
+	struct mtk_eth *eth = mac->hw;
+	struct mtk_tx_dma *itxd, *txd;
+	struct mtk_tx_buf *tx_buf;
+	unsigned long flags;
+	dma_addr_t mapped_addr;
+	unsigned int nr_frags;
+	int i, n_desc = 1;
+	u32 txd4 = 0;
+
+	itxd = ring->next_free;
+	if (itxd == ring->last_free)
+		return -ENOMEM;
+
+	/* set the forward port */
+	txd4 |= (mac->id + 1) << TX_DMA_FPORT_SHIFT;
+
+	tx_buf = mtk_desc_to_tx_buf(ring, itxd);
+	memset(tx_buf, 0, sizeof(*tx_buf));
+
+	if (gso)
+		txd4 |= TX_DMA_TSO;
+
+	/* TX Checksum offload */
+	if (skb->ip_summed == CHECKSUM_PARTIAL)
+		txd4 |= TX_DMA_CHKSUM;
+
+	/* VLAN header offload */
+	if (skb_vlan_tag_present(skb))
+		txd4 |= TX_DMA_INS_VLAN | skb_vlan_tag_get(skb);
+
+	mapped_addr = dma_map_single(&dev->dev, skb->data,
+				     skb_headlen(skb), DMA_TO_DEVICE);
+	if (unlikely(dma_mapping_error(&dev->dev, mapped_addr)))
+		return -ENOMEM;
+
+	/* normally we can rely on the stack not calling this more than once,
+	 * however we have 2 queues running ont he same ring so we need to lock
+	 * the ring access
+	 */
+	spin_lock_irqsave(&eth->page_lock, flags);
+	WRITE_ONCE(itxd->txd1, mapped_addr);
+	tx_buf->flags |= MTK_TX_FLAGS_SINGLE0;
+	dma_unmap_addr_set(tx_buf, dma_addr0, mapped_addr);
+	dma_unmap_len_set(tx_buf, dma_len0, skb_headlen(skb));
+
+	/* TX SG offload */
+	txd = itxd;
+	nr_frags = skb_shinfo(skb)->nr_frags;
+	for (i = 0; i < nr_frags; i++) {
+		struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i];
+		unsigned int offset = 0;
+		int frag_size = skb_frag_size(frag);
+
+		while (frag_size) {
+			bool last_frag = false;
+			unsigned int frag_map_size;
+
+			txd = mtk_qdma_phys_to_virt(ring, txd->txd2);
+			if (txd == ring->last_free)
+				goto err_dma;
+
+			n_desc++;
+			frag_map_size = min(frag_size, MTK_TX_DMA_BUF_LEN);
+			mapped_addr = skb_frag_dma_map(&dev->dev, frag, offset,
+						       frag_map_size,
+						       DMA_TO_DEVICE);
+			if (unlikely(dma_mapping_error(&dev->dev, mapped_addr)))
+				goto err_dma;
+
+			if (i == nr_frags - 1 &&
+			    (frag_size - frag_map_size) == 0)
+				last_frag = true;
+
+			WRITE_ONCE(txd->txd1, mapped_addr);
+			WRITE_ONCE(txd->txd3, (TX_DMA_SWC |
+					       TX_DMA_PLEN0(frag_map_size) |
+					       last_frag * TX_DMA_LS0) |
+					       mac->id);
+			WRITE_ONCE(txd->txd4, 0);
+
+			tx_buf->skb = (struct sk_buff *)MTK_DMA_DUMMY_DESC;
+			tx_buf = mtk_desc_to_tx_buf(ring, txd);
+			memset(tx_buf, 0, sizeof(*tx_buf));
+
+			tx_buf->flags |= MTK_TX_FLAGS_PAGE0;
+			dma_unmap_addr_set(tx_buf, dma_addr0, mapped_addr);
+			dma_unmap_len_set(tx_buf, dma_len0, frag_map_size);
+			frag_size -= frag_map_size;
+			offset += frag_map_size;
+		}
+	}
+
+	/* store skb to cleanup */
+	tx_buf->skb = skb;
+
+	WRITE_ONCE(itxd->txd4, txd4);
+	WRITE_ONCE(itxd->txd3, (TX_DMA_SWC | TX_DMA_PLEN0(skb_headlen(skb)) |
+				(!nr_frags * TX_DMA_LS0)));
+
+	spin_unlock_irqrestore(&eth->page_lock, flags);
+
+	netdev_sent_queue(dev, skb->len);
+	skb_tx_timestamp(skb);
+
+	ring->next_free = mtk_qdma_phys_to_virt(ring, txd->txd2);
+	atomic_sub(n_desc, &ring->free_count);
+
+	/* make sure that all changes to the dma ring are flushed before we
+	 * continue
+	 */
+	wmb();
+
+	if (netif_xmit_stopped(netdev_get_tx_queue(dev, 0)) || !skb->xmit_more)
+		mtk_w32(eth, txd->txd2, MTK_QTX_CTX_PTR);
+
+	return 0;
+
+err_dma:
+	do {
+		tx_buf = mtk_desc_to_tx_buf(ring, txd);
+
+		/* unmap dma */
+		mtk_tx_unmap(&dev->dev, tx_buf);
+
+		itxd->txd3 = TX_DMA_LS0 | TX_DMA_OWNER_CPU;
+		itxd = mtk_qdma_phys_to_virt(ring, itxd->txd2);
+	} while (itxd != txd);
+
+	return -ENOMEM;
+}
+
+static inline int mtk_cal_txd_req(struct sk_buff *skb)
+{
+	int i, nfrags;
+	struct skb_frag_struct *frag;
+
+	nfrags = 1;
+	if (skb_is_gso(skb)) {
+		for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+			frag = &skb_shinfo(skb)->frags[i];
+			nfrags += DIV_ROUND_UP(frag->size, MTK_TX_DMA_BUF_LEN);
+		}
+	} else {
+		nfrags += skb_shinfo(skb)->nr_frags;
+	}
+
+	return DIV_ROUND_UP(nfrags, 2);
+}
+
+static int mtk_start_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+	struct mtk_mac *mac = netdev_priv(dev);
+	struct mtk_eth *eth = mac->hw;
+	struct mtk_tx_ring *ring = &eth->tx_ring;
+	struct net_device_stats *stats = &dev->stats;
+	bool gso = false;
+	int tx_num;
+
+	tx_num = mtk_cal_txd_req(skb);
+	if (unlikely(atomic_read(&ring->free_count) <= tx_num)) {
+		netif_stop_queue(dev);
+		netif_err(eth, tx_queued, dev,
+			  "Tx Ring full when queue awake!\n");
+		return NETDEV_TX_BUSY;
+	}
+
+	/* TSO: fill MSS info in tcp checksum field */
+	if (skb_is_gso(skb)) {
+		if (skb_cow_head(skb, 0)) {
+			netif_warn(eth, tx_err, dev,
+				   "GSO expand head fail.\n");
+			goto drop;
+		}
+
+		if (skb_shinfo(skb)->gso_type &
+				(SKB_GSO_TCPV4 | SKB_GSO_TCPV6)) {
+			gso = true;
+			tcp_hdr(skb)->check = htons(skb_shinfo(skb)->gso_size);
+		}
+	}
+
+	if (mtk_tx_map(skb, dev, tx_num, ring, gso) < 0)
+		goto drop;
+
+	if (unlikely(atomic_read(&ring->free_count) <= ring->thresh)) {
+		netif_stop_queue(dev);
+		if (unlikely(atomic_read(&ring->free_count) >
+			     ring->thresh))
+			netif_wake_queue(dev);
+	}
+
+	return NETDEV_TX_OK;
+
+drop:
+	stats->tx_dropped++;
+	dev_kfree_skb(skb);
+	return NETDEV_TX_OK;
+}
+
+static int mtk_poll_rx(struct napi_struct *napi, int budget,
+		       struct mtk_eth *eth, u32 rx_intr)
+{
+	struct mtk_rx_ring *ring = &eth->rx_ring;
+	int idx = ring->calc_idx;
+	struct sk_buff *skb;
+	u8 *data, *new_data;
+	struct mtk_rx_dma *rxd, trxd;
+	int done = 0;
+
+	while (done < budget) {
+		struct net_device *netdev;
+		unsigned int pktlen;
+		dma_addr_t dma_addr;
+		int mac = 0;
+
+		idx = NEXT_RX_DESP_IDX(idx);
+		rxd = &ring->dma[idx];
+		data = ring->data[idx];
+
+		mtk_rx_get_desc(&trxd, rxd);
+		if (!(trxd.rxd2 & RX_DMA_DONE))
+			break;
+
+		/* find out which mac the packet come from. values start at 1 */
+		mac = (trxd.rxd4 >> RX_DMA_FPORT_SHIFT) &
+		      RX_DMA_FPORT_MASK;
+		mac--;
+
+		netdev = eth->netdev[mac];
+
+		/* alloc new buffer */
+		new_data = napi_alloc_frag(ring->frag_size);
+		if (unlikely(!new_data)) {
+			netdev->stats.rx_dropped++;
+			goto release_desc;
+		}
+		dma_addr = dma_map_single(&eth->netdev[mac]->dev,
+					  new_data + NET_SKB_PAD,
+					  ring->buf_size,
+					  DMA_FROM_DEVICE);
+		if (unlikely(dma_mapping_error(&netdev->dev, dma_addr))) {
+			skb_free_frag(new_data);
+			goto release_desc;
+		}
+
+		/* receive data */
+		skb = build_skb(data, ring->frag_size);
+		if (unlikely(!skb)) {
+			put_page(virt_to_head_page(new_data));
+			goto release_desc;
+		}
+		skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
+
+		dma_unmap_single(&netdev->dev, trxd.rxd1,
+				 ring->buf_size, DMA_FROM_DEVICE);
+		pktlen = RX_DMA_GET_PLEN0(trxd.rxd2);
+		skb->dev = netdev;
+		skb_put(skb, pktlen);
+		if (trxd.rxd4 & RX_DMA_L4_VALID)
+			skb->ip_summed = CHECKSUM_UNNECESSARY;
+		else
+			skb_checksum_none_assert(skb);
+		skb->protocol = eth_type_trans(skb, netdev);
+
+		if (netdev->features & NETIF_F_HW_VLAN_CTAG_RX &&
+		    RX_DMA_VID(trxd.rxd3))
+			__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
+					       RX_DMA_VID(trxd.rxd3));
+		napi_gro_receive(napi, skb);
+
+		ring->data[idx] = new_data;
+		rxd->rxd1 = (unsigned int)dma_addr;
+
+release_desc:
+		rxd->rxd2 = RX_DMA_PLEN0(ring->buf_size);
+
+		ring->calc_idx = idx;
+		/* make sure that all changes to the dma ring are flushed before
+		 * we continue
+		 */
+		wmb();
+		mtk_w32(eth, ring->calc_idx, MTK_QRX_CRX_IDX0);
+		done++;
+	}
+
+	if (done < budget)
+		mtk_w32(eth, rx_intr, MTK_QMTK_INT_STATUS);
+
+	return done;
+}
+
+static int mtk_poll_tx(struct mtk_eth *eth, int budget, bool *tx_again)
+{
+	struct mtk_tx_ring *ring = &eth->tx_ring;
+	struct mtk_tx_dma *desc;
+	struct sk_buff *skb;
+	struct mtk_tx_buf *tx_buf;
+	int total = 0, done[MTK_MAX_DEVS];
+	unsigned int bytes[MTK_MAX_DEVS];
+	u32 cpu, dma;
+	static int condition;
+	int i;
+
+	memset(done, 0, sizeof(done));
+	memset(bytes, 0, sizeof(bytes));
+
+	cpu = mtk_r32(eth, MTK_QTX_CRX_PTR);
+	dma = mtk_r32(eth, MTK_QTX_DRX_PTR);
+
+	desc = mtk_qdma_phys_to_virt(ring, cpu);
+
+	while ((cpu != dma) && budget) {
+		u32 next_cpu = desc->txd2;
+		int mac;
+
+		desc = mtk_qdma_phys_to_virt(ring, desc->txd2);
+		if ((desc->txd3 & TX_DMA_OWNER_CPU) == 0)
+			break;
+
+		mac = (desc->txd4 >> TX_DMA_FPORT_SHIFT) &
+		       TX_DMA_FPORT_MASK;
+		mac--;
+
+		tx_buf = mtk_desc_to_tx_buf(ring, desc);
+		skb = tx_buf->skb;
+		if (!skb) {
+			condition = 1;
+			break;
+		}
+
+		if (skb != (struct sk_buff *)MTK_DMA_DUMMY_DESC) {
+			bytes[mac] += skb->len;
+			done[mac]++;
+			budget--;
+		}
+		mtk_tx_unmap(eth->dev, tx_buf);
+
+		ring->last_free->txd2 = next_cpu;
+		ring->last_free = desc;
+		atomic_inc(&ring->free_count);
+
+		cpu = next_cpu;
+	}
+
+	mtk_w32(eth, cpu, MTK_QTX_CRX_PTR);
+
+	for (i = 0; i < MTK_MAC_COUNT; i++) {
+		if (!eth->netdev[i] || !done[i])
+			continue;
+		netdev_completed_queue(eth->netdev[i], done[i], bytes[i]);
+		total += done[i];
+	}
+
+	/* read hw index again make sure no new tx packet */
+	if (cpu != dma || cpu != mtk_r32(eth, MTK_QTX_DRX_PTR))
+		*tx_again = true;
+	else
+		mtk_w32(eth, MTK_TX_DONE_INT, MTK_QMTK_INT_STATUS);
+
+	if (!total)
+		return 0;
+
+	for (i = 0; i < MTK_MAC_COUNT; i++) {
+		if (!eth->netdev[i] ||
+		    unlikely(!netif_queue_stopped(eth->netdev[i])))
+			continue;
+		if (atomic_read(&ring->free_count) > ring->thresh)
+			netif_wake_queue(eth->netdev[i]);
+	}
+
+	return total;
+}
+
+static int mtk_poll(struct napi_struct *napi, int budget)
+{
+	struct mtk_eth *eth = container_of(napi, struct mtk_eth, rx_napi);
+	u32 status, status2, mask, tx_intr, rx_intr, status_intr;
+	int tx_done, rx_done;
+	bool tx_again = false;
+
+	status = mtk_r32(eth, MTK_QMTK_INT_STATUS);
+	status2 = mtk_r32(eth, MTK_INT_STATUS2);
+	tx_intr = MTK_TX_DONE_INT;
+	rx_intr = MTK_RX_DONE_INT;
+	status_intr = (MTK_GDM1_AF | MTK_GDM2_AF);
+	tx_done = 0;
+	rx_done = 0;
+	tx_again = 0;
+
+	if (status & tx_intr)
+		tx_done = mtk_poll_tx(eth, budget, &tx_again);
+
+	if (status & rx_intr)
+		rx_done = mtk_poll_rx(napi, budget, eth, rx_intr);
+
+	if (unlikely(status2 & status_intr)) {
+		mtk_stats_update(eth);
+		mtk_w32(eth, status_intr, MTK_INT_STATUS2);
+	}
+
+	if (unlikely(netif_msg_intr(eth))) {
+		mask = mtk_r32(eth, MTK_QDMA_INT_MASK);
+		netdev_info(eth->netdev[0],
+			    "done tx %d, rx %d, intr 0x%08x/0x%x\n",
+			    tx_done, rx_done, status, mask);
+	}
+
+	if (tx_again || rx_done == budget)
+		return budget;
+
+	status = mtk_r32(eth, MTK_QMTK_INT_STATUS);
+	if (status & (tx_intr | rx_intr))
+		return budget;
+
+	napi_complete(napi);
+	mtk_irq_enable(eth, tx_intr | rx_intr);
+
+	return rx_done;
+}
+
+static int mtk_tx_alloc(struct mtk_eth *eth)
+{
+	struct mtk_tx_ring *ring = &eth->tx_ring;
+	int i, sz = sizeof(*ring->dma);
+
+	ring->buf = kcalloc(MTK_DMA_SIZE, sizeof(*ring->buf),
+			       GFP_KERNEL);
+	if (!ring->buf)
+		goto no_tx_mem;
+
+	ring->dma = dma_alloc_coherent(eth->dev,
+					  MTK_DMA_SIZE * sz,
+					  &ring->phys,
+					  GFP_ATOMIC | __GFP_ZERO);
+	if (!ring->dma)
+		goto no_tx_mem;
+
+	memset(ring->dma, 0, MTK_DMA_SIZE * sz);
+	for (i = 0; i < MTK_DMA_SIZE; i++) {
+		int next = (i + 1) % MTK_DMA_SIZE;
+		u32 next_ptr = ring->phys + next * sz;
+
+		ring->dma[i].txd2 = next_ptr;
+		ring->dma[i].txd3 = TX_DMA_LS0 | TX_DMA_OWNER_CPU;
+	}
+
+	atomic_set(&ring->free_count, MTK_DMA_SIZE - 2);
+	ring->next_free = &ring->dma[0];
+	ring->last_free = &ring->dma[MTK_DMA_SIZE - 2];
+	ring->thresh = max((unsigned long)MTK_DMA_SIZE >> 2,
+			      MAX_SKB_FRAGS);
+
+	/* make sure that all changes to the dma ring are flushed before we
+	 * continue
+	 */
+	wmb();
+
+	mtk_w32(eth, ring->phys, MTK_QTX_CTX_PTR);
+	mtk_w32(eth, ring->phys, MTK_QTX_DTX_PTR);
+	mtk_w32(eth,
+		ring->phys + ((MTK_DMA_SIZE - 1) * sz),
+		MTK_QTX_CRX_PTR);
+	mtk_w32(eth,
+		ring->phys + ((MTK_DMA_SIZE - 1) * sz),
+		MTK_QTX_DRX_PTR);
+
+	return 0;
+
+no_tx_mem:
+	return -ENOMEM;
+}
+
+static void mtk_tx_clean(struct mtk_eth *eth)
+{
+	struct mtk_tx_ring *ring = &eth->tx_ring;
+	int i;
+
+	if (ring->buf) {
+		for (i = 0; i < MTK_DMA_SIZE; i++)
+			mtk_tx_unmap(eth->dev, &ring->buf[i]);
+		kfree(ring->buf);
+		ring->buf = NULL;
+	}
+
+	if (ring->dma) {
+		dma_free_coherent(eth->dev,
+				  MTK_DMA_SIZE * sizeof(*ring->dma),
+				  ring->dma,
+				  ring->phys);
+		ring->dma = NULL;
+	}
+}
+
+static int mtk_rx_alloc(struct mtk_eth *eth)
+{
+	struct mtk_rx_ring *ring = &eth->rx_ring;
+	int i;
+
+	ring->frag_size = mtk_max_frag_size(ETH_DATA_LEN);
+	ring->buf_size = mtk_max_buf_size(ring->frag_size);
+	ring->data = kcalloc(MTK_DMA_SIZE, sizeof(*ring->data),
+			     GFP_KERNEL);
+	if (!ring->data)
+		return -ENOMEM;
+
+	for (i = 0; i < MTK_DMA_SIZE; i++) {
+		ring->data[i] = netdev_alloc_frag(ring->frag_size);
+		if (!ring->data[i])
+			return -ENOMEM;
+	}
+
+	ring->dma = dma_alloc_coherent(eth->dev,
+				       MTK_DMA_SIZE * sizeof(*ring->dma),
+				       &ring->phys,
+				       GFP_ATOMIC | __GFP_ZERO);
+	if (!ring->dma)
+		return -ENOMEM;
+
+	for (i = 0; i < MTK_DMA_SIZE; i++) {
+		dma_addr_t dma_addr = dma_map_single(eth->dev,
+				ring->data[i] + NET_SKB_PAD,
+				ring->buf_size,
+				DMA_FROM_DEVICE);
+		if (unlikely(dma_mapping_error(eth->dev, dma_addr)))
+			return -ENOMEM;
+		ring->dma[i].rxd1 = (unsigned int)dma_addr;
+
+		ring->dma[i].rxd2 = RX_DMA_PLEN0(ring->buf_size);
+	}
+	ring->calc_idx = MTK_DMA_SIZE - 1;
+	/* make sure that all changes to the dma ring are flushed before we
+	 * continue
+	 */
+	wmb();
+
+	mtk_w32(eth, eth->rx_ring.phys, MTK_QRX_BASE_PTR0);
+	mtk_w32(eth, MTK_DMA_SIZE, MTK_QRX_MAX_CNT0);
+	mtk_w32(eth, eth->rx_ring.calc_idx, MTK_QRX_CRX_IDX0);
+	mtk_w32(eth, MTK_PST_DRX_IDX0, MTK_QDMA_RST_IDX);
+	mtk_w32(eth, (QDMA_RES_THRES << 8) | QDMA_RES_THRES, MTK_QTX_CFG(0));
+
+	return 0;
+}
+
+static void mtk_rx_clean(struct mtk_eth *eth)
+{
+	struct mtk_rx_ring *ring = &eth->rx_ring;
+	int i;
+
+	if (ring->data && ring->dma) {
+		for (i = 0; i < MTK_DMA_SIZE; i++) {
+			if (!ring->data[i])
+				continue;
+			if (!ring->dma[i].rxd1)
+				continue;
+			dma_unmap_single(eth->dev,
+					 ring->dma[i].rxd1,
+					 ring->buf_size,
+					 DMA_FROM_DEVICE);
+			skb_free_frag(ring->data[i]);
+		}
+		kfree(ring->data);
+		ring->data = NULL;
+	}
+
+	if (ring->dma) {
+		dma_free_coherent(eth->dev,
+				  MTK_DMA_SIZE * sizeof(*ring->dma),
+				  ring->dma,
+				  ring->phys);
+		ring->dma = NULL;
+	}
+}
+
+/* wait for DMA to finish whatever it is doing before we start using it again */
+static int mtk_dma_busy_wait(struct mtk_eth *eth)
+{
+	unsigned long t_start = jiffies;
+
+	while (1) {
+		if (!(mtk_r32(eth, MTK_QDMA_GLO_CFG) &
+		      (MTK_RX_DMA_BUSY | MTK_TX_DMA_BUSY)))
+			return 0;
+		if (time_after(jiffies, t_start + MTK_DMA_BUSY_TIMEOUT))
+			break;
+	}
+
+	dev_err(eth->dev, "DMA init timeout\n");
+	return -1;
+}
+
+static int mtk_dma_init(struct mtk_eth *eth)
+{
+	int err;
+
+	if (mtk_dma_busy_wait(eth))
+		return -EBUSY;
+
+	/* QDMA needs scratch memory for internal reordering of the
+	 * descriptors
+	 */
+	err = mtk_init_fq_dma(eth);
+	if (err)
+		return err;
+
+	err = mtk_tx_alloc(eth);
+	if (err)
+		return err;
+
+	err = mtk_rx_alloc(eth);
+	if (err)
+		return err;
+
+	/* Enable random early drop and set drop threshold automatically */
+	mtk_w32(eth, FC_THRES_DROP_MODE | FC_THRES_DROP_EN | FC_THRES_MIN,
+		MTK_QDMA_FC_THRES);
+	mtk_w32(eth, 0x0, MTK_QDMA_HRED2);
+
+	return 0;
+}
+
+static void mtk_dma_free(struct mtk_eth *eth)
+{
+	int i;
+
+	for (i = 0; i < MTK_MAC_COUNT; i++)
+		if (eth->netdev[i])
+			netdev_reset_queue(eth->netdev[i]);
+	mtk_tx_clean(eth);
+	mtk_rx_clean(eth);
+	kfree(eth->scratch_head);
+}
+
+static void mtk_tx_timeout(struct net_device *dev)
+{
+	struct mtk_mac *mac = netdev_priv(dev);
+	struct mtk_eth *eth = mac->hw;
+
+	eth->netdev[mac->id]->stats.tx_errors++;
+	netif_err(eth, tx_err, dev,
+		  "transmit timed out\n");
+	schedule_work(&mac->pending_work);
+}
+
+static irqreturn_t mtk_handle_irq(int irq, void *_eth)
+{
+	struct mtk_eth *eth = _eth;
author	John Crispin <blogic@openwrt.org>	2016-03-08 11:29:55 +0100
committer	David S. Miller <davem@davemloft.net>	2016-03-10 16:22:12 -0500
commit	656e705243fd0c2864b89634ea16ed444ef64dc6 (patch)
tree	91b5f18702da809741c5da88db0c6987ef901fec
parent	58ff9865b75dab73ccfae89bc8313ca2497b4c8f (diff)