From 53fadb4d90c762b560a9d0983bb5894129057ea1 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 5 Mar 2020 00:00:33 +0200 Subject: spi: spi-fsl-dspi: Simplify bytes_per_word gymnastics Reduce the if-then-else-if-then-else sequence to: - a simple division in the case of bytes_per_word calculation - a memcpy command with a variable size. The semantics of larger-than-8 xfer->bits_per_word is that those words are to be interpreted and transmitted in CPU native endianness. Signed-off-by: Vladimir Oltean Link: https://lore.kernel.org/r/20200304220044.11193-2-olteanv@gmail.com Signed-off-by: Mark Brown --- drivers/spi/spi-fsl-dspi.c | 21 +++------------------ 1 file changed, 3 insertions(+), 18 deletions(-) (limited to 'drivers') diff --git a/drivers/spi/spi-fsl-dspi.c b/drivers/spi/spi-fsl-dspi.c index c357c3247232..896d7a0f45b0 100644 --- a/drivers/spi/spi-fsl-dspi.c +++ b/drivers/spi/spi-fsl-dspi.c @@ -252,12 +252,7 @@ static u32 dspi_pop_tx(struct fsl_dspi *dspi) u32 txdata = 0; if (dspi->tx) { - if (dspi->bytes_per_word == 1) - txdata = *(u8 *)dspi->tx; - else if (dspi->bytes_per_word == 2) - txdata = *(u16 *)dspi->tx; - else /* dspi->bytes_per_word == 4 */ - txdata = *(u32 *)dspi->tx; + memcpy(&txdata, dspi->tx, dspi->bytes_per_word); dspi->tx += dspi->bytes_per_word; } dspi->len -= dspi->bytes_per_word; @@ -284,12 +279,7 @@ static void dspi_push_rx(struct fsl_dspi *dspi, u32 rxdata) /* Mask off undefined bits */ rxdata &= (1 << dspi->bits_per_word) - 1; - if (dspi->bytes_per_word == 1) - *(u8 *)dspi->rx = rxdata; - else if (dspi->bytes_per_word == 2) - *(u16 *)dspi->rx = rxdata; - else /* dspi->bytes_per_word == 4 */ - *(u32 *)dspi->rx = rxdata; + memcpy(dspi->rx, &rxdata, dspi->bytes_per_word); dspi->rx += dspi->bytes_per_word; } @@ -814,12 +804,7 @@ static int dspi_transfer_one_message(struct spi_controller *ctlr, dspi->progress = 0; /* Validated transfer specific frame size (defaults applied) */ dspi->bits_per_word = transfer->bits_per_word; - if (transfer->bits_per_word <= 8) - dspi->bytes_per_word = 1; - else if (transfer->bits_per_word <= 16) - dspi->bytes_per_word = 2; - else - dspi->bytes_per_word = 4; + dspi->bytes_per_word = DIV_ROUND_UP(dspi->bits_per_word, 8); regmap_update_bits(dspi->regmap, SPI_MCR, SPI_MCR_CLR_TXF | SPI_MCR_CLR_RXF, -- cgit v1.2.3 From 6d6af5796e5d9a88ae83c9c753023bba61deb18b Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 5 Mar 2020 00:00:34 +0200 Subject: spi: spi-fsl-dspi: Remove unused chip->void_write_data This variable has been present since the initial submission of the driver, and held, for some reason, the value of zero, to be sent on the wire in the case there wasn't any TX buffer for the current transfer. Since quite a while now, however, it isn't doing anything at all. Signed-off-by: Vladimir Oltean Link: https://lore.kernel.org/r/20200304220044.11193-3-olteanv@gmail.com Signed-off-by: Mark Brown --- drivers/spi/spi-fsl-dspi.c | 6 ------ 1 file changed, 6 deletions(-) (limited to 'drivers') diff --git a/drivers/spi/spi-fsl-dspi.c b/drivers/spi/spi-fsl-dspi.c index 896d7a0f45b0..63ec1d634d08 100644 --- a/drivers/spi/spi-fsl-dspi.c +++ b/drivers/spi/spi-fsl-dspi.c @@ -110,7 +110,6 @@ struct chip_data { u32 ctar_val; - u16 void_write_data; }; enum dspi_trans_mode { @@ -235,7 +234,6 @@ struct fsl_dspi { const void *tx; void *rx; void *rx_end; - u16 void_write_data; u16 tx_cmd; u8 bits_per_word; u8 bytes_per_word; @@ -795,8 +793,6 @@ static int dspi_transfer_one_message(struct spi_controller *ctlr, dspi->tx_cmd |= SPI_PUSHR_CMD_CONT; } - dspi->void_write_data = dspi->cur_chip->void_write_data; - dspi->tx = transfer->tx_buf; dspi->rx = transfer->rx_buf; dspi->rx_end = dspi->rx + transfer->len; @@ -897,8 +893,6 @@ static int dspi_setup(struct spi_device *spi) sck_cs_delay = pdata->sck_cs_delay; } - chip->void_write_data = 0; - clkrate = clk_get_rate(dspi->clk); hz_to_spi_baud(&pbr, &br, spi->max_speed_hz, clkrate); -- cgit v1.2.3 From 5542bd797190d5d77f1ad3a6df9628f26d117b31 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 5 Mar 2020 00:00:35 +0200 Subject: spi: spi-fsl-dspi: Don't mask off undefined bits This is a useless operation, and if the driver needs to do that, there's something deeply wrong going on. Signed-off-by: Vladimir Oltean Link: https://lore.kernel.org/r/20200304220044.11193-4-olteanv@gmail.com Signed-off-by: Mark Brown --- drivers/spi/spi-fsl-dspi.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'drivers') diff --git a/drivers/spi/spi-fsl-dspi.c b/drivers/spi/spi-fsl-dspi.c index 63ec1d634d08..b92c2b84a94b 100644 --- a/drivers/spi/spi-fsl-dspi.c +++ b/drivers/spi/spi-fsl-dspi.c @@ -274,9 +274,6 @@ static void dspi_push_rx(struct fsl_dspi *dspi, u32 rxdata) if (!dspi->rx) return; - /* Mask off undefined bits */ - rxdata &= (1 << dspi->bits_per_word) - 1; - memcpy(dspi->rx, &rxdata, dspi->bytes_per_word); dspi->rx += dspi->bytes_per_word; } -- cgit v1.2.3 From 8f8303ee05857e1b3084c467dde8bb31c58464bd Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 5 Mar 2020 00:00:36 +0200 Subject: spi: spi-fsl-dspi: Add comments around dspi_pop_tx and dspi_push_rx functions Their names are confusing, since dspi_pop_tx prepares a word to be written to the PUSHR register, and dspi_push_rx gets a word from the POPR register. Signed-off-by: Vladimir Oltean Link: https://lore.kernel.org/r/20200304220044.11193-5-olteanv@gmail.com Signed-off-by: Mark Brown --- drivers/spi/spi-fsl-dspi.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'drivers') diff --git a/drivers/spi/spi-fsl-dspi.c b/drivers/spi/spi-fsl-dspi.c index b92c2b84a94b..a8077d4903c7 100644 --- a/drivers/spi/spi-fsl-dspi.c +++ b/drivers/spi/spi-fsl-dspi.c @@ -245,6 +245,10 @@ struct fsl_dspi { struct fsl_dspi_dma *dma; }; +/* + * Pop one word from the TX buffer for pushing into the + * PUSHR register (TX FIFO) + */ static u32 dspi_pop_tx(struct fsl_dspi *dspi) { u32 txdata = 0; @@ -257,6 +261,7 @@ static u32 dspi_pop_tx(struct fsl_dspi *dspi) return txdata; } +/* Prepare one TX FIFO entry (txdata plus cmd) */ static u32 dspi_pop_tx_pushr(struct fsl_dspi *dspi) { u16 cmd = dspi->tx_cmd, data = dspi_pop_tx(dspi); @@ -269,6 +274,7 @@ static u32 dspi_pop_tx_pushr(struct fsl_dspi *dspi) return cmd << 16 | data; } +/* Push one word to the RX buffer from the POPR register (RX FIFO) */ static void dspi_push_rx(struct fsl_dspi *dspi, u32 rxdata) { if (!dspi->rx) -- cgit v1.2.3 From 547248fbed23f3cd2f6a5937b44fad60993640c4 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 5 Mar 2020 00:00:37 +0200 Subject: spi: spi-fsl-dspi: Rename fifo_{read,write} and {tx,cmd}_fifo_write These function names are very generic and it is easy to get confused. Rename them after the hardware register that they are accessing. Signed-off-by: Vladimir Oltean Link: https://lore.kernel.org/r/20200304220044.11193-6-olteanv@gmail.com Signed-off-by: Mark Brown --- drivers/spi/spi-fsl-dspi.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'drivers') diff --git a/drivers/spi/spi-fsl-dspi.c b/drivers/spi/spi-fsl-dspi.c index a8077d4903c7..f37090ad7ad1 100644 --- a/drivers/spi/spi-fsl-dspi.c +++ b/drivers/spi/spi-fsl-dspi.c @@ -601,12 +601,12 @@ static void ns_delay_scale(char *psc, char *sc, int delay_ns, } } -static void fifo_write(struct fsl_dspi *dspi) +static void dspi_pushr_write(struct fsl_dspi *dspi) { regmap_write(dspi->regmap, SPI_PUSHR, dspi_pop_tx_pushr(dspi)); } -static void cmd_fifo_write(struct fsl_dspi *dspi) +static void dspi_pushr_cmd_write(struct fsl_dspi *dspi) { u16 cmd = dspi->tx_cmd; @@ -615,7 +615,7 @@ static void cmd_fifo_write(struct fsl_dspi *dspi) regmap_write(dspi->regmap_pushr, PUSHR_CMD, cmd); } -static void tx_fifo_write(struct fsl_dspi *dspi, u16 txdata) +static void dspi_pushr_txdata_write(struct fsl_dspi *dspi, u16 txdata) { regmap_write(dspi->regmap_pushr, PUSHR_TX, txdata); } @@ -631,18 +631,18 @@ static void dspi_tcfq_write(struct fsl_dspi *dspi) */ u32 data = dspi_pop_tx(dspi); - cmd_fifo_write(dspi); - tx_fifo_write(dspi, data & 0xFFFF); - tx_fifo_write(dspi, data >> 16); + dspi_pushr_cmd_write(dspi); + dspi_pushr_txdata_write(dspi, data & 0xFFFF); + dspi_pushr_txdata_write(dspi, data >> 16); } else { /* Write one entry to both TX FIFO and CMD FIFO * simultaneously. */ - fifo_write(dspi); + dspi_pushr_write(dspi); } } -static u32 fifo_read(struct fsl_dspi *dspi) +static u32 dspi_popr_read(struct fsl_dspi *dspi) { u32 rxdata = 0; @@ -652,7 +652,7 @@ static u32 fifo_read(struct fsl_dspi *dspi) static void dspi_tcfq_read(struct fsl_dspi *dspi) { - dspi_push_rx(dspi, fifo_read(dspi)); + dspi_push_rx(dspi, dspi_popr_read(dspi)); } static void dspi_eoq_write(struct fsl_dspi *dspi) @@ -670,7 +670,7 @@ static void dspi_eoq_write(struct fsl_dspi *dspi) if (fifo_size == (dspi->devtype_data->fifo_size - 1)) dspi->tx_cmd |= SPI_PUSHR_CMD_CTCNT; /* Write combined TX FIFO and CMD FIFO entry */ - fifo_write(dspi); + dspi_pushr_write(dspi); } } @@ -680,7 +680,7 @@ static void dspi_eoq_read(struct fsl_dspi *dspi) /* Read one FIFO entry and push to rx buffer */ while ((dspi->rx < dspi->rx_end) && fifo_size--) - dspi_push_rx(dspi, fifo_read(dspi)); + dspi_push_rx(dspi, dspi_popr_read(dspi)); } static int dspi_rxtx(struct fsl_dspi *dspi) -- cgit v1.2.3 From a3185c38dc6cd664b2b576eb9d3e5d2f49101a10 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 5 Mar 2020 00:00:38 +0200 Subject: spi: spi-fsl-dspi: Implement .max_message_size method for EOQ mode When it gets set, End Of Queue Flag halts the DSPI controller and forces the chip select signal to deassert. This operating mode is not ideal, but it is used for the DSPI instantiations where there is no other notification from the controller that the data in the FIFO has finished transmission. So in practice, it means that transmitting buffers larger than the FIFO size will yield unpredictable results. The only controller that operates in EOQ mode is MCF5441X (Coldfire). I would say that the way EOQ is used (and documented in the reference manual, too) on this chip is incorrect, and I would personally migrate it to TCFQ, but that's notably worse in terms of performance (it can only use 1 entry of the 16-deep FIFO) and if this limitation didn't bother any Coldfire DSPI user so far, it's likely that we just need to throw an error for larger buffers to make sure that callers are aware their transfers are getting truncated/split. Signed-off-by: Vladimir Oltean Link: https://lore.kernel.org/r/20200304220044.11193-7-olteanv@gmail.com Signed-off-by: Mark Brown --- drivers/spi/spi-fsl-dspi.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'drivers') diff --git a/drivers/spi/spi-fsl-dspi.c b/drivers/spi/spi-fsl-dspi.c index f37090ad7ad1..158cb48c0f4a 100644 --- a/drivers/spi/spi-fsl-dspi.c +++ b/drivers/spi/spi-fsl-dspi.c @@ -1084,6 +1084,22 @@ static int dspi_slave_abort(struct spi_master *master) return 0; } +/* + * EOQ mode will inevitably deassert its PCS signal on last word in a queue + * (hardware limitation), so we need to inform the spi_device that larger + * buffers than the FIFO size are going to have the chip select randomly + * toggling, so it has a chance to adapt its message sizes. + */ +static size_t dspi_max_message_size(struct spi_device *spi) +{ + struct fsl_dspi *dspi = spi_controller_get_devdata(spi->controller); + + if (dspi->devtype_data->trans_mode == DSPI_EOQ_MODE) + return dspi->devtype_data->fifo_size; + + return SIZE_MAX; +} + static int dspi_probe(struct platform_device *pdev) { struct device_node *np = pdev->dev.of_node; @@ -1105,6 +1121,7 @@ static int dspi_probe(struct platform_device *pdev) ctlr->setup = dspi_setup; ctlr->transfer_one_message = dspi_transfer_one_message; + ctlr->max_message_size = dspi_max_message_size; ctlr->dev.of_node = pdev->dev.of_node; ctlr->cleanup = dspi_cleanup; -- cgit v1.2.3 From 6a726824aaa3adaaf3bcfca3b471408e225f33d6 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 5 Mar 2020 00:00:39 +0200 Subject: spi: Do spi_take_timestamp_pre for as many times as necessary When dealing with a SPI controller driver that is sending more than 1 byte at once (or the entire buffer at once), and the SPI peripheral driver has requested timestamping for a byte in the middle of the buffer, we find that spi_take_timestamp_pre never records a "pre" timestamp. This happens because the function currently expects to be called with the "progress" argument >= to what the peripheral has requested to be timestamped. But clearly there are cases when that isn't going to fly. And since we can't change the past when we realize that the opportunity to take a "pre" timestamp has just passed and there isn't going to be another one, the approach taken is to keep recording the "pre" timestamp on each call, overwriting the previously recorded one until the "post" timestamp is also taken. Signed-off-by: Vladimir Oltean Link: https://lore.kernel.org/r/20200304220044.11193-8-olteanv@gmail.com Signed-off-by: Mark Brown --- drivers/spi/spi.c | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) (limited to 'drivers') diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index 292f26807b41..6c223f7d1ddc 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -1515,17 +1515,15 @@ void spi_take_timestamp_pre(struct spi_controller *ctlr, if (!xfer->ptp_sts) return; - if (xfer->timestamped_pre) + if (xfer->timestamped) return; - if (progress < xfer->ptp_sts_word_pre) + if (progress > xfer->ptp_sts_word_pre) return; /* Capture the resolution of the timestamp */ xfer->ptp_sts_word_pre = progress; - xfer->timestamped_pre = true; - if (irqs_off) { local_irq_save(ctlr->irq_flags); preempt_disable(); @@ -1554,7 +1552,7 @@ void spi_take_timestamp_post(struct spi_controller *ctlr, if (!xfer->ptp_sts) return; - if (xfer->timestamped_post) + if (xfer->timestamped) return; if (progress < xfer->ptp_sts_word_post) @@ -1570,7 +1568,7 @@ void spi_take_timestamp_post(struct spi_controller *ctlr, /* Capture the resolution of the timestamp */ xfer->ptp_sts_word_post = progress; - xfer->timestamped_post = true; + xfer->timestamped = true; } EXPORT_SYMBOL_GPL(spi_take_timestamp_post); @@ -1675,12 +1673,9 @@ void spi_finalize_current_message(struct spi_controller *ctlr) } } - if (unlikely(ctlr->ptp_sts_supported)) { - list_for_each_entry(xfer, &mesg->transfers, transfer_list) { - WARN_ON_ONCE(xfer->ptp_sts && !xfer->timestamped_pre); - WARN_ON_ONCE(xfer->ptp_sts && !xfer->timestamped_post); - } - } + if (unlikely(ctlr->ptp_sts_supported)) + list_for_each_entry(xfer, &mesg->transfers, transfer_list) + WARN_ON_ONCE(xfer->ptp_sts && !xfer->timestamped); spi_unmap_msg(ctlr, mesg); -- cgit v1.2.3 From d59c90a2400ffc4d7127a7653f99da1c4a8fa762 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 5 Mar 2020 00:00:40 +0200 Subject: spi: spi-fsl-dspi: Convert TCFQ users to XSPI FIFO mode The Transfer Complete Flag (TCF) interrupt gets raised after each write to the TX FIFO (PUSHR) which means that it is not possible to devise a transfer procedure that makes full utilization of the FIFO depth (4 entries on most controllers, 16 entries on some). On the other hand, XSPI mode has a feature called "command cycling", which allows a single TX command to be run for a pre-specified number of TX words. When the command cycle ends, the Command Transfer Complete Flag bit asserts and raises an interrupt. The advantage in this mode is that the TX FIFO can be better utilized (more words can be batched at once). Other changes brought by this patch: - The dspi->rx_end variable has been removed, since now the dspi_fifo_write function sets up dspi->words_in_flight, so dspi_fifo_read knows how much to read without overrunning the RX buffer. - Stop using poll mode unconditionally for TCFQ mode, since XSPI mode is a little less efficient than that, and so, poll mode doesn't bring as many improvements for XSPI. - Stop relying on the hardware transfer counter (SPI_TCR_GET_TCNT) and instead increment the message->actual_length based on the newly introduced dspi->words_in_flight variable. - The CTARE register is now written in the hotpath instead of just at transfer init time, since it contains the DTCP field (transfer preload - the counter indicating how many txdata words will follow), which is a dynamic value. Due to the fact that the Chip Select toggling setting is part of the command written to the TX FIFO, the ending word of each buffer needs to be sent via its own TX command, so that we have a chance to emit a 1-word command with deasserted PCS. Signed-off-by: Vladimir Oltean Link: https://lore.kernel.org/r/20200304220044.11193-9-olteanv@gmail.com Signed-off-by: Mark Brown --- drivers/spi/spi-fsl-dspi.c | 189 ++++++++++++++++++++++++--------------------- 1 file changed, 101 insertions(+), 88 deletions(-) (limited to 'drivers') diff --git a/drivers/spi/spi-fsl-dspi.c b/drivers/spi/spi-fsl-dspi.c index 158cb48c0f4a..298c22def165 100644 --- a/drivers/spi/spi-fsl-dspi.c +++ b/drivers/spi/spi-fsl-dspi.c @@ -72,6 +72,7 @@ #define SPI_RSER 0x30 #define SPI_RSER_TCFQE BIT(31) #define SPI_RSER_EOQFE BIT(28) +#define SPI_RSER_CMDTCFE BIT(23) #define SPI_PUSHR 0x34 #define SPI_PUSHR_CMD_CONT BIT(15) @@ -114,14 +115,13 @@ struct chip_data { enum dspi_trans_mode { DSPI_EOQ_MODE = 0, - DSPI_TCFQ_MODE, + DSPI_XSPI_MODE, DSPI_DMA_MODE, }; struct fsl_dspi_devtype_data { enum dspi_trans_mode trans_mode; u8 max_clock_factor; - bool xspi_mode; int fifo_size; int dma_bufsize; }; @@ -147,37 +147,32 @@ static const struct fsl_dspi_devtype_data devtype_data[] = { }, [LS1021A] = { /* Has A-011218 DMA erratum */ - .trans_mode = DSPI_TCFQ_MODE, + .trans_mode = DSPI_XSPI_MODE, .max_clock_factor = 8, - .xspi_mode = true, .fifo_size = 4, }, [LS1012A] = { /* Has A-011218 DMA erratum */ - .trans_mode = DSPI_TCFQ_MODE, + .trans_mode = DSPI_XSPI_MODE, .max_clock_factor = 8, - .xspi_mode = true, .fifo_size = 16, }, [LS1043A] = { /* Has A-011218 DMA erratum */ - .trans_mode = DSPI_TCFQ_MODE, + .trans_mode = DSPI_XSPI_MODE, .max_clock_factor = 8, - .xspi_mode = true, .fifo_size = 16, }, [LS1046A] = { /* Has A-011218 DMA erratum */ - .trans_mode = DSPI_TCFQ_MODE, + .trans_mode = DSPI_XSPI_MODE, .max_clock_factor = 8, - .xspi_mode = true, .fifo_size = 16, }, [LS2080A] = { .trans_mode = DSPI_DMA_MODE, .dma_bufsize = 8, .max_clock_factor = 8, - .xspi_mode = true, .fifo_size = 4, }, [LS2085A] = { @@ -190,7 +185,6 @@ static const struct fsl_dspi_devtype_data devtype_data[] = { .trans_mode = DSPI_DMA_MODE, .dma_bufsize = 8, .max_clock_factor = 8, - .xspi_mode = true, .fifo_size = 4, }, [MCF5441X] = { @@ -233,7 +227,6 @@ struct fsl_dspi { size_t len; const void *tx; void *rx; - void *rx_end; u16 tx_cmd; u8 bits_per_word; u8 bytes_per_word; @@ -243,6 +236,8 @@ struct fsl_dspi { u32 waitflags; struct fsl_dspi_dma *dma; + + int words_in_flight; }; /* @@ -610,7 +605,17 @@ static void dspi_pushr_cmd_write(struct fsl_dspi *dspi) { u16 cmd = dspi->tx_cmd; - if (dspi->len > 0) + /* + * The only time when the PCS doesn't need continuation after this word + * is when it's last. We need to look ahead, because we actually call + * dspi_pop_tx (the function that decrements dspi->len) _after_ + * dspi_pushr_cmd_write with XSPI mode. As for how much in advance? One + * word is enough. If there's more to transmit than that, + * dspi_xspi_write will know to split the FIFO writes in 2, and + * generate a new PUSHR command with the final word that will have PCS + * deasserted (not continued) here. + */ + if (dspi->len > dspi->bytes_per_word) cmd |= SPI_PUSHR_CMD_CONT; regmap_write(dspi->regmap_pushr, PUSHR_CMD, cmd); } @@ -620,93 +625,115 @@ static void dspi_pushr_txdata_write(struct fsl_dspi *dspi, u16 txdata) regmap_write(dspi->regmap_pushr, PUSHR_TX, txdata); } -static void dspi_tcfq_write(struct fsl_dspi *dspi) +static void dspi_xspi_write(struct fsl_dspi *dspi, int cnt) { - /* Clear transfer count */ - dspi->tx_cmd |= SPI_PUSHR_CMD_CTCNT; + regmap_write(dspi->regmap, SPI_CTARE(0), + SPI_FRAME_EBITS(dspi->bits_per_word) | + SPI_CTARE_DTCP(cnt)); - if (dspi->devtype_data->xspi_mode && dspi->bits_per_word > 16) { - /* Write the CMD FIFO entry first, and then the two - * corresponding TX FIFO entries. - */ + /* + * Write the CMD FIFO entry first, and then the two + * corresponding TX FIFO entries (or one...). + */ + dspi_pushr_cmd_write(dspi); + + /* Fill TX FIFO with as many transfers as possible */ + while (cnt--) { u32 data = dspi_pop_tx(dspi); - dspi_pushr_cmd_write(dspi); dspi_pushr_txdata_write(dspi, data & 0xFFFF); - dspi_pushr_txdata_write(dspi, data >> 16); - } else { - /* Write one entry to both TX FIFO and CMD FIFO - * simultaneously. - */ - dspi_pushr_write(dspi); + if (dspi->bits_per_word > 16) + dspi_pushr_txdata_write(dspi, data >> 16); } } -static u32 dspi_popr_read(struct fsl_dspi *dspi) +static void dspi_xspi_fifo_write(struct fsl_dspi *dspi) { - u32 rxdata = 0; + int num_fifo_entries = dspi->devtype_data->fifo_size; + int bytes_in_flight; - regmap_read(dspi->regmap, SPI_POPR, &rxdata); - return rxdata; -} + /* In XSPI mode each 32-bit word occupies 2 TX FIFO entries */ + if (dspi->bits_per_word > 16) + num_fifo_entries /= 2; -static void dspi_tcfq_read(struct fsl_dspi *dspi) -{ - dspi_push_rx(dspi, dspi_popr_read(dspi)); + dspi->words_in_flight = dspi->len / dspi->bytes_per_word; + + if (dspi->words_in_flight > num_fifo_entries) + dspi->words_in_flight = num_fifo_entries; + + bytes_in_flight = dspi->words_in_flight * dspi->bytes_per_word; + + /* + * If the PCS needs to de-assert (i.e. we're at the end of the buffer + * and cs_change does not want the PCS to stay on), then we need a new + * PUSHR command, since this one (for the body of the buffer) + * necessarily has the CONT bit set. + * So send one word less during this go, to force a split and a command + * with a single word next time, when CONT will be unset. + */ + if (bytes_in_flight == dspi->len && dspi->words_in_flight > 1 && + !(dspi->tx_cmd & SPI_PUSHR_CMD_CONT)) + dspi->words_in_flight--; + + dspi_xspi_write(dspi, dspi->words_in_flight); } -static void dspi_eoq_write(struct fsl_dspi *dspi) +static void dspi_eoq_fifo_write(struct fsl_dspi *dspi) { - int fifo_size = dspi->devtype_data->fifo_size; + int num_fifo_entries = dspi->devtype_data->fifo_size; u16 xfer_cmd = dspi->tx_cmd; + dspi->words_in_flight = num_fifo_entries; + /* Fill TX FIFO with as many transfers as possible */ - while (dspi->len && fifo_size--) { + while (dspi->len && num_fifo_entries--) { dspi->tx_cmd = xfer_cmd; /* Request EOQF for last transfer in FIFO */ - if (dspi->len == dspi->bytes_per_word || fifo_size == 0) + if (dspi->len == dspi->bytes_per_word || num_fifo_entries == 0) dspi->tx_cmd |= SPI_PUSHR_CMD_EOQ; - /* Clear transfer count for first transfer in FIFO */ - if (fifo_size == (dspi->devtype_data->fifo_size - 1)) - dspi->tx_cmd |= SPI_PUSHR_CMD_CTCNT; /* Write combined TX FIFO and CMD FIFO entry */ dspi_pushr_write(dspi); } } -static void dspi_eoq_read(struct fsl_dspi *dspi) +static u32 dspi_popr_read(struct fsl_dspi *dspi) { - int fifo_size = dspi->devtype_data->fifo_size; + u32 rxdata = 0; + regmap_read(dspi->regmap, SPI_POPR, &rxdata); + return rxdata; +} + +static void dspi_fifo_read(struct fsl_dspi *dspi) +{ /* Read one FIFO entry and push to rx buffer */ - while ((dspi->rx < dspi->rx_end) && fifo_size--) + while (dspi->words_in_flight--) dspi_push_rx(dspi, dspi_popr_read(dspi)); } +static void dspi_fifo_write(struct fsl_dspi *dspi) +{ + if (dspi->devtype_data->trans_mode == DSPI_EOQ_MODE) + dspi_eoq_fifo_write(dspi); + else + dspi_xspi_fifo_write(dspi); +} + static int dspi_rxtx(struct fsl_dspi *dspi) { + struct spi_transfer *xfer = dspi->cur_transfer; struct spi_message *msg = dspi->cur_msg; - enum dspi_trans_mode trans_mode; - u16 spi_tcnt; - u32 spi_tcr; + int bytes_sent; + + /* Update total number of bytes that were transferred */ + bytes_sent = dspi->words_in_flight * dspi->bytes_per_word; + msg->actual_length += bytes_sent; + dspi->progress += bytes_sent / DIV_ROUND_UP(xfer->bits_per_word, 8); spi_take_timestamp_post(dspi->ctlr, dspi->cur_transfer, dspi->progress, !dspi->irq); - /* Get transfer counter (in number of SPI transfers). It was - * reset to 0 when transfer(s) were started. - */ - regmap_read(dspi->regmap, SPI_TCR, &spi_tcr); - spi_tcnt = SPI_TCR_GET_TCNT(spi_tcr); - /* Update total number of bytes that were transferred */ - msg->actual_length += spi_tcnt * dspi->bytes_per_word; - dspi->progress += spi_tcnt; - - trans_mode = dspi->devtype_data->trans_mode; - if (trans_mode == DSPI_EOQ_MODE) - dspi_eoq_read(dspi); - else if (trans_mode == DSPI_TCFQ_MODE) - dspi_tcfq_read(dspi); + dspi_fifo_read(dspi); if (!dspi->len) /* Success! */ @@ -715,10 +742,7 @@ static int dspi_rxtx(struct fsl_dspi *dspi) spi_take_timestamp_pre(dspi->ctlr, dspi->cur_transfer, dspi->progress, !dspi->irq); - if (trans_mode == DSPI_EOQ_MODE) - dspi_eoq_write(dspi); - else if (trans_mode == DSPI_TCFQ_MODE) - dspi_tcfq_write(dspi); + dspi_fifo_write(dspi); return -EINPROGRESS; } @@ -732,7 +756,7 @@ static int dspi_poll(struct fsl_dspi *dspi) regmap_read(dspi->regmap, SPI_SR, &spi_sr); regmap_write(dspi->regmap, SPI_SR, spi_sr); - if (spi_sr & (SPI_SR_EOQF | SPI_SR_TCFQF)) + if (spi_sr & (SPI_SR_EOQF | SPI_SR_CMDTCF)) break; } while (--tries); @@ -750,7 +774,7 @@ static irqreturn_t dspi_interrupt(int irq, void *dev_id) regmap_read(dspi->regmap, SPI_SR, &spi_sr); regmap_write(dspi->regmap, SPI_SR, spi_sr); - if (!(spi_sr & SPI_SR_EOQF)) + if (!(spi_sr & (SPI_SR_EOQF | SPI_SR_CMDTCF))) return IRQ_NONE; if (dspi_rxtx(dspi) == 0) { @@ -798,7 +822,6 @@ static int dspi_transfer_one_message(struct spi_controller *ctlr, dspi->tx = transfer->tx_buf; dspi->rx = transfer->rx_buf; - dspi->rx_end = dspi->rx + transfer->len; dspi->len = transfer->len; dspi->progress = 0; /* Validated transfer specific frame size (defaults applied) */ @@ -811,10 +834,6 @@ static int dspi_transfer_one_message(struct spi_controller *ctlr, regmap_write(dspi->regmap, SPI_CTAR(0), dspi->cur_chip->ctar_val | SPI_FRAME_BITS(transfer->bits_per_word)); - if (dspi->devtype_data->xspi_mode) - regmap_write(dspi->regmap, SPI_CTARE(0), - SPI_FRAME_EBITS(transfer->bits_per_word) | - SPI_CTARE_DTCP(1)); spi_take_timestamp_pre(dspi->ctlr, dspi->cur_transfer, dspi->progress, !dspi->irq); @@ -823,11 +842,11 @@ static int dspi_transfer_one_message(struct spi_controller *ctlr, switch (trans_mode) { case DSPI_EOQ_MODE: regmap_write(dspi->regmap, SPI_RSER, SPI_RSER_EOQFE); - dspi_eoq_write(dspi); + dspi_fifo_write(dspi); break; - case DSPI_TCFQ_MODE: - regmap_write(dspi->regmap, SPI_RSER, SPI_RSER_TCFQE); - dspi_tcfq_write(dspi); + case DSPI_XSPI_MODE: + regmap_write(dspi->regmap, SPI_RSER, SPI_RSER_CMDTCFE); + dspi_fifo_write(dspi); break; case DSPI_DMA_MODE: regmap_write(dspi->regmap, SPI_RSER, @@ -1053,16 +1072,13 @@ static void dspi_init(struct fsl_dspi *dspi) { unsigned int mcr = SPI_MCR_PCSIS; - if (dspi->devtype_data->xspi_mode) + if (dspi->devtype_data->trans_mode == DSPI_XSPI_MODE) mcr |= SPI_MCR_XSPI; if (!spi_controller_is_slave(dspi->ctlr)) mcr |= SPI_MCR_MASTER; regmap_write(dspi->regmap, SPI_MCR, mcr); regmap_write(dspi->regmap, SPI_SR, SPI_SR_CLEAR); - if (dspi->devtype_data->xspi_mode) - regmap_write(dspi->regmap, SPI_CTARE(0), - SPI_CTARE_FMSZE(0) | SPI_CTARE_DTCP(1)); } static int dspi_slave_abort(struct spi_master *master) @@ -1162,7 +1178,7 @@ static int dspi_probe(struct platform_device *pdev) } } - if (dspi->devtype_data->xspi_mode) + if (dspi->devtype_data->trans_mode == DSPI_XSPI_MODE) ctlr->bits_per_word_mask = SPI_BPW_RANGE_MASK(4, 32); else ctlr->bits_per_word_mask = SPI_BPW_RANGE_MASK(4, 16); @@ -1174,7 +1190,7 @@ static int dspi_probe(struct platform_device *pdev) goto out_ctlr_put; } - if (dspi->devtype_data->xspi_mode) + if (dspi->devtype_data->trans_mode == DSPI_XSPI_MODE) regmap_config = &dspi_xspi_regmap_config[0]; else regmap_config = &dspi_regmap_config; @@ -1186,7 +1202,7 @@ static int dspi_probe(struct platform_device *pdev) goto out_ctlr_put; } - if (dspi->devtype_data->xspi_mode) { + if (dspi->devtype_data->trans_mode == DSPI_XSPI_MODE) { dspi->regmap_pushr = devm_regmap_init_mmio( &pdev->dev, base + SPI_PUSHR, &dspi_xspi_regmap_config[1]); @@ -1211,9 +1227,6 @@ static int dspi_probe(struct platform_device *pdev) dspi_init(dspi); - if (dspi->devtype_data->trans_mode == DSPI_TCFQ_MODE) - goto poll_mode; - dspi->irq = platform_get_irq(pdev, 0); if (dspi->irq <= 0) { dev_info(&pdev->dev, -- cgit v1.2.3 From 6c1c26ecd9a31c24f9ea7dfb174528141dd32361 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 5 Mar 2020 00:00:41 +0200 Subject: spi: spi-fsl-dspi: Accelerate transfers using larger word size if possible This patch adds logic in the driver to transmit SPI buffers that use bits_per_word=8 with a higher bits_per_word count (multiple of 8). Currently the following (most common) modes are implemented: - 8 bits_per_word on 32-bit capable controllers - 8 bits_per_word on 16-bit capable controllers - 16 bits_per_word on 32-bit capable controllers Transfers which are not accelerated are transferred with a hardware bits_per_word value equal to the one of the SPI transfer. The difference from just extending bits_per_word=32 at the spi_device driver level is that endianness is different - the SPI core wants to treat bits_per_word=32 buffers as arrays of u32 (i.e. words in host CPU endianness). So to preserve endianness when clumping 8x4 bits into 32-bit words, one must perform conversion between CPU and standard (big) endianness. All appearances (both on the wire as well as in the buffers presented to the peripheral driver) are preserved, just that accesses to the PUSHR and POPR registers are now more efficient, since the same number of reads/writes can now carry more data (2x more data on TX, 4x more data on RX). Signed-off-by: Vladimir Oltean Link: https://lore.kernel.org/r/20200304220044.11193-10-olteanv@gmail.com Signed-off-by: Mark Brown --- drivers/spi/spi-fsl-dspi.c | 160 ++++++++++++++++++++++++++++++++++++++------- 1 file changed, 135 insertions(+), 25 deletions(-) (limited to 'drivers') diff --git a/drivers/spi/spi-fsl-dspi.c b/drivers/spi/spi-fsl-dspi.c index 298c22def165..f5b802070d29 100644 --- a/drivers/spi/spi-fsl-dspi.c +++ b/drivers/spi/spi-fsl-dspi.c @@ -228,8 +228,6 @@ struct fsl_dspi { const void *tx; void *rx; u16 tx_cmd; - u8 bits_per_word; - u8 bytes_per_word; const struct fsl_dspi_devtype_data *devtype_data; wait_queue_head_t waitq; @@ -237,9 +235,70 @@ struct fsl_dspi { struct fsl_dspi_dma *dma; + int oper_word_size; + int oper_bits_per_word; + int words_in_flight; + + void (*host_to_dev)(struct fsl_dspi *dspi, u32 *txdata); + void (*dev_to_host)(struct fsl_dspi *dspi, u32 rxdata); }; +static void dspi_native_host_to_dev(struct fsl_dspi *dspi, u32 *txdata) +{ + memcpy(txdata, dspi->tx, dspi->oper_word_size); + dspi->tx += dspi->oper_word_size; +} + +static void dspi_native_dev_to_host(struct fsl_dspi *dspi, u32 rxdata) +{ + memcpy(dspi->rx, &rxdata, dspi->oper_word_size); + dspi->rx += dspi->oper_word_size; +} + +static void dspi_8on32_host_to_dev(struct fsl_dspi *dspi, u32 *txdata) +{ + *txdata = cpu_to_be32(*(u32 *)dspi->tx); + dspi->tx += sizeof(u32); +} + +static void dspi_8on32_dev_to_host(struct fsl_dspi *dspi, u32 rxdata) +{ + *(u32 *)dspi->rx = be32_to_cpu(rxdata); + dspi->rx += sizeof(u32); +} + +static void dspi_8on16_host_to_dev(struct fsl_dspi *dspi, u32 *txdata) +{ + *txdata = cpu_to_be16(*(u16 *)dspi->tx); + dspi->tx += sizeof(u16); +} + +static void dspi_8on16_dev_to_host(struct fsl_dspi *dspi, u32 rxdata) +{ + *(u16 *)dspi->rx = be16_to_cpu(rxdata); + dspi->rx += sizeof(u16); +} + +static void dspi_16on32_host_to_dev(struct fsl_dspi *dspi, u32 *txdata) +{ + u16 hi = *(u16 *)dspi->tx; + u16 lo = *(u16 *)(dspi->tx + 2); + + *txdata = (u32)hi << 16 | lo; + dspi->tx += sizeof(u32); +} + +static void dspi_16on32_dev_to_host(struct fsl_dspi *dspi, u32 rxdata) +{ + u16 hi = rxdata & 0xffff; + u16 lo = rxdata >> 16; + + *(u16 *)dspi->rx = lo; + *(u16 *)(dspi->rx + 2) = hi; + dspi->rx += sizeof(u32); +} + /* * Pop one word from the TX buffer for pushing into the * PUSHR register (TX FIFO) @@ -248,11 +307,9 @@ static u32 dspi_pop_tx(struct fsl_dspi *dspi) { u32 txdata = 0; - if (dspi->tx) { - memcpy(&txdata, dspi->tx, dspi->bytes_per_word); - dspi->tx += dspi->bytes_per_word; - } - dspi->len -= dspi->bytes_per_word; + if (dspi->tx) + dspi->host_to_dev(dspi, &txdata); + dspi->len -= dspi->oper_word_size; return txdata; } @@ -274,9 +331,7 @@ static void dspi_push_rx(struct fsl_dspi *dspi, u32 rxdata) { if (!dspi->rx) return; - - memcpy(dspi->rx, &rxdata, dspi->bytes_per_word); - dspi->rx += dspi->bytes_per_word; + dspi->dev_to_host(dspi, rxdata); } static void dspi_tx_dma_callback(void *arg) @@ -393,8 +448,8 @@ static int dspi_dma_xfer(struct fsl_dspi *dspi) dspi->devtype_data->fifo_size; while (curr_remaining_bytes) { /* Check if current transfer fits the DMA buffer */ - dma->curr_xfer_len = curr_remaining_bytes - / dspi->bytes_per_word; + dma->curr_xfer_len = curr_remaining_bytes / + dspi->oper_word_size; if (dma->curr_xfer_len > bytes_per_buffer) dma->curr_xfer_len = bytes_per_buffer; @@ -404,8 +459,8 @@ static int dspi_dma_xfer(struct fsl_dspi *dspi) goto exit; } else { - const int len = - dma->curr_xfer_len * dspi->bytes_per_word; + const int len = dma->curr_xfer_len * + dspi->oper_word_size; curr_remaining_bytes -= len; message->actual_length += len; if (curr_remaining_bytes < 0) @@ -615,7 +670,7 @@ static void dspi_pushr_cmd_write(struct fsl_dspi *dspi) * generate a new PUSHR command with the final word that will have PCS * deasserted (not continued) here. */ - if (dspi->len > dspi->bytes_per_word) + if (dspi->len > dspi->oper_word_size) cmd |= SPI_PUSHR_CMD_CONT; regmap_write(dspi->regmap_pushr, PUSHR_CMD, cmd); } @@ -627,8 +682,9 @@ static void dspi_pushr_txdata_write(struct fsl_dspi *dspi, u16 txdata) static void dspi_xspi_write(struct fsl_dspi *dspi, int cnt) { + /* Update CTARE */ regmap_write(dspi->regmap, SPI_CTARE(0), - SPI_FRAME_EBITS(dspi->bits_per_word) | + SPI_FRAME_EBITS(dspi->oper_bits_per_word) | SPI_CTARE_DTCP(cnt)); /* @@ -642,7 +698,7 @@ static void dspi_xspi_write(struct fsl_dspi *dspi, int cnt) u32 data = dspi_pop_tx(dspi); dspi_pushr_txdata_write(dspi, data & 0xFFFF); - if (dspi->bits_per_word > 16) + if (dspi->oper_bits_per_word > 16) dspi_pushr_txdata_write(dspi, data >> 16); } } @@ -653,15 +709,20 @@ static void dspi_xspi_fifo_write(struct fsl_dspi *dspi) int bytes_in_flight; /* In XSPI mode each 32-bit word occupies 2 TX FIFO entries */ - if (dspi->bits_per_word > 16) + if (dspi->oper_word_size == 4) num_fifo_entries /= 2; - dspi->words_in_flight = dspi->len / dspi->bytes_per_word; + /* + * Integer division intentionally trims off odd (or non-multiple of 4) + * numbers of bytes at the end of the buffer, which will be sent next + * time using a smaller oper_word_size. + */ + dspi->words_in_flight = dspi->len / dspi->oper_word_size; if (dspi->words_in_flight > num_fifo_entries) dspi->words_in_flight = num_fifo_entries; - bytes_in_flight = dspi->words_in_flight * dspi->bytes_per_word; + bytes_in_flight = dspi->words_in_flight * dspi->oper_word_size; /* * If the PCS needs to de-assert (i.e. we're at the end of the buffer @@ -689,7 +750,7 @@ static void dspi_eoq_fifo_write(struct fsl_dspi *dspi) while (dspi->len && num_fifo_entries--) { dspi->tx_cmd = xfer_cmd; /* Request EOQF for last transfer in FIFO */ - if (dspi->len == dspi->bytes_per_word || num_fifo_entries == 0) + if (dspi->len == dspi->oper_word_size || num_fifo_entries == 0) dspi->tx_cmd |= SPI_PUSHR_CMD_EOQ; /* Write combined TX FIFO and CMD FIFO entry */ dspi_pushr_write(dspi); @@ -711,8 +772,56 @@ static void dspi_fifo_read(struct fsl_dspi *dspi) dspi_push_rx(dspi, dspi_popr_read(dspi)); } +static void dspi_setup_accel(struct fsl_dspi *dspi) +{ + struct spi_transfer *xfer = dspi->cur_transfer; + + /* Start off with maximum supported by hardware */ + if (dspi->devtype_data->trans_mode == DSPI_XSPI_MODE) + dspi->oper_bits_per_word = 32; + else + dspi->oper_bits_per_word = 16; + + /* And go down only if the buffer can't be sent with words this big */ + do { + if (dspi->len >= DIV_ROUND_UP(dspi->oper_bits_per_word, 8)) + break; + + dspi->oper_bits_per_word /= 2; + } while (dspi->oper_bits_per_word > 8); + + if (xfer->bits_per_word == 8 && dspi->oper_bits_per_word == 32) { + dspi->dev_to_host = dspi_8on32_dev_to_host; + dspi->host_to_dev = dspi_8on32_host_to_dev; + } else if (xfer->bits_per_word == 8 && dspi->oper_bits_per_word == 16) { + dspi->dev_to_host = dspi_8on16_dev_to_host; + dspi->host_to_dev = dspi_8on16_host_to_dev; + } else if (xfer->bits_per_word == 16 && dspi->oper_bits_per_word == 32) { + dspi->dev_to_host = dspi_16on32_dev_to_host; + dspi->host_to_dev = dspi_16on32_host_to_dev; + } else { + /* No acceleration needed (8dev_to_host = dspi_native_dev_to_host; + dspi->host_to_dev = dspi_native_host_to_dev; + dspi->oper_bits_per_word = xfer->bits_per_word; + } + + dspi->oper_word_size = DIV_ROUND_UP(dspi->oper_bits_per_word, 8); + + /* + * Update CTAR here (code is common for both EOQ and XSPI modes). + * We will update CTARE in the portion specific to XSPI, when we + * also know the preload value (DTCP). + */ + regmap_write(dspi->regmap, SPI_CTAR(0), + dspi->cur_chip->ctar_val | + SPI_FRAME_BITS(dspi->oper_bits_per_word)); +} + static void dspi_fifo_write(struct fsl_dspi *dspi) { + dspi_setup_accel(dspi); + if (dspi->devtype_data->trans_mode == DSPI_EOQ_MODE) dspi_eoq_fifo_write(dspi); else @@ -726,7 +835,7 @@ static int dspi_rxtx(struct fsl_dspi *dspi) int bytes_sent; /* Update total number of bytes that were transferred */ - bytes_sent = dspi->words_in_flight * dspi->bytes_per_word; + bytes_sent = dspi->words_in_flight * dspi->oper_word_size; msg->actual_length += bytes_sent; dspi->progress += bytes_sent / DIV_ROUND_UP(xfer->bits_per_word, 8); @@ -824,13 +933,14 @@ static int dspi_transfer_one_message(struct spi_controller *ctlr, dspi->rx = transfer->rx_buf; dspi->len = transfer->len; dspi->progress = 0; - /* Validated transfer specific frame size (defaults applied) */ - dspi->bits_per_word = transfer->bits_per_word; - dspi->bytes_per_word = DIV_ROUND_UP(dspi->bits_per_word, 8); regmap_update_bits(dspi->regmap, SPI_MCR, SPI_MCR_CLR_TXF | SPI_MCR_CLR_RXF, SPI_MCR_CLR_TXF | SPI_MCR_CLR_RXF); + /* + * Static CTAR setup for modes that don't dynamically adjust it + * via dspi_setup_accel (aka for DMA) + */ regmap_write(dspi->regmap, SPI_CTAR(0), dspi->cur_chip->ctar_val | SPI_FRAME_BITS(transfer->bits_per_word)); -- cgit v1.2.3 From 6365504d42d90c68555ee40cdf297a1f187cb4a3 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 5 Mar 2020 00:00:42 +0200 Subject: spi: spi-fsl-dspi: Optimize dspi_setup_accel for lowest interrupt count Currently, a SPI transfer that is not multiple of the highest supported word width (e.g. 4 bytes) will be transmitted as follows (assume a 30-byte buffer transmitted through a 32-bit wide FIFO that is 32 bytes deep): - First 28 bytes are sent as 7 words of 32 bits each - Last 2 bytes are sent as 1 word of 16 bits size But if the dspi_setup_accel function had decided to use a lower oper_bits_per_word value (16 instead of 32), there would have been enough space in the TX FIFO to fit the entire buffer in one go (15 words of 16 bits each). What we're actually trying to avoid is mixing word sizes within the same run with the TX FIFO, since there is an erratum surrounding this, and invalid data might get transmitted. So this patch adds special cases for when the remaining length of the buffer can be sent in one go as 8-bit or 16-bit words, otherwise it falls back to the standard logic of sending as many bytes as possible at the highest oper_bits_per_word value possible. The benefit is that there will be one less CMDFQ/EOQ interrupt to service when the entire buffer is transmitted during a single go, and that will improve the overall latency of the transfer. Signed-off-by: Vladimir Oltean Link: https://lore.kernel.org/r/20200304220044.11193-11-olteanv@gmail.com Signed-off-by: Mark Brown --- drivers/spi/spi-fsl-dspi.c | 36 +++++++++++++++++++++++++----------- 1 file changed, 25 insertions(+), 11 deletions(-) (limited to 'drivers') diff --git a/drivers/spi/spi-fsl-dspi.c b/drivers/spi/spi-fsl-dspi.c index f5b802070d29..df4944353ed5 100644 --- a/drivers/spi/spi-fsl-dspi.c +++ b/drivers/spi/spi-fsl-dspi.c @@ -775,20 +775,34 @@ static void dspi_fifo_read(struct fsl_dspi *dspi) static void dspi_setup_accel(struct fsl_dspi *dspi) { struct spi_transfer *xfer = dspi->cur_transfer; + bool odd = !!(dspi->len & 1); - /* Start off with maximum supported by hardware */ - if (dspi->devtype_data->trans_mode == DSPI_XSPI_MODE) - dspi->oper_bits_per_word = 32; - else + /* No accel for frames not multiple of 8 bits at the moment */ + if (xfer->bits_per_word % 8) + goto no_accel; + + if (!odd && dspi->len <= dspi->devtype_data->fifo_size * 2) { dspi->oper_bits_per_word = 16; + } else if (odd && dspi->len <= dspi->devtype_data->fifo_size) { + dspi->oper_bits_per_word = 8; + } else { + /* Start off with maximum supported by hardware */ + if (dspi->devtype_data->trans_mode == DSPI_XSPI_MODE) + dspi->oper_bits_per_word = 32; + else + dspi->oper_bits_per_word = 16; - /* And go down only if the buffer can't be sent with words this big */ - do { - if (dspi->len >= DIV_ROUND_UP(dspi->oper_bits_per_word, 8)) - break; + /* + * And go down only if the buffer can't be sent with + * words this big + */ + do { + if (dspi->len >= DIV_ROUND_UP(dspi->oper_bits_per_word, 8)) + break; - dspi->oper_bits_per_word /= 2; - } while (dspi->oper_bits_per_word > 8); + dspi->oper_bits_per_word /= 2; + } while (dspi->oper_bits_per_word > 8); + } if (xfer->bits_per_word == 8 && dspi->oper_bits_per_word == 32) { dspi->dev_to_host = dspi_8on32_dev_to_host; @@ -800,7 +814,7 @@ static void dspi_setup_accel(struct fsl_dspi *dspi) dspi->dev_to_host = dspi_16on32_dev_to_host; dspi->host_to_dev = dspi_16on32_host_to_dev; } else { - /* No acceleration needed (8dev_to_host = dspi_native_dev_to_host; dspi->host_to_dev = dspi_native_host_to_dev; dspi->oper_bits_per_word = xfer->bits_per_word; -- cgit v1.2.3 From ea93ed4c181bd42d27b49b612d56f4ceb23d1d6c Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 5 Mar 2020 00:00:43 +0200 Subject: spi: spi-fsl-dspi: Use EOQ for last word in buffer even for XSPI mode The EOQ mode has a hardware limitation in that it stops the transmission (including the deassertion of the chip select signal) once the host CPU requests end-of-queue for a particular word in the TX FIFO. And XSPI mode has a limitation in that we need a separate CMD FIFO entry for the last byte in the buffer, where the chip select signal needs to be deasserted. It's not a functional limitation, but it's rather clunky and the fact that we need to halt the pipeline and write a single entry to the TX FIFO whenever a buffer ends brings the throughput down when transmitting small buffers. So the idea here is to use EOQ's limitation in our favor when using XSPI mode. Stop special-casing that final word in the buffer, and just kill the chip select signal by issuing an EOQ for that last word. Now it can be mixed in with all the other words in the current TX FIFO train. A small trick here is that we still keep using the XSPI-specific signaling via the CMDTCFQ interrupt in RSER, and not enabling the EOQ interrupt, in order to avoid hardware weirdness (potential races with separate interrupts being raised for CMDTCFQ and EOQ for what is in fact the end of the same transmission). That is just theoretical, but it's good to be cautious, and the EOQ interrupt isn't needed. Signed-off-by: Vladimir Oltean Link: https://lore.kernel.org/r/20200304220044.11193-12-olteanv@gmail.com Signed-off-by: Mark Brown --- drivers/spi/spi-fsl-dspi.c | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) (limited to 'drivers') diff --git a/drivers/spi/spi-fsl-dspi.c b/drivers/spi/spi-fsl-dspi.c index df4944353ed5..d5983be32180 100644 --- a/drivers/spi/spi-fsl-dspi.c +++ b/drivers/spi/spi-fsl-dspi.c @@ -656,10 +656,8 @@ static void dspi_pushr_write(struct fsl_dspi *dspi) regmap_write(dspi->regmap, SPI_PUSHR, dspi_pop_tx_pushr(dspi)); } -static void dspi_pushr_cmd_write(struct fsl_dspi *dspi) +static void dspi_pushr_cmd_write(struct fsl_dspi *dspi, u16 cmd) { - u16 cmd = dspi->tx_cmd; - /* * The only time when the PCS doesn't need continuation after this word * is when it's last. We need to look ahead, because we actually call @@ -680,8 +678,13 @@ static void dspi_pushr_txdata_write(struct fsl_dspi *dspi, u16 txdata) regmap_write(dspi->regmap_pushr, PUSHR_TX, txdata); } -static void dspi_xspi_write(struct fsl_dspi *dspi, int cnt) +static void dspi_xspi_write(struct fsl_dspi *dspi, int cnt, bool eoq) { + u16 tx_cmd = dspi->tx_cmd; + + if (eoq) + tx_cmd |= SPI_PUSHR_CMD_EOQ; + /* Update CTARE */ regmap_write(dspi->regmap, SPI_CTARE(0), SPI_FRAME_EBITS(dspi->oper_bits_per_word) | @@ -691,7 +694,7 @@ static void dspi_xspi_write(struct fsl_dspi *dspi, int cnt) * Write the CMD FIFO entry first, and then the two * corresponding TX FIFO entries (or one...). */ - dspi_pushr_cmd_write(dspi); + dspi_pushr_cmd_write(dspi, tx_cmd); /* Fill TX FIFO with as many transfers as possible */ while (cnt--) { @@ -707,6 +710,7 @@ static void dspi_xspi_fifo_write(struct fsl_dspi *dspi) { int num_fifo_entries = dspi->devtype_data->fifo_size; int bytes_in_flight; + bool eoq = false; /* In XSPI mode each 32-bit word occupies 2 TX FIFO entries */ if (dspi->oper_word_size == 4) @@ -732,11 +736,11 @@ static void dspi_xspi_fifo_write(struct fsl_dspi *dspi) * So send one word less during this go, to force a split and a command * with a single word next time, when CONT will be unset. */ - if (bytes_in_flight == dspi->len && dspi->words_in_flight > 1 && - !(dspi->tx_cmd & SPI_PUSHR_CMD_CONT)) - dspi->words_in_flight--; + if (!(dspi->tx_cmd & SPI_PUSHR_CMD_CONT) && + bytes_in_flight == dspi->len) + eoq = true; - dspi_xspi_write(dspi, dspi->words_in_flight); + dspi_xspi_write(dspi, dspi->words_in_flight, eoq); } static void dspi_eoq_fifo_write(struct fsl_dspi *dspi) -- cgit v1.2.3 From e9bac90036d394b01cc7d5297a11d33b8ab92a91 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 5 Mar 2020 00:00:44 +0200 Subject: spi: spi-fsl-dspi: Take software timestamp in dspi_fifo_write Although the SPI system timestamps are supposed to reflect the moment that the peripheral has received a word rather than the moment when the CPU has enqueued that word to the FIFO, in practice it is easier to just record the latter time than the former (with a smaller error). With the recent migration of TCFQ users from poll back to interrupt mode (this time for XSPI FIFO), it's wiser to keep the interrupt latency outside of the measurement of the PTP system timestamp itself. If there proves to be any constant offset that requires static compensation, that can always be added later. So far that does not appear to be the case at least on the LS1021A-TSN board, where testing shows that the phc2sys offset is able to remain within +/- 200 ns even after 68 hours of testing. Signed-off-by: Vladimir Oltean Link: https://lore.kernel.org/r/20200304220044.11193-13-olteanv@gmail.com Signed-off-by: Mark Brown --- drivers/spi/spi-fsl-dspi.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) (limited to 'drivers') diff --git a/drivers/spi/spi-fsl-dspi.c b/drivers/spi/spi-fsl-dspi.c index d5983be32180..b5ab0afbfa26 100644 --- a/drivers/spi/spi-fsl-dspi.c +++ b/drivers/spi/spi-fsl-dspi.c @@ -838,19 +838,18 @@ no_accel: static void dspi_fifo_write(struct fsl_dspi *dspi) { + struct spi_transfer *xfer = dspi->cur_transfer; + struct spi_message *msg = dspi->cur_msg; + int bytes_sent; + dspi_setup_accel(dspi); + spi_take_timestamp_pre(dspi->ctlr, xfer, dspi->progress, !dspi->irq); + if (dspi->devtype_data->trans_mode == DSPI_EOQ_MODE) dspi_eoq_fifo_write(dspi); else dspi_xspi_fifo_write(dspi); -} - -static int dspi_rxtx(struct fsl_dspi *dspi) -{ - struct spi_transfer *xfer = dspi->cur_transfer; - struct spi_message *msg = dspi->cur_msg; - int bytes_sent; /* Update total number of bytes that were transferred */ bytes_sent = dspi->words_in_flight * dspi->oper_word_size; @@ -859,16 +858,16 @@ static int dspi_rxtx(struct fsl_dspi *dspi) spi_take_timestamp_post(dspi->ctlr, dspi->cur_transfer, dspi->progress, !dspi->irq); +} +static int dspi_rxtx(struct fsl_dspi *dspi) +{ dspi_fifo_read(dspi); if (!dspi->len) /* Success! */ return 0; - spi_take_timestamp_pre(dspi->ctlr, dspi->cur_transfer, - dspi->progress, !dspi->irq); - dspi_fifo_write(dspi); return -EINPROGRESS; -- cgit v1.2.3