Transfer data to and from the MMC using DMA

- Add a method to transfer data through the MSDC interface using its DMA
engine instead of accessing the FIFO directly. The FIFO (PIO) path remains
available through the new CONFIG_MMC_MTK_PIO option.
- Fix the tuning loops, which ignored the return value of
mmc_send_tuning(); they now use it as the command error. This makes
tuning at 200MHz/8-bit succeed.
- Set the bus width to 8 bits and the maximum bus frequency to 188MHz
(value taken from LK), and enable HS200 at 1.8V on mt8516-coral.

Change-Id: Ia3e8780093843a1f5892a8a89a974858355cccf0
diff --git a/arch/arm/dts/mt8516-coral.dts b/arch/arm/dts/mt8516-coral.dts
index ce13f2d..a45e694 100644
--- a/arch/arm/dts/mt8516-coral.dts
+++ b/arch/arm/dts/mt8516-coral.dts
@@ -93,11 +93,10 @@
 &mmc0 {
 	pinctrl-names = "default";
 	pinctrl-0 = <&mmc0_pins_default>;
-	bus-width = <4>;
-	max-frequency = <200000000>;
+	bus-width = <8>;
+	max-frequency = <188000000>;
 	cap-mmc-highspeed;
-	// Disabled for now -- Enabled on pumpkin, but gives fastboot issues.
-	// mmc-hs200-1_8v;
+	mmc-hs200-1_8v;
 	cap-mmc-hw-reset;
 	vmmc-supply = <&reg_3p3v>;
 	vqmmc-supply = <&reg_1p8v>;
diff --git a/drivers/mmc/Kconfig b/drivers/mmc/Kconfig
index 7361bca..42f4650 100644
--- a/drivers/mmc/Kconfig
+++ b/drivers/mmc/Kconfig
@@ -690,6 +690,15 @@
 	  This is needed if support for any SD/SDIO/MMC devices is required.
 	  If unsure, say N.
 
+config MMC_MTK_PIO
+	bool "Mediatek SD/MMC PIO support"
+	default n
+	depends on MMC_MTK
+	help
+	  This selects programmed I/O (PIO) through the FIFO for the MediaTek
+	  SD/MMC controller. If this is not selected, DMA is used to move data.
+	  If unsure, say N.
+
 endif
 
 config FSL_ESDHC
diff --git a/drivers/mmc/mtk-sd.c b/drivers/mmc/mtk-sd.c
index b76e8c1..dde9450 100644
--- a/drivers/mmc/mtk-sd.c
+++ b/drivers/mmc/mtk-sd.c
@@ -53,10 +53,12 @@
 #define MSDC_INT_ACMDRDY		BIT(3)
 #define MSDC_INT_ACMDTMO		BIT(4)
 #define MSDC_INT_ACMDCRCERR		BIT(5)
+#define MSDC_INT_DMAQ_EMPTY		BIT(6)
 #define MSDC_INT_CMDRDY			BIT(8)
 #define MSDC_INT_CMDTMO			BIT(9)
 #define MSDC_INT_RSPCRCERR		BIT(10)
 #define MSDC_INT_XFER_COMPL		BIT(12)
+#define MSDC_INT_DXFER_DONE		BIT(13)
 #define MSDC_INT_DATTMO			BIT(14)
 #define MSDC_INT_DATCRCERR		BIT(15)
 
@@ -67,6 +69,17 @@
 #define MSDC_FIFOCS_RXCNT_M		0xff
 #define MSDC_FIFOCS_RXCNT_S		0
 
+/* MSDC DMA: bit fields of the dma_ctrl and dma_cfg registers */
+#define MSDC_DMA_CTRL_BURSTSZ	(0x7 << 12)	/* burst size field mask */
+#define MSDC_DMA_CTRL_MODE		(0x1 <<  8)	/* cleared to select basic mode */
+#define MSDC_DMA_CTRL_LASTBUF	(0x1 << 10)	/* set on the last (here: only) buffer */
+#define MSDC_DMA_CTRL_START		(0x1 <<  0)	/* set to start the transfer */
+#define MSDC_DMA_CTRL_STOP		(0x1 <<  1)	/* set to stop the engine */
+#define MSDC_DMA_CFG_STS		(0x1 <<  0)	/* dma_cfg bit polled until clear after STOP */
+#define MSDC_DMA_CTRL_BURST_64B	(6)	/* burst size field value; presumably 64-byte bursts */
+#define MSDC_DMA_CTRL_LASTBUF_S	(10)	/* shift of MSDC_DMA_CTRL_LASTBUF */
+#define MSDC_DMA_CTRL_BURSTSZ_S	(12)	/* shift of MSDC_DMA_CTRL_BURSTSZ */
+
 /* #define SDC_CFG */
 #define SDC_CFG_DTOC_M			0xff000000
 #define SDC_CFG_DTOC_S			24
@@ -285,6 +298,7 @@
 	struct msdc_tune_para saved_tune_para;
 };
 
+static void msdc_set_timeout(struct msdc_host *host, u32 ns, u32 clks);
 static void msdc_reset_hw(struct msdc_host *host)
 {
 	u32 reg;
@@ -305,6 +319,7 @@
 			   !(reg & MSDC_FIFOCS_CLR), 1000000);
 }
 
+#if defined(CONFIG_MMC_MTK_PIO)
 static u32 msdc_fifo_rx_bytes(struct msdc_host *host)
 {
 	return (readl(&host->base->msdc_fifocs) &
@@ -316,6 +331,7 @@
 	return (readl(&host->base->msdc_fifocs) &
 		MSDC_FIFOCS_TXCNT_M) >> MSDC_FIFOCS_TXCNT_S;
 }
+#endif  // defined(CONFIG_MMC_MTK_PIO)
 
 static u32 msdc_cmd_find_resp(struct msdc_host *host, struct mmc_cmd *cmd)
 {
@@ -484,8 +500,13 @@
 
 	rawcmd = msdc_cmd_prepare_raw_cmd(host, cmd, data);
 
-	if (data)
+	if (data) {
+#if !defined(CONFIG_MMC_MTK_PIO)
+		clrbits_le32(&host->base->msdc_cfg, MSDC_CFG_PIO);
+#endif
+
 		blocks = data->blocks;
+	}
 
 	writel(CMD_INTS_MASK, &host->base->msdc_int);
 	writel(blocks, &host->base->sdc_blk_num);
@@ -501,6 +522,7 @@
 	return msdc_cmd_done(host, status, cmd);
 }
 
+#if defined(CONFIG_MMC_MTK_PIO)
 static void msdc_fifo_read(struct msdc_host *host, u8 *buf, u32 size)
 {
 	u32 *wbuf;
@@ -628,10 +650,72 @@
 
 	return ret;
 }
+#else
+/*
+ * Run the data phase of a request through the MSDC DMA engine: program a
+ * single-buffer, basic-mode transfer of data->blocks * data->blocksize bytes
+ * at data->dest, start it and busy-poll msdc_int. @write only selects the
+ * value passed to msdc_set_timeout() (250 ms, else 100 ms); @cmd is unused.
+ *
+ * Return: 0 on MSDC_INT_XFER_COMPL, -1 on data timeout or data CRC error,
+ * -2 on any other interrupt status.
+ */
+static int msdc_dma_rw(struct msdc_host *host, struct mmc_cmd *cmd,
+		       struct mmc_data *data, bool write)
+{
+	u32 len = data->blocks * data->blocksize;
+	u32 ints = MSDC_INT_XFER_COMPL | MSDC_INT_DATTMO | MSDC_INT_DATCRCERR;
+	int timeout = write ? 250 : 100;
+	u32 reg;
+	int ret;

-static int msdc_start_data(struct msdc_host *host, struct mmc_data *data)
+	msdc_set_timeout(host, timeout * 1000000, 0);
+	/* NOTE(review): also used for writes; presumably dest aliases src */
+	writel((uintptr_t)data->dest, &host->base->dma_sa);
+	clrsetbits_le32(&host->base->dma_ctrl, MSDC_DMA_CTRL_BURSTSZ,
+			MSDC_DMA_CTRL_BURST_64B << MSDC_DMA_CTRL_BURSTSZ_S);
+	/* Clear the mode bit to select basic mode */
+	clrbits_le32(&host->base->dma_ctrl, MSDC_DMA_CTRL_MODE);
+	/* Only one buffer is sent, so set the last-buffer bit */
+	clrsetbits_le32(&host->base->dma_ctrl, MSDC_DMA_CTRL_LASTBUF,
+			1 << MSDC_DMA_CTRL_LASTBUF_S);
+	writel(len, &host->base->dma_length);
+	setbits_le32(&host->base->msdc_inten, ints);
+	flush_cache((uintptr_t)data->dest, len);
+
+	/* Clear interrupts before beginning DMA */
+	writel(ints | MSDC_INT_DXFER_DONE | MSDC_INT_DMAQ_EMPTY |
+	       MSDC_INT_CMDRDY | MSDC_INT_CMDTMO | MSDC_INT_RSPCRCERR,
+	       &host->base->msdc_int);
+	setbits_le32(&host->base->dma_ctrl, MSDC_DMA_CTRL_START);
+
+	for (;;) {
+		reg = readl(&host->base->msdc_int);
+		if (!reg)
+			continue;
+		if (reg & MSDC_INT_XFER_COMPL) {
+			ret = 0;
+		} else if (reg & (MSDC_INT_DATTMO | MSDC_INT_DATCRCERR)) {
+			ret = -1;
+		} else {
+			printf("MSDC: unexpected err 0x%x\n", reg);
+			ret = -2;
+		}
+		break;
+	}
+
+	/* Stop the engine and wait for MSDC_DMA_CFG_STS to clear */
+	setbits_le32(&host->base->dma_ctrl, MSDC_DMA_CTRL_STOP);
+	while (readl(&host->base->dma_cfg) & MSDC_DMA_CFG_STS)
+		;
+	flush_cache((uintptr_t)data->dest, len);
+
+	return ret;
+}
+#endif  // defined(CONFIG_MMC_MTK_PIO)
+
+static int msdc_start_data(struct msdc_host *host, struct mmc_cmd *cmd, struct mmc_data *data)
 {
-	u32 size;
 	int ret;
 
 	WATCHDOG_RESET();
@@ -641,12 +725,19 @@
 
 	writel(DATA_INTS_MASK, &host->base->msdc_int);
 
-	size = data->blocks * data->blocksize;
 
+#if defined(CONFIG_MMC_MTK_PIO)
+	u32 size = data->blocks * data->blocksize;
 	if (data->flags == MMC_DATA_WRITE)
 		ret = msdc_pio_write(host, (const u8 *)data->src, size);
 	else
 		ret = msdc_pio_read(host, (u8 *)data->dest, size);
+#else
+	if (data->flags == MMC_DATA_WRITE)
+		ret = msdc_dma_rw(host, cmd, data, true);
+	else
+		ret = msdc_dma_rw(host, cmd, data, false);
+#endif
 
 	if (ret) {
 		msdc_reset_hw(host);
@@ -670,7 +761,11 @@
 		return cmd_ret;
 
 	if (data) {
-		data_ret = msdc_start_data(host, data);
+		data_ret = msdc_start_data(host, cmd, data);
+#if defined(CONFIG_MMC_MTK_PIO)
+		setbits_le32(&host->base->msdc_cfg, MSDC_CFG_PIO);
+#endif
+		
 		if (cmd_ret)
 			return cmd_ret;
 		else
@@ -984,7 +1079,7 @@
 				i << MSDC_PAD_TUNE_CMDRDLY_S);
 
 		for (j = 0; j < 3; j++) {
-			mmc_send_tuning(mmc, opcode, &cmd_err);
+			cmd_err = mmc_send_tuning(mmc, opcode, NULL);
 			if (!cmd_err) {
 				rise_delay |= (1 << i);
 			} else {
@@ -1006,7 +1101,7 @@
 				i << MSDC_PAD_TUNE_CMDRDLY_S);
 
 		for (j = 0; j < 3; j++) {
-			mmc_send_tuning(mmc, opcode, &cmd_err);
+			cmd_err = mmc_send_tuning(mmc, opcode, NULL);
 			if (!cmd_err) {
 				fall_delay |= (1 << i);
 			} else {
@@ -1041,7 +1136,7 @@
 		clrsetbits_le32(tune_reg, MSDC_PAD_TUNE_CMDRRDLY_M,
 				i << MSDC_PAD_TUNE_CMDRRDLY_S);
 
-		mmc_send_tuning(mmc, opcode, &cmd_err);
+		cmd_err = mmc_send_tuning(mmc, opcode, NULL);
 		if (!cmd_err)
 			internal_delay |= (1 << i);
 	}