[WCNCR00153321] perf: Add UsbReq FFA queue to reduce buffer size

[Description]
Add UsbReq FFA queue to reduce overall USB TX aggregation buffer size

Change-Id: Ib07be794a4f1fb85c7f49b9371cc610b078c6444
Signed-off-by: Desmond Lin <desmond.lin@mediatek.com>
CR-Id: WCNCR00153321
Feature: perf
diff --git a/Makefile.ce b/Makefile.ce
index 47ce0af..ed8bd57 100644
--- a/Makefile.ce
+++ b/Makefile.ce
@@ -136,7 +136,7 @@
 
 CFG_DEFAULT_DBG_LEVEL=0xF
 
-CFG_USB_REQ_TX_DATA_CNT=4
+CFG_USB_REQ_TX_DATA_CNT=2
 
 CFG_USB_REQ_RX_DATA_CNT=4
 
diff --git a/Makefile.x86 b/Makefile.x86
index 2bb3106..87f7ec8 100644
--- a/Makefile.x86
+++ b/Makefile.x86
@@ -54,7 +54,7 @@
 
 #CFG_DEFAULT_DBG_LEVEL=0xF
 
-CFG_USB_REQ_TX_DATA_CNT=4
+CFG_USB_REQ_TX_DATA_CNT=2
 
 CFG_USB_REQ_RX_DATA_CNT=2
 
diff --git a/os/linux/hif/usb/hal_api.c b/os/linux/hif/usb/hal_api.c
index fbbdc86..8a992ef 100644
--- a/os/linux/hif/usb/hal_api.c
+++ b/os/linux/hif/usb/hal_api.c
@@ -485,26 +485,31 @@
 	spin_lock_irqsave(&prHifInfo->rTxDataFreeQLock, flags);
 
 	if (list_empty(&prHifInfo->rTxDataFreeQ[ucTc])) {
-		spin_unlock_irqrestore(&prHifInfo->rTxDataFreeQLock, flags);
-		DBGLOG(HAL, ERROR, "run out of rTxDataFreeQ #1!!\n");
-		wlanProcessQueuedMsduInfo(prGlueInfo->prAdapter, prMsduInfo);
-		return WLAN_STATUS_RESOURCES;
+		if (glUsbBorrowFfaReq(prHifInfo, ucTc) == FALSE) {
+			spin_unlock_irqrestore(&prHifInfo->rTxDataFreeQLock, flags);
+			DBGLOG(HAL, ERROR, "run out of rTxDataFreeQ #1!!\n");
+			wlanProcessQueuedMsduInfo(prGlueInfo->prAdapter, prMsduInfo);
+			return WLAN_STATUS_RESOURCES;
+		}
 	}
 	prUsbReq = list_entry(prHifInfo->rTxDataFreeQ[ucTc].next, struct _USB_REQ_T, list);
 	prBufCtrl = prUsbReq->prBufCtrl;
 
-	if (prHifInfo->u4AggRsvSize < ALIGN_4(u4Length))
-		DBGLOG(HAL, ERROR, "u4AggRsvSize count FAIL (%u, %u)\n", prHifInfo->u4AggRsvSize, u4Length);
-	prHifInfo->u4AggRsvSize -= ALIGN_4(u4Length);
+	if (prHifInfo->u4AggRsvSize[ucTc] < ALIGN_4(u4Length))
+		DBGLOG(HAL, ERROR, "u4AggRsvSize[%hhu] count FAIL (%u, %u)\n",
+		       ucTc, prHifInfo->u4AggRsvSize[ucTc], u4Length);
+	prHifInfo->u4AggRsvSize[ucTc] -= ALIGN_4(u4Length);
 
 	if (prBufCtrl->u4WrIdx + ALIGN_4(u4Length) + LEN_USB_UDMA_TX_TERMINATOR > prBufCtrl->u4BufSize) {
 		halTxUSBSendAggData(prHifInfo, ucTc, prUsbReq);
 
 		if (list_empty(&prHifInfo->rTxDataFreeQ[ucTc])) {
-			spin_unlock_irqrestore(&prHifInfo->rTxDataFreeQLock, flags);
-			DBGLOG(HAL, ERROR, "run out of rTxDataFreeQ #2!!\n");
-			wlanProcessQueuedMsduInfo(prGlueInfo->prAdapter, prMsduInfo);
-			return WLAN_STATUS_FAILURE;
+			if (glUsbBorrowFfaReq(prHifInfo, ucTc) == FALSE) {
+				spin_unlock_irqrestore(&prHifInfo->rTxDataFreeQLock, flags);
+				DBGLOG(HAL, ERROR, "run out of rTxDataFreeQ #2!!\n");
+				wlanProcessQueuedMsduInfo(prGlueInfo->prAdapter, prMsduInfo);
+				return WLAN_STATUS_FAILURE;
+			}
 		}
 
 		prUsbReq = list_entry(prHifInfo->rTxDataFreeQ[ucTc].next, struct _USB_REQ_T, list);
@@ -636,7 +641,7 @@
 	struct urb *urb = prUsbReq->prUrb;
 	UINT_32 u4SentDataSize;
 
-	ucTc = *((PUINT_8)&prUsbReq->prPriv);
+	ucTc = *((PUINT_8)&prUsbReq->prPriv) & TC_MASK;
 
 	prFreeQueue = &rFreeQueue;
 	QUEUE_INITIALIZE(prFreeQueue);
@@ -651,6 +656,7 @@
 VOID halTxUSBProcessDataComplete(IN P_ADAPTER_T prAdapter, P_USB_REQ_T prUsbReq)
 {
 	UINT_8 ucTc;
+	BOOLEAN fgFfa;
 	struct urb *urb = prUsbReq->prUrb;
 	P_GL_HIF_INFO_T prHifInfo = prUsbReq->prHifInfo;
 #if CFG_USB_TX_AGG
@@ -658,7 +664,8 @@
 #endif
 	unsigned long flags;
 
-	ucTc = *((PUINT_8)&prUsbReq->prPriv);
+	ucTc = *((PUINT_8)&prUsbReq->prPriv) & TC_MASK;
+	fgFfa =  *((PUINT_8)&prUsbReq->prPriv) & FFA_MASK;
 
 	if (urb->status != 0) {
 		DBGLOG(TX, ERROR, "[%s] send DATA fail (status = %d)\n", __func__, urb->status);
@@ -671,7 +678,10 @@
 #if CFG_USB_TX_AGG
 	prBufCtrl->u4WrIdx = 0;
 
-	list_add_tail(&prUsbReq->list, &prHifInfo->rTxDataFreeQ[ucTc]);
+	if ((fgFfa == FALSE) || list_empty(&prHifInfo->rTxDataFreeQ[ucTc]))
+		list_add_tail(&prUsbReq->list, &prHifInfo->rTxDataFreeQ[ucTc]);
+	else
+		list_add_tail(&prUsbReq->list, &prHifInfo->rTxDataFfaQ);
 
 	if (usb_anchor_empty(&prHifInfo->rTxDataAnchor[ucTc])) {
 		prUsbReq = list_entry(prHifInfo->rTxDataFreeQ[ucTc].next, struct _USB_REQ_T, list);
@@ -774,7 +784,6 @@
 
 		usb_anchor_urb(prUsbReq->prUrb, &prHifInfo->rRxEventAnchor);
 
-		prUsbReq->prPriv = NULL;
 		prUsbReq->prBufCtrl->u4ReadSize = 0;
 		if (prHifInfo->eEventEpType == EVENT_EP_TYPE_INTR && fgFillUrb) {
 			usb_fill_int_urb(prUsbReq->prUrb,
@@ -865,7 +874,6 @@
 
 		usb_anchor_urb(prUsbReq->prUrb, &prHifInfo->rRxDataAnchor);
 
-		prUsbReq->prPriv = NULL;
 		prUsbReq->prBufCtrl->u4ReadSize = 0;
 		usb_fill_bulk_urb(prUsbReq->prUrb,
 				  prHifInfo->udev,
@@ -1156,9 +1164,10 @@
 
 #if CFG_USB_TX_AGG
 	if (list_empty(&prHifInfo->rTxDataFreeQ[ucTc])) {
-		spin_unlock_irqrestore(&prHifInfo->rTxDataFreeQLock, flags);
-
-		return FALSE;
+		if (glUsbBorrowFfaReq(prHifInfo, ucTc) == FALSE) {
+			spin_unlock_irqrestore(&prHifInfo->rTxDataFreeQLock, flags);
+			return FALSE;
+		}
 	}
 
 	prUsbReq = list_entry(prHifInfo->rTxDataFreeQ[ucTc].next, struct _USB_REQ_T, list);
@@ -1167,13 +1176,15 @@
 	if (prHifInfo->rTxDataFreeQ[ucTc].next->next == &prHifInfo->rTxDataFreeQ[ucTc]) {
 		/* length of rTxDataFreeQ equals 1 */
 		if (prBufCtrl->u4WrIdx + ALIGN_4(u4Length) >
-		    prBufCtrl->u4BufSize - prHifInfo->u4AggRsvSize - LEN_USB_UDMA_TX_TERMINATOR) {
+		    prBufCtrl->u4BufSize - prHifInfo->u4AggRsvSize[ucTc] - LEN_USB_UDMA_TX_TERMINATOR) {
 			/* Buffer is not enough */
-			spin_unlock_irqrestore(&prHifInfo->rTxDataFreeQLock, flags);
-			return FALSE;
+			if (glUsbBorrowFfaReq(prHifInfo, ucTc) == FALSE) {
+				spin_unlock_irqrestore(&prHifInfo->rTxDataFreeQLock, flags);
+				return FALSE;
+			}
 		}
 	}
-	prHifInfo->u4AggRsvSize += ALIGN_4(u4Length);
+	prHifInfo->u4AggRsvSize[ucTc] += ALIGN_4(u4Length);
 #else
 	if (list_empty(&prHifInfo->rTxDataFreeQ)) {
 		spin_unlock_irqrestore(&prHifInfo->rTxDataFreeQLock, flags);
diff --git a/os/linux/hif/usb/include/hif.h b/os/linux/hif/usb/include/hif.h
index 7f297db..8b5e052 100644
--- a/os/linux/hif/usb/include/hif.h
+++ b/os/linux/hif/usb/include/hif.h
@@ -124,6 +124,8 @@
 
 #define HIF_TX_INIT_CMD_PORT             USB_CMD_EP_OUT
 
+#define USB_REQ_TX_DATA_FFA_CNT         (10)
+
 #ifdef CFG_USB_REQ_TX_DATA_CNT
 #define USB_REQ_TX_DATA_CNT             (CFG_USB_REQ_TX_DATA_CNT)	/* platform specific USB_REQ_TX_DATA_CNT */
 #else
@@ -236,6 +238,7 @@
 	spinlock_t rRxDataQLock;
 
 	PVOID prTxCmdReqHead;
+	PVOID arTxDataFfaReqHead;
 	PVOID arTxDataReqHead[USB_TC_NUM];
 	PVOID prRxEventReqHead;
 	PVOID prRxDataReqHead;
@@ -243,8 +246,9 @@
 	spinlock_t rTxCmdFreeQLock;
 	struct list_head rTxCmdSendingQ;
 	spinlock_t rTxCmdSendingQLock;
+	struct list_head rTxDataFfaQ;
 #if CFG_USB_TX_AGG
-	UINT_32 u4AggRsvSize;
+	UINT_32 u4AggRsvSize[USB_TC_NUM];
 	struct list_head rTxDataFreeQ[USB_TC_NUM];
 	struct usb_anchor rTxDataAnchor[USB_TC_NUM];
 #else
@@ -266,6 +270,7 @@
 	struct list_head rTxDataCompleteQ;
 
 	BUF_CTRL_T rTxCmdBufCtrl[USB_REQ_TX_CMD_CNT];
+	BUF_CTRL_T rTxDataFfaBufCtrl[USB_REQ_TX_DATA_FFA_CNT];
 #if CFG_USB_TX_AGG
 	BUF_CTRL_T rTxDataBufCtrl[USB_TC_NUM][USB_REQ_TX_DATA_CNT];
 #else
@@ -290,6 +295,11 @@
 	QUE_T rSendingDataMsduInfoList;
 } USB_REQ_T, *P_USB_REQ_T;
 
+/* USB_REQ_T prPriv field for TxData */
+#define FFA_MASK                        BIT(7)           /* Indicate if this UsbReq is from FFA queue. */
+#define TC_MASK                         BITS(0, 6)       /* Indicate which TC this UsbReq belongs to */
+
+
 /*******************************************************************************
 *                            P U B L I C   D A T A
 ********************************************************************************
@@ -341,6 +351,7 @@
 VOID glUsbEnqueueReq(P_GL_HIF_INFO_T prHifInfo, struct list_head *prHead, P_USB_REQ_T prUsbReq,
 		     spinlock_t *prLock, BOOLEAN fgHead);
 P_USB_REQ_T glUsbDequeueReq(P_GL_HIF_INFO_T prHifInfo, struct list_head *prHead, spinlock_t *prLock);
+BOOLEAN glUsbBorrowFfaReq(P_GL_HIF_INFO_T prHifInfo, UINT_8 ucTc);
 
 WLAN_STATUS halTxUSBSendCmd(IN P_GLUE_INFO_T prGlueInfo, IN UINT_8 ucTc, IN P_CMD_INFO_T prCmdInfo);
 VOID halTxUSBSendCmdComplete(struct urb *urb);
diff --git a/os/linux/hif/usb/usb.c b/os/linux/hif/usb/usb.c
index bde191f..c676dda 100644
--- a/os/linux/hif/usb/usb.c
+++ b/os/linux/hif/usb/usb.c
@@ -669,6 +669,32 @@
 
 /*----------------------------------------------------------------------------*/
 /*!
+* \brief This function borrows a UsbReq from the Tx data FFA queue and moves it to the specified TC Tx data free queue
+*
+* \param[in] prHifInfo  Pointer to HIF info structure
+* \param[in] ucTc       Specify TC index
+*
+* \retval TRUE          operation success
+* \retval FALSE         operation fail
+*/
+/*----------------------------------------------------------------------------*/
+BOOLEAN glUsbBorrowFfaReq(P_GL_HIF_INFO_T prHifInfo, UINT_8 ucTc)	/* performs no locking itself */
+{
+	P_USB_REQ_T prUsbReq;
+	/* Nothing can be lent when the shared FFA queue is empty */
+	if (list_empty(&prHifInfo->rTxDataFfaQ))
+		return FALSE;
+	prUsbReq = list_entry(prHifInfo->rTxDataFfaQ.next, struct _USB_REQ_T, list);
+	list_del_init(prHifInfo->rTxDataFfaQ.next);	/* unlink the head entry fetched above */
+	/* Record FFA origin and owning TC in the first byte of prPriv; ucTc is OR-ed in without masking */
+	*((PUINT_8)&prUsbReq->prPriv) = FFA_MASK | ucTc;
+	list_add_tail(&prUsbReq->list, &prHifInfo->rTxDataFreeQ[ucTc]);	/* hand it to this TC's free queue */
+
+	return TRUE;
+}
+
+/*----------------------------------------------------------------------------*/
+/*!
 * \brief This function stores hif related info, which is initialized before.
 *
 * \param[in] prGlueInfo Pointer to glue info structure
@@ -733,10 +759,10 @@
 	mutex_init(&prHifInfo->vendor_req_sem);
 
 #if CFG_USB_TX_AGG
-	prHifInfo->u4AggRsvSize = 0;
-
-	for (ucTc = 0; ucTc < USB_TC_NUM; ++ucTc)
+	for (ucTc = 0; ucTc < USB_TC_NUM; ++ucTc) {
+		prHifInfo->u4AggRsvSize[ucTc] = 0;
 		init_usb_anchor(&prHifInfo->rTxDataAnchor[ucTc]);
+	}
 #else
 	init_usb_anchor(&prHifInfo->rTxDataAnchor);
 #endif
@@ -765,9 +791,36 @@
 
 	glUsbInitQ(prHifInfo, &prHifInfo->rTxCmdSendingQ, 0);
 
+	/* TX Data FFA */
+	prHifInfo->arTxDataFfaReqHead = glUsbInitQ(prHifInfo,
+							&prHifInfo->rTxDataFfaQ, USB_REQ_TX_DATA_FFA_CNT);
+	i = 0;
+	list_for_each_entry_safe(prUsbReq, prUsbReqNext, &prHifInfo->rTxDataFfaQ, list) {
+		QUEUE_INITIALIZE(&prUsbReq->rSendingDataMsduInfoList);
+		*((PUINT_8)&prUsbReq->prPriv) = FFA_MASK;
+		prUsbReq->prBufCtrl = &prHifInfo->rTxDataFfaBufCtrl[i];
+#if CFG_USB_CONSISTENT_DMA
+		prUsbReq->prBufCtrl->pucBuf =
+		    usb_alloc_coherent(prHifInfo->udev, USB_TX_DATA_BUFF_SIZE, GFP_ATOMIC,
+				       &prUsbReq->prUrb->transfer_dma);
+#else
+		prUsbReq->prBufCtrl->pucBuf = kmalloc(USB_TX_DATA_BUFF_SIZE, GFP_ATOMIC);
+#endif
+		if (prUsbReq->prBufCtrl->pucBuf == NULL) {
+			DBGLOG(HAL, ERROR, "kmalloc() reports error\n");
+			goto error;
+		}
+		prUsbReq->prBufCtrl->u4BufSize = USB_TX_DATA_BUFF_SIZE;
+		prUsbReq->prBufCtrl->u4WrIdx = 0;
+		++i;
+	}
+
 	/* TX Data */
 #if CFG_USB_TX_AGG
 	for (ucTc = 0; ucTc < USB_TC_NUM; ++ucTc) {
+		/* Only for TC0 ~ TC3 and DBDC1_TC */
+		if (ucTc >= TC4_INDEX && ucTc < USB_DBDC1_TC)
+			continue;
 		prHifInfo->arTxDataReqHead[ucTc] = glUsbInitQ(prHifInfo,
 								&prHifInfo->rTxDataFreeQ[ucTc], USB_REQ_TX_DATA_CNT);
 		i = 0;
@@ -885,6 +938,8 @@
 
 #if CFG_USB_TX_AGG
 	for (ucTc = 0; ucTc < USB_TC_NUM; ++ucTc) {
+		if (ucTc >= TC4_INDEX && ucTc < USB_DBDC1_TC)
+			continue;
 		list_for_each_entry_safe(prUsbReq, prUsbReqNext, &prHifInfo->rTxDataFreeQ[ucTc], list) {
 #if CFG_USB_CONSISTENT_DMA
 			usb_free_coherent(prHifInfo->udev, USB_TX_DATA_BUFF_SIZE,
@@ -894,7 +949,6 @@
 #endif
 			usb_free_urb(prUsbReq->prUrb);
 		}
-		kfree(prHifInfo->arTxDataReqHead[ucTc]);
 	}
 #else
 	list_for_each_entry_safe(prUsbReq, prUsbReqNext, &prHifInfo->rTxDataFreeQ, list) {
@@ -908,6 +962,16 @@
 	}
 #endif
 
+	list_for_each_entry_safe(prUsbReq, prUsbReqNext, &prHifInfo->rTxDataFfaQ, list) {
+#if CFG_USB_CONSISTENT_DMA
+		usb_free_coherent(prHifInfo->udev, USB_TX_DATA_BUFF_SIZE,
+			prUsbReq->prBufCtrl->pucBuf, prUsbReq->prUrb->transfer_dma);
+#else
+		kfree(prUsbReq->prBufCtrl->pucBuf);
+#endif
+		usb_free_urb(prUsbReq->prUrb);
+	}
+
 	list_for_each_entry_safe(prUsbReq, prUsbReqNext, &prHifInfo->rTxCmdFreeQ, list) {
 #if CFG_USB_CONSISTENT_DMA
 		usb_free_coherent(prHifInfo->udev, USB_TX_CMD_BUF_SIZE,
@@ -959,6 +1023,9 @@
 	}
 
 	kfree(prHifInfo->prTxCmdReqHead);
+	kfree(prHifInfo->arTxDataFfaReqHead);
+	for (ucTc = 0; ucTc < USB_TC_NUM; ++ucTc)
+		kfree(prHifInfo->arTxDataReqHead[ucTc]);
 	kfree(prHifInfo->prRxEventReqHead);
 	kfree(prHifInfo->prRxDataReqHead);