mlx4_en: Consider inline packets on completion

Was trying to unmap work queue entries that had inline packets,
so naturally weren't mapped.

Signed-off-by: Yevgeny Petrilin <yevgenyp@mellanox.co.il>
Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/drivers/net/mlx4/en_tx.c b/drivers/net/mlx4/en_tx.c
index ff4d752..4afd599 100644
--- a/drivers/net/mlx4/en_tx.c
+++ b/drivers/net/mlx4/en_tx.c
@@ -203,19 +203,21 @@
 
 	/* Optimize the common case when there are no wraparounds */
 	if (likely((void *) tx_desc + tx_info->nr_txbb * TXBB_SIZE <= end)) {
-		if (tx_info->linear) {
-			pci_unmap_single(mdev->pdev,
-					 (dma_addr_t) be64_to_cpu(data->addr),
+		if (!tx_info->inl) {
+			if (tx_info->linear) {
+				pci_unmap_single(mdev->pdev,
+					(dma_addr_t) be64_to_cpu(data->addr),
 					 be32_to_cpu(data->byte_count),
 					 PCI_DMA_TODEVICE);
-			++data;
-		}
+				++data;
+			}
 
-		for (i = 0; i < frags; i++) {
-			frag = &skb_shinfo(skb)->frags[i];
-			pci_unmap_page(mdev->pdev,
-				       (dma_addr_t) be64_to_cpu(data[i].addr),
-				       frag->size, PCI_DMA_TODEVICE);
+			for (i = 0; i < frags; i++) {
+				frag = &skb_shinfo(skb)->frags[i];
+				pci_unmap_page(mdev->pdev,
+					(dma_addr_t) be64_to_cpu(data[i].addr),
+					frag->size, PCI_DMA_TODEVICE);
+			}
 		}
 		/* Stamp the freed descriptor */
 		for (i = 0; i < tx_info->nr_txbb * TXBB_SIZE; i += STAMP_STRIDE) {
@@ -224,27 +226,29 @@
 		}
 
 	} else {
-		if ((void *) data >= end) {
-			data = (struct mlx4_wqe_data_seg *)
-					(ring->buf + ((void *) data - end));
-		}
+		if (!tx_info->inl) {
+			if ((void *) data >= end) {
+				data = (struct mlx4_wqe_data_seg *)
+						(ring->buf + ((void *) data - end));
+			}
 
-		if (tx_info->linear) {
-			pci_unmap_single(mdev->pdev,
-					 (dma_addr_t) be64_to_cpu(data->addr),
+			if (tx_info->linear) {
+				pci_unmap_single(mdev->pdev,
+					(dma_addr_t) be64_to_cpu(data->addr),
 					 be32_to_cpu(data->byte_count),
 					 PCI_DMA_TODEVICE);
-			++data;
-		}
+				++data;
+			}
 
-		for (i = 0; i < frags; i++) {
-			/* Check for wraparound before unmapping */
-			if ((void *) data >= end)
-				data = (struct mlx4_wqe_data_seg *) ring->buf;
-			frag = &skb_shinfo(skb)->frags[i];
-			pci_unmap_page(mdev->pdev,
+			for (i = 0; i < frags; i++) {
+				/* Check for wraparound before unmapping */
+				if ((void *) data >= end)
+					data = (struct mlx4_wqe_data_seg *) ring->buf;
+				frag = &skb_shinfo(skb)->frags[i];
+				pci_unmap_page(mdev->pdev,
 					(dma_addr_t) be64_to_cpu(data->addr),
 					 frag->size, PCI_DMA_TODEVICE);
+			}
 		}
 		/* Stamp the freed descriptor */
 		for (i = 0; i < tx_info->nr_txbb * TXBB_SIZE; i += STAMP_STRIDE) {
@@ -790,8 +794,11 @@
 			wmb();
 			data->byte_count = cpu_to_be32(skb_headlen(skb) - lso_header_size);
 		}
-	} else
+		tx_info->inl = 0;
+	} else {
 		build_inline_wqe(tx_desc, skb, real_size, &vlan_tag, tx_ind, fragptr);
+		tx_info->inl = 1;
+	}
 
 	ring->prod += nr_txbb;
 
diff --git a/drivers/net/mlx4/mlx4_en.h b/drivers/net/mlx4/mlx4_en.h
index 2e96c7b..08bf321 100644
--- a/drivers/net/mlx4/mlx4_en.h
+++ b/drivers/net/mlx4/mlx4_en.h
@@ -202,6 +202,7 @@
 	u32 nr_txbb;
 	u8 linear;
 	u8 data_offset;
+	u8 inl;
 };