Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net

The conflicts were simple overlapping changes in the microchip
driver.

Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/Documentation/devicetree/bindings/net/microchip,lan78xx.txt b/Documentation/devicetree/bindings/net/microchip,lan78xx.txt
new file mode 100644
index 0000000..76786a0
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/microchip,lan78xx.txt
@@ -0,0 +1,54 @@
+Microchip LAN78xx Gigabit Ethernet controller
+
+The LAN78XX devices are usually configured by programming their OTP or with
+an external EEPROM, but some platforms (e.g. Raspberry Pi 3 B+) have neither.
+The Device Tree properties, if present, override the OTP and EEPROM.
+
+Required properties:
+- compatible: Should be one of "usb424,7800", "usb424,7801" or "usb424,7850".
+
+Optional properties:
+- local-mac-address:   see ethernet.txt
+- mac-address:         see ethernet.txt
+
+Optional properties of the embedded PHY:
+- microchip,led-modes: a 0..4 element vector, with each element configuring
+  the operating mode of an LED. Omitted LEDs are turned off. Allowed values
+  are defined in "include/dt-bindings/net/microchip-lan78xx.h".
+
+Example:
+
+/* Based on the configuration for a Raspberry Pi 3 B+ */
+&usb {
+	usb-port@1 {
+		compatible = "usb424,2514";
+		reg = <1>;
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		usb-port@1 {
+			compatible = "usb424,2514";
+			reg = <1>;
+			#address-cells = <1>;
+			#size-cells = <0>;
+
+			ethernet: ethernet@1 {
+				compatible = "usb424,7800";
+				reg = <1>;
+				local-mac-address = [ 00 11 22 33 44 55 ];
+
+				mdio {
+					#address-cells = <0x1>;
+					#size-cells = <0x0>;
+					eth_phy: ethernet-phy@1 {
+						reg = <1>;
+						microchip,led-modes = <
+							LAN78XX_LINK_1000_ACTIVITY
+							LAN78XX_LINK_10_100_ACTIVITY
+						>;
+					};
+				};
+			};
+		};
+	};
+};
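
For context, a driver consuming "microchip,led-modes" might read the
property roughly as in the sketch below (function and variable names are
illustrative; the in-tree lan78xx code differs in detail):

	#include <linux/of.h>
	#include <dt-bindings/net/microchip-lan78xx.h>

	/* Sketch: read up to 4 LED modes; omitted entries leave LEDs off. */
	static int example_read_led_modes(struct device_node *np, u32 *modes)
	{
		int count = of_property_count_u32_elems(np, "microchip,led-modes");

		if (count <= 0)
			return count;	/* absent, empty, or malformed */
		if (count > 4)
			return -EINVAL;	/* binding allows at most 4 elements */

		return of_property_read_u32_array(np, "microchip,led-modes",
						  modes, count);
	}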
diff --git a/Documentation/devicetree/bindings/net/socionext,uniphier-ave4.txt b/Documentation/devicetree/bindings/net/socionext,uniphier-ave4.txt
index 96398cc..fc8f017 100644
--- a/Documentation/devicetree/bindings/net/socionext,uniphier-ave4.txt
+++ b/Documentation/devicetree/bindings/net/socionext,uniphier-ave4.txt
@@ -13,13 +13,25 @@
  - reg: Address where registers are mapped and size of region.
  - interrupts: Should contain the MAC interrupt.
 - phy-mode: See ethernet.txt in the same directory. Allows choosing
-	"rgmii", "rmii", or "mii" according to the PHY.
+	"rgmii", "rmii", "mii", or "internal" according to the PHY.
+	The acceptable mode is SoC-dependent.
  - phy-handle: Should point to the external phy device.
 	See ethernet.txt file in the same directory.
  - clocks: A phandle to the clock for the MAC.
+	For the Pro4 SoC, that is, "socionext,uniphier-pro4-ave4",
+	another MAC clock, the GIO bus clock, and the PHY clock are also required.
+ - clock-names: Should contain
+	- "ether", "ether-gb", "gio", "ether-phy" for Pro4 SoC
+	- "ether" for others
+ - resets: A phandle to the reset control for the MAC. For Pro4 SoC,
+	GIO bus reset is also required.
+ - reset-names: Should contain
+	- "ether", "gio" for Pro4 SoC
+	- "ether" for others
+ - socionext,syscon-phy-mode: A phandle to syscon with one argument
+	that configures the PHY mode. The argument is the ID of the MAC instance.
 
 Optional properties:
- - resets: A phandle to the reset control for the MAC.
  - local-mac-address: See ethernet.txt in the same directory.
 
 Required subnode:
@@ -34,8 +46,11 @@
 		interrupts = <0 66 4>;
 		phy-mode = "rgmii";
 		phy-handle = <&ethphy>;
+		clock-names = "ether";
 		clocks = <&sys_clk 6>;
+		reset-names = "ether";
 		resets = <&sys_rst 6>;
+		socionext,syscon-phy-mode = <&soc_glue 0>;
 		local-mac-address = [00 00 00 00 00 00];
 
 		mdio {
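
As a rough illustration of how a driver might acquire the per-SoC clock set
described above (a sketch with illustrative names, not the actual ave driver
code):

	#include <linux/clk.h>
	#include <linux/device.h>
	#include <linux/err.h>
	#include <linux/kernel.h>

	/* Sketch: Pro4 needs four named clocks; other SoCs need only "ether". */
	static const char * const pro4_clock_names[] = {
		"ether", "ether-gb", "gio", "ether-phy",
	};

	static int example_get_clocks(struct device *dev, bool is_pro4,
				      struct clk **clks)
	{
		int i, n = is_pro4 ? ARRAY_SIZE(pro4_clock_names) : 1;

		for (i = 0; i < n; i++) {
			clks[i] = devm_clk_get(dev, pro4_clock_names[i]);
			if (IS_ERR(clks[i]))
				return PTR_ERR(clks[i]);
		}
		return 0;
	}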
diff --git a/Documentation/devicetree/bindings/soc/ti/keystone-navigator-qmss.txt b/Documentation/devicetree/bindings/soc/ti/keystone-navigator-qmss.txt
index 77cd42c..b025770 100644
--- a/Documentation/devicetree/bindings/soc/ti/keystone-navigator-qmss.txt
+++ b/Documentation/devicetree/bindings/soc/ti/keystone-navigator-qmss.txt
@@ -17,7 +17,8 @@
 
 
 Required properties:
-- compatible	: Must be "ti,keystone-navigator-qmss";
+- compatible	: Must be "ti,keystone-navigator-qmss".
+		: Must be "ti,66ak2g-navss-qm" for QMSS on K2G SoC.
 - clocks	: phandle to the reference clock for this device.
 - queue-range	: <start number> total range of queue numbers for the device.
 - linkram0	: <address size> for internal link ram, where size is the total
@@ -39,6 +40,12 @@
 			  - Descriptor memory setup region.
 			  - Queue Management/Queue Proxy region for queue Push.
 			  - Queue Management/Queue Proxy region for queue Pop.
+
+For QMSS on the K2G SoC, the following QM reg indexes are used, in that order:
+			  - Queue Peek region.
+			  - Queue configuration region.
+			  - Queue Management/Queue Proxy region for queue Push/Pop.
+
 - queue-pools	: child node classifying the queue ranges into pools.
 		  Queue ranges are grouped into 3 type of pools:
 		  - qpend	    : pool of qpend(interruptible) queues
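
To make the reg-index ordering for K2G concrete, a driver might map the three
regions by index along these lines (an illustrative sketch, not the in-tree
knav_qmss code):

	#include <linux/of_address.h>

	/* Sketch: iomap the K2G QMSS regions in the documented order. */
	static int example_map_k2g_regs(struct device_node *np,
					void __iomem **peek,
					void __iomem **cfg,
					void __iomem **push_pop)
	{
		*peek = of_iomap(np, 0);	/* Queue Peek region */
		*cfg = of_iomap(np, 1);		/* Queue configuration region */
		*push_pop = of_iomap(np, 2);	/* Queue Push/Pop region */

		if (!*peek || !*cfg || !*push_pop)
			return -ENOMEM;	/* of_iomap() returns NULL on failure */
		return 0;
	}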
diff --git a/MAINTAINERS b/MAINTAINERS
index 5ae51d0..fc812fb 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -5320,7 +5320,6 @@
 F:	include/linux/of_net.h
 F:	include/linux/phy.h
 F:	include/linux/phy_fixed.h
-F:	include/linux/platform_data/mdio-gpio.h
 F:	include/linux/platform_data/mdio-bcm-unimac.h
 F:	include/trace/events/mdio.h
 F:	include/uapi/linux/mdio.h
@@ -14614,7 +14613,9 @@
 M:	Microchip Linux Driver Support <UNGLinuxDriver@microchip.com>
 L:	netdev@vger.kernel.org
 S:	Maintained
+F:	Documentation/devicetree/bindings/net/microchip,lan78xx.txt
 F:	drivers/net/usb/lan78xx.*
+F:	include/dt-bindings/net/microchip-lan78xx.h
 
 USB MASS STORAGE DRIVER
 M:	Alan Stern <stern@rowland.harvard.edu>
diff --git a/drivers/net/ethernet/8390/Kconfig b/drivers/net/ethernet/8390/Kconfig
index 9fee7c8..f2f0264 100644
--- a/drivers/net/ethernet/8390/Kconfig
+++ b/drivers/net/ethernet/8390/Kconfig
@@ -29,8 +29,8 @@
 	  called axnet_cs.  If unsure, say N.
 
 config AX88796
-	tristate "ASIX AX88796 NE2000 clone support"
-	depends on (ARM || MIPS || SUPERH)
+	tristate "ASIX AX88796 NE2000 clone support" if !ZORRO
+	depends on (ARM || MIPS || SUPERH || ZORRO || COMPILE_TEST)
 	select CRC32
 	select PHYLIB
 	select MDIO_BITBANG
@@ -45,6 +45,19 @@
 	---help---
 	  Select this if your platform comes with an external 93CX6 eeprom.
 
+config XSURF100
+	tristate "Amiga XSurf 100 AX88796/NE2000 clone support"
+	depends on ZORRO
+	select AX88796
+	select ASIX_PHY
+	help
+	  This driver is for the Individual Computers X-Surf 100 Ethernet
+	  card (based on the ASIX AX88796 chip). If you have such a card,
+	  say Y. Otherwise, say N.
+
+	  To compile this driver as a module, choose M here: the module
+	  will be called xsurf100.
+
 config HYDRA
 	tristate "Hydra support"
 	depends on ZORRO
diff --git a/drivers/net/ethernet/8390/Makefile b/drivers/net/ethernet/8390/Makefile
index 1d650e6..85c83c5 100644
--- a/drivers/net/ethernet/8390/Makefile
+++ b/drivers/net/ethernet/8390/Makefile
@@ -16,4 +16,5 @@
 obj-$(CONFIG_STNIC) += stnic.o 8390.o
 obj-$(CONFIG_ULTRA) += smc-ultra.o 8390.o
 obj-$(CONFIG_WD80x3) += wd.o 8390.o
+obj-$(CONFIG_XSURF100) += xsurf100.o
 obj-$(CONFIG_ZORRO8390) += zorro8390.o
diff --git a/drivers/net/ethernet/8390/ax88796.c b/drivers/net/ethernet/8390/ax88796.c
index da61cf3..2a0ddec 100644
--- a/drivers/net/ethernet/8390/ax88796.c
+++ b/drivers/net/ethernet/8390/ax88796.c
@@ -163,6 +163,21 @@ static void ax_reset_8390(struct net_device *dev)
 	ei_outb(ENISR_RESET, addr + EN0_ISR);	/* Ack intr. */
 }
 
+/* Wrapper for __ei_interrupt for platforms that have a platform-specific
+ * way to find out whether the interrupt request might be caused by
+ * the ax88796 chip.
+ */
+static irqreturn_t ax_ei_interrupt_filtered(int irq, void *dev_id)
+{
+	struct net_device *dev = dev_id;
+	struct ax_device *ax = to_ax_dev(dev);
+	struct platform_device *pdev = to_platform_device(dev->dev.parent);
+
+	if (!ax->plat->check_irq(pdev))
+		return IRQ_NONE;
+
+	return ax_ei_interrupt(irq, dev_id);
+}
 
 static void ax_get_8390_hdr(struct net_device *dev, struct e8390_pkt_hdr *hdr,
 			    int ring_page)
@@ -387,6 +402,90 @@ static void ax_phy_switch(struct net_device *dev, int on)
 	ei_outb(reg_gpoc, ei_local->mem + EI_SHIFT(0x17));
 }
 
+static void ax_bb_mdc(struct mdiobb_ctrl *ctrl, int level)
+{
+	struct ax_device *ax = container_of(ctrl, struct ax_device, bb_ctrl);
+
+	if (level)
+		ax->reg_memr |= AX_MEMR_MDC;
+	else
+		ax->reg_memr &= ~AX_MEMR_MDC;
+
+	ei_outb(ax->reg_memr, ax->addr_memr);
+}
+
+static void ax_bb_dir(struct mdiobb_ctrl *ctrl, int output)
+{
+	struct ax_device *ax = container_of(ctrl, struct ax_device, bb_ctrl);
+
+	if (output)
+		ax->reg_memr &= ~AX_MEMR_MDIR;
+	else
+		ax->reg_memr |= AX_MEMR_MDIR;
+
+	ei_outb(ax->reg_memr, ax->addr_memr);
+}
+
+static void ax_bb_set_data(struct mdiobb_ctrl *ctrl, int value)
+{
+	struct ax_device *ax = container_of(ctrl, struct ax_device, bb_ctrl);
+
+	if (value)
+		ax->reg_memr |= AX_MEMR_MDO;
+	else
+		ax->reg_memr &= ~AX_MEMR_MDO;
+
+	ei_outb(ax->reg_memr, ax->addr_memr);
+}
+
+static int ax_bb_get_data(struct mdiobb_ctrl *ctrl)
+{
+	struct ax_device *ax = container_of(ctrl, struct ax_device, bb_ctrl);
+	int reg_memr = ei_inb(ax->addr_memr);
+
+	return reg_memr & AX_MEMR_MDI ? 1 : 0;
+}
+
+static const struct mdiobb_ops bb_ops = {
+	.owner = THIS_MODULE,
+	.set_mdc = ax_bb_mdc,
+	.set_mdio_dir = ax_bb_dir,
+	.set_mdio_data = ax_bb_set_data,
+	.get_mdio_data = ax_bb_get_data,
+};
+
+static int ax_mii_init(struct net_device *dev)
+{
+	struct platform_device *pdev = to_platform_device(dev->dev.parent);
+	struct ei_device *ei_local = netdev_priv(dev);
+	struct ax_device *ax = to_ax_dev(dev);
+	int err;
+
+	ax->bb_ctrl.ops = &bb_ops;
+	ax->addr_memr = ei_local->mem + AX_MEMR;
+	ax->mii_bus = alloc_mdio_bitbang(&ax->bb_ctrl);
+	if (!ax->mii_bus) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	ax->mii_bus->name = "ax88796_mii_bus";
+	ax->mii_bus->parent = dev->dev.parent;
+	snprintf(ax->mii_bus->id, MII_BUS_ID_SIZE, "%s-%x",
+		 pdev->name, pdev->id);
+
+	err = mdiobus_register(ax->mii_bus);
+	if (err)
+		goto out_free_mdio_bitbang;
+
+	return 0;
+
+ out_free_mdio_bitbang:
+	free_mdio_bitbang(ax->mii_bus);
+ out:
+	return err;
+}
+
 static int ax_open(struct net_device *dev)
 {
 	struct ax_device *ax = to_ax_dev(dev);
@@ -394,8 +493,16 @@ static int ax_open(struct net_device *dev)
 
 	netdev_dbg(dev, "open\n");
 
-	ret = request_irq(dev->irq, ax_ei_interrupt, ax->irqflags,
-			  dev->name, dev);
+	ret = ax_mii_init(dev);
+	if (ret)
+		goto failed_mii;
+
+	if (ax->plat->check_irq)
+		ret = request_irq(dev->irq, ax_ei_interrupt_filtered,
+				  ax->irqflags, dev->name, dev);
+	else
+		ret = request_irq(dev->irq, ax_ei_interrupt, ax->irqflags,
+				  dev->name, dev);
 	if (ret)
 		goto failed_request_irq;
 
@@ -421,6 +528,10 @@ static int ax_open(struct net_device *dev)
 	ax_phy_switch(dev, 0);
 	free_irq(dev->irq, dev);
  failed_request_irq:
+	/* unregister mdiobus */
+	mdiobus_unregister(ax->mii_bus);
+	free_mdio_bitbang(ax->mii_bus);
+ failed_mii:
 	return ret;
 }
 
@@ -440,6 +551,9 @@ static int ax_close(struct net_device *dev)
 	phy_disconnect(dev->phydev);
 
 	free_irq(dev->irq, dev);
+
+	mdiobus_unregister(ax->mii_bus);
+	free_mdio_bitbang(ax->mii_bus);
 	return 0;
 }
 
@@ -539,92 +653,8 @@ static const struct net_device_ops ax_netdev_ops = {
 #endif
 };
 
-static void ax_bb_mdc(struct mdiobb_ctrl *ctrl, int level)
-{
-	struct ax_device *ax = container_of(ctrl, struct ax_device, bb_ctrl);
-
-	if (level)
-		ax->reg_memr |= AX_MEMR_MDC;
-	else
-		ax->reg_memr &= ~AX_MEMR_MDC;
-
-	ei_outb(ax->reg_memr, ax->addr_memr);
-}
-
-static void ax_bb_dir(struct mdiobb_ctrl *ctrl, int output)
-{
-	struct ax_device *ax = container_of(ctrl, struct ax_device, bb_ctrl);
-
-	if (output)
-		ax->reg_memr &= ~AX_MEMR_MDIR;
-	else
-		ax->reg_memr |= AX_MEMR_MDIR;
-
-	ei_outb(ax->reg_memr, ax->addr_memr);
-}
-
-static void ax_bb_set_data(struct mdiobb_ctrl *ctrl, int value)
-{
-	struct ax_device *ax = container_of(ctrl, struct ax_device, bb_ctrl);
-
-	if (value)
-		ax->reg_memr |= AX_MEMR_MDO;
-	else
-		ax->reg_memr &= ~AX_MEMR_MDO;
-
-	ei_outb(ax->reg_memr, ax->addr_memr);
-}
-
-static int ax_bb_get_data(struct mdiobb_ctrl *ctrl)
-{
-	struct ax_device *ax = container_of(ctrl, struct ax_device, bb_ctrl);
-	int reg_memr = ei_inb(ax->addr_memr);
-
-	return reg_memr & AX_MEMR_MDI ? 1 : 0;
-}
-
-static const struct mdiobb_ops bb_ops = {
-	.owner = THIS_MODULE,
-	.set_mdc = ax_bb_mdc,
-	.set_mdio_dir = ax_bb_dir,
-	.set_mdio_data = ax_bb_set_data,
-	.get_mdio_data = ax_bb_get_data,
-};
-
 /* setup code */
 
-static int ax_mii_init(struct net_device *dev)
-{
-	struct platform_device *pdev = to_platform_device(dev->dev.parent);
-	struct ei_device *ei_local = netdev_priv(dev);
-	struct ax_device *ax = to_ax_dev(dev);
-	int err;
-
-	ax->bb_ctrl.ops = &bb_ops;
-	ax->addr_memr = ei_local->mem + AX_MEMR;
-	ax->mii_bus = alloc_mdio_bitbang(&ax->bb_ctrl);
-	if (!ax->mii_bus) {
-		err = -ENOMEM;
-		goto out;
-	}
-
-	ax->mii_bus->name = "ax88796_mii_bus";
-	ax->mii_bus->parent = dev->dev.parent;
-	snprintf(ax->mii_bus->id, MII_BUS_ID_SIZE, "%s-%x",
-		pdev->name, pdev->id);
-
-	err = mdiobus_register(ax->mii_bus);
-	if (err)
-		goto out_free_mdio_bitbang;
-
-	return 0;
-
- out_free_mdio_bitbang:
-	free_mdio_bitbang(ax->mii_bus);
- out:
-	return err;
-}
-
 static void ax_initial_setup(struct net_device *dev, struct ei_device *ei_local)
 {
 	void __iomem *ioaddr = ei_local->mem;
@@ -669,10 +699,16 @@ static int ax_init_dev(struct net_device *dev)
 	if (ax->plat->flags & AXFLG_HAS_EEPROM) {
 		unsigned char SA_prom[32];
 
+		ei_outb(6, ioaddr + EN0_RCNTLO);
+		ei_outb(0, ioaddr + EN0_RCNTHI);
+		ei_outb(0, ioaddr + EN0_RSARLO);
+		ei_outb(0, ioaddr + EN0_RSARHI);
+		ei_outb(E8390_RREAD + E8390_START, ioaddr + NE_CMD);
 		for (i = 0; i < sizeof(SA_prom); i += 2) {
 			SA_prom[i] = ei_inb(ioaddr + NE_DATAPORT);
 			SA_prom[i + 1] = ei_inb(ioaddr + NE_DATAPORT);
 		}
+		ei_outb(ENISR_RDC, ioaddr + EN0_ISR);	/* Ack intr. */
 
 		if (ax->plat->wordlength == 2)
 			for (i = 0; i < 16; i++)
@@ -741,18 +777,20 @@ static int ax_init_dev(struct net_device *dev)
 #endif
 
 	ei_local->reset_8390 = &ax_reset_8390;
-	ei_local->block_input = &ax_block_input;
-	ei_local->block_output = &ax_block_output;
+	if (ax->plat->block_input)
+		ei_local->block_input = ax->plat->block_input;
+	else
+		ei_local->block_input = &ax_block_input;
+	if (ax->plat->block_output)
+		ei_local->block_output = ax->plat->block_output;
+	else
+		ei_local->block_output = &ax_block_output;
 	ei_local->get_8390_hdr = &ax_get_8390_hdr;
 	ei_local->priv = 0;
 
 	dev->netdev_ops = &ax_netdev_ops;
 	dev->ethtool_ops = &ax_ethtool_ops;
 
-	ret = ax_mii_init(dev);
-	if (ret)
-		goto err_out;
-
 	ax_NS8390_init(dev, 0);
 
 	ret = register_netdev(dev);
@@ -777,7 +815,6 @@ static int ax_remove(struct platform_device *pdev)
 	struct resource *mem;
 
 	unregister_netdev(dev);
-	free_irq(dev->irq, dev);
 
 	iounmap(ei_local->mem);
 	mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
@@ -789,6 +826,7 @@ static int ax_remove(struct platform_device *pdev)
 		release_mem_region(mem->start, resource_size(mem));
 	}
 
+	platform_set_drvdata(pdev, NULL);
 	free_netdev(dev);
 
 	return 0;
@@ -835,6 +873,9 @@ static int ax_probe(struct platform_device *pdev)
 	dev->irq = irq->start;
 	ax->irqflags = irq->flags & IRQF_TRIGGER_MASK;
 
+	if (irq->flags & IORESOURCE_IRQ_SHAREABLE)
+		ax->irqflags |= IRQF_SHARED;
+
 	mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	if (!mem) {
 		dev_err(&pdev->dev, "no MEM specified\n");
@@ -919,6 +960,7 @@ static int ax_probe(struct platform_device *pdev)
 	release_mem_region(mem->start, mem_size);
 
  exit_mem:
+	platform_set_drvdata(pdev, NULL);
 	free_netdev(dev);
 
 	return ret;
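
The new check_irq/block_input/block_output hooks added above are supplied
through struct ax_plat_data; the xsurf100 driver that follows is the first
in-tree user, but a minimal board-file sketch (hypothetical callback names,
assuming the prototypes added to net/ax88796.h in this series) would look like:

	#include <net/ax88796.h>

	/* Hypothetical callbacks matching the net/ax88796.h prototypes. */
	static int example_check_irq(struct platform_device *pdev);
	static void example_block_input(struct net_device *dev, int count,
					struct sk_buff *skb, int ring_offset);
	static void example_block_output(struct net_device *dev, int count,
					 const unsigned char *buf, int start_page);

	/* Sketch: any hook left NULL falls back to the driver defaults, as
	 * the changes to ax_open() and ax_init_dev() above show.
	 */
	static struct ax_plat_data example_ax_pdata = {
		.flags		= AXFLG_HAS_EEPROM,
		.wordlength	= 2,
		.check_irq	= example_check_irq,
		.block_input	= example_block_input,
		.block_output	= example_block_output,
	};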
diff --git a/drivers/net/ethernet/8390/xsurf100.c b/drivers/net/ethernet/8390/xsurf100.c
new file mode 100644
index 0000000..e2c9638
--- /dev/null
+++ b/drivers/net/ethernet/8390/xsurf100.c
@@ -0,0 +1,382 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/module.h>
+#include <linux/netdevice.h>
+#include <linux/platform_device.h>
+#include <linux/zorro.h>
+#include <net/ax88796.h>
+#include <asm/amigaints.h>
+
+#define ZORRO_PROD_INDIVIDUAL_COMPUTERS_X_SURF100 \
+		ZORRO_ID(INDIVIDUAL_COMPUTERS, 0x64, 0)
+
+#define XS100_IRQSTATUS_BASE 0x40
+#define XS100_8390_BASE 0x800
+
+/* Longword-access area. Translated to 2 16-bit access cycles by the
+ * X-Surf 100 FPGA
+ */
+#define XS100_8390_DATA32_BASE 0x8000
+#define XS100_8390_DATA32_SIZE 0x2000
+/* Sub-Areas for fast data register access; addresses relative to area begin */
+#define XS100_8390_DATA_READ32_BASE 0x0880
+#define XS100_8390_DATA_WRITE32_BASE 0x0C80
+#define XS100_8390_DATA_AREA_SIZE 0x80
+
+#define __NS8390_init ax_NS8390_init
+
+/* force unsigned long back to 'void __iomem *' */
+#define ax_convert_addr(_a) ((void __force __iomem *)(_a))
+
+#define ei_inb(_a) z_readb(ax_convert_addr(_a))
+#define ei_outb(_v, _a) z_writeb(_v, ax_convert_addr(_a))
+
+#define ei_inw(_a) z_readw(ax_convert_addr(_a))
+#define ei_outw(_v, _a) z_writew(_v, ax_convert_addr(_a))
+
+#define ei_inb_p(_a) ei_inb(_a)
+#define ei_outb_p(_v, _a) ei_outb(_v, _a)
+
+/* define EI_SHIFT() to take into account our register offsets */
+#define EI_SHIFT(x) (ei_local->reg_offset[(x)])
+
+/* Ensure we have our RCR base value */
+#define AX88796_PLATFORM
+
+static unsigned char version[] =
+		"ax88796.c: Copyright 2005,2007 Simtec Electronics\n";
+
+#include "lib8390.c"
+
+/* from ne.c */
+#define NE_CMD		EI_SHIFT(0x00)
+#define NE_RESET	EI_SHIFT(0x1f)
+#define NE_DATAPORT	EI_SHIFT(0x10)
+
+struct xsurf100_ax_plat_data {
+	struct ax_plat_data ax;
+	void __iomem *base_regs;
+	void __iomem *data_area;
+};
+
+static int is_xsurf100_network_irq(struct platform_device *pdev)
+{
+	struct xsurf100_ax_plat_data *xs100 = dev_get_platdata(&pdev->dev);
+
+	return (readw(xs100->base_regs + XS100_IRQSTATUS_BASE) & 0xaaaa) != 0;
+}
+
+/* These functions guarantee that the iomem is accessed with 32 bit
+ * cycles only. z_memcpy_fromio / z_memcpy_toio make no such guarantee.
+ */
+static void z_memcpy_fromio32(void *dst, const void __iomem *src, size_t bytes)
+{
+	while (bytes > 32) {
+		asm __volatile__
+		   ("movem.l (%0)+,%%d0-%%d7\n"
+		    "movem.l %%d0-%%d7,(%1)\n"
+		    "adda.l #32,%1" : "=a"(src), "=a"(dst)
+		    : "0"(src), "1"(dst) : "d0", "d1", "d2", "d3", "d4",
+					   "d5", "d6", "d7", "memory");
+		bytes -= 32;
+	}
+	while (bytes) {
+		*(uint32_t *)dst = z_readl(src);
+		src += 4;
+		dst += 4;
+		bytes -= 4;
+	}
+}
+
+static void z_memcpy_toio32(void __iomem *dst, const void *src, size_t bytes)
+{
+	while (bytes) {
+		z_writel(*(const uint32_t *)src, dst);
+		src += 4;
+		dst += 4;
+		bytes -= 4;
+	}
+}
+
+static void xs100_write(struct net_device *dev, const void *src,
+			unsigned int count)
+{
+	struct ei_device *ei_local = netdev_priv(dev);
+	struct platform_device *pdev = to_platform_device(dev->dev.parent);
+	struct xsurf100_ax_plat_data *xs100 = dev_get_platdata(&pdev->dev);
+
+	/* copy whole blocks */
+	while (count > XS100_8390_DATA_AREA_SIZE) {
+		z_memcpy_toio32(xs100->data_area +
+				XS100_8390_DATA_WRITE32_BASE, src,
+				XS100_8390_DATA_AREA_SIZE);
+		src += XS100_8390_DATA_AREA_SIZE;
+		count -= XS100_8390_DATA_AREA_SIZE;
+	}
+	/* copy whole dwords */
+	z_memcpy_toio32(xs100->data_area + XS100_8390_DATA_WRITE32_BASE,
+			src, count & ~3);
+	src += count & ~3;
+	if (count & 2) {
+		ei_outw(*(uint16_t *)src, ei_local->mem + NE_DATAPORT);
+		src += 2;
+	}
+	if (count & 1)
+		ei_outb(*(uint8_t *)src, ei_local->mem + NE_DATAPORT);
+}
+
+static void xs100_read(struct net_device *dev, void *dst, unsigned int count)
+{
+	struct ei_device *ei_local = netdev_priv(dev);
+	struct platform_device *pdev = to_platform_device(dev->dev.parent);
+	struct xsurf100_ax_plat_data *xs100 = dev_get_platdata(&pdev->dev);
+
+	/* copy whole blocks */
+	while (count > XS100_8390_DATA_AREA_SIZE) {
+		z_memcpy_fromio32(dst, xs100->data_area +
+				  XS100_8390_DATA_READ32_BASE,
+				  XS100_8390_DATA_AREA_SIZE);
+		dst += XS100_8390_DATA_AREA_SIZE;
+		count -= XS100_8390_DATA_AREA_SIZE;
+	}
+	/* copy whole dwords */
+	z_memcpy_fromio32(dst, xs100->data_area + XS100_8390_DATA_READ32_BASE,
+			  count & ~3);
+	dst += count & ~3;
+	if (count & 2) {
+		*(uint16_t *)dst = ei_inw(ei_local->mem + NE_DATAPORT);
+		dst += 2;
+	}
+	if (count & 1)
+		*(uint8_t *)dst = ei_inb(ei_local->mem + NE_DATAPORT);
+}
+
+/* Block input and output, similar to the Crynwr packet driver. If
+ * you are porting to a new ethercard, look at the packet driver
+ * source for hints. The NEx000 doesn't share the on-board packet
+ * memory -- you have to put the packet out through the "remote DMA"
+ * dataport using ei_outb.
+ */
+static void xs100_block_input(struct net_device *dev, int count,
+			      struct sk_buff *skb, int ring_offset)
+{
+	struct ei_device *ei_local = netdev_priv(dev);
+	void __iomem *nic_base = ei_local->mem;
+	char *buf = skb->data;
+
+	if (ei_local->dmaing) {
+		netdev_err(dev,
+			   "DMAing conflict in %s [DMAstat:%d][irqlock:%d]\n",
+			   __func__,
+			   ei_local->dmaing, ei_local->irqlock);
+		return;
+	}
+
+	ei_local->dmaing |= 0x01;
+
+	ei_outb(E8390_NODMA + E8390_PAGE0 + E8390_START, nic_base + NE_CMD);
+	ei_outb(count & 0xff, nic_base + EN0_RCNTLO);
+	ei_outb(count >> 8, nic_base + EN0_RCNTHI);
+	ei_outb(ring_offset & 0xff, nic_base + EN0_RSARLO);
+	ei_outb(ring_offset >> 8, nic_base + EN0_RSARHI);
+	ei_outb(E8390_RREAD + E8390_START, nic_base + NE_CMD);
+
+	xs100_read(dev, buf, count);
+
+	ei_local->dmaing &= ~1;
+}
+
+static void xs100_block_output(struct net_device *dev, int count,
+			       const unsigned char *buf, const int start_page)
+{
+	struct ei_device *ei_local = netdev_priv(dev);
+	void __iomem *nic_base = ei_local->mem;
+	unsigned long dma_start;
+
+	/* Round the count up for word writes. Do we need to do this?
+	 * What effect will an odd byte count have on the 8390?  I
+	 * should check someday.
+	 */
+	if (ei_local->word16 && (count & 0x01))
+		count++;
+
+	/* This *shouldn't* happen. If it does, it's the last thing
+	 * you'll see
+	 */
+	if (ei_local->dmaing) {
+		netdev_err(dev,
+			   "DMAing conflict in %s [DMAstat:%d][irqlock:%d]\n",
+			   __func__,
+			   ei_local->dmaing, ei_local->irqlock);
+		return;
+	}
+
+	ei_local->dmaing |= 0x01;
+	/* We should already be in page 0, but to be safe... */
+	ei_outb(E8390_PAGE0 + E8390_START + E8390_NODMA, nic_base + NE_CMD);
+
+	ei_outb(ENISR_RDC, nic_base + EN0_ISR);
+
+	/* Now the normal output. */
+	ei_outb(count & 0xff, nic_base + EN0_RCNTLO);
+	ei_outb(count >> 8, nic_base + EN0_RCNTHI);
+	ei_outb(0x00, nic_base + EN0_RSARLO);
+	ei_outb(start_page, nic_base + EN0_RSARHI);
+
+	ei_outb(E8390_RWRITE + E8390_START, nic_base + NE_CMD);
+
+	xs100_write(dev, buf, count);
+
+	dma_start = jiffies;
+
+	while ((ei_inb(nic_base + EN0_ISR) & ENISR_RDC) == 0) {
+		if (jiffies - dma_start > 2 * HZ / 100) {	/* 20ms */
+			netdev_warn(dev, "timeout waiting for Tx RDC.\n");
+			ei_local->reset_8390(dev);
+			ax_NS8390_init(dev, 1);
+			break;
+		}
+	}
+
+	ei_outb(ENISR_RDC, nic_base + EN0_ISR);	/* Ack intr. */
+	ei_local->dmaing &= ~0x01;
+}
+
+static int xsurf100_probe(struct zorro_dev *zdev,
+			  const struct zorro_device_id *ent)
+{
+	struct platform_device *pdev;
+	struct xsurf100_ax_plat_data ax88796_data;
+	struct resource res[2] = {
+		DEFINE_RES_NAMED(IRQ_AMIGA_PORTS, 1, NULL,
+				 IORESOURCE_IRQ | IORESOURCE_IRQ_SHAREABLE),
+		DEFINE_RES_MEM(zdev->resource.start + XS100_8390_BASE,
+			       4 * 0x20)
+	};
+	int reg;
+	/* This table is referenced in the device structure, so it must
+	 * outlive the scope of xsurf100_probe.
+	 */
+	static u32 reg_offsets[32];
+	int ret = 0;
+
+	/* X-Surf 100 control and 32 bit ring buffer data access areas.
+	 * These resources are not used by the ax88796 driver, so must
+	 * be requested here and passed via platform data.
+	 */
+
+	if (!request_mem_region(zdev->resource.start, 0x100, zdev->name)) {
+		dev_err(&zdev->dev, "cannot reserve X-Surf 100 control registers\n");
+		return -ENXIO;
+	}
+
+	if (!request_mem_region(zdev->resource.start +
+				XS100_8390_DATA32_BASE,
+				XS100_8390_DATA32_SIZE,
+				"X-Surf 100 32-bit data access")) {
+		dev_err(&zdev->dev, "cannot reserve 32-bit area\n");
+		ret = -ENXIO;
+		goto exit_req;
+	}
+
+	for (reg = 0; reg < 0x20; reg++)
+		reg_offsets[reg] = 4 * reg;
+
+	memset(&ax88796_data, 0, sizeof(ax88796_data));
+	ax88796_data.ax.flags = AXFLG_HAS_EEPROM;
+	ax88796_data.ax.wordlength = 2;
+	ax88796_data.ax.dcr_val = 0x48;
+	ax88796_data.ax.rcr_val = 0x40;
+	ax88796_data.ax.reg_offsets = reg_offsets;
+	ax88796_data.ax.check_irq = is_xsurf100_network_irq;
+	ax88796_data.base_regs = ioremap(zdev->resource.start, 0x100);
+
+	/* error handling for ioremap regs */
+	if (!ax88796_data.base_regs) {
+		dev_err(&zdev->dev, "Cannot ioremap area %pR (registers)\n",
+			&zdev->resource);
+
+		ret = -ENXIO;
+		goto exit_req2;
+	}
+
+	ax88796_data.data_area = ioremap(zdev->resource.start +
+			XS100_8390_DATA32_BASE, XS100_8390_DATA32_SIZE);
+
+	/* error handling for ioremap data */
+	if (!ax88796_data.data_area) {
+		dev_err(&zdev->dev,
+			"Cannot ioremap area %pR offset %x (32-bit access)\n",
+			&zdev->resource,  XS100_8390_DATA32_BASE);
+
+		ret = -ENXIO;
+		goto exit_mem;
+	}
+
+	ax88796_data.ax.block_output = xs100_block_output;
+	ax88796_data.ax.block_input = xs100_block_input;
+
+	pdev = platform_device_register_resndata(&zdev->dev, "ax88796",
+						 zdev->slotaddr, res, 2,
+						 &ax88796_data,
+						 sizeof(ax88796_data));
+
+	if (IS_ERR(pdev)) {
+		dev_err(&zdev->dev, "cannot register platform device\n");
+		ret = -ENXIO;
+		goto exit_mem2;
+	}
+
+	zorro_set_drvdata(zdev, pdev);
+
+	if (!ret)
+		return 0;
+
+ exit_mem2:
+	iounmap(ax88796_data.data_area);
+
+ exit_mem:
+	iounmap(ax88796_data.base_regs);
+
+ exit_req2:
+	release_mem_region(zdev->resource.start + XS100_8390_DATA32_BASE,
+			   XS100_8390_DATA32_SIZE);
+
+ exit_req:
+	release_mem_region(zdev->resource.start, 0x100);
+
+	return ret;
+}
+
+static void xsurf100_remove(struct zorro_dev *zdev)
+{
+	struct platform_device *pdev = zorro_get_drvdata(zdev);
+	struct xsurf100_ax_plat_data *xs100 = dev_get_platdata(&pdev->dev);
+
+	platform_device_unregister(pdev);
+
+	iounmap(xs100->base_regs);
+	release_mem_region(zdev->resource.start, 0x100);
+	iounmap(xs100->data_area);
+	release_mem_region(zdev->resource.start + XS100_8390_DATA32_BASE,
+			   XS100_8390_DATA32_SIZE);
+}
+
+static const struct zorro_device_id xsurf100_zorro_tbl[] = {
+	{ ZORRO_PROD_INDIVIDUAL_COMPUTERS_X_SURF100, },
+	{ 0 }
+};
+
+MODULE_DEVICE_TABLE(zorro, xsurf100_zorro_tbl);
+
+static struct zorro_driver xsurf100_driver = {
+	.name           = "xsurf100",
+	.id_table       = xsurf100_zorro_tbl,
+	.probe          = xsurf100_probe,
+	.remove         = xsurf100_remove,
+};
+
+module_driver(xsurf100_driver, zorro_register_driver, zorro_unregister_driver);
+
+MODULE_DESCRIPTION("X-Surf 100 driver");
+MODULE_AUTHOR("Michael Karcher <kernel@mkarcher.dialup.fu-berlin.de>");
+MODULE_LICENSE("GPL v2");
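
A worked example of the split in xs100_write()/xs100_read() above: for
count = 203 bytes, the block loop first moves one full 128-byte chunk
(XS100_8390_DATA_AREA_SIZE), leaving 75 bytes; the dword copy then moves
75 & ~3 = 72 bytes through the 32-bit window; count & 2 moves two more bytes
through the 16-bit data port and count & 1 moves the final byte, so the
FPGA's longword area is only ever accessed in whole 32-bit cycles.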
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
index 1389ab5..1f0e872 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
@@ -113,10 +113,10 @@ bool bnxt_rx_xdp(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, u16 cons,
 	if (tx_avail != bp->tx_ring_size)
 		*event &= ~BNXT_RX_EVENT;
 
+	*len = xdp.data_end - xdp.data;
 	if (orig_data != xdp.data) {
 		offset = xdp.data - xdp.data_hard_start;
 		*data_ptr = xdp.data_hard_start + offset;
-		*len = xdp.data_end - xdp.data;
 	}
 	switch (act) {
 	case XDP_PASS:
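
The bnxt change above (and the matching nicvf and mlx4 hunks further down)
moves the length recomputation out of the `orig_data != xdp.data` check
because an XDP program can move data_end without touching data. A minimal
program that hits exactly that case, sketched with the standard helper
(header paths may vary by tree):

	#include <linux/bpf.h>
	#include <bpf/bpf_helpers.h>

	/* Trim 4 bytes off the tail: data stays put, only data_end moves. */
	SEC("xdp")
	int xdp_trim_tail(struct xdp_md *ctx)
	{
		void *data = (void *)(long)ctx->data;
		void *data_end = (void *)(long)ctx->data_end;

		if (data_end - data > 64)
			bpf_xdp_adjust_tail(ctx, -4);	/* shrink-only at this point */

		return XDP_PASS;
	}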
diff --git a/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c b/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c
index e8b2904..bc9861c 100644
--- a/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c
+++ b/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c
@@ -1497,3 +1497,57 @@ void cn23xx_tell_vf_its_macaddr_changed(struct octeon_device *oct, int vfidx,
 		octeon_mbox_write(oct, &mbox_cmd);
 	}
 }
+
+static void
+cn23xx_get_vf_stats_callback(struct octeon_device *oct,
+			     struct octeon_mbox_cmd *cmd, void *arg)
+{
+	struct oct_vf_stats_ctx *ctx = arg;
+
+	memcpy(ctx->stats, cmd->data, sizeof(struct oct_vf_stats));
+	atomic_set(&ctx->status, 1);
+}
+
+int cn23xx_get_vf_stats(struct octeon_device *oct, int vfidx,
+			struct oct_vf_stats *stats)
+{
+	u32 timeout = HZ; /* 1 sec */
+	struct octeon_mbox_cmd mbox_cmd;
+	struct oct_vf_stats_ctx ctx;
+	u32 count = 0, ret;
+
+	if (!(oct->sriov_info.vf_drv_loaded_mask & (1ULL << vfidx)))
+		return -1;
+
+	if (sizeof(struct oct_vf_stats) > sizeof(mbox_cmd.data))
+		return -1;
+
+	mbox_cmd.msg.u64 = 0;
+	mbox_cmd.msg.s.type = OCTEON_MBOX_REQUEST;
+	mbox_cmd.msg.s.resp_needed = 1;
+	mbox_cmd.msg.s.cmd = OCTEON_GET_VF_STATS;
+	mbox_cmd.msg.s.len = 1;
+	mbox_cmd.q_no = vfidx * oct->sriov_info.rings_per_vf;
+	mbox_cmd.recv_len = 0;
+	mbox_cmd.recv_status = 0;
+	mbox_cmd.fn = (octeon_mbox_callback_t)cn23xx_get_vf_stats_callback;
+	ctx.stats = stats;
+	atomic_set(&ctx.status, 0);
+	mbox_cmd.fn_arg = (void *)&ctx;
+	memset(mbox_cmd.data, 0, sizeof(mbox_cmd.data));
+	octeon_mbox_write(oct, &mbox_cmd);
+
+	do {
+		schedule_timeout_uninterruptible(1);
+	} while ((atomic_read(&ctx.status) == 0) && (count++ < timeout));
+
+	ret = atomic_read(&ctx.status);
+	if (ret == 0) {
+		octeon_mbox_cancel(oct, 0);
+		dev_err(&oct->pci_dev->dev, "Unable to get stats from VF-%d, timed out\n",
+			vfidx);
+		return -1;
+	}
+
+	return 0;
+}
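
A note on the wait loop above: each schedule_timeout_uninterruptible(1)
sleeps for one jiffy and the loop allows up to timeout = HZ iterations, so
the PF waits roughly one second for the VF's mailbox response (for example,
250 iterations of 4 ms at HZ=250) before cancelling the request.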
diff --git a/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.h b/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.h
index 2aba524..63b3de4 100644
--- a/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.h
+++ b/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.h
@@ -43,6 +43,15 @@ struct octeon_cn23xx_pf {
 
 #define CN23XX_SLI_DEF_BP			0x40
 
+struct oct_vf_stats {
+	u64 rx_packets;
+	u64 tx_packets;
+	u64 rx_bytes;
+	u64 tx_bytes;
+	u64 broadcast;
+	u64 multicast;
+};
+
 int setup_cn23xx_octeon_pf_device(struct octeon_device *oct);
 
 int validate_cn23xx_pf_config_info(struct octeon_device *oct,
@@ -56,4 +65,7 @@ int cn23xx_fw_loaded(struct octeon_device *oct);
 
 void cn23xx_tell_vf_its_macaddr_changed(struct octeon_device *oct, int vfidx,
 					u8 *mac);
+
+int cn23xx_get_vf_stats(struct octeon_device *oct, int vfidx,
+			struct oct_vf_stats *stats);
 #endif
diff --git a/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c b/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c
index 550ac29..9926a12 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c
@@ -96,11 +96,9 @@ static const char oct_stats_strings[][ETH_GSTRING_LEN] = {
 	"tx_packets",
 	"rx_bytes",
 	"tx_bytes",
-	"rx_errors",	/*jabber_err+l2_err+frame_err */
-	"tx_errors",	/*fw_err_pko+fw_err_link+fw_err_drop */
-	"rx_dropped",   /*st->fromwire.total_rcvd - st->fromwire.fw_total_rcvd +
-			 *st->fromwire.dmac_drop + st->fromwire.fw_err_drop
-			 */
+	"rx_errors",
+	"tx_errors",
+	"rx_dropped",
 	"tx_dropped",
 
 	"tx_total_sent",
@@ -119,7 +117,7 @@ static const char oct_stats_strings[][ETH_GSTRING_LEN] = {
 	"mac_tx_total_bytes",
 	"mac_tx_mcast_pkts",
 	"mac_tx_bcast_pkts",
-	"mac_tx_ctl_packets",	/*oct->link_stats.fromhost.ctl_sent */
+	"mac_tx_ctl_packets",
 	"mac_tx_total_collisions",
 	"mac_tx_one_collision",
 	"mac_tx_multi_collison",
@@ -170,17 +168,17 @@ static const char oct_vf_stats_strings[][ETH_GSTRING_LEN] = {
 	"tx_packets",
 	"rx_bytes",
 	"tx_bytes",
-	"rx_errors", /* jabber_err + l2_err+frame_err */
-	"tx_errors", /* fw_err_pko + fw_err_link+fw_err_drop */
-	"rx_dropped", /* total_rcvd - fw_total_rcvd + dmac_drop + fw_err_drop */
+	"rx_errors",
+	"tx_errors",
+	"rx_dropped",
 	"tx_dropped",
 	"link_state_changes",
 };
 
 /* statistics of host tx queue */
 static const char oct_iq_stats_strings[][ETH_GSTRING_LEN] = {
-	"packets",		/*oct->instr_queue[iq_no]->stats.tx_done*/
-	"bytes",		/*oct->instr_queue[iq_no]->stats.tx_tot_bytes*/
+	"packets",
+	"bytes",
 	"dropped",
 	"iq_busy",
 	"sgentry_sent",
@@ -197,13 +195,9 @@ static const char oct_iq_stats_strings[][ETH_GSTRING_LEN] = {
 
 /* statistics of host rx queue */
 static const char oct_droq_stats_strings[][ETH_GSTRING_LEN] = {
-	"packets",		/*oct->droq[oq_no]->stats.rx_pkts_received */
-	"bytes",		/*oct->droq[oq_no]->stats.rx_bytes_received */
-	"dropped",		/*oct->droq[oq_no]->stats.rx_dropped+
-				 *oct->droq[oq_no]->stats.dropped_nodispatch+
-				 *oct->droq[oq_no]->stats.dropped_toomany+
-				 *oct->droq[oq_no]->stats.dropped_nomem
-				 */
+	"packets",
+	"bytes",
+	"dropped",
 	"dropped_nomem",
 	"dropped_toomany",
 	"fw_dropped",
@@ -1080,16 +1074,33 @@ lio_get_ethtool_stats(struct net_device *netdev,
 	data[i++] = CVM_CAST64(netstats->rx_bytes);
 	/*sum of oct->instr_queue[iq_no]->stats.tx_tot_bytes */
 	data[i++] = CVM_CAST64(netstats->tx_bytes);
-	data[i++] = CVM_CAST64(netstats->rx_errors);
+	data[i++] = CVM_CAST64(netstats->rx_errors +
+			       oct_dev->link_stats.fromwire.fcs_err +
+			       oct_dev->link_stats.fromwire.jabber_err +
+			       oct_dev->link_stats.fromwire.l2_err +
+			       oct_dev->link_stats.fromwire.frame_err);
 	data[i++] = CVM_CAST64(netstats->tx_errors);
 	/*sum of oct->droq[oq_no]->stats->rx_dropped +
 	 *oct->droq[oq_no]->stats->dropped_nodispatch +
 	 *oct->droq[oq_no]->stats->dropped_toomany +
 	 *oct->droq[oq_no]->stats->dropped_nomem
 	 */
-	data[i++] = CVM_CAST64(netstats->rx_dropped);
+	data[i++] = CVM_CAST64(netstats->rx_dropped +
+			       oct_dev->link_stats.fromwire.fifo_err +
+			       oct_dev->link_stats.fromwire.dmac_drop +
+			       oct_dev->link_stats.fromwire.red_drops +
+			       oct_dev->link_stats.fromwire.fw_err_pko +
+			       oct_dev->link_stats.fromwire.fw_err_link +
+			       oct_dev->link_stats.fromwire.fw_err_drop);
 	/*sum of oct->instr_queue[iq_no]->stats.tx_dropped */
-	data[i++] = CVM_CAST64(netstats->tx_dropped);
+	data[i++] = CVM_CAST64(netstats->tx_dropped +
+			       oct_dev->link_stats.fromhost.max_collision_fail +
+			       oct_dev->link_stats.fromhost.max_deferral_fail +
+			       oct_dev->link_stats.fromhost.total_collisions +
+			       oct_dev->link_stats.fromhost.fw_err_pko +
+			       oct_dev->link_stats.fromhost.fw_err_link +
+			       oct_dev->link_stats.fromhost.fw_err_drop +
+			       oct_dev->link_stats.fromhost.fw_err_pki);
 
 	/* firmware tx stats */
 	/*per_core_stats[cvmx_get_core_num()].link_stats[mdata->from_ifidx].
@@ -1798,6 +1809,7 @@ octnet_nic_stats_callback(struct octeon_device *oct_dev,
 		rstats->jabber_err = rsp_rstats->jabber_err;
 		rstats->l2_err    = rsp_rstats->l2_err;
 		rstats->frame_err = rsp_rstats->frame_err;
+		rstats->red_drops = rsp_rstats->red_drops;
 
 		/* RX firmware stats */
 		rstats->fw_total_rcvd = rsp_rstats->fw_total_rcvd;
diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c
index 603a144..f3891ae 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_main.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c
@@ -2585,6 +2585,7 @@ static int liquidio_xmit(struct sk_buff *skb, struct net_device *netdev)
 		if (dma_mapping_error(&oct->pci_dev->dev, dptr)) {
 			dev_err(&oct->pci_dev->dev, "%s DMA mapping error 1\n",
 				__func__);
+			stats->tx_dmamap_fail++;
 			return NETDEV_TX_BUSY;
 		}
 
@@ -2624,6 +2625,7 @@ static int liquidio_xmit(struct sk_buff *skb, struct net_device *netdev)
 		if (dma_mapping_error(&oct->pci_dev->dev, g->sg[0].ptr[0])) {
 			dev_err(&oct->pci_dev->dev, "%s DMA mapping error 2\n",
 				__func__);
+			stats->tx_dmamap_fail++;
 			return NETDEV_TX_BUSY;
 		}
 		add_sg_size(&g->sg[0], (skb->len - skb->data_len), 0);
@@ -3324,6 +3326,31 @@ static const struct switchdev_ops lio_pf_switchdev_ops = {
 	.switchdev_port_attr_get = lio_pf_switchdev_attr_get,
 };
 
+static int liquidio_get_vf_stats(struct net_device *netdev, int vfidx,
+				 struct ifla_vf_stats *vf_stats)
+{
+	struct lio *lio = GET_LIO(netdev);
+	struct octeon_device *oct = lio->oct_dev;
+	struct oct_vf_stats stats;
+	int ret;
+
+	if (vfidx < 0 || vfidx >= oct->sriov_info.num_vfs_alloced)
+		return -EINVAL;
+
+	memset(&stats, 0, sizeof(struct oct_vf_stats));
+	ret = cn23xx_get_vf_stats(oct, vfidx, &stats);
+	if (!ret) {
+		vf_stats->rx_packets = stats.rx_packets;
+		vf_stats->tx_packets = stats.tx_packets;
+		vf_stats->rx_bytes = stats.rx_bytes;
+		vf_stats->tx_bytes = stats.tx_bytes;
+		vf_stats->broadcast = stats.broadcast;
+		vf_stats->multicast = stats.multicast;
+	}
+
+	return ret;
+}
+
 static const struct net_device_ops lionetdevops = {
 	.ndo_open		= liquidio_open,
 	.ndo_stop		= liquidio_stop,
@@ -3346,6 +3373,7 @@ static const struct net_device_ops lionetdevops = {
 	.ndo_get_vf_config	= liquidio_get_vf_config,
 	.ndo_set_vf_trust	= liquidio_set_vf_trust,
 	.ndo_set_vf_link_state  = liquidio_set_vf_link_state,
+	.ndo_get_vf_stats	= liquidio_get_vf_stats,
 };
 
 /** \brief Entry point for the liquidio module
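
Usage note (an assumption about tooling, not part of this series): the
counters filled in by .ndo_get_vf_stats are exported through the
IFLA_VF_STATS netlink attribute, so with a reasonably recent iproute2 they
appear per VF in the output of `ip -s link show dev <pf-interface>`.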
diff --git a/drivers/net/ethernet/cavium/liquidio/liquidio_common.h b/drivers/net/ethernet/cavium/liquidio/liquidio_common.h
index 75eea83..34a94da 100644
--- a/drivers/net/ethernet/cavium/liquidio/liquidio_common.h
+++ b/drivers/net/ethernet/cavium/liquidio/liquidio_common.h
@@ -779,18 +779,24 @@ struct liquidio_if_cfg_info {
 /** Stats for each NIC port in RX direction. */
 struct nic_rx_stats {
 	/* link-level stats */
-	u64 total_rcvd;
-	u64 bytes_rcvd;
-	u64 total_bcst;
-	u64 total_mcst;
-	u64 runts;
-	u64 ctl_rcvd;
-	u64 fifo_err;      /* Accounts for over/under-run of buffers */
-	u64 dmac_drop;
-	u64 fcs_err;
-	u64 jabber_err;
-	u64 l2_err;
-	u64 frame_err;
+	u64 total_rcvd;		/* Received packets */
+	u64 bytes_rcvd;		/* Octets of received packets */
+	u64 total_bcst;		/* Number of non-dropped L2 broadcast packets */
+	u64 total_mcst;		/* Number of non-dropped L2 multicast packets */
+	u64 runts;		/* Packets shorter than allowed */
+	u64 ctl_rcvd;		/* Received PAUSE packets */
+	u64 fifo_err;		/* Packets dropped due to RX FIFO full */
+	u64 dmac_drop;		/* Packets dropped by the DMAC filter */
+	u64 fcs_err;		/* Sum of fragment, overrun, and FCS errors */
+	u64 jabber_err;		/* Packets larger than allowed */
+	u64 l2_err;		/* Sum of DMA, parity, PCAM access, no memory,
+				 * buffer overflow, malformed L2 header or
+				 * length, oversize errors
+				 */
+	u64 frame_err;		/* Sum of IPv4 and L4 checksum errors */
+	u64 red_drops;		/* Packets dropped by RED due to buffer
+				 * exhaustion
+				 */
 
 	/* firmware stats */
 	u64 fw_total_rcvd;
@@ -806,11 +812,11 @@ struct nic_rx_stats {
 	u64 fw_lro_pkts;   /* Number of packets that are LROed      */
 	u64 fw_lro_octs;   /* Number of octets that are LROed       */
 	u64 fw_total_lro;  /* Number of LRO packets formed          */
-	u64 fw_lro_aborts; /* Number of times lRO of packet aborted */
+	u64 fw_lro_aborts; /* Number of times LRO of packet aborted */
 	u64 fw_lro_aborts_port;
 	u64 fw_lro_aborts_seq;
 	u64 fw_lro_aborts_tsval;
-	u64 fw_lro_aborts_timer;
+	u64 fw_lro_aborts_timer;	/* Timer setting error */
 	/* intrmod: packet forward rate */
 	u64 fwd_rate;
 };
@@ -818,18 +824,35 @@ struct nic_rx_stats {
 /** Stats for each NIC port in TX direction. */
 struct nic_tx_stats {
 	/* link-level stats */
-	u64 total_pkts_sent;
-	u64 total_bytes_sent;
-	u64 mcast_pkts_sent;
-	u64 bcast_pkts_sent;
-	u64 ctl_sent;
-	u64 one_collision_sent;   /* Packets sent after one collision*/
-	u64 multi_collision_sent; /* Packets sent after multiple collision*/
-	u64 max_collision_fail;   /* Packets not sent due to max collisions */
-	u64 max_deferral_fail;   /* Packets not sent due to max deferrals */
-	u64 fifo_err;       /* Accounts for over/under-run of buffers */
-	u64 runts;
-	u64 total_collisions; /* Total number of collisions detected */
+	u64 total_pkts_sent;		/* Total frames sent on the interface */
+	u64 total_bytes_sent;		/* Total octets sent on the interface */
+	u64 mcast_pkts_sent;		/* Packets sent to the multicast DMAC */
+	u64 bcast_pkts_sent;		/* Packets sent to a broadcast DMAC */
+	u64 ctl_sent;			/* Control/PAUSE packets sent */
+	u64 one_collision_sent;		/* Packets sent that experienced a
+					 * single collision before successful
+					 * transmission
+					 */
+	u64 multi_collision_sent;	/* Packets sent that experienced
+					 * multiple collisions before successful
+					 * transmission
+					 */
+	u64 max_collision_fail;		/* Packets dropped due to excessive
+					 * collisions
+					 */
+	u64 max_deferral_fail;		/* Packets not sent due to max
+					 * deferrals
+					 */
+	u64 fifo_err;			/* Packets sent that experienced a
+					 * transmit underflow and were
+					 * truncated
+					 */
+	u64 runts;			/* Packets sent with an octet count
+					 * less than 64
+					 */
+	u64 total_collisions;		/* Packets dropped due to excessive
+					 * collisions
+					 */
 
 	/* firmware stats */
 	u64 fw_total_sent;
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_iq.h b/drivers/net/ethernet/cavium/liquidio/octeon_iq.h
index 81c9876..5fed7b6 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_iq.h
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_iq.h
@@ -62,8 +62,8 @@ struct oct_iq_stats {
 	u64 tx_tot_bytes;/**< Total count of bytes sent to network. */
 	u64 tx_gso;  /* count of tso */
 	u64 tx_vxlan; /* tunnel */
-	u64 tx_dmamap_fail;
-	u64 tx_restart;
+	u64 tx_dmamap_fail; /* Number of times dma mapping failed */
+	u64 tx_restart; /* Number of times this queue restarted */
 };
 
 #define OCT_IQ_STATS_SIZE   (sizeof(struct oct_iq_stats))
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_mailbox.c b/drivers/net/ethernet/cavium/liquidio/octeon_mailbox.c
index 28e74ee..021d99c 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_mailbox.c
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_mailbox.c
@@ -24,6 +24,7 @@
 #include "octeon_device.h"
 #include "octeon_main.h"
 #include "octeon_mailbox.h"
+#include "cn23xx_pf_device.h"
 
 /**
  * octeon_mbox_read:
@@ -205,6 +206,26 @@ int octeon_mbox_write(struct octeon_device *oct,
 	return ret;
 }
 
+static void get_vf_stats(struct octeon_device *oct,
+			 struct oct_vf_stats *stats)
+{
+	int i;
+
+	for (i = 0; i < oct->num_iqs; i++) {
+		if (!oct->instr_queue[i])
+			continue;
+		stats->tx_packets += oct->instr_queue[i]->stats.tx_done;
+		stats->tx_bytes += oct->instr_queue[i]->stats.tx_tot_bytes;
+	}
+
+	for (i = 0; i < oct->num_oqs; i++) {
+		if (!oct->droq[i])
+			continue;
+		stats->rx_packets += oct->droq[i]->stats.rx_pkts_received;
+		stats->rx_bytes += oct->droq[i]->stats.rx_bytes_received;
+	}
+}
+
 /**
  * octeon_mbox_process_cmd:
  * @mbox: Pointer mailbox
@@ -250,6 +271,15 @@ static int octeon_mbox_process_cmd(struct octeon_mbox *mbox,
 						     mbox_cmd->msg.s.params);
 		break;
 
+	case OCTEON_GET_VF_STATS:
+		dev_dbg(&oct->pci_dev->dev, "Got VF stats request. Sending data back\n");
+		mbox_cmd->msg.s.type = OCTEON_MBOX_RESPONSE;
+		mbox_cmd->msg.s.resp_needed = 1;
+		mbox_cmd->msg.s.len = 1 +
+			sizeof(struct oct_vf_stats) / sizeof(u64);
+		get_vf_stats(oct, (struct oct_vf_stats *)mbox_cmd->data);
+		octeon_mbox_write(oct, mbox_cmd);
+		break;
 	default:
 		break;
 	}
@@ -322,3 +352,25 @@ int octeon_mbox_process_message(struct octeon_mbox *mbox)
 
 	return 0;
 }
+
+int octeon_mbox_cancel(struct octeon_device *oct, int q_no)
+{
+	struct octeon_mbox *mbox = oct->mbox[q_no];
+	struct octeon_mbox_cmd *mbox_cmd;
+	unsigned long flags = 0;
+
+	spin_lock_irqsave(&mbox->lock, flags);
+	mbox_cmd = &mbox->mbox_resp;
+
+	if (!(mbox->state & OCTEON_MBOX_STATE_RESPONSE_PENDING)) {
+		spin_unlock_irqrestore(&mbox->lock, flags);
+		return 1;
+	}
+
+	mbox->state = OCTEON_MBOX_STATE_IDLE;
+	memset(mbox_cmd, 0, sizeof(*mbox_cmd));
+	writeq(OCTEON_PFVFSIG, mbox->mbox_read_reg);
+	spin_unlock_irqrestore(&mbox->lock, flags);
+
+	return 0;
+}
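
As a worked check of the response length above: struct oct_vf_stats carries
six u64 counters, so msg.s.len comes out to 1 + 6 = 7 64-bit words
(presumably the message word itself plus the six data words); the PF side
guards the same size relationship with the sizeof() comparison in
cn23xx_get_vf_stats().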
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_mailbox.h b/drivers/net/ethernet/cavium/liquidio/octeon_mailbox.h
index 1def22a..d92bd7e 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_mailbox.h
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_mailbox.h
@@ -25,6 +25,7 @@
 #define OCTEON_VF_ACTIVE		0x1
 #define OCTEON_VF_FLR_REQUEST		0x2
 #define OCTEON_PF_CHANGED_VF_MACADDR	0x4
+#define OCTEON_GET_VF_STATS		0x8
 
 /* Macro for read acknowledgement */
 #define OCTEON_PFVFACK			0xffffffffffffffffULL
@@ -107,9 +108,15 @@ struct octeon_mbox {
 
 };
 
+struct oct_vf_stats_ctx {
+	atomic_t status;
+	struct oct_vf_stats *stats;
+};
+
 int octeon_mbox_read(struct octeon_mbox *mbox);
 int octeon_mbox_write(struct octeon_device *oct,
 		      struct octeon_mbox_cmd *mbox_cmd);
 int octeon_mbox_process_message(struct octeon_mbox *mbox);
+int octeon_mbox_cancel(struct octeon_device *oct, int q_no);
 
 #endif
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
index 707db33..7135db4 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
@@ -538,9 +538,9 @@ static inline bool nicvf_xdp_rx(struct nicvf *nic, struct bpf_prog *prog,
 	action = bpf_prog_run_xdp(prog, &xdp);
 	rcu_read_unlock();
 
+	len = xdp.data_end - xdp.data;
 	/* Check if XDP program has changed headers */
 	if (orig_data != xdp.data) {
-		len = xdp.data_end - xdp.data;
 		offset = orig_data - xdp.data;
 		dma_addr -= offset;
 	}
diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c
index 9a81b523..71f13bd 100644
--- a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c
@@ -1419,6 +1419,22 @@ static int cxgb4vf_get_link_ksettings(struct net_device *dev,
 		base->duplex = DUPLEX_UNKNOWN;
 	}
 
+	if (pi->link_cfg.fc & PAUSE_RX) {
+		if (pi->link_cfg.fc & PAUSE_TX) {
+			ethtool_link_ksettings_add_link_mode(link_ksettings,
+							     advertising,
+							     Pause);
+		} else {
+			ethtool_link_ksettings_add_link_mode(link_ksettings,
+							     advertising,
+							     Asym_Pause);
+		}
+	} else if (pi->link_cfg.fc & PAUSE_TX) {
+		ethtool_link_ksettings_add_link_mode(link_ksettings,
+						     advertising,
+						     Asym_Pause);
+	}
+
 	base->autoneg = pi->link_cfg.autoneg;
 	if (pi->link_cfg.pcaps & FW_PORT_CAP32_ANEG)
 		ethtool_link_ksettings_add_link_mode(link_ksettings,
diff --git a/drivers/net/ethernet/huawei/hinic/Kconfig b/drivers/net/ethernet/huawei/hinic/Kconfig
index 08db249..e4e8b24 100644
--- a/drivers/net/ethernet/huawei/hinic/Kconfig
+++ b/drivers/net/ethernet/huawei/hinic/Kconfig
@@ -4,7 +4,7 @@
 
 config HINIC
 	tristate "Huawei Intelligent PCIE Network Interface Card"
-	depends on (PCI_MSI && X86)
+	depends on (PCI_MSI && (X86 || ARM64))
 	---help---
 	  This driver supports HiNIC PCIE Ethernet cards.
 	  To compile this driver as part of the kernel, choose Y here.
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index f174c72..87fb27a 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -638,7 +638,7 @@ static void i40e_unmap_and_free_tx_resource(struct i40e_ring *ring,
 		if (tx_buffer->tx_flags & I40E_TX_FLAGS_FD_SB)
 			kfree(tx_buffer->raw_buf);
 		else if (ring_is_xdp(ring))
-			page_frag_free(tx_buffer->raw_buf);
+			xdp_return_frame(tx_buffer->xdpf);
 		else
 			dev_kfree_skb_any(tx_buffer->skb);
 		if (dma_unmap_len(tx_buffer, len))
@@ -841,7 +841,7 @@ static bool i40e_clean_tx_irq(struct i40e_vsi *vsi,
 
 		/* free the skb/XDP data */
 		if (ring_is_xdp(tx_ring))
-			page_frag_free(tx_buf->raw_buf);
+			xdp_return_frame(tx_buf->xdpf);
 		else
 			napi_consume_skb(tx_buf->skb, napi_budget);
 
@@ -2203,9 +2203,20 @@ static bool i40e_is_non_eop(struct i40e_ring *rx_ring,
 #define I40E_XDP_CONSUMED 1
 #define I40E_XDP_TX 2
 
-static int i40e_xmit_xdp_ring(struct xdp_buff *xdp,
+static int i40e_xmit_xdp_ring(struct xdp_frame *xdpf,
 			      struct i40e_ring *xdp_ring);
 
+static int i40e_xmit_xdp_tx_ring(struct xdp_buff *xdp,
+				 struct i40e_ring *xdp_ring)
+{
+	struct xdp_frame *xdpf = convert_to_xdp_frame(xdp);
+
+	if (unlikely(!xdpf))
+		return I40E_XDP_CONSUMED;
+
+	return i40e_xmit_xdp_ring(xdpf, xdp_ring);
+}
+
 /**
  * i40e_run_xdp - run an XDP program
  * @rx_ring: Rx ring being processed
@@ -2225,13 +2236,15 @@ static struct sk_buff *i40e_run_xdp(struct i40e_ring *rx_ring,
 	if (!xdp_prog)
 		goto xdp_out;
 
+	prefetchw(xdp->data_hard_start); /* xdp_frame write */
+
 	act = bpf_prog_run_xdp(xdp_prog, xdp);
 	switch (act) {
 	case XDP_PASS:
 		break;
 	case XDP_TX:
 		xdp_ring = rx_ring->vsi->xdp_rings[rx_ring->queue_index];
-		result = i40e_xmit_xdp_ring(xdp, xdp_ring);
+		result = i40e_xmit_xdp_tx_ring(xdp, xdp_ring);
 		break;
 	case XDP_REDIRECT:
 		err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog);
@@ -3478,13 +3491,13 @@ static inline int i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
  * @xdpf: data to transmit
  * @xdp_ring: XDP Tx ring
  **/
-static int i40e_xmit_xdp_ring(struct xdp_buff *xdp,
+static int i40e_xmit_xdp_ring(struct xdp_frame *xdpf,
 			      struct i40e_ring *xdp_ring)
 {
-	u32 size = xdp->data_end - xdp->data;
 	u16 i = xdp_ring->next_to_use;
 	struct i40e_tx_buffer *tx_bi;
 	struct i40e_tx_desc *tx_desc;
+	u32 size = xdpf->len;
 	dma_addr_t dma;
 
 	if (!unlikely(I40E_DESC_UNUSED(xdp_ring))) {
@@ -3492,14 +3505,14 @@ static int i40e_xmit_xdp_ring(struct xdp_buff *xdp,
 		return I40E_XDP_CONSUMED;
 	}
 
-	dma = dma_map_single(xdp_ring->dev, xdp->data, size, DMA_TO_DEVICE);
+	dma = dma_map_single(xdp_ring->dev, xdpf->data, size, DMA_TO_DEVICE);
 	if (dma_mapping_error(xdp_ring->dev, dma))
 		return I40E_XDP_CONSUMED;
 
 	tx_bi = &xdp_ring->tx_bi[i];
 	tx_bi->bytecount = size;
 	tx_bi->gso_segs = 1;
-	tx_bi->raw_buf = xdp->data;
+	tx_bi->xdpf = xdpf;
 
 	/* record length, and DMA address */
 	dma_unmap_len_set(tx_bi, len, size);
@@ -3675,7 +3688,7 @@ netdev_tx_t i40e_lan_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
  *
  * Returns Zero if sent, else an error code
  **/
-int i40e_xdp_xmit(struct net_device *dev, struct xdp_buff *xdp)
+int i40e_xdp_xmit(struct net_device *dev, struct xdp_frame *xdpf)
 {
 	struct i40e_netdev_priv *np = netdev_priv(dev);
 	unsigned int queue_index = smp_processor_id();
@@ -3688,7 +3701,7 @@ int i40e_xdp_xmit(struct net_device *dev, struct xdp_buff *xdp)
 	if (!i40e_enabled_xdp_vsi(vsi) || queue_index >= vsi->num_queue_pairs)
 		return -ENXIO;
 
-	err = i40e_xmit_xdp_ring(xdp, vsi->xdp_rings[queue_index]);
+	err = i40e_xmit_xdp_ring(xdpf, vsi->xdp_rings[queue_index]);
 	if (err != I40E_XDP_TX)
 		return -ENOSPC;
 
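
Why the drivers now prefetchw(xdp->data_hard_start): convert_to_xdp_frame()
places the frame metadata at the start of the packet headroom, so the
conversion writes to that cache line before transmit. A simplified sketch of
what the helper does (not code from this series):

	#include <net/xdp.h>

	/* Simplified essence of convert_to_xdp_frame(): the metadata is
	 * written into the headroom at data_hard_start, hence the prefetchw.
	 */
	static struct xdp_frame *example_convert(struct xdp_buff *xdp)
	{
		struct xdp_frame *xdpf = xdp->data_hard_start;

		xdpf->data = xdp->data;
		xdpf->len = xdp->data_end - xdp->data;
		xdpf->headroom = xdp->data - xdp->data_hard_start - sizeof(*xdpf);
		return xdpf;
	}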
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
index 3043483..4bf318b 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
@@ -306,6 +306,7 @@ static inline unsigned int i40e_txd_use_count(unsigned int size)
 struct i40e_tx_buffer {
 	struct i40e_tx_desc *next_to_watch;
 	union {
+		struct xdp_frame *xdpf;
 		struct sk_buff *skb;
 		void *raw_buf;
 	};
@@ -510,7 +511,7 @@ u32 i40e_get_tx_pending(struct i40e_ring *ring, bool in_sw);
 void i40e_detect_recover_hung(struct i40e_vsi *vsi);
 int __i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size);
 bool __i40e_chk_linearize(struct sk_buff *skb);
-int i40e_xdp_xmit(struct net_device *dev, struct xdp_buff *xdp);
+int i40e_xdp_xmit(struct net_device *dev, struct xdp_frame *xdpf);
 void i40e_xdp_flush(struct net_device *dev);
 
 /**
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
index 4f08c71..7dd5038 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
@@ -241,8 +241,7 @@ struct ixgbe_tx_buffer {
 	unsigned long time_stamp;
 	union {
 		struct sk_buff *skb;
-		/* XDP uses address ptr on irq_clean */
-		void *data;
+		struct xdp_frame *xdpf;
 	};
 	unsigned int bytecount;
 	unsigned short gso_segs;
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index afadba9..51e7d82 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -1216,7 +1216,7 @@ static bool ixgbe_clean_tx_irq(struct ixgbe_q_vector *q_vector,
 
 		/* free the skb */
 		if (ring_is_xdp(tx_ring))
-			page_frag_free(tx_buffer->data);
+			xdp_return_frame(tx_buffer->xdpf);
 		else
 			napi_consume_skb(tx_buffer->skb, napi_budget);
 
@@ -2262,7 +2262,7 @@ static struct sk_buff *ixgbe_build_skb(struct ixgbe_ring *rx_ring,
 #define IXGBE_XDP_TX 2
 
 static int ixgbe_xmit_xdp_ring(struct ixgbe_adapter *adapter,
-			       struct xdp_buff *xdp);
+			       struct xdp_frame *xdpf);
 
 static struct sk_buff *ixgbe_run_xdp(struct ixgbe_adapter *adapter,
 				     struct ixgbe_ring *rx_ring,
@@ -2270,6 +2270,7 @@ static struct sk_buff *ixgbe_run_xdp(struct ixgbe_adapter *adapter,
 {
 	int err, result = IXGBE_XDP_PASS;
 	struct bpf_prog *xdp_prog;
+	struct xdp_frame *xdpf;
 	u32 act;
 
 	rcu_read_lock();
@@ -2278,12 +2279,19 @@ static struct sk_buff *ixgbe_run_xdp(struct ixgbe_adapter *adapter,
 	if (!xdp_prog)
 		goto xdp_out;
 
+	prefetchw(xdp->data_hard_start); /* xdp_frame write */
+
 	act = bpf_prog_run_xdp(xdp_prog, xdp);
 	switch (act) {
 	case XDP_PASS:
 		break;
 	case XDP_TX:
-		result = ixgbe_xmit_xdp_ring(adapter, xdp);
+		xdpf = convert_to_xdp_frame(xdp);
+		if (unlikely(!xdpf)) {
+			result = IXGBE_XDP_CONSUMED;
+			break;
+		}
+		result = ixgbe_xmit_xdp_ring(adapter, xdpf);
 		break;
 	case XDP_REDIRECT:
 		err = xdp_do_redirect(adapter->netdev, xdp, xdp_prog);
@@ -5797,7 +5805,7 @@ static void ixgbe_clean_tx_ring(struct ixgbe_ring *tx_ring)
 
 		/* Free all the Tx ring sk_buffs */
 		if (ring_is_xdp(tx_ring))
-			page_frag_free(tx_buffer->data);
+			xdp_return_frame(tx_buffer->xdpf);
 		else
 			dev_kfree_skb_any(tx_buffer->skb);
 
@@ -6370,7 +6378,7 @@ int ixgbe_setup_rx_resources(struct ixgbe_adapter *adapter,
 	struct device *dev = rx_ring->dev;
 	int orig_node = dev_to_node(dev);
 	int ring_node = -1;
-	int size;
+	int size, err;
 
 	size = sizeof(struct ixgbe_rx_buffer) * rx_ring->count;
 
@@ -6407,6 +6415,13 @@ int ixgbe_setup_rx_resources(struct ixgbe_adapter *adapter,
 			     rx_ring->queue_index) < 0)
 		goto err;
 
+	err = xdp_rxq_info_reg_mem_model(&rx_ring->xdp_rxq,
+					 MEM_TYPE_PAGE_SHARED, NULL);
+	if (err) {
+		xdp_rxq_info_unreg(&rx_ring->xdp_rxq);
+		goto err;
+	}
+
 	rx_ring->xdp_prog = adapter->xdp_prog;
 
 	return 0;
@@ -8336,7 +8351,7 @@ static u16 ixgbe_select_queue(struct net_device *dev, struct sk_buff *skb,
 }
 
 static int ixgbe_xmit_xdp_ring(struct ixgbe_adapter *adapter,
-			       struct xdp_buff *xdp)
+			       struct xdp_frame *xdpf)
 {
 	struct ixgbe_ring *ring = adapter->xdp_ring[smp_processor_id()];
 	struct ixgbe_tx_buffer *tx_buffer;
@@ -8345,12 +8360,12 @@ static int ixgbe_xmit_xdp_ring(struct ixgbe_adapter *adapter,
 	dma_addr_t dma;
 	u16 i;
 
-	len = xdp->data_end - xdp->data;
+	len = xdpf->len;
 
 	if (unlikely(!ixgbe_desc_unused(ring)))
 		return IXGBE_XDP_CONSUMED;
 
-	dma = dma_map_single(ring->dev, xdp->data, len, DMA_TO_DEVICE);
+	dma = dma_map_single(ring->dev, xdpf->data, len, DMA_TO_DEVICE);
 	if (dma_mapping_error(ring->dev, dma))
 		return IXGBE_XDP_CONSUMED;
 
@@ -8365,7 +8380,8 @@ static int ixgbe_xmit_xdp_ring(struct ixgbe_adapter *adapter,
 
 	dma_unmap_len_set(tx_buffer, len, len);
 	dma_unmap_addr_set(tx_buffer, dma, dma);
-	tx_buffer->data = xdp->data;
+	tx_buffer->xdpf = xdpf;
+
 	tx_desc->read.buffer_addr = cpu_to_le64(dma);
 
 	/* put descriptor type bits */
@@ -9996,7 +10012,7 @@ static int ixgbe_xdp(struct net_device *dev, struct netdev_bpf *xdp)
 	}
 }
 
-static int ixgbe_xdp_xmit(struct net_device *dev, struct xdp_buff *xdp)
+static int ixgbe_xdp_xmit(struct net_device *dev, struct xdp_frame *xdpf)
 {
 	struct ixgbe_adapter *adapter = netdev_priv(dev);
 	struct ixgbe_ring *ring;
@@ -10012,7 +10028,7 @@ static int ixgbe_xdp_xmit(struct net_device *dev, struct xdp_buff *xdp)
 	if (unlikely(!ring))
 		return -ENXIO;
 
-	err = ixgbe_xmit_xdp_ring(adapter, xdp);
+	err = ixgbe_xmit_xdp_ring(adapter, xdpf);
 	if (err != IXGBE_XDP_TX)
 		return -ENOSPC;
 
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index e0b72bf..d8ebf0a 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -2503,7 +2503,6 @@ static int mtk_probe(struct platform_device *pdev)
 {
 	struct resource *res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	struct device_node *mac_np;
-	const struct of_device_id *match;
 	struct mtk_eth *eth;
 	int err;
 	int i;
@@ -2512,8 +2511,7 @@ static int mtk_probe(struct platform_device *pdev)
 	if (!eth)
 		return -ENOMEM;
 
-	match = of_match_device(of_mtk_match, &pdev->dev);
-	eth->soc = (struct mtk_soc_data *)match->data;
+	eth->soc = of_device_get_match_data(&pdev->dev);
 
 	eth->dev = &pdev->dev;
 	eth->base = devm_ioremap_resource(&pdev->dev, res);
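 
The mtk_eth_soc change above is the stock cleanup of replacing an
of_match_device() call plus a ->data dereference with
of_device_get_match_data(). A hedged sketch of the pattern (my_* names
are hypothetical; the NULL check is an addition this driver omits):

	#include <linux/of_device.h>
	#include <linux/platform_device.h>

	static int my_probe(struct platform_device *pdev)
	{
		const struct my_soc_data *soc;

		/* one call replaces of_match_device() + match->data */
		soc = of_device_get_match_data(&pdev->dev);
		if (!soc)	/* no match data registered for this device */
			return -ENODEV;

		/* ... use soc as before ... */
		return 0;
	}
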
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
index 5c613c6..efc55fe 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
@@ -775,8 +775,8 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
 
 			act = bpf_prog_run_xdp(xdp_prog, &xdp);
 
+			length = xdp.data_end - xdp.data;
 			if (xdp.data != orig_data) {
-				length = xdp.data_end - xdp.data;
 				frags[0].page_offset = xdp.data -
 					xdp.data_hard_start;
 				va = xdp.data;
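 
The mlx4 hunk above (and the nfp hunk further down in this diff) fixes the
same bug: the packet length must be recomputed from xdp.data_end - xdp.data
unconditionally, because a BPF program can shrink the tail with
bpf_xdp_adjust_tail() without ever moving xdp.data, so keying the recompute
off "data moved" misses the truncation. A sketch of such a program,
assuming the usual samples-style bpf_helpers.h:

	#include <linux/bpf.h>
	#include "bpf_helpers.h"

	SEC("xdp")
	int xdp_trim64(struct xdp_md *ctx)
	{
		int len = ctx->data_end - ctx->data;

		/* Shrink the tail to 64 bytes; ctx->data stays put, so a
		 * driver that only recomputes the length when the head
		 * moved would keep using the stale, longer length.
		 */
		if (len > 64)
			bpf_xdp_adjust_tail(ctx, 64 - len);
		return XDP_PASS;
	}

	char _license[] SEC("license") = "GPL";
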
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
index c032319..1225703 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
@@ -30,6 +30,7 @@
 	bool "Mellanox Technologies ConnectX-4 Ethernet support"
 	depends on NETDEVICES && ETHERNET && INET && PCI && MLX5_CORE
 	depends on IPV6=y || IPV6=n || MLX5_CORE=m
+	select PAGE_POOL
 	default n
 	---help---
 	  Ethernet support in Mellanox Technologies ConnectX-4 NIC.
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 30cad07..3317a4d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -53,6 +53,8 @@
 #include "mlx5_core.h"
 #include "en_stats.h"
 
+struct page_pool;
+
 #define MLX5_SET_CFG(p, f, v) MLX5_SET(create_flow_group_in, p, f, v)
 
 #define MLX5E_ETH_HARD_MTU (ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN)
@@ -392,6 +394,7 @@ struct mlx5e_xdpsq {
 	struct {
 		struct mlx5e_dma_info     *di;
 		bool                       doorbell;
+		bool                       redirect_flush;
 	} db;
 
 	/* read only */
@@ -533,6 +536,7 @@ struct mlx5e_rq {
 	unsigned int           hw_mtu;
 	struct mlx5e_xdpsq     xdpsq;
 	DECLARE_BITMAP(flags, 8);
+	struct page_pool      *page_pool;
 
 	/* control */
 	struct mlx5_wq_ctrl    wq_ctrl;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index b29c1d9..f100374 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -35,6 +35,7 @@
 #include <linux/mlx5/fs.h>
 #include <net/vxlan.h>
 #include <linux/bpf.h>
+#include <net/page_pool.h>
 #include "eswitch.h"
 #include "en.h"
 #include "en_tc.h"
@@ -389,10 +390,11 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
 			  struct mlx5e_rq_param *rqp,
 			  struct mlx5e_rq *rq)
 {
+	struct page_pool_params pp_params = { 0 };
 	struct mlx5_core_dev *mdev = c->mdev;
 	void *rqc = rqp->rqc;
 	void *rqc_wq = MLX5_ADDR_OF(rqc, rqc, wq);
-	u32 byte_count;
+	u32 byte_count, pool_size;
 	int npages;
 	int wq_sz;
 	int err;
@@ -432,9 +434,12 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
 
 	rq->buff.map_dir = rq->xdp_prog ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE;
 	rq->buff.headroom = mlx5e_get_rq_headroom(mdev, params);
+	pool_size = 1 << params->log_rq_mtu_frames;
 
 	switch (rq->wq_type) {
 	case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
+
+		pool_size = MLX5_MPWRQ_PAGES_PER_WQE << mlx5e_mpwqe_get_log_rq_size(params);
 		rq->post_wqes = mlx5e_post_rx_mpwqes;
 		rq->dealloc_wqe = mlx5e_dealloc_rx_mpwqe;
 
@@ -512,6 +517,32 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
 		rq->mkey_be = c->mkey_be;
 	}
 
+	/* Create a page_pool and register it with rxq */
+	pp_params.order     = rq->buff.page_order;
+	pp_params.flags     = 0; /* no internal DMA mapping in page_pool */
+	pp_params.pool_size = pool_size;
+	pp_params.nid       = cpu_to_node(c->cpu);
+	pp_params.dev       = c->pdev;
+	pp_params.dma_dir   = rq->buff.map_dir;
+
+	/* page_pool can be used even when there is no rq->xdp_prog:
+	 * since page_pool does not handle the DMA mapping, there is no
+	 * state that must be cleared. page_pool also gracefully handles
+	 * an elevated refcnt.
+	 */
+	rq->page_pool = page_pool_create(&pp_params);
+	if (IS_ERR(rq->page_pool)) {
+		if (rq->wq_type != MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ)
+			kfree(rq->wqe.frag_info);
+		err = PTR_ERR(rq->page_pool);
+		rq->page_pool = NULL;
+		goto err_rq_wq_destroy;
+	}
+	err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq,
+					 MEM_TYPE_PAGE_POOL, rq->page_pool);
+	if (err)
+		goto err_rq_wq_destroy;
+
 	for (i = 0; i < wq_sz; i++) {
 		struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(&rq->wq, i);
 
@@ -548,6 +579,8 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
 	if (rq->xdp_prog)
 		bpf_prog_put(rq->xdp_prog);
 	xdp_rxq_info_unreg(&rq->xdp_rxq);
+	if (rq->page_pool)
+		page_pool_destroy(rq->page_pool);
 	mlx5_wq_destroy(&rq->wq_ctrl);
 
 	return err;
@@ -561,6 +594,8 @@ static void mlx5e_free_rq(struct mlx5e_rq *rq)
 		bpf_prog_put(rq->xdp_prog);
 
 	xdp_rxq_info_unreg(&rq->xdp_rxq);
+	if (rq->page_pool)
+		page_pool_destroy(rq->page_pool);
 
 	switch (rq->wq_type) {
 	case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
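 
The en_main.c hunks above give each RQ its own page_pool and register it
as the rxq's memory model. A condensed sketch of that lifecycle, assuming
a hypothetical my_rq carrying the same xdp_rxq and page_pool members:

	#include <net/page_pool.h>
	#include <net/xdp.h>

	static int my_rq_page_pool_init(struct my_rq *rq, u32 pool_size, int node)
	{
		struct page_pool_params pp = {
			.order     = 0,
			.flags     = 0,	/* pool does no DMA mapping; driver still does */
			.pool_size = pool_size,
			.nid       = node,
			.dev       = rq->pdev,
			.dma_dir   = DMA_FROM_DEVICE,
		};

		rq->page_pool = page_pool_create(&pp);
		if (IS_ERR(rq->page_pool))
			return PTR_ERR(rq->page_pool);

		/* Frames freed via xdp_return_frame() now recycle into
		 * this pool instead of going back to the page allocator.
		 */
		return xdp_rxq_info_reg_mem_model(&rq->xdp_rxq,
						  MEM_TYPE_PAGE_POOL,
						  rq->page_pool);
	}

RX refill then uses page_pool_dev_alloc_pages(rq->page_pool), and the fast
in-NAPI recycle path is page_pool_recycle_direct(), as the en_rx.c hunks
below show.
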
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index 1766457..7bbf0db 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -37,6 +37,7 @@
 #include <linux/bpf_trace.h>
 #include <net/busy_poll.h>
 #include <net/ip6_checksum.h>
+#include <net/page_pool.h>
 #include "en.h"
 #include "en_tc.h"
 #include "eswitch.h"
@@ -221,7 +222,7 @@ static inline int mlx5e_page_alloc_mapped(struct mlx5e_rq *rq,
 	if (mlx5e_rx_cache_get(rq, dma_info))
 		return 0;
 
-	dma_info->page = dev_alloc_pages(rq->buff.page_order);
+	dma_info->page = page_pool_dev_alloc_pages(rq->page_pool);
 	if (unlikely(!dma_info->page))
 		return -ENOMEM;
 
@@ -236,15 +237,26 @@ static inline int mlx5e_page_alloc_mapped(struct mlx5e_rq *rq,
 	return 0;
 }
 
+static void mlx5e_page_dma_unmap(struct mlx5e_rq *rq,
+					struct mlx5e_dma_info *dma_info)
+{
+	dma_unmap_page(rq->pdev, dma_info->addr, RQ_PAGE_SIZE(rq),
+		       rq->buff.map_dir);
+}
+
 void mlx5e_page_release(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info,
 			bool recycle)
 {
-	if (likely(recycle) && mlx5e_rx_cache_put(rq, dma_info))
-		return;
+	if (likely(recycle)) {
+		if (mlx5e_rx_cache_put(rq, dma_info))
+			return;
 
-	dma_unmap_page(rq->pdev, dma_info->addr, RQ_PAGE_SIZE(rq),
-		       rq->buff.map_dir);
-	put_page(dma_info->page);
+		mlx5e_page_dma_unmap(rq, dma_info);
+		page_pool_recycle_direct(rq->page_pool, dma_info->page);
+	} else {
+		mlx5e_page_dma_unmap(rq, dma_info);
+		put_page(dma_info->page);
+	}
 }
 
 static inline bool mlx5e_page_reuse(struct mlx5e_rq *rq,
@@ -800,9 +812,10 @@ static inline int mlx5e_xdp_handle(struct mlx5e_rq *rq,
 				   struct mlx5e_dma_info *di,
 				   void *va, u16 *rx_headroom, u32 *len)
 {
-	const struct bpf_prog *prog = READ_ONCE(rq->xdp_prog);
+	struct bpf_prog *prog = READ_ONCE(rq->xdp_prog);
 	struct xdp_buff xdp;
 	u32 act;
+	int err;
 
 	if (!prog)
 		return false;
@@ -823,6 +836,15 @@ static inline int mlx5e_xdp_handle(struct mlx5e_rq *rq,
 		if (unlikely(!mlx5e_xmit_xdp_frame(rq, di, &xdp)))
 			trace_xdp_exception(rq->netdev, prog, act);
 		return true;
+	case XDP_REDIRECT:
+		/* When XDP is enabled, the page refcnt is 1 here */
+		err = xdp_do_redirect(rq->netdev, &xdp, prog);
+		if (!err) {
+			__set_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags);
+			rq->xdpsq.db.redirect_flush = true;
+			mlx5e_page_dma_unmap(rq, di);
+		}
+		return true;
 	default:
 		bpf_warn_invalid_xdp_action(act);
 	case XDP_ABORTED:
@@ -868,6 +890,7 @@ struct sk_buff *skb_from_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe,
 
 	dma_sync_single_range_for_cpu(rq->pdev, di->addr, wi->offset,
 				      frag_size, DMA_FROM_DEVICE);
+	prefetchw(va); /* xdp_frame data area */
 	prefetch(data);
 	wi->offset += frag_size;
 
@@ -1140,6 +1163,11 @@ int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget)
 		xdpsq->db.doorbell = false;
 	}
 
+	if (xdpsq->db.redirect_flush) {
+		xdp_do_flush_map();
+		xdpsq->db.redirect_flush = false;
+	}
+
 	mlx5_cqwq_update_db_record(&cq->wq);
 
 	/* ensure cq space is freed before enabling more cqes */
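 
The en_rx.c hunks above follow the standard XDP_REDIRECT contract:
xdp_do_redirect() only queues the frame on the target map or device, and
the driver must call xdp_do_flush_map() once per NAPI poll to kick the
queued work. Schematically (not a literal excerpt):

	/* per-packet dispatch */
	case XDP_REDIRECT:
		if (!xdp_do_redirect(rq->netdev, &xdp, prog))
			rq->xdpsq.db.redirect_flush = true;	/* defer */
		return true;

	/* once, at the end of the NAPI poll loop */
	if (xdpsq->db.redirect_flush) {
		xdp_do_flush_map();
		xdpsq->db.redirect_flush = false;
	}
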
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 1904c03..8e4edb6 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -442,7 +442,7 @@ struct mlxsw_sp_fib6_entry {
 
 struct mlxsw_sp_rt6 {
 	struct list_head list;
-	struct rt6_info *rt;
+	struct fib6_info *rt;
 };
 
 struct mlxsw_sp_lpm_tree {
@@ -2770,9 +2770,9 @@ mlxsw_sp_nexthop6_group_cmp(const struct mlxsw_sp_nexthop_group *nh_grp,
 		struct in6_addr *gw;
 		int ifindex, weight;
 
-		ifindex = mlxsw_sp_rt6->rt->dst.dev->ifindex;
-		weight = mlxsw_sp_rt6->rt->rt6i_nh_weight;
-		gw = &mlxsw_sp_rt6->rt->rt6i_gateway;
+		ifindex = mlxsw_sp_rt6->rt->fib6_nh.nh_dev->ifindex;
+		weight = mlxsw_sp_rt6->rt->fib6_nh.nh_weight;
+		gw = &mlxsw_sp_rt6->rt->fib6_nh.nh_gw;
 		if (!mlxsw_sp_nexthop6_group_has_nexthop(nh_grp, gw, ifindex,
 							 weight))
 			return false;
@@ -2838,7 +2838,7 @@ mlxsw_sp_nexthop6_group_hash(struct mlxsw_sp_fib6_entry *fib6_entry, u32 seed)
 	struct net_device *dev;
 
 	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
-		dev = mlxsw_sp_rt6->rt->dst.dev;
+		dev = mlxsw_sp_rt6->rt->fib6_nh.nh_dev;
 		val ^= dev->ifindex;
 	}
 
@@ -3834,11 +3834,11 @@ mlxsw_sp_rt6_nexthop(struct mlxsw_sp_nexthop_group *nh_grp,
 
 	for (i = 0; i < nh_grp->count; i++) {
 		struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
-		struct rt6_info *rt = mlxsw_sp_rt6->rt;
+		struct fib6_info *rt = mlxsw_sp_rt6->rt;
 
-		if (nh->rif && nh->rif->dev == rt->dst.dev &&
+		if (nh->rif && nh->rif->dev == rt->fib6_nh.nh_dev &&
 		    ipv6_addr_equal((const struct in6_addr *) &nh->gw_addr,
-				    &rt->rt6i_gateway))
+				    &rt->fib6_nh.nh_gw))
 			return nh;
 		continue;
 	}
@@ -3895,7 +3895,7 @@ mlxsw_sp_fib6_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
 
 	if (fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_LOCAL) {
 		list_first_entry(&fib6_entry->rt6_list, struct mlxsw_sp_rt6,
-				 list)->rt->rt6i_nh_flags |= RTNH_F_OFFLOAD;
+				 list)->rt->fib6_nh.nh_flags |= RTNH_F_OFFLOAD;
 		return;
 	}
 
@@ -3905,9 +3905,9 @@ mlxsw_sp_fib6_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
 
 		nh = mlxsw_sp_rt6_nexthop(nh_grp, mlxsw_sp_rt6);
 		if (nh && nh->offloaded)
-			mlxsw_sp_rt6->rt->rt6i_nh_flags |= RTNH_F_OFFLOAD;
+			mlxsw_sp_rt6->rt->fib6_nh.nh_flags |= RTNH_F_OFFLOAD;
 		else
-			mlxsw_sp_rt6->rt->rt6i_nh_flags &= ~RTNH_F_OFFLOAD;
+			mlxsw_sp_rt6->rt->fib6_nh.nh_flags &= ~RTNH_F_OFFLOAD;
 	}
 }
 
@@ -3920,9 +3920,9 @@ mlxsw_sp_fib6_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
 	fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry,
 				  common);
 	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
-		struct rt6_info *rt = mlxsw_sp_rt6->rt;
+		struct fib6_info *rt = mlxsw_sp_rt6->rt;
 
-		rt->rt6i_nh_flags &= ~RTNH_F_OFFLOAD;
+		rt->fib6_nh.nh_flags &= ~RTNH_F_OFFLOAD;
 	}
 }
 
@@ -4699,29 +4699,29 @@ static void mlxsw_sp_router_fib4_del(struct mlxsw_sp *mlxsw_sp,
 	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
 }
 
-static bool mlxsw_sp_fib6_rt_should_ignore(const struct rt6_info *rt)
+static bool mlxsw_sp_fib6_rt_should_ignore(const struct fib6_info *rt)
 {
 	/* Packets with link-local destination IP arriving to the router
 	 * are trapped to the CPU, so no need to program specific routes
 	 * for them.
 	 */
-	if (ipv6_addr_type(&rt->rt6i_dst.addr) & IPV6_ADDR_LINKLOCAL)
+	if (ipv6_addr_type(&rt->fib6_dst.addr) & IPV6_ADDR_LINKLOCAL)
 		return true;
 
 	/* Multicast routes aren't supported, so ignore them. Neighbour
 	 * Discovery packets are specifically trapped.
 	 */
-	if (ipv6_addr_type(&rt->rt6i_dst.addr) & IPV6_ADDR_MULTICAST)
+	if (ipv6_addr_type(&rt->fib6_dst.addr) & IPV6_ADDR_MULTICAST)
 		return true;
 
 	/* Cloned routes are irrelevant in the forwarding path. */
-	if (rt->rt6i_flags & RTF_CACHE)
+	if (rt->fib6_flags & RTF_CACHE)
 		return true;
 
 	return false;
 }
 
-static struct mlxsw_sp_rt6 *mlxsw_sp_rt6_create(struct rt6_info *rt)
+static struct mlxsw_sp_rt6 *mlxsw_sp_rt6_create(struct fib6_info *rt)
 {
 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
 
@@ -4734,18 +4734,18 @@ static struct mlxsw_sp_rt6 *mlxsw_sp_rt6_create(struct rt6_info *rt)
 	 * memory.
 	 */
 	mlxsw_sp_rt6->rt = rt;
-	rt6_hold(rt);
+	fib6_info_hold(rt);
 
 	return mlxsw_sp_rt6;
 }
 
 #if IS_ENABLED(CONFIG_IPV6)
-static void mlxsw_sp_rt6_release(struct rt6_info *rt)
+static void mlxsw_sp_rt6_release(struct fib6_info *rt)
 {
-	rt6_release(rt);
+	fib6_info_release(rt);
 }
 #else
-static void mlxsw_sp_rt6_release(struct rt6_info *rt)
+static void mlxsw_sp_rt6_release(struct fib6_info *rt)
 {
 }
 #endif
@@ -4756,13 +4756,13 @@ static void mlxsw_sp_rt6_destroy(struct mlxsw_sp_rt6 *mlxsw_sp_rt6)
 	kfree(mlxsw_sp_rt6);
 }
 
-static bool mlxsw_sp_fib6_rt_can_mp(const struct rt6_info *rt)
+static bool mlxsw_sp_fib6_rt_can_mp(const struct fib6_info *rt)
 {
 	/* RTF_CACHE routes are ignored */
-	return (rt->rt6i_flags & (RTF_GATEWAY | RTF_ADDRCONF)) == RTF_GATEWAY;
+	return (rt->fib6_flags & (RTF_GATEWAY | RTF_ADDRCONF)) == RTF_GATEWAY;
 }
 
-static struct rt6_info *
+static struct fib6_info *
 mlxsw_sp_fib6_entry_rt(const struct mlxsw_sp_fib6_entry *fib6_entry)
 {
 	return list_first_entry(&fib6_entry->rt6_list, struct mlxsw_sp_rt6,
@@ -4771,7 +4771,7 @@ mlxsw_sp_fib6_entry_rt(const struct mlxsw_sp_fib6_entry *fib6_entry)
 
 static struct mlxsw_sp_fib6_entry *
 mlxsw_sp_fib6_node_mp_entry_find(const struct mlxsw_sp_fib_node *fib_node,
-				 const struct rt6_info *nrt, bool replace)
+				 const struct fib6_info *nrt, bool replace)
 {
 	struct mlxsw_sp_fib6_entry *fib6_entry;
 
@@ -4779,21 +4779,21 @@ mlxsw_sp_fib6_node_mp_entry_find(const struct mlxsw_sp_fib_node *fib_node,
 		return NULL;
 
 	list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) {
-		struct rt6_info *rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
+		struct fib6_info *rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
 
 		/* RT6_TABLE_LOCAL and RT6_TABLE_MAIN share the same
 		 * virtual router.
 		 */
-		if (rt->rt6i_table->tb6_id > nrt->rt6i_table->tb6_id)
+		if (rt->fib6_table->tb6_id > nrt->fib6_table->tb6_id)
 			continue;
-		if (rt->rt6i_table->tb6_id != nrt->rt6i_table->tb6_id)
+		if (rt->fib6_table->tb6_id != nrt->fib6_table->tb6_id)
 			break;
-		if (rt->rt6i_metric < nrt->rt6i_metric)
+		if (rt->fib6_metric < nrt->fib6_metric)
 			continue;
-		if (rt->rt6i_metric == nrt->rt6i_metric &&
+		if (rt->fib6_metric == nrt->fib6_metric &&
 		    mlxsw_sp_fib6_rt_can_mp(rt))
 			return fib6_entry;
-		if (rt->rt6i_metric > nrt->rt6i_metric)
+		if (rt->fib6_metric > nrt->fib6_metric)
 			break;
 	}
 
@@ -4802,7 +4802,7 @@ mlxsw_sp_fib6_node_mp_entry_find(const struct mlxsw_sp_fib_node *fib_node,
 
 static struct mlxsw_sp_rt6 *
 mlxsw_sp_fib6_entry_rt_find(const struct mlxsw_sp_fib6_entry *fib6_entry,
-			    const struct rt6_info *rt)
+			    const struct fib6_info *rt)
 {
 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
 
@@ -4815,21 +4815,21 @@ mlxsw_sp_fib6_entry_rt_find(const struct mlxsw_sp_fib6_entry *fib6_entry,
 }
 
 static bool mlxsw_sp_nexthop6_ipip_type(const struct mlxsw_sp *mlxsw_sp,
-					const struct rt6_info *rt,
+					const struct fib6_info *rt,
 					enum mlxsw_sp_ipip_type *ret)
 {
-	return rt->dst.dev &&
-	       mlxsw_sp_netdev_ipip_type(mlxsw_sp, rt->dst.dev, ret);
+	return rt->fib6_nh.nh_dev &&
+	       mlxsw_sp_netdev_ipip_type(mlxsw_sp, rt->fib6_nh.nh_dev, ret);
 }
 
 static int mlxsw_sp_nexthop6_type_init(struct mlxsw_sp *mlxsw_sp,
 				       struct mlxsw_sp_nexthop_group *nh_grp,
 				       struct mlxsw_sp_nexthop *nh,
-				       const struct rt6_info *rt)
+				       const struct fib6_info *rt)
 {
 	const struct mlxsw_sp_ipip_ops *ipip_ops;
 	struct mlxsw_sp_ipip_entry *ipip_entry;
-	struct net_device *dev = rt->dst.dev;
+	struct net_device *dev = rt->fib6_nh.nh_dev;
 	struct mlxsw_sp_rif *rif;
 	int err;
 
@@ -4870,13 +4870,13 @@ static void mlxsw_sp_nexthop6_type_fini(struct mlxsw_sp *mlxsw_sp,
 static int mlxsw_sp_nexthop6_init(struct mlxsw_sp *mlxsw_sp,
 				  struct mlxsw_sp_nexthop_group *nh_grp,
 				  struct mlxsw_sp_nexthop *nh,
-				  const struct rt6_info *rt)
+				  const struct fib6_info *rt)
 {
-	struct net_device *dev = rt->dst.dev;
+	struct net_device *dev = rt->fib6_nh.nh_dev;
 
 	nh->nh_grp = nh_grp;
-	nh->nh_weight = rt->rt6i_nh_weight;
-	memcpy(&nh->gw_addr, &rt->rt6i_gateway, sizeof(nh->gw_addr));
+	nh->nh_weight = rt->fib6_nh.nh_weight;
+	memcpy(&nh->gw_addr, &rt->fib6_nh.nh_gw, sizeof(nh->gw_addr));
 	mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh);
 
 	list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list);
@@ -4897,9 +4897,9 @@ static void mlxsw_sp_nexthop6_fini(struct mlxsw_sp *mlxsw_sp,
 }
 
 static bool mlxsw_sp_rt6_is_gateway(const struct mlxsw_sp *mlxsw_sp,
-				    const struct rt6_info *rt)
+				    const struct fib6_info *rt)
 {
-	return rt->rt6i_flags & RTF_GATEWAY ||
+	return rt->fib6_flags & RTF_GATEWAY ||
 	       mlxsw_sp_nexthop6_ipip_type(mlxsw_sp, rt, NULL);
 }
 
@@ -4928,7 +4928,7 @@ mlxsw_sp_nexthop6_group_create(struct mlxsw_sp *mlxsw_sp,
 	nh_grp->gateway = mlxsw_sp_rt6_is_gateway(mlxsw_sp, mlxsw_sp_rt6->rt);
 	nh_grp->count = fib6_entry->nrt6;
 	for (i = 0; i < nh_grp->count; i++) {
-		struct rt6_info *rt = mlxsw_sp_rt6->rt;
+		struct fib6_info *rt = mlxsw_sp_rt6->rt;
 
 		nh = &nh_grp->nexthops[i];
 		err = mlxsw_sp_nexthop6_init(mlxsw_sp, nh_grp, nh, rt);
@@ -5040,7 +5040,7 @@ mlxsw_sp_nexthop6_group_update(struct mlxsw_sp *mlxsw_sp,
 static int
 mlxsw_sp_fib6_entry_nexthop_add(struct mlxsw_sp *mlxsw_sp,
 				struct mlxsw_sp_fib6_entry *fib6_entry,
-				struct rt6_info *rt)
+				struct fib6_info *rt)
 {
 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
 	int err;
@@ -5068,7 +5068,7 @@ mlxsw_sp_fib6_entry_nexthop_add(struct mlxsw_sp *mlxsw_sp,
 static void
 mlxsw_sp_fib6_entry_nexthop_del(struct mlxsw_sp *mlxsw_sp,
 				struct mlxsw_sp_fib6_entry *fib6_entry,
-				struct rt6_info *rt)
+				struct fib6_info *rt)
 {
 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
 
@@ -5084,7 +5084,7 @@ mlxsw_sp_fib6_entry_nexthop_del(struct mlxsw_sp *mlxsw_sp,
 
 static void mlxsw_sp_fib6_entry_type_set(struct mlxsw_sp *mlxsw_sp,
 					 struct mlxsw_sp_fib_entry *fib_entry,
-					 const struct rt6_info *rt)
+					 const struct fib6_info *rt)
 {
 	/* Packets hitting RTF_REJECT routes need to be discarded by the
 	 * stack. We can rely on their destination device not having a
@@ -5092,9 +5092,9 @@ static void mlxsw_sp_fib6_entry_type_set(struct mlxsw_sp *mlxsw_sp,
 	 * local, which will cause them to be trapped with a lower
 	 * priority than packets that need to be locally received.
 	 */
-	if (rt->rt6i_flags & (RTF_LOCAL | RTF_ANYCAST))
+	if (rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST))
 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
-	else if (rt->rt6i_flags & RTF_REJECT)
+	else if (rt->fib6_flags & RTF_REJECT)
 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
 	else if (mlxsw_sp_rt6_is_gateway(mlxsw_sp, rt))
 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE;
@@ -5118,7 +5118,7 @@ mlxsw_sp_fib6_entry_rt_destroy_all(struct mlxsw_sp_fib6_entry *fib6_entry)
 static struct mlxsw_sp_fib6_entry *
 mlxsw_sp_fib6_entry_create(struct mlxsw_sp *mlxsw_sp,
 			   struct mlxsw_sp_fib_node *fib_node,
-			   struct rt6_info *rt)
+			   struct fib6_info *rt)
 {
 	struct mlxsw_sp_fib6_entry *fib6_entry;
 	struct mlxsw_sp_fib_entry *fib_entry;
@@ -5168,25 +5168,25 @@ static void mlxsw_sp_fib6_entry_destroy(struct mlxsw_sp *mlxsw_sp,
 
 static struct mlxsw_sp_fib6_entry *
 mlxsw_sp_fib6_node_entry_find(const struct mlxsw_sp_fib_node *fib_node,
-			      const struct rt6_info *nrt, bool replace)
+			      const struct fib6_info *nrt, bool replace)
 {
 	struct mlxsw_sp_fib6_entry *fib6_entry, *fallback = NULL;
 
 	list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) {
-		struct rt6_info *rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
+		struct fib6_info *rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
 
-		if (rt->rt6i_table->tb6_id > nrt->rt6i_table->tb6_id)
+		if (rt->fib6_table->tb6_id > nrt->fib6_table->tb6_id)
 			continue;
-		if (rt->rt6i_table->tb6_id != nrt->rt6i_table->tb6_id)
+		if (rt->fib6_table->tb6_id != nrt->fib6_table->tb6_id)
 			break;
-		if (replace && rt->rt6i_metric == nrt->rt6i_metric) {
+		if (replace && rt->fib6_metric == nrt->fib6_metric) {
 			if (mlxsw_sp_fib6_rt_can_mp(rt) ==
 			    mlxsw_sp_fib6_rt_can_mp(nrt))
 				return fib6_entry;
 			if (mlxsw_sp_fib6_rt_can_mp(nrt))
 				fallback = fallback ?: fib6_entry;
 		}
-		if (rt->rt6i_metric > nrt->rt6i_metric)
+		if (rt->fib6_metric > nrt->fib6_metric)
 			return fallback ?: fib6_entry;
 	}
 
@@ -5198,7 +5198,7 @@ mlxsw_sp_fib6_node_list_insert(struct mlxsw_sp_fib6_entry *new6_entry,
 			       bool replace)
 {
 	struct mlxsw_sp_fib_node *fib_node = new6_entry->common.fib_node;
-	struct rt6_info *nrt = mlxsw_sp_fib6_entry_rt(new6_entry);
+	struct fib6_info *nrt = mlxsw_sp_fib6_entry_rt(new6_entry);
 	struct mlxsw_sp_fib6_entry *fib6_entry;
 
 	fib6_entry = mlxsw_sp_fib6_node_entry_find(fib_node, nrt, replace);
@@ -5213,9 +5213,9 @@ mlxsw_sp_fib6_node_list_insert(struct mlxsw_sp_fib6_entry *new6_entry,
 		struct mlxsw_sp_fib6_entry *last;
 
 		list_for_each_entry(last, &fib_node->entry_list, common.list) {
-			struct rt6_info *rt = mlxsw_sp_fib6_entry_rt(last);
+			struct fib6_info *rt = mlxsw_sp_fib6_entry_rt(last);
 
-			if (nrt->rt6i_table->tb6_id > rt->rt6i_table->tb6_id)
+			if (nrt->fib6_table->tb6_id > rt->fib6_table->tb6_id)
 				break;
 			fib6_entry = last;
 		}
@@ -5268,29 +5268,29 @@ mlxsw_sp_fib6_node_entry_unlink(struct mlxsw_sp *mlxsw_sp,
 
 static struct mlxsw_sp_fib6_entry *
 mlxsw_sp_fib6_entry_lookup(struct mlxsw_sp *mlxsw_sp,
-			   const struct rt6_info *rt)
+			   const struct fib6_info *rt)
 {
 	struct mlxsw_sp_fib6_entry *fib6_entry;
 	struct mlxsw_sp_fib_node *fib_node;
 	struct mlxsw_sp_fib *fib;
 	struct mlxsw_sp_vr *vr;
 
-	vr = mlxsw_sp_vr_find(mlxsw_sp, rt->rt6i_table->tb6_id);
+	vr = mlxsw_sp_vr_find(mlxsw_sp, rt->fib6_table->tb6_id);
 	if (!vr)
 		return NULL;
 	fib = mlxsw_sp_vr_fib(vr, MLXSW_SP_L3_PROTO_IPV6);
 
-	fib_node = mlxsw_sp_fib_node_lookup(fib, &rt->rt6i_dst.addr,
-					    sizeof(rt->rt6i_dst.addr),
-					    rt->rt6i_dst.plen);
+	fib_node = mlxsw_sp_fib_node_lookup(fib, &rt->fib6_dst.addr,
+					    sizeof(rt->fib6_dst.addr),
+					    rt->fib6_dst.plen);
 	if (!fib_node)
 		return NULL;
 
 	list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) {
-		struct rt6_info *iter_rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
+		struct fib6_info *iter_rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
 
-		if (rt->rt6i_table->tb6_id == iter_rt->rt6i_table->tb6_id &&
-		    rt->rt6i_metric == iter_rt->rt6i_metric &&
+		if (rt->fib6_table->tb6_id == iter_rt->fib6_table->tb6_id &&
+		    rt->fib6_metric == iter_rt->fib6_metric &&
 		    mlxsw_sp_fib6_entry_rt_find(fib6_entry, rt))
 			return fib6_entry;
 	}
@@ -5316,7 +5316,7 @@ static void mlxsw_sp_fib6_entry_replace(struct mlxsw_sp *mlxsw_sp,
 }
 
 static int mlxsw_sp_router_fib6_add(struct mlxsw_sp *mlxsw_sp,
-				    struct rt6_info *rt, bool replace)
+				    struct fib6_info *rt, bool replace)
 {
 	struct mlxsw_sp_fib6_entry *fib6_entry;
 	struct mlxsw_sp_fib_node *fib_node;
@@ -5325,16 +5325,16 @@ static int mlxsw_sp_router_fib6_add(struct mlxsw_sp *mlxsw_sp,
 	if (mlxsw_sp->router->aborted)
 		return 0;
 
-	if (rt->rt6i_src.plen)
+	if (rt->fib6_src.plen)
 		return -EINVAL;
 
 	if (mlxsw_sp_fib6_rt_should_ignore(rt))
 		return 0;
 
-	fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, rt->rt6i_table->tb6_id,
-					 &rt->rt6i_dst.addr,
-					 sizeof(rt->rt6i_dst.addr),
-					 rt->rt6i_dst.plen,
+	fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, rt->fib6_table->tb6_id,
+					 &rt->fib6_dst.addr,
+					 sizeof(rt->fib6_dst.addr),
+					 rt->fib6_dst.plen,
 					 MLXSW_SP_L3_PROTO_IPV6);
 	if (IS_ERR(fib_node))
 		return PTR_ERR(fib_node);
@@ -5373,7 +5373,7 @@ static int mlxsw_sp_router_fib6_add(struct mlxsw_sp *mlxsw_sp,
 }
 
 static void mlxsw_sp_router_fib6_del(struct mlxsw_sp *mlxsw_sp,
-				     struct rt6_info *rt)
+				     struct fib6_info *rt)
 {
 	struct mlxsw_sp_fib6_entry *fib6_entry;
 	struct mlxsw_sp_fib_node *fib_node;
@@ -5836,7 +5836,7 @@ static void mlxsw_sp_router_fib6_event(struct mlxsw_sp_fib_event_work *fib_work,
 		fen6_info = container_of(info, struct fib6_entry_notifier_info,
 					 info);
 		fib_work->fen6_info = *fen6_info;
-		rt6_hold(fib_work->fen6_info.rt);
+		fib6_info_hold(fib_work->fen6_info.rt);
 		break;
 	}
 }
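 
The spectrum_router.c conversion above is mechanical once the rt6_info to
fib6_info field mapping is known; for reference, the renames used in these
hunks are:

	rt6_info (old)              fib6_info (new)
	--------------              ---------------
	rt->dst.dev                 rt->fib6_nh.nh_dev
	rt->rt6i_gateway            rt->fib6_nh.nh_gw
	rt->rt6i_nh_weight          rt->fib6_nh.nh_weight
	rt->rt6i_nh_flags           rt->fib6_nh.nh_flags
	rt->rt6i_flags              rt->fib6_flags
	rt->rt6i_table              rt->fib6_table
	rt->rt6i_metric             rt->fib6_metric
	rt->rt6i_dst, rt->rt6i_src  rt->fib6_dst, rt->fib6_src
	rt6_hold(), rt6_release()   fib6_info_hold(), fib6_info_release()
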
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
index 1eb6549..d9111c0 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
@@ -1722,7 +1722,7 @@ static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget)
 
 			act = bpf_prog_run_xdp(xdp_prog, &xdp);
 
-			pkt_len -= xdp.data - orig_data;
+			pkt_len = xdp.data_end - xdp.data;
 			pkt_off += xdp.data - orig_data;
 
 			switch (act) {
diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.c b/drivers/net/ethernet/qlogic/qed/qed_l2.c
index e874504..8b1b7e8 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_l2.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_l2.c
@@ -2850,6 +2850,24 @@ static int qed_fp_cqe_completion(struct qed_dev *dev,
 				      cqe);
 }
 
+static int qed_req_bulletin_update_mac(struct qed_dev *cdev, u8 *mac)
+{
+	int i, ret;
+
+	if (IS_PF(cdev))
+		return 0;
+
+	for_each_hwfn(cdev, i) {
+		struct qed_hwfn *p_hwfn = &cdev->hwfns[i];
+
+		ret = qed_vf_pf_bulletin_update_mac(p_hwfn, mac);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
 #ifdef CONFIG_QED_SRIOV
 extern const struct qed_iov_hv_ops qed_iov_ops_pass;
 #endif
@@ -2887,6 +2905,7 @@ static const struct qed_eth_ops qed_eth_ops_pass = {
 	.ntuple_filter_config = &qed_ntuple_arfs_filter_config,
 	.configure_arfs_searcher = &qed_configure_arfs_searcher,
 	.get_coalesce = &qed_get_coalesce,
+	.req_bulletin_update_mac = &qed_req_bulletin_update_mac,
 };
 
 const struct qed_eth_ops *qed_get_eth_ops(void)
diff --git a/drivers/net/ethernet/qlogic/qed/qed_sriov.c b/drivers/net/ethernet/qlogic/qed/qed_sriov.c
index 5acb91b..f01bf52 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_sriov.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_sriov.c
@@ -48,7 +48,7 @@ static int qed_sriov_eqe_event(struct qed_hwfn *p_hwfn,
 			       u8 opcode,
 			       __le16 echo,
 			       union event_ring_data *data, u8 fw_return_code);
-
+static int qed_iov_bulletin_set_mac(struct qed_hwfn *p_hwfn, u8 *mac, int vfid);
 
 static u8 qed_vf_calculate_legacy(struct qed_vf_info *p_vf)
 {
@@ -1790,7 +1790,8 @@ static int qed_iov_configure_vport_forced(struct qed_hwfn *p_hwfn,
 	if (!p_vf->vport_instance)
 		return -EINVAL;
 
-	if (events & BIT(MAC_ADDR_FORCED)) {
+	if ((events & BIT(MAC_ADDR_FORCED)) ||
+	    p_vf->p_vf_info.is_trusted_configured) {
 		/* Since there's no way [currently] of removing the MAC,
 		 * we can always assume this means we need to force it.
 		 */
@@ -1809,8 +1810,12 @@ static int qed_iov_configure_vport_forced(struct qed_hwfn *p_hwfn,
 				  "PF failed to configure MAC for VF\n");
 			return rc;
 		}
-
-		p_vf->configured_features |= 1 << MAC_ADDR_FORCED;
+		if (p_vf->p_vf_info.is_trusted_configured)
+			p_vf->configured_features |=
+				BIT(VFPF_BULLETIN_MAC_ADDR);
+		else
+			p_vf->configured_features |=
+				BIT(MAC_ADDR_FORCED);
 	}
 
 	if (events & BIT(VLAN_ADDR_FORCED)) {
@@ -3170,6 +3175,10 @@ static int qed_iov_vf_update_mac_shadow(struct qed_hwfn *p_hwfn,
 	if (p_vf->bulletin.p_virt->valid_bitmap & BIT(MAC_ADDR_FORCED))
 		return 0;
 
+	/* Don't keep track of shadow copy since we don't intend to restore. */
+	if (p_vf->p_vf_info.is_trusted_configured)
+		return 0;
+
 	/* First remove entries and then add new ones */
 	if (p_params->opcode == QED_FILTER_REMOVE) {
 		for (i = 0; i < QED_ETH_VF_NUM_MAC_FILTERS; i++) {
@@ -3244,9 +3253,17 @@ static int qed_iov_chk_ucast(struct qed_hwfn *hwfn,
 
 	/* No real decision to make; Store the configured MAC */
 	if (params->type == QED_FILTER_MAC ||
-	    params->type == QED_FILTER_MAC_VLAN)
+	    params->type == QED_FILTER_MAC_VLAN) {
 		ether_addr_copy(vf->mac, params->mac);
 
+		if (vf->is_trusted_configured) {
+			qed_iov_bulletin_set_mac(hwfn, vf->mac, vfid);
+
+			/* Update and post bulletin again */
+			qed_schedule_iov(hwfn, QED_IOV_WQ_BULLETIN_UPDATE_FLAG);
+		}
+	}
+
 	return 0;
 }
 
@@ -3803,6 +3820,40 @@ static void qed_iov_get_link(struct qed_hwfn *p_hwfn,
 		__qed_vf_get_link_caps(p_hwfn, p_caps, p_bulletin);
 }
 
+static int
+qed_iov_vf_pf_bulletin_update_mac(struct qed_hwfn *p_hwfn,
+				  struct qed_ptt *p_ptt,
+				  struct qed_vf_info *p_vf)
+{
+	struct qed_bulletin_content *p_bulletin = p_vf->bulletin.p_virt;
+	struct qed_iov_vf_mbx *mbx = &p_vf->vf_mbx;
+	struct vfpf_bulletin_update_mac_tlv *p_req;
+	u8 status = PFVF_STATUS_SUCCESS;
+	int rc = 0;
+
+	if (!p_vf->p_vf_info.is_trusted_configured) {
+		DP_VERBOSE(p_hwfn,
+			   QED_MSG_IOV,
+			   "Blocking bulletin update request from untrusted VF[%d]\n",
+			   p_vf->abs_vf_id);
+		status = PFVF_STATUS_NOT_SUPPORTED;
+		rc = -EINVAL;
+		goto send_status;
+	}
+
+	p_req = &mbx->req_virt->bulletin_update_mac;
+	ether_addr_copy(p_bulletin->mac, p_req->mac);
+	DP_VERBOSE(p_hwfn, QED_MSG_IOV,
+		   "Updated bulletin of VF[%d] with requested MAC[%pM]\n",
+		   p_vf->abs_vf_id, p_req->mac);
+
+send_status:
+	qed_iov_prepare_resp(p_hwfn, p_ptt, p_vf,
+			     CHANNEL_TLV_BULLETIN_UPDATE_MAC,
+			     sizeof(struct pfvf_def_resp_tlv), status);
+	return rc;
+}
+
 static void qed_iov_process_mbx_req(struct qed_hwfn *p_hwfn,
 				    struct qed_ptt *p_ptt, int vfid)
 {
@@ -3882,6 +3933,9 @@ static void qed_iov_process_mbx_req(struct qed_hwfn *p_hwfn,
 		case CHANNEL_TLV_COALESCE_READ:
 			qed_iov_vf_pf_get_coalesce(p_hwfn, p_ptt, p_vf);
 			break;
+		case CHANNEL_TLV_BULLETIN_UPDATE_MAC:
+			qed_iov_vf_pf_bulletin_update_mac(p_hwfn, p_ptt, p_vf);
+			break;
 		}
 	} else if (qed_iov_tlv_supported(mbx->first_tlv.tl.type)) {
 		DP_VERBOSE(p_hwfn, QED_MSG_IOV,
@@ -4081,16 +4135,60 @@ static void qed_iov_bulletin_set_forced_mac(struct qed_hwfn *p_hwfn,
 		return;
 	}
 
-	feature = 1 << MAC_ADDR_FORCED;
+	if (vf_info->p_vf_info.is_trusted_configured) {
+		feature = BIT(VFPF_BULLETIN_MAC_ADDR);
+		/* Trust mode will disable Forced MAC */
+		vf_info->bulletin.p_virt->valid_bitmap &=
+			~BIT(MAC_ADDR_FORCED);
+	} else {
+		feature = BIT(MAC_ADDR_FORCED);
+		/* Forced MAC will disable MAC_ADDR */
+		vf_info->bulletin.p_virt->valid_bitmap &=
+			~BIT(VFPF_BULLETIN_MAC_ADDR);
+	}
+
 	memcpy(vf_info->bulletin.p_virt->mac, mac, ETH_ALEN);
 
 	vf_info->bulletin.p_virt->valid_bitmap |= feature;
-	/* Forced MAC will disable MAC_ADDR */
-	vf_info->bulletin.p_virt->valid_bitmap &= ~BIT(VFPF_BULLETIN_MAC_ADDR);
 
 	qed_iov_configure_vport_forced(p_hwfn, vf_info, feature);
 }
 
+static int qed_iov_bulletin_set_mac(struct qed_hwfn *p_hwfn, u8 *mac, int vfid)
+{
+	struct qed_vf_info *vf_info;
+	u64 feature;
+
+	vf_info = qed_iov_get_vf_info(p_hwfn, (u16)vfid, true);
+	if (!vf_info) {
+		DP_NOTICE(p_hwfn->cdev, "Can not set MAC, invalid vfid [%d]\n",
+			  vfid);
+		return -EINVAL;
+	}
+
+	if (vf_info->b_malicious) {
+		DP_NOTICE(p_hwfn->cdev, "Can't set MAC to malicious VF [%d]\n",
+			  vfid);
+		return -EINVAL;
+	}
+
+	if (vf_info->bulletin.p_virt->valid_bitmap & BIT(MAC_ADDR_FORCED)) {
+		DP_VERBOSE(p_hwfn, QED_MSG_IOV,
+			   "Can not set MAC, Forced MAC is configured\n");
+		return -EINVAL;
+	}
+
+	feature = BIT(VFPF_BULLETIN_MAC_ADDR);
+	ether_addr_copy(vf_info->bulletin.p_virt->mac, mac);
+
+	vf_info->bulletin.p_virt->valid_bitmap |= feature;
+
+	if (vf_info->p_vf_info.is_trusted_configured)
+		qed_iov_configure_vport_forced(p_hwfn, vf_info, feature);
+
+	return 0;
+}
+
 static void qed_iov_bulletin_set_forced_vlan(struct qed_hwfn *p_hwfn,
 					     u16 pvid, int vfid)
 {
@@ -4204,6 +4302,21 @@ static int qed_iov_spoofchk_set(struct qed_hwfn *p_hwfn, int vfid, bool val)
 	return rc;
 }
 
+static u8 *qed_iov_bulletin_get_mac(struct qed_hwfn *p_hwfn, u16 rel_vf_id)
+{
+	struct qed_vf_info *p_vf;
+
+	p_vf = qed_iov_get_vf_info(p_hwfn, rel_vf_id, true);
+	if (!p_vf || !p_vf->bulletin.p_virt)
+		return NULL;
+
+	if (!(p_vf->bulletin.p_virt->valid_bitmap &
+	      BIT(VFPF_BULLETIN_MAC_ADDR)))
+		return NULL;
+
+	return p_vf->bulletin.p_virt->mac;
+}
+
 static u8 *qed_iov_bulletin_get_forced_mac(struct qed_hwfn *p_hwfn,
 					   u16 rel_vf_id)
 {
@@ -4493,8 +4606,12 @@ static int qed_sriov_pf_set_mac(struct qed_dev *cdev, u8 *mac, int vfid)
 		if (!vf_info)
 			continue;
 
-		/* Set the forced MAC, and schedule the IOV task */
-		ether_addr_copy(vf_info->forced_mac, mac);
+		/* Set the MAC, and schedule the IOV task */
+		if (vf_info->is_trusted_configured)
+			ether_addr_copy(vf_info->mac, mac);
+		else
+			ether_addr_copy(vf_info->forced_mac, mac);
+
 		qed_schedule_iov(hwfn, QED_IOV_WQ_SET_UNICAST_FILTER_FLAG);
 	}
 
@@ -4802,6 +4919,33 @@ static void qed_handle_vf_msg(struct qed_hwfn *hwfn)
 	qed_ptt_release(hwfn, ptt);
 }
 
+static bool qed_pf_validate_req_vf_mac(struct qed_hwfn *hwfn,
+				       u8 *mac,
+				       struct qed_public_vf_info *info)
+{
+	if (info->is_trusted_configured) {
+		if (is_valid_ether_addr(info->mac) &&
+		    (!mac || !ether_addr_equal(mac, info->mac)))
+			return true;
+	} else {
+		if (is_valid_ether_addr(info->forced_mac) &&
+		    (!mac || !ether_addr_equal(mac, info->forced_mac)))
+			return true;
+	}
+
+	return false;
+}
+
+static void qed_set_bulletin_mac(struct qed_hwfn *hwfn,
+				 struct qed_public_vf_info *info,
+				 int vfid)
+{
+	if (info->is_trusted_configured)
+		qed_iov_bulletin_set_mac(hwfn, info->mac, vfid);
+	else
+		qed_iov_bulletin_set_forced_mac(hwfn, info->forced_mac, vfid);
+}
+
 static void qed_handle_pf_set_vf_unicast(struct qed_hwfn *hwfn)
 {
 	int i;
@@ -4816,18 +4960,20 @@ static void qed_handle_pf_set_vf_unicast(struct qed_hwfn *hwfn)
 			continue;
 
 		/* Update data on bulletin board */
-		mac = qed_iov_bulletin_get_forced_mac(hwfn, i);
-		if (is_valid_ether_addr(info->forced_mac) &&
-		    (!mac || !ether_addr_equal(mac, info->forced_mac))) {
+		if (info->is_trusted_configured)
+			mac = qed_iov_bulletin_get_mac(hwfn, i);
+		else
+			mac = qed_iov_bulletin_get_forced_mac(hwfn, i);
+
+		if (qed_pf_validate_req_vf_mac(hwfn, mac, info)) {
 			DP_VERBOSE(hwfn,
 				   QED_MSG_IOV,
 				   "Handling PF setting of VF MAC to VF 0x%02x [Abs 0x%02x]\n",
 				   i,
 				   hwfn->cdev->p_iov_info->first_vf_in_pf + i);
 
-			/* Update bulletin board with forced MAC */
-			qed_iov_bulletin_set_forced_mac(hwfn,
-							info->forced_mac, i);
+			/* Update bulletin board with MAC */
+			qed_set_bulletin_mac(hwfn, info, i);
 			update = true;
 		}
 
@@ -4867,6 +5013,72 @@ static void qed_handle_bulletin_post(struct qed_hwfn *hwfn)
 	qed_ptt_release(hwfn, ptt);
 }
 
+static void qed_update_mac_for_vf_trust_change(struct qed_hwfn *hwfn, int vf_id)
+{
+	struct qed_public_vf_info *vf_info;
+	struct qed_vf_info *vf;
+	u8 *force_mac;
+	int i;
+
+	vf_info = qed_iov_get_public_vf_info(hwfn, vf_id, true);
+	vf = qed_iov_get_vf_info(hwfn, vf_id, true);
+
+	if (!vf_info || !vf)
+		return;
+
+	/* Convert the forced MAC to a generic MAC when VF trust is on */
+	if (vf_info->is_trusted_configured &&
+	    (vf->bulletin.p_virt->valid_bitmap & BIT(MAC_ADDR_FORCED))) {
+		force_mac = qed_iov_bulletin_get_forced_mac(hwfn, vf_id);
+
+		if (force_mac) {
+			/* Clear existing shadow copy of MAC to have a clean
+			 * slate.
+			 */
+			for (i = 0; i < QED_ETH_VF_NUM_MAC_FILTERS; i++) {
+				if (ether_addr_equal(vf->shadow_config.macs[i],
+						     vf_info->mac)) {
+					memset(vf->shadow_config.macs[i], 0,
+					       ETH_ALEN);
+					DP_VERBOSE(hwfn, QED_MSG_IOV,
+						   "Shadow MAC %pM removed for VF 0x%02x, VF trust mode is ON\n",
+						    vf_info->mac, vf_id);
+					break;
+				}
+			}
+
+			ether_addr_copy(vf_info->mac, force_mac);
+			memset(vf_info->forced_mac, 0, ETH_ALEN);
+			vf->bulletin.p_virt->valid_bitmap &=
+					~BIT(MAC_ADDR_FORCED);
+			qed_schedule_iov(hwfn, QED_IOV_WQ_BULLETIN_UPDATE_FLAG);
+		}
+	}
+
+	/* Update shadow copy with VF MAC when trust mode is turned off */
+	if (!vf_info->is_trusted_configured) {
+		u8 empty_mac[ETH_ALEN];
+
+		memset(empty_mac, 0, ETH_ALEN);
+		for (i = 0; i < QED_ETH_VF_NUM_MAC_FILTERS; i++) {
+			if (ether_addr_equal(vf->shadow_config.macs[i],
+					     empty_mac)) {
+				ether_addr_copy(vf->shadow_config.macs[i],
+						vf_info->mac);
+				DP_VERBOSE(hwfn, QED_MSG_IOV,
+					   "Shadow is updated with %pM for VF 0x%02x, VF trust mode is OFF\n",
+					    vf_info->mac, vf_id);
+				break;
+			}
+		}
+		/* Clear the bulletin when trust mode is turned off,
+		 * to have a clean slate for subsequent (normal) operation.
+		 */
+		qed_iov_bulletin_set_mac(hwfn, empty_mac, vf_id);
+		qed_schedule_iov(hwfn, QED_IOV_WQ_BULLETIN_UPDATE_FLAG);
+	}
+}
+
 static void qed_iov_handle_trust_change(struct qed_hwfn *hwfn)
 {
 	struct qed_sp_vport_update_params params;
@@ -4890,6 +5102,9 @@ static void qed_iov_handle_trust_change(struct qed_hwfn *hwfn)
 			continue;
 		vf_info->is_trusted_configured = vf_info->is_trusted_request;
 
+		/* Handle forced MAC mode */
+		qed_update_mac_for_vf_trust_change(hwfn, i);
+
 		/* Validate that the VF has a configured vport */
 		vf = qed_iov_get_vf_info(hwfn, i, true);
 		if (!vf->vport_instance)
diff --git a/drivers/net/ethernet/qlogic/qed/qed_vf.c b/drivers/net/ethernet/qlogic/qed/qed_vf.c
index 91b5e9f..2d7fcd6 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_vf.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_vf.c
@@ -1375,6 +1375,35 @@ int qed_vf_pf_get_coalesce(struct qed_hwfn *p_hwfn,
 }
 
 int
+qed_vf_pf_bulletin_update_mac(struct qed_hwfn *p_hwfn,
+			      u8 *p_mac)
+{
+	struct qed_vf_iov *p_iov = p_hwfn->vf_iov_info;
+	struct vfpf_bulletin_update_mac_tlv *p_req;
+	struct pfvf_def_resp_tlv *p_resp;
+	int rc;
+
+	if (!p_mac)
+		return -EINVAL;
+
+	/* clear mailbox and prep header tlv */
+	p_req = qed_vf_pf_prep(p_hwfn, CHANNEL_TLV_BULLETIN_UPDATE_MAC,
+			       sizeof(*p_req));
+	ether_addr_copy(p_req->mac, p_mac);
+	DP_VERBOSE(p_hwfn, QED_MSG_IOV,
+		   "Requesting bulletin update for MAC[%pM]\n", p_mac);
+
+	/* add list termination tlv */
+	qed_add_tlv(p_hwfn, &p_iov->offset, CHANNEL_TLV_LIST_END,
+		    sizeof(struct channel_list_end_tlv));
+
+	p_resp = &p_iov->pf2vf_reply->default_resp;
+	rc = qed_send_msg2pf(p_hwfn, &p_resp->hdr.status, sizeof(*p_resp));
+	qed_vf_pf_req_end(p_hwfn, rc);
+	return rc;
+}
+
+int
 qed_vf_pf_set_coalesce(struct qed_hwfn *p_hwfn,
 		       u16 rx_coal, u16 tx_coal, struct qed_queue_cid *p_cid)
 {
diff --git a/drivers/net/ethernet/qlogic/qed/qed_vf.h b/drivers/net/ethernet/qlogic/qed/qed_vf.h
index 97d44df..4f05d5e 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_vf.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_vf.h
@@ -518,6 +518,12 @@ struct pfvf_read_coal_resp_tlv {
 	u8 padding[6];
 };
 
+struct vfpf_bulletin_update_mac_tlv {
+	struct vfpf_first_tlv first_tlv;
+	u8 mac[ETH_ALEN];
+	u8 padding[2];
+};
+
 union vfpf_tlvs {
 	struct vfpf_first_tlv first_tlv;
 	struct vfpf_acquire_tlv acquire;
@@ -532,6 +538,7 @@ union vfpf_tlvs {
 	struct vfpf_update_tunn_param_tlv tunn_param_update;
 	struct vfpf_update_coalesce update_coalesce;
 	struct vfpf_read_coal_req_tlv read_coal_req;
+	struct vfpf_bulletin_update_mac_tlv bulletin_update_mac;
 	struct tlv_buffer_size tlv_buf_size;
 };
 
@@ -650,6 +657,7 @@ enum {
 	CHANNEL_TLV_COALESCE_UPDATE,
 	CHANNEL_TLV_QID,
 	CHANNEL_TLV_COALESCE_READ,
+	CHANNEL_TLV_BULLETIN_UPDATE_MAC,
 	CHANNEL_TLV_MAX,
 
 	/* Required for iterating over vport-update tlvs.
@@ -1042,6 +1050,13 @@ int qed_vf_pf_tunnel_param_update(struct qed_hwfn *p_hwfn,
 				  struct qed_tunnel_info *p_tunn);
 
 u32 qed_vf_hw_bar_size(struct qed_hwfn *p_hwfn, enum BAR_ID bar_id);
+/**
+ * @brief - Ask PF to update the MAC address in its bulletin board
+ *
+ * @param p_mac - mac address to be updated in bulletin board
+ */
+int qed_vf_pf_bulletin_update_mac(struct qed_hwfn *p_hwfn, u8 *p_mac);
+
 #else
 static inline void qed_vf_get_link_params(struct qed_hwfn *p_hwfn,
 					  struct qed_mcp_link_params *params)
@@ -1228,6 +1243,12 @@ static inline int qed_vf_pf_tunnel_param_update(struct qed_hwfn *p_hwfn,
 	return -EINVAL;
 }
 
+static inline int qed_vf_pf_bulletin_update_mac(struct qed_hwfn *p_hwfn,
+						u8 *p_mac)
+{
+	return -EINVAL;
+}
+
 static inline u32
 qed_vf_hw_bar_size(struct qed_hwfn  *p_hwfn,
 		   enum BAR_ID bar_id)
diff --git a/drivers/net/ethernet/qlogic/qede/qede_filter.c b/drivers/net/ethernet/qlogic/qede/qede_filter.c
index 6687e04..43569b1 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_filter.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_filter.c
@@ -550,8 +550,7 @@ void qede_force_mac(void *dev, u8 *mac, bool forced)
 
 	__qede_lock(edev);
 
-	/* MAC hints take effect only if we haven't set one already */
-	if (is_valid_ether_addr(edev->ndev->dev_addr) && !forced) {
+	if (!is_valid_ether_addr(mac)) {
 		__qede_unlock(edev);
 		return;
 	}
@@ -1161,6 +1160,10 @@ int qede_set_mac_addr(struct net_device *ndev, void *p)
 	if (edev->state != QEDE_STATE_OPEN) {
 		DP_VERBOSE(edev, NETIF_MSG_IFDOWN,
 			   "The device is currently down\n");
+		/* Ask the PF to explicitly update its copy in the bulletin board */
+		if (IS_VF(edev) && edev->ops->req_bulletin_update_mac)
+			edev->ops->req_bulletin_update_mac(edev->cdev,
+							   ndev->dev_addr);
 		goto out;
 	}
 
diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c
index 604ae78..fcd42d0 100644
--- a/drivers/net/ethernet/realtek/r8169.c
+++ b/drivers/net/ethernet/realtek/r8169.c
@@ -84,12 +84,11 @@
    The RTL chips use a 64 element hash table based on the Ethernet CRC. */
 static const int multicast_filter_limit = 32;
 
-#define MAX_READ_REQUEST_SHIFT	12
 #define TX_DMA_BURST	7	/* Maximum PCI burst, '7' is unlimited */
 #define InterFrameGap	0x03	/* 3 means InterFrameGap = the shortest one */
 
 #define R8169_REGS_SIZE		256
-#define R8169_NAPI_WEIGHT	64
+#define R8169_RX_BUF_SIZE	(SZ_16K - 1)
 #define NUM_TX_DESC	64	/* Number of Tx descriptor registers */
 #define NUM_RX_DESC	256U	/* Number of Rx descriptor registers */
 #define R8169_TX_RING_BYTES	(NUM_TX_DESC * sizeof(struct TxDesc))
@@ -172,12 +171,11 @@ enum rtl_tx_desc_version {
 #define JUMBO_7K	(7*1024 - ETH_HLEN - 2)
 #define JUMBO_9K	(9*1024 - ETH_HLEN - 2)
 
-#define _R(NAME,TD,FW,SZ,B) {	\
+#define _R(NAME,TD,FW,SZ) {	\
 	.name = NAME,		\
 	.txd_version = TD,	\
 	.fw_name = FW,		\
 	.jumbo_max = SZ,	\
-	.jumbo_tx_csum = B	\
 }
 
 static const struct {
@@ -185,135 +183,111 @@ static const struct {
 	enum rtl_tx_desc_version txd_version;
 	const char *fw_name;
 	u16 jumbo_max;
-	bool jumbo_tx_csum;
 } rtl_chip_infos[] = {
 	/* PCI devices. */
 	[RTL_GIGA_MAC_VER_01] =
-		_R("RTL8169",		RTL_TD_0, NULL, JUMBO_7K, true),
+		_R("RTL8169",		RTL_TD_0, NULL, JUMBO_7K),
 	[RTL_GIGA_MAC_VER_02] =
-		_R("RTL8169s",		RTL_TD_0, NULL, JUMBO_7K, true),
+		_R("RTL8169s",		RTL_TD_0, NULL, JUMBO_7K),
 	[RTL_GIGA_MAC_VER_03] =
-		_R("RTL8110s",		RTL_TD_0, NULL, JUMBO_7K, true),
+		_R("RTL8110s",		RTL_TD_0, NULL, JUMBO_7K),
 	[RTL_GIGA_MAC_VER_04] =
-		_R("RTL8169sb/8110sb",	RTL_TD_0, NULL, JUMBO_7K, true),
+		_R("RTL8169sb/8110sb",	RTL_TD_0, NULL, JUMBO_7K),
 	[RTL_GIGA_MAC_VER_05] =
-		_R("RTL8169sc/8110sc",	RTL_TD_0, NULL, JUMBO_7K, true),
+		_R("RTL8169sc/8110sc",	RTL_TD_0, NULL, JUMBO_7K),
 	[RTL_GIGA_MAC_VER_06] =
-		_R("RTL8169sc/8110sc",	RTL_TD_0, NULL, JUMBO_7K, true),
+		_R("RTL8169sc/8110sc",	RTL_TD_0, NULL, JUMBO_7K),
 	/* PCI-E devices. */
 	[RTL_GIGA_MAC_VER_07] =
-		_R("RTL8102e",		RTL_TD_1, NULL, JUMBO_1K, true),
+		_R("RTL8102e",		RTL_TD_1, NULL, JUMBO_1K),
 	[RTL_GIGA_MAC_VER_08] =
-		_R("RTL8102e",		RTL_TD_1, NULL, JUMBO_1K, true),
+		_R("RTL8102e",		RTL_TD_1, NULL, JUMBO_1K),
 	[RTL_GIGA_MAC_VER_09] =
-		_R("RTL8102e",		RTL_TD_1, NULL, JUMBO_1K, true),
+		_R("RTL8102e",		RTL_TD_1, NULL, JUMBO_1K),
 	[RTL_GIGA_MAC_VER_10] =
-		_R("RTL8101e",		RTL_TD_0, NULL, JUMBO_1K, true),
+		_R("RTL8101e",		RTL_TD_0, NULL, JUMBO_1K),
 	[RTL_GIGA_MAC_VER_11] =
-		_R("RTL8168b/8111b",	RTL_TD_0, NULL, JUMBO_4K, false),
+		_R("RTL8168b/8111b",	RTL_TD_0, NULL, JUMBO_4K),
 	[RTL_GIGA_MAC_VER_12] =
-		_R("RTL8168b/8111b",	RTL_TD_0, NULL, JUMBO_4K, false),
+		_R("RTL8168b/8111b",	RTL_TD_0, NULL, JUMBO_4K),
 	[RTL_GIGA_MAC_VER_13] =
-		_R("RTL8101e",		RTL_TD_0, NULL, JUMBO_1K, true),
+		_R("RTL8101e",		RTL_TD_0, NULL, JUMBO_1K),
 	[RTL_GIGA_MAC_VER_14] =
-		_R("RTL8100e",		RTL_TD_0, NULL, JUMBO_1K, true),
+		_R("RTL8100e",		RTL_TD_0, NULL, JUMBO_1K),
 	[RTL_GIGA_MAC_VER_15] =
-		_R("RTL8100e",		RTL_TD_0, NULL, JUMBO_1K, true),
+		_R("RTL8100e",		RTL_TD_0, NULL, JUMBO_1K),
 	[RTL_GIGA_MAC_VER_16] =
-		_R("RTL8101e",		RTL_TD_0, NULL, JUMBO_1K, true),
+		_R("RTL8101e",		RTL_TD_0, NULL, JUMBO_1K),
 	[RTL_GIGA_MAC_VER_17] =
-		_R("RTL8168b/8111b",	RTL_TD_0, NULL, JUMBO_4K, false),
+		_R("RTL8168b/8111b",	RTL_TD_0, NULL, JUMBO_4K),
 	[RTL_GIGA_MAC_VER_18] =
-		_R("RTL8168cp/8111cp",	RTL_TD_1, NULL, JUMBO_6K, false),
+		_R("RTL8168cp/8111cp",	RTL_TD_1, NULL, JUMBO_6K),
 	[RTL_GIGA_MAC_VER_19] =
-		_R("RTL8168c/8111c",	RTL_TD_1, NULL, JUMBO_6K, false),
+		_R("RTL8168c/8111c",	RTL_TD_1, NULL, JUMBO_6K),
 	[RTL_GIGA_MAC_VER_20] =
-		_R("RTL8168c/8111c",	RTL_TD_1, NULL, JUMBO_6K, false),
+		_R("RTL8168c/8111c",	RTL_TD_1, NULL, JUMBO_6K),
 	[RTL_GIGA_MAC_VER_21] =
-		_R("RTL8168c/8111c",	RTL_TD_1, NULL, JUMBO_6K, false),
+		_R("RTL8168c/8111c",	RTL_TD_1, NULL, JUMBO_6K),
 	[RTL_GIGA_MAC_VER_22] =
-		_R("RTL8168c/8111c",	RTL_TD_1, NULL, JUMBO_6K, false),
+		_R("RTL8168c/8111c",	RTL_TD_1, NULL, JUMBO_6K),
 	[RTL_GIGA_MAC_VER_23] =
-		_R("RTL8168cp/8111cp",	RTL_TD_1, NULL, JUMBO_6K, false),
+		_R("RTL8168cp/8111cp",	RTL_TD_1, NULL, JUMBO_6K),
 	[RTL_GIGA_MAC_VER_24] =
-		_R("RTL8168cp/8111cp",	RTL_TD_1, NULL, JUMBO_6K, false),
+		_R("RTL8168cp/8111cp",	RTL_TD_1, NULL, JUMBO_6K),
 	[RTL_GIGA_MAC_VER_25] =
-		_R("RTL8168d/8111d",	RTL_TD_1, FIRMWARE_8168D_1,
-							JUMBO_9K, false),
+		_R("RTL8168d/8111d",	RTL_TD_1, FIRMWARE_8168D_1, JUMBO_9K),
 	[RTL_GIGA_MAC_VER_26] =
-		_R("RTL8168d/8111d",	RTL_TD_1, FIRMWARE_8168D_2,
-							JUMBO_9K, false),
+		_R("RTL8168d/8111d",	RTL_TD_1, FIRMWARE_8168D_2, JUMBO_9K),
 	[RTL_GIGA_MAC_VER_27] =
-		_R("RTL8168dp/8111dp",	RTL_TD_1, NULL, JUMBO_9K, false),
+		_R("RTL8168dp/8111dp",	RTL_TD_1, NULL, JUMBO_9K),
 	[RTL_GIGA_MAC_VER_28] =
-		_R("RTL8168dp/8111dp",	RTL_TD_1, NULL, JUMBO_9K, false),
+		_R("RTL8168dp/8111dp",	RTL_TD_1, NULL, JUMBO_9K),
 	[RTL_GIGA_MAC_VER_29] =
-		_R("RTL8105e",		RTL_TD_1, FIRMWARE_8105E_1,
-							JUMBO_1K, true),
+		_R("RTL8105e",		RTL_TD_1, FIRMWARE_8105E_1, JUMBO_1K),
 	[RTL_GIGA_MAC_VER_30] =
-		_R("RTL8105e",		RTL_TD_1, FIRMWARE_8105E_1,
-							JUMBO_1K, true),
+		_R("RTL8105e",		RTL_TD_1, FIRMWARE_8105E_1, JUMBO_1K),
 	[RTL_GIGA_MAC_VER_31] =
-		_R("RTL8168dp/8111dp",	RTL_TD_1, NULL, JUMBO_9K, false),
+		_R("RTL8168dp/8111dp",	RTL_TD_1, NULL, JUMBO_9K),
 	[RTL_GIGA_MAC_VER_32] =
-		_R("RTL8168e/8111e",	RTL_TD_1, FIRMWARE_8168E_1,
-							JUMBO_9K, false),
+		_R("RTL8168e/8111e",	RTL_TD_1, FIRMWARE_8168E_1, JUMBO_9K),
 	[RTL_GIGA_MAC_VER_33] =
-		_R("RTL8168e/8111e",	RTL_TD_1, FIRMWARE_8168E_2,
-							JUMBO_9K, false),
+		_R("RTL8168e/8111e",	RTL_TD_1, FIRMWARE_8168E_2, JUMBO_9K),
 	[RTL_GIGA_MAC_VER_34] =
-		_R("RTL8168evl/8111evl",RTL_TD_1, FIRMWARE_8168E_3,
-							JUMBO_9K, false),
+		_R("RTL8168evl/8111evl",RTL_TD_1, FIRMWARE_8168E_3, JUMBO_9K),
 	[RTL_GIGA_MAC_VER_35] =
-		_R("RTL8168f/8111f",	RTL_TD_1, FIRMWARE_8168F_1,
-							JUMBO_9K, false),
+		_R("RTL8168f/8111f",	RTL_TD_1, FIRMWARE_8168F_1, JUMBO_9K),
 	[RTL_GIGA_MAC_VER_36] =
-		_R("RTL8168f/8111f",	RTL_TD_1, FIRMWARE_8168F_2,
-							JUMBO_9K, false),
+		_R("RTL8168f/8111f",	RTL_TD_1, FIRMWARE_8168F_2, JUMBO_9K),
 	[RTL_GIGA_MAC_VER_37] =
-		_R("RTL8402",		RTL_TD_1, FIRMWARE_8402_1,
-							JUMBO_1K, true),
+		_R("RTL8402",		RTL_TD_1, FIRMWARE_8402_1,  JUMBO_1K),
 	[RTL_GIGA_MAC_VER_38] =
-		_R("RTL8411",		RTL_TD_1, FIRMWARE_8411_1,
-							JUMBO_9K, false),
+		_R("RTL8411",		RTL_TD_1, FIRMWARE_8411_1,  JUMBO_9K),
 	[RTL_GIGA_MAC_VER_39] =
-		_R("RTL8106e",		RTL_TD_1, FIRMWARE_8106E_1,
-							JUMBO_1K, true),
+		_R("RTL8106e",		RTL_TD_1, FIRMWARE_8106E_1, JUMBO_1K),
 	[RTL_GIGA_MAC_VER_40] =
-		_R("RTL8168g/8111g",	RTL_TD_1, FIRMWARE_8168G_2,
-							JUMBO_9K, false),
+		_R("RTL8168g/8111g",	RTL_TD_1, FIRMWARE_8168G_2, JUMBO_9K),
 	[RTL_GIGA_MAC_VER_41] =
-		_R("RTL8168g/8111g",	RTL_TD_1, NULL, JUMBO_9K, false),
+		_R("RTL8168g/8111g",	RTL_TD_1, NULL, JUMBO_9K),
 	[RTL_GIGA_MAC_VER_42] =
-		_R("RTL8168g/8111g",	RTL_TD_1, FIRMWARE_8168G_3,
-							JUMBO_9K, false),
+		_R("RTL8168g/8111g",	RTL_TD_1, FIRMWARE_8168G_3, JUMBO_9K),
 	[RTL_GIGA_MAC_VER_43] =
-		_R("RTL8106e",		RTL_TD_1, FIRMWARE_8106E_2,
-							JUMBO_1K, true),
+		_R("RTL8106e",		RTL_TD_1, FIRMWARE_8106E_2, JUMBO_1K),
 	[RTL_GIGA_MAC_VER_44] =
-		_R("RTL8411",		RTL_TD_1, FIRMWARE_8411_2,
-							JUMBO_9K, false),
+		_R("RTL8411",		RTL_TD_1, FIRMWARE_8411_2,  JUMBO_9K),
 	[RTL_GIGA_MAC_VER_45] =
-		_R("RTL8168h/8111h",	RTL_TD_1, FIRMWARE_8168H_1,
-							JUMBO_9K, false),
+		_R("RTL8168h/8111h",	RTL_TD_1, FIRMWARE_8168H_1, JUMBO_9K),
 	[RTL_GIGA_MAC_VER_46] =
-		_R("RTL8168h/8111h",	RTL_TD_1, FIRMWARE_8168H_2,
-							JUMBO_9K, false),
+		_R("RTL8168h/8111h",	RTL_TD_1, FIRMWARE_8168H_2, JUMBO_9K),
 	[RTL_GIGA_MAC_VER_47] =
-		_R("RTL8107e",		RTL_TD_1, FIRMWARE_8107E_1,
-							JUMBO_1K, false),
+		_R("RTL8107e",		RTL_TD_1, FIRMWARE_8107E_1, JUMBO_1K),
 	[RTL_GIGA_MAC_VER_48] =
-		_R("RTL8107e",		RTL_TD_1, FIRMWARE_8107E_2,
-							JUMBO_1K, false),
+		_R("RTL8107e",		RTL_TD_1, FIRMWARE_8107E_2, JUMBO_1K),
 	[RTL_GIGA_MAC_VER_49] =
-		_R("RTL8168ep/8111ep",	RTL_TD_1, NULL,
-							JUMBO_9K, false),
+		_R("RTL8168ep/8111ep",	RTL_TD_1, NULL, JUMBO_9K),
 	[RTL_GIGA_MAC_VER_50] =
-		_R("RTL8168ep/8111ep",	RTL_TD_1, NULL,
-							JUMBO_9K, false),
+		_R("RTL8168ep/8111ep",	RTL_TD_1, NULL, JUMBO_9K),
 	[RTL_GIGA_MAC_VER_51] =
-		_R("RTL8168ep/8111ep",	RTL_TD_1, NULL,
-							JUMBO_9K, false),
+		_R("RTL8168ep/8111ep",	RTL_TD_1, NULL, JUMBO_9K),
 };
 #undef _R
 
@@ -345,7 +319,6 @@ static const struct pci_device_id rtl8169_pci_tbl[] = {
 
 MODULE_DEVICE_TABLE(pci, rtl8169_pci_tbl);
 
-static int rx_buf_sz = 16383;
 static int use_dac = -1;
 static struct {
 	u32 msg_enable;
@@ -778,7 +751,6 @@ struct rtl8169_private {
 	struct net_device *dev;
 	struct napi_struct napi;
 	u32 msg_enable;
-	u16 txd_version;
 	u16 mac_version;
 	u32 cur_rx; /* Index into the Rx descriptor buffer of next Rx pkt. */
 	u32 cur_tx; /* Index into the Tx descriptor buffer of next Rx pkt. */
@@ -821,7 +793,7 @@ struct rtl8169_private {
 	int (*get_link_ksettings)(struct net_device *,
 				  struct ethtool_link_ksettings *);
 	void (*phy_reset_enable)(struct rtl8169_private *tp);
-	void (*hw_start)(struct net_device *);
+	void (*hw_start)(struct rtl8169_private *tp);
 	unsigned int (*phy_reset_pending)(struct rtl8169_private *tp);
 	unsigned int (*link_ok)(struct rtl8169_private *tp);
 	int (*do_ioctl)(struct rtl8169_private *tp, struct mii_ioctl_data *data, int cmd);
@@ -833,14 +805,11 @@ struct rtl8169_private {
 		struct work_struct work;
 	} wk;
 
-	unsigned features;
-
 	struct mii_if_info mii;
 	dma_addr_t counters_phys_addr;
 	struct rtl8169_counters *counters;
 	struct rtl8169_tc_offsets tc_offset;
 	u32 saved_wolopts;
-	u32 opts1_mask;
 
 	struct rtl_fw {
 		const struct firmware *fw;
@@ -1960,7 +1929,7 @@ static netdev_features_t rtl8169_fix_features(struct net_device *dev,
 		features &= ~NETIF_F_ALL_TSO;
 
 	if (dev->mtu > JUMBO_1K &&
-	    !rtl_chip_infos[tp->mac_version].jumbo_tx_csum)
+	    tp->mac_version > RTL_GIGA_MAC_VER_06)
 		features &= ~NETIF_F_IP_CSUM;
 
 	return features;
@@ -2155,9 +2124,8 @@ DECLARE_RTL_COND(rtl_counters_cond)
 	return RTL_R32(tp, CounterAddrLow) & (CounterReset | CounterDump);
 }
 
-static bool rtl8169_do_counters(struct net_device *dev, u32 counter_cmd)
+static bool rtl8169_do_counters(struct rtl8169_private *tp, u32 counter_cmd)
 {
-	struct rtl8169_private *tp = netdev_priv(dev);
 	dma_addr_t paddr = tp->counters_phys_addr;
 	u32 cmd;
 
@@ -2170,10 +2138,8 @@ static bool rtl8169_do_counters(struct net_device *dev, u32 counter_cmd)
 	return rtl_udelay_loop_wait_low(tp, &rtl_counters_cond, 10, 1000);
 }
 
-static bool rtl8169_reset_counters(struct net_device *dev)
+static bool rtl8169_reset_counters(struct rtl8169_private *tp)
 {
-	struct rtl8169_private *tp = netdev_priv(dev);
-
 	/*
 	 * Versions prior to RTL_GIGA_MAC_VER_19 don't support resetting the
 	 * tally counters.
@@ -2181,13 +2147,11 @@ static bool rtl8169_reset_counters(struct net_device *dev)
 	if (tp->mac_version < RTL_GIGA_MAC_VER_19)
 		return true;
 
-	return rtl8169_do_counters(dev, CounterReset);
+	return rtl8169_do_counters(tp, CounterReset);
 }
 
-static bool rtl8169_update_counters(struct net_device *dev)
+static bool rtl8169_update_counters(struct rtl8169_private *tp)
 {
-	struct rtl8169_private *tp = netdev_priv(dev);
-
 	/*
 	 * Some chips are unable to dump tally counters when the receiver
 	 * is disabled.
@@ -2195,12 +2159,11 @@ static bool rtl8169_update_counters(struct net_device *dev)
 	if ((RTL_R8(tp, ChipCmd) & CmdRxEnb) == 0)
 		return true;
 
-	return rtl8169_do_counters(dev, CounterDump);
+	return rtl8169_do_counters(tp, CounterDump);
 }
 
-static bool rtl8169_init_counter_offsets(struct net_device *dev)
+static bool rtl8169_init_counter_offsets(struct rtl8169_private *tp)
 {
-	struct rtl8169_private *tp = netdev_priv(dev);
 	struct rtl8169_counters *counters = tp->counters;
 	bool ret = false;
 
@@ -2223,10 +2186,10 @@ static bool rtl8169_init_counter_offsets(struct net_device *dev)
 		return true;
 
 	/* If both, reset and update fail, propagate to caller. */
-	if (rtl8169_reset_counters(dev))
+	if (rtl8169_reset_counters(tp))
 		ret = true;
 
-	if (rtl8169_update_counters(dev))
+	if (rtl8169_update_counters(tp))
 		ret = true;
 
 	tp->tc_offset.tx_errors = counters->tx_errors;
@@ -2249,7 +2212,7 @@ static void rtl8169_get_ethtool_stats(struct net_device *dev,
 	pm_runtime_get_noresume(d);
 
 	if (pm_runtime_active(d))
-		rtl8169_update_counters(dev);
+		rtl8169_update_counters(tp);
 
 	pm_runtime_put_noidle(d);
 
@@ -2560,12 +2523,10 @@ static void rtl8169_get_mac_version(struct rtl8169_private *tp,
 
 		/* 8168E family. */
 		{ 0x7c800000, 0x2c800000,	RTL_GIGA_MAC_VER_34 },
-		{ 0x7cf00000, 0x2c200000,	RTL_GIGA_MAC_VER_33 },
 		{ 0x7cf00000, 0x2c100000,	RTL_GIGA_MAC_VER_32 },
 		{ 0x7c800000, 0x2c000000,	RTL_GIGA_MAC_VER_33 },
 
 		/* 8168D family. */
-		{ 0x7cf00000, 0x28300000,	RTL_GIGA_MAC_VER_26 },
 		{ 0x7cf00000, 0x28100000,	RTL_GIGA_MAC_VER_25 },
 		{ 0x7c800000, 0x28000000,	RTL_GIGA_MAC_VER_26 },
 
@@ -2575,32 +2536,24 @@ static void rtl8169_get_mac_version(struct rtl8169_private *tp,
 		{ 0x7cf00000, 0x28b00000,	RTL_GIGA_MAC_VER_31 },
 
 		/* 8168C family. */
-		{ 0x7cf00000, 0x3cb00000,	RTL_GIGA_MAC_VER_24 },
 		{ 0x7cf00000, 0x3c900000,	RTL_GIGA_MAC_VER_23 },
 		{ 0x7cf00000, 0x3c800000,	RTL_GIGA_MAC_VER_18 },
 		{ 0x7c800000, 0x3c800000,	RTL_GIGA_MAC_VER_24 },
 		{ 0x7cf00000, 0x3c000000,	RTL_GIGA_MAC_VER_19 },
 		{ 0x7cf00000, 0x3c200000,	RTL_GIGA_MAC_VER_20 },
 		{ 0x7cf00000, 0x3c300000,	RTL_GIGA_MAC_VER_21 },
-		{ 0x7cf00000, 0x3c400000,	RTL_GIGA_MAC_VER_22 },
 		{ 0x7c800000, 0x3c000000,	RTL_GIGA_MAC_VER_22 },
 
 		/* 8168B family. */
 		{ 0x7cf00000, 0x38000000,	RTL_GIGA_MAC_VER_12 },
-		{ 0x7cf00000, 0x38500000,	RTL_GIGA_MAC_VER_17 },
 		{ 0x7c800000, 0x38000000,	RTL_GIGA_MAC_VER_17 },
 		{ 0x7c800000, 0x30000000,	RTL_GIGA_MAC_VER_11 },
 
 		/* 8101 family. */
-		{ 0x7cf00000, 0x44900000,	RTL_GIGA_MAC_VER_39 },
 		{ 0x7c800000, 0x44800000,	RTL_GIGA_MAC_VER_39 },
 		{ 0x7c800000, 0x44000000,	RTL_GIGA_MAC_VER_37 },
-		{ 0x7cf00000, 0x40b00000,	RTL_GIGA_MAC_VER_30 },
-		{ 0x7cf00000, 0x40a00000,	RTL_GIGA_MAC_VER_30 },
 		{ 0x7cf00000, 0x40900000,	RTL_GIGA_MAC_VER_29 },
 		{ 0x7c800000, 0x40800000,	RTL_GIGA_MAC_VER_30 },
-		{ 0x7cf00000, 0x34a00000,	RTL_GIGA_MAC_VER_09 },
-		{ 0x7cf00000, 0x24a00000,	RTL_GIGA_MAC_VER_09 },
 		{ 0x7cf00000, 0x34900000,	RTL_GIGA_MAC_VER_08 },
 		{ 0x7cf00000, 0x24900000,	RTL_GIGA_MAC_VER_08 },
 		{ 0x7cf00000, 0x34800000,	RTL_GIGA_MAC_VER_07 },
@@ -5125,7 +5078,7 @@ static void r8168c_hw_jumbo_disable(struct rtl8169_private *tp)
 {
 	RTL_W8(tp, Config3, RTL_R8(tp, Config3) & ~Jumbo_En0);
 	RTL_W8(tp, Config4, RTL_R8(tp, Config4) & ~Jumbo_En1);
-	rtl_tx_performance_tweak(tp, 0x5 << MAX_READ_REQUEST_SHIFT);
+	rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B);
 }
 
 static void r8168dp_hw_jumbo_enable(struct rtl8169_private *tp)
@@ -5151,7 +5104,7 @@ static void r8168e_hw_jumbo_disable(struct rtl8169_private *tp)
 	RTL_W8(tp, MaxTxPacketSize, 0x0c);
 	RTL_W8(tp, Config3, RTL_R8(tp, Config3) & ~Jumbo_En0);
 	RTL_W8(tp, Config4, RTL_R8(tp, Config4) & ~0x01);
-	rtl_tx_performance_tweak(tp, 0x5 << MAX_READ_REQUEST_SHIFT);
+	rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B);
 }
 
 static void r8168b_0_hw_jumbo_enable(struct rtl8169_private *tp)
@@ -5163,7 +5116,7 @@ static void r8168b_0_hw_jumbo_enable(struct rtl8169_private *tp)
 static void r8168b_0_hw_jumbo_disable(struct rtl8169_private *tp)
 {
 	rtl_tx_performance_tweak(tp,
-		(0x5 << MAX_READ_REQUEST_SHIFT) | PCI_EXP_DEVCTL_NOSNOOP_EN);
+		PCI_EXP_DEVCTL_READRQ_4096B | PCI_EXP_DEVCTL_NOSNOOP_EN);
 }
 
 static void r8168b_1_hw_jumbo_enable(struct rtl8169_private *tp)
@@ -5358,12 +5311,9 @@ static void rtl_set_rx_tx_config_registers(struct rtl8169_private *tp)
 		(InterFrameGap << TxInterFrameGapShift));
 }
 
-static void rtl_hw_start(struct net_device *dev)
+static void rtl_hw_start(struct rtl8169_private *tp)
 {
-	struct rtl8169_private *tp = netdev_priv(dev);
-
-	tp->hw_start(dev);
-
+	tp->hw_start(tp);
 	rtl_irq_enable_all(tp);
 }
 
@@ -5389,10 +5339,10 @@ static u16 rtl_rw_cpluscmd(struct rtl8169_private *tp)
 	return cmd;
 }
 
-static void rtl_set_rx_max_size(struct rtl8169_private *tp, unsigned int rx_buf_sz)
+static void rtl_set_rx_max_size(struct rtl8169_private *tp)
 {
 	/* Low hurts. Let's disable the filtering. */
-	RTL_W16(tp, RxMaxSize, rx_buf_sz + 1);
+	RTL_W16(tp, RxMaxSize, R8169_RX_BUF_SIZE + 1);
 }
 
 static void rtl8169_set_magic_reg(struct rtl8169_private *tp, unsigned mac_version)
@@ -5472,14 +5422,11 @@ static void rtl_set_rx_mode(struct net_device *dev)
 	RTL_W32(tp, RxConfig, tmp);
 }
 
-static void rtl_hw_start_8169(struct net_device *dev)
+static void rtl_hw_start_8169(struct rtl8169_private *tp)
 {
-	struct rtl8169_private *tp = netdev_priv(dev);
-	struct pci_dev *pdev = tp->pci_dev;
-
 	if (tp->mac_version == RTL_GIGA_MAC_VER_05) {
 		RTL_W16(tp, CPlusCmd, RTL_R16(tp, CPlusCmd) | PCIMulRW);
-		pci_write_config_byte(pdev, PCI_CACHE_LINE_SIZE, 0x08);
+		pci_write_config_byte(tp->pci_dev, PCI_CACHE_LINE_SIZE, 0x08);
 	}
 
 	RTL_W8(tp, Cfg9346, Cfg9346_Unlock);
@@ -5493,7 +5440,7 @@ static void rtl_hw_start_8169(struct net_device *dev)
 
 	RTL_W8(tp, EarlyTxThres, NoEarlyTx);
 
-	rtl_set_rx_max_size(tp, rx_buf_sz);
+	rtl_set_rx_max_size(tp);
 
 	if (tp->mac_version == RTL_GIGA_MAC_VER_01 ||
 	    tp->mac_version == RTL_GIGA_MAC_VER_02 ||
@@ -5537,7 +5484,7 @@ static void rtl_hw_start_8169(struct net_device *dev)
 
 	RTL_W32(tp, RxMissed, 0);
 
-	rtl_set_rx_mode(dev);
+	rtl_set_rx_mode(tp->dev);
 
 	/* no early-rx interrupts */
 	RTL_W16(tp, MultiIntr, RTL_R16(tp, MultiIntr) & 0xf000);
@@ -5736,7 +5683,7 @@ static void rtl_hw_start_8168bb(struct rtl8169_private *tp)
 	RTL_W16(tp, CPlusCmd, RTL_R16(tp, CPlusCmd) & ~R8168_CPCMD_QUIRK_MASK);
 
 	if (tp->dev->mtu <= ETH_DATA_LEN) {
-		rtl_tx_performance_tweak(tp, (0x5 << MAX_READ_REQUEST_SHIFT) |
+		rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B |
 					 PCI_EXP_DEVCTL_NOSNOOP_EN);
 	}
 }
@@ -5757,7 +5704,7 @@ static void __rtl_hw_start_8168cp(struct rtl8169_private *tp)
 	RTL_W8(tp, Config3, RTL_R8(tp, Config3) & ~Beacon_en);
 
 	if (tp->dev->mtu <= ETH_DATA_LEN)
-		rtl_tx_performance_tweak(tp, 0x5 << MAX_READ_REQUEST_SHIFT);
+		rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B);
 
 	rtl_disable_clock_request(tp);
 
@@ -5788,7 +5735,7 @@ static void rtl_hw_start_8168cp_2(struct rtl8169_private *tp)
 	RTL_W8(tp, Config3, RTL_R8(tp, Config3) & ~Beacon_en);
 
 	if (tp->dev->mtu <= ETH_DATA_LEN)
-		rtl_tx_performance_tweak(tp, 0x5 << MAX_READ_REQUEST_SHIFT);
+		rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B);
 
 	RTL_W16(tp, CPlusCmd, RTL_R16(tp, CPlusCmd) & ~R8168_CPCMD_QUIRK_MASK);
 }
@@ -5805,7 +5752,7 @@ static void rtl_hw_start_8168cp_3(struct rtl8169_private *tp)
 	RTL_W8(tp, MaxTxPacketSize, TxPacketMax);
 
 	if (tp->dev->mtu <= ETH_DATA_LEN)
-		rtl_tx_performance_tweak(tp, 0x5 << MAX_READ_REQUEST_SHIFT);
+		rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B);
 
 	RTL_W16(tp, CPlusCmd, RTL_R16(tp, CPlusCmd) & ~R8168_CPCMD_QUIRK_MASK);
 }
@@ -5862,7 +5809,7 @@ static void rtl_hw_start_8168d(struct rtl8169_private *tp)
 	RTL_W8(tp, MaxTxPacketSize, TxPacketMax);
 
 	if (tp->dev->mtu <= ETH_DATA_LEN)
-		rtl_tx_performance_tweak(tp, 0x5 << MAX_READ_REQUEST_SHIFT);
+		rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B);
 
 	RTL_W16(tp, CPlusCmd, RTL_R16(tp, CPlusCmd) & ~R8168_CPCMD_QUIRK_MASK);
 }
@@ -5872,7 +5819,7 @@ static void rtl_hw_start_8168dp(struct rtl8169_private *tp)
 	rtl_csi_access_enable_1(tp);
 
 	if (tp->dev->mtu <= ETH_DATA_LEN)
-		rtl_tx_performance_tweak(tp, 0x5 << MAX_READ_REQUEST_SHIFT);
+		rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B);
 
 	RTL_W8(tp, MaxTxPacketSize, TxPacketMax);
 
@@ -5889,7 +5836,7 @@ static void rtl_hw_start_8168d_4(struct rtl8169_private *tp)
 
 	rtl_csi_access_enable_1(tp);
 
-	rtl_tx_performance_tweak(tp, 0x5 << MAX_READ_REQUEST_SHIFT);
+	rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B);
 
 	RTL_W8(tp, MaxTxPacketSize, TxPacketMax);
 
@@ -5921,7 +5868,7 @@ static void rtl_hw_start_8168e_1(struct rtl8169_private *tp)
 	rtl_ephy_init(tp, e_info_8168e_1, ARRAY_SIZE(e_info_8168e_1));
 
 	if (tp->dev->mtu <= ETH_DATA_LEN)
-		rtl_tx_performance_tweak(tp, 0x5 << MAX_READ_REQUEST_SHIFT);
+		rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B);
 
 	RTL_W8(tp, MaxTxPacketSize, TxPacketMax);
 
@@ -5946,7 +5893,7 @@ static void rtl_hw_start_8168e_2(struct rtl8169_private *tp)
 	rtl_ephy_init(tp, e_info_8168e_2, ARRAY_SIZE(e_info_8168e_2));
 
 	if (tp->dev->mtu <= ETH_DATA_LEN)
-		rtl_tx_performance_tweak(tp, 0x5 << MAX_READ_REQUEST_SHIFT);
+		rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B);
 
 	rtl_eri_write(tp, 0xc0, ERIAR_MASK_0011, 0x0000, ERIAR_EXGMAC);
 	rtl_eri_write(tp, 0xb8, ERIAR_MASK_0011, 0x0000, ERIAR_EXGMAC);
@@ -5976,7 +5923,7 @@ static void rtl_hw_start_8168f(struct rtl8169_private *tp)
 {
 	rtl_csi_access_enable_2(tp);
 
-	rtl_tx_performance_tweak(tp, 0x5 << MAX_READ_REQUEST_SHIFT);
+	rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B);
 
 	rtl_eri_write(tp, 0xc0, ERIAR_MASK_0011, 0x0000, ERIAR_EXGMAC);
 	rtl_eri_write(tp, 0xb8, ERIAR_MASK_0011, 0x0000, ERIAR_EXGMAC);
@@ -6047,7 +5994,7 @@ static void rtl_hw_start_8168g(struct rtl8169_private *tp)
 
 	rtl_csi_access_enable_1(tp);
 
-	rtl_tx_performance_tweak(tp, 0x5 << MAX_READ_REQUEST_SHIFT);
+	rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B);
 
 	rtl_w0w1_eri(tp, 0xdc, ERIAR_MASK_0001, 0x00, 0x01, ERIAR_EXGMAC);
 	rtl_w0w1_eri(tp, 0xdc, ERIAR_MASK_0001, 0x01, 0x00, ERIAR_EXGMAC);
@@ -6147,7 +6094,7 @@ static void rtl_hw_start_8168h_1(struct rtl8169_private *tp)
 
 	rtl_csi_access_enable_1(tp);
 
-	rtl_tx_performance_tweak(tp, 0x5 << MAX_READ_REQUEST_SHIFT);
+	rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B);
 
 	rtl_w0w1_eri(tp, 0xdc, ERIAR_MASK_0001, 0x00, 0x01, ERIAR_EXGMAC);
 	rtl_w0w1_eri(tp, 0xdc, ERIAR_MASK_0001, 0x01, 0x00, ERIAR_EXGMAC);
@@ -6229,7 +6176,7 @@ static void rtl_hw_start_8168ep(struct rtl8169_private *tp)
 
 	rtl_csi_access_enable_1(tp);
 
-	rtl_tx_performance_tweak(tp, 0x5 << MAX_READ_REQUEST_SHIFT);
+	rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B);
 
 	rtl_w0w1_eri(tp, 0xdc, ERIAR_MASK_0001, 0x00, 0x01, ERIAR_EXGMAC);
 	rtl_w0w1_eri(tp, 0xdc, ERIAR_MASK_0001, 0x01, 0x00, ERIAR_EXGMAC);
@@ -6325,15 +6272,13 @@ static void rtl_hw_start_8168ep_3(struct rtl8169_private *tp)
 	r8168_mac_ocp_write(tp, 0xe860, data);
 }
 
-static void rtl_hw_start_8168(struct net_device *dev)
+static void rtl_hw_start_8168(struct rtl8169_private *tp)
 {
-	struct rtl8169_private *tp = netdev_priv(dev);
-
 	RTL_W8(tp, Cfg9346, Cfg9346_Unlock);
 
 	RTL_W8(tp, MaxTxPacketSize, TxPacketMax);
 
-	rtl_set_rx_max_size(tp, rx_buf_sz);
+	rtl_set_rx_max_size(tp);
 
 	tp->cp_cmd |= RTL_R16(tp, CPlusCmd) | PktCntrDisable | INTT_1;
 
@@ -6453,7 +6398,7 @@ static void rtl_hw_start_8168(struct net_device *dev)
 
 	default:
 		printk(KERN_ERR PFX "%s: unknown chipset (mac_version = %d).\n",
-			dev->name, tp->mac_version);
+		       tp->dev->name, tp->mac_version);
 		break;
 	}
 
@@ -6461,7 +6406,7 @@ static void rtl_hw_start_8168(struct net_device *dev)
 
 	RTL_W8(tp, ChipCmd, CmdTxEnb | CmdRxEnb);
 
-	rtl_set_rx_mode(dev);
+	rtl_set_rx_mode(tp->dev);
 
 	RTL_W16(tp, MultiIntr, RTL_R16(tp, MultiIntr) & 0xf000);
 }
@@ -6495,7 +6440,7 @@ static void rtl_hw_start_8102e_1(struct rtl8169_private *tp)
 
 	RTL_W8(tp, DBG_REG, FIX_NAK_1);
 
-	rtl_tx_performance_tweak(tp, 0x5 << MAX_READ_REQUEST_SHIFT);
+	rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B);
 
 	RTL_W8(tp, Config1,
 	       LEDS1 | LEDS0 | Speed_down | MEMMAP | IOMAP | VPD | PMEnable);
@@ -6512,7 +6457,7 @@ static void rtl_hw_start_8102e_2(struct rtl8169_private *tp)
 {
 	rtl_csi_access_enable_2(tp);
 
-	rtl_tx_performance_tweak(tp, 0x5 << MAX_READ_REQUEST_SHIFT);
+	rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B);
 
 	RTL_W8(tp, Config1, MEMMAP | IOMAP | VPD | PMEnable);
 	RTL_W8(tp, Config3, RTL_R8(tp, Config3) & ~Beacon_en);
@@ -6575,7 +6520,7 @@ static void rtl_hw_start_8402(struct rtl8169_private *tp)
 
 	rtl_ephy_init(tp, e_info_8402, ARRAY_SIZE(e_info_8402));
 
-	rtl_tx_performance_tweak(tp, 0x5 << MAX_READ_REQUEST_SHIFT);
+	rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B);
 
 	rtl_eri_write(tp, 0xc8, ERIAR_MASK_1111, 0x00000002, ERIAR_EXGMAC);
 	rtl_eri_write(tp, 0xe8, ERIAR_MASK_1111, 0x00000006, ERIAR_EXGMAC);
@@ -6600,24 +6545,21 @@ static void rtl_hw_start_8106(struct rtl8169_private *tp)
 	rtl_pcie_state_l2l3_enable(tp, false);
 }
 
-static void rtl_hw_start_8101(struct net_device *dev)
+static void rtl_hw_start_8101(struct rtl8169_private *tp)
 {
-	struct rtl8169_private *tp = netdev_priv(dev);
-	struct pci_dev *pdev = tp->pci_dev;
-
 	if (tp->mac_version >= RTL_GIGA_MAC_VER_30)
 		tp->event_slow &= ~RxFIFOOver;
 
 	if (tp->mac_version == RTL_GIGA_MAC_VER_13 ||
 	    tp->mac_version == RTL_GIGA_MAC_VER_16)
-		pcie_capability_set_word(pdev, PCI_EXP_DEVCTL,
+		pcie_capability_set_word(tp->pci_dev, PCI_EXP_DEVCTL,
 					 PCI_EXP_DEVCTL_NOSNOOP_EN);
 
 	RTL_W8(tp, Cfg9346, Cfg9346_Unlock);
 
 	RTL_W8(tp, MaxTxPacketSize, TxPacketMax);
 
-	rtl_set_rx_max_size(tp, rx_buf_sz);
+	rtl_set_rx_max_size(tp);
 
 	tp->cp_cmd &= ~R810X_CPCMD_QUIRK_MASK;
 	RTL_W16(tp, CPlusCmd, tp->cp_cmd);
@@ -6668,7 +6610,7 @@ static void rtl_hw_start_8101(struct net_device *dev)
 
 	RTL_W8(tp, ChipCmd, CmdTxEnb | CmdRxEnb);
 
-	rtl_set_rx_mode(dev);
+	rtl_set_rx_mode(tp->dev);
 
 	RTL_R8(tp, IntrMask);
 
@@ -6699,29 +6641,22 @@ static inline void rtl8169_make_unusable_by_asic(struct RxDesc *desc)
 static void rtl8169_free_rx_databuff(struct rtl8169_private *tp,
 				     void **data_buff, struct RxDesc *desc)
 {
-	dma_unmap_single(tp_to_dev(tp), le64_to_cpu(desc->addr), rx_buf_sz,
-			 DMA_FROM_DEVICE);
+	dma_unmap_single(tp_to_dev(tp), le64_to_cpu(desc->addr),
+			 R8169_RX_BUF_SIZE, DMA_FROM_DEVICE);
 
 	kfree(*data_buff);
 	*data_buff = NULL;
 	rtl8169_make_unusable_by_asic(desc);
 }
 
-static inline void rtl8169_mark_to_asic(struct RxDesc *desc, u32 rx_buf_sz)
+static inline void rtl8169_mark_to_asic(struct RxDesc *desc)
 {
 	u32 eor = le32_to_cpu(desc->opts1) & RingEnd;
 
 	/* Force memory writes to complete before releasing descriptor */
 	dma_wmb();
 
-	desc->opts1 = cpu_to_le32(DescOwn | eor | rx_buf_sz);
-}
-
-static inline void rtl8169_map_to_asic(struct RxDesc *desc, dma_addr_t mapping,
-				       u32 rx_buf_sz)
-{
-	desc->addr = cpu_to_le64(mapping);
-	rtl8169_mark_to_asic(desc, rx_buf_sz);
+	desc->opts1 = cpu_to_le32(DescOwn | eor | R8169_RX_BUF_SIZE);
 }
 
 static inline void *rtl8169_align(void *data)
@@ -6735,21 +6670,20 @@ static struct sk_buff *rtl8169_alloc_rx_data(struct rtl8169_private *tp,
 	void *data;
 	dma_addr_t mapping;
 	struct device *d = tp_to_dev(tp);
-	struct net_device *dev = tp->dev;
-	int node = dev->dev.parent ? dev_to_node(dev->dev.parent) : -1;
+	int node = dev_to_node(d);
 
-	data = kmalloc_node(rx_buf_sz, GFP_KERNEL, node);
+	data = kmalloc_node(R8169_RX_BUF_SIZE, GFP_KERNEL, node);
 	if (!data)
 		return NULL;
 
 	if (rtl8169_align(data) != data) {
 		kfree(data);
-		data = kmalloc_node(rx_buf_sz + 15, GFP_KERNEL, node);
+		data = kmalloc_node(R8169_RX_BUF_SIZE + 15, GFP_KERNEL, node);
 		if (!data)
 			return NULL;
 	}
 
-	mapping = dma_map_single(d, rtl8169_align(data), rx_buf_sz,
+	mapping = dma_map_single(d, rtl8169_align(data), R8169_RX_BUF_SIZE,
 				 DMA_FROM_DEVICE);
 	if (unlikely(dma_mapping_error(d, mapping))) {
 		if (net_ratelimit())
@@ -6757,7 +6691,8 @@ static struct sk_buff *rtl8169_alloc_rx_data(struct rtl8169_private *tp,
 		goto err_out;
 	}
 
-	rtl8169_map_to_asic(desc, mapping, rx_buf_sz);
+	desc->addr = cpu_to_le64(mapping);
+	rtl8169_mark_to_asic(desc);
 	return data;
 
 err_out:
@@ -6789,9 +6724,6 @@ static int rtl8169_rx_fill(struct rtl8169_private *tp)
 	for (i = 0; i < NUM_RX_DESC; i++) {
 		void *data;
 
-		if (tp->Rx_databuff[i])
-			continue;
-
 		data = rtl8169_alloc_rx_data(tp, tp->RxDescArray + i);
 		if (!data) {
 			rtl8169_make_unusable_by_asic(tp->RxDescArray + i);
@@ -6808,14 +6740,12 @@ static int rtl8169_rx_fill(struct rtl8169_private *tp)
 	return -ENOMEM;
 }
 
-static int rtl8169_init_ring(struct net_device *dev)
+static int rtl8169_init_ring(struct rtl8169_private *tp)
 {
-	struct rtl8169_private *tp = netdev_priv(dev);
-
 	rtl8169_init_ring_indexes(tp);
 
-	memset(tp->tx_skb, 0x0, NUM_TX_DESC * sizeof(struct ring_info));
-	memset(tp->Rx_databuff, 0x0, NUM_RX_DESC * sizeof(void *));
+	memset(tp->tx_skb, 0, sizeof(tp->tx_skb));
+	memset(tp->Rx_databuff, 0, sizeof(tp->Rx_databuff));
 
 	return rtl8169_rx_fill(tp);
 }
@@ -6874,13 +6804,13 @@ static void rtl_reset_work(struct rtl8169_private *tp)
 	rtl8169_hw_reset(tp);
 
 	for (i = 0; i < NUM_RX_DESC; i++)
-		rtl8169_mark_to_asic(tp->RxDescArray + i, rx_buf_sz);
+		rtl8169_mark_to_asic(tp->RxDescArray + i);
 
 	rtl8169_tx_clear(tp);
 	rtl8169_init_ring_indexes(tp);
 
 	napi_enable(&tp->napi);
-	rtl_hw_start(dev);
+	rtl_hw_start(tp);
 	netif_wake_queue(dev);
 	rtl8169_check_link_status(dev, tp);
 }
@@ -7362,7 +7292,7 @@ static struct sk_buff *rtl8169_try_rx_copy(void *data,
 	prefetch(data);
 	skb = napi_alloc_skb(&tp->napi, pkt_size);
 	if (skb)
-		memcpy(skb->data, data, pkt_size);
+		skb_copy_to_linear_data(skb, data, pkt_size);
 	dma_sync_single_for_device(d, addr, pkt_size, DMA_FROM_DEVICE);
 
 	return skb;
@@ -7380,7 +7310,7 @@ static int rtl_rx(struct net_device *dev, struct rtl8169_private *tp, u32 budget
 		struct RxDesc *desc = tp->RxDescArray + entry;
 		u32 status;
 
-		status = le32_to_cpu(desc->opts1) & tp->opts1_mask;
+		status = le32_to_cpu(desc->opts1);
 		if (status & DescOwn)
 			break;
 
@@ -7398,14 +7328,16 @@ static int rtl_rx(struct net_device *dev, struct rtl8169_private *tp, u32 budget
 				dev->stats.rx_length_errors++;
 			if (status & RxCRC)
 				dev->stats.rx_crc_errors++;
-			if (status & RxFOVF) {
+			/* RxFOVF is a reserved bit on later chip versions */
+			if (tp->mac_version == RTL_GIGA_MAC_VER_01 &&
+			    status & RxFOVF) {
 				rtl_schedule_task(tp, RTL_FLAG_TASK_RESET_PENDING);
 				dev->stats.rx_fifo_errors++;
-			}
-			if ((status & (RxRUNT | RxCRC)) &&
-			    !(status & (RxRWT | RxFOVF)) &&
-			    (dev->features & NETIF_F_RXALL))
+			} else if (status & (RxRUNT | RxCRC) &&
+				   !(status & RxRWT) &&
+				   dev->features & NETIF_F_RXALL) {
 				goto process_pkt;
+			}
 		} else {
 			struct sk_buff *skb;
 			dma_addr_t addr;
@@ -7454,7 +7386,7 @@ static int rtl_rx(struct net_device *dev, struct rtl8169_private *tp, u32 budget
 		}
 release_descriptor:
 		desc->opts2 = 0;
-		rtl8169_mark_to_asic(desc, rx_buf_sz);
+		rtl8169_mark_to_asic(desc);
 	}
 
 	count = cur_rx - tp->cur_rx;
@@ -7465,8 +7397,7 @@ static int rtl_rx(struct net_device *dev, struct rtl8169_private *tp, u32 budget
 
 static irqreturn_t rtl8169_interrupt(int irq, void *dev_instance)
 {
-	struct net_device *dev = dev_instance;
-	struct rtl8169_private *tp = netdev_priv(dev);
+	struct rtl8169_private *tp = dev_instance;
 	int handled = 0;
 	u16 status;
 
@@ -7477,7 +7408,7 @@ static irqreturn_t rtl8169_interrupt(int irq, void *dev_instance)
 			handled = 1;
 
 			rtl_irq_disable(tp);
-			napi_schedule(&tp->napi);
+			napi_schedule_irqoff(&tp->napi);
 		}
 	}
 	return IRQ_RETVAL(handled);
@@ -7628,7 +7559,7 @@ static int rtl8169_close(struct net_device *dev)
 	pm_runtime_get_sync(&pdev->dev);
 
 	/* Update counters before going down */
-	rtl8169_update_counters(dev);
+	rtl8169_update_counters(tp);
 
 	rtl_lock_work(tp);
 	clear_bit(RTL_FLAG_TASK_ENABLED, tp->wk.flags);
@@ -7638,7 +7569,7 @@ static int rtl8169_close(struct net_device *dev)
 
 	cancel_work_sync(&tp->wk.work);
 
-	pci_free_irq(pdev, 0, dev);
+	pci_free_irq(pdev, 0, tp);
 
 	dma_free_coherent(&pdev->dev, R8169_RX_RING_BYTES, tp->RxDescArray,
 			  tp->RxPhyAddr);
@@ -7683,7 +7614,7 @@ static int rtl_open(struct net_device *dev)
 	if (!tp->RxDescArray)
 		goto err_free_tx_0;
 
-	retval = rtl8169_init_ring(dev);
+	retval = rtl8169_init_ring(tp);
 	if (retval < 0)
 		goto err_free_rx_1;
 
@@ -7693,7 +7624,7 @@ static int rtl_open(struct net_device *dev)
 
 	rtl_request_firmware(tp);
 
-	retval = pci_request_irq(pdev, 0, rtl8169_interrupt, NULL, dev,
+	retval = pci_request_irq(pdev, 0, rtl8169_interrupt, NULL, tp,
 				 dev->name);
 	if (retval < 0)
 		goto err_release_fw_2;
@@ -7710,9 +7641,9 @@ static int rtl_open(struct net_device *dev)
 
 	rtl_pll_power_up(tp);
 
-	rtl_hw_start(dev);
+	rtl_hw_start(tp);
 
-	if (!rtl8169_init_counter_offsets(dev))
+	if (!rtl8169_init_counter_offsets(tp))
 		netif_warn(tp, hw, dev, "counter reset/update failed\n");
 
 	netif_start_queue(dev);
@@ -7781,7 +7712,7 @@ rtl8169_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats)
 	 * from tally counters.
 	 */
 	if (pm_runtime_active(&pdev->dev))
-		rtl8169_update_counters(dev);
+		rtl8169_update_counters(tp);
 
 	/*
 	 * Subtract values fetched during initialization.
@@ -7877,7 +7808,7 @@ static int rtl8169_runtime_suspend(struct device *device)
 
 	/* Update counters before going into runtime suspend */
 	rtl8169_rx_missed(dev);
-	rtl8169_update_counters(dev);
+	rtl8169_update_counters(tp);
 
 	return 0;
 }
@@ -8017,9 +7948,7 @@ static const struct net_device_ops rtl_netdev_ops = {
 };
 
 static const struct rtl_cfg_info {
-	void (*hw_start)(struct net_device *);
-	unsigned int region;
-	unsigned int align;
+	void (*hw_start)(struct rtl8169_private *tp);
 	u16 event_slow;
 	unsigned int has_gmii:1;
 	const struct rtl_coalesce_info *coalesce_info;
@@ -8027,8 +7956,6 @@ static const struct rtl_cfg_info {
 } rtl_cfg_infos [] = {
 	[RTL_CFG_0] = {
 		.hw_start	= rtl_hw_start_8169,
-		.region		= 1,
-		.align		= 0,
 		.event_slow	= SYSErr | LinkChg | RxOverflow | RxFIFOOver,
 		.has_gmii	= 1,
 		.coalesce_info	= rtl_coalesce_info_8169,
@@ -8036,8 +7963,6 @@ static const struct rtl_cfg_info {
 	},
 	[RTL_CFG_1] = {
 		.hw_start	= rtl_hw_start_8168,
-		.region		= 2,
-		.align		= 8,
 		.event_slow	= SYSErr | LinkChg | RxOverflow,
 		.has_gmii	= 1,
 		.coalesce_info	= rtl_coalesce_info_8168_8136,
@@ -8045,8 +7970,6 @@ static const struct rtl_cfg_info {
 	},
 	[RTL_CFG_2] = {
 		.hw_start	= rtl_hw_start_8101,
-		.region		= 2,
-		.align		= 8,
 		.event_slow	= SYSErr | LinkChg | RxOverflow | RxFIFOOver |
 				  PCSTimeout,
 		.coalesce_info	= rtl_coalesce_info_8168_8136,
@@ -8146,11 +8069,10 @@ static void rtl_hw_initialize(struct rtl8169_private *tp)
 static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 {
 	const struct rtl_cfg_info *cfg = rtl_cfg_infos + ent->driver_data;
-	const unsigned int region = cfg->region;
 	struct rtl8169_private *tp;
 	struct mii_if_info *mii;
 	struct net_device *dev;
-	int chipset, i;
+	int chipset, region, i;
 	int rc;
 
 	if (netif_msg_drv(&debug)) {
@@ -8192,11 +8114,10 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 	if (pcim_set_mwi(pdev) < 0)
 		netif_info(tp, probe, dev, "Mem-Wr-Inval unavailable\n");
 
-	/* make sure PCI base addr 1 is MMIO */
-	if (!(pci_resource_flags(pdev, region) & IORESOURCE_MEM)) {
-		netif_err(tp, probe, dev,
-			  "region #%d not an MMIO resource, aborting\n",
-			  region);
+	/* use first MMIO region */
+	region = ffs(pci_select_bars(pdev, IORESOURCE_MEM)) - 1;
+	if (region < 0) {
+		netif_err(tp, probe, dev, "no MMIO resource found\n");
 		return -ENODEV;
 	}
 
@@ -8261,7 +8182,6 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 	rtl8169_print_mac_version(tp);
 
 	chipset = tp->mac_version;
-	tp->txd_version = rtl_chip_infos[chipset].txd_version;
 
 	rc = rtl_alloc_irq(tp);
 	if (rc < 0) {
@@ -8323,7 +8243,7 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 	dev->ethtool_ops = &rtl8169_ethtool_ops;
 	dev->watchdog_timeo = RTL8169_TX_TIMEOUT;
 
-	netif_napi_add(dev, &tp->napi, rtl8169_poll, R8169_NAPI_WEIGHT);
+	netif_napi_add(dev, &tp->napi, rtl8169_poll, NAPI_POLL_WEIGHT);
 
 	/* don't enable SG, IP_CSUM and TSO by default - it might not work
 	 * properly for all devices */
@@ -8346,13 +8266,17 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 		/* Disallow toggling */
 		dev->hw_features &= ~NETIF_F_HW_VLAN_CTAG_RX;
 
-	if (tp->txd_version == RTL_TD_0)
+	switch (rtl_chip_infos[chipset].txd_version) {
+	case RTL_TD_0:
 		tp->tso_csum = rtl8169_tso_csum_v1;
-	else if (tp->txd_version == RTL_TD_1) {
+		break;
+	case RTL_TD_1:
 		tp->tso_csum = rtl8169_tso_csum_v2;
 		dev->hw_features |= NETIF_F_IPV6_CSUM | NETIF_F_TSO6;
-	} else
+		break;
+	default:
 		WARN_ON_ONCE(1);
+	}
 
 	dev->hw_features |= NETIF_F_RXALL;
 	dev->hw_features |= NETIF_F_RXFCS;
@@ -8365,9 +8289,6 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 	tp->event_slow = cfg->event_slow;
 	tp->coalesce_info = cfg->coalesce_info;
 
-	tp->opts1_mask = (tp->mac_version != RTL_GIGA_MAC_VER_01) ?
-		~(RxBOVF | RxFOVF) : ~0;
-
 	timer_setup(&tp->timer, rtl8169_phy_timer, 0);
 
 	tp->rtl_fw = RTL_FIRMWARE_UNKNOWN;
@@ -8384,15 +8305,15 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 	if (rc < 0)
 		return rc;
 
-	netif_info(tp, probe, dev, "%s at 0x%p, %pM, XID %08x IRQ %d\n",
-		   rtl_chip_infos[chipset].name, tp->mmio_addr, dev->dev_addr,
-		   (u32)(RTL_R32(tp, TxConfig) & 0x9cf0f8ff),
+	netif_info(tp, probe, dev, "%s, %pM, XID %08x, IRQ %d\n",
+		   rtl_chip_infos[chipset].name, dev->dev_addr,
+		   (u32)(RTL_R32(tp, TxConfig) & 0xfcf0f8ff),
 		   pci_irq_vector(pdev, 0));
 	if (rtl_chip_infos[chipset].jumbo_max != JUMBO_1K) {
 		netif_info(tp, probe, dev, "jumbo features [frames: %d bytes, "
 			   "tx checksumming: %s]\n",
 			   rtl_chip_infos[chipset].jumbo_max,
-			   rtl_chip_infos[chipset].jumbo_tx_csum ? "ok" : "ko");
+			   tp->mac_version <= RTL_GIGA_MAC_VER_06 ? "ok" : "ko");
 	}
 
 	if (r8168_check_dash(tp))
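
Note on the r8169 refactor above: the helpers now take struct
rtl8169_private * directly instead of re-deriving it from the net_device
with netdev_priv() at every call site; the net_device remains reachable
as tp->dev where still needed. A minimal user-space sketch of the
pattern, with stand-in types rather than the kernel's:

#include <stdio.h>

struct net_device { void *priv; };
struct rtl_priv { struct net_device *dev; int mac_version; };

static void *netdev_priv(struct net_device *d) { return d->priv; }

/* before: every helper paid a netdev_priv() lookup */
static void update_counters_old(struct net_device *dev)
{
	struct rtl_priv *tp = netdev_priv(dev);

	printf("old: mac_version=%d\n", tp->mac_version);
}

/* after: callers already hold tp */
static void update_counters_new(struct rtl_priv *tp)
{
	printf("new: mac_version=%d\n", tp->mac_version);
}

int main(void)
{
	struct rtl_priv tp = { .mac_version = 34 };
	struct net_device dev = { .priv = &tp };

	tp.dev = &dev;
	update_counters_old(&dev);
	update_counters_new(&tp);
	return 0;
}
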
diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c
index 692dd72..c30b9e2 100644
--- a/drivers/net/ethernet/sfc/efx.c
+++ b/drivers/net/ethernet/sfc/efx.c
@@ -1551,6 +1551,38 @@ static int efx_probe_interrupts(struct efx_nic *efx)
 	return 0;
 }
 
+#if defined(CONFIG_SMP)
+static void efx_set_interrupt_affinity(struct efx_nic *efx)
+{
+	struct efx_channel *channel;
+	unsigned int cpu;
+
+	efx_for_each_channel(channel, efx) {
+		cpu = cpumask_local_spread(channel->channel,
+					   pcibus_to_node(efx->pci_dev->bus));
+		irq_set_affinity_hint(channel->irq, cpumask_of(cpu));
+	}
+}
+
+static void efx_clear_interrupt_affinity(struct efx_nic *efx)
+{
+	struct efx_channel *channel;
+
+	efx_for_each_channel(channel, efx)
+		irq_set_affinity_hint(channel->irq, NULL);
+}
+#else
+static void
+efx_set_interrupt_affinity(struct efx_nic *efx __attribute__ ((unused)))
+{
+}
+
+static void
+efx_clear_interrupt_affinity(struct efx_nic *efx __attribute__ ((unused)))
+{
+}
+#endif /* CONFIG_SMP */
+
 static int efx_soft_enable_interrupts(struct efx_nic *efx)
 {
 	struct efx_channel *channel, *end_channel;
@@ -3165,6 +3197,7 @@ static void efx_pci_remove_main(struct efx_nic *efx)
 	cancel_work_sync(&efx->reset_work);
 
 	efx_disable_interrupts(efx);
+	efx_clear_interrupt_affinity(efx);
 	efx_nic_fini_interrupt(efx);
 	efx_fini_port(efx);
 	efx->type->fini(efx);
@@ -3314,6 +3347,8 @@ static int efx_pci_probe_main(struct efx_nic *efx)
 	rc = efx_nic_init_interrupt(efx);
 	if (rc)
 		goto fail5;
+
+	efx_set_interrupt_affinity(efx);
 	rc = efx_enable_interrupts(efx);
 	if (rc)
 		goto fail6;
@@ -3321,6 +3356,7 @@ static int efx_pci_probe_main(struct efx_nic *efx)
 	return 0;
 
  fail6:
+	efx_clear_interrupt_affinity(efx);
 	efx_nic_fini_interrupt(efx);
  fail5:
 	efx_fini_port(efx);
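
efx_set_interrupt_affinity() above hints each channel's IRQ onto a CPU
chosen by cpumask_local_spread(), i.e. round-robin over the CPUs of the
NIC's NUMA node. A user-space sketch of that spreading policy; the
node-to-CPU table is invented for illustration:

#include <stdio.h>

static int local_spread(int i, const int *node_cpus, int n)
{
	return node_cpus[i % n];	/* wrap once channels outnumber CPUs */
}

int main(void)
{
	const int node0_cpus[] = { 0, 2, 4, 6 };	/* hypothetical topology */
	int channel;

	for (channel = 0; channel < 6; channel++)
		printf("channel %d -> cpu %d\n", channel,
		       local_spread(channel, node0_cpus, 4));
	return 0;
}
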
diff --git a/drivers/net/ethernet/socionext/Kconfig b/drivers/net/ethernet/socionext/Kconfig
index 6bcfe27..b80048c 100644
--- a/drivers/net/ethernet/socionext/Kconfig
+++ b/drivers/net/ethernet/socionext/Kconfig
@@ -14,6 +14,8 @@
 config SNI_AVE
 	tristate "Socionext AVE ethernet support"
 	depends on (ARCH_UNIPHIER || COMPILE_TEST) && OF
+	depends on HAS_IOMEM
+	select MFD_SYSCON
 	select PHYLIB
 	---help---
 	  Driver for gigabit ethernet MACs, called AVE, in the
diff --git a/drivers/net/ethernet/socionext/netsec.c b/drivers/net/ethernet/socionext/netsec.c
index f4c0b02..aa50331 100644
--- a/drivers/net/ethernet/socionext/netsec.c
+++ b/drivers/net/ethernet/socionext/netsec.c
@@ -1057,7 +1057,8 @@ static int netsec_netdev_load_microcode(struct netsec_priv *priv)
 	return 0;
 }
 
-static int netsec_reset_hardware(struct netsec_priv *priv)
+static int netsec_reset_hardware(struct netsec_priv *priv,
+				 bool load_ucode)
 {
 	u32 value;
 	int err;
@@ -1102,11 +1103,14 @@ static int netsec_reset_hardware(struct netsec_priv *priv)
 	netsec_write(priv, NETSEC_REG_NRM_RX_CONFIG,
 		     1 << NETSEC_REG_DESC_ENDIAN);
 
-	err = netsec_netdev_load_microcode(priv);
-	if (err) {
-		netif_err(priv, probe, priv->ndev,
-			  "%s: failed to load microcode (%d)\n", __func__, err);
-		return err;
+	if (load_ucode) {
+		err = netsec_netdev_load_microcode(priv);
+		if (err) {
+			netif_err(priv, probe, priv->ndev,
+				  "%s: failed to load microcode (%d)\n",
+				  __func__, err);
+			return err;
+		}
 	}
 
 	/* start DMA engines */
@@ -1313,8 +1317,8 @@ static int netsec_netdev_open(struct net_device *ndev)
 	napi_enable(&priv->napi);
 	netif_start_queue(ndev);
 
-	/* Enable RX intr. */
-	netsec_write(priv, NETSEC_REG_INTEN_SET, NETSEC_IRQ_RX);
+	/* Enable TX+RX intr. */
+	netsec_write(priv, NETSEC_REG_INTEN_SET, NETSEC_IRQ_RX | NETSEC_IRQ_TX);
 
 	return 0;
 err3:
@@ -1328,6 +1332,7 @@ static int netsec_netdev_open(struct net_device *ndev)
 
 static int netsec_netdev_stop(struct net_device *ndev)
 {
+	int ret;
 	struct netsec_priv *priv = netdev_priv(ndev);
 
 	netif_stop_queue(priv->ndev);
@@ -1343,12 +1348,14 @@ static int netsec_netdev_stop(struct net_device *ndev)
 	netsec_uninit_pkt_dring(priv, NETSEC_RING_TX);
 	netsec_uninit_pkt_dring(priv, NETSEC_RING_RX);
 
+	ret = netsec_reset_hardware(priv, false);
+
 	phy_stop(ndev->phydev);
 	phy_disconnect(ndev->phydev);
 
 	pm_runtime_put_sync(priv->dev);
 
-	return 0;
+	return ret;
 }
 
 static int netsec_netdev_init(struct net_device *ndev)
@@ -1364,7 +1371,7 @@ static int netsec_netdev_init(struct net_device *ndev)
 	if (ret)
 		goto err1;
 
-	ret = netsec_reset_hardware(priv);
+	ret = netsec_reset_hardware(priv, true);
 	if (ret)
 		goto err2;
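
The new load_ucode flag lets netsec_netdev_stop() reset the DMA engines
without re-downloading the microcode, while the init path keeps loading
it. A trivial sketch of the split; the bodies are placeholders, not the
driver's register sequence:

#include <stdbool.h>
#include <stdio.h>

static int load_microcode(void) { puts("microcode loaded"); return 0; }

static int reset_hardware(bool load_ucode)
{
	puts("engines stopped, registers reprogrammed");
	return load_ucode ? load_microcode() : 0;
}

int main(void)
{
	reset_hardware(true);	/* netsec_netdev_init() path */
	reset_hardware(false);	/* netsec_netdev_stop() path */
	return 0;
}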
 
diff --git a/drivers/net/ethernet/socionext/sni_ave.c b/drivers/net/ethernet/socionext/sni_ave.c
index 0b3b7a4..f7eccee 100644
--- a/drivers/net/ethernet/socionext/sni_ave.c
+++ b/drivers/net/ethernet/socionext/sni_ave.c
@@ -11,6 +11,7 @@
 #include <linux/interrupt.h>
 #include <linux/io.h>
 #include <linux/iopoll.h>
+#include <linux/mfd/syscon.h>
 #include <linux/mii.h>
 #include <linux/module.h>
 #include <linux/netdevice.h>
@@ -18,6 +19,7 @@
 #include <linux/of_mdio.h>
 #include <linux/of_platform.h>
 #include <linux/phy.h>
+#include <linux/regmap.h>
 #include <linux/reset.h>
 #include <linux/types.h>
 #include <linux/u64_stats_sync.h>
@@ -197,8 +199,16 @@
 #define AVE_INTM_COUNT		20
 #define AVE_FORCE_TXINTCNT	1
 
+/* SG */
+#define SG_ETPINMODE		0x540
+#define SG_ETPINMODE_EXTPHY	BIT(1)	/* for LD11 */
+#define SG_ETPINMODE_RMII(ins)	BIT(ins)
+
 #define IS_DESC_64BIT(p)	((p)->data->is_desc_64bit)
 
+#define AVE_MAX_CLKS		4
+#define AVE_MAX_RSTS		2
+
 enum desc_id {
 	AVE_DESCID_RX,
 	AVE_DESCID_TX,
@@ -225,10 +235,6 @@ struct ave_desc_info {
 	struct ave_desc *desc;	/* skb info related descriptor */
 };
 
-struct ave_soc_data {
-	bool	is_desc_64bit;
-};
-
 struct ave_stats {
 	struct	u64_stats_sync	syncp;
 	u64	packets;
@@ -245,11 +251,16 @@ struct ave_private {
 	int			phy_id;
 	unsigned int		desc_size;
 	u32			msg_enable;
-	struct clk		*clk;
-	struct reset_control	*rst;
+	int			nclks;
+	struct clk		*clk[AVE_MAX_CLKS];
+	int			nrsts;
+	struct reset_control	*rst[AVE_MAX_RSTS];
 	phy_interface_t		phy_mode;
 	struct phy_device	*phydev;
 	struct mii_bus		*mdio;
+	struct regmap		*regmap;
+	unsigned int		pinmode_mask;
+	unsigned int		pinmode_val;
 
 	/* stats */
 	struct ave_stats	stats_rx;
@@ -272,6 +283,14 @@ struct ave_private {
 	const struct ave_soc_data *data;
 };
 
+struct ave_soc_data {
+	bool	is_desc_64bit;
+	const char	*clock_names[AVE_MAX_CLKS];
+	const char	*reset_names[AVE_MAX_RSTS];
+	int	(*get_pinmode)(struct ave_private *priv,
+			       phy_interface_t phy_mode, u32 arg);
+};
+
 static u32 ave_desc_read(struct net_device *ndev, enum desc_id id, int entry,
 			 int offset)
 {
@@ -1153,19 +1172,29 @@ static int ave_init(struct net_device *ndev)
 	struct device_node *np = dev->of_node;
 	struct device_node *mdio_np;
 	struct phy_device *phydev;
-	int ret;
+	int nc, nr, ret;
 
 	/* enable clk because of hw access until ndo_open */
-	ret = clk_prepare_enable(priv->clk);
-	if (ret) {
-		dev_err(dev, "can't enable clock\n");
+	for (nc = 0; nc < priv->nclks; nc++) {
+		ret = clk_prepare_enable(priv->clk[nc]);
+		if (ret) {
+			dev_err(dev, "can't enable clock\n");
+			goto out_clk_disable;
+		}
+	}
+
+	for (nr = 0; nr < priv->nrsts; nr++) {
+		ret = reset_control_deassert(priv->rst[nr]);
+		if (ret) {
+			dev_err(dev, "can't deassert reset\n");
+			goto out_reset_assert;
+		}
+	}
+
+	ret = regmap_update_bits(priv->regmap, SG_ETPINMODE,
+				 priv->pinmode_mask, priv->pinmode_val);
+	if (ret)
 		return ret;
-	}
-	ret = reset_control_deassert(priv->rst);
-	if (ret) {
-		dev_err(dev, "can't deassert reset\n");
-		goto out_clk_disable;
-	}
 
 	ave_global_reset(ndev);
 
@@ -1207,9 +1236,11 @@ static int ave_init(struct net_device *ndev)
 out_mdio_unregister:
 	mdiobus_unregister(priv->mdio);
 out_reset_assert:
-	reset_control_assert(priv->rst);
+	while (--nr >= 0)
+		reset_control_assert(priv->rst[nr]);
 out_clk_disable:
-	clk_disable_unprepare(priv->clk);
+	while (--nc >= 0)
+		clk_disable_unprepare(priv->clk[nc]);
 
 	return ret;
 }
@@ -1217,13 +1248,16 @@ static int ave_init(struct net_device *ndev)
 static void ave_uninit(struct net_device *ndev)
 {
 	struct ave_private *priv = netdev_priv(ndev);
+	int i;
 
 	phy_disconnect(priv->phydev);
 	mdiobus_unregister(priv->mdio);
 
 	/* disable clk because of hw access after ndo_stop */
-	reset_control_assert(priv->rst);
-	clk_disable_unprepare(priv->clk);
+	for (i = 0; i < priv->nrsts; i++)
+		reset_control_assert(priv->rst[i]);
+	for (i = 0; i < priv->nclks; i++)
+		clk_disable_unprepare(priv->clk[i]);
 }
 
 static int ave_open(struct net_device *ndev)
@@ -1520,6 +1554,7 @@ static int ave_probe(struct platform_device *pdev)
 	const struct ave_soc_data *data;
 	struct device *dev = &pdev->dev;
 	char buf[ETHTOOL_FWVERS_LEN];
+	struct of_phandle_args args;
 	phy_interface_t phy_mode;
 	struct ave_private *priv;
 	struct net_device *ndev;
@@ -1527,8 +1562,9 @@ static int ave_probe(struct platform_device *pdev)
 	struct resource	*res;
 	const void *mac_addr;
 	void __iomem *base;
+	const char *name;
+	int i, irq, ret;
 	u64 dma_mask;
-	int irq, ret;
 	u32 ave_id;
 
 	data = of_device_get_match_data(dev);
@@ -1541,12 +1577,6 @@ static int ave_probe(struct platform_device *pdev)
 		dev_err(dev, "phy-mode not found\n");
 		return -EINVAL;
 	}
-	if ((!phy_interface_mode_is_rgmii(phy_mode)) &&
-	    phy_mode != PHY_INTERFACE_MODE_RMII &&
-	    phy_mode != PHY_INTERFACE_MODE_MII) {
-		dev_err(dev, "phy-mode is invalid\n");
-		return -EINVAL;
-	}
 
 	irq = platform_get_irq(pdev, 0);
 	if (irq < 0) {
@@ -1614,15 +1644,47 @@ static int ave_probe(struct platform_device *pdev)
 	u64_stats_init(&priv->stats_tx.syncp);
 	u64_stats_init(&priv->stats_rx.syncp);
 
-	priv->clk = devm_clk_get(dev, NULL);
-	if (IS_ERR(priv->clk)) {
-		ret = PTR_ERR(priv->clk);
-		goto out_free_netdev;
+	for (i = 0; i < AVE_MAX_CLKS; i++) {
+		name = priv->data->clock_names[i];
+		if (!name)
+			break;
+		priv->clk[i] = devm_clk_get(dev, name);
+		if (IS_ERR(priv->clk[i])) {
+			ret = PTR_ERR(priv->clk[i]);
+			goto out_free_netdev;
+		}
+		priv->nclks++;
 	}
 
-	priv->rst = devm_reset_control_get_optional_shared(dev, NULL);
-	if (IS_ERR(priv->rst)) {
-		ret = PTR_ERR(priv->rst);
+	for (i = 0; i < AVE_MAX_RSTS; i++) {
+		name = priv->data->reset_names[i];
+		if (!name)
+			break;
+		priv->rst[i] = devm_reset_control_get_shared(dev, name);
+		if (IS_ERR(priv->rst[i])) {
+			ret = PTR_ERR(priv->rst[i]);
+			goto out_free_netdev;
+		}
+		priv->nrsts++;
+	}
+
+	ret = of_parse_phandle_with_fixed_args(np,
+					       "socionext,syscon-phy-mode",
+					       1, 0, &args);
+	if (ret) {
+		netdev_err(ndev, "can't get syscon-phy-mode property\n");
+		goto out_free_netdev;
+	}
+	priv->regmap = syscon_node_to_regmap(args.np);
+	of_node_put(args.np);
+	if (IS_ERR(priv->regmap)) {
+		netdev_err(ndev, "can't map syscon-phy-mode\n");
+		ret = PTR_ERR(priv->regmap);
+		goto out_free_netdev;
+	}
+	ret = priv->data->get_pinmode(priv, phy_mode, args.args[0]);
+	if (ret) {
+		netdev_err(ndev, "invalid phy-mode setting\n");
 		goto out_free_netdev;
 	}
 
@@ -1685,24 +1747,148 @@ static int ave_remove(struct platform_device *pdev)
 	return 0;
 }
 
+static int ave_pro4_get_pinmode(struct ave_private *priv,
+				phy_interface_t phy_mode, u32 arg)
+{
+	if (arg > 0)
+		return -EINVAL;
+
+	priv->pinmode_mask = SG_ETPINMODE_RMII(0);
+
+	switch (phy_mode) {
+	case PHY_INTERFACE_MODE_RMII:
+		priv->pinmode_val = SG_ETPINMODE_RMII(0);
+		break;
+	case PHY_INTERFACE_MODE_MII:
+	case PHY_INTERFACE_MODE_RGMII:
+		priv->pinmode_val = 0;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int ave_ld11_get_pinmode(struct ave_private *priv,
+				phy_interface_t phy_mode, u32 arg)
+{
+	if (arg > 0)
+		return -EINVAL;
+
+	priv->pinmode_mask = SG_ETPINMODE_EXTPHY | SG_ETPINMODE_RMII(0);
+
+	switch (phy_mode) {
+	case PHY_INTERFACE_MODE_INTERNAL:
+		priv->pinmode_val = 0;
+		break;
+	case PHY_INTERFACE_MODE_RMII:
+		priv->pinmode_val = SG_ETPINMODE_EXTPHY | SG_ETPINMODE_RMII(0);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int ave_ld20_get_pinmode(struct ave_private *priv,
+				phy_interface_t phy_mode, u32 arg)
+{
+	if (arg > 0)
+		return -EINVAL;
+
+	priv->pinmode_mask = SG_ETPINMODE_RMII(0);
+
+	switch (phy_mode) {
+	case PHY_INTERFACE_MODE_RMII:
+		priv->pinmode_val = SG_ETPINMODE_RMII(0);
+		break;
+	case PHY_INTERFACE_MODE_RGMII:
+		priv->pinmode_val = 0;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int ave_pxs3_get_pinmode(struct ave_private *priv,
+				phy_interface_t phy_mode, u32 arg)
+{
+	if (arg > 1)
+		return -EINVAL;
+
+	priv->pinmode_mask = SG_ETPINMODE_RMII(arg);
+
+	switch (phy_mode) {
+	case PHY_INTERFACE_MODE_RMII:
+		priv->pinmode_val = SG_ETPINMODE_RMII(arg);
+		break;
+	case PHY_INTERFACE_MODE_RGMII:
+		priv->pinmode_val = 0;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
 static const struct ave_soc_data ave_pro4_data = {
 	.is_desc_64bit = false,
+	.clock_names = {
+		"gio", "ether", "ether-gb", "ether-phy",
+	},
+	.reset_names = {
+		"gio", "ether",
+	},
+	.get_pinmode = ave_pro4_get_pinmode,
 };
 
 static const struct ave_soc_data ave_pxs2_data = {
 	.is_desc_64bit = false,
+	.clock_names = {
+		"ether",
+	},
+	.reset_names = {
+		"ether",
+	},
+	.get_pinmode = ave_pro4_get_pinmode,
 };
 
 static const struct ave_soc_data ave_ld11_data = {
 	.is_desc_64bit = false,
+	.clock_names = {
+		"ether",
+	},
+	.reset_names = {
+		"ether",
+	},
+	.get_pinmode = ave_ld11_get_pinmode,
 };
 
 static const struct ave_soc_data ave_ld20_data = {
 	.is_desc_64bit = true,
+	.clock_names = {
+		"ether",
+	},
+	.reset_names = {
+		"ether",
+	},
+	.get_pinmode = ave_ld20_get_pinmode,
 };
 
 static const struct ave_soc_data ave_pxs3_data = {
 	.is_desc_64bit = false,
+	.clock_names = {
+		"ether",
+	},
+	.reset_names = {
+		"ether",
+	},
+	.get_pinmode = ave_pxs3_get_pinmode,
 };
 
 static const struct of_device_id of_ave_match[] = {
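
All four get_pinmode() callbacks above funnel into a single
regmap_update_bits() on SG_ETPINMODE, i.e. a masked read-modify-write
through the syscon. A standalone sketch of those semantics, reusing the
driver's bit macros but with an invented initial register value:

#include <stdio.h>
#include <stdint.h>

#define SG_ETPINMODE_EXTPHY	(1u << 1)	/* for LD11 */
#define SG_ETPINMODE_RMII(ins)	(1u << (ins))

/* what regmap_update_bits() writes back */
static uint32_t update_bits(uint32_t reg, uint32_t mask, uint32_t val)
{
	return (reg & ~mask) | (val & mask);
}

int main(void)
{
	uint32_t reg = 0xf0;	/* invented current SG_ETPINMODE contents */

	/* LD11 with an RMII phy: set EXTPHY and RMII(0), per ave_ld11_get_pinmode() */
	reg = update_bits(reg, SG_ETPINMODE_EXTPHY | SG_ETPINMODE_RMII(0),
			  SG_ETPINMODE_EXTPHY | SG_ETPINMODE_RMII(0));
	printf("SG_ETPINMODE = 0x%02x\n", (unsigned int)reg);
	return 0;
}
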
diff --git a/drivers/net/ethernet/stmicro/stmmac/chain_mode.c b/drivers/net/ethernet/stmicro/stmmac/chain_mode.c
index e93c40b..b9c9003 100644
--- a/drivers/net/ethernet/stmicro/stmmac/chain_mode.c
+++ b/drivers/net/ethernet/stmicro/stmmac/chain_mode.c
@@ -24,7 +24,7 @@
 
 #include "stmmac.h"
 
-static int stmmac_jumbo_frm(void *p, struct sk_buff *skb, int csum)
+static int jumbo_frm(void *p, struct sk_buff *skb, int csum)
 {
 	struct stmmac_tx_queue *tx_q = (struct stmmac_tx_queue *)p;
 	unsigned int nopaged_len = skb_headlen(skb);
@@ -51,8 +51,8 @@ static int stmmac_jumbo_frm(void *p, struct sk_buff *skb, int csum)
 	tx_q->tx_skbuff_dma[entry].buf = des2;
 	tx_q->tx_skbuff_dma[entry].len = bmax;
 	/* do not close the descriptor and do not set own bit */
-	priv->hw->desc->prepare_tx_desc(desc, 1, bmax, csum, STMMAC_CHAIN_MODE,
-					0, false, skb->len);
+	stmmac_prepare_tx_desc(priv, desc, 1, bmax, csum, STMMAC_CHAIN_MODE,
+			0, false, skb->len);
 
 	while (len != 0) {
 		tx_q->tx_skbuff[entry] = NULL;
@@ -68,9 +68,8 @@ static int stmmac_jumbo_frm(void *p, struct sk_buff *skb, int csum)
 				return -1;
 			tx_q->tx_skbuff_dma[entry].buf = des2;
 			tx_q->tx_skbuff_dma[entry].len = bmax;
-			priv->hw->desc->prepare_tx_desc(desc, 0, bmax, csum,
-							STMMAC_CHAIN_MODE, 1,
-							false, skb->len);
+			stmmac_prepare_tx_desc(priv, desc, 0, bmax, csum,
+					STMMAC_CHAIN_MODE, 1, false, skb->len);
 			len -= bmax;
 			i++;
 		} else {
@@ -83,9 +82,8 @@ static int stmmac_jumbo_frm(void *p, struct sk_buff *skb, int csum)
 			tx_q->tx_skbuff_dma[entry].buf = des2;
 			tx_q->tx_skbuff_dma[entry].len = len;
 			/* last descriptor can be set now */
-			priv->hw->desc->prepare_tx_desc(desc, 0, len, csum,
-							STMMAC_CHAIN_MODE, 1,
-							true, skb->len);
+			stmmac_prepare_tx_desc(priv, desc, 0, len, csum,
+					STMMAC_CHAIN_MODE, 1, true, skb->len);
 			len = 0;
 		}
 	}
@@ -95,7 +93,7 @@ static int stmmac_jumbo_frm(void *p, struct sk_buff *skb, int csum)
 	return entry;
 }
 
-static unsigned int stmmac_is_jumbo_frm(int len, int enh_desc)
+static unsigned int is_jumbo_frm(int len, int enh_desc)
 {
 	unsigned int ret = 0;
 
@@ -107,7 +105,7 @@ static unsigned int stmmac_is_jumbo_frm(int len, int enh_desc)
 	return ret;
 }
 
-static void stmmac_init_dma_chain(void *des, dma_addr_t phy_addr,
+static void init_dma_chain(void *des, dma_addr_t phy_addr,
 				  unsigned int size, unsigned int extend_desc)
 {
 	/*
@@ -137,7 +135,7 @@ static void stmmac_init_dma_chain(void *des, dma_addr_t phy_addr,
 	}
 }
 
-static void stmmac_refill_desc3(void *priv_ptr, struct dma_desc *p)
+static void refill_desc3(void *priv_ptr, struct dma_desc *p)
 {
 	struct stmmac_rx_queue *rx_q = (struct stmmac_rx_queue *)priv_ptr;
 	struct stmmac_priv *priv = rx_q->priv_data;
@@ -153,7 +151,7 @@ static void stmmac_refill_desc3(void *priv_ptr, struct dma_desc *p)
 				      sizeof(struct dma_desc)));
 }
 
-static void stmmac_clean_desc3(void *priv_ptr, struct dma_desc *p)
+static void clean_desc3(void *priv_ptr, struct dma_desc *p)
 {
 	struct stmmac_tx_queue *tx_q = (struct stmmac_tx_queue *)priv_ptr;
 	struct stmmac_priv *priv = tx_q->priv_data;
@@ -171,9 +169,9 @@ static void stmmac_clean_desc3(void *priv_ptr, struct dma_desc *p)
 }
 
 const struct stmmac_mode_ops chain_mode_ops = {
-	.init = stmmac_init_dma_chain,
-	.is_jumbo_frm = stmmac_is_jumbo_frm,
-	.jumbo_frm = stmmac_jumbo_frm,
-	.refill_desc3 = stmmac_refill_desc3,
-	.clean_desc3 = stmmac_clean_desc3,
+	.init = init_dma_chain,
+	.is_jumbo_frm = is_jumbo_frm,
+	.jumbo_frm = jumbo_frm,
+	.refill_desc3 = refill_desc3,
+	.clean_desc3 = clean_desc3,
 };
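
jumbo_frm() above still implements the same split: a frame larger than
bmax is spread over several chained descriptors and only the last one is
closed. The loop below sketches that carving; bmax and the 9000-byte
length are illustrative, not the driver's exact buffer math:

#include <stdio.h>

int main(void)
{
	const int bmax = 4096 - 16;	/* assumed per-descriptor limit */
	int len = 9000, i = 0;

	while (len > 0) {
		int chunk = len > bmax ? bmax : len;

		printf("desc %d: %d bytes%s\n", i++, chunk,
		       len <= bmax ? " (last: close descriptor)" : "");
		len -= chunk;
	}
	return 0;
}
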
diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h
index ad2388a..59673c6 100644
--- a/drivers/net/ethernet/stmicro/stmmac/common.h
+++ b/drivers/net/ethernet/stmicro/stmmac/common.h
@@ -32,6 +32,7 @@
 #endif
 
 #include "descs.h"
+#include "hwif.h"
 #include "mmc.h"
 
 /* Synopsys Core versions */
@@ -377,197 +378,11 @@ struct dma_features {
 
 #define JUMBO_LEN		9000
 
-/* Descriptors helpers */
-struct stmmac_desc_ops {
-	/* DMA RX descriptor ring initialization */
-	void (*init_rx_desc) (struct dma_desc *p, int disable_rx_ic, int mode,
-			      int end);
-	/* DMA TX descriptor ring initialization */
-	void (*init_tx_desc) (struct dma_desc *p, int mode, int end);
-
-	/* Invoked by the xmit function to prepare the tx descriptor */
-	void (*prepare_tx_desc) (struct dma_desc *p, int is_fs, int len,
-				 bool csum_flag, int mode, bool tx_own,
-				 bool ls, unsigned int tot_pkt_len);
-	void (*prepare_tso_tx_desc)(struct dma_desc *p, int is_fs, int len1,
-				    int len2, bool tx_own, bool ls,
-				    unsigned int tcphdrlen,
-				    unsigned int tcppayloadlen);
-	/* Set/get the owner of the descriptor */
-	void (*set_tx_owner) (struct dma_desc *p);
-	int (*get_tx_owner) (struct dma_desc *p);
-	/* Clean the tx descriptor as soon as the tx irq is received */
-	void (*release_tx_desc) (struct dma_desc *p, int mode);
-	/* Clear interrupt on tx frame completion. When this bit is
-	 * set an interrupt happens as soon as the frame is transmitted */
-	void (*set_tx_ic)(struct dma_desc *p);
-	/* Last tx segment reports the transmit status */
-	int (*get_tx_ls) (struct dma_desc *p);
-	/* Return the transmit status looking at the TDES1 */
-	int (*tx_status) (void *data, struct stmmac_extra_stats *x,
-			  struct dma_desc *p, void __iomem *ioaddr);
-	/* Get the buffer size from the descriptor */
-	int (*get_tx_len) (struct dma_desc *p);
-	/* Handle extra events on specific interrupts hw dependent */
-	void (*set_rx_owner) (struct dma_desc *p);
-	/* Get the receive frame size */
-	int (*get_rx_frame_len) (struct dma_desc *p, int rx_coe_type);
-	/* Return the reception status looking at the RDES1 */
-	int (*rx_status) (void *data, struct stmmac_extra_stats *x,
-			  struct dma_desc *p);
-	void (*rx_extended_status) (void *data, struct stmmac_extra_stats *x,
-				    struct dma_extended_desc *p);
-	/* Set tx timestamp enable bit */
-	void (*enable_tx_timestamp) (struct dma_desc *p);
-	/* get tx timestamp status */
-	int (*get_tx_timestamp_status) (struct dma_desc *p);
-	/* get timestamp value */
-	 u64(*get_timestamp) (void *desc, u32 ats);
-	/* get rx timestamp status */
-	int (*get_rx_timestamp_status)(void *desc, void *next_desc, u32 ats);
-	/* Display ring */
-	void (*display_ring)(void *head, unsigned int size, bool rx);
-	/* set MSS via context descriptor */
-	void (*set_mss)(struct dma_desc *p, unsigned int mss);
-};
-
 extern const struct stmmac_desc_ops enh_desc_ops;
 extern const struct stmmac_desc_ops ndesc_ops;
 
-/* Specific DMA helpers */
-struct stmmac_dma_ops {
-	/* DMA core initialization */
-	int (*reset)(void __iomem *ioaddr);
-	void (*init)(void __iomem *ioaddr, struct stmmac_dma_cfg *dma_cfg,
-		     u32 dma_tx, u32 dma_rx, int atds);
-	void (*init_chan)(void __iomem *ioaddr,
-			  struct stmmac_dma_cfg *dma_cfg, u32 chan);
-	void (*init_rx_chan)(void __iomem *ioaddr,
-			     struct stmmac_dma_cfg *dma_cfg,
-			     u32 dma_rx_phy, u32 chan);
-	void (*init_tx_chan)(void __iomem *ioaddr,
-			     struct stmmac_dma_cfg *dma_cfg,
-			     u32 dma_tx_phy, u32 chan);
-	/* Configure the AXI Bus Mode Register */
-	void (*axi)(void __iomem *ioaddr, struct stmmac_axi *axi);
-	/* Dump DMA registers */
-	void (*dump_regs)(void __iomem *ioaddr, u32 *reg_space);
-	/* Set tx/rx threshold in the csr6 register
-	 * An invalid value enables the store-and-forward mode */
-	void (*dma_mode)(void __iomem *ioaddr, int txmode, int rxmode,
-			 int rxfifosz);
-	void (*dma_rx_mode)(void __iomem *ioaddr, int mode, u32 channel,
-			    int fifosz, u8 qmode);
-	void (*dma_tx_mode)(void __iomem *ioaddr, int mode, u32 channel,
-			    int fifosz, u8 qmode);
-	/* To track extra statistic (if supported) */
-	void (*dma_diagnostic_fr) (void *data, struct stmmac_extra_stats *x,
-				   void __iomem *ioaddr);
-	void (*enable_dma_transmission) (void __iomem *ioaddr);
-	void (*enable_dma_irq)(void __iomem *ioaddr, u32 chan);
-	void (*disable_dma_irq)(void __iomem *ioaddr, u32 chan);
-	void (*start_tx)(void __iomem *ioaddr, u32 chan);
-	void (*stop_tx)(void __iomem *ioaddr, u32 chan);
-	void (*start_rx)(void __iomem *ioaddr, u32 chan);
-	void (*stop_rx)(void __iomem *ioaddr, u32 chan);
-	int (*dma_interrupt) (void __iomem *ioaddr,
-			      struct stmmac_extra_stats *x, u32 chan);
-	/* If supported then get the optional core features */
-	void (*get_hw_feature)(void __iomem *ioaddr,
-			       struct dma_features *dma_cap);
-	/* Program the HW RX Watchdog */
-	void (*rx_watchdog)(void __iomem *ioaddr, u32 riwt, u32 number_chan);
-	void (*set_tx_ring_len)(void __iomem *ioaddr, u32 len, u32 chan);
-	void (*set_rx_ring_len)(void __iomem *ioaddr, u32 len, u32 chan);
-	void (*set_rx_tail_ptr)(void __iomem *ioaddr, u32 tail_ptr, u32 chan);
-	void (*set_tx_tail_ptr)(void __iomem *ioaddr, u32 tail_ptr, u32 chan);
-	void (*enable_tso)(void __iomem *ioaddr, bool en, u32 chan);
-};
-
 struct mac_device_info;
 
-/* Helpers to program the MAC core */
-struct stmmac_ops {
-	/* MAC core initialization */
-	void (*core_init)(struct mac_device_info *hw, struct net_device *dev);
-	/* Enable the MAC RX/TX */
-	void (*set_mac)(void __iomem *ioaddr, bool enable);
-	/* Enable and verify that the IPC module is supported */
-	int (*rx_ipc)(struct mac_device_info *hw);
-	/* Enable RX Queues */
-	void (*rx_queue_enable)(struct mac_device_info *hw, u8 mode, u32 queue);
-	/* RX Queues Priority */
-	void (*rx_queue_prio)(struct mac_device_info *hw, u32 prio, u32 queue);
-	/* TX Queues Priority */
-	void (*tx_queue_prio)(struct mac_device_info *hw, u32 prio, u32 queue);
-	/* RX Queues Routing */
-	void (*rx_queue_routing)(struct mac_device_info *hw, u8 packet,
-				 u32 queue);
-	/* Program RX Algorithms */
-	void (*prog_mtl_rx_algorithms)(struct mac_device_info *hw, u32 rx_alg);
-	/* Program TX Algorithms */
-	void (*prog_mtl_tx_algorithms)(struct mac_device_info *hw, u32 tx_alg);
-	/* Set MTL TX queues weight */
-	void (*set_mtl_tx_queue_weight)(struct mac_device_info *hw,
-					u32 weight, u32 queue);
-	/* RX MTL queue to RX dma mapping */
-	void (*map_mtl_to_dma)(struct mac_device_info *hw, u32 queue, u32 chan);
-	/* Configure AV Algorithm */
-	void (*config_cbs)(struct mac_device_info *hw, u32 send_slope,
-			   u32 idle_slope, u32 high_credit, u32 low_credit,
-			   u32 queue);
-	/* Dump MAC registers */
-	void (*dump_regs)(struct mac_device_info *hw, u32 *reg_space);
-	/* Handle extra events on specific interrupts hw dependent */
-	int (*host_irq_status)(struct mac_device_info *hw,
-			       struct stmmac_extra_stats *x);
-	/* Handle MTL interrupts */
-	int (*host_mtl_irq_status)(struct mac_device_info *hw, u32 chan);
-	/* Multicast filter setting */
-	void (*set_filter)(struct mac_device_info *hw, struct net_device *dev);
-	/* Flow control setting */
-	void (*flow_ctrl)(struct mac_device_info *hw, unsigned int duplex,
-			  unsigned int fc, unsigned int pause_time, u32 tx_cnt);
-	/* Set power management mode (e.g. magic frame) */
-	void (*pmt)(struct mac_device_info *hw, unsigned long mode);
-	/* Set/Get Unicast MAC addresses */
-	void (*set_umac_addr)(struct mac_device_info *hw, unsigned char *addr,
-			      unsigned int reg_n);
-	void (*get_umac_addr)(struct mac_device_info *hw, unsigned char *addr,
-			      unsigned int reg_n);
-	void (*set_eee_mode)(struct mac_device_info *hw,
-			     bool en_tx_lpi_clockgating);
-	void (*reset_eee_mode)(struct mac_device_info *hw);
-	void (*set_eee_timer)(struct mac_device_info *hw, int ls, int tw);
-	void (*set_eee_pls)(struct mac_device_info *hw, int link);
-	void (*debug)(void __iomem *ioaddr, struct stmmac_extra_stats *x,
-		      u32 rx_queues, u32 tx_queues);
-	/* PCS calls */
-	void (*pcs_ctrl_ane)(void __iomem *ioaddr, bool ane, bool srgmi_ral,
-			     bool loopback);
-	void (*pcs_rane)(void __iomem *ioaddr, bool restart);
-	void (*pcs_get_adv_lp)(void __iomem *ioaddr, struct rgmii_adv *adv);
-	/* Safety Features */
-	int (*safety_feat_config)(void __iomem *ioaddr, unsigned int asp);
-	bool (*safety_feat_irq_status)(struct net_device *ndev,
-			void __iomem *ioaddr, unsigned int asp,
-			struct stmmac_safety_stats *stats);
-	const char *(*safety_feat_dump)(struct stmmac_safety_stats *stats,
-			int index, unsigned long *count);
-};
-
-/* PTP and HW Timer helpers */
-struct stmmac_hwtimestamp {
-	void (*config_hw_tstamping) (void __iomem *ioaddr, u32 data);
-	u32 (*config_sub_second_increment)(void __iomem *ioaddr, u32 ptp_clock,
-					   int gmac4);
-	int (*init_systime) (void __iomem *ioaddr, u32 sec, u32 nsec);
-	int (*config_addend) (void __iomem *ioaddr, u32 addend);
-	int (*adjust_systime) (void __iomem *ioaddr, u32 sec, u32 nsec,
-			       int add_sub, int gmac4);
-	 u64(*get_systime) (void __iomem *ioaddr);
-};
-
 extern const struct stmmac_hwtimestamp stmmac_ptp;
 extern const struct stmmac_mode_ops dwmac4_ring_mode_ops;
 
@@ -590,18 +405,6 @@ struct mii_regs {
 	unsigned int clk_csr_mask;
 };
 
-/* Helpers to manage the descriptors for chain and ring modes */
-struct stmmac_mode_ops {
-	void (*init) (void *des, dma_addr_t phy_addr, unsigned int size,
-		      unsigned int extend_desc);
-	unsigned int (*is_jumbo_frm) (int len, int ehn_desc);
-	int (*jumbo_frm)(void *priv, struct sk_buff *skb, int csum);
-	int (*set_16kib_bfsize)(int mtu);
-	void (*init_desc3)(struct dma_desc *p);
-	void (*refill_desc3) (void *priv, struct dma_desc *p);
-	void (*clean_desc3) (void *priv, struct dma_desc *p);
-};
-
 struct mac_device_info {
 	const struct stmmac_ops *mac;
 	const struct stmmac_desc_ops *desc;
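
The ops tables removed from common.h move into the new hwif.h below,
where wrapper macros such as stmmac_prepare_tx_desc() dispatch through
them and yield -EINVAL when a hook is absent. A compilable user-space
reduction of that dispatch pattern (GNU C statement expressions and
named variadic macros, as in the kernel; names simplified):

#include <errno.h>
#include <stdio.h>

struct desc_ops { int (*get_tx_owner)(int desc); };
struct hw_info { const struct desc_ops *desc; };
struct priv { struct hw_info *hw; };

#define do_callback(__priv, __module, __cname, __args...) \
({ \
	int __result = -EINVAL; \
	if ((__priv)->hw->__module->__cname) \
		__result = (__priv)->hw->__module->__cname(__args); \
	__result; \
})

static int get_tx_owner(int desc) { return desc & 1; }

int main(void)
{
	struct desc_ops ops = { .get_tx_owner = get_tx_owner };
	struct hw_info hw = { .desc = &ops };
	struct priv priv = { .hw = &hw };

	printf("present hook -> %d\n", do_callback(&priv, desc, get_tx_owner, 3));
	ops.get_tx_owner = NULL;
	printf("missing hook -> %d\n", do_callback(&priv, desc, get_tx_owner, 3));
	return 0;
}
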
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c
index 2a6521d..65ed896c 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c
@@ -223,7 +223,7 @@ static int dwmac4_wrback_get_tx_timestamp_status(struct dma_desc *p)
 	return 0;
 }
 
-static inline u64 dwmac4_get_timestamp(void *desc, u32 ats)
+static inline void dwmac4_get_timestamp(void *desc, u32 ats, u64 *ts)
 {
 	struct dma_desc *p = (struct dma_desc *)desc;
 	u64 ns;
@@ -232,7 +232,7 @@ static inline u64 dwmac4_get_timestamp(void *desc, u32 ats)
 	/* convert high/sec time stamp value to nanosecond */
 	ns += le32_to_cpu(p->des1) * 1000000000ULL;
 
-	return ns;
+	*ts = ns;
 }
 
 static int dwmac4_rx_check_timestamp(void *desc)
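
dwmac4_get_timestamp() now hands the nanosecond value back through an
out-parameter, freeing the return slot so the hwif wrappers can report
whether the hook exists at all. A sketch of the converted shape, with
invented descriptor contents:

#include <stdio.h>
#include <stdint.h>

static void get_timestamp(const uint32_t *desc, uint64_t *ts)
{
	/* ns = des0 (nanoseconds) + des1 (seconds) * 1e9, as in the driver */
	*ts = desc[0] + (uint64_t)desc[1] * 1000000000ULL;
}

int main(void)
{
	uint32_t desc[2] = { 500, 3 };	/* invented des0/des1 contents */
	uint64_t ns;

	get_timestamp(desc, &ns);
	printf("%llu ns\n", (unsigned long long)ns);
	return 0;
}
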
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac5.c b/drivers/net/ethernet/stmicro/stmmac/dwmac5.c
index 860de39..2978550 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac5.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac5.c
@@ -237,15 +237,16 @@ int dwmac5_safety_feat_config(void __iomem *ioaddr, unsigned int asp)
 	return 0;
 }
 
-bool dwmac5_safety_feat_irq_status(struct net_device *ndev,
+int dwmac5_safety_feat_irq_status(struct net_device *ndev,
 		void __iomem *ioaddr, unsigned int asp,
 		struct stmmac_safety_stats *stats)
 {
-	bool ret = false, err, corr;
+	bool err, corr;
 	u32 mtl, dma;
+	int ret = 0;
 
 	if (!asp)
-		return false;
+		return -EINVAL;
 
 	mtl = readl(ioaddr + MTL_SAFETY_INT_STATUS);
 	dma = readl(ioaddr + DMA_SAFETY_INT_STATUS);
@@ -282,17 +283,19 @@ static const struct dwmac5_error {
 	{ dwmac5_dma_errors },
 };
 
-const char *dwmac5_safety_feat_dump(struct stmmac_safety_stats *stats,
-			int index, unsigned long *count)
+int dwmac5_safety_feat_dump(struct stmmac_safety_stats *stats,
+			int index, unsigned long *count, const char **desc)
 {
 	int module = index / 32, offset = index % 32;
 	unsigned long *ptr = (unsigned long *)stats;
 
 	if (module >= ARRAY_SIZE(dwmac5_all_errors))
-		return NULL;
+		return -EINVAL;
 	if (!dwmac5_all_errors[module].desc[offset].valid)
-		return NULL;
+		return -EINVAL;
 	if (count)
 		*count = *(ptr + index);
-	return dwmac5_all_errors[module].desc[offset].desc;
+	if (desc)
+		*desc = dwmac5_all_errors[module].desc[offset].desc;
+	return 0;
 }
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac5.h b/drivers/net/ethernet/stmicro/stmmac/dwmac5.h
index a0d2c44..bd4c466 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac5.h
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac5.h
@@ -43,10 +43,10 @@
 #define DMA_ECC_INT_STATUS		0x00001088
 
 int dwmac5_safety_feat_config(void __iomem *ioaddr, unsigned int asp);
-bool dwmac5_safety_feat_irq_status(struct net_device *ndev,
+int dwmac5_safety_feat_irq_status(struct net_device *ndev,
 		void __iomem *ioaddr, unsigned int asp,
 		struct stmmac_safety_stats *stats);
-const char *dwmac5_safety_feat_dump(struct stmmac_safety_stats *stats,
-			int index, unsigned long *count);
+int dwmac5_safety_feat_dump(struct stmmac_safety_stats *stats,
+			int index, unsigned long *count, const char **desc);
 
 #endif /* __DWMAC5_H__ */
diff --git a/drivers/net/ethernet/stmicro/stmmac/enh_desc.c b/drivers/net/ethernet/stmicro/stmmac/enh_desc.c
index 6768a25..3bfb3f5 100644
--- a/drivers/net/ethernet/stmicro/stmmac/enh_desc.c
+++ b/drivers/net/ethernet/stmicro/stmmac/enh_desc.c
@@ -382,7 +382,7 @@ static int enh_desc_get_tx_timestamp_status(struct dma_desc *p)
 	return (le32_to_cpu(p->des0) & ETDES0_TIME_STAMP_STATUS) >> 17;
 }
 
-static u64 enh_desc_get_timestamp(void *desc, u32 ats)
+static void enh_desc_get_timestamp(void *desc, u32 ats, u64 *ts)
 {
 	u64 ns;
 
@@ -397,7 +397,7 @@ static u64 enh_desc_get_timestamp(void *desc, u32 ats)
 		ns += le32_to_cpu(p->des3) * 1000000000ULL;
 	}
 
-	return ns;
+	*ts = ns;
 }
 
 static int enh_desc_get_rx_timestamp_status(void *desc, void *next_desc,
diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.h b/drivers/net/ethernet/stmicro/stmmac/hwif.h
new file mode 100644
index 0000000..f81ded4
--- /dev/null
+++ b/drivers/net/ethernet/stmicro/stmmac/hwif.h
@@ -0,0 +1,421 @@
+// SPDX-License-Identifier: (GPL-2.0 OR MIT)
+// Copyright (c) 2018 Synopsys, Inc. and/or its affiliates.
+// stmmac HW Interface Callbacks
+
+#ifndef __STMMAC_HWIF_H__
+#define __STMMAC_HWIF_H__
+
+#define stmmac_do_void_callback(__priv, __module, __cname, __arg0, __args...) \
+({ \
+	int __result = -EINVAL; \
+	if ((__priv)->hw->__module->__cname) { \
+		(__priv)->hw->__module->__cname((__arg0), ##__args); \
+		__result = 0; \
+	} \
+	__result; \
+})
+#define stmmac_do_callback(__priv, __module, __cname, __arg0, __args...) \
+({ \
+	int __result = -EINVAL; \
+	if ((__priv)->hw->__module->__cname) \
+		__result = (__priv)->hw->__module->__cname((__arg0), ##__args); \
+	__result; \
+})
+
+struct stmmac_extra_stats;
+struct stmmac_safety_stats;
+struct dma_desc;
+struct dma_extended_desc;
+
+/* Descriptor helpers */
+struct stmmac_desc_ops {
+	/* DMA RX descriptor ring initialization */
+	void (*init_rx_desc)(struct dma_desc *p, int disable_rx_ic, int mode,
+			int end);
+	/* DMA TX descriptor ring initialization */
+	void (*init_tx_desc)(struct dma_desc *p, int mode, int end);
+	/* Invoked by the xmit function to prepare the tx descriptor */
+	void (*prepare_tx_desc)(struct dma_desc *p, int is_fs, int len,
+			bool csum_flag, int mode, bool tx_own, bool ls,
+			unsigned int tot_pkt_len);
+	void (*prepare_tso_tx_desc)(struct dma_desc *p, int is_fs, int len1,
+			int len2, bool tx_own, bool ls, unsigned int tcphdrlen,
+			unsigned int tcppayloadlen);
+	/* Set/get the owner of the descriptor */
+	void (*set_tx_owner)(struct dma_desc *p);
+	int (*get_tx_owner)(struct dma_desc *p);
+	/* Clean the tx descriptor as soon as the tx irq is received */
+	void (*release_tx_desc)(struct dma_desc *p, int mode);
+	/* Clear interrupt on tx frame completion. When this bit is
+	 * set, an interrupt fires as soon as the frame is transmitted. */
+	void (*set_tx_ic)(struct dma_desc *p);
+	/* Last tx segment reports the transmit status */
+	int (*get_tx_ls)(struct dma_desc *p);
+	/* Return the transmit status looking at the TDES1 */
+	int (*tx_status)(void *data, struct stmmac_extra_stats *x,
+			struct dma_desc *p, void __iomem *ioaddr);
+	/* Get the buffer size from the descriptor */
+	int (*get_tx_len)(struct dma_desc *p);
+	/* Set the receive descriptor owner */
+	void (*set_rx_owner)(struct dma_desc *p);
+	/* Get the receive frame size */
+	int (*get_rx_frame_len)(struct dma_desc *p, int rx_coe_type);
+	/* Return the reception status looking at the RDES1 */
+	int (*rx_status)(void *data, struct stmmac_extra_stats *x,
+			struct dma_desc *p);
+	void (*rx_extended_status)(void *data, struct stmmac_extra_stats *x,
+			struct dma_extended_desc *p);
+	/* Set tx timestamp enable bit */
+	void (*enable_tx_timestamp) (struct dma_desc *p);
+	/* Get tx timestamp status */
+	int (*get_tx_timestamp_status) (struct dma_desc *p);
+	/* Get timestamp value */
+	void (*get_timestamp)(void *desc, u32 ats, u64 *ts);
+	/* Get rx timestamp status */
+	int (*get_rx_timestamp_status)(void *desc, void *next_desc, u32 ats);
+	/* Display ring */
+	void (*display_ring)(void *head, unsigned int size, bool rx);
+	/* set MSS via context descriptor */
+	void (*set_mss)(struct dma_desc *p, unsigned int mss);
+};
+
+#define stmmac_init_rx_desc(__priv, __args...) \
+	stmmac_do_void_callback(__priv, desc, init_rx_desc, __args)
+#define stmmac_init_tx_desc(__priv, __args...) \
+	stmmac_do_void_callback(__priv, desc, init_tx_desc, __args)
+#define stmmac_prepare_tx_desc(__priv, __args...) \
+	stmmac_do_void_callback(__priv, desc, prepare_tx_desc, __args)
+#define stmmac_prepare_tso_tx_desc(__priv, __args...) \
+	stmmac_do_void_callback(__priv, desc, prepare_tso_tx_desc, __args)
+#define stmmac_set_tx_owner(__priv, __args...) \
+	stmmac_do_void_callback(__priv, desc, set_tx_owner, __args)
+#define stmmac_get_tx_owner(__priv, __args...) \
+	stmmac_do_callback(__priv, desc, get_tx_owner, __args)
+#define stmmac_release_tx_desc(__priv, __args...) \
+	stmmac_do_void_callback(__priv, desc, release_tx_desc, __args)
+#define stmmac_set_tx_ic(__priv, __args...) \
+	stmmac_do_void_callback(__priv, desc, set_tx_ic, __args)
+#define stmmac_get_tx_ls(__priv, __args...) \
+	stmmac_do_callback(__priv, desc, get_tx_ls, __args)
+#define stmmac_tx_status(__priv, __args...) \
+	stmmac_do_callback(__priv, desc, tx_status, __args)
+#define stmmac_get_tx_len(__priv, __args...) \
+	stmmac_do_callback(__priv, desc, get_tx_len, __args)
+#define stmmac_set_rx_owner(__priv, __args...) \
+	stmmac_do_void_callback(__priv, desc, set_rx_owner, __args)
+#define stmmac_get_rx_frame_len(__priv, __args...) \
+	stmmac_do_callback(__priv, desc, get_rx_frame_len, __args)
+#define stmmac_rx_status(__priv, __args...) \
+	stmmac_do_callback(__priv, desc, rx_status, __args)
+#define stmmac_rx_extended_status(__priv, __args...) \
+	stmmac_do_void_callback(__priv, desc, rx_extended_status, __args)
+#define stmmac_enable_tx_timestamp(__priv, __args...) \
+	stmmac_do_void_callback(__priv, desc, enable_tx_timestamp, __args)
+#define stmmac_get_tx_timestamp_status(__priv, __args...) \
+	stmmac_do_callback(__priv, desc, get_tx_timestamp_status, __args)
+#define stmmac_get_timestamp(__priv, __args...) \
+	stmmac_do_void_callback(__priv, desc, get_timestamp, __args)
+#define stmmac_get_rx_timestamp_status(__priv, __args...) \
+	stmmac_do_callback(__priv, desc, get_rx_timestamp_status, __args)
+#define stmmac_display_ring(__priv, __args...) \
+	stmmac_do_void_callback(__priv, desc, display_ring, __args)
+#define stmmac_set_mss(__priv, __args...) \
+	stmmac_do_void_callback(__priv, desc, set_mss, __args)
+
+struct stmmac_dma_cfg;
+struct dma_features;
+
+/* Specific DMA helpers */
+struct stmmac_dma_ops {
+	/* DMA core initialization */
+	int (*reset)(void __iomem *ioaddr);
+	void (*init)(void __iomem *ioaddr, struct stmmac_dma_cfg *dma_cfg,
+		     u32 dma_tx, u32 dma_rx, int atds);
+	void (*init_chan)(void __iomem *ioaddr,
+			  struct stmmac_dma_cfg *dma_cfg, u32 chan);
+	void (*init_rx_chan)(void __iomem *ioaddr,
+			     struct stmmac_dma_cfg *dma_cfg,
+			     u32 dma_rx_phy, u32 chan);
+	void (*init_tx_chan)(void __iomem *ioaddr,
+			     struct stmmac_dma_cfg *dma_cfg,
+			     u32 dma_tx_phy, u32 chan);
+	/* Configure the AXI Bus Mode Register */
+	void (*axi)(void __iomem *ioaddr, struct stmmac_axi *axi);
+	/* Dump DMA registers */
+	void (*dump_regs)(void __iomem *ioaddr, u32 *reg_space);
+	/* Set tx/rx threshold in the csr6 register.
+	 * An invalid value enables the store-and-forward mode. */
+	void (*dma_mode)(void __iomem *ioaddr, int txmode, int rxmode,
+			 int rxfifosz);
+	void (*dma_rx_mode)(void __iomem *ioaddr, int mode, u32 channel,
+			    int fifosz, u8 qmode);
+	void (*dma_tx_mode)(void __iomem *ioaddr, int mode, u32 channel,
+			    int fifosz, u8 qmode);
+	/* To track extra statistic (if supported) */
+	void (*dma_diagnostic_fr) (void *data, struct stmmac_extra_stats *x,
+				   void __iomem *ioaddr);
+	void (*enable_dma_transmission) (void __iomem *ioaddr);
+	void (*enable_dma_irq)(void __iomem *ioaddr, u32 chan);
+	void (*disable_dma_irq)(void __iomem *ioaddr, u32 chan);
+	void (*start_tx)(void __iomem *ioaddr, u32 chan);
+	void (*stop_tx)(void __iomem *ioaddr, u32 chan);
+	void (*start_rx)(void __iomem *ioaddr, u32 chan);
+	void (*stop_rx)(void __iomem *ioaddr, u32 chan);
+	int (*dma_interrupt) (void __iomem *ioaddr,
+			      struct stmmac_extra_stats *x, u32 chan);
+	/* If supported, get the optional core features */
+	void (*get_hw_feature)(void __iomem *ioaddr,
+			       struct dma_features *dma_cap);
+	/* Program the HW RX Watchdog */
+	void (*rx_watchdog)(void __iomem *ioaddr, u32 riwt, u32 number_chan);
+	void (*set_tx_ring_len)(void __iomem *ioaddr, u32 len, u32 chan);
+	void (*set_rx_ring_len)(void __iomem *ioaddr, u32 len, u32 chan);
+	void (*set_rx_tail_ptr)(void __iomem *ioaddr, u32 tail_ptr, u32 chan);
+	void (*set_tx_tail_ptr)(void __iomem *ioaddr, u32 tail_ptr, u32 chan);
+	void (*enable_tso)(void __iomem *ioaddr, bool en, u32 chan);
+};
+
+#define stmmac_reset(__priv, __args...) \
+	stmmac_do_callback(__priv, dma, reset, __args)
+#define stmmac_dma_init(__priv, __args...) \
+	stmmac_do_void_callback(__priv, dma, init, __args)
+#define stmmac_init_chan(__priv, __args...) \
+	stmmac_do_void_callback(__priv, dma, init_chan, __args)
+#define stmmac_init_rx_chan(__priv, __args...) \
+	stmmac_do_void_callback(__priv, dma, init_rx_chan, __args)
+#define stmmac_init_tx_chan(__priv, __args...) \
+	stmmac_do_void_callback(__priv, dma, init_tx_chan, __args)
+#define stmmac_axi(__priv, __args...) \
+	stmmac_do_void_callback(__priv, dma, axi, __args)
+#define stmmac_dump_dma_regs(__priv, __args...) \
+	stmmac_do_void_callback(__priv, dma, dump_regs, __args)
+#define stmmac_dma_mode(__priv, __args...) \
+	stmmac_do_void_callback(__priv, dma, dma_mode, __args)
+#define stmmac_dma_rx_mode(__priv, __args...) \
+	stmmac_do_void_callback(__priv, dma, dma_rx_mode, __args)
+#define stmmac_dma_tx_mode(__priv, __args...) \
+	stmmac_do_void_callback(__priv, dma, dma_tx_mode, __args)
+#define stmmac_dma_diagnostic_fr(__priv, __args...) \
+	stmmac_do_void_callback(__priv, dma, dma_diagnostic_fr, __args)
+#define stmmac_enable_dma_transmission(__priv, __args...) \
+	stmmac_do_void_callback(__priv, dma, enable_dma_transmission, __args)
+#define stmmac_enable_dma_irq(__priv, __args...) \
+	stmmac_do_void_callback(__priv, dma, enable_dma_irq, __args)
+#define stmmac_disable_dma_irq(__priv, __args...) \
+	stmmac_do_void_callback(__priv, dma, disable_dma_irq, __args)
+#define stmmac_start_tx(__priv, __args...) \
+	stmmac_do_void_callback(__priv, dma, start_tx, __args)
+#define stmmac_stop_tx(__priv, __args...) \
+	stmmac_do_void_callback(__priv, dma, stop_tx, __args)
+#define stmmac_start_rx(__priv, __args...) \
+	stmmac_do_void_callback(__priv, dma, start_rx, __args)
+#define stmmac_stop_rx(__priv, __args...) \
+	stmmac_do_void_callback(__priv, dma, stop_rx, __args)
+#define stmmac_dma_interrupt_status(__priv, __args...) \
+	stmmac_do_callback(__priv, dma, dma_interrupt, __args)
+#define stmmac_get_hw_feature(__priv, __args...) \
+	stmmac_do_void_callback(__priv, dma, get_hw_feature, __args)
+#define stmmac_rx_watchdog(__priv, __args...) \
+	stmmac_do_void_callback(__priv, dma, rx_watchdog, __args)
+#define stmmac_set_tx_ring_len(__priv, __args...) \
+	stmmac_do_void_callback(__priv, dma, set_tx_ring_len, __args)
+#define stmmac_set_rx_ring_len(__priv, __args...) \
+	stmmac_do_void_callback(__priv, dma, set_rx_ring_len, __args)
+#define stmmac_set_rx_tail_ptr(__priv, __args...) \
+	stmmac_do_void_callback(__priv, dma, set_rx_tail_ptr, __args)
+#define stmmac_set_tx_tail_ptr(__priv, __args...) \
+	stmmac_do_void_callback(__priv, dma, set_tx_tail_ptr, __args)
+#define stmmac_enable_tso(__priv, __args...) \
+	stmmac_do_void_callback(__priv, dma, enable_tso, __args)
+
+struct mac_device_info;
+struct net_device;
+struct rgmii_adv;
+struct stmmac_safety_stats;
+
+/* Helpers to program the MAC core */
+struct stmmac_ops {
+	/* MAC core initialization */
+	void (*core_init)(struct mac_device_info *hw, struct net_device *dev);
+	/* Enable the MAC RX/TX */
+	void (*set_mac)(void __iomem *ioaddr, bool enable);
+	/* Enable RX IPC checksum offload and verify it is supported */
+	int (*rx_ipc)(struct mac_device_info *hw);
+	/* Enable RX Queues */
+	void (*rx_queue_enable)(struct mac_device_info *hw, u8 mode, u32 queue);
+	/* RX Queues Priority */
+	void (*rx_queue_prio)(struct mac_device_info *hw, u32 prio, u32 queue);
+	/* TX Queues Priority */
+	void (*tx_queue_prio)(struct mac_device_info *hw, u32 prio, u32 queue);
+	/* RX Queues Routing */
+	void (*rx_queue_routing)(struct mac_device_info *hw, u8 packet,
+				 u32 queue);
+	/* Program RX Algorithms */
+	void (*prog_mtl_rx_algorithms)(struct mac_device_info *hw, u32 rx_alg);
+	/* Program TX Algorithms */
+	void (*prog_mtl_tx_algorithms)(struct mac_device_info *hw, u32 tx_alg);
+	/* Set MTL TX queues weight */
+	void (*set_mtl_tx_queue_weight)(struct mac_device_info *hw,
+					u32 weight, u32 queue);
+	/* RX MTL queue to RX dma mapping */
+	void (*map_mtl_to_dma)(struct mac_device_info *hw, u32 queue, u32 chan);
+	/* Configure AV Algorithm */
+	void (*config_cbs)(struct mac_device_info *hw, u32 send_slope,
+			   u32 idle_slope, u32 high_credit, u32 low_credit,
+			   u32 queue);
+	/* Dump MAC registers */
+	void (*dump_regs)(struct mac_device_info *hw, u32 *reg_space);
+	/* Handle extra events on specific interrupts hw dependent */
+	int (*host_irq_status)(struct mac_device_info *hw,
+			       struct stmmac_extra_stats *x);
+	/* Handle MTL interrupts */
+	int (*host_mtl_irq_status)(struct mac_device_info *hw, u32 chan);
+	/* Multicast filter setting */
+	void (*set_filter)(struct mac_device_info *hw, struct net_device *dev);
+	/* Flow control setting */
+	void (*flow_ctrl)(struct mac_device_info *hw, unsigned int duplex,
+			  unsigned int fc, unsigned int pause_time, u32 tx_cnt);
+	/* Set power management mode (e.g. magic frame) */
+	void (*pmt)(struct mac_device_info *hw, unsigned long mode);
+	/* Set/Get Unicast MAC addresses */
+	void (*set_umac_addr)(struct mac_device_info *hw, unsigned char *addr,
+			      unsigned int reg_n);
+	void (*get_umac_addr)(struct mac_device_info *hw, unsigned char *addr,
+			      unsigned int reg_n);
+	void (*set_eee_mode)(struct mac_device_info *hw,
+			     bool en_tx_lpi_clockgating);
+	void (*reset_eee_mode)(struct mac_device_info *hw);
+	void (*set_eee_timer)(struct mac_device_info *hw, int ls, int tw);
+	void (*set_eee_pls)(struct mac_device_info *hw, int link);
+	void (*debug)(void __iomem *ioaddr, struct stmmac_extra_stats *x,
+		      u32 rx_queues, u32 tx_queues);
+	/* PCS calls */
+	void (*pcs_ctrl_ane)(void __iomem *ioaddr, bool ane, bool srgmi_ral,
+			     bool loopback);
+	void (*pcs_rane)(void __iomem *ioaddr, bool restart);
+	void (*pcs_get_adv_lp)(void __iomem *ioaddr, struct rgmii_adv *adv);
+	/* Safety Features */
+	int (*safety_feat_config)(void __iomem *ioaddr, unsigned int asp);
+	int (*safety_feat_irq_status)(struct net_device *ndev,
+			void __iomem *ioaddr, unsigned int asp,
+			struct stmmac_safety_stats *stats);
+	int (*safety_feat_dump)(struct stmmac_safety_stats *stats,
+			int index, unsigned long *count, const char **desc);
+};
+
+#define stmmac_core_init(__priv, __args...) \
+	stmmac_do_void_callback(__priv, mac, core_init, __args)
+#define stmmac_mac_set(__priv, __args...) \
+	stmmac_do_void_callback(__priv, mac, set_mac, __args)
+#define stmmac_rx_ipc(__priv, __args...) \
+	stmmac_do_callback(__priv, mac, rx_ipc, __args)
+#define stmmac_rx_queue_enable(__priv, __args...) \
+	stmmac_do_void_callback(__priv, mac, rx_queue_enable, __args)
+#define stmmac_rx_queue_prio(__priv, __args...) \
+	stmmac_do_void_callback(__priv, mac, rx_queue_prio, __args)
+#define stmmac_tx_queue_prio(__priv, __args...) \
+	stmmac_do_void_callback(__priv, mac, tx_queue_prio, __args)
+#define stmmac_rx_queue_routing(__priv, __args...) \
+	stmmac_do_void_callback(__priv, mac, rx_queue_routing, __args)
+#define stmmac_prog_mtl_rx_algorithms(__priv, __args...) \
+	stmmac_do_void_callback(__priv, mac, prog_mtl_rx_algorithms, __args)
+#define stmmac_prog_mtl_tx_algorithms(__priv, __args...) \
+	stmmac_do_void_callback(__priv, mac, prog_mtl_tx_algorithms, __args)
+#define stmmac_set_mtl_tx_queue_weight(__priv, __args...) \
+	stmmac_do_void_callback(__priv, mac, set_mtl_tx_queue_weight, __args)
+#define stmmac_map_mtl_to_dma(__priv, __args...) \
+	stmmac_do_void_callback(__priv, mac, map_mtl_to_dma, __args)
+#define stmmac_config_cbs(__priv, __args...) \
+	stmmac_do_void_callback(__priv, mac, config_cbs, __args)
+#define stmmac_dump_mac_regs(__priv, __args...) \
+	stmmac_do_void_callback(__priv, mac, dump_regs, __args)
+#define stmmac_host_irq_status(__priv, __args...) \
+	stmmac_do_callback(__priv, mac, host_irq_status, __args)
+#define stmmac_host_mtl_irq_status(__priv, __args...) \
+	stmmac_do_callback(__priv, mac, host_mtl_irq_status, __args)
+#define stmmac_set_filter(__priv, __args...) \
+	stmmac_do_void_callback(__priv, mac, set_filter, __args)
+#define stmmac_flow_ctrl(__priv, __args...) \
+	stmmac_do_void_callback(__priv, mac, flow_ctrl, __args)
+#define stmmac_pmt(__priv, __args...) \
+	stmmac_do_void_callback(__priv, mac, pmt, __args)
+#define stmmac_set_umac_addr(__priv, __args...) \
+	stmmac_do_void_callback(__priv, mac, set_umac_addr, __args)
+#define stmmac_get_umac_addr(__priv, __args...) \
+	stmmac_do_void_callback(__priv, mac, get_umac_addr, __args)
+#define stmmac_set_eee_mode(__priv, __args...) \
+	stmmac_do_void_callback(__priv, mac, set_eee_mode, __args)
+#define stmmac_reset_eee_mode(__priv, __args...) \
+	stmmac_do_void_callback(__priv, mac, reset_eee_mode, __args)
+#define stmmac_set_eee_timer(__priv, __args...) \
+	stmmac_do_void_callback(__priv, mac, set_eee_timer, __args)
+#define stmmac_set_eee_pls(__priv, __args...) \
+	stmmac_do_void_callback(__priv, mac, set_eee_pls, __args)
+#define stmmac_mac_debug(__priv, __args...) \
+	stmmac_do_void_callback(__priv, mac, debug, __args)
+#define stmmac_pcs_ctrl_ane(__priv, __args...) \
+	stmmac_do_void_callback(__priv, mac, pcs_ctrl_ane, __args)
+#define stmmac_pcs_rane(__priv, __args...) \
+	stmmac_do_void_callback(__priv, mac, pcs_rane, __args)
+#define stmmac_pcs_get_adv_lp(__priv, __args...) \
+	stmmac_do_void_callback(__priv, mac, pcs_get_adv_lp, __args)
+#define stmmac_safety_feat_config(__priv, __args...) \
+	stmmac_do_callback(__priv, mac, safety_feat_config, __args)
+#define stmmac_safety_feat_irq_status(__priv, __args...) \
+	stmmac_do_callback(__priv, mac, safety_feat_irq_status, __args)
+#define stmmac_safety_feat_dump(__priv, __args...) \
+	stmmac_do_callback(__priv, mac, safety_feat_dump, __args)
+
+/* PTP and HW Timer helpers */
+struct stmmac_hwtimestamp {
+	void (*config_hw_tstamping) (void __iomem *ioaddr, u32 data);
+	void (*config_sub_second_increment)(void __iomem *ioaddr, u32 ptp_clock,
+					   int gmac4, u32 *ssinc);
+	int (*init_systime) (void __iomem *ioaddr, u32 sec, u32 nsec);
+	int (*config_addend) (void __iomem *ioaddr, u32 addend);
+	int (*adjust_systime) (void __iomem *ioaddr, u32 sec, u32 nsec,
+			       int add_sub, int gmac4);
+	void (*get_systime) (void __iomem *ioaddr, u64 *systime);
+};
+
+#define stmmac_config_hw_tstamping(__priv, __args...) \
+	stmmac_do_void_callback(__priv, ptp, config_hw_tstamping, __args)
+#define stmmac_config_sub_second_increment(__priv, __args...) \
+	stmmac_do_void_callback(__priv, ptp, config_sub_second_increment, __args)
+#define stmmac_init_systime(__priv, __args...) \
+	stmmac_do_callback(__priv, ptp, init_systime, __args)
+#define stmmac_config_addend(__priv, __args...) \
+	stmmac_do_callback(__priv, ptp, config_addend, __args)
+#define stmmac_adjust_systime(__priv, __args...) \
+	stmmac_do_callback(__priv, ptp, adjust_systime, __args)
+#define stmmac_get_systime(__priv, __args...) \
+	stmmac_do_void_callback(__priv, ptp, get_systime, __args)
+
+/* Helpers to manage the descriptors for chain and ring modes */
+struct stmmac_mode_ops {
+	void (*init) (void *des, dma_addr_t phy_addr, unsigned int size,
+		      unsigned int extend_desc);
+	unsigned int (*is_jumbo_frm) (int len, int enh_desc);
+	int (*jumbo_frm)(void *priv, struct sk_buff *skb, int csum);
+	int (*set_16kib_bfsize)(int mtu);
+	void (*init_desc3)(struct dma_desc *p);
+	void (*refill_desc3) (void *priv, struct dma_desc *p);
+	void (*clean_desc3) (void *priv, struct dma_desc *p);
+};
+
+#define stmmac_mode_init(__priv, __args...) \
+	stmmac_do_void_callback(__priv, mode, init, __args)
+#define stmmac_is_jumbo_frm(__priv, __args...) \
+	stmmac_do_callback(__priv, mode, is_jumbo_frm, __args)
+#define stmmac_jumbo_frm(__priv, __args...) \
+	stmmac_do_callback(__priv, mode, jumbo_frm, __args)
+#define stmmac_set_16kib_bfsize(__priv, __args...) \
+	stmmac_do_callback(__priv, mode, set_16kib_bfsize, __args)
+#define stmmac_init_desc3(__priv, __args...) \
+	stmmac_do_void_callback(__priv, mode, init_desc3, __args)
+#define stmmac_refill_desc3(__priv, __args...) \
+	stmmac_do_void_callback(__priv, mode, refill_desc3, __args)
+#define stmmac_clean_desc3(__priv, __args...) \
+	stmmac_do_void_callback(__priv, mode, clean_desc3, __args)
+
+#endif /* __STMMAC_HWIF_H__ */
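The two dispatch macros above are GNU C statement expressions: each tests the
callback pointer, forwards the arguments, and yields -EINVAL when the hook is
absent, so call sites no longer need their own NULL checks. A rough manual
expansion of stmmac_get_tx_owner(priv, p) (illustrative only):

	({
		int __result = -EINVAL;
		if ((priv)->hw->desc->get_tx_owner)
			__result = (priv)->hw->desc->get_tx_owner((p));
		__result;
	})

Void callbacks go through stmmac_do_void_callback(), which returns 0 once the
hook has run, so a missing implementation is still distinguishable from
success.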
diff --git a/drivers/net/ethernet/stmicro/stmmac/norm_desc.c b/drivers/net/ethernet/stmicro/stmmac/norm_desc.c
index ebd9e5e..7b1d901 100644
--- a/drivers/net/ethernet/stmicro/stmmac/norm_desc.c
+++ b/drivers/net/ethernet/stmicro/stmmac/norm_desc.c
@@ -253,7 +253,7 @@ static int ndesc_get_tx_timestamp_status(struct dma_desc *p)
 	return (le32_to_cpu(p->des0) & TDES0_TIME_STAMP_STATUS) >> 17;
 }
 
-static u64 ndesc_get_timestamp(void *desc, u32 ats)
+static void ndesc_get_timestamp(void *desc, u32 ats, u64 *ts)
 {
 	struct dma_desc *p = (struct dma_desc *)desc;
 	u64 ns;
@@ -262,7 +262,7 @@ static u64 ndesc_get_timestamp(void *desc, u32 ats)
 	/* convert high/sec time stamp value to nanosecond */
 	ns += le32_to_cpu(p->des3) * 1000000000ULL;
 
-	return ns;
+	*ts = ns;
 }
 
 static int ndesc_get_rx_timestamp_status(void *desc, void *next_desc, u32 ats)
diff --git a/drivers/net/ethernet/stmicro/stmmac/ring_mode.c b/drivers/net/ethernet/stmicro/stmmac/ring_mode.c
index 28e4b5d..a7ffc73 100644
--- a/drivers/net/ethernet/stmicro/stmmac/ring_mode.c
+++ b/drivers/net/ethernet/stmicro/stmmac/ring_mode.c
@@ -24,7 +24,7 @@
 
 #include "stmmac.h"
 
-static int stmmac_jumbo_frm(void *p, struct sk_buff *skb, int csum)
+static int jumbo_frm(void *p, struct sk_buff *skb, int csum)
 {
 	struct stmmac_tx_queue *tx_q = (struct stmmac_tx_queue *)p;
 	unsigned int nopaged_len = skb_headlen(skb);
@@ -58,9 +58,8 @@ static int stmmac_jumbo_frm(void *p, struct sk_buff *skb, int csum)
 		tx_q->tx_skbuff_dma[entry].is_jumbo = true;
 
 		desc->des3 = cpu_to_le32(des2 + BUF_SIZE_4KiB);
-		priv->hw->desc->prepare_tx_desc(desc, 1, bmax, csum,
-						STMMAC_RING_MODE, 0,
-						false, skb->len);
+		stmmac_prepare_tx_desc(priv, desc, 1, bmax, csum,
+				STMMAC_RING_MODE, 0, false, skb->len);
 		tx_q->tx_skbuff[entry] = NULL;
 		entry = STMMAC_GET_ENTRY(entry, DMA_TX_SIZE);
 
@@ -79,9 +78,8 @@ static int stmmac_jumbo_frm(void *p, struct sk_buff *skb, int csum)
 		tx_q->tx_skbuff_dma[entry].is_jumbo = true;
 
 		desc->des3 = cpu_to_le32(des2 + BUF_SIZE_4KiB);
-		priv->hw->desc->prepare_tx_desc(desc, 0, len, csum,
-						STMMAC_RING_MODE, 1,
-						true, skb->len);
+		stmmac_prepare_tx_desc(priv, desc, 0, len, csum,
+				STMMAC_RING_MODE, 1, true, skb->len);
 	} else {
 		des2 = dma_map_single(priv->device, skb->data,
 				      nopaged_len, DMA_TO_DEVICE);
@@ -92,9 +90,8 @@ static int stmmac_jumbo_frm(void *p, struct sk_buff *skb, int csum)
 		tx_q->tx_skbuff_dma[entry].len = nopaged_len;
 		tx_q->tx_skbuff_dma[entry].is_jumbo = true;
 		desc->des3 = cpu_to_le32(des2 + BUF_SIZE_4KiB);
-		priv->hw->desc->prepare_tx_desc(desc, 1, nopaged_len, csum,
-						STMMAC_RING_MODE, 0,
-						true, skb->len);
+		stmmac_prepare_tx_desc(priv, desc, 1, nopaged_len, csum,
+				STMMAC_RING_MODE, 0, true, skb->len);
 	}
 
 	tx_q->cur_tx = entry;
@@ -102,7 +99,7 @@ static int stmmac_jumbo_frm(void *p, struct sk_buff *skb, int csum)
 	return entry;
 }
 
-static unsigned int stmmac_is_jumbo_frm(int len, int enh_desc)
+static unsigned int is_jumbo_frm(int len, int enh_desc)
 {
 	unsigned int ret = 0;
 
@@ -112,7 +109,7 @@ static unsigned int stmmac_is_jumbo_frm(int len, int enh_desc)
 	return ret;
 }
 
-static void stmmac_refill_desc3(void *priv_ptr, struct dma_desc *p)
+static void refill_desc3(void *priv_ptr, struct dma_desc *p)
 {
 	struct stmmac_priv *priv = (struct stmmac_priv *)priv_ptr;
 
@@ -122,12 +119,12 @@ static void stmmac_refill_desc3(void *priv_ptr, struct dma_desc *p)
 }
 
 /* In ring mode we need to fill the desc3 because it is used as buffer */
-static void stmmac_init_desc3(struct dma_desc *p)
+static void init_desc3(struct dma_desc *p)
 {
 	p->des3 = cpu_to_le32(le32_to_cpu(p->des2) + BUF_SIZE_8KiB);
 }
 
-static void stmmac_clean_desc3(void *priv_ptr, struct dma_desc *p)
+static void clean_desc3(void *priv_ptr, struct dma_desc *p)
 {
 	struct stmmac_tx_queue *tx_q = (struct stmmac_tx_queue *)priv_ptr;
 	struct stmmac_priv *priv = tx_q->priv_data;
@@ -140,7 +137,7 @@ static void stmmac_clean_desc3(void *priv_ptr, struct dma_desc *p)
 		p->des3 = 0;
 }
 
-static int stmmac_set_16kib_bfsize(int mtu)
+static int set_16kib_bfsize(int mtu)
 {
 	int ret = 0;
 	if (unlikely(mtu >= BUF_SIZE_8KiB))
@@ -149,10 +146,10 @@ static int stmmac_set_16kib_bfsize(int mtu)
 }
 
 const struct stmmac_mode_ops ring_mode_ops = {
-	.is_jumbo_frm = stmmac_is_jumbo_frm,
-	.jumbo_frm = stmmac_jumbo_frm,
-	.refill_desc3 = stmmac_refill_desc3,
-	.init_desc3 = stmmac_init_desc3,
-	.clean_desc3 = stmmac_clean_desc3,
-	.set_16kib_bfsize = stmmac_set_16kib_bfsize,
+	.is_jumbo_frm = is_jumbo_frm,
+	.jumbo_frm = jumbo_frm,
+	.refill_desc3 = refill_desc3,
+	.init_desc3 = init_desc3,
+	.clean_desc3 = clean_desc3,
+	.set_16kib_bfsize = set_16kib_bfsize,
 };
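Dropping the stmmac_ prefix from these statics avoids colliding with the new
wrapper macros of the same names in hwif.h, while the ops table keeps the
external interface unchanged. Call sites now go through the macros, roughly
(a sketch; priv, tx_q and csum_insertion stand in for the real locals):

	if (stmmac_is_jumbo_frm(priv, skb->len, priv->plat->enh_desc))
		entry = stmmac_jumbo_frm(priv, tx_q, skb, csum_insertion);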
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
index 2c6ed47..6d82b3e 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
@@ -291,11 +291,9 @@ static int stmmac_ethtool_get_link_ksettings(struct net_device *dev,
 		cmd->base.speed = priv->xstats.pcs_speed;
 
 		/* Get and convert ADV/LP_ADV from the HW AN registers */
-		if (!priv->hw->mac->pcs_get_adv_lp)
+		if (stmmac_pcs_get_adv_lp(priv, priv->ioaddr, &adv))
 			return -EOPNOTSUPP;	/* should never happen indeed */
 
-		priv->hw->mac->pcs_get_adv_lp(priv->ioaddr, &adv);
-
 		/* Encoding of PSE bits is defined in 802.3z, 37.2.1.4 */
 
 		ethtool_convert_link_mode_to_legacy_u32(
@@ -393,11 +391,7 @@ stmmac_ethtool_set_link_ksettings(struct net_device *dev,
 			ADVERTISED_10baseT_Full);
 
 		spin_lock(&priv->lock);
-
-		if (priv->hw->mac->pcs_ctrl_ane)
-			priv->hw->mac->pcs_ctrl_ane(priv->ioaddr, 1,
-						    priv->hw->ps, 0);
-
+		stmmac_pcs_ctrl_ane(priv, priv->ioaddr, 1, priv->hw->ps, 0);
 		spin_unlock(&priv->lock);
 
 		return 0;
@@ -442,8 +436,8 @@ static void stmmac_ethtool_gregs(struct net_device *dev,
 
 	memset(reg_space, 0x0, REG_SPACE_SIZE);
 
-	priv->hw->mac->dump_regs(priv->hw, reg_space);
-	priv->hw->dma->dump_regs(priv->ioaddr, reg_space);
+	stmmac_dump_mac_regs(priv, priv->hw, reg_space);
+	stmmac_dump_dma_regs(priv, priv->ioaddr, reg_space);
 	/* Copy DMA registers to where ethtool expects them */
 	memcpy(&reg_space[ETHTOOL_DMA_OFFSET], &reg_space[DMA_BUS_MODE / 4],
 	       NUM_DWMAC1000_DMA_REGS * 4);
@@ -454,15 +448,13 @@ stmmac_get_pauseparam(struct net_device *netdev,
 		      struct ethtool_pauseparam *pause)
 {
 	struct stmmac_priv *priv = netdev_priv(netdev);
+	struct rgmii_adv adv_lp;
 
 	pause->rx_pause = 0;
 	pause->tx_pause = 0;
 
-	if (priv->hw->pcs && priv->hw->mac->pcs_get_adv_lp) {
-		struct rgmii_adv adv_lp;
-
+	if (priv->hw->pcs && !stmmac_pcs_get_adv_lp(priv, priv->ioaddr, &adv_lp)) {
 		pause->autoneg = 1;
-		priv->hw->mac->pcs_get_adv_lp(priv->ioaddr, &adv_lp);
 		if (!adv_lp.pause)
 			return;
 	} else {
@@ -488,12 +480,10 @@ stmmac_set_pauseparam(struct net_device *netdev,
 	u32 tx_cnt = priv->plat->tx_queues_to_use;
 	struct phy_device *phy = netdev->phydev;
 	int new_pause = FLOW_OFF;
+	struct rgmii_adv adv_lp;
 
-	if (priv->hw->pcs && priv->hw->mac->pcs_get_adv_lp) {
-		struct rgmii_adv adv_lp;
-
+	if (priv->hw->pcs && !stmmac_pcs_get_adv_lp(priv, priv->ioaddr, &adv_lp)) {
 		pause->autoneg = 1;
-		priv->hw->mac->pcs_get_adv_lp(priv->ioaddr, &adv_lp);
 		if (!adv_lp.pause)
 			return -EOPNOTSUPP;
 	} else {
@@ -515,37 +505,32 @@ stmmac_set_pauseparam(struct net_device *netdev,
 			return phy_start_aneg(phy);
 	}
 
-	priv->hw->mac->flow_ctrl(priv->hw, phy->duplex, priv->flow_ctrl,
-				 priv->pause, tx_cnt);
+	stmmac_flow_ctrl(priv, priv->hw, phy->duplex, priv->flow_ctrl,
+			priv->pause, tx_cnt);
 	return 0;
 }
 
 static void stmmac_get_ethtool_stats(struct net_device *dev,
 				 struct ethtool_stats *dummy, u64 *data)
 {
-	const char *(*dump)(struct stmmac_safety_stats *stats, int index,
-			unsigned long *count);
 	struct stmmac_priv *priv = netdev_priv(dev);
 	u32 rx_queues_count = priv->plat->rx_queues_to_use;
 	u32 tx_queues_count = priv->plat->tx_queues_to_use;
 	unsigned long count;
-	int i, j = 0;
+	int i, j = 0, ret;
 
-	if (priv->dma_cap.asp && priv->hw->mac->safety_feat_dump) {
-		dump = priv->hw->mac->safety_feat_dump;
-
+	if (priv->dma_cap.asp) {
 		for (i = 0; i < STMMAC_SAFETY_FEAT_SIZE; i++) {
-			if (dump(&priv->sstats, i, &count))
+			if (!stmmac_safety_feat_dump(priv, &priv->sstats, i,
+						&count, NULL))
 				data[j++] = count;
 		}
 	}
 
 	/* Update the DMA HW counters for dwmac10/100 */
-	if (priv->hw->dma->dma_diagnostic_fr)
-		priv->hw->dma->dma_diagnostic_fr(&dev->stats,
-						 (void *) &priv->xstats,
-						 priv->ioaddr);
-	else {
+	ret = stmmac_dma_diagnostic_fr(priv, &dev->stats, (void *) &priv->xstats,
+			priv->ioaddr);
+	if (ret) {
 		/* If supported, for new GMAC chips expose the MMC counters */
 		if (priv->dma_cap.rmon) {
 			dwmac_mmc_read(priv->mmcaddr, &priv->mmc);
@@ -565,11 +550,10 @@ static void stmmac_get_ethtool_stats(struct net_device *dev,
 				priv->xstats.phy_eee_wakeup_error_n = val;
 		}
 
-		if ((priv->hw->mac->debug) &&
-		    (priv->synopsys_id >= DWMAC_CORE_3_50))
-			priv->hw->mac->debug(priv->ioaddr,
-					     (void *)&priv->xstats,
-					     rx_queues_count, tx_queues_count);
+		if (priv->synopsys_id >= DWMAC_CORE_3_50)
+			stmmac_mac_debug(priv, priv->ioaddr,
+					(void *)&priv->xstats,
+					rx_queues_count, tx_queues_count);
 	}
 	for (i = 0; i < STMMAC_STATS_LEN; i++) {
 		char *p = (char *)priv + stmmac_gstrings_stats[i].stat_offset;
@@ -581,8 +565,6 @@ static void stmmac_get_ethtool_stats(struct net_device *dev,
 static int stmmac_get_sset_count(struct net_device *netdev, int sset)
 {
 	struct stmmac_priv *priv = netdev_priv(netdev);
-	const char *(*dump)(struct stmmac_safety_stats *stats, int index,
-			unsigned long *count);
 	int i, len, safety_len = 0;
 
 	switch (sset) {
@@ -591,11 +573,11 @@ static int stmmac_get_sset_count(struct net_device *netdev, int sset)
 
 		if (priv->dma_cap.rmon)
 			len += STMMAC_MMC_STATS_LEN;
-		if (priv->dma_cap.asp && priv->hw->mac->safety_feat_dump) {
-			dump = priv->hw->mac->safety_feat_dump;
-
+		if (priv->dma_cap.asp) {
 			for (i = 0; i < STMMAC_SAFETY_FEAT_SIZE; i++) {
-				if (dump(&priv->sstats, i, NULL))
+				if (!stmmac_safety_feat_dump(priv,
+							&priv->sstats, i,
+							NULL, NULL))
 					safety_len++;
 			}
 
@@ -613,17 +595,15 @@ static void stmmac_get_strings(struct net_device *dev, u32 stringset, u8 *data)
 	int i;
 	u8 *p = data;
 	struct stmmac_priv *priv = netdev_priv(dev);
-	const char *(*dump)(struct stmmac_safety_stats *stats, int index,
-			unsigned long *count);
 
 	switch (stringset) {
 	case ETH_SS_STATS:
-		if (priv->dma_cap.asp && priv->hw->mac->safety_feat_dump) {
-			dump = priv->hw->mac->safety_feat_dump;
+		if (priv->dma_cap.asp) {
 			for (i = 0; i < STMMAC_SAFETY_FEAT_SIZE; i++) {
-				const char *desc = dump(&priv->sstats, i, NULL);
-
-				if (desc) {
+				const char *desc;
+				if (!stmmac_safety_feat_dump(priv,
+							&priv->sstats, i,
+							NULL, &desc)) {
 					memcpy(p, desc, ETH_GSTRING_LEN);
 					p += ETH_GSTRING_LEN;
 				}
@@ -810,7 +790,7 @@ static int stmmac_set_coalesce(struct net_device *dev,
 	priv->tx_coal_frames = ec->tx_max_coalesced_frames;
 	priv->tx_coal_timer = ec->tx_coalesce_usecs;
 	priv->rx_riwt = rx_riwt;
-	priv->hw->dma->rx_watchdog(priv->ioaddr, priv->rx_riwt, rx_cnt);
+	stmmac_rx_watchdog(priv, priv->ioaddr, priv->rx_riwt, rx_cnt);
 
 	return 0;
 }
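Throughout this file the conversion follows one pattern: the pointer guard
and the indirect call collapse into a single expression, because the wrapper
itself returns -EINVAL for a missing hook. For a void callback such as
pcs_get_adv_lp, success is 0, so the idiom reads (sketch):

	struct rgmii_adv adv_lp;

	/* true only when the PCS is in use and the callback both exists
	 * and has filled adv_lp */
	if (priv->hw->pcs && !stmmac_pcs_get_adv_lp(priv, priv->ioaddr, &adv_lp))
		pause->autoneg = 1;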
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c
index 08c19eb..8d9cc21 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c
@@ -24,13 +24,13 @@
 #include "common.h"
 #include "stmmac_ptp.h"
 
-static void stmmac_config_hw_tstamping(void __iomem *ioaddr, u32 data)
+static void config_hw_tstamping(void __iomem *ioaddr, u32 data)
 {
 	writel(data, ioaddr + PTP_TCR);
 }
 
-static u32 stmmac_config_sub_second_increment(void __iomem *ioaddr,
-					      u32 ptp_clock, int gmac4)
+static void config_sub_second_increment(void __iomem *ioaddr,
+		u32 ptp_clock, int gmac4, u32 *ssinc)
 {
 	u32 value = readl(ioaddr + PTP_TCR);
 	unsigned long data;
@@ -57,10 +57,11 @@ static u32 stmmac_config_sub_second_increment(void __iomem *ioaddr,
 
 	writel(reg_value, ioaddr + PTP_SSIR);
 
-	return data;
+	if (ssinc)
+		*ssinc = data;
 }
 
-static int stmmac_init_systime(void __iomem *ioaddr, u32 sec, u32 nsec)
+static int init_systime(void __iomem *ioaddr, u32 sec, u32 nsec)
 {
 	int limit;
 	u32 value;
@@ -85,7 +86,7 @@ static int stmmac_init_systime(void __iomem *ioaddr, u32 sec, u32 nsec)
 	return 0;
 }
 
-static int stmmac_config_addend(void __iomem *ioaddr, u32 addend)
+static int config_addend(void __iomem *ioaddr, u32 addend)
 {
 	u32 value;
 	int limit;
@@ -109,8 +110,8 @@ static int stmmac_config_addend(void __iomem *ioaddr, u32 addend)
 	return 0;
 }
 
-static int stmmac_adjust_systime(void __iomem *ioaddr, u32 sec, u32 nsec,
-				 int add_sub, int gmac4)
+static int adjust_systime(void __iomem *ioaddr, u32 sec, u32 nsec,
+		int add_sub, int gmac4)
 {
 	u32 value;
 	int limit;
@@ -152,7 +153,7 @@ static int stmmac_adjust_systime(void __iomem *ioaddr, u32 sec, u32 nsec,
 	return 0;
 }
 
-static u64 stmmac_get_systime(void __iomem *ioaddr)
+static void get_systime(void __iomem *ioaddr, u64 *systime)
 {
 	u64 ns;
 
@@ -161,14 +162,15 @@ static u64 stmmac_get_systime(void __iomem *ioaddr)
 	/* Get the TSS and convert sec time value to nanosecond */
 	ns += readl(ioaddr + PTP_STSR) * 1000000000ULL;
 
-	return ns;
+	if (systime)
+		*systime = ns;
 }
 
 const struct stmmac_hwtimestamp stmmac_ptp = {
-	.config_hw_tstamping = stmmac_config_hw_tstamping,
-	.init_systime = stmmac_init_systime,
-	.config_sub_second_increment = stmmac_config_sub_second_increment,
-	.config_addend = stmmac_config_addend,
-	.adjust_systime = stmmac_adjust_systime,
-	.get_systime = stmmac_get_systime,
+	.config_hw_tstamping = config_hw_tstamping,
+	.init_systime = init_systime,
+	.config_sub_second_increment = config_sub_second_increment,
+	.config_addend = config_addend,
+	.adjust_systime = adjust_systime,
+	.get_systime = get_systime,
 };
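The PTP helpers get the same out-parameter treatment as the descriptor
timestamp hooks. Reading the hardware clock through the new interface looks
like this (sketch; systime is an illustrative local):

	u64 systime = 0;

	/* a missing get_systime hook leaves systime untouched and the
	 * wrapper returns -EINVAL */
	stmmac_get_systime(priv, priv->ptpaddr, &systime);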
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index b65e2d1..48b5540 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -50,6 +50,7 @@
 #include <linux/reset.h>
 #include <linux/of_mdio.h>
 #include "dwmac1000.h"
+#include "hwif.h"
 
 #define STMMAC_ALIGN(x)	L1_CACHE_ALIGN(x)
 #define	TSO_MAX_BUFF_SIZE	(SZ_16K - 1)
@@ -335,8 +336,8 @@ static void stmmac_enable_eee_mode(struct stmmac_priv *priv)
 
 	/* Check and enter in LPI mode */
 	if (!priv->tx_path_in_lpi_mode)
-		priv->hw->mac->set_eee_mode(priv->hw,
-					    priv->plat->en_tx_lpi_clockgating);
+		stmmac_set_eee_mode(priv, priv->hw,
+				priv->plat->en_tx_lpi_clockgating);
 }
 
 /**
@@ -347,7 +348,7 @@ static void stmmac_enable_eee_mode(struct stmmac_priv *priv)
  */
 void stmmac_disable_eee_mode(struct stmmac_priv *priv)
 {
-	priv->hw->mac->reset_eee_mode(priv->hw);
+	stmmac_reset_eee_mode(priv, priv->hw);
 	del_timer_sync(&priv->eee_ctrl_timer);
 	priv->tx_path_in_lpi_mode = false;
 }
@@ -410,8 +411,8 @@ bool stmmac_eee_init(struct stmmac_priv *priv)
 			if (priv->eee_active) {
 				netdev_dbg(priv->dev, "disable EEE\n");
 				del_timer_sync(&priv->eee_ctrl_timer);
-				priv->hw->mac->set_eee_timer(priv->hw, 0,
-							     tx_lpi_timer);
+				stmmac_set_eee_timer(priv, priv->hw, 0,
+						tx_lpi_timer);
 			}
 			priv->eee_active = 0;
 			spin_unlock_irqrestore(&priv->lock, flags);
@@ -426,12 +427,11 @@ bool stmmac_eee_init(struct stmmac_priv *priv)
 			mod_timer(&priv->eee_ctrl_timer,
 				  STMMAC_LPI_T(eee_timer));
 
-			priv->hw->mac->set_eee_timer(priv->hw,
-						     STMMAC_DEFAULT_LIT_LS,
-						     tx_lpi_timer);
+			stmmac_set_eee_timer(priv, priv->hw,
+					STMMAC_DEFAULT_LIT_LS, tx_lpi_timer);
 		}
 		/* Set HW EEE according to the speed */
-		priv->hw->mac->set_eee_pls(priv->hw, ndev->phydev->link);
+		stmmac_set_eee_pls(priv, priv->hw, ndev->phydev->link);
 
 		ret = true;
 		spin_unlock_irqrestore(&priv->lock, flags);
@@ -464,9 +464,9 @@ static void stmmac_get_tx_hwtstamp(struct stmmac_priv *priv,
 		return;
 
 	/* check tx tstamp status */
-	if (priv->hw->desc->get_tx_timestamp_status(p)) {
+	if (stmmac_get_tx_timestamp_status(priv, p)) {
 		/* get the valid tstamp */
-		ns = priv->hw->desc->get_timestamp(p, priv->adv_ts);
+		stmmac_get_timestamp(priv, p, priv->adv_ts, &ns);
 
 		memset(&shhwtstamp, 0, sizeof(struct skb_shared_hwtstamps));
 		shhwtstamp.hwtstamp = ns_to_ktime(ns);
@@ -502,8 +502,8 @@ static void stmmac_get_rx_hwtstamp(struct stmmac_priv *priv, struct dma_desc *p,
 		desc = np;
 
 	/* Check if timestamp is available */
-	if (priv->hw->desc->get_rx_timestamp_status(p, np, priv->adv_ts)) {
-		ns = priv->hw->desc->get_timestamp(desc, priv->adv_ts);
+	if (stmmac_get_rx_timestamp_status(priv, p, np, priv->adv_ts)) {
+		stmmac_get_timestamp(priv, desc, priv->adv_ts, &ns);
 		netdev_dbg(priv->dev, "get valid RX hw timestamp %llu\n", ns);
 		shhwtstamp = skb_hwtstamps(skb);
 		memset(shhwtstamp, 0, sizeof(struct skb_shared_hwtstamps));
@@ -707,18 +707,18 @@ static int stmmac_hwtstamp_ioctl(struct net_device *dev, struct ifreq *ifr)
 	priv->hwts_tx_en = config.tx_type == HWTSTAMP_TX_ON;
 
 	if (!priv->hwts_tx_en && !priv->hwts_rx_en)
-		priv->hw->ptp->config_hw_tstamping(priv->ptpaddr, 0);
+		stmmac_config_hw_tstamping(priv, priv->ptpaddr, 0);
 	else {
 		value = (PTP_TCR_TSENA | PTP_TCR_TSCFUPDT | PTP_TCR_TSCTRLSSR |
 			 tstamp_all | ptp_v2 | ptp_over_ethernet |
 			 ptp_over_ipv6_udp | ptp_over_ipv4_udp | ts_event_en |
 			 ts_master_en | snap_type_sel);
-		priv->hw->ptp->config_hw_tstamping(priv->ptpaddr, value);
+		stmmac_config_hw_tstamping(priv, priv->ptpaddr, value);
 
 		/* program Sub Second Increment reg */
-		sec_inc = priv->hw->ptp->config_sub_second_increment(
-			priv->ptpaddr, priv->plat->clk_ptp_rate,
-			priv->plat->has_gmac4);
+		stmmac_config_sub_second_increment(priv,
+				priv->ptpaddr, priv->plat->clk_ptp_rate,
+				priv->plat->has_gmac4, &sec_inc);
 		temp = div_u64(1000000000ULL, sec_inc);
 
 		/* calculate default added value:
@@ -728,15 +728,14 @@ static int stmmac_hwtstamp_ioctl(struct net_device *dev, struct ifreq *ifr)
 		 */
 		temp = (u64)(temp << 32);
 		priv->default_addend = div_u64(temp, priv->plat->clk_ptp_rate);
-		priv->hw->ptp->config_addend(priv->ptpaddr,
-					     priv->default_addend);
+		stmmac_config_addend(priv, priv->ptpaddr, priv->default_addend);
 
 		/* initialize system time */
 		ktime_get_real_ts64(&now);
 
 		/* lower 32 bits of tv_sec are safe until y2106 */
-		priv->hw->ptp->init_systime(priv->ptpaddr, (u32)now.tv_sec,
-					    now.tv_nsec);
+		stmmac_init_systime(priv, priv->ptpaddr,
+				(u32)now.tv_sec, now.tv_nsec);
 	}
 
 	return copy_to_user(ifr->ifr_data, &config,
@@ -795,8 +794,8 @@ static void stmmac_mac_flow_ctrl(struct stmmac_priv *priv, u32 duplex)
 {
 	u32 tx_cnt = priv->plat->tx_queues_to_use;
 
-	priv->hw->mac->flow_ctrl(priv->hw, duplex, priv->flow_ctrl,
-				 priv->pause, tx_cnt);
+	stmmac_flow_ctrl(priv, priv->hw, duplex, priv->flow_ctrl,
+			priv->pause, tx_cnt);
 }
 
 /**
@@ -1008,7 +1007,7 @@ static void stmmac_display_rx_rings(struct stmmac_priv *priv)
 			head_rx = (void *)rx_q->dma_rx;
 
 		/* Display RX ring */
-		priv->hw->desc->display_ring(head_rx, DMA_RX_SIZE, true);
+		stmmac_display_ring(priv, head_rx, DMA_RX_SIZE, true);
 	}
 }
 
@@ -1029,7 +1028,7 @@ static void stmmac_display_tx_rings(struct stmmac_priv *priv)
 		else
 			head_tx = (void *)tx_q->dma_tx;
 
-		priv->hw->desc->display_ring(head_tx, DMA_TX_SIZE, false);
+		stmmac_display_ring(priv, head_tx, DMA_TX_SIZE, false);
 	}
 }
 
@@ -1073,13 +1072,13 @@ static void stmmac_clear_rx_descriptors(struct stmmac_priv *priv, u32 queue)
 	/* Clear the RX descriptors */
 	for (i = 0; i < DMA_RX_SIZE; i++)
 		if (priv->extend_desc)
-			priv->hw->desc->init_rx_desc(&rx_q->dma_erx[i].basic,
-						     priv->use_riwt, priv->mode,
-						     (i == DMA_RX_SIZE - 1));
+			stmmac_init_rx_desc(priv, &rx_q->dma_erx[i].basic,
+					priv->use_riwt, priv->mode,
+					(i == DMA_RX_SIZE - 1));
 		else
-			priv->hw->desc->init_rx_desc(&rx_q->dma_rx[i],
-						     priv->use_riwt, priv->mode,
-						     (i == DMA_RX_SIZE - 1));
+			stmmac_init_rx_desc(priv, &rx_q->dma_rx[i],
+					priv->use_riwt, priv->mode,
+					(i == DMA_RX_SIZE - 1));
 }
 
 /**
@@ -1097,13 +1096,11 @@ static void stmmac_clear_tx_descriptors(struct stmmac_priv *priv, u32 queue)
 	/* Clear the TX descriptors */
 	for (i = 0; i < DMA_TX_SIZE; i++)
 		if (priv->extend_desc)
-			priv->hw->desc->init_tx_desc(&tx_q->dma_etx[i].basic,
-						     priv->mode,
-						     (i == DMA_TX_SIZE - 1));
+			stmmac_init_tx_desc(priv, &tx_q->dma_etx[i].basic,
+					priv->mode, (i == DMA_TX_SIZE - 1));
 		else
-			priv->hw->desc->init_tx_desc(&tx_q->dma_tx[i],
-						     priv->mode,
-						     (i == DMA_TX_SIZE - 1));
+			stmmac_init_tx_desc(priv, &tx_q->dma_tx[i],
+					priv->mode, (i == DMA_TX_SIZE - 1));
 }
 
 /**
@@ -1164,9 +1161,8 @@ static int stmmac_init_rx_buffers(struct stmmac_priv *priv, struct dma_desc *p,
 	else
 		p->des2 = cpu_to_le32(rx_q->rx_skbuff_dma[i]);
 
-	if ((priv->hw->mode->init_desc3) &&
-	    (priv->dma_buf_sz == BUF_SIZE_16KiB))
-		priv->hw->mode->init_desc3(p);
+	if (priv->dma_buf_sz == BUF_SIZE_16KiB)
+		stmmac_init_desc3(priv, p);
 
 	return 0;
 }
@@ -1232,13 +1228,14 @@ static int init_dma_rx_desc_rings(struct net_device *dev, gfp_t flags)
 {
 	struct stmmac_priv *priv = netdev_priv(dev);
 	u32 rx_count = priv->plat->rx_queues_to_use;
-	unsigned int bfsize = 0;
 	int ret = -ENOMEM;
+	int bfsize = 0;
 	int queue;
 	int i;
 
-	if (priv->hw->mode->set_16kib_bfsize)
-		bfsize = priv->hw->mode->set_16kib_bfsize(dev->mtu);
+	bfsize = stmmac_set_16kib_bfsize(priv, dev->mtu);
+	if (bfsize < 0)
+		bfsize = 0;
 
 	if (bfsize < BUF_SIZE_16KiB)
 		bfsize = stmmac_set_bfsize(dev->mtu, priv->dma_buf_sz);
@@ -1282,13 +1279,11 @@ static int init_dma_rx_desc_rings(struct net_device *dev, gfp_t flags)
 		/* Setup the chained descriptor addresses */
 		if (priv->mode == STMMAC_CHAIN_MODE) {
 			if (priv->extend_desc)
-				priv->hw->mode->init(rx_q->dma_erx,
-						     rx_q->dma_rx_phy,
-						     DMA_RX_SIZE, 1);
+				stmmac_mode_init(priv, rx_q->dma_erx,
+						rx_q->dma_rx_phy, DMA_RX_SIZE, 1);
 			else
-				priv->hw->mode->init(rx_q->dma_rx,
-						     rx_q->dma_rx_phy,
-						     DMA_RX_SIZE, 0);
+				stmmac_mode_init(priv, rx_q->dma_rx,
+						rx_q->dma_rx_phy, DMA_RX_SIZE, 0);
 		}
 	}
 
@@ -1335,13 +1330,11 @@ static int init_dma_tx_desc_rings(struct net_device *dev)
 		/* Setup the chained descriptor addresses */
 		if (priv->mode == STMMAC_CHAIN_MODE) {
 			if (priv->extend_desc)
-				priv->hw->mode->init(tx_q->dma_etx,
-						     tx_q->dma_tx_phy,
-						     DMA_TX_SIZE, 1);
+				stmmac_mode_init(priv, tx_q->dma_etx,
+						tx_q->dma_tx_phy, DMA_TX_SIZE, 1);
 			else
-				priv->hw->mode->init(tx_q->dma_tx,
-						     tx_q->dma_tx_phy,
-						     DMA_TX_SIZE, 0);
+				stmmac_mode_init(priv, tx_q->dma_tx,
+						tx_q->dma_tx_phy, DMA_TX_SIZE, 0);
 		}
 
 		for (i = 0; i < DMA_TX_SIZE; i++) {
@@ -1664,7 +1657,7 @@ static void stmmac_mac_enable_rx_queues(struct stmmac_priv *priv)
 
 	for (queue = 0; queue < rx_queues_count; queue++) {
 		mode = priv->plat->rx_queues_cfg[queue].mode_to_use;
-		priv->hw->mac->rx_queue_enable(priv->hw, mode, queue);
+		stmmac_rx_queue_enable(priv, priv->hw, mode, queue);
 	}
 }
 
@@ -1678,7 +1671,7 @@ static void stmmac_mac_enable_rx_queues(struct stmmac_priv *priv)
 static void stmmac_start_rx_dma(struct stmmac_priv *priv, u32 chan)
 {
 	netdev_dbg(priv->dev, "DMA RX processes started in channel %d\n", chan);
-	priv->hw->dma->start_rx(priv->ioaddr, chan);
+	stmmac_start_rx(priv, priv->ioaddr, chan);
 }
 
 /**
@@ -1691,7 +1684,7 @@ static void stmmac_start_rx_dma(struct stmmac_priv *priv, u32 chan)
 static void stmmac_start_tx_dma(struct stmmac_priv *priv, u32 chan)
 {
 	netdev_dbg(priv->dev, "DMA TX processes started in channel %d\n", chan);
-	priv->hw->dma->start_tx(priv->ioaddr, chan);
+	stmmac_start_tx(priv, priv->ioaddr, chan);
 }
 
 /**
@@ -1704,7 +1697,7 @@ static void stmmac_start_tx_dma(struct stmmac_priv *priv, u32 chan)
 static void stmmac_stop_rx_dma(struct stmmac_priv *priv, u32 chan)
 {
 	netdev_dbg(priv->dev, "DMA RX processes stopped in channel %d\n", chan);
-	priv->hw->dma->stop_rx(priv->ioaddr, chan);
+	stmmac_stop_rx(priv, priv->ioaddr, chan);
 }
 
 /**
@@ -1717,7 +1710,7 @@ static void stmmac_stop_rx_dma(struct stmmac_priv *priv, u32 chan)
 static void stmmac_stop_tx_dma(struct stmmac_priv *priv, u32 chan)
 {
 	netdev_dbg(priv->dev, "DMA TX processes stopped in channel %d\n", chan);
-	priv->hw->dma->stop_tx(priv->ioaddr, chan);
+	stmmac_stop_tx(priv, priv->ioaddr, chan);
 }
 
 /**
@@ -1808,19 +1801,18 @@ static void stmmac_dma_operation_mode(struct stmmac_priv *priv)
 		for (chan = 0; chan < rx_channels_count; chan++) {
 			qmode = priv->plat->rx_queues_cfg[chan].mode_to_use;
 
-			priv->hw->dma->dma_rx_mode(priv->ioaddr, rxmode, chan,
-						   rxfifosz, qmode);
+			stmmac_dma_rx_mode(priv, priv->ioaddr, rxmode, chan,
+					rxfifosz, qmode);
 		}
 
 		for (chan = 0; chan < tx_channels_count; chan++) {
 			qmode = priv->plat->tx_queues_cfg[chan].mode_to_use;
 
-			priv->hw->dma->dma_tx_mode(priv->ioaddr, txmode, chan,
-						   txfifosz, qmode);
+			stmmac_dma_tx_mode(priv, priv->ioaddr, txmode, chan,
+					txfifosz, qmode);
 		}
 	} else {
-		priv->hw->dma->dma_mode(priv->ioaddr, txmode, rxmode,
-					rxfifosz);
+		stmmac_dma_mode(priv, priv->ioaddr, txmode, rxmode, rxfifosz);
 	}
 }
 
@@ -1851,9 +1843,8 @@ static void stmmac_tx_clean(struct stmmac_priv *priv, u32 queue)
 		else
 			p = tx_q->dma_tx + entry;
 
-		status = priv->hw->desc->tx_status(&priv->dev->stats,
-						      &priv->xstats, p,
-						      priv->ioaddr);
+		status = stmmac_tx_status(priv, &priv->dev->stats,
+				&priv->xstats, p, priv->ioaddr);
 		/* Check if the descriptor is owned by the DMA */
 		if (unlikely(status & tx_dma_own))
 			break;
@@ -1891,8 +1882,7 @@ static void stmmac_tx_clean(struct stmmac_priv *priv, u32 queue)
 			tx_q->tx_skbuff_dma[entry].map_as_page = false;
 		}
 
-		if (priv->hw->mode->clean_desc3)
-			priv->hw->mode->clean_desc3(tx_q, p);
+		stmmac_clean_desc3(priv, tx_q, p);
 
 		tx_q->tx_skbuff_dma[entry].last_segment = false;
 		tx_q->tx_skbuff_dma[entry].is_jumbo = false;
@@ -1904,7 +1894,7 @@ static void stmmac_tx_clean(struct stmmac_priv *priv, u32 queue)
 			tx_q->tx_skbuff[entry] = NULL;
 		}
 
-		priv->hw->desc->release_tx_desc(p, priv->mode);
+		stmmac_release_tx_desc(priv, p, priv->mode);
 
 		entry = STMMAC_GET_ENTRY(entry, DMA_TX_SIZE);
 	}
@@ -1929,16 +1919,6 @@ static void stmmac_tx_clean(struct stmmac_priv *priv, u32 queue)
 	netif_tx_unlock(priv->dev);
 }
 
-static inline void stmmac_enable_dma_irq(struct stmmac_priv *priv, u32 chan)
-{
-	priv->hw->dma->enable_dma_irq(priv->ioaddr, chan);
-}
-
-static inline void stmmac_disable_dma_irq(struct stmmac_priv *priv, u32 chan)
-{
-	priv->hw->dma->disable_dma_irq(priv->ioaddr, chan);
-}
-
 /**
  * stmmac_tx_err - to manage the tx error
  * @priv: driver private structure
@@ -1957,13 +1937,11 @@ static void stmmac_tx_err(struct stmmac_priv *priv, u32 chan)
 	dma_free_tx_skbufs(priv, chan);
 	for (i = 0; i < DMA_TX_SIZE; i++)
 		if (priv->extend_desc)
-			priv->hw->desc->init_tx_desc(&tx_q->dma_etx[i].basic,
-						     priv->mode,
-						     (i == DMA_TX_SIZE - 1));
+			stmmac_init_tx_desc(priv, &tx_q->dma_etx[i].basic,
+					priv->mode, (i == DMA_TX_SIZE - 1));
 		else
-			priv->hw->desc->init_tx_desc(&tx_q->dma_tx[i],
-						     priv->mode,
-						     (i == DMA_TX_SIZE - 1));
+			stmmac_init_tx_desc(priv, &tx_q->dma_tx[i],
+					priv->mode, (i == DMA_TX_SIZE - 1));
 	tx_q->dirty_tx = 0;
 	tx_q->cur_tx = 0;
 	tx_q->mss = 0;
@@ -2004,30 +1982,30 @@ static void stmmac_set_dma_operation_mode(struct stmmac_priv *priv, u32 txmode,
 	txfifosz /= tx_channels_count;
 
 	if (priv->synopsys_id >= DWMAC_CORE_4_00) {
-		priv->hw->dma->dma_rx_mode(priv->ioaddr, rxmode, chan,
-					   rxfifosz, rxqmode);
-		priv->hw->dma->dma_tx_mode(priv->ioaddr, txmode, chan,
-					   txfifosz, txqmode);
+		stmmac_dma_rx_mode(priv, priv->ioaddr, rxmode, chan, rxfifosz,
+				rxqmode);
+		stmmac_dma_tx_mode(priv, priv->ioaddr, txmode, chan, txfifosz,
+				txqmode);
 	} else {
-		priv->hw->dma->dma_mode(priv->ioaddr, txmode, rxmode,
-					rxfifosz);
+		stmmac_dma_mode(priv, priv->ioaddr, txmode, rxmode, rxfifosz);
 	}
 }
 
 static bool stmmac_safety_feat_interrupt(struct stmmac_priv *priv)
 {
-	bool ret = false;
+	int ret = false;
 
 	/* Safety features are only available in cores >= 5.10 */
 	if (priv->synopsys_id < DWMAC_CORE_5_10)
 		return ret;
-	if (priv->hw->mac->safety_feat_irq_status)
-		ret = priv->hw->mac->safety_feat_irq_status(priv->dev,
-				priv->ioaddr, priv->dma_cap.asp, &priv->sstats);
-
-	if (ret)
+	ret = stmmac_safety_feat_irq_status(priv, priv->dev,
+			priv->ioaddr, priv->dma_cap.asp, &priv->sstats);
+	if (ret && (ret != -EINVAL)) {
 		stmmac_global_err(priv);
-	return ret;
+		return true;
+	}
+
+	return false;
 }
 
 /**
@@ -2054,16 +2032,15 @@ static void stmmac_dma_interrupt(struct stmmac_priv *priv)
 	 * all tx queues rather than just a single tx queue.
 	 */
 	for (chan = 0; chan < channels_to_check; chan++)
-		status[chan] = priv->hw->dma->dma_interrupt(priv->ioaddr,
-							    &priv->xstats,
-							    chan);
+		status[chan] = stmmac_dma_interrupt_status(priv, priv->ioaddr,
+				&priv->xstats, chan);
 
 	for (chan = 0; chan < rx_channel_count; chan++) {
 		if (likely(status[chan] & handle_rx)) {
 			struct stmmac_rx_queue *rx_q = &priv->rx_queue[chan];
 
 			if (likely(napi_schedule_prep(&rx_q->napi))) {
-				stmmac_disable_dma_irq(priv, chan);
+				stmmac_disable_dma_irq(priv, priv->ioaddr, chan);
 				__napi_schedule(&rx_q->napi);
 				poll_scheduled = true;
 			}
@@ -2084,7 +2061,8 @@ static void stmmac_dma_interrupt(struct stmmac_priv *priv)
 					&priv->rx_queue[0];
 
 				if (likely(napi_schedule_prep(&rx_q->napi))) {
-					stmmac_disable_dma_irq(priv, chan);
+					stmmac_disable_dma_irq(priv,
+							priv->ioaddr, chan);
 					__napi_schedule(&rx_q->napi);
 				}
 				break;
@@ -2180,15 +2158,7 @@ static void stmmac_selec_desc_mode(struct stmmac_priv *priv)
  */
 static int stmmac_get_hw_features(struct stmmac_priv *priv)
 {
-	u32 ret = 0;
-
-	if (priv->hw->dma->get_hw_feature) {
-		priv->hw->dma->get_hw_feature(priv->ioaddr,
-					      &priv->dma_cap);
-		ret = 1;
-	}
-
-	return ret;
+	return stmmac_get_hw_feature(priv, priv->ioaddr, &priv->dma_cap) == 0;
 }
 
 /**
@@ -2201,8 +2171,7 @@ static int stmmac_get_hw_features(struct stmmac_priv *priv)
 static void stmmac_check_ether_addr(struct stmmac_priv *priv)
 {
 	if (!is_valid_ether_addr(priv->dev->dev_addr)) {
-		priv->hw->mac->get_umac_addr(priv->hw,
-					     priv->dev->dev_addr, 0);
+		stmmac_get_umac_addr(priv, priv->hw, priv->dev->dev_addr, 0);
 		if (!is_valid_ether_addr(priv->dev->dev_addr))
 			eth_hw_addr_random(priv->dev);
 		netdev_info(priv->dev, "device MAC address %pM\n",
@@ -2238,7 +2207,7 @@ static int stmmac_init_dma_engine(struct stmmac_priv *priv)
 	if (priv->extend_desc && (priv->mode == STMMAC_RING_MODE))
 		atds = 1;
 
-	ret = priv->hw->dma->reset(priv->ioaddr);
+	ret = stmmac_reset(priv, priv->ioaddr);
 	if (ret) {
 		dev_err(priv->device, "Failed to reset the dma\n");
 		return ret;
@@ -2246,51 +2215,48 @@ static int stmmac_init_dma_engine(struct stmmac_priv *priv)
 
 	if (priv->synopsys_id >= DWMAC_CORE_4_00) {
 		/* DMA Configuration */
-		priv->hw->dma->init(priv->ioaddr, priv->plat->dma_cfg,
-				    dummy_dma_tx_phy, dummy_dma_rx_phy, atds);
+		stmmac_dma_init(priv, priv->ioaddr, priv->plat->dma_cfg,
+				dummy_dma_tx_phy, dummy_dma_rx_phy, atds);
 
 		/* DMA RX Channel Configuration */
 		for (chan = 0; chan < rx_channels_count; chan++) {
 			rx_q = &priv->rx_queue[chan];
 
-			priv->hw->dma->init_rx_chan(priv->ioaddr,
-						    priv->plat->dma_cfg,
-						    rx_q->dma_rx_phy, chan);
+			stmmac_init_rx_chan(priv, priv->ioaddr,
+					priv->plat->dma_cfg, rx_q->dma_rx_phy,
+					chan);
 
 			rx_q->rx_tail_addr = rx_q->dma_rx_phy +
 				    (DMA_RX_SIZE * sizeof(struct dma_desc));
-			priv->hw->dma->set_rx_tail_ptr(priv->ioaddr,
-						       rx_q->rx_tail_addr,
-						       chan);
+			stmmac_set_rx_tail_ptr(priv, priv->ioaddr,
+					rx_q->rx_tail_addr, chan);
 		}
 
 		/* DMA TX Channel Configuration */
 		for (chan = 0; chan < tx_channels_count; chan++) {
 			tx_q = &priv->tx_queue[chan];
 
-			priv->hw->dma->init_chan(priv->ioaddr,
-						 priv->plat->dma_cfg,
-						 chan);
+			stmmac_init_chan(priv, priv->ioaddr,
+					priv->plat->dma_cfg, chan);
 
-			priv->hw->dma->init_tx_chan(priv->ioaddr,
-						    priv->plat->dma_cfg,
-						    tx_q->dma_tx_phy, chan);
+			stmmac_init_tx_chan(priv, priv->ioaddr,
+					priv->plat->dma_cfg, tx_q->dma_tx_phy,
+					chan);
 
 			tx_q->tx_tail_addr = tx_q->dma_tx_phy +
 				    (DMA_TX_SIZE * sizeof(struct dma_desc));
-			priv->hw->dma->set_tx_tail_ptr(priv->ioaddr,
-						       tx_q->tx_tail_addr,
-						       chan);
+			stmmac_set_tx_tail_ptr(priv, priv->ioaddr,
+					tx_q->tx_tail_addr, chan);
 		}
 	} else {
 		rx_q = &priv->rx_queue[chan];
 		tx_q = &priv->tx_queue[chan];
-		priv->hw->dma->init(priv->ioaddr, priv->plat->dma_cfg,
-				    tx_q->dma_tx_phy, rx_q->dma_rx_phy, atds);
+		stmmac_dma_init(priv, priv->ioaddr, priv->plat->dma_cfg,
+				tx_q->dma_tx_phy, rx_q->dma_rx_phy, atds);
 	}
 
-	if (priv->plat->axi && priv->hw->dma->axi)
-		priv->hw->dma->axi(priv->ioaddr, priv->plat->axi);
+	if (priv->plat->axi)
+		stmmac_axi(priv, priv->ioaddr, priv->plat->axi);
 
 	return ret;
 }
@@ -2336,18 +2302,14 @@ static void stmmac_set_rings_length(struct stmmac_priv *priv)
 	u32 chan;
 
 	/* set TX ring length */
-	if (priv->hw->dma->set_tx_ring_len) {
-		for (chan = 0; chan < tx_channels_count; chan++)
-			priv->hw->dma->set_tx_ring_len(priv->ioaddr,
-						       (DMA_TX_SIZE - 1), chan);
-	}
+	for (chan = 0; chan < tx_channels_count; chan++)
+		stmmac_set_tx_ring_len(priv, priv->ioaddr,
+				(DMA_TX_SIZE - 1), chan);
 
 	/* set RX ring length */
-	if (priv->hw->dma->set_rx_ring_len) {
-		for (chan = 0; chan < rx_channels_count; chan++)
-			priv->hw->dma->set_rx_ring_len(priv->ioaddr,
-						       (DMA_RX_SIZE - 1), chan);
-	}
+	for (chan = 0; chan < rx_channels_count; chan++)
+		stmmac_set_rx_ring_len(priv, priv->ioaddr,
+				(DMA_RX_SIZE - 1), chan);
 }
 
 /**
@@ -2363,7 +2325,7 @@ static void stmmac_set_tx_queue_weight(struct stmmac_priv *priv)
 
 	for (queue = 0; queue < tx_queues_count; queue++) {
 		weight = priv->plat->tx_queues_cfg[queue].weight;
-		priv->hw->mac->set_mtl_tx_queue_weight(priv->hw, weight, queue);
+		stmmac_set_mtl_tx_queue_weight(priv, priv->hw, weight, queue);
 	}
 }
 
@@ -2384,7 +2346,7 @@ static void stmmac_configure_cbs(struct stmmac_priv *priv)
 		if (mode_to_use == MTL_QUEUE_DCB)
 			continue;
 
-		priv->hw->mac->config_cbs(priv->hw,
+		stmmac_config_cbs(priv, priv->hw,
 				priv->plat->tx_queues_cfg[queue].send_slope,
 				priv->plat->tx_queues_cfg[queue].idle_slope,
 				priv->plat->tx_queues_cfg[queue].high_credit,
@@ -2406,7 +2368,7 @@ static void stmmac_rx_queue_dma_chan_map(struct stmmac_priv *priv)
 
 	for (queue = 0; queue < rx_queues_count; queue++) {
 		chan = priv->plat->rx_queues_cfg[queue].chan;
-		priv->hw->mac->map_mtl_to_dma(priv->hw, queue, chan);
+		stmmac_map_mtl_to_dma(priv, priv->hw, queue, chan);
 	}
 }
 
@@ -2426,7 +2388,7 @@ static void stmmac_mac_config_rx_queues_prio(struct stmmac_priv *priv)
 			continue;
 
 		prio = priv->plat->rx_queues_cfg[queue].prio;
-		priv->hw->mac->rx_queue_prio(priv->hw, prio, queue);
+		stmmac_rx_queue_prio(priv, priv->hw, prio, queue);
 	}
 }
 
@@ -2446,7 +2408,7 @@ static void stmmac_mac_config_tx_queues_prio(struct stmmac_priv *priv)
 			continue;
 
 		prio = priv->plat->tx_queues_cfg[queue].prio;
-		priv->hw->mac->tx_queue_prio(priv->hw, prio, queue);
+		stmmac_tx_queue_prio(priv, priv->hw, prio, queue);
 	}
 }
 
@@ -2467,7 +2429,7 @@ static void stmmac_mac_config_rx_queues_routing(struct stmmac_priv *priv)
 			continue;
 
 		packet = priv->plat->rx_queues_cfg[queue].pkt_route;
-		priv->hw->mac->rx_queue_routing(priv->hw, packet, queue);
+		stmmac_rx_queue_routing(priv, priv->hw, packet, queue);
 	}
 }
 
@@ -2481,50 +2443,47 @@ static void stmmac_mtl_configuration(struct stmmac_priv *priv)
 	u32 rx_queues_count = priv->plat->rx_queues_to_use;
 	u32 tx_queues_count = priv->plat->tx_queues_to_use;
 
-	if (tx_queues_count > 1 && priv->hw->mac->set_mtl_tx_queue_weight)
+	if (tx_queues_count > 1)
 		stmmac_set_tx_queue_weight(priv);
 
 	/* Configure MTL RX algorithms */
-	if (rx_queues_count > 1 && priv->hw->mac->prog_mtl_rx_algorithms)
-		priv->hw->mac->prog_mtl_rx_algorithms(priv->hw,
-						priv->plat->rx_sched_algorithm);
+	if (rx_queues_count > 1)
+		stmmac_prog_mtl_rx_algorithms(priv, priv->hw,
+				priv->plat->rx_sched_algorithm);
 
 	/* Configure MTL TX algorithms */
-	if (tx_queues_count > 1 && priv->hw->mac->prog_mtl_tx_algorithms)
-		priv->hw->mac->prog_mtl_tx_algorithms(priv->hw,
-						priv->plat->tx_sched_algorithm);
+	if (tx_queues_count > 1)
+		stmmac_prog_mtl_tx_algorithms(priv, priv->hw,
+				priv->plat->tx_sched_algorithm);
 
 	/* Configure CBS in AVB TX queues */
-	if (tx_queues_count > 1 && priv->hw->mac->config_cbs)
+	if (tx_queues_count > 1)
 		stmmac_configure_cbs(priv);
 
 	/* Map RX MTL to DMA channels */
-	if (priv->hw->mac->map_mtl_to_dma)
-		stmmac_rx_queue_dma_chan_map(priv);
+	stmmac_rx_queue_dma_chan_map(priv);
 
 	/* Enable MAC RX Queues */
-	if (priv->hw->mac->rx_queue_enable)
-		stmmac_mac_enable_rx_queues(priv);
+	stmmac_mac_enable_rx_queues(priv);
 
 	/* Set RX priorities */
-	if (rx_queues_count > 1 && priv->hw->mac->rx_queue_prio)
+	if (rx_queues_count > 1)
 		stmmac_mac_config_rx_queues_prio(priv);
 
 	/* Set TX priorities */
-	if (tx_queues_count > 1 && priv->hw->mac->tx_queue_prio)
+	if (tx_queues_count > 1)
 		stmmac_mac_config_tx_queues_prio(priv);
 
 	/* Set RX routing */
-	if (rx_queues_count > 1 && priv->hw->mac->rx_queue_routing)
+	if (rx_queues_count > 1)
 		stmmac_mac_config_rx_queues_routing(priv);
 }
 
 static void stmmac_safety_feat_configuration(struct stmmac_priv *priv)
 {
-	if (priv->hw->mac->safety_feat_config && priv->dma_cap.asp) {
+	if (priv->dma_cap.asp) {
 		netdev_info(priv->dev, "Enabling Safety Features\n");
-		priv->hw->mac->safety_feat_config(priv->ioaddr,
-				priv->dma_cap.asp);
+		stmmac_safety_feat_config(priv, priv->ioaddr, priv->dma_cap.asp);
 	} else {
 		netdev_info(priv->dev, "No Safety Features support found\n");
 	}
@@ -2559,7 +2518,7 @@ static int stmmac_hw_setup(struct net_device *dev, bool init_ptp)
 	}
 
 	/* Copy the MAC addr into the HW  */
-	priv->hw->mac->set_umac_addr(priv->hw, dev->dev_addr, 0);
+	stmmac_set_umac_addr(priv, priv->hw, dev->dev_addr, 0);
 
 	/* PS and related bits will be programmed according to the speed */
 	if (priv->hw->pcs) {
@@ -2575,7 +2534,7 @@ static int stmmac_hw_setup(struct net_device *dev, bool init_ptp)
 	}
 
 	/* Initialize the MAC Core */
-	priv->hw->mac->core_init(priv->hw, dev);
+	stmmac_core_init(priv, priv->hw, dev);
 
 	/* Initialize MTL*/
 	if (priv->synopsys_id >= DWMAC_CORE_4_00)
@@ -2585,7 +2544,7 @@ static int stmmac_hw_setup(struct net_device *dev, bool init_ptp)
 	if (priv->synopsys_id >= DWMAC_CORE_5_10)
 		stmmac_safety_feat_configuration(priv);
 
-	ret = priv->hw->mac->rx_ipc(priv->hw);
+	ret = stmmac_rx_ipc(priv, priv->hw);
 	if (!ret) {
 		netdev_warn(priv->dev, "RX IPC Checksum Offload disabled\n");
 		priv->plat->rx_coe = STMMAC_RX_COE_NONE;
@@ -2593,7 +2552,7 @@ static int stmmac_hw_setup(struct net_device *dev, bool init_ptp)
 	}
 
 	/* Enable the MAC Rx/Tx */
-	priv->hw->mac->set_mac(priv->ioaddr, true);
+	stmmac_mac_set(priv, priv->ioaddr, true);
 
 	/* Set the HW DMA mode and the COE */
 	stmmac_dma_operation_mode(priv);
@@ -2623,13 +2582,14 @@ static int stmmac_hw_setup(struct net_device *dev, bool init_ptp)
 
 	priv->tx_lpi_timer = STMMAC_DEFAULT_TWT_LS;
 
-	if ((priv->use_riwt) && (priv->hw->dma->rx_watchdog)) {
-		priv->rx_riwt = MAX_DMA_RIWT;
-		priv->hw->dma->rx_watchdog(priv->ioaddr, MAX_DMA_RIWT, rx_cnt);
+	if (priv->use_riwt) {
+		ret = stmmac_rx_watchdog(priv, priv->ioaddr, MAX_DMA_RIWT, rx_cnt);
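+		/* Cache the programmed watchdog value only if the callback succeeded */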
+		if (!ret)
+			priv->rx_riwt = MAX_DMA_RIWT;
 	}
 
-	if (priv->hw->pcs && priv->hw->mac->pcs_ctrl_ane)
-		priv->hw->mac->pcs_ctrl_ane(priv->hw, 1, priv->hw->ps, 0);
+	if (priv->hw->pcs)
+		stmmac_pcs_ctrl_ane(priv, priv->hw, 1, priv->hw->ps, 0);
 
 	/* set TX and RX rings length */
 	stmmac_set_rings_length(priv);
@@ -2637,7 +2597,7 @@ static int stmmac_hw_setup(struct net_device *dev, bool init_ptp)
 	/* Enable TSO */
 	if (priv->tso) {
 		for (chan = 0; chan < tx_cnt; chan++)
-			priv->hw->dma->enable_tso(priv->ioaddr, 1, chan);
+			stmmac_enable_tso(priv, priv->ioaddr, 1, chan);
 	}
 
 	return 0;
@@ -2808,7 +2768,7 @@ static int stmmac_release(struct net_device *dev)
 	free_dma_desc_resources(priv);
 
 	/* Disable the MAC Rx/Tx */
-	priv->hw->mac->set_mac(priv->ioaddr, false);
+	stmmac_mac_set(priv, priv->ioaddr, false);
 
 	netif_carrier_off(dev);
 
@@ -2851,10 +2811,10 @@ static void stmmac_tso_allocator(struct stmmac_priv *priv, unsigned int des,
 		buff_size = tmp_len >= TSO_MAX_BUFF_SIZE ?
 			    TSO_MAX_BUFF_SIZE : tmp_len;
 
-		priv->hw->desc->prepare_tso_tx_desc(desc, 0, buff_size,
-			0, 1,
-			(last_segment) && (tmp_len <= TSO_MAX_BUFF_SIZE),
-			0, 0);
+		stmmac_prepare_tso_tx_desc(priv, desc, 0, buff_size,
+				0, 1,
+				(last_segment) && (tmp_len <= TSO_MAX_BUFF_SIZE),
+				0, 0);
 
 		tmp_len -= TSO_MAX_BUFF_SIZE;
 	}
@@ -2926,7 +2886,7 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
 	/* set new MSS value if needed */
 	if (mss != tx_q->mss) {
 		mss_desc = tx_q->dma_tx + tx_q->cur_tx;
-		priv->hw->desc->set_mss(mss_desc, mss);
+		stmmac_set_mss(priv, mss_desc, mss);
 		tx_q->mss = mss;
 		tx_q->cur_tx = STMMAC_GET_ENTRY(tx_q->cur_tx, DMA_TX_SIZE);
 		WARN_ON(tx_q->tx_skbuff[tx_q->cur_tx]);
@@ -3012,7 +2972,7 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
 			  STMMAC_COAL_TIMER(priv->tx_coal_timer));
 	} else {
 		priv->tx_count_frames = 0;
-		priv->hw->desc->set_tx_ic(desc);
+		stmmac_set_tx_ic(priv, desc);
 		priv->xstats.tx_set_ic_bit++;
 	}
 
@@ -3022,11 +2982,11 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
 		     priv->hwts_tx_en)) {
 		/* declare that device is doing timestamping */
 		skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
-		priv->hw->desc->enable_tx_timestamp(first);
+		stmmac_enable_tx_timestamp(priv, first);
 	}
 
 	/* Complete the first descriptor before granting the DMA */
-	priv->hw->desc->prepare_tso_tx_desc(first, 1,
+	stmmac_prepare_tso_tx_desc(priv, first, 1,
 			proto_hdr_len,
 			pay_len,
 			1, tx_q->tx_skbuff_dma[first_entry].last_segment,
@@ -3040,7 +3000,7 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
 		 * sure that MSS's own bit is the last thing written.
 		 */
 		dma_wmb();
-		priv->hw->desc->set_tx_owner(mss_desc);
+		stmmac_set_tx_owner(priv, mss_desc);
 	}
 
 	/* The own bit must be the latest setting done when prepare the
@@ -3054,8 +3014,7 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
 			__func__, tx_q->cur_tx, tx_q->dirty_tx, first_entry,
 			tx_q->cur_tx, first, nfrags);
 
-		priv->hw->desc->display_ring((void *)tx_q->dma_tx, DMA_TX_SIZE,
-					     0);
+		stmmac_display_ring(priv, (void *)tx_q->dma_tx, DMA_TX_SIZE, 0);
 
 		pr_info(">>> frame to be transmitted: ");
 		print_pkt(skb->data, skb_headlen(skb));
@@ -3063,8 +3022,7 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	netdev_tx_sent_queue(netdev_get_tx_queue(dev, queue), skb->len);
 
-	priv->hw->dma->set_tx_tail_ptr(priv->ioaddr, tx_q->tx_tail_addr,
-				       queue);
+	stmmac_set_tx_tail_ptr(priv, priv->ioaddr, tx_q->tx_tail_addr, queue);
 
 	return NETDEV_TX_OK;
 
@@ -3136,11 +3094,11 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
 	enh_desc = priv->plat->enh_desc;
 	/* To program the descriptors according to the size of the frame */
 	if (enh_desc)
-		is_jumbo = priv->hw->mode->is_jumbo_frm(skb->len, enh_desc);
+		is_jumbo = stmmac_is_jumbo_frm(priv, skb->len, enh_desc);
 
 	if (unlikely(is_jumbo) && likely(priv->synopsys_id <
 					 DWMAC_CORE_4_00)) {
-		entry = priv->hw->mode->jumbo_frm(tx_q, skb, csum_insertion);
+		entry = stmmac_jumbo_frm(priv, tx_q, skb, csum_insertion);
 		if (unlikely(entry < 0))
 			goto dma_map_err;
 	}
@@ -3174,9 +3132,8 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
 		tx_q->tx_skbuff_dma[entry].last_segment = last_segment;
 
 		/* Prepare the descriptor and set the own bit too */
-		priv->hw->desc->prepare_tx_desc(desc, 0, len, csum_insertion,
-						priv->mode, 1, last_segment,
-						skb->len);
+		stmmac_prepare_tx_desc(priv, desc, 0, len, csum_insertion,
+				priv->mode, 1, last_segment, skb->len);
 	}
 
 	/* Only the last descriptor gets to point to the skb. */
@@ -3203,7 +3160,7 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
 		else
 			tx_head = (void *)tx_q->dma_tx;
 
-		priv->hw->desc->display_ring(tx_head, DMA_TX_SIZE, false);
+		stmmac_display_ring(priv, tx_head, DMA_TX_SIZE, false);
 
 		netdev_dbg(priv->dev, ">>> frame to be transmitted: ");
 		print_pkt(skb->data, skb->len);
@@ -3228,7 +3185,7 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
 			  STMMAC_COAL_TIMER(priv->tx_coal_timer));
 	} else {
 		priv->tx_count_frames = 0;
-		priv->hw->desc->set_tx_ic(desc);
+		stmmac_set_tx_ic(priv, desc);
 		priv->xstats.tx_set_ic_bit++;
 	}
 
@@ -3259,13 +3216,13 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
 			     priv->hwts_tx_en)) {
 			/* declare that device is doing timestamping */
 			skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
-			priv->hw->desc->enable_tx_timestamp(first);
+			stmmac_enable_tx_timestamp(priv, first);
 		}
 
 		/* Prepare the first descriptor setting the OWN bit too */
-		priv->hw->desc->prepare_tx_desc(first, 1, nopaged_len,
-						csum_insertion, priv->mode, 1,
-						last_segment, skb->len);
+		stmmac_prepare_tx_desc(priv, first, 1, nopaged_len,
+				csum_insertion, priv->mode, 1, last_segment,
+				skb->len);
 
 		/* The own bit must be the latest setting done when prepare the
 		 * descriptor and then barrier is needed to make sure that
@@ -3277,10 +3234,10 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
 	netdev_tx_sent_queue(netdev_get_tx_queue(dev, queue), skb->len);
 
 	if (priv->synopsys_id < DWMAC_CORE_4_00)
-		priv->hw->dma->enable_dma_transmission(priv->ioaddr);
+		stmmac_enable_dma_transmission(priv, priv->ioaddr);
 	else
-		priv->hw->dma->set_tx_tail_ptr(priv->ioaddr, tx_q->tx_tail_addr,
-					       queue);
+		stmmac_set_tx_tail_ptr(priv, priv->ioaddr, tx_q->tx_tail_addr,
+				queue);
 
 	return NETDEV_TX_OK;
 
@@ -3370,8 +3327,8 @@ static inline void stmmac_rx_refill(struct stmmac_priv *priv, u32 queue)
 			} else {
 				p->des2 = cpu_to_le32(rx_q->rx_skbuff_dma[entry]);
 			}
-			if (priv->hw->mode->refill_desc3)
-				priv->hw->mode->refill_desc3(rx_q, p);
+
+			stmmac_refill_desc3(priv, rx_q, p);
 
 			if (rx_q->rx_zeroc_thresh > 0)
 				rx_q->rx_zeroc_thresh--;
@@ -3382,9 +3339,9 @@ static inline void stmmac_rx_refill(struct stmmac_priv *priv, u32 queue)
 		dma_wmb();
 
 		if (unlikely(priv->synopsys_id >= DWMAC_CORE_4_00))
-			priv->hw->desc->init_rx_desc(p, priv->use_riwt, 0, 0);
+			stmmac_init_rx_desc(priv, p, priv->use_riwt, 0, 0);
 		else
-			priv->hw->desc->set_rx_owner(p);
+			stmmac_set_rx_owner(priv, p);
 
 		dma_wmb();
 
@@ -3418,7 +3375,7 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue)
 		else
 			rx_head = (void *)rx_q->dma_rx;
 
-		priv->hw->desc->display_ring(rx_head, DMA_RX_SIZE, true);
+		stmmac_display_ring(priv, rx_head, DMA_RX_SIZE, true);
 	}
 	while (count < limit) {
 		int status;
@@ -3431,8 +3388,8 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue)
 			p = rx_q->dma_rx + entry;
 
 		/* read the status of the incoming frame */
-		status = priv->hw->desc->rx_status(&priv->dev->stats,
-						   &priv->xstats, p);
+		status = stmmac_rx_status(priv, &priv->dev->stats,
+				&priv->xstats, p);
 		/* check if managed by the DMA otherwise go ahead */
 		if (unlikely(status & dma_own))
 			break;
@@ -3449,11 +3406,9 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue)
 
 		prefetch(np);
 
-		if ((priv->extend_desc) && (priv->hw->desc->rx_extended_status))
-			priv->hw->desc->rx_extended_status(&priv->dev->stats,
-							   &priv->xstats,
-							   rx_q->dma_erx +
-							   entry);
+		if (priv->extend_desc)
+			stmmac_rx_extended_status(priv, &priv->dev->stats,
+					&priv->xstats, rx_q->dma_erx + entry);
 		if (unlikely(status == discard_frame)) {
 			priv->dev->stats.rx_errors++;
 			if (priv->hwts_rx_en && !priv->extend_desc) {
@@ -3479,7 +3434,7 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue)
 			else
 				des = le32_to_cpu(p->des2);
 
-			frame_len = priv->hw->desc->get_rx_frame_len(p, coe);
+			frame_len = stmmac_get_rx_frame_len(priv, p, coe);
 
 			/*  If frame length is greater than skb buffer size
 			 *  (preallocated during init) then the packet is
@@ -3621,7 +3576,7 @@ static int stmmac_poll(struct napi_struct *napi, int budget)
 	work_done = stmmac_rx(priv, budget, rx_q->queue_index);
 	if (work_done < budget) {
 		napi_complete_done(napi, work_done);
-		stmmac_enable_dma_irq(priv, chan);
+		stmmac_enable_dma_irq(priv, priv->ioaddr, chan);
 	}
 	return work_done;
 }
@@ -3654,7 +3609,7 @@ static void stmmac_set_rx_mode(struct net_device *dev)
 {
 	struct stmmac_priv *priv = netdev_priv(dev);
 
-	priv->hw->mac->set_filter(priv->hw, dev);
+	stmmac_set_filter(priv, priv->hw, dev);
 }
 
 /**
@@ -3727,7 +3682,7 @@ static int stmmac_set_features(struct net_device *netdev,
 	/* No check needed because rx_coe has been set before and it will be
 	 * fixed in case of issue.
 	 */
-	priv->hw->mac->rx_ipc(priv->hw);
+	stmmac_rx_ipc(priv, priv->hw);
 
 	return 0;
 }
@@ -3771,8 +3726,7 @@ static irqreturn_t stmmac_interrupt(int irq, void *dev_id)
 
 	/* To handle GMAC own interrupts */
 	if ((priv->plat->has_gmac) || (priv->plat->has_gmac4)) {
-		int status = priv->hw->mac->host_irq_status(priv->hw,
-							    &priv->xstats);
+		int status = stmmac_host_irq_status(priv, priv->hw, &priv->xstats);
 
 		if (unlikely(status)) {
 			/* For LPI we need to save the tx status */
@@ -3787,15 +3741,14 @@ static irqreturn_t stmmac_interrupt(int irq, void *dev_id)
 				struct stmmac_rx_queue *rx_q =
 				&priv->rx_queue[queue];
 
-				status |=
-				priv->hw->mac->host_mtl_irq_status(priv->hw,
-								   queue);
+				status |= stmmac_host_mtl_irq_status(priv,
+						priv->hw, queue);
 
-				if (status & CORE_IRQ_MTL_RX_OVERFLOW &&
-				    priv->hw->dma->set_rx_tail_ptr)
-					priv->hw->dma->set_rx_tail_ptr(priv->ioaddr,
-								rx_q->rx_tail_addr,
-								queue);
+				if (status & CORE_IRQ_MTL_RX_OVERFLOW)
+					stmmac_set_rx_tail_ptr(priv,
+							priv->ioaddr,
+							rx_q->rx_tail_addr,
+							queue);
 			}
 		}
 
@@ -3869,7 +3822,7 @@ static int stmmac_set_mac_address(struct net_device *ndev, void *addr)
 	if (ret)
 		return ret;
 
-	priv->hw->mac->set_umac_addr(priv->hw, ndev->dev_addr, 0);
+	stmmac_set_umac_addr(priv, priv->hw, ndev->dev_addr, 0);
 
 	return ret;
 }
@@ -4458,7 +4411,7 @@ int stmmac_dvr_remove(struct device *dev)
 
 	stmmac_stop_all_dma(priv);
 
-	priv->hw->mac->set_mac(priv->ioaddr, false);
+	stmmac_mac_set(priv, priv->ioaddr, false);
 	netif_carrier_off(ndev);
 	unregister_netdev(ndev);
 	if (priv->plat->stmmac_rst)
@@ -4507,10 +4460,10 @@ int stmmac_suspend(struct device *dev)
 
 	/* Enable Power down mode by programming the PMT regs */
 	if (device_may_wakeup(priv->device)) {
-		priv->hw->mac->pmt(priv->hw, priv->wolopts);
+		stmmac_pmt(priv, priv->hw, priv->wolopts);
 		priv->irq_wake = 1;
 	} else {
-		priv->hw->mac->set_mac(priv->ioaddr, false);
+		stmmac_mac_set(priv, priv->ioaddr, false);
 		pinctrl_pm_select_sleep_state(priv->device);
 		/* Disable clock in case of PWM is off */
 		clk_disable(priv->plat->pclk);
@@ -4574,7 +4527,7 @@ int stmmac_resume(struct device *dev)
 	 */
 	if (device_may_wakeup(priv->device)) {
 		spin_lock_irqsave(&priv->lock, flags);
-		priv->hw->mac->pmt(priv->hw, 0);
+		stmmac_pmt(priv, priv->hw, 0);
 		spin_unlock_irqrestore(&priv->lock, flags);
 		priv->irq_wake = 0;
 	} else {
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c
index e471a90..7d3a5c7 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c
@@ -49,9 +49,7 @@ static int stmmac_adjust_freq(struct ptp_clock_info *ptp, s32 ppb)
 	addend = neg_adj ? (addend - diff) : (addend + diff);
 
 	spin_lock_irqsave(&priv->ptp_lock, flags);
-
-	priv->hw->ptp->config_addend(priv->ptpaddr, addend);
-
+	stmmac_config_addend(priv, priv->ptpaddr, addend);
 	spin_unlock_irqrestore(&priv->ptp_lock, flags);
 
 	return 0;
@@ -84,10 +82,8 @@ static int stmmac_adjust_time(struct ptp_clock_info *ptp, s64 delta)
 	nsec = reminder;
 
 	spin_lock_irqsave(&priv->ptp_lock, flags);
-
-	priv->hw->ptp->adjust_systime(priv->ptpaddr, sec, nsec, neg_adj,
-				      priv->plat->has_gmac4);
-
+	stmmac_adjust_systime(priv, priv->ptpaddr, sec, nsec, neg_adj,
+			priv->plat->has_gmac4);
 	spin_unlock_irqrestore(&priv->ptp_lock, flags);
 
 	return 0;
@@ -110,9 +106,7 @@ static int stmmac_get_time(struct ptp_clock_info *ptp, struct timespec64 *ts)
 	u64 ns;
 
 	spin_lock_irqsave(&priv->ptp_lock, flags);
-
-	ns = priv->hw->ptp->get_systime(priv->ptpaddr);
-
+	stmmac_get_systime(priv, priv->ptpaddr, &ns);
 	spin_unlock_irqrestore(&priv->ptp_lock, flags);
 
 	*ts = ns_to_timespec64(ns);
@@ -137,9 +131,7 @@ static int stmmac_set_time(struct ptp_clock_info *ptp,
 	unsigned long flags;
 
 	spin_lock_irqsave(&priv->ptp_lock, flags);
-
-	priv->hw->ptp->init_systime(priv->ptpaddr, ts->tv_sec, ts->tv_nsec);
-
+	stmmac_init_systime(priv, priv->ptpaddr, ts->tv_sec, ts->tv_nsec);
 	spin_unlock_irqrestore(&priv->ptp_lock, flags);
 
 	return 0;
diff --git a/drivers/net/ethernet/ti/netcp.h b/drivers/net/ethernet/ti/netcp.h
index 8900a6f..c4ffdf4 100644
--- a/drivers/net/ethernet/ti/netcp.h
+++ b/drivers/net/ethernet/ti/netcp.h
@@ -33,6 +33,8 @@
 #define SGMII_LINK_MAC_MAC_FORCED	2
 #define SGMII_LINK_MAC_FIBER		3
 #define SGMII_LINK_MAC_PHY_NO_MDIO	4
+#define RGMII_LINK_MAC_PHY		5
+#define RGMII_LINK_MAC_PHY_NO_MDIO	7
 #define XGMII_LINK_MAC_PHY		10
 #define XGMII_LINK_MAC_MAC_FORCED	11
 
@@ -212,6 +214,7 @@ struct netcp_module {
 	int	(*add_vid)(void *intf_priv, int vid);
 	int	(*del_vid)(void *intf_priv, int vid);
 	int	(*ioctl)(void *intf_priv, struct ifreq *req, int cmd);
+	int	(*set_rx_mode)(void *intf_priv, bool promisc);
 
 	/* used internally */
 	struct list_head	module_list;
diff --git a/drivers/net/ethernet/ti/netcp_core.c b/drivers/net/ethernet/ti/netcp_core.c
index f5a7eb2..e40aa3e 100644
--- a/drivers/net/ethernet/ti/netcp_core.c
+++ b/drivers/net/ethernet/ti/netcp_core.c
@@ -1509,6 +1509,24 @@ static void netcp_addr_sweep_add(struct netcp_intf *netcp)
 	}
 }
 
+static int netcp_set_promiscuous(struct netcp_intf *netcp, bool promisc)
+{
+	struct netcp_intf_modpriv *priv;
+	struct netcp_module *module;
+	int error;
+
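+	/* Propagate the RX mode to each module that implements the hook; fail on first error */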
+	for_each_module(netcp, priv) {
+		module = priv->netcp_module;
+		if (!module->set_rx_mode)
+			continue;
+
+		error = module->set_rx_mode(priv->module_priv, promisc);
+		if (error)
+			return error;
+	}
+	return 0;
+}
+
 static void netcp_set_rx_mode(struct net_device *ndev)
 {
 	struct netcp_intf *netcp = netdev_priv(ndev);
@@ -1538,6 +1556,7 @@ static void netcp_set_rx_mode(struct net_device *ndev)
 	/* finally sweep and callout into modules */
 	netcp_addr_sweep_del(netcp);
 	netcp_addr_sweep_add(netcp);
+	netcp_set_promiscuous(netcp, promisc);
 	spin_unlock(&netcp->lock);
 }
 
@@ -2155,8 +2174,13 @@ static int netcp_probe(struct platform_device *pdev)
 	struct device_node *child, *interfaces;
 	struct netcp_device *netcp_device;
 	struct device *dev = &pdev->dev;
+	struct netcp_module *module;
 	int ret;
 
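+	/* NetCP depends on the KNAV DMA and QMSS drivers; defer probing until both are ready */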
+	if (!knav_dma_device_ready() ||
+	    !knav_qmss_device_ready())
+		return -EPROBE_DEFER;
+
 	if (!node) {
 		dev_err(dev, "could not find device info\n");
 		return -ENODEV;
@@ -2203,6 +2227,14 @@ static int netcp_probe(struct platform_device *pdev)
 	/* Add the device instance to the list */
 	list_add_tail(&netcp_device->device_list, &netcp_devices);
 
+	/* Probe & attach any modules already registered */
+	mutex_lock(&netcp_modules_lock);
+	for_each_netcp_module(module) {
+		ret = netcp_module_probe(netcp_device, module);
+		if (ret < 0)
+			dev_err(dev, "module(%s) probe failed\n", module->name);
+	}
+	mutex_unlock(&netcp_modules_lock);
 	return 0;
 
 probe_quit_interface:
diff --git a/drivers/net/ethernet/ti/netcp_ethss.c b/drivers/net/ethernet/ti/netcp_ethss.c
index 56dbc0b..6a728d3 100644
--- a/drivers/net/ethernet/ti/netcp_ethss.c
+++ b/drivers/net/ethernet/ti/netcp_ethss.c
@@ -21,6 +21,7 @@
 #include <linux/io.h>
 #include <linux/module.h>
 #include <linux/of_mdio.h>
+#include <linux/of_net.h>
 #include <linux/of_address.h>
 #include <linux/if_vlan.h>
 #include <linux/ptp_classify.h>
@@ -42,7 +43,7 @@
 
 /* 1G Ethernet SS defines */
 #define GBE_MODULE_NAME			"netcp-gbe"
-#define GBE_SS_VERSION_14		0x4ed21104
+#define GBE_SS_VERSION_14		0x4ed2
 
 #define GBE_SS_REG_INDEX		0
 #define GBE_SGMII34_REG_INDEX		1
@@ -72,6 +73,11 @@
 #define IS_SS_ID_NU(d) \
 	(GBE_IDENT((d)->ss_version) == GBE_SS_ID_NU)
 
+#define IS_SS_ID_VER_14(d) \
+	(GBE_IDENT((d)->ss_version) == GBE_SS_VERSION_14)
+#define IS_SS_ID_2U(d) \
+	(GBE_IDENT((d)->ss_version) == GBE_SS_ID_2U)
+
 #define GBENU_SS_REG_INDEX		0
 #define GBENU_SM_REG_INDEX		1
 #define GBENU_SGMII_MODULE_OFFSET	0x100
@@ -86,7 +92,7 @@
 
 /* 10G Ethernet SS defines */
 #define XGBE_MODULE_NAME		"netcp-xgbe"
-#define XGBE_SS_VERSION_10		0x4ee42100
+#define XGBE_SS_VERSION_10		0x4ee4
 
 #define XGBE_SS_REG_INDEX		0
 #define XGBE_SM_REG_INDEX		1
@@ -166,6 +172,11 @@
 #define	GBE_RXHOOK_ORDER			0
 #define GBE_DEFAULT_ALE_AGEOUT			30
 #define SLAVE_LINK_IS_XGMII(s) ((s)->link_interface >= XGMII_LINK_MAC_PHY)
+#define SLAVE_LINK_IS_RGMII(s) \
+	(((s)->link_interface >= RGMII_LINK_MAC_PHY) && \
+	 ((s)->link_interface <= RGMII_LINK_MAC_PHY_NO_MDIO))
+#define SLAVE_LINK_IS_SGMII(s) \
+	((s)->link_interface <= SGMII_LINK_MAC_PHY_NO_MDIO)
 #define NETCP_LINK_STATE_INVALID		-1
 
 #define GBE_SET_REG_OFS(p, rb, rn) p->rb##_ofs.rn = \
@@ -549,6 +560,7 @@ struct gbe_ss_regs {
 struct gbe_ss_regs_ofs {
 	u16	id_ver;
 	u16	control;
+	u16	rgmii_status; /* 2U */
 };
 
 struct gbe_switch_regs {
@@ -591,6 +603,7 @@ struct gbe_port_regs {
 struct gbe_port_regs_ofs {
 	u16	port_vlan;
 	u16	tx_pri_map;
+	u16	rx_pri_map;
 	u16	sa_lo;
 	u16	sa_hi;
 	u16	ts_ctl;
@@ -695,6 +708,7 @@ struct gbe_slave {
 	u32				link_interface;
 	u32				mac_control;
 	u8				phy_port_t;
+	struct device_node		*node;
 	struct device_node		*phy_node;
 	struct ts_ctl                   ts_ctl;
 	struct list_head		slave_list;
@@ -1915,7 +1929,7 @@ static void keystone_get_ethtool_stats(struct net_device *ndev,
 
 	gbe_dev = gbe_intf->gbe_dev;
 	spin_lock_bh(&gbe_dev->hw_stats_lock);
-	if (gbe_dev->ss_version == GBE_SS_VERSION_14)
+	if (IS_SS_ID_VER_14(gbe_dev))
 		gbe_update_stats_ver14(gbe_dev, data);
 	else
 		gbe_update_stats(gbe_dev, data);
@@ -2091,8 +2105,9 @@ static void netcp_ethss_link_state_action(struct gbe_priv *gbe_dev,
 				     ALE_PORT_STATE_FORWARD);
 
 		if (ndev && slave->open &&
-		    slave->link_interface != SGMII_LINK_MAC_PHY &&
-		    slave->link_interface != XGMII_LINK_MAC_PHY)
+		    ((slave->link_interface != SGMII_LINK_MAC_PHY) &&
+		    (slave->link_interface != RGMII_LINK_MAC_PHY) &&
+		    (slave->link_interface != XGMII_LINK_MAC_PHY)))
 			netif_carrier_on(ndev);
 	} else {
 		writel(mac_control, GBE_REG_ADDR(slave, emac_regs,
@@ -2101,8 +2116,9 @@ static void netcp_ethss_link_state_action(struct gbe_priv *gbe_dev,
 				     ALE_PORT_STATE,
 				     ALE_PORT_STATE_DISABLE);
 		if (ndev &&
-		    slave->link_interface != SGMII_LINK_MAC_PHY &&
-		    slave->link_interface != XGMII_LINK_MAC_PHY)
+		    ((slave->link_interface != SGMII_LINK_MAC_PHY) &&
+		    (slave->link_interface != RGMII_LINK_MAC_PHY) &&
+		    (slave->link_interface != XGMII_LINK_MAC_PHY)))
 			netif_carrier_off(ndev);
 	}
 
@@ -2115,23 +2131,35 @@ static bool gbe_phy_link_status(struct gbe_slave *slave)
 	 return !slave->phy || slave->phy->link;
 }
 
+#define RGMII_REG_STATUS_LINK	BIT(0)
+
+static void netcp_2u_rgmii_get_port_link(struct gbe_priv *gbe_dev, bool *status)
+{
+	u32 val = 0;
+
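+	/* On 2U, RGMII link state is reported in an SS-level status register */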
+	val = readl(GBE_REG_ADDR(gbe_dev, ss_regs, rgmii_status));
+	*status = !!(val & RGMII_REG_STATUS_LINK);
+}
+
 static void netcp_ethss_update_link_state(struct gbe_priv *gbe_dev,
 					  struct gbe_slave *slave,
 					  struct net_device *ndev)
 {
-	int sp = slave->slave_num;
-	int phy_link_state, sgmii_link_state = 1, link_state;
+	bool sw_link_state = true, phy_link_state;
+	int sp = slave->slave_num, link_state;
 
 	if (!slave->open)
 		return;
 
-	if (!SLAVE_LINK_IS_XGMII(slave)) {
-		sgmii_link_state =
-			netcp_sgmii_get_port_link(SGMII_BASE(gbe_dev, sp), sp);
-	}
+	if (SLAVE_LINK_IS_RGMII(slave))
+		netcp_2u_rgmii_get_port_link(gbe_dev,
+					     &sw_link_state);
+	if (SLAVE_LINK_IS_SGMII(slave))
+		sw_link_state =
+		netcp_sgmii_get_port_link(SGMII_BASE(gbe_dev, sp), sp);
 
 	phy_link_state = gbe_phy_link_status(slave);
-	link_state = phy_link_state & sgmii_link_state;
+	link_state = phy_link_state & sw_link_state;
 
 	if (atomic_xchg(&slave->link_state, link_state) != link_state)
 		netcp_ethss_link_state_action(gbe_dev, ndev, slave,
@@ -2205,7 +2233,7 @@ static void gbe_port_config(struct gbe_priv *gbe_dev, struct gbe_slave *slave,
 		max_rx_len = NETCP_MAX_FRAME_SIZE;
 
 	/* Enable correct MII mode at SS level */
-	if ((gbe_dev->ss_version == XGBE_SS_VERSION_10) &&
+	if (IS_SS_ID_XGBE(gbe_dev) &&
 	    (slave->link_interface >= XGMII_LINK_MAC_PHY)) {
 		xgmii_mode = readl(GBE_REG_ADDR(gbe_dev, ss_regs, control));
 		xgmii_mode |= (1 << slave->slave_num);
@@ -2236,7 +2264,8 @@ static void gbe_slave_stop(struct gbe_intf *intf)
 	struct gbe_priv *gbe_dev = intf->gbe_dev;
 	struct gbe_slave *slave = intf->slave;
 
-	gbe_sgmii_rtreset(gbe_dev, slave, true);
+	if (!IS_SS_ID_2U(gbe_dev))
+		gbe_sgmii_rtreset(gbe_dev, slave, true);
 	gbe_port_reset(slave);
 	/* Disable forwarding */
 	cpsw_ale_control_set(gbe_dev->ale, slave->port_num,
@@ -2271,11 +2300,20 @@ static int gbe_slave_open(struct gbe_intf *gbe_intf)
 
 	void (*hndlr)(struct net_device *) = gbe_adjust_link;
 
-	gbe_sgmii_config(priv, slave);
+	if (!IS_SS_ID_2U(priv))
+		gbe_sgmii_config(priv, slave);
 	gbe_port_reset(slave);
-	gbe_sgmii_rtreset(priv, slave, false);
+	if (!IS_SS_ID_2U(priv))
+		gbe_sgmii_rtreset(priv, slave, false);
 	gbe_port_config(priv, slave, priv->rx_packet_max);
 	gbe_set_slave_mac(slave, gbe_intf);
+	/* For NU & 2U switches, map the VLAN priorities to zero,
+	 * as we only configure the use of priority 0
+	 */
+	if (IS_SS_ID_MU(priv))
+		writel(HOST_TX_PRI_MAP_DEFAULT,
+		       GBE_REG_ADDR(slave, port_regs, rx_pri_map));
+
 	/* enable forwarding */
 	cpsw_ale_control_set(priv->ale, slave->port_num,
 			     ALE_PORT_STATE, ALE_PORT_STATE_FORWARD);
@@ -2286,6 +2324,21 @@ static int gbe_slave_open(struct gbe_intf *gbe_intf)
 		has_phy = true;
 		phy_mode = PHY_INTERFACE_MODE_SGMII;
 		slave->phy_port_t = PORT_MII;
+	} else if (slave->link_interface == RGMII_LINK_MAC_PHY) {
+		has_phy = true;
+		phy_mode = of_get_phy_mode(slave->node);
+		/* if phy-mode is not present, default to
+		 * PHY_INTERFACE_MODE_RGMII
+		 */
+		if (phy_mode < 0)
+			phy_mode = PHY_INTERFACE_MODE_RGMII;
+
+		if (!phy_interface_mode_is_rgmii(phy_mode)) {
+			dev_err(priv->dev,
+				"Unsupported phy mode %d\n", phy_mode);
+			return -EINVAL;
+		}
+		slave->phy_port_t = PORT_MII;
 	} else if (slave->link_interface == XGMII_LINK_MAC_PHY) {
 		has_phy = true;
 		phy_mode = PHY_INTERFACE_MODE_NA;
@@ -2293,7 +2346,7 @@ static int gbe_slave_open(struct gbe_intf *gbe_intf)
 	}
 
 	if (has_phy) {
-		if (priv->ss_version == XGBE_SS_VERSION_10)
+		if (IS_SS_ID_XGBE(priv))
 			hndlr = xgbe_adjust_link;
 
 		slave->phy = of_phy_connect(gbe_intf->ndev,
@@ -2722,6 +2775,61 @@ static inline int gbe_hwtstamp_set(struct gbe_intf *gbe_intf, struct ifreq *req)
 }
 #endif /* CONFIG_TI_CPTS */
 
+static int gbe_set_rx_mode(void *intf_priv, bool promisc)
+{
+	struct gbe_intf *gbe_intf = intf_priv;
+	struct gbe_priv *gbe_dev = gbe_intf->gbe_dev;
+	struct cpsw_ale *ale = gbe_dev->ale;
+	unsigned long timeout;
+	int i, ret = -ETIMEDOUT;
+
+	/* Disable (1) / Enable (0) learning for all ports (host is port 0,
+	 * slaves are ports 1 and up)
+	 */
+	for (i = 0; i <= gbe_dev->num_slaves; i++) {
+		cpsw_ale_control_set(ale, i,
+				     ALE_PORT_NOLEARN, !!promisc);
+		cpsw_ale_control_set(ale, i,
+				     ALE_PORT_NO_SA_UPDATE, !!promisc);
+	}
+
+	if (!promisc) {
+		/* Don't Flood All Unicast Packets to Host port */
+		cpsw_ale_control_set(ale, 0, ALE_P0_UNI_FLOOD, 0);
+		dev_vdbg(gbe_dev->dev, "promiscuous mode disabled\n");
+		return 0;
+	}
+
+	timeout = jiffies + HZ;
+
+	/* Clear All Untouched entries */
+	cpsw_ale_control_set(ale, 0, ALE_AGEOUT, 1);
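+	/* Give the hardware up to a second to acknowledge the forced age-out */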
+	do {
+		cpu_relax();
+		if (cpsw_ale_control_get(ale, 0, ALE_AGEOUT)) {
+			ret = 0;
+			break;
+		}
+
+	} while (time_after(timeout, jiffies));
+
+	/* Make sure it is not a false timeout */
+	if (ret && !cpsw_ale_control_get(ale, 0, ALE_AGEOUT))
+		return ret;
+
+	cpsw_ale_control_set(ale, 0, ALE_AGEOUT, 1);
+
+	/* Clear all mcast from ALE */
+	cpsw_ale_flush_multicast(ale,
+				 GBE_PORT_MASK(gbe_dev->ale_ports),
+				 -1);
+
+	/* Flood All Unicast Packets to Host port */
+	cpsw_ale_control_set(ale, 0, ALE_P0_UNI_FLOOD, 1);
+	dev_vdbg(gbe_dev->dev, "promiscuous mode enabled\n");
+	return ret;
+}
+
 static int gbe_ioctl(void *intf_priv, struct ifreq *req, int cmd)
 {
 	struct gbe_intf *gbe_intf = intf_priv;
@@ -2764,7 +2872,7 @@ static void netcp_ethss_timer(struct timer_list *t)
 	/* A timer runs as a BH, no need to block them */
 	spin_lock(&gbe_dev->hw_stats_lock);
 
-	if (gbe_dev->ss_version == GBE_SS_VERSION_14)
+	if (IS_SS_ID_VER_14(gbe_dev))
 		gbe_update_stats_ver14(gbe_dev, NULL);
 	else
 		gbe_update_stats(gbe_dev, NULL);
@@ -2807,7 +2915,7 @@ static int gbe_open(void *intf_priv, struct net_device *ndev)
 		GBE_RTL_VERSION(reg), GBE_IDENT(reg));
 
 	/* For 10G and on NetCP 1.5, use directed to port */
-	if ((gbe_dev->ss_version == XGBE_SS_VERSION_10) || IS_SS_ID_MU(gbe_dev))
+	if (IS_SS_ID_XGBE(gbe_dev) || IS_SS_ID_MU(gbe_dev))
 		gbe_intf->tx_pipe.flags = SWITCH_TO_PORT_IN_TAGINFO;
 
 	if (gbe_dev->enable_ale)
@@ -2911,8 +3019,10 @@ static int init_slave(struct gbe_priv *gbe_dev, struct gbe_slave *slave,
 		slave->link_interface = SGMII_LINK_MAC_PHY;
 	}
 
+	slave->node = node;
 	slave->open = false;
 	if ((slave->link_interface == SGMII_LINK_MAC_PHY) ||
+	    (slave->link_interface == RGMII_LINK_MAC_PHY) ||
 	    (slave->link_interface == XGMII_LINK_MAC_PHY))
 		slave->phy_node = of_parse_phandle(node, "phy-handle", 0);
 	slave->port_num = gbe_get_slave_port(gbe_dev, slave->slave_num);
@@ -2924,7 +3034,7 @@ static int init_slave(struct gbe_priv *gbe_dev, struct gbe_slave *slave,
 
 	/* Emac regs memmap are contiguous but port regs are not */
 	port_reg_num = slave->slave_num;
-	if (gbe_dev->ss_version == GBE_SS_VERSION_14) {
+	if (IS_SS_ID_VER_14(gbe_dev)) {
 		if (slave->slave_num > 1) {
 			port_reg_ofs = GBE13_SLAVE_PORT2_OFFSET;
 			port_reg_num -= 2;
@@ -2939,7 +3049,7 @@ static int init_slave(struct gbe_priv *gbe_dev, struct gbe_slave *slave,
 		emac_reg_ofs = GBENU_EMAC_OFFSET;
 		port_reg_blk_sz = 0x1000;
 		emac_reg_blk_sz = 0x1000;
-	} else if (gbe_dev->ss_version == XGBE_SS_VERSION_10) {
+	} else if (IS_SS_ID_XGBE(gbe_dev)) {
 		port_reg_ofs = XGBE10_SLAVE_PORT_OFFSET;
 		emac_reg_ofs = XGBE10_EMAC_OFFSET;
 		port_reg_blk_sz = 0x30;
@@ -2955,7 +3065,7 @@ static int init_slave(struct gbe_priv *gbe_dev, struct gbe_slave *slave,
 	slave->emac_regs = gbe_dev->switch_regs + emac_reg_ofs +
 				(emac_reg_blk_sz * slave->slave_num);
 
-	if (gbe_dev->ss_version == GBE_SS_VERSION_14) {
+	if (IS_SS_ID_VER_14(gbe_dev)) {
 		/* Initialize  slave port register offsets */
 		GBE_SET_REG_OFS(slave, port_regs, port_vlan);
 		GBE_SET_REG_OFS(slave, port_regs, tx_pri_map);
@@ -2976,6 +3086,7 @@ static int init_slave(struct gbe_priv *gbe_dev, struct gbe_slave *slave,
 		/* Initialize  slave port register offsets */
 		GBENU_SET_REG_OFS(slave, port_regs, port_vlan);
 		GBENU_SET_REG_OFS(slave, port_regs, tx_pri_map);
+		GBENU_SET_REG_OFS(slave, port_regs, rx_pri_map);
 		GBENU_SET_REG_OFS(slave, port_regs, sa_lo);
 		GBENU_SET_REG_OFS(slave, port_regs, sa_hi);
 		GBENU_SET_REG_OFS(slave, port_regs, ts_ctl);
@@ -2989,7 +3100,7 @@ static int init_slave(struct gbe_priv *gbe_dev, struct gbe_slave *slave,
 		GBENU_SET_REG_OFS(slave, emac_regs, mac_control);
 		GBENU_SET_REG_OFS(slave, emac_regs, soft_reset);
 
-	} else if (gbe_dev->ss_version == XGBE_SS_VERSION_10) {
+	} else if (IS_SS_ID_XGBE(gbe_dev)) {
 		/* Initialize  slave port register offsets */
 		XGBE_SET_REG_OFS(slave, port_regs, port_vlan);
 		XGBE_SET_REG_OFS(slave, port_regs, tx_pri_map);
@@ -3039,7 +3150,8 @@ static void init_secondary_ports(struct gbe_priv *gbe_dev,
 			continue;
 		}
 
-		gbe_sgmii_config(gbe_dev, slave);
+		if (!IS_SS_ID_2U(gbe_dev))
+			gbe_sgmii_config(gbe_dev, slave);
 		gbe_port_reset(slave);
 		gbe_port_config(gbe_dev, slave, gbe_dev->rx_packet_max);
 		list_add_tail(&slave->slave_list, &gbe_dev->secondary_slaves);
@@ -3073,6 +3185,9 @@ static void init_secondary_ports(struct gbe_priv *gbe_dev,
 	if (slave->link_interface == SGMII_LINK_MAC_PHY) {
 		phy_mode = PHY_INTERFACE_MODE_SGMII;
 		slave->phy_port_t = PORT_MII;
+	} else if (slave->link_interface == RGMII_LINK_MAC_PHY) {
+		phy_mode = PHY_INTERFACE_MODE_RGMII;
+		slave->phy_port_t = PORT_MII;
 	} else {
 		phy_mode = PHY_INTERFACE_MODE_NA;
 		slave->phy_port_t = PORT_FIBRE;
@@ -3080,6 +3195,7 @@ static void init_secondary_ports(struct gbe_priv *gbe_dev,
 
 	for_each_sec_slave(slave, gbe_dev) {
 		if ((slave->link_interface != SGMII_LINK_MAC_PHY) &&
+		    (slave->link_interface != RGMII_LINK_MAC_PHY) &&
 		    (slave->link_interface != XGMII_LINK_MAC_PHY))
 			continue;
 		slave->phy =
@@ -3355,7 +3471,7 @@ static int set_gbenu_ethss_priv(struct gbe_priv *gbe_dev,
 	gbe_dev->num_stats_mods = gbe_dev->max_num_ports;
 	gbe_dev->et_stats = gbenu_et_stats;
 
-	if (IS_SS_ID_NU(gbe_dev))
+	if (IS_SS_ID_MU(gbe_dev))
 		gbe_dev->num_et_stats = GBENU_ET_STATS_HOST_SIZE +
 			(gbe_dev->max_num_slaves * GBENU_ET_STATS_PORT_SIZE);
 	else
@@ -3396,7 +3512,9 @@ static int set_gbenu_ethss_priv(struct gbe_priv *gbe_dev,
 	}
 	gbe_dev->switch_regs = regs;
 
-	gbe_dev->sgmii_port_regs = gbe_dev->ss_regs + GBENU_SGMII_MODULE_OFFSET;
+	if (!IS_SS_ID_2U(gbe_dev))
+		gbe_dev->sgmii_port_regs =
+		       gbe_dev->ss_regs + GBENU_SGMII_MODULE_OFFSET;
 
 	/* Although sgmii modules are mem mapped to one contiguous
 	 * region on GBENU devices, setting sgmii_port34_regs allows
@@ -3419,6 +3537,8 @@ static int set_gbenu_ethss_priv(struct gbe_priv *gbe_dev,
 
 	/* Subsystem registers */
 	GBENU_SET_REG_OFS(gbe_dev, ss_regs, id_ver);
+	/* OK to set for MU, but only used by 2U */
+	GBENU_SET_REG_OFS(gbe_dev, ss_regs, rgmii_status);
 
 	/* Switch module registers */
 	GBENU_SET_REG_OFS(gbe_dev, switch_regs, id_ver);
@@ -3464,6 +3584,7 @@ static int gbe_probe(struct netcp_device *netcp_device, struct device *dev,
 		gbe_dev->max_num_slaves = 8;
 	} else if (of_device_is_compatible(node, "ti,netcp-gbe-2")) {
 		gbe_dev->max_num_slaves = 1;
+		gbe_module.set_rx_mode = gbe_set_rx_mode;
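+		/* Only the single-port 2U variant gets the promiscuous-mode hook */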
 	} else if (of_device_is_compatible(node, "ti,netcp-xgbe")) {
 		gbe_dev->max_num_slaves = 2;
 	} else {
@@ -3508,7 +3629,7 @@ static int gbe_probe(struct netcp_device *netcp_device, struct device *dev,
 
 		dev_dbg(dev, "ss_version: 0x%08x\n", gbe_dev->ss_version);
 
-		if (gbe_dev->ss_version == GBE_SS_VERSION_14)
+		if (IS_SS_ID_VER_14(gbe_dev))
 			ret = set_gbe_ethss14_priv(gbe_dev, node);
 		else if (IS_SS_ID_MU(gbe_dev))
 			ret = set_gbenu_ethss_priv(gbe_dev, node);
@@ -3606,7 +3727,7 @@ static int gbe_probe(struct netcp_device *netcp_device, struct device *dev,
 
 	spin_lock_bh(&gbe_dev->hw_stats_lock);
 	for (i = 0; i < gbe_dev->num_stats_mods; i++) {
-		if (gbe_dev->ss_version == GBE_SS_VERSION_14)
+		if (IS_SS_ID_VER_14(gbe_dev))
 			gbe_reset_mod_stats_ver14(gbe_dev, i);
 		else
 			gbe_reset_mod_stats(gbe_dev, i);
diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c
index b919e89..750eaa5 100644
--- a/drivers/net/geneve.c
+++ b/drivers/net/geneve.c
@@ -36,6 +36,8 @@ MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
 
 #define GENEVE_VER 0
 #define GENEVE_BASE_HLEN (sizeof(struct udphdr) + sizeof(struct genevehdr))
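+/* Full encapsulation overhead: outer IP/IPv6 + UDP + Geneve headers plus the inner Ethernet header */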
+#define GENEVE_IPV4_HLEN (ETH_HLEN + sizeof(struct iphdr) + GENEVE_BASE_HLEN)
+#define GENEVE_IPV6_HLEN (ETH_HLEN + sizeof(struct ipv6hdr) + GENEVE_BASE_HLEN)
 
 /* per-network namespace private data for this module */
 struct geneve_net {
@@ -826,8 +828,8 @@ static int geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev,
 		return PTR_ERR(rt);
 
 	if (skb_dst(skb)) {
-		int mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr) -
-			  GENEVE_BASE_HLEN - info->options_len - 14;
+		int mtu = dst_mtu(&rt->dst) - GENEVE_IPV4_HLEN -
+			  info->options_len;
 
 		skb_dst_update_pmtu(skb, mtu);
 	}
@@ -872,8 +874,7 @@ static int geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev,
 		return PTR_ERR(dst);
 
 	if (skb_dst(skb)) {
-		int mtu = dst_mtu(dst) - sizeof(struct ipv6hdr) -
-			  GENEVE_BASE_HLEN - info->options_len - 14;
+		int mtu = dst_mtu(dst) - GENEVE_IPV6_HLEN - info->options_len;
 
 		skb_dst_update_pmtu(skb, mtu);
 	}
@@ -941,11 +942,10 @@ static netdev_tx_t geneve_xmit(struct sk_buff *skb, struct net_device *dev)
 
 static int geneve_change_mtu(struct net_device *dev, int new_mtu)
 {
-	/* Only possible if called internally, ndo_change_mtu path's new_mtu
-	 * is guaranteed to be between dev->min_mtu and dev->max_mtu.
-	 */
 	if (new_mtu > dev->max_mtu)
 		new_mtu = dev->max_mtu;
+	else if (new_mtu < dev->min_mtu)
+		new_mtu = dev->min_mtu;
 
 	dev->mtu = new_mtu;
 	return 0;
@@ -1261,7 +1261,7 @@ static int geneve_nl2info(struct nlattr *tb[], struct nlattr *data[],
 	}
 
 	if (data[IFLA_GENEVE_REMOTE6]) {
- #if IS_ENABLED(CONFIG_IPV6)
+#if IS_ENABLED(CONFIG_IPV6)
 		if (changelink && (ip_tunnel_info_af(info) == AF_INET)) {
 			attrtype = IFLA_GENEVE_REMOTE6;
 			goto change_notsup;
@@ -1387,6 +1387,48 @@ static int geneve_nl2info(struct nlattr *tb[], struct nlattr *data[],
 	return -EOPNOTSUPP;
 }
 
+static void geneve_link_config(struct net_device *dev,
+			       struct ip_tunnel_info *info, struct nlattr *tb[])
+{
+	struct geneve_dev *geneve = netdev_priv(dev);
+	int ldev_mtu = 0;
+
+	if (tb[IFLA_MTU]) {
+		geneve_change_mtu(dev, nla_get_u32(tb[IFLA_MTU]));
+		return;
+	}
+
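+	/* No explicit MTU requested: derive one from the MTU of the egress device toward the remote */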
+	switch (ip_tunnel_info_af(info)) {
+	case AF_INET: {
+		struct flowi4 fl4 = { .daddr = info->key.u.ipv4.dst };
+		struct rtable *rt = ip_route_output_key(geneve->net, &fl4);
+
+		if (!IS_ERR(rt) && rt->dst.dev) {
+			ldev_mtu = rt->dst.dev->mtu - GENEVE_IPV4_HLEN;
+			ip_rt_put(rt);
+		}
+		break;
+	}
+#if IS_ENABLED(CONFIG_IPV6)
+	case AF_INET6: {
+		struct rt6_info *rt = rt6_lookup(geneve->net,
+						 &info->key.u.ipv6.dst, NULL, 0,
+						 NULL, 0);
+
+		if (rt && rt->dst.dev)
+			ldev_mtu = rt->dst.dev->mtu - GENEVE_IPV6_HLEN;
+		ip6_rt_put(rt);
+		break;
+	}
+#endif
+	}
+
+	if (ldev_mtu <= 0)
+		return;
+
+	geneve_change_mtu(dev, ldev_mtu - info->options_len);
+}
+
 static int geneve_newlink(struct net *net, struct net_device *dev,
 			  struct nlattr *tb[], struct nlattr *data[],
 			  struct netlink_ext_ack *extack)
@@ -1402,8 +1444,14 @@ static int geneve_newlink(struct net *net, struct net_device *dev,
 	if (err)
 		return err;
 
-	return geneve_configure(net, dev, extack, &info, metadata,
-				use_udp6_rx_checksums);
+	err = geneve_configure(net, dev, extack, &info, metadata,
+			       use_udp6_rx_checksums);
+	if (err)
+		return err;
+
+	geneve_link_config(dev, &info, tb);
+
+	return 0;
 }
 
 /* Quiesces the geneve device data path for both TX and RX.
@@ -1477,8 +1525,10 @@ static int geneve_changelink(struct net_device *dev, struct nlattr *tb[],
 	if (err)
 		return err;
 
-	if (!geneve_dst_addr_equal(&geneve->info, &info))
+	if (!geneve_dst_addr_equal(&geneve->info, &info)) {
 		dst_cache_reset(&info.dst_cache);
+		geneve_link_config(dev, &info, tb);
+	}
 
 	geneve_quiesce(geneve, &gs4, &gs6);
 	geneve->info = info;
diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
index 960f061..6ebe39a 100644
--- a/drivers/net/hyperv/hyperv_net.h
+++ b/drivers/net/hyperv/hyperv_net.h
@@ -237,6 +237,8 @@ void netvsc_switch_datapath(struct net_device *nv_dev, bool vf);
 #define NVSP_PROTOCOL_VERSION_2		0x30002
 #define NVSP_PROTOCOL_VERSION_4		0x40000
 #define NVSP_PROTOCOL_VERSION_5		0x50000
+#define NVSP_PROTOCOL_VERSION_6		0x60000
+#define NVSP_PROTOCOL_VERSION_61	0x60001
 
 enum {
 	NVSP_MSG_TYPE_NONE = 0,
@@ -308,6 +310,12 @@ enum {
 	NVSP_MSG5_TYPE_SEND_INDIRECTION_TABLE,
 
 	NVSP_MSG5_MAX = NVSP_MSG5_TYPE_SEND_INDIRECTION_TABLE,
+
+	/* Version 6 messages */
+	NVSP_MSG6_TYPE_PD_API,
+	NVSP_MSG6_TYPE_PD_POST_BATCH,
+
+	NVSP_MSG6_MAX = NVSP_MSG6_TYPE_PD_POST_BATCH
 };
 
 enum {
@@ -619,12 +627,168 @@ union nvsp_5_message_uber {
 	struct nvsp_5_send_indirect_table send_table;
 } __packed;
 
+enum nvsp_6_pd_api_op {
+	PD_API_OP_CONFIG = 1,
+	PD_API_OP_SW_DATAPATH, /* Switch Datapath */
+	PD_API_OP_OPEN_PROVIDER,
+	PD_API_OP_CLOSE_PROVIDER,
+	PD_API_OP_CREATE_QUEUE,
+	PD_API_OP_FLUSH_QUEUE,
+	PD_API_OP_FREE_QUEUE,
+	PD_API_OP_ALLOC_COM_BUF, /* Allocate Common Buffer */
+	PD_API_OP_FREE_COM_BUF, /* Free Common Buffer */
+	PD_API_OP_MAX
+};
+
+struct grp_affinity {
+	u64 mask;
+	u16 grp;
+	u16 reserved[3];
+} __packed;
+
+struct nvsp_6_pd_api_req {
+	u32 op;
+
+	union {
+		/* MMIO information is sent from the VM to VSP */
+		struct __packed {
+			u64 mmio_pa; /* MMIO Physical Address */
+			u32 mmio_len;
+
+			/* Number of PD queues a VM can support */
+			u16 num_subchn;
+		} config;
+
+		/* Switch Datapath */
+		struct __packed {
+			/* Host Datapath Is PacketDirect */
+			u8 host_dpath_is_pd;
+
+			/* Guest PacketDirect Is Enabled */
+			u8 guest_pd_enabled;
+		} sw_dpath;
+
+		/* Open Provider */
+		struct __packed {
+			u32 prov_id; /* Provider id */
+			u32 flag;
+		} open_prov;
+
+		/* Close Provider */
+		struct __packed {
+			u32 prov_id;
+		} cls_prov;
+
+		/* Create Queue */
+		struct __packed {
+			u32 prov_id;
+			u16 q_id;
+			u16 q_size;
+			u8 is_recv_q;
+			u8 is_rss_q;
+			u32 recv_data_len;
+			struct grp_affinity affy;
+		} cr_q;
+
+		/* Delete Queue */
+		struct __packed {
+			u32 prov_id;
+			u16 q_id;
+		} del_q;
+
+		/* Flush Queue */
+		struct __packed {
+			u32 prov_id;
+			u16 q_id;
+		} flush_q;
+
+		/* Allocate Common Buffer */
+		struct __packed {
+			u32 len;
+			u32 pf_node; /* Preferred Node */
+			u16 region_id;
+		} alloc_com_buf;
+
+		/* Free Common Buffer */
+		struct __packed {
+			u32 len;
+			u64 pa; /* Physical Address */
+			u32 pf_node; /* Preferred Node */
+			u16 region_id;
+			u8 cache_type;
+		} free_com_buf;
+	} __packed;
+} __packed;
+
+struct nvsp_6_pd_api_comp {
+	u32 op;
+	u32 status;
+
+	union {
+		struct __packed {
+			/* actual number of PD queues allocated to the VM */
+			u16 num_pd_q;
+
+			/* Num Receive Rss PD Queues */
+			u8 num_rss_q;
+
+			u8 is_supported; /* Is supported by VSP */
+			u8 is_enabled; /* Is enabled by VSP */
+		} config;
+
+		/* Open Provider */
+		struct __packed {
+			u32 prov_id;
+		} open_prov;
+
+		/* Create Queue */
+		struct __packed {
+			u32 prov_id;
+			u16 q_id;
+			u16 q_size;
+			u32 recv_data_len;
+			struct grp_affinity affy;
+		} cr_q;
+
+		/* Allocate Common Buffer */
+		struct __packed {
+			u64 pa; /* Physical Address */
+			u32 len;
+			u32 pf_node; /* Preferred Node */
+			u16 region_id;
+			u8 cache_type;
+		} alloc_com_buf;
+	} __packed;
+} __packed;
+
+struct nvsp_6_pd_buf {
+	u32 region_offset;
+	u16 region_id;
+	u16 is_partial:1;
+	u16 reserved:15;
+} __packed;
+
+struct nvsp_6_pd_batch_msg {
+	struct nvsp_message_header hdr;
+	u16 count;
+	u16 guest2host:1;
+	u16 is_recv:1;
+	u16 reserved:14;
+	struct nvsp_6_pd_buf pd_buf[0];
+} __packed;
+
+union nvsp_6_message_uber {
+	struct nvsp_6_pd_api_req pd_req;
+	struct nvsp_6_pd_api_comp pd_comp;
+} __packed;
+
 union nvsp_all_messages {
 	union nvsp_message_init_uber init_msg;
 	union nvsp_1_message_uber v1_msg;
 	union nvsp_2_message_uber v2_msg;
 	union nvsp_4_message_uber v4_msg;
 	union nvsp_5_message_uber v5_msg;
+	union nvsp_6_message_uber v6_msg;
 } __packed;
 
 /* ALL Messages */
diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
index 04f611e..e730895 100644
--- a/drivers/net/hyperv/netvsc.c
+++ b/drivers/net/hyperv/netvsc.c
@@ -525,7 +525,8 @@ static int netvsc_connect_vsp(struct hv_device *device,
 	struct net_device *ndev = hv_get_drvdata(device);
 	static const u32 ver_list[] = {
 		NVSP_PROTOCOL_VERSION_1, NVSP_PROTOCOL_VERSION_2,
-		NVSP_PROTOCOL_VERSION_4, NVSP_PROTOCOL_VERSION_5
+		NVSP_PROTOCOL_VERSION_4, NVSP_PROTOCOL_VERSION_5,
+		NVSP_PROTOCOL_VERSION_6, NVSP_PROTOCOL_VERSION_61
 	};
 	struct nvsp_message *init_packet;
 	int ndis_version, i, ret;
diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c
index 6b127be..3b6dbac 100644
--- a/drivers/net/hyperv/rndis_filter.c
+++ b/drivers/net/hyperv/rndis_filter.c
@@ -29,6 +29,7 @@
 #include <linux/nls.h>
 #include <linux/vmalloc.h>
 #include <linux/rtnetlink.h>
+#include <linux/ucs2_string.h>
 
 #include "hyperv_net.h"
 #include "netvsc_trace.h"
@@ -1223,6 +1224,29 @@ static int rndis_netdev_set_hwcaps(struct rndis_device *rndis_device,
 	return ret;
 }
 
+static void rndis_get_friendly_name(struct net_device *net,
+				    struct rndis_device *rndis_device,
+				    struct netvsc_device *net_device)
+{
+	ucs2_char_t wname[256];
+	unsigned long len;
+	u8 ifalias[256];
+	u32 size;
+
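+	/* The host exposes the friendly name as a UCS-2 string */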
+	size = sizeof(wname);
+	if (rndis_filter_query_device(rndis_device, net_device,
+				      RNDIS_OID_GEN_FRIENDLY_NAME,
+				      wname, &size) != 0)
+		return;
+
+	/* Convert Windows Unicode string to UTF-8 */
+	len = ucs2_as_utf8(ifalias, wname, sizeof(ifalias));
+
+	/* ignore the default value from host */
+	if (strcmp(ifalias, "Network Adapter") != 0)
+		dev_set_alias(net, ifalias, len);
+}
+
 struct netvsc_device *rndis_filter_device_add(struct hv_device *dev,
 				      struct netvsc_device_info *device_info)
 {
@@ -1276,6 +1300,10 @@ struct netvsc_device *rndis_filter_device_add(struct hv_device *dev,
 
 	memcpy(device_info->mac_adr, rndis_device->hw_mac_adr, ETH_ALEN);
 
+	/* Get the friendly name and use it as the ifalias */
+	if (!net->ifalias)
+		rndis_get_friendly_name(net, rndis_device, net_device);
+
 	/* Query and set hardware capabilities */
 	ret = rndis_netdev_set_hwcaps(rndis_device, net_device);
 	if (ret != 0)
diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig
index bdfbabb..edb8b9a 100644
--- a/drivers/net/phy/Kconfig
+++ b/drivers/net/phy/Kconfig
@@ -218,6 +218,12 @@
 	---help---
 	  Currently supports the Aquantia AQ1202, AQ2104, AQR105, AQR405
 
+config ASIX_PHY
+	tristate "Asix PHYs"
+	---help---
+	  Currently supports the Asix Electronics PHY found in the X-Surf 100
+	  AX88796B package.
+
 config AT803X_PHY
 	tristate "AT803X PHYs"
 	---help---
diff --git a/drivers/net/phy/Makefile b/drivers/net/phy/Makefile
index 01acbcb..701ca0b 100644
--- a/drivers/net/phy/Makefile
+++ b/drivers/net/phy/Makefile
@@ -45,6 +45,7 @@
 
 obj-$(CONFIG_AMD_PHY)		+= amd.o
 obj-$(CONFIG_AQUANTIA_PHY)	+= aquantia.o
+obj-$(CONFIG_ASIX_PHY)		+= asix.o
 obj-$(CONFIG_AT803X_PHY)	+= at803x.o
 obj-$(CONFIG_BCM63XX_PHY)	+= bcm63xx.o
 obj-$(CONFIG_BCM7XXX_PHY)	+= bcm7xxx.o
diff --git a/drivers/net/phy/asix.c b/drivers/net/phy/asix.c
new file mode 100644
index 0000000..8ebe7f5
--- /dev/null
+++ b/drivers/net/phy/asix.c
@@ -0,0 +1,63 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Driver for Asix PHYs
+ *
+ * Author: Michael Schmitz <schmitzmic@gmail.com>
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ *
+ */
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/mii.h>
+#include <linux/phy.h>
+
+#define PHY_ID_ASIX_AX88796B		0x003b1841
+
+MODULE_DESCRIPTION("Asix PHY driver");
+MODULE_AUTHOR("Michael Schmitz <schmitzmic@gmail.com>");
+MODULE_LICENSE("GPL");
+
+/**
+ * asix_soft_reset - software reset the PHY via BMCR_RESET bit
+ * @phydev: target phy_device struct
+ *
+ * Description: Perform a software PHY reset using the standard
+ * BMCR_RESET bit and poll for the reset bit to be cleared.
+ * The BMCR_RESET bit is toggled off first to accommodate broken AX88796B PHY
+ * implementations such as the one used on the Individual Computers' X-Surf 100
+ * Zorro card.
+ *
+ * Returns: 0 on success, < 0 on failure
+ */
+static int asix_soft_reset(struct phy_device *phydev)
+{
+	int ret;
+
+	/* Asix PHY won't reset unless reset bit toggles */
+	ret = phy_write(phydev, MII_BMCR, 0);
+	if (ret < 0)
+		return ret;
+
+	return genphy_soft_reset(phydev);
+}
+
+static struct phy_driver asix_driver[] = { {
+	.phy_id		= PHY_ID_ASIX_AX88796B,
+	.name		= "Asix Electronics AX88796B",
+	.phy_id_mask	= 0xfffffff0,
+	.features	= PHY_BASIC_FEATURES,
+	.soft_reset	= asix_soft_reset,
+} };
+
+module_phy_driver(asix_driver);
+
+static struct mdio_device_id __maybe_unused asix_tbl[] = {
+	{ PHY_ID_ASIX_AX88796B, 0xfffffff0 },
+	{ }
+};
+
+MODULE_DEVICE_TABLE(mdio, asix_tbl);
diff --git a/drivers/net/phy/mdio-bitbang.c b/drivers/net/phy/mdio-bitbang.c
index 403b085..15352f9 100644
--- a/drivers/net/phy/mdio-bitbang.c
+++ b/drivers/net/phy/mdio-bitbang.c
@@ -205,14 +205,6 @@ static int mdiobb_write(struct mii_bus *bus, int phy, int reg, u16 val)
 	return 0;
 }
 
-static int mdiobb_reset(struct mii_bus *bus)
-{
-	struct mdiobb_ctrl *ctrl = bus->priv;
-	if (ctrl->reset)
-		ctrl->reset(bus);
-	return 0;
-}
-
 struct mii_bus *alloc_mdio_bitbang(struct mdiobb_ctrl *ctrl)
 {
 	struct mii_bus *bus;
@@ -225,7 +217,6 @@ struct mii_bus *alloc_mdio_bitbang(struct mdiobb_ctrl *ctrl)
 
 	bus->read = mdiobb_read;
 	bus->write = mdiobb_write;
-	bus->reset = mdiobb_reset;
 	bus->priv = ctrl;
 
 	return bus;
diff --git a/drivers/net/phy/mdio-boardinfo.c b/drivers/net/phy/mdio-boardinfo.c
index 1861f38..863496f 100644
--- a/drivers/net/phy/mdio-boardinfo.c
+++ b/drivers/net/phy/mdio-boardinfo.c
@@ -30,17 +30,20 @@ void mdiobus_setup_mdiodev_from_board_info(struct mii_bus *bus,
 					    struct mdio_board_info *bi))
 {
 	struct mdio_board_entry *be;
+	struct mdio_board_entry *tmp;
 	struct mdio_board_info *bi;
 	int ret;
 
 	mutex_lock(&mdio_board_lock);
-	list_for_each_entry(be, &mdio_board_list, list) {
+	list_for_each_entry_safe(be, tmp, &mdio_board_list, list) {
 		bi = &be->board_info;
 
 		if (strcmp(bus->id, bi->bus_id))
 			continue;
 
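+		/* Drop the lock across the callback, which may sleep; the _safe iterator keeps the walk valid */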
+		mutex_unlock(&mdio_board_lock);
 		ret = cb(bus, bi);
+		mutex_lock(&mdio_board_lock);
 		if (ret)
 			continue;
 
diff --git a/drivers/net/phy/mdio-gpio.c b/drivers/net/phy/mdio-gpio.c
index 4333c6e..b501221 100644
--- a/drivers/net/phy/mdio-gpio.c
+++ b/drivers/net/phy/mdio-gpio.c
@@ -24,8 +24,10 @@
 #include <linux/slab.h>
 #include <linux/interrupt.h>
 #include <linux/platform_device.h>
+#include <linux/mdio-bitbang.h>
+#include <linux/mdio-gpio.h>
 #include <linux/gpio.h>
-#include <linux/platform_data/mdio-gpio.h>
+#include <linux/gpio/consumer.h>
 
 #include <linux/of_gpio.h>
 #include <linux/of_mdio.h>
@@ -35,37 +37,22 @@ struct mdio_gpio_info {
 	struct gpio_desc *mdc, *mdio, *mdo;
 };
 
-static void *mdio_gpio_of_get_data(struct platform_device *pdev)
+static int mdio_gpio_get_data(struct device *dev,
+			      struct mdio_gpio_info *bitbang)
 {
-	struct device_node *np = pdev->dev.of_node;
-	struct mdio_gpio_platform_data *pdata;
-	enum of_gpio_flags flags;
-	int ret;
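+	/* MDC and MDIO descriptors are required; MDO (a separate output line) is optional */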
+	bitbang->mdc = devm_gpiod_get_index(dev, NULL, MDIO_GPIO_MDC,
+					    GPIOD_OUT_LOW);
+	if (IS_ERR(bitbang->mdc))
+		return PTR_ERR(bitbang->mdc);
 
-	pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL);
-	if (!pdata)
-		return NULL;
+	bitbang->mdio = devm_gpiod_get_index(dev, NULL, MDIO_GPIO_MDIO,
+					     GPIOD_IN);
+	if (IS_ERR(bitbang->mdio))
+		return PTR_ERR(bitbang->mdio);
 
-	ret = of_get_gpio_flags(np, 0, &flags);
-	if (ret < 0)
-		return NULL;
-
-	pdata->mdc = ret;
-	pdata->mdc_active_low = flags & OF_GPIO_ACTIVE_LOW;
-
-	ret = of_get_gpio_flags(np, 1, &flags);
-	if (ret < 0)
-		return NULL;
-	pdata->mdio = ret;
-	pdata->mdio_active_low = flags & OF_GPIO_ACTIVE_LOW;
-
-	ret = of_get_gpio_flags(np, 2, &flags);
-	if (ret > 0) {
-		pdata->mdo = ret;
-		pdata->mdo_active_low = flags & OF_GPIO_ACTIVE_LOW;
-	}
-
-	return pdata;
+	bitbang->mdo = devm_gpiod_get_index_optional(dev, NULL, MDIO_GPIO_MDO,
+						     GPIOD_OUT_LOW);
+	return PTR_ERR_OR_ZERO(bitbang->mdo);
 }
 
 static void mdio_dir(struct mdiobb_ctrl *ctrl, int dir)
@@ -125,78 +112,28 @@ static const struct mdiobb_ops mdio_gpio_ops = {
 };
 
 static struct mii_bus *mdio_gpio_bus_init(struct device *dev,
-					  struct mdio_gpio_platform_data *pdata,
+					  struct mdio_gpio_info *bitbang,
 					  int bus_id)
 {
 	struct mii_bus *new_bus;
-	struct mdio_gpio_info *bitbang;
-	int i;
-	int mdc, mdio, mdo;
-	unsigned long mdc_flags = GPIOF_OUT_INIT_LOW;
-	unsigned long mdio_flags = GPIOF_DIR_IN;
-	unsigned long mdo_flags = GPIOF_OUT_INIT_HIGH;
-
-	bitbang = devm_kzalloc(dev, sizeof(*bitbang), GFP_KERNEL);
-	if (!bitbang)
-		goto out;
 
 	bitbang->ctrl.ops = &mdio_gpio_ops;
-	bitbang->ctrl.reset = pdata->reset;
-	mdc = pdata->mdc;
-	bitbang->mdc = gpio_to_desc(mdc);
-	if (pdata->mdc_active_low)
-		mdc_flags = GPIOF_OUT_INIT_HIGH | GPIOF_ACTIVE_LOW;
-	mdio = pdata->mdio;
-	bitbang->mdio = gpio_to_desc(mdio);
-	if (pdata->mdio_active_low)
-		mdio_flags |= GPIOF_ACTIVE_LOW;
-	mdo = pdata->mdo;
-	if (mdo) {
-		bitbang->mdo = gpio_to_desc(mdo);
-		if (pdata->mdo_active_low)
-			mdo_flags = GPIOF_OUT_INIT_LOW | GPIOF_ACTIVE_LOW;
-	}
 
 	new_bus = alloc_mdio_bitbang(&bitbang->ctrl);
 	if (!new_bus)
-		goto out;
+		return NULL;
 
-	new_bus->name = "GPIO Bitbanged MDIO",
-
-	new_bus->phy_mask = pdata->phy_mask;
-	new_bus->phy_ignore_ta_mask = pdata->phy_ignore_ta_mask;
-	memcpy(new_bus->irq, pdata->irqs, sizeof(new_bus->irq));
+	new_bus->name = "GPIO Bitbanged MDIO";
 	new_bus->parent = dev;
 
-	if (new_bus->phy_mask == ~0)
-		goto out_free_bus;
-
-	for (i = 0; i < PHY_MAX_ADDR; i++)
-		if (!new_bus->irq[i])
-			new_bus->irq[i] = PHY_POLL;
-
 	if (bus_id != -1)
 		snprintf(new_bus->id, MII_BUS_ID_SIZE, "gpio-%x", bus_id);
 	else
 		strncpy(new_bus->id, "gpio", MII_BUS_ID_SIZE);
 
-	if (devm_gpio_request_one(dev, mdc, mdc_flags, "mdc"))
-		goto out_free_bus;
-
-	if (devm_gpio_request_one(dev, mdio, mdio_flags, "mdio"))
-		goto out_free_bus;
-
-	if (mdo && devm_gpio_request_one(dev, mdo, mdo_flags, "mdo"))
-		goto out_free_bus;
-
 	dev_set_drvdata(dev, new_bus);
 
 	return new_bus;
-
-out_free_bus:
-	free_mdio_bitbang(new_bus);
-out:
-	return NULL;
 }
 
 static void mdio_gpio_bus_deinit(struct device *dev)
@@ -216,26 +153,29 @@ static void mdio_gpio_bus_destroy(struct device *dev)
 
 static int mdio_gpio_probe(struct platform_device *pdev)
 {
-	struct mdio_gpio_platform_data *pdata;
+	struct mdio_gpio_info *bitbang;
 	struct mii_bus *new_bus;
 	int ret, bus_id;
 
+	bitbang = devm_kzalloc(&pdev->dev, sizeof(*bitbang), GFP_KERNEL);
+	if (!bitbang)
+		return -ENOMEM;
+
+	ret = mdio_gpio_get_data(&pdev->dev, bitbang);
+	if (ret)
+		return ret;
+
 	if (pdev->dev.of_node) {
-		pdata = mdio_gpio_of_get_data(pdev);
 		bus_id = of_alias_get_id(pdev->dev.of_node, "mdio-gpio");
 		if (bus_id < 0) {
 			dev_warn(&pdev->dev, "failed to get alias id\n");
 			bus_id = 0;
 		}
 	} else {
-		pdata = dev_get_platdata(&pdev->dev);
 		bus_id = pdev->id;
 	}
 
-	if (!pdata)
-		return -ENODEV;
-
-	new_bus = mdio_gpio_bus_init(&pdev->dev, pdata, bus_id);
+	new_bus = mdio_gpio_bus_init(&pdev->dev, bitbang, bus_id);
 	if (!new_bus)
 		return -ENODEV;
 
diff --git a/drivers/net/phy/microchip.c b/drivers/net/phy/microchip.c
index a97ac8c..2d67937 100644
--- a/drivers/net/phy/microchip.c
+++ b/drivers/net/phy/microchip.c
@@ -21,6 +21,8 @@
 #include <linux/phy.h>
 #include <linux/microchipphy.h>
 #include <linux/delay.h>
+#include <linux/of.h>
+#include <dt-bindings/net/microchip-lan78xx.h>
 
 #define DRIVER_AUTHOR	"WOOJUNG HUH <woojung.huh@microchip.com>"
 #define DRIVER_DESC	"Microchip LAN88XX PHY driver"
@@ -225,6 +227,8 @@ static int lan88xx_probe(struct phy_device *phydev)
 {
 	struct device *dev = &phydev->mdio.dev;
 	struct lan88xx_priv *priv;
+	u32 led_modes[4];
+	int len;
 
 	priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
 	if (!priv)
@@ -232,6 +236,27 @@ static int lan88xx_probe(struct phy_device *phydev)
 
 	priv->wolopts = 0;
 
+	len = of_property_read_variable_u32_array(dev->of_node,
+						  "microchip,led-modes",
+						  led_modes,
+						  0,
+						  ARRAY_SIZE(led_modes));
+	if (len >= 0) {
+		u32 reg = 0;
+		int i;
+
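+		/* Each LED mode occupies a 4-bit field in the LED mode
+		 * select register, lowest nibble first.
+		 */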
+		for (i = 0; i < len; i++) {
+			if (led_modes[i] > 15)
+				return -EINVAL;
+			reg |= led_modes[i] << (i * 4);
+		}
+		for (; i < ARRAY_SIZE(led_modes); i++)
+			reg |= LAN78XX_FORCE_LED_OFF << (i * 4);
+		(void)phy_write(phydev, LAN78XX_PHY_LED_MODE_SELECT, reg);
+	} else if (len == -EOVERFLOW) {
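+		/* The DT property holds more entries than the 4 LEDs */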
+		return -EINVAL;
+	}
+
 	/* these values can be used to identify internal PHY */
 	priv->chip_id = phy_read_mmd(phydev, 3, LAN88XX_MMD3_CHIP_ID);
 	priv->chip_rev = phy_read_mmd(phydev, 3, LAN88XX_MMD3_CHIP_REV);
diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c
index acbe849..5c77fa4 100644
--- a/drivers/net/team/team.c
+++ b/drivers/net/team/team.c
@@ -2937,7 +2937,7 @@ static int team_device_event(struct notifier_block *unused,
 	case NETDEV_CHANGE:
 		if (netif_running(port->dev))
 			team_port_change_check(port,
-					       !!netif_carrier_ok(port->dev));
+					       !!netif_oper_up(port->dev));
 		break;
 	case NETDEV_UNREGISTER:
 		team_del_slave(port->team->dev, dev);
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index ef33950..d3c04ab 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -248,11 +248,11 @@ struct veth {
 	__be16 h_vlan_TCI;
 };
 
-bool tun_is_xdp_buff(void *ptr)
+bool tun_is_xdp_frame(void *ptr)
 {
 	return (unsigned long)ptr & TUN_XDP_FLAG;
 }
-EXPORT_SYMBOL(tun_is_xdp_buff);
+EXPORT_SYMBOL(tun_is_xdp_frame);
 
 void *tun_xdp_to_ptr(void *ptr)
 {
@@ -525,11 +525,6 @@ static void tun_flow_update(struct tun_struct *tun, u32 rxhash,
 
 	rcu_read_lock();
 
-	/* We may get a very small possibility of OOO during switching, not
-	 * worth to optimize.*/
-	if (tun->numqueues == 1 || tfile->detached)
-		goto unlock;
-
 	e = tun_flow_find(head, rxhash);
 	if (likely(e)) {
 		/* TODO: keep queueing to old queue until it's empty? */
@@ -548,7 +543,6 @@ static void tun_flow_update(struct tun_struct *tun, u32 rxhash,
 		spin_unlock_bh(&tun->lock);
 	}
 
-unlock:
 	rcu_read_unlock();
 }
 
@@ -660,10 +654,10 @@ void tun_ptr_free(void *ptr)
 {
 	if (!ptr)
 		return;
-	if (tun_is_xdp_buff(ptr)) {
-		struct xdp_buff *xdp = tun_ptr_to_xdp(ptr);
+	if (tun_is_xdp_frame(ptr)) {
+		struct xdp_frame *xdpf = tun_ptr_to_xdp(ptr);
 
-		put_page(virt_to_head_page(xdp->data));
+		xdp_return_frame(xdpf);
 	} else {
 		__skb_array_destroy_skb(ptr);
 	}
@@ -854,6 +848,12 @@ static int tun_attach(struct tun_struct *tun, struct file *file,
 				       tun->dev, tfile->queue_index);
 		if (err < 0)
 			goto out;
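+		/* Register the memory model so xdp_return_frame() knows
+		 * how to free buffers produced by this rx queue.
+		 */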
+		err = xdp_rxq_info_reg_mem_model(&tfile->xdp_rxq,
+						 MEM_TYPE_PAGE_SHARED, NULL);
+		if (err < 0) {
+			xdp_rxq_info_unreg(&tfile->xdp_rxq);
+			goto out;
+		}
 		err = 0;
 	}
 
@@ -1290,21 +1290,13 @@ static const struct net_device_ops tun_netdev_ops = {
 	.ndo_get_stats64	= tun_net_get_stats64,
 };
 
-static int tun_xdp_xmit(struct net_device *dev, struct xdp_buff *xdp)
+static int tun_xdp_xmit(struct net_device *dev, struct xdp_frame *frame)
 {
 	struct tun_struct *tun = netdev_priv(dev);
-	struct xdp_buff *buff = xdp->data_hard_start;
-	int headroom = xdp->data - xdp->data_hard_start;
 	struct tun_file *tfile;
 	u32 numqueues;
 	int ret = 0;
 
-	/* Assure headroom is available and buff is properly aligned */
-	if (unlikely(headroom < sizeof(*xdp) || tun_is_xdp_buff(xdp)))
-		return -ENOSPC;
-
-	*buff = *xdp;
-
 	rcu_read_lock();
 
 	numqueues = READ_ONCE(tun->numqueues);
@@ -1318,7 +1310,7 @@ static int tun_xdp_xmit(struct net_device *dev, struct xdp_buff *xdp)
 	/* Encode the XDP flag into lowest bit for consumer to differ
 	 * XDP buffer from sk_buff.
 	 */
-	if (ptr_ring_produce(&tfile->tx_ring, tun_xdp_to_ptr(buff))) {
+	if (ptr_ring_produce(&tfile->tx_ring, tun_xdp_to_ptr(frame))) {
 		this_cpu_inc(tun->pcpu_stats->tx_dropped);
 		ret = -ENOSPC;
 	}
@@ -1328,6 +1320,16 @@ static int tun_xdp_xmit(struct net_device *dev, struct xdp_buff *xdp)
 	return ret;
 }
 
+static int tun_xdp_tx(struct net_device *dev, struct xdp_buff *xdp)
+{
+	struct xdp_frame *frame = convert_to_xdp_frame(xdp);
+
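+	/* convert_to_xdp_frame() places the frame metadata in the buffer's
+	 * own headroom and returns NULL when there is not enough room.
+	 */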
+	if (unlikely(!frame))
+		return -EOVERFLOW;
+
+	return tun_xdp_xmit(dev, frame);
+}
+
 static void tun_xdp_flush(struct net_device *dev)
 {
 	struct tun_struct *tun = netdev_priv(dev);
@@ -1675,7 +1677,7 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun,
 		case XDP_TX:
 			get_page(alloc_frag->page);
 			alloc_frag->offset += buflen;
-			if (tun_xdp_xmit(tun->dev, &xdp))
+			if (tun_xdp_tx(tun->dev, &xdp))
 				goto err_redirect;
 			tun_xdp_flush(tun->dev);
 			rcu_read_unlock();
@@ -1683,6 +1685,7 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun,
 			return NULL;
 		case XDP_PASS:
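+			/* The program may have moved both data and data_end;
+			 * recompute the payload length accordingly.
+			 */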
 			delta = orig_data - xdp.data;
+			len = xdp.data_end - xdp.data;
 			break;
 		default:
 			bpf_warn_invalid_xdp_action(act);
@@ -1703,7 +1706,7 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun,
 	}
 
 	skb_reserve(skb, pad - delta);
-	skb_put(skb, len + delta);
+	skb_put(skb, len);
 	get_page(alloc_frag->page);
 	alloc_frag->offset += buflen;
 
@@ -1924,10 +1927,13 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
 		rcu_read_unlock();
 	}
 
-	rcu_read_lock();
-	if (!rcu_dereference(tun->steering_prog))
+	/* Compute the costly rx hash only when it is needed for a flow
+	 * update. There is a small chance of out-of-order delivery while
+	 * queues are switching, but it is not worth optimizing for.
+	 */
+	if (!rcu_access_pointer(tun->steering_prog) && tun->numqueues > 1 &&
+	    !tfile->detached)
 		rxhash = __skb_get_hash_symmetric(skb);
-	rcu_read_unlock();
 
 	if (frags) {
 		/* Exercise flow dissector code path. */
@@ -1996,11 +2002,11 @@ static ssize_t tun_chr_write_iter(struct kiocb *iocb, struct iov_iter *from)
 
 static ssize_t tun_put_user_xdp(struct tun_struct *tun,
 				struct tun_file *tfile,
-				struct xdp_buff *xdp,
+				struct xdp_frame *xdp_frame,
 				struct iov_iter *iter)
 {
 	int vnet_hdr_sz = 0;
-	size_t size = xdp->data_end - xdp->data;
+	size_t size = xdp_frame->len;
 	struct tun_pcpu_stats *stats;
 	size_t ret;
 
@@ -2016,7 +2022,7 @@ static ssize_t tun_put_user_xdp(struct tun_struct *tun,
 		iov_iter_advance(iter, vnet_hdr_sz - sizeof(gso));
 	}
 
-	ret = copy_to_iter(xdp->data, size, iter) + vnet_hdr_sz;
+	ret = copy_to_iter(xdp_frame->data, size, iter) + vnet_hdr_sz;
 
 	stats = get_cpu_ptr(tun->pcpu_stats);
 	u64_stats_update_begin(&stats->syncp);
@@ -2184,11 +2190,11 @@ static ssize_t tun_do_read(struct tun_struct *tun, struct tun_file *tfile,
 			return err;
 	}
 
-	if (tun_is_xdp_buff(ptr)) {
-		struct xdp_buff *xdp = tun_ptr_to_xdp(ptr);
+	if (tun_is_xdp_frame(ptr)) {
+		struct xdp_frame *xdpf = tun_ptr_to_xdp(ptr);
 
-		ret = tun_put_user_xdp(tun, tfile, xdp, to);
-		put_page(virt_to_head_page(xdp->data));
+		ret = tun_put_user_xdp(tun, tfile, xdpf, to);
+		xdp_return_frame(xdpf);
 	} else {
 		struct sk_buff *skb = ptr;
 
@@ -2427,10 +2433,10 @@ static int tun_recvmsg(struct socket *sock, struct msghdr *m, size_t total_len,
 static int tun_ptr_peek_len(void *ptr)
 {
 	if (likely(ptr)) {
-		if (tun_is_xdp_buff(ptr)) {
-			struct xdp_buff *xdp = tun_ptr_to_xdp(ptr);
+		if (tun_is_xdp_frame(ptr)) {
+			struct xdp_frame *xdpf = tun_ptr_to_xdp(ptr);
 
-			return xdp->data_end - xdp->data;
+			return xdpf->len;
 		}
 		return __skb_array_len_with_tag(ptr);
 	} else {
diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c
index 0867f72..c59f8af 100644
--- a/drivers/net/usb/lan78xx.c
+++ b/drivers/net/usb/lan78xx.c
@@ -37,6 +37,8 @@
 #include <linux/irqchip/chained_irq.h>
 #include <linux/microchipphy.h>
 #include <linux/phy.h>
+#include <linux/of_mdio.h>
+#include <linux/of_net.h>
 #include "lan78xx.h"
 
 #define DRIVER_AUTHOR	"WOOJUNG HUH <woojung.huh@microchip.com>"
@@ -278,6 +280,30 @@ struct lan78xx_statstage64 {
 	u64 eee_tx_lpi_time;
 };
 
+static u32 lan78xx_regs[] = {
+	ID_REV,
+	INT_STS,
+	HW_CFG,
+	PMT_CTL,
+	E2P_CMD,
+	E2P_DATA,
+	USB_STATUS,
+	VLAN_TYPE,
+	MAC_CR,
+	MAC_RX,
+	MAC_TX,
+	FLOW,
+	ERR_STS,
+	MII_ACC,
+	MII_DATA,
+	EEE_TX_LPI_REQ_DLY,
+	EEE_TW_TX_SYS,
+	EEE_TX_LPI_REM_DLY,
+	WUCSR
+};
+
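+/* The embedded PHY exposes 32 registers; each is dumped as a u32 */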
+#define PHY_REG_SIZE (32 * sizeof(u32))
+
 struct lan78xx_net;
 
 struct lan78xx_priv {
@@ -1605,6 +1631,34 @@ static int lan78xx_set_pause(struct net_device *net,
 	return ret;
 }
 
+static int lan78xx_get_regs_len(struct net_device *netdev)
+{
+	if (!netdev->phydev)
+		return sizeof(lan78xx_regs);
+	else
+		return sizeof(lan78xx_regs) + PHY_REG_SIZE;
+}
+
+static void
+lan78xx_get_regs(struct net_device *netdev, struct ethtool_regs *regs,
+		 void *buf)
+{
+	u32 *data = buf;
+	int i, j;
+	struct lan78xx_net *dev = netdev_priv(netdev);
+
+	/* Read Device/MAC registers */
+	for (i = 0; i < ARRAY_SIZE(lan78xx_regs); i++)
+		lan78xx_read_reg(dev, lan78xx_regs[i], &data[i]);
+
+	if (!netdev->phydev)
+		return;
+
+	/* Read PHY registers */
+	for (j = 0; j < 32; i++, j++)
+		data[i] = phy_read(netdev->phydev, j);
+}
+
 static const struct ethtool_ops lan78xx_ethtool_ops = {
 	.get_link	= lan78xx_get_link,
 	.nway_reset	= phy_ethtool_nway_reset,
@@ -1625,6 +1679,8 @@ static const struct ethtool_ops lan78xx_ethtool_ops = {
 	.set_pauseparam	= lan78xx_set_pause,
 	.get_link_ksettings = lan78xx_get_link_ksettings,
 	.set_link_ksettings = lan78xx_set_link_ksettings,
+	.get_regs_len	= lan78xx_get_regs_len,
+	.get_regs	= lan78xx_get_regs,
 };
 
 static int lan78xx_ioctl(struct net_device *netdev, struct ifreq *rq, int cmd)
@@ -1652,34 +1708,31 @@ static void lan78xx_init_mac_address(struct lan78xx_net *dev)
 	addr[5] = (addr_hi >> 8) & 0xFF;
 
 	if (!is_valid_ether_addr(addr)) {
-		/* reading mac address from EEPROM or OTP */
-		if ((lan78xx_read_eeprom(dev, EEPROM_MAC_OFFSET, ETH_ALEN,
-					 addr) == 0) ||
-		    (lan78xx_read_otp(dev, EEPROM_MAC_OFFSET, ETH_ALEN,
-				      addr) == 0)) {
-			if (is_valid_ether_addr(addr)) {
-				/* eeprom values are valid so use them */
-				netif_dbg(dev, ifup, dev->net,
-					  "MAC address read from EEPROM");
-			} else {
-				/* generate random MAC */
-				random_ether_addr(addr);
-				netif_dbg(dev, ifup, dev->net,
-					  "MAC address set to random addr");
-			}
-
-			addr_lo = addr[0] | (addr[1] << 8) |
-				  (addr[2] << 16) | (addr[3] << 24);
-			addr_hi = addr[4] | (addr[5] << 8);
-
-			ret = lan78xx_write_reg(dev, RX_ADDRL, addr_lo);
-			ret = lan78xx_write_reg(dev, RX_ADDRH, addr_hi);
+		if (!eth_platform_get_mac_address(&dev->udev->dev, addr)) {
+			/* valid address present in Device Tree */
+			netif_dbg(dev, ifup, dev->net,
+				  "MAC address read from Device Tree");
+		} else if (((lan78xx_read_eeprom(dev, EEPROM_MAC_OFFSET,
+						 ETH_ALEN, addr) == 0) ||
+			    (lan78xx_read_otp(dev, EEPROM_MAC_OFFSET,
+					      ETH_ALEN, addr) == 0)) &&
+			   is_valid_ether_addr(addr)) {
+			/* eeprom values are valid so use them */
+			netif_dbg(dev, ifup, dev->net,
+				  "MAC address read from EEPROM");
 		} else {
 			/* generate random MAC */
 			random_ether_addr(addr);
 			netif_dbg(dev, ifup, dev->net,
 				  "MAC address set to random addr");
 		}
+
+		addr_lo = addr[0] | (addr[1] << 8) |
+			  (addr[2] << 16) | (addr[3] << 24);
+		addr_hi = addr[4] | (addr[5] << 8);
+
+		ret = lan78xx_write_reg(dev, RX_ADDRL, addr_lo);
+		ret = lan78xx_write_reg(dev, RX_ADDRH, addr_hi);
 	}
 
 	ret = lan78xx_write_reg(dev, MAF_LO(0), addr_lo);
@@ -1762,6 +1815,7 @@ static int lan78xx_mdiobus_write(struct mii_bus *bus, int phy_id, int idx,
 
 static int lan78xx_mdio_init(struct lan78xx_net *dev)
 {
+	struct device_node *node;
 	int ret;
 
 	dev->mdiobus = mdiobus_alloc();
@@ -1790,7 +1844,13 @@ static int lan78xx_mdio_init(struct lan78xx_net *dev)
 		break;
 	}
 
-	ret = mdiobus_register(dev->mdiobus);
+	node = of_get_child_by_name(dev->udev->dev.of_node, "mdio");
+	if (node) {
+		ret = of_mdiobus_register(dev->mdiobus, node);
+		of_node_put(node);
+	} else {
+		ret = mdiobus_register(dev->mdiobus);
+	}
 	if (ret) {
 		netdev_err(dev->net, "can't register MDIO bus\n");
 		goto exit1;
@@ -2079,6 +2139,28 @@ static int lan78xx_phy_init(struct lan78xx_net *dev)
 	mii_adv = (u32)mii_advertise_flowctrl(dev->fc_request_control);
 	phydev->advertising |= mii_adv_to_ethtool_adv_t(mii_adv);
 
+	if (phydev->mdio.dev.of_node) {
+		u32 reg;
+		int len;
+
+		len = of_property_count_elems_of_size(phydev->mdio.dev.of_node,
+						      "microchip,led-modes",
+						      sizeof(u32));
+		if (len >= 0) {
+			/* Ensure the appropriate LEDs are enabled */
+			lan78xx_read_reg(dev, HW_CFG, &reg);
+			reg &= ~(HW_CFG_LED0_EN_ |
+				 HW_CFG_LED1_EN_ |
+				 HW_CFG_LED2_EN_ |
+				 HW_CFG_LED3_EN_);
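+			/* (len > n) is 0 or 1, so each product enables LEDn
+			 * only when a mode was specified for it.
+			 */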
+			reg |= (len > 0) * HW_CFG_LED0_EN_ |
+				(len > 1) * HW_CFG_LED1_EN_ |
+				(len > 2) * HW_CFG_LED2_EN_ |
+				(len > 3) * HW_CFG_LED3_EN_;
+			lan78xx_write_reg(dev, HW_CFG, reg);
+		}
+	}
+
 	genphy_config_aneg(phydev);
 
 	dev->fc_autoneg = phydev->autoneg;
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 770422e..3b59917 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -419,46 +419,51 @@ static void virtnet_xdp_flush(struct net_device *dev)
 	virtqueue_kick(sq->vq);
 }
 
-static bool __virtnet_xdp_xmit(struct virtnet_info *vi,
-			       struct xdp_buff *xdp)
+static int __virtnet_xdp_xmit(struct virtnet_info *vi,
+			       struct xdp_frame *xdpf)
 {
 	struct virtio_net_hdr_mrg_rxbuf *hdr;
-	unsigned int len;
+	struct xdp_frame *xdpf_sent;
 	struct send_queue *sq;
+	unsigned int len;
 	unsigned int qp;
-	void *xdp_sent;
 	int err;
 
 	qp = vi->curr_queue_pairs - vi->xdp_queue_pairs + smp_processor_id();
 	sq = &vi->sq[qp];
 
 	/* Free up any pending old buffers before queueing new ones. */
-	while ((xdp_sent = virtqueue_get_buf(sq->vq, &len)) != NULL) {
-		struct page *sent_page = virt_to_head_page(xdp_sent);
+	while ((xdpf_sent = virtqueue_get_buf(sq->vq, &len)) != NULL)
+		xdp_return_frame(xdpf_sent);
 
-		put_page(sent_page);
-	}
+	/* The virtqueue wants to use the data area in front of the packet */
+	if (unlikely(xdpf->metasize > 0))
+		return -EOPNOTSUPP;
 
-	xdp->data -= vi->hdr_len;
+	if (unlikely(xdpf->headroom < vi->hdr_len))
+		return -EOVERFLOW;
+
+	/* Make room for virtqueue hdr (also change xdpf->headroom?) */
+	xdpf->data -= vi->hdr_len;
 	/* Zero header and leave csum up to XDP layers */
-	hdr = xdp->data;
+	hdr = xdpf->data;
 	memset(hdr, 0, vi->hdr_len);
+	xdpf->len   += vi->hdr_len;
 
-	sg_init_one(sq->sg, xdp->data, xdp->data_end - xdp->data);
+	sg_init_one(sq->sg, xdpf->data, xdpf->len);
 
-	err = virtqueue_add_outbuf(sq->vq, sq->sg, 1, xdp->data, GFP_ATOMIC);
+	err = virtqueue_add_outbuf(sq->vq, sq->sg, 1, xdpf, GFP_ATOMIC);
 	if (unlikely(err))
-		return false; /* Caller handle free/refcnt */
+		return -ENOSPC; /* Caller handle free/refcnt */
 
-	return true;
+	return 0;
 }
 
-static int virtnet_xdp_xmit(struct net_device *dev, struct xdp_buff *xdp)
+static int virtnet_xdp_xmit(struct net_device *dev, struct xdp_frame *xdpf)
 {
 	struct virtnet_info *vi = netdev_priv(dev);
 	struct receive_queue *rq = vi->rq;
 	struct bpf_prog *xdp_prog;
-	bool sent;
 
 	/* Only allow ndo_xdp_xmit if XDP is loaded on dev, as this
 	 * indicate XDP resources have been successfully allocated.
@@ -467,10 +472,7 @@ static int virtnet_xdp_xmit(struct net_device *dev, struct xdp_buff *xdp)
 	if (!xdp_prog)
 		return -ENXIO;
 
-	sent = __virtnet_xdp_xmit(vi, xdp);
-	if (!sent)
-		return -ENOSPC;
-	return 0;
+	return __virtnet_xdp_xmit(vi, xdpf);
 }
 
 static unsigned int virtnet_get_headroom(struct virtnet_info *vi)
@@ -559,7 +561,6 @@ static struct sk_buff *receive_small(struct net_device *dev,
 	struct page *page = virt_to_head_page(buf);
 	unsigned int delta = 0;
 	struct page *xdp_page;
-	bool sent;
 	int err;
 
 	len -= vi->hdr_len;
@@ -568,6 +569,7 @@ static struct sk_buff *receive_small(struct net_device *dev,
 	xdp_prog = rcu_dereference(rq->xdp_prog);
 	if (xdp_prog) {
 		struct virtio_net_hdr_mrg_rxbuf *hdr = buf + header_offset;
+		struct xdp_frame *xdpf;
 		struct xdp_buff xdp;
 		void *orig_data;
 		u32 act;
@@ -608,10 +610,14 @@ static struct sk_buff *receive_small(struct net_device *dev,
 		case XDP_PASS:
 			/* Recalculate length in case bpf program changed it */
 			delta = orig_data - xdp.data;
+			len = xdp.data_end - xdp.data;
 			break;
 		case XDP_TX:
-			sent = __virtnet_xdp_xmit(vi, &xdp);
-			if (unlikely(!sent)) {
+			xdpf = convert_to_xdp_frame(&xdp);
+			if (unlikely(!xdpf))
+				goto err_xdp;
+			err = __virtnet_xdp_xmit(vi, xdpf);
+			if (unlikely(err)) {
 				trace_xdp_exception(vi->dev, xdp_prog, act);
 				goto err_xdp;
 			}
@@ -641,7 +647,7 @@ static struct sk_buff *receive_small(struct net_device *dev,
 		goto err;
 	}
 	skb_reserve(skb, headroom - delta);
-	skb_put(skb, len + delta);
+	skb_put(skb, len);
 	if (!delta) {
 		buf += header_offset;
 		memcpy(skb_vnet_hdr(skb), buf, vi->hdr_len);
@@ -694,7 +700,6 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
 	struct bpf_prog *xdp_prog;
 	unsigned int truesize;
 	unsigned int headroom = mergeable_ctx_to_headroom(ctx);
-	bool sent;
 	int err;
 
 	head_skb = NULL;
@@ -702,6 +707,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
 	rcu_read_lock();
 	xdp_prog = rcu_dereference(rq->xdp_prog);
 	if (xdp_prog) {
+		struct xdp_frame *xdpf;
 		struct page *xdp_page;
 		struct xdp_buff xdp;
 		void *data;
@@ -756,6 +762,10 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
 			offset = xdp.data -
 					page_address(xdp_page) - vi->hdr_len;
 
+			/* recalculate len if xdp.data or xdp.data_end were
+			 * adjusted
+			 */
+			len = xdp.data_end - xdp.data;
 			/* We can only create skb based on xdp_page. */
 			if (unlikely(xdp_page != page)) {
 				rcu_read_unlock();
@@ -766,8 +776,11 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
 			}
 			break;
 		case XDP_TX:
-			sent = __virtnet_xdp_xmit(vi, &xdp);
-			if (unlikely(!sent)) {
+			xdpf = convert_to_xdp_frame(&xdp);
+			if (unlikely(!xdpf))
+				goto err_xdp;
+			err = __virtnet_xdp_xmit(vi, xdpf);
+			if (unlikely(err)) {
 				trace_xdp_exception(vi->dev, xdp_prog, act);
 				if (unlikely(xdp_page != page))
 					put_page(xdp_page);
@@ -1312,6 +1325,13 @@ static int virtnet_open(struct net_device *dev)
 		if (err < 0)
 			return err;
 
+		err = xdp_rxq_info_reg_mem_model(&vi->rq[i].xdp_rxq,
+						 MEM_TYPE_PAGE_SHARED, NULL);
+		if (err < 0) {
+			xdp_rxq_info_unreg(&vi->rq[i].xdp_rxq);
+			return err;
+		}
+
 		virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi);
 		virtnet_napi_tx_enable(vi, vi->sq[i].vq, &vi->sq[i].napi);
 	}
diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c
index 0a2b180..90b5f39 100644
--- a/drivers/net/vrf.c
+++ b/drivers/net/vrf.c
@@ -48,6 +48,9 @@ static unsigned int vrf_net_id;
 struct net_vrf {
 	struct rtable __rcu	*rth;
 	struct rt6_info	__rcu	*rt6;
+#if IS_ENABLED(CONFIG_IPV6)
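+	/* fib6 tables have no refcnt and are never freed, so caching
+	 * a plain pointer here is safe (set in vrf_rt6_create()).
+	 */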
+	struct fib6_table	*fib6_table;
+#endif
 	u32                     tb_id;
 };
 
@@ -496,7 +499,6 @@ static int vrf_rt6_create(struct net_device *dev)
 	int flags = DST_HOST | DST_NOPOLICY | DST_NOXFRM;
 	struct net_vrf *vrf = netdev_priv(dev);
 	struct net *net = dev_net(dev);
-	struct fib6_table *rt6i_table;
 	struct rt6_info *rt6;
 	int rc = -ENOMEM;
 
@@ -504,8 +506,8 @@ static int vrf_rt6_create(struct net_device *dev)
 	if (!ipv6_mod_enabled())
 		return 0;
 
-	rt6i_table = fib6_new_table(net, vrf->tb_id);
-	if (!rt6i_table)
+	vrf->fib6_table = fib6_new_table(net, vrf->tb_id);
+	if (!vrf->fib6_table)
 		goto out;
 
 	/* create a dst for routing packets out a VRF device */
@@ -513,7 +515,6 @@ static int vrf_rt6_create(struct net_device *dev)
 	if (!rt6)
 		goto out;
 
-	rt6->rt6i_table = rt6i_table;
 	rt6->dst.output	= vrf_output6;
 
 	rcu_assign_pointer(vrf->rt6, rt6);
@@ -946,22 +947,8 @@ static struct rt6_info *vrf_ip6_route_lookup(struct net *net,
 					     int flags)
 {
 	struct net_vrf *vrf = netdev_priv(dev);
-	struct fib6_table *table = NULL;
-	struct rt6_info *rt6;
 
-	rcu_read_lock();
-
-	/* fib6_table does not have a refcnt and can not be freed */
-	rt6 = rcu_dereference(vrf->rt6);
-	if (likely(rt6))
-		table = rt6->rt6i_table;
-
-	rcu_read_unlock();
-
-	if (!table)
-		return NULL;
-
-	return ip6_pol_route(net, table, ifindex, fl6, skb, flags);
+	return ip6_pol_route(net, vrf->fib6_table, ifindex, fl6, skb, flags);
 }
 
 static void vrf_ip6_input_dst(struct sk_buff *skb, struct net_device *vrf_dev,
diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index fab7a4d..aee0e60 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -2085,9 +2085,13 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
 		local_ip = vxlan->cfg.saddr;
 		dst_cache = &rdst->dst_cache;
 		md->gbp = skb->mark;
-		ttl = vxlan->cfg.ttl;
-		if (!ttl && vxlan_addr_multicast(dst))
-			ttl = 1;
+		if (flags & VXLAN_F_TTL_INHERIT) {
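+			/* Propagate the TTL from the inner IP header */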
+			ttl = ip_tunnel_get_ttl(old_iph, skb);
+		} else {
+			ttl = vxlan->cfg.ttl;
+			if (!ttl && vxlan_addr_multicast(dst))
+				ttl = 1;
+		}
 
 		tos = vxlan->cfg.tos;
 		if (tos == 1)
@@ -2709,6 +2713,7 @@ static const struct nla_policy vxlan_policy[IFLA_VXLAN_MAX + 1] = {
 	[IFLA_VXLAN_GBP]	= { .type = NLA_FLAG, },
 	[IFLA_VXLAN_GPE]	= { .type = NLA_FLAG, },
 	[IFLA_VXLAN_REMCSUM_NOPARTIAL]	= { .type = NLA_FLAG },
+	[IFLA_VXLAN_TTL_INHERIT]	= { .type = NLA_FLAG },
 };
 
 static int vxlan_validate(struct nlattr *tb[], struct nlattr *data[],
@@ -3254,6 +3259,12 @@ static int vxlan_nl2conf(struct nlattr *tb[], struct nlattr *data[],
 	if (data[IFLA_VXLAN_TTL])
 		conf->ttl = nla_get_u8(data[IFLA_VXLAN_TTL]);
 
+	if (data[IFLA_VXLAN_TTL_INHERIT]) {
+		if (changelink)
+			return -EOPNOTSUPP;
+		conf->flags |= VXLAN_F_TTL_INHERIT;
+	}
+
 	if (data[IFLA_VXLAN_LABEL])
 		conf->label = nla_get_be32(data[IFLA_VXLAN_LABEL]) &
 			     IPV6_FLOWLABEL_MASK;
diff --git a/drivers/soc/ti/knav_dma.c b/drivers/soc/ti/knav_dma.c
index 026182d..224d7dd 100644
--- a/drivers/soc/ti/knav_dma.c
+++ b/drivers/soc/ti/knav_dma.c
@@ -134,6 +134,13 @@ struct knav_dma_chan {
 
 static struct knav_dma_pool_device *kdev;
 
+static bool device_ready;
+bool knav_dma_device_ready(void)
+{
+	return device_ready;
+}
+EXPORT_SYMBOL_GPL(knav_dma_device_ready);
+
 static bool check_config(struct knav_dma_chan *chan, struct knav_dma_cfg *cfg)
 {
 	if (!memcmp(&chan->cfg, cfg, sizeof(*cfg)))
@@ -773,6 +780,7 @@ static int knav_dma_probe(struct platform_device *pdev)
 	debugfs_create_file("knav_dma", S_IFREG | S_IRUGO, NULL, NULL,
 			    &knav_dma_debug_ops);
 
+	device_ready = true;
 	return ret;
 }
 
diff --git a/drivers/soc/ti/knav_qmss.h b/drivers/soc/ti/knav_qmss.h
index 905b974..56866ba4 100644
--- a/drivers/soc/ti/knav_qmss.h
+++ b/drivers/soc/ti/knav_qmss.h
@@ -292,6 +292,11 @@ struct knav_queue {
 	struct list_head		list;
 };
 
+enum qmss_version {
+	QMSS,
+	QMSS_66AK2G,
+};
+
 struct knav_device {
 	struct device				*dev;
 	unsigned				base_id;
@@ -305,6 +310,7 @@ struct knav_device {
 	struct list_head			pools;
 	struct list_head			pdsps;
 	struct list_head			qmgrs;
+	enum qmss_version			version;
 };
 
 struct knav_range_ops {
diff --git a/drivers/soc/ti/knav_qmss_queue.c b/drivers/soc/ti/knav_qmss_queue.c
index 77d6b5c..419365a 100644
--- a/drivers/soc/ti/knav_qmss_queue.c
+++ b/drivers/soc/ti/knav_qmss_queue.c
@@ -42,6 +42,15 @@ static DEFINE_MUTEX(knav_dev_lock);
 #define KNAV_QUEUE_PUSH_REG_INDEX	4
 #define KNAV_QUEUE_POP_REG_INDEX	5
 
+/* Queue manager register indices in DTS for QMSS in K2G NAVSS.
+ * There are no status and vbusm push registers on this version
+ * of QMSS. The push registers are the same as the pop registers,
+ * so all indices above 1 must be redefined.
+ */
+#define KNAV_L_QUEUE_CONFIG_REG_INDEX	1
+#define KNAV_L_QUEUE_REGION_REG_INDEX	2
+#define KNAV_L_QUEUE_PUSH_REG_INDEX	3
+
 /* PDSP register indices in DTS */
 #define KNAV_QUEUE_PDSP_IRAM_REG_INDEX	0
 #define KNAV_QUEUE_PDSP_REGS_REG_INDEX	1
@@ -65,6 +74,13 @@ static DEFINE_MUTEX(knav_dev_lock);
  */
 const char *knav_acc_firmwares[] = {"ks2_qmss_pdsp_acc48.bin"};
 
+static bool device_ready;
+bool knav_qmss_device_ready(void)
+{
+	return device_ready;
+}
+EXPORT_SYMBOL_GPL(knav_qmss_device_ready);
+
 /**
  * knav_queue_notify: qmss queue notifier call
  *
@@ -1169,8 +1185,12 @@ static int knav_queue_setup_link_ram(struct knav_device *kdev)
 		dev_dbg(kdev->dev, "linkram0: dma:%pad, virt:%p, size:%x\n",
 			&block->dma, block->virt, block->size);
 		writel_relaxed((u32)block->dma, &qmgr->reg_config->link_ram_base0);
-		writel_relaxed(block->size, &qmgr->reg_config->link_ram_size0);
-
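+		/* Legacy QMSS programs the link RAM size as (entries - 1) */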
+		if (kdev->version == QMSS_66AK2G)
+			writel_relaxed(block->size,
+				       &qmgr->reg_config->link_ram_size0);
+		else
+			writel_relaxed(block->size - 1,
+				       &qmgr->reg_config->link_ram_size0);
 		block++;
 		if (!block->size)
 			continue;
@@ -1387,42 +1407,64 @@ static int knav_queue_init_qmgrs(struct knav_device *kdev,
 		qmgr->reg_peek =
 			knav_queue_map_reg(kdev, child,
 					   KNAV_QUEUE_PEEK_REG_INDEX);
-		qmgr->reg_status =
-			knav_queue_map_reg(kdev, child,
-					   KNAV_QUEUE_STATUS_REG_INDEX);
+
+		if (kdev->version == QMSS) {
+			qmgr->reg_status =
+				knav_queue_map_reg(kdev, child,
+						   KNAV_QUEUE_STATUS_REG_INDEX);
+		}
+
 		qmgr->reg_config =
 			knav_queue_map_reg(kdev, child,
+					   (kdev->version == QMSS_66AK2G) ?
+					   KNAV_L_QUEUE_CONFIG_REG_INDEX :
 					   KNAV_QUEUE_CONFIG_REG_INDEX);
 		qmgr->reg_region =
 			knav_queue_map_reg(kdev, child,
+					   (kdev->version == QMSS_66AK2G) ?
+					   KNAV_L_QUEUE_REGION_REG_INDEX :
 					   KNAV_QUEUE_REGION_REG_INDEX);
+
 		qmgr->reg_push =
 			knav_queue_map_reg(kdev, child,
-					   KNAV_QUEUE_PUSH_REG_INDEX);
-		qmgr->reg_pop =
-			knav_queue_map_reg(kdev, child,
-					   KNAV_QUEUE_POP_REG_INDEX);
+					   (kdev->version == QMSS_66AK2G) ?
+					    KNAV_L_QUEUE_PUSH_REG_INDEX :
+					    KNAV_QUEUE_PUSH_REG_INDEX);
 
-		if (IS_ERR(qmgr->reg_peek) || IS_ERR(qmgr->reg_status) ||
+		if (kdev->version == QMSS) {
+			qmgr->reg_pop =
+				knav_queue_map_reg(kdev, child,
+						   KNAV_QUEUE_POP_REG_INDEX);
+		}
+
+		if (IS_ERR(qmgr->reg_peek) ||
+		    ((kdev->version == QMSS) &&
+		    (IS_ERR(qmgr->reg_status) || IS_ERR(qmgr->reg_pop))) ||
 		    IS_ERR(qmgr->reg_config) || IS_ERR(qmgr->reg_region) ||
-		    IS_ERR(qmgr->reg_push) || IS_ERR(qmgr->reg_pop)) {
+		    IS_ERR(qmgr->reg_push)) {
 			dev_err(dev, "failed to map qmgr regs\n");
+			if (kdev->version == QMSS) {
+				if (!IS_ERR(qmgr->reg_status))
+					devm_iounmap(dev, qmgr->reg_status);
+				if (!IS_ERR(qmgr->reg_pop))
+					devm_iounmap(dev, qmgr->reg_pop);
+			}
 			if (!IS_ERR(qmgr->reg_peek))
 				devm_iounmap(dev, qmgr->reg_peek);
-			if (!IS_ERR(qmgr->reg_status))
-				devm_iounmap(dev, qmgr->reg_status);
 			if (!IS_ERR(qmgr->reg_config))
 				devm_iounmap(dev, qmgr->reg_config);
 			if (!IS_ERR(qmgr->reg_region))
 				devm_iounmap(dev, qmgr->reg_region);
 			if (!IS_ERR(qmgr->reg_push))
 				devm_iounmap(dev, qmgr->reg_push);
-			if (!IS_ERR(qmgr->reg_pop))
-				devm_iounmap(dev, qmgr->reg_pop);
 			devm_kfree(dev, qmgr);
 			continue;
 		}
 
+		/* Use same push register for pop as well */
+		if (kdev->version == QMSS_66AK2G)
+			qmgr->reg_pop = qmgr->reg_push;
+
 		list_add_tail(&qmgr->list, &kdev->qmgrs);
 		dev_info(dev, "added qmgr start queue %d, num of queues %d, reg_peek %p, reg_status %p, reg_config %p, reg_region %p, reg_push %p, reg_pop %p\n",
 			 qmgr->start_queue, qmgr->num_queues,
@@ -1681,10 +1723,24 @@ static int knav_queue_init_queues(struct knav_device *kdev)
 	return 0;
 }
 
+/* Match table for of_platform binding */
+static const struct of_device_id keystone_qmss_of_match[] = {
+	{
+		.compatible = "ti,keystone-navigator-qmss",
+	},
+	{
+		.compatible = "ti,66ak2g-navss-qm",
+		.data	= (void *)QMSS_66AK2G,
+	},
+	{},
+};
+MODULE_DEVICE_TABLE(of, keystone_qmss_of_match);
+
 static int knav_queue_probe(struct platform_device *pdev)
 {
 	struct device_node *node = pdev->dev.of_node;
 	struct device_node *qmgrs, *queue_pools, *regions, *pdsps;
+	const struct of_device_id *match;
 	struct device *dev = &pdev->dev;
 	u32 temp[2];
 	int ret;
@@ -1700,6 +1756,10 @@ static int knav_queue_probe(struct platform_device *pdev)
 		return -ENOMEM;
 	}
 
+	match = of_match_device(of_match_ptr(keystone_qmss_of_match), dev);
+	if (match && match->data)
+		kdev->version = QMSS_66AK2G;
+
 	platform_set_drvdata(pdev, kdev);
 	kdev->dev = dev;
 	INIT_LIST_HEAD(&kdev->queue_ranges);
@@ -1796,6 +1856,7 @@ static int knav_queue_probe(struct platform_device *pdev)
 
 	debugfs_create_file("qmss", S_IFREG | S_IRUGO, NULL, NULL,
 			    &knav_queue_debug_ops);
+	device_ready = true;
 	return 0;
 
 err:
@@ -1815,13 +1876,6 @@ static int knav_queue_remove(struct platform_device *pdev)
 	return 0;
 }
 
-/* Match table for of_platform binding */
-static struct of_device_id keystone_qmss_of_match[] = {
-	{ .compatible = "ti,keystone-navigator-qmss", },
-	{},
-};
-MODULE_DEVICE_TABLE(of, keystone_qmss_of_match);
-
 static struct platform_driver keystone_qmss_driver = {
 	.probe		= knav_queue_probe,
 	.remove		= knav_queue_remove,
diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 986058a..bbf38be 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -32,6 +32,7 @@
 #include <linux/skbuff.h>
 
 #include <net/sock.h>
+#include <net/xdp.h>
 
 #include "vhost.h"
 
@@ -181,10 +182,10 @@ static void vhost_net_buf_unproduce(struct vhost_net_virtqueue *nvq)
 
 static int vhost_net_buf_peek_len(void *ptr)
 {
-	if (tun_is_xdp_buff(ptr)) {
-		struct xdp_buff *xdp = tun_ptr_to_xdp(ptr);
+	if (tun_is_xdp_frame(ptr)) {
+		struct xdp_frame *xdpf = tun_ptr_to_xdp(ptr);
 
-		return xdp->data_end - xdp->data;
+		return xdpf->len;
 	}
 
 	return __skb_array_len_with_tag(ptr);
diff --git a/include/dt-bindings/net/microchip-lan78xx.h b/include/dt-bindings/net/microchip-lan78xx.h
new file mode 100644
index 0000000..0742ff0
--- /dev/null
+++ b/include/dt-bindings/net/microchip-lan78xx.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _DT_BINDINGS_MICROCHIP_LAN78XX_H
+#define _DT_BINDINGS_MICROCHIP_LAN78XX_H
+
+/* LED modes for LAN7800/LAN7850 embedded PHY */
+
+#define LAN78XX_LINK_ACTIVITY           0
+#define LAN78XX_LINK_1000_ACTIVITY      1
+#define LAN78XX_LINK_100_ACTIVITY       2
+#define LAN78XX_LINK_10_ACTIVITY        3
+#define LAN78XX_LINK_100_1000_ACTIVITY  4
+#define LAN78XX_LINK_10_1000_ACTIVITY   5
+#define LAN78XX_LINK_10_100_ACTIVITY    6
+#define LAN78XX_DUPLEX_COLLISION        8
+#define LAN78XX_COLLISION               9
+#define LAN78XX_ACTIVITY                10
+#define LAN78XX_AUTONEG_FAULT           12
+#define LAN78XX_FORCE_LED_OFF           14
+#define LAN78XX_FORCE_LED_ON            15
+
+#endif
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 95a7abd..ee5275e 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -22,6 +22,8 @@ struct perf_event;
 struct bpf_prog;
 struct bpf_map;
 struct sock;
+struct seq_file;
+struct btf;
 
 /* map is generic key/value storage optionally accesible by eBPF programs */
 struct bpf_map_ops {
@@ -43,10 +45,14 @@ struct bpf_map_ops {
 	void (*map_fd_put_ptr)(void *ptr);
 	u32 (*map_gen_lookup)(struct bpf_map *map, struct bpf_insn *insn_buf);
 	u32 (*map_fd_sys_lookup_elem)(void *ptr);
+	void (*map_seq_show_elem)(struct bpf_map *map, void *key,
+				  struct seq_file *m);
+	int (*map_check_btf)(const struct bpf_map *map, const struct btf *btf,
+			     u32 key_type_id, u32 value_type_id);
 };
 
 struct bpf_map {
-	/* 1st cacheline with read-mostly members of which some
+	/* The first two cachelines with read-mostly members of which some
 	 * are also accessed in fast-path (e.g. ops, max_entries).
 	 */
 	const struct bpf_map_ops *ops ____cacheline_aligned;
@@ -62,10 +68,13 @@ struct bpf_map {
 	u32 pages;
 	u32 id;
 	int numa_node;
+	u32 btf_key_id;
+	u32 btf_value_id;
+	struct btf *btf;
 	bool unpriv_array;
-	/* 7 bytes hole */
+	/* 55 bytes hole */
 
-	/* 2nd cacheline with misc members to avoid false sharing
+	/* The 3rd and 4th cacheline with misc members to avoid false sharing
 	 * particularly with refcounting.
 	 */
 	struct user_struct *user ____cacheline_aligned;
@@ -100,6 +109,11 @@ static inline struct bpf_offloaded_map *map_to_offmap(struct bpf_map *map)
 	return container_of(map, struct bpf_offloaded_map, map);
 }
 
+static inline bool bpf_map_support_seq_show(const struct bpf_map *map)
+{
+	return map->ops->map_seq_show_elem && map->ops->map_check_btf;
+}
+
 extern const struct bpf_map_ops bpf_map_offload_ops;
 
 /* function argument constraints */
diff --git a/include/linux/btf.h b/include/linux/btf.h
new file mode 100644
index 0000000..a966dc6
--- /dev/null
+++ b/include/linux/btf.h
@@ -0,0 +1,48 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018 Facebook */
+
+#ifndef _LINUX_BTF_H
+#define _LINUX_BTF_H 1
+
+#include <linux/types.h>
+
+struct btf;
+struct btf_type;
+union bpf_attr;
+
+extern const struct file_operations btf_fops;
+
+void btf_put(struct btf *btf);
+int btf_new_fd(const union bpf_attr *attr);
+struct btf *btf_get_by_fd(int fd);
+int btf_get_info_by_fd(const struct btf *btf,
+		       const union bpf_attr *attr,
+		       union bpf_attr __user *uattr);
+/* Figure out the size of a type_id.  If type_id is a modifier
+ * (e.g. const), it is resolved to the underlying type that has a size.
+ *
+ * For example:
+ * In describing "const void *",  type_id is "const" and "const"
+ * refers to "void *".  The return type will be "void *".
+ *
+ * If type_id is a simple "int", then return type will be "int".
+ *
+ * @btf: struct btf object
+ * @type_id: Find out the size of type_id. The type_id of the return
+ *           type is set to *type_id.
+ * @ret_size: It can be NULL.  If not NULL, the size of the return
+ *            type is set to *ret_size.
+ * Return: The btf_type (resolved to another type with size info if needed).
+ *         NULL is returned if type_id itself does not have size info
+ *         (e.g. void) or it cannot be resolved to another type that
+ *         has size info.
+ *         *type_id and *ret_size will not be changed in the
+ *         NULL return case.
+ */
+const struct btf_type *btf_type_id_size(const struct btf *btf,
+					u32 *type_id,
+					u32 *ret_size);
+void btf_type_seq_show(const struct btf *btf, u32 type_id, void *obj,
+		       struct seq_file *m);
+
+#endif
diff --git a/include/linux/filter.h b/include/linux/filter.h
index fc4e8f9..4da8b23 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -30,6 +30,7 @@ struct sock;
 struct seccomp_data;
 struct bpf_prog_aux;
 struct xdp_rxq_info;
+struct xdp_buff;
 
 /* ArgX, context and stack frame pointer register positions. Note,
  * Arg1, Arg2, Arg3, etc are used as argument mappings of function
@@ -500,14 +501,6 @@ struct bpf_skb_data_end {
 	void *data_end;
 };
 
-struct xdp_buff {
-	void *data;
-	void *data_end;
-	void *data_meta;
-	void *data_hard_start;
-	struct xdp_rxq_info *rxq;
-};
-
 struct sk_msg_buff {
 	void *data;
 	void *data_end;
@@ -772,21 +765,6 @@ int xdp_do_redirect(struct net_device *dev,
 		    struct bpf_prog *prog);
 void xdp_do_flush_map(void);
 
-/* Drivers not supporting XDP metadata can use this helper, which
- * rejects any room expansion for metadata as a result.
- */
-static __always_inline void
-xdp_set_data_meta_invalid(struct xdp_buff *xdp)
-{
-	xdp->data_meta = xdp->data + 1;
-}
-
-static __always_inline bool
-xdp_data_meta_unsupported(const struct xdp_buff *xdp)
-{
-	return unlikely(xdp->data_meta > xdp->data);
-}
-
 void bpf_warn_invalid_xdp_action(u32 act);
 
 struct sock *do_sk_redirect_map(struct sk_buff *skb);
diff --git a/include/linux/if_tun.h b/include/linux/if_tun.h
index fd00170..3d2996d 100644
--- a/include/linux/if_tun.h
+++ b/include/linux/if_tun.h
@@ -22,7 +22,7 @@
 #if defined(CONFIG_TUN) || defined(CONFIG_TUN_MODULE)
 struct socket *tun_get_socket(struct file *);
 struct ptr_ring *tun_get_tx_ring(struct file *file);
-bool tun_is_xdp_buff(void *ptr);
+bool tun_is_xdp_frame(void *ptr);
 void *tun_xdp_to_ptr(void *ptr);
 void *tun_ptr_to_xdp(void *ptr);
 void tun_ptr_free(void *ptr);
@@ -39,7 +39,7 @@ static inline struct ptr_ring *tun_get_tx_ring(struct file *f)
 {
 	return ERR_PTR(-EINVAL);
 }
-static inline bool tun_is_xdp_buff(void *ptr)
+static inline bool tun_is_xdp_frame(void *ptr)
 {
 	return false;
 }
diff --git a/include/linux/mdio-bitbang.h b/include/linux/mdio-bitbang.h
index a8ac9cf..5d71e8a 100644
--- a/include/linux/mdio-bitbang.h
+++ b/include/linux/mdio-bitbang.h
@@ -33,8 +33,6 @@ struct mdiobb_ops {
 
 struct mdiobb_ctrl {
 	const struct mdiobb_ops *ops;
-	/* reset callback */
-	int (*reset)(struct mii_bus *bus);
 };
 
 /* The returned bus is not yet registered with the phy layer. */
diff --git a/include/linux/mdio-gpio.h b/include/linux/mdio-gpio.h
new file mode 100644
index 0000000..cea443a6
--- /dev/null
+++ b/include/linux/mdio-gpio.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __LINUX_MDIO_GPIO_H
+#define __LINUX_MDIO_GPIO_H
+
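+/* Offsets of the MDC, MDIO and (optional) MDO lines in the
+ * GPIO descriptor table, e.g. a node's "gpios" property.
+ */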
+#define MDIO_GPIO_MDC	0
+#define MDIO_GPIO_MDIO	1
+#define MDIO_GPIO_MDO	2
+
+#endif
diff --git a/include/linux/microchipphy.h b/include/linux/microchipphy.h
index 8f9c903..8c40128 100644
--- a/include/linux/microchipphy.h
+++ b/include/linux/microchipphy.h
@@ -70,6 +70,9 @@
 #define	LAN88XX_MMD3_CHIP_ID			(32877)
 #define	LAN88XX_MMD3_CHIP_REV			(32878)
 
+/* Registers specific to the LAN7800/LAN7850 embedded phy */
+#define LAN78XX_PHY_LED_MODE_SELECT		(0x1D)
+
 /* DSP registers */
 #define PHY_ARDENNES_MMD_DEV_3_PHY_CFG		(0x806A)
 #define PHY_ARDENNES_MMD_DEV_3_PHY_CFG_ZD_DLY_EN_	(0x2000)
diff --git a/include/linux/net.h b/include/linux/net.h
index 2248a05..6554d3b 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -197,6 +197,7 @@ struct proto_ops {
 					   int offset, size_t size, int flags);
 	int		(*sendmsg_locked)(struct sock *sk, struct msghdr *msg,
 					  size_t size);
+	int		(*set_rcvlowat)(struct sock *sk, int val);
 };
 
 #define DECLARE_SOCKADDR(type, dst, src)	\
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index cf44503..14e0777 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1165,7 +1165,7 @@ struct dev_ifalias {
  *	This function is used to set or query state related to XDP on the
  *	netdevice and manage BPF offload. See definition of
  *	enum bpf_netdev_command for details.
- * int (*ndo_xdp_xmit)(struct net_device *dev, struct xdp_buff *xdp);
+ * int (*ndo_xdp_xmit)(struct net_device *dev, struct xdp_frame *xdp);
  *	This function is used to submit a XDP packet for transmit on a
  *	netdevice.
  * void (*ndo_xdp_flush)(struct net_device *dev);
@@ -1356,7 +1356,7 @@ struct net_device_ops {
 	int			(*ndo_bpf)(struct net_device *dev,
 					   struct netdev_bpf *bpf);
 	int			(*ndo_xdp_xmit)(struct net_device *dev,
-						struct xdp_buff *xdp);
+						struct xdp_frame *xdp);
 	void			(*ndo_xdp_flush)(struct net_device *dev);
 };
 
diff --git a/include/linux/platform_data/mdio-gpio.h b/include/linux/platform_data/mdio-gpio.h
deleted file mode 100644
index 11f00cd..0000000
--- a/include/linux/platform_data/mdio-gpio.h
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- * MDIO-GPIO bus platform data structures
- *
- * Copyright (C) 2008, Paulius Zaleckas <paulius.zaleckas@teltonika.lt>
- *
- * This file is licensed under the terms of the GNU General Public License
- * version 2. This program is licensed "as is" without any warranty of any
- * kind, whether express or implied.
- */
-
-#ifndef __LINUX_MDIO_GPIO_H
-#define __LINUX_MDIO_GPIO_H
-
-#include <linux/mdio-bitbang.h>
-
-struct mdio_gpio_platform_data {
-	/* GPIO numbers for bus pins */
-	unsigned int mdc;
-	unsigned int mdio;
-	unsigned int mdo;
-
-	bool mdc_active_low;
-	bool mdio_active_low;
-	bool mdo_active_low;
-
-	u32 phy_mask;
-	u32 phy_ignore_ta_mask;
-	int irqs[PHY_MAX_ADDR];
-	/* reset callback */
-	int (*reset)(struct mii_bus *bus);
-};
-
-#endif /* __LINUX_MDIO_GPIO_H */
diff --git a/include/linux/qed/qed_eth_if.h b/include/linux/qed/qed_eth_if.h
index 147d08c..7f9756f 100644
--- a/include/linux/qed/qed_eth_if.h
+++ b/include/linux/qed/qed_eth_if.h
@@ -352,6 +352,7 @@ struct qed_eth_ops {
 	int (*configure_arfs_searcher)(struct qed_dev *cdev,
 				       enum qed_filter_config_mode mode);
 	int (*get_coalesce)(struct qed_dev *cdev, u16 *coal, void *handle);
+	int (*req_bulletin_update_mac)(struct qed_dev *cdev, u8 *mac);
 };
 
 const struct qed_eth_ops *qed_get_eth_ops(void);
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 9065477..d274059 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -3131,6 +3131,7 @@ static inline void *skb_push_rcsum(struct sk_buff *skb, unsigned int len)
 	return skb->data;
 }
 
+int pskb_trim_rcsum_slow(struct sk_buff *skb, unsigned int len);
 /**
  *	pskb_trim_rcsum - trim received skb and update checksum
  *	@skb: buffer to trim
@@ -3144,9 +3145,7 @@ static inline int pskb_trim_rcsum(struct sk_buff *skb, unsigned int len)
 {
 	if (likely(len >= skb->len))
 		return 0;
-	if (skb->ip_summed == CHECKSUM_COMPLETE)
-		skb->ip_summed = CHECKSUM_NONE;
-	return __pskb_trim(skb, len);
+	return pskb_trim_rcsum_slow(skb, len);
 }
 
 static inline int __skb_trim_rcsum(struct sk_buff *skb, unsigned int len)
diff --git a/include/linux/soc/ti/knav_dma.h b/include/linux/soc/ti/knav_dma.h
index 66693bc..7127ec3 100644
--- a/include/linux/soc/ti/knav_dma.h
+++ b/include/linux/soc/ti/knav_dma.h
@@ -167,6 +167,8 @@ struct knav_dma_desc {
 void *knav_dma_open_channel(struct device *dev, const char *name,
 				struct knav_dma_cfg *config);
 void knav_dma_close_channel(void *channel);
+int knav_dma_get_flow(void *channel);
+bool knav_dma_device_ready(void);
 #else
 static inline void *knav_dma_open_channel(struct device *dev, const char *name,
 				struct knav_dma_cfg *config)
@@ -176,6 +178,16 @@ static inline void *knav_dma_open_channel(struct device *dev, const char *name,
 static inline void knav_dma_close_channel(void *channel)
 {}
 
+static inline int knav_dma_get_flow(void *channel)
+{
+	return -EINVAL;
+}
+
+static inline bool knav_dma_device_ready(void)
+{
+	return false;
+}
+
 #endif
 
 #endif /* __SOC_TI_KEYSTONE_NAVIGATOR_DMA_H__ */
diff --git a/include/linux/soc/ti/knav_qmss.h b/include/linux/soc/ti/knav_qmss.h
index 9f0ebb3b..9745df6e 100644
--- a/include/linux/soc/ti/knav_qmss.h
+++ b/include/linux/soc/ti/knav_qmss.h
@@ -86,5 +86,6 @@ int knav_pool_desc_map(void *ph, void *desc, unsigned size,
 void *knav_pool_desc_unmap(void *ph, dma_addr_t dma, unsigned dma_sz);
 dma_addr_t knav_pool_desc_virt_to_dma(void *ph, void *virt);
 void *knav_pool_desc_dma_to_virt(void *ph, dma_addr_t dma);
+bool knav_qmss_device_ready(void);
 
 #endif /* __SOC_TI_KNAV_QMSS_H__ */
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 8f4c549..20585d5 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -281,6 +281,7 @@ struct tcp_sock {
 				 * receiver in Recovery. */
 	u32	prr_out;	/* Total number of pkts sent during Recovery. */
 	u32	delivered;	/* Total data packets delivered incl. rexmits */
+	u32	delivered_ce;	/* Like the above but only ECE marked packets */
 	u32	lost;		/* Total data packets lost incl. rexmits */
 	u32	app_limited;	/* limited until "delivered" reaches this val */
 	u64	first_tx_mstamp;  /* start of window send phase */
diff --git a/include/net/addrconf.h b/include/net/addrconf.h
index 378d601..8312cc2 100644
--- a/include/net/addrconf.h
+++ b/include/net/addrconf.h
@@ -308,6 +308,20 @@ static inline struct inet6_dev *__in6_dev_get(const struct net_device *dev)
 }
 
 /**
+ * __in6_dev_get_safely - get inet6_dev pointer from netdevice
+ * @dev: network device
+ *
+ * This is a safer version of __in6_dev_get(): it tolerates a NULL @dev
+ */
+static inline struct inet6_dev *__in6_dev_get_safely(const struct net_device *dev)
+{
+	if (likely(dev))
+		return rcu_dereference_rtnl(dev->ip6_ptr);
+	else
+		return NULL;
+}
+
+/**
  * in6_dev_get - get inet6_dev pointer from netdevice
  * @dev: network device
  *
diff --git a/include/net/ax88796.h b/include/net/ax88796.h
index b9a3bec..84b3785 100644
--- a/include/net/ax88796.h
+++ b/include/net/ax88796.h
@@ -12,6 +12,10 @@
 #ifndef __NET_AX88796_PLAT_H
 #define __NET_AX88796_PLAT_H
 
+struct sk_buff;
+struct net_device;
+struct platform_device;
+
 #define AXFLG_HAS_EEPROM		(1<<0)
 #define AXFLG_MAC_FROMDEV		(1<<1)	/* device already has MAC */
 #define AXFLG_HAS_93CX6			(1<<2)	/* use eeprom_93cx6 driver */
@@ -26,6 +30,16 @@ struct ax_plat_data {
 	u32		*reg_offsets;	/* register offsets */
 	u8		*mac_addr;	/* MAC addr (only used when
 					   AXFLG_MAC_FROMPLATFORM is used */
+
+	/* uses the default ax88796 buffer handling if set to NULL */
+	void (*block_output)(struct net_device *dev, int count,
+			const unsigned char *buf, int star_page);
+	void (*block_input)(struct net_device *dev, int count,
+			struct sk_buff *skb, int ring_offset);
+	/* Returns nonzero if a pending interrupt request might be caused
+	 * by the ax88796. Handles all interrupts if set to NULL.
+	 */
+	int (*check_irq)(struct platform_device *pdev);
 };
 
 #endif /* __NET_AX88796_PLAT_H */
diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h
index d4088d1..db38925 100644
--- a/include/net/if_inet6.h
+++ b/include/net/if_inet6.h
@@ -64,7 +64,7 @@ struct inet6_ifaddr {
 	struct delayed_work	dad_work;
 
 	struct inet6_dev	*idev;
-	struct rt6_info		*rt;
+	struct fib6_info	*rt;
 
 	struct hlist_node	addr_lst;
 	struct list_head	if_list;
@@ -143,8 +143,7 @@ struct ipv6_ac_socklist {
 
 struct ifacaddr6 {
 	struct in6_addr		aca_addr;
-	struct inet6_dev	*aca_idev;
-	struct rt6_info		*aca_rt;
+	struct fib6_info	*aca_rt;
 	struct ifacaddr6	*aca_next;
 	int			aca_users;
 	refcount_t		aca_refcnt;
diff --git a/include/net/ip.h b/include/net/ip.h
index ecffd84..dc4a2d6 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -396,6 +396,9 @@ static inline unsigned int ip_skb_dst_mtu(struct sock *sk,
 	return min(READ_ONCE(skb_dst(skb)->dev->mtu), IP_MAX_MTU);
 }
 
+int ip_metrics_convert(struct net *net, struct nlattr *fc_mx, int fc_mx_len,
+		       u32 *metrics);
+
 u32 ip_idents_reserve(u32 hash, int segs);
 void __ip_select_ident(struct net *net, struct iphdr *iph, int segs);
 
diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index 5e86fd9..1af450d 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -38,6 +38,7 @@
 #endif
 
 struct rt6_info;
+struct fib6_info;
 
 struct fib6_config {
 	u32		fc_table;
@@ -74,12 +75,12 @@ struct fib6_node {
 #ifdef CONFIG_IPV6_SUBTREES
 	struct fib6_node __rcu	*subtree;
 #endif
-	struct rt6_info __rcu	*leaf;
+	struct fib6_info __rcu	*leaf;
 
 	__u16			fn_bit;		/* bit key */
 	__u16			fn_flags;
 	int			fn_sernum;
-	struct rt6_info __rcu	*rr_ptr;
+	struct fib6_info __rcu	*rr_ptr;
 	struct rcu_head		rcu;
 };
 
@@ -94,11 +95,6 @@ struct fib6_gc_args {
 #define FIB6_SUBTREE(fn)	(rcu_dereference_protected((fn)->subtree, 1))
 #endif
 
-struct mx6_config {
-	const u32 *mx;
-	DECLARE_BITMAP(mx_valid, RTAX_MAX);
-};
-
 /*
  *	routing information
  *
@@ -127,56 +123,71 @@ struct rt6_exception {
 #define FIB6_EXCEPTION_BUCKET_SIZE (1 << FIB6_EXCEPTION_BUCKET_SIZE_SHIFT)
 #define FIB6_MAX_DEPTH 5
 
-struct rt6_info {
-	struct dst_entry		dst;
-	struct rt6_info __rcu		*rt6_next;
-	struct rt6_info			*from;
+struct fib6_nh {
+	struct in6_addr		nh_gw;
+	struct net_device	*nh_dev;
+	struct lwtunnel_state	*nh_lwtstate;
 
-	/*
-	 * Tail elements of dst_entry (__refcnt etc.)
-	 * and these elements (rarely used in hot path) are in
-	 * the same cache line.
-	 */
-	struct fib6_table		*rt6i_table;
-	struct fib6_node __rcu		*rt6i_node;
+	unsigned int		nh_flags;
+	atomic_t		nh_upper_bound;
+	int			nh_weight;
+};
 
-	struct in6_addr			rt6i_gateway;
+struct fib6_info {
+	struct fib6_table		*fib6_table;
+	struct fib6_info __rcu		*rt6_next;
+	struct fib6_node __rcu		*fib6_node;
 
 	/* Multipath routes:
-	 * siblings is a list of rt6_info that have the the same metric/weight,
+	 * siblings is a list of fib6_info that have the same metric/weight,
 	 * destination, but not the same gateway. nsiblings is just a cache
 	 * to speed up lookup.
 	 */
-	struct list_head		rt6i_siblings;
-	unsigned int			rt6i_nsiblings;
-	atomic_t			rt6i_nh_upper_bound;
+	struct list_head		fib6_siblings;
+	unsigned int			fib6_nsiblings;
 
-	atomic_t			rt6i_ref;
+	atomic_t			fib6_ref;
+	unsigned long			expires;
+	struct dst_metrics		*fib6_metrics;
+#define fib6_pmtu		fib6_metrics->metrics[RTAX_MTU-1]
 
-	unsigned int			rt6i_nh_flags;
+	struct rt6key			fib6_dst;
+	u32				fib6_flags;
+	struct rt6key			fib6_src;
+	struct rt6key			fib6_prefsrc;
 
-	/* These are in a separate cache line. */
-	struct rt6key			rt6i_dst ____cacheline_aligned_in_smp;
-	u32				rt6i_flags;
+	struct rt6_info * __percpu	*rt6i_pcpu;
+	struct rt6_exception_bucket __rcu *rt6i_exception_bucket;
+
+	u32				fib6_metric;
+	u8				fib6_protocol;
+	u8				fib6_type;
+	u8				exception_bucket_flushed:1,
+					should_flush:1,
+					dst_nocount:1,
+					dst_nopolicy:1,
+					dst_host:1,
+					unused:3;
+
+	struct fib6_nh			fib6_nh;
+};
+
+struct rt6_info {
+	struct dst_entry		dst;
+	struct fib6_info __rcu		*from;
+
+	struct rt6key			rt6i_dst;
 	struct rt6key			rt6i_src;
+	struct in6_addr			rt6i_gateway;
+	struct inet6_dev		*rt6i_idev;
+	u32				rt6i_flags;
 	struct rt6key			rt6i_prefsrc;
 
 	struct list_head		rt6i_uncached;
 	struct uncached_list		*rt6i_uncached_list;
 
-	struct inet6_dev		*rt6i_idev;
-	struct rt6_info * __percpu	*rt6i_pcpu;
-	struct rt6_exception_bucket __rcu *rt6i_exception_bucket;
-
-	u32				rt6i_metric;
-	u32				rt6i_pmtu;
 	/* more non-fragment space at head required */
-	int				rt6i_nh_weight;
 	unsigned short			rt6i_nfheader_len;
-	u8				rt6i_protocol;
-	u8				exception_bucket_flushed:1,
-					should_flush:1,
-					unused:6;
 };
 
 #define for_each_fib6_node_rt_rcu(fn)					\
@@ -192,27 +203,24 @@ static inline struct inet6_dev *ip6_dst_idev(struct dst_entry *dst)
 	return ((struct rt6_info *)dst)->rt6i_idev;
 }
 
-static inline void rt6_clean_expires(struct rt6_info *rt)
+static inline void fib6_clean_expires(struct fib6_info *f6i)
 {
-	rt->rt6i_flags &= ~RTF_EXPIRES;
-	rt->dst.expires = 0;
+	f6i->fib6_flags &= ~RTF_EXPIRES;
+	f6i->expires = 0;
 }
 
-static inline void rt6_set_expires(struct rt6_info *rt, unsigned long expires)
+static inline void fib6_set_expires(struct fib6_info *f6i,
+				    unsigned long expires)
 {
-	rt->dst.expires = expires;
-	rt->rt6i_flags |= RTF_EXPIRES;
+	f6i->expires = expires;
+	f6i->fib6_flags |= RTF_EXPIRES;
 }
 
-static inline void rt6_update_expires(struct rt6_info *rt0, int timeout)
+static inline bool fib6_check_expired(const struct fib6_info *f6i)
 {
-	struct rt6_info *rt;
-
-	for (rt = rt0; rt && !(rt->rt6i_flags & RTF_EXPIRES); rt = rt->from);
-	if (rt && rt != rt0)
-		rt0->dst.expires = rt->dst.expires;
-	dst_set_expires(&rt0->dst, timeout);
-	rt0->rt6i_flags |= RTF_EXPIRES;
+	if (f6i->fib6_flags & RTF_EXPIRES)
+		return time_after(jiffies, f6i->expires);
+	return false;
 }
 
 /* Function to safely get fn->sernum for passed in rt
@@ -220,14 +228,13 @@ static inline void rt6_update_expires(struct rt6_info *rt0, int timeout)
  * Return true if we can get cookie safely
  * Return false if not
  */
-static inline bool rt6_get_cookie_safe(const struct rt6_info *rt,
-				       u32 *cookie)
+static inline bool fib6_get_cookie_safe(const struct fib6_info *f6i,
+					u32 *cookie)
 {
 	struct fib6_node *fn;
 	bool status = false;
 
-	rcu_read_lock();
-	fn = rcu_dereference(rt->rt6i_node);
+	fn = rcu_dereference(f6i->fib6_node);
 
 	if (fn) {
 		*cookie = fn->fn_sernum;
@@ -236,19 +243,22 @@ static inline bool rt6_get_cookie_safe(const struct rt6_info *rt,
 		status = true;
 	}
 
-	rcu_read_unlock();
 	return status;
 }
 
 static inline u32 rt6_get_cookie(const struct rt6_info *rt)
 {
+	struct fib6_info *from;
 	u32 cookie = 0;
 
-	if (rt->rt6i_flags & RTF_PCPU ||
-	    (unlikely(!list_empty(&rt->rt6i_uncached)) && rt->from))
-		rt = rt->from;
+	rcu_read_lock();
 
-	rt6_get_cookie_safe(rt, &cookie);
+	from = rcu_dereference(rt->from);
+	if (from && (rt->rt6i_flags & RTF_PCPU ||
+	    unlikely(!list_empty(&rt->rt6i_uncached))))
+		fib6_get_cookie_safe(from, &cookie);
+
+	rcu_read_unlock();
 
 	return cookie;
 }
@@ -262,20 +272,18 @@ static inline void ip6_rt_put(struct rt6_info *rt)
 	dst_release(&rt->dst);
 }
 
-void rt6_free_pcpu(struct rt6_info *non_pcpu_rt);
+struct fib6_info *fib6_info_alloc(gfp_t gfp_flags);
+void fib6_info_destroy(struct fib6_info *f6i);
 
-static inline void rt6_hold(struct rt6_info *rt)
+static inline void fib6_info_hold(struct fib6_info *f6i)
 {
-	atomic_inc(&rt->rt6i_ref);
+	atomic_inc(&f6i->fib6_ref);
 }
 
-static inline void rt6_release(struct rt6_info *rt)
+static inline void fib6_info_release(struct fib6_info *f6i)
 {
-	if (atomic_dec_and_test(&rt->rt6i_ref)) {
-		rt6_free_pcpu(rt);
-		dst_dev_put(&rt->dst);
-		dst_release(&rt->dst);
-	}
+	if (f6i && atomic_dec_and_test(&f6i->fib6_ref))
+		fib6_info_destroy(f6i);
 }
 
 enum fib6_walk_state {
@@ -291,7 +299,7 @@ enum fib6_walk_state {
 struct fib6_walker {
 	struct list_head lh;
 	struct fib6_node *root, *node;
-	struct rt6_info *leaf;
+	struct fib6_info *leaf;
 	enum fib6_walk_state state;
 	unsigned int skip;
 	unsigned int count;
@@ -355,7 +363,7 @@ typedef struct rt6_info *(*pol_lookup_t)(struct net *,
 
 struct fib6_entry_notifier_info {
 	struct fib_notifier_info info; /* must be first */
-	struct rt6_info *rt;
+	struct fib6_info *rt;
 };
 
 /*
@@ -377,15 +385,19 @@ struct fib6_node *fib6_locate(struct fib6_node *root,
 			      const struct in6_addr *saddr, int src_len,
 			      bool exact_match);
 
-void fib6_clean_all(struct net *net, int (*func)(struct rt6_info *, void *arg),
+void fib6_clean_all(struct net *net, int (*func)(struct fib6_info *, void *arg),
 		    void *arg);
 
-int fib6_add(struct fib6_node *root, struct rt6_info *rt,
-	     struct nl_info *info, struct mx6_config *mxc,
-	     struct netlink_ext_ack *extack);
-int fib6_del(struct rt6_info *rt, struct nl_info *info);
+int fib6_add(struct fib6_node *root, struct fib6_info *rt,
+	     struct nl_info *info, struct netlink_ext_ack *extack);
+int fib6_del(struct fib6_info *rt, struct nl_info *info);
 
-void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info,
+static inline struct net_device *fib6_info_nh_dev(const struct fib6_info *f6i)
+{
+	return f6i->fib6_nh.nh_dev;
+}
+
+void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info,
 		     unsigned int flags);
 
 void fib6_run_gc(unsigned long expires, struct net *net, bool force);
@@ -408,8 +420,14 @@ void __net_exit fib6_notifier_exit(struct net *net);
 unsigned int fib6_tables_seq_read(struct net *net);
 int fib6_tables_dump(struct net *net, struct notifier_block *nb);
 
-void fib6_update_sernum(struct rt6_info *rt);
-void fib6_update_sernum_upto_root(struct net *net, struct rt6_info *rt);
+void fib6_update_sernum(struct net *net, struct fib6_info *rt);
+void fib6_update_sernum_upto_root(struct net *net, struct fib6_info *rt);
+
+void fib6_metric_set(struct fib6_info *f6i, int metric, u32 val);
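+/* RTAX_LOCK holds a bitmask of the metrics that user space has locked */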
+static inline bool fib6_metric_locked(struct fib6_info *f6i, int metric)
+{
+	return !!(f6i->fib6_metrics->metrics[RTAX_LOCK - 1] & (1 << metric));
+}
 
 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
 int fib6_rules_init(void);
diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index 08b1323..8df4ff7 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -66,9 +66,9 @@ static inline bool rt6_need_strict(const struct in6_addr *daddr)
 		(IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
 }
 
-static inline bool rt6_qualify_for_ecmp(const struct rt6_info *rt)
+static inline bool rt6_qualify_for_ecmp(const struct fib6_info *f6i)
 {
-	return (rt->rt6i_flags & (RTF_GATEWAY|RTF_ADDRCONF|RTF_DYNAMIC)) ==
+	return (f6i->fib6_flags & (RTF_GATEWAY|RTF_ADDRCONF|RTF_DYNAMIC)) ==
 	       RTF_GATEWAY;
 }
 
@@ -100,29 +100,29 @@ void ip6_route_cleanup(void);
 
 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg);
 
-int ip6_route_add(struct fib6_config *cfg, struct netlink_ext_ack *extack);
-int ip6_ins_rt(struct rt6_info *);
-int ip6_del_rt(struct rt6_info *);
+int ip6_route_add(struct fib6_config *cfg, gfp_t gfp_flags,
+		  struct netlink_ext_ack *extack);
+int ip6_ins_rt(struct net *net, struct fib6_info *f6i);
+int ip6_del_rt(struct net *net, struct fib6_info *f6i);
 
-void rt6_flush_exceptions(struct rt6_info *rt);
-int rt6_remove_exception_rt(struct rt6_info *rt);
-void rt6_age_exceptions(struct rt6_info *rt, struct fib6_gc_args *gc_args,
+void rt6_flush_exceptions(struct fib6_info *f6i);
+void rt6_age_exceptions(struct fib6_info *f6i, struct fib6_gc_args *gc_args,
 			unsigned long now);
 
-static inline int ip6_route_get_saddr(struct net *net, struct rt6_info *rt,
+static inline int ip6_route_get_saddr(struct net *net, struct fib6_info *f6i,
 				      const struct in6_addr *daddr,
 				      unsigned int prefs,
 				      struct in6_addr *saddr)
 {
-	struct inet6_dev *idev =
-			rt ? ip6_dst_idev((struct dst_entry *)rt) : NULL;
 	int err = 0;
 
-	if (rt && rt->rt6i_prefsrc.plen)
-		*saddr = rt->rt6i_prefsrc.addr;
-	else
-		err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
-					 daddr, prefs, saddr);
+	if (f6i && f6i->fib6_prefsrc.plen) {
+		*saddr = f6i->fib6_prefsrc.addr;
+	} else {
+		struct net_device *dev = f6i ? fib6_info_nh_dev(f6i) : NULL;
+
+		err = ipv6_dev_get_saddr(net, dev, daddr, prefs, saddr);
+	}
 
 	return err;
 }
@@ -137,8 +137,9 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev, struct flowi6 *fl6);
 
 void fib6_force_start_gc(struct net *net);
 
-struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
-				    const struct in6_addr *addr, bool anycast);
+struct fib6_info *addrconf_f6i_alloc(struct net *net, struct inet6_dev *idev,
+				     const struct in6_addr *addr, bool anycast,
+				     gfp_t gfp_flags);
 
 struct rt6_info *ip6_dst_alloc(struct net *net, struct net_device *dev,
 			       int flags);
@@ -147,9 +148,11 @@ struct rt6_info *ip6_dst_alloc(struct net *net, struct net_device *dev,
  *	support functions for ND
  *
  */
-struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr,
+struct fib6_info *rt6_get_dflt_router(struct net *net,
+				     const struct in6_addr *addr,
 				     struct net_device *dev);
-struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
+struct fib6_info *rt6_add_dflt_router(struct net *net,
+				     const struct in6_addr *gwaddr,
 				     struct net_device *dev, unsigned int pref);
 
 void rt6_purge_dflt_routers(struct net *net);
@@ -174,14 +177,14 @@ struct rt6_rtnl_dump_arg {
 	struct net *net;
 };
 
-int rt6_dump_route(struct rt6_info *rt, void *p_arg);
+int rt6_dump_route(struct fib6_info *f6i, void *p_arg);
 void rt6_mtu_change(struct net_device *dev, unsigned int mtu);
 void rt6_remove_prefsrc(struct inet6_ifaddr *ifp);
 void rt6_clean_tohost(struct net *net, struct in6_addr *gateway);
 void rt6_sync_up(struct net_device *dev, unsigned int nh_flags);
 void rt6_disable_ip(struct net_device *dev, unsigned long event);
 void rt6_sync_down_dev(struct net_device *dev, unsigned long event);
-void rt6_multipath_rebalance(struct rt6_info *rt);
+void rt6_multipath_rebalance(struct fib6_info *f6i);
 
 void rt6_uncached_list_add(struct rt6_info *rt);
 void rt6_uncached_list_del(struct rt6_info *rt);
@@ -269,12 +272,14 @@ static inline struct in6_addr *rt6_nexthop(struct rt6_info *rt,
 		return daddr;
 }
 
-static inline bool rt6_duplicate_nexthop(struct rt6_info *a, struct rt6_info *b)
+static inline bool rt6_duplicate_nexthop(struct fib6_info *a, struct fib6_info *b)
 {
-	return a->dst.dev == b->dst.dev &&
-	       a->rt6i_idev == b->rt6i_idev &&
-	       ipv6_addr_equal(&a->rt6i_gateway, &b->rt6i_gateway) &&
-	       !lwtunnel_cmp_encap(a->dst.lwtstate, b->dst.lwtstate);
+	return a->fib6_nh.nh_dev == b->fib6_nh.nh_dev &&
+	       ipv6_addr_equal(&a->fib6_nh.nh_gw, &b->fib6_nh.nh_gw) &&
+	       !lwtunnel_cmp_encap(a->fib6_nh.nh_lwtstate, b->fib6_nh.nh_lwtstate);
 }
 
+struct neighbour *ip6_neigh_lookup(const struct in6_addr *gw,
+				   struct net_device *dev, struct sk_buff *skb,
+				   const void *daddr);
 #endif
diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
index 540a4b4..751646a 100644
--- a/include/net/ip_tunnels.h
+++ b/include/net/ip_tunnels.h
@@ -379,6 +379,17 @@ static inline u8 ip_tunnel_get_dsfield(const struct iphdr *iph,
 		return 0;
 }
 
+static inline u8 ip_tunnel_get_ttl(const struct iphdr *iph,
+				       const struct sk_buff *skb)
+{
+	if (skb->protocol == htons(ETH_P_IP))
+		return iph->ttl;
+	else if (skb->protocol == htons(ETH_P_IPV6))
+		return ((const struct ipv6hdr *)iph)->hop_limit;
+	else
+		return 0;
+}
+
 /* Propagate ECN bits out */
 static inline u8 ip_tunnel_ecn_encap(u8 tos, const struct iphdr *iph,
 				     const struct sk_buff *skb)
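
ip_tunnel_get_ttl() mirrors ip_tunnel_get_dsfield() above, picking the
protocol-appropriate hop-limit field from the inner header. A hedged sketch
of how a transmit path might use it for TTL inheritance (the wrapper is
hypothetical):

	/* Sketch: fall back to the inner packet's TTL when none is configured. */
	static u8 tnl_pick_ttl(const struct iphdr *inner_iph,
			       const struct sk_buff *skb, u8 cfg_ttl)
	{
		return cfg_ttl ? cfg_ttl : ip_tunnel_get_ttl(inner_iph, skb);
	}
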
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 836f31a..68b167d 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -1044,8 +1044,6 @@ void ipv6_local_error(struct sock *sk, int err, struct flowi6 *fl6, u32 info);
 void ipv6_local_rxpmtu(struct sock *sk, struct flowi6 *fl6, u32 mtu);
 
 int inet6_release(struct socket *sock);
-int __inet6_bind(struct sock *sock, struct sockaddr *uaddr, int addr_len,
-		 bool force_bind_address_no_port, bool with_lock);
 int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len);
 int inet6_getname(struct socket *sock, struct sockaddr *uaddr,
 		  int peer);
diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h
index c29f09c..97b3a54 100644
--- a/include/net/netns/ipv6.h
+++ b/include/net/netns/ipv6.h
@@ -60,7 +60,8 @@ struct netns_ipv6 {
 #endif
 	struct xt_table		*ip6table_nat;
 #endif
-	struct rt6_info         *ip6_null_entry;
+	struct fib6_info	*fib6_null_entry;
+	struct rt6_info		*ip6_null_entry;
 	struct rt6_statistics   *rt6_stats;
 	struct timer_list       ip6_fib_timer;
 	struct hlist_head       *fib_table_hash;
diff --git a/include/net/page_pool.h b/include/net/page_pool.h
new file mode 100644
index 0000000..c790871
--- /dev/null
+++ b/include/net/page_pool.h
@@ -0,0 +1,143 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * page_pool.h
+ *	Author:	Jesper Dangaard Brouer <netoptimizer@brouer.com>
+ *	Copyright (C) 2016 Red Hat, Inc.
+ */
+
+/**
+ * DOC: page_pool allocator
+ *
+ * This page_pool allocator is optimized for the XDP mode that
+ * uses one frame per page, but it has fallbacks that act like the
+ * regular page allocator APIs.
+ *
+ * Basic use involves replacing alloc_pages() calls with the
+ * page_pool_alloc_pages() call.  Drivers should likely use
+ * page_pool_dev_alloc_pages() in place of dev_alloc_pages().
+ *
+ * If page_pool handles DMA mapping (using page->private), then the API
+ * user is responsible for invoking page_pool_put_page() once.  In case
+ * of an elevated refcnt, the DMA state is released, assuming other
+ * users of the page will eventually call put_page().
+ *
+ * If no DMA mapping is done, then page_pool can act as a shim layer
+ * that falls through to alloc_pages().  As no state is kept on the
+ * page, the regular put_page() call is sufficient.
+ */
+#ifndef _NET_PAGE_POOL_H
+#define _NET_PAGE_POOL_H
+
+#include <linux/mm.h> /* Needed by ptr_ring */
+#include <linux/ptr_ring.h>
+#include <linux/dma-direction.h>
+
+#define PP_FLAG_DMA_MAP 1 /* Should page_pool do the DMA map/unmap */
+#define PP_FLAG_ALL	PP_FLAG_DMA_MAP
+
+/*
+ * Fast allocation side cache array/stack
+ *
+ * The cache size and refill watermark are related to the network
+ * use-case.  The NAPI budget is 64 packets.  After a NAPI poll the RX
+ * ring is usually refilled and the max consumed elements will be 64,
+ * which is thus a natural max size for objects in the cache.
+ *
+ * Keeping room for more objects is due to the XDP_DROP use-case:
+ * XDP_DROP can recycle objects directly into this array, as it shares
+ * the same softirq/NAPI protection.  If the cache is already full (or
+ * partly full), the XDP_DROP recycles would have to take a slower
+ * code path.
+ */
+#define PP_ALLOC_CACHE_SIZE	128
+#define PP_ALLOC_CACHE_REFILL	64
+struct pp_alloc_cache {
+	u32 count;
+	void *cache[PP_ALLOC_CACHE_SIZE];
+};
+
+struct page_pool_params {
+	unsigned int	flags;
+	unsigned int	order;
+	unsigned int	pool_size;
+	int		nid;  /* NUMA node id to allocate pages from */
+	struct device	*dev; /* device, for DMA pre-mapping purposes */
+	enum dma_data_direction dma_dir; /* DMA mapping direction */
+};
+
+struct page_pool {
+	struct rcu_head rcu;
+	struct page_pool_params p;
+
+	/*
+	 * Data structure for the allocation side
+	 *
+	 * The driver's allocation side usually already performs some
+	 * kind of resource protection.  Piggyback on this protection,
+	 * and require the driver to protect the allocation side.
+	 *
+	 * For NIC drivers this means allocating one page_pool per
+	 * RX-queue, as the RX-queue is already protected by softirq/BH
+	 * scheduling and napi_schedule; napi_schedule guarantees that
+	 * a single napi_struct will only be scheduled on a single CPU
+	 * (see napi_schedule).
+	 */
+	struct pp_alloc_cache alloc ____cacheline_aligned_in_smp;
+
+	/* Data structure for storing recycled pages.
+	 *
+	 * Returning/freeing pages is more complicated synchronization-
+	 * wise, because frees can happen on remote CPUs, with no
+	 * association with the allocation resource.
+	 *
+	 * Use ptr_ring, as it separates consumer and producer
+	 * efficiently, in a way that doesn't bounce cache-lines.
+	 *
+	 * TODO: Implement bulk return of pages into this structure.
+	 */
+	struct ptr_ring ring;
+};
+
+struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp);
+
+static inline struct page *page_pool_dev_alloc_pages(struct page_pool *pool)
+{
+	gfp_t gfp = (GFP_ATOMIC | __GFP_NOWARN);
+
+	return page_pool_alloc_pages(pool, gfp);
+}
+
+struct page_pool *page_pool_create(const struct page_pool_params *params);
+
+void page_pool_destroy(struct page_pool *pool);
+
+/* Never call this directly, use helpers below */
+void __page_pool_put_page(struct page_pool *pool,
+			  struct page *page, bool allow_direct);
+
+static inline void page_pool_put_page(struct page_pool *pool, struct page *page)
+{
+	/* When page_pool isn't compiled in, net/core/xdp.c doesn't
+	 * allow registering MEM_TYPE_PAGE_POOL, but shield the linker
+	 * from unresolved symbols anyway.
+	 */
+#ifdef CONFIG_PAGE_POOL
+	__page_pool_put_page(pool, page, false);
+#endif
+}
+
+/* Very limited use-cases allow recycling directly */
+static inline void page_pool_recycle_direct(struct page_pool *pool,
+					    struct page *page)
+{
+	__page_pool_put_page(pool, page, true);
+}
+
+static inline bool is_page_pool_compiled_in(void)
+{
+#ifdef CONFIG_PAGE_POOL
+	return true;
+#else
+	return false;
+#endif
+}
+
+#endif /* _NET_PAGE_POOL_H */
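
To make the intended calling convention concrete, here is a minimal
driver-side sketch under the assumptions stated in the DOC comment: one pool
per RX-queue, with DMA mapping delegated to the pool. The mydrv_* name is
illustrative:

	/* Sketch: create a DMA-mapping page_pool for one RX-queue. */
	static struct page_pool *mydrv_create_rx_pool(struct device *dev,
						      unsigned int pool_size)
	{
		struct page_pool_params pp_params = {
			.flags		= PP_FLAG_DMA_MAP,
			.order		= 0,
			.pool_size	= pool_size,
			.nid		= NUMA_NO_NODE,
			.dev		= dev,
			.dma_dir	= DMA_FROM_DEVICE,
		};

		return page_pool_create(&pp_params);
	}

In the NAPI poll loop the driver would then refill with
page_pool_dev_alloc_pages() and, on XDP_DROP, return pages via
page_pool_recycle_direct(), which hits the lockless alloc-side cache
described above.
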
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 9c9b376..833154e 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -402,6 +402,10 @@ void tcp_set_keepalive(struct sock *sk, int val);
 void tcp_syn_ack_timeout(const struct request_sock *req);
 int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
 		int flags, int *addr_len);
+int tcp_set_rcvlowat(struct sock *sk, int val);
+void tcp_data_ready(struct sock *sk);
+int tcp_mmap(struct file *file, struct socket *sock,
+	     struct vm_area_struct *vma);
 void tcp_parse_options(const struct net *net, const struct sk_buff *skb,
 		       struct tcp_options_received *opt_rx,
 		       int estab, struct tcp_fastopen_cookie *foc);
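
tcp_set_rcvlowat() is the TCP hook behind the generic SO_RCVLOWAT socket
option, and tcp_mmap() lets the receive queue be mapped rather than copied.
A small userspace sketch of the rcvlowat side (plain POSIX calls, nothing
patch-specific):

	/* Sketch: only wake poll()/epoll once at least 64KB is queued. */
	int lowat = 64 * 1024;

	setsockopt(fd, SOL_SOCKET, SO_RCVLOWAT, &lowat, sizeof(lowat));
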
diff --git a/include/net/vxlan.h b/include/net/vxlan.h
index ad73d8b..b99a02ae 100644
--- a/include/net/vxlan.h
+++ b/include/net/vxlan.h
@@ -262,6 +262,7 @@ struct vxlan_dev {
 #define VXLAN_F_COLLECT_METADATA	0x2000
 #define VXLAN_F_GPE			0x4000
 #define VXLAN_F_IPV6_LINKLOCAL		0x8000
+#define VXLAN_F_TTL_INHERIT		0x10000
 
 /* Flags that are used in the receive path. These flags must match in
  * order for a socket to be shareable
diff --git a/include/net/xdp.h b/include/net/xdp.h
index b2362dd..137ad5f 100644
--- a/include/net/xdp.h
+++ b/include/net/xdp.h
@@ -33,16 +33,99 @@
  * also mandatory during RX-ring setup.
  */
 
+enum xdp_mem_type {
+	MEM_TYPE_PAGE_SHARED = 0, /* Split-page refcnt based model */
+	MEM_TYPE_PAGE_ORDER0,     /* Orig XDP full page model */
+	MEM_TYPE_PAGE_POOL,
+	MEM_TYPE_MAX,
+};
+
+struct xdp_mem_info {
+	u32 type; /* enum xdp_mem_type, but known size type */
+	u32 id;
+};
+
+struct page_pool;
+
 struct xdp_rxq_info {
 	struct net_device *dev;
 	u32 queue_index;
 	u32 reg_state;
+	struct xdp_mem_info mem;
 } ____cacheline_aligned; /* perf critical, avoid false-sharing */
 
+struct xdp_buff {
+	void *data;
+	void *data_end;
+	void *data_meta;
+	void *data_hard_start;
+	struct xdp_rxq_info *rxq;
+};
+
+struct xdp_frame {
+	void *data;
+	u16 len;
+	u16 headroom;
+	u16 metasize;
+	/* Lifetime of xdp_rxq_info is limited to NAPI/enqueue time,
+	 * while mem info is valid on remote CPU.
+	 */
+	struct xdp_mem_info mem;
+	struct net_device *dev_rx; /* used by cpumap */
+};
+
+/* Convert xdp_buff to xdp_frame */
+static inline
+struct xdp_frame *convert_to_xdp_frame(struct xdp_buff *xdp)
+{
+	struct xdp_frame *xdp_frame;
+	int metasize;
+	int headroom;
+
+	/* Ensure headroom is available for storing info */
+	headroom = xdp->data - xdp->data_hard_start;
+	metasize = xdp->data - xdp->data_meta;
+	metasize = metasize > 0 ? metasize : 0;
+	if (unlikely((headroom - metasize) < sizeof(*xdp_frame)))
+		return NULL;
+
+	/* Store info in top of packet */
+	xdp_frame = xdp->data_hard_start;
+
+	xdp_frame->data = xdp->data;
+	xdp_frame->len  = xdp->data_end - xdp->data;
+	xdp_frame->headroom = headroom - sizeof(*xdp_frame);
+	xdp_frame->metasize = metasize;
+
+	/* rxq only valid until napi_schedule ends, convert to xdp_mem_info */
+	xdp_frame->mem = xdp->rxq->mem;
+
+	return xdp_frame;
+}
+
+void xdp_return_frame(struct xdp_frame *xdpf);
+
 int xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq,
 		     struct net_device *dev, u32 queue_index);
 void xdp_rxq_info_unreg(struct xdp_rxq_info *xdp_rxq);
 void xdp_rxq_info_unused(struct xdp_rxq_info *xdp_rxq);
 bool xdp_rxq_info_is_reg(struct xdp_rxq_info *xdp_rxq);
+int xdp_rxq_info_reg_mem_model(struct xdp_rxq_info *xdp_rxq,
+			       enum xdp_mem_type type, void *allocator);
+
+/* Drivers not supporting XDP metadata can use this helper, which
+ * rejects any room expansion for metadata as a result.
+ */
+static __always_inline void
+xdp_set_data_meta_invalid(struct xdp_buff *xdp)
+{
+	xdp->data_meta = xdp->data + 1;
+}
+
+static __always_inline bool
+xdp_data_meta_unsupported(const struct xdp_buff *xdp)
+{
+	return unlikely(xdp->data_meta > xdp->data);
+}
 
 #endif /* __LINUX_NET_XDP_H__ */
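
Putting the registration pieces together, a driver feeding its RX-queue from
a page_pool would register the queue and then its memory model, roughly as
follows (the rxq/pool variables and error labels are assumed driver
context):

	/* Sketch: tie an RX-queue's xdp_rxq_info to a page_pool allocator. */
	err = xdp_rxq_info_reg(&rxq->xdp_rxq, netdev, queue_index);
	if (err)
		goto err_unmap;

	err = xdp_rxq_info_reg_mem_model(&rxq->xdp_rxq,
					 MEM_TYPE_PAGE_POOL, pool);
	if (err)
		goto err_unreg;

Frames leaving the NAPI context are then converted with
convert_to_xdp_frame() and eventually freed through xdp_return_frame(),
which routes the page back to the registered allocator.
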
diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h
index 878b2be..3dd6802 100644
--- a/include/trace/events/tcp.h
+++ b/include/trace/events/tcp.h
@@ -166,53 +166,6 @@ DEFINE_EVENT(tcp_event_sk, tcp_destroy_sock,
 	TP_ARGS(sk)
 );
 
-TRACE_EVENT(tcp_set_state,
-
-	TP_PROTO(const struct sock *sk, const int oldstate, const int newstate),
-
-	TP_ARGS(sk, oldstate, newstate),
-
-	TP_STRUCT__entry(
-		__field(const void *, skaddr)
-		__field(int, oldstate)
-		__field(int, newstate)
-		__field(__u16, sport)
-		__field(__u16, dport)
-		__array(__u8, saddr, 4)
-		__array(__u8, daddr, 4)
-		__array(__u8, saddr_v6, 16)
-		__array(__u8, daddr_v6, 16)
-	),
-
-	TP_fast_assign(
-		struct inet_sock *inet = inet_sk(sk);
-		__be32 *p32;
-
-		__entry->skaddr = sk;
-		__entry->oldstate = oldstate;
-		__entry->newstate = newstate;
-
-		__entry->sport = ntohs(inet->inet_sport);
-		__entry->dport = ntohs(inet->inet_dport);
-
-		p32 = (__be32 *) __entry->saddr;
-		*p32 = inet->inet_saddr;
-
-		p32 = (__be32 *) __entry->daddr;
-		*p32 =  inet->inet_daddr;
-
-		TP_STORE_ADDRS(__entry, inet->inet_saddr, inet->inet_daddr,
-			       sk->sk_v6_rcv_saddr, sk->sk_v6_daddr);
-	),
-
-	TP_printk("sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c oldstate=%s newstate=%s",
-		  __entry->sport, __entry->dport,
-		  __entry->saddr, __entry->daddr,
-		  __entry->saddr_v6, __entry->daddr_v6,
-		  show_tcp_state_name(__entry->oldstate),
-		  show_tcp_state_name(__entry->newstate))
-);
-
 TRACE_EVENT(tcp_retransmit_synack,
 
 	TP_PROTO(const struct sock *sk, const struct request_sock *req),
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index c5ec897..c8383a2 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -95,6 +95,7 @@ enum bpf_cmd {
 	BPF_OBJ_GET_INFO_BY_FD,
 	BPF_PROG_QUERY,
 	BPF_RAW_TRACEPOINT_OPEN,
+	BPF_BTF_LOAD,
 };
 
 enum bpf_map_type {
@@ -279,6 +280,9 @@ union bpf_attr {
 					 */
 		char	map_name[BPF_OBJ_NAME_LEN];
 		__u32	map_ifindex;	/* ifindex of netdev to create on */
+		__u32	btf_fd;		/* fd pointing to a BTF type data */
+		__u32	btf_key_id;	/* BTF type_id of the key */
+		__u32	btf_value_id;	/* BTF type_id of the value */
 	};
 
 	struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */
@@ -363,6 +367,14 @@ union bpf_attr {
 		__u64 name;
 		__u32 prog_fd;
 	} raw_tracepoint;
+
+	struct { /* anonymous struct for BPF_BTF_LOAD */
+		__aligned_u64	btf;
+		__aligned_u64	btf_log_buf;
+		__u32		btf_size;
+		__u32		btf_log_size;
+		__u32		btf_log_level;
+	};
 } __attribute__((aligned(8)));
 
 /* BPF helper function descriptions:
@@ -755,6 +767,13 @@ union bpf_attr {
  *     @addr: pointer to struct sockaddr to bind socket to
  *     @addr_len: length of sockaddr structure
  *     Return: 0 on success or negative error code
+ *
+ * int bpf_xdp_adjust_tail(xdp_md, delta)
+ *     Adjust xdp_md.data_end by delta. Only shrinking the packet's
+ *     size is supported.
+ *     @xdp_md: pointer to xdp_md
+ *     @delta: A negative integer to be added to xdp_md.data_end
+ *     Return: 0 on success or negative on error
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -821,7 +840,8 @@ union bpf_attr {
 	FN(msg_apply_bytes),		\
 	FN(msg_cork_bytes),		\
 	FN(msg_pull_data),		\
-	FN(bind),
+	FN(bind),			\
+	FN(xdp_adjust_tail),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
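
Since the new helper only shrinks packets, a typical use is trimming payload
before passing or redirecting. A hedged BPF-C sketch (the section name and
cutoff are illustrative):

	SEC("xdp")
	int xdp_trim(struct xdp_md *ctx)
	{
		int len = (long)ctx->data_end - (long)ctx->data;

		/* Keep at most 256 bytes; the delta must be negative. */
		if (len > 256)
			bpf_xdp_adjust_tail(ctx, 256 - len);

		return XDP_PASS;
	}
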
diff --git a/include/uapi/linux/btf.h b/include/uapi/linux/btf.h
new file mode 100644
index 0000000..74a30b1
--- /dev/null
+++ b/include/uapi/linux/btf.h
@@ -0,0 +1,130 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/* Copyright (c) 2018 Facebook */
+#ifndef _UAPI__LINUX_BTF_H__
+#define _UAPI__LINUX_BTF_H__
+
+#include <linux/types.h>
+
+#define BTF_MAGIC	0xeB9F
+#define BTF_MAGIC_SWAP	0x9FeB
+#define BTF_VERSION	1
+#define BTF_FLAGS_COMPR	0x01
+
+struct btf_header {
+	__u16	magic;
+	__u8	version;
+	__u8	flags;
+
+	__u32	parent_label;
+	__u32	parent_name;
+
+	/* All offsets are in bytes relative to the end of this header */
+	__u32	label_off;	/* offset of label section	*/
+	__u32	object_off;	/* offset of data object section*/
+	__u32	func_off;	/* offset of function section	*/
+	__u32	type_off;	/* offset of type section	*/
+	__u32	str_off;	/* offset of string section	*/
+	__u32	str_len;	/* length of string section	*/
+};
+
+/* Max # of type identifiers */
+#define BTF_MAX_TYPE	0x7fffffff
+/* Max offset into the string section */
+#define BTF_MAX_NAME_OFFSET	0x7fffffff
+/* Max # of struct/union/enum members or func args */
+#define BTF_MAX_VLEN	0xffff
+
+/* The type id is referring to a parent BTF */
+#define BTF_TYPE_PARENT(id)	(((id) >> 31) & 0x1)
+#define BTF_TYPE_ID(id)		((id) & BTF_MAX_TYPE)
+
+/* String is in the ELF string section */
+#define BTF_STR_TBL_ELF_ID(ref)	(((ref) >> 31) & 0x1)
+#define BTF_STR_OFFSET(ref)	((ref) & BTF_MAX_NAME_OFFSET)
+
+struct btf_type {
+	__u32 name;
+	/* "info" bits arrangement
+	 * bits  0-15: vlen (e.g. # of struct's members)
+	 * bits 16-23: unused
+	 * bits 24-28: kind (e.g. int, ptr, array...etc)
+	 * bits 29-30: unused
+	 * bits    31: root
+	 */
+	__u32 info;
+	/* "size" is used by INT, ENUM, STRUCT and UNION.
+	 * "size" tells the size of the type it is describing.
+	 *
+	 * "type" is used by PTR, TYPEDEF, VOLATILE, CONST and RESTRICT.
+	 * "type" is a type_id referring to another type.
+	 */
+	union {
+		__u32 size;
+		__u32 type;
+	};
+};
+
+#define BTF_INFO_KIND(info)	(((info) >> 24) & 0x1f)
+#define BTF_INFO_ISROOT(info)	(!!(((info) >> 24) & 0x80))
+#define BTF_INFO_VLEN(info)	((info) & 0xffff)
+
+#define BTF_KIND_UNKN		0	/* Unknown	*/
+#define BTF_KIND_INT		1	/* Integer	*/
+#define BTF_KIND_PTR		2	/* Pointer	*/
+#define BTF_KIND_ARRAY		3	/* Array	*/
+#define BTF_KIND_STRUCT		4	/* Struct	*/
+#define BTF_KIND_UNION		5	/* Union	*/
+#define BTF_KIND_ENUM		6	/* Enumeration	*/
+#define BTF_KIND_FWD		7	/* Forward	*/
+#define BTF_KIND_TYPEDEF	8	/* Typedef	*/
+#define BTF_KIND_VOLATILE	9	/* Volatile	*/
+#define BTF_KIND_CONST		10	/* Const	*/
+#define BTF_KIND_RESTRICT	11	/* Restrict	*/
+#define BTF_KIND_MAX		11
+#define NR_BTF_KINDS		12
+
+/* For some specific BTF_KIND, "struct btf_type" is immediately
+ * followed by extra data.
+ */
+
+/* BTF_KIND_INT is followed by a u32, whose 32-bit arrangement
+ * is as follows:
+ */
+#define BTF_INT_ENCODING(VAL)	(((VAL) & 0xff000000) >> 24)
+#define BTF_INT_OFFSET(VAL)	(((VAL) & 0x00ff0000) >> 16)
+#define BTF_INT_BITS(VAL)	((VAL) & 0x0000ffff)
+
+/* Attributes stored in the BTF_INT_ENCODING */
+#define BTF_INT_SIGNED	0x1
+#define BTF_INT_CHAR	0x2
+#define BTF_INT_BOOL	0x4
+#define BTF_INT_VARARGS	0x8
+
+/* BTF_KIND_ENUM is followed by multiple "struct btf_enum".
+ * The exact number of btf_enum is stored in the vlen (of the
+ * info in "struct btf_type").
+ */
+struct btf_enum {
+	__u32	name;
+	__s32	val;
+};
+
+/* BTF_KIND_ARRAY is followed by one "struct btf_array" */
+struct btf_array {
+	__u32	type;
+	__u32	index_type;
+	__u32	nelems;
+};
+
+/* BTF_KIND_STRUCT and BTF_KIND_UNION are followed
+ * by multiple "struct btf_member".  The exact number
+ * of btf_member is stored in the vlen (of the info in
+ * "struct btf_type").
+ */
+struct btf_member {
+	__u32	name;
+	__u32	type;
+	__u32	offset;	/* offset in bits */
+};
+
+#endif /* _UAPI__LINUX_BTF_H__ */
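
As a worked example of the layout above, hand-encoding a plain signed 32-bit
int takes one btf_type followed by one u32 of INT metadata (the variable
name is illustrative):

	struct {
		struct btf_type t;
		__u32 int_data;
	} my_int = {
		.t = {
			.name = 0,			/* anonymous */
			.info = BTF_KIND_INT << 24,	/* kind=INT, vlen=0 */
			.size = 4,			/* byte size */
		},
		/* encoding=SIGNED, offset=0, bits=32 */
		.int_data = (BTF_INT_SIGNED << 24) | 32,
	};
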
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index 68699f6..b852664 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -516,6 +516,7 @@ enum {
 	IFLA_VXLAN_COLLECT_METADATA,
 	IFLA_VXLAN_LABEL,
 	IFLA_VXLAN_GPE,
+	IFLA_VXLAN_TTL_INHERIT,
 	__IFLA_VXLAN_MAX
 };
 #define IFLA_VXLAN_MAX	(__IFLA_VXLAN_MAX - 1)
diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h
index 103ba79..83ade9b 100644
--- a/include/uapi/linux/pci_regs.h
+++ b/include/uapi/linux/pci_regs.h
@@ -506,6 +506,8 @@
 #define  PCI_EXP_DEVCTL_READRQ_256B  0x1000 /* 256 Bytes */
 #define  PCI_EXP_DEVCTL_READRQ_512B  0x2000 /* 512 Bytes */
 #define  PCI_EXP_DEVCTL_READRQ_1024B 0x3000 /* 1024 Bytes */
+#define  PCI_EXP_DEVCTL_READRQ_2048B 0x4000 /* 2048 Bytes */
+#define  PCI_EXP_DEVCTL_READRQ_4096B 0x5000 /* 4096 Bytes */
 #define  PCI_EXP_DEVCTL_BCR_FLR 0x8000  /* Bridge Configuration Retry / FLR */
 #define PCI_EXP_DEVSTA		10	/* Device Status */
 #define  PCI_EXP_DEVSTA_CED	0x0001	/* Correctable Error Detected */
diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h
index 33a70ec..d02e859 100644
--- a/include/uapi/linux/snmp.h
+++ b/include/uapi/linux/snmp.h
@@ -276,6 +276,8 @@ enum
 	LINUX_MIB_TCPKEEPALIVE,			/* TCPKeepAlive */
 	LINUX_MIB_TCPMTUPFAIL,			/* TCPMTUPFail */
 	LINUX_MIB_TCPMTUPSUCCESS,		/* TCPMTUPSuccess */
+	LINUX_MIB_TCPDELIVERED,			/* TCPDelivered */
+	LINUX_MIB_TCPDELIVEREDCE,		/* TCPDeliveredCE */
 	__LINUX_MIB_MAX
 };
 
diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h
index 560374c..379b087 100644
--- a/include/uapi/linux/tcp.h
+++ b/include/uapi/linux/tcp.h
@@ -224,6 +224,9 @@ struct tcp_info {
 	__u64	tcpi_busy_time;      /* Time (usec) busy sending data */
 	__u64	tcpi_rwnd_limited;   /* Time (usec) limited by receive window */
 	__u64	tcpi_sndbuf_limited; /* Time (usec) limited by send buffer */
+
+	__u32	tcpi_delivered;
+	__u32	tcpi_delivered_ce;
 };
 
 /* netlink attributes types for SCM_TIMESTAMPING_OPT_STATS */
@@ -244,6 +247,8 @@ enum {
 	TCP_NLA_SNDQ_SIZE,	/* Data (bytes) pending in send queue */
 	TCP_NLA_CA_STATE,	/* ca_state of socket */
 	TCP_NLA_SND_SSTHRESH,	/* Slow start size threshold */
+	TCP_NLA_DELIVERED,	/* Data pkts delivered incl. out-of-order */
+	TCP_NLA_DELIVERED_CE,	/* Like above but only ones w/ CE marks */
 
 };
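
Userspace can read the two new counters through the existing TCP_INFO
getsockopt; a minimal sketch:

	struct tcp_info ti;
	socklen_t len = sizeof(ti);

	if (getsockopt(fd, IPPROTO_TCP, TCP_INFO, &ti, &len) == 0)
		printf("delivered=%u delivered_ce=%u\n",
		       ti.tcpi_delivered, ti.tcpi_delivered_ce);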
 
diff --git a/include/uapi/linux/tipc_config.h b/include/uapi/linux/tipc_config.h
index 3f29e3c..4b2c93b 100644
--- a/include/uapi/linux/tipc_config.h
+++ b/include/uapi/linux/tipc_config.h
@@ -185,6 +185,11 @@
 #define TIPC_DEF_LINK_WIN 50
 #define TIPC_MAX_LINK_WIN 8191
 
+/*
+ * Default MTU for UDP media
+ */
+
+#define TIPC_DEF_LINK_UDP_MTU 14000
 
 struct tipc_node_info {
 	__be32 addr;			/* network address of node */
diff --git a/include/uapi/linux/tipc_netlink.h b/include/uapi/linux/tipc_netlink.h
index 0affb68..85c1198 100644
--- a/include/uapi/linux/tipc_netlink.h
+++ b/include/uapi/linux/tipc_netlink.h
@@ -266,6 +266,7 @@ enum {
 	TIPC_NLA_PROP_PRIO,		/* u32 */
 	TIPC_NLA_PROP_TOL,		/* u32 */
 	TIPC_NLA_PROP_WIN,		/* u32 */
+	TIPC_NLA_PROP_MTU,		/* u32 */
 
 	__TIPC_NLA_PROP_MAX,
 	TIPC_NLA_PROP_MAX = __TIPC_NLA_PROP_MAX - 1
diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
index a713fd2..35c485f 100644
--- a/kernel/bpf/Makefile
+++ b/kernel/bpf/Makefile
@@ -4,6 +4,7 @@
 obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o
 obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o
 obj-$(CONFIG_BPF_SYSCALL) += disasm.o
+obj-$(CONFIG_BPF_SYSCALL) += btf.o
 ifeq ($(CONFIG_NET),y)
 obj-$(CONFIG_BPF_SYSCALL) += devmap.o
 obj-$(CONFIG_BPF_SYSCALL) += cpumap.o
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index 14750e7..02a1893 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -11,11 +11,13 @@
  * General Public License for more details.
  */
 #include <linux/bpf.h>
+#include <linux/btf.h>
 #include <linux/err.h>
 #include <linux/slab.h>
 #include <linux/mm.h>
 #include <linux/filter.h>
 #include <linux/perf_event.h>
+#include <uapi/linux/btf.h>
 
 #include "map_in_map.h"
 
@@ -336,6 +338,52 @@ static void array_map_free(struct bpf_map *map)
 	bpf_map_area_free(array);
 }
 
+static void array_map_seq_show_elem(struct bpf_map *map, void *key,
+				    struct seq_file *m)
+{
+	void *value;
+
+	rcu_read_lock();
+
+	value = array_map_lookup_elem(map, key);
+	if (!value) {
+		rcu_read_unlock();
+		return;
+	}
+
+	seq_printf(m, "%u: ", *(u32 *)key);
+	btf_type_seq_show(map->btf, map->btf_value_id, value, m);
+	seq_puts(m, "\n");
+
+	rcu_read_unlock();
+}
+
+static int array_map_check_btf(const struct bpf_map *map, const struct btf *btf,
+			       u32 btf_key_id, u32 btf_value_id)
+{
+	const struct btf_type *key_type, *value_type;
+	u32 key_size, value_size;
+	u32 int_data;
+
+	key_type = btf_type_id_size(btf, &btf_key_id, &key_size);
+	if (!key_type || BTF_INFO_KIND(key_type->info) != BTF_KIND_INT)
+		return -EINVAL;
+
+	int_data = *(u32 *)(key_type + 1);
+	/* bpf array can only take a u32 key.  This check makes
+	 * sure that the btf matches the attr used during map_create.
+	 */
+	if (BTF_INT_BITS(int_data) != 32 || key_size != 4 ||
+	    BTF_INT_OFFSET(int_data))
+		return -EINVAL;
+
+	value_type = btf_type_id_size(btf, &btf_value_id, &value_size);
+	if (!value_type || value_size > map->value_size)
+		return -EINVAL;
+
+	return 0;
+}
+
 const struct bpf_map_ops array_map_ops = {
 	.map_alloc_check = array_map_alloc_check,
 	.map_alloc = array_map_alloc,
@@ -345,6 +393,8 @@ const struct bpf_map_ops array_map_ops = {
 	.map_update_elem = array_map_update_elem,
 	.map_delete_elem = array_map_delete_elem,
 	.map_gen_lookup = array_map_gen_lookup,
+	.map_seq_show_elem = array_map_seq_show_elem,
+	.map_check_btf = array_map_check_btf,
 };
 
 const struct bpf_map_ops percpu_array_map_ops = {
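
array_map_check_btf() is what ultimately validates the btf_key_id and
btf_value_id that userspace passes at map creation. A hedged sketch of the
corresponding BPF_MAP_CREATE call, assuming btf_fd came from an earlier
BPF_BTF_LOAD and that the ids name a 32-bit int key plus a matching value
type:

	union bpf_attr attr = {
		.map_type	= BPF_MAP_TYPE_ARRAY,
		.key_size	= 4,
		.value_size	= value_size,	/* size of the BTF value type */
		.max_entries	= 16,
		.btf_fd		= btf_fd,
		.btf_key_id	= key_type_id,
		.btf_value_id	= value_type_id,
	};
	int map_fd = syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr));
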
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
new file mode 100644
index 0000000..eb56ac7
--- /dev/null
+++ b/kernel/bpf/btf.c
@@ -0,0 +1,2064 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018 Facebook */
+
+#include <uapi/linux/btf.h>
+#include <uapi/linux/types.h>
+#include <linux/seq_file.h>
+#include <linux/compiler.h>
+#include <linux/errno.h>
+#include <linux/slab.h>
+#include <linux/anon_inodes.h>
+#include <linux/file.h>
+#include <linux/uaccess.h>
+#include <linux/kernel.h>
+#include <linux/bpf_verifier.h>
+#include <linux/btf.h>
+
+/* BTF (BPF Type Format) is the metadata format which describes
+ * the data types of BPF programs/maps.  Hence, it basically
+ * focuses on the C programming language, which modern BPF
+ * primarily uses.
+ *
+ * ELF Section:
+ * ~~~~~~~~~~~
+ * The BTF data is stored under the ".BTF" ELF section
+ *
+ * struct btf_type:
+ * ~~~~~~~~~~~~~~~
+ * Each 'struct btf_type' object describes a C data type.
+ * Depending on the type it is describing, a 'struct btf_type'
+ * object may be followed by more data.  For example, to
+ * describe an array, 'struct btf_type' is followed by
+ * 'struct btf_array'.
+ *
+ * 'struct btf_type' and any extra data following it are
+ * 4 bytes aligned.
+ *
+ * Type section:
+ * ~~~~~~~~~~~~~
+ * The BTF type section contains a list of 'struct btf_type' objects.
+ * Each one describes a C type.  Recall from the above section
+ * that a 'struct btf_type' object could be immediately followed by extra
+ * data in order to describe some particular C types.
+ *
+ * type_id:
+ * ~~~~~~~
+ * Each btf_type object is identified by a type_id.  The type_id
+ * is implied by the location of the btf_type object in
+ * the BTF type section.  The first one has type_id 1, the second
+ * one has type_id 2, etc.  Hence, an earlier btf_type has
+ * a smaller type_id.
+ *
+ * A btf_type object may refer to another btf_type object by using
+ * type_id (i.e. the "type" in the "struct btf_type").
+ *
+ * NOTE that we cannot assume any reference-order.
+ * A btf_type object can refer to an earlier btf_type object
+ * but it can also refer to a later btf_type object.
+ *
+ * For example, to describe "const void *".  A btf_type
+ * object describing "const" may refer to another btf_type
+ * object describing "void *".  This type-reference is done
+ * by specifying type_id:
+ *
+ * [1] CONST (anon) type_id=2
+ * [2] PTR (anon) type_id=0
+ *
+ * The above is the btf_verifier debug log:
+ *   - Each line starting with "[?]" is a btf_type object
+ *   - [?] is the type_id of the btf_type object.
+ *   - CONST/PTR is the BTF_KIND_XXX
+ *   - "(anon)" is the name of the type.  It just
+ *     happens that CONST and PTR have no name.
+ *   - type_id=XXX is the 'u32 type' in btf_type
+ *
+ * NOTE: "void" has type_id 0
+ *
+ * String section:
+ * ~~~~~~~~~~~~~~
+ * The BTF string section contains the names used by the type section.
+ * Each string is referred to by an "offset" from the beginning of the
+ * string section.
+ *
+ * Each string is '\0' terminated.
+ *
+ * The first character in the string section must be '\0'
+ * which is used to mean 'anonymous'. Some btf_type may not
+ * have a name.
+ */
+
+/* BTF verification:
+ *
+ * To verify BTF data, two passes are needed.
+ *
+ * Pass #1
+ * ~~~~~~~
+ * The first pass is to collect all btf_type objects to
+ * an array: "btf->types".
+ *
+ * Depending on the C type that a btf_type is describing,
+ * a btf_type may be followed by extra data.  We don't know
+ * how many btf_types there are, and more importantly we don't
+ * know where each btf_type is located in the type section.
+ *
+ * Without knowing the location of each type_id, most verifications
+ * cannot be done.  e.g. an earlier btf_type may refer to a later
+ * btf_type (recall the "const void *" above), so we cannot
+ * check this type-reference in the first pass.
+ *
+ * The first pass still does some verification (e.g.
+ * checking that the name is a valid offset into the string section).
+ *
+ * Pass #2
+ * ~~~~~~~
+ * The main focus is to resolve a btf_type that is referring
+ * to another type.
+ *
+ * We have to ensure the referring type:
+ * 1) does exist in the BTF (i.e. in btf->types[])
+ * 2) does not cause a loop:
+ *	struct A {
+ *		struct B b;
+ *	};
+ *
+ *	struct B {
+ *		struct A a;
+ *	};
+ *
+ * btf_type_needs_resolve() decides if a btf_type needs
+ * to be resolved.
+ *
+ * A needs_resolve type implements the "resolve()" ops, which
+ * essentially does a DFS and detects backedges.
+ *
+ * During resolve (or DFS), different C types have different
+ * "RESOLVED" conditions.
+ *
+ * When resolving a BTF_KIND_STRUCT, we need to resolve all its
+ * members because a member is always referring to another
+ * type.  A struct's member can be treated as "RESOLVED" if
+ * it is referring to a BTF_KIND_PTR.  Otherwise, the
+ * following valid C struct would be rejected:
+ *
+ *	struct A {
+ *		int m;
+ *		struct A *a;
+ *	};
+ *
+ * When resolving a BTF_KIND_PTR, it needs to keep resolving if
+ * it is referring to another BTF_KIND_PTR.  Otherwise, we cannot
+ * detect a pointer loop, e.g.:
+ * BTF_KIND_CONST -> BTF_KIND_PTR -> BTF_KIND_CONST -> BTF_KIND_PTR +
+ *                        ^                                         |
+ *                        +-----------------------------------------+
+ *
+ */
+
+#define BITS_PER_U64 (sizeof(u64) * BITS_PER_BYTE)
+#define BITS_PER_BYTE_MASK (BITS_PER_BYTE - 1)
+#define BITS_PER_BYTE_MASKED(bits) ((bits) & BITS_PER_BYTE_MASK)
+#define BITS_ROUNDDOWN_BYTES(bits) ((bits) >> 3)
+#define BITS_ROUNDUP_BYTES(bits) \
+	(BITS_ROUNDDOWN_BYTES(bits) + !!BITS_PER_BYTE_MASKED(bits))
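+
+/* Example: a 12-bit member at bit offset 4 starts in byte
+ * BITS_ROUNDDOWN_BYTES(4) == 0 and needs
+ * BITS_ROUNDUP_BYTES(4 + 12) == 2 bytes copied out.
+ */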
+
+/* 16MB covers 64k structs with 16 members each, plus
+ * a few MB of space for the string section.
+ * The hard limit is S32_MAX.
+ */
+#define BTF_MAX_SIZE (16 * 1024 * 1024)
+/* 64k. We can raise it later. The hard limit is S32_MAX. */
+#define BTF_MAX_NR_TYPES 65535
+
+#define for_each_member(i, struct_type, member)			\
+	for (i = 0, member = btf_type_member(struct_type);	\
+	     i < btf_type_vlen(struct_type);			\
+	     i++, member++)
+
+#define for_each_member_from(i, from, struct_type, member)		\
+	for (i = from, member = btf_type_member(struct_type) + from;	\
+	     i < btf_type_vlen(struct_type);				\
+	     i++, member++)
+
+struct btf {
+	union {
+		struct btf_header *hdr;
+		void *data;
+	};
+	struct btf_type **types;
+	u32 *resolved_ids;
+	u32 *resolved_sizes;
+	const char *strings;
+	void *nohdr_data;
+	u32 nr_types;
+	u32 types_size;
+	u32 data_size;
+	refcount_t refcnt;
+};
+
+enum verifier_phase {
+	CHECK_META,
+	CHECK_TYPE,
+};
+
+struct resolve_vertex {
+	const struct btf_type *t;
+	u32 type_id;
+	u16 next_member;
+};
+
+enum visit_state {
+	NOT_VISITED,
+	VISITED,
+	RESOLVED,
+};
+
+enum resolve_mode {
+	RESOLVE_TBD,	/* To Be Determined */
+	RESOLVE_PTR,	/* Resolving for Pointer */
+	RESOLVE_STRUCT_OR_ARRAY,	/* Resolving for struct/union
+					 * or array
+					 */
+};
+
+#define MAX_RESOLVE_DEPTH 32
+
+struct btf_verifier_env {
+	struct btf *btf;
+	u8 *visit_states;
+	struct resolve_vertex stack[MAX_RESOLVE_DEPTH];
+	struct bpf_verifier_log log;
+	u32 log_type_id;
+	u32 top_stack;
+	enum verifier_phase phase;
+	enum resolve_mode resolve_mode;
+};
+
+static const char * const btf_kind_str[NR_BTF_KINDS] = {
+	[BTF_KIND_UNKN]		= "UNKNOWN",
+	[BTF_KIND_INT]		= "INT",
+	[BTF_KIND_PTR]		= "PTR",
+	[BTF_KIND_ARRAY]	= "ARRAY",
+	[BTF_KIND_STRUCT]	= "STRUCT",
+	[BTF_KIND_UNION]	= "UNION",
+	[BTF_KIND_ENUM]		= "ENUM",
+	[BTF_KIND_FWD]		= "FWD",
+	[BTF_KIND_TYPEDEF]	= "TYPEDEF",
+	[BTF_KIND_VOLATILE]	= "VOLATILE",
+	[BTF_KIND_CONST]	= "CONST",
+	[BTF_KIND_RESTRICT]	= "RESTRICT",
+};
+
+struct btf_kind_operations {
+	s32 (*check_meta)(struct btf_verifier_env *env,
+			  const struct btf_type *t,
+			  u32 meta_left);
+	int (*resolve)(struct btf_verifier_env *env,
+		       const struct resolve_vertex *v);
+	int (*check_member)(struct btf_verifier_env *env,
+			    const struct btf_type *struct_type,
+			    const struct btf_member *member,
+			    const struct btf_type *member_type);
+	void (*log_details)(struct btf_verifier_env *env,
+			    const struct btf_type *t);
+	void (*seq_show)(const struct btf *btf, const struct btf_type *t,
+			 u32 type_id, void *data, u8 bits_offsets,
+			 struct seq_file *m);
+};
+
+static const struct btf_kind_operations * const kind_ops[NR_BTF_KINDS];
+static struct btf_type btf_void;
+
+static bool btf_type_is_modifier(const struct btf_type *t)
+{
+	/* Some of them are not strictly C modifiers,
+	 * but they are grouped into the same bucket
+	 * as far as BTF is concerned:
+	 *   A type (t) that refers to another
+	 *   type through t->type AND its size cannot
+	 *   be determined without following the t->type.
+	 *
+	 * ptr does not fall into this bucket
+	 * because its size is always sizeof(void *).
+	 */
+	switch (BTF_INFO_KIND(t->info)) {
+	case BTF_KIND_TYPEDEF:
+	case BTF_KIND_VOLATILE:
+	case BTF_KIND_CONST:
+	case BTF_KIND_RESTRICT:
+		return true;
+	}
+
+	return false;
+}
+
+static bool btf_type_is_void(const struct btf_type *t)
+{
+	/* void => no type and size info.
+	 * Hence, FWD is also treated as void.
+	 */
+	return t == &btf_void || BTF_INFO_KIND(t->info) == BTF_KIND_FWD;
+}
+
+static bool btf_type_is_void_or_null(const struct btf_type *t)
+{
+	return !t || btf_type_is_void(t);
+}
+
+/* union is only a special case of struct:
+ * all its offsetof(member) == 0
+ */
+static bool btf_type_is_struct(const struct btf_type *t)
+{
+	u8 kind = BTF_INFO_KIND(t->info);
+
+	return kind == BTF_KIND_STRUCT || kind == BTF_KIND_UNION;
+}
+
+static bool btf_type_is_array(const struct btf_type *t)
+{
+	return BTF_INFO_KIND(t->info) == BTF_KIND_ARRAY;
+}
+
+static bool btf_type_is_ptr(const struct btf_type *t)
+{
+	return BTF_INFO_KIND(t->info) == BTF_KIND_PTR;
+}
+
+static bool btf_type_is_int(const struct btf_type *t)
+{
+	return BTF_INFO_KIND(t->info) == BTF_KIND_INT;
+}
+
+/* What types need to be resolved?
+ *
+ * btf_type_is_modifier() is an obvious one.
+ *
+ * btf_type_is_struct() because its member refers to
+ * another type (through member->type).
+ *
+ * btf_type_is_array() because its element (array->type)
+ * refers to another type.  An array can be thought of as a
+ * special case of a struct where the same member type is
+ * repeated array->nelems times.
+ */
+static bool btf_type_needs_resolve(const struct btf_type *t)
+{
+	return btf_type_is_modifier(t) ||
+		btf_type_is_ptr(t) ||
+		btf_type_is_struct(t) ||
+		btf_type_is_array(t);
+}
+
+/* t->size can be used */
+static bool btf_type_has_size(const struct btf_type *t)
+{
+	switch (BTF_INFO_KIND(t->info)) {
+	case BTF_KIND_INT:
+	case BTF_KIND_STRUCT:
+	case BTF_KIND_UNION:
+	case BTF_KIND_ENUM:
+		return true;
+	}
+
+	return false;
+}
+
+static const char *btf_int_encoding_str(u8 encoding)
+{
+	if (encoding == 0)
+		return "(none)";
+	else if (encoding == BTF_INT_SIGNED)
+		return "SIGNED";
+	else if (encoding == BTF_INT_CHAR)
+		return "CHAR";
+	else if (encoding == BTF_INT_BOOL)
+		return "BOOL";
+	else if (encoding == BTF_INT_VARARGS)
+		return "VARARGS";
+	else
+		return "UNKN";
+}
+
+static u16 btf_type_vlen(const struct btf_type *t)
+{
+	return BTF_INFO_VLEN(t->info);
+}
+
+static u32 btf_type_int(const struct btf_type *t)
+{
+	return *(u32 *)(t + 1);
+}
+
+static const struct btf_array *btf_type_array(const struct btf_type *t)
+{
+	return (const struct btf_array *)(t + 1);
+}
+
+static const struct btf_member *btf_type_member(const struct btf_type *t)
+{
+	return (const struct btf_member *)(t + 1);
+}
+
+static const struct btf_enum *btf_type_enum(const struct btf_type *t)
+{
+	return (const struct btf_enum *)(t + 1);
+}
+
+static const struct btf_kind_operations *btf_type_ops(const struct btf_type *t)
+{
+	return kind_ops[BTF_INFO_KIND(t->info)];
+}
+
+static bool btf_name_offset_valid(const struct btf *btf, u32 offset)
+{
+	return !BTF_STR_TBL_ELF_ID(offset) &&
+		BTF_STR_OFFSET(offset) < btf->hdr->str_len;
+}
+
+static const char *btf_name_by_offset(const struct btf *btf, u32 offset)
+{
+	if (!BTF_STR_OFFSET(offset))
+		return "(anon)";
+	else if (BTF_STR_OFFSET(offset) < btf->hdr->str_len)
+		return &btf->strings[BTF_STR_OFFSET(offset)];
+	else
+		return "(invalid-name-offset)";
+}
+
+static const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id)
+{
+	if (type_id > btf->nr_types)
+		return NULL;
+
+	return btf->types[type_id];
+}
+
+__printf(2, 3) static void __btf_verifier_log(struct bpf_verifier_log *log,
+					      const char *fmt, ...)
+{
+	va_list args;
+
+	va_start(args, fmt);
+	bpf_verifier_vlog(log, fmt, args);
+	va_end(args);
+}
+
+__printf(2, 3) static void btf_verifier_log(struct btf_verifier_env *env,
+					    const char *fmt, ...)
+{
+	struct bpf_verifier_log *log = &env->log;
+	va_list args;
+
+	if (!bpf_verifier_log_needed(log))
+		return;
+
+	va_start(args, fmt);
+	bpf_verifier_vlog(log, fmt, args);
+	va_end(args);
+}
+
+__printf(4, 5) static void __btf_verifier_log_type(struct btf_verifier_env *env,
+						   const struct btf_type *t,
+						   bool log_details,
+						   const char *fmt, ...)
+{
+	struct bpf_verifier_log *log = &env->log;
+	u8 kind = BTF_INFO_KIND(t->info);
+	struct btf *btf = env->btf;
+	va_list args;
+
+	if (!bpf_verifier_log_needed(log))
+		return;
+
+	__btf_verifier_log(log, "[%u] %s %s%s",
+			   env->log_type_id,
+			   btf_kind_str[kind],
+			   btf_name_by_offset(btf, t->name),
+			   log_details ? " " : "");
+
+	if (log_details)
+		btf_type_ops(t)->log_details(env, t);
+
+	if (fmt && *fmt) {
+		__btf_verifier_log(log, " ");
+		va_start(args, fmt);
+		bpf_verifier_vlog(log, fmt, args);
+		va_end(args);
+	}
+
+	__btf_verifier_log(log, "\n");
+}
+
+#define btf_verifier_log_type(env, t, ...) \
+	__btf_verifier_log_type((env), (t), true, __VA_ARGS__)
+#define btf_verifier_log_basic(env, t, ...) \
+	__btf_verifier_log_type((env), (t), false, __VA_ARGS__)
+
+__printf(4, 5)
+static void btf_verifier_log_member(struct btf_verifier_env *env,
+				    const struct btf_type *struct_type,
+				    const struct btf_member *member,
+				    const char *fmt, ...)
+{
+	struct bpf_verifier_log *log = &env->log;
+	struct btf *btf = env->btf;
+	va_list args;
+
+	if (!bpf_verifier_log_needed(log))
+		return;
+
+	/* The CHECK_META phase already did a btf dump.
+	 *
+	 * If the member is logged again, it must have hit an error
+	 * while parsing this member.  It is useful to print out which
+	 * struct this member belongs to.
+	 */
+	if (env->phase != CHECK_META)
+		btf_verifier_log_type(env, struct_type, NULL);
+
+	__btf_verifier_log(log, "\t%s type_id=%u bits_offset=%u",
+			   btf_name_by_offset(btf, member->name),
+			   member->type, member->offset);
+
+	if (fmt && *fmt) {
+		__btf_verifier_log(log, " ");
+		va_start(args, fmt);
+		bpf_verifier_vlog(log, fmt, args);
+		va_end(args);
+	}
+
+	__btf_verifier_log(log, "\n");
+}
+
+static void btf_verifier_log_hdr(struct btf_verifier_env *env)
+{
+	struct bpf_verifier_log *log = &env->log;
+	const struct btf *btf = env->btf;
+	const struct btf_header *hdr;
+
+	if (!bpf_verifier_log_needed(log))
+		return;
+
+	hdr = btf->hdr;
+	__btf_verifier_log(log, "magic: 0x%x\n", hdr->magic);
+	__btf_verifier_log(log, "version: %u\n", hdr->version);
+	__btf_verifier_log(log, "flags: 0x%x\n", hdr->flags);
+	__btf_verifier_log(log, "parent_label: %u\n", hdr->parent_label);
+	__btf_verifier_log(log, "parent_name: %u\n", hdr->parent_name);
+	__btf_verifier_log(log, "label_off: %u\n", hdr->label_off);
+	__btf_verifier_log(log, "object_off: %u\n", hdr->object_off);
+	__btf_verifier_log(log, "func_off: %u\n", hdr->func_off);
+	__btf_verifier_log(log, "type_off: %u\n", hdr->type_off);
+	__btf_verifier_log(log, "str_off: %u\n", hdr->str_off);
+	__btf_verifier_log(log, "str_len: %u\n", hdr->str_len);
+	__btf_verifier_log(log, "btf_total_size: %u\n", btf->data_size);
+}
+
+static int btf_add_type(struct btf_verifier_env *env, struct btf_type *t)
+{
+	struct btf *btf = env->btf;
+
+	/* < 2 because +1 for btf_void which is always in btf->types[0].
+	 * btf_void is not accounted in btf->nr_types because btf_void
+	 * does not come from the BTF file.
+	 */
+	if (btf->types_size - btf->nr_types < 2) {
+		/* Expand 'types' array */
+
+		struct btf_type **new_types;
+		u32 expand_by, new_size;
+
+		if (btf->types_size == BTF_MAX_NR_TYPES) {
+			btf_verifier_log(env, "Exceeded max num of types");
+			return -E2BIG;
+		}
+
+		expand_by = max_t(u32, btf->types_size >> 2, 16);
+		new_size = min_t(u32, BTF_MAX_NR_TYPES,
+				 btf->types_size + expand_by);
+
+		new_types = kvzalloc(new_size * sizeof(*new_types),
+				     GFP_KERNEL | __GFP_NOWARN);
+		if (!new_types)
+			return -ENOMEM;
+
+		if (btf->nr_types == 0)
+			new_types[0] = &btf_void;
+		else
+			memcpy(new_types, btf->types,
+			       sizeof(*btf->types) * (btf->nr_types + 1));
+
+		kvfree(btf->types);
+		btf->types = new_types;
+		btf->types_size = new_size;
+	}
+
+	btf->types[++(btf->nr_types)] = t;
+
+	return 0;
+}
+
+static void btf_free(struct btf *btf)
+{
+	kvfree(btf->types);
+	kvfree(btf->resolved_sizes);
+	kvfree(btf->resolved_ids);
+	kvfree(btf->data);
+	kfree(btf);
+}
+
+static void btf_get(struct btf *btf)
+{
+	refcount_inc(&btf->refcnt);
+}
+
+void btf_put(struct btf *btf)
+{
+	if (btf && refcount_dec_and_test(&btf->refcnt))
+		btf_free(btf);
+}
+
+static int env_resolve_init(struct btf_verifier_env *env)
+{
+	struct btf *btf = env->btf;
+	u32 nr_types = btf->nr_types;
+	u32 *resolved_sizes = NULL;
+	u32 *resolved_ids = NULL;
+	u8 *visit_states = NULL;
+
+	/* +1 for btf_void */
+	resolved_sizes = kvzalloc((nr_types + 1) * sizeof(*resolved_sizes),
+				  GFP_KERNEL | __GFP_NOWARN);
+	if (!resolved_sizes)
+		goto nomem;
+
+	resolved_ids = kvzalloc((nr_types + 1) * sizeof(*resolved_ids),
+				GFP_KERNEL | __GFP_NOWARN);
+	if (!resolved_ids)
+		goto nomem;
+
+	visit_states = kvzalloc((nr_types + 1) * sizeof(*visit_states),
+				GFP_KERNEL | __GFP_NOWARN);
+	if (!visit_states)
+		goto nomem;
+
+	btf->resolved_sizes = resolved_sizes;
+	btf->resolved_ids = resolved_ids;
+	env->visit_states = visit_states;
+
+	return 0;
+
+nomem:
+	kvfree(resolved_sizes);
+	kvfree(resolved_ids);
+	kvfree(visit_states);
+	return -ENOMEM;
+}
+
+static void btf_verifier_env_free(struct btf_verifier_env *env)
+{
+	kvfree(env->visit_states);
+	kfree(env);
+}
+
+static bool env_type_is_resolve_sink(const struct btf_verifier_env *env,
+				     const struct btf_type *next_type)
+{
+	switch (env->resolve_mode) {
+	case RESOLVE_TBD:
+		/* int, enum or void is a sink */
+		return !btf_type_needs_resolve(next_type);
+	case RESOLVE_PTR:
+		/* int, enum, void, struct or array is a sink for ptr */
+		return !btf_type_is_modifier(next_type) &&
+			!btf_type_is_ptr(next_type);
+	case RESOLVE_STRUCT_OR_ARRAY:
+		/* int, enum, void or ptr is a sink for struct and array */
+		return !btf_type_is_modifier(next_type) &&
+			!btf_type_is_array(next_type) &&
+			!btf_type_is_struct(next_type);
+	default:
+		BUG_ON(1);
+	}
+}
+
+static bool env_type_is_resolved(const struct btf_verifier_env *env,
+				 u32 type_id)
+{
+	return env->visit_states[type_id] == RESOLVED;
+}
+
+static int env_stack_push(struct btf_verifier_env *env,
+			  const struct btf_type *t, u32 type_id)
+{
+	struct resolve_vertex *v;
+
+	if (env->top_stack == MAX_RESOLVE_DEPTH)
+		return -E2BIG;
+
+	if (env->visit_states[type_id] != NOT_VISITED)
+		return -EEXIST;
+
+	env->visit_states[type_id] = VISITED;
+
+	v = &env->stack[env->top_stack++];
+	v->t = t;
+	v->type_id = type_id;
+	v->next_member = 0;
+
+	if (env->resolve_mode == RESOLVE_TBD) {
+		if (btf_type_is_ptr(t))
+			env->resolve_mode = RESOLVE_PTR;
+		else if (btf_type_is_struct(t) || btf_type_is_array(t))
+			env->resolve_mode = RESOLVE_STRUCT_OR_ARRAY;
+	}
+
+	return 0;
+}
+
+static void env_stack_set_next_member(struct btf_verifier_env *env,
+				      u16 next_member)
+{
+	env->stack[env->top_stack - 1].next_member = next_member;
+}
+
+static void env_stack_pop_resolved(struct btf_verifier_env *env,
+				   u32 resolved_type_id,
+				   u32 resolved_size)
+{
+	u32 type_id = env->stack[--(env->top_stack)].type_id;
+	struct btf *btf = env->btf;
+
+	btf->resolved_sizes[type_id] = resolved_size;
+	btf->resolved_ids[type_id] = resolved_type_id;
+	env->visit_states[type_id] = RESOLVED;
+}
+
+static const struct resolve_vertex *env_stack_peak(struct btf_verifier_env *env)
+{
+	return env->top_stack ? &env->stack[env->top_stack - 1] : NULL;
+}
+
+/* The input param "type_id" must point to a needs_resolve type */
+static const struct btf_type *btf_type_id_resolve(const struct btf *btf,
+						  u32 *type_id)
+{
+	*type_id = btf->resolved_ids[*type_id];
+	return btf_type_by_id(btf, *type_id);
+}
+
+const struct btf_type *btf_type_id_size(const struct btf *btf,
+					u32 *type_id, u32 *ret_size)
+{
+	const struct btf_type *size_type;
+	u32 size_type_id = *type_id;
+	u32 size = 0;
+
+	size_type = btf_type_by_id(btf, size_type_id);
+	if (btf_type_is_void_or_null(size_type))
+		return NULL;
+
+	if (btf_type_has_size(size_type)) {
+		size = size_type->size;
+	} else if (btf_type_is_array(size_type)) {
+		size = btf->resolved_sizes[size_type_id];
+	} else if (btf_type_is_ptr(size_type)) {
+		size = sizeof(void *);
+	} else {
+		if (WARN_ON_ONCE(!btf_type_is_modifier(size_type)))
+			return NULL;
+
+		size = btf->resolved_sizes[size_type_id];
+		size_type_id = btf->resolved_ids[size_type_id];
+		size_type = btf_type_by_id(btf, size_type_id);
+		if (btf_type_is_void(size_type))
+			return NULL;
+	}
+
+	*type_id = size_type_id;
+	if (ret_size)
+		*ret_size = size;
+
+	return size_type;
+}
+
+static int btf_df_check_member(struct btf_verifier_env *env,
+			       const struct btf_type *struct_type,
+			       const struct btf_member *member,
+			       const struct btf_type *member_type)
+{
+	btf_verifier_log_basic(env, struct_type,
+			       "Unsupported check_member");
+	return -EINVAL;
+}
+
+static int btf_df_resolve(struct btf_verifier_env *env,
+			  const struct resolve_vertex *v)
+{
+	btf_verifier_log_basic(env, v->t, "Unsupported resolve");
+	return -EINVAL;
+}
+
+static void btf_df_seq_show(const struct btf *btf, const struct btf_type *t,
+			    u32 type_id, void *data, u8 bits_offsets,
+			    struct seq_file *m)
+{
+	seq_printf(m, "<unsupported kind:%u>", BTF_INFO_KIND(t->info));
+}
+
+static int btf_int_check_member(struct btf_verifier_env *env,
+				const struct btf_type *struct_type,
+				const struct btf_member *member,
+				const struct btf_type *member_type)
+{
+	u32 int_data = btf_type_int(member_type);
+	u32 struct_bits_off = member->offset;
+	u32 struct_size = struct_type->size;
+	u32 nr_copy_bits;
+	u32 bytes_offset;
+
+	if (U32_MAX - struct_bits_off < BTF_INT_OFFSET(int_data)) {
+		btf_verifier_log_member(env, struct_type, member,
+					"bits_offset exceeds U32_MAX");
+		return -EINVAL;
+	}
+
+	struct_bits_off += BTF_INT_OFFSET(int_data);
+	bytes_offset = BITS_ROUNDDOWN_BYTES(struct_bits_off);
+	nr_copy_bits = BTF_INT_BITS(int_data) +
+		BITS_PER_BYTE_MASKED(struct_bits_off);
+
+	if (nr_copy_bits > BITS_PER_U64) {
+		btf_verifier_log_member(env, struct_type, member,
+					"nr_copy_bits exceeds 64");
+		return -EINVAL;
+	}
+
+	if (struct_size < bytes_offset ||
+	    struct_size - bytes_offset < BITS_ROUNDUP_BYTES(nr_copy_bits)) {
+		btf_verifier_log_member(env, struct_type, member,
+					"Member exceeds struct_size");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static s32 btf_int_check_meta(struct btf_verifier_env *env,
+			      const struct btf_type *t,
+			      u32 meta_left)
+{
+	u32 int_data, nr_bits, meta_needed = sizeof(int_data);
+	u16 encoding;
+
+	if (meta_left < meta_needed) {
+		btf_verifier_log_basic(env, t,
+				       "meta_left:%u meta_needed:%u",
+				       meta_left, meta_needed);
+		return -EINVAL;
+	}
+
+	if (btf_type_vlen(t)) {
+		btf_verifier_log_type(env, t, "vlen != 0");
+		return -EINVAL;
+	}
+
+	int_data = btf_type_int(t);
+	nr_bits = BTF_INT_BITS(int_data) + BTF_INT_OFFSET(int_data);
+
+	if (nr_bits > BITS_PER_U64) {
+		btf_verifier_log_type(env, t, "nr_bits exceeds %zu",
+				      BITS_PER_U64);
+		return -EINVAL;
+	}
+
+	if (BITS_ROUNDUP_BYTES(nr_bits) > t->size) {
+		btf_verifier_log_type(env, t, "nr_bits exceeds type_size");
+		return -EINVAL;
+	}
+
+	encoding = BTF_INT_ENCODING(int_data);
+	if (encoding &&
+	    encoding != BTF_INT_SIGNED &&
+	    encoding != BTF_INT_CHAR &&
+	    encoding != BTF_INT_BOOL &&
+	    encoding != BTF_INT_VARARGS) {
+		btf_verifier_log_type(env, t, "Unsupported encoding");
+		return -ENOTSUPP;
+	}
+
+	btf_verifier_log_type(env, t, NULL);
+
+	return meta_needed;
+}
+
+static void btf_int_log(struct btf_verifier_env *env,
+			const struct btf_type *t)
+{
+	int int_data = btf_type_int(t);
+
+	btf_verifier_log(env,
+			 "size=%u bits_offset=%u nr_bits=%u encoding=%s",
+			 t->size, BTF_INT_OFFSET(int_data),
+			 BTF_INT_BITS(int_data),
+			 btf_int_encoding_str(BTF_INT_ENCODING(int_data)));
+}
+
+static void btf_int_bits_seq_show(const struct btf *btf,
+				  const struct btf_type *t,
+				  void *data, u8 bits_offset,
+				  struct seq_file *m)
+{
+	u32 int_data = btf_type_int(t);
+	u16 nr_bits = BTF_INT_BITS(int_data);
+	u16 total_bits_offset;
+	u16 nr_copy_bytes;
+	u16 nr_copy_bits;
+	u8 nr_upper_bits;
+	union {
+		u64 u64_num;
+		u8  u8_nums[8];
+	} print_num;
+
+	total_bits_offset = bits_offset + BTF_INT_OFFSET(int_data);
+	data += BITS_ROUNDDOWN_BYTES(total_bits_offset);
+	bits_offset = BITS_PER_BYTE_MASKED(total_bits_offset);
+	nr_copy_bits = nr_bits + bits_offset;
+	nr_copy_bytes = BITS_ROUNDUP_BYTES(nr_copy_bits);
+
+	print_num.u64_num = 0;
+	memcpy(&print_num.u64_num, data, nr_copy_bytes);
+
+	/* Ditch the higher order bits */
+	nr_upper_bits = BITS_PER_BYTE_MASKED(nr_copy_bits);
+	if (nr_upper_bits) {
+		/* We need to mask out some bits of the upper byte. */
+		u8 mask = (1 << nr_upper_bits) - 1;
+
+		print_num.u8_nums[nr_copy_bytes - 1] &= mask;
+	}
+
+	print_num.u64_num >>= bits_offset;
+
+	seq_printf(m, "0x%llx", print_num.u64_num);
+}
+
+static void btf_int_seq_show(const struct btf *btf, const struct btf_type *t,
+			     u32 type_id, void *data, u8 bits_offset,
+			     struct seq_file *m)
+{
+	u32 int_data = btf_type_int(t);
+	u8 encoding = BTF_INT_ENCODING(int_data);
+	bool sign = encoding & BTF_INT_SIGNED;
+	u32 nr_bits = BTF_INT_BITS(int_data);
+
+	if (bits_offset || BTF_INT_OFFSET(int_data) ||
+	    BITS_PER_BYTE_MASKED(nr_bits)) {
+		btf_int_bits_seq_show(btf, t, data, bits_offset, m);
+		return;
+	}
+
+	switch (nr_bits) {
+	case 64:
+		if (sign)
+			seq_printf(m, "%lld", *(s64 *)data);
+		else
+			seq_printf(m, "%llu", *(u64 *)data);
+		break;
+	case 32:
+		if (sign)
+			seq_printf(m, "%d", *(s32 *)data);
+		else
+			seq_printf(m, "%u", *(u32 *)data);
+		break;
+	case 16:
+		if (sign)
+			seq_printf(m, "%d", *(s16 *)data);
+		else
+			seq_printf(m, "%u", *(u16 *)data);
+		break;
+	case 8:
+		if (sign)
+			seq_printf(m, "%d", *(s8 *)data);
+		else
+			seq_printf(m, "%u", *(u8 *)data);
+		break;
+	default:
+		btf_int_bits_seq_show(btf, t, data, bits_offset, m);
+	}
+}
+
+static const struct btf_kind_operations int_ops = {
+	.check_meta = btf_int_check_meta,
+	.resolve = btf_df_resolve,
+	.check_member = btf_int_check_member,
+	.log_details = btf_int_log,
+	.seq_show = btf_int_seq_show,
+};
+
+static int btf_modifier_check_member(struct btf_verifier_env *env,
+				     const struct btf_type *struct_type,
+				     const struct btf_member *member,
+				     const struct btf_type *member_type)
+{
+	const struct btf_type *resolved_type;
+	u32 resolved_type_id = member->type;
+	struct btf_member resolved_member;
+	struct btf *btf = env->btf;
+
+	resolved_type = btf_type_id_size(btf, &resolved_type_id, NULL);
+	if (!resolved_type) {
+		btf_verifier_log_member(env, struct_type, member,
+					"Invalid member");
+		return -EINVAL;
+	}
+
+	resolved_member = *member;
+	resolved_member.type = resolved_type_id;
+
+	return btf_type_ops(resolved_type)->check_member(env, struct_type,
+							 &resolved_member,
+							 resolved_type);
+}
+
+static int btf_ptr_check_member(struct btf_verifier_env *env,
+				const struct btf_type *struct_type,
+				const struct btf_member *member,
+				const struct btf_type *member_type)
+{
+	u32 struct_size, struct_bits_off, bytes_offset;
+
+	struct_size = struct_type->size;
+	struct_bits_off = member->offset;
+	bytes_offset = BITS_ROUNDDOWN_BYTES(struct_bits_off);
+
+	if (BITS_PER_BYTE_MASKED(struct_bits_off)) {
+		btf_verifier_log_member(env, struct_type, member,
+					"Member is not byte aligned");
+		return -EINVAL;
+	}
+
+	if (struct_size - bytes_offset < sizeof(void *)) {
+		btf_verifier_log_member(env, struct_type, member,
+					"Member exceeds struct_size");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int btf_ref_type_check_meta(struct btf_verifier_env *env,
+				   const struct btf_type *t,
+				   u32 meta_left)
+{
+	if (btf_type_vlen(t)) {
+		btf_verifier_log_type(env, t, "vlen != 0");
+		return -EINVAL;
+	}
+
+	if (BTF_TYPE_PARENT(t->type)) {
+		btf_verifier_log_type(env, t, "Invalid type_id");
+		return -EINVAL;
+	}
+
+	btf_verifier_log_type(env, t, NULL);
+
+	return 0;
+}
+
+static int btf_modifier_resolve(struct btf_verifier_env *env,
+				const struct resolve_vertex *v)
+{
+	const struct btf_type *t = v->t;
+	const struct btf_type *next_type;
+	u32 next_type_id = t->type;
+	struct btf *btf = env->btf;
+	u32 next_type_size = 0;
+
+	next_type = btf_type_by_id(btf, next_type_id);
+	if (!next_type) {
+		btf_verifier_log_type(env, v->t, "Invalid type_id");
+		return -EINVAL;
+	}
+
+	/* "typedef void new_void", "const void"...etc */
+	if (btf_type_is_void(next_type))
+		goto resolved;
+
+	if (!env_type_is_resolve_sink(env, next_type) &&
+	    !env_type_is_resolved(env, next_type_id))
+		return env_stack_push(env, next_type, next_type_id);
+
+	/* Figure out the resolved next_type_id with size.
+	 * They will be stored in the current modifier's
+	 * resolved_ids and resolved_sizes so that they can
+	 * save us some type-following later (e.g. in
+	 * pretty print).
+	 */
+	if (!btf_type_id_size(btf, &next_type_id, &next_type_size) &&
+	    !btf_type_is_void(btf_type_id_resolve(btf, &next_type_id))) {
+		btf_verifier_log_type(env, v->t, "Invalid type_id");
+		return -EINVAL;
+	}
+
+resolved:
+	env_stack_pop_resolved(env, next_type_id, next_type_size);
+
+	return 0;
+}
+
+static int btf_ptr_resolve(struct btf_verifier_env *env,
+			   const struct resolve_vertex *v)
+{
+	const struct btf_type *next_type;
+	const struct btf_type *t = v->t;
+	u32 next_type_id = t->type;
+	struct btf *btf = env->btf;
+	u32 next_type_size = 0;
+
+	next_type = btf_type_by_id(btf, next_type_id);
+	if (!next_type) {
+		btf_verifier_log_type(env, v->t, "Invalid type_id");
+		return -EINVAL;
+	}
+
+	/* "void *" */
+	if (btf_type_is_void(next_type))
+		goto resolved;
+
+	if (!env_type_is_resolve_sink(env, next_type) &&
+	    !env_type_is_resolved(env, next_type_id))
+		return env_stack_push(env, next_type, next_type_id);
+
+	/* If the modifier was RESOLVED during RESOLVE_STRUCT_OR_ARRAY,
+	 * the modifier may have stopped resolving when it was resolved
+	 * to a ptr (last-resolved-ptr).
+	 *
+	 * We now need to continue from the last-resolved-ptr to
+	 * ensure the last-resolved-ptr does not refer back to
+	 * the current ptr (t).
+	 */
+	if (btf_type_is_modifier(next_type)) {
+		const struct btf_type *resolved_type;
+		u32 resolved_type_id;
+
+		resolved_type_id = next_type_id;
+		resolved_type = btf_type_id_resolve(btf, &resolved_type_id);
+
+		if (btf_type_is_ptr(resolved_type) &&
+		    !env_type_is_resolve_sink(env, resolved_type) &&
+		    !env_type_is_resolved(env, resolved_type_id))
+			return env_stack_push(env, resolved_type,
+					      resolved_type_id);
+	}
+
+	if (!btf_type_id_size(btf, &next_type_id, &next_type_size) &&
+	    !btf_type_is_void(btf_type_id_resolve(btf, &next_type_id))) {
+		btf_verifier_log_type(env, v->t, "Invalid type_id");
+		return -EINVAL;
+	}
+
+resolved:
+	env_stack_pop_resolved(env, next_type_id, 0);
+
+	return 0;
+}
+
+static void btf_modifier_seq_show(const struct btf *btf,
+				  const struct btf_type *t,
+				  u32 type_id, void *data,
+				  u8 bits_offset, struct seq_file *m)
+{
+	t = btf_type_id_resolve(btf, &type_id);
+
+	btf_type_ops(t)->seq_show(btf, t, type_id, data, bits_offset, m);
+}
+
+static void btf_ptr_seq_show(const struct btf *btf, const struct btf_type *t,
+			     u32 type_id, void *data, u8 bits_offset,
+			     struct seq_file *m)
+{
+	/* It is a hashed value */
+	seq_printf(m, "%p", *(void **)data);
+}
+
+static void btf_ref_type_log(struct btf_verifier_env *env,
+			     const struct btf_type *t)
+{
+	btf_verifier_log(env, "type_id=%u", t->type);
+}
+
+static struct btf_kind_operations modifier_ops = {
+	.check_meta = btf_ref_type_check_meta,
+	.resolve = btf_modifier_resolve,
+	.check_member = btf_modifier_check_member,
+	.log_details = btf_ref_type_log,
+	.seq_show = btf_modifier_seq_show,
+};
+
+static struct btf_kind_operations ptr_ops = {
+	.check_meta = btf_ref_type_check_meta,
+	.resolve = btf_ptr_resolve,
+	.check_member = btf_ptr_check_member,
+	.log_details = btf_ref_type_log,
+	.seq_show = btf_ptr_seq_show,
+};
+
+static struct btf_kind_operations fwd_ops = {
+	.check_meta = btf_ref_type_check_meta,
+	.resolve = btf_df_resolve,
+	.check_member = btf_df_check_member,
+	.log_details = btf_ref_type_log,
+	.seq_show = btf_df_seq_show,
+};
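
The modifier and ptr resolvers above ultimately chase chains such as
typedef-of-const-of-int down to a sized type, using the verifier's
resolve stack to detect loops and depth overruns (reported as
-EEXIST/-E2BIG in btf_resolve() further below). A simplified userspace
sketch of the idea, with a flat depth cap instead of the explicit
stack; the type layout here is illustrative, not the kernel's:

#include <stdbool.h>
#include <stddef.h>

enum kind { KIND_INT, KIND_PTR, KIND_TYPEDEF, KIND_CONST, KIND_VOLATILE };

struct type {
	enum kind kind;
	const struct type *next;	/* referenced type, if any */
};

#define MAX_DEPTH 32

static bool is_modifier(const struct type *t)
{
	return t->kind == KIND_TYPEDEF || t->kind == KIND_CONST ||
	       t->kind == KIND_VOLATILE;
}

static const struct type *resolve(const struct type *t)
{
	int depth = 0;

	while (t && is_modifier(t)) {
		if (++depth > MAX_DEPTH)
			return NULL;	/* loop or overly deep chain */
		t = t->next;
	}
	return t;
}

int main(void)
{
	struct type base = { KIND_INT, NULL };
	struct type cnst = { KIND_CONST, &base };
	struct type tdef = { KIND_TYPEDEF, &cnst };

	return resolve(&tdef) == &base ? 0 : 1;
}
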
+
+static int btf_array_check_member(struct btf_verifier_env *env,
+				  const struct btf_type *struct_type,
+				  const struct btf_member *member,
+				  const struct btf_type *member_type)
+{
+	u32 struct_bits_off = member->offset;
+	u32 struct_size, bytes_offset;
+	u32 array_type_id, array_size;
+	struct btf *btf = env->btf;
+
+	if (BITS_PER_BYTE_MASKED(struct_bits_off)) {
+		btf_verifier_log_member(env, struct_type, member,
+					"Member is not byte aligned");
+		return -EINVAL;
+	}
+
+	array_type_id = member->type;
+	btf_type_id_size(btf, &array_type_id, &array_size);
+	struct_size = struct_type->size;
+	bytes_offset = BITS_ROUNDDOWN_BYTES(struct_bits_off);
+	if (struct_size - bytes_offset < array_size) {
+		btf_verifier_log_member(env, struct_type, member,
+					"Member exceeds struct_size");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static s32 btf_array_check_meta(struct btf_verifier_env *env,
+				const struct btf_type *t,
+				u32 meta_left)
+{
+	const struct btf_array *array = btf_type_array(t);
+	u32 meta_needed = sizeof(*array);
+
+	if (meta_left < meta_needed) {
+		btf_verifier_log_basic(env, t,
+				       "meta_left:%u meta_needed:%u",
+				       meta_left, meta_needed);
+		return -EINVAL;
+	}
+
+	if (btf_type_vlen(t)) {
+		btf_verifier_log_type(env, t, "vlen != 0");
+		return -EINVAL;
+	}
+
+	/* We are a little forgiving on array->index_type since
+	 * the kernel is not using it.
+	 */
+	/* Array elem cannot be in type void,
+	 * so !array->type is not allowed.
+	 */
+	if (!array->type || BTF_TYPE_PARENT(array->type)) {
+		btf_verifier_log_type(env, t, "Invalid type_id");
+		return -EINVAL;
+	}
+
+	btf_verifier_log_type(env, t, NULL);
+
+	return meta_needed;
+}
+
+static int btf_array_resolve(struct btf_verifier_env *env,
+			     const struct resolve_vertex *v)
+{
+	const struct btf_array *array = btf_type_array(v->t);
+	const struct btf_type *elem_type;
+	u32 elem_type_id = array->type;
+	struct btf *btf = env->btf;
+	u32 elem_size;
+
+	elem_type = btf_type_by_id(btf, elem_type_id);
+	if (btf_type_is_void_or_null(elem_type)) {
+		btf_verifier_log_type(env, v->t,
+				      "Invalid elem");
+		return -EINVAL;
+	}
+
+	if (!env_type_is_resolve_sink(env, elem_type) &&
+	    !env_type_is_resolved(env, elem_type_id))
+		return env_stack_push(env, elem_type, elem_type_id);
+
+	elem_type = btf_type_id_size(btf, &elem_type_id, &elem_size);
+	if (!elem_type) {
+		btf_verifier_log_type(env, v->t, "Invalid elem");
+		return -EINVAL;
+	}
+
+	if (btf_type_is_int(elem_type)) {
+		int int_type_data = btf_type_int(elem_type);
+		u16 nr_bits = BTF_INT_BITS(int_type_data);
+		u16 nr_bytes = BITS_ROUNDUP_BYTES(nr_bits);
+
+		/* Put more restrictions on arrays of int.  The int cannot
+		 * be a bitfield and must be either u8/u16/u32/u64.
+		 */
+		if (BITS_PER_BYTE_MASKED(nr_bits) ||
+		    BTF_INT_OFFSET(int_type_data) ||
+		    (nr_bytes != sizeof(u8) && nr_bytes != sizeof(u16) &&
+		     nr_bytes != sizeof(u32) && nr_bytes != sizeof(u64))) {
+			btf_verifier_log_type(env, v->t,
+					      "Invalid array of int");
+			return -EINVAL;
+		}
+	}
+
+	if (array->nelems && elem_size > U32_MAX / array->nelems) {
+		btf_verifier_log_type(env, v->t,
+				      "Array size overflows U32_MAX");
+		return -EINVAL;
+	}
+
+	env_stack_pop_resolved(env, elem_type_id, elem_size * array->nelems);
+
+	return 0;
+}
+
+static void btf_array_log(struct btf_verifier_env *env,
+			  const struct btf_type *t)
+{
+	const struct btf_array *array = btf_type_array(t);
+
+	btf_verifier_log(env, "type_id=%u index_type_id=%u nr_elems=%u",
+			 array->type, array->index_type, array->nelems);
+}
+
+static void btf_array_seq_show(const struct btf *btf, const struct btf_type *t,
+			       u32 type_id, void *data, u8 bits_offset,
+			       struct seq_file *m)
+{
+	const struct btf_array *array = btf_type_array(t);
+	const struct btf_kind_operations *elem_ops;
+	const struct btf_type *elem_type;
+	u32 i, elem_size, elem_type_id;
+
+	elem_type_id = array->type;
+	elem_type = btf_type_id_size(btf, &elem_type_id, &elem_size);
+	elem_ops = btf_type_ops(elem_type);
+	seq_puts(m, "[");
+	for (i = 0; i < array->nelems; i++) {
+		if (i)
+			seq_puts(m, ",");
+
+		elem_ops->seq_show(btf, elem_type, elem_type_id, data,
+				   bits_offset, m);
+		data += elem_size;
+	}
+	seq_puts(m, "]");
+}
+
+static struct btf_kind_operations array_ops = {
+	.check_meta = btf_array_check_meta,
+	.resolve = btf_array_resolve,
+	.check_member = btf_array_check_member,
+	.log_details = btf_array_log,
+	.seq_show = btf_array_seq_show,
+};
+
+static int btf_struct_check_member(struct btf_verifier_env *env,
+				   const struct btf_type *struct_type,
+				   const struct btf_member *member,
+				   const struct btf_type *member_type)
+{
+	u32 struct_bits_off = member->offset;
+	u32 struct_size, bytes_offset;
+
+	if (BITS_PER_BYTE_MASKED(struct_bits_off)) {
+		btf_verifier_log_member(env, struct_type, member,
+					"Member is not byte aligned");
+		return -EINVAL;
+	}
+
+	struct_size = struct_type->size;
+	bytes_offset = BITS_ROUNDDOWN_BYTES(struct_bits_off);
+	if (struct_size - bytes_offset < member_type->size) {
+		btf_verifier_log_member(env, struct_type, member,
+					"Member exceeds struct_size");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static s32 btf_struct_check_meta(struct btf_verifier_env *env,
+				 const struct btf_type *t,
+				 u32 meta_left)
+{
+	bool is_union = BTF_INFO_KIND(t->info) == BTF_KIND_UNION;
+	const struct btf_member *member;
+	struct btf *btf = env->btf;
+	u32 struct_size = t->size;
+	u32 meta_needed;
+	u16 i;
+
+	meta_needed = btf_type_vlen(t) * sizeof(*member);
+	if (meta_left < meta_needed) {
+		btf_verifier_log_basic(env, t,
+				       "meta_left:%u meta_needed:%u",
+				       meta_left, meta_needed);
+		return -EINVAL;
+	}
+
+	btf_verifier_log_type(env, t, NULL);
+
+	for_each_member(i, t, member) {
+		if (!btf_name_offset_valid(btf, member->name)) {
+			btf_verifier_log_member(env, t, member,
+						"Invalid member name_offset:%u",
+						member->name);
+			return -EINVAL;
+		}
+
+		/* A member cannot be in type void */
+		if (!member->type || BTF_TYPE_PARENT(member->type)) {
+			btf_verifier_log_member(env, t, member,
+						"Invalid type_id");
+			return -EINVAL;
+		}
+
+		if (is_union && member->offset) {
+			btf_verifier_log_member(env, t, member,
+						"Invalid member bits_offset");
+			return -EINVAL;
+		}
+
+		if (BITS_ROUNDUP_BYTES(member->offset) > struct_size) {
+			btf_verifier_log_member(env, t, member,
+						"Member bits_offset exceeds its struct size");
+			return -EINVAL;
+		}
+
+		btf_verifier_log_member(env, t, member, NULL);
+	}
+
+	return meta_needed;
+}
+
+static int btf_struct_resolve(struct btf_verifier_env *env,
+			      const struct resolve_vertex *v)
+{
+	const struct btf_member *member;
+	int err;
+	u16 i;
+
+	/* Before continuing to resolve the next_member,
+	 * ensure the last member is indeed resolved to a
+	 * type with size info.
+	 */
+	if (v->next_member) {
+		const struct btf_type *last_member_type;
+		const struct btf_member *last_member;
+		u16 last_member_type_id;
+
+		last_member = btf_type_member(v->t) + v->next_member - 1;
+		last_member_type_id = last_member->type;
+		if (WARN_ON_ONCE(!env_type_is_resolved(env,
+						       last_member_type_id)))
+			return -EINVAL;
+
+		last_member_type = btf_type_by_id(env->btf,
+						  last_member_type_id);
+		err = btf_type_ops(last_member_type)->check_member(env, v->t,
+							last_member,
+							last_member_type);
+		if (err)
+			return err;
+	}
+
+	for_each_member_from(i, v->next_member, v->t, member) {
+		u32 member_type_id = member->type;
+		const struct btf_type *member_type = btf_type_by_id(env->btf,
+								member_type_id);
+
+		if (btf_type_is_void_or_null(member_type)) {
+			btf_verifier_log_member(env, v->t, member,
+						"Invalid member");
+			return -EINVAL;
+		}
+
+		if (!env_type_is_resolve_sink(env, member_type) &&
+		    !env_type_is_resolved(env, member_type_id)) {
+			env_stack_set_next_member(env, i + 1);
+			return env_stack_push(env, member_type, member_type_id);
+		}
+
+		err = btf_type_ops(member_type)->check_member(env, v->t,
+							      member,
+							      member_type);
+		if (err)
+			return err;
+	}
+
+	env_stack_pop_resolved(env, 0, 0);
+
+	return 0;
+}
+
+static void btf_struct_log(struct btf_verifier_env *env,
+			   const struct btf_type *t)
+{
+	btf_verifier_log(env, "size=%u vlen=%u", t->size, btf_type_vlen(t));
+}
+
+static void btf_struct_seq_show(const struct btf *btf, const struct btf_type *t,
+				u32 type_id, void *data, u8 bits_offset,
+				struct seq_file *m)
+{
+	const char *seq = BTF_INFO_KIND(t->info) == BTF_KIND_UNION ? "|" : ",";
+	const struct btf_member *member;
+	u32 i;
+
+	seq_puts(m, "{");
+	for_each_member(i, t, member) {
+		const struct btf_type *member_type = btf_type_by_id(btf,
+								member->type);
+		u32 member_offset = member->offset;
+		u32 bytes_offset = BITS_ROUNDDOWN_BYTES(member_offset);
+		u8 bits8_offset = BITS_PER_BYTE_MASKED(member_offset);
+		const struct btf_kind_operations *ops;
+
+		if (i)
+			seq_puts(m, seq);
+
+		ops = btf_type_ops(member_type);
+		ops->seq_show(btf, member_type, member->type,
+			      data + bytes_offset, bits8_offset, m);
+	}
+	seq_puts(m, "}");
+}
+
+static struct btf_kind_operations struct_ops = {
+	.check_meta = btf_struct_check_meta,
+	.resolve = btf_struct_resolve,
+	.check_member = btf_struct_check_member,
+	.log_details = btf_struct_log,
+	.seq_show = btf_struct_seq_show,
+};
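
Taken together, the seq_show callbacks emit values as nested text:
struct members comma-separated inside {}, union members separated by
'|', array elements inside []. A toy userspace program printing the
same shape for struct { int a; short b[2]; } with a=1, b={2,3}:

#include <stdio.h>

int main(void)
{
	int a = 1;
	short b[2] = { 2, 3 };
	int i;

	printf("{%d,[", a);
	for (i = 0; i < 2; i++)
		printf("%s%d", i ? "," : "", b[i]);
	printf("]}\n");		/* prints {1,[2,3]} */
	return 0;
}
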
+
+static int btf_enum_check_member(struct btf_verifier_env *env,
+				 const struct btf_type *struct_type,
+				 const struct btf_member *member,
+				 const struct btf_type *member_type)
+{
+	u32 struct_bits_off = member->offset;
+	u32 struct_size, bytes_offset;
+
+	if (BITS_PER_BYTE_MASKED(struct_bits_off)) {
+		btf_verifier_log_member(env, struct_type, member,
+					"Member is not byte aligned");
+		return -EINVAL;
+	}
+
+	struct_size = struct_type->size;
+	bytes_offset = BITS_ROUNDDOWN_BYTES(struct_bits_off);
+	if (struct_size - bytes_offset < sizeof(int)) {
+		btf_verifier_log_member(env, struct_type, member,
+					"Member exceeds struct_size");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static s32 btf_enum_check_meta(struct btf_verifier_env *env,
+			       const struct btf_type *t,
+			       u32 meta_left)
+{
+	const struct btf_enum *enums = btf_type_enum(t);
+	struct btf *btf = env->btf;
+	u16 i, nr_enums;
+	u32 meta_needed;
+
+	nr_enums = btf_type_vlen(t);
+	meta_needed = nr_enums * sizeof(*enums);
+
+	if (meta_left < meta_needed) {
+		btf_verifier_log_basic(env, t,
+				       "meta_left:%u meta_needed:%u",
+				       meta_left, meta_needed);
+		return -EINVAL;
+	}
+
+	if (t->size != sizeof(int)) {
+		btf_verifier_log_type(env, t, "Expected size:%zu",
+				      sizeof(int));
+		return -EINVAL;
+	}
+
+	btf_verifier_log_type(env, t, NULL);
+
+	for (i = 0; i < nr_enums; i++) {
+		if (!btf_name_offset_valid(btf, enums[i].name)) {
+			btf_verifier_log(env, "\tInvalid name_offset:%u",
+					 enums[i].name);
+			return -EINVAL;
+		}
+
+		btf_verifier_log(env, "\t%s val=%d\n",
+				 btf_name_by_offset(btf, enums[i].name),
+				 enums[i].val);
+	}
+
+	return meta_needed;
+}
+
+static void btf_enum_log(struct btf_verifier_env *env,
+			 const struct btf_type *t)
+{
+	btf_verifier_log(env, "size=%u vlen=%u", t->size, btf_type_vlen(t));
+}
+
+static void btf_enum_seq_show(const struct btf *btf, const struct btf_type *t,
+			      u32 type_id, void *data, u8 bits_offset,
+			      struct seq_file *m)
+{
+	const struct btf_enum *enums = btf_type_enum(t);
+	u32 i, nr_enums = btf_type_vlen(t);
+	int v = *(int *)data;
+
+	for (i = 0; i < nr_enums; i++) {
+		if (v == enums[i].val) {
+			seq_printf(m, "%s",
+				   btf_name_by_offset(btf, enums[i].name));
+			return;
+		}
+	}
+
+	seq_printf(m, "%d", v);
+}
+
+static struct btf_kind_operations enum_ops = {
+	.check_meta = btf_enum_check_meta,
+	.resolve = btf_df_resolve,
+	.check_member = btf_enum_check_member,
+	.log_details = btf_enum_log,
+	.seq_show = btf_enum_seq_show,
+};
+
+static const struct btf_kind_operations * const kind_ops[NR_BTF_KINDS] = {
+	[BTF_KIND_INT] = &int_ops,
+	[BTF_KIND_PTR] = &ptr_ops,
+	[BTF_KIND_ARRAY] = &array_ops,
+	[BTF_KIND_STRUCT] = &struct_ops,
+	[BTF_KIND_UNION] = &struct_ops,
+	[BTF_KIND_ENUM] = &enum_ops,
+	[BTF_KIND_FWD] = &fwd_ops,
+	[BTF_KIND_TYPEDEF] = &modifier_ops,
+	[BTF_KIND_VOLATILE] = &modifier_ops,
+	[BTF_KIND_CONST] = &modifier_ops,
+	[BTF_KIND_RESTRICT] = &modifier_ops,
+};
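
This table is what btf_type_ops() (defined earlier in the file)
dispatches through. A sketch of the presumed accessor, indexing by the
kind bits that btf_check_meta() below has already range-checked:

static const struct btf_kind_operations *btf_type_ops(const struct btf_type *t)
{
	return kind_ops[BTF_INFO_KIND(t->info)];
}
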
+
+static s32 btf_check_meta(struct btf_verifier_env *env,
+			  const struct btf_type *t,
+			  u32 meta_left)
+{
+	u32 saved_meta_left = meta_left;
+	s32 var_meta_size;
+
+	if (meta_left < sizeof(*t)) {
+		btf_verifier_log(env, "[%u] meta_left:%u meta_needed:%zu",
+				 env->log_type_id, meta_left, sizeof(*t));
+		return -EINVAL;
+	}
+	meta_left -= sizeof(*t);
+
+	if (BTF_INFO_KIND(t->info) > BTF_KIND_MAX ||
+	    BTF_INFO_KIND(t->info) == BTF_KIND_UNKN) {
+		btf_verifier_log(env, "[%u] Invalid kind:%u",
+				 env->log_type_id, BTF_INFO_KIND(t->info));
+		return -EINVAL;
+	}
+
+	if (!btf_name_offset_valid(env->btf, t->name)) {
+		btf_verifier_log(env, "[%u] Invalid name_offset:%u",
+				 env->log_type_id, t->name);
+		return -EINVAL;
+	}
+
+	var_meta_size = btf_type_ops(t)->check_meta(env, t, meta_left);
+	if (var_meta_size < 0)
+		return var_meta_size;
+
+	meta_left -= var_meta_size;
+
+	return saved_meta_left - meta_left;
+}
+
+static int btf_check_all_metas(struct btf_verifier_env *env)
+{
+	struct btf *btf = env->btf;
+	struct btf_header *hdr;
+	void *cur, *end;
+
+	hdr = btf->hdr;
+	cur = btf->nohdr_data + hdr->type_off;
+	end = btf->nohdr_data + hdr->str_off;
+
+	env->log_type_id = 1;
+	while (cur < end) {
+		struct btf_type *t = cur;
+		s32 meta_size;
+
+		meta_size = btf_check_meta(env, t, end - cur);
+		if (meta_size < 0)
+			return meta_size;
+
+		btf_add_type(env, t);
+		cur += meta_size;
+		env->log_type_id++;
+	}
+
+	return 0;
+}
+
+static int btf_resolve(struct btf_verifier_env *env,
+		       const struct btf_type *t, u32 type_id)
+{
+	const struct resolve_vertex *v;
+	int err = 0;
+
+	env->resolve_mode = RESOLVE_TBD;
+	env_stack_push(env, t, type_id);
+	while (!err && (v = env_stack_peak(env))) {
+		env->log_type_id = v->type_id;
+		err = btf_type_ops(v->t)->resolve(env, v);
+	}
+
+	env->log_type_id = type_id;
+	if (err == -E2BIG)
+		btf_verifier_log_type(env, t,
+				      "Exceeded max resolving depth:%u",
+				      MAX_RESOLVE_DEPTH);
+	else if (err == -EEXIST)
+		btf_verifier_log_type(env, t, "Loop detected");
+
+	return err;
+}
+
+static bool btf_resolve_valid(struct btf_verifier_env *env,
+			      const struct btf_type *t,
+			      u32 type_id)
+{
+	struct btf *btf = env->btf;
+
+	if (!env_type_is_resolved(env, type_id))
+		return false;
+
+	if (btf_type_is_struct(t))
+		return !btf->resolved_ids[type_id] &&
+			!btf->resolved_sizes[type_id];
+
+	if (btf_type_is_modifier(t) || btf_type_is_ptr(t)) {
+		t = btf_type_id_resolve(btf, &type_id);
+		return t && !btf_type_is_modifier(t);
+	}
+
+	if (btf_type_is_array(t)) {
+		const struct btf_array *array = btf_type_array(t);
+		const struct btf_type *elem_type;
+		u32 elem_type_id = array->type;
+		u32 elem_size;
+
+		elem_type = btf_type_id_size(btf, &elem_type_id, &elem_size);
+		return elem_type && !btf_type_is_modifier(elem_type) &&
+			(array->nelems * elem_size ==
+			 btf->resolved_sizes[type_id]);
+	}
+
+	return false;
+}
+
+static int btf_check_all_types(struct btf_verifier_env *env)
+{
+	struct btf *btf = env->btf;
+	u32 type_id;
+	int err;
+
+	err = env_resolve_init(env);
+	if (err)
+		return err;
+
+	env->phase++;
+	for (type_id = 1; type_id <= btf->nr_types; type_id++) {
+		const struct btf_type *t = btf_type_by_id(btf, type_id);
+
+		env->log_type_id = type_id;
+		if (btf_type_needs_resolve(t) &&
+		    !env_type_is_resolved(env, type_id)) {
+			err = btf_resolve(env, t, type_id);
+			if (err)
+				return err;
+		}
+
+		if (btf_type_needs_resolve(t) &&
+		    !btf_resolve_valid(env, t, type_id)) {
+			btf_verifier_log_type(env, t, "Invalid resolve state");
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+
+static int btf_parse_type_sec(struct btf_verifier_env *env)
+{
+	int err;
+
+	err = btf_check_all_metas(env);
+	if (err)
+		return err;
+
+	return btf_check_all_types(env);
+}
+
+static int btf_parse_str_sec(struct btf_verifier_env *env)
+{
+	const struct btf_header *hdr;
+	struct btf *btf = env->btf;
+	const char *start, *end;
+
+	hdr = btf->hdr;
+	start = btf->nohdr_data + hdr->str_off;
+	end = start + hdr->str_len;
+
+	if (!hdr->str_len || hdr->str_len - 1 > BTF_MAX_NAME_OFFSET ||
+	    start[0] || end[-1]) {
+		btf_verifier_log(env, "Invalid string section");
+		return -EINVAL;
+	}
+
+	btf->strings = start;
+
+	return 0;
+}
+
+static int btf_parse_hdr(struct btf_verifier_env *env)
+{
+	const struct btf_header *hdr;
+	struct btf *btf = env->btf;
+	u32 meta_left;
+
+	if (btf->data_size < sizeof(*hdr)) {
+		btf_verifier_log(env, "btf_header not found");
+		return -EINVAL;
+	}
+
+	btf_verifier_log_hdr(env);
+
+	hdr = btf->hdr;
+	if (hdr->magic != BTF_MAGIC) {
+		btf_verifier_log(env, "Invalid magic");
+		return -EINVAL;
+	}
+
+	if (hdr->version != BTF_VERSION) {
+		btf_verifier_log(env, "Unsupported version");
+		return -ENOTSUPP;
+	}
+
+	if (hdr->flags) {
+		btf_verifier_log(env, "Unsupported flags");
+		return -ENOTSUPP;
+	}
+
+	meta_left = btf->data_size - sizeof(*hdr);
+	if (!meta_left) {
+		btf_verifier_log(env, "No data");
+		return -EINVAL;
+	}
+
+	if (meta_left < hdr->type_off || hdr->str_off <= hdr->type_off ||
+	    /* Type section must align to 4 bytes */
+	    hdr->type_off & (sizeof(u32) - 1)) {
+		btf_verifier_log(env, "Invalid type_off");
+		return -EINVAL;
+	}
+
+	if (meta_left < hdr->str_off ||
+	    meta_left - hdr->str_off < hdr->str_len) {
+		btf_verifier_log(env, "Invalid str_off or str_len");
+		return -EINVAL;
+	}
+
+	btf->nohdr_data = btf->hdr + 1;
+
+	return 0;
+}
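
For reference, the same header sanity rules as a standalone userspace
predicate. The reduced struct keeps only the fields this function
reads; the field widths are assumptions, not the kernel ABI:

#include <stdint.h>
#include <stdbool.h>

struct hdr {
	uint16_t magic;
	uint8_t  version;
	uint8_t  flags;
	uint32_t type_off;	/* type section offset, after header */
	uint32_t str_off;	/* string section offset, after header */
	uint32_t str_len;
};

static bool hdr_sane(const struct hdr *h, uint32_t data_size)
{
	uint32_t meta_left;

	if (data_size < sizeof(*h))
		return false;
	meta_left = data_size - sizeof(*h);
	if (!meta_left)
		return false;
	/* Type section precedes string section, 4-byte aligned */
	if (meta_left < h->type_off || h->str_off <= h->type_off ||
	    (h->type_off & 3))
		return false;
	/* String section must fit in the remaining data */
	return meta_left >= h->str_off &&
	       meta_left - h->str_off >= h->str_len;
}
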
+
+static struct btf *btf_parse(void __user *btf_data, u32 btf_data_size,
+			     u32 log_level, char __user *log_ubuf, u32 log_size)
+{
+	struct btf_verifier_env *env = NULL;
+	struct bpf_verifier_log *log;
+	struct btf *btf = NULL;
+	u8 *data;
+	int err;
+
+	if (btf_data_size > BTF_MAX_SIZE)
+		return ERR_PTR(-E2BIG);
+
+	env = kzalloc(sizeof(*env), GFP_KERNEL | __GFP_NOWARN);
+	if (!env)
+		return ERR_PTR(-ENOMEM);
+
+	log = &env->log;
+	if (log_level || log_ubuf || log_size) {
+		/* user requested verbose verifier output
+		 * and supplied a buffer to store the verification trace
+		 */
+		log->level = log_level;
+		log->ubuf = log_ubuf;
+		log->len_total = log_size;
+
+		/* log attributes have to be sane */
+		if (log->len_total < 128 || log->len_total > UINT_MAX >> 8 ||
+		    !log->level || !log->ubuf) {
+			err = -EINVAL;
+			goto errout;
+		}
+	}
+
+	btf = kzalloc(sizeof(*btf), GFP_KERNEL | __GFP_NOWARN);
+	if (!btf) {
+		err = -ENOMEM;
+		goto errout;
+	}
+
+	data = kvmalloc(btf_data_size, GFP_KERNEL | __GFP_NOWARN);
+	if (!data) {
+		err = -ENOMEM;
+		goto errout;
+	}
+
+	btf->data = data;
+	btf->data_size = btf_data_size;
+
+	if (copy_from_user(data, btf_data, btf_data_size)) {
+		err = -EFAULT;
+		goto errout;
+	}
+
+	env->btf = btf;
+
+	err = btf_parse_hdr(env);
+	if (err)
+		goto errout;
+
+	err = btf_parse_str_sec(env);
+	if (err)
+		goto errout;
+
+	err = btf_parse_type_sec(env);
+	if (err)
+		goto errout;
+
+	if (!err && log->level && bpf_verifier_log_full(log)) {
+		err = -ENOSPC;
+		goto errout;
+	}
+
+	if (!err) {
+		btf_verifier_env_free(env);
+		btf_get(btf);
+		return btf;
+	}
+
+errout:
+	btf_verifier_env_free(env);
+	if (btf)
+		btf_free(btf);
+	return ERR_PTR(err);
+}
+
+void btf_type_seq_show(const struct btf *btf, u32 type_id, void *obj,
+		       struct seq_file *m)
+{
+	const struct btf_type *t = btf_type_by_id(btf, type_id);
+
+	btf_type_ops(t)->seq_show(btf, t, type_id, obj, 0, m);
+}
+
+static int btf_release(struct inode *inode, struct file *filp)
+{
+	btf_put(filp->private_data);
+	return 0;
+}
+
+const struct file_operations btf_fops = {
+	.release	= btf_release,
+};
+
+int btf_new_fd(const union bpf_attr *attr)
+{
+	struct btf *btf;
+	int fd;
+
+	btf = btf_parse(u64_to_user_ptr(attr->btf),
+			attr->btf_size, attr->btf_log_level,
+			u64_to_user_ptr(attr->btf_log_buf),
+			attr->btf_log_size);
+	if (IS_ERR(btf))
+		return PTR_ERR(btf);
+
+	fd = anon_inode_getfd("btf", &btf_fops, btf,
+			      O_RDONLY | O_CLOEXEC);
+	if (fd < 0)
+		btf_put(btf);
+
+	return fd;
+}
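
From userspace, the new command is reached via the bpf(2) syscall. A
hedged sketch; the attr field names mirror the code above, and
BPF_BTF_LOAD is assumed to be present in the installed uapi headers:

#include <stdint.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/bpf.h>

static int btf_load(const void *btf_data, uint32_t btf_size,
		    char *log_buf, uint32_t log_size)
{
	union bpf_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.btf = (uint64_t)(unsigned long)btf_data;
	attr.btf_size = btf_size;
	attr.btf_log_buf = (uint64_t)(unsigned long)log_buf;
	attr.btf_log_size = log_size;	/* must be >= 128 when logging */
	attr.btf_log_level = log_buf ? 1 : 0;

	/* Returns a new BTF fd on success, or -1 with errno set */
	return syscall(__NR_bpf, BPF_BTF_LOAD, &attr, sizeof(attr));
}
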
+
+struct btf *btf_get_by_fd(int fd)
+{
+	struct btf *btf;
+	struct fd f;
+
+	f = fdget(fd);
+
+	if (!f.file)
+		return ERR_PTR(-EBADF);
+
+	if (f.file->f_op != &btf_fops) {
+		fdput(f);
+		return ERR_PTR(-EINVAL);
+	}
+
+	btf = f.file->private_data;
+	btf_get(btf);
+	fdput(f);
+
+	return btf;
+}
+
+int btf_get_info_by_fd(const struct btf *btf,
+		       const union bpf_attr *attr,
+		       union bpf_attr __user *uattr)
+{
+	void __user *udata = u64_to_user_ptr(attr->info.info);
+	u32 copy_len = min_t(u32, btf->data_size,
+			     attr->info.info_len);
+
+	if (copy_to_user(udata, btf->data, copy_len) ||
+	    put_user(btf->data_size, &uattr->info.info_len))
+		return -EFAULT;
+
+	return 0;
+}
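
The raw BTF image can then be read back through
BPF_OBJ_GET_INFO_BY_FD; per btf_get_info_by_fd() above, info_len is
updated to the full data_size so a caller can size a retry buffer. A
sketch under the same header assumptions as before:

#include <stdint.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/bpf.h>

static int btf_get_raw(int btf_fd, void *buf, uint32_t buf_len,
		       uint32_t *full_len)
{
	union bpf_attr attr;
	int err;

	memset(&attr, 0, sizeof(attr));
	attr.info.bpf_fd = btf_fd;
	attr.info.info = (uint64_t)(unsigned long)buf;
	attr.info.info_len = buf_len;

	err = syscall(__NR_bpf, BPF_OBJ_GET_INFO_BY_FD, &attr, sizeof(attr));
	if (!err)
		*full_len = attr.info.info_len;	/* kernel's btf->data_size */
	return err;
}
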
diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c
index a4bb0b3..c95b04e 100644
--- a/kernel/bpf/cpumap.c
+++ b/kernel/bpf/cpumap.c
@@ -19,6 +19,7 @@
 #include <linux/bpf.h>
 #include <linux/filter.h>
 #include <linux/ptr_ring.h>
+#include <net/xdp.h>
 
 #include <linux/sched.h>
 #include <linux/workqueue.h>
@@ -137,27 +138,6 @@ static struct bpf_map *cpu_map_alloc(union bpf_attr *attr)
 	return ERR_PTR(err);
 }
 
-static void __cpu_map_queue_destructor(void *ptr)
-{
-	/* The tear-down procedure should have made sure that queue is
-	 * empty.  See __cpu_map_entry_replace() and work-queue
-	 * invoked cpu_map_kthread_stop(). Catch any broken behaviour
-	 * gracefully and warn once.
-	 */
-	if (WARN_ON_ONCE(ptr))
-		page_frag_free(ptr);
-}
-
-static void put_cpu_map_entry(struct bpf_cpu_map_entry *rcpu)
-{
-	if (atomic_dec_and_test(&rcpu->refcnt)) {
-		/* The queue should be empty at this point */
-		ptr_ring_cleanup(rcpu->queue, __cpu_map_queue_destructor);
-		kfree(rcpu->queue);
-		kfree(rcpu);
-	}
-}
-
 static void get_cpu_map_entry(struct bpf_cpu_map_entry *rcpu)
 {
 	atomic_inc(&rcpu->refcnt);
@@ -179,45 +159,8 @@ static void cpu_map_kthread_stop(struct work_struct *work)
 	kthread_stop(rcpu->kthread);
 }
 
-/* For now, xdp_pkt is a cpumap internal data structure, with info
- * carried between enqueue to dequeue. It is mapped into the top
- * headroom of the packet, to avoid allocating separate mem.
- */
-struct xdp_pkt {
-	void *data;
-	u16 len;
-	u16 headroom;
-	u16 metasize;
-	struct net_device *dev_rx;
-};
-
-/* Convert xdp_buff to xdp_pkt */
-static struct xdp_pkt *convert_to_xdp_pkt(struct xdp_buff *xdp)
-{
-	struct xdp_pkt *xdp_pkt;
-	int metasize;
-	int headroom;
-
-	/* Assure headroom is available for storing info */
-	headroom = xdp->data - xdp->data_hard_start;
-	metasize = xdp->data - xdp->data_meta;
-	metasize = metasize > 0 ? metasize : 0;
-	if (unlikely((headroom - metasize) < sizeof(*xdp_pkt)))
-		return NULL;
-
-	/* Store info in top of packet */
-	xdp_pkt = xdp->data_hard_start;
-
-	xdp_pkt->data = xdp->data;
-	xdp_pkt->len  = xdp->data_end - xdp->data;
-	xdp_pkt->headroom = headroom - sizeof(*xdp_pkt);
-	xdp_pkt->metasize = metasize;
-
-	return xdp_pkt;
-}
-
 static struct sk_buff *cpu_map_build_skb(struct bpf_cpu_map_entry *rcpu,
-					 struct xdp_pkt *xdp_pkt)
+					 struct xdp_frame *xdpf)
 {
 	unsigned int frame_size;
 	void *pkt_data_start;
@@ -232,7 +175,7 @@ static struct sk_buff *cpu_map_build_skb(struct bpf_cpu_map_entry *rcpu,
 	 * would be preferred to set frame_size to 2048 or 4096
 	 * depending on the driver.
 	 *   frame_size = 2048;
-	 *   frame_len  = frame_size - sizeof(*xdp_pkt);
+	 *   frame_len  = frame_size - sizeof(*xdp_frame);
 	 *
 	 * Instead, with info available, skb_shared_info is placed after
 	 * the packet len.  This unfortunately fakes the truesize.
@@ -240,21 +183,21 @@ static struct sk_buff *cpu_map_build_skb(struct bpf_cpu_map_entry *rcpu,
 	 * is not at a fixed memory location, with mixed length
 	 * packets, which is bad for cache-line hotness.
 	 */
-	frame_size = SKB_DATA_ALIGN(xdp_pkt->len) + xdp_pkt->headroom +
+	frame_size = SKB_DATA_ALIGN(xdpf->len) + xdpf->headroom +
 		SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
 
-	pkt_data_start = xdp_pkt->data - xdp_pkt->headroom;
+	pkt_data_start = xdpf->data - xdpf->headroom;
 	skb = build_skb(pkt_data_start, frame_size);
 	if (!skb)
 		return NULL;
 
-	skb_reserve(skb, xdp_pkt->headroom);
-	__skb_put(skb, xdp_pkt->len);
-	if (xdp_pkt->metasize)
-		skb_metadata_set(skb, xdp_pkt->metasize);
+	skb_reserve(skb, xdpf->headroom);
+	__skb_put(skb, xdpf->len);
+	if (xdpf->metasize)
+		skb_metadata_set(skb, xdpf->metasize);
 
 	/* Essential SKB info: protocol and skb->dev */
-	skb->protocol = eth_type_trans(skb, xdp_pkt->dev_rx);
+	skb->protocol = eth_type_trans(skb, xdpf->dev_rx);
 
 	/* Optional SKB info, currently missing:
 	 * - HW checksum info		(skb->ip_summed)
@@ -265,6 +208,31 @@ static struct sk_buff *cpu_map_build_skb(struct bpf_cpu_map_entry *rcpu,
 	return skb;
 }
 
+static void __cpu_map_ring_cleanup(struct ptr_ring *ring)
+{
+	/* The tear-down procedure should have made sure that queue is
+	 * empty.  See __cpu_map_entry_replace() and work-queue
+	 * invoked cpu_map_kthread_stop(). Catch any broken behaviour
+	 * gracefully and warn once.
+	 */
+	struct xdp_frame *xdpf;
+
+	while ((xdpf = ptr_ring_consume(ring)))
+		if (WARN_ON_ONCE(xdpf))
+			xdp_return_frame(xdpf);
+}
+
+static void put_cpu_map_entry(struct bpf_cpu_map_entry *rcpu)
+{
+	if (atomic_dec_and_test(&rcpu->refcnt)) {
+		/* The queue should be empty at this point */
+		__cpu_map_ring_cleanup(rcpu->queue);
+		ptr_ring_cleanup(rcpu->queue, NULL);
+		kfree(rcpu->queue);
+		kfree(rcpu);
+	}
+}
+
 static int cpu_map_kthread_run(void *data)
 {
 	struct bpf_cpu_map_entry *rcpu = data;
@@ -278,7 +246,7 @@ static int cpu_map_kthread_run(void *data)
 	 */
 	while (!kthread_should_stop() || !__ptr_ring_empty(rcpu->queue)) {
 		unsigned int processed = 0, drops = 0, sched = 0;
-		struct xdp_pkt *xdp_pkt;
+		struct xdp_frame *xdpf;
 
 		/* Release CPU reschedule checks */
 		if (__ptr_ring_empty(rcpu->queue)) {
@@ -301,13 +269,13 @@ static int cpu_map_kthread_run(void *data)
 		 * kthread CPU pinned. Lockless access to ptr_ring
 		 * consume side valid as no-resize allowed of queue.
 		 */
-		while ((xdp_pkt = __ptr_ring_consume(rcpu->queue))) {
+		while ((xdpf = __ptr_ring_consume(rcpu->queue))) {
 			struct sk_buff *skb;
 			int ret;
 
-			skb = cpu_map_build_skb(rcpu, xdp_pkt);
+			skb = cpu_map_build_skb(rcpu, xdpf);
 			if (!skb) {
-				page_frag_free(xdp_pkt);
+				xdp_return_frame(xdpf);
 				continue;
 			}
 
@@ -604,13 +572,13 @@ static int bq_flush_to_queue(struct bpf_cpu_map_entry *rcpu,
 	spin_lock(&q->producer_lock);
 
 	for (i = 0; i < bq->count; i++) {
-		void *xdp_pkt = bq->q[i];
+		struct xdp_frame *xdpf = bq->q[i];
 		int err;
 
-		err = __ptr_ring_produce(q, xdp_pkt);
+		err = __ptr_ring_produce(q, xdpf);
 		if (err) {
 			drops++;
-			page_frag_free(xdp_pkt); /* Free xdp_pkt */
+			xdp_return_frame(xdpf);
 		}
 		processed++;
 	}
@@ -625,7 +593,7 @@ static int bq_flush_to_queue(struct bpf_cpu_map_entry *rcpu,
 /* Runs under RCU-read-side, plus in softirq under NAPI protection.
  * Thus, safe percpu variable access.
  */
-static int bq_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_pkt *xdp_pkt)
+static int bq_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_frame *xdpf)
 {
 	struct xdp_bulk_queue *bq = this_cpu_ptr(rcpu->bulkq);
 
@@ -636,28 +604,28 @@ static int bq_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_pkt *xdp_pkt)
 	 * driver to code invoking us to finished, due to driver
 	 * (e.g. ixgbe) recycle tricks based on page-refcnt.
 	 *
-	 * Thus, incoming xdp_pkt is always queued here (else we race
+	 * Thus, incoming xdp_frame is always queued here (else we race
 	 * with another CPU on page-refcnt and remaining driver code).
 	 * Queue time is very short, as driver will invoke flush
 	 * operation, when completing napi->poll call.
 	 */
-	bq->q[bq->count++] = xdp_pkt;
+	bq->q[bq->count++] = xdpf;
 	return 0;
 }
 
 int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_buff *xdp,
 		    struct net_device *dev_rx)
 {
-	struct xdp_pkt *xdp_pkt;
+	struct xdp_frame *xdpf;
 
-	xdp_pkt = convert_to_xdp_pkt(xdp);
-	if (unlikely(!xdp_pkt))
+	xdpf = convert_to_xdp_frame(xdp);
+	if (unlikely(!xdpf))
 		return -EOVERFLOW;
 
 	/* Info needed when constructing SKB on remote CPU */
-	xdp_pkt->dev_rx = dev_rx;
+	xdpf->dev_rx = dev_rx;
 
-	bq_enqueue(rcpu, xdp_pkt);
+	bq_enqueue(rcpu, xdpf);
 	return 0;
 }
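
convert_to_xdp_frame() supersedes the removed convert_to_xdp_pkt() and
now lives in <net/xdp.h>; the underlying trick is unchanged: the
per-packet metadata is written into the top of the packet's own
headroom, so no separate allocation is needed, and conversion fails
(-EOVERFLOW above) when the headroom cannot hold it. A userspace
sketch of the scheme, with an illustrative struct layout and the
metadata-area accounting omitted:

#include <stddef.h>
#include <stdint.h>

struct frame_meta {
	void *data;
	uint16_t len;
	uint16_t headroom;
};

static struct frame_meta *store_meta(void *hard_start, void *data,
				     void *data_end)
{
	size_t headroom = (char *)data - (char *)hard_start;
	struct frame_meta *meta;

	/* Assure headroom is available for storing info */
	if (headroom < sizeof(*meta))
		return NULL;	/* caller drops the packet */

	/* Store info in top of packet */
	meta = hard_start;
	meta->data = data;
	meta->len = (char *)data_end - (char *)data;
	meta->headroom = headroom - sizeof(*meta);
	return meta;
}
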
 
diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c
index bf6da59..a413430 100644
--- a/kernel/bpf/inode.c
+++ b/kernel/bpf/inode.c
@@ -150,8 +150,154 @@ static int bpf_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 	return 0;
 }
 
+struct map_iter {
+	void *key;
+	bool done;
+};
+
+static struct map_iter *map_iter(struct seq_file *m)
+{
+	return m->private;
+}
+
+static struct bpf_map *seq_file_to_map(struct seq_file *m)
+{
+	return file_inode(m->file)->i_private;
+}
+
+static void map_iter_free(struct map_iter *iter)
+{
+	if (iter) {
+		kfree(iter->key);
+		kfree(iter);
+	}
+}
+
+static struct map_iter *map_iter_alloc(struct bpf_map *map)
+{
+	struct map_iter *iter;
+
+	iter = kzalloc(sizeof(*iter), GFP_KERNEL | __GFP_NOWARN);
+	if (!iter)
+		goto error;
+
+	iter->key = kzalloc(map->key_size, GFP_KERNEL | __GFP_NOWARN);
+	if (!iter->key)
+		goto error;
+
+	return iter;
+
+error:
+	map_iter_free(iter);
+	return NULL;
+}
+
+static void *map_seq_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	struct bpf_map *map = seq_file_to_map(m);
+	void *key = map_iter(m)->key;
+
+	if (map_iter(m)->done)
+		return NULL;
+
+	if (unlikely(v == SEQ_START_TOKEN))
+		goto done;
+
+	if (map->ops->map_get_next_key(map, key, key)) {
+		map_iter(m)->done = true;
+		return NULL;
+	}
+
+done:
+	++(*pos);
+	return key;
+}
+
+static void *map_seq_start(struct seq_file *m, loff_t *pos)
+{
+	if (map_iter(m)->done)
+		return NULL;
+
+	return *pos ? map_iter(m)->key : SEQ_START_TOKEN;
+}
+
+static void map_seq_stop(struct seq_file *m, void *v)
+{
+}
+
+static int map_seq_show(struct seq_file *m, void *v)
+{
+	struct bpf_map *map = seq_file_to_map(m);
+	void *key = map_iter(m)->key;
+
+	if (unlikely(v == SEQ_START_TOKEN)) {
+		seq_puts(m, "# WARNING!! The output is for debug purposes only\n");
+		seq_puts(m, "# WARNING!! The output format will change\n");
+	} else {
+		map->ops->map_seq_show_elem(map, key, m);
+	}
+
+	return 0;
+}
+
+static const struct seq_operations bpffs_map_seq_ops = {
+	.start	= map_seq_start,
+	.next	= map_seq_next,
+	.show	= map_seq_show,
+	.stop	= map_seq_stop,
+};
+
+static int bpffs_map_open(struct inode *inode, struct file *file)
+{
+	struct bpf_map *map = inode->i_private;
+	struct map_iter *iter;
+	struct seq_file *m;
+	int err;
+
+	iter = map_iter_alloc(map);
+	if (!iter)
+		return -ENOMEM;
+
+	err = seq_open(file, &bpffs_map_seq_ops);
+	if (err) {
+		map_iter_free(iter);
+		return err;
+	}
+
+	m = file->private_data;
+	m->private = iter;
+
+	return 0;
+}
+
+static int bpffs_map_release(struct inode *inode, struct file *file)
+{
+	struct seq_file *m = file->private_data;
+
+	map_iter_free(map_iter(m));
+
+	return seq_release(inode, file);
+}
+
+/* bpffs_map_fops should only implement the basic
+ * read operation for a BPF map.  The purpose is to
+ * provide a simple, intuitive way for users to do
+ * "cat bpffs/pathto/a-pinned-map".
+ *
+ * Other operations (e.g. write, lookup...) should be realized by
+ * the userspace tools (e.g. bpftool) through the
+ * BPF_OBJ_GET_INFO_BY_FD and the map's lookup/update
+ * interface.
+ */
+static const struct file_operations bpffs_map_fops = {
+	.open		= bpffs_map_open,
+	.read		= seq_read,
+	.release	= bpffs_map_release,
+};
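
The intended consumer is literally cat(1) on a pinned map;
programmatically it is a plain open-and-read loop. A sketch, with a
hypothetical pin path:

#include <stdio.h>

int main(void)
{
	/* hypothetical pin location under a mounted bpffs */
	FILE *f = fopen("/sys/fs/bpf/a-pinned-map", "r");
	char line[256];

	if (!f) {
		perror("fopen");
		return 1;
	}
	while (fgets(line, sizeof(line), f))
		fputs(line, stdout);
	fclose(f);
	return 0;
}
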
+
 static int bpf_mkobj_ops(struct dentry *dentry, umode_t mode, void *raw,
-			 const struct inode_operations *iops)
+			 const struct inode_operations *iops,
+			 const struct file_operations *fops)
 {
 	struct inode *dir = dentry->d_parent->d_inode;
 	struct inode *inode = bpf_get_inode(dir->i_sb, dir, mode);
@@ -159,6 +305,7 @@ static int bpf_mkobj_ops(struct dentry *dentry, umode_t mode, void *raw,
 		return PTR_ERR(inode);
 
 	inode->i_op = iops;
+	inode->i_fop = fops;
 	inode->i_private = raw;
 
 	bpf_dentry_finalize(dentry, inode, dir);
@@ -167,12 +314,15 @@ static int bpf_mkobj_ops(struct dentry *dentry, umode_t mode, void *raw,
 
 static int bpf_mkprog(struct dentry *dentry, umode_t mode, void *arg)
 {
-	return bpf_mkobj_ops(dentry, mode, arg, &bpf_prog_iops);
+	return bpf_mkobj_ops(dentry, mode, arg, &bpf_prog_iops, NULL);
 }
 
 static int bpf_mkmap(struct dentry *dentry, umode_t mode, void *arg)
 {
-	return bpf_mkobj_ops(dentry, mode, arg, &bpf_map_iops);
+	struct bpf_map *map = arg;
+
+	return bpf_mkobj_ops(dentry, mode, arg, &bpf_map_iops,
+			     map->btf ? &bpffs_map_fops : NULL);
 }
 
 static struct dentry *
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 4ca46df..fe23dc5a 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -11,6 +11,7 @@
  */
 #include <linux/bpf.h>
 #include <linux/bpf_trace.h>
+#include <linux/btf.h>
 #include <linux/syscalls.h>
 #include <linux/slab.h>
 #include <linux/sched/signal.h>
@@ -26,6 +27,7 @@
 #include <linux/cred.h>
 #include <linux/timekeeping.h>
 #include <linux/ctype.h>
+#include <linux/btf.h>
 
 #define IS_FD_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PROG_ARRAY || \
 			   (map)->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY || \
@@ -250,6 +252,7 @@ static void bpf_map_free_deferred(struct work_struct *work)
 
 	bpf_map_uncharge_memlock(map);
 	security_bpf_map_free(map);
+	btf_put(map->btf);
 	/* implementation dependent freeing */
 	map->ops->map_free(map);
 }
@@ -415,7 +418,7 @@ static int bpf_obj_name_cpy(char *dst, const char *src)
 	return 0;
 }
 
-#define BPF_MAP_CREATE_LAST_FIELD map_ifindex
+#define BPF_MAP_CREATE_LAST_FIELD btf_value_id
 /* called via syscall */
 static int map_create(union bpf_attr *attr)
 {
@@ -449,6 +452,33 @@ static int map_create(union bpf_attr *attr)
 	atomic_set(&map->refcnt, 1);
 	atomic_set(&map->usercnt, 1);
 
+	if (bpf_map_support_seq_show(map) &&
+	    (attr->btf_key_id || attr->btf_value_id)) {
+		struct btf *btf;
+
+		if (!attr->btf_key_id || !attr->btf_value_id) {
+			err = -EINVAL;
+			goto free_map_nouncharge;
+		}
+
+		btf = btf_get_by_fd(attr->btf_fd);
+		if (IS_ERR(btf)) {
+			err = PTR_ERR(btf);
+			goto free_map_nouncharge;
+		}
+
+		err = map->ops->map_check_btf(map, btf, attr->btf_key_id,
+					      attr->btf_value_id);
+		if (err) {
+			btf_put(btf);
+			goto free_map_nouncharge;
+		}
+
+		map->btf = btf;
+		map->btf_key_id = attr->btf_key_id;
+		map->btf_value_id = attr->btf_value_id;
+	}
+
 	err = security_bpf_map_alloc(map);
 	if (err)
 		goto free_map_nouncharge;
@@ -481,6 +511,7 @@ static int map_create(union bpf_attr *attr)
 free_map_sec:
 	security_bpf_map_free(map);
 free_map_nouncharge:
+	btf_put(map->btf);
 	map->ops->map_free(map);
 	return err;
 }
@@ -2016,6 +2047,8 @@ static int bpf_obj_get_info_by_fd(const union bpf_attr *attr,
 	else if (f.file->f_op == &bpf_map_fops)
 		err = bpf_map_get_info_by_fd(f.file->private_data, attr,
 					     uattr);
+	else if (f.file->f_op == &btf_fops)
+		err = btf_get_info_by_fd(f.file->private_data, attr, uattr);
 	else
 		err = -EINVAL;
 
@@ -2023,6 +2056,19 @@ static int bpf_obj_get_info_by_fd(const union bpf_attr *attr,
 	return err;
 }
 
+#define BPF_BTF_LOAD_LAST_FIELD btf_log_level
+
+static int bpf_btf_load(const union bpf_attr *attr)
+{
+	if (CHECK_ATTR(BPF_BTF_LOAD))
+		return -EINVAL;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	return btf_new_fd(attr);
+}
+
 SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
 {
 	union bpf_attr attr = {};
@@ -2103,6 +2149,9 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
 	case BPF_RAW_TRACEPOINT_OPEN:
 		err = bpf_raw_tracepoint_open(&attr);
 		break;
+	case BPF_BTF_LOAD:
+		err = bpf_btf_load(&attr);
+		break;
 	default:
 		err = -EINVAL;
 		break;
diff --git a/net/Kconfig b/net/Kconfig
index 0428f12..6fa1a44 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -423,6 +423,9 @@
 	  on MAY_USE_DEVLINK to ensure they do not cause link errors when
 	  devlink is a loadable module and the driver using it is built-in.
 
+config PAGE_POOL
+       bool
+
 endif   # if NET
 
 # Used by archs to tell that they support BPF JIT compiler plus which flavour.
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index 2ced486..68c3578 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -170,7 +170,8 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
 	xdp.rxq = &rxqueue->xdp_rxq;
 
 	retval = bpf_test_run(prog, &xdp, repeat, &duration);
-	if (xdp.data != data + XDP_PACKET_HEADROOM + NET_IP_ALIGN)
+	if (xdp.data != data + XDP_PACKET_HEADROOM + NET_IP_ALIGN ||
+	    xdp.data_end != xdp.data + size)
 		size = xdp.data_end - xdp.data;
 	ret = bpf_test_finish(kattr, uattr, xdp.data, size, retval, duration);
 	kfree(data);
diff --git a/net/core/Makefile b/net/core/Makefile
index 6dbbba8..7080417 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -14,6 +14,7 @@
 			fib_notifier.o xdp.o
 
 obj-y += net-sysfs.o
+obj-$(CONFIG_PAGE_POOL) += page_pool.o
 obj-$(CONFIG_PROC_FS) += net-procfs.o
 obj-$(CONFIG_NET_PKTGEN) += pktgen.o
 obj-$(CONFIG_NETPOLL) += netpoll.o
diff --git a/net/core/dev.c b/net/core/dev.c
index af0558b..c624a04 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1285,6 +1285,7 @@ int dev_set_alias(struct net_device *dev, const char *alias, size_t len)
 
 	return len;
 }
+EXPORT_SYMBOL(dev_set_alias);
 
 /**
  *	dev_get_alias - get ifalias of a device
@@ -3996,9 +3997,9 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
 				     struct bpf_prog *xdp_prog)
 {
 	struct netdev_rx_queue *rxqueue;
+	void *orig_data, *orig_data_end;
 	u32 metalen, act = XDP_DROP;
 	struct xdp_buff xdp;
-	void *orig_data;
 	int hlen, off;
 	u32 mac_len;
 
@@ -4037,6 +4038,7 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
 	xdp.data_meta = xdp.data;
 	xdp.data_end = xdp.data + hlen;
 	xdp.data_hard_start = skb->data - skb_headroom(skb);
+	orig_data_end = xdp.data_end;
 	orig_data = xdp.data;
 
 	rxqueue = netif_get_rxqueue(skb);
@@ -4051,6 +4053,13 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
 		__skb_push(skb, -off);
 	skb->mac_header += off;
 
+	/* Check if bpf_xdp_adjust_tail was used; it can only "shrink"
+	 * the packet.
+	 */
+	off = orig_data_end - xdp.data_end;
+	if (off != 0)
+		skb_set_tail_pointer(skb, xdp.data_end - xdp.data);
+
 	switch (act) {
 	case XDP_REDIRECT:
 	case XDP_TX:
diff --git a/net/core/dst.c b/net/core/dst.c
index 007aa0b..2d9b37f 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -58,6 +58,7 @@ const struct dst_metrics dst_default_metrics = {
 	 */
 	.refcnt = REFCOUNT_INIT(1),
 };
+EXPORT_SYMBOL(dst_default_metrics);
 
 void dst_init(struct dst_entry *dst, struct dst_ops *ops,
 	      struct net_device *dev, int initial_ref, int initial_obsolete,
diff --git a/net/core/filter.c b/net/core/filter.c
index d31aff9..e25bc4a 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -2692,8 +2692,9 @@ static unsigned long xdp_get_metalen(const struct xdp_buff *xdp)
 
 BPF_CALL_2(bpf_xdp_adjust_head, struct xdp_buff *, xdp, int, offset)
 {
+	void *xdp_frame_end = xdp->data_hard_start + sizeof(struct xdp_frame);
 	unsigned long metalen = xdp_get_metalen(xdp);
-	void *data_start = xdp->data_hard_start + metalen;
+	void *data_start = xdp_frame_end + metalen;
 	void *data = xdp->data + offset;
 
 	if (unlikely(data < data_start ||
@@ -2717,14 +2718,39 @@ static const struct bpf_func_proto bpf_xdp_adjust_head_proto = {
 	.arg2_type	= ARG_ANYTHING,
 };
 
+BPF_CALL_2(bpf_xdp_adjust_tail, struct xdp_buff *, xdp, int, offset)
+{
+	void *data_end = xdp->data_end + offset;
+
+	/* Only shrinking is allowed for now. */
+	if (unlikely(offset >= 0))
+		return -EINVAL;
+
+	if (unlikely(data_end < xdp->data + ETH_HLEN))
+		return -EINVAL;
+
+	xdp->data_end = data_end;
+
+	return 0;
+}
+
+static const struct bpf_func_proto bpf_xdp_adjust_tail_proto = {
+	.func		= bpf_xdp_adjust_tail,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type	= ARG_ANYTHING,
+};
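
A sketch of an XDP program exercising the new helper, trimming every
packet to at most 64 bytes. The section name and the helper-pointer
declaration follow common conventions and are assumptions, not part of
this patch:

#include <linux/bpf.h>

static int (*bpf_xdp_adjust_tail)(struct xdp_md *xdp, int offset) =
	(void *)BPF_FUNC_xdp_adjust_tail;

__attribute__((section("xdp"), used))
int xdp_trim(struct xdp_md *ctx)
{
	int len = ctx->data_end - ctx->data;

	/* Only shrinking is allowed: the offset must be negative */
	if (len > 64)
		bpf_xdp_adjust_tail(ctx, -(len - 64));

	return XDP_PASS;
}
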
+
 BPF_CALL_2(bpf_xdp_adjust_meta, struct xdp_buff *, xdp, int, offset)
 {
+	void *xdp_frame_end = xdp->data_hard_start + sizeof(struct xdp_frame);
 	void *meta = xdp->data_meta + offset;
 	unsigned long metalen = xdp->data - meta;
 
 	if (xdp_data_meta_unsupported(xdp))
 		return -ENOTSUPP;
-	if (unlikely(meta < xdp->data_hard_start ||
+	if (unlikely(meta < xdp_frame_end ||
 		     meta > xdp->data))
 		return -EINVAL;
 	if (unlikely((metalen & (sizeof(__u32) - 1)) ||
@@ -2749,13 +2775,18 @@ static int __bpf_tx_xdp(struct net_device *dev,
 			struct xdp_buff *xdp,
 			u32 index)
 {
+	struct xdp_frame *xdpf;
 	int err;
 
 	if (!dev->netdev_ops->ndo_xdp_xmit) {
 		return -EOPNOTSUPP;
 	}
 
-	err = dev->netdev_ops->ndo_xdp_xmit(dev, xdp);
+	xdpf = convert_to_xdp_frame(xdp);
+	if (unlikely(!xdpf))
+		return -EOVERFLOW;
+
+	err = dev->netdev_ops->ndo_xdp_xmit(dev, xdpf);
 	if (err)
 		return err;
 	dev->netdev_ops->ndo_xdp_flush(dev);
@@ -2771,11 +2802,19 @@ static int __bpf_tx_xdp_map(struct net_device *dev_rx, void *fwd,
 
 	if (map->map_type == BPF_MAP_TYPE_DEVMAP) {
 		struct net_device *dev = fwd;
+		struct xdp_frame *xdpf;
 
 		if (!dev->netdev_ops->ndo_xdp_xmit)
 			return -EOPNOTSUPP;
 
-		err = dev->netdev_ops->ndo_xdp_xmit(dev, xdp);
+		xdpf = convert_to_xdp_frame(xdp);
+		if (unlikely(!xdpf))
+			return -EOVERFLOW;
+
+		/* TODO: move this into the map code instead, for bulk support
+		 * err = dev_map_enqueue(dev, xdp);
+		 */
+		err = dev->netdev_ops->ndo_xdp_xmit(dev, xdpf);
 		if (err)
 			return err;
 		__dev_map_insert_ctx(map, index);
@@ -3053,7 +3092,8 @@ bool bpf_helper_changes_pkt_data(void *func)
 	    func == bpf_l4_csum_replace ||
 	    func == bpf_xdp_adjust_head ||
 	    func == bpf_xdp_adjust_meta ||
-	    func == bpf_msg_pull_data)
+	    func == bpf_msg_pull_data ||
+	    func == bpf_xdp_adjust_tail)
 		return true;
 
 	return false;
@@ -3867,6 +3907,8 @@ xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_xdp_redirect_proto;
 	case BPF_FUNC_redirect_map:
 		return &bpf_xdp_redirect_map_proto;
+	case BPF_FUNC_xdp_adjust_tail:
+		return &bpf_xdp_adjust_tail_proto;
 	default:
 		return bpf_base_func_proto(func_id);
 	}
diff --git a/net/core/page_pool.c b/net/core/page_pool.c
new file mode 100644
index 0000000..68bf0720
--- /dev/null
+++ b/net/core/page_pool.c
@@ -0,0 +1,317 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * page_pool.c
+ *	Author:	Jesper Dangaard Brouer <netoptimizer@brouer.com>
+ *	Copyright (C) 2016 Red Hat, Inc.
+ */
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+
+#include <net/page_pool.h>
+#include <linux/dma-direction.h>
+#include <linux/dma-mapping.h>
+#include <linux/page-flags.h>
+#include <linux/mm.h> /* for __put_page() */
+
+static int page_pool_init(struct page_pool *pool,
+			  const struct page_pool_params *params)
+{
+	unsigned int ring_qsize = 1024; /* Default */
+
+	memcpy(&pool->p, params, sizeof(pool->p));
+
+	/* Validate only known flags were used */
+	if (pool->p.flags & ~(PP_FLAG_ALL))
+		return -EINVAL;
+
+	if (pool->p.pool_size)
+		ring_qsize = pool->p.pool_size;
+
+	/* Sanity limit mem that can be pinned down */
+	if (ring_qsize > 32768)
+		return -E2BIG;
+
+	/* DMA direction is either DMA_FROM_DEVICE or DMA_BIDIRECTIONAL.
+	 * DMA_BIDIRECTIONAL is for allowing page used for DMA sending,
+	 * which is the XDP_TX use-case.
+	 */
+	if ((pool->p.dma_dir != DMA_FROM_DEVICE) &&
+	    (pool->p.dma_dir != DMA_BIDIRECTIONAL))
+		return -EINVAL;
+
+	if (ptr_ring_init(&pool->ring, ring_qsize, GFP_KERNEL) < 0)
+		return -ENOMEM;
+
+	return 0;
+}
+
+struct page_pool *page_pool_create(const struct page_pool_params *params)
+{
+	struct page_pool *pool;
+	int err = 0;
+
+	pool = kzalloc_node(sizeof(*pool), GFP_KERNEL, params->nid);
+	if (!pool)
+		return ERR_PTR(-ENOMEM);
+
+	err = page_pool_init(pool, params);
+	if (err < 0) {
+		pr_warn("%s() gave up with errno %d\n", __func__, err);
+		kfree(pool);
+		return ERR_PTR(err);
+	}
+	return pool;
+}
+EXPORT_SYMBOL(page_pool_create);
+
+/* fast path */
+static struct page *__page_pool_get_cached(struct page_pool *pool)
+{
+	struct ptr_ring *r = &pool->ring;
+	struct page *page;
+
+	/* Quicker fallback, avoid locks when ring is empty */
+	if (__ptr_ring_empty(r))
+		return NULL;
+
+	/* Test for safe-context, caller should provide this guarantee */
+	if (likely(in_serving_softirq())) {
+		if (likely(pool->alloc.count)) {
+			/* Fast-path */
+			page = pool->alloc.cache[--pool->alloc.count];
+			return page;
+		}
+		/* Slower-path: Alloc array empty, time to refill
+		 *
+		 * Open-coded bulk ptr_ring consumer.
+		 *
+		 * Discussion: the ring consumer lock is not really
+		 * needed due to the softirq/NAPI protection, but
+		 * we will later need the ability to reclaim pages on
+		 * the ring. Thus, keep the locks.
+		 */
+		spin_lock(&r->consumer_lock);
+		while ((page = __ptr_ring_consume(r))) {
+			if (pool->alloc.count == PP_ALLOC_CACHE_REFILL)
+				break;
+			pool->alloc.cache[pool->alloc.count++] = page;
+		}
+		spin_unlock(&r->consumer_lock);
+		return page;
+	}
+
+	/* Slow-path: Get page from locked ring queue */
+	page = ptr_ring_consume(&pool->ring);
+	return page;
+}
+
+/* slow path */
+noinline
+static struct page *__page_pool_alloc_pages_slow(struct page_pool *pool,
+						 gfp_t _gfp)
+{
+	struct page *page;
+	gfp_t gfp = _gfp;
+	dma_addr_t dma;
+
+	/* We could always set __GFP_COMP, and avoid this branch, as
+	 * prep_new_page() can handle order-0 with __GFP_COMP.
+	 */
+	if (pool->p.order)
+		gfp |= __GFP_COMP;
+
+	/* FUTURE development:
+	 *
+	 * Current slow-path essentially falls back to single page
+	 * allocations, which doesn't improve performance.  This code
+	 * needs bulk allocation support from the page allocator code.
+	 */
+
+	/* Cache was empty, do real allocation */
+	page = alloc_pages_node(pool->p.nid, gfp, pool->p.order);
+	if (!page)
+		return NULL;
+
+	if (!(pool->p.flags & PP_FLAG_DMA_MAP))
+		goto skip_dma_map;
+
+	/* Setup DMA mapping: use page->private for DMA-addr
+	 * This mapping is kept for lifetime of page, until leaving pool.
+	 */
+	dma = dma_map_page(pool->p.dev, page, 0,
+			   (PAGE_SIZE << pool->p.order),
+			   pool->p.dma_dir);
+	if (dma_mapping_error(pool->p.dev, dma)) {
+		put_page(page);
+		return NULL;
+	}
+	set_page_private(page, dma); /* page->private = dma; */
+
+skip_dma_map:
+	/* A page that was just allocated should/must have refcnt 1. */
+	return page;
+}
+
+/* Use page_pool in place of alloc_pages() API calls; it additionally
+ * provides a synchronization guarantee on the allocation side.
+ */
+struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp)
+{
+	struct page *page;
+
+	/* Fast-path: Get a page from cache */
+	page = __page_pool_get_cached(pool);
+	if (page)
+		return page;
+
+	/* Slow-path: cache empty, do real allocation */
+	page = __page_pool_alloc_pages_slow(pool, gfp);
+	return page;
+}
+EXPORT_SYMBOL(page_pool_alloc_pages);
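
A hedged driver-side sketch of how the pieces fit together. The
page_pool_params fields set here are exactly the ones this file reads;
the surrounding RX wiring is illustrative only:

#include <linux/dma-direction.h>
#include <net/page_pool.h>

static struct page_pool *rx_pool_setup(struct device *dev, int nid)
{
	struct page_pool_params pp = {
		.order     = 0,			/* one page per frame */
		.flags     = PP_FLAG_DMA_MAP,	/* pool keeps the DMA mapping */
		.pool_size = 1024,		/* recycle ring entries */
		.nid       = nid,
		.dev       = dev,
		.dma_dir   = DMA_FROM_DEVICE,	/* DMA_BIDIRECTIONAL for XDP_TX */
	};

	return page_pool_create(&pp);	/* ERR_PTR() on failure */
}

/* RX refill path sketch:
 *	page = page_pool_alloc_pages(pool, GFP_ATOMIC);
 *	dma  = page_private(page);	// DMA addr stashed by the pool
 */
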
+
+/* Cleanup page_pool state from page */
+static void __page_pool_clean_page(struct page_pool *pool,
+				   struct page *page)
+{
+	if (!(pool->p.flags & PP_FLAG_DMA_MAP))
+		return;
+
+	/* DMA unmap */
+	dma_unmap_page(pool->p.dev, page_private(page),
+		       PAGE_SIZE << pool->p.order, pool->p.dma_dir);
+	set_page_private(page, 0);
+}
+
+/* Return a page to the page allocator, cleaning up our state */
+static void __page_pool_return_page(struct page_pool *pool, struct page *page)
+{
+	__page_pool_clean_page(pool, page);
+	put_page(page);
+	/* An optimization would be to call __free_pages(page, pool->p.order)
+	 * knowing page is not part of page-cache (thus avoiding a
+	 * __page_cache_release() call).
+	 */
+}
+
+static bool __page_pool_recycle_into_ring(struct page_pool *pool,
+				   struct page *page)
+{
+	int ret;
+	/* BH protection not needed if current is serving softirq */
+	if (in_serving_softirq())
+		ret = ptr_ring_produce(&pool->ring, page);
+	else
+		ret = ptr_ring_produce_bh(&pool->ring, page);
+
+	return ret == 0;
+}
+
+/* Only allow direct recycling in special circumstances, into the
+ * alloc side cache.  E.g. during RX-NAPI processing for XDP_DROP use-case.
+ *
+ * Caller must provide appropriate safe context.
+ */
+static bool __page_pool_recycle_direct(struct page *page,
+				       struct page_pool *pool)
+{
+	if (unlikely(pool->alloc.count == PP_ALLOC_CACHE_SIZE))
+		return false;
+
+	/* Caller MUST have verified/know (page_ref_count(page) == 1) */
+	pool->alloc.cache[pool->alloc.count++] = page;
+	return true;
+}
+
+void __page_pool_put_page(struct page_pool *pool,
+			  struct page *page, bool allow_direct)
+{
+	/* This allocator is optimized for the XDP mode that uses
+	 * one-frame-per-page, but has fallbacks that act like the
+	 * regular page allocator APIs.
+	 *
+	 * refcnt == 1 means page_pool owns page, and can recycle it.
+	 */
+	if (likely(page_ref_count(page) == 1)) {
+		/* Read barrier done in page_ref_count / READ_ONCE */
+
+		if (allow_direct && in_serving_softirq())
+			if (__page_pool_recycle_direct(page, pool))
+				return;
+
+		if (!__page_pool_recycle_into_ring(pool, page)) {
+			/* Cache full, fallback to free pages */
+			__page_pool_return_page(pool, page);
+		}
+		return;
+	}
+	/* Fallback/non-XDP mode: the API user has an elevated refcnt.
+	 *
+	 * Many drivers split up the page into fragments, and some
+	 * want to keep doing this to save memory and do refcnt based
+	 * recycling. Support this use case too, to ease drivers
+	 * switching between XDP/non-XDP.
+	 *
+	 * In case page_pool maintains the DMA mapping, the API user
+	 * must call page_pool_put_page once.  In this elevated-refcnt
+	 * case, the DMA is unmapped/released, as the driver is likely
+	 * doing refcnt-based recycle tricks, meaning another process
+	 * will be invoking put_page.
+	 */
+	__page_pool_clean_page(pool, page);
+	put_page(page);
+}
+EXPORT_SYMBOL(__page_pool_put_page);
+
+static void __page_pool_empty_ring(struct page_pool *pool)
+{
+	struct page *page;
+
+	/* Empty recycle ring */
+	while ((page = ptr_ring_consume(&pool->ring))) {
+		/* Verify the refcnt invariant of cached pages */
+		if (page_ref_count(page) != 1)
+			pr_crit("%s() page_pool refcnt %d violation\n",
+				__func__, page_ref_count(page));
+
+		__page_pool_return_page(pool, page);
+	}
+}
+
+static void __page_pool_destroy_rcu(struct rcu_head *rcu)
+{
+	struct page_pool *pool;
+
+	pool = container_of(rcu, struct page_pool, rcu);
+
+	WARN(pool->alloc.count, "API usage violation");
+
+	__page_pool_empty_ring(pool);
+	ptr_ring_cleanup(&pool->ring, NULL);
+	kfree(pool);
+}
+
+/* Cleanup and release resources */
+void page_pool_destroy(struct page_pool *pool)
+{
+	struct page *page;
+
+	/* Empty the alloc cache; assume the caller made sure it is
+	 * no longer in use, and page_pool_alloc_pages() cannot be
+	 * called concurrently.
+	 */
+	while (pool->alloc.count) {
+		page = pool->alloc.cache[--pool->alloc.count];
+		__page_pool_return_page(pool, page);
+	}
+
+	/* No more consumers should exist, but producers could still
+	 * be in-flight.
+	 */
+	__page_pool_empty_ring(pool);
+
+	/* An xdp_mem_allocator can still ref page_pool pointer */
+	call_rcu(&pool->rcu, __page_pool_destroy_rcu);
+}
+EXPORT_SYMBOL(page_pool_destroy);
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 4593692..8080254 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -785,13 +785,15 @@ int rtnl_put_cacheinfo(struct sk_buff *skb, struct dst_entry *dst, u32 id,
 		       long expires, u32 error)
 {
 	struct rta_cacheinfo ci = {
-		.rta_lastuse = jiffies_delta_to_clock_t(jiffies - dst->lastuse),
-		.rta_used = dst->__use,
-		.rta_clntref = atomic_read(&(dst->__refcnt)),
 		.rta_error = error,
 		.rta_id =  id,
 	};
 
+	if (dst) {
+		ci.rta_lastuse = jiffies_delta_to_clock_t(jiffies - dst->lastuse);
+		ci.rta_used = dst->__use;
+		ci.rta_clntref = atomic_read(&dst->__refcnt);
+	}
 	if (expires) {
 		unsigned long clock;
 
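
The hunk above makes rtnl_put_cacheinfo() tolerate a NULL dst, so a dump path
without a cached route can still emit RTA_CACHEINFO. A sketch of such a caller
(expires comes from wherever the caller tracks it):

/* With dst == NULL the dst-derived fields (lastuse/used/clntref)
 * are simply left zeroed rather than dereferenced.
 */
if (rtnl_put_cacheinfo(skb, NULL, 0, expires, 0) < 0)
	goto nla_put_failure;
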
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 345b518..ff49e35 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -1839,6 +1839,20 @@ int ___pskb_trim(struct sk_buff *skb, unsigned int len)
 }
 EXPORT_SYMBOL(___pskb_trim);
 
+/* Note: use pskb_trim_rcsum() instead of calling this directly.
+ */
+int pskb_trim_rcsum_slow(struct sk_buff *skb, unsigned int len)
+{
+	if (skb->ip_summed == CHECKSUM_COMPLETE) {
+		int delta = skb->len - len;
+
+		skb->csum = csum_sub(skb->csum,
+				     skb_checksum(skb, len, delta, 0));
+	}
+	return __pskb_trim(skb, len);
+}
+EXPORT_SYMBOL(pskb_trim_rcsum_slow);
+
 /**
  *	__pskb_pull_tail - advance tail of skb header
  *	@skb: buffer to reallocate
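
pskb_trim_rcsum_slow() only performs the CHECKSUM_COMPLETE adjustment; the
inline fast path it pairs with presumably lives in include/linux/skbuff.h
(not shown in this hunk) and looks roughly like:

static inline int pskb_trim_rcsum(struct sk_buff *skb, unsigned int len)
{
	if (likely(len >= skb->len))
		return 0;		/* nothing to trim */
	return pskb_trim_rcsum_slow(skb, len);
}
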
diff --git a/net/core/sock.c b/net/core/sock.c
index 6444525..b2c3db1 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -905,7 +905,10 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
 	case SO_RCVLOWAT:
 		if (val < 0)
 			val = INT_MAX;
-		sk->sk_rcvlowat = val ? : 1;
+		if (sock->ops->set_rcvlowat)
+			ret = sock->ops->set_rcvlowat(sk, val);
+		else
+			sk->sk_rcvlowat = val ? : 1;
 		break;
 
 	case SO_RCVTIMEO:
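
A small userspace illustration of the new hook: for protocols that provide
set_rcvlowat (TCP below, via tcp_set_rcvlowat()), SO_RCVLOWAT now does more
than store sk_rcvlowat, e.g. growing the receive buffer so the watermark is
actually reachable. Plain POSIX C, nothing kernel-specific:

#include <sys/socket.h>

/* Ask not to be woken by poll()/recvmsg() before 64KB are queued. */
static int set_lowat_64k(int fd)
{
	int lowat = 64 * 1024;

	return setsockopt(fd, SOL_SOCKET, SO_RCVLOWAT,
			  &lowat, sizeof(lowat));
}
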
diff --git a/net/core/xdp.c b/net/core/xdp.c
index 097a0f7..0c86b53 100644
--- a/net/core/xdp.c
+++ b/net/core/xdp.c
@@ -5,6 +5,10 @@
  */
 #include <linux/types.h>
 #include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/idr.h>
+#include <linux/rhashtable.h>
+#include <net/page_pool.h>
 
 #include <net/xdp.h>
 
@@ -13,6 +17,104 @@
 #define REG_STATE_UNREGISTERED	0x2
 #define REG_STATE_UNUSED	0x3
 
+static DEFINE_IDA(mem_id_pool);
+static DEFINE_MUTEX(mem_id_lock);
+#define MEM_ID_MAX 0xFFFE
+#define MEM_ID_MIN 1
+static int mem_id_next = MEM_ID_MIN;
+
+static bool mem_id_init; /* false */
+static struct rhashtable *mem_id_ht;
+
+struct xdp_mem_allocator {
+	struct xdp_mem_info mem;
+	union {
+		void *allocator;
+		struct page_pool *page_pool;
+	};
+	struct rhash_head node;
+	struct rcu_head rcu;
+};
+
+static u32 xdp_mem_id_hashfn(const void *data, u32 len, u32 seed)
+{
+	const u32 *k = data;
+	const u32 key = *k;
+
+	BUILD_BUG_ON(FIELD_SIZEOF(struct xdp_mem_allocator, mem.id)
+		     != sizeof(u32));
+
+	/* Use cyclic increasing ID as direct hash key, see rht_bucket_index */
+	return key << RHT_HASH_RESERVED_SPACE;
+}
+
+static int xdp_mem_id_cmp(struct rhashtable_compare_arg *arg,
+			  const void *ptr)
+{
+	const struct xdp_mem_allocator *xa = ptr;
+	u32 mem_id = *(u32 *)arg->key;
+
+	return xa->mem.id != mem_id;
+}
+
+static const struct rhashtable_params mem_id_rht_params = {
+	.nelem_hint = 64,
+	.head_offset = offsetof(struct xdp_mem_allocator, node),
+	.key_offset  = offsetof(struct xdp_mem_allocator, mem.id),
+	.key_len = FIELD_SIZEOF(struct xdp_mem_allocator, mem.id),
+	.max_size = MEM_ID_MAX,
+	.min_size = 8,
+	.automatic_shrinking = true,
+	.hashfn    = xdp_mem_id_hashfn,
+	.obj_cmpfn = xdp_mem_id_cmp,
+};
+
+static void __xdp_mem_allocator_rcu_free(struct rcu_head *rcu)
+{
+	struct xdp_mem_allocator *xa;
+
+	xa = container_of(rcu, struct xdp_mem_allocator, rcu);
+
+	/* Allow this ID to be reused */
+	ida_simple_remove(&mem_id_pool, xa->mem.id);
+
+	/* Notice: the driver is expected to free the *allocator,
+	 * e.g. the page_pool, and MUST also do so via RCU.
+	 */
+
+	/* Poison memory */
+	xa->mem.id = 0xFFFF;
+	xa->mem.type = 0xF0F0;
+	xa->allocator = (void *)0xDEAD9001;
+
+	kfree(xa);
+}
+
+static void __xdp_rxq_info_unreg_mem_model(struct xdp_rxq_info *xdp_rxq)
+{
+	struct xdp_mem_allocator *xa;
+	int id = xdp_rxq->mem.id;
+	int err;
+
+	if (id == 0)
+		return;
+
+	mutex_lock(&mem_id_lock);
+
+	xa = rhashtable_lookup(mem_id_ht, &id, mem_id_rht_params);
+	if (!xa) {
+		mutex_unlock(&mem_id_lock);
+		return;
+	}
+
+	err = rhashtable_remove_fast(mem_id_ht, &xa->node, mem_id_rht_params);
+	WARN_ON(err);
+
+	call_rcu(&xa->rcu, __xdp_mem_allocator_rcu_free);
+
+	mutex_unlock(&mem_id_lock);
+}
+
 void xdp_rxq_info_unreg(struct xdp_rxq_info *xdp_rxq)
 {
 	/* Simplify driver cleanup code paths, allow unreg "unused" */
@@ -21,8 +123,14 @@ void xdp_rxq_info_unreg(struct xdp_rxq_info *xdp_rxq)
 
 	WARN(!(xdp_rxq->reg_state == REG_STATE_REGISTERED), "Driver BUG");
 
+	__xdp_rxq_info_unreg_mem_model(xdp_rxq);
+
 	xdp_rxq->reg_state = REG_STATE_UNREGISTERED;
 	xdp_rxq->dev = NULL;
+
+	/* Reset mem info to defaults */
+	xdp_rxq->mem.id = 0;
+	xdp_rxq->mem.type = 0;
 }
 EXPORT_SYMBOL_GPL(xdp_rxq_info_unreg);
 
@@ -71,3 +179,164 @@ bool xdp_rxq_info_is_reg(struct xdp_rxq_info *xdp_rxq)
 	return (xdp_rxq->reg_state == REG_STATE_REGISTERED);
 }
 EXPORT_SYMBOL_GPL(xdp_rxq_info_is_reg);
+
+static int __mem_id_init_hash_table(void)
+{
+	struct rhashtable *rht;
+	int ret;
+
+	if (unlikely(mem_id_init))
+		return 0;
+
+	rht = kzalloc(sizeof(*rht), GFP_KERNEL);
+	if (!rht)
+		return -ENOMEM;
+
+	ret = rhashtable_init(rht, &mem_id_rht_params);
+	if (ret < 0) {
+		kfree(rht);
+		return ret;
+	}
+	mem_id_ht = rht;
+	smp_mb(); /* mutex lock should provide enough pairing */
+	mem_id_init = true;
+
+	return 0;
+}
+
+/* Allocate a cyclic ID that maps to an allocator pointer.
+ * See: https://www.kernel.org/doc/html/latest/core-api/idr.html
+ *
+ * Caller must lock mem_id_lock.
+ */
+static int __mem_id_cyclic_get(gfp_t gfp)
+{
+	int retries = 1;
+	int id;
+
+again:
+	id = ida_simple_get(&mem_id_pool, mem_id_next, MEM_ID_MAX, gfp);
+	if (id < 0) {
+		if (id == -ENOSPC) {
+			/* Cyclic allocator, reset next id */
+			if (retries--) {
+				mem_id_next = MEM_ID_MIN;
+				goto again;
+			}
+		}
+		return id; /* errno */
+	}
+	mem_id_next = id + 1;
+
+	return id;
+}
+
+static bool __is_supported_mem_type(enum xdp_mem_type type)
+{
+	if (type == MEM_TYPE_PAGE_POOL)
+		return is_page_pool_compiled_in();
+
+	if (type >= MEM_TYPE_MAX)
+		return false;
+
+	return true;
+}
+
+int xdp_rxq_info_reg_mem_model(struct xdp_rxq_info *xdp_rxq,
+			       enum xdp_mem_type type, void *allocator)
+{
+	struct xdp_mem_allocator *xdp_alloc;
+	gfp_t gfp = GFP_KERNEL;
+	int id, errno, ret;
+	void *ptr;
+
+	if (xdp_rxq->reg_state != REG_STATE_REGISTERED) {
+		WARN(1, "Missing register, driver bug");
+		return -EFAULT;
+	}
+
+	if (!__is_supported_mem_type(type))
+		return -EOPNOTSUPP;
+
+	xdp_rxq->mem.type = type;
+
+	if (!allocator) {
+		if (type == MEM_TYPE_PAGE_POOL)
+			return -EINVAL; /* page_pool requires an allocator */
+		return 0;
+	}
+
+	/* Delay init of rhashtable to save memory if feature isn't used */
+	if (!mem_id_init) {
+		mutex_lock(&mem_id_lock);
+		ret = __mem_id_init_hash_table();
+		mutex_unlock(&mem_id_lock);
+		if (ret < 0) {
+			WARN_ON(1);
+			return ret;
+		}
+	}
+
+	xdp_alloc = kzalloc(sizeof(*xdp_alloc), gfp);
+	if (!xdp_alloc)
+		return -ENOMEM;
+
+	mutex_lock(&mem_id_lock);
+	id = __mem_id_cyclic_get(gfp);
+	if (id < 0) {
+		errno = id;
+		goto err;
+	}
+	xdp_rxq->mem.id = id;
+	xdp_alloc->mem  = xdp_rxq->mem;
+	xdp_alloc->allocator = allocator;
+
+	/* Insert allocator into ID lookup table */
+	ptr = rhashtable_insert_slow(mem_id_ht, &id, &xdp_alloc->node);
+	if (IS_ERR(ptr)) {
+		errno = PTR_ERR(ptr);
+		goto err;
+	}
+
+	mutex_unlock(&mem_id_lock);
+
+	return 0;
+err:
+	mutex_unlock(&mem_id_lock);
+	kfree(xdp_alloc);
+	return errno;
+}
+EXPORT_SYMBOL_GPL(xdp_rxq_info_reg_mem_model);
+
+void xdp_return_frame(struct xdp_frame *xdpf)
+{
+	struct xdp_mem_info *mem = &xdpf->mem;
+	struct xdp_mem_allocator *xa;
+	void *data = xdpf->data;
+	struct page *page;
+
+	switch (mem->type) {
+	case MEM_TYPE_PAGE_POOL:
+		rcu_read_lock();
+		/* mem->id is valid, checked in xdp_rxq_info_reg_mem_model() */
+		xa = rhashtable_lookup(mem_id_ht, &mem->id, mem_id_rht_params);
+		page = virt_to_head_page(data);
+		if (xa)
+			page_pool_put_page(xa->page_pool, page);
+		else
+			put_page(page);
+		rcu_read_unlock();
+		break;
+	case MEM_TYPE_PAGE_SHARED:
+		page_frag_free(data);
+		break;
+	case MEM_TYPE_PAGE_ORDER0:
+		page = virt_to_page(data); /* Assumes an order-0 page */
+		put_page(page);
+		break;
+	default:
+		/* Not possible, checked in xdp_rxq_info_reg_mem_model() */
+		break;
+	}
+}
+EXPORT_SYMBOL_GPL(xdp_return_frame);
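
A sketch of the intended driver setup sequence for the registry above. The
mydrv_rxq structure is hypothetical; xdp_rxq_info_reg() is the pre-existing
registration helper, and the page_pool is assumed to have been created as in
net/core/page_pool.c above.

#include <net/page_pool.h>
#include <net/xdp.h>

struct mydrv_rxq {			/* hypothetical per-ring state */
	struct xdp_rxq_info xdp_rxq;
	struct net_device *netdev;
	struct page_pool *page_pool;
	u32 index;
};

static int mydrv_reg_xdp(struct mydrv_rxq *rxq)
{
	int err;

	err = xdp_rxq_info_reg(&rxq->xdp_rxq, rxq->netdev, rxq->index);
	if (err)
		return err;

	/* Allocates a mem.id and inserts the page_pool into mem_id_ht,
	 * so xdp_return_frame() can map xdp_frame::mem back to it.
	 */
	err = xdp_rxq_info_reg_mem_model(&rxq->xdp_rxq, MEM_TYPE_PAGE_POOL,
					 rxq->page_pool);
	if (err)
		xdp_rxq_info_unreg(&rxq->xdp_rxq);
	return err;
}
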
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index a07b7dd..b379520 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -13,7 +13,8 @@
 	     tcp_offload.o datagram.o raw.o udp.o udplite.o \
 	     udp_offload.o arp.o icmp.o devinet.o af_inet.o igmp.o \
 	     fib_frontend.o fib_semantics.o fib_trie.o fib_notifier.o \
-	     inet_fragment.o ping.o ip_tunnel_core.o gre_offload.o
+	     inet_fragment.o ping.o ip_tunnel_core.o gre_offload.o \
+	     metrics.o
 
 obj-$(CONFIG_NET_IP_TUNNEL) += ip_tunnel.o
 obj-$(CONFIG_SYSCTL) += sysctl_net_ipv4.o
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index eaed036..3ebf599 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -994,7 +994,7 @@ const struct proto_ops inet_stream_ops = {
 	.getsockopt	   = sock_common_getsockopt,
 	.sendmsg	   = inet_sendmsg,
 	.recvmsg	   = inet_recvmsg,
-	.mmap		   = sock_no_mmap,
+	.mmap		   = tcp_mmap,
 	.sendpage	   = inet_sendpage,
 	.splice_read	   = tcp_splice_read,
 	.read_sock	   = tcp_read_sock,
@@ -1006,6 +1006,7 @@ const struct proto_ops inet_stream_ops = {
 	.compat_getsockopt = compat_sock_common_getsockopt,
 	.compat_ioctl	   = inet_compat_ioctl,
 #endif
+	.set_rcvlowat	   = tcp_set_rcvlowat,
 };
 EXPORT_SYMBOL(inet_stream_ops);
 
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index c27122f..6608db2 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -1019,47 +1019,8 @@ static bool fib_valid_prefsrc(struct fib_config *cfg, __be32 fib_prefsrc)
 static int
 fib_convert_metrics(struct fib_info *fi, const struct fib_config *cfg)
 {
-	bool ecn_ca = false;
-	struct nlattr *nla;
-	int remaining;
-
-	if (!cfg->fc_mx)
-		return 0;
-
-	nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
-		int type = nla_type(nla);
-		u32 val;
-
-		if (!type)
-			continue;
-		if (type > RTAX_MAX)
-			return -EINVAL;
-
-		if (type == RTAX_CC_ALGO) {
-			char tmp[TCP_CA_NAME_MAX];
-
-			nla_strlcpy(tmp, nla, sizeof(tmp));
-			val = tcp_ca_get_key_by_name(fi->fib_net, tmp, &ecn_ca);
-			if (val == TCP_CA_UNSPEC)
-				return -EINVAL;
-		} else {
-			val = nla_get_u32(nla);
-		}
-		if (type == RTAX_ADVMSS && val > 65535 - 40)
-			val = 65535 - 40;
-		if (type == RTAX_MTU && val > 65535 - 15)
-			val = 65535 - 15;
-		if (type == RTAX_HOPLIMIT && val > 255)
-			val = 255;
-		if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK))
-			return -EINVAL;
-		fi->fib_metrics->metrics[type - 1] = val;
-	}
-
-	if (ecn_ca)
-		fi->fib_metrics->metrics[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA;
-
-	return 0;
+	return ip_metrics_convert(fi->fib_net, cfg->fc_mx, cfg->fc_mx_len,
+				  fi->fib_metrics->metrics);
 }
 
 struct fib_info *fib_create_info(struct fib_config *cfg,
diff --git a/net/ipv4/metrics.c b/net/ipv4/metrics.c
new file mode 100644
index 0000000..5121c64
--- /dev/null
+++ b/net/ipv4/metrics.c
@@ -0,0 +1,53 @@
+#include <linux/netlink.h>
+#include <linux/rtnetlink.h>
+#include <linux/types.h>
+#include <net/ip.h>
+#include <net/net_namespace.h>
+#include <net/tcp.h>
+
+int ip_metrics_convert(struct net *net, struct nlattr *fc_mx, int fc_mx_len,
+		       u32 *metrics)
+{
+	bool ecn_ca = false;
+	struct nlattr *nla;
+	int remaining;
+
+	if (!fc_mx)
+		return 0;
+
+	nla_for_each_attr(nla, fc_mx, fc_mx_len, remaining) {
+		int type = nla_type(nla);
+		u32 val;
+
+		if (!type)
+			continue;
+		if (type > RTAX_MAX)
+			return -EINVAL;
+
+		if (type == RTAX_CC_ALGO) {
+			char tmp[TCP_CA_NAME_MAX];
+
+			nla_strlcpy(tmp, nla, sizeof(tmp));
+			val = tcp_ca_get_key_by_name(net, tmp, &ecn_ca);
+			if (val == TCP_CA_UNSPEC)
+				return -EINVAL;
+		} else {
+			val = nla_get_u32(nla);
+		}
+		if (type == RTAX_ADVMSS && val > 65535 - 40)
+			val = 65535 - 40;
+		if (type == RTAX_MTU && val > 65535 - 15)
+			val = 65535 - 15;
+		if (type == RTAX_HOPLIMIT && val > 255)
+			val = 255;
+		if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK))
+			return -EINVAL;
+		metrics[type - 1] = val;
+	}
+
+	if (ecn_ca)
+		metrics[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(ip_metrics_convert);
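
The helper is exported so that the IPv6 FIB can share it as well; a sketch of
a caller, mirroring fib_convert_metrics() above (net, cfg and the surrounding
error handling are whatever the caller already has):

u32 metrics[RTAX_MAX] = {};
int err;

/* fc_mx/fc_mx_len carry the RTA_METRICS nest of an RTM_NEWROUTE */
err = ip_metrics_convert(net, cfg->fc_mx, cfg->fc_mx_len, metrics);
if (err)
	return err;	/* -EINVAL: unknown RTAX type or bad RTAX_FEATURES */
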
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index a058de6..261b71d 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -296,6 +296,8 @@ static const struct snmp_mib snmp4_net_list[] = {
 	SNMP_MIB_ITEM("TCPKeepAlive", LINUX_MIB_TCPKEEPALIVE),
 	SNMP_MIB_ITEM("TCPMTUPFail", LINUX_MIB_TCPMTUPFAIL),
 	SNMP_MIB_ITEM("TCPMTUPSuccess", LINUX_MIB_TCPMTUPSUCCESS),
+	SNMP_MIB_ITEM("TCPDelivered", LINUX_MIB_TCPDELIVERED),
+	SNMP_MIB_ITEM("TCPDeliveredCE", LINUX_MIB_TCPDELIVEREDCE),
 	SNMP_MIB_SENTINEL
 };
 
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 9ce1c72..dfd090e 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1701,6 +1701,144 @@ int tcp_peek_len(struct socket *sock)
 }
 EXPORT_SYMBOL(tcp_peek_len);
 
+/* Make sure sk_rcvbuf is big enough to satisfy SO_RCVLOWAT hint */
+int tcp_set_rcvlowat(struct sock *sk, int val)
+{
+	sk->sk_rcvlowat = val ? : 1;
+
+	/* Check if we need to signal EPOLLIN right now */
+	tcp_data_ready(sk);
+
+	if (sk->sk_userlocks & SOCK_RCVBUF_LOCK)
+		return 0;
+
+	/* val comes from user space and might be close to INT_MAX */
+	val <<= 1;
+	if (val < 0)
+		val = INT_MAX;
+
+	val = min(val, sock_net(sk)->ipv4.sysctl_tcp_rmem[2]);
+	if (val > sk->sk_rcvbuf) {
+		sk->sk_rcvbuf = val;
+		tcp_sk(sk)->window_clamp = tcp_win_from_space(sk, val);
+	}
+	return 0;
+}
+EXPORT_SYMBOL(tcp_set_rcvlowat);
+
+/* When the user wants to mmap X pages, we first need to perform the mapping
+ * before freeing any skbs in the receive queue, otherwise the user would be
+ * unable to fall back to standard recvmsg(). This happens if some data in
+ * the requested block does not fit exactly in a page.
+ *
+ * We only support order-0 pages for the moment.
+ * mmap() on TCP is very strict, so there is no point
+ * trying to accommodate pathological layouts.
+ */
+int tcp_mmap(struct file *file, struct socket *sock,
+	     struct vm_area_struct *vma)
+{
+	unsigned long size = vma->vm_end - vma->vm_start;
+	unsigned int nr_pages = size >> PAGE_SHIFT;
+	struct page **pages_array = NULL;
+	u32 seq, len, offset, nr = 0;
+	struct sock *sk = sock->sk;
+	const skb_frag_t *frags;
+	struct tcp_sock *tp;
+	struct sk_buff *skb;
+	int ret;
+
+	if (vma->vm_pgoff || !nr_pages)
+		return -EINVAL;
+
+	if (vma->vm_flags & VM_WRITE)
+		return -EPERM;
+	/* TODO: Maybe the following is not needed if pages are COW */
+	vma->vm_flags &= ~VM_MAYWRITE;
+
+	lock_sock(sk);
+
+	ret = -ENOTCONN;
+	if (sk->sk_state == TCP_LISTEN)
+		goto out;
+
+	sock_rps_record_flow(sk);
+
+	if (tcp_inq(sk) < size) {
+		ret = sock_flag(sk, SOCK_DONE) ? -EIO : -EAGAIN;
+		goto out;
+	}
+	tp = tcp_sk(sk);
+	seq = tp->copied_seq;
+	/* Abort if urgent data is in the area */
+	if (unlikely(tp->urg_data)) {
+		u32 urg_offset = tp->urg_seq - seq;
+
+		ret = -EINVAL;
+		if (urg_offset < size)
+			goto out;
+	}
+	ret = -ENOMEM;
+	pages_array = kvmalloc_array(nr_pages, sizeof(struct page *),
+				     GFP_KERNEL);
+	if (!pages_array)
+		goto out;
+	skb = tcp_recv_skb(sk, seq, &offset);
+	ret = -EINVAL;
+skb_start:
+	/* We do not support anything not in page frags */
+	offset -= skb_headlen(skb);
+	if ((int)offset < 0)
+		goto out;
+	if (skb_has_frag_list(skb))
+		goto out;
+	len = skb->data_len - offset;
+	frags = skb_shinfo(skb)->frags;
+	while (offset) {
+		if (frags->size > offset)
+			goto out;
+		offset -= frags->size;
+		frags++;
+	}
+	while (nr < nr_pages) {
+		if (len) {
+			if (len < PAGE_SIZE)
+				goto out;
+			if (frags->size != PAGE_SIZE || frags->page_offset)
+				goto out;
+			pages_array[nr++] = skb_frag_page(frags);
+			frags++;
+			len -= PAGE_SIZE;
+			seq += PAGE_SIZE;
+			continue;
+		}
+		skb = skb->next;
+		offset = seq - TCP_SKB_CB(skb)->seq;
+		goto skb_start;
+	}
+	/* OK, we have a full set of pages ready to be inserted into vma */
+	for (nr = 0; nr < nr_pages; nr++) {
+		ret = vm_insert_page(vma, vma->vm_start + (nr << PAGE_SHIFT),
+				     pages_array[nr]);
+		if (ret)
+			goto out;
+	}
+	/* operation is complete, we can 'consume' all skbs */
+	tp->copied_seq = seq;
+	tcp_rcv_space_adjust(sk);
+
+	/* Clean up data we have read: this will send ACK frames. */
+	tcp_recv_skb(sk, seq, &offset);
+	tcp_cleanup_rbuf(sk, size);
+
+	ret = 0;
+out:
+	release_sock(sk);
+	kvfree(pages_array);
+	return ret;
+}
+EXPORT_SYMBOL(tcp_mmap);
+
 static void tcp_update_recv_tstamps(struct sk_buff *skb,
 				    struct scm_timestamping *tss)
 {
@@ -2421,6 +2559,7 @@ int tcp_disconnect(struct sock *sk, int flags)
 	tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
 	tp->snd_cwnd_cnt = 0;
 	tp->window_clamp = 0;
+	tp->delivered_ce = 0;
 	tcp_set_ca_state(sk, TCP_CA_Open);
 	tp->is_sack_reneg = 0;
 	tcp_clear_retrans(tp);
@@ -3030,6 +3169,8 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
 	rate64 = tcp_compute_delivery_rate(tp);
 	if (rate64)
 		info->tcpi_delivery_rate = rate64;
+	info->tcpi_delivered = tp->delivered;
+	info->tcpi_delivered_ce = tp->delivered_ce;
 	unlock_sock_fast(sk, slow);
 }
 EXPORT_SYMBOL_GPL(tcp_get_info);
@@ -3043,7 +3184,7 @@ struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk)
 	u32 rate;
 
 	stats = alloc_skb(7 * nla_total_size_64bit(sizeof(u64)) +
-			  5 * nla_total_size(sizeof(u32)) +
+			  7 * nla_total_size(sizeof(u32)) +
 			  3 * nla_total_size(sizeof(u8)), GFP_ATOMIC);
 	if (!stats)
 		return NULL;
@@ -3074,9 +3215,12 @@ struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk)
 	nla_put_u8(stats, TCP_NLA_RECUR_RETRANS, inet_csk(sk)->icsk_retransmits);
 	nla_put_u8(stats, TCP_NLA_DELIVERY_RATE_APP_LMT, !!tp->rate_app_limited);
 	nla_put_u32(stats, TCP_NLA_SND_SSTHRESH, tp->snd_ssthresh);
+	nla_put_u32(stats, TCP_NLA_DELIVERED, tp->delivered);
+	nla_put_u32(stats, TCP_NLA_DELIVERED_CE, tp->delivered_ce);
 
 	nla_put_u32(stats, TCP_NLA_SNDQ_SIZE, tp->write_seq - tp->snd_una);
 	nla_put_u8(stats, TCP_NLA_CA_STATE, inet_csk(sk)->icsk_ca_state);
+
 	return stats;
 }
 
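
A userspace sketch of driving the new receive path above: the application
mmap()s a read-only, page-aligned window directly on the TCP socket and falls
back to recvmsg() whenever the kernel refuses (writable mapping, non-zero
offset, not enough queued data, or frags that are not exactly page-sized).
Error handling trimmed:

#include <stddef.h>
#include <sys/mman.h>

static void *rx_map(int fd, size_t chunk)	/* chunk: multiple of page size */
{
	/* PROT_READ only: tcp_mmap() rejects VM_WRITE with -EPERM */
	void *p = mmap(NULL, chunk, PROT_READ, MAP_SHARED, fd, 0);

	if (p == MAP_FAILED)
		return NULL;	/* caller falls back to recvmsg() */

	/* Received bytes are readable at p[0..chunk) without a copy;
	 * munmap(p, chunk) releases the window when the caller is done.
	 */
	return p;
}
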
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 367def6..0396fb9 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3496,6 +3496,22 @@ static void tcp_xmit_recovery(struct sock *sk, int rexmit)
 	tcp_xmit_retransmit_queue(sk);
 }
 
+/* Returns the number of packets newly acked or sacked by the current ACK */
+static u32 tcp_newly_delivered(struct sock *sk, u32 prior_delivered, int flag)
+{
+	const struct net *net = sock_net(sk);
+	struct tcp_sock *tp = tcp_sk(sk);
+	u32 delivered;
+
+	delivered = tp->delivered - prior_delivered;
+	NET_ADD_STATS(net, LINUX_MIB_TCPDELIVERED, delivered);
+	if (flag & FLAG_ECE) {
+		tp->delivered_ce += delivered;
+		NET_ADD_STATS(net, LINUX_MIB_TCPDELIVEREDCE, delivered);
+	}
+	return delivered;
+}
+
 /* This routine deals with incoming acks, but not outgoing ones. */
 static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 {
@@ -3619,7 +3635,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 	if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP))
 		sk_dst_confirm(sk);
 
-	delivered = tp->delivered - delivered;	/* freshly ACKed or SACKed */
+	delivered = tcp_newly_delivered(sk, delivered, flag);
 	lost = tp->lost - lost;			/* freshly marked lost */
 	rs.is_ack_delayed = !!(flag & FLAG_ACK_MAYBE_DELAYED);
 	tcp_rate_gen(sk, delivered, lost, is_sack_reneg, sack_state.rate);
@@ -3629,9 +3645,11 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 
 no_queue:
 	/* If data was DSACKed, see if we can undo a cwnd reduction. */
-	if (flag & FLAG_DSACKING_ACK)
+	if (flag & FLAG_DSACKING_ACK) {
 		tcp_fastretrans_alert(sk, prior_snd_una, is_dupack, &flag,
 				      &rexmit);
+		tcp_newly_delivered(sk, delivered, flag);
+	}
 	/* If this ack opens up a zero window, clear backoff.  It was
 	 * being used to time the probes, and is probably far higher than
 	 * it needs to be for normal retransmission.
@@ -3655,6 +3673,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 						&sack_state);
 		tcp_fastretrans_alert(sk, prior_snd_una, is_dupack, &flag,
 				      &rexmit);
+		tcp_newly_delivered(sk, delivered, flag);
 		tcp_xmit_recovery(sk, rexmit);
 	}
 
@@ -4576,6 +4595,17 @@ int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size)
 
 }
 
+void tcp_data_ready(struct sock *sk)
+{
+	const struct tcp_sock *tp = tcp_sk(sk);
+	int avail = tp->rcv_nxt - tp->copied_seq;
+
+	if (avail < sk->sk_rcvlowat && !sock_flag(sk, SOCK_DONE))
+		return;
+
+	sk->sk_data_ready(sk);
+}
+
 static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
@@ -4633,7 +4663,7 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
 		if (eaten > 0)
 			kfree_skb_partial(skb, fragstolen);
 		if (!sock_flag(sk, SOCK_DEAD))
-			sk->sk_data_ready(sk);
+			tcp_data_ready(sk);
 		return;
 	}
 
@@ -5026,9 +5056,12 @@ static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible)
 	    /* More than one full frame received... */
 	if (((tp->rcv_nxt - tp->rcv_wup) > inet_csk(sk)->icsk_ack.rcv_mss &&
 	     /* ... and right edge of window advances far enough.
-	      * (tcp_recvmsg() will send ACK otherwise). Or...
+	      * (tcp_recvmsg() will send ACK otherwise).
+	      * If the application uses SO_RCVLOWAT, we want to send an ACK
+	      * now if we have not received enough bytes to satisfy it.
 	      */
-	     __tcp_select_window(sk) >= tp->rcv_wnd) ||
+	    (tp->rcv_nxt - tp->copied_seq < sk->sk_rcvlowat ||
+	     __tcp_select_window(sk) >= tp->rcv_wnd)) ||
 	    /* We ACK each frame or... */
 	    tcp_in_quickack_mode(sk) ||
 	    /* We have out of order data. */
@@ -5431,7 +5464,7 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
 no_ack:
 			if (eaten)
 				kfree_skb_partial(skb, fragstolen);
-			sk->sk_data_ready(sk);
+			tcp_data_ready(sk);
 			return;
 		}
 	}
@@ -5553,9 +5586,12 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack,
 		return true;
 	}
 	tp->syn_data_acked = tp->syn_data;
-	if (tp->syn_data_acked)
-		NET_INC_STATS(sock_net(sk),
-				LINUX_MIB_TCPFASTOPENACTIVE);
+	if (tp->syn_data_acked) {
+		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENACTIVE);
+		/* SYN-data is counted as two separate packets in tcp_ack() */
+		if (tp->delivered > 1)
+			--tp->delivered;
+	}
 
 	tcp_fastopen_add_skb(sk, synack);
 
@@ -5887,6 +5923,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
 	}
 	switch (sk->sk_state) {
 	case TCP_SYN_RECV:
+		tp->delivered++; /* SYN-ACK delivery isn't tracked in tcp_ack */
 		if (!tp->srtt_us)
 			tcp_synack_rtt_meas(sk, req);
 
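
The delivered/delivered_ce counters updated by tcp_newly_delivered() above
surface in three places: the SNMP entries added to net/ipv4/proc.c, the
TCP_NLA_DELIVERED* opt-stats attributes, and struct tcp_info. A userspace
sketch of the last one, assuming a uapi tcp.h recent enough to carry the two
new fields:

#include <netinet/in.h>
#include <netinet/tcp.h>
#include <stdio.h>
#include <sys/socket.h>

static void dump_delivered(int fd)
{
	struct tcp_info ti;
	socklen_t len = sizeof(ti);

	if (getsockopt(fd, IPPROTO_TCP, TCP_INFO, &ti, &len) == 0)
		printf("delivered=%u delivered_ce=%u\n",
		       ti.tcpi_delivered, ti.tcpi_delivered_ce);
}
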
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 78cef00..7b4d7bb 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -170,7 +170,7 @@ static void addrconf_type_change(struct net_device *dev,
 				 unsigned long event);
 static int addrconf_ifdown(struct net_device *dev, int how);
 
-static struct rt6_info *addrconf_get_prefix_route(const struct in6_addr *pfx,
+static struct fib6_info *addrconf_get_prefix_route(const struct in6_addr *pfx,
 						  int plen,
 						  const struct net_device *dev,
 						  u32 flags, u32 noflags);
@@ -916,7 +916,6 @@ void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp)
 		pr_warn("Freeing alive inet6 address %p\n", ifp);
 		return;
 	}
-	ip6_rt_put(ifp->rt);
 
 	kfree_rcu(ifp, rcu);
 }
@@ -995,7 +994,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr,
 	gfp_t gfp_flags = can_block ? GFP_KERNEL : GFP_ATOMIC;
 	struct net *net = dev_net(idev->dev);
 	struct inet6_ifaddr *ifa = NULL;
-	struct rt6_info *rt = NULL;
+	struct fib6_info *f6i = NULL;
 	int err = 0;
 	int addr_type = ipv6_addr_type(addr);
 
@@ -1037,16 +1036,16 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr,
 		goto out;
 	}
 
-	rt = addrconf_dst_alloc(idev, addr, false);
-	if (IS_ERR(rt)) {
-		err = PTR_ERR(rt);
-		rt = NULL;
+	f6i = addrconf_f6i_alloc(net, idev, addr, false, gfp_flags);
+	if (IS_ERR(f6i)) {
+		err = PTR_ERR(f6i);
+		f6i = NULL;
 		goto out;
 	}
 
 	if (net->ipv6.devconf_all->disable_policy ||
 	    idev->cnf.disable_policy)
-		rt->dst.flags |= DST_NOPOLICY;
+		f6i->dst_nopolicy = true;
 
 	neigh_parms_data_state_setall(idev->nd_parms);
 
@@ -1068,7 +1067,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr,
 	ifa->cstamp = ifa->tstamp = jiffies;
 	ifa->tokenized = false;
 
-	ifa->rt = rt;
+	ifa->rt = f6i;
 
 	ifa->idev = idev;
 	in6_dev_hold(idev);
@@ -1102,8 +1101,8 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr,
 	inet6addr_notifier_call_chain(NETDEV_UP, ifa);
 out:
 	if (unlikely(err < 0)) {
-		if (rt)
-			ip6_rt_put(rt);
+		fib6_info_release(f6i);
+
 		if (ifa) {
 			if (ifa->idev)
 				in6_dev_put(ifa->idev);
@@ -1179,19 +1178,19 @@ check_cleanup_prefix_route(struct inet6_ifaddr *ifp, unsigned long *expires)
 static void
 cleanup_prefix_route(struct inet6_ifaddr *ifp, unsigned long expires, bool del_rt)
 {
-	struct rt6_info *rt;
+	struct fib6_info *f6i;
 
-	rt = addrconf_get_prefix_route(&ifp->addr,
+	f6i = addrconf_get_prefix_route(&ifp->addr,
 				       ifp->prefix_len,
 				       ifp->idev->dev,
 				       0, RTF_GATEWAY | RTF_DEFAULT);
-	if (rt) {
+	if (f6i) {
 		if (del_rt)
-			ip6_del_rt(rt);
+			ip6_del_rt(dev_net(ifp->idev->dev), f6i);
 		else {
-			if (!(rt->rt6i_flags & RTF_EXPIRES))
-				rt6_set_expires(rt, expires);
-			ip6_rt_put(rt);
+			if (!(f6i->fib6_flags & RTF_EXPIRES))
+				fib6_set_expires(f6i, expires);
+			fib6_info_release(f6i);
 		}
 	}
 }
@@ -2320,7 +2319,7 @@ static void  ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpad
 
 static void
 addrconf_prefix_route(struct in6_addr *pfx, int plen, struct net_device *dev,
-		      unsigned long expires, u32 flags)
+		      unsigned long expires, u32 flags, gfp_t gfp_flags)
 {
 	struct fib6_config cfg = {
 		.fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_PREFIX,
@@ -2331,6 +2330,7 @@ addrconf_prefix_route(struct in6_addr *pfx, int plen, struct net_device *dev,
 		.fc_flags = RTF_UP | flags,
 		.fc_nlinfo.nl_net = dev_net(dev),
 		.fc_protocol = RTPROT_KERNEL,
+		.fc_type = RTN_UNICAST,
 	};
 
 	cfg.fc_dst = *pfx;
@@ -2344,17 +2344,17 @@ addrconf_prefix_route(struct in6_addr *pfx, int plen, struct net_device *dev,
 		cfg.fc_flags |= RTF_NONEXTHOP;
 #endif
 
-	ip6_route_add(&cfg, NULL);
+	ip6_route_add(&cfg, gfp_flags, NULL);
 }
 
 
-static struct rt6_info *addrconf_get_prefix_route(const struct in6_addr *pfx,
+static struct fib6_info *addrconf_get_prefix_route(const struct in6_addr *pfx,
 						  int plen,
 						  const struct net_device *dev,
 						  u32 flags, u32 noflags)
 {
 	struct fib6_node *fn;
-	struct rt6_info *rt = NULL;
+	struct fib6_info *rt = NULL;
 	struct fib6_table *table;
 	u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_PREFIX;
 
@@ -2368,14 +2368,13 @@ static struct rt6_info *addrconf_get_prefix_route(const struct in6_addr *pfx,
 		goto out;
 
 	for_each_fib6_node_rt_rcu(fn) {
-		if (rt->dst.dev->ifindex != dev->ifindex)
+		if (rt->fib6_nh.nh_dev->ifindex != dev->ifindex)
 			continue;
-		if ((rt->rt6i_flags & flags) != flags)
+		if ((rt->fib6_flags & flags) != flags)
 			continue;
-		if ((rt->rt6i_flags & noflags) != 0)
+		if ((rt->fib6_flags & noflags) != 0)
 			continue;
-		if (!dst_hold_safe(&rt->dst))
-			rt = NULL;
+		fib6_info_hold(rt);
 		break;
 	}
 out:
@@ -2394,12 +2393,13 @@ static void addrconf_add_mroute(struct net_device *dev)
 		.fc_ifindex = dev->ifindex,
 		.fc_dst_len = 8,
 		.fc_flags = RTF_UP,
+		.fc_type = RTN_UNICAST,
 		.fc_nlinfo.nl_net = dev_net(dev),
 	};
 
 	ipv6_addr_set(&cfg.fc_dst, htonl(0xFF000000), 0, 0, 0);
 
-	ip6_route_add(&cfg, NULL);
+	ip6_route_add(&cfg, GFP_ATOMIC, NULL);
 }
 
 static struct inet6_dev *addrconf_add_dev(struct net_device *dev)
@@ -2529,7 +2529,6 @@ int addrconf_prefix_rcv_add_addr(struct net *net, struct net_device *dev,
 		if (IS_ERR_OR_NULL(ifp))
 			return -1;
 
-		update_lft = 0;
 		create = 1;
 		spin_lock_bh(&ifp->lock);
 		ifp->flags |= IFA_F_MANAGETEMPADDR;
@@ -2551,7 +2550,7 @@ int addrconf_prefix_rcv_add_addr(struct net *net, struct net_device *dev,
 			stored_lft = ifp->valid_lft - (now - ifp->tstamp) / HZ;
 		else
 			stored_lft = 0;
-		if (!update_lft && !create && stored_lft) {
+		if (!create && stored_lft) {
 			const u32 minimum_lft = min_t(u32,
 				stored_lft, MIN_VALID_LIFETIME);
 			valid_lft = max(valid_lft, minimum_lft);
@@ -2642,7 +2641,7 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao)
 	 */
 
 	if (pinfo->onlink) {
-		struct rt6_info *rt;
+		struct fib6_info *rt;
 		unsigned long rt_expires;
 
 		/* Avoid arithmetic overflow. Really, we could
@@ -2667,13 +2666,13 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao)
 		if (rt) {
 			/* Autoconf prefix route */
 			if (valid_lft == 0) {
-				ip6_del_rt(rt);
+				ip6_del_rt(net, rt);
 				rt = NULL;
 			} else if (addrconf_finite_timeout(rt_expires)) {
 				/* not infinity */
-				rt6_set_expires(rt, jiffies + rt_expires);
+				fib6_set_expires(rt, jiffies + rt_expires);
 			} else {
-				rt6_clean_expires(rt);
+				fib6_clean_expires(rt);
 			}
 		} else if (valid_lft) {
 			clock_t expires = 0;
@@ -2684,9 +2683,9 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao)
 				expires = jiffies_to_clock_t(rt_expires);
 			}
 			addrconf_prefix_route(&pinfo->prefix, pinfo->prefix_len,
-					      dev, expires, flags);
+					      dev, expires, flags, GFP_ATOMIC);
 		}
-		ip6_rt_put(rt);
+		fib6_info_release(rt);
 	}
 
 	/* Try to figure out our local address for this prefix */
@@ -2899,9 +2898,14 @@ static int inet6_addr_add(struct net *net, int ifindex,
 	if (!IS_ERR(ifp)) {
 		if (!(ifa_flags & IFA_F_NOPREFIXROUTE)) {
 			addrconf_prefix_route(&ifp->addr, ifp->prefix_len, dev,
-					      expires, flags);
+					      expires, flags, GFP_KERNEL);
 		}
 
+		/* Send a netlink notification if DAD is enabled and
+		 * the optimistic flag is not set.
+		 */
+		if (!(ifp->flags & (IFA_F_OPTIMISTIC | IFA_F_NODAD)))
+			ipv6_ifa_notify(0, ifp);
 		/*
 		 * Note that section 3.1 of RFC 4429 indicates
 		 * that the Optimistic flag should not be set for
@@ -3047,7 +3051,8 @@ static void sit_add_v4_addrs(struct inet6_dev *idev)
 
 	if (addr.s6_addr32[3]) {
 		add_addr(idev, &addr, plen, scope);
-		addrconf_prefix_route(&addr, plen, idev->dev, 0, pflags);
+		addrconf_prefix_route(&addr, plen, idev->dev, 0, pflags,
+				      GFP_ATOMIC);
 		return;
 	}
 
@@ -3072,7 +3077,7 @@ static void sit_add_v4_addrs(struct inet6_dev *idev)
 
 				add_addr(idev, &addr, plen, flag);
 				addrconf_prefix_route(&addr, plen, idev->dev, 0,
-						      pflags);
+						      pflags, GFP_ATOMIC);
 			}
 		}
 	}
@@ -3112,7 +3117,8 @@ void addrconf_add_linklocal(struct inet6_dev *idev,
 	ifp = ipv6_add_addr(idev, addr, NULL, 64, IFA_LINK, addr_flags,
 			    INFINITY_LIFE_TIME, INFINITY_LIFE_TIME, true, NULL);
 	if (!IS_ERR(ifp)) {
-		addrconf_prefix_route(&ifp->addr, ifp->prefix_len, idev->dev, 0, 0);
+		addrconf_prefix_route(&ifp->addr, ifp->prefix_len, idev->dev,
+				      0, 0, GFP_ATOMIC);
 		addrconf_dad_start(ifp);
 		in6_ifa_put(ifp);
 	}
@@ -3227,7 +3233,8 @@ static void addrconf_addr_gen(struct inet6_dev *idev, bool prefix_route)
 			addrconf_add_linklocal(idev, &addr,
 					       IFA_F_STABLE_PRIVACY);
 		else if (prefix_route)
-			addrconf_prefix_route(&addr, 64, idev->dev, 0, 0);
+			addrconf_prefix_route(&addr, 64, idev->dev,
+					      0, 0, GFP_KERNEL);
 		break;
 	case IN6_ADDR_GEN_MODE_EUI64:
 		/* addrconf_add_linklocal also adds a prefix_route and we
@@ -3237,7 +3244,8 @@ static void addrconf_addr_gen(struct inet6_dev *idev, bool prefix_route)
 		if (ipv6_generate_eui64(addr.s6_addr + 8, idev->dev) == 0)
 			addrconf_add_linklocal(idev, &addr, 0);
 		else if (prefix_route)
-			addrconf_prefix_route(&addr, 64, idev->dev, 0, 0);
+			addrconf_prefix_route(&addr, 64, idev->dev,
+					      0, 0, GFP_KERNEL);
 		break;
 	case IN6_ADDR_GEN_MODE_NONE:
 	default:
@@ -3329,32 +3337,34 @@ static void addrconf_gre_config(struct net_device *dev)
 }
 #endif
 
-static int fixup_permanent_addr(struct inet6_dev *idev,
+static int fixup_permanent_addr(struct net *net,
+				struct inet6_dev *idev,
 				struct inet6_ifaddr *ifp)
 {
-	/* !rt6i_node means the host route was removed from the
+	/* !fib6_node means the host route was removed from the
 	 * FIB, for example, if 'lo' device is taken down. In that
 	 * case regenerate the host route.
 	 */
-	if (!ifp->rt || !ifp->rt->rt6i_node) {
-		struct rt6_info *rt, *prev;
+	if (!ifp->rt || !ifp->rt->fib6_node) {
+		struct fib6_info *f6i, *prev;
 
-		rt = addrconf_dst_alloc(idev, &ifp->addr, false);
-		if (IS_ERR(rt))
-			return PTR_ERR(rt);
+		f6i = addrconf_f6i_alloc(net, idev, &ifp->addr, false,
+					 GFP_ATOMIC);
+		if (IS_ERR(f6i))
+			return PTR_ERR(f6i);
 
 		/* ifp->rt can be accessed outside of rtnl */
 		spin_lock(&ifp->lock);
 		prev = ifp->rt;
-		ifp->rt = rt;
+		ifp->rt = f6i;
 		spin_unlock(&ifp->lock);
 
-		ip6_rt_put(prev);
+		fib6_info_release(prev);
 	}
 
 	if (!(ifp->flags & IFA_F_NOPREFIXROUTE)) {
 		addrconf_prefix_route(&ifp->addr, ifp->prefix_len,
-				      idev->dev, 0, 0);
+				      idev->dev, 0, 0, GFP_ATOMIC);
 	}
 
 	if (ifp->state == INET6_IFADDR_STATE_PREDAD)
@@ -3363,7 +3373,7 @@ static int fixup_permanent_addr(struct inet6_dev *idev,
 	return 0;
 }
 
-static void addrconf_permanent_addr(struct net_device *dev)
+static void addrconf_permanent_addr(struct net *net, struct net_device *dev)
 {
 	struct inet6_ifaddr *ifp, *tmp;
 	struct inet6_dev *idev;
@@ -3376,7 +3386,7 @@ static void addrconf_permanent_addr(struct net_device *dev)
 
 	list_for_each_entry_safe(ifp, tmp, &idev->addr_list, if_list) {
 		if ((ifp->flags & IFA_F_PERMANENT) &&
-		    fixup_permanent_addr(idev, ifp) < 0) {
+		    fixup_permanent_addr(net, idev, ifp) < 0) {
 			write_unlock_bh(&idev->lock);
 			in6_ifa_hold(ifp);
 			ipv6_del_addr(ifp);
@@ -3445,7 +3455,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
 
 		if (event == NETDEV_UP) {
 			/* restore routes for permanent addresses */
-			addrconf_permanent_addr(dev);
+			addrconf_permanent_addr(net, dev);
 
 			if (!addrconf_link_ready(dev)) {
 				/* device is not ready yet. */
@@ -3703,7 +3713,7 @@ static int addrconf_ifdown(struct net_device *dev, int how)
 	keep_addr = (!how && _keep_addr > 0 && !idev->cnf.disable_ipv6);
 
 	list_for_each_entry_safe(ifa, tmp, &idev->addr_list, if_list) {
-		struct rt6_info *rt = NULL;
+		struct fib6_info *rt = NULL;
 		bool keep;
 
 		addrconf_del_dad_work(ifa);
@@ -3731,7 +3741,7 @@ static int addrconf_ifdown(struct net_device *dev, int how)
 		spin_unlock_bh(&ifa->lock);
 
 		if (rt)
-			ip6_del_rt(rt);
+			ip6_del_rt(net, rt);
 
 		if (state != INET6_IFADDR_STATE_DEAD) {
 			__ipv6_ifa_notify(RTM_DELADDR, ifa);
@@ -3849,6 +3859,7 @@ static void addrconf_dad_begin(struct inet6_ifaddr *ifp)
 	struct inet6_dev *idev = ifp->idev;
 	struct net_device *dev = idev->dev;
 	bool bump_id, notify = false;
+	struct net *net;
 
 	addrconf_join_solict(dev, &ifp->addr);
 
@@ -3859,8 +3870,9 @@ static void addrconf_dad_begin(struct inet6_ifaddr *ifp)
 	if (ifp->state == INET6_IFADDR_STATE_DEAD)
 		goto out;
 
+	net = dev_net(dev);
 	if (dev->flags&(IFF_NOARP|IFF_LOOPBACK) ||
-	    (dev_net(dev)->ipv6.devconf_all->accept_dad < 1 &&
+	    (net->ipv6.devconf_all->accept_dad < 1 &&
 	     idev->cnf.accept_dad < 1) ||
 	    !(ifp->flags&IFA_F_TENTATIVE) ||
 	    ifp->flags & IFA_F_NODAD) {
@@ -3896,8 +3908,8 @@ static void addrconf_dad_begin(struct inet6_ifaddr *ifp)
 	 * Frames right away
 	 */
 	if (ifp->flags & IFA_F_OPTIMISTIC) {
-		ip6_ins_rt(ifp->rt);
-		if (ipv6_use_optimistic_addr(dev_net(dev), idev)) {
+		ip6_ins_rt(net, ifp->rt);
+		if (ipv6_use_optimistic_addr(net, idev)) {
 			/* Because optimistic nodes can use this address,
 			 * notify listeners. If DAD fails, RTM_DELADDR is sent.
 			 */
@@ -4563,8 +4575,9 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, u32 ifa_flags,
 		ipv6_ifa_notify(0, ifp);
 
 	if (!(ifa_flags & IFA_F_NOPREFIXROUTE)) {
-		addrconf_prefix_route(&ifp->addr, ifp->prefix_len, ifp->idev->dev,
-				      expires, flags);
+		addrconf_prefix_route(&ifp->addr, ifp->prefix_len,
+				      ifp->idev->dev, expires, flags,
+				      GFP_KERNEL);
 	} else if (had_prefixroute) {
 		enum cleanup_prefix_rt_t action;
 		unsigned long rt_expires;
@@ -4804,9 +4817,10 @@ static int inet6_fill_ifmcaddr(struct sk_buff *skb, struct ifmcaddr6 *ifmca,
 static int inet6_fill_ifacaddr(struct sk_buff *skb, struct ifacaddr6 *ifaca,
 				u32 portid, u32 seq, int event, unsigned int flags)
 {
+	struct net_device *dev = fib6_info_nh_dev(ifaca->aca_rt);
+	int ifindex = dev ? dev->ifindex : 1;
 	struct nlmsghdr  *nlh;
 	u8 scope = RT_SCOPE_UNIVERSE;
-	int ifindex = ifaca->aca_idev->dev->ifindex;
 
 	if (ipv6_addr_scope(&ifaca->aca_addr) & IFA_SITE)
 		scope = RT_SCOPE_SITE;
@@ -5029,14 +5043,6 @@ static void inet6_ifa_notify(int event, struct inet6_ifaddr *ifa)
 	struct net *net = dev_net(ifa->idev->dev);
 	int err = -ENOBUFS;
 
-	/* Don't send DELADDR notification for TENTATIVE address,
-	 * since NEWADDR notification is sent only after removing
-	 * TENTATIVE flag, if DAD has not failed.
-	 */
-	if (ifa->flags & IFA_F_TENTATIVE && !(ifa->flags & IFA_F_DADFAILED) &&
-	    event == RTM_DELADDR)
-		return;
-
 	skb = nlmsg_new(inet6_ifaddr_msgsize(), GFP_ATOMIC);
 	if (!skb)
 		goto errout;
@@ -5607,29 +5613,30 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
 		 * our DAD process, so we don't need
 		 * to do it again
 		 */
-		if (!rcu_access_pointer(ifp->rt->rt6i_node))
-			ip6_ins_rt(ifp->rt);
+		if (!rcu_access_pointer(ifp->rt->fib6_node))
+			ip6_ins_rt(net, ifp->rt);
 		if (ifp->idev->cnf.forwarding)
 			addrconf_join_anycast(ifp);
 		if (!ipv6_addr_any(&ifp->peer_addr))
 			addrconf_prefix_route(&ifp->peer_addr, 128,
-					      ifp->idev->dev, 0, 0);
+					      ifp->idev->dev, 0, 0,
+					      GFP_ATOMIC);
 		break;
 	case RTM_DELADDR:
 		if (ifp->idev->cnf.forwarding)
 			addrconf_leave_anycast(ifp);
 		addrconf_leave_solict(ifp->idev, &ifp->addr);
 		if (!ipv6_addr_any(&ifp->peer_addr)) {
-			struct rt6_info *rt;
+			struct fib6_info *rt;
 
 			rt = addrconf_get_prefix_route(&ifp->peer_addr, 128,
 						       ifp->idev->dev, 0, 0);
 			if (rt)
-				ip6_del_rt(rt);
+				ip6_del_rt(net, rt);
 		}
 		if (ifp->rt) {
-			if (dst_hold_safe(&ifp->rt->dst))
-				ip6_del_rt(ifp->rt);
+			ip6_del_rt(net, ifp->rt);
+			ifp->rt = NULL;
 		}
 		rt_genid_bump_ipv6(net);
 		break;
@@ -5976,11 +5983,11 @@ void addrconf_disable_policy_idev(struct inet6_dev *idev, int val)
 	list_for_each_entry(ifa, &idev->addr_list, if_list) {
 		spin_lock(&ifa->lock);
 		if (ifa->rt) {
-			struct rt6_info *rt = ifa->rt;
+			struct fib6_info *rt = ifa->rt;
 			int cpu;
 
 			rcu_read_lock();
-			addrconf_set_nopolicy(ifa->rt, val);
+			ifa->rt->dst_nopolicy = val ? true : false;
 			if (rt->rt6i_pcpu) {
 				for_each_possible_cpu(cpu) {
 					struct rt6_info **rtp;
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 8da0b51..36d622c 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -273,33 +273,8 @@ static int inet6_create(struct net *net, struct socket *sock, int protocol,
 	goto out;
 }
 
-
-/* bind for INET6 API */
-int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
-{
-	struct sock *sk = sock->sk;
-	int err = 0;
-
-	/* If the socket has its own bind function then use it. */
-	if (sk->sk_prot->bind)
-		return sk->sk_prot->bind(sk, uaddr, addr_len);
-
-	if (addr_len < SIN6_LEN_RFC2133)
-		return -EINVAL;
-
-	/* BPF prog is run before any checks are done so that if the prog
-	 * changes context in a wrong way it will be caught.
-	 */
-	err = BPF_CGROUP_RUN_PROG_INET6_BIND(sk, uaddr);
-	if (err)
-		return err;
-
-	return __inet6_bind(sk, uaddr, addr_len, false, true);
-}
-EXPORT_SYMBOL(inet6_bind);
-
-int __inet6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
-		 bool force_bind_address_no_port, bool with_lock)
+static int __inet6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
+			bool force_bind_address_no_port, bool with_lock)
 {
 	struct sockaddr_in6 *addr = (struct sockaddr_in6 *)uaddr;
 	struct inet_sock *inet = inet_sk(sk);
@@ -444,6 +419,30 @@ int __inet6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
 	goto out;
 }
 
+/* bind for INET6 API */
+int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
+{
+	struct sock *sk = sock->sk;
+	int err = 0;
+
+	/* If the socket has its own bind function then use it. */
+	if (sk->sk_prot->bind)
+		return sk->sk_prot->bind(sk, uaddr, addr_len);
+
+	if (addr_len < SIN6_LEN_RFC2133)
+		return -EINVAL;
+
+	/* BPF prog is run before any checks are done so that if the prog
+	 * changes context in a wrong way it will be caught.
+	 */
+	err = BPF_CGROUP_RUN_PROG_INET6_BIND(sk, uaddr);
+	if (err)
+		return err;
+
+	return __inet6_bind(sk, uaddr, addr_len, false, true);
+}
+EXPORT_SYMBOL(inet6_bind);
+
 int inet6_release(struct socket *sock)
 {
 	struct sock *sk = sock->sk;
@@ -579,7 +578,7 @@ const struct proto_ops inet6_stream_ops = {
 	.getsockopt	   = sock_common_getsockopt,	/* ok		*/
 	.sendmsg	   = inet_sendmsg,		/* ok		*/
 	.recvmsg	   = inet_recvmsg,		/* ok		*/
-	.mmap		   = sock_no_mmap,
+	.mmap		   = tcp_mmap,
 	.sendpage	   = inet_sendpage,
 	.sendmsg_locked    = tcp_sendmsg_locked,
 	.sendpage_locked   = tcp_sendpage_locked,
@@ -590,6 +589,7 @@ const struct proto_ops inet6_stream_ops = {
 	.compat_setsockopt = compat_sock_common_setsockopt,
 	.compat_getsockopt = compat_sock_common_getsockopt,
 #endif
+	.set_rcvlowat	   = tcp_set_rcvlowat,
 };
 
 const struct proto_ops inet6_dgram_ops = {
diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
index bbcabbb..0250d19 100644
--- a/net/ipv6/anycast.c
+++ b/net/ipv6/anycast.c
@@ -212,16 +212,14 @@ static void aca_get(struct ifacaddr6 *aca)
 static void aca_put(struct ifacaddr6 *ac)
 {
 	if (refcount_dec_and_test(&ac->aca_refcnt)) {
-		in6_dev_put(ac->aca_idev);
-		dst_release(&ac->aca_rt->dst);
+		fib6_info_release(ac->aca_rt);
 		kfree(ac);
 	}
 }
 
-static struct ifacaddr6 *aca_alloc(struct rt6_info *rt,
+static struct ifacaddr6 *aca_alloc(struct fib6_info *f6i,
 				   const struct in6_addr *addr)
 {
-	struct inet6_dev *idev = rt->rt6i_idev;
 	struct ifacaddr6 *aca;
 
 	aca = kzalloc(sizeof(*aca), GFP_ATOMIC);
@@ -229,9 +227,8 @@ static struct ifacaddr6 *aca_alloc(struct rt6_info *rt,
 		return NULL;
 
 	aca->aca_addr = *addr;
-	in6_dev_hold(idev);
-	aca->aca_idev = idev;
-	aca->aca_rt = rt;
+	fib6_info_hold(f6i);
+	aca->aca_rt = f6i;
 	aca->aca_users = 1;
 	/* aca_tstamp should be updated upon changes */
 	aca->aca_cstamp = aca->aca_tstamp = jiffies;
@@ -246,7 +243,8 @@ static struct ifacaddr6 *aca_alloc(struct rt6_info *rt,
 int __ipv6_dev_ac_inc(struct inet6_dev *idev, const struct in6_addr *addr)
 {
 	struct ifacaddr6 *aca;
-	struct rt6_info *rt;
+	struct fib6_info *f6i;
+	struct net *net;
 	int err;
 
 	ASSERT_RTNL();
@@ -265,14 +263,15 @@ int __ipv6_dev_ac_inc(struct inet6_dev *idev, const struct in6_addr *addr)
 		}
 	}
 
-	rt = addrconf_dst_alloc(idev, addr, true);
-	if (IS_ERR(rt)) {
-		err = PTR_ERR(rt);
+	net = dev_net(idev->dev);
+	f6i = addrconf_f6i_alloc(net, idev, addr, true, GFP_ATOMIC);
+	if (IS_ERR(f6i)) {
+		err = PTR_ERR(f6i);
 		goto out;
 	}
-	aca = aca_alloc(rt, addr);
+	aca = aca_alloc(f6i, addr);
 	if (!aca) {
-		ip6_rt_put(rt);
+		fib6_info_release(f6i);
 		err = -ENOMEM;
 		goto out;
 	}
@@ -286,7 +285,7 @@ int __ipv6_dev_ac_inc(struct inet6_dev *idev, const struct in6_addr *addr)
 	aca_get(aca);
 	write_unlock_bh(&idev->lock);
 
-	ip6_ins_rt(rt);
+	ip6_ins_rt(net, f6i);
 
 	addrconf_join_solict(idev->dev, &aca->aca_addr);
 
@@ -328,8 +327,7 @@ int __ipv6_dev_ac_dec(struct inet6_dev *idev, const struct in6_addr *addr)
 	write_unlock_bh(&idev->lock);
 	addrconf_leave_solict(idev, &aca->aca_addr);
 
-	dst_hold(&aca->aca_rt->dst);
-	ip6_del_rt(aca->aca_rt);
+	ip6_del_rt(dev_net(idev->dev), aca->aca_rt);
 
 	aca_put(aca);
 	return 0;
@@ -356,8 +354,7 @@ void ipv6_ac_destroy_dev(struct inet6_dev *idev)
 
 		addrconf_leave_solict(idev, &aca->aca_addr);
 
-		dst_hold(&aca->aca_rt->dst);
-		ip6_del_rt(aca->aca_rt);
+		ip6_del_rt(dev_net(idev->dev), aca->aca_rt);
 
 		aca_put(aca);
 
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index bc68eb6..5bc2bf3 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -280,6 +280,7 @@ static const struct tlvtype_proc tlvprocdestopt_lst[] = {
 
 static int ipv6_destopt_rcv(struct sk_buff *skb)
 {
+	struct inet6_dev *idev = __in6_dev_get(skb->dev);
 	struct inet6_skb_parm *opt = IP6CB(skb);
 #if IS_ENABLED(CONFIG_IPV6_MIP6)
 	__u16 dstbuf;
@@ -291,7 +292,7 @@ static int ipv6_destopt_rcv(struct sk_buff *skb)
 	if (!pskb_may_pull(skb, skb_transport_offset(skb) + 8) ||
 	    !pskb_may_pull(skb, (skb_transport_offset(skb) +
 				 ((skb_transport_header(skb)[1] + 1) << 3)))) {
-		__IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
+		__IP6_INC_STATS(dev_net(dst->dev), idev,
 				IPSTATS_MIB_INHDRERRORS);
 fail_and_free:
 		kfree_skb(skb);
@@ -319,8 +320,7 @@ static int ipv6_destopt_rcv(struct sk_buff *skb)
 		return 1;
 	}
 
-	__IP6_INC_STATS(dev_net(dst->dev),
-			ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS);
+	__IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);
 	return -1;
 }
 
@@ -416,8 +416,7 @@ static int ipv6_srh_rcv(struct sk_buff *skb)
 	}
 
 	if (hdr->segments_left >= (hdr->hdrlen >> 1)) {
-		__IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
-				IPSTATS_MIB_INHDRERRORS);
+		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);
 		icmpv6_param_prob(skb, ICMPV6_HDR_FIELD,
 				  ((&hdr->segments_left) -
 				   skb_network_header(skb)));
@@ -456,8 +455,7 @@ static int ipv6_srh_rcv(struct sk_buff *skb)
 
 	if (skb_dst(skb)->dev->flags & IFF_LOOPBACK) {
 		if (ipv6_hdr(skb)->hop_limit <= 1) {
-			__IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
-					IPSTATS_MIB_INHDRERRORS);
+			__IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);
 			icmpv6_send(skb, ICMPV6_TIME_EXCEED,
 				    ICMPV6_EXC_HOPLIMIT, 0);
 			kfree_skb(skb);
@@ -481,10 +479,10 @@ static int ipv6_srh_rcv(struct sk_buff *skb)
 /* called with rcu_read_lock() */
 static int ipv6_rthdr_rcv(struct sk_buff *skb)
 {
+	struct inet6_dev *idev = __in6_dev_get(skb->dev);
 	struct inet6_skb_parm *opt = IP6CB(skb);
 	struct in6_addr *addr = NULL;
 	struct in6_addr daddr;
-	struct inet6_dev *idev;
 	int n, i;
 	struct ipv6_rt_hdr *hdr;
 	struct rt0_hdr *rthdr;
@@ -498,8 +496,7 @@ static int ipv6_rthdr_rcv(struct sk_buff *skb)
 	if (!pskb_may_pull(skb, skb_transport_offset(skb) + 8) ||
 	    !pskb_may_pull(skb, (skb_transport_offset(skb) +
 				 ((skb_transport_header(skb)[1] + 1) << 3)))) {
-		__IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
-				IPSTATS_MIB_INHDRERRORS);
+		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);
 		kfree_skb(skb);
 		return -1;
 	}
@@ -508,8 +505,7 @@ static int ipv6_rthdr_rcv(struct sk_buff *skb)
 
 	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr) ||
 	    skb->pkt_type != PACKET_HOST) {
-		__IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
-				IPSTATS_MIB_INADDRERRORS);
+		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INADDRERRORS);
 		kfree_skb(skb);
 		return -1;
 	}
@@ -527,7 +523,7 @@ static int ipv6_rthdr_rcv(struct sk_buff *skb)
 			 * processed by own
 			 */
 			if (!addr) {
-				__IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
+				__IP6_INC_STATS(net, idev,
 						IPSTATS_MIB_INADDRERRORS);
 				kfree_skb(skb);
 				return -1;
@@ -553,8 +549,7 @@ static int ipv6_rthdr_rcv(struct sk_buff *skb)
 			goto unknown_rh;
 		/* Silently discard invalid RTH type 2 */
 		if (hdr->hdrlen != 2 || hdr->segments_left != 1) {
-			__IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
-					IPSTATS_MIB_INHDRERRORS);
+			__IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);
 			kfree_skb(skb);
 			return -1;
 		}
@@ -572,8 +567,7 @@ static int ipv6_rthdr_rcv(struct sk_buff *skb)
 	n = hdr->hdrlen >> 1;
 
 	if (hdr->segments_left > n) {
-		__IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
-				IPSTATS_MIB_INHDRERRORS);
+		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);
 		icmpv6_param_prob(skb, ICMPV6_HDR_FIELD,
 				  ((&hdr->segments_left) -
 				   skb_network_header(skb)));
@@ -609,14 +603,12 @@ static int ipv6_rthdr_rcv(struct sk_buff *skb)
 		if (xfrm6_input_addr(skb, (xfrm_address_t *)addr,
 				     (xfrm_address_t *)&ipv6_hdr(skb)->saddr,
 				     IPPROTO_ROUTING) < 0) {
-			__IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
-					IPSTATS_MIB_INADDRERRORS);
+			__IP6_INC_STATS(net, idev, IPSTATS_MIB_INADDRERRORS);
 			kfree_skb(skb);
 			return -1;
 		}
 		if (!ipv6_chk_home_addr(dev_net(skb_dst(skb)->dev), addr)) {
-			__IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
-					IPSTATS_MIB_INADDRERRORS);
+			__IP6_INC_STATS(net, idev, IPSTATS_MIB_INADDRERRORS);
 			kfree_skb(skb);
 			return -1;
 		}
@@ -627,8 +619,7 @@ static int ipv6_rthdr_rcv(struct sk_buff *skb)
 	}
 
 	if (ipv6_addr_is_multicast(addr)) {
-		__IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
-				IPSTATS_MIB_INADDRERRORS);
+		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INADDRERRORS);
 		kfree_skb(skb);
 		return -1;
 	}
@@ -647,8 +638,7 @@ static int ipv6_rthdr_rcv(struct sk_buff *skb)
 
 	if (skb_dst(skb)->dev->flags&IFF_LOOPBACK) {
 		if (ipv6_hdr(skb)->hop_limit <= 1) {
-			__IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
-					IPSTATS_MIB_INHDRERRORS);
+			__IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);
 			icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
 				    0);
 			kfree_skb(skb);
@@ -663,7 +653,7 @@ static int ipv6_rthdr_rcv(struct sk_buff *skb)
 	return -1;
 
 unknown_rh:
-	__IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_INHDRERRORS);
+	__IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);
 	icmpv6_param_prob(skb, ICMPV6_HDR_FIELD,
 			  (&hdr->type) - skb_network_header(skb));
 	return -1;
@@ -755,34 +745,31 @@ static bool ipv6_hop_ra(struct sk_buff *skb, int optoff)
 static bool ipv6_hop_jumbo(struct sk_buff *skb, int optoff)
 {
 	const unsigned char *nh = skb_network_header(skb);
+	struct inet6_dev *idev = __in6_dev_get_safely(skb->dev);
 	struct net *net = ipv6_skb_net(skb);
 	u32 pkt_len;
 
 	if (nh[optoff + 1] != 4 || (optoff & 3) != 2) {
 		net_dbg_ratelimited("ipv6_hop_jumbo: wrong jumbo opt length/alignment %d\n",
 				    nh[optoff+1]);
-		__IP6_INC_STATS(net, ipv6_skb_idev(skb),
-				IPSTATS_MIB_INHDRERRORS);
+		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);
 		goto drop;
 	}
 
 	pkt_len = ntohl(*(__be32 *)(nh + optoff + 2));
 	if (pkt_len <= IPV6_MAXPLEN) {
-		__IP6_INC_STATS(net, ipv6_skb_idev(skb),
-				IPSTATS_MIB_INHDRERRORS);
+		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);
 		icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, optoff+2);
 		return false;
 	}
 	if (ipv6_hdr(skb)->payload_len) {
-		__IP6_INC_STATS(net, ipv6_skb_idev(skb),
-				IPSTATS_MIB_INHDRERRORS);
+		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);
 		icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, optoff);
 		return false;
 	}
 
 	if (pkt_len > skb->len - sizeof(struct ipv6hdr)) {
-		__IP6_INC_STATS(net, ipv6_skb_idev(skb),
-				IPSTATS_MIB_INTRUNCATEDPKTS);
+		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INTRUNCATEDPKTS);
 		goto drop;
 	}
 
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index deab2db..6421c89 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -43,7 +43,7 @@ static struct kmem_cache *fib6_node_kmem __read_mostly;
 struct fib6_cleaner {
 	struct fib6_walker w;
 	struct net *net;
-	int (*func)(struct rt6_info *, void *arg);
+	int (*func)(struct fib6_info *, void *arg);
 	int sernum;
 	void *arg;
 };
@@ -54,7 +54,7 @@ struct fib6_cleaner {
 #define FWS_INIT FWS_L
 #endif
 
-static struct rt6_info *fib6_find_prefix(struct net *net,
+static struct fib6_info *fib6_find_prefix(struct net *net,
 					 struct fib6_table *table,
 					 struct fib6_node *fn);
 static struct fib6_node *fib6_repair_tree(struct net *net,
@@ -105,13 +105,12 @@ enum {
 	FIB6_NO_SERNUM_CHANGE = 0,
 };
 
-void fib6_update_sernum(struct rt6_info *rt)
+void fib6_update_sernum(struct net *net, struct fib6_info *f6i)
 {
-	struct net *net = dev_net(rt->dst.dev);
 	struct fib6_node *fn;
 
-	fn = rcu_dereference_protected(rt->rt6i_node,
-			lockdep_is_held(&rt->rt6i_table->tb6_lock));
+	fn = rcu_dereference_protected(f6i->fib6_node,
+			lockdep_is_held(&f6i->fib6_table->tb6_lock));
 	if (fn)
 		fn->fn_sernum = fib6_new_sernum(net);
 }
@@ -146,6 +145,69 @@ static __be32 addr_bit_set(const void *token, int fn_bit)
 	       addr[fn_bit >> 5];
 }
 
+struct fib6_info *fib6_info_alloc(gfp_t gfp_flags)
+{
+	struct fib6_info *f6i;
+
+	f6i = kzalloc(sizeof(*f6i), gfp_flags);
+	if (!f6i)
+		return NULL;
+
+	f6i->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, gfp_flags);
+	if (!f6i->rt6i_pcpu) {
+		kfree(f6i);
+		return NULL;
+	}
+
+	INIT_LIST_HEAD(&f6i->fib6_siblings);
+	f6i->fib6_metrics = (struct dst_metrics *)&dst_default_metrics;
+
+	atomic_inc(&f6i->fib6_ref);
+
+	return f6i;
+}
+
+void fib6_info_destroy(struct fib6_info *f6i)
+{
+	struct rt6_exception_bucket *bucket;
+	struct dst_metrics *m;
+
+	WARN_ON(f6i->fib6_node);
+
+	bucket = rcu_dereference_protected(f6i->rt6i_exception_bucket, 1);
+	if (bucket) {
+		f6i->rt6i_exception_bucket = NULL;
+		kfree(bucket);
+	}
+
+	if (f6i->rt6i_pcpu) {
+		int cpu;
+
+		for_each_possible_cpu(cpu) {
+			struct rt6_info **ppcpu_rt;
+			struct rt6_info *pcpu_rt;
+
+			ppcpu_rt = per_cpu_ptr(f6i->rt6i_pcpu, cpu);
+			pcpu_rt = *ppcpu_rt;
+			if (pcpu_rt) {
+				dst_dev_put(&pcpu_rt->dst);
+				dst_release(&pcpu_rt->dst);
+				*ppcpu_rt = NULL;
+			}
+		}
+	}
+
+	if (f6i->fib6_nh.nh_dev)
+		dev_put(f6i->fib6_nh.nh_dev);
+
+	m = f6i->fib6_metrics;
+	if (m != &dst_default_metrics && refcount_dec_and_test(&m->refcnt))
+		kfree(m);
+
+	kfree(f6i);
+}
+EXPORT_SYMBOL_GPL(fib6_info_destroy);
+
 static struct fib6_node *node_alloc(struct net *net)
 {
 	struct fib6_node *fn;
@@ -176,28 +238,6 @@ static void node_free(struct net *net, struct fib6_node *fn)
 	net->ipv6.rt6_stats->fib_nodes--;
 }
 
-void rt6_free_pcpu(struct rt6_info *non_pcpu_rt)
-{
-	int cpu;
-
-	if (!non_pcpu_rt->rt6i_pcpu)
-		return;
-
-	for_each_possible_cpu(cpu) {
-		struct rt6_info **ppcpu_rt;
-		struct rt6_info *pcpu_rt;
-
-		ppcpu_rt = per_cpu_ptr(non_pcpu_rt->rt6i_pcpu, cpu);
-		pcpu_rt = *ppcpu_rt;
-		if (pcpu_rt) {
-			dst_dev_put(&pcpu_rt->dst);
-			dst_release(&pcpu_rt->dst);
-			*ppcpu_rt = NULL;
-		}
-	}
-}
-EXPORT_SYMBOL_GPL(rt6_free_pcpu);
-
 static void fib6_free_table(struct fib6_table *table)
 {
 	inetpeer_invalidate_tree(&table->tb6_peers);
@@ -232,7 +272,7 @@ static struct fib6_table *fib6_alloc_table(struct net *net, u32 id)
 	if (table) {
 		table->tb6_id = id;
 		rcu_assign_pointer(table->tb6_root.leaf,
-				   net->ipv6.ip6_null_entry);
+				   net->ipv6.fib6_null_entry);
 		table->tb6_root.fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO;
 		inet_peer_base_init(&table->tb6_peers);
 	}
@@ -340,7 +380,7 @@ unsigned int fib6_tables_seq_read(struct net *net)
 
 static int call_fib6_entry_notifier(struct notifier_block *nb, struct net *net,
 				    enum fib_event_type event_type,
-				    struct rt6_info *rt)
+				    struct fib6_info *rt)
 {
 	struct fib6_entry_notifier_info info = {
 		.rt = rt,
@@ -351,7 +391,7 @@ static int call_fib6_entry_notifier(struct notifier_block *nb, struct net *net,
 
 static int call_fib6_entry_notifiers(struct net *net,
 				     enum fib_event_type event_type,
-				     struct rt6_info *rt,
+				     struct fib6_info *rt,
 				     struct netlink_ext_ack *extack)
 {
 	struct fib6_entry_notifier_info info = {
@@ -359,7 +399,7 @@ static int call_fib6_entry_notifiers(struct net *net,
 		.rt = rt,
 	};
 
-	rt->rt6i_table->fib_seq++;
+	rt->fib6_table->fib_seq++;
 	return call_fib6_notifiers(net, event_type, &info.info);
 }
 
@@ -368,16 +408,16 @@ struct fib6_dump_arg {
 	struct notifier_block *nb;
 };
 
-static void fib6_rt_dump(struct rt6_info *rt, struct fib6_dump_arg *arg)
+static void fib6_rt_dump(struct fib6_info *rt, struct fib6_dump_arg *arg)
 {
-	if (rt == arg->net->ipv6.ip6_null_entry)
+	if (rt == arg->net->ipv6.fib6_null_entry)
 		return;
 	call_fib6_entry_notifier(arg->nb, arg->net, FIB_EVENT_ENTRY_ADD, rt);
 }
 
 static int fib6_node_dump(struct fib6_walker *w)
 {
-	struct rt6_info *rt;
+	struct fib6_info *rt;
 
 	for_each_fib6_walker_rt(w)
 		fib6_rt_dump(rt, w->args);
@@ -426,7 +466,7 @@ int fib6_tables_dump(struct net *net, struct notifier_block *nb)
 static int fib6_dump_node(struct fib6_walker *w)
 {
 	int res;
-	struct rt6_info *rt;
+	struct fib6_info *rt;
 
 	for_each_fib6_walker_rt(w) {
 		res = rt6_dump_route(rt, w->args);
@@ -441,10 +481,10 @@ static int fib6_dump_node(struct fib6_walker *w)
 		 * last sibling of this route (no need to dump the
 		 * sibling routes again)
 		 */
-		if (rt->rt6i_nsiblings)
-			rt = list_last_entry(&rt->rt6i_siblings,
-					     struct rt6_info,
-					     rt6i_siblings);
+		if (rt->fib6_nsiblings)
+			rt = list_last_entry(&rt->fib6_siblings,
+					     struct fib6_info,
+					     fib6_siblings);
 	}
 	w->leaf = NULL;
 	return 0;
@@ -579,6 +619,24 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
 	return res;
 }
 
+void fib6_metric_set(struct fib6_info *f6i, int metric, u32 val)
+{
+	if (!f6i)
+		return;
+
+	if (f6i->fib6_metrics == &dst_default_metrics) {
+		struct dst_metrics *p = kzalloc(sizeof(*p), GFP_ATOMIC);
+
+		if (!p)
+			return;
+
+		refcount_set(&p->refcnt, 1);
+		f6i->fib6_metrics = p;
+	}
+
+	f6i->fib6_metrics->metrics[metric - 1] = val;
+}
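
fib6_metric_set() is a copy-on-write helper: every fib6_info starts out sharing the read-only dst_default_metrics block, and only the first write allocates a private refcounted copy, so routes that never override a metric cost nothing extra. A rough user-space sketch of the idiom, with illustrative names (struct metrics, entry_metric_set()) rather than the kernel's:

#include <stdio.h>
#include <stdlib.h>

#define MX_MAX 16

struct metrics {
	int refcnt;
	unsigned int mx[MX_MAX];
};

/* shared block; by convention it is never written and never freed */
static struct metrics default_metrics;

struct entry {
	struct metrics *m;
};

static int entry_metric_set(struct entry *e, int idx, unsigned int val)
{
	if (e->m == &default_metrics) {	/* first write: go private */
		struct metrics *p = calloc(1, sizeof(*p));

		if (!p)
			return -1;	/* keep sharing the defaults */
		p->refcnt = 1;
		e->m = p;
	}
	e->m->mx[idx] = val;
	return 0;
}

int main(void)
{
	struct entry e = { .m = &default_metrics };

	entry_metric_set(&e, 1, 1500);	/* allocates a private copy */
	printf("mx[1] = %u\n", e.m->mx[1]);
	return 0;
}
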
+
 /*
  *	Routing Table
  *
@@ -608,7 +666,7 @@ static struct fib6_node *fib6_add_1(struct net *net,
 	fn = root;
 
 	do {
-		struct rt6_info *leaf = rcu_dereference_protected(fn->leaf,
+		struct fib6_info *leaf = rcu_dereference_protected(fn->leaf,
 					    lockdep_is_held(&table->tb6_lock));
 		key = (struct rt6key *)((u8 *)leaf + offset);
 
@@ -637,11 +695,11 @@ static struct fib6_node *fib6_add_1(struct net *net,
 			/* clean up an intermediate node */
 			if (!(fn->fn_flags & RTN_RTINFO)) {
 				RCU_INIT_POINTER(fn->leaf, NULL);
-				rt6_release(leaf);
+				fib6_info_release(leaf);
 			/* remove null_entry in the root node */
 			} else if (fn->fn_flags & RTN_TL_ROOT &&
 				   rcu_access_pointer(fn->leaf) ==
-				   net->ipv6.ip6_null_entry) {
+				   net->ipv6.fib6_null_entry) {
 				RCU_INIT_POINTER(fn->leaf, NULL);
 			}
 
@@ -750,7 +808,7 @@ static struct fib6_node *fib6_add_1(struct net *net,
 		RCU_INIT_POINTER(in->parent, pn);
 		in->leaf = fn->leaf;
 		atomic_inc(&rcu_dereference_protected(in->leaf,
-				lockdep_is_held(&table->tb6_lock))->rt6i_ref);
+				lockdep_is_held(&table->tb6_lock))->fib6_ref);
 
 		/* update parent pointer */
 		if (dir)
@@ -802,44 +860,37 @@ static struct fib6_node *fib6_add_1(struct net *net,
 	return ln;
 }
 
-static void fib6_copy_metrics(u32 *mp, const struct mx6_config *mxc)
+static void fib6_drop_pcpu_from(struct fib6_info *f6i,
+				const struct fib6_table *table)
 {
-	int i;
+	int cpu;
 
-	for (i = 0; i < RTAX_MAX; i++) {
-		if (test_bit(i, mxc->mx_valid))
-			mp[i] = mxc->mx[i];
+	/* release the reference to this fib entry from
+	 * all of its cached pcpu routes
+	 */
+	for_each_possible_cpu(cpu) {
+		struct rt6_info **ppcpu_rt;
+		struct rt6_info *pcpu_rt;
+
+		ppcpu_rt = per_cpu_ptr(f6i->rt6i_pcpu, cpu);
+		pcpu_rt = *ppcpu_rt;
+		if (pcpu_rt) {
+			struct fib6_info *from;
+
+			from = rcu_dereference_protected(pcpu_rt->from,
+					     lockdep_is_held(&table->tb6_lock));
+			rcu_assign_pointer(pcpu_rt->from, NULL);
+			fib6_info_release(from);
+		}
 	}
 }
 
-static int fib6_commit_metrics(struct dst_entry *dst, struct mx6_config *mxc)
-{
-	if (!mxc->mx)
-		return 0;
-
-	if (dst->flags & DST_HOST) {
-		u32 *mp = dst_metrics_write_ptr(dst);
-
-		if (unlikely(!mp))
-			return -ENOMEM;
-
-		fib6_copy_metrics(mp, mxc);
-	} else {
-		dst_init_metrics(dst, mxc->mx, false);
-
-		/* We've stolen mx now. */
-		mxc->mx = NULL;
-	}
-
-	return 0;
-}
-
-static void fib6_purge_rt(struct rt6_info *rt, struct fib6_node *fn,
+static void fib6_purge_rt(struct fib6_info *rt, struct fib6_node *fn,
 			  struct net *net)
 {
-	struct fib6_table *table = rt->rt6i_table;
+	struct fib6_table *table = rt->fib6_table;
 
-	if (atomic_read(&rt->rt6i_ref) != 1) {
+	if (atomic_read(&rt->fib6_ref) != 1) {
 		/* This route is used as a dummy address holder in some split
 		 * nodes. It is not leaked, but it still holds other resources,
 		 * which must be released in time. So, scan ascendant nodes
@@ -847,18 +898,22 @@ static void fib6_purge_rt(struct rt6_info *rt, struct fib6_node *fn,
 		 * to still-alive ones.
 		 */
 		while (fn) {
-			struct rt6_info *leaf = rcu_dereference_protected(fn->leaf,
+			struct fib6_info *leaf = rcu_dereference_protected(fn->leaf,
 					    lockdep_is_held(&table->tb6_lock));
-			struct rt6_info *new_leaf;
+			struct fib6_info *new_leaf;
 			if (!(fn->fn_flags & RTN_RTINFO) && leaf == rt) {
 				new_leaf = fib6_find_prefix(net, table, fn);
-				atomic_inc(&new_leaf->rt6i_ref);
+				atomic_inc(&new_leaf->fib6_ref);
+
 				rcu_assign_pointer(fn->leaf, new_leaf);
-				rt6_release(rt);
+				fib6_info_release(rt);
 			}
 			fn = rcu_dereference_protected(fn->parent,
 				    lockdep_is_held(&table->tb6_lock));
 		}
+
+		if (rt->rt6i_pcpu)
+			fib6_drop_pcpu_from(rt, table);
 	}
 }
 
@@ -866,15 +921,15 @@ static void fib6_purge_rt(struct rt6_info *rt, struct fib6_node *fn,
  *	Insert routing information in a node.
  */
 
-static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
-			    struct nl_info *info, struct mx6_config *mxc,
+static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt,
+			    struct nl_info *info,
 			    struct netlink_ext_ack *extack)
 {
-	struct rt6_info *leaf = rcu_dereference_protected(fn->leaf,
-				    lockdep_is_held(&rt->rt6i_table->tb6_lock));
-	struct rt6_info *iter = NULL;
-	struct rt6_info __rcu **ins;
-	struct rt6_info __rcu **fallback_ins = NULL;
+	struct fib6_info *leaf = rcu_dereference_protected(fn->leaf,
+				    lockdep_is_held(&rt->fib6_table->tb6_lock));
+	struct fib6_info *iter = NULL;
+	struct fib6_info __rcu **ins;
+	struct fib6_info __rcu **fallback_ins = NULL;
 	int replace = (info->nlh &&
 		       (info->nlh->nlmsg_flags & NLM_F_REPLACE));
 	int add = (!info->nlh ||
@@ -891,12 +946,12 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
 
 	for (iter = leaf; iter;
 	     iter = rcu_dereference_protected(iter->rt6_next,
-				lockdep_is_held(&rt->rt6i_table->tb6_lock))) {
+				lockdep_is_held(&rt->fib6_table->tb6_lock))) {
 		/*
 		 *	Search for duplicates
 		 */
 
-		if (iter->rt6i_metric == rt->rt6i_metric) {
+		if (iter->fib6_metric == rt->fib6_metric) {
 			/*
 			 *	Same priority level
 			 */
@@ -916,15 +971,15 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
 			}
 
 			if (rt6_duplicate_nexthop(iter, rt)) {
-				if (rt->rt6i_nsiblings)
-					rt->rt6i_nsiblings = 0;
-				if (!(iter->rt6i_flags & RTF_EXPIRES))
+				if (rt->fib6_nsiblings)
+					rt->fib6_nsiblings = 0;
+				if (!(iter->fib6_flags & RTF_EXPIRES))
 					return -EEXIST;
-				if (!(rt->rt6i_flags & RTF_EXPIRES))
-					rt6_clean_expires(iter);
+				if (!(rt->fib6_flags & RTF_EXPIRES))
+					fib6_clean_expires(iter);
 				else
-					rt6_set_expires(iter, rt->dst.expires);
-				iter->rt6i_pmtu = rt->rt6i_pmtu;
+					fib6_set_expires(iter, rt->expires);
+				fib6_metric_set(iter, RTAX_MTU, rt->fib6_pmtu);
 				return -EEXIST;
 			}
 			/* If we have the same destination and the same metric,
@@ -940,10 +995,10 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
 			 */
 			if (rt_can_ecmp &&
 			    rt6_qualify_for_ecmp(iter))
-				rt->rt6i_nsiblings++;
+				rt->fib6_nsiblings++;
 		}
 
-		if (iter->rt6i_metric > rt->rt6i_metric)
+		if (iter->fib6_metric > rt->fib6_metric)
 			break;
 
 next_iter:
@@ -954,7 +1009,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
 		/* No ECMP-able route found, replace first non-ECMP one */
 		ins = fallback_ins;
 		iter = rcu_dereference_protected(*ins,
-				    lockdep_is_held(&rt->rt6i_table->tb6_lock));
+				    lockdep_is_held(&rt->fib6_table->tb6_lock));
 		found++;
 	}
 
@@ -963,34 +1018,34 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
 		fn->rr_ptr = NULL;
 
 	/* Link this route to others same route. */
-	if (rt->rt6i_nsiblings) {
-		unsigned int rt6i_nsiblings;
-		struct rt6_info *sibling, *temp_sibling;
+	if (rt->fib6_nsiblings) {
+		unsigned int fib6_nsiblings;
+		struct fib6_info *sibling, *temp_sibling;
 
 		/* Find the first route that has the same metric */
 		sibling = leaf;
 		while (sibling) {
-			if (sibling->rt6i_metric == rt->rt6i_metric &&
+			if (sibling->fib6_metric == rt->fib6_metric &&
 			    rt6_qualify_for_ecmp(sibling)) {
-				list_add_tail(&rt->rt6i_siblings,
-					      &sibling->rt6i_siblings);
+				list_add_tail(&rt->fib6_siblings,
+					      &sibling->fib6_siblings);
 				break;
 			}
 			sibling = rcu_dereference_protected(sibling->rt6_next,
-				    lockdep_is_held(&rt->rt6i_table->tb6_lock));
+				    lockdep_is_held(&rt->fib6_table->tb6_lock));
 		}
 		/* For each sibling in the list, increment the counter of
 		 * siblings. BUG() if the counters do not match; the list of
 		 * siblings is broken!
 		 */
-		rt6i_nsiblings = 0;
+		fib6_nsiblings = 0;
 		list_for_each_entry_safe(sibling, temp_sibling,
-					 &rt->rt6i_siblings, rt6i_siblings) {
-			sibling->rt6i_nsiblings++;
-			BUG_ON(sibling->rt6i_nsiblings != rt->rt6i_nsiblings);
-			rt6i_nsiblings++;
+					 &rt->fib6_siblings, fib6_siblings) {
+			sibling->fib6_nsiblings++;
+			BUG_ON(sibling->fib6_nsiblings != rt->fib6_nsiblings);
+			fib6_nsiblings++;
 		}
-		BUG_ON(rt6i_nsiblings != rt->rt6i_nsiblings);
+		BUG_ON(fib6_nsiblings != rt->fib6_nsiblings);
 		rt6_multipath_rebalance(temp_sibling);
 	}
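
The block above links an ECMP route into the circular siblings list of the first route with the same metric, then cross-checks that every member's sibling counter agrees. A self-contained sketch of that bookkeeping, using a minimal hand-rolled list rather than <linux/list.h>, and with the counter semantics approximated (each route counts its peers, excluding itself):

#include <assert.h>
#include <stddef.h>

struct list_node {
	struct list_node *prev, *next;
};

struct route {
	unsigned int nsiblings;		/* peers sharing dst and metric */
	struct list_node siblings;
};

static void list_init(struct list_node *n)
{
	n->prev = n->next = n;
}

static void list_add_tail(struct list_node *n, struct list_node *head)
{
	n->prev = head->prev;
	n->next = head;
	head->prev->next = n;
	head->prev = n;
}

/* Link @rt behind @first, bump every member's sibling count and
 * cross-check the counters, mirroring the BUG_ON() above.
 */
static void ecmp_link(struct route *rt, struct route *first)
{
	struct list_node *pos;

	rt->nsiblings = first->nsiblings + 1;
	list_add_tail(&rt->siblings, &first->siblings);

	for (pos = rt->siblings.next; pos != &rt->siblings; pos = pos->next) {
		struct route *r = (struct route *)((char *)pos -
				offsetof(struct route, siblings));

		r->nsiblings++;
		assert(r->nsiblings == rt->nsiblings);
	}
}

int main(void)
{
	struct route a = { 0 }, b = { 0 }, c = { 0 };

	list_init(&a.siblings);
	ecmp_link(&b, &a);	/* a and b: one sibling each */
	ecmp_link(&c, &a);	/* three-way ECMP: two siblings each */
	assert(a.nsiblings == 2 && b.nsiblings == 2 && c.nsiblings == 2);
	return 0;
}
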
 
@@ -1003,9 +1058,6 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
 
 add:
 		nlflags |= NLM_F_CREATE;
-		err = fib6_commit_metrics(&rt->dst, mxc);
-		if (err)
-			return err;
 
 		err = call_fib6_entry_notifiers(info->nl_net,
 						FIB_EVENT_ENTRY_ADD,
@@ -1014,8 +1066,8 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
 			return err;
 
 		rcu_assign_pointer(rt->rt6_next, iter);
-		atomic_inc(&rt->rt6i_ref);
-		rcu_assign_pointer(rt->rt6i_node, fn);
+		atomic_inc(&rt->fib6_ref);
+		rcu_assign_pointer(rt->fib6_node, fn);
 		rcu_assign_pointer(*ins, rt);
 		if (!info->skip_notify)
 			inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags);
@@ -1036,18 +1088,14 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
 			return -ENOENT;
 		}
 
-		err = fib6_commit_metrics(&rt->dst, mxc);
-		if (err)
-			return err;
-
 		err = call_fib6_entry_notifiers(info->nl_net,
 						FIB_EVENT_ENTRY_REPLACE,
 						rt, extack);
 		if (err)
 			return err;
 
-		atomic_inc(&rt->rt6i_ref);
-		rcu_assign_pointer(rt->rt6i_node, fn);
+		atomic_inc(&rt->fib6_ref);
+		rcu_assign_pointer(rt->fib6_node, fn);
 		rt->rt6_next = iter->rt6_next;
 		rcu_assign_pointer(*ins, rt);
 		if (!info->skip_notify)
@@ -1056,35 +1104,35 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
 			info->nl_net->ipv6.rt6_stats->fib_route_nodes++;
 			fn->fn_flags |= RTN_RTINFO;
 		}
-		nsiblings = iter->rt6i_nsiblings;
-		iter->rt6i_node = NULL;
+		nsiblings = iter->fib6_nsiblings;
+		iter->fib6_node = NULL;
 		fib6_purge_rt(iter, fn, info->nl_net);
 		if (rcu_access_pointer(fn->rr_ptr) == iter)
 			fn->rr_ptr = NULL;
-		rt6_release(iter);
+		fib6_info_release(iter);
 
 		if (nsiblings) {
 			/* Replacing an ECMP route, remove all siblings */
 			ins = &rt->rt6_next;
 			iter = rcu_dereference_protected(*ins,
-				    lockdep_is_held(&rt->rt6i_table->tb6_lock));
+				    lockdep_is_held(&rt->fib6_table->tb6_lock));
 			while (iter) {
-				if (iter->rt6i_metric > rt->rt6i_metric)
+				if (iter->fib6_metric > rt->fib6_metric)
 					break;
 				if (rt6_qualify_for_ecmp(iter)) {
 					*ins = iter->rt6_next;
-					iter->rt6i_node = NULL;
+					iter->fib6_node = NULL;
 					fib6_purge_rt(iter, fn, info->nl_net);
 					if (rcu_access_pointer(fn->rr_ptr) == iter)
 						fn->rr_ptr = NULL;
-					rt6_release(iter);
+					fib6_info_release(iter);
 					nsiblings--;
 					info->nl_net->ipv6.rt6_stats->fib_rt_entries--;
 				} else {
 					ins = &iter->rt6_next;
 				}
 				iter = rcu_dereference_protected(*ins,
-					lockdep_is_held(&rt->rt6i_table->tb6_lock));
+					lockdep_is_held(&rt->fib6_table->tb6_lock));
 			}
 			WARN_ON(nsiblings != 0);
 		}
@@ -1093,10 +1141,10 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
 	return 0;
 }
 
-static void fib6_start_gc(struct net *net, struct rt6_info *rt)
+static void fib6_start_gc(struct net *net, struct fib6_info *rt)
 {
 	if (!timer_pending(&net->ipv6.ip6_fib_timer) &&
-	    (rt->rt6i_flags & (RTF_EXPIRES | RTF_CACHE)))
+	    (rt->fib6_flags & RTF_EXPIRES))
 		mod_timer(&net->ipv6.ip6_fib_timer,
 			  jiffies + net->ipv6.sysctl.ip6_rt_gc_interval);
 }
@@ -1108,22 +1156,22 @@ void fib6_force_start_gc(struct net *net)
 			  jiffies + net->ipv6.sysctl.ip6_rt_gc_interval);
 }
 
-static void __fib6_update_sernum_upto_root(struct rt6_info *rt,
+static void __fib6_update_sernum_upto_root(struct fib6_info *rt,
 					   int sernum)
 {
-	struct fib6_node *fn = rcu_dereference_protected(rt->rt6i_node,
-				lockdep_is_held(&rt->rt6i_table->tb6_lock));
+	struct fib6_node *fn = rcu_dereference_protected(rt->fib6_node,
+				lockdep_is_held(&rt->fib6_table->tb6_lock));
 
 	/* paired with smp_rmb() in rt6_get_cookie_safe() */
 	smp_wmb();
 	while (fn) {
 		fn->fn_sernum = sernum;
 		fn = rcu_dereference_protected(fn->parent,
-				lockdep_is_held(&rt->rt6i_table->tb6_lock));
+				lockdep_is_held(&rt->fib6_table->tb6_lock));
 	}
 }
 
-void fib6_update_sernum_upto_root(struct net *net, struct rt6_info *rt)
+void fib6_update_sernum_upto_root(struct net *net, struct fib6_info *rt)
 {
 	__fib6_update_sernum_upto_root(rt, fib6_new_sernum(net));
 }
@@ -1135,22 +1183,16 @@ void fib6_update_sernum_upto_root(struct net *net, struct rt6_info *rt)
  *	Need to own table->tb6_lock
  */
 
-int fib6_add(struct fib6_node *root, struct rt6_info *rt,
-	     struct nl_info *info, struct mx6_config *mxc,
-	     struct netlink_ext_ack *extack)
+int fib6_add(struct fib6_node *root, struct fib6_info *rt,
+	     struct nl_info *info, struct netlink_ext_ack *extack)
 {
-	struct fib6_table *table = rt->rt6i_table;
+	struct fib6_table *table = rt->fib6_table;
 	struct fib6_node *fn, *pn = NULL;
 	int err = -ENOMEM;
 	int allow_create = 1;
 	int replace_required = 0;
 	int sernum = fib6_new_sernum(info->nl_net);
 
-	if (WARN_ON_ONCE(!atomic_read(&rt->dst.__refcnt)))
-		return -EINVAL;
-	if (WARN_ON_ONCE(rt->rt6i_flags & RTF_CACHE))
-		return -EINVAL;
-
 	if (info->nlh) {
 		if (!(info->nlh->nlmsg_flags & NLM_F_CREATE))
 			allow_create = 0;
@@ -1161,8 +1203,8 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
 		pr_warn("RTM_NEWROUTE with no NLM_F_CREATE or NLM_F_REPLACE\n");
 
 	fn = fib6_add_1(info->nl_net, table, root,
-			&rt->rt6i_dst.addr, rt->rt6i_dst.plen,
-			offsetof(struct rt6_info, rt6i_dst), allow_create,
+			&rt->fib6_dst.addr, rt->fib6_dst.plen,
+			offsetof(struct fib6_info, fib6_dst), allow_create,
 			replace_required, extack);
 	if (IS_ERR(fn)) {
 		err = PTR_ERR(fn);
@@ -1173,7 +1215,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
 	pn = fn;
 
 #ifdef CONFIG_IPV6_SUBTREES
-	if (rt->rt6i_src.plen) {
+	if (rt->fib6_src.plen) {
 		struct fib6_node *sn;
 
 		if (!rcu_access_pointer(fn->subtree)) {
@@ -1194,16 +1236,16 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
 			if (!sfn)
 				goto failure;
 
-			atomic_inc(&info->nl_net->ipv6.ip6_null_entry->rt6i_ref);
+			atomic_inc(&info->nl_net->ipv6.fib6_null_entry->fib6_ref);
 			rcu_assign_pointer(sfn->leaf,
-					   info->nl_net->ipv6.ip6_null_entry);
+					   info->nl_net->ipv6.fib6_null_entry);
 			sfn->fn_flags = RTN_ROOT;
 
 			/* Now add the first leaf node to new subtree */
 
 			sn = fib6_add_1(info->nl_net, table, sfn,
-					&rt->rt6i_src.addr, rt->rt6i_src.plen,
-					offsetof(struct rt6_info, rt6i_src),
+					&rt->fib6_src.addr, rt->fib6_src.plen,
+					offsetof(struct fib6_info, fib6_src),
 					allow_create, replace_required, extack);
 
 			if (IS_ERR(sn)) {
@@ -1221,8 +1263,8 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
 			rcu_assign_pointer(fn->subtree, sfn);
 		} else {
 			sn = fib6_add_1(info->nl_net, table, FIB6_SUBTREE(fn),
-					&rt->rt6i_src.addr, rt->rt6i_src.plen,
-					offsetof(struct rt6_info, rt6i_src),
+					&rt->fib6_src.addr, rt->fib6_src.plen,
+					offsetof(struct fib6_info, fib6_src),
 					allow_create, replace_required, extack);
 
 			if (IS_ERR(sn)) {
@@ -1235,9 +1277,9 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
 			if (fn->fn_flags & RTN_TL_ROOT) {
 				/* put back null_entry for root node */
 				rcu_assign_pointer(fn->leaf,
-					    info->nl_net->ipv6.ip6_null_entry);
+					    info->nl_net->ipv6.fib6_null_entry);
 			} else {
-				atomic_inc(&rt->rt6i_ref);
+				atomic_inc(&rt->fib6_ref);
 				rcu_assign_pointer(fn->leaf, rt);
 			}
 		}
@@ -1245,7 +1287,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
 	}
 #endif
 
-	err = fib6_add_rt2node(fn, rt, info, mxc, extack);
+	err = fib6_add_rt2node(fn, rt, info, extack);
 	if (!err) {
 		__fib6_update_sernum_upto_root(rt, sernum);
 		fib6_start_gc(info->nl_net, rt);
@@ -1259,13 +1301,13 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
 		 * super-tree leaf node we have to find a new one for it.
 		 */
 		if (pn != fn) {
-			struct rt6_info *pn_leaf =
+			struct fib6_info *pn_leaf =
 				rcu_dereference_protected(pn->leaf,
 				    lockdep_is_held(&table->tb6_lock));
 			if (pn_leaf == rt) {
 				pn_leaf = NULL;
 				RCU_INIT_POINTER(pn->leaf, NULL);
-				atomic_dec(&rt->rt6i_ref);
+				fib6_info_release(rt);
 			}
 			if (!pn_leaf && !(pn->fn_flags & RTN_RTINFO)) {
 				pn_leaf = fib6_find_prefix(info->nl_net, table,
@@ -1274,10 +1316,10 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
 				if (!pn_leaf) {
 					WARN_ON(!pn_leaf);
 					pn_leaf =
-					    info->nl_net->ipv6.ip6_null_entry;
+					    info->nl_net->ipv6.fib6_null_entry;
 				}
 #endif
-				atomic_inc(&pn_leaf->rt6i_ref);
+				fib6_info_hold(pn_leaf);
 				rcu_assign_pointer(pn->leaf, pn_leaf);
 			}
 		}
@@ -1299,10 +1341,6 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
 	     (fn->fn_flags & RTN_TL_ROOT &&
 	      !rcu_access_pointer(fn->leaf))))
 		fib6_repair_tree(info->nl_net, table, fn);
-	/* Always release dst as dst->__refcnt is guaranteed
-	 * to be taken before entering this function
-	 */
-	dst_release_immediate(&rt->dst);
 	return err;
 }
 
@@ -1312,7 +1350,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
  */
 
 struct lookup_args {
-	int			offset;		/* key offset on rt6_info	*/
+	int			offset;		/* key offset on fib6_info */
 	const struct in6_addr	*addr;		/* search key			*/
 };
 
@@ -1350,7 +1388,7 @@ static struct fib6_node *fib6_lookup_1(struct fib6_node *root,
 		struct fib6_node *subtree = FIB6_SUBTREE(fn);
 
 		if (subtree || fn->fn_flags & RTN_RTINFO) {
-			struct rt6_info *leaf = rcu_dereference(fn->leaf);
+			struct fib6_info *leaf = rcu_dereference(fn->leaf);
 			struct rt6key *key;
 
 			if (!leaf)
@@ -1390,12 +1428,12 @@ struct fib6_node *fib6_lookup(struct fib6_node *root, const struct in6_addr *dad
 	struct fib6_node *fn;
 	struct lookup_args args[] = {
 		{
-			.offset = offsetof(struct rt6_info, rt6i_dst),
+			.offset = offsetof(struct fib6_info, fib6_dst),
 			.addr = daddr,
 		},
 #ifdef CONFIG_IPV6_SUBTREES
 		{
-			.offset = offsetof(struct rt6_info, rt6i_src),
+			.offset = offsetof(struct fib6_info, fib6_src),
 			.addr = saddr,
 		},
 #endif
@@ -1431,7 +1469,7 @@ static struct fib6_node *fib6_locate_1(struct fib6_node *root,
 	struct fib6_node *fn, *prev = NULL;
 
 	for (fn = root; fn ; ) {
-		struct rt6_info *leaf = rcu_dereference(fn->leaf);
+		struct fib6_info *leaf = rcu_dereference(fn->leaf);
 		struct rt6key *key;
 
 		/* This node is being deleted */
@@ -1480,7 +1518,7 @@ struct fib6_node *fib6_locate(struct fib6_node *root,
 	struct fib6_node *fn;
 
 	fn = fib6_locate_1(root, daddr, dst_len,
-			   offsetof(struct rt6_info, rt6i_dst),
+			   offsetof(struct fib6_info, fib6_dst),
 			   exact_match);
 
 #ifdef CONFIG_IPV6_SUBTREES
@@ -1491,7 +1529,7 @@ struct fib6_node *fib6_locate(struct fib6_node *root,
 
 			if (subtree) {
 				fn = fib6_locate_1(subtree, saddr, src_len,
-					   offsetof(struct rt6_info, rt6i_src),
+					   offsetof(struct fib6_info, fib6_src),
 					   exact_match);
 			}
 		}
@@ -1510,14 +1548,14 @@ struct fib6_node *fib6_locate(struct fib6_node *root,
  *
  */
 
-static struct rt6_info *fib6_find_prefix(struct net *net,
+static struct fib6_info *fib6_find_prefix(struct net *net,
 					 struct fib6_table *table,
 					 struct fib6_node *fn)
 {
 	struct fib6_node *child_left, *child_right;
 
 	if (fn->fn_flags & RTN_ROOT)
-		return net->ipv6.ip6_null_entry;
+		return net->ipv6.fib6_null_entry;
 
 	while (fn) {
 		child_left = rcu_dereference_protected(fn->left,
@@ -1554,7 +1592,7 @@ static struct fib6_node *fib6_repair_tree(struct net *net,
 
 	/* Set fn->leaf to null_entry for root node. */
 	if (fn->fn_flags & RTN_TL_ROOT) {
-		rcu_assign_pointer(fn->leaf, net->ipv6.ip6_null_entry);
+		rcu_assign_pointer(fn->leaf, net->ipv6.fib6_null_entry);
 		return fn;
 	}
 
@@ -1569,11 +1607,11 @@ static struct fib6_node *fib6_repair_tree(struct net *net,
 					    lockdep_is_held(&table->tb6_lock));
 		struct fib6_node *pn_l = rcu_dereference_protected(pn->left,
 					    lockdep_is_held(&table->tb6_lock));
-		struct rt6_info *fn_leaf = rcu_dereference_protected(fn->leaf,
+		struct fib6_info *fn_leaf = rcu_dereference_protected(fn->leaf,
 					    lockdep_is_held(&table->tb6_lock));
-		struct rt6_info *pn_leaf = rcu_dereference_protected(pn->leaf,
+		struct fib6_info *pn_leaf = rcu_dereference_protected(pn->leaf,
 					    lockdep_is_held(&table->tb6_lock));
-		struct rt6_info *new_fn_leaf;
+		struct fib6_info *new_fn_leaf;
 
 		RT6_TRACE("fixing tree: plen=%d iter=%d\n", fn->fn_bit, iter);
 		iter++;
@@ -1599,10 +1637,10 @@ static struct fib6_node *fib6_repair_tree(struct net *net,
 #if RT6_DEBUG >= 2
 			if (!new_fn_leaf) {
 				WARN_ON(!new_fn_leaf);
-				new_fn_leaf = net->ipv6.ip6_null_entry;
+				new_fn_leaf = net->ipv6.fib6_null_entry;
 			}
 #endif
-			atomic_inc(&new_fn_leaf->rt6i_ref);
+			fib6_info_hold(new_fn_leaf);
 			rcu_assign_pointer(fn->leaf, new_fn_leaf);
 			return pn;
 		}
@@ -1658,26 +1696,24 @@ static struct fib6_node *fib6_repair_tree(struct net *net,
 			return pn;
 
 		RCU_INIT_POINTER(pn->leaf, NULL);
-		rt6_release(pn_leaf);
+		fib6_info_release(pn_leaf);
 		fn = pn;
 	}
 }
 
 static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn,
-			   struct rt6_info __rcu **rtp, struct nl_info *info)
+			   struct fib6_info __rcu **rtp, struct nl_info *info)
 {
 	struct fib6_walker *w;
-	struct rt6_info *rt = rcu_dereference_protected(*rtp,
+	struct fib6_info *rt = rcu_dereference_protected(*rtp,
 				    lockdep_is_held(&table->tb6_lock));
 	struct net *net = info->nl_net;
 
 	RT6_TRACE("fib6_del_route\n");
 
-	WARN_ON_ONCE(rt->rt6i_flags & RTF_CACHE);
-
 	/* Unlink it */
 	*rtp = rt->rt6_next;
-	rt->rt6i_node = NULL;
+	rt->fib6_node = NULL;
 	net->ipv6.rt6_stats->fib_rt_entries--;
 	net->ipv6.rt6_stats->fib_discarded_routes++;
 
@@ -1689,14 +1725,14 @@ static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn,
 		fn->rr_ptr = NULL;
 
 	/* Remove this entry from other siblings */
-	if (rt->rt6i_nsiblings) {
-		struct rt6_info *sibling, *next_sibling;
+	if (rt->fib6_nsiblings) {
+		struct fib6_info *sibling, *next_sibling;
 
 		list_for_each_entry_safe(sibling, next_sibling,
-					 &rt->rt6i_siblings, rt6i_siblings)
-			sibling->rt6i_nsiblings--;
-		rt->rt6i_nsiblings = 0;
-		list_del_init(&rt->rt6i_siblings);
+					 &rt->fib6_siblings, fib6_siblings)
+			sibling->fib6_nsiblings--;
+		rt->fib6_nsiblings = 0;
+		list_del_init(&rt->fib6_siblings);
 		rt6_multipath_rebalance(next_sibling);
 	}
 
@@ -1730,40 +1766,30 @@ static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn,
 	call_fib6_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, rt, NULL);
 	if (!info->skip_notify)
 		inet6_rt_notify(RTM_DELROUTE, rt, info, 0);
-	rt6_release(rt);
+	fib6_info_release(rt);
 }
 
 /* Need to own table->tb6_lock */
-int fib6_del(struct rt6_info *rt, struct nl_info *info)
+int fib6_del(struct fib6_info *rt, struct nl_info *info)
 {
-	struct fib6_node *fn = rcu_dereference_protected(rt->rt6i_node,
-				    lockdep_is_held(&rt->rt6i_table->tb6_lock));
-	struct fib6_table *table = rt->rt6i_table;
+	struct fib6_node *fn = rcu_dereference_protected(rt->fib6_node,
+				    lockdep_is_held(&rt->fib6_table->tb6_lock));
+	struct fib6_table *table = rt->fib6_table;
 	struct net *net = info->nl_net;
-	struct rt6_info __rcu **rtp;
-	struct rt6_info __rcu **rtp_next;
+	struct fib6_info __rcu **rtp;
+	struct fib6_info __rcu **rtp_next;
 
-#if RT6_DEBUG >= 2
-	if (rt->dst.obsolete > 0) {
-		WARN_ON(fn);
-		return -ENOENT;
-	}
-#endif
-	if (!fn || rt == net->ipv6.ip6_null_entry)
+	if (!fn || rt == net->ipv6.fib6_null_entry)
 		return -ENOENT;
 
 	WARN_ON(!(fn->fn_flags & RTN_RTINFO));
 
-	/* remove cached dst from exception table */
-	if (rt->rt6i_flags & RTF_CACHE)
-		return rt6_remove_exception_rt(rt);
-
 	/*
 	 *	Walk the leaf entries looking for ourself
 	 */
 
 	for (rtp = &fn->leaf; *rtp; rtp = rtp_next) {
-		struct rt6_info *cur = rcu_dereference_protected(*rtp,
+		struct fib6_info *cur = rcu_dereference_protected(*rtp,
 					lockdep_is_held(&table->tb6_lock));
 		if (rt == cur) {
 			fib6_del_route(table, fn, rtp, info);
@@ -1907,7 +1933,7 @@ static int fib6_walk(struct net *net, struct fib6_walker *w)
 static int fib6_clean_node(struct fib6_walker *w)
 {
 	int res;
-	struct rt6_info *rt;
+	struct fib6_info *rt;
 	struct fib6_cleaner *c = container_of(w, struct fib6_cleaner, w);
 	struct nl_info info = {
 		.nl_net = c->net,
@@ -1932,17 +1958,17 @@ static int fib6_clean_node(struct fib6_walker *w)
 #if RT6_DEBUG >= 2
 				pr_debug("%s: del failed: rt=%p@%p err=%d\n",
 					 __func__, rt,
-					 rcu_access_pointer(rt->rt6i_node),
+					 rcu_access_pointer(rt->fib6_node),
 					 res);
 #endif
 				continue;
 			}
 			return 0;
 		} else if (res == -2) {
-			if (WARN_ON(!rt->rt6i_nsiblings))
+			if (WARN_ON(!rt->fib6_nsiblings))
 				continue;
-			rt = list_last_entry(&rt->rt6i_siblings,
-					     struct rt6_info, rt6i_siblings);
+			rt = list_last_entry(&rt->fib6_siblings,
+					     struct fib6_info, fib6_siblings);
 			continue;
 		}
 		WARN_ON(res != 0);
@@ -1961,7 +1987,7 @@ static int fib6_clean_node(struct fib6_walker *w)
  */
 
 static void fib6_clean_tree(struct net *net, struct fib6_node *root,
-			    int (*func)(struct rt6_info *, void *arg),
+			    int (*func)(struct fib6_info *, void *arg),
 			    int sernum, void *arg)
 {
 	struct fib6_cleaner c;
@@ -1979,7 +2005,7 @@ static void fib6_clean_tree(struct net *net, struct fib6_node *root,
 }
 
 static void __fib6_clean_all(struct net *net,
-			     int (*func)(struct rt6_info *, void *),
+			     int (*func)(struct fib6_info *, void *),
 			     int sernum, void *arg)
 {
 	struct fib6_table *table;
@@ -1999,7 +2025,7 @@ static void __fib6_clean_all(struct net *net,
 	rcu_read_unlock();
 }
 
-void fib6_clean_all(struct net *net, int (*func)(struct rt6_info *, void *),
+void fib6_clean_all(struct net *net, int (*func)(struct fib6_info *, void *),
 		    void *arg)
 {
 	__fib6_clean_all(net, func, FIB6_NO_SERNUM_CHANGE, arg);
@@ -2016,7 +2042,7 @@ static void fib6_flush_trees(struct net *net)
  *	Garbage collection
  */
 
-static int fib6_age(struct rt6_info *rt, void *arg)
+static int fib6_age(struct fib6_info *rt, void *arg)
 {
 	struct fib6_gc_args *gc_args = arg;
 	unsigned long now = jiffies;
@@ -2026,8 +2052,8 @@ static int fib6_age(struct rt6_info *rt, void *arg)
 	 *	Routes are expired even if they are in use.
 	 */
 
-	if (rt->rt6i_flags & RTF_EXPIRES && rt->dst.expires) {
-		if (time_after(now, rt->dst.expires)) {
+	if (rt->fib6_flags & RTF_EXPIRES && rt->expires) {
+		if (time_after(now, rt->expires)) {
 			RT6_TRACE("expiring %p\n", rt);
 			return -1;
 		}
@@ -2110,7 +2136,7 @@ static int __net_init fib6_net_init(struct net *net)
 
 	net->ipv6.fib6_main_tbl->tb6_id = RT6_TABLE_MAIN;
 	rcu_assign_pointer(net->ipv6.fib6_main_tbl->tb6_root.leaf,
-			   net->ipv6.ip6_null_entry);
+			   net->ipv6.fib6_null_entry);
 	net->ipv6.fib6_main_tbl->tb6_root.fn_flags =
 		RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO;
 	inet_peer_base_init(&net->ipv6.fib6_main_tbl->tb6_peers);
@@ -2122,7 +2148,7 @@ static int __net_init fib6_net_init(struct net *net)
 		goto out_fib6_main_tbl;
 	net->ipv6.fib6_local_tbl->tb6_id = RT6_TABLE_LOCAL;
 	rcu_assign_pointer(net->ipv6.fib6_local_tbl->tb6_root.leaf,
-			   net->ipv6.ip6_null_entry);
+			   net->ipv6.fib6_null_entry);
 	net->ipv6.fib6_local_tbl->tb6_root.fn_flags =
 		RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO;
 	inet_peer_base_init(&net->ipv6.fib6_local_tbl->tb6_peers);
@@ -2220,25 +2246,26 @@ struct ipv6_route_iter {
 
 static int ipv6_route_seq_show(struct seq_file *seq, void *v)
 {
-	struct rt6_info *rt = v;
+	struct fib6_info *rt = v;
 	struct ipv6_route_iter *iter = seq->private;
+	const struct net_device *dev;
 
-	seq_printf(seq, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
+	seq_printf(seq, "%pi6 %02x ", &rt->fib6_dst.addr, rt->fib6_dst.plen);
 
 #ifdef CONFIG_IPV6_SUBTREES
-	seq_printf(seq, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
+	seq_printf(seq, "%pi6 %02x ", &rt->fib6_src.addr, rt->fib6_src.plen);
 #else
 	seq_puts(seq, "00000000000000000000000000000000 00 ");
 #endif
-	if (rt->rt6i_flags & RTF_GATEWAY)
-		seq_printf(seq, "%pi6", &rt->rt6i_gateway);
+	if (rt->fib6_flags & RTF_GATEWAY)
+		seq_printf(seq, "%pi6", &rt->fib6_nh.nh_gw);
 	else
 		seq_puts(seq, "00000000000000000000000000000000");
 
+	dev = rt->fib6_nh.nh_dev;
 	seq_printf(seq, " %08x %08x %08x %08x %8s\n",
-		   rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
-		   rt->dst.__use, rt->rt6i_flags,
-		   rt->dst.dev ? rt->dst.dev->name : "");
+		   rt->fib6_metric, atomic_read(&rt->fib6_ref), 0,
+		   rt->fib6_flags, dev ? dev->name : "");
 	iter->w.leaf = NULL;
 	return 0;
 }
@@ -2311,14 +2338,14 @@ static void ipv6_route_check_sernum(struct ipv6_route_iter *iter)
 static void *ipv6_route_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 {
 	int r;
-	struct rt6_info *n;
+	struct fib6_info *n;
 	struct net *net = seq_file_net(seq);
 	struct ipv6_route_iter *iter = seq->private;
 
 	if (!v)
 		goto iter_table;
 
-	n = rcu_dereference_bh(((struct rt6_info *)v)->rt6_next);
+	n = rcu_dereference_bh(((struct fib6_info *)v)->rt6_next);
 	if (n) {
 		++*pos;
 		return n;
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index 9ee208a..f08d344 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -336,7 +336,7 @@ int ip6_mc_input(struct sk_buff *skb)
 	bool deliver;
 
 	__IP6_UPD_PO_STATS(dev_net(skb_dst(skb)->dev),
-			 ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_INMCAST,
+			 __in6_dev_get_safely(skb->dev), IPSTATS_MIB_INMCAST,
 			 skb->len);
 
 	hdr = ipv6_hdr(skb);
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 2e891d2..6a477d5 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -425,6 +425,7 @@ static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
 
 int ip6_forward(struct sk_buff *skb)
 {
+	struct inet6_dev *idev = __in6_dev_get_safely(skb->dev);
 	struct dst_entry *dst = skb_dst(skb);
 	struct ipv6hdr *hdr = ipv6_hdr(skb);
 	struct inet6_skb_parm *opt = IP6CB(skb);
@@ -444,8 +445,7 @@ int ip6_forward(struct sk_buff *skb)
 		goto drop;
 
 	if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
-		__IP6_INC_STATS(net, ip6_dst_idev(dst),
-				IPSTATS_MIB_INDISCARDS);
+		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
 		goto drop;
 	}
 
@@ -476,8 +476,7 @@ int ip6_forward(struct sk_buff *skb)
 		/* Force OUTPUT device used as source address */
 		skb->dev = dst->dev;
 		icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
-		__IP6_INC_STATS(net, ip6_dst_idev(dst),
-				IPSTATS_MIB_INHDRERRORS);
+		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);
 
 		kfree_skb(skb);
 		return -ETIMEDOUT;
@@ -490,15 +489,13 @@ int ip6_forward(struct sk_buff *skb)
 		if (proxied > 0)
 			return ip6_input(skb);
 		else if (proxied < 0) {
-			__IP6_INC_STATS(net, ip6_dst_idev(dst),
-					IPSTATS_MIB_INDISCARDS);
+			__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
 			goto drop;
 		}
 	}
 
 	if (!xfrm6_route_forward(skb)) {
-		__IP6_INC_STATS(net, ip6_dst_idev(dst),
-				IPSTATS_MIB_INDISCARDS);
+		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
 		goto drop;
 	}
 	dst = skb_dst(skb);
@@ -554,8 +551,7 @@ int ip6_forward(struct sk_buff *skb)
 		/* Again, force OUTPUT device used as source address */
 		skb->dev = dst->dev;
 		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
-		__IP6_INC_STATS(net, ip6_dst_idev(dst),
-				IPSTATS_MIB_INTOOBIGERRORS);
+		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INTOOBIGERRORS);
 		__IP6_INC_STATS(net, ip6_dst_idev(dst),
 				IPSTATS_MIB_FRAGFAILS);
 		kfree_skb(skb);
@@ -579,7 +575,7 @@ int ip6_forward(struct sk_buff *skb)
 		       ip6_forward_finish);
 
 error:
-	__IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
+	__IP6_INC_STATS(net, idev, IPSTATS_MIB_INADDRERRORS);
 drop:
 	kfree_skb(skb);
 	return -EINVAL;
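
The ip6_forward() changes above all follow one pattern: the input device's stats context is captured once at function entry (idev) instead of being re-derived from skb_dst() on each error path, since the dst attached to the skb can be replaced mid-function, e.g. by the xfrm route step. A toy model of why the snapshot matters, with reduced hypothetical types:

#include <stdio.h>

struct idev { const char *name; long indiscards; };
struct dst  { struct idev *idev; };
struct skb  { struct idev *in_dev; struct dst *dst; };

static void xfrm_reroute(struct skb *skb, struct dst *other)
{
	skb->dst = other;	/* the dst is swapped under our feet */
}

static int forward(struct skb *skb, struct dst *other)
{
	struct idev *idev = skb->in_dev;	/* snapshot at entry */

	xfrm_reroute(skb, other);

	/* error path: charge the input device, not whatever device
	 * the current dst happens to reference by now
	 */
	idev->indiscards++;
	return -1;
}

int main(void)
{
	struct idev in = { "eth0", 0 }, out = { "eth1", 0 };
	struct dst din = { &in }, dout = { &out };
	struct skb skb = { &in, &din };

	forward(&skb, &dout);
	printf("%s: %ld, %s: %ld\n", in.name, in.indiscards,
	       out.name, out.indiscards);	/* eth0: 1, eth1: 0 */
	return 0;
}
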
@@ -966,15 +962,21 @@ static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
 	 * that's why we try it again later.
 	 */
 	if (ipv6_addr_any(&fl6->saddr) && (!*dst || !(*dst)->error)) {
+		struct fib6_info *from;
 		struct rt6_info *rt;
 		bool had_dst = *dst != NULL;
 
 		if (!had_dst)
 			*dst = ip6_route_output(net, sk, fl6);
 		rt = (*dst)->error ? NULL : (struct rt6_info *)*dst;
-		err = ip6_route_get_saddr(net, rt, &fl6->daddr,
+
+		rcu_read_lock();
+		from = rt ? rcu_dereference(rt->from) : NULL;
+		err = ip6_route_get_saddr(net, from, &fl6->daddr,
 					  sk ? inet6_sk(sk)->srcprefs : 0,
 					  &fl6->saddr);
+		rcu_read_unlock();
+
 		if (err)
 			goto out_err_release;
 
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 9de4dfb1..9ac5366 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -1155,7 +1155,8 @@ static void ndisc_router_discovery(struct sk_buff *skb)
 	struct ra_msg *ra_msg = (struct ra_msg *)skb_transport_header(skb);
 	struct neighbour *neigh = NULL;
 	struct inet6_dev *in6_dev;
-	struct rt6_info *rt = NULL;
+	struct fib6_info *rt = NULL;
+	struct net *net;
 	int lifetime;
 	struct ndisc_options ndopts;
 	int optlen;
@@ -1253,9 +1254,9 @@ static void ndisc_router_discovery(struct sk_buff *skb)
 	/* Do not accept RA with source-addr found on local machine unless
 	 * accept_ra_from_local is set to true.
 	 */
+	net = dev_net(in6_dev->dev);
 	if (!in6_dev->cnf.accept_ra_from_local &&
-	    ipv6_chk_addr(dev_net(in6_dev->dev), &ipv6_hdr(skb)->saddr,
-			  in6_dev->dev, 0)) {
+	    ipv6_chk_addr(net, &ipv6_hdr(skb)->saddr, in6_dev->dev, 0)) {
 		ND_PRINTK(2, info,
 			  "RA from local address detected on dev: %s: default router ignored\n",
 			  skb->dev->name);
@@ -1272,20 +1273,22 @@ static void ndisc_router_discovery(struct sk_buff *skb)
 		pref = ICMPV6_ROUTER_PREF_MEDIUM;
 #endif
 
-	rt = rt6_get_dflt_router(&ipv6_hdr(skb)->saddr, skb->dev);
+	rt = rt6_get_dflt_router(net, &ipv6_hdr(skb)->saddr, skb->dev);
 
 	if (rt) {
-		neigh = dst_neigh_lookup(&rt->dst, &ipv6_hdr(skb)->saddr);
+		neigh = ip6_neigh_lookup(&rt->fib6_nh.nh_gw,
+					 rt->fib6_nh.nh_dev, NULL,
+					  &ipv6_hdr(skb)->saddr);
 		if (!neigh) {
 			ND_PRINTK(0, err,
 				  "RA: %s got default router without neighbour\n",
 				  __func__);
-			ip6_rt_put(rt);
+			fib6_info_release(rt);
 			return;
 		}
 	}
 	if (rt && lifetime == 0) {
-		ip6_del_rt(rt);
+		ip6_del_rt(net, rt);
 		rt = NULL;
 	}
 
@@ -1294,7 +1297,8 @@ static void ndisc_router_discovery(struct sk_buff *skb)
 	if (!rt && lifetime) {
 		ND_PRINTK(3, info, "RA: adding default router\n");
 
-		rt = rt6_add_dflt_router(&ipv6_hdr(skb)->saddr, skb->dev, pref);
+		rt = rt6_add_dflt_router(net, &ipv6_hdr(skb)->saddr,
+					 skb->dev, pref);
 		if (!rt) {
 			ND_PRINTK(0, err,
 				  "RA: %s failed to add default route\n",
@@ -1302,28 +1306,29 @@ static void ndisc_router_discovery(struct sk_buff *skb)
 			return;
 		}
 
-		neigh = dst_neigh_lookup(&rt->dst, &ipv6_hdr(skb)->saddr);
+		neigh = ip6_neigh_lookup(&rt->fib6_nh.nh_gw,
+					 rt->fib6_nh.nh_dev, NULL,
+					  &ipv6_hdr(skb)->saddr);
 		if (!neigh) {
 			ND_PRINTK(0, err,
 				  "RA: %s got default router without neighbour\n",
 				  __func__);
-			ip6_rt_put(rt);
+			fib6_info_release(rt);
 			return;
 		}
 		neigh->flags |= NTF_ROUTER;
 	} else if (rt) {
-		rt->rt6i_flags = (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
+		rt->fib6_flags = (rt->fib6_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
 	}
 
 	if (rt)
-		rt6_set_expires(rt, jiffies + (HZ * lifetime));
+		fib6_set_expires(rt, jiffies + (HZ * lifetime));
 	if (in6_dev->cnf.accept_ra_min_hop_limit < 256 &&
 	    ra_msg->icmph.icmp6_hop_limit) {
 		if (in6_dev->cnf.accept_ra_min_hop_limit <= ra_msg->icmph.icmp6_hop_limit) {
 			in6_dev->cnf.hop_limit = ra_msg->icmph.icmp6_hop_limit;
-			if (rt)
-				dst_metric_set(&rt->dst, RTAX_HOPLIMIT,
-					       ra_msg->icmph.icmp6_hop_limit);
+			fib6_metric_set(rt, RTAX_HOPLIMIT,
+					ra_msg->icmph.icmp6_hop_limit);
 		} else {
 			ND_PRINTK(2, warn, "RA: Got route advertisement with lower hop_limit than minimum\n");
 		}
@@ -1475,10 +1480,7 @@ static void ndisc_router_discovery(struct sk_buff *skb)
 			ND_PRINTK(2, warn, "RA: invalid mtu: %d\n", mtu);
 		} else if (in6_dev->cnf.mtu6 != mtu) {
 			in6_dev->cnf.mtu6 = mtu;
-
-			if (rt)
-				dst_metric_set(&rt->dst, RTAX_MTU, mtu);
-
+			fib6_metric_set(rt, RTAX_MTU, mtu);
 			rt6_mtu_change(skb->dev, mtu);
 		}
 	}
@@ -1497,7 +1499,7 @@ static void ndisc_router_discovery(struct sk_buff *skb)
 		ND_PRINTK(2, warn, "RA: invalid RA options\n");
 	}
 out:
-	ip6_rt_put(rt);
+	fib6_info_release(rt);
 	if (neigh)
 		neigh_release(neigh);
 }
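
The RA handling above now works on fib6_info directly, but the default-router state machine is unchanged: a lifetime of zero withdraws an existing default route, a non-zero lifetime installs one if absent, and in either surviving case the expiry is refreshed. A condensed sketch of that logic (a hypothetical minimal model, not the kernel's data structures):

#include <stdio.h>
#include <stdlib.h>

struct dflt_router {
	unsigned long expires;
};

static struct dflt_router *ra_update(struct dflt_router *rt,
				     unsigned int lifetime,
				     unsigned long now)
{
	if (rt && lifetime == 0) {	/* RA withdraws the router */
		free(rt);
		return NULL;
	}
	if (!rt && lifetime) {		/* new default router */
		rt = calloc(1, sizeof(*rt));
		if (!rt)
			return NULL;
	}
	if (rt)
		rt->expires = now + lifetime;	/* refresh expiry */
	return rt;
}

int main(void)
{
	struct dflt_router *rt = NULL;

	rt = ra_update(rt, 1800, 1000);	/* installs the router */
	if (rt)
		printf("expires=%lu\n", rt->expires);
	rt = ra_update(rt, 0, 2000);	/* lifetime 0 withdraws it */
	printf("router %s\n", rt ? "present" : "gone");
	return 0;
}
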
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 4979610..b939b94 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -163,7 +163,8 @@ fq_find(struct net *net, __be32 id, const struct ipv6hdr *hdr, int iif)
 }
 
 static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
-			   struct frag_hdr *fhdr, int nhoff)
+			  struct frag_hdr *fhdr, int nhoff,
+			  u32 *prob_offset)
 {
 	struct sk_buff *prev, *next;
 	struct net_device *dev;
@@ -179,11 +180,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
 			((u8 *)(fhdr + 1) - (u8 *)(ipv6_hdr(skb) + 1)));
 
 	if ((unsigned int)end > IPV6_MAXPLEN) {
-		__IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
-				IPSTATS_MIB_INHDRERRORS);
-		icmpv6_param_prob(skb, ICMPV6_HDR_FIELD,
-				  ((u8 *)&fhdr->frag_off -
-				   skb_network_header(skb)));
+		*prob_offset = (u8 *)&fhdr->frag_off - skb_network_header(skb);
 		return -1;
 	}
 
@@ -214,10 +211,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
 			/* RFC2460 says always send parameter problem in
 			 * this case. -DaveM
 			 */
-			__IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
-					IPSTATS_MIB_INHDRERRORS);
-			icmpv6_param_prob(skb, ICMPV6_HDR_FIELD,
-					  offsetof(struct ipv6hdr, payload_len));
+			*prob_offset = offsetof(struct ipv6hdr, payload_len);
 			return -1;
 		}
 		if (end > fq->q.len) {
@@ -519,15 +513,22 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
 	iif = skb->dev ? skb->dev->ifindex : 0;
 	fq = fq_find(net, fhdr->identification, hdr, iif);
 	if (fq) {
+		u32 prob_offset = 0;
 		int ret;
 
 		spin_lock(&fq->q.lock);
 
 		fq->iif = iif;
-		ret = ip6_frag_queue(fq, skb, fhdr, IP6CB(skb)->nhoff);
+		ret = ip6_frag_queue(fq, skb, fhdr, IP6CB(skb)->nhoff,
+				     &prob_offset);
 
 		spin_unlock(&fq->q.lock);
 		inet_frag_put(&fq->q);
+		if (prob_offset) {
+			__IP6_INC_STATS(net, __in6_dev_get_safely(skb->dev),
+					IPSTATS_MIB_INHDRERRORS);
+			icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, prob_offset);
+		}
 		return ret;
 	}
 
@@ -536,7 +537,7 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
 	return -1;
 
 fail_hdr:
-	__IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
+	__IP6_INC_STATS(net, __in6_dev_get_safely(skb->dev),
 			IPSTATS_MIB_INHDRERRORS);
 	icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, skb_network_header_len(skb));
 	return -1;
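
The reassembly change above defers the ICMP parameter-problem report: ip6_frag_queue() only records the offending offset while fq->q.lock is held, and ipv6_frag_rcv() sends the error after unlocking. A small sketch of that defer-out-of-the-lock pattern, with a pthread mutex standing in for the kernel spinlock and all other names hypothetical:

#include <stdio.h>
#include <pthread.h>

static pthread_mutex_t q_lock = PTHREAD_MUTEX_INITIALIZER;

static void icmp_param_prob(unsigned int offset)
{
	/* may itself sleep or allocate; must not run under q_lock */
	printf("param problem at offset %u\n", offset);
}

static int frag_queue(int bad_len, unsigned int *prob_offset)
{
	if (bad_len) {
		*prob_offset = 4;	/* remember it, report later */
		return -1;
	}
	return 0;
}

static int frag_rcv(int bad_len)
{
	unsigned int prob_offset = 0;
	int ret;

	pthread_mutex_lock(&q_lock);
	ret = frag_queue(bad_len, &prob_offset);
	pthread_mutex_unlock(&q_lock);

	if (prob_offset)		/* error path runs lock-free */
		icmp_param_prob(prob_offset);
	return ret;
}

int main(void)
{
	frag_rcv(1);
	return 0;
}
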
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 49b954d..0407bbc 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -78,7 +78,6 @@ enum rt6_nud_state {
 	RT6_NUD_SUCCEED = 1
 };
 
-static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort);
 static struct dst_entry	*ip6_dst_check(struct dst_entry *dst, u32 cookie);
 static unsigned int	 ip6_default_advmss(const struct dst_entry *dst);
 static unsigned int	 ip6_mtu(const struct dst_entry *dst);
@@ -97,25 +96,24 @@ static void		ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
 					   struct sk_buff *skb, u32 mtu);
 static void		rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
 					struct sk_buff *skb);
-static void		rt6_dst_from_metrics_check(struct rt6_info *rt);
-static int rt6_score_route(struct rt6_info *rt, int oif, int strict);
-static size_t rt6_nlmsg_size(struct rt6_info *rt);
-static int rt6_fill_node(struct net *net,
-			 struct sk_buff *skb, struct rt6_info *rt,
-			 struct in6_addr *dst, struct in6_addr *src,
+static int rt6_score_route(struct fib6_info *rt, int oif, int strict);
+static size_t rt6_nlmsg_size(struct fib6_info *rt);
+static int rt6_fill_node(struct net *net, struct sk_buff *skb,
+			 struct fib6_info *rt, struct dst_entry *dst,
+			 struct in6_addr *dest, struct in6_addr *src,
 			 int iif, int type, u32 portid, u32 seq,
 			 unsigned int flags);
-static struct rt6_info *rt6_find_cached_rt(struct rt6_info *rt,
+static struct rt6_info *rt6_find_cached_rt(struct fib6_info *rt,
 					   struct in6_addr *daddr,
 					   struct in6_addr *saddr);
 
 #ifdef CONFIG_IPV6_ROUTE_INFO
-static struct rt6_info *rt6_add_route_info(struct net *net,
+static struct fib6_info *rt6_add_route_info(struct net *net,
 					   const struct in6_addr *prefix, int prefixlen,
 					   const struct in6_addr *gwaddr,
 					   struct net_device *dev,
 					   unsigned int pref);
-static struct rt6_info *rt6_get_route_info(struct net *net,
+static struct fib6_info *rt6_get_route_info(struct net *net,
 					   const struct in6_addr *prefix, int prefixlen,
 					   const struct in6_addr *gwaddr,
 					   struct net_device *dev);
@@ -184,29 +182,10 @@ static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
 	}
 }
 
-static u32 *rt6_pcpu_cow_metrics(struct rt6_info *rt)
-{
-	return dst_metrics_write_ptr(&rt->from->dst);
-}
-
-static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
-{
-	struct rt6_info *rt = (struct rt6_info *)dst;
-
-	if (rt->rt6i_flags & RTF_PCPU)
-		return rt6_pcpu_cow_metrics(rt);
-	else if (rt->rt6i_flags & RTF_CACHE)
-		return NULL;
-	else
-		return dst_cow_metrics_generic(dst, old);
-}
-
-static inline const void *choose_neigh_daddr(struct rt6_info *rt,
+static inline const void *choose_neigh_daddr(const struct in6_addr *p,
 					     struct sk_buff *skb,
 					     const void *daddr)
 {
-	struct in6_addr *p = &rt->rt6i_gateway;
-
 	if (!ipv6_addr_any(p))
 		return (const void *) p;
 	else if (skb)
@@ -214,18 +193,27 @@ static inline const void *choose_neigh_daddr(struct rt6_info *rt,
 	return daddr;
 }
 
-static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
-					  struct sk_buff *skb,
-					  const void *daddr)
+struct neighbour *ip6_neigh_lookup(const struct in6_addr *gw,
+				   struct net_device *dev,
+				   struct sk_buff *skb,
+				   const void *daddr)
 {
-	struct rt6_info *rt = (struct rt6_info *) dst;
 	struct neighbour *n;
 
-	daddr = choose_neigh_daddr(rt, skb, daddr);
-	n = __ipv6_neigh_lookup(dst->dev, daddr);
+	daddr = choose_neigh_daddr(gw, skb, daddr);
+	n = __ipv6_neigh_lookup(dev, daddr);
 	if (n)
 		return n;
-	return neigh_create(&nd_tbl, daddr, dst->dev);
+	return neigh_create(&nd_tbl, daddr, dev);
+}
+
+static struct neighbour *ip6_dst_neigh_lookup(const struct dst_entry *dst,
+					      struct sk_buff *skb,
+					      const void *daddr)
+{
+	const struct rt6_info *rt = container_of(dst, struct rt6_info, dst);
+
+	return ip6_neigh_lookup(&rt->rt6i_gateway, dst->dev, skb, daddr);
 }
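
The neighbour-lookup refactor above splits the old dst-based function in two: a generic ip6_neigh_lookup() that takes the gateway and device directly, and a thin dst_ops adapter that recovers the enclosing rt6_info from its embedded dst_entry via container_of(). A compile-and-run sketch of that adapter pattern, with reduced hypothetical types:

#include <stdio.h>
#include <stddef.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct dst_entry {
	int dev;
};

struct rt6_info {
	struct dst_entry dst;	/* embedded member */
	int gateway;
};

/* generic worker: callers that already have gw and dev use it directly */
static void neigh_lookup(int gw, int dev)
{
	printf("lookup gw=%d on dev=%d\n", gw, dev);
}

/* dst_ops-style hook: recover the outer struct, then delegate */
static void dst_neigh_lookup(struct dst_entry *dst)
{
	struct rt6_info *rt = container_of(dst, struct rt6_info, dst);

	neigh_lookup(rt->gateway, dst->dev);
}

int main(void)
{
	struct rt6_info rt = { .dst = { .dev = 2 }, .gateway = 42 };

	dst_neigh_lookup(&rt.dst);	/* prints: lookup gw=42 on dev=2 */
	return 0;
}
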
 
 static void ip6_confirm_neigh(const struct dst_entry *dst, const void *daddr)
@@ -233,7 +221,7 @@ static void ip6_confirm_neigh(const struct dst_entry *dst, const void *daddr)
 	struct net_device *dev = dst->dev;
 	struct rt6_info *rt = (struct rt6_info *)dst;
 
-	daddr = choose_neigh_daddr(rt, NULL, daddr);
+	daddr = choose_neigh_daddr(&rt->rt6i_gateway, NULL, daddr);
 	if (!daddr)
 		return;
 	if (dev->flags & (IFF_NOARP | IFF_LOOPBACK))
@@ -250,7 +238,7 @@ static struct dst_ops ip6_dst_ops_template = {
 	.check			=	ip6_dst_check,
 	.default_advmss		=	ip6_default_advmss,
 	.mtu			=	ip6_mtu,
-	.cow_metrics		=	ipv6_cow_metrics,
+	.cow_metrics		=	dst_cow_metrics_generic,
 	.destroy		=	ip6_dst_destroy,
 	.ifdown			=	ip6_dst_ifdown,
 	.negative_advice	=	ip6_negative_advice,
@@ -258,7 +246,7 @@ static struct dst_ops ip6_dst_ops_template = {
 	.update_pmtu		=	ip6_rt_update_pmtu,
 	.redirect		=	rt6_do_redirect,
 	.local_out		=	__ip6_local_out,
-	.neigh_lookup		=	ip6_neigh_lookup,
+	.neigh_lookup		=	ip6_dst_neigh_lookup,
 	.confirm_neigh		=	ip6_confirm_neigh,
 };
 
@@ -288,13 +276,22 @@ static struct dst_ops ip6_dst_blackhole_ops = {
 	.update_pmtu		=	ip6_rt_blackhole_update_pmtu,
 	.redirect		=	ip6_rt_blackhole_redirect,
 	.cow_metrics		=	dst_cow_metrics_generic,
-	.neigh_lookup		=	ip6_neigh_lookup,
+	.neigh_lookup		=	ip6_dst_neigh_lookup,
 };
 
 static const u32 ip6_template_metrics[RTAX_MAX] = {
 	[RTAX_HOPLIMIT - 1] = 0,
 };
 
+static const struct fib6_info fib6_null_entry_template = {
+	.fib6_flags	= (RTF_REJECT | RTF_NONEXTHOP),
+	.fib6_protocol  = RTPROT_KERNEL,
+	.fib6_metric	= ~(u32)0,
+	.fib6_ref	= ATOMIC_INIT(1),
+	.fib6_type	= RTN_UNREACHABLE,
+	.fib6_metrics	= (struct dst_metrics *)&dst_default_metrics,
+};
+
 static const struct rt6_info ip6_null_entry_template = {
 	.dst = {
 		.__refcnt	= ATOMIC_INIT(1),
@@ -305,9 +302,6 @@ static const struct rt6_info ip6_null_entry_template = {
 		.output		= ip6_pkt_discard_out,
 	},
 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
-	.rt6i_protocol  = RTPROT_KERNEL,
-	.rt6i_metric	= ~(u32) 0,
-	.rt6i_ref	= ATOMIC_INIT(1),
 };
 
 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
@@ -322,9 +316,6 @@ static const struct rt6_info ip6_prohibit_entry_template = {
 		.output		= ip6_pkt_prohibit_out,
 	},
 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
-	.rt6i_protocol  = RTPROT_KERNEL,
-	.rt6i_metric	= ~(u32) 0,
-	.rt6i_ref	= ATOMIC_INIT(1),
 };
 
 static const struct rt6_info ip6_blk_hole_entry_template = {
@@ -337,9 +328,6 @@ static const struct rt6_info ip6_blk_hole_entry_template = {
 		.output		= dst_discard_out,
 	},
 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
-	.rt6i_protocol  = RTPROT_KERNEL,
-	.rt6i_metric	= ~(u32) 0,
-	.rt6i_ref	= ATOMIC_INIT(1),
 };
 
 #endif
@@ -349,14 +337,12 @@ static void rt6_info_init(struct rt6_info *rt)
 	struct dst_entry *dst = &rt->dst;
 
 	memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
-	INIT_LIST_HEAD(&rt->rt6i_siblings);
 	INIT_LIST_HEAD(&rt->rt6i_uncached);
 }
 
 /* allocate dst with ip6_dst_ops */
-static struct rt6_info *__ip6_dst_alloc(struct net *net,
-					struct net_device *dev,
-					int flags)
+struct rt6_info *ip6_dst_alloc(struct net *net, struct net_device *dev,
+			       int flags)
 {
 	struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
 					1, DST_OBSOLETE_FORCE_CHK, flags);
@@ -368,34 +354,15 @@ static struct rt6_info *__ip6_dst_alloc(struct net *net,
 
 	return rt;
 }
-
-struct rt6_info *ip6_dst_alloc(struct net *net,
-			       struct net_device *dev,
-			       int flags)
-{
-	struct rt6_info *rt = __ip6_dst_alloc(net, dev, flags);
-
-	if (rt) {
-		rt->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_ATOMIC);
-		if (!rt->rt6i_pcpu) {
-			dst_release_immediate(&rt->dst);
-			return NULL;
-		}
-	}
-
-	return rt;
-}
 EXPORT_SYMBOL(ip6_dst_alloc);
 
 static void ip6_dst_destroy(struct dst_entry *dst)
 {
 	struct rt6_info *rt = (struct rt6_info *)dst;
-	struct rt6_exception_bucket *bucket;
-	struct rt6_info *from = rt->from;
+	struct fib6_info *from;
 	struct inet6_dev *idev;
 
 	dst_destroy_metrics_generic(dst);
-	free_percpu(rt->rt6i_pcpu);
 	rt6_uncached_list_del(rt);
 
 	idev = rt->rt6i_idev;
@@ -403,14 +370,12 @@ static void ip6_dst_destroy(struct dst_entry *dst)
 		rt->rt6i_idev = NULL;
 		in6_dev_put(idev);
 	}
-	bucket = rcu_dereference_protected(rt->rt6i_exception_bucket, 1);
-	if (bucket) {
-		rt->rt6i_exception_bucket = NULL;
-		kfree(bucket);
-	}
 
-	rt->from = NULL;
-	dst_release(&from->dst);
+	rcu_read_lock();
+	from = rcu_dereference(rt->from);
+	rcu_assign_pointer(rt->from, NULL);
+	fib6_info_release(from);
+	rcu_read_unlock();
 }
 
 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
@@ -440,23 +405,27 @@ static bool __rt6_check_expired(const struct rt6_info *rt)
 
 static bool rt6_check_expired(const struct rt6_info *rt)
 {
+	struct fib6_info *from;
+
+	from = rcu_dereference(rt->from);
+
 	if (rt->rt6i_flags & RTF_EXPIRES) {
 		if (time_after(jiffies, rt->dst.expires))
 			return true;
-	} else if (rt->from) {
+	} else if (from) {
 		return rt->dst.obsolete != DST_OBSOLETE_FORCE_CHK ||
-			rt6_check_expired(rt->from);
+			fib6_check_expired(from);
 	}
 	return false;
 }
 
-static struct rt6_info *rt6_multipath_select(const struct net *net,
-					     struct rt6_info *match,
+static struct fib6_info *rt6_multipath_select(const struct net *net,
+					      struct fib6_info *match,
 					     struct flowi6 *fl6, int oif,
 					     const struct sk_buff *skb,
 					     int strict)
 {
-	struct rt6_info *sibling, *next_sibling;
+	struct fib6_info *sibling, *next_sibling;
 
 	/* We might have already computed the hash for ICMPv6 errors. In such
 	 * case it will always be non-zero. Otherwise now is the time to do it.
@@ -464,12 +433,15 @@ static struct rt6_info *rt6_multipath_select(const struct net *net,
 	if (!fl6->mp_hash)
 		fl6->mp_hash = rt6_multipath_hash(net, fl6, skb, NULL);
 
-	if (fl6->mp_hash <= atomic_read(&match->rt6i_nh_upper_bound))
+	if (fl6->mp_hash <= atomic_read(&match->fib6_nh.nh_upper_bound))
 		return match;
 
-	list_for_each_entry_safe(sibling, next_sibling, &match->rt6i_siblings,
-				 rt6i_siblings) {
-		if (fl6->mp_hash > atomic_read(&sibling->rt6i_nh_upper_bound))
+	list_for_each_entry_safe(sibling, next_sibling, &match->fib6_siblings,
+				 fib6_siblings) {
+		int nh_upper_bound;
+
+		nh_upper_bound = atomic_read(&sibling->fib6_nh.nh_upper_bound);
+		if (fl6->mp_hash > nh_upper_bound)
 			continue;
 		if (rt6_score_route(sibling, oif, strict) < 0)
 			break;
@@ -484,38 +456,27 @@ static struct rt6_info *rt6_multipath_select(const struct net *net,
  *	Route lookup. rcu_read_lock() should be held.
  */
 
-static inline struct rt6_info *rt6_device_match(struct net *net,
-						    struct rt6_info *rt,
+static inline struct fib6_info *rt6_device_match(struct net *net,
+						 struct fib6_info *rt,
 						    const struct in6_addr *saddr,
 						    int oif,
 						    int flags)
 {
-	struct rt6_info *local = NULL;
-	struct rt6_info *sprt;
+	struct fib6_info *sprt;
 
-	if (!oif && ipv6_addr_any(saddr) && !(rt->rt6i_nh_flags & RTNH_F_DEAD))
+	if (!oif && ipv6_addr_any(saddr) &&
+	    !(rt->fib6_nh.nh_flags & RTNH_F_DEAD))
 		return rt;
 
 	for (sprt = rt; sprt; sprt = rcu_dereference(sprt->rt6_next)) {
-		struct net_device *dev = sprt->dst.dev;
+		const struct net_device *dev = sprt->fib6_nh.nh_dev;
 
-		if (sprt->rt6i_nh_flags & RTNH_F_DEAD)
+		if (sprt->fib6_nh.nh_flags & RTNH_F_DEAD)
 			continue;
 
 		if (oif) {
 			if (dev->ifindex == oif)
 				return sprt;
-			if (dev->flags & IFF_LOOPBACK) {
-				if (!sprt->rt6i_idev ||
-				    sprt->rt6i_idev->dev->ifindex != oif) {
-					if (flags & RT6_LOOKUP_F_IFACE)
-						continue;
-					if (local &&
-					    local->rt6i_idev->dev->ifindex == oif)
-						continue;
-				}
-				local = sprt;
-			}
 		} else {
 			if (ipv6_chk_addr(net, saddr, dev,
 					  flags & RT6_LOOKUP_F_IFACE))
@@ -523,15 +484,10 @@ static inline struct rt6_info *rt6_device_match(struct net *net,
 		}
 	}
 
-	if (oif) {
-		if (local)
-			return local;
+	if (oif && flags & RT6_LOOKUP_F_IFACE)
+		return net->ipv6.fib6_null_entry;
 
-		if (flags & RT6_LOOKUP_F_IFACE)
-			return net->ipv6.ip6_null_entry;
-	}
-
-	return rt->rt6i_nh_flags & RTNH_F_DEAD ? net->ipv6.ip6_null_entry : rt;
+	return rt->fib6_nh.nh_flags & RTNH_F_DEAD ? net->ipv6.fib6_null_entry : rt;
 }
 
 #ifdef CONFIG_IPV6_ROUTER_PREF
@@ -553,10 +509,13 @@ static void rt6_probe_deferred(struct work_struct *w)
 	kfree(work);
 }
 
-static void rt6_probe(struct rt6_info *rt)
+static void rt6_probe(struct fib6_info *rt)
 {
 	struct __rt6_probe_work *work;
+	const struct in6_addr *nh_gw;
 	struct neighbour *neigh;
+	struct net_device *dev;
+
 	/*
 	 * Okay, this does not seem to be appropriate
 	 * for now, however, we need to check if it
@@ -565,20 +524,25 @@ static void rt6_probe(struct rt6_info *rt)
 	 * Router Reachability Probe MUST be rate-limited
 	 * to no more than one per minute.
 	 */
-	if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
+	if (!rt || !(rt->fib6_flags & RTF_GATEWAY))
 		return;
+
+	nh_gw = &rt->fib6_nh.nh_gw;
+	dev = rt->fib6_nh.nh_dev;
 	rcu_read_lock_bh();
-	neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
+	neigh = __ipv6_neigh_lookup_noref(dev, nh_gw);
 	if (neigh) {
+		struct inet6_dev *idev;
+
 		if (neigh->nud_state & NUD_VALID)
 			goto out;
 
+		idev = __in6_dev_get(dev);
 		work = NULL;
 		write_lock(&neigh->lock);
 		if (!(neigh->nud_state & NUD_VALID) &&
 		    time_after(jiffies,
-			       neigh->updated +
-			       rt->rt6i_idev->cnf.rtr_probe_interval)) {
+			       neigh->updated + idev->cnf.rtr_probe_interval)) {
 			work = kmalloc(sizeof(*work), GFP_ATOMIC);
 			if (work)
 				__neigh_set_probe_once(neigh);
@@ -590,9 +554,9 @@ static void rt6_probe(struct rt6_info *rt)
 
 	if (work) {
 		INIT_WORK(&work->work, rt6_probe_deferred);
-		work->target = rt->rt6i_gateway;
-		dev_hold(rt->dst.dev);
-		work->dev = rt->dst.dev;
+		work->target = *nh_gw;
+		dev_hold(dev);
+		work->dev = dev;
 		schedule_work(&work->work);
 	}
 
@@ -600,7 +564,7 @@ static void rt6_probe(struct rt6_info *rt)
 	rcu_read_unlock_bh();
 }
 #else
-static inline void rt6_probe(struct rt6_info *rt)
+static inline void rt6_probe(struct fib6_info *rt)
 {
 }
 #endif
@@ -608,28 +572,27 @@ static inline void rt6_probe(struct rt6_info *rt)
 /*
  * Default Router Selection (RFC 2461 6.3.6)
  */
-static inline int rt6_check_dev(struct rt6_info *rt, int oif)
+static inline int rt6_check_dev(struct fib6_info *rt, int oif)
 {
-	struct net_device *dev = rt->dst.dev;
+	const struct net_device *dev = rt->fib6_nh.nh_dev;
+
 	if (!oif || dev->ifindex == oif)
 		return 2;
-	if ((dev->flags & IFF_LOOPBACK) &&
-	    rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
-		return 1;
 	return 0;
 }
 
-static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
+static inline enum rt6_nud_state rt6_check_neigh(struct fib6_info *rt)
 {
-	struct neighbour *neigh;
 	enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
+	struct neighbour *neigh;
 
-	if (rt->rt6i_flags & RTF_NONEXTHOP ||
-	    !(rt->rt6i_flags & RTF_GATEWAY))
+	if (rt->fib6_flags & RTF_NONEXTHOP ||
+	    !(rt->fib6_flags & RTF_GATEWAY))
 		return RT6_NUD_SUCCEED;
 
 	rcu_read_lock_bh();
-	neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
+	neigh = __ipv6_neigh_lookup_noref(rt->fib6_nh.nh_dev,
+					  &rt->fib6_nh.nh_gw);
 	if (neigh) {
 		read_lock(&neigh->lock);
 		if (neigh->nud_state & NUD_VALID)
@@ -650,8 +613,7 @@ static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
 	return ret;
 }
 
-static int rt6_score_route(struct rt6_info *rt, int oif,
-			   int strict)
+static int rt6_score_route(struct fib6_info *rt, int oif, int strict)
 {
 	int m;
 
@@ -659,7 +621,7 @@ static int rt6_score_route(struct rt6_info *rt, int oif,
 	if (!m && (strict & RT6_LOOKUP_F_IFACE))
 		return RT6_NUD_FAIL_HARD;
 #ifdef CONFIG_IPV6_ROUTER_PREF
-	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
+	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->fib6_flags)) << 2;
 #endif
 	if (strict & RT6_LOOKUP_F_REACHABLE) {
 		int n = rt6_check_neigh(rt);
@@ -669,23 +631,37 @@ static int rt6_score_route(struct rt6_info *rt, int oif,
 	return m;
 }
 
-static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
-				   int *mpri, struct rt6_info *match,
+/* called with rcu_read_lock held */
+static inline bool fib6_ignore_linkdown(const struct fib6_info *f6i)
+{
+	const struct net_device *dev = fib6_info_nh_dev(f6i);
+	bool rc = false;
+
+	if (dev) {
+		const struct inet6_dev *idev = __in6_dev_get(dev);
+
+		rc = !!idev->cnf.ignore_routes_with_linkdown;
+	}
+
+	return rc;
+}
+
+static struct fib6_info *find_match(struct fib6_info *rt, int oif, int strict,
+				   int *mpri, struct fib6_info *match,
 				   bool *do_rr)
 {
 	int m;
 	bool match_do_rr = false;
-	struct inet6_dev *idev = rt->rt6i_idev;
 
-	if (rt->rt6i_nh_flags & RTNH_F_DEAD)
+	if (rt->fib6_nh.nh_flags & RTNH_F_DEAD)
 		goto out;
 
-	if (idev->cnf.ignore_routes_with_linkdown &&
-	    rt->rt6i_nh_flags & RTNH_F_LINKDOWN &&
+	if (fib6_ignore_linkdown(rt) &&
+	    rt->fib6_nh.nh_flags & RTNH_F_LINKDOWN &&
 	    !(strict & RT6_LOOKUP_F_IGNORE_LINKSTATE))
 		goto out;
 
-	if (rt6_check_expired(rt))
+	if (fib6_check_expired(rt))
 		goto out;
 
 	m = rt6_score_route(rt, oif, strict);
@@ -709,19 +685,19 @@ static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
 	return match;
 }
 
-static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
-				     struct rt6_info *leaf,
-				     struct rt6_info *rr_head,
+static struct fib6_info *find_rr_leaf(struct fib6_node *fn,
+				     struct fib6_info *leaf,
+				     struct fib6_info *rr_head,
 				     u32 metric, int oif, int strict,
 				     bool *do_rr)
 {
-	struct rt6_info *rt, *match, *cont;
+	struct fib6_info *rt, *match, *cont;
 	int mpri = -1;
 
 	match = NULL;
 	cont = NULL;
 	for (rt = rr_head; rt; rt = rcu_dereference(rt->rt6_next)) {
-		if (rt->rt6i_metric != metric) {
+		if (rt->fib6_metric != metric) {
 			cont = rt;
 			break;
 		}
@@ -731,7 +707,7 @@ static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
 
 	for (rt = leaf; rt && rt != rr_head;
 	     rt = rcu_dereference(rt->rt6_next)) {
-		if (rt->rt6i_metric != metric) {
+		if (rt->fib6_metric != metric) {
 			cont = rt;
 			break;
 		}
@@ -748,16 +724,16 @@ static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
 	return match;
 }
 
-static struct rt6_info *rt6_select(struct net *net, struct fib6_node *fn,
+static struct fib6_info *rt6_select(struct net *net, struct fib6_node *fn,
 				   int oif, int strict)
 {
-	struct rt6_info *leaf = rcu_dereference(fn->leaf);
-	struct rt6_info *match, *rt0;
+	struct fib6_info *leaf = rcu_dereference(fn->leaf);
+	struct fib6_info *match, *rt0;
 	bool do_rr = false;
 	int key_plen;
 
-	if (!leaf || leaf == net->ipv6.ip6_null_entry)
-		return net->ipv6.ip6_null_entry;
+	if (!leaf || leaf == net->ipv6.fib6_null_entry)
+		return net->ipv6.fib6_null_entry;
 
 	rt0 = rcu_dereference(fn->rr_ptr);
 	if (!rt0)
@@ -768,39 +744,39 @@ static struct rt6_info *rt6_select(struct net *net, struct fib6_node *fn,
 	 * (This might happen if all routes under fn are deleted from
 	 * the tree and fib6_repair_tree() is called on the node.)
 	 */
-	key_plen = rt0->rt6i_dst.plen;
+	key_plen = rt0->fib6_dst.plen;
 #ifdef CONFIG_IPV6_SUBTREES
-	if (rt0->rt6i_src.plen)
-		key_plen = rt0->rt6i_src.plen;
+	if (rt0->fib6_src.plen)
+		key_plen = rt0->fib6_src.plen;
 #endif
 	if (fn->fn_bit != key_plen)
-		return net->ipv6.ip6_null_entry;
+		return net->ipv6.fib6_null_entry;
 
-	match = find_rr_leaf(fn, leaf, rt0, rt0->rt6i_metric, oif, strict,
+	match = find_rr_leaf(fn, leaf, rt0, rt0->fib6_metric, oif, strict,
 			     &do_rr);
 
 	if (do_rr) {
-		struct rt6_info *next = rcu_dereference(rt0->rt6_next);
+		struct fib6_info *next = rcu_dereference(rt0->rt6_next);
 
 		/* no entries matched; do round-robin */
-		if (!next || next->rt6i_metric != rt0->rt6i_metric)
+		if (!next || next->fib6_metric != rt0->fib6_metric)
 			next = leaf;
 
 		if (next != rt0) {
-			spin_lock_bh(&leaf->rt6i_table->tb6_lock);
+			spin_lock_bh(&leaf->fib6_table->tb6_lock);
 			/* make sure next is not being deleted from the tree */
-			if (next->rt6i_node)
+			if (next->fib6_node)
 				rcu_assign_pointer(fn->rr_ptr, next);
-			spin_unlock_bh(&leaf->rt6i_table->tb6_lock);
+			spin_unlock_bh(&leaf->fib6_table->tb6_lock);
 		}
 	}
 
-	return match ? match : net->ipv6.ip6_null_entry;
+	return match ? match : net->ipv6.fib6_null_entry;
 }
 
-static bool rt6_is_gw_or_nonexthop(const struct rt6_info *rt)
+static bool rt6_is_gw_or_nonexthop(const struct fib6_info *rt)
 {
-	return (rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY));
+	return (rt->fib6_flags & (RTF_NONEXTHOP | RTF_GATEWAY));
 }
 
 #ifdef CONFIG_IPV6_ROUTE_INFO
@@ -812,7 +788,7 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
 	struct in6_addr prefix_buf, *prefix;
 	unsigned int pref;
 	unsigned long lifetime;
-	struct rt6_info *rt;
+	struct fib6_info *rt;
 
 	if (len < sizeof(struct route_info)) {
 		return -EINVAL;
@@ -850,13 +826,13 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
 	}
 
 	if (rinfo->prefix_len == 0)
-		rt = rt6_get_dflt_router(gwaddr, dev);
+		rt = rt6_get_dflt_router(net, gwaddr, dev);
 	else
 		rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
 					gwaddr, dev);
 
 	if (rt && !lifetime) {
-		ip6_del_rt(rt);
+		ip6_del_rt(net, rt);
 		rt = NULL;
 	}
 
@@ -864,21 +840,162 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
 		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr,
 					dev, pref);
 	else if (rt)
-		rt->rt6i_flags = RTF_ROUTEINFO |
-				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
+		rt->fib6_flags = RTF_ROUTEINFO |
+				 (rt->fib6_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
 
 	if (rt) {
 		if (!addrconf_finite_timeout(lifetime))
-			rt6_clean_expires(rt);
+			fib6_clean_expires(rt);
 		else
-			rt6_set_expires(rt, jiffies + HZ * lifetime);
+			fib6_set_expires(rt, jiffies + HZ * lifetime);
 
-		ip6_rt_put(rt);
+		fib6_info_release(rt);
 	}
 	return 0;
 }
 #endif
 
+/*
+ *	Misc support functions
+ */
+
+/* called with rcu_read_lock held */
+static struct net_device *ip6_rt_get_dev_rcu(struct fib6_info *rt)
+{
+	struct net_device *dev = rt->fib6_nh.nh_dev;
+
+	if (rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST)) {
+		/* for copies of local routes, dst->dev needs to be the
+		 * device itself if it is a master device, its master
+		 * device if it is enslaved, and the loopback device
+		 * otherwise
+		 */
+		if (netif_is_l3_slave(dev) &&
+		    !rt6_need_strict(&rt->fib6_dst.addr))
+			dev = l3mdev_master_dev_rcu(dev);
+		else if (!netif_is_l3_master(dev))
+			dev = dev_net(dev)->loopback_dev;
+		/* the remaining case is netif_is_l3_master(dev) being
+		 * true, in which case dev is returned unchanged
+		 */
+	}
+
+	return dev;
+}
+
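+/* Map each fib6_type to the dst.error value used for reject routes;
+ * 0 means the type forwards traffic normally.
+ */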
+static const int fib6_prop[RTN_MAX + 1] = {
+	[RTN_UNSPEC]	= 0,
+	[RTN_UNICAST]	= 0,
+	[RTN_LOCAL]	= 0,
+	[RTN_BROADCAST]	= 0,
+	[RTN_ANYCAST]	= 0,
+	[RTN_MULTICAST]	= 0,
+	[RTN_BLACKHOLE]	= -EINVAL,
+	[RTN_UNREACHABLE] = -EHOSTUNREACH,
+	[RTN_PROHIBIT]	= -EACCES,
+	[RTN_THROW]	= -EAGAIN,
+	[RTN_NAT]	= -EINVAL,
+	[RTN_XRESOLVE]	= -EINVAL,
+};
+
+static int ip6_rt_type_to_error(u8 fib6_type)
+{
+	return fib6_prop[fib6_type];
+}
+
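+/* Gather the DST_* allocation flags recorded on the fib6_info */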
+static unsigned short fib6_info_dst_flags(struct fib6_info *rt)
+{
+	unsigned short flags = 0;
+
+	if (rt->dst_nocount)
+		flags |= DST_NOCOUNT;
+	if (rt->dst_nopolicy)
+		flags |= DST_NOPOLICY;
+	if (rt->dst_host)
+		flags |= DST_HOST;
+
+	return flags;
+}
+
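+/* Point a reject route's dst at the discard/prohibit handlers and the
+ * error code matching its fib6_type.
+ */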
+static void ip6_rt_init_dst_reject(struct rt6_info *rt, struct fib6_info *ort)
+{
+	rt->dst.error = ip6_rt_type_to_error(ort->fib6_type);
+
+	switch (ort->fib6_type) {
+	case RTN_BLACKHOLE:
+		rt->dst.output = dst_discard_out;
+		rt->dst.input = dst_discard;
+		break;
+	case RTN_PROHIBIT:
+		rt->dst.output = ip6_pkt_prohibit_out;
+		rt->dst.input = ip6_pkt_prohibit;
+		break;
+	case RTN_THROW:
+	case RTN_UNREACHABLE:
+	default:
+		rt->dst.output = ip6_pkt_discard_out;
+		rt->dst.input = ip6_pkt_discard;
+		break;
+	}
+}
+
+static void ip6_rt_init_dst(struct rt6_info *rt, struct fib6_info *ort)
+{
+	rt->dst.flags |= fib6_info_dst_flags(ort);
+
+	if (ort->fib6_flags & RTF_REJECT) {
+		ip6_rt_init_dst_reject(rt, ort);
+		return;
+	}
+
+	rt->dst.error = 0;
+	rt->dst.output = ip6_output;
+
+	if (ort->fib6_type == RTN_LOCAL) {
+		rt->dst.input = ip6_input;
+	} else if (ipv6_addr_type(&ort->fib6_dst.addr) & IPV6_ADDR_MULTICAST) {
+		rt->dst.input = ip6_mc_input;
+	} else {
+		rt->dst.input = ip6_forward;
+	}
+
+	if (ort->fib6_nh.nh_lwtstate) {
+		rt->dst.lwtstate = lwtstate_get(ort->fib6_nh.nh_lwtstate);
+		lwtunnel_set_redirect(&rt->dst);
+	}
+
+	rt->dst.lastuse = jiffies;
+}
+
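+/* Bind a dst to the fib6_info it was created from: take a reference on
+ * the parent and share its metrics block.
+ */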
+static void rt6_set_from(struct rt6_info *rt, struct fib6_info *from)
+{
+	rt->rt6i_flags &= ~RTF_EXPIRES;
+	fib6_info_hold(from);
+	rcu_assign_pointer(rt->from, from);
+	dst_init_metrics(&rt->dst, from->fib6_metrics->metrics, true);
+	if (from->fib6_metrics != &dst_default_metrics) {
+		rt->dst._metrics |= DST_METRICS_REFCOUNTED;
+		refcount_inc(&from->fib6_metrics->refcnt);
+	}
+}
+
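+/* Initialize an rt6_info (dst) from the fib6_info it is derived from */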
+static void ip6_rt_copy_init(struct rt6_info *rt, struct fib6_info *ort)
+{
+	struct net_device *dev = fib6_info_nh_dev(ort);
+
+	ip6_rt_init_dst(rt, ort);
+
+	rt->rt6i_dst = ort->fib6_dst;
+	rt->rt6i_idev = dev ? in6_dev_get(dev) : NULL;
+	rt->rt6i_gateway = ort->fib6_nh.nh_gw;
+	rt->rt6i_flags = ort->fib6_flags;
+	rt6_set_from(rt, ort);
+#ifdef CONFIG_IPV6_SUBTREES
+	rt->rt6i_src = ort->fib6_src;
+#endif
+	rt->rt6i_prefsrc = ort->fib6_prefsrc;
+	rt->dst.lwtstate = lwtstate_get(ort->fib6_nh.nh_lwtstate);
+}
+
 static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
 					struct in6_addr *saddr)
 {
@@ -914,14 +1031,29 @@ static bool ip6_hold_safe(struct net *net, struct rt6_info **prt,
 	return false;
 }
 
+/* called with rcu_read_lock held */
+static struct rt6_info *ip6_create_rt_rcu(struct fib6_info *rt)
+{
+	unsigned short flags = fib6_info_dst_flags(rt);
+	struct net_device *dev = rt->fib6_nh.nh_dev;
+	struct rt6_info *nrt;
+
+	nrt = ip6_dst_alloc(dev_net(dev), dev, flags);
+	if (nrt)
+		ip6_rt_copy_init(nrt, rt);
+
+	return nrt;
+}
+
 static struct rt6_info *ip6_pol_route_lookup(struct net *net,
 					     struct fib6_table *table,
 					     struct flowi6 *fl6,
 					     const struct sk_buff *skb,
 					     int flags)
 {
-	struct rt6_info *rt, *rt_cache;
+	struct fib6_info *f6i;
 	struct fib6_node *fn;
+	struct rt6_info *rt;
 
 	if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
 		flags &= ~RT6_LOOKUP_F_IFACE;
@@ -929,35 +1061,43 @@ static struct rt6_info *ip6_pol_route_lookup(struct net *net,
 	rcu_read_lock();
 	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
 restart:
-	rt = rcu_dereference(fn->leaf);
-	if (!rt) {
-		rt = net->ipv6.ip6_null_entry;
+	f6i = rcu_dereference(fn->leaf);
+	if (!f6i) {
+		f6i = net->ipv6.fib6_null_entry;
 	} else {
-		rt = rt6_device_match(net, rt, &fl6->saddr,
+		f6i = rt6_device_match(net, f6i, &fl6->saddr,
 				      fl6->flowi6_oif, flags);
-		if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
-			rt = rt6_multipath_select(net, rt, fl6, fl6->flowi6_oif,
-						  skb, flags);
+		if (f6i->fib6_nsiblings && fl6->flowi6_oif == 0)
+			f6i = rt6_multipath_select(net, f6i, fl6,
+						   fl6->flowi6_oif, skb, flags);
 	}
-	if (rt == net->ipv6.ip6_null_entry) {
+	if (f6i == net->ipv6.fib6_null_entry) {
 		fn = fib6_backtrack(fn, &fl6->saddr);
 		if (fn)
 			goto restart;
 	}
-	/* Search through exception table */
-	rt_cache = rt6_find_cached_rt(rt, &fl6->daddr, &fl6->saddr);
-	if (rt_cache)
-		rt = rt_cache;
 
-	if (ip6_hold_safe(net, &rt, true))
-		dst_use_noref(&rt->dst, jiffies);
+	/* Search through exception table */
+	rt = rt6_find_cached_rt(f6i, &fl6->daddr, &fl6->saddr);
+	if (rt) {
+		if (ip6_hold_safe(net, &rt, true))
+			dst_use_noref(&rt->dst, jiffies);
+	} else if (f6i == net->ipv6.fib6_null_entry) {
+		rt = net->ipv6.ip6_null_entry;
+		dst_hold(&rt->dst);
+	} else {
+		rt = ip6_create_rt_rcu(f6i);
+		if (!rt) {
+			rt = net->ipv6.ip6_null_entry;
+			dst_hold(&rt->dst);
+		}
+	}
 
 	rcu_read_unlock();
 
 	trace_fib6_table_lookup(net, rt, table, fl6);
 
 	return rt;
-
 }
 
 struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
@@ -999,55 +1139,28 @@ EXPORT_SYMBOL(rt6_lookup);
  * Caller must hold dst before calling it.
  */
 
-static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
-			struct mx6_config *mxc,
+static int __ip6_ins_rt(struct fib6_info *rt, struct nl_info *info,
 			struct netlink_ext_ack *extack)
 {
 	int err;
 	struct fib6_table *table;
 
-	table = rt->rt6i_table;
+	table = rt->fib6_table;
 	spin_lock_bh(&table->tb6_lock);
-	err = fib6_add(&table->tb6_root, rt, info, mxc, extack);
+	err = fib6_add(&table->tb6_root, rt, info, extack);
 	spin_unlock_bh(&table->tb6_lock);
 
 	return err;
 }
 
-int ip6_ins_rt(struct rt6_info *rt)
+int ip6_ins_rt(struct net *net, struct fib6_info *rt)
 {
-	struct nl_info info = {	.nl_net = dev_net(rt->dst.dev), };
-	struct mx6_config mxc = { .mx = NULL, };
+	struct nl_info info = {	.nl_net = net, };
 
-	/* Hold dst to account for the reference from the fib6 tree */
-	dst_hold(&rt->dst);
-	return __ip6_ins_rt(rt, &info, &mxc, NULL);
+	return __ip6_ins_rt(rt, &info, NULL);
 }
 
-/* called with rcu_lock held */
-static struct net_device *ip6_rt_get_dev_rcu(struct rt6_info *rt)
-{
-	struct net_device *dev = rt->dst.dev;
-
-	if (rt->rt6i_flags & (RTF_LOCAL | RTF_ANYCAST)) {
-		/* for copies of local routes, dst->dev needs to be the
-		 * device if it is a master device, the master device if
-		 * device is enslaved, and the loopback as the default
-		 */
-		if (netif_is_l3_slave(dev) &&
-		    !rt6_need_strict(&rt->rt6i_dst.addr))
-			dev = l3mdev_master_dev_rcu(dev);
-		else if (!netif_is_l3_master(dev))
-			dev = dev_net(dev)->loopback_dev;
-		/* last case is netif_is_l3_master(dev) is true in which
-		 * case we want dev returned to be dev
-		 */
-	}
-
-	return dev;
-}
-
-static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
+static struct rt6_info *ip6_rt_cache_alloc(struct fib6_info *ort,
 					   const struct in6_addr *daddr,
 					   const struct in6_addr *saddr)
 {
@@ -1058,26 +1171,20 @@ static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
 	 *	Clone the route.
 	 */
 
-	if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
-		ort = ort->from;
-
-	rcu_read_lock();
 	dev = ip6_rt_get_dev_rcu(ort);
-	rt = __ip6_dst_alloc(dev_net(dev), dev, 0);
-	rcu_read_unlock();
+	rt = ip6_dst_alloc(dev_net(dev), dev, 0);
 	if (!rt)
 		return NULL;
 
 	ip6_rt_copy_init(rt, ort);
 	rt->rt6i_flags |= RTF_CACHE;
-	rt->rt6i_metric = 0;
 	rt->dst.flags |= DST_HOST;
 	rt->rt6i_dst.addr = *daddr;
 	rt->rt6i_dst.plen = 128;
 
 	if (!rt6_is_gw_or_nonexthop(ort)) {
-		if (ort->rt6i_dst.plen != 128 &&
-		    ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
+		if (ort->fib6_dst.plen != 128 &&
+		    ipv6_addr_equal(&ort->fib6_dst.addr, daddr))
 			rt->rt6i_flags |= RTF_ANYCAST;
 #ifdef CONFIG_IPV6_SUBTREES
 		if (rt->rt6i_src.plen && saddr) {
@@ -1090,45 +1197,44 @@ static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
 	return rt;
 }
 
-static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt)
+static struct rt6_info *ip6_rt_pcpu_alloc(struct fib6_info *rt)
 {
+	unsigned short flags = fib6_info_dst_flags(rt);
 	struct net_device *dev;
 	struct rt6_info *pcpu_rt;
 
 	rcu_read_lock();
 	dev = ip6_rt_get_dev_rcu(rt);
-	pcpu_rt = __ip6_dst_alloc(dev_net(dev), dev, rt->dst.flags);
+	pcpu_rt = ip6_dst_alloc(dev_net(dev), dev, flags);
 	rcu_read_unlock();
 	if (!pcpu_rt)
 		return NULL;
 	ip6_rt_copy_init(pcpu_rt, rt);
-	pcpu_rt->rt6i_protocol = rt->rt6i_protocol;
 	pcpu_rt->rt6i_flags |= RTF_PCPU;
 	return pcpu_rt;
 }
 
 /* It should be called with rcu_read_lock() acquired */
-static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt)
+static struct rt6_info *rt6_get_pcpu_route(struct fib6_info *rt)
 {
 	struct rt6_info *pcpu_rt, **p;
 
 	p = this_cpu_ptr(rt->rt6i_pcpu);
 	pcpu_rt = *p;
 
-	if (pcpu_rt && ip6_hold_safe(NULL, &pcpu_rt, false))
-		rt6_dst_from_metrics_check(pcpu_rt);
+	if (pcpu_rt)
+		ip6_hold_safe(NULL, &pcpu_rt, false);
 
 	return pcpu_rt;
 }
 
-static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt)
+static struct rt6_info *rt6_make_pcpu_route(struct net *net,
+					    struct fib6_info *rt)
 {
 	struct rt6_info *pcpu_rt, *prev, **p;
 
 	pcpu_rt = ip6_rt_pcpu_alloc(rt);
 	if (!pcpu_rt) {
-		struct net *net = dev_net(rt->dst.dev);
-
 		dst_hold(&net->ipv6.ip6_null_entry->dst);
 		return net->ipv6.ip6_null_entry;
 	}
@@ -1138,7 +1244,6 @@ static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt)
 	prev = cmpxchg(p, NULL, pcpu_rt);
 	BUG_ON(prev);
 
-	rt6_dst_from_metrics_check(pcpu_rt);
 	return pcpu_rt;
 }
 
@@ -1158,9 +1263,8 @@ static void rt6_remove_exception(struct rt6_exception_bucket *bucket,
 		return;
 
 	net = dev_net(rt6_ex->rt6i->dst.dev);
-	rt6_ex->rt6i->rt6i_node = NULL;
 	hlist_del_rcu(&rt6_ex->hlist);
-	rt6_release(rt6_ex->rt6i);
+	dst_release(&rt6_ex->rt6i->dst);
 	kfree_rcu(rt6_ex, rcu);
 	WARN_ON_ONCE(!bucket->depth);
 	bucket->depth--;
@@ -1268,20 +1372,36 @@ __rt6_find_exception_rcu(struct rt6_exception_bucket **bucket,
 	return NULL;
 }
 
-static int rt6_insert_exception(struct rt6_info *nrt,
-				struct rt6_info *ort)
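+/* Route MTU: prefer fib6_pmtu, fall back to the device's mtu6, then cap
+ * at IP6_MAX_MTU and subtract any lwtunnel encap headroom.
+ */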
+static unsigned int fib6_mtu(const struct fib6_info *rt)
 {
-	struct net *net = dev_net(ort->dst.dev);
+	unsigned int mtu;
+
+	if (rt->fib6_pmtu) {
+		mtu = rt->fib6_pmtu;
+	} else {
+		struct net_device *dev = fib6_info_nh_dev(rt);
+		struct inet6_dev *idev;
+
+		rcu_read_lock();
+		idev = __in6_dev_get(dev);
+		mtu = idev->cnf.mtu6;
+		rcu_read_unlock();
+	}
+
+	mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
+
+	return mtu - lwtunnel_headroom(rt->fib6_nh.nh_lwtstate, mtu);
+}
+
+static int rt6_insert_exception(struct rt6_info *nrt,
+				struct fib6_info *ort)
+{
+	struct net *net = dev_net(nrt->dst.dev);
 	struct rt6_exception_bucket *bucket;
 	struct in6_addr *src_key = NULL;
 	struct rt6_exception *rt6_ex;
 	int err = 0;
 
-	/* ort can't be a cache or pcpu route */
-	if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
-		ort = ort->from;
-	WARN_ON_ONCE(ort->rt6i_flags & (RTF_CACHE | RTF_PCPU));
-
 	spin_lock_bh(&rt6_exception_lock);
 
 	if (ort->exception_bucket_flushed) {
@@ -1308,19 +1428,19 @@ static int rt6_insert_exception(struct rt6_info *nrt,
 	 * Otherwise, the exception table is indexed by
 	 * a hash of only rt6i_dst.
 	 */
-	if (ort->rt6i_src.plen)
+	if (ort->fib6_src.plen)
 		src_key = &nrt->rt6i_src.addr;
 #endif
 
 	/* Update rt6i_prefsrc as it could be changed
 	 * in rt6_remove_prefsrc()
 	 */
-	nrt->rt6i_prefsrc = ort->rt6i_prefsrc;
+	nrt->rt6i_prefsrc = ort->fib6_prefsrc;
 	/* rt6_mtu_change() might lower mtu on ort.
 	 * Only insert this exception route if its mtu
 	 * is less than ort's mtu value.
 	 */
-	if (nrt->rt6i_pmtu >= dst_mtu(&ort->dst)) {
+	if (dst_metric_raw(&nrt->dst, RTAX_MTU) >= fib6_mtu(ort)) {
 		err = -EINVAL;
 		goto out;
 	}
@@ -1337,8 +1457,6 @@ static int rt6_insert_exception(struct rt6_info *nrt,
 	}
 	rt6_ex->rt6i = nrt;
 	rt6_ex->stamp = jiffies;
-	atomic_inc(&nrt->rt6i_ref);
-	nrt->rt6i_node = ort->rt6i_node;
 	hlist_add_head_rcu(&rt6_ex->hlist, &bucket->chain);
 	bucket->depth++;
 	net->ipv6.rt6_stats->fib_rt_cache++;
@@ -1351,16 +1469,16 @@ static int rt6_insert_exception(struct rt6_info *nrt,
 
 	/* Update fn->fn_sernum to invalidate all cached dst */
 	if (!err) {
-		spin_lock_bh(&ort->rt6i_table->tb6_lock);
-		fib6_update_sernum(ort);
-		spin_unlock_bh(&ort->rt6i_table->tb6_lock);
+		spin_lock_bh(&ort->fib6_table->tb6_lock);
+		fib6_update_sernum(net, ort);
+		spin_unlock_bh(&ort->fib6_table->tb6_lock);
 		fib6_force_start_gc(net);
 	}
 
 	return err;
 }
 
-void rt6_flush_exceptions(struct rt6_info *rt)
+void rt6_flush_exceptions(struct fib6_info *rt)
 {
 	struct rt6_exception_bucket *bucket;
 	struct rt6_exception *rt6_ex;
@@ -1390,7 +1508,7 @@ void rt6_flush_exceptions(struct rt6_info *rt)
 /* Find cached rt in the hash table inside passed in rt
  * Caller has to hold rcu_read_lock()
  */
-static struct rt6_info *rt6_find_cached_rt(struct rt6_info *rt,
+static struct rt6_info *rt6_find_cached_rt(struct fib6_info *rt,
 					   struct in6_addr *daddr,
 					   struct in6_addr *saddr)
 {
@@ -1408,7 +1526,7 @@ static struct rt6_info *rt6_find_cached_rt(struct rt6_info *rt,
 	 * Otherwise, the exception table is indexed by
 	 * a hash of only rt6i_dst.
 	 */
-	if (rt->rt6i_src.plen)
+	if (rt->fib6_src.plen)
 		src_key = saddr;
 #endif
 	rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key);
@@ -1420,10 +1538,10 @@ static struct rt6_info *rt6_find_cached_rt(struct rt6_info *rt,
 }
 
 /* Remove the passed in cached rt from the hash table that contains it */
-int rt6_remove_exception_rt(struct rt6_info *rt)
+static int rt6_remove_exception_rt(struct rt6_info *rt)
 {
 	struct rt6_exception_bucket *bucket;
-	struct rt6_info *from = rt->from;
+	struct fib6_info *from = rt->from;
 	struct in6_addr *src_key = NULL;
 	struct rt6_exception *rt6_ex;
 	int err;
@@ -1445,7 +1563,7 @@ int rt6_remove_exception_rt(struct rt6_info *rt)
 	 * Otherwise, the exception table is indexed by
 	 * a hash of only rt6i_dst.
 	 */
-	if (from->rt6i_src.plen)
+	if (from->fib6_src.plen)
 		src_key = &rt->rt6i_src.addr;
 #endif
 	rt6_ex = __rt6_find_exception_spinlock(&bucket,
@@ -1468,7 +1586,7 @@ int rt6_remove_exception_rt(struct rt6_info *rt)
 static void rt6_update_exception_stamp_rt(struct rt6_info *rt)
 {
 	struct rt6_exception_bucket *bucket;
-	struct rt6_info *from = rt->from;
+	struct fib6_info *from = rt->from;
 	struct in6_addr *src_key = NULL;
 	struct rt6_exception *rt6_ex;
 
@@ -1486,7 +1604,7 @@ static void rt6_update_exception_stamp_rt(struct rt6_info *rt)
 	 * Otherwise, the exception table is indexed by
 	 * a hash of only rt6i_dst.
 	 */
-	if (from->rt6i_src.plen)
+	if (from->fib6_src.plen)
 		src_key = &rt->rt6i_src.addr;
 #endif
 	rt6_ex = __rt6_find_exception_rcu(&bucket,
@@ -1498,7 +1616,7 @@ static void rt6_update_exception_stamp_rt(struct rt6_info *rt)
 	rcu_read_unlock();
 }
 
-static void rt6_exceptions_remove_prefsrc(struct rt6_info *rt)
+static void rt6_exceptions_remove_prefsrc(struct fib6_info *rt)
 {
 	struct rt6_exception_bucket *bucket;
 	struct rt6_exception *rt6_ex;
@@ -1540,7 +1658,7 @@ static bool rt6_mtu_change_route_allowed(struct inet6_dev *idev,
 }
 
 static void rt6_exceptions_update_pmtu(struct inet6_dev *idev,
-				       struct rt6_info *rt, int mtu)
+				       struct fib6_info *rt, int mtu)
 {
 	struct rt6_exception_bucket *bucket;
 	struct rt6_exception *rt6_ex;
@@ -1557,12 +1675,12 @@ static void rt6_exceptions_update_pmtu(struct inet6_dev *idev,
 			struct rt6_info *entry = rt6_ex->rt6i;
 
 			/* For RTF_CACHE with rt6i_pmtu == 0 (i.e. a redirected
-			 * route), the metrics of its rt->dst.from have already
+			 * route), the metrics of its rt->from have already
 			 * been updated.
 			 */
-			if (entry->rt6i_pmtu &&
+			if (dst_metric_raw(&entry->dst, RTAX_MTU) &&
 			    rt6_mtu_change_route_allowed(idev, entry, mtu))
-				entry->rt6i_pmtu = mtu;
+				dst_metric_set(&entry->dst, RTAX_MTU, mtu);
 		}
 		bucket++;
 	}
@@ -1570,7 +1688,7 @@ static void rt6_exceptions_update_pmtu(struct inet6_dev *idev,
 
 #define RTF_CACHE_GATEWAY	(RTF_GATEWAY | RTF_CACHE)
 
-static void rt6_exceptions_clean_tohost(struct rt6_info *rt,
+static void rt6_exceptions_clean_tohost(struct fib6_info *rt,
 					struct in6_addr *gateway)
 {
 	struct rt6_exception_bucket *bucket;
@@ -1649,7 +1767,7 @@ static void rt6_age_examine_exception(struct rt6_exception_bucket *bucket,
 	gc_args->more++;
 }
 
-void rt6_age_exceptions(struct rt6_info *rt,
+void rt6_age_exceptions(struct fib6_info *rt,
 			struct fib6_gc_args *gc_args,
 			unsigned long now)
 {
@@ -1685,7 +1803,8 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
 			       const struct sk_buff *skb, int flags)
 {
 	struct fib6_node *fn, *saved_fn;
-	struct rt6_info *rt, *rt_cache;
+	struct fib6_info *f6i;
+	struct rt6_info *rt;
 	int strict = 0;
 
 	strict |= flags & RT6_LOOKUP_F_IFACE;
@@ -1702,10 +1821,10 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
 		oif = 0;
 
 redo_rt6_select:
-	rt = rt6_select(net, fn, oif, strict);
-	if (rt->rt6i_nsiblings)
-		rt = rt6_multipath_select(net, rt, fl6, oif, skb, strict);
-	if (rt == net->ipv6.ip6_null_entry) {
+	f6i = rt6_select(net, fn, oif, strict);
+	if (f6i->fib6_nsiblings)
+		f6i = rt6_multipath_select(net, f6i, fl6, oif, skb, strict);
+	if (f6i == net->ipv6.fib6_null_entry) {
 		fn = fib6_backtrack(fn, &fl6->saddr);
 		if (fn)
 			goto redo_rt6_select;
@@ -1717,45 +1836,35 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
 		}
 	}
 
-	/*Search through exception table */
-	rt_cache = rt6_find_cached_rt(rt, &fl6->daddr, &fl6->saddr);
-	if (rt_cache)
-		rt = rt_cache;
-
-	if (rt == net->ipv6.ip6_null_entry) {
+	if (f6i == net->ipv6.fib6_null_entry) {
+		rt = net->ipv6.ip6_null_entry;
 		rcu_read_unlock();
 		dst_hold(&rt->dst);
 		trace_fib6_table_lookup(net, rt, table, fl6);
 		return rt;
-	} else if (rt->rt6i_flags & RTF_CACHE) {
-		if (ip6_hold_safe(net, &rt, true)) {
+	}
+
+	/* Search through exception table */
+	rt = rt6_find_cached_rt(f6i, &fl6->daddr, &fl6->saddr);
+	if (rt) {
+		if (ip6_hold_safe(net, &rt, true))
 			dst_use_noref(&rt->dst, jiffies);
-			rt6_dst_from_metrics_check(rt);
-		}
+
 		rcu_read_unlock();
 		trace_fib6_table_lookup(net, rt, table, fl6);
 		return rt;
 	} else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
-			    !(rt->rt6i_flags & RTF_GATEWAY))) {
+			    !(f6i->fib6_flags & RTF_GATEWAY))) {
 		/* Create a RTF_CACHE clone which will not be
 		 * owned by the fib6 tree.  It is for the special case where
 		 * the daddr in the skb during the neighbor look-up is different
 		 * from the fl6->daddr used to look-up route here.
 		 */
-
 		struct rt6_info *uncached_rt;
 
-		if (ip6_hold_safe(net, &rt, true)) {
-			dst_use_noref(&rt->dst, jiffies);
-		} else {
-			rcu_read_unlock();
-			uncached_rt = rt;
-			goto uncached_rt_out;
-		}
-		rcu_read_unlock();
+		uncached_rt = ip6_rt_cache_alloc(f6i, &fl6->daddr, NULL);
 
-		uncached_rt = ip6_rt_cache_alloc(rt, &fl6->daddr, NULL);
-		dst_release(&rt->dst);
+		rcu_read_unlock();
 
 		if (uncached_rt) {
 			/* Uncached_rt's refcnt is taken during ip6_rt_cache_alloc()
@@ -1768,7 +1877,6 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
 			dst_hold(&uncached_rt->dst);
 		}
 
-uncached_rt_out:
 		trace_fib6_table_lookup(net, uncached_rt, table, fl6);
 		return uncached_rt;
 
@@ -1777,24 +1885,12 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
 
 		struct rt6_info *pcpu_rt;
 
-		dst_use_noref(&rt->dst, jiffies);
 		local_bh_disable();
-		pcpu_rt = rt6_get_pcpu_route(rt);
+		pcpu_rt = rt6_get_pcpu_route(f6i);
 
-		if (!pcpu_rt) {
-			/* atomic_inc_not_zero() is needed when using rcu */
-			if (atomic_inc_not_zero(&rt->rt6i_ref)) {
-				/* No dst_hold() on rt is needed because grabbing
-				 * rt->rt6i_ref makes sure rt can't be released.
-				 */
-				pcpu_rt = rt6_make_pcpu_route(rt);
-				rt6_release(rt);
-			} else {
-				/* rt is already removed from tree */
-				pcpu_rt = net->ipv6.ip6_null_entry;
-				dst_hold(&pcpu_rt->dst);
-			}
-		}
+		if (!pcpu_rt)
+			pcpu_rt = rt6_make_pcpu_route(net, f6i);
+
 		local_bh_enable();
 		rcu_read_unlock();
 		trace_fib6_table_lookup(net, pcpu_rt, table, fl6);
@@ -2015,7 +2111,6 @@ struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_ori
 		rt->rt6i_idev = in6_dev_get(loopback_dev);
 		rt->rt6i_gateway = ort->rt6i_gateway;
 		rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU;
-		rt->rt6i_metric = 0;
 
 		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
 #ifdef CONFIG_IPV6_SUBTREES
@@ -2031,18 +2126,27 @@ struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_ori
  *	Destination cache support functions
  */
 
-static void rt6_dst_from_metrics_check(struct rt6_info *rt)
-{
-	if (rt->from &&
-	    dst_metrics_ptr(&rt->dst) != dst_metrics_ptr(&rt->from->dst))
-		dst_init_metrics(&rt->dst, dst_metrics_ptr(&rt->from->dst), true);
-}
-
-static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie)
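+/* A fib6_info is still valid while its cookie matches and it has not
+ * expired.
+ */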
+static bool fib6_check(struct fib6_info *f6i, u32 cookie)
 {
 	u32 rt_cookie = 0;
 
-	if (!rt6_get_cookie_safe(rt, &rt_cookie) || rt_cookie != cookie)
+	if (!fib6_get_cookie_safe(f6i, &rt_cookie) || rt_cookie != cookie)
+		return false;
+
+	if (fib6_check_expired(f6i))
+		return false;
+
+	return true;
+}
+
+static struct dst_entry *rt6_check(struct rt6_info *rt,
+				   struct fib6_info *from,
+				   u32 cookie)
+{
+	u32 rt_cookie = 0;
+
+	if ((from && !fib6_get_cookie_safe(from, &rt_cookie)) ||
+	    rt_cookie != cookie)
 		return NULL;
 
 	if (rt6_check_expired(rt))
@@ -2051,11 +2155,13 @@ static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie)
 	return &rt->dst;
 }
 
-static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie)
+static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt,
+					    struct fib6_info *from,
+					    u32 cookie)
 {
 	if (!__rt6_check_expired(rt) &&
 	    rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
-	    rt6_check(rt->from, cookie))
+	    fib6_check(from, cookie))
 		return &rt->dst;
 	else
 		return NULL;
@@ -2063,22 +2169,30 @@ static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie)
 
 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
 {
+	struct dst_entry *dst_ret;
+	struct fib6_info *from;
 	struct rt6_info *rt;
 
-	rt = (struct rt6_info *) dst;
+	rt = container_of(dst, struct rt6_info, dst);
+
+	rcu_read_lock();
 
 	/* All IPV6 dsts are created with ->obsolete set to the value
 	 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
 	 * into this function always.
 	 */
 
-	rt6_dst_from_metrics_check(rt);
+	from = rcu_dereference(rt->from);
 
-	if (rt->rt6i_flags & RTF_PCPU ||
-	    (unlikely(!list_empty(&rt->rt6i_uncached)) && rt->from))
-		return rt6_dst_from_check(rt, cookie);
+	if (from && (rt->rt6i_flags & RTF_PCPU ||
+	    unlikely(!list_empty(&rt->rt6i_uncached))))
+		dst_ret = rt6_dst_from_check(rt, from, cookie);
 	else
-		return rt6_check(rt, cookie);
+		dst_ret = rt6_check(rt, from, cookie);
+
+	rcu_read_unlock();
+
+	return dst_ret;
 }
 
 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
@@ -2088,7 +2202,7 @@ static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
 	if (rt) {
 		if (rt->rt6i_flags & RTF_CACHE) {
 			if (rt6_check_expired(rt)) {
-				ip6_del_rt(rt);
+				rt6_remove_exception_rt(rt);
 				dst = NULL;
 			}
 		} else {
@@ -2109,33 +2223,58 @@ static void ip6_link_failure(struct sk_buff *skb)
 	if (rt) {
 		if (rt->rt6i_flags & RTF_CACHE) {
 			if (dst_hold_safe(&rt->dst))
-				ip6_del_rt(rt);
+				rt6_remove_exception_rt(rt);
 		} else {
+			struct fib6_info *from;
 			struct fib6_node *fn;
 
 			rcu_read_lock();
-			fn = rcu_dereference(rt->rt6i_node);
-			if (fn && (rt->rt6i_flags & RTF_DEFAULT))
-				fn->fn_sernum = -1;
+			from = rcu_dereference(rt->from);
+			if (from) {
+				fn = rcu_dereference(from->fib6_node);
+				if (fn && (rt->rt6i_flags & RTF_DEFAULT))
+					fn->fn_sernum = -1;
+			}
 			rcu_read_unlock();
 		}
 	}
 }
 
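+/* Arm an expiry timeout on a cached dst, seeding it from the parent
+ * fib6_info's expiry when the dst had none of its own.
+ */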
+static void rt6_update_expires(struct rt6_info *rt0, int timeout)
+{
+	if (!(rt0->rt6i_flags & RTF_EXPIRES)) {
+		struct fib6_info *from;
+
+		rcu_read_lock();
+		from = rcu_dereference(rt0->from);
+		if (from)
+			rt0->dst.expires = from->expires;
+		rcu_read_unlock();
+	}
+
+	dst_set_expires(&rt0->dst, timeout);
+	rt0->rt6i_flags |= RTF_EXPIRES;
+}
+
 static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
 {
 	struct net *net = dev_net(rt->dst.dev);
 
+	dst_metric_set(&rt->dst, RTAX_MTU, mtu);
 	rt->rt6i_flags |= RTF_MODIFIED;
-	rt->rt6i_pmtu = mtu;
 	rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
 }
 
 static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
 {
+	bool from_set;
+
+	rcu_read_lock();
+	from_set = !!rcu_dereference(rt->from);
+	rcu_read_unlock();
+
 	return !(rt->rt6i_flags & RTF_CACHE) &&
-		(rt->rt6i_flags & RTF_PCPU ||
-		 rcu_access_pointer(rt->rt6i_node));
+		(rt->rt6i_flags & RTF_PCPU || from_set);
 }
 
 static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
@@ -2171,14 +2310,18 @@ static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
 		if (rt6->rt6i_flags & RTF_CACHE)
 			rt6_update_exception_stamp_rt(rt6);
 	} else if (daddr) {
+		struct fib6_info *from;
 		struct rt6_info *nrt6;
 
-		nrt6 = ip6_rt_cache_alloc(rt6, daddr, saddr);
+		rcu_read_lock();
+		from = rcu_dereference(rt6->from);
+		nrt6 = ip6_rt_cache_alloc(from, daddr, saddr);
 		if (nrt6) {
 			rt6_do_update_pmtu(nrt6, mtu);
-			if (rt6_insert_exception(nrt6, rt6))
+			if (rt6_insert_exception(nrt6, from))
 				dst_release_immediate(&nrt6->dst);
 		}
+		rcu_read_unlock();
 	}
 }
 
@@ -2259,7 +2402,8 @@ static struct rt6_info *__ip6_route_redirect(struct net *net,
 					     int flags)
 {
 	struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
-	struct rt6_info *rt, *rt_cache;
+	struct rt6_info *ret = NULL, *rt_cache;
+	struct fib6_info *rt;
 	struct fib6_node *fn;
 
 	/* Get the "current" route for this destination and
@@ -2276,29 +2420,29 @@ static struct rt6_info *__ip6_route_redirect(struct net *net,
 	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
 restart:
 	for_each_fib6_node_rt_rcu(fn) {
-		if (rt->rt6i_nh_flags & RTNH_F_DEAD)
+		if (rt->fib6_nh.nh_flags & RTNH_F_DEAD)
 			continue;
-		if (rt6_check_expired(rt))
+		if (fib6_check_expired(rt))
 			continue;
-		if (rt->dst.error)
+		if (rt->fib6_flags & RTF_REJECT)
 			break;
-		if (!(rt->rt6i_flags & RTF_GATEWAY))
+		if (!(rt->fib6_flags & RTF_GATEWAY))
 			continue;
-		if (fl6->flowi6_oif != rt->dst.dev->ifindex)
+		if (fl6->flowi6_oif != rt->fib6_nh.nh_dev->ifindex)
 			continue;
 		/* rt_cache's gateway might be different from its 'parent'
 		 * in the case of an ip redirect.
 		 * So we keep searching in the exception table if the gateway
 		 * is different.
 		 */
-		if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway)) {
+		if (!ipv6_addr_equal(&rdfl->gateway, &rt->fib6_nh.nh_gw)) {
 			rt_cache = rt6_find_cached_rt(rt,
 						      &fl6->daddr,
 						      &fl6->saddr);
 			if (rt_cache &&
 			    ipv6_addr_equal(&rdfl->gateway,
 					    &rt_cache->rt6i_gateway)) {
-				rt = rt_cache;
+				ret = rt_cache;
 				break;
 			}
 			continue;
@@ -2307,25 +2451,28 @@ static struct rt6_info *__ip6_route_redirect(struct net *net,
 	}
 
 	if (!rt)
-		rt = net->ipv6.ip6_null_entry;
-	else if (rt->dst.error) {
-		rt = net->ipv6.ip6_null_entry;
+		rt = net->ipv6.fib6_null_entry;
+	else if (rt->fib6_flags & RTF_REJECT) {
+		ret = net->ipv6.ip6_null_entry;
 		goto out;
 	}
 
-	if (rt == net->ipv6.ip6_null_entry) {
+	if (rt == net->ipv6.fib6_null_entry) {
 		fn = fib6_backtrack(fn, &fl6->saddr);
 		if (fn)
 			goto restart;
 	}
 
 out:
-	ip6_hold_safe(net, &rt, true);
+	if (ret)
+		dst_hold(&ret->dst);
+	else
+		ret = ip6_create_rt_rcu(rt);
 
 	rcu_read_unlock();
 
-	trace_fib6_table_lookup(net, rt, table, fl6);
-	return rt;
+	trace_fib6_table_lookup(net, ret, table, fl6);
+	return ret;
 };
 
 static struct dst_entry *ip6_route_redirect(struct net *net,
@@ -2417,12 +2564,8 @@ static unsigned int ip6_default_advmss(const struct dst_entry *dst)
 
 static unsigned int ip6_mtu(const struct dst_entry *dst)
 {
-	const struct rt6_info *rt = (const struct rt6_info *)dst;
-	unsigned int mtu = rt->rt6i_pmtu;
 	struct inet6_dev *idev;
-
-	if (mtu)
-		goto out;
+	unsigned int mtu;
 
 	mtu = dst_metric_raw(dst, RTAX_MTU);
 	if (mtu)
@@ -2506,60 +2649,22 @@ static int ip6_dst_gc(struct dst_ops *ops)
 	return entries > rt_max_size;
 }
 
-static int ip6_convert_metrics(struct mx6_config *mxc,
-			       const struct fib6_config *cfg)
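+/* Allocate a refcounted metrics block for the route and fill it from
+ * the netlink attributes via the shared ip_metrics_convert() helper.
+ */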
+static int ip6_convert_metrics(struct net *net, struct fib6_info *rt,
+			       struct fib6_config *cfg)
 {
-	struct net *net = cfg->fc_nlinfo.nl_net;
-	bool ecn_ca = false;
-	struct nlattr *nla;
-	int remaining;
-	u32 *mp;
+	struct dst_metrics *p;
 
 	if (!cfg->fc_mx)
 		return 0;
 
-	mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
-	if (unlikely(!mp))
+	p = kzalloc(sizeof(*rt->fib6_metrics), GFP_KERNEL);
+	if (unlikely(!p))
 		return -ENOMEM;
 
-	nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
-		int type = nla_type(nla);
-		u32 val;
+	refcount_set(&p->refcnt, 1);
+	rt->fib6_metrics = p;
 
-		if (!type)
-			continue;
-		if (unlikely(type > RTAX_MAX))
-			goto err;
-
-		if (type == RTAX_CC_ALGO) {
-			char tmp[TCP_CA_NAME_MAX];
-
-			nla_strlcpy(tmp, nla, sizeof(tmp));
-			val = tcp_ca_get_key_by_name(net, tmp, &ecn_ca);
-			if (val == TCP_CA_UNSPEC)
-				goto err;
-		} else {
-			val = nla_get_u32(nla);
-		}
-		if (type == RTAX_HOPLIMIT && val > 255)
-			val = 255;
-		if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK))
-			goto err;
-
-		mp[type - 1] = val;
-		__set_bit(type - 1, mxc->mx_valid);
-	}
-
-	if (ecn_ca) {
-		__set_bit(RTAX_FEATURES - 1, mxc->mx_valid);
-		mp[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA;
-	}
-
-	mxc->mx = mp;
-	return 0;
- err:
-	kfree(mp);
-	return -EINVAL;
+	return ip_metrics_convert(net, cfg->fc_mx, cfg->fc_mx_len, p->metrics);
 }
 
 static struct rt6_info *ip6_nh_lookup_table(struct net *net,
@@ -2745,11 +2850,12 @@ static int ip6_validate_gw(struct net *net, struct fib6_config *cfg,
 	return err;
 }
 
-static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
+static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
+					      gfp_t gfp_flags,
 					      struct netlink_ext_ack *extack)
 {
 	struct net *net = cfg->fc_nlinfo.nl_net;
-	struct rt6_info *rt = NULL;
+	struct fib6_info *rt = NULL;
 	struct net_device *dev = NULL;
 	struct inet6_dev *idev = NULL;
 	struct fib6_table *table;
@@ -2768,6 +2874,11 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
 		goto out;
 	}
 
+	if (cfg->fc_type > RTN_MAX) {
+		NL_SET_ERR_MSG(extack, "Invalid route type");
+		goto out;
+	}
+
 	if (cfg->fc_dst_len > 128) {
 		NL_SET_ERR_MSG(extack, "Invalid prefix length");
 		goto out;
@@ -2826,35 +2937,30 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
 	if (!table)
 		goto out;
 
-	rt = ip6_dst_alloc(net, NULL,
-			   (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT);
-
-	if (!rt) {
-		err = -ENOMEM;
+	err = -ENOMEM;
+	rt = fib6_info_alloc(gfp_flags);
+	if (!rt)
 		goto out;
-	}
+
+	if (cfg->fc_flags & RTF_ADDRCONF)
+		rt->dst_nocount = true;
+
+	err = ip6_convert_metrics(net, rt, cfg);
+	if (err < 0)
+		goto out;
 
 	if (cfg->fc_flags & RTF_EXPIRES)
-		rt6_set_expires(rt, jiffies +
+		fib6_set_expires(rt, jiffies +
 				clock_t_to_jiffies(cfg->fc_expires));
 	else
-		rt6_clean_expires(rt);
+		fib6_clean_expires(rt);
 
 	if (cfg->fc_protocol == RTPROT_UNSPEC)
 		cfg->fc_protocol = RTPROT_BOOT;
-	rt->rt6i_protocol = cfg->fc_protocol;
+	rt->fib6_protocol = cfg->fc_protocol;
 
 	addr_type = ipv6_addr_type(&cfg->fc_dst);
 
-	if (addr_type & IPV6_ADDR_MULTICAST)
-		rt->dst.input = ip6_mc_input;
-	else if (cfg->fc_flags & RTF_LOCAL)
-		rt->dst.input = ip6_input;
-	else
-		rt->dst.input = ip6_forward;
-
-	rt->dst.output = ip6_output;
-
 	if (cfg->fc_encap) {
 		struct lwtunnel_state *lwtstate;
 
@@ -2863,22 +2969,23 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
 					   &lwtstate, extack);
 		if (err)
 			goto out;
-		rt->dst.lwtstate = lwtstate_get(lwtstate);
-		lwtunnel_set_redirect(&rt->dst);
+		rt->fib6_nh.nh_lwtstate = lwtstate_get(lwtstate);
 	}
 
-	ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
-	rt->rt6i_dst.plen = cfg->fc_dst_len;
-	if (rt->rt6i_dst.plen == 128)
-		rt->dst.flags |= DST_HOST;
+	ipv6_addr_prefix(&rt->fib6_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
+	rt->fib6_dst.plen = cfg->fc_dst_len;
+	if (rt->fib6_dst.plen == 128)
+		rt->dst_host = true;
 
 #ifdef CONFIG_IPV6_SUBTREES
-	ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
-	rt->rt6i_src.plen = cfg->fc_src_len;
+	ipv6_addr_prefix(&rt->fib6_src.addr, &cfg->fc_src, cfg->fc_src_len);
+	rt->fib6_src.plen = cfg->fc_src_len;
 #endif
 
-	rt->rt6i_metric = cfg->fc_metric;
-	rt->rt6i_nh_weight = 1;
+	rt->fib6_metric = cfg->fc_metric;
+	rt->fib6_nh.nh_weight = 1;
+
+	rt->fib6_type = cfg->fc_type;
 
 	/* We cannot add true routes via loopback here,
 	   they would result in kernel looping; promote them to reject routes
@@ -2901,28 +3008,7 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
 				goto out;
 			}
 		}
-		rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
-		switch (cfg->fc_type) {
-		case RTN_BLACKHOLE:
-			rt->dst.error = -EINVAL;
-			rt->dst.output = dst_discard_out;
-			rt->dst.input = dst_discard;
-			break;
-		case RTN_PROHIBIT:
-			rt->dst.error = -EACCES;
-			rt->dst.output = ip6_pkt_prohibit_out;
-			rt->dst.input = ip6_pkt_prohibit;
-			break;
-		case RTN_THROW:
-		case RTN_UNREACHABLE:
-		default:
-			rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN
-					: (cfg->fc_type == RTN_UNREACHABLE)
-					? -EHOSTUNREACH : -ENETUNREACH;
-			rt->dst.output = ip6_pkt_discard_out;
-			rt->dst.input = ip6_pkt_discard;
-			break;
-		}
+		rt->fib6_flags = RTF_REJECT|RTF_NONEXTHOP;
 		goto install_route;
 	}
 
@@ -2931,7 +3017,7 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
 		if (err)
 			goto out;
 
-		rt->rt6i_gateway = cfg->fc_gateway;
+		rt->fib6_nh.nh_gw = cfg->fc_gateway;
 	}
 
 	err = -ENODEV;
@@ -2956,96 +3042,82 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
 			err = -EINVAL;
 			goto out;
 		}
-		rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
-		rt->rt6i_prefsrc.plen = 128;
+		rt->fib6_prefsrc.addr = cfg->fc_prefsrc;
+		rt->fib6_prefsrc.plen = 128;
 	} else
-		rt->rt6i_prefsrc.plen = 0;
+		rt->fib6_prefsrc.plen = 0;
 
-	rt->rt6i_flags = cfg->fc_flags;
+	rt->fib6_flags = cfg->fc_flags;
 
 install_route:
-	if (!(rt->rt6i_flags & (RTF_LOCAL | RTF_ANYCAST)) &&
+	if (!(rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST)) &&
 	    !netif_carrier_ok(dev))
-		rt->rt6i_nh_flags |= RTNH_F_LINKDOWN;
-	rt->rt6i_nh_flags |= (cfg->fc_flags & RTNH_F_ONLINK);
-	rt->dst.dev = dev;
-	rt->rt6i_idev = idev;
-	rt->rt6i_table = table;
+		rt->fib6_nh.nh_flags |= RTNH_F_LINKDOWN;
+	rt->fib6_nh.nh_flags |= (cfg->fc_flags & RTNH_F_ONLINK);
+	rt->fib6_nh.nh_dev = dev;
+	rt->fib6_table = table;
 
 	cfg->fc_nlinfo.nl_net = dev_net(dev);
 
+	if (idev)
+		in6_dev_put(idev);
+
 	return rt;
 out:
 	if (dev)
 		dev_put(dev);
 	if (idev)
 		in6_dev_put(idev);
-	if (rt)
-		dst_release_immediate(&rt->dst);
 
+	fib6_info_release(rt);
 	return ERR_PTR(err);
 }
 
-int ip6_route_add(struct fib6_config *cfg,
+int ip6_route_add(struct fib6_config *cfg, gfp_t gfp_flags,
 		  struct netlink_ext_ack *extack)
 {
-	struct mx6_config mxc = { .mx = NULL, };
-	struct rt6_info *rt;
+	struct fib6_info *rt;
 	int err;
 
-	rt = ip6_route_info_create(cfg, extack);
-	if (IS_ERR(rt)) {
-		err = PTR_ERR(rt);
-		rt = NULL;
-		goto out;
-	}
+	rt = ip6_route_info_create(cfg, gfp_flags, extack);
+	if (IS_ERR(rt))
+		return PTR_ERR(rt);
 
-	err = ip6_convert_metrics(&mxc, cfg);
-	if (err)
-		goto out;
-
-	err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc, extack);
-
-	kfree(mxc.mx);
-
-	return err;
-out:
-	if (rt)
-		dst_release_immediate(&rt->dst);
+	err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, extack);
+	fib6_info_release(rt);
 
 	return err;
 }
 
-static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
+static int __ip6_del_rt(struct fib6_info *rt, struct nl_info *info)
 {
-	int err;
+	struct net *net = info->nl_net;
 	struct fib6_table *table;
-	struct net *net = dev_net(rt->dst.dev);
+	int err;
 
-	if (rt == net->ipv6.ip6_null_entry) {
+	if (rt == net->ipv6.fib6_null_entry) {
 		err = -ENOENT;
 		goto out;
 	}
 
-	table = rt->rt6i_table;
+	table = rt->fib6_table;
 	spin_lock_bh(&table->tb6_lock);
 	err = fib6_del(rt, info);
 	spin_unlock_bh(&table->tb6_lock);
 
 out:
-	ip6_rt_put(rt);
+	fib6_info_release(rt);
 	return err;
 }
 
-int ip6_del_rt(struct rt6_info *rt)
+int ip6_del_rt(struct net *net, struct fib6_info *rt)
 {
-	struct nl_info info = {
-		.nl_net = dev_net(rt->dst.dev),
-	};
+	struct nl_info info = { .nl_net = net };
+
 	return __ip6_del_rt(rt, &info);
 }
 
-static int __ip6_del_rt_siblings(struct rt6_info *rt, struct fib6_config *cfg)
+static int __ip6_del_rt_siblings(struct fib6_info *rt, struct fib6_config *cfg)
 {
 	struct nl_info *info = &cfg->fc_nlinfo;
 	struct net *net = info->nl_net;
@@ -3053,20 +3125,20 @@ static int __ip6_del_rt_siblings(struct rt6_info *rt, struct fib6_config *cfg)
 	struct fib6_table *table;
 	int err = -ENOENT;
 
-	if (rt == net->ipv6.ip6_null_entry)
+	if (rt == net->ipv6.fib6_null_entry)
 		goto out_put;
-	table = rt->rt6i_table;
+	table = rt->fib6_table;
 	spin_lock_bh(&table->tb6_lock);
 
-	if (rt->rt6i_nsiblings && cfg->fc_delete_all_nh) {
-		struct rt6_info *sibling, *next_sibling;
+	if (rt->fib6_nsiblings && cfg->fc_delete_all_nh) {
+		struct fib6_info *sibling, *next_sibling;
 
 		/* prefer to send a single notification with all hops */
 		skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
 		if (skb) {
 			u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
 
-			if (rt6_fill_node(net, skb, rt,
+			if (rt6_fill_node(net, skb, rt, NULL,
 					  NULL, NULL, 0, RTM_DELROUTE,
 					  info->portid, seq, 0) < 0) {
 				kfree_skb(skb);
@@ -3076,8 +3148,8 @@ static int __ip6_del_rt_siblings(struct rt6_info *rt, struct fib6_config *cfg)
 		}
 
 		list_for_each_entry_safe(sibling, next_sibling,
-					 &rt->rt6i_siblings,
-					 rt6i_siblings) {
+					 &rt->fib6_siblings,
+					 fib6_siblings) {
 			err = fib6_del(sibling, info);
 			if (err)
 				goto out_unlock;
@@ -3088,7 +3160,7 @@ static int __ip6_del_rt_siblings(struct rt6_info *rt, struct fib6_config *cfg)
 out_unlock:
 	spin_unlock_bh(&table->tb6_lock);
 out_put:
-	ip6_rt_put(rt);
+	fib6_info_release(rt);
 
 	if (skb) {
 		rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
@@ -3097,11 +3169,28 @@ static int __ip6_del_rt_siblings(struct rt6_info *rt, struct fib6_config *cfg)
 	return err;
 }
 
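+/* Delete a cached exception route if it matches the device and gateway
+ * filters given in cfg.
+ */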
+static int ip6_del_cached_rt(struct rt6_info *rt, struct fib6_config *cfg)
+{
+	int rc = -ESRCH;
+
+	if (cfg->fc_ifindex && rt->dst.dev->ifindex != cfg->fc_ifindex)
+		goto out;
+
+	if (cfg->fc_flags & RTF_GATEWAY &&
+	    !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
+		goto out;
+	if (dst_hold_safe(&rt->dst))
+		rc = rt6_remove_exception_rt(rt);
+out:
+	return rc;
+}
+
 static int ip6_route_del(struct fib6_config *cfg,
 			 struct netlink_ext_ack *extack)
 {
-	struct rt6_info *rt, *rt_cache;
+	struct rt6_info *rt_cache;
 	struct fib6_table *table;
+	struct fib6_info *rt;
 	struct fib6_node *fn;
 	int err = -ESRCH;
 
@@ -3121,25 +3210,29 @@ static int ip6_route_del(struct fib6_config *cfg,
 	if (fn) {
 		for_each_fib6_node_rt_rcu(fn) {
 			if (cfg->fc_flags & RTF_CACHE) {
+				int rc;
+
 				rt_cache = rt6_find_cached_rt(rt, &cfg->fc_dst,
 							      &cfg->fc_src);
-				if (!rt_cache)
-					continue;
-				rt = rt_cache;
+				if (rt_cache) {
+					rc = ip6_del_cached_rt(rt_cache, cfg);
+					if (rc != -ESRCH)
+						return rc;
+				}
+				continue;
 			}
 			if (cfg->fc_ifindex &&
-			    (!rt->dst.dev ||
-			     rt->dst.dev->ifindex != cfg->fc_ifindex))
+			    (!rt->fib6_nh.nh_dev ||
+			     rt->fib6_nh.nh_dev->ifindex != cfg->fc_ifindex))
 				continue;
 			if (cfg->fc_flags & RTF_GATEWAY &&
-			    !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
+			    !ipv6_addr_equal(&cfg->fc_gateway, &rt->fib6_nh.nh_gw))
 				continue;
-			if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
+			if (cfg->fc_metric && cfg->fc_metric != rt->fib6_metric)
 				continue;
-			if (cfg->fc_protocol && cfg->fc_protocol != rt->rt6i_protocol)
+			if (cfg->fc_protocol && cfg->fc_protocol != rt->fib6_protocol)
 				continue;
-			if (!dst_hold_safe(&rt->dst))
-				break;
+			fib6_info_hold(rt);
 			rcu_read_unlock();
 
 			/* if gateway was specified only delete the one hop */
@@ -3161,6 +3254,7 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu
 	struct ndisc_options ndopts;
 	struct inet6_dev *in6_dev;
 	struct neighbour *neigh;
+	struct fib6_info *from;
 	struct rd_msg *msg;
 	int optlen, on_link;
 	u8 *lladdr;
@@ -3242,7 +3336,10 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu
 				     NEIGH_UPDATE_F_ISROUTER)),
 		     NDISC_REDIRECT, &ndopts);
 
-	nrt = ip6_rt_cache_alloc(rt, &msg->dest, NULL);
+	rcu_read_lock();
+	from = rcu_dereference(rt->from);
+	nrt = ip6_rt_cache_alloc(from, &msg->dest, NULL);
+	rcu_read_unlock();
 	if (!nrt)
 		goto out;
 
@@ -3250,14 +3347,13 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu
 	if (on_link)
 		nrt->rt6i_flags &= ~RTF_GATEWAY;
 
-	nrt->rt6i_protocol = RTPROT_REDIRECT;
 	nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
 
 	/* No need to remove rt from the exception table if rt is
 	 * a cached route because rt6_insert_exception() will
 	 * take care of it
 	 */
-	if (rt6_insert_exception(nrt, rt)) {
+	if (rt6_insert_exception(nrt, rt->from)) {
 		dst_release_immediate(&nrt->dst);
 		goto out;
 	}
@@ -3272,44 +3368,8 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu
 	neigh_release(neigh);
 }
 
-/*
- *	Misc support functions
- */
-
-static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
-{
-	BUG_ON(from->from);
-
-	rt->rt6i_flags &= ~RTF_EXPIRES;
-	dst_hold(&from->dst);
-	rt->from = from;
-	dst_init_metrics(&rt->dst, dst_metrics_ptr(&from->dst), true);
-}
-
-static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort)
-{
-	rt->dst.input = ort->dst.input;
-	rt->dst.output = ort->dst.output;
-	rt->rt6i_dst = ort->rt6i_dst;
-	rt->dst.error = ort->dst.error;
-	rt->rt6i_idev = ort->rt6i_idev;
-	if (rt->rt6i_idev)
-		in6_dev_hold(rt->rt6i_idev);
-	rt->dst.lastuse = jiffies;
-	rt->rt6i_gateway = ort->rt6i_gateway;
-	rt->rt6i_flags = ort->rt6i_flags;
-	rt6_set_from(rt, ort);
-	rt->rt6i_metric = ort->rt6i_metric;
-#ifdef CONFIG_IPV6_SUBTREES
-	rt->rt6i_src = ort->rt6i_src;
-#endif
-	rt->rt6i_prefsrc = ort->rt6i_prefsrc;
-	rt->rt6i_table = ort->rt6i_table;
-	rt->dst.lwtstate = lwtstate_get(ort->dst.lwtstate);
-}
-
 #ifdef CONFIG_IPV6_ROUTE_INFO
-static struct rt6_info *rt6_get_route_info(struct net *net,
+static struct fib6_info *rt6_get_route_info(struct net *net,
 					   const struct in6_addr *prefix, int prefixlen,
 					   const struct in6_addr *gwaddr,
 					   struct net_device *dev)
@@ -3317,7 +3377,7 @@ static struct rt6_info *rt6_get_route_info(struct net *net,
 	u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO;
 	int ifindex = dev->ifindex;
 	struct fib6_node *fn;
-	struct rt6_info *rt = NULL;
+	struct fib6_info *rt = NULL;
 	struct fib6_table *table;
 
 	table = fib6_get_table(net, tb_id);
@@ -3330,13 +3390,13 @@ static struct rt6_info *rt6_get_route_info(struct net *net,
 		goto out;
 
 	for_each_fib6_node_rt_rcu(fn) {
-		if (rt->dst.dev->ifindex != ifindex)
+		if (rt->fib6_nh.nh_dev->ifindex != ifindex)
 			continue;
-		if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
+		if ((rt->fib6_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
 			continue;
-		if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
+		if (!ipv6_addr_equal(&rt->fib6_nh.nh_gw, gwaddr))
 			continue;
-		ip6_hold_safe(NULL, &rt, false);
+		fib6_info_hold(rt);
 		break;
 	}
 out:
@@ -3344,7 +3404,7 @@ static struct rt6_info *rt6_get_route_info(struct net *net,
 	return rt;
 }
 
-static struct rt6_info *rt6_add_route_info(struct net *net,
+static struct fib6_info *rt6_add_route_info(struct net *net,
 					   const struct in6_addr *prefix, int prefixlen,
 					   const struct in6_addr *gwaddr,
 					   struct net_device *dev,
@@ -3357,6 +3417,7 @@ static struct rt6_info *rt6_add_route_info(struct net *net,
 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
 				  RTF_UP | RTF_PREF(pref),
 		.fc_protocol = RTPROT_RA,
+		.fc_type = RTN_UNICAST,
 		.fc_nlinfo.portid = 0,
 		.fc_nlinfo.nlh = NULL,
 		.fc_nlinfo.nl_net = net,
@@ -3370,36 +3431,39 @@ static struct rt6_info *rt6_add_route_info(struct net *net,
 	if (!prefixlen)
 		cfg.fc_flags |= RTF_DEFAULT;
 
-	ip6_route_add(&cfg, NULL);
+	ip6_route_add(&cfg, GFP_ATOMIC, NULL);
 
 	return rt6_get_route_info(net, prefix, prefixlen, gwaddr, dev);
 }
 #endif
 
-struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
+struct fib6_info *rt6_get_dflt_router(struct net *net,
+				     const struct in6_addr *addr,
+				     struct net_device *dev)
 {
 	u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT;
-	struct rt6_info *rt;
+	struct fib6_info *rt;
 	struct fib6_table *table;
 
-	table = fib6_get_table(dev_net(dev), tb_id);
+	table = fib6_get_table(net, tb_id);
 	if (!table)
 		return NULL;
 
 	rcu_read_lock();
 	for_each_fib6_node_rt_rcu(&table->tb6_root) {
-		if (dev == rt->dst.dev &&
-		    ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
-		    ipv6_addr_equal(&rt->rt6i_gateway, addr))
+		if (dev == rt->fib6_nh.nh_dev &&
+		    ((rt->fib6_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
+		    ipv6_addr_equal(&rt->fib6_nh.nh_gw, addr))
 			break;
 	}
 	if (rt)
-		ip6_hold_safe(NULL, &rt, false);
+		fib6_info_hold(rt);
 	rcu_read_unlock();
 	return rt;
 }
 
-struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
+struct fib6_info *rt6_add_dflt_router(struct net *net,
+				     const struct in6_addr *gwaddr,
 				     struct net_device *dev,
 				     unsigned int pref)
 {
@@ -3410,14 +3474,15 @@ struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
 				  RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
 		.fc_protocol = RTPROT_RA,
+		.fc_type = RTN_UNICAST,
 		.fc_nlinfo.portid = 0,
 		.fc_nlinfo.nlh = NULL,
-		.fc_nlinfo.nl_net = dev_net(dev),
+		.fc_nlinfo.nl_net = net,
 	};
 
 	cfg.fc_gateway = *gwaddr;
 
-	if (!ip6_route_add(&cfg, NULL)) {
+	if (!ip6_route_add(&cfg, GFP_ATOMIC, NULL)) {
 		struct fib6_table *table;
 
 		table = fib6_get_table(dev_net(dev), cfg.fc_table);
@@ -3425,24 +3490,25 @@ struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
 			table->flags |= RT6_TABLE_HAS_DFLT_ROUTER;
 	}
 
-	return rt6_get_dflt_router(gwaddr, dev);
+	return rt6_get_dflt_router(net, gwaddr, dev);
 }
 
-static void __rt6_purge_dflt_routers(struct fib6_table *table)
+static void __rt6_purge_dflt_routers(struct net *net,
+				     struct fib6_table *table)
 {
-	struct rt6_info *rt;
+	struct fib6_info *rt;
 
 restart:
 	rcu_read_lock();
 	for_each_fib6_node_rt_rcu(&table->tb6_root) {
-		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
-		    (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
-			if (dst_hold_safe(&rt->dst)) {
-				rcu_read_unlock();
-				ip6_del_rt(rt);
-			} else {
-				rcu_read_unlock();
-			}
+		struct net_device *dev = fib6_info_nh_dev(rt);
+		struct inet6_dev *idev = dev ? __in6_dev_get(dev) : NULL;
+
+		if (rt->fib6_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
+		    (!idev || idev->cnf.accept_ra != 2)) {
+			fib6_info_hold(rt);
+			rcu_read_unlock();
+			ip6_del_rt(net, rt);
 			goto restart;
 		}
 	}
@@ -3463,7 +3529,7 @@ void rt6_purge_dflt_routers(struct net *net)
 		head = &net->ipv6.fib_table_hash[h];
 		hlist_for_each_entry_rcu(table, head, tb6_hlist) {
 			if (table->flags & RT6_TABLE_HAS_DFLT_ROUTER)
-				__rt6_purge_dflt_routers(table);
+				__rt6_purge_dflt_routers(net, table);
 		}
 	}
 
@@ -3484,6 +3550,7 @@ static void rtmsg_to_fib6_config(struct net *net,
 	cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
 	cfg->fc_src_len = rtmsg->rtmsg_src_len;
 	cfg->fc_flags = rtmsg->rtmsg_flags;
+	cfg->fc_type = rtmsg->rtmsg_type;
 
 	cfg->fc_nlinfo.nl_net = net;
 
@@ -3513,7 +3580,7 @@ int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
 		rtnl_lock();
 		switch (cmd) {
 		case SIOCADDRT:
-			err = ip6_route_add(&cfg, NULL);
+			err = ip6_route_add(&cfg, GFP_KERNEL, NULL);
 			break;
 		case SIOCDELRT:
 			err = ip6_route_del(&cfg, NULL);
@@ -3541,7 +3608,8 @@ static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
 	case IPSTATS_MIB_INNOROUTES:
 		type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
 		if (type == IPV6_ADDR_ANY) {
-			IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
+			IP6_INC_STATS(dev_net(dst->dev),
+				      __in6_dev_get_safely(skb->dev),
 				      IPSTATS_MIB_INADDRERRORS);
 			break;
 		}
@@ -3582,40 +3650,40 @@ static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff
  *	Allocate a dst for local (unicast / anycast) address.
  */
 
-struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
-				    const struct in6_addr *addr,
-				    bool anycast)
+struct fib6_info *addrconf_f6i_alloc(struct net *net,
+				     struct inet6_dev *idev,
+				     const struct in6_addr *addr,
+				     bool anycast, gfp_t gfp_flags)
 {
 	u32 tb_id;
-	struct net *net = dev_net(idev->dev);
 	struct net_device *dev = idev->dev;
-	struct rt6_info *rt;
+	struct fib6_info *f6i;
 
-	rt = ip6_dst_alloc(net, dev, DST_NOCOUNT);
-	if (!rt)
+	f6i = fib6_info_alloc(gfp_flags);
+	if (!f6i)
 		return ERR_PTR(-ENOMEM);
 
-	in6_dev_hold(idev);
+	f6i->dst_nocount = true;
+	f6i->dst_host = true;
+	f6i->fib6_protocol = RTPROT_KERNEL;
+	f6i->fib6_flags = RTF_UP | RTF_NONEXTHOP;
+	if (anycast) {
+		f6i->fib6_type = RTN_ANYCAST;
+		f6i->fib6_flags |= RTF_ANYCAST;
+	} else {
+		f6i->fib6_type = RTN_LOCAL;
+		f6i->fib6_flags |= RTF_LOCAL;
+	}
 
-	rt->dst.flags |= DST_HOST;
-	rt->dst.input = ip6_input;
-	rt->dst.output = ip6_output;
-	rt->rt6i_idev = idev;
-
-	rt->rt6i_protocol = RTPROT_KERNEL;
-	rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
-	if (anycast)
-		rt->rt6i_flags |= RTF_ANYCAST;
-	else
-		rt->rt6i_flags |= RTF_LOCAL;
-
-	rt->rt6i_gateway  = *addr;
-	rt->rt6i_dst.addr = *addr;
-	rt->rt6i_dst.plen = 128;
+	f6i->fib6_nh.nh_gw = *addr;
+	dev_hold(dev);
+	f6i->fib6_nh.nh_dev = dev;
+	f6i->fib6_dst.addr = *addr;
+	f6i->fib6_dst.plen = 128;
 	tb_id = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL;
-	rt->rt6i_table = fib6_get_table(net, tb_id);
+	f6i->fib6_table = fib6_get_table(net, tb_id);
 
-	return rt;
+	return f6i;
 }
 
 /* remove deleted ip from prefsrc entries */
@@ -3625,18 +3693,18 @@ struct arg_dev_net_ip {
 	struct in6_addr *addr;
 };
 
-static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
+static int fib6_remove_prefsrc(struct fib6_info *rt, void *arg)
 {
 	struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
 	struct net *net = ((struct arg_dev_net_ip *)arg)->net;
 	struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
 
-	if (((void *)rt->dst.dev == dev || !dev) &&
-	    rt != net->ipv6.ip6_null_entry &&
-	    ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
+	if (((void *)rt->fib6_nh.nh_dev == dev || !dev) &&
+	    rt != net->ipv6.fib6_null_entry &&
+	    ipv6_addr_equal(addr, &rt->fib6_prefsrc.addr)) {
 		spin_lock_bh(&rt6_exception_lock);
 		/* remove prefsrc entry */
-		rt->rt6i_prefsrc.plen = 0;
+		rt->fib6_prefsrc.plen = 0;
 		/* need to update cache as well */
 		rt6_exceptions_remove_prefsrc(rt);
 		spin_unlock_bh(&rt6_exception_lock);
@@ -3658,12 +3726,12 @@ void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
 #define RTF_RA_ROUTER		(RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
 
 /* Remove routers and update dst entries when a gateway turns into a host. */
-static int fib6_clean_tohost(struct rt6_info *rt, void *arg)
+static int fib6_clean_tohost(struct fib6_info *rt, void *arg)
 {
 	struct in6_addr *gateway = (struct in6_addr *)arg;
 
-	if (((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) &&
-	    ipv6_addr_equal(gateway, &rt->rt6i_gateway)) {
+	if (((rt->fib6_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) &&
+	    ipv6_addr_equal(gateway, &rt->fib6_nh.nh_gw)) {
 		return -1;
 	}
 
@@ -3689,85 +3757,85 @@ struct arg_netdev_event {
 	};
 };
 
-static struct rt6_info *rt6_multipath_first_sibling(const struct rt6_info *rt)
+static struct fib6_info *rt6_multipath_first_sibling(const struct fib6_info *rt)
 {
-	struct rt6_info *iter;
+	struct fib6_info *iter;
 	struct fib6_node *fn;
 
-	fn = rcu_dereference_protected(rt->rt6i_node,
-			lockdep_is_held(&rt->rt6i_table->tb6_lock));
+	fn = rcu_dereference_protected(rt->fib6_node,
+			lockdep_is_held(&rt->fib6_table->tb6_lock));
 	iter = rcu_dereference_protected(fn->leaf,
-			lockdep_is_held(&rt->rt6i_table->tb6_lock));
+			lockdep_is_held(&rt->fib6_table->tb6_lock));
 	while (iter) {
-		if (iter->rt6i_metric == rt->rt6i_metric &&
+		if (iter->fib6_metric == rt->fib6_metric &&
 		    rt6_qualify_for_ecmp(iter))
 			return iter;
 		iter = rcu_dereference_protected(iter->rt6_next,
-				lockdep_is_held(&rt->rt6i_table->tb6_lock));
+				lockdep_is_held(&rt->fib6_table->tb6_lock));
 	}
 
 	return NULL;
 }
 
-static bool rt6_is_dead(const struct rt6_info *rt)
+static bool rt6_is_dead(const struct fib6_info *rt)
 {
-	if (rt->rt6i_nh_flags & RTNH_F_DEAD ||
-	    (rt->rt6i_nh_flags & RTNH_F_LINKDOWN &&
-	     rt->rt6i_idev->cnf.ignore_routes_with_linkdown))
+	if (rt->fib6_nh.nh_flags & RTNH_F_DEAD ||
+	    (rt->fib6_nh.nh_flags & RTNH_F_LINKDOWN &&
+	     fib6_ignore_linkdown(rt)))
 		return true;
 
 	return false;
 }
 
-static int rt6_multipath_total_weight(const struct rt6_info *rt)
+static int rt6_multipath_total_weight(const struct fib6_info *rt)
 {
-	struct rt6_info *iter;
+	struct fib6_info *iter;
 	int total = 0;
 
 	if (!rt6_is_dead(rt))
-		total += rt->rt6i_nh_weight;
+		total += rt->fib6_nh.nh_weight;
 
-	list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings) {
+	list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) {
 		if (!rt6_is_dead(iter))
-			total += iter->rt6i_nh_weight;
+			total += iter->fib6_nh.nh_weight;
 	}
 
 	return total;
 }
 
-static void rt6_upper_bound_set(struct rt6_info *rt, int *weight, int total)
+static void rt6_upper_bound_set(struct fib6_info *rt, int *weight, int total)
 {
 	int upper_bound = -1;
 
 	if (!rt6_is_dead(rt)) {
-		*weight += rt->rt6i_nh_weight;
+		*weight += rt->fib6_nh.nh_weight;
 		upper_bound = DIV_ROUND_CLOSEST_ULL((u64) (*weight) << 31,
 						    total) - 1;
 	}
-	atomic_set(&rt->rt6i_nh_upper_bound, upper_bound);
+	atomic_set(&rt->fib6_nh.nh_upper_bound, upper_bound);
 }
 
-static void rt6_multipath_upper_bound_set(struct rt6_info *rt, int total)
+static void rt6_multipath_upper_bound_set(struct fib6_info *rt, int total)
 {
-	struct rt6_info *iter;
+	struct fib6_info *iter;
 	int weight = 0;
 
 	rt6_upper_bound_set(rt, &weight, total);
 
-	list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings)
+	list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
 		rt6_upper_bound_set(iter, &weight, total);
 }
 
-void rt6_multipath_rebalance(struct rt6_info *rt)
+void rt6_multipath_rebalance(struct fib6_info *rt)
 {
-	struct rt6_info *first;
+	struct fib6_info *first;
 	int total;
 
 	/* In case the entire multipath route was marked for flushing,
 	 * then there is no need to rebalance upon the removal of every
 	 * sibling route.
 	 */
-	if (!rt->rt6i_nsiblings || rt->should_flush)
+	if (!rt->fib6_nsiblings || rt->should_flush)
 		return;
 
 	/* During lookup routes are evaluated in order, so we need to
@@ -3782,14 +3850,14 @@ void rt6_multipath_rebalance(struct rt6_info *rt)
 	rt6_multipath_upper_bound_set(first, total);
 }
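
Reviewer note: for context on rt6_upper_bound_set(), each live nexthop is assigned a cumulative share of the 31-bit flow-hash space, and path selection later picks the first sibling whose stored bound is >= the packet hash. A standalone worked example of the same arithmetic (plain user-space C, not kernel code):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	int weights[] = { 1, 3 };	/* two siblings, total weight 4 */
	int total = 4, cum = 0;

	for (int i = 0; i < 2; i++) {
		cum += weights[i];
		/* DIV_ROUND_CLOSEST_ULL((u64)cum << 31, total) - 1 */
		uint64_t bound = (((uint64_t)cum << 31) + total / 2) / total - 1;
		printf("nh%d upper bound: %#llx\n",
		       i, (unsigned long long)bound);
	}
	/* Prints 0x1fffffff then 0x7fffffff: nh0 takes 25% of the
	 * hash space, nh1 the remaining 75%.
	 */
	return 0;
}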
 
-static int fib6_ifup(struct rt6_info *rt, void *p_arg)
+static int fib6_ifup(struct fib6_info *rt, void *p_arg)
 {
 	const struct arg_netdev_event *arg = p_arg;
-	const struct net *net = dev_net(arg->dev);
+	struct net *net = dev_net(arg->dev);
 
-	if (rt != net->ipv6.ip6_null_entry && rt->dst.dev == arg->dev) {
-		rt->rt6i_nh_flags &= ~arg->nh_flags;
-		fib6_update_sernum_upto_root(dev_net(rt->dst.dev), rt);
+	if (rt != net->ipv6.fib6_null_entry && rt->fib6_nh.nh_dev == arg->dev) {
+		rt->fib6_nh.nh_flags &= ~arg->nh_flags;
+		fib6_update_sernum_upto_root(net, rt);
 		rt6_multipath_rebalance(rt);
 	}
 
@@ -3811,95 +3879,96 @@ void rt6_sync_up(struct net_device *dev, unsigned int nh_flags)
 	fib6_clean_all(dev_net(dev), fib6_ifup, &arg);
 }
 
-static bool rt6_multipath_uses_dev(const struct rt6_info *rt,
+static bool rt6_multipath_uses_dev(const struct fib6_info *rt,
 				   const struct net_device *dev)
 {
-	struct rt6_info *iter;
+	struct fib6_info *iter;
 
-	if (rt->dst.dev == dev)
+	if (rt->fib6_nh.nh_dev == dev)
 		return true;
-	list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings)
-		if (iter->dst.dev == dev)
+	list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
+		if (iter->fib6_nh.nh_dev == dev)
 			return true;
 
 	return false;
 }
 
-static void rt6_multipath_flush(struct rt6_info *rt)
+static void rt6_multipath_flush(struct fib6_info *rt)
 {
-	struct rt6_info *iter;
+	struct fib6_info *iter;
 
 	rt->should_flush = 1;
-	list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings)
+	list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
 		iter->should_flush = 1;
 }
 
-static unsigned int rt6_multipath_dead_count(const struct rt6_info *rt,
+static unsigned int rt6_multipath_dead_count(const struct fib6_info *rt,
 					     const struct net_device *down_dev)
 {
-	struct rt6_info *iter;
+	struct fib6_info *iter;
 	unsigned int dead = 0;
 
-	if (rt->dst.dev == down_dev || rt->rt6i_nh_flags & RTNH_F_DEAD)
+	if (rt->fib6_nh.nh_dev == down_dev ||
+	    rt->fib6_nh.nh_flags & RTNH_F_DEAD)
 		dead++;
-	list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings)
-		if (iter->dst.dev == down_dev ||
-		    iter->rt6i_nh_flags & RTNH_F_DEAD)
+	list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
+		if (iter->fib6_nh.nh_dev == down_dev ||
+		    iter->fib6_nh.nh_flags & RTNH_F_DEAD)
 			dead++;
 
 	return dead;
 }
 
-static void rt6_multipath_nh_flags_set(struct rt6_info *rt,
+static void rt6_multipath_nh_flags_set(struct fib6_info *rt,
 				       const struct net_device *dev,
 				       unsigned int nh_flags)
 {
-	struct rt6_info *iter;
+	struct fib6_info *iter;
 
-	if (rt->dst.dev == dev)
-		rt->rt6i_nh_flags |= nh_flags;
-	list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings)
-		if (iter->dst.dev == dev)
-			iter->rt6i_nh_flags |= nh_flags;
+	if (rt->fib6_nh.nh_dev == dev)
+		rt->fib6_nh.nh_flags |= nh_flags;
+	list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
+		if (iter->fib6_nh.nh_dev == dev)
+			iter->fib6_nh.nh_flags |= nh_flags;
 }
 
 /* called with write lock held for table with rt */
-static int fib6_ifdown(struct rt6_info *rt, void *p_arg)
+static int fib6_ifdown(struct fib6_info *rt, void *p_arg)
 {
 	const struct arg_netdev_event *arg = p_arg;
 	const struct net_device *dev = arg->dev;
-	const struct net *net = dev_net(dev);
+	struct net *net = dev_net(dev);
 
-	if (rt == net->ipv6.ip6_null_entry)
+	if (rt == net->ipv6.fib6_null_entry)
 		return 0;
 
 	switch (arg->event) {
 	case NETDEV_UNREGISTER:
-		return rt->dst.dev == dev ? -1 : 0;
+		return rt->fib6_nh.nh_dev == dev ? -1 : 0;
 	case NETDEV_DOWN:
 		if (rt->should_flush)
 			return -1;
-		if (!rt->rt6i_nsiblings)
-			return rt->dst.dev == dev ? -1 : 0;
+		if (!rt->fib6_nsiblings)
+			return rt->fib6_nh.nh_dev == dev ? -1 : 0;
 		if (rt6_multipath_uses_dev(rt, dev)) {
 			unsigned int count;
 
 			count = rt6_multipath_dead_count(rt, dev);
-			if (rt->rt6i_nsiblings + 1 == count) {
+			if (rt->fib6_nsiblings + 1 == count) {
 				rt6_multipath_flush(rt);
 				return -1;
 			}
 			rt6_multipath_nh_flags_set(rt, dev, RTNH_F_DEAD |
 						   RTNH_F_LINKDOWN);
-			fib6_update_sernum(rt);
+			fib6_update_sernum(net, rt);
 			rt6_multipath_rebalance(rt);
 		}
 		return -2;
 	case NETDEV_CHANGE:
-		if (rt->dst.dev != dev ||
-		    rt->rt6i_flags & (RTF_LOCAL | RTF_ANYCAST))
+		if (rt->fib6_nh.nh_dev != dev ||
+		    rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST))
 			break;
-		rt->rt6i_nh_flags |= RTNH_F_LINKDOWN;
+		rt->fib6_nh.nh_flags |= RTNH_F_LINKDOWN;
 		rt6_multipath_rebalance(rt);
 		break;
 	}
@@ -3931,7 +4000,7 @@ struct rt6_mtu_change_arg {
 	unsigned int mtu;
 };
 
-static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
+static int rt6_mtu_change_route(struct fib6_info *rt, void *p_arg)
 {
 	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
 	struct inet6_dev *idev;
@@ -3951,12 +4020,15 @@ static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
 	   Since RFC 1981 doesn't include administrative MTU increase,
 	   updating PMTU on increase is a MUST. (i.e. jumbo frame)
 	 */
-	if (rt->dst.dev == arg->dev &&
-	    !dst_metric_locked(&rt->dst, RTAX_MTU)) {
+	if (rt->fib6_nh.nh_dev == arg->dev &&
+	    !fib6_metric_locked(rt, RTAX_MTU)) {
+		u32 mtu = rt->fib6_pmtu;
+
+		if (mtu >= arg->mtu ||
+		    (mtu < arg->mtu && mtu == idev->cnf.mtu6))
+			fib6_metric_set(rt, RTAX_MTU, arg->mtu);
+
 		spin_lock_bh(&rt6_exception_lock);
-		if (dst_metric_raw(&rt->dst, RTAX_MTU) &&
-		    rt6_mtu_change_route_allowed(idev, rt, arg->mtu))
-			dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
 		rt6_exceptions_update_pmtu(idev, rt, arg->mtu);
 		spin_unlock_bh(&rt6_exception_lock);
 	}
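
Reviewer note: the rewritten check folds the old rt6_mtu_change_route_allowed() helper into two lines: an MTU decrease is always propagated to the route, while an increase is applied only when the route's MTU still tracks the device (mtu == idev->cnf.mtu6). A worked illustration with assumed numbers:

	/* Device MTU raised 1500 -> 9000 (arg->mtu == 9000):
	 *   route mtu 1500 == idev->cnf.mtu6 -> raised to 9000
	 *   route mtu 1280, set below the
	 *   device value administratively    -> left untouched
	 * Device MTU lowered to 1400 (arg->mtu == 1400):
	 *   route mtu 1500 >= 1400           -> always lowered
	 */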
@@ -4115,9 +4187,8 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
 }
 
 struct rt6_nh {
-	struct rt6_info *rt6_info;
+	struct fib6_info *fib6_info;
 	struct fib6_config r_cfg;
-	struct mx6_config mxc;
 	struct list_head next;
 };
 
@@ -4132,23 +4203,25 @@ static void ip6_print_replace_route_err(struct list_head *rt6_nh_list)
 	}
 }
 
-static int ip6_route_info_append(struct list_head *rt6_nh_list,
-				 struct rt6_info *rt, struct fib6_config *r_cfg)
+static int ip6_route_info_append(struct net *net,
+				 struct list_head *rt6_nh_list,
+				 struct fib6_info *rt,
+				 struct fib6_config *r_cfg)
 {
 	struct rt6_nh *nh;
 	int err = -EEXIST;
 
 	list_for_each_entry(nh, rt6_nh_list, next) {
-		/* check if rt6_info already exists */
-		if (rt6_duplicate_nexthop(nh->rt6_info, rt))
+		/* check if fib6_info already exists */
+		if (rt6_duplicate_nexthop(nh->fib6_info, rt))
 			return err;
 	}
 
 	nh = kzalloc(sizeof(*nh), GFP_KERNEL);
 	if (!nh)
 		return -ENOMEM;
-	nh->rt6_info = rt;
-	err = ip6_convert_metrics(&nh->mxc, r_cfg);
+	nh->fib6_info = rt;
+	err = ip6_convert_metrics(net, rt, r_cfg);
 	if (err) {
 		kfree(nh);
 		return err;
@@ -4159,8 +4232,8 @@ static int ip6_route_info_append(struct list_head *rt6_nh_list,
 	return 0;
 }
 
-static void ip6_route_mpath_notify(struct rt6_info *rt,
-				   struct rt6_info *rt_last,
+static void ip6_route_mpath_notify(struct fib6_info *rt,
+				   struct fib6_info *rt_last,
 				   struct nl_info *info,
 				   __u16 nlflags)
 {
@@ -4170,10 +4243,10 @@ static void ip6_route_mpath_notify(struct rt6_info *rt,
 	 * nexthop. Since sibling routes are always added at the end of
 	 * the list, find the first sibling of the last route appended
 	 */
-	if ((nlflags & NLM_F_APPEND) && rt_last && rt_last->rt6i_nsiblings) {
-		rt = list_first_entry(&rt_last->rt6i_siblings,
-				      struct rt6_info,
-				      rt6i_siblings);
+	if ((nlflags & NLM_F_APPEND) && rt_last && rt_last->fib6_nsiblings) {
+		rt = list_first_entry(&rt_last->fib6_siblings,
+				      struct fib6_info,
+				      fib6_siblings);
 	}
 
 	if (rt)
@@ -4183,11 +4256,11 @@ static void ip6_route_mpath_notify(struct rt6_info *rt,
 static int ip6_route_multipath_add(struct fib6_config *cfg,
 				   struct netlink_ext_ack *extack)
 {
-	struct rt6_info *rt_notif = NULL, *rt_last = NULL;
+	struct fib6_info *rt_notif = NULL, *rt_last = NULL;
 	struct nl_info *info = &cfg->fc_nlinfo;
 	struct fib6_config r_cfg;
 	struct rtnexthop *rtnh;
-	struct rt6_info *rt;
+	struct fib6_info *rt;
 	struct rt6_nh *err_nh;
 	struct rt6_nh *nh, *nh_safe;
 	__u16 nlflags;
@@ -4207,7 +4280,7 @@ static int ip6_route_multipath_add(struct fib6_config *cfg,
 	rtnh = (struct rtnexthop *)cfg->fc_mp;
 
 	/* Parse a Multipath Entry and build a list (rt6_nh_list) of
-	 * rt6_info structs per nexthop
+	 * fib6_info structs per nexthop
 	 */
 	while (rtnh_ok(rtnh, remaining)) {
 		memcpy(&r_cfg, cfg, sizeof(*cfg));
@@ -4230,18 +4303,19 @@ static int ip6_route_multipath_add(struct fib6_config *cfg,
 		}
 
 		r_cfg.fc_flags |= (rtnh->rtnh_flags & RTNH_F_ONLINK);
-		rt = ip6_route_info_create(&r_cfg, extack);
+		rt = ip6_route_info_create(&r_cfg, GFP_KERNEL, extack);
 		if (IS_ERR(rt)) {
 			err = PTR_ERR(rt);
 			rt = NULL;
 			goto cleanup;
 		}
 
-		rt->rt6i_nh_weight = rtnh->rtnh_hops + 1;
+		rt->fib6_nh.nh_weight = rtnh->rtnh_hops + 1;
 
-		err = ip6_route_info_append(&rt6_nh_list, rt, &r_cfg);
+		err = ip6_route_info_append(info->nl_net, &rt6_nh_list,
+					    rt, &r_cfg);
 		if (err) {
-			dst_release_immediate(&rt->dst);
+			fib6_info_release(rt);
 			goto cleanup;
 		}
 
@@ -4256,14 +4330,16 @@ static int ip6_route_multipath_add(struct fib6_config *cfg,
 
 	err_nh = NULL;
 	list_for_each_entry(nh, &rt6_nh_list, next) {
-		rt_last = nh->rt6_info;
-		err = __ip6_ins_rt(nh->rt6_info, info, &nh->mxc, extack);
+		rt_last = nh->fib6_info;
+		err = __ip6_ins_rt(nh->fib6_info, info, extack);
+		fib6_info_release(nh->fib6_info);
+
 		/* save reference to first route for notification */
 		if (!rt_notif && !err)
-			rt_notif = nh->rt6_info;
+			rt_notif = nh->fib6_info;
 
-		/* nh->rt6_info is used or freed at this point, reset to NULL*/
-		nh->rt6_info = NULL;
+		/* nh->fib6_info is used or freed at this point, reset to NULL */
+		nh->fib6_info = NULL;
 		if (err) {
 			if (replace && nhn)
 				ip6_print_replace_route_err(&rt6_nh_list);
@@ -4304,9 +4380,8 @@ static int ip6_route_multipath_add(struct fib6_config *cfg,
 
 cleanup:
 	list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
-		if (nh->rt6_info)
-			dst_release_immediate(&nh->rt6_info->dst);
-		kfree(nh->mxc.mx);
+		if (nh->fib6_info)
+			fib6_info_release(nh->fib6_info);
 		list_del(&nh->next);
 		kfree(nh);
 	}
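
Reviewer note: for reference, the loop at the top of ip6_route_multipath_add() walks the RTA_MULTIPATH payload, which is a packed array of struct rtnexthop records, each optionally followed by nested attributes (RTA_GATEWAY and friends). A rough user-space sketch of the same walk, assuming a pointer/length pair taken from the parsed attribute:

#include <linux/rtnetlink.h>

static void walk_multipath(struct rtnexthop *rtnh, int remaining)
{
	while (RTNH_OK(rtnh, remaining)) {
		/* rtnh->rtnh_ifindex is the oif, rtnh->rtnh_hops is
		 * weight - 1, and RTNH_DATA(rtnh) holds the nested
		 * attributes for this nexthop.
		 */
		remaining -= RTNH_ALIGN(rtnh->rtnh_len);
		rtnh = RTNH_NEXT(rtnh);
	}
}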
@@ -4383,20 +4458,20 @@ static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
 	if (cfg.fc_mp)
 		return ip6_route_multipath_add(&cfg, extack);
 	else
-		return ip6_route_add(&cfg, extack);
+		return ip6_route_add(&cfg, GFP_KERNEL, extack);
 }
 
-static size_t rt6_nlmsg_size(struct rt6_info *rt)
+static size_t rt6_nlmsg_size(struct fib6_info *rt)
 {
 	int nexthop_len = 0;
 
-	if (rt->rt6i_nsiblings) {
+	if (rt->fib6_nsiblings) {
 		nexthop_len = nla_total_size(0)	 /* RTA_MULTIPATH */
 			    + NLA_ALIGN(sizeof(struct rtnexthop))
 			    + nla_total_size(16) /* RTA_GATEWAY */
-			    + lwtunnel_get_encap_size(rt->dst.lwtstate);
+			    + lwtunnel_get_encap_size(rt->fib6_nh.nh_lwtstate);
 
-		nexthop_len *= rt->rt6i_nsiblings;
+		nexthop_len *= rt->fib6_nsiblings;
 	}
 
 	return NLMSG_ALIGN(sizeof(struct rtmsg))
@@ -4412,38 +4487,41 @@ static size_t rt6_nlmsg_size(struct rt6_info *rt)
 	       + nla_total_size(sizeof(struct rta_cacheinfo))
 	       + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
 	       + nla_total_size(1) /* RTA_PREF */
-	       + lwtunnel_get_encap_size(rt->dst.lwtstate)
+	       + lwtunnel_get_encap_size(rt->fib6_nh.nh_lwtstate)
 	       + nexthop_len;
 }
 
-static int rt6_nexthop_info(struct sk_buff *skb, struct rt6_info *rt,
+static int rt6_nexthop_info(struct sk_buff *skb, struct fib6_info *rt,
 			    unsigned int *flags, bool skip_oif)
 {
-	if (rt->rt6i_nh_flags & RTNH_F_DEAD)
+	if (rt->fib6_nh.nh_flags & RTNH_F_DEAD)
 		*flags |= RTNH_F_DEAD;
 
-	if (rt->rt6i_nh_flags & RTNH_F_LINKDOWN) {
+	if (rt->fib6_nh.nh_flags & RTNH_F_LINKDOWN) {
 		*flags |= RTNH_F_LINKDOWN;
-		if (rt->rt6i_idev->cnf.ignore_routes_with_linkdown)
+
+		rcu_read_lock();
+		if (fib6_ignore_linkdown(rt))
 			*flags |= RTNH_F_DEAD;
+		rcu_read_unlock();
 	}
 
-	if (rt->rt6i_flags & RTF_GATEWAY) {
-		if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->rt6i_gateway) < 0)
+	if (rt->fib6_flags & RTF_GATEWAY) {
+		if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->fib6_nh.nh_gw) < 0)
 			goto nla_put_failure;
 	}
 
-	*flags |= (rt->rt6i_nh_flags & RTNH_F_ONLINK);
-	if (rt->rt6i_nh_flags & RTNH_F_OFFLOAD)
+	*flags |= (rt->fib6_nh.nh_flags & RTNH_F_ONLINK);
+	if (rt->fib6_nh.nh_flags & RTNH_F_OFFLOAD)
 		*flags |= RTNH_F_OFFLOAD;
 
 	/* not needed for multipath encoding b/c it has a rtnexthop struct */
-	if (!skip_oif && rt->dst.dev &&
-	    nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
+	if (!skip_oif && rt->fib6_nh.nh_dev &&
+	    nla_put_u32(skb, RTA_OIF, rt->fib6_nh.nh_dev->ifindex))
 		goto nla_put_failure;
 
-	if (rt->dst.lwtstate &&
-	    lwtunnel_fill_encap(skb, rt->dst.lwtstate) < 0)
+	if (rt->fib6_nh.nh_lwtstate &&
+	    lwtunnel_fill_encap(skb, rt->fib6_nh.nh_lwtstate) < 0)
 		goto nla_put_failure;
 
 	return 0;
@@ -4453,8 +4531,9 @@ static int rt6_nexthop_info(struct sk_buff *skb, struct rt6_info *rt,
 }
 
 /* add multipath next hop */
-static int rt6_add_nexthop(struct sk_buff *skb, struct rt6_info *rt)
+static int rt6_add_nexthop(struct sk_buff *skb, struct fib6_info *rt)
 {
+	const struct net_device *dev = rt->fib6_nh.nh_dev;
 	struct rtnexthop *rtnh;
 	unsigned int flags = 0;
 
@@ -4462,8 +4541,8 @@ static int rt6_add_nexthop(struct sk_buff *skb, struct rt6_info *rt)
 	if (!rtnh)
 		goto nla_put_failure;
 
-	rtnh->rtnh_hops = rt->rt6i_nh_weight - 1;
-	rtnh->rtnh_ifindex = rt->dst.dev ? rt->dst.dev->ifindex : 0;
+	rtnh->rtnh_hops = rt->fib6_nh.nh_weight - 1;
+	rtnh->rtnh_ifindex = dev ? dev->ifindex : 0;
 
 	if (rt6_nexthop_info(skb, rt, &flags, true) < 0)
 		goto nla_put_failure;
@@ -4479,16 +4558,16 @@ static int rt6_add_nexthop(struct sk_buff *skb, struct rt6_info *rt)
 	return -EMSGSIZE;
 }
 
-static int rt6_fill_node(struct net *net,
-			 struct sk_buff *skb, struct rt6_info *rt,
-			 struct in6_addr *dst, struct in6_addr *src,
+static int rt6_fill_node(struct net *net, struct sk_buff *skb,
+			 struct fib6_info *rt, struct dst_entry *dst,
+			 struct in6_addr *dest, struct in6_addr *src,
 			 int iif, int type, u32 portid, u32 seq,
 			 unsigned int flags)
 {
-	u32 metrics[RTAX_MAX];
 	struct rtmsg *rtm;
 	struct nlmsghdr *nlh;
-	long expires;
+	long expires = 0;
+	u32 *pmetrics;
 	u32 table;
 
 	nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
@@ -4497,53 +4576,31 @@ static int rt6_fill_node(struct net *net,
 
 	rtm = nlmsg_data(nlh);
 	rtm->rtm_family = AF_INET6;
-	rtm->rtm_dst_len = rt->rt6i_dst.plen;
-	rtm->rtm_src_len = rt->rt6i_src.plen;
+	rtm->rtm_dst_len = rt->fib6_dst.plen;
+	rtm->rtm_src_len = rt->fib6_src.plen;
 	rtm->rtm_tos = 0;
-	if (rt->rt6i_table)
-		table = rt->rt6i_table->tb6_id;
+	if (rt->fib6_table)
+		table = rt->fib6_table->tb6_id;
 	else
 		table = RT6_TABLE_UNSPEC;
 	rtm->rtm_table = table;
 	if (nla_put_u32(skb, RTA_TABLE, table))
 		goto nla_put_failure;
-	if (rt->rt6i_flags & RTF_REJECT) {
-		switch (rt->dst.error) {
-		case -EINVAL:
-			rtm->rtm_type = RTN_BLACKHOLE;
-			break;
-		case -EACCES:
-			rtm->rtm_type = RTN_PROHIBIT;
-			break;
-		case -EAGAIN:
-			rtm->rtm_type = RTN_THROW;
-			break;
-		default:
-			rtm->rtm_type = RTN_UNREACHABLE;
-			break;
-		}
-	}
-	else if (rt->rt6i_flags & RTF_LOCAL)
-		rtm->rtm_type = RTN_LOCAL;
-	else if (rt->rt6i_flags & RTF_ANYCAST)
-		rtm->rtm_type = RTN_ANYCAST;
-	else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
-		rtm->rtm_type = RTN_LOCAL;
-	else
-		rtm->rtm_type = RTN_UNICAST;
+
+	rtm->rtm_type = rt->fib6_type;
 	rtm->rtm_flags = 0;
 	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
-	rtm->rtm_protocol = rt->rt6i_protocol;
+	rtm->rtm_protocol = rt->fib6_protocol;
 
-	if (rt->rt6i_flags & RTF_CACHE)
+	if (rt->fib6_flags & RTF_CACHE)
 		rtm->rtm_flags |= RTM_F_CLONED;
 
-	if (dst) {
-		if (nla_put_in6_addr(skb, RTA_DST, dst))
+	if (dest) {
+		if (nla_put_in6_addr(skb, RTA_DST, dest))
 			goto nla_put_failure;
 		rtm->rtm_dst_len = 128;
 	} else if (rtm->rtm_dst_len)
-		if (nla_put_in6_addr(skb, RTA_DST, &rt->rt6i_dst.addr))
+		if (nla_put_in6_addr(skb, RTA_DST, &rt->fib6_dst.addr))
 			goto nla_put_failure;
 #ifdef CONFIG_IPV6_SUBTREES
 	if (src) {
@@ -4551,12 +4608,12 @@ static int rt6_fill_node(struct net *net,
 			goto nla_put_failure;
 		rtm->rtm_src_len = 128;
 	} else if (rtm->rtm_src_len &&
-		   nla_put_in6_addr(skb, RTA_SRC, &rt->rt6i_src.addr))
+		   nla_put_in6_addr(skb, RTA_SRC, &rt->fib6_src.addr))
 		goto nla_put_failure;
 #endif
 	if (iif) {
 #ifdef CONFIG_IPV6_MROUTE
-		if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
+		if (ipv6_addr_is_multicast(&rt->fib6_dst.addr)) {
 			int err = ip6mr_get_route(net, skb, rtm, portid);
 
 			if (err == 0)
@@ -4567,34 +4624,32 @@ static int rt6_fill_node(struct net *net,
 #endif
 			if (nla_put_u32(skb, RTA_IIF, iif))
 				goto nla_put_failure;
-	} else if (dst) {
+	} else if (dest) {
 		struct in6_addr saddr_buf;
-		if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
+		if (ip6_route_get_saddr(net, rt, dest, 0, &saddr_buf) == 0 &&
 		    nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
 			goto nla_put_failure;
 	}
 
-	if (rt->rt6i_prefsrc.plen) {
+	if (rt->fib6_prefsrc.plen) {
 		struct in6_addr saddr_buf;
-		saddr_buf = rt->rt6i_prefsrc.addr;
+		saddr_buf = rt->fib6_prefsrc.addr;
 		if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
 			goto nla_put_failure;
 	}
 
-	memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
-	if (rt->rt6i_pmtu)
-		metrics[RTAX_MTU - 1] = rt->rt6i_pmtu;
-	if (rtnetlink_put_metrics(skb, metrics) < 0)
+	pmetrics = dst ? dst_metrics_ptr(dst) : rt->fib6_metrics->metrics;
+	if (rtnetlink_put_metrics(skb, pmetrics) < 0)
 		goto nla_put_failure;
 
-	if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
+	if (nla_put_u32(skb, RTA_PRIORITY, rt->fib6_metric))
 		goto nla_put_failure;
 
 	/* For multipath routes, walk the siblings list and add
 	 * each as a nexthop within RTA_MULTIPATH.
 	 */
-	if (rt->rt6i_nsiblings) {
-		struct rt6_info *sibling, *next_sibling;
+	if (rt->fib6_nsiblings) {
+		struct fib6_info *sibling, *next_sibling;
 		struct nlattr *mp;
 
 		mp = nla_nest_start(skb, RTA_MULTIPATH);
@@ -4605,7 +4660,7 @@ static int rt6_fill_node(struct net *net,
 			goto nla_put_failure;
 
 		list_for_each_entry_safe(sibling, next_sibling,
-					 &rt->rt6i_siblings, rt6i_siblings) {
+					 &rt->fib6_siblings, fib6_siblings) {
 			if (rt6_add_nexthop(skb, sibling) < 0)
 				goto nla_put_failure;
 		}
@@ -4616,12 +4671,15 @@ static int rt6_fill_node(struct net *net,
 			goto nla_put_failure;
 	}
 
-	expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
+	if (rt->fib6_flags & RTF_EXPIRES) {
+		expires = dst ? dst->expires : rt->expires;
+		expires -= jiffies;
+	}
 
-	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
+	if (rtnl_put_cacheinfo(skb, dst, 0, expires, dst ? dst->error : 0) < 0)
 		goto nla_put_failure;
 
-	if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags)))
+	if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->fib6_flags)))
 		goto nla_put_failure;
 
 
@@ -4633,12 +4691,12 @@ static int rt6_fill_node(struct net *net,
 	return -EMSGSIZE;
 }
 
-int rt6_dump_route(struct rt6_info *rt, void *p_arg)
+int rt6_dump_route(struct fib6_info *rt, void *p_arg)
 {
 	struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
 	struct net *net = arg->net;
 
-	if (rt == net->ipv6.ip6_null_entry)
+	if (rt == net->ipv6.fib6_null_entry)
 		return 0;
 
 	if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
@@ -4646,16 +4704,15 @@ int rt6_dump_route(struct rt6_info *rt, void *p_arg)
 
 		/* user wants prefix routes only */
 		if (rtm->rtm_flags & RTM_F_PREFIX &&
-		    !(rt->rt6i_flags & RTF_PREFIX_RT)) {
+		    !(rt->fib6_flags & RTF_PREFIX_RT)) {
 			/* success since this is not a prefix route */
 			return 1;
 		}
 	}
 
-	return rt6_fill_node(net,
-		     arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
-		     NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
-		     NLM_F_MULTI);
+	return rt6_fill_node(net, arg->skb, rt, NULL, NULL, NULL, 0,
+			     RTM_NEWROUTE, NETLINK_CB(arg->cb->skb).portid,
+			     arg->cb->nlh->nlmsg_seq, NLM_F_MULTI);
 }
 
 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
@@ -4664,6 +4721,7 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
 	struct net *net = sock_net(in_skb->sk);
 	struct nlattr *tb[RTA_MAX+1];
 	int err, iif = 0, oif = 0;
+	struct fib6_info *from;
 	struct dst_entry *dst;
 	struct rt6_info *rt;
 	struct sk_buff *skb;
@@ -4752,14 +4810,6 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
 		goto errout;
 	}
 
-	if (fibmatch && rt->from) {
-		struct rt6_info *ort = rt->from;
-
-		dst_hold(&ort->dst);
-		ip6_rt_put(rt);
-		rt = ort;
-	}
-
 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
 	if (!skb) {
 		ip6_rt_put(rt);
@@ -4768,14 +4818,21 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
 	}
 
 	skb_dst_set(skb, &rt->dst);
+
+	rcu_read_lock();
+	from = rcu_dereference(rt->from);
+
 	if (fibmatch)
-		err = rt6_fill_node(net, skb, rt, NULL, NULL, iif,
+		err = rt6_fill_node(net, skb, from, NULL, NULL, NULL, iif,
 				    RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
 				    nlh->nlmsg_seq, 0);
 	else
-		err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
-				    RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
-				    nlh->nlmsg_seq, 0);
+		err = rt6_fill_node(net, skb, from, dst, &fl6.daddr,
+				    &fl6.saddr, iif, RTM_NEWROUTE,
+				    NETLINK_CB(in_skb).portid, nlh->nlmsg_seq,
+				    0);
+	rcu_read_unlock();
+
 	if (err < 0) {
 		kfree_skb(skb);
 		goto errout;
@@ -4786,7 +4843,7 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
 	return err;
 }
 
-void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info,
+void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info,
 		     unsigned int nlm_flags)
 {
 	struct sk_buff *skb;
@@ -4801,8 +4858,8 @@ void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info,
 	if (!skb)
 		goto errout;
 
-	err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
-				event, info->portid, seq, nlm_flags);
+	err = rt6_fill_node(net, skb, rt, NULL, NULL, NULL, 0,
+			    event, info->portid, seq, nlm_flags);
 	if (err < 0) {
 		/* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
 		WARN_ON(err == -EMSGSIZE);
@@ -4827,6 +4884,7 @@ static int ip6_route_dev_notify(struct notifier_block *this,
 		return NOTIFY_OK;
 
 	if (event == NETDEV_REGISTER) {
+		net->ipv6.fib6_null_entry->fib6_nh.nh_dev = dev;
 		net->ipv6.ip6_null_entry->dst.dev = dev;
 		net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
@@ -5023,11 +5081,17 @@ static int __net_init ip6_route_net_init(struct net *net)
 	if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
 		goto out_ip6_dst_ops;
 
+	net->ipv6.fib6_null_entry = kmemdup(&fib6_null_entry_template,
+					    sizeof(*net->ipv6.fib6_null_entry),
+					    GFP_KERNEL);
+	if (!net->ipv6.fib6_null_entry)
+		goto out_ip6_dst_entries;
+
 	net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
 					   sizeof(*net->ipv6.ip6_null_entry),
 					   GFP_KERNEL);
 	if (!net->ipv6.ip6_null_entry)
-		goto out_ip6_dst_entries;
+		goto out_fib6_null_entry;
 	net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
 	dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
 			 ip6_template_metrics, true);
@@ -5074,6 +5138,8 @@ static int __net_init ip6_route_net_init(struct net *net)
 out_ip6_null_entry:
 	kfree(net->ipv6.ip6_null_entry);
 #endif
+out_fib6_null_entry:
+	kfree(net->ipv6.fib6_null_entry);
 out_ip6_dst_entries:
 	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
 out_ip6_dst_ops:
@@ -5082,6 +5148,7 @@ static int __net_init ip6_route_net_init(struct net *net)
 
 static void __net_exit ip6_route_net_exit(struct net *net)
 {
+	kfree(net->ipv6.fib6_null_entry);
 	kfree(net->ipv6.ip6_null_entry);
 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
 	kfree(net->ipv6.ip6_prohibit_entry);
@@ -5152,6 +5219,7 @@ void __init ip6_route_init_special_entries(void)
 	/* Registering of the loopback is done before this portion of code,
 	 * the loopback reference in rt6_info will not be taken, do it
 	 * manually for init_net */
+	init_net.ipv6.fib6_null_entry->fib6_nh.nh_dev = init_net.loopback_dev;
 	init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
 	init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
   #ifdef CONFIG_IPV6_MULTIPLE_TABLES
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 416fe67..2cff209 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -107,8 +107,6 @@ static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
 	 * it was magically lost, so this code needs audit */
 	xdst->u.rt6.rt6i_flags = rt->rt6i_flags & (RTF_ANYCAST |
 						   RTF_LOCAL);
-	xdst->u.rt6.rt6i_metric = rt->rt6i_metric;
-	xdst->u.rt6.rt6i_node = rt->rt6i_node;
 	xdst->route_cookie = rt6_get_cookie(rt);
 	xdst->u.rt6.rt6i_gateway = rt->rt6i_gateway;
 	xdst->u.rt6.rt6i_dst = rt->rt6i_dst;
diff --git a/net/ncsi/internal.h b/net/ncsi/internal.h
index 8da8431..8055e39 100644
--- a/net/ncsi/internal.h
+++ b/net/ncsi/internal.h
@@ -68,15 +68,6 @@ enum {
 	NCSI_MODE_MAX
 };
 
-enum {
-	NCSI_FILTER_BASE	= 0,
-	NCSI_FILTER_VLAN	= 0,
-	NCSI_FILTER_UC,
-	NCSI_FILTER_MC,
-	NCSI_FILTER_MIXED,
-	NCSI_FILTER_MAX
-};
-
 struct ncsi_channel_version {
 	u32 version;		/* Supported BCD encoded NCSI version */
 	u32 alpha2;		/* Supported BCD encoded NCSI version */
@@ -98,11 +89,18 @@ struct ncsi_channel_mode {
 	u32 data[8];	/* Data entries                */
 };
 
-struct ncsi_channel_filter {
-	u32 index;	/* Index of channel filters          */
-	u32 total;	/* Total entries in the filter table */
-	u64 bitmap;	/* Bitmap of valid entries           */
-	u32 data[];	/* Data for the valid entries        */
+struct ncsi_channel_mac_filter {
+	u8	n_uc;
+	u8	n_mc;
+	u8	n_mixed;
+	u64	bitmap;
+	unsigned char	*addrs;
+};
+
+struct ncsi_channel_vlan_filter {
+	u8	n_vids;
+	u64	bitmap;
+	u16	*vids;
 };
 
 struct ncsi_channel_stats {
@@ -186,7 +184,9 @@ struct ncsi_channel {
 	struct ncsi_channel_version version;
 	struct ncsi_channel_cap	    caps[NCSI_CAP_MAX];
 	struct ncsi_channel_mode    modes[NCSI_MODE_MAX];
-	struct ncsi_channel_filter  *filters[NCSI_FILTER_MAX];
+	/* Filtering Settings */
+	struct ncsi_channel_mac_filter	mac_filter;
+	struct ncsi_channel_vlan_filter	vlan_filter;
 	struct ncsi_channel_stats   stats;
 	struct {
 		struct timer_list   timer;
@@ -320,10 +320,6 @@ extern spinlock_t ncsi_dev_lock;
 	list_for_each_entry_rcu(nc, &np->channels, node)
 
 /* Resources */
-u32 *ncsi_get_filter(struct ncsi_channel *nc, int table, int index);
-int ncsi_find_filter(struct ncsi_channel *nc, int table, void *data);
-int ncsi_add_filter(struct ncsi_channel *nc, int table, void *data);
-int ncsi_remove_filter(struct ncsi_channel *nc, int table, int index);
 void ncsi_start_channel_monitor(struct ncsi_channel *nc);
 void ncsi_stop_channel_monitor(struct ncsi_channel *nc);
 struct ncsi_channel *ncsi_find_channel(struct ncsi_package *np,
diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c
index c3695ba..5561e22 100644
--- a/net/ncsi/ncsi-manage.c
+++ b/net/ncsi/ncsi-manage.c
@@ -27,125 +27,6 @@
 LIST_HEAD(ncsi_dev_list);
 DEFINE_SPINLOCK(ncsi_dev_lock);
 
-static inline int ncsi_filter_size(int table)
-{
-	int sizes[] = { 2, 6, 6, 6 };
-
-	BUILD_BUG_ON(ARRAY_SIZE(sizes) != NCSI_FILTER_MAX);
-	if (table < NCSI_FILTER_BASE || table >= NCSI_FILTER_MAX)
-		return -EINVAL;
-
-	return sizes[table];
-}
-
-u32 *ncsi_get_filter(struct ncsi_channel *nc, int table, int index)
-{
-	struct ncsi_channel_filter *ncf;
-	int size;
-
-	ncf = nc->filters[table];
-	if (!ncf)
-		return NULL;
-
-	size = ncsi_filter_size(table);
-	if (size < 0)
-		return NULL;
-
-	return ncf->data + size * index;
-}
-
-/* Find the first active filter in a filter table that matches the given
- * data parameter. If data is NULL, this returns the first active filter.
- */
-int ncsi_find_filter(struct ncsi_channel *nc, int table, void *data)
-{
-	struct ncsi_channel_filter *ncf;
-	void *bitmap;
-	int index, size;
-	unsigned long flags;
-
-	ncf = nc->filters[table];
-	if (!ncf)
-		return -ENXIO;
-
-	size = ncsi_filter_size(table);
-	if (size < 0)
-		return size;
-
-	spin_lock_irqsave(&nc->lock, flags);
-	bitmap = (void *)&ncf->bitmap;
-	index = -1;
-	while ((index = find_next_bit(bitmap, ncf->total, index + 1))
-	       < ncf->total) {
-		if (!data || !memcmp(ncf->data + size * index, data, size)) {
-			spin_unlock_irqrestore(&nc->lock, flags);
-			return index;
-		}
-	}
-	spin_unlock_irqrestore(&nc->lock, flags);
-
-	return -ENOENT;
-}
-
-int ncsi_add_filter(struct ncsi_channel *nc, int table, void *data)
-{
-	struct ncsi_channel_filter *ncf;
-	int index, size;
-	void *bitmap;
-	unsigned long flags;
-
-	size = ncsi_filter_size(table);
-	if (size < 0)
-		return size;
-
-	index = ncsi_find_filter(nc, table, data);
-	if (index >= 0)
-		return index;
-
-	ncf = nc->filters[table];
-	if (!ncf)
-		return -ENODEV;
-
-	spin_lock_irqsave(&nc->lock, flags);
-	bitmap = (void *)&ncf->bitmap;
-	do {
-		index = find_next_zero_bit(bitmap, ncf->total, 0);
-		if (index >= ncf->total) {
-			spin_unlock_irqrestore(&nc->lock, flags);
-			return -ENOSPC;
-		}
-	} while (test_and_set_bit(index, bitmap));
-
-	memcpy(ncf->data + size * index, data, size);
-	spin_unlock_irqrestore(&nc->lock, flags);
-
-	return index;
-}
-
-int ncsi_remove_filter(struct ncsi_channel *nc, int table, int index)
-{
-	struct ncsi_channel_filter *ncf;
-	int size;
-	void *bitmap;
-	unsigned long flags;
-
-	size = ncsi_filter_size(table);
-	if (size < 0)
-		return size;
-
-	ncf = nc->filters[table];
-	if (!ncf || index >= ncf->total)
-		return -ENODEV;
-
-	spin_lock_irqsave(&nc->lock, flags);
-	bitmap = (void *)&ncf->bitmap;
-	if (test_and_clear_bit(index, bitmap))
-		memset(ncf->data + size * index, 0, size);
-	spin_unlock_irqrestore(&nc->lock, flags);
-
-	return 0;
-}
-
 static void ncsi_report_link(struct ncsi_dev_priv *ndp, bool force_down)
 {
 	struct ncsi_dev *nd = &ndp->ndev;
@@ -339,20 +220,13 @@ struct ncsi_channel *ncsi_add_channel(struct ncsi_package *np, unsigned char id)
 static void ncsi_remove_channel(struct ncsi_channel *nc)
 {
 	struct ncsi_package *np = nc->package;
-	struct ncsi_channel_filter *ncf;
 	unsigned long flags;
-	int i;
+
+	spin_lock_irqsave(&nc->lock, flags);
 
 	/* Release filters */
-	spin_lock_irqsave(&nc->lock, flags);
-	for (i = 0; i < NCSI_FILTER_MAX; i++) {
-		ncf = nc->filters[i];
-		if (!ncf)
-			continue;
-
-		nc->filters[i] = NULL;
-		kfree(ncf);
-	}
+	kfree(nc->mac_filter.addrs);
+	kfree(nc->vlan_filter.vids);
 
 	nc->state = NCSI_CHANNEL_INACTIVE;
 	spin_unlock_irqrestore(&nc->lock, flags);
@@ -670,32 +544,26 @@ static void ncsi_suspend_channel(struct ncsi_dev_priv *ndp)
 static int clear_one_vid(struct ncsi_dev_priv *ndp, struct ncsi_channel *nc,
 			 struct ncsi_cmd_arg *nca)
 {
+	struct ncsi_channel_vlan_filter *ncf;
+	unsigned long flags;
+	void *bitmap;
 	int index;
-	u32 *data;
 	u16 vid;
 
-	index = ncsi_find_filter(nc, NCSI_FILTER_VLAN, NULL);
-	if (index < 0) {
-		/* Filter table empty */
+	ncf = &nc->vlan_filter;
+	bitmap = &ncf->bitmap;
+
+	spin_lock_irqsave(&nc->lock, flags);
+	index = find_next_bit(bitmap, ncf->n_vids, 0);
+	if (index >= ncf->n_vids) {
+		spin_unlock_irqrestore(&nc->lock, flags);
 		return -1;
 	}
+	vid = ncf->vids[index];
 
-	data = ncsi_get_filter(nc, NCSI_FILTER_VLAN, index);
-	if (!data) {
-		netdev_err(ndp->ndev.dev,
-			   "NCSI: failed to retrieve filter %d\n", index);
-		/* Set the VLAN id to 0 - this will still disable the entry in
-		 * the filter table, but we won't know what it was.
-		 */
-		vid = 0;
-	} else {
-		vid = *(u16 *)data;
-	}
-
-	netdev_printk(KERN_DEBUG, ndp->ndev.dev,
-		      "NCSI: removed vlan tag %u at index %d\n",
-		      vid, index + 1);
-	ncsi_remove_filter(nc, NCSI_FILTER_VLAN, index);
+	clear_bit(index, bitmap);
+	ncf->vids[index] = 0;
+	spin_unlock_irqrestore(&nc->lock, flags);
 
 	nca->type = NCSI_PKT_CMD_SVF;
 	nca->words[1] = vid;
@@ -711,45 +579,55 @@ static int clear_one_vid(struct ncsi_dev_priv *ndp, struct ncsi_channel *nc,
 static int set_one_vid(struct ncsi_dev_priv *ndp, struct ncsi_channel *nc,
 		       struct ncsi_cmd_arg *nca)
 {
+	struct ncsi_channel_vlan_filter *ncf;
 	struct vlan_vid *vlan = NULL;
-	int index = 0;
+	unsigned long flags;
+	int i, index;
+	void *bitmap;
+	u16 vid;
 
+	if (list_empty(&ndp->vlan_vids))
+		return -1;
+
+	ncf = &nc->vlan_filter;
+	bitmap = &ncf->bitmap;
+
+	spin_lock_irqsave(&nc->lock, flags);
+
+	rcu_read_lock();
 	list_for_each_entry_rcu(vlan, &ndp->vlan_vids, list) {
-		index = ncsi_find_filter(nc, NCSI_FILTER_VLAN, &vlan->vid);
-		if (index < 0) {
-			/* New tag to add */
-			netdev_printk(KERN_DEBUG, ndp->ndev.dev,
-				      "NCSI: new vlan id to set: %u\n",
-				      vlan->vid);
+		vid = vlan->vid;
+		for (i = 0; i < ncf->n_vids; i++)
+			if (ncf->vids[i] == vid) {
+				vid = 0;
+				break;
+			}
+		if (vid)
 			break;
-		}
-		netdev_printk(KERN_DEBUG, ndp->ndev.dev,
-			      "vid %u already at filter pos %d\n",
-			      vlan->vid, index);
 	}
+	rcu_read_unlock();
 
-	if (!vlan || index >= 0) {
-		netdev_printk(KERN_DEBUG, ndp->ndev.dev,
-			      "no vlan ids left to set\n");
+	if (!vid) {
+		/* No new VLAN ID left to set */
+		spin_unlock_irqrestore(&nc->lock, flags);
 		return -1;
 	}
 
-	index = ncsi_add_filter(nc, NCSI_FILTER_VLAN, &vlan->vid);
-	if (index < 0) {
+	index = find_next_zero_bit(bitmap, ncf->n_vids, 0);
+	if (index < 0 || index >= ncf->n_vids) {
 		netdev_err(ndp->ndev.dev,
-			   "Failed to add new VLAN tag, error %d\n", index);
-		if (index == -ENOSPC)
-			netdev_err(ndp->ndev.dev,
-				   "Channel %u already has all VLAN filters set\n",
-				   nc->id);
+			   "Channel %u already has all VLAN filters set\n",
+			   nc->id);
+		spin_unlock_irqrestore(&nc->lock, flags);
 		return -1;
 	}
 
-	netdev_printk(KERN_DEBUG, ndp->ndev.dev,
-		      "NCSI: set vid %u in packet, index %u\n",
-		      vlan->vid, index + 1);
+	ncf->vids[index] = vid;
+	set_bit(index, bitmap);
+	spin_unlock_irqrestore(&nc->lock, flags);
+
 	nca->type = NCSI_PKT_CMD_SVF;
-	nca->words[1] = vlan->vid;
+	nca->words[1] = vid;
 	/* HW filter index starts at 1 */
 	nca->bytes[6] = index + 1;
 	nca->bytes[7] = 0x01;
diff --git a/net/ncsi/ncsi-netlink.c b/net/ncsi/ncsi-netlink.c
index 8d7e849..b09ef77 100644
--- a/net/ncsi/ncsi-netlink.c
+++ b/net/ncsi/ncsi-netlink.c
@@ -58,10 +58,9 @@ static int ncsi_write_channel_info(struct sk_buff *skb,
 				   struct ncsi_dev_priv *ndp,
 				   struct ncsi_channel *nc)
 {
-	struct nlattr *vid_nest;
-	struct ncsi_channel_filter *ncf;
+	struct ncsi_channel_vlan_filter *ncf;
 	struct ncsi_channel_mode *m;
-	u32 *data;
+	struct nlattr *vid_nest;
 	int i;
 
 	nla_put_u32(skb, NCSI_CHANNEL_ATTR_ID, nc->id);
@@ -79,18 +78,13 @@ static int ncsi_write_channel_info(struct sk_buff *skb,
 	vid_nest = nla_nest_start(skb, NCSI_CHANNEL_ATTR_VLAN_LIST);
 	if (!vid_nest)
 		return -ENOMEM;
-	ncf = nc->filters[NCSI_FILTER_VLAN];
+	ncf = &nc->vlan_filter;
 	i = -1;
-	if (ncf) {
-		while ((i = find_next_bit((void *)&ncf->bitmap, ncf->total,
-					  i + 1)) < ncf->total) {
-			data = ncsi_get_filter(nc, NCSI_FILTER_VLAN, i);
-			/* Uninitialised channels will have 'zero' vlan ids */
-			if (!data || !*data)
-				continue;
+	while ((i = find_next_bit((void *)&ncf->bitmap, ncf->n_vids,
+				  i + 1)) < ncf->n_vids) {
+		if (ncf->vids[i])
 			nla_put_u16(skb, NCSI_CHANNEL_ATTR_VLAN_ID,
-				    *(u16 *)data);
-		}
+				    ncf->vids[i]);
 	}
 	nla_nest_end(skb, vid_nest);
 
diff --git a/net/ncsi/ncsi-rsp.c b/net/ncsi/ncsi-rsp.c
index efd933f..ce94979 100644
--- a/net/ncsi/ncsi-rsp.c
+++ b/net/ncsi/ncsi-rsp.c
@@ -334,9 +334,9 @@ static int ncsi_rsp_handler_svf(struct ncsi_request *nr)
 	struct ncsi_rsp_pkt *rsp;
 	struct ncsi_dev_priv *ndp = nr->ndp;
 	struct ncsi_channel *nc;
-	struct ncsi_channel_filter *ncf;
-	unsigned short vlan;
-	int ret;
+	struct ncsi_channel_vlan_filter *ncf;
+	unsigned long flags;
+	void *bitmap;
 
 	/* Find the package and channel */
 	rsp = (struct ncsi_rsp_pkt *)skb_network_header(nr->rsp);
@@ -346,22 +346,23 @@ static int ncsi_rsp_handler_svf(struct ncsi_request *nr)
 		return -ENODEV;
 
 	cmd = (struct ncsi_cmd_svf_pkt *)skb_network_header(nr->cmd);
-	ncf = nc->filters[NCSI_FILTER_VLAN];
-	if (!ncf)
-		return -ENOENT;
-	if (cmd->index >= ncf->total)
+	ncf = &nc->vlan_filter;
+	if (cmd->index > ncf->n_vids)
 		return -ERANGE;
 
-	/* Add or remove the VLAN filter */
+	/* Add or remove the VLAN filter. Remember that HW indexes start from 1 */
+	spin_lock_irqsave(&nc->lock, flags);
+	bitmap = &ncf->bitmap;
 	if (!(cmd->enable & 0x1)) {
-		/* HW indexes from 1 */
-		ret = ncsi_remove_filter(nc, NCSI_FILTER_VLAN, cmd->index - 1);
+		if (test_and_clear_bit(cmd->index - 1, bitmap))
+			ncf->vids[cmd->index - 1] = 0;
 	} else {
-		vlan = ntohs(cmd->vlan);
-		ret = ncsi_add_filter(nc, NCSI_FILTER_VLAN, &vlan);
+		set_bit(cmd->index - 1, bitmap);
+		ncf->vids[cmd->index - 1] = ntohs(cmd->vlan);
 	}
+	spin_unlock_irqrestore(&nc->lock, flags);
 
-	return ret;
+	return 0;
 }
 
 static int ncsi_rsp_handler_ev(struct ncsi_request *nr)
@@ -422,8 +423,12 @@ static int ncsi_rsp_handler_sma(struct ncsi_request *nr)
 	struct ncsi_rsp_pkt *rsp;
 	struct ncsi_dev_priv *ndp = nr->ndp;
 	struct ncsi_channel *nc;
-	struct ncsi_channel_filter *ncf;
+	struct ncsi_channel_mac_filter *ncf;
+	unsigned long flags;
 	void *bitmap;
+	bool enabled;
+	int index;
+
 	/* Find the package and channel */
 	rsp = (struct ncsi_rsp_pkt *)skb_network_header(nr->rsp);
@@ -436,31 +441,23 @@ static int ncsi_rsp_handler_sma(struct ncsi_request *nr)
 	 * isn't supported yet.
 	 */
 	cmd = (struct ncsi_cmd_sma_pkt *)skb_network_header(nr->cmd);
-	switch (cmd->at_e >> 5) {
-	case 0x0:	/* UC address */
-		ncf = nc->filters[NCSI_FILTER_UC];
-		break;
-	case 0x1:	/* MC address */
-		ncf = nc->filters[NCSI_FILTER_MC];
-		break;
-	default:
-		return -EINVAL;
-	}
+	enabled = cmd->at_e & 0x1;
+	ncf = &nc->mac_filter;
+	bitmap = &ncf->bitmap;
 
-	/* Sanity check on the filter */
-	if (!ncf)
-		return -ENOENT;
-	else if (cmd->index >= ncf->total)
+	if (cmd->index > ncf->n_uc + ncf->n_mc + ncf->n_mixed)
 		return -ERANGE;
 
-	bitmap = &ncf->bitmap;
-	if (cmd->at_e & 0x1) {
-		set_bit(cmd->index, bitmap);
-		memcpy(ncf->data + 6 * cmd->index, cmd->mac, 6);
+	index = (cmd->index - 1) * ETH_ALEN;
+	spin_lock_irqsave(&nc->lock, flags);
+	if (enabled) {
+		set_bit(cmd->index - 1, bitmap);
+		memcpy(&ncf->addrs[index], cmd->mac, ETH_ALEN);
 	} else {
-		clear_bit(cmd->index, bitmap);
-		memset(ncf->data + 6 * cmd->index, 0, 6);
+		clear_bit(cmd->index - 1, bitmap);
+		memset(&ncf->addrs[index], 0, ETH_ALEN);
 	}
+	spin_unlock_irqrestore(&nc->lock, flags);
 
 	return 0;
 }
@@ -631,9 +628,7 @@ static int ncsi_rsp_handler_gc(struct ncsi_request *nr)
 	struct ncsi_rsp_gc_pkt *rsp;
 	struct ncsi_dev_priv *ndp = nr->ndp;
 	struct ncsi_channel *nc;
-	struct ncsi_channel_filter *ncf;
-	size_t size, entry_size;
-	int cnt, i;
+	size_t size;
 
 	/* Find the channel */
 	rsp = (struct ncsi_rsp_gc_pkt *)skb_network_header(nr->rsp);
@@ -655,64 +650,40 @@ static int ncsi_rsp_handler_gc(struct ncsi_request *nr)
 	nc->caps[NCSI_CAP_VLAN].cap = rsp->vlan_mode &
 				      NCSI_CAP_VLAN_MASK;
 
-	/* Build filters */
-	for (i = 0; i < NCSI_FILTER_MAX; i++) {
-		switch (i) {
-		case NCSI_FILTER_VLAN:
-			cnt = rsp->vlan_cnt;
-			entry_size = 2;
-			break;
-		case NCSI_FILTER_MIXED:
-			cnt = rsp->mixed_cnt;
-			entry_size = 6;
-			break;
-		case NCSI_FILTER_MC:
-			cnt = rsp->mc_cnt;
-			entry_size = 6;
-			break;
-		case NCSI_FILTER_UC:
-			cnt = rsp->uc_cnt;
-			entry_size = 6;
-			break;
-		default:
-			continue;
-		}
+	size = (rsp->uc_cnt + rsp->mc_cnt + rsp->mixed_cnt) * ETH_ALEN;
+	nc->mac_filter.addrs = kzalloc(size, GFP_KERNEL);
+	if (!nc->mac_filter.addrs)
+		return -ENOMEM;
+	nc->mac_filter.n_uc = rsp->uc_cnt;
+	nc->mac_filter.n_mc = rsp->mc_cnt;
+	nc->mac_filter.n_mixed = rsp->mixed_cnt;
 
-		if (!cnt || nc->filters[i])
-			continue;
-
-		size = sizeof(*ncf) + cnt * entry_size;
-		ncf = kzalloc(size, GFP_ATOMIC);
-		if (!ncf) {
-			pr_warn("%s: Cannot alloc filter table (%d)\n",
-				__func__, i);
-			return -ENOMEM;
-		}
-
-		ncf->index = i;
-		ncf->total = cnt;
-		if (i == NCSI_FILTER_VLAN) {
-			/* Set VLAN filters active so they are cleared in
-			 * first configuration state
-			 */
-			ncf->bitmap = U64_MAX;
-		} else {
-			ncf->bitmap = 0x0ul;
-		}
-		nc->filters[i] = ncf;
-	}
+	nc->vlan_filter.vids = kcalloc(rsp->vlan_cnt,
+				       sizeof(*nc->vlan_filter.vids),
+				       GFP_KERNEL);
+	if (!nc->vlan_filter.vids)
+		return -ENOMEM;
+	/* Set VLAN filters active so they are cleared in the first
+	 * configuration state
+	 */
+	nc->vlan_filter.bitmap = U64_MAX;
+	nc->vlan_filter.n_vids = rsp->vlan_cnt;
 
 	return 0;
 }
 
 static int ncsi_rsp_handler_gp(struct ncsi_request *nr)
 {
-	struct ncsi_rsp_gp_pkt *rsp;
+	struct ncsi_channel_vlan_filter *ncvf;
+	struct ncsi_channel_mac_filter *ncmf;
 	struct ncsi_dev_priv *ndp = nr->ndp;
+	struct ncsi_rsp_gp_pkt *rsp;
 	struct ncsi_channel *nc;
-	unsigned short enable, vlan;
+	unsigned short enable;
 	unsigned char *pdata;
-	int table, i;
+	unsigned long flags;
+	void *bitmap;
+	int i;
 
 	/* Find the channel */
 	rsp = (struct ncsi_rsp_gp_pkt *)skb_network_header(nr->rsp);
@@ -746,36 +717,33 @@ static int ncsi_rsp_handler_gp(struct ncsi_request *nr)
 	/* MAC addresses filter table */
 	pdata = (unsigned char *)rsp + 48;
 	enable = rsp->mac_enable;
+	ncmf = &nc->mac_filter;
+	spin_lock_irqsave(&nc->lock, flags);
+	bitmap = &ncmf->bitmap;
 	for (i = 0; i < rsp->mac_cnt; i++, pdata += 6) {
-		if (i >= (nc->filters[NCSI_FILTER_UC]->total +
-			  nc->filters[NCSI_FILTER_MC]->total))
-			table = NCSI_FILTER_MIXED;
-		else if (i >= nc->filters[NCSI_FILTER_UC]->total)
-			table = NCSI_FILTER_MC;
-		else
-			table = NCSI_FILTER_UC;
-
 		if (!(enable & (0x1 << i)))
-			continue;
+			clear_bit(i, bitmap);
+		else
+			set_bit(i, bitmap);
 
-		if (ncsi_find_filter(nc, table, pdata) >= 0)
-			continue;
-
-		ncsi_add_filter(nc, table, pdata);
+		memcpy(&ncmf->addrs[i * ETH_ALEN], pdata, ETH_ALEN);
 	}
+	spin_unlock_irqrestore(&nc->lock, flags);
 
 	/* VLAN filter table */
 	enable = ntohs(rsp->vlan_enable);
+	ncvf = &nc->vlan_filter;
+	bitmap = &ncvf->bitmap;
+	spin_lock_irqsave(&nc->lock, flags);
 	for (i = 0; i < rsp->vlan_cnt; i++, pdata += 2) {
 		if (!(enable & (0x1 << i)))
-			continue;
+			clear_bit(i, bitmap);
+		else
+			set_bit(i, bitmap);
 
-		vlan = ntohs(*(__be16 *)pdata);
-		if (ncsi_find_filter(nc, NCSI_FILTER_VLAN, &vlan) >= 0)
-			continue;
-
-		ncsi_add_filter(nc, NCSI_FILTER_VLAN, &vlan);
+		ncvf->vids[i] = ntohs(*(__be16 *)pdata);
 	}
+	spin_unlock_irqrestore(&nc->lock, flags);
 
 	return 0;
 }
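
Reviewer note, summing up the net/ncsi conversion above: the generic ncsi_channel_filter tables and their add/find/remove helpers are gone, replaced by flat per-type structures (mac_filter, vlan_filter) whose 64-bit bitmap marks live slots and which are manipulated directly under the channel lock. The slot-claim idiom, condensed from the new set_one_vid() (a fragment, not standalone code; nc, ncf, vid and flags as declared in the diff):

	void *bitmap = &ncf->bitmap;

	spin_lock_irqsave(&nc->lock, flags);
	index = find_next_zero_bit(bitmap, ncf->n_vids, 0);
	if (index >= ncf->n_vids) {
		spin_unlock_irqrestore(&nc->lock, flags);
		return -1;			/* all HW slots in use */
	}
	ncf->vids[index] = vid;
	set_bit(index, bitmap);
	spin_unlock_irqrestore(&nc->lock, flags);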
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index 4527921..ba0a0fd 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -266,12 +266,13 @@ static inline bool decrement_ttl(struct netns_ipvs *ipvs,
 
 		/* check and decrement ttl */
 		if (ipv6_hdr(skb)->hop_limit <= 1) {
+			struct inet6_dev *idev = __in6_dev_get_safely(skb->dev);
+
 			/* Force OUTPUT device used as source address */
 			skb->dev = dst->dev;
 			icmpv6_send(skb, ICMPV6_TIME_EXCEED,
 				    ICMPV6_EXC_HOPLIMIT, 0);
-			__IP6_INC_STATS(net, ip6_dst_idev(dst),
-					IPSTATS_MIB_INHDRERRORS);
+			__IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);
 
 			return false;
 		}
diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index f7d47c8..2dfb492 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -697,6 +697,9 @@ static int __tipc_nl_add_bearer(struct tipc_nl_msg *msg,
 		goto prop_msg_full;
 	if (nla_put_u32(msg->skb, TIPC_NLA_PROP_WIN, bearer->window))
 		goto prop_msg_full;
+	if (bearer->media->type_id == TIPC_MEDIA_TYPE_UDP)
+		if (nla_put_u32(msg->skb, TIPC_NLA_PROP_MTU, bearer->mtu))
+			goto prop_msg_full;
 
 	nla_nest_end(msg->skb, prop);
 
@@ -979,12 +982,23 @@ int __tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info)
 
 		if (props[TIPC_NLA_PROP_TOL]) {
 			b->tolerance = nla_get_u32(props[TIPC_NLA_PROP_TOL]);
-			tipc_node_apply_tolerance(net, b);
+			tipc_node_apply_property(net, b, TIPC_NLA_PROP_TOL);
 		}
 		if (props[TIPC_NLA_PROP_PRIO])
 			b->priority = nla_get_u32(props[TIPC_NLA_PROP_PRIO]);
 		if (props[TIPC_NLA_PROP_WIN])
 			b->window = nla_get_u32(props[TIPC_NLA_PROP_WIN]);
+		if (props[TIPC_NLA_PROP_MTU]) {
+			if (b->media->type_id != TIPC_MEDIA_TYPE_UDP)
+				return -EINVAL;
+#ifdef CONFIG_TIPC_MEDIA_UDP
+			if (tipc_udp_mtu_bad(nla_get_u32
+					     (props[TIPC_NLA_PROP_MTU])))
+				return -EINVAL;
+			b->mtu = nla_get_u32(props[TIPC_NLA_PROP_MTU]);
+			tipc_node_apply_property(net, b, TIPC_NLA_PROP_MTU);
+#endif
+		}
 	}
 
 	return 0;
@@ -1029,6 +1043,9 @@ static int __tipc_nl_add_media(struct tipc_nl_msg *msg,
 		goto prop_msg_full;
 	if (nla_put_u32(msg->skb, TIPC_NLA_PROP_WIN, media->window))
 		goto prop_msg_full;
+	if (media->type_id == TIPC_MEDIA_TYPE_UDP)
+		if (nla_put_u32(msg->skb, TIPC_NLA_PROP_MTU, media->mtu))
+			goto prop_msg_full;
 
 	nla_nest_end(msg->skb, prop);
 	nla_nest_end(msg->skb, attrs);
@@ -1158,6 +1175,16 @@ int __tipc_nl_media_set(struct sk_buff *skb, struct genl_info *info)
 			m->priority = nla_get_u32(props[TIPC_NLA_PROP_PRIO]);
 		if (props[TIPC_NLA_PROP_WIN])
 			m->window = nla_get_u32(props[TIPC_NLA_PROP_WIN]);
+		if (props[TIPC_NLA_PROP_MTU]) {
+			if (m->type_id != TIPC_MEDIA_TYPE_UDP)
+				return -EINVAL;
+#ifdef CONFIG_TIPC_MEDIA_UDP
+			if (tipc_udp_mtu_bad(nla_get_u32
+					     (props[TIPC_NLA_PROP_MTU])))
+				return -EINVAL;
+			m->mtu = nla_get_u32(props[TIPC_NLA_PROP_MTU]);
+#endif
+		}
 	}
 
 	return 0;
diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h
index 6efcee6..394290c 100644
--- a/net/tipc/bearer.h
+++ b/net/tipc/bearer.h
@@ -94,6 +94,8 @@ struct tipc_bearer;
  * @priority: default link (and bearer) priority
  * @tolerance: default time (in ms) before declaring link failure
  * @window: default window (in packets) before declaring link congestion
+ * @mtu: max packet size the bearer can support, for media types not
+ * dependent on the underlying device MTU
  * @type_id: TIPC media identifier
  * @hwaddr_len: TIPC media address len
  * @name: media name
@@ -118,6 +120,7 @@ struct tipc_media {
 	u32 priority;
 	u32 tolerance;
 	u32 window;
+	u32 mtu;
 	u32 type_id;
 	u32 hwaddr_len;
 	char name[TIPC_MAX_MEDIA_NAME];
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 6f98b56..e9c52e14 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -1681,7 +1681,8 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b)
 	kfree_skb(skb);
 }
 
-void tipc_node_apply_tolerance(struct net *net, struct tipc_bearer *b)
+void tipc_node_apply_property(struct net *net, struct tipc_bearer *b,
+			      int prop)
 {
 	struct tipc_net *tn = tipc_net(net);
 	int bearer_id = b->identity;
@@ -1696,8 +1697,13 @@ void tipc_node_apply_tolerance(struct net *net, struct tipc_bearer *b)
 	list_for_each_entry_rcu(n, &tn->node_list, list) {
 		tipc_node_write_lock(n);
 		e = &n->links[bearer_id];
-		if (e->link)
-			tipc_link_set_tolerance(e->link, b->tolerance, &xmitq);
+		if (e->link) {
+			if (prop == TIPC_NLA_PROP_TOL)
+				tipc_link_set_tolerance(e->link, b->tolerance,
+							&xmitq);
+			else if (prop == TIPC_NLA_PROP_MTU)
+				tipc_link_set_mtu(e->link, b->mtu);
+		}
 		tipc_node_write_unlock(n);
 		tipc_bearer_xmit(net, bearer_id, &xmitq, &e->maddr);
 	}
diff --git a/net/tipc/node.h b/net/tipc/node.h
index f24b835..bb271a3 100644
--- a/net/tipc/node.h
+++ b/net/tipc/node.h
@@ -67,7 +67,7 @@ void tipc_node_check_dest(struct net *net, u32 onode, u8 *peer_id128,
 			  struct tipc_media_addr *maddr,
 			  bool *respond, bool *dupl_addr);
 void tipc_node_delete_links(struct net *net, int bearer_id);
-void tipc_node_apply_tolerance(struct net *net, struct tipc_bearer *b);
+void tipc_node_apply_property(struct net *net, struct tipc_bearer *b, int prop);
 int tipc_node_get_linkname(struct net *net, u32 bearer_id, u32 node,
 			   char *linkname, size_t len);
 int tipc_node_xmit(struct net *net, struct sk_buff_head *list, u32 dnode,
diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c
index e7d91f5..9783101 100644
--- a/net/tipc/udp_media.c
+++ b/net/tipc/udp_media.c
@@ -713,8 +713,7 @@ static int tipc_udp_enable(struct net *net, struct tipc_bearer *b,
 			err = -EINVAL;
 			goto err;
 		}
-		b->mtu = dev->mtu - sizeof(struct iphdr)
-			- sizeof(struct udphdr);
+		b->mtu = b->media->mtu;
 #if IS_ENABLED(CONFIG_IPV6)
 	} else if (local.proto == htons(ETH_P_IPV6)) {
 		udp_conf.family = AF_INET6;
@@ -803,6 +802,7 @@ struct tipc_media udp_media_info = {
 	.priority	= TIPC_DEF_LINK_PRI,
 	.tolerance	= TIPC_DEF_LINK_TOL,
 	.window		= TIPC_DEF_LINK_WIN,
+	.mtu		= TIPC_DEF_LINK_UDP_MTU,
 	.type_id	= TIPC_MEDIA_TYPE_UDP,
 	.hwaddr_len	= 0,
 	.name		= "udp"
diff --git a/net/tipc/udp_media.h b/net/tipc/udp_media.h
index 281bbae..e7455cc 100644
--- a/net/tipc/udp_media.h
+++ b/net/tipc/udp_media.h
@@ -38,9 +38,23 @@
 #ifndef _TIPC_UDP_MEDIA_H
 #define _TIPC_UDP_MEDIA_H
 
+#include <linux/ip.h>
+#include <linux/udp.h>
+
 int tipc_udp_nl_bearer_add(struct tipc_bearer *b, struct nlattr *attr);
 int tipc_udp_nl_add_bearer_data(struct tipc_nl_msg *msg, struct tipc_bearer *b);
 int tipc_udp_nl_dump_remoteip(struct sk_buff *skb, struct netlink_callback *cb);
 
+/* Check if the configured MTU is too low to carry the TIPC headers */
+static inline bool tipc_udp_mtu_bad(u32 mtu)
+{
+	if (mtu >= (TIPC_MIN_BEARER_MTU + sizeof(struct iphdr) +
+	    sizeof(struct udphdr)))
+		return false;
+
+	pr_warn("MTU too low for tipc bearer\n");
+	return true;
+}
+
 #endif
 #endif
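
For reference, a minimal stand-alone sketch of the bound enforced by
tipc_udp_mtu_bad() above; tipc_min_mtu stands in for TIPC_MIN_BEARER_MTU
(an assumption, its value lives in the TIPC headers), and 20 and 8 are
the IPv4 and UDP header sizes the UDP bearer adds:

#include <stdbool.h>
#include <stdint.h>

/* Hypothetical userspace version of the same check */
static bool udp_bearer_mtu_bad(uint32_t mtu, uint32_t tipc_min_mtu)
{
	return mtu < tipc_min_mtu + 20 /* iphdr */ + 8 /* udphdr */;
}
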
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index 4d6a6ed..aa8c392 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -44,6 +44,7 @@
 hostprogs-y += xdp_rxq_info
 hostprogs-y += syscall_tp
 hostprogs-y += cpustat
+hostprogs-y += xdp_adjust_tail
 
 # Libbpf dependencies
 LIBBPF := ../../tools/lib/bpf/bpf.o ../../tools/lib/bpf/nlattr.o
@@ -95,6 +96,7 @@
 xdp_rxq_info-objs := bpf_load.o $(LIBBPF) xdp_rxq_info_user.o
 syscall_tp-objs := bpf_load.o $(LIBBPF) syscall_tp_user.o
 cpustat-objs := bpf_load.o $(LIBBPF) cpustat_user.o
+xdp_adjust_tail-objs := bpf_load.o $(LIBBPF) xdp_adjust_tail_user.o
 
 # Tell kbuild to always build the programs
 always := $(hostprogs-y)
@@ -148,6 +150,7 @@
 always += xdp2skb_meta_kern.o
 always += syscall_tp_kern.o
 always += cpustat_kern.o
+always += xdp_adjust_tail_kern.o
 
 HOSTCFLAGS += -I$(objtree)/usr/include
 HOSTCFLAGS += -I$(srctree)/tools/lib/
@@ -193,6 +196,7 @@
 HOSTLOADLIBES_xdp_rxq_info += -lelf
 HOSTLOADLIBES_syscall_tp += -lelf
 HOSTLOADLIBES_cpustat += -lelf
+HOSTLOADLIBES_xdp_adjust_tail += -lelf
 
 # Allows pointing LLC/CLANG to a LLVM backend with bpf support, redefine on cmdline:
 #  make samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang
diff --git a/samples/bpf/sock_example.c b/samples/bpf/sock_example.c
index 6fc6e19..33a6375 100644
--- a/samples/bpf/sock_example.c
+++ b/samples/bpf/sock_example.c
@@ -9,10 +9,10 @@
  *   if (value)
  *        (*(u64*)value) += 1;
  *
- * - attaches this program to eth0 raw socket
+ * - attaches this program to loopback interface "lo" raw socket
  *
  * - every second user space reads map[tcp], map[udp], map[icmp] to see
- *   how many packets of given protocol were seen on eth0
+ *   how many packets of given protocol were seen on "lo"
  */
 #include <stdio.h>
 #include <unistd.h>
diff --git a/samples/bpf/xdp_adjust_tail_kern.c b/samples/bpf/xdp_adjust_tail_kern.c
new file mode 100644
index 0000000..411fdb2
--- /dev/null
+++ b/samples/bpf/xdp_adjust_tail_kern.c
@@ -0,0 +1,152 @@
+/* SPDX-License-Identifier: GPL-2.0
+ * Copyright (c) 2018 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program shows how to use bpf_xdp_adjust_tail() by
+ * generating an ICMPv4 "packet too big" message (to be more precise
+ * in the v4 case: "destination unreachable, fragmentation needed and
+ * DF bit set") whenever a received packet is bigger than 600 bytes.
+ */
+#define KBUILD_MODNAME "foo"
+#include <uapi/linux/bpf.h>
+#include <linux/in.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <linux/if_vlan.h>
+#include <linux/ip.h>
+#include <linux/icmp.h>
+#include "bpf_helpers.h"
+
+#define DEFAULT_TTL 64
+#define MAX_PCKT_SIZE 600
+#define ICMP_TOOBIG_SIZE 98
+#define ICMP_TOOBIG_PAYLOAD_SIZE 92
+
+struct bpf_map_def SEC("maps") icmpcnt = {
+	.type = BPF_MAP_TYPE_ARRAY,
+	.key_size = sizeof(__u32),
+	.value_size = sizeof(__u64),
+	.max_entries = 1,
+};
+
+static __always_inline void count_icmp(void)
+{
+	u64 key = 0;
+	u64 *icmp_count;
+
+	icmp_count = bpf_map_lookup_elem(&icmpcnt, &key);
+	if (icmp_count)
+		*icmp_count += 1;
+}
+
+static __always_inline void swap_mac(void *data, struct ethhdr *orig_eth)
+{
+	struct ethhdr *eth;
+
+	eth = data;
+	memcpy(eth->h_source, orig_eth->h_dest, ETH_ALEN);
+	memcpy(eth->h_dest, orig_eth->h_source, ETH_ALEN);
+	eth->h_proto = orig_eth->h_proto;
+}
+
+static __always_inline __u16 csum_fold_helper(__u32 csum)
+{
+	return ~((csum & 0xffff) + (csum >> 16));
+}
+
+static __always_inline void ipv4_csum(void *data_start, int data_size,
+				      __u32 *csum)
+{
+	*csum = bpf_csum_diff(0, 0, data_start, data_size, *csum);
+	*csum = csum_fold_helper(*csum);
+}
+
+static __always_inline int send_icmp4_too_big(struct xdp_md *xdp)
+{
+	int headroom = (int)sizeof(struct iphdr) + (int)sizeof(struct icmphdr);
+
+	if (bpf_xdp_adjust_head(xdp, 0 - headroom))
+		return XDP_DROP;
+	void *data = (void *)(long)xdp->data;
+	void *data_end = (void *)(long)xdp->data_end;
+
+	if (data + (ICMP_TOOBIG_SIZE + headroom) > data_end)
+		return XDP_DROP;
+
+	struct iphdr *iph, *orig_iph;
+	struct icmphdr *icmp_hdr;
+	struct ethhdr *orig_eth;
+	__u32 csum = 0;
+	__u64 off = 0;
+
+	orig_eth = data + headroom;
+	swap_mac(data, orig_eth);
+	off += sizeof(struct ethhdr);
+	iph = data + off;
+	off += sizeof(struct iphdr);
+	icmp_hdr = data + off;
+	off += sizeof(struct icmphdr);
+	orig_iph = data + off;
+	icmp_hdr->type = ICMP_DEST_UNREACH;
+	icmp_hdr->code = ICMP_FRAG_NEEDED;
+	icmp_hdr->un.frag.mtu = htons(MAX_PCKT_SIZE - sizeof(struct ethhdr));
+	icmp_hdr->checksum = 0;
+	ipv4_csum(icmp_hdr, ICMP_TOOBIG_PAYLOAD_SIZE, &csum);
+	icmp_hdr->checksum = csum;
+	iph->ttl = DEFAULT_TTL;
+	iph->daddr = orig_iph->saddr;
+	iph->saddr = orig_iph->daddr;
+	iph->version = 4;
+	iph->ihl = 5;
+	iph->protocol = IPPROTO_ICMP;
+	iph->tos = 0;
+	iph->tot_len = htons(
+		ICMP_TOOBIG_SIZE + headroom - sizeof(struct ethhdr));
+	iph->check = 0;
+	csum = 0;
+	ipv4_csum(iph, sizeof(struct iphdr), &csum);
+	iph->check = csum;
+	count_icmp();
+	return XDP_TX;
+}
+
+
+static __always_inline int handle_ipv4(struct xdp_md *xdp)
+{
+	void *data_end = (void *)(long)xdp->data_end;
+	void *data = (void *)(long)xdp->data;
+	int pckt_size = data_end - data;
+	int offset;
+
+	if (pckt_size > MAX_PCKT_SIZE) {
+		offset = pckt_size - ICMP_TOOBIG_SIZE;
+		if (bpf_xdp_adjust_tail(xdp, 0 - offset))
+			return XDP_PASS;
+		return send_icmp4_too_big(xdp);
+	}
+	return XDP_PASS;
+}
+
+SEC("xdp_icmp")
+int _xdp_icmp(struct xdp_md *xdp)
+{
+	void *data_end = (void *)(long)xdp->data_end;
+	void *data = (void *)(long)xdp->data;
+	struct ethhdr *eth = data;
+	__u16 h_proto;
+
+	if (eth + 1 > data_end)
+		return XDP_DROP;
+
+	h_proto = eth->h_proto;
+
+	if (h_proto == htons(ETH_P_IP))
+		return handle_ipv4(xdp);
+	else
+		return XDP_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
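
A worked example of the shrink arithmetic in handle_ipv4() above, with
the ingress size assumed for illustration:

/* For a hypothetical 700-byte frame:
 *   offset = pckt_size - ICMP_TOOBIG_SIZE = 700 - 98 = 602
 * so bpf_xdp_adjust_tail(xdp, -602) leaves exactly 98 bytes, which
 * send_icmp4_too_big() rewrites in place into the ICMP "frag needed"
 * reply and bounces back out with XDP_TX.
 */
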
diff --git a/samples/bpf/xdp_adjust_tail_user.c b/samples/bpf/xdp_adjust_tail_user.c
new file mode 100644
index 0000000..f621a54
--- /dev/null
+++ b/samples/bpf/xdp_adjust_tail_user.c
@@ -0,0 +1,142 @@
+/* SPDX-License-Identifier: GPL-2.0
+ * Copyright (c) 2018 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <linux/bpf.h>
+#include <linux/if_link.h>
+#include <assert.h>
+#include <errno.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/resource.h>
+#include <arpa/inet.h>
+#include <netinet/ether.h>
+#include <unistd.h>
+#include <time.h>
+#include "bpf_load.h"
+#include "libbpf.h"
+#include "bpf_util.h"
+
+#define STATS_INTERVAL_S 2U
+
+static int ifindex = -1;
+static __u32 xdp_flags;
+
+static void int_exit(int sig)
+{
+	if (ifindex > -1)
+		bpf_set_link_xdp_fd(ifindex, -1, xdp_flags);
+	exit(0);
+}
+
+/* simple counter of ICMP "packet too big" messages sent
+ */
+static void poll_stats(unsigned int kill_after_s)
+{
+	time_t started_at = time(NULL);
+	__u64 value = 0;
+	int key = 0;
+
+	while (!kill_after_s || time(NULL) - started_at <= kill_after_s) {
+		sleep(STATS_INTERVAL_S);
+
+		assert(bpf_map_lookup_elem(map_fd[0], &key, &value) == 0);
+
+		printf("icmp \"packet too big\" sent: %10llu pkts\n", value);
+	}
+}
+
+static void usage(const char *cmd)
+{
+	printf("Start a XDP prog which send ICMP \"packet too big\" \n"
+		"messages if ingress packet is bigger then MAX_SIZE bytes\n");
+	printf("Usage: %s [...]\n", cmd);
+	printf("    -i <ifindex> Interface Index\n");
+	printf("    -T <stop-after-X-seconds> Default: 0 (forever)\n");
+	printf("    -S use skb-mode\n");
+	printf("    -N enforce native mode\n");
+	printf("    -h Display this help\n");
+}
+
+int main(int argc, char **argv)
+{
+	unsigned char opt_flags[256] = {};
+	unsigned int kill_after_s = 0;
+	const char *optstr = "i:T:SNh";
+	struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
+	char filename[256];
+	int opt;
+	int i;
+
+	for (i = 0; i < strlen(optstr); i++)
+		if (optstr[i] != 'h' && 'a' <= optstr[i] && optstr[i] <= 'z')
+			opt_flags[(unsigned char)optstr[i]] = 1;
+
+	while ((opt = getopt(argc, argv, optstr)) != -1) {
+		switch (opt) {
+		case 'i':
+			ifindex = atoi(optarg);
+			break;
+		case 'T':
+			kill_after_s = atoi(optarg);
+			break;
+		case 'S':
+			xdp_flags |= XDP_FLAGS_SKB_MODE;
+			break;
+		case 'N':
+			xdp_flags |= XDP_FLAGS_DRV_MODE;
+			break;
+		default:
+			usage(argv[0]);
+			return 1;
+		}
+		opt_flags[opt] = 0;
+	}
+
+	for (i = 0; i < strlen(optstr); i++) {
+		if (opt_flags[(unsigned int)optstr[i]]) {
+			fprintf(stderr, "Missing argument -%c\n", optstr[i]);
+			usage(argv[0]);
+			return 1;
+		}
+	}
+
+	if (setrlimit(RLIMIT_MEMLOCK, &r)) {
+		perror("setrlimit(RLIMIT_MEMLOCK, RLIM_INFINITY)");
+		return 1;
+	}
+
+	snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+
+	if (load_bpf_file(filename)) {
+		printf("%s", bpf_log_buf);
+		return 1;
+	}
+
+	if (!prog_fd[0]) {
+		printf("load_bpf_file: %s\n", strerror(errno));
+		return 1;
+	}
+
+	signal(SIGINT, int_exit);
+	signal(SIGTERM, int_exit);
+
+	if (bpf_set_link_xdp_fd(ifindex, prog_fd[0], xdp_flags) < 0) {
+		printf("link set xdp fd failed\n");
+		return 1;
+	}
+
+	poll_stats(kill_after_s);
+
+	bpf_set_link_xdp_fd(ifindex, -1, xdp_flags);
+
+	return 0;
+}
diff --git a/samples/bpf/xdp_monitor_user.c b/samples/bpf/xdp_monitor_user.c
index eec1452..894bc64 100644
--- a/samples/bpf/xdp_monitor_user.c
+++ b/samples/bpf/xdp_monitor_user.c
@@ -330,7 +330,7 @@ static void stats_print(struct stats_record *stats_rec,
 			pps = calc_pps_u64(r, p, t);
 			if (pps > 0)
 				printf(fmt1, "Exception", i,
-				       0.0, pps, err2str(rec_i));
+				       0.0, pps, action2str(rec_i));
 		}
 		pps = calc_pps_u64(&rec->total, &prev->total, t);
 		if (pps > 0)
diff --git a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst
index 0e4e923..d004f63 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst
@@ -26,7 +26,8 @@
 |	**bpftool** **cgroup help**
 |
 |	*PROG* := { **id** *PROG_ID* | **pinned** *FILE* | **tag** *PROG_TAG* }
-|	*ATTACH_TYPE* := { **ingress** | **egress** | **sock_create** | **sock_ops** | **device** }
+|	*ATTACH_TYPE* := { **ingress** | **egress** | **sock_create** | **sock_ops** | **device** |
+|		**bind4** | **bind6** | **post_bind4** | **post_bind6** | **connect4** | **connect6** }
 |	*ATTACH_FLAGS* := { **multi** | **override** }
 
 DESCRIPTION
@@ -63,7 +64,13 @@
 		  **egress** egress path of the inet socket (since 4.10);
 		  **sock_create** opening of an inet socket (since 4.10);
 		  **sock_ops** various socket operations (since 4.12);
-		  **device** device access (since 4.15).
+		  **device** device access (since 4.15);
+		  **bind4** call to bind(2) for an inet4 socket (since 4.17);
+		  **bind6** call to bind(2) for an inet6 socket (since 4.17);
+		  **post_bind4** return from bind(2) for an inet4 socket (since 4.17);
+		  **post_bind6** return from bind(2) for an inet6 socket (since 4.17);
+		  **connect4** call to connect(2) for an inet4 socket (since 4.17);
+		  **connect6** call to connect(2) for an inet6 socket (since 4.17).
 
 	**bpftool cgroup detach** *CGROUP* *ATTACH_TYPE* *PROG*
 		  Detach *PROG* from the cgroup *CGROUP* and attach type
diff --git a/tools/bpf/bpftool/Documentation/bpftool-map.rst b/tools/bpf/bpftool/Documentation/bpftool-map.rst
index 457e868..5f512b1 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-map.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-map.rst
@@ -23,10 +23,10 @@
 
 |	**bpftool** **map { show | list }**   [*MAP*]
 |	**bpftool** **map dump**    *MAP*
-|	**bpftool** **map update**  *MAP*  **key** *BYTES*   **value** *VALUE* [*UPDATE_FLAGS*]
-|	**bpftool** **map lookup**  *MAP*  **key** *BYTES*
-|	**bpftool** **map getnext** *MAP* [**key** *BYTES*]
-|	**bpftool** **map delete**  *MAP*  **key** *BYTES*
+|	**bpftool** **map update**  *MAP*  **key** [**hex**] *BYTES*   **value** [**hex**] *VALUE* [*UPDATE_FLAGS*]
+|	**bpftool** **map lookup**  *MAP*  **key** [**hex**] *BYTES*
+|	**bpftool** **map getnext** *MAP* [**key** [**hex**] *BYTES*]
+|	**bpftool** **map delete**  *MAP*  **key** [**hex**] *BYTES*
 |	**bpftool** **map pin**     *MAP*  *FILE*
 |	**bpftool** **map help**
 |
@@ -48,20 +48,26 @@
 	**bpftool map dump**    *MAP*
 		  Dump all entries in a given *MAP*.
 
-	**bpftool map update**  *MAP*  **key** *BYTES*   **value** *VALUE* [*UPDATE_FLAGS*]
+	**bpftool map update**  *MAP*  **key** [**hex**] *BYTES*   **value** [**hex**] *VALUE* [*UPDATE_FLAGS*]
 		  Update map entry for a given *KEY*.
 
 		  *UPDATE_FLAGS* can be one of: **any** update existing entry
 		  or add if it doesn't exist; **exist** update only if entry already
 		  exists; **noexist** update only if entry doesn't exist.
 
-	**bpftool map lookup**  *MAP*  **key** *BYTES*
+		  If the **hex** keyword is provided in front of the bytes
+		  sequence, the bytes are parsed as hexadecimal values, even if
+		  no "0x" prefix is added. If the keyword is not provided, then
+		  the bytes are parsed as decimal values, unless a "0x" prefix
+		  (for hexadecimal) or a "0" prefix (for octal) is provided.
+
+	**bpftool map lookup**  *MAP*  **key** [**hex**] *BYTES*
 		  Lookup **key** in the map.
 
-	**bpftool map getnext** *MAP* [**key** *BYTES*]
+	**bpftool map getnext** *MAP* [**key** [**hex**] *BYTES*]
 		  Get next key.  If *key* is not specified, get first key.
 
-	**bpftool map delete**  *MAP*  **key** *BYTES*
+	**bpftool map delete**  *MAP*  **key** [**hex**] *BYTES*
 		  Remove entry from the map.
 
 	**bpftool map pin**     *MAP*  *FILE*
@@ -98,7 +104,12 @@
   10: hash  name some_map  flags 0x0
 	key 4B  value 8B  max_entries 2048  memlock 167936B
 
-**# bpftool map update id 10 key 13 00 07 00 value 02 00 00 00 01 02 03 04**
+The following three commands are equivalent:
+
+|
+| **# bpftool map update id 10 key hex   20   c4   b7   00 value hex   0f   ff   ff   ab   01   02   03   4c**
+| **# bpftool map update id 10 key     0x20 0xc4 0xb7 0x00 value     0x0f 0xff 0xff 0xab 0x01 0x02 0x03 0x4c**
+| **# bpftool map update id 10 key       32  196  183    0 value       15  255  255  171    1    2    3   76**
 
 **# bpftool map lookup id 10 key 0 1 2 3**
 
diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool
index 490811b..852d84a 100644
--- a/tools/bpf/bpftool/bash-completion/bpftool
+++ b/tools/bpf/bpftool/bash-completion/bpftool
@@ -147,7 +147,7 @@
 
     # Deal with simplest keywords
     case $prev in
-        help|key|opcodes|visual)
+        help|hex|opcodes|visual)
             return 0
             ;;
         tag)
@@ -283,7 +283,7 @@
                             return 0
                             ;;
                         key)
-                            return 0
+                            COMPREPLY+=( $( compgen -W 'hex' -- "$cur" ) )
                             ;;
                         *)
                             _bpftool_once_attr 'key'
@@ -302,7 +302,7 @@
                             return 0
                             ;;
                         key)
-                            return 0
+                            COMPREPLY+=( $( compgen -W 'hex' -- "$cur" ) )
                             ;;
                         value)
                             # We can have bytes, or references to a prog or a
@@ -321,6 +321,8 @@
                                     return 0
                                     ;;
                                 *)
+                                    COMPREPLY+=( $( compgen -W 'hex' \
+                                        -- "$cur" ) )
                                     return 0
                                     ;;
                             esac
@@ -372,7 +374,8 @@
                     ;;
                 attach|detach)
                     local ATTACH_TYPES='ingress egress sock_create sock_ops \
-                        device'
+                        device bind4 bind6 post_bind4 post_bind6 connect4 \
+                        connect6'
                     local ATTACH_FLAGS='multi override'
                     local PROG_TYPE='id pinned tag'
                     case $prev in
@@ -380,7 +383,8 @@
                             _filedir
                             return 0
                             ;;
-                        ingress|egress|sock_create|sock_ops|device)
+                        ingress|egress|sock_create|sock_ops|device|bind4|bind6|\
+                        post_bind4|post_bind6|connect4|connect6)
                             COMPREPLY=( $( compgen -W "$PROG_TYPE" -- \
                                 "$cur" ) )
                             return 0
diff --git a/tools/bpf/bpftool/cgroup.c b/tools/bpf/bpftool/cgroup.c
index cae32a6..1351bd6 100644
--- a/tools/bpf/bpftool/cgroup.c
+++ b/tools/bpf/bpftool/cgroup.c
@@ -16,8 +16,11 @@
 #define HELP_SPEC_ATTACH_FLAGS						\
 	"ATTACH_FLAGS := { multi | override }"
 
-#define HELP_SPEC_ATTACH_TYPES						\
-	"ATTACH_TYPE := { ingress | egress | sock_create | sock_ops | device }"
+#define HELP_SPEC_ATTACH_TYPES						       \
+	"       ATTACH_TYPE := { ingress | egress | sock_create |\n"	       \
+	"                        sock_ops | device | bind4 | bind6 |\n"	       \
+	"                        post_bind4 | post_bind6 | connect4 |\n"       \
+	"                        connect6 }"
 
 static const char * const attach_type_strings[] = {
 	[BPF_CGROUP_INET_INGRESS] = "ingress",
@@ -25,6 +28,12 @@ static const char * const attach_type_strings[] = {
 	[BPF_CGROUP_INET_SOCK_CREATE] = "sock_create",
 	[BPF_CGROUP_SOCK_OPS] = "sock_ops",
 	[BPF_CGROUP_DEVICE] = "device",
+	[BPF_CGROUP_INET4_BIND] = "bind4",
+	[BPF_CGROUP_INET6_BIND] = "bind6",
+	[BPF_CGROUP_INET4_CONNECT] = "connect4",
+	[BPF_CGROUP_INET6_CONNECT] = "connect6",
+	[BPF_CGROUP_INET4_POST_BIND] = "post_bind4",
+	[BPF_CGROUP_INET6_POST_BIND] = "post_bind6",
 	[__MAX_BPF_ATTACH_TYPE] = NULL,
 };
 
@@ -282,7 +291,7 @@ static int do_help(int argc, char **argv)
 		"       %s %s detach CGROUP ATTACH_TYPE PROG\n"
 		"       %s %s help\n"
 		"\n"
-		"       " HELP_SPEC_ATTACH_TYPES "\n"
+		HELP_SPEC_ATTACH_TYPES "\n"
 		"       " HELP_SPEC_ATTACH_FLAGS "\n"
 		"       " HELP_SPEC_PROGRAM "\n"
 		"       " HELP_SPEC_OPTIONS "\n"
diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c
index f509c86..a6cdb64 100644
--- a/tools/bpf/bpftool/map.c
+++ b/tools/bpf/bpftool/map.c
@@ -283,11 +283,16 @@ static void print_entry_plain(struct bpf_map_info *info, unsigned char *key,
 static char **parse_bytes(char **argv, const char *name, unsigned char *val,
 			  unsigned int n)
 {
-	unsigned int i = 0;
+	unsigned int i = 0, base = 0;
 	char *endptr;
 
+	if (is_prefix(*argv, "hex")) {
+		base = 16;
+		argv++;
+	}
+
 	while (i < n && argv[i]) {
-		val[i] = strtoul(argv[i], &endptr, 0);
+		val[i] = strtoul(argv[i], &endptr, base);
 		if (*endptr) {
 			p_err("error parsing byte: %s", argv[i]);
 			return NULL;
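
The effect of the base selection above, sketched with plain strtoul()
calls (byte values chosen for illustration):

char *end;

strtoul("c4", &end, 16);   /* "hex" keyword: base 16, yields 196   */
strtoul("0xc4", &end, 0);  /* no keyword: base 0 detects "0x" hex  */
strtoul("196", &end, 0);   /* no keyword: plain decimal            */
strtoul("0304", &end, 0);  /* no keyword: leading "0" means octal  */
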
@@ -869,10 +874,10 @@ static int do_help(int argc, char **argv)
 	fprintf(stderr,
 		"Usage: %s %s { show | list }   [MAP]\n"
 		"       %s %s dump    MAP\n"
-		"       %s %s update  MAP  key BYTES value VALUE [UPDATE_FLAGS]\n"
-		"       %s %s lookup  MAP  key BYTES\n"
-		"       %s %s getnext MAP [key BYTES]\n"
-		"       %s %s delete  MAP  key BYTES\n"
+		"       %s %s update  MAP  key [hex] BYTES value [hex] VALUE [UPDATE_FLAGS]\n"
+		"       %s %s lookup  MAP  key [hex] BYTES\n"
+		"       %s %s getnext MAP [key [hex] BYTES]\n"
+		"       %s %s delete  MAP  key [hex] BYTES\n"
 		"       %s %s pin     MAP  FILE\n"
 		"       %s %s help\n"
 		"\n"
diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index f7a8108..548adb9 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -68,6 +68,9 @@ static const char * const prog_type_name[] = {
 	[BPF_PROG_TYPE_SOCK_OPS]	= "sock_ops",
 	[BPF_PROG_TYPE_SK_SKB]		= "sk_skb",
 	[BPF_PROG_TYPE_CGROUP_DEVICE]	= "cgroup_device",
+	[BPF_PROG_TYPE_SK_MSG]		= "sk_msg",
+	[BPF_PROG_TYPE_RAW_TRACEPOINT]	= "raw_tracepoint",
+	[BPF_PROG_TYPE_CGROUP_SOCK_ADDR] = "cgroup_sock_addr",
 };
 
 static void print_boot_time(__u64 nsecs, char *buf, unsigned int size)
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 9d07465..7f7fbb9 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -95,6 +95,7 @@ enum bpf_cmd {
 	BPF_OBJ_GET_INFO_BY_FD,
 	BPF_PROG_QUERY,
 	BPF_RAW_TRACEPOINT_OPEN,
+	BPF_BTF_LOAD,
 };
 
 enum bpf_map_type {
@@ -279,6 +280,9 @@ union bpf_attr {
 					 */
 		char	map_name[BPF_OBJ_NAME_LEN];
 		__u32	map_ifindex;	/* ifindex of netdev to create on */
+		__u32	btf_fd;		/* fd pointing to a BTF type data */
+		__u32	btf_key_id;	/* BTF type_id of the key */
+		__u32	btf_value_id;	/* BTF type_id of the value */
 	};
 
 	struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */
@@ -363,6 +367,14 @@ union bpf_attr {
 		__u64 name;
 		__u32 prog_fd;
 	} raw_tracepoint;
+
+	struct { /* anonymous struct for BPF_BTF_LOAD */
+		__aligned_u64	btf;
+		__aligned_u64	btf_log_buf;
+		__u32		btf_size;
+		__u32		btf_log_size;
+		__u32		btf_log_level;
+	};
 } __attribute__((aligned(8)));
 
 /* BPF helper function descriptions:
@@ -755,6 +767,13 @@ union bpf_attr {
  *     @addr: pointer to struct sockaddr to bind socket to
  *     @addr_len: length of sockaddr structure
  *     Return: 0 on success or negative error code
+ *
+ * int bpf_xdp_adjust_tail(xdp_md, delta)
+ *     Adjust xdp_md.data_end by delta. Only shrinking the packet's
+ *     size is supported.
+ *     @xdp_md: pointer to xdp_md
+ *     @delta: A negative integer to be added to xdp_md.data_end
+ *     Return: 0 on success or negative on error
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -821,7 +840,8 @@ union bpf_attr {
 	FN(msg_apply_bytes),		\
 	FN(msg_cork_bytes),		\
 	FN(msg_pull_data),		\
-	FN(bind),
+	FN(bind),			\
+	FN(xdp_adjust_tail),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
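
A minimal sketch of an XDP program using the new helper (a fragment
assuming the usual sample headers; the section name and the 64-byte
target are illustrative, not part of this patch):

SEC("xdp_trim")
int xdp_trim_to_64(struct xdp_md *ctx)
{
	void *data = (void *)(long)ctx->data;
	void *data_end = (void *)(long)ctx->data_end;
	int len = data_end - data;

	/* delta must be negative: only shrinking is supported */
	if (len > 64 && bpf_xdp_adjust_tail(ctx, 64 - len))
		return XDP_ABORTED;

	return XDP_PASS;
}
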
diff --git a/tools/include/uapi/linux/btf.h b/tools/include/uapi/linux/btf.h
new file mode 100644
index 0000000..74a30b1
--- /dev/null
+++ b/tools/include/uapi/linux/btf.h
@@ -0,0 +1,130 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/* Copyright (c) 2018 Facebook */
+#ifndef _UAPI__LINUX_BTF_H__
+#define _UAPI__LINUX_BTF_H__
+
+#include <linux/types.h>
+
+#define BTF_MAGIC	0xeB9F
+#define BTF_MAGIC_SWAP	0x9FeB
+#define BTF_VERSION	1
+#define BTF_FLAGS_COMPR	0x01
+
+struct btf_header {
+	__u16	magic;
+	__u8	version;
+	__u8	flags;
+
+	__u32	parent_label;
+	__u32	parent_name;
+
+	/* All offsets are in bytes relative to the end of this header */
+	__u32	label_off;	/* offset of label section	*/
+	__u32	object_off;	/* offset of data object section*/
+	__u32	func_off;	/* offset of function section	*/
+	__u32	type_off;	/* offset of type section	*/
+	__u32	str_off;	/* offset of string section	*/
+	__u32	str_len;	/* length of string section	*/
+};
+
+/* Max # of type identifiers */
+#define BTF_MAX_TYPE	0x7fffffff
+/* Max offset into the string section */
+#define BTF_MAX_NAME_OFFSET	0x7fffffff
+/* Max # of struct/union/enum members or func args */
+#define BTF_MAX_VLEN	0xffff
+
+/* The type id is referring to a parent BTF */
+#define BTF_TYPE_PARENT(id)	(((id) >> 31) & 0x1)
+#define BTF_TYPE_ID(id)		((id) & BTF_MAX_TYPE)
+
+/* String is in the ELF string section */
+#define BTF_STR_TBL_ELF_ID(ref)	(((ref) >> 31) & 0x1)
+#define BTF_STR_OFFSET(ref)	((ref) & BTF_MAX_NAME_OFFSET)
+
+struct btf_type {
+	__u32 name;
+	/* "info" bits arrangement
+	 * bits  0-15: vlen (e.g. # of struct's members)
+	 * bits 16-23: unused
+	 * bits 24-28: kind (e.g. int, ptr, array...etc)
+	 * bits 29-30: unused
+	 * bits    31: root
+	 */
+	__u32 info;
+	/* "size" is used by INT, ENUM, STRUCT and UNION.
+	 * "size" tells the size of the type it is describing.
+	 *
+	 * "type" is used by PTR, TYPEDEF, VOLATILE, CONST and RESTRICT.
+	 * "type" is a type_id referring to another type.
+	 */
+	union {
+		__u32 size;
+		__u32 type;
+	};
+};
+
+#define BTF_INFO_KIND(info)	(((info) >> 24) & 0x1f)
+#define BTF_INFO_ISROOT(info)	(!!(((info) >> 24) & 0x80))
+#define BTF_INFO_VLEN(info)	((info) & 0xffff)
+
+#define BTF_KIND_UNKN		0	/* Unknown	*/
+#define BTF_KIND_INT		1	/* Integer	*/
+#define BTF_KIND_PTR		2	/* Pointer	*/
+#define BTF_KIND_ARRAY		3	/* Array	*/
+#define BTF_KIND_STRUCT		4	/* Struct	*/
+#define BTF_KIND_UNION		5	/* Union	*/
+#define BTF_KIND_ENUM		6	/* Enumeration	*/
+#define BTF_KIND_FWD		7	/* Forward	*/
+#define BTF_KIND_TYPEDEF	8	/* Typedef	*/
+#define BTF_KIND_VOLATILE	9	/* Volatile	*/
+#define BTF_KIND_CONST		10	/* Const	*/
+#define BTF_KIND_RESTRICT	11	/* Restrict	*/
+#define BTF_KIND_MAX		11
+#define NR_BTF_KINDS		12
+
+/* For some specific BTF_KIND, "struct btf_type" is immediately
+ * followed by extra data.
+ */
+
+/* BTF_KIND_INT is followed by a u32 and the following
+ * is the 32 bits arrangement:
+ */
+#define BTF_INT_ENCODING(VAL)	(((VAL) & 0xff000000) >> 24)
+#define BTF_INT_OFFSET(VAL)	(((VAL) & 0x00ff0000) >> 16)
+#define BTF_INT_BITS(VAL)	((VAL) & 0x0000ffff)
+
+/* Attributes stored in the BTF_INT_ENCODING */
+#define BTF_INT_SIGNED	0x1
+#define BTF_INT_CHAR	0x2
+#define BTF_INT_BOOL	0x4
+#define BTF_INT_VARARGS	0x8
+
+/* BTF_KIND_ENUM is followed by multiple "struct btf_enum".
+ * The exact number of btf_enum is stored in the vlen (of the
+ * info in "struct btf_type").
+ */
+struct btf_enum {
+	__u32	name;
+	__s32	val;
+};
+
+/* BTF_KIND_ARRAY is followed by one "struct btf_array" */
+struct btf_array {
+	__u32	type;
+	__u32	index_type;
+	__u32	nelems;
+};
+
+/* BTF_KIND_STRUCT and BTF_KIND_UNION are followed
+ * by multiple "struct btf_member".  The exact number
+ * of btf_member is stored in the vlen (of the info in
+ * "struct btf_type").
+ */
+struct btf_member {
+	__u32	name;
+	__u32	type;
+	__u32	offset;	/* offset in bits */
+};
+
+#endif /* _UAPI__LINUX_BTF_H__ */
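
For example, decoding the info word of a struct with three members
(values illustrative):

__u32 info = (BTF_KIND_STRUCT << 24) | 3;	/* kind = 4, vlen = 3 */

BTF_INFO_KIND(info);	/* 4 (BTF_KIND_STRUCT)			*/
BTF_INFO_VLEN(info);	/* 3 struct btf_member records follow	*/
BTF_INFO_ISROOT(info);	/* 0, bit 31 is not set			*/
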
diff --git a/tools/lib/bpf/Build b/tools/lib/bpf/Build
index 64c679d..6070e65 100644
--- a/tools/lib/bpf/Build
+++ b/tools/lib/bpf/Build
@@ -1 +1 @@
-libbpf-y := libbpf.o bpf.o nlattr.o
+libbpf-y := libbpf.o bpf.o nlattr.o btf.o
diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index acbb3f8..76b36cc 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -73,43 +73,76 @@ static inline int sys_bpf(enum bpf_cmd cmd, union bpf_attr *attr,
 	return syscall(__NR_bpf, cmd, attr, size);
 }
 
-int bpf_create_map_node(enum bpf_map_type map_type, const char *name,
-			int key_size, int value_size, int max_entries,
-			__u32 map_flags, int node)
+int bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr)
 {
-	__u32 name_len = name ? strlen(name) : 0;
+	__u32 name_len = create_attr->name ? strlen(create_attr->name) : 0;
 	union bpf_attr attr;
 
 	memset(&attr, '\0', sizeof(attr));
 
-	attr.map_type = map_type;
-	attr.key_size = key_size;
-	attr.value_size = value_size;
-	attr.max_entries = max_entries;
-	attr.map_flags = map_flags;
-	memcpy(attr.map_name, name, min(name_len, BPF_OBJ_NAME_LEN - 1));
-
-	if (node >= 0) {
-		attr.map_flags |= BPF_F_NUMA_NODE;
-		attr.numa_node = node;
-	}
+	attr.map_type = create_attr->map_type;
+	attr.key_size = create_attr->key_size;
+	attr.value_size = create_attr->value_size;
+	attr.max_entries = create_attr->max_entries;
+	attr.map_flags = create_attr->map_flags;
+	memcpy(attr.map_name, create_attr->name,
+	       min(name_len, BPF_OBJ_NAME_LEN - 1));
+	attr.numa_node = create_attr->numa_node;
+	attr.btf_fd = create_attr->btf_fd;
+	attr.btf_key_id = create_attr->btf_key_id;
+	attr.btf_value_id = create_attr->btf_value_id;
 
 	return sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
 }
 
+int bpf_create_map_node(enum bpf_map_type map_type, const char *name,
+			int key_size, int value_size, int max_entries,
+			__u32 map_flags, int node)
+{
+	struct bpf_create_map_attr map_attr = {};
+
+	map_attr.name = name;
+	map_attr.map_type = map_type;
+	map_attr.map_flags = map_flags;
+	map_attr.key_size = key_size;
+	map_attr.value_size = value_size;
+	map_attr.max_entries = max_entries;
+	if (node >= 0) {
+		map_attr.numa_node = node;
+		map_attr.map_flags |= BPF_F_NUMA_NODE;
+	}
+
+	return bpf_create_map_xattr(&map_attr);
+}
+
 int bpf_create_map(enum bpf_map_type map_type, int key_size,
 		   int value_size, int max_entries, __u32 map_flags)
 {
-	return bpf_create_map_node(map_type, NULL, key_size, value_size,
-				   max_entries, map_flags, -1);
+	struct bpf_create_map_attr map_attr = {};
+
+	map_attr.map_type = map_type;
+	map_attr.map_flags = map_flags;
+	map_attr.key_size = key_size;
+	map_attr.value_size = value_size;
+	map_attr.max_entries = max_entries;
+
+	return bpf_create_map_xattr(&map_attr);
 }
 
 int bpf_create_map_name(enum bpf_map_type map_type, const char *name,
 			int key_size, int value_size, int max_entries,
 			__u32 map_flags)
 {
-	return bpf_create_map_node(map_type, name, key_size, value_size,
-				   max_entries, map_flags, -1);
+	struct bpf_create_map_attr map_attr = {};
+
+	map_attr.name = name;
+	map_attr.map_type = map_type;
+	map_attr.map_flags = map_flags;
+	map_attr.key_size = key_size;
+	map_attr.value_size = value_size;
+	map_attr.max_entries = max_entries;
+
+	return bpf_create_map_xattr(&map_attr);
 }
 
 int bpf_create_map_in_map_node(enum bpf_map_type map_type, const char *name,
@@ -573,3 +606,28 @@ int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags)
 	close(sock);
 	return ret;
 }
+
+int bpf_load_btf(void *btf, __u32 btf_size, char *log_buf, __u32 log_buf_size,
+		 bool do_log)
+{
+	union bpf_attr attr = {};
+	int fd;
+
+	attr.btf = ptr_to_u64(btf);
+	attr.btf_size = btf_size;
+
+retry:
+	if (do_log && log_buf && log_buf_size) {
+		attr.btf_log_level = 1;
+		attr.btf_log_size = log_buf_size;
+		attr.btf_log_buf = ptr_to_u64(log_buf);
+	}
+
+	fd = sys_bpf(BPF_BTF_LOAD, &attr, sizeof(attr));
+	if (fd == -1 && !do_log && log_buf && log_buf_size) {
+		do_log = true;
+		goto retry;
+	}
+
+	return fd;
+}
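
A usage sketch for the new wrapper; raw_btf and raw_btf_size are
assumptions standing in for a .BTF section blob. Because do_log is
false here, bpf_load_btf() retries with the log enabled only if the
first load fails, so log_buf is filled just on error:

char log_buf[65536];
int btf_fd;

btf_fd = bpf_load_btf(raw_btf, raw_btf_size, log_buf, sizeof(log_buf), false);
if (btf_fd < 0)
	fprintf(stderr, "BTF load failed:\n%s\n", log_buf);
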
diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
index 39f6a0d..553b11a 100644
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -24,8 +24,23 @@
 #define __BPF_BPF_H
 
 #include <linux/bpf.h>
+#include <stdbool.h>
 #include <stddef.h>
 
+struct bpf_create_map_attr {
+	const char *name;
+	enum bpf_map_type map_type;
+	__u32 map_flags;
+	__u32 key_size;
+	__u32 value_size;
+	__u32 max_entries;
+	__u32 numa_node;
+	__u32 btf_fd;
+	__u32 btf_key_id;
+	__u32 btf_value_id;
+};
+
+int bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr);
 int bpf_create_map_node(enum bpf_map_type map_type, const char *name,
 			int key_size, int value_size, int max_entries,
 			__u32 map_flags, int node);
@@ -87,4 +102,6 @@ int bpf_obj_get_info_by_fd(int prog_fd, void *info, __u32 *info_len);
 int bpf_prog_query(int target_fd, enum bpf_attach_type type, __u32 query_flags,
 		   __u32 *attach_flags, __u32 *prog_ids, __u32 *prog_cnt);
 int bpf_raw_tracepoint_open(const char *name, int prog_fd);
+int bpf_load_btf(void *btf, __u32 btf_size, char *log_buf, __u32 log_buf_size,
+		 bool do_log);
 #endif
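
A sketch of creating a BTF-annotated map through the new entry point;
btf_fd, key_id and value_id are assumed to come from bpf_load_btf()
and a BTF type lookup, and the map name is illustrative:

struct bpf_create_map_attr attr = {
	.name		= "example_map",
	.map_type	= BPF_MAP_TYPE_ARRAY,
	.key_size	= sizeof(__u32),
	.value_size	= sizeof(__u64),
	.max_entries	= 64,
	.btf_fd		= btf_fd,
	.btf_key_id	= key_id,
	.btf_value_id	= value_id,
};
int map_fd = bpf_create_map_xattr(&attr);
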
diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
new file mode 100644
index 0000000..58b6255
--- /dev/null
+++ b/tools/lib/bpf/btf.c
@@ -0,0 +1,374 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018 Facebook */
+
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include <unistd.h>
+#include <errno.h>
+#include <linux/err.h>
+#include <linux/btf.h>
+#include "btf.h"
+#include "bpf.h"
+
+#define elog(fmt, ...) do { if (err_log) err_log(fmt, ##__VA_ARGS__); } while (0)
+#define max(a, b) ((a) > (b) ? (a) : (b))
+#define min(a, b) ((a) < (b) ? (a) : (b))
+
+#define BTF_MAX_NR_TYPES 65535
+
+static struct btf_type btf_void;
+
+struct btf {
+	union {
+		struct btf_header *hdr;
+		void *data;
+	};
+	struct btf_type **types;
+	const char *strings;
+	void *nohdr_data;
+	uint32_t nr_types;
+	uint32_t types_size;
+	uint32_t data_size;
+	int fd;
+};
+
+static const char *btf_name_by_offset(const struct btf *btf, uint32_t offset)
+{
+	if (!BTF_STR_TBL_ELF_ID(offset) &&
+	    BTF_STR_OFFSET(offset) < btf->hdr->str_len)
+		return &btf->strings[BTF_STR_OFFSET(offset)];
+	else
+		return NULL;
+}
+
+static int btf_add_type(struct btf *btf, struct btf_type *t)
+{
+	if (btf->types_size - btf->nr_types < 2) {
+		struct btf_type **new_types;
+		u32 expand_by, new_size;
+
+		if (btf->types_size == BTF_MAX_NR_TYPES)
+			return -E2BIG;
+
+		expand_by = max(btf->types_size >> 2, 16);
+		new_size = min(BTF_MAX_NR_TYPES, btf->types_size + expand_by);
+
+		new_types = realloc(btf->types, sizeof(*new_types) * new_size);
+		if (!new_types)
+			return -ENOMEM;
+
+		if (btf->nr_types == 0)
+			new_types[0] = &btf_void;
+
+		btf->types = new_types;
+		btf->types_size = new_size;
+	}
+
+	btf->types[++(btf->nr_types)] = t;
+
+	return 0;
+}
+
+static int btf_parse_hdr(struct btf *btf, btf_print_fn_t err_log)
+{
+	const struct btf_header *hdr = btf->hdr;
+	u32 meta_left;
+
+	if (btf->data_size < sizeof(struct btf_header)) {
+		elog("BTF header not found\n");
+		return -EINVAL;
+	}
+
+	if (hdr->magic != BTF_MAGIC) {
+		elog("Invalid BTF magic:%x\n", hdr->magic);
+		return -EINVAL;
+	}
+
+	if (hdr->version != BTF_VERSION) {
+		elog("Unsupported BTF version:%u\n", hdr->version);
+		return -ENOTSUP;
+	}
+
+	if (hdr->flags) {
+		elog("Unsupported BTF flags:%x\n", hdr->flags);
+		return -ENOTSUP;
+	}
+
+	meta_left = btf->data_size - sizeof(*hdr);
+	if (!meta_left) {
+		elog("BTF has no data\n");
+		return -EINVAL;
+	}
+
+	if (meta_left < hdr->type_off) {
+		elog("Invalid BTF type section offset:%u\n", hdr->type_off);
+		return -EINVAL;
+	}
+
+	if (meta_left < hdr->str_off) {
+		elog("Invalid BTF string section offset:%u\n", hdr->str_off);
+		return -EINVAL;
+	}
+
+	if (hdr->type_off >= hdr->str_off) {
+		elog("BTF type section offset >= string section offset. No type?\n");
+		return -EINVAL;
+	}
+
+	if (hdr->type_off & 0x03) {
+		elog("BTF type section is not aligned to 4 bytes\n");
+		return -EINVAL;
+	}
+
+	btf->nohdr_data = btf->hdr + 1;
+
+	return 0;
+}
+
+static int btf_parse_str_sec(struct btf *btf, btf_print_fn_t err_log)
+{
+	const struct btf_header *hdr = btf->hdr;
+	const char *start = btf->nohdr_data + hdr->str_off;
+	const char *end = start + btf->hdr->str_len;
+
+	if (!hdr->str_len || hdr->str_len - 1 > BTF_MAX_NAME_OFFSET ||
+	    start[0] || end[-1]) {
+		elog("Invalid BTF string section\n");
+		return -EINVAL;
+	}
+
+	btf->strings = start;
+
+	return 0;
+}
+
+static int btf_parse_type_sec(struct btf *btf, btf_print_fn_t err_log)
+{
+	struct btf_header *hdr = btf->hdr;
+	void *nohdr_data = btf->nohdr_data;
+	void *next_type = nohdr_data + hdr->type_off;
+	void *end_type = nohdr_data + hdr->str_off;
+
+	while (next_type < end_type) {
+		struct btf_type *t = next_type;
+		uint16_t vlen = BTF_INFO_VLEN(t->info);
+		int err;
+
+		next_type += sizeof(*t);
+		switch (BTF_INFO_KIND(t->info)) {
+		case BTF_KIND_INT:
+			next_type += sizeof(int);
+			break;
+		case BTF_KIND_ARRAY:
+			next_type += sizeof(struct btf_array);
+			break;
+		case BTF_KIND_STRUCT:
+		case BTF_KIND_UNION:
+			next_type += vlen * sizeof(struct btf_member);
+			break;
+		case BTF_KIND_ENUM:
+			next_type += vlen * sizeof(struct btf_enum);
+			break;
+		case BTF_KIND_TYPEDEF:
+		case BTF_KIND_PTR:
+		case BTF_KIND_FWD:
+		case BTF_KIND_VOLATILE:
+		case BTF_KIND_CONST:
+		case BTF_KIND_RESTRICT:
+			break;
+		default:
+			elog("Unsupported BTF_KIND:%u\n",
+			     BTF_INFO_KIND(t->info));
+			return -EINVAL;
+		}
+
+		err = btf_add_type(btf, t);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+static const struct btf_type *btf_type_by_id(const struct btf *btf,
+					     uint32_t type_id)
+{
+	if (type_id > btf->nr_types)
+		return NULL;
+
+	return btf->types[type_id];
+}
+
+static bool btf_type_is_void(const struct btf_type *t)
+{
+	return t == &btf_void || BTF_INFO_KIND(t->info) == BTF_KIND_FWD;
+}
+
+static bool btf_type_is_void_or_null(const struct btf_type *t)
+{
+	return !t || btf_type_is_void(t);
+}
+
+static int64_t btf_type_size(const struct btf_type *t)
+{
+	switch (BTF_INFO_KIND(t->info)) {
+	case BTF_KIND_INT:
+	case BTF_KIND_STRUCT:
+	case BTF_KIND_UNION:
+	case BTF_KIND_ENUM:
+		return t->size;
+	case BTF_KIND_PTR:
+		return sizeof(void *);
+	default:
+		return -EINVAL;
+	}
+}
+
+#define MAX_RESOLVE_DEPTH 32
+
+int64_t btf__resolve_size(const struct btf *btf, uint32_t type_id)
+{
+	const struct btf_array *array;
+	const struct btf_type *t;
+	uint32_t nelems = 1;
+	int64_t size = -1;
+	int i;
+
+	t = btf_type_by_id(btf, type_id);
+	for (i = 0; i < MAX_RESOLVE_DEPTH && !btf_type_is_void_or_null(t);
+	     i++) {
+		size = btf_type_size(t);
+		if (size >= 0)
+			break;
+
+		switch (BTF_INFO_KIND(t->info)) {
+		case BTF_KIND_TYPEDEF:
+		case BTF_KIND_VOLATILE:
+		case BTF_KIND_CONST:
+		case BTF_KIND_RESTRICT:
+			type_id = t->type;
+			break;
+		case BTF_KIND_ARRAY:
+			array = (const struct btf_array *)(t + 1);
+			if (nelems && array->nelems > UINT32_MAX / nelems)
+				return -E2BIG;
+			nelems *= array->nelems;
+			type_id = array->type;
+			break;
+		default:
+			return -EINVAL;
+		}
+
+		t = btf_type_by_id(btf, type_id);
+	}
+
+	if (size < 0)
+		return -EINVAL;
+
+	if (nelems && size > UINT32_MAX / nelems)
+		return -E2BIG;
+
+	return nelems * size;
+}
+
+int32_t btf__find_by_name(const struct btf *btf, const char *type_name)
+{
+	uint32_t i;
+
+	if (!strcmp(type_name, "void"))
+		return 0;
+
+	for (i = 1; i <= btf->nr_types; i++) {
+		const struct btf_type *t = btf->types[i];
+		const char *name = btf_name_by_offset(btf, t->name);
+
+		if (name && !strcmp(type_name, name))
+			return i;
+	}
+
+	return -ENOENT;
+}
+
+void btf__free(struct btf *btf)
+{
+	if (!btf)
+		return;
+
+	if (btf->fd != -1)
+		close(btf->fd);
+
+	free(btf->data);
+	free(btf->types);
+	free(btf);
+}
+
+struct btf *btf__new(uint8_t *data, uint32_t size,
+		     btf_print_fn_t err_log)
+{
+	uint32_t log_buf_size = 0;
+	char *log_buf = NULL;
+	struct btf *btf;
+	int err;
+
+	btf = calloc(1, sizeof(struct btf));
+	if (!btf)
+		return ERR_PTR(-ENOMEM);
+
+	btf->fd = -1;
+
+	if (err_log) {
+		log_buf = malloc(BPF_LOG_BUF_SIZE);
+		if (!log_buf) {
+			err = -ENOMEM;
+			goto done;
+		}
+		*log_buf = 0;
+		log_buf_size = BPF_LOG_BUF_SIZE;
+	}
+
+	btf->data = malloc(size);
+	if (!btf->data) {
+		err = -ENOMEM;
+		goto done;
+	}
+
+	memcpy(btf->data, data, size);
+	btf->data_size = size;
+
+	btf->fd = bpf_load_btf(btf->data, btf->data_size,
+			       log_buf, log_buf_size, false);
+
+	if (btf->fd == -1) {
+		err = -errno;
+		elog("Error loading BTF: %s(%d)\n", strerror(errno), errno);
+		if (log_buf && *log_buf)
+			elog("%s\n", log_buf);
+		goto done;
+	}
+
+	err = btf_parse_hdr(btf, err_log);
+	if (err)
+		goto done;
+
+	err = btf_parse_str_sec(btf, err_log);
+	if (err)
+		goto done;
+
+	err = btf_parse_type_sec(btf, err_log);
+
+done:
+	free(log_buf);
+
+	if (err) {
+		btf__free(btf);
+		return ERR_PTR(err);
+	}
+
+	return btf;
+}
+
+int btf__fd(const struct btf *btf)
+{
+	return btf->fd;
+}
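
Putting the btf__* API together, a minimal consumer might look like
this (btf_sec_data/btf_sec_size and the type name are assumptions;
IS_ERR/PTR_ERR come from linux/err.h, as in btf.c itself):

struct btf *btf;
int32_t id;

btf = btf__new(btf_sec_data, btf_sec_size, NULL);	/* no error log */
if (IS_ERR(btf))
	return PTR_ERR(btf);

id = btf__find_by_name(btf, "example_map_key");		/* hypothetical */
if (id >= 0)
	printf("type %d is %ld bytes\n", id,
	       (long)btf__resolve_size(btf, id));

btf__free(btf);		/* also closes the fd obtained via bpf_load_btf() */
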
diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h
new file mode 100644
index 0000000..74bb344
--- /dev/null
+++ b/tools/lib/bpf/btf.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018 Facebook */
+
+#ifndef __BPF_BTF_H
+#define __BPF_BTF_H
+
+#include <stdint.h>
+
+#define BTF_ELF_SEC ".BTF"
+
+struct btf;
+
+typedef int (*btf_print_fn_t)(const char *, ...)
+	__attribute__((format(printf, 1, 2)));
+
+void btf__free(struct btf *btf);
+struct btf *btf__new(uint8_t *data, uint32_t size, btf_print_fn_t err_log);
+int32_t btf__find_by_name(const struct btf *btf, const char *type_name);
+int64_t btf__resolve_size(const struct btf *btf, uint32_t type_id);
+int btf__fd(const struct btf *btf);
+
+#endif
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 5922443..6513e0b 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -45,6 +45,7 @@
 
 #include "libbpf.h"
 #include "bpf.h"
+#include "btf.h"
 
 #ifndef EM_BPF
 #define EM_BPF 247
@@ -212,6 +213,8 @@ struct bpf_map {
 	char *name;
 	size_t offset;
 	struct bpf_map_def def;
+	uint32_t btf_key_id;
+	uint32_t btf_value_id;
 	void *priv;
 	bpf_map_clear_priv_t clear_priv;
 };
@@ -256,6 +259,8 @@ struct bpf_object {
 	 */
 	struct list_head list;
 
+	struct btf *btf;
+
 	void *priv;
 	bpf_object_clear_priv_t clear_priv;
 
@@ -819,7 +824,15 @@ static int bpf_object__elf_collect(struct bpf_object *obj)
 							data->d_size);
 		else if (strcmp(name, "maps") == 0)
 			obj->efile.maps_shndx = idx;
-		else if (sh.sh_type == SHT_SYMTAB) {
+		else if (strcmp(name, BTF_ELF_SEC) == 0) {
+			obj->btf = btf__new(data->d_buf, data->d_size,
+					    __pr_debug);
+			if (IS_ERR(obj->btf)) {
+				pr_warning("Error loading ELF section %s: %ld. Ignored and continue.\n",
+					   BTF_ELF_SEC, PTR_ERR(obj->btf));
+				obj->btf = NULL;
+			}
+		} else if (sh.sh_type == SHT_SYMTAB) {
 			if (obj->efile.symbols) {
 				pr_warning("bpf: multiple SYMTAB in %s\n",
 					   obj->path);
@@ -996,33 +1009,126 @@ bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr,
 	return 0;
 }
 
+static int bpf_map_find_btf_info(struct bpf_map *map, const struct btf *btf)
+{
+	struct bpf_map_def *def = &map->def;
+	const size_t max_name = 256;
+	int64_t key_size, value_size;
+	int32_t key_id, value_id;
+	char name[max_name];
+
+	/* Find key type by name from BTF */
+	if (snprintf(name, max_name, "%s_key", map->name) == max_name) {
+		pr_warning("map:%s length of BTF key_type:%s_key is too long\n",
+			   map->name, map->name);
+		return -EINVAL;
+	}
+
+	key_id = btf__find_by_name(btf, name);
+	if (key_id < 0) {
+		pr_debug("map:%s key_type:%s cannot be found in BTF\n",
+			 map->name, name);
+		return key_id;
+	}
+
+	key_size = btf__resolve_size(btf, key_id);
+	if (key_size < 0) {
+		pr_warning("map:%s key_type:%s cannot get the BTF type_size\n",
+			   map->name, name);
+		return key_size;
+	}
+
+	if (def->key_size != key_size) {
+		pr_warning("map:%s key_type:%s has BTF type_size:%ld != key_size:%u\n",
+			   map->name, name, key_size, def->key_size);
+		return -EINVAL;
+	}
+
+	/* Find value type from BTF */
+	if (snprintf(name, max_name, "%s_value", map->name) == max_name) {
+		pr_warning("map:%s length of BTF value_type:%s_value is too long\n",
+			  map->name, map->name);
+		return -EINVAL;
+	}
+
+	value_id = btf__find_by_name(btf, name);
+	if (value_id < 0) {
+		pr_debug("map:%s value_type:%s cannot be found in BTF\n",
+			 map->name, name);
+		return value_id;
+	}
+
+	value_size = btf__resolve_size(btf, value_id);
+	if (value_size < 0) {
+		pr_warning("map:%s value_type:%s cannot get the BTF type_size\n",
+			   map->name, name);
+		return value_size;
+	}
+
+	if (def->value_size != value_size) {
+		pr_warning("map:%s value_type:%s has BTF type_size:%ld != value_size:%u\n",
+			   map->name, name, value_size, def->value_size);
+		return -EINVAL;
+	}
+
+	map->btf_key_id = key_id;
+	map->btf_value_id = value_id;
+
+	return 0;
+}
+
 static int
 bpf_object__create_maps(struct bpf_object *obj)
 {
+	struct bpf_create_map_attr create_attr = {};
 	unsigned int i;
+	int err;
 
 	for (i = 0; i < obj->nr_maps; i++) {
-		struct bpf_map_def *def = &obj->maps[i].def;
-		int *pfd = &obj->maps[i].fd;
+		struct bpf_map *map = &obj->maps[i];
+		struct bpf_map_def *def = &map->def;
+		int *pfd = &map->fd;
 
-		*pfd = bpf_create_map_name(def->type,
-					   obj->maps[i].name,
-					   def->key_size,
-					   def->value_size,
-					   def->max_entries,
-					   def->map_flags);
+		create_attr.name = map->name;
+		create_attr.map_type = def->type;
+		create_attr.map_flags = def->map_flags;
+		create_attr.key_size = def->key_size;
+		create_attr.value_size = def->value_size;
+		create_attr.max_entries = def->max_entries;
+		create_attr.btf_fd = 0;
+		create_attr.btf_key_id = 0;
+		create_attr.btf_value_id = 0;
+
+		if (obj->btf && !bpf_map_find_btf_info(map, obj->btf)) {
+			create_attr.btf_fd = btf__fd(obj->btf);
+			create_attr.btf_key_id = map->btf_key_id;
+			create_attr.btf_value_id = map->btf_value_id;
+		}
+
+		*pfd = bpf_create_map_xattr(&create_attr);
+		if (*pfd < 0 && create_attr.btf_key_id) {
+			pr_warning("Error in bpf_create_map_xattr(%s):%s(%d). Retrying without BTF.\n",
+				   map->name, strerror(errno), errno);
+			create_attr.btf_fd = 0;
+			create_attr.btf_key_id = 0;
+			create_attr.btf_value_id = 0;
+			map->btf_key_id = 0;
+			map->btf_value_id = 0;
+			*pfd = bpf_create_map_xattr(&create_attr);
+		}
+
 		if (*pfd < 0) {
 			size_t j;
-			int err = *pfd;
 
+			err = *pfd;
 			pr_warning("failed to create map (name: '%s'): %s\n",
-				   obj->maps[i].name,
+				   map->name,
 				   strerror(errno));
 			for (j = 0; j < i; j++)
 				zclose(obj->maps[j].fd);
 			return err;
 		}
-		pr_debug("create map %s: fd=%d\n", obj->maps[i].name, *pfd);
+		pr_debug("create map %s: fd=%d\n", map->name, *pfd);
 	}
 
 	return 0;
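
The naming convention bpf_map_find_btf_info() relies on, sketched for a
hypothetical map named "my_map": libbpf looks up the BTF types
"my_map_key" and "my_map_value" and cross-checks their resolved sizes
against the map definition:

/* In the BPF object's C source (illustrative) */
struct my_map_key {
	__u32 index;		/* must match def->key_size == 4   */
};

struct my_map_value {
	__u64 packets;
	__u64 bytes;		/* must match def->value_size == 16 */
};
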
@@ -1641,6 +1747,7 @@ void bpf_object__close(struct bpf_object *obj)
 
 	bpf_object__elf_finish(obj);
 	bpf_object__unload(obj);
+	btf__free(obj->btf);
 
 	for (i = 0; i < obj->nr_maps; i++) {
 		zfree(&obj->maps[i].name);
@@ -1692,6 +1799,11 @@ unsigned int bpf_object__kversion(struct bpf_object *obj)
 	return obj ? obj->kern_version : 0;
 }
 
+int bpf_object__btf_fd(const struct bpf_object *obj)
+{
+	return obj->btf ? btf__fd(obj->btf) : -1;
+}
+
 int bpf_object__set_priv(struct bpf_object *obj, void *priv,
 			 bpf_object_clear_priv_t clear_priv)
 {
@@ -1845,6 +1957,7 @@ BPF_PROG_TYPE_FNS(kprobe, BPF_PROG_TYPE_KPROBE);
 BPF_PROG_TYPE_FNS(sched_cls, BPF_PROG_TYPE_SCHED_CLS);
 BPF_PROG_TYPE_FNS(sched_act, BPF_PROG_TYPE_SCHED_ACT);
 BPF_PROG_TYPE_FNS(tracepoint, BPF_PROG_TYPE_TRACEPOINT);
+BPF_PROG_TYPE_FNS(raw_tracepoint, BPF_PROG_TYPE_RAW_TRACEPOINT);
 BPF_PROG_TYPE_FNS(xdp, BPF_PROG_TYPE_XDP);
 BPF_PROG_TYPE_FNS(perf_event, BPF_PROG_TYPE_PERF_EVENT);
 
@@ -1859,6 +1972,9 @@ static void bpf_program__set_expected_attach_type(struct bpf_program *prog,
 
 #define BPF_PROG_SEC(string, ptype) BPF_PROG_SEC_FULL(string, ptype, 0)
 
+#define BPF_S_PROG_SEC(string, ptype) \
+	BPF_PROG_SEC_FULL(string, BPF_PROG_TYPE_CGROUP_SOCK, ptype)
+
 #define BPF_SA_PROG_SEC(string, ptype) \
 	BPF_PROG_SEC_FULL(string, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, ptype)
 
@@ -1874,6 +1990,7 @@ static const struct {
 	BPF_PROG_SEC("classifier",	BPF_PROG_TYPE_SCHED_CLS),
 	BPF_PROG_SEC("action",		BPF_PROG_TYPE_SCHED_ACT),
 	BPF_PROG_SEC("tracepoint/",	BPF_PROG_TYPE_TRACEPOINT),
+	BPF_PROG_SEC("raw_tracepoint/",	BPF_PROG_TYPE_RAW_TRACEPOINT),
 	BPF_PROG_SEC("xdp",		BPF_PROG_TYPE_XDP),
 	BPF_PROG_SEC("perf_event",	BPF_PROG_TYPE_PERF_EVENT),
 	BPF_PROG_SEC("cgroup/skb",	BPF_PROG_TYPE_CGROUP_SKB),
@@ -1889,10 +2006,13 @@ static const struct {
 	BPF_SA_PROG_SEC("cgroup/bind6",	BPF_CGROUP_INET6_BIND),
 	BPF_SA_PROG_SEC("cgroup/connect4", BPF_CGROUP_INET4_CONNECT),
 	BPF_SA_PROG_SEC("cgroup/connect6", BPF_CGROUP_INET6_CONNECT),
+	BPF_S_PROG_SEC("cgroup/post_bind4", BPF_CGROUP_INET4_POST_BIND),
+	BPF_S_PROG_SEC("cgroup/post_bind6", BPF_CGROUP_INET6_POST_BIND),
 };
 
 #undef BPF_PROG_SEC
 #undef BPF_PROG_SEC_FULL
+#undef BPF_S_PROG_SEC
 #undef BPF_SA_PROG_SEC
 
 static int bpf_program__identify_section(struct bpf_program *prog)
@@ -1929,6 +2049,16 @@ const char *bpf_map__name(struct bpf_map *map)
 	return map ? map->name : NULL;
 }
 
+uint32_t bpf_map__btf_key_id(const struct bpf_map *map)
+{
+	return map ? map->btf_key_id : 0;
+}
+
+uint32_t bpf_map__btf_value_id(const struct bpf_map *map)
+{
+	return map ? map->btf_value_id : 0;
+}
+
 int bpf_map__set_priv(struct bpf_map *map, void *priv,
 		     bpf_map_clear_priv_t clear_priv)
 {
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index a3a62a5..d6ac4fa 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -78,6 +78,7 @@ int bpf_object__load(struct bpf_object *obj);
 int bpf_object__unload(struct bpf_object *obj);
 const char *bpf_object__name(struct bpf_object *obj);
 unsigned int bpf_object__kversion(struct bpf_object *obj);
+int bpf_object__btf_fd(const struct bpf_object *obj);
 
 struct bpf_object *bpf_object__next(struct bpf_object *prev);
 #define bpf_object__for_each_safe(pos, tmp)			\
@@ -185,6 +186,7 @@ int bpf_program__nth_fd(struct bpf_program *prog, int n);
  */
 int bpf_program__set_socket_filter(struct bpf_program *prog);
 int bpf_program__set_tracepoint(struct bpf_program *prog);
+int bpf_program__set_raw_tracepoint(struct bpf_program *prog);
 int bpf_program__set_kprobe(struct bpf_program *prog);
 int bpf_program__set_sched_cls(struct bpf_program *prog);
 int bpf_program__set_sched_act(struct bpf_program *prog);
@@ -194,6 +196,7 @@ void bpf_program__set_type(struct bpf_program *prog, enum bpf_prog_type type);
 
 bool bpf_program__is_socket_filter(struct bpf_program *prog);
 bool bpf_program__is_tracepoint(struct bpf_program *prog);
+bool bpf_program__is_raw_tracepoint(struct bpf_program *prog);
 bool bpf_program__is_kprobe(struct bpf_program *prog);
 bool bpf_program__is_sched_cls(struct bpf_program *prog);
 bool bpf_program__is_sched_act(struct bpf_program *prog);
@@ -239,6 +242,8 @@ bpf_map__next(struct bpf_map *map, struct bpf_object *obj);
 int bpf_map__fd(struct bpf_map *map);
 const struct bpf_map_def *bpf_map__def(struct bpf_map *map);
 const char *bpf_map__name(struct bpf_map *map);
+uint32_t bpf_map__btf_key_id(const struct bpf_map *map);
+uint32_t bpf_map__btf_value_id(const struct bpf_map *map);
 
 typedef void (*bpf_map_clear_priv_t)(struct bpf_map *, void *);
 int bpf_map__set_priv(struct bpf_map *map, void *priv,
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 0a315dd..0b72cc7 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -24,14 +24,15 @@
 # Order corresponds to 'make run_tests' order
 TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \
 	test_align test_verifier_log test_dev_cgroup test_tcpbpf_user \
-	test_sock test_sock_addr
+	test_sock test_sock_addr test_btf
 
 TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test_obj_id.o \
 	test_pkt_md_access.o test_xdp_redirect.o test_xdp_meta.o sockmap_parse_prog.o     \
 	sockmap_verdict_prog.o dev_cgroup.o sample_ret0.o test_tracepoint.o \
 	test_l4lb_noinline.o test_xdp_noinline.o test_stacktrace_map.o \
 	sample_map_ret0.o test_tcpbpf_kern.o test_stacktrace_build_id.o \
-	sockmap_tcp_msg_prog.o connect4_prog.o connect6_prog.o
+	sockmap_tcp_msg_prog.o connect4_prog.o connect6_prog.o test_adjust_tail.o \
+	test_btf_haskv.o test_btf_nokv.o
 
 # Order corresponds to 'make run_tests' order
 TEST_PROGS := test_kmod.sh \
@@ -66,6 +67,8 @@
 
 CLANG ?= clang
 LLC   ?= llc
+LLVM_OBJCOPY ?= llvm-objcopy
+BTF_PAHOLE ?= pahole
 
 PROBE := $(shell $(LLC) -march=bpf -mcpu=probe -filetype=null /dev/null 2>&1)
 
@@ -83,9 +86,26 @@
 $(OUTPUT)/test_l4lb_noinline.o: CLANG_FLAGS += -fno-inline
 $(OUTPUT)/test_xdp_noinline.o: CLANG_FLAGS += -fno-inline
 
+BTF_LLC_PROBE := $(shell $(LLC) -march=bpf -mattr=help |& grep dwarfris)
+BTF_PAHOLE_PROBE := $(shell $(BTF_PAHOLE) --help |& grep BTF)
+BTF_OBJCOPY_PROBE := $(shell $(LLVM_OBJCOPY) --version |& grep LLVM)
+
+ifneq ($(BTF_LLC_PROBE),)
+ifneq ($(BTF_PAHOLE_PROBE),)
+ifneq ($(BTF_OBJCOPY_PROBE),)
+	CLANG_FLAGS += -g
+	LLC_FLAGS += -mattr=dwarfris
+	DWARF2BTF = y
+endif
+endif
+endif
+
 $(OUTPUT)/%.o: %.c
 	$(CLANG) $(CLANG_FLAGS) \
 		 -O2 -target bpf -emit-llvm -c $< -o - |      \
-	$(LLC) -march=bpf -mcpu=$(CPU) -filetype=obj -o $@
+	$(LLC) -march=bpf -mcpu=$(CPU) $(LLC_FLAGS) -filetype=obj -o $@
+ifeq ($(DWARF2BTF),y)
+	$(BTF_PAHOLE) -J $@
+endif
 
 EXTRA_CLEAN := $(TEST_CUSTOM_PROGS)
diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h
index d8223d9..9271576 100644
--- a/tools/testing/selftests/bpf/bpf_helpers.h
+++ b/tools/testing/selftests/bpf/bpf_helpers.h
@@ -96,6 +96,9 @@ static int (*bpf_msg_pull_data)(void *ctx, int start, int end, int flags) =
 	(void *) BPF_FUNC_msg_pull_data;
 static int (*bpf_bind)(void *ctx, void *addr, int addr_len) =
 	(void *) BPF_FUNC_bind;
+static int (*bpf_xdp_adjust_tail)(void *ctx, int offset) =
+	(void *) BPF_FUNC_xdp_adjust_tail;
+
 
 /* llvm builtin functions that eBPF C program may use to
  * emit BPF_LD_ABS and BPF_LD_IND instructions
@@ -129,6 +132,8 @@ static int (*bpf_l3_csum_replace)(void *ctx, int off, int from, int to, int flag
 	(void *) BPF_FUNC_l3_csum_replace;
 static int (*bpf_l4_csum_replace)(void *ctx, int off, int from, int to, int flags) =
 	(void *) BPF_FUNC_l4_csum_replace;
+static int (*bpf_csum_diff)(void *from, int from_size, void *to, int to_size, int seed) =
+	(void *) BPF_FUNC_csum_diff;
 static int (*bpf_skb_under_cgroup)(void *ctx, void *map, int index) =
 	(void *) BPF_FUNC_skb_under_cgroup;
 static int (*bpf_skb_change_head)(void *, int len, int flags) =
diff --git a/tools/testing/selftests/bpf/test_adjust_tail.c b/tools/testing/selftests/bpf/test_adjust_tail.c
new file mode 100644
index 0000000..4cd5e86
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_adjust_tail.c
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: GPL-2.0
+ * Copyright (c) 2018 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <linux/bpf.h>
+#include <linux/if_ether.h>
+#include "bpf_helpers.h"
+
+int _version SEC("version") = 1;
+
+SEC("xdp_adjust_tail")
+int _xdp_adjust_tail(struct xdp_md *xdp)
+{
+	void *data_end = (void *)(long)xdp->data_end;
+	void *data = (void *)(long)xdp->data;
+	int offset = 0;
+
+	if (data_end - data == 54)
+		offset = 256;
+	else
+		offset = 20;
+	if (bpf_xdp_adjust_tail(xdp, 0 - offset))
+		return XDP_DROP;
+	return XDP_TX;
+}
+
+char _license[] SEC("license") = "GPL";
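
The magic numbers above, spelled out (the test's intent is inferred
from the sizes):

/* 14 (ethhdr) + 20 (iphdr) + 20 (tcphdr) = 54 bytes, a minimal
 * TCP/IPv4 frame: shrinking it by 256 cannot succeed, so
 * bpf_xdp_adjust_tail() fails and the packet is dropped. Any other
 * size is trimmed by a valid 20 bytes and sent back with XDP_TX.
 */
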
diff --git a/tools/testing/selftests/bpf/test_btf.c b/tools/testing/selftests/bpf/test_btf.c
new file mode 100644
index 0000000..7b39b1f
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_btf.c
@@ -0,0 +1,1669 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018 Facebook */
+
+#include <linux/bpf.h>
+#include <linux/btf.h>
+#include <linux/err.h>
+#include <bpf/bpf.h>
+#include <sys/resource.h>
+#include <libelf.h>
+#include <gelf.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <bpf/libbpf.h>
+#include <bpf/btf.h>
+
+#include "bpf_rlimit.h"
+
+#define min(a, b) ((a) < (b) ? (a) : (b))
+#define __printf(a, b)	__attribute__((format(printf, a, b)))
+
+__printf(1, 2)
+static int __base_pr(const char *format, ...)
+{
+	va_list args;
+	int err;
+
+	va_start(args, format);
+	err = vfprintf(stderr, format, args);
+	va_end(args);
+	return err;
+}
+
+#define BTF_INFO_ENC(kind, root, vlen)			\
+	((!!(root) << 31) | ((kind) << 24) | ((vlen) & BTF_MAX_VLEN))
+
+#define BTF_TYPE_ENC(name, info, size_or_type)	\
+	(name), (info), (size_or_type)
+
+#define BTF_INT_ENC(encoding, bits_offset, nr_bits)	\
+	((encoding) << 24 | (bits_offset) << 16 | (nr_bits))
+#define BTF_TYPE_INT_ENC(name, encoding, bits_offset, bits, sz)	\
+	BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_INT, 0, 0), sz),	\
+	BTF_INT_ENC(encoding, bits_offset, bits)
+
+#define BTF_ARRAY_ENC(type, index_type, nr_elems)	\
+	(type), (index_type), (nr_elems)
+#define BTF_TYPE_ARRAY_ENC(type, index_type, nr_elems) \
+	BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_ARRAY, 0, 0), 0), \
+	BTF_ARRAY_ENC(type, index_type, nr_elems)
+
+#define BTF_MEMBER_ENC(name, type, bits_offset)	\
+	(name), (type), (bits_offset)
+#define BTF_ENUM_ENC(name, val) (name), (val)
+
+#define BTF_TYPEDEF_ENC(name, type) \
+	BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_TYPEDEF, 0, 0), type)
+
+#define BTF_PTR_ENC(name, type) \
+	BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), type)
+
+#define BTF_END_RAW 0xdeadbeef
+#define NAME_TBD 0xdeadb33f
+
+#define MAX_NR_RAW_TYPES 1024
+#define BTF_LOG_BUF_SIZE 65535
+
+#ifndef ARRAY_SIZE
+# define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+#endif
+
+static struct args {
+	unsigned int raw_test_num;
+	unsigned int file_test_num;
+	unsigned int get_info_test_num;
+	bool raw_test;
+	bool file_test;
+	bool get_info_test;
+	bool pprint_test;
+	bool always_log;
+} args;
+
+static char btf_log_buf[BTF_LOG_BUF_SIZE];
+
+static struct btf_header hdr_tmpl = {
+	.magic = BTF_MAGIC,
+	.version = BTF_VERSION,
+};
+
+struct btf_raw_test {
+	const char *descr;
+	const char *str_sec;
+	const char *map_name;
+	__u32 raw_types[MAX_NR_RAW_TYPES];
+	__u32 str_sec_size;
+	enum bpf_map_type map_type;
+	__u32 key_size;
+	__u32 value_size;
+	__u32 key_id;
+	__u32 value_id;
+	__u32 max_entries;
+	bool btf_load_err;
+	bool map_create_err;
+	int type_off_delta;
+	int str_off_delta;
+	int str_len_delta;
+};
+
+static struct btf_raw_test raw_tests[] = {
+/* enum E {
+ *     E0,
+ *     E1,
+ * };
+ *
+ * struct A {
+ *	int m;
+ *	unsigned long long n;
+ *	char o;
+ *	[3 bytes hole]
+ *	int p[8];
+ *	int q[4][8];
+ *	enum E r;
+ * };
+ */
+{
+	.descr = "struct test #1",
+	.raw_types = {
+		/* int */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),	/* [1] */
+		/* unsigned long long */
+		BTF_TYPE_INT_ENC(0, 0, 0, 64, 8),		/* [2] */
+		/* char */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 8, 1),	/* [3] */
+		/* int[8] */
+		BTF_TYPE_ARRAY_ENC(1, 1, 8),			/* [4] */
+		/* struct A { */				/* [5] */
+		BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 6), 180),
+		BTF_MEMBER_ENC(NAME_TBD, 1, 0),	/* int m;		*/
+		BTF_MEMBER_ENC(NAME_TBD, 2, 32),/* unsigned long long n;*/
+		BTF_MEMBER_ENC(NAME_TBD, 3, 96),/* char o;		*/
+		BTF_MEMBER_ENC(NAME_TBD, 4, 128),/* int p[8]		*/
+		BTF_MEMBER_ENC(NAME_TBD, 6, 384),/* int q[4][8]		*/
+		BTF_MEMBER_ENC(NAME_TBD, 7, 1408), /* enum E r		*/
+		/* } */
+		/* int[4][8] */
+		BTF_TYPE_ARRAY_ENC(4, 1, 4),			/* [6] */
+		BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_ENUM, 0, 2), sizeof(int)),
+		BTF_ENUM_ENC(NAME_TBD, 0),
+		BTF_ENUM_ENC(NAME_TBD, 1),
+		BTF_END_RAW,
+	},
+	.str_sec = "\0A\0m\0n\0o\0p\0q\0r\0E\0E0\0E1",
+	.str_sec_size = sizeof("\0A\0m\0n\0o\0p\0q\0r\0E\0E0\0E1"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "struct_test1_map",
+	.key_size = sizeof(int),
+	.value_size = 180,
+	.key_id = 1,
+	.value_id = 5,
+	.max_entries = 4,
+},
+
+/* typedef struct b Struct_B;
+ *
+ * struct A {
+ *     int m;
+ *     struct b n[4];
+ *     const Struct_B o[4];
+ * };
+ *
+ * struct B {
+ *     int m;
+ *     int n;
+ * };
+ */
+{
+	.descr = "struct test #2",
+	.raw_types = {
+		/* int */					/* [1] */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+		/* struct b [4] */				/* [2] */
+		BTF_TYPE_ARRAY_ENC(4, 1, 4),
+
+		/* struct A { */				/* [3] */
+		BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 3), 68),
+		BTF_MEMBER_ENC(NAME_TBD, 1, 0),	/* int m;		*/
+		BTF_MEMBER_ENC(NAME_TBD, 2, 32),/* struct B n[4]	*/
+		BTF_MEMBER_ENC(NAME_TBD, 8, 288),/* const Struct_B o[4];*/
+		/* } */
+
+		/* struct B { */				/* [4] */
+		BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), 8),
+		BTF_MEMBER_ENC(NAME_TBD, 1, 0),	/* int m; */
+		BTF_MEMBER_ENC(NAME_TBD, 1, 32),/* int n; */
+		/* } */
+
+		/* const int */					/* [5] */
+		BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 1),
+		/* typedef struct b Struct_B */	/* [6] */
+		BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_TYPEDEF, 0, 0), 4),
+		/* const Struct_B */				/* [7] */
+		BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 6),
+		/* const Struct_B [4] */			/* [8] */
+		BTF_TYPE_ARRAY_ENC(7, 1, 4),
+		BTF_END_RAW,
+	},
+	.str_sec = "\0A\0m\0n\0o\0B\0m\0n\0Struct_B",
+	.str_sec_size = sizeof("\0A\0m\0n\0o\0B\0m\0n\0Struct_B"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "struct_test2_map",
+	.key_size = sizeof(int),
+	.value_size = 68,
+	.key_id = 1,
+	.value_id = 3,
+	.max_entries = 4,
+},
+
+/* Test member exceeds the size of struct.
+ *
+ * struct A {
+ *     int m;
+ *     int n;
+ * };
+ */
+{
+	.descr = "size check test #1",
+	.raw_types = {
+		/* int */					/* [1] */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+		/* struct A { */				/* [2] */
+		BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), sizeof(int) * 2 -  1),
+		BTF_MEMBER_ENC(NAME_TBD, 1, 0),	/* int m; */
+		BTF_MEMBER_ENC(NAME_TBD, 2, 32),/* int n; */
+		/* } */
+		BTF_END_RAW,
+	},
+	.str_sec = "\0A\0m\0n",
+	.str_sec_size = sizeof("\0A\0m\0n"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "size_check1_map",
+	.key_size = sizeof(int),
+	.value_size = 1,
+	.key_id = 1,
+	.value_id = 2,
+	.max_entries = 4,
+	.btf_load_err = true,
+},
+
+/* Test member exceeds the size of struct
+ *
+ * struct A {
+ *     int m;
+ *     int n[2];
+ * };
+ */
+{
+	.descr = "size check test #2",
+	.raw_types = {
+		/* int */					/* [1] */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, sizeof(int)),
+		/* int[2] */					/* [2] */
+		BTF_TYPE_ARRAY_ENC(1, 1, 2),
+		/* struct A { */				/* [3] */
+		BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), sizeof(int) * 3 - 1),
+		BTF_MEMBER_ENC(NAME_TBD, 1, 0),	/* int m; */
+		BTF_MEMBER_ENC(NAME_TBD, 2, 32),/* int n[2]; */
+		/* } */
+		BTF_END_RAW,
+	},
+	.str_sec = "\0A\0m\0n",
+	.str_sec_size = sizeof("\0A\0m\0n"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "size_check2_map",
+	.key_size = sizeof(int),
+	.value_size = 1,
+	.key_id = 1,
+	.value_id = 3,
+	.max_entries = 4,
+	.btf_load_err = true,
+
+},
+
+/* Test member exceeds the size of struct
+ *
+ * struct A {
+ *     int m;
+ *     void *n;
+ * };
+ */
+{
+	.descr = "size check test #3",
+	.raw_types = {
+		/* int */					/* [1] */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, sizeof(int)),
+		/* void* */					/* [2] */
+		BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 0),
+		/* struct A { */				/* [3] */
+		BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), sizeof(int) + sizeof(void *) - 1),
+		BTF_MEMBER_ENC(NAME_TBD, 1, 0),	/* int m; */
+		BTF_MEMBER_ENC(NAME_TBD, 2, 32),/* void *n; */
+		/* } */
+		BTF_END_RAW,
+	},
+	.str_sec = "\0A\0m\0n",
+	.str_sec_size = sizeof("\0A\0m\0n"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "size_check3_map",
+	.key_size = sizeof(int),
+	.value_size = 1,
+	.key_id = 1,
+	.value_id = 3,
+	.max_entries = 4,
+	.btf_load_err = true,
+},
+
+/* Test member exceeds the size of struct
+ *
+ * enum E {
+ *     E0,
+ *     E1,
+ * };
+ *
+ * struct A {
+ *     int m;
+ *     enum E n;
+ * };
+ */
+{
+	.descr = "size check test #4",
+	.raw_types = {
+		/* int */			/* [1] */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, sizeof(int)),
+		/* enum E { */			/* [2] */
+		BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_ENUM, 0, 2), sizeof(int)),
+		BTF_ENUM_ENC(NAME_TBD, 0),
+		BTF_ENUM_ENC(NAME_TBD, 1),
+		/* } */
+		/* struct A { */		/* [3] */
+		BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), sizeof(int) * 2 - 1),
+		BTF_MEMBER_ENC(NAME_TBD, 1, 0),	/* int m; */
+		BTF_MEMBER_ENC(NAME_TBD, 2, 32),/* enum E n; */
+		/* } */
+		BTF_END_RAW,
+	},
+	.str_sec = "\0E\0E0\0E1\0A\0m\0n",
+	.str_sec_size = sizeof("\0E\0E0\0E1\0A\0m\0n"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "size_check4_map",
+	.key_size = sizeof(int),
+	.value_size = 1,
+	.key_id = 1,
+	.value_id = 3,
+	.max_entries = 4,
+	.btf_load_err = true,
+},
+
+/* typedef const void * const_void_ptr;
+ * struct A {
+ *	const_void_ptr m;
+ * };
+ */
+{
+	.descr = "void test #1",
+	.raw_types = {
+		/* int */		/* [1] */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+		/* const void */	/* [2] */
+		BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 0),
+		/* const void* */	/* [3] */
+		BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 2),
+		/* typedef const void * const_void_ptr */
+		BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 3),
+		/* struct A { */	/* [4] */
+		BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), sizeof(void *)),
+		/* const_void_ptr m; */
+		BTF_MEMBER_ENC(NAME_TBD, 3, 0),
+		/* } */
+		BTF_END_RAW,
+	},
+	.str_sec = "\0const_void_ptr\0A\0m",
+	.str_sec_size = sizeof("\0const_void_ptr\0A\0m"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "void_test1_map",
+	.key_size = sizeof(int),
+	.value_size = sizeof(void *),
+	.key_id = 1,
+	.value_id = 4,
+	.max_entries = 4,
+},
+
+/* struct A {
+ *     const void m;
+ * };
+ */
+{
+	.descr = "void test #2",
+	.raw_types = {
+		/* int */		/* [1] */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+		/* const void */	/* [2] */
+		BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 0),
+		/* struct A { */	/* [3] */
+		BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), 8),
+		/* const void m; */
+		BTF_MEMBER_ENC(NAME_TBD, 2, 0),
+		/* } */
+		BTF_END_RAW,
+	},
+	.str_sec = "\0A\0m",
+	.str_sec_size = sizeof("\0A\0m"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "void_test2_map",
+	.key_size = sizeof(int),
+	.value_size = sizeof(void *),
+	.key_id = 1,
+	.value_id = 3,
+	.max_entries = 4,
+	.btf_load_err = true,
+},
+
+/* typedef const void * const_void_ptr;
+ * const_void_ptr[4]
+ */
+{
+	.descr = "void test #3",
+	.raw_types = {
+		/* int */		/* [1] */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+		/* const void */	/* [2] */
+		BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 0),
+		/* const void* */	/* [3] */
+		BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 2),
+		/* typedef const void * const_void_ptr */
+		BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 3),
+		/* const_void_ptr[4] */	/* [4] */
+		BTF_TYPE_ARRAY_ENC(3, 1, 4),
+		BTF_END_RAW,
+	},
+	.str_sec = "\0const_void_ptr",
+	.str_sec_size = sizeof("\0const_void_ptr"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "void_test3_map",
+	.key_size = sizeof(int),
+	.value_size = sizeof(void *) * 4,
+	.key_id = 1,
+	.value_id = 4,
+	.max_entries = 4,
+},
+
+/* const void[4]  */
+{
+	.descr = "void test #4",
+	.raw_types = {
+		/* int */		/* [1] */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+		/* const void */	/* [2] */
+		BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 0),
+		/* const void[4] */	/* [3] */
+		BTF_TYPE_ARRAY_ENC(2, 1, 4),
+		BTF_END_RAW,
+	},
+	.str_sec = "\0A\0m",
+	.str_sec_size = sizeof("\0A\0m"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "void_test4_map",
+	.key_size = sizeof(int),
+	.value_size = sizeof(void *) * 4,
+	.key_id = 1,
+	.value_id = 3,
+	.max_entries = 4,
+	.btf_load_err = true,
+},
+
+/* Array_A  <------------------+
+ *     elem_type == Array_B    |
+ *                    |        |
+ *                    |        |
+ * Array_B  <-------- +        |
+ *      elem_type == Array A --+
+ */
+{
+	.descr = "loop test #1",
+	.raw_types = {
+		/* int */			/* [1] */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+		/* Array_A */			/* [2] */
+		BTF_TYPE_ARRAY_ENC(3, 1, 8),
+		/* Array_B */			/* [3] */
+		BTF_TYPE_ARRAY_ENC(2, 1, 8),
+		BTF_END_RAW,
+	},
+	.str_sec = "",
+	.str_sec_size = sizeof(""),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "loop_test1_map",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int) * 8,
+	.key_id = 1,
+	.value_id = 2,
+	.max_entries = 4,
+	.btf_load_err = true,
+},
+
+/* typedef is _before_ the BTF type of Array_A and Array_B
+ *
+ * typedef Array_B int_array;
+ *
+ * Array_A  <------------------+
+ *     elem_type == int_array  |
+ *                    |        |
+ *                    |        |
+ * Array_B  <-------- +        |
+ *      elem_type == Array_A --+
+ */
+{
+	.descr = "loop test #2",
+	.raw_types = {
+		/* int */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),	/* [1] */
+		/* typedef Array_B int_array */
+		BTF_TYPEDEF_ENC(1, 4),				/* [2] */
+		/* Array_A */
+		BTF_TYPE_ARRAY_ENC(2, 1, 8),			/* [3] */
+		/* Array_B */
+		BTF_TYPE_ARRAY_ENC(3, 1, 8),			/* [4] */
+
+		BTF_END_RAW,
+	},
+	.str_sec = "\0int_array\0",
+	.str_sec_size = sizeof("\0int_array"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "loop_test2_map",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int) * 8,
+	.key_id = 1,
+	.value_id = 2,
+	.max_entries = 4,
+	.btf_load_err = true,
+},
+
+/* Array_A  <------------------+
+ *     elem_type == Array_B    |
+ *                    |        |
+ *                    |        |
+ * Array_B  <-------- +        |
+ *      elem_type == Array_A --+
+ */
+{
+	.descr = "loop test #3",
+	.raw_types = {
+		/* int */				/* [1] */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+		/* Array_A */				/* [2] */
+		BTF_TYPE_ARRAY_ENC(3, 1, 8),
+		/* Array_B */				/* [3] */
+		BTF_TYPE_ARRAY_ENC(2, 1, 8),
+
+		BTF_END_RAW,
+	},
+	.str_sec = "",
+	.str_sec_size = sizeof(""),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "loop_test3_map",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int) * 8,
+	.key_id = 1,
+	.value_id = 2,
+	.max_entries = 4,
+	.btf_load_err = true,
+},
+
+/* typedef is _between_ the BTF type of Array_A and Array_B
+ *
+ * typedef Array_B int_array;
+ *
+ * Array_A  <------------------+
+ *     elem_type == int_array  |
+ *                    |        |
+ *                    |        |
+ * Array_B  <-------- +        |
+ *      elem_type == Array_A --+
+ */
+{
+	.descr = "loop test #4",
+	.raw_types = {
+		/* int */				/* [1] */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+		/* Array_A */				/* [2] */
+		BTF_TYPE_ARRAY_ENC(3, 1, 8),
+		/* typedef Array_B int_array */		/* [3] */
+		BTF_TYPEDEF_ENC(NAME_TBD, 4),
+		/* Array_B */				/* [4] */
+		BTF_TYPE_ARRAY_ENC(2, 1, 8),
+		BTF_END_RAW,
+	},
+	.str_sec = "\0int_array\0",
+	.str_sec_size = sizeof("\0int_array"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "loop_test4_map",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int) * 8,
+	.key_id = 1,
+	.value_id = 2,
+	.max_entries = 4,
+	.btf_load_err = true,
+},
+
+/* typedef struct B Struct_B
+ *
+ * struct A {
+ *     int x;
+ *     Struct_B y;
+ * };
+ *
+ * struct B {
+ *     int x;
+ *     struct A y;
+ * };
+ */
+{
+	.descr = "loop test #5",
+	.raw_types = {
+		/* int */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),	/* [1] */
+		/* struct A */					/* [2] */
+		BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), 8),
+		BTF_MEMBER_ENC(NAME_TBD, 1, 0),	/* int x;	*/
+		BTF_MEMBER_ENC(NAME_TBD, 3, 32),/* Struct_B y;	*/
+		/* typedef struct B Struct_B */
+		BTF_TYPEDEF_ENC(NAME_TBD, 4),			/* [3] */
+		/* struct B */					/* [4] */
+		BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), 8),
+		BTF_MEMBER_ENC(NAME_TBD, 1, 0),	/* int x;	*/
+		BTF_MEMBER_ENC(NAME_TBD, 2, 32),/* struct A y;	*/
+		BTF_END_RAW,
+	},
+	.str_sec = "\0A\0x\0y\0Struct_B\0B\0x\0y",
+	.str_sec_size = sizeof("\0A\0x\0y\0Struct_B\0B\0x\0y"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "loop_test5_map",
+	.key_size = sizeof(int),
+	.value_size = 8,
+	.key_id = 1,
+	.value_id = 2,
+	.max_entries = 4,
+	.btf_load_err = true,
+},
+
+/* struct A {
+ *     int x;
+ *     struct A array_a[4];
+ * };
+ */
+{
+	.descr = "loop test #6",
+	.raw_types = {
+		/* int */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),	/* [1] */
+		BTF_TYPE_ARRAY_ENC(3, 1, 4),			/* [2] */
+		/* struct A */					/* [3] */
+		BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), 8),
+		BTF_MEMBER_ENC(NAME_TBD, 1, 0),	/* int x;		*/
+		BTF_MEMBER_ENC(NAME_TBD, 2, 32),/* struct A array_a[4];	*/
+		BTF_END_RAW,
+	},
+	.str_sec = "\0A\0x\0y",
+	.str_sec_size = sizeof("\0A\0x\0y"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "loop_test6_map",
+	.key_size = sizeof(int),
+	.value_size = 8,
+	.key_id = 1,
+	.value_id = 2,
+	.max_entries = 4,
+	.btf_load_err = true,
+},
+
+{
+	.descr = "loop test #7",
+	.raw_types = {
+		/* int */				/* [1] */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+		/* struct A { */			/* [2] */
+		BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), sizeof(void *)),
+		/*     const void *m;	*/
+		BTF_MEMBER_ENC(NAME_TBD, 3, 0),
+		/* CONST type_id=3	*/		/* [3] */
+		BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 4),
+		/* PTR type_id=2	*/		/* [4] */
+		BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 3),
+		BTF_END_RAW,
+	},
+	.str_sec = "\0A\0m",
+	.str_sec_size = sizeof("\0A\0m"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "loop_test7_map",
+	.key_size = sizeof(int),
+	.value_size = sizeof(void *),
+	.key_id = 1,
+	.value_id = 2,
+	.max_entries = 4,
+	.btf_load_err = true,
+},
+
+{
+	.descr = "loop test #8",
+	.raw_types = {
+		/* int */				/* [1] */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+		/* struct A { */			/* [2] */
+		BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), sizeof(void *)),
+		/*     const void *m;	*/
+		BTF_MEMBER_ENC(NAME_TBD, 4, 0),
+		/* struct B { */			/* [3] */
+		BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), sizeof(void *)),
+		/*     const void *n;	*/
+		BTF_MEMBER_ENC(NAME_TBD, 6, 0),
+		/* CONST type_id=5	*/		/* [4] */
+		BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 5),
+		/* PTR type_id=6	*/		/* [5] */
+		BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 6),
+		/* CONST type_id=7	*/		/* [6] */
+		BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 7),
+		/* PTR type_id=4	*/		/* [7] */
+		BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 4),
+		BTF_END_RAW,
+	},
+	.str_sec = "\0A\0m\0B\0n",
+	.str_sec_size = sizeof("\0A\0m\0B\0n"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "loop_test8_map",
+	.key_size = sizeof(int),
+	.value_size = sizeof(void *),
+	.key_id = 1,
+	.value_id = 2,
+	.max_entries = 4,
+	.btf_load_err = true,
+},
+
+{
+	.descr = "type_off == str_off",
+	.raw_types = {
+		/* int */				/* [1] */
+		BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),
+		BTF_END_RAW,
+	},
+	.str_sec = "\0int",
+	.str_sec_size = sizeof("\0int"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "hdr_test_map",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.key_id = 1,
+	.value_id = 1,
+	.max_entries = 4,
+	.btf_load_err = true,
+	.type_off_delta = sizeof(struct btf_type) + sizeof(int) + sizeof("\0int"),
+},
+
+{
+	.descr = "Unaligned type_off",
+	.raw_types = {
+		/* int */				/* [1] */
+		BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),
+		BTF_END_RAW,
+	},
+	.str_sec = "\0int",
+	.str_sec_size = sizeof("\0int"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "hdr_test_map",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.key_id = 1,
+	.value_id = 1,
+	.max_entries = 4,
+	.btf_load_err = true,
+	.type_off_delta = 1,
+},
+
+{
+	.descr = "str_off beyonds btf size",
+	.raw_types = {
+		/* int */				/* [1] */
+		BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),
+		BTF_END_RAW,
+	},
+	.str_sec = "\0int",
+	.str_sec_size = sizeof("\0int"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "hdr_test_map",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.key_id = 1,
+	.value_id = 1,
+	.max_entries = 4,
+	.btf_load_err = true,
+	.str_off_delta = sizeof("\0int") + 1,
+},
+
+{
+	.descr = "str_len beyonds btf size",
+	.raw_types = {
+		/* int */				/* [1] */
+		BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),
+		BTF_END_RAW,
+	},
+	.str_sec = "\0int",
+	.str_sec_size = sizeof("\0int"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "hdr_test_map",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.key_id = 1,
+	.value_id = 1,
+	.max_entries = 4,
+	.btf_load_err = true,
+	.str_len_delta = 1,
+},
+
+{
+	.descr = "String section does not end with null",
+	.raw_types = {
+		/* int */				/* [1] */
+		BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),
+		BTF_END_RAW,
+	},
+	.str_sec = "\0int",
+	.str_sec_size = sizeof("\0int"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "hdr_test_map",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.key_id = 1,
+	.value_id = 1,
+	.max_entries = 4,
+	.btf_load_err = true,
+	.str_len_delta = -1,
+},
+
+{
+	.descr = "Empty string section",
+	.raw_types = {
+		/* int */				/* [1] */
+		BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),
+		BTF_END_RAW,
+	},
+	.str_sec = "\0int",
+	.str_sec_size = sizeof("\0int"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "hdr_test_map",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.key_id = 1,
+	.value_id = 1,
+	.max_entries = 4,
+	.btf_load_err = true,
+	.str_len_delta = 0 - (int)sizeof("\0int"),
+},
+
+}; /* struct btf_raw_test raw_tests[] */
+
+static const char *get_next_str(const char *start, const char *end)
+{
+	return start < end - 1 ? start + 1 : NULL;
+}
+
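+/* Scan backwards for the BTF_END_RAW sentinel; its index equals the
+ * number of __u32 words in the type section.  Returns the section size
+ * in bytes, or a negative value if the sentinel is missing.
+ */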
+static int get_type_sec_size(const __u32 *raw_types)
+{
+	int i;
+
+	for (i = MAX_NR_RAW_TYPES - 1;
+	     i >= 0 && raw_types[i] != BTF_END_RAW;
+	     i--)
+		;
+
+	return i < 0 ? i : i * sizeof(raw_types[0]);
+}
+
+static void *btf_raw_create(const struct btf_header *hdr,
+			    const __u32 *raw_types,
+			    const char *str,
+			    unsigned int str_sec_size,
+			    unsigned int *btf_size)
+{
+	const char *next_str = str, *end_str = str + str_sec_size;
+	unsigned int size_needed, offset;
+	struct btf_header *ret_hdr;
+	int i, type_sec_size;
+	uint32_t *ret_types;
+	void *raw_btf;
+
+	type_sec_size = get_type_sec_size(raw_types);
+	if (type_sec_size < 0) {
+		fprintf(stderr, "Cannot get nr_raw_types\n");
+		return NULL;
+	}
+
+	size_needed = sizeof(*hdr) + type_sec_size + str_sec_size;
+	raw_btf = malloc(size_needed);
+	if (!raw_btf) {
+		fprintf(stderr, "Cannot allocate memory for raw_btf\n");
+		return NULL;
+	}
+
+	/* Copy header */
+	memcpy(raw_btf, hdr, sizeof(*hdr));
+	offset = sizeof(*hdr);
+
+	/* Copy type section */
+	ret_types = raw_btf + offset;
+	for (i = 0; i < type_sec_size / sizeof(raw_types[0]); i++) {
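+		/* Each NAME_TBD placeholder consumes the next string from
+		 * str_sec, in order; all other words are copied verbatim.
+		 */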
+		if (raw_types[i] == NAME_TBD) {
+			next_str = get_next_str(next_str, end_str);
+			if (!next_str) {
+				fprintf(stderr, "Error in getting next_str\n");
+				free(raw_btf);
+				return NULL;
+			}
+			ret_types[i] = next_str - str;
+			next_str += strlen(next_str);
+		} else {
+			ret_types[i] = raw_types[i];
+		}
+	}
+	offset += type_sec_size;
+
+	/* Copy string section */
+	memcpy(raw_btf + offset, str, str_sec_size);
+
+	ret_hdr = (struct btf_header *)raw_btf;
+	ret_hdr->str_off = type_sec_size;
+	ret_hdr->str_len = str_sec_size;
+
+	*btf_size = size_needed;
+
+	return raw_btf;
+}
+
+static int do_test_raw(unsigned int test_num)
+{
+	struct btf_raw_test *test = &raw_tests[test_num - 1];
+	struct bpf_create_map_attr create_attr = {};
+	int map_fd = -1, btf_fd = -1;
+	unsigned int raw_btf_size;
+	struct btf_header *hdr;
+	void *raw_btf;
+	int err;
+
+	fprintf(stderr, "BTF raw test[%u] (%s): ", test_num, test->descr);
+	raw_btf = btf_raw_create(&hdr_tmpl,
+				 test->raw_types,
+				 test->str_sec,
+				 test->str_sec_size,
+				 &raw_btf_size);
+
+	if (!raw_btf)
+		return -1;
+
+	hdr = raw_btf;
+
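+	/* Corrupt the header by the per-test deltas to exercise the
+	 * kernel's header validation; the deltas are zero (a no-op) for
+	 * most tests.
+	 */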
+	hdr->type_off = (int)hdr->type_off + test->type_off_delta;
+	hdr->str_off = (int)hdr->str_off + test->str_off_delta;
+	hdr->str_len = (int)hdr->str_len + test->str_len_delta;
+
+	*btf_log_buf = '\0';
+	btf_fd = bpf_load_btf(raw_btf, raw_btf_size,
+			      btf_log_buf, BTF_LOG_BUF_SIZE,
+			      args.always_log);
+	free(raw_btf);
+
+	err = ((btf_fd == -1) != test->btf_load_err);
+	if (err)
+		fprintf(stderr, "btf_load_err:%d btf_fd:%d\n",
+			test->btf_load_err, btf_fd);
+
+	if (err || btf_fd == -1)
+		goto done;
+
+	create_attr.name = test->map_name;
+	create_attr.map_type = test->map_type;
+	create_attr.key_size = test->key_size;
+	create_attr.value_size = test->value_size;
+	create_attr.max_entries = test->max_entries;
+	create_attr.btf_fd = btf_fd;
+	create_attr.btf_key_id = test->key_id;
+	create_attr.btf_value_id = test->value_id;
+
+	map_fd = bpf_create_map_xattr(&create_attr);
+
+	err = ((map_fd == -1) != test->map_create_err);
+	if (err)
+		fprintf(stderr, "map_create_err:%d map_fd:%d\n",
+			test->map_create_err, map_fd);
+
+done:
+	if (!err)
+		fprintf(stderr, "OK\n");
+
+	if (*btf_log_buf && (err || args.always_log))
+		fprintf(stderr, "%s\n", btf_log_buf);
+
+	if (btf_fd != -1)
+		close(btf_fd);
+	if (map_fd != -1)
+		close(map_fd);
+
+	return err;
+}
+
+static int test_raw(void)
+{
+	unsigned int i;
+	int err = 0;
+
+	if (args.raw_test_num)
+		return do_test_raw(args.raw_test_num);
+
+	for (i = 1; i <= ARRAY_SIZE(raw_tests); i++)
+		err |= do_test_raw(i);
+
+	return err;
+}
+
+struct btf_get_info_test {
+	const char *descr;
+	const char *str_sec;
+	__u32 raw_types[MAX_NR_RAW_TYPES];
+	__u32 str_sec_size;
+	int info_size_delta;
+};
+
+const struct btf_get_info_test get_info_tests[] = {
+{
+	.descr = "== raw_btf_size+1",
+	.raw_types = {
+		/* int */				/* [1] */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+		BTF_END_RAW,
+	},
+	.str_sec = "",
+	.str_sec_size = sizeof(""),
+	.info_size_delta = 1,
+},
+{
+	.descr = "== raw_btf_size-3",
+	.raw_types = {
+		/* int */				/* [1] */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+		BTF_END_RAW,
+	},
+	.str_sec = "",
+	.str_sec_size = sizeof(""),
+	.info_size_delta = -3,
+},
+};
+
+static int do_test_get_info(unsigned int test_num)
+{
+	const struct btf_get_info_test *test = &get_info_tests[test_num - 1];
+	unsigned int raw_btf_size, user_btf_size, expected_nbytes;
+	uint8_t *raw_btf = NULL, *user_btf = NULL;
+	int btf_fd = -1, err;
+
+	fprintf(stderr, "BTF GET_INFO_BY_ID test[%u] (%s): ",
+		test_num, test->descr);
+
+	raw_btf = btf_raw_create(&hdr_tmpl,
+				 test->raw_types,
+				 test->str_sec,
+				 test->str_sec_size,
+				 &raw_btf_size);
+
+	if (!raw_btf)
+		return -1;
+
+	*btf_log_buf = '\0';
+
+	user_btf = malloc(raw_btf_size);
+	if (!user_btf) {
+		fprintf(stderr, "Cannot allocate memory for user_btf\n");
+		err = -1;
+		goto done;
+	}
+
+	btf_fd = bpf_load_btf(raw_btf, raw_btf_size,
+			      btf_log_buf, BTF_LOG_BUF_SIZE,
+			      args.always_log);
+	if (btf_fd == -1) {
+		fprintf(stderr, "bpf_load_btf:%s(%d)\n",
+			strerror(errno), errno);
+		err = -1;
+		goto done;
+	}
+
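+	/* Ask for more or fewer bytes than the kernel will actually copy;
+	 * any tail bytes the kernel must leave untouched are pre-filled
+	 * with 0xff and checked below.
+	 */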
+	user_btf_size = (int)raw_btf_size + test->info_size_delta;
+	expected_nbytes = min(raw_btf_size, user_btf_size);
+	if (raw_btf_size > expected_nbytes)
+		memset(user_btf + expected_nbytes, 0xff,
+		       raw_btf_size - expected_nbytes);
+
+	err = bpf_obj_get_info_by_fd(btf_fd, user_btf, &user_btf_size);
+	if (err || user_btf_size != raw_btf_size ||
+	    memcmp(raw_btf, user_btf, expected_nbytes)) {
+		fprintf(stderr,
+			"err:%d(errno:%d) raw_btf_size:%u user_btf_size:%u expected_nbytes:%u memcmp:%d\n",
+			err, errno,
+			raw_btf_size, user_btf_size, expected_nbytes,
+			memcmp(raw_btf, user_btf, expected_nbytes));
+		err = -1;
+		goto done;
+	}
+
+	while (expected_nbytes < raw_btf_size) {
+		fprintf(stderr, "%u...", expected_nbytes);
+		if (user_btf[expected_nbytes++] != 0xff) {
+			fprintf(stderr, "!= 0xff\n");
+			err = -1;
+			goto done;
+		}
+	}
+
+	fprintf(stderr, "OK\n");
+
+done:
+	if (*btf_log_buf && (err || args.always_log))
+		fprintf(stderr, "%s\n", btf_log_buf);
+
+	free(raw_btf);
+	free(user_btf);
+
+	if (btf_fd != -1)
+		close(btf_fd);
+
+	return err;
+}
+
+static int test_get_info(void)
+{
+	unsigned int i;
+	int err = 0;
+
+	if (args.get_info_test_num)
+		return do_test_get_info(args.get_info_test_num);
+
+	for (i = 1; i <= ARRAY_SIZE(get_info_tests); i++)
+		err |= do_test_get_info(i);
+
+	return err;
+}
+
+struct btf_file_test {
+	const char *file;
+	bool btf_kv_notfound;
+};
+
+static struct btf_file_test file_tests[] = {
+{
+	.file = "test_btf_haskv.o",
+},
+{
+	.file = "test_btf_nokv.o",
+	.btf_kv_notfound = true,
+},
+};
+
+static int file_has_btf_elf(const char *fn)
+{
+	Elf_Scn *scn = NULL;
+	GElf_Ehdr ehdr;
+	int elf_fd;
+	Elf *elf;
+	int ret;
+
+	if (elf_version(EV_CURRENT) == EV_NONE) {
+		fprintf(stderr, "Failed to init libelf\n");
+		return -1;
+	}
+
+	elf_fd = open(fn, O_RDONLY);
+	if (elf_fd == -1) {
+		fprintf(stderr, "Cannot open file %s: %s(%d)\n",
+			fn, strerror(errno), errno);
+		return -1;
+	}
+
+	elf = elf_begin(elf_fd, ELF_C_READ, NULL);
+	if (!elf) {
+		fprintf(stderr, "Failed to read ELF from %s. %s\n", fn,
+			elf_errmsg(elf_errno()));
+		ret = -1;
+		goto done;
+	}
+
+	if (!gelf_getehdr(elf, &ehdr)) {
+		fprintf(stderr, "Failed to get EHDR from %s\n", fn);
+		ret = -1;
+		goto done;
+	}
+
+	while ((scn = elf_nextscn(elf, scn))) {
+		const char *sh_name;
+		GElf_Shdr sh;
+
+		if (gelf_getshdr(scn, &sh) != &sh) {
+			fprintf(stderr,
+				"Failed to get section header from %s\n", fn);
+			ret = -1;
+			goto done;
+		}
+
+		sh_name = elf_strptr(elf, ehdr.e_shstrndx, sh.sh_name);
+		if (!strcmp(sh_name, BTF_ELF_SEC)) {
+			ret = 1;
+			goto done;
+		}
+	}
+
+	ret = 0;
+
+done:
+	close(elf_fd);
+	elf_end(elf);
+	return ret;
+}
+
+static int do_test_file(unsigned int test_num)
+{
+	const struct btf_file_test *test = &file_tests[test_num - 1];
+	struct bpf_object *obj = NULL;
+	struct bpf_program *prog;
+	struct bpf_map *map;
+	int err;
+
+	fprintf(stderr, "BTF libbpf test[%u] (%s): ", test_num,
+		test->file);
+
+	err = file_has_btf_elf(test->file);
+	if (err == -1)
+		return err;
+
+	if (err == 0) {
+		fprintf(stderr, "SKIP. No ELF %s found\n", BTF_ELF_SEC);
+		return 0;
+	}
+
+	obj = bpf_object__open(test->file);
+	if (IS_ERR(obj))
+		return PTR_ERR(obj);
+
+	err = bpf_object__btf_fd(obj);
+	if (err == -1) {
+		fprintf(stderr, "bpf_object__btf_fd: -1\n");
+		goto done;
+	}
+
+	prog = bpf_program__next(NULL, obj);
+	if (!prog) {
+		fprintf(stderr, "Cannot find bpf_prog\n");
+		err = -1;
+		goto done;
+	}
+
+	bpf_program__set_type(prog, BPF_PROG_TYPE_TRACEPOINT);
+	err = bpf_object__load(obj);
+	if (err < 0) {
+		fprintf(stderr, "bpf_object__load: %d\n", err);
+		goto done;
+	}
+
+	map = bpf_object__find_map_by_name(obj, "btf_map");
+	if (!map) {
+		fprintf(stderr, "btf_map not found\n");
+		err = -1;
+		goto done;
+	}
+
+	err = (bpf_map__btf_key_id(map) == 0 || bpf_map__btf_value_id(map) == 0)
+		!= test->btf_kv_notfound;
+	if (err) {
+		fprintf(stderr,
+			"btf_kv_notfound:%u btf_key_id:%u btf_value_id:%u\n",
+			test->btf_kv_notfound,
+			bpf_map__btf_key_id(map),
+			bpf_map__btf_value_id(map));
+		goto done;
+	}
+
+	fprintf(stderr, "OK\n");
+
+done:
+	bpf_object__close(obj);
+	return err;
+}
+
+static int test_file(void)
+{
+	unsigned int i;
+	int err = 0;
+
+	if (args.file_test_num)
+		return do_test_file(args.file_test_num);
+
+	for (i = 1; i <= ARRAY_SIZE(file_tests); i++)
+		err |= do_test_file(i);
+
+	return err;
+}
+
+const char *pprint_enum_str[] = {
+	"ENUM_ZERO",
+	"ENUM_ONE",
+	"ENUM_TWO",
+	"ENUM_THREE",
+};
+
+struct pprint_mapv {
+	uint32_t ui32;
+	uint16_t ui16;
+	/* 2 bytes hole */
+	int32_t si32;
+	uint32_t unused_bits2a:2,
+		bits28:28,
+		unused_bits2b:2;
+	union {
+		uint64_t ui64;
+		uint8_t ui8a[8];
+	};
+	enum {
+		ENUM_ZERO,
+		ENUM_ONE,
+		ENUM_TWO,
+		ENUM_THREE,
+	} aenum;
+};
+
+static struct btf_raw_test pprint_test = {
+	.descr = "BTF pretty print test #1",
+	.raw_types = {
+		/* unsigned char */			/* [1] */
+		BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 8, 1),
+		/* unsigned short */			/* [2] */
+		BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 16, 2),
+		/* unsigned int */			/* [3] */
+		BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 32, 4),
+		/* int */				/* [4] */
+		BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),
+		/* unsigned long long */		/* [5] */
+		BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 64, 8),
+		/* 2 bits */				/* [6] */
+		BTF_TYPE_INT_ENC(0, 0, 0, 2, 2),
+		/* 28 bits */				/* [7] */
+		BTF_TYPE_INT_ENC(0, 0, 0, 28, 4),
+		/* uint8_t[8] */			/* [8] */
+		BTF_TYPE_ARRAY_ENC(9, 3, 8),
+		/* typedef unsigned char uint8_t */	/* [9] */
+		BTF_TYPEDEF_ENC(NAME_TBD, 1),
+		/* typedef unsigned short uint16_t */	/* [10] */
+		BTF_TYPEDEF_ENC(NAME_TBD, 2),
+		/* typedef unsigned int uint32_t */	/* [11] */
+		BTF_TYPEDEF_ENC(NAME_TBD, 3),
+		/* typedef int int32_t */		/* [12] */
+		BTF_TYPEDEF_ENC(NAME_TBD, 4),
+		/* typedef unsigned long long uint64_t *//* [13] */
+		BTF_TYPEDEF_ENC(NAME_TBD, 5),
+		/* union (anon) */			/* [14] */
+		BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_UNION, 0, 2), 8),
+		BTF_MEMBER_ENC(NAME_TBD, 13, 0),/* uint64_t ui64; */
+		BTF_MEMBER_ENC(NAME_TBD, 8, 0),	/* uint8_t ui8a[8]; */
+		/* enum (anon) */			/* [15] */
+		BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_ENUM, 0, 4), 4),
+		BTF_ENUM_ENC(NAME_TBD, 0),
+		BTF_ENUM_ENC(NAME_TBD, 1),
+		BTF_ENUM_ENC(NAME_TBD, 2),
+		BTF_ENUM_ENC(NAME_TBD, 3),
+		/* struct pprint_mapv */		/* [16] */
+		BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 8), 28),
+		BTF_MEMBER_ENC(NAME_TBD, 11, 0),	/* uint32_t ui32 */
+		BTF_MEMBER_ENC(NAME_TBD, 10, 32),	/* uint16_t ui16 */
+		BTF_MEMBER_ENC(NAME_TBD, 12, 64),	/* int32_t si32 */
+		BTF_MEMBER_ENC(NAME_TBD, 6, 96),	/* unused_bits2a */
+		BTF_MEMBER_ENC(NAME_TBD, 7, 98),	/* bits28 */
+		BTF_MEMBER_ENC(NAME_TBD, 6, 126),	/* unused_bits2b */
+		BTF_MEMBER_ENC(0, 14, 128),		/* union (anon) */
+		BTF_MEMBER_ENC(NAME_TBD, 15, 192),	/* aenum */
+		BTF_END_RAW,
+	},
+	.str_sec = "\0unsigned char\0unsigned short\0unsigned int\0int\0unsigned long long\0uint8_t\0uint16_t\0uint32_t\0int32_t\0uint64_t\0ui64\0ui8a\0ENUM_ZERO\0ENUM_ONE\0ENUM_TWO\0ENUM_THREE\0pprint_mapv\0ui32\0ui16\0si32\0unused_bits2a\0bits28\0unused_bits2b\0aenum",
+	.str_sec_size = sizeof("\0unsigned char\0unsigned short\0unsigned int\0int\0unsigned long long\0uint8_t\0uint16_t\0uint32_t\0int32_t\0uint64_t\0ui64\0ui8a\0ENUM_ZERO\0ENUM_ONE\0ENUM_TWO\0ENUM_THREE\0pprint_mapv\0ui32\0ui16\0si32\0unused_bits2a\0bits28\0unused_bits2b\0aenum"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "pprint_test",
+	.key_size = sizeof(unsigned int),
+	.value_size = sizeof(struct pprint_mapv),
+	.key_id = 3,	/* unsigned int */
+	.value_id = 16,	/* struct pprint_mapv */
+	.max_entries = 128 * 1024,
+};
+
+static void set_pprint_mapv(struct pprint_mapv *v, uint32_t i)
+{
+	v->ui32 = i;
+	v->si32 = -i;
+	v->unused_bits2a = 3;
+	v->bits28 = i;
+	v->unused_bits2b = 3;
+	v->ui64 = i;
+	v->aenum = i & 0x03;
+}
+
+static int test_pprint(void)
+{
+	const struct btf_raw_test *test = &pprint_test;
+	struct bpf_create_map_attr create_attr = {};
+	int map_fd = -1, btf_fd = -1;
+	struct pprint_mapv mapv = {};
+	unsigned int raw_btf_size;
+	char expected_line[255];
+	FILE *pin_file = NULL;
+	char pin_path[255];
+	size_t line_len = 0;
+	char *line = NULL;
+	unsigned int key;
+	uint8_t *raw_btf;
+	ssize_t nread;
+	int err;
+
+	fprintf(stderr, "%s......", test->descr);
+	raw_btf = btf_raw_create(&hdr_tmpl, test->raw_types,
+				 test->str_sec, test->str_sec_size,
+				 &raw_btf_size);
+
+	if (!raw_btf)
+		return -1;
+
+	*btf_log_buf = '\0';
+	btf_fd = bpf_load_btf(raw_btf, raw_btf_size,
+			      btf_log_buf, BTF_LOG_BUF_SIZE,
+			      args.always_log);
+	free(raw_btf);
+
+	if (btf_fd == -1) {
+		err = -1;
+		fprintf(stderr, "bpf_load_btf: %s(%d)\n",
+			strerror(errno), errno);
+		goto done;
+	}
+
+	create_attr.name = test->map_name;
+	create_attr.map_type = test->map_type;
+	create_attr.key_size = test->key_size;
+	create_attr.value_size = test->value_size;
+	create_attr.max_entries = test->max_entries;
+	create_attr.btf_fd = btf_fd;
+	create_attr.btf_key_id = test->key_id;
+	create_attr.btf_value_id = test->value_id;
+
+	map_fd = bpf_create_map_xattr(&create_attr);
+	if (map_fd == -1) {
+		err = -1;
+		fprintf(stderr, "bpf_creat_map_btf: %s(%d)\n",
+			strerror(errno), errno);
+		goto done;
+	}
+
+	if (snprintf(pin_path, sizeof(pin_path), "%s/%s",
+		     "/sys/fs/bpf", test->map_name) >= sizeof(pin_path)) {
+		err = -1;
+		fprintf(stderr, "pin_path is too long\n");
+		goto done;
+	}
+
+	err = bpf_obj_pin(map_fd, pin_path);
+	if (err) {
+		fprintf(stderr, "Cannot pin to %s. %s(%d).\n", pin_path,
+			strerror(errno), errno);
+		goto done;
+	}
+
+	for (key = 0; key < test->max_entries; key++) {
+		set_pprint_mapv(&mapv, key);
+		bpf_map_update_elem(map_fd, &key, &mapv, 0);
+	}
+
+	pin_file = fopen(pin_path, "r");
+	if (!pin_file) {
+		err = -1;
+		fprintf(stderr, "fopen(%s): %s(%d)\n", pin_path,
+			strerror(errno), errno);
+		goto done;
+	}
+
+	/* Skip lines starting with '#' */
+	while ((nread = getline(&line, &line_len, pin_file)) > 0 &&
+	       *line == '#')
+		;
+
+	if (nread <= 0) {
+		err = -1;
+		fprintf(stderr, "Unexpected EOF\n");
+		goto done;
+	}
+
+	key = 0;
+	do {
+		ssize_t nexpected_line;
+
+		set_pprint_mapv(&mapv, key);
+		nexpected_line = snprintf(expected_line, sizeof(expected_line),
+					  "%u: {%u,0,%d,0x%x,0x%x,0x%x,{%lu|[%u,%u,%u,%u,%u,%u,%u,%u]},%s}\n",
+					  key,
+					  mapv.ui32, mapv.si32,
+					  mapv.unused_bits2a, mapv.bits28, mapv.unused_bits2b,
+					  mapv.ui64,
+					  mapv.ui8a[0], mapv.ui8a[1], mapv.ui8a[2], mapv.ui8a[3],
+					  mapv.ui8a[4], mapv.ui8a[5], mapv.ui8a[6], mapv.ui8a[7],
+					  pprint_enum_str[mapv.aenum]);
+
+		if (nexpected_line >= sizeof(expected_line)) {
+			err = -1;
+			fprintf(stderr, "expected_line is too long\n");
+			goto done;
+		}
+
+		if (strcmp(expected_line, line)) {
+			err = -1;
+			fprintf(stderr, "unexpected pprint output\n");
+			fprintf(stderr, "expected: %s", expected_line);
+			fprintf(stderr, "    read: %s", line);
+			goto done;
+		}
+
+		nread = getline(&line, &line_len, pin_file);
+	} while (++key < test->max_entries && nread > 0);
+
+	if (key < test->max_entries) {
+		err = -1;
+		fprintf(stderr, "Unexpected EOF\n");
+		goto done;
+	}
+
+	if (nread > 0) {
+		err = -1;
+		fprintf(stderr, "Unexpected extra pprint output: %s\n", line);
+		goto done;
+	}
+
+	err = 0;
+
+done:
+	if (!err)
+		fprintf(stderr, "OK\n");
+	if (*btf_log_buf && (err || args.always_log))
+		fprintf(stderr, "%s\n", btf_log_buf);
+	if (btf_fd != -1)
+		close(btf_fd);
+	if (map_fd != -1)
+		close(map_fd);
+	if (pin_file)
+		fclose(pin_file);
+	unlink(pin_path);
+	free(line);
+
+	return err;
+}
+
+static void usage(const char *cmd)
+{
+	fprintf(stderr, "Usage: %s [-l] [[-r test_num (1 - %zu)] | [-g test_num (1 - %zu)] | [-f test_num (1 - %zu)] | [-p]]\n",
+		cmd, ARRAY_SIZE(raw_tests), ARRAY_SIZE(get_info_tests),
+		ARRAY_SIZE(file_tests));
+}
+
+static int parse_args(int argc, char **argv)
+{
+	const char *optstr = "hlpf:r:g:";
+	int opt;
+
+	while ((opt = getopt(argc, argv, optstr)) != -1) {
+		switch (opt) {
+		case 'l':
+			args.always_log = true;
+			break;
+		case 'f':
+			args.file_test_num = atoi(optarg);
+			args.file_test = true;
+			break;
+		case 'r':
+			args.raw_test_num = atoi(optarg);
+			args.raw_test = true;
+			break;
+		case 'g':
+			args.get_info_test_num = atoi(optarg);
+			args.get_info_test = true;
+			break;
+		case 'p':
+			args.pprint_test = true;
+			break;
+		case 'h':
+			usage(argv[0]);
+			exit(0);
+		default:
+			usage(argv[0]);
+			return -1;
+		}
+	}
+
+	if (args.raw_test_num &&
+	    (args.raw_test_num < 1 ||
+	     args.raw_test_num > ARRAY_SIZE(raw_tests))) {
+		fprintf(stderr, "BTF raw test number must be [1 - %zu]\n",
+			ARRAY_SIZE(raw_tests));
+		return -1;
+	}
+
+	if (args.file_test_num &&
+	    (args.file_test_num < 1 ||
+	     args.file_test_num > ARRAY_SIZE(file_tests))) {
+		fprintf(stderr, "BTF file test number must be [1 - %zu]\n",
+			ARRAY_SIZE(file_tests));
+		return -1;
+	}
+
+	if (args.get_info_test_num &&
+	    (args.get_info_test_num < 1 ||
+	     args.get_info_test_num > ARRAY_SIZE(get_info_tests))) {
+		fprintf(stderr, "BTF get info test number must be [1 - %zu]\n",
+			ARRAY_SIZE(get_info_tests));
+		return -1;
+	}
+
+	return 0;
+}
+
+int main(int argc, char **argv)
+{
+	int err = 0;
+
+	err = parse_args(argc, argv);
+	if (err)
+		return err;
+
+	if (args.always_log)
+		libbpf_set_print(__base_pr, __base_pr, __base_pr);
+
+	if (args.raw_test)
+		err |= test_raw();
+
+	if (args.get_info_test)
+		err |= test_get_info();
+
+	if (args.file_test)
+		err |= test_file();
+
+	if (args.pprint_test)
+		err |= test_pprint();
+
+	if (args.raw_test || args.get_info_test || args.file_test ||
+	    args.pprint_test)
+		return err;
+
+	err |= test_raw();
+	err |= test_get_info();
+	err |= test_file();
+
+	return err;
+}
diff --git a/tools/testing/selftests/bpf/test_btf_haskv.c b/tools/testing/selftests/bpf/test_btf_haskv.c
new file mode 100644
index 0000000..8c7ca09
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_btf_haskv.c
@@ -0,0 +1,48 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018 Facebook */
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+
+int _version SEC("version") = 1;
+
+struct ipv_counts {
+	unsigned int v4;
+	unsigned int v6;
+};
+
+typedef int btf_map_key;
+typedef struct ipv_counts btf_map_value;
+btf_map_key dummy_key;
+btf_map_value dummy_value;
+
+struct bpf_map_def SEC("maps") btf_map = {
+	.type = BPF_MAP_TYPE_ARRAY,
+	.key_size = sizeof(int),
+	.value_size = sizeof(struct ipv_counts),
+	.max_entries = 4,
+};
+
+struct dummy_tracepoint_args {
+	unsigned long long pad;
+	struct sock *sock;
+};
+
+SEC("dummy_tracepoint")
+int _dummy_tracepoint(struct dummy_tracepoint_args *arg)
+{
+	struct ipv_counts *counts;
+	int key = 0;
+
+	if (!arg->sock)
+		return 0;
+
+	counts = bpf_map_lookup_elem(&btf_map, &key);
+	if (!counts)
+		return 0;
+
+	counts->v6++;
+
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_btf_nokv.c b/tools/testing/selftests/bpf/test_btf_nokv.c
new file mode 100644
index 0000000..0ed8e08
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_btf_nokv.c
@@ -0,0 +1,43 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018 Facebook */
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+
+int _version SEC("version") = 1;
+
+struct ipv_counts {
+	unsigned int v4;
+	unsigned int v6;
+};
+
+struct bpf_map_def SEC("maps") btf_map = {
+	.type = BPF_MAP_TYPE_ARRAY,
+	.key_size = sizeof(int),
+	.value_size = sizeof(struct ipv_counts),
+	.max_entries = 4,
+};
+
+struct dummy_tracepoint_args {
+	unsigned long long pad;
+	struct sock *sock;
+};
+
+SEC("dummy_tracepoint")
+int _dummy_tracepoint(struct dummy_tracepoint_args *arg)
+{
+	struct ipv_counts *counts;
+	int key = 0;
+
+	if (!arg->sock)
+		return 0;
+
+	counts = bpf_map_lookup_elem(&btf_map, &key);
+	if (!counts)
+		return 0;
+
+	counts->v6++;
+
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index faadbe2..eedda98 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -166,6 +166,37 @@ static void test_xdp(void)
 	bpf_object__close(obj);
 }
 
+static void test_xdp_adjust_tail(void)
+{
+	const char *file = "./test_adjust_tail.o";
+	struct bpf_object *obj;
+	char buf[128];
+	__u32 duration, retval, size;
+	int err, prog_fd;
+
+	err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd);
+	if (err) {
+		error_cnt++;
+		return;
+	}
+
+	err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4),
+				buf, &size, &retval, &duration);
+
+	CHECK(err || errno || retval != XDP_DROP,
+	      "ipv4", "err %d errno %d retval %d size %d\n",
+	      err, errno, retval, size);
+
+	err = bpf_prog_test_run(prog_fd, 1, &pkt_v6, sizeof(pkt_v6),
+				buf, &size, &retval, &duration);
+	CHECK(err || errno || retval != XDP_TX || size != 54,
+	      "ipv6", "err %d errno %d retval %d size %d\n",
+	      err, errno, retval, size);
+	bpf_object__close(obj);
+}
+
 #define MAGIC_VAL 0x1234
 #define NUM_ITER 100000
 #define VIP_NUM 5
@@ -1177,6 +1208,7 @@ int main(void)
 {
 	test_pkt_access();
 	test_xdp();
+	test_xdp_adjust_tail();
 	test_l4lb_all();
 	test_xdp_noinline();
 	test_tcp_estats();
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index 8f1e13d..c3761c3 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -8,9 +8,11 @@
 TEST_PROGS += fib_tests.sh fib-onlink-tests.sh in_netns.sh pmtu.sh
 TEST_GEN_FILES =  socket
 TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy
+TEST_GEN_FILES += tcp_mmap
 TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa
 TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict
 
 include ../lib.mk
 
 $(OUTPUT)/reuseport_bpf_numa: LDFLAGS += -lnuma
+$(OUTPUT)/tcp_mmap: LDFLAGS += -lpthread
diff --git a/tools/testing/selftests/net/forwarding/tc_flower.sh b/tools/testing/selftests/net/forwarding/tc_flower.sh
index 032b882..0c54059 100755
--- a/tools/testing/selftests/net/forwarding/tc_flower.sh
+++ b/tools/testing/selftests/net/forwarding/tc_flower.sh
@@ -149,6 +149,74 @@
 	log_test "src_ip match ($tcflags)"
 }
 
+match_ip_flags_test()
+{
+	RET=0
+
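+	# One filter per ip_flags keyword.  "action continue" lets a packet
+	# hit every matching filter so each counter can be checked; only
+	# the final nofrag filter drops.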
+	tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+		$tcflags ip_flags frag action continue
+	tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \
+		$tcflags ip_flags firstfrag action continue
+	tc filter add dev $h2 ingress protocol ip pref 3 handle 103 flower \
+		$tcflags ip_flags nofirstfrag action continue
+	tc filter add dev $h2 ingress protocol ip pref 4 handle 104 flower \
+		$tcflags ip_flags nofrag action drop
+
+	$MZ $h1 -c 1 -p 1000 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip "frag=0" -q
+
+	tc_check_packets "dev $h2 ingress" 101 1
+	check_fail $? "Matched on wrong frag filter (nofrag)"
+
+	tc_check_packets "dev $h2 ingress" 102 1
+	check_fail $? "Matched on wrong firstfrag filter (nofrag)"
+
+	tc_check_packets "dev $h2 ingress" 103 1
+	check_err $? "Did not match on nofirstfrag filter (nofrag) "
+
+	tc_check_packets "dev $h2 ingress" 104 1
+	check_err $? "Did not match on nofrag filter (nofrag)"
+
+	$MZ $h1 -c 1 -p 1000 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip "frag=0,mf" -q
+
+	tc_check_packets "dev $h2 ingress" 101 1
+	check_err $? "Did not match on frag filter (1stfrag)"
+
+	tc_check_packets "dev $h2 ingress" 102 1
+	check_err $? "Did not match fistfrag filter (1stfrag)"
+
+	tc_check_packets "dev $h2 ingress" 103 1
+	check_err $? "Matched on wrong nofirstfrag filter (1stfrag)"
+
+	tc_check_packets "dev $h2 ingress" 104 1
+	check_err $? "Match on wrong nofrag filter (1stfrag)"
+
+	$MZ $h1 -c 1 -p 1000 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip "frag=256,mf" -q
+	$MZ $h1 -c 1 -p 1000 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip "frag=256" -q
+
+	tc_check_packets "dev $h2 ingress" 101 3
+	check_err $? "Did not match on frag filter (no1stfrag)"
+
+	tc_check_packets "dev $h2 ingress" 102 1
+	check_err $? "Matched on wrong firstfrag filter (no1stfrag)"
+
+	tc_check_packets "dev $h2 ingress" 103 3
+	check_err $? "Did not match on nofirstfrag filter (no1stfrag)"
+
+	tc_check_packets "dev $h2 ingress" 104 1
+	check_err $? "Matched on nofrag filter (no1stfrag)"
+
+	tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+	tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
+	tc filter del dev $h2 ingress protocol ip pref 3 handle 103 flower
+	tc filter del dev $h2 ingress protocol ip pref 4 handle 104 flower
+
+	log_test "ip_flags match ($tcflags)"
+}
+
 setup_prepare()
 {
 	h1=${NETIFS[p1]}
@@ -181,6 +249,7 @@
 match_src_mac_test
 match_dst_ip_test
 match_src_ip_test
+match_ip_flags_test
 
 tc_offload_check
 if [[ $? -ne 0 ]]; then
@@ -191,6 +260,7 @@
 	match_src_mac_test
 	match_dst_ip_test
 	match_src_ip_test
+	match_ip_flags_test
 fi
 
 exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/tcp_mmap.c b/tools/testing/selftests/net/tcp_mmap.c
new file mode 100644
index 0000000..dea342f
--- /dev/null
+++ b/tools/testing/selftests/net/tcp_mmap.c
@@ -0,0 +1,437 @@
+/*
+ * Copyright 2018 Google Inc.
+ * Author: Eric Dumazet (edumazet@google.com)
+ *
+ * Reference program demonstrating tcp mmap() usage,
+ * and SO_RCVLOWAT hints for receiver.
+ *
+ * Note : NIC with header split is needed to use mmap() on TCP :
+ * Each incoming frame must be a multiple of PAGE_SIZE bytes of TCP payload.
+ *
+ * How to use on loopback interface :
+ *
+ *  ifconfig lo mtu 61512  # 15*4096 + 40 (ipv6 header) + 32 (TCP with TS option header)
+ *  tcp_mmap -s -z &
+ *  tcp_mmap -H ::1 -z
+ *
+ *  Or leave default lo mtu, but use -M option to set TCP_MAXSEG option to (4096 + 12)
+ *      (4096 : page size on x86, 12: TCP TS option length)
+ *  tcp_mmap -s -z -M $((4096+12)) &
+ *  tcp_mmap -H ::1 -z -M $((4096+12))
+ *
+ * Note: -z option on sender uses MSG_ZEROCOPY, which forces a copy when packets go through loopback interface.
+ *       We might use sendfile() instead, but really this test program is about mmap(), for receivers ;)
+ *
+ * $ ./tcp_mmap -s &                                 # Without mmap()
+ * $ for i in {1..4}; do ./tcp_mmap -H ::1 -z ; done
+ * received 32768 MB (0 % mmap'ed) in 14.1157 s, 19.4732 Gbit
+ *   cpu usage user:0.057 sys:7.815, 240.234 usec per MB, 65531 c-switches
+ * received 32768 MB (0 % mmap'ed) in 14.6833 s, 18.7204 Gbit
+ *  cpu usage user:0.043 sys:8.103, 248.596 usec per MB, 65524 c-switches
+ * received 32768 MB (0 % mmap'ed) in 11.143 s, 24.6682 Gbit
+ *   cpu usage user:0.044 sys:6.576, 202.026 usec per MB, 65519 c-switches
+ * received 32768 MB (0 % mmap'ed) in 14.9056 s, 18.4413 Gbit
+ *   cpu usage user:0.036 sys:8.193, 251.129 usec per MB, 65530 c-switches
+ * $ kill %1   # kill tcp_mmap server
+ *
+ * $ ./tcp_mmap -s -z &                              # With mmap()
+ * $ for i in {1..4}; do ./tcp_mmap -H ::1 -z ; done
+ * received 32768 MB (99.9939 % mmap'ed) in 6.73792 s, 40.7956 Gbit
+ *   cpu usage user:0.045 sys:2.827, 87.6465 usec per MB, 65532 c-switches
+ * received 32768 MB (99.9939 % mmap'ed) in 7.26732 s, 37.8238 Gbit
+ *   cpu usage user:0.037 sys:3.087, 95.3369 usec per MB, 65532 c-switches
+ * received 32768 MB (99.9939 % mmap'ed) in 7.61661 s, 36.0893 Gbit
+ *   cpu usage user:0.046 sys:3.559, 110.016 usec per MB, 65529 c-switches
+ * received 32768 MB (99.9939 % mmap'ed) in 7.43764 s, 36.9577 Gbit
+ *   cpu usage user:0.035 sys:3.467, 106.873 usec per MB, 65530 c-switches
+ *
+ * License (GPLv2):
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+#define _GNU_SOURCE
+#include <pthread.h>
+#include <sys/types.h>
+#include <fcntl.h>
+#include <error.h>
+#include <sys/socket.h>
+#include <sys/mman.h>
+#include <sys/resource.h>
+#include <unistd.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <errno.h>
+#include <time.h>
+#include <sys/time.h>
+#include <netinet/in.h>
+#include <netinet/tcp.h>
+#include <arpa/inet.h>
+#include <poll.h>
+
+#ifndef MSG_ZEROCOPY
+#define MSG_ZEROCOPY    0x4000000
+#endif
+
+#define FILE_SZ (1UL << 35)
+static int cfg_family = AF_INET6;
+static socklen_t cfg_alen = sizeof(struct sockaddr_in6);
+static int cfg_port = 8787;
+
+static int rcvbuf; /* Default: autotuning.  Can be set with -r <integer> option */
+static int sndbuf; /* Default: autotuning.  Can be set with -w <integer> option */
+static int zflg; /* zero copy option (MSG_ZEROCOPY for sender, mmap() for receiver) */
+static int xflg; /* hash received data (simple xor) (-h option) */
+static int keepflag; /* -k option: receiver shall keep all received file in memory (no munmap() calls) */
+
+static int chunk_size  = 512*1024;
+
+unsigned long htotal;
+
+static inline void prefetch(const void *x)
+{
+#if defined(__x86_64__)
+	asm volatile("prefetcht0 %P0" : : "m" (*(const char *)x));
+#endif
+}
+
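+/* XOR-fold the zone into htotal, eight longs at a time, so that the -x
+ * option can touch every received byte at minimal cost.
+ */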
+void hash_zone(void *zone, unsigned int length)
+{
+	unsigned long temp = htotal;
+
+	while (length >= 8*sizeof(long)) {
+		prefetch(zone + 384);
+		temp ^= *(unsigned long *)zone;
+		temp ^= *(unsigned long *)(zone + sizeof(long));
+		temp ^= *(unsigned long *)(zone + 2*sizeof(long));
+		temp ^= *(unsigned long *)(zone + 3*sizeof(long));
+		temp ^= *(unsigned long *)(zone + 4*sizeof(long));
+		temp ^= *(unsigned long *)(zone + 5*sizeof(long));
+		temp ^= *(unsigned long *)(zone + 6*sizeof(long));
+		temp ^= *(unsigned long *)(zone + 7*sizeof(long));
+		zone += 8*sizeof(long);
+		length -= 8*sizeof(long);
+	}
+	while (length >= 1) {
+		temp ^= *(unsigned char *)zone;
+		zone += 1;
+		length--;
+	}
+	htotal = temp;
+}
+
+void *child_thread(void *arg)
+{
+	unsigned long total_mmap = 0, total = 0;
+	unsigned long delta_usec;
+	int flags = MAP_SHARED;
+	struct timeval t0, t1;
+	char *buffer = NULL;
+	void *oaddr = NULL;
+	double throughput;
+	struct rusage ru;
+	int lu, fd;
+
+	fd = (int)(unsigned long)arg;
+
+	gettimeofday(&t0, NULL);
+
+	fcntl(fd, F_SETFL, O_NDELAY);
+	buffer = malloc(chunk_size);
+	if (!buffer) {
+		perror("malloc");
+		goto error;
+	}
+	while (1) {
+		struct pollfd pfd = { .fd = fd, .events = POLLIN, };
+		int sub;
+
+		poll(&pfd, 1, 10000);
+		if (zflg) {
+			void *naddr;
+
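+			/* Map the next chunk of the receive queue straight
+			 * into this process's address space.  EAGAIN means a
+			 * full chunk is not queued yet; EINVAL means the
+			 * payload is not page-aligned, so fall back to read().
+			 */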
+			naddr = mmap(oaddr, chunk_size, PROT_READ, flags, fd, 0);
+			if (naddr == MAP_FAILED) {
+				if (errno == EAGAIN) {
+					/* That is if SO_RCVLOWAT is buggy */
+					usleep(1000);
+					continue;
+				}
+				if (errno == EINVAL) {
+					flags = MAP_SHARED;
+					oaddr = NULL;
+					goto fallback;
+				}
+				if (errno != EIO)
+					perror("mmap()");
+				break;
+			}
+			total_mmap += chunk_size;
+			if (xflg)
+				hash_zone(naddr, chunk_size);
+			total += chunk_size;
+			if (!keepflag) {
+				flags |= MAP_FIXED;
+				oaddr = naddr;
+			}
+			continue;
+		}
+fallback:
+		sub = 0;
+		while (sub < chunk_size) {
+			lu = read(fd, buffer + sub, chunk_size - sub);
+			if (lu == 0)
+				goto end;
+			if (lu < 0)
+				break;
+			if (xflg)
+				hash_zone(buffer + sub, lu);
+			total += lu;
+			sub += lu;
+		}
+	}
+end:
+	gettimeofday(&t1, NULL);
+	delta_usec = (t1.tv_sec - t0.tv_sec) * 1000000 + t1.tv_usec - t0.tv_usec;
+
+	throughput = 0;
+	if (delta_usec)
+		throughput = total * 8.0 / (double)delta_usec / 1000.0;
+	getrusage(RUSAGE_THREAD, &ru);
+	if (total > 1024*1024) {
+		unsigned long total_usec;
+		unsigned long mb = total >> 20;
+		total_usec = 1000000*ru.ru_utime.tv_sec + ru.ru_utime.tv_usec +
+			     1000000*ru.ru_stime.tv_sec + ru.ru_stime.tv_usec;
+		printf("received %lg MB (%lg %% mmap'ed) in %lg s, %lg Gbit\n"
+		       "  cpu usage user:%lg sys:%lg, %lg usec per MB, %lu c-switches\n",
+				total / (1024.0 * 1024.0),
+				100.0*total_mmap/total,
+				(double)delta_usec / 1000000.0,
+				throughput,
+				(double)ru.ru_utime.tv_sec + (double)ru.ru_utime.tv_usec / 1000000.0,
+				(double)ru.ru_stime.tv_sec + (double)ru.ru_stime.tv_usec / 1000000.0,
+				(double)total_usec/mb,
+				ru.ru_nvcsw);
+	}
+error:
+	free(buffer);
+	close(fd);
+	pthread_exit(0);
+}
+
+static void apply_rcvsnd_buf(int fd)
+{
+	if (rcvbuf && setsockopt(fd, SOL_SOCKET,
+				 SO_RCVBUF, &rcvbuf, sizeof(rcvbuf)) == -1) {
+		perror("setsockopt SO_RCVBUF");
+	}
+
+	if (sndbuf && setsockopt(fd, SOL_SOCKET,
+				 SO_SNDBUF, &sndbuf, sizeof(sndbuf)) == -1) {
+		perror("setsockopt SO_SNDBUF");
+	}
+}
+
+
+static void setup_sockaddr(int domain, const char *str_addr,
+			   struct sockaddr_storage *sockaddr)
+{
+	struct sockaddr_in6 *addr6 = (void *) sockaddr;
+	struct sockaddr_in *addr4 = (void *) sockaddr;
+
+	switch (domain) {
+	case PF_INET:
+		memset(addr4, 0, sizeof(*addr4));
+		addr4->sin_family = AF_INET;
+		addr4->sin_port = htons(cfg_port);
+		if (str_addr &&
+		    inet_pton(AF_INET, str_addr, &(addr4->sin_addr)) != 1)
+			error(1, 0, "ipv4 parse error: %s", str_addr);
+		break;
+	case PF_INET6:
+		memset(addr6, 0, sizeof(*addr6));
+		addr6->sin6_family = AF_INET6;
+		addr6->sin6_port = htons(cfg_port);
+		if (str_addr &&
+		    inet_pton(AF_INET6, str_addr, &(addr6->sin6_addr)) != 1)
+			error(1, 0, "ipv6 parse error: %s", str_addr);
+		break;
+	default:
+		error(1, 0, "illegal domain");
+	}
+}
+
+static void do_accept(int fdlisten)
+{
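+	/* Wake the receiver only once a full chunk is queued, so each
+	 * mmap() in child_thread() can map chunk_size bytes at once.
+	 */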
+	if (setsockopt(fdlisten, SOL_SOCKET, SO_RCVLOWAT,
+		       &chunk_size, sizeof(chunk_size)) == -1) {
+		perror("setsockopt SO_RCVLOWAT");
+	}
+
+	apply_rcvsnd_buf(fdlisten);
+
+	while (1) {
+		struct sockaddr_in addr;
+		socklen_t addrlen = sizeof(addr);
+		pthread_t th;
+		int fd, res;
+
+		fd = accept(fdlisten, (struct sockaddr *)&addr, &addrlen);
+		if (fd == -1) {
+			perror("accept");
+			continue;
+		}
+		res = pthread_create(&th, NULL, child_thread,
+				     (void *)(unsigned long)fd);
+		if (res) {
+			errno = res;
+			perror("pthread_create");
+			close(fd);
+		}
+	}
+}
+
+int main(int argc, char *argv[])
+{
+	struct sockaddr_storage listenaddr, addr;
+	unsigned int max_pacing_rate = 0;
+	unsigned long total = 0;
+	char *host = NULL;
+	int fd, c, on = 1;
+	char *buffer;
+	int sflg = 0;
+	int mss = 0;
+
+	while ((c = getopt(argc, argv, "46p:svr:w:H:zxkP:M:")) != -1) {
+		switch (c) {
+		case '4':
+			cfg_family = PF_INET;
+			cfg_alen = sizeof(struct sockaddr_in);
+			break;
+		case '6':
+			cfg_family = PF_INET6;
+			cfg_alen = sizeof(struct sockaddr_in6);
+			break;
+		case 'p':
+			cfg_port = atoi(optarg);
+			break;
+		case 'H':
+			host = optarg;
+			break;
+		case 's': /* server : listen for incoming connections */
+			sflg++;
+			break;
+		case 'r':
+			rcvbuf = atoi(optarg);
+			break;
+		case 'w':
+			sndbuf = atoi(optarg);
+			break;
+		case 'z':
+			zflg = 1;
+			break;
+		case 'M':
+			mss = atoi(optarg);
+			break;
+		case 'x':
+			xflg = 1;
+			break;
+		case 'k':
+			keepflag = 1;
+			break;
+		case 'P':
+			max_pacing_rate = atoi(optarg);
+			break;
+		default:
+			exit(1);
+		}
+	}
+	if (sflg) {
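+		/* Server mode: bind, listen and accept connections forever */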
+		int fdlisten = socket(cfg_family, SOCK_STREAM, 0);
+
+		if (fdlisten == -1) {
+			perror("socket");
+			exit(1);
+		}
+		apply_rcvsnd_buf(fdlisten);
+		setsockopt(fdlisten, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on));
+
+		setup_sockaddr(cfg_family, host, &listenaddr);
+
+		if (mss &&
+		    setsockopt(fdlisten, SOL_TCP, TCP_MAXSEG, &mss, sizeof(mss)) == -1) {
+			perror("setsockopt TCP_MAXSEG");
+			exit(1);
+		}
+		if (bind(fdlisten, (const struct sockaddr *)&listenaddr, cfg_alen) == -1) {
+			perror("bind");
+			exit(1);
+		}
+		if (listen(fdlisten, 128) == -1) {
+			perror("listen");
+			exit(1);
+		}
+		do_accept(fdlisten);
+	}
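+	/* Client mode: connect and stream FILE_SZ bytes of zeroes */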
+	buffer = mmap(NULL, chunk_size, PROT_READ | PROT_WRITE,
+		      MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+	if (buffer == MAP_FAILED) {
+		perror("mmap");
+		exit(1);
+	}
+
+	fd = socket(cfg_family, SOCK_STREAM, 0);
+	if (fd == -1) {
+		perror("socket");
+		exit(1);
+	}
+	apply_rcvsnd_buf(fd);
+
+	setup_sockaddr(cfg_family, host, &addr);
+
+	if (mss &&
+	    setsockopt(fd, SOL_TCP, TCP_MAXSEG, &mss, sizeof(mss)) == -1) {
+		perror("setsockopt TCP_MAXSEG");
+		exit(1);
+	}
+	if (connect(fd, (const struct sockaddr *)&addr, cfg_alen) == -1) {
+		perror("connect");
+		exit(1);
+	}
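+	/* -P: cap the transmit rate, in bytes per second, via socket pacing */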
+	if (max_pacing_rate &&
+	    setsockopt(fd, SOL_SOCKET, SO_MAX_PACING_RATE,
+		       &max_pacing_rate, sizeof(max_pacing_rate)) == -1)
+		perror("setsockopt SO_MAX_PACING_RATE");
+
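+	/* -z: request MSG_ZEROCOPY sends; fall back to copying on failure */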
+	if (zflg && setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY,
+			       &on, sizeof(on)) == -1) {
+		perror("setsockopt SO_ZEROCOPY (-z option disabled)");
+		zflg = 0;
+	}
+	while (total < FILE_SZ) {
+		long wr = FILE_SZ - total;
+
+		if (wr > chunk_size)
+			wr = chunk_size;
+		/* Note: we just want to fill the pipe with 0 bytes */
+		wr = send(fd, buffer, wr, zflg ? MSG_ZEROCOPY : 0);
+		if (wr <= 0)
+			break;
+		total += wr;
+	}
+	close(fd);
+	munmap(buffer, chunk_size);
+	return 0;
+}
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/sample.json b/tools/testing/selftests/tc-testing/tc-tests/actions/sample.json
new file mode 100644
index 0000000..3aca33c
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/sample.json
@@ -0,0 +1,566 @@
+[
+    {
+        "id": "9784",
+        "name": "Add valid sample action with mandatory arguments",
+        "category": [
+            "actions",
+            "sample"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action sample",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action sample rate 10 group 1 index 2",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action sample index 2",
+        "matchPattern": "action order [0-9]+: sample rate 1/10 group 1.*index 2 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action sample"
+        ]
+    },
+    {
+        "id": "5c91",
+        "name": "Add valid sample action with mandatory arguments and continue control action",
+        "category": [
+            "actions",
+            "sample"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action sample",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action sample rate 700 group 2 continue index 2",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action sample index 2",
+        "matchPattern": "action order [0-9]+: sample rate 1/700 group 2 continue.*index 2 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action sample"
+        ]
+    },
+    {
+        "id": "334b",
+        "name": "Add valid sample action with mandatory arguments and drop control action",
+        "category": [
+            "actions",
+            "sample"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action sample",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action sample rate 10000 group 11 drop index 22",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action sample",
+        "matchPattern": "action order [0-9]+: sample rate 1/10000 group 11 drop.*index 22 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action sample"
+        ]
+    },
+    {
+        "id": "da69",
+        "name": "Add valid sample action with mandatory arguments and reclassify control action",
+        "category": [
+            "actions",
+            "sample"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action sample",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action sample rate 20000 group 72 reclassify index 100",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action sample",
+        "matchPattern": "action order [0-9]+: sample rate 1/20000 group 72 reclassify.*index 100 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action sample"
+        ]
+    },
+    {
+        "id": "13ce",
+        "name": "Add valid sample action with mandatory arguments and pipe control action",
+        "category": [
+            "actions",
+            "sample"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action sample",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action sample rate 20 group 2 pipe index 100",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action sample",
+        "matchPattern": "action order [0-9]+: sample rate 1/20 group 2 pipe.*index 100 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action sample"
+        ]
+    },
+    {
+        "id": "1886",
+        "name": "Add valid sample action with mandatory arguments and jump control action",
+        "category": [
+            "actions",
+            "sample"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action sample",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action sample rate 700 group 25 jump 4 index 200",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action sample index 200",
+        "matchPattern": "action order [0-9]+: sample rate 1/700 group 25 jump 4.*index 200 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action sample"
+        ]
+    },
+    {
+        "id": "b6d4",
+        "name": "Add sample action with mandatory arguments and invalid control action",
+        "category": [
+            "actions",
+            "sample"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action sample",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action sample rate 200000 group 52 foo index 1",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions list action sample",
+        "matchPattern": "action order [0-9]+: sample rate 1/200000 group 52 foo.*index 1 ref",
+        "matchCount": "0",
+        "teardown": []
+    },
+    {
+        "id": "a874",
+        "name": "Add invalid sample action without mandatory arguments",
+        "category": [
+            "actions",
+            "sample"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action sample",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action sample index 1",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions list action sample",
+        "matchPattern": "action order [0-9]+: sample.*index 1 ref",
+        "matchCount": "0",
+        "teardown": []
+    },
+    {
+        "id": "ac01",
+        "name": "Add invalid sample action without mandatory argument rate",
+        "category": [
+            "actions",
+            "sample"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action sample",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action sample group 10 index 1",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions list action sample",
+        "matchPattern": "action order [0-9]+: sample.*group 10.*index 1 ref",
+        "matchCount": "0",
+        "teardown": []
+    },
+    {
+        "id": "4203",
+        "name": "Add invalid sample action without mandatory argument group",
+        "category": [
+            "actions",
+            "sample"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action sample",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action sample rate 100 index 10",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions get action sample index 10",
+        "matchPattern": "action order [0-9]+: sample rate 1/100.*index 10 ref",
+        "matchCount": "0",
+        "teardown": []
+    },
+    {
+        "id": "8f2e",
+        "name": "Add valid sample action with trunc argument",
+        "category": [
+            "actions",
+            "sample"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action sample",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action sample rate 1024 group 4 trunc 1024 index 10",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action sample index 10",
+        "matchPattern": "action order [0-9]+: sample rate 1/1024 group 4 trunc_size 1024 pipe.*index 10 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action sample"
+        ]
+    },
+    {
+        "id": "45f8",
+        "name": "Add sample action with maximum rate argument",
+        "category": [
+            "actions",
+            "sample"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action sample",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action sample rate 4294967295 group 4 index 10",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action sample index 10",
+        "matchPattern": "action order [0-9]+: sample rate 1/4294967295 group 4 pipe.*index 10 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action sample"
+        ]
+    },
+    {
+        "id": "ad0c",
+        "name": "Add sample action with maximum trunc argument",
+        "category": [
+            "actions",
+            "sample"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action sample",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action sample rate 16000 group 4 trunc 4294967295 index 10",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action sample index 10",
+        "matchPattern": "action order [0-9]+: sample rate 1/16000 group 4 trunc_size 4294967295 pipe.*index 10 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action sample"
+        ]
+    },
+    {
+        "id": "83a9",
+        "name": "Add sample action with maximum group argument",
+        "category": [
+            "actions",
+            "sample"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action sample",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action sample rate 4294 group 4294967295 index 1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action sample index 1",
+        "matchPattern": "action order [0-9]+: sample rate 1/4294 group 4294967295 pipe.*index 1 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action sample"
+        ]
+    },
+    {
+        "id": "ed27",
+        "name": "Add sample action with invalid rate argument",
+        "category": [
+            "actions",
+            "sample"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action sample",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action sample rate 4294967296 group 4 index 10",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions get action sample index 10",
+        "matchPattern": "action order [0-9]+: sample rate 1/4294967296 group 4 pipe.*index 10 ref",
+        "matchCount": "0",
+        "teardown": []
+    },
+    {
+        "id": "2eae",
+        "name": "Add sample action with invalid group argument",
+        "category": [
+            "actions",
+            "sample"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action sample",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action sample rate 4098 group 5294967299 continue index 1",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions get action sample index 1",
+        "matchPattern": "action order [0-9]+: sample rate 1/4098 group 5294967299 continue.*index 1 ref",
+        "matchCount": "0",
+        "teardown": []
+    },
+    {
+        "id": "6ff3",
+        "name": "Add sample action with invalid trunc size",
+        "category": [
+            "actions",
+            "sample"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action sample",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action sample rate 1024 group 4 trunc 112233445566 index 11",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions get action sample index 11",
+        "matchPattern": "action order [0-9]+: sample rate 1/1024 group 4 trunc_size 112233445566.*index 11 ref",
+        "matchCount": "0",
+        "teardown": []
+    },
+    {
+        "id": "2b2a",
+        "name": "Add sample action with invalid index",
+        "category": [
+            "actions",
+            "sample"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action sample",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action sample rate 1024 group 4 index 5294967299",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions get action sample index 5294967299",
+        "matchPattern": "action order [0-9]+: sample rate 1/1024 group 4 pipe.*index 5294967299 ref",
+        "matchCount": "0",
+        "teardown": []
+    },
+    {
+        "id": "dee2",
+        "name": "Add sample action with maximum allowed index",
+        "category": [
+            "actions",
+            "sample"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action sample",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action sample rate 1024 group 4 index 4294967295",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action sample index 4294967295",
+        "matchPattern": "action order [0-9]+: sample rate 1/1024 group 4 pipe.*index 4294967295 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action sample"
+        ]
+    },
+    {
+        "id": "560e",
+        "name": "Add sample action with cookie",
+        "category": [
+            "actions",
+            "sample"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action sample",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action sample rate 1024 group 4 index 45 cookie aabbccdd",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action sample index 45",
+        "matchPattern": "action order [0-9]+: sample rate 1/1024 group 4 pipe.*index 45.*cookie aabbccdd",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action sample"
+        ]
+    },
+    {
+        "id": "704a",
+        "name": "Replace existing sample action with new rate argument",
+        "category": [
+            "actions",
+            "sample"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action sample",
+                0,
+                1,
+                255
+            ],
+            "$TC actions add action sample rate 1024 group 4 index 4"
+        ],
+        "cmdUnderTest": "$TC actions replace action sample rate 2048 group 4 index 4",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action sample",
+        "matchPattern": "action order [0-9]+: sample rate 1/2048 group 4 pipe.*index 4",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action sample"
+        ]
+    },
+    {
+        "id": "60eb",
+        "name": "Replace existing sample action with new group argument",
+        "category": [
+            "actions",
+            "sample"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action sample",
+                0,
+                1,
+                255
+            ],
+            "$TC actions add action sample rate 1024 group 4 index 4"
+        ],
+        "cmdUnderTest": "$TC actions replace action sample rate 1024 group 7 index 4",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action sample",
+        "matchPattern": "action order [0-9]+: sample rate 1/1024 group 7 pipe.*index 4",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action sample"
+        ]
+    },
+    {
+        "id": "2cce",
+        "name": "Replace existing sample action with new trunc argument",
+        "category": [
+            "actions",
+            "sample"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action sample",
+                0,
+                1,
+                255
+            ],
+            "$TC actions add action sample rate 1024 group 4 trunc 48 index 4"
+        ],
+        "cmdUnderTest": "$TC actions replace action sample rate 1024 group 7 trunc 64 index 4",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action sample",
+        "matchPattern": "action order [0-9]+: sample rate 1/1024 group 7 trunc_size 64 pipe.*index 4",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action sample"
+        ]
+    },
+    {
+        "id": "59d1",
+        "name": "Replace existing sample action with new control argument",
+        "category": [
+            "actions",
+            "sample"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action sample",
+                0,
+                1,
+                255
+            ],
+            "$TC actions add action sample rate 1024 group 4 reclassify index 4"
+        ],
+        "cmdUnderTest": "$TC actions replace action sample rate 1024 group 7 pipe index 4",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action sample",
+        "matchPattern": "action order [0-9]+: sample rate 1/1024 group 7 pipe.*index 4",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action sample"
+        ]
+    }
+]