/* XDP redirect to CPUs via cpumap (BPF_MAP_TYPE_CPUMAP)
 *
 * GPLv2, Copyright(c) 2017 Jesper Dangaard Brouer, Red Hat, Inc.
 */
#include <uapi/linux/if_ether.h>
#include <uapi/linux/if_packet.h>
#include <uapi/linux/if_vlan.h>
#include <uapi/linux/ip.h>
#include <uapi/linux/ipv6.h>
#include <uapi/linux/in.h>
#include <uapi/linux/tcp.h>
#include <uapi/linux/udp.h>

#include <uapi/linux/bpf.h>
#include "bpf_helpers.h"
#include "hash_func01.h"

#define MAX_CPUS 64 /* WARNING - sync with _user.c */

/* Special map type that can XDP_REDIRECT frames to another CPU */
struct bpf_map_def SEC("maps") cpu_map = {
        .type = BPF_MAP_TYPE_CPUMAP,
        .key_size = sizeof(u32),
        .value_size = sizeof(u32),
        .max_entries = MAX_CPUS,
};
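
/* Note: the cpu_map entries themselves are written from userspace. A
 * minimal sketch of the companion _user.c side (names illustrative;
 * cpu_map_fd is the fd of this map, and the value is assumed to be the
 * cpumap kthread queue size):
 *
 *      __u32 cpu = 2, qsize = 192;
 *      bpf_map_update_elem(cpu_map_fd, &cpu, &qsize, 0);
 */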

/* Common stats data record to keep userspace simpler */
struct datarec {
        __u64 processed;
        __u64 dropped;
        __u64 issue;
};
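
/* Note: the 'issue' counter is overloaded per map below: rx_cnt uses it
 * for bogus cpu_dest values, cpumap_enqueue_cnt for bulk-enqueue events,
 * and cpumap_kthread_cnt for kthread sched events.
 */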

/* Count RX packets, as XDP bpf_prog doesn't get direct TX-success
 * feedback. Redirect TX errors can be caught via a tracepoint.
 */
struct bpf_map_def SEC("maps") rx_cnt = {
        .type = BPF_MAP_TYPE_PERCPU_ARRAY,
        .key_size = sizeof(u32),
        .value_size = sizeof(struct datarec),
        .max_entries = 1,
};

/* Used by trace point */
struct bpf_map_def SEC("maps") redirect_err_cnt = {
        .type = BPF_MAP_TYPE_PERCPU_ARRAY,
        .key_size = sizeof(u32),
        .value_size = sizeof(struct datarec),
        .max_entries = 2,
        /* TODO: have entries for all possible errno's */
};

/* Used by trace point */
struct bpf_map_def SEC("maps") cpumap_enqueue_cnt = {
        .type = BPF_MAP_TYPE_PERCPU_ARRAY,
        .key_size = sizeof(u32),
        .value_size = sizeof(struct datarec),
        .max_entries = MAX_CPUS,
};

/* Used by trace point */
struct bpf_map_def SEC("maps") cpumap_kthread_cnt = {
        .type = BPF_MAP_TYPE_PERCPU_ARRAY,
        .key_size = sizeof(u32),
        .value_size = sizeof(struct datarec),
        .max_entries = 1,
};

/* Set of maps controlling available CPUs, and for iterating through
 * the selectable redirect CPUs.
 */
struct bpf_map_def SEC("maps") cpus_available = {
        .type = BPF_MAP_TYPE_ARRAY,
        .key_size = sizeof(u32),
        .value_size = sizeof(u32),
        .max_entries = MAX_CPUS,
};
struct bpf_map_def SEC("maps") cpus_count = {
        .type = BPF_MAP_TYPE_ARRAY,
        .key_size = sizeof(u32),
        .value_size = sizeof(u32),
        .max_entries = 1,
};
struct bpf_map_def SEC("maps") cpus_iterator = {
        .type = BPF_MAP_TYPE_PERCPU_ARRAY,
        .key_size = sizeof(u32),
        .value_size = sizeof(u32),
        .max_entries = 1,
};
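
/* Note: userspace is expected to fill cpus_available[0..N-1] with the
 * CPU ids to redirect to, and to write N into cpus_count[0]. A rough
 * sketch of the companion _user.c side (names illustrative):
 *
 *      __u32 i, key0 = 0;
 *      for (i = 0; i < n_cpus; i++)
 *              bpf_map_update_elem(cpus_available_fd, &i, &cpu_ids[i], 0);
 *      bpf_map_update_elem(cpus_count_fd, &key0, &n_cpus, 0);
 */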

/* Used by trace point */
struct bpf_map_def SEC("maps") exception_cnt = {
        .type = BPF_MAP_TYPE_PERCPU_ARRAY,
        .key_size = sizeof(u32),
        .value_size = sizeof(struct datarec),
        .max_entries = 1,
};

/* Helper parse functions */

/* Parse Ethernet layer 2, extract network layer 3 offset and protocol
 *
 * Returns false on error or unsupported ether-type
 */
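
/* Minimal VLAN header definition: only the fields needed for skipping
 * over (possibly stacked) VLAN tags.
 */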
struct vlan_hdr {
        __be16 h_vlan_TCI;
        __be16 h_vlan_encapsulated_proto;
};

static __always_inline
bool parse_eth(struct ethhdr *eth, void *data_end,
               u16 *eth_proto, u64 *l3_offset)
{
        u16 eth_type;
        u64 offset;

        offset = sizeof(*eth);
        if ((void *)eth + offset > data_end)
                return false;

        eth_type = eth->h_proto;

        /* Skip non-802.3 Ethertypes */
        if (unlikely(ntohs(eth_type) < ETH_P_802_3_MIN))
                return false;

        /* Handle VLAN tagged packet */
        if (eth_type == htons(ETH_P_8021Q) || eth_type == htons(ETH_P_8021AD)) {
                struct vlan_hdr *vlan_hdr;

                vlan_hdr = (void *)eth + offset;
                offset += sizeof(*vlan_hdr);
                if ((void *)eth + offset > data_end)
                        return false;
                eth_type = vlan_hdr->h_vlan_encapsulated_proto;
        }
        /* Handle double VLAN tagged packet */
        if (eth_type == htons(ETH_P_8021Q) || eth_type == htons(ETH_P_8021AD)) {
                struct vlan_hdr *vlan_hdr;

                vlan_hdr = (void *)eth + offset;
                offset += sizeof(*vlan_hdr);
                if ((void *)eth + offset > data_end)
                        return false;
                eth_type = vlan_hdr->h_vlan_encapsulated_proto;
        }

        *eth_proto = ntohs(eth_type);
        *l3_offset = offset;
        return true;
}

static __always_inline
u16 get_dest_port_ipv4_udp(struct xdp_md *ctx, u64 nh_off)
{
        void *data_end = (void *)(long)ctx->data_end;
        void *data = (void *)(long)ctx->data;
        struct iphdr *iph = data + nh_off;
        struct udphdr *udph;
        u16 dport;

        if (iph + 1 > data_end)
                return 0;
        if (iph->protocol != IPPROTO_UDP)
                return 0;

        udph = (void *)(iph + 1);
        if (udph + 1 > data_end)
                return 0;

        dport = ntohs(udph->dest);
        return dport;
}

static __always_inline
int get_proto_ipv4(struct xdp_md *ctx, u64 nh_off)
{
        void *data_end = (void *)(long)ctx->data_end;
        void *data = (void *)(long)ctx->data;
        struct iphdr *iph = data + nh_off;

        if (iph + 1 > data_end)
                return 0;
        return iph->protocol;
}

static __always_inline
int get_proto_ipv6(struct xdp_md *ctx, u64 nh_off)
{
        void *data_end = (void *)(long)ctx->data_end;
        void *data = (void *)(long)ctx->data;
        struct ipv6hdr *ip6h = data + nh_off;

        if (ip6h + 1 > data_end)
                return 0;
        return ip6h->nexthdr;
}

SEC("xdp_cpu_map0")
int xdp_prognum0_no_touch(struct xdp_md *ctx)
{
        void *data_end = (void *)(long)ctx->data_end;
        void *data = (void *)(long)ctx->data;
        struct datarec *rec;
        u32 *cpu_selected;
        u32 cpu_dest;
        u32 key = 0;

        /* Only use first entry in cpus_available */
        cpu_selected = bpf_map_lookup_elem(&cpus_available, &key);
        if (!cpu_selected)
                return XDP_ABORTED;
        cpu_dest = *cpu_selected;

        /* Count RX packet in map */
        rec = bpf_map_lookup_elem(&rx_cnt, &key);
        if (!rec)
                return XDP_ABORTED;
        rec->processed++;

        if (cpu_dest >= MAX_CPUS) {
                rec->issue++;
                return XDP_ABORTED;
        }

        return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}

SEC("xdp_cpu_map1_touch_data")
int xdp_prognum1_touch_data(struct xdp_md *ctx)
{
        void *data_end = (void *)(long)ctx->data_end;
        void *data = (void *)(long)ctx->data;
        struct ethhdr *eth = data;
        struct datarec *rec;
        u32 *cpu_selected;
        u32 cpu_dest;
        u16 eth_type;
        u32 key = 0;

        /* Only use first entry in cpus_available */
        cpu_selected = bpf_map_lookup_elem(&cpus_available, &key);
        if (!cpu_selected)
                return XDP_ABORTED;
        cpu_dest = *cpu_selected;

        /* Validate packet length is minimum Eth header size */
        if (eth + 1 > data_end)
                return XDP_ABORTED;

        /* Count RX packet in map */
        rec = bpf_map_lookup_elem(&rx_cnt, &key);
        if (!rec)
                return XDP_ABORTED;
        rec->processed++;

        /* Read packet data, and use it (drop non-802.3 Ethertypes) */
        eth_type = eth->h_proto;
        if (ntohs(eth_type) < ETH_P_802_3_MIN) {
                rec->dropped++;
                return XDP_DROP;
        }

        if (cpu_dest >= MAX_CPUS) {
                rec->issue++;
                return XDP_ABORTED;
        }

        return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}

SEC("xdp_cpu_map2_round_robin")
int xdp_prognum2_round_robin(struct xdp_md *ctx)
{
        void *data_end = (void *)(long)ctx->data_end;
        void *data = (void *)(long)ctx->data;
        struct ethhdr *eth = data;
        struct datarec *rec;
        u32 cpu_dest;
        u32 *cpu_lookup;
        u32 key0 = 0;

        u32 *cpu_selected;
        u32 *cpu_iterator;
        u32 *cpu_max;
        u32 cpu_idx;

        cpu_max = bpf_map_lookup_elem(&cpus_count, &key0);
        if (!cpu_max)
                return XDP_ABORTED;

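        /* cpus_iterator is a per-CPU array, so each RX CPU round-robins
         * over the available CPUs independently (no atomic ops needed).
         */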
        cpu_iterator = bpf_map_lookup_elem(&cpus_iterator, &key0);
        if (!cpu_iterator)
                return XDP_ABORTED;
        cpu_idx = *cpu_iterator;

        *cpu_iterator += 1;
        if (*cpu_iterator == *cpu_max)
                *cpu_iterator = 0;

        cpu_selected = bpf_map_lookup_elem(&cpus_available, &cpu_idx);
        if (!cpu_selected)
                return XDP_ABORTED;
        cpu_dest = *cpu_selected;

        /* Count RX packet in map */
        rec = bpf_map_lookup_elem(&rx_cnt, &key0);
        if (!rec)
                return XDP_ABORTED;
        rec->processed++;

        if (cpu_dest >= MAX_CPUS) {
                rec->issue++;
                return XDP_ABORTED;
        }

        return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}

SEC("xdp_cpu_map3_proto_separate")
int xdp_prognum3_proto_separate(struct xdp_md *ctx)
{
        void *data_end = (void *)(long)ctx->data_end;
        void *data = (void *)(long)ctx->data;
        struct ethhdr *eth = data;
        u8 ip_proto = IPPROTO_UDP;
        struct datarec *rec;
        u16 eth_proto = 0;
        u64 l3_offset = 0;
        u32 cpu_dest = 0;
        u32 cpu_idx = 0;
        u32 *cpu_lookup;
        u32 key = 0;

        /* Count RX packet in map */
        rec = bpf_map_lookup_elem(&rx_cnt, &key);
        if (!rec)
                return XDP_ABORTED;
        rec->processed++;

        if (!(parse_eth(eth, data_end, &eth_proto, &l3_offset)))
                return XDP_PASS; /* Just skip */

        /* Extract L4 protocol */
        switch (eth_proto) {
        case ETH_P_IP:
                ip_proto = get_proto_ipv4(ctx, l3_offset);
                break;
        case ETH_P_IPV6:
                ip_proto = get_proto_ipv6(ctx, l3_offset);
                break;
        case ETH_P_ARP:
                cpu_idx = 0; /* ARP packet handled on separate CPU */
                break;
        default:
                cpu_idx = 0;
        }

        /* Choose CPU based on L4 protocol */
        switch (ip_proto) {
        case IPPROTO_ICMP:
        case IPPROTO_ICMPV6:
                cpu_idx = 2;
                break;
        case IPPROTO_TCP:
                cpu_idx = 0;
                break;
        case IPPROTO_UDP:
                cpu_idx = 1;
                break;
        default:
                cpu_idx = 0;
        }
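
        /* Note: cpu_idx is an index into cpus_available (filled by
         * userspace), not a raw CPU number. Also note that non-IP frames
         * (e.g. ARP) keep the initial ip_proto value (IPPROTO_UDP) and
         * thus end up in the UDP bucket above.
         */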
        cpu_lookup = bpf_map_lookup_elem(&cpus_available, &cpu_idx);
        if (!cpu_lookup)
                return XDP_ABORTED;
        cpu_dest = *cpu_lookup;

        if (cpu_dest >= MAX_CPUS) {
                rec->issue++;
                return XDP_ABORTED;
        }

        return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}

SEC("xdp_cpu_map4_ddos_filter_pktgen")
int xdp_prognum4_ddos_filter_pktgen(struct xdp_md *ctx)
{
        void *data_end = (void *)(long)ctx->data_end;
        void *data = (void *)(long)ctx->data;
        struct ethhdr *eth = data;
        u8 ip_proto = IPPROTO_UDP;
        struct datarec *rec;
        u16 eth_proto = 0;
        u64 l3_offset = 0;
        u32 cpu_dest = 0;
        u32 cpu_idx = 0;
        u16 dest_port;
        u32 *cpu_lookup;
        u32 key = 0;

        /* Count RX packet in map */
        rec = bpf_map_lookup_elem(&rx_cnt, &key);
        if (!rec)
                return XDP_ABORTED;
        rec->processed++;

        if (!(parse_eth(eth, data_end, &eth_proto, &l3_offset)))
                return XDP_PASS; /* Just skip */

        /* Extract L4 protocol */
        switch (eth_proto) {
        case ETH_P_IP:
                ip_proto = get_proto_ipv4(ctx, l3_offset);
                break;
        case ETH_P_IPV6:
                ip_proto = get_proto_ipv6(ctx, l3_offset);
                break;
        case ETH_P_ARP:
                cpu_idx = 0; /* ARP packet handled on separate CPU */
                break;
        default:
                cpu_idx = 0;
        }

        /* Choose CPU based on L4 protocol */
        switch (ip_proto) {
        case IPPROTO_ICMP:
        case IPPROTO_ICMPV6:
                cpu_idx = 2;
                break;
        case IPPROTO_TCP:
                cpu_idx = 0;
                break;
        case IPPROTO_UDP:
                cpu_idx = 1;
                /* DDoS filter UDP port 9 (pktgen) */
                dest_port = get_dest_port_ipv4_udp(ctx, l3_offset);
                if (dest_port == 9) {
                        rec->dropped++;
                        return XDP_DROP;
                }
                break;
        default:
                cpu_idx = 0;
        }

        cpu_lookup = bpf_map_lookup_elem(&cpus_available, &cpu_idx);
        if (!cpu_lookup)
                return XDP_ABORTED;
        cpu_dest = *cpu_lookup;

        if (cpu_dest >= MAX_CPUS) {
                rec->issue++;
                return XDP_ABORTED;
        }

        return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}

/* Hashing initval */
#define INITVAL 15485863

static __always_inline
u32 get_ipv4_hash_ip_pair(struct xdp_md *ctx, u64 nh_off)
{
        void *data_end = (void *)(long)ctx->data_end;
        void *data = (void *)(long)ctx->data;
        struct iphdr *iph = data + nh_off;
        u32 cpu_hash;

        if (iph + 1 > data_end)
                return 0;

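        /* Sum of saddr + daddr is commutative, which is what makes the
         * hash symmetric for both traffic directions.
         */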
        cpu_hash = iph->saddr + iph->daddr;
        cpu_hash = SuperFastHash((char *)&cpu_hash, 4, INITVAL + iph->protocol);

        return cpu_hash;
}

static __always_inline
u32 get_ipv6_hash_ip_pair(struct xdp_md *ctx, u64 nh_off)
{
        void *data_end = (void *)(long)ctx->data_end;
        void *data = (void *)(long)ctx->data;
        struct ipv6hdr *ip6h = data + nh_off;
        u32 cpu_hash;

        if (ip6h + 1 > data_end)
                return 0;

        cpu_hash = ip6h->saddr.s6_addr32[0] + ip6h->daddr.s6_addr32[0];
        cpu_hash += ip6h->saddr.s6_addr32[1] + ip6h->daddr.s6_addr32[1];
        cpu_hash += ip6h->saddr.s6_addr32[2] + ip6h->daddr.s6_addr32[2];
        cpu_hash += ip6h->saddr.s6_addr32[3] + ip6h->daddr.s6_addr32[3];
        cpu_hash = SuperFastHash((char *)&cpu_hash, 4, INITVAL + ip6h->nexthdr);

        return cpu_hash;
}

/* Load-Balance traffic based on hashing IP-addrs + L4-proto. The
 * hashing scheme is symmetric, meaning swapping IP src/dest still hits
 * the same CPU.
 */
SEC("xdp_cpu_map5_lb_hash_ip_pairs")
int xdp_prognum5_lb_hash_ip_pairs(struct xdp_md *ctx)
{
        void *data_end = (void *)(long)ctx->data_end;
        void *data = (void *)(long)ctx->data;
        struct ethhdr *eth = data;
        u8 ip_proto = IPPROTO_UDP;
        struct datarec *rec;
        u16 eth_proto = 0;
        u64 l3_offset = 0;
        u32 cpu_dest = 0;
        u32 cpu_idx = 0;
        u32 *cpu_lookup;
        u32 *cpu_max;
        u32 cpu_hash;
        u32 key = 0;

        /* Count RX packet in map */
        rec = bpf_map_lookup_elem(&rx_cnt, &key);
        if (!rec)
                return XDP_ABORTED;
        rec->processed++;

        cpu_max = bpf_map_lookup_elem(&cpus_count, &key);
        if (!cpu_max)
                return XDP_ABORTED;

        if (!(parse_eth(eth, data_end, &eth_proto, &l3_offset)))
                return XDP_PASS; /* Just skip */

        /* Hash for IPv4 and IPv6 */
        switch (eth_proto) {
        case ETH_P_IP:
                cpu_hash = get_ipv4_hash_ip_pair(ctx, l3_offset);
                break;
        case ETH_P_IPV6:
                cpu_hash = get_ipv6_hash_ip_pair(ctx, l3_offset);
                break;
        case ETH_P_ARP: /* ARP packet handled on CPU idx 0 */
        default:
                cpu_hash = 0;
        }

        /* Choose CPU based on hash; this assumes userspace has filled
         * cpus_available[0..*cpu_max-1].
         */
        cpu_idx = cpu_hash % *cpu_max;

        cpu_lookup = bpf_map_lookup_elem(&cpus_available, &cpu_idx);
        if (!cpu_lookup)
                return XDP_ABORTED;
        cpu_dest = *cpu_lookup;

        if (cpu_dest >= MAX_CPUS) {
                rec->issue++;
                return XDP_ABORTED;
        }

        return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}

char _license[] SEC("license") = "GPL";

/*** Trace point code ***/

/* Tracepoint format: /sys/kernel/debug/tracing/events/xdp/xdp_redirect/format
 * Code in: kernel/include/trace/events/xdp.h
 */
struct xdp_redirect_ctx {
        u64 __pad;      // First 8 bytes are not accessible by bpf code
        int prog_id;    // offset:8; size:4; signed:1;
        u32 act;        // offset:12 size:4; signed:0;
        int ifindex;    // offset:16 size:4; signed:1;
        int err;        // offset:20 size:4; signed:1;
        int to_ifindex; // offset:24 size:4; signed:1;
        u32 map_id;     // offset:28 size:4; signed:0;
        int map_index;  // offset:32 size:4; signed:1;
};                      // offset:36

enum {
        XDP_REDIRECT_SUCCESS = 0,
        XDP_REDIRECT_ERROR = 1
};

static __always_inline
int xdp_redirect_collect_stat(struct xdp_redirect_ctx *ctx)
{
        u32 key = XDP_REDIRECT_ERROR;
        struct datarec *rec;
        int err = ctx->err;

        if (!err)
                key = XDP_REDIRECT_SUCCESS;

        rec = bpf_map_lookup_elem(&redirect_err_cnt, &key);
        if (!rec)
                return 0;
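        /* Note: the 'dropped' field is used as a plain event counter here */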
        rec->dropped += 1;

        return 0; /* Indicate event was filtered (no further processing) */
        /*
         * Returning 1 here would allow e.g. a perf-record tracepoint
         * to see and record these events, but it doesn't work well
         * in practice, as stopping perf-record also unloads this
         * bpf_prog. Plus, there is the additional overhead of doing so.
         */
}

SEC("tracepoint/xdp/xdp_redirect_err")
int trace_xdp_redirect_err(struct xdp_redirect_ctx *ctx)
{
        return xdp_redirect_collect_stat(ctx);
}

SEC("tracepoint/xdp/xdp_redirect_map_err")
int trace_xdp_redirect_map_err(struct xdp_redirect_ctx *ctx)
{
        return xdp_redirect_collect_stat(ctx);
}

/* Tracepoint format: /sys/kernel/debug/tracing/events/xdp/xdp_exception/format
 * Code in: kernel/include/trace/events/xdp.h
 */
struct xdp_exception_ctx {
        u64 __pad;      // First 8 bytes are not accessible by bpf code
        int prog_id;    // offset:8; size:4; signed:1;
        u32 act;        // offset:12; size:4; signed:0;
        int ifindex;    // offset:16; size:4; signed:1;
};

SEC("tracepoint/xdp/xdp_exception")
int trace_xdp_exception(struct xdp_exception_ctx *ctx)
{
        struct datarec *rec;
        u32 key = 0;

        rec = bpf_map_lookup_elem(&exception_cnt, &key);
        if (!rec)
                return 1;
        rec->dropped += 1;

        return 0;
}

/* Tracepoint: /sys/kernel/debug/tracing/events/xdp/xdp_cpumap_enqueue/format
 * Code in: kernel/include/trace/events/xdp.h
 */
struct cpumap_enqueue_ctx {
        u64 __pad;              // First 8 bytes are not accessible by bpf code
        int map_id;             // offset:8; size:4; signed:1;
        u32 act;                // offset:12; size:4; signed:0;
        int cpu;                // offset:16; size:4; signed:1;
        unsigned int drops;     // offset:20; size:4; signed:0;
        unsigned int processed; // offset:24; size:4; signed:0;
        int to_cpu;             // offset:28; size:4; signed:1;
};

SEC("tracepoint/xdp/xdp_cpumap_enqueue")
int trace_xdp_cpumap_enqueue(struct cpumap_enqueue_ctx *ctx)
{
        u32 to_cpu = ctx->to_cpu;
        struct datarec *rec;

        if (to_cpu >= MAX_CPUS)
                return 1;

        rec = bpf_map_lookup_elem(&cpumap_enqueue_cnt, &to_cpu);
        if (!rec)
                return 0;
        rec->processed += ctx->processed;
        rec->dropped += ctx->drops;

        /* Record bulk events, then userspace can calc average bulk size */
        if (ctx->processed > 0)
                rec->issue += 1;

        /* It's possible to detect overload situations via this
         * tracepoint. This can be used for creating a feedback loop to
         * XDP, which can take appropriate actions to mitigate the
         * overload situation.
         */
        return 0;
}

/* Tracepoint: /sys/kernel/debug/tracing/events/xdp/xdp_cpumap_kthread/format
 * Code in: kernel/include/trace/events/xdp.h
 */
struct cpumap_kthread_ctx {
        u64 __pad;              // First 8 bytes are not accessible by bpf code
        int map_id;             // offset:8; size:4; signed:1;
        u32 act;                // offset:12; size:4; signed:0;
        int cpu;                // offset:16; size:4; signed:1;
        unsigned int drops;     // offset:20; size:4; signed:0;
        unsigned int processed; // offset:24; size:4; signed:0;
        int sched;              // offset:28; size:4; signed:1;
};

SEC("tracepoint/xdp/xdp_cpumap_kthread")
int trace_xdp_cpumap_kthread(struct cpumap_kthread_ctx *ctx)
{
        struct datarec *rec;
        u32 key = 0;

        rec = bpf_map_lookup_elem(&cpumap_kthread_cnt, &key);
        if (!rec)
                return 0;
        rec->processed += ctx->processed;
        rec->dropped += ctx->drops;

        /* Count times kthread yielded CPU via schedule call */
        if (ctx->sched)
                rec->issue++;

        return 0;
}