net_sched: fq_codel: cache skb->truesize into skb->cb
Now that we defer skb drops, it makes sense to keep a copy of
skb->truesize in struct codel_skb_cb, to avoid one cache line miss per
dropped skb in fq_codel_drop() and reduce latencies a bit further.
The drop loop already reads qdisc_pkt_len(skb), which lives in skb->cb,
so fetching the cached value from the same area avoids touching the
cache line that holds skb->truesize.
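The other half of the change (not visible in the hunks below) is the new
field itself in struct codel_skb_cb; a minimal sketch, where the exact
type of mem_usage is an assumption here:

	/* Qdiscs using codel plugin must use codel_skb_cb in their own cb[] */
	struct codel_skb_cb {
		codel_time_t	enqueue_time;
		unsigned int	mem_usage;	/* skb->truesize snapshot taken at enqueue (type assumed) */
	};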
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c
index f715195..a5ea0e9 100644
--- a/net/sched/sch_fq_codel.c
+++ b/net/sched/sch_fq_codel.c
@@ -172,7 +172,7 @@ static unsigned int fq_codel_drop(struct Qdisc *sch, unsigned int max_packets,
 	do {
 		skb = dequeue_head(flow);
 		len += qdisc_pkt_len(skb);
-		mem += skb->truesize;
+		mem += get_codel_cb(skb)->mem_usage;
 		__qdisc_drop(skb, to_free);
 	} while (++i < max_packets && len < threshold);
 
@@ -216,7 +216,8 @@ static int fq_codel_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 		flow->deficit = q->quantum;
 		flow->dropped = 0;
 	}
-	q->memory_usage += skb->truesize;
+	get_codel_cb(skb)->mem_usage = skb->truesize;
+	q->memory_usage += get_codel_cb(skb)->mem_usage;
 	memory_limited = q->memory_usage > q->memory_limit;
 	if (++sch->q.qlen <= sch->limit && !memory_limited)
 		return NET_XMIT_SUCCESS;
@@ -267,7 +268,7 @@ static struct sk_buff *dequeue_func(struct codel_vars *vars, void *ctx)
 	if (flow->head) {
 		skb = dequeue_head(flow);
 		q->backlogs[flow - q->flows] -= qdisc_pkt_len(skb);
-		q->memory_usage -= skb->truesize;
+		q->memory_usage -= get_codel_cb(skb)->mem_usage;
 		sch->q.qlen--;
 		sch->qstats.backlog -= qdisc_pkt_len(skb);
 	}
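For reference, get_codel_cb() used in the hunks above is the existing
helper in sch_fq_codel.c that maps the qdisc private cb area onto
struct codel_skb_cb; sketched here from the surrounding code:

	static struct codel_skb_cb *get_codel_cb(const struct sk_buff *skb)
	{
		/* check that codel_skb_cb fits in the qdisc private cb area */
		qdisc_cb_private_validate(skb, sizeof(struct codel_skb_cb));
		return (struct codel_skb_cb *)qdisc_skb_cb(skb)->data;
	}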