| /* Copyright (c) 2016 Facebook |
| * |
| * This program is free software; you can redistribute it and/or |
| * modify it under the terms of version 2 of the GNU General Public |
| * License as published by the Free Software Foundation. |
| */ |
| #include <linux/cpumask.h> |
| #include <linux/spinlock.h> |
| #include <linux/percpu.h> |
| |
| #include "bpf_lru_list.h" |
| |
/* Number of free nodes a CPU tries to collect on its local free list
 * per refill; the same value is used as the default scan budget.
 */
#define LOCAL_FREE_TARGET		(128)
#define LOCAL_NR_SCANS			LOCAL_FREE_TARGET

/* Local list types start at BPF_LOCAL_LIST_T_OFFSET.  These helpers
 * recognise a local list type and translate it into an index into the
 * per-cpu lists[] array.
 */
#define IS_LOCAL_LIST_TYPE(t)		((t) >= BPF_LOCAL_LIST_T_OFFSET)
#define LOCAL_LIST_IDX(t)		((t) - BPF_LOCAL_LIST_T_OFFSET)
#define LOCAL_FREE_LIST_IDX		LOCAL_LIST_IDX(BPF_LRU_LOCAL_LIST_T_FREE)
#define LOCAL_PENDING_LIST_IDX		LOCAL_LIST_IDX(BPF_LRU_LOCAL_LIST_T_PENDING)
| |
| static int get_next_cpu(int cpu) |
| { |
| cpu = cpumask_next(cpu, cpu_possible_mask); |
| if (cpu >= nr_cpu_ids) |
| cpu = cpumask_first(cpu_possible_mask); |
| return cpu; |
| } |
| |
| /* Local list helpers */ |
| static struct list_head *local_free_list(struct bpf_lru_locallist *loc_l) |
| { |
| return &loc_l->lists[LOCAL_FREE_LIST_IDX]; |
| } |
| |
| static struct list_head *local_pending_list(struct bpf_lru_locallist *loc_l) |
| { |
| return &loc_l->lists[LOCAL_PENDING_LIST_IDX]; |
| } |
| |
| /* bpf_lru_node helpers */ |
| static bool bpf_lru_node_is_ref(const struct bpf_lru_node *node) |
| { |
| return node->ref; |
| } |
| |
| static void bpf_lru_list_count_inc(struct bpf_lru_list *l, |
| enum bpf_lru_list_type type) |
| { |
| if (type < NR_BPF_LRU_LIST_COUNT) |
| l->counts[type]++; |
| } |
| |
| static void bpf_lru_list_count_dec(struct bpf_lru_list *l, |
| enum bpf_lru_list_type type) |
| { |
| if (type < NR_BPF_LRU_LIST_COUNT) |
| l->counts[type]--; |
| } |
| |
| static void __bpf_lru_node_move_to_free(struct bpf_lru_list *l, |
| struct bpf_lru_node *node, |
| struct list_head *free_list, |
| enum bpf_lru_list_type tgt_free_type) |
| { |
| if (WARN_ON_ONCE(IS_LOCAL_LIST_TYPE(node->type))) |
| return; |
| |
| /* If the removing node is the next_inactive_rotation candidate, |
| * move the next_inactive_rotation pointer also. |
| */ |
| if (&node->list == l->next_inactive_rotation) |
| l->next_inactive_rotation = l->next_inactive_rotation->prev; |
| |
| bpf_lru_list_count_dec(l, node->type); |
| |
| node->type = tgt_free_type; |
| list_move(&node->list, free_list); |
| } |
| |
| /* Move nodes from local list to the LRU list */ |
| static void __bpf_lru_node_move_in(struct bpf_lru_list *l, |
| struct bpf_lru_node *node, |
| enum bpf_lru_list_type tgt_type) |
| { |
| if (WARN_ON_ONCE(!IS_LOCAL_LIST_TYPE(node->type)) || |
| WARN_ON_ONCE(IS_LOCAL_LIST_TYPE(tgt_type))) |
| return; |
| |
| bpf_lru_list_count_inc(l, tgt_type); |
| node->type = tgt_type; |
| node->ref = 0; |
| list_move(&node->list, &l->lists[tgt_type]); |
| } |
| |
| /* Move nodes between or within active and inactive list (like |
| * active to inactive, inactive to active or tail of active back to |
| * the head of active). |
| */ |
| static void __bpf_lru_node_move(struct bpf_lru_list *l, |
| struct bpf_lru_node *node, |
| enum bpf_lru_list_type tgt_type) |
| { |
| if (WARN_ON_ONCE(IS_LOCAL_LIST_TYPE(node->type)) || |
| WARN_ON_ONCE(IS_LOCAL_LIST_TYPE(tgt_type))) |
| return; |
| |
| if (node->type != tgt_type) { |
| bpf_lru_list_count_dec(l, node->type); |
| bpf_lru_list_count_inc(l, tgt_type); |
| node->type = tgt_type; |
| } |
| node->ref = 0; |
| |
| /* If the moving node is the next_inactive_rotation candidate, |
| * move the next_inactive_rotation pointer also. |
| */ |
| if (&node->list == l->next_inactive_rotation) |
| l->next_inactive_rotation = l->next_inactive_rotation->prev; |
| |
| list_move(&node->list, &l->lists[tgt_type]); |
| } |
| |
| static bool bpf_lru_list_inactive_low(const struct bpf_lru_list *l) |
| { |
| return l->counts[BPF_LRU_LIST_T_INACTIVE] < |
| l->counts[BPF_LRU_LIST_T_ACTIVE]; |
| } |
| |
| /* Rotate the active list: |
| * 1. Start from tail |
| * 2. If the node has the ref bit set, it will be rotated |
| * back to the head of active list with the ref bit cleared. |
| * Give this node one more chance to survive in the active list. |
| * 3. If the ref bit is not set, move it to the head of the |
| * inactive list. |
| * 4. It will at most scan nr_scans nodes |
| */ |
| static void __bpf_lru_list_rotate_active(struct bpf_lru *lru, |
| struct bpf_lru_list *l) |
| { |
| struct list_head *active = &l->lists[BPF_LRU_LIST_T_ACTIVE]; |
| struct bpf_lru_node *node, *tmp_node, *first_node; |
| unsigned int i = 0; |
| |
| first_node = list_first_entry(active, struct bpf_lru_node, list); |
| list_for_each_entry_safe_reverse(node, tmp_node, active, list) { |
| if (bpf_lru_node_is_ref(node)) |
| __bpf_lru_node_move(l, node, BPF_LRU_LIST_T_ACTIVE); |
| else |
| __bpf_lru_node_move(l, node, BPF_LRU_LIST_T_INACTIVE); |
| |
| if (++i == lru->nr_scans || node == first_node) |
| break; |
| } |
| } |
| |
| /* Rotate the inactive list. It starts from the next_inactive_rotation |
| * 1. If the node has ref bit set, it will be moved to the head |
| * of active list with the ref bit cleared. |
| * 2. If the node does not have ref bit set, it will leave it |
| * at its current location (i.e. do nothing) so that it can |
| * be considered during the next inactive_shrink. |
| * 3. It will at most scan nr_scans nodes |
| */ |
| static void __bpf_lru_list_rotate_inactive(struct bpf_lru *lru, |
| struct bpf_lru_list *l) |
| { |
| struct list_head *inactive = &l->lists[BPF_LRU_LIST_T_INACTIVE]; |
| struct list_head *cur, *next, *last; |
| struct bpf_lru_node *node; |
| unsigned int i = 0; |
| |
| if (list_empty(inactive)) |
| return; |
| |
| last = l->next_inactive_rotation->next; |
| if (last == inactive) |
| last = last->next; |
| |
| cur = l->next_inactive_rotation; |
| while (i < lru->nr_scans) { |
| if (cur == inactive) { |
| cur = cur->prev; |
| continue; |
| } |
| |
| node = list_entry(cur, struct bpf_lru_node, list); |
| next = cur->prev; |
| if (bpf_lru_node_is_ref(node)) |
| __bpf_lru_node_move(l, node, BPF_LRU_LIST_T_ACTIVE); |
| if (cur == last) |
| break; |
| cur = next; |
| i++; |
| } |
| |
| l->next_inactive_rotation = next; |
| } |
| |
| /* Shrink the inactive list. It starts from the tail of the |
| * inactive list and only move the nodes without the ref bit |
| * set to the designated free list. |
| */ |
| static unsigned int |
| __bpf_lru_list_shrink_inactive(struct bpf_lru *lru, |
| struct bpf_lru_list *l, |
| unsigned int tgt_nshrink, |
| struct list_head *free_list, |
| enum bpf_lru_list_type tgt_free_type) |
| { |
| struct list_head *inactive = &l->lists[BPF_LRU_LIST_T_INACTIVE]; |
| struct bpf_lru_node *node, *tmp_node, *first_node; |
| unsigned int nshrinked = 0; |
| unsigned int i = 0; |
| |
| first_node = list_first_entry(inactive, struct bpf_lru_node, list); |
| list_for_each_entry_safe_reverse(node, tmp_node, inactive, list) { |
| if (bpf_lru_node_is_ref(node)) { |
| __bpf_lru_node_move(l, node, BPF_LRU_LIST_T_ACTIVE); |
| } else if (lru->del_from_htab(lru->del_arg, node)) { |
| __bpf_lru_node_move_to_free(l, node, free_list, |
| tgt_free_type); |
| if (++nshrinked == tgt_nshrink) |
| break; |
| } |
| |
| if (++i == lru->nr_scans) |
| break; |
| } |
| |
| return nshrinked; |
| } |
| |
| /* 1. Rotate the active list (if needed) |
| * 2. Always rotate the inactive list |
| */ |
| static void __bpf_lru_list_rotate(struct bpf_lru *lru, struct bpf_lru_list *l) |
| { |
| if (bpf_lru_list_inactive_low(l)) |
| __bpf_lru_list_rotate_active(lru, l); |
| |
| __bpf_lru_list_rotate_inactive(lru, l); |
| } |
| |
| /* Calls __bpf_lru_list_shrink_inactive() to shrink some |
| * ref-bit-cleared nodes and move them to the designated |
| * free list. |
| * |
| * If it cannot get a free node after calling |
| * __bpf_lru_list_shrink_inactive(). It will just remove |
| * one node from either inactive or active list without |
| * honoring the ref-bit. It prefers inactive list to active |
| * list in this situation. |
| */ |
| static unsigned int __bpf_lru_list_shrink(struct bpf_lru *lru, |
| struct bpf_lru_list *l, |
| unsigned int tgt_nshrink, |
| struct list_head *free_list, |
| enum bpf_lru_list_type tgt_free_type) |
| |
| { |
| struct bpf_lru_node *node, *tmp_node; |
| struct list_head *force_shrink_list; |
| unsigned int nshrinked; |
| |
| nshrinked = __bpf_lru_list_shrink_inactive(lru, l, tgt_nshrink, |
| free_list, tgt_free_type); |
| if (nshrinked) |
| return nshrinked; |
| |
| /* Do a force shrink by ignoring the reference bit */ |
| if (!list_empty(&l->lists[BPF_LRU_LIST_T_INACTIVE])) |
| force_shrink_list = &l->lists[BPF_LRU_LIST_T_INACTIVE]; |
| else |
| force_shrink_list = &l->lists[BPF_LRU_LIST_T_ACTIVE]; |
| |
| list_for_each_entry_safe_reverse(node, tmp_node, force_shrink_list, |
| list) { |
| if (lru->del_from_htab(lru->del_arg, node)) { |
| __bpf_lru_node_move_to_free(l, node, free_list, |
| tgt_free_type); |
| return 1; |
| } |
| } |
| |
| return 0; |
| } |
| |
| /* Flush the nodes from the local pending list to the LRU list */ |
| static void __local_list_flush(struct bpf_lru_list *l, |
| struct bpf_lru_locallist *loc_l) |
| { |
| struct bpf_lru_node *node, *tmp_node; |
| |
| list_for_each_entry_safe_reverse(node, tmp_node, |
| local_pending_list(loc_l), list) { |
| if (bpf_lru_node_is_ref(node)) |
| __bpf_lru_node_move_in(l, node, BPF_LRU_LIST_T_ACTIVE); |
| else |
| __bpf_lru_node_move_in(l, node, |
| BPF_LRU_LIST_T_INACTIVE); |
| } |
| } |
| |
| static void bpf_lru_list_push_free(struct bpf_lru_list *l, |
| struct bpf_lru_node *node) |
| { |
| unsigned long flags; |
| |
| if (WARN_ON_ONCE(IS_LOCAL_LIST_TYPE(node->type))) |
| return; |
| |
| raw_spin_lock_irqsave(&l->lock, flags); |
| __bpf_lru_node_move(l, node, BPF_LRU_LIST_T_FREE); |
| raw_spin_unlock_irqrestore(&l->lock, flags); |
| } |
| |
| static void bpf_lru_list_pop_free_to_local(struct bpf_lru *lru, |
| struct bpf_lru_locallist *loc_l) |
| { |
| struct bpf_lru_list *l = &lru->common_lru.lru_list; |
| struct bpf_lru_node *node, *tmp_node; |
| unsigned int nfree = 0; |
| |
| raw_spin_lock(&l->lock); |
| |
| __local_list_flush(l, loc_l); |
| |
| __bpf_lru_list_rotate(lru, l); |
| |
| list_for_each_entry_safe(node, tmp_node, &l->lists[BPF_LRU_LIST_T_FREE], |
| list) { |
| __bpf_lru_node_move_to_free(l, node, local_free_list(loc_l), |
| BPF_LRU_LOCAL_LIST_T_FREE); |
| if (++nfree == LOCAL_FREE_TARGET) |
| break; |
| } |
| |
| if (nfree < LOCAL_FREE_TARGET) |
| __bpf_lru_list_shrink(lru, l, LOCAL_FREE_TARGET - nfree, |
| local_free_list(loc_l), |
| BPF_LRU_LOCAL_LIST_T_FREE); |
| |
| raw_spin_unlock(&l->lock); |
| } |
| |
| static void __local_list_add_pending(struct bpf_lru *lru, |
| struct bpf_lru_locallist *loc_l, |
| int cpu, |
| struct bpf_lru_node *node, |
| u32 hash) |
| { |
| *(u32 *)((void *)node + lru->hash_offset) = hash; |
| node->cpu = cpu; |
| node->type = BPF_LRU_LOCAL_LIST_T_PENDING; |
| node->ref = 0; |
| list_add(&node->list, local_pending_list(loc_l)); |
| } |
| |
| struct bpf_lru_node *__local_list_pop_free(struct bpf_lru_locallist *loc_l) |
| { |
| struct bpf_lru_node *node; |
| |
| node = list_first_entry_or_null(local_free_list(loc_l), |
| struct bpf_lru_node, |
| list); |
| if (node) |
| list_del(&node->list); |
| |
| return node; |
| } |
| |
| struct bpf_lru_node *__local_list_pop_pending(struct bpf_lru *lru, |
| struct bpf_lru_locallist *loc_l) |
| { |
| struct bpf_lru_node *node; |
| bool force = false; |
| |
| ignore_ref: |
| /* Get from the tail (i.e. older element) of the pending list. */ |
| list_for_each_entry_reverse(node, local_pending_list(loc_l), |
| list) { |
| if ((!bpf_lru_node_is_ref(node) || force) && |
| lru->del_from_htab(lru->del_arg, node)) { |
| list_del(&node->list); |
| return node; |
| } |
| } |
| |
| if (!force) { |
| force = true; |
| goto ignore_ref; |
| } |
| |
| return NULL; |
| } |
| |
| struct bpf_lru_node *bpf_lru_pop_free(struct bpf_lru *lru, u32 hash) |
| { |
| struct bpf_lru_locallist *loc_l, *steal_loc_l; |
| struct bpf_common_lru *clru = &lru->common_lru; |
| struct bpf_lru_node *node; |
| int steal, first_steal; |
| unsigned long flags; |
| int cpu = raw_smp_processor_id(); |
| |
| loc_l = per_cpu_ptr(clru->local_list, cpu); |
| |
| raw_spin_lock_irqsave(&loc_l->lock, flags); |
| |
| node = __local_list_pop_free(loc_l); |
| if (!node) { |
| bpf_lru_list_pop_free_to_local(lru, loc_l); |
| node = __local_list_pop_free(loc_l); |
| } |
| |
| if (node) |
| __local_list_add_pending(lru, loc_l, cpu, node, hash); |
| |
| raw_spin_unlock_irqrestore(&loc_l->lock, flags); |
| |
| if (node) |
| return node; |
| |
| /* No free nodes found from the local free list and |
| * the global LRU list. |
| * |
| * Steal from the local free/pending list of the |
| * current CPU and remote CPU in RR. It starts |
| * with the loc_l->next_steal CPU. |
| */ |
| |
| first_steal = loc_l->next_steal; |
| steal = first_steal; |
| do { |
| steal_loc_l = per_cpu_ptr(clru->local_list, steal); |
| |
| raw_spin_lock_irqsave(&steal_loc_l->lock, flags); |
| |
| node = __local_list_pop_free(steal_loc_l); |
| if (!node) |
| node = __local_list_pop_pending(lru, steal_loc_l); |
| |
| raw_spin_unlock_irqrestore(&steal_loc_l->lock, flags); |
| |
| steal = get_next_cpu(steal); |
| } while (!node && steal != first_steal); |
| |
| loc_l->next_steal = steal; |
| |
| if (node) { |
| raw_spin_lock_irqsave(&loc_l->lock, flags); |
| __local_list_add_pending(lru, loc_l, cpu, node, hash); |
| raw_spin_unlock_irqrestore(&loc_l->lock, flags); |
| } |
| |
| return node; |
| } |
| |
| void bpf_lru_push_free(struct bpf_lru *lru, struct bpf_lru_node *node) |
| { |
| unsigned long flags; |
| |
| if (WARN_ON_ONCE(node->type == BPF_LRU_LIST_T_FREE) || |
| WARN_ON_ONCE(node->type == BPF_LRU_LOCAL_LIST_T_FREE)) |
| return; |
| |
| if (node->type == BPF_LRU_LOCAL_LIST_T_PENDING) { |
| struct bpf_lru_locallist *loc_l; |
| |
| loc_l = per_cpu_ptr(lru->common_lru.local_list, node->cpu); |
| |
| raw_spin_lock_irqsave(&loc_l->lock, flags); |
| |
| if (unlikely(node->type != BPF_LRU_LOCAL_LIST_T_PENDING)) { |
| raw_spin_unlock_irqrestore(&loc_l->lock, flags); |
| goto check_lru_list; |
| } |
| |
| node->type = BPF_LRU_LOCAL_LIST_T_FREE; |
| node->ref = 0; |
| list_move(&node->list, local_free_list(loc_l)); |
| |
| raw_spin_unlock_irqrestore(&loc_l->lock, flags); |
| return; |
| } |
| |
| check_lru_list: |
| bpf_lru_list_push_free(&lru->common_lru.lru_list, node); |
| } |
| |
| void bpf_lru_populate(struct bpf_lru *lru, void *buf, u32 node_offset, |
| u32 elem_size, u32 nr_elems) |
| { |
| struct bpf_lru_list *l = &lru->common_lru.lru_list; |
| u32 i; |
| |
| for (i = 0; i < nr_elems; i++) { |
| struct bpf_lru_node *node; |
| |
| node = (struct bpf_lru_node *)(buf + node_offset); |
| node->type = BPF_LRU_LIST_T_FREE; |
| node->ref = 0; |
| list_add(&node->list, &l->lists[BPF_LRU_LIST_T_FREE]); |
| buf += elem_size; |
| } |
| } |
| |
| static void bpf_lru_locallist_init(struct bpf_lru_locallist *loc_l, int cpu) |
| { |
| int i; |
| |
| for (i = 0; i < NR_BPF_LRU_LOCAL_LIST_T; i++) |
| INIT_LIST_HEAD(&loc_l->lists[i]); |
| |
| loc_l->next_steal = cpu; |
| |
| raw_spin_lock_init(&loc_l->lock); |
| } |
| |
| static void bpf_lru_list_init(struct bpf_lru_list *l) |
| { |
| int i; |
| |
| for (i = 0; i < NR_BPF_LRU_LIST_T; i++) |
| INIT_LIST_HEAD(&l->lists[i]); |
| |
| for (i = 0; i < NR_BPF_LRU_LIST_COUNT; i++) |
| l->counts[i] = 0; |
| |
| l->next_inactive_rotation = &l->lists[BPF_LRU_LIST_T_INACTIVE]; |
| |
| raw_spin_lock_init(&l->lock); |
| } |
| |
| int bpf_lru_init(struct bpf_lru *lru, u32 hash_offset, |
| del_from_htab_func del_from_htab, void *del_arg) |
| { |
| int cpu; |
| struct bpf_common_lru *clru = &lru->common_lru; |
| |
| clru->local_list = alloc_percpu(struct bpf_lru_locallist); |
| if (!clru->local_list) |
| return -ENOMEM; |
| |
| for_each_possible_cpu(cpu) { |
| struct bpf_lru_locallist *loc_l; |
| |
| loc_l = per_cpu_ptr(clru->local_list, cpu); |
| bpf_lru_locallist_init(loc_l, cpu); |
| } |
| |
| bpf_lru_list_init(&clru->lru_list); |
| lru->nr_scans = LOCAL_NR_SCANS; |
| |
| lru->del_from_htab = del_from_htab; |
| lru->del_arg = del_arg; |
| lru->hash_offset = hash_offset; |
| |
| return 0; |
| } |
| |
| void bpf_lru_destroy(struct bpf_lru *lru) |
| { |
| free_percpu(lru->common_lru.local_list); |
| } |