// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2019 Facebook */
#include <linux/rculist.h>
#include <linux/list.h>
#include <linux/hash.h>
#include <linux/types.h>
#include <linux/spinlock.h>
#include <linux/bpf.h>
#include <net/bpf_sk_storage.h>
#include <net/sock.h>
#include <uapi/linux/sock_diag.h>
#include <uapi/linux/btf.h>
/* File-scope atomic counter.
 * NOTE(review): its users are outside this excerpt; presumably it is
 * the source of the per-map cache_idx kept in struct bpf_sk_storage_map
 * -- confirm at the map allocation site.
 */
static atomic_t cache_idx;
/* Bitmask made of BPF_F_NO_PREALLOC and BPF_F_CLONE.
 * NOTE(review): judging by the name, these are the map flags accepted
 * when a map is created; the actual check is not visible here.
 */
#define SK_STORAGE_CREATE_FLAG_MASK \
(BPF_F_NO_PREALLOC | BPF_F_CLONE)
/* One hash bucket of a bpf_sk_storage_map: a list head paired with a
 * raw spinlock. select_bucket() chooses the bucket for an elem by
 * hashing the elem's address.
 */
struct bucket {
struct hlist_head list; /* NOTE(review): presumably chains elems through their map_node -- confirm */
raw_spinlock_t lock; /* bucket lock; NOTE(review): presumably held while linking/unlinking on "list" -- confirm */
};
/* The map is not the primary owner of a bpf_sk_storage_elem.
* Instead, the sk->sk_bpf_storage is.
*
* The map (bpf_sk_storage_map) is for two purposes
* 1. Define the size of the "sk local storage". It is
* the map's value_size.
*
* 2. Maintain a list to keep track of all elems such
* that they can be cleaned up during the map destruction.
*
* When a bpf local storage is being looked up for a
* particular sk, the "bpf_map" pointer is actually used
* as the "key" to search in the list of elem in
* sk->sk_bpf_storage.
*
* Hence, consider sk->sk_bpf_storage to be a mini-map
* with the "bpf_map" pointer as the search key.
*/
struct bpf_sk_storage_map {
struct bpf_map map; /* embedded generic bpf_map */
/* Lookup elem does not require accessing the map.
 *
 * Updating/Deleting requires a bucket lock to
 * link/unlink the elem from the map. Having
 * multiple buckets reduces lock contention.
 */
struct bucket *buckets; /* bucket array indexed by select_bucket() */
u32 bucket_log; /* bit count handed to hash_ptr() in select_bucket() */
u16 elem_size; /* bytes allocated per elem in selem_alloc(); also the amount charged to the sk there */
u16 cache_idx; /* NOTE(review): presumably this map's slot in bpf_sk_storage.cache[] -- confirm */
};
/* Data part of a bpf_sk_storage_elem (embedded there as "sdata"):
 * the map pointer used as lookup key, followed by the data bytes.
 */
struct bpf_sk_storage_data {
/* smap is used as the search key when looking up
 * from sk->sk_bpf_storage.
 *
 * Put it in the same cacheline as the data to minimize
 * the number of cacheline accesses during the cache hit case.
 */
struct bpf_sk_storage_map __rcu *smap;
u8 data[] __aligned(8); /* flexible array, 8-byte aligned; NOTE(review): presumably the map value -- confirm */
};
/* One stored element. It is linked to both a bpf_sk_storage (through
 * snode) and a bpf_sk_storage_map (through map_node).
 */
struct bpf_sk_storage_elem {
struct hlist_node map_node; /* Linked to bpf_sk_storage_map; tested by selem_linked_to_map() */
struct hlist_node snode; /* Linked to bpf_sk_storage; tested by selem_linked_to_sk() */
struct bpf_sk_storage __rcu *sk_storage; /* NOTE(review): presumably the bpf_sk_storage this elem sits on -- confirm */
struct rcu_head rcu; /* NOTE(review): presumably for RCU-deferred freeing -- confirm */
/* 8 bytes hole */
/* The data is stored in another cacheline to minimize
 * the number of cacheline accesses during a cache hit.
 */
struct bpf_sk_storage_data sdata ____cacheline_aligned;
};
#define SELEM(_SDATA) container_of((_SDATA), struct bpf_sk_storage_elem, sdata)
#define SDATA(_SELEM) (&(_SELEM)->sdata)
#define BPF_SK_STORAGE_CACHE_SIZE 16
/* Storage hung off a socket: the list of its bpf_sk_storage_elem plus
 * a fixed-size array of RCU-annotated data pointers.
 */
struct bpf_sk_storage {
/* NOTE(review): presumably a lookup shortcut indexed by a map's
 * cache_idx -- confirm against the lookup path.
 */
struct bpf_sk_storage_data __rcu *cache[BPF_SK_STORAGE_CACHE_SIZE];
struct hlist_head list; /* List of bpf_sk_storage_elem */
struct sock *sk; /* The sk that owns the above "list" of
 * bpf_sk_storage_elem.
 */
struct rcu_head rcu; /* NOTE(review): presumably for RCU-deferred freeing -- confirm */
raw_spinlock_t lock; /* Protect adding/removing from the "list" */
};
/* Return the bucket of @smap that @selem belongs to.
 *
 * The elem's address is hashed with hash_ptr() using smap->bucket_log
 * bits, and the result is used as an offset into smap->buckets.
 */
static struct bucket *select_bucket(struct bpf_sk_storage_map *smap,
				    struct bpf_sk_storage_elem *selem)
{
	struct bucket *table = smap->buckets;

	return table + hash_ptr(selem, smap->bucket_log);
}
/* Charge @size bytes against @sk's sk_omem_alloc counter.
 *
 * Returns 0 after adding @size to sk->sk_omem_alloc. Returns -ENOMEM,
 * leaving the counter untouched, when @size by itself is larger than
 * sysctl_optmem_max or when the current count plus @size is not below
 * sysctl_optmem_max.
 */
static int omem_charge(struct sock *sk, unsigned int size)
{
	/* mirrors the limit test done in sock_kmalloc() */
	if (size > sysctl_optmem_max)
		return -ENOMEM;

	if (atomic_read(&sk->sk_omem_alloc) + size >= sysctl_optmem_max)
		return -ENOMEM;

	atomic_add(size, &sk->sk_omem_alloc);
	return 0;
}
/* Report whether @selem's snode is currently hashed, i.e. whether the
 * elem is on a bpf_sk_storage list.
 */
static bool selem_linked_to_sk(const struct bpf_sk_storage_elem *selem)
{
	if (hlist_unhashed(&selem->snode))
		return false;

	return true;
}
/* Report whether @selem's map_node is currently hashed, i.e. whether
 * the elem is on a bpf_sk_storage_map list.
 */
static bool selem_linked_to_map(const struct bpf_sk_storage_elem *selem)
{
	if (hlist_unhashed(&selem->map_node))
		return false;

	return true;
}
static struct bpf_sk_storage_elem *selem_alloc(struct bpf_sk_storage_map *smap,
struct sock *sk, void *value,
bool charge_omem)
{
struct bpf_sk_storage_elem *selem;
if (charge_omem && omem_charge(sk, smap->elem_size))
return NULL;
selem = kzalloc(smap->elem_size, GFP_ATOMIC | __GFP_NOWARN);
if