aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/bpf/cpumap.c8
-rw-r--r--kernel/bpf/devmap.c8
-rw-r--r--kernel/bpf/hashtab.c103
-rw-r--r--kernel/bpf/lpm_trie.c7
-rw-r--r--kernel/bpf/offload.c224
-rw-r--r--kernel/bpf/sockmap.c8
-rw-r--r--kernel/bpf/stackmap.c6
-rw-r--r--kernel/bpf/syscall.c71
-rw-r--r--kernel/bpf/verifier.c7
9 files changed, 337 insertions, 105 deletions
diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c
index ce5b669003b2..192151ec9d12 100644
--- a/kernel/bpf/cpumap.c
+++ b/kernel/bpf/cpumap.c
@@ -94,13 +94,7 @@ static struct bpf_map *cpu_map_alloc(union bpf_attr *attr)
if (!cmap)
return ERR_PTR(-ENOMEM);
- /* mandatory map attributes */
- cmap->map.map_type = attr->map_type;
- cmap->map.key_size = attr->key_size;
- cmap->map.value_size = attr->value_size;
- cmap->map.max_entries = attr->max_entries;
- cmap->map.map_flags = attr->map_flags;
- cmap->map.numa_node = bpf_map_attr_numa_node(attr);
+ bpf_map_init_from_attr(&cmap->map, attr);
/* Pre-limit array size based on NR_CPUS, not final CPU check */
if (cmap->map.max_entries > NR_CPUS) {
diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index ebdef54bf7df..565f9ece9115 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -93,13 +93,7 @@ static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
if (!dtab)
return ERR_PTR(-ENOMEM);
- /* mandatory map attributes */
- dtab->map.map_type = attr->map_type;
- dtab->map.key_size = attr->key_size;
- dtab->map.value_size = attr->value_size;
- dtab->map.max_entries = attr->max_entries;
- dtab->map.map_flags = attr->map_flags;
- dtab->map.numa_node = bpf_map_attr_numa_node(attr);
+ bpf_map_init_from_attr(&dtab->map, attr);
/* make sure page count doesn't overflow */
cost = (u64) dtab->map.max_entries * sizeof(struct bpf_dtab_netdev *);
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 3905d4bc5b80..b76828f23b49 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -227,7 +227,7 @@ static int alloc_extra_elems(struct bpf_htab *htab)
}
/* Called from syscall */
-static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
+static int htab_map_alloc_check(union bpf_attr *attr)
{
bool percpu = (attr->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
attr->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH);
@@ -241,9 +241,6 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
bool percpu_lru = (attr->map_flags & BPF_F_NO_COMMON_LRU);
bool prealloc = !(attr->map_flags & BPF_F_NO_PREALLOC);
int numa_node = bpf_map_attr_numa_node(attr);
- struct bpf_htab *htab;
- int err, i;
- u64 cost;
BUILD_BUG_ON(offsetof(struct htab_elem, htab) !=
offsetof(struct htab_elem, hash_node.pprev));
@@ -254,40 +251,68 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
/* LRU implementation is much complicated than other
* maps. Hence, limit to CAP_SYS_ADMIN for now.
*/
- return ERR_PTR(-EPERM);
+ return -EPERM;
if (attr->map_flags & ~HTAB_CREATE_FLAG_MASK)
/* reserved bits should not be used */
- return ERR_PTR(-EINVAL);
+ return -EINVAL;
if (!lru && percpu_lru)
- return ERR_PTR(-EINVAL);
+ return -EINVAL;
if (lru && !prealloc)
- return ERR_PTR(-ENOTSUPP);
+ return -ENOTSUPP;
if (numa_node != NUMA_NO_NODE && (percpu || percpu_lru))
- return ERR_PTR(-EINVAL);
+ return -EINVAL;
+
+ /* check sanity of attributes.
+ * value_size == 0 may be allowed in the future to use map as a set
+ */
+ if (attr->max_entries == 0 || attr->key_size == 0 ||
+ attr->value_size == 0)
+ return -EINVAL;
+
+ if (attr->key_size > MAX_BPF_STACK)
+ /* eBPF programs initialize keys on stack, so they cannot be
+ * larger than max stack size
+ */
+ return -E2BIG;
+
+ if (attr->value_size >= KMALLOC_MAX_SIZE -
+ MAX_BPF_STACK - sizeof(struct htab_elem))
+ /* if value_size is bigger, the user space won't be able to
+ * access the elements via bpf syscall. This check also makes
+ * sure that the elem_size doesn't overflow and it's
+ * kmalloc-able later in htab_map_update_elem()
+ */
+ return -E2BIG;
+
+ return 0;
+}
+
+static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
+{
+ bool percpu = (attr->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
+ attr->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH);
+ bool lru = (attr->map_type == BPF_MAP_TYPE_LRU_HASH ||
+ attr->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH);
+ /* percpu_lru means each cpu has its own LRU list.
+ * it is different from BPF_MAP_TYPE_PERCPU_HASH where
+ * the map's value itself is percpu. percpu_lru has
+ * nothing to do with the map's value.
+ */
+ bool percpu_lru = (attr->map_flags & BPF_F_NO_COMMON_LRU);
+ bool prealloc = !(attr->map_flags & BPF_F_NO_PREALLOC);
+ struct bpf_htab *htab;
+ int err, i;
+ u64 cost;
htab = kzalloc(sizeof(*htab), GFP_USER);
if (!htab)
return ERR_PTR(-ENOMEM);
- /* mandatory map attributes */
- htab->map.map_type = attr->map_type;
- htab->map.key_size = attr->key_size;
- htab->map.value_size = attr->value_size;
- htab->map.max_entries = attr->max_entries;
- htab->map.map_flags = attr->map_flags;
- htab->map.numa_node = numa_node;
-
- /* check sanity of attributes.
- * value_size == 0 may be allowed in the future to use map as a set
- */
- err = -EINVAL;
- if (htab->map.max_entries == 0 || htab->map.key_size == 0 ||
- htab->map.value_size == 0)
- goto free_htab;
+ bpf_map_init_from_attr(&htab->map, attr);
if (percpu_lru) {
/* ensure each CPU's lru list has >=1 elements.
@@ -304,22 +329,6 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
/* hash table size must be power of 2 */
htab->n_buckets = roundup_pow_of_two(htab->map.max_entries);
- err = -E2BIG;
- if (htab->map.key_size > MAX_BPF_STACK)
- /* eBPF programs initialize keys on stack, so they cannot be
- * larger than max stack size
- */
- goto free_htab;
-
- if (htab->map.value_size >= KMALLOC_MAX_SIZE -
- MAX_BPF_STACK - sizeof(struct htab_elem))
- /* if value_size is bigger, the user space won't be able to
- * access the elements via bpf syscall. This check also makes
- * sure that the elem_size doesn't overflow and it's
- * kmalloc-able later in htab_map_update_elem()
- */
- goto free_htab;
-
htab->elem_size = sizeof(struct htab_elem) +
round_up(htab->map.key_size, 8);
if (percpu)
@@ -327,6 +336,7 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
else
htab->elem_size += round_up(htab->map.value_size, 8);
+ err = -E2BIG;
/* prevent zero size kmalloc and check for u32 overflow */
if (htab->n_buckets == 0 ||
htab->n_buckets > U32_MAX / sizeof(struct bucket))
@@ -1143,6 +1153,7 @@ static void htab_map_free(struct bpf_map *map)
}
const struct bpf_map_ops htab_map_ops = {
+ .map_alloc_check = htab_map_alloc_check,
.map_alloc = htab_map_alloc,
.map_free = htab_map_free,
.map_get_next_key = htab_map_get_next_key,
@@ -1153,6 +1164,7 @@ const struct bpf_map_ops htab_map_ops = {
};
const struct bpf_map_ops htab_lru_map_ops = {
+ .map_alloc_check = htab_map_alloc_check,
.map_alloc = htab_map_alloc,
.map_free = htab_map_free,
.map_get_next_key = htab_map_get_next_key,
@@ -1236,6 +1248,7 @@ int bpf_percpu_hash_update(struct bpf_map *map, void *key, void *value,
}
const struct bpf_map_ops htab_percpu_map_ops = {
+ .map_alloc_check = htab_map_alloc_check,
.map_alloc = htab_map_alloc,
.map_free = htab_map_free,
.map_get_next_key = htab_map_get_next_key,
@@ -1245,6 +1258,7 @@ const struct bpf_map_ops htab_percpu_map_ops = {
};
const struct bpf_map_ops htab_lru_percpu_map_ops = {
+ .map_alloc_check = htab_map_alloc_check,
.map_alloc = htab_map_alloc,
.map_free = htab_map_free,
.map_get_next_key = htab_map_get_next_key,
@@ -1253,11 +1267,11 @@ const struct bpf_map_ops htab_lru_percpu_map_ops = {
.map_delete_elem = htab_lru_map_delete_elem,
};
-static struct bpf_map *fd_htab_map_alloc(union bpf_attr *attr)
+static int fd_htab_map_alloc_check(union bpf_attr *attr)
{
if (attr->value_size != sizeof(u32))
- return ERR_PTR(-EINVAL);
- return htab_map_alloc(attr);
+ return -EINVAL;
+ return htab_map_alloc_check(attr);
}
static void fd_htab_map_free(struct bpf_map *map)
@@ -1328,7 +1342,7 @@ static struct bpf_map *htab_of_map_alloc(union bpf_attr *attr)
if (IS_ERR(inner_map_meta))
return inner_map_meta;
- map = fd_htab_map_alloc(attr);
+ map = htab_map_alloc(attr);
if (IS_ERR(map)) {
bpf_map_meta_free(inner_map_meta);
return map;
@@ -1372,6 +1386,7 @@ static void htab_of_map_free(struct bpf_map *map)
}
const struct bpf_map_ops htab_of_maps_map_ops = {
+ .map_alloc_check = fd_htab_map_alloc_check,
.map_alloc = htab_of_map_alloc,
.map_free = htab_of_map_free,
.map_get_next_key = htab_map_get_next_key,
diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c
index 885e45479680..584e02227671 100644
--- a/kernel/bpf/lpm_trie.c
+++ b/kernel/bpf/lpm_trie.c
@@ -522,12 +522,7 @@ static struct bpf_map *trie_alloc(union bpf_attr *attr)
return ERR_PTR(-ENOMEM);
/* copy mandatory map attributes */
- trie->map.map_type = attr->map_type;
- trie->map.key_size = attr->key_size;
- trie->map.value_size = attr->value_size;
- trie->map.max_entries = attr->max_entries;
- trie->map.map_flags = attr->map_flags;
- trie->map.numa_node = bpf_map_attr_numa_node(attr);
+ bpf_map_init_from_attr(&trie->map, attr);
trie->data_size = attr->key_size -
offsetof(struct bpf_lpm_trie_key, data);
trie->max_prefixlen = trie->data_size * 8;
diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c
index 040d4e0edf3f..453785fa1881 100644
--- a/kernel/bpf/offload.c
+++ b/kernel/bpf/offload.c
@@ -24,15 +24,27 @@
#include <linux/rtnetlink.h>
#include <linux/rwsem.h>
-/* Protects bpf_prog_offload_devs and offload members of all progs.
+/* Protects bpf_prog_offload_devs, bpf_map_offload_devs and offload members
+ * of all progs.
* RTNL lock cannot be taken when holding this lock.
*/
static DECLARE_RWSEM(bpf_devs_lock);
static LIST_HEAD(bpf_prog_offload_devs);
+static LIST_HEAD(bpf_map_offload_devs);
+
+static int bpf_dev_offload_check(struct net_device *netdev)
+{
+ if (!netdev)
+ return -EINVAL;
+ if (!netdev->netdev_ops->ndo_bpf)
+ return -EOPNOTSUPP;
+ return 0;
+}
int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr)
{
- struct bpf_dev_offload *offload;
+ struct bpf_prog_offload *offload;
+ int err;
if (attr->prog_type != BPF_PROG_TYPE_SCHED_CLS &&
attr->prog_type != BPF_PROG_TYPE_XDP)
@@ -49,12 +61,15 @@ int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr)
offload->netdev = dev_get_by_index(current->nsproxy->net_ns,
attr->prog_ifindex);
- if (!offload->netdev)
- goto err_free;
+ err = bpf_dev_offload_check(offload->netdev);
+ if (err)
+ goto err_maybe_put;
down_write(&bpf_devs_lock);
- if (offload->netdev->reg_state != NETREG_REGISTERED)
+ if (offload->netdev->reg_state != NETREG_REGISTERED) {
+ err = -EINVAL;
goto err_unlock;
+ }
prog->aux->offload = offload;
list_add_tail(&offload->offloads, &bpf_prog_offload_devs);
dev_put(offload->netdev);
@@ -63,16 +78,17 @@ int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr)
return 0;
err_unlock:
up_write(&bpf_devs_lock);
- dev_put(offload->netdev);
-err_free:
+err_maybe_put:
+ if (offload->netdev)
+ dev_put(offload->netdev);
kfree(offload);
- return -EINVAL;
+ return err;
}
static int __bpf_offload_ndo(struct bpf_prog *prog, enum bpf_netdev_command cmd,
struct netdev_bpf *data)
{
- struct bpf_dev_offload *offload = prog->aux->offload;
+ struct bpf_prog_offload *offload = prog->aux->offload;
struct net_device *netdev;
ASSERT_RTNL();
@@ -80,8 +96,6 @@ static int __bpf_offload_ndo(struct bpf_prog *prog, enum bpf_netdev_command cmd,
if (!offload)
return -ENODEV;
netdev = offload->netdev;
- if (!netdev->netdev_ops->ndo_bpf)
- return -EOPNOTSUPP;
data->command = cmd;
@@ -110,7 +124,7 @@ exit_unlock:
int bpf_prog_offload_verify_insn(struct bpf_verifier_env *env,
int insn_idx, int prev_insn_idx)
{
- struct bpf_dev_offload *offload;
+ struct bpf_prog_offload *offload;
int ret = -ENODEV;
down_read(&bpf_devs_lock);
@@ -124,7 +138,7 @@ int bpf_prog_offload_verify_insn(struct bpf_verifier_env *env,
static void __bpf_prog_offload_destroy(struct bpf_prog *prog)
{
- struct bpf_dev_offload *offload = prog->aux->offload;
+ struct bpf_prog_offload *offload = prog->aux->offload;
struct netdev_bpf data = {};
data.offload.prog = prog;
@@ -238,11 +252,186 @@ int bpf_prog_offload_info_fill(struct bpf_prog_info *info,
const struct bpf_prog_ops bpf_offload_prog_ops = {
};
+static int bpf_map_offload_ndo(struct bpf_offloaded_map *offmap,
+ enum bpf_netdev_command cmd)
+{
+ struct netdev_bpf data = {};
+ struct net_device *netdev;
+
+ ASSERT_RTNL();
+
+ data.command = cmd;
+ data.offmap = offmap;
+ /* Caller must make sure netdev is valid */
+ netdev = offmap->netdev;
+
+ return netdev->netdev_ops->ndo_bpf(netdev, &data);
+}
+
+struct bpf_map *bpf_map_offload_map_alloc(union bpf_attr *attr)
+{
+ struct net *net = current->nsproxy->net_ns;
+ struct bpf_offloaded_map *offmap;
+ int err;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return ERR_PTR(-EPERM);
+ if (attr->map_type != BPF_MAP_TYPE_HASH)
+ return ERR_PTR(-EINVAL);
+
+ offmap = kzalloc(sizeof(*offmap), GFP_USER);
+ if (!offmap)
+ return ERR_PTR(-ENOMEM);
+
+ bpf_map_init_from_attr(&offmap->map, attr);
+
+ rtnl_lock();
+ down_write(&bpf_devs_lock);
+ offmap->netdev = __dev_get_by_index(net, attr->map_ifindex);
+ err = bpf_dev_offload_check(offmap->netdev);
+ if (err)
+ goto err_unlock;
+
+ err = bpf_map_offload_ndo(offmap, BPF_OFFLOAD_MAP_ALLOC);
+ if (err)
+ goto err_unlock;
+
+ list_add_tail(&offmap->offloads, &bpf_map_offload_devs);
+ up_write(&bpf_devs_lock);
+ rtnl_unlock();
+
+ return &offmap->map;
+
+err_unlock:
+ up_write(&bpf_devs_lock);
+ rtnl_unlock();
+ kfree(offmap);
+ return ERR_PTR(err);
+}
+
+static void __bpf_map_offload_destroy(struct bpf_offloaded_map *offmap)
+{
+ WARN_ON(bpf_map_offload_ndo(offmap, BPF_OFFLOAD_MAP_FREE));
+ /* Make sure BPF_MAP_GET_NEXT_ID can't find this dead map */
+ bpf_map_free_id(&offmap->map, true);
+ list_del_init(&offmap->offloads);
+ offmap->netdev = NULL;
+}
+
+void bpf_map_offload_map_free(struct bpf_map *map)
+{
+ struct bpf_offloaded_map *offmap = map_to_offmap(map);
+
+ rtnl_lock();
+ down_write(&bpf_devs_lock);
+ if (offmap->netdev)
+ __bpf_map_offload_destroy(offmap);
+ up_write(&bpf_devs_lock);
+ rtnl_unlock();
+
+ kfree(offmap);
+}
+
+int bpf_map_offload_lookup_elem(struct bpf_map *map, void *key, void *value)
+{
+ struct bpf_offloaded_map *offmap = map_to_offmap(map);
+ int ret = -ENODEV;
+
+ down_read(&bpf_devs_lock);
+ if (offmap->netdev)
+ ret = offmap->dev_ops->map_lookup_elem(offmap, key, value);
+ up_read(&bpf_devs_lock);
+
+ return ret;
+}
+
+int bpf_map_offload_update_elem(struct bpf_map *map,
+ void *key, void *value, u64 flags)
+{
+ struct bpf_offloaded_map *offmap = map_to_offmap(map);
+ int ret = -ENODEV;
+
+ if (unlikely(flags > BPF_EXIST))
+ return -EINVAL;
+
+ down_read(&bpf_devs_lock);
+ if (offmap->netdev)
+ ret = offmap->dev_ops->map_update_elem(offmap, key, value,
+ flags);
+ up_read(&bpf_devs_lock);
+
+ return ret;
+}
+
+int bpf_map_offload_delete_elem(struct bpf_map *map, void *key)
+{
+ struct bpf_offloaded_map *offmap = map_to_offmap(map);
+ int ret = -ENODEV;
+
+ down_read(&bpf_devs_lock);
+ if (offmap->netdev)
+ ret = offmap->dev_ops->map_delete_elem(offmap, key);
+ up_read(&bpf_devs_lock);
+
+ return ret;
+}
+
+int bpf_map_offload_get_next_key(struct bpf_map *map, void *key, void *next_key)
+{
+ struct bpf_offloaded_map *offmap = map_to_offmap(map);
+ int ret = -ENODEV;
+
+ down_read(&bpf_devs_lock);
+ if (offmap->netdev)
+ ret = offmap->dev_ops->map_get_next_key(offmap, key, next_key);
+ up_read(&bpf_devs_lock);
+
+ return ret;
+}
+
+bool bpf_offload_dev_match(struct bpf_prog *prog, struct bpf_map *map)
+{
+ struct bpf_offloaded_map *offmap;
+ struct bpf_prog_offload *offload;
+ bool ret;
+
+ if (!!bpf_prog_is_dev_bound(prog->aux) != !!bpf_map_is_dev_bound(map))
+ return false;
+ if (!bpf_prog_is_dev_bound(prog->aux))
+ return true;
+
+ down_read(&bpf_devs_lock);
+ offload = prog->aux->offload;
+ offmap = map_to_offmap(map);
+
+ ret = offload && offload->netdev == offmap->netdev;
+ up_read(&bpf_devs_lock);
+
+ return ret;
+}
+
+static void bpf_offload_orphan_all_progs(struct net_device *netdev)
+{
+ struct bpf_prog_offload *offload, *tmp;
+
+ list_for_each_entry_safe(offload, tmp, &bpf_prog_offload_devs, offloads)
+ if (offload->netdev == netdev)
+ __bpf_prog_offload_destroy(offload->prog);
+}
+
+static void bpf_offload_orphan_all_maps(struct net_device *netdev)
+{
+ struct bpf_offloaded_map *offmap, *tmp;
+
+ list_for_each_entry_safe(offmap, tmp, &bpf_map_offload_devs, offloads)
+ if (offmap->netdev == netdev)
+ __bpf_map_offload_destroy(offmap);
+}
+
static int bpf_offload_notification(struct notifier_block *notifier,
ulong event, void *ptr)
{
struct net_device *netdev = netdev_notifier_info_to_dev(ptr);
- struct bpf_dev_offload *offload, *tmp;
ASSERT_RTNL();
@@ -253,11 +442,8 @@ static int bpf_offload_notification(struct notifier_block *notifier,
break;
down_write(&bpf_devs_lock);
- list_for_each_entry_safe(offload, tmp, &bpf_prog_offload_devs,
- offloads) {
- if (offload->netdev == netdev)
- __bpf_prog_offload_destroy(offload->prog);
- }
+ bpf_offload_orphan_all_progs(netdev);
+ bpf_offload_orphan_all_maps(netdev);
up_write(&bpf_devs_lock);
break;
default:
diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c
index 079968680bc3..0314d1783d77 100644
--- a/kernel/bpf/sockmap.c
+++ b/kernel/bpf/sockmap.c
@@ -513,13 +513,7 @@ static struct bpf_map *sock_map_alloc(union bpf_attr *attr)
if (!stab)
return ERR_PTR(-ENOMEM);
- /* mandatory map attributes */
- stab->map.map_type = attr->map_type;
- stab->map.key_size = attr->key_size;
- stab->map.value_size = attr->value_size;
- stab->map.max_entries = attr->max_entries;
- stab->map.map_flags = attr->map_flags;
- stab->map.numa_node = bpf_map_attr_numa_node(attr);
+ bpf_map_init_from_attr(&stab->map, attr);
/* make sure page count doesn't overflow */
cost = (u64) stab->map.max_entries * sizeof(struct sock *);
diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
index 6c63c2222ea8..b0ecf43f5894 100644
--- a/kernel/bpf/stackmap.c
+++ b/kernel/bpf/stackmap.c
@@ -88,14 +88,10 @@ static struct bpf_map *stack_map_alloc(union bpf_attr *attr)
if (cost >= U32_MAX - PAGE_SIZE)
goto free_smap;
- smap->map.map_type = attr->map_type;
- smap->map.key_size = attr->key_size;
+ bpf_map_init_from_attr(&smap->map, attr);
smap->map.value_size = value_size;
- smap->map.max_entries = attr->max_entries;
- smap->map.map_flags = attr->map_flags;
smap->n_buckets = n_buckets;
smap->map.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;
- smap->map.numa_node = bpf_map_attr_numa_node(attr);
err = bpf_map_precharge_memlock(smap->map.pages);
if (err)
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 2bac0dc8baba..c691b9e972e3 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -94,18 +94,34 @@ static int check_uarg_tail_zero(void __user *uaddr,
return 0;
}
+const struct bpf_map_ops bpf_map_offload_ops = {
+ .map_alloc = bpf_map_offload_map_alloc,
+ .map_free = bpf_map_offload_map_free,
+};
+
static struct bpf_map *find_and_alloc_map(union bpf_attr *attr)
{
+ const struct bpf_map_ops *ops;
struct bpf_map *map;
+ int err;
- if (attr->map_type >= ARRAY_SIZE(bpf_map_types) ||
- !bpf_map_types[attr->map_type])
+ if (attr->map_type >= ARRAY_SIZE(bpf_map_types))
+ return ERR_PTR(-EINVAL);
+ ops = bpf_map_types[attr->map_type];
+ if (!ops)
return ERR_PTR(-EINVAL);
- map = bpf_map_types[attr->map_type]->map_alloc(attr);
+ if (ops->map_alloc_check) {
+ err = ops->map_alloc_check(attr);
+ if (err)
+ return ERR_PTR(err);
+ }
+ if (attr->map_ifindex)
+ ops = &bpf_map_offload_ops;
+ map = ops->map_alloc(attr);
if (IS_ERR(map))
return map;
- map->ops = bpf_map_types[attr->map_type];
+ map->ops = ops;
map->map_type = attr->map_type;
return map;
}
@@ -134,6 +150,16 @@ void bpf_map_area_free(void *area)
kvfree(area);
}
+void bpf_map_init_from_attr(struct bpf_map *map, union bpf_attr *attr)
+{
+ map->map_type = attr->map_type;
+ map->key_size = attr->key_size;
+ map->value_size = attr->value_size;
+ map->max_entries = attr->max_entries;
+ map->map_flags = attr->map_flags;
+ map->numa_node = bpf_map_attr_numa_node(attr);
+}
+
int bpf_map_precharge_memlock(u32 pages)
{
struct user_struct *user = get_current_user();
@@ -189,16 +215,25 @@ static int bpf_map_alloc_id(struct bpf_map *map)
return id > 0 ? 0 : id;
}
-static void bpf_map_free_id(struct bpf_map *map, bool do_idr_lock)
+void bpf_map_free_id(struct bpf_map *map, bool do_idr_lock)
{
unsigned long flags;
+ /* Offloaded maps are removed from the IDR store when their device
+ * disappears - even if someone holds an fd to them they are unusable,
+ * the memory is gone, all ops will fail; they are simply waiting for
+ * refcnt to drop to be freed.
+ */
+ if (!map->id)
+ return;
+
if (do_idr_lock)
spin_lock_irqsave(&map_idr_lock, flags);
else
__acquire(&map_idr_lock);
idr_remove(&map_idr, map->id);
+ map->id = 0;
if (do_idr_lock)
spin_unlock_irqrestore(&map_idr_lock, flags);
@@ -378,7 +413,7 @@ static int bpf_obj_name_cpy(char *dst, const char *src)
return 0;
}
-#define BPF_MAP_CREATE_LAST_FIELD map_name
+#define BPF_MAP_CREATE_LAST_FIELD map_ifindex
/* called via syscall */
static int map_create(union bpf_attr *attr)
{
@@ -566,8 +601,10 @@ static int map_lookup_elem(union bpf_attr *attr)
if (!value)
goto free_key;
- if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
- map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
+ if (bpf_map_is_dev_bound(map)) {
+ err = bpf_map_offload_lookup_elem(map, key, value);
+ } else if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
+ map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
err = bpf_percpu_hash_copy(map, key, value);
} else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
err = bpf_percpu_array_copy(map, key, value);
@@ -654,7 +691,10 @@ static int map_update_elem(union bpf_attr *attr)
goto free_value;
/* Need to create a kthread, thus must support schedule */
- if (map->map_type == BPF_MAP_TYPE_CPUMAP) {
+ if (bpf_map_is_dev_bound(map)) {
+ err = bpf_map_offload_update_elem(map, key, value, attr->flags);
+ goto out;
+ } else if (map->map_type == BPF_MAP_TYPE_CPUMAP) {
err = map->ops->map_update_elem(map, key, value, attr->flags);
goto out;
}
@@ -731,6 +771,11 @@ static int map_delete_elem(union bpf_attr *attr)
goto err_put;
}
+ if (bpf_map_is_dev_bound(map)) {
+ err = bpf_map_offload_delete_elem(map, key);
+ goto out;
+ }
+
preempt_disable();
__this_cpu_inc(bpf_prog_active);
rcu_read_lock();
@@ -738,7 +783,7 @@ static int map_delete_elem(union bpf_attr *attr)
rcu_read_unlock();
__this_cpu_dec(bpf_prog_active);
preempt_enable();
-
+out:
if (!err)
trace_bpf_map_delete_elem(map, ufd, key);
kfree(key);
@@ -788,9 +833,15 @@ static int map_get_next_key(union bpf_attr *attr)
if (!next_key)
goto free_key;
+ if (bpf_map_is_dev_bound(map)) {
+ err = bpf_map_offload_get_next_key(map, key, next_key);
+ goto out;
+ }
+
rcu_read_lock();
err = map->ops->map_get_next_key(map, key, next_key);
rcu_read_unlock();
+out:
if (err)
goto free_next_key;
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 48b61caa94cb..ceabb394d2dc 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -4816,6 +4816,13 @@ static int check_map_prog_compatibility(struct bpf_verifier_env *env,
return -EINVAL;
}
}
+
+ if ((bpf_prog_is_dev_bound(prog->aux) || bpf_map_is_dev_bound(map)) &&
+ !bpf_offload_dev_match(prog, map)) {
+ verbose(env, "offload device mismatch between prog and map\n");
+ return -EINVAL;
+ }
+
return 0;
}