Skip to content

Commit 951f38c

Browse files
author
Alexei Starovoitov
committed
Merge branch 'bpf-multi-prog-prep'
Jakub Sitnicki says: ==================== This patch set prepares the ground for link-based multi-prog attachment for future netns attach types, with BPF_SK_LOOKUP attach type in mind [0]. Two changes are needed in order to attach and run a series of BPF programs: 1) a bpf_prog_array of programs to run (patch #2), and 2) a list of attached links to keep track of attachments (patch #3). Nothing changes for BPF flow_dissector. Just as before, only one program can be attached to netns. In v3 I've simplified patch #2 that introduces bpf_prog_array to take advantage of the fact that it will hold at most one program for now. In particular, I'm no longer using bpf_prog_array_copy. It turned out to be less suitable for link operations than I thought as it fails to append the same BPF program. bpf_prog_array_replace_item is also gone, because we know we always want to replace the first element in prog_array. Naturally the code that handles bpf_prog_array will need to change once more when there is a program type that allows multi-prog attachment. But I feel it will be better to do it gradually and present it together with tests that actually exercise multi-prog code paths. [0] https://lore.kernel.org/bpf/20200511185218.1422406-1-jakub@cloudflare.com/ v2 -> v3: - Don't check if run_array is null in link update callback. (Martin) - Allow updating the link with the same BPF program. (Andrii) - Add patch #4 with a test for the above case. - Kill bpf_prog_array_replace_item. Access the run_array directly. - Switch from bpf_prog_array_copy() to bpf_prog_array_alloc(1, ...). - Replace rcu_deref_protected & RCU_INIT_POINTER with rcu_replace_pointer. - Drop Andrii's Ack from patch #2. Code changed. v1 -> v2: - Show with a (void) cast that bpf_prog_array_replace_item() return value is ignored on purpose. (Andrii) - Explain why bpf-cgroup cannot replace programs in bpf_prog_array based on bpf_prog pointer comparison in patch #2 description. 
(Andrii) ==================== Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2 parents 517bbe1 + 6ebb85c commit 951f38c

5 files changed

Lines changed: 160 additions & 76 deletions

File tree

include/net/flow_dissector.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -372,7 +372,8 @@ flow_dissector_init_keys(struct flow_dissector_key_control *key_control,
372372
}
373373

374374
#ifdef CONFIG_BPF_SYSCALL
375-
int flow_dissector_bpf_prog_attach(struct net *net, struct bpf_prog *prog);
375+
int flow_dissector_bpf_prog_attach_check(struct net *net,
376+
struct bpf_prog *prog);
376377
#endif /* CONFIG_BPF_SYSCALL */
377378

378379
#endif

include/net/netns/bpf.h

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,10 +9,13 @@
99
#include <linux/bpf-netns.h>
1010

1111
struct bpf_prog;
12+
struct bpf_prog_array;
1213

1314
struct netns_bpf {
14-
struct bpf_prog __rcu *progs[MAX_NETNS_BPF_ATTACH_TYPE];
15-
struct bpf_link *links[MAX_NETNS_BPF_ATTACH_TYPE];
15+
/* Array of programs to run compiled from progs or links */
16+
struct bpf_prog_array __rcu *run_array[MAX_NETNS_BPF_ATTACH_TYPE];
17+
struct bpf_prog *progs[MAX_NETNS_BPF_ATTACH_TYPE];
18+
struct list_head links[MAX_NETNS_BPF_ATTACH_TYPE];
1619
};
1720

1821
#endif /* __NETNS_BPF_H__ */

kernel/bpf/net_namespace.c

Lines changed: 113 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -19,18 +19,21 @@ struct bpf_netns_link {
1919
* with netns_bpf_mutex held.
2020
*/
2121
struct net *net;
22+
struct list_head node; /* node in list of links attached to net */
2223
};
2324

2425
/* Protects updates to netns_bpf */
2526
DEFINE_MUTEX(netns_bpf_mutex);
2627

2728
/* Must be called with netns_bpf_mutex held. */
28-
static void __net_exit bpf_netns_link_auto_detach(struct bpf_link *link)
29+
static void netns_bpf_run_array_detach(struct net *net,
30+
enum netns_bpf_attach_type type)
2931
{
30-
struct bpf_netns_link *net_link =
31-
container_of(link, struct bpf_netns_link, link);
32+
struct bpf_prog_array *run_array;
3233

33-
net_link->net = NULL;
34+
run_array = rcu_replace_pointer(net->bpf.run_array[type], NULL,
35+
lockdep_is_held(&netns_bpf_mutex));
36+
bpf_prog_array_free(run_array);
3437
}
3538

3639
static void bpf_netns_link_release(struct bpf_link *link)
@@ -54,8 +57,8 @@ static void bpf_netns_link_release(struct bpf_link *link)
5457
if (!net)
5558
goto out_unlock;
5659

57-
net->bpf.links[type] = NULL;
58-
RCU_INIT_POINTER(net->bpf.progs[type], NULL);
60+
netns_bpf_run_array_detach(net, type);
61+
list_del(&net_link->node);
5962

6063
out_unlock:
6164
mutex_unlock(&netns_bpf_mutex);
@@ -76,6 +79,7 @@ static int bpf_netns_link_update_prog(struct bpf_link *link,
7679
struct bpf_netns_link *net_link =
7780
container_of(link, struct bpf_netns_link, link);
7881
enum netns_bpf_attach_type type = net_link->netns_type;
82+
struct bpf_prog_array *run_array;
7983
struct net *net;
8084
int ret = 0;
8185

@@ -93,8 +97,11 @@ static int bpf_netns_link_update_prog(struct bpf_link *link,
9397
goto out_unlock;
9498
}
9599

100+
run_array = rcu_dereference_protected(net->bpf.run_array[type],
101+
lockdep_is_held(&netns_bpf_mutex));
102+
WRITE_ONCE(run_array->items[0].prog, new_prog);
103+
96104
old_prog = xchg(&link->prog, new_prog);
97-
rcu_assign_pointer(net->bpf.progs[type], new_prog);
98105
bpf_prog_put(old_prog);
99106

100107
out_unlock:
@@ -142,14 +149,38 @@ static const struct bpf_link_ops bpf_netns_link_ops = {
142149
.show_fdinfo = bpf_netns_link_show_fdinfo,
143150
};
144151

152+
/* Must be called with netns_bpf_mutex held. */
153+
static int __netns_bpf_prog_query(const union bpf_attr *attr,
154+
union bpf_attr __user *uattr,
155+
struct net *net,
156+
enum netns_bpf_attach_type type)
157+
{
158+
__u32 __user *prog_ids = u64_to_user_ptr(attr->query.prog_ids);
159+
struct bpf_prog_array *run_array;
160+
u32 prog_cnt = 0, flags = 0;
161+
162+
run_array = rcu_dereference_protected(net->bpf.run_array[type],
163+
lockdep_is_held(&netns_bpf_mutex));
164+
if (run_array)
165+
prog_cnt = bpf_prog_array_length(run_array);
166+
167+
if (copy_to_user(&uattr->query.attach_flags, &flags, sizeof(flags)))
168+
return -EFAULT;
169+
if (copy_to_user(&uattr->query.prog_cnt, &prog_cnt, sizeof(prog_cnt)))
170+
return -EFAULT;
171+
if (!attr->query.prog_cnt || !prog_ids || !prog_cnt)
172+
return 0;
173+
174+
return bpf_prog_array_copy_to_user(run_array, prog_ids,
175+
attr->query.prog_cnt);
176+
}
177+
145178
int netns_bpf_prog_query(const union bpf_attr *attr,
146179
union bpf_attr __user *uattr)
147180
{
148-
__u32 __user *prog_ids = u64_to_user_ptr(attr->query.prog_ids);
149-
u32 prog_id, prog_cnt = 0, flags = 0;
150181
enum netns_bpf_attach_type type;
151-
struct bpf_prog *attached;
152182
struct net *net;
183+
int ret;
153184

154185
if (attr->query.query_flags)
155186
return -EINVAL;
@@ -162,33 +193,19 @@ int netns_bpf_prog_query(const union bpf_attr *attr,
162193
if (IS_ERR(net))
163194
return PTR_ERR(net);
164195

165-
rcu_read_lock();
166-
attached = rcu_dereference(net->bpf.progs[type]);
167-
if (attached) {
168-
prog_cnt = 1;
169-
prog_id = attached->aux->id;
170-
}
171-
rcu_read_unlock();
196+
mutex_lock(&netns_bpf_mutex);
197+
ret = __netns_bpf_prog_query(attr, uattr, net, type);
198+
mutex_unlock(&netns_bpf_mutex);
172199

173200
put_net(net);
174-
175-
if (copy_to_user(&uattr->query.attach_flags, &flags, sizeof(flags)))
176-
return -EFAULT;
177-
if (copy_to_user(&uattr->query.prog_cnt, &prog_cnt, sizeof(prog_cnt)))
178-
return -EFAULT;
179-
180-
if (!attr->query.prog_cnt || !prog_ids || !prog_cnt)
181-
return 0;
182-
183-
if (copy_to_user(prog_ids, &prog_id, sizeof(u32)))
184-
return -EFAULT;
185-
186-
return 0;
201+
return ret;
187202
}
188203

189204
int netns_bpf_prog_attach(const union bpf_attr *attr, struct bpf_prog *prog)
190205
{
206+
struct bpf_prog_array *run_array;
191207
enum netns_bpf_attach_type type;
208+
struct bpf_prog *attached;
192209
struct net *net;
193210
int ret;
194211

@@ -200,19 +217,47 @@ int netns_bpf_prog_attach(const union bpf_attr *attr, struct bpf_prog *prog)
200217
mutex_lock(&netns_bpf_mutex);
201218

202219
/* Attaching prog directly is not compatible with links */
203-
if (net->bpf.links[type]) {
220+
if (!list_empty(&net->bpf.links[type])) {
204221
ret = -EEXIST;
205222
goto out_unlock;
206223
}
207224

208225
switch (type) {
209226
case NETNS_BPF_FLOW_DISSECTOR:
210-
ret = flow_dissector_bpf_prog_attach(net, prog);
227+
ret = flow_dissector_bpf_prog_attach_check(net, prog);
211228
break;
212229
default:
213230
ret = -EINVAL;
214231
break;
215232
}
233+
if (ret)
234+
goto out_unlock;
235+
236+
attached = net->bpf.progs[type];
237+
if (attached == prog) {
238+
/* The same program cannot be attached twice */
239+
ret = -EINVAL;
240+
goto out_unlock;
241+
}
242+
243+
run_array = rcu_dereference_protected(net->bpf.run_array[type],
244+
lockdep_is_held(&netns_bpf_mutex));
245+
if (run_array) {
246+
WRITE_ONCE(run_array->items[0].prog, prog);
247+
} else {
248+
run_array = bpf_prog_array_alloc(1, GFP_KERNEL);
249+
if (!run_array) {
250+
ret = -ENOMEM;
251+
goto out_unlock;
252+
}
253+
run_array->items[0].prog = prog;
254+
rcu_assign_pointer(net->bpf.run_array[type], run_array);
255+
}
256+
257+
net->bpf.progs[type] = prog;
258+
if (attached)
259+
bpf_prog_put(attached);
260+
216261
out_unlock:
217262
mutex_unlock(&netns_bpf_mutex);
218263

@@ -226,14 +271,14 @@ static int __netns_bpf_prog_detach(struct net *net,
226271
struct bpf_prog *attached;
227272

228273
/* Progs attached via links cannot be detached */
229-
if (net->bpf.links[type])
274+
if (!list_empty(&net->bpf.links[type]))
230275
return -EINVAL;
231276

232-
attached = rcu_dereference_protected(net->bpf.progs[type],
233-
lockdep_is_held(&netns_bpf_mutex));
277+
attached = net->bpf.progs[type];
234278
if (!attached)
235279
return -ENOENT;
236-
RCU_INIT_POINTER(net->bpf.progs[type], NULL);
280+
netns_bpf_run_array_detach(net, type);
281+
net->bpf.progs[type] = NULL;
237282
bpf_prog_put(attached);
238283
return 0;
239284
}
@@ -257,27 +302,27 @@ int netns_bpf_prog_detach(const union bpf_attr *attr)
257302
static int netns_bpf_link_attach(struct net *net, struct bpf_link *link,
258303
enum netns_bpf_attach_type type)
259304
{
260-
struct bpf_prog *prog;
305+
struct bpf_netns_link *net_link =
306+
container_of(link, struct bpf_netns_link, link);
307+
struct bpf_prog_array *run_array;
261308
int err;
262309

263310
mutex_lock(&netns_bpf_mutex);
264311

265312
/* Allow attaching only one prog or link for now */
266-
if (net->bpf.links[type]) {
313+
if (!list_empty(&net->bpf.links[type])) {
267314
err = -E2BIG;
268315
goto out_unlock;
269316
}
270317
/* Links are not compatible with attaching prog directly */
271-
prog = rcu_dereference_protected(net->bpf.progs[type],
272-
lockdep_is_held(&netns_bpf_mutex));
273-
if (prog) {
318+
if (net->bpf.progs[type]) {
274319
err = -EEXIST;
275320
goto out_unlock;
276321
}
277322

278323
switch (type) {
279324
case NETNS_BPF_FLOW_DISSECTOR:
280-
err = flow_dissector_bpf_prog_attach(net, link->prog);
325+
err = flow_dissector_bpf_prog_attach_check(net, link->prog);
281326
break;
282327
default:
283328
err = -EINVAL;
@@ -286,7 +331,15 @@ static int netns_bpf_link_attach(struct net *net, struct bpf_link *link,
286331
if (err)
287332
goto out_unlock;
288333

289-
net->bpf.links[type] = link;
334+
run_array = bpf_prog_array_alloc(1, GFP_KERNEL);
335+
if (!run_array) {
336+
err = -ENOMEM;
337+
goto out_unlock;
338+
}
339+
run_array->items[0].prog = link->prog;
340+
rcu_assign_pointer(net->bpf.run_array[type], run_array);
341+
342+
list_add_tail(&net_link->node, &net->bpf.links[type]);
290343

291344
out_unlock:
292345
mutex_unlock(&netns_bpf_mutex);
@@ -345,23 +398,34 @@ int netns_bpf_link_create(const union bpf_attr *attr, struct bpf_prog *prog)
345398
return err;
346399
}
347400

401+
static int __net_init netns_bpf_pernet_init(struct net *net)
402+
{
403+
int type;
404+
405+
for (type = 0; type < MAX_NETNS_BPF_ATTACH_TYPE; type++)
406+
INIT_LIST_HEAD(&net->bpf.links[type]);
407+
408+
return 0;
409+
}
410+
348411
static void __net_exit netns_bpf_pernet_pre_exit(struct net *net)
349412
{
350413
enum netns_bpf_attach_type type;
351-
struct bpf_link *link;
414+
struct bpf_netns_link *net_link;
352415

353416
mutex_lock(&netns_bpf_mutex);
354417
for (type = 0; type < MAX_NETNS_BPF_ATTACH_TYPE; type++) {
355-
link = net->bpf.links[type];
356-
if (link)
357-
bpf_netns_link_auto_detach(link);
358-
else
359-
__netns_bpf_prog_detach(net, type);
418+
netns_bpf_run_array_detach(net, type);
419+
list_for_each_entry(net_link, &net->bpf.links[type], node)
420+
net_link->net = NULL; /* auto-detach link */
421+
if (net->bpf.progs[type])
422+
bpf_prog_put(net->bpf.progs[type]);
360423
}
361424
mutex_unlock(&netns_bpf_mutex);
362425
}
363426

364427
static struct pernet_operations netns_bpf_pernet_ops __net_initdata = {
428+
.init = netns_bpf_pernet_init,
365429
.pre_exit = netns_bpf_pernet_pre_exit,
366430
};
367431

0 commit comments

Comments
 (0)