Skip to content

Commit 82dd23e

Browse files
urezkitorvalds
authored andcommitted
mm/vmalloc.c: preload a CPU with one object for split purpose
Refactor the NE_FIT_TYPE split case when it comes to an allocation of one extra object. We need it in order to build a remaining space. The preload is done per CPU in non-atomic context with GFP_KERNEL flags. More permissive parameters can be beneficial for systems which are suffer from high memory pressure or low memory condition. For example on my KVM system(4xCPUs, no swap, 256MB RAM) i can simulate the failure of page allocation with GFP_NOWAIT flags. Using "stress-ng" tool and starting N workers spinning on fork() and exit(), i can trigger below trace: <snip> [ 179.815161] stress-ng-fork: page allocation failure: order:0, mode:0x40800(GFP_NOWAIT|__GFP_COMP), nodemask=(null),cpuset=/,mems_allowed=0 [ 179.815168] CPU: 0 PID: 12612 Comm: stress-ng-fork Not tainted 5.2.0-rc3+ thesofproject#1003 [ 179.815170] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1 04/01/2014 [ 179.815171] Call Trace: [ 179.815178] dump_stack+0x5c/0x7b [ 179.815182] warn_alloc+0x108/0x190 [ 179.815187] __alloc_pages_slowpath+0xdc7/0xdf0 [ 179.815191] __alloc_pages_nodemask+0x2de/0x330 [ 179.815194] cache_grow_begin+0x77/0x420 [ 179.815197] fallback_alloc+0x161/0x200 [ 179.815200] kmem_cache_alloc+0x1c9/0x570 [ 179.815202] alloc_vmap_area+0x32c/0x990 [ 179.815206] __get_vm_area_node+0xb0/0x170 [ 179.815208] __vmalloc_node_range+0x6d/0x230 [ 179.815211] ? _do_fork+0xce/0x3d0 [ 179.815213] copy_process.part.46+0x850/0x1b90 [ 179.815215] ? _do_fork+0xce/0x3d0 [ 179.815219] _do_fork+0xce/0x3d0 [ 179.815226] ? __do_page_fault+0x2bf/0x4e0 [ 179.815229] do_syscall_64+0x55/0x130 [ 179.815231] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 179.815234] RIP: 0033:0x7fedec4c738b ... [ 179.815237] RSP: 002b:00007ffda469d730 EFLAGS: 00000246 ORIG_RAX: 0000000000000038 [ 179.815239] RAX: ffffffffffffffda RBX: 00007ffda469d730 RCX: 00007fedec4c738b [ 179.815240] RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000001200011 [ 179.815241] RBP: 00007ffda469d780 R08: 00007fededd6e300 R09: 00007ffda47f50a0 [ 179.815242] R10: 00007fededd6e5d0 R11: 0000000000000246 R12: 0000000000000000 [ 179.815243] R13: 0000000000000020 R14: 0000000000000000 R15: 0000000000000000 [ 179.815245] Mem-Info: [ 179.815249] active_anon:12686 inactive_anon:14760 isolated_anon:0 active_file:502 inactive_file:61 isolated_file:70 unevictable:2 dirty:0 writeback:0 unstable:0 slab_reclaimable:2380 slab_unreclaimable:7520 mapped:15069 shmem:14813 pagetables:10833 bounce:0 free:1922 free_pcp:229 free_cma:0 <snip> Link: http://lkml.kernel.org/r/20190606120411.8298-3-urezki@gmail.com Signed-off-by: Uladzislau Rezki (Sony) <urezki@gmail.com> Cc: Hillf Danton <hdanton@sina.com> Cc: Matthew Wilcox <willy@infradead.org> Cc: Michal Hocko <mhocko@suse.com> Cc: Oleksiy Avramchenko <oleksiy.avramchenko@sonymobile.com> Cc: Roman Gushchin <guro@fb.com> Cc: Steven Rostedt <rostedt@goodmis.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
1 parent cacca6b commit 82dd23e

1 file changed

Lines changed: 51 additions & 4 deletions

File tree

mm/vmalloc.c

Lines changed: 51 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -365,6 +365,13 @@ static LIST_HEAD(free_vmap_area_list);
365365
*/
366366
static struct rb_root free_vmap_area_root = RB_ROOT;
367367

368+
/*
369+
* Preload a CPU with one object for "no edge" split case. The
370+
* aim is to get rid of allocations from the atomic context, thus
371+
* to use more permissive allocation masks.
372+
*/
373+
static DEFINE_PER_CPU(struct vmap_area *, ne_fit_preload_node);
374+
368375
static __always_inline unsigned long
369376
va_size(struct vmap_area *va)
370377
{
@@ -951,9 +958,24 @@ adjust_va_to_fit_type(struct vmap_area *va,
951958
* L V NVA V R
952959
* |---|-------|---|
953960
*/
954-
lva = kmem_cache_alloc(vmap_area_cachep, GFP_NOWAIT);
955-
if (unlikely(!lva))
956-
return -1;
961+
lva = __this_cpu_xchg(ne_fit_preload_node, NULL);
962+
if (unlikely(!lva)) {
963+
/*
964+
* For percpu allocator we do not do any pre-allocation
965+
* and leave it as it is. The reason is it most likely
966+
* never ends up with NE_FIT_TYPE splitting. In case of
967+
* percpu allocations offsets and sizes are aligned to
968+
* fixed align request, i.e. RE_FIT_TYPE and FL_FIT_TYPE
969+
* are its main fitting cases.
970+
*
971+
* There are a few exceptions though, as an example it is
972+
* a first allocation (early boot up) when we have "one"
973+
* big free space that has to be split.
974+
*/
975+
lva = kmem_cache_alloc(vmap_area_cachep, GFP_NOWAIT);
976+
if (!lva)
977+
return -1;
978+
}
957979

958980
/*
959981
* Build the remainder.
@@ -1032,7 +1054,7 @@ static struct vmap_area *alloc_vmap_area(unsigned long size,
10321054
unsigned long vstart, unsigned long vend,
10331055
int node, gfp_t gfp_mask)
10341056
{
1035-
struct vmap_area *va;
1057+
struct vmap_area *va, *pva;
10361058
unsigned long addr;
10371059
int purged = 0;
10381060

@@ -1057,7 +1079,32 @@ static struct vmap_area *alloc_vmap_area(unsigned long size,
10571079
kmemleak_scan_area(&va->rb_node, SIZE_MAX, gfp_mask & GFP_RECLAIM_MASK);
10581080

10591081
retry:
1082+
/*
1083+
* Preload this CPU with one extra vmap_area object to ensure
1084+
* that we have it available when fit type of free area is
1085+
* NE_FIT_TYPE.
1086+
*
1087+
* The preload is done in non-atomic context, thus it allows us
1088+
* to use more permissive allocation masks to be more stable under
1089+
* low memory condition and high memory pressure.
1090+
*
1091+
* Even if it fails we do not really care about that. Just proceed
1092+
* as it is. "overflow" path will refill the cache we allocate from.
1093+
*/
1094+
preempt_disable();
1095+
if (!__this_cpu_read(ne_fit_preload_node)) {
1096+
preempt_enable();
1097+
pva = kmem_cache_alloc_node(vmap_area_cachep, GFP_KERNEL, node);
1098+
preempt_disable();
1099+
1100+
if (__this_cpu_cmpxchg(ne_fit_preload_node, NULL, pva)) {
1101+
if (pva)
1102+
kmem_cache_free(vmap_area_cachep, pva);
1103+
}
1104+
}
1105+
10601106
spin_lock(&vmap_area_lock);
1107+
preempt_enable();
10611108

10621109
/*
10631110
* If an allocation fails, the "vend" address is

0 commit comments

Comments
 (0)