Skip to content

Commit bebeb3d

Browse files
walken-google authored and torvalds committed
mm: introduce mm_populate() for populating new vmas
When creating new mappings using the MAP_POPULATE / MAP_LOCKED flags (or with MCL_FUTURE in effect), we want to populate the pages within the newly created vmas. This may take a while as we may have to read pages from disk, so ideally we want to do this outside of the write-locked mmap_sem region.

This change introduces mm_populate(), which is used to defer populating such mappings until after the mmap_sem write lock has been released. This is implemented as a generalization of the former do_mlock_pages(), which accomplished the same task but was used during mlock() / mlockall().

Signed-off-by: Michel Lespinasse <walken@google.com>
Reported-by: Andy Lutomirski <luto@amacapital.net>
Acked-by: Rik van Riel <riel@redhat.com>
Tested-by: Andy Lutomirski <luto@amacapital.net>
Cc: Greg Ungerer <gregungerer@westnet.com.au>
Cc: David Howells <dhowells@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
1 parent 940e7da commit bebeb3d

File tree

7 files changed

+62
-22
lines changed

7 files changed

+62
-22
lines changed

fs/aio.c

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,7 @@ static int aio_setup_ring(struct kioctx *ctx)
103103
unsigned nr_events = ctx->max_reqs;
104104
unsigned long size;
105105
int nr_pages;
106+
bool populate;
106107

107108
/* Compensate for the ring buffer's head/tail overlap entry */
108109
nr_events += 2;/* 1 is required, 2 for good luck */
@@ -129,7 +130,8 @@ static int aio_setup_ring(struct kioctx *ctx)
129130
down_write(&ctx->mm->mmap_sem);
130131
info->mmap_base = do_mmap_pgoff(NULL, 0, info->mmap_size,
131132
PROT_READ|PROT_WRITE,
132-
MAP_ANONYMOUS|MAP_PRIVATE, 0);
133+
MAP_ANONYMOUS|MAP_PRIVATE, 0,
134+
&populate);
133135
if (IS_ERR((void *)info->mmap_base)) {
134136
up_write(&ctx->mm->mmap_sem);
135137
info->mmap_size = 0;
@@ -147,6 +149,8 @@ static int aio_setup_ring(struct kioctx *ctx)
147149
aio_free_ring(ctx);
148150
return -EAGAIN;
149151
}
152+
if (populate)
153+
mm_populate(info->mmap_base, info->mmap_size);
150154

151155
ctx->user_id = info->mmap_base;
152156

include/linux/mm.h

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1474,11 +1474,23 @@ extern unsigned long get_unmapped_area(struct file *, unsigned long, unsigned lo
14741474
extern unsigned long mmap_region(struct file *file, unsigned long addr,
14751475
unsigned long len, unsigned long flags,
14761476
vm_flags_t vm_flags, unsigned long pgoff);
1477-
extern unsigned long do_mmap_pgoff(struct file *, unsigned long,
1478-
unsigned long, unsigned long,
1479-
unsigned long, unsigned long);
1477+
extern unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
1478+
unsigned long len, unsigned long prot, unsigned long flags,
1479+
unsigned long pgoff, bool *populate);
14801480
extern int do_munmap(struct mm_struct *, unsigned long, size_t);
14811481

1482+
#ifdef CONFIG_MMU
1483+
extern int __mm_populate(unsigned long addr, unsigned long len,
1484+
int ignore_errors);
1485+
static inline void mm_populate(unsigned long addr, unsigned long len)
1486+
{
1487+
/* Ignore errors */
1488+
(void) __mm_populate(addr, len, 1);
1489+
}
1490+
#else
1491+
static inline void mm_populate(unsigned long addr, unsigned long len) {}
1492+
#endif
1493+
14821494
/* These take the mm semaphore themselves */
14831495
extern unsigned long vm_brk(unsigned long, unsigned long);
14841496
extern int vm_munmap(unsigned long, size_t);

ipc/shm.c

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -967,11 +967,11 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr,
967967
unsigned long flags;
968968
unsigned long prot;
969969
int acc_mode;
970-
unsigned long user_addr;
971970
struct ipc_namespace *ns;
972971
struct shm_file_data *sfd;
973972
struct path path;
974973
fmode_t f_mode;
974+
bool populate = false;
975975

976976
err = -EINVAL;
977977
if (shmid < 0)
@@ -1070,13 +1070,15 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr,
10701070
goto invalid;
10711071
}
10721072

1073-
user_addr = do_mmap_pgoff(file, addr, size, prot, flags, 0);
1074-
*raddr = user_addr;
1073+
addr = do_mmap_pgoff(file, addr, size, prot, flags, 0, &populate);
1074+
*raddr = addr;
10751075
err = 0;
1076-
if (IS_ERR_VALUE(user_addr))
1077-
err = (long)user_addr;
1076+
if (IS_ERR_VALUE(addr))
1077+
err = (long)addr;
10781078
invalid:
10791079
up_write(&current->mm->mmap_sem);
1080+
if (populate)
1081+
mm_populate(addr, size);
10801082

10811083
out_fput:
10821084
fput(file);

mm/mlock.c

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -416,7 +416,14 @@ static int do_mlock(unsigned long start, size_t len, int on)
416416
return error;
417417
}
418418

419-
static int do_mlock_pages(unsigned long start, size_t len, int ignore_errors)
419+
/*
420+
* __mm_populate - populate and/or mlock pages within a range of address space.
421+
*
422+
* This is used to implement mlock() and the MAP_POPULATE / MAP_LOCKED mmap
423+
* flags. VMAs must be already marked with the desired vm_flags, and
424+
* mmap_sem must not be held.
425+
*/
426+
int __mm_populate(unsigned long start, unsigned long len, int ignore_errors)
420427
{
421428
struct mm_struct *mm = current->mm;
422429
unsigned long end, nstart, nend;
@@ -498,7 +505,7 @@ SYSCALL_DEFINE2(mlock, unsigned long, start, size_t, len)
498505
error = do_mlock(start, len, 1);
499506
up_write(&current->mm->mmap_sem);
500507
if (!error)
501-
error = do_mlock_pages(start, len, 0);
508+
error = __mm_populate(start, len, 0);
502509
return error;
503510
}
504511

@@ -564,10 +571,8 @@ SYSCALL_DEFINE1(mlockall, int, flags)
564571
capable(CAP_IPC_LOCK))
565572
ret = do_mlockall(flags);
566573
up_write(&current->mm->mmap_sem);
567-
if (!ret && (flags & MCL_CURRENT)) {
568-
/* Ignore errors */
569-
do_mlock_pages(0, TASK_SIZE, 1);
570-
}
574+
if (!ret && (flags & MCL_CURRENT))
575+
mm_populate(0, TASK_SIZE);
571576
out:
572577
return ret;
573578
}

mm/mmap.c

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1154,12 +1154,15 @@ static inline unsigned long round_hint_to_min(unsigned long hint)
11541154

11551155
unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
11561156
unsigned long len, unsigned long prot,
1157-
unsigned long flags, unsigned long pgoff)
1157+
unsigned long flags, unsigned long pgoff,
1158+
bool *populate)
11581159
{
11591160
struct mm_struct * mm = current->mm;
11601161
struct inode *inode;
11611162
vm_flags_t vm_flags;
11621163

1164+
*populate = false;
1165+
11631166
/*
11641167
* Does the application expect PROT_READ to imply PROT_EXEC?
11651168
*
@@ -1280,7 +1283,12 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
12801283
}
12811284
}
12821285

1283-
return mmap_region(file, addr, len, flags, vm_flags, pgoff);
1286+
addr = mmap_region(file, addr, len, flags, vm_flags, pgoff);
1287+
if (!IS_ERR_VALUE(addr) &&
1288+
((vm_flags & VM_LOCKED) ||
1289+
(flags & (MAP_POPULATE | MAP_NONBLOCK)) == MAP_POPULATE))
1290+
*populate = true;
1291+
return addr;
12841292
}
12851293

12861294
SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len,
@@ -1531,10 +1539,12 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
15311539

15321540
vm_stat_account(mm, vm_flags, file, len >> PAGE_SHIFT);
15331541
if (vm_flags & VM_LOCKED) {
1534-
if (!mlock_vma_pages_range(vma, addr, addr + len))
1542+
if (!((vm_flags & VM_SPECIAL) || is_vm_hugetlb_page(vma) ||
1543+
vma == get_gate_vma(current->mm)))
15351544
mm->locked_vm += (len >> PAGE_SHIFT);
1536-
} else if ((flags & MAP_POPULATE) && !(flags & MAP_NONBLOCK))
1537-
make_pages_present(addr, addr + len);
1545+
else
1546+
vma->vm_flags &= ~VM_LOCKED;
1547+
}
15381548

15391549
if (file)
15401550
uprobe_mmap(vma);

mm/nommu.c

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1250,7 +1250,8 @@ unsigned long do_mmap_pgoff(struct file *file,
12501250
unsigned long len,
12511251
unsigned long prot,
12521252
unsigned long flags,
1253-
unsigned long pgoff)
1253+
unsigned long pgoff,
1254+
bool *populate)
12541255
{
12551256
struct vm_area_struct *vma;
12561257
struct vm_region *region;
@@ -1260,6 +1261,8 @@ unsigned long do_mmap_pgoff(struct file *file,
12601261

12611262
kenter(",%lx,%lx,%lx,%lx,%lx", addr, len, prot, flags, pgoff);
12621263

1264+
*populate = false;
1265+
12631266
/* decide whether we should attempt the mapping, and if so what sort of
12641267
* mapping */
12651268
ret = validate_mmap_request(file, addr, len, prot, flags, pgoff,

mm/util.c

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -355,12 +355,16 @@ unsigned long vm_mmap_pgoff(struct file *file, unsigned long addr,
355355
{
356356
unsigned long ret;
357357
struct mm_struct *mm = current->mm;
358+
bool populate;
358359

359360
ret = security_mmap_file(file, prot, flag);
360361
if (!ret) {
361362
down_write(&mm->mmap_sem);
362-
ret = do_mmap_pgoff(file, addr, len, prot, flag, pgoff);
363+
ret = do_mmap_pgoff(file, addr, len, prot, flag, pgoff,
364+
&populate);
363365
up_write(&mm->mmap_sem);
366+
if (!IS_ERR_VALUE(ret) && populate)
367+
mm_populate(ret, len);
364368
}
365369
return ret;
366370
}

0 commit comments

Comments
 (0)