mm: start tracking VMAs with maple tree

Start tracking the VMAs with the new maple tree structure in parallel with
the rb_tree.  Add debug and trace events for maple tree operations and
duplicate the rb_tree that is created on forks into the maple tree.

The maple tree is added to the mm_struct including the mm_init struct,
added support in required mm/mmap functions, added tracking in kernel/fork
for process forking, and used to find the unmapped_area and checked
against what the rbtree finds.

This also moves the mmap_lock() in exit_mmap() since the oom reaper call
does walk the VMAs.  Otherwise lockdep will be unhappy if oom happens.

When splitting a vma fails due to allocations of the maple tree nodes,
the error path in __split_vma() calls new->vm_ops->close(new).  The page
accounting for hugetlb is actually in the close() operation,  so it
accounts for the removal of 1/2 of the VMA which was not adjusted.  This
results in a negative exit value.  To avoid the negative charge, set
vm_start = vm_end and vm_pgoff = 0.

There is also a potential accounting issue in special mappings from
insert_vm_struct() failing to allocate, so reverse the charge there in
the failure scenario.

Link: https://lkml.kernel.org/r/20220906194824.2110408-9-Liam.Howlett@oracle.com
Signed-off-by: Liam R. Howlett <Liam.Howlett@Oracle.com>
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Tested-by: Yu Zhao <yuzhao@google.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: David Howells <dhowells@redhat.com>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: SeongJae Park <sj@kernel.org>
Cc: Sven Schnelle <svens@linux.ibm.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
This commit is contained in:
Liam R. Howlett
2022-09-06 19:48:45 +00:00
committed by Andrew Morton
parent e15e06a839
commit d4af56c5c7
9 changed files with 437 additions and 38 deletions

View File

@@ -585,6 +585,7 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
int retval;
unsigned long charge;
LIST_HEAD(uf);
MA_STATE(mas, &mm->mm_mt, 0, 0);
uprobe_start_dup_mmap();
if (mmap_write_lock_killable(oldmm)) {
@@ -614,6 +615,10 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
goto out;
khugepaged_fork(mm, oldmm);
retval = mas_expected_entries(&mas, oldmm->map_count);
if (retval)
goto out;
prev = NULL;
for (mpnt = oldmm->mmap; mpnt; mpnt = mpnt->vm_next) {
struct file *file;
@@ -629,7 +634,7 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
*/
if (fatal_signal_pending(current)) {
retval = -EINTR;
goto out;
goto loop_out;
}
if (mpnt->vm_flags & VM_ACCOUNT) {
unsigned long len = vma_pages(mpnt);
@@ -694,6 +699,11 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
rb_link = &tmp->vm_rb.rb_right;
rb_parent = &tmp->vm_rb;
/* Link the vma into the MT */
mas.index = tmp->vm_start;
mas.last = tmp->vm_end - 1;
mas_store(&mas, tmp);
mm->map_count++;
if (!(tmp->vm_flags & VM_WIPEONFORK))
retval = copy_page_range(tmp, mpnt);
@@ -702,10 +712,12 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
tmp->vm_ops->open(tmp);
if (retval)
goto out;
goto loop_out;
}
/* a new mm has just been created */
retval = arch_dup_mmap(oldmm, mm);
loop_out:
mas_destroy(&mas);
out:
mmap_write_unlock(mm);
flush_tlb_mm(oldmm);
@@ -721,7 +733,7 @@ fail_nomem_policy:
fail_nomem:
retval = -ENOMEM;
vm_unacct_memory(charge);
goto out;
goto loop_out;
}
static inline int mm_alloc_pgd(struct mm_struct *mm)
@@ -1111,6 +1123,8 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
{
mm->mmap = NULL;
mm->mm_rb = RB_ROOT;
mt_init_flags(&mm->mm_mt, MM_MT_FLAGS);
mt_set_external_lock(&mm->mm_mt, &mm->mmap_lock);
mm->vmacache_seqnum = 0;
atomic_set(&mm->mm_users, 1);
atomic_set(&mm->mm_count, 1);