mirror of
https://github.com/tbsdtv/linux_media.git
synced 2025-07-23 12:43:29 +02:00
mm/hugetlb: introduce hugetlb_walk()
huge_pte_offset() is the main walker function for hugetlb pgtables. The name does not really represent what it does, though. Instead of renaming it, introduce a wrapper function called hugetlb_walk() which will use huge_pte_offset() inside. Assert on the locks when walking the pgtable. Note, the vma lock assertion will be a no-op for private mappings. Document the last special case in the page_vma_mapped_walk() path where we don't need any additional lock to call hugetlb_walk(). Taking the vma lock there is not needed because either: (1) potential callers of hugetlb pvmw already hold i_mmap_rwsem (from one rmap_walk()), or (2) the caller will not walk a hugetlb vma at all, so the hugetlb code path is not reachable (e.g. in ksm or uprobe paths). That lock requirement is slightly implicit for future page_vma_mapped_walk() callers. But anyway, when one day this rule breaks, one will get a straightforward warning in hugetlb_walk() with lockdep, then there'll be a way out. [akpm@linux-foundation.org: coding-style cleanups] Link: https://lkml.kernel.org/r/20221216155229.2043750-1-peterx@redhat.com Signed-off-by: Peter Xu <peterx@redhat.com> Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com> Reviewed-by: John Hubbard <jhubbard@nvidia.com> Reviewed-by: David Hildenbrand <david@redhat.com> Cc: Andrea Arcangeli <aarcange@redhat.com> Cc: James Houghton <jthoughton@google.com> Cc: Jann Horn <jannh@google.com> Cc: Miaohe Lin <linmiaohe@huawei.com> Cc: Muchun Song <songmuchun@bytedance.com> Cc: Nadav Amit <nadav.amit@gmail.com> Cc: Rik van Riel <riel@surriel.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
This commit is contained in:
@@ -388,9 +388,7 @@ static bool hugetlb_vma_maps_page(struct vm_area_struct *vma,
|
|||||||
{
|
{
|
||||||
pte_t *ptep, pte;
|
pte_t *ptep, pte;
|
||||||
|
|
||||||
ptep = huge_pte_offset(vma->vm_mm, addr,
|
ptep = hugetlb_walk(vma, addr, huge_page_size(hstate_vma(vma)));
|
||||||
huge_page_size(hstate_vma(vma)));
|
|
||||||
|
|
||||||
if (!ptep)
|
if (!ptep)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
|
@@ -252,14 +252,12 @@ static inline bool userfaultfd_huge_must_wait(struct userfaultfd_ctx *ctx,
|
|||||||
unsigned long flags,
|
unsigned long flags,
|
||||||
unsigned long reason)
|
unsigned long reason)
|
||||||
{
|
{
|
||||||
struct mm_struct *mm = ctx->mm;
|
|
||||||
pte_t *ptep, pte;
|
pte_t *ptep, pte;
|
||||||
bool ret = true;
|
bool ret = true;
|
||||||
|
|
||||||
mmap_assert_locked(mm);
|
mmap_assert_locked(ctx->mm);
|
||||||
|
|
||||||
ptep = huge_pte_offset(mm, address, vma_mmu_pagesize(vma));
|
|
||||||
|
|
||||||
|
ptep = hugetlb_walk(vma, address, vma_mmu_pagesize(vma));
|
||||||
if (!ptep)
|
if (!ptep)
|
||||||
goto out;
|
goto out;
|
||||||
|
|
||||||
|
@@ -2,6 +2,7 @@
|
|||||||
#ifndef _LINUX_HUGETLB_H
|
#ifndef _LINUX_HUGETLB_H
|
||||||
#define _LINUX_HUGETLB_H
|
#define _LINUX_HUGETLB_H
|
||||||
|
|
||||||
|
#include <linux/mm.h>
|
||||||
#include <linux/mm_types.h>
|
#include <linux/mm_types.h>
|
||||||
#include <linux/mmdebug.h>
|
#include <linux/mmdebug.h>
|
||||||
#include <linux/fs.h>
|
#include <linux/fs.h>
|
||||||
@@ -196,6 +197,11 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma,
|
|||||||
* huge_pte_offset(): Walk the hugetlb pgtable until the last level PTE.
|
* huge_pte_offset(): Walk the hugetlb pgtable until the last level PTE.
|
||||||
* Returns the pte_t* if found, or NULL if the address is not mapped.
|
* Returns the pte_t* if found, or NULL if the address is not mapped.
|
||||||
*
|
*
|
||||||
|
* IMPORTANT: we should normally not directly call this function, instead
|
||||||
|
* this is only a common interface to implement arch-specific
|
||||||
|
* walker. Please use hugetlb_walk() instead, because that will attempt to
|
||||||
|
* verify the locking for you.
|
||||||
|
*
|
||||||
* Since this function will walk all the pgtable pages (including not only
|
* Since this function will walk all the pgtable pages (including not only
|
||||||
* high-level pgtable page, but also PUD entry that can be unshared
|
* high-level pgtable page, but also PUD entry that can be unshared
|
||||||
* concurrently for VM_SHARED), the caller of this function should be
|
* concurrently for VM_SHARED), the caller of this function should be
|
||||||
@@ -1229,4 +1235,35 @@ bool want_pmd_share(struct vm_area_struct *vma, unsigned long addr);
|
|||||||
#define flush_hugetlb_tlb_range(vma, addr, end) flush_tlb_range(vma, addr, end)
|
#define flush_hugetlb_tlb_range(vma, addr, end) flush_tlb_range(vma, addr, end)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
static inline bool __vma_shareable_lock(struct vm_area_struct *vma)
|
||||||
|
{
|
||||||
|
return (vma->vm_flags & VM_MAYSHARE) && vma->vm_private_data;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Safe version of huge_pte_offset() to check the locks. See comments
|
||||||
|
* above huge_pte_offset().
|
||||||
|
*/
|
||||||
|
static inline pte_t *
|
||||||
|
hugetlb_walk(struct vm_area_struct *vma, unsigned long addr, unsigned long sz)
|
||||||
|
{
|
||||||
|
#if defined(CONFIG_HUGETLB_PAGE) && \
|
||||||
|
defined(CONFIG_ARCH_WANT_HUGE_PMD_SHARE) && defined(CONFIG_LOCKDEP)
|
||||||
|
struct hugetlb_vma_lock *vma_lock = vma->vm_private_data;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If pmd sharing possible, locking needed to safely walk the
|
||||||
|
* hugetlb pgtables. More information can be found at the comment
|
||||||
|
* above huge_pte_offset() in the same file.
|
||||||
|
*
|
||||||
|
* NOTE: lockdep_is_held() is only defined with CONFIG_LOCKDEP.
|
||||||
|
*/
|
||||||
|
if (__vma_shareable_lock(vma))
|
||||||
|
WARN_ON_ONCE(!lockdep_is_held(&vma_lock->rw_sema) &&
|
||||||
|
!lockdep_is_held(
|
||||||
|
&vma->vm_file->f_mapping->i_mmap_rwsem));
|
||||||
|
#endif
|
||||||
|
return huge_pte_offset(vma->vm_mm, addr, sz);
|
||||||
|
}
|
||||||
|
|
||||||
#endif /* _LINUX_HUGETLB_H */
|
#endif /* _LINUX_HUGETLB_H */
|
||||||
|
31
mm/hugetlb.c
31
mm/hugetlb.c
@@ -260,11 +260,6 @@ static inline struct hugepage_subpool *subpool_vma(struct vm_area_struct *vma)
|
|||||||
/*
|
/*
|
||||||
* hugetlb vma_lock helper routines
|
* hugetlb vma_lock helper routines
|
||||||
*/
|
*/
|
||||||
static bool __vma_shareable_lock(struct vm_area_struct *vma)
|
|
||||||
{
|
|
||||||
return vma->vm_flags & VM_MAYSHARE && vma->vm_private_data;
|
|
||||||
}
|
|
||||||
|
|
||||||
void hugetlb_vma_lock_read(struct vm_area_struct *vma)
|
void hugetlb_vma_lock_read(struct vm_area_struct *vma)
|
||||||
{
|
{
|
||||||
if (__vma_shareable_lock(vma)) {
|
if (__vma_shareable_lock(vma)) {
|
||||||
@@ -4980,7 +4975,7 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
|
|||||||
} else {
|
} else {
|
||||||
/*
|
/*
|
||||||
* For shared mappings the vma lock must be held before
|
* For shared mappings the vma lock must be held before
|
||||||
* calling huge_pte_offset in the src vma. Otherwise, the
|
* calling hugetlb_walk() in the src vma. Otherwise, the
|
||||||
* returned ptep could go away if part of a shared pmd and
|
* returned ptep could go away if part of a shared pmd and
|
||||||
* another thread calls huge_pmd_unshare.
|
* another thread calls huge_pmd_unshare.
|
||||||
*/
|
*/
|
||||||
@@ -4990,7 +4985,7 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
|
|||||||
last_addr_mask = hugetlb_mask_last_page(h);
|
last_addr_mask = hugetlb_mask_last_page(h);
|
||||||
for (addr = src_vma->vm_start; addr < src_vma->vm_end; addr += sz) {
|
for (addr = src_vma->vm_start; addr < src_vma->vm_end; addr += sz) {
|
||||||
spinlock_t *src_ptl, *dst_ptl;
|
spinlock_t *src_ptl, *dst_ptl;
|
||||||
src_pte = huge_pte_offset(src, addr, sz);
|
src_pte = hugetlb_walk(src_vma, addr, sz);
|
||||||
if (!src_pte) {
|
if (!src_pte) {
|
||||||
addr |= last_addr_mask;
|
addr |= last_addr_mask;
|
||||||
continue;
|
continue;
|
||||||
@@ -5197,7 +5192,7 @@ int move_hugetlb_page_tables(struct vm_area_struct *vma,
|
|||||||
hugetlb_vma_lock_write(vma);
|
hugetlb_vma_lock_write(vma);
|
||||||
i_mmap_lock_write(mapping);
|
i_mmap_lock_write(mapping);
|
||||||
for (; old_addr < old_end; old_addr += sz, new_addr += sz) {
|
for (; old_addr < old_end; old_addr += sz, new_addr += sz) {
|
||||||
src_pte = huge_pte_offset(mm, old_addr, sz);
|
src_pte = hugetlb_walk(vma, old_addr, sz);
|
||||||
if (!src_pte) {
|
if (!src_pte) {
|
||||||
old_addr |= last_addr_mask;
|
old_addr |= last_addr_mask;
|
||||||
new_addr |= last_addr_mask;
|
new_addr |= last_addr_mask;
|
||||||
@@ -5260,7 +5255,7 @@ static void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct
|
|||||||
last_addr_mask = hugetlb_mask_last_page(h);
|
last_addr_mask = hugetlb_mask_last_page(h);
|
||||||
address = start;
|
address = start;
|
||||||
for (; address < end; address += sz) {
|
for (; address < end; address += sz) {
|
||||||
ptep = huge_pte_offset(mm, address, sz);
|
ptep = hugetlb_walk(vma, address, sz);
|
||||||
if (!ptep) {
|
if (!ptep) {
|
||||||
address |= last_addr_mask;
|
address |= last_addr_mask;
|
||||||
continue;
|
continue;
|
||||||
@@ -5573,7 +5568,7 @@ retry_avoidcopy:
|
|||||||
mutex_lock(&hugetlb_fault_mutex_table[hash]);
|
mutex_lock(&hugetlb_fault_mutex_table[hash]);
|
||||||
hugetlb_vma_lock_read(vma);
|
hugetlb_vma_lock_read(vma);
|
||||||
spin_lock(ptl);
|
spin_lock(ptl);
|
||||||
ptep = huge_pte_offset(mm, haddr, huge_page_size(h));
|
ptep = hugetlb_walk(vma, haddr, huge_page_size(h));
|
||||||
if (likely(ptep &&
|
if (likely(ptep &&
|
||||||
pte_same(huge_ptep_get(ptep), pte)))
|
pte_same(huge_ptep_get(ptep), pte)))
|
||||||
goto retry_avoidcopy;
|
goto retry_avoidcopy;
|
||||||
@@ -5611,7 +5606,7 @@ retry_avoidcopy:
|
|||||||
* before the page tables are altered
|
* before the page tables are altered
|
||||||
*/
|
*/
|
||||||
spin_lock(ptl);
|
spin_lock(ptl);
|
||||||
ptep = huge_pte_offset(mm, haddr, huge_page_size(h));
|
ptep = hugetlb_walk(vma, haddr, huge_page_size(h));
|
||||||
if (likely(ptep && pte_same(huge_ptep_get(ptep), pte))) {
|
if (likely(ptep && pte_same(huge_ptep_get(ptep), pte))) {
|
||||||
/* Break COW or unshare */
|
/* Break COW or unshare */
|
||||||
huge_ptep_clear_flush(vma, haddr, ptep);
|
huge_ptep_clear_flush(vma, haddr, ptep);
|
||||||
@@ -6397,7 +6392,7 @@ struct page *hugetlb_follow_page_mask(struct vm_area_struct *vma,
|
|||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
hugetlb_vma_lock_read(vma);
|
hugetlb_vma_lock_read(vma);
|
||||||
pte = huge_pte_offset(mm, haddr, huge_page_size(h));
|
pte = hugetlb_walk(vma, haddr, huge_page_size(h));
|
||||||
if (!pte)
|
if (!pte)
|
||||||
goto out_unlock;
|
goto out_unlock;
|
||||||
|
|
||||||
@@ -6462,8 +6457,8 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
|
|||||||
*
|
*
|
||||||
* Note that page table lock is not held when pte is null.
|
* Note that page table lock is not held when pte is null.
|
||||||
*/
|
*/
|
||||||
pte = huge_pte_offset(mm, vaddr & huge_page_mask(h),
|
pte = hugetlb_walk(vma, vaddr & huge_page_mask(h),
|
||||||
huge_page_size(h));
|
huge_page_size(h));
|
||||||
if (pte)
|
if (pte)
|
||||||
ptl = huge_pte_lock(h, mm, pte);
|
ptl = huge_pte_lock(h, mm, pte);
|
||||||
absent = !pte || huge_pte_none(huge_ptep_get(pte));
|
absent = !pte || huge_pte_none(huge_ptep_get(pte));
|
||||||
@@ -6654,7 +6649,7 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
|
|||||||
last_addr_mask = hugetlb_mask_last_page(h);
|
last_addr_mask = hugetlb_mask_last_page(h);
|
||||||
for (; address < end; address += psize) {
|
for (; address < end; address += psize) {
|
||||||
spinlock_t *ptl;
|
spinlock_t *ptl;
|
||||||
ptep = huge_pte_offset(mm, address, psize);
|
ptep = hugetlb_walk(vma, address, psize);
|
||||||
if (!ptep) {
|
if (!ptep) {
|
||||||
if (!uffd_wp) {
|
if (!uffd_wp) {
|
||||||
address |= last_addr_mask;
|
address |= last_addr_mask;
|
||||||
@@ -7064,8 +7059,8 @@ pte_t *huge_pmd_share(struct mm_struct *mm, struct vm_area_struct *vma,
|
|||||||
|
|
||||||
saddr = page_table_shareable(svma, vma, addr, idx);
|
saddr = page_table_shareable(svma, vma, addr, idx);
|
||||||
if (saddr) {
|
if (saddr) {
|
||||||
spte = huge_pte_offset(svma->vm_mm, saddr,
|
spte = hugetlb_walk(svma, saddr,
|
||||||
vma_mmu_pagesize(svma));
|
vma_mmu_pagesize(svma));
|
||||||
if (spte) {
|
if (spte) {
|
||||||
get_page(virt_to_page(spte));
|
get_page(virt_to_page(spte));
|
||||||
break;
|
break;
|
||||||
@@ -7377,7 +7372,7 @@ static void hugetlb_unshare_pmds(struct vm_area_struct *vma,
|
|||||||
hugetlb_vma_lock_write(vma);
|
hugetlb_vma_lock_write(vma);
|
||||||
i_mmap_lock_write(vma->vm_file->f_mapping);
|
i_mmap_lock_write(vma->vm_file->f_mapping);
|
||||||
for (address = start; address < end; address += PUD_SIZE) {
|
for (address = start; address < end; address += PUD_SIZE) {
|
||||||
ptep = huge_pte_offset(mm, address, sz);
|
ptep = hugetlb_walk(vma, address, sz);
|
||||||
if (!ptep)
|
if (!ptep)
|
||||||
continue;
|
continue;
|
||||||
ptl = huge_pte_lock(h, mm, ptep);
|
ptl = huge_pte_lock(h, mm, ptep);
|
||||||
|
@@ -168,9 +168,12 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
|
|||||||
/* The only possible mapping was handled on last iteration */
|
/* The only possible mapping was handled on last iteration */
|
||||||
if (pvmw->pte)
|
if (pvmw->pte)
|
||||||
return not_found(pvmw);
|
return not_found(pvmw);
|
||||||
|
/*
|
||||||
/* when pud is not present, pte will be NULL */
|
* All callers that get here will already hold the
|
||||||
pvmw->pte = huge_pte_offset(mm, pvmw->address, size);
|
* i_mmap_rwsem. Therefore, no additional locks need to be
|
||||||
|
* taken before calling hugetlb_walk().
|
||||||
|
*/
|
||||||
|
pvmw->pte = hugetlb_walk(vma, pvmw->address, size);
|
||||||
if (!pvmw->pte)
|
if (!pvmw->pte)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
|
@@ -305,13 +305,11 @@ static int walk_hugetlb_range(unsigned long addr, unsigned long end,
|
|||||||
hugetlb_vma_lock_read(vma);
|
hugetlb_vma_lock_read(vma);
|
||||||
do {
|
do {
|
||||||
next = hugetlb_entry_end(h, addr, end);
|
next = hugetlb_entry_end(h, addr, end);
|
||||||
pte = huge_pte_offset(walk->mm, addr & hmask, sz);
|
pte = hugetlb_walk(vma, addr & hmask, sz);
|
||||||
|
|
||||||
if (pte)
|
if (pte)
|
||||||
err = ops->hugetlb_entry(pte, hmask, addr, next, walk);
|
err = ops->hugetlb_entry(pte, hmask, addr, next, walk);
|
||||||
else if (ops->pte_hole)
|
else if (ops->pte_hole)
|
||||||
err = ops->pte_hole(addr, next, -1, walk);
|
err = ops->pte_hole(addr, next, -1, walk);
|
||||||
|
|
||||||
if (err)
|
if (err)
|
||||||
break;
|
break;
|
||||||
} while (addr = next, addr != end);
|
} while (addr = next, addr != end);
|
||||||
|
Reference in New Issue
Block a user