mirror of
https://github.com/tbsdtv/linux_media.git
synced 2025-07-23 04:33:26 +02:00
mm/lru: replace pgdat lru_lock with lruvec lock
This patch moves the per-node lru_lock into the lruvec, thus giving each memcg its own lru_lock per node. On a large machine, the memcgs therefore no longer have to contend for the per-node pgdat->lru_lock; each of them can proceed quickly under its own lru_lock. Now that the memcg charge has been moved before LRU insertion, page isolation can serialize a page's memcg, so the per-memcg lruvec lock is stable and can replace the per-node lru lock. In isolate_migratepages_block(), compact_unlock_should_abort and lock_page_lruvec_irqsave are open coded to work with compact_control. Also add a debug function to the locking code, which may give some clues if something gets out of hand. Daniel Jordan's testing shows a 62% improvement on a modified readtwice case on his 2P * 10 core * 2 HT Broadwell box. https://lore.kernel.org/lkml/20200915165807.kpp7uhiw7l3loofu@ca-dmjordan1.us.oracle.com/ Hugh Dickins helped polish the patch, thanks!
[alex.shi@linux.alibaba.com: fix comment typo] Link: https://lkml.kernel.org/r/5b085715-292a-4b43-50b3-d73dc90d1de5@linux.alibaba.com [alex.shi@linux.alibaba.com: use page_memcg()] Link: https://lkml.kernel.org/r/5a4c2b72-7ee8-2478-fc0e-85eb83aafec4@linux.alibaba.com Link: https://lkml.kernel.org/r/1604566549-62481-18-git-send-email-alex.shi@linux.alibaba.com Signed-off-by: Alex Shi <alex.shi@linux.alibaba.com> Acked-by: Hugh Dickins <hughd@google.com> Acked-by: Johannes Weiner <hannes@cmpxchg.org> Cc: Rong Chen <rong.a.chen@intel.com> Cc: Michal Hocko <mhocko@kernel.org> Cc: Vladimir Davydov <vdavydov.dev@gmail.com> Cc: Yang Shi <yang.shi@linux.alibaba.com> Cc: Matthew Wilcox <willy@infradead.org> Cc: Konstantin Khlebnikov <khlebnikov@yandex-team.ru> Cc: Daniel Jordan <daniel.m.jordan@oracle.com> Cc: Alexander Duyck <alexander.duyck@gmail.com> Cc: Andrea Arcangeli <aarcange@redhat.com> Cc: Andrey Ryabinin <aryabinin@virtuozzo.com> Cc: "Huang, Ying" <ying.huang@intel.com> Cc: Jann Horn <jannh@google.com> Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com> Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com> Cc: Kirill A. Shutemov <kirill@shutemov.name> Cc: Mel Gorman <mgorman@techsingularity.net> Cc: Michal Hocko <mhocko@suse.com> Cc: Mika Penttilä <mika.penttila@nextfour.com> Cc: Minchan Kim <minchan@kernel.org> Cc: Shakeel Butt <shakeelb@google.com> Cc: Tejun Heo <tj@kernel.org> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Vlastimil Babka <vbabka@suse.cz> Cc: Wei Yang <richard.weiyang@gmail.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
116
mm/swap.c
116
mm/swap.c
@@ -79,16 +79,14 @@ static DEFINE_PER_CPU(struct lru_pvecs, lru_pvecs) = {
|
||||
static void __page_cache_release(struct page *page)
|
||||
{
|
||||
if (PageLRU(page)) {
|
||||
pg_data_t *pgdat = page_pgdat(page);
|
||||
struct lruvec *lruvec;
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&pgdat->lru_lock, flags);
|
||||
lruvec = mem_cgroup_page_lruvec(page, pgdat);
|
||||
lruvec = lock_page_lruvec_irqsave(page, &flags);
|
||||
VM_BUG_ON_PAGE(!PageLRU(page), page);
|
||||
__ClearPageLRU(page);
|
||||
del_page_from_lru_list(page, lruvec, page_off_lru(page));
|
||||
spin_unlock_irqrestore(&pgdat->lru_lock, flags);
|
||||
unlock_page_lruvec_irqrestore(lruvec, flags);
|
||||
}
|
||||
__ClearPageWaiters(page);
|
||||
}
|
||||
@@ -207,32 +205,30 @@ static void pagevec_lru_move_fn(struct pagevec *pvec,
|
||||
void (*move_fn)(struct page *page, struct lruvec *lruvec))
|
||||
{
|
||||
int i;
|
||||
struct pglist_data *pgdat = NULL;
|
||||
struct lruvec *lruvec;
|
||||
struct lruvec *lruvec = NULL;
|
||||
unsigned long flags = 0;
|
||||
|
||||
for (i = 0; i < pagevec_count(pvec); i++) {
|
||||
struct page *page = pvec->pages[i];
|
||||
struct pglist_data *pagepgdat = page_pgdat(page);
|
||||
|
||||
if (pagepgdat != pgdat) {
|
||||
if (pgdat)
|
||||
spin_unlock_irqrestore(&pgdat->lru_lock, flags);
|
||||
pgdat = pagepgdat;
|
||||
spin_lock_irqsave(&pgdat->lru_lock, flags);
|
||||
}
|
||||
struct lruvec *new_lruvec;
|
||||
|
||||
/* block memcg migration during page moving between lru */
|
||||
if (!TestClearPageLRU(page))
|
||||
continue;
|
||||
|
||||
lruvec = mem_cgroup_page_lruvec(page, pgdat);
|
||||
new_lruvec = mem_cgroup_page_lruvec(page, page_pgdat(page));
|
||||
if (lruvec != new_lruvec) {
|
||||
if (lruvec)
|
||||
unlock_page_lruvec_irqrestore(lruvec, flags);
|
||||
lruvec = lock_page_lruvec_irqsave(page, &flags);
|
||||
}
|
||||
|
||||
(*move_fn)(page, lruvec);
|
||||
|
||||
SetPageLRU(page);
|
||||
}
|
||||
if (pgdat)
|
||||
spin_unlock_irqrestore(&pgdat->lru_lock, flags);
|
||||
if (lruvec)
|
||||
unlock_page_lruvec_irqrestore(lruvec, flags);
|
||||
release_pages(pvec->pages, pvec->nr);
|
||||
pagevec_reinit(pvec);
|
||||
}
|
||||
@@ -274,9 +270,15 @@ void lru_note_cost(struct lruvec *lruvec, bool file, unsigned int nr_pages)
|
||||
{
|
||||
do {
|
||||
unsigned long lrusize;
|
||||
struct pglist_data *pgdat = lruvec_pgdat(lruvec);
|
||||
|
||||
spin_lock_irq(&pgdat->lru_lock);
|
||||
/*
|
||||
* Hold lruvec->lru_lock is safe here, since
|
||||
* 1) The pinned lruvec in reclaim, or
|
||||
* 2) From a pre-LRU page during refault (which also holds the
|
||||
* rcu lock, so would be safe even if the page was on the LRU
|
||||
* and could move simultaneously to a new lruvec).
|
||||
*/
|
||||
spin_lock_irq(&lruvec->lru_lock);
|
||||
/* Record cost event */
|
||||
if (file)
|
||||
lruvec->file_cost += nr_pages;
|
||||
@@ -300,7 +302,7 @@ void lru_note_cost(struct lruvec *lruvec, bool file, unsigned int nr_pages)
|
||||
lruvec->file_cost /= 2;
|
||||
lruvec->anon_cost /= 2;
|
||||
}
|
||||
spin_unlock_irq(&pgdat->lru_lock);
|
||||
spin_unlock_irq(&lruvec->lru_lock);
|
||||
} while ((lruvec = parent_lruvec(lruvec)));
|
||||
}
|
||||
|
||||
@@ -364,13 +366,15 @@ static inline void activate_page_drain(int cpu)
|
||||
|
||||
static void activate_page(struct page *page)
|
||||
{
|
||||
pg_data_t *pgdat = page_pgdat(page);
|
||||
struct lruvec *lruvec;
|
||||
|
||||
page = compound_head(page);
|
||||
spin_lock_irq(&pgdat->lru_lock);
|
||||
if (PageLRU(page))
|
||||
__activate_page(page, mem_cgroup_page_lruvec(page, pgdat));
|
||||
spin_unlock_irq(&pgdat->lru_lock);
|
||||
if (TestClearPageLRU(page)) {
|
||||
lruvec = lock_page_lruvec_irq(page);
|
||||
__activate_page(page, lruvec);
|
||||
unlock_page_lruvec_irq(lruvec);
|
||||
SetPageLRU(page);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -860,8 +864,7 @@ void release_pages(struct page **pages, int nr)
|
||||
{
|
||||
int i;
|
||||
LIST_HEAD(pages_to_free);
|
||||
struct pglist_data *locked_pgdat = NULL;
|
||||
struct lruvec *lruvec;
|
||||
struct lruvec *lruvec = NULL;
|
||||
unsigned long flags;
|
||||
unsigned int lock_batch;
|
||||
|
||||
@@ -871,11 +874,11 @@ void release_pages(struct page **pages, int nr)
|
||||
/*
|
||||
* Make sure the IRQ-safe lock-holding time does not get
|
||||
* excessive with a continuous string of pages from the
|
||||
* same pgdat. The lock is held only if pgdat != NULL.
|
||||
* same lruvec. The lock is held only if lruvec != NULL.
|
||||
*/
|
||||
if (locked_pgdat && ++lock_batch == SWAP_CLUSTER_MAX) {
|
||||
spin_unlock_irqrestore(&locked_pgdat->lru_lock, flags);
|
||||
locked_pgdat = NULL;
|
||||
if (lruvec && ++lock_batch == SWAP_CLUSTER_MAX) {
|
||||
unlock_page_lruvec_irqrestore(lruvec, flags);
|
||||
lruvec = NULL;
|
||||
}
|
||||
|
||||
page = compound_head(page);
|
||||
@@ -883,10 +886,9 @@ void release_pages(struct page **pages, int nr)
|
||||
continue;
|
||||
|
||||
if (is_zone_device_page(page)) {
|
||||
if (locked_pgdat) {
|
||||
spin_unlock_irqrestore(&locked_pgdat->lru_lock,
|
||||
flags);
|
||||
locked_pgdat = NULL;
|
||||
if (lruvec) {
|
||||
unlock_page_lruvec_irqrestore(lruvec, flags);
|
||||
lruvec = NULL;
|
||||
}
|
||||
/*
|
||||
* ZONE_DEVICE pages that return 'false' from
|
||||
@@ -907,27 +909,27 @@ void release_pages(struct page **pages, int nr)
|
||||
continue;
|
||||
|
||||
if (PageCompound(page)) {
|
||||
if (locked_pgdat) {
|
||||
spin_unlock_irqrestore(&locked_pgdat->lru_lock, flags);
|
||||
locked_pgdat = NULL;
|
||||
if (lruvec) {
|
||||
unlock_page_lruvec_irqrestore(lruvec, flags);
|
||||
lruvec = NULL;
|
||||
}
|
||||
__put_compound_page(page);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (PageLRU(page)) {
|
||||
struct pglist_data *pgdat = page_pgdat(page);
|
||||
struct lruvec *new_lruvec;
|
||||
|
||||
if (pgdat != locked_pgdat) {
|
||||
if (locked_pgdat)
|
||||
spin_unlock_irqrestore(&locked_pgdat->lru_lock,
|
||||
new_lruvec = mem_cgroup_page_lruvec(page,
|
||||
page_pgdat(page));
|
||||
if (new_lruvec != lruvec) {
|
||||
if (lruvec)
|
||||
unlock_page_lruvec_irqrestore(lruvec,
|
||||
flags);
|
||||
lock_batch = 0;
|
||||
locked_pgdat = pgdat;
|
||||
spin_lock_irqsave(&locked_pgdat->lru_lock, flags);
|
||||
lruvec = lock_page_lruvec_irqsave(page, &flags);
|
||||
}
|
||||
|
||||
lruvec = mem_cgroup_page_lruvec(page, locked_pgdat);
|
||||
VM_BUG_ON_PAGE(!PageLRU(page), page);
|
||||
__ClearPageLRU(page);
|
||||
del_page_from_lru_list(page, lruvec, page_off_lru(page));
|
||||
@@ -937,8 +939,8 @@ void release_pages(struct page **pages, int nr)
|
||||
|
||||
list_add(&page->lru, &pages_to_free);
|
||||
}
|
||||
if (locked_pgdat)
|
||||
spin_unlock_irqrestore(&locked_pgdat->lru_lock, flags);
|
||||
if (lruvec)
|
||||
unlock_page_lruvec_irqrestore(lruvec, flags);
|
||||
|
||||
mem_cgroup_uncharge_list(&pages_to_free);
|
||||
free_unref_page_list(&pages_to_free);
|
||||
@@ -1026,26 +1028,24 @@ static void __pagevec_lru_add_fn(struct page *page, struct lruvec *lruvec)
|
||||
void __pagevec_lru_add(struct pagevec *pvec)
|
||||
{
|
||||
int i;
|
||||
struct pglist_data *pgdat = NULL;
|
||||
struct lruvec *lruvec;
|
||||
struct lruvec *lruvec = NULL;
|
||||
unsigned long flags = 0;
|
||||
|
||||
for (i = 0; i < pagevec_count(pvec); i++) {
|
||||
struct page *page = pvec->pages[i];
|
||||
struct pglist_data *pagepgdat = page_pgdat(page);
|
||||
struct lruvec *new_lruvec;
|
||||
|
||||
if (pagepgdat != pgdat) {
|
||||
if (pgdat)
|
||||
spin_unlock_irqrestore(&pgdat->lru_lock, flags);
|
||||
pgdat = pagepgdat;
|
||||
spin_lock_irqsave(&pgdat->lru_lock, flags);
|
||||
new_lruvec = mem_cgroup_page_lruvec(page, page_pgdat(page));
|
||||
if (lruvec != new_lruvec) {
|
||||
if (lruvec)
|
||||
unlock_page_lruvec_irqrestore(lruvec, flags);
|
||||
lruvec = lock_page_lruvec_irqsave(page, &flags);
|
||||
}
|
||||
|
||||
lruvec = mem_cgroup_page_lruvec(page, pgdat);
|
||||
__pagevec_lru_add_fn(page, lruvec);
|
||||
}
|
||||
if (pgdat)
|
||||
spin_unlock_irqrestore(&pgdat->lru_lock, flags);
|
||||
if (lruvec)
|
||||
unlock_page_lruvec_irqrestore(lruvec, flags);
|
||||
release_pages(pvec->pages, pvec->nr);
|
||||
pagevec_reinit(pvec);
|
||||
}
|
||||
|
Reference in New Issue
Block a user