Alexei Starovoitov says:

====================
pull-request: bpf-next 2020-12-03

The main changes are:

1) Support BTF in kernel modules, from Andrii.

2) Introduce preferred busy-polling, from Björn.

3) bpf_ima_inode_hash() and bpf_bprm_opts_set() helpers, from KP Singh.

4) Memcg-based memory accounting for bpf objects, from Roman.

5) Allow bpf_{s,g}etsockopt from cgroup bind{4,6} hooks, from Stanislav.

* https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next: (118 commits)
  selftests/bpf: Fix invalid use of strncat in test_sockmap
  libbpf: Use memcpy instead of strncpy to please GCC
  selftests/bpf: Add fentry/fexit/fmod_ret selftest for kernel module
  selftests/bpf: Add tp_btf CO-RE reloc test for modules
  libbpf: Support attachment of BPF tracing programs to kernel modules
  libbpf: Factor out low-level BPF program loading helper
  bpf: Allow to specify kernel module BTFs when attaching BPF programs
  bpf: Remove hard-coded btf_vmlinux assumption from BPF verifier
  selftests/bpf: Add CO-RE relocs selftest relying on kernel module BTF
  selftests/bpf: Add support for marking sub-tests as skipped
  selftests/bpf: Add bpf_testmod kernel module for testing
  libbpf: Add kernel module BTF support for CO-RE relocations
  libbpf: Refactor CO-RE relocs to not assume a single BTF object
  libbpf: Add internal helper to load BTF data by FD
  bpf: Keep module's btf_data_size intact after load
  bpf: Fix bpf_put_raw_tracepoint()'s use of __module_address()
  selftests/bpf: Add Userspace tests for TCP_WINDOW_CLAMP
  bpf: Adds support for setting window clamp
  samples/bpf: Fix spelling mistake "recieving" -> "receiving"
  bpf: Fix cold build of test_progs-no_alu32
  ...
====================

Link: https://lore.kernel.org/r/20201204021936.85653-1-alexei.starovoitov@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
Jakub Kicinski
2020-12-04 07:48:11 -08:00
196 changed files with 4335 additions and 2574 deletions

View File

@@ -343,6 +343,175 @@ struct mem_cgroup {
extern struct mem_cgroup *root_mem_cgroup;
enum page_memcg_data_flags {
/* page->memcg_data is a pointer to an objcgs vector */
MEMCG_DATA_OBJCGS = (1UL << 0),
/* page has been accounted as a non-slab kernel page */
MEMCG_DATA_KMEM = (1UL << 1),
/* the next bit after the last actual flag */
__NR_MEMCG_DATA_FLAGS = (1UL << 2),
};
#define MEMCG_DATA_FLAGS_MASK (__NR_MEMCG_DATA_FLAGS - 1)
/*
* page_memcg - get the memory cgroup associated with a page
* @page: a pointer to the page struct
*
* Returns a pointer to the memory cgroup associated with the page,
* or NULL. This function assumes that the page is known to have a
* proper memory cgroup pointer. It's not safe to call this function
* against some type of pages, e.g. slab pages or ex-slab pages.
*
* Any of the following ensures page and memcg binding stability:
* - the page lock
* - LRU isolation
* - lock_page_memcg()
* - exclusive reference
*/
static inline struct mem_cgroup *page_memcg(struct page *page)
{
unsigned long memcg_data = page->memcg_data;
VM_BUG_ON_PAGE(PageSlab(page), page);
VM_BUG_ON_PAGE(memcg_data & MEMCG_DATA_OBJCGS, page);
return (struct mem_cgroup *)(memcg_data & ~MEMCG_DATA_FLAGS_MASK);
}
/*
* page_memcg_rcu - locklessly get the memory cgroup associated with a page
* @page: a pointer to the page struct
*
* Returns a pointer to the memory cgroup associated with the page,
* or NULL. This function assumes that the page is known to have a
* proper memory cgroup pointer. It's not safe to call this function
* against some type of pages, e.g. slab pages or ex-slab pages.
*/
static inline struct mem_cgroup *page_memcg_rcu(struct page *page)
{
VM_BUG_ON_PAGE(PageSlab(page), page);
WARN_ON_ONCE(!rcu_read_lock_held());
return (struct mem_cgroup *)(READ_ONCE(page->memcg_data) &
~MEMCG_DATA_FLAGS_MASK);
}
/*
* page_memcg_check - get the memory cgroup associated with a page
* @page: a pointer to the page struct
*
* Returns a pointer to the memory cgroup associated with the page,
* or NULL. This function unlike page_memcg() can take any page
* as an argument. It has to be used in cases when it's not known if a page
* has an associated memory cgroup pointer or an object cgroups vector.
*
* Any of the following ensures page and memcg binding stability:
* - the page lock
* - LRU isolation
* - lock_page_memcg()
* - exclusive reference
*/
static inline struct mem_cgroup *page_memcg_check(struct page *page)
{
/*
* Because page->memcg_data might be changed asynchronously
* for slab pages, READ_ONCE() should be used here.
*/
unsigned long memcg_data = READ_ONCE(page->memcg_data);
if (memcg_data & MEMCG_DATA_OBJCGS)
return NULL;
return (struct mem_cgroup *)(memcg_data & ~MEMCG_DATA_FLAGS_MASK);
}
/*
* PageMemcgKmem - check if the page has MemcgKmem flag set
* @page: a pointer to the page struct
*
* Checks if the page has MemcgKmem flag set. The caller must ensure that
* the page has an associated memory cgroup. It's not safe to call this function
* against some types of pages, e.g. slab pages.
*/
static inline bool PageMemcgKmem(struct page *page)
{
VM_BUG_ON_PAGE(page->memcg_data & MEMCG_DATA_OBJCGS, page);
return page->memcg_data & MEMCG_DATA_KMEM;
}
#ifdef CONFIG_MEMCG_KMEM
/*
* page_objcgs - get the object cgroups vector associated with a page
* @page: a pointer to the page struct
*
* Returns a pointer to the object cgroups vector associated with the page,
* or NULL. This function assumes that the page is known to have an
* associated object cgroups vector. It's not safe to call this function
* against pages, which might have an associated memory cgroup: e.g.
* kernel stack pages.
*/
static inline struct obj_cgroup **page_objcgs(struct page *page)
{
unsigned long memcg_data = READ_ONCE(page->memcg_data);
VM_BUG_ON_PAGE(memcg_data && !(memcg_data & MEMCG_DATA_OBJCGS), page);
VM_BUG_ON_PAGE(memcg_data & MEMCG_DATA_KMEM, page);
return (struct obj_cgroup **)(memcg_data & ~MEMCG_DATA_FLAGS_MASK);
}
/*
* page_objcgs_check - get the object cgroups vector associated with a page
* @page: a pointer to the page struct
*
* Returns a pointer to the object cgroups vector associated with the page,
* or NULL. This function is safe to use if the page can be directly associated
* with a memory cgroup.
*/
static inline struct obj_cgroup **page_objcgs_check(struct page *page)
{
unsigned long memcg_data = READ_ONCE(page->memcg_data);
if (!memcg_data || !(memcg_data & MEMCG_DATA_OBJCGS))
return NULL;
VM_BUG_ON_PAGE(memcg_data & MEMCG_DATA_KMEM, page);
return (struct obj_cgroup **)(memcg_data & ~MEMCG_DATA_FLAGS_MASK);
}
/*
* set_page_objcgs - associate a page with a object cgroups vector
* @page: a pointer to the page struct
* @objcgs: a pointer to the object cgroups vector
*
* Atomically associates a page with a vector of object cgroups.
*/
static inline bool set_page_objcgs(struct page *page,
struct obj_cgroup **objcgs)
{
return !cmpxchg(&page->memcg_data, 0, (unsigned long)objcgs |
MEMCG_DATA_OBJCGS);
}
#else
static inline struct obj_cgroup **page_objcgs(struct page *page)
{
return NULL;
}
static inline struct obj_cgroup **page_objcgs_check(struct page *page)
{
return NULL;
}
static inline bool set_page_objcgs(struct page *page,
struct obj_cgroup **objcgs)
{
return true;
}
#endif
static __always_inline bool memcg_stat_item_in_bytes(int idx)
{
if (idx == MEMCG_PERCPU_B)
@@ -743,15 +912,19 @@ static inline void mod_memcg_state(struct mem_cgroup *memcg,
static inline void __mod_memcg_page_state(struct page *page,
int idx, int val)
{
if (page->mem_cgroup)
__mod_memcg_state(page->mem_cgroup, idx, val);
struct mem_cgroup *memcg = page_memcg(page);
if (memcg)
__mod_memcg_state(memcg, idx, val);
}
static inline void mod_memcg_page_state(struct page *page,
int idx, int val)
{
if (page->mem_cgroup)
mod_memcg_state(page->mem_cgroup, idx, val);
struct mem_cgroup *memcg = page_memcg(page);
if (memcg)
mod_memcg_state(memcg, idx, val);
}
static inline unsigned long lruvec_page_state(struct lruvec *lruvec,
@@ -834,16 +1007,17 @@ static inline void __mod_lruvec_page_state(struct page *page,
enum node_stat_item idx, int val)
{
struct page *head = compound_head(page); /* rmap on tail pages */
struct mem_cgroup *memcg = page_memcg(head);
pg_data_t *pgdat = page_pgdat(page);
struct lruvec *lruvec;
/* Untracked pages have no memcg, no lruvec. Update only the node */
if (!head->mem_cgroup) {
if (!memcg) {
__mod_node_page_state(pgdat, idx, val);
return;
}
lruvec = mem_cgroup_lruvec(head->mem_cgroup, pgdat);
lruvec = mem_cgroup_lruvec(memcg, pgdat);
__mod_lruvec_state(lruvec, idx, val);
}
@@ -878,8 +1052,10 @@ static inline void count_memcg_events(struct mem_cgroup *memcg,
static inline void count_memcg_page_event(struct page *page,
enum vm_event_item idx)
{
if (page->mem_cgroup)
count_memcg_events(page->mem_cgroup, idx, 1);
struct mem_cgroup *memcg = page_memcg(page);
if (memcg)
count_memcg_events(memcg, idx, 1);
}
static inline void count_memcg_event_mm(struct mm_struct *mm,
@@ -948,6 +1124,27 @@ void mem_cgroup_split_huge_fixup(struct page *head);
struct mem_cgroup;
static inline struct mem_cgroup *page_memcg(struct page *page)
{
return NULL;
}
static inline struct mem_cgroup *page_memcg_rcu(struct page *page)
{
WARN_ON_ONCE(!rcu_read_lock_held());
return NULL;
}
static inline struct mem_cgroup *page_memcg_check(struct page *page)
{
return NULL;
}
static inline bool PageMemcgKmem(struct page *page)
{
return false;
}
static inline bool mem_cgroup_is_root(struct mem_cgroup *memcg)
{
return true;
@@ -1437,7 +1634,7 @@ static inline void mem_cgroup_track_foreign_dirty(struct page *page,
if (mem_cgroup_disabled())
return;
if (unlikely(&page->mem_cgroup->css != wb->memcg_css))
if (unlikely(&page_memcg(page)->css != wb->memcg_css))
mem_cgroup_track_foreign_dirty_slowpath(page, wb);
}