Merge tag 'slab-for-6.5' of git://git.kernel.org/pub/scm/linux/kernel/git/vbabka/slab

Pull slab updates from Vlastimil Babka:

 - SLAB deprecation:

   Following the discussion at LSF/MM 2023 [1], and with no objections
   raised, the SLAB allocator is deprecated by renaming the config
   option to CONFIG_SLAB_DEPRECATED (to make its users notice) and
   updating the help text. SLUB should be used instead. Existing
   defconfigs with CONFIG_SLAB are also updated.

 - SLAB_NO_MERGE kmem_cache flag (Jesper Dangaard Brouer):

   There are (very limited) cases where kmem_cache merging is
   undesirable, and the existing ways to prevent it are hacky.
   Introduce a new flag to do that cleanly and convert the existing
   hacky users. Btrfs plans to use this for debug kernel builds (a use
   case that is always fine); networking wants it for performance
   reasons (which should be very rare). A usage sketch follows after
   the link below.

 - Replace the usage of weak PRNGs (David Keisar Schmidt):

   In addition to using stronger RNGs for the security-related
   features, the code ends up a bit cleaner; a sketch of the
   replacement pattern also follows below.

 - Misc code cleanups (SeongJae Park, Xiongwei Song, Zhen Lei, and
   zhaoxinchao)

Link: https://lwn.net/Articles/932201/ [1]
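
For illustration, a minimal sketch of how a caller opts a cache out of
merging with the new flag; the cache name, object struct, and initcall
here are hypothetical, not taken from the tree:

    #include <linux/slab.h>
    #include <linux/init.h>
    #include <linux/errno.h>

    /* Hypothetical cache that must never be merged, e.g. for debugging. */
    struct my_debug_obj {
            unsigned long state;
    };

    static struct kmem_cache *my_debug_cache;

    static int __init my_debug_cache_init(void)
    {
            /* SLAB_NO_MERGE keeps this cache out of kmem_cache merging. */
            my_debug_cache = kmem_cache_create("my_debug_cache",
                                               sizeof(struct my_debug_obj), 0,
                                               SLAB_NO_MERGE, NULL);
            return my_debug_cache ? 0 : -ENOMEM;
    }
    core_initcall(my_debug_cache_init);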
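
And a sketch of the PRNG replacement pattern, modeled on the freelist
shuffling changes in the diff below (the helper name is illustrative):
prandom_u32_state() plus a modulo is replaced by get_random_u32_below(),
which draws from the kernel CRNG and returns an unbiased value below the
given bound:

    #include <linux/random.h>
    #include <linux/kernel.h>   /* swap() */

    /* Fisher-Yates shuffle drawing each index from the CRNG. */
    static void shuffle_indices(unsigned int *list, unsigned int count)
    {
            unsigned int i, rand;

            if (count < 2)
                    return;

            for (i = count - 1; i > 0; i--) {
                    /* was: rand = prandom_u32_state(&state) % (i + 1); */
                    rand = get_random_u32_below(i + 1); /* uniform in [0, i] */
                    swap(list[i], list[rand]);
            }
    }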

* tag 'slab-for-6.5' of git://git.kernel.org/pub/scm/linux/kernel/git/vbabka/slab:
  mm/slab_common: use SLAB_NO_MERGE instead of negative refcount
  mm/slab: break up RCU readers on SLAB_TYPESAFE_BY_RCU example code
  mm/slab: add a missing semicolon on SLAB_TYPESAFE_BY_RCU example code
  mm/slab_common: reduce an if statement in create_cache()
  mm/slab: introduce kmem_cache flag SLAB_NO_MERGE
  mm/slab: rename CONFIG_SLAB to CONFIG_SLAB_DEPRECATED
  mm/slab: remove HAVE_HARDENED_USERCOPY_ALLOCATOR
  mm/slab_common: Replace invocation of weak PRNG
  mm/slab: Replace invocation of weak PRNG
  slub: Don't read nr_slabs and total_objects directly
  slub: Remove slabs_node() function
  slub: Remove CONFIG_SMP defined check
  slub: Put objects_show() into CONFIG_SLUB_DEBUG enabled block
  slub: Correct the error code when slab_kset is NULL
  mm/slab: correct return values in comment for _kmem_cache_create()
Merged by Linus Torvalds on 2023-06-29 16:34:12 -07:00
143 changed files with 80 additions and 240 deletions


@@ -234,18 +234,23 @@ choice
help
This option allows to select a slab allocator.
config SLAB
bool "SLAB"
config SLAB_DEPRECATED
bool "SLAB (DEPRECATED)"
depends on !PREEMPT_RT
select HAVE_HARDENED_USERCOPY_ALLOCATOR
help
Deprecated and scheduled for removal in a few cycles. Replaced by
SLUB.
If you cannot migrate to SLUB, please contact linux-mm@kvack.org
and the people listed in the SLAB ALLOCATOR section of MAINTAINERS
file, explaining why.
The regular slab allocator that is established and known to work
well in all environments. It organizes cache hot objects in
per cpu and per node queues.
config SLUB
bool "SLUB (Unqueued Allocator)"
select HAVE_HARDENED_USERCOPY_ALLOCATOR
help
SLUB is a slab allocator that minimizes cache line usage
instead of managing queues of cached objects (SLAB approach).
@@ -256,6 +261,11 @@ config SLUB
endchoice
config SLAB
bool
default y
depends on SLAB_DEPRECATED
config SLUB_TINY
bool "Configure SLUB for minimal memory footprint"
depends on SLUB && EXPERT


@@ -191,11 +191,10 @@ static size_t setup_test_cache(struct kunit *test, size_t size, slab_flags_t fla
kunit_info(test, "%s: size=%zu, ctor=%ps\n", __func__, size, ctor);
/*
* Use SLAB_NOLEAKTRACE to prevent merging with existing caches. Any
* other flag in SLAB_NEVER_MERGE also works. Use SLAB_ACCOUNT to
* allocate via memcg, if enabled.
* Use SLAB_NO_MERGE to prevent merging with existing caches.
* Use SLAB_ACCOUNT to allocate via memcg, if enabled.
*/
flags |= SLAB_NOLEAKTRACE | SLAB_ACCOUNT;
flags |= SLAB_NO_MERGE | SLAB_ACCOUNT;
test_cache = kmem_cache_create("test", size, 1, flags, ctor);
KUNIT_ASSERT_TRUE_MSG(test, test_cache, "could not create cache");


@@ -1883,14 +1883,12 @@ static bool set_on_slab_cache(struct kmem_cache *cachep,
return true;
}
/**
/*
* __kmem_cache_create - Create a cache.
* @cachep: cache management descriptor
* @flags: SLAB flags
*
* Returns a ptr to the cache on success, NULL on failure.
* Cannot be called within an int, but can be interrupted.
* The @ctor is run when new pages are allocated by the cache.
* Returns zero on success, nonzero on failure.
*
* The flags are
*
@@ -1903,8 +1901,6 @@ static bool set_on_slab_cache(struct kmem_cache *cachep,
* %SLAB_HWCACHE_ALIGN - Align the objects in this cache to a hardware
* cacheline. This can be beneficial if you're counting cycles as closely
* as davem.
*
* Return: a pointer to the created cache or %NULL in case of error
*/
int __kmem_cache_create(struct kmem_cache *cachep, slab_flags_t flags)
{
@@ -2355,44 +2351,34 @@ static void cache_init_objs_debug(struct kmem_cache *cachep, struct slab *slab)
#ifdef CONFIG_SLAB_FREELIST_RANDOM
/* Hold information during a freelist initialization */
union freelist_init_state {
struct {
unsigned int pos;
unsigned int *list;
unsigned int count;
};
struct rnd_state rnd_state;
struct freelist_init_state {
unsigned int pos;
unsigned int *list;
unsigned int count;
};
/*
* Initialize the state based on the randomization method available.
* return true if the pre-computed list is available, false otherwise.
*/
static bool freelist_state_initialize(union freelist_init_state *state,
static bool freelist_state_initialize(struct freelist_init_state *state,
struct kmem_cache *cachep,
unsigned int count)
{
bool ret;
unsigned int rand;
/* Use best entropy available to define a random shift */
rand = get_random_u32();
/* Use a random state if the pre-computed list is not available */
if (!cachep->random_seq) {
prandom_seed_state(&state->rnd_state, rand);
ret = false;
} else {
state->list = cachep->random_seq;
state->count = count;
state->pos = rand % count;
state->pos = get_random_u32_below(count);
ret = true;
}
return ret;
}
/* Get the next entry on the list and randomize it using a random shift */
static freelist_idx_t next_random_slot(union freelist_init_state *state)
static freelist_idx_t next_random_slot(struct freelist_init_state *state)
{
if (state->pos >= state->count)
state->pos = 0;
@@ -2413,7 +2399,7 @@ static void swap_free_obj(struct slab *slab, unsigned int a, unsigned int b)
static bool shuffle_freelist(struct kmem_cache *cachep, struct slab *slab)
{
unsigned int objfreelist = 0, i, rand, count = cachep->num;
union freelist_init_state state;
struct freelist_init_state state;
bool precomputed;
if (count < 2)
@@ -2442,8 +2428,7 @@ static bool shuffle_freelist(struct kmem_cache *cachep, struct slab *slab)
/* Fisher-Yates shuffle */
for (i = count - 1; i > 0; i--) {
rand = prandom_u32_state(&state.rnd_state);
rand %= (i + 1);
rand = get_random_u32_below(i + 1);
swap_free_obj(slab, i, rand);
}
} else {


@@ -332,11 +332,11 @@ static inline bool is_kmalloc_cache(struct kmem_cache *s)
#if defined(CONFIG_SLAB)
#define SLAB_CACHE_FLAGS (SLAB_MEM_SPREAD | SLAB_NOLEAKTRACE | \
SLAB_RECLAIM_ACCOUNT | SLAB_TEMPORARY | \
SLAB_ACCOUNT)
SLAB_ACCOUNT | SLAB_NO_MERGE)
#elif defined(CONFIG_SLUB)
#define SLAB_CACHE_FLAGS (SLAB_NOLEAKTRACE | SLAB_RECLAIM_ACCOUNT | \
SLAB_TEMPORARY | SLAB_ACCOUNT | \
SLAB_NO_USER_FLAGS | SLAB_KMALLOC)
SLAB_NO_USER_FLAGS | SLAB_KMALLOC | SLAB_NO_MERGE)
#else
#define SLAB_CACHE_FLAGS (SLAB_NOLEAKTRACE)
#endif
@@ -357,6 +357,7 @@ static inline bool is_kmalloc_cache(struct kmem_cache *s)
SLAB_TEMPORARY | \
SLAB_ACCOUNT | \
SLAB_KMALLOC | \
SLAB_NO_MERGE | \
SLAB_NO_USER_FLAGS)
bool __kmem_cache_empty(struct kmem_cache *);
@@ -870,16 +871,8 @@ struct kmem_obj_info {
void __kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct slab *slab);
#endif
#ifdef CONFIG_HAVE_HARDENED_USERCOPY_ALLOCATOR
void __check_heap_object(const void *ptr, unsigned long n,
const struct slab *slab, bool to_user);
#else
static inline
void __check_heap_object(const void *ptr, unsigned long n,
const struct slab *slab, bool to_user)
{
}
#endif
#ifdef CONFIG_SLUB_DEBUG
void skip_orig_size_check(struct kmem_cache *s, const void *object);


@@ -49,7 +49,7 @@ static DECLARE_WORK(slab_caches_to_rcu_destroy_work,
*/
#define SLAB_NEVER_MERGE (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \
SLAB_TRACE | SLAB_TYPESAFE_BY_RCU | SLAB_NOLEAKTRACE | \
SLAB_FAILSLAB | kasan_never_merge())
SLAB_FAILSLAB | SLAB_NO_MERGE | kasan_never_merge())
#define SLAB_MERGE_SAME (SLAB_RECLAIM_ACCOUNT | SLAB_CACHE_DMA | \
SLAB_CACHE_DMA32 | SLAB_ACCOUNT)
@@ -238,14 +238,12 @@ static struct kmem_cache *create_cache(const char *name,
s->refcount = 1;
list_add(&s->list, &slab_caches);
out:
if (err)
return ERR_PTR(err);
return s;
out_free_cache:
kmem_cache_free(kmem_cache, s);
goto out;
out:
return ERR_PTR(err);
}
/**
@@ -892,6 +890,13 @@ new_kmalloc_cache(int idx, enum kmalloc_cache_type type, slab_flags_t flags)
flags |= SLAB_CACHE_DMA;
}
/*
* If CONFIG_MEMCG_KMEM is enabled, disable cache merging for
* KMALLOC_NORMAL caches.
*/
if (IS_ENABLED(CONFIG_MEMCG_KMEM) && (type == KMALLOC_NORMAL))
flags |= SLAB_NO_MERGE;
if (minalign > ARCH_KMALLOC_MINALIGN) {
aligned_size = ALIGN(aligned_size, minalign);
aligned_idx = __kmalloc_index(aligned_size, false);
@@ -903,13 +908,6 @@ new_kmalloc_cache(int idx, enum kmalloc_cache_type type, slab_flags_t flags)
aligned_size, flags);
if (idx != aligned_idx)
kmalloc_caches[type][idx] = kmalloc_caches[type][aligned_idx];
/*
* If CONFIG_MEMCG_KMEM is enabled, disable cache merging for
* KMALLOC_NORMAL caches.
*/
if (IS_ENABLED(CONFIG_MEMCG_KMEM) && (type == KMALLOC_NORMAL))
kmalloc_caches[type][idx]->refcount = -1;
}
/*
@@ -1162,7 +1160,7 @@ EXPORT_SYMBOL(kmalloc_large_node);
#ifdef CONFIG_SLAB_FREELIST_RANDOM
/* Randomize a generic freelist */
static void freelist_randomize(struct rnd_state *state, unsigned int *list,
static void freelist_randomize(unsigned int *list,
unsigned int count)
{
unsigned int rand;
@@ -1173,8 +1171,7 @@ static void freelist_randomize(struct rnd_state *state, unsigned int *list,
/* Fisher-Yates shuffle */
for (i = count - 1; i > 0; i--) {
rand = prandom_u32_state(state);
rand %= (i + 1);
rand = get_random_u32_below(i + 1);
swap(list[i], list[rand]);
}
}
@@ -1183,7 +1180,6 @@ static void freelist_randomize(struct rnd_state *state, unsigned int *list,
int cache_random_seq_create(struct kmem_cache *cachep, unsigned int count,
gfp_t gfp)
{
struct rnd_state state;
if (count < 2 || cachep->random_seq)
return 0;
@@ -1192,10 +1188,7 @@ int cache_random_seq_create(struct kmem_cache *cachep, unsigned int count,
if (!cachep->random_seq)
return -ENOMEM;
/* Get best entropy at this stage of boot */
prandom_seed_state(&state, get_random_long());
freelist_randomize(&state, cachep->random_seq, count);
freelist_randomize(cachep->random_seq, count);
return 0;
}


@@ -1365,14 +1365,6 @@ static void remove_full(struct kmem_cache *s, struct kmem_cache_node *n, struct
list_del(&slab->slab_list);
}
/* Tracking of the number of slabs for debugging purposes */
static inline unsigned long slabs_node(struct kmem_cache *s, int node)
{
struct kmem_cache_node *n = get_node(s, node);
return atomic_long_read(&n->nr_slabs);
}
static inline unsigned long node_nr_slabs(struct kmem_cache_node *n)
{
return atomic_long_read(&n->nr_slabs);
@@ -1743,8 +1735,6 @@ slab_flags_t kmem_cache_flags(unsigned int object_size,
#define disable_higher_order_debug 0
static inline unsigned long slabs_node(struct kmem_cache *s, int node)
{ return 0; }
static inline unsigned long node_nr_slabs(struct kmem_cache_node *n)
{ return 0; }
static inline void inc_slabs_node(struct kmem_cache *s, int node,
@@ -4623,7 +4613,7 @@ bool __kmem_cache_empty(struct kmem_cache *s)
struct kmem_cache_node *n;
for_each_kmem_cache_node(s, node, n)
if (n->nr_partial || slabs_node(s, node))
if (n->nr_partial || node_nr_slabs(n))
return false;
return true;
}
@@ -4640,7 +4630,7 @@ int __kmem_cache_shutdown(struct kmem_cache *s)
/* Attempt to free all objects */
for_each_kmem_cache_node(s, node, n) {
free_partial(s, n);
if (n->nr_partial || slabs_node(s, node))
if (n->nr_partial || node_nr_slabs(n))
return 1;
}
return 0;
@@ -4853,7 +4843,7 @@ static int __kmem_cache_do_shrink(struct kmem_cache *s)
list_for_each_entry_safe(slab, t, &discard, slab_list)
free_slab(s, slab);
if (slabs_node(s, node))
if (node_nr_slabs(n))
ret = 1;
}
@@ -5191,9 +5181,9 @@ static int validate_slab_node(struct kmem_cache *s,
validate_slab(s, slab, obj_map);
count++;
}
if (count != atomic_long_read(&n->nr_slabs)) {
if (count != node_nr_slabs(n)) {
pr_err("SLUB: %s %ld slabs counted but counter=%ld\n",
s->name, count, atomic_long_read(&n->nr_slabs));
s->name, count, node_nr_slabs(n));
slab_add_kunit_errors();
}
@@ -5477,12 +5467,11 @@ static ssize_t show_slab_objects(struct kmem_cache *s,
for_each_kmem_cache_node(s, node, n) {
if (flags & SO_TOTAL)
x = atomic_long_read(&n->total_objects);
x = node_nr_objs(n);
else if (flags & SO_OBJECTS)
x = atomic_long_read(&n->total_objects) -
count_partial(n, count_free);
x = node_nr_objs(n) - count_partial(n, count_free);
else
x = atomic_long_read(&n->nr_slabs);
x = node_nr_slabs(n);
total += x;
nodes[node] += x;
}
@@ -5637,12 +5626,6 @@ static ssize_t cpu_slabs_show(struct kmem_cache *s, char *buf)
}
SLAB_ATTR_RO(cpu_slabs);
static ssize_t objects_show(struct kmem_cache *s, char *buf)
{
return show_slab_objects(s, buf, SO_ALL|SO_OBJECTS);
}
SLAB_ATTR_RO(objects);
static ssize_t objects_partial_show(struct kmem_cache *s, char *buf)
{
return show_slab_objects(s, buf, SO_PARTIAL|SO_OBJECTS);
@@ -5671,7 +5654,7 @@ static ssize_t slabs_cpu_partial_show(struct kmem_cache *s, char *buf)
objects = (slabs * oo_objects(s->oo)) / 2;
len += sysfs_emit_at(buf, len, "%d(%d)", objects, slabs);
#if defined(CONFIG_SLUB_CPU_PARTIAL) && defined(CONFIG_SMP)
#ifdef CONFIG_SLUB_CPU_PARTIAL
for_each_online_cpu(cpu) {
struct slab *slab;
@@ -5737,6 +5720,12 @@ static ssize_t total_objects_show(struct kmem_cache *s, char *buf)
}
SLAB_ATTR_RO(total_objects);
static ssize_t objects_show(struct kmem_cache *s, char *buf)
{
return show_slab_objects(s, buf, SO_ALL|SO_OBJECTS);
}
SLAB_ATTR_RO(objects);
static ssize_t sanity_checks_show(struct kmem_cache *s, char *buf)
{
return sysfs_emit(buf, "%d\n", !!(s->flags & SLAB_CONSISTENCY_CHECKS));
@@ -5968,7 +5957,6 @@ static struct attribute *slab_attrs[] = {
&order_attr.attr,
&min_partial_attr.attr,
&cpu_partial_attr.attr,
&objects_attr.attr,
&objects_partial_attr.attr,
&partial_attr.attr,
&cpu_slabs_attr.attr,
@@ -5982,6 +5970,7 @@ static struct attribute *slab_attrs[] = {
&slabs_cpu_partial_attr.attr,
#ifdef CONFIG_SLUB_DEBUG
&total_objects_attr.attr,
&objects_attr.attr,
&slabs_attr.attr,
&sanity_checks_attr.attr,
&trace_attr.attr,
@@ -6249,7 +6238,7 @@ static int __init slab_sysfs_init(void)
if (!slab_kset) {
mutex_unlock(&slab_mutex);
pr_err("Cannot register slab subsystem.\n");
return -ENOSYS;
return -ENOMEM;
}
slab_state = FULL;
@@ -6421,7 +6410,7 @@ static int slab_debug_trace_open(struct inode *inode, struct file *filep)
unsigned long flags;
struct slab *slab;
if (!atomic_long_read(&n->nr_slabs))
if (!node_nr_slabs(n))
continue;
spin_lock_irqsave(&n->list_lock, flags);