mirror of
https://github.com/tbsdtv/linux_media.git
synced 2025-07-23 12:43:29 +02:00
Merge tag 'cgroup-for-6.1' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup
Pull cgroup updates from Tejun Heo: - cpuset now support isolated cpus.partition type, which will enable dynamic CPU isolation - pids.peak added to remember the max number of pids used - holes in cgroup namespace plugged - internal cleanups * tag 'cgroup-for-6.1' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup: (25 commits) cgroup: use strscpy() is more robust and safer iocost_monitor: reorder BlkgIterator cgroup: simplify code in cgroup_apply_control cgroup: Make cgroup_get_from_id() prettier cgroup/cpuset: remove unreachable code cgroup: Remove CFTYPE_PRESSURE cgroup: Improve cftype add/rm error handling kselftest/cgroup: Add cpuset v2 partition root state test cgroup/cpuset: Update description of cpuset.cpus.partition in cgroup-v2.rst cgroup/cpuset: Make partition invalid if cpumask change violates exclusivity rule cgroup/cpuset: Relocate a code block in validate_change() cgroup/cpuset: Show invalid partition reason string cgroup/cpuset: Add a new isolated cpus.partition type cgroup/cpuset: Relax constraints to partition & cpus changes cgroup/cpuset: Allow no-task partition to have empty cpuset.cpus.effective cgroup/cpuset: Miscellaneous cleanups & add helper functions cgroup/cpuset: Enable update_tasks_cpumask() on top_cpuset cgroup: add pids.peak interface for pids controller cgroup: Remove data-race around cgrp_dfl_visible cgroup: Fix build failure when CONFIG_SHRINKER_DEBUG ...
This commit is contained in:
@@ -2190,75 +2190,93 @@ Cpuset Interface Files
|
|||||||
|
|
||||||
It accepts only the following input values when written to.
|
It accepts only the following input values when written to.
|
||||||
|
|
||||||
======== ================================
|
========== =====================================
|
||||||
"root" a partition root
|
"member" Non-root member of a partition
|
||||||
"member" a non-root member of a partition
|
"root" Partition root
|
||||||
======== ================================
|
"isolated" Partition root without load balancing
|
||||||
|
========== =====================================
|
||||||
|
|
||||||
When set to be a partition root, the current cgroup is the
|
The root cgroup is always a partition root and its state
|
||||||
root of a new partition or scheduling domain that comprises
|
cannot be changed. All other non-root cgroups start out as
|
||||||
itself and all its descendants except those that are separate
|
"member".
|
||||||
partition roots themselves and their descendants. The root
|
|
||||||
cgroup is always a partition root.
|
|
||||||
|
|
||||||
There are constraints on where a partition root can be set.
|
When set to "root", the current cgroup is the root of a new
|
||||||
It can only be set in a cgroup if all the following conditions
|
partition or scheduling domain that comprises itself and all
|
||||||
are true.
|
its descendants except those that are separate partition roots
|
||||||
|
themselves and their descendants.
|
||||||
|
|
||||||
1) The "cpuset.cpus" is not empty and the list of CPUs are
|
When set to "isolated", the CPUs in that partition root will
|
||||||
exclusive, i.e. they are not shared by any of its siblings.
|
be in an isolated state without any load balancing from the
|
||||||
2) The parent cgroup is a partition root.
|
scheduler. Tasks placed in such a partition with multiple
|
||||||
3) The "cpuset.cpus" is also a proper subset of the parent's
|
CPUs should be carefully distributed and bound to each of the
|
||||||
"cpuset.cpus.effective".
|
individual CPUs for optimal performance.
|
||||||
4) There is no child cgroups with cpuset enabled. This is for
|
|
||||||
eliminating corner cases that have to be handled if such a
|
|
||||||
condition is allowed.
|
|
||||||
|
|
||||||
Setting it to partition root will take the CPUs away from the
|
The value shown in "cpuset.cpus.effective" of a partition root
|
||||||
effective CPUs of the parent cgroup. Once it is set, this
|
is the set of CPUs that the partition root can dedicate to a potential
|
||||||
file cannot be reverted back to "member" if there are any child
|
new child partition root. The new child subtracts available
|
||||||
cgroups with cpuset enabled.
|
CPUs from its parent "cpuset.cpus.effective".
|
||||||
|
|
||||||
A parent partition cannot distribute all its CPUs to its
|
A partition root ("root" or "isolated") can be in one of the
|
||||||
child partitions. There must be at least one cpu left in the
|
two possible states - valid or invalid. An invalid partition
|
||||||
parent partition.
|
root is in a degraded state where some state information may
|
||||||
|
be retained, but behaves more like a "member".
|
||||||
|
|
||||||
Once becoming a partition root, changes to "cpuset.cpus" is
|
All possible state transitions among "member", "root" and
|
||||||
generally allowed as long as the first condition above is true,
|
"isolated" are allowed.
|
||||||
the change will not take away all the CPUs from the parent
|
|
||||||
partition and the new "cpuset.cpus" value is a superset of its
|
|
||||||
children's "cpuset.cpus" values.
|
|
||||||
|
|
||||||
Sometimes, external factors like changes to ancestors'
|
On read, the "cpuset.cpus.partition" file can show the following
|
||||||
"cpuset.cpus" or cpu hotplug can cause the state of the partition
|
values.
|
||||||
root to change. On read, the "cpuset.sched.partition" file
|
|
||||||
can show the following values.
|
|
||||||
|
|
||||||
============== ==============================
|
============================= =====================================
|
||||||
"member" Non-root member of a partition
|
"member" Non-root member of a partition
|
||||||
"root" Partition root
|
"root" Partition root
|
||||||
"root invalid" Invalid partition root
|
"isolated" Partition root without load balancing
|
||||||
============== ==============================
|
"root invalid (<reason>)" Invalid partition root
|
||||||
|
"isolated invalid (<reason>)" Invalid isolated partition root
|
||||||
|
============================= =====================================
|
||||||
|
|
||||||
It is a partition root if the first 2 partition root conditions
|
In the case of an invalid partition root, a descriptive string on
|
||||||
above are true and at least one CPU from "cpuset.cpus" is
|
why the partition is invalid is included within parentheses.
|
||||||
granted by the parent cgroup.
|
|
||||||
|
|
||||||
A partition root can become invalid if none of CPUs requested
|
For a partition root to become valid, the following conditions
|
||||||
in "cpuset.cpus" can be granted by the parent cgroup or the
|
must be met.
|
||||||
parent cgroup is no longer a partition root itself. In this
|
|
||||||
case, it is not a real partition even though the restriction
|
|
||||||
of the first partition root condition above will still apply.
|
|
||||||
The cpu affinity of all the tasks in the cgroup will then be
|
|
||||||
associated with CPUs in the nearest ancestor partition.
|
|
||||||
|
|
||||||
An invalid partition root can be transitioned back to a
|
1) The "cpuset.cpus" is exclusive with its siblings, i.e. they
|
||||||
real partition root if at least one of the requested CPUs
|
are not shared by any of its siblings (exclusivity rule).
|
||||||
can now be granted by its parent. In this case, the cpu
|
2) The parent cgroup is a valid partition root.
|
||||||
affinity of all the tasks in the formerly invalid partition
|
3) The "cpuset.cpus" is not empty and must contain at least
|
||||||
will be associated to the CPUs of the newly formed partition.
|
one of the CPUs from parent's "cpuset.cpus", i.e. they overlap.
|
||||||
Changing the partition state of an invalid partition root to
|
4) The "cpuset.cpus.effective" cannot be empty unless there is
|
||||||
"member" is always allowed even if child cpusets are present.
|
no task associated with this partition.
|
||||||
|
|
||||||
|
External events like hotplug or changes to "cpuset.cpus" can
|
||||||
|
cause a valid partition root to become invalid and vice versa.
|
||||||
|
Note that a task cannot be moved to a cgroup with empty
|
||||||
|
"cpuset.cpus.effective".
|
||||||
|
|
||||||
|
For a valid partition root with the sibling cpu exclusivity
|
||||||
|
rule enabled, changes made to "cpuset.cpus" that violate the
|
||||||
|
exclusivity rule will invalidate the partition as well as its
|
||||||
|
sibling partitions with conflicting cpuset.cpus values. So
|
||||||
|
care must be taken in changing "cpuset.cpus".
|
||||||
|
|
||||||
|
A valid non-root parent partition may distribute out all its CPUs
|
||||||
|
to its child partitions when there is no task associated with it.
|
||||||
|
|
||||||
|
Care must be taken to change a valid partition root to
|
||||||
|
"member" as all its child partitions, if present, will become
|
||||||
|
invalid causing disruption to tasks running in those child
|
||||||
|
partitions. These inactivated partitions could be recovered if
|
||||||
|
their parent is switched back to a partition root with a proper
|
||||||
|
set of "cpuset.cpus".
|
||||||
|
|
||||||
|
Poll and inotify events are triggered whenever the state of
|
||||||
|
"cpuset.cpus.partition" changes. That includes changes caused
|
||||||
|
by write to "cpuset.cpus.partition", cpu hotplug or other
|
||||||
|
changes that modify the validity status of the partition.
|
||||||
|
This will allow user space agents to monitor unexpected changes
|
||||||
|
to "cpuset.cpus.partition" without the need to do continuous
|
||||||
|
polling.
|
||||||
|
|
||||||
|
|
||||||
Device controller
|
Device controller
|
||||||
|
@@ -19,8 +19,8 @@ int blkcg_set_fc_appid(char *app_id, u64 cgrp_id, size_t app_id_len)
|
|||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
|
|
||||||
cgrp = cgroup_get_from_id(cgrp_id);
|
cgrp = cgroup_get_from_id(cgrp_id);
|
||||||
if (!cgrp)
|
if (IS_ERR(cgrp))
|
||||||
return -ENOENT;
|
return PTR_ERR(cgrp);
|
||||||
css = cgroup_get_e_css(cgrp, &io_cgrp_subsys);
|
css = cgroup_get_e_css(cgrp, &io_cgrp_subsys);
|
||||||
if (!css) {
|
if (!css) {
|
||||||
ret = -ENOENT;
|
ret = -ENOENT;
|
||||||
|
@@ -126,11 +126,11 @@ enum {
|
|||||||
CFTYPE_NO_PREFIX = (1 << 3), /* (DON'T USE FOR NEW FILES) no subsys prefix */
|
CFTYPE_NO_PREFIX = (1 << 3), /* (DON'T USE FOR NEW FILES) no subsys prefix */
|
||||||
CFTYPE_WORLD_WRITABLE = (1 << 4), /* (DON'T USE FOR NEW FILES) S_IWUGO */
|
CFTYPE_WORLD_WRITABLE = (1 << 4), /* (DON'T USE FOR NEW FILES) S_IWUGO */
|
||||||
CFTYPE_DEBUG = (1 << 5), /* create when cgroup_debug */
|
CFTYPE_DEBUG = (1 << 5), /* create when cgroup_debug */
|
||||||
CFTYPE_PRESSURE = (1 << 6), /* only if pressure feature is enabled */
|
|
||||||
|
|
||||||
/* internal flags, do not use outside cgroup core proper */
|
/* internal flags, do not use outside cgroup core proper */
|
||||||
__CFTYPE_ONLY_ON_DFL = (1 << 16), /* only on default hierarchy */
|
__CFTYPE_ONLY_ON_DFL = (1 << 16), /* only on default hierarchy */
|
||||||
__CFTYPE_NOT_ON_DFL = (1 << 17), /* not on default hierarchy */
|
__CFTYPE_NOT_ON_DFL = (1 << 17), /* not on default hierarchy */
|
||||||
|
__CFTYPE_ADDED = (1 << 18),
|
||||||
};
|
};
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@@ -384,7 +384,7 @@ struct cgroup {
|
|||||||
/*
|
/*
|
||||||
* The depth this cgroup is at. The root is at depth zero and each
|
* The depth this cgroup is at. The root is at depth zero and each
|
||||||
* step down the hierarchy increments the level. This along with
|
* step down the hierarchy increments the level. This along with
|
||||||
* ancestor_ids[] can determine whether a given cgroup is a
|
* ancestors[] can determine whether a given cgroup is a
|
||||||
* descendant of another without traversing the hierarchy.
|
* descendant of another without traversing the hierarchy.
|
||||||
*/
|
*/
|
||||||
int level;
|
int level;
|
||||||
@@ -504,8 +504,8 @@ struct cgroup {
|
|||||||
/* Used to store internal freezer state */
|
/* Used to store internal freezer state */
|
||||||
struct cgroup_freezer_state freezer;
|
struct cgroup_freezer_state freezer;
|
||||||
|
|
||||||
/* ids of the ancestors at each level including self */
|
/* All ancestors including self */
|
||||||
u64 ancestor_ids[];
|
struct cgroup *ancestors[];
|
||||||
};
|
};
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@@ -522,11 +522,15 @@ struct cgroup_root {
|
|||||||
/* Unique id for this hierarchy. */
|
/* Unique id for this hierarchy. */
|
||||||
int hierarchy_id;
|
int hierarchy_id;
|
||||||
|
|
||||||
/* The root cgroup. Root is destroyed on its release. */
|
/*
|
||||||
|
* The root cgroup. The containing cgroup_root will be destroyed on its
|
||||||
|
* release. cgrp->ancestors[0] will be used overflowing into the
|
||||||
|
* following field. cgrp_ancestor_storage must immediately follow.
|
||||||
|
*/
|
||||||
struct cgroup cgrp;
|
struct cgroup cgrp;
|
||||||
|
|
||||||
/* for cgrp->ancestor_ids[0] */
|
/* must follow cgrp for cgrp->ancestors[0], see above */
|
||||||
u64 cgrp_ancestor_id_storage;
|
struct cgroup *cgrp_ancestor_storage;
|
||||||
|
|
||||||
/* Number of cgroups in the hierarchy, used only for /proc/cgroups */
|
/* Number of cgroups in the hierarchy, used only for /proc/cgroups */
|
||||||
atomic_t nr_cgrps;
|
atomic_t nr_cgrps;
|
||||||
|
@@ -575,7 +575,7 @@ static inline bool cgroup_is_descendant(struct cgroup *cgrp,
|
|||||||
{
|
{
|
||||||
if (cgrp->root != ancestor->root || cgrp->level < ancestor->level)
|
if (cgrp->root != ancestor->root || cgrp->level < ancestor->level)
|
||||||
return false;
|
return false;
|
||||||
return cgrp->ancestor_ids[ancestor->level] == cgroup_id(ancestor);
|
return cgrp->ancestors[ancestor->level] == ancestor;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -592,11 +592,9 @@ static inline bool cgroup_is_descendant(struct cgroup *cgrp,
|
|||||||
static inline struct cgroup *cgroup_ancestor(struct cgroup *cgrp,
|
static inline struct cgroup *cgroup_ancestor(struct cgroup *cgrp,
|
||||||
int ancestor_level)
|
int ancestor_level)
|
||||||
{
|
{
|
||||||
if (cgrp->level < ancestor_level)
|
if (ancestor_level < 0 || ancestor_level > cgrp->level)
|
||||||
return NULL;
|
return NULL;
|
||||||
while (cgrp && cgrp->level > ancestor_level)
|
return cgrp->ancestors[ancestor_level];
|
||||||
cgrp = cgroup_parent(cgrp);
|
|
||||||
return cgrp;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -748,11 +746,6 @@ static inline bool task_under_cgroup_hierarchy(struct task_struct *task,
|
|||||||
|
|
||||||
static inline void cgroup_path_from_kernfs_id(u64 id, char *buf, size_t buflen)
|
static inline void cgroup_path_from_kernfs_id(u64 id, char *buf, size_t buflen)
|
||||||
{}
|
{}
|
||||||
|
|
||||||
static inline struct cgroup *cgroup_get_from_id(u64 id)
|
|
||||||
{
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
#endif /* !CONFIG_CGROUPS */
|
#endif /* !CONFIG_CGROUPS */
|
||||||
|
|
||||||
#ifdef CONFIG_CGROUPS
|
#ifdef CONFIG_CGROUPS
|
||||||
|
@@ -250,6 +250,8 @@ int cgroup_migrate(struct task_struct *leader, bool threadgroup,
|
|||||||
|
|
||||||
int cgroup_attach_task(struct cgroup *dst_cgrp, struct task_struct *leader,
|
int cgroup_attach_task(struct cgroup *dst_cgrp, struct task_struct *leader,
|
||||||
bool threadgroup);
|
bool threadgroup);
|
||||||
|
void cgroup_attach_lock(bool lock_threadgroup);
|
||||||
|
void cgroup_attach_unlock(bool lock_threadgroup);
|
||||||
struct task_struct *cgroup_procs_write_start(char *buf, bool threadgroup,
|
struct task_struct *cgroup_procs_write_start(char *buf, bool threadgroup,
|
||||||
bool *locked)
|
bool *locked)
|
||||||
__acquires(&cgroup_threadgroup_rwsem);
|
__acquires(&cgroup_threadgroup_rwsem);
|
||||||
|
@@ -59,8 +59,7 @@ int cgroup_attach_task_all(struct task_struct *from, struct task_struct *tsk)
|
|||||||
int retval = 0;
|
int retval = 0;
|
||||||
|
|
||||||
mutex_lock(&cgroup_mutex);
|
mutex_lock(&cgroup_mutex);
|
||||||
cpus_read_lock();
|
cgroup_attach_lock(true);
|
||||||
percpu_down_write(&cgroup_threadgroup_rwsem);
|
|
||||||
for_each_root(root) {
|
for_each_root(root) {
|
||||||
struct cgroup *from_cgrp;
|
struct cgroup *from_cgrp;
|
||||||
|
|
||||||
@@ -72,8 +71,7 @@ int cgroup_attach_task_all(struct task_struct *from, struct task_struct *tsk)
|
|||||||
if (retval)
|
if (retval)
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
percpu_up_write(&cgroup_threadgroup_rwsem);
|
cgroup_attach_unlock(true);
|
||||||
cpus_read_unlock();
|
|
||||||
mutex_unlock(&cgroup_mutex);
|
mutex_unlock(&cgroup_mutex);
|
||||||
|
|
||||||
return retval;
|
return retval;
|
||||||
|
@@ -217,6 +217,7 @@ struct cgroup_namespace init_cgroup_ns = {
|
|||||||
|
|
||||||
static struct file_system_type cgroup2_fs_type;
|
static struct file_system_type cgroup2_fs_type;
|
||||||
static struct cftype cgroup_base_files[];
|
static struct cftype cgroup_base_files[];
|
||||||
|
static struct cftype cgroup_psi_files[];
|
||||||
|
|
||||||
/* cgroup optional features */
|
/* cgroup optional features */
|
||||||
enum cgroup_opt_features {
|
enum cgroup_opt_features {
|
||||||
@@ -1689,12 +1690,16 @@ static void css_clear_dir(struct cgroup_subsys_state *css)
|
|||||||
css->flags &= ~CSS_VISIBLE;
|
css->flags &= ~CSS_VISIBLE;
|
||||||
|
|
||||||
if (!css->ss) {
|
if (!css->ss) {
|
||||||
if (cgroup_on_dfl(cgrp))
|
if (cgroup_on_dfl(cgrp)) {
|
||||||
cfts = cgroup_base_files;
|
cgroup_addrm_files(css, cgrp,
|
||||||
else
|
cgroup_base_files, false);
|
||||||
cfts = cgroup1_base_files;
|
if (cgroup_psi_enabled())
|
||||||
|
cgroup_addrm_files(css, cgrp,
|
||||||
cgroup_addrm_files(css, cgrp, cfts, false);
|
cgroup_psi_files, false);
|
||||||
|
} else {
|
||||||
|
cgroup_addrm_files(css, cgrp,
|
||||||
|
cgroup1_base_files, false);
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
list_for_each_entry(cfts, &css->ss->cfts, node)
|
list_for_each_entry(cfts, &css->ss->cfts, node)
|
||||||
cgroup_addrm_files(css, cgrp, cfts, false);
|
cgroup_addrm_files(css, cgrp, cfts, false);
|
||||||
@@ -1717,14 +1722,22 @@ static int css_populate_dir(struct cgroup_subsys_state *css)
|
|||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
if (!css->ss) {
|
if (!css->ss) {
|
||||||
if (cgroup_on_dfl(cgrp))
|
if (cgroup_on_dfl(cgrp)) {
|
||||||
cfts = cgroup_base_files;
|
ret = cgroup_addrm_files(&cgrp->self, cgrp,
|
||||||
else
|
cgroup_base_files, true);
|
||||||
cfts = cgroup1_base_files;
|
if (ret < 0)
|
||||||
|
return ret;
|
||||||
|
|
||||||
ret = cgroup_addrm_files(&cgrp->self, cgrp, cfts, true);
|
if (cgroup_psi_enabled()) {
|
||||||
if (ret < 0)
|
ret = cgroup_addrm_files(&cgrp->self, cgrp,
|
||||||
return ret;
|
cgroup_psi_files, true);
|
||||||
|
if (ret < 0)
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
cgroup_addrm_files(css, cgrp,
|
||||||
|
cgroup1_base_files, true);
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
list_for_each_entry(cfts, &css->ss->cfts, node) {
|
list_for_each_entry(cfts, &css->ss->cfts, node) {
|
||||||
ret = cgroup_addrm_files(css, cgrp, cfts, true);
|
ret = cgroup_addrm_files(css, cgrp, cfts, true);
|
||||||
@@ -2050,7 +2063,7 @@ int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask)
|
|||||||
}
|
}
|
||||||
root_cgrp->kn = kernfs_root_to_node(root->kf_root);
|
root_cgrp->kn = kernfs_root_to_node(root->kf_root);
|
||||||
WARN_ON_ONCE(cgroup_ino(root_cgrp) != 1);
|
WARN_ON_ONCE(cgroup_ino(root_cgrp) != 1);
|
||||||
root_cgrp->ancestor_ids[0] = cgroup_id(root_cgrp);
|
root_cgrp->ancestors[0] = root_cgrp;
|
||||||
|
|
||||||
ret = css_populate_dir(&root_cgrp->self);
|
ret = css_populate_dir(&root_cgrp->self);
|
||||||
if (ret)
|
if (ret)
|
||||||
@@ -2173,7 +2186,7 @@ static int cgroup_get_tree(struct fs_context *fc)
|
|||||||
struct cgroup_fs_context *ctx = cgroup_fc2context(fc);
|
struct cgroup_fs_context *ctx = cgroup_fc2context(fc);
|
||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
cgrp_dfl_visible = true;
|
WRITE_ONCE(cgrp_dfl_visible, true);
|
||||||
cgroup_get_live(&cgrp_dfl_root.cgrp);
|
cgroup_get_live(&cgrp_dfl_root.cgrp);
|
||||||
ctx->root = &cgrp_dfl_root;
|
ctx->root = &cgrp_dfl_root;
|
||||||
|
|
||||||
@@ -2361,7 +2374,7 @@ int task_cgroup_path(struct task_struct *task, char *buf, size_t buflen)
|
|||||||
ret = cgroup_path_ns_locked(cgrp, buf, buflen, &init_cgroup_ns);
|
ret = cgroup_path_ns_locked(cgrp, buf, buflen, &init_cgroup_ns);
|
||||||
} else {
|
} else {
|
||||||
/* if no hierarchy exists, everyone is in "/" */
|
/* if no hierarchy exists, everyone is in "/" */
|
||||||
ret = strlcpy(buf, "/", buflen);
|
ret = strscpy(buf, "/", buflen);
|
||||||
}
|
}
|
||||||
|
|
||||||
spin_unlock_irq(&css_set_lock);
|
spin_unlock_irq(&css_set_lock);
|
||||||
@@ -2393,7 +2406,7 @@ EXPORT_SYMBOL_GPL(task_cgroup_path);
|
|||||||
* write-locking cgroup_threadgroup_rwsem. This allows ->attach() to assume that
|
* write-locking cgroup_threadgroup_rwsem. This allows ->attach() to assume that
|
||||||
* CPU hotplug is disabled on entry.
|
* CPU hotplug is disabled on entry.
|
||||||
*/
|
*/
|
||||||
static void cgroup_attach_lock(bool lock_threadgroup)
|
void cgroup_attach_lock(bool lock_threadgroup)
|
||||||
{
|
{
|
||||||
cpus_read_lock();
|
cpus_read_lock();
|
||||||
if (lock_threadgroup)
|
if (lock_threadgroup)
|
||||||
@@ -2404,7 +2417,7 @@ static void cgroup_attach_lock(bool lock_threadgroup)
|
|||||||
* cgroup_attach_unlock - Undo cgroup_attach_lock()
|
* cgroup_attach_unlock - Undo cgroup_attach_lock()
|
||||||
* @lock_threadgroup: whether to up_write cgroup_threadgroup_rwsem
|
* @lock_threadgroup: whether to up_write cgroup_threadgroup_rwsem
|
||||||
*/
|
*/
|
||||||
static void cgroup_attach_unlock(bool lock_threadgroup)
|
void cgroup_attach_unlock(bool lock_threadgroup)
|
||||||
{
|
{
|
||||||
if (lock_threadgroup)
|
if (lock_threadgroup)
|
||||||
percpu_up_write(&cgroup_threadgroup_rwsem);
|
percpu_up_write(&cgroup_threadgroup_rwsem);
|
||||||
@@ -3292,11 +3305,7 @@ static int cgroup_apply_control(struct cgroup *cgrp)
|
|||||||
* making the following cgroup_update_dfl_csses() properly update
|
* making the following cgroup_update_dfl_csses() properly update
|
||||||
* css associations of all tasks in the subtree.
|
* css associations of all tasks in the subtree.
|
||||||
*/
|
*/
|
||||||
ret = cgroup_update_dfl_csses(cgrp);
|
return cgroup_update_dfl_csses(cgrp);
|
||||||
if (ret)
|
|
||||||
return ret;
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -4132,8 +4141,6 @@ static int cgroup_addrm_files(struct cgroup_subsys_state *css,
|
|||||||
restart:
|
restart:
|
||||||
for (cft = cfts; cft != cft_end && cft->name[0] != '\0'; cft++) {
|
for (cft = cfts; cft != cft_end && cft->name[0] != '\0'; cft++) {
|
||||||
/* does cft->flags tell us to skip this file on @cgrp? */
|
/* does cft->flags tell us to skip this file on @cgrp? */
|
||||||
if ((cft->flags & CFTYPE_PRESSURE) && !cgroup_psi_enabled())
|
|
||||||
continue;
|
|
||||||
if ((cft->flags & __CFTYPE_ONLY_ON_DFL) && !cgroup_on_dfl(cgrp))
|
if ((cft->flags & __CFTYPE_ONLY_ON_DFL) && !cgroup_on_dfl(cgrp))
|
||||||
continue;
|
continue;
|
||||||
if ((cft->flags & __CFTYPE_NOT_ON_DFL) && cgroup_on_dfl(cgrp))
|
if ((cft->flags & __CFTYPE_NOT_ON_DFL) && cgroup_on_dfl(cgrp))
|
||||||
@@ -4198,21 +4205,25 @@ static void cgroup_exit_cftypes(struct cftype *cfts)
|
|||||||
cft->ss = NULL;
|
cft->ss = NULL;
|
||||||
|
|
||||||
/* revert flags set by cgroup core while adding @cfts */
|
/* revert flags set by cgroup core while adding @cfts */
|
||||||
cft->flags &= ~(__CFTYPE_ONLY_ON_DFL | __CFTYPE_NOT_ON_DFL);
|
cft->flags &= ~(__CFTYPE_ONLY_ON_DFL | __CFTYPE_NOT_ON_DFL |
|
||||||
|
__CFTYPE_ADDED);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static int cgroup_init_cftypes(struct cgroup_subsys *ss, struct cftype *cfts)
|
static int cgroup_init_cftypes(struct cgroup_subsys *ss, struct cftype *cfts)
|
||||||
{
|
{
|
||||||
struct cftype *cft;
|
struct cftype *cft;
|
||||||
|
int ret = 0;
|
||||||
|
|
||||||
for (cft = cfts; cft->name[0] != '\0'; cft++) {
|
for (cft = cfts; cft->name[0] != '\0'; cft++) {
|
||||||
struct kernfs_ops *kf_ops;
|
struct kernfs_ops *kf_ops;
|
||||||
|
|
||||||
WARN_ON(cft->ss || cft->kf_ops);
|
WARN_ON(cft->ss || cft->kf_ops);
|
||||||
|
|
||||||
if ((cft->flags & CFTYPE_PRESSURE) && !cgroup_psi_enabled())
|
if (cft->flags & __CFTYPE_ADDED) {
|
||||||
continue;
|
ret = -EBUSY;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
if (cft->seq_start)
|
if (cft->seq_start)
|
||||||
kf_ops = &cgroup_kf_ops;
|
kf_ops = &cgroup_kf_ops;
|
||||||
@@ -4226,26 +4237,26 @@ static int cgroup_init_cftypes(struct cgroup_subsys *ss, struct cftype *cfts)
|
|||||||
if (cft->max_write_len && cft->max_write_len != PAGE_SIZE) {
|
if (cft->max_write_len && cft->max_write_len != PAGE_SIZE) {
|
||||||
kf_ops = kmemdup(kf_ops, sizeof(*kf_ops), GFP_KERNEL);
|
kf_ops = kmemdup(kf_ops, sizeof(*kf_ops), GFP_KERNEL);
|
||||||
if (!kf_ops) {
|
if (!kf_ops) {
|
||||||
cgroup_exit_cftypes(cfts);
|
ret = -ENOMEM;
|
||||||
return -ENOMEM;
|
break;
|
||||||
}
|
}
|
||||||
kf_ops->atomic_write_len = cft->max_write_len;
|
kf_ops->atomic_write_len = cft->max_write_len;
|
||||||
}
|
}
|
||||||
|
|
||||||
cft->kf_ops = kf_ops;
|
cft->kf_ops = kf_ops;
|
||||||
cft->ss = ss;
|
cft->ss = ss;
|
||||||
|
cft->flags |= __CFTYPE_ADDED;
|
||||||
}
|
}
|
||||||
|
|
||||||
return 0;
|
if (ret)
|
||||||
|
cgroup_exit_cftypes(cfts);
|
||||||
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int cgroup_rm_cftypes_locked(struct cftype *cfts)
|
static int cgroup_rm_cftypes_locked(struct cftype *cfts)
|
||||||
{
|
{
|
||||||
lockdep_assert_held(&cgroup_mutex);
|
lockdep_assert_held(&cgroup_mutex);
|
||||||
|
|
||||||
if (!cfts || !cfts[0].ss)
|
|
||||||
return -ENOENT;
|
|
||||||
|
|
||||||
list_del(&cfts->node);
|
list_del(&cfts->node);
|
||||||
cgroup_apply_cftypes(cfts, false);
|
cgroup_apply_cftypes(cfts, false);
|
||||||
cgroup_exit_cftypes(cfts);
|
cgroup_exit_cftypes(cfts);
|
||||||
@@ -4267,6 +4278,12 @@ int cgroup_rm_cftypes(struct cftype *cfts)
|
|||||||
{
|
{
|
||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
|
if (!cfts || cfts[0].name[0] == '\0')
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
if (!(cfts[0].flags & __CFTYPE_ADDED))
|
||||||
|
return -ENOENT;
|
||||||
|
|
||||||
mutex_lock(&cgroup_mutex);
|
mutex_lock(&cgroup_mutex);
|
||||||
ret = cgroup_rm_cftypes_locked(cfts);
|
ret = cgroup_rm_cftypes_locked(cfts);
|
||||||
mutex_unlock(&cgroup_mutex);
|
mutex_unlock(&cgroup_mutex);
|
||||||
@@ -5151,10 +5168,13 @@ static struct cftype cgroup_base_files[] = {
|
|||||||
.name = "cpu.stat",
|
.name = "cpu.stat",
|
||||||
.seq_show = cpu_stat_show,
|
.seq_show = cpu_stat_show,
|
||||||
},
|
},
|
||||||
|
{ } /* terminate */
|
||||||
|
};
|
||||||
|
|
||||||
|
static struct cftype cgroup_psi_files[] = {
|
||||||
#ifdef CONFIG_PSI
|
#ifdef CONFIG_PSI
|
||||||
{
|
{
|
||||||
.name = "io.pressure",
|
.name = "io.pressure",
|
||||||
.flags = CFTYPE_PRESSURE,
|
|
||||||
.seq_show = cgroup_io_pressure_show,
|
.seq_show = cgroup_io_pressure_show,
|
||||||
.write = cgroup_io_pressure_write,
|
.write = cgroup_io_pressure_write,
|
||||||
.poll = cgroup_pressure_poll,
|
.poll = cgroup_pressure_poll,
|
||||||
@@ -5162,7 +5182,6 @@ static struct cftype cgroup_base_files[] = {
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
.name = "memory.pressure",
|
.name = "memory.pressure",
|
||||||
.flags = CFTYPE_PRESSURE,
|
|
||||||
.seq_show = cgroup_memory_pressure_show,
|
.seq_show = cgroup_memory_pressure_show,
|
||||||
.write = cgroup_memory_pressure_write,
|
.write = cgroup_memory_pressure_write,
|
||||||
.poll = cgroup_pressure_poll,
|
.poll = cgroup_pressure_poll,
|
||||||
@@ -5170,7 +5189,6 @@ static struct cftype cgroup_base_files[] = {
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
.name = "cpu.pressure",
|
.name = "cpu.pressure",
|
||||||
.flags = CFTYPE_PRESSURE,
|
|
||||||
.seq_show = cgroup_cpu_pressure_show,
|
.seq_show = cgroup_cpu_pressure_show,
|
||||||
.write = cgroup_cpu_pressure_write,
|
.write = cgroup_cpu_pressure_write,
|
||||||
.poll = cgroup_pressure_poll,
|
.poll = cgroup_pressure_poll,
|
||||||
@@ -5452,8 +5470,7 @@ static struct cgroup *cgroup_create(struct cgroup *parent, const char *name,
|
|||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
/* allocate the cgroup and its ID, 0 is reserved for the root */
|
/* allocate the cgroup and its ID, 0 is reserved for the root */
|
||||||
cgrp = kzalloc(struct_size(cgrp, ancestor_ids, (level + 1)),
|
cgrp = kzalloc(struct_size(cgrp, ancestors, (level + 1)), GFP_KERNEL);
|
||||||
GFP_KERNEL);
|
|
||||||
if (!cgrp)
|
if (!cgrp)
|
||||||
return ERR_PTR(-ENOMEM);
|
return ERR_PTR(-ENOMEM);
|
||||||
|
|
||||||
@@ -5505,7 +5522,7 @@ static struct cgroup *cgroup_create(struct cgroup *parent, const char *name,
|
|||||||
|
|
||||||
spin_lock_irq(&css_set_lock);
|
spin_lock_irq(&css_set_lock);
|
||||||
for (tcgrp = cgrp; tcgrp; tcgrp = cgroup_parent(tcgrp)) {
|
for (tcgrp = cgrp; tcgrp; tcgrp = cgroup_parent(tcgrp)) {
|
||||||
cgrp->ancestor_ids[tcgrp->level] = cgroup_id(tcgrp);
|
cgrp->ancestors[tcgrp->level] = tcgrp;
|
||||||
|
|
||||||
if (tcgrp != cgrp) {
|
if (tcgrp != cgrp) {
|
||||||
tcgrp->nr_descendants++;
|
tcgrp->nr_descendants++;
|
||||||
@@ -5938,6 +5955,7 @@ int __init cgroup_init(void)
|
|||||||
|
|
||||||
BUILD_BUG_ON(CGROUP_SUBSYS_COUNT > 16);
|
BUILD_BUG_ON(CGROUP_SUBSYS_COUNT > 16);
|
||||||
BUG_ON(cgroup_init_cftypes(NULL, cgroup_base_files));
|
BUG_ON(cgroup_init_cftypes(NULL, cgroup_base_files));
|
||||||
|
BUG_ON(cgroup_init_cftypes(NULL, cgroup_psi_files));
|
||||||
BUG_ON(cgroup_init_cftypes(NULL, cgroup1_base_files));
|
BUG_ON(cgroup_init_cftypes(NULL, cgroup1_base_files));
|
||||||
|
|
||||||
cgroup_rstat_boot();
|
cgroup_rstat_boot();
|
||||||
@@ -6058,19 +6076,22 @@ void cgroup_path_from_kernfs_id(u64 id, char *buf, size_t buflen)
|
|||||||
/*
|
/*
|
||||||
* cgroup_get_from_id : get the cgroup associated with cgroup id
|
* cgroup_get_from_id : get the cgroup associated with cgroup id
|
||||||
* @id: cgroup id
|
* @id: cgroup id
|
||||||
* On success return the cgrp, on failure return NULL
|
* On success return the cgrp or ERR_PTR on failure
|
||||||
|
* Only cgroups within current task's cgroup NS are valid.
|
||||||
*/
|
*/
|
||||||
struct cgroup *cgroup_get_from_id(u64 id)
|
struct cgroup *cgroup_get_from_id(u64 id)
|
||||||
{
|
{
|
||||||
struct kernfs_node *kn;
|
struct kernfs_node *kn;
|
||||||
struct cgroup *cgrp = NULL;
|
struct cgroup *cgrp, *root_cgrp;
|
||||||
|
|
||||||
kn = kernfs_find_and_get_node_by_id(cgrp_dfl_root.kf_root, id);
|
kn = kernfs_find_and_get_node_by_id(cgrp_dfl_root.kf_root, id);
|
||||||
if (!kn)
|
if (!kn)
|
||||||
goto out;
|
return ERR_PTR(-ENOENT);
|
||||||
|
|
||||||
if (kernfs_type(kn) != KERNFS_DIR)
|
if (kernfs_type(kn) != KERNFS_DIR) {
|
||||||
goto put;
|
kernfs_put(kn);
|
||||||
|
return ERR_PTR(-ENOENT);
|
||||||
|
}
|
||||||
|
|
||||||
rcu_read_lock();
|
rcu_read_lock();
|
||||||
|
|
||||||
@@ -6079,9 +6100,19 @@ struct cgroup *cgroup_get_from_id(u64 id)
|
|||||||
cgrp = NULL;
|
cgrp = NULL;
|
||||||
|
|
||||||
rcu_read_unlock();
|
rcu_read_unlock();
|
||||||
put:
|
|
||||||
kernfs_put(kn);
|
kernfs_put(kn);
|
||||||
out:
|
|
||||||
|
if (!cgrp)
|
||||||
|
return ERR_PTR(-ENOENT);
|
||||||
|
|
||||||
|
spin_lock_irq(&css_set_lock);
|
||||||
|
root_cgrp = current_cgns_cgroup_from_root(&cgrp_dfl_root);
|
||||||
|
spin_unlock_irq(&css_set_lock);
|
||||||
|
if (!cgroup_is_descendant(cgrp, root_cgrp)) {
|
||||||
|
cgroup_put(cgrp);
|
||||||
|
return ERR_PTR(-ENOENT);
|
||||||
|
}
|
||||||
|
|
||||||
return cgrp;
|
return cgrp;
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL_GPL(cgroup_get_from_id);
|
EXPORT_SYMBOL_GPL(cgroup_get_from_id);
|
||||||
@@ -6111,7 +6142,7 @@ int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns,
|
|||||||
struct cgroup *cgrp;
|
struct cgroup *cgrp;
|
||||||
int ssid, count = 0;
|
int ssid, count = 0;
|
||||||
|
|
||||||
if (root == &cgrp_dfl_root && !cgrp_dfl_visible)
|
if (root == &cgrp_dfl_root && !READ_ONCE(cgrp_dfl_visible))
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
seq_printf(m, "%d:", root->hierarchy_id);
|
seq_printf(m, "%d:", root->hierarchy_id);
|
||||||
@@ -6653,8 +6684,12 @@ struct cgroup *cgroup_get_from_path(const char *path)
|
|||||||
{
|
{
|
||||||
struct kernfs_node *kn;
|
struct kernfs_node *kn;
|
||||||
struct cgroup *cgrp = ERR_PTR(-ENOENT);
|
struct cgroup *cgrp = ERR_PTR(-ENOENT);
|
||||||
|
struct cgroup *root_cgrp;
|
||||||
|
|
||||||
kn = kernfs_walk_and_get(cgrp_dfl_root.cgrp.kn, path);
|
spin_lock_irq(&css_set_lock);
|
||||||
|
root_cgrp = current_cgns_cgroup_from_root(&cgrp_dfl_root);
|
||||||
|
kn = kernfs_walk_and_get(root_cgrp->kn, path);
|
||||||
|
spin_unlock_irq(&css_set_lock);
|
||||||
if (!kn)
|
if (!kn)
|
||||||
goto out;
|
goto out;
|
||||||
|
|
||||||
@@ -6812,9 +6847,6 @@ static ssize_t show_delegatable_files(struct cftype *files, char *buf,
|
|||||||
if (!(cft->flags & CFTYPE_NS_DELEGATABLE))
|
if (!(cft->flags & CFTYPE_NS_DELEGATABLE))
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
if ((cft->flags & CFTYPE_PRESSURE) && !cgroup_psi_enabled())
|
|
||||||
continue;
|
|
||||||
|
|
||||||
if (prefix)
|
if (prefix)
|
||||||
ret += snprintf(buf + ret, size - ret, "%s.", prefix);
|
ret += snprintf(buf + ret, size - ret, "%s.", prefix);
|
||||||
|
|
||||||
@@ -6834,8 +6866,11 @@ static ssize_t delegate_show(struct kobject *kobj, struct kobj_attribute *attr,
|
|||||||
int ssid;
|
int ssid;
|
||||||
ssize_t ret = 0;
|
ssize_t ret = 0;
|
||||||
|
|
||||||
ret = show_delegatable_files(cgroup_base_files, buf, PAGE_SIZE - ret,
|
ret = show_delegatable_files(cgroup_base_files, buf + ret,
|
||||||
NULL);
|
PAGE_SIZE - ret, NULL);
|
||||||
|
if (cgroup_psi_enabled())
|
||||||
|
ret += show_delegatable_files(cgroup_psi_files, buf + ret,
|
||||||
|
PAGE_SIZE - ret, NULL);
|
||||||
|
|
||||||
for_each_subsys(ss, ssid)
|
for_each_subsys(ss, ssid)
|
||||||
ret += show_delegatable_files(ss->dfl_cftypes, buf + ret,
|
ret += show_delegatable_files(ss->dfl_cftypes, buf + ret,
|
||||||
|
File diff suppressed because it is too large
Load Diff
@@ -47,6 +47,7 @@ struct pids_cgroup {
|
|||||||
*/
|
*/
|
||||||
atomic64_t counter;
|
atomic64_t counter;
|
||||||
atomic64_t limit;
|
atomic64_t limit;
|
||||||
|
int64_t watermark;
|
||||||
|
|
||||||
/* Handle for "pids.events" */
|
/* Handle for "pids.events" */
|
||||||
struct cgroup_file events_file;
|
struct cgroup_file events_file;
|
||||||
@@ -85,6 +86,16 @@ static void pids_css_free(struct cgroup_subsys_state *css)
|
|||||||
kfree(css_pids(css));
|
kfree(css_pids(css));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void pids_update_watermark(struct pids_cgroup *p, int64_t nr_pids)
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* This is racy, but we don't need perfectly accurate tallying of
|
||||||
|
* the watermark, and this lets us avoid extra atomic overhead.
|
||||||
|
*/
|
||||||
|
if (nr_pids > READ_ONCE(p->watermark))
|
||||||
|
WRITE_ONCE(p->watermark, nr_pids);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* pids_cancel - uncharge the local pid count
|
* pids_cancel - uncharge the local pid count
|
||||||
* @pids: the pid cgroup state
|
* @pids: the pid cgroup state
|
||||||
@@ -128,8 +139,11 @@ static void pids_charge(struct pids_cgroup *pids, int num)
|
|||||||
{
|
{
|
||||||
struct pids_cgroup *p;
|
struct pids_cgroup *p;
|
||||||
|
|
||||||
for (p = pids; parent_pids(p); p = parent_pids(p))
|
for (p = pids; parent_pids(p); p = parent_pids(p)) {
|
||||||
atomic64_add(num, &p->counter);
|
int64_t new = atomic64_add_return(num, &p->counter);
|
||||||
|
|
||||||
|
pids_update_watermark(p, new);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -156,6 +170,12 @@ static int pids_try_charge(struct pids_cgroup *pids, int num)
|
|||||||
*/
|
*/
|
||||||
if (new > limit)
|
if (new > limit)
|
||||||
goto revert;
|
goto revert;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Not technically accurate if we go over limit somewhere up
|
||||||
|
* the hierarchy, but that's tolerable for the watermark.
|
||||||
|
*/
|
||||||
|
pids_update_watermark(p, new);
|
||||||
}
|
}
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
@@ -311,6 +331,14 @@ static s64 pids_current_read(struct cgroup_subsys_state *css,
|
|||||||
return atomic64_read(&pids->counter);
|
return atomic64_read(&pids->counter);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static s64 pids_peak_read(struct cgroup_subsys_state *css,
|
||||||
|
struct cftype *cft)
|
||||||
|
{
|
||||||
|
struct pids_cgroup *pids = css_pids(css);
|
||||||
|
|
||||||
|
return READ_ONCE(pids->watermark);
|
||||||
|
}
|
||||||
|
|
||||||
static int pids_events_show(struct seq_file *sf, void *v)
|
static int pids_events_show(struct seq_file *sf, void *v)
|
||||||
{
|
{
|
||||||
struct pids_cgroup *pids = css_pids(seq_css(sf));
|
struct pids_cgroup *pids = css_pids(seq_css(sf));
|
||||||
@@ -331,6 +359,11 @@ static struct cftype pids_files[] = {
|
|||||||
.read_s64 = pids_current_read,
|
.read_s64 = pids_current_read,
|
||||||
.flags = CFTYPE_NOT_ON_ROOT,
|
.flags = CFTYPE_NOT_ON_ROOT,
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
.name = "peak",
|
||||||
|
.flags = CFTYPE_NOT_ON_ROOT,
|
||||||
|
.read_s64 = pids_peak_read,
|
||||||
|
},
|
||||||
{
|
{
|
||||||
.name = "events",
|
.name = "events",
|
||||||
.seq_show = pids_events_show,
|
.seq_show = pids_events_show,
|
||||||
|
@@ -5104,8 +5104,8 @@ struct mem_cgroup *mem_cgroup_get_from_ino(unsigned long ino)
|
|||||||
struct mem_cgroup *memcg;
|
struct mem_cgroup *memcg;
|
||||||
|
|
||||||
cgrp = cgroup_get_from_id(ino);
|
cgrp = cgroup_get_from_id(ino);
|
||||||
if (!cgrp)
|
if (IS_ERR(cgrp))
|
||||||
return ERR_PTR(-ENOENT);
|
return ERR_CAST(cgrp);
|
||||||
|
|
||||||
css = cgroup_get_e_css(cgrp, &memory_cgrp_subsys);
|
css = cgroup_get_e_css(cgrp, &memory_cgrp_subsys);
|
||||||
if (css)
|
if (css)
|
||||||
|
@@ -40,16 +40,17 @@ static noinline bool
|
|||||||
nft_sock_get_eval_cgroupv2(u32 *dest, struct sock *sk, const struct nft_pktinfo *pkt, u32 level)
|
nft_sock_get_eval_cgroupv2(u32 *dest, struct sock *sk, const struct nft_pktinfo *pkt, u32 level)
|
||||||
{
|
{
|
||||||
struct cgroup *cgrp;
|
struct cgroup *cgrp;
|
||||||
|
u64 cgid;
|
||||||
|
|
||||||
if (!sk_fullsock(sk))
|
if (!sk_fullsock(sk))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
|
cgrp = cgroup_ancestor(sock_cgroup_ptr(&sk->sk_cgrp_data), level);
|
||||||
if (level > cgrp->level)
|
if (!cgrp)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
memcpy(dest, &cgrp->ancestor_ids[level], sizeof(u64));
|
cgid = cgroup_id(cgrp);
|
||||||
|
memcpy(dest, &cgid, sizeof(u64));
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
@@ -61,6 +61,11 @@ autop_names = {
|
|||||||
}
|
}
|
||||||
|
|
||||||
class BlkgIterator:
|
class BlkgIterator:
|
||||||
|
def __init__(self, root_blkcg, q_id, include_dying=False):
|
||||||
|
self.include_dying = include_dying
|
||||||
|
self.blkgs = []
|
||||||
|
self.walk(root_blkcg, q_id, '')
|
||||||
|
|
||||||
def blkcg_name(blkcg):
|
def blkcg_name(blkcg):
|
||||||
return blkcg.css.cgroup.kn.name.string_().decode('utf-8')
|
return blkcg.css.cgroup.kn.name.string_().decode('utf-8')
|
||||||
|
|
||||||
@@ -82,11 +87,6 @@ class BlkgIterator:
|
|||||||
blkcg.css.children.address_of_(), 'css.sibling'):
|
blkcg.css.children.address_of_(), 'css.sibling'):
|
||||||
self.walk(c, q_id, path)
|
self.walk(c, q_id, path)
|
||||||
|
|
||||||
def __init__(self, root_blkcg, q_id, include_dying=False):
|
|
||||||
self.include_dying = include_dying
|
|
||||||
self.blkgs = []
|
|
||||||
self.walk(root_blkcg, q_id, '')
|
|
||||||
|
|
||||||
def __iter__(self):
|
def __iter__(self):
|
||||||
return iter(self.blkgs)
|
return iter(self.blkgs)
|
||||||
|
|
||||||
|
@@ -77,7 +77,7 @@ static inline int get_cgroup_v1_idx(__u32 *cgrps, int size)
|
|||||||
break;
|
break;
|
||||||
|
|
||||||
// convert cgroup-id to a map index
|
// convert cgroup-id to a map index
|
||||||
cgrp_id = BPF_CORE_READ(cgrp, ancestor_ids[i]);
|
cgrp_id = BPF_CORE_READ(cgrp, ancestors[i], kn, id);
|
||||||
elem = bpf_map_lookup_elem(&cgrp_idx, &cgrp_id);
|
elem = bpf_map_lookup_elem(&cgrp_idx, &cgrp_id);
|
||||||
if (!elem)
|
if (!elem)
|
||||||
continue;
|
continue;
|
||||||
|
1
tools/testing/selftests/cgroup/.gitignore
vendored
1
tools/testing/selftests/cgroup/.gitignore
vendored
@@ -5,3 +5,4 @@ test_freezer
|
|||||||
test_kmem
|
test_kmem
|
||||||
test_kill
|
test_kill
|
||||||
test_cpu
|
test_cpu
|
||||||
|
wait_inotify
|
||||||
|
@@ -1,10 +1,11 @@
|
|||||||
# SPDX-License-Identifier: GPL-2.0
|
# SPDX-License-Identifier: GPL-2.0
|
||||||
CFLAGS += -Wall -pthread
|
CFLAGS += -Wall -pthread
|
||||||
|
|
||||||
all:
|
all: ${HELPER_PROGS}
|
||||||
|
|
||||||
TEST_FILES := with_stress.sh
|
TEST_FILES := with_stress.sh
|
||||||
TEST_PROGS := test_stress.sh
|
TEST_PROGS := test_stress.sh test_cpuset_prs.sh
|
||||||
|
TEST_GEN_FILES := wait_inotify
|
||||||
TEST_GEN_PROGS = test_memcontrol
|
TEST_GEN_PROGS = test_memcontrol
|
||||||
TEST_GEN_PROGS += test_kmem
|
TEST_GEN_PROGS += test_kmem
|
||||||
TEST_GEN_PROGS += test_core
|
TEST_GEN_PROGS += test_core
|
||||||
|
674
tools/testing/selftests/cgroup/test_cpuset_prs.sh
Executable file
674
tools/testing/selftests/cgroup/test_cpuset_prs.sh
Executable file
@@ -0,0 +1,674 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
# SPDX-License-Identifier: GPL-2.0
|
||||||
|
#
|
||||||
|
# Test for cpuset v2 partition root state (PRS)
|
||||||
|
#
|
||||||
|
# The sched verbose flag is set, if available, so that the console log
|
||||||
|
# can be examined for the correct setting of scheduling domain.
|
||||||
|
#
|
||||||
|
|
||||||
|
skip_test() {
	# $1 - reason the test cannot run.
	# A skip leaves with status 0.
	echo "$1"; echo "Test SKIPPED"
	exit 0
}
|
||||||
|
|
||||||
|
[[ $(id -u) -eq 0 ]] || skip_test "Test must be run as root!"

# Set sched verbose flag, if available
[[ -d /sys/kernel/debug/sched ]] && echo Y > /sys/kernel/debug/sched/verbose

# Get wait_inotify location
WAIT_INOTIFY=$(cd $(dirname $0); pwd)/wait_inotify

# Find cgroup v2 mount point
CGROUP2=$(mount -t cgroup2 | head -1 | awk -e '{print $3}')
[[ -n "$CGROUP2" ]] || skip_test "Cgroup v2 mount point not found!"

CPUS=$(lscpu | grep "^CPU(s)" | sed -e "s/.*:[[:space:]]*//")
[[ $CPUS -lt 8 ]] && skip_test "Test needs at least 8 cpus available!"

# Set verbose flag and delay factor.
# Each option is consumed by the "shift" at the bottom of the loop, so -v
# and -d can be given together in any order.  (A "break" inside the case
# would leave the whole while loop and silently drop the remaining options.)
PROG=$0
VERBOSE=
DELAY_FACTOR=1
while [[ "$1" = -* ]]
do
	case "$1" in
		-v) VERBOSE=1
		    ;;
		-d) DELAY_FACTOR=$2
		    shift	# drop the option argument as well
		    ;;
		*)  echo "Usage: $PROG [-v] [-d <delay-factor>]"
		    exit
		    ;;
	esac
	shift
done

# All tests run inside the "test" cgroup below the cgroup v2 root.
cd $CGROUP2
echo +cpuset > cgroup.subtree_control
[[ -d test ]] || mkdir test
cd test
|
||||||
|
|
||||||
|
# Pause in ms
|
||||||
|
pause()
{
	# Sleep $1 seconds, repeated DELAY_FACTOR times.
	DELAY=$1
	for ((LOOP = 0; LOOP < DELAY_FACTOR; LOOP++))
	do
		sleep $DELAY
	done
	return 0
}
|
||||||
|
|
||||||
|
console_msg()
{
	# Show $1 on stdout, then write a blank line followed by the same
	# message to /dev/console, and wait briefly afterwards.
	MSG=$1
	echo "$MSG"
	{
		echo ""
		echo "$MSG"
	} > /dev/console
	pause 0.01
}
|
||||||
|
|
||||||
|
test_partition()
{
	# Write $1 to cpuset.cpus.partition of the current cgroup and verify
	# that reading the file back yields exactly the same value.  Exits the
	# script with status 1 if the write fails or the value differs.
	EXPECTED_VAL=$1
	echo $EXPECTED_VAL > cpuset.cpus.partition || exit 1
	ACTUAL_VAL=$(cat cpuset.cpus.partition)
	# Both sides quoted: compare as strings, not as a glob pattern.
	if [[ "$ACTUAL_VAL" != "$EXPECTED_VAL" ]]
	then
		echo "cpuset.cpus.partition: expect $EXPECTED_VAL, found $ACTUAL_VAL"
		echo "Test FAILED"
		exit 1
	fi
}
|
||||||
|
|
||||||
|
test_effective_cpus()
{
	# Verify that cpuset.cpus.effective of the current cgroup equals $1
	# (which may be the empty string).  Exits the script with status 1 on
	# a mismatch, reporting both the expected and the observed value.
	EXPECTED_VAL=$1
	ACTUAL_VAL=$(cat cpuset.cpus.effective)
	if [[ "$ACTUAL_VAL" != "$EXPECTED_VAL" ]]
	then
		echo "cpuset.cpus.effective: expect '$EXPECTED_VAL', found '$ACTUAL_VAL'"
		echo "Test FAILED"
		exit 1
	fi
}
|
||||||
|
|
||||||
|
# Adding current process to cgroup.procs as a test
|
||||||
|
test_add_proc()
{
	# $1 - text expected in the error output of the write to cgroup.procs;
	#      an empty string means the write must produce no error output.
	OUTSTR="$1"
	# The space in "$( (" keeps this from being read as "$((" arithmetic.
	ERRMSG=$( (echo $$ > cgroup.procs) |& cat)
	if [[ -z "$OUTSTR" ]]
	then
		# grep with an empty pattern matches anything, so check
		# explicitly that nothing was reported.
		[[ -z "$ERRMSG" ]]
	else
		echo $ERRMSG | grep -q "$OUTSTR"
	fi
	if [[ $? -ne 0 ]]
	then
		echo "cgroup.procs: expect '$OUTSTR', got '$ERRMSG'"
		echo "Test FAILED"
		exit 1
	fi
	echo $$ > $CGROUP2/cgroup.procs	# Move out the task
}
|
||||||
|
|
||||||
|
#
|
||||||
|
# Testing the new "isolated" partition root type
|
||||||
|
#
|
||||||
|
test_isolated()
{
	local STEP

	echo 2-3 > cpuset.cpus
	TYPE=$(cat cpuset.cpus.partition)
	[[ $TYPE = member ]] || echo member > cpuset.cpus.partition

	# Each entry is <from>:<to>; the partition is switched to <to> and
	# the result is read back by test_partition.
	for STEP in member:root root:isolated isolated:member \
		    member:isolated isolated:root root:member
	do
		console_msg "Change from ${STEP%%:*} to ${STEP##*:}"
		test_partition ${STEP##*:}
	done

	#
	# Testing partition root with no cpu
	#
	console_msg "Distribute all cpus to child partition"
	echo +cpuset > cgroup.subtree_control
	test_partition root

	mkdir A1
	cd A1
	echo 2-3 > cpuset.cpus
	test_partition root
	test_effective_cpus 2-3
	cd ..
	test_effective_cpus ""

	console_msg "Moving task to partition test"
	test_add_proc "No space left"
	cd A1
	test_add_proc ""
	cd ..

	console_msg "Shrink and expand child partition"
	echo 2 > A1/cpuset.cpus
	test_effective_cpus 3
	echo 2-3 > A1/cpuset.cpus
	test_effective_cpus ""

	# Cleaning up
	console_msg "Cleaning up"
	echo $$ > $CGROUP2/cgroup.procs
	[[ -d A1 ]] && rmdir A1
}
|
||||||
|
|
||||||
|
#
|
||||||
|
# Cpuset controller state transition test matrix.
|
||||||
|
#
|
||||||
|
# Cgroup test hierarchy
|
||||||
|
#
|
||||||
|
# test -- A1 -- A2 -- A3
|
||||||
|
# \- B1
|
||||||
|
#
|
||||||
|
# P<v> = set cpus.partition (0:member, 1:root, 2:isolated, -1:root invalid)
|
||||||
|
# C<l> = add cpu-list
|
||||||
|
# S<p> = use prefix in subtree_control
|
||||||
|
# T = put a task into cgroup
|
||||||
|
# O<c>-<v> = Write <v> to CPU online file of <c>
|
||||||
|
#
|
||||||
|
SETUP_A123_PARTITIONS="C1-3:P1:S+ C2-3:P1:S+ C3:P1"
|
||||||
|
TEST_MATRIX=(
|
||||||
|
# test old-A1 old-A2 old-A3 old-B1 new-A1 new-A2 new-A3 new-B1 fail ECPUs Pstate
|
||||||
|
# ---- ------ ------ ------ ------ ------ ------ ------ ------ ---- ----- ------
|
||||||
|
" S+ C0-1 . . C2-3 S+ C4-5 . . 0 A2:0-1"
|
||||||
|
" S+ C0-1 . . C2-3 P1 . . . 0 "
|
||||||
|
" S+ C0-1 . . C2-3 P1:S+ C0-1:P1 . . 0 "
|
||||||
|
" S+ C0-1 . . C2-3 P1:S+ C1:P1 . . 0 "
|
||||||
|
" S+ C0-1:S+ . . C2-3 . . . P1 0 "
|
||||||
|
" S+ C0-1:P1 . . C2-3 S+ C1 . . 0 "
|
||||||
|
" S+ C0-1:P1 . . C2-3 S+ C1:P1 . . 0 "
|
||||||
|
" S+ C0-1:P1 . . C2-3 S+ C1:P1 . P1 0 "
|
||||||
|
" S+ C0-1:P1 . . C2-3 C4-5 . . . 0 A1:4-5"
|
||||||
|
" S+ C0-1:P1 . . C2-3 S+:C4-5 . . . 0 A1:4-5"
|
||||||
|
" S+ C0-1 . . C2-3:P1 . . . C2 0 "
|
||||||
|
" S+ C0-1 . . C2-3:P1 . . . C4-5 0 B1:4-5"
|
||||||
|
" S+ C0-3:P1:S+ C2-3:P1 . . . . . . 0 A1:0-1,A2:2-3"
|
||||||
|
" S+ C0-3:P1:S+ C2-3:P1 . . C1-3 . . . 0 A1:1,A2:2-3"
|
||||||
|
" S+ C2-3:P1:S+ C3:P1 . . C3 . . . 0 A1:,A2:3 A1:P1,A2:P1"
|
||||||
|
" S+ C2-3:P1:S+ C3:P1 . . C3 P0 . . 0 A1:3,A2:3 A1:P1,A2:P0"
|
||||||
|
" S+ C2-3:P1:S+ C2:P1 . . C2-4 . . . 0 A1:3-4,A2:2"
|
||||||
|
" S+ C2-3:P1:S+ C3:P1 . . C3 . . C0-2 0 A1:,B1:0-2 A1:P1,A2:P1"
|
||||||
|
" S+ $SETUP_A123_PARTITIONS . C2-3 . . . 0 A1:,A2:2,A3:3 A1:P1,A2:P1,A3:P1"
|
||||||
|
|
||||||
|
# CPU offlining cases:
|
||||||
|
" S+ C0-1 . . C2-3 S+ C4-5 . O2-0 0 A1:0-1,B1:3"
|
||||||
|
" S+ C0-3:P1:S+ C2-3:P1 . . O2-0 . . . 0 A1:0-1,A2:3"
|
||||||
|
" S+ C0-3:P1:S+ C2-3:P1 . . O2-0 O2-1 . . 0 A1:0-1,A2:2-3"
|
||||||
|
" S+ C0-3:P1:S+ C2-3:P1 . . O1-0 . . . 0 A1:0,A2:2-3"
|
||||||
|
" S+ C0-3:P1:S+ C2-3:P1 . . O1-0 O1-1 . . 0 A1:0-1,A2:2-3"
|
||||||
|
" S+ C2-3:P1:S+ C3:P1 . . O3-0 O3-1 . . 0 A1:2,A2:3 A1:P1,A2:P1"
|
||||||
|
" S+ C2-3:P1:S+ C3:P2 . . O3-0 O3-1 . . 0 A1:2,A2:3 A1:P1,A2:P2"
|
||||||
|
" S+ C2-3:P1:S+ C3:P1 . . O2-0 O2-1 . . 0 A1:2,A2:3 A1:P1,A2:P1"
|
||||||
|
" S+ C2-3:P1:S+ C3:P2 . . O2-0 O2-1 . . 0 A1:2,A2:3 A1:P1,A2:P2"
|
||||||
|
" S+ C2-3:P1:S+ C3:P1 . . O2-0 . . . 0 A1:,A2:3 A1:P1,A2:P1"
|
||||||
|
" S+ C2-3:P1:S+ C3:P1 . . O3-0 . . . 0 A1:2,A2: A1:P1,A2:P1"
|
||||||
|
" S+ C2-3:P1:S+ C3:P1 . . T:O2-0 . . . 0 A1:3,A2:3 A1:P1,A2:P-1"
|
||||||
|
" S+ C2-3:P1:S+ C3:P1 . . . T:O3-0 . . 0 A1:2,A2:2 A1:P1,A2:P-1"
|
||||||
|
" S+ $SETUP_A123_PARTITIONS . O1-0 . . . 0 A1:,A2:2,A3:3 A1:P1,A2:P1,A3:P1"
|
||||||
|
" S+ $SETUP_A123_PARTITIONS . O2-0 . . . 0 A1:1,A2:,A3:3 A1:P1,A2:P1,A3:P1"
|
||||||
|
" S+ $SETUP_A123_PARTITIONS . O3-0 . . . 0 A1:1,A2:2,A3: A1:P1,A2:P1,A3:P1"
|
||||||
|
" S+ $SETUP_A123_PARTITIONS . T:O1-0 . . . 0 A1:2-3,A2:2-3,A3:3 A1:P1,A2:P-1,A3:P-1"
|
||||||
|
" S+ $SETUP_A123_PARTITIONS . . T:O2-0 . . 0 A1:1,A2:3,A3:3 A1:P1,A2:P1,A3:P-1"
|
||||||
|
" S+ $SETUP_A123_PARTITIONS . . . T:O3-0 . 0 A1:1,A2:2,A3:2 A1:P1,A2:P1,A3:P-1"
|
||||||
|
" S+ $SETUP_A123_PARTITIONS . T:O1-0 O1-1 . . 0 A1:1,A2:2,A3:3 A1:P1,A2:P1,A3:P1"
|
||||||
|
" S+ $SETUP_A123_PARTITIONS . . T:O2-0 O2-1 . 0 A1:1,A2:2,A3:3 A1:P1,A2:P1,A3:P1"
|
||||||
|
" S+ $SETUP_A123_PARTITIONS . . . T:O3-0 O3-1 0 A1:1,A2:2,A3:3 A1:P1,A2:P1,A3:P1"
|
||||||
|
" S+ $SETUP_A123_PARTITIONS . T:O1-0 O2-0 O1-1 . 0 A1:1,A2:,A3:3 A1:P1,A2:P1,A3:P1"
|
||||||
|
" S+ $SETUP_A123_PARTITIONS . T:O1-0 O2-0 O2-1 . 0 A1:2-3,A2:2-3,A3:3 A1:P1,A2:P-1,A3:P-1"
|
||||||
|
|
||||||
|
# test old-A1 old-A2 old-A3 old-B1 new-A1 new-A2 new-A3 new-B1 fail ECPUs Pstate
|
||||||
|
# ---- ------ ------ ------ ------ ------ ------ ------ ------ ---- ----- ------
|
||||||
|
#
|
||||||
|
# Incorrect change to cpuset.cpus invalidates partition root
|
||||||
|
#
|
||||||
|
# Adding CPUs to partition root that are not in parent's
|
||||||
|
# cpuset.cpus is allowed, but those extra CPUs are ignored.
|
||||||
|
" S+ C2-3:P1:S+ C3:P1 . . . C2-4 . . 0 A1:,A2:2-3 A1:P1,A2:P1"
|
||||||
|
|
||||||
|
# Taking away all CPUs from parent or itself if there are tasks
|
||||||
|
# will make the partition invalid.
|
||||||
|
" S+ C2-3:P1:S+ C3:P1 . . T C2-3 . . 0 A1:2-3,A2:2-3 A1:P1,A2:P-1"
|
||||||
|
" S+ $SETUP_A123_PARTITIONS . T:C2-3 . . . 0 A1:2-3,A2:2-3,A3:3 A1:P1,A2:P-1,A3:P-1"
|
||||||
|
" S+ $SETUP_A123_PARTITIONS . T:C2-3:C1-3 . . . 0 A1:1,A2:2,A3:3 A1:P1,A2:P1,A3:P1"
|
||||||
|
|
||||||
|
# Changing a partition root to member makes child partitions invalid
|
||||||
|
" S+ C2-3:P1:S+ C3:P1 . . P0 . . . 0 A1:2-3,A2:3 A1:P0,A2:P-1"
|
||||||
|
" S+ $SETUP_A123_PARTITIONS . C2-3 P0 . . 0 A1:2-3,A2:2-3,A3:3 A1:P1,A2:P0,A3:P-1"
|
||||||
|
|
||||||
|
# cpuset.cpus can contain cpus not in parent's cpuset.cpus as long
|
||||||
|
# as they overlap.
|
||||||
|
" S+ C2-3:P1:S+ . . . . C3-4:P1 . . 0 A1:2,A2:3 A1:P1,A2:P1"
|
||||||
|
|
||||||
|
# Deletion of CPUs distributed to child cgroup is allowed.
|
||||||
|
" S+ C0-1:P1:S+ C1 . C2-3 C4-5 . . . 0 A1:4-5,A2:4-5"
|
||||||
|
|
||||||
|
# To become a valid partition root, cpuset.cpus must overlap parent's
|
||||||
|
# cpuset.cpus.
|
||||||
|
" S+ C0-1:P1 . . C2-3 S+ C4-5:P1 . . 0 A1:0-1,A2:0-1 A1:P1,A2:P-1"
|
||||||
|
|
||||||
|
# Enabling partition with child cpusets is allowed
|
||||||
|
" S+ C0-1:S+ C1 . C2-3 P1 . . . 0 A1:0-1,A2:1 A1:P1"
|
||||||
|
|
||||||
|
# A partition root with non-partition root parent is invalid, but it
|
||||||
|
# can be made valid if its parent becomes a partition root too.
|
||||||
|
" S+ C0-1:S+ C1 . C2-3 . P2 . . 0 A1:0-1,A2:1 A1:P0,A2:P-2"
|
||||||
|
" S+ C0-1:S+ C1:P2 . C2-3 P1 . . . 0 A1:0,A2:1 A1:P1,A2:P2"
|
||||||
|
|
||||||
|
# A non-exclusive cpuset.cpus change will invalidate partition and its siblings
|
||||||
|
" S+ C0-1:P1 . . C2-3 C0-2 . . . 0 A1:0-2,B1:2-3 A1:P-1,B1:P0"
|
||||||
|
" S+ C0-1:P1 . . P1:C2-3 C0-2 . . . 0 A1:0-2,B1:2-3 A1:P-1,B1:P-1"
|
||||||
|
" S+ C0-1 . . P1:C2-3 C0-2 . . . 0 A1:0-2,B1:2-3 A1:P0,B1:P-1"
|
||||||
|
|
||||||
|
# test old-A1 old-A2 old-A3 old-B1 new-A1 new-A2 new-A3 new-B1 fail ECPUs Pstate
|
||||||
|
# ---- ------ ------ ------ ------ ------ ------ ------ ------ ---- ----- ------
|
||||||
|
# Failure cases:
|
||||||
|
|
||||||
|
# A task cannot be added to a partition with no cpu
|
||||||
|
" S+ C2-3:P1:S+ C3:P1 . . O2-0:T . . . 1 A1:,A2:3 A1:P1,A2:P1"
|
||||||
|
)
|
||||||
|
|
||||||
|
#
|
||||||
|
# Write to the cpu online file
|
||||||
|
# $1 - <c>-<v> where <c> = cpu number, <v> value to be written
|
||||||
|
#
|
||||||
|
write_cpu_online()
{
	# $1 has the form <cpu>-<value>; split it at the dash.
	CPU=${1%-*}
	VAL=${1#*-}
	CPUFILE=/sys/devices/system/cpu/cpu${CPU}/online
	if [[ $VAL -eq 0 ]]
	then
		# Remember the CPU so that online_cpus can restore it later.
		OFFLINE_CPUS="$OFFLINE_CPUS $CPU"
	elif [[ -n "$OFFLINE_CPUS" ]]
	then
		# Drop $CPU from the list: it is listed twice up front so that
		# it is never unique, and "uniq -u" keeps only unique lines.
		OFFLINE_CPUS=$(echo $CPU $CPU $OFFLINE_CPUS | fmt -1 |\
				sort | uniq -u)
	fi
	echo $VAL > $CPUFILE
	pause 0.01
}
|
||||||
|
|
||||||
|
#
|
||||||
|
# Set controller state
|
||||||
|
# $1 - cgroup directory
|
||||||
|
# $2 - state
|
||||||
|
# $3 - showerr
|
||||||
|
#
|
||||||
|
# The presence of ":" in state means transition from one to the next.
|
||||||
|
#
|
||||||
|
set_ctrl_state()
{
	TMPMSG=/tmp/.msg_$$
	CGRP=$1
	STATE=$2
	SHOWERR=${3}${VERBOSE}
	CTRL=${CTRL:=$CONTROLLER}
	HASERR=0
	REDIRECT="2> $TMPMSG"
	if [[ -z "$STATE" || "$STATE" = '.' ]]
	then
		return 0
	fi

	rm -f $TMPMSG
	# Apply the ":"-separated commands one after another.
	for CMD in ${STATE//:/ }
	do
		TFILE=$CGRP/cgroup.procs
		SFILE=$CGRP/cgroup.subtree_control
		PFILE=$CGRP/cpuset.cpus.partition
		CFILE=$CGRP/cpuset.cpus
		# The first character selects the action, the rest is its argument.
		S=${CMD:0:1}
		case $S in
		S)	PREFIX=${CMD#?}
			COMM="echo ${PREFIX}${CTRL} > $SFILE"
			eval $COMM $REDIRECT
			;;
		C)	CPUS=${CMD#?}
			COMM="echo $CPUS > $CFILE"
			eval $COMM $REDIRECT
			;;
		P)	VAL=${CMD#?}
			case $VAL in
			0)  VAL=member
			    ;;
			1)  VAL=root
			    ;;
			2)  VAL=isolated
			    ;;
			*)
			    echo "Invalid partition state - $VAL"
			    exit 1
			    ;;
			esac
			COMM="echo $VAL > $PFILE"
			eval $COMM $REDIRECT
			;;
		O)	VAL=${CMD#?}
			write_cpu_online $VAL
			;;
		T)	COMM="echo 0 > $TFILE"
			eval $COMM $REDIRECT
			;;
		esac
		# Status of the command run by the selected action above.
		RET=$?
		if [[ $RET -ne 0 ]]
		then
			if [[ -n "$SHOWERR" ]]
			then
				echo "$COMM"
				cat $TMPMSG
			fi
			HASERR=1
		fi
		pause 0.01
		rm -f $TMPMSG
	done
	return $HASERR
}
|
||||||
|
|
||||||
|
set_ctrl_state_noerr()
{
	# Like set_ctrl_state, but creates the cgroup directory $1 if needed
	# and exits the script if applying state $2 fails.
	CGRP=$1
	STATE=$2
	if [[ ! -d $CGRP ]]
	then
		mkdir $CGRP
	fi
	set_ctrl_state $CGRP $STATE 1
	[[ $? -ne 0 ]] && {
		echo "ERROR: Failed to set $2 to cgroup $1!"
		exit 1
	}
}
|
||||||
|
|
||||||
|
online_cpus()
{
	# Write 1 to the online file of every CPU listed in OFFLINE_CPUS.
	# The list is expanded once when the loop starts, so it is safe for
	# write_cpu_online to edit OFFLINE_CPUS while the loop runs.
	if [[ -n "$OFFLINE_CPUS" ]]
	then
		for C in $OFFLINE_CPUS
		do
			write_cpu_online ${C}-1
		done
	fi
}
|
||||||
|
|
||||||
|
#
# Undo the effects of a test: move the shell back to the root cgroup, bring
# offlined CPUs back online, remove the A1/A2/A3 and B1 test cgroups and
# apply "S-" to subtree_control of the current cgroup.
#
reset_cgroup_states()
{
	echo 0 > $CGROUP2/cgroup.procs
	online_cpus
	# The directories may not exist; failures are deliberately ignored.
	rmdir A1/A2/A3 A1/A2 A1 B1 &> /dev/null
	set_ctrl_state . S-
	pause 0.01
}
|
||||||
|
|
||||||
|
dump_states()
{
	# Print cpuset.cpus.effective and cpuset.cpus.partition of every test
	# cgroup for which the file exists.
	local F

	for DIR in A1 A1/A2 A1/A2/A3 B1
	do
		ECPUS=$DIR/cpuset.cpus.effective
		PRS=$DIR/cpuset.cpus.partition
		for F in $ECPUS $PRS
		do
			[[ -e $F ]] && echo "$F: $(cat $F)"
		done
	done
}
|
||||||
|
|
||||||
|
#
|
||||||
|
# Check effective cpus
|
||||||
|
# $1 - check string, format: <cgroup>:<cpu-list>[,<cgroup>:<cpu-list>]*
|
||||||
|
#
|
||||||
|
check_effective_cpus()
{
	CHK_STR=$1
	for CHK in ${CHK_STR//,/ }
	do
		# Split "<cgroup>:<cpu-list>" into $1 and $2.
		set -- ${CHK//:/ }
		CGRP=$1
		CPUS=$2
		# A2 and A3 are nested below A1 in the test hierarchy.
		case $CGRP in
		A2) CGRP=A1/A2 ;;
		A3) CGRP=A1/A2/A3 ;;
		esac
		FILE=$CGRP/cpuset.cpus.effective
		if [[ ! -e $FILE ]] || [[ $CPUS != $(cat $FILE) ]]
		then
			return 1
		fi
	done
}
|
||||||
|
|
||||||
|
#
|
||||||
|
# Check cgroup states
|
||||||
|
# $1 - check string, format: <cgroup>:<state>[,<cgroup>:<state>]*
|
||||||
|
#
|
||||||
|
check_cgroup_states()
{
	CHK_STR=$1
	for CHK in ${CHK_STR//,/ }
	do
		# Split "<cgroup>:<state>" into $1 and $2.
		set -- ${CHK//:/ }
		CGRP=$1
		STATE=$2
		FILE=
		# Expected numeric value: up to two characters after the
		# leading state letter (e.g. "1" or "-1").
		EVAL=${STATE:1:2}
		case $CGRP in
		A2) CGRP=A1/A2 ;;
		A3) CGRP=A1/A2/A3 ;;
		esac

		# Only partition ("P") states are understood.
		if [[ $STATE != P* ]]
		then
			echo "Unknown state: $STATE!"
			exit 1
		fi
		FILE=$CGRP/cpuset.cpus.partition
		VAL=$(cat $FILE)

		# Map the file content to the numeric encoding used by the
		# test matrix; anything else is left unchanged.
		case "$VAL" in
		member)			VAL=0 ;;
		root)			VAL=1 ;;
		isolated)		VAL=2 ;;
		"root invalid"*)	VAL=-1 ;;
		"isolated invalid"*)	VAL=-2 ;;
		esac
		[[ $EVAL != $VAL ]] && return 1
	done
	return 0
}
|
||||||
|
|
||||||
|
#
|
||||||
|
# Run cpuset state transition test
|
||||||
|
# $1 - test matrix name
|
||||||
|
#
|
||||||
|
# This test is somewhat fragile as delays (sleep x) are added in various
|
||||||
|
# places to make sure state changes are fully propagated before the next
|
||||||
|
# action. These delays may need to be adjusted when running on a slower machine.
|
||||||
|
#
|
||||||
|
run_state_test()
{
	TEST=$1
	CONTROLLER=cpuset
	CPULIST=0-6
	# Number of entries in the array whose name was passed in $1.
	eval CNT="\${#$TEST[@]}"

	reset_cgroup_states
	echo $CPULIST > cpuset.cpus
	echo root > cpuset.cpus.partition
	console_msg "Running state transition test ..."

	for ((I = 0; I < CNT; I++))
	do
		echo "Running test $I ..." > /dev/console
		# Split the matrix entry into its whitespace-separated columns.
		eval set -- "\${$TEST[$I]}"
		ROOT=$1
		OLD_A1=$2
		OLD_A2=$3
		OLD_A3=$4
		OLD_B1=$5
		NEW_A1=$6
		NEW_A2=$7
		NEW_A3=$8
		NEW_B1=$9
		RESULT=${10}
		ECPUS=${11}
		STATES=${12}

		# Establish the initial state; any failure here aborts the run.
		set_ctrl_state_noerr .        $ROOT
		set_ctrl_state_noerr A1       $OLD_A1
		set_ctrl_state_noerr A1/A2    $OLD_A2
		set_ctrl_state_noerr A1/A2/A3 $OLD_A3
		set_ctrl_state_noerr B1       $OLD_B1

		# Apply the transitions and count how many of them failed.
		RETVAL=0
		set_ctrl_state A1       $NEW_A1; RETVAL=$((RETVAL + $?))
		set_ctrl_state A1/A2    $NEW_A2; RETVAL=$((RETVAL + $?))
		set_ctrl_state A1/A2/A3 $NEW_A3; RETVAL=$((RETVAL + $?))
		set_ctrl_state B1       $NEW_B1; RETVAL=$((RETVAL + $?))

		if [[ $RETVAL -ne $RESULT ]]
		then
			echo "Test $TEST[$I] failed result check!"
			eval echo \"\${$TEST[$I]}\"
			dump_states
			online_cpus
			exit 1
		fi

		if [[ -n "$ECPUS" && "$ECPUS" != . ]]
		then
			if ! check_effective_cpus $ECPUS
			then
				echo "Test $TEST[$I] failed effective CPU check!"
				eval echo \"\${$TEST[$I]}\"
				echo
				dump_states
				online_cpus
				exit 1
			fi
		fi

		if [[ -n "$STATES" ]]
		then
			if ! check_cgroup_states $STATES
			then
				echo "FAILED: Test $TEST[$I] failed states check!"
				eval echo \"\${$TEST[$I]}\"
				echo
				dump_states
				online_cpus
				exit 1
			fi
		fi

		reset_cgroup_states
		#
		# Check to see if effective cpu list changes
		#
		pause 0.05
		NEWLIST=$(cat cpuset.cpus.effective)
		if [[ $NEWLIST != $CPULIST ]]
		then
			echo "Effective cpus changed to $NEWLIST after test $I!"
			exit 1
		fi
		if [[ -n "$VERBOSE" ]]
		then
			echo "Test $I done."
		fi
	done
	echo "All $I tests of $TEST PASSED."

	echo member > cpuset.cpus.partition
}
|
||||||
|
|
||||||
|
#
|
||||||
|
# Wait for inotify event for the given file and read it
|
||||||
|
# $1: cgroup file to wait for
|
||||||
|
# $2: file to store the read result
|
||||||
|
#
|
||||||
|
wait_inotify()
{
	CGROUP_FILE=$1
	OUTPUT_FILE=$2

	# Run the helper program on the cgroup file first, then copy the
	# file's content into the output file.
	$WAIT_INOTIFY $CGROUP_FILE
	cat < $CGROUP_FILE > $OUTPUT_FILE
}
|
||||||
|
|
||||||
|
#
|
||||||
|
# Test if inotify events are properly generated when going into and out of
|
||||||
|
# invalid partition state.
|
||||||
|
#
|
||||||
|
test_inotify()
{
	local WAITER_PID

	ERR=0
	PRS=/tmp/.prs_$$
	[[ -f $WAIT_INOTIFY ]] || {
		echo "wait_inotify not found, inotify test SKIPPED."
		return
	}

	pause 0.01
	echo 1 > cpuset.cpus
	echo 0 > cgroup.procs
	echo root > cpuset.cpus.partition
	pause 0.01
	rm -f $PRS
	# The waiter writes $PRS only after wait_inotify has returned.
	wait_inotify $PWD/cpuset.cpus.partition $PRS &
	WAITER_PID=$!
	pause 0.01
	# Offline CPU 1, the only CPU assigned to this partition.
	set_ctrl_state . "O1-0"
	pause 0.01
	check_cgroup_states ".:P-1"
	if [[ $? -ne 0 ]]
	then
		echo "FAILED: Inotify test - partition not invalid"
		ERR=1
	elif [[ ! -f $PRS ]]
	then
		echo "FAILED: Inotify test - event not generated"
		ERR=1
	elif [[ $(cat $PRS) != "root invalid"* ]]
	then
		echo "FAILED: Inotify test - incorrect state"
		cat $PRS
		ERR=1
	fi
	# On failure the background waiter may still be running; stop it so
	# that it cannot outlive the test.  Errors are ignored because it may
	# already have finished.
	[[ $ERR -ne 0 ]] && kill $WAITER_PID 2> /dev/null
	wait $WAITER_PID 2> /dev/null
	rm -f $PRS
	online_cpus
	echo member > cpuset.cpus.partition
	echo 0 > ../cgroup.procs
	if [[ $ERR -ne 0 ]]
	then
		exit 1
	else
		echo "Inotify test PASSED"
	fi
}
|
||||||
|
|
||||||
|
run_state_test TEST_MATRIX
|
||||||
|
test_isolated
|
||||||
|
test_inotify
|
||||||
|
echo "All tests PASSED."
|
||||||
|
cd ..
|
||||||
|
rmdir test
|
87
tools/testing/selftests/cgroup/wait_inotify.c
Normal file
87
tools/testing/selftests/cgroup/wait_inotify.c
Normal file
@@ -0,0 +1,87 @@
|
|||||||
|
// SPDX-License-Identifier: GPL-2.0
|
||||||
|
/*
|
||||||
|
 * Wait until an inotify event occurs on the given cgroup file.
|
||||||
|
*/
|
||||||
|
#include <linux/limits.h>
|
||||||
|
#include <sys/inotify.h>
|
||||||
|
#include <sys/mman.h>
|
||||||
|
#include <sys/ptrace.h>
|
||||||
|
#include <sys/stat.h>
|
||||||
|
#include <sys/types.h>
|
||||||
|
#include <errno.h>
|
||||||
|
#include <fcntl.h>
|
||||||
|
#include <poll.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <unistd.h>
|
||||||
|
|
||||||
|
/* printf-style usage line; the single %s is the program name. */
static const char usage[] = "Usage: %s [-v] <cgroup_file>\n";

/* Path of the cgroup file being watched; set by main(), read by fail_message(). */
static char *file;

/* Incremented once for every -v given on the command line. */
static int verbose;
|
||||||
|
|
||||||
|
static inline void fail_message(char *msg)
|
||||||
|
{
|
||||||
|
fprintf(stderr, msg, file);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
int main(int argc, char *argv[])
|
||||||
|
{
|
||||||
|
char *cmd = argv[0];
|
||||||
|
int c, fd;
|
||||||
|
struct pollfd fds = { .events = POLLIN, };
|
||||||
|
|
||||||
|
while ((c = getopt(argc, argv, "v")) != -1) {
|
||||||
|
switch (c) {
|
||||||
|
case 'v':
|
||||||
|
verbose++;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
argv++, argc--;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (argc != 2) {
|
||||||
|
fprintf(stderr, usage, cmd);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
file = argv[1];
|
||||||
|
fd = open(file, O_RDONLY);
|
||||||
|
if (fd < 0)
|
||||||
|
fail_message("Cgroup file %s not found!\n");
|
||||||
|
close(fd);
|
||||||
|
|
||||||
|
fd = inotify_init();
|
||||||
|
if (fd < 0)
|
||||||
|
fail_message("inotify_init() fails on %s!\n");
|
||||||
|
if (inotify_add_watch(fd, file, IN_MODIFY) < 0)
|
||||||
|
fail_message("inotify_add_watch() fails on %s!\n");
|
||||||
|
fds.fd = fd;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* poll waiting loop
|
||||||
|
*/
|
||||||
|
for (;;) {
|
||||||
|
int ret = poll(&fds, 1, 10000);
|
||||||
|
|
||||||
|
if (ret < 0) {
|
||||||
|
if (errno == EINTR)
|
||||||
|
continue;
|
||||||
|
perror("poll");
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
if ((ret > 0) && (fds.revents & POLLIN))
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if (verbose) {
|
||||||
|
struct inotify_event events[10];
|
||||||
|
long len;
|
||||||
|
|
||||||
|
usleep(1000);
|
||||||
|
len = read(fd, events, sizeof(events));
|
||||||
|
printf("Number of events read = %ld\n",
|
||||||
|
len/sizeof(struct inotify_event));
|
||||||
|
}
|
||||||
|
close(fd);
|
||||||
|
return 0;
|
||||||
|
}
|
Reference in New Issue
Block a user