mirror of
https://github.com/tbsdtv/linux_media.git
synced 2025-07-23 12:43:29 +02:00
Merge branch 'stall.2023.01.09a' into HEAD
stall.2023.01.09a: RCU CPU stall-warning updates.
This commit is contained in:
@@ -5113,6 +5113,12 @@
|
|||||||
rcupdate.rcu_cpu_stall_timeout to be used (after
|
rcupdate.rcu_cpu_stall_timeout to be used (after
|
||||||
conversion from seconds to milliseconds).
|
conversion from seconds to milliseconds).
|
||||||
|
|
||||||
|
rcupdate.rcu_cpu_stall_cputime= [KNL]
|
||||||
|
Provide statistics on the cputime and count of
|
||||||
|
interrupts and tasks during the sampling period. For
|
||||||
|
multiple continuous RCU stalls, all sampling periods
|
||||||
|
begin at half of the first RCU stall timeout.
|
||||||
|
|
||||||
rcupdate.rcu_exp_stall_task_details= [KNL]
|
rcupdate.rcu_exp_stall_task_details= [KNL]
|
||||||
Print stack dumps of any tasks blocking the
|
Print stack dumps of any tasks blocking the
|
||||||
current expedited RCU grace period during an
|
current expedited RCU grace period during an
|
||||||
|
@@ -52,6 +52,7 @@ DECLARE_PER_CPU(struct kernel_cpustat, kernel_cpustat);
|
|||||||
#define kstat_cpu(cpu) per_cpu(kstat, cpu)
|
#define kstat_cpu(cpu) per_cpu(kstat, cpu)
|
||||||
#define kcpustat_cpu(cpu) per_cpu(kernel_cpustat, cpu)
|
#define kcpustat_cpu(cpu) per_cpu(kernel_cpustat, cpu)
|
||||||
|
|
||||||
|
extern unsigned long long nr_context_switches_cpu(int cpu);
|
||||||
extern unsigned long long nr_context_switches(void);
|
extern unsigned long long nr_context_switches(void);
|
||||||
|
|
||||||
extern unsigned int kstat_irqs_cpu(unsigned int irq, int cpu);
|
extern unsigned int kstat_irqs_cpu(unsigned int irq, int cpu);
|
||||||
@@ -67,6 +68,17 @@ static inline unsigned int kstat_softirqs_cpu(unsigned int irq, int cpu)
|
|||||||
return kstat_cpu(cpu).softirqs[irq];
|
return kstat_cpu(cpu).softirqs[irq];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline unsigned int kstat_cpu_softirqs_sum(int cpu)
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
unsigned int sum = 0;
|
||||||
|
|
||||||
|
for (i = 0; i < NR_SOFTIRQS; i++)
|
||||||
|
sum += kstat_softirqs_cpu(i, cpu);
|
||||||
|
|
||||||
|
return sum;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Number of interrupts per specific IRQ source, since bootup
|
* Number of interrupts per specific IRQ source, since bootup
|
||||||
*/
|
*/
|
||||||
@@ -75,7 +87,7 @@ extern unsigned int kstat_irqs_usr(unsigned int irq);
|
|||||||
/*
|
/*
|
||||||
* Number of interrupts per cpu, since bootup
|
* Number of interrupts per cpu, since bootup
|
||||||
*/
|
*/
|
||||||
static inline unsigned int kstat_cpu_irqs_sum(unsigned int cpu)
|
static inline unsigned long kstat_cpu_irqs_sum(unsigned int cpu)
|
||||||
{
|
{
|
||||||
return kstat_cpu(cpu).irqs_sum;
|
return kstat_cpu(cpu).irqs_sum;
|
||||||
}
|
}
|
||||||
|
@@ -82,7 +82,7 @@ config RCU_CPU_STALL_TIMEOUT
|
|||||||
config RCU_EXP_CPU_STALL_TIMEOUT
|
config RCU_EXP_CPU_STALL_TIMEOUT
|
||||||
int "Expedited RCU CPU stall timeout in milliseconds"
|
int "Expedited RCU CPU stall timeout in milliseconds"
|
||||||
depends on RCU_STALL_COMMON
|
depends on RCU_STALL_COMMON
|
||||||
range 0 21000
|
range 0 300000
|
||||||
default 0
|
default 0
|
||||||
help
|
help
|
||||||
If a given expedited RCU grace period extends more than the
|
If a given expedited RCU grace period extends more than the
|
||||||
@@ -92,6 +92,19 @@ config RCU_EXP_CPU_STALL_TIMEOUT
|
|||||||
says to use the RCU_CPU_STALL_TIMEOUT value converted from
|
says to use the RCU_CPU_STALL_TIMEOUT value converted from
|
||||||
seconds to milliseconds.
|
seconds to milliseconds.
|
||||||
|
|
||||||
|
config RCU_CPU_STALL_CPUTIME
|
||||||
|
bool "Provide additional RCU stall debug information"
|
||||||
|
depends on RCU_STALL_COMMON
|
||||||
|
default n
|
||||||
|
help
|
||||||
|
Statistics collected during the sampling period, such as the number of
|
||||||
|
(hard interrupts, soft interrupts, task switches) and the cputime of
|
||||||
|
(hard interrupts, soft interrupts, kernel tasks), are added to the
|
||||||
|
RCU stall report. For multiple continuous RCU stalls, all sampling
|
||||||
|
periods begin at half of the first RCU stall timeout.
|
||||||
|
The boot option rcupdate.rcu_cpu_stall_cputime has the same function
|
||||||
|
as this option, and overrides it when specified on the command line.
|
||||||
|
|
||||||
config RCU_TRACE
|
config RCU_TRACE
|
||||||
bool "Enable tracing for RCU"
|
bool "Enable tracing for RCU"
|
||||||
depends on DEBUG_KERNEL
|
depends on DEBUG_KERNEL
|
||||||
|
@@ -224,6 +224,7 @@ extern int rcu_cpu_stall_ftrace_dump;
|
|||||||
extern int rcu_cpu_stall_suppress;
|
extern int rcu_cpu_stall_suppress;
|
||||||
extern int rcu_cpu_stall_timeout;
|
extern int rcu_cpu_stall_timeout;
|
||||||
extern int rcu_exp_cpu_stall_timeout;
|
extern int rcu_exp_cpu_stall_timeout;
|
||||||
|
extern int rcu_cpu_stall_cputime;
|
||||||
extern bool rcu_exp_stall_task_details __read_mostly;
|
extern bool rcu_exp_stall_task_details __read_mostly;
|
||||||
int rcu_jiffies_till_stall_check(void);
|
int rcu_jiffies_till_stall_check(void);
|
||||||
int rcu_exp_jiffies_till_stall_check(void);
|
int rcu_exp_jiffies_till_stall_check(void);
|
||||||
|
@@ -866,6 +866,24 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
|
|||||||
rdp->rcu_iw_gp_seq = rnp->gp_seq;
|
rdp->rcu_iw_gp_seq = rnp->gp_seq;
|
||||||
irq_work_queue_on(&rdp->rcu_iw, rdp->cpu);
|
irq_work_queue_on(&rdp->rcu_iw, rdp->cpu);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (rcu_cpu_stall_cputime && rdp->snap_record.gp_seq != rdp->gp_seq) {
|
||||||
|
int cpu = rdp->cpu;
|
||||||
|
struct rcu_snap_record *rsrp;
|
||||||
|
struct kernel_cpustat *kcsp;
|
||||||
|
|
||||||
|
kcsp = &kcpustat_cpu(cpu);
|
||||||
|
|
||||||
|
rsrp = &rdp->snap_record;
|
||||||
|
rsrp->cputime_irq = kcpustat_field(kcsp, CPUTIME_IRQ, cpu);
|
||||||
|
rsrp->cputime_softirq = kcpustat_field(kcsp, CPUTIME_SOFTIRQ, cpu);
|
||||||
|
rsrp->cputime_system = kcpustat_field(kcsp, CPUTIME_SYSTEM, cpu);
|
||||||
|
rsrp->nr_hardirqs = kstat_cpu_irqs_sum(rdp->cpu);
|
||||||
|
rsrp->nr_softirqs = kstat_cpu_softirqs_sum(rdp->cpu);
|
||||||
|
rsrp->nr_csw = nr_context_switches_cpu(rdp->cpu);
|
||||||
|
rsrp->jiffies = jiffies;
|
||||||
|
rsrp->gp_seq = rdp->gp_seq;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
|
@@ -158,6 +158,23 @@ union rcu_noqs {
|
|||||||
u16 s; /* Set of bits, aggregate OR here. */
|
u16 s; /* Set of bits, aggregate OR here. */
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Record the snapshot of the core stats at half of the first RCU stall timeout.
|
||||||
|
* The member gp_seq is used to ensure that all members are updated only once
|
||||||
|
* during the sampling period. The snapshot is taken only if this gp_seq is not
|
||||||
|
* equal to rdp->gp_seq.
|
||||||
|
*/
|
||||||
|
struct rcu_snap_record {
|
||||||
|
unsigned long gp_seq; /* Track rdp->gp_seq counter */
|
||||||
|
u64 cputime_irq; /* Accumulated cputime of hard irqs */
|
||||||
|
u64 cputime_softirq;/* Accumulated cputime of soft irqs */
|
||||||
|
u64 cputime_system; /* Accumulated cputime of kernel tasks */
|
||||||
|
unsigned long nr_hardirqs; /* Accumulated number of hard irqs */
|
||||||
|
unsigned int nr_softirqs; /* Accumulated number of soft irqs */
|
||||||
|
unsigned long long nr_csw; /* Accumulated number of task switches */
|
||||||
|
unsigned long jiffies; /* Track jiffies value */
|
||||||
|
};
|
||||||
|
|
||||||
/* Per-CPU data for read-copy update. */
|
/* Per-CPU data for read-copy update. */
|
||||||
struct rcu_data {
|
struct rcu_data {
|
||||||
/* 1) quiescent-state and grace-period handling : */
|
/* 1) quiescent-state and grace-period handling : */
|
||||||
@@ -262,6 +279,8 @@ struct rcu_data {
|
|||||||
short rcu_onl_gp_flags; /* ->gp_flags at last online. */
|
short rcu_onl_gp_flags; /* ->gp_flags at last online. */
|
||||||
unsigned long last_fqs_resched; /* Time of last rcu_resched(). */
|
unsigned long last_fqs_resched; /* Time of last rcu_resched(). */
|
||||||
unsigned long last_sched_clock; /* Jiffies of last rcu_sched_clock_irq(). */
|
unsigned long last_sched_clock; /* Jiffies of last rcu_sched_clock_irq(). */
|
||||||
|
struct rcu_snap_record snap_record; /* Snapshot of core stats at half of */
|
||||||
|
/* the first RCU stall timeout */
|
||||||
|
|
||||||
long lazy_len; /* Length of buffered lazy callbacks. */
|
long lazy_len; /* Length of buffered lazy callbacks. */
|
||||||
int cpu;
|
int cpu;
|
||||||
|
@@ -39,7 +39,7 @@ int rcu_exp_jiffies_till_stall_check(void)
|
|||||||
// CONFIG_RCU_EXP_CPU_STALL_TIMEOUT, so check the allowed range.
|
// CONFIG_RCU_EXP_CPU_STALL_TIMEOUT, so check the allowed range.
|
||||||
// The minimum clamped value is "2UL", because at least one full
|
// The minimum clamped value is "2UL", because at least one full
|
||||||
// tick has to be guaranteed.
|
// tick has to be guaranteed.
|
||||||
till_stall_check = clamp(msecs_to_jiffies(cpu_stall_timeout), 2UL, 21UL * HZ);
|
till_stall_check = clamp(msecs_to_jiffies(cpu_stall_timeout), 2UL, 300UL * HZ);
|
||||||
|
|
||||||
if (cpu_stall_timeout && jiffies_to_msecs(till_stall_check) != cpu_stall_timeout)
|
if (cpu_stall_timeout && jiffies_to_msecs(till_stall_check) != cpu_stall_timeout)
|
||||||
WRITE_ONCE(rcu_exp_cpu_stall_timeout, jiffies_to_msecs(till_stall_check));
|
WRITE_ONCE(rcu_exp_cpu_stall_timeout, jiffies_to_msecs(till_stall_check));
|
||||||
@@ -428,6 +428,35 @@ static bool rcu_is_rcuc_kthread_starving(struct rcu_data *rdp, unsigned long *jp
|
|||||||
return j > 2 * HZ;
|
return j > 2 * HZ;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void print_cpu_stat_info(int cpu)
|
||||||
|
{
|
||||||
|
struct rcu_snap_record rsr, *rsrp;
|
||||||
|
struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
|
||||||
|
struct kernel_cpustat *kcsp = &kcpustat_cpu(cpu);
|
||||||
|
|
||||||
|
if (!rcu_cpu_stall_cputime)
|
||||||
|
return;
|
||||||
|
|
||||||
|
rsrp = &rdp->snap_record;
|
||||||
|
if (rsrp->gp_seq != rdp->gp_seq)
|
||||||
|
return;
|
||||||
|
|
||||||
|
rsr.cputime_irq = kcpustat_field(kcsp, CPUTIME_IRQ, cpu);
|
||||||
|
rsr.cputime_softirq = kcpustat_field(kcsp, CPUTIME_SOFTIRQ, cpu);
|
||||||
|
rsr.cputime_system = kcpustat_field(kcsp, CPUTIME_SYSTEM, cpu);
|
||||||
|
|
||||||
|
pr_err("\t hardirqs softirqs csw/system\n");
|
||||||
|
pr_err("\t number: %8ld %10d %12lld\n",
|
||||||
|
kstat_cpu_irqs_sum(cpu) - rsrp->nr_hardirqs,
|
||||||
|
kstat_cpu_softirqs_sum(cpu) - rsrp->nr_softirqs,
|
||||||
|
nr_context_switches_cpu(cpu) - rsrp->nr_csw);
|
||||||
|
pr_err("\tcputime: %8lld %10lld %12lld ==> %d(ms)\n",
|
||||||
|
div_u64(rsr.cputime_irq - rsrp->cputime_irq, NSEC_PER_MSEC),
|
||||||
|
div_u64(rsr.cputime_softirq - rsrp->cputime_softirq, NSEC_PER_MSEC),
|
||||||
|
div_u64(rsr.cputime_system - rsrp->cputime_system, NSEC_PER_MSEC),
|
||||||
|
jiffies_to_msecs(jiffies - rsrp->jiffies));
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Print out diagnostic information for the specified stalled CPU.
|
* Print out diagnostic information for the specified stalled CPU.
|
||||||
*
|
*
|
||||||
@@ -484,6 +513,8 @@ static void print_cpu_stall_info(int cpu)
|
|||||||
data_race(rcu_state.n_force_qs) - rcu_state.n_force_qs_gpstart,
|
data_race(rcu_state.n_force_qs) - rcu_state.n_force_qs_gpstart,
|
||||||
rcuc_starved ? buf : "",
|
rcuc_starved ? buf : "",
|
||||||
falsepositive ? " (false positive?)" : "");
|
falsepositive ? " (false positive?)" : "");
|
||||||
|
|
||||||
|
print_cpu_stat_info(cpu);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Complain about starvation of grace-period kthread. */
|
/* Complain about starvation of grace-period kthread. */
|
||||||
@@ -588,7 +619,7 @@ static void print_other_cpu_stall(unsigned long gp_seq, unsigned long gps)
|
|||||||
|
|
||||||
for_each_possible_cpu(cpu)
|
for_each_possible_cpu(cpu)
|
||||||
totqlen += rcu_get_n_cbs_cpu(cpu);
|
totqlen += rcu_get_n_cbs_cpu(cpu);
|
||||||
pr_cont("\t(detected by %d, t=%ld jiffies, g=%ld, q=%lu ncpus=%d)\n",
|
pr_err("\t(detected by %d, t=%ld jiffies, g=%ld, q=%lu ncpus=%d)\n",
|
||||||
smp_processor_id(), (long)(jiffies - gps),
|
smp_processor_id(), (long)(jiffies - gps),
|
||||||
(long)rcu_seq_current(&rcu_state.gp_seq), totqlen, rcu_state.n_online_cpus);
|
(long)rcu_seq_current(&rcu_state.gp_seq), totqlen, rcu_state.n_online_cpus);
|
||||||
if (ndetected) {
|
if (ndetected) {
|
||||||
@@ -649,7 +680,7 @@ static void print_cpu_stall(unsigned long gps)
|
|||||||
raw_spin_unlock_irqrestore_rcu_node(rdp->mynode, flags);
|
raw_spin_unlock_irqrestore_rcu_node(rdp->mynode, flags);
|
||||||
for_each_possible_cpu(cpu)
|
for_each_possible_cpu(cpu)
|
||||||
totqlen += rcu_get_n_cbs_cpu(cpu);
|
totqlen += rcu_get_n_cbs_cpu(cpu);
|
||||||
pr_cont("\t(t=%lu jiffies g=%ld q=%lu ncpus=%d)\n",
|
pr_err("\t(t=%lu jiffies g=%ld q=%lu ncpus=%d)\n",
|
||||||
jiffies - gps,
|
jiffies - gps,
|
||||||
(long)rcu_seq_current(&rcu_state.gp_seq), totqlen, rcu_state.n_online_cpus);
|
(long)rcu_seq_current(&rcu_state.gp_seq), totqlen, rcu_state.n_online_cpus);
|
||||||
|
|
||||||
|
@@ -547,6 +547,8 @@ int rcu_cpu_stall_timeout __read_mostly = CONFIG_RCU_CPU_STALL_TIMEOUT;
|
|||||||
module_param(rcu_cpu_stall_timeout, int, 0644);
|
module_param(rcu_cpu_stall_timeout, int, 0644);
|
||||||
int rcu_exp_cpu_stall_timeout __read_mostly = CONFIG_RCU_EXP_CPU_STALL_TIMEOUT;
|
int rcu_exp_cpu_stall_timeout __read_mostly = CONFIG_RCU_EXP_CPU_STALL_TIMEOUT;
|
||||||
module_param(rcu_exp_cpu_stall_timeout, int, 0644);
|
module_param(rcu_exp_cpu_stall_timeout, int, 0644);
|
||||||
|
int rcu_cpu_stall_cputime __read_mostly = IS_ENABLED(CONFIG_RCU_CPU_STALL_CPUTIME);
|
||||||
|
module_param(rcu_cpu_stall_cputime, int, 0644);
|
||||||
bool rcu_exp_stall_task_details __read_mostly;
|
bool rcu_exp_stall_task_details __read_mostly;
|
||||||
module_param(rcu_exp_stall_task_details, bool, 0644);
|
module_param(rcu_exp_stall_task_details, bool, 0644);
|
||||||
#endif /* #ifdef CONFIG_RCU_STALL_COMMON */
|
#endif /* #ifdef CONFIG_RCU_STALL_COMMON */
|
||||||
|
@@ -5282,6 +5282,11 @@ bool single_task_running(void)
|
|||||||
}
|
}
|
||||||
EXPORT_SYMBOL(single_task_running);
|
EXPORT_SYMBOL(single_task_running);
|
||||||
|
|
||||||
|
unsigned long long nr_context_switches_cpu(int cpu)
|
||||||
|
{
|
||||||
|
return cpu_rq(cpu)->nr_switches;
|
||||||
|
}
|
||||||
|
|
||||||
unsigned long long nr_context_switches(void)
|
unsigned long long nr_context_switches(void)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
|
Reference in New Issue
Block a user