mirror of
https://github.com/tbsdtv/linux_media.git
synced 2025-07-23 12:43:29 +02:00
In preparation for the buddy hardlockup detector where the CPU checking for lockup might not be the currently running CPU, add a "cpu" parameter to watchdog_hardlockup_check(). As part of this change, make hrtimer_interrupts an atomic_t since now the CPU incrementing the value and the CPU reading the value might be different. Technically this could also be done with just READ_ONCE and WRITE_ONCE, but atomic_t feels a little cleaner in this case. While hrtimer_interrupts is made atomic_t, we change hrtimer_interrupts_saved from "unsigned long" to "int". The "int" is needed to match the data type backing atomic_t for hrtimer_interrupts. Even if this changes us from 64-bits to 32-bits (which I don't think is true for most compilers), it doesn't really matter. All we ever do is increment it every few seconds and compare it to an old value so 32-bits is fine (even 16-bits would be). The "signed" vs "unsigned" also doesn't matter for simple equality comparisons. hrtimer_interrupts_saved is _not_ switched to atomic_t nor even accessed with READ_ONCE / WRITE_ONCE. The hrtimer_interrupts_saved is always consistently accessed by the same CPU. NOTE: with the upcoming "buddy" detector there is one special case. When a CPU goes offline/online then we can change which CPU is the one to consistently access a given instance of hrtimer_interrupts_saved. We still can't end up with a partially updated hrtimer_interrupts_saved, however, because we end up petting all affected CPUs to make sure the new and old CPU can't somehow end up reading/writing hrtimer_interrupts_saved at the same time.
Link: https://lkml.kernel.org/r/20230519101840.v5.10.I3a7d4dd8c23ac30ee0b607d77feb6646b64825c0@changeid Signed-off-by: Douglas Anderson <dianders@chromium.org> Cc: Andi Kleen <ak@linux.intel.com> Cc: Catalin Marinas <catalin.marinas@arm.com> Cc: Chen-Yu Tsai <wens@csie.org> Cc: Christophe Leroy <christophe.leroy@csgroup.eu> Cc: Colin Cross <ccross@android.com> Cc: Daniel Thompson <daniel.thompson@linaro.org> Cc: "David S. Miller" <davem@davemloft.net> Cc: Guenter Roeck <groeck@chromium.org> Cc: Ian Rogers <irogers@google.com> Cc: Lecopzer Chen <lecopzer.chen@mediatek.com> Cc: Marc Zyngier <maz@kernel.org> Cc: Mark Rutland <mark.rutland@arm.com> Cc: Masayoshi Mizuma <msys.mizuma@gmail.com> Cc: Matthias Kaehlcke <mka@chromium.org> Cc: Michael Ellerman <mpe@ellerman.id.au> Cc: Nicholas Piggin <npiggin@gmail.com> Cc: Petr Mladek <pmladek@suse.com> Cc: Pingfan Liu <kernelfans@gmail.com> Cc: Randy Dunlap <rdunlap@infradead.org> Cc: "Ravi V. Shankar" <ravi.v.shankar@intel.com> Cc: Ricardo Neri <ricardo.neri@intel.com> Cc: Stephane Eranian <eranian@google.com> Cc: Stephen Boyd <swboyd@chromium.org> Cc: Sumit Garg <sumit.garg@linaro.org> Cc: Tzung-Bi Shih <tzungbi@chromium.org> Cc: Will Deacon <will@kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
244 lines
7.7 KiB
C
244 lines
7.7 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
|
|
/*
|
|
* linux/include/linux/nmi.h
|
|
*/
|
|
#ifndef LINUX_NMI_H
|
|
#define LINUX_NMI_H
|
|
|
|
#include <linux/sched.h>
|
|
#include <asm/irq.h>
|
|
#if defined(CONFIG_HAVE_NMI_WATCHDOG)
|
|
#include <asm/nmi.h>
|
|
#endif
|
|
|
|
/*
 * Core lockup detector interface.
 *
 * With CONFIG_LOCKUP_DETECTOR set, the entry points and control variables
 * are only declared here; their definitions are not part of this header.
 * Without it, the three entry points become empty inline stubs so callers
 * do not need an #ifdef of their own.
 */
#ifdef CONFIG_LOCKUP_DETECTOR
void lockup_detector_init(void);
void lockup_detector_soft_poweroff(void);
void lockup_detector_cleanup(void);

extern int watchdog_user_enabled;
extern int nmi_watchdog_user_enabled;
extern int soft_watchdog_user_enabled;
extern int watchdog_thresh;
extern unsigned long watchdog_enabled;

extern struct cpumask watchdog_cpumask;
extern unsigned long *watchdog_cpumask_bits;
#ifdef CONFIG_SMP
extern int sysctl_softlockup_all_cpu_backtrace;
extern int sysctl_hardlockup_all_cpu_backtrace;
#else
/* Without SMP both names are the compile-time constant 0. */
#define sysctl_softlockup_all_cpu_backtrace 0
#define sysctl_hardlockup_all_cpu_backtrace 0
#endif /* !CONFIG_SMP */

#else /* CONFIG_LOCKUP_DETECTOR */
static inline void lockup_detector_init(void) { }
static inline void lockup_detector_soft_poweroff(void) { }
static inline void lockup_detector_cleanup(void) { }
#endif /* !CONFIG_LOCKUP_DETECTOR */
|
|
|
|
/*
 * Soft lockup detector interface.
 *
 * With CONFIG_SOFTLOCKUP_DETECTOR set, everything below is declared here
 * and defined outside this header. Without it, the touch_*() helpers are
 * empty inline stubs and the two per-CPU online/offline hooks expand to
 * NULL (presumably so they can still be passed where a callback pointer is
 * expected - confirm against the callers).
 */
#ifdef CONFIG_SOFTLOCKUP_DETECTOR
extern void touch_softlockup_watchdog_sched(void);
extern void touch_softlockup_watchdog(void);
extern void touch_softlockup_watchdog_sync(void);
extern void touch_all_softlockup_watchdogs(void);
extern unsigned int softlockup_panic;

extern int lockup_detector_online_cpu(unsigned int cpu);
extern int lockup_detector_offline_cpu(unsigned int cpu);
#else /* CONFIG_SOFTLOCKUP_DETECTOR */
static inline void touch_softlockup_watchdog_sched(void) { }
static inline void touch_softlockup_watchdog(void) { }
static inline void touch_softlockup_watchdog_sync(void) { }
static inline void touch_all_softlockup_watchdogs(void) { }

#define lockup_detector_online_cpu NULL
#define lockup_detector_offline_cpu NULL
#endif /* CONFIG_SOFTLOCKUP_DETECTOR */
|
|
|
|
/*
 * reset_hung_task_detector() is declared here when CONFIG_DETECT_HUNG_TASK
 * is set (definition not in this header); otherwise it is an empty inline
 * stub so callers can invoke it unconditionally.
 */
#ifdef CONFIG_DETECT_HUNG_TASK
void reset_hung_task_detector(void);
#else
static inline void reset_hung_task_detector(void) { }
#endif
|
|
|
|
/*
 * The run state of the lockup detectors is controlled by the content of the
 * 'watchdog_enabled' variable. Each lockup detector has its dedicated bit -
 * bit 0 for the hard lockup detector and bit 1 for the soft lockup detector.
 *
 * 'watchdog_user_enabled', 'nmi_watchdog_user_enabled' and
 * 'soft_watchdog_user_enabled' are variables that are only used as an
 * 'interface' between the parameters in /proc/sys/kernel and the internal
 * state bits in 'watchdog_enabled'. The 'watchdog_thresh' variable is
 * handled differently because its value is not boolean, and the lockup
 * detectors are 'suspended' while 'watchdog_thresh' is equal to zero.
 */
/* Bit positions within 'watchdog_enabled'. */
#define NMI_WATCHDOG_ENABLED_BIT 0
#define SOFT_WATCHDOG_ENABLED_BIT 1
/* Single-bit masks derived from the bit positions above. */
#define NMI_WATCHDOG_ENABLED (1 << NMI_WATCHDOG_ENABLED_BIT)
#define SOFT_WATCHDOG_ENABLED (1 << SOFT_WATCHDOG_ENABLED_BIT)
|
|
|
|
/*
 * With CONFIG_HARDLOCKUP_DETECTOR set, hardlockup_detector_disable() and
 * the 'hardlockup_panic' variable are declared here and defined elsewhere.
 * Otherwise only an empty hardlockup_detector_disable() stub is provided;
 * 'hardlockup_panic' is not declared in that configuration.
 */
#if defined(CONFIG_HARDLOCKUP_DETECTOR)
extern void hardlockup_detector_disable(void);
extern unsigned int hardlockup_panic;
#else
static inline void hardlockup_detector_disable(void) {}
#endif
|
|
|
|
/*
 * Hard lockup check entry point, declared only for the perf-based detector.
 * @cpu:  the CPU being checked.
 * @regs: register state handed in by the caller (how it is used is not
 *        visible from this header).
 */
#if defined(CONFIG_HARDLOCKUP_DETECTOR_PERF)
void watchdog_hardlockup_check(unsigned int cpu, struct pt_regs *regs);
#endif
|
|
|
|
/*
 * Octal permission mode, presumably for the nmi_watchdog sysctl entry
 * (the user of this macro is not in this header): 0644 (owner-writable)
 * when an arch NMI watchdog or the hardlockup detector is configured,
 * 0444 (read-only) otherwise.
 */
#if defined(CONFIG_HAVE_NMI_WATCHDOG) || defined(CONFIG_HARDLOCKUP_DETECTOR)
# define NMI_WATCHDOG_SYSCTL_PERM 0644
#else
# define NMI_WATCHDOG_SYSCTL_PERM 0444
#endif
|
|
|
|
/*
 * Perf-based hard lockup detector interface.
 *
 * With CONFIG_HARDLOCKUP_DETECTOR_PERF set, all functions are declared here
 * and defined elsewhere. Otherwise the stop/restart/disable/enable/cleanup
 * hooks are empty stubs, and hardlockup_detector_perf_init() depends on
 * CONFIG_HAVE_NMI_WATCHDOG:
 *  - not set: init returns -ENODEV and arch_touch_nmi_watchdog() is an
 *    empty stub;
 *  - set: init returns 0 and arch_touch_nmi_watchdog() is not defined here
 *    (presumably it comes from <asm/nmi.h>, which this header includes
 *    under that option).
 */
#if defined(CONFIG_HARDLOCKUP_DETECTOR_PERF)
extern void arch_touch_nmi_watchdog(void);
extern void hardlockup_detector_perf_stop(void);
extern void hardlockup_detector_perf_restart(void);
extern void hardlockup_detector_perf_disable(void);
extern void hardlockup_detector_perf_enable(void);
extern void hardlockup_detector_perf_cleanup(void);
extern int hardlockup_detector_perf_init(void);
#else
static inline void hardlockup_detector_perf_stop(void) { }
static inline void hardlockup_detector_perf_restart(void) { }
static inline void hardlockup_detector_perf_disable(void) { }
static inline void hardlockup_detector_perf_enable(void) { }
static inline void hardlockup_detector_perf_cleanup(void) { }
# if !defined(CONFIG_HAVE_NMI_WATCHDOG)
static inline int hardlockup_detector_perf_init(void) { return -ENODEV; }
static inline void arch_touch_nmi_watchdog(void) {}
# else
static inline int hardlockup_detector_perf_init(void) { return 0; }
# endif
#endif
|
|
|
|
/*
 * NMI watchdog control hooks. Only the prototypes are given here; they are
 * declared unconditionally, with no CONFIG_ guard around them.
 */
void watchdog_nmi_stop(void);
void watchdog_nmi_start(void);
int watchdog_nmi_probe(void);
void watchdog_nmi_enable(unsigned int cpu);
void watchdog_nmi_disable(unsigned int cpu);

void lockup_detector_reconfigure(void);
|
|
|
|
/**
 * touch_nmi_watchdog - manually reset the hardlockup watchdog timeout.
 *
 * If we support detecting hardlockups, touch_nmi_watchdog() may be
 * used to pet the watchdog (reset the timeout) - for code which
 * intentionally disables interrupts for a long time. This call is stateless.
 *
 * Though this function has "nmi" in the name, the hardlockup watchdog might
 * not be backed by NMIs. This function will likely be renamed to
 * touch_hardlockup_watchdog() in the future.
 */
static inline void touch_nmi_watchdog(void)
{
	/*
	 * Pass on to the hardlockup detector selected via CONFIG_. Note that
	 * the hardlockup detector may not be arch-specific nor using NMIs
	 * and the arch_touch_nmi_watchdog() function will likely be renamed
	 * in the future.
	 */
	arch_touch_nmi_watchdog();

	/*
	 * Touching the hardlockup detector implicitly resets the
	 * softlockup detector too
	 */
	touch_softlockup_watchdog();
}
|
|
|
|
/*
|
|
* Create trigger_all_cpu_backtrace() out of the arch-provided
|
|
* base function. Return whether such support was available,
|
|
* to allow calling code to fall back to some other mechanism:
|
|
*/
|
|
#ifdef arch_trigger_cpumask_backtrace
|
|
static inline bool trigger_all_cpu_backtrace(void)
|
|
{
|
|
arch_trigger_cpumask_backtrace(cpu_online_mask, false);
|
|
return true;
|
|
}
|
|
|
|
static inline bool trigger_allbutself_cpu_backtrace(void)
|
|
{
|
|
arch_trigger_cpumask_backtrace(cpu_online_mask, true);
|
|
return true;
|
|
}
|
|
|
|
/*
 * Forward a caller-chosen CPU mask to the arch backtrace hook, with the
 * second argument false.
 * @mask: CPUs to pass to arch_trigger_cpumask_backtrace().
 *
 * Returns true unconditionally in this arch-backed variant.
 */
static inline bool trigger_cpumask_backtrace(struct cpumask *mask)
{
	arch_trigger_cpumask_backtrace(mask, false);
	return true;
}
|
|
|
|
/*
 * Forward the single-CPU mask cpumask_of(@cpu) to the arch backtrace hook,
 * with the second argument false.
 * @cpu: the CPU whose mask is passed on.
 *
 * Returns true unconditionally in this arch-backed variant.
 */
static inline bool trigger_single_cpu_backtrace(int cpu)
{
	arch_trigger_cpumask_backtrace(cpumask_of(cpu), false);
	return true;
}
|
|
|
|
/* generic implementation */
|
|
void nmi_trigger_cpumask_backtrace(const cpumask_t *mask,
|
|
bool exclude_self,
|
|
void (*raise)(cpumask_t *mask));
|
|
bool nmi_cpu_backtrace(struct pt_regs *regs);
|
|
|
|
#else
|
|
/*
 * Fallback used when arch_trigger_cpumask_backtrace is not defined:
 * does nothing and returns false so the caller can fall back to some
 * other mechanism.
 */
static inline bool trigger_all_cpu_backtrace(void)
{
	return false;
}
|
|
/*
 * Fallback used when arch_trigger_cpumask_backtrace is not defined:
 * does nothing and returns false.
 */
static inline bool trigger_allbutself_cpu_backtrace(void)
{
	return false;
}
|
|
/*
 * Fallback used when arch_trigger_cpumask_backtrace is not defined:
 * @mask is ignored and false is returned.
 */
static inline bool trigger_cpumask_backtrace(struct cpumask *mask)
{
	return false;
}
|
|
/*
 * Fallback used when arch_trigger_cpumask_backtrace is not defined:
 * @cpu is ignored and false is returned.
 */
static inline bool trigger_single_cpu_backtrace(int cpu)
{
	return false;
}
|
|
#endif
|
|
|
|
/*
 * Declared only for the perf-based hard lockup detector. Takes a watchdog
 * threshold and returns a 64-bit sample period; the conversion itself is
 * defined outside this header.
 */
#ifdef CONFIG_HARDLOCKUP_DETECTOR_PERF
u64 hw_nmi_get_sample_period(int watchdog_thresh);
#endif
|
|
|
|
#if defined(CONFIG_HARDLOCKUP_CHECK_TIMESTAMP) && \
|
|
defined(CONFIG_HARDLOCKUP_DETECTOR_PERF)
|
|
void watchdog_update_hrtimer_threshold(u64 period);
|
|
#else
|
|
static inline void watchdog_update_hrtimer_threshold(u64 period) { }
|
|
#endif
|
|
|
|
struct ctl_table;
|
|
int proc_watchdog(struct ctl_table *, int, void *, size_t *, loff_t *);
|
|
int proc_nmi_watchdog(struct ctl_table *, int , void *, size_t *, loff_t *);
|
|
int proc_soft_watchdog(struct ctl_table *, int , void *, size_t *, loff_t *);
|
|
int proc_watchdog_thresh(struct ctl_table *, int , void *, size_t *, loff_t *);
|
|
int proc_watchdog_cpumask(struct ctl_table *, int, void *, size_t *, loff_t *);
|
|
|
|
#ifdef CONFIG_HAVE_ACPI_APEI_NMI
|
|
#include <asm/nmi.h>
|
|
#endif
|
|
|
|
/*
 * NMI backtrace stall helpers. With CONFIG_NMI_CHECK_CPU set they are
 * declared here and defined elsewhere; otherwise both are empty stubs that
 * ignore @btp.
 */
#ifdef CONFIG_NMI_CHECK_CPU
void nmi_backtrace_stall_snap(const struct cpumask *btp);
void nmi_backtrace_stall_check(const struct cpumask *btp);
#else
static inline void nmi_backtrace_stall_snap(const struct cpumask *btp) {}
static inline void nmi_backtrace_stall_check(const struct cpumask *btp) {}
#endif
|
|
|
|
#endif
|