Merge tag 'mm-nonmm-stable-2023-06-24-19-23' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Pull non-mm updates from Andrew Morton:

 - Arnd Bergmann has fixed a bunch of -Wmissing-prototypes in top-level directories

 - Douglas Anderson has added a new "buddy" mode to the hardlockup detector. It permits the detector to work on architectures which cannot provide the required interrupts, by having CPUs periodically perform checks on other CPUs

 - Zhen Lei has enhanced kexec's ability to support two crash regions

 - Petr Mladek has done a lot of cleanup on the hard lockup detector's Kconfig entries

 - And the usual bunch of singleton patches in various places

* tag 'mm-nonmm-stable-2023-06-24-19-23' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm: (72 commits)
  kernel/time/posix-stubs.c: remove duplicated include
  ocfs2: remove redundant assignment to variable bit_off
  watchdog/hardlockup: fix typo in config HARDLOCKUP_DETECTOR_PREFER_BUDDY
  powerpc: move arch_trigger_cpumask_backtrace from nmi.h to irq.h
  devres: show which resource was invalid in __devm_ioremap_resource()
  watchdog/hardlockup: define HARDLOCKUP_DETECTOR_ARCH
  watchdog/sparc64: define HARDLOCKUP_DETECTOR_SPARC64
  watchdog/hardlockup: make HAVE_NMI_WATCHDOG sparc64-specific
  watchdog/hardlockup: declare arch_touch_nmi_watchdog() only in linux/nmi.h
  watchdog/hardlockup: make the config checks more straightforward
  watchdog/hardlockup: sort hardlockup detector related config values a logical way
  watchdog/hardlockup: move SMP barriers from common code to buddy code
  watchdog/buddy: simplify the dependency for HARDLOCKUP_DETECTOR_PREFER_BUDDY
  watchdog/buddy: don't copy the cpumask in watchdog_next_cpu()
  watchdog/buddy: cleanup how watchdog_buddy_check_hardlockup() is called
  watchdog/hardlockup: remove softlockup comment in touch_nmi_watchdog()
  watchdog/hardlockup: in watchdog_hardlockup_check() use cpumask_copy()
  watchdog/hardlockup: don't use raw_cpu_ptr() in watchdog_hardlockup_kick()
  watchdog/hardlockup: HAVE_NMI_WATCHDOG must implement watchdog_hardlockup_probe()
  watchdog/hardlockup: keep kernel.nmi_watchdog sysctl as 0444 if probe fails
  ...
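The "buddy" mode mentioned above works without an NMI: every CPU's hrtimer tick bumps a per-CPU counter, and every few ticks a CPU checks whether the counter of the next online CPU has moved; a counter that has not moved is treated as a hard lockup. Below is a minimal user-space sketch of that idea (plain C with pthreads; all names, tick rates and thread counts are purely illustrative, not kernel API):

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <unistd.h>

#define NCPUS 4

/* per-"CPU" tick counters, bumped by each worker's timer loop */
static atomic_int ticks[NCPUS];
/* last value each checker saw for its buddy */
static int saved[NCPUS];

/* the next "CPU" in the ring is my buddy */
static int next_cpu(int cpu) { return (cpu + 1) % NCPUS; }

/* the check one CPU performs on its buddy: no progress => presumed locked up */
static void buddy_check(int me)
{
	int buddy = next_cpu(me);
	int now = atomic_load(&ticks[buddy]);

	if (now == saved[buddy])
		fprintf(stderr, "CPU %d: buddy CPU %d looks hard-locked\n", me, buddy);
	saved[buddy] = now;
}

static void *cpu_loop(void *arg)
{
	int me = (int)(long)arg;

	for (int i = 0; ; i++) {
		atomic_fetch_add(&ticks[me], 1);	/* stands in for the hrtimer interrupt */
		if (i && i % 3 == 0)			/* check the buddy every 3 ticks */
			buddy_check(me);
		usleep(100 * 1000);
	}
	return NULL;
}

int main(void)
{
	pthread_t t[NCPUS];

	for (long i = 0; i < NCPUS; i++)
		pthread_create(&t[i], NULL, cpu_loop, (void *)i);
	sleep(2);	/* let a few rounds of checks run */
	return 0;
}

In the kernel the per-CPU counter is hrtimer_interrupts, the snapshot is hrtimer_interrupts_saved, and the ring of checkers lives in the new kernel/watchdog_buddy.c shown further down in this diff.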
@@ -91,7 +91,8 @@ obj-$(CONFIG_FAIL_FUNCTION) += fail_function.o
 obj-$(CONFIG_KGDB) += debug/
 obj-$(CONFIG_DETECT_HUNG_TASK) += hung_task.o
 obj-$(CONFIG_LOCKUP_DETECTOR) += watchdog.o
-obj-$(CONFIG_HARDLOCKUP_DETECTOR_PERF) += watchdog_hld.o
+obj-$(CONFIG_HARDLOCKUP_DETECTOR_BUDDY) += watchdog_buddy.o
+obj-$(CONFIG_HARDLOCKUP_DETECTOR_PERF) += watchdog_perf.o
 obj-$(CONFIG_SECCOMP) += seccomp.o
 obj-$(CONFIG_RELAY) += relay.o
 obj-$(CONFIG_SYSCTL) += utsname_sysctl.o
@@ -252,23 +252,19 @@ static int memcg_charge_kernel_stack(struct vm_struct *vm)
 {
 	int i;
 	int ret;
+	int nr_charged = 0;
 
-	BUILD_BUG_ON(IS_ENABLED(CONFIG_VMAP_STACK) && PAGE_SIZE % 1024 != 0);
 	BUG_ON(vm->nr_pages != THREAD_SIZE / PAGE_SIZE);
 
 	for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++) {
 		ret = memcg_kmem_charge_page(vm->pages[i], GFP_KERNEL, 0);
 		if (ret)
 			goto err;
+		nr_charged++;
 	}
 	return 0;
 err:
-	/*
-	 * If memcg_kmem_charge_page() fails, page's memory cgroup pointer is
-	 * NULL, and memcg_kmem_uncharge_page() in free_thread_stack() will
-	 * ignore this page.
-	 */
-	for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++)
+	for (i = 0; i < nr_charged; i++)
 		memcg_kmem_uncharge_page(vm->pages[i], 0);
 	return ret;
 }
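The memcg_charge_kernel_stack() change above makes the error path undo only the charges that actually succeeded (nr_charged) instead of walking the whole stack range again. The same pattern in a stand-alone sketch (hypothetical charge()/uncharge() helpers, not kernel API):

#include <stdbool.h>
#include <stdio.h>

#define NR_PAGES 4

static bool charged[NR_PAGES];

/* hypothetical helper: pretend the third page cannot be charged */
static bool charge(int i)
{
	if (i == 2)
		return false;
	charged[i] = true;
	return true;
}

static void uncharge(int i)
{
	charged[i] = false;
}

static int charge_all(void)
{
	int i, nr_charged = 0;

	for (i = 0; i < NR_PAGES; i++) {
		if (!charge(i))
			goto err;
		nr_charged++;
	}
	return 0;
err:
	/* roll back only what was actually charged */
	for (i = 0; i < nr_charged; i++)
		uncharge(i);
	return -1;
}

int main(void)
{
	printf("charge_all() = %d\n", charge_all());
	return 0;
}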
@@ -279,7 +279,7 @@ void notrace __sanitizer_cov_trace_cmp4(u32 arg1, u32 arg2)
 }
 EXPORT_SYMBOL(__sanitizer_cov_trace_cmp4);
 
-void notrace __sanitizer_cov_trace_cmp8(u64 arg1, u64 arg2)
+void notrace __sanitizer_cov_trace_cmp8(kcov_u64 arg1, kcov_u64 arg2)
 {
 	write_comp_data(KCOV_CMP_SIZE(3), arg1, arg2, _RET_IP_);
 }
@@ -306,16 +306,17 @@ void notrace __sanitizer_cov_trace_const_cmp4(u32 arg1, u32 arg2)
 }
 EXPORT_SYMBOL(__sanitizer_cov_trace_const_cmp4);
 
-void notrace __sanitizer_cov_trace_const_cmp8(u64 arg1, u64 arg2)
+void notrace __sanitizer_cov_trace_const_cmp8(kcov_u64 arg1, kcov_u64 arg2)
 {
 	write_comp_data(KCOV_CMP_SIZE(3) | KCOV_CMP_CONST, arg1, arg2,
 			_RET_IP_);
 }
 EXPORT_SYMBOL(__sanitizer_cov_trace_const_cmp8);
 
-void notrace __sanitizer_cov_trace_switch(u64 val, u64 *cases)
+void notrace __sanitizer_cov_trace_switch(kcov_u64 val, void *arg)
 {
 	u64 i;
+	u64 *cases = arg;
 	u64 count = cases[0];
 	u64 size = cases[1];
 	u64 type = KCOV_CMP_CONST;
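In the __sanitizer_cov_trace_switch() hunk above, the compiler-generated table has cases[0] holding the number of switch cases and cases[1] their width in bits; the case constants follow. A small sketch of walking such a table — the layout of the trailing constants (starting at cases[2]) is an assumption based on the fields visible above, so treat it as illustrative only:

#include <stdint.h>
#include <stdio.h>

/* walk a SanitizerCoverage-style cases table: {count, bit-size, c0, c1, ...} */
static void dump_switch_cases(uint64_t val, const uint64_t *cases)
{
	uint64_t count = cases[0];
	uint64_t size = cases[1];

	for (uint64_t i = 0; i < count; i++)
		printf("compare val=%llu against case %llu (%llu-bit)\n",
		       (unsigned long long)val,
		       (unsigned long long)cases[i + 2],
		       (unsigned long long)size);
}

int main(void)
{
	/* 3 cases of a 32-bit switch: 1, 7, 42 */
	uint64_t cases[] = { 3, 32, 1, 7, 42 };

	dump_switch_cases(7, cases);
	return 0;
}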
@@ -1091,6 +1091,11 @@ __bpf_kfunc void crash_kexec(struct pt_regs *regs)
 	}
 }
 
+static inline resource_size_t crash_resource_size(const struct resource *res)
+{
+	return !res->end ? 0 : resource_size(res);
+}
+
 ssize_t crash_get_memory_size(void)
 {
 	ssize_t size = 0;
@@ -1098,19 +1103,45 @@ ssize_t crash_get_memory_size(void)
 	if (!kexec_trylock())
 		return -EBUSY;
 
-	if (crashk_res.end != crashk_res.start)
-		size = resource_size(&crashk_res);
+	size += crash_resource_size(&crashk_res);
+	size += crash_resource_size(&crashk_low_res);
 
 	kexec_unlock();
 	return size;
 }
 
+static int __crash_shrink_memory(struct resource *old_res,
+				 unsigned long new_size)
+{
+	struct resource *ram_res;
+
+	ram_res = kzalloc(sizeof(*ram_res), GFP_KERNEL);
+	if (!ram_res)
+		return -ENOMEM;
+
+	ram_res->start = old_res->start + new_size;
+	ram_res->end = old_res->end;
+	ram_res->flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM;
+	ram_res->name = "System RAM";
+
+	if (!new_size) {
+		release_resource(old_res);
+		old_res->start = 0;
+		old_res->end = 0;
+	} else {
+		crashk_res.end = ram_res->start - 1;
+	}
+
+	crash_free_reserved_phys_range(ram_res->start, ram_res->end);
+	insert_resource(&iomem_resource, ram_res);
+
+	return 0;
+}
+
 int crash_shrink_memory(unsigned long new_size)
 {
 	int ret = 0;
-	unsigned long start, end;
-	unsigned long old_size;
-	struct resource *ram_res;
+	unsigned long old_size, low_size;
 
 	if (!kexec_trylock())
 		return -EBUSY;
@@ -1119,36 +1150,42 @@ int crash_shrink_memory(unsigned long new_size)
 		ret = -ENOENT;
 		goto unlock;
 	}
-	start = crashk_res.start;
-	end = crashk_res.end;
-	old_size = (end == 0) ? 0 : end - start + 1;
+
+	low_size = crash_resource_size(&crashk_low_res);
+	old_size = crash_resource_size(&crashk_res) + low_size;
 	new_size = roundup(new_size, KEXEC_CRASH_MEM_ALIGN);
 	if (new_size >= old_size) {
 		ret = (new_size == old_size) ? 0 : -EINVAL;
 		goto unlock;
 	}
 
-	ram_res = kzalloc(sizeof(*ram_res), GFP_KERNEL);
-	if (!ram_res) {
-		ret = -ENOMEM;
-		goto unlock;
-	}
-
-	start = roundup(start, KEXEC_CRASH_MEM_ALIGN);
-	end = roundup(start + new_size, KEXEC_CRASH_MEM_ALIGN);
-
-	crash_free_reserved_phys_range(end, crashk_res.end);
-
-	if ((start == end) && (crashk_res.parent != NULL))
-		release_resource(&crashk_res);
-
-	ram_res->start = end;
-	ram_res->end = crashk_res.end;
-	ram_res->flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM;
-	ram_res->name = "System RAM";
-
-	crashk_res.end = end - 1;
-
-	insert_resource(&iomem_resource, ram_res);
+	/*
+	 * (low_size > new_size) implies that low_size is greater than zero.
+	 * This also means that if low_size is zero, the else branch is taken.
+	 *
+	 * If low_size is greater than 0, (low_size > new_size) indicates that
+	 * crashk_low_res also needs to be shrunken. Otherwise, only crashk_res
+	 * needs to be shrunken.
+	 */
+	if (low_size > new_size) {
+		ret = __crash_shrink_memory(&crashk_res, 0);
+		if (ret)
+			goto unlock;
+
+		ret = __crash_shrink_memory(&crashk_low_res, new_size);
+	} else {
+		ret = __crash_shrink_memory(&crashk_res, new_size - low_size);
+	}
+
+	/* Swap crashk_res and crashk_low_res if needed */
+	if (!crashk_res.end && crashk_low_res.end) {
+		crashk_res.start = crashk_low_res.start;
+		crashk_res.end = crashk_low_res.end;
+		release_resource(&crashk_low_res);
+		crashk_low_res.start = 0;
+		crashk_low_res.end = 0;
+		insert_resource(&iomem_resource, &crashk_res);
+	}
 
 unlock:
 	kexec_unlock();
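Following the comment in the crash_shrink_memory() hunk above: a requested new_size smaller than the low region means both regions shrink (the main crashk_res drops to zero and the low region is trimmed to new_size), otherwise only crashk_res shrinks and keeps new_size - low_size bytes. A tiny sketch of just that decision (plain C, illustrative names and sizes):

#include <stdio.h>

/* mirrors the branch in crash_shrink_memory(): which region gets shrunk? */
static void plan_shrink(unsigned long low_size, unsigned long new_size)
{
	if (low_size > new_size)
		printf("shrink crashk_res to 0, trim crashk_low_res to %lu bytes\n",
		       new_size);
	else
		printf("keep %lu bytes in crashk_res, leave crashk_low_res alone\n",
		       new_size - low_size);
}

int main(void)
{
	plan_shrink(0, 128UL << 20);		/* no low region: only crashk_res shrinks */
	plan_shrink(256UL << 20, 128UL << 20);	/* low region larger than target: shrink both */
	return 0;
}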
@@ -867,6 +867,7 @@ static int kexec_purgatory_setup_sechdrs(struct purgatory_info *pi,
 {
 	unsigned long bss_addr;
 	unsigned long offset;
+	size_t sechdrs_size;
 	Elf_Shdr *sechdrs;
 	int i;
 
@@ -874,11 +875,11 @@ static int kexec_purgatory_setup_sechdrs(struct purgatory_info *pi,
 	 * The section headers in kexec_purgatory are read-only. In order to
 	 * have them modifiable make a temporary copy.
 	 */
-	sechdrs = vzalloc(array_size(sizeof(Elf_Shdr), pi->ehdr->e_shnum));
+	sechdrs_size = array_size(sizeof(Elf_Shdr), pi->ehdr->e_shnum);
+	sechdrs = vzalloc(sechdrs_size);
 	if (!sechdrs)
 		return -ENOMEM;
-	memcpy(sechdrs, (void *)pi->ehdr + pi->ehdr->e_shoff,
-	       pi->ehdr->e_shnum * sizeof(Elf_Shdr));
+	memcpy(sechdrs, (void *)pi->ehdr + pi->ehdr->e_shoff, sechdrs_size);
 	pi->sechdrs = sechdrs;
 
 	offset = 0;
@@ -322,10 +322,10 @@ void __noreturn kthread_exit(long result)
  * @comp: Completion to complete
  * @code: The integer value to return to kthread_stop().
  *
- * If present complete @comp and the reuturn code to kthread_stop().
+ * If present, complete @comp and then return code to kthread_stop().
  *
  * A kernel thread whose module may be removed after the completion of
- * @comp can use this function exit safely.
+ * @comp can use this function to exit safely.
  *
  * Does not return.
  */
@@ -57,4 +57,8 @@ static inline void __lockevent_add(enum lock_events event, int inc)
 #define lockevent_cond_inc(ev, c)
 
 #endif /* CONFIG_LOCK_EVENT_COUNTS */
+
+ssize_t lockevent_read(struct file *file, char __user *user_buf,
+		       size_t count, loff_t *ppos);
+
 #endif /* __LOCKING_LOCK_EVENTS_H */
@@ -684,6 +684,7 @@ void __warn(const char *file, int line, void *caller, unsigned taint,
 	add_taint(taint, LOCKDEP_STILL_OK);
 }
 
+#ifdef CONFIG_BUG
 #ifndef __WARN_FLAGS
 void warn_slowpath_fmt(const char *file, int line, unsigned taint,
 		       const char *fmt, ...)
@@ -722,8 +723,6 @@ void __warn_printk(const char *fmt, ...)
 EXPORT_SYMBOL(__warn_printk);
 #endif
 
-#ifdef CONFIG_BUG
-
 /* Support resetting WARN*_ONCE state */
 
 static int clear_warn_once_set(void *data, u64 val)
@@ -29,20 +29,18 @@
 
 static DEFINE_MUTEX(watchdog_mutex);
 
-#if defined(CONFIG_HARDLOCKUP_DETECTOR) || defined(CONFIG_HAVE_NMI_WATCHDOG)
-# define WATCHDOG_DEFAULT (SOFT_WATCHDOG_ENABLED | NMI_WATCHDOG_ENABLED)
-# define NMI_WATCHDOG_DEFAULT 1
+#if defined(CONFIG_HARDLOCKUP_DETECTOR) || defined(CONFIG_HARDLOCKUP_DETECTOR_SPARC64)
+# define WATCHDOG_HARDLOCKUP_DEFAULT 1
 #else
-# define WATCHDOG_DEFAULT (SOFT_WATCHDOG_ENABLED)
-# define NMI_WATCHDOG_DEFAULT 0
+# define WATCHDOG_HARDLOCKUP_DEFAULT 0
 #endif
 
 unsigned long __read_mostly watchdog_enabled;
 int __read_mostly watchdog_user_enabled = 1;
-int __read_mostly nmi_watchdog_user_enabled = NMI_WATCHDOG_DEFAULT;
-int __read_mostly soft_watchdog_user_enabled = 1;
+static int __read_mostly watchdog_hardlockup_user_enabled = WATCHDOG_HARDLOCKUP_DEFAULT;
+static int __read_mostly watchdog_softlockup_user_enabled = 1;
 int __read_mostly watchdog_thresh = 10;
-static int __read_mostly nmi_watchdog_available;
+static int __read_mostly watchdog_hardlockup_available;
 
 struct cpumask watchdog_cpumask __read_mostly;
 unsigned long *watchdog_cpumask_bits = cpumask_bits(&watchdog_cpumask);
@@ -68,7 +66,7 @@ unsigned int __read_mostly hardlockup_panic =
  */
 void __init hardlockup_detector_disable(void)
 {
-	nmi_watchdog_user_enabled = 0;
+	watchdog_hardlockup_user_enabled = 0;
 }
 
 static int __init hardlockup_panic_setup(char *str)
@@ -78,54 +76,163 @@ static int __init hardlockup_panic_setup(char *str)
 	else if (!strncmp(str, "nopanic", 7))
 		hardlockup_panic = 0;
 	else if (!strncmp(str, "0", 1))
-		nmi_watchdog_user_enabled = 0;
+		watchdog_hardlockup_user_enabled = 0;
 	else if (!strncmp(str, "1", 1))
-		nmi_watchdog_user_enabled = 1;
+		watchdog_hardlockup_user_enabled = 1;
 	return 1;
 }
 __setup("nmi_watchdog=", hardlockup_panic_setup);
 
 #endif /* CONFIG_HARDLOCKUP_DETECTOR */
 
+#if defined(CONFIG_HARDLOCKUP_DETECTOR_COUNTS_HRTIMER)
+
+static DEFINE_PER_CPU(atomic_t, hrtimer_interrupts);
+static DEFINE_PER_CPU(int, hrtimer_interrupts_saved);
+static DEFINE_PER_CPU(bool, watchdog_hardlockup_warned);
+static DEFINE_PER_CPU(bool, watchdog_hardlockup_touched);
+static unsigned long watchdog_hardlockup_all_cpu_dumped;
+
+notrace void arch_touch_nmi_watchdog(void)
+{
+	/*
+	 * Using __raw here because some code paths have
+	 * preemption enabled. If preemption is enabled
+	 * then interrupts should be enabled too, in which
+	 * case we shouldn't have to worry about the watchdog
+	 * going off.
+	 */
+	raw_cpu_write(watchdog_hardlockup_touched, true);
+}
+EXPORT_SYMBOL(arch_touch_nmi_watchdog);
+
+void watchdog_hardlockup_touch_cpu(unsigned int cpu)
+{
+	per_cpu(watchdog_hardlockup_touched, cpu) = true;
+}
+
+static bool is_hardlockup(unsigned int cpu)
+{
+	int hrint = atomic_read(&per_cpu(hrtimer_interrupts, cpu));
+
+	if (per_cpu(hrtimer_interrupts_saved, cpu) == hrint)
+		return true;
+
+	/*
+	 * NOTE: we don't need any fancy atomic_t or READ_ONCE/WRITE_ONCE
+	 * for hrtimer_interrupts_saved. hrtimer_interrupts_saved is
+	 * written/read by a single CPU.
+	 */
+	per_cpu(hrtimer_interrupts_saved, cpu) = hrint;
+
+	return false;
+}
+
+static void watchdog_hardlockup_kick(void)
+{
+	int new_interrupts;
+
+	new_interrupts = atomic_inc_return(this_cpu_ptr(&hrtimer_interrupts));
+	watchdog_buddy_check_hardlockup(new_interrupts);
+}
+
+void watchdog_hardlockup_check(unsigned int cpu, struct pt_regs *regs)
+{
+	if (per_cpu(watchdog_hardlockup_touched, cpu)) {
+		per_cpu(watchdog_hardlockup_touched, cpu) = false;
+		return;
+	}
+
+	/*
+	 * Check for a hardlockup by making sure the CPU's timer
+	 * interrupt is incrementing. The timer interrupt should have
+	 * fired multiple times before we overflow'd. If it hasn't
+	 * then this is a good indication the cpu is stuck
+	 */
+	if (is_hardlockup(cpu)) {
+		unsigned int this_cpu = smp_processor_id();
+		struct cpumask backtrace_mask;
+
+		cpumask_copy(&backtrace_mask, cpu_online_mask);
+
+		/* Only print hardlockups once. */
+		if (per_cpu(watchdog_hardlockup_warned, cpu))
+			return;
+
+		pr_emerg("Watchdog detected hard LOCKUP on cpu %d\n", cpu);
+		print_modules();
+		print_irqtrace_events(current);
+		if (cpu == this_cpu) {
+			if (regs)
+				show_regs(regs);
+			else
+				dump_stack();
+			cpumask_clear_cpu(cpu, &backtrace_mask);
+		} else {
+			if (trigger_single_cpu_backtrace(cpu))
+				cpumask_clear_cpu(cpu, &backtrace_mask);
+		}
+
+		/*
+		 * Perform multi-CPU dump only once to avoid multiple
+		 * hardlockups generating interleaving traces
+		 */
+		if (sysctl_hardlockup_all_cpu_backtrace &&
+		    !test_and_set_bit(0, &watchdog_hardlockup_all_cpu_dumped))
+			trigger_cpumask_backtrace(&backtrace_mask);
+
+		if (hardlockup_panic)
+			nmi_panic(regs, "Hard LOCKUP");
+
+		per_cpu(watchdog_hardlockup_warned, cpu) = true;
+	} else {
+		per_cpu(watchdog_hardlockup_warned, cpu) = false;
+	}
+}
+
+#else /* CONFIG_HARDLOCKUP_DETECTOR_COUNTS_HRTIMER */
+
+static inline void watchdog_hardlockup_kick(void) { }
+
+#endif /* !CONFIG_HARDLOCKUP_DETECTOR_COUNTS_HRTIMER */
+
 /*
- * These functions can be overridden if an architecture implements its
- * own hardlockup detector.
+ * These functions can be overridden based on the configured hardlockdup detector.
  *
- * watchdog_nmi_enable/disable can be implemented to start and stop when
- * softlockup watchdog start and stop. The arch must select the
+ * watchdog_hardlockup_enable/disable can be implemented to start and stop when
+ * softlockup watchdog start and stop. The detector must select the
  * SOFTLOCKUP_DETECTOR Kconfig.
  */
-int __weak watchdog_nmi_enable(unsigned int cpu)
-{
-	hardlockup_detector_perf_enable();
-	return 0;
-}
+void __weak watchdog_hardlockup_enable(unsigned int cpu) { }
 
-void __weak watchdog_nmi_disable(unsigned int cpu)
-{
-	hardlockup_detector_perf_disable();
-}
+void __weak watchdog_hardlockup_disable(unsigned int cpu) { }
 
-/* Return 0, if a NMI watchdog is available. Error code otherwise */
-int __weak __init watchdog_nmi_probe(void)
+/*
+ * Watchdog-detector specific API.
+ *
+ * Return 0 when hardlockup watchdog is available, negative value otherwise.
+ * Note that the negative value means that a delayed probe might
+ * succeed later.
+ */
+int __weak __init watchdog_hardlockup_probe(void)
 {
-	return hardlockup_detector_perf_init();
+	return -ENODEV;
 }
 
 /**
- * watchdog_nmi_stop - Stop the watchdog for reconfiguration
+ * watchdog_hardlockup_stop - Stop the watchdog for reconfiguration
  *
  * The reconfiguration steps are:
- * watchdog_nmi_stop();
+ * watchdog_hardlockup_stop();
  * update_variables();
- * watchdog_nmi_start();
+ * watchdog_hardlockup_start();
  */
-void __weak watchdog_nmi_stop(void) { }
+void __weak watchdog_hardlockup_stop(void) { }
 
 /**
- * watchdog_nmi_start - Start the watchdog after reconfiguration
+ * watchdog_hardlockup_start - Start the watchdog after reconfiguration
  *
- * Counterpart to watchdog_nmi_stop().
+ * Counterpart to watchdog_hardlockup_stop().
  *
  * The following variables have been updated in update_variables() and
 * contain the currently valid configuration:
@@ -133,23 +240,23 @@ void __weak watchdog_nmi_stop(void) { }
 * - watchdog_thresh
 * - watchdog_cpumask
 */
-void __weak watchdog_nmi_start(void) { }
+void __weak watchdog_hardlockup_start(void) { }
 
 /**
 * lockup_detector_update_enable - Update the sysctl enable bit
 *
- * Caller needs to make sure that the NMI/perf watchdogs are off, so this
- * can't race with watchdog_nmi_disable().
+ * Caller needs to make sure that the hard watchdogs are off, so this
+ * can't race with watchdog_hardlockup_disable().
 */
 static void lockup_detector_update_enable(void)
 {
 	watchdog_enabled = 0;
 	if (!watchdog_user_enabled)
 		return;
-	if (nmi_watchdog_available && nmi_watchdog_user_enabled)
-		watchdog_enabled |= NMI_WATCHDOG_ENABLED;
-	if (soft_watchdog_user_enabled)
-		watchdog_enabled |= SOFT_WATCHDOG_ENABLED;
+	if (watchdog_hardlockup_available && watchdog_hardlockup_user_enabled)
+		watchdog_enabled |= WATCHDOG_HARDLOCKUP_ENABLED;
+	if (watchdog_softlockup_user_enabled)
+		watchdog_enabled |= WATCHDOG_SOFTOCKUP_ENABLED;
 }
 
 #ifdef CONFIG_SOFTLOCKUP_DETECTOR
@@ -179,8 +286,6 @@ static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts);
 static DEFINE_PER_CPU(unsigned long, watchdog_report_ts);
 static DEFINE_PER_CPU(struct hrtimer, watchdog_hrtimer);
 static DEFINE_PER_CPU(bool, softlockup_touch_sync);
-static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts);
-static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved);
 static unsigned long soft_lockup_nmi_warn;
 
 static int __init nowatchdog_setup(char *str)
@@ -192,7 +297,7 @@ __setup("nowatchdog", nowatchdog_setup);
 
 static int __init nosoftlockup_setup(char *str)
 {
-	soft_watchdog_user_enabled = 0;
+	watchdog_softlockup_user_enabled = 0;
 	return 1;
 }
 __setup("nosoftlockup", nosoftlockup_setup);
@@ -306,7 +411,7 @@ static int is_softlockup(unsigned long touch_ts,
 			 unsigned long period_ts,
 			 unsigned long now)
 {
-	if ((watchdog_enabled & SOFT_WATCHDOG_ENABLED) && watchdog_thresh){
+	if ((watchdog_enabled & WATCHDOG_SOFTOCKUP_ENABLED) && watchdog_thresh) {
 		/* Warn about unreasonable delays. */
 		if (time_after(now, period_ts + get_softlockup_thresh()))
 			return now - touch_ts;
@@ -315,22 +420,6 @@ static int is_softlockup(unsigned long touch_ts,
 }
 
-/* watchdog detector functions */
-bool is_hardlockup(void)
-{
-	unsigned long hrint = __this_cpu_read(hrtimer_interrupts);
-
-	if (__this_cpu_read(hrtimer_interrupts_saved) == hrint)
-		return true;
-
-	__this_cpu_write(hrtimer_interrupts_saved, hrint);
-	return false;
-}
-
-static void watchdog_interrupt_count(void)
-{
-	__this_cpu_inc(hrtimer_interrupts);
-}
-
 static DEFINE_PER_CPU(struct completion, softlockup_completion);
 static DEFINE_PER_CPU(struct cpu_stop_work, softlockup_stop_work);
 
@@ -361,8 +450,7 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
 	if (!watchdog_enabled)
 		return HRTIMER_NORESTART;
 
-	/* kick the hardlockup detector */
-	watchdog_interrupt_count();
+	watchdog_hardlockup_kick();
 
 	/* kick the softlockup detector */
 	if (completion_done(this_cpu_ptr(&softlockup_completion))) {
@@ -458,7 +546,7 @@ static void watchdog_enable(unsigned int cpu)
 	complete(done);
 
 	/*
-	 * Start the timer first to prevent the NMI watchdog triggering
+	 * Start the timer first to prevent the hardlockup watchdog triggering
 	 * before the timer has a chance to fire.
 	 */
 	hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD);
@@ -468,9 +556,9 @@ static void watchdog_enable(unsigned int cpu)
 
 	/* Initialize timestamp */
 	update_touch_ts();
-	/* Enable the perf event */
-	if (watchdog_enabled & NMI_WATCHDOG_ENABLED)
-		watchdog_nmi_enable(cpu);
+	/* Enable the hardlockup detector */
+	if (watchdog_enabled & WATCHDOG_HARDLOCKUP_ENABLED)
+		watchdog_hardlockup_enable(cpu);
 }
 
 static void watchdog_disable(unsigned int cpu)
@@ -480,11 +568,11 @@ static void watchdog_disable(unsigned int cpu)
 	WARN_ON_ONCE(cpu != smp_processor_id());
 
 	/*
-	 * Disable the perf event first. That prevents that a large delay
-	 * between disabling the timer and disabling the perf event causes
-	 * the perf NMI to detect a false positive.
+	 * Disable the hardlockup detector first. That prevents that a large
+	 * delay between disabling the timer and disabling the hardlockup
+	 * detector causes a false positive.
 	 */
-	watchdog_nmi_disable(cpu);
+	watchdog_hardlockup_disable(cpu);
 	hrtimer_cancel(hrtimer);
 	wait_for_completion(this_cpu_ptr(&softlockup_completion));
 }
@@ -540,7 +628,7 @@ int lockup_detector_offline_cpu(unsigned int cpu)
 static void __lockup_detector_reconfigure(void)
 {
 	cpus_read_lock();
-	watchdog_nmi_stop();
+	watchdog_hardlockup_stop();
 
 	softlockup_stop_all();
 	set_sample_period();
@@ -548,7 +636,7 @@ static void __lockup_detector_reconfigure(void)
 	if (watchdog_enabled && watchdog_thresh)
 		softlockup_start_all();
 
-	watchdog_nmi_start();
+	watchdog_hardlockup_start();
 	cpus_read_unlock();
 	/*
 	 * Must be called outside the cpus locked section to prevent
@@ -589,9 +677,9 @@ static __init void lockup_detector_setup(void)
 static void __lockup_detector_reconfigure(void)
 {
 	cpus_read_lock();
-	watchdog_nmi_stop();
+	watchdog_hardlockup_stop();
 	lockup_detector_update_enable();
-	watchdog_nmi_start();
+	watchdog_hardlockup_start();
 	cpus_read_unlock();
 }
 
 void lockup_detector_reconfigure(void)
@@ -646,14 +734,14 @@ static void proc_watchdog_update(void)
 /*
 * common function for watchdog, nmi_watchdog and soft_watchdog parameter
 *
- * caller             | table->data points to      | 'which'
- * -------------------|----------------------------|--------------------------
- * proc_watchdog      | watchdog_user_enabled      | NMI_WATCHDOG_ENABLED |
- *                    |                            | SOFT_WATCHDOG_ENABLED
- * -------------------|----------------------------|--------------------------
- * proc_nmi_watchdog  | nmi_watchdog_user_enabled  | NMI_WATCHDOG_ENABLED
- * -------------------|----------------------------|--------------------------
- * proc_soft_watchdog | soft_watchdog_user_enabled | SOFT_WATCHDOG_ENABLED
+ * caller             | table->data points to            | 'which'
+ * -------------------|----------------------------------|-------------------------------
+ * proc_watchdog      | watchdog_user_enabled            | WATCHDOG_HARDLOCKUP_ENABLED |
+ *                    |                                  | WATCHDOG_SOFTOCKUP_ENABLED
+ * -------------------|----------------------------------|-------------------------------
+ * proc_nmi_watchdog  | watchdog_hardlockup_user_enabled | WATCHDOG_HARDLOCKUP_ENABLED
+ * -------------------|----------------------------------|-------------------------------
+ * proc_soft_watchdog | watchdog_softlockup_user_enabled | WATCHDOG_SOFTOCKUP_ENABLED
 */
 static int proc_watchdog_common(int which, struct ctl_table *table, int write,
 				void *buffer, size_t *lenp, loff_t *ppos)
@@ -685,7 +773,8 @@ static int proc_watchdog_common(int which, struct ctl_table *table, int write,
 int proc_watchdog(struct ctl_table *table, int write,
 		  void *buffer, size_t *lenp, loff_t *ppos)
 {
-	return proc_watchdog_common(NMI_WATCHDOG_ENABLED|SOFT_WATCHDOG_ENABLED,
+	return proc_watchdog_common(WATCHDOG_HARDLOCKUP_ENABLED |
+				    WATCHDOG_SOFTOCKUP_ENABLED,
 				    table, write, buffer, lenp, ppos);
 }
 
@@ -695,9 +784,9 @@ int proc_watchdog(struct ctl_table *table, int write,
 int proc_nmi_watchdog(struct ctl_table *table, int write,
 		      void *buffer, size_t *lenp, loff_t *ppos)
 {
-	if (!nmi_watchdog_available && write)
+	if (!watchdog_hardlockup_available && write)
 		return -ENOTSUPP;
-	return proc_watchdog_common(NMI_WATCHDOG_ENABLED,
+	return proc_watchdog_common(WATCHDOG_HARDLOCKUP_ENABLED,
 				    table, write, buffer, lenp, ppos);
 }
 
@@ -707,7 +796,7 @@ int proc_nmi_watchdog(struct ctl_table *table, int write,
 int proc_soft_watchdog(struct ctl_table *table, int write,
 		       void *buffer, size_t *lenp, loff_t *ppos)
 {
-	return proc_watchdog_common(SOFT_WATCHDOG_ENABLED,
+	return proc_watchdog_common(WATCHDOG_SOFTOCKUP_ENABLED,
 				    table, write, buffer, lenp, ppos);
 }
 
@@ -773,15 +862,6 @@ static struct ctl_table watchdog_sysctls[] = {
 		.extra1 = SYSCTL_ZERO,
 		.extra2 = (void *)&sixty,
 	},
-	{
-		.procname = "nmi_watchdog",
-		.data = &nmi_watchdog_user_enabled,
-		.maxlen = sizeof(int),
-		.mode = NMI_WATCHDOG_SYSCTL_PERM,
-		.proc_handler = proc_nmi_watchdog,
-		.extra1 = SYSCTL_ZERO,
-		.extra2 = SYSCTL_ONE,
-	},
 	{
 		.procname = "watchdog_cpumask",
 		.data = &watchdog_cpumask_bits,
@@ -792,7 +872,7 @@ static struct ctl_table watchdog_sysctls[] = {
 #ifdef CONFIG_SOFTLOCKUP_DETECTOR
 	{
 		.procname = "soft_watchdog",
-		.data = &soft_watchdog_user_enabled,
+		.data = &watchdog_softlockup_user_enabled,
 		.maxlen = sizeof(int),
 		.mode = 0644,
 		.proc_handler = proc_soft_watchdog,
@@ -845,14 +925,90 @@ static struct ctl_table watchdog_sysctls[] = {
 	{}
 };
 
+static struct ctl_table watchdog_hardlockup_sysctl[] = {
+	{
+		.procname = "nmi_watchdog",
+		.data = &watchdog_hardlockup_user_enabled,
+		.maxlen = sizeof(int),
+		.mode = 0444,
+		.proc_handler = proc_nmi_watchdog,
+		.extra1 = SYSCTL_ZERO,
+		.extra2 = SYSCTL_ONE,
+	},
+	{}
+};
+
 static void __init watchdog_sysctl_init(void)
 {
 	register_sysctl_init("kernel", watchdog_sysctls);
+
+	if (watchdog_hardlockup_available)
+		watchdog_hardlockup_sysctl[0].mode = 0644;
+	register_sysctl_init("kernel", watchdog_hardlockup_sysctl);
 }
 
 #else
 #define watchdog_sysctl_init() do { } while (0)
 #endif /* CONFIG_SYSCTL */
 
+static void __init lockup_detector_delay_init(struct work_struct *work);
+static bool allow_lockup_detector_init_retry __initdata;
+
+static struct work_struct detector_work __initdata =
+		__WORK_INITIALIZER(detector_work, lockup_detector_delay_init);
+
+static void __init lockup_detector_delay_init(struct work_struct *work)
+{
+	int ret;
+
+	ret = watchdog_hardlockup_probe();
+	if (ret) {
+		pr_info("Delayed init of the lockup detector failed: %d\n", ret);
+		pr_info("Hard watchdog permanently disabled\n");
+		return;
+	}
+
+	allow_lockup_detector_init_retry = false;
+
+	watchdog_hardlockup_available = true;
+	lockup_detector_setup();
+}
+
+/*
+ * lockup_detector_retry_init - retry init lockup detector if possible.
+ *
+ * Retry hardlockup detector init. It is useful when it requires some
+ * functionality that has to be initialized later on a particular
+ * platform.
+ */
+void __init lockup_detector_retry_init(void)
+{
+	/* Must be called before late init calls */
+	if (!allow_lockup_detector_init_retry)
+		return;
+
+	schedule_work(&detector_work);
+}
+
+/*
+ * Ensure that optional delayed hardlockup init is proceed before
+ * the init code and memory is freed.
+ */
+static int __init lockup_detector_check(void)
+{
+	/* Prevent any later retry. */
+	allow_lockup_detector_init_retry = false;
+
+	/* Make sure no work is pending. */
+	flush_work(&detector_work);
+
+	watchdog_sysctl_init();
+
+	return 0;
+
+}
+late_initcall_sync(lockup_detector_check);
+
 void __init lockup_detector_init(void)
 {
 	if (tick_nohz_full_enabled())
@@ -861,8 +1017,10 @@ void __init lockup_detector_init(void)
 	cpumask_copy(&watchdog_cpumask,
 		     housekeeping_cpumask(HK_TYPE_TIMER));
 
-	if (!watchdog_nmi_probe())
-		nmi_watchdog_available = true;
+	if (!watchdog_hardlockup_probe())
+		watchdog_hardlockup_available = true;
+	else
+		allow_lockup_detector_init_retry = true;
+
 	lockup_detector_setup();
-	watchdog_sysctl_init();
 }
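With the sysctl changes above, /proc/sys/kernel/nmi_watchdog stays registered but read-only (mode 0444) when no hardlockup detector probed successfully, and becomes writable (0644) otherwise. A small user-space check of the current value (standard procfs path; minimal error handling):

#include <stdio.h>

int main(void)
{
	char buf[8] = "";
	FILE *f = fopen("/proc/sys/kernel/nmi_watchdog", "r");

	if (!f) {
		perror("nmi_watchdog");
		return 1;
	}
	if (fgets(buf, sizeof(buf), f))
		printf("hardlockup detector enabled: %s", buf);
	fclose(f);
	return 0;
}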
kernel/watchdog_buddy.c (new file)
@@ -0,0 +1,113 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/cpu.h>
+#include <linux/cpumask.h>
+#include <linux/kernel.h>
+#include <linux/nmi.h>
+#include <linux/percpu-defs.h>
+
+static cpumask_t __read_mostly watchdog_cpus;
+
+static unsigned int watchdog_next_cpu(unsigned int cpu)
+{
+	unsigned int next_cpu;
+
+	next_cpu = cpumask_next(cpu, &watchdog_cpus);
+	if (next_cpu >= nr_cpu_ids)
+		next_cpu = cpumask_first(&watchdog_cpus);
+
+	if (next_cpu == cpu)
+		return nr_cpu_ids;
+
+	return next_cpu;
+}
+
+int __init watchdog_hardlockup_probe(void)
+{
+	return 0;
+}
+
+void watchdog_hardlockup_enable(unsigned int cpu)
+{
+	unsigned int next_cpu;
+
+	/*
+	 * The new CPU will be marked online before the hrtimer interrupt
+	 * gets a chance to run on it. If another CPU tests for a
+	 * hardlockup on the new CPU before it has run its the hrtimer
+	 * interrupt, it will get a false positive. Touch the watchdog on
+	 * the new CPU to delay the check for at least 3 sampling periods
+	 * to guarantee one hrtimer has run on the new CPU.
+	 */
+	watchdog_hardlockup_touch_cpu(cpu);
+
+	/*
+	 * We are going to check the next CPU. Our watchdog_hrtimer
+	 * need not be zero if the CPU has already been online earlier.
+	 * Touch the watchdog on the next CPU to avoid false positive
+	 * if we try to check it in less then 3 interrupts.
+	 */
+	next_cpu = watchdog_next_cpu(cpu);
+	if (next_cpu < nr_cpu_ids)
+		watchdog_hardlockup_touch_cpu(next_cpu);
+
+	/*
+	 * Makes sure that watchdog is touched on this CPU before
+	 * other CPUs could see it in watchdog_cpus. The counter
+	 * part is in watchdog_buddy_check_hardlockup().
+	 */
+	smp_wmb();
+
+	cpumask_set_cpu(cpu, &watchdog_cpus);
+}
+
+void watchdog_hardlockup_disable(unsigned int cpu)
+{
+	unsigned int next_cpu = watchdog_next_cpu(cpu);
+
+	/*
+	 * Offlining this CPU will cause the CPU before this one to start
+	 * checking the one after this one. If this CPU just finished checking
+	 * the next CPU and updating hrtimer_interrupts_saved, and then the
+	 * previous CPU checks it within one sample period, it will trigger a
+	 * false positive. Touch the watchdog on the next CPU to prevent it.
+	 */
+	if (next_cpu < nr_cpu_ids)
+		watchdog_hardlockup_touch_cpu(next_cpu);
+
+	/*
+	 * Makes sure that watchdog is touched on the next CPU before
+	 * this CPU disappear in watchdog_cpus. The counter part is in
+	 * watchdog_buddy_check_hardlockup().
+	 */
+	smp_wmb();
+
+	cpumask_clear_cpu(cpu, &watchdog_cpus);
+}
+
+void watchdog_buddy_check_hardlockup(int hrtimer_interrupts)
+{
+	unsigned int next_cpu;
+
+	/*
+	 * Test for hardlockups every 3 samples. The sample period is
+	 *  watchdog_thresh * 2 / 5, so 3 samples gets us back to slightly over
+	 *  watchdog_thresh (over by 20%).
+	 */
+	if (hrtimer_interrupts % 3 != 0)
+		return;
+
+	/* check for a hardlockup on the next CPU */
+	next_cpu = watchdog_next_cpu(smp_processor_id());
+	if (next_cpu >= nr_cpu_ids)
+		return;
+
+	/*
+	 * Make sure that the watchdog was touched on next CPU when
+	 * watchdog_next_cpu() returned another one because of
+	 * a change in watchdog_hardlockup_enable()/disable().
+	 */
+	smp_rmb();
+
+	watchdog_hardlockup_check(next_cpu, NULL);
+}
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 /*
- * Detect hard lockups on a system
+ * Detect hard lockups on a system using perf
 *
 * started by Don Zickus, Copyright (C) 2010 Red Hat, Inc.
 *
@@ -20,28 +20,12 @@
 #include <asm/irq_regs.h>
 #include <linux/perf_event.h>
 
-static DEFINE_PER_CPU(bool, hard_watchdog_warn);
-static DEFINE_PER_CPU(bool, watchdog_nmi_touch);
 static DEFINE_PER_CPU(struct perf_event *, watchdog_ev);
 static DEFINE_PER_CPU(struct perf_event *, dead_event);
 static struct cpumask dead_events_mask;
 
-static unsigned long hardlockup_allcpu_dumped;
 static atomic_t watchdog_cpus = ATOMIC_INIT(0);
 
-notrace void arch_touch_nmi_watchdog(void)
-{
-	/*
-	 * Using __raw here because some code paths have
-	 * preemption enabled. If preemption is enabled
-	 * then interrupts should be enabled too, in which
-	 * case we shouldn't have to worry about the watchdog
-	 * going off.
-	 */
-	raw_cpu_write(watchdog_nmi_touch, true);
-}
-EXPORT_SYMBOL(arch_touch_nmi_watchdog);
-
 #ifdef CONFIG_HARDLOCKUP_CHECK_TIMESTAMP
 static DEFINE_PER_CPU(ktime_t, last_timestamp);
 static DEFINE_PER_CPU(unsigned int, nmi_rearmed);
@@ -114,61 +98,24 @@ static void watchdog_overflow_callback(struct perf_event *event,
 	/* Ensure the watchdog never gets throttled */
 	event->hw.interrupts = 0;
 
-	if (__this_cpu_read(watchdog_nmi_touch) == true) {
-		__this_cpu_write(watchdog_nmi_touch, false);
-		return;
-	}
-
 	if (!watchdog_check_timestamp())
 		return;
 
-	/* check for a hardlockup
-	 * This is done by making sure our timer interrupt
-	 * is incrementing. The timer interrupt should have
-	 * fired multiple times before we overflow'd. If it hasn't
-	 * then this is a good indication the cpu is stuck
-	 */
-	if (is_hardlockup()) {
-		int this_cpu = smp_processor_id();
-
-		/* only print hardlockups once */
-		if (__this_cpu_read(hard_watchdog_warn) == true)
-			return;
-
-		pr_emerg("Watchdog detected hard LOCKUP on cpu %d\n",
-			 this_cpu);
-		print_modules();
-		print_irqtrace_events(current);
-		if (regs)
-			show_regs(regs);
-		else
-			dump_stack();
-
-		/*
-		 * Perform all-CPU dump only once to avoid multiple hardlockups
-		 * generating interleaving traces
-		 */
-		if (sysctl_hardlockup_all_cpu_backtrace &&
-		    !test_and_set_bit(0, &hardlockup_allcpu_dumped))
-			trigger_allbutself_cpu_backtrace();
-
-		if (hardlockup_panic)
-			nmi_panic(regs, "Hard LOCKUP");
-
-		__this_cpu_write(hard_watchdog_warn, true);
-		return;
-	}
-
-	__this_cpu_write(hard_watchdog_warn, false);
-	return;
+	watchdog_hardlockup_check(smp_processor_id(), regs);
 }
 
 static int hardlockup_detector_event_create(void)
 {
-	unsigned int cpu = smp_processor_id();
+	unsigned int cpu;
 	struct perf_event_attr *wd_attr;
 	struct perf_event *evt;
 
+	/*
+	 * Preemption is not disabled because memory will be allocated.
+	 * Ensure CPU-locality by calling this in per-CPU kthread.
+	 */
+	WARN_ON(!is_percpu_thread());
+	cpu = raw_smp_processor_id();
 	wd_attr = &wd_hw_attr;
 	wd_attr->sample_period = hw_nmi_get_sample_period(watchdog_thresh);
 
@@ -185,10 +132,14 @@ static int hardlockup_detector_event_create(void)
 }
 
 /**
- * hardlockup_detector_perf_enable - Enable the local event
+ * watchdog_hardlockup_enable - Enable the local event
+ *
+ * @cpu: The CPU to enable hard lockup on.
 */
-void hardlockup_detector_perf_enable(void)
+void watchdog_hardlockup_enable(unsigned int cpu)
 {
+	WARN_ON_ONCE(cpu != smp_processor_id());
+
 	if (hardlockup_detector_event_create())
 		return;
 
@@ -200,12 +151,16 @@ void hardlockup_detector_perf_enable(void)
 }
 
 /**
- * hardlockup_detector_perf_disable - Disable the local event
+ * watchdog_hardlockup_disable - Disable the local event
+ *
+ * @cpu: The CPU to enable hard lockup on.
 */
-void hardlockup_detector_perf_disable(void)
+void watchdog_hardlockup_disable(unsigned int cpu)
 {
 	struct perf_event *event = this_cpu_read(watchdog_ev);
 
+	WARN_ON_ONCE(cpu != smp_processor_id());
+
 	if (event) {
 		perf_event_disable(event);
 		this_cpu_write(watchdog_ev, NULL);
@@ -268,7 +223,7 @@ void __init hardlockup_detector_perf_restart(void)
 
 	lockdep_assert_cpus_held();
 
-	if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED))
+	if (!(watchdog_enabled & WATCHDOG_HARDLOCKUP_ENABLED))
 		return;
 
 	for_each_online_cpu(cpu) {
@@ -279,12 +234,22 @@ void __init hardlockup_detector_perf_restart(void)
 	}
 }
 
-/**
- * hardlockup_detector_perf_init - Probe whether NMI event is available at all
- */
-int __init hardlockup_detector_perf_init(void)
+bool __weak __init arch_perf_nmi_is_available(void)
 {
-	int ret = hardlockup_detector_event_create();
+	return true;
+}
+
+/**
+ * watchdog_hardlockup_probe - Probe whether NMI event is available at all
+ */
+int __init watchdog_hardlockup_probe(void)
+{
+	int ret;
+
+	if (!arch_perf_nmi_is_available())
+		return -ENODEV;
+
+	ret = hardlockup_detector_event_create();
 
 	if (ret) {
 		pr_info("Perf NMI watchdog permanently disabled\n");