mirror of
https://github.com/tbsdtv/linux_media.git
synced 2025-07-23 12:43:29 +02:00
Booting one of my machines, it triggered the following crash: Kernel/User page tables isolation: enabled ftrace: allocating 36577 entries in 143 pages Starting tracer 'function' BUG: unable to handle page fault for address: ffffffffa000005c #PF: supervisor write access in kernel mode #PF: error_code(0x0003) - permissions violation PGD 2014067 P4D 2014067 PUD2015063
PMD 7b253067 PTE7b252061
Oops: 0003 [#1] PREEMPT SMP PTI CPU: 0 PID: 0 Comm: swapper Not tainted 5.4.0-test+ #24 Hardware name: To Be Filled By O.E.M. To Be Filled By O.E.M./To be filled by O.E.M., BIOS SDBLI944.86P 05/08/2007 RIP: 0010:text_poke_early+0x4a/0x58 Code: 34 24 48 89 54 24 08 e8 bf 72 0b 00 48 8b 34 24 48 8b 4c 24 08 84 c0 74 0b 48 89 df f3 a4 48 83 c4 10 5b c3 9c 58 fa 48 89 df <f3> a4 50 9d 48 83 c4 10 5b e9 d6 f9 ff ff 0 41 57 49 RSP: 0000:ffffffff82003d38 EFLAGS: 00010046 RAX: 0000000000000046 RBX: ffffffffa000005c RCX: 0000000000000005 RDX: 0000000000000005 RSI: ffffffff825b9a90 RDI: ffffffffa000005c RBP: ffffffffa000005c R08: 0000000000000000 R09: ffffffff8206e6e0 R10: ffff88807b01f4c0 R11: ffffffff8176c106 R12: ffffffff8206e6e0 R13: ffffffff824f2440 R14: 0000000000000000 R15: ffffffff8206eac0 FS: 0000000000000000(0000) GS:ffff88807d400000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: ffffffffa000005c CR3: 0000000002012000 CR4: 00000000000006b0 Call Trace: text_poke_bp+0x27/0x64 ? mutex_lock+0x36/0x5d arch_ftrace_update_trampoline+0x287/0x2d5 ? ftrace_replace_code+0x14b/0x160 ? ftrace_update_ftrace_func+0x65/0x6c __register_ftrace_function+0x6d/0x81 ftrace_startup+0x23/0xc1 register_ftrace_function+0x20/0x37 func_set_flag+0x59/0x77 __set_tracer_option.isra.19+0x20/0x3e trace_set_options+0xd6/0x13e apply_trace_boot_options+0x44/0x6d register_tracer+0x19e/0x1ac early_trace_init+0x21b/0x2c9 start_kernel+0x241/0x518 ? load_ucode_intel_bsp+0x21/0x52 secondary_startup_64+0xa4/0xb0 I was able to trigger it on other machines, when I added to the kernel command line of both "ftrace=function" and "trace_options=func_stack_trace". The cause is the "ftrace=function" would register the function tracer and create a trampoline, and it will set it as executable and read-only. Then the "trace_options=func_stack_trace" would then update the same trampoline to include the stack tracer version of the function tracer. But since the trampoline already exists, it updates it with text_poke_bp(). The problem is that text_poke_bp() called while system_state == SYSTEM_BOOTING, it will simply do a memcpy() and not the page mapping, as it would think that the text is still read-write. But in this case it is not, and we take a fault and crash. Instead, lets keep the ftrace trampolines read-write during boot up, and then when the kernel executable text is set to read-only, the ftrace trampolines get set to read-only as well. Link: https://lkml.kernel.org/r/20200430202147.4dc6e2de@oasis.local.home Cc: Ingo Molnar <mingo@kernel.org> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Andy Lutomirski <luto@amacapital.net> Cc: Borislav Petkov <bp@alien8.de> Cc: Josh Poimboeuf <jpoimboe@redhat.com> Cc: "H. Peter Anvin" <hpa@zytor.com> Cc: stable@vger.kernel.org Fixes:768ae4406a
("x86/ftrace: Use text_poke()") Acked-by: Peter Zijlstra <peterz@infradead.org> Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
100 lines
2.7 KiB
C
100 lines
2.7 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
|
|
#ifndef _ASM_X86_FTRACE_H
|
|
#define _ASM_X86_FTRACE_H
|
|
|
|
#ifdef CONFIG_FUNCTION_TRACER
|
|
#ifndef CC_USING_FENTRY
|
|
# error Compiler does not support fentry?
|
|
#endif
|
|
# define MCOUNT_ADDR ((unsigned long)(__fentry__))
|
|
#define MCOUNT_INSN_SIZE 5 /* sizeof mcount call */
|
|
|
|
#ifdef CONFIG_DYNAMIC_FTRACE
|
|
#define ARCH_SUPPORTS_FTRACE_OPS 1
|
|
#endif
|
|
|
|
#define HAVE_FUNCTION_GRAPH_RET_ADDR_PTR
|
|
|
|
#ifndef __ASSEMBLY__
|
|
extern atomic_t modifying_ftrace_code;
|
|
extern void __fentry__(void);
|
|
|
|
static inline unsigned long ftrace_call_adjust(unsigned long addr)
|
|
{
|
|
/*
|
|
* addr is the address of the mcount call instruction.
|
|
* recordmcount does the necessary offset calculation.
|
|
*/
|
|
return addr;
|
|
}
|
|
|
|
/*
|
|
* When a ftrace registered caller is tracing a function that is
|
|
* also set by a register_ftrace_direct() call, it needs to be
|
|
* differentiated in the ftrace_caller trampoline. To do this, we
|
|
* place the direct caller in the ORIG_AX part of pt_regs. This
|
|
* tells the ftrace_caller that there's a direct caller.
|
|
*/
|
|
static inline void arch_ftrace_set_direct_caller(struct pt_regs *regs, unsigned long addr)
|
|
{
|
|
/* Emulate a call */
|
|
regs->orig_ax = addr;
|
|
}
|
|
|
|
#ifdef CONFIG_DYNAMIC_FTRACE
|
|
|
|
struct dyn_arch_ftrace {
|
|
/* No extra data needed for x86 */
|
|
};
|
|
|
|
#define FTRACE_GRAPH_TRAMP_ADDR FTRACE_GRAPH_ADDR
|
|
|
|
#endif /* CONFIG_DYNAMIC_FTRACE */
|
|
#endif /* __ASSEMBLY__ */
|
|
#endif /* CONFIG_FUNCTION_TRACER */
|
|
|
|
|
|
#ifndef __ASSEMBLY__
|
|
|
|
#if defined(CONFIG_FUNCTION_TRACER) && defined(CONFIG_DYNAMIC_FTRACE)
|
|
extern void set_ftrace_ops_ro(void);
|
|
#else
|
|
static inline void set_ftrace_ops_ro(void) { }
|
|
#endif
|
|
|
|
#define ARCH_HAS_SYSCALL_MATCH_SYM_NAME
|
|
static inline bool arch_syscall_match_sym_name(const char *sym, const char *name)
|
|
{
|
|
/*
|
|
* Compare the symbol name with the system call name. Skip the
|
|
* "__x64_sys", "__ia32_sys" or simple "sys" prefix.
|
|
*/
|
|
return !strcmp(sym + 3, name + 3) ||
|
|
(!strncmp(sym, "__x64_", 6) && !strcmp(sym + 9, name + 3)) ||
|
|
(!strncmp(sym, "__ia32_", 7) && !strcmp(sym + 10, name + 3));
|
|
}
|
|
|
|
#ifndef COMPILE_OFFSETS
|
|
|
|
#if defined(CONFIG_FTRACE_SYSCALLS) && defined(CONFIG_IA32_EMULATION)
|
|
#include <linux/compat.h>
|
|
|
|
/*
|
|
* Because ia32 syscalls do not map to x86_64 syscall numbers
|
|
* this screws up the trace output when tracing a ia32 task.
|
|
* Instead of reporting bogus syscalls, just do not trace them.
|
|
*
|
|
* If the user really wants these, then they should use the
|
|
* raw syscall tracepoints with filtering.
|
|
*/
|
|
#define ARCH_TRACE_IGNORE_COMPAT_SYSCALLS 1
|
|
static inline bool arch_trace_is_compat_syscall(struct pt_regs *regs)
|
|
{
|
|
return in_32bit_syscall();
|
|
}
|
|
#endif /* CONFIG_FTRACE_SYSCALLS && CONFIG_IA32_EMULATION */
|
|
#endif /* !COMPILE_OFFSETS */
|
|
#endif /* !__ASSEMBLY__ */
|
|
|
|
#endif /* _ASM_X86_FTRACE_H */
|