From df304c2d0dfd63c40561a8107a217e84fc3515e8 Mon Sep 17 00:00:00 2001
From: Will Deacon
Date: Wed, 3 Mar 2021 13:49:26 +0000
Subject: [PATCH 01/14] arm64: cpufeatures: Fix handling of CONFIG_CMDLINE for idreg overrides

The built-in kernel command line (CONFIG_CMDLINE) can be configured in
three different ways:

  1. CMDLINE_FORCE: Use CONFIG_CMDLINE instead of any bootloader args
  2. CMDLINE_EXTEND: Append the bootloader args to CONFIG_CMDLINE
  3. CMDLINE_FROM_BOOTLOADER: Only use CONFIG_CMDLINE if there aren't
     any bootloader args.

The early cmdline parsing to detect idreg overrides gets (2) and (3)
slightly wrong: in the case of (2) the bootloader args are parsed first
and in the case of (3) the CMDLINE is always parsed.

Fix these issues by moving the bootargs parsing out into a helper
function and following the same logic as that used by the EFI stub.

Reviewed-by: Marc Zyngier
Fixes: 33200303553d ("arm64: cpufeature: Add an early command-line cpufeature override facility")
Link: https://lore.kernel.org/r/20210303134927.18975-2-will@kernel.org
Signed-off-by: Will Deacon
---
 arch/arm64/kernel/idreg-override.c | 50 +++++++++++++++++-------------
 1 file changed, 28 insertions(+), 22 deletions(-)

diff --git a/arch/arm64/kernel/idreg-override.c b/arch/arm64/kernel/idreg-override.c
index dffb16682330..cc071712c6f9 100644
--- a/arch/arm64/kernel/idreg-override.c
+++ b/arch/arm64/kernel/idreg-override.c
@@ -163,33 +163,39 @@ static __init void __parse_cmdline(const char *cmdline, bool parse_aliases)
 	} while (1);
 }
 
+static __init const u8 *get_bootargs_cmdline(void)
+{
+	const u8 *prop;
+	void *fdt;
+	int node;
+
+	fdt = get_early_fdt_ptr();
+	if (!fdt)
+		return NULL;
+
+	node = fdt_path_offset(fdt, "/chosen");
+	if (node < 0)
+		return NULL;
+
+	prop = fdt_getprop(fdt, node, "bootargs", NULL);
+	if (!prop)
+		return NULL;
+
+	return strlen(prop) ? prop : NULL;
+}
+
 static __init void parse_cmdline(void)
 {
-	if (!IS_ENABLED(CONFIG_CMDLINE_FORCE)) {
-		const u8 *prop;
-		void *fdt;
-		int node;
+	const u8 *prop = get_bootargs_cmdline();
 
-		fdt = get_early_fdt_ptr();
-		if (!fdt)
-			goto out;
-
-		node = fdt_path_offset(fdt, "/chosen");
-		if (node < 0)
-			goto out;
-
-		prop = fdt_getprop(fdt, node, "bootargs", NULL);
-		if (!prop)
-			goto out;
-
-		__parse_cmdline(prop, true);
-
-		if (!IS_ENABLED(CONFIG_CMDLINE_EXTEND))
-			return;
+	if (IS_ENABLED(CONFIG_CMDLINE_EXTEND) ||
+	    IS_ENABLED(CONFIG_CMDLINE_FORCE) ||
+	    !prop) {
+		__parse_cmdline(CONFIG_CMDLINE, true);
 	}
 
-out:
-	__parse_cmdline(CONFIG_CMDLINE, true);
+	if (!IS_ENABLED(CONFIG_CMDLINE_FORCE) && prop)
+		__parse_cmdline(prop, true);
 }
 
 /* Keep checkers quiet */

From cae118b6acc309539b33339e846cbb19187c164c Mon Sep 17 00:00:00 2001
From: Will Deacon
Date: Wed, 3 Mar 2021 13:49:27 +0000
Subject: [PATCH 02/14] arm64: Drop support for CMDLINE_EXTEND

The documented behaviour for CMDLINE_EXTEND is that the arguments from
the bootloader are appended to the built-in kernel command line. This
also matches the option parsing behaviour for the EFI stub and early ID
register overrides.

Bizarrely, the fdt behaviour is the other way around: appending the
built-in command line to the bootloader arguments, resulting in a
command line that doesn't necessarily line up with the parsing order
and definitely doesn't line up with the documented behaviour.

As it turns out, there is a proposal [1] to replace CMDLINE_EXTEND with
CMDLINE_PREPEND and CMDLINE_APPEND options which should hopefully make
the intended behaviour much clearer.
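To illustrate the mismatch, a minimal sketch with hypothetical buffer
names (not the actual fdt code):

	/*
	 * Documented CMDLINE_EXTEND behaviour, as implemented by the
	 * EFI stub and the early idreg-override parsing: bootloader
	 * arguments are appended to the built-in string.
	 */
	strlcpy(cmdline, CONFIG_CMDLINE, COMMAND_LINE_SIZE);
	strlcat(cmdline, " ", COMMAND_LINE_SIZE);
	strlcat(cmdline, bootargs, COMMAND_LINE_SIZE);

	/*
	 * Current fdt behaviour: the built-in string is appended to
	 * the bootloader arguments instead.
	 */
	strlcpy(cmdline, bootargs, COMMAND_LINE_SIZE);
	strlcat(cmdline, " ", COMMAND_LINE_SIZE);
	strlcat(cmdline, CONFIG_CMDLINE, COMMAND_LINE_SIZE);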
While we wait for those to land, drop CMDLINE_EXTEND for now, as there
appears to be little enthusiasm for changing the current FDT behaviour.

[1] https://lore.kernel.org/lkml/20190319232448.45964-2-danielwa@cisco.com/

Cc: Max Uvarov
Cc: Rob Herring
Cc: Ard Biesheuvel
Cc: Marc Zyngier
Cc: Doug Anderson
Cc: Tyler Hicks
Cc: Frank Rowand
Cc: Catalin Marinas
Link: https://lore.kernel.org/r/CAL_JsqJX=TCCs7=gg486r9TN4NYscMTCLNfqJF9crskKPq-bTg@mail.gmail.com
Link: https://lore.kernel.org/r/20210303134927.18975-3-will@kernel.org
Signed-off-by: Will Deacon
---
 arch/arm64/Kconfig                 | 6 ------
 arch/arm64/kernel/idreg-override.c | 5 +----
 2 files changed, 1 insertion(+), 10 deletions(-)

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 1f212b47a48a..f15418332d16 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -1855,12 +1855,6 @@ config CMDLINE_FROM_BOOTLOADER
 	  the boot loader doesn't provide any, the default kernel command
 	  string provided in CMDLINE will be used.
 
-config CMDLINE_EXTEND
-	bool "Extend bootloader kernel arguments"
-	help
-	  The command-line arguments provided by the boot loader will be
-	  appended to the default kernel command string.
-
 config CMDLINE_FORCE
 	bool "Always use the default kernel command string"
 	help
diff --git a/arch/arm64/kernel/idreg-override.c b/arch/arm64/kernel/idreg-override.c
index cc071712c6f9..83f1c4b92095 100644
--- a/arch/arm64/kernel/idreg-override.c
+++ b/arch/arm64/kernel/idreg-override.c
@@ -188,11 +188,8 @@ static __init void parse_cmdline(void)
 {
 	const u8 *prop = get_bootargs_cmdline();
 
-	if (IS_ENABLED(CONFIG_CMDLINE_EXTEND) ||
-	    IS_ENABLED(CONFIG_CMDLINE_FORCE) ||
-	    !prop) {
+	if (IS_ENABLED(CONFIG_CMDLINE_FORCE) || !prop)
 		__parse_cmdline(CONFIG_CMDLINE, true);
-	}
 
 	if (!IS_ENABLED(CONFIG_CMDLINE_FORCE) && prop)
 		__parse_cmdline(prop, true);

From 07fb6dc327f108937881a096ec6e367a07a7395d Mon Sep 17 00:00:00 2001
From: Anshuman Khandual
Date: Mon, 1 Mar 2021 10:36:32 +0530
Subject: [PATCH 03/14] arm64/mm: Drop redundant ARCH_WANT_HUGE_PMD_SHARE

ARCH_WANT_HUGE_PMD_SHARE is already being selected for the applicable
configurations, so just drop the other, redundant entry.

Cc: Catalin Marinas
Cc: Will Deacon
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Anshuman Khandual
Link: https://lore.kernel.org/r/1614575192-21307-1-git-send-email-anshuman.khandual@arm.com
Signed-off-by: Will Deacon
---
 arch/arm64/Kconfig | 2 --
 1 file changed, 2 deletions(-)

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index f15418332d16..6f36732dc11a 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -1055,8 +1055,6 @@ config HW_PERF_EVENTS
 config SYS_SUPPORTS_HUGETLBFS
 	def_bool y
 
-config ARCH_WANT_HUGE_PMD_SHARE
-
 config ARCH_HAS_CACHE_LINE_SIZE
 	def_bool y
 

From 79cc2ed5a716544621b11a3f90550e5c7d314306 Mon Sep 17 00:00:00 2001
From: Anshuman Khandual
Date: Mon, 1 Mar 2021 16:55:14 +0530
Subject: [PATCH 04/14] arm64/mm: Drop THP conditionality from FORCE_MAX_ZONEORDER

Currently, without THP enabled, MAX_ORDER (via FORCE_MAX_ZONEORDER)
gets reduced to 11, which falls below HUGETLB_PAGE_ORDER for certain
16K and 64K page size configurations. This is problematic and triggers
the following warning during boot, as pageblock_order (derived from
HUGETLB_PAGE_ORDER) exceeds MAX_ORDER.
WARNING: CPU: 7 PID: 127 at mm/vmstat.c:1092 __fragmentation_index+0x58/0x70
Modules linked in:
CPU: 7 PID: 127 Comm: kswapd0 Not tainted 5.12.0-rc1-00005-g0221e3101a1 #237
Hardware name: linux,dummy-virt (DT)
pstate: 20400005 (nzCv daif +PAN -UAO -TCO BTYPE=--)
pc : __fragmentation_index+0x58/0x70
lr : fragmentation_index+0x88/0xa8
sp : ffff800016ccfc00
x29: ffff800016ccfc00 x28: 0000000000000000 x27: ffff800011fd4000
x26: 0000000000000002 x25: ffff800016ccfda0 x24: 0000000000000002
x23: 0000000000000640 x22: ffff0005ffcb5b18 x21: 0000000000000002
x20: 000000000000000d x19: ffff0005ffcb3980 x18: 0000000000000004
x17: 0000000000000001 x16: 0000000000000019 x15: ffff800011ca7fb8
x14: 00000000000002b3 x13: 0000000000000000 x12: 00000000000005e0
x11: 0000000000000003 x10: 0000000000000080 x9 : ffff800011c93948
x8 : 0000000000000000 x7 : 0000000000000000 x6 : 0000000000007000
x5 : 0000000000007944 x4 : 0000000000000032 x3 : 000000000000001c
x2 : 000000000000000b x1 : ffff800016ccfc10 x0 : 000000000000000d
Call trace:
 __fragmentation_index+0x58/0x70
 compaction_suitable+0x58/0x78
 wakeup_kcompactd+0x8c/0xd8
 balance_pgdat+0x570/0x5d0
 kswapd+0x1e0/0x388
 kthread+0x154/0x158
 ret_from_fork+0x10/0x30

This solves the problem by keeping FORCE_MAX_ZONEORDER unchanged with
or without THP on 16K and 64K page size configurations, making sure
that HUGETLB_PAGE_ORDER (and hence pageblock_order) can never exceed
MAX_ORDER.

Cc: Catalin Marinas
Cc: Will Deacon
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Anshuman Khandual
Acked-by: Catalin Marinas
Link: https://lore.kernel.org/r/1614597914-28565-1-git-send-email-anshuman.khandual@arm.com
Signed-off-by: Will Deacon
---
 arch/arm64/Kconfig | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 6f36732dc11a..5656e7aacd69 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -1155,8 +1155,8 @@ config XEN
 
 config FORCE_MAX_ZONEORDER
 	int
-	default "14" if (ARM64_64K_PAGES && TRANSPARENT_HUGEPAGE)
-	default "12" if (ARM64_16K_PAGES && TRANSPARENT_HUGEPAGE)
+	default "14" if ARM64_64K_PAGES
+	default "12" if ARM64_16K_PAGES
 	default "11"
 	help
 	  The kernel memory allocator divides physically contiguous memory

From eeb0753ba27b26f609e61f9950b14f1b934fe429 Mon Sep 17 00:00:00 2001
From: Anshuman Khandual
Date: Fri, 5 Mar 2021 10:54:57 +0530
Subject: [PATCH 05/14] arm64/mm: Fix pfn_valid() for ZONE_DEVICE based memory

pfn_valid() validates a pfn, but what it fundamentally checks for is a
valid struct page backing that pfn. It should therefore return true for
any memory range backed by a struct page mapping. But currently
pfn_valid() fails for all ZONE_DEVICE based memory types, even though
they do have a struct page mapping.

pfn_valid() asserts that there is a memblock entry for a given pfn
without the MEMBLOCK_NOMAP flag set. The problem with ZONE_DEVICE based
memory is that it does not have memblock entries, so
memblock_is_map_memory() will invariably fail via memblock_search() for
a ZONE_DEVICE based address. This in turn fails pfn_valid(), which is
wrong: memblock_is_map_memory() needs to be skipped for such memory
ranges. As ZONE_DEVICE memory gets hotplugged into the system via
memremap_pages() called from a driver, its memory sections will not
have SECTION_IS_EARLY set.

Normal hotplugged memory, on the other hand, will never have
MEMBLOCK_NOMAP set in its memblock regions, because that flag was
specifically designed for, and is only set on, firmware-reserved memory
regions.
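In short, the cases line up as follows (a summary table of the
reasoning above and below):

	pfn backing              memblock entry       SECTION_IS_EARLY
	---------------------    -----------------    ----------------
	boot memory              yes (maybe NOMAP)    yes
	normal hotplug memory    yes (never NOMAP)    no
	ZONE_DEVICE memory       no                   no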
For normal hotplugged memory, then, memblock_is_map_memory() can simply
be skipped, as it is always going to return true, so skipping it there
is also an optimization. Like ZONE_DEVICE based memory, normal
hotplugged memory does not have SECTION_IS_EARLY set for its sections.
Skipping memblock_is_map_memory() for all non-early memory sections
therefore fixes the pfn_valid() problem for ZONE_DEVICE based memory
and also improves its performance for normal hotplugged memory.

Cc: Catalin Marinas
Cc: Will Deacon
Cc: Ard Biesheuvel
Cc: Robin Murphy
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-kernel@vger.kernel.org
Acked-by: David Hildenbrand
Fixes: 73b20c84d42d ("arm64: mm: implement pte_devmap support")
Signed-off-by: Anshuman Khandual
Acked-by: Catalin Marinas
Link: https://lore.kernel.org/r/1614921898-4099-2-git-send-email-anshuman.khandual@arm.com
Signed-off-by: Will Deacon
---
 arch/arm64/mm/init.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 0ace5e68efba..5920c527845a 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -230,6 +230,18 @@ int pfn_valid(unsigned long pfn)
 
 	if (!valid_section(__pfn_to_section(pfn)))
 		return 0;
+
+	/*
+	 * ZONE_DEVICE memory does not have the memblock entries.
+	 * memblock_is_map_memory() check for ZONE_DEVICE based
+	 * addresses will always fail. Even the normal hotplugged
+	 * memory will never have MEMBLOCK_NOMAP flag set in their
+	 * memblock entries. Skip memblock search for all non early
+	 * memory sections covering all of hotplug memory including
+	 * both normal and ZONE_DEVICE based.
+	 */
+	if (!early_section(__pfn_to_section(pfn)))
+		return pfn_section_valid(__pfn_to_section(pfn), pfn);
 #endif
 	return memblock_is_map_memory(addr);
 }

From 093bbe211ea566fa828536275e09ee9d75df1f25 Mon Sep 17 00:00:00 2001
From: Anshuman Khandual
Date: Fri, 5 Mar 2021 10:54:58 +0530
Subject: [PATCH 06/14] arm64/mm: Reorganize pfn_valid()

There are multiple instances of pfn_to_section_nr() and
__pfn_to_section() when CONFIG_SPARSEMEM is enabled. This can be
optimized if the memory section is fetched once, earlier. This also
replaces the open-coded PFN and address conversions with the PFN_PHYS()
and PHYS_PFN() helpers and, while there, adds a clarifying comment.
This does not cause any functional change.

Cc: Catalin Marinas
Cc: Will Deacon
Cc: Ard Biesheuvel
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-kernel@vger.kernel.org
Reviewed-by: David Hildenbrand
Signed-off-by: Anshuman Khandual
Acked-by: Catalin Marinas
Link: https://lore.kernel.org/r/1614921898-4099-3-git-send-email-anshuman.khandual@arm.com
Signed-off-by: Will Deacon
---
 arch/arm64/mm/init.c | 21 ++++++++++++++++-----
 1 file changed, 16 insertions(+), 5 deletions(-)

diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 5920c527845a..3685e12aba9b 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -219,16 +219,26 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max)
 
 int pfn_valid(unsigned long pfn)
 {
-	phys_addr_t addr = pfn << PAGE_SHIFT;
+	phys_addr_t addr = PFN_PHYS(pfn);
 
-	if ((addr >> PAGE_SHIFT) != pfn)
+	/*
+	 * Ensure the upper PAGE_SHIFT bits are clear in the
+	 * pfn. Else it might lead to false positives when
+	 * some of the upper bits are set, but the lower bits
+	 * match a valid pfn.
+ */ + if (PHYS_PFN(addr) != pfn) return 0; #ifdef CONFIG_SPARSEMEM +{ + struct mem_section *ms; + if (pfn_to_section_nr(pfn) >= NR_MEM_SECTIONS) return 0; - if (!valid_section(__pfn_to_section(pfn))) + ms = __pfn_to_section(pfn); + if (!valid_section(ms)) return 0; /* @@ -240,8 +250,9 @@ int pfn_valid(unsigned long pfn) * memory sections covering all of hotplug memory including * both normal and ZONE_DEVICE based. */ - if (!early_section(__pfn_to_section(pfn))) - return pfn_section_valid(__pfn_to_section(pfn), pfn); + if (!early_section(ms)) + return pfn_section_valid(ms, pfn); +} #endif return memblock_is_map_memory(addr); } From 86c83365ab76e4b43cedd3ce07a07d32a4dc79ba Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Mon, 8 Mar 2021 17:10:23 +0100 Subject: [PATCH 07/14] arm64: kasan: fix page_alloc tagging with DEBUG_VIRTUAL When CONFIG_DEBUG_VIRTUAL is enabled, the default page_to_virt() macro implementation from include/linux/mm.h is used. That definition doesn't account for KASAN tags, which leads to no tags on page_alloc allocations. Provide an arm64-specific definition for page_to_virt() when CONFIG_DEBUG_VIRTUAL is enabled that takes care of KASAN tags. Fixes: 2813b9c02962 ("kasan, mm, arm64: tag non slab memory allocated via pagealloc") Cc: Signed-off-by: Andrey Konovalov Reviewed-by: Catalin Marinas Link: https://lore.kernel.org/r/4b55b35202706223d3118230701c6a59749d9b72.1615219501.git.andreyknvl@google.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/memory.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index c759faf7a1ff..0aabc3be9a75 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -328,6 +328,11 @@ static inline void *phys_to_virt(phys_addr_t x) #define ARCH_PFN_OFFSET ((unsigned long)PHYS_PFN_OFFSET) #if !defined(CONFIG_SPARSEMEM_VMEMMAP) || defined(CONFIG_DEBUG_VIRTUAL) +#define page_to_virt(x) ({ \ + __typeof__(x) __page = x; \ + void *__addr = __va(page_to_phys(__page)); \ + (void *)__tag_set((const void *)__addr, page_kasan_tag(__page));\ +}) #define virt_to_page(x) pfn_to_page(virt_to_pfn(x)) #else #define page_to_virt(x) ({ \ From d15dfd31384ba3cb93150e5f87661a76fa419f74 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Tue, 9 Mar 2021 12:26:01 +0000 Subject: [PATCH 08/14] arm64: mte: Map hotplugged memory as Normal Tagged In a system supporting MTE, the linear map must allow reading/writing allocation tags by setting the memory type as Normal Tagged. Currently, this is only handled for memory present at boot. Hotplugged memory uses Normal non-Tagged memory. Introduce pgprot_mhp() for hotplugged memory and use it in add_memory_resource(). The arm64 code maps pgprot_mhp() to pgprot_tagged(). Note that ZONE_DEVICE memory should not be mapped as Tagged and therefore setting the memory type in arch_add_memory() is not feasible. 
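A condensed view of the change, for orientation (all three pieces are
taken from the full diff that follows):

	/* include/linux/pgtable.h: generic fallback, a no-op */
	#ifndef pgprot_mhp
	#define pgprot_mhp(prot)	(prot)
	#endif

	/* arch/arm64/include/asm/pgtable.h: map to Normal Tagged */
	#define pgprot_mhp	pgprot_tagged

	/* mm/memory_hotplug.c: add_memory_resource() */
	struct mhp_params params = { .pgprot = pgprot_mhp(PAGE_KERNEL) };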
Signed-off-by: Catalin Marinas Fixes: 0178dc761368 ("arm64: mte: Use Normal Tagged attributes for the linear map") Reported-by: Patrick Daly Tested-by: Patrick Daly Link: https://lore.kernel.org/r/1614745263-27827-1-git-send-email-pdaly@codeaurora.org Cc: # 5.10.x Cc: Will Deacon Cc: Andrew Morton Cc: Vincenzo Frascino Cc: David Hildenbrand Reviewed-by: David Hildenbrand Reviewed-by: Vincenzo Frascino Reviewed-by: Anshuman Khandual Link: https://lore.kernel.org/r/20210309122601.5543-1-catalin.marinas@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/pgtable-prot.h | 1 - arch/arm64/include/asm/pgtable.h | 3 +++ arch/arm64/mm/mmu.c | 3 ++- include/linux/pgtable.h | 4 ++++ mm/memory_hotplug.c | 2 +- 5 files changed, 10 insertions(+), 3 deletions(-) diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h index 046be789fbb4..9a65fb528110 100644 --- a/arch/arm64/include/asm/pgtable-prot.h +++ b/arch/arm64/include/asm/pgtable-prot.h @@ -66,7 +66,6 @@ extern bool arm64_use_ng_mappings; #define _PAGE_DEFAULT (_PROT_DEFAULT | PTE_ATTRINDX(MT_NORMAL)) #define PAGE_KERNEL __pgprot(PROT_NORMAL) -#define PAGE_KERNEL_TAGGED __pgprot(PROT_NORMAL_TAGGED) #define PAGE_KERNEL_RO __pgprot((PROT_NORMAL & ~PTE_WRITE) | PTE_RDONLY) #define PAGE_KERNEL_ROX __pgprot((PROT_NORMAL & ~(PTE_WRITE | PTE_PXN)) | PTE_RDONLY) #define PAGE_KERNEL_EXEC __pgprot(PROT_NORMAL & ~PTE_PXN) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index e17b96d0e4b5..47027796c2f9 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -486,6 +486,9 @@ static inline pmd_t pmd_mkdevmap(pmd_t pmd) __pgprot_modify(prot, PTE_ATTRINDX_MASK, PTE_ATTRINDX(MT_NORMAL_NC) | PTE_PXN | PTE_UXN) #define pgprot_device(prot) \ __pgprot_modify(prot, PTE_ATTRINDX_MASK, PTE_ATTRINDX(MT_DEVICE_nGnRE) | PTE_PXN | PTE_UXN) +#define pgprot_tagged(prot) \ + __pgprot_modify(prot, PTE_ATTRINDX_MASK, PTE_ATTRINDX(MT_NORMAL_TAGGED)) +#define pgprot_mhp pgprot_tagged /* * DMA allocations for non-coherent devices use what the Arm architecture calls * "Normal non-cacheable" memory, which permits speculation, unaligned accesses diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 3802cfbdd20d..9c8aa1b44cd5 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -512,7 +512,8 @@ static void __init map_mem(pgd_t *pgdp) * if MTE is present. Otherwise, it has the same attributes as * PAGE_KERNEL. 
 	 */
-	__map_memblock(pgdp, start, end, PAGE_KERNEL_TAGGED, flags);
+	__map_memblock(pgdp, start, end, pgprot_tagged(PAGE_KERNEL),
+		       flags);
 }
 
 /*
diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
index cdfc4e9f253e..5e772392a379 100644
--- a/include/linux/pgtable.h
+++ b/include/linux/pgtable.h
@@ -904,6 +904,10 @@ static inline void ptep_modify_prot_commit(struct vm_area_struct *vma,
 #define pgprot_device pgprot_noncached
 #endif
 
+#ifndef pgprot_mhp
+#define pgprot_mhp(prot)	(prot)
+#endif
+
 #ifdef CONFIG_MMU
 #ifndef pgprot_modify
 #define pgprot_modify pgprot_modify
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 5ba51a8bdaeb..0cdbbfbc5757 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1072,7 +1072,7 @@ static int online_memory_block(struct memory_block *mem, void *arg)
  */
 int __ref add_memory_resource(int nid, struct resource *res, mhp_t mhp_flags)
 {
-	struct mhp_params params = { .pgprot = PAGE_KERNEL };
+	struct mhp_params params = { .pgprot = pgprot_mhp(PAGE_KERNEL) };
 	u64 start, size;
 	bool new_node = false;
 	int ret;

From 07e644885bf6727a48db109fad053cb43f3c9859 Mon Sep 17 00:00:00 2001
From: Mark Brown
Date: Tue, 9 Mar 2021 19:03:04 +0000
Subject: [PATCH 09/14] kselftest: arm64: Fix exit code of sve-ptrace

We track in a variable whether sve-ptrace encountered a failure, but
don't actually use that value when we exit the program. Do so.

Signed-off-by: Mark Brown
Link: https://lore.kernel.org/r/20210309190304.39169-1-broonie@kernel.org
Signed-off-by: Will Deacon
---
 tools/testing/selftests/arm64/fp/sve-ptrace.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/selftests/arm64/fp/sve-ptrace.c b/tools/testing/selftests/arm64/fp/sve-ptrace.c
index b2282be6f938..612d3899614a 100644
--- a/tools/testing/selftests/arm64/fp/sve-ptrace.c
+++ b/tools/testing/selftests/arm64/fp/sve-ptrace.c
@@ -332,5 +332,5 @@ int main(void)
 
 	ksft_print_cnts();
 
-	return 0;
+	return ret;
 }

From 26f55386f964cefa92ab7ccbed68f1a313074215 Mon Sep 17 00:00:00 2001
From: James Morse
Date: Wed, 10 Mar 2021 11:23:10 +0530
Subject: [PATCH 10/14] arm64/mm: Fix __enable_mmu() for new TGRAN range values

As per ARM ARM DDI 0487G.a, when FEAT_LPA2 is implemented,
ID_AA64MMFR0_EL1 might contain a range of values to describe supported
translation granules (4K and 16K page sizes in particular) instead of
just enabled or disabled values. This changes the __enable_mmu()
function to handle the complete acceptable range of values (depending
on whether the field is signed or unsigned), now represented with the
ID_AA64MMFR0_TGRAN_SUPPORTED_[MIN..MAX] pair.

While here, fix similar situations in the EFI stub and KVM as well.
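To illustrate, the equivalent C-level range check (a sketch mirroring
the EFI stub hunk below, not additional kernel code):

	u64 tg;

	tg = (read_cpuid(ID_AA64MMFR0_EL1) >> ID_AA64MMFR0_TGRAN_SHIFT) & 0xf;

	/*
	 * e.g. with 64K pages, "not supported" (0xF) extracts as 15,
	 * which fails the ID_AA64MMFR0_TGRAN_SUPPORTED_MAX (0x7) bound.
	 */
	if (tg < ID_AA64MMFR0_TGRAN_SUPPORTED_MIN ||
	    tg > ID_AA64MMFR0_TGRAN_SUPPORTED_MAX)
		return -EINVAL;	/* granule not supported */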
Cc: Catalin Marinas Cc: Will Deacon Cc: Marc Zyngier Cc: James Morse Cc: Suzuki K Poulose Cc: Ard Biesheuvel Cc: Mark Rutland Cc: linux-arm-kernel@lists.infradead.org Cc: kvmarm@lists.cs.columbia.edu Cc: linux-efi@vger.kernel.org Cc: linux-kernel@vger.kernel.org Acked-by: Marc Zyngier Signed-off-by: James Morse Signed-off-by: Anshuman Khandual Link: https://lore.kernel.org/r/1615355590-21102-1-git-send-email-anshuman.khandual@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/sysreg.h | 20 ++++++++++++++------ arch/arm64/kernel/head.S | 6 ++++-- arch/arm64/kvm/reset.c | 10 ++++++---- drivers/firmware/efi/libstub/arm64-stub.c | 2 +- 4 files changed, 25 insertions(+), 13 deletions(-) diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index dfd4edbfe360..d4a5fca984c3 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -796,6 +796,11 @@ #define ID_AA64MMFR0_PARANGE_48 0x5 #define ID_AA64MMFR0_PARANGE_52 0x6 +#define ID_AA64MMFR0_TGRAN_2_SUPPORTED_DEFAULT 0x0 +#define ID_AA64MMFR0_TGRAN_2_SUPPORTED_NONE 0x1 +#define ID_AA64MMFR0_TGRAN_2_SUPPORTED_MIN 0x2 +#define ID_AA64MMFR0_TGRAN_2_SUPPORTED_MAX 0x7 + #ifdef CONFIG_ARM64_PA_BITS_52 #define ID_AA64MMFR0_PARANGE_MAX ID_AA64MMFR0_PARANGE_52 #else @@ -961,14 +966,17 @@ #define ID_PFR1_PROGMOD_SHIFT 0 #if defined(CONFIG_ARM64_4K_PAGES) -#define ID_AA64MMFR0_TGRAN_SHIFT ID_AA64MMFR0_TGRAN4_SHIFT -#define ID_AA64MMFR0_TGRAN_SUPPORTED ID_AA64MMFR0_TGRAN4_SUPPORTED +#define ID_AA64MMFR0_TGRAN_SHIFT ID_AA64MMFR0_TGRAN4_SHIFT +#define ID_AA64MMFR0_TGRAN_SUPPORTED_MIN ID_AA64MMFR0_TGRAN4_SUPPORTED +#define ID_AA64MMFR0_TGRAN_SUPPORTED_MAX 0x7 #elif defined(CONFIG_ARM64_16K_PAGES) -#define ID_AA64MMFR0_TGRAN_SHIFT ID_AA64MMFR0_TGRAN16_SHIFT -#define ID_AA64MMFR0_TGRAN_SUPPORTED ID_AA64MMFR0_TGRAN16_SUPPORTED +#define ID_AA64MMFR0_TGRAN_SHIFT ID_AA64MMFR0_TGRAN16_SHIFT +#define ID_AA64MMFR0_TGRAN_SUPPORTED_MIN ID_AA64MMFR0_TGRAN16_SUPPORTED +#define ID_AA64MMFR0_TGRAN_SUPPORTED_MAX 0xF #elif defined(CONFIG_ARM64_64K_PAGES) -#define ID_AA64MMFR0_TGRAN_SHIFT ID_AA64MMFR0_TGRAN64_SHIFT -#define ID_AA64MMFR0_TGRAN_SUPPORTED ID_AA64MMFR0_TGRAN64_SUPPORTED +#define ID_AA64MMFR0_TGRAN_SHIFT ID_AA64MMFR0_TGRAN64_SHIFT +#define ID_AA64MMFR0_TGRAN_SUPPORTED_MIN ID_AA64MMFR0_TGRAN64_SUPPORTED +#define ID_AA64MMFR0_TGRAN_SUPPORTED_MAX 0x7 #endif #define MVFR2_FPMISC_SHIFT 4 diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 66b0e0b66e31..8b469f164091 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -655,8 +655,10 @@ SYM_FUNC_END(__secondary_too_slow) SYM_FUNC_START(__enable_mmu) mrs x2, ID_AA64MMFR0_EL1 ubfx x2, x2, #ID_AA64MMFR0_TGRAN_SHIFT, 4 - cmp x2, #ID_AA64MMFR0_TGRAN_SUPPORTED - b.ne __no_granule_support + cmp x2, #ID_AA64MMFR0_TGRAN_SUPPORTED_MIN + b.lt __no_granule_support + cmp x2, #ID_AA64MMFR0_TGRAN_SUPPORTED_MAX + b.gt __no_granule_support update_early_cpu_boot_status 0, x2, x3 adrp x2, idmap_pg_dir phys_to_ttbr x1, x1 diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index 47f3f035f3ea..e81c7ec9e102 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -311,16 +311,18 @@ int kvm_set_ipa_limit(void) } switch (cpuid_feature_extract_unsigned_field(mmfr0, tgran_2)) { - default: - case 1: + case ID_AA64MMFR0_TGRAN_2_SUPPORTED_NONE: kvm_err("PAGE_SIZE not supported at Stage-2, giving up\n"); return -EINVAL; - case 0: + case ID_AA64MMFR0_TGRAN_2_SUPPORTED_DEFAULT: kvm_debug("PAGE_SIZE supported at Stage-2 (default)\n"); 
 		break;
-	case 2:
+	case ID_AA64MMFR0_TGRAN_2_SUPPORTED_MIN ... ID_AA64MMFR0_TGRAN_2_SUPPORTED_MAX:
 		kvm_debug("PAGE_SIZE supported at Stage-2 (advertised)\n");
 		break;
+	default:
+		kvm_err("Unsupported value for TGRAN_2, giving up\n");
+		return -EINVAL;
 	}
 
 	kvm_ipa_limit = id_aa64mmfr0_parange_to_phys_shift(parange);
diff --git a/drivers/firmware/efi/libstub/arm64-stub.c b/drivers/firmware/efi/libstub/arm64-stub.c
index b69d63143e0d..7bf0a7acae5e 100644
--- a/drivers/firmware/efi/libstub/arm64-stub.c
+++ b/drivers/firmware/efi/libstub/arm64-stub.c
@@ -24,7 +24,7 @@ efi_status_t check_platform_features(void)
 		return EFI_SUCCESS;
 
 	tg = (read_cpuid(ID_AA64MMFR0_EL1) >> ID_AA64MMFR0_TGRAN_SHIFT) & 0xf;
-	if (tg != ID_AA64MMFR0_TGRAN_SUPPORTED) {
+	if (tg < ID_AA64MMFR0_TGRAN_SUPPORTED_MIN || tg > ID_AA64MMFR0_TGRAN_SUPPORTED_MAX) {
 		if (IS_ENABLED(CONFIG_ARM64_64K_PAGES))
 			efi_err("This 64 KB granular kernel is not supported by your CPU\n");
 		else

From 7bb8bc6eb550116c504fb25af8678b9d7ca2abc5 Mon Sep 17 00:00:00 2001
From: Rob Herring
Date: Tue, 9 Mar 2021 17:44:12 -0700
Subject: [PATCH 11/14] arm64: perf: Fix 64-bit event counter read truncation

Commit 0fdf1bb75953 ("arm64: perf: Avoid PMXEV* indirection") changed
armv8pmu_read_evcntr() to return a u32 instead of a u64. The result is
silent truncation of the event counter when using 64-bit counters.
Given that the offending commit appears to have passed through several
hands, it seems likely this was a bad rebase after the v8.5 PMU 64-bit
counters landed.

Cc: Alexandru Elisei
Cc: Julien Thierry
Cc: Mark Rutland
Cc: Will Deacon
Cc: Catalin Marinas
Cc: Peter Zijlstra
Cc: Ingo Molnar
Cc: Arnaldo Carvalho de Melo
Cc: Alexander Shishkin
Cc: Jiri Olsa
Cc: Namhyung Kim
Cc:
Fixes: 0fdf1bb75953 ("arm64: perf: Avoid PMXEV* indirection")
Signed-off-by: Rob Herring
Acked-by: Mark Rutland
Reviewed-by: Alexandru Elisei
Link: https://lore.kernel.org/r/20210310004412.1450128-1-robh@kernel.org
Signed-off-by: Will Deacon
---
 arch/arm64/kernel/perf_event.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c
index 7d2318f80955..4658fcf88c2b 100644
--- a/arch/arm64/kernel/perf_event.c
+++ b/arch/arm64/kernel/perf_event.c
@@ -460,7 +460,7 @@ static inline int armv8pmu_counter_has_overflowed(u32 pmnc, int idx)
 	return pmnc & BIT(ARMV8_IDX_TO_COUNTER(idx));
 }
 
-static inline u32 armv8pmu_read_evcntr(int idx)
+static inline u64 armv8pmu_read_evcntr(int idx)
 {
 	u32 counter = ARMV8_IDX_TO_COUNTER(idx);
 

From 7ba8f2b2d652cd8d8a2ab61f4be66973e70f9f88 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel
Date: Wed, 10 Mar 2021 18:15:11 +0100
Subject: [PATCH 12/14] arm64: mm: use a 48-bit ID map when possible on 52-bit VA builds

52-bit VA kernels can run on hardware that is only 48-bit capable, but
configure the ID map as 52-bit by default. This was not a problem until
recently, because the special T0SZ value for a 52-bit VA space was
never programmed into the TCR register anyway, and because a 52-bit ID
map happens to use the same number of translation levels as a 48-bit
one.

This behavior was changed by commit 1401bef703a4 ("arm64: mm: Always
update TCR_EL1 from __cpu_set_tcr_t0sz()"), which causes the
unsupported T0SZ value for a 52-bit VA to be programmed into TCR_EL1.
While some hardware simply ignores this, Mark reports that Amberwing
systems choke on this, resulting in a broken boot. But even before that
commit, the unsupported idmap_t0sz value was exposed to KVM and used to
program TCR_EL2 incorrectly as well.
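(For reference, T0SZ = 64 - VA bits: a 48-bit ID map programs
T0SZ = 16, whereas a 52-bit one programs T0SZ = 12, a value that is out
of range for 48-bit-only hardware.)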
Given that we already have to deal with address spaces being either 48-bit or 52-bit in size, the cleanest approach seems to be to simply default to a 48-bit VA ID map, and only switch to a 52-bit one if the placement of the kernel in DRAM requires it. This is guaranteed not to happen unless the system is actually 52-bit VA capable. Fixes: 90ec95cda91a ("arm64: mm: Introduce VA_BITS_MIN") Reported-by: Mark Salter Link: http://lore.kernel.org/r/20210310003216.410037-1-msalter@redhat.com Signed-off-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20210310171515.416643-2-ardb@kernel.org Signed-off-by: Will Deacon --- arch/arm64/include/asm/mmu_context.h | 5 +---- arch/arm64/kernel/head.S | 2 +- arch/arm64/mm/mmu.c | 2 +- 3 files changed, 3 insertions(+), 6 deletions(-) diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h index 70ce8c1d2b07..0f467d550f27 100644 --- a/arch/arm64/include/asm/mmu_context.h +++ b/arch/arm64/include/asm/mmu_context.h @@ -65,10 +65,7 @@ extern u64 idmap_ptrs_per_pgd; static inline bool __cpu_uses_extended_idmap(void) { - if (IS_ENABLED(CONFIG_ARM64_VA_BITS_52)) - return false; - - return unlikely(idmap_t0sz != TCR_T0SZ(VA_BITS)); + return unlikely(idmap_t0sz != TCR_T0SZ(vabits_actual)); } /* diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 8b469f164091..840bda1869e9 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -319,7 +319,7 @@ SYM_FUNC_START_LOCAL(__create_page_tables) */ adrp x5, __idmap_text_end clz x5, x5 - cmp x5, TCR_T0SZ(VA_BITS) // default T0SZ small enough? + cmp x5, TCR_T0SZ(VA_BITS_MIN) // default T0SZ small enough? b.ge 1f // .. then skip VA range extension adr_l x6, idmap_t0sz diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 9c8aa1b44cd5..7484ea4f6ba0 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -40,7 +40,7 @@ #define NO_BLOCK_MAPPINGS BIT(0) #define NO_CONT_MAPPINGS BIT(1) -u64 idmap_t0sz = TCR_T0SZ(VA_BITS); +u64 idmap_t0sz = TCR_T0SZ(VA_BITS_MIN); u64 idmap_ptrs_per_pgd = PTRS_PER_PGD; u64 __section(".mmuoff.data.write") vabits_actual; From 30b2675761b8a1a2b6ef56b535ef51b789bb7150 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 10 Mar 2021 18:15:12 +0100 Subject: [PATCH 13/14] arm64: mm: remove unused __cpu_uses_extended_idmap[_level()] These routines lost all existing users during the latest merge window so we can remove them. This avoids the need to fix them in the context of fixing a regression related to the ID map on 52-bit VA kernels. Signed-off-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20210310171515.416643-3-ardb@kernel.org Signed-off-by: Will Deacon --- arch/arm64/include/asm/mmu_context.h | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h index 0f467d550f27..bd02e99b1a4c 100644 --- a/arch/arm64/include/asm/mmu_context.h +++ b/arch/arm64/include/asm/mmu_context.h @@ -63,20 +63,6 @@ static inline void cpu_switch_mm(pgd_t *pgd, struct mm_struct *mm) extern u64 idmap_t0sz; extern u64 idmap_ptrs_per_pgd; -static inline bool __cpu_uses_extended_idmap(void) -{ - return unlikely(idmap_t0sz != TCR_T0SZ(vabits_actual)); -} - -/* - * True if the extended ID map requires an extra level of translation table - * to be configured. - */ -static inline bool __cpu_uses_extended_idmap_level(void) -{ - return ARM64_HW_PGTABLE_LEVELS(64 - idmap_t0sz) > CONFIG_PGTABLE_LEVELS; -} - /* * Ensure TCR.T0SZ is set to the provided value. 
  */

From c8e3866836528a4ba3b0535834f03768d74f7d8e Mon Sep 17 00:00:00 2001
From: Wei Yongjun
Date: Fri, 12 Mar 2021 08:04:21 +0000
Subject: [PATCH 14/14] perf/arm_dmc620_pmu: Fix error return code in dmc620_pmu_device_probe()

Fix the error handling path to return the negative error code -ENOMEM
instead of 0, as done elsewhere in this function.

Fixes: 53c218da220c ("driver/perf: Add PMU driver for the ARM DMC-620 memory controller")
Reported-by: Hulk Robot
Signed-off-by: Wei Yongjun
Link: https://lore.kernel.org/r/20210312080421.277562-1-weiyongjun1@huawei.com
Signed-off-by: Will Deacon
---
 drivers/perf/arm_dmc620_pmu.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/perf/arm_dmc620_pmu.c b/drivers/perf/arm_dmc620_pmu.c
index 66ad5b3ece19..f2a85500258d 100644
--- a/drivers/perf/arm_dmc620_pmu.c
+++ b/drivers/perf/arm_dmc620_pmu.c
@@ -681,6 +681,7 @@ static int dmc620_pmu_device_probe(struct platform_device *pdev)
 	if (!name) {
 		dev_err(&pdev->dev,
 			"Create name failed, PMU @%pa\n", &res->start);
+		ret = -ENOMEM;
 		goto out_teardown_dev;
 	}
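For reference, the surrounding error path then looks roughly like this
(a sketch: the name allocation call is assumed, and only the if-block
is taken from the hunk above):

	name = devm_kasprintf(&pdev->dev, GFP_KERNEL, ...);
	if (!name) {
		dev_err(&pdev->dev,
			"Create name failed, PMU @%pa\n", &res->start);
		ret = -ENOMEM;	/* previously fell through with ret == 0 */
		goto out_teardown_dev;
	}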