diff options
author | Mike Pagano <mpagano@gentoo.org> | 2024-03-06 13:07:47 -0500 |
---|---|---|
committer | Mike Pagano <mpagano@gentoo.org> | 2024-03-06 13:07:47 -0500 |
commit | 8bc8ef53218c6b5ef66b10198d58003c5a0a0397 (patch) | |
tree | 98f175d113a89fd018246d7870fd1073e06a2e98 | |
parent | Linux patch 6.1.80 (diff) | |
download | linux-patches-8bc8ef53218c6b5ef66b10198d58003c5a0a0397.tar.gz linux-patches-8bc8ef53218c6b5ef66b10198d58003c5a0a0397.tar.bz2 linux-patches-8bc8ef53218c6b5ef66b10198d58003c5a0a0397.zip |
Linux patch 6.1.81 (6.1-90)
Signed-off-by: Mike Pagano <mpagano@gentoo.org>
-rw-r--r-- | 0000_README | 4 | ||||
-rw-r--r-- | 1080_linux-6.1.81.patch | 14458 |
2 files changed, 14462 insertions, 0 deletions
diff --git a/0000_README b/0000_README index 0bde520b..d0f067de 100644 --- a/0000_README +++ b/0000_README @@ -363,6 +363,10 @@ Patch: 1079_linux-6.1.80.patch From: https://www.kernel.org Desc: Linux 6.1.80 +Patch: 1080_linux-6.1.81.patch +From: https://www.kernel.org +Desc: Linux 6.1.81 + Patch: 1500_XATTR_USER_PREFIX.patch From: https://bugs.gentoo.org/show_bug.cgi?id=470644 Desc: Support for namespace user.pax.* on tmpfs. diff --git a/1080_linux-6.1.81.patch b/1080_linux-6.1.81.patch new file mode 100644 index 00000000..59f3b367 --- /dev/null +++ b/1080_linux-6.1.81.patch @@ -0,0 +1,14458 @@ +diff --git a/Documentation/x86/boot.rst b/Documentation/x86/boot.rst +index 894a198970055..bac3789f3e8fa 100644 +--- a/Documentation/x86/boot.rst ++++ b/Documentation/x86/boot.rst +@@ -1416,7 +1416,7 @@ execution context provided by the EFI firmware. + + The function prototype for the handover entry point looks like this:: + +- efi_main(void *handle, efi_system_table_t *table, struct boot_params *bp) ++ efi_stub_entry(void *handle, efi_system_table_t *table, struct boot_params *bp) + + 'handle' is the EFI image handle passed to the boot loader by the EFI + firmware, 'table' is the EFI system table - these are the first two +diff --git a/Documentation/x86/mds.rst b/Documentation/x86/mds.rst +index 5d4330be200f9..e801df0bb3a81 100644 +--- a/Documentation/x86/mds.rst ++++ b/Documentation/x86/mds.rst +@@ -95,6 +95,9 @@ The kernel provides a function to invoke the buffer clearing: + + mds_clear_cpu_buffers() + ++Also macro CLEAR_CPU_BUFFERS can be used in ASM late in exit-to-user path. ++Other than CFLAGS.ZF, this macro doesn't clobber any registers. ++ + The mitigation is invoked on kernel/userspace, hypervisor/guest and C-state + (idle) transitions. + +@@ -138,17 +141,30 @@ Mitigation points + + When transitioning from kernel to user space the CPU buffers are flushed + on affected CPUs when the mitigation is not disabled on the kernel +- command line. 
The migitation is enabled through the static key +- mds_user_clear. +- +- The mitigation is invoked in prepare_exit_to_usermode() which covers +- all but one of the kernel to user space transitions. The exception +- is when we return from a Non Maskable Interrupt (NMI), which is +- handled directly in do_nmi(). +- +- (The reason that NMI is special is that prepare_exit_to_usermode() can +- enable IRQs. In NMI context, NMIs are blocked, and we don't want to +- enable IRQs with NMIs blocked.) ++ command line. The mitigation is enabled through the feature flag ++ X86_FEATURE_CLEAR_CPU_BUF. ++ ++ The mitigation is invoked just before transitioning to userspace after ++ user registers are restored. This is done to minimize the window in ++ which kernel data could be accessed after VERW e.g. via an NMI after ++ VERW. ++ ++ **Corner case not handled** ++ Interrupts returning to kernel don't clear CPUs buffers since the ++ exit-to-user path is expected to do that anyways. But, there could be ++ a case when an NMI is generated in kernel after the exit-to-user path ++ has cleared the buffers. This case is not handled and NMI returning to ++ kernel don't clear CPU buffers because: ++ ++ 1. It is rare to get an NMI after VERW, but before returning to userspace. ++ 2. For an unprivileged user, there is no known way to make that NMI ++ less rare or target it. ++ 3. It would take a large number of these precisely-timed NMIs to mount ++ an actual attack. There's presumably not enough bandwidth. ++ 4. The NMI in question occurs after a VERW, i.e. when user state is ++ restored and most interesting data is already scrubbed. Whats left ++ is only the data that NMI touches, and that may or may not be of ++ any interest. + + + 2. 
C-State transition +diff --git a/MAINTAINERS b/MAINTAINERS +index 13d1078808bb5..bbfedb0b20938 100644 +--- a/MAINTAINERS ++++ b/MAINTAINERS +@@ -10051,6 +10051,7 @@ F: drivers/infiniband/ + F: include/rdma/ + F: include/trace/events/ib_mad.h + F: include/trace/events/ib_umad.h ++F: include/trace/misc/rdma.h + F: include/uapi/linux/if_infiniband.h + F: include/uapi/rdma/ + F: samples/bpf/ibumad_kern.c +@@ -11139,6 +11140,12 @@ F: fs/nfs_common/ + F: fs/nfsd/ + F: include/linux/lockd/ + F: include/linux/sunrpc/ ++F: include/trace/events/rpcgss.h ++F: include/trace/events/rpcrdma.h ++F: include/trace/events/sunrpc.h ++F: include/trace/misc/fs.h ++F: include/trace/misc/nfs.h ++F: include/trace/misc/sunrpc.h + F: include/uapi/linux/nfsd/ + F: include/uapi/linux/sunrpc/ + F: net/sunrpc/ +diff --git a/Makefile b/Makefile +index bc4adb561a7cf..e13df565a1cb6 100644 +--- a/Makefile ++++ b/Makefile +@@ -1,7 +1,7 @@ + # SPDX-License-Identifier: GPL-2.0 + VERSION = 6 + PATCHLEVEL = 1 +-SUBLEVEL = 80 ++SUBLEVEL = 81 + EXTRAVERSION = + NAME = Curry Ramen + +diff --git a/arch/arm/boot/dts/imx23.dtsi b/arch/arm/boot/dts/imx23.dtsi +index ec476b1596496..b236d23f80715 100644 +--- a/arch/arm/boot/dts/imx23.dtsi ++++ b/arch/arm/boot/dts/imx23.dtsi +@@ -59,7 +59,7 @@ icoll: interrupt-controller@80000000 { + reg = <0x80000000 0x2000>; + }; + +- dma_apbh: dma-apbh@80004000 { ++ dma_apbh: dma-controller@80004000 { + compatible = "fsl,imx23-dma-apbh"; + reg = <0x80004000 0x2000>; + interrupts = <0 14 20 0 +diff --git a/arch/arm/boot/dts/imx28.dtsi b/arch/arm/boot/dts/imx28.dtsi +index b15df16ecb01a..b81592a613112 100644 +--- a/arch/arm/boot/dts/imx28.dtsi ++++ b/arch/arm/boot/dts/imx28.dtsi +@@ -78,7 +78,7 @@ hsadc: hsadc@80002000 { + status = "disabled"; + }; + +- dma_apbh: dma-apbh@80004000 { ++ dma_apbh: dma-controller@80004000 { + compatible = "fsl,imx28-dma-apbh"; + reg = <0x80004000 0x2000>; + interrupts = <82 83 84 85 +diff --git a/arch/arm/boot/dts/imx6qdl.dtsi 
b/arch/arm/boot/dts/imx6qdl.dtsi +index ff1e0173b39be..2c6eada01d792 100644 +--- a/arch/arm/boot/dts/imx6qdl.dtsi ++++ b/arch/arm/boot/dts/imx6qdl.dtsi +@@ -150,7 +150,7 @@ soc: soc { + interrupt-parent = <&gpc>; + ranges; + +- dma_apbh: dma-apbh@110000 { ++ dma_apbh: dma-controller@110000 { + compatible = "fsl,imx6q-dma-apbh", "fsl,imx28-dma-apbh"; + reg = <0x00110000 0x2000>; + interrupts = <0 13 IRQ_TYPE_LEVEL_HIGH>, +diff --git a/arch/arm/boot/dts/imx6sx.dtsi b/arch/arm/boot/dts/imx6sx.dtsi +index 1f1053a898fbf..67d344ae76b51 100644 +--- a/arch/arm/boot/dts/imx6sx.dtsi ++++ b/arch/arm/boot/dts/imx6sx.dtsi +@@ -209,7 +209,7 @@ gpu: gpu@1800000 { + power-domains = <&pd_pu>; + }; + +- dma_apbh: dma-apbh@1804000 { ++ dma_apbh: dma-controller@1804000 { + compatible = "fsl,imx6sx-dma-apbh", "fsl,imx28-dma-apbh"; + reg = <0x01804000 0x2000>; + interrupts = <GIC_SPI 13 IRQ_TYPE_LEVEL_HIGH>, +diff --git a/arch/arm/boot/dts/imx6ul.dtsi b/arch/arm/boot/dts/imx6ul.dtsi +index 2b5996395701a..aac081b6daaac 100644 +--- a/arch/arm/boot/dts/imx6ul.dtsi ++++ b/arch/arm/boot/dts/imx6ul.dtsi +@@ -164,7 +164,7 @@ intc: interrupt-controller@a01000 { + <0x00a06000 0x2000>; + }; + +- dma_apbh: dma-apbh@1804000 { ++ dma_apbh: dma-controller@1804000 { + compatible = "fsl,imx6q-dma-apbh", "fsl,imx28-dma-apbh"; + reg = <0x01804000 0x2000>; + interrupts = <0 13 IRQ_TYPE_LEVEL_HIGH>, +diff --git a/arch/arm/boot/dts/imx7s.dtsi b/arch/arm/boot/dts/imx7s.dtsi +index 4b23630fc738d..69aebc691526f 100644 +--- a/arch/arm/boot/dts/imx7s.dtsi ++++ b/arch/arm/boot/dts/imx7s.dtsi +@@ -1267,14 +1267,13 @@ fec1: ethernet@30be0000 { + }; + }; + +- dma_apbh: dma-apbh@33000000 { ++ dma_apbh: dma-controller@33000000 { + compatible = "fsl,imx7d-dma-apbh", "fsl,imx28-dma-apbh"; + reg = <0x33000000 0x2000>; + interrupts = <GIC_SPI 12 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 12 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 12 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 12 IRQ_TYPE_LEVEL_HIGH>; +- interrupt-names = "gpmi0", "gpmi1", "gpmi2", 
"gpmi3"; + #dma-cells = <1>; + dma-channels = <4>; + clocks = <&clks IMX7D_NAND_USDHC_BUS_RAWNAND_CLK>; +diff --git a/arch/arm64/crypto/aes-neonbs-glue.c b/arch/arm64/crypto/aes-neonbs-glue.c +index bac4cabef6073..467ac2f768ac2 100644 +--- a/arch/arm64/crypto/aes-neonbs-glue.c ++++ b/arch/arm64/crypto/aes-neonbs-glue.c +@@ -227,8 +227,19 @@ static int ctr_encrypt(struct skcipher_request *req) + src += blocks * AES_BLOCK_SIZE; + } + if (nbytes && walk.nbytes == walk.total) { ++ u8 buf[AES_BLOCK_SIZE]; ++ u8 *d = dst; ++ ++ if (unlikely(nbytes < AES_BLOCK_SIZE)) ++ src = dst = memcpy(buf + sizeof(buf) - nbytes, ++ src, nbytes); ++ + neon_aes_ctr_encrypt(dst, src, ctx->enc, ctx->key.rounds, + nbytes, walk.iv); ++ ++ if (unlikely(nbytes < AES_BLOCK_SIZE)) ++ memcpy(d, dst, nbytes); ++ + nbytes = 0; + } + kernel_neon_end(); +diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h +index 62c846be2d76a..a75c0772ecfca 100644 +--- a/arch/arm64/include/asm/efi.h ++++ b/arch/arm64/include/asm/efi.h +@@ -103,6 +103,7 @@ static inline void free_screen_info(struct screen_info *si) + } + + #define EFI_ALLOC_ALIGN SZ_64K ++#define EFI_ALLOC_LIMIT ((1UL << 48) - 1) + + /* + * On ARM systems, virtually remapped UEFI runtime services are set up in two +diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c +index 97b026130c71b..1e5f083cdb720 100644 +--- a/arch/powerpc/platforms/pseries/iommu.c ++++ b/arch/powerpc/platforms/pseries/iommu.c +@@ -569,29 +569,6 @@ static void iommu_table_setparms(struct pci_controller *phb, + + struct iommu_table_ops iommu_table_lpar_multi_ops; + +-/* +- * iommu_table_setparms_lpar +- * +- * Function: On pSeries LPAR systems, return TCE table info, given a pci bus. 
+- */ +-static void iommu_table_setparms_lpar(struct pci_controller *phb, +- struct device_node *dn, +- struct iommu_table *tbl, +- struct iommu_table_group *table_group, +- const __be32 *dma_window) +-{ +- unsigned long offset, size, liobn; +- +- of_parse_dma_window(dn, dma_window, &liobn, &offset, &size); +- +- iommu_table_setparms_common(tbl, phb->bus->number, liobn, offset, size, IOMMU_PAGE_SHIFT_4K, NULL, +- &iommu_table_lpar_multi_ops); +- +- +- table_group->tce32_start = offset; +- table_group->tce32_size = size; +-} +- + struct iommu_table_ops iommu_table_pseries_ops = { + .set = tce_build_pSeries, + .clear = tce_free_pSeries, +@@ -719,26 +696,71 @@ struct iommu_table_ops iommu_table_lpar_multi_ops = { + * dynamic 64bit DMA window, walking up the device tree. + */ + static struct device_node *pci_dma_find(struct device_node *dn, +- const __be32 **dma_window) ++ struct dynamic_dma_window_prop *prop) + { +- const __be32 *dw = NULL; ++ const __be32 *default_prop = NULL; ++ const __be32 *ddw_prop = NULL; ++ struct device_node *rdn = NULL; ++ bool default_win = false, ddw_win = false; + + for ( ; dn && PCI_DN(dn); dn = dn->parent) { +- dw = of_get_property(dn, "ibm,dma-window", NULL); +- if (dw) { +- if (dma_window) +- *dma_window = dw; +- return dn; ++ default_prop = of_get_property(dn, "ibm,dma-window", NULL); ++ if (default_prop) { ++ rdn = dn; ++ default_win = true; ++ } ++ ddw_prop = of_get_property(dn, DIRECT64_PROPNAME, NULL); ++ if (ddw_prop) { ++ rdn = dn; ++ ddw_win = true; ++ break; ++ } ++ ddw_prop = of_get_property(dn, DMA64_PROPNAME, NULL); ++ if (ddw_prop) { ++ rdn = dn; ++ ddw_win = true; ++ break; + } +- dw = of_get_property(dn, DIRECT64_PROPNAME, NULL); +- if (dw) +- return dn; +- dw = of_get_property(dn, DMA64_PROPNAME, NULL); +- if (dw) +- return dn; ++ ++ /* At least found default window, which is the case for normal boot */ ++ if (default_win) ++ break; + } + +- return NULL; ++ /* For PCI devices there will always be a DMA window, either on 
the device ++ * or parent bus ++ */ ++ WARN_ON(!(default_win | ddw_win)); ++ ++ /* caller doesn't want to get DMA window property */ ++ if (!prop) ++ return rdn; ++ ++ /* parse DMA window property. During normal system boot, only default ++ * DMA window is passed in OF. But, for kdump, a dedicated adapter might ++ * have both default and DDW in FDT. In this scenario, DDW takes precedence ++ * over default window. ++ */ ++ if (ddw_win) { ++ struct dynamic_dma_window_prop *p; ++ ++ p = (struct dynamic_dma_window_prop *)ddw_prop; ++ prop->liobn = p->liobn; ++ prop->dma_base = p->dma_base; ++ prop->tce_shift = p->tce_shift; ++ prop->window_shift = p->window_shift; ++ } else if (default_win) { ++ unsigned long offset, size, liobn; ++ ++ of_parse_dma_window(rdn, default_prop, &liobn, &offset, &size); ++ ++ prop->liobn = cpu_to_be32((u32)liobn); ++ prop->dma_base = cpu_to_be64(offset); ++ prop->tce_shift = cpu_to_be32(IOMMU_PAGE_SHIFT_4K); ++ prop->window_shift = cpu_to_be32(order_base_2(size)); ++ } ++ ++ return rdn; + } + + static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus) +@@ -746,17 +768,20 @@ static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus) + struct iommu_table *tbl; + struct device_node *dn, *pdn; + struct pci_dn *ppci; +- const __be32 *dma_window = NULL; ++ struct dynamic_dma_window_prop prop; + + dn = pci_bus_to_OF_node(bus); + + pr_debug("pci_dma_bus_setup_pSeriesLP: setting up bus %pOF\n", + dn); + +- pdn = pci_dma_find(dn, &dma_window); ++ pdn = pci_dma_find(dn, &prop); + +- if (dma_window == NULL) +- pr_debug(" no ibm,dma-window property !\n"); ++ /* In PPC architecture, there will always be DMA window on bus or one of the ++ * parent bus. During reboot, there will be ibm,dma-window property to ++ * define DMA window. For kdump, there will at least be default window or DDW ++ * or both. 
++ */ + + ppci = PCI_DN(pdn); + +@@ -766,13 +791,24 @@ static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus) + if (!ppci->table_group) { + ppci->table_group = iommu_pseries_alloc_group(ppci->phb->node); + tbl = ppci->table_group->tables[0]; +- if (dma_window) { +- iommu_table_setparms_lpar(ppci->phb, pdn, tbl, +- ppci->table_group, dma_window); + +- if (!iommu_init_table(tbl, ppci->phb->node, 0, 0)) +- panic("Failed to initialize iommu table"); +- } ++ iommu_table_setparms_common(tbl, ppci->phb->bus->number, ++ be32_to_cpu(prop.liobn), ++ be64_to_cpu(prop.dma_base), ++ 1ULL << be32_to_cpu(prop.window_shift), ++ be32_to_cpu(prop.tce_shift), NULL, ++ &iommu_table_lpar_multi_ops); ++ ++ /* Only for normal boot with default window. Doesn't matter even ++ * if we set these with DDW which is 64bit during kdump, since ++ * these will not be used during kdump. ++ */ ++ ppci->table_group->tce32_start = be64_to_cpu(prop.dma_base); ++ ppci->table_group->tce32_size = 1 << be32_to_cpu(prop.window_shift); ++ ++ if (!iommu_init_table(tbl, ppci->phb->node, 0, 0)) ++ panic("Failed to initialize iommu table"); ++ + iommu_register_group(ppci->table_group, + pci_domain_nr(bus), 0); + pr_debug(" created table: %p\n", ppci->table_group); +@@ -960,6 +996,12 @@ static void find_existing_ddw_windows_named(const char *name) + continue; + } + ++ /* If at the time of system initialization, there are DDWs in OF, ++ * it means this is during kexec. DDW could be direct or dynamic. ++ * We will just mark DDWs as "dynamic" since this is kdump path, ++ * no need to worry about perforance. ddw_list_new_entry() will ++ * set window->direct = false. 
++ */ + window = ddw_list_new_entry(pdn, dma64); + if (!window) { + of_node_put(pdn); +@@ -1525,8 +1567,8 @@ static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev) + { + struct device_node *pdn, *dn; + struct iommu_table *tbl; +- const __be32 *dma_window = NULL; + struct pci_dn *pci; ++ struct dynamic_dma_window_prop prop; + + pr_debug("pci_dma_dev_setup_pSeriesLP: %s\n", pci_name(dev)); + +@@ -1539,7 +1581,7 @@ static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev) + dn = pci_device_to_OF_node(dev); + pr_debug(" node is %pOF\n", dn); + +- pdn = pci_dma_find(dn, &dma_window); ++ pdn = pci_dma_find(dn, &prop); + if (!pdn || !PCI_DN(pdn)) { + printk(KERN_WARNING "pci_dma_dev_setup_pSeriesLP: " + "no DMA window found for pci dev=%s dn=%pOF\n", +@@ -1552,8 +1594,20 @@ static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev) + if (!pci->table_group) { + pci->table_group = iommu_pseries_alloc_group(pci->phb->node); + tbl = pci->table_group->tables[0]; +- iommu_table_setparms_lpar(pci->phb, pdn, tbl, +- pci->table_group, dma_window); ++ ++ iommu_table_setparms_common(tbl, pci->phb->bus->number, ++ be32_to_cpu(prop.liobn), ++ be64_to_cpu(prop.dma_base), ++ 1ULL << be32_to_cpu(prop.window_shift), ++ be32_to_cpu(prop.tce_shift), NULL, ++ &iommu_table_lpar_multi_ops); ++ ++ /* Only for normal boot with default window. Doesn't matter even ++ * if we set these with DDW which is 64bit during kdump, since ++ * these will not be used during kdump. 
++ */ ++ pci->table_group->tce32_start = be64_to_cpu(prop.dma_base); ++ pci->table_group->tce32_size = 1 << be32_to_cpu(prop.window_shift); + + iommu_init_table(tbl, pci->phb->node, 0, 0); + iommu_register_group(pci->table_group, +diff --git a/arch/riscv/include/asm/ftrace.h b/arch/riscv/include/asm/ftrace.h +index d47d87c2d7e3d..dcf1bc9de5841 100644 +--- a/arch/riscv/include/asm/ftrace.h ++++ b/arch/riscv/include/asm/ftrace.h +@@ -25,6 +25,11 @@ + + #define ARCH_SUPPORTS_FTRACE_OPS 1 + #ifndef __ASSEMBLY__ ++ ++extern void *return_address(unsigned int level); ++ ++#define ftrace_return_address(n) return_address(n) ++ + void MCOUNT_NAME(void); + static inline unsigned long ftrace_call_adjust(unsigned long addr) + { +diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h +index 59bb53da473dd..63055c6ad2c25 100644 +--- a/arch/riscv/include/asm/pgtable.h ++++ b/arch/riscv/include/asm/pgtable.h +@@ -79,7 +79,7 @@ + * Define vmemmap for pfn_to_page & page_to_pfn calls. Needed if kernel + * is configured with CONFIG_SPARSEMEM_VMEMMAP enabled. 
+ */ +-#define vmemmap ((struct page *)VMEMMAP_START) ++#define vmemmap ((struct page *)VMEMMAP_START - (phys_ram_base >> PAGE_SHIFT)) + + #define PCI_IO_SIZE SZ_16M + #define PCI_IO_END VMEMMAP_START +diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile +index ab333cb792fd9..4c0805d264ca8 100644 +--- a/arch/riscv/kernel/Makefile ++++ b/arch/riscv/kernel/Makefile +@@ -7,6 +7,7 @@ ifdef CONFIG_FTRACE + CFLAGS_REMOVE_ftrace.o = $(CC_FLAGS_FTRACE) + CFLAGS_REMOVE_patch.o = $(CC_FLAGS_FTRACE) + CFLAGS_REMOVE_sbi.o = $(CC_FLAGS_FTRACE) ++CFLAGS_REMOVE_return_address.o = $(CC_FLAGS_FTRACE) + endif + CFLAGS_syscall_table.o += $(call cc-option,-Wno-override-init,) + CFLAGS_compat_syscall_table.o += $(call cc-option,-Wno-override-init,) +@@ -41,6 +42,7 @@ obj-y += irq.o + obj-y += process.o + obj-y += ptrace.o + obj-y += reset.o ++obj-y += return_address.o + obj-y += setup.o + obj-y += signal.o + obj-y += syscall_table.o +diff --git a/arch/riscv/kernel/return_address.c b/arch/riscv/kernel/return_address.c +new file mode 100644 +index 0000000000000..c8115ec8fb304 +--- /dev/null ++++ b/arch/riscv/kernel/return_address.c +@@ -0,0 +1,48 @@ ++// SPDX-License-Identifier: GPL-2.0-only ++/* ++ * This code come from arch/arm64/kernel/return_address.c ++ * ++ * Copyright (C) 2023 SiFive. 
++ */ ++ ++#include <linux/export.h> ++#include <linux/kprobes.h> ++#include <linux/stacktrace.h> ++ ++struct return_address_data { ++ unsigned int level; ++ void *addr; ++}; ++ ++static bool save_return_addr(void *d, unsigned long pc) ++{ ++ struct return_address_data *data = d; ++ ++ if (!data->level) { ++ data->addr = (void *)pc; ++ return false; ++ } ++ ++ --data->level; ++ ++ return true; ++} ++NOKPROBE_SYMBOL(save_return_addr); ++ ++noinline void *return_address(unsigned int level) ++{ ++ struct return_address_data data; ++ ++ data.level = level + 3; ++ data.addr = NULL; ++ ++ arch_stack_walk(save_return_addr, &data, current, NULL); ++ ++ if (!data.level) ++ return data.addr; ++ else ++ return NULL; ++ ++} ++EXPORT_SYMBOL_GPL(return_address); ++NOKPROBE_SYMBOL(return_address); +diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig +index 4c9bfc4be58d4..2f7af61b49b6c 100644 +--- a/arch/x86/Kconfig ++++ b/arch/x86/Kconfig +@@ -1982,6 +1982,23 @@ config EFI_STUB + + See Documentation/admin-guide/efi-stub.rst for more information. + ++config EFI_HANDOVER_PROTOCOL ++ bool "EFI handover protocol (DEPRECATED)" ++ depends on EFI_STUB ++ default y ++ help ++ Select this in order to include support for the deprecated EFI ++ handover protocol, which defines alternative entry points into the ++ EFI stub. This is a practice that has no basis in the UEFI ++ specification, and requires a priori knowledge on the part of the ++ bootloader about Linux/x86 specific ways of passing the command line ++ and initrd, and where in memory those assets may be loaded. ++ ++ If in doubt, say Y. Even though the corresponding support is not ++ present in upstream GRUB or other bootloaders, most distros build ++ GRUB with numerous downstream patches applied, and may rely on the ++ handover protocol as as result. 
++ + config EFI_MIXED + bool "EFI mixed-mode support" + depends on EFI_STUB && X86_64 +diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile +index 15b7b403a4bd0..3965b2c9efee0 100644 +--- a/arch/x86/boot/compressed/Makefile ++++ b/arch/x86/boot/compressed/Makefile +@@ -74,6 +74,11 @@ LDFLAGS_vmlinux += -z noexecstack + ifeq ($(CONFIG_LD_IS_BFD),y) + LDFLAGS_vmlinux += $(call ld-option,--no-warn-rwx-segments) + endif ++ifeq ($(CONFIG_EFI_STUB),y) ++# ensure that the static EFI stub library will be pulled in, even if it is ++# never referenced explicitly from the startup code ++LDFLAGS_vmlinux += -u efi_pe_entry ++endif + LDFLAGS_vmlinux += -T + + hostprogs := mkpiggy +@@ -100,7 +105,7 @@ vmlinux-objs-$(CONFIG_RANDOMIZE_BASE) += $(obj)/kaslr.o + ifdef CONFIG_X86_64 + vmlinux-objs-y += $(obj)/ident_map_64.o + vmlinux-objs-y += $(obj)/idt_64.o $(obj)/idt_handlers_64.o +- vmlinux-objs-y += $(obj)/mem_encrypt.o ++ vmlinux-objs-$(CONFIG_AMD_MEM_ENCRYPT) += $(obj)/mem_encrypt.o + vmlinux-objs-y += $(obj)/pgtable_64.o + vmlinux-objs-$(CONFIG_AMD_MEM_ENCRYPT) += $(obj)/sev.o + endif +@@ -108,11 +113,11 @@ endif + vmlinux-objs-$(CONFIG_ACPI) += $(obj)/acpi.o + vmlinux-objs-$(CONFIG_INTEL_TDX_GUEST) += $(obj)/tdx.o $(obj)/tdcall.o + +-vmlinux-objs-$(CONFIG_EFI_MIXED) += $(obj)/efi_thunk_$(BITS).o + vmlinux-objs-$(CONFIG_EFI) += $(obj)/efi.o +-efi-obj-$(CONFIG_EFI_STUB) = $(objtree)/drivers/firmware/efi/libstub/lib.a ++vmlinux-objs-$(CONFIG_EFI_MIXED) += $(obj)/efi_mixed.o ++vmlinux-objs-$(CONFIG_EFI_STUB) += $(objtree)/drivers/firmware/efi/libstub/lib.a + +-$(obj)/vmlinux: $(vmlinux-objs-y) $(efi-obj-y) FORCE ++$(obj)/vmlinux: $(vmlinux-objs-y) FORCE + $(call if_changed,ld) + + OBJCOPYFLAGS_vmlinux.bin := -R .comment -S +diff --git a/arch/x86/boot/compressed/acpi.c b/arch/x86/boot/compressed/acpi.c +index 9caf89063e775..55c98fdd67d2b 100644 +--- a/arch/x86/boot/compressed/acpi.c ++++ b/arch/x86/boot/compressed/acpi.c +@@ -30,13 +30,13 @@ 
__efi_get_rsdp_addr(unsigned long cfg_tbl_pa, unsigned int cfg_tbl_len) + * Search EFI system tables for RSDP. Preferred is ACPI_20_TABLE_GUID to + * ACPI_TABLE_GUID because it has more features. + */ +- rsdp_addr = efi_find_vendor_table(boot_params, cfg_tbl_pa, cfg_tbl_len, ++ rsdp_addr = efi_find_vendor_table(boot_params_ptr, cfg_tbl_pa, cfg_tbl_len, + ACPI_20_TABLE_GUID); + if (rsdp_addr) + return (acpi_physical_address)rsdp_addr; + + /* No ACPI_20_TABLE_GUID found, fallback to ACPI_TABLE_GUID. */ +- rsdp_addr = efi_find_vendor_table(boot_params, cfg_tbl_pa, cfg_tbl_len, ++ rsdp_addr = efi_find_vendor_table(boot_params_ptr, cfg_tbl_pa, cfg_tbl_len, + ACPI_TABLE_GUID); + if (rsdp_addr) + return (acpi_physical_address)rsdp_addr; +@@ -56,15 +56,15 @@ static acpi_physical_address efi_get_rsdp_addr(void) + enum efi_type et; + int ret; + +- et = efi_get_type(boot_params); ++ et = efi_get_type(boot_params_ptr); + if (et == EFI_TYPE_NONE) + return 0; + +- systab_pa = efi_get_system_table(boot_params); ++ systab_pa = efi_get_system_table(boot_params_ptr); + if (!systab_pa) + error("EFI support advertised, but unable to locate system table."); + +- ret = efi_get_conf_table(boot_params, &cfg_tbl_pa, &cfg_tbl_len); ++ ret = efi_get_conf_table(boot_params_ptr, &cfg_tbl_pa, &cfg_tbl_len); + if (ret || !cfg_tbl_pa) + error("EFI config table not found."); + +@@ -156,7 +156,7 @@ acpi_physical_address get_rsdp_addr(void) + { + acpi_physical_address pa; + +- pa = boot_params->acpi_rsdp_addr; ++ pa = boot_params_ptr->acpi_rsdp_addr; + + if (!pa) + pa = efi_get_rsdp_addr(); +@@ -210,7 +210,7 @@ static unsigned long get_acpi_srat_table(void) + rsdp = (struct acpi_table_rsdp *)get_cmdline_acpi_rsdp(); + if (!rsdp) + rsdp = (struct acpi_table_rsdp *)(long) +- boot_params->acpi_rsdp_addr; ++ boot_params_ptr->acpi_rsdp_addr; + + if (!rsdp) + return 0; +diff --git a/arch/x86/boot/compressed/cmdline.c b/arch/x86/boot/compressed/cmdline.c +index f1add5d85da9d..c1bb180973ea2 100644 +--- 
a/arch/x86/boot/compressed/cmdline.c ++++ b/arch/x86/boot/compressed/cmdline.c +@@ -14,9 +14,9 @@ static inline char rdfs8(addr_t addr) + #include "../cmdline.c" + unsigned long get_cmd_line_ptr(void) + { +- unsigned long cmd_line_ptr = boot_params->hdr.cmd_line_ptr; ++ unsigned long cmd_line_ptr = boot_params_ptr->hdr.cmd_line_ptr; + +- cmd_line_ptr |= (u64)boot_params->ext_cmd_line_ptr << 32; ++ cmd_line_ptr |= (u64)boot_params_ptr->ext_cmd_line_ptr << 32; + + return cmd_line_ptr; + } +diff --git a/arch/x86/boot/compressed/efi_mixed.S b/arch/x86/boot/compressed/efi_mixed.S +new file mode 100644 +index 0000000000000..8232c5b2a9bf5 +--- /dev/null ++++ b/arch/x86/boot/compressed/efi_mixed.S +@@ -0,0 +1,328 @@ ++/* SPDX-License-Identifier: GPL-2.0 */ ++/* ++ * Copyright (C) 2014, 2015 Intel Corporation; author Matt Fleming ++ * ++ * Early support for invoking 32-bit EFI services from a 64-bit kernel. ++ * ++ * Because this thunking occurs before ExitBootServices() we have to ++ * restore the firmware's 32-bit GDT and IDT before we make EFI service ++ * calls. ++ * ++ * On the plus side, we don't have to worry about mangling 64-bit ++ * addresses into 32-bits because we're executing with an identity ++ * mapped pagetable and haven't transitioned to 64-bit virtual addresses ++ * yet. ++ */ ++ ++#include <linux/linkage.h> ++#include <asm/msr.h> ++#include <asm/page_types.h> ++#include <asm/processor-flags.h> ++#include <asm/segment.h> ++ ++ .code64 ++ .text ++/* ++ * When booting in 64-bit mode on 32-bit EFI firmware, startup_64_mixed_mode() ++ * is the first thing that runs after switching to long mode. Depending on ++ * whether the EFI handover protocol or the compat entry point was used to ++ * enter the kernel, it will either branch to the common 64-bit EFI stub ++ * entrypoint efi_stub_entry() directly, or via the 64-bit EFI PE/COFF ++ * entrypoint efi_pe_entry(). 
In the former case, the bootloader must provide a ++ * struct bootparams pointer as the third argument, so the presence of such a ++ * pointer is used to disambiguate. ++ * ++ * +--------------+ ++ * +------------------+ +------------+ +------>| efi_pe_entry | ++ * | efi32_pe_entry |---->| | | +-----------+--+ ++ * +------------------+ | | +------+----------------+ | ++ * | startup_32 |---->| startup_64_mixed_mode | | ++ * +------------------+ | | +------+----------------+ | ++ * | efi32_stub_entry |---->| | | | ++ * +------------------+ +------------+ | | ++ * V | ++ * +------------+ +----------------+ | ++ * | startup_64 |<----| efi_stub_entry |<--------+ ++ * +------------+ +----------------+ ++ */ ++SYM_FUNC_START(startup_64_mixed_mode) ++ lea efi32_boot_args(%rip), %rdx ++ mov 0(%rdx), %edi ++ mov 4(%rdx), %esi ++#ifdef CONFIG_EFI_HANDOVER_PROTOCOL ++ mov 8(%rdx), %edx // saved bootparams pointer ++ test %edx, %edx ++ jnz efi_stub_entry ++#endif ++ /* ++ * efi_pe_entry uses MS calling convention, which requires 32 bytes of ++ * shadow space on the stack even if all arguments are passed in ++ * registers. We also need an additional 8 bytes for the space that ++ * would be occupied by the return address, and this also results in ++ * the correct stack alignment for entry. 
++ */ ++ sub $40, %rsp ++ mov %rdi, %rcx // MS calling convention ++ mov %rsi, %rdx ++ jmp efi_pe_entry ++SYM_FUNC_END(startup_64_mixed_mode) ++ ++SYM_FUNC_START(__efi64_thunk) ++ push %rbp ++ push %rbx ++ ++ movl %ds, %eax ++ push %rax ++ movl %es, %eax ++ push %rax ++ movl %ss, %eax ++ push %rax ++ ++ /* Copy args passed on stack */ ++ movq 0x30(%rsp), %rbp ++ movq 0x38(%rsp), %rbx ++ movq 0x40(%rsp), %rax ++ ++ /* ++ * Convert x86-64 ABI params to i386 ABI ++ */ ++ subq $64, %rsp ++ movl %esi, 0x0(%rsp) ++ movl %edx, 0x4(%rsp) ++ movl %ecx, 0x8(%rsp) ++ movl %r8d, 0xc(%rsp) ++ movl %r9d, 0x10(%rsp) ++ movl %ebp, 0x14(%rsp) ++ movl %ebx, 0x18(%rsp) ++ movl %eax, 0x1c(%rsp) ++ ++ leaq 0x20(%rsp), %rbx ++ sgdt (%rbx) ++ sidt 16(%rbx) ++ ++ leaq 1f(%rip), %rbp ++ ++ /* ++ * Switch to IDT and GDT with 32-bit segments. These are the firmware ++ * GDT and IDT that were installed when the kernel started executing. ++ * The pointers were saved by the efi32_entry() routine below. ++ * ++ * Pass the saved DS selector to the 32-bit code, and use far return to ++ * restore the saved CS selector. ++ */ ++ lidt efi32_boot_idt(%rip) ++ lgdt efi32_boot_gdt(%rip) ++ ++ movzwl efi32_boot_ds(%rip), %edx ++ movzwq efi32_boot_cs(%rip), %rax ++ pushq %rax ++ leaq efi_enter32(%rip), %rax ++ pushq %rax ++ lretq ++ ++1: addq $64, %rsp ++ movq %rdi, %rax ++ ++ pop %rbx ++ movl %ebx, %ss ++ pop %rbx ++ movl %ebx, %es ++ pop %rbx ++ movl %ebx, %ds ++ /* Clear out 32-bit selector from FS and GS */ ++ xorl %ebx, %ebx ++ movl %ebx, %fs ++ movl %ebx, %gs ++ ++ /* ++ * Convert 32-bit status code into 64-bit. 
++ */ ++ roll $1, %eax ++ rorq $1, %rax ++ ++ pop %rbx ++ pop %rbp ++ RET ++SYM_FUNC_END(__efi64_thunk) ++ ++ .code32 ++#ifdef CONFIG_EFI_HANDOVER_PROTOCOL ++SYM_FUNC_START(efi32_stub_entry) ++ call 1f ++1: popl %ecx ++ ++ /* Clear BSS */ ++ xorl %eax, %eax ++ leal (_bss - 1b)(%ecx), %edi ++ leal (_ebss - 1b)(%ecx), %ecx ++ subl %edi, %ecx ++ shrl $2, %ecx ++ cld ++ rep stosl ++ ++ add $0x4, %esp /* Discard return address */ ++ popl %ecx ++ popl %edx ++ popl %esi ++ jmp efi32_entry ++SYM_FUNC_END(efi32_stub_entry) ++#endif ++ ++/* ++ * EFI service pointer must be in %edi. ++ * ++ * The stack should represent the 32-bit calling convention. ++ */ ++SYM_FUNC_START_LOCAL(efi_enter32) ++ /* Load firmware selector into data and stack segment registers */ ++ movl %edx, %ds ++ movl %edx, %es ++ movl %edx, %fs ++ movl %edx, %gs ++ movl %edx, %ss ++ ++ /* Reload pgtables */ ++ movl %cr3, %eax ++ movl %eax, %cr3 ++ ++ /* Disable paging */ ++ movl %cr0, %eax ++ btrl $X86_CR0_PG_BIT, %eax ++ movl %eax, %cr0 ++ ++ /* Disable long mode via EFER */ ++ movl $MSR_EFER, %ecx ++ rdmsr ++ btrl $_EFER_LME, %eax ++ wrmsr ++ ++ call *%edi ++ ++ /* We must preserve return value */ ++ movl %eax, %edi ++ ++ /* ++ * Some firmware will return with interrupts enabled. Be sure to ++ * disable them before we switch GDTs and IDTs. ++ */ ++ cli ++ ++ lidtl 16(%ebx) ++ lgdtl (%ebx) ++ ++ movl %cr4, %eax ++ btsl $(X86_CR4_PAE_BIT), %eax ++ movl %eax, %cr4 ++ ++ movl %cr3, %eax ++ movl %eax, %cr3 ++ ++ movl $MSR_EFER, %ecx ++ rdmsr ++ btsl $_EFER_LME, %eax ++ wrmsr ++ ++ xorl %eax, %eax ++ lldt %ax ++ ++ pushl $__KERNEL_CS ++ pushl %ebp ++ ++ /* Enable paging */ ++ movl %cr0, %eax ++ btsl $X86_CR0_PG_BIT, %eax ++ movl %eax, %cr0 ++ lret ++SYM_FUNC_END(efi_enter32) ++ ++/* ++ * This is the common EFI stub entry point for mixed mode. 
++ * ++ * Arguments: %ecx image handle ++ * %edx EFI system table pointer ++ * %esi struct bootparams pointer (or NULL when not using ++ * the EFI handover protocol) ++ * ++ * Since this is the point of no return for ordinary execution, no registers ++ * are considered live except for the function parameters. [Note that the EFI ++ * stub may still exit and return to the firmware using the Exit() EFI boot ++ * service.] ++ */ ++SYM_FUNC_START_LOCAL(efi32_entry) ++ call 1f ++1: pop %ebx ++ ++ /* Save firmware GDTR and code/data selectors */ ++ sgdtl (efi32_boot_gdt - 1b)(%ebx) ++ movw %cs, (efi32_boot_cs - 1b)(%ebx) ++ movw %ds, (efi32_boot_ds - 1b)(%ebx) ++ ++ /* Store firmware IDT descriptor */ ++ sidtl (efi32_boot_idt - 1b)(%ebx) ++ ++ /* Store boot arguments */ ++ leal (efi32_boot_args - 1b)(%ebx), %ebx ++ movl %ecx, 0(%ebx) ++ movl %edx, 4(%ebx) ++ movl %esi, 8(%ebx) ++ movb $0x0, 12(%ebx) // efi_is64 ++ ++ /* Disable paging */ ++ movl %cr0, %eax ++ btrl $X86_CR0_PG_BIT, %eax ++ movl %eax, %cr0 ++ ++ jmp startup_32 ++SYM_FUNC_END(efi32_entry) ++ ++/* ++ * efi_status_t efi32_pe_entry(efi_handle_t image_handle, ++ * efi_system_table_32_t *sys_table) ++ */ ++SYM_FUNC_START(efi32_pe_entry) ++ pushl %ebp ++ movl %esp, %ebp ++ pushl %ebx // save callee-save registers ++ pushl %edi ++ ++ call verify_cpu // check for long mode support ++ testl %eax, %eax ++ movl $0x80000003, %eax // EFI_UNSUPPORTED ++ jnz 2f ++ ++ movl 8(%ebp), %ecx // image_handle ++ movl 12(%ebp), %edx // sys_table ++ xorl %esi, %esi ++ jmp efi32_entry // pass %ecx, %edx, %esi ++ // no other registers remain live ++ ++2: popl %edi // restore callee-save registers ++ popl %ebx ++ leave ++ RET ++SYM_FUNC_END(efi32_pe_entry) ++ ++#ifdef CONFIG_EFI_HANDOVER_PROTOCOL ++ .org efi32_stub_entry + 0x200 ++ .code64 ++SYM_FUNC_START_NOALIGN(efi64_stub_entry) ++ jmp efi_handover_entry ++SYM_FUNC_END(efi64_stub_entry) ++#endif ++ ++ .data ++ .balign 8 ++SYM_DATA_START_LOCAL(efi32_boot_gdt) ++ .word 0 ++ .quad 0 
++SYM_DATA_END(efi32_boot_gdt) ++ ++SYM_DATA_START_LOCAL(efi32_boot_idt) ++ .word 0 ++ .quad 0 ++SYM_DATA_END(efi32_boot_idt) ++ ++SYM_DATA_LOCAL(efi32_boot_cs, .word 0) ++SYM_DATA_LOCAL(efi32_boot_ds, .word 0) ++SYM_DATA_LOCAL(efi32_boot_args, .long 0, 0, 0) ++SYM_DATA(efi_is64, .byte 1) +diff --git a/arch/x86/boot/compressed/efi_thunk_64.S b/arch/x86/boot/compressed/efi_thunk_64.S +deleted file mode 100644 +index 67e7edcdfea8f..0000000000000 +--- a/arch/x86/boot/compressed/efi_thunk_64.S ++++ /dev/null +@@ -1,195 +0,0 @@ +-/* SPDX-License-Identifier: GPL-2.0 */ +-/* +- * Copyright (C) 2014, 2015 Intel Corporation; author Matt Fleming +- * +- * Early support for invoking 32-bit EFI services from a 64-bit kernel. +- * +- * Because this thunking occurs before ExitBootServices() we have to +- * restore the firmware's 32-bit GDT and IDT before we make EFI service +- * calls. +- * +- * On the plus side, we don't have to worry about mangling 64-bit +- * addresses into 32-bits because we're executing with an identity +- * mapped pagetable and haven't transitioned to 64-bit virtual addresses +- * yet. +- */ +- +-#include <linux/linkage.h> +-#include <asm/msr.h> +-#include <asm/page_types.h> +-#include <asm/processor-flags.h> +-#include <asm/segment.h> +- +- .code64 +- .text +-SYM_FUNC_START(__efi64_thunk) +- push %rbp +- push %rbx +- +- movl %ds, %eax +- push %rax +- movl %es, %eax +- push %rax +- movl %ss, %eax +- push %rax +- +- /* Copy args passed on stack */ +- movq 0x30(%rsp), %rbp +- movq 0x38(%rsp), %rbx +- movq 0x40(%rsp), %rax +- +- /* +- * Convert x86-64 ABI params to i386 ABI +- */ +- subq $64, %rsp +- movl %esi, 0x0(%rsp) +- movl %edx, 0x4(%rsp) +- movl %ecx, 0x8(%rsp) +- movl %r8d, 0xc(%rsp) +- movl %r9d, 0x10(%rsp) +- movl %ebp, 0x14(%rsp) +- movl %ebx, 0x18(%rsp) +- movl %eax, 0x1c(%rsp) +- +- leaq 0x20(%rsp), %rbx +- sgdt (%rbx) +- +- addq $16, %rbx +- sidt (%rbx) +- +- leaq 1f(%rip), %rbp +- +- /* +- * Switch to IDT and GDT with 32-bit segments. 
This is the firmware GDT +- * and IDT that was installed when the kernel started executing. The +- * pointers were saved at the EFI stub entry point in head_64.S. +- * +- * Pass the saved DS selector to the 32-bit code, and use far return to +- * restore the saved CS selector. +- */ +- leaq efi32_boot_idt(%rip), %rax +- lidt (%rax) +- leaq efi32_boot_gdt(%rip), %rax +- lgdt (%rax) +- +- movzwl efi32_boot_ds(%rip), %edx +- movzwq efi32_boot_cs(%rip), %rax +- pushq %rax +- leaq efi_enter32(%rip), %rax +- pushq %rax +- lretq +- +-1: addq $64, %rsp +- movq %rdi, %rax +- +- pop %rbx +- movl %ebx, %ss +- pop %rbx +- movl %ebx, %es +- pop %rbx +- movl %ebx, %ds +- /* Clear out 32-bit selector from FS and GS */ +- xorl %ebx, %ebx +- movl %ebx, %fs +- movl %ebx, %gs +- +- /* +- * Convert 32-bit status code into 64-bit. +- */ +- roll $1, %eax +- rorq $1, %rax +- +- pop %rbx +- pop %rbp +- RET +-SYM_FUNC_END(__efi64_thunk) +- +- .code32 +-/* +- * EFI service pointer must be in %edi. +- * +- * The stack should represent the 32-bit calling convention. +- */ +-SYM_FUNC_START_LOCAL(efi_enter32) +- /* Load firmware selector into data and stack segment registers */ +- movl %edx, %ds +- movl %edx, %es +- movl %edx, %fs +- movl %edx, %gs +- movl %edx, %ss +- +- /* Reload pgtables */ +- movl %cr3, %eax +- movl %eax, %cr3 +- +- /* Disable paging */ +- movl %cr0, %eax +- btrl $X86_CR0_PG_BIT, %eax +- movl %eax, %cr0 +- +- /* Disable long mode via EFER */ +- movl $MSR_EFER, %ecx +- rdmsr +- btrl $_EFER_LME, %eax +- wrmsr +- +- call *%edi +- +- /* We must preserve return value */ +- movl %eax, %edi +- +- /* +- * Some firmware will return with interrupts enabled. Be sure to +- * disable them before we switch GDTs and IDTs. 
+- */ +- cli +- +- lidtl (%ebx) +- subl $16, %ebx +- +- lgdtl (%ebx) +- +- movl %cr4, %eax +- btsl $(X86_CR4_PAE_BIT), %eax +- movl %eax, %cr4 +- +- movl %cr3, %eax +- movl %eax, %cr3 +- +- movl $MSR_EFER, %ecx +- rdmsr +- btsl $_EFER_LME, %eax +- wrmsr +- +- xorl %eax, %eax +- lldt %ax +- +- pushl $__KERNEL_CS +- pushl %ebp +- +- /* Enable paging */ +- movl %cr0, %eax +- btsl $X86_CR0_PG_BIT, %eax +- movl %eax, %cr0 +- lret +-SYM_FUNC_END(efi_enter32) +- +- .data +- .balign 8 +-SYM_DATA_START(efi32_boot_gdt) +- .word 0 +- .quad 0 +-SYM_DATA_END(efi32_boot_gdt) +- +-SYM_DATA_START(efi32_boot_idt) +- .word 0 +- .quad 0 +-SYM_DATA_END(efi32_boot_idt) +- +-SYM_DATA_START(efi32_boot_cs) +- .word 0 +-SYM_DATA_END(efi32_boot_cs) +- +-SYM_DATA_START(efi32_boot_ds) +- .word 0 +-SYM_DATA_END(efi32_boot_ds) +diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S +index 3b354eb9516df..1cfe9802a42fe 100644 +--- a/arch/x86/boot/compressed/head_32.S ++++ b/arch/x86/boot/compressed/head_32.S +@@ -84,19 +84,6 @@ SYM_FUNC_START(startup_32) + + #ifdef CONFIG_RELOCATABLE + leal startup_32@GOTOFF(%edx), %ebx +- +-#ifdef CONFIG_EFI_STUB +-/* +- * If we were loaded via the EFI LoadImage service, startup_32() will be at an +- * offset to the start of the space allocated for the image. efi_pe_entry() will +- * set up image_offset to tell us where the image actually starts, so that we +- * can use the full available buffer. +- * image_offset = startup_32 - image_base +- * Otherwise image_offset will be zero and has no effect on the calculations. 
+- */ +- subl image_offset@GOTOFF(%edx), %ebx +-#endif +- + movl BP_kernel_alignment(%esi), %eax + decl %eax + addl %eax, %ebx +@@ -150,17 +137,6 @@ SYM_FUNC_START(startup_32) + jmp *%eax + SYM_FUNC_END(startup_32) + +-#ifdef CONFIG_EFI_STUB +-SYM_FUNC_START(efi32_stub_entry) +- add $0x4, %esp +- movl 8(%esp), %esi /* save boot_params pointer */ +- call efi_main +- /* efi_main returns the possibly relocated address of startup_32 */ +- jmp *%eax +-SYM_FUNC_END(efi32_stub_entry) +-SYM_FUNC_ALIAS(efi_stub_entry, efi32_stub_entry) +-#endif +- + .text + SYM_FUNC_START_LOCAL_NOALIGN(.Lrelocated) + +@@ -179,15 +155,9 @@ SYM_FUNC_START_LOCAL_NOALIGN(.Lrelocated) + */ + /* push arguments for extract_kernel: */ + +- pushl output_len@GOTOFF(%ebx) /* decompressed length, end of relocs */ + pushl %ebp /* output address */ +- pushl input_len@GOTOFF(%ebx) /* input_len */ +- leal input_data@GOTOFF(%ebx), %eax +- pushl %eax /* input_data */ +- leal boot_heap@GOTOFF(%ebx), %eax +- pushl %eax /* heap area */ + pushl %esi /* real mode pointer */ +- call extract_kernel /* returns kernel location in %eax */ ++ call extract_kernel /* returns kernel entry point in %eax */ + addl $24, %esp + + /* +@@ -208,17 +178,11 @@ SYM_DATA_START_LOCAL(gdt) + .quad 0x00cf92000000ffff /* __KERNEL_DS */ + SYM_DATA_END_LABEL(gdt, SYM_L_LOCAL, gdt_end) + +-#ifdef CONFIG_EFI_STUB +-SYM_DATA(image_offset, .long 0) +-#endif +- + /* + * Stack and heap for uncompression + */ + .bss + .balign 4 +-boot_heap: +- .fill BOOT_HEAP_SIZE, 1, 0 + boot_stack: + .fill BOOT_STACK_SIZE, 1, 0 + boot_stack_end: +diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S +index b4bd6df29116f..0d7aef10b19ad 100644 +--- a/arch/x86/boot/compressed/head_64.S ++++ b/arch/x86/boot/compressed/head_64.S +@@ -118,7 +118,9 @@ SYM_FUNC_START(startup_32) + 1: + + /* Setup Exception handling for SEV-ES */ ++#ifdef CONFIG_AMD_MEM_ENCRYPT + call startup32_load_idt ++#endif + + /* Make sure cpu supports long mode. 
*/ + call verify_cpu +@@ -136,19 +138,6 @@ SYM_FUNC_START(startup_32) + + #ifdef CONFIG_RELOCATABLE + movl %ebp, %ebx +- +-#ifdef CONFIG_EFI_STUB +-/* +- * If we were loaded via the EFI LoadImage service, startup_32 will be at an +- * offset to the start of the space allocated for the image. efi_pe_entry will +- * set up image_offset to tell us where the image actually starts, so that we +- * can use the full available buffer. +- * image_offset = startup_32 - image_base +- * Otherwise image_offset will be zero and has no effect on the calculations. +- */ +- subl rva(image_offset)(%ebp), %ebx +-#endif +- + movl BP_kernel_alignment(%esi), %eax + decl %eax + addl %eax, %ebx +@@ -178,12 +167,13 @@ SYM_FUNC_START(startup_32) + */ + /* + * If SEV is active then set the encryption mask in the page tables. +- * This will insure that when the kernel is copied and decompressed ++ * This will ensure that when the kernel is copied and decompressed + * it will be done so encrypted. + */ +- call get_sev_encryption_bit + xorl %edx, %edx + #ifdef CONFIG_AMD_MEM_ENCRYPT ++ call get_sev_encryption_bit ++ xorl %edx, %edx + testl %eax, %eax + jz 1f + subl $32, %eax /* Encryption bit is always above bit 31 */ +@@ -249,6 +239,11 @@ SYM_FUNC_START(startup_32) + movl $__BOOT_TSS, %eax + ltr %ax + ++#ifdef CONFIG_AMD_MEM_ENCRYPT ++ /* Check if the C-bit position is correct when SEV is active */ ++ call startup32_check_sev_cbit ++#endif ++ + /* + * Setup for the jump to 64bit mode + * +@@ -261,29 +256,11 @@ SYM_FUNC_START(startup_32) + */ + leal rva(startup_64)(%ebp), %eax + #ifdef CONFIG_EFI_MIXED +- movl rva(efi32_boot_args)(%ebp), %edi +- testl %edi, %edi +- jz 1f +- leal rva(efi64_stub_entry)(%ebp), %eax +- movl rva(efi32_boot_args+4)(%ebp), %esi +- movl rva(efi32_boot_args+8)(%ebp), %edx // saved bootparams pointer +- testl %edx, %edx +- jnz 1f +- /* +- * efi_pe_entry uses MS calling convention, which requires 32 bytes of +- * shadow space on the stack even if all arguments are passed 
in +- * registers. We also need an additional 8 bytes for the space that +- * would be occupied by the return address, and this also results in +- * the correct stack alignment for entry. +- */ +- subl $40, %esp +- leal rva(efi_pe_entry)(%ebp), %eax +- movl %edi, %ecx // MS calling convention +- movl %esi, %edx ++ cmpb $1, rva(efi_is64)(%ebp) ++ je 1f ++ leal rva(startup_64_mixed_mode)(%ebp), %eax + 1: + #endif +- /* Check if the C-bit position is correct when SEV is active */ +- call startup32_check_sev_cbit + + pushl $__KERNEL_CS + pushl %eax +@@ -296,41 +273,6 @@ SYM_FUNC_START(startup_32) + lret + SYM_FUNC_END(startup_32) + +-#ifdef CONFIG_EFI_MIXED +- .org 0x190 +-SYM_FUNC_START(efi32_stub_entry) +- add $0x4, %esp /* Discard return address */ +- popl %ecx +- popl %edx +- popl %esi +- +- call 1f +-1: pop %ebp +- subl $ rva(1b), %ebp +- +- movl %esi, rva(efi32_boot_args+8)(%ebp) +-SYM_INNER_LABEL(efi32_pe_stub_entry, SYM_L_LOCAL) +- movl %ecx, rva(efi32_boot_args)(%ebp) +- movl %edx, rva(efi32_boot_args+4)(%ebp) +- movb $0, rva(efi_is64)(%ebp) +- +- /* Save firmware GDTR and code/data selectors */ +- sgdtl rva(efi32_boot_gdt)(%ebp) +- movw %cs, rva(efi32_boot_cs)(%ebp) +- movw %ds, rva(efi32_boot_ds)(%ebp) +- +- /* Store firmware IDT descriptor */ +- sidtl rva(efi32_boot_idt)(%ebp) +- +- /* Disable paging */ +- movl %cr0, %eax +- btrl $X86_CR0_PG_BIT, %eax +- movl %eax, %cr0 +- +- jmp startup_32 +-SYM_FUNC_END(efi32_stub_entry) +-#endif +- + .code64 + .org 0x200 + SYM_CODE_START(startup_64) +@@ -372,20 +314,6 @@ SYM_CODE_START(startup_64) + /* Start with the delta to where the kernel will run at. */ + #ifdef CONFIG_RELOCATABLE + leaq startup_32(%rip) /* - $startup_32 */, %rbp +- +-#ifdef CONFIG_EFI_STUB +-/* +- * If we were loaded via the EFI LoadImage service, startup_32 will be at an +- * offset to the start of the space allocated for the image. 
efi_pe_entry will +- * set up image_offset to tell us where the image actually starts, so that we +- * can use the full available buffer. +- * image_offset = startup_32 - image_base +- * Otherwise image_offset will be zero and has no effect on the calculations. +- */ +- movl image_offset(%rip), %eax +- subq %rax, %rbp +-#endif +- + movl BP_kernel_alignment(%rsi), %eax + decl %eax + addq %rax, %rbp +@@ -424,10 +352,6 @@ SYM_CODE_START(startup_64) + * For the trampoline, we need the top page table to reside in lower + * memory as we don't have a way to load 64-bit values into CR3 in + * 32-bit mode. +- * +- * We go though the trampoline even if we don't have to: if we're +- * already in a desired paging mode. This way the trampoline code gets +- * tested on every boot. + */ + + /* Make sure we have GDT with 32-bit code segment */ +@@ -442,10 +366,14 @@ SYM_CODE_START(startup_64) + lretq + + .Lon_kernel_cs: ++ /* ++ * RSI holds a pointer to a boot_params structure provided by the ++ * loader, and this needs to be preserved across C function calls. So ++ * move it into a callee saved register. ++ */ ++ movq %rsi, %r15 + +- pushq %rsi + call load_stage1_idt +- popq %rsi + + #ifdef CONFIG_AMD_MEM_ENCRYPT + /* +@@ -456,82 +384,24 @@ SYM_CODE_START(startup_64) + * CPUID instructions being issued, so go ahead and do that now via + * sev_enable(), which will also handle the rest of the SEV-related + * detection/setup to ensure that has been done in advance of any dependent +- * code. ++ * code. Pass the boot_params pointer as the first argument. + */ +- pushq %rsi +- movq %rsi, %rdi /* real mode address */ ++ movq %r15, %rdi + call sev_enable +- popq %rsi + #endif + + /* +- * paging_prepare() sets up the trampoline and checks if we need to +- * enable 5-level paging. +- * +- * paging_prepare() returns a two-quadword structure which lands +- * into RDX:RAX: +- * - Address of the trampoline is returned in RAX. 
+- * - Non zero RDX means trampoline needs to enable 5-level +- * paging. +- * +- * RSI holds real mode data and needs to be preserved across +- * this function call. +- */ +- pushq %rsi +- movq %rsi, %rdi /* real mode address */ +- call paging_prepare +- popq %rsi +- +- /* Save the trampoline address in RCX */ +- movq %rax, %rcx +- +- /* Set up 32-bit addressable stack */ +- leaq TRAMPOLINE_32BIT_STACK_END(%rcx), %rsp +- +- /* +- * Preserve live 64-bit registers on the stack: this is necessary +- * because the architecture does not guarantee that GPRs will retain +- * their full 64-bit values across a 32-bit mode switch. +- */ +- pushq %rbp +- pushq %rbx +- pushq %rsi +- +- /* +- * Push the 64-bit address of trampoline_return() onto the new stack. +- * It will be used by the trampoline to return to the main code. Due to +- * the 32-bit mode switch, it cannot be kept it in a register either. +- */ +- leaq trampoline_return(%rip), %rdi +- pushq %rdi +- +- /* Switch to compatibility mode (CS.L = 0 CS.D = 1) via far return */ +- pushq $__KERNEL32_CS +- leaq TRAMPOLINE_32BIT_CODE_OFFSET(%rax), %rax +- pushq %rax +- lretq +-trampoline_return: +- /* Restore live 64-bit registers */ +- popq %rsi +- popq %rbx +- popq %rbp +- +- /* Restore the stack, the 32-bit trampoline uses its own stack */ +- leaq rva(boot_stack_end)(%rbx), %rsp +- +- /* +- * cleanup_trampoline() would restore trampoline memory. +- * +- * RDI is address of the page table to use instead of page table +- * in trampoline memory (if required). ++ * configure_5level_paging() updates the number of paging levels using ++ * a trampoline in 32-bit addressable memory if the current number does ++ * not match the desired number. + * +- * RSI holds real mode data and needs to be preserved across +- * this function call. ++ * Pass the boot_params pointer as the first argument. The second ++ * argument is the relocated address of the page table to use instead ++ * of the page table in trampoline memory (if required). 
+ */ +- pushq %rsi +- leaq rva(top_pgtable)(%rbx), %rdi +- call cleanup_trampoline +- popq %rsi ++ movq %r15, %rdi ++ leaq rva(top_pgtable)(%rbx), %rsi ++ call configure_5level_paging + + /* Zero EFLAGS */ + pushq $0 +@@ -541,7 +411,6 @@ trampoline_return: + * Copy the compressed kernel to the end of our buffer + * where decompression in place becomes safe. + */ +- pushq %rsi + leaq (_bss-8)(%rip), %rsi + leaq rva(_bss-8)(%rbx), %rdi + movl $(_bss - startup_32), %ecx +@@ -549,7 +418,6 @@ trampoline_return: + std + rep movsq + cld +- popq %rsi + + /* + * The GDT may get overwritten either during the copy we just did or +@@ -568,19 +436,6 @@ trampoline_return: + jmp *%rax + SYM_CODE_END(startup_64) + +-#ifdef CONFIG_EFI_STUB +- .org 0x390 +-SYM_FUNC_START(efi64_stub_entry) +- and $~0xf, %rsp /* realign the stack */ +- movq %rdx, %rbx /* save boot_params pointer */ +- call efi_main +- movq %rbx,%rsi +- leaq rva(startup_64)(%rax), %rax +- jmp *%rax +-SYM_FUNC_END(efi64_stub_entry) +-SYM_FUNC_ALIAS(efi_stub_entry, efi64_stub_entry) +-#endif +- + .text + SYM_FUNC_START_LOCAL_NOALIGN(.Lrelocated) + +@@ -594,125 +449,122 @@ SYM_FUNC_START_LOCAL_NOALIGN(.Lrelocated) + shrq $3, %rcx + rep stosq + +- pushq %rsi + call load_stage2_idt + + /* Pass boot_params to initialize_identity_maps() */ +- movq (%rsp), %rdi ++ movq %r15, %rdi + call initialize_identity_maps +- popq %rsi + + /* + * Do the extraction, and jump to the new kernel.. 
+ */ +- pushq %rsi /* Save the real mode argument */ +- movq %rsi, %rdi /* real mode address */ +- leaq boot_heap(%rip), %rsi /* malloc area for uncompression */ +- leaq input_data(%rip), %rdx /* input_data */ +- movl input_len(%rip), %ecx /* input_len */ +- movq %rbp, %r8 /* output target address */ +- movl output_len(%rip), %r9d /* decompressed length, end of relocs */ +- call extract_kernel /* returns kernel location in %rax */ +- popq %rsi ++ /* pass struct boot_params pointer and output target address */ ++ movq %r15, %rdi ++ movq %rbp, %rsi ++ call extract_kernel /* returns kernel entry point in %rax */ + + /* + * Jump to the decompressed kernel. + */ ++ movq %r15, %rsi + jmp *%rax + SYM_FUNC_END(.Lrelocated) + +- .code32 + /* +- * This is the 32-bit trampoline that will be copied over to low memory. ++ * This is the 32-bit trampoline that will be copied over to low memory. It ++ * will be called using the ordinary 64-bit calling convention from code ++ * running in 64-bit mode. + * + * Return address is at the top of the stack (might be above 4G). +- * ECX contains the base address of the trampoline memory. +- * Non zero RDX means trampoline needs to enable 5-level paging. ++ * The first argument (EDI) contains the address of the temporary PGD level ++ * page table in 32-bit addressable memory which will be programmed into ++ * register CR3. + */ ++ .section ".rodata", "a", @progbits + SYM_CODE_START(trampoline_32bit_src) +- /* Set up data and stack segments */ +- movl $__KERNEL_DS, %eax +- movl %eax, %ds +- movl %eax, %ss ++ /* ++ * Preserve callee save 64-bit registers on the stack: this is ++ * necessary because the architecture does not guarantee that GPRs will ++ * retain their full 64-bit values across a 32-bit mode switch. 
++ */ ++ pushq %r15 ++ pushq %r14 ++ pushq %r13 ++ pushq %r12 ++ pushq %rbp ++ pushq %rbx ++ ++ /* Preserve top half of RSP in a legacy mode GPR to avoid truncation */ ++ movq %rsp, %rbx ++ shrq $32, %rbx + ++ /* Switch to compatibility mode (CS.L = 0 CS.D = 1) via far return */ ++ pushq $__KERNEL32_CS ++ leaq 0f(%rip), %rax ++ pushq %rax ++ lretq ++ ++ /* ++ * The 32-bit code below will do a far jump back to long mode and end ++ * up here after reconfiguring the number of paging levels. First, the ++ * stack pointer needs to be restored to its full 64-bit value before ++ * the callee save register contents can be popped from the stack. ++ */ ++.Lret: ++ shlq $32, %rbx ++ orq %rbx, %rsp ++ ++ /* Restore the preserved 64-bit registers */ ++ popq %rbx ++ popq %rbp ++ popq %r12 ++ popq %r13 ++ popq %r14 ++ popq %r15 ++ retq ++ ++ .code32 ++0: + /* Disable paging */ + movl %cr0, %eax + btrl $X86_CR0_PG_BIT, %eax + movl %eax, %cr0 + +- /* Check what paging mode we want to be in after the trampoline */ +- testl %edx, %edx +- jz 1f +- +- /* We want 5-level paging: don't touch CR3 if it already points to 5-level page tables */ +- movl %cr4, %eax +- testl $X86_CR4_LA57, %eax +- jnz 3f +- jmp 2f +-1: +- /* We want 4-level paging: don't touch CR3 if it already points to 4-level page tables */ +- movl %cr4, %eax +- testl $X86_CR4_LA57, %eax +- jz 3f +-2: + /* Point CR3 to the trampoline's new top level page table */ +- leal TRAMPOLINE_32BIT_PGTABLE_OFFSET(%ecx), %eax +- movl %eax, %cr3 +-3: ++ movl %edi, %cr3 ++ + /* Set EFER.LME=1 as a precaution in case hypervsior pulls the rug */ +- pushl %ecx +- pushl %edx + movl $MSR_EFER, %ecx + rdmsr + btsl $_EFER_LME, %eax + /* Avoid writing EFER if no change was made (for TDX guest) */ + jc 1f + wrmsr +-1: popl %edx +- popl %ecx +- +-#ifdef CONFIG_X86_MCE +- /* +- * Preserve CR4.MCE if the kernel will enable #MC support. +- * Clearing MCE may fault in some environments (that also force #MC +- * support). 
Any machine check that occurs before #MC support is fully +- * configured will crash the system regardless of the CR4.MCE value set +- * here. +- */ +- movl %cr4, %eax +- andl $X86_CR4_MCE, %eax +-#else +- movl $0, %eax +-#endif +- +- /* Enable PAE and LA57 (if required) paging modes */ +- orl $X86_CR4_PAE, %eax +- testl %edx, %edx +- jz 1f +- orl $X86_CR4_LA57, %eax + 1: ++ /* Toggle CR4.LA57 */ ++ movl %cr4, %eax ++ btcl $X86_CR4_LA57_BIT, %eax + movl %eax, %cr4 + +- /* Calculate address of paging_enabled() once we are executing in the trampoline */ +- leal .Lpaging_enabled - trampoline_32bit_src + TRAMPOLINE_32BIT_CODE_OFFSET(%ecx), %eax +- +- /* Prepare the stack for far return to Long Mode */ +- pushl $__KERNEL_CS +- pushl %eax +- + /* Enable paging again. */ + movl %cr0, %eax + btsl $X86_CR0_PG_BIT, %eax + movl %eax, %cr0 + +- lret ++ /* ++ * Return to the 64-bit calling code using LJMP rather than LRET, to ++ * avoid the need for a 32-bit addressable stack. The destination ++ * address will be adjusted after the template code is copied into a ++ * 32-bit addressable buffer. ++ */ ++.Ljmp: ljmpl $__KERNEL_CS, $(.Lret - trampoline_32bit_src) + SYM_CODE_END(trampoline_32bit_src) + +- .code64 +-SYM_FUNC_START_LOCAL_NOALIGN(.Lpaging_enabled) +- /* Return from the trampoline */ +- retq +-SYM_FUNC_END(.Lpaging_enabled) ++/* ++ * This symbol is placed right after trampoline_32bit_src() so its address can ++ * be used to infer the size of the trampoline code. ++ */ ++SYM_DATA(trampoline_ljmp_imm_offset, .word .Ljmp + 1 - trampoline_32bit_src) + + /* + * The trampoline code has a size limit. 
+@@ -721,7 +573,7 @@ SYM_FUNC_END(.Lpaging_enabled) + */ + .org trampoline_32bit_src + TRAMPOLINE_32BIT_CODE_SIZE + +- .code32 ++ .text + SYM_FUNC_START_LOCAL_NOALIGN(.Lno_longmode) + /* This isn't an x86-64 CPU, so hang intentionally, we cannot continue */ + 1: +@@ -729,6 +581,7 @@ SYM_FUNC_START_LOCAL_NOALIGN(.Lno_longmode) + jmp 1b + SYM_FUNC_END(.Lno_longmode) + ++ .globl verify_cpu + #include "../../kernel/verify_cpu.S" + + .data +@@ -760,249 +613,11 @@ SYM_DATA_START(boot_idt) + .endr + SYM_DATA_END_LABEL(boot_idt, SYM_L_GLOBAL, boot_idt_end) + +-#ifdef CONFIG_AMD_MEM_ENCRYPT +-SYM_DATA_START(boot32_idt_desc) +- .word boot32_idt_end - boot32_idt - 1 +- .long 0 +-SYM_DATA_END(boot32_idt_desc) +- .balign 8 +-SYM_DATA_START(boot32_idt) +- .rept 32 +- .quad 0 +- .endr +-SYM_DATA_END_LABEL(boot32_idt, SYM_L_GLOBAL, boot32_idt_end) +-#endif +- +-#ifdef CONFIG_EFI_STUB +-SYM_DATA(image_offset, .long 0) +-#endif +-#ifdef CONFIG_EFI_MIXED +-SYM_DATA_LOCAL(efi32_boot_args, .long 0, 0, 0) +-SYM_DATA(efi_is64, .byte 1) +- +-#define ST32_boottime 60 // offsetof(efi_system_table_32_t, boottime) +-#define BS32_handle_protocol 88 // offsetof(efi_boot_services_32_t, handle_protocol) +-#define LI32_image_base 32 // offsetof(efi_loaded_image_32_t, image_base) +- +- __HEAD +- .code32 +-SYM_FUNC_START(efi32_pe_entry) +-/* +- * efi_status_t efi32_pe_entry(efi_handle_t image_handle, +- * efi_system_table_32_t *sys_table) +- */ +- +- pushl %ebp +- movl %esp, %ebp +- pushl %eax // dummy push to allocate loaded_image +- +- pushl %ebx // save callee-save registers +- pushl %edi +- +- call verify_cpu // check for long mode support +- testl %eax, %eax +- movl $0x80000003, %eax // EFI_UNSUPPORTED +- jnz 2f +- +- call 1f +-1: pop %ebx +- subl $ rva(1b), %ebx +- +- /* Get the loaded image protocol pointer from the image handle */ +- leal -4(%ebp), %eax +- pushl %eax // &loaded_image +- leal rva(loaded_image_proto)(%ebx), %eax +- pushl %eax // pass the GUID address +- pushl 8(%ebp) // pass 
the image handle +- +- /* +- * Note the alignment of the stack frame. +- * sys_table +- * handle <-- 16-byte aligned on entry by ABI +- * return address +- * frame pointer +- * loaded_image <-- local variable +- * saved %ebx <-- 16-byte aligned here +- * saved %edi +- * &loaded_image +- * &loaded_image_proto +- * handle <-- 16-byte aligned for call to handle_protocol +- */ +- +- movl 12(%ebp), %eax // sys_table +- movl ST32_boottime(%eax), %eax // sys_table->boottime +- call *BS32_handle_protocol(%eax) // sys_table->boottime->handle_protocol +- addl $12, %esp // restore argument space +- testl %eax, %eax +- jnz 2f +- +- movl 8(%ebp), %ecx // image_handle +- movl 12(%ebp), %edx // sys_table +- movl -4(%ebp), %esi // loaded_image +- movl LI32_image_base(%esi), %esi // loaded_image->image_base +- movl %ebx, %ebp // startup_32 for efi32_pe_stub_entry +- /* +- * We need to set the image_offset variable here since startup_32() will +- * use it before we get to the 64-bit efi_pe_entry() in C code. 
+- */ +- subl %esi, %ebx +- movl %ebx, rva(image_offset)(%ebp) // save image_offset +- jmp efi32_pe_stub_entry +- +-2: popl %edi // restore callee-save registers +- popl %ebx +- leave +- RET +-SYM_FUNC_END(efi32_pe_entry) +- +- .section ".rodata" +- /* EFI loaded image protocol GUID */ +- .balign 4 +-SYM_DATA_START_LOCAL(loaded_image_proto) +- .long 0x5b1b31a1 +- .word 0x9562, 0x11d2 +- .byte 0x8e, 0x3f, 0x00, 0xa0, 0xc9, 0x69, 0x72, 0x3b +-SYM_DATA_END(loaded_image_proto) +-#endif +- +-#ifdef CONFIG_AMD_MEM_ENCRYPT +- __HEAD +- .code32 +-/* +- * Write an IDT entry into boot32_idt +- * +- * Parameters: +- * +- * %eax: Handler address +- * %edx: Vector number +- * +- * Physical offset is expected in %ebp +- */ +-SYM_FUNC_START(startup32_set_idt_entry) +- push %ebx +- push %ecx +- +- /* IDT entry address to %ebx */ +- leal rva(boot32_idt)(%ebp), %ebx +- shl $3, %edx +- addl %edx, %ebx +- +- /* Build IDT entry, lower 4 bytes */ +- movl %eax, %edx +- andl $0x0000ffff, %edx # Target code segment offset [15:0] +- movl $__KERNEL32_CS, %ecx # Target code segment selector +- shl $16, %ecx +- orl %ecx, %edx +- +- /* Store lower 4 bytes to IDT */ +- movl %edx, (%ebx) +- +- /* Build IDT entry, upper 4 bytes */ +- movl %eax, %edx +- andl $0xffff0000, %edx # Target code segment offset [31:16] +- orl $0x00008e00, %edx # Present, Type 32-bit Interrupt Gate +- +- /* Store upper 4 bytes to IDT */ +- movl %edx, 4(%ebx) +- +- pop %ecx +- pop %ebx +- RET +-SYM_FUNC_END(startup32_set_idt_entry) +-#endif +- +-SYM_FUNC_START(startup32_load_idt) +-#ifdef CONFIG_AMD_MEM_ENCRYPT +- /* #VC handler */ +- leal rva(startup32_vc_handler)(%ebp), %eax +- movl $X86_TRAP_VC, %edx +- call startup32_set_idt_entry +- +- /* Load IDT */ +- leal rva(boot32_idt)(%ebp), %eax +- movl %eax, rva(boot32_idt_desc+2)(%ebp) +- lidt rva(boot32_idt_desc)(%ebp) +-#endif +- RET +-SYM_FUNC_END(startup32_load_idt) +- +-/* +- * Check for the correct C-bit position when the startup_32 boot-path is used. 
+- * +- * The check makes use of the fact that all memory is encrypted when paging is +- * disabled. The function creates 64 bits of random data using the RDRAND +- * instruction. RDRAND is mandatory for SEV guests, so always available. If the +- * hypervisor violates that the kernel will crash right here. +- * +- * The 64 bits of random data are stored to a memory location and at the same +- * time kept in the %eax and %ebx registers. Since encryption is always active +- * when paging is off the random data will be stored encrypted in main memory. +- * +- * Then paging is enabled. When the C-bit position is correct all memory is +- * still mapped encrypted and comparing the register values with memory will +- * succeed. An incorrect C-bit position will map all memory unencrypted, so that +- * the compare will use the encrypted random data and fail. +- */ +-SYM_FUNC_START(startup32_check_sev_cbit) +-#ifdef CONFIG_AMD_MEM_ENCRYPT +- pushl %eax +- pushl %ebx +- pushl %ecx +- pushl %edx +- +- /* Check for non-zero sev_status */ +- movl rva(sev_status)(%ebp), %eax +- testl %eax, %eax +- jz 4f +- +- /* +- * Get two 32-bit random values - Don't bail out if RDRAND fails +- * because it is better to prevent forward progress if no random value +- * can be gathered. 
+- */ +-1: rdrand %eax +- jnc 1b +-2: rdrand %ebx +- jnc 2b +- +- /* Store to memory and keep it in the registers */ +- movl %eax, rva(sev_check_data)(%ebp) +- movl %ebx, rva(sev_check_data+4)(%ebp) +- +- /* Enable paging to see if encryption is active */ +- movl %cr0, %edx /* Backup %cr0 in %edx */ +- movl $(X86_CR0_PG | X86_CR0_PE), %ecx /* Enable Paging and Protected mode */ +- movl %ecx, %cr0 +- +- cmpl %eax, rva(sev_check_data)(%ebp) +- jne 3f +- cmpl %ebx, rva(sev_check_data+4)(%ebp) +- jne 3f +- +- movl %edx, %cr0 /* Restore previous %cr0 */ +- +- jmp 4f +- +-3: /* Check failed - hlt the machine */ +- hlt +- jmp 3b +- +-4: +- popl %edx +- popl %ecx +- popl %ebx +- popl %eax +-#endif +- RET +-SYM_FUNC_END(startup32_check_sev_cbit) +- + /* + * Stack and heap for uncompression + */ + .bss + .balign 4 +-SYM_DATA_LOCAL(boot_heap, .fill BOOT_HEAP_SIZE, 1, 0) +- + SYM_DATA_START_LOCAL(boot_stack) + .fill BOOT_STACK_SIZE, 1, 0 + .balign 16 +diff --git a/arch/x86/boot/compressed/ident_map_64.c b/arch/x86/boot/compressed/ident_map_64.c +index d34222816c9f5..b8c42339bc355 100644 +--- a/arch/x86/boot/compressed/ident_map_64.c ++++ b/arch/x86/boot/compressed/ident_map_64.c +@@ -167,8 +167,9 @@ void initialize_identity_maps(void *rmode) + * or does not touch all the pages covering them. + */ + kernel_add_identity_map((unsigned long)_head, (unsigned long)_end); +- boot_params = rmode; +- kernel_add_identity_map((unsigned long)boot_params, (unsigned long)(boot_params + 1)); ++ boot_params_ptr = rmode; ++ kernel_add_identity_map((unsigned long)boot_params_ptr, ++ (unsigned long)(boot_params_ptr + 1)); + cmdline = get_cmd_line_ptr(); + kernel_add_identity_map(cmdline, cmdline + COMMAND_LINE_SIZE); + +@@ -176,7 +177,7 @@ void initialize_identity_maps(void *rmode) + * Also map the setup_data entries passed via boot_params in case they + * need to be accessed by uncompressed kernel via the identity mapping. 
+ */ +- sd = (struct setup_data *)boot_params->hdr.setup_data; ++ sd = (struct setup_data *)boot_params_ptr->hdr.setup_data; + while (sd) { + unsigned long sd_addr = (unsigned long)sd; + +diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c +index e476bcbd9b422..9794d9174795d 100644 +--- a/arch/x86/boot/compressed/kaslr.c ++++ b/arch/x86/boot/compressed/kaslr.c +@@ -63,7 +63,7 @@ static unsigned long get_boot_seed(void) + unsigned long hash = 0; + + hash = rotate_xor(hash, build_str, sizeof(build_str)); +- hash = rotate_xor(hash, boot_params, sizeof(*boot_params)); ++ hash = rotate_xor(hash, boot_params_ptr, sizeof(*boot_params_ptr)); + + return hash; + } +@@ -383,7 +383,7 @@ static void handle_mem_options(void) + static void mem_avoid_init(unsigned long input, unsigned long input_size, + unsigned long output) + { +- unsigned long init_size = boot_params->hdr.init_size; ++ unsigned long init_size = boot_params_ptr->hdr.init_size; + u64 initrd_start, initrd_size; + unsigned long cmd_line, cmd_line_size; + +@@ -395,10 +395,10 @@ static void mem_avoid_init(unsigned long input, unsigned long input_size, + mem_avoid[MEM_AVOID_ZO_RANGE].size = (output + init_size) - input; + + /* Avoid initrd. */ +- initrd_start = (u64)boot_params->ext_ramdisk_image << 32; +- initrd_start |= boot_params->hdr.ramdisk_image; +- initrd_size = (u64)boot_params->ext_ramdisk_size << 32; +- initrd_size |= boot_params->hdr.ramdisk_size; ++ initrd_start = (u64)boot_params_ptr->ext_ramdisk_image << 32; ++ initrd_start |= boot_params_ptr->hdr.ramdisk_image; ++ initrd_size = (u64)boot_params_ptr->ext_ramdisk_size << 32; ++ initrd_size |= boot_params_ptr->hdr.ramdisk_size; + mem_avoid[MEM_AVOID_INITRD].start = initrd_start; + mem_avoid[MEM_AVOID_INITRD].size = initrd_size; + /* No need to set mapping for initrd, it will be handled in VO. */ +@@ -413,8 +413,8 @@ static void mem_avoid_init(unsigned long input, unsigned long input_size, + } + + /* Avoid boot parameters. 
*/ +- mem_avoid[MEM_AVOID_BOOTPARAMS].start = (unsigned long)boot_params; +- mem_avoid[MEM_AVOID_BOOTPARAMS].size = sizeof(*boot_params); ++ mem_avoid[MEM_AVOID_BOOTPARAMS].start = (unsigned long)boot_params_ptr; ++ mem_avoid[MEM_AVOID_BOOTPARAMS].size = sizeof(*boot_params_ptr); + + /* We don't need to set a mapping for setup_data. */ + +@@ -447,7 +447,7 @@ static bool mem_avoid_overlap(struct mem_vector *img, + } + + /* Avoid all entries in the setup_data linked list. */ +- ptr = (struct setup_data *)(unsigned long)boot_params->hdr.setup_data; ++ ptr = (struct setup_data *)(unsigned long)boot_params_ptr->hdr.setup_data; + while (ptr) { + struct mem_vector avoid; + +@@ -679,7 +679,7 @@ static bool process_mem_region(struct mem_vector *region, + static bool + process_efi_entries(unsigned long minimum, unsigned long image_size) + { +- struct efi_info *e = &boot_params->efi_info; ++ struct efi_info *e = &boot_params_ptr->efi_info; + bool efi_mirror_found = false; + struct mem_vector region; + efi_memory_desc_t *md; +@@ -761,8 +761,8 @@ static void process_e820_entries(unsigned long minimum, + struct boot_e820_entry *entry; + + /* Verify potential e820 positions, appending to slots list. */ +- for (i = 0; i < boot_params->e820_entries; i++) { +- entry = &boot_params->e820_table[i]; ++ for (i = 0; i < boot_params_ptr->e820_entries; i++) { ++ entry = &boot_params_ptr->e820_table[i]; + /* Skip non-RAM entries. 
*/ + if (entry->type != E820_TYPE_RAM) + continue; +@@ -836,7 +836,7 @@ void choose_random_location(unsigned long input, + return; + } + +- boot_params->hdr.loadflags |= KASLR_FLAG; ++ boot_params_ptr->hdr.loadflags |= KASLR_FLAG; + + if (IS_ENABLED(CONFIG_X86_32)) + mem_limit = KERNEL_IMAGE_SIZE; +diff --git a/arch/x86/boot/compressed/mem_encrypt.S b/arch/x86/boot/compressed/mem_encrypt.S +index a73e4d783cae2..32f7cc8a86254 100644 +--- a/arch/x86/boot/compressed/mem_encrypt.S ++++ b/arch/x86/boot/compressed/mem_encrypt.S +@@ -12,16 +12,13 @@ + #include <asm/processor-flags.h> + #include <asm/msr.h> + #include <asm/asm-offsets.h> ++#include <asm/segment.h> ++#include <asm/trapnr.h> + + .text + .code32 + SYM_FUNC_START(get_sev_encryption_bit) +- xor %eax, %eax +- +-#ifdef CONFIG_AMD_MEM_ENCRYPT + push %ebx +- push %ecx +- push %edx + + movl $0x80000000, %eax /* CPUID to check the highest leaf */ + cpuid +@@ -52,12 +49,7 @@ SYM_FUNC_START(get_sev_encryption_bit) + xor %eax, %eax + + .Lsev_exit: +- pop %edx +- pop %ecx + pop %ebx +- +-#endif /* CONFIG_AMD_MEM_ENCRYPT */ +- + RET + SYM_FUNC_END(get_sev_encryption_bit) + +@@ -98,7 +90,7 @@ SYM_CODE_START_LOCAL(sev_es_req_cpuid) + jmp 1b + SYM_CODE_END(sev_es_req_cpuid) + +-SYM_CODE_START(startup32_vc_handler) ++SYM_CODE_START_LOCAL(startup32_vc_handler) + pushl %eax + pushl %ebx + pushl %ecx +@@ -184,15 +176,149 @@ SYM_CODE_START(startup32_vc_handler) + jmp .Lfail + SYM_CODE_END(startup32_vc_handler) + ++/* ++ * Write an IDT entry into boot32_idt ++ * ++ * Parameters: ++ * ++ * %eax: Handler address ++ * %edx: Vector number ++ * %ecx: IDT address ++ */ ++SYM_FUNC_START_LOCAL(startup32_set_idt_entry) ++ /* IDT entry address to %ecx */ ++ leal (%ecx, %edx, 8), %ecx ++ ++ /* Build IDT entry, lower 4 bytes */ ++ movl %eax, %edx ++ andl $0x0000ffff, %edx # Target code segment offset [15:0] ++ orl $(__KERNEL32_CS << 16), %edx # Target code segment selector ++ ++ /* Store lower 4 bytes to IDT */ ++ movl %edx, (%ecx) ++ ++ /* 
Build IDT entry, upper 4 bytes */ ++ movl %eax, %edx ++ andl $0xffff0000, %edx # Target code segment offset [31:16] ++ orl $0x00008e00, %edx # Present, Type 32-bit Interrupt Gate ++ ++ /* Store upper 4 bytes to IDT */ ++ movl %edx, 4(%ecx) ++ ++ RET ++SYM_FUNC_END(startup32_set_idt_entry) ++ ++SYM_FUNC_START(startup32_load_idt) ++ push %ebp ++ push %ebx ++ ++ call 1f ++1: pop %ebp ++ ++ leal (boot32_idt - 1b)(%ebp), %ebx ++ ++ /* #VC handler */ ++ leal (startup32_vc_handler - 1b)(%ebp), %eax ++ movl $X86_TRAP_VC, %edx ++ movl %ebx, %ecx ++ call startup32_set_idt_entry ++ ++ /* Load IDT */ ++ leal (boot32_idt_desc - 1b)(%ebp), %ecx ++ movl %ebx, 2(%ecx) ++ lidt (%ecx) ++ ++ pop %ebx ++ pop %ebp ++ RET ++SYM_FUNC_END(startup32_load_idt) ++ ++/* ++ * Check for the correct C-bit position when the startup_32 boot-path is used. ++ * ++ * The check makes use of the fact that all memory is encrypted when paging is ++ * disabled. The function creates 64 bits of random data using the RDRAND ++ * instruction. RDRAND is mandatory for SEV guests, so always available. If the ++ * hypervisor violates that the kernel will crash right here. ++ * ++ * The 64 bits of random data are stored to a memory location and at the same ++ * time kept in the %eax and %ebx registers. Since encryption is always active ++ * when paging is off the random data will be stored encrypted in main memory. ++ * ++ * Then paging is enabled. When the C-bit position is correct all memory is ++ * still mapped encrypted and comparing the register values with memory will ++ * succeed. An incorrect C-bit position will map all memory unencrypted, so that ++ * the compare will use the encrypted random data and fail. 
++ */ ++SYM_FUNC_START(startup32_check_sev_cbit) ++ pushl %ebx ++ pushl %ebp ++ ++ call 0f ++0: popl %ebp ++ ++ /* Check for non-zero sev_status */ ++ movl (sev_status - 0b)(%ebp), %eax ++ testl %eax, %eax ++ jz 4f ++ ++ /* ++ * Get two 32-bit random values - Don't bail out if RDRAND fails ++ * because it is better to prevent forward progress if no random value ++ * can be gathered. ++ */ ++1: rdrand %eax ++ jnc 1b ++2: rdrand %ebx ++ jnc 2b ++ ++ /* Store to memory and keep it in the registers */ ++ leal (sev_check_data - 0b)(%ebp), %ebp ++ movl %eax, 0(%ebp) ++ movl %ebx, 4(%ebp) ++ ++ /* Enable paging to see if encryption is active */ ++ movl %cr0, %edx /* Backup %cr0 in %edx */ ++ movl $(X86_CR0_PG | X86_CR0_PE), %ecx /* Enable Paging and Protected mode */ ++ movl %ecx, %cr0 ++ ++ cmpl %eax, 0(%ebp) ++ jne 3f ++ cmpl %ebx, 4(%ebp) ++ jne 3f ++ ++ movl %edx, %cr0 /* Restore previous %cr0 */ ++ ++ jmp 4f ++ ++3: /* Check failed - hlt the machine */ ++ hlt ++ jmp 3b ++ ++4: ++ popl %ebp ++ popl %ebx ++ RET ++SYM_FUNC_END(startup32_check_sev_cbit) ++ + .code64 + + #include "../../kernel/sev_verify_cbit.S" + + .data + +-#ifdef CONFIG_AMD_MEM_ENCRYPT + .balign 8 + SYM_DATA(sme_me_mask, .quad 0) + SYM_DATA(sev_status, .quad 0) + SYM_DATA(sev_check_data, .quad 0) +-#endif ++ ++SYM_DATA_START_LOCAL(boot32_idt) ++ .rept 32 ++ .quad 0 ++ .endr ++SYM_DATA_END(boot32_idt) ++ ++SYM_DATA_START_LOCAL(boot32_idt_desc) ++ .word . 
- boot32_idt - 1 ++ .long 0 ++SYM_DATA_END(boot32_idt_desc) +diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c +index cf690d8712f4e..8ae7893d712ff 100644 +--- a/arch/x86/boot/compressed/misc.c ++++ b/arch/x86/boot/compressed/misc.c +@@ -46,7 +46,7 @@ void *memmove(void *dest, const void *src, size_t n); + /* + * This is set up by the setup-routine at boot-time + */ +-struct boot_params *boot_params; ++struct boot_params *boot_params_ptr; + + struct port_io_ops pio_ops; + +@@ -132,8 +132,8 @@ void __putstr(const char *s) + if (lines == 0 || cols == 0) + return; + +- x = boot_params->screen_info.orig_x; +- y = boot_params->screen_info.orig_y; ++ x = boot_params_ptr->screen_info.orig_x; ++ y = boot_params_ptr->screen_info.orig_y; + + while ((c = *s++) != '\0') { + if (c == '\n') { +@@ -154,8 +154,8 @@ void __putstr(const char *s) + } + } + +- boot_params->screen_info.orig_x = x; +- boot_params->screen_info.orig_y = y; ++ boot_params_ptr->screen_info.orig_x = x; ++ boot_params_ptr->screen_info.orig_y = y; + + pos = (x + cols * y) * 2; /* Update cursor position */ + outb(14, vidport); +@@ -277,7 +277,7 @@ static inline void handle_relocations(void *output, unsigned long output_len, + { } + #endif + +-static void parse_elf(void *output) ++static size_t parse_elf(void *output) + { + #ifdef CONFIG_X86_64 + Elf64_Ehdr ehdr; +@@ -293,10 +293,8 @@ static void parse_elf(void *output) + if (ehdr.e_ident[EI_MAG0] != ELFMAG0 || + ehdr.e_ident[EI_MAG1] != ELFMAG1 || + ehdr.e_ident[EI_MAG2] != ELFMAG2 || +- ehdr.e_ident[EI_MAG3] != ELFMAG3) { ++ ehdr.e_ident[EI_MAG3] != ELFMAG3) + error("Kernel is not a valid ELF file"); +- return; +- } + + debug_putstr("Parsing ELF... 
"); + +@@ -328,6 +326,35 @@ static void parse_elf(void *output) + } + + free(phdrs); ++ ++ return ehdr.e_entry - LOAD_PHYSICAL_ADDR; ++} ++ ++const unsigned long kernel_total_size = VO__end - VO__text; ++ ++static u8 boot_heap[BOOT_HEAP_SIZE] __aligned(4); ++ ++extern unsigned char input_data[]; ++extern unsigned int input_len, output_len; ++ ++unsigned long decompress_kernel(unsigned char *outbuf, unsigned long virt_addr, ++ void (*error)(char *x)) ++{ ++ unsigned long entry; ++ ++ if (!free_mem_ptr) { ++ free_mem_ptr = (unsigned long)boot_heap; ++ free_mem_end_ptr = (unsigned long)boot_heap + sizeof(boot_heap); ++ } ++ ++ if (__decompress(input_data, input_len, NULL, NULL, outbuf, output_len, ++ NULL, error) < 0) ++ return ULONG_MAX; ++ ++ entry = parse_elf(outbuf); ++ handle_relocations(outbuf, output_len, virt_addr); ++ ++ return entry; + } + + /* +@@ -347,25 +374,22 @@ static void parse_elf(void *output) + * |-------uncompressed kernel image---------| + * + */ +-asmlinkage __visible void *extract_kernel(void *rmode, memptr heap, +- unsigned char *input_data, +- unsigned long input_len, +- unsigned char *output, +- unsigned long output_len) ++asmlinkage __visible void *extract_kernel(void *rmode, unsigned char *output) + { +- const unsigned long kernel_total_size = VO__end - VO__text; + unsigned long virt_addr = LOAD_PHYSICAL_ADDR; ++ memptr heap = (memptr)boot_heap; + unsigned long needed_size; ++ size_t entry_offset; + + /* Retain x86 boot parameters pointer passed from startup_32/64. */ +- boot_params = rmode; ++ boot_params_ptr = rmode; + + /* Clear flags intended for solely in-kernel use. 
*/ +- boot_params->hdr.loadflags &= ~KASLR_FLAG; ++ boot_params_ptr->hdr.loadflags &= ~KASLR_FLAG; + +- sanitize_boot_params(boot_params); ++ sanitize_boot_params(boot_params_ptr); + +- if (boot_params->screen_info.orig_video_mode == 7) { ++ if (boot_params_ptr->screen_info.orig_video_mode == 7) { + vidmem = (char *) 0xb0000; + vidport = 0x3b4; + } else { +@@ -373,8 +397,8 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap, + vidport = 0x3d4; + } + +- lines = boot_params->screen_info.orig_video_lines; +- cols = boot_params->screen_info.orig_video_cols; ++ lines = boot_params_ptr->screen_info.orig_video_lines; ++ cols = boot_params_ptr->screen_info.orig_video_cols; + + init_default_io_ops(); + +@@ -393,7 +417,7 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap, + * so that early debugging output from the RSDP parsing code can be + * collected. + */ +- boot_params->acpi_rsdp_addr = get_rsdp_addr(); ++ boot_params_ptr->acpi_rsdp_addr = get_rsdp_addr(); + + debug_putstr("early console in extract_kernel\n"); + +@@ -411,7 +435,7 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap, + * entries. This ensures the full mapped area is usable RAM + * and doesn't include any reserved areas. 
+ */ +- needed_size = max(output_len, kernel_total_size); ++ needed_size = max_t(unsigned long, output_len, kernel_total_size); + #ifdef CONFIG_X86_64 + needed_size = ALIGN(needed_size, MIN_KERNEL_ALIGN); + #endif +@@ -442,7 +466,7 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap, + #ifdef CONFIG_X86_64 + if (heap > 0x3fffffffffffUL) + error("Destination address too large"); +- if (virt_addr + max(output_len, kernel_total_size) > KERNEL_IMAGE_SIZE) ++ if (virt_addr + needed_size > KERNEL_IMAGE_SIZE) + error("Destination virtual address is beyond the kernel mapping area"); + #else + if (heap > ((-__PAGE_OFFSET-(128<<20)-1) & 0x7fffffff)) +@@ -454,16 +478,17 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap, + #endif + + debug_putstr("\nDecompressing Linux... "); +- __decompress(input_data, input_len, NULL, NULL, output, output_len, +- NULL, error); +- parse_elf(output); +- handle_relocations(output, output_len, virt_addr); +- debug_putstr("done.\nBooting the kernel.\n"); ++ ++ entry_offset = decompress_kernel(output, virt_addr, error); ++ ++ debug_putstr("done.\nBooting the kernel (entry_offset: 0x"); ++ debug_puthex(entry_offset); ++ debug_putstr(").\n"); + + /* Disable exception handling before booting the kernel */ + cleanup_exception_handling(); + +- return output; ++ return output + entry_offset; + } + + void fortify_panic(const char *name) +diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h +index a49d9219c06e5..254acd76efde2 100644 +--- a/arch/x86/boot/compressed/misc.h ++++ b/arch/x86/boot/compressed/misc.h +@@ -52,7 +52,6 @@ extern memptr free_mem_ptr; + extern memptr free_mem_end_ptr; + void *malloc(int size); + void free(void *where); +-extern struct boot_params *boot_params; + void __putstr(const char *s); + void __puthex(unsigned long value); + #define error_putstr(__x) __putstr(__x) +@@ -170,9 +169,7 @@ static inline int count_immovable_mem_regions(void) { return 0; } + #endif + + /* 
ident_map_64.c */ +-#ifdef CONFIG_X86_5LEVEL + extern unsigned int __pgtable_l5_enabled, pgdir_shift, ptrs_per_p4d; +-#endif + extern void kernel_add_identity_map(unsigned long start, unsigned long end); + + /* Used by PAGE_KERN* macros: */ +diff --git a/arch/x86/boot/compressed/pgtable.h b/arch/x86/boot/compressed/pgtable.h +index cc9b2529a0863..6d595abe06b34 100644 +--- a/arch/x86/boot/compressed/pgtable.h ++++ b/arch/x86/boot/compressed/pgtable.h +@@ -3,18 +3,16 @@ + + #define TRAMPOLINE_32BIT_SIZE (2 * PAGE_SIZE) + +-#define TRAMPOLINE_32BIT_PGTABLE_OFFSET 0 +- + #define TRAMPOLINE_32BIT_CODE_OFFSET PAGE_SIZE +-#define TRAMPOLINE_32BIT_CODE_SIZE 0x80 +- +-#define TRAMPOLINE_32BIT_STACK_END TRAMPOLINE_32BIT_SIZE ++#define TRAMPOLINE_32BIT_CODE_SIZE 0xA0 + + #ifndef __ASSEMBLER__ + + extern unsigned long *trampoline_32bit; + +-extern void trampoline_32bit_src(void *return_ptr); ++extern void trampoline_32bit_src(void *trampoline, bool enable_5lvl); ++ ++extern const u16 trampoline_ljmp_imm_offset; + + #endif /* __ASSEMBLER__ */ + #endif /* BOOT_COMPRESSED_PAGETABLE_H */ +diff --git a/arch/x86/boot/compressed/pgtable_64.c b/arch/x86/boot/compressed/pgtable_64.c +index 2ac12ff4111bf..51f957b24ba7a 100644 +--- a/arch/x86/boot/compressed/pgtable_64.c ++++ b/arch/x86/boot/compressed/pgtable_64.c +@@ -16,11 +16,6 @@ unsigned int __section(".data") pgdir_shift = 39; + unsigned int __section(".data") ptrs_per_p4d = 1; + #endif + +-struct paging_config { +- unsigned long trampoline_start; +- unsigned long l5_required; +-}; +- + /* Buffer to preserve trampoline memory */ + static char trampoline_save[TRAMPOLINE_32BIT_SIZE]; + +@@ -29,11 +24,10 @@ static char trampoline_save[TRAMPOLINE_32BIT_SIZE]; + * purposes. + * + * Avoid putting the pointer into .bss as it will be cleared between +- * paging_prepare() and extract_kernel(). ++ * configure_5level_paging() and extract_kernel(). 
+ */ + unsigned long *trampoline_32bit __section(".data"); + +-extern struct boot_params *boot_params; + int cmdline_find_option_bool(const char *option); + + static unsigned long find_trampoline_placement(void) +@@ -54,7 +48,7 @@ static unsigned long find_trampoline_placement(void) + * + * Only look for values in the legacy ROM for non-EFI system. + */ +- signature = (char *)&boot_params->efi_info.efi_loader_signature; ++ signature = (char *)&boot_params_ptr->efi_info.efi_loader_signature; + if (strncmp(signature, EFI32_LOADER_SIGNATURE, 4) && + strncmp(signature, EFI64_LOADER_SIGNATURE, 4)) { + ebda_start = *(unsigned short *)0x40e << 4; +@@ -70,10 +64,10 @@ static unsigned long find_trampoline_placement(void) + bios_start = round_down(bios_start, PAGE_SIZE); + + /* Find the first usable memory region under bios_start. */ +- for (i = boot_params->e820_entries - 1; i >= 0; i--) { ++ for (i = boot_params_ptr->e820_entries - 1; i >= 0; i--) { + unsigned long new = bios_start; + +- entry = &boot_params->e820_table[i]; ++ entry = &boot_params_ptr->e820_table[i]; + + /* Skip all entries above bios_start. */ + if (bios_start <= entry->addr) +@@ -106,12 +100,13 @@ static unsigned long find_trampoline_placement(void) + return bios_start - TRAMPOLINE_32BIT_SIZE; + } + +-struct paging_config paging_prepare(void *rmode) ++asmlinkage void configure_5level_paging(struct boot_params *bp, void *pgtable) + { +- struct paging_config paging_config = {}; ++ void (*toggle_la57)(void *cr3); ++ bool l5_required = false; + + /* Initialize boot_params. Required for cmdline_find_option_bool(). */ +- boot_params = rmode; ++ boot_params_ptr = bp; + + /* + * Check if LA57 is desired and supported. 
+@@ -129,12 +124,22 @@ struct paging_config paging_prepare(void *rmode) + !cmdline_find_option_bool("no5lvl") && + native_cpuid_eax(0) >= 7 && + (native_cpuid_ecx(7) & (1 << (X86_FEATURE_LA57 & 31)))) { +- paging_config.l5_required = 1; ++ l5_required = true; ++ ++ /* Initialize variables for 5-level paging */ ++ __pgtable_l5_enabled = 1; ++ pgdir_shift = 48; ++ ptrs_per_p4d = 512; + } + +- paging_config.trampoline_start = find_trampoline_placement(); ++ /* ++ * The trampoline will not be used if the paging mode is already set to ++ * the desired one. ++ */ ++ if (l5_required == !!(native_read_cr4() & X86_CR4_LA57)) ++ return; + +- trampoline_32bit = (unsigned long *)paging_config.trampoline_start; ++ trampoline_32bit = (unsigned long *)find_trampoline_placement(); + + /* Preserve trampoline memory */ + memcpy(trampoline_save, trampoline_32bit, TRAMPOLINE_32BIT_SIZE); +@@ -143,32 +148,32 @@ struct paging_config paging_prepare(void *rmode) + memset(trampoline_32bit, 0, TRAMPOLINE_32BIT_SIZE); + + /* Copy trampoline code in place */ +- memcpy(trampoline_32bit + TRAMPOLINE_32BIT_CODE_OFFSET / sizeof(unsigned long), ++ toggle_la57 = memcpy(trampoline_32bit + ++ TRAMPOLINE_32BIT_CODE_OFFSET / sizeof(unsigned long), + &trampoline_32bit_src, TRAMPOLINE_32BIT_CODE_SIZE); + ++ /* ++ * Avoid the need for a stack in the 32-bit trampoline code, by using ++ * LJMP rather than LRET to return back to long mode. LJMP takes an ++ * immediate absolute address, which needs to be adjusted based on the ++ * placement of the trampoline. ++ */ ++ *(u32 *)((u8 *)toggle_la57 + trampoline_ljmp_imm_offset) += ++ (unsigned long)toggle_la57; ++ + /* + * The code below prepares page table in trampoline memory. + * + * The new page table will be used by trampoline code for switching + * from 4- to 5-level paging or vice versa. +- * +- * If switching is not required, the page table is unused: trampoline +- * code wouldn't touch CR3. 
+- */ +- +- /* +- * We are not going to use the page table in trampoline memory if we +- * are already in the desired paging mode. + */ +- if (paging_config.l5_required == !!(native_read_cr4() & X86_CR4_LA57)) +- goto out; + +- if (paging_config.l5_required) { ++ if (l5_required) { + /* + * For 4- to 5-level paging transition, set up current CR3 as + * the first and the only entry in a new top-level page table. + */ +- trampoline_32bit[TRAMPOLINE_32BIT_PGTABLE_OFFSET] = __native_read_cr3() | _PAGE_TABLE_NOENC; ++ *trampoline_32bit = __native_read_cr3() | _PAGE_TABLE_NOENC; + } else { + unsigned long src; + +@@ -181,38 +186,17 @@ struct paging_config paging_prepare(void *rmode) + * may be above 4G. + */ + src = *(unsigned long *)__native_read_cr3() & PAGE_MASK; +- memcpy(trampoline_32bit + TRAMPOLINE_32BIT_PGTABLE_OFFSET / sizeof(unsigned long), +- (void *)src, PAGE_SIZE); ++ memcpy(trampoline_32bit, (void *)src, PAGE_SIZE); + } + +-out: +- return paging_config; +-} +- +-void cleanup_trampoline(void *pgtable) +-{ +- void *trampoline_pgtable; +- +- trampoline_pgtable = trampoline_32bit + TRAMPOLINE_32BIT_PGTABLE_OFFSET / sizeof(unsigned long); ++ toggle_la57(trampoline_32bit); + + /* +- * Move the top level page table out of trampoline memory, +- * if it's there. ++ * Move the top level page table out of trampoline memory. 
+ */ +- if ((void *)__native_read_cr3() == trampoline_pgtable) { +- memcpy(pgtable, trampoline_pgtable, PAGE_SIZE); +- native_write_cr3((unsigned long)pgtable); +- } ++ memcpy(pgtable, trampoline_32bit, PAGE_SIZE); ++ native_write_cr3((unsigned long)pgtable); + + /* Restore trampoline memory */ + memcpy(trampoline_32bit, trampoline_save, TRAMPOLINE_32BIT_SIZE); +- +- /* Initialize variables for 5-level paging */ +-#ifdef CONFIG_X86_5LEVEL +- if (__read_cr4() & X86_CR4_LA57) { +- __pgtable_l5_enabled = 1; +- pgdir_shift = 48; +- ptrs_per_p4d = 512; +- } +-#endif + } +diff --git a/arch/x86/boot/compressed/sev.c b/arch/x86/boot/compressed/sev.c +index 9c91cc40f4565..d07e665bb265b 100644 +--- a/arch/x86/boot/compressed/sev.c ++++ b/arch/x86/boot/compressed/sev.c +@@ -327,20 +327,25 @@ static void enforce_vmpl0(void) + */ + #define SNP_FEATURES_PRESENT (0) + ++u64 snp_get_unsupported_features(u64 status) ++{ ++ if (!(status & MSR_AMD64_SEV_SNP_ENABLED)) ++ return 0; ++ ++ return status & SNP_FEATURES_IMPL_REQ & ~SNP_FEATURES_PRESENT; ++} ++ + void snp_check_features(void) + { + u64 unsupported; + +- if (!(sev_status & MSR_AMD64_SEV_SNP_ENABLED)) +- return; +- + /* + * Terminate the boot if hypervisor has enabled any feature lacking + * guest side implementation. Pass on the unsupported features mask through + * EXIT_INFO_2 of the GHCB protocol so that those features can be reported + * as part of the guest boot failure. 
+ */ +- unsupported = sev_status & SNP_FEATURES_IMPL_REQ & ~SNP_FEATURES_PRESENT; ++ unsupported = snp_get_unsupported_features(sev_status); + if (unsupported) { + if (ghcb_version < 2 || (!boot_ghcb && !early_setup_ghcb())) + sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED); +@@ -350,35 +355,22 @@ void snp_check_features(void) + } + } + +-void sev_enable(struct boot_params *bp) ++/* ++ * sev_check_cpu_support - Check for SEV support in the CPU capabilities ++ * ++ * Returns < 0 if SEV is not supported, otherwise the position of the ++ * encryption bit in the page table descriptors. ++ */ ++static int sev_check_cpu_support(void) + { + unsigned int eax, ebx, ecx, edx; +- struct msr m; +- bool snp; +- +- /* +- * bp->cc_blob_address should only be set by boot/compressed kernel. +- * Initialize it to 0 to ensure that uninitialized values from +- * buggy bootloaders aren't propagated. +- */ +- if (bp) +- bp->cc_blob_address = 0; +- +- /* +- * Do an initial SEV capability check before snp_init() which +- * loads the CPUID page and the same checks afterwards are done +- * without the hypervisor and are trustworthy. +- * +- * If the HV fakes SEV support, the guest will crash'n'burn +- * which is good enough. +- */ + + /* Check for the SME/SEV support leaf */ + eax = 0x80000000; + ecx = 0; + native_cpuid(&eax, &ebx, &ecx, &edx); + if (eax < 0x8000001f) +- return; ++ return -ENODEV; + + /* + * Check for the SME/SEV feature: +@@ -393,6 +385,35 @@ void sev_enable(struct boot_params *bp) + native_cpuid(&eax, &ebx, &ecx, &edx); + /* Check whether SEV is supported */ + if (!(eax & BIT(1))) ++ return -ENODEV; ++ ++ return ebx & 0x3f; ++} ++ ++void sev_enable(struct boot_params *bp) ++{ ++ struct msr m; ++ int bitpos; ++ bool snp; ++ ++ /* ++ * bp->cc_blob_address should only be set by boot/compressed kernel. ++ * Initialize it to 0 to ensure that uninitialized values from ++ * buggy bootloaders aren't propagated. 
++ */ ++ if (bp) ++ bp->cc_blob_address = 0; ++ ++ /* ++ * Do an initial SEV capability check before snp_init() which ++ * loads the CPUID page and the same checks afterwards are done ++ * without the hypervisor and are trustworthy. ++ * ++ * If the HV fakes SEV support, the guest will crash'n'burn ++ * which is good enough. ++ */ ++ ++ if (sev_check_cpu_support() < 0) + return; + + /* +@@ -403,26 +424,8 @@ void sev_enable(struct boot_params *bp) + + /* Now repeat the checks with the SNP CPUID table. */ + +- /* Recheck the SME/SEV support leaf */ +- eax = 0x80000000; +- ecx = 0; +- native_cpuid(&eax, &ebx, &ecx, &edx); +- if (eax < 0x8000001f) +- return; +- +- /* +- * Recheck for the SME/SEV feature: +- * CPUID Fn8000_001F[EAX] +- * - Bit 0 - Secure Memory Encryption support +- * - Bit 1 - Secure Encrypted Virtualization support +- * CPUID Fn8000_001F[EBX] +- * - Bits 5:0 - Pagetable bit position used to indicate encryption +- */ +- eax = 0x8000001f; +- ecx = 0; +- native_cpuid(&eax, &ebx, &ecx, &edx); +- /* Check whether SEV is supported */ +- if (!(eax & BIT(1))) { ++ bitpos = sev_check_cpu_support(); ++ if (bitpos < 0) { + if (snp) + error("SEV-SNP support indicated by CC blob, but not CPUID."); + return; +@@ -454,7 +457,24 @@ void sev_enable(struct boot_params *bp) + if (snp && !(sev_status & MSR_AMD64_SEV_SNP_ENABLED)) + error("SEV-SNP supported indicated by CC blob, but not SEV status MSR."); + +- sme_me_mask = BIT_ULL(ebx & 0x3f); ++ sme_me_mask = BIT_ULL(bitpos); ++} ++ ++/* ++ * sev_get_status - Retrieve the SEV status mask ++ * ++ * Returns 0 if the CPU is not SEV capable, otherwise the value of the ++ * AMD64_SEV MSR. ++ */ ++u64 sev_get_status(void) ++{ ++ struct msr m; ++ ++ if (sev_check_cpu_support() < 0) ++ return 0; ++ ++ boot_rdmsr(MSR_AMD64_SEV, &m); ++ return m.q; + } + + /* Search for Confidential Computing blob in the EFI config table. 
*/ +@@ -545,7 +565,7 @@ void sev_prep_identity_maps(unsigned long top_level_pgt) + * accessed after switchover. + */ + if (sev_snp_enabled()) { +- unsigned long cc_info_pa = boot_params->cc_blob_address; ++ unsigned long cc_info_pa = boot_params_ptr->cc_blob_address; + struct cc_blob_sev_info *cc_info; + + kernel_add_identity_map(cc_info_pa, cc_info_pa + sizeof(*cc_info)); +diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S +index f912d77701305..d31982509654d 100644 +--- a/arch/x86/boot/header.S ++++ b/arch/x86/boot/header.S +@@ -406,7 +406,7 @@ xloadflags: + # define XLF1 0 + #endif + +-#ifdef CONFIG_EFI_STUB ++#ifdef CONFIG_EFI_HANDOVER_PROTOCOL + # ifdef CONFIG_EFI_MIXED + # define XLF23 (XLF_EFI_HANDOVER_32|XLF_EFI_HANDOVER_64) + # else +diff --git a/arch/x86/boot/tools/build.c b/arch/x86/boot/tools/build.c +index a3725ad46c5a0..bd247692b7017 100644 +--- a/arch/x86/boot/tools/build.c ++++ b/arch/x86/boot/tools/build.c +@@ -290,6 +290,7 @@ static void efi_stub_entry_update(void) + { + unsigned long addr = efi32_stub_entry; + ++#ifdef CONFIG_EFI_HANDOVER_PROTOCOL + #ifdef CONFIG_X86_64 + /* Yes, this is really how we defined it :( */ + addr = efi64_stub_entry - 0x200; +@@ -298,6 +299,7 @@ static void efi_stub_entry_update(void) + #ifdef CONFIG_EFI_MIXED + if (efi32_stub_entry != addr) + die("32-bit and 64-bit EFI entry points do not match\n"); ++#endif + #endif + put_unaligned_le32(addr, &buf[0x264]); + } +diff --git a/arch/x86/entry/entry.S b/arch/x86/entry/entry.S +index bfb7bcb362bcf..09e99d13fc0b3 100644 +--- a/arch/x86/entry/entry.S ++++ b/arch/x86/entry/entry.S +@@ -6,6 +6,9 @@ + #include <linux/linkage.h> + #include <asm/export.h> + #include <asm/msr-index.h> ++#include <asm/unwind_hints.h> ++#include <asm/segment.h> ++#include <asm/cache.h> + + .pushsection .noinstr.text, "ax" + +@@ -20,3 +23,23 @@ SYM_FUNC_END(entry_ibpb) + EXPORT_SYMBOL_GPL(entry_ibpb); + + .popsection ++ ++/* ++ * Define the VERW operand that is disguised as entry code so 
that ++ * it can be referenced with KPTI enabled. This ensure VERW can be ++ * used late in exit-to-user path after page tables are switched. ++ */ ++.pushsection .entry.text, "ax" ++ ++.align L1_CACHE_BYTES, 0xcc ++SYM_CODE_START_NOALIGN(mds_verw_sel) ++ UNWIND_HINT_EMPTY ++ ANNOTATE_NOENDBR ++ .word __KERNEL_DS ++.align L1_CACHE_BYTES, 0xcc ++SYM_CODE_END(mds_verw_sel); ++/* For KVM */ ++EXPORT_SYMBOL_GPL(mds_verw_sel); ++ ++.popsection ++ +diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S +index e309e71560389..ee5def1060c86 100644 +--- a/arch/x86/entry/entry_32.S ++++ b/arch/x86/entry/entry_32.S +@@ -912,6 +912,7 @@ SYM_FUNC_START(entry_SYSENTER_32) + BUG_IF_WRONG_CR3 no_user_check=1 + popfl + popl %eax ++ CLEAR_CPU_BUFFERS + + /* + * Return back to the vDSO, which will pop ecx and edx. +@@ -981,6 +982,7 @@ restore_all_switch_stack: + + /* Restore user state */ + RESTORE_REGS pop=4 # skip orig_eax/error_code ++ CLEAR_CPU_BUFFERS + .Lirq_return: + /* + * ARCH_HAS_MEMBARRIER_SYNC_CORE rely on IRET core serialization +@@ -1173,6 +1175,7 @@ SYM_CODE_START(asm_exc_nmi) + + /* Not on SYSENTER stack. */ + call exc_nmi ++ CLEAR_CPU_BUFFERS + jmp .Lnmi_return + + .Lnmi_from_sysenter_stack: +diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S +index 9953d966d1244..c2383c2880ec6 100644 +--- a/arch/x86/entry/entry_64.S ++++ b/arch/x86/entry/entry_64.S +@@ -223,6 +223,7 @@ syscall_return_via_sysret: + SYM_INNER_LABEL(entry_SYSRETQ_unsafe_stack, SYM_L_GLOBAL) + ANNOTATE_NOENDBR + swapgs ++ CLEAR_CPU_BUFFERS + sysretq + SYM_INNER_LABEL(entry_SYSRETQ_end, SYM_L_GLOBAL) + ANNOTATE_NOENDBR +@@ -656,6 +657,7 @@ SYM_INNER_LABEL(swapgs_restore_regs_and_return_to_usermode, SYM_L_GLOBAL) + /* Restore RDI. 
*/ + popq %rdi + swapgs ++ CLEAR_CPU_BUFFERS + jmp .Lnative_iret + + +@@ -767,6 +769,8 @@ native_irq_return_ldt: + */ + popq %rax /* Restore user RAX */ + ++ CLEAR_CPU_BUFFERS ++ + /* + * RSP now points to an ordinary IRET frame, except that the page + * is read-only and RSP[31:16] are preloaded with the userspace +@@ -1493,6 +1497,12 @@ nmi_restore: + std + movq $0, 5*8(%rsp) /* clear "NMI executing" */ + ++ /* ++ * Skip CLEAR_CPU_BUFFERS here, since it only helps in rare cases like ++ * NMI in kernel after user state is restored. For an unprivileged user ++ * these conditions are hard to meet. ++ */ ++ + /* + * iretq reads the "iret" frame and exits the NMI stack in a + * single instruction. We are returning to kernel mode, so this +@@ -1511,6 +1521,7 @@ SYM_CODE_START(ignore_sysret) + UNWIND_HINT_EMPTY + ENDBR + mov $-ENOSYS, %eax ++ CLEAR_CPU_BUFFERS + sysretl + SYM_CODE_END(ignore_sysret) + #endif +diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S +index d6c08d8986b17..4bcd009a232bf 100644 +--- a/arch/x86/entry/entry_64_compat.S ++++ b/arch/x86/entry/entry_64_compat.S +@@ -272,6 +272,7 @@ SYM_INNER_LABEL(entry_SYSRETL_compat_unsafe_stack, SYM_L_GLOBAL) + xorl %r9d, %r9d + xorl %r10d, %r10d + swapgs ++ CLEAR_CPU_BUFFERS + sysretl + SYM_INNER_LABEL(entry_SYSRETL_compat_end, SYM_L_GLOBAL) + ANNOTATE_NOENDBR +diff --git a/arch/x86/include/asm/boot.h b/arch/x86/include/asm/boot.h +index 215d37f7dde8a..a38cc0afc90a0 100644 +--- a/arch/x86/include/asm/boot.h ++++ b/arch/x86/include/asm/boot.h +@@ -79,4 +79,14 @@ + # define BOOT_STACK_SIZE 0x1000 + #endif + ++#ifndef __ASSEMBLY__ ++extern unsigned int output_len; ++extern const unsigned long kernel_total_size; ++ ++unsigned long decompress_kernel(unsigned char *outbuf, unsigned long virt_addr, ++ void (*error)(char *x)); ++ ++extern struct boot_params *boot_params_ptr; ++#endif ++ + #endif /* _ASM_X86_BOOT_H */ +diff --git a/arch/x86/include/asm/cpufeatures.h 
b/arch/x86/include/asm/cpufeatures.h +index b122708792c4d..b60f24b30cb90 100644 +--- a/arch/x86/include/asm/cpufeatures.h ++++ b/arch/x86/include/asm/cpufeatures.h +@@ -304,7 +304,7 @@ + #define X86_FEATURE_UNRET (11*32+15) /* "" AMD BTB untrain return */ + #define X86_FEATURE_USE_IBPB_FW (11*32+16) /* "" Use IBPB during runtime firmware calls */ + #define X86_FEATURE_RSB_VMEXIT_LITE (11*32+17) /* "" Fill RSB on VM exit when EIBRS is enabled */ +- ++#define X86_FEATURE_CLEAR_CPU_BUF (11*32+18) /* "" Clear CPU buffers using VERW */ + + #define X86_FEATURE_MSR_TSX_CTRL (11*32+20) /* "" MSR IA32_TSX_CTRL (Intel) implemented */ + +diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h +index 233ae6986d6f2..e601264b1a243 100644 +--- a/arch/x86/include/asm/efi.h ++++ b/arch/x86/include/asm/efi.h +@@ -88,6 +88,8 @@ static inline void efi_fpu_end(void) + } + + #ifdef CONFIG_X86_32 ++#define EFI_X86_KERNEL_ALLOC_LIMIT (SZ_512M - 1) ++ + #define arch_efi_call_virt_setup() \ + ({ \ + efi_fpu_begin(); \ +@@ -101,8 +103,7 @@ static inline void efi_fpu_end(void) + }) + + #else /* !CONFIG_X86_32 */ +- +-#define EFI_LOADER_SIGNATURE "EL64" ++#define EFI_X86_KERNEL_ALLOC_LIMIT EFI_ALLOC_LIMIT + + extern asmlinkage u64 __efi_call(void *fp, ...); + +@@ -214,6 +215,8 @@ efi_status_t efi_set_virtual_address_map(unsigned long memory_map_size, + + #ifdef CONFIG_EFI_MIXED + ++#define EFI_ALLOC_LIMIT (efi_is_64bit() ? 
ULONG_MAX : U32_MAX) ++ + #define ARCH_HAS_EFISTUB_WRAPPERS + + static inline bool efi_is_64bit(void) +@@ -325,6 +328,13 @@ static inline u32 efi64_convert_status(efi_status_t status) + #define __efi64_argmap_set_memory_space_attributes(phys, size, flags) \ + (__efi64_split(phys), __efi64_split(size), __efi64_split(flags)) + ++/* Memory Attribute Protocol */ ++#define __efi64_argmap_set_memory_attributes(protocol, phys, size, flags) \ ++ ((protocol), __efi64_split(phys), __efi64_split(size), __efi64_split(flags)) ++ ++#define __efi64_argmap_clear_memory_attributes(protocol, phys, size, flags) \ ++ ((protocol), __efi64_split(phys), __efi64_split(size), __efi64_split(flags)) ++ + /* + * The macros below handle the plumbing for the argument mapping. To add a + * mapping for a specific EFI method, simply define a macro +diff --git a/arch/x86/include/asm/entry-common.h b/arch/x86/include/asm/entry-common.h +index 11203a9fe0a87..ffe72790ceafd 100644 +--- a/arch/x86/include/asm/entry-common.h ++++ b/arch/x86/include/asm/entry-common.h +@@ -91,7 +91,6 @@ static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs, + + static __always_inline void arch_exit_to_user_mode(void) + { +- mds_user_clear_cpu_buffers(); + amd_clear_divider(); + } + #define arch_exit_to_user_mode arch_exit_to_user_mode +diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h +index d3706de91a934..8f6f17a8617b6 100644 +--- a/arch/x86/include/asm/nospec-branch.h ++++ b/arch/x86/include/asm/nospec-branch.h +@@ -194,6 +194,19 @@ + #endif + .endm + ++/* ++ * Macro to execute VERW instruction that mitigate transient data sampling ++ * attacks such as MDS. On affected systems a microcode update overloaded VERW ++ * instruction to also clear the CPU buffers. VERW clobbers CFLAGS.ZF. ++ * ++ * Note: Only the memory operand variant of VERW clears the CPU buffers. 
++ */ ++.macro CLEAR_CPU_BUFFERS ++ ALTERNATIVE "jmp .Lskip_verw_\@", "", X86_FEATURE_CLEAR_CPU_BUF ++ verw _ASM_RIP(mds_verw_sel) ++.Lskip_verw_\@: ++.endm ++ + #else /* __ASSEMBLY__ */ + + #define ANNOTATE_RETPOLINE_SAFE \ +@@ -368,13 +381,14 @@ DECLARE_STATIC_KEY_FALSE(switch_to_cond_stibp); + DECLARE_STATIC_KEY_FALSE(switch_mm_cond_ibpb); + DECLARE_STATIC_KEY_FALSE(switch_mm_always_ibpb); + +-DECLARE_STATIC_KEY_FALSE(mds_user_clear); + DECLARE_STATIC_KEY_FALSE(mds_idle_clear); + + DECLARE_STATIC_KEY_FALSE(switch_mm_cond_l1d_flush); + + DECLARE_STATIC_KEY_FALSE(mmio_stale_data_clear); + ++extern u16 mds_verw_sel; ++ + #include <asm/segment.h> + + /** +@@ -400,17 +414,6 @@ static __always_inline void mds_clear_cpu_buffers(void) + asm volatile("verw %[ds]" : : [ds] "m" (ds) : "cc"); + } + +-/** +- * mds_user_clear_cpu_buffers - Mitigation for MDS and TAA vulnerability +- * +- * Clear CPU buffers if the corresponding static key is enabled +- */ +-static __always_inline void mds_user_clear_cpu_buffers(void) +-{ +- if (static_branch_likely(&mds_user_clear)) +- mds_clear_cpu_buffers(); +-} +- + /** + * mds_idle_clear_cpu_buffers - Mitigation for MDS vulnerability + * +diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h +index 7ca5c9ec8b52e..cf98fc28601fb 100644 +--- a/arch/x86/include/asm/sev.h ++++ b/arch/x86/include/asm/sev.h +@@ -157,6 +157,7 @@ static __always_inline void sev_es_nmi_complete(void) + __sev_es_nmi_complete(); + } + extern int __init sev_es_efi_map_ghcbs(pgd_t *pgd); ++extern void sev_enable(struct boot_params *bp); + + static inline int rmpadjust(unsigned long vaddr, bool rmp_psize, unsigned long attrs) + { +@@ -202,12 +203,15 @@ void snp_set_wakeup_secondary_cpu(void); + bool snp_init(struct boot_params *bp); + void __init __noreturn snp_abort(void); + int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, struct snp_guest_request_ioctl *rio); ++u64 snp_get_unsupported_features(u64 status); ++u64 
sev_get_status(void); + #else + static inline void sev_es_ist_enter(struct pt_regs *regs) { } + static inline void sev_es_ist_exit(void) { } + static inline int sev_es_setup_ap_jump_table(struct real_mode_header *rmh) { return 0; } + static inline void sev_es_nmi_complete(void) { } + static inline int sev_es_efi_map_ghcbs(pgd_t *pgd) { return 0; } ++static inline void sev_enable(struct boot_params *bp) { } + static inline int pvalidate(unsigned long vaddr, bool rmp_psize, bool validate) { return 0; } + static inline int rmpadjust(unsigned long vaddr, bool rmp_psize, unsigned long attrs) { return 0; } + static inline void setup_ghcb(void) { } +@@ -225,6 +229,9 @@ static inline int snp_issue_guest_request(u64 exit_code, struct snp_req_data *in + { + return -ENOTTY; + } ++ ++static inline u64 snp_get_unsupported_features(u64 status) { return 0; } ++static inline u64 sev_get_status(void) { return 0; } + #endif + + #endif +diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c +index 13dffc43ded02..d1895930e6eb8 100644 +--- a/arch/x86/kernel/cpu/bugs.c ++++ b/arch/x86/kernel/cpu/bugs.c +@@ -110,9 +110,6 @@ DEFINE_STATIC_KEY_FALSE(switch_mm_cond_ibpb); + /* Control unconditional IBPB in switch_mm() */ + DEFINE_STATIC_KEY_FALSE(switch_mm_always_ibpb); + +-/* Control MDS CPU buffer clear before returning to user space */ +-DEFINE_STATIC_KEY_FALSE(mds_user_clear); +-EXPORT_SYMBOL_GPL(mds_user_clear); + /* Control MDS CPU buffer clear before idling (halt, mwait) */ + DEFINE_STATIC_KEY_FALSE(mds_idle_clear); + EXPORT_SYMBOL_GPL(mds_idle_clear); +@@ -251,7 +248,7 @@ static void __init mds_select_mitigation(void) + if (!boot_cpu_has(X86_FEATURE_MD_CLEAR)) + mds_mitigation = MDS_MITIGATION_VMWERV; + +- static_branch_enable(&mds_user_clear); ++ setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF); + + if (!boot_cpu_has(X86_BUG_MSBDS_ONLY) && + (mds_nosmt || cpu_mitigations_auto_nosmt())) +@@ -355,7 +352,7 @@ static void __init taa_select_mitigation(void) + * For guests 
that can't determine whether the correct microcode is + * present on host, enable the mitigation for UCODE_NEEDED as well. + */ +- static_branch_enable(&mds_user_clear); ++ setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF); + + if (taa_nosmt || cpu_mitigations_auto_nosmt()) + cpu_smt_disable(false); +@@ -423,7 +420,7 @@ static void __init mmio_select_mitigation(void) + */ + if (boot_cpu_has_bug(X86_BUG_MDS) || (boot_cpu_has_bug(X86_BUG_TAA) && + boot_cpu_has(X86_FEATURE_RTM))) +- static_branch_enable(&mds_user_clear); ++ setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF); + else + static_branch_enable(&mmio_stale_data_clear); + +@@ -483,12 +480,12 @@ static void __init md_clear_update_mitigation(void) + if (cpu_mitigations_off()) + return; + +- if (!static_key_enabled(&mds_user_clear)) ++ if (!boot_cpu_has(X86_FEATURE_CLEAR_CPU_BUF)) + goto out; + + /* +- * mds_user_clear is now enabled. Update MDS, TAA and MMIO Stale Data +- * mitigation, if necessary. ++ * X86_FEATURE_CLEAR_CPU_BUF is now enabled. Update MDS, TAA and MMIO ++ * Stale Data mitigation, if necessary. 
+ */ + if (mds_mitigation == MDS_MITIGATION_OFF && + boot_cpu_has_bug(X86_BUG_MDS)) { +diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c +index 4278996504833..32bd640170475 100644 +--- a/arch/x86/kernel/cpu/intel.c ++++ b/arch/x86/kernel/cpu/intel.c +@@ -216,6 +216,90 @@ int intel_cpu_collect_info(struct ucode_cpu_info *uci) + } + EXPORT_SYMBOL_GPL(intel_cpu_collect_info); + ++#define MSR_IA32_TME_ACTIVATE 0x982 ++ ++/* Helpers to access TME_ACTIVATE MSR */ ++#define TME_ACTIVATE_LOCKED(x) (x & 0x1) ++#define TME_ACTIVATE_ENABLED(x) (x & 0x2) ++ ++#define TME_ACTIVATE_POLICY(x) ((x >> 4) & 0xf) /* Bits 7:4 */ ++#define TME_ACTIVATE_POLICY_AES_XTS_128 0 ++ ++#define TME_ACTIVATE_KEYID_BITS(x) ((x >> 32) & 0xf) /* Bits 35:32 */ ++ ++#define TME_ACTIVATE_CRYPTO_ALGS(x) ((x >> 48) & 0xffff) /* Bits 63:48 */ ++#define TME_ACTIVATE_CRYPTO_AES_XTS_128 1 ++ ++/* Values for mktme_status (SW only construct) */ ++#define MKTME_ENABLED 0 ++#define MKTME_DISABLED 1 ++#define MKTME_UNINITIALIZED 2 ++static int mktme_status = MKTME_UNINITIALIZED; ++ ++static void detect_tme_early(struct cpuinfo_x86 *c) ++{ ++ u64 tme_activate, tme_policy, tme_crypto_algs; ++ int keyid_bits = 0, nr_keyids = 0; ++ static u64 tme_activate_cpu0 = 0; ++ ++ rdmsrl(MSR_IA32_TME_ACTIVATE, tme_activate); ++ ++ if (mktme_status != MKTME_UNINITIALIZED) { ++ if (tme_activate != tme_activate_cpu0) { ++ /* Broken BIOS? */ ++ pr_err_once("x86/tme: configuration is inconsistent between CPUs\n"); ++ pr_err_once("x86/tme: MKTME is not usable\n"); ++ mktme_status = MKTME_DISABLED; ++ ++ /* Proceed. We may need to exclude bits from x86_phys_bits. 
*/ ++ } ++ } else { ++ tme_activate_cpu0 = tme_activate; ++ } ++ ++ if (!TME_ACTIVATE_LOCKED(tme_activate) || !TME_ACTIVATE_ENABLED(tme_activate)) { ++ pr_info_once("x86/tme: not enabled by BIOS\n"); ++ mktme_status = MKTME_DISABLED; ++ return; ++ } ++ ++ if (mktme_status != MKTME_UNINITIALIZED) ++ goto detect_keyid_bits; ++ ++ pr_info("x86/tme: enabled by BIOS\n"); ++ ++ tme_policy = TME_ACTIVATE_POLICY(tme_activate); ++ if (tme_policy != TME_ACTIVATE_POLICY_AES_XTS_128) ++ pr_warn("x86/tme: Unknown policy is active: %#llx\n", tme_policy); ++ ++ tme_crypto_algs = TME_ACTIVATE_CRYPTO_ALGS(tme_activate); ++ if (!(tme_crypto_algs & TME_ACTIVATE_CRYPTO_AES_XTS_128)) { ++ pr_err("x86/mktme: No known encryption algorithm is supported: %#llx\n", ++ tme_crypto_algs); ++ mktme_status = MKTME_DISABLED; ++ } ++detect_keyid_bits: ++ keyid_bits = TME_ACTIVATE_KEYID_BITS(tme_activate); ++ nr_keyids = (1UL << keyid_bits) - 1; ++ if (nr_keyids) { ++ pr_info_once("x86/mktme: enabled by BIOS\n"); ++ pr_info_once("x86/mktme: %d KeyIDs available\n", nr_keyids); ++ } else { ++ pr_info_once("x86/mktme: disabled by BIOS\n"); ++ } ++ ++ if (mktme_status == MKTME_UNINITIALIZED) { ++ /* MKTME is usable */ ++ mktme_status = MKTME_ENABLED; ++ } ++ ++ /* ++ * KeyID bits effectively lower the number of physical address ++ * bits. Update cpuinfo_x86::x86_phys_bits accordingly. ++ */ ++ c->x86_phys_bits -= keyid_bits; ++} ++ + static void early_init_intel(struct cpuinfo_x86 *c) + { + u64 misc_enable; +@@ -367,6 +451,13 @@ static void early_init_intel(struct cpuinfo_x86 *c) + */ + if (detect_extended_topology_early(c) < 0) + detect_ht_early(c); ++ ++ /* ++ * Adjust the number of physical bits early because it affects the ++ * valid bits of the MTRR mask registers. 
++ */ ++ if (cpu_has(c, X86_FEATURE_TME)) ++ detect_tme_early(c); + } + + static void bsp_init_intel(struct cpuinfo_x86 *c) +@@ -527,90 +618,6 @@ static void srat_detect_node(struct cpuinfo_x86 *c) + #endif + } + +-#define MSR_IA32_TME_ACTIVATE 0x982 +- +-/* Helpers to access TME_ACTIVATE MSR */ +-#define TME_ACTIVATE_LOCKED(x) (x & 0x1) +-#define TME_ACTIVATE_ENABLED(x) (x & 0x2) +- +-#define TME_ACTIVATE_POLICY(x) ((x >> 4) & 0xf) /* Bits 7:4 */ +-#define TME_ACTIVATE_POLICY_AES_XTS_128 0 +- +-#define TME_ACTIVATE_KEYID_BITS(x) ((x >> 32) & 0xf) /* Bits 35:32 */ +- +-#define TME_ACTIVATE_CRYPTO_ALGS(x) ((x >> 48) & 0xffff) /* Bits 63:48 */ +-#define TME_ACTIVATE_CRYPTO_AES_XTS_128 1 +- +-/* Values for mktme_status (SW only construct) */ +-#define MKTME_ENABLED 0 +-#define MKTME_DISABLED 1 +-#define MKTME_UNINITIALIZED 2 +-static int mktme_status = MKTME_UNINITIALIZED; +- +-static void detect_tme(struct cpuinfo_x86 *c) +-{ +- u64 tme_activate, tme_policy, tme_crypto_algs; +- int keyid_bits = 0, nr_keyids = 0; +- static u64 tme_activate_cpu0 = 0; +- +- rdmsrl(MSR_IA32_TME_ACTIVATE, tme_activate); +- +- if (mktme_status != MKTME_UNINITIALIZED) { +- if (tme_activate != tme_activate_cpu0) { +- /* Broken BIOS? */ +- pr_err_once("x86/tme: configuration is inconsistent between CPUs\n"); +- pr_err_once("x86/tme: MKTME is not usable\n"); +- mktme_status = MKTME_DISABLED; +- +- /* Proceed. We may need to exclude bits from x86_phys_bits. 
*/ +- } +- } else { +- tme_activate_cpu0 = tme_activate; +- } +- +- if (!TME_ACTIVATE_LOCKED(tme_activate) || !TME_ACTIVATE_ENABLED(tme_activate)) { +- pr_info_once("x86/tme: not enabled by BIOS\n"); +- mktme_status = MKTME_DISABLED; +- return; +- } +- +- if (mktme_status != MKTME_UNINITIALIZED) +- goto detect_keyid_bits; +- +- pr_info("x86/tme: enabled by BIOS\n"); +- +- tme_policy = TME_ACTIVATE_POLICY(tme_activate); +- if (tme_policy != TME_ACTIVATE_POLICY_AES_XTS_128) +- pr_warn("x86/tme: Unknown policy is active: %#llx\n", tme_policy); +- +- tme_crypto_algs = TME_ACTIVATE_CRYPTO_ALGS(tme_activate); +- if (!(tme_crypto_algs & TME_ACTIVATE_CRYPTO_AES_XTS_128)) { +- pr_err("x86/mktme: No known encryption algorithm is supported: %#llx\n", +- tme_crypto_algs); +- mktme_status = MKTME_DISABLED; +- } +-detect_keyid_bits: +- keyid_bits = TME_ACTIVATE_KEYID_BITS(tme_activate); +- nr_keyids = (1UL << keyid_bits) - 1; +- if (nr_keyids) { +- pr_info_once("x86/mktme: enabled by BIOS\n"); +- pr_info_once("x86/mktme: %d KeyIDs available\n", nr_keyids); +- } else { +- pr_info_once("x86/mktme: disabled by BIOS\n"); +- } +- +- if (mktme_status == MKTME_UNINITIALIZED) { +- /* MKTME is usable */ +- mktme_status = MKTME_ENABLED; +- } +- +- /* +- * KeyID bits effectively lower the number of physical address +- * bits. Update cpuinfo_x86::x86_phys_bits accordingly. 
+- */ +- c->x86_phys_bits -= keyid_bits; +-} +- + static void init_cpuid_fault(struct cpuinfo_x86 *c) + { + u64 msr; +@@ -747,9 +754,6 @@ static void init_intel(struct cpuinfo_x86 *c) + + init_ia32_feat_ctl(c); + +- if (cpu_has(c, X86_FEATURE_TME)) +- detect_tme(c); +- + init_intel_misc_features(c); + + split_lock_init(); +diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c +index 9dac24680ff8e..993734e96615a 100644 +--- a/arch/x86/kernel/e820.c ++++ b/arch/x86/kernel/e820.c +@@ -1017,10 +1017,12 @@ void __init e820__reserve_setup_data(void) + e820__range_update(pa_data, sizeof(*data)+data->len, E820_TYPE_RAM, E820_TYPE_RESERVED_KERN); + + /* +- * SETUP_EFI and SETUP_IMA are supplied by kexec and do not need +- * to be reserved. ++ * SETUP_EFI, SETUP_IMA and SETUP_RNG_SEED are supplied by ++ * kexec and do not need to be reserved. + */ +- if (data->type != SETUP_EFI && data->type != SETUP_IMA) ++ if (data->type != SETUP_EFI && ++ data->type != SETUP_IMA && ++ data->type != SETUP_RNG_SEED) + e820__range_update_kexec(pa_data, + sizeof(*data) + data->len, + E820_TYPE_RAM, E820_TYPE_RESERVED_KERN); +diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c +index cec0bfa3bc04f..ed6cce6c39504 100644 +--- a/arch/x86/kernel/nmi.c ++++ b/arch/x86/kernel/nmi.c +@@ -522,9 +522,6 @@ DEFINE_IDTENTRY_RAW(exc_nmi) + write_cr2(this_cpu_read(nmi_cr2)); + if (this_cpu_dec_return(nmi_state)) + goto nmi_restart; +- +- if (user_mode(regs)) +- mds_user_clear_cpu_buffers(); + } + + #if defined(CONFIG_X86_64) && IS_ENABLED(CONFIG_KVM_INTEL) +diff --git a/arch/x86/kvm/vmx/run_flags.h b/arch/x86/kvm/vmx/run_flags.h +index edc3f16cc1896..6a9bfdfbb6e59 100644 +--- a/arch/x86/kvm/vmx/run_flags.h ++++ b/arch/x86/kvm/vmx/run_flags.h +@@ -2,7 +2,10 @@ + #ifndef __KVM_X86_VMX_RUN_FLAGS_H + #define __KVM_X86_VMX_RUN_FLAGS_H + +-#define VMX_RUN_VMRESUME (1 << 0) +-#define VMX_RUN_SAVE_SPEC_CTRL (1 << 1) ++#define VMX_RUN_VMRESUME_SHIFT 0 ++#define VMX_RUN_SAVE_SPEC_CTRL_SHIFT 1 ++ 
++#define VMX_RUN_VMRESUME BIT(VMX_RUN_VMRESUME_SHIFT) ++#define VMX_RUN_SAVE_SPEC_CTRL BIT(VMX_RUN_SAVE_SPEC_CTRL_SHIFT) + + #endif /* __KVM_X86_VMX_RUN_FLAGS_H */ +diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S +index 0b5db4de4d09e..0b2cad66dee12 100644 +--- a/arch/x86/kvm/vmx/vmenter.S ++++ b/arch/x86/kvm/vmx/vmenter.S +@@ -106,7 +106,7 @@ SYM_FUNC_START(__vmx_vcpu_run) + mov (%_ASM_SP), %_ASM_AX + + /* Check if vmlaunch or vmresume is needed */ +- testb $VMX_RUN_VMRESUME, %bl ++ bt $VMX_RUN_VMRESUME_SHIFT, %bx + + /* Load guest registers. Don't clobber flags. */ + mov VCPU_RCX(%_ASM_AX), %_ASM_CX +@@ -128,8 +128,11 @@ SYM_FUNC_START(__vmx_vcpu_run) + /* Load guest RAX. This kills the @regs pointer! */ + mov VCPU_RAX(%_ASM_AX), %_ASM_AX + +- /* Check EFLAGS.ZF from 'testb' above */ +- jz .Lvmlaunch ++ /* Clobbers EFLAGS.ZF */ ++ CLEAR_CPU_BUFFERS ++ ++ /* Check EFLAGS.CF from the VMX_RUN_VMRESUME bit test above. */ ++ jnc .Lvmlaunch + + /* + * After a successful VMRESUME/VMLAUNCH, control flow "magically" +diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c +index 57c1374fdfd49..5c1590855ffcd 100644 +--- a/arch/x86/kvm/vmx/vmx.c ++++ b/arch/x86/kvm/vmx/vmx.c +@@ -407,7 +407,8 @@ static __always_inline void vmx_enable_fb_clear(struct vcpu_vmx *vmx) + + static void vmx_update_fb_clear_dis(struct kvm_vcpu *vcpu, struct vcpu_vmx *vmx) + { +- vmx->disable_fb_clear = vmx_fb_clear_ctrl_available; ++ vmx->disable_fb_clear = !cpu_feature_enabled(X86_FEATURE_CLEAR_CPU_BUF) && ++ vmx_fb_clear_ctrl_available; + + /* + * If guest will not execute VERW, there is no need to set FB_CLEAR_DIS +@@ -7120,11 +7121,14 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu, + { + guest_state_enter_irqoff(); + +- /* L1D Flush includes CPU buffer clear to mitigate MDS */ ++ /* ++ * L1D Flush includes CPU buffer clear to mitigate MDS, but VERW ++ * mitigation for MDS is done late in VMentry and is still ++ * executed in spite of L1D Flush. 
This is because an extra VERW ++ * should not matter much after the big hammer L1D Flush. ++ */ + if (static_branch_unlikely(&vmx_l1d_should_flush)) + vmx_l1d_flush(vcpu); +- else if (static_branch_unlikely(&mds_user_clear)) +- mds_clear_cpu_buffers(); + else if (static_branch_unlikely(&mmio_stale_data_clear) && + kvm_arch_has_assigned_device(vcpu->kvm)) + mds_clear_cpu_buffers(); +diff --git a/drivers/bluetooth/btqca.c b/drivers/bluetooth/btqca.c +index c9064d34d8308..0211f704a358b 100644 +--- a/drivers/bluetooth/btqca.c ++++ b/drivers/bluetooth/btqca.c +@@ -152,7 +152,7 @@ static int qca_send_patch_config_cmd(struct hci_dev *hdev) + bt_dev_dbg(hdev, "QCA Patch config"); + + skb = __hci_cmd_sync_ev(hdev, EDL_PATCH_CMD_OPCODE, sizeof(cmd), +- cmd, HCI_EV_VENDOR, HCI_INIT_TIMEOUT); ++ cmd, 0, HCI_INIT_TIMEOUT); + if (IS_ERR(skb)) { + err = PTR_ERR(skb); + bt_dev_err(hdev, "Sending QCA Patch config failed (%d)", err); +@@ -594,27 +594,48 @@ int qca_uart_setup(struct hci_dev *hdev, uint8_t baudrate, + /* Firmware files to download are based on ROM version. + * ROM version is derived from last two bytes of soc_ver. 
+ */ +- rom_ver = ((soc_ver & 0x00000f00) >> 0x04) | (soc_ver & 0x0000000f); ++ if (soc_type == QCA_WCN3988) ++ rom_ver = ((soc_ver & 0x00000f00) >> 0x05) | (soc_ver & 0x0000000f); ++ else ++ rom_ver = ((soc_ver & 0x00000f00) >> 0x04) | (soc_ver & 0x0000000f); + + if (soc_type == QCA_WCN6750) + qca_send_patch_config_cmd(hdev); + + /* Download rampatch file */ + config.type = TLV_TYPE_PATCH; +- if (qca_is_wcn399x(soc_type)) { ++ switch (soc_type) { ++ case QCA_WCN3990: ++ case QCA_WCN3991: ++ case QCA_WCN3998: + snprintf(config.fwname, sizeof(config.fwname), + "qca/crbtfw%02x.tlv", rom_ver); +- } else if (soc_type == QCA_QCA6390) { ++ break; ++ case QCA_WCN3988: ++ snprintf(config.fwname, sizeof(config.fwname), ++ "qca/apbtfw%02x.tlv", rom_ver); ++ break; ++ case QCA_QCA6390: + snprintf(config.fwname, sizeof(config.fwname), + "qca/htbtfw%02x.tlv", rom_ver); +- } else if (soc_type == QCA_WCN6750) { ++ break; ++ case QCA_WCN6750: + /* Choose mbn file by default.If mbn file is not found + * then choose tlv file + */ + config.type = ELF_TYPE_PATCH; + snprintf(config.fwname, sizeof(config.fwname), + "qca/msbtfw%02x.mbn", rom_ver); +- } else { ++ break; ++ case QCA_WCN6855: ++ snprintf(config.fwname, sizeof(config.fwname), ++ "qca/hpbtfw%02x.tlv", rom_ver); ++ break; ++ case QCA_WCN7850: ++ snprintf(config.fwname, sizeof(config.fwname), ++ "qca/hmtbtfw%02x.tlv", rom_ver); ++ break; ++ default: + snprintf(config.fwname, sizeof(config.fwname), + "qca/rampatch_%08x.bin", soc_ver); + } +@@ -630,27 +651,48 @@ int qca_uart_setup(struct hci_dev *hdev, uint8_t baudrate, + + /* Download NVM configuration */ + config.type = TLV_TYPE_NVM; +- if (firmware_name) ++ if (firmware_name) { + snprintf(config.fwname, sizeof(config.fwname), + "qca/%s", firmware_name); +- else if (qca_is_wcn399x(soc_type)) { +- if (ver.soc_id == QCA_WCN3991_SOC_ID) { ++ } else { ++ switch (soc_type) { ++ case QCA_WCN3990: ++ case QCA_WCN3991: ++ case QCA_WCN3998: ++ if (le32_to_cpu(ver.soc_id) == 
QCA_WCN3991_SOC_ID) { ++ snprintf(config.fwname, sizeof(config.fwname), ++ "qca/crnv%02xu.bin", rom_ver); ++ } else { ++ snprintf(config.fwname, sizeof(config.fwname), ++ "qca/crnv%02x.bin", rom_ver); ++ } ++ break; ++ case QCA_WCN3988: + snprintf(config.fwname, sizeof(config.fwname), +- "qca/crnv%02xu.bin", rom_ver); +- } else { ++ "qca/apnv%02x.bin", rom_ver); ++ break; ++ case QCA_QCA6390: ++ snprintf(config.fwname, sizeof(config.fwname), ++ "qca/htnv%02x.bin", rom_ver); ++ break; ++ case QCA_WCN6750: + snprintf(config.fwname, sizeof(config.fwname), +- "qca/crnv%02x.bin", rom_ver); ++ "qca/msnv%02x.bin", rom_ver); ++ break; ++ case QCA_WCN6855: ++ snprintf(config.fwname, sizeof(config.fwname), ++ "qca/hpnv%02x.bin", rom_ver); ++ break; ++ case QCA_WCN7850: ++ snprintf(config.fwname, sizeof(config.fwname), ++ "qca/hmtnv%02x.bin", rom_ver); ++ break; ++ ++ default: ++ snprintf(config.fwname, sizeof(config.fwname), ++ "qca/nvm_%08x.bin", soc_ver); + } + } +- else if (soc_type == QCA_QCA6390) +- snprintf(config.fwname, sizeof(config.fwname), +- "qca/htnv%02x.bin", rom_ver); +- else if (soc_type == QCA_WCN6750) +- snprintf(config.fwname, sizeof(config.fwname), +- "qca/msnv%02x.bin", rom_ver); +- else +- snprintf(config.fwname, sizeof(config.fwname), +- "qca/nvm_%08x.bin", soc_ver); + + err = qca_download_firmware(hdev, &config, soc_type, rom_ver); + if (err < 0) { +@@ -658,16 +700,25 @@ int qca_uart_setup(struct hci_dev *hdev, uint8_t baudrate, + return err; + } + +- if (soc_type >= QCA_WCN3991) { ++ switch (soc_type) { ++ case QCA_WCN3991: ++ case QCA_QCA6390: ++ case QCA_WCN6750: ++ case QCA_WCN6855: ++ case QCA_WCN7850: + err = qca_disable_soc_logging(hdev); + if (err < 0) + return err; ++ break; ++ default: ++ break; + } + + /* WCN399x and WCN6750 supports the Microsoft vendor extension with 0xFD70 as the + * VsMsftOpCode. 
+ */ + switch (soc_type) { ++ case QCA_WCN3988: + case QCA_WCN3990: + case QCA_WCN3991: + case QCA_WCN3998: +@@ -685,11 +736,18 @@ int qca_uart_setup(struct hci_dev *hdev, uint8_t baudrate, + return err; + } + +- if (soc_type == QCA_WCN3991 || soc_type == QCA_WCN6750) { ++ switch (soc_type) { ++ case QCA_WCN3991: ++ case QCA_WCN6750: ++ case QCA_WCN6855: ++ case QCA_WCN7850: + /* get fw build info */ + err = qca_read_fw_build_info(hdev); + if (err < 0) + return err; ++ break; ++ default: ++ break; + } + + bt_dev_info(hdev, "QCA setup on UART is completed"); +diff --git a/drivers/bluetooth/btqca.h b/drivers/bluetooth/btqca.h +index 61e9a50e66ae1..03bff5c0059de 100644 +--- a/drivers/bluetooth/btqca.h ++++ b/drivers/bluetooth/btqca.h +@@ -142,11 +142,14 @@ enum qca_btsoc_type { + QCA_INVALID = -1, + QCA_AR3002, + QCA_ROME, ++ QCA_WCN3988, + QCA_WCN3990, + QCA_WCN3998, + QCA_WCN3991, + QCA_QCA6390, + QCA_WCN6750, ++ QCA_WCN6855, ++ QCA_WCN7850, + }; + + #if IS_ENABLED(CONFIG_BT_QCA) +@@ -159,16 +162,6 @@ int qca_read_soc_version(struct hci_dev *hdev, struct qca_btsoc_version *ver, + enum qca_btsoc_type); + int qca_set_bdaddr(struct hci_dev *hdev, const bdaddr_t *bdaddr); + int qca_send_pre_shutdown_cmd(struct hci_dev *hdev); +-static inline bool qca_is_wcn399x(enum qca_btsoc_type soc_type) +-{ +- return soc_type == QCA_WCN3990 || soc_type == QCA_WCN3991 || +- soc_type == QCA_WCN3998; +-} +-static inline bool qca_is_wcn6750(enum qca_btsoc_type soc_type) +-{ +- return soc_type == QCA_WCN6750; +-} +- + #else + + static inline int qca_set_bdaddr_rome(struct hci_dev *hdev, const bdaddr_t *bdaddr) +@@ -196,16 +189,6 @@ static inline int qca_set_bdaddr(struct hci_dev *hdev, const bdaddr_t *bdaddr) + return -EOPNOTSUPP; + } + +-static inline bool qca_is_wcn399x(enum qca_btsoc_type soc_type) +-{ +- return false; +-} +- +-static inline bool qca_is_wcn6750(enum qca_btsoc_type soc_type) +-{ +- return false; +-} +- + static inline int qca_send_pre_shutdown_cmd(struct hci_dev *hdev) 
+ { + return -EOPNOTSUPP; +diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c +index 76ceb8a0183d1..8bfef7f81b417 100644 +--- a/drivers/bluetooth/hci_qca.c ++++ b/drivers/bluetooth/hci_qca.c +@@ -7,6 +7,7 @@ + * + * Copyright (C) 2007 Texas Instruments, Inc. + * Copyright (c) 2010, 2012, 2018 The Linux Foundation. All rights reserved. ++ * Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved. + * + * Acknowledgements: + * This file is based on hci_ll.c, which was... +@@ -606,9 +607,18 @@ static int qca_open(struct hci_uart *hu) + if (hu->serdev) { + qcadev = serdev_device_get_drvdata(hu->serdev); + +- if (qca_is_wcn399x(qcadev->btsoc_type) || +- qca_is_wcn6750(qcadev->btsoc_type)) ++ switch (qcadev->btsoc_type) { ++ case QCA_WCN3988: ++ case QCA_WCN3990: ++ case QCA_WCN3991: ++ case QCA_WCN3998: ++ case QCA_WCN6750: + hu->init_speed = qcadev->init_speed; ++ break; ++ ++ default: ++ break; ++ } + + if (qcadev->oper_speed) + hu->oper_speed = qcadev->oper_speed; +@@ -1314,11 +1324,20 @@ static int qca_set_baudrate(struct hci_dev *hdev, uint8_t baudrate) + msecs_to_jiffies(CMD_TRANS_TIMEOUT_MS)); + + /* Give the controller time to process the request */ +- if (qca_is_wcn399x(qca_soc_type(hu)) || +- qca_is_wcn6750(qca_soc_type(hu))) ++ switch (qca_soc_type(hu)) { ++ case QCA_WCN3988: ++ case QCA_WCN3990: ++ case QCA_WCN3991: ++ case QCA_WCN3998: ++ case QCA_WCN6750: ++ case QCA_WCN6855: ++ case QCA_WCN7850: + usleep_range(1000, 10000); +- else ++ break; ++ ++ default: + msleep(300); ++ } + + return 0; + } +@@ -1391,12 +1410,20 @@ static unsigned int qca_get_speed(struct hci_uart *hu, + + static int qca_check_speeds(struct hci_uart *hu) + { +- if (qca_is_wcn399x(qca_soc_type(hu)) || +- qca_is_wcn6750(qca_soc_type(hu))) { ++ switch (qca_soc_type(hu)) { ++ case QCA_WCN3988: ++ case QCA_WCN3990: ++ case QCA_WCN3991: ++ case QCA_WCN3998: ++ case QCA_WCN6750: ++ case QCA_WCN6855: ++ case QCA_WCN7850: + if (!qca_get_speed(hu, 
QCA_INIT_SPEED) && + !qca_get_speed(hu, QCA_OPER_SPEED)) + return -EINVAL; +- } else { ++ break; ++ ++ default: + if (!qca_get_speed(hu, QCA_INIT_SPEED) || + !qca_get_speed(hu, QCA_OPER_SPEED)) + return -EINVAL; +@@ -1425,13 +1452,29 @@ static int qca_set_speed(struct hci_uart *hu, enum qca_speed_type speed_type) + /* Disable flow control for wcn3990 to deassert RTS while + * changing the baudrate of chip and host. + */ +- if (qca_is_wcn399x(soc_type) || +- qca_is_wcn6750(soc_type)) ++ switch (soc_type) { ++ case QCA_WCN3988: ++ case QCA_WCN3990: ++ case QCA_WCN3991: ++ case QCA_WCN3998: ++ case QCA_WCN6750: ++ case QCA_WCN6855: ++ case QCA_WCN7850: + hci_uart_set_flow_control(hu, true); ++ break; + +- if (soc_type == QCA_WCN3990) { ++ default: ++ break; ++ } ++ ++ switch (soc_type) { ++ case QCA_WCN3990: + reinit_completion(&qca->drop_ev_comp); + set_bit(QCA_DROP_VENDOR_EVENT, &qca->flags); ++ break; ++ ++ default: ++ break; + } + + qca_baudrate = qca_get_baudrate_value(speed); +@@ -1443,11 +1486,23 @@ static int qca_set_speed(struct hci_uart *hu, enum qca_speed_type speed_type) + host_set_baudrate(hu, speed); + + error: +- if (qca_is_wcn399x(soc_type) || +- qca_is_wcn6750(soc_type)) ++ switch (soc_type) { ++ case QCA_WCN3988: ++ case QCA_WCN3990: ++ case QCA_WCN3991: ++ case QCA_WCN3998: ++ case QCA_WCN6750: ++ case QCA_WCN6855: ++ case QCA_WCN7850: + hci_uart_set_flow_control(hu, false); ++ break; + +- if (soc_type == QCA_WCN3990) { ++ default: ++ break; ++ } ++ ++ switch (soc_type) { ++ case QCA_WCN3990: + /* Wait for the controller to send the vendor event + * for the baudrate change command. 
+ */ +@@ -1459,6 +1514,10 @@ static int qca_set_speed(struct hci_uart *hu, enum qca_speed_type speed_type) + } + + clear_bit(QCA_DROP_VENDOR_EVENT, &qca->flags); ++ break; ++ ++ default: ++ break; + } + } + +@@ -1620,12 +1679,20 @@ static int qca_regulator_init(struct hci_uart *hu) + } + } + +- if (qca_is_wcn399x(soc_type)) { ++ switch (soc_type) { ++ case QCA_WCN3988: ++ case QCA_WCN3990: ++ case QCA_WCN3991: ++ case QCA_WCN3998: + /* Forcefully enable wcn399x to enter in to boot mode. */ + host_set_baudrate(hu, 2400); + ret = qca_send_power_pulse(hu, false); + if (ret) + return ret; ++ break; ++ ++ default: ++ break; + } + + /* For wcn6750 need to enable gpio bt_en */ +@@ -1642,10 +1709,18 @@ static int qca_regulator_init(struct hci_uart *hu) + + qca_set_speed(hu, QCA_INIT_SPEED); + +- if (qca_is_wcn399x(soc_type)) { ++ switch (soc_type) { ++ case QCA_WCN3988: ++ case QCA_WCN3990: ++ case QCA_WCN3991: ++ case QCA_WCN3998: + ret = qca_send_power_pulse(hu, true); + if (ret) + return ret; ++ break; ++ ++ default: ++ break; + } + + /* Now the device is in ready state to communicate with host. 
+@@ -1679,10 +1754,18 @@ static int qca_power_on(struct hci_dev *hdev) + if (!hu->serdev) + return 0; + +- if (qca_is_wcn399x(soc_type) || +- qca_is_wcn6750(soc_type)) { ++ switch (soc_type) { ++ case QCA_WCN3988: ++ case QCA_WCN3990: ++ case QCA_WCN3991: ++ case QCA_WCN3998: ++ case QCA_WCN6750: ++ case QCA_WCN6855: ++ case QCA_WCN7850: + ret = qca_regulator_init(hu); +- } else { ++ break; ++ ++ default: + qcadev = serdev_device_get_drvdata(hu->serdev); + if (qcadev->bt_en) { + gpiod_set_value_cansleep(qcadev->bt_en, 1); +@@ -1705,6 +1788,7 @@ static int qca_setup(struct hci_uart *hu) + const char *firmware_name = qca_get_firmware_name(hu); + int ret; + struct qca_btsoc_version ver; ++ const char *soc_name; + + ret = qca_check_speeds(hu); + if (ret) +@@ -1719,9 +1803,30 @@ static int qca_setup(struct hci_uart *hu) + */ + set_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY, &hdev->quirks); + +- bt_dev_info(hdev, "setting up %s", +- qca_is_wcn399x(soc_type) ? "wcn399x" : +- (soc_type == QCA_WCN6750) ? 
"wcn6750" : "ROME/QCA6390"); ++ switch (soc_type) { ++ case QCA_WCN3988: ++ case QCA_WCN3990: ++ case QCA_WCN3991: ++ case QCA_WCN3998: ++ soc_name = "wcn399x"; ++ break; ++ ++ case QCA_WCN6750: ++ soc_name = "wcn6750"; ++ break; ++ ++ case QCA_WCN6855: ++ soc_name = "wcn6855"; ++ break; ++ ++ case QCA_WCN7850: ++ soc_name = "wcn7850"; ++ break; ++ ++ default: ++ soc_name = "ROME/QCA6390"; ++ } ++ bt_dev_info(hdev, "setting up %s", soc_name); + + qca->memdump_state = QCA_MEMDUMP_IDLE; + +@@ -1732,15 +1837,33 @@ static int qca_setup(struct hci_uart *hu) + + clear_bit(QCA_SSR_TRIGGERED, &qca->flags); + +- if (qca_is_wcn399x(soc_type) || +- qca_is_wcn6750(soc_type)) { +- set_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &hdev->quirks); ++ switch (soc_type) { ++ case QCA_WCN3988: ++ case QCA_WCN3990: ++ case QCA_WCN3991: ++ case QCA_WCN3998: ++ case QCA_WCN6750: ++ case QCA_WCN6855: ++ case QCA_WCN7850: ++ ++ /* Set BDA quirk bit for reading BDA value from fwnode property ++ * only if that property exist in DT. 
++ */ ++ if (fwnode_property_present(dev_fwnode(hdev->dev.parent), "local-bd-address")) { ++ set_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &hdev->quirks); ++ bt_dev_info(hdev, "setting quirk bit to read BDA from fwnode later"); ++ } else { ++ bt_dev_dbg(hdev, "local-bd-address` is not present in the devicetree so not setting quirk bit for BDA"); ++ } ++ + hci_set_aosp_capable(hdev); + + ret = qca_read_soc_version(hdev, &ver, soc_type); + if (ret) + goto out; +- } else { ++ break; ++ ++ default: + qca_set_speed(hu, QCA_INIT_SPEED); + } + +@@ -1754,8 +1877,17 @@ static int qca_setup(struct hci_uart *hu) + qca_baudrate = qca_get_baudrate_value(speed); + } + +- if (!(qca_is_wcn399x(soc_type) || +- qca_is_wcn6750(soc_type))) { ++ switch (soc_type) { ++ case QCA_WCN3988: ++ case QCA_WCN3990: ++ case QCA_WCN3991: ++ case QCA_WCN3998: ++ case QCA_WCN6750: ++ case QCA_WCN6855: ++ case QCA_WCN7850: ++ break; ++ ++ default: + /* Get QCA version information */ + ret = qca_read_soc_version(hdev, &ver, soc_type); + if (ret) +@@ -1824,7 +1956,18 @@ static const struct hci_uart_proto qca_proto = { + .dequeue = qca_dequeue, + }; + +-static const struct qca_device_data qca_soc_data_wcn3990 = { ++static const struct qca_device_data qca_soc_data_wcn3988 __maybe_unused = { ++ .soc_type = QCA_WCN3988, ++ .vregs = (struct qca_vreg []) { ++ { "vddio", 15000 }, ++ { "vddxo", 80000 }, ++ { "vddrf", 300000 }, ++ { "vddch0", 450000 }, ++ }, ++ .num_vregs = 4, ++}; ++ ++static const struct qca_device_data qca_soc_data_wcn3990 __maybe_unused = { + .soc_type = QCA_WCN3990, + .vregs = (struct qca_vreg []) { + { "vddio", 15000 }, +@@ -1835,7 +1978,7 @@ static const struct qca_device_data qca_soc_data_wcn3990 = { + .num_vregs = 4, + }; + +-static const struct qca_device_data qca_soc_data_wcn3991 = { ++static const struct qca_device_data qca_soc_data_wcn3991 __maybe_unused = { + .soc_type = QCA_WCN3991, + .vregs = (struct qca_vreg []) { + { "vddio", 15000 }, +@@ -1847,7 +1990,7 @@ static const struct 
qca_device_data qca_soc_data_wcn3991 = { + .capabilities = QCA_CAP_WIDEBAND_SPEECH | QCA_CAP_VALID_LE_STATES, + }; + +-static const struct qca_device_data qca_soc_data_wcn3998 = { ++static const struct qca_device_data qca_soc_data_wcn3998 __maybe_unused = { + .soc_type = QCA_WCN3998, + .vregs = (struct qca_vreg []) { + { "vddio", 10000 }, +@@ -1858,13 +2001,13 @@ static const struct qca_device_data qca_soc_data_wcn3998 = { + .num_vregs = 4, + }; + +-static const struct qca_device_data qca_soc_data_qca6390 = { ++static const struct qca_device_data qca_soc_data_qca6390 __maybe_unused = { + .soc_type = QCA_QCA6390, + .num_vregs = 0, + .capabilities = QCA_CAP_WIDEBAND_SPEECH | QCA_CAP_VALID_LE_STATES, + }; + +-static const struct qca_device_data qca_soc_data_wcn6750 = { ++static const struct qca_device_data qca_soc_data_wcn6750 __maybe_unused = { + .soc_type = QCA_WCN6750, + .vregs = (struct qca_vreg []) { + { "vddio", 5000 }, +@@ -1881,6 +2024,34 @@ static const struct qca_device_data qca_soc_data_wcn6750 = { + .capabilities = QCA_CAP_WIDEBAND_SPEECH | QCA_CAP_VALID_LE_STATES, + }; + ++static const struct qca_device_data qca_soc_data_wcn6855 = { ++ .soc_type = QCA_WCN6855, ++ .vregs = (struct qca_vreg []) { ++ { "vddio", 5000 }, ++ { "vddbtcxmx", 126000 }, ++ { "vddrfacmn", 12500 }, ++ { "vddrfa0p8", 102000 }, ++ { "vddrfa1p7", 302000 }, ++ { "vddrfa1p2", 257000 }, ++ }, ++ .num_vregs = 6, ++ .capabilities = QCA_CAP_WIDEBAND_SPEECH | QCA_CAP_VALID_LE_STATES, ++}; ++ ++static const struct qca_device_data qca_soc_data_wcn7850 __maybe_unused = { ++ .soc_type = QCA_WCN7850, ++ .vregs = (struct qca_vreg []) { ++ { "vddio", 5000 }, ++ { "vddaon", 26000 }, ++ { "vdddig", 126000 }, ++ { "vddrfa0p8", 102000 }, ++ { "vddrfa1p2", 257000 }, ++ { "vddrfa1p9", 302000 }, ++ }, ++ .num_vregs = 6, ++ .capabilities = QCA_CAP_WIDEBAND_SPEECH | QCA_CAP_VALID_LE_STATES, ++}; ++ + static void qca_power_shutdown(struct hci_uart *hu) + { + struct qca_serdev *qcadev; +@@ -1906,11 +2077,18 @@ 
static void qca_power_shutdown(struct hci_uart *hu) + + qcadev = serdev_device_get_drvdata(hu->serdev); + +- if (qca_is_wcn399x(soc_type)) { ++ switch (soc_type) { ++ case QCA_WCN3988: ++ case QCA_WCN3990: ++ case QCA_WCN3991: ++ case QCA_WCN3998: + host_set_baudrate(hu, 2400); + qca_send_power_pulse(hu, false); + qca_regulator_disable(qcadev); +- } else if (soc_type == QCA_WCN6750) { ++ break; ++ ++ case QCA_WCN6750: ++ case QCA_WCN6855: + gpiod_set_value_cansleep(qcadev->bt_en, 0); + msleep(100); + qca_regulator_disable(qcadev); +@@ -1918,7 +2096,9 @@ static void qca_power_shutdown(struct hci_uart *hu) + sw_ctrl_state = gpiod_get_value_cansleep(qcadev->sw_ctrl); + bt_dev_dbg(hu->hdev, "SW_CTRL is %d", sw_ctrl_state); + } +- } else if (qcadev->bt_en) { ++ break; ++ ++ default: + gpiod_set_value_cansleep(qcadev->bt_en, 0); + } + +@@ -2043,10 +2223,19 @@ static int qca_serdev_probe(struct serdev_device *serdev) + if (!qcadev->oper_speed) + BT_DBG("UART will pick default operating speed"); + +- if (data && +- (qca_is_wcn399x(data->soc_type) || +- qca_is_wcn6750(data->soc_type))) { ++ if (data) + qcadev->btsoc_type = data->soc_type; ++ else ++ qcadev->btsoc_type = QCA_ROME; ++ ++ switch (qcadev->btsoc_type) { ++ case QCA_WCN3988: ++ case QCA_WCN3990: ++ case QCA_WCN3991: ++ case QCA_WCN3998: ++ case QCA_WCN6750: ++ case QCA_WCN6855: ++ case QCA_WCN7850: + qcadev->bt_power = devm_kzalloc(&serdev->dev, + sizeof(struct qca_power), + GFP_KERNEL); +@@ -2065,14 +2254,19 @@ static int qca_serdev_probe(struct serdev_device *serdev) + + qcadev->bt_en = devm_gpiod_get_optional(&serdev->dev, "enable", + GPIOD_OUT_LOW); +- if (IS_ERR_OR_NULL(qcadev->bt_en) && data->soc_type == QCA_WCN6750) { ++ if (IS_ERR_OR_NULL(qcadev->bt_en) && ++ (data->soc_type == QCA_WCN6750 || ++ data->soc_type == QCA_WCN6855)) { + dev_err(&serdev->dev, "failed to acquire BT_EN gpio\n"); + power_ctrl_enabled = false; + } + + qcadev->sw_ctrl = devm_gpiod_get_optional(&serdev->dev, "swctrl", + GPIOD_IN); +- 
if (IS_ERR_OR_NULL(qcadev->sw_ctrl) && data->soc_type == QCA_WCN6750) ++ if (IS_ERR_OR_NULL(qcadev->sw_ctrl) && ++ (data->soc_type == QCA_WCN6750 || ++ data->soc_type == QCA_WCN6855 || ++ data->soc_type == QCA_WCN7850)) + dev_warn(&serdev->dev, "failed to acquire SW_CTRL gpio\n"); + + qcadev->susclk = devm_clk_get_optional(&serdev->dev, NULL); +@@ -2086,12 +2280,9 @@ static int qca_serdev_probe(struct serdev_device *serdev) + BT_ERR("wcn3990 serdev registration failed"); + return err; + } +- } else { +- if (data) +- qcadev->btsoc_type = data->soc_type; +- else +- qcadev->btsoc_type = QCA_ROME; ++ break; + ++ default: + qcadev->bt_en = devm_gpiod_get_optional(&serdev->dev, "enable", + GPIOD_OUT_LOW); + if (IS_ERR_OR_NULL(qcadev->bt_en)) { +@@ -2147,12 +2338,24 @@ static void qca_serdev_remove(struct serdev_device *serdev) + struct qca_serdev *qcadev = serdev_device_get_drvdata(serdev); + struct qca_power *power = qcadev->bt_power; + +- if ((qca_is_wcn399x(qcadev->btsoc_type) || +- qca_is_wcn6750(qcadev->btsoc_type)) && +- power->vregs_on) +- qca_power_shutdown(&qcadev->serdev_hu); +- else if (qcadev->susclk) +- clk_disable_unprepare(qcadev->susclk); ++ switch (qcadev->btsoc_type) { ++ case QCA_WCN3988: ++ case QCA_WCN3990: ++ case QCA_WCN3991: ++ case QCA_WCN3998: ++ case QCA_WCN6750: ++ case QCA_WCN6855: ++ case QCA_WCN7850: ++ if (power->vregs_on) { ++ qca_power_shutdown(&qcadev->serdev_hu); ++ break; ++ } ++ fallthrough; ++ ++ default: ++ if (qcadev->susclk) ++ clk_disable_unprepare(qcadev->susclk); ++ } + + hci_uart_unregister_device(&qcadev->serdev_hu); + } +@@ -2329,10 +2532,13 @@ static const struct of_device_id qca_bluetooth_of_match[] = { + { .compatible = "qcom,qca6174-bt" }, + { .compatible = "qcom,qca6390-bt", .data = &qca_soc_data_qca6390}, + { .compatible = "qcom,qca9377-bt" }, ++ { .compatible = "qcom,wcn3988-bt", .data = &qca_soc_data_wcn3988}, + { .compatible = "qcom,wcn3990-bt", .data = &qca_soc_data_wcn3990}, + { .compatible = "qcom,wcn3991-bt", 
.data = &qca_soc_data_wcn3991}, + { .compatible = "qcom,wcn3998-bt", .data = &qca_soc_data_wcn3998}, + { .compatible = "qcom,wcn6750-bt", .data = &qca_soc_data_wcn6750}, ++ { .compatible = "qcom,wcn6855-bt", .data = &qca_soc_data_wcn6855}, ++ { .compatible = "qcom,wcn7850-bt", .data = &qca_soc_data_wcn7850}, + { /* sentinel */ } + }; + MODULE_DEVICE_TABLE(of, qca_bluetooth_of_match); +diff --git a/drivers/clk/tegra/clk-tegra20.c b/drivers/clk/tegra/clk-tegra20.c +index 422d782475532..dcacc5064d339 100644 +--- a/drivers/clk/tegra/clk-tegra20.c ++++ b/drivers/clk/tegra/clk-tegra20.c +@@ -21,24 +21,24 @@ + #define MISC_CLK_ENB 0x48 + + #define OSC_CTRL 0x50 +-#define OSC_CTRL_OSC_FREQ_MASK (3<<30) +-#define OSC_CTRL_OSC_FREQ_13MHZ (0<<30) +-#define OSC_CTRL_OSC_FREQ_19_2MHZ (1<<30) +-#define OSC_CTRL_OSC_FREQ_12MHZ (2<<30) +-#define OSC_CTRL_OSC_FREQ_26MHZ (3<<30) +-#define OSC_CTRL_MASK (0x3f2 | OSC_CTRL_OSC_FREQ_MASK) +- +-#define OSC_CTRL_PLL_REF_DIV_MASK (3<<28) +-#define OSC_CTRL_PLL_REF_DIV_1 (0<<28) +-#define OSC_CTRL_PLL_REF_DIV_2 (1<<28) +-#define OSC_CTRL_PLL_REF_DIV_4 (2<<28) ++#define OSC_CTRL_OSC_FREQ_MASK (3u<<30) ++#define OSC_CTRL_OSC_FREQ_13MHZ (0u<<30) ++#define OSC_CTRL_OSC_FREQ_19_2MHZ (1u<<30) ++#define OSC_CTRL_OSC_FREQ_12MHZ (2u<<30) ++#define OSC_CTRL_OSC_FREQ_26MHZ (3u<<30) ++#define OSC_CTRL_MASK (0x3f2u | OSC_CTRL_OSC_FREQ_MASK) ++ ++#define OSC_CTRL_PLL_REF_DIV_MASK (3u<<28) ++#define OSC_CTRL_PLL_REF_DIV_1 (0u<<28) ++#define OSC_CTRL_PLL_REF_DIV_2 (1u<<28) ++#define OSC_CTRL_PLL_REF_DIV_4 (2u<<28) + + #define OSC_FREQ_DET 0x58 +-#define OSC_FREQ_DET_TRIG (1<<31) ++#define OSC_FREQ_DET_TRIG (1u<<31) + + #define OSC_FREQ_DET_STATUS 0x5c +-#define OSC_FREQ_DET_BUSY (1<<31) +-#define OSC_FREQ_DET_CNT_MASK 0xFFFF ++#define OSC_FREQ_DET_BUSY (1u<<31) ++#define OSC_FREQ_DET_CNT_MASK 0xFFFFu + + #define TEGRA20_CLK_PERIPH_BANKS 3 + +diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c +index abdd26f7d04c9..5771f3fc6115d
100644 +--- a/drivers/cpufreq/intel_pstate.c ++++ b/drivers/cpufreq/intel_pstate.c +@@ -2952,6 +2952,9 @@ static void intel_cpufreq_adjust_perf(unsigned int cpunum, + if (min_pstate < cpu->min_perf_ratio) + min_pstate = cpu->min_perf_ratio; + ++ if (min_pstate > cpu->max_perf_ratio) ++ min_pstate = cpu->max_perf_ratio; ++ + max_pstate = min(cap_pstate, cpu->max_perf_ratio); + if (max_pstate < min_pstate) + max_pstate = min_pstate; +diff --git a/drivers/dma/fsl-qdma.c b/drivers/dma/fsl-qdma.c +index f383f219ed008..7082a5a6814a4 100644 +--- a/drivers/dma/fsl-qdma.c ++++ b/drivers/dma/fsl-qdma.c +@@ -109,6 +109,7 @@ + #define FSL_QDMA_CMD_WTHROTL_OFFSET 20 + #define FSL_QDMA_CMD_DSEN_OFFSET 19 + #define FSL_QDMA_CMD_LWC_OFFSET 16 ++#define FSL_QDMA_CMD_PF BIT(17) + + /* Field definition for Descriptor status */ + #define QDMA_CCDF_STATUS_RTE BIT(5) +@@ -384,7 +385,8 @@ static void fsl_qdma_comp_fill_memcpy(struct fsl_qdma_comp *fsl_comp, + qdma_csgf_set_f(csgf_dest, len); + /* Descriptor Buffer */ + cmd = cpu_to_le32(FSL_QDMA_CMD_RWTTYPE << +- FSL_QDMA_CMD_RWTTYPE_OFFSET); ++ FSL_QDMA_CMD_RWTTYPE_OFFSET) | ++ FSL_QDMA_CMD_PF; + sdf->data = QDMA_SDDF_CMD(cmd); + + cmd = cpu_to_le32(FSL_QDMA_CMD_RWTTYPE << +@@ -1201,10 +1203,6 @@ static int fsl_qdma_probe(struct platform_device *pdev) + if (!fsl_qdma->queue) + return -ENOMEM; + +- ret = fsl_qdma_irq_init(pdev, fsl_qdma); +- if (ret) +- return ret; +- + fsl_qdma->irq_base = platform_get_irq_byname(pdev, "qdma-queue0"); + if (fsl_qdma->irq_base < 0) + return fsl_qdma->irq_base; +@@ -1243,16 +1241,19 @@ static int fsl_qdma_probe(struct platform_device *pdev) + + platform_set_drvdata(pdev, fsl_qdma); + +- ret = dma_async_device_register(&fsl_qdma->dma_dev); ++ ret = fsl_qdma_reg_init(fsl_qdma); + if (ret) { +- dev_err(&pdev->dev, +- "Can't register NXP Layerscape qDMA engine.\n"); ++ dev_err(&pdev->dev, "Can't Initialize the qDMA engine.\n"); + return ret; + } + +- ret = fsl_qdma_reg_init(fsl_qdma); ++ ret = 
fsl_qdma_irq_init(pdev, fsl_qdma); ++ if (ret) ++ return ret; ++ ++ ret = dma_async_device_register(&fsl_qdma->dma_dev); + if (ret) { +- dev_err(&pdev->dev, "Can't Initialize the qDMA engine.\n"); ++ dev_err(&pdev->dev, "Can't register NXP Layerscape qDMA engine.\n"); + return ret; + } + +diff --git a/drivers/dma/ptdma/ptdma-dmaengine.c b/drivers/dma/ptdma/ptdma-dmaengine.c +index 1aa65e5de0f3a..f792407348077 100644 +--- a/drivers/dma/ptdma/ptdma-dmaengine.c ++++ b/drivers/dma/ptdma/ptdma-dmaengine.c +@@ -385,8 +385,6 @@ int pt_dmaengine_register(struct pt_device *pt) + chan->vc.desc_free = pt_do_cleanup; + vchan_init(&chan->vc, dma_dev); + +- dma_set_mask_and_coherent(pt->dev, DMA_BIT_MASK(64)); +- + ret = dma_async_device_register(dma_dev); + if (ret) + goto err_reg; +diff --git a/drivers/firmware/efi/capsule-loader.c b/drivers/firmware/efi/capsule-loader.c +index 3e8d4b51a8140..97bafb5f70389 100644 +--- a/drivers/firmware/efi/capsule-loader.c ++++ b/drivers/firmware/efi/capsule-loader.c +@@ -292,7 +292,7 @@ static int efi_capsule_open(struct inode *inode, struct file *file) + return -ENOMEM; + } + +- cap_info->phys = kzalloc(sizeof(void *), GFP_KERNEL); ++ cap_info->phys = kzalloc(sizeof(phys_addr_t), GFP_KERNEL); + if (!cap_info->phys) { + kfree(cap_info->pages); + kfree(cap_info); +diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c +index b7c0e8cc0764f..9077353d1c98d 100644 +--- a/drivers/firmware/efi/efi.c ++++ b/drivers/firmware/efi/efi.c +@@ -185,8 +185,27 @@ static const struct attribute_group efi_subsys_attr_group = { + static struct efivars generic_efivars; + static struct efivar_operations generic_ops; + ++static bool generic_ops_supported(void) ++{ ++ unsigned long name_size; ++ efi_status_t status; ++ efi_char16_t name; ++ efi_guid_t guid; ++ ++ name_size = sizeof(name); ++ ++ status = efi.get_next_variable(&name_size, &name, &guid); ++ if (status == EFI_UNSUPPORTED) ++ return false; ++ ++ return true; ++} ++ + static int 
generic_ops_register(void) + { ++ if (!generic_ops_supported()) ++ return 0; ++ + generic_ops.get_variable = efi.get_variable; + generic_ops.get_next_variable = efi.get_next_variable; + generic_ops.query_variable_store = efi_query_variable_store; +@@ -200,6 +219,9 @@ static int generic_ops_register(void) + + static void generic_ops_unregister(void) + { ++ if (!generic_ops.get_variable) ++ return; ++ + efivars_unregister(&generic_efivars); + } + +diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile +index b6e1dcb98a64c..473ef18421db0 100644 +--- a/drivers/firmware/efi/libstub/Makefile ++++ b/drivers/firmware/efi/libstub/Makefile +@@ -84,6 +84,7 @@ lib-$(CONFIG_EFI_GENERIC_STUB) += efi-stub.o string.o intrinsics.o systable.o + lib-$(CONFIG_ARM) += arm32-stub.o + lib-$(CONFIG_ARM64) += arm64-stub.o smbios.o + lib-$(CONFIG_X86) += x86-stub.o ++lib-$(CONFIG_X86_64) += x86-5lvl.o + lib-$(CONFIG_RISCV) += riscv-stub.o + lib-$(CONFIG_LOONGARCH) += loongarch-stub.o + +diff --git a/drivers/firmware/efi/libstub/alignedmem.c b/drivers/firmware/efi/libstub/alignedmem.c +index 1de9878ddd3a2..6b83c492c3b82 100644 +--- a/drivers/firmware/efi/libstub/alignedmem.c ++++ b/drivers/firmware/efi/libstub/alignedmem.c +@@ -22,12 +22,15 @@ + * Return: status code + */ + efi_status_t efi_allocate_pages_aligned(unsigned long size, unsigned long *addr, +- unsigned long max, unsigned long align) ++ unsigned long max, unsigned long align, ++ int memory_type) + { + efi_physical_addr_t alloc_addr; + efi_status_t status; + int slack; + ++ max = min(max, EFI_ALLOC_LIMIT); ++ + if (align < EFI_ALLOC_ALIGN) + align = EFI_ALLOC_ALIGN; + +@@ -36,7 +39,7 @@ efi_status_t efi_allocate_pages_aligned(unsigned long size, unsigned long *addr, + slack = align / EFI_PAGE_SIZE - 1; + + status = efi_bs_call(allocate_pages, EFI_ALLOCATE_MAX_ADDRESS, +- EFI_LOADER_DATA, size / EFI_PAGE_SIZE + slack, ++ memory_type, size / EFI_PAGE_SIZE + slack, + &alloc_addr); + if (status != 
EFI_SUCCESS) + return status; +diff --git a/drivers/firmware/efi/libstub/arm64-stub.c b/drivers/firmware/efi/libstub/arm64-stub.c +index e2f90566b291a..16f15e36f9a7d 100644 +--- a/drivers/firmware/efi/libstub/arm64-stub.c ++++ b/drivers/firmware/efi/libstub/arm64-stub.c +@@ -180,7 +180,8 @@ efi_status_t handle_kernel_image(unsigned long *image_addr, + * locate the kernel at a randomized offset in physical memory. + */ + status = efi_random_alloc(*reserve_size, min_kimg_align, +- reserve_addr, phys_seed); ++ reserve_addr, phys_seed, ++ EFI_LOADER_CODE, 0, EFI_ALLOC_LIMIT); + if (status != EFI_SUCCESS) + efi_warn("efi_random_alloc() failed: 0x%lx\n", status); + } else { +@@ -190,10 +191,11 @@ efi_status_t handle_kernel_image(unsigned long *image_addr, + if (status != EFI_SUCCESS) { + if (!check_image_region((u64)_text, kernel_memsize)) { + efi_err("FIRMWARE BUG: Image BSS overlaps adjacent EFI memory region\n"); +- } else if (IS_ALIGNED((u64)_text, min_kimg_align)) { ++ } else if (IS_ALIGNED((u64)_text, min_kimg_align) && ++ (u64)_end < EFI_ALLOC_LIMIT) { + /* + * Just execute from wherever we were loaded by the +- * UEFI PE/COFF loader if the alignment is suitable. ++ * UEFI PE/COFF loader if the placement is suitable. 
+ */ + *image_addr = (u64)_text; + *reserve_size = 0; +@@ -201,7 +203,8 @@ efi_status_t handle_kernel_image(unsigned long *image_addr, + } + + status = efi_allocate_pages_aligned(*reserve_size, reserve_addr, +- ULONG_MAX, min_kimg_align); ++ ULONG_MAX, min_kimg_align, ++ EFI_LOADER_CODE); + + if (status != EFI_SUCCESS) { + efi_err("Failed to relocate kernel\n"); +diff --git a/drivers/firmware/efi/libstub/efi-stub-helper.c b/drivers/firmware/efi/libstub/efi-stub-helper.c +index 3d9b2469a0dfd..97744822dd951 100644 +--- a/drivers/firmware/efi/libstub/efi-stub-helper.c ++++ b/drivers/firmware/efi/libstub/efi-stub-helper.c +@@ -216,6 +216,8 @@ efi_status_t efi_parse_options(char const *cmdline) + efi_loglevel = CONSOLE_LOGLEVEL_QUIET; + } else if (!strcmp(param, "noinitrd")) { + efi_noinitrd = true; ++ } else if (IS_ENABLED(CONFIG_X86_64) && !strcmp(param, "no5lvl")) { ++ efi_no5lvl = true; + } else if (!strcmp(param, "efi") && val) { + efi_nochunk = parse_option_str(val, "nochunk"); + efi_novamap |= parse_option_str(val, "novamap"); +diff --git a/drivers/firmware/efi/libstub/efistub.h b/drivers/firmware/efi/libstub/efistub.h +index 970e86e3aab05..6741f3d900c5a 100644 +--- a/drivers/firmware/efi/libstub/efistub.h ++++ b/drivers/firmware/efi/libstub/efistub.h +@@ -29,6 +29,11 @@ + #define EFI_ALLOC_ALIGN EFI_PAGE_SIZE + #endif + ++#ifndef EFI_ALLOC_LIMIT ++#define EFI_ALLOC_LIMIT ULONG_MAX ++#endif ++ ++extern bool efi_no5lvl; + extern bool efi_nochunk; + extern bool efi_nokaslr; + extern int efi_loglevel; +@@ -415,6 +420,26 @@ union efi_dxe_services_table { + } mixed_mode; + }; + ++typedef union efi_memory_attribute_protocol efi_memory_attribute_protocol_t; ++ ++union efi_memory_attribute_protocol { ++ struct { ++ efi_status_t (__efiapi *get_memory_attributes)( ++ efi_memory_attribute_protocol_t *, efi_physical_addr_t, u64, u64 *); ++ ++ efi_status_t (__efiapi *set_memory_attributes)( ++ efi_memory_attribute_protocol_t *, efi_physical_addr_t, u64, u64); ++ ++ 
efi_status_t (__efiapi *clear_memory_attributes)( ++ efi_memory_attribute_protocol_t *, efi_physical_addr_t, u64, u64); ++ }; ++ struct { ++ u32 get_memory_attributes; ++ u32 set_memory_attributes; ++ u32 clear_memory_attributes; ++ } mixed_mode; ++}; ++ + typedef union efi_uga_draw_protocol efi_uga_draw_protocol_t; + + union efi_uga_draw_protocol { +@@ -880,7 +905,9 @@ void efi_get_virtmap(efi_memory_desc_t *memory_map, unsigned long map_size, + efi_status_t efi_get_random_bytes(unsigned long size, u8 *out); + + efi_status_t efi_random_alloc(unsigned long size, unsigned long align, +- unsigned long *addr, unsigned long random_seed); ++ unsigned long *addr, unsigned long random_seed, ++ int memory_type, unsigned long alloc_min, ++ unsigned long alloc_max); + + efi_status_t efi_random_get_seed(void); + +@@ -907,7 +934,8 @@ efi_status_t efi_allocate_pages(unsigned long size, unsigned long *addr, + unsigned long max); + + efi_status_t efi_allocate_pages_aligned(unsigned long size, unsigned long *addr, +- unsigned long max, unsigned long align); ++ unsigned long max, unsigned long align, ++ int memory_type); + + efi_status_t efi_low_alloc_above(unsigned long size, unsigned long align, + unsigned long *addr, unsigned long min); +diff --git a/drivers/firmware/efi/libstub/mem.c b/drivers/firmware/efi/libstub/mem.c +index 45841ef55a9f6..4f1fa302234d8 100644 +--- a/drivers/firmware/efi/libstub/mem.c ++++ b/drivers/firmware/efi/libstub/mem.c +@@ -89,9 +89,12 @@ efi_status_t efi_allocate_pages(unsigned long size, unsigned long *addr, + efi_physical_addr_t alloc_addr; + efi_status_t status; + ++ max = min(max, EFI_ALLOC_LIMIT); ++ + if (EFI_ALLOC_ALIGN > EFI_PAGE_SIZE) + return efi_allocate_pages_aligned(size, addr, max, +- EFI_ALLOC_ALIGN); ++ EFI_ALLOC_ALIGN, ++ EFI_LOADER_DATA); + + alloc_addr = ALIGN_DOWN(max + 1, EFI_ALLOC_ALIGN) - 1; + status = efi_bs_call(allocate_pages, EFI_ALLOCATE_MAX_ADDRESS, +diff --git a/drivers/firmware/efi/libstub/randomalloc.c 
b/drivers/firmware/efi/libstub/randomalloc.c +index 9fb5869896be7..7ba05719a53ba 100644 +--- a/drivers/firmware/efi/libstub/randomalloc.c ++++ b/drivers/firmware/efi/libstub/randomalloc.c +@@ -16,7 +16,8 @@ + */ + static unsigned long get_entry_num_slots(efi_memory_desc_t *md, + unsigned long size, +- unsigned long align_shift) ++ unsigned long align_shift, ++ u64 alloc_min, u64 alloc_max) + { + unsigned long align = 1UL << align_shift; + u64 first_slot, last_slot, region_end; +@@ -29,11 +30,11 @@ static unsigned long get_entry_num_slots(efi_memory_desc_t *md, + return 0; + + region_end = min(md->phys_addr + md->num_pages * EFI_PAGE_SIZE - 1, +- (u64)ULONG_MAX); ++ alloc_max); + if (region_end < size) + return 0; + +- first_slot = round_up(md->phys_addr, align); ++ first_slot = round_up(max(md->phys_addr, alloc_min), align); + last_slot = round_down(region_end - size + 1, align); + + if (first_slot > last_slot) +@@ -53,7 +54,10 @@ static unsigned long get_entry_num_slots(efi_memory_desc_t *md, + efi_status_t efi_random_alloc(unsigned long size, + unsigned long align, + unsigned long *addr, +- unsigned long random_seed) ++ unsigned long random_seed, ++ int memory_type, ++ unsigned long alloc_min, ++ unsigned long alloc_max) + { + unsigned long total_slots = 0, target_slot; + unsigned long total_mirrored_slots = 0; +@@ -75,7 +79,8 @@ efi_status_t efi_random_alloc(unsigned long size, + efi_memory_desc_t *md = (void *)map->map + map_offset; + unsigned long slots; + +- slots = get_entry_num_slots(md, size, ilog2(align)); ++ slots = get_entry_num_slots(md, size, ilog2(align), alloc_min, ++ alloc_max); + MD_NUM_SLOTS(md) = slots; + total_slots += slots; + if (md->attribute & EFI_MEMORY_MORE_RELIABLE) +@@ -118,7 +123,7 @@ efi_status_t efi_random_alloc(unsigned long size, + pages = size / EFI_PAGE_SIZE; + + status = efi_bs_call(allocate_pages, EFI_ALLOCATE_ADDRESS, +- EFI_LOADER_DATA, pages, &target); ++ memory_type, pages, &target); + if (status == EFI_SUCCESS) + *addr = 
target; + break; +diff --git a/drivers/firmware/efi/libstub/x86-5lvl.c b/drivers/firmware/efi/libstub/x86-5lvl.c +new file mode 100644 +index 0000000000000..479dd445acdcf +--- /dev/null ++++ b/drivers/firmware/efi/libstub/x86-5lvl.c +@@ -0,0 +1,95 @@ ++// SPDX-License-Identifier: GPL-2.0-only ++#include <linux/efi.h> ++ ++#include <asm/boot.h> ++#include <asm/desc.h> ++#include <asm/efi.h> ++ ++#include "efistub.h" ++#include "x86-stub.h" ++ ++bool efi_no5lvl; ++ ++static void (*la57_toggle)(void *cr3); ++ ++static const struct desc_struct gdt[] = { ++ [GDT_ENTRY_KERNEL32_CS] = GDT_ENTRY_INIT(0xc09b, 0, 0xfffff), ++ [GDT_ENTRY_KERNEL_CS] = GDT_ENTRY_INIT(0xa09b, 0, 0xfffff), ++}; ++ ++/* ++ * Enabling (or disabling) 5 level paging is tricky, because it can only be ++ * done from 32-bit mode with paging disabled. This means not only that the ++ * code itself must be running from 32-bit addressable physical memory, but ++ * also that the root page table must be 32-bit addressable, as programming ++ * a 64-bit value into CR3 when running in 32-bit mode is not supported. ++ */ ++efi_status_t efi_setup_5level_paging(void) ++{ ++ u8 tmpl_size = (u8 *)&trampoline_ljmp_imm_offset - (u8 *)&trampoline_32bit_src; ++ efi_status_t status; ++ u8 *la57_code; ++ ++ if (!efi_is_64bit()) ++ return EFI_SUCCESS; ++ ++ /* check for 5 level paging support */ ++ if (native_cpuid_eax(0) < 7 || ++ !(native_cpuid_ecx(7) & (1 << (X86_FEATURE_LA57 & 31)))) ++ return EFI_SUCCESS; ++ ++ /* allocate some 32-bit addressable memory for code and a page table */ ++ status = efi_allocate_pages(2 * PAGE_SIZE, (unsigned long *)&la57_code, ++ U32_MAX); ++ if (status != EFI_SUCCESS) ++ return status; ++ ++ la57_toggle = memcpy(la57_code, trampoline_32bit_src, tmpl_size); ++ memset(la57_code + tmpl_size, 0x90, PAGE_SIZE - tmpl_size); ++ ++ /* ++ * To avoid the need to allocate a 32-bit addressable stack, the ++ * trampoline uses a LJMP instruction to switch back to long mode. 
++ * LJMP takes an absolute destination address, which needs to be ++ * fixed up at runtime. ++ */ ++ *(u32 *)&la57_code[trampoline_ljmp_imm_offset] += (unsigned long)la57_code; ++ ++ efi_adjust_memory_range_protection((unsigned long)la57_toggle, PAGE_SIZE); ++ ++ return EFI_SUCCESS; ++} ++ ++void efi_5level_switch(void) ++{ ++ bool want_la57 = IS_ENABLED(CONFIG_X86_5LEVEL) && !efi_no5lvl; ++ bool have_la57 = native_read_cr4() & X86_CR4_LA57; ++ bool need_toggle = want_la57 ^ have_la57; ++ u64 *pgt = (void *)la57_toggle + PAGE_SIZE; ++ u64 *cr3 = (u64 *)__native_read_cr3(); ++ u64 *new_cr3; ++ ++ if (!la57_toggle || !need_toggle) ++ return; ++ ++ if (!have_la57) { ++ /* ++ * 5 level paging will be enabled, so a root level page needs ++ * to be allocated from the 32-bit addressable physical region, ++ * with its first entry referring to the existing hierarchy. ++ */ ++ new_cr3 = memset(pgt, 0, PAGE_SIZE); ++ new_cr3[0] = (u64)cr3 | _PAGE_TABLE_NOENC; ++ } else { ++ /* take the new root table pointer from the current entry #0 */ ++ new_cr3 = (u64 *)(cr3[0] & PAGE_MASK); ++ ++ /* copy the new root table if it is not 32-bit addressable */ ++ if ((u64)new_cr3 > U32_MAX) ++ new_cr3 = memcpy(pgt, new_cr3, PAGE_SIZE); ++ } ++ ++ native_load_gdt(&(struct desc_ptr){ sizeof(gdt) - 1, (u64)gdt }); ++ ++ la57_toggle(new_cr3); ++} +diff --git a/drivers/firmware/efi/libstub/x86-stub.c b/drivers/firmware/efi/libstub/x86-stub.c +index 4f0152b11a890..784e1b2ae5ccd 100644 +--- a/drivers/firmware/efi/libstub/x86-stub.c ++++ b/drivers/firmware/efi/libstub/x86-stub.c +@@ -15,16 +15,16 @@ + #include <asm/setup.h> + #include <asm/desc.h> + #include <asm/boot.h> ++#include <asm/kaslr.h> ++#include <asm/sev.h> + + #include "efistub.h" +- +-/* Maximum physical address for 64-bit kernel with 4-level paging */ +-#define MAXMEM_X86_64_4LEVEL (1ull << 46) ++#include "x86-stub.h" + + const efi_system_table_t *efi_system_table; + const efi_dxe_services_table_t *efi_dxe_table; +-extern u32 
image_offset; + static efi_loaded_image_t *image = NULL; ++static efi_memory_attribute_protocol_t *memattr; + + static efi_status_t + preserve_pci_rom_image(efi_pci_io_protocol_t *pci, struct pci_setup_rom **__rom) +@@ -212,8 +212,8 @@ static void retrieve_apple_device_properties(struct boot_params *boot_params) + } + } + +-static void +-adjust_memory_range_protection(unsigned long start, unsigned long size) ++efi_status_t efi_adjust_memory_range_protection(unsigned long start, ++ unsigned long size) + { + efi_status_t status; + efi_gcd_memory_space_desc_t desc; +@@ -221,12 +221,22 @@ adjust_memory_range_protection(unsigned long start, unsigned long size) + unsigned long rounded_start, rounded_end; + unsigned long unprotect_start, unprotect_size; + +- if (efi_dxe_table == NULL) +- return; +- + rounded_start = rounddown(start, EFI_PAGE_SIZE); + rounded_end = roundup(start + size, EFI_PAGE_SIZE); + ++ if (memattr != NULL) { ++ status = efi_call_proto(memattr, clear_memory_attributes, ++ rounded_start, ++ rounded_end - rounded_start, ++ EFI_MEMORY_XP); ++ if (status != EFI_SUCCESS) ++ efi_warn("Failed to clear EFI_MEMORY_XP attribute\n"); ++ return status; ++ } ++ ++ if (efi_dxe_table == NULL) ++ return EFI_SUCCESS; ++ + /* + * Don't modify memory region attributes, they are + * already suitable, to lower the possibility to +@@ -238,7 +248,7 @@ adjust_memory_range_protection(unsigned long start, unsigned long size) + status = efi_dxe_call(get_memory_space_descriptor, start, &desc); + + if (status != EFI_SUCCESS) +- return; ++ break; + + next = desc.base_address + desc.length; + +@@ -263,69 +273,26 @@ adjust_memory_range_protection(unsigned long start, unsigned long size) + unprotect_start, + unprotect_start + unprotect_size, + status); ++ break; + } + } ++ return EFI_SUCCESS; + } + +-/* +- * Trampoline takes 2 pages and can be loaded in first megabyte of memory +- * with its end placed between 128k and 640k where BIOS might start. 
+- * (see arch/x86/boot/compressed/pgtable_64.c) +- * +- * We cannot find exact trampoline placement since memory map +- * can be modified by UEFI, and it can alter the computed address. +- */ +- +-#define TRAMPOLINE_PLACEMENT_BASE ((128 - 8)*1024) +-#define TRAMPOLINE_PLACEMENT_SIZE (640*1024 - (128 - 8)*1024) +- +-void startup_32(struct boot_params *boot_params); +- +-static void +-setup_memory_protection(unsigned long image_base, unsigned long image_size) ++static efi_char16_t *efistub_fw_vendor(void) + { +- /* +- * Allow execution of possible trampoline used +- * for switching between 4- and 5-level page tables +- * and relocated kernel image. +- */ +- +- adjust_memory_range_protection(TRAMPOLINE_PLACEMENT_BASE, +- TRAMPOLINE_PLACEMENT_SIZE); ++ unsigned long vendor = efi_table_attr(efi_system_table, fw_vendor); + +-#ifdef CONFIG_64BIT +- if (image_base != (unsigned long)startup_32) +- adjust_memory_range_protection(image_base, image_size); +-#else +- /* +- * Clear protection flags on a whole range of possible +- * addresses used for KASLR. We don't need to do that +- * on x86_64, since KASLR/extraction is performed after +- * dedicated identity page tables are built and we only +- * need to remove possible protection on relocated image +- * itself disregarding further relocations. 
+- */ +- adjust_memory_range_protection(LOAD_PHYSICAL_ADDR, +- KERNEL_IMAGE_SIZE - LOAD_PHYSICAL_ADDR); +-#endif ++ return (efi_char16_t *)vendor; + } + + static const efi_char16_t apple[] = L"Apple"; + +-static void setup_quirks(struct boot_params *boot_params, +- unsigned long image_base, +- unsigned long image_size) ++static void setup_quirks(struct boot_params *boot_params) + { +- efi_char16_t *fw_vendor = (efi_char16_t *)(unsigned long) +- efi_table_attr(efi_system_table, fw_vendor); +- +- if (!memcmp(fw_vendor, apple, sizeof(apple))) { +- if (IS_ENABLED(CONFIG_APPLE_PROPERTIES)) +- retrieve_apple_device_properties(boot_params); +- } +- +- if (IS_ENABLED(CONFIG_EFI_DXE_MEM_ATTRIBUTES)) +- setup_memory_protection(image_base, image_size); ++ if (IS_ENABLED(CONFIG_APPLE_PROPERTIES) && ++ !memcmp(efistub_fw_vendor(), apple, sizeof(apple))) ++ retrieve_apple_device_properties(boot_params); + } + + /* +@@ -478,7 +445,6 @@ efi_status_t __efiapi efi_pe_entry(efi_handle_t handle, + } + + image_base = efi_table_attr(image, image_base); +- image_offset = (void *)startup_32 - image_base; + + status = efi_allocate_pages(sizeof(struct boot_params), + (unsigned long *)&boot_params, ULONG_MAX); +@@ -760,85 +726,139 @@ static efi_status_t exit_boot(struct boot_params *boot_params, void *handle) + return EFI_SUCCESS; + } + ++static bool have_unsupported_snp_features(void) ++{ ++ u64 unsupported; ++ ++ unsupported = snp_get_unsupported_features(sev_get_status()); ++ if (unsupported) { ++ efi_err("Unsupported SEV-SNP features detected: 0x%llx\n", ++ unsupported); ++ return true; ++ } ++ return false; ++} ++ ++static void efi_get_seed(void *seed, int size) ++{ ++ efi_get_random_bytes(size, seed); ++ ++ /* ++ * This only updates seed[0] when running on 32-bit, but in that case, ++ * seed[1] is not used anyway, as there is no virtual KASLR on 32-bit. 
++ */ ++ *(unsigned long *)seed ^= kaslr_get_random_long("EFI"); ++} ++ ++static void error(char *str) ++{ ++ efi_warn("Decompression failed: %s\n", str); ++} ++ ++static efi_status_t efi_decompress_kernel(unsigned long *kernel_entry) ++{ ++ unsigned long virt_addr = LOAD_PHYSICAL_ADDR; ++ unsigned long addr, alloc_size, entry; ++ efi_status_t status; ++ u32 seed[2] = {}; ++ ++ /* determine the required size of the allocation */ ++ alloc_size = ALIGN(max_t(unsigned long, output_len, kernel_total_size), ++ MIN_KERNEL_ALIGN); ++ ++ if (IS_ENABLED(CONFIG_RANDOMIZE_BASE) && !efi_nokaslr) { ++ u64 range = KERNEL_IMAGE_SIZE - LOAD_PHYSICAL_ADDR - kernel_total_size; ++ static const efi_char16_t ami[] = L"American Megatrends"; ++ ++ efi_get_seed(seed, sizeof(seed)); ++ ++ virt_addr += (range * seed[1]) >> 32; ++ virt_addr &= ~(CONFIG_PHYSICAL_ALIGN - 1); ++ ++ /* ++ * Older Dell systems with AMI UEFI firmware v2.0 may hang ++ * while decompressing the kernel if physical address ++ * randomization is enabled. 
++ * ++ * https://bugzilla.kernel.org/show_bug.cgi?id=218173 ++ */ ++ if (efi_system_table->hdr.revision <= EFI_2_00_SYSTEM_TABLE_REVISION && ++ !memcmp(efistub_fw_vendor(), ami, sizeof(ami))) { ++ efi_debug("AMI firmware v2.0 or older detected - disabling physical KASLR\n"); ++ seed[0] = 0; ++ } ++ ++ boot_params_ptr->hdr.loadflags |= KASLR_FLAG; ++ } ++ ++ status = efi_random_alloc(alloc_size, CONFIG_PHYSICAL_ALIGN, &addr, ++ seed[0], EFI_LOADER_CODE, ++ LOAD_PHYSICAL_ADDR, ++ EFI_X86_KERNEL_ALLOC_LIMIT); ++ if (status != EFI_SUCCESS) ++ return status; ++ ++ entry = decompress_kernel((void *)addr, virt_addr, error); ++ if (entry == ULONG_MAX) { ++ efi_free(alloc_size, addr); ++ return EFI_LOAD_ERROR; ++ } ++ ++ *kernel_entry = addr + entry; ++ ++ return efi_adjust_memory_range_protection(addr, kernel_total_size); ++} ++ ++static void __noreturn enter_kernel(unsigned long kernel_addr, ++ struct boot_params *boot_params) ++{ ++ /* enter decompressed kernel with boot_params pointer in RSI/ESI */ ++ asm("jmp *%0"::"r"(kernel_addr), "S"(boot_params)); ++ ++ unreachable(); ++} ++ + /* +- * On success, we return the address of startup_32, which has potentially been +- * relocated by efi_relocate_kernel. +- * On failure, we exit to the firmware via efi_exit instead of returning. ++ * On success, this routine will jump to the relocated image directly and never ++ * return. On failure, it will exit to the firmware via efi_exit() instead of ++ * returning. 
+ */ +-asmlinkage unsigned long efi_main(efi_handle_t handle, +- efi_system_table_t *sys_table_arg, +- struct boot_params *boot_params) ++void __noreturn efi_stub_entry(efi_handle_t handle, ++ efi_system_table_t *sys_table_arg, ++ struct boot_params *boot_params) + { +- unsigned long bzimage_addr = (unsigned long)startup_32; +- unsigned long buffer_start, buffer_end; ++ efi_guid_t guid = EFI_MEMORY_ATTRIBUTE_PROTOCOL_GUID; + struct setup_header *hdr = &boot_params->hdr; + const struct linux_efi_initrd *initrd = NULL; ++ unsigned long kernel_entry; + efi_status_t status; + ++ boot_params_ptr = boot_params; ++ + efi_system_table = sys_table_arg; + /* Check if we were booted by the EFI firmware */ + if (efi_system_table->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE) + efi_exit(handle, EFI_INVALID_PARAMETER); + +- efi_dxe_table = get_efi_config_table(EFI_DXE_SERVICES_TABLE_GUID); +- if (efi_dxe_table && +- efi_dxe_table->hdr.signature != EFI_DXE_SERVICES_TABLE_SIGNATURE) { +- efi_warn("Ignoring DXE services table: invalid signature\n"); +- efi_dxe_table = NULL; ++ if (have_unsupported_snp_features()) ++ efi_exit(handle, EFI_UNSUPPORTED); ++ ++ if (IS_ENABLED(CONFIG_EFI_DXE_MEM_ATTRIBUTES)) { ++ efi_dxe_table = get_efi_config_table(EFI_DXE_SERVICES_TABLE_GUID); ++ if (efi_dxe_table && ++ efi_dxe_table->hdr.signature != EFI_DXE_SERVICES_TABLE_SIGNATURE) { ++ efi_warn("Ignoring DXE services table: invalid signature\n"); ++ efi_dxe_table = NULL; ++ } + } + +- /* +- * If the kernel isn't already loaded at a suitable address, +- * relocate it. +- * +- * It must be loaded above LOAD_PHYSICAL_ADDR. +- * +- * The maximum address for 64-bit is 1 << 46 for 4-level paging. This +- * is defined as the macro MAXMEM, but unfortunately that is not a +- * compile-time constant if 5-level paging is configured, so we instead +- * define our own macro for use here. 
+- * +- * For 32-bit, the maximum address is complicated to figure out, for +- * now use KERNEL_IMAGE_SIZE, which will be 512MiB, the same as what +- * KASLR uses. +- * +- * Also relocate it if image_offset is zero, i.e. the kernel wasn't +- * loaded by LoadImage, but rather by a bootloader that called the +- * handover entry. The reason we must always relocate in this case is +- * to handle the case of systemd-boot booting a unified kernel image, +- * which is a PE executable that contains the bzImage and an initrd as +- * COFF sections. The initrd section is placed after the bzImage +- * without ensuring that there are at least init_size bytes available +- * for the bzImage, and thus the compressed kernel's startup code may +- * overwrite the initrd unless it is moved out of the way. +- */ ++ /* grab the memory attributes protocol if it exists */ ++ efi_bs_call(locate_protocol, &guid, NULL, (void **)&memattr); + +- buffer_start = ALIGN(bzimage_addr - image_offset, +- hdr->kernel_alignment); +- buffer_end = buffer_start + hdr->init_size; +- +- if ((buffer_start < LOAD_PHYSICAL_ADDR) || +- (IS_ENABLED(CONFIG_X86_32) && buffer_end > KERNEL_IMAGE_SIZE) || +- (IS_ENABLED(CONFIG_X86_64) && buffer_end > MAXMEM_X86_64_4LEVEL) || +- (image_offset == 0)) { +- extern char _bss[]; +- +- status = efi_relocate_kernel(&bzimage_addr, +- (unsigned long)_bss - bzimage_addr, +- hdr->init_size, +- hdr->pref_address, +- hdr->kernel_alignment, +- LOAD_PHYSICAL_ADDR); +- if (status != EFI_SUCCESS) { +- efi_err("efi_relocate_kernel() failed!\n"); +- goto fail; +- } +- /* +- * Now that we've copied the kernel elsewhere, we no longer +- * have a set up block before startup_32(), so reset image_offset +- * to zero in case it was set earlier. 
+- */ +- image_offset = 0; ++ status = efi_setup_5level_paging(); ++ if (status != EFI_SUCCESS) { ++ efi_err("efi_setup_5level_paging() failed!\n"); ++ goto fail; + } + + #ifdef CONFIG_CMDLINE_BOOL +@@ -858,6 +878,12 @@ asmlinkage unsigned long efi_main(efi_handle_t handle, + } + } + ++ status = efi_decompress_kernel(&kernel_entry); ++ if (status != EFI_SUCCESS) { ++ efi_err("Failed to decompress kernel\n"); ++ goto fail; ++ } ++ + /* + * At this point, an initrd may already have been loaded by the + * bootloader and passed via bootparams. We permit an initrd loaded +@@ -897,7 +923,7 @@ asmlinkage unsigned long efi_main(efi_handle_t handle, + + setup_efi_pci(boot_params); + +- setup_quirks(boot_params, bzimage_addr, buffer_end - buffer_start); ++ setup_quirks(boot_params); + + status = exit_boot(boot_params, handle); + if (status != EFI_SUCCESS) { +@@ -905,9 +931,38 @@ asmlinkage unsigned long efi_main(efi_handle_t handle, + goto fail; + } + +- return bzimage_addr; ++ /* ++ * Call the SEV init code while still running with the firmware's ++ * GDT/IDT, so #VC exceptions will be handled by EFI. 
++ */ ++ sev_enable(boot_params); ++ ++ efi_5level_switch(); ++ ++ enter_kernel(kernel_entry, boot_params); + fail: +- efi_err("efi_main() failed!\n"); ++ efi_err("efi_stub_entry() failed!\n"); + + efi_exit(handle, status); + } ++ ++#ifdef CONFIG_EFI_HANDOVER_PROTOCOL ++void efi_handover_entry(efi_handle_t handle, efi_system_table_t *sys_table_arg, ++ struct boot_params *boot_params) ++{ ++ extern char _bss[], _ebss[]; ++ ++ memset(_bss, 0, _ebss - _bss); ++ efi_stub_entry(handle, sys_table_arg, boot_params); ++} ++ ++#ifndef CONFIG_EFI_MIXED ++extern __alias(efi_handover_entry) ++void efi32_stub_entry(efi_handle_t handle, efi_system_table_t *sys_table_arg, ++ struct boot_params *boot_params); ++ ++extern __alias(efi_handover_entry) ++void efi64_stub_entry(efi_handle_t handle, efi_system_table_t *sys_table_arg, ++ struct boot_params *boot_params); ++#endif ++#endif +diff --git a/drivers/firmware/efi/libstub/x86-stub.h b/drivers/firmware/efi/libstub/x86-stub.h +new file mode 100644 +index 0000000000000..1c20e99a64944 +--- /dev/null ++++ b/drivers/firmware/efi/libstub/x86-stub.h +@@ -0,0 +1,17 @@ ++/* SPDX-License-Identifier: GPL-2.0-only */ ++ ++#include <linux/efi.h> ++ ++extern void trampoline_32bit_src(void *, bool); ++extern const u16 trampoline_ljmp_imm_offset; ++ ++efi_status_t efi_adjust_memory_range_protection(unsigned long start, ++ unsigned long size); ++ ++#ifdef CONFIG_X86_64 ++efi_status_t efi_setup_5level_paging(void); ++void efi_5level_switch(void); ++#else ++static inline efi_status_t efi_setup_5level_paging(void) { return EFI_SUCCESS; } ++static inline void efi_5level_switch(void) {} ++#endif +diff --git a/drivers/firmware/efi/vars.c b/drivers/firmware/efi/vars.c +index 0ba9f18312f5b..4ca256bcd6971 100644 +--- a/drivers/firmware/efi/vars.c ++++ b/drivers/firmware/efi/vars.c +@@ -66,19 +66,28 @@ int efivars_register(struct efivars *efivars, + const struct efivar_operations *ops, + struct kobject *kobject) + { ++ int rv; ++ + if 
(down_interruptible(&efivars_lock)) + return -EINTR; + ++ if (__efivars) { ++ pr_warn("efivars already registered\n"); ++ rv = -EBUSY; ++ goto out; ++ } ++ + efivars->ops = ops; + efivars->kobject = kobject; + + __efivars = efivars; + + pr_info("Registered efivars operations\n"); +- ++ rv = 0; ++out: + up(&efivars_lock); + +- return 0; ++ return rv; + } + EXPORT_SYMBOL_GPL(efivars_register); + +diff --git a/drivers/gpio/gpio-74x164.c b/drivers/gpio/gpio-74x164.c +index e00c333105170..753e7be039e4d 100644 +--- a/drivers/gpio/gpio-74x164.c ++++ b/drivers/gpio/gpio-74x164.c +@@ -127,8 +127,6 @@ static int gen_74x164_probe(struct spi_device *spi) + if (IS_ERR(chip->gpiod_oe)) + return PTR_ERR(chip->gpiod_oe); + +- gpiod_set_value_cansleep(chip->gpiod_oe, 1); +- + spi_set_drvdata(spi, chip); + + chip->gpio_chip.label = spi->modalias; +@@ -153,6 +151,8 @@ static int gen_74x164_probe(struct spi_device *spi) + goto exit_destroy; + } + ++ gpiod_set_value_cansleep(chip->gpiod_oe, 1); ++ + ret = gpiochip_add_data(&chip->gpio_chip, chip); + if (!ret) + return 0; +diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c +index 6d3e3454a6ed6..9d8c783124033 100644 +--- a/drivers/gpio/gpiolib.c ++++ b/drivers/gpio/gpiolib.c +@@ -784,11 +784,11 @@ int gpiochip_add_data_with_key(struct gpio_chip *gc, void *data, + + ret = gpiochip_irqchip_init_valid_mask(gc); + if (ret) +- goto err_remove_acpi_chip; ++ goto err_free_hogs; + + ret = gpiochip_irqchip_init_hw(gc); + if (ret) +- goto err_remove_acpi_chip; ++ goto err_remove_irqchip_mask; + + ret = gpiochip_add_irqchip(gc, lock_key, request_key); + if (ret) +@@ -813,13 +813,13 @@ int gpiochip_add_data_with_key(struct gpio_chip *gc, void *data, + gpiochip_irqchip_remove(gc); + err_remove_irqchip_mask: + gpiochip_irqchip_free_valid_mask(gc); +-err_remove_acpi_chip: ++err_free_hogs: ++ gpiochip_free_hogs(gc); + acpi_gpiochip_remove(gc); ++ gpiochip_remove_pin_ranges(gc); + err_remove_of_chip: +- gpiochip_free_hogs(gc); + 
of_gpiochip_remove(gc); + err_free_gpiochip_mask: +- gpiochip_remove_pin_ranges(gc); + gpiochip_free_valid_mask(gc); + if (gdev->dev.release) { + /* release() has been registered by gpiochip_setup_dev() */ +diff --git a/drivers/gpu/drm/amd/display/dc/dml/Makefile b/drivers/gpu/drm/amd/display/dc/dml/Makefile +index 6fdf87a6e240f..6c7b286e1123d 100644 +--- a/drivers/gpu/drm/amd/display/dc/dml/Makefile ++++ b/drivers/gpu/drm/amd/display/dc/dml/Makefile +@@ -51,8 +51,12 @@ endif + endif + + ifneq ($(CONFIG_FRAME_WARN),0) ++ifeq ($(filter y,$(CONFIG_KASAN)$(CONFIG_KCSAN)),y) ++frame_warn_flag := -Wframe-larger-than=3072 ++else + frame_warn_flag := -Wframe-larger-than=2048 + endif ++endif + + CFLAGS_$(AMDDALPATH)/dc/dml/display_mode_lib.o := $(dml_ccflags) + +diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c +index dc0a6fba7050f..ff1032de4f76d 100644 +--- a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c ++++ b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c +@@ -6925,6 +6925,23 @@ static int si_dpm_enable(struct amdgpu_device *adev) + return 0; + } + ++static int si_set_temperature_range(struct amdgpu_device *adev) ++{ ++ int ret; ++ ++ ret = si_thermal_enable_alert(adev, false); ++ if (ret) ++ return ret; ++ ret = si_thermal_set_temperature_range(adev, R600_TEMP_RANGE_MIN, R600_TEMP_RANGE_MAX); ++ if (ret) ++ return ret; ++ ret = si_thermal_enable_alert(adev, true); ++ if (ret) ++ return ret; ++ ++ return ret; ++} ++ + static void si_dpm_disable(struct amdgpu_device *adev) + { + struct rv7xx_power_info *pi = rv770_get_pi(adev); +@@ -7608,6 +7625,18 @@ static int si_dpm_process_interrupt(struct amdgpu_device *adev, + + static int si_dpm_late_init(void *handle) + { ++ int ret; ++ struct amdgpu_device *adev = (struct amdgpu_device *)handle; ++ ++ if (!adev->pm.dpm_enabled) ++ return 0; ++ ++ ret = si_set_temperature_range(adev); ++ if (ret) ++ return ret; ++#if 0 //TODO ? 
++ si_dpm_powergate_uvd(adev, true); ++#endif + return 0; + } + +diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c +index 7098f125b54a9..fd32041f82263 100644 +--- a/drivers/gpu/drm/drm_buddy.c ++++ b/drivers/gpu/drm/drm_buddy.c +@@ -332,6 +332,7 @@ alloc_range_bias(struct drm_buddy *mm, + u64 start, u64 end, + unsigned int order) + { ++ u64 req_size = mm->chunk_size << order; + struct drm_buddy_block *block; + struct drm_buddy_block *buddy; + LIST_HEAD(dfs); +@@ -367,6 +368,15 @@ alloc_range_bias(struct drm_buddy *mm, + if (drm_buddy_block_is_allocated(block)) + continue; + ++ if (block_start < start || block_end > end) { ++ u64 adjusted_start = max(block_start, start); ++ u64 adjusted_end = min(block_end, end); ++ ++ if (round_down(adjusted_end + 1, req_size) <= ++ round_up(adjusted_start, req_size)) ++ continue; ++ } ++ + if (contains(start, end, block_start, block_end) && + order == drm_buddy_block_order(block)) { + /* +diff --git a/drivers/gpu/drm/meson/meson_drv.c b/drivers/gpu/drm/meson/meson_drv.c +index 119544d88b586..fbac39aa38cc4 100644 +--- a/drivers/gpu/drm/meson/meson_drv.c ++++ b/drivers/gpu/drm/meson/meson_drv.c +@@ -316,32 +316,34 @@ static int meson_drv_bind_master(struct device *dev, bool has_components) + goto exit_afbcd; + + if (has_components) { +- ret = component_bind_all(drm->dev, drm); ++ ret = component_bind_all(dev, drm); + if (ret) { + dev_err(drm->dev, "Couldn't bind all components\n"); ++ /* Do not try to unbind */ ++ has_components = false; + goto exit_afbcd; + } + } + + ret = meson_encoder_hdmi_init(priv); + if (ret) +- goto unbind_all; ++ goto exit_afbcd; + + ret = meson_plane_create(priv); + if (ret) +- goto unbind_all; ++ goto exit_afbcd; + + ret = meson_overlay_create(priv); + if (ret) +- goto unbind_all; ++ goto exit_afbcd; + + ret = meson_crtc_create(priv); + if (ret) +- goto unbind_all; ++ goto exit_afbcd; + + ret = request_irq(priv->vsync_irq, meson_irq, 0, drm->driver->name, drm); + if (ret) +- goto 
unbind_all; ++ goto exit_afbcd; + + drm_mode_config_reset(drm); + +@@ -359,15 +361,18 @@ static int meson_drv_bind_master(struct device *dev, bool has_components) + + uninstall_irq: + free_irq(priv->vsync_irq, drm); +-unbind_all: +- if (has_components) +- component_unbind_all(drm->dev, drm); + exit_afbcd: + if (priv->afbcd.ops) + priv->afbcd.ops->exit(priv); + free_drm: + drm_dev_put(drm); + ++ meson_encoder_hdmi_remove(priv); ++ meson_encoder_cvbs_remove(priv); ++ ++ if (has_components) ++ component_unbind_all(dev, drm); ++ + return ret; + } + +diff --git a/drivers/gpu/drm/meson/meson_encoder_cvbs.c b/drivers/gpu/drm/meson/meson_encoder_cvbs.c +index 3f73b211fa8e3..3407450435e20 100644 +--- a/drivers/gpu/drm/meson/meson_encoder_cvbs.c ++++ b/drivers/gpu/drm/meson/meson_encoder_cvbs.c +@@ -294,6 +294,5 @@ void meson_encoder_cvbs_remove(struct meson_drm *priv) + if (priv->encoders[MESON_ENC_CVBS]) { + meson_encoder_cvbs = priv->encoders[MESON_ENC_CVBS]; + drm_bridge_remove(&meson_encoder_cvbs->bridge); +- drm_bridge_remove(meson_encoder_cvbs->next_bridge); + } + } +diff --git a/drivers/gpu/drm/meson/meson_encoder_hdmi.c b/drivers/gpu/drm/meson/meson_encoder_hdmi.c +index b14e6e507c61b..03062e7a02b64 100644 +--- a/drivers/gpu/drm/meson/meson_encoder_hdmi.c ++++ b/drivers/gpu/drm/meson/meson_encoder_hdmi.c +@@ -472,6 +472,5 @@ void meson_encoder_hdmi_remove(struct meson_drm *priv) + if (priv->encoders[MESON_ENC_HDMI]) { + meson_encoder_hdmi = priv->encoders[MESON_ENC_HDMI]; + drm_bridge_remove(&meson_encoder_hdmi->bridge); +- drm_bridge_remove(meson_encoder_hdmi->next_bridge); + } + } +diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c +index 5fc55b9777cbf..6806779f8ecce 100644 +--- a/drivers/gpu/drm/tegra/drm.c ++++ b/drivers/gpu/drm/tegra/drm.c +@@ -1252,9 +1252,26 @@ static int host1x_drm_probe(struct host1x_device *dev) + + drm_mode_config_reset(drm); + +- err = drm_aperture_remove_framebuffers(&tegra_drm_driver); +- if (err < 0) +- goto hub; 
++ /* ++ * Only take over from a potential firmware framebuffer if any CRTCs ++ * have been registered. This must not be a fatal error because there ++ * are other accelerators that are exposed via this driver. ++ * ++ * Another case where this happens is on Tegra234 where the display ++ * hardware is no longer part of the host1x complex, so this driver ++ * will not expose any modesetting features. ++ */ ++ if (drm->mode_config.num_crtc > 0) { ++ err = drm_aperture_remove_framebuffers(&tegra_drm_driver); ++ if (err < 0) ++ goto hub; ++ } else { ++ /* ++ * Indicate to userspace that this doesn't expose any display ++ * capabilities. ++ */ ++ drm->driver_features &= ~(DRIVER_MODESET | DRIVER_ATOMIC); ++ } + + err = tegra_drm_fb_init(drm); + if (err < 0) +diff --git a/drivers/infiniband/core/cm_trace.h b/drivers/infiniband/core/cm_trace.h +index e9d282679ef15..944d9071245d2 100644 +--- a/drivers/infiniband/core/cm_trace.h ++++ b/drivers/infiniband/core/cm_trace.h +@@ -16,7 +16,7 @@ + + #include <linux/tracepoint.h> + #include <rdma/ib_cm.h> +-#include <trace/events/rdma.h> ++#include <trace/misc/rdma.h> + + /* + * enum ib_cm_state, from include/rdma/ib_cm.h +diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c +index 0773ca7ace247..067d7f42871ff 100644 +--- a/drivers/infiniband/core/cma.c ++++ b/drivers/infiniband/core/cma.c +@@ -3547,121 +3547,6 @@ static int cma_resolve_ib_addr(struct rdma_id_private *id_priv) + return ret; + } + +-static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, +- const struct sockaddr *dst_addr) +-{ +- struct sockaddr_storage zero_sock = {}; +- +- if (src_addr && src_addr->sa_family) +- return rdma_bind_addr(id, src_addr); +- +- /* +- * When the src_addr is not specified, automatically supply an any addr +- */ +- zero_sock.ss_family = dst_addr->sa_family; +- if (IS_ENABLED(CONFIG_IPV6) && dst_addr->sa_family == AF_INET6) { +- struct sockaddr_in6 *src_addr6 = +- (struct sockaddr_in6 *)&zero_sock; +- 
struct sockaddr_in6 *dst_addr6 = +- (struct sockaddr_in6 *)dst_addr; +- +- src_addr6->sin6_scope_id = dst_addr6->sin6_scope_id; +- if (ipv6_addr_type(&dst_addr6->sin6_addr) & IPV6_ADDR_LINKLOCAL) +- id->route.addr.dev_addr.bound_dev_if = +- dst_addr6->sin6_scope_id; +- } else if (dst_addr->sa_family == AF_IB) { +- ((struct sockaddr_ib *)&zero_sock)->sib_pkey = +- ((struct sockaddr_ib *)dst_addr)->sib_pkey; +- } +- return rdma_bind_addr(id, (struct sockaddr *)&zero_sock); +-} +- +-/* +- * If required, resolve the source address for bind and leave the id_priv in +- * state RDMA_CM_ADDR_BOUND. This oddly uses the state to determine the prior +- * calls made by ULP, a previously bound ID will not be re-bound and src_addr is +- * ignored. +- */ +-static int resolve_prepare_src(struct rdma_id_private *id_priv, +- struct sockaddr *src_addr, +- const struct sockaddr *dst_addr) +-{ +- int ret; +- +- memcpy(cma_dst_addr(id_priv), dst_addr, rdma_addr_size(dst_addr)); +- if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_ADDR_QUERY)) { +- /* For a well behaved ULP state will be RDMA_CM_IDLE */ +- ret = cma_bind_addr(&id_priv->id, src_addr, dst_addr); +- if (ret) +- goto err_dst; +- if (WARN_ON(!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, +- RDMA_CM_ADDR_QUERY))) { +- ret = -EINVAL; +- goto err_dst; +- } +- } +- +- if (cma_family(id_priv) != dst_addr->sa_family) { +- ret = -EINVAL; +- goto err_state; +- } +- return 0; +- +-err_state: +- cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY, RDMA_CM_ADDR_BOUND); +-err_dst: +- memset(cma_dst_addr(id_priv), 0, rdma_addr_size(dst_addr)); +- return ret; +-} +- +-int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, +- const struct sockaddr *dst_addr, unsigned long timeout_ms) +-{ +- struct rdma_id_private *id_priv = +- container_of(id, struct rdma_id_private, id); +- int ret; +- +- ret = resolve_prepare_src(id_priv, src_addr, dst_addr); +- if (ret) +- return ret; +- +- if (cma_any_addr(dst_addr)) { +- ret = 
cma_resolve_loopback(id_priv); +- } else { +- if (dst_addr->sa_family == AF_IB) { +- ret = cma_resolve_ib_addr(id_priv); +- } else { +- /* +- * The FSM can return back to RDMA_CM_ADDR_BOUND after +- * rdma_resolve_ip() is called, eg through the error +- * path in addr_handler(). If this happens the existing +- * request must be canceled before issuing a new one. +- * Since canceling a request is a bit slow and this +- * oddball path is rare, keep track once a request has +- * been issued. The track turns out to be a permanent +- * state since this is the only cancel as it is +- * immediately before rdma_resolve_ip(). +- */ +- if (id_priv->used_resolve_ip) +- rdma_addr_cancel(&id->route.addr.dev_addr); +- else +- id_priv->used_resolve_ip = 1; +- ret = rdma_resolve_ip(cma_src_addr(id_priv), dst_addr, +- &id->route.addr.dev_addr, +- timeout_ms, addr_handler, +- false, id_priv); +- } +- } +- if (ret) +- goto err; +- +- return 0; +-err: +- cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY, RDMA_CM_ADDR_BOUND); +- return ret; +-} +-EXPORT_SYMBOL(rdma_resolve_addr); +- + int rdma_set_reuseaddr(struct rdma_cm_id *id, int reuse) + { + struct rdma_id_private *id_priv; +@@ -4064,27 +3949,26 @@ int rdma_listen(struct rdma_cm_id *id, int backlog) + } + EXPORT_SYMBOL(rdma_listen); + +-int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr) ++static int rdma_bind_addr_dst(struct rdma_id_private *id_priv, ++ struct sockaddr *addr, const struct sockaddr *daddr) + { +- struct rdma_id_private *id_priv; ++ struct sockaddr *id_daddr; + int ret; +- struct sockaddr *daddr; + + if (addr->sa_family != AF_INET && addr->sa_family != AF_INET6 && + addr->sa_family != AF_IB) + return -EAFNOSUPPORT; + +- id_priv = container_of(id, struct rdma_id_private, id); + if (!cma_comp_exch(id_priv, RDMA_CM_IDLE, RDMA_CM_ADDR_BOUND)) + return -EINVAL; + +- ret = cma_check_linklocal(&id->route.addr.dev_addr, addr); ++ ret = cma_check_linklocal(&id_priv->id.route.addr.dev_addr, addr); + if (ret) + goto 
err1; + + memcpy(cma_src_addr(id_priv), addr, rdma_addr_size(addr)); + if (!cma_any_addr(addr)) { +- ret = cma_translate_addr(addr, &id->route.addr.dev_addr); ++ ret = cma_translate_addr(addr, &id_priv->id.route.addr.dev_addr); + if (ret) + goto err1; + +@@ -4104,8 +3988,10 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr) + } + #endif + } +- daddr = cma_dst_addr(id_priv); +- daddr->sa_family = addr->sa_family; ++ id_daddr = cma_dst_addr(id_priv); ++ if (daddr != id_daddr) ++ memcpy(id_daddr, daddr, rdma_addr_size(addr)); ++ id_daddr->sa_family = addr->sa_family; + + ret = cma_get_port(id_priv); + if (ret) +@@ -4121,6 +4007,129 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr) + cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_IDLE); + return ret; + } ++ ++static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, ++ const struct sockaddr *dst_addr) ++{ ++ struct rdma_id_private *id_priv = ++ container_of(id, struct rdma_id_private, id); ++ struct sockaddr_storage zero_sock = {}; ++ ++ if (src_addr && src_addr->sa_family) ++ return rdma_bind_addr_dst(id_priv, src_addr, dst_addr); ++ ++ /* ++ * When the src_addr is not specified, automatically supply an any addr ++ */ ++ zero_sock.ss_family = dst_addr->sa_family; ++ if (IS_ENABLED(CONFIG_IPV6) && dst_addr->sa_family == AF_INET6) { ++ struct sockaddr_in6 *src_addr6 = ++ (struct sockaddr_in6 *)&zero_sock; ++ struct sockaddr_in6 *dst_addr6 = ++ (struct sockaddr_in6 *)dst_addr; ++ ++ src_addr6->sin6_scope_id = dst_addr6->sin6_scope_id; ++ if (ipv6_addr_type(&dst_addr6->sin6_addr) & IPV6_ADDR_LINKLOCAL) ++ id->route.addr.dev_addr.bound_dev_if = ++ dst_addr6->sin6_scope_id; ++ } else if (dst_addr->sa_family == AF_IB) { ++ ((struct sockaddr_ib *)&zero_sock)->sib_pkey = ++ ((struct sockaddr_ib *)dst_addr)->sib_pkey; ++ } ++ return rdma_bind_addr_dst(id_priv, (struct sockaddr *)&zero_sock, dst_addr); ++} ++ ++/* ++ * If required, resolve the source address for bind and 
leave the id_priv in ++ * state RDMA_CM_ADDR_BOUND. This oddly uses the state to determine the prior ++ * calls made by ULP, a previously bound ID will not be re-bound and src_addr is ++ * ignored. ++ */ ++static int resolve_prepare_src(struct rdma_id_private *id_priv, ++ struct sockaddr *src_addr, ++ const struct sockaddr *dst_addr) ++{ ++ int ret; ++ ++ if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_ADDR_QUERY)) { ++ /* For a well behaved ULP state will be RDMA_CM_IDLE */ ++ ret = cma_bind_addr(&id_priv->id, src_addr, dst_addr); ++ if (ret) ++ return ret; ++ if (WARN_ON(!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, ++ RDMA_CM_ADDR_QUERY))) ++ return -EINVAL; ++ ++ } else { ++ memcpy(cma_dst_addr(id_priv), dst_addr, rdma_addr_size(dst_addr)); ++ } ++ ++ if (cma_family(id_priv) != dst_addr->sa_family) { ++ ret = -EINVAL; ++ goto err_state; ++ } ++ return 0; ++ ++err_state: ++ cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY, RDMA_CM_ADDR_BOUND); ++ return ret; ++} ++ ++int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, ++ const struct sockaddr *dst_addr, unsigned long timeout_ms) ++{ ++ struct rdma_id_private *id_priv = ++ container_of(id, struct rdma_id_private, id); ++ int ret; ++ ++ ret = resolve_prepare_src(id_priv, src_addr, dst_addr); ++ if (ret) ++ return ret; ++ ++ if (cma_any_addr(dst_addr)) { ++ ret = cma_resolve_loopback(id_priv); ++ } else { ++ if (dst_addr->sa_family == AF_IB) { ++ ret = cma_resolve_ib_addr(id_priv); ++ } else { ++ /* ++ * The FSM can return back to RDMA_CM_ADDR_BOUND after ++ * rdma_resolve_ip() is called, eg through the error ++ * path in addr_handler(). If this happens the existing ++ * request must be canceled before issuing a new one. ++ * Since canceling a request is a bit slow and this ++ * oddball path is rare, keep track once a request has ++ * been issued. The track turns out to be a permanent ++ * state since this is the only cancel as it is ++ * immediately before rdma_resolve_ip(). 
++ */ ++ if (id_priv->used_resolve_ip) ++ rdma_addr_cancel(&id->route.addr.dev_addr); ++ else ++ id_priv->used_resolve_ip = 1; ++ ret = rdma_resolve_ip(cma_src_addr(id_priv), dst_addr, ++ &id->route.addr.dev_addr, ++ timeout_ms, addr_handler, ++ false, id_priv); ++ } ++ } ++ if (ret) ++ goto err; ++ ++ return 0; ++err: ++ cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY, RDMA_CM_ADDR_BOUND); ++ return ret; ++} ++EXPORT_SYMBOL(rdma_resolve_addr); ++ ++int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr) ++{ ++ struct rdma_id_private *id_priv = ++ container_of(id, struct rdma_id_private, id); ++ ++ return rdma_bind_addr_dst(id_priv, addr, cma_dst_addr(id_priv)); ++} + EXPORT_SYMBOL(rdma_bind_addr); + + static int cma_format_hdr(void *hdr, struct rdma_id_private *id_priv) +diff --git a/drivers/infiniband/core/cma_trace.h b/drivers/infiniband/core/cma_trace.h +index e45264267bcc9..47f3c6e4be893 100644 +--- a/drivers/infiniband/core/cma_trace.h ++++ b/drivers/infiniband/core/cma_trace.h +@@ -15,7 +15,7 @@ + #define _TRACE_RDMA_CMA_H + + #include <linux/tracepoint.h> +-#include <trace/events/rdma.h> ++#include <trace/misc/rdma.h> + + + DECLARE_EVENT_CLASS(cma_fsm_class, +diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c +index d96c78e436f98..5c284dfbe6923 100644 +--- a/drivers/infiniband/core/user_mad.c ++++ b/drivers/infiniband/core/user_mad.c +@@ -131,6 +131,11 @@ struct ib_umad_packet { + struct ib_user_mad mad; + }; + ++struct ib_rmpp_mad_hdr { ++ struct ib_mad_hdr mad_hdr; ++ struct ib_rmpp_hdr rmpp_hdr; ++} __packed; ++ + #define CREATE_TRACE_POINTS + #include <trace/events/ib_umad.h> + +@@ -494,11 +499,11 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf, + size_t count, loff_t *pos) + { + struct ib_umad_file *file = filp->private_data; ++ struct ib_rmpp_mad_hdr *rmpp_mad_hdr; + struct ib_umad_packet *packet; + struct ib_mad_agent *agent; + struct rdma_ah_attr ah_attr; + struct ib_ah *ah; +- struct 
ib_rmpp_mad *rmpp_mad; + __be64 *tid; + int ret, data_len, hdr_len, copy_offset, rmpp_active; + u8 base_version; +@@ -506,7 +511,7 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf, + if (count < hdr_size(file) + IB_MGMT_RMPP_HDR) + return -EINVAL; + +- packet = kzalloc(sizeof *packet + IB_MGMT_RMPP_HDR, GFP_KERNEL); ++ packet = kzalloc(sizeof(*packet) + IB_MGMT_RMPP_HDR, GFP_KERNEL); + if (!packet) + return -ENOMEM; + +@@ -560,13 +565,13 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf, + goto err_up; + } + +- rmpp_mad = (struct ib_rmpp_mad *) packet->mad.data; +- hdr_len = ib_get_mad_data_offset(rmpp_mad->mad_hdr.mgmt_class); ++ rmpp_mad_hdr = (struct ib_rmpp_mad_hdr *)packet->mad.data; ++ hdr_len = ib_get_mad_data_offset(rmpp_mad_hdr->mad_hdr.mgmt_class); + +- if (ib_is_mad_class_rmpp(rmpp_mad->mad_hdr.mgmt_class) ++ if (ib_is_mad_class_rmpp(rmpp_mad_hdr->mad_hdr.mgmt_class) + && ib_mad_kernel_rmpp_agent(agent)) { + copy_offset = IB_MGMT_RMPP_HDR; +- rmpp_active = ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) & ++ rmpp_active = ib_get_rmpp_flags(&rmpp_mad_hdr->rmpp_hdr) & + IB_MGMT_RMPP_FLAG_ACTIVE; + } else { + copy_offset = IB_MGMT_MAD_HDR; +@@ -615,12 +620,12 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf, + tid = &((struct ib_mad_hdr *) packet->msg->mad)->tid; + *tid = cpu_to_be64(((u64) agent->hi_tid) << 32 | + (be64_to_cpup(tid) & 0xffffffff)); +- rmpp_mad->mad_hdr.tid = *tid; ++ rmpp_mad_hdr->mad_hdr.tid = *tid; + } + + if (!ib_mad_kernel_rmpp_agent(agent) +- && ib_is_mad_class_rmpp(rmpp_mad->mad_hdr.mgmt_class) +- && (ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) & IB_MGMT_RMPP_FLAG_ACTIVE)) { ++ && ib_is_mad_class_rmpp(rmpp_mad_hdr->mad_hdr.mgmt_class) ++ && (ib_get_rmpp_flags(&rmpp_mad_hdr->rmpp_hdr) & IB_MGMT_RMPP_FLAG_ACTIVE)) { + spin_lock_irq(&file->send_lock); + list_add_tail(&packet->list, &file->send_list); + spin_unlock_irq(&file->send_lock); +diff --git 
a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c +index 02f3bc4e4895e..13c36f51b9353 100644 +--- a/drivers/input/joystick/xpad.c ++++ b/drivers/input/joystick/xpad.c +@@ -564,6 +564,9 @@ struct xboxone_init_packet { + #define GIP_MOTOR_LT BIT(3) + #define GIP_MOTOR_ALL (GIP_MOTOR_R | GIP_MOTOR_L | GIP_MOTOR_RT | GIP_MOTOR_LT) + ++#define GIP_WIRED_INTF_DATA 0 ++#define GIP_WIRED_INTF_AUDIO 1 ++ + /* + * This packet is required for all Xbox One pads with 2015 + * or later firmware installed (or present from the factory). +@@ -2008,7 +2011,7 @@ static int xpad_probe(struct usb_interface *intf, const struct usb_device_id *id + } + + if (xpad->xtype == XTYPE_XBOXONE && +- intf->cur_altsetting->desc.bInterfaceNumber != 0) { ++ intf->cur_altsetting->desc.bInterfaceNumber != GIP_WIRED_INTF_DATA) { + /* + * The Xbox One controller lists three interfaces all with the + * same interface class, subclass and protocol. Differentiate by +diff --git a/drivers/interconnect/core.c b/drivers/interconnect/core.c +index 1d9494f64a215..4526ff2e1bd5f 100644 +--- a/drivers/interconnect/core.c ++++ b/drivers/interconnect/core.c +@@ -29,7 +29,6 @@ static LIST_HEAD(icc_providers); + static int providers_count; + static bool synced_state; + static DEFINE_MUTEX(icc_lock); +-static DEFINE_MUTEX(icc_bw_lock); + static struct dentry *icc_debugfs_dir; + + static void icc_summary_show_one(struct seq_file *s, struct icc_node *n) +@@ -636,7 +635,7 @@ int icc_set_bw(struct icc_path *path, u32 avg_bw, u32 peak_bw) + if (WARN_ON(IS_ERR(path) || !path->num_nodes)) + return -EINVAL; + +- mutex_lock(&icc_bw_lock); ++ mutex_lock(&icc_lock); + + old_avg = path->reqs[0].avg_bw; + old_peak = path->reqs[0].peak_bw; +@@ -668,7 +667,7 @@ int icc_set_bw(struct icc_path *path, u32 avg_bw, u32 peak_bw) + apply_constraints(path); + } + +- mutex_unlock(&icc_bw_lock); ++ mutex_unlock(&icc_lock); + + trace_icc_set_bw_end(path, ret); + +@@ -971,7 +970,6 @@ void icc_node_add(struct icc_node *node, struct 
icc_provider *provider) + return; + + mutex_lock(&icc_lock); +- mutex_lock(&icc_bw_lock); + + node->provider = provider; + list_add_tail(&node->node_list, &provider->nodes); +@@ -997,7 +995,6 @@ void icc_node_add(struct icc_node *node, struct icc_provider *provider) + node->avg_bw = 0; + node->peak_bw = 0; + +- mutex_unlock(&icc_bw_lock); + mutex_unlock(&icc_lock); + } + EXPORT_SYMBOL_GPL(icc_node_add); +@@ -1137,7 +1134,6 @@ void icc_sync_state(struct device *dev) + return; + + mutex_lock(&icc_lock); +- mutex_lock(&icc_bw_lock); + synced_state = true; + list_for_each_entry(p, &icc_providers, provider_list) { + dev_dbg(p->dev, "interconnect provider is in synced state\n"); +@@ -1150,21 +1146,13 @@ void icc_sync_state(struct device *dev) + } + } + } +- mutex_unlock(&icc_bw_lock); + mutex_unlock(&icc_lock); + } + EXPORT_SYMBOL_GPL(icc_sync_state); + + static int __init icc_init(void) + { +- struct device_node *root; +- +- /* Teach lockdep about lock ordering wrt. shrinker: */ +- fs_reclaim_acquire(GFP_KERNEL); +- might_lock(&icc_bw_lock); +- fs_reclaim_release(GFP_KERNEL); +- +- root = of_find_node_by_path("/"); ++ struct device_node *root = of_find_node_by_path("/"); + + providers_count = of_count_icc_providers(root); + of_node_put(root); +diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +index 8966f7d5aab61..82f100e591b5a 100644 +--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c ++++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +@@ -152,6 +152,18 @@ static void queue_inc_cons(struct arm_smmu_ll_queue *q) + q->cons = Q_OVF(q->cons) | Q_WRP(q, cons) | Q_IDX(q, cons); + } + ++static void queue_sync_cons_ovf(struct arm_smmu_queue *q) ++{ ++ struct arm_smmu_ll_queue *llq = &q->llq; ++ ++ if (likely(Q_OVF(llq->prod) == Q_OVF(llq->cons))) ++ return; ++ ++ llq->cons = Q_OVF(llq->prod) | Q_WRP(llq, llq->cons) | ++ Q_IDX(llq, llq->cons); ++ queue_sync_cons_out(q); ++} ++ + static int queue_sync_prod_in(struct 
arm_smmu_queue *q) + { + u32 prod; +@@ -1583,8 +1595,7 @@ static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev) + } while (!queue_empty(llq)); + + /* Sync our overflow flag, as we believe we're up to speed */ +- llq->cons = Q_OVF(llq->prod) | Q_WRP(llq, llq->cons) | +- Q_IDX(llq, llq->cons); ++ queue_sync_cons_ovf(q); + return IRQ_HANDLED; + } + +@@ -1642,9 +1653,7 @@ static irqreturn_t arm_smmu_priq_thread(int irq, void *dev) + } while (!queue_empty(llq)); + + /* Sync our overflow flag, as we believe we're up to speed */ +- llq->cons = Q_OVF(llq->prod) | Q_WRP(llq, llq->cons) | +- Q_IDX(llq, llq->cons); +- queue_sync_cons_out(q); ++ queue_sync_cons_ovf(q); + return IRQ_HANDLED; + } + +diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c +index d80065c8105af..f15dcb9e4175c 100644 +--- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c ++++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c +@@ -267,12 +267,26 @@ static int qcom_smmu_init_context(struct arm_smmu_domain *smmu_domain, + + static int qcom_smmu_cfg_probe(struct arm_smmu_device *smmu) + { +- unsigned int last_s2cr = ARM_SMMU_GR0_S2CR(smmu->num_mapping_groups - 1); + struct qcom_smmu *qsmmu = to_qcom_smmu(smmu); ++ unsigned int last_s2cr; + u32 reg; + u32 smr; + int i; + ++ /* ++ * Some platforms support more than the Arm SMMU architected maximum of ++ * 128 stream matching groups. For unknown reasons, the additional ++ * groups don't exhibit the same behavior as the architected registers, ++ * so limit the groups to 128 until the behavior is fixed for the other ++ * groups. 
++ */ ++ if (smmu->num_mapping_groups > 128) { ++ dev_notice(smmu->dev, "\tLimiting the stream matching groups to 128\n"); ++ smmu->num_mapping_groups = 128; ++ } ++ ++ last_s2cr = ARM_SMMU_GR0_S2CR(smmu->num_mapping_groups - 1); ++ + /* + * With some firmware versions writes to S2CR of type FAULT are + * ignored, and writing BYPASS will end up written as FAULT in the +diff --git a/drivers/iommu/sprd-iommu.c b/drivers/iommu/sprd-iommu.c +index 8261066de07d7..e4358393fe378 100644 +--- a/drivers/iommu/sprd-iommu.c ++++ b/drivers/iommu/sprd-iommu.c +@@ -152,13 +152,6 @@ static struct iommu_domain *sprd_iommu_domain_alloc(unsigned int domain_type) + return &dom->domain; + } + +-static void sprd_iommu_domain_free(struct iommu_domain *domain) +-{ +- struct sprd_iommu_domain *dom = to_sprd_domain(domain); +- +- kfree(dom); +-} +- + static void sprd_iommu_first_vpn(struct sprd_iommu_domain *dom) + { + struct sprd_iommu_device *sdev = dom->sdev; +@@ -231,6 +224,28 @@ static void sprd_iommu_hw_en(struct sprd_iommu_device *sdev, bool en) + sprd_iommu_update_bits(sdev, reg_cfg, mask, 0, val); + } + ++static void sprd_iommu_cleanup(struct sprd_iommu_domain *dom) ++{ ++ size_t pgt_size; ++ ++ /* Nothing need to do if the domain hasn't been attached */ ++ if (!dom->sdev) ++ return; ++ ++ pgt_size = sprd_iommu_pgt_size(&dom->domain); ++ dma_free_coherent(dom->sdev->dev, pgt_size, dom->pgt_va, dom->pgt_pa); ++ dom->sdev = NULL; ++ sprd_iommu_hw_en(dom->sdev, false); ++} ++ ++static void sprd_iommu_domain_free(struct iommu_domain *domain) ++{ ++ struct sprd_iommu_domain *dom = to_sprd_domain(domain); ++ ++ sprd_iommu_cleanup(dom); ++ kfree(dom); ++} ++ + static int sprd_iommu_attach_device(struct iommu_domain *domain, + struct device *dev) + { +diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c +index a46ce0868fe1f..3a927452a6501 100644 +--- a/drivers/mmc/core/mmc.c ++++ b/drivers/mmc/core/mmc.c +@@ -1007,10 +1007,12 @@ static int mmc_select_bus_width(struct mmc_card 
*card) + static unsigned ext_csd_bits[] = { + EXT_CSD_BUS_WIDTH_8, + EXT_CSD_BUS_WIDTH_4, ++ EXT_CSD_BUS_WIDTH_1, + }; + static unsigned bus_widths[] = { + MMC_BUS_WIDTH_8, + MMC_BUS_WIDTH_4, ++ MMC_BUS_WIDTH_1, + }; + struct mmc_host *host = card->host; + unsigned idx, bus_width = 0; +diff --git a/drivers/mmc/host/mmci_stm32_sdmmc.c b/drivers/mmc/host/mmci_stm32_sdmmc.c +index 60bca78a72b19..0511583ffa764 100644 +--- a/drivers/mmc/host/mmci_stm32_sdmmc.c ++++ b/drivers/mmc/host/mmci_stm32_sdmmc.c +@@ -200,6 +200,8 @@ static int sdmmc_idma_start(struct mmci_host *host, unsigned int *datactrl) + struct scatterlist *sg; + int i; + ++ host->dma_in_progress = true; ++ + if (!host->variant->dma_lli || data->sg_len == 1 || + idma->use_bounce_buffer) { + u32 dma_addr; +@@ -238,9 +240,30 @@ static int sdmmc_idma_start(struct mmci_host *host, unsigned int *datactrl) + return 0; + } + ++static void sdmmc_idma_error(struct mmci_host *host) ++{ ++ struct mmc_data *data = host->data; ++ struct sdmmc_idma *idma = host->dma_priv; ++ ++ if (!dma_inprogress(host)) ++ return; ++ ++ writel_relaxed(0, host->base + MMCI_STM32_IDMACTRLR); ++ host->dma_in_progress = false; ++ data->host_cookie = 0; ++ ++ if (!idma->use_bounce_buffer) ++ dma_unmap_sg(mmc_dev(host->mmc), data->sg, data->sg_len, ++ mmc_get_dma_dir(data)); ++} ++ + static void sdmmc_idma_finalize(struct mmci_host *host, struct mmc_data *data) + { ++ if (!dma_inprogress(host)) ++ return; ++ + writel_relaxed(0, host->base + MMCI_STM32_IDMACTRLR); ++ host->dma_in_progress = false; + + if (!data->host_cookie) + sdmmc_idma_unprep_data(host, data, 0); +@@ -567,6 +590,7 @@ static struct mmci_host_ops sdmmc_variant_ops = { + .dma_setup = sdmmc_idma_setup, + .dma_start = sdmmc_idma_start, + .dma_finalize = sdmmc_idma_finalize, ++ .dma_error = sdmmc_idma_error, + .set_clkreg = mmci_sdmmc_set_clkreg, + .set_pwrreg = mmci_sdmmc_set_pwrreg, + .busy_complete = sdmmc_busy_complete, +diff --git a/drivers/mmc/host/sdhci-xenon-phy.c 
b/drivers/mmc/host/sdhci-xenon-phy.c +index 8cf3a375de659..cc9d28b75eb91 100644 +--- a/drivers/mmc/host/sdhci-xenon-phy.c ++++ b/drivers/mmc/host/sdhci-xenon-phy.c +@@ -11,6 +11,7 @@ + #include <linux/slab.h> + #include <linux/delay.h> + #include <linux/ktime.h> ++#include <linux/iopoll.h> + #include <linux/of_address.h> + + #include "sdhci-pltfm.h" +@@ -109,6 +110,8 @@ + #define XENON_EMMC_PHY_LOGIC_TIMING_ADJUST (XENON_EMMC_PHY_REG_BASE + 0x18) + #define XENON_LOGIC_TIMING_VALUE 0x00AA8977 + ++#define XENON_MAX_PHY_TIMEOUT_LOOPS 100 ++ + /* + * List offset of PHY registers and some special register values + * in eMMC PHY 5.0 or eMMC PHY 5.1 +@@ -216,6 +219,19 @@ static int xenon_alloc_emmc_phy(struct sdhci_host *host) + return 0; + } + ++static int xenon_check_stability_internal_clk(struct sdhci_host *host) ++{ ++ u32 reg; ++ int err; ++ ++ err = read_poll_timeout(sdhci_readw, reg, reg & SDHCI_CLOCK_INT_STABLE, ++ 1100, 20000, false, host, SDHCI_CLOCK_CONTROL); ++ if (err) ++ dev_err(mmc_dev(host->mmc), "phy_init: Internal clock never stabilized.\n"); ++ ++ return err; ++} ++ + /* + * eMMC 5.0/5.1 PHY init/re-init. + * eMMC PHY init should be executed after: +@@ -232,6 +248,11 @@ static int xenon_emmc_phy_init(struct sdhci_host *host) + struct xenon_priv *priv = sdhci_pltfm_priv(pltfm_host); + struct xenon_emmc_phy_regs *phy_regs = priv->emmc_phy_regs; + ++ int ret = xenon_check_stability_internal_clk(host); ++ ++ if (ret) ++ return ret; ++ + reg = sdhci_readl(host, phy_regs->timing_adj); + reg |= XENON_PHY_INITIALIZAION; + sdhci_writel(host, reg, phy_regs->timing_adj); +@@ -259,18 +280,27 @@ static int xenon_emmc_phy_init(struct sdhci_host *host) + /* get the wait time */ + wait /= clock; + wait++; +- /* wait for host eMMC PHY init completes */ +- udelay(wait); + +- reg = sdhci_readl(host, phy_regs->timing_adj); +- reg &= XENON_PHY_INITIALIZAION; +- if (reg) { ++ /* ++ * AC5X spec says bit must be polled until zero. 
++ * We see cases in which timeout can take longer ++ * than the standard calculation on AC5X, which is ++ * expected following the spec comment above. ++ * According to the spec, we must wait as long as ++ * it takes for that bit to toggle on AC5X. ++ * Cap that with 100 delay loops so we won't get ++ * stuck here forever: ++ */ ++ ++ ret = read_poll_timeout(sdhci_readl, reg, ++ !(reg & XENON_PHY_INITIALIZAION), ++ wait, XENON_MAX_PHY_TIMEOUT_LOOPS * wait, ++ false, host, phy_regs->timing_adj); ++ if (ret) + dev_err(mmc_dev(host->mmc), "eMMC PHY init cannot complete after %d us\n", +- wait); +- return -ETIMEDOUT; +- } ++ wait * XENON_MAX_PHY_TIMEOUT_LOOPS); + +- return 0; ++ return ret; + } + + #define ARMADA_3700_SOC_PAD_1_8V 0x1 +diff --git a/drivers/mtd/nand/spi/gigadevice.c b/drivers/mtd/nand/spi/gigadevice.c +index 6b043e24855fb..9116ee7f023ed 100644 +--- a/drivers/mtd/nand/spi/gigadevice.c ++++ b/drivers/mtd/nand/spi/gigadevice.c +@@ -186,7 +186,7 @@ static int gd5fxgq4uexxg_ecc_get_status(struct spinand_device *spinand, + { + u8 status2; + struct spi_mem_op op = SPINAND_GET_FEATURE_OP(GD5FXGQXXEXXG_REG_STATUS2, +- &status2); ++ spinand->scratchbuf); + int ret; + + switch (status & STATUS_ECC_MASK) { +@@ -207,6 +207,7 @@ static int gd5fxgq4uexxg_ecc_get_status(struct spinand_device *spinand, + * report the maximum of 4 in this case + */ + /* bits sorted this way (3...0): ECCS1,ECCS0,ECCSE1,ECCSE0 */ ++ status2 = *(spinand->scratchbuf); + return ((status & STATUS_ECC_MASK) >> 2) | + ((status2 & STATUS_ECC_MASK) >> 4); + +@@ -228,7 +229,7 @@ static int gd5fxgq5xexxg_ecc_get_status(struct spinand_device *spinand, + { + u8 status2; + struct spi_mem_op op = SPINAND_GET_FEATURE_OP(GD5FXGQXXEXXG_REG_STATUS2, +- &status2); ++ spinand->scratchbuf); + int ret; + + switch (status & STATUS_ECC_MASK) { +@@ -248,6 +249,7 @@ static int gd5fxgq5xexxg_ecc_get_status(struct spinand_device *spinand, + * 1 ... 
4 bits are flipped (and corrected) + */ + /* bits sorted this way (1...0): ECCSE1, ECCSE0 */ ++ status2 = *(spinand->scratchbuf); + return ((status2 & STATUS_ECC_MASK) >> 4) + 1; + + case STATUS_ECC_UNCOR_ERROR: +diff --git a/drivers/net/ethernet/Kconfig b/drivers/net/ethernet/Kconfig +index 1917da7841919..5a274b99f2992 100644 +--- a/drivers/net/ethernet/Kconfig ++++ b/drivers/net/ethernet/Kconfig +@@ -84,7 +84,6 @@ source "drivers/net/ethernet/huawei/Kconfig" + source "drivers/net/ethernet/i825xx/Kconfig" + source "drivers/net/ethernet/ibm/Kconfig" + source "drivers/net/ethernet/intel/Kconfig" +-source "drivers/net/ethernet/wangxun/Kconfig" + source "drivers/net/ethernet/xscale/Kconfig" + + config JME +@@ -189,6 +188,7 @@ source "drivers/net/ethernet/toshiba/Kconfig" + source "drivers/net/ethernet/tundra/Kconfig" + source "drivers/net/ethernet/vertexcom/Kconfig" + source "drivers/net/ethernet/via/Kconfig" ++source "drivers/net/ethernet/wangxun/Kconfig" + source "drivers/net/ethernet/wiznet/Kconfig" + source "drivers/net/ethernet/xilinx/Kconfig" + source "drivers/net/ethernet/xircom/Kconfig" +diff --git a/drivers/net/ethernet/intel/igb/igb_ptp.c b/drivers/net/ethernet/intel/igb/igb_ptp.c +index 07171e574e7d7..36e62197fba0b 100644 +--- a/drivers/net/ethernet/intel/igb/igb_ptp.c ++++ b/drivers/net/ethernet/intel/igb/igb_ptp.c +@@ -976,7 +976,7 @@ static void igb_ptp_tx_hwtstamp(struct igb_adapter *adapter) + + igb_ptp_systim_to_hwtstamp(adapter, &shhwtstamps, regval); + /* adjust timestamp for the TX latency based on link speed */ +- if (adapter->hw.mac.type == e1000_i210) { ++ if (hw->mac.type == e1000_i210 || hw->mac.type == e1000_i211) { + switch (adapter->link_speed) { + case SPEED_10: + adjust = IGB_I210_TX_LATENCY_10; +@@ -1022,6 +1022,7 @@ int igb_ptp_rx_pktstamp(struct igb_q_vector *q_vector, void *va, + ktime_t *timestamp) + { + struct igb_adapter *adapter = q_vector->adapter; ++ struct e1000_hw *hw = &adapter->hw; + struct skb_shared_hwtstamps ts; + __le64 
*regval = (__le64 *)va; + int adjust = 0; +@@ -1041,7 +1042,7 @@ int igb_ptp_rx_pktstamp(struct igb_q_vector *q_vector, void *va, + igb_ptp_systim_to_hwtstamp(adapter, &ts, le64_to_cpu(regval[1])); + + /* adjust timestamp for the RX latency based on link speed */ +- if (adapter->hw.mac.type == e1000_i210) { ++ if (hw->mac.type == e1000_i210 || hw->mac.type == e1000_i211) { + switch (adapter->link_speed) { + case SPEED_10: + adjust = IGB_I210_RX_LATENCY_10; +diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c +index dc2e204bcd727..41eac7dfb67e7 100644 +--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c ++++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c +@@ -52,8 +52,10 @@ int mlxsw_sp_acl_tcam_init(struct mlxsw_sp *mlxsw_sp, + max_regions = max_tcam_regions; + + tcam->used_regions = bitmap_zalloc(max_regions, GFP_KERNEL); +- if (!tcam->used_regions) +- return -ENOMEM; ++ if (!tcam->used_regions) { ++ err = -ENOMEM; ++ goto err_alloc_used_regions; ++ } + tcam->max_regions = max_regions; + + max_groups = MLXSW_CORE_RES_GET(mlxsw_sp->core, ACL_MAX_GROUPS); +@@ -78,6 +80,8 @@ int mlxsw_sp_acl_tcam_init(struct mlxsw_sp *mlxsw_sp, + bitmap_free(tcam->used_groups); + err_alloc_used_groups: + bitmap_free(tcam->used_regions); ++err_alloc_used_regions: ++ mutex_destroy(&tcam->lock); + return err; + } + +@@ -86,10 +90,10 @@ void mlxsw_sp_acl_tcam_fini(struct mlxsw_sp *mlxsw_sp, + { + const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops; + +- mutex_destroy(&tcam->lock); + ops->fini(mlxsw_sp, tcam->priv); + bitmap_free(tcam->used_groups); + bitmap_free(tcam->used_regions); ++ mutex_destroy(&tcam->lock); + } + + int mlxsw_sp_acl_tcam_priority_get(struct mlxsw_sp *mlxsw_sp, +diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +index 91b2aa81914ba..e2d51014ab4bc 100644 +--- 
a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c ++++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +@@ -3900,8 +3900,10 @@ static void stmmac_fpe_stop_wq(struct stmmac_priv *priv) + { + set_bit(__FPE_REMOVING, &priv->fpe_task_state); + +- if (priv->fpe_wq) ++ if (priv->fpe_wq) { + destroy_workqueue(priv->fpe_wq); ++ priv->fpe_wq = NULL; ++ } + + netdev_info(priv->dev, "FPE workqueue stop"); + } +diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c +index 937dd9cf4fbaf..7086acfed5b90 100644 +--- a/drivers/net/gtp.c ++++ b/drivers/net/gtp.c +@@ -1902,26 +1902,26 @@ static int __init gtp_init(void) + + get_random_bytes(>p_h_initval, sizeof(gtp_h_initval)); + +- err = rtnl_link_register(>p_link_ops); ++ err = register_pernet_subsys(>p_net_ops); + if (err < 0) + goto error_out; + +- err = register_pernet_subsys(>p_net_ops); ++ err = rtnl_link_register(>p_link_ops); + if (err < 0) +- goto unreg_rtnl_link; ++ goto unreg_pernet_subsys; + + err = genl_register_family(>p_genl_family); + if (err < 0) +- goto unreg_pernet_subsys; ++ goto unreg_rtnl_link; + + pr_info("GTP module loaded (pdp ctx size %zd bytes)\n", + sizeof(struct pdp_ctx)); + return 0; + +-unreg_pernet_subsys: +- unregister_pernet_subsys(>p_net_ops); + unreg_rtnl_link: + rtnl_link_unregister(>p_link_ops); ++unreg_pernet_subsys: ++ unregister_pernet_subsys(>p_net_ops); + error_out: + pr_err("error loading GTP module loaded\n"); + return err; +diff --git a/drivers/net/tun.c b/drivers/net/tun.c +index 367255bb44cdc..922d6f16d99d1 100644 +--- a/drivers/net/tun.c ++++ b/drivers/net/tun.c +@@ -653,6 +653,7 @@ static void __tun_detach(struct tun_file *tfile, bool clean) + tun->tfiles[tun->numqueues - 1]); + ntfile = rtnl_dereference(tun->tfiles[index]); + ntfile->queue_index = index; ++ ntfile->xdp_rxq.queue_index = index; + rcu_assign_pointer(tun->tfiles[tun->numqueues - 1], + NULL); + +diff --git a/drivers/net/usb/dm9601.c b/drivers/net/usb/dm9601.c +index 99ec1d4a972db..8b6d6a1b3c2ec 100644 +--- 
a/drivers/net/usb/dm9601.c ++++ b/drivers/net/usb/dm9601.c +@@ -232,7 +232,7 @@ static int dm9601_mdio_read(struct net_device *netdev, int phy_id, int loc) + err = dm_read_shared_word(dev, 1, loc, &res); + if (err < 0) { + netdev_err(dev->net, "MDIO read error: %d\n", err); +- return err; ++ return 0; + } + + netdev_dbg(dev->net, +diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c +index c458c030fadf6..4fd4563811299 100644 +--- a/drivers/net/usb/lan78xx.c ++++ b/drivers/net/usb/lan78xx.c +@@ -1501,7 +1501,9 @@ static int lan78xx_link_reset(struct lan78xx_net *dev) + + lan78xx_rx_urb_submit_all(dev); + ++ local_bh_disable(); + napi_schedule(&dev->napi); ++ local_bh_enable(); + } + + return 0; +@@ -3035,7 +3037,8 @@ static int lan78xx_reset(struct lan78xx_net *dev) + if (dev->chipid == ID_REV_CHIP_ID_7801_) + buf &= ~MAC_CR_GMII_EN_; + +- if (dev->chipid == ID_REV_CHIP_ID_7800_) { ++ if (dev->chipid == ID_REV_CHIP_ID_7800_ || ++ dev->chipid == ID_REV_CHIP_ID_7850_) { + ret = lan78xx_read_raw_eeprom(dev, 0, 1, &sig); + if (!ret && sig != EEPROM_INDICATOR) { + /* Implies there is no external eeprom. 
Set mac speed */ +diff --git a/drivers/net/veth.c b/drivers/net/veth.c +index 36c5a41f84e44..dd9f5f1461921 100644 +--- a/drivers/net/veth.c ++++ b/drivers/net/veth.c +@@ -1135,14 +1135,6 @@ static int veth_enable_xdp(struct net_device *dev) + veth_disable_xdp_range(dev, 0, dev->real_num_rx_queues, true); + return err; + } +- +- if (!veth_gro_requested(dev)) { +- /* user-space did not require GRO, but adding XDP +- * is supposed to get GRO working +- */ +- dev->features |= NETIF_F_GRO; +- netdev_features_change(dev); +- } + } + } + +@@ -1162,18 +1154,9 @@ static void veth_disable_xdp(struct net_device *dev) + for (i = 0; i < dev->real_num_rx_queues; i++) + rcu_assign_pointer(priv->rq[i].xdp_prog, NULL); + +- if (!netif_running(dev) || !veth_gro_requested(dev)) { ++ if (!netif_running(dev) || !veth_gro_requested(dev)) + veth_napi_del(dev); + +- /* if user-space did not require GRO, since adding XDP +- * enabled it, clear it now +- */ +- if (!veth_gro_requested(dev) && netif_running(dev)) { +- dev->features &= ~NETIF_F_GRO; +- netdev_features_change(dev); +- } +- } +- + veth_disable_xdp_range(dev, 0, dev->real_num_rx_queues, false); + } + +@@ -1376,7 +1359,8 @@ static int veth_alloc_queues(struct net_device *dev) + struct veth_priv *priv = netdev_priv(dev); + int i; + +- priv->rq = kcalloc(dev->num_rx_queues, sizeof(*priv->rq), GFP_KERNEL_ACCOUNT); ++ priv->rq = kvcalloc(dev->num_rx_queues, sizeof(*priv->rq), ++ GFP_KERNEL_ACCOUNT | __GFP_RETRY_MAYFAIL); + if (!priv->rq) + return -ENOMEM; + +@@ -1392,7 +1376,7 @@ static void veth_free_queues(struct net_device *dev) + { + struct veth_priv *priv = netdev_priv(dev); + +- kfree(priv->rq); ++ kvfree(priv->rq); + } + + static int veth_dev_init(struct net_device *dev) +@@ -1558,6 +1542,14 @@ static int veth_xdp_set(struct net_device *dev, struct bpf_prog *prog, + } + + if (!old_prog) { ++ if (!veth_gro_requested(dev)) { ++ /* user-space did not require GRO, but adding ++ * XDP is supposed to get GRO working ++ */ ++ 
dev->features |= NETIF_F_GRO; ++ netdev_features_change(dev); ++ } ++ + peer->hw_features &= ~NETIF_F_GSO_SOFTWARE; + peer->max_mtu = max_mtu; + } +@@ -1568,6 +1560,14 @@ static int veth_xdp_set(struct net_device *dev, struct bpf_prog *prog, + if (dev->flags & IFF_UP) + veth_disable_xdp(dev); + ++ /* if user-space did not require GRO, since adding XDP ++ * enabled it, clear it now ++ */ ++ if (!veth_gro_requested(dev)) { ++ dev->features &= ~NETIF_F_GRO; ++ netdev_features_change(dev); ++ } ++ + if (peer) { + peer->hw_features |= NETIF_F_GSO_SOFTWARE; + peer->max_mtu = ETH_MAX_MTU; +diff --git a/drivers/of/overlay.c b/drivers/of/overlay.c +index 4402871b5c0c0..e663d5585a057 100644 +--- a/drivers/of/overlay.c ++++ b/drivers/of/overlay.c +@@ -45,8 +45,8 @@ struct target { + + /** + * struct fragment - info about fragment nodes in overlay expanded device tree +- * @target: target of the overlay operation + * @overlay: pointer to the __overlay__ node ++ * @target: target of the overlay operation + */ + struct fragment { + struct device_node *overlay; +diff --git a/drivers/of/property.c b/drivers/of/property.c +index 33d5f16c81204..da5d712197704 100644 +--- a/drivers/of/property.c ++++ b/drivers/of/property.c +@@ -1332,7 +1332,7 @@ static struct device_node *parse_remote_endpoint(struct device_node *np, + int index) + { + /* Return NULL for index > 0 to signify end of remote-endpoints. 
*/ +- if (!index || strcmp(prop_name, "remote-endpoint")) ++ if (index > 0 || strcmp(prop_name, "remote-endpoint")) + return NULL; + + return of_graph_get_remote_port_parent(np); +diff --git a/drivers/pci/controller/dwc/pci-layerscape-ep.c b/drivers/pci/controller/dwc/pci-layerscape-ep.c +index ad99707b3b994..dd7d74fecc48e 100644 +--- a/drivers/pci/controller/dwc/pci-layerscape-ep.c ++++ b/drivers/pci/controller/dwc/pci-layerscape-ep.c +@@ -18,6 +18,20 @@ + + #include "pcie-designware.h" + ++#define PEX_PF0_CONFIG 0xC0014 ++#define PEX_PF0_CFG_READY BIT(0) ++ ++/* PEX PFa PCIE PME and message interrupt registers*/ ++#define PEX_PF0_PME_MES_DR 0xC0020 ++#define PEX_PF0_PME_MES_DR_LUD BIT(7) ++#define PEX_PF0_PME_MES_DR_LDD BIT(9) ++#define PEX_PF0_PME_MES_DR_HRD BIT(10) ++ ++#define PEX_PF0_PME_MES_IER 0xC0028 ++#define PEX_PF0_PME_MES_IER_LUDIE BIT(7) ++#define PEX_PF0_PME_MES_IER_LDDIE BIT(9) ++#define PEX_PF0_PME_MES_IER_HRDIE BIT(10) ++ + #define to_ls_pcie_ep(x) dev_get_drvdata((x)->dev) + + struct ls_pcie_ep_drvdata { +@@ -30,8 +44,99 @@ struct ls_pcie_ep { + struct dw_pcie *pci; + struct pci_epc_features *ls_epc; + const struct ls_pcie_ep_drvdata *drvdata; ++ int irq; ++ u32 lnkcap; ++ bool big_endian; + }; + ++static u32 ls_lut_readl(struct ls_pcie_ep *pcie, u32 offset) ++{ ++ struct dw_pcie *pci = pcie->pci; ++ ++ if (pcie->big_endian) ++ return ioread32be(pci->dbi_base + offset); ++ else ++ return ioread32(pci->dbi_base + offset); ++} ++ ++static void ls_lut_writel(struct ls_pcie_ep *pcie, u32 offset, u32 value) ++{ ++ struct dw_pcie *pci = pcie->pci; ++ ++ if (pcie->big_endian) ++ iowrite32be(value, pci->dbi_base + offset); ++ else ++ iowrite32(value, pci->dbi_base + offset); ++} ++ ++static irqreturn_t ls_pcie_ep_event_handler(int irq, void *dev_id) ++{ ++ struct ls_pcie_ep *pcie = dev_id; ++ struct dw_pcie *pci = pcie->pci; ++ u32 val, cfg; ++ u8 offset; ++ ++ val = ls_lut_readl(pcie, PEX_PF0_PME_MES_DR); ++ ls_lut_writel(pcie, PEX_PF0_PME_MES_DR, val); 
++ ++ if (!val) ++ return IRQ_NONE; ++ ++ if (val & PEX_PF0_PME_MES_DR_LUD) { ++ ++ offset = dw_pcie_find_capability(pci, PCI_CAP_ID_EXP); ++ ++ /* ++ * The values of the Maximum Link Width and Supported Link ++ * Speed from the Link Capabilities Register will be lost ++ * during link down or hot reset. Restore initial value ++ * that configured by the Reset Configuration Word (RCW). ++ */ ++ dw_pcie_dbi_ro_wr_en(pci); ++ dw_pcie_writel_dbi(pci, offset + PCI_EXP_LNKCAP, pcie->lnkcap); ++ dw_pcie_dbi_ro_wr_dis(pci); ++ ++ cfg = ls_lut_readl(pcie, PEX_PF0_CONFIG); ++ cfg |= PEX_PF0_CFG_READY; ++ ls_lut_writel(pcie, PEX_PF0_CONFIG, cfg); ++ dw_pcie_ep_linkup(&pci->ep); ++ ++ dev_dbg(pci->dev, "Link up\n"); ++ } else if (val & PEX_PF0_PME_MES_DR_LDD) { ++ dev_dbg(pci->dev, "Link down\n"); ++ } else if (val & PEX_PF0_PME_MES_DR_HRD) { ++ dev_dbg(pci->dev, "Hot reset\n"); ++ } ++ ++ return IRQ_HANDLED; ++} ++ ++static int ls_pcie_ep_interrupt_init(struct ls_pcie_ep *pcie, ++ struct platform_device *pdev) ++{ ++ u32 val; ++ int ret; ++ ++ pcie->irq = platform_get_irq_byname(pdev, "pme"); ++ if (pcie->irq < 0) ++ return pcie->irq; ++ ++ ret = devm_request_irq(&pdev->dev, pcie->irq, ls_pcie_ep_event_handler, ++ IRQF_SHARED, pdev->name, pcie); ++ if (ret) { ++ dev_err(&pdev->dev, "Can't register PCIe IRQ\n"); ++ return ret; ++ } ++ ++ /* Enable interrupts */ ++ val = ls_lut_readl(pcie, PEX_PF0_PME_MES_IER); ++ val |= PEX_PF0_PME_MES_IER_LDDIE | PEX_PF0_PME_MES_IER_HRDIE | ++ PEX_PF0_PME_MES_IER_LUDIE; ++ ls_lut_writel(pcie, PEX_PF0_PME_MES_IER, val); ++ ++ return 0; ++} ++ + static const struct pci_epc_features* + ls_pcie_ep_get_features(struct dw_pcie_ep *ep) + { +@@ -124,6 +229,8 @@ static int __init ls_pcie_ep_probe(struct platform_device *pdev) + struct ls_pcie_ep *pcie; + struct pci_epc_features *ls_epc; + struct resource *dbi_base; ++ u8 offset; ++ int ret; + + pcie = devm_kzalloc(dev, sizeof(*pcie), GFP_KERNEL); + if (!pcie) +@@ -143,6 +250,7 @@ static int __init 
ls_pcie_ep_probe(struct platform_device *pdev) + pci->ops = pcie->drvdata->dw_pcie_ops; + + ls_epc->bar_fixed_64bit = (1 << BAR_2) | (1 << BAR_4); ++ ls_epc->linkup_notifier = true; + + pcie->pci = pci; + pcie->ls_epc = ls_epc; +@@ -154,9 +262,18 @@ static int __init ls_pcie_ep_probe(struct platform_device *pdev) + + pci->ep.ops = &ls_pcie_ep_ops; + ++ pcie->big_endian = of_property_read_bool(dev->of_node, "big-endian"); ++ + platform_set_drvdata(pdev, pcie); + +- return dw_pcie_ep_init(&pci->ep); ++ offset = dw_pcie_find_capability(pci, PCI_CAP_ID_EXP); ++ pcie->lnkcap = dw_pcie_readl_dbi(pci, offset + PCI_EXP_LNKCAP); ++ ++ ret = dw_pcie_ep_init(&pci->ep); ++ if (ret) ++ return ret; ++ ++ return ls_pcie_ep_interrupt_init(pcie, pdev); + } + + static struct platform_driver ls_pcie_ep_driver = { +diff --git a/drivers/phy/freescale/phy-fsl-imx8-mipi-dphy.c b/drivers/phy/freescale/phy-fsl-imx8-mipi-dphy.c +index e625b32889bfc..0928a526e2ab3 100644 +--- a/drivers/phy/freescale/phy-fsl-imx8-mipi-dphy.c ++++ b/drivers/phy/freescale/phy-fsl-imx8-mipi-dphy.c +@@ -706,7 +706,7 @@ static int mixel_dphy_probe(struct platform_device *pdev) + return ret; + } + +- priv->id = of_alias_get_id(np, "mipi_dphy"); ++ priv->id = of_alias_get_id(np, "mipi-dphy"); + if (priv->id < 0) { + dev_err(dev, "Failed to get phy node alias id: %d\n", + priv->id); +diff --git a/drivers/power/supply/bq27xxx_battery_i2c.c b/drivers/power/supply/bq27xxx_battery_i2c.c +index 0713a52a25107..17b37354e32c0 100644 +--- a/drivers/power/supply/bq27xxx_battery_i2c.c ++++ b/drivers/power/supply/bq27xxx_battery_i2c.c +@@ -209,7 +209,9 @@ static void bq27xxx_battery_i2c_remove(struct i2c_client *client) + { + struct bq27xxx_device_info *di = i2c_get_clientdata(client); + +- free_irq(client->irq, di); ++ if (client->irq) ++ free_irq(client->irq, di); ++ + bq27xxx_battery_teardown(di); + + mutex_lock(&battery_mutex); +diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c +index 
5c5954b78585e..edd296f950a33 100644 +--- a/drivers/scsi/scsi_lib.c ++++ b/drivers/scsi/scsi_lib.c +@@ -185,39 +185,37 @@ void scsi_queue_insert(struct scsi_cmnd *cmd, int reason) + __scsi_queue_insert(cmd, reason, true); + } + +- + /** +- * __scsi_execute - insert request and wait for the result +- * @sdev: scsi device ++ * scsi_execute_cmd - insert request and wait for the result ++ * @sdev: scsi_device + * @cmd: scsi command +- * @data_direction: data direction ++ * @opf: block layer request cmd_flags + * @buffer: data buffer + * @bufflen: len of buffer +- * @sense: optional sense buffer +- * @sshdr: optional decoded sense header + * @timeout: request timeout in HZ + * @retries: number of times to retry request +- * @flags: flags for ->cmd_flags +- * @rq_flags: flags for ->rq_flags +- * @resid: optional residual length ++ * @args: Optional args. See struct definition for field descriptions + * + * Returns the scsi_cmnd result field if a command was executed, or a negative + * Linux error code if we didn't get that far. + */ +-int __scsi_execute(struct scsi_device *sdev, const unsigned char *cmd, +- int data_direction, void *buffer, unsigned bufflen, +- unsigned char *sense, struct scsi_sense_hdr *sshdr, +- int timeout, int retries, blk_opf_t flags, +- req_flags_t rq_flags, int *resid) ++int scsi_execute_cmd(struct scsi_device *sdev, const unsigned char *cmd, ++ blk_opf_t opf, void *buffer, unsigned int bufflen, ++ int timeout, int retries, ++ const struct scsi_exec_args *args) + { ++ static const struct scsi_exec_args default_args; + struct request *req; + struct scsi_cmnd *scmd; + int ret; + +- req = scsi_alloc_request(sdev->request_queue, +- data_direction == DMA_TO_DEVICE ? +- REQ_OP_DRV_OUT : REQ_OP_DRV_IN, +- rq_flags & RQF_PM ? 
BLK_MQ_REQ_PM : 0); ++ if (!args) ++ args = &default_args; ++ else if (WARN_ON_ONCE(args->sense && ++ args->sense_len != SCSI_SENSE_BUFFERSIZE)) ++ return -EINVAL; ++ ++ req = scsi_alloc_request(sdev->request_queue, opf, args->req_flags); + if (IS_ERR(req)) + return PTR_ERR(req); + +@@ -232,8 +230,7 @@ int __scsi_execute(struct scsi_device *sdev, const unsigned char *cmd, + memcpy(scmd->cmnd, cmd, scmd->cmd_len); + scmd->allowed = retries; + req->timeout = timeout; +- req->cmd_flags |= flags; +- req->rq_flags |= rq_flags | RQF_QUIET; ++ req->rq_flags |= RQF_QUIET; + + /* + * head injection *required* here otherwise quiesce won't work +@@ -249,20 +246,21 @@ int __scsi_execute(struct scsi_device *sdev, const unsigned char *cmd, + if (unlikely(scmd->resid_len > 0 && scmd->resid_len <= bufflen)) + memset(buffer + bufflen - scmd->resid_len, 0, scmd->resid_len); + +- if (resid) +- *resid = scmd->resid_len; +- if (sense && scmd->sense_len) +- memcpy(sense, scmd->sense_buffer, SCSI_SENSE_BUFFERSIZE); +- if (sshdr) ++ if (args->resid) ++ *args->resid = scmd->resid_len; ++ if (args->sense) ++ memcpy(args->sense, scmd->sense_buffer, SCSI_SENSE_BUFFERSIZE); ++ if (args->sshdr) + scsi_normalize_sense(scmd->sense_buffer, scmd->sense_len, +- sshdr); ++ args->sshdr); ++ + ret = scmd->result; + out: + blk_mq_free_request(req); + + return ret; + } +-EXPORT_SYMBOL(__scsi_execute); ++EXPORT_SYMBOL(scsi_execute_cmd); + + /* + * Wake up the error handler if necessary. 
Avoid as follows that the error +diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c +index 31b5273f43a71..4433b02c8935f 100644 +--- a/drivers/scsi/sd.c ++++ b/drivers/scsi/sd.c +@@ -3284,6 +3284,24 @@ static bool sd_validate_opt_xfer_size(struct scsi_disk *sdkp, + return true; + } + ++static void sd_read_block_zero(struct scsi_disk *sdkp) ++{ ++ unsigned int buf_len = sdkp->device->sector_size; ++ char *buffer, cmd[10] = { }; ++ ++ buffer = kmalloc(buf_len, GFP_KERNEL); ++ if (!buffer) ++ return; ++ ++ cmd[0] = READ_10; ++ put_unaligned_be32(0, &cmd[2]); /* Logical block address 0 */ ++ put_unaligned_be16(1, &cmd[7]); /* Transfer 1 logical block */ ++ ++ scsi_execute_cmd(sdkp->device, cmd, REQ_OP_DRV_IN, buffer, buf_len, ++ SD_TIMEOUT, sdkp->max_retries, NULL); ++ kfree(buffer); ++} ++ + /** + * sd_revalidate_disk - called the first time a new disk is seen, + * performs disk spin up, read_capacity, etc. +@@ -3323,7 +3341,13 @@ static int sd_revalidate_disk(struct gendisk *disk) + */ + if (sdkp->media_present) { + sd_read_capacity(sdkp, buffer); +- ++ /* ++ * Some USB/UAS devices return generic values for mode pages ++ * until the media has been accessed. Trigger a READ operation ++ * to force the device to populate mode pages. ++ */ ++ if (sdp->read_before_ms) ++ sd_read_block_zero(sdkp); + /* + * set the default to rotational. 
All non-rotational devices + * support the block characteristics VPD page, which will +diff --git a/drivers/soc/qcom/rpmhpd.c b/drivers/soc/qcom/rpmhpd.c +index 092f6ab09acf3..9a90f241bb97f 100644 +--- a/drivers/soc/qcom/rpmhpd.c ++++ b/drivers/soc/qcom/rpmhpd.c +@@ -492,12 +492,15 @@ static int rpmhpd_aggregate_corner(struct rpmhpd *pd, unsigned int corner) + unsigned int active_corner, sleep_corner; + unsigned int this_active_corner = 0, this_sleep_corner = 0; + unsigned int peer_active_corner = 0, peer_sleep_corner = 0; ++ unsigned int peer_enabled_corner; + + to_active_sleep(pd, corner, &this_active_corner, &this_sleep_corner); + +- if (peer && peer->enabled) +- to_active_sleep(peer, peer->corner, &peer_active_corner, ++ if (peer && peer->enabled) { ++ peer_enabled_corner = max(peer->corner, peer->enable_corner); ++ to_active_sleep(peer, peer_enabled_corner, &peer_active_corner, + &peer_sleep_corner); ++ } + + active_corner = max(this_active_corner, peer_active_corner); + +diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c +index cb0a4e2cdbb73..247cca46cdfae 100644 +--- a/drivers/usb/gadget/composite.c ++++ b/drivers/usb/gadget/composite.c +@@ -511,6 +511,19 @@ static u8 encode_bMaxPower(enum usb_device_speed speed, + return min(val, 900U) / 8; + } + ++void check_remote_wakeup_config(struct usb_gadget *g, ++ struct usb_configuration *c) ++{ ++ if (USB_CONFIG_ATT_WAKEUP & c->bmAttributes) { ++ /* Reset the rw bit if gadget is not capable of it */ ++ if (!g->wakeup_capable && g->ops->set_remote_wakeup) { ++ WARN(c->cdev, "Clearing wakeup bit for config c.%d\n", ++ c->bConfigurationValue); ++ c->bmAttributes &= ~USB_CONFIG_ATT_WAKEUP; ++ } ++ } ++} ++ + static int config_buf(struct usb_configuration *config, + enum usb_device_speed speed, void *buf, u8 type) + { +@@ -959,6 +972,11 @@ static int set_config(struct usb_composite_dev *cdev, + power = min(power, 500U); + else + power = min(power, 900U); ++ ++ if (USB_CONFIG_ATT_WAKEUP & 
c->bmAttributes) ++ usb_gadget_set_remote_wakeup(gadget, 1); ++ else ++ usb_gadget_set_remote_wakeup(gadget, 0); + done: + if (power <= USB_SELF_POWER_VBUS_MAX_DRAW) + usb_gadget_set_selfpowered(gadget); +diff --git a/drivers/usb/gadget/configfs.c b/drivers/usb/gadget/configfs.c +index 4dcf29577f8f1..b94aec6227c51 100644 +--- a/drivers/usb/gadget/configfs.c ++++ b/drivers/usb/gadget/configfs.c +@@ -1376,6 +1376,9 @@ static int configfs_composite_bind(struct usb_gadget *gadget, + if (gadget_is_otg(gadget)) + c->descriptors = otg_desc; + ++ /* Properly configure the bmAttributes wakeup bit */ ++ check_remote_wakeup_config(gadget, c); ++ + cfg = container_of(c, struct config_usb_cfg, c); + if (!list_empty(&cfg->string_list)) { + i = 0; +diff --git a/drivers/usb/gadget/udc/core.c b/drivers/usb/gadget/udc/core.c +index c40f2ecbe1b8c..0edd9e53fc5a1 100644 +--- a/drivers/usb/gadget/udc/core.c ++++ b/drivers/usb/gadget/udc/core.c +@@ -525,6 +525,33 @@ int usb_gadget_wakeup(struct usb_gadget *gadget) + } + EXPORT_SYMBOL_GPL(usb_gadget_wakeup); + ++/** ++ * usb_gadget_set_remote_wakeup - configures the device remote wakeup feature. ++ * @gadget:the device being configured for remote wakeup ++ * @set:value to be configured. ++ * ++ * set to one to enable remote wakeup feature and zero to disable it. ++ * ++ * returns zero on success, else negative errno. ++ */ ++int usb_gadget_set_remote_wakeup(struct usb_gadget *gadget, int set) ++{ ++ int ret = 0; ++ ++ if (!gadget->ops->set_remote_wakeup) { ++ ret = -EOPNOTSUPP; ++ goto out; ++ } ++ ++ ret = gadget->ops->set_remote_wakeup(gadget, set); ++ ++out: ++ trace_usb_gadget_set_remote_wakeup(gadget, ret); ++ ++ return ret; ++} ++EXPORT_SYMBOL_GPL(usb_gadget_set_remote_wakeup); ++ + /** + * usb_gadget_set_selfpowered - sets the device selfpowered feature. 
+ * @gadget:the device being declared as self-powered +diff --git a/drivers/usb/gadget/udc/trace.h b/drivers/usb/gadget/udc/trace.h +index abdbcb1bacb0b..a5ed26fbc2dad 100644 +--- a/drivers/usb/gadget/udc/trace.h ++++ b/drivers/usb/gadget/udc/trace.h +@@ -91,6 +91,11 @@ DEFINE_EVENT(udc_log_gadget, usb_gadget_wakeup, + TP_ARGS(g, ret) + ); + ++DEFINE_EVENT(udc_log_gadget, usb_gadget_set_remote_wakeup, ++ TP_PROTO(struct usb_gadget *g, int ret), ++ TP_ARGS(g, ret) ++); ++ + DEFINE_EVENT(udc_log_gadget, usb_gadget_set_selfpowered, + TP_PROTO(struct usb_gadget *g, int ret), + TP_ARGS(g, ret) +diff --git a/drivers/usb/storage/scsiglue.c b/drivers/usb/storage/scsiglue.c +index c54e9805da536..12cf9940e5b67 100644 +--- a/drivers/usb/storage/scsiglue.c ++++ b/drivers/usb/storage/scsiglue.c +@@ -179,6 +179,13 @@ static int slave_configure(struct scsi_device *sdev) + */ + sdev->use_192_bytes_for_3f = 1; + ++ /* ++ * Some devices report generic values until the media has been ++ * accessed. Force a READ(10) prior to querying device ++ * characteristics. ++ */ ++ sdev->read_before_ms = 1; ++ + /* + * Some devices don't like MODE SENSE with page=0x3f, + * which is the command used for checking if a device +diff --git a/drivers/usb/storage/uas.c b/drivers/usb/storage/uas.c +index de3836412bf32..ed22053b3252f 100644 +--- a/drivers/usb/storage/uas.c ++++ b/drivers/usb/storage/uas.c +@@ -878,6 +878,13 @@ static int uas_slave_configure(struct scsi_device *sdev) + if (devinfo->flags & US_FL_CAPACITY_HEURISTICS) + sdev->guess_capacity = 1; + ++ /* ++ * Some devices report generic values until the media has been ++ * accessed. Force a READ(10) prior to querying device ++ * characteristics. 
++ */ ++ sdev->read_before_ms = 1; ++ + /* + * Some devices don't like MODE SENSE with page=0x3f, + * which is the command used for checking if a device +diff --git a/drivers/video/fbdev/core/fbcon.c b/drivers/video/fbdev/core/fbcon.c +index fa205be94a4b8..14498a0d13e0b 100644 +--- a/drivers/video/fbdev/core/fbcon.c ++++ b/drivers/video/fbdev/core/fbcon.c +@@ -2397,11 +2397,9 @@ static int fbcon_do_set_font(struct vc_data *vc, int w, int h, int charcount, + struct fbcon_ops *ops = info->fbcon_par; + struct fbcon_display *p = &fb_display[vc->vc_num]; + int resize, ret, old_userfont, old_width, old_height, old_charcount; +- char *old_data = NULL; ++ u8 *old_data = vc->vc_font.data; + + resize = (w != vc->vc_font.width) || (h != vc->vc_font.height); +- if (p->userfont) +- old_data = vc->vc_font.data; + vc->vc_font.data = (void *)(p->fontdata = data); + old_userfont = p->userfont; + if ((p->userfont = userfont)) +@@ -2435,13 +2433,13 @@ static int fbcon_do_set_font(struct vc_data *vc, int w, int h, int charcount, + update_screen(vc); + } + +- if (old_data && (--REFCOUNT(old_data) == 0)) ++ if (old_userfont && (--REFCOUNT(old_data) == 0)) + kfree(old_data - FONT_EXTRA_WORDS * sizeof(int)); + return 0; + + err_out: + p->fontdata = old_data; +- vc->vc_font.data = (void *)old_data; ++ vc->vc_font.data = old_data; + + if (userfont) { + p->userfont = old_userfont; +diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c +index 00f8e349921d4..96b96516c9806 100644 +--- a/drivers/xen/events/events_base.c ++++ b/drivers/xen/events/events_base.c +@@ -937,8 +937,8 @@ static void shutdown_pirq(struct irq_data *data) + return; + + do_mask(info, EVT_MASK_REASON_EXPLICIT); +- xen_evtchn_close(evtchn); + xen_irq_info_cleanup(info); ++ xen_evtchn_close(evtchn); + } + + static void enable_pirq(struct irq_data *data) +@@ -982,8 +982,6 @@ static void __unbind_from_irq(unsigned int irq) + unsigned int cpu = cpu_from_irq(irq); + struct xenbus_device *dev; + +- 
xen_evtchn_close(evtchn); +- + switch (type_from_irq(irq)) { + case IRQT_VIRQ: + per_cpu(virq_to_irq, cpu)[virq_from_irq(irq)] = -1; +@@ -1001,6 +999,7 @@ static void __unbind_from_irq(unsigned int irq) + } + + xen_irq_info_cleanup(info); ++ xen_evtchn_close(evtchn); + } + + xen_free_irq(irq); +diff --git a/fs/afs/dir.c b/fs/afs/dir.c +index cf811b77ee671..6e2c967fae6fc 100644 +--- a/fs/afs/dir.c ++++ b/fs/afs/dir.c +@@ -478,8 +478,10 @@ static int afs_dir_iterate_block(struct afs_vnode *dvnode, + dire->u.name[0] == '.' && + ctx->actor != afs_lookup_filldir && + ctx->actor != afs_lookup_one_filldir && +- memcmp(dire->u.name, ".__afs", 6) == 0) ++ memcmp(dire->u.name, ".__afs", 6) == 0) { ++ ctx->pos = blkoff + next * sizeof(union afs_xdr_dirent); + continue; ++ } + + /* found the next entry */ + if (!dir_emit(ctx, dire->u.name, nlen, +diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c +index 61e58066b5fd2..9c856a73d5333 100644 +--- a/fs/btrfs/dev-replace.c ++++ b/fs/btrfs/dev-replace.c +@@ -740,6 +740,23 @@ static int btrfs_dev_replace_start(struct btrfs_fs_info *fs_info, + return ret; + } + ++static int btrfs_check_replace_dev_names(struct btrfs_ioctl_dev_replace_args *args) ++{ ++ if (args->start.srcdevid == 0) { ++ if (memchr(args->start.srcdev_name, 0, ++ sizeof(args->start.srcdev_name)) == NULL) ++ return -ENAMETOOLONG; ++ } else { ++ args->start.srcdev_name[0] = 0; ++ } ++ ++ if (memchr(args->start.tgtdev_name, 0, ++ sizeof(args->start.tgtdev_name)) == NULL) ++ return -ENAMETOOLONG; ++ ++ return 0; ++} ++ + int btrfs_dev_replace_by_ioctl(struct btrfs_fs_info *fs_info, + struct btrfs_ioctl_dev_replace_args *args) + { +@@ -752,10 +769,9 @@ int btrfs_dev_replace_by_ioctl(struct btrfs_fs_info *fs_info, + default: + return -EINVAL; + } +- +- if ((args->start.srcdevid == 0 && args->start.srcdev_name[0] == '\0') || +- args->start.tgtdev_name[0] == '\0') +- return -EINVAL; ++ ret = btrfs_check_replace_dev_names(args); ++ if (ret < 0) ++ return ret; + + ret 
= btrfs_dev_replace_start(fs_info, args->start.tgtdev_name, + args->start.srcdevid, +diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c +index 0d1b05ded1e35..5756edb37c61e 100644 +--- a/fs/btrfs/disk-io.c ++++ b/fs/btrfs/disk-io.c +@@ -1643,12 +1643,12 @@ void btrfs_free_fs_info(struct btrfs_fs_info *fs_info) + * + * @objectid: root id + * @anon_dev: preallocated anonymous block device number for new roots, +- * pass 0 for new allocation. ++ * pass NULL for a new allocation. + * @check_ref: whether to check root item references, If true, return -ENOENT + * for orphan roots + */ + static struct btrfs_root *btrfs_get_root_ref(struct btrfs_fs_info *fs_info, +- u64 objectid, dev_t anon_dev, ++ u64 objectid, dev_t *anon_dev, + bool check_ref) + { + struct btrfs_root *root; +@@ -1668,9 +1668,9 @@ static struct btrfs_root *btrfs_get_root_ref(struct btrfs_fs_info *fs_info, + * that common but still possible. In that case, we just need + * to free the anon_dev. + */ +- if (unlikely(anon_dev)) { +- free_anon_bdev(anon_dev); +- anon_dev = 0; ++ if (unlikely(anon_dev && *anon_dev)) { ++ free_anon_bdev(*anon_dev); ++ *anon_dev = 0; + } + + if (check_ref && btrfs_root_refs(&root->root_item) == 0) { +@@ -1692,7 +1692,7 @@ static struct btrfs_root *btrfs_get_root_ref(struct btrfs_fs_info *fs_info, + goto fail; + } + +- ret = btrfs_init_fs_root(root, anon_dev); ++ ret = btrfs_init_fs_root(root, anon_dev ? *anon_dev : 0); + if (ret) + goto fail; + +@@ -1728,7 +1728,7 @@ static struct btrfs_root *btrfs_get_root_ref(struct btrfs_fs_info *fs_info, + * root's anon_dev to 0 to avoid a double free, once by btrfs_put_root() + * and once again by our caller. 
+ */ +- if (anon_dev) ++ if (anon_dev && *anon_dev) + root->anon_dev = 0; + btrfs_put_root(root); + return ERR_PTR(ret); +@@ -1744,7 +1744,7 @@ static struct btrfs_root *btrfs_get_root_ref(struct btrfs_fs_info *fs_info, + struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info, + u64 objectid, bool check_ref) + { +- return btrfs_get_root_ref(fs_info, objectid, 0, check_ref); ++ return btrfs_get_root_ref(fs_info, objectid, NULL, check_ref); + } + + /* +@@ -1752,11 +1752,11 @@ struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info, + * the anonymous block device id + * + * @objectid: tree objectid +- * @anon_dev: if zero, allocate a new anonymous block device or use the +- * parameter value ++ * @anon_dev: if NULL, allocate a new anonymous block device or use the ++ * parameter value if not NULL + */ + struct btrfs_root *btrfs_get_new_fs_root(struct btrfs_fs_info *fs_info, +- u64 objectid, dev_t anon_dev) ++ u64 objectid, dev_t *anon_dev) + { + return btrfs_get_root_ref(fs_info, objectid, anon_dev, true); + } +diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h +index 7322af63c0cc7..24bddca86e9c9 100644 +--- a/fs/btrfs/disk-io.h ++++ b/fs/btrfs/disk-io.h +@@ -65,7 +65,7 @@ void btrfs_free_fs_roots(struct btrfs_fs_info *fs_info); + struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info, + u64 objectid, bool check_ref); + struct btrfs_root *btrfs_get_new_fs_root(struct btrfs_fs_info *fs_info, +- u64 objectid, dev_t anon_dev); ++ u64 objectid, dev_t *anon_dev); + struct btrfs_root *btrfs_get_fs_root_commit_root(struct btrfs_fs_info *fs_info, + struct btrfs_path *path, + u64 objectid); +diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c +index 196e222749ccd..64b37afb7c87f 100644 +--- a/fs/btrfs/ioctl.c ++++ b/fs/btrfs/ioctl.c +@@ -708,7 +708,7 @@ static noinline int create_subvol(struct user_namespace *mnt_userns, + free_extent_buffer(leaf); + leaf = NULL; + +- new_root = btrfs_get_new_fs_root(fs_info, objectid, anon_dev); ++ new_root = 
btrfs_get_new_fs_root(fs_info, objectid, &anon_dev); + if (IS_ERR(new_root)) { + ret = PTR_ERR(new_root); + btrfs_abort_transaction(trans, ret); +diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c +index a75669972dc73..9f7ffd9ef6fd7 100644 +--- a/fs/btrfs/send.c ++++ b/fs/btrfs/send.c +@@ -6462,11 +6462,20 @@ static int finish_inode_if_needed(struct send_ctx *sctx, int at_end) + if (ret) + goto out; + } +- if (sctx->cur_inode_last_extent < +- sctx->cur_inode_size) { +- ret = send_hole(sctx, sctx->cur_inode_size); +- if (ret) ++ if (sctx->cur_inode_last_extent < sctx->cur_inode_size) { ++ ret = range_is_hole_in_parent(sctx, ++ sctx->cur_inode_last_extent, ++ sctx->cur_inode_size); ++ if (ret < 0) { + goto out; ++ } else if (ret == 0) { ++ ret = send_hole(sctx, sctx->cur_inode_size); ++ if (ret < 0) ++ goto out; ++ } else { ++ /* Range is already a hole, skip. */ ++ ret = 0; ++ } + } + } + if (need_truncate) { +diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c +index 60db4c3b82fa1..b172091f42612 100644 +--- a/fs/btrfs/transaction.c ++++ b/fs/btrfs/transaction.c +@@ -1809,7 +1809,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, + } + + key.offset = (u64)-1; +- pending->snap = btrfs_get_new_fs_root(fs_info, objectid, pending->anon_dev); ++ pending->snap = btrfs_get_new_fs_root(fs_info, objectid, &pending->anon_dev); + if (IS_ERR(pending->snap)) { + ret = PTR_ERR(pending->snap); + pending->snap = NULL; +diff --git a/fs/efivarfs/vars.c b/fs/efivarfs/vars.c +index 9e4f47808bd5a..13bc606989557 100644 +--- a/fs/efivarfs/vars.c ++++ b/fs/efivarfs/vars.c +@@ -372,7 +372,7 @@ static void dup_variable_bug(efi_char16_t *str16, efi_guid_t *vendor_guid, + int efivar_init(int (*func)(efi_char16_t *, efi_guid_t, unsigned long, void *), + void *data, bool duplicates, struct list_head *head) + { +- unsigned long variable_name_size = 1024; ++ unsigned long variable_name_size = 512; + efi_char16_t *variable_name; + efi_status_t status; + 
efi_guid_t vendor_guid; +@@ -389,12 +389,13 @@ int efivar_init(int (*func)(efi_char16_t *, efi_guid_t, unsigned long, void *), + goto free; + + /* +- * Per EFI spec, the maximum storage allocated for both +- * the variable name and variable data is 1024 bytes. ++ * A small set of old UEFI implementations reject sizes ++ * above a certain threshold, the lowest seen in the wild ++ * is 512. + */ + + do { +- variable_name_size = 1024; ++ variable_name_size = 512; + + status = efivar_get_next_variable(&variable_name_size, + variable_name, +@@ -431,9 +432,13 @@ int efivar_init(int (*func)(efi_char16_t *, efi_guid_t, unsigned long, void *), + break; + case EFI_NOT_FOUND: + break; ++ case EFI_BUFFER_TOO_SMALL: ++ pr_warn("efivars: Variable name size exceeds maximum (%lu > 512)\n", ++ variable_name_size); ++ status = EFI_NOT_FOUND; ++ break; + default: +- printk(KERN_WARNING "efivars: get_next_variable: status=%lx\n", +- status); ++ pr_warn("efivars: get_next_variable: status=%lx\n", status); + status = EFI_NOT_FOUND; + break; + } +diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c +index c648a493faf23..3204bd33e4e8a 100644 +--- a/fs/exportfs/expfs.c ++++ b/fs/exportfs/expfs.c +@@ -18,7 +18,7 @@ + #include <linux/sched.h> + #include <linux/cred.h> + +-#define dprintk(fmt, args...) do{}while(0) ++#define dprintk(fmt, args...) 
pr_debug(fmt, ##args) + + + static int get_name(const struct path *path, char *name, struct dentry *child); +@@ -132,8 +132,8 @@ static struct dentry *reconnect_one(struct vfsmount *mnt, + inode_unlock(dentry->d_inode); + + if (IS_ERR(parent)) { +- dprintk("%s: get_parent of %ld failed, err %d\n", +- __func__, dentry->d_inode->i_ino, PTR_ERR(parent)); ++ dprintk("get_parent of %lu failed, err %ld\n", ++ dentry->d_inode->i_ino, PTR_ERR(parent)); + return parent; + } + +@@ -147,7 +147,7 @@ static struct dentry *reconnect_one(struct vfsmount *mnt, + dprintk("%s: found name: %s\n", __func__, nbuf); + tmp = lookup_one_unlocked(mnt_user_ns(mnt), nbuf, parent, strlen(nbuf)); + if (IS_ERR(tmp)) { +- dprintk("%s: lookup failed: %d\n", __func__, PTR_ERR(tmp)); ++ dprintk("lookup failed: %ld\n", PTR_ERR(tmp)); + err = PTR_ERR(tmp); + goto out_err; + } +diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c +index 284b019cb6529..b72023a6b4c16 100644 +--- a/fs/lockd/svc4proc.c ++++ b/fs/lockd/svc4proc.c +@@ -52,6 +52,7 @@ nlm4svc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args *argp, + *filp = file; + + /* Set up the missing parts of the file_lock structure */ ++ lock->fl.fl_flags = FL_POSIX; + lock->fl.fl_file = file->f_file[mode]; + lock->fl.fl_pid = current->tgid; + lock->fl.fl_start = (loff_t)lock->lock_start; +diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c +index 9c1aa75441e1c..4e30f3c509701 100644 +--- a/fs/lockd/svclock.c ++++ b/fs/lockd/svclock.c +@@ -659,11 +659,13 @@ nlmsvc_unlock(struct net *net, struct nlm_file *file, struct nlm_lock *lock) + nlmsvc_cancel_blocked(net, file, lock); + + lock->fl.fl_type = F_UNLCK; +- if (file->f_file[O_RDONLY]) +- error = vfs_lock_file(file->f_file[O_RDONLY], F_SETLK, ++ lock->fl.fl_file = file->f_file[O_RDONLY]; ++ if (lock->fl.fl_file) ++ error = vfs_lock_file(lock->fl.fl_file, F_SETLK, + &lock->fl, NULL); +- if (file->f_file[O_WRONLY]) +- error = vfs_lock_file(file->f_file[O_WRONLY], F_SETLK, ++ lock->fl.fl_file = 
file->f_file[O_WRONLY]; ++ if (lock->fl.fl_file) ++ error |= vfs_lock_file(lock->fl.fl_file, F_SETLK, + &lock->fl, NULL); + + return (error < 0)? nlm_lck_denied_nolocks : nlm_granted; +@@ -697,9 +699,10 @@ nlmsvc_cancel_blocked(struct net *net, struct nlm_file *file, struct nlm_lock *l + block = nlmsvc_lookup_block(file, lock); + mutex_unlock(&file->f_mutex); + if (block != NULL) { +- mode = lock_to_openmode(&lock->fl); +- vfs_cancel_lock(block->b_file->f_file[mode], +- &block->b_call->a_args.lock.fl); ++ struct file_lock *fl = &block->b_call->a_args.lock.fl; ++ ++ mode = lock_to_openmode(fl); ++ vfs_cancel_lock(block->b_file->f_file[mode], fl); + status = nlmsvc_unlink_block(block); + nlmsvc_release_block(block); + } +diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c +index e35c05e278061..32784f508c810 100644 +--- a/fs/lockd/svcproc.c ++++ b/fs/lockd/svcproc.c +@@ -77,6 +77,7 @@ nlmsvc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args *argp, + + /* Set up the missing parts of the file_lock structure */ + mode = lock_to_openmode(&lock->fl); ++ lock->fl.fl_flags = FL_POSIX; + lock->fl.fl_file = file->f_file[mode]; + lock->fl.fl_pid = current->tgid; + lock->fl.fl_lmops = &nlmsvc_lock_operations; +diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c +index 3515f17eaf3fb..e3b6229e7ae5c 100644 +--- a/fs/lockd/svcsubs.c ++++ b/fs/lockd/svcsubs.c +@@ -210,7 +210,7 @@ nlm_traverse_locks(struct nlm_host *host, struct nlm_file *file, + { + struct inode *inode = nlmsvc_file_inode(file); + struct file_lock *fl; +- struct file_lock_context *flctx = inode->i_flctx; ++ struct file_lock_context *flctx = locks_inode_context(inode); + struct nlm_host *lockhost; + + if (!flctx || list_empty_careful(&flctx->flc_posix)) +@@ -265,7 +265,7 @@ nlm_file_inuse(struct nlm_file *file) + { + struct inode *inode = nlmsvc_file_inode(file); + struct file_lock *fl; +- struct file_lock_context *flctx = inode->i_flctx; ++ struct file_lock_context *flctx = locks_inode_context(inode); + + if 
(file->f_count || !list_empty(&file->f_blocks) || file->f_shares) + return 1; +diff --git a/fs/locks.c b/fs/locks.c +index 1047ab2b15e96..7d0918b8fe5d6 100644 +--- a/fs/locks.c ++++ b/fs/locks.c +@@ -175,7 +175,7 @@ locks_get_lock_context(struct inode *inode, int type) + struct file_lock_context *ctx; + + /* paired with cmpxchg() below */ +- ctx = smp_load_acquire(&inode->i_flctx); ++ ctx = locks_inode_context(inode); + if (likely(ctx) || type == F_UNLCK) + goto out; + +@@ -194,7 +194,7 @@ locks_get_lock_context(struct inode *inode, int type) + */ + if (cmpxchg(&inode->i_flctx, NULL, ctx)) { + kmem_cache_free(flctx_cache, ctx); +- ctx = smp_load_acquire(&inode->i_flctx); ++ ctx = locks_inode_context(inode); + } + out: + trace_locks_get_lock_context(inode, type, ctx); +@@ -247,7 +247,7 @@ locks_check_ctx_file_list(struct file *filp, struct list_head *list, + void + locks_free_lock_context(struct inode *inode) + { +- struct file_lock_context *ctx = inode->i_flctx; ++ struct file_lock_context *ctx = locks_inode_context(inode); + + if (unlikely(ctx)) { + locks_check_ctx_lists(inode); +@@ -891,7 +891,7 @@ posix_test_lock(struct file *filp, struct file_lock *fl) + void *owner; + void (*func)(void); + +- ctx = smp_load_acquire(&inode->i_flctx); ++ ctx = locks_inode_context(inode); + if (!ctx || list_empty_careful(&ctx->flc_posix)) { + fl->fl_type = F_UNLCK; + return; +@@ -1483,7 +1483,7 @@ int __break_lease(struct inode *inode, unsigned int mode, unsigned int type) + new_fl->fl_flags = type; + + /* typically we will check that ctx is non-NULL before calling */ +- ctx = smp_load_acquire(&inode->i_flctx); ++ ctx = locks_inode_context(inode); + if (!ctx) { + WARN_ON_ONCE(1); + goto free_lock; +@@ -1588,7 +1588,7 @@ void lease_get_mtime(struct inode *inode, struct timespec64 *time) + struct file_lock_context *ctx; + struct file_lock *fl; + +- ctx = smp_load_acquire(&inode->i_flctx); ++ ctx = locks_inode_context(inode); + if (ctx && !list_empty_careful(&ctx->flc_lease)) { + 
spin_lock(&ctx->flc_lock); + fl = list_first_entry_or_null(&ctx->flc_lease, +@@ -1634,7 +1634,7 @@ int fcntl_getlease(struct file *filp) + int type = F_UNLCK; + LIST_HEAD(dispose); + +- ctx = smp_load_acquire(&inode->i_flctx); ++ ctx = locks_inode_context(inode); + if (ctx && !list_empty_careful(&ctx->flc_lease)) { + percpu_down_read(&file_rwsem); + spin_lock(&ctx->flc_lock); +@@ -1823,7 +1823,7 @@ static int generic_delete_lease(struct file *filp, void *owner) + struct file_lock_context *ctx; + LIST_HEAD(dispose); + +- ctx = smp_load_acquire(&inode->i_flctx); ++ ctx = locks_inode_context(inode); + if (!ctx) { + trace_generic_delete_lease(inode, NULL); + return error; +@@ -2562,7 +2562,7 @@ void locks_remove_posix(struct file *filp, fl_owner_t owner) + * posix_lock_file(). Another process could be setting a lock on this + * file at the same time, but we wouldn't remove that lock anyway. + */ +- ctx = smp_load_acquire(&inode->i_flctx); ++ ctx = locks_inode_context(inode); + if (!ctx || list_empty(&ctx->flc_posix)) + return; + +@@ -2635,7 +2635,7 @@ void locks_remove_file(struct file *filp) + { + struct file_lock_context *ctx; + +- ctx = smp_load_acquire(&locks_inode(filp)->i_flctx); ++ ctx = locks_inode_context(locks_inode(filp)); + if (!ctx) + return; + +@@ -2682,7 +2682,7 @@ bool vfs_inode_has_locks(struct inode *inode) + struct file_lock_context *ctx; + bool ret; + +- ctx = smp_load_acquire(&inode->i_flctx); ++ ctx = locks_inode_context(inode); + if (!ctx) + return false; + +@@ -2863,7 +2863,7 @@ void show_fd_locks(struct seq_file *f, + struct file_lock_context *ctx; + int id = 0; + +- ctx = smp_load_acquire(&inode->i_flctx); ++ ctx = locks_inode_context(inode); + if (!ctx) + return; + +diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h +index 3fa77ad7258f2..c8a57cfde64b4 100644 +--- a/fs/nfs/nfs4trace.h ++++ b/fs/nfs/nfs4trace.h +@@ -9,10 +9,10 @@ + #define _TRACE_NFS4_H + + #include <linux/tracepoint.h> +-#include <trace/events/sunrpc_base.h> ++#include 
<trace/misc/sunrpc.h> + +-#include <trace/events/fs.h> +-#include <trace/events/nfs.h> ++#include <trace/misc/fs.h> ++#include <trace/misc/nfs.h> + + #define show_nfs_fattr_flags(valid) \ + __print_flags((unsigned long)valid, "|", \ +diff --git a/fs/nfs/nfstrace.h b/fs/nfs/nfstrace.h +index 8c6cc58679ff8..642f6921852fa 100644 +--- a/fs/nfs/nfstrace.h ++++ b/fs/nfs/nfstrace.h +@@ -11,9 +11,9 @@ + #include <linux/tracepoint.h> + #include <linux/iversion.h> + +-#include <trace/events/fs.h> +-#include <trace/events/nfs.h> +-#include <trace/events/sunrpc_base.h> ++#include <trace/misc/fs.h> ++#include <trace/misc/nfs.h> ++#include <trace/misc/sunrpc.h> + + #define nfs_show_cache_validity(v) \ + __print_flags(v, "|", \ +diff --git a/fs/nfs/write.c b/fs/nfs/write.c +index f41d24b54fd1f..6a06066684172 100644 +--- a/fs/nfs/write.c ++++ b/fs/nfs/write.c +@@ -667,8 +667,10 @@ static int nfs_writepage_locked(struct page *page, + int err; + + if (wbc->sync_mode == WB_SYNC_NONE && +- NFS_SERVER(inode)->write_congested) ++ NFS_SERVER(inode)->write_congested) { ++ redirty_page_for_writepage(wbc, page); + return AOP_WRITEPAGE_ACTIVATE; ++ } + + nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE); + nfs_pageio_init_write(&pgio, inode, 0, +diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig +index f6a2fd3015e75..7c441f2bd4440 100644 +--- a/fs/nfsd/Kconfig ++++ b/fs/nfsd/Kconfig +@@ -8,6 +8,7 @@ config NFSD + select SUNRPC + select EXPORTFS + select NFS_ACL_SUPPORT if NFSD_V2_ACL ++ select NFS_ACL_SUPPORT if NFSD_V3_ACL + depends on MULTIUSER + help + Choose Y here if you want to allow other computers to access +@@ -26,19 +27,29 @@ config NFSD + + Below you can choose which versions of the NFS protocol are + available to clients mounting the NFS server on this system. +- Support for NFS version 2 (RFC 1094) is always available when ++ Support for NFS version 3 (RFC 1813) is always available when + CONFIG_NFSD is selected. + + If unsure, say N. 
+ +-config NFSD_V2_ACL +- bool ++config NFSD_V2 ++ bool "NFS server support for NFS version 2 (DEPRECATED)" + depends on NFSD ++ default n ++ help ++ NFSv2 (RFC 1094) was the first publicly-released version of NFS. ++ Unless you are hosting ancient (1990's era) NFS clients, you don't ++ need this. ++ ++ If unsure, say N. ++ ++config NFSD_V2_ACL ++ bool "NFS server support for the NFSv2 ACL protocol extension" ++ depends on NFSD_V2 + + config NFSD_V3_ACL + bool "NFS server support for the NFSv3 ACL protocol extension" + depends on NFSD +- select NFSD_V2_ACL + help + Solaris NFS servers support an auxiliary NFSv3 ACL protocol that + never became an official part of the NFS version 3 protocol. +diff --git a/fs/nfsd/Makefile b/fs/nfsd/Makefile +index 805c06d5f1b4b..6fffc8f03f740 100644 +--- a/fs/nfsd/Makefile ++++ b/fs/nfsd/Makefile +@@ -10,9 +10,10 @@ obj-$(CONFIG_NFSD) += nfsd.o + # this one should be compiled first, as the tracing macros can easily blow up + nfsd-y += trace.o + +-nfsd-y += nfssvc.o nfsctl.o nfsproc.o nfsfh.o vfs.o \ +- export.o auth.o lockd.o nfscache.o nfsxdr.o \ ++nfsd-y += nfssvc.o nfsctl.o nfsfh.o vfs.o \ ++ export.o auth.o lockd.o nfscache.o \ + stats.o filecache.o nfs3proc.o nfs3xdr.o ++nfsd-$(CONFIG_NFSD_V2) += nfsproc.o nfsxdr.o + nfsd-$(CONFIG_NFSD_V2_ACL) += nfs2acl.o + nfsd-$(CONFIG_NFSD_V3_ACL) += nfs3acl.o + nfsd-$(CONFIG_NFSD_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4idmap.o \ +diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c +index e7e6e78d965db..01d7fd108cf3d 100644 +--- a/fs/nfsd/blocklayout.c ++++ b/fs/nfsd/blocklayout.c +@@ -12,6 +12,7 @@ + #include "blocklayoutxdr.h" + #include "pnfs.h" + #include "filecache.h" ++#include "vfs.h" + + #define NFSDDBG_FACILITY NFSDDBG_PNFS + +diff --git a/fs/nfsd/blocklayoutxdr.c b/fs/nfsd/blocklayoutxdr.c +index 2455dc8be18a8..1ed2f691ebb90 100644 +--- a/fs/nfsd/blocklayoutxdr.c ++++ b/fs/nfsd/blocklayoutxdr.c +@@ -9,6 +9,7 @@ + + #include "nfsd.h" + #include "blocklayoutxdr.h" 
++#include "vfs.h" + + #define NFSDDBG_FACILITY NFSDDBG_PNFS + +diff --git a/fs/nfsd/export.h b/fs/nfsd/export.h +index ee0e3aba4a6e5..d03f7f6a8642d 100644 +--- a/fs/nfsd/export.h ++++ b/fs/nfsd/export.h +@@ -115,7 +115,6 @@ struct svc_export * rqst_find_fsidzero_export(struct svc_rqst *); + int exp_rootfh(struct net *, struct auth_domain *, + char *path, struct knfsd_fh *, int maxsize); + __be32 exp_pseudoroot(struct svc_rqst *, struct svc_fh *); +-__be32 nfserrno(int errno); + + static inline void exp_put(struct svc_export *exp) + { +diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c +index 5cb8cce153a57..697acf5c3c681 100644 +--- a/fs/nfsd/filecache.c ++++ b/fs/nfsd/filecache.c +@@ -1,7 +1,32 @@ ++// SPDX-License-Identifier: GPL-2.0 + /* +- * Open file cache. ++ * The NFSD open file cache. + * + * (c) 2015 - Jeff Layton <jeff.layton@primarydata.com> ++ * ++ * An nfsd_file object is a per-file collection of open state that binds ++ * together: ++ * - a struct file * ++ * - a user credential ++ * - a network namespace ++ * - a read-ahead context ++ * - monitoring for writeback errors ++ * ++ * nfsd_file objects are reference-counted. Consumers acquire a new ++ * object via the nfsd_file_acquire API. They manage their interest in ++ * the acquired object, and hence the object's reference count, via ++ * nfsd_file_get and nfsd_file_put. There are two varieties of nfsd_file ++ * object: ++ * ++ * * non-garbage-collected: When a consumer wants to precisely control ++ * the lifetime of a file's open state, it acquires a non-garbage- ++ * collected nfsd_file. The final nfsd_file_put releases the open ++ * state immediately. ++ * ++ * * garbage-collected: When a consumer does not control the lifetime ++ * of open state, it acquires a garbage-collected nfsd_file. The ++ * final nfsd_file_put allows the open state to linger for a period ++ * during which it may be re-used. 
+ */ + + #include <linux/hash.h> +@@ -186,12 +211,9 @@ static const struct rhashtable_params nfsd_file_rhash_params = { + static void + nfsd_file_schedule_laundrette(void) + { +- if ((atomic_read(&nfsd_file_rhash_tbl.nelems) == 0) || +- test_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags) == 0) +- return; +- +- queue_delayed_work(system_wq, &nfsd_filecache_laundrette, +- NFSD_LAUNDRETTE_DELAY); ++ if (test_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags)) ++ queue_delayed_work(system_wq, &nfsd_filecache_laundrette, ++ NFSD_LAUNDRETTE_DELAY); + } + + static void +@@ -589,7 +611,8 @@ static void + nfsd_file_gc_worker(struct work_struct *work) + { + nfsd_file_gc(); +- nfsd_file_schedule_laundrette(); ++ if (list_lru_count(&nfsd_file_lru)) ++ nfsd_file_schedule_laundrette(); + } + + static unsigned long +diff --git a/fs/nfsd/flexfilelayout.c b/fs/nfsd/flexfilelayout.c +index 070f90ed09b61..3ca5304440ff0 100644 +--- a/fs/nfsd/flexfilelayout.c ++++ b/fs/nfsd/flexfilelayout.c +@@ -15,6 +15,7 @@ + + #include "flexfilelayoutxdr.h" + #include "pnfs.h" ++#include "vfs.h" + + #define NFSDDBG_FACILITY NFSDDBG_PNFS + +diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h +index 8c854ba3285bb..51a4b7885cae2 100644 +--- a/fs/nfsd/netns.h ++++ b/fs/nfsd/netns.h +@@ -195,7 +195,7 @@ struct nfsd_net { + + atomic_t nfsd_courtesy_clients; + struct shrinker nfsd_client_shrinker; +- struct delayed_work nfsd_shrinker_work; ++ struct work_struct nfsd_shrinker_work; + }; + + /* Simple check to find out if a given net was properly initialized */ +diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c +index 39989c14c8a1e..4eae2c5af2edf 100644 +--- a/fs/nfsd/nfs4callback.c ++++ b/fs/nfsd/nfs4callback.c +@@ -76,6 +76,17 @@ static __be32 *xdr_encode_empty_array(__be32 *p) + * 1 Protocol" + */ + ++static void encode_uint32(struct xdr_stream *xdr, u32 n) ++{ ++ WARN_ON_ONCE(xdr_stream_encode_u32(xdr, n) < 0); ++} ++ ++static void encode_bitmap4(struct xdr_stream *xdr, const __u32 *bitmap, ++ size_t len) ++{ ++ 
WARN_ON_ONCE(xdr_stream_encode_uint32_array(xdr, bitmap, len) < 0); ++} ++ + /* + * nfs_cb_opnum4 + * +@@ -328,6 +339,24 @@ static void encode_cb_recall4args(struct xdr_stream *xdr, + hdr->nops++; + } + ++/* ++ * CB_RECALLANY4args ++ * ++ * struct CB_RECALLANY4args { ++ * uint32_t craa_objects_to_keep; ++ * bitmap4 craa_type_mask; ++ * }; ++ */ ++static void ++encode_cb_recallany4args(struct xdr_stream *xdr, ++ struct nfs4_cb_compound_hdr *hdr, struct nfsd4_cb_recall_any *ra) ++{ ++ encode_nfs_cb_opnum4(xdr, OP_CB_RECALL_ANY); ++ encode_uint32(xdr, ra->ra_keep); ++ encode_bitmap4(xdr, ra->ra_bmval, ARRAY_SIZE(ra->ra_bmval)); ++ hdr->nops++; ++} ++ + /* + * CB_SEQUENCE4args + * +@@ -482,6 +511,26 @@ static void nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, struct xdr_stream *xdr, + encode_cb_nops(&hdr); + } + ++/* ++ * 20.6. Operation 8: CB_RECALL_ANY - Keep Any N Recallable Objects ++ */ ++static void ++nfs4_xdr_enc_cb_recall_any(struct rpc_rqst *req, ++ struct xdr_stream *xdr, const void *data) ++{ ++ const struct nfsd4_callback *cb = data; ++ struct nfsd4_cb_recall_any *ra; ++ struct nfs4_cb_compound_hdr hdr = { ++ .ident = cb->cb_clp->cl_cb_ident, ++ .minorversion = cb->cb_clp->cl_minorversion, ++ }; ++ ++ ra = container_of(cb, struct nfsd4_cb_recall_any, ra_cb); ++ encode_cb_compound4args(xdr, &hdr); ++ encode_cb_sequence4args(xdr, cb, &hdr); ++ encode_cb_recallany4args(xdr, &hdr, ra); ++ encode_cb_nops(&hdr); ++} + + /* + * NFSv4.0 and NFSv4.1 XDR decode functions +@@ -520,6 +569,28 @@ static int nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp, + return decode_cb_op_status(xdr, OP_CB_RECALL, &cb->cb_status); + } + ++/* ++ * 20.6. 
Operation 8: CB_RECALL_ANY - Keep Any N Recallable Objects ++ */ ++static int ++nfs4_xdr_dec_cb_recall_any(struct rpc_rqst *rqstp, ++ struct xdr_stream *xdr, ++ void *data) ++{ ++ struct nfsd4_callback *cb = data; ++ struct nfs4_cb_compound_hdr hdr; ++ int status; ++ ++ status = decode_cb_compound4res(xdr, &hdr); ++ if (unlikely(status)) ++ return status; ++ status = decode_cb_sequence4res(xdr, cb); ++ if (unlikely(status || cb->cb_seq_status)) ++ return status; ++ status = decode_cb_op_status(xdr, OP_CB_RECALL_ANY, &cb->cb_status); ++ return status; ++} ++ + #ifdef CONFIG_NFSD_PNFS + /* + * CB_LAYOUTRECALL4args +@@ -783,6 +854,7 @@ static const struct rpc_procinfo nfs4_cb_procedures[] = { + #endif + PROC(CB_NOTIFY_LOCK, COMPOUND, cb_notify_lock, cb_notify_lock), + PROC(CB_OFFLOAD, COMPOUND, cb_offload, cb_offload), ++ PROC(CB_RECALL_ANY, COMPOUND, cb_recall_any, cb_recall_any), + }; + + static unsigned int nfs4_cb_counts[ARRAY_SIZE(nfs4_cb_procedures)]; +diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c +index e70a1a2999b7b..5e9809aff37eb 100644 +--- a/fs/nfsd/nfs4idmap.c ++++ b/fs/nfsd/nfs4idmap.c +@@ -41,6 +41,7 @@ + #include "idmap.h" + #include "nfsd.h" + #include "netns.h" ++#include "vfs.h" + + /* + * Turn off idmapping when using AUTH_SYS. 
+diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c +index a9105e95b59c5..ba53cd89ec62c 100644 +--- a/fs/nfsd/nfs4proc.c ++++ b/fs/nfsd/nfs4proc.c +@@ -943,12 +943,7 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, + status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh, + &read->rd_stateid, RD_STATE, + &read->rd_nf, NULL); +- if (status) { +- dprintk("NFSD: nfsd4_read: couldn't process stateid!\n"); +- goto out; +- } +- status = nfs_ok; +-out: ++ + read->rd_rqstp = rqstp; + read->rd_fhp = &cstate->current_fh; + return status; +@@ -1117,10 +1112,8 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, + status = nfs4_preprocess_stateid_op(rqstp, cstate, + &cstate->current_fh, &setattr->sa_stateid, + WR_STATE, NULL, NULL); +- if (status) { +- dprintk("NFSD: nfsd4_setattr: couldn't process stateid!\n"); ++ if (status) + return status; +- } + } + err = fh_want_write(&cstate->current_fh); + if (err) +@@ -1170,10 +1163,8 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, + write->wr_offset, cnt); + status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh, + stateid, WR_STATE, &nf, NULL); +- if (status) { +- dprintk("NFSD: nfsd4_write: couldn't process stateid!\n"); ++ if (status) + return status; +- } + + write->wr_how_written = write->wr_stable_how; + +@@ -1204,17 +1195,13 @@ nfsd4_verify_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, + + status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->save_fh, + src_stateid, RD_STATE, src, NULL); +- if (status) { +- dprintk("NFSD: %s: couldn't process src stateid!\n", __func__); ++ if (status) + goto out; +- } + + status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh, + dst_stateid, WR_STATE, dst, NULL); +- if (status) { +- dprintk("NFSD: %s: couldn't process dst stateid!\n", __func__); ++ if (status) + goto out_put_src; +- } + + /* fix up for NFS-specific error code */ + if 
(!S_ISREG(file_inode((*src)->nf_file)->i_mode) || +@@ -1935,10 +1922,8 @@ nfsd4_fallocate(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, + status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh, + &fallocate->falloc_stateid, + WR_STATE, &nf, NULL); +- if (status != nfs_ok) { +- dprintk("NFSD: nfsd4_fallocate: couldn't process stateid!\n"); ++ if (status != nfs_ok) + return status; +- } + + status = nfsd4_vfs_fallocate(rqstp, &cstate->current_fh, nf->nf_file, + fallocate->falloc_offset, +@@ -1994,10 +1979,8 @@ nfsd4_seek(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, + status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh, + &seek->seek_stateid, + RD_STATE, &nf, NULL); +- if (status) { +- dprintk("NFSD: nfsd4_seek: couldn't process stateid!\n"); ++ if (status) + return status; +- } + + switch (seek->seek_whence) { + case NFS4_CONTENT_DATA: +diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c +index b3f6dda930d8b..b9d694ec25d19 100644 +--- a/fs/nfsd/nfs4state.c ++++ b/fs/nfsd/nfs4state.c +@@ -44,7 +44,9 @@ + #include <linux/jhash.h> + #include <linux/string_helpers.h> + #include <linux/fsnotify.h> ++#include <linux/rhashtable.h> + #include <linux/nfs_ssc.h> ++ + #include "xdr4.h" + #include "xdr4cb.h" + #include "vfs.h" +@@ -84,6 +86,7 @@ static bool check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner) + static void nfs4_free_ol_stateid(struct nfs4_stid *stid); + void nfsd4_end_grace(struct nfsd_net *nn); + static void _free_cpntf_state_locked(struct nfsd_net *nn, struct nfs4_cpntf_state *cps); ++static void nfsd4_file_hash_remove(struct nfs4_file *fi); + + /* Locking: */ + +@@ -588,11 +591,8 @@ static void nfsd4_free_file_rcu(struct rcu_head *rcu) + void + put_nfs4_file(struct nfs4_file *fi) + { +- might_lock(&state_lock); +- +- if (refcount_dec_and_lock(&fi->fi_ref, &state_lock)) { +- hlist_del_rcu(&fi->fi_hash); +- spin_unlock(&state_lock); ++ if (refcount_dec_and_test(&fi->fi_ref)) { 
++ nfsd4_file_hash_remove(fi); + WARN_ON_ONCE(!list_empty(&fi->fi_clnt_odstate)); + WARN_ON_ONCE(!list_empty(&fi->fi_delegations)); + call_rcu(&fi->fi_rcu, nfsd4_free_file_rcu); +@@ -717,19 +717,20 @@ static unsigned int ownerstr_hashval(struct xdr_netobj *ownername) + return ret & OWNER_HASH_MASK; + } + +-/* hash table for nfs4_file */ +-#define FILE_HASH_BITS 8 +-#define FILE_HASH_SIZE (1 << FILE_HASH_BITS) +- +-static unsigned int file_hashval(struct svc_fh *fh) +-{ +- struct inode *inode = d_inode(fh->fh_dentry); ++static struct rhltable nfs4_file_rhltable ____cacheline_aligned_in_smp; + +- /* XXX: why not (here & in file cache) use inode? */ +- return (unsigned int)hash_long(inode->i_ino, FILE_HASH_BITS); +-} ++static const struct rhashtable_params nfs4_file_rhash_params = { ++ .key_len = sizeof_field(struct nfs4_file, fi_inode), ++ .key_offset = offsetof(struct nfs4_file, fi_inode), ++ .head_offset = offsetof(struct nfs4_file, fi_rlist), + +-static struct hlist_head file_hashtbl[FILE_HASH_SIZE]; ++ /* ++ * Start with a single page hash table to reduce resizing churn ++ * on light workloads. 
++ */ ++ .min_size = 256, ++ .automatic_shrinking = true, ++}; + + /* + * Check if courtesy clients have conflicting access and resolve it if possible +@@ -1367,6 +1368,8 @@ static void revoke_delegation(struct nfs4_delegation *dp) + + WARN_ON(!list_empty(&dp->dl_recall_lru)); + ++ trace_nfsd_stid_revoke(&dp->dl_stid); ++ + if (clp->cl_minorversion) { + spin_lock(&clp->cl_lock); + dp->dl_stid.sc_type = NFS4_REVOKED_DELEG_STID; +@@ -1831,13 +1834,12 @@ static struct nfsd4_session *alloc_session(struct nfsd4_channel_attrs *fattrs, + int numslots = fattrs->maxreqs; + int slotsize = slot_bytes(fattrs); + struct nfsd4_session *new; +- int mem, i; ++ int i; + +- BUILD_BUG_ON(NFSD_MAX_SLOTS_PER_SESSION * sizeof(struct nfsd4_slot *) +- + sizeof(struct nfsd4_session) > PAGE_SIZE); +- mem = numslots * sizeof(struct nfsd4_slot *); ++ BUILD_BUG_ON(struct_size(new, se_slots, NFSD_MAX_SLOTS_PER_SESSION) ++ > PAGE_SIZE); + +- new = kzalloc(sizeof(*new) + mem, GFP_KERNEL); ++ new = kzalloc(struct_size(new, se_slots, numslots), GFP_KERNEL); + if (!new) + return NULL; + /* allocate each struct nfsd4_slot and data cache in one piece */ +@@ -2143,6 +2145,7 @@ static void __free_client(struct kref *k) + kfree(clp->cl_nii_domain.data); + kfree(clp->cl_nii_name.data); + idr_destroy(&clp->cl_stateids); ++ kfree(clp->cl_ra); + kmem_cache_free(client_slab, clp); + } + +@@ -2870,6 +2873,37 @@ static const struct tree_descr client_files[] = { + [3] = {""}, + }; + ++static int ++nfsd4_cb_recall_any_done(struct nfsd4_callback *cb, ++ struct rpc_task *task) ++{ ++ trace_nfsd_cb_recall_any_done(cb, task); ++ switch (task->tk_status) { ++ case -NFS4ERR_DELAY: ++ rpc_delay(task, 2 * HZ); ++ return 0; ++ default: ++ return 1; ++ } ++} ++ ++static void ++nfsd4_cb_recall_any_release(struct nfsd4_callback *cb) ++{ ++ struct nfs4_client *clp = cb->cb_clp; ++ struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); ++ ++ spin_lock(&nn->client_lock); ++ clear_bit(NFSD4_CLIENT_CB_RECALL_ANY, 
&clp->cl_flags); ++ put_client_renew_locked(clp); ++ spin_unlock(&nn->client_lock); ++} ++ ++static const struct nfsd4_callback_ops nfsd4_cb_recall_any_ops = { ++ .done = nfsd4_cb_recall_any_done, ++ .release = nfsd4_cb_recall_any_release, ++}; ++ + static struct nfs4_client *create_client(struct xdr_netobj name, + struct svc_rqst *rqstp, nfs4_verifier *verf) + { +@@ -2907,6 +2941,14 @@ static struct nfs4_client *create_client(struct xdr_netobj name, + free_client(clp); + return NULL; + } ++ clp->cl_ra = kzalloc(sizeof(*clp->cl_ra), GFP_KERNEL); ++ if (!clp->cl_ra) { ++ free_client(clp); ++ return NULL; ++ } ++ clp->cl_ra_time = 0; ++ nfsd4_init_cb(&clp->cl_ra->ra_cb, clp, &nfsd4_cb_recall_any_ops, ++ NFSPROC4_CLNT_CB_RECALL_ANY); + return clp; + } + +@@ -4276,11 +4318,9 @@ static struct nfs4_file *nfsd4_alloc_file(void) + } + + /* OPEN Share state helper functions */ +-static void nfsd4_init_file(struct svc_fh *fh, unsigned int hashval, +- struct nfs4_file *fp) +-{ +- lockdep_assert_held(&state_lock); + ++static void nfsd4_file_init(const struct svc_fh *fh, struct nfs4_file *fp) ++{ + refcount_set(&fp->fi_ref, 1); + spin_lock_init(&fp->fi_lock); + INIT_LIST_HEAD(&fp->fi_stateids); +@@ -4298,7 +4338,6 @@ static void nfsd4_init_file(struct svc_fh *fh, unsigned int hashval, + INIT_LIST_HEAD(&fp->fi_lo_states); + atomic_set(&fp->fi_lo_recalls, 0); + #endif +- hlist_add_head_rcu(&fp->fi_hash, &file_hashtbl[hashval]); + } + + void +@@ -4363,25 +4402,27 @@ nfsd4_init_slabs(void) + } + + static unsigned long +-nfsd_courtesy_client_count(struct shrinker *shrink, struct shrink_control *sc) ++nfsd4_state_shrinker_count(struct shrinker *shrink, struct shrink_control *sc) + { +- int cnt; ++ int count; + struct nfsd_net *nn = container_of(shrink, + struct nfsd_net, nfsd_client_shrinker); + +- cnt = atomic_read(&nn->nfsd_courtesy_clients); +- if (cnt > 0) +- mod_delayed_work(laundry_wq, &nn->nfsd_shrinker_work, 0); +- return (unsigned long)cnt; ++ count = 
atomic_read(&nn->nfsd_courtesy_clients); ++ if (!count) ++ count = atomic_long_read(&num_delegations); ++ if (count) ++ queue_work(laundry_wq, &nn->nfsd_shrinker_work); ++ return (unsigned long)count; + } + + static unsigned long +-nfsd_courtesy_client_scan(struct shrinker *shrink, struct shrink_control *sc) ++nfsd4_state_shrinker_scan(struct shrinker *shrink, struct shrink_control *sc) + { + return SHRINK_STOP; + } + +-int ++void + nfsd4_init_leases_net(struct nfsd_net *nn) + { + struct sysinfo si; +@@ -4403,16 +4444,6 @@ nfsd4_init_leases_net(struct nfsd_net *nn) + nn->nfs4_max_clients = max_t(int, max_clients, NFS4_CLIENTS_PER_GB); + + atomic_set(&nn->nfsd_courtesy_clients, 0); +- nn->nfsd_client_shrinker.scan_objects = nfsd_courtesy_client_scan; +- nn->nfsd_client_shrinker.count_objects = nfsd_courtesy_client_count; +- nn->nfsd_client_shrinker.seeks = DEFAULT_SEEKS; +- return register_shrinker(&nn->nfsd_client_shrinker, "nfsd-client"); +-} +- +-void +-nfsd4_leases_net_shutdown(struct nfsd_net *nn) +-{ +- unregister_shrinker(&nn->nfsd_client_shrinker); + } + + static void init_nfs4_replay(struct nfs4_replay *rp) +@@ -4683,71 +4714,80 @@ move_to_close_lru(struct nfs4_ol_stateid *s, struct net *net) + nfs4_put_stid(&last->st_stid); + } + +-/* search file_hashtbl[] for file */ +-static struct nfs4_file * +-find_file_locked(struct svc_fh *fh, unsigned int hashval) ++static noinline_for_stack struct nfs4_file * ++nfsd4_file_hash_lookup(const struct svc_fh *fhp) + { +- struct nfs4_file *fp; ++ struct inode *inode = d_inode(fhp->fh_dentry); ++ struct rhlist_head *tmp, *list; ++ struct nfs4_file *fi; + +- hlist_for_each_entry_rcu(fp, &file_hashtbl[hashval], fi_hash, +- lockdep_is_held(&state_lock)) { +- if (fh_match(&fp->fi_fhandle, &fh->fh_handle)) { +- if (refcount_inc_not_zero(&fp->fi_ref)) +- return fp; ++ rcu_read_lock(); ++ list = rhltable_lookup(&nfs4_file_rhltable, &inode, ++ nfs4_file_rhash_params); ++ rhl_for_each_entry_rcu(fi, tmp, list, fi_rlist) { ++ if 
(fh_match(&fi->fi_fhandle, &fhp->fh_handle)) { ++ if (refcount_inc_not_zero(&fi->fi_ref)) { ++ rcu_read_unlock(); ++ return fi; ++ } + } + } ++ rcu_read_unlock(); + return NULL; + } + +-static struct nfs4_file *insert_file(struct nfs4_file *new, struct svc_fh *fh, +- unsigned int hashval) ++/* ++ * On hash insertion, identify entries with the same inode but ++ * distinct filehandles. They will all be on the list returned ++ * by rhltable_lookup(). ++ * ++ * inode->i_lock prevents racing insertions from adding an entry ++ * for the same inode/fhp pair twice. ++ */ ++static noinline_for_stack struct nfs4_file * ++nfsd4_file_hash_insert(struct nfs4_file *new, const struct svc_fh *fhp) + { +- struct nfs4_file *fp; ++ struct inode *inode = d_inode(fhp->fh_dentry); ++ struct rhlist_head *tmp, *list; + struct nfs4_file *ret = NULL; + bool alias_found = false; ++ struct nfs4_file *fi; ++ int err; + +- spin_lock(&state_lock); +- hlist_for_each_entry_rcu(fp, &file_hashtbl[hashval], fi_hash, +- lockdep_is_held(&state_lock)) { +- if (fh_match(&fp->fi_fhandle, &fh->fh_handle)) { +- if (refcount_inc_not_zero(&fp->fi_ref)) +- ret = fp; +- } else if (d_inode(fh->fh_dentry) == fp->fi_inode) +- fp->fi_aliased = alias_found = true; +- } +- if (likely(ret == NULL)) { +- nfsd4_init_file(fh, hashval, new); +- new->fi_aliased = alias_found; +- ret = new; ++ rcu_read_lock(); ++ spin_lock(&inode->i_lock); ++ ++ list = rhltable_lookup(&nfs4_file_rhltable, &inode, ++ nfs4_file_rhash_params); ++ rhl_for_each_entry_rcu(fi, tmp, list, fi_rlist) { ++ if (fh_match(&fi->fi_fhandle, &fhp->fh_handle)) { ++ if (refcount_inc_not_zero(&fi->fi_ref)) ++ ret = fi; ++ } else ++ fi->fi_aliased = alias_found = true; + } +- spin_unlock(&state_lock); +- return ret; +-} ++ if (ret) ++ goto out_unlock; + +-static struct nfs4_file * find_file(struct svc_fh *fh) +-{ +- struct nfs4_file *fp; +- unsigned int hashval = file_hashval(fh); ++ nfsd4_file_init(fhp, new); ++ err = rhltable_insert(&nfs4_file_rhltable, 
&new->fi_rlist, ++ nfs4_file_rhash_params); ++ if (err) ++ goto out_unlock; + +- rcu_read_lock(); +- fp = find_file_locked(fh, hashval); ++ new->fi_aliased = alias_found; ++ ret = new; ++ ++out_unlock: ++ spin_unlock(&inode->i_lock); + rcu_read_unlock(); +- return fp; ++ return ret; + } + +-static struct nfs4_file * +-find_or_add_file(struct nfs4_file *new, struct svc_fh *fh) ++static noinline_for_stack void nfsd4_file_hash_remove(struct nfs4_file *fi) + { +- struct nfs4_file *fp; +- unsigned int hashval = file_hashval(fh); +- +- rcu_read_lock(); +- fp = find_file_locked(fh, hashval); +- rcu_read_unlock(); +- if (fp) +- return fp; +- +- return insert_file(new, fh, hashval); ++ rhltable_remove(&nfs4_file_rhltable, &fi->fi_rlist, ++ nfs4_file_rhash_params); + } + + /* +@@ -4760,9 +4800,10 @@ nfs4_share_conflict(struct svc_fh *current_fh, unsigned int deny_type) + struct nfs4_file *fp; + __be32 ret = nfs_ok; + +- fp = find_file(current_fh); ++ fp = nfsd4_file_hash_lookup(current_fh); + if (!fp) + return ret; ++ + /* Check for conflicting share reservations */ + spin_lock(&fp->fi_lock); + if (fp->fi_share_deny & deny_type) +@@ -4774,7 +4815,7 @@ nfs4_share_conflict(struct svc_fh *current_fh, unsigned int deny_type) + + static bool nfsd4_deleg_present(const struct inode *inode) + { +- struct file_lock_context *ctx = smp_load_acquire(&inode->i_flctx); ++ struct file_lock_context *ctx = locks_inode_context(inode); + + return ctx && !list_empty_careful(&ctx->flc_lease); + } +@@ -5655,7 +5696,9 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf + * and check for delegations in the process of being recalled. 
+ * If not found, create the nfs4_file struct + */ +- fp = find_or_add_file(open->op_file, current_fh); ++ fp = nfsd4_file_hash_insert(open->op_file, current_fh); ++ if (unlikely(!fp)) ++ return nfserr_jukebox; + if (fp != open->op_file) { + status = nfs4_check_deleg(cl, open, &dp); + if (status) +@@ -5932,7 +5975,7 @@ nfs4_lockowner_has_blockers(struct nfs4_lockowner *lo) + + list_for_each_entry(stp, &lo->lo_owner.so_stateids, st_perstateowner) { + nf = stp->st_stid.sc_file; +- ctx = nf->fi_inode->i_flctx; ++ ctx = locks_inode_context(nf->fi_inode); + if (!ctx) + continue; + if (locks_owner_has_blockers(ctx, lo)) +@@ -6160,17 +6203,63 @@ laundromat_main(struct work_struct *laundry) + } + + static void +-courtesy_client_reaper(struct work_struct *reaper) ++courtesy_client_reaper(struct nfsd_net *nn) + { + struct list_head reaplist; +- struct delayed_work *dwork = to_delayed_work(reaper); +- struct nfsd_net *nn = container_of(dwork, struct nfsd_net, +- nfsd_shrinker_work); + + nfs4_get_courtesy_client_reaplist(nn, &reaplist); + nfs4_process_client_reaplist(&reaplist); + } + ++static void ++deleg_reaper(struct nfsd_net *nn) ++{ ++ struct list_head *pos, *next; ++ struct nfs4_client *clp; ++ struct list_head cblist; ++ ++ INIT_LIST_HEAD(&cblist); ++ spin_lock(&nn->client_lock); ++ list_for_each_safe(pos, next, &nn->client_lru) { ++ clp = list_entry(pos, struct nfs4_client, cl_lru); ++ if (clp->cl_state != NFSD4_ACTIVE || ++ list_empty(&clp->cl_delegations) || ++ atomic_read(&clp->cl_delegs_in_recall) || ++ test_bit(NFSD4_CLIENT_CB_RECALL_ANY, &clp->cl_flags) || ++ (ktime_get_boottime_seconds() - ++ clp->cl_ra_time < 5)) { ++ continue; ++ } ++ list_add(&clp->cl_ra_cblist, &cblist); ++ ++ /* release in nfsd4_cb_recall_any_release */ ++ atomic_inc(&clp->cl_rpc_users); ++ set_bit(NFSD4_CLIENT_CB_RECALL_ANY, &clp->cl_flags); ++ clp->cl_ra_time = ktime_get_boottime_seconds(); ++ } ++ spin_unlock(&nn->client_lock); ++ ++ while (!list_empty(&cblist)) { ++ clp = 
list_first_entry(&cblist, struct nfs4_client, ++ cl_ra_cblist); ++ list_del_init(&clp->cl_ra_cblist); ++ clp->cl_ra->ra_keep = 0; ++ clp->cl_ra->ra_bmval[0] = BIT(RCA4_TYPE_MASK_RDATA_DLG); ++ trace_nfsd_cb_recall_any(clp->cl_ra); ++ nfsd4_run_cb(&clp->cl_ra->ra_cb); ++ } ++} ++ ++static void ++nfsd4_state_shrinker_worker(struct work_struct *work) ++{ ++ struct nfsd_net *nn = container_of(work, struct nfsd_net, ++ nfsd_shrinker_work); ++ ++ courtesy_client_reaper(nn); ++ deleg_reaper(nn); ++} ++ + static inline __be32 nfs4_check_fh(struct svc_fh *fhp, struct nfs4_stid *stp) + { + if (!fh_match(&fhp->fh_handle, &stp->sc_file->fi_fhandle)) +@@ -6935,6 +7024,7 @@ nfsd4_delegreturn(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, + if (status) + goto put_stateid; + ++ trace_nfsd_deleg_return(stateid); + wake_up_var(d_inode(cstate->current_fh.fh_dentry)); + destroy_delegation(dp); + put_stateid: +@@ -7748,7 +7838,7 @@ check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner) + } + + inode = locks_inode(nf->nf_file); +- flctx = inode->i_flctx; ++ flctx = locks_inode_context(inode); + + if (flctx && !list_empty_careful(&flctx->flc_posix)) { + spin_lock(&flctx->flc_lock); +@@ -7995,11 +8085,20 @@ static int nfs4_state_create_net(struct net *net) + INIT_LIST_HEAD(&nn->blocked_locks_lru); + + INIT_DELAYED_WORK(&nn->laundromat_work, laundromat_main); +- INIT_DELAYED_WORK(&nn->nfsd_shrinker_work, courtesy_client_reaper); ++ INIT_WORK(&nn->nfsd_shrinker_work, nfsd4_state_shrinker_worker); + get_net(net); + ++ nn->nfsd_client_shrinker.scan_objects = nfsd4_state_shrinker_scan; ++ nn->nfsd_client_shrinker.count_objects = nfsd4_state_shrinker_count; ++ nn->nfsd_client_shrinker.seeks = DEFAULT_SEEKS; ++ ++ if (register_shrinker(&nn->nfsd_client_shrinker, "nfsd-client")) ++ goto err_shrinker; + return 0; + ++err_shrinker: ++ put_net(net); ++ kfree(nn->sessionid_hashtbl); + err_sessionid: + kfree(nn->unconf_id_hashtbl); + err_unconf_id: +@@ -8071,10 +8170,16 @@ 
nfs4_state_start(void) + { + int ret; + +- ret = nfsd4_create_callback_queue(); ++ ret = rhltable_init(&nfs4_file_rhltable, &nfs4_file_rhash_params); + if (ret) + return ret; + ++ ret = nfsd4_create_callback_queue(); ++ if (ret) { ++ rhltable_destroy(&nfs4_file_rhltable); ++ return ret; ++ } ++ + set_max_delegations(); + return 0; + } +@@ -8086,6 +8191,8 @@ nfs4_state_shutdown_net(struct net *net) + struct list_head *pos, *next, reaplist; + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + ++ unregister_shrinker(&nn->nfsd_client_shrinker); ++ cancel_work(&nn->nfsd_shrinker_work); + cancel_delayed_work_sync(&nn->laundromat_work); + locks_end_grace(&nn->nfsd4_manager); + +@@ -8114,6 +8221,7 @@ void + nfs4_state_shutdown(void) + { + nfsd4_destroy_callback_queue(); ++ rhltable_destroy(&nfs4_file_rhltable); + } + + static void +diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c +index 89a579be042e5..597f14a80512f 100644 +--- a/fs/nfsd/nfs4xdr.c ++++ b/fs/nfsd/nfs4xdr.c +@@ -770,16 +770,18 @@ nfsd4_decode_cb_sec(struct nfsd4_compoundargs *argp, struct nfsd4_cb_sec *cbs) + + static __be32 + nfsd4_decode_access(struct nfsd4_compoundargs *argp, +- struct nfsd4_access *access) ++ union nfsd4_op_u *u) + { ++ struct nfsd4_access *access = &u->access; + if (xdr_stream_decode_u32(argp->xdr, &access->ac_req_access) < 0) + return nfserr_bad_xdr; + return nfs_ok; + } + + static __be32 +-nfsd4_decode_close(struct nfsd4_compoundargs *argp, struct nfsd4_close *close) ++nfsd4_decode_close(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) + { ++ struct nfsd4_close *close = &u->close; + if (xdr_stream_decode_u32(argp->xdr, &close->cl_seqid) < 0) + return nfserr_bad_xdr; + return nfsd4_decode_stateid4(argp, &close->cl_stateid); +@@ -787,8 +789,9 @@ nfsd4_decode_close(struct nfsd4_compoundargs *argp, struct nfsd4_close *close) + + + static __be32 +-nfsd4_decode_commit(struct nfsd4_compoundargs *argp, struct nfsd4_commit *commit) ++nfsd4_decode_commit(struct nfsd4_compoundargs 
*argp, union nfsd4_op_u *u) + { ++ struct nfsd4_commit *commit = &u->commit; + if (xdr_stream_decode_u64(argp->xdr, &commit->co_offset) < 0) + return nfserr_bad_xdr; + if (xdr_stream_decode_u32(argp->xdr, &commit->co_count) < 0) +@@ -798,8 +801,9 @@ nfsd4_decode_commit(struct nfsd4_compoundargs *argp, struct nfsd4_commit *commit + } + + static __be32 +-nfsd4_decode_create(struct nfsd4_compoundargs *argp, struct nfsd4_create *create) ++nfsd4_decode_create(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) + { ++ struct nfsd4_create *create = &u->create; + __be32 *p, status; + + memset(create, 0, sizeof(*create)); +@@ -844,22 +848,25 @@ nfsd4_decode_create(struct nfsd4_compoundargs *argp, struct nfsd4_create *create + } + + static inline __be32 +-nfsd4_decode_delegreturn(struct nfsd4_compoundargs *argp, struct nfsd4_delegreturn *dr) ++nfsd4_decode_delegreturn(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) + { ++ struct nfsd4_delegreturn *dr = &u->delegreturn; + return nfsd4_decode_stateid4(argp, &dr->dr_stateid); + } + + static inline __be32 +-nfsd4_decode_getattr(struct nfsd4_compoundargs *argp, struct nfsd4_getattr *getattr) ++nfsd4_decode_getattr(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) + { ++ struct nfsd4_getattr *getattr = &u->getattr; + memset(getattr, 0, sizeof(*getattr)); + return nfsd4_decode_bitmap4(argp, getattr->ga_bmval, + ARRAY_SIZE(getattr->ga_bmval)); + } + + static __be32 +-nfsd4_decode_link(struct nfsd4_compoundargs *argp, struct nfsd4_link *link) ++nfsd4_decode_link(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) + { ++ struct nfsd4_link *link = &u->link; + memset(link, 0, sizeof(*link)); + return nfsd4_decode_component4(argp, &link->li_name, &link->li_namelen); + } +@@ -907,8 +914,9 @@ nfsd4_decode_locker4(struct nfsd4_compoundargs *argp, struct nfsd4_lock *lock) + } + + static __be32 +-nfsd4_decode_lock(struct nfsd4_compoundargs *argp, struct nfsd4_lock *lock) ++nfsd4_decode_lock(struct nfsd4_compoundargs *argp, union 
nfsd4_op_u *u) + { ++ struct nfsd4_lock *lock = &u->lock; + memset(lock, 0, sizeof(*lock)); + if (xdr_stream_decode_u32(argp->xdr, &lock->lk_type) < 0) + return nfserr_bad_xdr; +@@ -924,8 +932,9 @@ nfsd4_decode_lock(struct nfsd4_compoundargs *argp, struct nfsd4_lock *lock) + } + + static __be32 +-nfsd4_decode_lockt(struct nfsd4_compoundargs *argp, struct nfsd4_lockt *lockt) ++nfsd4_decode_lockt(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) + { ++ struct nfsd4_lockt *lockt = &u->lockt; + memset(lockt, 0, sizeof(*lockt)); + if (xdr_stream_decode_u32(argp->xdr, &lockt->lt_type) < 0) + return nfserr_bad_xdr; +@@ -940,8 +949,9 @@ nfsd4_decode_lockt(struct nfsd4_compoundargs *argp, struct nfsd4_lockt *lockt) + } + + static __be32 +-nfsd4_decode_locku(struct nfsd4_compoundargs *argp, struct nfsd4_locku *locku) ++nfsd4_decode_locku(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) + { ++ struct nfsd4_locku *locku = &u->locku; + __be32 status; + + if (xdr_stream_decode_u32(argp->xdr, &locku->lu_type) < 0) +@@ -962,8 +972,9 @@ nfsd4_decode_locku(struct nfsd4_compoundargs *argp, struct nfsd4_locku *locku) + } + + static __be32 +-nfsd4_decode_lookup(struct nfsd4_compoundargs *argp, struct nfsd4_lookup *lookup) ++nfsd4_decode_lookup(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) + { ++ struct nfsd4_lookup *lookup = &u->lookup; + return nfsd4_decode_component4(argp, &lookup->lo_name, &lookup->lo_len); + } + +@@ -1143,8 +1154,9 @@ nfsd4_decode_open_claim4(struct nfsd4_compoundargs *argp, + } + + static __be32 +-nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open) ++nfsd4_decode_open(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) + { ++ struct nfsd4_open *open = &u->open; + __be32 status; + u32 dummy; + +@@ -1171,8 +1183,10 @@ nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open) + } + + static __be32 +-nfsd4_decode_open_confirm(struct nfsd4_compoundargs *argp, struct nfsd4_open_confirm *open_conf) 
++nfsd4_decode_open_confirm(struct nfsd4_compoundargs *argp, ++ union nfsd4_op_u *u) + { ++ struct nfsd4_open_confirm *open_conf = &u->open_confirm; + __be32 status; + + if (argp->minorversion >= 1) +@@ -1190,8 +1204,10 @@ nfsd4_decode_open_confirm(struct nfsd4_compoundargs *argp, struct nfsd4_open_con + } + + static __be32 +-nfsd4_decode_open_downgrade(struct nfsd4_compoundargs *argp, struct nfsd4_open_downgrade *open_down) ++nfsd4_decode_open_downgrade(struct nfsd4_compoundargs *argp, ++ union nfsd4_op_u *u) + { ++ struct nfsd4_open_downgrade *open_down = &u->open_downgrade; + __be32 status; + + memset(open_down, 0, sizeof(*open_down)); +@@ -1209,8 +1225,9 @@ nfsd4_decode_open_downgrade(struct nfsd4_compoundargs *argp, struct nfsd4_open_d + } + + static __be32 +-nfsd4_decode_putfh(struct nfsd4_compoundargs *argp, struct nfsd4_putfh *putfh) ++nfsd4_decode_putfh(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) + { ++ struct nfsd4_putfh *putfh = &u->putfh; + __be32 *p; + + if (xdr_stream_decode_u32(argp->xdr, &putfh->pf_fhlen) < 0) +@@ -1229,7 +1246,7 @@ nfsd4_decode_putfh(struct nfsd4_compoundargs *argp, struct nfsd4_putfh *putfh) + } + + static __be32 +-nfsd4_decode_putpubfh(struct nfsd4_compoundargs *argp, void *p) ++nfsd4_decode_putpubfh(struct nfsd4_compoundargs *argp, union nfsd4_op_u *p) + { + if (argp->minorversion == 0) + return nfs_ok; +@@ -1237,8 +1254,9 @@ nfsd4_decode_putpubfh(struct nfsd4_compoundargs *argp, void *p) + } + + static __be32 +-nfsd4_decode_read(struct nfsd4_compoundargs *argp, struct nfsd4_read *read) ++nfsd4_decode_read(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) + { ++ struct nfsd4_read *read = &u->read; + __be32 status; + + memset(read, 0, sizeof(*read)); +@@ -1254,8 +1272,9 @@ nfsd4_decode_read(struct nfsd4_compoundargs *argp, struct nfsd4_read *read) + } + + static __be32 +-nfsd4_decode_readdir(struct nfsd4_compoundargs *argp, struct nfsd4_readdir *readdir) ++nfsd4_decode_readdir(struct nfsd4_compoundargs *argp, union 
nfsd4_op_u *u) + { ++ struct nfsd4_readdir *readdir = &u->readdir; + __be32 status; + + memset(readdir, 0, sizeof(*readdir)); +@@ -1276,15 +1295,17 @@ nfsd4_decode_readdir(struct nfsd4_compoundargs *argp, struct nfsd4_readdir *read + } + + static __be32 +-nfsd4_decode_remove(struct nfsd4_compoundargs *argp, struct nfsd4_remove *remove) ++nfsd4_decode_remove(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) + { ++ struct nfsd4_remove *remove = &u->remove; + memset(&remove->rm_cinfo, 0, sizeof(remove->rm_cinfo)); + return nfsd4_decode_component4(argp, &remove->rm_name, &remove->rm_namelen); + } + + static __be32 +-nfsd4_decode_rename(struct nfsd4_compoundargs *argp, struct nfsd4_rename *rename) ++nfsd4_decode_rename(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) + { ++ struct nfsd4_rename *rename = &u->rename; + __be32 status; + + memset(rename, 0, sizeof(*rename)); +@@ -1295,22 +1316,25 @@ nfsd4_decode_rename(struct nfsd4_compoundargs *argp, struct nfsd4_rename *rename + } + + static __be32 +-nfsd4_decode_renew(struct nfsd4_compoundargs *argp, clientid_t *clientid) ++nfsd4_decode_renew(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) + { ++ clientid_t *clientid = &u->renew; + return nfsd4_decode_clientid4(argp, clientid); + } + + static __be32 + nfsd4_decode_secinfo(struct nfsd4_compoundargs *argp, +- struct nfsd4_secinfo *secinfo) ++ union nfsd4_op_u *u) + { ++ struct nfsd4_secinfo *secinfo = &u->secinfo; + secinfo->si_exp = NULL; + return nfsd4_decode_component4(argp, &secinfo->si_name, &secinfo->si_namelen); + } + + static __be32 +-nfsd4_decode_setattr(struct nfsd4_compoundargs *argp, struct nfsd4_setattr *setattr) ++nfsd4_decode_setattr(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) + { ++ struct nfsd4_setattr *setattr = &u->setattr; + __be32 status; + + memset(setattr, 0, sizeof(*setattr)); +@@ -1324,8 +1348,9 @@ nfsd4_decode_setattr(struct nfsd4_compoundargs *argp, struct nfsd4_setattr *seta + } + + static __be32 
+-nfsd4_decode_setclientid(struct nfsd4_compoundargs *argp, struct nfsd4_setclientid *setclientid) ++nfsd4_decode_setclientid(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) + { ++ struct nfsd4_setclientid *setclientid = &u->setclientid; + __be32 *p, status; + + memset(setclientid, 0, sizeof(*setclientid)); +@@ -1367,8 +1392,10 @@ nfsd4_decode_setclientid(struct nfsd4_compoundargs *argp, struct nfsd4_setclient + } + + static __be32 +-nfsd4_decode_setclientid_confirm(struct nfsd4_compoundargs *argp, struct nfsd4_setclientid_confirm *scd_c) ++nfsd4_decode_setclientid_confirm(struct nfsd4_compoundargs *argp, ++ union nfsd4_op_u *u) + { ++ struct nfsd4_setclientid_confirm *scd_c = &u->setclientid_confirm; + __be32 status; + + if (argp->minorversion >= 1) +@@ -1382,8 +1409,9 @@ nfsd4_decode_setclientid_confirm(struct nfsd4_compoundargs *argp, struct nfsd4_s + + /* Also used for NVERIFY */ + static __be32 +-nfsd4_decode_verify(struct nfsd4_compoundargs *argp, struct nfsd4_verify *verify) ++nfsd4_decode_verify(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) + { ++ struct nfsd4_verify *verify = &u->verify; + __be32 *p, status; + + memset(verify, 0, sizeof(*verify)); +@@ -1409,8 +1437,9 @@ nfsd4_decode_verify(struct nfsd4_compoundargs *argp, struct nfsd4_verify *verify + } + + static __be32 +-nfsd4_decode_write(struct nfsd4_compoundargs *argp, struct nfsd4_write *write) ++nfsd4_decode_write(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) + { ++ struct nfsd4_write *write = &u->write; + __be32 status; + + status = nfsd4_decode_stateid4(argp, &write->wr_stateid); +@@ -1434,8 +1463,10 @@ nfsd4_decode_write(struct nfsd4_compoundargs *argp, struct nfsd4_write *write) + } + + static __be32 +-nfsd4_decode_release_lockowner(struct nfsd4_compoundargs *argp, struct nfsd4_release_lockowner *rlockowner) ++nfsd4_decode_release_lockowner(struct nfsd4_compoundargs *argp, ++ union nfsd4_op_u *u) + { ++ struct nfsd4_release_lockowner *rlockowner = &u->release_lockowner; + 
__be32 status; + + if (argp->minorversion >= 1) +@@ -1452,16 +1483,20 @@ nfsd4_decode_release_lockowner(struct nfsd4_compoundargs *argp, struct nfsd4_rel + return nfs_ok; + } + +-static __be32 nfsd4_decode_backchannel_ctl(struct nfsd4_compoundargs *argp, struct nfsd4_backchannel_ctl *bc) ++static __be32 nfsd4_decode_backchannel_ctl(struct nfsd4_compoundargs *argp, ++ union nfsd4_op_u *u) + { ++ struct nfsd4_backchannel_ctl *bc = &u->backchannel_ctl; + memset(bc, 0, sizeof(*bc)); + if (xdr_stream_decode_u32(argp->xdr, &bc->bc_cb_program) < 0) + return nfserr_bad_xdr; + return nfsd4_decode_cb_sec(argp, &bc->bc_cb_sec); + } + +-static __be32 nfsd4_decode_bind_conn_to_session(struct nfsd4_compoundargs *argp, struct nfsd4_bind_conn_to_session *bcts) ++static __be32 nfsd4_decode_bind_conn_to_session(struct nfsd4_compoundargs *argp, ++ union nfsd4_op_u *u) + { ++ struct nfsd4_bind_conn_to_session *bcts = &u->bind_conn_to_session; + u32 use_conn_in_rdma_mode; + __be32 status; + +@@ -1603,8 +1638,9 @@ nfsd4_decode_nfs_impl_id4(struct nfsd4_compoundargs *argp, + + static __be32 + nfsd4_decode_exchange_id(struct nfsd4_compoundargs *argp, +- struct nfsd4_exchange_id *exid) ++ union nfsd4_op_u *u) + { ++ struct nfsd4_exchange_id *exid = &u->exchange_id; + __be32 status; + + memset(exid, 0, sizeof(*exid)); +@@ -1656,8 +1692,9 @@ nfsd4_decode_channel_attrs4(struct nfsd4_compoundargs *argp, + + static __be32 + nfsd4_decode_create_session(struct nfsd4_compoundargs *argp, +- struct nfsd4_create_session *sess) ++ union nfsd4_op_u *u) + { ++ struct nfsd4_create_session *sess = &u->create_session; + __be32 status; + + memset(sess, 0, sizeof(*sess)); +@@ -1681,23 +1718,26 @@ nfsd4_decode_create_session(struct nfsd4_compoundargs *argp, + + static __be32 + nfsd4_decode_destroy_session(struct nfsd4_compoundargs *argp, +- struct nfsd4_destroy_session *destroy_session) ++ union nfsd4_op_u *u) + { ++ struct nfsd4_destroy_session *destroy_session = &u->destroy_session; + return 
nfsd4_decode_sessionid4(argp, &destroy_session->sessionid); + } + + static __be32 + nfsd4_decode_free_stateid(struct nfsd4_compoundargs *argp, +- struct nfsd4_free_stateid *free_stateid) ++ union nfsd4_op_u *u) + { ++ struct nfsd4_free_stateid *free_stateid = &u->free_stateid; + return nfsd4_decode_stateid4(argp, &free_stateid->fr_stateid); + } + + #ifdef CONFIG_NFSD_PNFS + static __be32 + nfsd4_decode_getdeviceinfo(struct nfsd4_compoundargs *argp, +- struct nfsd4_getdeviceinfo *gdev) ++ union nfsd4_op_u *u) + { ++ struct nfsd4_getdeviceinfo *gdev = &u->getdeviceinfo; + __be32 status; + + memset(gdev, 0, sizeof(*gdev)); +@@ -1717,8 +1757,9 @@ nfsd4_decode_getdeviceinfo(struct nfsd4_compoundargs *argp, + + static __be32 + nfsd4_decode_layoutcommit(struct nfsd4_compoundargs *argp, +- struct nfsd4_layoutcommit *lcp) ++ union nfsd4_op_u *u) + { ++ struct nfsd4_layoutcommit *lcp = &u->layoutcommit; + __be32 *p, status; + + memset(lcp, 0, sizeof(*lcp)); +@@ -1753,8 +1794,9 @@ nfsd4_decode_layoutcommit(struct nfsd4_compoundargs *argp, + + static __be32 + nfsd4_decode_layoutget(struct nfsd4_compoundargs *argp, +- struct nfsd4_layoutget *lgp) ++ union nfsd4_op_u *u) + { ++ struct nfsd4_layoutget *lgp = &u->layoutget; + __be32 status; + + memset(lgp, 0, sizeof(*lgp)); +@@ -1781,8 +1823,9 @@ nfsd4_decode_layoutget(struct nfsd4_compoundargs *argp, + + static __be32 + nfsd4_decode_layoutreturn(struct nfsd4_compoundargs *argp, +- struct nfsd4_layoutreturn *lrp) ++ union nfsd4_op_u *u) + { ++ struct nfsd4_layoutreturn *lrp = &u->layoutreturn; + memset(lrp, 0, sizeof(*lrp)); + if (xdr_stream_decode_bool(argp->xdr, &lrp->lr_reclaim) < 0) + return nfserr_bad_xdr; +@@ -1795,8 +1838,9 @@ nfsd4_decode_layoutreturn(struct nfsd4_compoundargs *argp, + #endif /* CONFIG_NFSD_PNFS */ + + static __be32 nfsd4_decode_secinfo_no_name(struct nfsd4_compoundargs *argp, +- struct nfsd4_secinfo_no_name *sin) ++ union nfsd4_op_u *u) + { ++ struct nfsd4_secinfo_no_name *sin = &u->secinfo_no_name; + if 
(xdr_stream_decode_u32(argp->xdr, &sin->sin_style) < 0) + return nfserr_bad_xdr; + +@@ -1806,8 +1850,9 @@ static __be32 nfsd4_decode_secinfo_no_name(struct nfsd4_compoundargs *argp, + + static __be32 + nfsd4_decode_sequence(struct nfsd4_compoundargs *argp, +- struct nfsd4_sequence *seq) ++ union nfsd4_op_u *u) + { ++ struct nfsd4_sequence *seq = &u->sequence; + __be32 *p, status; + + status = nfsd4_decode_sessionid4(argp, &seq->sessionid); +@@ -1826,8 +1871,10 @@ nfsd4_decode_sequence(struct nfsd4_compoundargs *argp, + } + + static __be32 +-nfsd4_decode_test_stateid(struct nfsd4_compoundargs *argp, struct nfsd4_test_stateid *test_stateid) ++nfsd4_decode_test_stateid(struct nfsd4_compoundargs *argp, ++ union nfsd4_op_u *u) + { ++ struct nfsd4_test_stateid *test_stateid = &u->test_stateid; + struct nfsd4_test_stateid_id *stateid; + __be32 status; + u32 i; +@@ -1852,14 +1899,16 @@ nfsd4_decode_test_stateid(struct nfsd4_compoundargs *argp, struct nfsd4_test_sta + } + + static __be32 nfsd4_decode_destroy_clientid(struct nfsd4_compoundargs *argp, +- struct nfsd4_destroy_clientid *dc) ++ union nfsd4_op_u *u) + { ++ struct nfsd4_destroy_clientid *dc = &u->destroy_clientid; + return nfsd4_decode_clientid4(argp, &dc->clientid); + } + + static __be32 nfsd4_decode_reclaim_complete(struct nfsd4_compoundargs *argp, +- struct nfsd4_reclaim_complete *rc) ++ union nfsd4_op_u *u) + { ++ struct nfsd4_reclaim_complete *rc = &u->reclaim_complete; + if (xdr_stream_decode_bool(argp->xdr, &rc->rca_one_fs) < 0) + return nfserr_bad_xdr; + return nfs_ok; +@@ -1867,8 +1916,9 @@ static __be32 nfsd4_decode_reclaim_complete(struct nfsd4_compoundargs *argp, + + static __be32 + nfsd4_decode_fallocate(struct nfsd4_compoundargs *argp, +- struct nfsd4_fallocate *fallocate) ++ union nfsd4_op_u *u) + { ++ struct nfsd4_fallocate *fallocate = &u->allocate; + __be32 status; + + status = nfsd4_decode_stateid4(argp, &fallocate->falloc_stateid); +@@ -1924,8 +1974,9 @@ static __be32 
nfsd4_decode_nl4_server(struct nfsd4_compoundargs *argp, + } + + static __be32 +-nfsd4_decode_copy(struct nfsd4_compoundargs *argp, struct nfsd4_copy *copy) ++nfsd4_decode_copy(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) + { ++ struct nfsd4_copy *copy = &u->copy; + u32 consecutive, i, count, sync; + struct nl4_server *ns_dummy; + __be32 status; +@@ -1982,8 +2033,9 @@ nfsd4_decode_copy(struct nfsd4_compoundargs *argp, struct nfsd4_copy *copy) + + static __be32 + nfsd4_decode_copy_notify(struct nfsd4_compoundargs *argp, +- struct nfsd4_copy_notify *cn) ++ union nfsd4_op_u *u) + { ++ struct nfsd4_copy_notify *cn = &u->copy_notify; + __be32 status; + + memset(cn, 0, sizeof(*cn)); +@@ -2002,16 +2054,18 @@ nfsd4_decode_copy_notify(struct nfsd4_compoundargs *argp, + + static __be32 + nfsd4_decode_offload_status(struct nfsd4_compoundargs *argp, +- struct nfsd4_offload_status *os) ++ union nfsd4_op_u *u) + { ++ struct nfsd4_offload_status *os = &u->offload_status; + os->count = 0; + os->status = 0; + return nfsd4_decode_stateid4(argp, &os->stateid); + } + + static __be32 +-nfsd4_decode_seek(struct nfsd4_compoundargs *argp, struct nfsd4_seek *seek) ++nfsd4_decode_seek(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) + { ++ struct nfsd4_seek *seek = &u->seek; + __be32 status; + + status = nfsd4_decode_stateid4(argp, &seek->seek_stateid); +@@ -2028,8 +2082,9 @@ nfsd4_decode_seek(struct nfsd4_compoundargs *argp, struct nfsd4_seek *seek) + } + + static __be32 +-nfsd4_decode_clone(struct nfsd4_compoundargs *argp, struct nfsd4_clone *clone) ++nfsd4_decode_clone(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) + { ++ struct nfsd4_clone *clone = &u->clone; + __be32 status; + + status = nfsd4_decode_stateid4(argp, &clone->cl_src_stateid); +@@ -2154,8 +2209,9 @@ nfsd4_decode_xattr_name(struct nfsd4_compoundargs *argp, char **namep) + */ + static __be32 + nfsd4_decode_getxattr(struct nfsd4_compoundargs *argp, +- struct nfsd4_getxattr *getxattr) ++ union nfsd4_op_u 
*u) + { ++ struct nfsd4_getxattr *getxattr = &u->getxattr; + __be32 status; + u32 maxcount; + +@@ -2173,8 +2229,9 @@ nfsd4_decode_getxattr(struct nfsd4_compoundargs *argp, + + static __be32 + nfsd4_decode_setxattr(struct nfsd4_compoundargs *argp, +- struct nfsd4_setxattr *setxattr) ++ union nfsd4_op_u *u) + { ++ struct nfsd4_setxattr *setxattr = &u->setxattr; + u32 flags, maxcount, size; + __be32 status; + +@@ -2214,8 +2271,9 @@ nfsd4_decode_setxattr(struct nfsd4_compoundargs *argp, + + static __be32 + nfsd4_decode_listxattrs(struct nfsd4_compoundargs *argp, +- struct nfsd4_listxattrs *listxattrs) ++ union nfsd4_op_u *u) + { ++ struct nfsd4_listxattrs *listxattrs = &u->listxattrs; + u32 maxcount; + + memset(listxattrs, 0, sizeof(*listxattrs)); +@@ -2245,113 +2303,114 @@ nfsd4_decode_listxattrs(struct nfsd4_compoundargs *argp, + + static __be32 + nfsd4_decode_removexattr(struct nfsd4_compoundargs *argp, +- struct nfsd4_removexattr *removexattr) ++ union nfsd4_op_u *u) + { ++ struct nfsd4_removexattr *removexattr = &u->removexattr; + memset(removexattr, 0, sizeof(*removexattr)); + return nfsd4_decode_xattr_name(argp, &removexattr->rmxa_name); + } + + static __be32 +-nfsd4_decode_noop(struct nfsd4_compoundargs *argp, void *p) ++nfsd4_decode_noop(struct nfsd4_compoundargs *argp, union nfsd4_op_u *p) + { + return nfs_ok; + } + + static __be32 +-nfsd4_decode_notsupp(struct nfsd4_compoundargs *argp, void *p) ++nfsd4_decode_notsupp(struct nfsd4_compoundargs *argp, union nfsd4_op_u *p) + { + return nfserr_notsupp; + } + +-typedef __be32(*nfsd4_dec)(struct nfsd4_compoundargs *argp, void *); ++typedef __be32(*nfsd4_dec)(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u); + + static const nfsd4_dec nfsd4_dec_ops[] = { +- [OP_ACCESS] = (nfsd4_dec)nfsd4_decode_access, +- [OP_CLOSE] = (nfsd4_dec)nfsd4_decode_close, +- [OP_COMMIT] = (nfsd4_dec)nfsd4_decode_commit, +- [OP_CREATE] = (nfsd4_dec)nfsd4_decode_create, +- [OP_DELEGPURGE] = (nfsd4_dec)nfsd4_decode_notsupp, +- 
[OP_DELEGRETURN] = (nfsd4_dec)nfsd4_decode_delegreturn, +- [OP_GETATTR] = (nfsd4_dec)nfsd4_decode_getattr, +- [OP_GETFH] = (nfsd4_dec)nfsd4_decode_noop, +- [OP_LINK] = (nfsd4_dec)nfsd4_decode_link, +- [OP_LOCK] = (nfsd4_dec)nfsd4_decode_lock, +- [OP_LOCKT] = (nfsd4_dec)nfsd4_decode_lockt, +- [OP_LOCKU] = (nfsd4_dec)nfsd4_decode_locku, +- [OP_LOOKUP] = (nfsd4_dec)nfsd4_decode_lookup, +- [OP_LOOKUPP] = (nfsd4_dec)nfsd4_decode_noop, +- [OP_NVERIFY] = (nfsd4_dec)nfsd4_decode_verify, +- [OP_OPEN] = (nfsd4_dec)nfsd4_decode_open, +- [OP_OPENATTR] = (nfsd4_dec)nfsd4_decode_notsupp, +- [OP_OPEN_CONFIRM] = (nfsd4_dec)nfsd4_decode_open_confirm, +- [OP_OPEN_DOWNGRADE] = (nfsd4_dec)nfsd4_decode_open_downgrade, +- [OP_PUTFH] = (nfsd4_dec)nfsd4_decode_putfh, +- [OP_PUTPUBFH] = (nfsd4_dec)nfsd4_decode_putpubfh, +- [OP_PUTROOTFH] = (nfsd4_dec)nfsd4_decode_noop, +- [OP_READ] = (nfsd4_dec)nfsd4_decode_read, +- [OP_READDIR] = (nfsd4_dec)nfsd4_decode_readdir, +- [OP_READLINK] = (nfsd4_dec)nfsd4_decode_noop, +- [OP_REMOVE] = (nfsd4_dec)nfsd4_decode_remove, +- [OP_RENAME] = (nfsd4_dec)nfsd4_decode_rename, +- [OP_RENEW] = (nfsd4_dec)nfsd4_decode_renew, +- [OP_RESTOREFH] = (nfsd4_dec)nfsd4_decode_noop, +- [OP_SAVEFH] = (nfsd4_dec)nfsd4_decode_noop, +- [OP_SECINFO] = (nfsd4_dec)nfsd4_decode_secinfo, +- [OP_SETATTR] = (nfsd4_dec)nfsd4_decode_setattr, +- [OP_SETCLIENTID] = (nfsd4_dec)nfsd4_decode_setclientid, +- [OP_SETCLIENTID_CONFIRM] = (nfsd4_dec)nfsd4_decode_setclientid_confirm, +- [OP_VERIFY] = (nfsd4_dec)nfsd4_decode_verify, +- [OP_WRITE] = (nfsd4_dec)nfsd4_decode_write, +- [OP_RELEASE_LOCKOWNER] = (nfsd4_dec)nfsd4_decode_release_lockowner, ++ [OP_ACCESS] = nfsd4_decode_access, ++ [OP_CLOSE] = nfsd4_decode_close, ++ [OP_COMMIT] = nfsd4_decode_commit, ++ [OP_CREATE] = nfsd4_decode_create, ++ [OP_DELEGPURGE] = nfsd4_decode_notsupp, ++ [OP_DELEGRETURN] = nfsd4_decode_delegreturn, ++ [OP_GETATTR] = nfsd4_decode_getattr, ++ [OP_GETFH] = nfsd4_decode_noop, ++ [OP_LINK] = nfsd4_decode_link, ++ 
[OP_LOCK] = nfsd4_decode_lock, ++ [OP_LOCKT] = nfsd4_decode_lockt, ++ [OP_LOCKU] = nfsd4_decode_locku, ++ [OP_LOOKUP] = nfsd4_decode_lookup, ++ [OP_LOOKUPP] = nfsd4_decode_noop, ++ [OP_NVERIFY] = nfsd4_decode_verify, ++ [OP_OPEN] = nfsd4_decode_open, ++ [OP_OPENATTR] = nfsd4_decode_notsupp, ++ [OP_OPEN_CONFIRM] = nfsd4_decode_open_confirm, ++ [OP_OPEN_DOWNGRADE] = nfsd4_decode_open_downgrade, ++ [OP_PUTFH] = nfsd4_decode_putfh, ++ [OP_PUTPUBFH] = nfsd4_decode_putpubfh, ++ [OP_PUTROOTFH] = nfsd4_decode_noop, ++ [OP_READ] = nfsd4_decode_read, ++ [OP_READDIR] = nfsd4_decode_readdir, ++ [OP_READLINK] = nfsd4_decode_noop, ++ [OP_REMOVE] = nfsd4_decode_remove, ++ [OP_RENAME] = nfsd4_decode_rename, ++ [OP_RENEW] = nfsd4_decode_renew, ++ [OP_RESTOREFH] = nfsd4_decode_noop, ++ [OP_SAVEFH] = nfsd4_decode_noop, ++ [OP_SECINFO] = nfsd4_decode_secinfo, ++ [OP_SETATTR] = nfsd4_decode_setattr, ++ [OP_SETCLIENTID] = nfsd4_decode_setclientid, ++ [OP_SETCLIENTID_CONFIRM] = nfsd4_decode_setclientid_confirm, ++ [OP_VERIFY] = nfsd4_decode_verify, ++ [OP_WRITE] = nfsd4_decode_write, ++ [OP_RELEASE_LOCKOWNER] = nfsd4_decode_release_lockowner, + + /* new operations for NFSv4.1 */ +- [OP_BACKCHANNEL_CTL] = (nfsd4_dec)nfsd4_decode_backchannel_ctl, +- [OP_BIND_CONN_TO_SESSION]= (nfsd4_dec)nfsd4_decode_bind_conn_to_session, +- [OP_EXCHANGE_ID] = (nfsd4_dec)nfsd4_decode_exchange_id, +- [OP_CREATE_SESSION] = (nfsd4_dec)nfsd4_decode_create_session, +- [OP_DESTROY_SESSION] = (nfsd4_dec)nfsd4_decode_destroy_session, +- [OP_FREE_STATEID] = (nfsd4_dec)nfsd4_decode_free_stateid, +- [OP_GET_DIR_DELEGATION] = (nfsd4_dec)nfsd4_decode_notsupp, ++ [OP_BACKCHANNEL_CTL] = nfsd4_decode_backchannel_ctl, ++ [OP_BIND_CONN_TO_SESSION] = nfsd4_decode_bind_conn_to_session, ++ [OP_EXCHANGE_ID] = nfsd4_decode_exchange_id, ++ [OP_CREATE_SESSION] = nfsd4_decode_create_session, ++ [OP_DESTROY_SESSION] = nfsd4_decode_destroy_session, ++ [OP_FREE_STATEID] = nfsd4_decode_free_stateid, ++ [OP_GET_DIR_DELEGATION] = 
nfsd4_decode_notsupp, + #ifdef CONFIG_NFSD_PNFS +- [OP_GETDEVICEINFO] = (nfsd4_dec)nfsd4_decode_getdeviceinfo, +- [OP_GETDEVICELIST] = (nfsd4_dec)nfsd4_decode_notsupp, +- [OP_LAYOUTCOMMIT] = (nfsd4_dec)nfsd4_decode_layoutcommit, +- [OP_LAYOUTGET] = (nfsd4_dec)nfsd4_decode_layoutget, +- [OP_LAYOUTRETURN] = (nfsd4_dec)nfsd4_decode_layoutreturn, ++ [OP_GETDEVICEINFO] = nfsd4_decode_getdeviceinfo, ++ [OP_GETDEVICELIST] = nfsd4_decode_notsupp, ++ [OP_LAYOUTCOMMIT] = nfsd4_decode_layoutcommit, ++ [OP_LAYOUTGET] = nfsd4_decode_layoutget, ++ [OP_LAYOUTRETURN] = nfsd4_decode_layoutreturn, + #else +- [OP_GETDEVICEINFO] = (nfsd4_dec)nfsd4_decode_notsupp, +- [OP_GETDEVICELIST] = (nfsd4_dec)nfsd4_decode_notsupp, +- [OP_LAYOUTCOMMIT] = (nfsd4_dec)nfsd4_decode_notsupp, +- [OP_LAYOUTGET] = (nfsd4_dec)nfsd4_decode_notsupp, +- [OP_LAYOUTRETURN] = (nfsd4_dec)nfsd4_decode_notsupp, ++ [OP_GETDEVICEINFO] = nfsd4_decode_notsupp, ++ [OP_GETDEVICELIST] = nfsd4_decode_notsupp, ++ [OP_LAYOUTCOMMIT] = nfsd4_decode_notsupp, ++ [OP_LAYOUTGET] = nfsd4_decode_notsupp, ++ [OP_LAYOUTRETURN] = nfsd4_decode_notsupp, + #endif +- [OP_SECINFO_NO_NAME] = (nfsd4_dec)nfsd4_decode_secinfo_no_name, +- [OP_SEQUENCE] = (nfsd4_dec)nfsd4_decode_sequence, +- [OP_SET_SSV] = (nfsd4_dec)nfsd4_decode_notsupp, +- [OP_TEST_STATEID] = (nfsd4_dec)nfsd4_decode_test_stateid, +- [OP_WANT_DELEGATION] = (nfsd4_dec)nfsd4_decode_notsupp, +- [OP_DESTROY_CLIENTID] = (nfsd4_dec)nfsd4_decode_destroy_clientid, +- [OP_RECLAIM_COMPLETE] = (nfsd4_dec)nfsd4_decode_reclaim_complete, ++ [OP_SECINFO_NO_NAME] = nfsd4_decode_secinfo_no_name, ++ [OP_SEQUENCE] = nfsd4_decode_sequence, ++ [OP_SET_SSV] = nfsd4_decode_notsupp, ++ [OP_TEST_STATEID] = nfsd4_decode_test_stateid, ++ [OP_WANT_DELEGATION] = nfsd4_decode_notsupp, ++ [OP_DESTROY_CLIENTID] = nfsd4_decode_destroy_clientid, ++ [OP_RECLAIM_COMPLETE] = nfsd4_decode_reclaim_complete, + + /* new operations for NFSv4.2 */ +- [OP_ALLOCATE] = (nfsd4_dec)nfsd4_decode_fallocate, +- [OP_COPY] = 
(nfsd4_dec)nfsd4_decode_copy, +- [OP_COPY_NOTIFY] = (nfsd4_dec)nfsd4_decode_copy_notify, +- [OP_DEALLOCATE] = (nfsd4_dec)nfsd4_decode_fallocate, +- [OP_IO_ADVISE] = (nfsd4_dec)nfsd4_decode_notsupp, +- [OP_LAYOUTERROR] = (nfsd4_dec)nfsd4_decode_notsupp, +- [OP_LAYOUTSTATS] = (nfsd4_dec)nfsd4_decode_notsupp, +- [OP_OFFLOAD_CANCEL] = (nfsd4_dec)nfsd4_decode_offload_status, +- [OP_OFFLOAD_STATUS] = (nfsd4_dec)nfsd4_decode_offload_status, +- [OP_READ_PLUS] = (nfsd4_dec)nfsd4_decode_read, +- [OP_SEEK] = (nfsd4_dec)nfsd4_decode_seek, +- [OP_WRITE_SAME] = (nfsd4_dec)nfsd4_decode_notsupp, +- [OP_CLONE] = (nfsd4_dec)nfsd4_decode_clone, ++ [OP_ALLOCATE] = nfsd4_decode_fallocate, ++ [OP_COPY] = nfsd4_decode_copy, ++ [OP_COPY_NOTIFY] = nfsd4_decode_copy_notify, ++ [OP_DEALLOCATE] = nfsd4_decode_fallocate, ++ [OP_IO_ADVISE] = nfsd4_decode_notsupp, ++ [OP_LAYOUTERROR] = nfsd4_decode_notsupp, ++ [OP_LAYOUTSTATS] = nfsd4_decode_notsupp, ++ [OP_OFFLOAD_CANCEL] = nfsd4_decode_offload_status, ++ [OP_OFFLOAD_STATUS] = nfsd4_decode_offload_status, ++ [OP_READ_PLUS] = nfsd4_decode_read, ++ [OP_SEEK] = nfsd4_decode_seek, ++ [OP_WRITE_SAME] = nfsd4_decode_notsupp, ++ [OP_CLONE] = nfsd4_decode_clone, + /* RFC 8276 extended atributes operations */ +- [OP_GETXATTR] = (nfsd4_dec)nfsd4_decode_getxattr, +- [OP_SETXATTR] = (nfsd4_dec)nfsd4_decode_setxattr, +- [OP_LISTXATTRS] = (nfsd4_dec)nfsd4_decode_listxattrs, +- [OP_REMOVEXATTR] = (nfsd4_dec)nfsd4_decode_removexattr, ++ [OP_GETXATTR] = nfsd4_decode_getxattr, ++ [OP_SETXATTR] = nfsd4_decode_setxattr, ++ [OP_LISTXATTRS] = nfsd4_decode_listxattrs, ++ [OP_REMOVEXATTR] = nfsd4_decode_removexattr, + }; + + static inline bool +@@ -3643,8 +3702,10 @@ nfsd4_encode_stateid(struct xdr_stream *xdr, stateid_t *sid) + } + + static __be32 +-nfsd4_encode_access(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_access *access) ++nfsd4_encode_access(struct nfsd4_compoundres *resp, __be32 nfserr, ++ union nfsd4_op_u *u) + { ++ struct nfsd4_access 
*access = &u->access; + struct xdr_stream *xdr = resp->xdr; + __be32 *p; + +@@ -3656,8 +3717,10 @@ nfsd4_encode_access(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_ + return 0; + } + +-static __be32 nfsd4_encode_bind_conn_to_session(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_bind_conn_to_session *bcts) ++static __be32 nfsd4_encode_bind_conn_to_session(struct nfsd4_compoundres *resp, __be32 nfserr, ++ union nfsd4_op_u *u) + { ++ struct nfsd4_bind_conn_to_session *bcts = &u->bind_conn_to_session; + struct xdr_stream *xdr = resp->xdr; + __be32 *p; + +@@ -3673,8 +3736,10 @@ static __be32 nfsd4_encode_bind_conn_to_session(struct nfsd4_compoundres *resp, + } + + static __be32 +-nfsd4_encode_close(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_close *close) ++nfsd4_encode_close(struct nfsd4_compoundres *resp, __be32 nfserr, ++ union nfsd4_op_u *u) + { ++ struct nfsd4_close *close = &u->close; + struct xdr_stream *xdr = resp->xdr; + + return nfsd4_encode_stateid(xdr, &close->cl_stateid); +@@ -3682,8 +3747,10 @@ nfsd4_encode_close(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_c + + + static __be32 +-nfsd4_encode_commit(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_commit *commit) ++nfsd4_encode_commit(struct nfsd4_compoundres *resp, __be32 nfserr, ++ union nfsd4_op_u *u) + { ++ struct nfsd4_commit *commit = &u->commit; + struct xdr_stream *xdr = resp->xdr; + __be32 *p; + +@@ -3696,8 +3763,10 @@ nfsd4_encode_commit(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_ + } + + static __be32 +-nfsd4_encode_create(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_create *create) ++nfsd4_encode_create(struct nfsd4_compoundres *resp, __be32 nfserr, ++ union nfsd4_op_u *u) + { ++ struct nfsd4_create *create = &u->create; + struct xdr_stream *xdr = resp->xdr; + __be32 *p; + +@@ -3710,8 +3779,10 @@ nfsd4_encode_create(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_ + } + + static 
__be32 +-nfsd4_encode_getattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_getattr *getattr) ++nfsd4_encode_getattr(struct nfsd4_compoundres *resp, __be32 nfserr, ++ union nfsd4_op_u *u) + { ++ struct nfsd4_getattr *getattr = &u->getattr; + struct svc_fh *fhp = getattr->ga_fhp; + struct xdr_stream *xdr = resp->xdr; + +@@ -3720,8 +3791,10 @@ nfsd4_encode_getattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4 + } + + static __be32 +-nfsd4_encode_getfh(struct nfsd4_compoundres *resp, __be32 nfserr, struct svc_fh **fhpp) ++nfsd4_encode_getfh(struct nfsd4_compoundres *resp, __be32 nfserr, ++ union nfsd4_op_u *u) + { ++ struct svc_fh **fhpp = &u->getfh; + struct xdr_stream *xdr = resp->xdr; + struct svc_fh *fhp = *fhpp; + unsigned int len; +@@ -3775,8 +3848,10 @@ nfsd4_encode_lock_denied(struct xdr_stream *xdr, struct nfsd4_lock_denied *ld) + } + + static __be32 +-nfsd4_encode_lock(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_lock *lock) ++nfsd4_encode_lock(struct nfsd4_compoundres *resp, __be32 nfserr, ++ union nfsd4_op_u *u) + { ++ struct nfsd4_lock *lock = &u->lock; + struct xdr_stream *xdr = resp->xdr; + + if (!nfserr) +@@ -3788,8 +3863,10 @@ nfsd4_encode_lock(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_lo + } + + static __be32 +-nfsd4_encode_lockt(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_lockt *lockt) ++nfsd4_encode_lockt(struct nfsd4_compoundres *resp, __be32 nfserr, ++ union nfsd4_op_u *u) + { ++ struct nfsd4_lockt *lockt = &u->lockt; + struct xdr_stream *xdr = resp->xdr; + + if (nfserr == nfserr_denied) +@@ -3798,8 +3875,10 @@ nfsd4_encode_lockt(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_l + } + + static __be32 +-nfsd4_encode_locku(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_locku *locku) ++nfsd4_encode_locku(struct nfsd4_compoundres *resp, __be32 nfserr, ++ union nfsd4_op_u *u) + { ++ struct nfsd4_locku *locku = &u->locku; + struct xdr_stream *xdr = 
resp->xdr; + + return nfsd4_encode_stateid(xdr, &locku->lu_stateid); +@@ -3807,8 +3886,10 @@ nfsd4_encode_locku(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_l + + + static __be32 +-nfsd4_encode_link(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_link *link) ++nfsd4_encode_link(struct nfsd4_compoundres *resp, __be32 nfserr, ++ union nfsd4_op_u *u) + { ++ struct nfsd4_link *link = &u->link; + struct xdr_stream *xdr = resp->xdr; + __be32 *p; + +@@ -3821,8 +3902,10 @@ nfsd4_encode_link(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_li + + + static __be32 +-nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open *open) ++nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, ++ union nfsd4_op_u *u) + { ++ struct nfsd4_open *open = &u->open; + struct xdr_stream *xdr = resp->xdr; + __be32 *p; + +@@ -3915,16 +3998,20 @@ nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_op + } + + static __be32 +-nfsd4_encode_open_confirm(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open_confirm *oc) ++nfsd4_encode_open_confirm(struct nfsd4_compoundres *resp, __be32 nfserr, ++ union nfsd4_op_u *u) + { ++ struct nfsd4_open_confirm *oc = &u->open_confirm; + struct xdr_stream *xdr = resp->xdr; + + return nfsd4_encode_stateid(xdr, &oc->oc_resp_stateid); + } + + static __be32 +-nfsd4_encode_open_downgrade(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open_downgrade *od) ++nfsd4_encode_open_downgrade(struct nfsd4_compoundres *resp, __be32 nfserr, ++ union nfsd4_op_u *u) + { ++ struct nfsd4_open_downgrade *od = &u->open_downgrade; + struct xdr_stream *xdr = resp->xdr; + + return nfsd4_encode_stateid(xdr, &od->od_stateid); +@@ -4023,8 +4110,9 @@ static __be32 nfsd4_encode_readv(struct nfsd4_compoundres *resp, + + static __be32 + nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, +- struct nfsd4_read *read) ++ union nfsd4_op_u *u) + { ++ struct 
nfsd4_read *read = &u->read; + bool splice_ok = test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags); + unsigned long maxcount; + struct xdr_stream *xdr = resp->xdr; +@@ -4065,8 +4153,10 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, + } + + static __be32 +-nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_readlink *readlink) ++nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, ++ union nfsd4_op_u *u) + { ++ struct nfsd4_readlink *readlink = &u->readlink; + __be32 *p, *maxcount_p, zero = xdr_zero; + struct xdr_stream *xdr = resp->xdr; + int length_offset = xdr->buf->len; +@@ -4110,8 +4200,10 @@ nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd + } + + static __be32 +-nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_readdir *readdir) ++nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, ++ union nfsd4_op_u *u) + { ++ struct nfsd4_readdir *readdir = &u->readdir; + int maxcount; + int bytes_left; + loff_t offset; +@@ -4201,8 +4293,10 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4 + } + + static __be32 +-nfsd4_encode_remove(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_remove *remove) ++nfsd4_encode_remove(struct nfsd4_compoundres *resp, __be32 nfserr, ++ union nfsd4_op_u *u) + { ++ struct nfsd4_remove *remove = &u->remove; + struct xdr_stream *xdr = resp->xdr; + __be32 *p; + +@@ -4214,8 +4308,10 @@ nfsd4_encode_remove(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_ + } + + static __be32 +-nfsd4_encode_rename(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_rename *rename) ++nfsd4_encode_rename(struct nfsd4_compoundres *resp, __be32 nfserr, ++ union nfsd4_op_u *u) + { ++ struct nfsd4_rename *rename = &u->rename; + struct xdr_stream *xdr = resp->xdr; + __be32 *p; + +@@ -4297,8 +4393,9 @@ nfsd4_do_encode_secinfo(struct xdr_stream *xdr, struct svc_export 
*exp) + + static __be32 + nfsd4_encode_secinfo(struct nfsd4_compoundres *resp, __be32 nfserr, +- struct nfsd4_secinfo *secinfo) ++ union nfsd4_op_u *u) + { ++ struct nfsd4_secinfo *secinfo = &u->secinfo; + struct xdr_stream *xdr = resp->xdr; + + return nfsd4_do_encode_secinfo(xdr, secinfo->si_exp); +@@ -4306,8 +4403,9 @@ nfsd4_encode_secinfo(struct nfsd4_compoundres *resp, __be32 nfserr, + + static __be32 + nfsd4_encode_secinfo_no_name(struct nfsd4_compoundres *resp, __be32 nfserr, +- struct nfsd4_secinfo_no_name *secinfo) ++ union nfsd4_op_u *u) + { ++ struct nfsd4_secinfo_no_name *secinfo = &u->secinfo_no_name; + struct xdr_stream *xdr = resp->xdr; + + return nfsd4_do_encode_secinfo(xdr, secinfo->sin_exp); +@@ -4318,8 +4416,10 @@ nfsd4_encode_secinfo_no_name(struct nfsd4_compoundres *resp, __be32 nfserr, + * regardless of the error status. + */ + static __be32 +-nfsd4_encode_setattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_setattr *setattr) ++nfsd4_encode_setattr(struct nfsd4_compoundres *resp, __be32 nfserr, ++ union nfsd4_op_u *u) + { ++ struct nfsd4_setattr *setattr = &u->setattr; + struct xdr_stream *xdr = resp->xdr; + __be32 *p; + +@@ -4342,8 +4442,10 @@ nfsd4_encode_setattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4 + } + + static __be32 +-nfsd4_encode_setclientid(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_setclientid *scd) ++nfsd4_encode_setclientid(struct nfsd4_compoundres *resp, __be32 nfserr, ++ union nfsd4_op_u *u) + { ++ struct nfsd4_setclientid *scd = &u->setclientid; + struct xdr_stream *xdr = resp->xdr; + __be32 *p; + +@@ -4366,8 +4468,10 @@ nfsd4_encode_setclientid(struct nfsd4_compoundres *resp, __be32 nfserr, struct n + } + + static __be32 +-nfsd4_encode_write(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_write *write) ++nfsd4_encode_write(struct nfsd4_compoundres *resp, __be32 nfserr, ++ union nfsd4_op_u *u) + { ++ struct nfsd4_write *write = &u->write; + struct xdr_stream *xdr 
= resp->xdr; + __be32 *p; + +@@ -4383,8 +4487,9 @@ nfsd4_encode_write(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_w + + static __be32 + nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, __be32 nfserr, +- struct nfsd4_exchange_id *exid) ++ union nfsd4_op_u *u) + { ++ struct nfsd4_exchange_id *exid = &u->exchange_id; + struct xdr_stream *xdr = resp->xdr; + __be32 *p; + char *major_id; +@@ -4461,8 +4566,9 @@ nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, __be32 nfserr, + + static __be32 + nfsd4_encode_create_session(struct nfsd4_compoundres *resp, __be32 nfserr, +- struct nfsd4_create_session *sess) ++ union nfsd4_op_u *u) + { ++ struct nfsd4_create_session *sess = &u->create_session; + struct xdr_stream *xdr = resp->xdr; + __be32 *p; + +@@ -4514,8 +4620,9 @@ nfsd4_encode_create_session(struct nfsd4_compoundres *resp, __be32 nfserr, + + static __be32 + nfsd4_encode_sequence(struct nfsd4_compoundres *resp, __be32 nfserr, +- struct nfsd4_sequence *seq) ++ union nfsd4_op_u *u) + { ++ struct nfsd4_sequence *seq = &u->sequence; + struct xdr_stream *xdr = resp->xdr; + __be32 *p; + +@@ -4537,8 +4644,9 @@ nfsd4_encode_sequence(struct nfsd4_compoundres *resp, __be32 nfserr, + + static __be32 + nfsd4_encode_test_stateid(struct nfsd4_compoundres *resp, __be32 nfserr, +- struct nfsd4_test_stateid *test_stateid) ++ union nfsd4_op_u *u) + { ++ struct nfsd4_test_stateid *test_stateid = &u->test_stateid; + struct xdr_stream *xdr = resp->xdr; + struct nfsd4_test_stateid_id *stateid, *next; + __be32 *p; +@@ -4558,8 +4666,9 @@ nfsd4_encode_test_stateid(struct nfsd4_compoundres *resp, __be32 nfserr, + #ifdef CONFIG_NFSD_PNFS + static __be32 + nfsd4_encode_getdeviceinfo(struct nfsd4_compoundres *resp, __be32 nfserr, +- struct nfsd4_getdeviceinfo *gdev) ++ union nfsd4_op_u *u) + { ++ struct nfsd4_getdeviceinfo *gdev = &u->getdeviceinfo; + struct xdr_stream *xdr = resp->xdr; + const struct nfsd4_layout_ops *ops; + u32 starting_len = xdr->buf->len, 
needed_len; +@@ -4611,8 +4720,9 @@ nfsd4_encode_getdeviceinfo(struct nfsd4_compoundres *resp, __be32 nfserr, + + static __be32 + nfsd4_encode_layoutget(struct nfsd4_compoundres *resp, __be32 nfserr, +- struct nfsd4_layoutget *lgp) ++ union nfsd4_op_u *u) + { ++ struct nfsd4_layoutget *lgp = &u->layoutget; + struct xdr_stream *xdr = resp->xdr; + const struct nfsd4_layout_ops *ops; + __be32 *p; +@@ -4638,8 +4748,9 @@ nfsd4_encode_layoutget(struct nfsd4_compoundres *resp, __be32 nfserr, + + static __be32 + nfsd4_encode_layoutcommit(struct nfsd4_compoundres *resp, __be32 nfserr, +- struct nfsd4_layoutcommit *lcp) ++ union nfsd4_op_u *u) + { ++ struct nfsd4_layoutcommit *lcp = &u->layoutcommit; + struct xdr_stream *xdr = resp->xdr; + __be32 *p; + +@@ -4659,8 +4770,9 @@ nfsd4_encode_layoutcommit(struct nfsd4_compoundres *resp, __be32 nfserr, + + static __be32 + nfsd4_encode_layoutreturn(struct nfsd4_compoundres *resp, __be32 nfserr, +- struct nfsd4_layoutreturn *lrp) ++ union nfsd4_op_u *u) + { ++ struct nfsd4_layoutreturn *lrp = &u->layoutreturn; + struct xdr_stream *xdr = resp->xdr; + __be32 *p; + +@@ -4745,8 +4857,9 @@ nfsd42_encode_nl4_server(struct nfsd4_compoundres *resp, struct nl4_server *ns) + + static __be32 + nfsd4_encode_copy(struct nfsd4_compoundres *resp, __be32 nfserr, +- struct nfsd4_copy *copy) ++ union nfsd4_op_u *u) + { ++ struct nfsd4_copy *copy = &u->copy; + __be32 *p; + + nfserr = nfsd42_encode_write_res(resp, &copy->cp_res, +@@ -4762,8 +4875,9 @@ nfsd4_encode_copy(struct nfsd4_compoundres *resp, __be32 nfserr, + + static __be32 + nfsd4_encode_offload_status(struct nfsd4_compoundres *resp, __be32 nfserr, +- struct nfsd4_offload_status *os) ++ union nfsd4_op_u *u) + { ++ struct nfsd4_offload_status *os = &u->offload_status; + struct xdr_stream *xdr = resp->xdr; + __be32 *p; + +@@ -4777,156 +4891,83 @@ nfsd4_encode_offload_status(struct nfsd4_compoundres *resp, __be32 nfserr, + + static __be32 + nfsd4_encode_read_plus_data(struct nfsd4_compoundres *resp, 
+- struct nfsd4_read *read, +- unsigned long *maxcount, u32 *eof, +- loff_t *pos) ++ struct nfsd4_read *read) + { +- struct xdr_stream *xdr = resp->xdr; ++ bool splice_ok = test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags); + struct file *file = read->rd_nf->nf_file; +- int starting_len = xdr->buf->len; +- loff_t hole_pos; +- __be32 nfserr; +- __be32 *p, tmp; +- __be64 tmp64; +- +- hole_pos = pos ? *pos : vfs_llseek(file, read->rd_offset, SEEK_HOLE); +- if (hole_pos > read->rd_offset) +- *maxcount = min_t(unsigned long, *maxcount, hole_pos - read->rd_offset); +- *maxcount = min_t(unsigned long, *maxcount, (xdr->buf->buflen - xdr->buf->len)); ++ struct xdr_stream *xdr = resp->xdr; ++ unsigned long maxcount; ++ __be32 nfserr, *p; + + /* Content type, offset, byte count */ + p = xdr_reserve_space(xdr, 4 + 8 + 4); + if (!p) +- return nfserr_resource; ++ return nfserr_io; ++ if (resp->xdr->buf->page_len && splice_ok) { ++ WARN_ON_ONCE(splice_ok); ++ return nfserr_serverfault; ++ } + +- read->rd_vlen = xdr_reserve_space_vec(xdr, resp->rqstp->rq_vec, *maxcount); +- if (read->rd_vlen < 0) +- return nfserr_resource; ++ maxcount = min_t(unsigned long, read->rd_length, ++ (xdr->buf->buflen - xdr->buf->len)); + +- nfserr = nfsd_readv(resp->rqstp, read->rd_fhp, file, read->rd_offset, +- resp->rqstp->rq_vec, read->rd_vlen, maxcount, eof); ++ if (file->f_op->splice_read && splice_ok) ++ nfserr = nfsd4_encode_splice_read(resp, read, file, maxcount); ++ else ++ nfserr = nfsd4_encode_readv(resp, read, file, maxcount); + if (nfserr) + return nfserr; +- xdr_truncate_encode(xdr, starting_len + 16 + xdr_align_size(*maxcount)); +- +- tmp = htonl(NFS4_CONTENT_DATA); +- write_bytes_to_xdr_buf(xdr->buf, starting_len, &tmp, 4); +- tmp64 = cpu_to_be64(read->rd_offset); +- write_bytes_to_xdr_buf(xdr->buf, starting_len + 4, &tmp64, 8); +- tmp = htonl(*maxcount); +- write_bytes_to_xdr_buf(xdr->buf, starting_len + 12, &tmp, 4); +- +- tmp = xdr_zero; +- write_bytes_to_xdr_buf(xdr->buf, starting_len + 
16 + *maxcount, &tmp, +- xdr_pad_size(*maxcount)); +- return nfs_ok; +-} +- +-static __be32 +-nfsd4_encode_read_plus_hole(struct nfsd4_compoundres *resp, +- struct nfsd4_read *read, +- unsigned long *maxcount, u32 *eof) +-{ +- struct file *file = read->rd_nf->nf_file; +- loff_t data_pos = vfs_llseek(file, read->rd_offset, SEEK_DATA); +- loff_t f_size = i_size_read(file_inode(file)); +- unsigned long count; +- __be32 *p; +- +- if (data_pos == -ENXIO) +- data_pos = f_size; +- else if (data_pos <= read->rd_offset || (data_pos < f_size && data_pos % PAGE_SIZE)) +- return nfsd4_encode_read_plus_data(resp, read, maxcount, eof, &f_size); +- count = data_pos - read->rd_offset; + +- /* Content type, offset, byte count */ +- p = xdr_reserve_space(resp->xdr, 4 + 8 + 8); +- if (!p) +- return nfserr_resource; +- +- *p++ = htonl(NFS4_CONTENT_HOLE); ++ *p++ = cpu_to_be32(NFS4_CONTENT_DATA); + p = xdr_encode_hyper(p, read->rd_offset); +- p = xdr_encode_hyper(p, count); ++ *p = cpu_to_be32(read->rd_length); + +- *eof = (read->rd_offset + count) >= f_size; +- *maxcount = min_t(unsigned long, count, *maxcount); + return nfs_ok; + } + + static __be32 + nfsd4_encode_read_plus(struct nfsd4_compoundres *resp, __be32 nfserr, +- struct nfsd4_read *read) ++ union nfsd4_op_u *u) + { +- unsigned long maxcount, count; ++ struct nfsd4_read *read = &u->read; ++ struct file *file = read->rd_nf->nf_file; + struct xdr_stream *xdr = resp->xdr; +- struct file *file; + int starting_len = xdr->buf->len; +- int last_segment = xdr->buf->len; +- int segments = 0; +- __be32 *p, tmp; +- bool is_data; +- loff_t pos; +- u32 eof; ++ u32 segments = 0; ++ __be32 *p; + + if (nfserr) + return nfserr; +- file = read->rd_nf->nf_file; + + /* eof flag, segment count */ + p = xdr_reserve_space(xdr, 4 + 4); + if (!p) +- return nfserr_resource; ++ return nfserr_io; + xdr_commit_encode(xdr); + +- maxcount = min_t(unsigned long, read->rd_length, +- (xdr->buf->buflen - xdr->buf->len)); +- count = maxcount; +- +- eof = 
read->rd_offset >= i_size_read(file_inode(file)); +- if (eof) ++ read->rd_eof = read->rd_offset >= i_size_read(file_inode(file)); ++ if (read->rd_eof) + goto out; + +- pos = vfs_llseek(file, read->rd_offset, SEEK_HOLE); +- is_data = pos > read->rd_offset; +- +- while (count > 0 && !eof) { +- maxcount = count; +- if (is_data) +- nfserr = nfsd4_encode_read_plus_data(resp, read, &maxcount, &eof, +- segments == 0 ? &pos : NULL); +- else +- nfserr = nfsd4_encode_read_plus_hole(resp, read, &maxcount, &eof); +- if (nfserr) +- goto out; +- count -= maxcount; +- read->rd_offset += maxcount; +- is_data = !is_data; +- last_segment = xdr->buf->len; +- segments++; +- } +- +-out: +- if (nfserr && segments == 0) ++ nfserr = nfsd4_encode_read_plus_data(resp, read); ++ if (nfserr) { + xdr_truncate_encode(xdr, starting_len); +- else { +- if (nfserr) { +- xdr_truncate_encode(xdr, last_segment); +- nfserr = nfs_ok; +- eof = 0; +- } +- tmp = htonl(eof); +- write_bytes_to_xdr_buf(xdr->buf, starting_len, &tmp, 4); +- tmp = htonl(segments); +- write_bytes_to_xdr_buf(xdr->buf, starting_len + 4, &tmp, 4); ++ return nfserr; + } + ++ segments++; ++ ++out: ++ p = xdr_encode_bool(p, read->rd_eof); ++ *p = cpu_to_be32(segments); + return nfserr; + } + + static __be32 + nfsd4_encode_copy_notify(struct nfsd4_compoundres *resp, __be32 nfserr, +- struct nfsd4_copy_notify *cn) ++ union nfsd4_op_u *u) + { ++ struct nfsd4_copy_notify *cn = &u->copy_notify; + struct xdr_stream *xdr = resp->xdr; + __be32 *p; + +@@ -4960,8 +5001,9 @@ nfsd4_encode_copy_notify(struct nfsd4_compoundres *resp, __be32 nfserr, + + static __be32 + nfsd4_encode_seek(struct nfsd4_compoundres *resp, __be32 nfserr, +- struct nfsd4_seek *seek) ++ union nfsd4_op_u *u) + { ++ struct nfsd4_seek *seek = &u->seek; + __be32 *p; + + p = xdr_reserve_space(resp->xdr, 4 + 8); +@@ -4972,7 +5014,8 @@ nfsd4_encode_seek(struct nfsd4_compoundres *resp, __be32 nfserr, + } + + static __be32 +-nfsd4_encode_noop(struct nfsd4_compoundres *resp, __be32 
nfserr, void *p) ++nfsd4_encode_noop(struct nfsd4_compoundres *resp, __be32 nfserr, ++ union nfsd4_op_u *p) + { + return nfserr; + } +@@ -5023,8 +5066,9 @@ nfsd4_vbuf_to_stream(struct xdr_stream *xdr, char *buf, u32 buflen) + + static __be32 + nfsd4_encode_getxattr(struct nfsd4_compoundres *resp, __be32 nfserr, +- struct nfsd4_getxattr *getxattr) ++ union nfsd4_op_u *u) + { ++ struct nfsd4_getxattr *getxattr = &u->getxattr; + struct xdr_stream *xdr = resp->xdr; + __be32 *p, err; + +@@ -5047,8 +5091,9 @@ nfsd4_encode_getxattr(struct nfsd4_compoundres *resp, __be32 nfserr, + + static __be32 + nfsd4_encode_setxattr(struct nfsd4_compoundres *resp, __be32 nfserr, +- struct nfsd4_setxattr *setxattr) ++ union nfsd4_op_u *u) + { ++ struct nfsd4_setxattr *setxattr = &u->setxattr; + struct xdr_stream *xdr = resp->xdr; + __be32 *p; + +@@ -5088,8 +5133,9 @@ nfsd4_listxattr_validate_cookie(struct nfsd4_listxattrs *listxattrs, + + static __be32 + nfsd4_encode_listxattrs(struct nfsd4_compoundres *resp, __be32 nfserr, +- struct nfsd4_listxattrs *listxattrs) ++ union nfsd4_op_u *u) + { ++ struct nfsd4_listxattrs *listxattrs = &u->listxattrs; + struct xdr_stream *xdr = resp->xdr; + u32 cookie_offset, count_offset, eof; + u32 left, xdrleft, slen, count; +@@ -5199,8 +5245,9 @@ nfsd4_encode_listxattrs(struct nfsd4_compoundres *resp, __be32 nfserr, + + static __be32 + nfsd4_encode_removexattr(struct nfsd4_compoundres *resp, __be32 nfserr, +- struct nfsd4_removexattr *removexattr) ++ union nfsd4_op_u *u) + { ++ struct nfsd4_removexattr *removexattr = &u->removexattr; + struct xdr_stream *xdr = resp->xdr; + __be32 *p; + +@@ -5212,7 +5259,7 @@ nfsd4_encode_removexattr(struct nfsd4_compoundres *resp, __be32 nfserr, + return 0; + } + +-typedef __be32(* nfsd4_enc)(struct nfsd4_compoundres *, __be32, void *); ++typedef __be32(*nfsd4_enc)(struct nfsd4_compoundres *, __be32, union nfsd4_op_u *u); + + /* + * Note: nfsd4_enc_ops vector is shared for v4.0 and v4.1 +@@ -5220,93 +5267,93 @@ typedef 
__be32(* nfsd4_enc)(struct nfsd4_compoundres *, __be32, void *); + * done in the decoding phase. + */ + static const nfsd4_enc nfsd4_enc_ops[] = { +- [OP_ACCESS] = (nfsd4_enc)nfsd4_encode_access, +- [OP_CLOSE] = (nfsd4_enc)nfsd4_encode_close, +- [OP_COMMIT] = (nfsd4_enc)nfsd4_encode_commit, +- [OP_CREATE] = (nfsd4_enc)nfsd4_encode_create, +- [OP_DELEGPURGE] = (nfsd4_enc)nfsd4_encode_noop, +- [OP_DELEGRETURN] = (nfsd4_enc)nfsd4_encode_noop, +- [OP_GETATTR] = (nfsd4_enc)nfsd4_encode_getattr, +- [OP_GETFH] = (nfsd4_enc)nfsd4_encode_getfh, +- [OP_LINK] = (nfsd4_enc)nfsd4_encode_link, +- [OP_LOCK] = (nfsd4_enc)nfsd4_encode_lock, +- [OP_LOCKT] = (nfsd4_enc)nfsd4_encode_lockt, +- [OP_LOCKU] = (nfsd4_enc)nfsd4_encode_locku, +- [OP_LOOKUP] = (nfsd4_enc)nfsd4_encode_noop, +- [OP_LOOKUPP] = (nfsd4_enc)nfsd4_encode_noop, +- [OP_NVERIFY] = (nfsd4_enc)nfsd4_encode_noop, +- [OP_OPEN] = (nfsd4_enc)nfsd4_encode_open, +- [OP_OPENATTR] = (nfsd4_enc)nfsd4_encode_noop, +- [OP_OPEN_CONFIRM] = (nfsd4_enc)nfsd4_encode_open_confirm, +- [OP_OPEN_DOWNGRADE] = (nfsd4_enc)nfsd4_encode_open_downgrade, +- [OP_PUTFH] = (nfsd4_enc)nfsd4_encode_noop, +- [OP_PUTPUBFH] = (nfsd4_enc)nfsd4_encode_noop, +- [OP_PUTROOTFH] = (nfsd4_enc)nfsd4_encode_noop, +- [OP_READ] = (nfsd4_enc)nfsd4_encode_read, +- [OP_READDIR] = (nfsd4_enc)nfsd4_encode_readdir, +- [OP_READLINK] = (nfsd4_enc)nfsd4_encode_readlink, +- [OP_REMOVE] = (nfsd4_enc)nfsd4_encode_remove, +- [OP_RENAME] = (nfsd4_enc)nfsd4_encode_rename, +- [OP_RENEW] = (nfsd4_enc)nfsd4_encode_noop, +- [OP_RESTOREFH] = (nfsd4_enc)nfsd4_encode_noop, +- [OP_SAVEFH] = (nfsd4_enc)nfsd4_encode_noop, +- [OP_SECINFO] = (nfsd4_enc)nfsd4_encode_secinfo, +- [OP_SETATTR] = (nfsd4_enc)nfsd4_encode_setattr, +- [OP_SETCLIENTID] = (nfsd4_enc)nfsd4_encode_setclientid, +- [OP_SETCLIENTID_CONFIRM] = (nfsd4_enc)nfsd4_encode_noop, +- [OP_VERIFY] = (nfsd4_enc)nfsd4_encode_noop, +- [OP_WRITE] = (nfsd4_enc)nfsd4_encode_write, +- [OP_RELEASE_LOCKOWNER] = (nfsd4_enc)nfsd4_encode_noop, ++ 
[OP_ACCESS] = nfsd4_encode_access, ++ [OP_CLOSE] = nfsd4_encode_close, ++ [OP_COMMIT] = nfsd4_encode_commit, ++ [OP_CREATE] = nfsd4_encode_create, ++ [OP_DELEGPURGE] = nfsd4_encode_noop, ++ [OP_DELEGRETURN] = nfsd4_encode_noop, ++ [OP_GETATTR] = nfsd4_encode_getattr, ++ [OP_GETFH] = nfsd4_encode_getfh, ++ [OP_LINK] = nfsd4_encode_link, ++ [OP_LOCK] = nfsd4_encode_lock, ++ [OP_LOCKT] = nfsd4_encode_lockt, ++ [OP_LOCKU] = nfsd4_encode_locku, ++ [OP_LOOKUP] = nfsd4_encode_noop, ++ [OP_LOOKUPP] = nfsd4_encode_noop, ++ [OP_NVERIFY] = nfsd4_encode_noop, ++ [OP_OPEN] = nfsd4_encode_open, ++ [OP_OPENATTR] = nfsd4_encode_noop, ++ [OP_OPEN_CONFIRM] = nfsd4_encode_open_confirm, ++ [OP_OPEN_DOWNGRADE] = nfsd4_encode_open_downgrade, ++ [OP_PUTFH] = nfsd4_encode_noop, ++ [OP_PUTPUBFH] = nfsd4_encode_noop, ++ [OP_PUTROOTFH] = nfsd4_encode_noop, ++ [OP_READ] = nfsd4_encode_read, ++ [OP_READDIR] = nfsd4_encode_readdir, ++ [OP_READLINK] = nfsd4_encode_readlink, ++ [OP_REMOVE] = nfsd4_encode_remove, ++ [OP_RENAME] = nfsd4_encode_rename, ++ [OP_RENEW] = nfsd4_encode_noop, ++ [OP_RESTOREFH] = nfsd4_encode_noop, ++ [OP_SAVEFH] = nfsd4_encode_noop, ++ [OP_SECINFO] = nfsd4_encode_secinfo, ++ [OP_SETATTR] = nfsd4_encode_setattr, ++ [OP_SETCLIENTID] = nfsd4_encode_setclientid, ++ [OP_SETCLIENTID_CONFIRM] = nfsd4_encode_noop, ++ [OP_VERIFY] = nfsd4_encode_noop, ++ [OP_WRITE] = nfsd4_encode_write, ++ [OP_RELEASE_LOCKOWNER] = nfsd4_encode_noop, + + /* NFSv4.1 operations */ +- [OP_BACKCHANNEL_CTL] = (nfsd4_enc)nfsd4_encode_noop, +- [OP_BIND_CONN_TO_SESSION] = (nfsd4_enc)nfsd4_encode_bind_conn_to_session, +- [OP_EXCHANGE_ID] = (nfsd4_enc)nfsd4_encode_exchange_id, +- [OP_CREATE_SESSION] = (nfsd4_enc)nfsd4_encode_create_session, +- [OP_DESTROY_SESSION] = (nfsd4_enc)nfsd4_encode_noop, +- [OP_FREE_STATEID] = (nfsd4_enc)nfsd4_encode_noop, +- [OP_GET_DIR_DELEGATION] = (nfsd4_enc)nfsd4_encode_noop, ++ [OP_BACKCHANNEL_CTL] = nfsd4_encode_noop, ++ [OP_BIND_CONN_TO_SESSION] = 
nfsd4_encode_bind_conn_to_session, ++ [OP_EXCHANGE_ID] = nfsd4_encode_exchange_id, ++ [OP_CREATE_SESSION] = nfsd4_encode_create_session, ++ [OP_DESTROY_SESSION] = nfsd4_encode_noop, ++ [OP_FREE_STATEID] = nfsd4_encode_noop, ++ [OP_GET_DIR_DELEGATION] = nfsd4_encode_noop, + #ifdef CONFIG_NFSD_PNFS +- [OP_GETDEVICEINFO] = (nfsd4_enc)nfsd4_encode_getdeviceinfo, +- [OP_GETDEVICELIST] = (nfsd4_enc)nfsd4_encode_noop, +- [OP_LAYOUTCOMMIT] = (nfsd4_enc)nfsd4_encode_layoutcommit, +- [OP_LAYOUTGET] = (nfsd4_enc)nfsd4_encode_layoutget, +- [OP_LAYOUTRETURN] = (nfsd4_enc)nfsd4_encode_layoutreturn, ++ [OP_GETDEVICEINFO] = nfsd4_encode_getdeviceinfo, ++ [OP_GETDEVICELIST] = nfsd4_encode_noop, ++ [OP_LAYOUTCOMMIT] = nfsd4_encode_layoutcommit, ++ [OP_LAYOUTGET] = nfsd4_encode_layoutget, ++ [OP_LAYOUTRETURN] = nfsd4_encode_layoutreturn, + #else +- [OP_GETDEVICEINFO] = (nfsd4_enc)nfsd4_encode_noop, +- [OP_GETDEVICELIST] = (nfsd4_enc)nfsd4_encode_noop, +- [OP_LAYOUTCOMMIT] = (nfsd4_enc)nfsd4_encode_noop, +- [OP_LAYOUTGET] = (nfsd4_enc)nfsd4_encode_noop, +- [OP_LAYOUTRETURN] = (nfsd4_enc)nfsd4_encode_noop, ++ [OP_GETDEVICEINFO] = nfsd4_encode_noop, ++ [OP_GETDEVICELIST] = nfsd4_encode_noop, ++ [OP_LAYOUTCOMMIT] = nfsd4_encode_noop, ++ [OP_LAYOUTGET] = nfsd4_encode_noop, ++ [OP_LAYOUTRETURN] = nfsd4_encode_noop, + #endif +- [OP_SECINFO_NO_NAME] = (nfsd4_enc)nfsd4_encode_secinfo_no_name, +- [OP_SEQUENCE] = (nfsd4_enc)nfsd4_encode_sequence, +- [OP_SET_SSV] = (nfsd4_enc)nfsd4_encode_noop, +- [OP_TEST_STATEID] = (nfsd4_enc)nfsd4_encode_test_stateid, +- [OP_WANT_DELEGATION] = (nfsd4_enc)nfsd4_encode_noop, +- [OP_DESTROY_CLIENTID] = (nfsd4_enc)nfsd4_encode_noop, +- [OP_RECLAIM_COMPLETE] = (nfsd4_enc)nfsd4_encode_noop, ++ [OP_SECINFO_NO_NAME] = nfsd4_encode_secinfo_no_name, ++ [OP_SEQUENCE] = nfsd4_encode_sequence, ++ [OP_SET_SSV] = nfsd4_encode_noop, ++ [OP_TEST_STATEID] = nfsd4_encode_test_stateid, ++ [OP_WANT_DELEGATION] = nfsd4_encode_noop, ++ [OP_DESTROY_CLIENTID] = nfsd4_encode_noop, ++ 
[OP_RECLAIM_COMPLETE] = nfsd4_encode_noop, + + /* NFSv4.2 operations */ +- [OP_ALLOCATE] = (nfsd4_enc)nfsd4_encode_noop, +- [OP_COPY] = (nfsd4_enc)nfsd4_encode_copy, +- [OP_COPY_NOTIFY] = (nfsd4_enc)nfsd4_encode_copy_notify, +- [OP_DEALLOCATE] = (nfsd4_enc)nfsd4_encode_noop, +- [OP_IO_ADVISE] = (nfsd4_enc)nfsd4_encode_noop, +- [OP_LAYOUTERROR] = (nfsd4_enc)nfsd4_encode_noop, +- [OP_LAYOUTSTATS] = (nfsd4_enc)nfsd4_encode_noop, +- [OP_OFFLOAD_CANCEL] = (nfsd4_enc)nfsd4_encode_noop, +- [OP_OFFLOAD_STATUS] = (nfsd4_enc)nfsd4_encode_offload_status, +- [OP_READ_PLUS] = (nfsd4_enc)nfsd4_encode_read_plus, +- [OP_SEEK] = (nfsd4_enc)nfsd4_encode_seek, +- [OP_WRITE_SAME] = (nfsd4_enc)nfsd4_encode_noop, +- [OP_CLONE] = (nfsd4_enc)nfsd4_encode_noop, ++ [OP_ALLOCATE] = nfsd4_encode_noop, ++ [OP_COPY] = nfsd4_encode_copy, ++ [OP_COPY_NOTIFY] = nfsd4_encode_copy_notify, ++ [OP_DEALLOCATE] = nfsd4_encode_noop, ++ [OP_IO_ADVISE] = nfsd4_encode_noop, ++ [OP_LAYOUTERROR] = nfsd4_encode_noop, ++ [OP_LAYOUTSTATS] = nfsd4_encode_noop, ++ [OP_OFFLOAD_CANCEL] = nfsd4_encode_noop, ++ [OP_OFFLOAD_STATUS] = nfsd4_encode_offload_status, ++ [OP_READ_PLUS] = nfsd4_encode_read_plus, ++ [OP_SEEK] = nfsd4_encode_seek, ++ [OP_WRITE_SAME] = nfsd4_encode_noop, ++ [OP_CLONE] = nfsd4_encode_noop, + + /* RFC 8276 extended atributes operations */ +- [OP_GETXATTR] = (nfsd4_enc)nfsd4_encode_getxattr, +- [OP_SETXATTR] = (nfsd4_enc)nfsd4_encode_setxattr, +- [OP_LISTXATTRS] = (nfsd4_enc)nfsd4_encode_listxattrs, +- [OP_REMOVEXATTR] = (nfsd4_enc)nfsd4_encode_removexattr, ++ [OP_GETXATTR] = nfsd4_encode_getxattr, ++ [OP_SETXATTR] = nfsd4_encode_setxattr, ++ [OP_LISTXATTRS] = nfsd4_encode_listxattrs, ++ [OP_REMOVEXATTR] = nfsd4_encode_removexattr, + }; + + /* +diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c +index 573de0d49e172..76a60e7a75097 100644 +--- a/fs/nfsd/nfsctl.c ++++ b/fs/nfsd/nfsctl.c +@@ -581,7 +581,9 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size) + + cmd = sign == '-' 
? NFSD_CLEAR : NFSD_SET; + switch(num) { ++#ifdef CONFIG_NFSD_V2 + case 2: ++#endif + case 3: + nfsd_vers(nn, num, cmd); + break; +@@ -601,7 +603,9 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size) + } + break; + default: +- return -EINVAL; ++ /* Ignore requests to disable non-existent versions */ ++ if (cmd == NFSD_SET) ++ return -EINVAL; + } + vers += len + 1; + } while ((len = qword_get(&mesg, vers, size)) > 0); +@@ -1448,9 +1452,7 @@ static __net_init int nfsd_init_net(struct net *net) + goto out_idmap_error; + nn->nfsd_versions = NULL; + nn->nfsd4_minorversions = NULL; +- retval = nfsd4_init_leases_net(nn); +- if (retval) +- goto out_drc_error; ++ nfsd4_init_leases_net(nn); + retval = nfsd_reply_cache_init(nn); + if (retval) + goto out_cache_error; +@@ -1460,8 +1462,6 @@ static __net_init int nfsd_init_net(struct net *net) + return 0; + + out_cache_error: +- nfsd4_leases_net_shutdown(nn); +-out_drc_error: + nfsd_idmap_shutdown(net); + out_idmap_error: + nfsd_export_shutdown(net); +@@ -1477,7 +1477,6 @@ static __net_exit void nfsd_exit_net(struct net *net) + nfsd_idmap_shutdown(net); + nfsd_export_shutdown(net); + nfsd_netns_free_versions(net_generic(net, nfsd_net_id)); +- nfsd4_leases_net_shutdown(nn); + } + + static struct pernet_operations nfsd_net_ops = { +diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h +index 09726c5b9a317..fa0144a742678 100644 +--- a/fs/nfsd/nfsd.h ++++ b/fs/nfsd/nfsd.h +@@ -64,8 +64,7 @@ struct readdir_cd { + + + extern struct svc_program nfsd_program; +-extern const struct svc_version nfsd_version2, nfsd_version3, +- nfsd_version4; ++extern const struct svc_version nfsd_version2, nfsd_version3, nfsd_version4; + extern struct mutex nfsd_mutex; + extern spinlock_t nfsd_drc_lock; + extern unsigned long nfsd_drc_max_mem; +@@ -505,8 +504,7 @@ extern void unregister_cld_notifier(void); + extern void nfsd4_ssc_init_umount_work(struct nfsd_net *nn); + #endif + +-extern int nfsd4_init_leases_net(struct nfsd_net *nn); 
+-extern void nfsd4_leases_net_shutdown(struct nfsd_net *nn); ++extern void nfsd4_init_leases_net(struct nfsd_net *nn); + + #else /* CONFIG_NFSD_V4 */ + static inline int nfsd4_is_junction(struct dentry *dentry) +@@ -514,8 +512,7 @@ static inline int nfsd4_is_junction(struct dentry *dentry) + return 0; + } + +-static inline int nfsd4_init_leases_net(struct nfsd_net *nn) { return 0; }; +-static inline void nfsd4_leases_net_shutdown(struct nfsd_net *nn) {}; ++static inline void nfsd4_init_leases_net(struct nfsd_net *nn) { }; + + #define register_cld_notifier() 0 + #define unregister_cld_notifier() do { } while(0) +diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h +index c3ae6414fc5cf..513e028b0bbee 100644 +--- a/fs/nfsd/nfsfh.h ++++ b/fs/nfsd/nfsfh.h +@@ -220,7 +220,7 @@ __be32 fh_update(struct svc_fh *); + void fh_put(struct svc_fh *); + + static __inline__ struct svc_fh * +-fh_copy(struct svc_fh *dst, struct svc_fh *src) ++fh_copy(struct svc_fh *dst, const struct svc_fh *src) + { + WARN_ON(src->fh_dentry); + +@@ -229,7 +229,7 @@ fh_copy(struct svc_fh *dst, struct svc_fh *src) + } + + static inline void +-fh_copy_shallow(struct knfsd_fh *dst, struct knfsd_fh *src) ++fh_copy_shallow(struct knfsd_fh *dst, const struct knfsd_fh *src) + { + dst->fh_size = src->fh_size; + memcpy(&dst->fh_raw, &src->fh_raw, src->fh_size); +@@ -243,7 +243,8 @@ fh_init(struct svc_fh *fhp, int maxsize) + return fhp; + } + +-static inline bool fh_match(struct knfsd_fh *fh1, struct knfsd_fh *fh2) ++static inline bool fh_match(const struct knfsd_fh *fh1, ++ const struct knfsd_fh *fh2) + { + if (fh1->fh_size != fh2->fh_size) + return false; +@@ -252,7 +253,8 @@ static inline bool fh_match(struct knfsd_fh *fh1, struct knfsd_fh *fh2) + return true; + } + +-static inline bool fh_fsid_match(struct knfsd_fh *fh1, struct knfsd_fh *fh2) ++static inline bool fh_fsid_match(const struct knfsd_fh *fh1, ++ const struct knfsd_fh *fh2) + { + if (fh1->fh_fsid_type != fh2->fh_fsid_type) + return false; +diff --git 
a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c +index 82b3ddeacc338..9744443c39652 100644 +--- a/fs/nfsd/nfsproc.c ++++ b/fs/nfsd/nfsproc.c +@@ -211,7 +211,7 @@ nfsd_proc_read(struct svc_rqst *rqstp) + if (resp->status == nfs_ok) + resp->status = fh_getattr(&resp->fh, &resp->stat); + else if (resp->status == nfserr_jukebox) +- return rpc_drop_reply; ++ set_bit(RQ_DROPME, &rqstp->rq_flags); + return rpc_success; + } + +@@ -246,7 +246,7 @@ nfsd_proc_write(struct svc_rqst *rqstp) + if (resp->status == nfs_ok) + resp->status = fh_getattr(&resp->fh, &resp->stat); + else if (resp->status == nfserr_jukebox) +- return rpc_drop_reply; ++ set_bit(RQ_DROPME, &rqstp->rq_flags); + return rpc_success; + } + +@@ -848,65 +848,3 @@ const struct svc_version nfsd_version2 = { + .vs_dispatch = nfsd_dispatch, + .vs_xdrsize = NFS2_SVC_XDRSIZE, + }; +- +-/* +- * Map errnos to NFS errnos. +- */ +-__be32 +-nfserrno (int errno) +-{ +- static struct { +- __be32 nfserr; +- int syserr; +- } nfs_errtbl[] = { +- { nfs_ok, 0 }, +- { nfserr_perm, -EPERM }, +- { nfserr_noent, -ENOENT }, +- { nfserr_io, -EIO }, +- { nfserr_nxio, -ENXIO }, +- { nfserr_fbig, -E2BIG }, +- { nfserr_stale, -EBADF }, +- { nfserr_acces, -EACCES }, +- { nfserr_exist, -EEXIST }, +- { nfserr_xdev, -EXDEV }, +- { nfserr_mlink, -EMLINK }, +- { nfserr_nodev, -ENODEV }, +- { nfserr_notdir, -ENOTDIR }, +- { nfserr_isdir, -EISDIR }, +- { nfserr_inval, -EINVAL }, +- { nfserr_fbig, -EFBIG }, +- { nfserr_nospc, -ENOSPC }, +- { nfserr_rofs, -EROFS }, +- { nfserr_mlink, -EMLINK }, +- { nfserr_nametoolong, -ENAMETOOLONG }, +- { nfserr_notempty, -ENOTEMPTY }, +-#ifdef EDQUOT +- { nfserr_dquot, -EDQUOT }, +-#endif +- { nfserr_stale, -ESTALE }, +- { nfserr_jukebox, -ETIMEDOUT }, +- { nfserr_jukebox, -ERESTARTSYS }, +- { nfserr_jukebox, -EAGAIN }, +- { nfserr_jukebox, -EWOULDBLOCK }, +- { nfserr_jukebox, -ENOMEM }, +- { nfserr_io, -ETXTBSY }, +- { nfserr_notsupp, -EOPNOTSUPP }, +- { nfserr_toosmall, -ETOOSMALL }, +- { nfserr_serverfault, 
-ESERVERFAULT }, +- { nfserr_serverfault, -ENFILE }, +- { nfserr_io, -EREMOTEIO }, +- { nfserr_stale, -EOPENSTALE }, +- { nfserr_io, -EUCLEAN }, +- { nfserr_perm, -ENOKEY }, +- { nfserr_no_grace, -ENOGRACE}, +- }; +- int i; +- +- for (i = 0; i < ARRAY_SIZE(nfs_errtbl); i++) { +- if (nfs_errtbl[i].syserr == errno) +- return nfs_errtbl[i].nfserr; +- } +- WARN_ONCE(1, "nfsd: non-standard errno: %d\n", errno); +- return nfserr_io; +-} +- +diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c +index c7695ebd28dc3..0c75636054a54 100644 +--- a/fs/nfsd/nfssvc.c ++++ b/fs/nfsd/nfssvc.c +@@ -91,8 +91,12 @@ unsigned long nfsd_drc_mem_used; + #if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) + static struct svc_stat nfsd_acl_svcstats; + static const struct svc_version *nfsd_acl_version[] = { ++# if defined(CONFIG_NFSD_V2_ACL) + [2] = &nfsd_acl_version2, ++# endif ++# if defined(CONFIG_NFSD_V3_ACL) + [3] = &nfsd_acl_version3, ++# endif + }; + + #define NFSD_ACL_MINVERS 2 +@@ -116,7 +120,9 @@ static struct svc_stat nfsd_acl_svcstats = { + #endif /* defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) */ + + static const struct svc_version *nfsd_version[] = { ++#if defined(CONFIG_NFSD_V2) + [2] = &nfsd_version2, ++#endif + [3] = &nfsd_version3, + #if defined(CONFIG_NFSD_V4) + [4] = &nfsd_version4, +@@ -1065,7 +1071,7 @@ int nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp) + + nfs_reply = xdr_inline_decode(&rqstp->rq_res_stream, 0); + *statp = proc->pc_func(rqstp); +- if (*statp == rpc_drop_reply || test_bit(RQ_DROPME, &rqstp->rq_flags)) ++ if (test_bit(RQ_DROPME, &rqstp->rq_flags)) + goto out_update_drop; + + if (!proc->pc_encode(rqstp, &rqstp->rq_res_stream)) +diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h +index e2daef3cc0034..e94634d305912 100644 +--- a/fs/nfsd/state.h ++++ b/fs/nfsd/state.h +@@ -368,6 +368,7 @@ struct nfs4_client { + #define NFSD4_CLIENT_UPCALL_LOCK (5) /* upcall serialization */ + #define NFSD4_CLIENT_CB_FLAG_MASK (1 << 
NFSD4_CLIENT_CB_UPDATE | \ + 1 << NFSD4_CLIENT_CB_KILL) ++#define NFSD4_CLIENT_CB_RECALL_ANY (6) + unsigned long cl_flags; + const struct cred *cl_cb_cred; + struct rpc_clnt *cl_cb_client; +@@ -411,6 +412,10 @@ struct nfs4_client { + + unsigned int cl_state; + atomic_t cl_delegs_in_recall; ++ ++ struct nfsd4_cb_recall_any *cl_ra; ++ time64_t cl_ra_time; ++ struct list_head cl_ra_cblist; + }; + + /* struct nfs4_client_reset +@@ -536,16 +541,13 @@ struct nfs4_clnt_odstate { + * inode can have multiple filehandles associated with it, so there is + * (potentially) a many to one relationship between this struct and struct + * inode. +- * +- * These are hashed by filehandle in the file_hashtbl, which is protected by +- * the global state_lock spinlock. + */ + struct nfs4_file { + refcount_t fi_ref; + struct inode * fi_inode; + bool fi_aliased; + spinlock_t fi_lock; +- struct hlist_node fi_hash; /* hash on fi_fhandle */ ++ struct rhlist_head fi_rlist; + struct list_head fi_stateids; + union { + struct list_head fi_delegations; +@@ -639,6 +641,7 @@ enum nfsd4_cb_op { + NFSPROC4_CLNT_CB_OFFLOAD, + NFSPROC4_CLNT_CB_SEQUENCE, + NFSPROC4_CLNT_CB_NOTIFY_LOCK, ++ NFSPROC4_CLNT_CB_RECALL_ANY, + }; + + /* Returns true iff a is later than b: */ +diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h +index 132335011ccae..4183819ea0829 100644 +--- a/fs/nfsd/trace.h ++++ b/fs/nfsd/trace.h +@@ -9,9 +9,12 @@ + #define _NFSD_TRACE_H + + #include <linux/tracepoint.h> ++#include <linux/sunrpc/xprt.h> ++#include <trace/misc/nfs.h> + + #include "export.h" + #include "nfsfh.h" ++#include "xdr4.h" + + #define NFSD_TRACE_PROC_RES_FIELDS \ + __field(unsigned int, netns_ino) \ +@@ -604,6 +607,7 @@ DEFINE_STATEID_EVENT(layout_recall_release); + + DEFINE_STATEID_EVENT(open); + DEFINE_STATEID_EVENT(deleg_read); ++DEFINE_STATEID_EVENT(deleg_return); + DEFINE_STATEID_EVENT(deleg_recall); + + DECLARE_EVENT_CLASS(nfsd_stateseqid_class, +@@ -636,6 +640,61 @@ DEFINE_EVENT(nfsd_stateseqid_class, nfsd_##name, \ + 
DEFINE_STATESEQID_EVENT(preprocess); + DEFINE_STATESEQID_EVENT(open_confirm); + ++TRACE_DEFINE_ENUM(NFS4_OPEN_STID); ++TRACE_DEFINE_ENUM(NFS4_LOCK_STID); ++TRACE_DEFINE_ENUM(NFS4_DELEG_STID); ++TRACE_DEFINE_ENUM(NFS4_CLOSED_STID); ++TRACE_DEFINE_ENUM(NFS4_REVOKED_DELEG_STID); ++TRACE_DEFINE_ENUM(NFS4_CLOSED_DELEG_STID); ++TRACE_DEFINE_ENUM(NFS4_LAYOUT_STID); ++ ++#define show_stid_type(x) \ ++ __print_flags(x, "|", \ ++ { NFS4_OPEN_STID, "OPEN" }, \ ++ { NFS4_LOCK_STID, "LOCK" }, \ ++ { NFS4_DELEG_STID, "DELEG" }, \ ++ { NFS4_CLOSED_STID, "CLOSED" }, \ ++ { NFS4_REVOKED_DELEG_STID, "REVOKED" }, \ ++ { NFS4_CLOSED_DELEG_STID, "CLOSED_DELEG" }, \ ++ { NFS4_LAYOUT_STID, "LAYOUT" }) ++ ++DECLARE_EVENT_CLASS(nfsd_stid_class, ++ TP_PROTO( ++ const struct nfs4_stid *stid ++ ), ++ TP_ARGS(stid), ++ TP_STRUCT__entry( ++ __field(unsigned long, sc_type) ++ __field(int, sc_count) ++ __field(u32, cl_boot) ++ __field(u32, cl_id) ++ __field(u32, si_id) ++ __field(u32, si_generation) ++ ), ++ TP_fast_assign( ++ const stateid_t *stp = &stid->sc_stateid; ++ ++ __entry->sc_type = stid->sc_type; ++ __entry->sc_count = refcount_read(&stid->sc_count); ++ __entry->cl_boot = stp->si_opaque.so_clid.cl_boot; ++ __entry->cl_id = stp->si_opaque.so_clid.cl_id; ++ __entry->si_id = stp->si_opaque.so_id; ++ __entry->si_generation = stp->si_generation; ++ ), ++ TP_printk("client %08x:%08x stateid %08x:%08x ref=%d type=%s", ++ __entry->cl_boot, __entry->cl_id, ++ __entry->si_id, __entry->si_generation, ++ __entry->sc_count, show_stid_type(__entry->sc_type) ++ ) ++); ++ ++#define DEFINE_STID_EVENT(name) \ ++DEFINE_EVENT(nfsd_stid_class, nfsd_stid_##name, \ ++ TP_PROTO(const struct nfs4_stid *stid), \ ++ TP_ARGS(stid)) ++ ++DEFINE_STID_EVENT(revoke); ++ + DECLARE_EVENT_CLASS(nfsd_clientid_class, + TP_PROTO(const clientid_t *clid), + TP_ARGS(clid), +@@ -1436,6 +1495,32 @@ TRACE_EVENT(nfsd_cb_offload, + __entry->fh_hash, __entry->count, __entry->status) + ); + ++TRACE_EVENT(nfsd_cb_recall_any, ++ 
TP_PROTO( ++ const struct nfsd4_cb_recall_any *ra ++ ), ++ TP_ARGS(ra), ++ TP_STRUCT__entry( ++ __field(u32, cl_boot) ++ __field(u32, cl_id) ++ __field(u32, keep) ++ __field(unsigned long, bmval0) ++ __sockaddr(addr, ra->ra_cb.cb_clp->cl_cb_conn.cb_addrlen) ++ ), ++ TP_fast_assign( ++ __entry->cl_boot = ra->ra_cb.cb_clp->cl_clientid.cl_boot; ++ __entry->cl_id = ra->ra_cb.cb_clp->cl_clientid.cl_id; ++ __entry->keep = ra->ra_keep; ++ __entry->bmval0 = ra->ra_bmval[0]; ++ __assign_sockaddr(addr, &ra->ra_cb.cb_clp->cl_addr, ++ ra->ra_cb.cb_clp->cl_cb_conn.cb_addrlen); ++ ), ++ TP_printk("addr=%pISpc client %08x:%08x keep=%u bmval0=%s", ++ __get_sockaddr(addr), __entry->cl_boot, __entry->cl_id, ++ __entry->keep, show_rca_mask(__entry->bmval0) ++ ) ++); ++ + DECLARE_EVENT_CLASS(nfsd_cb_done_class, + TP_PROTO( + const stateid_t *stp, +@@ -1475,6 +1560,27 @@ DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_notify_lock_done); + DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_layout_done); + DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_offload_done); + ++TRACE_EVENT(nfsd_cb_recall_any_done, ++ TP_PROTO( ++ const struct nfsd4_callback *cb, ++ const struct rpc_task *task ++ ), ++ TP_ARGS(cb, task), ++ TP_STRUCT__entry( ++ __field(u32, cl_boot) ++ __field(u32, cl_id) ++ __field(int, status) ++ ), ++ TP_fast_assign( ++ __entry->status = task->tk_status; ++ __entry->cl_boot = cb->cb_clp->cl_clientid.cl_boot; ++ __entry->cl_id = cb->cb_clp->cl_clientid.cl_id; ++ ), ++ TP_printk("client %08x:%08x status=%d", ++ __entry->cl_boot, __entry->cl_id, __entry->status ++ ) ++); ++ + #endif /* _NFSD_TRACE_H */ + + #undef TRACE_INCLUDE_PATH +diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c +index eccc6ce55a63a..5d6a61d47a905 100644 +--- a/fs/nfsd/vfs.c ++++ b/fs/nfsd/vfs.c +@@ -49,6 +49,69 @@ + + #define NFSDDBG_FACILITY NFSDDBG_FILEOP + ++/** ++ * nfserrno - Map Linux errnos to NFS errnos ++ * @errno: POSIX(-ish) error code to be mapped ++ * ++ * Returns the appropriate (net-endian) nfserr_* (or nfs_ok if errno is 0). 
If ++ * it's an error we don't expect, log it once and return nfserr_io. ++ */ ++__be32 ++nfserrno (int errno) ++{ ++ static struct { ++ __be32 nfserr; ++ int syserr; ++ } nfs_errtbl[] = { ++ { nfs_ok, 0 }, ++ { nfserr_perm, -EPERM }, ++ { nfserr_noent, -ENOENT }, ++ { nfserr_io, -EIO }, ++ { nfserr_nxio, -ENXIO }, ++ { nfserr_fbig, -E2BIG }, ++ { nfserr_stale, -EBADF }, ++ { nfserr_acces, -EACCES }, ++ { nfserr_exist, -EEXIST }, ++ { nfserr_xdev, -EXDEV }, ++ { nfserr_mlink, -EMLINK }, ++ { nfserr_nodev, -ENODEV }, ++ { nfserr_notdir, -ENOTDIR }, ++ { nfserr_isdir, -EISDIR }, ++ { nfserr_inval, -EINVAL }, ++ { nfserr_fbig, -EFBIG }, ++ { nfserr_nospc, -ENOSPC }, ++ { nfserr_rofs, -EROFS }, ++ { nfserr_mlink, -EMLINK }, ++ { nfserr_nametoolong, -ENAMETOOLONG }, ++ { nfserr_notempty, -ENOTEMPTY }, ++ { nfserr_dquot, -EDQUOT }, ++ { nfserr_stale, -ESTALE }, ++ { nfserr_jukebox, -ETIMEDOUT }, ++ { nfserr_jukebox, -ERESTARTSYS }, ++ { nfserr_jukebox, -EAGAIN }, ++ { nfserr_jukebox, -EWOULDBLOCK }, ++ { nfserr_jukebox, -ENOMEM }, ++ { nfserr_io, -ETXTBSY }, ++ { nfserr_notsupp, -EOPNOTSUPP }, ++ { nfserr_toosmall, -ETOOSMALL }, ++ { nfserr_serverfault, -ESERVERFAULT }, ++ { nfserr_serverfault, -ENFILE }, ++ { nfserr_io, -EREMOTEIO }, ++ { nfserr_stale, -EOPENSTALE }, ++ { nfserr_io, -EUCLEAN }, ++ { nfserr_perm, -ENOKEY }, ++ { nfserr_no_grace, -ENOGRACE}, ++ }; ++ int i; ++ ++ for (i = 0; i < ARRAY_SIZE(nfs_errtbl); i++) { ++ if (nfs_errtbl[i].syserr == errno) ++ return nfs_errtbl[i].nfserr; ++ } ++ WARN_ONCE(1, "nfsd: non-standard errno: %d\n", errno); ++ return nfserr_io; ++} ++ + /* + * Called from nfsd_lookup and encode_dirent. Check if we have crossed + * a mount point. 
+@@ -1317,7 +1380,6 @@ nfsd_create_locked(struct svc_rqst *rqstp, struct svc_fh *fhp, + iap->ia_mode &= ~current_umask(); + + err = 0; +- host_err = 0; + switch (type) { + case S_IFREG: + host_err = vfs_create(&init_user_ns, dirp, dchild, iap->ia_mode, true); +diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h +index 9744b041105b5..dbdfef7ae85bb 100644 +--- a/fs/nfsd/vfs.h ++++ b/fs/nfsd/vfs.h +@@ -60,6 +60,7 @@ static inline void nfsd_attrs_free(struct nfsd_attrs *attrs) + posix_acl_release(attrs->na_dpacl); + } + ++__be32 nfserrno (int errno); + int nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp, + struct svc_export **expp); + __be32 nfsd_lookup(struct svc_rqst *, struct svc_fh *, +diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h +index 36c3340c1d54a..510978e602da6 100644 +--- a/fs/nfsd/xdr4.h ++++ b/fs/nfsd/xdr4.h +@@ -896,5 +896,10 @@ struct nfsd4_operation { + union nfsd4_op_u *); + }; + ++struct nfsd4_cb_recall_any { ++ struct nfsd4_callback ra_cb; ++ u32 ra_keep; ++ u32 ra_bmval[1]; ++}; + + #endif +diff --git a/fs/nfsd/xdr4cb.h b/fs/nfsd/xdr4cb.h +index 547cf07cf4e08..0d39af1b00a0f 100644 +--- a/fs/nfsd/xdr4cb.h ++++ b/fs/nfsd/xdr4cb.h +@@ -48,3 +48,9 @@ + #define NFS4_dec_cb_offload_sz (cb_compound_dec_hdr_sz + \ + cb_sequence_dec_sz + \ + op_dec_sz) ++#define NFS4_enc_cb_recall_any_sz (cb_compound_enc_hdr_sz + \ ++ cb_sequence_enc_sz + \ ++ 1 + 1 + 1) ++#define NFS4_dec_cb_recall_any_sz (cb_compound_dec_hdr_sz + \ ++ cb_sequence_dec_sz + \ ++ op_dec_sz) +diff --git a/fs/ntfs3/frecord.c b/fs/ntfs3/frecord.c +index bb7e33c240737..d260260900241 100644 +--- a/fs/ntfs3/frecord.c ++++ b/fs/ntfs3/frecord.c +@@ -102,7 +102,7 @@ void ni_clear(struct ntfs_inode *ni) + { + struct rb_node *node; + +- if (!ni->vfs_inode.i_nlink && is_rec_inuse(ni->mi.mrec)) ++ if (!ni->vfs_inode.i_nlink && ni->mi.mrec && is_rec_inuse(ni->mi.mrec)) + ni_delete_all(ni); + + al_destroy(ni); +@@ -3255,6 +3255,9 @@ int ni_write_inode(struct inode *inode, int sync, const char *hint) + 
return 0; + } + ++ if (!ni->mi.mrec) ++ goto out; ++ + if (is_rec_inuse(ni->mi.mrec) && + !(sbi->flags & NTFS_FLAGS_LOG_REPLAYING) && inode->i_nlink) { + bool modified = false; +diff --git a/fs/ntfs3/fsntfs.c b/fs/ntfs3/fsntfs.c +index 1eac80d55b554..4c2d079b3d49b 100644 +--- a/fs/ntfs3/fsntfs.c ++++ b/fs/ntfs3/fsntfs.c +@@ -1674,6 +1674,7 @@ struct ntfs_inode *ntfs_new_inode(struct ntfs_sb_info *sbi, CLST rno, bool dir) + + out: + if (err) { ++ make_bad_inode(inode); + iput(inode); + ni = ERR_PTR(err); + } +diff --git a/fs/ntfs3/index.c b/fs/ntfs3/index.c +index 7371f7855e4c4..eee01db6e0cc5 100644 +--- a/fs/ntfs3/index.c ++++ b/fs/ntfs3/index.c +@@ -998,6 +998,7 @@ struct INDEX_ROOT *indx_get_root(struct ntfs_index *indx, struct ntfs_inode *ni, + struct ATTR_LIST_ENTRY *le = NULL; + struct ATTRIB *a; + const struct INDEX_NAMES *in = &s_index_names[indx->type]; ++ struct INDEX_ROOT *root = NULL; + + a = ni_find_attr(ni, NULL, &le, ATTR_ROOT, in->name, in->name_len, NULL, + mi); +@@ -1007,7 +1008,15 @@ struct INDEX_ROOT *indx_get_root(struct ntfs_index *indx, struct ntfs_inode *ni, + if (attr) + *attr = a; + +- return resident_data_ex(a, sizeof(struct INDEX_ROOT)); ++ root = resident_data_ex(a, sizeof(struct INDEX_ROOT)); ++ ++ /* length check */ ++ if (root && offsetof(struct INDEX_ROOT, ihdr) + le32_to_cpu(root->ihdr.used) > ++ le32_to_cpu(a->res.data_size)) { ++ return NULL; ++ } ++ ++ return root; + } + + static int indx_write(struct ntfs_index *indx, struct ntfs_inode *ni, +diff --git a/include/linux/bvec.h b/include/linux/bvec.h +index 9e3dac51eb26b..d4dbaae8b5218 100644 +--- a/include/linux/bvec.h ++++ b/include/linux/bvec.h +@@ -59,7 +59,7 @@ struct bvec_iter { + + unsigned int bi_bvec_done; /* number of bytes completed in + current bvec */ +-} __packed; ++} __packed __aligned(4); + + struct bvec_iter_all { + struct bio_vec bv; +diff --git a/include/linux/decompress/mm.h b/include/linux/decompress/mm.h +index 9192986b1a731..ac862422df158 100644 +--- 
a/include/linux/decompress/mm.h ++++ b/include/linux/decompress/mm.h +@@ -48,7 +48,7 @@ MALLOC_VISIBLE void *malloc(int size) + if (!malloc_ptr) + malloc_ptr = free_mem_ptr; + +- malloc_ptr = (malloc_ptr + 3) & ~3; /* Align */ ++ malloc_ptr = (malloc_ptr + 7) & ~7; /* Align */ + + p = (void *)malloc_ptr; + malloc_ptr += size; +diff --git a/include/linux/efi.h b/include/linux/efi.h +index 4e1bfee9675d2..de6d6558a4d30 100644 +--- a/include/linux/efi.h ++++ b/include/linux/efi.h +@@ -390,6 +390,7 @@ void efi_native_runtime_setup(void); + #define EFI_RT_PROPERTIES_TABLE_GUID EFI_GUID(0xeb66918a, 0x7eef, 0x402a, 0x84, 0x2e, 0x93, 0x1d, 0x21, 0xc3, 0x8a, 0xe9) + #define EFI_DXE_SERVICES_TABLE_GUID EFI_GUID(0x05ad34ba, 0x6f02, 0x4214, 0x95, 0x2e, 0x4d, 0xa0, 0x39, 0x8e, 0x2b, 0xb9) + #define EFI_SMBIOS_PROTOCOL_GUID EFI_GUID(0x03583ff6, 0xcb36, 0x4940, 0x94, 0x7e, 0xb9, 0xb3, 0x9f, 0x4a, 0xfa, 0xf7) ++#define EFI_MEMORY_ATTRIBUTE_PROTOCOL_GUID EFI_GUID(0xf4560cf6, 0x40ec, 0x4b4a, 0xa1, 0x92, 0xbf, 0x1d, 0x57, 0xd0, 0xb1, 0x89) + + #define EFI_IMAGE_SECURITY_DATABASE_GUID EFI_GUID(0xd719b2cb, 0x3d3a, 0x4596, 0xa3, 0xbc, 0xda, 0xd0, 0x0e, 0x67, 0x65, 0x6f) + #define EFI_SHIM_LOCK_GUID EFI_GUID(0x605dab50, 0xe046, 0x4300, 0xab, 0xb6, 0x3d, 0xd8, 0x10, 0xdd, 0x8b, 0x23) +diff --git a/include/linux/fs.h b/include/linux/fs.h +index 67313881f8ac1..092d8fa10153f 100644 +--- a/include/linux/fs.h ++++ b/include/linux/fs.h +@@ -1189,6 +1189,13 @@ extern void show_fd_locks(struct seq_file *f, + struct file *filp, struct files_struct *files); + extern bool locks_owner_has_blockers(struct file_lock_context *flctx, + fl_owner_t owner); ++ ++static inline struct file_lock_context * ++locks_inode_context(const struct inode *inode) ++{ ++ return smp_load_acquire(&inode->i_flctx); ++} ++ + #else /* !CONFIG_FILE_LOCKING */ + static inline int fcntl_getlk(struct file *file, unsigned int cmd, + struct flock __user *user) +@@ -1334,6 +1341,13 @@ static inline bool 
locks_owner_has_blockers(struct file_lock_context *flctx, + { + return false; + } ++ ++static inline struct file_lock_context * ++locks_inode_context(const struct inode *inode) ++{ ++ return NULL; ++} ++ + #endif /* !CONFIG_FILE_LOCKING */ + + static inline struct inode *file_inode(const struct file *f) +diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h +index bef8db9d6c085..e5f4b6f8d1c09 100644 +--- a/include/linux/netfilter.h ++++ b/include/linux/netfilter.h +@@ -437,11 +437,13 @@ nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl, u_int8_t family) + #include <linux/netfilter/nf_conntrack_zones_common.h> + + void nf_ct_attach(struct sk_buff *, const struct sk_buff *); ++void nf_ct_set_closing(struct nf_conntrack *nfct); + struct nf_conntrack_tuple; + bool nf_ct_get_tuple_skb(struct nf_conntrack_tuple *dst_tuple, + const struct sk_buff *skb); + #else + static inline void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb) {} ++static inline void nf_ct_set_closing(struct nf_conntrack *nfct) {} + struct nf_conntrack_tuple; + static inline bool nf_ct_get_tuple_skb(struct nf_conntrack_tuple *dst_tuple, + const struct sk_buff *skb) +@@ -459,6 +461,8 @@ struct nf_ct_hook { + bool (*get_tuple_skb)(struct nf_conntrack_tuple *, + const struct sk_buff *); + void (*attach)(struct sk_buff *nskb, const struct sk_buff *skb); ++ void (*set_closing)(struct nf_conntrack *nfct); ++ int (*confirm)(struct sk_buff *skb); + }; + extern const struct nf_ct_hook __rcu *nf_ct_hook; + +diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h +index 8d04b6a5964c4..730003c4f4af4 100644 +--- a/include/linux/nfs4.h ++++ b/include/linux/nfs4.h +@@ -732,4 +732,17 @@ enum nfs4_setxattr_options { + SETXATTR4_CREATE = 1, + SETXATTR4_REPLACE = 2, + }; ++ ++enum { ++ RCA4_TYPE_MASK_RDATA_DLG = 0, ++ RCA4_TYPE_MASK_WDATA_DLG = 1, ++ RCA4_TYPE_MASK_DIR_DLG = 2, ++ RCA4_TYPE_MASK_FILE_LAYOUT = 3, ++ RCA4_TYPE_MASK_BLK_LAYOUT = 4, ++ RCA4_TYPE_MASK_OBJ_LAYOUT_MIN = 8, ++ 
RCA4_TYPE_MASK_OBJ_LAYOUT_MAX = 9, ++ RCA4_TYPE_MASK_OTHER_LAYOUT_MIN = 12, ++ RCA4_TYPE_MASK_OTHER_LAYOUT_MAX = 15, ++}; ++ + #endif +diff --git a/include/linux/usb/composite.h b/include/linux/usb/composite.h +index 43ac3fa760dbe..9783b9107d76b 100644 +--- a/include/linux/usb/composite.h ++++ b/include/linux/usb/composite.h +@@ -412,6 +412,8 @@ extern int composite_dev_prepare(struct usb_composite_driver *composite, + extern int composite_os_desc_req_prepare(struct usb_composite_dev *cdev, + struct usb_ep *ep0); + void composite_dev_cleanup(struct usb_composite_dev *cdev); ++void check_remote_wakeup_config(struct usb_gadget *g, ++ struct usb_configuration *c); + + static inline struct usb_composite_driver *to_cdriver( + struct usb_gadget_driver *gdrv) +diff --git a/include/linux/usb/gadget.h b/include/linux/usb/gadget.h +index dc3092cea99e9..5bec668b41dcd 100644 +--- a/include/linux/usb/gadget.h ++++ b/include/linux/usb/gadget.h +@@ -309,6 +309,7 @@ struct usb_udc; + struct usb_gadget_ops { + int (*get_frame)(struct usb_gadget *); + int (*wakeup)(struct usb_gadget *); ++ int (*set_remote_wakeup)(struct usb_gadget *, int set); + int (*set_selfpowered) (struct usb_gadget *, int is_selfpowered); + int (*vbus_session) (struct usb_gadget *, int is_active); + int (*vbus_draw) (struct usb_gadget *, unsigned mA); +@@ -383,6 +384,8 @@ struct usb_gadget_ops { + * @connected: True if gadget is connected. + * @lpm_capable: If the gadget max_speed is FULL or HIGH, this flag + * indicates that it supports LPM as per the LPM ECN & errata. ++ * @wakeup_capable: True if gadget is capable of sending remote wakeup. ++ * @wakeup_armed: True if gadget is armed by the host for remote wakeup. + * @irq: the interrupt number for device controller. 
+ * @id_number: a unique ID number for ensuring that gadget names are distinct + * +@@ -444,6 +447,8 @@ struct usb_gadget { + unsigned deactivated:1; + unsigned connected:1; + unsigned lpm_capable:1; ++ unsigned wakeup_capable:1; ++ unsigned wakeup_armed:1; + int irq; + int id_number; + }; +@@ -600,6 +605,7 @@ static inline int gadget_is_otg(struct usb_gadget *g) + #if IS_ENABLED(CONFIG_USB_GADGET) + int usb_gadget_frame_number(struct usb_gadget *gadget); + int usb_gadget_wakeup(struct usb_gadget *gadget); ++int usb_gadget_set_remote_wakeup(struct usb_gadget *gadget, int set); + int usb_gadget_set_selfpowered(struct usb_gadget *gadget); + int usb_gadget_clear_selfpowered(struct usb_gadget *gadget); + int usb_gadget_vbus_connect(struct usb_gadget *gadget); +@@ -615,6 +621,8 @@ static inline int usb_gadget_frame_number(struct usb_gadget *gadget) + { return 0; } + static inline int usb_gadget_wakeup(struct usb_gadget *gadget) + { return 0; } ++static inline int usb_gadget_set_remote_wakeup(struct usb_gadget *gadget, int set) ++{ return 0; } + static inline int usb_gadget_set_selfpowered(struct usb_gadget *gadget) + { return 0; } + static inline int usb_gadget_clear_selfpowered(struct usb_gadget *gadget) +diff --git a/include/net/ipv6_stubs.h b/include/net/ipv6_stubs.h +index c48186bf47372..21da31e1dff5d 100644 +--- a/include/net/ipv6_stubs.h ++++ b/include/net/ipv6_stubs.h +@@ -85,6 +85,11 @@ struct ipv6_bpf_stub { + sockptr_t optval, unsigned int optlen); + int (*ipv6_getsockopt)(struct sock *sk, int level, int optname, + sockptr_t optval, sockptr_t optlen); ++ int (*ipv6_dev_get_saddr)(struct net *net, ++ const struct net_device *dst_dev, ++ const struct in6_addr *daddr, ++ unsigned int prefs, ++ struct in6_addr *saddr); + }; + extern const struct ipv6_bpf_stub *ipv6_bpf_stub __read_mostly; + +diff --git a/include/net/mctp.h b/include/net/mctp.h +index 82800d521c3de..7ed84054f4623 100644 +--- a/include/net/mctp.h ++++ b/include/net/mctp.h +@@ -249,6 +249,7 @@ struct 
mctp_route { + struct mctp_route *mctp_route_lookup(struct net *net, unsigned int dnet, + mctp_eid_t daddr); + ++/* always takes ownership of skb */ + int mctp_local_output(struct sock *sk, struct mctp_route *rt, + struct sk_buff *skb, mctp_eid_t daddr, u8 req_tag); + +diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h +index 6a2019aaa4644..3dbf947285be2 100644 +--- a/include/net/netfilter/nf_conntrack.h ++++ b/include/net/netfilter/nf_conntrack.h +@@ -125,6 +125,12 @@ struct nf_conn { + union nf_conntrack_proto proto; + }; + ++static inline struct nf_conn * ++nf_ct_to_nf_conn(const struct nf_conntrack *nfct) ++{ ++ return container_of(nfct, struct nf_conn, ct_general); ++} ++ + static inline struct nf_conn * + nf_ct_tuplehash_to_ctrack(const struct nf_conntrack_tuple_hash *hash) + { +@@ -175,6 +181,8 @@ nf_ct_get(const struct sk_buff *skb, enum ip_conntrack_info *ctinfo) + + void nf_ct_destroy(struct nf_conntrack *nfct); + ++void nf_conntrack_tcp_set_closing(struct nf_conn *ct); ++ + /* decrement reference count on a conntrack */ + static inline void nf_ct_put(struct nf_conn *ct) + { +diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h +index d2751ed536df2..a64713fe52640 100644 +--- a/include/scsi/scsi_device.h ++++ b/include/scsi/scsi_device.h +@@ -204,6 +204,7 @@ struct scsi_device { + unsigned use_10_for_rw:1; /* first try 10-byte read / write */ + unsigned use_10_for_ms:1; /* first try 10-byte mode sense/select */ + unsigned set_dbd_for_ms:1; /* Set "DBD" field in mode sense */ ++ unsigned read_before_ms:1; /* perform a READ before MODE SENSE */ + unsigned no_report_opcodes:1; /* no REPORT SUPPORTED OPERATION CODES */ + unsigned no_write_same:1; /* no WRITE SAME command */ + unsigned use_16_for_rw:1; /* Use read/write(16) over read/write(10) */ +@@ -479,28 +480,51 @@ extern const char *scsi_device_state_name(enum scsi_device_state); + extern int scsi_is_sdev_device(const struct device *); + extern int 
scsi_is_target_device(const struct device *); + extern void scsi_sanitize_inquiry_string(unsigned char *s, int len); +-extern int __scsi_execute(struct scsi_device *sdev, const unsigned char *cmd, +- int data_direction, void *buffer, unsigned bufflen, +- unsigned char *sense, struct scsi_sense_hdr *sshdr, +- int timeout, int retries, blk_opf_t flags, +- req_flags_t rq_flags, int *resid); ++ ++/* Optional arguments to scsi_execute_cmd */ ++struct scsi_exec_args { ++ unsigned char *sense; /* sense buffer */ ++ unsigned int sense_len; /* sense buffer len */ ++ struct scsi_sense_hdr *sshdr; /* decoded sense header */ ++ blk_mq_req_flags_t req_flags; /* BLK_MQ_REQ flags */ ++ int *resid; /* residual length */ ++}; ++ ++int scsi_execute_cmd(struct scsi_device *sdev, const unsigned char *cmd, ++ blk_opf_t opf, void *buffer, unsigned int bufflen, ++ int timeout, int retries, ++ const struct scsi_exec_args *args); ++ + /* Make sure any sense buffer is the correct size. */ +-#define scsi_execute(sdev, cmd, data_direction, buffer, bufflen, sense, \ +- sshdr, timeout, retries, flags, rq_flags, resid) \ ++#define scsi_execute(_sdev, _cmd, _data_dir, _buffer, _bufflen, _sense, \ ++ _sshdr, _timeout, _retries, _flags, _rq_flags, \ ++ _resid) \ + ({ \ +- BUILD_BUG_ON((sense) != NULL && \ +- sizeof(sense) != SCSI_SENSE_BUFFERSIZE); \ +- __scsi_execute(sdev, cmd, data_direction, buffer, bufflen, \ +- sense, sshdr, timeout, retries, flags, rq_flags, \ +- resid); \ ++ scsi_execute_cmd(_sdev, _cmd, (_data_dir == DMA_TO_DEVICE ? \ ++ REQ_OP_DRV_OUT : REQ_OP_DRV_IN) | _flags, \ ++ _buffer, _bufflen, _timeout, _retries, \ ++ &(struct scsi_exec_args) { \ ++ .sense = _sense, \ ++ .sshdr = _sshdr, \ ++ .req_flags = _rq_flags & RQF_PM ? 
\ ++ BLK_MQ_REQ_PM : 0, \ ++ .resid = _resid, \ ++ }); \ + }) ++ + static inline int scsi_execute_req(struct scsi_device *sdev, + const unsigned char *cmd, int data_direction, void *buffer, + unsigned bufflen, struct scsi_sense_hdr *sshdr, int timeout, + int retries, int *resid) + { +- return scsi_execute(sdev, cmd, data_direction, buffer, +- bufflen, NULL, sshdr, timeout, retries, 0, 0, resid); ++ return scsi_execute_cmd(sdev, cmd, ++ data_direction == DMA_TO_DEVICE ? ++ REQ_OP_DRV_OUT : REQ_OP_DRV_IN, buffer, ++ bufflen, timeout, retries, ++ &(struct scsi_exec_args) { ++ .sshdr = sshdr, ++ .resid = resid, ++ }); + } + extern void sdev_disable_disk_events(struct scsi_device *sdev); + extern void sdev_enable_disk_events(struct scsi_device *sdev); +diff --git a/include/trace/events/fs.h b/include/trace/events/fs.h +deleted file mode 100644 +index 738b97f22f365..0000000000000 +--- a/include/trace/events/fs.h ++++ /dev/null +@@ -1,122 +0,0 @@ +-/* SPDX-License-Identifier: GPL-2.0 */ +-/* +- * Display helpers for generic filesystem items +- * +- * Author: Chuck Lever <chuck.lever@oracle.com> +- * +- * Copyright (c) 2020, Oracle and/or its affiliates. 
+- */ +- +-#include <linux/fs.h> +- +-#define show_fs_dirent_type(x) \ +- __print_symbolic(x, \ +- { DT_UNKNOWN, "UNKNOWN" }, \ +- { DT_FIFO, "FIFO" }, \ +- { DT_CHR, "CHR" }, \ +- { DT_DIR, "DIR" }, \ +- { DT_BLK, "BLK" }, \ +- { DT_REG, "REG" }, \ +- { DT_LNK, "LNK" }, \ +- { DT_SOCK, "SOCK" }, \ +- { DT_WHT, "WHT" }) +- +-#define show_fs_fcntl_open_flags(x) \ +- __print_flags(x, "|", \ +- { O_WRONLY, "O_WRONLY" }, \ +- { O_RDWR, "O_RDWR" }, \ +- { O_CREAT, "O_CREAT" }, \ +- { O_EXCL, "O_EXCL" }, \ +- { O_NOCTTY, "O_NOCTTY" }, \ +- { O_TRUNC, "O_TRUNC" }, \ +- { O_APPEND, "O_APPEND" }, \ +- { O_NONBLOCK, "O_NONBLOCK" }, \ +- { O_DSYNC, "O_DSYNC" }, \ +- { O_DIRECT, "O_DIRECT" }, \ +- { O_LARGEFILE, "O_LARGEFILE" }, \ +- { O_DIRECTORY, "O_DIRECTORY" }, \ +- { O_NOFOLLOW, "O_NOFOLLOW" }, \ +- { O_NOATIME, "O_NOATIME" }, \ +- { O_CLOEXEC, "O_CLOEXEC" }) +- +-#define __fmode_flag(x) { (__force unsigned long)FMODE_##x, #x } +-#define show_fs_fmode_flags(x) \ +- __print_flags(x, "|", \ +- __fmode_flag(READ), \ +- __fmode_flag(WRITE), \ +- __fmode_flag(EXEC)) +- +-#ifdef CONFIG_64BIT +-#define show_fs_fcntl_cmd(x) \ +- __print_symbolic(x, \ +- { F_DUPFD, "DUPFD" }, \ +- { F_GETFD, "GETFD" }, \ +- { F_SETFD, "SETFD" }, \ +- { F_GETFL, "GETFL" }, \ +- { F_SETFL, "SETFL" }, \ +- { F_GETLK, "GETLK" }, \ +- { F_SETLK, "SETLK" }, \ +- { F_SETLKW, "SETLKW" }, \ +- { F_SETOWN, "SETOWN" }, \ +- { F_GETOWN, "GETOWN" }, \ +- { F_SETSIG, "SETSIG" }, \ +- { F_GETSIG, "GETSIG" }, \ +- { F_SETOWN_EX, "SETOWN_EX" }, \ +- { F_GETOWN_EX, "GETOWN_EX" }, \ +- { F_GETOWNER_UIDS, "GETOWNER_UIDS" }, \ +- { F_OFD_GETLK, "OFD_GETLK" }, \ +- { F_OFD_SETLK, "OFD_SETLK" }, \ +- { F_OFD_SETLKW, "OFD_SETLKW" }) +-#else /* CONFIG_64BIT */ +-#define show_fs_fcntl_cmd(x) \ +- __print_symbolic(x, \ +- { F_DUPFD, "DUPFD" }, \ +- { F_GETFD, "GETFD" }, \ +- { F_SETFD, "SETFD" }, \ +- { F_GETFL, "GETFL" }, \ +- { F_SETFL, "SETFL" }, \ +- { F_GETLK, "GETLK" }, \ +- { F_SETLK, "SETLK" }, \ +- { F_SETLKW, 
"SETLKW" }, \ +- { F_SETOWN, "SETOWN" }, \ +- { F_GETOWN, "GETOWN" }, \ +- { F_SETSIG, "SETSIG" }, \ +- { F_GETSIG, "GETSIG" }, \ +- { F_GETLK64, "GETLK64" }, \ +- { F_SETLK64, "SETLK64" }, \ +- { F_SETLKW64, "SETLKW64" }, \ +- { F_SETOWN_EX, "SETOWN_EX" }, \ +- { F_GETOWN_EX, "GETOWN_EX" }, \ +- { F_GETOWNER_UIDS, "GETOWNER_UIDS" }, \ +- { F_OFD_GETLK, "OFD_GETLK" }, \ +- { F_OFD_SETLK, "OFD_SETLK" }, \ +- { F_OFD_SETLKW, "OFD_SETLKW" }) +-#endif /* CONFIG_64BIT */ +- +-#define show_fs_fcntl_lock_type(x) \ +- __print_symbolic(x, \ +- { F_RDLCK, "RDLCK" }, \ +- { F_WRLCK, "WRLCK" }, \ +- { F_UNLCK, "UNLCK" }) +- +-#define show_fs_lookup_flags(flags) \ +- __print_flags(flags, "|", \ +- { LOOKUP_FOLLOW, "FOLLOW" }, \ +- { LOOKUP_DIRECTORY, "DIRECTORY" }, \ +- { LOOKUP_AUTOMOUNT, "AUTOMOUNT" }, \ +- { LOOKUP_EMPTY, "EMPTY" }, \ +- { LOOKUP_DOWN, "DOWN" }, \ +- { LOOKUP_MOUNTPOINT, "MOUNTPOINT" }, \ +- { LOOKUP_REVAL, "REVAL" }, \ +- { LOOKUP_RCU, "RCU" }, \ +- { LOOKUP_OPEN, "OPEN" }, \ +- { LOOKUP_CREATE, "CREATE" }, \ +- { LOOKUP_EXCL, "EXCL" }, \ +- { LOOKUP_RENAME_TARGET, "RENAME_TARGET" }, \ +- { LOOKUP_PARENT, "PARENT" }, \ +- { LOOKUP_NO_SYMLINKS, "NO_SYMLINKS" }, \ +- { LOOKUP_NO_MAGICLINKS, "NO_MAGICLINKS" }, \ +- { LOOKUP_NO_XDEV, "NO_XDEV" }, \ +- { LOOKUP_BENEATH, "BENEATH" }, \ +- { LOOKUP_IN_ROOT, "IN_ROOT" }, \ +- { LOOKUP_CACHED, "CACHED" }) +diff --git a/include/trace/events/nfs.h b/include/trace/events/nfs.h +deleted file mode 100644 +index 09ffdbb04134d..0000000000000 +--- a/include/trace/events/nfs.h ++++ /dev/null +@@ -1,375 +0,0 @@ +-/* SPDX-License-Identifier: GPL-2.0 */ +-/* +- * Display helpers for NFS protocol elements +- * +- * Author: Chuck Lever <chuck.lever@oracle.com> +- * +- * Copyright (c) 2020, Oracle and/or its affiliates. 
+- */ +- +-#include <linux/nfs.h> +-#include <linux/nfs4.h> +-#include <uapi/linux/nfs.h> +- +-TRACE_DEFINE_ENUM(NFS_OK); +-TRACE_DEFINE_ENUM(NFSERR_PERM); +-TRACE_DEFINE_ENUM(NFSERR_NOENT); +-TRACE_DEFINE_ENUM(NFSERR_IO); +-TRACE_DEFINE_ENUM(NFSERR_NXIO); +-TRACE_DEFINE_ENUM(NFSERR_EAGAIN); +-TRACE_DEFINE_ENUM(NFSERR_ACCES); +-TRACE_DEFINE_ENUM(NFSERR_EXIST); +-TRACE_DEFINE_ENUM(NFSERR_XDEV); +-TRACE_DEFINE_ENUM(NFSERR_NODEV); +-TRACE_DEFINE_ENUM(NFSERR_NOTDIR); +-TRACE_DEFINE_ENUM(NFSERR_ISDIR); +-TRACE_DEFINE_ENUM(NFSERR_INVAL); +-TRACE_DEFINE_ENUM(NFSERR_FBIG); +-TRACE_DEFINE_ENUM(NFSERR_NOSPC); +-TRACE_DEFINE_ENUM(NFSERR_ROFS); +-TRACE_DEFINE_ENUM(NFSERR_MLINK); +-TRACE_DEFINE_ENUM(NFSERR_OPNOTSUPP); +-TRACE_DEFINE_ENUM(NFSERR_NAMETOOLONG); +-TRACE_DEFINE_ENUM(NFSERR_NOTEMPTY); +-TRACE_DEFINE_ENUM(NFSERR_DQUOT); +-TRACE_DEFINE_ENUM(NFSERR_STALE); +-TRACE_DEFINE_ENUM(NFSERR_REMOTE); +-TRACE_DEFINE_ENUM(NFSERR_WFLUSH); +-TRACE_DEFINE_ENUM(NFSERR_BADHANDLE); +-TRACE_DEFINE_ENUM(NFSERR_NOT_SYNC); +-TRACE_DEFINE_ENUM(NFSERR_BAD_COOKIE); +-TRACE_DEFINE_ENUM(NFSERR_NOTSUPP); +-TRACE_DEFINE_ENUM(NFSERR_TOOSMALL); +-TRACE_DEFINE_ENUM(NFSERR_SERVERFAULT); +-TRACE_DEFINE_ENUM(NFSERR_BADTYPE); +-TRACE_DEFINE_ENUM(NFSERR_JUKEBOX); +- +-#define show_nfs_status(x) \ +- __print_symbolic(x, \ +- { NFS_OK, "OK" }, \ +- { NFSERR_PERM, "PERM" }, \ +- { NFSERR_NOENT, "NOENT" }, \ +- { NFSERR_IO, "IO" }, \ +- { NFSERR_NXIO, "NXIO" }, \ +- { ECHILD, "CHILD" }, \ +- { NFSERR_EAGAIN, "AGAIN" }, \ +- { NFSERR_ACCES, "ACCES" }, \ +- { NFSERR_EXIST, "EXIST" }, \ +- { NFSERR_XDEV, "XDEV" }, \ +- { NFSERR_NODEV, "NODEV" }, \ +- { NFSERR_NOTDIR, "NOTDIR" }, \ +- { NFSERR_ISDIR, "ISDIR" }, \ +- { NFSERR_INVAL, "INVAL" }, \ +- { NFSERR_FBIG, "FBIG" }, \ +- { NFSERR_NOSPC, "NOSPC" }, \ +- { NFSERR_ROFS, "ROFS" }, \ +- { NFSERR_MLINK, "MLINK" }, \ +- { NFSERR_OPNOTSUPP, "OPNOTSUPP" }, \ +- { NFSERR_NAMETOOLONG, "NAMETOOLONG" }, \ +- { NFSERR_NOTEMPTY, "NOTEMPTY" }, \ +- { NFSERR_DQUOT, "DQUOT" 
}, \ +- { NFSERR_STALE, "STALE" }, \ +- { NFSERR_REMOTE, "REMOTE" }, \ +- { NFSERR_WFLUSH, "WFLUSH" }, \ +- { NFSERR_BADHANDLE, "BADHANDLE" }, \ +- { NFSERR_NOT_SYNC, "NOTSYNC" }, \ +- { NFSERR_BAD_COOKIE, "BADCOOKIE" }, \ +- { NFSERR_NOTSUPP, "NOTSUPP" }, \ +- { NFSERR_TOOSMALL, "TOOSMALL" }, \ +- { NFSERR_SERVERFAULT, "REMOTEIO" }, \ +- { NFSERR_BADTYPE, "BADTYPE" }, \ +- { NFSERR_JUKEBOX, "JUKEBOX" }) +- +-TRACE_DEFINE_ENUM(NFS_UNSTABLE); +-TRACE_DEFINE_ENUM(NFS_DATA_SYNC); +-TRACE_DEFINE_ENUM(NFS_FILE_SYNC); +- +-#define show_nfs_stable_how(x) \ +- __print_symbolic(x, \ +- { NFS_UNSTABLE, "UNSTABLE" }, \ +- { NFS_DATA_SYNC, "DATA_SYNC" }, \ +- { NFS_FILE_SYNC, "FILE_SYNC" }) +- +-TRACE_DEFINE_ENUM(NFS4_OK); +-TRACE_DEFINE_ENUM(NFS4ERR_ACCESS); +-TRACE_DEFINE_ENUM(NFS4ERR_ATTRNOTSUPP); +-TRACE_DEFINE_ENUM(NFS4ERR_ADMIN_REVOKED); +-TRACE_DEFINE_ENUM(NFS4ERR_BACK_CHAN_BUSY); +-TRACE_DEFINE_ENUM(NFS4ERR_BADCHAR); +-TRACE_DEFINE_ENUM(NFS4ERR_BADHANDLE); +-TRACE_DEFINE_ENUM(NFS4ERR_BADIOMODE); +-TRACE_DEFINE_ENUM(NFS4ERR_BADLAYOUT); +-TRACE_DEFINE_ENUM(NFS4ERR_BADLABEL); +-TRACE_DEFINE_ENUM(NFS4ERR_BADNAME); +-TRACE_DEFINE_ENUM(NFS4ERR_BADOWNER); +-TRACE_DEFINE_ENUM(NFS4ERR_BADSESSION); +-TRACE_DEFINE_ENUM(NFS4ERR_BADSLOT); +-TRACE_DEFINE_ENUM(NFS4ERR_BADTYPE); +-TRACE_DEFINE_ENUM(NFS4ERR_BADXDR); +-TRACE_DEFINE_ENUM(NFS4ERR_BAD_COOKIE); +-TRACE_DEFINE_ENUM(NFS4ERR_BAD_HIGH_SLOT); +-TRACE_DEFINE_ENUM(NFS4ERR_BAD_RANGE); +-TRACE_DEFINE_ENUM(NFS4ERR_BAD_SEQID); +-TRACE_DEFINE_ENUM(NFS4ERR_BAD_SESSION_DIGEST); +-TRACE_DEFINE_ENUM(NFS4ERR_BAD_STATEID); +-TRACE_DEFINE_ENUM(NFS4ERR_CB_PATH_DOWN); +-TRACE_DEFINE_ENUM(NFS4ERR_CLID_INUSE); +-TRACE_DEFINE_ENUM(NFS4ERR_CLIENTID_BUSY); +-TRACE_DEFINE_ENUM(NFS4ERR_COMPLETE_ALREADY); +-TRACE_DEFINE_ENUM(NFS4ERR_CONN_NOT_BOUND_TO_SESSION); +-TRACE_DEFINE_ENUM(NFS4ERR_DEADLOCK); +-TRACE_DEFINE_ENUM(NFS4ERR_DEADSESSION); +-TRACE_DEFINE_ENUM(NFS4ERR_DELAY); +-TRACE_DEFINE_ENUM(NFS4ERR_DELEG_ALREADY_WANTED); 
+-TRACE_DEFINE_ENUM(NFS4ERR_DELEG_REVOKED); +-TRACE_DEFINE_ENUM(NFS4ERR_DENIED); +-TRACE_DEFINE_ENUM(NFS4ERR_DIRDELEG_UNAVAIL); +-TRACE_DEFINE_ENUM(NFS4ERR_DQUOT); +-TRACE_DEFINE_ENUM(NFS4ERR_ENCR_ALG_UNSUPP); +-TRACE_DEFINE_ENUM(NFS4ERR_EXIST); +-TRACE_DEFINE_ENUM(NFS4ERR_EXPIRED); +-TRACE_DEFINE_ENUM(NFS4ERR_FBIG); +-TRACE_DEFINE_ENUM(NFS4ERR_FHEXPIRED); +-TRACE_DEFINE_ENUM(NFS4ERR_FILE_OPEN); +-TRACE_DEFINE_ENUM(NFS4ERR_GRACE); +-TRACE_DEFINE_ENUM(NFS4ERR_HASH_ALG_UNSUPP); +-TRACE_DEFINE_ENUM(NFS4ERR_INVAL); +-TRACE_DEFINE_ENUM(NFS4ERR_IO); +-TRACE_DEFINE_ENUM(NFS4ERR_ISDIR); +-TRACE_DEFINE_ENUM(NFS4ERR_LAYOUTTRYLATER); +-TRACE_DEFINE_ENUM(NFS4ERR_LAYOUTUNAVAILABLE); +-TRACE_DEFINE_ENUM(NFS4ERR_LEASE_MOVED); +-TRACE_DEFINE_ENUM(NFS4ERR_LOCKED); +-TRACE_DEFINE_ENUM(NFS4ERR_LOCKS_HELD); +-TRACE_DEFINE_ENUM(NFS4ERR_LOCK_RANGE); +-TRACE_DEFINE_ENUM(NFS4ERR_MINOR_VERS_MISMATCH); +-TRACE_DEFINE_ENUM(NFS4ERR_MLINK); +-TRACE_DEFINE_ENUM(NFS4ERR_MOVED); +-TRACE_DEFINE_ENUM(NFS4ERR_NAMETOOLONG); +-TRACE_DEFINE_ENUM(NFS4ERR_NOENT); +-TRACE_DEFINE_ENUM(NFS4ERR_NOFILEHANDLE); +-TRACE_DEFINE_ENUM(NFS4ERR_NOMATCHING_LAYOUT); +-TRACE_DEFINE_ENUM(NFS4ERR_NOSPC); +-TRACE_DEFINE_ENUM(NFS4ERR_NOTDIR); +-TRACE_DEFINE_ENUM(NFS4ERR_NOTEMPTY); +-TRACE_DEFINE_ENUM(NFS4ERR_NOTSUPP); +-TRACE_DEFINE_ENUM(NFS4ERR_NOT_ONLY_OP); +-TRACE_DEFINE_ENUM(NFS4ERR_NOT_SAME); +-TRACE_DEFINE_ENUM(NFS4ERR_NO_GRACE); +-TRACE_DEFINE_ENUM(NFS4ERR_NXIO); +-TRACE_DEFINE_ENUM(NFS4ERR_OLD_STATEID); +-TRACE_DEFINE_ENUM(NFS4ERR_OPENMODE); +-TRACE_DEFINE_ENUM(NFS4ERR_OP_ILLEGAL); +-TRACE_DEFINE_ENUM(NFS4ERR_OP_NOT_IN_SESSION); +-TRACE_DEFINE_ENUM(NFS4ERR_PERM); +-TRACE_DEFINE_ENUM(NFS4ERR_PNFS_IO_HOLE); +-TRACE_DEFINE_ENUM(NFS4ERR_PNFS_NO_LAYOUT); +-TRACE_DEFINE_ENUM(NFS4ERR_RECALLCONFLICT); +-TRACE_DEFINE_ENUM(NFS4ERR_RECLAIM_BAD); +-TRACE_DEFINE_ENUM(NFS4ERR_RECLAIM_CONFLICT); +-TRACE_DEFINE_ENUM(NFS4ERR_REJECT_DELEG); +-TRACE_DEFINE_ENUM(NFS4ERR_REP_TOO_BIG); +-TRACE_DEFINE_ENUM(NFS4ERR_REP_TOO_BIG_TO_CACHE); 
+-TRACE_DEFINE_ENUM(NFS4ERR_REQ_TOO_BIG); +-TRACE_DEFINE_ENUM(NFS4ERR_RESOURCE); +-TRACE_DEFINE_ENUM(NFS4ERR_RESTOREFH); +-TRACE_DEFINE_ENUM(NFS4ERR_RETRY_UNCACHED_REP); +-TRACE_DEFINE_ENUM(NFS4ERR_RETURNCONFLICT); +-TRACE_DEFINE_ENUM(NFS4ERR_ROFS); +-TRACE_DEFINE_ENUM(NFS4ERR_SAME); +-TRACE_DEFINE_ENUM(NFS4ERR_SHARE_DENIED); +-TRACE_DEFINE_ENUM(NFS4ERR_SEQUENCE_POS); +-TRACE_DEFINE_ENUM(NFS4ERR_SEQ_FALSE_RETRY); +-TRACE_DEFINE_ENUM(NFS4ERR_SEQ_MISORDERED); +-TRACE_DEFINE_ENUM(NFS4ERR_SERVERFAULT); +-TRACE_DEFINE_ENUM(NFS4ERR_STALE); +-TRACE_DEFINE_ENUM(NFS4ERR_STALE_CLIENTID); +-TRACE_DEFINE_ENUM(NFS4ERR_STALE_STATEID); +-TRACE_DEFINE_ENUM(NFS4ERR_SYMLINK); +-TRACE_DEFINE_ENUM(NFS4ERR_TOOSMALL); +-TRACE_DEFINE_ENUM(NFS4ERR_TOO_MANY_OPS); +-TRACE_DEFINE_ENUM(NFS4ERR_UNKNOWN_LAYOUTTYPE); +-TRACE_DEFINE_ENUM(NFS4ERR_UNSAFE_COMPOUND); +-TRACE_DEFINE_ENUM(NFS4ERR_WRONGSEC); +-TRACE_DEFINE_ENUM(NFS4ERR_WRONG_CRED); +-TRACE_DEFINE_ENUM(NFS4ERR_WRONG_TYPE); +-TRACE_DEFINE_ENUM(NFS4ERR_XDEV); +- +-TRACE_DEFINE_ENUM(NFS4ERR_RESET_TO_MDS); +-TRACE_DEFINE_ENUM(NFS4ERR_RESET_TO_PNFS); +- +-#define show_nfs4_status(x) \ +- __print_symbolic(x, \ +- { NFS4_OK, "OK" }, \ +- { EPERM, "EPERM" }, \ +- { ENOENT, "ENOENT" }, \ +- { EIO, "EIO" }, \ +- { ENXIO, "ENXIO" }, \ +- { EACCES, "EACCES" }, \ +- { EEXIST, "EEXIST" }, \ +- { EXDEV, "EXDEV" }, \ +- { ENOTDIR, "ENOTDIR" }, \ +- { EISDIR, "EISDIR" }, \ +- { EFBIG, "EFBIG" }, \ +- { ENOSPC, "ENOSPC" }, \ +- { EROFS, "EROFS" }, \ +- { EMLINK, "EMLINK" }, \ +- { ENAMETOOLONG, "ENAMETOOLONG" }, \ +- { ENOTEMPTY, "ENOTEMPTY" }, \ +- { EDQUOT, "EDQUOT" }, \ +- { ESTALE, "ESTALE" }, \ +- { EBADHANDLE, "EBADHANDLE" }, \ +- { EBADCOOKIE, "EBADCOOKIE" }, \ +- { ENOTSUPP, "ENOTSUPP" }, \ +- { ETOOSMALL, "ETOOSMALL" }, \ +- { EREMOTEIO, "EREMOTEIO" }, \ +- { EBADTYPE, "EBADTYPE" }, \ +- { EAGAIN, "EAGAIN" }, \ +- { ELOOP, "ELOOP" }, \ +- { EOPNOTSUPP, "EOPNOTSUPP" }, \ +- { EDEADLK, "EDEADLK" }, \ +- { ENOMEM, "ENOMEM" }, \ +- { EKEYEXPIRED, 
"EKEYEXPIRED" }, \ +- { ETIMEDOUT, "ETIMEDOUT" }, \ +- { ERESTARTSYS, "ERESTARTSYS" }, \ +- { ECONNREFUSED, "ECONNREFUSED" }, \ +- { ECONNRESET, "ECONNRESET" }, \ +- { ENETUNREACH, "ENETUNREACH" }, \ +- { EHOSTUNREACH, "EHOSTUNREACH" }, \ +- { EHOSTDOWN, "EHOSTDOWN" }, \ +- { EPIPE, "EPIPE" }, \ +- { EPFNOSUPPORT, "EPFNOSUPPORT" }, \ +- { EPROTONOSUPPORT, "EPROTONOSUPPORT" }, \ +- { NFS4ERR_ACCESS, "ACCESS" }, \ +- { NFS4ERR_ATTRNOTSUPP, "ATTRNOTSUPP" }, \ +- { NFS4ERR_ADMIN_REVOKED, "ADMIN_REVOKED" }, \ +- { NFS4ERR_BACK_CHAN_BUSY, "BACK_CHAN_BUSY" }, \ +- { NFS4ERR_BADCHAR, "BADCHAR" }, \ +- { NFS4ERR_BADHANDLE, "BADHANDLE" }, \ +- { NFS4ERR_BADIOMODE, "BADIOMODE" }, \ +- { NFS4ERR_BADLAYOUT, "BADLAYOUT" }, \ +- { NFS4ERR_BADLABEL, "BADLABEL" }, \ +- { NFS4ERR_BADNAME, "BADNAME" }, \ +- { NFS4ERR_BADOWNER, "BADOWNER" }, \ +- { NFS4ERR_BADSESSION, "BADSESSION" }, \ +- { NFS4ERR_BADSLOT, "BADSLOT" }, \ +- { NFS4ERR_BADTYPE, "BADTYPE" }, \ +- { NFS4ERR_BADXDR, "BADXDR" }, \ +- { NFS4ERR_BAD_COOKIE, "BAD_COOKIE" }, \ +- { NFS4ERR_BAD_HIGH_SLOT, "BAD_HIGH_SLOT" }, \ +- { NFS4ERR_BAD_RANGE, "BAD_RANGE" }, \ +- { NFS4ERR_BAD_SEQID, "BAD_SEQID" }, \ +- { NFS4ERR_BAD_SESSION_DIGEST, "BAD_SESSION_DIGEST" }, \ +- { NFS4ERR_BAD_STATEID, "BAD_STATEID" }, \ +- { NFS4ERR_CB_PATH_DOWN, "CB_PATH_DOWN" }, \ +- { NFS4ERR_CLID_INUSE, "CLID_INUSE" }, \ +- { NFS4ERR_CLIENTID_BUSY, "CLIENTID_BUSY" }, \ +- { NFS4ERR_COMPLETE_ALREADY, "COMPLETE_ALREADY" }, \ +- { NFS4ERR_CONN_NOT_BOUND_TO_SESSION, "CONN_NOT_BOUND_TO_SESSION" }, \ +- { NFS4ERR_DEADLOCK, "DEADLOCK" }, \ +- { NFS4ERR_DEADSESSION, "DEAD_SESSION" }, \ +- { NFS4ERR_DELAY, "DELAY" }, \ +- { NFS4ERR_DELEG_ALREADY_WANTED, "DELEG_ALREADY_WANTED" }, \ +- { NFS4ERR_DELEG_REVOKED, "DELEG_REVOKED" }, \ +- { NFS4ERR_DENIED, "DENIED" }, \ +- { NFS4ERR_DIRDELEG_UNAVAIL, "DIRDELEG_UNAVAIL" }, \ +- { NFS4ERR_DQUOT, "DQUOT" }, \ +- { NFS4ERR_ENCR_ALG_UNSUPP, "ENCR_ALG_UNSUPP" }, \ +- { NFS4ERR_EXIST, "EXIST" }, \ +- { NFS4ERR_EXPIRED, 
"EXPIRED" }, \ +- { NFS4ERR_FBIG, "FBIG" }, \ +- { NFS4ERR_FHEXPIRED, "FHEXPIRED" }, \ +- { NFS4ERR_FILE_OPEN, "FILE_OPEN" }, \ +- { NFS4ERR_GRACE, "GRACE" }, \ +- { NFS4ERR_HASH_ALG_UNSUPP, "HASH_ALG_UNSUPP" }, \ +- { NFS4ERR_INVAL, "INVAL" }, \ +- { NFS4ERR_IO, "IO" }, \ +- { NFS4ERR_ISDIR, "ISDIR" }, \ +- { NFS4ERR_LAYOUTTRYLATER, "LAYOUTTRYLATER" }, \ +- { NFS4ERR_LAYOUTUNAVAILABLE, "LAYOUTUNAVAILABLE" }, \ +- { NFS4ERR_LEASE_MOVED, "LEASE_MOVED" }, \ +- { NFS4ERR_LOCKED, "LOCKED" }, \ +- { NFS4ERR_LOCKS_HELD, "LOCKS_HELD" }, \ +- { NFS4ERR_LOCK_RANGE, "LOCK_RANGE" }, \ +- { NFS4ERR_MINOR_VERS_MISMATCH, "MINOR_VERS_MISMATCH" }, \ +- { NFS4ERR_MLINK, "MLINK" }, \ +- { NFS4ERR_MOVED, "MOVED" }, \ +- { NFS4ERR_NAMETOOLONG, "NAMETOOLONG" }, \ +- { NFS4ERR_NOENT, "NOENT" }, \ +- { NFS4ERR_NOFILEHANDLE, "NOFILEHANDLE" }, \ +- { NFS4ERR_NOMATCHING_LAYOUT, "NOMATCHING_LAYOUT" }, \ +- { NFS4ERR_NOSPC, "NOSPC" }, \ +- { NFS4ERR_NOTDIR, "NOTDIR" }, \ +- { NFS4ERR_NOTEMPTY, "NOTEMPTY" }, \ +- { NFS4ERR_NOTSUPP, "NOTSUPP" }, \ +- { NFS4ERR_NOT_ONLY_OP, "NOT_ONLY_OP" }, \ +- { NFS4ERR_NOT_SAME, "NOT_SAME" }, \ +- { NFS4ERR_NO_GRACE, "NO_GRACE" }, \ +- { NFS4ERR_NXIO, "NXIO" }, \ +- { NFS4ERR_OLD_STATEID, "OLD_STATEID" }, \ +- { NFS4ERR_OPENMODE, "OPENMODE" }, \ +- { NFS4ERR_OP_ILLEGAL, "OP_ILLEGAL" }, \ +- { NFS4ERR_OP_NOT_IN_SESSION, "OP_NOT_IN_SESSION" }, \ +- { NFS4ERR_PERM, "PERM" }, \ +- { NFS4ERR_PNFS_IO_HOLE, "PNFS_IO_HOLE" }, \ +- { NFS4ERR_PNFS_NO_LAYOUT, "PNFS_NO_LAYOUT" }, \ +- { NFS4ERR_RECALLCONFLICT, "RECALLCONFLICT" }, \ +- { NFS4ERR_RECLAIM_BAD, "RECLAIM_BAD" }, \ +- { NFS4ERR_RECLAIM_CONFLICT, "RECLAIM_CONFLICT" }, \ +- { NFS4ERR_REJECT_DELEG, "REJECT_DELEG" }, \ +- { NFS4ERR_REP_TOO_BIG, "REP_TOO_BIG" }, \ +- { NFS4ERR_REP_TOO_BIG_TO_CACHE, "REP_TOO_BIG_TO_CACHE" }, \ +- { NFS4ERR_REQ_TOO_BIG, "REQ_TOO_BIG" }, \ +- { NFS4ERR_RESOURCE, "RESOURCE" }, \ +- { NFS4ERR_RESTOREFH, "RESTOREFH" }, \ +- { NFS4ERR_RETRY_UNCACHED_REP, "RETRY_UNCACHED_REP" }, \ +- { 
NFS4ERR_RETURNCONFLICT, "RETURNCONFLICT" }, \ +- { NFS4ERR_ROFS, "ROFS" }, \ +- { NFS4ERR_SAME, "SAME" }, \ +- { NFS4ERR_SHARE_DENIED, "SHARE_DENIED" }, \ +- { NFS4ERR_SEQUENCE_POS, "SEQUENCE_POS" }, \ +- { NFS4ERR_SEQ_FALSE_RETRY, "SEQ_FALSE_RETRY" }, \ +- { NFS4ERR_SEQ_MISORDERED, "SEQ_MISORDERED" }, \ +- { NFS4ERR_SERVERFAULT, "SERVERFAULT" }, \ +- { NFS4ERR_STALE, "STALE" }, \ +- { NFS4ERR_STALE_CLIENTID, "STALE_CLIENTID" }, \ +- { NFS4ERR_STALE_STATEID, "STALE_STATEID" }, \ +- { NFS4ERR_SYMLINK, "SYMLINK" }, \ +- { NFS4ERR_TOOSMALL, "TOOSMALL" }, \ +- { NFS4ERR_TOO_MANY_OPS, "TOO_MANY_OPS" }, \ +- { NFS4ERR_UNKNOWN_LAYOUTTYPE, "UNKNOWN_LAYOUTTYPE" }, \ +- { NFS4ERR_UNSAFE_COMPOUND, "UNSAFE_COMPOUND" }, \ +- { NFS4ERR_WRONGSEC, "WRONGSEC" }, \ +- { NFS4ERR_WRONG_CRED, "WRONG_CRED" }, \ +- { NFS4ERR_WRONG_TYPE, "WRONG_TYPE" }, \ +- { NFS4ERR_XDEV, "XDEV" }, \ +- /* ***** Internal to Linux NFS client ***** */ \ +- { NFS4ERR_RESET_TO_MDS, "RESET_TO_MDS" }, \ +- { NFS4ERR_RESET_TO_PNFS, "RESET_TO_PNFS" }) +- +-#define show_nfs4_verifier(x) \ +- __print_hex_str(x, NFS4_VERIFIER_SIZE) +- +-TRACE_DEFINE_ENUM(IOMODE_READ); +-TRACE_DEFINE_ENUM(IOMODE_RW); +-TRACE_DEFINE_ENUM(IOMODE_ANY); +- +-#define show_pnfs_layout_iomode(x) \ +- __print_symbolic(x, \ +- { IOMODE_READ, "READ" }, \ +- { IOMODE_RW, "RW" }, \ +- { IOMODE_ANY, "ANY" }) +- +-#define show_nfs4_seq4_status(x) \ +- __print_flags(x, "|", \ +- { SEQ4_STATUS_CB_PATH_DOWN, "CB_PATH_DOWN" }, \ +- { SEQ4_STATUS_CB_GSS_CONTEXTS_EXPIRING, "CB_GSS_CONTEXTS_EXPIRING" }, \ +- { SEQ4_STATUS_CB_GSS_CONTEXTS_EXPIRED, "CB_GSS_CONTEXTS_EXPIRED" }, \ +- { SEQ4_STATUS_EXPIRED_ALL_STATE_REVOKED, "EXPIRED_ALL_STATE_REVOKED" }, \ +- { SEQ4_STATUS_EXPIRED_SOME_STATE_REVOKED, "EXPIRED_SOME_STATE_REVOKED" }, \ +- { SEQ4_STATUS_ADMIN_STATE_REVOKED, "ADMIN_STATE_REVOKED" }, \ +- { SEQ4_STATUS_RECALLABLE_STATE_REVOKED, "RECALLABLE_STATE_REVOKED" }, \ +- { SEQ4_STATUS_LEASE_MOVED, "LEASE_MOVED" }, \ +- { 
SEQ4_STATUS_RESTART_RECLAIM_NEEDED, "RESTART_RECLAIM_NEEDED" }, \ +- { SEQ4_STATUS_CB_PATH_DOWN_SESSION, "CB_PATH_DOWN_SESSION" }, \ +- { SEQ4_STATUS_BACKCHANNEL_FAULT, "BACKCHANNEL_FAULT" }) +diff --git a/include/trace/events/rdma.h b/include/trace/events/rdma.h +deleted file mode 100644 +index 81bb454fc2888..0000000000000 +--- a/include/trace/events/rdma.h ++++ /dev/null +@@ -1,168 +0,0 @@ +-/* SPDX-License-Identifier: GPL-2.0 */ +-/* +- * Copyright (c) 2017 Oracle. All rights reserved. +- */ +- +-/* +- * enum ib_event_type, from include/rdma/ib_verbs.h +- */ +-#define IB_EVENT_LIST \ +- ib_event(CQ_ERR) \ +- ib_event(QP_FATAL) \ +- ib_event(QP_REQ_ERR) \ +- ib_event(QP_ACCESS_ERR) \ +- ib_event(COMM_EST) \ +- ib_event(SQ_DRAINED) \ +- ib_event(PATH_MIG) \ +- ib_event(PATH_MIG_ERR) \ +- ib_event(DEVICE_FATAL) \ +- ib_event(PORT_ACTIVE) \ +- ib_event(PORT_ERR) \ +- ib_event(LID_CHANGE) \ +- ib_event(PKEY_CHANGE) \ +- ib_event(SM_CHANGE) \ +- ib_event(SRQ_ERR) \ +- ib_event(SRQ_LIMIT_REACHED) \ +- ib_event(QP_LAST_WQE_REACHED) \ +- ib_event(CLIENT_REREGISTER) \ +- ib_event(GID_CHANGE) \ +- ib_event_end(WQ_FATAL) +- +-#undef ib_event +-#undef ib_event_end +- +-#define ib_event(x) TRACE_DEFINE_ENUM(IB_EVENT_##x); +-#define ib_event_end(x) TRACE_DEFINE_ENUM(IB_EVENT_##x); +- +-IB_EVENT_LIST +- +-#undef ib_event +-#undef ib_event_end +- +-#define ib_event(x) { IB_EVENT_##x, #x }, +-#define ib_event_end(x) { IB_EVENT_##x, #x } +- +-#define rdma_show_ib_event(x) \ +- __print_symbolic(x, IB_EVENT_LIST) +- +-/* +- * enum ib_wc_status type, from include/rdma/ib_verbs.h +- */ +-#define IB_WC_STATUS_LIST \ +- ib_wc_status(SUCCESS) \ +- ib_wc_status(LOC_LEN_ERR) \ +- ib_wc_status(LOC_QP_OP_ERR) \ +- ib_wc_status(LOC_EEC_OP_ERR) \ +- ib_wc_status(LOC_PROT_ERR) \ +- ib_wc_status(WR_FLUSH_ERR) \ +- ib_wc_status(MW_BIND_ERR) \ +- ib_wc_status(BAD_RESP_ERR) \ +- ib_wc_status(LOC_ACCESS_ERR) \ +- ib_wc_status(REM_INV_REQ_ERR) \ +- ib_wc_status(REM_ACCESS_ERR) \ +- 
ib_wc_status(REM_OP_ERR) \ +- ib_wc_status(RETRY_EXC_ERR) \ +- ib_wc_status(RNR_RETRY_EXC_ERR) \ +- ib_wc_status(LOC_RDD_VIOL_ERR) \ +- ib_wc_status(REM_INV_RD_REQ_ERR) \ +- ib_wc_status(REM_ABORT_ERR) \ +- ib_wc_status(INV_EECN_ERR) \ +- ib_wc_status(INV_EEC_STATE_ERR) \ +- ib_wc_status(FATAL_ERR) \ +- ib_wc_status(RESP_TIMEOUT_ERR) \ +- ib_wc_status_end(GENERAL_ERR) +- +-#undef ib_wc_status +-#undef ib_wc_status_end +- +-#define ib_wc_status(x) TRACE_DEFINE_ENUM(IB_WC_##x); +-#define ib_wc_status_end(x) TRACE_DEFINE_ENUM(IB_WC_##x); +- +-IB_WC_STATUS_LIST +- +-#undef ib_wc_status +-#undef ib_wc_status_end +- +-#define ib_wc_status(x) { IB_WC_##x, #x }, +-#define ib_wc_status_end(x) { IB_WC_##x, #x } +- +-#define rdma_show_wc_status(x) \ +- __print_symbolic(x, IB_WC_STATUS_LIST) +- +-/* +- * enum ib_cm_event_type, from include/rdma/ib_cm.h +- */ +-#define IB_CM_EVENT_LIST \ +- ib_cm_event(REQ_ERROR) \ +- ib_cm_event(REQ_RECEIVED) \ +- ib_cm_event(REP_ERROR) \ +- ib_cm_event(REP_RECEIVED) \ +- ib_cm_event(RTU_RECEIVED) \ +- ib_cm_event(USER_ESTABLISHED) \ +- ib_cm_event(DREQ_ERROR) \ +- ib_cm_event(DREQ_RECEIVED) \ +- ib_cm_event(DREP_RECEIVED) \ +- ib_cm_event(TIMEWAIT_EXIT) \ +- ib_cm_event(MRA_RECEIVED) \ +- ib_cm_event(REJ_RECEIVED) \ +- ib_cm_event(LAP_ERROR) \ +- ib_cm_event(LAP_RECEIVED) \ +- ib_cm_event(APR_RECEIVED) \ +- ib_cm_event(SIDR_REQ_ERROR) \ +- ib_cm_event(SIDR_REQ_RECEIVED) \ +- ib_cm_event_end(SIDR_REP_RECEIVED) +- +-#undef ib_cm_event +-#undef ib_cm_event_end +- +-#define ib_cm_event(x) TRACE_DEFINE_ENUM(IB_CM_##x); +-#define ib_cm_event_end(x) TRACE_DEFINE_ENUM(IB_CM_##x); +- +-IB_CM_EVENT_LIST +- +-#undef ib_cm_event +-#undef ib_cm_event_end +- +-#define ib_cm_event(x) { IB_CM_##x, #x }, +-#define ib_cm_event_end(x) { IB_CM_##x, #x } +- +-#define rdma_show_ib_cm_event(x) \ +- __print_symbolic(x, IB_CM_EVENT_LIST) +- +-/* +- * enum rdma_cm_event_type, from include/rdma/rdma_cm.h +- */ +-#define RDMA_CM_EVENT_LIST \ +- 
rdma_cm_event(ADDR_RESOLVED) \ +- rdma_cm_event(ADDR_ERROR) \ +- rdma_cm_event(ROUTE_RESOLVED) \ +- rdma_cm_event(ROUTE_ERROR) \ +- rdma_cm_event(CONNECT_REQUEST) \ +- rdma_cm_event(CONNECT_RESPONSE) \ +- rdma_cm_event(CONNECT_ERROR) \ +- rdma_cm_event(UNREACHABLE) \ +- rdma_cm_event(REJECTED) \ +- rdma_cm_event(ESTABLISHED) \ +- rdma_cm_event(DISCONNECTED) \ +- rdma_cm_event(DEVICE_REMOVAL) \ +- rdma_cm_event(MULTICAST_JOIN) \ +- rdma_cm_event(MULTICAST_ERROR) \ +- rdma_cm_event(ADDR_CHANGE) \ +- rdma_cm_event_end(TIMEWAIT_EXIT) +- +-#undef rdma_cm_event +-#undef rdma_cm_event_end +- +-#define rdma_cm_event(x) TRACE_DEFINE_ENUM(RDMA_CM_EVENT_##x); +-#define rdma_cm_event_end(x) TRACE_DEFINE_ENUM(RDMA_CM_EVENT_##x); +- +-RDMA_CM_EVENT_LIST +- +-#undef rdma_cm_event +-#undef rdma_cm_event_end +- +-#define rdma_cm_event(x) { RDMA_CM_EVENT_##x, #x }, +-#define rdma_cm_event_end(x) { RDMA_CM_EVENT_##x, #x } +- +-#define rdma_show_cm_event(x) \ +- __print_symbolic(x, RDMA_CM_EVENT_LIST) +diff --git a/include/trace/events/rpcgss.h b/include/trace/events/rpcgss.h +index c9048f3e471bb..3f121eed369e8 100644 +--- a/include/trace/events/rpcgss.h ++++ b/include/trace/events/rpcgss.h +@@ -13,7 +13,7 @@ + + #include <linux/tracepoint.h> + +-#include <trace/events/sunrpc_base.h> ++#include <trace/misc/sunrpc.h> + + /** + ** GSS-API related trace events +diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h +index fcd3b3f1020a6..8f461e04e5f09 100644 +--- a/include/trace/events/rpcrdma.h ++++ b/include/trace/events/rpcrdma.h +@@ -15,8 +15,8 @@ + #include <linux/tracepoint.h> + #include <rdma/ib_cm.h> + +-#include <trace/events/rdma.h> +-#include <trace/events/sunrpc_base.h> ++#include <trace/misc/rdma.h> ++#include <trace/misc/sunrpc.h> + + /** + ** Event classes +diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h +index f48f2ab9d238b..ffe2679a13ced 100644 +--- a/include/trace/events/sunrpc.h ++++ b/include/trace/events/sunrpc.h +@@ 
-14,7 +14,7 @@ + #include <linux/net.h> + #include <linux/tracepoint.h> + +-#include <trace/events/sunrpc_base.h> ++#include <trace/misc/sunrpc.h> + + TRACE_DEFINE_ENUM(SOCK_STREAM); + TRACE_DEFINE_ENUM(SOCK_DGRAM); +diff --git a/include/trace/events/sunrpc_base.h b/include/trace/events/sunrpc_base.h +deleted file mode 100644 +index 588557d07ea82..0000000000000 +--- a/include/trace/events/sunrpc_base.h ++++ /dev/null +@@ -1,18 +0,0 @@ +-/* SPDX-License-Identifier: GPL-2.0 */ +-/* +- * Copyright (c) 2021 Oracle and/or its affiliates. +- * +- * Common types and format specifiers for sunrpc. +- */ +- +-#if !defined(_TRACE_SUNRPC_BASE_H) +-#define _TRACE_SUNRPC_BASE_H +- +-#include <linux/tracepoint.h> +- +-#define SUNRPC_TRACE_PID_SPECIFIER "%08x" +-#define SUNRPC_TRACE_CLID_SPECIFIER "%08x" +-#define SUNRPC_TRACE_TASK_SPECIFIER \ +- "task:" SUNRPC_TRACE_PID_SPECIFIER "@" SUNRPC_TRACE_CLID_SPECIFIER +- +-#endif /* _TRACE_SUNRPC_BASE_H */ +diff --git a/include/trace/misc/fs.h b/include/trace/misc/fs.h +new file mode 100644 +index 0000000000000..738b97f22f365 +--- /dev/null ++++ b/include/trace/misc/fs.h +@@ -0,0 +1,122 @@ ++/* SPDX-License-Identifier: GPL-2.0 */ ++/* ++ * Display helpers for generic filesystem items ++ * ++ * Author: Chuck Lever <chuck.lever@oracle.com> ++ * ++ * Copyright (c) 2020, Oracle and/or its affiliates. 
++ */ ++ ++#include <linux/fs.h> ++ ++#define show_fs_dirent_type(x) \ ++ __print_symbolic(x, \ ++ { DT_UNKNOWN, "UNKNOWN" }, \ ++ { DT_FIFO, "FIFO" }, \ ++ { DT_CHR, "CHR" }, \ ++ { DT_DIR, "DIR" }, \ ++ { DT_BLK, "BLK" }, \ ++ { DT_REG, "REG" }, \ ++ { DT_LNK, "LNK" }, \ ++ { DT_SOCK, "SOCK" }, \ ++ { DT_WHT, "WHT" }) ++ ++#define show_fs_fcntl_open_flags(x) \ ++ __print_flags(x, "|", \ ++ { O_WRONLY, "O_WRONLY" }, \ ++ { O_RDWR, "O_RDWR" }, \ ++ { O_CREAT, "O_CREAT" }, \ ++ { O_EXCL, "O_EXCL" }, \ ++ { O_NOCTTY, "O_NOCTTY" }, \ ++ { O_TRUNC, "O_TRUNC" }, \ ++ { O_APPEND, "O_APPEND" }, \ ++ { O_NONBLOCK, "O_NONBLOCK" }, \ ++ { O_DSYNC, "O_DSYNC" }, \ ++ { O_DIRECT, "O_DIRECT" }, \ ++ { O_LARGEFILE, "O_LARGEFILE" }, \ ++ { O_DIRECTORY, "O_DIRECTORY" }, \ ++ { O_NOFOLLOW, "O_NOFOLLOW" }, \ ++ { O_NOATIME, "O_NOATIME" }, \ ++ { O_CLOEXEC, "O_CLOEXEC" }) ++ ++#define __fmode_flag(x) { (__force unsigned long)FMODE_##x, #x } ++#define show_fs_fmode_flags(x) \ ++ __print_flags(x, "|", \ ++ __fmode_flag(READ), \ ++ __fmode_flag(WRITE), \ ++ __fmode_flag(EXEC)) ++ ++#ifdef CONFIG_64BIT ++#define show_fs_fcntl_cmd(x) \ ++ __print_symbolic(x, \ ++ { F_DUPFD, "DUPFD" }, \ ++ { F_GETFD, "GETFD" }, \ ++ { F_SETFD, "SETFD" }, \ ++ { F_GETFL, "GETFL" }, \ ++ { F_SETFL, "SETFL" }, \ ++ { F_GETLK, "GETLK" }, \ ++ { F_SETLK, "SETLK" }, \ ++ { F_SETLKW, "SETLKW" }, \ ++ { F_SETOWN, "SETOWN" }, \ ++ { F_GETOWN, "GETOWN" }, \ ++ { F_SETSIG, "SETSIG" }, \ ++ { F_GETSIG, "GETSIG" }, \ ++ { F_SETOWN_EX, "SETOWN_EX" }, \ ++ { F_GETOWN_EX, "GETOWN_EX" }, \ ++ { F_GETOWNER_UIDS, "GETOWNER_UIDS" }, \ ++ { F_OFD_GETLK, "OFD_GETLK" }, \ ++ { F_OFD_SETLK, "OFD_SETLK" }, \ ++ { F_OFD_SETLKW, "OFD_SETLKW" }) ++#else /* CONFIG_64BIT */ ++#define show_fs_fcntl_cmd(x) \ ++ __print_symbolic(x, \ ++ { F_DUPFD, "DUPFD" }, \ ++ { F_GETFD, "GETFD" }, \ ++ { F_SETFD, "SETFD" }, \ ++ { F_GETFL, "GETFL" }, \ ++ { F_SETFL, "SETFL" }, \ ++ { F_GETLK, "GETLK" }, \ ++ { F_SETLK, "SETLK" }, \ ++ { F_SETLKW, 
"SETLKW" }, \ ++ { F_SETOWN, "SETOWN" }, \ ++ { F_GETOWN, "GETOWN" }, \ ++ { F_SETSIG, "SETSIG" }, \ ++ { F_GETSIG, "GETSIG" }, \ ++ { F_GETLK64, "GETLK64" }, \ ++ { F_SETLK64, "SETLK64" }, \ ++ { F_SETLKW64, "SETLKW64" }, \ ++ { F_SETOWN_EX, "SETOWN_EX" }, \ ++ { F_GETOWN_EX, "GETOWN_EX" }, \ ++ { F_GETOWNER_UIDS, "GETOWNER_UIDS" }, \ ++ { F_OFD_GETLK, "OFD_GETLK" }, \ ++ { F_OFD_SETLK, "OFD_SETLK" }, \ ++ { F_OFD_SETLKW, "OFD_SETLKW" }) ++#endif /* CONFIG_64BIT */ ++ ++#define show_fs_fcntl_lock_type(x) \ ++ __print_symbolic(x, \ ++ { F_RDLCK, "RDLCK" }, \ ++ { F_WRLCK, "WRLCK" }, \ ++ { F_UNLCK, "UNLCK" }) ++ ++#define show_fs_lookup_flags(flags) \ ++ __print_flags(flags, "|", \ ++ { LOOKUP_FOLLOW, "FOLLOW" }, \ ++ { LOOKUP_DIRECTORY, "DIRECTORY" }, \ ++ { LOOKUP_AUTOMOUNT, "AUTOMOUNT" }, \ ++ { LOOKUP_EMPTY, "EMPTY" }, \ ++ { LOOKUP_DOWN, "DOWN" }, \ ++ { LOOKUP_MOUNTPOINT, "MOUNTPOINT" }, \ ++ { LOOKUP_REVAL, "REVAL" }, \ ++ { LOOKUP_RCU, "RCU" }, \ ++ { LOOKUP_OPEN, "OPEN" }, \ ++ { LOOKUP_CREATE, "CREATE" }, \ ++ { LOOKUP_EXCL, "EXCL" }, \ ++ { LOOKUP_RENAME_TARGET, "RENAME_TARGET" }, \ ++ { LOOKUP_PARENT, "PARENT" }, \ ++ { LOOKUP_NO_SYMLINKS, "NO_SYMLINKS" }, \ ++ { LOOKUP_NO_MAGICLINKS, "NO_MAGICLINKS" }, \ ++ { LOOKUP_NO_XDEV, "NO_XDEV" }, \ ++ { LOOKUP_BENEATH, "BENEATH" }, \ ++ { LOOKUP_IN_ROOT, "IN_ROOT" }, \ ++ { LOOKUP_CACHED, "CACHED" }) +diff --git a/include/trace/misc/nfs.h b/include/trace/misc/nfs.h +new file mode 100644 +index 0000000000000..0d9d48dca38a8 +--- /dev/null ++++ b/include/trace/misc/nfs.h +@@ -0,0 +1,387 @@ ++/* SPDX-License-Identifier: GPL-2.0 */ ++/* ++ * Display helpers for NFS protocol elements ++ * ++ * Author: Chuck Lever <chuck.lever@oracle.com> ++ * ++ * Copyright (c) 2020, Oracle and/or its affiliates. 
++ */ ++ ++#include <linux/nfs.h> ++#include <linux/nfs4.h> ++#include <uapi/linux/nfs.h> ++ ++TRACE_DEFINE_ENUM(NFS_OK); ++TRACE_DEFINE_ENUM(NFSERR_PERM); ++TRACE_DEFINE_ENUM(NFSERR_NOENT); ++TRACE_DEFINE_ENUM(NFSERR_IO); ++TRACE_DEFINE_ENUM(NFSERR_NXIO); ++TRACE_DEFINE_ENUM(NFSERR_EAGAIN); ++TRACE_DEFINE_ENUM(NFSERR_ACCES); ++TRACE_DEFINE_ENUM(NFSERR_EXIST); ++TRACE_DEFINE_ENUM(NFSERR_XDEV); ++TRACE_DEFINE_ENUM(NFSERR_NODEV); ++TRACE_DEFINE_ENUM(NFSERR_NOTDIR); ++TRACE_DEFINE_ENUM(NFSERR_ISDIR); ++TRACE_DEFINE_ENUM(NFSERR_INVAL); ++TRACE_DEFINE_ENUM(NFSERR_FBIG); ++TRACE_DEFINE_ENUM(NFSERR_NOSPC); ++TRACE_DEFINE_ENUM(NFSERR_ROFS); ++TRACE_DEFINE_ENUM(NFSERR_MLINK); ++TRACE_DEFINE_ENUM(NFSERR_OPNOTSUPP); ++TRACE_DEFINE_ENUM(NFSERR_NAMETOOLONG); ++TRACE_DEFINE_ENUM(NFSERR_NOTEMPTY); ++TRACE_DEFINE_ENUM(NFSERR_DQUOT); ++TRACE_DEFINE_ENUM(NFSERR_STALE); ++TRACE_DEFINE_ENUM(NFSERR_REMOTE); ++TRACE_DEFINE_ENUM(NFSERR_WFLUSH); ++TRACE_DEFINE_ENUM(NFSERR_BADHANDLE); ++TRACE_DEFINE_ENUM(NFSERR_NOT_SYNC); ++TRACE_DEFINE_ENUM(NFSERR_BAD_COOKIE); ++TRACE_DEFINE_ENUM(NFSERR_NOTSUPP); ++TRACE_DEFINE_ENUM(NFSERR_TOOSMALL); ++TRACE_DEFINE_ENUM(NFSERR_SERVERFAULT); ++TRACE_DEFINE_ENUM(NFSERR_BADTYPE); ++TRACE_DEFINE_ENUM(NFSERR_JUKEBOX); ++ ++#define show_nfs_status(x) \ ++ __print_symbolic(x, \ ++ { NFS_OK, "OK" }, \ ++ { NFSERR_PERM, "PERM" }, \ ++ { NFSERR_NOENT, "NOENT" }, \ ++ { NFSERR_IO, "IO" }, \ ++ { NFSERR_NXIO, "NXIO" }, \ ++ { ECHILD, "CHILD" }, \ ++ { NFSERR_EAGAIN, "AGAIN" }, \ ++ { NFSERR_ACCES, "ACCES" }, \ ++ { NFSERR_EXIST, "EXIST" }, \ ++ { NFSERR_XDEV, "XDEV" }, \ ++ { NFSERR_NODEV, "NODEV" }, \ ++ { NFSERR_NOTDIR, "NOTDIR" }, \ ++ { NFSERR_ISDIR, "ISDIR" }, \ ++ { NFSERR_INVAL, "INVAL" }, \ ++ { NFSERR_FBIG, "FBIG" }, \ ++ { NFSERR_NOSPC, "NOSPC" }, \ ++ { NFSERR_ROFS, "ROFS" }, \ ++ { NFSERR_MLINK, "MLINK" }, \ ++ { NFSERR_OPNOTSUPP, "OPNOTSUPP" }, \ ++ { NFSERR_NAMETOOLONG, "NAMETOOLONG" }, \ ++ { NFSERR_NOTEMPTY, "NOTEMPTY" }, \ ++ { NFSERR_DQUOT, "DQUOT" 
}, \ ++ { NFSERR_STALE, "STALE" }, \ ++ { NFSERR_REMOTE, "REMOTE" }, \ ++ { NFSERR_WFLUSH, "WFLUSH" }, \ ++ { NFSERR_BADHANDLE, "BADHANDLE" }, \ ++ { NFSERR_NOT_SYNC, "NOTSYNC" }, \ ++ { NFSERR_BAD_COOKIE, "BADCOOKIE" }, \ ++ { NFSERR_NOTSUPP, "NOTSUPP" }, \ ++ { NFSERR_TOOSMALL, "TOOSMALL" }, \ ++ { NFSERR_SERVERFAULT, "REMOTEIO" }, \ ++ { NFSERR_BADTYPE, "BADTYPE" }, \ ++ { NFSERR_JUKEBOX, "JUKEBOX" }) ++ ++TRACE_DEFINE_ENUM(NFS_UNSTABLE); ++TRACE_DEFINE_ENUM(NFS_DATA_SYNC); ++TRACE_DEFINE_ENUM(NFS_FILE_SYNC); ++ ++#define show_nfs_stable_how(x) \ ++ __print_symbolic(x, \ ++ { NFS_UNSTABLE, "UNSTABLE" }, \ ++ { NFS_DATA_SYNC, "DATA_SYNC" }, \ ++ { NFS_FILE_SYNC, "FILE_SYNC" }) ++ ++TRACE_DEFINE_ENUM(NFS4_OK); ++TRACE_DEFINE_ENUM(NFS4ERR_ACCESS); ++TRACE_DEFINE_ENUM(NFS4ERR_ATTRNOTSUPP); ++TRACE_DEFINE_ENUM(NFS4ERR_ADMIN_REVOKED); ++TRACE_DEFINE_ENUM(NFS4ERR_BACK_CHAN_BUSY); ++TRACE_DEFINE_ENUM(NFS4ERR_BADCHAR); ++TRACE_DEFINE_ENUM(NFS4ERR_BADHANDLE); ++TRACE_DEFINE_ENUM(NFS4ERR_BADIOMODE); ++TRACE_DEFINE_ENUM(NFS4ERR_BADLAYOUT); ++TRACE_DEFINE_ENUM(NFS4ERR_BADLABEL); ++TRACE_DEFINE_ENUM(NFS4ERR_BADNAME); ++TRACE_DEFINE_ENUM(NFS4ERR_BADOWNER); ++TRACE_DEFINE_ENUM(NFS4ERR_BADSESSION); ++TRACE_DEFINE_ENUM(NFS4ERR_BADSLOT); ++TRACE_DEFINE_ENUM(NFS4ERR_BADTYPE); ++TRACE_DEFINE_ENUM(NFS4ERR_BADXDR); ++TRACE_DEFINE_ENUM(NFS4ERR_BAD_COOKIE); ++TRACE_DEFINE_ENUM(NFS4ERR_BAD_HIGH_SLOT); ++TRACE_DEFINE_ENUM(NFS4ERR_BAD_RANGE); ++TRACE_DEFINE_ENUM(NFS4ERR_BAD_SEQID); ++TRACE_DEFINE_ENUM(NFS4ERR_BAD_SESSION_DIGEST); ++TRACE_DEFINE_ENUM(NFS4ERR_BAD_STATEID); ++TRACE_DEFINE_ENUM(NFS4ERR_CB_PATH_DOWN); ++TRACE_DEFINE_ENUM(NFS4ERR_CLID_INUSE); ++TRACE_DEFINE_ENUM(NFS4ERR_CLIENTID_BUSY); ++TRACE_DEFINE_ENUM(NFS4ERR_COMPLETE_ALREADY); ++TRACE_DEFINE_ENUM(NFS4ERR_CONN_NOT_BOUND_TO_SESSION); ++TRACE_DEFINE_ENUM(NFS4ERR_DEADLOCK); ++TRACE_DEFINE_ENUM(NFS4ERR_DEADSESSION); ++TRACE_DEFINE_ENUM(NFS4ERR_DELAY); ++TRACE_DEFINE_ENUM(NFS4ERR_DELEG_ALREADY_WANTED); 
++TRACE_DEFINE_ENUM(NFS4ERR_DELEG_REVOKED); ++TRACE_DEFINE_ENUM(NFS4ERR_DENIED); ++TRACE_DEFINE_ENUM(NFS4ERR_DIRDELEG_UNAVAIL); ++TRACE_DEFINE_ENUM(NFS4ERR_DQUOT); ++TRACE_DEFINE_ENUM(NFS4ERR_ENCR_ALG_UNSUPP); ++TRACE_DEFINE_ENUM(NFS4ERR_EXIST); ++TRACE_DEFINE_ENUM(NFS4ERR_EXPIRED); ++TRACE_DEFINE_ENUM(NFS4ERR_FBIG); ++TRACE_DEFINE_ENUM(NFS4ERR_FHEXPIRED); ++TRACE_DEFINE_ENUM(NFS4ERR_FILE_OPEN); ++TRACE_DEFINE_ENUM(NFS4ERR_GRACE); ++TRACE_DEFINE_ENUM(NFS4ERR_HASH_ALG_UNSUPP); ++TRACE_DEFINE_ENUM(NFS4ERR_INVAL); ++TRACE_DEFINE_ENUM(NFS4ERR_IO); ++TRACE_DEFINE_ENUM(NFS4ERR_ISDIR); ++TRACE_DEFINE_ENUM(NFS4ERR_LAYOUTTRYLATER); ++TRACE_DEFINE_ENUM(NFS4ERR_LAYOUTUNAVAILABLE); ++TRACE_DEFINE_ENUM(NFS4ERR_LEASE_MOVED); ++TRACE_DEFINE_ENUM(NFS4ERR_LOCKED); ++TRACE_DEFINE_ENUM(NFS4ERR_LOCKS_HELD); ++TRACE_DEFINE_ENUM(NFS4ERR_LOCK_RANGE); ++TRACE_DEFINE_ENUM(NFS4ERR_MINOR_VERS_MISMATCH); ++TRACE_DEFINE_ENUM(NFS4ERR_MLINK); ++TRACE_DEFINE_ENUM(NFS4ERR_MOVED); ++TRACE_DEFINE_ENUM(NFS4ERR_NAMETOOLONG); ++TRACE_DEFINE_ENUM(NFS4ERR_NOENT); ++TRACE_DEFINE_ENUM(NFS4ERR_NOFILEHANDLE); ++TRACE_DEFINE_ENUM(NFS4ERR_NOMATCHING_LAYOUT); ++TRACE_DEFINE_ENUM(NFS4ERR_NOSPC); ++TRACE_DEFINE_ENUM(NFS4ERR_NOTDIR); ++TRACE_DEFINE_ENUM(NFS4ERR_NOTEMPTY); ++TRACE_DEFINE_ENUM(NFS4ERR_NOTSUPP); ++TRACE_DEFINE_ENUM(NFS4ERR_NOT_ONLY_OP); ++TRACE_DEFINE_ENUM(NFS4ERR_NOT_SAME); ++TRACE_DEFINE_ENUM(NFS4ERR_NO_GRACE); ++TRACE_DEFINE_ENUM(NFS4ERR_NXIO); ++TRACE_DEFINE_ENUM(NFS4ERR_OLD_STATEID); ++TRACE_DEFINE_ENUM(NFS4ERR_OPENMODE); ++TRACE_DEFINE_ENUM(NFS4ERR_OP_ILLEGAL); ++TRACE_DEFINE_ENUM(NFS4ERR_OP_NOT_IN_SESSION); ++TRACE_DEFINE_ENUM(NFS4ERR_PERM); ++TRACE_DEFINE_ENUM(NFS4ERR_PNFS_IO_HOLE); ++TRACE_DEFINE_ENUM(NFS4ERR_PNFS_NO_LAYOUT); ++TRACE_DEFINE_ENUM(NFS4ERR_RECALLCONFLICT); ++TRACE_DEFINE_ENUM(NFS4ERR_RECLAIM_BAD); ++TRACE_DEFINE_ENUM(NFS4ERR_RECLAIM_CONFLICT); ++TRACE_DEFINE_ENUM(NFS4ERR_REJECT_DELEG); ++TRACE_DEFINE_ENUM(NFS4ERR_REP_TOO_BIG); ++TRACE_DEFINE_ENUM(NFS4ERR_REP_TOO_BIG_TO_CACHE); 
++TRACE_DEFINE_ENUM(NFS4ERR_REQ_TOO_BIG); ++TRACE_DEFINE_ENUM(NFS4ERR_RESOURCE); ++TRACE_DEFINE_ENUM(NFS4ERR_RESTOREFH); ++TRACE_DEFINE_ENUM(NFS4ERR_RETRY_UNCACHED_REP); ++TRACE_DEFINE_ENUM(NFS4ERR_RETURNCONFLICT); ++TRACE_DEFINE_ENUM(NFS4ERR_ROFS); ++TRACE_DEFINE_ENUM(NFS4ERR_SAME); ++TRACE_DEFINE_ENUM(NFS4ERR_SHARE_DENIED); ++TRACE_DEFINE_ENUM(NFS4ERR_SEQUENCE_POS); ++TRACE_DEFINE_ENUM(NFS4ERR_SEQ_FALSE_RETRY); ++TRACE_DEFINE_ENUM(NFS4ERR_SEQ_MISORDERED); ++TRACE_DEFINE_ENUM(NFS4ERR_SERVERFAULT); ++TRACE_DEFINE_ENUM(NFS4ERR_STALE); ++TRACE_DEFINE_ENUM(NFS4ERR_STALE_CLIENTID); ++TRACE_DEFINE_ENUM(NFS4ERR_STALE_STATEID); ++TRACE_DEFINE_ENUM(NFS4ERR_SYMLINK); ++TRACE_DEFINE_ENUM(NFS4ERR_TOOSMALL); ++TRACE_DEFINE_ENUM(NFS4ERR_TOO_MANY_OPS); ++TRACE_DEFINE_ENUM(NFS4ERR_UNKNOWN_LAYOUTTYPE); ++TRACE_DEFINE_ENUM(NFS4ERR_UNSAFE_COMPOUND); ++TRACE_DEFINE_ENUM(NFS4ERR_WRONGSEC); ++TRACE_DEFINE_ENUM(NFS4ERR_WRONG_CRED); ++TRACE_DEFINE_ENUM(NFS4ERR_WRONG_TYPE); ++TRACE_DEFINE_ENUM(NFS4ERR_XDEV); ++ ++TRACE_DEFINE_ENUM(NFS4ERR_RESET_TO_MDS); ++TRACE_DEFINE_ENUM(NFS4ERR_RESET_TO_PNFS); ++ ++#define show_nfs4_status(x) \ ++ __print_symbolic(x, \ ++ { NFS4_OK, "OK" }, \ ++ { EPERM, "EPERM" }, \ ++ { ENOENT, "ENOENT" }, \ ++ { EIO, "EIO" }, \ ++ { ENXIO, "ENXIO" }, \ ++ { EACCES, "EACCES" }, \ ++ { EEXIST, "EEXIST" }, \ ++ { EXDEV, "EXDEV" }, \ ++ { ENOTDIR, "ENOTDIR" }, \ ++ { EISDIR, "EISDIR" }, \ ++ { EFBIG, "EFBIG" }, \ ++ { ENOSPC, "ENOSPC" }, \ ++ { EROFS, "EROFS" }, \ ++ { EMLINK, "EMLINK" }, \ ++ { ENAMETOOLONG, "ENAMETOOLONG" }, \ ++ { ENOTEMPTY, "ENOTEMPTY" }, \ ++ { EDQUOT, "EDQUOT" }, \ ++ { ESTALE, "ESTALE" }, \ ++ { EBADHANDLE, "EBADHANDLE" }, \ ++ { EBADCOOKIE, "EBADCOOKIE" }, \ ++ { ENOTSUPP, "ENOTSUPP" }, \ ++ { ETOOSMALL, "ETOOSMALL" }, \ ++ { EREMOTEIO, "EREMOTEIO" }, \ ++ { EBADTYPE, "EBADTYPE" }, \ ++ { EAGAIN, "EAGAIN" }, \ ++ { ELOOP, "ELOOP" }, \ ++ { EOPNOTSUPP, "EOPNOTSUPP" }, \ ++ { EDEADLK, "EDEADLK" }, \ ++ { ENOMEM, "ENOMEM" }, \ ++ { EKEYEXPIRED, 
"EKEYEXPIRED" }, \ ++ { ETIMEDOUT, "ETIMEDOUT" }, \ ++ { ERESTARTSYS, "ERESTARTSYS" }, \ ++ { ECONNREFUSED, "ECONNREFUSED" }, \ ++ { ECONNRESET, "ECONNRESET" }, \ ++ { ENETUNREACH, "ENETUNREACH" }, \ ++ { EHOSTUNREACH, "EHOSTUNREACH" }, \ ++ { EHOSTDOWN, "EHOSTDOWN" }, \ ++ { EPIPE, "EPIPE" }, \ ++ { EPFNOSUPPORT, "EPFNOSUPPORT" }, \ ++ { EPROTONOSUPPORT, "EPROTONOSUPPORT" }, \ ++ { NFS4ERR_ACCESS, "ACCESS" }, \ ++ { NFS4ERR_ATTRNOTSUPP, "ATTRNOTSUPP" }, \ ++ { NFS4ERR_ADMIN_REVOKED, "ADMIN_REVOKED" }, \ ++ { NFS4ERR_BACK_CHAN_BUSY, "BACK_CHAN_BUSY" }, \ ++ { NFS4ERR_BADCHAR, "BADCHAR" }, \ ++ { NFS4ERR_BADHANDLE, "BADHANDLE" }, \ ++ { NFS4ERR_BADIOMODE, "BADIOMODE" }, \ ++ { NFS4ERR_BADLAYOUT, "BADLAYOUT" }, \ ++ { NFS4ERR_BADLABEL, "BADLABEL" }, \ ++ { NFS4ERR_BADNAME, "BADNAME" }, \ ++ { NFS4ERR_BADOWNER, "BADOWNER" }, \ ++ { NFS4ERR_BADSESSION, "BADSESSION" }, \ ++ { NFS4ERR_BADSLOT, "BADSLOT" }, \ ++ { NFS4ERR_BADTYPE, "BADTYPE" }, \ ++ { NFS4ERR_BADXDR, "BADXDR" }, \ ++ { NFS4ERR_BAD_COOKIE, "BAD_COOKIE" }, \ ++ { NFS4ERR_BAD_HIGH_SLOT, "BAD_HIGH_SLOT" }, \ ++ { NFS4ERR_BAD_RANGE, "BAD_RANGE" }, \ ++ { NFS4ERR_BAD_SEQID, "BAD_SEQID" }, \ ++ { NFS4ERR_BAD_SESSION_DIGEST, "BAD_SESSION_DIGEST" }, \ ++ { NFS4ERR_BAD_STATEID, "BAD_STATEID" }, \ ++ { NFS4ERR_CB_PATH_DOWN, "CB_PATH_DOWN" }, \ ++ { NFS4ERR_CLID_INUSE, "CLID_INUSE" }, \ ++ { NFS4ERR_CLIENTID_BUSY, "CLIENTID_BUSY" }, \ ++ { NFS4ERR_COMPLETE_ALREADY, "COMPLETE_ALREADY" }, \ ++ { NFS4ERR_CONN_NOT_BOUND_TO_SESSION, "CONN_NOT_BOUND_TO_SESSION" }, \ ++ { NFS4ERR_DEADLOCK, "DEADLOCK" }, \ ++ { NFS4ERR_DEADSESSION, "DEAD_SESSION" }, \ ++ { NFS4ERR_DELAY, "DELAY" }, \ ++ { NFS4ERR_DELEG_ALREADY_WANTED, "DELEG_ALREADY_WANTED" }, \ ++ { NFS4ERR_DELEG_REVOKED, "DELEG_REVOKED" }, \ ++ { NFS4ERR_DENIED, "DENIED" }, \ ++ { NFS4ERR_DIRDELEG_UNAVAIL, "DIRDELEG_UNAVAIL" }, \ ++ { NFS4ERR_DQUOT, "DQUOT" }, \ ++ { NFS4ERR_ENCR_ALG_UNSUPP, "ENCR_ALG_UNSUPP" }, \ ++ { NFS4ERR_EXIST, "EXIST" }, \ ++ { NFS4ERR_EXPIRED, 
"EXPIRED" }, \ ++ { NFS4ERR_FBIG, "FBIG" }, \ ++ { NFS4ERR_FHEXPIRED, "FHEXPIRED" }, \ ++ { NFS4ERR_FILE_OPEN, "FILE_OPEN" }, \ ++ { NFS4ERR_GRACE, "GRACE" }, \ ++ { NFS4ERR_HASH_ALG_UNSUPP, "HASH_ALG_UNSUPP" }, \ ++ { NFS4ERR_INVAL, "INVAL" }, \ ++ { NFS4ERR_IO, "IO" }, \ ++ { NFS4ERR_ISDIR, "ISDIR" }, \ ++ { NFS4ERR_LAYOUTTRYLATER, "LAYOUTTRYLATER" }, \ ++ { NFS4ERR_LAYOUTUNAVAILABLE, "LAYOUTUNAVAILABLE" }, \ ++ { NFS4ERR_LEASE_MOVED, "LEASE_MOVED" }, \ ++ { NFS4ERR_LOCKED, "LOCKED" }, \ ++ { NFS4ERR_LOCKS_HELD, "LOCKS_HELD" }, \ ++ { NFS4ERR_LOCK_RANGE, "LOCK_RANGE" }, \ ++ { NFS4ERR_MINOR_VERS_MISMATCH, "MINOR_VERS_MISMATCH" }, \ ++ { NFS4ERR_MLINK, "MLINK" }, \ ++ { NFS4ERR_MOVED, "MOVED" }, \ ++ { NFS4ERR_NAMETOOLONG, "NAMETOOLONG" }, \ ++ { NFS4ERR_NOENT, "NOENT" }, \ ++ { NFS4ERR_NOFILEHANDLE, "NOFILEHANDLE" }, \ ++ { NFS4ERR_NOMATCHING_LAYOUT, "NOMATCHING_LAYOUT" }, \ ++ { NFS4ERR_NOSPC, "NOSPC" }, \ ++ { NFS4ERR_NOTDIR, "NOTDIR" }, \ ++ { NFS4ERR_NOTEMPTY, "NOTEMPTY" }, \ ++ { NFS4ERR_NOTSUPP, "NOTSUPP" }, \ ++ { NFS4ERR_NOT_ONLY_OP, "NOT_ONLY_OP" }, \ ++ { NFS4ERR_NOT_SAME, "NOT_SAME" }, \ ++ { NFS4ERR_NO_GRACE, "NO_GRACE" }, \ ++ { NFS4ERR_NXIO, "NXIO" }, \ ++ { NFS4ERR_OLD_STATEID, "OLD_STATEID" }, \ ++ { NFS4ERR_OPENMODE, "OPENMODE" }, \ ++ { NFS4ERR_OP_ILLEGAL, "OP_ILLEGAL" }, \ ++ { NFS4ERR_OP_NOT_IN_SESSION, "OP_NOT_IN_SESSION" }, \ ++ { NFS4ERR_PERM, "PERM" }, \ ++ { NFS4ERR_PNFS_IO_HOLE, "PNFS_IO_HOLE" }, \ ++ { NFS4ERR_PNFS_NO_LAYOUT, "PNFS_NO_LAYOUT" }, \ ++ { NFS4ERR_RECALLCONFLICT, "RECALLCONFLICT" }, \ ++ { NFS4ERR_RECLAIM_BAD, "RECLAIM_BAD" }, \ ++ { NFS4ERR_RECLAIM_CONFLICT, "RECLAIM_CONFLICT" }, \ ++ { NFS4ERR_REJECT_DELEG, "REJECT_DELEG" }, \ ++ { NFS4ERR_REP_TOO_BIG, "REP_TOO_BIG" }, \ ++ { NFS4ERR_REP_TOO_BIG_TO_CACHE, "REP_TOO_BIG_TO_CACHE" }, \ ++ { NFS4ERR_REQ_TOO_BIG, "REQ_TOO_BIG" }, \ ++ { NFS4ERR_RESOURCE, "RESOURCE" }, \ ++ { NFS4ERR_RESTOREFH, "RESTOREFH" }, \ ++ { NFS4ERR_RETRY_UNCACHED_REP, "RETRY_UNCACHED_REP" }, \ ++ { 
NFS4ERR_RETURNCONFLICT, "RETURNCONFLICT" }, \ ++ { NFS4ERR_ROFS, "ROFS" }, \ ++ { NFS4ERR_SAME, "SAME" }, \ ++ { NFS4ERR_SHARE_DENIED, "SHARE_DENIED" }, \ ++ { NFS4ERR_SEQUENCE_POS, "SEQUENCE_POS" }, \ ++ { NFS4ERR_SEQ_FALSE_RETRY, "SEQ_FALSE_RETRY" }, \ ++ { NFS4ERR_SEQ_MISORDERED, "SEQ_MISORDERED" }, \ ++ { NFS4ERR_SERVERFAULT, "SERVERFAULT" }, \ ++ { NFS4ERR_STALE, "STALE" }, \ ++ { NFS4ERR_STALE_CLIENTID, "STALE_CLIENTID" }, \ ++ { NFS4ERR_STALE_STATEID, "STALE_STATEID" }, \ ++ { NFS4ERR_SYMLINK, "SYMLINK" }, \ ++ { NFS4ERR_TOOSMALL, "TOOSMALL" }, \ ++ { NFS4ERR_TOO_MANY_OPS, "TOO_MANY_OPS" }, \ ++ { NFS4ERR_UNKNOWN_LAYOUTTYPE, "UNKNOWN_LAYOUTTYPE" }, \ ++ { NFS4ERR_UNSAFE_COMPOUND, "UNSAFE_COMPOUND" }, \ ++ { NFS4ERR_WRONGSEC, "WRONGSEC" }, \ ++ { NFS4ERR_WRONG_CRED, "WRONG_CRED" }, \ ++ { NFS4ERR_WRONG_TYPE, "WRONG_TYPE" }, \ ++ { NFS4ERR_XDEV, "XDEV" }, \ ++ /* ***** Internal to Linux NFS client ***** */ \ ++ { NFS4ERR_RESET_TO_MDS, "RESET_TO_MDS" }, \ ++ { NFS4ERR_RESET_TO_PNFS, "RESET_TO_PNFS" }) ++ ++#define show_nfs4_verifier(x) \ ++ __print_hex_str(x, NFS4_VERIFIER_SIZE) ++ ++TRACE_DEFINE_ENUM(IOMODE_READ); ++TRACE_DEFINE_ENUM(IOMODE_RW); ++TRACE_DEFINE_ENUM(IOMODE_ANY); ++ ++#define show_pnfs_layout_iomode(x) \ ++ __print_symbolic(x, \ ++ { IOMODE_READ, "READ" }, \ ++ { IOMODE_RW, "RW" }, \ ++ { IOMODE_ANY, "ANY" }) ++ ++#define show_rca_mask(x) \ ++ __print_flags(x, "|", \ ++ { BIT(RCA4_TYPE_MASK_RDATA_DLG), "RDATA_DLG" }, \ ++ { BIT(RCA4_TYPE_MASK_WDATA_DLG), "WDATA_DLG" }, \ ++ { BIT(RCA4_TYPE_MASK_DIR_DLG), "DIR_DLG" }, \ ++ { BIT(RCA4_TYPE_MASK_FILE_LAYOUT), "FILE_LAYOUT" }, \ ++ { BIT(RCA4_TYPE_MASK_BLK_LAYOUT), "BLK_LAYOUT" }, \ ++ { BIT(RCA4_TYPE_MASK_OBJ_LAYOUT_MIN), "OBJ_LAYOUT_MIN" }, \ ++ { BIT(RCA4_TYPE_MASK_OBJ_LAYOUT_MAX), "OBJ_LAYOUT_MAX" }, \ ++ { BIT(RCA4_TYPE_MASK_OTHER_LAYOUT_MIN), "OTHER_LAYOUT_MIN" }, \ ++ { BIT(RCA4_TYPE_MASK_OTHER_LAYOUT_MAX), "OTHER_LAYOUT_MAX" }) ++ ++#define show_nfs4_seq4_status(x) \ ++ __print_flags(x, "|", 
\ ++ { SEQ4_STATUS_CB_PATH_DOWN, "CB_PATH_DOWN" }, \ ++ { SEQ4_STATUS_CB_GSS_CONTEXTS_EXPIRING, "CB_GSS_CONTEXTS_EXPIRING" }, \ ++ { SEQ4_STATUS_CB_GSS_CONTEXTS_EXPIRED, "CB_GSS_CONTEXTS_EXPIRED" }, \ ++ { SEQ4_STATUS_EXPIRED_ALL_STATE_REVOKED, "EXPIRED_ALL_STATE_REVOKED" }, \ ++ { SEQ4_STATUS_EXPIRED_SOME_STATE_REVOKED, "EXPIRED_SOME_STATE_REVOKED" }, \ ++ { SEQ4_STATUS_ADMIN_STATE_REVOKED, "ADMIN_STATE_REVOKED" }, \ ++ { SEQ4_STATUS_RECALLABLE_STATE_REVOKED, "RECALLABLE_STATE_REVOKED" }, \ ++ { SEQ4_STATUS_LEASE_MOVED, "LEASE_MOVED" }, \ ++ { SEQ4_STATUS_RESTART_RECLAIM_NEEDED, "RESTART_RECLAIM_NEEDED" }, \ ++ { SEQ4_STATUS_CB_PATH_DOWN_SESSION, "CB_PATH_DOWN_SESSION" }, \ ++ { SEQ4_STATUS_BACKCHANNEL_FAULT, "BACKCHANNEL_FAULT" }) +diff --git a/include/trace/misc/rdma.h b/include/trace/misc/rdma.h +new file mode 100644 +index 0000000000000..81bb454fc2888 +--- /dev/null ++++ b/include/trace/misc/rdma.h +@@ -0,0 +1,168 @@ ++/* SPDX-License-Identifier: GPL-2.0 */ ++/* ++ * Copyright (c) 2017 Oracle. All rights reserved. 
++ */ ++ ++/* ++ * enum ib_event_type, from include/rdma/ib_verbs.h ++ */ ++#define IB_EVENT_LIST \ ++ ib_event(CQ_ERR) \ ++ ib_event(QP_FATAL) \ ++ ib_event(QP_REQ_ERR) \ ++ ib_event(QP_ACCESS_ERR) \ ++ ib_event(COMM_EST) \ ++ ib_event(SQ_DRAINED) \ ++ ib_event(PATH_MIG) \ ++ ib_event(PATH_MIG_ERR) \ ++ ib_event(DEVICE_FATAL) \ ++ ib_event(PORT_ACTIVE) \ ++ ib_event(PORT_ERR) \ ++ ib_event(LID_CHANGE) \ ++ ib_event(PKEY_CHANGE) \ ++ ib_event(SM_CHANGE) \ ++ ib_event(SRQ_ERR) \ ++ ib_event(SRQ_LIMIT_REACHED) \ ++ ib_event(QP_LAST_WQE_REACHED) \ ++ ib_event(CLIENT_REREGISTER) \ ++ ib_event(GID_CHANGE) \ ++ ib_event_end(WQ_FATAL) ++ ++#undef ib_event ++#undef ib_event_end ++ ++#define ib_event(x) TRACE_DEFINE_ENUM(IB_EVENT_##x); ++#define ib_event_end(x) TRACE_DEFINE_ENUM(IB_EVENT_##x); ++ ++IB_EVENT_LIST ++ ++#undef ib_event ++#undef ib_event_end ++ ++#define ib_event(x) { IB_EVENT_##x, #x }, ++#define ib_event_end(x) { IB_EVENT_##x, #x } ++ ++#define rdma_show_ib_event(x) \ ++ __print_symbolic(x, IB_EVENT_LIST) ++ ++/* ++ * enum ib_wc_status type, from include/rdma/ib_verbs.h ++ */ ++#define IB_WC_STATUS_LIST \ ++ ib_wc_status(SUCCESS) \ ++ ib_wc_status(LOC_LEN_ERR) \ ++ ib_wc_status(LOC_QP_OP_ERR) \ ++ ib_wc_status(LOC_EEC_OP_ERR) \ ++ ib_wc_status(LOC_PROT_ERR) \ ++ ib_wc_status(WR_FLUSH_ERR) \ ++ ib_wc_status(MW_BIND_ERR) \ ++ ib_wc_status(BAD_RESP_ERR) \ ++ ib_wc_status(LOC_ACCESS_ERR) \ ++ ib_wc_status(REM_INV_REQ_ERR) \ ++ ib_wc_status(REM_ACCESS_ERR) \ ++ ib_wc_status(REM_OP_ERR) \ ++ ib_wc_status(RETRY_EXC_ERR) \ ++ ib_wc_status(RNR_RETRY_EXC_ERR) \ ++ ib_wc_status(LOC_RDD_VIOL_ERR) \ ++ ib_wc_status(REM_INV_RD_REQ_ERR) \ ++ ib_wc_status(REM_ABORT_ERR) \ ++ ib_wc_status(INV_EECN_ERR) \ ++ ib_wc_status(INV_EEC_STATE_ERR) \ ++ ib_wc_status(FATAL_ERR) \ ++ ib_wc_status(RESP_TIMEOUT_ERR) \ ++ ib_wc_status_end(GENERAL_ERR) ++ ++#undef ib_wc_status ++#undef ib_wc_status_end ++ ++#define ib_wc_status(x) TRACE_DEFINE_ENUM(IB_WC_##x); ++#define ib_wc_status_end(x) 
TRACE_DEFINE_ENUM(IB_WC_##x); ++ ++IB_WC_STATUS_LIST ++ ++#undef ib_wc_status ++#undef ib_wc_status_end ++ ++#define ib_wc_status(x) { IB_WC_##x, #x }, ++#define ib_wc_status_end(x) { IB_WC_##x, #x } ++ ++#define rdma_show_wc_status(x) \ ++ __print_symbolic(x, IB_WC_STATUS_LIST) ++ ++/* ++ * enum ib_cm_event_type, from include/rdma/ib_cm.h ++ */ ++#define IB_CM_EVENT_LIST \ ++ ib_cm_event(REQ_ERROR) \ ++ ib_cm_event(REQ_RECEIVED) \ ++ ib_cm_event(REP_ERROR) \ ++ ib_cm_event(REP_RECEIVED) \ ++ ib_cm_event(RTU_RECEIVED) \ ++ ib_cm_event(USER_ESTABLISHED) \ ++ ib_cm_event(DREQ_ERROR) \ ++ ib_cm_event(DREQ_RECEIVED) \ ++ ib_cm_event(DREP_RECEIVED) \ ++ ib_cm_event(TIMEWAIT_EXIT) \ ++ ib_cm_event(MRA_RECEIVED) \ ++ ib_cm_event(REJ_RECEIVED) \ ++ ib_cm_event(LAP_ERROR) \ ++ ib_cm_event(LAP_RECEIVED) \ ++ ib_cm_event(APR_RECEIVED) \ ++ ib_cm_event(SIDR_REQ_ERROR) \ ++ ib_cm_event(SIDR_REQ_RECEIVED) \ ++ ib_cm_event_end(SIDR_REP_RECEIVED) ++ ++#undef ib_cm_event ++#undef ib_cm_event_end ++ ++#define ib_cm_event(x) TRACE_DEFINE_ENUM(IB_CM_##x); ++#define ib_cm_event_end(x) TRACE_DEFINE_ENUM(IB_CM_##x); ++ ++IB_CM_EVENT_LIST ++ ++#undef ib_cm_event ++#undef ib_cm_event_end ++ ++#define ib_cm_event(x) { IB_CM_##x, #x }, ++#define ib_cm_event_end(x) { IB_CM_##x, #x } ++ ++#define rdma_show_ib_cm_event(x) \ ++ __print_symbolic(x, IB_CM_EVENT_LIST) ++ ++/* ++ * enum rdma_cm_event_type, from include/rdma/rdma_cm.h ++ */ ++#define RDMA_CM_EVENT_LIST \ ++ rdma_cm_event(ADDR_RESOLVED) \ ++ rdma_cm_event(ADDR_ERROR) \ ++ rdma_cm_event(ROUTE_RESOLVED) \ ++ rdma_cm_event(ROUTE_ERROR) \ ++ rdma_cm_event(CONNECT_REQUEST) \ ++ rdma_cm_event(CONNECT_RESPONSE) \ ++ rdma_cm_event(CONNECT_ERROR) \ ++ rdma_cm_event(UNREACHABLE) \ ++ rdma_cm_event(REJECTED) \ ++ rdma_cm_event(ESTABLISHED) \ ++ rdma_cm_event(DISCONNECTED) \ ++ rdma_cm_event(DEVICE_REMOVAL) \ ++ rdma_cm_event(MULTICAST_JOIN) \ ++ rdma_cm_event(MULTICAST_ERROR) \ ++ rdma_cm_event(ADDR_CHANGE) \ ++ rdma_cm_event_end(TIMEWAIT_EXIT) 
++ ++#undef rdma_cm_event ++#undef rdma_cm_event_end ++ ++#define rdma_cm_event(x) TRACE_DEFINE_ENUM(RDMA_CM_EVENT_##x); ++#define rdma_cm_event_end(x) TRACE_DEFINE_ENUM(RDMA_CM_EVENT_##x); ++ ++RDMA_CM_EVENT_LIST ++ ++#undef rdma_cm_event ++#undef rdma_cm_event_end ++ ++#define rdma_cm_event(x) { RDMA_CM_EVENT_##x, #x }, ++#define rdma_cm_event_end(x) { RDMA_CM_EVENT_##x, #x } ++ ++#define rdma_show_cm_event(x) \ ++ __print_symbolic(x, RDMA_CM_EVENT_LIST) +diff --git a/include/trace/misc/sunrpc.h b/include/trace/misc/sunrpc.h +new file mode 100644 +index 0000000000000..588557d07ea82 +--- /dev/null ++++ b/include/trace/misc/sunrpc.h +@@ -0,0 +1,18 @@ ++/* SPDX-License-Identifier: GPL-2.0 */ ++/* ++ * Copyright (c) 2021 Oracle and/or its affiliates. ++ * ++ * Common types and format specifiers for sunrpc. ++ */ ++ ++#if !defined(_TRACE_SUNRPC_BASE_H) ++#define _TRACE_SUNRPC_BASE_H ++ ++#include <linux/tracepoint.h> ++ ++#define SUNRPC_TRACE_PID_SPECIFIER "%08x" ++#define SUNRPC_TRACE_CLID_SPECIFIER "%08x" ++#define SUNRPC_TRACE_TASK_SPECIFIER \ ++ "task:" SUNRPC_TRACE_PID_SPECIFIER "@" SUNRPC_TRACE_CLID_SPECIFIER ++ ++#endif /* _TRACE_SUNRPC_BASE_H */ +diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h +index 201dc77ebbd77..d5d2183730b9f 100644 +--- a/include/uapi/linux/bpf.h ++++ b/include/uapi/linux/bpf.h +@@ -3109,6 +3109,10 @@ union bpf_attr { + * **BPF_FIB_LOOKUP_DIRECT** + * Do a direct table lookup vs full lookup using FIB + * rules. ++ * **BPF_FIB_LOOKUP_TBID** ++ * Used with BPF_FIB_LOOKUP_DIRECT. ++ * Use the routing table ID present in *params*->tbid ++ * for the fib lookup. + * **BPF_FIB_LOOKUP_OUTPUT** + * Perform lookup from an egress perspective (default is + * ingress). +@@ -3117,6 +3121,11 @@ union bpf_attr { + * and *params*->smac will not be set as output. A common + * use case is to call **bpf_redirect_neigh**\ () after + * doing **bpf_fib_lookup**\ (). 
++ * **BPF_FIB_LOOKUP_SRC** ++ * Derive and set source IP addr in *params*->ipv{4,6}_src ++ * for the nexthop. If the src addr cannot be derived, ++ * **BPF_FIB_LKUP_RET_NO_SRC_ADDR** is returned. In this ++ * case, *params*->dmac and *params*->smac are not set either. + * + * *ctx* is either **struct xdp_md** for XDP programs or + * **struct sk_buff** tc cls_act programs. +@@ -6687,6 +6696,8 @@ enum { + BPF_FIB_LOOKUP_DIRECT = (1U << 0), + BPF_FIB_LOOKUP_OUTPUT = (1U << 1), + BPF_FIB_LOOKUP_SKIP_NEIGH = (1U << 2), ++ BPF_FIB_LOOKUP_TBID = (1U << 3), ++ BPF_FIB_LOOKUP_SRC = (1U << 4), + }; + + enum { +@@ -6699,6 +6710,7 @@ enum { + BPF_FIB_LKUP_RET_UNSUPP_LWT, /* fwd requires encapsulation */ + BPF_FIB_LKUP_RET_NO_NEIGH, /* no neighbor entry for nh */ + BPF_FIB_LKUP_RET_FRAG_NEEDED, /* fragmentation required to fwd */ ++ BPF_FIB_LKUP_RET_NO_SRC_ADDR, /* failed to derive IP src addr */ + }; + + struct bpf_fib_lookup { +@@ -6733,6 +6745,9 @@ struct bpf_fib_lookup { + __u32 rt_metric; + }; + ++ /* input: source address to consider for lookup ++ * output: source address result from lookup ++ */ + union { + __be32 ipv4_src; + __u32 ipv6_src[4]; /* in6_addr; network order */ +@@ -6747,9 +6762,19 @@ struct bpf_fib_lookup { + __u32 ipv6_dst[4]; /* in6_addr; network order */ + }; + +- /* output */ +- __be16 h_vlan_proto; +- __be16 h_vlan_TCI; ++ union { ++ struct { ++ /* output */ ++ __be16 h_vlan_proto; ++ __be16 h_vlan_TCI; ++ }; ++ /* input: when accompanied with the ++ * 'BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_TBID` flags, a ++ * specific routing table to use for the fib lookup. 
++ */ ++ __u32 tbid; ++ }; ++ + __u8 smac[6]; /* ETH_ALEN */ + __u8 dmac[6]; /* ETH_ALEN */ + }; +diff --git a/include/uapi/linux/in6.h b/include/uapi/linux/in6.h +index c4c53a9ab9595..ff8d21f9e95b7 100644 +--- a/include/uapi/linux/in6.h ++++ b/include/uapi/linux/in6.h +@@ -145,7 +145,7 @@ struct in6_flowlabel_req { + #define IPV6_TLV_PADN 1 + #define IPV6_TLV_ROUTERALERT 5 + #define IPV6_TLV_CALIPSO 7 /* RFC 5570 */ +-#define IPV6_TLV_IOAM 49 /* TEMPORARY IANA allocation for IOAM */ ++#define IPV6_TLV_IOAM 49 /* RFC 9486 */ + #define IPV6_TLV_JUMBO 194 + #define IPV6_TLV_HAO 201 /* home address option */ + +diff --git a/lib/nlattr.c b/lib/nlattr.c +index dffd60e4065fd..86344df0ccf7b 100644 +--- a/lib/nlattr.c ++++ b/lib/nlattr.c +@@ -30,6 +30,8 @@ static const u8 nla_attr_len[NLA_TYPE_MAX+1] = { + [NLA_S16] = sizeof(s16), + [NLA_S32] = sizeof(s32), + [NLA_S64] = sizeof(s64), ++ [NLA_BE16] = sizeof(__be16), ++ [NLA_BE32] = sizeof(__be32), + }; + + static const u8 nla_attr_minlen[NLA_TYPE_MAX+1] = { +@@ -43,6 +45,8 @@ static const u8 nla_attr_minlen[NLA_TYPE_MAX+1] = { + [NLA_S16] = sizeof(s16), + [NLA_S32] = sizeof(s32), + [NLA_S64] = sizeof(s64), ++ [NLA_BE16] = sizeof(__be16), ++ [NLA_BE32] = sizeof(__be32), + }; + + /* +diff --git a/mm/huge_memory.c b/mm/huge_memory.c +index 59577946735b1..9736e762184bd 100644 +--- a/mm/huge_memory.c ++++ b/mm/huge_memory.c +@@ -37,6 +37,7 @@ + #include <linux/page_owner.h> + #include <linux/sched/sysctl.h> + #include <linux/memory-tiers.h> ++#include <linux/compat.h> + + #include <asm/tlb.h> + #include <asm/pgalloc.h> +@@ -607,6 +608,9 @@ static unsigned long __thp_get_unmapped_area(struct file *filp, + loff_t off_align = round_up(off, size); + unsigned long len_pad, ret; + ++ if (IS_ENABLED(CONFIG_32BIT) || in_compat_syscall()) ++ return 0; ++ + if (off_end <= off_align || (off_end - off_align) < size) + return 0; + +diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c +index 6a1db678d032f..a8932d449eb63 100644 
+--- a/net/bluetooth/hci_core.c ++++ b/net/bluetooth/hci_core.c +@@ -1049,6 +1049,7 @@ static void hci_error_reset(struct work_struct *work) + { + struct hci_dev *hdev = container_of(work, struct hci_dev, error_reset); + ++ hci_dev_hold(hdev); + BT_DBG("%s", hdev->name); + + if (hdev->hw_error) +@@ -1056,10 +1057,10 @@ static void hci_error_reset(struct work_struct *work) + else + bt_dev_err(hdev, "hardware error 0x%2.2x", hdev->hw_error_code); + +- if (hci_dev_do_close(hdev)) +- return; ++ if (!hci_dev_do_close(hdev)) ++ hci_dev_do_open(hdev); + +- hci_dev_do_open(hdev); ++ hci_dev_put(hdev); + } + + void hci_uuids_clear(struct hci_dev *hdev) +diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c +index 56ecc5f97b916..452d839c152fc 100644 +--- a/net/bluetooth/hci_event.c ++++ b/net/bluetooth/hci_event.c +@@ -5282,9 +5282,12 @@ static void hci_io_capa_request_evt(struct hci_dev *hdev, void *data, + hci_dev_lock(hdev); + + conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &ev->bdaddr); +- if (!conn || !hci_conn_ssp_enabled(conn)) ++ if (!conn || !hci_dev_test_flag(hdev, HCI_SSP_ENABLED)) + goto unlock; + ++ /* Assume remote supports SSP since it has triggered this event */ ++ set_bit(HCI_CONN_SSP_ENABLED, &conn->flags); ++ + hci_conn_hold(conn); + + if (!hci_dev_test_flag(hdev, HCI_MGMT)) +@@ -6716,6 +6719,10 @@ static void hci_le_remote_conn_param_req_evt(struct hci_dev *hdev, void *data, + return send_conn_param_neg_reply(hdev, handle, + HCI_ERROR_UNKNOWN_CONN_ID); + ++ if (max > hcon->le_conn_max_interval) ++ return send_conn_param_neg_reply(hdev, handle, ++ HCI_ERROR_INVALID_LL_PARAMS); ++ + if (hci_check_conn_params(min, max, latency, timeout)) + return send_conn_param_neg_reply(hdev, handle, + HCI_ERROR_INVALID_LL_PARAMS); +@@ -7245,10 +7252,10 @@ static void hci_store_wake_reason(struct hci_dev *hdev, u8 event, + * keep track of the bdaddr of the connection event that woke us up. 
+ */ + if (event == HCI_EV_CONN_REQUEST) { +- bacpy(&hdev->wake_addr, &conn_complete->bdaddr); ++ bacpy(&hdev->wake_addr, &conn_request->bdaddr); + hdev->wake_addr_type = BDADDR_BREDR; + } else if (event == HCI_EV_CONN_COMPLETE) { +- bacpy(&hdev->wake_addr, &conn_request->bdaddr); ++ bacpy(&hdev->wake_addr, &conn_complete->bdaddr); + hdev->wake_addr_type = BDADDR_BREDR; + } else if (event == HCI_EV_LE_META) { + struct hci_ev_le_meta *le_ev = (void *)skb->data; +diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c +index 45d19294aa772..a337340464567 100644 +--- a/net/bluetooth/hci_sync.c ++++ b/net/bluetooth/hci_sync.c +@@ -2251,8 +2251,11 @@ static int hci_le_add_accept_list_sync(struct hci_dev *hdev, + + /* During suspend, only wakeable devices can be in acceptlist */ + if (hdev->suspended && +- !(params->flags & HCI_CONN_FLAG_REMOTE_WAKEUP)) ++ !(params->flags & HCI_CONN_FLAG_REMOTE_WAKEUP)) { ++ hci_le_del_accept_list_sync(hdev, &params->addr, ++ params->addr_type); + return 0; ++ } + + /* Select filter policy to accept all advertising */ + if (*num_entries >= hdev->le_accept_list_size) +@@ -5482,7 +5485,7 @@ static int hci_inquiry_sync(struct hci_dev *hdev, u8 length) + + bt_dev_dbg(hdev, ""); + +- if (hci_dev_test_flag(hdev, HCI_INQUIRY)) ++ if (test_bit(HCI_INQUIRY, &hdev->flags)) + return 0; + + hci_dev_lock(hdev); +diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c +index 81f5974e5eb5a..b4cba55be5ad9 100644 +--- a/net/bluetooth/l2cap_core.c ++++ b/net/bluetooth/l2cap_core.c +@@ -5614,7 +5614,13 @@ static inline int l2cap_conn_param_update_req(struct l2cap_conn *conn, + + memset(&rsp, 0, sizeof(rsp)); + +- err = hci_check_conn_params(min, max, latency, to_multiplier); ++ if (max > hcon->le_conn_max_interval) { ++ BT_DBG("requested connection interval exceeds current bounds."); ++ err = -EINVAL; ++ } else { ++ err = hci_check_conn_params(min, max, latency, to_multiplier); ++ } ++ + if (err) + rsp.result = 
cpu_to_le16(L2CAP_CONN_PARAM_REJECTED); + else +diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c +index 202ad43e35d6b..bff48d5763635 100644 +--- a/net/bridge/br_netfilter_hooks.c ++++ b/net/bridge/br_netfilter_hooks.c +@@ -43,6 +43,10 @@ + #include <linux/sysctl.h> + #endif + ++#if IS_ENABLED(CONFIG_NF_CONNTRACK) ++#include <net/netfilter/nf_conntrack_core.h> ++#endif ++ + static unsigned int brnf_net_id __read_mostly; + + struct brnf_net { +@@ -553,6 +557,90 @@ static unsigned int br_nf_pre_routing(void *priv, + return NF_STOLEN; + } + ++#if IS_ENABLED(CONFIG_NF_CONNTRACK) ++/* conntracks' nf_confirm logic cannot handle cloned skbs referencing ++ * the same nf_conn entry, which will happen for multicast (broadcast) ++ * Frames on bridges. ++ * ++ * Example: ++ * macvlan0 ++ * br0 ++ * ethX ethY ++ * ++ * ethX (or Y) receives multicast or broadcast packet containing ++ * an IP packet, not yet in conntrack table. ++ * ++ * 1. skb passes through bridge and fake-ip (br_netfilter)Prerouting. ++ * -> skb->_nfct now references a unconfirmed entry ++ * 2. skb is broad/mcast packet. bridge now passes clones out on each bridge ++ * interface. ++ * 3. skb gets passed up the stack. ++ * 4. In macvlan case, macvlan driver retains clone(s) of the mcast skb ++ * and schedules a work queue to send them out on the lower devices. ++ * ++ * The clone skb->_nfct is not a copy, it is the same entry as the ++ * original skb. The macvlan rx handler then returns RX_HANDLER_PASS. ++ * 5. Normal conntrack hooks (in NF_INET_LOCAL_IN) confirm the orig skb. ++ * ++ * The Macvlan broadcast worker and normal confirm path will race. ++ * ++ * This race will not happen if step 2 already confirmed a clone. In that ++ * case later steps perform skb_clone() with skb->_nfct already confirmed (in ++ * hash table). This works fine. 
++ * ++ * But such confirmation won't happen when eb/ip/nftables rules dropped the ++ * packets before they reached the nf_confirm step in postrouting. ++ * ++ * Work around this problem by explicit confirmation of the entry at ++ * LOCAL_IN time, before upper layer has a chance to clone the unconfirmed ++ * entry. ++ * ++ */ ++static unsigned int br_nf_local_in(void *priv, ++ struct sk_buff *skb, ++ const struct nf_hook_state *state) ++{ ++ struct nf_conntrack *nfct = skb_nfct(skb); ++ const struct nf_ct_hook *ct_hook; ++ struct nf_conn *ct; ++ int ret; ++ ++ if (!nfct || skb->pkt_type == PACKET_HOST) ++ return NF_ACCEPT; ++ ++ ct = container_of(nfct, struct nf_conn, ct_general); ++ if (likely(nf_ct_is_confirmed(ct))) ++ return NF_ACCEPT; ++ ++ WARN_ON_ONCE(skb_shared(skb)); ++ WARN_ON_ONCE(refcount_read(&nfct->use) != 1); ++ ++ /* We can't call nf_confirm here, it would create a dependency ++ * on nf_conntrack module. ++ */ ++ ct_hook = rcu_dereference(nf_ct_hook); ++ if (!ct_hook) { ++ skb->_nfct = 0ul; ++ nf_conntrack_put(nfct); ++ return NF_ACCEPT; ++ } ++ ++ nf_bridge_pull_encap_header(skb); ++ ret = ct_hook->confirm(skb); ++ switch (ret & NF_VERDICT_MASK) { ++ case NF_STOLEN: ++ return NF_STOLEN; ++ default: ++ nf_bridge_push_encap_header(skb); ++ break; ++ } ++ ++ ct = container_of(nfct, struct nf_conn, ct_general); ++ WARN_ON_ONCE(!nf_ct_is_confirmed(ct)); ++ ++ return ret; ++} ++#endif + + /* PF_BRIDGE/FORWARD *************************************************/ + static int br_nf_forward_finish(struct net *net, struct sock *sk, struct sk_buff *skb) +@@ -962,6 +1050,14 @@ static const struct nf_hook_ops br_nf_ops[] = { + .hooknum = NF_BR_PRE_ROUTING, + .priority = NF_BR_PRI_BRNF, + }, ++#if IS_ENABLED(CONFIG_NF_CONNTRACK) ++ { ++ .hook = br_nf_local_in, ++ .pf = NFPROTO_BRIDGE, ++ .hooknum = NF_BR_LOCAL_IN, ++ .priority = NF_BR_PRI_LAST, ++ }, ++#endif + { + .hook = br_nf_forward_ip, + .pf = NFPROTO_BRIDGE, +diff --git 
a/net/bridge/netfilter/nf_conntrack_bridge.c b/net/bridge/netfilter/nf_conntrack_bridge.c +index 06d94b2c6b5de..c7c27ada67044 100644 +--- a/net/bridge/netfilter/nf_conntrack_bridge.c ++++ b/net/bridge/netfilter/nf_conntrack_bridge.c +@@ -291,6 +291,30 @@ static unsigned int nf_ct_bridge_pre(void *priv, struct sk_buff *skb, + return nf_conntrack_in(skb, &bridge_state); + } + ++static unsigned int nf_ct_bridge_in(void *priv, struct sk_buff *skb, ++ const struct nf_hook_state *state) ++{ ++ enum ip_conntrack_info ctinfo; ++ struct nf_conn *ct; ++ ++ if (skb->pkt_type == PACKET_HOST) ++ return NF_ACCEPT; ++ ++ /* nf_conntrack_confirm() cannot handle concurrent clones, ++ * this happens for broad/multicast frames with e.g. macvlan on top ++ * of the bridge device. ++ */ ++ ct = nf_ct_get(skb, &ctinfo); ++ if (!ct || nf_ct_is_confirmed(ct) || nf_ct_is_template(ct)) ++ return NF_ACCEPT; ++ ++ /* let inet prerouting call conntrack again */ ++ skb->_nfct = 0; ++ nf_ct_put(ct); ++ ++ return NF_ACCEPT; ++} ++ + static void nf_ct_bridge_frag_save(struct sk_buff *skb, + struct nf_bridge_frag_data *data) + { +@@ -415,6 +439,12 @@ static struct nf_hook_ops nf_ct_bridge_hook_ops[] __read_mostly = { + .hooknum = NF_BR_PRE_ROUTING, + .priority = NF_IP_PRI_CONNTRACK, + }, ++ { ++ .hook = nf_ct_bridge_in, ++ .pf = NFPROTO_BRIDGE, ++ .hooknum = NF_BR_LOCAL_IN, ++ .priority = NF_IP_PRI_CONNTRACK_CONFIRM, ++ }, + { + .hook = nf_ct_bridge_post, + .pf = NFPROTO_BRIDGE, +diff --git a/net/core/filter.c b/net/core/filter.c +index 3a6110ea4009f..cb7c4651eaec8 100644 +--- a/net/core/filter.c ++++ b/net/core/filter.c +@@ -5752,6 +5752,12 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params, + u32 tbid = l3mdev_fib_table_rcu(dev) ? 
: RT_TABLE_MAIN; + struct fib_table *tb; + ++ if (flags & BPF_FIB_LOOKUP_TBID) { ++ tbid = params->tbid; ++ /* zero out for vlan output */ ++ params->tbid = 0; ++ } ++ + tb = fib_get_table(net, tbid); + if (unlikely(!tb)) + return BPF_FIB_LKUP_RET_NOT_FWDED; +@@ -5803,6 +5809,9 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params, + params->rt_metric = res.fi->fib_priority; + params->ifindex = dev->ifindex; + ++ if (flags & BPF_FIB_LOOKUP_SRC) ++ params->ipv4_src = fib_result_prefsrc(net, &res); ++ + /* xdp and cls_bpf programs are run in RCU-bh so + * rcu_read_lock_bh is not needed here + */ +@@ -5885,6 +5894,12 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params, + u32 tbid = l3mdev_fib_table_rcu(dev) ? : RT_TABLE_MAIN; + struct fib6_table *tb; + ++ if (flags & BPF_FIB_LOOKUP_TBID) { ++ tbid = params->tbid; ++ /* zero out for vlan output */ ++ params->tbid = 0; ++ } ++ + tb = ipv6_stub->fib6_get_table(net, tbid); + if (unlikely(!tb)) + return BPF_FIB_LKUP_RET_NOT_FWDED; +@@ -5939,6 +5954,18 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params, + params->rt_metric = res.f6i->fib6_metric; + params->ifindex = dev->ifindex; + ++ if (flags & BPF_FIB_LOOKUP_SRC) { ++ if (res.f6i->fib6_prefsrc.plen) { ++ *src = res.f6i->fib6_prefsrc.addr; ++ } else { ++ err = ipv6_bpf_stub->ipv6_dev_get_saddr(net, dev, ++ &fl6.daddr, 0, ++ src); ++ if (err) ++ return BPF_FIB_LKUP_RET_NO_SRC_ADDR; ++ } ++ } ++ + if (flags & BPF_FIB_LOOKUP_SKIP_NEIGH) + goto set_fwd_params; + +@@ -5957,7 +5984,8 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params, + #endif + + #define BPF_FIB_LOOKUP_MASK (BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_OUTPUT | \ +- BPF_FIB_LOOKUP_SKIP_NEIGH) ++ BPF_FIB_LOOKUP_SKIP_NEIGH | BPF_FIB_LOOKUP_TBID | \ ++ BPF_FIB_LOOKUP_SRC) + + BPF_CALL_4(bpf_xdp_fib_lookup, struct xdp_buff *, ctx, + struct bpf_fib_lookup *, params, int, plen, u32, flags) +diff --git 
a/net/core/rtnetlink.c b/net/core/rtnetlink.c +index 7cf1e42d7f93b..ac379e4590f8d 100644 +--- a/net/core/rtnetlink.c ++++ b/net/core/rtnetlink.c +@@ -5026,10 +5026,9 @@ static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, + struct net *net = sock_net(skb->sk); + struct ifinfomsg *ifm; + struct net_device *dev; +- struct nlattr *br_spec, *attr = NULL; ++ struct nlattr *br_spec, *attr, *br_flags_attr = NULL; + int rem, err = -EOPNOTSUPP; + u16 flags = 0; +- bool have_flags = false; + + if (nlmsg_len(nlh) < sizeof(*ifm)) + return -EINVAL; +@@ -5047,11 +5046,11 @@ static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, + br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC); + if (br_spec) { + nla_for_each_nested(attr, br_spec, rem) { +- if (nla_type(attr) == IFLA_BRIDGE_FLAGS && !have_flags) { ++ if (nla_type(attr) == IFLA_BRIDGE_FLAGS && !br_flags_attr) { + if (nla_len(attr) < sizeof(flags)) + return -EINVAL; + +- have_flags = true; ++ br_flags_attr = attr; + flags = nla_get_u16(attr); + } + +@@ -5095,8 +5094,8 @@ static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, + } + } + +- if (have_flags) +- memcpy(nla_data(attr), &flags, sizeof(flags)); ++ if (br_flags_attr) ++ memcpy(nla_data(br_flags_attr), &flags, sizeof(flags)); + out: + return err; + } +diff --git a/net/hsr/hsr_forward.c b/net/hsr/hsr_forward.c +index 80cdc6f6b34c9..0323ab5023c69 100644 +--- a/net/hsr/hsr_forward.c ++++ b/net/hsr/hsr_forward.c +@@ -83,7 +83,7 @@ static bool is_supervision_frame(struct hsr_priv *hsr, struct sk_buff *skb) + return false; + + /* Get next tlv */ +- total_length += sizeof(struct hsr_sup_tlv) + hsr_sup_tag->tlv.HSR_TLV_length; ++ total_length += hsr_sup_tag->tlv.HSR_TLV_length; + if (!pskb_may_pull(skb, total_length)) + return false; + skb_pull(skb, total_length); +diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c +index 24961b304dad0..328f9068c6a43 100644 +--- a/net/ipv4/ip_tunnel.c ++++ 
b/net/ipv4/ip_tunnel.c +@@ -540,6 +540,20 @@ static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb, + return 0; + } + ++static void ip_tunnel_adj_headroom(struct net_device *dev, unsigned int headroom) ++{ ++ /* we must cap headroom to some upperlimit, else pskb_expand_head ++ * will overflow header offsets in skb_headers_offset_update(). ++ */ ++ static const unsigned int max_allowed = 512; ++ ++ if (headroom > max_allowed) ++ headroom = max_allowed; ++ ++ if (headroom > READ_ONCE(dev->needed_headroom)) ++ WRITE_ONCE(dev->needed_headroom, headroom); ++} ++ + void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, + u8 proto, int tunnel_hlen) + { +@@ -614,13 +628,13 @@ void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, + } + + headroom += LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len; +- if (headroom > READ_ONCE(dev->needed_headroom)) +- WRITE_ONCE(dev->needed_headroom, headroom); +- +- if (skb_cow_head(skb, READ_ONCE(dev->needed_headroom))) { ++ if (skb_cow_head(skb, headroom)) { + ip_rt_put(rt); + goto tx_dropped; + } ++ ++ ip_tunnel_adj_headroom(dev, headroom); ++ + iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, proto, tos, ttl, + df, !net_eq(tunnel->net, dev_net(dev))); + return; +@@ -800,16 +814,16 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, + + max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr) + + rt->dst.header_len + ip_encap_hlen(&tunnel->encap); +- if (max_headroom > READ_ONCE(dev->needed_headroom)) +- WRITE_ONCE(dev->needed_headroom, max_headroom); + +- if (skb_cow_head(skb, READ_ONCE(dev->needed_headroom))) { ++ if (skb_cow_head(skb, max_headroom)) { + ip_rt_put(rt); + dev->stats.tx_dropped++; + kfree_skb(skb); + return; + } + ++ ip_tunnel_adj_headroom(dev, max_headroom); ++ + iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, protocol, tos, ttl, + df, !net_eq(tunnel->net, dev_net(dev))); + return; +diff --git a/net/ipv4/netfilter/nf_reject_ipv4.c 
b/net/ipv4/netfilter/nf_reject_ipv4.c +index 4073762996e22..fc761915c5f6f 100644 +--- a/net/ipv4/netfilter/nf_reject_ipv4.c ++++ b/net/ipv4/netfilter/nf_reject_ipv4.c +@@ -279,6 +279,7 @@ void nf_send_reset(struct net *net, struct sock *sk, struct sk_buff *oldskb, + goto free_nskb; + + nf_ct_attach(nskb, oldskb); ++ nf_ct_set_closing(skb_nfct(oldskb)); + + #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) + /* If we use ip_local_out for bridged traffic, the MAC source on +diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c +index 46527b5cc8f0c..1648373692a99 100644 +--- a/net/ipv6/addrconf.c ++++ b/net/ipv6/addrconf.c +@@ -5473,9 +5473,10 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh, + } + + addr = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL], &peer); +- if (!addr) +- return -EINVAL; +- ++ if (!addr) { ++ err = -EINVAL; ++ goto errout; ++ } + ifm = nlmsg_data(nlh); + if (ifm->ifa_index) + dev = dev_get_by_index(tgt_net, ifm->ifa_index); +diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c +index 0b42eb8c55aaf..62247621cea52 100644 +--- a/net/ipv6/af_inet6.c ++++ b/net/ipv6/af_inet6.c +@@ -1077,6 +1077,7 @@ static const struct ipv6_bpf_stub ipv6_bpf_stub_impl = { + .udp6_lib_lookup = __udp6_lib_lookup, + .ipv6_setsockopt = do_ipv6_setsockopt, + .ipv6_getsockopt = do_ipv6_getsockopt, ++ .ipv6_dev_get_saddr = ipv6_dev_get_saddr, + }; + + static int __init inet6_init(void) +diff --git a/net/ipv6/netfilter/nf_reject_ipv6.c b/net/ipv6/netfilter/nf_reject_ipv6.c +index 433d98bbe33f7..71d692728230e 100644 +--- a/net/ipv6/netfilter/nf_reject_ipv6.c ++++ b/net/ipv6/netfilter/nf_reject_ipv6.c +@@ -344,6 +344,7 @@ void nf_send_reset6(struct net *net, struct sock *sk, struct sk_buff *oldskb, + nf_reject_ip6_tcphdr_put(nskb, oldskb, otcph, otcplen); + + nf_ct_attach(nskb, oldskb); ++ nf_ct_set_closing(skb_nfct(oldskb)); + + #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) + /* If we use ip6_local_out for bridged traffic, the MAC source on +diff --git 
a/net/mctp/route.c b/net/mctp/route.c +index 256bf0b89e6ca..0144d8ebdaefb 100644 +--- a/net/mctp/route.c ++++ b/net/mctp/route.c +@@ -888,7 +888,7 @@ int mctp_local_output(struct sock *sk, struct mctp_route *rt, + dev = dev_get_by_index_rcu(sock_net(sk), cb->ifindex); + if (!dev) { + rcu_read_unlock(); +- return rc; ++ goto out_free; + } + rt->dev = __mctp_dev_get(dev); + rcu_read_unlock(); +@@ -903,7 +903,8 @@ int mctp_local_output(struct sock *sk, struct mctp_route *rt, + rt->mtu = 0; + + } else { +- return -EINVAL; ++ rc = -EINVAL; ++ goto out_free; + } + + spin_lock_irqsave(&rt->dev->addrs_lock, flags); +@@ -966,12 +967,17 @@ int mctp_local_output(struct sock *sk, struct mctp_route *rt, + rc = mctp_do_fragment_route(rt, skb, mtu, tag); + } + ++ /* route output functions consume the skb, even on error */ ++ skb = NULL; ++ + out_release: + if (!ext_rt) + mctp_route_release(rt); + + mctp_dev_put(tmp_rt.dev); + ++out_free: ++ kfree_skb(skb); + return rc; + } + +diff --git a/net/mptcp/diag.c b/net/mptcp/diag.c +index e57c5f47f0351..7017dd60659dc 100644 +--- a/net/mptcp/diag.c ++++ b/net/mptcp/diag.c +@@ -21,6 +21,9 @@ static int subflow_get_info(struct sock *sk, struct sk_buff *skb) + bool slow; + int err; + ++ if (inet_sk_state_load(sk) == TCP_LISTEN) ++ return 0; ++ + start = nla_nest_start_noflag(skb, INET_ULP_INFO_MPTCP); + if (!start) + return -EMSGSIZE; +@@ -65,7 +68,7 @@ static int subflow_get_info(struct sock *sk, struct sk_buff *skb) + sf->map_data_len) || + nla_put_u32(skb, MPTCP_SUBFLOW_ATTR_FLAGS, flags) || + nla_put_u8(skb, MPTCP_SUBFLOW_ATTR_ID_REM, sf->remote_id) || +- nla_put_u8(skb, MPTCP_SUBFLOW_ATTR_ID_LOC, sf->local_id)) { ++ nla_put_u8(skb, MPTCP_SUBFLOW_ATTR_ID_LOC, subflow_get_local_id(sf))) { + err = -EMSGSIZE; + goto nla_failure; + } +diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c +index 70a1025f093cf..3328870b0c1f8 100644 +--- a/net/mptcp/pm_netlink.c ++++ b/net/mptcp/pm_netlink.c +@@ -407,23 +407,12 @@ void 
mptcp_pm_free_anno_list(struct mptcp_sock *msk) + } + } + +-static bool lookup_address_in_vec(const struct mptcp_addr_info *addrs, unsigned int nr, +- const struct mptcp_addr_info *addr) +-{ +- int i; +- +- for (i = 0; i < nr; i++) { +- if (addrs[i].id == addr->id) +- return true; +- } +- +- return false; +-} +- + /* Fill all the remote addresses into the array addrs[], + * and return the array size. + */ +-static unsigned int fill_remote_addresses_vec(struct mptcp_sock *msk, bool fullmesh, ++static unsigned int fill_remote_addresses_vec(struct mptcp_sock *msk, ++ struct mptcp_addr_info *local, ++ bool fullmesh, + struct mptcp_addr_info *addrs) + { + bool deny_id0 = READ_ONCE(msk->pm.remote_deny_join_id0); +@@ -446,15 +435,28 @@ static unsigned int fill_remote_addresses_vec(struct mptcp_sock *msk, bool fullm + msk->pm.subflows++; + addrs[i++] = remote; + } else { ++ DECLARE_BITMAP(unavail_id, MPTCP_PM_MAX_ADDR_ID + 1); ++ ++ /* Forbid creation of new subflows matching existing ++ * ones, possibly already created by incoming ADD_ADDR ++ */ ++ bitmap_zero(unavail_id, MPTCP_PM_MAX_ADDR_ID + 1); ++ mptcp_for_each_subflow(msk, subflow) ++ if (READ_ONCE(subflow->local_id) == local->id) ++ __set_bit(subflow->remote_id, unavail_id); ++ + mptcp_for_each_subflow(msk, subflow) { + ssk = mptcp_subflow_tcp_sock(subflow); + remote_address((struct sock_common *)ssk, &addrs[i]); +- addrs[i].id = subflow->remote_id; ++ addrs[i].id = READ_ONCE(subflow->remote_id); + if (deny_id0 && !addrs[i].id) + continue; + +- if (!lookup_address_in_vec(addrs, i, &addrs[i]) && +- msk->pm.subflows < subflows_max) { ++ if (msk->pm.subflows < subflows_max) { ++ /* forbid creating multiple address towards ++ * this id ++ */ ++ __set_bit(addrs[i].id, unavail_id); + msk->pm.subflows++; + i++; + } +@@ -603,7 +605,7 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk) + fullmesh = !!(local->flags & MPTCP_PM_ADDR_FLAG_FULLMESH); + + msk->pm.local_addr_used++; +- nr = 
fill_remote_addresses_vec(msk, fullmesh, addrs); ++ nr = fill_remote_addresses_vec(msk, &local->addr, fullmesh, addrs); + if (nr) + __clear_bit(local->addr.id, msk->pm.id_avail_bitmap); + spin_unlock_bh(&msk->pm.lock); +@@ -798,18 +800,18 @@ static void mptcp_pm_nl_rm_addr_or_subflow(struct mptcp_sock *msk, + + mptcp_for_each_subflow_safe(msk, subflow, tmp) { + struct sock *ssk = mptcp_subflow_tcp_sock(subflow); ++ u8 remote_id = READ_ONCE(subflow->remote_id); + int how = RCV_SHUTDOWN | SEND_SHUTDOWN; +- u8 id = subflow->local_id; ++ u8 id = subflow_get_local_id(subflow); + +- if (rm_type == MPTCP_MIB_RMADDR && subflow->remote_id != rm_id) ++ if (rm_type == MPTCP_MIB_RMADDR && remote_id != rm_id) + continue; + if (rm_type == MPTCP_MIB_RMSUBFLOW && !mptcp_local_id_match(msk, id, rm_id)) + continue; + + pr_debug(" -> %s rm_list_ids[%d]=%u local_id=%u remote_id=%u mpc_id=%u", + rm_type == MPTCP_MIB_RMADDR ? "address" : "subflow", +- i, rm_id, subflow->local_id, subflow->remote_id, +- msk->mpc_endpoint_id); ++ i, rm_id, id, remote_id, msk->mpc_endpoint_id); + spin_unlock_bh(&msk->pm.lock); + mptcp_subflow_shutdown(sk, ssk, how); + +@@ -2028,7 +2030,7 @@ static int mptcp_event_add_subflow(struct sk_buff *skb, const struct sock *ssk) + if (WARN_ON_ONCE(!sf)) + return -EINVAL; + +- if (nla_put_u8(skb, MPTCP_ATTR_LOC_ID, sf->local_id)) ++ if (nla_put_u8(skb, MPTCP_ATTR_LOC_ID, subflow_get_local_id(sf))) + return -EMSGSIZE; + + if (nla_put_u8(skb, MPTCP_ATTR_REM_ID, sf->remote_id)) +diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c +index 631fa104617c3..414ed70e7ba2e 100644 +--- a/net/mptcp/pm_userspace.c ++++ b/net/mptcp/pm_userspace.c +@@ -233,7 +233,7 @@ static int mptcp_userspace_pm_remove_id_zero_address(struct mptcp_sock *msk, + + lock_sock(sk); + mptcp_for_each_subflow(msk, subflow) { +- if (subflow->local_id == 0) { ++ if (READ_ONCE(subflow->local_id) == 0) { + has_id_0 = true; + break; + } +@@ -489,6 +489,16 @@ int mptcp_nl_cmd_sf_destroy(struct 
sk_buff *skb, struct genl_info *info) + goto destroy_err; + } + ++#if IS_ENABLED(CONFIG_MPTCP_IPV6) ++ if (addr_l.family == AF_INET && ipv6_addr_v4mapped(&addr_r.addr6)) { ++ ipv6_addr_set_v4mapped(addr_l.addr.s_addr, &addr_l.addr6); ++ addr_l.family = AF_INET6; ++ } ++ if (addr_r.family == AF_INET && ipv6_addr_v4mapped(&addr_l.addr6)) { ++ ipv6_addr_set_v4mapped(addr_r.addr.s_addr, &addr_r.addr6); ++ addr_r.family = AF_INET6; ++ } ++#endif + if (addr_l.family != addr_r.family) { + GENL_SET_ERR_MSG(info, "address families do not match"); + err = -EINVAL; +diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c +index 859b18cb8e4f6..3bc21581486ae 100644 +--- a/net/mptcp/protocol.c ++++ b/net/mptcp/protocol.c +@@ -119,7 +119,7 @@ static int __mptcp_socket_create(struct mptcp_sock *msk) + subflow->request_mptcp = 1; + + /* This is the first subflow, always with id 0 */ +- subflow->local_id_valid = 1; ++ WRITE_ONCE(subflow->local_id, 0); + mptcp_sock_graft(msk->first, sk->sk_socket); + + return 0; +@@ -1319,6 +1319,7 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk, + mpext = skb_ext_find(skb, SKB_EXT_MPTCP); + if (!mptcp_skb_can_collapse_to(data_seq, skb, mpext)) { + TCP_SKB_CB(skb)->eor = 1; ++ tcp_mark_push(tcp_sk(ssk), skb); + goto alloc_skb; + } + +@@ -2440,6 +2441,8 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk, + need_push = (flags & MPTCP_CF_PUSH) && __mptcp_retransmit_pending_data(sk); + if (!dispose_it) { + __mptcp_subflow_disconnect(ssk, subflow, flags); ++ if (msk->subflow && ssk == msk->subflow->sk) ++ msk->subflow->state = SS_UNCONNECTED; + release_sock(ssk); + + goto out; +@@ -3166,8 +3169,50 @@ static struct ipv6_pinfo *mptcp_inet6_sk(const struct sock *sk) + + return (struct ipv6_pinfo *)(((u8 *)sk) + offset); + } ++ ++static void mptcp_copy_ip6_options(struct sock *newsk, const struct sock *sk) ++{ ++ const struct ipv6_pinfo *np = inet6_sk(sk); ++ struct ipv6_txoptions *opt; ++ struct ipv6_pinfo *newnp; ++ ++ 
newnp = inet6_sk(newsk); ++ ++ rcu_read_lock(); ++ opt = rcu_dereference(np->opt); ++ if (opt) { ++ opt = ipv6_dup_options(newsk, opt); ++ if (!opt) ++ net_warn_ratelimited("%s: Failed to copy ip6 options\n", __func__); ++ } ++ RCU_INIT_POINTER(newnp->opt, opt); ++ rcu_read_unlock(); ++} + #endif + ++static void mptcp_copy_ip_options(struct sock *newsk, const struct sock *sk) ++{ ++ struct ip_options_rcu *inet_opt, *newopt = NULL; ++ const struct inet_sock *inet = inet_sk(sk); ++ struct inet_sock *newinet; ++ ++ newinet = inet_sk(newsk); ++ ++ rcu_read_lock(); ++ inet_opt = rcu_dereference(inet->inet_opt); ++ if (inet_opt) { ++ newopt = sock_kmalloc(newsk, sizeof(*inet_opt) + ++ inet_opt->opt.optlen, GFP_ATOMIC); ++ if (newopt) ++ memcpy(newopt, inet_opt, sizeof(*inet_opt) + ++ inet_opt->opt.optlen); ++ else ++ net_warn_ratelimited("%s: Failed to copy ip options\n", __func__); ++ } ++ RCU_INIT_POINTER(newinet->inet_opt, newopt); ++ rcu_read_unlock(); ++} ++ + struct sock *mptcp_sk_clone_init(const struct sock *sk, + const struct mptcp_options_received *mp_opt, + struct sock *ssk, +@@ -3188,6 +3233,13 @@ struct sock *mptcp_sk_clone_init(const struct sock *sk, + + __mptcp_init_sock(nsk); + ++#if IS_ENABLED(CONFIG_MPTCP_IPV6) ++ if (nsk->sk_family == AF_INET6) ++ mptcp_copy_ip6_options(nsk, sk); ++ else ++#endif ++ mptcp_copy_ip_options(nsk, sk); ++ + msk = mptcp_sk(nsk); + msk->local_key = subflow_req->local_key; + msk->token = subflow_req->token; +@@ -3200,7 +3252,7 @@ struct sock *mptcp_sk_clone_init(const struct sock *sk, + msk->write_seq = subflow_req->idsn + 1; + msk->snd_nxt = msk->write_seq; + msk->snd_una = msk->write_seq; +- msk->wnd_end = msk->snd_nxt + req->rsk_rcv_wnd; ++ msk->wnd_end = msk->snd_nxt + tcp_sk(ssk)->snd_wnd; + msk->setsockopt_seq = mptcp_sk(sk)->setsockopt_seq; + + if (mp_opt->suboptions & OPTIONS_MPTCP_MPC) { +diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h +index b092205213234..2bc37773e7803 100644 +--- a/net/mptcp/protocol.h 
++++ b/net/mptcp/protocol.h +@@ -475,7 +475,6 @@ struct mptcp_subflow_context { + can_ack : 1, /* only after processing the remote a key */ + disposable : 1, /* ctx can be free at ulp release time */ + stale : 1, /* unable to snd/rcv data, do not use for xmit */ +- local_id_valid : 1, /* local_id is correctly initialized */ + valid_csum_seen : 1; /* at least one csum validated */ + enum mptcp_data_avail data_avail; + u32 remote_nonce; +@@ -483,7 +482,7 @@ struct mptcp_subflow_context { + u32 local_nonce; + u32 remote_token; + u8 hmac[MPTCPOPT_HMAC_LEN]; +- u8 local_id; ++ s16 local_id; /* if negative not initialized yet */ + u8 remote_id; + u8 reset_seen:1; + u8 reset_transient:1; +@@ -529,6 +528,7 @@ mptcp_subflow_ctx_reset(struct mptcp_subflow_context *subflow) + { + memset(&subflow->reset, 0, sizeof(subflow->reset)); + subflow->request_mptcp = 1; ++ WRITE_ONCE(subflow->local_id, -1); + } + + static inline u64 +@@ -909,6 +909,15 @@ bool mptcp_pm_rm_addr_signal(struct mptcp_sock *msk, unsigned int remaining, + int mptcp_pm_get_local_id(struct mptcp_sock *msk, struct sock_common *skc); + int mptcp_userspace_pm_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_info *skc); + ++static inline u8 subflow_get_local_id(const struct mptcp_subflow_context *subflow) ++{ ++ int local_id = READ_ONCE(subflow->local_id); ++ ++ if (local_id < 0) ++ return 0; ++ return local_id; ++} ++ + void __init mptcp_pm_nl_init(void); + void mptcp_pm_nl_work(struct mptcp_sock *msk); + void mptcp_pm_nl_rm_subflow_received(struct mptcp_sock *msk, +diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c +index 45d20e20cfc00..891c2f4fed080 100644 +--- a/net/mptcp/subflow.c ++++ b/net/mptcp/subflow.c +@@ -446,7 +446,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) + subflow->backup = mp_opt.backup; + subflow->thmac = mp_opt.thmac; + subflow->remote_nonce = mp_opt.nonce; +- subflow->remote_id = mp_opt.join_id; ++ WRITE_ONCE(subflow->remote_id, mp_opt.join_id); 
+ pr_debug("subflow=%p, thmac=%llu, remote_nonce=%u backup=%d", + subflow, subflow->thmac, subflow->remote_nonce, + subflow->backup); +@@ -489,8 +489,8 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) + + static void subflow_set_local_id(struct mptcp_subflow_context *subflow, int local_id) + { +- subflow->local_id = local_id; +- subflow->local_id_valid = 1; ++ WARN_ON_ONCE(local_id < 0 || local_id > 255); ++ WRITE_ONCE(subflow->local_id, local_id); + } + + static int subflow_chk_local_id(struct sock *sk) +@@ -499,7 +499,7 @@ static int subflow_chk_local_id(struct sock *sk) + struct mptcp_sock *msk = mptcp_sk(subflow->conn); + int err; + +- if (likely(subflow->local_id_valid)) ++ if (likely(subflow->local_id >= 0)) + return 0; + + err = mptcp_pm_get_local_id(msk, (struct sock_common *)sk); +@@ -1477,7 +1477,7 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc, + pr_debug("msk=%p remote_token=%u local_id=%d remote_id=%d", msk, + remote_token, local_id, remote_id); + subflow->remote_token = remote_token; +- subflow->remote_id = remote_id; ++ WRITE_ONCE(subflow->remote_id, remote_id); + subflow->request_join = 1; + subflow->request_bkup = !!(flags & MPTCP_PM_ADDR_FLAG_BACKUP); + mptcp_info2sockaddr(remote, &addr, ssk->sk_family); +@@ -1630,6 +1630,7 @@ static struct mptcp_subflow_context *subflow_create_ctx(struct sock *sk, + pr_debug("subflow=%p", ctx); + + ctx->tcp_sock = sk; ++ WRITE_ONCE(ctx->local_id, -1); + + return ctx; + } +@@ -1867,13 +1868,13 @@ static void subflow_ulp_clone(const struct request_sock *req, + new_ctx->idsn = subflow_req->idsn; + + /* this is the first subflow, id is always 0 */ +- new_ctx->local_id_valid = 1; ++ subflow_set_local_id(new_ctx, 0); + } else if (subflow_req->mp_join) { + new_ctx->ssn_offset = subflow_req->ssn_offset; + new_ctx->mp_join = 1; + new_ctx->fully_established = 1; + new_ctx->backup = subflow_req->backup; +- new_ctx->remote_id = subflow_req->remote_id; ++ 
WRITE_ONCE(new_ctx->remote_id, subflow_req->remote_id); + new_ctx->token = subflow_req->token; + new_ctx->thmac = subflow_req->thmac; + +diff --git a/net/netfilter/core.c b/net/netfilter/core.c +index 55a7f72d547cd..edf92074221e2 100644 +--- a/net/netfilter/core.c ++++ b/net/netfilter/core.c +@@ -707,6 +707,22 @@ void nf_conntrack_destroy(struct nf_conntrack *nfct) + } + EXPORT_SYMBOL(nf_conntrack_destroy); + ++void nf_ct_set_closing(struct nf_conntrack *nfct) ++{ ++ const struct nf_ct_hook *ct_hook; ++ ++ if (!nfct) ++ return; ++ ++ rcu_read_lock(); ++ ct_hook = rcu_dereference(nf_ct_hook); ++ if (ct_hook) ++ ct_hook->set_closing(nfct); ++ ++ rcu_read_unlock(); ++} ++EXPORT_SYMBOL_GPL(nf_ct_set_closing); ++ + bool nf_ct_get_tuple_skb(struct nf_conntrack_tuple *dst_tuple, + const struct sk_buff *skb) + { +diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c +index 7960262966094..024f93fc8c0bb 100644 +--- a/net/netfilter/nf_conntrack_core.c ++++ b/net/netfilter/nf_conntrack_core.c +@@ -2772,11 +2772,24 @@ int nf_conntrack_init_start(void) + return ret; + } + ++static void nf_conntrack_set_closing(struct nf_conntrack *nfct) ++{ ++ struct nf_conn *ct = nf_ct_to_nf_conn(nfct); ++ ++ switch (nf_ct_protonum(ct)) { ++ case IPPROTO_TCP: ++ nf_conntrack_tcp_set_closing(ct); ++ break; ++ } ++} ++ + static const struct nf_ct_hook nf_conntrack_hook = { + .update = nf_conntrack_update, + .destroy = nf_ct_destroy, + .get_tuple_skb = nf_conntrack_get_tuple_skb, + .attach = nf_conntrack_attach, ++ .set_closing = nf_conntrack_set_closing, ++ .confirm = __nf_conntrack_confirm, + }; + + void nf_conntrack_init_end(void) +diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c +index e0092bf273fd0..9480e638e5d15 100644 +--- a/net/netfilter/nf_conntrack_proto_tcp.c ++++ b/net/netfilter/nf_conntrack_proto_tcp.c +@@ -913,6 +913,41 @@ static bool tcp_can_early_drop(const struct nf_conn *ct) + return false; + } + ++void 
nf_conntrack_tcp_set_closing(struct nf_conn *ct) ++{ ++ enum tcp_conntrack old_state; ++ const unsigned int *timeouts; ++ u32 timeout; ++ ++ if (!nf_ct_is_confirmed(ct)) ++ return; ++ ++ spin_lock_bh(&ct->lock); ++ old_state = ct->proto.tcp.state; ++ ct->proto.tcp.state = TCP_CONNTRACK_CLOSE; ++ ++ if (old_state == TCP_CONNTRACK_CLOSE || ++ test_bit(IPS_FIXED_TIMEOUT_BIT, &ct->status)) { ++ spin_unlock_bh(&ct->lock); ++ return; ++ } ++ ++ timeouts = nf_ct_timeout_lookup(ct); ++ if (!timeouts) { ++ const struct nf_tcp_net *tn; ++ ++ tn = nf_tcp_pernet(nf_ct_net(ct)); ++ timeouts = tn->timeouts; ++ } ++ ++ timeout = timeouts[TCP_CONNTRACK_CLOSE]; ++ WRITE_ONCE(ct->timeout, timeout + nfct_time_stamp); ++ ++ spin_unlock_bh(&ct->lock); ++ ++ nf_conntrack_event_cache(IPCT_PROTOINFO, ct); ++} ++ + static void nf_ct_tcp_state_reset(struct ip_ct_tcp_state *state) + { + state->td_end = 0; +diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c +index e21ec3ad80939..d3ba947f43761 100644 +--- a/net/netfilter/nf_tables_api.c ++++ b/net/netfilter/nf_tables_api.c +@@ -4752,6 +4752,9 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info, + if (!(flags & NFT_SET_TIMEOUT)) + return -EINVAL; + ++ if (flags & NFT_SET_ANONYMOUS) ++ return -EOPNOTSUPP; ++ + err = nf_msecs_to_jiffies64(nla[NFTA_SET_TIMEOUT], &desc.timeout); + if (err) + return err; +@@ -4760,6 +4763,10 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info, + if (nla[NFTA_SET_GC_INTERVAL] != NULL) { + if (!(flags & NFT_SET_TIMEOUT)) + return -EINVAL; ++ ++ if (flags & NFT_SET_ANONYMOUS) ++ return -EOPNOTSUPP; ++ + desc.gc_int = ntohl(nla_get_be32(nla[NFTA_SET_GC_INTERVAL])); + } + +diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c +index e1623fbf36548..e4b8c02c5e6ae 100644 +--- a/net/netfilter/nft_compat.c ++++ b/net/netfilter/nft_compat.c +@@ -358,10 +358,20 @@ static int nft_target_validate(const struct nft_ctx *ctx, + + if 
(ctx->family != NFPROTO_IPV4 && + ctx->family != NFPROTO_IPV6 && ++ ctx->family != NFPROTO_INET && + ctx->family != NFPROTO_BRIDGE && + ctx->family != NFPROTO_ARP) + return -EOPNOTSUPP; + ++ ret = nft_chain_validate_hooks(ctx->chain, ++ (1 << NF_INET_PRE_ROUTING) | ++ (1 << NF_INET_LOCAL_IN) | ++ (1 << NF_INET_FORWARD) | ++ (1 << NF_INET_LOCAL_OUT) | ++ (1 << NF_INET_POST_ROUTING)); ++ if (ret) ++ return ret; ++ + if (nft_is_base_chain(ctx->chain)) { + const struct nft_base_chain *basechain = + nft_base_chain(ctx->chain); +@@ -607,10 +617,20 @@ static int nft_match_validate(const struct nft_ctx *ctx, + + if (ctx->family != NFPROTO_IPV4 && + ctx->family != NFPROTO_IPV6 && ++ ctx->family != NFPROTO_INET && + ctx->family != NFPROTO_BRIDGE && + ctx->family != NFPROTO_ARP) + return -EOPNOTSUPP; + ++ ret = nft_chain_validate_hooks(ctx->chain, ++ (1 << NF_INET_PRE_ROUTING) | ++ (1 << NF_INET_LOCAL_IN) | ++ (1 << NF_INET_FORWARD) | ++ (1 << NF_INET_LOCAL_OUT) | ++ (1 << NF_INET_POST_ROUTING)); ++ if (ret) ++ return ret; ++ + if (nft_is_base_chain(ctx->chain)) { + const struct nft_base_chain *basechain = + nft_base_chain(ctx->chain); +diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c +index 6857a4965fe87..e9b81cba1e2b4 100644 +--- a/net/netlink/af_netlink.c ++++ b/net/netlink/af_netlink.c +@@ -167,7 +167,7 @@ static inline u32 netlink_group_mask(u32 group) + static struct sk_buff *netlink_to_full_skb(const struct sk_buff *skb, + gfp_t gfp_mask) + { +- unsigned int len = skb_end_offset(skb); ++ unsigned int len = skb->len; + struct sk_buff *new; + + new = alloc_skb(len, gfp_mask); +diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c +index 93e1bfa72d791..2bd27b77769cb 100644 +--- a/net/tls/tls_sw.c ++++ b/net/tls/tls_sw.c +@@ -273,6 +273,8 @@ static int tls_do_decryption(struct sock *sk, + return 0; + + ret = crypto_wait_req(ret, &ctx->async_wait); ++ } else if (darg->async) { ++ atomic_dec(&ctx->decrypt_pending); + } + darg->async = false; + +@@ -2021,6 +2023,7 
@@ int tls_sw_recvmsg(struct sock *sk, + struct strp_msg *rxm; + struct tls_msg *tlm; + ssize_t copied = 0; ++ ssize_t peeked = 0; + bool async = false; + int target, err; + bool is_kvec = iov_iter_is_kvec(&msg->msg_iter); +@@ -2168,8 +2171,10 @@ int tls_sw_recvmsg(struct sock *sk, + if (err < 0) + goto put_on_rx_list_err; + +- if (is_peek) ++ if (is_peek) { ++ peeked += chunk; + goto put_on_rx_list; ++ } + + if (partially_consumed) { + rxm->offset += chunk; +@@ -2208,8 +2213,8 @@ int tls_sw_recvmsg(struct sock *sk, + + /* Drain records from the rx_list & copy if required */ + if (is_peek || is_kvec) +- err = process_rx_list(ctx, msg, &control, copied, +- decrypted, is_peek, NULL); ++ err = process_rx_list(ctx, msg, &control, copied + peeked, ++ decrypted - peeked, is_peek, NULL); + else + err = process_rx_list(ctx, msg, &control, 0, + async_copy_bytes, is_peek, NULL); +diff --git a/net/unix/garbage.c b/net/unix/garbage.c +index 767b338a7a2d4..ab2c83d58b62a 100644 +--- a/net/unix/garbage.c ++++ b/net/unix/garbage.c +@@ -284,9 +284,17 @@ void unix_gc(void) + * which are creating the cycle(s). + */ + skb_queue_head_init(&hitlist); +- list_for_each_entry(u, &gc_candidates, link) ++ list_for_each_entry(u, &gc_candidates, link) { + scan_children(&u->sk, inc_inflight, &hitlist); + ++#if IS_ENABLED(CONFIG_AF_UNIX_OOB) ++ if (u->oob_skb) { ++ kfree_skb(u->oob_skb); ++ u->oob_skb = NULL; ++ } ++#endif ++ } ++ + /* not_cycle_list contains those sockets which do not make up a + * cycle. Restore these to the inflight list. + */ +@@ -314,17 +322,6 @@ void unix_gc(void) + /* Here we are. Hitlist is filled. Die. 
*/ + __skb_queue_purge(&hitlist); + +-#if IS_ENABLED(CONFIG_AF_UNIX_OOB) +- list_for_each_entry_safe(u, next, &gc_candidates, link) { +- struct sk_buff *skb = u->oob_skb; +- +- if (skb) { +- u->oob_skb = NULL; +- kfree_skb(skb); +- } +- } +-#endif +- + spin_lock(&unix_gc_lock); + + /* There could be io_uring registered files, just push them back to +diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c +index c259d3227a9e2..1a3bd554e2586 100644 +--- a/net/wireless/nl80211.c ++++ b/net/wireless/nl80211.c +@@ -4137,6 +4137,8 @@ static int nl80211_set_interface(struct sk_buff *skb, struct genl_info *info) + + if (ntype != NL80211_IFTYPE_MESH_POINT) + return -EINVAL; ++ if (otype != NL80211_IFTYPE_MESH_POINT) ++ return -EINVAL; + if (netif_running(dev)) + return -EBUSY; + +diff --git a/security/landlock/fs.c b/security/landlock/fs.c +index 64ed7665455fe..d328965f32f7f 100644 +--- a/security/landlock/fs.c ++++ b/security/landlock/fs.c +@@ -824,8 +824,8 @@ static int current_check_refer_path(struct dentry *const old_dentry, + bool allow_parent1, allow_parent2; + access_mask_t access_request_parent1, access_request_parent2; + struct path mnt_dir; +- layer_mask_t layer_masks_parent1[LANDLOCK_NUM_ACCESS_FS], +- layer_masks_parent2[LANDLOCK_NUM_ACCESS_FS]; ++ layer_mask_t layer_masks_parent1[LANDLOCK_NUM_ACCESS_FS] = {}, ++ layer_masks_parent2[LANDLOCK_NUM_ACCESS_FS] = {}; + + if (!dom) + return 0; +diff --git a/security/tomoyo/common.c b/security/tomoyo/common.c +index f4cd9b58b2054..a7af085550b2d 100644 +--- a/security/tomoyo/common.c ++++ b/security/tomoyo/common.c +@@ -2648,13 +2648,14 @@ ssize_t tomoyo_write_control(struct tomoyo_io_buffer *head, + { + int error = buffer_len; + size_t avail_len = buffer_len; +- char *cp0 = head->write_buf; ++ char *cp0; + int idx; + + if (!head->write) + return -EINVAL; + if (mutex_lock_interruptible(&head->io_sem)) + return -EINTR; ++ cp0 = head->write_buf; + head->read_user_buf_avail = 0; + idx = tomoyo_read_lock(); + /* Read a 
line and dispatch it to the policy handler. */ +diff --git a/sound/core/Makefile b/sound/core/Makefile +index 2762f03d9b7bc..a7a1590b29526 100644 +--- a/sound/core/Makefile ++++ b/sound/core/Makefile +@@ -30,7 +30,6 @@ snd-ctl-led-objs := control_led.o + snd-rawmidi-objs := rawmidi.o + snd-timer-objs := timer.o + snd-hrtimer-objs := hrtimer.o +-snd-rtctimer-objs := rtctimer.o + snd-hwdep-objs := hwdep.o + snd-seq-device-objs := seq_device.o + +diff --git a/sound/firewire/amdtp-stream.c b/sound/firewire/amdtp-stream.c +index 9be2260e4ca2d..f8b644cb9157a 100644 +--- a/sound/firewire/amdtp-stream.c ++++ b/sound/firewire/amdtp-stream.c +@@ -934,7 +934,7 @@ static int generate_device_pkt_descs(struct amdtp_stream *s, + // to the reason. + unsigned int safe_cycle = increment_ohci_cycle_count(next_cycle, + IR_JUMBO_PAYLOAD_MAX_SKIP_CYCLES); +- lost = (compare_ohci_cycle_count(safe_cycle, cycle) > 0); ++ lost = (compare_ohci_cycle_count(safe_cycle, cycle) < 0); + } + if (lost) { + dev_err(&s->unit->device, "Detect discontinuity of cycle: %d %d\n", +diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c +index 92a656fb53212..75bd7b2fa4ee6 100644 +--- a/sound/pci/hda/patch_realtek.c ++++ b/sound/pci/hda/patch_realtek.c +@@ -9662,6 +9662,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { + SND_PCI_QUIRK(0x103c, 0x8973, "HP EliteBook 860 G9", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8974, "HP EliteBook 840 Aero G9", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8975, "HP EliteBook x360 840 Aero G9", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), ++ SND_PCI_QUIRK(0x103c, 0x897d, "HP mt440 Mobile Thin Client U74", ALC236_FIXUP_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8981, "HP Elite Dragonfly G3", ALC245_FIXUP_CS35L41_SPI_4), + SND_PCI_QUIRK(0x103c, 0x898e, "HP EliteBook 835 G9", ALC287_FIXUP_CS35L41_I2C_2), + SND_PCI_QUIRK(0x103c, 0x898f, "HP EliteBook 835 G9", ALC287_FIXUP_CS35L41_I2C_2), +@@ -9687,11 
+9688,13 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { + SND_PCI_QUIRK(0x103c, 0x8aa3, "HP ProBook 450 G9 (MB 8AA1)", ALC236_FIXUP_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8aa8, "HP EliteBook 640 G9 (MB 8AA6)", ALC236_FIXUP_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8aab, "HP EliteBook 650 G9 (MB 8AA9)", ALC236_FIXUP_HP_GPIO_LED), ++ SND_PCI_QUIRK(0x103c, 0x8ab9, "HP EliteBook 840 G8 (MB 8AB8)", ALC285_FIXUP_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8abb, "HP ZBook Firefly 14 G9", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8ad1, "HP EliteBook 840 14 inch G9 Notebook PC", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8ad2, "HP EliteBook 860 16 inch G9 Notebook PC", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8b0f, "HP Elite mt645 G7 Mobile Thin Client U81", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), + SND_PCI_QUIRK(0x103c, 0x8b2f, "HP 255 15.6 inch G10 Notebook PC", ALC236_FIXUP_HP_MUTE_LED_COEFBIT2), ++ SND_PCI_QUIRK(0x103c, 0x8b3f, "HP mt440 Mobile Thin Client U91", ALC236_FIXUP_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8b42, "HP", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8b43, "HP", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8b44, "HP", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), +diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h +index 201dc77ebbd77..d5d2183730b9f 100644 +--- a/tools/include/uapi/linux/bpf.h ++++ b/tools/include/uapi/linux/bpf.h +@@ -3109,6 +3109,10 @@ union bpf_attr { + * **BPF_FIB_LOOKUP_DIRECT** + * Do a direct table lookup vs full lookup using FIB + * rules. ++ * **BPF_FIB_LOOKUP_TBID** ++ * Used with BPF_FIB_LOOKUP_DIRECT. ++ * Use the routing table ID present in *params*->tbid ++ * for the fib lookup. + * **BPF_FIB_LOOKUP_OUTPUT** + * Perform lookup from an egress perspective (default is + * ingress). +@@ -3117,6 +3121,11 @@ union bpf_attr { + * and *params*->smac will not be set as output. 
A common + * use case is to call **bpf_redirect_neigh**\ () after + * doing **bpf_fib_lookup**\ (). ++ * **BPF_FIB_LOOKUP_SRC** ++ * Derive and set source IP addr in *params*->ipv{4,6}_src ++ * for the nexthop. If the src addr cannot be derived, ++ * **BPF_FIB_LKUP_RET_NO_SRC_ADDR** is returned. In this ++ * case, *params*->dmac and *params*->smac are not set either. + * + * *ctx* is either **struct xdp_md** for XDP programs or + * **struct sk_buff** tc cls_act programs. +@@ -6687,6 +6696,8 @@ enum { + BPF_FIB_LOOKUP_DIRECT = (1U << 0), + BPF_FIB_LOOKUP_OUTPUT = (1U << 1), + BPF_FIB_LOOKUP_SKIP_NEIGH = (1U << 2), ++ BPF_FIB_LOOKUP_TBID = (1U << 3), ++ BPF_FIB_LOOKUP_SRC = (1U << 4), + }; + + enum { +@@ -6699,6 +6710,7 @@ enum { + BPF_FIB_LKUP_RET_UNSUPP_LWT, /* fwd requires encapsulation */ + BPF_FIB_LKUP_RET_NO_NEIGH, /* no neighbor entry for nh */ + BPF_FIB_LKUP_RET_FRAG_NEEDED, /* fragmentation required to fwd */ ++ BPF_FIB_LKUP_RET_NO_SRC_ADDR, /* failed to derive IP src addr */ + }; + + struct bpf_fib_lookup { +@@ -6733,6 +6745,9 @@ struct bpf_fib_lookup { + __u32 rt_metric; + }; + ++ /* input: source address to consider for lookup ++ * output: source address result from lookup ++ */ + union { + __be32 ipv4_src; + __u32 ipv6_src[4]; /* in6_addr; network order */ +@@ -6747,9 +6762,19 @@ struct bpf_fib_lookup { + __u32 ipv6_dst[4]; /* in6_addr; network order */ + }; + +- /* output */ +- __be16 h_vlan_proto; +- __be16 h_vlan_TCI; ++ union { ++ struct { ++ /* output */ ++ __be16 h_vlan_proto; ++ __be16 h_vlan_TCI; ++ }; ++ /* input: when accompanied with the ++ * 'BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_TBID` flags, a ++ * specific routing table to use for the fib lookup. 
++ */ ++ __u32 tbid; ++ }; ++ + __u8 smac[6]; /* ETH_ALEN */ + __u8 dmac[6]; /* ETH_ALEN */ + }; +diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh +index 2107579e2939d..a20dca9d26d68 100755 +--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh ++++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh +@@ -144,6 +144,11 @@ check_tools() + exit $ksft_skip + fi + ++ if ! ss -h | grep -q MPTCP; then ++ echo "SKIP: ss tool does not support MPTCP" ++ exit $ksft_skip ++ fi ++ + # Use the legacy version if available to support old kernel versions + if iptables-legacy -V &> /dev/null; then + iptables="iptables-legacy" |